diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/ChangeLog | 6 | ||||
| -rw-r--r-- | src/coding.c | 24 |
2 files changed, 25 insertions, 5 deletions
diff --git a/src/ChangeLog b/src/ChangeLog index 5eac57c3543..9d7f54bddee 100644 --- a/src/ChangeLog +++ b/src/ChangeLog | |||
| @@ -1,4 +1,8 @@ | |||
| 1 | 2002-05-26 Dave Love <d.love@dl.ac.uk> | 1 | 2002-05-27 Dave Love <fx@gnu.org> |
| 2 | |||
| 3 | * coding.c (decode_coding_utf_8): Reject overlong sequences. | ||
| 4 | |||
| 5 | 2002-05-26 Dave Love <fx@gnu.org> | ||
| 2 | 6 | ||
| 3 | * coding.c: Doc fixes. | 7 | * coding.c: Doc fixes. |
| 4 | (Fcoding_system_aliases): Fix return value. | 8 | (Fcoding_system_aliases): Fix return value. |
diff --git a/src/coding.c b/src/coding.c index a4ad9c6a542..1fc59a02bb1 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -1078,6 +1078,7 @@ detect_coding_utf_8 (coding, mask) | |||
| 1078 | } | 1078 | } |
| 1079 | 1079 | ||
| 1080 | 1080 | ||
| 1081 | /* Fixme: should surrogate code points (U+D800..U+DFFF) be rejected? */ | ||
| 1081 | static void | 1082 | static void |
| 1082 | decode_coding_utf_8 (coding) | 1083 | decode_coding_utf_8 (coding) |
| 1083 | struct coding_system *coding; | 1084 | struct coding_system *coding; |
| @@ -1126,23 +1127,38 @@ decode_coding_utf_8 (coding) | |||
| 1126 | if (! UTF_8_EXTRA_OCTET_P (c2)) | 1127 | if (! UTF_8_EXTRA_OCTET_P (c2)) |
| 1127 | goto invalid_code; | 1128 | goto invalid_code; |
| 1128 | if (UTF_8_2_OCTET_LEADING_P (c1)) | 1129 | if (UTF_8_2_OCTET_LEADING_P (c1)) |
| 1129 | c = ((c1 & 0x1F) << 6) | (c2 & 0x3F); | 1130 | { |
| 1131 | c = ((c1 & 0x1F) << 6) | (c2 & 0x3F); | ||
| 1132 | /* Reject overlong sequences here and below. Encoders that | ||
| 1133 | produce them are incorrect; such sequences can be misleading, | ||
| 1134 | and they break read/write invariance. */ | ||
| 1135 | if (c < 128) | ||
| 1136 | goto invalid_code; | ||
| 1137 | } | ||
| 1130 | else | 1138 | else |
| 1131 | { | 1139 | { |
| 1132 | ONE_MORE_BYTE (c3); | 1140 | ONE_MORE_BYTE (c3); |
| 1133 | if (! UTF_8_EXTRA_OCTET_P (c3)) | 1141 | if (! UTF_8_EXTRA_OCTET_P (c3)) |
| 1134 | goto invalid_code; | 1142 | goto invalid_code; |
| 1135 | if (UTF_8_3_OCTET_LEADING_P (c1)) | 1143 | if (UTF_8_3_OCTET_LEADING_P (c1)) |
| 1136 | c = (((c1 & 0xF) << 12) | 1144 | { |
| 1137 | | ((c2 & 0x3F) << 6) | (c3 & 0x3F)); | 1145 | c = (((c1 & 0xF) << 12) |
| 1146 | | ((c2 & 0x3F) << 6) | (c3 & 0x3F)); | ||
| 1147 | if (c < 0x800) | ||
| 1148 | goto invalid_code; | ||
| 1149 | } | ||
| 1138 | else | 1150 | else |
| 1139 | { | 1151 | { |
| 1140 | ONE_MORE_BYTE (c4); | 1152 | ONE_MORE_BYTE (c4); |
| 1141 | if (! UTF_8_EXTRA_OCTET_P (c4)) | 1153 | if (! UTF_8_EXTRA_OCTET_P (c4)) |
| 1142 | goto invalid_code; | 1154 | goto invalid_code; |
| 1143 | if (UTF_8_4_OCTET_LEADING_P (c1)) | 1155 | if (UTF_8_4_OCTET_LEADING_P (c1)) |
| 1156 | { | ||
| 1144 | c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12) | 1157 | c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12) |
| 1145 | | ((c3 & 0x3F) << 6) | (c4 & 0x3F)); | 1158 | | ((c3 & 0x3F) << 6) | (c4 & 0x3F)); |
| 1159 | if (c < 0x10000) | ||
| 1160 | goto invalid_code; | ||
| 1161 | } | ||
| 1146 | else | 1162 | else |
| 1147 | { | 1163 | { |
| 1148 | ONE_MORE_BYTE (c5); | 1164 | ONE_MORE_BYTE (c5); |
| @@ -1153,7 +1169,7 @@ decode_coding_utf_8 (coding) | |||
| 1153 | c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18) | 1169 | c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18) |
| 1154 | | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6) | 1170 | | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6) |
| 1155 | | (c5 & 0x3F)); | 1171 | | (c5 & 0x3F)); |
| 1156 | if (c > MAX_CHAR) | 1172 | if ((c > MAX_CHAR) || (c < 0x200000)) |
| 1157 | goto invalid_code; | 1173 | goto invalid_code; |
| 1158 | } | 1174 | } |
| 1159 | else | 1175 | else |