diff options
| author | Dave Love | 2002-05-27 22:19:17 +0000 |
|---|---|---|
| committer | Dave Love | 2002-05-27 22:19:17 +0000 |
| commit | b0edb2c59cccb8fa81b35ce03a39367f6ac9475f (patch) | |
| tree | aae8f98ffc0aaed78d9691cc68739c2548f5641a /src/coding.c | |
| parent | afb2e0083ac2bebe4992e0cd02bec274da4cdfb1 (diff) | |
| download | emacs-b0edb2c59cccb8fa81b35ce03a39367f6ac9475f.tar.gz emacs-b0edb2c59cccb8fa81b35ce03a39367f6ac9475f.zip | |
(decode_coding_utf_8): Reject overlong sequences.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 24 |
1 file changed, 20 insertions, 4 deletions
diff --git a/src/coding.c b/src/coding.c index a4ad9c6a542..1fc59a02bb1 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -1078,6 +1078,7 @@ detect_coding_utf_8 (coding, mask) | |||
| 1078 | } | 1078 | } |
| 1079 | 1079 | ||
| 1080 | 1080 | ||
| 1081 | /* Fixme: deal with surrogates? */ | ||
| 1081 | static void | 1082 | static void |
| 1082 | decode_coding_utf_8 (coding) | 1083 | decode_coding_utf_8 (coding) |
| 1083 | struct coding_system *coding; | 1084 | struct coding_system *coding; |
| @@ -1126,23 +1127,38 @@ decode_coding_utf_8 (coding) | |||
| 1126 | if (! UTF_8_EXTRA_OCTET_P (c2)) | 1127 | if (! UTF_8_EXTRA_OCTET_P (c2)) |
| 1127 | goto invalid_code; | 1128 | goto invalid_code; |
| 1128 | if (UTF_8_2_OCTET_LEADING_P (c1)) | 1129 | if (UTF_8_2_OCTET_LEADING_P (c1)) |
| 1129 | c = ((c1 & 0x1F) << 6) | (c2 & 0x3F); | 1130 | { |
| 1131 | c = ((c1 & 0x1F) << 6) | (c2 & 0x3F); | ||
| 1132 | /* Reject overlong sequences here and below. Encoders | ||
| 1133 | producing them are incorrect, they can be misleading, | ||
| 1134 | and they mess up read/write invariance. */ | ||
| 1135 | if (c < 128) | ||
| 1136 | goto invalid_code; | ||
| 1137 | } | ||
| 1130 | else | 1138 | else |
| 1131 | { | 1139 | { |
| 1132 | ONE_MORE_BYTE (c3); | 1140 | ONE_MORE_BYTE (c3); |
| 1133 | if (! UTF_8_EXTRA_OCTET_P (c3)) | 1141 | if (! UTF_8_EXTRA_OCTET_P (c3)) |
| 1134 | goto invalid_code; | 1142 | goto invalid_code; |
| 1135 | if (UTF_8_3_OCTET_LEADING_P (c1)) | 1143 | if (UTF_8_3_OCTET_LEADING_P (c1)) |
| 1136 | c = (((c1 & 0xF) << 12) | 1144 | { |
| 1137 | | ((c2 & 0x3F) << 6) | (c3 & 0x3F)); | 1145 | c = (((c1 & 0xF) << 12) |
| 1146 | | ((c2 & 0x3F) << 6) | (c3 & 0x3F)); | ||
| 1147 | if (c < 0x800) | ||
| 1148 | goto invalid_code; | ||
| 1149 | } | ||
| 1138 | else | 1150 | else |
| 1139 | { | 1151 | { |
| 1140 | ONE_MORE_BYTE (c4); | 1152 | ONE_MORE_BYTE (c4); |
| 1141 | if (! UTF_8_EXTRA_OCTET_P (c4)) | 1153 | if (! UTF_8_EXTRA_OCTET_P (c4)) |
| 1142 | goto invalid_code; | 1154 | goto invalid_code; |
| 1143 | if (UTF_8_4_OCTET_LEADING_P (c1)) | 1155 | if (UTF_8_4_OCTET_LEADING_P (c1)) |
| 1156 | { | ||
| 1144 | c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12) | 1157 | c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12) |
| 1145 | | ((c3 & 0x3F) << 6) | (c4 & 0x3F)); | 1158 | | ((c3 & 0x3F) << 6) | (c4 & 0x3F)); |
| 1159 | if (c < 0x10000) | ||
| 1160 | goto invalid_code; | ||
| 1161 | } | ||
| 1146 | else | 1162 | else |
| 1147 | { | 1163 | { |
| 1148 | ONE_MORE_BYTE (c5); | 1164 | ONE_MORE_BYTE (c5); |
| @@ -1153,7 +1169,7 @@ decode_coding_utf_8 (coding) | |||
| 1153 | c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18) | 1169 | c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18) |
| 1154 | | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6) | 1170 | | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6) |
| 1155 | | (c5 & 0x3F)); | 1171 | | (c5 & 0x3F)); |
| 1156 | if (c > MAX_CHAR) | 1172 | if ((c > MAX_CHAR) || (c < 0x200000)) |
| 1157 | goto invalid_code; | 1173 | goto invalid_code; |
| 1158 | } | 1174 | } |
| 1159 | else | 1175 | else |