aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/ChangeLog6
-rw-r--r--src/coding.c24
2 files changed, 25 insertions, 5 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index 5eac57c3543..9d7f54bddee 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,4 +1,8 @@
12002-05-26 Dave Love <d.love@dl.ac.uk> 12002-05-27 Dave Love <fx@gnu.org>
2
3 * coding.c (decode_coding_utf_8): Reject overlong sequences.
4
52002-05-26 Dave Love <fx@gnu.org>
2 6
3 * coding.c: Doc fixes. 7 * coding.c: Doc fixes.
4 (Fcoding_system_aliases): Fix return value. 8 (Fcoding_system_aliases): Fix return value.
diff --git a/src/coding.c b/src/coding.c
index a4ad9c6a542..1fc59a02bb1 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -1078,6 +1078,7 @@ detect_coding_utf_8 (coding, mask)
1078} 1078}
1079 1079
1080 1080
1081/* Fixme: deal with surrogates? */
1081static void 1082static void
1082decode_coding_utf_8 (coding) 1083decode_coding_utf_8 (coding)
1083 struct coding_system *coding; 1084 struct coding_system *coding;
@@ -1126,23 +1127,38 @@ decode_coding_utf_8 (coding)
1126 if (! UTF_8_EXTRA_OCTET_P (c2)) 1127 if (! UTF_8_EXTRA_OCTET_P (c2))
1127 goto invalid_code; 1128 goto invalid_code;
1128 if (UTF_8_2_OCTET_LEADING_P (c1)) 1129 if (UTF_8_2_OCTET_LEADING_P (c1))
1129 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F); 1130 {
1131 c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1132 /* Reject overlong sequences here and below. Encoders
1133 that produce them are incorrect; such sequences can be
1134 misleading, and they break read/write invariance. */
1135 if (c < 128)
1136 goto invalid_code;
1137 }
1130 else 1138 else
1131 { 1139 {
1132 ONE_MORE_BYTE (c3); 1140 ONE_MORE_BYTE (c3);
1133 if (! UTF_8_EXTRA_OCTET_P (c3)) 1141 if (! UTF_8_EXTRA_OCTET_P (c3))
1134 goto invalid_code; 1142 goto invalid_code;
1135 if (UTF_8_3_OCTET_LEADING_P (c1)) 1143 if (UTF_8_3_OCTET_LEADING_P (c1))
1136 c = (((c1 & 0xF) << 12) 1144 {
1137 | ((c2 & 0x3F) << 6) | (c3 & 0x3F)); 1145 c = (((c1 & 0xF) << 12)
1146 | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1147 if (c < 0x800)
1148 goto invalid_code;
1149 }
1138 else 1150 else
1139 { 1151 {
1140 ONE_MORE_BYTE (c4); 1152 ONE_MORE_BYTE (c4);
1141 if (! UTF_8_EXTRA_OCTET_P (c4)) 1153 if (! UTF_8_EXTRA_OCTET_P (c4))
1142 goto invalid_code; 1154 goto invalid_code;
1143 if (UTF_8_4_OCTET_LEADING_P (c1)) 1155 if (UTF_8_4_OCTET_LEADING_P (c1))
1156 {
1144 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12) 1157 c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1145 | ((c3 & 0x3F) << 6) | (c4 & 0x3F)); 1158 | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1159 if (c < 0x10000)
1160 goto invalid_code;
1161 }
1146 else 1162 else
1147 { 1163 {
1148 ONE_MORE_BYTE (c5); 1164 ONE_MORE_BYTE (c5);
@@ -1153,7 +1169,7 @@ decode_coding_utf_8 (coding)
1153 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18) 1169 c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1154 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6) 1170 | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1155 | (c5 & 0x3F)); 1171 | (c5 & 0x3F));
1156 if (c > MAX_CHAR) 1172 if ((c > MAX_CHAR) || (c < 0x200000))
1157 goto invalid_code; 1173 goto invalid_code;
1158 } 1174 }
1159 else 1175 else