diff options
| author | Kenichi Handa | 2003-12-29 07:52:49 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2003-12-29 07:52:49 +0000 |
| commit | 065e359516274ece9178f9ddef01ae8573ff18a7 (patch) | |
| tree | fa9a061eea97e56998a3ea33b9e440ca4542eaf4 /src/coding.c | |
| parent | 63e114786927f008363f57d2fc52c613782c59b5 (diff) | |
| download | emacs-065e359516274ece9178f9ddef01ae8573ff18a7.tar.gz emacs-065e359516274ece9178f9ddef01ae8573ff18a7.zip | |
(Qinsufficient_source, Qinconsistent_eol)
(Qinvalid_source, Qinterrupted, Qinsufficient_memory): New
variables.
(Vlast_code_conversion_error): New variable.
(syms_of_coding): DEFSYM or DEFVAR_LISP them.
(ONE_MORE_BYTE): Record error if any instead of signaling an
error. If non-ASCII multibyte char is found, return the negative
value of the code. All callers changed to check it.
(ONE_MORE_BYTE_NO_CHECK): Likewise.
(record_conversion_result): New function. All code that sets
coding->result is changed to call this function.
(detect_coding_utf_8): Don't use the local variable incomplete.
(decode_coding_utf_8): Likewise.
(emacs_mule_char): Change the second arg to `const'.
(detect_coding_emacs_mule): Don't use the local variable
incomplete.
(detect_coding_sjis): Likewise.
(detect_coding_big5): Likewise.
(decode_coding): Fix the flushing out of unprocessed data.
(make_conversion_work_buffer): Fix making of a work buffer.
(decode_coding_object): Return coding->dst_object.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 414 |
1 files changed, 270 insertions, 144 deletions
diff --git a/src/coding.c b/src/coding.c index 353f165f4ca..9da19e53c2e 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -318,6 +318,9 @@ Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; | |||
| 318 | Lisp_Object Qstart_process, Qopen_network_stream; | 318 | Lisp_Object Qstart_process, Qopen_network_stream; |
| 319 | Lisp_Object Qtarget_idx; | 319 | Lisp_Object Qtarget_idx; |
| 320 | 320 | ||
| 321 | Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source; | ||
| 322 | Lisp_Object Qinterrupted, Qinsufficient_memory; | ||
| 323 | |||
| 321 | int coding_system_require_warning; | 324 | int coding_system_require_warning; |
| 322 | 325 | ||
| 323 | Lisp_Object Vselect_safe_coding_system_function; | 326 | Lisp_Object Vselect_safe_coding_system_function; |
| @@ -347,7 +350,8 @@ Lisp_Object Vcoding_system_for_read; | |||
| 347 | Lisp_Object Vcoding_system_for_write; | 350 | Lisp_Object Vcoding_system_for_write; |
| 348 | /* Coding-system actually used in the latest I/O. */ | 351 | /* Coding-system actually used in the latest I/O. */ |
| 349 | Lisp_Object Vlast_coding_system_used; | 352 | Lisp_Object Vlast_coding_system_used; |
| 350 | 353 | /* Set to non-nil when an error is detected while code conversion. */ | |
| 354 | Lisp_Object Vlast_code_conversion_error; | ||
| 351 | /* A vector of length 256 which contains information about special | 355 | /* A vector of length 256 which contains information about special |
| 352 | Latin codes (especially for dealing with Microsoft codes). */ | 356 | Latin codes (especially for dealing with Microsoft codes). */ |
| 353 | Lisp_Object Vlatin_extra_code_table; | 357 | Lisp_Object Vlatin_extra_code_table; |
| @@ -406,6 +410,8 @@ Lisp_Object Vsjis_coding_system; | |||
| 406 | Lisp_Object Vbig5_coding_system; | 410 | Lisp_Object Vbig5_coding_system; |
| 407 | 411 | ||
| 408 | 412 | ||
| 413 | static void record_conversion_result (struct coding_system *coding, | ||
| 414 | enum coding_result_code result); | ||
| 409 | static int detect_coding_utf_8 P_ ((struct coding_system *, | 415 | static int detect_coding_utf_8 P_ ((struct coding_system *, |
| 410 | struct coding_detection_info *info)); | 416 | struct coding_detection_info *info)); |
| 411 | static void decode_coding_utf_8 P_ ((struct coding_system *)); | 417 | static void decode_coding_utf_8 P_ ((struct coding_system *)); |
| @@ -718,40 +724,52 @@ static struct coding_system coding_categories[coding_category_max]; | |||
| 718 | 724 | ||
| 719 | /* Safely get one byte from the source text pointed by SRC which ends | 725 | /* Safely get one byte from the source text pointed by SRC which ends |
| 720 | at SRC_END, and set C to that byte. If there are not enough bytes | 726 | at SRC_END, and set C to that byte. If there are not enough bytes |
| 721 | in the source, it jumps to `no_more_source'. The caller | 727 | in the source, it jumps to `no_more_source'. If multibytep is |
| 722 | should declare and set these variables appropriately in advance: | 728 | nonzero, and a multibyte character is found at SRC, set C to the |
| 723 | src, src_end, multibytep | 729 | negative value of the character code. The caller should declare |
| 724 | */ | 730 | and set these variables appropriately in advance: |
| 731 | src, src_end, multibytep */ | ||
| 725 | 732 | ||
| 726 | #define ONE_MORE_BYTE(c) \ | 733 | #define ONE_MORE_BYTE(c) \ |
| 727 | do { \ | 734 | do { \ |
| 728 | if (src == src_end) \ | 735 | if (src == src_end) \ |
| 729 | { \ | 736 | { \ |
| 730 | if (src_base < src) \ | 737 | if (src_base < src) \ |
| 731 | coding->result = CODING_RESULT_INSUFFICIENT_SRC; \ | 738 | record_conversion_result \ |
| 732 | goto no_more_source; \ | 739 | (coding, CODING_RESULT_INSUFFICIENT_SRC); \ |
| 733 | } \ | 740 | goto no_more_source; \ |
| 734 | c = *src++; \ | 741 | } \ |
| 735 | if (multibytep && (c & 0x80)) \ | 742 | c = *src++; \ |
| 736 | { \ | 743 | if (multibytep && (c & 0x80)) \ |
| 737 | if ((c & 0xFE) != 0xC0) \ | 744 | { \ |
| 738 | error ("Undecodable char found"); \ | 745 | if ((c & 0xFE) == 0xC0) \ |
| 739 | c = ((c & 1) << 6) | *src++; \ | 746 | c = ((c & 1) << 6) | *src++; \ |
| 740 | } \ | 747 | else \ |
| 741 | consumed_chars++; \ | 748 | { \ |
| 749 | c = - string_char (--src, &src, NULL); \ | ||
| 750 | record_conversion_result \ | ||
| 751 | (coding, CODING_RESULT_INVALID_SRC); \ | ||
| 752 | } \ | ||
| 753 | } \ | ||
| 754 | consumed_chars++; \ | ||
| 742 | } while (0) | 755 | } while (0) |
| 743 | 756 | ||
| 744 | 757 | ||
| 745 | #define ONE_MORE_BYTE_NO_CHECK(c) \ | 758 | #define ONE_MORE_BYTE_NO_CHECK(c) \ |
| 746 | do { \ | 759 | do { \ |
| 747 | c = *src++; \ | 760 | c = *src++; \ |
| 748 | if (multibytep && (c & 0x80)) \ | 761 | if (multibytep && (c & 0x80)) \ |
| 749 | { \ | 762 | { \ |
| 750 | if ((c & 0xFE) != 0xC0) \ | 763 | if ((c & 0xFE) == 0xC0) \ |
| 751 | error ("Undecodable char found"); \ | 764 | c = ((c & 1) << 6) | *src++; \ |
| 752 | c = ((c & 1) << 6) | *src++; \ | 765 | else \ |
| 753 | } \ | 766 | { \ |
| 754 | consumed_chars++; \ | 767 | c = - string_char (--src, &src, NULL); \ |
| 768 | record_conversion_result \ | ||
| 769 | (coding, CODING_RESULT_INVALID_SRC); \ | ||
| 770 | } \ | ||
| 771 | } \ | ||
| 772 | consumed_chars++; \ | ||
| 755 | } while (0) | 773 | } while (0) |
| 756 | 774 | ||
| 757 | 775 | ||
| @@ -839,6 +857,31 @@ static struct coding_system coding_categories[coding_category_max]; | |||
| 839 | } while (0) | 857 | } while (0) |
| 840 | 858 | ||
| 841 | 859 | ||
| 860 | static void | ||
| 861 | record_conversion_result (struct coding_system *coding, | ||
| 862 | enum coding_result_code result) | ||
| 863 | { | ||
| 864 | coding->result = result; | ||
| 865 | switch (result) | ||
| 866 | { | ||
| 867 | case CODING_RESULT_INSUFFICIENT_SRC: | ||
| 868 | Vlast_code_conversion_error = Qinsufficient_source; | ||
| 869 | break; | ||
| 870 | case CODING_RESULT_INCONSISTENT_EOL: | ||
| 871 | Vlast_code_conversion_error = Qinconsistent_eol; | ||
| 872 | break; | ||
| 873 | case CODING_RESULT_INVALID_SRC: | ||
| 874 | Vlast_code_conversion_error = Qinvalid_source; | ||
| 875 | break; | ||
| 876 | case CODING_RESULT_INTERRUPT: | ||
| 877 | Vlast_code_conversion_error = Qinterrupted; | ||
| 878 | break; | ||
| 879 | case CODING_RESULT_INSUFFICIENT_MEM: | ||
| 880 | Vlast_code_conversion_error = Qinsufficient_memory; | ||
| 881 | break; | ||
| 882 | } | ||
| 883 | } | ||
| 884 | |||
| 842 | #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \ | 885 | #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \ |
| 843 | do { \ | 886 | do { \ |
| 844 | charset_map_loaded = 0; \ | 887 | charset_map_loaded = 0; \ |
| @@ -971,7 +1014,7 @@ alloc_destination (coding, nbytes, dst) | |||
| 971 | coding_alloc_by_making_gap (coding, nbytes); | 1014 | coding_alloc_by_making_gap (coding, nbytes); |
| 972 | else | 1015 | else |
| 973 | coding_alloc_by_realloc (coding, nbytes); | 1016 | coding_alloc_by_realloc (coding, nbytes); |
| 974 | coding->result = CODING_RESULT_SUCCESS; | 1017 | record_conversion_result (coding, CODING_RESULT_SUCCESS); |
| 975 | coding_set_destination (coding); | 1018 | coding_set_destination (coding); |
| 976 | dst = coding->destination + offset; | 1019 | dst = coding->destination + offset; |
| 977 | return dst; | 1020 | return dst; |
| @@ -1049,12 +1092,11 @@ detect_coding_utf_8 (coding, detect_info) | |||
| 1049 | struct coding_system *coding; | 1092 | struct coding_system *coding; |
| 1050 | struct coding_detection_info *detect_info; | 1093 | struct coding_detection_info *detect_info; |
| 1051 | { | 1094 | { |
| 1052 | const unsigned char *src = coding->source, *src_base = src; | 1095 | const unsigned char *src = coding->source, *src_base; |
| 1053 | const unsigned char *src_end = coding->source + coding->src_bytes; | 1096 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 1054 | int multibytep = coding->src_multibyte; | 1097 | int multibytep = coding->src_multibyte; |
| 1055 | int consumed_chars = 0; | 1098 | int consumed_chars = 0; |
| 1056 | int found = 0; | 1099 | int found = 0; |
| 1057 | int incomplete; | ||
| 1058 | 1100 | ||
| 1059 | detect_info->checked |= CATEGORY_MASK_UTF_8; | 1101 | detect_info->checked |= CATEGORY_MASK_UTF_8; |
| 1060 | /* A coding system of this category is always ASCII compatible. */ | 1102 | /* A coding system of this category is always ASCII compatible. */ |
| @@ -1064,13 +1106,12 @@ detect_coding_utf_8 (coding, detect_info) | |||
| 1064 | { | 1106 | { |
| 1065 | int c, c1, c2, c3, c4; | 1107 | int c, c1, c2, c3, c4; |
| 1066 | 1108 | ||
| 1067 | incomplete = 0; | 1109 | src_base = src; |
| 1068 | ONE_MORE_BYTE (c); | 1110 | ONE_MORE_BYTE (c); |
| 1069 | if (UTF_8_1_OCTET_P (c)) | 1111 | if (c < 0 || UTF_8_1_OCTET_P (c)) |
| 1070 | continue; | 1112 | continue; |
| 1071 | incomplete = 1; | ||
| 1072 | ONE_MORE_BYTE (c1); | 1113 | ONE_MORE_BYTE (c1); |
| 1073 | if (! UTF_8_EXTRA_OCTET_P (c1)) | 1114 | if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1)) |
| 1074 | break; | 1115 | break; |
| 1075 | if (UTF_8_2_OCTET_LEADING_P (c)) | 1116 | if (UTF_8_2_OCTET_LEADING_P (c)) |
| 1076 | { | 1117 | { |
| @@ -1078,7 +1119,7 @@ detect_coding_utf_8 (coding, detect_info) | |||
| 1078 | continue; | 1119 | continue; |
| 1079 | } | 1120 | } |
| 1080 | ONE_MORE_BYTE (c2); | 1121 | ONE_MORE_BYTE (c2); |
| 1081 | if (! UTF_8_EXTRA_OCTET_P (c2)) | 1122 | if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2)) |
| 1082 | break; | 1123 | break; |
| 1083 | if (UTF_8_3_OCTET_LEADING_P (c)) | 1124 | if (UTF_8_3_OCTET_LEADING_P (c)) |
| 1084 | { | 1125 | { |
| @@ -1086,7 +1127,7 @@ detect_coding_utf_8 (coding, detect_info) | |||
| 1086 | continue; | 1127 | continue; |
| 1087 | } | 1128 | } |
| 1088 | ONE_MORE_BYTE (c3); | 1129 | ONE_MORE_BYTE (c3); |
| 1089 | if (! UTF_8_EXTRA_OCTET_P (c3)) | 1130 | if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3)) |
| 1090 | break; | 1131 | break; |
| 1091 | if (UTF_8_4_OCTET_LEADING_P (c)) | 1132 | if (UTF_8_4_OCTET_LEADING_P (c)) |
| 1092 | { | 1133 | { |
| @@ -1094,7 +1135,7 @@ detect_coding_utf_8 (coding, detect_info) | |||
| 1094 | continue; | 1135 | continue; |
| 1095 | } | 1136 | } |
| 1096 | ONE_MORE_BYTE (c4); | 1137 | ONE_MORE_BYTE (c4); |
| 1097 | if (! UTF_8_EXTRA_OCTET_P (c4)) | 1138 | if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4)) |
| 1098 | break; | 1139 | break; |
| 1099 | if (UTF_8_5_OCTET_LEADING_P (c)) | 1140 | if (UTF_8_5_OCTET_LEADING_P (c)) |
| 1100 | { | 1141 | { |
| @@ -1107,7 +1148,7 @@ detect_coding_utf_8 (coding, detect_info) | |||
| 1107 | return 0; | 1148 | return 0; |
| 1108 | 1149 | ||
| 1109 | no_more_source: | 1150 | no_more_source: |
| 1110 | if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) | 1151 | if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK) |
| 1111 | { | 1152 | { |
| 1112 | detect_info->rejected |= CATEGORY_MASK_UTF_8; | 1153 | detect_info->rejected |= CATEGORY_MASK_UTF_8; |
| 1113 | return 0; | 1154 | return 0; |
| @@ -1143,14 +1184,18 @@ decode_coding_utf_8 (coding) | |||
| 1143 | break; | 1184 | break; |
| 1144 | 1185 | ||
| 1145 | ONE_MORE_BYTE (c1); | 1186 | ONE_MORE_BYTE (c1); |
| 1146 | if (UTF_8_1_OCTET_P(c1)) | 1187 | if (c1 < 0) |
| 1188 | { | ||
| 1189 | c = - c1; | ||
| 1190 | } | ||
| 1191 | else if (UTF_8_1_OCTET_P(c1)) | ||
| 1147 | { | 1192 | { |
| 1148 | c = c1; | 1193 | c = c1; |
| 1149 | } | 1194 | } |
| 1150 | else | 1195 | else |
| 1151 | { | 1196 | { |
| 1152 | ONE_MORE_BYTE (c2); | 1197 | ONE_MORE_BYTE (c2); |
| 1153 | if (! UTF_8_EXTRA_OCTET_P (c2)) | 1198 | if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2)) |
| 1154 | goto invalid_code; | 1199 | goto invalid_code; |
| 1155 | if (UTF_8_2_OCTET_LEADING_P (c1)) | 1200 | if (UTF_8_2_OCTET_LEADING_P (c1)) |
| 1156 | { | 1201 | { |
| @@ -1164,7 +1209,7 @@ decode_coding_utf_8 (coding) | |||
| 1164 | else | 1209 | else |
| 1165 | { | 1210 | { |
| 1166 | ONE_MORE_BYTE (c3); | 1211 | ONE_MORE_BYTE (c3); |
| 1167 | if (! UTF_8_EXTRA_OCTET_P (c3)) | 1212 | if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3)) |
| 1168 | goto invalid_code; | 1213 | goto invalid_code; |
| 1169 | if (UTF_8_3_OCTET_LEADING_P (c1)) | 1214 | if (UTF_8_3_OCTET_LEADING_P (c1)) |
| 1170 | { | 1215 | { |
| @@ -1177,7 +1222,7 @@ decode_coding_utf_8 (coding) | |||
| 1177 | else | 1222 | else |
| 1178 | { | 1223 | { |
| 1179 | ONE_MORE_BYTE (c4); | 1224 | ONE_MORE_BYTE (c4); |
| 1180 | if (! UTF_8_EXTRA_OCTET_P (c4)) | 1225 | if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4)) |
| 1181 | goto invalid_code; | 1226 | goto invalid_code; |
| 1182 | if (UTF_8_4_OCTET_LEADING_P (c1)) | 1227 | if (UTF_8_4_OCTET_LEADING_P (c1)) |
| 1183 | { | 1228 | { |
| @@ -1189,7 +1234,7 @@ decode_coding_utf_8 (coding) | |||
| 1189 | else | 1234 | else |
| 1190 | { | 1235 | { |
| 1191 | ONE_MORE_BYTE (c5); | 1236 | ONE_MORE_BYTE (c5); |
| 1192 | if (! UTF_8_EXTRA_OCTET_P (c5)) | 1237 | if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5)) |
| 1193 | goto invalid_code; | 1238 | goto invalid_code; |
| 1194 | if (UTF_8_5_OCTET_LEADING_P (c1)) | 1239 | if (UTF_8_5_OCTET_LEADING_P (c1)) |
| 1195 | { | 1240 | { |
| @@ -1271,7 +1316,7 @@ encode_coding_utf_8 (coding) | |||
| 1271 | produced_chars++; | 1316 | produced_chars++; |
| 1272 | } | 1317 | } |
| 1273 | } | 1318 | } |
| 1274 | coding->result = CODING_RESULT_SUCCESS; | 1319 | record_conversion_result (coding, CODING_RESULT_SUCCESS); |
| 1275 | coding->produced_char += produced_chars; | 1320 | coding->produced_char += produced_chars; |
| 1276 | coding->produced = dst - coding->destination; | 1321 | coding->produced = dst - coding->destination; |
| 1277 | return 0; | 1322 | return 0; |
| @@ -1331,7 +1376,7 @@ detect_coding_utf_16 (coding, detect_info) | |||
| 1331 | | CATEGORY_MASK_UTF_16_BE_NOSIG | 1376 | | CATEGORY_MASK_UTF_16_BE_NOSIG |
| 1332 | | CATEGORY_MASK_UTF_16_LE_NOSIG); | 1377 | | CATEGORY_MASK_UTF_16_LE_NOSIG); |
| 1333 | } | 1378 | } |
| 1334 | else | 1379 | else if (c1 >= 0 && c2 >= 0) |
| 1335 | { | 1380 | { |
| 1336 | unsigned char b1[256], b2[256]; | 1381 | unsigned char b1[256], b2[256]; |
| 1337 | int b1_variants = 1, b2_variants = 1; | 1382 | int b1_variants = 1, b2_variants = 1; |
| @@ -1341,8 +1386,11 @@ detect_coding_utf_16 (coding, detect_info) | |||
| 1341 | b1[c1]++, b2[c2]++; | 1386 | b1[c1]++, b2[c2]++; |
| 1342 | for (n = 0; n < 256 && src < src_end; n++) | 1387 | for (n = 0; n < 256 && src < src_end; n++) |
| 1343 | { | 1388 | { |
| 1389 | src_base = src; | ||
| 1344 | ONE_MORE_BYTE (c1); | 1390 | ONE_MORE_BYTE (c1); |
| 1345 | ONE_MORE_BYTE (c2); | 1391 | ONE_MORE_BYTE (c2); |
| 1392 | if (c1 < 0 || c2 < 0) | ||
| 1393 | break; | ||
| 1346 | if (! b1[c1++]) b1_variants++; | 1394 | if (! b1[c1++]) b1_variants++; |
| 1347 | if (! b2[c2++]) b2_variants++; | 1395 | if (! b2[c2++]) b2_variants++; |
| 1348 | } | 1396 | } |
| @@ -1412,7 +1460,18 @@ decode_coding_utf_16 (coding) | |||
| 1412 | break; | 1460 | break; |
| 1413 | 1461 | ||
| 1414 | ONE_MORE_BYTE (c1); | 1462 | ONE_MORE_BYTE (c1); |
| 1463 | if (c1 < 0) | ||
| 1464 | { | ||
| 1465 | *charbuf++ = -c1; | ||
| 1466 | continue; | ||
| 1467 | } | ||
| 1415 | ONE_MORE_BYTE (c2); | 1468 | ONE_MORE_BYTE (c2); |
| 1469 | if (c2 < 0) | ||
| 1470 | { | ||
| 1471 | *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1); | ||
| 1472 | *charbuf++ = -c2; | ||
| 1473 | continue; | ||
| 1474 | } | ||
| 1416 | c = (endian == utf_16_big_endian | 1475 | c = (endian == utf_16_big_endian |
| 1417 | ? ((c1 << 8) | c2) : ((c2 << 8) | c1)); | 1476 | ? ((c1 << 8) | c2) : ((c2 << 8) | c1)); |
| 1418 | if (surrogate) | 1477 | if (surrogate) |
| @@ -1508,7 +1567,7 @@ encode_coding_utf_16 (coding) | |||
| 1508 | EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8); | 1567 | EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8); |
| 1509 | } | 1568 | } |
| 1510 | } | 1569 | } |
| 1511 | coding->result = CODING_RESULT_SUCCESS; | 1570 | record_conversion_result (coding, CODING_RESULT_SUCCESS); |
| 1512 | coding->produced = dst - coding->destination; | 1571 | coding->produced = dst - coding->destination; |
| 1513 | coding->produced_char += produced_chars; | 1572 | coding->produced_char += produced_chars; |
| 1514 | return 0; | 1573 | return 0; |
| @@ -1593,7 +1652,7 @@ char emacs_mule_bytes[256]; | |||
| 1593 | int | 1652 | int |
| 1594 | emacs_mule_char (coding, src, nbytes, nchars, id) | 1653 | emacs_mule_char (coding, src, nbytes, nchars, id) |
| 1595 | struct coding_system *coding; | 1654 | struct coding_system *coding; |
| 1596 | unsigned char *src; | 1655 | const unsigned char *src; |
| 1597 | int *nbytes, *nchars, *id; | 1656 | int *nbytes, *nchars, *id; |
| 1598 | { | 1657 | { |
| 1599 | const unsigned char *src_end = coding->source + coding->src_bytes; | 1658 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| @@ -1605,58 +1664,78 @@ emacs_mule_char (coding, src, nbytes, nchars, id) | |||
| 1605 | int consumed_chars = 0; | 1664 | int consumed_chars = 0; |
| 1606 | 1665 | ||
| 1607 | ONE_MORE_BYTE (c); | 1666 | ONE_MORE_BYTE (c); |
| 1608 | switch (emacs_mule_bytes[c]) | 1667 | if (c < 0) |
| 1609 | { | 1668 | { |
| 1610 | case 2: | 1669 | c = -c; |
| 1611 | if (! (charset = emacs_mule_charset[c])) | 1670 | charset = emacs_mule_charset[0]; |
| 1612 | goto invalid_code; | 1671 | } |
| 1613 | ONE_MORE_BYTE (c); | 1672 | else |
| 1614 | code = c & 0x7F; | 1673 | { |
| 1615 | break; | 1674 | switch (emacs_mule_bytes[c]) |
| 1616 | |||
| 1617 | case 3: | ||
| 1618 | if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11 | ||
| 1619 | || c == EMACS_MULE_LEADING_CODE_PRIVATE_12) | ||
| 1620 | { | 1675 | { |
| 1621 | ONE_MORE_BYTE (c); | 1676 | case 2: |
| 1622 | if (! (charset = emacs_mule_charset[c])) | 1677 | if (! (charset = emacs_mule_charset[c])) |
| 1623 | goto invalid_code; | 1678 | goto invalid_code; |
| 1624 | ONE_MORE_BYTE (c); | 1679 | ONE_MORE_BYTE (c); |
| 1680 | if (c < 0) | ||
| 1681 | goto invalid_code; | ||
| 1625 | code = c & 0x7F; | 1682 | code = c & 0x7F; |
| 1626 | } | 1683 | break; |
| 1627 | else | 1684 | |
| 1628 | { | 1685 | case 3: |
| 1629 | if (! (charset = emacs_mule_charset[c])) | 1686 | if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11 |
| 1687 | || c == EMACS_MULE_LEADING_CODE_PRIVATE_12) | ||
| 1688 | { | ||
| 1689 | ONE_MORE_BYTE (c); | ||
| 1690 | if (c < 0 || ! (charset = emacs_mule_charset[c])) | ||
| 1691 | goto invalid_code; | ||
| 1692 | ONE_MORE_BYTE (c); | ||
| 1693 | if (c < 0) | ||
| 1694 | goto invalid_code; | ||
| 1695 | code = c & 0x7F; | ||
| 1696 | } | ||
| 1697 | else | ||
| 1698 | { | ||
| 1699 | if (! (charset = emacs_mule_charset[c])) | ||
| 1700 | goto invalid_code; | ||
| 1701 | ONE_MORE_BYTE (c); | ||
| 1702 | if (c < 0) | ||
| 1703 | goto invalid_code; | ||
| 1704 | code = (c & 0x7F) << 8; | ||
| 1705 | ONE_MORE_BYTE (c); | ||
| 1706 | if (c < 0) | ||
| 1707 | goto invalid_code; | ||
| 1708 | code |= c & 0x7F; | ||
| 1709 | } | ||
| 1710 | break; | ||
| 1711 | |||
| 1712 | case 4: | ||
| 1713 | ONE_MORE_BYTE (c); | ||
| 1714 | if (c < 0 || ! (charset = emacs_mule_charset[c])) | ||
| 1630 | goto invalid_code; | 1715 | goto invalid_code; |
| 1631 | ONE_MORE_BYTE (c); | 1716 | ONE_MORE_BYTE (c); |
| 1717 | if (c < 0) | ||
| 1718 | goto invalid_code; | ||
| 1632 | code = (c & 0x7F) << 8; | 1719 | code = (c & 0x7F) << 8; |
| 1633 | ONE_MORE_BYTE (c); | 1720 | ONE_MORE_BYTE (c); |
| 1721 | if (c < 0) | ||
| 1722 | goto invalid_code; | ||
| 1634 | code |= c & 0x7F; | 1723 | code |= c & 0x7F; |
| 1635 | } | 1724 | break; |
| 1636 | break; | ||
| 1637 | |||
| 1638 | case 4: | ||
| 1639 | ONE_MORE_BYTE (c); | ||
| 1640 | if (! (charset = emacs_mule_charset[c])) | ||
| 1641 | goto invalid_code; | ||
| 1642 | ONE_MORE_BYTE (c); | ||
| 1643 | code = (c & 0x7F) << 8; | ||
| 1644 | ONE_MORE_BYTE (c); | ||
| 1645 | code |= c & 0x7F; | ||
| 1646 | break; | ||
| 1647 | 1725 | ||
| 1648 | case 1: | 1726 | case 1: |
| 1649 | code = c; | 1727 | code = c; |
| 1650 | charset = CHARSET_FROM_ID (ASCII_BYTE_P (code) | 1728 | charset = CHARSET_FROM_ID (ASCII_BYTE_P (code) |
| 1651 | ? charset_ascii : charset_eight_bit); | 1729 | ? charset_ascii : charset_eight_bit); |
| 1652 | break; | 1730 | break; |
| 1653 | 1731 | ||
| 1654 | default: | 1732 | default: |
| 1655 | abort (); | 1733 | abort (); |
| 1734 | } | ||
| 1735 | c = DECODE_CHAR (charset, code); | ||
| 1736 | if (c < 0) | ||
| 1737 | goto invalid_code; | ||
| 1656 | } | 1738 | } |
| 1657 | c = DECODE_CHAR (charset, code); | ||
| 1658 | if (c < 0) | ||
| 1659 | goto invalid_code; | ||
| 1660 | *nbytes = src - src_base; | 1739 | *nbytes = src - src_base; |
| 1661 | *nchars = consumed_chars; | 1740 | *nchars = consumed_chars; |
| 1662 | if (id) | 1741 | if (id) |
| @@ -1680,13 +1759,12 @@ detect_coding_emacs_mule (coding, detect_info) | |||
| 1680 | struct coding_system *coding; | 1759 | struct coding_system *coding; |
| 1681 | struct coding_detection_info *detect_info; | 1760 | struct coding_detection_info *detect_info; |
| 1682 | { | 1761 | { |
| 1683 | const unsigned char *src = coding->source, *src_base = src; | 1762 | const unsigned char *src = coding->source, *src_base; |
| 1684 | const unsigned char *src_end = coding->source + coding->src_bytes; | 1763 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 1685 | int multibytep = coding->src_multibyte; | 1764 | int multibytep = coding->src_multibyte; |
| 1686 | int consumed_chars = 0; | 1765 | int consumed_chars = 0; |
| 1687 | int c; | 1766 | int c; |
| 1688 | int found = 0; | 1767 | int found = 0; |
| 1689 | int incomplete; | ||
| 1690 | 1768 | ||
| 1691 | detect_info->checked |= CATEGORY_MASK_EMACS_MULE; | 1769 | detect_info->checked |= CATEGORY_MASK_EMACS_MULE; |
| 1692 | /* A coding system of this category is always ASCII compatible. */ | 1770 | /* A coding system of this category is always ASCII compatible. */ |
| @@ -1694,10 +1772,10 @@ detect_coding_emacs_mule (coding, detect_info) | |||
| 1694 | 1772 | ||
| 1695 | while (1) | 1773 | while (1) |
| 1696 | { | 1774 | { |
| 1697 | incomplete = 0; | 1775 | src_base = src; |
| 1698 | ONE_MORE_BYTE (c); | 1776 | ONE_MORE_BYTE (c); |
| 1699 | incomplete = 1; | 1777 | if (c < 0) |
| 1700 | 1778 | continue; | |
| 1701 | if (c == 0x80) | 1779 | if (c == 0x80) |
| 1702 | { | 1780 | { |
| 1703 | /* Perhaps the start of composite character. We simple skip | 1781 | /* Perhaps the start of composite character. We simple skip |
| @@ -1745,7 +1823,7 @@ detect_coding_emacs_mule (coding, detect_info) | |||
| 1745 | return 0; | 1823 | return 0; |
| 1746 | 1824 | ||
| 1747 | no_more_source: | 1825 | no_more_source: |
| 1748 | if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) | 1826 | if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK) |
| 1749 | { | 1827 | { |
| 1750 | detect_info->rejected |= CATEGORY_MASK_EMACS_MULE; | 1828 | detect_info->rejected |= CATEGORY_MASK_EMACS_MULE; |
| 1751 | return 0; | 1829 | return 0; |
| @@ -1842,10 +1920,14 @@ detect_coding_emacs_mule (coding, detect_info) | |||
| 1842 | int nbytes, nchars; \ | 1920 | int nbytes, nchars; \ |
| 1843 | \ | 1921 | \ |
| 1844 | ONE_MORE_BYTE (c); \ | 1922 | ONE_MORE_BYTE (c); \ |
| 1923 | if (c < 0) \ | ||
| 1924 | goto invalid_code; \ | ||
| 1845 | nbytes = c - 0xA0; \ | 1925 | nbytes = c - 0xA0; \ |
| 1846 | if (nbytes < 3) \ | 1926 | if (nbytes < 3) \ |
| 1847 | goto invalid_code; \ | 1927 | goto invalid_code; \ |
| 1848 | ONE_MORE_BYTE (c); \ | 1928 | ONE_MORE_BYTE (c); \ |
| 1929 | if (c < 0) \ | ||
| 1930 | goto invalid_code; \ | ||
| 1849 | nchars = c - 0xA0; \ | 1931 | nchars = c - 0xA0; \ |
| 1850 | from = coding->produced + char_offset; \ | 1932 | from = coding->produced + char_offset; \ |
| 1851 | to = from + nchars; \ | 1933 | to = from + nchars; \ |
| @@ -1952,8 +2034,12 @@ decode_coding_emacs_mule (coding) | |||
| 1952 | break; | 2034 | break; |
| 1953 | 2035 | ||
| 1954 | ONE_MORE_BYTE (c); | 2036 | ONE_MORE_BYTE (c); |
| 1955 | 2037 | if (c < 0) | |
| 1956 | if (c < 0x80) | 2038 | { |
| 2039 | *charbuf++ = -c; | ||
| 2040 | char_offset++; | ||
| 2041 | } | ||
| 2042 | else if (c < 0x80) | ||
| 1957 | { | 2043 | { |
| 1958 | *charbuf++ = c; | 2044 | *charbuf++ = c; |
| 1959 | char_offset++; | 2045 | char_offset++; |
| @@ -1961,6 +2047,8 @@ decode_coding_emacs_mule (coding) | |||
| 1961 | else if (c == 0x80) | 2047 | else if (c == 0x80) |
| 1962 | { | 2048 | { |
| 1963 | ONE_MORE_BYTE (c); | 2049 | ONE_MORE_BYTE (c); |
| 2050 | if (c < 0) | ||
| 2051 | goto invalid_code; | ||
| 1964 | if (c - 0xF2 >= COMPOSITION_RELATIVE | 2052 | if (c - 0xF2 >= COMPOSITION_RELATIVE |
| 1965 | && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS) | 2053 | && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS) |
| 1966 | DECODE_EMACS_MULE_21_COMPOSITION (c); | 2054 | DECODE_EMACS_MULE_21_COMPOSITION (c); |
| @@ -2130,7 +2218,7 @@ encode_coding_emacs_mule (coding) | |||
| 2130 | } | 2218 | } |
| 2131 | } | 2219 | } |
| 2132 | } | 2220 | } |
| 2133 | coding->result = CODING_RESULT_SUCCESS; | 2221 | record_conversion_result (coding, CODING_RESULT_SUCCESS); |
| 2134 | coding->produced_char += produced_chars; | 2222 | coding->produced_char += produced_chars; |
| 2135 | coding->produced = dst - coding->destination; | 2223 | coding->produced = dst - coding->destination; |
| 2136 | return 0; | 2224 | return 0; |
| @@ -2430,6 +2518,7 @@ detect_coding_iso_2022 (coding, detect_info) | |||
| 2430 | 2518 | ||
| 2431 | while (rejected != CATEGORY_MASK_ISO) | 2519 | while (rejected != CATEGORY_MASK_ISO) |
| 2432 | { | 2520 | { |
| 2521 | src_base = src; | ||
| 2433 | ONE_MORE_BYTE (c); | 2522 | ONE_MORE_BYTE (c); |
| 2434 | switch (c) | 2523 | switch (c) |
| 2435 | { | 2524 | { |
| @@ -2543,6 +2632,8 @@ detect_coding_iso_2022 (coding, detect_info) | |||
| 2543 | goto check_extra_latin; | 2632 | goto check_extra_latin; |
| 2544 | 2633 | ||
| 2545 | default: | 2634 | default: |
| 2635 | if (c < 0) | ||
| 2636 | continue; | ||
| 2546 | if (c < 0x80) | 2637 | if (c < 0x80) |
| 2547 | { | 2638 | { |
| 2548 | single_shifting = 0; | 2639 | single_shifting = 0; |
| @@ -2816,6 +2907,8 @@ decode_coding_iso_2022 (coding) | |||
| 2816 | break; | 2907 | break; |
| 2817 | 2908 | ||
| 2818 | ONE_MORE_BYTE (c1); | 2909 | ONE_MORE_BYTE (c1); |
| 2910 | if (c1 < 0) | ||
| 2911 | goto invalid_code; | ||
| 2819 | 2912 | ||
| 2820 | /* We produce at most one character. */ | 2913 | /* We produce at most one character. */ |
| 2821 | switch (iso_code_class [c1]) | 2914 | switch (iso_code_class [c1]) |
| @@ -3186,7 +3279,7 @@ decode_coding_iso_2022 (coding) | |||
| 3186 | src = src_base; | 3279 | src = src_base; |
| 3187 | consumed_chars = consumed_chars_base; | 3280 | consumed_chars = consumed_chars_base; |
| 3188 | ONE_MORE_BYTE (c); | 3281 | ONE_MORE_BYTE (c); |
| 3189 | *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); | 3282 | *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); |
| 3190 | char_offset++; | 3283 | char_offset++; |
| 3191 | coding->errors++; | 3284 | coding->errors++; |
| 3192 | continue; | 3285 | continue; |
| @@ -3745,7 +3838,7 @@ encode_coding_iso_2022 (coding) | |||
| 3745 | ASSURE_DESTINATION (safe_room); | 3838 | ASSURE_DESTINATION (safe_room); |
| 3746 | ENCODE_RESET_PLANE_AND_REGISTER (); | 3839 | ENCODE_RESET_PLANE_AND_REGISTER (); |
| 3747 | } | 3840 | } |
| 3748 | coding->result = CODING_RESULT_SUCCESS; | 3841 | record_conversion_result (coding, CODING_RESULT_SUCCESS); |
| 3749 | CODING_ISO_BOL (coding) = bol_designation; | 3842 | CODING_ISO_BOL (coding) = bol_designation; |
| 3750 | coding->produced_char += produced_chars; | 3843 | coding->produced_char += produced_chars; |
| 3751 | coding->produced = dst - coding->destination; | 3844 | coding->produced = dst - coding->destination; |
| @@ -3798,13 +3891,12 @@ detect_coding_sjis (coding, detect_info) | |||
| 3798 | struct coding_system *coding; | 3891 | struct coding_system *coding; |
| 3799 | struct coding_detection_info *detect_info; | 3892 | struct coding_detection_info *detect_info; |
| 3800 | { | 3893 | { |
| 3801 | const unsigned char *src = coding->source, *src_base = src; | 3894 | const unsigned char *src = coding->source, *src_base; |
| 3802 | const unsigned char *src_end = coding->source + coding->src_bytes; | 3895 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 3803 | int multibytep = coding->src_multibyte; | 3896 | int multibytep = coding->src_multibyte; |
| 3804 | int consumed_chars = 0; | 3897 | int consumed_chars = 0; |
| 3805 | int found = 0; | 3898 | int found = 0; |
| 3806 | int c; | 3899 | int c; |
| 3807 | int incomplete; | ||
| 3808 | 3900 | ||
| 3809 | detect_info->checked |= CATEGORY_MASK_SJIS; | 3901 | detect_info->checked |= CATEGORY_MASK_SJIS; |
| 3810 | /* A coding system of this category is always ASCII compatible. */ | 3902 | /* A coding system of this category is always ASCII compatible. */ |
| @@ -3812,9 +3904,8 @@ detect_coding_sjis (coding, detect_info) | |||
| 3812 | 3904 | ||
| 3813 | while (1) | 3905 | while (1) |
| 3814 | { | 3906 | { |
| 3815 | incomplete = 0; | 3907 | src_base = src; |
| 3816 | ONE_MORE_BYTE (c); | 3908 | ONE_MORE_BYTE (c); |
| 3817 | incomplete = 1; | ||
| 3818 | if (c < 0x80) | 3909 | if (c < 0x80) |
| 3819 | continue; | 3910 | continue; |
| 3820 | if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF)) | 3911 | if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF)) |
| @@ -3833,7 +3924,7 @@ detect_coding_sjis (coding, detect_info) | |||
| 3833 | return 0; | 3924 | return 0; |
| 3834 | 3925 | ||
| 3835 | no_more_source: | 3926 | no_more_source: |
| 3836 | if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) | 3927 | if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK) |
| 3837 | { | 3928 | { |
| 3838 | detect_info->rejected |= CATEGORY_MASK_SJIS; | 3929 | detect_info->rejected |= CATEGORY_MASK_SJIS; |
| 3839 | return 0; | 3930 | return 0; |
| @@ -3851,13 +3942,12 @@ detect_coding_big5 (coding, detect_info) | |||
| 3851 | struct coding_system *coding; | 3942 | struct coding_system *coding; |
| 3852 | struct coding_detection_info *detect_info; | 3943 | struct coding_detection_info *detect_info; |
| 3853 | { | 3944 | { |
| 3854 | const unsigned char *src = coding->source, *src_base = src; | 3945 | const unsigned char *src = coding->source, *src_base; |
| 3855 | const unsigned char *src_end = coding->source + coding->src_bytes; | 3946 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 3856 | int multibytep = coding->src_multibyte; | 3947 | int multibytep = coding->src_multibyte; |
| 3857 | int consumed_chars = 0; | 3948 | int consumed_chars = 0; |
| 3858 | int found = 0; | 3949 | int found = 0; |
| 3859 | int c; | 3950 | int c; |
| 3860 | int incomplete; | ||
| 3861 | 3951 | ||
| 3862 | detect_info->checked |= CATEGORY_MASK_BIG5; | 3952 | detect_info->checked |= CATEGORY_MASK_BIG5; |
| 3863 | /* A coding system of this category is always ASCII compatible. */ | 3953 | /* A coding system of this category is always ASCII compatible. */ |
| @@ -3865,9 +3955,8 @@ detect_coding_big5 (coding, detect_info) | |||
| 3865 | 3955 | ||
| 3866 | while (1) | 3956 | while (1) |
| 3867 | { | 3957 | { |
| 3868 | incomplete = 0; | 3958 | src_base = src; |
| 3869 | ONE_MORE_BYTE (c); | 3959 | ONE_MORE_BYTE (c); |
| 3870 | incomplete = 1; | ||
| 3871 | if (c < 0x80) | 3960 | if (c < 0x80) |
| 3872 | continue; | 3961 | continue; |
| 3873 | if (c >= 0xA1) | 3962 | if (c >= 0xA1) |
| @@ -3884,7 +3973,7 @@ detect_coding_big5 (coding, detect_info) | |||
| 3884 | return 0; | 3973 | return 0; |
| 3885 | 3974 | ||
| 3886 | no_more_source: | 3975 | no_more_source: |
| 3887 | if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) | 3976 | if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK) |
| 3888 | { | 3977 | { |
| 3889 | detect_info->rejected |= CATEGORY_MASK_BIG5; | 3978 | detect_info->rejected |= CATEGORY_MASK_BIG5; |
| 3890 | return 0; | 3979 | return 0; |
| @@ -3932,7 +4021,8 @@ decode_coding_sjis (coding) | |||
| 3932 | break; | 4021 | break; |
| 3933 | 4022 | ||
| 3934 | ONE_MORE_BYTE (c); | 4023 | ONE_MORE_BYTE (c); |
| 3935 | 4024 | if (c < 0) | |
| 4025 | goto invalid_code; | ||
| 3936 | if (c < 0x80) | 4026 | if (c < 0x80) |
| 3937 | charset = charset_roman; | 4027 | charset = charset_roman; |
| 3938 | else | 4028 | else |
| @@ -3975,7 +4065,7 @@ decode_coding_sjis (coding) | |||
| 3975 | src = src_base; | 4065 | src = src_base; |
| 3976 | consumed_chars = consumed_chars_base; | 4066 | consumed_chars = consumed_chars_base; |
| 3977 | ONE_MORE_BYTE (c); | 4067 | ONE_MORE_BYTE (c); |
| 3978 | *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); | 4068 | *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c); |
| 3979 | char_offset++; | 4069 | char_offset++; |
| 3980 | coding->errors++; | 4070 | coding->errors++; |
| 3981 | } | 4071 | } |
| @@ -4023,6 +4113,8 @@ decode_coding_big5 (coding) | |||
| 4023 | 4113 | ||
| 4024 | ONE_MORE_BYTE (c); | 4114 | ONE_MORE_BYTE (c); |
| 4025 | 4115 | ||
| 4116 | if (c < 0) | ||
| 4117 | goto invalid_code; | ||
| 4026 | if (c < 0x80) | 4118 | if (c < 0x80) |
| 4027 | charset = charset_roman; | 4119 | charset = charset_roman; |
| 4028 | else | 4120 | else |
| @@ -4053,7 +4145,7 @@ decode_coding_big5 (coding) | |||
| 4053 | src = src_base; | 4145 | src = src_base; |
| 4054 | consumed_chars = consumed_chars_base; | 4146 | consumed_chars = consumed_chars_base; |
| 4055 | ONE_MORE_BYTE (c); | 4147 | ONE_MORE_BYTE (c); |
| 4056 | *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); | 4148 | *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c); |
| 4057 | char_offset++; | 4149 | char_offset++; |
| 4058 | coding->errors++; | 4150 | coding->errors++; |
| 4059 | } | 4151 | } |
| @@ -4143,7 +4235,7 @@ encode_coding_sjis (coding) | |||
| 4143 | EMIT_ONE_ASCII_BYTE (code & 0x7F); | 4235 | EMIT_ONE_ASCII_BYTE (code & 0x7F); |
| 4144 | } | 4236 | } |
| 4145 | } | 4237 | } |
| 4146 | coding->result = CODING_RESULT_SUCCESS; | 4238 | record_conversion_result (coding, CODING_RESULT_SUCCESS); |
| 4147 | coding->produced_char += produced_chars; | 4239 | coding->produced_char += produced_chars; |
| 4148 | coding->produced = dst - coding->destination; | 4240 | coding->produced = dst - coding->destination; |
| 4149 | return 0; | 4241 | return 0; |
| @@ -4214,7 +4306,7 @@ encode_coding_big5 (coding) | |||
| 4214 | EMIT_ONE_ASCII_BYTE (code & 0x7F); | 4306 | EMIT_ONE_ASCII_BYTE (code & 0x7F); |
| 4215 | } | 4307 | } |
| 4216 | } | 4308 | } |
| 4217 | coding->result = CODING_RESULT_SUCCESS; | 4309 | record_conversion_result (coding, CODING_RESULT_SUCCESS); |
| 4218 | coding->produced_char += produced_chars; | 4310 | coding->produced_char += produced_chars; |
| 4219 | coding->produced = dst - coding->destination; | 4311 | coding->produced = dst - coding->destination; |
| 4220 | return 0; | 4312 | return 0; |
| @@ -4233,7 +4325,7 @@ detect_coding_ccl (coding, detect_info) | |||
| 4233 | struct coding_system *coding; | 4325 | struct coding_system *coding; |
| 4234 | struct coding_detection_info *detect_info; | 4326 | struct coding_detection_info *detect_info; |
| 4235 | { | 4327 | { |
| 4236 | const unsigned char *src = coding->source, *src_base = src; | 4328 | const unsigned char *src = coding->source, *src_base; |
| 4237 | const unsigned char *src_end = coding->source + coding->src_bytes; | 4329 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 4238 | int multibytep = coding->src_multibyte; | 4330 | int multibytep = coding->src_multibyte; |
| 4239 | int consumed_chars = 0; | 4331 | int consumed_chars = 0; |
| @@ -4252,8 +4344,10 @@ detect_coding_ccl (coding, detect_info) | |||
| 4252 | while (1) | 4344 | while (1) |
| 4253 | { | 4345 | { |
| 4254 | int c; | 4346 | int c; |
| 4347 | |||
| 4348 | src_base = src; | ||
| 4255 | ONE_MORE_BYTE (c); | 4349 | ONE_MORE_BYTE (c); |
| 4256 | if (! valids[c]) | 4350 | if (c < 0 || ! valids[c]) |
| 4257 | break; | 4351 | break; |
| 4258 | if ((valids[c] > 1)) | 4352 | if ((valids[c] > 1)) |
| 4259 | found = CATEGORY_MASK_CCL; | 4353 | found = CATEGORY_MASK_CCL; |
| @@ -4329,16 +4423,16 @@ decode_coding_ccl (coding) | |||
| 4329 | switch (ccl.status) | 4423 | switch (ccl.status) |
| 4330 | { | 4424 | { |
| 4331 | case CCL_STAT_SUSPEND_BY_SRC: | 4425 | case CCL_STAT_SUSPEND_BY_SRC: |
| 4332 | coding->result = CODING_RESULT_INSUFFICIENT_SRC; | 4426 | record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC); |
| 4333 | break; | 4427 | break; |
| 4334 | case CCL_STAT_SUSPEND_BY_DST: | 4428 | case CCL_STAT_SUSPEND_BY_DST: |
| 4335 | break; | 4429 | break; |
| 4336 | case CCL_STAT_QUIT: | 4430 | case CCL_STAT_QUIT: |
| 4337 | case CCL_STAT_INVALID_CMD: | 4431 | case CCL_STAT_INVALID_CMD: |
| 4338 | coding->result = CODING_RESULT_INTERRUPT; | 4432 | record_conversion_result (coding, CODING_RESULT_INTERRUPT); |
| 4339 | break; | 4433 | break; |
| 4340 | default: | 4434 | default: |
| 4341 | coding->result = CODING_RESULT_SUCCESS; | 4435 | record_conversion_result (coding, CODING_RESULT_SUCCESS); |
| 4342 | break; | 4436 | break; |
| 4343 | } | 4437 | } |
| 4344 | coding->consumed_char += consumed_chars; | 4438 | coding->consumed_char += consumed_chars; |
| @@ -4390,17 +4484,17 @@ encode_coding_ccl (coding) | |||
| 4390 | switch (ccl.status) | 4484 | switch (ccl.status) |
| 4391 | { | 4485 | { |
| 4392 | case CCL_STAT_SUSPEND_BY_SRC: | 4486 | case CCL_STAT_SUSPEND_BY_SRC: |
| 4393 | coding->result = CODING_RESULT_INSUFFICIENT_SRC; | 4487 | record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC); |
| 4394 | break; | 4488 | break; |
| 4395 | case CCL_STAT_SUSPEND_BY_DST: | 4489 | case CCL_STAT_SUSPEND_BY_DST: |
| 4396 | coding->result = CODING_RESULT_INSUFFICIENT_DST; | 4490 | record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST); |
| 4397 | break; | 4491 | break; |
| 4398 | case CCL_STAT_QUIT: | 4492 | case CCL_STAT_QUIT: |
| 4399 | case CCL_STAT_INVALID_CMD: | 4493 | case CCL_STAT_INVALID_CMD: |
| 4400 | coding->result = CODING_RESULT_INTERRUPT; | 4494 | record_conversion_result (coding, CODING_RESULT_INTERRUPT); |
| 4401 | break; | 4495 | break; |
| 4402 | default: | 4496 | default: |
| 4403 | coding->result = CODING_RESULT_SUCCESS; | 4497 | record_conversion_result (coding, CODING_RESULT_SUCCESS); |
| 4404 | break; | 4498 | break; |
| 4405 | } | 4499 | } |
| 4406 | 4500 | ||
| @@ -4422,7 +4516,7 @@ decode_coding_raw_text (coding) | |||
| 4422 | coding->chars_at_source = 1; | 4516 | coding->chars_at_source = 1; |
| 4423 | coding->consumed_char = 0; | 4517 | coding->consumed_char = 0; |
| 4424 | coding->consumed = 0; | 4518 | coding->consumed = 0; |
| 4425 | coding->result = CODING_RESULT_SUCCESS; | 4519 | record_conversion_result (coding, CODING_RESULT_SUCCESS); |
| 4426 | } | 4520 | } |
| 4427 | 4521 | ||
| 4428 | static int | 4522 | static int |
| @@ -4500,7 +4594,7 @@ encode_coding_raw_text (coding) | |||
| 4500 | produced_chars = dst - (coding->destination + coding->dst_bytes); | 4594 | produced_chars = dst - (coding->destination + coding->dst_bytes); |
| 4501 | } | 4595 | } |
| 4502 | } | 4596 | } |
| 4503 | coding->result = CODING_RESULT_SUCCESS; | 4597 | record_conversion_result (coding, CODING_RESULT_SUCCESS); |
| 4504 | coding->produced_char += produced_chars; | 4598 | coding->produced_char += produced_chars; |
| 4505 | coding->produced = dst - coding->destination; | 4599 | coding->produced = dst - coding->destination; |
| 4506 | return 0; | 4600 | return 0; |
| @@ -4515,7 +4609,7 @@ detect_coding_charset (coding, detect_info) | |||
| 4515 | struct coding_system *coding; | 4609 | struct coding_system *coding; |
| 4516 | struct coding_detection_info *detect_info; | 4610 | struct coding_detection_info *detect_info; |
| 4517 | { | 4611 | { |
| 4518 | const unsigned char *src = coding->source, *src_base = src; | 4612 | const unsigned char *src = coding->source, *src_base; |
| 4519 | const unsigned char *src_end = coding->source + coding->src_bytes; | 4613 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 4520 | int multibytep = coding->src_multibyte; | 4614 | int multibytep = coding->src_multibyte; |
| 4521 | int consumed_chars = 0; | 4615 | int consumed_chars = 0; |
| @@ -4535,7 +4629,10 @@ detect_coding_charset (coding, detect_info) | |||
| 4535 | { | 4629 | { |
| 4536 | int c; | 4630 | int c; |
| 4537 | 4631 | ||
| 4632 | src_base = src; | ||
| 4538 | ONE_MORE_BYTE (c); | 4633 | ONE_MORE_BYTE (c); |
| 4634 | if (c < 0) | ||
| 4635 | continue; | ||
| 4539 | if (NILP (AREF (valids, c))) | 4636 | if (NILP (AREF (valids, c))) |
| 4540 | break; | 4637 | break; |
| 4541 | if (c >= 0x80) | 4638 | if (c >= 0x80) |
| @@ -4584,6 +4681,8 @@ decode_coding_charset (coding) | |||
| 4584 | break; | 4681 | break; |
| 4585 | 4682 | ||
| 4586 | ONE_MORE_BYTE (c); | 4683 | ONE_MORE_BYTE (c); |
| 4684 | if (c < 0) | ||
| 4685 | goto invalid_code; | ||
| 4587 | code = c; | 4686 | code = c; |
| 4588 | 4687 | ||
| 4589 | val = AREF (valids, c); | 4688 | val = AREF (valids, c); |
| @@ -4643,7 +4742,7 @@ decode_coding_charset (coding) | |||
| 4643 | src = src_base; | 4742 | src = src_base; |
| 4644 | consumed_chars = consumed_chars_base; | 4743 | consumed_chars = consumed_chars_base; |
| 4645 | ONE_MORE_BYTE (c); | 4744 | ONE_MORE_BYTE (c); |
| 4646 | *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); | 4745 | *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); |
| 4647 | char_offset++; | 4746 | char_offset++; |
| 4648 | coding->errors++; | 4747 | coding->errors++; |
| 4649 | } | 4748 | } |
| @@ -4714,7 +4813,7 @@ encode_coding_charset (coding) | |||
| 4714 | } | 4813 | } |
| 4715 | } | 4814 | } |
| 4716 | 4815 | ||
| 4717 | coding->result = CODING_RESULT_SUCCESS; | 4816 | record_conversion_result (coding, CODING_RESULT_SUCCESS); |
| 4718 | coding->produced_char += produced_chars; | 4817 | coding->produced_char += produced_chars; |
| 4719 | coding->produced = dst - coding->destination; | 4818 | coding->produced = dst - coding->destination; |
| 4720 | return 0; | 4819 | return 0; |
| @@ -5480,7 +5579,8 @@ produce_chars (coding) | |||
| 5480 | { | 5579 | { |
| 5481 | if (src == src_end) | 5580 | if (src == src_end) |
| 5482 | { | 5581 | { |
| 5483 | coding->result = CODING_RESULT_INSUFFICIENT_SRC; | 5582 | record_conversion_result |
| 5583 | (coding, CODING_RESULT_INSUFFICIENT_SRC); | ||
| 5484 | goto no_more_source; | 5584 | goto no_more_source; |
| 5485 | } | 5585 | } |
| 5486 | if (*src == '\n') | 5586 | if (*src == '\n') |
| @@ -5669,7 +5769,7 @@ produce_charset (coding, charbuf) | |||
| 5669 | } \ | 5769 | } \ |
| 5670 | if (! coding->charbuf) \ | 5770 | if (! coding->charbuf) \ |
| 5671 | { \ | 5771 | { \ |
| 5672 | coding->result = CODING_RESULT_INSUFFICIENT_MEM; \ | 5772 | record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \ |
| 5673 | return coding->result; \ | 5773 | return coding->result; \ |
| 5674 | } \ | 5774 | } \ |
| 5675 | coding->charbuf_size = size; \ | 5775 | coding->charbuf_size = size; \ |
| @@ -5759,7 +5859,7 @@ decode_coding (coding) | |||
| 5759 | coding->consumed = coding->consumed_char = 0; | 5859 | coding->consumed = coding->consumed_char = 0; |
| 5760 | coding->produced = coding->produced_char = 0; | 5860 | coding->produced = coding->produced_char = 0; |
| 5761 | coding->chars_at_source = 0; | 5861 | coding->chars_at_source = 0; |
| 5762 | coding->result = CODING_RESULT_SUCCESS; | 5862 | record_conversion_result (coding, CODING_RESULT_SUCCESS); |
| 5763 | coding->errors = 0; | 5863 | coding->errors = 0; |
| 5764 | 5864 | ||
| 5765 | ALLOC_CONVERSION_WORK_AREA (coding); | 5865 | ALLOC_CONVERSION_WORK_AREA (coding); |
| @@ -5798,6 +5898,7 @@ decode_coding (coding) | |||
| 5798 | /* Flush out unprocessed data as binary chars. We are sure | 5898 | /* Flush out unprocessed data as binary chars. We are sure |
| 5799 | that the number of data is less than the size of | 5899 | that the number of data is less than the size of |
| 5800 | coding->charbuf. */ | 5900 | coding->charbuf. */ |
| 5901 | coding->charbuf_used = 0; | ||
| 5801 | while (nbytes-- > 0) | 5902 | while (nbytes-- > 0) |
| 5802 | { | 5903 | { |
| 5803 | int c = *src++; | 5904 | int c = *src++; |
| @@ -6076,7 +6177,7 @@ encode_coding (coding) | |||
| 6076 | 6177 | ||
| 6077 | coding->consumed = coding->consumed_char = 0; | 6178 | coding->consumed = coding->consumed_char = 0; |
| 6078 | coding->produced = coding->produced_char = 0; | 6179 | coding->produced = coding->produced_char = 0; |
| 6079 | coding->result = CODING_RESULT_SUCCESS; | 6180 | record_conversion_result (coding, CODING_RESULT_SUCCESS); |
| 6080 | coding->errors = 0; | 6181 | coding->errors = 0; |
| 6081 | 6182 | ||
| 6082 | ALLOC_CONVERSION_WORK_AREA (coding); | 6183 | ALLOC_CONVERSION_WORK_AREA (coding); |
| @@ -6125,10 +6226,17 @@ make_conversion_work_buffer (multibyte) | |||
| 6125 | struct buffer *current; | 6226 | struct buffer *current; |
| 6126 | 6227 | ||
| 6127 | if (reused_workbuf_in_use++) | 6228 | if (reused_workbuf_in_use++) |
| 6128 | name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil); | 6229 | { |
| 6230 | name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil); | ||
| 6231 | workbuf = Fget_buffer_create (name); | ||
| 6232 | } | ||
| 6129 | else | 6233 | else |
| 6130 | name = Vcode_conversion_workbuf_name; | 6234 | { |
| 6131 | workbuf = Fget_buffer_create (name); | 6235 | name = Vcode_conversion_workbuf_name; |
| 6236 | workbuf = Fget_buffer_create (name); | ||
| 6237 | if (NILP (Vcode_conversion_reused_workbuf)) | ||
| 6238 | Vcode_conversion_reused_workbuf = workbuf; | ||
| 6239 | } | ||
| 6132 | current = current_buffer; | 6240 | current = current_buffer; |
| 6133 | set_buffer_internal (XBUFFER (workbuf)); | 6241 | set_buffer_internal (XBUFFER (workbuf)); |
| 6134 | Ferase_buffer (); | 6242 | Ferase_buffer (); |
| @@ -6389,7 +6497,8 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte, | |||
| 6389 | = (unsigned char *) xrealloc (destination, coding->produced); | 6497 | = (unsigned char *) xrealloc (destination, coding->produced); |
| 6390 | if (! destination) | 6498 | if (! destination) |
| 6391 | { | 6499 | { |
| 6392 | coding->result = CODING_RESULT_INSUFFICIENT_DST; | 6500 | record_conversion_result (coding, |
| 6501 | CODING_RESULT_INSUFFICIENT_DST); | ||
| 6393 | unbind_to (count, Qnil); | 6502 | unbind_to (count, Qnil); |
| 6394 | return; | 6503 | return; |
| 6395 | } | 6504 | } |
| @@ -6419,7 +6528,7 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte, | |||
| 6419 | saved_pt_byte + (coding->produced - bytes)); | 6528 | saved_pt_byte + (coding->produced - bytes)); |
| 6420 | } | 6529 | } |
| 6421 | 6530 | ||
| 6422 | unbind_to (count, Qnil); | 6531 | unbind_to (count, coding->dst_object); |
| 6423 | } | 6532 | } |
| 6424 | 6533 | ||
| 6425 | 6534 | ||
| @@ -7363,9 +7472,6 @@ code_convert_region (start, end, coding_system, dst_object, encodep, norecord) | |||
| 7363 | if (! norecord) | 7472 | if (! norecord) |
| 7364 | Vlast_coding_system_used = CODING_ID_NAME (coding.id); | 7473 | Vlast_coding_system_used = CODING_ID_NAME (coding.id); |
| 7365 | 7474 | ||
| 7366 | if (coding.result != CODING_RESULT_SUCCESS) | ||
| 7367 | error ("Code conversion error: %d", coding.result); | ||
| 7368 | |||
| 7369 | return (BUFFERP (dst_object) | 7475 | return (BUFFERP (dst_object) |
| 7370 | ? make_number (coding.produced_char) | 7476 | ? make_number (coding.produced_char) |
| 7371 | : coding.dst_object); | 7477 | : coding.dst_object); |
| @@ -7453,9 +7559,6 @@ code_convert_string (string, coding_system, dst_object, | |||
| 7453 | if (! norecord) | 7559 | if (! norecord) |
| 7454 | Vlast_coding_system_used = CODING_ID_NAME (coding.id); | 7560 | Vlast_coding_system_used = CODING_ID_NAME (coding.id); |
| 7455 | 7561 | ||
| 7456 | if (coding.result != CODING_RESULT_SUCCESS) | ||
| 7457 | error ("Code conversion error: %d", coding.result); | ||
| 7458 | |||
| 7459 | return (BUFFERP (dst_object) | 7562 | return (BUFFERP (dst_object) |
| 7460 | ? make_number (coding.produced_char) | 7563 | ? make_number (coding.produced_char) |
| 7461 | : coding.dst_object); | 7564 | : coding.dst_object); |
| @@ -8740,6 +8843,12 @@ syms_of_coding () | |||
| 8740 | ASET (Vcoding_category_table, coding_category_undecided, | 8843 | ASET (Vcoding_category_table, coding_category_undecided, |
| 8741 | intern ("coding-category-undecided")); | 8844 | intern ("coding-category-undecided")); |
| 8742 | 8845 | ||
| 8846 | DEFSYM (Qinsufficient_source, "insufficient-source"); | ||
| 8847 | DEFSYM (Qinconsistent_eol, "inconsistent-eol"); | ||
| 8848 | DEFSYM (Qinvalid_source, "invalid-source"); | ||
| 8849 | DEFSYM (Qinterrupted, "interrupted"); | ||
| 8850 | DEFSYM (Qinsufficient_memory, "insufficient-memory"); | ||
| 8851 | |||
| 8743 | defsubr (&Scoding_system_p); | 8852 | defsubr (&Scoding_system_p); |
| 8744 | defsubr (&Sread_coding_system); | 8853 | defsubr (&Sread_coding_system); |
| 8745 | defsubr (&Sread_non_nil_coding_system); | 8854 | defsubr (&Sread_non_nil_coding_system); |
| @@ -8835,6 +8944,23 @@ the value of `buffer-file-coding-system' is used. */); | |||
| 8835 | Coding system used in the latest file or process I/O. */); | 8944 | Coding system used in the latest file or process I/O. */); |
| 8836 | Vlast_coding_system_used = Qnil; | 8945 | Vlast_coding_system_used = Qnil; |
| 8837 | 8946 | ||
| 8947 | DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error, | ||
| 8948 | doc: /* | ||
| 8949 | Error status of the last code conversion. | ||
| 8950 | |||
| 8951 | When an error was detected in the last code conversion, this variable | ||
| 8952 | is set to one of the following symbols. | ||
| 8953 | `insufficient-source' | ||
| 8954 | `inconsistent-eol' | ||
| 8955 | `invalid-source' | ||
| 8956 | `interrupted' | ||
| 8957 | `insufficient-memory' | ||
| 8958 | When no error was detected, the value doesn't change. So, to check | ||
| 8959 | the error status of a code conversion by this variable, you must | ||
| 8960 | explicitly set this variable to nil before performing code | ||
| 8961 | conversion. */); | ||
| 8962 | Vlast_code_conversion_error = Qnil; | ||
| 8963 | |||
| 8838 | DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion, | 8964 | DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion, |
| 8839 | doc: /* | 8965 | doc: /* |
| 8840 | *Non-nil means always inhibit code conversion of end-of-line format. | 8966 | *Non-nil means always inhibit code conversion of end-of-line format. |