diff options
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 726 |
1 files changed, 331 insertions, 395 deletions
diff --git a/src/coding.c b/src/coding.c index 3a3ba11ee9d..04985ab3c74 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -55,8 +55,8 @@ CODING SYSTEM | |||
| 55 | character sequence of emacs-utf-8 to a byte sequence of a specific | 55 | character sequence of emacs-utf-8 to a byte sequence of a specific |
| 56 | coding system. | 56 | coding system. |
| 57 | 57 | ||
| 58 | In Emacs Lisp, a coding system is represented by a Lisp symbol. In | 58 | In Emacs Lisp, a coding system is represented by a Lisp symbol. On |
| 59 | C level, a coding system is represented by a vector of attributes | 59 | the C level, a coding system is represented by a vector of attributes |
| 60 | stored in the hash table Vcoding_system_hash_table. The conversion from | 60 | stored in the hash table Vcoding_system_hash_table. The conversion from |
| 61 | coding system symbol to attributes vector is done by looking up | 61 | coding system symbol to attributes vector is done by looking up |
| 62 | Vcoding_system_hash_table by the symbol. | 62 | Vcoding_system_hash_table by the symbol. |
| @@ -159,7 +159,7 @@ detect_coding_XXX (struct coding_system *coding, | |||
| 159 | const unsigned char *src = coding->source; | 159 | const unsigned char *src = coding->source; |
| 160 | const unsigned char *src_end = coding->source + coding->src_bytes; | 160 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 161 | int multibytep = coding->src_multibyte; | 161 | int multibytep = coding->src_multibyte; |
| 162 | int consumed_chars = 0; | 162 | EMACS_INT consumed_chars = 0; |
| 163 | int found = 0; | 163 | int found = 0; |
| 164 | ...; | 164 | ...; |
| 165 | 165 | ||
| @@ -266,7 +266,7 @@ encode_coding_XXX (struct coding_system *coding) | |||
| 266 | unsigned char *dst = coding->destination + coding->produced; | 266 | unsigned char *dst = coding->destination + coding->produced; |
| 267 | unsigned char *dst_end = coding->destination + coding->dst_bytes; | 267 | unsigned char *dst_end = coding->destination + coding->dst_bytes; |
| 268 | unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_; | 268 | unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_; |
| 269 | int produced_chars = 0; | 269 | EMACS_INT produced_chars = 0; |
| 270 | 270 | ||
| 271 | for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++) | 271 | for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++) |
| 272 | { | 272 | { |
| @@ -300,27 +300,30 @@ encode_coding_XXX (struct coding_system *coding) | |||
| 300 | 300 | ||
| 301 | Lisp_Object Vcoding_system_hash_table; | 301 | Lisp_Object Vcoding_system_hash_table; |
| 302 | 302 | ||
| 303 | Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type; | 303 | static Lisp_Object Qcoding_system, Qeol_type; |
| 304 | static Lisp_Object Qcoding_aliases; | ||
| 304 | Lisp_Object Qunix, Qdos; | 305 | Lisp_Object Qunix, Qdos; |
| 305 | Lisp_Object Qbuffer_file_coding_system; | 306 | Lisp_Object Qbuffer_file_coding_system; |
| 306 | Lisp_Object Qpost_read_conversion, Qpre_write_conversion; | 307 | static Lisp_Object Qpost_read_conversion, Qpre_write_conversion; |
| 307 | Lisp_Object Qdefault_char; | 308 | static Lisp_Object Qdefault_char; |
| 308 | Lisp_Object Qno_conversion, Qundecided; | 309 | Lisp_Object Qno_conversion, Qundecided; |
| 309 | Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5; | 310 | Lisp_Object Qcharset, Qutf_8; |
| 310 | Lisp_Object Qbig, Qlittle; | 311 | static Lisp_Object Qiso_2022; |
| 311 | Lisp_Object Qcoding_system_history; | 312 | static Lisp_Object Qutf_16, Qshift_jis, Qbig5; |
| 312 | Lisp_Object Qvalid_codes; | 313 | static Lisp_Object Qbig, Qlittle; |
| 313 | Lisp_Object QCcategory, QCmnemonic, QCdefault_char; | 314 | static Lisp_Object Qcoding_system_history; |
| 314 | Lisp_Object QCdecode_translation_table, QCencode_translation_table; | 315 | static Lisp_Object Qvalid_codes; |
| 315 | Lisp_Object QCpost_read_conversion, QCpre_write_conversion; | 316 | static Lisp_Object QCcategory, QCmnemonic, QCdefault_char; |
| 316 | Lisp_Object QCascii_compatible_p; | 317 | static Lisp_Object QCdecode_translation_table, QCencode_translation_table; |
| 318 | static Lisp_Object QCpost_read_conversion, QCpre_write_conversion; | ||
| 319 | static Lisp_Object QCascii_compatible_p; | ||
| 317 | 320 | ||
| 318 | Lisp_Object Qcall_process, Qcall_process_region; | 321 | Lisp_Object Qcall_process, Qcall_process_region; |
| 319 | Lisp_Object Qstart_process, Qopen_network_stream; | 322 | Lisp_Object Qstart_process, Qopen_network_stream; |
| 320 | Lisp_Object Qtarget_idx; | 323 | static Lisp_Object Qtarget_idx; |
| 321 | 324 | ||
| 322 | Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source; | 325 | static Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source; |
| 323 | Lisp_Object Qinterrupted, Qinsufficient_memory; | 326 | static Lisp_Object Qinterrupted, Qinsufficient_memory; |
| 324 | 327 | ||
| 325 | /* If a symbol has this property, evaluate the value to define the | 328 | /* If a symbol has this property, evaluate the value to define the |
| 326 | symbol as a coding system. */ | 329 | symbol as a coding system. */ |
| @@ -351,12 +354,12 @@ struct coding_system safe_terminal_coding; | |||
| 351 | 354 | ||
| 352 | Lisp_Object Qtranslation_table; | 355 | Lisp_Object Qtranslation_table; |
| 353 | Lisp_Object Qtranslation_table_id; | 356 | Lisp_Object Qtranslation_table_id; |
| 354 | Lisp_Object Qtranslation_table_for_decode; | 357 | static Lisp_Object Qtranslation_table_for_decode; |
| 355 | Lisp_Object Qtranslation_table_for_encode; | 358 | static Lisp_Object Qtranslation_table_for_encode; |
| 356 | 359 | ||
| 357 | /* Two special coding systems. */ | 360 | /* Two special coding systems. */ |
| 358 | Lisp_Object Vsjis_coding_system; | 361 | static Lisp_Object Vsjis_coding_system; |
| 359 | Lisp_Object Vbig5_coding_system; | 362 | static Lisp_Object Vbig5_coding_system; |
| 360 | 363 | ||
| 361 | /* ISO2022 section */ | 364 | /* ISO2022 section */ |
| 362 | 365 | ||
| @@ -395,8 +398,6 @@ Lisp_Object Vbig5_coding_system; | |||
| 395 | 398 | ||
| 396 | /* Control characters of ISO2022. */ | 399 | /* Control characters of ISO2022. */ |
| 397 | /* code */ /* function */ | 400 | /* code */ /* function */ |
| 398 | #define ISO_CODE_LF 0x0A /* line-feed */ | ||
| 399 | #define ISO_CODE_CR 0x0D /* carriage-return */ | ||
| 400 | #define ISO_CODE_SO 0x0E /* shift-out */ | 401 | #define ISO_CODE_SO 0x0E /* shift-out */ |
| 401 | #define ISO_CODE_SI 0x0F /* shift-in */ | 402 | #define ISO_CODE_SI 0x0F /* shift-in */ |
| 402 | #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */ | 403 | #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */ |
| @@ -479,7 +480,7 @@ enum iso_code_class_type | |||
| 479 | 480 | ||
| 480 | #define CODING_ISO_FLAG_COMPOSITION 0x2000 | 481 | #define CODING_ISO_FLAG_COMPOSITION 0x2000 |
| 481 | 482 | ||
| 482 | #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000 | 483 | /* #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000 */ |
| 483 | 484 | ||
| 484 | #define CODING_ISO_FLAG_USE_ROMAN 0x8000 | 485 | #define CODING_ISO_FLAG_USE_ROMAN 0x8000 |
| 485 | 486 | ||
| @@ -721,25 +722,6 @@ static struct coding_system coding_categories[coding_category_max]; | |||
| 721 | } while (0) | 722 | } while (0) |
| 722 | 723 | ||
| 723 | 724 | ||
| 724 | #define ONE_MORE_BYTE_NO_CHECK(c) \ | ||
| 725 | do { \ | ||
| 726 | c = *src++; \ | ||
| 727 | if (multibytep && (c & 0x80)) \ | ||
| 728 | { \ | ||
| 729 | if ((c & 0xFE) == 0xC0) \ | ||
| 730 | c = ((c & 1) << 6) | *src++; \ | ||
| 731 | else \ | ||
| 732 | { \ | ||
| 733 | src--; \ | ||
| 734 | c = - string_char (src, &src, NULL); \ | ||
| 735 | record_conversion_result \ | ||
| 736 | (coding, CODING_RESULT_INVALID_SRC); \ | ||
| 737 | } \ | ||
| 738 | } \ | ||
| 739 | consumed_chars++; \ | ||
| 740 | } while (0) | ||
| 741 | |||
| 742 | |||
| 743 | /* Store a byte C in the place pointed by DST and increment DST to the | 725 | /* Store a byte C in the place pointed by DST and increment DST to the |
| 744 | next free point, and increment PRODUCED_CHARS. The caller should | 726 | next free point, and increment PRODUCED_CHARS. The caller should |
| 745 | ensure that C is 0..127, and declare and set the variable `dst' | 727 | ensure that C is 0..127, and declare and set the variable `dst' |
| @@ -774,7 +756,7 @@ static struct coding_system coding_categories[coding_category_max]; | |||
| 774 | produced_chars++; \ | 756 | produced_chars++; \ |
| 775 | if (multibytep) \ | 757 | if (multibytep) \ |
| 776 | { \ | 758 | { \ |
| 777 | int ch = (c); \ | 759 | unsigned ch = (c); \ |
| 778 | if (ch >= 0x80) \ | 760 | if (ch >= 0x80) \ |
| 779 | ch = BYTE8_TO_CHAR (ch); \ | 761 | ch = BYTE8_TO_CHAR (ch); \ |
| 780 | CHAR_STRING_ADVANCE (ch, dst); \ | 762 | CHAR_STRING_ADVANCE (ch, dst); \ |
| @@ -791,7 +773,7 @@ static struct coding_system coding_categories[coding_category_max]; | |||
| 791 | produced_chars += 2; \ | 773 | produced_chars += 2; \ |
| 792 | if (multibytep) \ | 774 | if (multibytep) \ |
| 793 | { \ | 775 | { \ |
| 794 | int ch; \ | 776 | unsigned ch; \ |
| 795 | \ | 777 | \ |
| 796 | ch = (c1); \ | 778 | ch = (c1); \ |
| 797 | if (ch >= 0x80) \ | 779 | if (ch >= 0x80) \ |
| @@ -874,8 +856,7 @@ static unsigned char *alloc_destination (struct coding_system *, | |||
| 874 | EMACS_INT, unsigned char *); | 856 | EMACS_INT, unsigned char *); |
| 875 | static void setup_iso_safe_charsets (Lisp_Object); | 857 | static void setup_iso_safe_charsets (Lisp_Object); |
| 876 | static unsigned char *encode_designation_at_bol (struct coding_system *, | 858 | static unsigned char *encode_designation_at_bol (struct coding_system *, |
| 877 | int *, int *, | 859 | int *, unsigned char *); |
| 878 | unsigned char *); | ||
| 879 | static int detect_eol (const unsigned char *, | 860 | static int detect_eol (const unsigned char *, |
| 880 | EMACS_INT, enum coding_category); | 861 | EMACS_INT, enum coding_category); |
| 881 | static Lisp_Object adjust_coding_eol_type (struct coding_system *, int); | 862 | static Lisp_Object adjust_coding_eol_type (struct coding_system *, int); |
| @@ -883,21 +864,21 @@ static void decode_eol (struct coding_system *); | |||
| 883 | static Lisp_Object get_translation_table (Lisp_Object, int, int *); | 864 | static Lisp_Object get_translation_table (Lisp_Object, int, int *); |
| 884 | static Lisp_Object get_translation (Lisp_Object, int *, int *); | 865 | static Lisp_Object get_translation (Lisp_Object, int *, int *); |
| 885 | static int produce_chars (struct coding_system *, Lisp_Object, int); | 866 | static int produce_chars (struct coding_system *, Lisp_Object, int); |
| 886 | static INLINE void produce_charset (struct coding_system *, int *, | 867 | static inline void produce_charset (struct coding_system *, int *, |
| 887 | EMACS_INT); | 868 | EMACS_INT); |
| 888 | static void produce_annotation (struct coding_system *, EMACS_INT); | 869 | static void produce_annotation (struct coding_system *, EMACS_INT); |
| 889 | static int decode_coding (struct coding_system *); | 870 | static int decode_coding (struct coding_system *); |
| 890 | static INLINE int *handle_composition_annotation (EMACS_INT, EMACS_INT, | 871 | static inline int *handle_composition_annotation (EMACS_INT, EMACS_INT, |
| 891 | struct coding_system *, | 872 | struct coding_system *, |
| 892 | int *, EMACS_INT *); | 873 | int *, EMACS_INT *); |
| 893 | static INLINE int *handle_charset_annotation (EMACS_INT, EMACS_INT, | 874 | static inline int *handle_charset_annotation (EMACS_INT, EMACS_INT, |
| 894 | struct coding_system *, | 875 | struct coding_system *, |
| 895 | int *, EMACS_INT *); | 876 | int *, EMACS_INT *); |
| 896 | static void consume_chars (struct coding_system *, Lisp_Object, int); | 877 | static void consume_chars (struct coding_system *, Lisp_Object, int); |
| 897 | static int encode_coding (struct coding_system *); | 878 | static int encode_coding (struct coding_system *); |
| 898 | static Lisp_Object make_conversion_work_buffer (int); | 879 | static Lisp_Object make_conversion_work_buffer (int); |
| 899 | static Lisp_Object code_conversion_restore (Lisp_Object); | 880 | static Lisp_Object code_conversion_restore (Lisp_Object); |
| 900 | static INLINE int char_encodable_p (int, Lisp_Object); | 881 | static inline int char_encodable_p (int, Lisp_Object); |
| 901 | static Lisp_Object make_subsidiaries (Lisp_Object); | 882 | static Lisp_Object make_subsidiaries (Lisp_Object); |
| 902 | 883 | ||
| 903 | static void | 884 | static void |
| @@ -965,7 +946,7 @@ record_conversion_result (struct coding_system *coding, | |||
| 965 | do { \ | 946 | do { \ |
| 966 | if (dst + (bytes) >= dst_end) \ | 947 | if (dst + (bytes) >= dst_end) \ |
| 967 | { \ | 948 | { \ |
| 968 | int more_bytes = charbuf_end - charbuf + (bytes); \ | 949 | EMACS_INT more_bytes = charbuf_end - charbuf + (bytes); \ |
| 969 | \ | 950 | \ |
| 970 | dst = alloc_destination (coding, more_bytes, dst); \ | 951 | dst = alloc_destination (coding, more_bytes, dst); \ |
| 971 | dst_end = coding->destination + coding->dst_bytes; \ | 952 | dst_end = coding->destination + coding->dst_bytes; \ |
| @@ -1051,9 +1032,10 @@ coding_set_source (struct coding_system *coding) | |||
| 1051 | coding->source = SDATA (coding->src_object) + coding->src_pos_byte; | 1032 | coding->source = SDATA (coding->src_object) + coding->src_pos_byte; |
| 1052 | } | 1033 | } |
| 1053 | else | 1034 | else |
| 1054 | /* Otherwise, the source is C string and is never relocated | 1035 | { |
| 1055 | automatically. Thus we don't have to update anything. */ | 1036 | /* Otherwise, the source is C string and is never relocated |
| 1056 | ; | 1037 | automatically. Thus we don't have to update anything. */ |
| 1038 | } | ||
| 1057 | } | 1039 | } |
| 1058 | 1040 | ||
| 1059 | static void | 1041 | static void |
| @@ -1079,15 +1061,18 @@ coding_set_destination (struct coding_system *coding) | |||
| 1079 | } | 1061 | } |
| 1080 | } | 1062 | } |
| 1081 | else | 1063 | else |
| 1082 | /* Otherwise, the destination is C string and is never relocated | 1064 | { |
| 1083 | automatically. Thus we don't have to update anything. */ | 1065 | /* Otherwise, the destination is C string and is never relocated |
| 1084 | ; | 1066 | automatically. Thus we don't have to update anything. */ |
| 1067 | } | ||
| 1085 | } | 1068 | } |
| 1086 | 1069 | ||
| 1087 | 1070 | ||
| 1088 | static void | 1071 | static void |
| 1089 | coding_alloc_by_realloc (struct coding_system *coding, EMACS_INT bytes) | 1072 | coding_alloc_by_realloc (struct coding_system *coding, EMACS_INT bytes) |
| 1090 | { | 1073 | { |
| 1074 | if (STRING_BYTES_BOUND - coding->dst_bytes < bytes) | ||
| 1075 | string_overflow (); | ||
| 1091 | coding->destination = (unsigned char *) xrealloc (coding->destination, | 1076 | coding->destination = (unsigned char *) xrealloc (coding->destination, |
| 1092 | coding->dst_bytes + bytes); | 1077 | coding->dst_bytes + bytes); |
| 1093 | coding->dst_bytes += bytes; | 1078 | coding->dst_bytes += bytes; |
| @@ -1217,7 +1202,6 @@ alloc_destination (struct coding_system *coding, EMACS_INT nbytes, | |||
| 1217 | #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0) | 1202 | #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0) |
| 1218 | #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8) | 1203 | #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8) |
| 1219 | 1204 | ||
| 1220 | #define UTF_BOM 0xFEFF | ||
| 1221 | #define UTF_8_BOM_1 0xEF | 1205 | #define UTF_8_BOM_1 0xEF |
| 1222 | #define UTF_8_BOM_2 0xBB | 1206 | #define UTF_8_BOM_2 0xBB |
| 1223 | #define UTF_8_BOM_3 0xBF | 1207 | #define UTF_8_BOM_3 0xBF |
| @@ -1229,7 +1213,7 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1229 | const unsigned char *src = coding->source, *src_base; | 1213 | const unsigned char *src = coding->source, *src_base; |
| 1230 | const unsigned char *src_end = coding->source + coding->src_bytes; | 1214 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 1231 | int multibytep = coding->src_multibyte; | 1215 | int multibytep = coding->src_multibyte; |
| 1232 | int consumed_chars = 0; | 1216 | EMACS_INT consumed_chars = 0; |
| 1233 | int bom_found = 0; | 1217 | int bom_found = 0; |
| 1234 | int found = 0; | 1218 | int found = 0; |
| 1235 | 1219 | ||
| @@ -1314,16 +1298,13 @@ decode_coding_utf_8 (struct coding_system *coding) | |||
| 1314 | const unsigned char *src_base; | 1298 | const unsigned char *src_base; |
| 1315 | int *charbuf = coding->charbuf + coding->charbuf_used; | 1299 | int *charbuf = coding->charbuf + coding->charbuf_used; |
| 1316 | int *charbuf_end = coding->charbuf + coding->charbuf_size; | 1300 | int *charbuf_end = coding->charbuf + coding->charbuf_size; |
| 1317 | int consumed_chars = 0, consumed_chars_base = 0; | 1301 | EMACS_INT consumed_chars = 0, consumed_chars_base = 0; |
| 1318 | int multibytep = coding->src_multibyte; | 1302 | int multibytep = coding->src_multibyte; |
| 1319 | enum utf_bom_type bom = CODING_UTF_8_BOM (coding); | 1303 | enum utf_bom_type bom = CODING_UTF_8_BOM (coding); |
| 1320 | Lisp_Object attr, charset_list; | 1304 | int eol_dos = |
| 1321 | int eol_crlf = | ||
| 1322 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | 1305 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); |
| 1323 | int byte_after_cr = -1; | 1306 | int byte_after_cr = -1; |
| 1324 | 1307 | ||
| 1325 | CODING_GET_INFO (coding, attr, charset_list); | ||
| 1326 | |||
| 1327 | if (bom != utf_without_bom) | 1308 | if (bom != utf_without_bom) |
| 1328 | { | 1309 | { |
| 1329 | int c1, c2, c3; | 1310 | int c1, c2, c3; |
| @@ -1379,7 +1360,7 @@ decode_coding_utf_8 (struct coding_system *coding) | |||
| 1379 | } | 1360 | } |
| 1380 | else if (UTF_8_1_OCTET_P (c1)) | 1361 | else if (UTF_8_1_OCTET_P (c1)) |
| 1381 | { | 1362 | { |
| 1382 | if (eol_crlf && c1 == '\r') | 1363 | if (eol_dos && c1 == '\r') |
| 1383 | ONE_MORE_BYTE (byte_after_cr); | 1364 | ONE_MORE_BYTE (byte_after_cr); |
| 1384 | c = c1; | 1365 | c = c1; |
| 1385 | } | 1366 | } |
| @@ -1468,7 +1449,7 @@ encode_coding_utf_8 (struct coding_system *coding) | |||
| 1468 | int *charbuf_end = charbuf + coding->charbuf_used; | 1449 | int *charbuf_end = charbuf + coding->charbuf_used; |
| 1469 | unsigned char *dst = coding->destination + coding->produced; | 1450 | unsigned char *dst = coding->destination + coding->produced; |
| 1470 | unsigned char *dst_end = coding->destination + coding->dst_bytes; | 1451 | unsigned char *dst_end = coding->destination + coding->dst_bytes; |
| 1471 | int produced_chars = 0; | 1452 | EMACS_INT produced_chars = 0; |
| 1472 | int c; | 1453 | int c; |
| 1473 | 1454 | ||
| 1474 | if (CODING_UTF_8_BOM (coding) == utf_with_bom) | 1455 | if (CODING_UTF_8_BOM (coding) == utf_with_bom) |
| @@ -1533,11 +1514,6 @@ encode_coding_utf_8 (struct coding_system *coding) | |||
| 1533 | #define UTF_16_LOW_SURROGATE_P(val) \ | 1514 | #define UTF_16_LOW_SURROGATE_P(val) \ |
| 1534 | (((val) & 0xFC00) == 0xDC00) | 1515 | (((val) & 0xFC00) == 0xDC00) |
| 1535 | 1516 | ||
| 1536 | #define UTF_16_INVALID_P(val) \ | ||
| 1537 | (((val) == 0xFFFE) \ | ||
| 1538 | || ((val) == 0xFFFF) \ | ||
| 1539 | || UTF_16_LOW_SURROGATE_P (val)) | ||
| 1540 | |||
| 1541 | 1517 | ||
| 1542 | static int | 1518 | static int |
| 1543 | detect_coding_utf_16 (struct coding_system *coding, | 1519 | detect_coding_utf_16 (struct coding_system *coding, |
| @@ -1631,18 +1607,15 @@ decode_coding_utf_16 (struct coding_system *coding) | |||
| 1631 | int *charbuf = coding->charbuf + coding->charbuf_used; | 1607 | int *charbuf = coding->charbuf + coding->charbuf_used; |
| 1632 | /* We may produce at most 3 chars in one loop. */ | 1608 | /* We may produce at most 3 chars in one loop. */ |
| 1633 | int *charbuf_end = coding->charbuf + coding->charbuf_size - 2; | 1609 | int *charbuf_end = coding->charbuf + coding->charbuf_size - 2; |
| 1634 | int consumed_chars = 0, consumed_chars_base = 0; | 1610 | EMACS_INT consumed_chars = 0, consumed_chars_base = 0; |
| 1635 | int multibytep = coding->src_multibyte; | 1611 | int multibytep = coding->src_multibyte; |
| 1636 | enum utf_bom_type bom = CODING_UTF_16_BOM (coding); | 1612 | enum utf_bom_type bom = CODING_UTF_16_BOM (coding); |
| 1637 | enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding); | 1613 | enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding); |
| 1638 | int surrogate = CODING_UTF_16_SURROGATE (coding); | 1614 | int surrogate = CODING_UTF_16_SURROGATE (coding); |
| 1639 | Lisp_Object attr, charset_list; | 1615 | int eol_dos = |
| 1640 | int eol_crlf = | ||
| 1641 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | 1616 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); |
| 1642 | int byte_after_cr1 = -1, byte_after_cr2 = -1; | 1617 | int byte_after_cr1 = -1, byte_after_cr2 = -1; |
| 1643 | 1618 | ||
| 1644 | CODING_GET_INFO (coding, attr, charset_list); | ||
| 1645 | |||
| 1646 | if (bom == utf_with_bom) | 1619 | if (bom == utf_with_bom) |
| 1647 | { | 1620 | { |
| 1648 | int c, c1, c2; | 1621 | int c, c1, c2; |
| @@ -1734,7 +1707,7 @@ decode_coding_utf_16 (struct coding_system *coding) | |||
| 1734 | CODING_UTF_16_SURROGATE (coding) = surrogate = c; | 1707 | CODING_UTF_16_SURROGATE (coding) = surrogate = c; |
| 1735 | else | 1708 | else |
| 1736 | { | 1709 | { |
| 1737 | if (eol_crlf && c == '\r') | 1710 | if (eol_dos && c == '\r') |
| 1738 | { | 1711 | { |
| 1739 | ONE_MORE_BYTE (byte_after_cr1); | 1712 | ONE_MORE_BYTE (byte_after_cr1); |
| 1740 | ONE_MORE_BYTE (byte_after_cr2); | 1713 | ONE_MORE_BYTE (byte_after_cr2); |
| @@ -1761,12 +1734,9 @@ encode_coding_utf_16 (struct coding_system *coding) | |||
| 1761 | int safe_room = 8; | 1734 | int safe_room = 8; |
| 1762 | enum utf_bom_type bom = CODING_UTF_16_BOM (coding); | 1735 | enum utf_bom_type bom = CODING_UTF_16_BOM (coding); |
| 1763 | int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian; | 1736 | int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian; |
| 1764 | int produced_chars = 0; | 1737 | EMACS_INT produced_chars = 0; |
| 1765 | Lisp_Object attrs, charset_list; | ||
| 1766 | int c; | 1738 | int c; |
| 1767 | 1739 | ||
| 1768 | CODING_GET_INFO (coding, attrs, charset_list); | ||
| 1769 | |||
| 1770 | if (bom != utf_without_bom) | 1740 | if (bom != utf_without_bom) |
| 1771 | { | 1741 | { |
| 1772 | ASSURE_DESTINATION (safe_room); | 1742 | ASSURE_DESTINATION (safe_room); |
| @@ -1898,7 +1868,7 @@ detect_coding_emacs_mule (struct coding_system *coding, | |||
| 1898 | const unsigned char *src = coding->source, *src_base; | 1868 | const unsigned char *src = coding->source, *src_base; |
| 1899 | const unsigned char *src_end = coding->source + coding->src_bytes; | 1869 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 1900 | int multibytep = coding->src_multibyte; | 1870 | int multibytep = coding->src_multibyte; |
| 1901 | int consumed_chars = 0; | 1871 | EMACS_INT consumed_chars = 0; |
| 1902 | int c; | 1872 | int c; |
| 1903 | int found = 0; | 1873 | int found = 0; |
| 1904 | 1874 | ||
| @@ -1918,17 +1888,17 @@ detect_coding_emacs_mule (struct coding_system *coding, | |||
| 1918 | it because analyzing it is too heavy for detecting. But, | 1888 | it because analyzing it is too heavy for detecting. But, |
| 1919 | at least, we check that the composite character | 1889 | at least, we check that the composite character |
| 1920 | consists of more than 4 bytes. */ | 1890 | consists of more than 4 bytes. */ |
| 1921 | const unsigned char *src_base; | 1891 | const unsigned char *src_start; |
| 1922 | 1892 | ||
| 1923 | repeat: | 1893 | repeat: |
| 1924 | src_base = src; | 1894 | src_start = src; |
| 1925 | do | 1895 | do |
| 1926 | { | 1896 | { |
| 1927 | ONE_MORE_BYTE (c); | 1897 | ONE_MORE_BYTE (c); |
| 1928 | } | 1898 | } |
| 1929 | while (c >= 0xA0); | 1899 | while (c >= 0xA0); |
| 1930 | 1900 | ||
| 1931 | if (src - src_base <= 4) | 1901 | if (src - src_start <= 4) |
| 1932 | break; | 1902 | break; |
| 1933 | found = CATEGORY_MASK_EMACS_MULE; | 1903 | found = CATEGORY_MASK_EMACS_MULE; |
| 1934 | if (c == 0x80) | 1904 | if (c == 0x80) |
| @@ -1980,7 +1950,7 @@ detect_coding_emacs_mule (struct coding_system *coding, | |||
| 1980 | the decoded character or rule. If an invalid byte is found, return | 1950 | the decoded character or rule. If an invalid byte is found, return |
| 1981 | -1. If SRC is too short, return -2. */ | 1951 | -1. If SRC is too short, return -2. */ |
| 1982 | 1952 | ||
| 1983 | int | 1953 | static int |
| 1984 | emacs_mule_char (struct coding_system *coding, const unsigned char *src, | 1954 | emacs_mule_char (struct coding_system *coding, const unsigned char *src, |
| 1985 | int *nbytes, int *nchars, int *id, | 1955 | int *nbytes, int *nchars, int *id, |
| 1986 | struct composition_status *cmp_status) | 1956 | struct composition_status *cmp_status) |
| @@ -1988,7 +1958,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src, | |||
| 1988 | const unsigned char *src_end = coding->source + coding->src_bytes; | 1958 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 1989 | const unsigned char *src_base = src; | 1959 | const unsigned char *src_base = src; |
| 1990 | int multibytep = coding->src_multibyte; | 1960 | int multibytep = coding->src_multibyte; |
| 1991 | int charset_id; | 1961 | int charset_ID; |
| 1992 | unsigned code; | 1962 | unsigned code; |
| 1993 | int c; | 1963 | int c; |
| 1994 | int consumed_chars = 0; | 1964 | int consumed_chars = 0; |
| @@ -1998,7 +1968,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src, | |||
| 1998 | if (c < 0) | 1968 | if (c < 0) |
| 1999 | { | 1969 | { |
| 2000 | c = -c; | 1970 | c = -c; |
| 2001 | charset_id = emacs_mule_charset[0]; | 1971 | charset_ID = emacs_mule_charset[0]; |
| 2002 | } | 1972 | } |
| 2003 | else | 1973 | else |
| 2004 | { | 1974 | { |
| @@ -2034,7 +2004,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src, | |||
| 2034 | switch (emacs_mule_bytes[c]) | 2004 | switch (emacs_mule_bytes[c]) |
| 2035 | { | 2005 | { |
| 2036 | case 2: | 2006 | case 2: |
| 2037 | if ((charset_id = emacs_mule_charset[c]) < 0) | 2007 | if ((charset_ID = emacs_mule_charset[c]) < 0) |
| 2038 | goto invalid_code; | 2008 | goto invalid_code; |
| 2039 | ONE_MORE_BYTE (c); | 2009 | ONE_MORE_BYTE (c); |
| 2040 | if (c < 0xA0) | 2010 | if (c < 0xA0) |
| @@ -2047,7 +2017,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src, | |||
| 2047 | || c == EMACS_MULE_LEADING_CODE_PRIVATE_12) | 2017 | || c == EMACS_MULE_LEADING_CODE_PRIVATE_12) |
| 2048 | { | 2018 | { |
| 2049 | ONE_MORE_BYTE (c); | 2019 | ONE_MORE_BYTE (c); |
| 2050 | if (c < 0xA0 || (charset_id = emacs_mule_charset[c]) < 0) | 2020 | if (c < 0xA0 || (charset_ID = emacs_mule_charset[c]) < 0) |
| 2051 | goto invalid_code; | 2021 | goto invalid_code; |
| 2052 | ONE_MORE_BYTE (c); | 2022 | ONE_MORE_BYTE (c); |
| 2053 | if (c < 0xA0) | 2023 | if (c < 0xA0) |
| @@ -2056,7 +2026,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src, | |||
| 2056 | } | 2026 | } |
| 2057 | else | 2027 | else |
| 2058 | { | 2028 | { |
| 2059 | if ((charset_id = emacs_mule_charset[c]) < 0) | 2029 | if ((charset_ID = emacs_mule_charset[c]) < 0) |
| 2060 | goto invalid_code; | 2030 | goto invalid_code; |
| 2061 | ONE_MORE_BYTE (c); | 2031 | ONE_MORE_BYTE (c); |
| 2062 | if (c < 0xA0) | 2032 | if (c < 0xA0) |
| @@ -2071,7 +2041,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src, | |||
| 2071 | 2041 | ||
| 2072 | case 4: | 2042 | case 4: |
| 2073 | ONE_MORE_BYTE (c); | 2043 | ONE_MORE_BYTE (c); |
| 2074 | if (c < 0 || (charset_id = emacs_mule_charset[c]) < 0) | 2044 | if (c < 0 || (charset_ID = emacs_mule_charset[c]) < 0) |
| 2075 | goto invalid_code; | 2045 | goto invalid_code; |
| 2076 | ONE_MORE_BYTE (c); | 2046 | ONE_MORE_BYTE (c); |
| 2077 | if (c < 0xA0) | 2047 | if (c < 0xA0) |
| @@ -2085,21 +2055,21 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src, | |||
| 2085 | 2055 | ||
| 2086 | case 1: | 2056 | case 1: |
| 2087 | code = c; | 2057 | code = c; |
| 2088 | charset_id = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit; | 2058 | charset_ID = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit; |
| 2089 | break; | 2059 | break; |
| 2090 | 2060 | ||
| 2091 | default: | 2061 | default: |
| 2092 | abort (); | 2062 | abort (); |
| 2093 | } | 2063 | } |
| 2094 | CODING_DECODE_CHAR (coding, src, src_base, src_end, | 2064 | CODING_DECODE_CHAR (coding, src, src_base, src_end, |
| 2095 | CHARSET_FROM_ID (charset_id), code, c); | 2065 | CHARSET_FROM_ID (charset_ID), code, c); |
| 2096 | if (c < 0) | 2066 | if (c < 0) |
| 2097 | goto invalid_code; | 2067 | goto invalid_code; |
| 2098 | } | 2068 | } |
| 2099 | *nbytes = src - src_base; | 2069 | *nbytes = src - src_base; |
| 2100 | *nchars = consumed_chars; | 2070 | *nchars = consumed_chars; |
| 2101 | if (id) | 2071 | if (id) |
| 2102 | *id = charset_id; | 2072 | *id = charset_ID; |
| 2103 | return (mseq_found ? -c : c); | 2073 | return (mseq_found ? -c : c); |
| 2104 | 2074 | ||
| 2105 | no_more_source: | 2075 | no_more_source: |
| @@ -2365,24 +2335,25 @@ decode_coding_emacs_mule (struct coding_system *coding) | |||
| 2365 | /* We may produce two annotations (charset and composition) in one | 2335 | /* We may produce two annotations (charset and composition) in one |
| 2366 | loop and one more charset annotation at the end. */ | 2336 | loop and one more charset annotation at the end. */ |
| 2367 | int *charbuf_end | 2337 | int *charbuf_end |
| 2368 | = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3); | 2338 | = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3) |
| 2369 | int consumed_chars = 0, consumed_chars_base; | 2339 | /* We can produce up to 2 characters in a loop. */ |
| 2340 | - 1; | ||
| 2341 | EMACS_INT consumed_chars = 0, consumed_chars_base; | ||
| 2370 | int multibytep = coding->src_multibyte; | 2342 | int multibytep = coding->src_multibyte; |
| 2371 | Lisp_Object attrs, charset_list; | 2343 | EMACS_INT char_offset = coding->produced_char; |
| 2372 | int char_offset = coding->produced_char; | 2344 | EMACS_INT last_offset = char_offset; |
| 2373 | int last_offset = char_offset; | ||
| 2374 | int last_id = charset_ascii; | 2345 | int last_id = charset_ascii; |
| 2375 | int eol_crlf = | 2346 | int eol_dos = |
| 2376 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | 2347 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); |
| 2377 | int byte_after_cr = -1; | 2348 | int byte_after_cr = -1; |
| 2378 | struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status; | 2349 | struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status; |
| 2379 | 2350 | ||
| 2380 | CODING_GET_INFO (coding, attrs, charset_list); | ||
| 2381 | |||
| 2382 | if (cmp_status->state != COMPOSING_NO) | 2351 | if (cmp_status->state != COMPOSING_NO) |
| 2383 | { | 2352 | { |
| 2384 | int i; | 2353 | int i; |
| 2385 | 2354 | ||
| 2355 | if (charbuf_end - charbuf < cmp_status->length) | ||
| 2356 | abort (); | ||
| 2386 | for (i = 0; i < cmp_status->length; i++) | 2357 | for (i = 0; i < cmp_status->length; i++) |
| 2387 | *charbuf++ = cmp_status->carryover[i]; | 2358 | *charbuf++ = cmp_status->carryover[i]; |
| 2388 | coding->annotated = 1; | 2359 | coding->annotated = 1; |
| @@ -2390,7 +2361,7 @@ decode_coding_emacs_mule (struct coding_system *coding) | |||
| 2390 | 2361 | ||
| 2391 | while (1) | 2362 | while (1) |
| 2392 | { | 2363 | { |
| 2393 | int c, id; | 2364 | int c, id IF_LINT (= 0); |
| 2394 | 2365 | ||
| 2395 | src_base = src; | 2366 | src_base = src; |
| 2396 | consumed_chars_base = consumed_chars; | 2367 | consumed_chars_base = consumed_chars; |
| @@ -2422,7 +2393,7 @@ decode_coding_emacs_mule (struct coding_system *coding) | |||
| 2422 | 2393 | ||
| 2423 | if (c < 0x80) | 2394 | if (c < 0x80) |
| 2424 | { | 2395 | { |
| 2425 | if (eol_crlf && c == '\r') | 2396 | if (eol_dos && c == '\r') |
| 2426 | ONE_MORE_BYTE (byte_after_cr); | 2397 | ONE_MORE_BYTE (byte_after_cr); |
| 2427 | id = charset_ascii; | 2398 | id = charset_ascii; |
| 2428 | if (cmp_status->state != COMPOSING_NO) | 2399 | if (cmp_status->state != COMPOSING_NO) |
| @@ -2435,7 +2406,7 @@ decode_coding_emacs_mule (struct coding_system *coding) | |||
| 2435 | } | 2406 | } |
| 2436 | else | 2407 | else |
| 2437 | { | 2408 | { |
| 2438 | int nchars, nbytes; | 2409 | int nchars IF_LINT (= 0), nbytes IF_LINT (= 0); |
| 2439 | /* emacs_mule_char can load a charset map from a file, which | 2410 | /* emacs_mule_char can load a charset map from a file, which |
| 2440 | allocates a large structure and might cause buffer text | 2411 | allocates a large structure and might cause buffer text |
| 2441 | to be relocated as result. Thus, we need to remember the | 2412 | to be relocated as result. Thus, we need to remember the |
| @@ -2623,7 +2594,7 @@ encode_coding_emacs_mule (struct coding_system *coding) | |||
| 2623 | unsigned char *dst = coding->destination + coding->produced; | 2594 | unsigned char *dst = coding->destination + coding->produced; |
| 2624 | unsigned char *dst_end = coding->destination + coding->dst_bytes; | 2595 | unsigned char *dst_end = coding->destination + coding->dst_bytes; |
| 2625 | int safe_room = 8; | 2596 | int safe_room = 8; |
| 2626 | int produced_chars = 0; | 2597 | EMACS_INT produced_chars = 0; |
| 2627 | Lisp_Object attrs, charset_list; | 2598 | Lisp_Object attrs, charset_list; |
| 2628 | int c; | 2599 | int c; |
| 2629 | int preferred_charset_id = -1; | 2600 | int preferred_charset_id = -1; |
| @@ -2897,16 +2868,12 @@ encode_coding_emacs_mule (struct coding_system *coding) | |||
| 2897 | COMPOSITION_WITH_RULE_ALTCHARS: | 2868 | COMPOSITION_WITH_RULE_ALTCHARS: |
| 2898 | ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */ | 2869 | ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */ |
| 2899 | 2870 | ||
| 2900 | enum iso_code_class_type iso_code_class[256]; | 2871 | static enum iso_code_class_type iso_code_class[256]; |
| 2901 | 2872 | ||
| 2902 | #define SAFE_CHARSET_P(coding, id) \ | 2873 | #define SAFE_CHARSET_P(coding, id) \ |
| 2903 | ((id) <= (coding)->max_charset_id \ | 2874 | ((id) <= (coding)->max_charset_id \ |
| 2904 | && (coding)->safe_charsets[id] != 255) | 2875 | && (coding)->safe_charsets[id] != 255) |
| 2905 | 2876 | ||
| 2906 | |||
| 2907 | #define SHIFT_OUT_OK(category) \ | ||
| 2908 | (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0) | ||
| 2909 | |||
| 2910 | static void | 2877 | static void |
| 2911 | setup_iso_safe_charsets (Lisp_Object attrs) | 2878 | setup_iso_safe_charsets (Lisp_Object attrs) |
| 2912 | { | 2879 | { |
| @@ -2985,7 +2952,7 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 2985 | int single_shifting = 0; | 2952 | int single_shifting = 0; |
| 2986 | int id; | 2953 | int id; |
| 2987 | int c, c1; | 2954 | int c, c1; |
| 2988 | int consumed_chars = 0; | 2955 | EMACS_INT consumed_chars = 0; |
| 2989 | int i; | 2956 | int i; |
| 2990 | int rejected = 0; | 2957 | int rejected = 0; |
| 2991 | int found = 0; | 2958 | int found = 0; |
| @@ -3023,40 +2990,11 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 3023 | break; | 2990 | break; |
| 3024 | single_shifting = 0; | 2991 | single_shifting = 0; |
| 3025 | ONE_MORE_BYTE (c); | 2992 | ONE_MORE_BYTE (c); |
| 3026 | if (c >= '(' && c <= '/') | 2993 | if (c == 'N' || c == 'O') |
| 3027 | { | ||
| 3028 | /* Designation sequence for a charset of dimension 1. */ | ||
| 3029 | ONE_MORE_BYTE (c1); | ||
| 3030 | if (c1 < ' ' || c1 >= 0x80 | ||
| 3031 | || (id = iso_charset_table[0][c >= ','][c1]) < 0) | ||
| 3032 | /* Invalid designation sequence. Just ignore. */ | ||
| 3033 | break; | ||
| 3034 | } | ||
| 3035 | else if (c == '$') | ||
| 3036 | { | ||
| 3037 | /* Designation sequence for a charset of dimension 2. */ | ||
| 3038 | ONE_MORE_BYTE (c); | ||
| 3039 | if (c >= '@' && c <= 'B') | ||
| 3040 | /* Designation for JISX0208.1978, GB2312, or JISX0208. */ | ||
| 3041 | id = iso_charset_table[1][0][c]; | ||
| 3042 | else if (c >= '(' && c <= '/') | ||
| 3043 | { | ||
| 3044 | ONE_MORE_BYTE (c1); | ||
| 3045 | if (c1 < ' ' || c1 >= 0x80 | ||
| 3046 | || (id = iso_charset_table[1][c >= ','][c1]) < 0) | ||
| 3047 | /* Invalid designation sequence. Just ignore. */ | ||
| 3048 | break; | ||
| 3049 | } | ||
| 3050 | else | ||
| 3051 | /* Invalid designation sequence. Just ignore it. */ | ||
| 3052 | break; | ||
| 3053 | } | ||
| 3054 | else if (c == 'N' || c == 'O') | ||
| 3055 | { | 2994 | { |
| 3056 | /* ESC <Fe> for SS2 or SS3. */ | 2995 | /* ESC <Fe> for SS2 or SS3. */ |
| 3057 | single_shifting = 1; | 2996 | single_shifting = 1; |
| 3058 | rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT; | 2997 | rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT; |
| 3059 | break; | ||
| 3060 | } | 2998 | } |
| 3061 | else if (c == '1') | 2999 | else if (c == '1') |
| 3062 | { | 3000 | { |
| @@ -3072,36 +3010,66 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 3072 | { | 3010 | { |
| 3073 | /* ESC <Fp> for start/end composition. */ | 3011 | /* ESC <Fp> for start/end composition. */ |
| 3074 | composition_count = 0; | 3012 | composition_count = 0; |
| 3075 | break; | ||
| 3076 | } | 3013 | } |
| 3077 | else | 3014 | else |
| 3078 | { | 3015 | { |
| 3079 | /* Invalid escape sequence. Just ignore it. */ | 3016 | if (c >= '(' && c <= '/') |
| 3080 | break; | 3017 | { |
| 3081 | } | 3018 | /* Designation sequence for a charset of dimension 1. */ |
| 3019 | ONE_MORE_BYTE (c1); | ||
| 3020 | if (c1 < ' ' || c1 >= 0x80 | ||
| 3021 | || (id = iso_charset_table[0][c >= ','][c1]) < 0) | ||
| 3022 | /* Invalid designation sequence. Just ignore. */ | ||
| 3023 | break; | ||
| 3024 | } | ||
| 3025 | else if (c == '$') | ||
| 3026 | { | ||
| 3027 | /* Designation sequence for a charset of dimension 2. */ | ||
| 3028 | ONE_MORE_BYTE (c); | ||
| 3029 | if (c >= '@' && c <= 'B') | ||
| 3030 | /* Designation for JISX0208.1978, GB2312, or JISX0208. */ | ||
| 3031 | id = iso_charset_table[1][0][c]; | ||
| 3032 | else if (c >= '(' && c <= '/') | ||
| 3033 | { | ||
| 3034 | ONE_MORE_BYTE (c1); | ||
| 3035 | if (c1 < ' ' || c1 >= 0x80 | ||
| 3036 | || (id = iso_charset_table[1][c >= ','][c1]) < 0) | ||
| 3037 | /* Invalid designation sequence. Just ignore. */ | ||
| 3038 | break; | ||
| 3039 | } | ||
| 3040 | else | ||
| 3041 | /* Invalid designation sequence. Just ignore it. */ | ||
| 3042 | break; | ||
| 3043 | } | ||
| 3044 | else | ||
| 3045 | { | ||
| 3046 | /* Invalid escape sequence. Just ignore it. */ | ||
| 3047 | break; | ||
| 3048 | } | ||
| 3082 | 3049 | ||
| 3083 | /* We found a valid designation sequence for CHARSET. */ | 3050 | /* We found a valid designation sequence for CHARSET. */ |
| 3084 | rejected |= CATEGORY_MASK_ISO_8BIT; | 3051 | rejected |= CATEGORY_MASK_ISO_8BIT; |
| 3085 | if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7], | 3052 | if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7], |
| 3086 | id)) | 3053 | id)) |
| 3087 | found |= CATEGORY_MASK_ISO_7; | 3054 | found |= CATEGORY_MASK_ISO_7; |
| 3088 | else | 3055 | else |
| 3089 | rejected |= CATEGORY_MASK_ISO_7; | 3056 | rejected |= CATEGORY_MASK_ISO_7; |
| 3090 | if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight], | 3057 | if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight], |
| 3091 | id)) | 3058 | id)) |
| 3092 | found |= CATEGORY_MASK_ISO_7_TIGHT; | 3059 | found |= CATEGORY_MASK_ISO_7_TIGHT; |
| 3093 | else | 3060 | else |
| 3094 | rejected |= CATEGORY_MASK_ISO_7_TIGHT; | 3061 | rejected |= CATEGORY_MASK_ISO_7_TIGHT; |
| 3095 | if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else], | 3062 | if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else], |
| 3096 | id)) | 3063 | id)) |
| 3097 | found |= CATEGORY_MASK_ISO_7_ELSE; | 3064 | found |= CATEGORY_MASK_ISO_7_ELSE; |
| 3098 | else | 3065 | else |
| 3099 | rejected |= CATEGORY_MASK_ISO_7_ELSE; | 3066 | rejected |= CATEGORY_MASK_ISO_7_ELSE; |
| 3100 | if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else], | 3067 | if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else], |
| 3101 | id)) | 3068 | id)) |
| 3102 | found |= CATEGORY_MASK_ISO_8_ELSE; | 3069 | found |= CATEGORY_MASK_ISO_8_ELSE; |
| 3103 | else | 3070 | else |
| 3104 | rejected |= CATEGORY_MASK_ISO_8_ELSE; | 3071 | rejected |= CATEGORY_MASK_ISO_8_ELSE; |
| 3072 | } | ||
| 3105 | break; | 3073 | break; |
| 3106 | 3074 | ||
| 3107 | case ISO_CODE_SO: | 3075 | case ISO_CODE_SO: |
| @@ -3129,13 +3097,32 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 3129 | rejected |= CATEGORY_MASK_ISO_7BIT; | 3097 | rejected |= CATEGORY_MASK_ISO_7BIT; |
| 3130 | if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) | 3098 | if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) |
| 3131 | & CODING_ISO_FLAG_SINGLE_SHIFT) | 3099 | & CODING_ISO_FLAG_SINGLE_SHIFT) |
| 3132 | found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1; | 3100 | { |
| 3101 | found |= CATEGORY_MASK_ISO_8_1; | ||
| 3102 | single_shifting = 1; | ||
| 3103 | } | ||
| 3133 | if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2]) | 3104 | if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2]) |
| 3134 | & CODING_ISO_FLAG_SINGLE_SHIFT) | 3105 | & CODING_ISO_FLAG_SINGLE_SHIFT) |
| 3135 | found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1; | 3106 | { |
| 3107 | found |= CATEGORY_MASK_ISO_8_2; | ||
| 3108 | single_shifting = 1; | ||
| 3109 | } | ||
| 3136 | if (single_shifting) | 3110 | if (single_shifting) |
| 3137 | break; | 3111 | break; |
| 3138 | goto check_extra_latin; | 3112 | check_extra_latin: |
| 3113 | if (! VECTORP (Vlatin_extra_code_table) | ||
| 3114 | || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) | ||
| 3115 | { | ||
| 3116 | rejected = CATEGORY_MASK_ISO; | ||
| 3117 | break; | ||
| 3118 | } | ||
| 3119 | if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) | ||
| 3120 | & CODING_ISO_FLAG_LATIN_EXTRA) | ||
| 3121 | found |= CATEGORY_MASK_ISO_8_1; | ||
| 3122 | else | ||
| 3123 | rejected |= CATEGORY_MASK_ISO_8_1; | ||
| 3124 | rejected |= CATEGORY_MASK_ISO_8_2; | ||
| 3125 | break; | ||
| 3139 | 3126 | ||
| 3140 | default: | 3127 | default: |
| 3141 | if (c < 0) | 3128 | if (c < 0) |
| @@ -3158,7 +3145,7 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 3158 | if (! single_shifting | 3145 | if (! single_shifting |
| 3159 | && ! (rejected & CATEGORY_MASK_ISO_8_2)) | 3146 | && ! (rejected & CATEGORY_MASK_ISO_8_2)) |
| 3160 | { | 3147 | { |
| 3161 | int i = 1; | 3148 | int len = 1; |
| 3162 | while (src < src_end) | 3149 | while (src < src_end) |
| 3163 | { | 3150 | { |
| 3164 | src_base = src; | 3151 | src_base = src; |
| @@ -3168,38 +3155,24 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 3168 | src = src_base; | 3155 | src = src_base; |
| 3169 | break; | 3156 | break; |
| 3170 | } | 3157 | } |
| 3171 | i++; | 3158 | len++; |
| 3172 | } | 3159 | } |
| 3173 | 3160 | ||
| 3174 | if (i & 1 && src < src_end) | 3161 | if (len & 1 && src < src_end) |
| 3175 | { | 3162 | { |
| 3176 | rejected |= CATEGORY_MASK_ISO_8_2; | 3163 | rejected |= CATEGORY_MASK_ISO_8_2; |
| 3177 | if (composition_count >= 0) | 3164 | if (composition_count >= 0) |
| 3178 | composition_count += i; | 3165 | composition_count += len; |
| 3179 | } | 3166 | } |
| 3180 | else | 3167 | else |
| 3181 | { | 3168 | { |
| 3182 | found |= CATEGORY_MASK_ISO_8_2; | 3169 | found |= CATEGORY_MASK_ISO_8_2; |
| 3183 | if (composition_count >= 0) | 3170 | if (composition_count >= 0) |
| 3184 | composition_count += i / 2; | 3171 | composition_count += len / 2; |
| 3185 | } | 3172 | } |
| 3186 | } | 3173 | } |
| 3187 | break; | 3174 | break; |
| 3188 | } | 3175 | } |
| 3189 | check_extra_latin: | ||
| 3190 | single_shifting = 0; | ||
| 3191 | if (! VECTORP (Vlatin_extra_code_table) | ||
| 3192 | || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) | ||
| 3193 | { | ||
| 3194 | rejected = CATEGORY_MASK_ISO; | ||
| 3195 | break; | ||
| 3196 | } | ||
| 3197 | if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) | ||
| 3198 | & CODING_ISO_FLAG_LATIN_EXTRA) | ||
| 3199 | found |= CATEGORY_MASK_ISO_8_1; | ||
| 3200 | else | ||
| 3201 | rejected |= CATEGORY_MASK_ISO_8_1; | ||
| 3202 | rejected |= CATEGORY_MASK_ISO_8_2; | ||
| 3203 | } | 3176 | } |
| 3204 | } | 3177 | } |
| 3205 | detect_info->rejected |= CATEGORY_MASK_ISO; | 3178 | detect_info->rejected |= CATEGORY_MASK_ISO; |
| @@ -3289,15 +3262,14 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 3289 | */ | 3262 | */ |
| 3290 | 3263 | ||
| 3291 | /* Decode a composition rule C1 and maybe one more byte from the | 3264 | /* Decode a composition rule C1 and maybe one more byte from the |
| 3292 | source, and set RULE to the encoded composition rule, NBYTES to the | 3265 | source, and set RULE to the encoded composition rule. If the rule |
| 3293 | length of the composition rule. If the rule is invalid, set RULE | 3266 | is invalid, goto invalid_code. */ |
| 3294 | to some negative value. */ | ||
| 3295 | 3267 | ||
| 3296 | #define DECODE_COMPOSITION_RULE(rule, nbytes) \ | 3268 | #define DECODE_COMPOSITION_RULE(rule) \ |
| 3297 | do { \ | 3269 | do { \ |
| 3298 | rule = c1 - 32; \ | 3270 | rule = c1 - 32; \ |
| 3299 | if (rule < 0) \ | 3271 | if (rule < 0) \ |
| 3300 | break; \ | 3272 | goto invalid_code; \ |
| 3301 | if (rule < 81) /* old format (before ver.21) */ \ | 3273 | if (rule < 81) /* old format (before ver.21) */ \ |
| 3302 | { \ | 3274 | { \ |
| 3303 | int gref = (rule) / 9; \ | 3275 | int gref = (rule) / 9; \ |
| @@ -3305,17 +3277,16 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 3305 | if (gref == 4) gref = 10; \ | 3277 | if (gref == 4) gref = 10; \ |
| 3306 | if (nref == 4) nref = 10; \ | 3278 | if (nref == 4) nref = 10; \ |
| 3307 | rule = COMPOSITION_ENCODE_RULE (gref, nref); \ | 3279 | rule = COMPOSITION_ENCODE_RULE (gref, nref); \ |
| 3308 | nbytes = 1; \ | ||
| 3309 | } \ | 3280 | } \ |
| 3310 | else /* new format (after ver.21) */ \ | 3281 | else /* new format (after ver.21) */ \ |
| 3311 | { \ | 3282 | { \ |
| 3312 | int c; \ | 3283 | int b; \ |
| 3313 | \ | 3284 | \ |
| 3314 | ONE_MORE_BYTE (c); \ | 3285 | ONE_MORE_BYTE (b); \ |
| 3315 | rule = COMPOSITION_ENCODE_RULE (rule - 81, c - 32); \ | 3286 | if (! COMPOSITION_ENCODE_RULE_VALID (rule - 81, b - 32)) \ |
| 3316 | if (rule >= 0) \ | 3287 | goto invalid_code; \ |
| 3317 | rule += 0x100; /* to destinguish it from the old format */ \ | 3288 | rule = COMPOSITION_ENCODE_RULE (rule - 81, b - 32); \ |
| 3318 | nbytes = 2; \ | 3289 | rule += 0x100; /* Distinguish it from the old format. */ \ |
| 3319 | } \ | 3290 | } \ |
| 3320 | } while (0) | 3291 | } while (0) |
| 3321 | 3292 | ||
| @@ -3491,7 +3462,7 @@ decode_coding_iso_2022 (struct coding_system *coding) | |||
| 3491 | loop and one more charset annotation at the end. */ | 3462 | loop and one more charset annotation at the end. */ |
| 3492 | int *charbuf_end | 3463 | int *charbuf_end |
| 3493 | = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3); | 3464 | = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3); |
| 3494 | int consumed_chars = 0, consumed_chars_base; | 3465 | EMACS_INT consumed_chars = 0, consumed_chars_base; |
| 3495 | int multibytep = coding->src_multibyte; | 3466 | int multibytep = coding->src_multibyte; |
| 3496 | /* Charsets invoked to graphic plane 0 and 1 respectively. */ | 3467 | /* Charsets invoked to graphic plane 0 and 1 respectively. */ |
| 3497 | int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0); | 3468 | int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0); |
| @@ -3500,23 +3471,22 @@ decode_coding_iso_2022 (struct coding_system *coding) | |||
| 3500 | struct charset *charset; | 3471 | struct charset *charset; |
| 3501 | int c; | 3472 | int c; |
| 3502 | struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding); | 3473 | struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding); |
| 3503 | Lisp_Object attrs, charset_list; | 3474 | Lisp_Object attrs = CODING_ID_ATTRS (coding->id); |
| 3504 | int char_offset = coding->produced_char; | 3475 | EMACS_INT char_offset = coding->produced_char; |
| 3505 | int last_offset = char_offset; | 3476 | EMACS_INT last_offset = char_offset; |
| 3506 | int last_id = charset_ascii; | 3477 | int last_id = charset_ascii; |
| 3507 | int eol_crlf = | 3478 | int eol_dos = |
| 3508 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | 3479 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); |
| 3509 | int byte_after_cr = -1; | 3480 | int byte_after_cr = -1; |
| 3510 | int i; | 3481 | int i; |
| 3511 | 3482 | ||
| 3512 | CODING_GET_INFO (coding, attrs, charset_list); | ||
| 3513 | setup_iso_safe_charsets (attrs); | 3483 | setup_iso_safe_charsets (attrs); |
| 3514 | /* Charset list may have been changed. */ | ||
| 3515 | charset_list = CODING_ATTR_CHARSET_LIST (attrs); | ||
| 3516 | coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs)); | 3484 | coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs)); |
| 3517 | 3485 | ||
| 3518 | if (cmp_status->state != COMPOSING_NO) | 3486 | if (cmp_status->state != COMPOSING_NO) |
| 3519 | { | 3487 | { |
| 3488 | if (charbuf_end - charbuf < cmp_status->length) | ||
| 3489 | abort (); | ||
| 3520 | for (i = 0; i < cmp_status->length; i++) | 3490 | for (i = 0; i < cmp_status->length; i++) |
| 3521 | *charbuf++ = cmp_status->carryover[i]; | 3491 | *charbuf++ = cmp_status->carryover[i]; |
| 3522 | coding->annotated = 1; | 3492 | coding->annotated = 1; |
| @@ -3582,11 +3552,9 @@ decode_coding_iso_2022 (struct coding_system *coding) | |||
| 3582 | || cmp_status->state == COMPOSING_COMPONENT_RULE) | 3552 | || cmp_status->state == COMPOSING_COMPONENT_RULE) |
| 3583 | && c1 != ISO_CODE_ESC) | 3553 | && c1 != ISO_CODE_ESC) |
| 3584 | { | 3554 | { |
| 3585 | int rule, nbytes; | 3555 | int rule; |
| 3586 | 3556 | ||
| 3587 | DECODE_COMPOSITION_RULE (rule, nbytes); | 3557 | DECODE_COMPOSITION_RULE (rule); |
| 3588 | if (rule < 0) | ||
| 3589 | goto invalid_code; | ||
| 3590 | STORE_COMPOSITION_RULE (rule); | 3558 | STORE_COMPOSITION_RULE (rule); |
| 3591 | continue; | 3559 | continue; |
| 3592 | } | 3560 | } |
| @@ -3624,7 +3592,7 @@ decode_coding_iso_2022 (struct coding_system *coding) | |||
| 3624 | break; | 3592 | break; |
| 3625 | 3593 | ||
| 3626 | case ISO_control_0: | 3594 | case ISO_control_0: |
| 3627 | if (eol_crlf && c1 == '\r') | 3595 | if (eol_dos && c1 == '\r') |
| 3628 | ONE_MORE_BYTE (byte_after_cr); | 3596 | ONE_MORE_BYTE (byte_after_cr); |
| 3629 | MAYBE_FINISH_COMPOSITION (); | 3597 | MAYBE_FINISH_COMPOSITION (); |
| 3630 | charset = CHARSET_FROM_ID (charset_ascii); | 3598 | charset = CHARSET_FROM_ID (charset_ascii); |
| @@ -3897,6 +3865,10 @@ decode_coding_iso_2022 (struct coding_system *coding) | |||
| 3897 | } | 3865 | } |
| 3898 | continue; | 3866 | continue; |
| 3899 | } | 3867 | } |
| 3868 | break; | ||
| 3869 | |||
| 3870 | default: | ||
| 3871 | abort (); | ||
| 3900 | } | 3872 | } |
| 3901 | 3873 | ||
| 3902 | if (cmp_status->state == COMPOSING_NO | 3874 | if (cmp_status->state == COMPOSING_NO |
| @@ -4029,7 +4001,6 @@ decode_coding_iso_2022 (struct coding_system *coding) | |||
| 4029 | const char *intermediate_char_94 = "()*+"; \ | 4001 | const char *intermediate_char_94 = "()*+"; \ |
| 4030 | const char *intermediate_char_96 = ",-./"; \ | 4002 | const char *intermediate_char_96 = ",-./"; \ |
| 4031 | int revision = -1; \ | 4003 | int revision = -1; \ |
| 4032 | int c; \ | ||
| 4033 | \ | 4004 | \ |
| 4034 | if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \ | 4005 | if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \ |
| 4035 | revision = CHARSET_ISO_REVISION (charset); \ | 4006 | revision = CHARSET_ISO_REVISION (charset); \ |
| @@ -4042,11 +4013,12 @@ decode_coding_iso_2022 (struct coding_system *coding) | |||
| 4042 | EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \ | 4013 | EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \ |
| 4043 | if (CHARSET_DIMENSION (charset) == 1) \ | 4014 | if (CHARSET_DIMENSION (charset) == 1) \ |
| 4044 | { \ | 4015 | { \ |
| 4016 | int b; \ | ||
| 4045 | if (! CHARSET_ISO_CHARS_96 (charset)) \ | 4017 | if (! CHARSET_ISO_CHARS_96 (charset)) \ |
| 4046 | c = intermediate_char_94[reg]; \ | 4018 | b = intermediate_char_94[reg]; \ |
| 4047 | else \ | 4019 | else \ |
| 4048 | c = intermediate_char_96[reg]; \ | 4020 | b = intermediate_char_96[reg]; \ |
| 4049 | EMIT_ONE_ASCII_BYTE (c); \ | 4021 | EMIT_ONE_ASCII_BYTE (b); \ |
| 4050 | } \ | 4022 | } \ |
| 4051 | else \ | 4023 | else \ |
| 4052 | { \ | 4024 | { \ |
| @@ -4226,13 +4198,13 @@ decode_coding_iso_2022 (struct coding_system *coding) | |||
| 4226 | to use CHARSET. The element `spec.iso_2022' of *CODING is updated. | 4198 | to use CHARSET. The element `spec.iso_2022' of *CODING is updated. |
| 4227 | Return new DST. */ | 4199 | Return new DST. */ |
| 4228 | 4200 | ||
| 4229 | unsigned char * | 4201 | static unsigned char * |
| 4230 | encode_invocation_designation (struct charset *charset, | 4202 | encode_invocation_designation (struct charset *charset, |
| 4231 | struct coding_system *coding, | 4203 | struct coding_system *coding, |
| 4232 | unsigned char *dst, int *p_nchars) | 4204 | unsigned char *dst, EMACS_INT *p_nchars) |
| 4233 | { | 4205 | { |
| 4234 | int multibytep = coding->dst_multibyte; | 4206 | int multibytep = coding->dst_multibyte; |
| 4235 | int produced_chars = *p_nchars; | 4207 | EMACS_INT produced_chars = *p_nchars; |
| 4236 | int reg; /* graphic register number */ | 4208 | int reg; /* graphic register number */ |
| 4237 | int id = CHARSET_ID (charset); | 4209 | int id = CHARSET_ID (charset); |
| 4238 | 4210 | ||
| @@ -4289,30 +4261,6 @@ encode_invocation_designation (struct charset *charset, | |||
| 4289 | return dst; | 4261 | return dst; |
| 4290 | } | 4262 | } |
| 4291 | 4263 | ||
| 4292 | /* The following three macros produce codes for indicating direction | ||
| 4293 | of text. */ | ||
| 4294 | #define ENCODE_CONTROL_SEQUENCE_INTRODUCER \ | ||
| 4295 | do { \ | ||
| 4296 | if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS) \ | ||
| 4297 | EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \ | ||
| 4298 | else \ | ||
| 4299 | EMIT_ONE_BYTE (ISO_CODE_CSI); \ | ||
| 4300 | } while (0) | ||
| 4301 | |||
| 4302 | |||
| 4303 | #define ENCODE_DIRECTION_R2L() \ | ||
| 4304 | do { \ | ||
| 4305 | ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \ | ||
| 4306 | EMIT_TWO_ASCII_BYTES ('2', ']'); \ | ||
| 4307 | } while (0) | ||
| 4308 | |||
| 4309 | |||
| 4310 | #define ENCODE_DIRECTION_L2R() \ | ||
| 4311 | do { \ | ||
| 4312 | ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \ | ||
| 4313 | EMIT_TWO_ASCII_BYTES ('0', ']'); \ | ||
| 4314 | } while (0) | ||
| 4315 | |||
| 4316 | 4264 | ||
| 4317 | /* Produce codes for designation and invocation to reset the graphic | 4265 | /* Produce codes for designation and invocation to reset the graphic |
| 4318 | planes and registers to initial state. */ | 4266 | planes and registers to initial state. */ |
| @@ -4342,13 +4290,13 @@ encode_invocation_designation (struct charset *charset, | |||
| 4342 | 4290 | ||
| 4343 | static unsigned char * | 4291 | static unsigned char * |
| 4344 | encode_designation_at_bol (struct coding_system *coding, int *charbuf, | 4292 | encode_designation_at_bol (struct coding_system *coding, int *charbuf, |
| 4345 | int *charbuf_end, unsigned char *dst) | 4293 | unsigned char *dst) |
| 4346 | { | 4294 | { |
| 4347 | struct charset *charset; | 4295 | struct charset *charset; |
| 4348 | /* Table of charsets to be designated to each graphic register. */ | 4296 | /* Table of charsets to be designated to each graphic register. */ |
| 4349 | int r[4]; | 4297 | int r[4]; |
| 4350 | int c, found = 0, reg; | 4298 | int c, found = 0, reg; |
| 4351 | int produced_chars = 0; | 4299 | EMACS_INT produced_chars = 0; |
| 4352 | int multibytep = coding->dst_multibyte; | 4300 | int multibytep = coding->dst_multibyte; |
| 4353 | Lisp_Object attrs; | 4301 | Lisp_Object attrs; |
| 4354 | Lisp_Object charset_list; | 4302 | Lisp_Object charset_list; |
| @@ -4403,7 +4351,7 @@ encode_coding_iso_2022 (struct coding_system *coding) | |||
| 4403 | int bol_designation | 4351 | int bol_designation |
| 4404 | = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL | 4352 | = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL |
| 4405 | && CODING_ISO_BOL (coding)); | 4353 | && CODING_ISO_BOL (coding)); |
| 4406 | int produced_chars = 0; | 4354 | EMACS_INT produced_chars = 0; |
| 4407 | Lisp_Object attrs, eol_type, charset_list; | 4355 | Lisp_Object attrs, eol_type, charset_list; |
| 4408 | int ascii_compatible; | 4356 | int ascii_compatible; |
| 4409 | int c; | 4357 | int c; |
| @@ -4433,7 +4381,7 @@ encode_coding_iso_2022 (struct coding_system *coding) | |||
| 4433 | unsigned char *dst_prev = dst; | 4381 | unsigned char *dst_prev = dst; |
| 4434 | 4382 | ||
| 4435 | /* We have to produce designation sequences if any now. */ | 4383 | /* We have to produce designation sequences if any now. */ |
| 4436 | dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst); | 4384 | dst = encode_designation_at_bol (coding, charbuf, dst); |
| 4437 | bol_designation = 0; | 4385 | bol_designation = 0; |
| 4438 | /* We are sure that designation sequences are all ASCII bytes. */ | 4386 | /* We are sure that designation sequences are all ASCII bytes. */ |
| 4439 | produced_chars += dst - dst_prev; | 4387 | produced_chars += dst - dst_prev; |
| @@ -4591,7 +4539,7 @@ detect_coding_sjis (struct coding_system *coding, | |||
| 4591 | const unsigned char *src = coding->source, *src_base; | 4539 | const unsigned char *src = coding->source, *src_base; |
| 4592 | const unsigned char *src_end = coding->source + coding->src_bytes; | 4540 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 4593 | int multibytep = coding->src_multibyte; | 4541 | int multibytep = coding->src_multibyte; |
| 4594 | int consumed_chars = 0; | 4542 | EMACS_INT consumed_chars = 0; |
| 4595 | int found = 0; | 4543 | int found = 0; |
| 4596 | int c; | 4544 | int c; |
| 4597 | Lisp_Object attrs, charset_list; | 4545 | Lisp_Object attrs, charset_list; |
| @@ -4648,7 +4596,7 @@ detect_coding_big5 (struct coding_system *coding, | |||
| 4648 | const unsigned char *src = coding->source, *src_base; | 4596 | const unsigned char *src = coding->source, *src_base; |
| 4649 | const unsigned char *src_end = coding->source + coding->src_bytes; | 4597 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 4650 | int multibytep = coding->src_multibyte; | 4598 | int multibytep = coding->src_multibyte; |
| 4651 | int consumed_chars = 0; | 4599 | EMACS_INT consumed_chars = 0; |
| 4652 | int found = 0; | 4600 | int found = 0; |
| 4653 | int c; | 4601 | int c; |
| 4654 | 4602 | ||
| @@ -4699,15 +4647,15 @@ decode_coding_sjis (struct coding_system *coding) | |||
| 4699 | the end. */ | 4647 | the end. */ |
| 4700 | int *charbuf_end | 4648 | int *charbuf_end |
| 4701 | = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); | 4649 | = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); |
| 4702 | int consumed_chars = 0, consumed_chars_base; | 4650 | EMACS_INT consumed_chars = 0, consumed_chars_base; |
| 4703 | int multibytep = coding->src_multibyte; | 4651 | int multibytep = coding->src_multibyte; |
| 4704 | struct charset *charset_roman, *charset_kanji, *charset_kana; | 4652 | struct charset *charset_roman, *charset_kanji, *charset_kana; |
| 4705 | struct charset *charset_kanji2; | 4653 | struct charset *charset_kanji2; |
| 4706 | Lisp_Object attrs, charset_list, val; | 4654 | Lisp_Object attrs, charset_list, val; |
| 4707 | int char_offset = coding->produced_char; | 4655 | EMACS_INT char_offset = coding->produced_char; |
| 4708 | int last_offset = char_offset; | 4656 | EMACS_INT last_offset = char_offset; |
| 4709 | int last_id = charset_ascii; | 4657 | int last_id = charset_ascii; |
| 4710 | int eol_crlf = | 4658 | int eol_dos = |
| 4711 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | 4659 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); |
| 4712 | int byte_after_cr = -1; | 4660 | int byte_after_cr = -1; |
| 4713 | 4661 | ||
| @@ -4742,7 +4690,7 @@ decode_coding_sjis (struct coding_system *coding) | |||
| 4742 | goto invalid_code; | 4690 | goto invalid_code; |
| 4743 | if (c < 0x80) | 4691 | if (c < 0x80) |
| 4744 | { | 4692 | { |
| 4745 | if (eol_crlf && c == '\r') | 4693 | if (eol_dos && c == '\r') |
| 4746 | ONE_MORE_BYTE (byte_after_cr); | 4694 | ONE_MORE_BYTE (byte_after_cr); |
| 4747 | charset = charset_roman; | 4695 | charset = charset_roman; |
| 4748 | } | 4696 | } |
| @@ -4817,14 +4765,14 @@ decode_coding_big5 (struct coding_system *coding) | |||
| 4817 | the end. */ | 4765 | the end. */ |
| 4818 | int *charbuf_end | 4766 | int *charbuf_end |
| 4819 | = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); | 4767 | = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); |
| 4820 | int consumed_chars = 0, consumed_chars_base; | 4768 | EMACS_INT consumed_chars = 0, consumed_chars_base; |
| 4821 | int multibytep = coding->src_multibyte; | 4769 | int multibytep = coding->src_multibyte; |
| 4822 | struct charset *charset_roman, *charset_big5; | 4770 | struct charset *charset_roman, *charset_big5; |
| 4823 | Lisp_Object attrs, charset_list, val; | 4771 | Lisp_Object attrs, charset_list, val; |
| 4824 | int char_offset = coding->produced_char; | 4772 | EMACS_INT char_offset = coding->produced_char; |
| 4825 | int last_offset = char_offset; | 4773 | EMACS_INT last_offset = char_offset; |
| 4826 | int last_id = charset_ascii; | 4774 | int last_id = charset_ascii; |
| 4827 | int eol_crlf = | 4775 | int eol_dos = |
| 4828 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | 4776 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); |
| 4829 | int byte_after_cr = -1; | 4777 | int byte_after_cr = -1; |
| 4830 | 4778 | ||
| @@ -4857,7 +4805,7 @@ decode_coding_big5 (struct coding_system *coding) | |||
| 4857 | goto invalid_code; | 4805 | goto invalid_code; |
| 4858 | if (c < 0x80) | 4806 | if (c < 0x80) |
| 4859 | { | 4807 | { |
| 4860 | if (eol_crlf && c == '\r') | 4808 | if (eol_dos && c == '\r') |
| 4861 | ONE_MORE_BYTE (byte_after_cr); | 4809 | ONE_MORE_BYTE (byte_after_cr); |
| 4862 | charset = charset_roman; | 4810 | charset = charset_roman; |
| 4863 | } | 4811 | } |
| @@ -4919,16 +4867,15 @@ encode_coding_sjis (struct coding_system *coding) | |||
| 4919 | unsigned char *dst = coding->destination + coding->produced; | 4867 | unsigned char *dst = coding->destination + coding->produced; |
| 4920 | unsigned char *dst_end = coding->destination + coding->dst_bytes; | 4868 | unsigned char *dst_end = coding->destination + coding->dst_bytes; |
| 4921 | int safe_room = 4; | 4869 | int safe_room = 4; |
| 4922 | int produced_chars = 0; | 4870 | EMACS_INT produced_chars = 0; |
| 4923 | Lisp_Object attrs, charset_list, val; | 4871 | Lisp_Object attrs, charset_list, val; |
| 4924 | int ascii_compatible; | 4872 | int ascii_compatible; |
| 4925 | struct charset *charset_roman, *charset_kanji, *charset_kana; | 4873 | struct charset *charset_kanji, *charset_kana; |
| 4926 | struct charset *charset_kanji2; | 4874 | struct charset *charset_kanji2; |
| 4927 | int c; | 4875 | int c; |
| 4928 | 4876 | ||
| 4929 | CODING_GET_INFO (coding, attrs, charset_list); | 4877 | CODING_GET_INFO (coding, attrs, charset_list); |
| 4930 | val = charset_list; | 4878 | val = XCDR (charset_list); |
| 4931 | charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | ||
| 4932 | charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 4879 | charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
| 4933 | charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 4880 | charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
| 4934 | charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val))); | 4881 | charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val))); |
| @@ -5011,15 +4958,14 @@ encode_coding_big5 (struct coding_system *coding) | |||
| 5011 | unsigned char *dst = coding->destination + coding->produced; | 4958 | unsigned char *dst = coding->destination + coding->produced; |
| 5012 | unsigned char *dst_end = coding->destination + coding->dst_bytes; | 4959 | unsigned char *dst_end = coding->destination + coding->dst_bytes; |
| 5013 | int safe_room = 4; | 4960 | int safe_room = 4; |
| 5014 | int produced_chars = 0; | 4961 | EMACS_INT produced_chars = 0; |
| 5015 | Lisp_Object attrs, charset_list, val; | 4962 | Lisp_Object attrs, charset_list, val; |
| 5016 | int ascii_compatible; | 4963 | int ascii_compatible; |
| 5017 | struct charset *charset_roman, *charset_big5; | 4964 | struct charset *charset_big5; |
| 5018 | int c; | 4965 | int c; |
| 5019 | 4966 | ||
| 5020 | CODING_GET_INFO (coding, attrs, charset_list); | 4967 | CODING_GET_INFO (coding, attrs, charset_list); |
| 5021 | val = charset_list; | 4968 | val = XCDR (charset_list); |
| 5022 | charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | ||
| 5023 | charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); | 4969 | charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); |
| 5024 | ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); | 4970 | ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); |
| 5025 | 4971 | ||
| @@ -5087,10 +5033,10 @@ detect_coding_ccl (struct coding_system *coding, | |||
| 5087 | const unsigned char *src = coding->source, *src_base; | 5033 | const unsigned char *src = coding->source, *src_base; |
| 5088 | const unsigned char *src_end = coding->source + coding->src_bytes; | 5034 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 5089 | int multibytep = coding->src_multibyte; | 5035 | int multibytep = coding->src_multibyte; |
| 5090 | int consumed_chars = 0; | 5036 | EMACS_INT consumed_chars = 0; |
| 5091 | int found = 0; | 5037 | int found = 0; |
| 5092 | unsigned char *valids; | 5038 | unsigned char *valids; |
| 5093 | int head_ascii = coding->head_ascii; | 5039 | EMACS_INT head_ascii = coding->head_ascii; |
| 5094 | Lisp_Object attrs; | 5040 | Lisp_Object attrs; |
| 5095 | 5041 | ||
| 5096 | detect_info->checked |= CATEGORY_MASK_CCL; | 5042 | detect_info->checked |= CATEGORY_MASK_CCL; |
| @@ -5127,7 +5073,7 @@ decode_coding_ccl (struct coding_system *coding) | |||
| 5127 | const unsigned char *src_end = coding->source + coding->src_bytes; | 5073 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 5128 | int *charbuf = coding->charbuf + coding->charbuf_used; | 5074 | int *charbuf = coding->charbuf + coding->charbuf_used; |
| 5129 | int *charbuf_end = coding->charbuf + coding->charbuf_size; | 5075 | int *charbuf_end = coding->charbuf + coding->charbuf_size; |
| 5130 | int consumed_chars = 0; | 5076 | EMACS_INT consumed_chars = 0; |
| 5131 | int multibytep = coding->src_multibyte; | 5077 | int multibytep = coding->src_multibyte; |
| 5132 | struct ccl_program *ccl = &coding->spec.ccl->ccl; | 5078 | struct ccl_program *ccl = &coding->spec.ccl->ccl; |
| 5133 | int source_charbuf[1024]; | 5079 | int source_charbuf[1024]; |
| @@ -5199,7 +5145,8 @@ encode_coding_ccl (struct coding_system *coding) | |||
| 5199 | unsigned char *dst = coding->destination + coding->produced; | 5145 | unsigned char *dst = coding->destination + coding->produced; |
| 5200 | unsigned char *dst_end = coding->destination + coding->dst_bytes; | 5146 | unsigned char *dst_end = coding->destination + coding->dst_bytes; |
| 5201 | int destination_charbuf[1024]; | 5147 | int destination_charbuf[1024]; |
| 5202 | int i, produced_chars = 0; | 5148 | EMACS_INT produced_chars = 0; |
| 5149 | int i; | ||
| 5203 | Lisp_Object attrs, charset_list; | 5150 | Lisp_Object attrs, charset_list; |
| 5204 | 5151 | ||
| 5205 | CODING_GET_INFO (coding, attrs, charset_list); | 5152 | CODING_GET_INFO (coding, attrs, charset_list); |
| @@ -5261,13 +5208,13 @@ encode_coding_ccl (struct coding_system *coding) | |||
| 5261 | static void | 5208 | static void |
| 5262 | decode_coding_raw_text (struct coding_system *coding) | 5209 | decode_coding_raw_text (struct coding_system *coding) |
| 5263 | { | 5210 | { |
| 5264 | int eol_crlf = | 5211 | int eol_dos = |
| 5265 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | 5212 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); |
| 5266 | 5213 | ||
| 5267 | coding->chars_at_source = 1; | 5214 | coding->chars_at_source = 1; |
| 5268 | coding->consumed_char = coding->src_chars; | 5215 | coding->consumed_char = coding->src_chars; |
| 5269 | coding->consumed = coding->src_bytes; | 5216 | coding->consumed = coding->src_bytes; |
| 5270 | if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r') | 5217 | if (eol_dos && coding->source[coding->src_bytes - 1] == '\r') |
| 5271 | { | 5218 | { |
| 5272 | coding->consumed_char--; | 5219 | coding->consumed_char--; |
| 5273 | coding->consumed--; | 5220 | coding->consumed--; |
| @@ -5285,7 +5232,7 @@ encode_coding_raw_text (struct coding_system *coding) | |||
| 5285 | int *charbuf_end = coding->charbuf + coding->charbuf_used; | 5232 | int *charbuf_end = coding->charbuf + coding->charbuf_used; |
| 5286 | unsigned char *dst = coding->destination + coding->produced; | 5233 | unsigned char *dst = coding->destination + coding->produced; |
| 5287 | unsigned char *dst_end = coding->destination + coding->dst_bytes; | 5234 | unsigned char *dst_end = coding->destination + coding->dst_bytes; |
| 5288 | int produced_chars = 0; | 5235 | EMACS_INT produced_chars = 0; |
| 5289 | int c; | 5236 | int c; |
| 5290 | 5237 | ||
| 5291 | if (multibytep) | 5238 | if (multibytep) |
| @@ -5309,11 +5256,12 @@ encode_coding_raw_text (struct coding_system *coding) | |||
| 5309 | unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str; | 5256 | unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str; |
| 5310 | 5257 | ||
| 5311 | CHAR_STRING_ADVANCE (c, p1); | 5258 | CHAR_STRING_ADVANCE (c, p1); |
| 5312 | while (p0 < p1) | 5259 | do |
| 5313 | { | 5260 | { |
| 5314 | EMIT_ONE_BYTE (*p0); | 5261 | EMIT_ONE_BYTE (*p0); |
| 5315 | p0++; | 5262 | p0++; |
| 5316 | } | 5263 | } |
| 5264 | while (p0 < p1); | ||
| 5317 | } | 5265 | } |
| 5318 | } | 5266 | } |
| 5319 | else | 5267 | else |
| @@ -5367,10 +5315,10 @@ detect_coding_charset (struct coding_system *coding, | |||
| 5367 | const unsigned char *src = coding->source, *src_base; | 5315 | const unsigned char *src = coding->source, *src_base; |
| 5368 | const unsigned char *src_end = coding->source + coding->src_bytes; | 5316 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 5369 | int multibytep = coding->src_multibyte; | 5317 | int multibytep = coding->src_multibyte; |
| 5370 | int consumed_chars = 0; | 5318 | EMACS_INT consumed_chars = 0; |
| 5371 | Lisp_Object attrs, valids, name; | 5319 | Lisp_Object attrs, valids, name; |
| 5372 | int found = 0; | 5320 | int found = 0; |
| 5373 | int head_ascii = coding->head_ascii; | 5321 | EMACS_INT head_ascii = coding->head_ascii; |
| 5374 | int check_latin_extra = 0; | 5322 | int check_latin_extra = 0; |
| 5375 | 5323 | ||
| 5376 | detect_info->checked |= CATEGORY_MASK_CHARSET; | 5324 | detect_info->checked |= CATEGORY_MASK_CHARSET; |
| @@ -5420,8 +5368,8 @@ detect_coding_charset (struct coding_system *coding, | |||
| 5420 | if (src == src_end) | 5368 | if (src == src_end) |
| 5421 | goto too_short; | 5369 | goto too_short; |
| 5422 | ONE_MORE_BYTE (c); | 5370 | ONE_MORE_BYTE (c); |
| 5423 | if (c < charset->code_space[(dim - 1 - idx) * 2] | 5371 | if (c < charset->code_space[(dim - 1 - idx) * 4] |
| 5424 | || c > charset->code_space[(dim - 1 - idx) * 2 + 1]) | 5372 | || c > charset->code_space[(dim - 1 - idx) * 4 + 1]) |
| 5425 | break; | 5373 | break; |
| 5426 | } | 5374 | } |
| 5427 | if (idx < dim) | 5375 | if (idx < dim) |
| @@ -5474,17 +5422,17 @@ decode_coding_charset (struct coding_system *coding) | |||
| 5474 | the end. */ | 5422 | the end. */ |
| 5475 | int *charbuf_end | 5423 | int *charbuf_end |
| 5476 | = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); | 5424 | = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); |
| 5477 | int consumed_chars = 0, consumed_chars_base; | 5425 | EMACS_INT consumed_chars = 0, consumed_chars_base; |
| 5478 | int multibytep = coding->src_multibyte; | 5426 | int multibytep = coding->src_multibyte; |
| 5479 | Lisp_Object attrs, charset_list, valids; | 5427 | Lisp_Object attrs = CODING_ID_ATTRS (coding->id); |
| 5480 | int char_offset = coding->produced_char; | 5428 | Lisp_Object valids; |
| 5481 | int last_offset = char_offset; | 5429 | EMACS_INT char_offset = coding->produced_char; |
| 5430 | EMACS_INT last_offset = char_offset; | ||
| 5482 | int last_id = charset_ascii; | 5431 | int last_id = charset_ascii; |
| 5483 | int eol_crlf = | 5432 | int eol_dos = |
| 5484 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | 5433 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); |
| 5485 | int byte_after_cr = -1; | 5434 | int byte_after_cr = -1; |
| 5486 | 5435 | ||
| 5487 | CODING_GET_INFO (coding, attrs, charset_list); | ||
| 5488 | valids = AREF (attrs, coding_attr_charset_valids); | 5436 | valids = AREF (attrs, coding_attr_charset_valids); |
| 5489 | 5437 | ||
| 5490 | while (1) | 5438 | while (1) |
| @@ -5514,7 +5462,7 @@ decode_coding_charset (struct coding_system *coding) | |||
| 5514 | else | 5462 | else |
| 5515 | { | 5463 | { |
| 5516 | ONE_MORE_BYTE (c); | 5464 | ONE_MORE_BYTE (c); |
| 5517 | if (eol_crlf && c == '\r') | 5465 | if (eol_dos && c == '\r') |
| 5518 | ONE_MORE_BYTE (byte_after_cr); | 5466 | ONE_MORE_BYTE (byte_after_cr); |
| 5519 | } | 5467 | } |
| 5520 | if (c < 0) | 5468 | if (c < 0) |
| @@ -5600,7 +5548,7 @@ encode_coding_charset (struct coding_system *coding) | |||
| 5600 | unsigned char *dst = coding->destination + coding->produced; | 5548 | unsigned char *dst = coding->destination + coding->produced; |
| 5601 | unsigned char *dst_end = coding->destination + coding->dst_bytes; | 5549 | unsigned char *dst_end = coding->destination + coding->dst_bytes; |
| 5602 | int safe_room = MAX_MULTIBYTE_LENGTH; | 5550 | int safe_room = MAX_MULTIBYTE_LENGTH; |
| 5603 | int produced_chars = 0; | 5551 | EMACS_INT produced_chars = 0; |
| 5604 | Lisp_Object attrs, charset_list; | 5552 | Lisp_Object attrs, charset_list; |
| 5605 | int ascii_compatible; | 5553 | int ascii_compatible; |
| 5606 | int c; | 5554 | int c; |
| @@ -6272,7 +6220,7 @@ adjust_coding_eol_type (struct coding_system *coding, int eol_seen) | |||
| 6272 | system is detected, update fields of CODING by the detected coding | 6220 | system is detected, update fields of CODING by the detected coding |
| 6273 | system. */ | 6221 | system. */ |
| 6274 | 6222 | ||
| 6275 | void | 6223 | static void |
| 6276 | detect_coding (struct coding_system *coding) | 6224 | detect_coding (struct coding_system *coding) |
| 6277 | { | 6225 | { |
| 6278 | const unsigned char *src, *src_end; | 6226 | const unsigned char *src, *src_end; |
| @@ -6508,7 +6456,7 @@ decode_eol (struct coding_system *coding) | |||
| 6508 | } | 6456 | } |
| 6509 | else if (EQ (eol_type, Qdos)) | 6457 | else if (EQ (eol_type, Qdos)) |
| 6510 | { | 6458 | { |
| 6511 | int n = 0; | 6459 | EMACS_INT n = 0; |
| 6512 | 6460 | ||
| 6513 | if (NILP (coding->dst_object)) | 6461 | if (NILP (coding->dst_object)) |
| 6514 | { | 6462 | { |
| @@ -6523,9 +6471,9 @@ decode_eol (struct coding_system *coding) | |||
| 6523 | } | 6471 | } |
| 6524 | else | 6472 | else |
| 6525 | { | 6473 | { |
| 6526 | int pos_byte = coding->dst_pos_byte; | 6474 | EMACS_INT pos_byte = coding->dst_pos_byte; |
| 6527 | int pos = coding->dst_pos; | 6475 | EMACS_INT pos = coding->dst_pos; |
| 6528 | int pos_end = pos + coding->produced_char - 1; | 6476 | EMACS_INT pos_end = pos + coding->produced_char - 1; |
| 6529 | 6477 | ||
| 6530 | while (pos < pos_end) | 6478 | while (pos < pos_end) |
| 6531 | { | 6479 | { |
| @@ -6607,15 +6555,15 @@ get_translation_table (Lisp_Object attrs, int encodep, int *max_lookup) | |||
| 6607 | } | 6555 | } |
| 6608 | else if (CONSP (translation_table)) | 6556 | else if (CONSP (translation_table)) |
| 6609 | { | 6557 | { |
| 6610 | Lisp_Object tail, val; | 6558 | Lisp_Object tail; |
| 6611 | 6559 | ||
| 6612 | for (tail = translation_table; CONSP (tail); tail = XCDR (tail)) | 6560 | for (tail = translation_table; CONSP (tail); tail = XCDR (tail)) |
| 6613 | if (CHAR_TABLE_P (XCAR (tail)) | 6561 | if (CHAR_TABLE_P (XCAR (tail)) |
| 6614 | && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1) | 6562 | && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1) |
| 6615 | { | 6563 | { |
| 6616 | val = XCHAR_TABLE (XCAR (tail))->extras[1]; | 6564 | Lisp_Object tailval = XCHAR_TABLE (XCAR (tail))->extras[1]; |
| 6617 | if (NATNUMP (val) && *max_lookup < XFASTINT (val)) | 6565 | if (NATNUMP (tailval) && *max_lookup < XFASTINT (tailval)) |
| 6618 | *max_lookup = XFASTINT (val); | 6566 | *max_lookup = XFASTINT (tailval); |
| 6619 | } | 6567 | } |
| 6620 | } | 6568 | } |
| 6621 | } | 6569 | } |
| @@ -6710,7 +6658,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table, | |||
| 6710 | 6658 | ||
| 6711 | if (c >= 0) | 6659 | if (c >= 0) |
| 6712 | { | 6660 | { |
| 6713 | int from_nchars = 1, to_nchars = 1; | 6661 | EMACS_INT from_nchars = 1, to_nchars = 1; |
| 6714 | Lisp_Object trans = Qnil; | 6662 | Lisp_Object trans = Qnil; |
| 6715 | 6663 | ||
| 6716 | LOOKUP_TRANSLATION_TABLE (translation_table, c, trans); | 6664 | LOOKUP_TRANSLATION_TABLE (translation_table, c, trans); |
| @@ -6881,7 +6829,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table, | |||
| 6881 | [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ] | 6829 | [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ] |
| 6882 | */ | 6830 | */ |
| 6883 | 6831 | ||
| 6884 | static INLINE void | 6832 | static inline void |
| 6885 | produce_composition (struct coding_system *coding, int *charbuf, EMACS_INT pos) | 6833 | produce_composition (struct coding_system *coding, int *charbuf, EMACS_INT pos) |
| 6886 | { | 6834 | { |
| 6887 | int len; | 6835 | int len; |
| @@ -6925,7 +6873,7 @@ produce_composition (struct coding_system *coding, int *charbuf, EMACS_INT pos) | |||
| 6925 | [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ] | 6873 | [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ] |
| 6926 | */ | 6874 | */ |
| 6927 | 6875 | ||
| 6928 | static INLINE void | 6876 | static inline void |
| 6929 | produce_charset (struct coding_system *coding, int *charbuf, EMACS_INT pos) | 6877 | produce_charset (struct coding_system *coding, int *charbuf, EMACS_INT pos) |
| 6930 | { | 6878 | { |
| 6931 | EMACS_INT from = pos - charbuf[2]; | 6879 | EMACS_INT from = pos - charbuf[2]; |
| @@ -7038,8 +6986,8 @@ decode_coding (struct coding_system *coding) | |||
| 7038 | set_buffer_internal (XBUFFER (coding->dst_object)); | 6986 | set_buffer_internal (XBUFFER (coding->dst_object)); |
| 7039 | if (GPT != PT) | 6987 | if (GPT != PT) |
| 7040 | move_gap_both (PT, PT_BYTE); | 6988 | move_gap_both (PT, PT_BYTE); |
| 7041 | undo_list = current_buffer->undo_list; | 6989 | undo_list = BVAR (current_buffer, undo_list); |
| 7042 | current_buffer->undo_list = Qt; | 6990 | BVAR (current_buffer, undo_list) = Qt; |
| 7043 | } | 6991 | } |
| 7044 | 6992 | ||
| 7045 | coding->consumed = coding->consumed_char = 0; | 6993 | coding->consumed = coding->consumed_char = 0; |
| @@ -7136,7 +7084,7 @@ decode_coding (struct coding_system *coding) | |||
| 7136 | decode_eol (coding); | 7084 | decode_eol (coding); |
| 7137 | if (BUFFERP (coding->dst_object)) | 7085 | if (BUFFERP (coding->dst_object)) |
| 7138 | { | 7086 | { |
| 7139 | current_buffer->undo_list = undo_list; | 7087 | BVAR (current_buffer, undo_list) = undo_list; |
| 7140 | record_insert (coding->dst_pos, coding->produced_char); | 7088 | record_insert (coding->dst_pos, coding->produced_char); |
| 7141 | } | 7089 | } |
| 7142 | return coding->result; | 7090 | return coding->result; |
| @@ -7153,7 +7101,7 @@ decode_coding (struct coding_system *coding) | |||
| 7153 | position of a composition after POS (if any) or to LIMIT, and | 7101 | position of a composition after POS (if any) or to LIMIT, and |
| 7154 | return BUF. */ | 7102 | return BUF. */ |
| 7155 | 7103 | ||
| 7156 | static INLINE int * | 7104 | static inline int * |
| 7157 | handle_composition_annotation (EMACS_INT pos, EMACS_INT limit, | 7105 | handle_composition_annotation (EMACS_INT pos, EMACS_INT limit, |
| 7158 | struct coding_system *coding, int *buf, | 7106 | struct coding_system *coding, int *buf, |
| 7159 | EMACS_INT *stop) | 7107 | EMACS_INT *stop) |
| @@ -7185,7 +7133,7 @@ handle_composition_annotation (EMACS_INT pos, EMACS_INT limit, | |||
| 7185 | components = COMPOSITION_COMPONENTS (prop); | 7133 | components = COMPOSITION_COMPONENTS (prop); |
| 7186 | if (VECTORP (components)) | 7134 | if (VECTORP (components)) |
| 7187 | { | 7135 | { |
| 7188 | len = XVECTOR (components)->size; | 7136 | len = ASIZE (components); |
| 7189 | for (i = 0; i < len; i++) | 7137 | for (i = 0; i < len; i++) |
| 7190 | *buf++ = XINT (AREF (components, i)); | 7138 | *buf++ = XINT (AREF (components, i)); |
| 7191 | } | 7139 | } |
| @@ -7236,7 +7184,7 @@ handle_composition_annotation (EMACS_INT pos, EMACS_INT limit, | |||
| 7236 | If the property value is nil, set *STOP to the position where the | 7184 | If the property value is nil, set *STOP to the position where the |
| 7237 | property value is non-nil (limiting by LIMIT), and return BUF. */ | 7185 | property value is non-nil (limiting by LIMIT), and return BUF. */ |
| 7238 | 7186 | ||
| 7239 | static INLINE int * | 7187 | static inline int * |
| 7240 | handle_charset_annotation (EMACS_INT pos, EMACS_INT limit, | 7188 | handle_charset_annotation (EMACS_INT pos, EMACS_INT limit, |
| 7241 | struct coding_system *coding, int *buf, | 7189 | struct coding_system *coding, int *buf, |
| 7242 | EMACS_INT *stop) | 7190 | EMACS_INT *stop) |
| @@ -7433,7 +7381,7 @@ encode_coding (struct coding_system *coding) | |||
| 7433 | { | 7381 | { |
| 7434 | set_buffer_internal (XBUFFER (coding->dst_object)); | 7382 | set_buffer_internal (XBUFFER (coding->dst_object)); |
| 7435 | coding->dst_multibyte | 7383 | coding->dst_multibyte |
| 7436 | = ! NILP (current_buffer->enable_multibyte_characters); | 7384 | = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); |
| 7437 | } | 7385 | } |
| 7438 | 7386 | ||
| 7439 | coding->consumed = coding->consumed_char = 0; | 7387 | coding->consumed = coding->consumed_char = 0; |
| @@ -7504,8 +7452,8 @@ make_conversion_work_buffer (int multibyte) | |||
| 7504 | doesn't compile new regexps. */ | 7452 | doesn't compile new regexps. */ |
| 7505 | Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt); | 7453 | Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt); |
| 7506 | Ferase_buffer (); | 7454 | Ferase_buffer (); |
| 7507 | current_buffer->undo_list = Qt; | 7455 | BVAR (current_buffer, undo_list) = Qt; |
| 7508 | current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil; | 7456 | BVAR (current_buffer, enable_multibyte_characters) = multibyte ? Qt : Qnil; |
| 7509 | set_buffer_internal (current); | 7457 | set_buffer_internal (current); |
| 7510 | return workbuf; | 7458 | return workbuf; |
| 7511 | } | 7459 | } |
| @@ -7562,7 +7510,7 @@ decode_coding_gap (struct coding_system *coding, | |||
| 7562 | coding->dst_object = coding->src_object; | 7510 | coding->dst_object = coding->src_object; |
| 7563 | coding->dst_pos = PT; | 7511 | coding->dst_pos = PT; |
| 7564 | coding->dst_pos_byte = PT_BYTE; | 7512 | coding->dst_pos_byte = PT_BYTE; |
| 7565 | coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters); | 7513 | coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); |
| 7566 | 7514 | ||
| 7567 | if (CODING_REQUIRE_DETECTION (coding)) | 7515 | if (CODING_REQUIRE_DETECTION (coding)) |
| 7568 | detect_coding (coding); | 7516 | detect_coding (coding); |
| @@ -7590,30 +7538,6 @@ decode_coding_gap (struct coding_system *coding, | |||
| 7590 | return coding->result; | 7538 | return coding->result; |
| 7591 | } | 7539 | } |
| 7592 | 7540 | ||
| 7593 | int | ||
| 7594 | encode_coding_gap (struct coding_system *coding, | ||
| 7595 | EMACS_INT chars, EMACS_INT bytes) | ||
| 7596 | { | ||
| 7597 | int count = SPECPDL_INDEX (); | ||
| 7598 | |||
| 7599 | code_conversion_save (0, 0); | ||
| 7600 | |||
| 7601 | coding->src_object = Fcurrent_buffer (); | ||
| 7602 | coding->src_chars = chars; | ||
| 7603 | coding->src_bytes = bytes; | ||
| 7604 | coding->src_pos = -chars; | ||
| 7605 | coding->src_pos_byte = -bytes; | ||
| 7606 | coding->src_multibyte = chars < bytes; | ||
| 7607 | coding->dst_object = coding->src_object; | ||
| 7608 | coding->dst_pos = PT; | ||
| 7609 | coding->dst_pos_byte = PT_BYTE; | ||
| 7610 | |||
| 7611 | encode_coding (coding); | ||
| 7612 | |||
| 7613 | unbind_to (count, Qnil); | ||
| 7614 | return coding->result; | ||
| 7615 | } | ||
| 7616 | |||
| 7617 | 7541 | ||
| 7618 | /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in | 7542 | /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in |
| 7619 | SRC_OBJECT into DST_OBJECT by coding context CODING. | 7543 | SRC_OBJECT into DST_OBJECT by coding context CODING. |
| @@ -7652,12 +7576,12 @@ decode_coding_object (struct coding_system *coding, | |||
| 7652 | Lisp_Object dst_object) | 7576 | Lisp_Object dst_object) |
| 7653 | { | 7577 | { |
| 7654 | int count = SPECPDL_INDEX (); | 7578 | int count = SPECPDL_INDEX (); |
| 7655 | unsigned char *destination; | 7579 | unsigned char *destination IF_LINT (= NULL); |
| 7656 | EMACS_INT dst_bytes; | 7580 | EMACS_INT dst_bytes IF_LINT (= 0); |
| 7657 | EMACS_INT chars = to - from; | 7581 | EMACS_INT chars = to - from; |
| 7658 | EMACS_INT bytes = to_byte - from_byte; | 7582 | EMACS_INT bytes = to_byte - from_byte; |
| 7659 | Lisp_Object attrs; | 7583 | Lisp_Object attrs; |
| 7660 | int saved_pt = -1, saved_pt_byte; | 7584 | int saved_pt = -1, saved_pt_byte IF_LINT (= 0); |
| 7661 | int need_marker_adjustment = 0; | 7585 | int need_marker_adjustment = 0; |
| 7662 | Lisp_Object old_deactivate_mark; | 7586 | Lisp_Object old_deactivate_mark; |
| 7663 | 7587 | ||
| @@ -7728,7 +7652,7 @@ decode_coding_object (struct coding_system *coding, | |||
| 7728 | coding->dst_pos = BUF_PT (XBUFFER (dst_object)); | 7652 | coding->dst_pos = BUF_PT (XBUFFER (dst_object)); |
| 7729 | coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object)); | 7653 | coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object)); |
| 7730 | coding->dst_multibyte | 7654 | coding->dst_multibyte |
| 7731 | = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters); | 7655 | = ! NILP (BVAR (XBUFFER (dst_object), enable_multibyte_characters)); |
| 7732 | } | 7656 | } |
| 7733 | else | 7657 | else |
| 7734 | { | 7658 | { |
| @@ -7798,7 +7722,7 @@ decode_coding_object (struct coding_system *coding, | |||
| 7798 | TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte); | 7722 | TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte); |
| 7799 | else if (saved_pt < from + chars) | 7723 | else if (saved_pt < from + chars) |
| 7800 | TEMP_SET_PT_BOTH (from, from_byte); | 7724 | TEMP_SET_PT_BOTH (from, from_byte); |
| 7801 | else if (! NILP (current_buffer->enable_multibyte_characters)) | 7725 | else if (! NILP (BVAR (current_buffer, enable_multibyte_characters))) |
| 7802 | TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars), | 7726 | TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars), |
| 7803 | saved_pt_byte + (coding->produced - bytes)); | 7727 | saved_pt_byte + (coding->produced - bytes)); |
| 7804 | else | 7728 | else |
| @@ -7822,7 +7746,7 @@ decode_coding_object (struct coding_system *coding, | |||
| 7822 | { | 7746 | { |
| 7823 | tail->bytepos = from_byte + coding->produced; | 7747 | tail->bytepos = from_byte + coding->produced; |
| 7824 | tail->charpos | 7748 | tail->charpos |
| 7825 | = (NILP (current_buffer->enable_multibyte_characters) | 7749 | = (NILP (BVAR (current_buffer, enable_multibyte_characters)) |
| 7826 | ? tail->bytepos : from + coding->produced_char); | 7750 | ? tail->bytepos : from + coding->produced_char); |
| 7827 | } | 7751 | } |
| 7828 | } | 7752 | } |
| @@ -7845,7 +7769,7 @@ encode_coding_object (struct coding_system *coding, | |||
| 7845 | EMACS_INT chars = to - from; | 7769 | EMACS_INT chars = to - from; |
| 7846 | EMACS_INT bytes = to_byte - from_byte; | 7770 | EMACS_INT bytes = to_byte - from_byte; |
| 7847 | Lisp_Object attrs; | 7771 | Lisp_Object attrs; |
| 7848 | int saved_pt = -1, saved_pt_byte; | 7772 | int saved_pt = -1, saved_pt_byte IF_LINT (= 0); |
| 7849 | int need_marker_adjustment = 0; | 7773 | int need_marker_adjustment = 0; |
| 7850 | int kill_src_buffer = 0; | 7774 | int kill_src_buffer = 0; |
| 7851 | Lisp_Object old_deactivate_mark; | 7775 | Lisp_Object old_deactivate_mark; |
| @@ -7880,7 +7804,7 @@ encode_coding_object (struct coding_system *coding, | |||
| 7880 | else if (BUFFERP (src_object)) | 7804 | else if (BUFFERP (src_object)) |
| 7881 | insert_from_buffer (XBUFFER (src_object), from, chars, 0); | 7805 | insert_from_buffer (XBUFFER (src_object), from, chars, 0); |
| 7882 | else | 7806 | else |
| 7883 | insert_1_both (coding->source + from, chars, bytes, 0, 0, 0); | 7807 | insert_1_both ((char *) coding->source + from, chars, bytes, 0, 0, 0); |
| 7884 | 7808 | ||
| 7885 | if (EQ (src_object, dst_object)) | 7809 | if (EQ (src_object, dst_object)) |
| 7886 | { | 7810 | { |
| @@ -7960,7 +7884,7 @@ encode_coding_object (struct coding_system *coding, | |||
| 7960 | set_buffer_temp (current); | 7884 | set_buffer_temp (current); |
| 7961 | } | 7885 | } |
| 7962 | coding->dst_multibyte | 7886 | coding->dst_multibyte |
| 7963 | = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters); | 7887 | = ! NILP (BVAR (XBUFFER (dst_object), enable_multibyte_characters)); |
| 7964 | } | 7888 | } |
| 7965 | else if (EQ (dst_object, Qt)) | 7889 | else if (EQ (dst_object, Qt)) |
| 7966 | { | 7890 | { |
| @@ -8003,7 +7927,7 @@ encode_coding_object (struct coding_system *coding, | |||
| 8003 | TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte); | 7927 | TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte); |
| 8004 | else if (saved_pt < from + chars) | 7928 | else if (saved_pt < from + chars) |
| 8005 | TEMP_SET_PT_BOTH (from, from_byte); | 7929 | TEMP_SET_PT_BOTH (from, from_byte); |
| 8006 | else if (! NILP (current_buffer->enable_multibyte_characters)) | 7930 | else if (! NILP (BVAR (current_buffer, enable_multibyte_characters))) |
| 8007 | TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars), | 7931 | TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars), |
| 8008 | saved_pt_byte + (coding->produced - bytes)); | 7932 | saved_pt_byte + (coding->produced - bytes)); |
| 8009 | else | 7933 | else |
| @@ -8027,7 +7951,7 @@ encode_coding_object (struct coding_system *coding, | |||
| 8027 | { | 7951 | { |
| 8028 | tail->bytepos = from_byte + coding->produced; | 7952 | tail->bytepos = from_byte + coding->produced; |
| 8029 | tail->charpos | 7953 | tail->charpos |
| 8030 | = (NILP (current_buffer->enable_multibyte_characters) | 7954 | = (NILP (BVAR (current_buffer, enable_multibyte_characters)) |
| 8031 | ? tail->bytepos : from + coding->produced_char); | 7955 | ? tail->bytepos : from + coding->produced_char); |
| 8032 | } | 7956 | } |
| 8033 | } | 7957 | } |
| @@ -8178,8 +8102,8 @@ detect_coding_system (const unsigned char *src, | |||
| 8178 | base_category = XINT (CODING_ATTR_CATEGORY (attrs)); | 8102 | base_category = XINT (CODING_ATTR_CATEGORY (attrs)); |
| 8179 | if (base_category == coding_category_undecided) | 8103 | if (base_category == coding_category_undecided) |
| 8180 | { | 8104 | { |
| 8181 | enum coding_category category; | 8105 | enum coding_category category IF_LINT (= 0); |
| 8182 | struct coding_system *this; | 8106 | struct coding_system *this IF_LINT (= NULL); |
| 8183 | int c, i; | 8107 | int c, i; |
| 8184 | 8108 | ||
| 8185 | /* Skip all ASCII bytes except for a few ISO2022 controls. */ | 8109 | /* Skip all ASCII bytes except for a few ISO2022 controls. */ |
| @@ -8481,8 +8405,8 @@ highest priority. */) | |||
| 8481 | return detect_coding_system (BYTE_POS_ADDR (from_byte), | 8405 | return detect_coding_system (BYTE_POS_ADDR (from_byte), |
| 8482 | to - from, to_byte - from_byte, | 8406 | to - from, to_byte - from_byte, |
| 8483 | !NILP (highest), | 8407 | !NILP (highest), |
| 8484 | !NILP (current_buffer | 8408 | !NILP (BVAR (current_buffer |
| 8485 | ->enable_multibyte_characters), | 8409 | , enable_multibyte_characters)), |
| 8486 | Qnil); | 8410 | Qnil); |
| 8487 | } | 8411 | } |
| 8488 | 8412 | ||
| @@ -8511,7 +8435,7 @@ highest priority. */) | |||
| 8511 | } | 8435 | } |
| 8512 | 8436 | ||
| 8513 | 8437 | ||
| 8514 | static INLINE int | 8438 | static inline int |
| 8515 | char_encodable_p (int c, Lisp_Object attrs) | 8439 | char_encodable_p (int c, Lisp_Object attrs) |
| 8516 | { | 8440 | { |
| 8517 | Lisp_Object tail; | 8441 | Lisp_Object tail; |
| @@ -8564,7 +8488,7 @@ DEFUN ("find-coding-systems-region-internal", | |||
| 8564 | CHECK_NUMBER_COERCE_MARKER (end); | 8488 | CHECK_NUMBER_COERCE_MARKER (end); |
| 8565 | if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end)) | 8489 | if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end)) |
| 8566 | args_out_of_range (start, end); | 8490 | args_out_of_range (start, end); |
| 8567 | if (NILP (current_buffer->enable_multibyte_characters)) | 8491 | if (NILP (BVAR (current_buffer, enable_multibyte_characters))) |
| 8568 | return Qt; | 8492 | return Qt; |
| 8569 | start_byte = CHAR_TO_BYTE (XINT (start)); | 8493 | start_byte = CHAR_TO_BYTE (XINT (start)); |
| 8570 | end_byte = CHAR_TO_BYTE (XINT (end)); | 8494 | end_byte = CHAR_TO_BYTE (XINT (end)); |
| @@ -8698,7 +8622,7 @@ to the string. */) | |||
| 8698 | validate_region (&start, &end); | 8622 | validate_region (&start, &end); |
| 8699 | from = XINT (start); | 8623 | from = XINT (start); |
| 8700 | to = XINT (end); | 8624 | to = XINT (end); |
| 8701 | if (NILP (current_buffer->enable_multibyte_characters) | 8625 | if (NILP (BVAR (current_buffer, enable_multibyte_characters)) |
| 8702 | || (ascii_compatible | 8626 | || (ascii_compatible |
| 8703 | && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from))))) | 8627 | && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from))))) |
| 8704 | return Qnil; | 8628 | return Qnil; |
| @@ -8814,7 +8738,7 @@ is nil. */) | |||
| 8814 | CHECK_NUMBER_COERCE_MARKER (end); | 8738 | CHECK_NUMBER_COERCE_MARKER (end); |
| 8815 | if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end)) | 8739 | if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end)) |
| 8816 | args_out_of_range (start, end); | 8740 | args_out_of_range (start, end); |
| 8817 | if (NILP (current_buffer->enable_multibyte_characters)) | 8741 | if (NILP (BVAR (current_buffer, enable_multibyte_characters))) |
| 8818 | return Qnil; | 8742 | return Qnil; |
| 8819 | start_byte = CHAR_TO_BYTE (XINT (start)); | 8743 | start_byte = CHAR_TO_BYTE (XINT (start)); |
| 8820 | end_byte = CHAR_TO_BYTE (XINT (end)); | 8744 | end_byte = CHAR_TO_BYTE (XINT (end)); |
| @@ -8894,7 +8818,7 @@ is nil. */) | |||
| 8894 | } | 8818 | } |
| 8895 | 8819 | ||
| 8896 | 8820 | ||
| 8897 | Lisp_Object | 8821 | static Lisp_Object |
| 8898 | code_convert_region (Lisp_Object start, Lisp_Object end, | 8822 | code_convert_region (Lisp_Object start, Lisp_Object end, |
| 8899 | Lisp_Object coding_system, Lisp_Object dst_object, | 8823 | Lisp_Object coding_system, Lisp_Object dst_object, |
| 8900 | int encodep, int norecord) | 8824 | int encodep, int norecord) |
| @@ -9087,14 +9011,15 @@ Return the corresponding character. */) | |||
| 9087 | { | 9011 | { |
| 9088 | Lisp_Object spec, attrs, val; | 9012 | Lisp_Object spec, attrs, val; |
| 9089 | struct charset *charset_roman, *charset_kanji, *charset_kana, *charset; | 9013 | struct charset *charset_roman, *charset_kanji, *charset_kana, *charset; |
| 9014 | EMACS_INT ch; | ||
| 9090 | int c; | 9015 | int c; |
| 9091 | 9016 | ||
| 9092 | CHECK_NATNUM (code); | 9017 | CHECK_NATNUM (code); |
| 9093 | c = XFASTINT (code); | 9018 | ch = XFASTINT (code); |
| 9094 | CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec); | 9019 | CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec); |
| 9095 | attrs = AREF (spec, 0); | 9020 | attrs = AREF (spec, 0); |
| 9096 | 9021 | ||
| 9097 | if (ASCII_BYTE_P (c) | 9022 | if (ASCII_BYTE_P (ch) |
| 9098 | && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) | 9023 | && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) |
| 9099 | return code; | 9024 | return code; |
| 9100 | 9025 | ||
| @@ -9103,26 +9028,31 @@ Return the corresponding character. */) | |||
| 9103 | charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 9028 | charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
| 9104 | charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))); | 9029 | charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))); |
| 9105 | 9030 | ||
| 9106 | if (c <= 0x7F) | 9031 | if (ch <= 0x7F) |
| 9107 | charset = charset_roman; | ||
| 9108 | else if (c >= 0xA0 && c < 0xDF) | ||
| 9109 | { | 9032 | { |
| 9033 | c = ch; | ||
| 9034 | charset = charset_roman; | ||
| 9035 | } | ||
| 9036 | else if (ch >= 0xA0 && ch < 0xDF) | ||
| 9037 | { | ||
| 9038 | c = ch - 0x80; | ||
| 9110 | charset = charset_kana; | 9039 | charset = charset_kana; |
| 9111 | c -= 0x80; | ||
| 9112 | } | 9040 | } |
| 9113 | else | 9041 | else |
| 9114 | { | 9042 | { |
| 9115 | int s1 = c >> 8, s2 = c & 0xFF; | 9043 | EMACS_INT c1 = ch >> 8; |
| 9044 | int c2 = ch & 0xFF; | ||
| 9116 | 9045 | ||
| 9117 | if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF | 9046 | if (c1 < 0x81 || (c1 > 0x9F && c1 < 0xE0) || c1 > 0xEF |
| 9118 | || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC) | 9047 | || c2 < 0x40 || c2 == 0x7F || c2 > 0xFC) |
| 9119 | error ("Invalid code: %d", code); | 9048 | error ("Invalid code: %"pI"d", ch); |
| 9049 | c = ch; | ||
| 9120 | SJIS_TO_JIS (c); | 9050 | SJIS_TO_JIS (c); |
| 9121 | charset = charset_kanji; | 9051 | charset = charset_kanji; |
| 9122 | } | 9052 | } |
| 9123 | c = DECODE_CHAR (charset, c); | 9053 | c = DECODE_CHAR (charset, c); |
| 9124 | if (c < 0) | 9054 | if (c < 0) |
| 9125 | error ("Invalid code: %d", code); | 9055 | error ("Invalid code: %"pI"d", ch); |
| 9126 | return make_number (c); | 9056 | return make_number (c); |
| 9127 | } | 9057 | } |
| 9128 | 9058 | ||
| @@ -9149,7 +9079,7 @@ Return the corresponding code in SJIS. */) | |||
| 9149 | charset_list = CODING_ATTR_CHARSET_LIST (attrs); | 9079 | charset_list = CODING_ATTR_CHARSET_LIST (attrs); |
| 9150 | charset = char_charset (c, charset_list, &code); | 9080 | charset = char_charset (c, charset_list, &code); |
| 9151 | if (code == CHARSET_INVALID_CODE (charset)) | 9081 | if (code == CHARSET_INVALID_CODE (charset)) |
| 9152 | error ("Can't encode by shift_jis encoding: %d", c); | 9082 | error ("Can't encode by shift_jis encoding: %c", c); |
| 9153 | JIS_TO_SJIS (code); | 9083 | JIS_TO_SJIS (code); |
| 9154 | 9084 | ||
| 9155 | return make_number (code); | 9085 | return make_number (code); |
| @@ -9162,14 +9092,15 @@ Return the corresponding character. */) | |||
| 9162 | { | 9092 | { |
| 9163 | Lisp_Object spec, attrs, val; | 9093 | Lisp_Object spec, attrs, val; |
| 9164 | struct charset *charset_roman, *charset_big5, *charset; | 9094 | struct charset *charset_roman, *charset_big5, *charset; |
| 9095 | EMACS_INT ch; | ||
| 9165 | int c; | 9096 | int c; |
| 9166 | 9097 | ||
| 9167 | CHECK_NATNUM (code); | 9098 | CHECK_NATNUM (code); |
| 9168 | c = XFASTINT (code); | 9099 | ch = XFASTINT (code); |
| 9169 | CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec); | 9100 | CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec); |
| 9170 | attrs = AREF (spec, 0); | 9101 | attrs = AREF (spec, 0); |
| 9171 | 9102 | ||
| 9172 | if (ASCII_BYTE_P (c) | 9103 | if (ASCII_BYTE_P (ch) |
| 9173 | && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) | 9104 | && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) |
| 9174 | return code; | 9105 | return code; |
| 9175 | 9106 | ||
| @@ -9177,19 +9108,24 @@ Return the corresponding character. */) | |||
| 9177 | charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 9108 | charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
| 9178 | charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); | 9109 | charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); |
| 9179 | 9110 | ||
| 9180 | if (c <= 0x7F) | 9111 | if (ch <= 0x7F) |
| 9181 | charset = charset_roman; | 9112 | { |
| 9113 | c = ch; | ||
| 9114 | charset = charset_roman; | ||
| 9115 | } | ||
| 9182 | else | 9116 | else |
| 9183 | { | 9117 | { |
| 9184 | int b1 = c >> 8, b2 = c & 0x7F; | 9118 | EMACS_INT b1 = ch >> 8; |
| 9119 | int b2 = ch & 0x7F; | ||
| 9185 | if (b1 < 0xA1 || b1 > 0xFE | 9120 | if (b1 < 0xA1 || b1 > 0xFE |
| 9186 | || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE) | 9121 | || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE) |
| 9187 | error ("Invalid code: %d", code); | 9122 | error ("Invalid code: %"pI"d", ch); |
| 9123 | c = ch; | ||
| 9188 | charset = charset_big5; | 9124 | charset = charset_big5; |
| 9189 | } | 9125 | } |
| 9190 | c = DECODE_CHAR (charset, (unsigned )c); | 9126 | c = DECODE_CHAR (charset, c); |
| 9191 | if (c < 0) | 9127 | if (c < 0) |
| 9192 | error ("Invalid code: %d", code); | 9128 | error ("Invalid code: %"pI"d", ch); |
| 9193 | return make_number (c); | 9129 | return make_number (c); |
| 9194 | } | 9130 | } |
| 9195 | 9131 | ||
| @@ -9214,7 +9150,7 @@ Return the corresponding character code in Big5. */) | |||
| 9214 | charset_list = CODING_ATTR_CHARSET_LIST (attrs); | 9150 | charset_list = CODING_ATTR_CHARSET_LIST (attrs); |
| 9215 | charset = char_charset (c, charset_list, &code); | 9151 | charset = char_charset (c, charset_list, &code); |
| 9216 | if (code == CHARSET_INVALID_CODE (charset)) | 9152 | if (code == CHARSET_INVALID_CODE (charset)) |
| 9217 | error ("Can't encode by Big5 encoding: %d", c); | 9153 | error ("Can't encode by Big5 encoding: %c", c); |
| 9218 | 9154 | ||
| 9219 | return make_number (code); | 9155 | return make_number (code); |
| 9220 | } | 9156 | } |
| @@ -9342,7 +9278,7 @@ function to call for FILENAME, that function should examine the | |||
| 9342 | contents of BUFFER instead of reading the file. | 9278 | contents of BUFFER instead of reading the file. |
| 9343 | 9279 | ||
| 9344 | usage: (find-operation-coding-system OPERATION ARGUMENTS...) */) | 9280 | usage: (find-operation-coding-system OPERATION ARGUMENTS...) */) |
| 9345 | (int nargs, Lisp_Object *args) | 9281 | (ptrdiff_t nargs, Lisp_Object *args) |
| 9346 | { | 9282 | { |
| 9347 | Lisp_Object operation, target_idx, target, val; | 9283 | Lisp_Object operation, target_idx, target, val; |
| 9348 | register Lisp_Object chain; | 9284 | register Lisp_Object chain; |
| @@ -9351,17 +9287,18 @@ usage: (find-operation-coding-system OPERATION ARGUMENTS...) */) | |||
| 9351 | error ("Too few arguments"); | 9287 | error ("Too few arguments"); |
| 9352 | operation = args[0]; | 9288 | operation = args[0]; |
| 9353 | if (!SYMBOLP (operation) | 9289 | if (!SYMBOLP (operation) |
| 9354 | || !INTEGERP (target_idx = Fget (operation, Qtarget_idx))) | 9290 | || !NATNUMP (target_idx = Fget (operation, Qtarget_idx))) |
| 9355 | error ("Invalid first argument"); | 9291 | error ("Invalid first argument"); |
| 9356 | if (nargs < 1 + XINT (target_idx)) | 9292 | if (nargs < 1 + XFASTINT (target_idx)) |
| 9357 | error ("Too few arguments for operation: %s", | 9293 | error ("Too few arguments for operation `%s'", |
| 9358 | SDATA (SYMBOL_NAME (operation))); | 9294 | SDATA (SYMBOL_NAME (operation))); |
| 9359 | target = args[XINT (target_idx) + 1]; | 9295 | target = args[XFASTINT (target_idx) + 1]; |
| 9360 | if (!(STRINGP (target) | 9296 | if (!(STRINGP (target) |
| 9361 | || (EQ (operation, Qinsert_file_contents) && CONSP (target) | 9297 | || (EQ (operation, Qinsert_file_contents) && CONSP (target) |
| 9362 | && STRINGP (XCAR (target)) && BUFFERP (XCDR (target))) | 9298 | && STRINGP (XCAR (target)) && BUFFERP (XCDR (target))) |
| 9363 | || (EQ (operation, Qopen_network_stream) && INTEGERP (target)))) | 9299 | || (EQ (operation, Qopen_network_stream) && INTEGERP (target)))) |
| 9364 | error ("Invalid %dth argument", XINT (target_idx) + 1); | 9300 | error ("Invalid argument %"pI"d of operation `%s'", |
| 9301 | XFASTINT (target_idx) + 1, SDATA (SYMBOL_NAME (operation))); | ||
| 9365 | if (CONSP (target)) | 9302 | if (CONSP (target)) |
| 9366 | target = XCAR (target); | 9303 | target = XCAR (target); |
| 9367 | 9304 | ||
| @@ -9418,9 +9355,9 @@ If multiple coding systems belong to the same category, | |||
| 9418 | all but the first one are ignored. | 9355 | all but the first one are ignored. |
| 9419 | 9356 | ||
| 9420 | usage: (set-coding-system-priority &rest coding-systems) */) | 9357 | usage: (set-coding-system-priority &rest coding-systems) */) |
| 9421 | (int nargs, Lisp_Object *args) | 9358 | (ptrdiff_t nargs, Lisp_Object *args) |
| 9422 | { | 9359 | { |
| 9423 | int i, j; | 9360 | ptrdiff_t i, j; |
| 9424 | int changed[coding_category_max]; | 9361 | int changed[coding_category_max]; |
| 9425 | enum coding_category priorities[coding_category_max]; | 9362 | enum coding_category priorities[coding_category_max]; |
| 9426 | 9363 | ||
| @@ -9463,7 +9400,7 @@ usage: (set-coding-system-priority &rest coding-systems) */) | |||
| 9463 | 9400 | ||
| 9464 | /* Update `coding-category-list'. */ | 9401 | /* Update `coding-category-list'. */ |
| 9465 | Vcoding_category_list = Qnil; | 9402 | Vcoding_category_list = Qnil; |
| 9466 | for (i = coding_category_max - 1; i >= 0; i--) | 9403 | for (i = coding_category_max; i-- > 0; ) |
| 9467 | Vcoding_category_list | 9404 | Vcoding_category_list |
| 9468 | = Fcons (AREF (Vcoding_category_table, priorities[i]), | 9405 | = Fcons (AREF (Vcoding_category_table, priorities[i]), |
| 9469 | Vcoding_category_list); | 9406 | Vcoding_category_list); |
| @@ -9524,7 +9461,7 @@ DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal, | |||
| 9524 | Sdefine_coding_system_internal, coding_arg_max, MANY, 0, | 9461 | Sdefine_coding_system_internal, coding_arg_max, MANY, 0, |
| 9525 | doc: /* For internal use only. | 9462 | doc: /* For internal use only. |
| 9526 | usage: (define-coding-system-internal ...) */) | 9463 | usage: (define-coding-system-internal ...) */) |
| 9527 | (int nargs, Lisp_Object *args) | 9464 | (ptrdiff_t nargs, Lisp_Object *args) |
| 9528 | { | 9465 | { |
| 9529 | Lisp_Object name; | 9466 | Lisp_Object name; |
| 9530 | Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */ | 9467 | Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */ |
| @@ -9798,7 +9735,6 @@ usage: (define-coding-system-internal ...) */) | |||
| 9798 | else if (EQ (coding_type, Qiso_2022)) | 9735 | else if (EQ (coding_type, Qiso_2022)) |
| 9799 | { | 9736 | { |
| 9800 | Lisp_Object initial, reg_usage, request, flags; | 9737 | Lisp_Object initial, reg_usage, request, flags; |
| 9801 | int i; | ||
| 9802 | 9738 | ||
| 9803 | if (nargs < coding_arg_iso2022_max) | 9739 | if (nargs < coding_arg_iso2022_max) |
| 9804 | goto short_args; | 9740 | goto short_args; |
| @@ -9830,15 +9766,15 @@ usage: (define-coding-system-internal ...) */) | |||
| 9830 | for (tail = request; ! NILP (tail); tail = Fcdr (tail)) | 9766 | for (tail = request; ! NILP (tail); tail = Fcdr (tail)) |
| 9831 | { | 9767 | { |
| 9832 | int id; | 9768 | int id; |
| 9833 | Lisp_Object tmp; | 9769 | Lisp_Object tmp1; |
| 9834 | 9770 | ||
| 9835 | val = Fcar (tail); | 9771 | val = Fcar (tail); |
| 9836 | CHECK_CONS (val); | 9772 | CHECK_CONS (val); |
| 9837 | tmp = XCAR (val); | 9773 | tmp1 = XCAR (val); |
| 9838 | CHECK_CHARSET_GET_ID (tmp, id); | 9774 | CHECK_CHARSET_GET_ID (tmp1, id); |
| 9839 | CHECK_NATNUM_CDR (val); | 9775 | CHECK_NATNUM_CDR (val); |
| 9840 | if (XINT (XCDR (val)) >= 4) | 9776 | if (XINT (XCDR (val)) >= 4) |
| 9841 | error ("Invalid graphic register number: %d", XINT (XCDR (val))); | 9777 | error ("Invalid graphic register number: %"pI"d", XINT (XCDR (val))); |
| 9842 | XSETCAR (val, make_number (id)); | 9778 | XSETCAR (val, make_number (id)); |
| 9843 | } | 9779 | } |
| 9844 | 9780 | ||