aboutsummaryrefslogtreecommitdiffstats
path: root/src/coding.c
diff options
context:
space:
mode:
authorJoakim Verona2011-06-16 00:22:07 +0200
committerJoakim Verona2011-06-16 00:22:07 +0200
commita7513ade3bc0fe79430d5541d88c9dcda0932bec (patch)
tree4383951ba698a11e9f8933a9d8c72e00aa872a10 /src/coding.c
parent4bd51ad5c3445b644dfb017d5b57b10a90aa325f (diff)
parent4bba86e6210a74326e843a8fdc8409127105e1fe (diff)
downloademacs-a7513ade3bc0fe79430d5541d88c9dcda0932bec.tar.gz
emacs-a7513ade3bc0fe79430d5541d88c9dcda0932bec.zip
merge from upstream
Diffstat (limited to 'src/coding.c')
-rw-r--r--src/coding.c726
1 files changed, 331 insertions, 395 deletions
diff --git a/src/coding.c b/src/coding.c
index 3a3ba11ee9d..04985ab3c74 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -55,8 +55,8 @@ CODING SYSTEM
55 character sequence of emacs-utf-8 to a byte sequence of a specific 55 character sequence of emacs-utf-8 to a byte sequence of a specific
56 coding system. 56 coding system.
57 57
58 In Emacs Lisp, a coding system is represented by a Lisp symbol. In 58 In Emacs Lisp, a coding system is represented by a Lisp symbol. On
59 C level, a coding system is represented by a vector of attributes 59 the C level, a coding system is represented by a vector of attributes
60 stored in the hash table Vcharset_hash_table. The conversion from 60 stored in the hash table Vcharset_hash_table. The conversion from
61 coding system symbol to attributes vector is done by looking up 61 coding system symbol to attributes vector is done by looking up
62 Vcharset_hash_table by the symbol. 62 Vcharset_hash_table by the symbol.
@@ -159,7 +159,7 @@ detect_coding_XXX (struct coding_system *coding,
159 const unsigned char *src = coding->source; 159 const unsigned char *src = coding->source;
160 const unsigned char *src_end = coding->source + coding->src_bytes; 160 const unsigned char *src_end = coding->source + coding->src_bytes;
161 int multibytep = coding->src_multibyte; 161 int multibytep = coding->src_multibyte;
162 int consumed_chars = 0; 162 EMACS_INT consumed_chars = 0;
163 int found = 0; 163 int found = 0;
164 ...; 164 ...;
165 165
@@ -266,7 +266,7 @@ encode_coding_XXX (struct coding_system *coding)
266 unsigned char *dst = coding->destination + coding->produced; 266 unsigned char *dst = coding->destination + coding->produced;
267 unsigned char *dst_end = coding->destination + coding->dst_bytes; 267 unsigned char *dst_end = coding->destination + coding->dst_bytes;
268 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_; 268 unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
269 int produced_chars = 0; 269 EMACS_INT produced_chars = 0;
270 270
271 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++) 271 for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
272 { 272 {
@@ -300,27 +300,30 @@ encode_coding_XXX (struct coding_system *coding)
300 300
301Lisp_Object Vcoding_system_hash_table; 301Lisp_Object Vcoding_system_hash_table;
302 302
303Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type; 303static Lisp_Object Qcoding_system, Qeol_type;
304static Lisp_Object Qcoding_aliases;
304Lisp_Object Qunix, Qdos; 305Lisp_Object Qunix, Qdos;
305Lisp_Object Qbuffer_file_coding_system; 306Lisp_Object Qbuffer_file_coding_system;
306Lisp_Object Qpost_read_conversion, Qpre_write_conversion; 307static Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
307Lisp_Object Qdefault_char; 308static Lisp_Object Qdefault_char;
308Lisp_Object Qno_conversion, Qundecided; 309Lisp_Object Qno_conversion, Qundecided;
309Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5; 310Lisp_Object Qcharset, Qutf_8;
310Lisp_Object Qbig, Qlittle; 311static Lisp_Object Qiso_2022;
311Lisp_Object Qcoding_system_history; 312static Lisp_Object Qutf_16, Qshift_jis, Qbig5;
312Lisp_Object Qvalid_codes; 313static Lisp_Object Qbig, Qlittle;
313Lisp_Object QCcategory, QCmnemonic, QCdefault_char; 314static Lisp_Object Qcoding_system_history;
314Lisp_Object QCdecode_translation_table, QCencode_translation_table; 315static Lisp_Object Qvalid_codes;
315Lisp_Object QCpost_read_conversion, QCpre_write_conversion; 316static Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
316Lisp_Object QCascii_compatible_p; 317static Lisp_Object QCdecode_translation_table, QCencode_translation_table;
318static Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
319static Lisp_Object QCascii_compatible_p;
317 320
318Lisp_Object Qcall_process, Qcall_process_region; 321Lisp_Object Qcall_process, Qcall_process_region;
319Lisp_Object Qstart_process, Qopen_network_stream; 322Lisp_Object Qstart_process, Qopen_network_stream;
320Lisp_Object Qtarget_idx; 323static Lisp_Object Qtarget_idx;
321 324
322Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source; 325static Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
323Lisp_Object Qinterrupted, Qinsufficient_memory; 326static Lisp_Object Qinterrupted, Qinsufficient_memory;
324 327
325/* If a symbol has this property, evaluate the value to define the 328/* If a symbol has this property, evaluate the value to define the
326 symbol as a coding system. */ 329 symbol as a coding system. */
@@ -351,12 +354,12 @@ struct coding_system safe_terminal_coding;
351 354
352Lisp_Object Qtranslation_table; 355Lisp_Object Qtranslation_table;
353Lisp_Object Qtranslation_table_id; 356Lisp_Object Qtranslation_table_id;
354Lisp_Object Qtranslation_table_for_decode; 357static Lisp_Object Qtranslation_table_for_decode;
355Lisp_Object Qtranslation_table_for_encode; 358static Lisp_Object Qtranslation_table_for_encode;
356 359
357/* Two special coding systems. */ 360/* Two special coding systems. */
358Lisp_Object Vsjis_coding_system; 361static Lisp_Object Vsjis_coding_system;
359Lisp_Object Vbig5_coding_system; 362static Lisp_Object Vbig5_coding_system;
360 363
361/* ISO2022 section */ 364/* ISO2022 section */
362 365
@@ -395,8 +398,6 @@ Lisp_Object Vbig5_coding_system;
395 398
396/* Control characters of ISO2022. */ 399/* Control characters of ISO2022. */
397 /* code */ /* function */ 400 /* code */ /* function */
398#define ISO_CODE_LF 0x0A /* line-feed */
399#define ISO_CODE_CR 0x0D /* carriage-return */
400#define ISO_CODE_SO 0x0E /* shift-out */ 401#define ISO_CODE_SO 0x0E /* shift-out */
401#define ISO_CODE_SI 0x0F /* shift-in */ 402#define ISO_CODE_SI 0x0F /* shift-in */
402#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */ 403#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
@@ -479,7 +480,7 @@ enum iso_code_class_type
479 480
480#define CODING_ISO_FLAG_COMPOSITION 0x2000 481#define CODING_ISO_FLAG_COMPOSITION 0x2000
481 482
482#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000 483/* #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000 */
483 484
484#define CODING_ISO_FLAG_USE_ROMAN 0x8000 485#define CODING_ISO_FLAG_USE_ROMAN 0x8000
485 486
@@ -721,25 +722,6 @@ static struct coding_system coding_categories[coding_category_max];
721 } while (0) 722 } while (0)
722 723
723 724
724#define ONE_MORE_BYTE_NO_CHECK(c) \
725 do { \
726 c = *src++; \
727 if (multibytep && (c & 0x80)) \
728 { \
729 if ((c & 0xFE) == 0xC0) \
730 c = ((c & 1) << 6) | *src++; \
731 else \
732 { \
733 src--; \
734 c = - string_char (src, &src, NULL); \
735 record_conversion_result \
736 (coding, CODING_RESULT_INVALID_SRC); \
737 } \
738 } \
739 consumed_chars++; \
740 } while (0)
741
742
743/* Store a byte C in the place pointed by DST and increment DST to the 725/* Store a byte C in the place pointed by DST and increment DST to the
744 next free point, and increment PRODUCED_CHARS. The caller should 726 next free point, and increment PRODUCED_CHARS. The caller should
745 assure that C is 0..127, and declare and set the variable `dst' 727 assure that C is 0..127, and declare and set the variable `dst'
@@ -774,7 +756,7 @@ static struct coding_system coding_categories[coding_category_max];
774 produced_chars++; \ 756 produced_chars++; \
775 if (multibytep) \ 757 if (multibytep) \
776 { \ 758 { \
777 int ch = (c); \ 759 unsigned ch = (c); \
778 if (ch >= 0x80) \ 760 if (ch >= 0x80) \
779 ch = BYTE8_TO_CHAR (ch); \ 761 ch = BYTE8_TO_CHAR (ch); \
780 CHAR_STRING_ADVANCE (ch, dst); \ 762 CHAR_STRING_ADVANCE (ch, dst); \
@@ -791,7 +773,7 @@ static struct coding_system coding_categories[coding_category_max];
791 produced_chars += 2; \ 773 produced_chars += 2; \
792 if (multibytep) \ 774 if (multibytep) \
793 { \ 775 { \
794 int ch; \ 776 unsigned ch; \
795 \ 777 \
796 ch = (c1); \ 778 ch = (c1); \
797 if (ch >= 0x80) \ 779 if (ch >= 0x80) \
@@ -874,8 +856,7 @@ static unsigned char *alloc_destination (struct coding_system *,
874 EMACS_INT, unsigned char *); 856 EMACS_INT, unsigned char *);
875static void setup_iso_safe_charsets (Lisp_Object); 857static void setup_iso_safe_charsets (Lisp_Object);
876static unsigned char *encode_designation_at_bol (struct coding_system *, 858static unsigned char *encode_designation_at_bol (struct coding_system *,
877 int *, int *, 859 int *, unsigned char *);
878 unsigned char *);
879static int detect_eol (const unsigned char *, 860static int detect_eol (const unsigned char *,
880 EMACS_INT, enum coding_category); 861 EMACS_INT, enum coding_category);
881static Lisp_Object adjust_coding_eol_type (struct coding_system *, int); 862static Lisp_Object adjust_coding_eol_type (struct coding_system *, int);
@@ -883,21 +864,21 @@ static void decode_eol (struct coding_system *);
883static Lisp_Object get_translation_table (Lisp_Object, int, int *); 864static Lisp_Object get_translation_table (Lisp_Object, int, int *);
884static Lisp_Object get_translation (Lisp_Object, int *, int *); 865static Lisp_Object get_translation (Lisp_Object, int *, int *);
885static int produce_chars (struct coding_system *, Lisp_Object, int); 866static int produce_chars (struct coding_system *, Lisp_Object, int);
886static INLINE void produce_charset (struct coding_system *, int *, 867static inline void produce_charset (struct coding_system *, int *,
887 EMACS_INT); 868 EMACS_INT);
888static void produce_annotation (struct coding_system *, EMACS_INT); 869static void produce_annotation (struct coding_system *, EMACS_INT);
889static int decode_coding (struct coding_system *); 870static int decode_coding (struct coding_system *);
890static INLINE int *handle_composition_annotation (EMACS_INT, EMACS_INT, 871static inline int *handle_composition_annotation (EMACS_INT, EMACS_INT,
891 struct coding_system *, 872 struct coding_system *,
892 int *, EMACS_INT *); 873 int *, EMACS_INT *);
893static INLINE int *handle_charset_annotation (EMACS_INT, EMACS_INT, 874static inline int *handle_charset_annotation (EMACS_INT, EMACS_INT,
894 struct coding_system *, 875 struct coding_system *,
895 int *, EMACS_INT *); 876 int *, EMACS_INT *);
896static void consume_chars (struct coding_system *, Lisp_Object, int); 877static void consume_chars (struct coding_system *, Lisp_Object, int);
897static int encode_coding (struct coding_system *); 878static int encode_coding (struct coding_system *);
898static Lisp_Object make_conversion_work_buffer (int); 879static Lisp_Object make_conversion_work_buffer (int);
899static Lisp_Object code_conversion_restore (Lisp_Object); 880static Lisp_Object code_conversion_restore (Lisp_Object);
900static INLINE int char_encodable_p (int, Lisp_Object); 881static inline int char_encodable_p (int, Lisp_Object);
901static Lisp_Object make_subsidiaries (Lisp_Object); 882static Lisp_Object make_subsidiaries (Lisp_Object);
902 883
903static void 884static void
@@ -965,7 +946,7 @@ record_conversion_result (struct coding_system *coding,
965 do { \ 946 do { \
966 if (dst + (bytes) >= dst_end) \ 947 if (dst + (bytes) >= dst_end) \
967 { \ 948 { \
968 int more_bytes = charbuf_end - charbuf + (bytes); \ 949 EMACS_INT more_bytes = charbuf_end - charbuf + (bytes); \
969 \ 950 \
970 dst = alloc_destination (coding, more_bytes, dst); \ 951 dst = alloc_destination (coding, more_bytes, dst); \
971 dst_end = coding->destination + coding->dst_bytes; \ 952 dst_end = coding->destination + coding->dst_bytes; \
@@ -1051,9 +1032,10 @@ coding_set_source (struct coding_system *coding)
1051 coding->source = SDATA (coding->src_object) + coding->src_pos_byte; 1032 coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1052 } 1033 }
1053 else 1034 else
1054 /* Otherwise, the source is C string and is never relocated 1035 {
1055 automatically. Thus we don't have to update anything. */ 1036 /* Otherwise, the source is C string and is never relocated
1056 ; 1037 automatically. Thus we don't have to update anything. */
1038 }
1057} 1039}
1058 1040
1059static void 1041static void
@@ -1079,15 +1061,18 @@ coding_set_destination (struct coding_system *coding)
1079 } 1061 }
1080 } 1062 }
1081 else 1063 else
1082 /* Otherwise, the destination is C string and is never relocated 1064 {
1083 automatically. Thus we don't have to update anything. */ 1065 /* Otherwise, the destination is C string and is never relocated
1084 ; 1066 automatically. Thus we don't have to update anything. */
1067 }
1085} 1068}
1086 1069
1087 1070
1088static void 1071static void
1089coding_alloc_by_realloc (struct coding_system *coding, EMACS_INT bytes) 1072coding_alloc_by_realloc (struct coding_system *coding, EMACS_INT bytes)
1090{ 1073{
1074 if (STRING_BYTES_BOUND - coding->dst_bytes < bytes)
1075 string_overflow ();
1091 coding->destination = (unsigned char *) xrealloc (coding->destination, 1076 coding->destination = (unsigned char *) xrealloc (coding->destination,
1092 coding->dst_bytes + bytes); 1077 coding->dst_bytes + bytes);
1093 coding->dst_bytes += bytes; 1078 coding->dst_bytes += bytes;
@@ -1217,7 +1202,6 @@ alloc_destination (struct coding_system *coding, EMACS_INT nbytes,
1217#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0) 1202#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1218#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8) 1203#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1219 1204
1220#define UTF_BOM 0xFEFF
1221#define UTF_8_BOM_1 0xEF 1205#define UTF_8_BOM_1 0xEF
1222#define UTF_8_BOM_2 0xBB 1206#define UTF_8_BOM_2 0xBB
1223#define UTF_8_BOM_3 0xBF 1207#define UTF_8_BOM_3 0xBF
@@ -1229,7 +1213,7 @@ detect_coding_utf_8 (struct coding_system *coding,
1229 const unsigned char *src = coding->source, *src_base; 1213 const unsigned char *src = coding->source, *src_base;
1230 const unsigned char *src_end = coding->source + coding->src_bytes; 1214 const unsigned char *src_end = coding->source + coding->src_bytes;
1231 int multibytep = coding->src_multibyte; 1215 int multibytep = coding->src_multibyte;
1232 int consumed_chars = 0; 1216 EMACS_INT consumed_chars = 0;
1233 int bom_found = 0; 1217 int bom_found = 0;
1234 int found = 0; 1218 int found = 0;
1235 1219
@@ -1314,16 +1298,13 @@ decode_coding_utf_8 (struct coding_system *coding)
1314 const unsigned char *src_base; 1298 const unsigned char *src_base;
1315 int *charbuf = coding->charbuf + coding->charbuf_used; 1299 int *charbuf = coding->charbuf + coding->charbuf_used;
1316 int *charbuf_end = coding->charbuf + coding->charbuf_size; 1300 int *charbuf_end = coding->charbuf + coding->charbuf_size;
1317 int consumed_chars = 0, consumed_chars_base = 0; 1301 EMACS_INT consumed_chars = 0, consumed_chars_base = 0;
1318 int multibytep = coding->src_multibyte; 1302 int multibytep = coding->src_multibyte;
1319 enum utf_bom_type bom = CODING_UTF_8_BOM (coding); 1303 enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1320 Lisp_Object attr, charset_list; 1304 int eol_dos =
1321 int eol_crlf =
1322 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 1305 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1323 int byte_after_cr = -1; 1306 int byte_after_cr = -1;
1324 1307
1325 CODING_GET_INFO (coding, attr, charset_list);
1326
1327 if (bom != utf_without_bom) 1308 if (bom != utf_without_bom)
1328 { 1309 {
1329 int c1, c2, c3; 1310 int c1, c2, c3;
@@ -1379,7 +1360,7 @@ decode_coding_utf_8 (struct coding_system *coding)
1379 } 1360 }
1380 else if (UTF_8_1_OCTET_P (c1)) 1361 else if (UTF_8_1_OCTET_P (c1))
1381 { 1362 {
1382 if (eol_crlf && c1 == '\r') 1363 if (eol_dos && c1 == '\r')
1383 ONE_MORE_BYTE (byte_after_cr); 1364 ONE_MORE_BYTE (byte_after_cr);
1384 c = c1; 1365 c = c1;
1385 } 1366 }
@@ -1468,7 +1449,7 @@ encode_coding_utf_8 (struct coding_system *coding)
1468 int *charbuf_end = charbuf + coding->charbuf_used; 1449 int *charbuf_end = charbuf + coding->charbuf_used;
1469 unsigned char *dst = coding->destination + coding->produced; 1450 unsigned char *dst = coding->destination + coding->produced;
1470 unsigned char *dst_end = coding->destination + coding->dst_bytes; 1451 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1471 int produced_chars = 0; 1452 EMACS_INT produced_chars = 0;
1472 int c; 1453 int c;
1473 1454
1474 if (CODING_UTF_8_BOM (coding) == utf_with_bom) 1455 if (CODING_UTF_8_BOM (coding) == utf_with_bom)
@@ -1533,11 +1514,6 @@ encode_coding_utf_8 (struct coding_system *coding)
1533#define UTF_16_LOW_SURROGATE_P(val) \ 1514#define UTF_16_LOW_SURROGATE_P(val) \
1534 (((val) & 0xFC00) == 0xDC00) 1515 (((val) & 0xFC00) == 0xDC00)
1535 1516
1536#define UTF_16_INVALID_P(val) \
1537 (((val) == 0xFFFE) \
1538 || ((val) == 0xFFFF) \
1539 || UTF_16_LOW_SURROGATE_P (val))
1540
1541 1517
1542static int 1518static int
1543detect_coding_utf_16 (struct coding_system *coding, 1519detect_coding_utf_16 (struct coding_system *coding,
@@ -1631,18 +1607,15 @@ decode_coding_utf_16 (struct coding_system *coding)
1631 int *charbuf = coding->charbuf + coding->charbuf_used; 1607 int *charbuf = coding->charbuf + coding->charbuf_used;
1632 /* We may produces at most 3 chars in one loop. */ 1608 /* We may produces at most 3 chars in one loop. */
1633 int *charbuf_end = coding->charbuf + coding->charbuf_size - 2; 1609 int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
1634 int consumed_chars = 0, consumed_chars_base = 0; 1610 EMACS_INT consumed_chars = 0, consumed_chars_base = 0;
1635 int multibytep = coding->src_multibyte; 1611 int multibytep = coding->src_multibyte;
1636 enum utf_bom_type bom = CODING_UTF_16_BOM (coding); 1612 enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1637 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding); 1613 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1638 int surrogate = CODING_UTF_16_SURROGATE (coding); 1614 int surrogate = CODING_UTF_16_SURROGATE (coding);
1639 Lisp_Object attr, charset_list; 1615 int eol_dos =
1640 int eol_crlf =
1641 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 1616 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1642 int byte_after_cr1 = -1, byte_after_cr2 = -1; 1617 int byte_after_cr1 = -1, byte_after_cr2 = -1;
1643 1618
1644 CODING_GET_INFO (coding, attr, charset_list);
1645
1646 if (bom == utf_with_bom) 1619 if (bom == utf_with_bom)
1647 { 1620 {
1648 int c, c1, c2; 1621 int c, c1, c2;
@@ -1734,7 +1707,7 @@ decode_coding_utf_16 (struct coding_system *coding)
1734 CODING_UTF_16_SURROGATE (coding) = surrogate = c; 1707 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1735 else 1708 else
1736 { 1709 {
1737 if (eol_crlf && c == '\r') 1710 if (eol_dos && c == '\r')
1738 { 1711 {
1739 ONE_MORE_BYTE (byte_after_cr1); 1712 ONE_MORE_BYTE (byte_after_cr1);
1740 ONE_MORE_BYTE (byte_after_cr2); 1713 ONE_MORE_BYTE (byte_after_cr2);
@@ -1761,12 +1734,9 @@ encode_coding_utf_16 (struct coding_system *coding)
1761 int safe_room = 8; 1734 int safe_room = 8;
1762 enum utf_bom_type bom = CODING_UTF_16_BOM (coding); 1735 enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1763 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian; 1736 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1764 int produced_chars = 0; 1737 EMACS_INT produced_chars = 0;
1765 Lisp_Object attrs, charset_list;
1766 int c; 1738 int c;
1767 1739
1768 CODING_GET_INFO (coding, attrs, charset_list);
1769
1770 if (bom != utf_without_bom) 1740 if (bom != utf_without_bom)
1771 { 1741 {
1772 ASSURE_DESTINATION (safe_room); 1742 ASSURE_DESTINATION (safe_room);
@@ -1898,7 +1868,7 @@ detect_coding_emacs_mule (struct coding_system *coding,
1898 const unsigned char *src = coding->source, *src_base; 1868 const unsigned char *src = coding->source, *src_base;
1899 const unsigned char *src_end = coding->source + coding->src_bytes; 1869 const unsigned char *src_end = coding->source + coding->src_bytes;
1900 int multibytep = coding->src_multibyte; 1870 int multibytep = coding->src_multibyte;
1901 int consumed_chars = 0; 1871 EMACS_INT consumed_chars = 0;
1902 int c; 1872 int c;
1903 int found = 0; 1873 int found = 0;
1904 1874
@@ -1918,17 +1888,17 @@ detect_coding_emacs_mule (struct coding_system *coding,
1918 it because analyzing it is too heavy for detecting. But, 1888 it because analyzing it is too heavy for detecting. But,
1919 at least, we check that the composite character 1889 at least, we check that the composite character
1920 constitutes of more than 4 bytes. */ 1890 constitutes of more than 4 bytes. */
1921 const unsigned char *src_base; 1891 const unsigned char *src_start;
1922 1892
1923 repeat: 1893 repeat:
1924 src_base = src; 1894 src_start = src;
1925 do 1895 do
1926 { 1896 {
1927 ONE_MORE_BYTE (c); 1897 ONE_MORE_BYTE (c);
1928 } 1898 }
1929 while (c >= 0xA0); 1899 while (c >= 0xA0);
1930 1900
1931 if (src - src_base <= 4) 1901 if (src - src_start <= 4)
1932 break; 1902 break;
1933 found = CATEGORY_MASK_EMACS_MULE; 1903 found = CATEGORY_MASK_EMACS_MULE;
1934 if (c == 0x80) 1904 if (c == 0x80)
@@ -1980,7 +1950,7 @@ detect_coding_emacs_mule (struct coding_system *coding,
1980 the decoded character or rule. If an invalid byte is found, return 1950 the decoded character or rule. If an invalid byte is found, return
1981 -1. If SRC is too short, return -2. */ 1951 -1. If SRC is too short, return -2. */
1982 1952
1983int 1953static int
1984emacs_mule_char (struct coding_system *coding, const unsigned char *src, 1954emacs_mule_char (struct coding_system *coding, const unsigned char *src,
1985 int *nbytes, int *nchars, int *id, 1955 int *nbytes, int *nchars, int *id,
1986 struct composition_status *cmp_status) 1956 struct composition_status *cmp_status)
@@ -1988,7 +1958,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
1988 const unsigned char *src_end = coding->source + coding->src_bytes; 1958 const unsigned char *src_end = coding->source + coding->src_bytes;
1989 const unsigned char *src_base = src; 1959 const unsigned char *src_base = src;
1990 int multibytep = coding->src_multibyte; 1960 int multibytep = coding->src_multibyte;
1991 int charset_id; 1961 int charset_ID;
1992 unsigned code; 1962 unsigned code;
1993 int c; 1963 int c;
1994 int consumed_chars = 0; 1964 int consumed_chars = 0;
@@ -1998,7 +1968,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
1998 if (c < 0) 1968 if (c < 0)
1999 { 1969 {
2000 c = -c; 1970 c = -c;
2001 charset_id = emacs_mule_charset[0]; 1971 charset_ID = emacs_mule_charset[0];
2002 } 1972 }
2003 else 1973 else
2004 { 1974 {
@@ -2034,7 +2004,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2034 switch (emacs_mule_bytes[c]) 2004 switch (emacs_mule_bytes[c])
2035 { 2005 {
2036 case 2: 2006 case 2:
2037 if ((charset_id = emacs_mule_charset[c]) < 0) 2007 if ((charset_ID = emacs_mule_charset[c]) < 0)
2038 goto invalid_code; 2008 goto invalid_code;
2039 ONE_MORE_BYTE (c); 2009 ONE_MORE_BYTE (c);
2040 if (c < 0xA0) 2010 if (c < 0xA0)
@@ -2047,7 +2017,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2047 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12) 2017 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2048 { 2018 {
2049 ONE_MORE_BYTE (c); 2019 ONE_MORE_BYTE (c);
2050 if (c < 0xA0 || (charset_id = emacs_mule_charset[c]) < 0) 2020 if (c < 0xA0 || (charset_ID = emacs_mule_charset[c]) < 0)
2051 goto invalid_code; 2021 goto invalid_code;
2052 ONE_MORE_BYTE (c); 2022 ONE_MORE_BYTE (c);
2053 if (c < 0xA0) 2023 if (c < 0xA0)
@@ -2056,7 +2026,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2056 } 2026 }
2057 else 2027 else
2058 { 2028 {
2059 if ((charset_id = emacs_mule_charset[c]) < 0) 2029 if ((charset_ID = emacs_mule_charset[c]) < 0)
2060 goto invalid_code; 2030 goto invalid_code;
2061 ONE_MORE_BYTE (c); 2031 ONE_MORE_BYTE (c);
2062 if (c < 0xA0) 2032 if (c < 0xA0)
@@ -2071,7 +2041,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2071 2041
2072 case 4: 2042 case 4:
2073 ONE_MORE_BYTE (c); 2043 ONE_MORE_BYTE (c);
2074 if (c < 0 || (charset_id = emacs_mule_charset[c]) < 0) 2044 if (c < 0 || (charset_ID = emacs_mule_charset[c]) < 0)
2075 goto invalid_code; 2045 goto invalid_code;
2076 ONE_MORE_BYTE (c); 2046 ONE_MORE_BYTE (c);
2077 if (c < 0xA0) 2047 if (c < 0xA0)
@@ -2085,21 +2055,21 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2085 2055
2086 case 1: 2056 case 1:
2087 code = c; 2057 code = c;
2088 charset_id = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit; 2058 charset_ID = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit;
2089 break; 2059 break;
2090 2060
2091 default: 2061 default:
2092 abort (); 2062 abort ();
2093 } 2063 }
2094 CODING_DECODE_CHAR (coding, src, src_base, src_end, 2064 CODING_DECODE_CHAR (coding, src, src_base, src_end,
2095 CHARSET_FROM_ID (charset_id), code, c); 2065 CHARSET_FROM_ID (charset_ID), code, c);
2096 if (c < 0) 2066 if (c < 0)
2097 goto invalid_code; 2067 goto invalid_code;
2098 } 2068 }
2099 *nbytes = src - src_base; 2069 *nbytes = src - src_base;
2100 *nchars = consumed_chars; 2070 *nchars = consumed_chars;
2101 if (id) 2071 if (id)
2102 *id = charset_id; 2072 *id = charset_ID;
2103 return (mseq_found ? -c : c); 2073 return (mseq_found ? -c : c);
2104 2074
2105 no_more_source: 2075 no_more_source:
@@ -2365,24 +2335,25 @@ decode_coding_emacs_mule (struct coding_system *coding)
2365 /* We may produce two annotations (charset and composition) in one 2335 /* We may produce two annotations (charset and composition) in one
2366 loop and one more charset annotation at the end. */ 2336 loop and one more charset annotation at the end. */
2367 int *charbuf_end 2337 int *charbuf_end
2368 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3); 2338 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3)
2369 int consumed_chars = 0, consumed_chars_base; 2339 /* We can produce up to 2 characters in a loop. */
2340 - 1;
2341 EMACS_INT consumed_chars = 0, consumed_chars_base;
2370 int multibytep = coding->src_multibyte; 2342 int multibytep = coding->src_multibyte;
2371 Lisp_Object attrs, charset_list; 2343 EMACS_INT char_offset = coding->produced_char;
2372 int char_offset = coding->produced_char; 2344 EMACS_INT last_offset = char_offset;
2373 int last_offset = char_offset;
2374 int last_id = charset_ascii; 2345 int last_id = charset_ascii;
2375 int eol_crlf = 2346 int eol_dos =
2376 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 2347 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2377 int byte_after_cr = -1; 2348 int byte_after_cr = -1;
2378 struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status; 2349 struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
2379 2350
2380 CODING_GET_INFO (coding, attrs, charset_list);
2381
2382 if (cmp_status->state != COMPOSING_NO) 2351 if (cmp_status->state != COMPOSING_NO)
2383 { 2352 {
2384 int i; 2353 int i;
2385 2354
2355 if (charbuf_end - charbuf < cmp_status->length)
2356 abort ();
2386 for (i = 0; i < cmp_status->length; i++) 2357 for (i = 0; i < cmp_status->length; i++)
2387 *charbuf++ = cmp_status->carryover[i]; 2358 *charbuf++ = cmp_status->carryover[i];
2388 coding->annotated = 1; 2359 coding->annotated = 1;
@@ -2390,7 +2361,7 @@ decode_coding_emacs_mule (struct coding_system *coding)
2390 2361
2391 while (1) 2362 while (1)
2392 { 2363 {
2393 int c, id; 2364 int c, id IF_LINT (= 0);
2394 2365
2395 src_base = src; 2366 src_base = src;
2396 consumed_chars_base = consumed_chars; 2367 consumed_chars_base = consumed_chars;
@@ -2422,7 +2393,7 @@ decode_coding_emacs_mule (struct coding_system *coding)
2422 2393
2423 if (c < 0x80) 2394 if (c < 0x80)
2424 { 2395 {
2425 if (eol_crlf && c == '\r') 2396 if (eol_dos && c == '\r')
2426 ONE_MORE_BYTE (byte_after_cr); 2397 ONE_MORE_BYTE (byte_after_cr);
2427 id = charset_ascii; 2398 id = charset_ascii;
2428 if (cmp_status->state != COMPOSING_NO) 2399 if (cmp_status->state != COMPOSING_NO)
@@ -2435,7 +2406,7 @@ decode_coding_emacs_mule (struct coding_system *coding)
2435 } 2406 }
2436 else 2407 else
2437 { 2408 {
2438 int nchars, nbytes; 2409 int nchars IF_LINT (= 0), nbytes IF_LINT (= 0);
2439 /* emacs_mule_char can load a charset map from a file, which 2410 /* emacs_mule_char can load a charset map from a file, which
2440 allocates a large structure and might cause buffer text 2411 allocates a large structure and might cause buffer text
2441 to be relocated as result. Thus, we need to remember the 2412 to be relocated as result. Thus, we need to remember the
@@ -2623,7 +2594,7 @@ encode_coding_emacs_mule (struct coding_system *coding)
2623 unsigned char *dst = coding->destination + coding->produced; 2594 unsigned char *dst = coding->destination + coding->produced;
2624 unsigned char *dst_end = coding->destination + coding->dst_bytes; 2595 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2625 int safe_room = 8; 2596 int safe_room = 8;
2626 int produced_chars = 0; 2597 EMACS_INT produced_chars = 0;
2627 Lisp_Object attrs, charset_list; 2598 Lisp_Object attrs, charset_list;
2628 int c; 2599 int c;
2629 int preferred_charset_id = -1; 2600 int preferred_charset_id = -1;
@@ -2897,16 +2868,12 @@ encode_coding_emacs_mule (struct coding_system *coding)
2897 COMPOSITION_WITH_RULE_ALTCHARS: 2868 COMPOSITION_WITH_RULE_ALTCHARS:
2898 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */ 2869 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2899 2870
2900enum iso_code_class_type iso_code_class[256]; 2871static enum iso_code_class_type iso_code_class[256];
2901 2872
2902#define SAFE_CHARSET_P(coding, id) \ 2873#define SAFE_CHARSET_P(coding, id) \
2903 ((id) <= (coding)->max_charset_id \ 2874 ((id) <= (coding)->max_charset_id \
2904 && (coding)->safe_charsets[id] != 255) 2875 && (coding)->safe_charsets[id] != 255)
2905 2876
2906
2907#define SHIFT_OUT_OK(category) \
2908 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2909
2910static void 2877static void
2911setup_iso_safe_charsets (Lisp_Object attrs) 2878setup_iso_safe_charsets (Lisp_Object attrs)
2912{ 2879{
@@ -2985,7 +2952,7 @@ detect_coding_iso_2022 (struct coding_system *coding,
2985 int single_shifting = 0; 2952 int single_shifting = 0;
2986 int id; 2953 int id;
2987 int c, c1; 2954 int c, c1;
2988 int consumed_chars = 0; 2955 EMACS_INT consumed_chars = 0;
2989 int i; 2956 int i;
2990 int rejected = 0; 2957 int rejected = 0;
2991 int found = 0; 2958 int found = 0;
@@ -3023,40 +2990,11 @@ detect_coding_iso_2022 (struct coding_system *coding,
3023 break; 2990 break;
3024 single_shifting = 0; 2991 single_shifting = 0;
3025 ONE_MORE_BYTE (c); 2992 ONE_MORE_BYTE (c);
3026 if (c >= '(' && c <= '/') 2993 if (c == 'N' || c == 'O')
3027 {
3028 /* Designation sequence for a charset of dimension 1. */
3029 ONE_MORE_BYTE (c1);
3030 if (c1 < ' ' || c1 >= 0x80
3031 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3032 /* Invalid designation sequence. Just ignore. */
3033 break;
3034 }
3035 else if (c == '$')
3036 {
3037 /* Designation sequence for a charset of dimension 2. */
3038 ONE_MORE_BYTE (c);
3039 if (c >= '@' && c <= 'B')
3040 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
3041 id = iso_charset_table[1][0][c];
3042 else if (c >= '(' && c <= '/')
3043 {
3044 ONE_MORE_BYTE (c1);
3045 if (c1 < ' ' || c1 >= 0x80
3046 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3047 /* Invalid designation sequence. Just ignore. */
3048 break;
3049 }
3050 else
3051 /* Invalid designation sequence. Just ignore it. */
3052 break;
3053 }
3054 else if (c == 'N' || c == 'O')
3055 { 2994 {
3056 /* ESC <Fe> for SS2 or SS3. */ 2995 /* ESC <Fe> for SS2 or SS3. */
3057 single_shifting = 1; 2996 single_shifting = 1;
3058 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT; 2997 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3059 break;
3060 } 2998 }
3061 else if (c == '1') 2999 else if (c == '1')
3062 { 3000 {
@@ -3072,36 +3010,66 @@ detect_coding_iso_2022 (struct coding_system *coding,
3072 { 3010 {
3073 /* ESC <Fp> for start/end composition. */ 3011 /* ESC <Fp> for start/end composition. */
3074 composition_count = 0; 3012 composition_count = 0;
3075 break;
3076 } 3013 }
3077 else 3014 else
3078 { 3015 {
3079 /* Invalid escape sequence. Just ignore it. */ 3016 if (c >= '(' && c <= '/')
3080 break; 3017 {
3081 } 3018 /* Designation sequence for a charset of dimension 1. */
3019 ONE_MORE_BYTE (c1);
3020 if (c1 < ' ' || c1 >= 0x80
3021 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3022 /* Invalid designation sequence. Just ignore. */
3023 break;
3024 }
3025 else if (c == '$')
3026 {
3027 /* Designation sequence for a charset of dimension 2. */
3028 ONE_MORE_BYTE (c);
3029 if (c >= '@' && c <= 'B')
3030 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
3031 id = iso_charset_table[1][0][c];
3032 else if (c >= '(' && c <= '/')
3033 {
3034 ONE_MORE_BYTE (c1);
3035 if (c1 < ' ' || c1 >= 0x80
3036 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3037 /* Invalid designation sequence. Just ignore. */
3038 break;
3039 }
3040 else
3041 /* Invalid designation sequence. Just ignore it. */
3042 break;
3043 }
3044 else
3045 {
3046 /* Invalid escape sequence. Just ignore it. */
3047 break;
3048 }
3082 3049
3083 /* We found a valid designation sequence for CHARSET. */ 3050 /* We found a valid designation sequence for CHARSET. */
3084 rejected |= CATEGORY_MASK_ISO_8BIT; 3051 rejected |= CATEGORY_MASK_ISO_8BIT;
3085 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7], 3052 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3086 id)) 3053 id))
3087 found |= CATEGORY_MASK_ISO_7; 3054 found |= CATEGORY_MASK_ISO_7;
3088 else 3055 else
3089 rejected |= CATEGORY_MASK_ISO_7; 3056 rejected |= CATEGORY_MASK_ISO_7;
3090 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight], 3057 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3091 id)) 3058 id))
3092 found |= CATEGORY_MASK_ISO_7_TIGHT; 3059 found |= CATEGORY_MASK_ISO_7_TIGHT;
3093 else 3060 else
3094 rejected |= CATEGORY_MASK_ISO_7_TIGHT; 3061 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
3095 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else], 3062 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3096 id)) 3063 id))
3097 found |= CATEGORY_MASK_ISO_7_ELSE; 3064 found |= CATEGORY_MASK_ISO_7_ELSE;
3098 else 3065 else
3099 rejected |= CATEGORY_MASK_ISO_7_ELSE; 3066 rejected |= CATEGORY_MASK_ISO_7_ELSE;
3100 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else], 3067 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3101 id)) 3068 id))
3102 found |= CATEGORY_MASK_ISO_8_ELSE; 3069 found |= CATEGORY_MASK_ISO_8_ELSE;
3103 else 3070 else
3104 rejected |= CATEGORY_MASK_ISO_8_ELSE; 3071 rejected |= CATEGORY_MASK_ISO_8_ELSE;
3072 }
3105 break; 3073 break;
3106 3074
3107 case ISO_CODE_SO: 3075 case ISO_CODE_SO:
@@ -3129,13 +3097,32 @@ detect_coding_iso_2022 (struct coding_system *coding,
3129 rejected |= CATEGORY_MASK_ISO_7BIT; 3097 rejected |= CATEGORY_MASK_ISO_7BIT;
3130 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) 3098 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3131 & CODING_ISO_FLAG_SINGLE_SHIFT) 3099 & CODING_ISO_FLAG_SINGLE_SHIFT)
3132 found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1; 3100 {
3101 found |= CATEGORY_MASK_ISO_8_1;
3102 single_shifting = 1;
3103 }
3133 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2]) 3104 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3134 & CODING_ISO_FLAG_SINGLE_SHIFT) 3105 & CODING_ISO_FLAG_SINGLE_SHIFT)
3135 found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1; 3106 {
3107 found |= CATEGORY_MASK_ISO_8_2;
3108 single_shifting = 1;
3109 }
3136 if (single_shifting) 3110 if (single_shifting)
3137 break; 3111 break;
3138 goto check_extra_latin; 3112 check_extra_latin:
3113 if (! VECTORP (Vlatin_extra_code_table)
3114 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3115 {
3116 rejected = CATEGORY_MASK_ISO;
3117 break;
3118 }
3119 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3120 & CODING_ISO_FLAG_LATIN_EXTRA)
3121 found |= CATEGORY_MASK_ISO_8_1;
3122 else
3123 rejected |= CATEGORY_MASK_ISO_8_1;
3124 rejected |= CATEGORY_MASK_ISO_8_2;
3125 break;
3139 3126
3140 default: 3127 default:
3141 if (c < 0) 3128 if (c < 0)
@@ -3158,7 +3145,7 @@ detect_coding_iso_2022 (struct coding_system *coding,
3158 if (! single_shifting 3145 if (! single_shifting
3159 && ! (rejected & CATEGORY_MASK_ISO_8_2)) 3146 && ! (rejected & CATEGORY_MASK_ISO_8_2))
3160 { 3147 {
3161 int i = 1; 3148 int len = 1;
3162 while (src < src_end) 3149 while (src < src_end)
3163 { 3150 {
3164 src_base = src; 3151 src_base = src;
@@ -3168,38 +3155,24 @@ detect_coding_iso_2022 (struct coding_system *coding,
3168 src = src_base; 3155 src = src_base;
3169 break; 3156 break;
3170 } 3157 }
3171 i++; 3158 len++;
3172 } 3159 }
3173 3160
3174 if (i & 1 && src < src_end) 3161 if (len & 1 && src < src_end)
3175 { 3162 {
3176 rejected |= CATEGORY_MASK_ISO_8_2; 3163 rejected |= CATEGORY_MASK_ISO_8_2;
3177 if (composition_count >= 0) 3164 if (composition_count >= 0)
3178 composition_count += i; 3165 composition_count += len;
3179 } 3166 }
3180 else 3167 else
3181 { 3168 {
3182 found |= CATEGORY_MASK_ISO_8_2; 3169 found |= CATEGORY_MASK_ISO_8_2;
3183 if (composition_count >= 0) 3170 if (composition_count >= 0)
3184 composition_count += i / 2; 3171 composition_count += len / 2;
3185 } 3172 }
3186 } 3173 }
3187 break; 3174 break;
3188 } 3175 }
3189 check_extra_latin:
3190 single_shifting = 0;
3191 if (! VECTORP (Vlatin_extra_code_table)
3192 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3193 {
3194 rejected = CATEGORY_MASK_ISO;
3195 break;
3196 }
3197 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3198 & CODING_ISO_FLAG_LATIN_EXTRA)
3199 found |= CATEGORY_MASK_ISO_8_1;
3200 else
3201 rejected |= CATEGORY_MASK_ISO_8_1;
3202 rejected |= CATEGORY_MASK_ISO_8_2;
3203 } 3176 }
3204 } 3177 }
3205 detect_info->rejected |= CATEGORY_MASK_ISO; 3178 detect_info->rejected |= CATEGORY_MASK_ISO;
@@ -3289,15 +3262,14 @@ detect_coding_iso_2022 (struct coding_system *coding,
3289*/ 3262*/
3290 3263
3291/* Decode a composition rule C1 and maybe one more byte from the 3264/* Decode a composition rule C1 and maybe one more byte from the
3292 source, and set RULE to the encoded composition rule, NBYTES to the 3265 source, and set RULE to the encoded composition rule. If the rule
3293 length of the composition rule. If the rule is invalid, set RULE 3266 is invalid, goto invalid_code. */
3294 to some negative value. */
3295 3267
3296#define DECODE_COMPOSITION_RULE(rule, nbytes) \ 3268#define DECODE_COMPOSITION_RULE(rule) \
3297 do { \ 3269 do { \
3298 rule = c1 - 32; \ 3270 rule = c1 - 32; \
3299 if (rule < 0) \ 3271 if (rule < 0) \
3300 break; \ 3272 goto invalid_code; \
3301 if (rule < 81) /* old format (before ver.21) */ \ 3273 if (rule < 81) /* old format (before ver.21) */ \
3302 { \ 3274 { \
3303 int gref = (rule) / 9; \ 3275 int gref = (rule) / 9; \
@@ -3305,17 +3277,16 @@ detect_coding_iso_2022 (struct coding_system *coding,
3305 if (gref == 4) gref = 10; \ 3277 if (gref == 4) gref = 10; \
3306 if (nref == 4) nref = 10; \ 3278 if (nref == 4) nref = 10; \
3307 rule = COMPOSITION_ENCODE_RULE (gref, nref); \ 3279 rule = COMPOSITION_ENCODE_RULE (gref, nref); \
3308 nbytes = 1; \
3309 } \ 3280 } \
3310 else /* new format (after ver.21) */ \ 3281 else /* new format (after ver.21) */ \
3311 { \ 3282 { \
3312 int c; \ 3283 int b; \
3313 \ 3284 \
3314 ONE_MORE_BYTE (c); \ 3285 ONE_MORE_BYTE (b); \
3315 rule = COMPOSITION_ENCODE_RULE (rule - 81, c - 32); \ 3286 if (! COMPOSITION_ENCODE_RULE_VALID (rule - 81, b - 32)) \
3316 if (rule >= 0) \ 3287 goto invalid_code; \
3317 rule += 0x100; /* to destinguish it from the old format */ \ 3288 rule = COMPOSITION_ENCODE_RULE (rule - 81, b - 32); \
3318 nbytes = 2; \ 3289 rule += 0x100; /* Distinguish it from the old format. */ \
3319 } \ 3290 } \
3320 } while (0) 3291 } while (0)
3321 3292
@@ -3491,7 +3462,7 @@ decode_coding_iso_2022 (struct coding_system *coding)
3491 loop and one more charset annotation at the end. */ 3462 loop and one more charset annotation at the end. */
3492 int *charbuf_end 3463 int *charbuf_end
3493 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3); 3464 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3494 int consumed_chars = 0, consumed_chars_base; 3465 EMACS_INT consumed_chars = 0, consumed_chars_base;
3495 int multibytep = coding->src_multibyte; 3466 int multibytep = coding->src_multibyte;
3496 /* Charsets invoked to graphic plane 0 and 1 respectively. */ 3467 /* Charsets invoked to graphic plane 0 and 1 respectively. */
3497 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0); 3468 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
@@ -3500,23 +3471,22 @@ decode_coding_iso_2022 (struct coding_system *coding)
3500 struct charset *charset; 3471 struct charset *charset;
3501 int c; 3472 int c;
3502 struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding); 3473 struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
3503 Lisp_Object attrs, charset_list; 3474 Lisp_Object attrs = CODING_ID_ATTRS (coding->id);
3504 int char_offset = coding->produced_char; 3475 EMACS_INT char_offset = coding->produced_char;
3505 int last_offset = char_offset; 3476 EMACS_INT last_offset = char_offset;
3506 int last_id = charset_ascii; 3477 int last_id = charset_ascii;
3507 int eol_crlf = 3478 int eol_dos =
3508 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 3479 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3509 int byte_after_cr = -1; 3480 int byte_after_cr = -1;
3510 int i; 3481 int i;
3511 3482
3512 CODING_GET_INFO (coding, attrs, charset_list);
3513 setup_iso_safe_charsets (attrs); 3483 setup_iso_safe_charsets (attrs);
3514 /* Charset list may have been changed. */
3515 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3516 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs)); 3484 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
3517 3485
3518 if (cmp_status->state != COMPOSING_NO) 3486 if (cmp_status->state != COMPOSING_NO)
3519 { 3487 {
3488 if (charbuf_end - charbuf < cmp_status->length)
3489 abort ();
3520 for (i = 0; i < cmp_status->length; i++) 3490 for (i = 0; i < cmp_status->length; i++)
3521 *charbuf++ = cmp_status->carryover[i]; 3491 *charbuf++ = cmp_status->carryover[i];
3522 coding->annotated = 1; 3492 coding->annotated = 1;
@@ -3582,11 +3552,9 @@ decode_coding_iso_2022 (struct coding_system *coding)
3582 || cmp_status->state == COMPOSING_COMPONENT_RULE) 3552 || cmp_status->state == COMPOSING_COMPONENT_RULE)
3583 && c1 != ISO_CODE_ESC) 3553 && c1 != ISO_CODE_ESC)
3584 { 3554 {
3585 int rule, nbytes; 3555 int rule;
3586 3556
3587 DECODE_COMPOSITION_RULE (rule, nbytes); 3557 DECODE_COMPOSITION_RULE (rule);
3588 if (rule < 0)
3589 goto invalid_code;
3590 STORE_COMPOSITION_RULE (rule); 3558 STORE_COMPOSITION_RULE (rule);
3591 continue; 3559 continue;
3592 } 3560 }
@@ -3624,7 +3592,7 @@ decode_coding_iso_2022 (struct coding_system *coding)
3624 break; 3592 break;
3625 3593
3626 case ISO_control_0: 3594 case ISO_control_0:
3627 if (eol_crlf && c1 == '\r') 3595 if (eol_dos && c1 == '\r')
3628 ONE_MORE_BYTE (byte_after_cr); 3596 ONE_MORE_BYTE (byte_after_cr);
3629 MAYBE_FINISH_COMPOSITION (); 3597 MAYBE_FINISH_COMPOSITION ();
3630 charset = CHARSET_FROM_ID (charset_ascii); 3598 charset = CHARSET_FROM_ID (charset_ascii);
@@ -3897,6 +3865,10 @@ decode_coding_iso_2022 (struct coding_system *coding)
3897 } 3865 }
3898 continue; 3866 continue;
3899 } 3867 }
3868 break;
3869
3870 default:
3871 abort ();
3900 } 3872 }
3901 3873
3902 if (cmp_status->state == COMPOSING_NO 3874 if (cmp_status->state == COMPOSING_NO
@@ -4029,7 +4001,6 @@ decode_coding_iso_2022 (struct coding_system *coding)
4029 const char *intermediate_char_94 = "()*+"; \ 4001 const char *intermediate_char_94 = "()*+"; \
4030 const char *intermediate_char_96 = ",-./"; \ 4002 const char *intermediate_char_96 = ",-./"; \
4031 int revision = -1; \ 4003 int revision = -1; \
4032 int c; \
4033 \ 4004 \
4034 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \ 4005 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
4035 revision = CHARSET_ISO_REVISION (charset); \ 4006 revision = CHARSET_ISO_REVISION (charset); \
@@ -4042,11 +4013,12 @@ decode_coding_iso_2022 (struct coding_system *coding)
4042 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \ 4013 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
4043 if (CHARSET_DIMENSION (charset) == 1) \ 4014 if (CHARSET_DIMENSION (charset) == 1) \
4044 { \ 4015 { \
4016 int b; \
4045 if (! CHARSET_ISO_CHARS_96 (charset)) \ 4017 if (! CHARSET_ISO_CHARS_96 (charset)) \
4046 c = intermediate_char_94[reg]; \ 4018 b = intermediate_char_94[reg]; \
4047 else \ 4019 else \
4048 c = intermediate_char_96[reg]; \ 4020 b = intermediate_char_96[reg]; \
4049 EMIT_ONE_ASCII_BYTE (c); \ 4021 EMIT_ONE_ASCII_BYTE (b); \
4050 } \ 4022 } \
4051 else \ 4023 else \
4052 { \ 4024 { \
@@ -4226,13 +4198,13 @@ decode_coding_iso_2022 (struct coding_system *coding)
4226 to use CHARSET. The element `spec.iso_2022' of *CODING is updated. 4198 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4227 Return new DST. */ 4199 Return new DST. */
4228 4200
4229unsigned char * 4201static unsigned char *
4230encode_invocation_designation (struct charset *charset, 4202encode_invocation_designation (struct charset *charset,
4231 struct coding_system *coding, 4203 struct coding_system *coding,
4232 unsigned char *dst, int *p_nchars) 4204 unsigned char *dst, EMACS_INT *p_nchars)
4233{ 4205{
4234 int multibytep = coding->dst_multibyte; 4206 int multibytep = coding->dst_multibyte;
4235 int produced_chars = *p_nchars; 4207 EMACS_INT produced_chars = *p_nchars;
4236 int reg; /* graphic register number */ 4208 int reg; /* graphic register number */
4237 int id = CHARSET_ID (charset); 4209 int id = CHARSET_ID (charset);
4238 4210
@@ -4289,30 +4261,6 @@ encode_invocation_designation (struct charset *charset,
4289 return dst; 4261 return dst;
4290} 4262}
4291 4263
4292/* The following three macros produce codes for indicating direction
4293 of text. */
4294#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
4295 do { \
4296 if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS) \
4297 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
4298 else \
4299 EMIT_ONE_BYTE (ISO_CODE_CSI); \
4300 } while (0)
4301
4302
4303#define ENCODE_DIRECTION_R2L() \
4304 do { \
4305 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
4306 EMIT_TWO_ASCII_BYTES ('2', ']'); \
4307 } while (0)
4308
4309
4310#define ENCODE_DIRECTION_L2R() \
4311 do { \
4312 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
4313 EMIT_TWO_ASCII_BYTES ('0', ']'); \
4314 } while (0)
4315
4316 4264
4317/* Produce codes for designation and invocation to reset the graphic 4265/* Produce codes for designation and invocation to reset the graphic
4318 planes and registers to initial state. */ 4266 planes and registers to initial state. */
@@ -4342,13 +4290,13 @@ encode_invocation_designation (struct charset *charset,
4342 4290
4343static unsigned char * 4291static unsigned char *
4344encode_designation_at_bol (struct coding_system *coding, int *charbuf, 4292encode_designation_at_bol (struct coding_system *coding, int *charbuf,
4345 int *charbuf_end, unsigned char *dst) 4293 unsigned char *dst)
4346{ 4294{
4347 struct charset *charset; 4295 struct charset *charset;
4348 /* Table of charsets to be designated to each graphic register. */ 4296 /* Table of charsets to be designated to each graphic register. */
4349 int r[4]; 4297 int r[4];
4350 int c, found = 0, reg; 4298 int c, found = 0, reg;
4351 int produced_chars = 0; 4299 EMACS_INT produced_chars = 0;
4352 int multibytep = coding->dst_multibyte; 4300 int multibytep = coding->dst_multibyte;
4353 Lisp_Object attrs; 4301 Lisp_Object attrs;
4354 Lisp_Object charset_list; 4302 Lisp_Object charset_list;
@@ -4403,7 +4351,7 @@ encode_coding_iso_2022 (struct coding_system *coding)
4403 int bol_designation 4351 int bol_designation
4404 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL 4352 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4405 && CODING_ISO_BOL (coding)); 4353 && CODING_ISO_BOL (coding));
4406 int produced_chars = 0; 4354 EMACS_INT produced_chars = 0;
4407 Lisp_Object attrs, eol_type, charset_list; 4355 Lisp_Object attrs, eol_type, charset_list;
4408 int ascii_compatible; 4356 int ascii_compatible;
4409 int c; 4357 int c;
@@ -4433,7 +4381,7 @@ encode_coding_iso_2022 (struct coding_system *coding)
4433 unsigned char *dst_prev = dst; 4381 unsigned char *dst_prev = dst;
4434 4382
4435 /* We have to produce designation sequences if any now. */ 4383 /* We have to produce designation sequences if any now. */
4436 dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst); 4384 dst = encode_designation_at_bol (coding, charbuf, dst);
4437 bol_designation = 0; 4385 bol_designation = 0;
4438 /* We are sure that designation sequences are all ASCII bytes. */ 4386 /* We are sure that designation sequences are all ASCII bytes. */
4439 produced_chars += dst - dst_prev; 4387 produced_chars += dst - dst_prev;
@@ -4591,7 +4539,7 @@ detect_coding_sjis (struct coding_system *coding,
4591 const unsigned char *src = coding->source, *src_base; 4539 const unsigned char *src = coding->source, *src_base;
4592 const unsigned char *src_end = coding->source + coding->src_bytes; 4540 const unsigned char *src_end = coding->source + coding->src_bytes;
4593 int multibytep = coding->src_multibyte; 4541 int multibytep = coding->src_multibyte;
4594 int consumed_chars = 0; 4542 EMACS_INT consumed_chars = 0;
4595 int found = 0; 4543 int found = 0;
4596 int c; 4544 int c;
4597 Lisp_Object attrs, charset_list; 4545 Lisp_Object attrs, charset_list;
@@ -4648,7 +4596,7 @@ detect_coding_big5 (struct coding_system *coding,
4648 const unsigned char *src = coding->source, *src_base; 4596 const unsigned char *src = coding->source, *src_base;
4649 const unsigned char *src_end = coding->source + coding->src_bytes; 4597 const unsigned char *src_end = coding->source + coding->src_bytes;
4650 int multibytep = coding->src_multibyte; 4598 int multibytep = coding->src_multibyte;
4651 int consumed_chars = 0; 4599 EMACS_INT consumed_chars = 0;
4652 int found = 0; 4600 int found = 0;
4653 int c; 4601 int c;
4654 4602
@@ -4699,15 +4647,15 @@ decode_coding_sjis (struct coding_system *coding)
4699 the end. */ 4647 the end. */
4700 int *charbuf_end 4648 int *charbuf_end
4701 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); 4649 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4702 int consumed_chars = 0, consumed_chars_base; 4650 EMACS_INT consumed_chars = 0, consumed_chars_base;
4703 int multibytep = coding->src_multibyte; 4651 int multibytep = coding->src_multibyte;
4704 struct charset *charset_roman, *charset_kanji, *charset_kana; 4652 struct charset *charset_roman, *charset_kanji, *charset_kana;
4705 struct charset *charset_kanji2; 4653 struct charset *charset_kanji2;
4706 Lisp_Object attrs, charset_list, val; 4654 Lisp_Object attrs, charset_list, val;
4707 int char_offset = coding->produced_char; 4655 EMACS_INT char_offset = coding->produced_char;
4708 int last_offset = char_offset; 4656 EMACS_INT last_offset = char_offset;
4709 int last_id = charset_ascii; 4657 int last_id = charset_ascii;
4710 int eol_crlf = 4658 int eol_dos =
4711 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 4659 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4712 int byte_after_cr = -1; 4660 int byte_after_cr = -1;
4713 4661
@@ -4742,7 +4690,7 @@ decode_coding_sjis (struct coding_system *coding)
4742 goto invalid_code; 4690 goto invalid_code;
4743 if (c < 0x80) 4691 if (c < 0x80)
4744 { 4692 {
4745 if (eol_crlf && c == '\r') 4693 if (eol_dos && c == '\r')
4746 ONE_MORE_BYTE (byte_after_cr); 4694 ONE_MORE_BYTE (byte_after_cr);
4747 charset = charset_roman; 4695 charset = charset_roman;
4748 } 4696 }
@@ -4817,14 +4765,14 @@ decode_coding_big5 (struct coding_system *coding)
4817 the end. */ 4765 the end. */
4818 int *charbuf_end 4766 int *charbuf_end
4819 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); 4767 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4820 int consumed_chars = 0, consumed_chars_base; 4768 EMACS_INT consumed_chars = 0, consumed_chars_base;
4821 int multibytep = coding->src_multibyte; 4769 int multibytep = coding->src_multibyte;
4822 struct charset *charset_roman, *charset_big5; 4770 struct charset *charset_roman, *charset_big5;
4823 Lisp_Object attrs, charset_list, val; 4771 Lisp_Object attrs, charset_list, val;
4824 int char_offset = coding->produced_char; 4772 EMACS_INT char_offset = coding->produced_char;
4825 int last_offset = char_offset; 4773 EMACS_INT last_offset = char_offset;
4826 int last_id = charset_ascii; 4774 int last_id = charset_ascii;
4827 int eol_crlf = 4775 int eol_dos =
4828 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 4776 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4829 int byte_after_cr = -1; 4777 int byte_after_cr = -1;
4830 4778
@@ -4857,7 +4805,7 @@ decode_coding_big5 (struct coding_system *coding)
4857 goto invalid_code; 4805 goto invalid_code;
4858 if (c < 0x80) 4806 if (c < 0x80)
4859 { 4807 {
4860 if (eol_crlf && c == '\r') 4808 if (eol_dos && c == '\r')
4861 ONE_MORE_BYTE (byte_after_cr); 4809 ONE_MORE_BYTE (byte_after_cr);
4862 charset = charset_roman; 4810 charset = charset_roman;
4863 } 4811 }
@@ -4919,16 +4867,15 @@ encode_coding_sjis (struct coding_system *coding)
4919 unsigned char *dst = coding->destination + coding->produced; 4867 unsigned char *dst = coding->destination + coding->produced;
4920 unsigned char *dst_end = coding->destination + coding->dst_bytes; 4868 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4921 int safe_room = 4; 4869 int safe_room = 4;
4922 int produced_chars = 0; 4870 EMACS_INT produced_chars = 0;
4923 Lisp_Object attrs, charset_list, val; 4871 Lisp_Object attrs, charset_list, val;
4924 int ascii_compatible; 4872 int ascii_compatible;
4925 struct charset *charset_roman, *charset_kanji, *charset_kana; 4873 struct charset *charset_kanji, *charset_kana;
4926 struct charset *charset_kanji2; 4874 struct charset *charset_kanji2;
4927 int c; 4875 int c;
4928 4876
4929 CODING_GET_INFO (coding, attrs, charset_list); 4877 CODING_GET_INFO (coding, attrs, charset_list);
4930 val = charset_list; 4878 val = XCDR (charset_list);
4931 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4932 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); 4879 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4933 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); 4880 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4934 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val))); 4881 charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
@@ -5011,15 +4958,14 @@ encode_coding_big5 (struct coding_system *coding)
5011 unsigned char *dst = coding->destination + coding->produced; 4958 unsigned char *dst = coding->destination + coding->produced;
5012 unsigned char *dst_end = coding->destination + coding->dst_bytes; 4959 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5013 int safe_room = 4; 4960 int safe_room = 4;
5014 int produced_chars = 0; 4961 EMACS_INT produced_chars = 0;
5015 Lisp_Object attrs, charset_list, val; 4962 Lisp_Object attrs, charset_list, val;
5016 int ascii_compatible; 4963 int ascii_compatible;
5017 struct charset *charset_roman, *charset_big5; 4964 struct charset *charset_big5;
5018 int c; 4965 int c;
5019 4966
5020 CODING_GET_INFO (coding, attrs, charset_list); 4967 CODING_GET_INFO (coding, attrs, charset_list);
5021 val = charset_list; 4968 val = XCDR (charset_list);
5022 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5023 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); 4969 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
5024 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); 4970 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5025 4971
@@ -5087,10 +5033,10 @@ detect_coding_ccl (struct coding_system *coding,
5087 const unsigned char *src = coding->source, *src_base; 5033 const unsigned char *src = coding->source, *src_base;
5088 const unsigned char *src_end = coding->source + coding->src_bytes; 5034 const unsigned char *src_end = coding->source + coding->src_bytes;
5089 int multibytep = coding->src_multibyte; 5035 int multibytep = coding->src_multibyte;
5090 int consumed_chars = 0; 5036 EMACS_INT consumed_chars = 0;
5091 int found = 0; 5037 int found = 0;
5092 unsigned char *valids; 5038 unsigned char *valids;
5093 int head_ascii = coding->head_ascii; 5039 EMACS_INT head_ascii = coding->head_ascii;
5094 Lisp_Object attrs; 5040 Lisp_Object attrs;
5095 5041
5096 detect_info->checked |= CATEGORY_MASK_CCL; 5042 detect_info->checked |= CATEGORY_MASK_CCL;
@@ -5127,7 +5073,7 @@ decode_coding_ccl (struct coding_system *coding)
5127 const unsigned char *src_end = coding->source + coding->src_bytes; 5073 const unsigned char *src_end = coding->source + coding->src_bytes;
5128 int *charbuf = coding->charbuf + coding->charbuf_used; 5074 int *charbuf = coding->charbuf + coding->charbuf_used;
5129 int *charbuf_end = coding->charbuf + coding->charbuf_size; 5075 int *charbuf_end = coding->charbuf + coding->charbuf_size;
5130 int consumed_chars = 0; 5076 EMACS_INT consumed_chars = 0;
5131 int multibytep = coding->src_multibyte; 5077 int multibytep = coding->src_multibyte;
5132 struct ccl_program *ccl = &coding->spec.ccl->ccl; 5078 struct ccl_program *ccl = &coding->spec.ccl->ccl;
5133 int source_charbuf[1024]; 5079 int source_charbuf[1024];
@@ -5199,7 +5145,8 @@ encode_coding_ccl (struct coding_system *coding)
5199 unsigned char *dst = coding->destination + coding->produced; 5145 unsigned char *dst = coding->destination + coding->produced;
5200 unsigned char *dst_end = coding->destination + coding->dst_bytes; 5146 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5201 int destination_charbuf[1024]; 5147 int destination_charbuf[1024];
5202 int i, produced_chars = 0; 5148 EMACS_INT produced_chars = 0;
5149 int i;
5203 Lisp_Object attrs, charset_list; 5150 Lisp_Object attrs, charset_list;
5204 5151
5205 CODING_GET_INFO (coding, attrs, charset_list); 5152 CODING_GET_INFO (coding, attrs, charset_list);
@@ -5261,13 +5208,13 @@ encode_coding_ccl (struct coding_system *coding)
5261static void 5208static void
5262decode_coding_raw_text (struct coding_system *coding) 5209decode_coding_raw_text (struct coding_system *coding)
5263{ 5210{
5264 int eol_crlf = 5211 int eol_dos =
5265 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 5212 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5266 5213
5267 coding->chars_at_source = 1; 5214 coding->chars_at_source = 1;
5268 coding->consumed_char = coding->src_chars; 5215 coding->consumed_char = coding->src_chars;
5269 coding->consumed = coding->src_bytes; 5216 coding->consumed = coding->src_bytes;
5270 if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r') 5217 if (eol_dos && coding->source[coding->src_bytes - 1] == '\r')
5271 { 5218 {
5272 coding->consumed_char--; 5219 coding->consumed_char--;
5273 coding->consumed--; 5220 coding->consumed--;
@@ -5285,7 +5232,7 @@ encode_coding_raw_text (struct coding_system *coding)
5285 int *charbuf_end = coding->charbuf + coding->charbuf_used; 5232 int *charbuf_end = coding->charbuf + coding->charbuf_used;
5286 unsigned char *dst = coding->destination + coding->produced; 5233 unsigned char *dst = coding->destination + coding->produced;
5287 unsigned char *dst_end = coding->destination + coding->dst_bytes; 5234 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5288 int produced_chars = 0; 5235 EMACS_INT produced_chars = 0;
5289 int c; 5236 int c;
5290 5237
5291 if (multibytep) 5238 if (multibytep)
@@ -5309,11 +5256,12 @@ encode_coding_raw_text (struct coding_system *coding)
5309 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str; 5256 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
5310 5257
5311 CHAR_STRING_ADVANCE (c, p1); 5258 CHAR_STRING_ADVANCE (c, p1);
5312 while (p0 < p1) 5259 do
5313 { 5260 {
5314 EMIT_ONE_BYTE (*p0); 5261 EMIT_ONE_BYTE (*p0);
5315 p0++; 5262 p0++;
5316 } 5263 }
5264 while (p0 < p1);
5317 } 5265 }
5318 } 5266 }
5319 else 5267 else
@@ -5367,10 +5315,10 @@ detect_coding_charset (struct coding_system *coding,
5367 const unsigned char *src = coding->source, *src_base; 5315 const unsigned char *src = coding->source, *src_base;
5368 const unsigned char *src_end = coding->source + coding->src_bytes; 5316 const unsigned char *src_end = coding->source + coding->src_bytes;
5369 int multibytep = coding->src_multibyte; 5317 int multibytep = coding->src_multibyte;
5370 int consumed_chars = 0; 5318 EMACS_INT consumed_chars = 0;
5371 Lisp_Object attrs, valids, name; 5319 Lisp_Object attrs, valids, name;
5372 int found = 0; 5320 int found = 0;
5373 int head_ascii = coding->head_ascii; 5321 EMACS_INT head_ascii = coding->head_ascii;
5374 int check_latin_extra = 0; 5322 int check_latin_extra = 0;
5375 5323
5376 detect_info->checked |= CATEGORY_MASK_CHARSET; 5324 detect_info->checked |= CATEGORY_MASK_CHARSET;
@@ -5420,8 +5368,8 @@ detect_coding_charset (struct coding_system *coding,
5420 if (src == src_end) 5368 if (src == src_end)
5421 goto too_short; 5369 goto too_short;
5422 ONE_MORE_BYTE (c); 5370 ONE_MORE_BYTE (c);
5423 if (c < charset->code_space[(dim - 1 - idx) * 2] 5371 if (c < charset->code_space[(dim - 1 - idx) * 4]
5424 || c > charset->code_space[(dim - 1 - idx) * 2 + 1]) 5372 || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5425 break; 5373 break;
5426 } 5374 }
5427 if (idx < dim) 5375 if (idx < dim)
@@ -5474,17 +5422,17 @@ decode_coding_charset (struct coding_system *coding)
5474 the end. */ 5422 the end. */
5475 int *charbuf_end 5423 int *charbuf_end
5476 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); 5424 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
5477 int consumed_chars = 0, consumed_chars_base; 5425 EMACS_INT consumed_chars = 0, consumed_chars_base;
5478 int multibytep = coding->src_multibyte; 5426 int multibytep = coding->src_multibyte;
5479 Lisp_Object attrs, charset_list, valids; 5427 Lisp_Object attrs = CODING_ID_ATTRS (coding->id);
5480 int char_offset = coding->produced_char; 5428 Lisp_Object valids;
5481 int last_offset = char_offset; 5429 EMACS_INT char_offset = coding->produced_char;
5430 EMACS_INT last_offset = char_offset;
5482 int last_id = charset_ascii; 5431 int last_id = charset_ascii;
5483 int eol_crlf = 5432 int eol_dos =
5484 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 5433 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5485 int byte_after_cr = -1; 5434 int byte_after_cr = -1;
5486 5435
5487 CODING_GET_INFO (coding, attrs, charset_list);
5488 valids = AREF (attrs, coding_attr_charset_valids); 5436 valids = AREF (attrs, coding_attr_charset_valids);
5489 5437
5490 while (1) 5438 while (1)
@@ -5514,7 +5462,7 @@ decode_coding_charset (struct coding_system *coding)
5514 else 5462 else
5515 { 5463 {
5516 ONE_MORE_BYTE (c); 5464 ONE_MORE_BYTE (c);
5517 if (eol_crlf && c == '\r') 5465 if (eol_dos && c == '\r')
5518 ONE_MORE_BYTE (byte_after_cr); 5466 ONE_MORE_BYTE (byte_after_cr);
5519 } 5467 }
5520 if (c < 0) 5468 if (c < 0)
@@ -5600,7 +5548,7 @@ encode_coding_charset (struct coding_system *coding)
5600 unsigned char *dst = coding->destination + coding->produced; 5548 unsigned char *dst = coding->destination + coding->produced;
5601 unsigned char *dst_end = coding->destination + coding->dst_bytes; 5549 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5602 int safe_room = MAX_MULTIBYTE_LENGTH; 5550 int safe_room = MAX_MULTIBYTE_LENGTH;
5603 int produced_chars = 0; 5551 EMACS_INT produced_chars = 0;
5604 Lisp_Object attrs, charset_list; 5552 Lisp_Object attrs, charset_list;
5605 int ascii_compatible; 5553 int ascii_compatible;
5606 int c; 5554 int c;
@@ -6272,7 +6220,7 @@ adjust_coding_eol_type (struct coding_system *coding, int eol_seen)
6272 system is detected, update fields of CODING by the detected coding 6220 system is detected, update fields of CODING by the detected coding
6273 system. */ 6221 system. */
6274 6222
6275void 6223static void
6276detect_coding (struct coding_system *coding) 6224detect_coding (struct coding_system *coding)
6277{ 6225{
6278 const unsigned char *src, *src_end; 6226 const unsigned char *src, *src_end;
@@ -6508,7 +6456,7 @@ decode_eol (struct coding_system *coding)
6508 } 6456 }
6509 else if (EQ (eol_type, Qdos)) 6457 else if (EQ (eol_type, Qdos))
6510 { 6458 {
6511 int n = 0; 6459 EMACS_INT n = 0;
6512 6460
6513 if (NILP (coding->dst_object)) 6461 if (NILP (coding->dst_object))
6514 { 6462 {
@@ -6523,9 +6471,9 @@ decode_eol (struct coding_system *coding)
6523 } 6471 }
6524 else 6472 else
6525 { 6473 {
6526 int pos_byte = coding->dst_pos_byte; 6474 EMACS_INT pos_byte = coding->dst_pos_byte;
6527 int pos = coding->dst_pos; 6475 EMACS_INT pos = coding->dst_pos;
6528 int pos_end = pos + coding->produced_char - 1; 6476 EMACS_INT pos_end = pos + coding->produced_char - 1;
6529 6477
6530 while (pos < pos_end) 6478 while (pos < pos_end)
6531 { 6479 {
@@ -6607,15 +6555,15 @@ get_translation_table (Lisp_Object attrs, int encodep, int *max_lookup)
6607 } 6555 }
6608 else if (CONSP (translation_table)) 6556 else if (CONSP (translation_table))
6609 { 6557 {
6610 Lisp_Object tail, val; 6558 Lisp_Object tail;
6611 6559
6612 for (tail = translation_table; CONSP (tail); tail = XCDR (tail)) 6560 for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6613 if (CHAR_TABLE_P (XCAR (tail)) 6561 if (CHAR_TABLE_P (XCAR (tail))
6614 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1) 6562 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6615 { 6563 {
6616 val = XCHAR_TABLE (XCAR (tail))->extras[1]; 6564 Lisp_Object tailval = XCHAR_TABLE (XCAR (tail))->extras[1];
6617 if (NATNUMP (val) && *max_lookup < XFASTINT (val)) 6565 if (NATNUMP (tailval) && *max_lookup < XFASTINT (tailval))
6618 *max_lookup = XFASTINT (val); 6566 *max_lookup = XFASTINT (tailval);
6619 } 6567 }
6620 } 6568 }
6621 } 6569 }
@@ -6710,7 +6658,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table,
6710 6658
6711 if (c >= 0) 6659 if (c >= 0)
6712 { 6660 {
6713 int from_nchars = 1, to_nchars = 1; 6661 EMACS_INT from_nchars = 1, to_nchars = 1;
6714 Lisp_Object trans = Qnil; 6662 Lisp_Object trans = Qnil;
6715 6663
6716 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans); 6664 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
@@ -6881,7 +6829,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table,
6881 [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ] 6829 [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
6882 */ 6830 */
6883 6831
6884static INLINE void 6832static inline void
6885produce_composition (struct coding_system *coding, int *charbuf, EMACS_INT pos) 6833produce_composition (struct coding_system *coding, int *charbuf, EMACS_INT pos)
6886{ 6834{
6887 int len; 6835 int len;
@@ -6925,7 +6873,7 @@ produce_composition (struct coding_system *coding, int *charbuf, EMACS_INT pos)
6925 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ] 6873 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6926 */ 6874 */
6927 6875
6928static INLINE void 6876static inline void
6929produce_charset (struct coding_system *coding, int *charbuf, EMACS_INT pos) 6877produce_charset (struct coding_system *coding, int *charbuf, EMACS_INT pos)
6930{ 6878{
6931 EMACS_INT from = pos - charbuf[2]; 6879 EMACS_INT from = pos - charbuf[2];
@@ -7038,8 +6986,8 @@ decode_coding (struct coding_system *coding)
7038 set_buffer_internal (XBUFFER (coding->dst_object)); 6986 set_buffer_internal (XBUFFER (coding->dst_object));
7039 if (GPT != PT) 6987 if (GPT != PT)
7040 move_gap_both (PT, PT_BYTE); 6988 move_gap_both (PT, PT_BYTE);
7041 undo_list = current_buffer->undo_list; 6989 undo_list = BVAR (current_buffer, undo_list);
7042 current_buffer->undo_list = Qt; 6990 BVAR (current_buffer, undo_list) = Qt;
7043 } 6991 }
7044 6992
7045 coding->consumed = coding->consumed_char = 0; 6993 coding->consumed = coding->consumed_char = 0;
@@ -7136,7 +7084,7 @@ decode_coding (struct coding_system *coding)
7136 decode_eol (coding); 7084 decode_eol (coding);
7137 if (BUFFERP (coding->dst_object)) 7085 if (BUFFERP (coding->dst_object))
7138 { 7086 {
7139 current_buffer->undo_list = undo_list; 7087 BVAR (current_buffer, undo_list) = undo_list;
7140 record_insert (coding->dst_pos, coding->produced_char); 7088 record_insert (coding->dst_pos, coding->produced_char);
7141 } 7089 }
7142 return coding->result; 7090 return coding->result;
@@ -7153,7 +7101,7 @@ decode_coding (struct coding_system *coding)
7153 position of a composition after POS (if any) or to LIMIT, and 7101 position of a composition after POS (if any) or to LIMIT, and
7154 return BUF. */ 7102 return BUF. */
7155 7103
7156static INLINE int * 7104static inline int *
7157handle_composition_annotation (EMACS_INT pos, EMACS_INT limit, 7105handle_composition_annotation (EMACS_INT pos, EMACS_INT limit,
7158 struct coding_system *coding, int *buf, 7106 struct coding_system *coding, int *buf,
7159 EMACS_INT *stop) 7107 EMACS_INT *stop)
@@ -7185,7 +7133,7 @@ handle_composition_annotation (EMACS_INT pos, EMACS_INT limit,
7185 components = COMPOSITION_COMPONENTS (prop); 7133 components = COMPOSITION_COMPONENTS (prop);
7186 if (VECTORP (components)) 7134 if (VECTORP (components))
7187 { 7135 {
7188 len = XVECTOR (components)->size; 7136 len = ASIZE (components);
7189 for (i = 0; i < len; i++) 7137 for (i = 0; i < len; i++)
7190 *buf++ = XINT (AREF (components, i)); 7138 *buf++ = XINT (AREF (components, i));
7191 } 7139 }
@@ -7236,7 +7184,7 @@ handle_composition_annotation (EMACS_INT pos, EMACS_INT limit,
7236 If the property value is nil, set *STOP to the position where the 7184 If the property value is nil, set *STOP to the position where the
7237 property value is non-nil (limiting by LIMIT), and return BUF. */ 7185 property value is non-nil (limiting by LIMIT), and return BUF. */
7238 7186
7239static INLINE int * 7187static inline int *
7240handle_charset_annotation (EMACS_INT pos, EMACS_INT limit, 7188handle_charset_annotation (EMACS_INT pos, EMACS_INT limit,
7241 struct coding_system *coding, int *buf, 7189 struct coding_system *coding, int *buf,
7242 EMACS_INT *stop) 7190 EMACS_INT *stop)
@@ -7433,7 +7381,7 @@ encode_coding (struct coding_system *coding)
7433 { 7381 {
7434 set_buffer_internal (XBUFFER (coding->dst_object)); 7382 set_buffer_internal (XBUFFER (coding->dst_object));
7435 coding->dst_multibyte 7383 coding->dst_multibyte
7436 = ! NILP (current_buffer->enable_multibyte_characters); 7384 = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
7437 } 7385 }
7438 7386
7439 coding->consumed = coding->consumed_char = 0; 7387 coding->consumed = coding->consumed_char = 0;
@@ -7504,8 +7452,8 @@ make_conversion_work_buffer (int multibyte)
7504 doesn't compile new regexps. */ 7452 doesn't compile new regexps. */
7505 Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt); 7453 Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7506 Ferase_buffer (); 7454 Ferase_buffer ();
7507 current_buffer->undo_list = Qt; 7455 BVAR (current_buffer, undo_list) = Qt;
7508 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil; 7456 BVAR (current_buffer, enable_multibyte_characters) = multibyte ? Qt : Qnil;
7509 set_buffer_internal (current); 7457 set_buffer_internal (current);
7510 return workbuf; 7458 return workbuf;
7511} 7459}
@@ -7562,7 +7510,7 @@ decode_coding_gap (struct coding_system *coding,
7562 coding->dst_object = coding->src_object; 7510 coding->dst_object = coding->src_object;
7563 coding->dst_pos = PT; 7511 coding->dst_pos = PT;
7564 coding->dst_pos_byte = PT_BYTE; 7512 coding->dst_pos_byte = PT_BYTE;
7565 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters); 7513 coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
7566 7514
7567 if (CODING_REQUIRE_DETECTION (coding)) 7515 if (CODING_REQUIRE_DETECTION (coding))
7568 detect_coding (coding); 7516 detect_coding (coding);
@@ -7590,30 +7538,6 @@ decode_coding_gap (struct coding_system *coding,
7590 return coding->result; 7538 return coding->result;
7591} 7539}
7592 7540
7593int
7594encode_coding_gap (struct coding_system *coding,
7595 EMACS_INT chars, EMACS_INT bytes)
7596{
7597 int count = SPECPDL_INDEX ();
7598
7599 code_conversion_save (0, 0);
7600
7601 coding->src_object = Fcurrent_buffer ();
7602 coding->src_chars = chars;
7603 coding->src_bytes = bytes;
7604 coding->src_pos = -chars;
7605 coding->src_pos_byte = -bytes;
7606 coding->src_multibyte = chars < bytes;
7607 coding->dst_object = coding->src_object;
7608 coding->dst_pos = PT;
7609 coding->dst_pos_byte = PT_BYTE;
7610
7611 encode_coding (coding);
7612
7613 unbind_to (count, Qnil);
7614 return coding->result;
7615}
7616
7617 7541
7618/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in 7542/* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7619 SRC_OBJECT into DST_OBJECT by coding context CODING. 7543 SRC_OBJECT into DST_OBJECT by coding context CODING.
@@ -7652,12 +7576,12 @@ decode_coding_object (struct coding_system *coding,
7652 Lisp_Object dst_object) 7576 Lisp_Object dst_object)
7653{ 7577{
7654 int count = SPECPDL_INDEX (); 7578 int count = SPECPDL_INDEX ();
7655 unsigned char *destination; 7579 unsigned char *destination IF_LINT (= NULL);
7656 EMACS_INT dst_bytes; 7580 EMACS_INT dst_bytes IF_LINT (= 0);
7657 EMACS_INT chars = to - from; 7581 EMACS_INT chars = to - from;
7658 EMACS_INT bytes = to_byte - from_byte; 7582 EMACS_INT bytes = to_byte - from_byte;
7659 Lisp_Object attrs; 7583 Lisp_Object attrs;
7660 int saved_pt = -1, saved_pt_byte; 7584 int saved_pt = -1, saved_pt_byte IF_LINT (= 0);
7661 int need_marker_adjustment = 0; 7585 int need_marker_adjustment = 0;
7662 Lisp_Object old_deactivate_mark; 7586 Lisp_Object old_deactivate_mark;
7663 7587
@@ -7728,7 +7652,7 @@ decode_coding_object (struct coding_system *coding,
7728 coding->dst_pos = BUF_PT (XBUFFER (dst_object)); 7652 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7729 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object)); 7653 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7730 coding->dst_multibyte 7654 coding->dst_multibyte
7731 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters); 7655 = ! NILP (BVAR (XBUFFER (dst_object), enable_multibyte_characters));
7732 } 7656 }
7733 else 7657 else
7734 { 7658 {
@@ -7798,7 +7722,7 @@ decode_coding_object (struct coding_system *coding,
7798 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte); 7722 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7799 else if (saved_pt < from + chars) 7723 else if (saved_pt < from + chars)
7800 TEMP_SET_PT_BOTH (from, from_byte); 7724 TEMP_SET_PT_BOTH (from, from_byte);
7801 else if (! NILP (current_buffer->enable_multibyte_characters)) 7725 else if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
7802 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars), 7726 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7803 saved_pt_byte + (coding->produced - bytes)); 7727 saved_pt_byte + (coding->produced - bytes));
7804 else 7728 else
@@ -7822,7 +7746,7 @@ decode_coding_object (struct coding_system *coding,
7822 { 7746 {
7823 tail->bytepos = from_byte + coding->produced; 7747 tail->bytepos = from_byte + coding->produced;
7824 tail->charpos 7748 tail->charpos
7825 = (NILP (current_buffer->enable_multibyte_characters) 7749 = (NILP (BVAR (current_buffer, enable_multibyte_characters))
7826 ? tail->bytepos : from + coding->produced_char); 7750 ? tail->bytepos : from + coding->produced_char);
7827 } 7751 }
7828 } 7752 }
@@ -7845,7 +7769,7 @@ encode_coding_object (struct coding_system *coding,
7845 EMACS_INT chars = to - from; 7769 EMACS_INT chars = to - from;
7846 EMACS_INT bytes = to_byte - from_byte; 7770 EMACS_INT bytes = to_byte - from_byte;
7847 Lisp_Object attrs; 7771 Lisp_Object attrs;
7848 int saved_pt = -1, saved_pt_byte; 7772 int saved_pt = -1, saved_pt_byte IF_LINT (= 0);
7849 int need_marker_adjustment = 0; 7773 int need_marker_adjustment = 0;
7850 int kill_src_buffer = 0; 7774 int kill_src_buffer = 0;
7851 Lisp_Object old_deactivate_mark; 7775 Lisp_Object old_deactivate_mark;
@@ -7880,7 +7804,7 @@ encode_coding_object (struct coding_system *coding,
7880 else if (BUFFERP (src_object)) 7804 else if (BUFFERP (src_object))
7881 insert_from_buffer (XBUFFER (src_object), from, chars, 0); 7805 insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7882 else 7806 else
7883 insert_1_both (coding->source + from, chars, bytes, 0, 0, 0); 7807 insert_1_both ((char *) coding->source + from, chars, bytes, 0, 0, 0);
7884 7808
7885 if (EQ (src_object, dst_object)) 7809 if (EQ (src_object, dst_object))
7886 { 7810 {
@@ -7960,7 +7884,7 @@ encode_coding_object (struct coding_system *coding,
7960 set_buffer_temp (current); 7884 set_buffer_temp (current);
7961 } 7885 }
7962 coding->dst_multibyte 7886 coding->dst_multibyte
7963 = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters); 7887 = ! NILP (BVAR (XBUFFER (dst_object), enable_multibyte_characters));
7964 } 7888 }
7965 else if (EQ (dst_object, Qt)) 7889 else if (EQ (dst_object, Qt))
7966 { 7890 {
@@ -8003,7 +7927,7 @@ encode_coding_object (struct coding_system *coding,
8003 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte); 7927 TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8004 else if (saved_pt < from + chars) 7928 else if (saved_pt < from + chars)
8005 TEMP_SET_PT_BOTH (from, from_byte); 7929 TEMP_SET_PT_BOTH (from, from_byte);
8006 else if (! NILP (current_buffer->enable_multibyte_characters)) 7930 else if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
8007 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars), 7931 TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8008 saved_pt_byte + (coding->produced - bytes)); 7932 saved_pt_byte + (coding->produced - bytes));
8009 else 7933 else
@@ -8027,7 +7951,7 @@ encode_coding_object (struct coding_system *coding,
8027 { 7951 {
8028 tail->bytepos = from_byte + coding->produced; 7952 tail->bytepos = from_byte + coding->produced;
8029 tail->charpos 7953 tail->charpos
8030 = (NILP (current_buffer->enable_multibyte_characters) 7954 = (NILP (BVAR (current_buffer, enable_multibyte_characters))
8031 ? tail->bytepos : from + coding->produced_char); 7955 ? tail->bytepos : from + coding->produced_char);
8032 } 7956 }
8033 } 7957 }
@@ -8178,8 +8102,8 @@ detect_coding_system (const unsigned char *src,
8178 base_category = XINT (CODING_ATTR_CATEGORY (attrs)); 8102 base_category = XINT (CODING_ATTR_CATEGORY (attrs));
8179 if (base_category == coding_category_undecided) 8103 if (base_category == coding_category_undecided)
8180 { 8104 {
8181 enum coding_category category; 8105 enum coding_category category IF_LINT (= 0);
8182 struct coding_system *this; 8106 struct coding_system *this IF_LINT (= NULL);
8183 int c, i; 8107 int c, i;
8184 8108
8185 /* Skip all ASCII bytes except for a few ISO2022 controls. */ 8109 /* Skip all ASCII bytes except for a few ISO2022 controls. */
@@ -8481,8 +8405,8 @@ highest priority. */)
8481 return detect_coding_system (BYTE_POS_ADDR (from_byte), 8405 return detect_coding_system (BYTE_POS_ADDR (from_byte),
8482 to - from, to_byte - from_byte, 8406 to - from, to_byte - from_byte,
8483 !NILP (highest), 8407 !NILP (highest),
8484 !NILP (current_buffer 8408 !NILP (BVAR (current_buffer
8485 ->enable_multibyte_characters), 8409 , enable_multibyte_characters)),
8486 Qnil); 8410 Qnil);
8487} 8411}
8488 8412
@@ -8511,7 +8435,7 @@ highest priority. */)
8511} 8435}
8512 8436
8513 8437
8514static INLINE int 8438static inline int
8515char_encodable_p (int c, Lisp_Object attrs) 8439char_encodable_p (int c, Lisp_Object attrs)
8516{ 8440{
8517 Lisp_Object tail; 8441 Lisp_Object tail;
@@ -8564,7 +8488,7 @@ DEFUN ("find-coding-systems-region-internal",
8564 CHECK_NUMBER_COERCE_MARKER (end); 8488 CHECK_NUMBER_COERCE_MARKER (end);
8565 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end)) 8489 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8566 args_out_of_range (start, end); 8490 args_out_of_range (start, end);
8567 if (NILP (current_buffer->enable_multibyte_characters)) 8491 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
8568 return Qt; 8492 return Qt;
8569 start_byte = CHAR_TO_BYTE (XINT (start)); 8493 start_byte = CHAR_TO_BYTE (XINT (start));
8570 end_byte = CHAR_TO_BYTE (XINT (end)); 8494 end_byte = CHAR_TO_BYTE (XINT (end));
@@ -8698,7 +8622,7 @@ to the string. */)
8698 validate_region (&start, &end); 8622 validate_region (&start, &end);
8699 from = XINT (start); 8623 from = XINT (start);
8700 to = XINT (end); 8624 to = XINT (end);
8701 if (NILP (current_buffer->enable_multibyte_characters) 8625 if (NILP (BVAR (current_buffer, enable_multibyte_characters))
8702 || (ascii_compatible 8626 || (ascii_compatible
8703 && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from))))) 8627 && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8704 return Qnil; 8628 return Qnil;
@@ -8814,7 +8738,7 @@ is nil. */)
8814 CHECK_NUMBER_COERCE_MARKER (end); 8738 CHECK_NUMBER_COERCE_MARKER (end);
8815 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end)) 8739 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8816 args_out_of_range (start, end); 8740 args_out_of_range (start, end);
8817 if (NILP (current_buffer->enable_multibyte_characters)) 8741 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
8818 return Qnil; 8742 return Qnil;
8819 start_byte = CHAR_TO_BYTE (XINT (start)); 8743 start_byte = CHAR_TO_BYTE (XINT (start));
8820 end_byte = CHAR_TO_BYTE (XINT (end)); 8744 end_byte = CHAR_TO_BYTE (XINT (end));
@@ -8894,7 +8818,7 @@ is nil. */)
8894} 8818}
8895 8819
8896 8820
8897Lisp_Object 8821static Lisp_Object
8898code_convert_region (Lisp_Object start, Lisp_Object end, 8822code_convert_region (Lisp_Object start, Lisp_Object end,
8899 Lisp_Object coding_system, Lisp_Object dst_object, 8823 Lisp_Object coding_system, Lisp_Object dst_object,
8900 int encodep, int norecord) 8824 int encodep, int norecord)
@@ -9087,14 +9011,15 @@ Return the corresponding character. */)
9087{ 9011{
9088 Lisp_Object spec, attrs, val; 9012 Lisp_Object spec, attrs, val;
9089 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset; 9013 struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
9014 EMACS_INT ch;
9090 int c; 9015 int c;
9091 9016
9092 CHECK_NATNUM (code); 9017 CHECK_NATNUM (code);
9093 c = XFASTINT (code); 9018 ch = XFASTINT (code);
9094 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec); 9019 CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9095 attrs = AREF (spec, 0); 9020 attrs = AREF (spec, 0);
9096 9021
9097 if (ASCII_BYTE_P (c) 9022 if (ASCII_BYTE_P (ch)
9098 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) 9023 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9099 return code; 9024 return code;
9100 9025
@@ -9103,26 +9028,31 @@ Return the corresponding character. */)
9103 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); 9028 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9104 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))); 9029 charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
9105 9030
9106 if (c <= 0x7F) 9031 if (ch <= 0x7F)
9107 charset = charset_roman;
9108 else if (c >= 0xA0 && c < 0xDF)
9109 { 9032 {
9033 c = ch;
9034 charset = charset_roman;
9035 }
9036 else if (ch >= 0xA0 && ch < 0xDF)
9037 {
9038 c = ch - 0x80;
9110 charset = charset_kana; 9039 charset = charset_kana;
9111 c -= 0x80;
9112 } 9040 }
9113 else 9041 else
9114 { 9042 {
9115 int s1 = c >> 8, s2 = c & 0xFF; 9043 EMACS_INT c1 = ch >> 8;
9044 int c2 = ch & 0xFF;
9116 9045
9117 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF 9046 if (c1 < 0x81 || (c1 > 0x9F && c1 < 0xE0) || c1 > 0xEF
9118 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC) 9047 || c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
9119 error ("Invalid code: %d", code); 9048 error ("Invalid code: %"pI"d", ch);
9049 c = ch;
9120 SJIS_TO_JIS (c); 9050 SJIS_TO_JIS (c);
9121 charset = charset_kanji; 9051 charset = charset_kanji;
9122 } 9052 }
9123 c = DECODE_CHAR (charset, c); 9053 c = DECODE_CHAR (charset, c);
9124 if (c < 0) 9054 if (c < 0)
9125 error ("Invalid code: %d", code); 9055 error ("Invalid code: %"pI"d", ch);
9126 return make_number (c); 9056 return make_number (c);
9127} 9057}
9128 9058
@@ -9149,7 +9079,7 @@ Return the corresponding code in SJIS. */)
9149 charset_list = CODING_ATTR_CHARSET_LIST (attrs); 9079 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9150 charset = char_charset (c, charset_list, &code); 9080 charset = char_charset (c, charset_list, &code);
9151 if (code == CHARSET_INVALID_CODE (charset)) 9081 if (code == CHARSET_INVALID_CODE (charset))
9152 error ("Can't encode by shift_jis encoding: %d", c); 9082 error ("Can't encode by shift_jis encoding: %c", c);
9153 JIS_TO_SJIS (code); 9083 JIS_TO_SJIS (code);
9154 9084
9155 return make_number (code); 9085 return make_number (code);
@@ -9162,14 +9092,15 @@ Return the corresponding character. */)
9162{ 9092{
9163 Lisp_Object spec, attrs, val; 9093 Lisp_Object spec, attrs, val;
9164 struct charset *charset_roman, *charset_big5, *charset; 9094 struct charset *charset_roman, *charset_big5, *charset;
9095 EMACS_INT ch;
9165 int c; 9096 int c;
9166 9097
9167 CHECK_NATNUM (code); 9098 CHECK_NATNUM (code);
9168 c = XFASTINT (code); 9099 ch = XFASTINT (code);
9169 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec); 9100 CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9170 attrs = AREF (spec, 0); 9101 attrs = AREF (spec, 0);
9171 9102
9172 if (ASCII_BYTE_P (c) 9103 if (ASCII_BYTE_P (ch)
9173 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) 9104 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9174 return code; 9105 return code;
9175 9106
@@ -9177,19 +9108,24 @@ Return the corresponding character. */)
9177 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); 9108 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9178 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); 9109 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
9179 9110
9180 if (c <= 0x7F) 9111 if (ch <= 0x7F)
9181 charset = charset_roman; 9112 {
9113 c = ch;
9114 charset = charset_roman;
9115 }
9182 else 9116 else
9183 { 9117 {
9184 int b1 = c >> 8, b2 = c & 0x7F; 9118 EMACS_INT b1 = ch >> 8;
9119 int b2 = ch & 0x7F;
9185 if (b1 < 0xA1 || b1 > 0xFE 9120 if (b1 < 0xA1 || b1 > 0xFE
9186 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE) 9121 || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
9187 error ("Invalid code: %d", code); 9122 error ("Invalid code: %"pI"d", ch);
9123 c = ch;
9188 charset = charset_big5; 9124 charset = charset_big5;
9189 } 9125 }
9190 c = DECODE_CHAR (charset, (unsigned )c); 9126 c = DECODE_CHAR (charset, c);
9191 if (c < 0) 9127 if (c < 0)
9192 error ("Invalid code: %d", code); 9128 error ("Invalid code: %"pI"d", ch);
9193 return make_number (c); 9129 return make_number (c);
9194} 9130}
9195 9131
@@ -9214,7 +9150,7 @@ Return the corresponding character code in Big5. */)
9214 charset_list = CODING_ATTR_CHARSET_LIST (attrs); 9150 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9215 charset = char_charset (c, charset_list, &code); 9151 charset = char_charset (c, charset_list, &code);
9216 if (code == CHARSET_INVALID_CODE (charset)) 9152 if (code == CHARSET_INVALID_CODE (charset))
9217 error ("Can't encode by Big5 encoding: %d", c); 9153 error ("Can't encode by Big5 encoding: %c", c);
9218 9154
9219 return make_number (code); 9155 return make_number (code);
9220} 9156}
@@ -9342,7 +9278,7 @@ function to call for FILENAME, that function should examine the
9342contents of BUFFER instead of reading the file. 9278contents of BUFFER instead of reading the file.
9343 9279
9344usage: (find-operation-coding-system OPERATION ARGUMENTS...) */) 9280usage: (find-operation-coding-system OPERATION ARGUMENTS...) */)
9345 (int nargs, Lisp_Object *args) 9281 (ptrdiff_t nargs, Lisp_Object *args)
9346{ 9282{
9347 Lisp_Object operation, target_idx, target, val; 9283 Lisp_Object operation, target_idx, target, val;
9348 register Lisp_Object chain; 9284 register Lisp_Object chain;
@@ -9351,17 +9287,18 @@ usage: (find-operation-coding-system OPERATION ARGUMENTS...) */)
9351 error ("Too few arguments"); 9287 error ("Too few arguments");
9352 operation = args[0]; 9288 operation = args[0];
9353 if (!SYMBOLP (operation) 9289 if (!SYMBOLP (operation)
9354 || !INTEGERP (target_idx = Fget (operation, Qtarget_idx))) 9290 || !NATNUMP (target_idx = Fget (operation, Qtarget_idx)))
9355 error ("Invalid first argument"); 9291 error ("Invalid first argument");
9356 if (nargs < 1 + XINT (target_idx)) 9292 if (nargs < 1 + XFASTINT (target_idx))
9357 error ("Too few arguments for operation: %s", 9293 error ("Too few arguments for operation `%s'",
9358 SDATA (SYMBOL_NAME (operation))); 9294 SDATA (SYMBOL_NAME (operation)));
9359 target = args[XINT (target_idx) + 1]; 9295 target = args[XFASTINT (target_idx) + 1];
9360 if (!(STRINGP (target) 9296 if (!(STRINGP (target)
9361 || (EQ (operation, Qinsert_file_contents) && CONSP (target) 9297 || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9362 && STRINGP (XCAR (target)) && BUFFERP (XCDR (target))) 9298 && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
9363 || (EQ (operation, Qopen_network_stream) && INTEGERP (target)))) 9299 || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
9364 error ("Invalid %dth argument", XINT (target_idx) + 1); 9300 error ("Invalid argument %"pI"d of operation `%s'",
9301 XFASTINT (target_idx) + 1, SDATA (SYMBOL_NAME (operation)));
9365 if (CONSP (target)) 9302 if (CONSP (target))
9366 target = XCAR (target); 9303 target = XCAR (target);
9367 9304
@@ -9418,9 +9355,9 @@ If multiple coding systems belong to the same category,
9418all but the first one are ignored. 9355all but the first one are ignored.
9419 9356
9420usage: (set-coding-system-priority &rest coding-systems) */) 9357usage: (set-coding-system-priority &rest coding-systems) */)
9421 (int nargs, Lisp_Object *args) 9358 (ptrdiff_t nargs, Lisp_Object *args)
9422{ 9359{
9423 int i, j; 9360 ptrdiff_t i, j;
9424 int changed[coding_category_max]; 9361 int changed[coding_category_max];
9425 enum coding_category priorities[coding_category_max]; 9362 enum coding_category priorities[coding_category_max];
9426 9363
@@ -9463,7 +9400,7 @@ usage: (set-coding-system-priority &rest coding-systems) */)
9463 9400
9464 /* Update `coding-category-list'. */ 9401 /* Update `coding-category-list'. */
9465 Vcoding_category_list = Qnil; 9402 Vcoding_category_list = Qnil;
9466 for (i = coding_category_max - 1; i >= 0; i--) 9403 for (i = coding_category_max; i-- > 0; )
9467 Vcoding_category_list 9404 Vcoding_category_list
9468 = Fcons (AREF (Vcoding_category_table, priorities[i]), 9405 = Fcons (AREF (Vcoding_category_table, priorities[i]),
9469 Vcoding_category_list); 9406 Vcoding_category_list);
@@ -9524,7 +9461,7 @@ DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9524 Sdefine_coding_system_internal, coding_arg_max, MANY, 0, 9461 Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9525 doc: /* For internal use only. 9462 doc: /* For internal use only.
9526usage: (define-coding-system-internal ...) */) 9463usage: (define-coding-system-internal ...) */)
9527 (int nargs, Lisp_Object *args) 9464 (ptrdiff_t nargs, Lisp_Object *args)
9528{ 9465{
9529 Lisp_Object name; 9466 Lisp_Object name;
9530 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */ 9467 Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */
@@ -9798,7 +9735,6 @@ usage: (define-coding-system-internal ...) */)
9798 else if (EQ (coding_type, Qiso_2022)) 9735 else if (EQ (coding_type, Qiso_2022))
9799 { 9736 {
9800 Lisp_Object initial, reg_usage, request, flags; 9737 Lisp_Object initial, reg_usage, request, flags;
9801 int i;
9802 9738
9803 if (nargs < coding_arg_iso2022_max) 9739 if (nargs < coding_arg_iso2022_max)
9804 goto short_args; 9740 goto short_args;
@@ -9830,15 +9766,15 @@ usage: (define-coding-system-internal ...) */)
9830 for (tail = request; ! NILP (tail); tail = Fcdr (tail)) 9766 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9831 { 9767 {
9832 int id; 9768 int id;
9833 Lisp_Object tmp; 9769 Lisp_Object tmp1;
9834 9770
9835 val = Fcar (tail); 9771 val = Fcar (tail);
9836 CHECK_CONS (val); 9772 CHECK_CONS (val);
9837 tmp = XCAR (val); 9773 tmp1 = XCAR (val);
9838 CHECK_CHARSET_GET_ID (tmp, id); 9774 CHECK_CHARSET_GET_ID (tmp1, id);
9839 CHECK_NATNUM_CDR (val); 9775 CHECK_NATNUM_CDR (val);
9840 if (XINT (XCDR (val)) >= 4) 9776 if (XINT (XCDR (val)) >= 4)
9841 error ("Invalid graphic register number: %d", XINT (XCDR (val))); 9777 error ("Invalid graphic register number: %"pI"d", XINT (XCDR (val)));
9842 XSETCAR (val, make_number (id)); 9778 XSETCAR (val, make_number (id));
9843 } 9779 }
9844 9780