aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorKenichi Handa2003-12-02 01:40:27 +0000
committerKenichi Handa2003-12-02 01:40:27 +0000
commit24a73b0a8678e5afc8e95d9efa09f0932516f04e (patch)
tree9c9f805c17f356c3da6eced0d04d0e8a9d9288a8 /src
parent8a0e01e2a6da3180447bd3c707e3933951461672 (diff)
downloademacs-24a73b0a8678e5afc8e95d9efa09f0932516f04e.tar.gz
emacs-24a73b0a8678e5afc8e95d9efa09f0932516f04e.zip
(enum iso_code_class_type): Delete ISO_carriage_return.
(CODING_GET_INFO): Delete argument eol_type. Callers changed. (decode_coding_utf_8): Don't do eol conversion. (detect_coding_utf_16): Check coding->src_chars, not coding->src_bytes. Add heuristics for those that have no signature. (decode_coding_emacs_mule): Don't do eol conversion. (decode_coding_iso_2022): Likewise. (decode_coding_sjis): Likewise. (decode_coding_big5): Likewise. (decode_coding_charset): Likewise. (adjust_coding_eol_type): Return a new coding system. (detect_coding): Don't detect eol. Fix for utf-16 detection. (decode_eol): In case of CRLF->LF conversion, use del_range_2 on each change. (decode_coding): Pay attention to undo_list. Do eol conversion for all types of coding-systems (if necessary). (Vcode_conversion_work_buf_list): Delete it. (Vcode_conversion_reused_workbuf): Renamed from Vcode_conversion_reused_work_buf. (Vcode_conversion_workbuf_name): New variable. (reused_workbuf_in_use): New variable. (make_conversion_work_buffer): Delete the arg DEPTH. (code_conversion_restore): Argument changed to cons. (code_conversion_save): Delete the argument BUFFER. Callers changed. (detect_coding_system): New argument src_chars. Callers changed. Fix for utf-16 detection. (init_coding_once): Don't use ISO_carriage_return. (syms_of_coding): Initialized Vcode_conversion_workbuf_name and reused_workbuf_in_use.
Diffstat (limited to 'src')
-rw-r--r--src/coding.c753
1 files changed, 358 insertions, 395 deletions
diff --git a/src/coding.c b/src/coding.c
index 76b61e36baa..b62a4133a6b 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -491,7 +491,6 @@ enum iso_code_class_type
491 ISO_control_0, /* Control codes in the range 491 ISO_control_0, /* Control codes in the range
492 0x00..0x1F and 0x7F, except for the 492 0x00..0x1F and 0x7F, except for the
493 following 5 codes. */ 493 following 5 codes. */
494 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
495 ISO_shift_out, /* ISO_CODE_SO (0x0E) */ 494 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
496 ISO_shift_in, /* ISO_CODE_SI (0x0F) */ 495 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
497 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */ 496 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
@@ -710,13 +709,10 @@ static struct coding_system coding_categories[coding_category_max];
710#define max(a, b) ((a) > (b) ? (a) : (b)) 709#define max(a, b) ((a) > (b) ? (a) : (b))
711#endif 710#endif
712 711
713#define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \ 712#define CODING_GET_INFO(coding, attrs, charset_list) \
714 do { \ 713 do { \
715 attrs = CODING_ID_ATTRS (coding->id); \ 714 (attrs) = CODING_ID_ATTRS ((coding)->id); \
716 eol_type = CODING_ID_EOL_TYPE (coding->id); \ 715 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
717 if (VECTORP (eol_type)) \
718 eol_type = Qunix; \
719 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
720 } while (0) 716 } while (0)
721 717
722 718
@@ -1132,9 +1128,9 @@ decode_coding_utf_8 (coding)
1132 int *charbuf_end = charbuf + coding->charbuf_size; 1128 int *charbuf_end = charbuf + coding->charbuf_size;
1133 int consumed_chars = 0, consumed_chars_base; 1129 int consumed_chars = 0, consumed_chars_base;
1134 int multibytep = coding->src_multibyte; 1130 int multibytep = coding->src_multibyte;
1135 Lisp_Object attr, eol_type, charset_list; 1131 Lisp_Object attr, charset_list;
1136 1132
1137 CODING_GET_INFO (coding, attr, eol_type, charset_list); 1133 CODING_GET_INFO (coding, attr, charset_list);
1138 1134
1139 while (1) 1135 while (1)
1140 { 1136 {
@@ -1150,21 +1146,6 @@ decode_coding_utf_8 (coding)
1150 if (UTF_8_1_OCTET_P(c1)) 1146 if (UTF_8_1_OCTET_P(c1))
1151 { 1147 {
1152 c = c1; 1148 c = c1;
1153 if (c == '\r')
1154 {
1155 if (EQ (eol_type, Qdos))
1156 {
1157 if (src == src_end)
1158 {
1159 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1160 goto no_more_source;
1161 }
1162 if (*src == '\n')
1163 ONE_MORE_BYTE (c);
1164 }
1165 else if (EQ (eol_type, Qmac))
1166 c = '\n';
1167 }
1168 } 1149 }
1169 else 1150 else
1170 { 1151 {
@@ -1325,27 +1306,52 @@ detect_coding_utf_16 (coding, detect_info)
1325 int c1, c2; 1306 int c1, c2;
1326 1307
1327 detect_info->checked |= CATEGORY_MASK_UTF_16; 1308 detect_info->checked |= CATEGORY_MASK_UTF_16;
1328
1329 if (coding->mode & CODING_MODE_LAST_BLOCK 1309 if (coding->mode & CODING_MODE_LAST_BLOCK
1330 && (coding->src_bytes & 1)) 1310 && (coding->src_chars & 1))
1331 { 1311 {
1332 detect_info->rejected |= CATEGORY_MASK_UTF_16; 1312 detect_info->rejected |= CATEGORY_MASK_UTF_16;
1333 return 0; 1313 return 0;
1334 } 1314 }
1315
1335 ONE_MORE_BYTE (c1); 1316 ONE_MORE_BYTE (c1);
1336 ONE_MORE_BYTE (c2); 1317 ONE_MORE_BYTE (c2);
1337
1338 if ((c1 == 0xFF) && (c2 == 0xFE)) 1318 if ((c1 == 0xFF) && (c2 == 0xFE))
1339 { 1319 {
1340 detect_info->found |= (CATEGORY_MASK_UTF_16_LE 1320 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1341 | CATEGORY_MASK_UTF_16_AUTO); 1321 | CATEGORY_MASK_UTF_16_AUTO);
1342 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE; 1322 detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1323 | CATEGORY_MASK_UTF_16_BE_NOSIG
1324 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1343 } 1325 }
1344 else if ((c1 == 0xFE) && (c2 == 0xFF)) 1326 else if ((c1 == 0xFE) && (c2 == 0xFF))
1345 { 1327 {
1346 detect_info->found |= (CATEGORY_MASK_UTF_16_BE 1328 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1347 | CATEGORY_MASK_UTF_16_AUTO); 1329 | CATEGORY_MASK_UTF_16_AUTO);
1348 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE; 1330 detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1331 | CATEGORY_MASK_UTF_16_BE_NOSIG
1332 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1333 }
1334 else
1335 {
1336 unsigned char b1[256], b2[256];
1337 int b1_variants = 1, b2_variants = 1;
1338 int n;
1339
1340 bzero (b1, 256), bzero (b2, 256);
1341 b1[c1]++, b2[c2]++;
1342 for (n = 0; n < 256 && src < src_end; n++)
1343 {
1344 ONE_MORE_BYTE (c1);
1345 ONE_MORE_BYTE (c2);
1346 if (! b1[c1++]) b1_variants++;
1347 if (! b2[c2++]) b2_variants++;
1348 }
1349 if (b1_variants < b2_variants)
1350 detect_info->found |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1351 else
1352 detect_info->found |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1353 detect_info->rejected
1354 |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1349 } 1355 }
1350 no_more_source: 1356 no_more_source:
1351 return 1; 1357 return 1;
@@ -1365,9 +1371,9 @@ decode_coding_utf_16 (coding)
1365 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding); 1371 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1366 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding); 1372 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1367 int surrogate = CODING_UTF_16_SURROGATE (coding); 1373 int surrogate = CODING_UTF_16_SURROGATE (coding);
1368 Lisp_Object attr, eol_type, charset_list; 1374 Lisp_Object attr, charset_list;
1369 1375
1370 CODING_GET_INFO (coding, attr, eol_type, charset_list); 1376 CODING_GET_INFO (coding, attr, charset_list);
1371 1377
1372 if (bom == utf_16_with_bom) 1378 if (bom == utf_16_with_bom)
1373 { 1379 {
@@ -1460,10 +1466,10 @@ encode_coding_utf_16 (coding)
1460 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding); 1466 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1461 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian; 1467 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1462 int produced_chars = 0; 1468 int produced_chars = 0;
1463 Lisp_Object attrs, eol_type, charset_list; 1469 Lisp_Object attrs, charset_list;
1464 int c; 1470 int c;
1465 1471
1466 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 1472 CODING_GET_INFO (coding, attrs, charset_list);
1467 1473
1468 if (bom != utf_16_without_bom) 1474 if (bom != utf_16_without_bom)
1469 { 1475 {
@@ -1928,12 +1934,12 @@ decode_coding_emacs_mule (coding)
1928 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; 1934 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
1929 int consumed_chars = 0, consumed_chars_base; 1935 int consumed_chars = 0, consumed_chars_base;
1930 int multibytep = coding->src_multibyte; 1936 int multibytep = coding->src_multibyte;
1931 Lisp_Object attrs, eol_type, charset_list; 1937 Lisp_Object attrs, charset_list;
1932 int char_offset = coding->produced_char; 1938 int char_offset = coding->produced_char;
1933 int last_offset = char_offset; 1939 int last_offset = char_offset;
1934 int last_id = charset_ascii; 1940 int last_id = charset_ascii;
1935 1941
1936 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 1942 CODING_GET_INFO (coding, attrs, charset_list);
1937 1943
1938 while (1) 1944 while (1)
1939 { 1945 {
@@ -1949,21 +1955,6 @@ decode_coding_emacs_mule (coding)
1949 1955
1950 if (c < 0x80) 1956 if (c < 0x80)
1951 { 1957 {
1952 if (c == '\r')
1953 {
1954 if (EQ (eol_type, Qdos))
1955 {
1956 if (src == src_end)
1957 {
1958 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1959 goto no_more_source;
1960 }
1961 if (*src == '\n')
1962 ONE_MORE_BYTE (c);
1963 }
1964 else if (EQ (eol_type, Qmac))
1965 c = '\n';
1966 }
1967 *charbuf++ = c; 1958 *charbuf++ = c;
1968 char_offset++; 1959 char_offset++;
1969 } 1960 }
@@ -2052,11 +2043,11 @@ encode_coding_emacs_mule (coding)
2052 unsigned char *dst_end = coding->destination + coding->dst_bytes; 2043 unsigned char *dst_end = coding->destination + coding->dst_bytes;
2053 int safe_room = 8; 2044 int safe_room = 8;
2054 int produced_chars = 0; 2045 int produced_chars = 0;
2055 Lisp_Object attrs, eol_type, charset_list; 2046 Lisp_Object attrs, charset_list;
2056 int c; 2047 int c;
2057 int preferred_charset_id = -1; 2048 int preferred_charset_id = -1;
2058 2049
2059 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 2050 CODING_GET_INFO (coding, attrs, charset_list);
2060 if (! EQ (charset_list, Vemacs_mule_charset_list)) 2051 if (! EQ (charset_list, Vemacs_mule_charset_list))
2061 { 2052 {
2062 CODING_ATTR_CHARSET_LIST (attrs) 2053 CODING_ATTR_CHARSET_LIST (attrs)
@@ -2806,12 +2797,12 @@ decode_coding_iso_2022 (coding)
2806 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1]; 2797 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2807 int component_idx; 2798 int component_idx;
2808 int component_len; 2799 int component_len;
2809 Lisp_Object attrs, eol_type, charset_list; 2800 Lisp_Object attrs, charset_list;
2810 int char_offset = coding->produced_char; 2801 int char_offset = coding->produced_char;
2811 int last_offset = char_offset; 2802 int last_offset = char_offset;
2812 int last_id = charset_ascii; 2803 int last_id = charset_ascii;
2813 2804
2814 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 2805 CODING_GET_INFO (coding, attrs, charset_list);
2815 setup_iso_safe_charsets (attrs); 2806 setup_iso_safe_charsets (attrs);
2816 2807
2817 while (1) 2808 while (1)
@@ -2877,24 +2868,6 @@ decode_coding_iso_2022 (coding)
2877 charset = CHARSET_FROM_ID (charset_id_1); 2868 charset = CHARSET_FROM_ID (charset_id_1);
2878 break; 2869 break;
2879 2870
2880 case ISO_carriage_return:
2881 if (c1 == '\r')
2882 {
2883 if (EQ (eol_type, Qdos))
2884 {
2885 if (src == src_end)
2886 {
2887 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
2888 goto no_more_source;
2889 }
2890 if (*src == '\n')
2891 ONE_MORE_BYTE (c1);
2892 }
2893 else if (EQ (eol_type, Qmac))
2894 c1 = '\n';
2895 }
2896 /* fall through */
2897
2898 case ISO_control_0: 2871 case ISO_control_0:
2899 MAYBE_FINISH_COMPOSITION (); 2872 MAYBE_FINISH_COMPOSITION ();
2900 charset = CHARSET_FROM_ID (charset_ascii); 2873 charset = CHARSET_FROM_ID (charset_ascii);
@@ -3648,7 +3621,11 @@ encode_coding_iso_2022 (coding)
3648 int c; 3621 int c;
3649 int preferred_charset_id = -1; 3622 int preferred_charset_id = -1;
3650 3623
3651 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 3624 CODING_GET_INFO (coding, attrs, charset_list);
3625 eol_type = CODING_ID_EOL_TYPE (coding->id);
3626 if (VECTORP (eol_type))
3627 eol_type = Qunix;
3628
3652 setup_iso_safe_charsets (attrs); 3629 setup_iso_safe_charsets (attrs);
3653 /* Charset list may have been changed. */ 3630 /* Charset list may have been changed. */
3654 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \ 3631 charset_list = CODING_ATTR_CHARSET_LIST (attrs); \
@@ -3931,12 +3908,12 @@ decode_coding_sjis (coding)
3931 int consumed_chars = 0, consumed_chars_base; 3908 int consumed_chars = 0, consumed_chars_base;
3932 int multibytep = coding->src_multibyte; 3909 int multibytep = coding->src_multibyte;
3933 struct charset *charset_roman, *charset_kanji, *charset_kana; 3910 struct charset *charset_roman, *charset_kanji, *charset_kana;
3934 Lisp_Object attrs, eol_type, charset_list, val; 3911 Lisp_Object attrs, charset_list, val;
3935 int char_offset = coding->produced_char; 3912 int char_offset = coding->produced_char;
3936 int last_offset = char_offset; 3913 int last_offset = char_offset;
3937 int last_id = charset_ascii; 3914 int last_id = charset_ascii;
3938 3915
3939 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 3916 CODING_GET_INFO (coding, attrs, charset_list);
3940 3917
3941 val = charset_list; 3918 val = charset_list;
3942 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); 3919 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
@@ -3946,6 +3923,7 @@ decode_coding_sjis (coding)
3946 while (1) 3923 while (1)
3947 { 3924 {
3948 int c, c1; 3925 int c, c1;
3926 struct charset *charset;
3949 3927
3950 src_base = src; 3928 src_base = src;
3951 consumed_chars_base = consumed_chars; 3929 consumed_chars_base = consumed_chars;
@@ -3955,60 +3933,40 @@ decode_coding_sjis (coding)
3955 3933
3956 ONE_MORE_BYTE (c); 3934 ONE_MORE_BYTE (c);
3957 3935
3958 if (c == '\r') 3936 if (c < 0x80)
3959 { 3937 charset = charset_roman;
3960 if (EQ (eol_type, Qdos))
3961 {
3962 if (src == src_end)
3963 {
3964 coding->result = CODING_RESULT_INSUFFICIENT_SRC;
3965 goto no_more_source;
3966 }
3967 if (*src == '\n')
3968 ONE_MORE_BYTE (c);
3969 }
3970 else if (EQ (eol_type, Qmac))
3971 c = '\n';
3972 }
3973 else 3938 else
3974 { 3939 {
3975 struct charset *charset; 3940 if (c >= 0xF0)
3976 3941 goto invalid_code;
3977 if (c < 0x80) 3942 if (c < 0xA0 || c >= 0xE0)
3978 charset = charset_roman;
3979 else
3980 { 3943 {
3981 if (c >= 0xF0) 3944 /* SJIS -> JISX0208 */
3982 goto invalid_code; 3945 ONE_MORE_BYTE (c1);
3983 if (c < 0xA0 || c >= 0xE0) 3946 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3984 {
3985 /* SJIS -> JISX0208 */
3986 ONE_MORE_BYTE (c1);
3987 if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3988 goto invalid_code;
3989 c = (c << 8) | c1;
3990 SJIS_TO_JIS (c);
3991 charset = charset_kanji;
3992 }
3993 else if (c > 0xA0)
3994 {
3995 /* SJIS -> JISX0201-Kana */
3996 c &= 0x7F;
3997 charset = charset_kana;
3998 }
3999 else
4000 goto invalid_code; 3947 goto invalid_code;
3948 c = (c << 8) | c1;
3949 SJIS_TO_JIS (c);
3950 charset = charset_kanji;
4001 } 3951 }
4002 if (charset->id != charset_ascii 3952 else if (c > 0xA0)
4003 && last_id != charset->id)
4004 { 3953 {
4005 if (last_id != charset_ascii) 3954 /* SJIS -> JISX0201-Kana */
4006 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); 3955 c &= 0x7F;
4007 last_id = charset->id; 3956 charset = charset_kana;
4008 last_offset = char_offset;
4009 } 3957 }
4010 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c); 3958 else
3959 goto invalid_code;
4011 } 3960 }
3961 if (charset->id != charset_ascii
3962 && last_id != charset->id)
3963 {
3964 if (last_id != charset_ascii)
3965 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3966 last_id = charset->id;
3967 last_offset = char_offset;
3968 }
3969 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4012 *charbuf++ = c; 3970 *charbuf++ = c;
4013 char_offset++; 3971 char_offset++;
4014 continue; 3972 continue;
@@ -4042,12 +4000,12 @@ decode_coding_big5 (coding)
4042 int consumed_chars = 0, consumed_chars_base; 4000 int consumed_chars = 0, consumed_chars_base;
4043 int multibytep = coding->src_multibyte; 4001 int multibytep = coding->src_multibyte;
4044 struct charset *charset_roman, *charset_big5; 4002 struct charset *charset_roman, *charset_big5;
4045 Lisp_Object attrs, eol_type, charset_list, val; 4003 Lisp_Object attrs, charset_list, val;
4046 int char_offset = coding->produced_char; 4004 int char_offset = coding->produced_char;
4047 int last_offset = char_offset; 4005 int last_offset = char_offset;
4048 int last_id = charset_ascii; 4006 int last_id = charset_ascii;
4049 4007
4050 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 4008 CODING_GET_INFO (coding, attrs, charset_list);
4051 val = charset_list; 4009 val = charset_list;
4052 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); 4010 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4053 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); 4011 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
@@ -4055,6 +4013,7 @@ decode_coding_big5 (coding)
4055 while (1) 4013 while (1)
4056 { 4014 {
4057 int c, c1; 4015 int c, c1;
4016 struct charset *charset;
4058 4017
4059 src_base = src; 4018 src_base = src;
4060 consumed_chars_base = consumed_chars; 4019 consumed_chars_base = consumed_chars;
@@ -4064,48 +4023,28 @@ decode_coding_big5 (coding)
4064 4023
4065 ONE_MORE_BYTE (c); 4024 ONE_MORE_BYTE (c);
4066 4025
4067 if (c == '\r') 4026 if (c < 0x80)
4027 charset = charset_roman;
4028 else
4068 { 4029 {
4069 if (EQ (eol_type, Qdos)) 4030 /* BIG5 -> Big5 */
4070 { 4031 if (c < 0xA1 || c > 0xFE)
4071 if (src == src_end) 4032 goto invalid_code;
4072 { 4033 ONE_MORE_BYTE (c1);
4073 coding->result = CODING_RESULT_INSUFFICIENT_SRC; 4034 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4074 goto no_more_source; 4035 goto invalid_code;
4075 } 4036 c = c << 8 | c1;
4076 if (*src == '\n') 4037 charset = charset_big5;
4077 ONE_MORE_BYTE (c);
4078 }
4079 else if (EQ (eol_type, Qmac))
4080 c = '\n';
4081 } 4038 }
4082 else 4039 if (charset->id != charset_ascii
4040 && last_id != charset->id)
4083 { 4041 {
4084 struct charset *charset; 4042 if (last_id != charset_ascii)
4085 if (c < 0x80) 4043 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4086 charset = charset_roman; 4044 last_id = charset->id;
4087 else 4045 last_offset = char_offset;
4088 {
4089 /* BIG5 -> Big5 */
4090 if (c < 0xA1 || c > 0xFE)
4091 goto invalid_code;
4092 ONE_MORE_BYTE (c1);
4093 if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4094 goto invalid_code;
4095 c = c << 8 | c1;
4096 charset = charset_big5;
4097 }
4098 if (charset->id != charset_ascii
4099 && last_id != charset->id)
4100 {
4101 if (last_id != charset_ascii)
4102 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4103 last_id = charset->id;
4104 last_offset = char_offset;
4105 }
4106 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4107 } 4046 }
4108 4047 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4109 *charbuf++ = c; 4048 *charbuf++ = c;
4110 char_offset++; 4049 char_offset++;
4111 continue; 4050 continue;
@@ -4146,12 +4085,12 @@ encode_coding_sjis (coding)
4146 unsigned char *dst_end = coding->destination + coding->dst_bytes; 4085 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4147 int safe_room = 4; 4086 int safe_room = 4;
4148 int produced_chars = 0; 4087 int produced_chars = 0;
4149 Lisp_Object attrs, eol_type, charset_list, val; 4088 Lisp_Object attrs, charset_list, val;
4150 int ascii_compatible; 4089 int ascii_compatible;
4151 struct charset *charset_roman, *charset_kanji, *charset_kana; 4090 struct charset *charset_roman, *charset_kanji, *charset_kana;
4152 int c; 4091 int c;
4153 4092
4154 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 4093 CODING_GET_INFO (coding, attrs, charset_list);
4155 val = charset_list; 4094 val = charset_list;
4156 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); 4095 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4157 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); 4096 charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
@@ -4221,12 +4160,12 @@ encode_coding_big5 (coding)
4221 unsigned char *dst_end = coding->destination + coding->dst_bytes; 4160 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4222 int safe_room = 4; 4161 int safe_room = 4;
4223 int produced_chars = 0; 4162 int produced_chars = 0;
4224 Lisp_Object attrs, eol_type, charset_list, val; 4163 Lisp_Object attrs, charset_list, val;
4225 int ascii_compatible; 4164 int ascii_compatible;
4226 struct charset *charset_roman, *charset_big5; 4165 struct charset *charset_roman, *charset_big5;
4227 int c; 4166 int c;
4228 4167
4229 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 4168 CODING_GET_INFO (coding, attrs, charset_list);
4230 val = charset_list; 4169 val = charset_list;
4231 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); 4170 charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4232 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); 4171 charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
@@ -4340,9 +4279,9 @@ decode_coding_ccl (coding)
4340 struct ccl_program ccl; 4279 struct ccl_program ccl;
4341 int source_charbuf[1024]; 4280 int source_charbuf[1024];
4342 int source_byteidx[1024]; 4281 int source_byteidx[1024];
4343 Lisp_Object attrs, eol_type, charset_list; 4282 Lisp_Object attrs, charset_list;
4344 4283
4345 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 4284 CODING_GET_INFO (coding, attrs, charset_list);
4346 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding)); 4285 setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4347 4286
4348 while (src < src_end) 4287 while (src < src_end)
@@ -4420,9 +4359,9 @@ encode_coding_ccl (coding)
4420 unsigned char *adjusted_dst_end = dst_end - 1; 4359 unsigned char *adjusted_dst_end = dst_end - 1;
4421 int destination_charbuf[1024]; 4360 int destination_charbuf[1024];
4422 int i, produced_chars = 0; 4361 int i, produced_chars = 0;
4423 Lisp_Object attrs, eol_type, charset_list; 4362 Lisp_Object attrs, charset_list;
4424 4363
4425 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 4364 CODING_GET_INFO (coding, attrs, charset_list);
4426 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding)); 4365 setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4427 4366
4428 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK; 4367 ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
@@ -4621,17 +4560,22 @@ decode_coding_charset (coding)
4621 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; 4560 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4622 int consumed_chars = 0, consumed_chars_base; 4561 int consumed_chars = 0, consumed_chars_base;
4623 int multibytep = coding->src_multibyte; 4562 int multibytep = coding->src_multibyte;
4624 Lisp_Object attrs, eol_type, charset_list, valids; 4563 Lisp_Object attrs, charset_list, valids;
4625 int char_offset = coding->produced_char; 4564 int char_offset = coding->produced_char;
4626 int last_offset = char_offset; 4565 int last_offset = char_offset;
4627 int last_id = charset_ascii; 4566 int last_id = charset_ascii;
4628 4567
4629 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 4568 CODING_GET_INFO (coding, attrs, charset_list);
4630 valids = AREF (attrs, coding_attr_charset_valids); 4569 valids = AREF (attrs, coding_attr_charset_valids);
4631 4570
4632 while (1) 4571 while (1)
4633 { 4572 {
4634 int c; 4573 int c;
4574 Lisp_Object val;
4575 struct charset *charset;
4576 int dim;
4577 int len = 1;
4578 unsigned code;
4635 4579
4636 src_base = src; 4580 src_base = src;
4637 consumed_chars_base = consumed_chars; 4581 consumed_chars_base = consumed_chars;
@@ -4640,37 +4584,32 @@ decode_coding_charset (coding)
4640 break; 4584 break;
4641 4585
4642 ONE_MORE_BYTE (c); 4586 ONE_MORE_BYTE (c);
4643 if (c == '\r') 4587 code = c;
4588
4589 val = AREF (valids, c);
4590 if (NILP (val))
4591 goto invalid_code;
4592 if (INTEGERP (val))
4644 { 4593 {
4645 /* Here we assume that no charset maps '\r' to something 4594 charset = CHARSET_FROM_ID (XFASTINT (val));
4646 else. */ 4595 dim = CHARSET_DIMENSION (charset);
4647 if (EQ (eol_type, Qdos)) 4596 while (len < dim)
4648 { 4597 {
4649 if (src == src_end) 4598 ONE_MORE_BYTE (c);
4650 { 4599 code = (code << 8) | c;
4651 coding->result = CODING_RESULT_INSUFFICIENT_SRC; 4600 len++;
4652 goto no_more_source;
4653 }
4654 if (*src == '\n')
4655 ONE_MORE_BYTE (c);
4656 } 4601 }
4657 else if (EQ (eol_type, Qmac)) 4602 CODING_DECODE_CHAR (coding, src, src_base, src_end,
4658 c = '\n'; 4603 charset, code, c);
4659 } 4604 }
4660 else 4605 else
4661 { 4606 {
4662 Lisp_Object val; 4607 /* VAL is a list of charset IDs. It is assured that the
4663 struct charset *charset; 4608 list is sorted by charset dimensions (smaller one
4664 int dim; 4609 comes first). */
4665 int len = 1; 4610 while (CONSP (val))
4666 unsigned code = c;
4667
4668 val = AREF (valids, c);
4669 if (NILP (val))
4670 goto invalid_code;
4671 if (INTEGERP (val))
4672 { 4611 {
4673 charset = CHARSET_FROM_ID (XFASTINT (val)); 4612 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4674 dim = CHARSET_DIMENSION (charset); 4613 dim = CHARSET_DIMENSION (charset);
4675 while (len < dim) 4614 while (len < dim)
4676 { 4615 {
@@ -4678,42 +4617,24 @@ decode_coding_charset (coding)
4678 code = (code << 8) | c; 4617 code = (code << 8) | c;
4679 len++; 4618 len++;
4680 } 4619 }
4681 CODING_DECODE_CHAR (coding, src, src_base, src_end, 4620 CODING_DECODE_CHAR (coding, src, src_base,
4682 charset, code, c); 4621 src_end, charset, code, c);
4683 } 4622 if (c >= 0)
4684 else 4623 break;
4685 { 4624 val = XCDR (val);
4686 /* VAL is a list of charset IDs. It is assured that the
4687 list is sorted by charset dimensions (smaller one
4688 comes first). */
4689 while (CONSP (val))
4690 {
4691 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4692 dim = CHARSET_DIMENSION (charset);
4693 while (len < dim)
4694 {
4695 ONE_MORE_BYTE (c);
4696 code = (code << 8) | c;
4697 len++;
4698 }
4699 CODING_DECODE_CHAR (coding, src, src_base,
4700 src_end, charset, code, c);
4701 if (c >= 0)
4702 break;
4703 val = XCDR (val);
4704 }
4705 }
4706 if (c < 0)
4707 goto invalid_code;
4708 if (charset->id != charset_ascii
4709 && last_id != charset->id)
4710 {
4711 if (last_id != charset_ascii)
4712 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4713 last_id = charset->id;
4714 last_offset = char_offset;
4715 } 4625 }
4716 } 4626 }
4627 if (c < 0)
4628 goto invalid_code;
4629 if (charset->id != charset_ascii
4630 && last_id != charset->id)
4631 {
4632 if (last_id != charset_ascii)
4633 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4634 last_id = charset->id;
4635 last_offset = char_offset;
4636 }
4637
4717 *charbuf++ = c; 4638 *charbuf++ = c;
4718 char_offset++; 4639 char_offset++;
4719 continue; 4640 continue;
@@ -4746,11 +4667,11 @@ encode_coding_charset (coding)
4746 unsigned char *dst_end = coding->destination + coding->dst_bytes; 4667 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4747 int safe_room = MAX_MULTIBYTE_LENGTH; 4668 int safe_room = MAX_MULTIBYTE_LENGTH;
4748 int produced_chars = 0; 4669 int produced_chars = 0;
4749 Lisp_Object attrs, eol_type, charset_list; 4670 Lisp_Object attrs, charset_list;
4750 int ascii_compatible; 4671 int ascii_compatible;
4751 int c; 4672 int c;
4752 4673
4753 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 4674 CODING_GET_INFO (coding, attrs, charset_list);
4754 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); 4675 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4755 4676
4756 while (charbuf < charbuf_end) 4677 while (charbuf < charbuf_end)
@@ -5250,7 +5171,7 @@ detect_eol (source, src_bytes, category)
5250} 5171}
5251 5172
5252 5173
5253static void 5174static Lisp_Object
5254adjust_coding_eol_type (coding, eol_seen) 5175adjust_coding_eol_type (coding, eol_seen)
5255 struct coding_system *coding; 5176 struct coding_system *coding;
5256 int eol_seen; 5177 int eol_seen;
@@ -5259,11 +5180,21 @@ adjust_coding_eol_type (coding, eol_seen)
5259 5180
5260 eol_type = CODING_ID_EOL_TYPE (coding->id); 5181 eol_type = CODING_ID_EOL_TYPE (coding->id);
5261 if (eol_seen & EOL_SEEN_LF) 5182 if (eol_seen & EOL_SEEN_LF)
5262 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0)); 5183 {
5184 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5185 eol_type = Qunix;
5186 }
5263 else if (eol_seen & EOL_SEEN_CRLF) 5187 else if (eol_seen & EOL_SEEN_CRLF)
5264 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1)); 5188 {
5189 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5190 eol_type = Qdos;
5191 }
5265 else if (eol_seen & EOL_SEEN_CR) 5192 else if (eol_seen & EOL_SEEN_CR)
5266 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2)); 5193 {
5194 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5195 eol_type = Qmac;
5196 }
5197 return eol_type;
5267} 5198}
5268 5199
5269/* Detect how a text specified in CODING is encoded. If a coding 5200/* Detect how a text specified in CODING is encoded. If a coding
@@ -5289,14 +5220,18 @@ detect_coding (coding)
5289 { 5220 {
5290 int c, i; 5221 int c, i;
5291 5222
5292 for (src = coding->source; src < src_end; src++) 5223 for (i = 0, src = coding->source; src < src_end; i++, src++)
5293 { 5224 {
5294 c = *src; 5225 c = *src;
5295 if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC 5226 if (c & 0x80 || (c < 0x20 && (c == 0
5227 || c == ISO_CODE_ESC
5296 || c == ISO_CODE_SI 5228 || c == ISO_CODE_SI
5297 || c == ISO_CODE_SO))) 5229 || c == ISO_CODE_SO)))
5298 break; 5230 break;
5299 } 5231 }
5232 /* Skipped bytes must be even for utf-16 detector. */
5233 if (i % 2)
5234 src--;
5300 coding->head_ascii = src - (coding->source + coding->consumed); 5235 coding->head_ascii = src - (coding->source + coding->consumed);
5301 5236
5302 if (coding->head_ascii < coding->src_bytes) 5237 if (coding->head_ascii < coding->src_bytes)
@@ -5324,7 +5259,16 @@ detect_coding (coding)
5324 } 5259 }
5325 else if ((*(this->detector)) (coding, &detect_info) 5260 else if ((*(this->detector)) (coding, &detect_info)
5326 && detect_info.found & (1 << category)) 5261 && detect_info.found & (1 << category))
5327 break; 5262 {
5263 if (category == coding_category_utf_16_auto)
5264 {
5265 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5266 category = coding_category_utf_16_le;
5267 else
5268 category = coding_category_utf_16_be;
5269 }
5270 break;
5271 }
5328 } 5272 }
5329 if (i < coding_category_raw_text) 5273 if (i < coding_category_raw_text)
5330 setup_coding_system (CODING_ID_NAME (this->id), coding); 5274 setup_coding_system (CODING_ID_NAME (this->id), coding);
@@ -5340,7 +5284,8 @@ detect_coding (coding)
5340 } 5284 }
5341 } 5285 }
5342 } 5286 }
5343 else if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qutf_16)) 5287 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5288 == coding_category_utf_16_auto)
5344 { 5289 {
5345 Lisp_Object coding_systems; 5290 Lisp_Object coding_systems;
5346 struct coding_detection_info detect_info; 5291 struct coding_detection_info detect_info;
@@ -5349,32 +5294,14 @@ detect_coding (coding)
5349 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom); 5294 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5350 detect_info.found = detect_info.rejected = 0; 5295 detect_info.found = detect_info.rejected = 0;
5351 if (CONSP (coding_systems) 5296 if (CONSP (coding_systems)
5352 && detect_coding_utf_16 (coding, &detect_info) 5297 && detect_coding_utf_16 (coding, &detect_info))
5353 && (detect_info.found & (CATEGORY_MASK_UTF_16_LE
5354 | CATEGORY_MASK_UTF_16_BE)))
5355 { 5298 {
5356 if (detect_info.found & CATEGORY_MASK_UTF_16_LE) 5299 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5357 setup_coding_system (XCAR (coding_systems), coding); 5300 setup_coding_system (XCAR (coding_systems), coding);
5358 else 5301 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
5359 setup_coding_system (XCDR (coding_systems), coding); 5302 setup_coding_system (XCDR (coding_systems), coding);
5360 } 5303 }
5361 } 5304 }
5362
5363 attrs = CODING_ID_ATTRS (coding->id);
5364 coding_type = CODING_ATTR_TYPE (attrs);
5365
5366 /* If we have not yet decided the EOL type, detect it now. But, the
5367 detection is impossible for a CCL based coding system, in which
5368 case, we detct the EOL type after decoding. */
5369 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5370 && ! EQ (coding_type, Qccl))
5371 {
5372 int eol_seen = detect_eol (coding->source, coding->src_bytes,
5373 (enum coding_category) XINT (CODING_ATTR_CATEGORY (attrs)));
5374
5375 if (eol_seen != EOL_SEEN_NONE)
5376 adjust_coding_eol_type (coding, eol_seen);
5377 }
5378} 5305}
5379 5306
5380 5307
@@ -5382,13 +5309,24 @@ static void
5382decode_eol (coding) 5309decode_eol (coding)
5383 struct coding_system *coding; 5310 struct coding_system *coding;
5384{ 5311{
5385 if (VECTORP (CODING_ID_EOL_TYPE (coding->id))) 5312 Lisp_Object eol_type;
5313 unsigned char *p, *pbeg, *pend;
5314
5315 eol_type = CODING_ID_EOL_TYPE (coding->id);
5316 if (EQ (eol_type, Qunix))
5317 return;
5318
5319 if (NILP (coding->dst_object))
5320 pbeg = coding->destination;
5321 else
5322 pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5323 pend = pbeg + coding->produced;
5324
5325 if (VECTORP (eol_type))
5386 { 5326 {
5387 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5388 unsigned char *pend = p + coding->produced;
5389 int eol_seen = EOL_SEEN_NONE; 5327 int eol_seen = EOL_SEEN_NONE;
5390 5328
5391 for (; p < pend; p++) 5329 for (p = pbeg; p < pend; p++)
5392 { 5330 {
5393 if (*p == '\n') 5331 if (*p == '\n')
5394 eol_seen |= EOL_SEEN_LF; 5332 eol_seen |= EOL_SEEN_LF;
@@ -5403,42 +5341,48 @@ decode_eol (coding)
5403 eol_seen |= EOL_SEEN_CR; 5341 eol_seen |= EOL_SEEN_CR;
5404 } 5342 }
5405 } 5343 }
5344 if (eol_seen != EOL_SEEN_NONE
5345 && eol_seen != EOL_SEEN_LF
5346 && eol_seen != EOL_SEEN_CRLF
5347 && eol_seen != EOL_SEEN_CR)
5348 eol_seen = EOL_SEEN_LF;
5406 if (eol_seen != EOL_SEEN_NONE) 5349 if (eol_seen != EOL_SEEN_NONE)
5407 adjust_coding_eol_type (coding, eol_seen); 5350 eol_type = adjust_coding_eol_type (coding, eol_seen);
5408 } 5351 }
5409 5352
5410 if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac)) 5353 if (EQ (eol_type, Qmac))
5411 { 5354 {
5412 unsigned char *p = CHAR_POS_ADDR (coding->dst_pos); 5355 for (p = pbeg; p < pend; p++)
5413 unsigned char *pend = p + coding->produced;
5414
5415 for (; p < pend; p++)
5416 if (*p == '\r') 5356 if (*p == '\r')
5417 *p = '\n'; 5357 *p = '\n';
5418 } 5358 }
5419 else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos)) 5359 else if (EQ (eol_type, Qdos))
5420 { 5360 {
5421 unsigned char *p, *pbeg, *pend; 5361 int n = 0;
5422 Lisp_Object undo_list;
5423
5424 move_gap_both (coding->dst_pos + coding->produced_char,
5425 coding->dst_pos_byte + coding->produced);
5426 undo_list = current_buffer->undo_list;
5427 current_buffer->undo_list = Qt;
5428 del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0);
5429 current_buffer->undo_list = undo_list;
5430 pbeg = GPT_ADDR;
5431 pend = pbeg + coding->produced;
5432 5362
5433 for (p = pend - 1; p >= pbeg; p--) 5363 if (NILP (coding->dst_object))
5434 if (*p == '\r') 5364 {
5435 { 5365 for (p = pend - 2; p >= pbeg; p--)
5436 safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1); 5366 if (*p == '\r')
5437 pend--; 5367 {
5438 } 5368 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
5439 coding->produced_char -= coding->produced - (pend - pbeg); 5369 n++;
5440 coding->produced = pend - pbeg; 5370 }
5441 insert_from_gap (coding->produced_char, coding->produced); 5371 }
5372 else
5373 {
5374 for (p = pend - 2; p >= pbeg; p--)
5375 if (*p == '\r')
5376 {
5377 int pos_byte = coding->dst_pos_byte + (p - pbeg);
5378 int pos = BYTE_TO_CHAR (pos_byte);
5379
5380 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
5381 n++;
5382 }
5383 }
5384 coding->produced -= n;
5385 coding->produced_char -= n;
5442 } 5386 }
5443} 5387}
5444 5388
@@ -5796,6 +5740,7 @@ decode_coding (coding)
5796 struct coding_system *coding; 5740 struct coding_system *coding;
5797{ 5741{
5798 Lisp_Object attrs; 5742 Lisp_Object attrs;
5743 Lisp_Object undo_list;
5799 5744
5800 if (BUFFERP (coding->src_object) 5745 if (BUFFERP (coding->src_object)
5801 && coding->src_pos > 0 5746 && coding->src_pos > 0
@@ -5803,12 +5748,15 @@ decode_coding (coding)
5803 && coding->src_pos + coding->src_chars > GPT) 5748 && coding->src_pos + coding->src_chars > GPT)
5804 move_gap_both (coding->src_pos, coding->src_pos_byte); 5749 move_gap_both (coding->src_pos, coding->src_pos_byte);
5805 5750
5751 undo_list = Qt;
5806 if (BUFFERP (coding->dst_object)) 5752 if (BUFFERP (coding->dst_object))
5807 { 5753 {
5808 if (current_buffer != XBUFFER (coding->dst_object)) 5754 if (current_buffer != XBUFFER (coding->dst_object))
5809 set_buffer_internal (XBUFFER (coding->dst_object)); 5755 set_buffer_internal (XBUFFER (coding->dst_object));
5810 if (GPT != PT) 5756 if (GPT != PT)
5811 move_gap_both (PT, PT_BYTE); 5757 move_gap_both (PT, PT_BYTE);
5758 undo_list = current_buffer->undo_list;
5759 current_buffer->undo_list = Qt;
5812 } 5760 }
5813 5761
5814 coding->consumed = coding->consumed_char = 0; 5762 coding->consumed = coding->consumed_char = 0;
@@ -5838,11 +5786,6 @@ decode_coding (coding)
5838 while (coding->consumed < coding->src_bytes 5786 while (coding->consumed < coding->src_bytes
5839 && ! coding->result); 5787 && ! coding->result);
5840 5788
5841 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5842 && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5843 && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5844 decode_eol (coding);
5845
5846 coding->carryover_bytes = 0; 5789 coding->carryover_bytes = 0;
5847 if (coding->consumed < coding->src_bytes) 5790 if (coding->consumed < coding->src_bytes)
5848 { 5791 {
@@ -5880,6 +5823,13 @@ decode_coding (coding)
5880 coding->consumed = coding->src_bytes; 5823 coding->consumed = coding->src_bytes;
5881 } 5824 }
5882 5825
5826 if (BUFFERP (coding->dst_object))
5827 {
5828 current_buffer->undo_list = undo_list;
5829 record_insert (coding->dst_pos, coding->produced_char);
5830 }
5831 if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5832 decode_eol (coding);
5883 return coding->result; 5833 return coding->result;
5884} 5834}
5885 5835
@@ -6039,7 +5989,7 @@ consume_chars (coding)
6039 stop_charset = end_pos; 5989 stop_charset = end_pos;
6040 } 5990 }
6041 5991
6042 /* Compensate for CRLF and annotation. */ 5992 /* Compensate for CRLF and conversion. */
6043 buf_end -= 1 + MAX_ANNOTATION_LENGTH; 5993 buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6044 while (buf < buf_end) 5994 while (buf < buf_end)
6045 { 5995 {
@@ -6154,91 +6104,73 @@ encode_coding (coding)
6154} 6104}
6155 6105
6156 6106
6157/* Stack of working buffers used in code conversion. An nil element 6107/* Name (or base name) of work buffer for code conversion. */
6158 means that the code conversion of that level is not using a working 6108static Lisp_Object Vcode_conversion_workbuf_name;
6159 buffer. */
6160Lisp_Object Vcode_conversion_work_buf_list;
6161 6109
6162/* A working buffer used by the top level conversion. */ 6110/* A working buffer used by the top level conversion. Once it is
6163Lisp_Object Vcode_conversion_reused_work_buf; 6111 created, it is never destroyed. It has the name
6112 Vcode_conversion_workbuf_name. The other working buffers are
6113 destroyed after the use is finished, and their names are modified
6114 versions of Vcode_conversion_workbuf_name. */
6115static Lisp_Object Vcode_conversion_reused_workbuf;
6164 6116
6117/* 1 iff Vcode_conversion_reused_workbuf is already in use. */
6118static int reused_workbuf_in_use;
6165 6119
6166/* Return a working buffer that can be freely used by the following 6120
6167 code conversion. MULTIBYTEP specifies the multibyteness of the 6121/* Return a working buffer of code convesion. MULTIBYTE specifies the
6168 buffer. */ 6122 multibyteness of returning buffer. */
6169 6123
6170Lisp_Object 6124Lisp_Object
6171make_conversion_work_buffer (multibytep, depth) 6125make_conversion_work_buffer (multibyte)
6172 int multibytep, depth;
6173{ 6126{
6174 struct buffer *current = current_buffer; 6127 Lisp_Object name, workbuf;
6175 Lisp_Object buf, name; 6128 struct buffer *current;
6176 6129
6177 if (depth == 0) 6130 if (reused_workbuf_in_use++)
6178 { 6131 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6179 if (NILP (Vcode_conversion_reused_work_buf))
6180 Vcode_conversion_reused_work_buf
6181 = Fget_buffer_create (build_string (" *code-converting-work<0>*"));
6182 buf = Vcode_conversion_reused_work_buf;
6183 }
6184 else 6132 else
6185 { 6133 name = Vcode_conversion_workbuf_name;
6186 if (depth < 0) 6134 workbuf = Fget_buffer_create (name);
6187 { 6135 current = current_buffer;
6188 name = build_string (" *code-converting-work*"); 6136 set_buffer_internal (XBUFFER (workbuf));
6189 name = Fgenerate_new_buffer_name (name, Qnil); 6137 Ferase_buffer ();
6190 }
6191 else
6192 {
6193 char str[128];
6194
6195 sprintf (str, " *code-converting-work*<%d>", depth);
6196 name = build_string (str);
6197 }
6198 buf = Fget_buffer_create (name);
6199 }
6200 set_buffer_internal (XBUFFER (buf));
6201 current_buffer->undo_list = Qt; 6138 current_buffer->undo_list = Qt;
6202 Ferase_buffer (); 6139 current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
6203 Fset_buffer_multibyte (multibytep ? Qt : Qnil);
6204 set_buffer_internal (current); 6140 set_buffer_internal (current);
6205 return buf; 6141 return workbuf;
6206} 6142}
6207 6143
6144
6208static Lisp_Object 6145static Lisp_Object
6209code_conversion_restore (buffer) 6146code_conversion_restore (arg)
6210 Lisp_Object buffer; 6147 Lisp_Object arg;
6211{ 6148{
6212 Lisp_Object workbuf; 6149 Lisp_Object current, workbuf;
6213 6150
6214 workbuf = XCAR (Vcode_conversion_work_buf_list); 6151 current = XCAR (arg);
6215 if (! NILP (workbuf) 6152 workbuf = XCDR (arg);
6216 && ! EQ (workbuf, Vcode_conversion_reused_work_buf) 6153 if (! NILP (workbuf))
6217 && ! NILP (Fbuffer_live_p (workbuf))) 6154 {
6218 Fkill_buffer (workbuf); 6155 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
6219 Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list); 6156 reused_workbuf_in_use = 0;
6220 set_buffer_internal (XBUFFER (buffer)); 6157 else if (! NILP (Fbuffer_live_p (workbuf)))
6158 Fkill_buffer (workbuf);
6159 }
6160 set_buffer_internal (XBUFFER (current));
6221 return Qnil; 6161 return Qnil;
6222} 6162}
6223 6163
6224static Lisp_Object 6164Lisp_Object
6225code_conversion_save (buffer, with_work_buf, multibyte) 6165code_conversion_save (with_work_buf, multibyte)
6226 Lisp_Object buffer;
6227 int with_work_buf, multibyte; 6166 int with_work_buf, multibyte;
6228{ 6167{
6229 Lisp_Object workbuf; 6168 Lisp_Object workbuf = Qnil;
6230 6169
6231 if (with_work_buf) 6170 if (with_work_buf)
6232 { 6171 workbuf = make_conversion_work_buffer (multibyte);
6233 int depth = XINT (Flength (Vcode_conversion_work_buf_list)); 6172 record_unwind_protect (code_conversion_restore,
6234 6173 Fcons (Fcurrent_buffer (), workbuf));
6235 workbuf = make_conversion_work_buffer (multibyte, depth);
6236 }
6237 else
6238 workbuf = Qnil;
6239 Vcode_conversion_work_buf_list
6240 = Fcons (workbuf, Vcode_conversion_work_buf_list);
6241 record_unwind_protect (code_conversion_restore, buffer);
6242 return workbuf; 6174 return workbuf;
6243} 6175}
6244 6176
@@ -6249,18 +6181,16 @@ decode_coding_gap (coding, chars, bytes)
6249{ 6181{
6250 int count = specpdl_ptr - specpdl; 6182 int count = specpdl_ptr - specpdl;
6251 Lisp_Object attrs; 6183 Lisp_Object attrs;
6252 Lisp_Object buffer;
6253 6184
6254 buffer = Fcurrent_buffer (); 6185 code_conversion_save (0, 0);
6255 code_conversion_save (buffer, 0, 0);
6256 6186
6257 coding->src_object = buffer; 6187 coding->src_object = Fcurrent_buffer ();
6258 coding->src_chars = chars; 6188 coding->src_chars = chars;
6259 coding->src_bytes = bytes; 6189 coding->src_bytes = bytes;
6260 coding->src_pos = -chars; 6190 coding->src_pos = -chars;
6261 coding->src_pos_byte = -bytes; 6191 coding->src_pos_byte = -bytes;
6262 coding->src_multibyte = chars < bytes; 6192 coding->src_multibyte = chars < bytes;
6263 coding->dst_object = buffer; 6193 coding->dst_object = coding->src_object;
6264 coding->dst_pos = PT; 6194 coding->dst_pos = PT;
6265 coding->dst_pos_byte = PT_BYTE; 6195 coding->dst_pos_byte = PT_BYTE;
6266 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters); 6196 coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
@@ -6295,12 +6225,10 @@ encode_coding_gap (coding, chars, bytes)
6295 EMACS_INT chars, bytes; 6225 EMACS_INT chars, bytes;
6296{ 6226{
6297 int count = specpdl_ptr - specpdl; 6227 int count = specpdl_ptr - specpdl;
6298 Lisp_Object buffer;
6299 6228
6300 buffer = Fcurrent_buffer (); 6229 code_conversion_save (0, 0);
6301 code_conversion_save (buffer, 0, 0);
6302 6230
6303 coding->src_object = buffer; 6231 coding->src_object = Fcurrent_buffer ();
6304 coding->src_chars = chars; 6232 coding->src_chars = chars;
6305 coding->src_bytes = bytes; 6233 coding->src_bytes = bytes;
6306 coding->src_pos = -chars; 6234 coding->src_pos = -chars;
@@ -6409,14 +6337,14 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6409 || (! NILP (CODING_ATTR_POST_READ (attrs)) 6337 || (! NILP (CODING_ATTR_POST_READ (attrs))
6410 && NILP (dst_object))) 6338 && NILP (dst_object)))
6411 { 6339 {
6412 coding->dst_object = code_conversion_save (buffer, 1, 1); 6340 coding->dst_object = code_conversion_save (1, 1);
6413 coding->dst_pos = BEG; 6341 coding->dst_pos = BEG;
6414 coding->dst_pos_byte = BEG_BYTE; 6342 coding->dst_pos_byte = BEG_BYTE;
6415 coding->dst_multibyte = 1; 6343 coding->dst_multibyte = 1;
6416 } 6344 }
6417 else if (BUFFERP (dst_object)) 6345 else if (BUFFERP (dst_object))
6418 { 6346 {
6419 code_conversion_save (buffer, 0, 0); 6347 code_conversion_save (0, 0);
6420 coding->dst_object = dst_object; 6348 coding->dst_object = dst_object;
6421 coding->dst_pos = BUF_PT (XBUFFER (dst_object)); 6349 coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6422 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object)); 6350 coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
@@ -6425,7 +6353,7 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6425 } 6353 }
6426 else 6354 else
6427 { 6355 {
6428 code_conversion_save (buffer, 0, 0); 6356 code_conversion_save (0, 0);
6429 coding->dst_object = Qnil; 6357 coding->dst_object = Qnil;
6430 coding->dst_multibyte = 1; 6358 coding->dst_multibyte = 1;
6431 } 6359 }
@@ -6524,8 +6452,7 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6524 6452
6525 if (! NILP (CODING_ATTR_PRE_WRITE (attrs))) 6453 if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6526 { 6454 {
6527 coding->src_object = code_conversion_save (buffer, 1, 6455 coding->src_object = code_conversion_save (1, coding->src_multibyte);
6528 coding->src_multibyte);
6529 set_buffer_internal (XBUFFER (coding->src_object)); 6456 set_buffer_internal (XBUFFER (coding->src_object));
6530 if (STRINGP (src_object)) 6457 if (STRINGP (src_object))
6531 insert_from_string (src_object, from, from_byte, chars, bytes, 0); 6458 insert_from_string (src_object, from, from_byte, chars, bytes, 0);
@@ -6555,13 +6482,13 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6555 } 6482 }
6556 else if (STRINGP (src_object)) 6483 else if (STRINGP (src_object))
6557 { 6484 {
6558 code_conversion_save (buffer, 0, 0); 6485 code_conversion_save (0, 0);
6559 coding->src_pos = from; 6486 coding->src_pos = from;
6560 coding->src_pos_byte = from_byte; 6487 coding->src_pos_byte = from_byte;
6561 } 6488 }
6562 else if (BUFFERP (src_object)) 6489 else if (BUFFERP (src_object))
6563 { 6490 {
6564 code_conversion_save (buffer, 0, 0); 6491 code_conversion_save (0, 0);
6565 set_buffer_internal (XBUFFER (src_object)); 6492 set_buffer_internal (XBUFFER (src_object));
6566 if (EQ (src_object, dst_object)) 6493 if (EQ (src_object, dst_object))
6567 { 6494 {
@@ -6579,7 +6506,7 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6579 } 6506 }
6580 } 6507 }
6581 else 6508 else
6582 code_conversion_save (buffer, 0, 0); 6509 code_conversion_save (0, 0);
6583 6510
6584 if (BUFFERP (dst_object)) 6511 if (BUFFERP (dst_object))
6585 { 6512 {
@@ -6735,9 +6662,10 @@ If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */)
6735 detect only text-format. */ 6662 detect only text-format. */
6736 6663
6737Lisp_Object 6664Lisp_Object
6738detect_coding_system (src, src_bytes, highest, multibytep, coding_system) 6665detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
6666 coding_system)
6739 const unsigned char *src; 6667 const unsigned char *src;
6740 int src_bytes, highest; 6668 int src_chars, src_bytes, highest;
6741 int multibytep; 6669 int multibytep;
6742 Lisp_Object coding_system; 6670 Lisp_Object coding_system;
6743{ 6671{
@@ -6747,6 +6675,7 @@ detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
6747 struct coding_system coding; 6675 struct coding_system coding;
6748 int id; 6676 int id;
6749 struct coding_detection_info detect_info; 6677 struct coding_detection_info detect_info;
6678 enum coding_category base_category;
6750 6679
6751 if (NILP (coding_system)) 6680 if (NILP (coding_system))
6752 coding_system = Qundecided; 6681 coding_system = Qundecided;
@@ -6756,6 +6685,7 @@ detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
6756 coding_system = CODING_ATTR_BASE_NAME (attrs); 6685 coding_system = CODING_ATTR_BASE_NAME (attrs);
6757 6686
6758 coding.source = src; 6687 coding.source = src;
6688 coding.src_chars = src_chars;
6759 coding.src_bytes = src_bytes; 6689 coding.src_bytes = src_bytes;
6760 coding.src_multibyte = multibytep; 6690 coding.src_multibyte = multibytep;
6761 coding.consumed = 0; 6691 coding.consumed = 0;
@@ -6764,21 +6694,26 @@ detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
6764 detect_info.checked = detect_info.found = detect_info.rejected = 0; 6694 detect_info.checked = detect_info.found = detect_info.rejected = 0;
6765 6695
6766 /* At first, detect text-format if necessary. */ 6696 /* At first, detect text-format if necessary. */
6767 if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided) 6697 base_category = XINT (CODING_ATTR_CATEGORY (attrs));
6698 if (base_category == coding_category_undecided)
6768 { 6699 {
6769 enum coding_category category; 6700 enum coding_category category;
6770 struct coding_system *this; 6701 struct coding_system *this;
6771 int c, i; 6702 int c, i;
6772 6703
6773 for (; src < src_end; src++) 6704 /* Skip all ASCII bytes except for a few ISO2022 controls. */
6705 for (i = 0; src < src_end; i++, src++)
6774 { 6706 {
6775 c = *src; 6707 c = *src;
6776 if (c & 0x80 6708 if (c & 0x80 || (c < 0x20 && (c == 0
6777 || (c < 0x20 && (c == ISO_CODE_ESC 6709 || c == ISO_CODE_ESC
6778 || c == ISO_CODE_SI 6710 || c == ISO_CODE_SI
6779 || c == ISO_CODE_SO))) 6711 || c == ISO_CODE_SO)))
6780 break; 6712 break;
6781 } 6713 }
6714 /* Skipped bytes must be even for utf-16 detecor. */
6715 if (i % 2)
6716 src--;
6782 coding.head_ascii = src - coding.source; 6717 coding.head_ascii = src - coding.source;
6783 6718
6784 if (src < src_end) 6719 if (src < src_end)
@@ -6805,11 +6740,19 @@ detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
6805 if ((*(this->detector)) (&coding, &detect_info) 6740 if ((*(this->detector)) (&coding, &detect_info)
6806 && highest 6741 && highest
6807 && (detect_info.found & (1 << category))) 6742 && (detect_info.found & (1 << category)))
6808 break; 6743 {
6744 if (category == coding_category_utf_16_auto)
6745 {
6746 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6747 category = coding_category_utf_16_le;
6748 else
6749 category = coding_category_utf_16_be;
6750 }
6751 break;
6752 }
6809 } 6753 }
6810 } 6754 }
6811 6755
6812
6813 if (detect_info.rejected == CATEGORY_MASK_ANY) 6756 if (detect_info.rejected == CATEGORY_MASK_ANY)
6814 { 6757 {
6815 detect_info.found = CATEGORY_MASK_RAW_TEXT; 6758 detect_info.found = CATEGORY_MASK_RAW_TEXT;
@@ -6867,6 +6810,24 @@ detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
6867 detect_info.found |= found; 6810 detect_info.found |= found;
6868 } 6811 }
6869 } 6812 }
6813 else if (base_category == coding_category_utf_16_auto)
6814 {
6815 if (detect_coding_utf_16 (&coding, &detect_info))
6816 {
6817 enum coding_category category;
6818 struct coding_system *this;
6819
6820 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6821 this = coding_categories + coding_category_utf_16_le;
6822 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6823 this = coding_categories + coding_category_utf_16_be;
6824 else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
6825 this = coding_categories + coding_category_utf_16_be_nosig;
6826 else
6827 this = coding_categories + coding_category_utf_16_le_nosig;
6828 val = Fcons (make_number (this->id), Qnil);
6829 }
6830 }
6870 else 6831 else
6871 { 6832 {
6872 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs)); 6833 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
@@ -6969,7 +6930,7 @@ highest priority. */)
6969 move_gap_both (to, to_byte); 6930 move_gap_both (to, to_byte);
6970 6931
6971 return detect_coding_system (BYTE_POS_ADDR (from_byte), 6932 return detect_coding_system (BYTE_POS_ADDR (from_byte),
6972 to_byte - from_byte, 6933 to - from, to_byte - from_byte,
6973 !NILP (highest), 6934 !NILP (highest),
6974 !NILP (current_buffer 6935 !NILP (current_buffer
6975 ->enable_multibyte_characters), 6936 ->enable_multibyte_characters),
@@ -6992,7 +6953,8 @@ highest priority. */)
6992{ 6953{
6993 CHECK_STRING (string); 6954 CHECK_STRING (string);
6994 6955
6995 return detect_coding_system (SDATA (string), SBYTES (string), 6956 return detect_coding_system (SDATA (string),
6957 SCHARS (string), SBYTES (string),
6996 !NILP (highest), STRING_MULTIBYTE (string), 6958 !NILP (highest), STRING_MULTIBYTE (string),
6997 Qnil); 6959 Qnil);
6998} 6960}
@@ -8617,7 +8579,6 @@ init_coding_once ()
8617 iso_code_class[i] = ISO_graphic_plane_1; 8579 iso_code_class[i] = ISO_graphic_plane_1;
8618 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F; 8580 iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
8619 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF; 8581 iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
8620 iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
8621 iso_code_class[ISO_CODE_SO] = ISO_shift_out; 8582 iso_code_class[ISO_CODE_SO] = ISO_shift_out;
8622 iso_code_class[ISO_CODE_SI] = ISO_shift_in; 8583 iso_code_class[ISO_CODE_SI] = ISO_shift_in;
8623 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7; 8584 iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
@@ -8655,11 +8616,13 @@ syms_of_coding ()
8655 staticpro (&Vbig5_coding_system); 8616 staticpro (&Vbig5_coding_system);
8656 Vbig5_coding_system = Qnil; 8617 Vbig5_coding_system = Qnil;
8657 8618
8658 staticpro (&Vcode_conversion_work_buf_list); 8619 staticpro (&Vcode_conversion_reused_workbuf);
8659 Vcode_conversion_work_buf_list = Qnil; 8620 Vcode_conversion_reused_workbuf = Qnil;
8621
8622 staticpro (&Vcode_conversion_workbuf_name);
8623 Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
8660 8624
8661 staticpro (&Vcode_conversion_reused_work_buf); 8625 reused_workbuf_in_use = 0;
8662 Vcode_conversion_reused_work_buf = Qnil;
8663 8626
8664 DEFSYM (Qcharset, "charset"); 8627 DEFSYM (Qcharset, "charset");
8665 DEFSYM (Qtarget_idx, "target-idx"); 8628 DEFSYM (Qtarget_idx, "target-idx");