diff options
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 753 |
1 files changed, 358 insertions, 395 deletions
diff --git a/src/coding.c b/src/coding.c index 76b61e36baa..b62a4133a6b 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -491,7 +491,6 @@ enum iso_code_class_type | |||
| 491 | ISO_control_0, /* Control codes in the range | 491 | ISO_control_0, /* Control codes in the range |
| 492 | 0x00..0x1F and 0x7F, except for the | 492 | 0x00..0x1F and 0x7F, except for the |
| 493 | following 5 codes. */ | 493 | following 5 codes. */ |
| 494 | ISO_carriage_return, /* ISO_CODE_CR (0x0D) */ | ||
| 495 | ISO_shift_out, /* ISO_CODE_SO (0x0E) */ | 494 | ISO_shift_out, /* ISO_CODE_SO (0x0E) */ |
| 496 | ISO_shift_in, /* ISO_CODE_SI (0x0F) */ | 495 | ISO_shift_in, /* ISO_CODE_SI (0x0F) */ |
| 497 | ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */ | 496 | ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */ |
| @@ -710,13 +709,10 @@ static struct coding_system coding_categories[coding_category_max]; | |||
| 710 | #define max(a, b) ((a) > (b) ? (a) : (b)) | 709 | #define max(a, b) ((a) > (b) ? (a) : (b)) |
| 711 | #endif | 710 | #endif |
| 712 | 711 | ||
| 713 | #define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \ | 712 | #define CODING_GET_INFO(coding, attrs, charset_list) \ |
| 714 | do { \ | 713 | do { \ |
| 715 | attrs = CODING_ID_ATTRS (coding->id); \ | 714 | (attrs) = CODING_ID_ATTRS ((coding)->id); \ |
| 716 | eol_type = CODING_ID_EOL_TYPE (coding->id); \ | 715 | (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \ |
| 717 | if (VECTORP (eol_type)) \ | ||
| 718 | eol_type = Qunix; \ | ||
| 719 | charset_list = CODING_ATTR_CHARSET_LIST (attrs); \ | ||
| 720 | } while (0) | 716 | } while (0) |
| 721 | 717 | ||
| 722 | 718 | ||
| @@ -1132,9 +1128,9 @@ decode_coding_utf_8 (coding) | |||
| 1132 | int *charbuf_end = charbuf + coding->charbuf_size; | 1128 | int *charbuf_end = charbuf + coding->charbuf_size; |
| 1133 | int consumed_chars = 0, consumed_chars_base; | 1129 | int consumed_chars = 0, consumed_chars_base; |
| 1134 | int multibytep = coding->src_multibyte; | 1130 | int multibytep = coding->src_multibyte; |
| 1135 | Lisp_Object attr, eol_type, charset_list; | 1131 | Lisp_Object attr, charset_list; |
| 1136 | 1132 | ||
| 1137 | CODING_GET_INFO (coding, attr, eol_type, charset_list); | 1133 | CODING_GET_INFO (coding, attr, charset_list); |
| 1138 | 1134 | ||
| 1139 | while (1) | 1135 | while (1) |
| 1140 | { | 1136 | { |
| @@ -1150,21 +1146,6 @@ decode_coding_utf_8 (coding) | |||
| 1150 | if (UTF_8_1_OCTET_P(c1)) | 1146 | if (UTF_8_1_OCTET_P(c1)) |
| 1151 | { | 1147 | { |
| 1152 | c = c1; | 1148 | c = c1; |
| 1153 | if (c == '\r') | ||
| 1154 | { | ||
| 1155 | if (EQ (eol_type, Qdos)) | ||
| 1156 | { | ||
| 1157 | if (src == src_end) | ||
| 1158 | { | ||
| 1159 | coding->result = CODING_RESULT_INSUFFICIENT_SRC; | ||
| 1160 | goto no_more_source; | ||
| 1161 | } | ||
| 1162 | if (*src == '\n') | ||
| 1163 | ONE_MORE_BYTE (c); | ||
| 1164 | } | ||
| 1165 | else if (EQ (eol_type, Qmac)) | ||
| 1166 | c = '\n'; | ||
| 1167 | } | ||
| 1168 | } | 1149 | } |
| 1169 | else | 1150 | else |
| 1170 | { | 1151 | { |
| @@ -1325,27 +1306,52 @@ detect_coding_utf_16 (coding, detect_info) | |||
| 1325 | int c1, c2; | 1306 | int c1, c2; |
| 1326 | 1307 | ||
| 1327 | detect_info->checked |= CATEGORY_MASK_UTF_16; | 1308 | detect_info->checked |= CATEGORY_MASK_UTF_16; |
| 1328 | |||
| 1329 | if (coding->mode & CODING_MODE_LAST_BLOCK | 1309 | if (coding->mode & CODING_MODE_LAST_BLOCK |
| 1330 | && (coding->src_bytes & 1)) | 1310 | && (coding->src_chars & 1)) |
| 1331 | { | 1311 | { |
| 1332 | detect_info->rejected |= CATEGORY_MASK_UTF_16; | 1312 | detect_info->rejected |= CATEGORY_MASK_UTF_16; |
| 1333 | return 0; | 1313 | return 0; |
| 1334 | } | 1314 | } |
| 1315 | |||
| 1335 | ONE_MORE_BYTE (c1); | 1316 | ONE_MORE_BYTE (c1); |
| 1336 | ONE_MORE_BYTE (c2); | 1317 | ONE_MORE_BYTE (c2); |
| 1337 | |||
| 1338 | if ((c1 == 0xFF) && (c2 == 0xFE)) | 1318 | if ((c1 == 0xFF) && (c2 == 0xFE)) |
| 1339 | { | 1319 | { |
| 1340 | detect_info->found |= (CATEGORY_MASK_UTF_16_LE | 1320 | detect_info->found |= (CATEGORY_MASK_UTF_16_LE |
| 1341 | | CATEGORY_MASK_UTF_16_AUTO); | 1321 | | CATEGORY_MASK_UTF_16_AUTO); |
| 1342 | detect_info->rejected |= CATEGORY_MASK_UTF_16_BE; | 1322 | detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE |
| 1323 | | CATEGORY_MASK_UTF_16_BE_NOSIG | ||
| 1324 | | CATEGORY_MASK_UTF_16_LE_NOSIG); | ||
| 1343 | } | 1325 | } |
| 1344 | else if ((c1 == 0xFE) && (c2 == 0xFF)) | 1326 | else if ((c1 == 0xFE) && (c2 == 0xFF)) |
| 1345 | { | 1327 | { |
| 1346 | detect_info->found |= (CATEGORY_MASK_UTF_16_BE | 1328 | detect_info->found |= (CATEGORY_MASK_UTF_16_BE |
| 1347 | | CATEGORY_MASK_UTF_16_AUTO); | 1329 | | CATEGORY_MASK_UTF_16_AUTO); |
| 1348 | detect_info->rejected |= CATEGORY_MASK_UTF_16_LE; | 1330 | detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE |
| 1331 | | CATEGORY_MASK_UTF_16_BE_NOSIG | ||
| 1332 | | CATEGORY_MASK_UTF_16_LE_NOSIG); | ||
| 1333 | } | ||
| 1334 | else | ||
| 1335 | { | ||
| 1336 | unsigned char b1[256], b2[256]; | ||
| 1337 | int b1_variants = 1, b2_variants = 1; | ||
| 1338 | int n; | ||
| 1339 | |||
| 1340 | bzero (b1, 256), bzero (b2, 256); | ||
| 1341 | b1[c1]++, b2[c2]++; | ||
| 1342 | for (n = 0; n < 256 && src < src_end; n++) | ||
| 1343 | { | ||
| 1344 | ONE_MORE_BYTE (c1); | ||
| 1345 | ONE_MORE_BYTE (c2); | ||
| 1346 | if (! b1[c1++]) b1_variants++; | ||
| 1347 | if (! b2[c2++]) b2_variants++; | ||
| 1348 | } | ||
| 1349 | if (b1_variants < b2_variants) | ||
| 1350 | detect_info->found |= CATEGORY_MASK_UTF_16_BE_NOSIG; | ||
| 1351 | else | ||
| 1352 | detect_info->found |= CATEGORY_MASK_UTF_16_LE_NOSIG; | ||
| 1353 | detect_info->rejected | ||
| 1354 | |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE); | ||
| 1349 | } | 1355 | } |
| 1350 | no_more_source: | 1356 | no_more_source: |
| 1351 | return 1; | 1357 | return 1; |
| @@ -1365,9 +1371,9 @@ decode_coding_utf_16 (coding) | |||
| 1365 | enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding); | 1371 | enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding); |
| 1366 | enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding); | 1372 | enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding); |
| 1367 | int surrogate = CODING_UTF_16_SURROGATE (coding); | 1373 | int surrogate = CODING_UTF_16_SURROGATE (coding); |
| 1368 | Lisp_Object attr, eol_type, charset_list; | 1374 | Lisp_Object attr, charset_list; |
| 1369 | 1375 | ||
| 1370 | CODING_GET_INFO (coding, attr, eol_type, charset_list); | 1376 | CODING_GET_INFO (coding, attr, charset_list); |
| 1371 | 1377 | ||
| 1372 | if (bom == utf_16_with_bom) | 1378 | if (bom == utf_16_with_bom) |
| 1373 | { | 1379 | { |
| @@ -1460,10 +1466,10 @@ encode_coding_utf_16 (coding) | |||
| 1460 | enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding); | 1466 | enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding); |
| 1461 | int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian; | 1467 | int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian; |
| 1462 | int produced_chars = 0; | 1468 | int produced_chars = 0; |
| 1463 | Lisp_Object attrs, eol_type, charset_list; | 1469 | Lisp_Object attrs, charset_list; |
| 1464 | int c; | 1470 | int c; |
| 1465 | 1471 | ||
| 1466 | CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 1472 | CODING_GET_INFO (coding, attrs, charset_list); |
| 1467 | 1473 | ||
| 1468 | if (bom != utf_16_without_bom) | 1474 | if (bom != utf_16_without_bom) |
| 1469 | { | 1475 | { |
| @@ -1928,12 +1934,12 @@ decode_coding_emacs_mule (coding) | |||
| 1928 | int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; | 1934 | int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; |
| 1929 | int consumed_chars = 0, consumed_chars_base; | 1935 | int consumed_chars = 0, consumed_chars_base; |
| 1930 | int multibytep = coding->src_multibyte; | 1936 | int multibytep = coding->src_multibyte; |
| 1931 | Lisp_Object attrs, eol_type, charset_list; | 1937 | Lisp_Object attrs, charset_list; |
| 1932 | int char_offset = coding->produced_char; | 1938 | int char_offset = coding->produced_char; |
| 1933 | int last_offset = char_offset; | 1939 | int last_offset = char_offset; |
| 1934 | int last_id = charset_ascii; | 1940 | int last_id = charset_ascii; |
| 1935 | 1941 | ||
| 1936 | CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 1942 | CODING_GET_INFO (coding, attrs, charset_list); |
| 1937 | 1943 | ||
| 1938 | while (1) | 1944 | while (1) |
| 1939 | { | 1945 | { |
| @@ -1949,21 +1955,6 @@ decode_coding_emacs_mule (coding) | |||
| 1949 | 1955 | ||
| 1950 | if (c < 0x80) | 1956 | if (c < 0x80) |
| 1951 | { | 1957 | { |
| 1952 | if (c == '\r') | ||
| 1953 | { | ||
| 1954 | if (EQ (eol_type, Qdos)) | ||
| 1955 | { | ||
| 1956 | if (src == src_end) | ||
| 1957 | { | ||
| 1958 | coding->result = CODING_RESULT_INSUFFICIENT_SRC; | ||
| 1959 | goto no_more_source; | ||
| 1960 | } | ||
| 1961 | if (*src == '\n') | ||
| 1962 | ONE_MORE_BYTE (c); | ||
| 1963 | } | ||
| 1964 | else if (EQ (eol_type, Qmac)) | ||
| 1965 | c = '\n'; | ||
| 1966 | } | ||
| 1967 | *charbuf++ = c; | 1958 | *charbuf++ = c; |
| 1968 | char_offset++; | 1959 | char_offset++; |
| 1969 | } | 1960 | } |
| @@ -2052,11 +2043,11 @@ encode_coding_emacs_mule (coding) | |||
| 2052 | unsigned char *dst_end = coding->destination + coding->dst_bytes; | 2043 | unsigned char *dst_end = coding->destination + coding->dst_bytes; |
| 2053 | int safe_room = 8; | 2044 | int safe_room = 8; |
| 2054 | int produced_chars = 0; | 2045 | int produced_chars = 0; |
| 2055 | Lisp_Object attrs, eol_type, charset_list; | 2046 | Lisp_Object attrs, charset_list; |
| 2056 | int c; | 2047 | int c; |
| 2057 | int preferred_charset_id = -1; | 2048 | int preferred_charset_id = -1; |
| 2058 | 2049 | ||
| 2059 | CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 2050 | CODING_GET_INFO (coding, attrs, charset_list); |
| 2060 | if (! EQ (charset_list, Vemacs_mule_charset_list)) | 2051 | if (! EQ (charset_list, Vemacs_mule_charset_list)) |
| 2061 | { | 2052 | { |
| 2062 | CODING_ATTR_CHARSET_LIST (attrs) | 2053 | CODING_ATTR_CHARSET_LIST (attrs) |
| @@ -2806,12 +2797,12 @@ decode_coding_iso_2022 (coding) | |||
| 2806 | int components[MAX_COMPOSITION_COMPONENTS * 2 + 1]; | 2797 | int components[MAX_COMPOSITION_COMPONENTS * 2 + 1]; |
| 2807 | int component_idx; | 2798 | int component_idx; |
| 2808 | int component_len; | 2799 | int component_len; |
| 2809 | Lisp_Object attrs, eol_type, charset_list; | 2800 | Lisp_Object attrs, charset_list; |
| 2810 | int char_offset = coding->produced_char; | 2801 | int char_offset = coding->produced_char; |
| 2811 | int last_offset = char_offset; | 2802 | int last_offset = char_offset; |
| 2812 | int last_id = charset_ascii; | 2803 | int last_id = charset_ascii; |
| 2813 | 2804 | ||
| 2814 | CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 2805 | CODING_GET_INFO (coding, attrs, charset_list); |
| 2815 | setup_iso_safe_charsets (attrs); | 2806 | setup_iso_safe_charsets (attrs); |
| 2816 | 2807 | ||
| 2817 | while (1) | 2808 | while (1) |
| @@ -2877,24 +2868,6 @@ decode_coding_iso_2022 (coding) | |||
| 2877 | charset = CHARSET_FROM_ID (charset_id_1); | 2868 | charset = CHARSET_FROM_ID (charset_id_1); |
| 2878 | break; | 2869 | break; |
| 2879 | 2870 | ||
| 2880 | case ISO_carriage_return: | ||
| 2881 | if (c1 == '\r') | ||
| 2882 | { | ||
| 2883 | if (EQ (eol_type, Qdos)) | ||
| 2884 | { | ||
| 2885 | if (src == src_end) | ||
| 2886 | { | ||
| 2887 | coding->result = CODING_RESULT_INSUFFICIENT_SRC; | ||
| 2888 | goto no_more_source; | ||
| 2889 | } | ||
| 2890 | if (*src == '\n') | ||
| 2891 | ONE_MORE_BYTE (c1); | ||
| 2892 | } | ||
| 2893 | else if (EQ (eol_type, Qmac)) | ||
| 2894 | c1 = '\n'; | ||
| 2895 | } | ||
| 2896 | /* fall through */ | ||
| 2897 | |||
| 2898 | case ISO_control_0: | 2871 | case ISO_control_0: |
| 2899 | MAYBE_FINISH_COMPOSITION (); | 2872 | MAYBE_FINISH_COMPOSITION (); |
| 2900 | charset = CHARSET_FROM_ID (charset_ascii); | 2873 | charset = CHARSET_FROM_ID (charset_ascii); |
| @@ -3648,7 +3621,11 @@ encode_coding_iso_2022 (coding) | |||
| 3648 | int c; | 3621 | int c; |
| 3649 | int preferred_charset_id = -1; | 3622 | int preferred_charset_id = -1; |
| 3650 | 3623 | ||
| 3651 | CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 3624 | CODING_GET_INFO (coding, attrs, charset_list); |
| 3625 | eol_type = CODING_ID_EOL_TYPE (coding->id); | ||
| 3626 | if (VECTORP (eol_type)) | ||
| 3627 | eol_type = Qunix; | ||
| 3628 | |||
| 3652 | setup_iso_safe_charsets (attrs); | 3629 | setup_iso_safe_charsets (attrs); |
| 3653 | /* Charset list may have been changed. */ | 3630 | /* Charset list may have been changed. */ |
| 3654 | charset_list = CODING_ATTR_CHARSET_LIST (attrs); \ | 3631 | charset_list = CODING_ATTR_CHARSET_LIST (attrs); \ |
| @@ -3931,12 +3908,12 @@ decode_coding_sjis (coding) | |||
| 3931 | int consumed_chars = 0, consumed_chars_base; | 3908 | int consumed_chars = 0, consumed_chars_base; |
| 3932 | int multibytep = coding->src_multibyte; | 3909 | int multibytep = coding->src_multibyte; |
| 3933 | struct charset *charset_roman, *charset_kanji, *charset_kana; | 3910 | struct charset *charset_roman, *charset_kanji, *charset_kana; |
| 3934 | Lisp_Object attrs, eol_type, charset_list, val; | 3911 | Lisp_Object attrs, charset_list, val; |
| 3935 | int char_offset = coding->produced_char; | 3912 | int char_offset = coding->produced_char; |
| 3936 | int last_offset = char_offset; | 3913 | int last_offset = char_offset; |
| 3937 | int last_id = charset_ascii; | 3914 | int last_id = charset_ascii; |
| 3938 | 3915 | ||
| 3939 | CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 3916 | CODING_GET_INFO (coding, attrs, charset_list); |
| 3940 | 3917 | ||
| 3941 | val = charset_list; | 3918 | val = charset_list; |
| 3942 | charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 3919 | charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
| @@ -3946,6 +3923,7 @@ decode_coding_sjis (coding) | |||
| 3946 | while (1) | 3923 | while (1) |
| 3947 | { | 3924 | { |
| 3948 | int c, c1; | 3925 | int c, c1; |
| 3926 | struct charset *charset; | ||
| 3949 | 3927 | ||
| 3950 | src_base = src; | 3928 | src_base = src; |
| 3951 | consumed_chars_base = consumed_chars; | 3929 | consumed_chars_base = consumed_chars; |
| @@ -3955,60 +3933,40 @@ decode_coding_sjis (coding) | |||
| 3955 | 3933 | ||
| 3956 | ONE_MORE_BYTE (c); | 3934 | ONE_MORE_BYTE (c); |
| 3957 | 3935 | ||
| 3958 | if (c == '\r') | 3936 | if (c < 0x80) |
| 3959 | { | 3937 | charset = charset_roman; |
| 3960 | if (EQ (eol_type, Qdos)) | ||
| 3961 | { | ||
| 3962 | if (src == src_end) | ||
| 3963 | { | ||
| 3964 | coding->result = CODING_RESULT_INSUFFICIENT_SRC; | ||
| 3965 | goto no_more_source; | ||
| 3966 | } | ||
| 3967 | if (*src == '\n') | ||
| 3968 | ONE_MORE_BYTE (c); | ||
| 3969 | } | ||
| 3970 | else if (EQ (eol_type, Qmac)) | ||
| 3971 | c = '\n'; | ||
| 3972 | } | ||
| 3973 | else | 3938 | else |
| 3974 | { | 3939 | { |
| 3975 | struct charset *charset; | 3940 | if (c >= 0xF0) |
| 3976 | 3941 | goto invalid_code; | |
| 3977 | if (c < 0x80) | 3942 | if (c < 0xA0 || c >= 0xE0) |
| 3978 | charset = charset_roman; | ||
| 3979 | else | ||
| 3980 | { | 3943 | { |
| 3981 | if (c >= 0xF0) | 3944 | /* SJIS -> JISX0208 */ |
| 3982 | goto invalid_code; | 3945 | ONE_MORE_BYTE (c1); |
| 3983 | if (c < 0xA0 || c >= 0xE0) | 3946 | if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC) |
| 3984 | { | ||
| 3985 | /* SJIS -> JISX0208 */ | ||
| 3986 | ONE_MORE_BYTE (c1); | ||
| 3987 | if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC) | ||
| 3988 | goto invalid_code; | ||
| 3989 | c = (c << 8) | c1; | ||
| 3990 | SJIS_TO_JIS (c); | ||
| 3991 | charset = charset_kanji; | ||
| 3992 | } | ||
| 3993 | else if (c > 0xA0) | ||
| 3994 | { | ||
| 3995 | /* SJIS -> JISX0201-Kana */ | ||
| 3996 | c &= 0x7F; | ||
| 3997 | charset = charset_kana; | ||
| 3998 | } | ||
| 3999 | else | ||
| 4000 | goto invalid_code; | 3947 | goto invalid_code; |
| 3948 | c = (c << 8) | c1; | ||
| 3949 | SJIS_TO_JIS (c); | ||
| 3950 | charset = charset_kanji; | ||
| 4001 | } | 3951 | } |
| 4002 | if (charset->id != charset_ascii | 3952 | else if (c > 0xA0) |
| 4003 | && last_id != charset->id) | ||
| 4004 | { | 3953 | { |
| 4005 | if (last_id != charset_ascii) | 3954 | /* SJIS -> JISX0201-Kana */ |
| 4006 | ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); | 3955 | c &= 0x7F; |
| 4007 | last_id = charset->id; | 3956 | charset = charset_kana; |
| 4008 | last_offset = char_offset; | ||
| 4009 | } | 3957 | } |
| 4010 | CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c); | 3958 | else |
| 3959 | goto invalid_code; | ||
| 4011 | } | 3960 | } |
| 3961 | if (charset->id != charset_ascii | ||
| 3962 | && last_id != charset->id) | ||
| 3963 | { | ||
| 3964 | if (last_id != charset_ascii) | ||
| 3965 | ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); | ||
| 3966 | last_id = charset->id; | ||
| 3967 | last_offset = char_offset; | ||
| 3968 | } | ||
| 3969 | CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c); | ||
| 4012 | *charbuf++ = c; | 3970 | *charbuf++ = c; |
| 4013 | char_offset++; | 3971 | char_offset++; |
| 4014 | continue; | 3972 | continue; |
| @@ -4042,12 +4000,12 @@ decode_coding_big5 (coding) | |||
| 4042 | int consumed_chars = 0, consumed_chars_base; | 4000 | int consumed_chars = 0, consumed_chars_base; |
| 4043 | int multibytep = coding->src_multibyte; | 4001 | int multibytep = coding->src_multibyte; |
| 4044 | struct charset *charset_roman, *charset_big5; | 4002 | struct charset *charset_roman, *charset_big5; |
| 4045 | Lisp_Object attrs, eol_type, charset_list, val; | 4003 | Lisp_Object attrs, charset_list, val; |
| 4046 | int char_offset = coding->produced_char; | 4004 | int char_offset = coding->produced_char; |
| 4047 | int last_offset = char_offset; | 4005 | int last_offset = char_offset; |
| 4048 | int last_id = charset_ascii; | 4006 | int last_id = charset_ascii; |
| 4049 | 4007 | ||
| 4050 | CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 4008 | CODING_GET_INFO (coding, attrs, charset_list); |
| 4051 | val = charset_list; | 4009 | val = charset_list; |
| 4052 | charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 4010 | charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
| 4053 | charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); | 4011 | charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); |
| @@ -4055,6 +4013,7 @@ decode_coding_big5 (coding) | |||
| 4055 | while (1) | 4013 | while (1) |
| 4056 | { | 4014 | { |
| 4057 | int c, c1; | 4015 | int c, c1; |
| 4016 | struct charset *charset; | ||
| 4058 | 4017 | ||
| 4059 | src_base = src; | 4018 | src_base = src; |
| 4060 | consumed_chars_base = consumed_chars; | 4019 | consumed_chars_base = consumed_chars; |
| @@ -4064,48 +4023,28 @@ decode_coding_big5 (coding) | |||
| 4064 | 4023 | ||
| 4065 | ONE_MORE_BYTE (c); | 4024 | ONE_MORE_BYTE (c); |
| 4066 | 4025 | ||
| 4067 | if (c == '\r') | 4026 | if (c < 0x80) |
| 4027 | charset = charset_roman; | ||
| 4028 | else | ||
| 4068 | { | 4029 | { |
| 4069 | if (EQ (eol_type, Qdos)) | 4030 | /* BIG5 -> Big5 */ |
| 4070 | { | 4031 | if (c < 0xA1 || c > 0xFE) |
| 4071 | if (src == src_end) | 4032 | goto invalid_code; |
| 4072 | { | 4033 | ONE_MORE_BYTE (c1); |
| 4073 | coding->result = CODING_RESULT_INSUFFICIENT_SRC; | 4034 | if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE) |
| 4074 | goto no_more_source; | 4035 | goto invalid_code; |
| 4075 | } | 4036 | c = c << 8 | c1; |
| 4076 | if (*src == '\n') | 4037 | charset = charset_big5; |
| 4077 | ONE_MORE_BYTE (c); | ||
| 4078 | } | ||
| 4079 | else if (EQ (eol_type, Qmac)) | ||
| 4080 | c = '\n'; | ||
| 4081 | } | 4038 | } |
| 4082 | else | 4039 | if (charset->id != charset_ascii |
| 4040 | && last_id != charset->id) | ||
| 4083 | { | 4041 | { |
| 4084 | struct charset *charset; | 4042 | if (last_id != charset_ascii) |
| 4085 | if (c < 0x80) | 4043 | ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); |
| 4086 | charset = charset_roman; | 4044 | last_id = charset->id; |
| 4087 | else | 4045 | last_offset = char_offset; |
| 4088 | { | ||
| 4089 | /* BIG5 -> Big5 */ | ||
| 4090 | if (c < 0xA1 || c > 0xFE) | ||
| 4091 | goto invalid_code; | ||
| 4092 | ONE_MORE_BYTE (c1); | ||
| 4093 | if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE) | ||
| 4094 | goto invalid_code; | ||
| 4095 | c = c << 8 | c1; | ||
| 4096 | charset = charset_big5; | ||
| 4097 | } | ||
| 4098 | if (charset->id != charset_ascii | ||
| 4099 | && last_id != charset->id) | ||
| 4100 | { | ||
| 4101 | if (last_id != charset_ascii) | ||
| 4102 | ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); | ||
| 4103 | last_id = charset->id; | ||
| 4104 | last_offset = char_offset; | ||
| 4105 | } | ||
| 4106 | CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c); | ||
| 4107 | } | 4046 | } |
| 4108 | 4047 | CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c); | |
| 4109 | *charbuf++ = c; | 4048 | *charbuf++ = c; |
| 4110 | char_offset++; | 4049 | char_offset++; |
| 4111 | continue; | 4050 | continue; |
| @@ -4146,12 +4085,12 @@ encode_coding_sjis (coding) | |||
| 4146 | unsigned char *dst_end = coding->destination + coding->dst_bytes; | 4085 | unsigned char *dst_end = coding->destination + coding->dst_bytes; |
| 4147 | int safe_room = 4; | 4086 | int safe_room = 4; |
| 4148 | int produced_chars = 0; | 4087 | int produced_chars = 0; |
| 4149 | Lisp_Object attrs, eol_type, charset_list, val; | 4088 | Lisp_Object attrs, charset_list, val; |
| 4150 | int ascii_compatible; | 4089 | int ascii_compatible; |
| 4151 | struct charset *charset_roman, *charset_kanji, *charset_kana; | 4090 | struct charset *charset_roman, *charset_kanji, *charset_kana; |
| 4152 | int c; | 4091 | int c; |
| 4153 | 4092 | ||
| 4154 | CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 4093 | CODING_GET_INFO (coding, attrs, charset_list); |
| 4155 | val = charset_list; | 4094 | val = charset_list; |
| 4156 | charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 4095 | charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
| 4157 | charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 4096 | charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
| @@ -4221,12 +4160,12 @@ encode_coding_big5 (coding) | |||
| 4221 | unsigned char *dst_end = coding->destination + coding->dst_bytes; | 4160 | unsigned char *dst_end = coding->destination + coding->dst_bytes; |
| 4222 | int safe_room = 4; | 4161 | int safe_room = 4; |
| 4223 | int produced_chars = 0; | 4162 | int produced_chars = 0; |
| 4224 | Lisp_Object attrs, eol_type, charset_list, val; | 4163 | Lisp_Object attrs, charset_list, val; |
| 4225 | int ascii_compatible; | 4164 | int ascii_compatible; |
| 4226 | struct charset *charset_roman, *charset_big5; | 4165 | struct charset *charset_roman, *charset_big5; |
| 4227 | int c; | 4166 | int c; |
| 4228 | 4167 | ||
| 4229 | CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 4168 | CODING_GET_INFO (coding, attrs, charset_list); |
| 4230 | val = charset_list; | 4169 | val = charset_list; |
| 4231 | charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); | 4170 | charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); |
| 4232 | charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); | 4171 | charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); |
| @@ -4340,9 +4279,9 @@ decode_coding_ccl (coding) | |||
| 4340 | struct ccl_program ccl; | 4279 | struct ccl_program ccl; |
| 4341 | int source_charbuf[1024]; | 4280 | int source_charbuf[1024]; |
| 4342 | int source_byteidx[1024]; | 4281 | int source_byteidx[1024]; |
| 4343 | Lisp_Object attrs, eol_type, charset_list; | 4282 | Lisp_Object attrs, charset_list; |
| 4344 | 4283 | ||
| 4345 | CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 4284 | CODING_GET_INFO (coding, attrs, charset_list); |
| 4346 | setup_ccl_program (&ccl, CODING_CCL_DECODER (coding)); | 4285 | setup_ccl_program (&ccl, CODING_CCL_DECODER (coding)); |
| 4347 | 4286 | ||
| 4348 | while (src < src_end) | 4287 | while (src < src_end) |
| @@ -4420,9 +4359,9 @@ encode_coding_ccl (coding) | |||
| 4420 | unsigned char *adjusted_dst_end = dst_end - 1; | 4359 | unsigned char *adjusted_dst_end = dst_end - 1; |
| 4421 | int destination_charbuf[1024]; | 4360 | int destination_charbuf[1024]; |
| 4422 | int i, produced_chars = 0; | 4361 | int i, produced_chars = 0; |
| 4423 | Lisp_Object attrs, eol_type, charset_list; | 4362 | Lisp_Object attrs, charset_list; |
| 4424 | 4363 | ||
| 4425 | CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 4364 | CODING_GET_INFO (coding, attrs, charset_list); |
| 4426 | setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding)); | 4365 | setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding)); |
| 4427 | 4366 | ||
| 4428 | ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK; | 4367 | ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK; |
| @@ -4621,17 +4560,22 @@ decode_coding_charset (coding) | |||
| 4621 | int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; | 4560 | int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; |
| 4622 | int consumed_chars = 0, consumed_chars_base; | 4561 | int consumed_chars = 0, consumed_chars_base; |
| 4623 | int multibytep = coding->src_multibyte; | 4562 | int multibytep = coding->src_multibyte; |
| 4624 | Lisp_Object attrs, eol_type, charset_list, valids; | 4563 | Lisp_Object attrs, charset_list, valids; |
| 4625 | int char_offset = coding->produced_char; | 4564 | int char_offset = coding->produced_char; |
| 4626 | int last_offset = char_offset; | 4565 | int last_offset = char_offset; |
| 4627 | int last_id = charset_ascii; | 4566 | int last_id = charset_ascii; |
| 4628 | 4567 | ||
| 4629 | CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 4568 | CODING_GET_INFO (coding, attrs, charset_list); |
| 4630 | valids = AREF (attrs, coding_attr_charset_valids); | 4569 | valids = AREF (attrs, coding_attr_charset_valids); |
| 4631 | 4570 | ||
| 4632 | while (1) | 4571 | while (1) |
| 4633 | { | 4572 | { |
| 4634 | int c; | 4573 | int c; |
| 4574 | Lisp_Object val; | ||
| 4575 | struct charset *charset; | ||
| 4576 | int dim; | ||
| 4577 | int len = 1; | ||
| 4578 | unsigned code; | ||
| 4635 | 4579 | ||
| 4636 | src_base = src; | 4580 | src_base = src; |
| 4637 | consumed_chars_base = consumed_chars; | 4581 | consumed_chars_base = consumed_chars; |
| @@ -4640,37 +4584,32 @@ decode_coding_charset (coding) | |||
| 4640 | break; | 4584 | break; |
| 4641 | 4585 | ||
| 4642 | ONE_MORE_BYTE (c); | 4586 | ONE_MORE_BYTE (c); |
| 4643 | if (c == '\r') | 4587 | code = c; |
| 4588 | |||
| 4589 | val = AREF (valids, c); | ||
| 4590 | if (NILP (val)) | ||
| 4591 | goto invalid_code; | ||
| 4592 | if (INTEGERP (val)) | ||
| 4644 | { | 4593 | { |
| 4645 | /* Here we assume that no charset maps '\r' to something | 4594 | charset = CHARSET_FROM_ID (XFASTINT (val)); |
| 4646 | else. */ | 4595 | dim = CHARSET_DIMENSION (charset); |
| 4647 | if (EQ (eol_type, Qdos)) | 4596 | while (len < dim) |
| 4648 | { | 4597 | { |
| 4649 | if (src == src_end) | 4598 | ONE_MORE_BYTE (c); |
| 4650 | { | 4599 | code = (code << 8) | c; |
| 4651 | coding->result = CODING_RESULT_INSUFFICIENT_SRC; | 4600 | len++; |
| 4652 | goto no_more_source; | ||
| 4653 | } | ||
| 4654 | if (*src == '\n') | ||
| 4655 | ONE_MORE_BYTE (c); | ||
| 4656 | } | 4601 | } |
| 4657 | else if (EQ (eol_type, Qmac)) | 4602 | CODING_DECODE_CHAR (coding, src, src_base, src_end, |
| 4658 | c = '\n'; | 4603 | charset, code, c); |
| 4659 | } | 4604 | } |
| 4660 | else | 4605 | else |
| 4661 | { | 4606 | { |
| 4662 | Lisp_Object val; | 4607 | /* VAL is a list of charset IDs. It is assured that the |
| 4663 | struct charset *charset; | 4608 | list is sorted by charset dimensions (smaller one |
| 4664 | int dim; | 4609 | comes first). */ |
| 4665 | int len = 1; | 4610 | while (CONSP (val)) |
| 4666 | unsigned code = c; | ||
| 4667 | |||
| 4668 | val = AREF (valids, c); | ||
| 4669 | if (NILP (val)) | ||
| 4670 | goto invalid_code; | ||
| 4671 | if (INTEGERP (val)) | ||
| 4672 | { | 4611 | { |
| 4673 | charset = CHARSET_FROM_ID (XFASTINT (val)); | 4612 | charset = CHARSET_FROM_ID (XFASTINT (XCAR (val))); |
| 4674 | dim = CHARSET_DIMENSION (charset); | 4613 | dim = CHARSET_DIMENSION (charset); |
| 4675 | while (len < dim) | 4614 | while (len < dim) |
| 4676 | { | 4615 | { |
| @@ -4678,42 +4617,24 @@ decode_coding_charset (coding) | |||
| 4678 | code = (code << 8) | c; | 4617 | code = (code << 8) | c; |
| 4679 | len++; | 4618 | len++; |
| 4680 | } | 4619 | } |
| 4681 | CODING_DECODE_CHAR (coding, src, src_base, src_end, | 4620 | CODING_DECODE_CHAR (coding, src, src_base, |
| 4682 | charset, code, c); | 4621 | src_end, charset, code, c); |
| 4683 | } | 4622 | if (c >= 0) |
| 4684 | else | 4623 | break; |
| 4685 | { | 4624 | val = XCDR (val); |
| 4686 | /* VAL is a list of charset IDs. It is assured that the | ||
| 4687 | list is sorted by charset dimensions (smaller one | ||
| 4688 | comes first). */ | ||
| 4689 | while (CONSP (val)) | ||
| 4690 | { | ||
| 4691 | charset = CHARSET_FROM_ID (XFASTINT (XCAR (val))); | ||
| 4692 | dim = CHARSET_DIMENSION (charset); | ||
| 4693 | while (len < dim) | ||
| 4694 | { | ||
| 4695 | ONE_MORE_BYTE (c); | ||
| 4696 | code = (code << 8) | c; | ||
| 4697 | len++; | ||
| 4698 | } | ||
| 4699 | CODING_DECODE_CHAR (coding, src, src_base, | ||
| 4700 | src_end, charset, code, c); | ||
| 4701 | if (c >= 0) | ||
| 4702 | break; | ||
| 4703 | val = XCDR (val); | ||
| 4704 | } | ||
| 4705 | } | ||
| 4706 | if (c < 0) | ||
| 4707 | goto invalid_code; | ||
| 4708 | if (charset->id != charset_ascii | ||
| 4709 | && last_id != charset->id) | ||
| 4710 | { | ||
| 4711 | if (last_id != charset_ascii) | ||
| 4712 | ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); | ||
| 4713 | last_id = charset->id; | ||
| 4714 | last_offset = char_offset; | ||
| 4715 | } | 4625 | } |
| 4716 | } | 4626 | } |
| 4627 | if (c < 0) | ||
| 4628 | goto invalid_code; | ||
| 4629 | if (charset->id != charset_ascii | ||
| 4630 | && last_id != charset->id) | ||
| 4631 | { | ||
| 4632 | if (last_id != charset_ascii) | ||
| 4633 | ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); | ||
| 4634 | last_id = charset->id; | ||
| 4635 | last_offset = char_offset; | ||
| 4636 | } | ||
| 4637 | |||
| 4717 | *charbuf++ = c; | 4638 | *charbuf++ = c; |
| 4718 | char_offset++; | 4639 | char_offset++; |
| 4719 | continue; | 4640 | continue; |
| @@ -4746,11 +4667,11 @@ encode_coding_charset (coding) | |||
| 4746 | unsigned char *dst_end = coding->destination + coding->dst_bytes; | 4667 | unsigned char *dst_end = coding->destination + coding->dst_bytes; |
| 4747 | int safe_room = MAX_MULTIBYTE_LENGTH; | 4668 | int safe_room = MAX_MULTIBYTE_LENGTH; |
| 4748 | int produced_chars = 0; | 4669 | int produced_chars = 0; |
| 4749 | Lisp_Object attrs, eol_type, charset_list; | 4670 | Lisp_Object attrs, charset_list; |
| 4750 | int ascii_compatible; | 4671 | int ascii_compatible; |
| 4751 | int c; | 4672 | int c; |
| 4752 | 4673 | ||
| 4753 | CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 4674 | CODING_GET_INFO (coding, attrs, charset_list); |
| 4754 | ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); | 4675 | ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); |
| 4755 | 4676 | ||
| 4756 | while (charbuf < charbuf_end) | 4677 | while (charbuf < charbuf_end) |
| @@ -5250,7 +5171,7 @@ detect_eol (source, src_bytes, category) | |||
| 5250 | } | 5171 | } |
| 5251 | 5172 | ||
| 5252 | 5173 | ||
| 5253 | static void | 5174 | static Lisp_Object |
| 5254 | adjust_coding_eol_type (coding, eol_seen) | 5175 | adjust_coding_eol_type (coding, eol_seen) |
| 5255 | struct coding_system *coding; | 5176 | struct coding_system *coding; |
| 5256 | int eol_seen; | 5177 | int eol_seen; |
| @@ -5259,11 +5180,21 @@ adjust_coding_eol_type (coding, eol_seen) | |||
| 5259 | 5180 | ||
| 5260 | eol_type = CODING_ID_EOL_TYPE (coding->id); | 5181 | eol_type = CODING_ID_EOL_TYPE (coding->id); |
| 5261 | if (eol_seen & EOL_SEEN_LF) | 5182 | if (eol_seen & EOL_SEEN_LF) |
| 5262 | coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0)); | 5183 | { |
| 5184 | coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0)); | ||
| 5185 | eol_type = Qunix; | ||
| 5186 | } | ||
| 5263 | else if (eol_seen & EOL_SEEN_CRLF) | 5187 | else if (eol_seen & EOL_SEEN_CRLF) |
| 5264 | coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1)); | 5188 | { |
| 5189 | coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1)); | ||
| 5190 | eol_type = Qdos; | ||
| 5191 | } | ||
| 5265 | else if (eol_seen & EOL_SEEN_CR) | 5192 | else if (eol_seen & EOL_SEEN_CR) |
| 5266 | coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2)); | 5193 | { |
| 5194 | coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2)); | ||
| 5195 | eol_type = Qmac; | ||
| 5196 | } | ||
| 5197 | return eol_type; | ||
| 5267 | } | 5198 | } |
| 5268 | 5199 | ||
| 5269 | /* Detect how a text specified in CODING is encoded. If a coding | 5200 | /* Detect how a text specified in CODING is encoded. If a coding |
| @@ -5289,14 +5220,18 @@ detect_coding (coding) | |||
| 5289 | { | 5220 | { |
| 5290 | int c, i; | 5221 | int c, i; |
| 5291 | 5222 | ||
| 5292 | for (src = coding->source; src < src_end; src++) | 5223 | for (i = 0, src = coding->source; src < src_end; i++, src++) |
| 5293 | { | 5224 | { |
| 5294 | c = *src; | 5225 | c = *src; |
| 5295 | if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC | 5226 | if (c & 0x80 || (c < 0x20 && (c == 0 |
| 5227 | || c == ISO_CODE_ESC | ||
| 5296 | || c == ISO_CODE_SI | 5228 | || c == ISO_CODE_SI |
| 5297 | || c == ISO_CODE_SO))) | 5229 | || c == ISO_CODE_SO))) |
| 5298 | break; | 5230 | break; |
| 5299 | } | 5231 | } |
| 5232 | /* Skipped bytes must be even for utf-16 detector. */ | ||
| 5233 | if (i % 2) | ||
| 5234 | src--; | ||
| 5300 | coding->head_ascii = src - (coding->source + coding->consumed); | 5235 | coding->head_ascii = src - (coding->source + coding->consumed); |
| 5301 | 5236 | ||
| 5302 | if (coding->head_ascii < coding->src_bytes) | 5237 | if (coding->head_ascii < coding->src_bytes) |
| @@ -5324,7 +5259,16 @@ detect_coding (coding) | |||
| 5324 | } | 5259 | } |
| 5325 | else if ((*(this->detector)) (coding, &detect_info) | 5260 | else if ((*(this->detector)) (coding, &detect_info) |
| 5326 | && detect_info.found & (1 << category)) | 5261 | && detect_info.found & (1 << category)) |
| 5327 | break; | 5262 | { |
| 5263 | if (category == coding_category_utf_16_auto) | ||
| 5264 | { | ||
| 5265 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | ||
| 5266 | category = coding_category_utf_16_le; | ||
| 5267 | else | ||
| 5268 | category = coding_category_utf_16_be; | ||
| 5269 | } | ||
| 5270 | break; | ||
| 5271 | } | ||
| 5328 | } | 5272 | } |
| 5329 | if (i < coding_category_raw_text) | 5273 | if (i < coding_category_raw_text) |
| 5330 | setup_coding_system (CODING_ID_NAME (this->id), coding); | 5274 | setup_coding_system (CODING_ID_NAME (this->id), coding); |
| @@ -5340,7 +5284,8 @@ detect_coding (coding) | |||
| 5340 | } | 5284 | } |
| 5341 | } | 5285 | } |
| 5342 | } | 5286 | } |
| 5343 | else if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qutf_16)) | 5287 | else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) |
| 5288 | == coding_category_utf_16_auto) | ||
| 5344 | { | 5289 | { |
| 5345 | Lisp_Object coding_systems; | 5290 | Lisp_Object coding_systems; |
| 5346 | struct coding_detection_info detect_info; | 5291 | struct coding_detection_info detect_info; |
| @@ -5349,32 +5294,14 @@ detect_coding (coding) | |||
| 5349 | = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom); | 5294 | = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom); |
| 5350 | detect_info.found = detect_info.rejected = 0; | 5295 | detect_info.found = detect_info.rejected = 0; |
| 5351 | if (CONSP (coding_systems) | 5296 | if (CONSP (coding_systems) |
| 5352 | && detect_coding_utf_16 (coding, &detect_info) | 5297 | && detect_coding_utf_16 (coding, &detect_info)) |
| 5353 | && (detect_info.found & (CATEGORY_MASK_UTF_16_LE | ||
| 5354 | | CATEGORY_MASK_UTF_16_BE))) | ||
| 5355 | { | 5298 | { |
| 5356 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | 5299 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) |
| 5357 | setup_coding_system (XCAR (coding_systems), coding); | 5300 | setup_coding_system (XCAR (coding_systems), coding); |
| 5358 | else | 5301 | else if (detect_info.found & CATEGORY_MASK_UTF_16_BE) |
| 5359 | setup_coding_system (XCDR (coding_systems), coding); | 5302 | setup_coding_system (XCDR (coding_systems), coding); |
| 5360 | } | 5303 | } |
| 5361 | } | 5304 | } |
| 5362 | |||
| 5363 | attrs = CODING_ID_ATTRS (coding->id); | ||
| 5364 | coding_type = CODING_ATTR_TYPE (attrs); | ||
| 5365 | |||
| 5366 | /* If we have not yet decided the EOL type, detect it now. But, the | ||
| 5367 | detection is impossible for a CCL based coding system, in which | ||
| 5368 | case, we detct the EOL type after decoding. */ | ||
| 5369 | if (VECTORP (CODING_ID_EOL_TYPE (coding->id)) | ||
| 5370 | && ! EQ (coding_type, Qccl)) | ||
| 5371 | { | ||
| 5372 | int eol_seen = detect_eol (coding->source, coding->src_bytes, | ||
| 5373 | (enum coding_category) XINT (CODING_ATTR_CATEGORY (attrs))); | ||
| 5374 | |||
| 5375 | if (eol_seen != EOL_SEEN_NONE) | ||
| 5376 | adjust_coding_eol_type (coding, eol_seen); | ||
| 5377 | } | ||
| 5378 | } | 5305 | } |
| 5379 | 5306 | ||
| 5380 | 5307 | ||
| @@ -5382,13 +5309,24 @@ static void | |||
| 5382 | decode_eol (coding) | 5309 | decode_eol (coding) |
| 5383 | struct coding_system *coding; | 5310 | struct coding_system *coding; |
| 5384 | { | 5311 | { |
| 5385 | if (VECTORP (CODING_ID_EOL_TYPE (coding->id))) | 5312 | Lisp_Object eol_type; |
| 5313 | unsigned char *p, *pbeg, *pend; | ||
| 5314 | |||
| 5315 | eol_type = CODING_ID_EOL_TYPE (coding->id); | ||
| 5316 | if (EQ (eol_type, Qunix)) | ||
| 5317 | return; | ||
| 5318 | |||
| 5319 | if (NILP (coding->dst_object)) | ||
| 5320 | pbeg = coding->destination; | ||
| 5321 | else | ||
| 5322 | pbeg = BYTE_POS_ADDR (coding->dst_pos_byte); | ||
| 5323 | pend = pbeg + coding->produced; | ||
| 5324 | |||
| 5325 | if (VECTORP (eol_type)) | ||
| 5386 | { | 5326 | { |
| 5387 | unsigned char *p = CHAR_POS_ADDR (coding->dst_pos); | ||
| 5388 | unsigned char *pend = p + coding->produced; | ||
| 5389 | int eol_seen = EOL_SEEN_NONE; | 5327 | int eol_seen = EOL_SEEN_NONE; |
| 5390 | 5328 | ||
| 5391 | for (; p < pend; p++) | 5329 | for (p = pbeg; p < pend; p++) |
| 5392 | { | 5330 | { |
| 5393 | if (*p == '\n') | 5331 | if (*p == '\n') |
| 5394 | eol_seen |= EOL_SEEN_LF; | 5332 | eol_seen |= EOL_SEEN_LF; |
| @@ -5403,42 +5341,48 @@ decode_eol (coding) | |||
| 5403 | eol_seen |= EOL_SEEN_CR; | 5341 | eol_seen |= EOL_SEEN_CR; |
| 5404 | } | 5342 | } |
| 5405 | } | 5343 | } |
| 5344 | if (eol_seen != EOL_SEEN_NONE | ||
| 5345 | && eol_seen != EOL_SEEN_LF | ||
| 5346 | && eol_seen != EOL_SEEN_CRLF | ||
| 5347 | && eol_seen != EOL_SEEN_CR) | ||
| 5348 | eol_seen = EOL_SEEN_LF; | ||
| 5406 | if (eol_seen != EOL_SEEN_NONE) | 5349 | if (eol_seen != EOL_SEEN_NONE) |
| 5407 | adjust_coding_eol_type (coding, eol_seen); | 5350 | eol_type = adjust_coding_eol_type (coding, eol_seen); |
| 5408 | } | 5351 | } |
| 5409 | 5352 | ||
| 5410 | if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac)) | 5353 | if (EQ (eol_type, Qmac)) |
| 5411 | { | 5354 | { |
| 5412 | unsigned char *p = CHAR_POS_ADDR (coding->dst_pos); | 5355 | for (p = pbeg; p < pend; p++) |
| 5413 | unsigned char *pend = p + coding->produced; | ||
| 5414 | |||
| 5415 | for (; p < pend; p++) | ||
| 5416 | if (*p == '\r') | 5356 | if (*p == '\r') |
| 5417 | *p = '\n'; | 5357 | *p = '\n'; |
| 5418 | } | 5358 | } |
| 5419 | else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos)) | 5359 | else if (EQ (eol_type, Qdos)) |
| 5420 | { | 5360 | { |
| 5421 | unsigned char *p, *pbeg, *pend; | 5361 | int n = 0; |
| 5422 | Lisp_Object undo_list; | ||
| 5423 | |||
| 5424 | move_gap_both (coding->dst_pos + coding->produced_char, | ||
| 5425 | coding->dst_pos_byte + coding->produced); | ||
| 5426 | undo_list = current_buffer->undo_list; | ||
| 5427 | current_buffer->undo_list = Qt; | ||
| 5428 | del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0); | ||
| 5429 | current_buffer->undo_list = undo_list; | ||
| 5430 | pbeg = GPT_ADDR; | ||
| 5431 | pend = pbeg + coding->produced; | ||
| 5432 | 5362 | ||
| 5433 | for (p = pend - 1; p >= pbeg; p--) | 5363 | if (NILP (coding->dst_object)) |
| 5434 | if (*p == '\r') | 5364 | { |
| 5435 | { | 5365 | for (p = pend - 2; p >= pbeg; p--) |
| 5436 | safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1); | 5366 | if (*p == '\r') |
| 5437 | pend--; | 5367 | { |
| 5438 | } | 5368 | safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1); |
| 5439 | coding->produced_char -= coding->produced - (pend - pbeg); | 5369 | n++; |
| 5440 | coding->produced = pend - pbeg; | 5370 | } |
| 5441 | insert_from_gap (coding->produced_char, coding->produced); | 5371 | } |
| 5372 | else | ||
| 5373 | { | ||
| 5374 | for (p = pend - 2; p >= pbeg; p--) | ||
| 5375 | if (*p == '\r') | ||
| 5376 | { | ||
| 5377 | int pos_byte = coding->dst_pos_byte + (p - pbeg); | ||
| 5378 | int pos = BYTE_TO_CHAR (pos_byte); | ||
| 5379 | |||
| 5380 | del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0); | ||
| 5381 | n++; | ||
| 5382 | } | ||
| 5383 | } | ||
| 5384 | coding->produced -= n; | ||
| 5385 | coding->produced_char -= n; | ||
| 5442 | } | 5386 | } |
| 5443 | } | 5387 | } |
| 5444 | 5388 | ||
| @@ -5796,6 +5740,7 @@ decode_coding (coding) | |||
| 5796 | struct coding_system *coding; | 5740 | struct coding_system *coding; |
| 5797 | { | 5741 | { |
| 5798 | Lisp_Object attrs; | 5742 | Lisp_Object attrs; |
| 5743 | Lisp_Object undo_list; | ||
| 5799 | 5744 | ||
| 5800 | if (BUFFERP (coding->src_object) | 5745 | if (BUFFERP (coding->src_object) |
| 5801 | && coding->src_pos > 0 | 5746 | && coding->src_pos > 0 |
| @@ -5803,12 +5748,15 @@ decode_coding (coding) | |||
| 5803 | && coding->src_pos + coding->src_chars > GPT) | 5748 | && coding->src_pos + coding->src_chars > GPT) |
| 5804 | move_gap_both (coding->src_pos, coding->src_pos_byte); | 5749 | move_gap_both (coding->src_pos, coding->src_pos_byte); |
| 5805 | 5750 | ||
| 5751 | undo_list = Qt; | ||
| 5806 | if (BUFFERP (coding->dst_object)) | 5752 | if (BUFFERP (coding->dst_object)) |
| 5807 | { | 5753 | { |
| 5808 | if (current_buffer != XBUFFER (coding->dst_object)) | 5754 | if (current_buffer != XBUFFER (coding->dst_object)) |
| 5809 | set_buffer_internal (XBUFFER (coding->dst_object)); | 5755 | set_buffer_internal (XBUFFER (coding->dst_object)); |
| 5810 | if (GPT != PT) | 5756 | if (GPT != PT) |
| 5811 | move_gap_both (PT, PT_BYTE); | 5757 | move_gap_both (PT, PT_BYTE); |
| 5758 | undo_list = current_buffer->undo_list; | ||
| 5759 | current_buffer->undo_list = Qt; | ||
| 5812 | } | 5760 | } |
| 5813 | 5761 | ||
| 5814 | coding->consumed = coding->consumed_char = 0; | 5762 | coding->consumed = coding->consumed_char = 0; |
| @@ -5838,11 +5786,6 @@ decode_coding (coding) | |||
| 5838 | while (coding->consumed < coding->src_bytes | 5786 | while (coding->consumed < coding->src_bytes |
| 5839 | && ! coding->result); | 5787 | && ! coding->result); |
| 5840 | 5788 | ||
| 5841 | if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl) | ||
| 5842 | && SYMBOLP (CODING_ID_EOL_TYPE (coding->id)) | ||
| 5843 | && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)) | ||
| 5844 | decode_eol (coding); | ||
| 5845 | |||
| 5846 | coding->carryover_bytes = 0; | 5789 | coding->carryover_bytes = 0; |
| 5847 | if (coding->consumed < coding->src_bytes) | 5790 | if (coding->consumed < coding->src_bytes) |
| 5848 | { | 5791 | { |
| @@ -5880,6 +5823,13 @@ decode_coding (coding) | |||
| 5880 | coding->consumed = coding->src_bytes; | 5823 | coding->consumed = coding->src_bytes; |
| 5881 | } | 5824 | } |
| 5882 | 5825 | ||
| 5826 | if (BUFFERP (coding->dst_object)) | ||
| 5827 | { | ||
| 5828 | current_buffer->undo_list = undo_list; | ||
| 5829 | record_insert (coding->dst_pos, coding->produced_char); | ||
| 5830 | } | ||
| 5831 | if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)) | ||
| 5832 | decode_eol (coding); | ||
| 5883 | return coding->result; | 5833 | return coding->result; |
| 5884 | } | 5834 | } |
| 5885 | 5835 | ||
| @@ -6039,7 +5989,7 @@ consume_chars (coding) | |||
| 6039 | stop_charset = end_pos; | 5989 | stop_charset = end_pos; |
| 6040 | } | 5990 | } |
| 6041 | 5991 | ||
| 6042 | /* Compensate for CRLF and annotation. */ | 5992 | /* Compensate for CRLF and conversion. */ |
| 6043 | buf_end -= 1 + MAX_ANNOTATION_LENGTH; | 5993 | buf_end -= 1 + MAX_ANNOTATION_LENGTH; |
| 6044 | while (buf < buf_end) | 5994 | while (buf < buf_end) |
| 6045 | { | 5995 | { |
| @@ -6154,91 +6104,73 @@ encode_coding (coding) | |||
| 6154 | } | 6104 | } |
| 6155 | 6105 | ||
| 6156 | 6106 | ||
| 6157 | /* Stack of working buffers used in code conversion. An nil element | 6107 | /* Name (or base name) of work buffer for code conversion. */ |
| 6158 | means that the code conversion of that level is not using a working | 6108 | static Lisp_Object Vcode_conversion_workbuf_name; |
| 6159 | buffer. */ | ||
| 6160 | Lisp_Object Vcode_conversion_work_buf_list; | ||
| 6161 | 6109 | ||
| 6162 | /* A working buffer used by the top level conversion. */ | 6110 | /* A working buffer used by the top level conversion. Once it is |
| 6163 | Lisp_Object Vcode_conversion_reused_work_buf; | 6111 | created, it is never destroyed. It has the name |
| 6112 | Vcode_conversion_workbuf_name. The other working buffers are | ||
| 6113 | destroyed after the use is finished, and their names are modified | ||
| 6114 | versions of Vcode_conversion_workbuf_name. */ | ||
| 6115 | static Lisp_Object Vcode_conversion_reused_workbuf; | ||
| 6164 | 6116 | ||
| 6117 | /* 1 iff Vcode_conversion_reused_workbuf is already in use. */ | ||
| 6118 | static int reused_workbuf_in_use; | ||
| 6165 | 6119 | ||
| 6166 | /* Return a working buffer that can be freely used by the following | 6120 | |
| 6167 | code conversion. MULTIBYTEP specifies the multibyteness of the | 6121 | /* Return a working buffer of code convesion. MULTIBYTE specifies the |
| 6168 | buffer. */ | 6122 | multibyteness of returning buffer. */ |
| 6169 | 6123 | ||
| 6170 | Lisp_Object | 6124 | Lisp_Object |
| 6171 | make_conversion_work_buffer (multibytep, depth) | 6125 | make_conversion_work_buffer (multibyte) |
| 6172 | int multibytep, depth; | ||
| 6173 | { | 6126 | { |
| 6174 | struct buffer *current = current_buffer; | 6127 | Lisp_Object name, workbuf; |
| 6175 | Lisp_Object buf, name; | 6128 | struct buffer *current; |
| 6176 | 6129 | ||
| 6177 | if (depth == 0) | 6130 | if (reused_workbuf_in_use++) |
| 6178 | { | 6131 | name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil); |
| 6179 | if (NILP (Vcode_conversion_reused_work_buf)) | ||
| 6180 | Vcode_conversion_reused_work_buf | ||
| 6181 | = Fget_buffer_create (build_string (" *code-converting-work<0>*")); | ||
| 6182 | buf = Vcode_conversion_reused_work_buf; | ||
| 6183 | } | ||
| 6184 | else | 6132 | else |
| 6185 | { | 6133 | name = Vcode_conversion_workbuf_name; |
| 6186 | if (depth < 0) | 6134 | workbuf = Fget_buffer_create (name); |
| 6187 | { | 6135 | current = current_buffer; |
| 6188 | name = build_string (" *code-converting-work*"); | 6136 | set_buffer_internal (XBUFFER (workbuf)); |
| 6189 | name = Fgenerate_new_buffer_name (name, Qnil); | 6137 | Ferase_buffer (); |
| 6190 | } | ||
| 6191 | else | ||
| 6192 | { | ||
| 6193 | char str[128]; | ||
| 6194 | |||
| 6195 | sprintf (str, " *code-converting-work*<%d>", depth); | ||
| 6196 | name = build_string (str); | ||
| 6197 | } | ||
| 6198 | buf = Fget_buffer_create (name); | ||
| 6199 | } | ||
| 6200 | set_buffer_internal (XBUFFER (buf)); | ||
| 6201 | current_buffer->undo_list = Qt; | 6138 | current_buffer->undo_list = Qt; |
| 6202 | Ferase_buffer (); | 6139 | current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil; |
| 6203 | Fset_buffer_multibyte (multibytep ? Qt : Qnil); | ||
| 6204 | set_buffer_internal (current); | 6140 | set_buffer_internal (current); |
| 6205 | return buf; | 6141 | return workbuf; |
| 6206 | } | 6142 | } |
| 6207 | 6143 | ||
| 6144 | |||
| 6208 | static Lisp_Object | 6145 | static Lisp_Object |
| 6209 | code_conversion_restore (buffer) | 6146 | code_conversion_restore (arg) |
| 6210 | Lisp_Object buffer; | 6147 | Lisp_Object arg; |
| 6211 | { | 6148 | { |
| 6212 | Lisp_Object workbuf; | 6149 | Lisp_Object current, workbuf; |
| 6213 | 6150 | ||
| 6214 | workbuf = XCAR (Vcode_conversion_work_buf_list); | 6151 | current = XCAR (arg); |
| 6215 | if (! NILP (workbuf) | 6152 | workbuf = XCDR (arg); |
| 6216 | && ! EQ (workbuf, Vcode_conversion_reused_work_buf) | 6153 | if (! NILP (workbuf)) |
| 6217 | && ! NILP (Fbuffer_live_p (workbuf))) | 6154 | { |
| 6218 | Fkill_buffer (workbuf); | 6155 | if (EQ (workbuf, Vcode_conversion_reused_workbuf)) |
| 6219 | Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list); | 6156 | reused_workbuf_in_use = 0; |
| 6220 | set_buffer_internal (XBUFFER (buffer)); | 6157 | else if (! NILP (Fbuffer_live_p (workbuf))) |
| 6158 | Fkill_buffer (workbuf); | ||
| 6159 | } | ||
| 6160 | set_buffer_internal (XBUFFER (current)); | ||
| 6221 | return Qnil; | 6161 | return Qnil; |
| 6222 | } | 6162 | } |
| 6223 | 6163 | ||
| 6224 | static Lisp_Object | 6164 | Lisp_Object |
| 6225 | code_conversion_save (buffer, with_work_buf, multibyte) | 6165 | code_conversion_save (with_work_buf, multibyte) |
| 6226 | Lisp_Object buffer; | ||
| 6227 | int with_work_buf, multibyte; | 6166 | int with_work_buf, multibyte; |
| 6228 | { | 6167 | { |
| 6229 | Lisp_Object workbuf; | 6168 | Lisp_Object workbuf = Qnil; |
| 6230 | 6169 | ||
| 6231 | if (with_work_buf) | 6170 | if (with_work_buf) |
| 6232 | { | 6171 | workbuf = make_conversion_work_buffer (multibyte); |
| 6233 | int depth = XINT (Flength (Vcode_conversion_work_buf_list)); | 6172 | record_unwind_protect (code_conversion_restore, |
| 6234 | 6173 | Fcons (Fcurrent_buffer (), workbuf)); | |
| 6235 | workbuf = make_conversion_work_buffer (multibyte, depth); | ||
| 6236 | } | ||
| 6237 | else | ||
| 6238 | workbuf = Qnil; | ||
| 6239 | Vcode_conversion_work_buf_list | ||
| 6240 | = Fcons (workbuf, Vcode_conversion_work_buf_list); | ||
| 6241 | record_unwind_protect (code_conversion_restore, buffer); | ||
| 6242 | return workbuf; | 6174 | return workbuf; |
| 6243 | } | 6175 | } |
| 6244 | 6176 | ||
| @@ -6249,18 +6181,16 @@ decode_coding_gap (coding, chars, bytes) | |||
| 6249 | { | 6181 | { |
| 6250 | int count = specpdl_ptr - specpdl; | 6182 | int count = specpdl_ptr - specpdl; |
| 6251 | Lisp_Object attrs; | 6183 | Lisp_Object attrs; |
| 6252 | Lisp_Object buffer; | ||
| 6253 | 6184 | ||
| 6254 | buffer = Fcurrent_buffer (); | 6185 | code_conversion_save (0, 0); |
| 6255 | code_conversion_save (buffer, 0, 0); | ||
| 6256 | 6186 | ||
| 6257 | coding->src_object = buffer; | 6187 | coding->src_object = Fcurrent_buffer (); |
| 6258 | coding->src_chars = chars; | 6188 | coding->src_chars = chars; |
| 6259 | coding->src_bytes = bytes; | 6189 | coding->src_bytes = bytes; |
| 6260 | coding->src_pos = -chars; | 6190 | coding->src_pos = -chars; |
| 6261 | coding->src_pos_byte = -bytes; | 6191 | coding->src_pos_byte = -bytes; |
| 6262 | coding->src_multibyte = chars < bytes; | 6192 | coding->src_multibyte = chars < bytes; |
| 6263 | coding->dst_object = buffer; | 6193 | coding->dst_object = coding->src_object; |
| 6264 | coding->dst_pos = PT; | 6194 | coding->dst_pos = PT; |
| 6265 | coding->dst_pos_byte = PT_BYTE; | 6195 | coding->dst_pos_byte = PT_BYTE; |
| 6266 | coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters); | 6196 | coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters); |
| @@ -6295,12 +6225,10 @@ encode_coding_gap (coding, chars, bytes) | |||
| 6295 | EMACS_INT chars, bytes; | 6225 | EMACS_INT chars, bytes; |
| 6296 | { | 6226 | { |
| 6297 | int count = specpdl_ptr - specpdl; | 6227 | int count = specpdl_ptr - specpdl; |
| 6298 | Lisp_Object buffer; | ||
| 6299 | 6228 | ||
| 6300 | buffer = Fcurrent_buffer (); | 6229 | code_conversion_save (0, 0); |
| 6301 | code_conversion_save (buffer, 0, 0); | ||
| 6302 | 6230 | ||
| 6303 | coding->src_object = buffer; | 6231 | coding->src_object = Fcurrent_buffer (); |
| 6304 | coding->src_chars = chars; | 6232 | coding->src_chars = chars; |
| 6305 | coding->src_bytes = bytes; | 6233 | coding->src_bytes = bytes; |
| 6306 | coding->src_pos = -chars; | 6234 | coding->src_pos = -chars; |
| @@ -6409,14 +6337,14 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte, | |||
| 6409 | || (! NILP (CODING_ATTR_POST_READ (attrs)) | 6337 | || (! NILP (CODING_ATTR_POST_READ (attrs)) |
| 6410 | && NILP (dst_object))) | 6338 | && NILP (dst_object))) |
| 6411 | { | 6339 | { |
| 6412 | coding->dst_object = code_conversion_save (buffer, 1, 1); | 6340 | coding->dst_object = code_conversion_save (1, 1); |
| 6413 | coding->dst_pos = BEG; | 6341 | coding->dst_pos = BEG; |
| 6414 | coding->dst_pos_byte = BEG_BYTE; | 6342 | coding->dst_pos_byte = BEG_BYTE; |
| 6415 | coding->dst_multibyte = 1; | 6343 | coding->dst_multibyte = 1; |
| 6416 | } | 6344 | } |
| 6417 | else if (BUFFERP (dst_object)) | 6345 | else if (BUFFERP (dst_object)) |
| 6418 | { | 6346 | { |
| 6419 | code_conversion_save (buffer, 0, 0); | 6347 | code_conversion_save (0, 0); |
| 6420 | coding->dst_object = dst_object; | 6348 | coding->dst_object = dst_object; |
| 6421 | coding->dst_pos = BUF_PT (XBUFFER (dst_object)); | 6349 | coding->dst_pos = BUF_PT (XBUFFER (dst_object)); |
| 6422 | coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object)); | 6350 | coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object)); |
| @@ -6425,7 +6353,7 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte, | |||
| 6425 | } | 6353 | } |
| 6426 | else | 6354 | else |
| 6427 | { | 6355 | { |
| 6428 | code_conversion_save (buffer, 0, 0); | 6356 | code_conversion_save (0, 0); |
| 6429 | coding->dst_object = Qnil; | 6357 | coding->dst_object = Qnil; |
| 6430 | coding->dst_multibyte = 1; | 6358 | coding->dst_multibyte = 1; |
| 6431 | } | 6359 | } |
| @@ -6524,8 +6452,7 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte, | |||
| 6524 | 6452 | ||
| 6525 | if (! NILP (CODING_ATTR_PRE_WRITE (attrs))) | 6453 | if (! NILP (CODING_ATTR_PRE_WRITE (attrs))) |
| 6526 | { | 6454 | { |
| 6527 | coding->src_object = code_conversion_save (buffer, 1, | 6455 | coding->src_object = code_conversion_save (1, coding->src_multibyte); |
| 6528 | coding->src_multibyte); | ||
| 6529 | set_buffer_internal (XBUFFER (coding->src_object)); | 6456 | set_buffer_internal (XBUFFER (coding->src_object)); |
| 6530 | if (STRINGP (src_object)) | 6457 | if (STRINGP (src_object)) |
| 6531 | insert_from_string (src_object, from, from_byte, chars, bytes, 0); | 6458 | insert_from_string (src_object, from, from_byte, chars, bytes, 0); |
| @@ -6555,13 +6482,13 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte, | |||
| 6555 | } | 6482 | } |
| 6556 | else if (STRINGP (src_object)) | 6483 | else if (STRINGP (src_object)) |
| 6557 | { | 6484 | { |
| 6558 | code_conversion_save (buffer, 0, 0); | 6485 | code_conversion_save (0, 0); |
| 6559 | coding->src_pos = from; | 6486 | coding->src_pos = from; |
| 6560 | coding->src_pos_byte = from_byte; | 6487 | coding->src_pos_byte = from_byte; |
| 6561 | } | 6488 | } |
| 6562 | else if (BUFFERP (src_object)) | 6489 | else if (BUFFERP (src_object)) |
| 6563 | { | 6490 | { |
| 6564 | code_conversion_save (buffer, 0, 0); | 6491 | code_conversion_save (0, 0); |
| 6565 | set_buffer_internal (XBUFFER (src_object)); | 6492 | set_buffer_internal (XBUFFER (src_object)); |
| 6566 | if (EQ (src_object, dst_object)) | 6493 | if (EQ (src_object, dst_object)) |
| 6567 | { | 6494 | { |
| @@ -6579,7 +6506,7 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte, | |||
| 6579 | } | 6506 | } |
| 6580 | } | 6507 | } |
| 6581 | else | 6508 | else |
| 6582 | code_conversion_save (buffer, 0, 0); | 6509 | code_conversion_save (0, 0); |
| 6583 | 6510 | ||
| 6584 | if (BUFFERP (dst_object)) | 6511 | if (BUFFERP (dst_object)) |
| 6585 | { | 6512 | { |
| @@ -6735,9 +6662,10 @@ If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */) | |||
| 6735 | detect only text-format. */ | 6662 | detect only text-format. */ |
| 6736 | 6663 | ||
| 6737 | Lisp_Object | 6664 | Lisp_Object |
| 6738 | detect_coding_system (src, src_bytes, highest, multibytep, coding_system) | 6665 | detect_coding_system (src, src_chars, src_bytes, highest, multibytep, |
| 6666 | coding_system) | ||
| 6739 | const unsigned char *src; | 6667 | const unsigned char *src; |
| 6740 | int src_bytes, highest; | 6668 | int src_chars, src_bytes, highest; |
| 6741 | int multibytep; | 6669 | int multibytep; |
| 6742 | Lisp_Object coding_system; | 6670 | Lisp_Object coding_system; |
| 6743 | { | 6671 | { |
| @@ -6747,6 +6675,7 @@ detect_coding_system (src, src_bytes, highest, multibytep, coding_system) | |||
| 6747 | struct coding_system coding; | 6675 | struct coding_system coding; |
| 6748 | int id; | 6676 | int id; |
| 6749 | struct coding_detection_info detect_info; | 6677 | struct coding_detection_info detect_info; |
| 6678 | enum coding_category base_category; | ||
| 6750 | 6679 | ||
| 6751 | if (NILP (coding_system)) | 6680 | if (NILP (coding_system)) |
| 6752 | coding_system = Qundecided; | 6681 | coding_system = Qundecided; |
| @@ -6756,6 +6685,7 @@ detect_coding_system (src, src_bytes, highest, multibytep, coding_system) | |||
| 6756 | coding_system = CODING_ATTR_BASE_NAME (attrs); | 6685 | coding_system = CODING_ATTR_BASE_NAME (attrs); |
| 6757 | 6686 | ||
| 6758 | coding.source = src; | 6687 | coding.source = src; |
| 6688 | coding.src_chars = src_chars; | ||
| 6759 | coding.src_bytes = src_bytes; | 6689 | coding.src_bytes = src_bytes; |
| 6760 | coding.src_multibyte = multibytep; | 6690 | coding.src_multibyte = multibytep; |
| 6761 | coding.consumed = 0; | 6691 | coding.consumed = 0; |
| @@ -6764,21 +6694,26 @@ detect_coding_system (src, src_bytes, highest, multibytep, coding_system) | |||
| 6764 | detect_info.checked = detect_info.found = detect_info.rejected = 0; | 6694 | detect_info.checked = detect_info.found = detect_info.rejected = 0; |
| 6765 | 6695 | ||
| 6766 | /* At first, detect text-format if necessary. */ | 6696 | /* At first, detect text-format if necessary. */ |
| 6767 | if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided) | 6697 | base_category = XINT (CODING_ATTR_CATEGORY (attrs)); |
| 6698 | if (base_category == coding_category_undecided) | ||
| 6768 | { | 6699 | { |
| 6769 | enum coding_category category; | 6700 | enum coding_category category; |
| 6770 | struct coding_system *this; | 6701 | struct coding_system *this; |
| 6771 | int c, i; | 6702 | int c, i; |
| 6772 | 6703 | ||
| 6773 | for (; src < src_end; src++) | 6704 | /* Skip all ASCII bytes except for a few ISO2022 controls. */ |
| 6705 | for (i = 0; src < src_end; i++, src++) | ||
| 6774 | { | 6706 | { |
| 6775 | c = *src; | 6707 | c = *src; |
| 6776 | if (c & 0x80 | 6708 | if (c & 0x80 || (c < 0x20 && (c == 0 |
| 6777 | || (c < 0x20 && (c == ISO_CODE_ESC | 6709 | || c == ISO_CODE_ESC |
| 6778 | || c == ISO_CODE_SI | 6710 | || c == ISO_CODE_SI |
| 6779 | || c == ISO_CODE_SO))) | 6711 | || c == ISO_CODE_SO))) |
| 6780 | break; | 6712 | break; |
| 6781 | } | 6713 | } |
| 6714 | /* Skipped bytes must be even for utf-16 detecor. */ | ||
| 6715 | if (i % 2) | ||
| 6716 | src--; | ||
| 6782 | coding.head_ascii = src - coding.source; | 6717 | coding.head_ascii = src - coding.source; |
| 6783 | 6718 | ||
| 6784 | if (src < src_end) | 6719 | if (src < src_end) |
| @@ -6805,11 +6740,19 @@ detect_coding_system (src, src_bytes, highest, multibytep, coding_system) | |||
| 6805 | if ((*(this->detector)) (&coding, &detect_info) | 6740 | if ((*(this->detector)) (&coding, &detect_info) |
| 6806 | && highest | 6741 | && highest |
| 6807 | && (detect_info.found & (1 << category))) | 6742 | && (detect_info.found & (1 << category))) |
| 6808 | break; | 6743 | { |
| 6744 | if (category == coding_category_utf_16_auto) | ||
| 6745 | { | ||
| 6746 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | ||
| 6747 | category = coding_category_utf_16_le; | ||
| 6748 | else | ||
| 6749 | category = coding_category_utf_16_be; | ||
| 6750 | } | ||
| 6751 | break; | ||
| 6752 | } | ||
| 6809 | } | 6753 | } |
| 6810 | } | 6754 | } |
| 6811 | 6755 | ||
| 6812 | |||
| 6813 | if (detect_info.rejected == CATEGORY_MASK_ANY) | 6756 | if (detect_info.rejected == CATEGORY_MASK_ANY) |
| 6814 | { | 6757 | { |
| 6815 | detect_info.found = CATEGORY_MASK_RAW_TEXT; | 6758 | detect_info.found = CATEGORY_MASK_RAW_TEXT; |
| @@ -6867,6 +6810,24 @@ detect_coding_system (src, src_bytes, highest, multibytep, coding_system) | |||
| 6867 | detect_info.found |= found; | 6810 | detect_info.found |= found; |
| 6868 | } | 6811 | } |
| 6869 | } | 6812 | } |
| 6813 | else if (base_category == coding_category_utf_16_auto) | ||
| 6814 | { | ||
| 6815 | if (detect_coding_utf_16 (&coding, &detect_info)) | ||
| 6816 | { | ||
| 6817 | enum coding_category category; | ||
| 6818 | struct coding_system *this; | ||
| 6819 | |||
| 6820 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | ||
| 6821 | this = coding_categories + coding_category_utf_16_le; | ||
| 6822 | else if (detect_info.found & CATEGORY_MASK_UTF_16_BE) | ||
| 6823 | this = coding_categories + coding_category_utf_16_be; | ||
| 6824 | else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG) | ||
| 6825 | this = coding_categories + coding_category_utf_16_be_nosig; | ||
| 6826 | else | ||
| 6827 | this = coding_categories + coding_category_utf_16_le_nosig; | ||
| 6828 | val = Fcons (make_number (this->id), Qnil); | ||
| 6829 | } | ||
| 6830 | } | ||
| 6870 | else | 6831 | else |
| 6871 | { | 6832 | { |
| 6872 | detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs)); | 6833 | detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs)); |
| @@ -6969,7 +6930,7 @@ highest priority. */) | |||
| 6969 | move_gap_both (to, to_byte); | 6930 | move_gap_both (to, to_byte); |
| 6970 | 6931 | ||
| 6971 | return detect_coding_system (BYTE_POS_ADDR (from_byte), | 6932 | return detect_coding_system (BYTE_POS_ADDR (from_byte), |
| 6972 | to_byte - from_byte, | 6933 | to - from, to_byte - from_byte, |
| 6973 | !NILP (highest), | 6934 | !NILP (highest), |
| 6974 | !NILP (current_buffer | 6935 | !NILP (current_buffer |
| 6975 | ->enable_multibyte_characters), | 6936 | ->enable_multibyte_characters), |
| @@ -6992,7 +6953,8 @@ highest priority. */) | |||
| 6992 | { | 6953 | { |
| 6993 | CHECK_STRING (string); | 6954 | CHECK_STRING (string); |
| 6994 | 6955 | ||
| 6995 | return detect_coding_system (SDATA (string), SBYTES (string), | 6956 | return detect_coding_system (SDATA (string), |
| 6957 | SCHARS (string), SBYTES (string), | ||
| 6996 | !NILP (highest), STRING_MULTIBYTE (string), | 6958 | !NILP (highest), STRING_MULTIBYTE (string), |
| 6997 | Qnil); | 6959 | Qnil); |
| 6998 | } | 6960 | } |
| @@ -8617,7 +8579,6 @@ init_coding_once () | |||
| 8617 | iso_code_class[i] = ISO_graphic_plane_1; | 8579 | iso_code_class[i] = ISO_graphic_plane_1; |
| 8618 | iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F; | 8580 | iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F; |
| 8619 | iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF; | 8581 | iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF; |
| 8620 | iso_code_class[ISO_CODE_CR] = ISO_carriage_return; | ||
| 8621 | iso_code_class[ISO_CODE_SO] = ISO_shift_out; | 8582 | iso_code_class[ISO_CODE_SO] = ISO_shift_out; |
| 8622 | iso_code_class[ISO_CODE_SI] = ISO_shift_in; | 8583 | iso_code_class[ISO_CODE_SI] = ISO_shift_in; |
| 8623 | iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7; | 8584 | iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7; |
| @@ -8655,11 +8616,13 @@ syms_of_coding () | |||
| 8655 | staticpro (&Vbig5_coding_system); | 8616 | staticpro (&Vbig5_coding_system); |
| 8656 | Vbig5_coding_system = Qnil; | 8617 | Vbig5_coding_system = Qnil; |
| 8657 | 8618 | ||
| 8658 | staticpro (&Vcode_conversion_work_buf_list); | 8619 | staticpro (&Vcode_conversion_reused_workbuf); |
| 8659 | Vcode_conversion_work_buf_list = Qnil; | 8620 | Vcode_conversion_reused_workbuf = Qnil; |
| 8621 | |||
| 8622 | staticpro (&Vcode_conversion_workbuf_name); | ||
| 8623 | Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*"); | ||
| 8660 | 8624 | ||
| 8661 | staticpro (&Vcode_conversion_reused_work_buf); | 8625 | reused_workbuf_in_use = 0; |
| 8662 | Vcode_conversion_reused_work_buf = Qnil; | ||
| 8663 | 8626 | ||
| 8664 | DEFSYM (Qcharset, "charset"); | 8627 | DEFSYM (Qcharset, "charset"); |
| 8665 | DEFSYM (Qtarget_idx, "target-idx"); | 8628 | DEFSYM (Qtarget_idx, "target-idx"); |