aboutsummaryrefslogtreecommitdiffstats
path: root/src/coding.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/coding.c')
-rw-r--r--src/coding.c348
1 files changed, 152 insertions, 196 deletions
diff --git a/src/coding.c b/src/coding.c
index f6310369ad3..0c2836c19f6 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -395,8 +395,6 @@ Lisp_Object Vbig5_coding_system;
395 395
396/* Control characters of ISO2022. */ 396/* Control characters of ISO2022. */
397 /* code */ /* function */ 397 /* code */ /* function */
398#define ISO_CODE_LF 0x0A /* line-feed */
399#define ISO_CODE_CR 0x0D /* carriage-return */
400#define ISO_CODE_SO 0x0E /* shift-out */ 398#define ISO_CODE_SO 0x0E /* shift-out */
401#define ISO_CODE_SI 0x0F /* shift-in */ 399#define ISO_CODE_SI 0x0F /* shift-in */
402#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */ 400#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
@@ -479,7 +477,7 @@ enum iso_code_class_type
479 477
480#define CODING_ISO_FLAG_COMPOSITION 0x2000 478#define CODING_ISO_FLAG_COMPOSITION 0x2000
481 479
482#define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000 480/* #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000 */
483 481
484#define CODING_ISO_FLAG_USE_ROMAN 0x8000 482#define CODING_ISO_FLAG_USE_ROMAN 0x8000
485 483
@@ -721,25 +719,6 @@ static struct coding_system coding_categories[coding_category_max];
721 } while (0) 719 } while (0)
722 720
723 721
724#define ONE_MORE_BYTE_NO_CHECK(c) \
725 do { \
726 c = *src++; \
727 if (multibytep && (c & 0x80)) \
728 { \
729 if ((c & 0xFE) == 0xC0) \
730 c = ((c & 1) << 6) | *src++; \
731 else \
732 { \
733 src--; \
734 c = - string_char (src, &src, NULL); \
735 record_conversion_result \
736 (coding, CODING_RESULT_INVALID_SRC); \
737 } \
738 } \
739 consumed_chars++; \
740 } while (0)
741
742
743/* Store a byte C in the place pointed by DST and increment DST to the 722/* Store a byte C in the place pointed by DST and increment DST to the
744 next free point, and increment PRODUCED_CHARS. The caller should 723 next free point, and increment PRODUCED_CHARS. The caller should
745 assure that C is 0..127, and declare and set the variable `dst' 724 assure that C is 0..127, and declare and set the variable `dst'
@@ -1051,9 +1030,10 @@ coding_set_source (struct coding_system *coding)
1051 coding->source = SDATA (coding->src_object) + coding->src_pos_byte; 1030 coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1052 } 1031 }
1053 else 1032 else
1054 /* Otherwise, the source is C string and is never relocated 1033 {
1055 automatically. Thus we don't have to update anything. */ 1034 /* Otherwise, the source is C string and is never relocated
1056 ; 1035 automatically. Thus we don't have to update anything. */
1036 }
1057} 1037}
1058 1038
1059static void 1039static void
@@ -1079,9 +1059,10 @@ coding_set_destination (struct coding_system *coding)
1079 } 1059 }
1080 } 1060 }
1081 else 1061 else
1082 /* Otherwise, the destination is C string and is never relocated 1062 {
1083 automatically. Thus we don't have to update anything. */ 1063 /* Otherwise, the destination is C string and is never relocated
1084 ; 1064 automatically. Thus we don't have to update anything. */
1065 }
1085} 1066}
1086 1067
1087 1068
@@ -1217,7 +1198,6 @@ alloc_destination (struct coding_system *coding, EMACS_INT nbytes,
1217#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0) 1198#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1218#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8) 1199#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1219 1200
1220#define UTF_BOM 0xFEFF
1221#define UTF_8_BOM_1 0xEF 1201#define UTF_8_BOM_1 0xEF
1222#define UTF_8_BOM_2 0xBB 1202#define UTF_8_BOM_2 0xBB
1223#define UTF_8_BOM_3 0xBF 1203#define UTF_8_BOM_3 0xBF
@@ -1318,7 +1298,7 @@ decode_coding_utf_8 (struct coding_system *coding)
1318 int multibytep = coding->src_multibyte; 1298 int multibytep = coding->src_multibyte;
1319 enum utf_bom_type bom = CODING_UTF_8_BOM (coding); 1299 enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1320 Lisp_Object attr, charset_list; 1300 Lisp_Object attr, charset_list;
1321 int eol_crlf = 1301 int eol_dos =
1322 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 1302 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1323 int byte_after_cr = -1; 1303 int byte_after_cr = -1;
1324 1304
@@ -1379,7 +1359,7 @@ decode_coding_utf_8 (struct coding_system *coding)
1379 } 1359 }
1380 else if (UTF_8_1_OCTET_P (c1)) 1360 else if (UTF_8_1_OCTET_P (c1))
1381 { 1361 {
1382 if (eol_crlf && c1 == '\r') 1362 if (eol_dos && c1 == '\r')
1383 ONE_MORE_BYTE (byte_after_cr); 1363 ONE_MORE_BYTE (byte_after_cr);
1384 c = c1; 1364 c = c1;
1385 } 1365 }
@@ -1533,11 +1513,6 @@ encode_coding_utf_8 (struct coding_system *coding)
1533#define UTF_16_LOW_SURROGATE_P(val) \ 1513#define UTF_16_LOW_SURROGATE_P(val) \
1534 (((val) & 0xFC00) == 0xDC00) 1514 (((val) & 0xFC00) == 0xDC00)
1535 1515
1536#define UTF_16_INVALID_P(val) \
1537 (((val) == 0xFFFE) \
1538 || ((val) == 0xFFFF) \
1539 || UTF_16_LOW_SURROGATE_P (val))
1540
1541 1516
1542static int 1517static int
1543detect_coding_utf_16 (struct coding_system *coding, 1518detect_coding_utf_16 (struct coding_system *coding,
@@ -1637,7 +1612,7 @@ decode_coding_utf_16 (struct coding_system *coding)
1637 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding); 1612 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1638 int surrogate = CODING_UTF_16_SURROGATE (coding); 1613 int surrogate = CODING_UTF_16_SURROGATE (coding);
1639 Lisp_Object attr, charset_list; 1614 Lisp_Object attr, charset_list;
1640 int eol_crlf = 1615 int eol_dos =
1641 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 1616 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1642 int byte_after_cr1 = -1, byte_after_cr2 = -1; 1617 int byte_after_cr1 = -1, byte_after_cr2 = -1;
1643 1618
@@ -1734,7 +1709,7 @@ decode_coding_utf_16 (struct coding_system *coding)
1734 CODING_UTF_16_SURROGATE (coding) = surrogate = c; 1709 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1735 else 1710 else
1736 { 1711 {
1737 if (eol_crlf && c == '\r') 1712 if (eol_dos && c == '\r')
1738 { 1713 {
1739 ONE_MORE_BYTE (byte_after_cr1); 1714 ONE_MORE_BYTE (byte_after_cr1);
1740 ONE_MORE_BYTE (byte_after_cr2); 1715 ONE_MORE_BYTE (byte_after_cr2);
@@ -1918,17 +1893,17 @@ detect_coding_emacs_mule (struct coding_system *coding,
1918 it because analyzing it is too heavy for detecting. But, 1893 it because analyzing it is too heavy for detecting. But,
1919 at least, we check that the composite character 1894 at least, we check that the composite character
1920 constitutes of more than 4 bytes. */ 1895 constitutes of more than 4 bytes. */
1921 const unsigned char *src_base; 1896 const unsigned char *src_start;
1922 1897
1923 repeat: 1898 repeat:
1924 src_base = src; 1899 src_start = src;
1925 do 1900 do
1926 { 1901 {
1927 ONE_MORE_BYTE (c); 1902 ONE_MORE_BYTE (c);
1928 } 1903 }
1929 while (c >= 0xA0); 1904 while (c >= 0xA0);
1930 1905
1931 if (src - src_base <= 4) 1906 if (src - src_start <= 4)
1932 break; 1907 break;
1933 found = CATEGORY_MASK_EMACS_MULE; 1908 found = CATEGORY_MASK_EMACS_MULE;
1934 if (c == 0x80) 1909 if (c == 0x80)
@@ -1980,7 +1955,7 @@ detect_coding_emacs_mule (struct coding_system *coding,
1980 the decoded character or rule. If an invalid byte is found, return 1955 the decoded character or rule. If an invalid byte is found, return
1981 -1. If SRC is too short, return -2. */ 1956 -1. If SRC is too short, return -2. */
1982 1957
1983int 1958static int
1984emacs_mule_char (struct coding_system *coding, const unsigned char *src, 1959emacs_mule_char (struct coding_system *coding, const unsigned char *src,
1985 int *nbytes, int *nchars, int *id, 1960 int *nbytes, int *nchars, int *id,
1986 struct composition_status *cmp_status) 1961 struct composition_status *cmp_status)
@@ -1988,7 +1963,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
1988 const unsigned char *src_end = coding->source + coding->src_bytes; 1963 const unsigned char *src_end = coding->source + coding->src_bytes;
1989 const unsigned char *src_base = src; 1964 const unsigned char *src_base = src;
1990 int multibytep = coding->src_multibyte; 1965 int multibytep = coding->src_multibyte;
1991 int charset_id; 1966 int charset_ID;
1992 unsigned code; 1967 unsigned code;
1993 int c; 1968 int c;
1994 int consumed_chars = 0; 1969 int consumed_chars = 0;
@@ -1998,7 +1973,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
1998 if (c < 0) 1973 if (c < 0)
1999 { 1974 {
2000 c = -c; 1975 c = -c;
2001 charset_id = emacs_mule_charset[0]; 1976 charset_ID = emacs_mule_charset[0];
2002 } 1977 }
2003 else 1978 else
2004 { 1979 {
@@ -2034,7 +2009,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2034 switch (emacs_mule_bytes[c]) 2009 switch (emacs_mule_bytes[c])
2035 { 2010 {
2036 case 2: 2011 case 2:
2037 if ((charset_id = emacs_mule_charset[c]) < 0) 2012 if ((charset_ID = emacs_mule_charset[c]) < 0)
2038 goto invalid_code; 2013 goto invalid_code;
2039 ONE_MORE_BYTE (c); 2014 ONE_MORE_BYTE (c);
2040 if (c < 0xA0) 2015 if (c < 0xA0)
@@ -2047,7 +2022,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2047 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12) 2022 || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2048 { 2023 {
2049 ONE_MORE_BYTE (c); 2024 ONE_MORE_BYTE (c);
2050 if (c < 0xA0 || (charset_id = emacs_mule_charset[c]) < 0) 2025 if (c < 0xA0 || (charset_ID = emacs_mule_charset[c]) < 0)
2051 goto invalid_code; 2026 goto invalid_code;
2052 ONE_MORE_BYTE (c); 2027 ONE_MORE_BYTE (c);
2053 if (c < 0xA0) 2028 if (c < 0xA0)
@@ -2056,7 +2031,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2056 } 2031 }
2057 else 2032 else
2058 { 2033 {
2059 if ((charset_id = emacs_mule_charset[c]) < 0) 2034 if ((charset_ID = emacs_mule_charset[c]) < 0)
2060 goto invalid_code; 2035 goto invalid_code;
2061 ONE_MORE_BYTE (c); 2036 ONE_MORE_BYTE (c);
2062 if (c < 0xA0) 2037 if (c < 0xA0)
@@ -2071,7 +2046,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2071 2046
2072 case 4: 2047 case 4:
2073 ONE_MORE_BYTE (c); 2048 ONE_MORE_BYTE (c);
2074 if (c < 0 || (charset_id = emacs_mule_charset[c]) < 0) 2049 if (c < 0 || (charset_ID = emacs_mule_charset[c]) < 0)
2075 goto invalid_code; 2050 goto invalid_code;
2076 ONE_MORE_BYTE (c); 2051 ONE_MORE_BYTE (c);
2077 if (c < 0xA0) 2052 if (c < 0xA0)
@@ -2085,21 +2060,21 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2085 2060
2086 case 1: 2061 case 1:
2087 code = c; 2062 code = c;
2088 charset_id = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit; 2063 charset_ID = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit;
2089 break; 2064 break;
2090 2065
2091 default: 2066 default:
2092 abort (); 2067 abort ();
2093 } 2068 }
2094 CODING_DECODE_CHAR (coding, src, src_base, src_end, 2069 CODING_DECODE_CHAR (coding, src, src_base, src_end,
2095 CHARSET_FROM_ID (charset_id), code, c); 2070 CHARSET_FROM_ID (charset_ID), code, c);
2096 if (c < 0) 2071 if (c < 0)
2097 goto invalid_code; 2072 goto invalid_code;
2098 } 2073 }
2099 *nbytes = src - src_base; 2074 *nbytes = src - src_base;
2100 *nchars = consumed_chars; 2075 *nchars = consumed_chars;
2101 if (id) 2076 if (id)
2102 *id = charset_id; 2077 *id = charset_ID;
2103 return (mseq_found ? -c : c); 2078 return (mseq_found ? -c : c);
2104 2079
2105 no_more_source: 2080 no_more_source:
@@ -2372,7 +2347,7 @@ decode_coding_emacs_mule (struct coding_system *coding)
2372 int char_offset = coding->produced_char; 2347 int char_offset = coding->produced_char;
2373 int last_offset = char_offset; 2348 int last_offset = char_offset;
2374 int last_id = charset_ascii; 2349 int last_id = charset_ascii;
2375 int eol_crlf = 2350 int eol_dos =
2376 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 2351 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2377 int byte_after_cr = -1; 2352 int byte_after_cr = -1;
2378 struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status; 2353 struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
@@ -2390,7 +2365,7 @@ decode_coding_emacs_mule (struct coding_system *coding)
2390 2365
2391 while (1) 2366 while (1)
2392 { 2367 {
2393 int c, id; 2368 int c, id IF_LINT (= 0);
2394 2369
2395 src_base = src; 2370 src_base = src;
2396 consumed_chars_base = consumed_chars; 2371 consumed_chars_base = consumed_chars;
@@ -2422,7 +2397,7 @@ decode_coding_emacs_mule (struct coding_system *coding)
2422 2397
2423 if (c < 0x80) 2398 if (c < 0x80)
2424 { 2399 {
2425 if (eol_crlf && c == '\r') 2400 if (eol_dos && c == '\r')
2426 ONE_MORE_BYTE (byte_after_cr); 2401 ONE_MORE_BYTE (byte_after_cr);
2427 id = charset_ascii; 2402 id = charset_ascii;
2428 if (cmp_status->state != COMPOSING_NO) 2403 if (cmp_status->state != COMPOSING_NO)
@@ -2435,7 +2410,7 @@ decode_coding_emacs_mule (struct coding_system *coding)
2435 } 2410 }
2436 else 2411 else
2437 { 2412 {
2438 int nchars, nbytes; 2413 int nchars IF_LINT (= 0), nbytes IF_LINT (= 0);
2439 /* emacs_mule_char can load a charset map from a file, which 2414 /* emacs_mule_char can load a charset map from a file, which
2440 allocates a large structure and might cause buffer text 2415 allocates a large structure and might cause buffer text
2441 to be relocated as result. Thus, we need to remember the 2416 to be relocated as result. Thus, we need to remember the
@@ -2903,10 +2878,6 @@ enum iso_code_class_type iso_code_class[256];
2903 ((id) <= (coding)->max_charset_id \ 2878 ((id) <= (coding)->max_charset_id \
2904 && (coding)->safe_charsets[id] != 255) 2879 && (coding)->safe_charsets[id] != 255)
2905 2880
2906
2907#define SHIFT_OUT_OK(category) \
2908 (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2909
2910static void 2881static void
2911setup_iso_safe_charsets (Lisp_Object attrs) 2882setup_iso_safe_charsets (Lisp_Object attrs)
2912{ 2883{
@@ -3023,40 +2994,11 @@ detect_coding_iso_2022 (struct coding_system *coding,
3023 break; 2994 break;
3024 single_shifting = 0; 2995 single_shifting = 0;
3025 ONE_MORE_BYTE (c); 2996 ONE_MORE_BYTE (c);
3026 if (c >= '(' && c <= '/') 2997 if (c == 'N' || c == 'O')
3027 {
3028 /* Designation sequence for a charset of dimension 1. */
3029 ONE_MORE_BYTE (c1);
3030 if (c1 < ' ' || c1 >= 0x80
3031 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3032 /* Invalid designation sequence. Just ignore. */
3033 break;
3034 }
3035 else if (c == '$')
3036 {
3037 /* Designation sequence for a charset of dimension 2. */
3038 ONE_MORE_BYTE (c);
3039 if (c >= '@' && c <= 'B')
3040 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
3041 id = iso_charset_table[1][0][c];
3042 else if (c >= '(' && c <= '/')
3043 {
3044 ONE_MORE_BYTE (c1);
3045 if (c1 < ' ' || c1 >= 0x80
3046 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3047 /* Invalid designation sequence. Just ignore. */
3048 break;
3049 }
3050 else
3051 /* Invalid designation sequence. Just ignore it. */
3052 break;
3053 }
3054 else if (c == 'N' || c == 'O')
3055 { 2998 {
3056 /* ESC <Fe> for SS2 or SS3. */ 2999 /* ESC <Fe> for SS2 or SS3. */
3057 single_shifting = 1; 3000 single_shifting = 1;
3058 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT; 3001 rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3059 break;
3060 } 3002 }
3061 else if (c == '1') 3003 else if (c == '1')
3062 { 3004 {
@@ -3072,36 +3014,66 @@ detect_coding_iso_2022 (struct coding_system *coding,
3072 { 3014 {
3073 /* ESC <Fp> for start/end composition. */ 3015 /* ESC <Fp> for start/end composition. */
3074 composition_count = 0; 3016 composition_count = 0;
3075 break;
3076 } 3017 }
3077 else 3018 else
3078 { 3019 {
3079 /* Invalid escape sequence. Just ignore it. */ 3020 if (c >= '(' && c <= '/')
3080 break; 3021 {
3081 } 3022 /* Designation sequence for a charset of dimension 1. */
3023 ONE_MORE_BYTE (c1);
3024 if (c1 < ' ' || c1 >= 0x80
3025 || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3026 /* Invalid designation sequence. Just ignore. */
3027 break;
3028 }
3029 else if (c == '$')
3030 {
3031 /* Designation sequence for a charset of dimension 2. */
3032 ONE_MORE_BYTE (c);
3033 if (c >= '@' && c <= 'B')
3034 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
3035 id = iso_charset_table[1][0][c];
3036 else if (c >= '(' && c <= '/')
3037 {
3038 ONE_MORE_BYTE (c1);
3039 if (c1 < ' ' || c1 >= 0x80
3040 || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3041 /* Invalid designation sequence. Just ignore. */
3042 break;
3043 }
3044 else
3045 /* Invalid designation sequence. Just ignore it. */
3046 break;
3047 }
3048 else
3049 {
3050 /* Invalid escape sequence. Just ignore it. */
3051 break;
3052 }
3082 3053
3083 /* We found a valid designation sequence for CHARSET. */ 3054 /* We found a valid designation sequence for CHARSET. */
3084 rejected |= CATEGORY_MASK_ISO_8BIT; 3055 rejected |= CATEGORY_MASK_ISO_8BIT;
3085 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7], 3056 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3086 id)) 3057 id))
3087 found |= CATEGORY_MASK_ISO_7; 3058 found |= CATEGORY_MASK_ISO_7;
3088 else 3059 else
3089 rejected |= CATEGORY_MASK_ISO_7; 3060 rejected |= CATEGORY_MASK_ISO_7;
3090 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight], 3061 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3091 id)) 3062 id))
3092 found |= CATEGORY_MASK_ISO_7_TIGHT; 3063 found |= CATEGORY_MASK_ISO_7_TIGHT;
3093 else 3064 else
3094 rejected |= CATEGORY_MASK_ISO_7_TIGHT; 3065 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
3095 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else], 3066 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3096 id)) 3067 id))
3097 found |= CATEGORY_MASK_ISO_7_ELSE; 3068 found |= CATEGORY_MASK_ISO_7_ELSE;
3098 else 3069 else
3099 rejected |= CATEGORY_MASK_ISO_7_ELSE; 3070 rejected |= CATEGORY_MASK_ISO_7_ELSE;
3100 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else], 3071 if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3101 id)) 3072 id))
3102 found |= CATEGORY_MASK_ISO_8_ELSE; 3073 found |= CATEGORY_MASK_ISO_8_ELSE;
3103 else 3074 else
3104 rejected |= CATEGORY_MASK_ISO_8_ELSE; 3075 rejected |= CATEGORY_MASK_ISO_8_ELSE;
3076 }
3105 break; 3077 break;
3106 3078
3107 case ISO_CODE_SO: 3079 case ISO_CODE_SO:
@@ -3129,13 +3101,32 @@ detect_coding_iso_2022 (struct coding_system *coding,
3129 rejected |= CATEGORY_MASK_ISO_7BIT; 3101 rejected |= CATEGORY_MASK_ISO_7BIT;
3130 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) 3102 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3131 & CODING_ISO_FLAG_SINGLE_SHIFT) 3103 & CODING_ISO_FLAG_SINGLE_SHIFT)
3132 found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1; 3104 {
3105 found |= CATEGORY_MASK_ISO_8_1;
3106 single_shifting = 1;
3107 }
3133 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2]) 3108 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3134 & CODING_ISO_FLAG_SINGLE_SHIFT) 3109 & CODING_ISO_FLAG_SINGLE_SHIFT)
3135 found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1; 3110 {
3111 found |= CATEGORY_MASK_ISO_8_2;
3112 single_shifting = 1;
3113 }
3136 if (single_shifting) 3114 if (single_shifting)
3137 break; 3115 break;
3138 goto check_extra_latin; 3116 check_extra_latin:
3117 if (! VECTORP (Vlatin_extra_code_table)
3118 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3119 {
3120 rejected = CATEGORY_MASK_ISO;
3121 break;
3122 }
3123 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3124 & CODING_ISO_FLAG_LATIN_EXTRA)
3125 found |= CATEGORY_MASK_ISO_8_1;
3126 else
3127 rejected |= CATEGORY_MASK_ISO_8_1;
3128 rejected |= CATEGORY_MASK_ISO_8_2;
3129 break;
3139 3130
3140 default: 3131 default:
3141 if (c < 0) 3132 if (c < 0)
@@ -3158,7 +3149,7 @@ detect_coding_iso_2022 (struct coding_system *coding,
3158 if (! single_shifting 3149 if (! single_shifting
3159 && ! (rejected & CATEGORY_MASK_ISO_8_2)) 3150 && ! (rejected & CATEGORY_MASK_ISO_8_2))
3160 { 3151 {
3161 int i = 1; 3152 int len = 1;
3162 while (src < src_end) 3153 while (src < src_end)
3163 { 3154 {
3164 src_base = src; 3155 src_base = src;
@@ -3168,38 +3159,24 @@ detect_coding_iso_2022 (struct coding_system *coding,
3168 src = src_base; 3159 src = src_base;
3169 break; 3160 break;
3170 } 3161 }
3171 i++; 3162 len++;
3172 } 3163 }
3173 3164
3174 if (i & 1 && src < src_end) 3165 if (len & 1 && src < src_end)
3175 { 3166 {
3176 rejected |= CATEGORY_MASK_ISO_8_2; 3167 rejected |= CATEGORY_MASK_ISO_8_2;
3177 if (composition_count >= 0) 3168 if (composition_count >= 0)
3178 composition_count += i; 3169 composition_count += len;
3179 } 3170 }
3180 else 3171 else
3181 { 3172 {
3182 found |= CATEGORY_MASK_ISO_8_2; 3173 found |= CATEGORY_MASK_ISO_8_2;
3183 if (composition_count >= 0) 3174 if (composition_count >= 0)
3184 composition_count += i / 2; 3175 composition_count += len / 2;
3185 } 3176 }
3186 } 3177 }
3187 break; 3178 break;
3188 } 3179 }
3189 check_extra_latin:
3190 single_shifting = 0;
3191 if (! VECTORP (Vlatin_extra_code_table)
3192 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3193 {
3194 rejected = CATEGORY_MASK_ISO;
3195 break;
3196 }
3197 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3198 & CODING_ISO_FLAG_LATIN_EXTRA)
3199 found |= CATEGORY_MASK_ISO_8_1;
3200 else
3201 rejected |= CATEGORY_MASK_ISO_8_1;
3202 rejected |= CATEGORY_MASK_ISO_8_2;
3203 } 3180 }
3204 } 3181 }
3205 detect_info->rejected |= CATEGORY_MASK_ISO; 3182 detect_info->rejected |= CATEGORY_MASK_ISO;
@@ -3309,10 +3286,10 @@ detect_coding_iso_2022 (struct coding_system *coding,
3309 } \ 3286 } \
3310 else /* new format (after ver.21) */ \ 3287 else /* new format (after ver.21) */ \
3311 { \ 3288 { \
3312 int c; \ 3289 int b; \
3313 \ 3290 \
3314 ONE_MORE_BYTE (c); \ 3291 ONE_MORE_BYTE (b); \
3315 rule = COMPOSITION_ENCODE_RULE (rule - 81, c - 32); \ 3292 rule = COMPOSITION_ENCODE_RULE (rule - 81, b - 32); \
3316 if (rule >= 0) \ 3293 if (rule >= 0) \
3317 rule += 0x100; /* to destinguish it from the old format */ \ 3294 rule += 0x100; /* to destinguish it from the old format */ \
3318 nbytes = 2; \ 3295 nbytes = 2; \
@@ -3504,7 +3481,7 @@ decode_coding_iso_2022 (struct coding_system *coding)
3504 int char_offset = coding->produced_char; 3481 int char_offset = coding->produced_char;
3505 int last_offset = char_offset; 3482 int last_offset = char_offset;
3506 int last_id = charset_ascii; 3483 int last_id = charset_ascii;
3507 int eol_crlf = 3484 int eol_dos =
3508 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 3485 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3509 int byte_after_cr = -1; 3486 int byte_after_cr = -1;
3510 int i; 3487 int i;
@@ -3624,7 +3601,7 @@ decode_coding_iso_2022 (struct coding_system *coding)
3624 break; 3601 break;
3625 3602
3626 case ISO_control_0: 3603 case ISO_control_0:
3627 if (eol_crlf && c1 == '\r') 3604 if (eol_dos && c1 == '\r')
3628 ONE_MORE_BYTE (byte_after_cr); 3605 ONE_MORE_BYTE (byte_after_cr);
3629 MAYBE_FINISH_COMPOSITION (); 3606 MAYBE_FINISH_COMPOSITION ();
3630 charset = CHARSET_FROM_ID (charset_ascii); 3607 charset = CHARSET_FROM_ID (charset_ascii);
@@ -3897,6 +3874,10 @@ decode_coding_iso_2022 (struct coding_system *coding)
3897 } 3874 }
3898 continue; 3875 continue;
3899 } 3876 }
3877 break;
3878
3879 default:
3880 abort ();
3900 } 3881 }
3901 3882
3902 if (cmp_status->state == COMPOSING_NO 3883 if (cmp_status->state == COMPOSING_NO
@@ -4029,7 +4010,6 @@ decode_coding_iso_2022 (struct coding_system *coding)
4029 const char *intermediate_char_94 = "()*+"; \ 4010 const char *intermediate_char_94 = "()*+"; \
4030 const char *intermediate_char_96 = ",-./"; \ 4011 const char *intermediate_char_96 = ",-./"; \
4031 int revision = -1; \ 4012 int revision = -1; \
4032 int c; \
4033 \ 4013 \
4034 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \ 4014 if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION) \
4035 revision = CHARSET_ISO_REVISION (charset); \ 4015 revision = CHARSET_ISO_REVISION (charset); \
@@ -4042,11 +4022,12 @@ decode_coding_iso_2022 (struct coding_system *coding)
4042 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \ 4022 EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC); \
4043 if (CHARSET_DIMENSION (charset) == 1) \ 4023 if (CHARSET_DIMENSION (charset) == 1) \
4044 { \ 4024 { \
4025 int b; \
4045 if (! CHARSET_ISO_CHARS_96 (charset)) \ 4026 if (! CHARSET_ISO_CHARS_96 (charset)) \
4046 c = intermediate_char_94[reg]; \ 4027 b = intermediate_char_94[reg]; \
4047 else \ 4028 else \
4048 c = intermediate_char_96[reg]; \ 4029 b = intermediate_char_96[reg]; \
4049 EMIT_ONE_ASCII_BYTE (c); \ 4030 EMIT_ONE_ASCII_BYTE (b); \
4050 } \ 4031 } \
4051 else \ 4032 else \
4052 { \ 4033 { \
@@ -4226,7 +4207,7 @@ decode_coding_iso_2022 (struct coding_system *coding)
4226 to use CHARSET. The element `spec.iso_2022' of *CODING is updated. 4207 to use CHARSET. The element `spec.iso_2022' of *CODING is updated.
4227 Return new DST. */ 4208 Return new DST. */
4228 4209
4229unsigned char * 4210static unsigned char *
4230encode_invocation_designation (struct charset *charset, 4211encode_invocation_designation (struct charset *charset,
4231 struct coding_system *coding, 4212 struct coding_system *coding,
4232 unsigned char *dst, int *p_nchars) 4213 unsigned char *dst, int *p_nchars)
@@ -4289,30 +4270,6 @@ encode_invocation_designation (struct charset *charset,
4289 return dst; 4270 return dst;
4290} 4271}
4291 4272
4292/* The following three macros produce codes for indicating direction
4293 of text. */
4294#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
4295 do { \
4296 if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS) \
4297 EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '['); \
4298 else \
4299 EMIT_ONE_BYTE (ISO_CODE_CSI); \
4300 } while (0)
4301
4302
4303#define ENCODE_DIRECTION_R2L() \
4304 do { \
4305 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
4306 EMIT_TWO_ASCII_BYTES ('2', ']'); \
4307 } while (0)
4308
4309
4310#define ENCODE_DIRECTION_L2R() \
4311 do { \
4312 ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst); \
4313 EMIT_TWO_ASCII_BYTES ('0', ']'); \
4314 } while (0)
4315
4316 4273
4317/* Produce codes for designation and invocation to reset the graphic 4274/* Produce codes for designation and invocation to reset the graphic
4318 planes and registers to initial state. */ 4275 planes and registers to initial state. */
@@ -4707,7 +4664,7 @@ decode_coding_sjis (struct coding_system *coding)
4707 int char_offset = coding->produced_char; 4664 int char_offset = coding->produced_char;
4708 int last_offset = char_offset; 4665 int last_offset = char_offset;
4709 int last_id = charset_ascii; 4666 int last_id = charset_ascii;
4710 int eol_crlf = 4667 int eol_dos =
4711 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 4668 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4712 int byte_after_cr = -1; 4669 int byte_after_cr = -1;
4713 4670
@@ -4742,7 +4699,7 @@ decode_coding_sjis (struct coding_system *coding)
4742 goto invalid_code; 4699 goto invalid_code;
4743 if (c < 0x80) 4700 if (c < 0x80)
4744 { 4701 {
4745 if (eol_crlf && c == '\r') 4702 if (eol_dos && c == '\r')
4746 ONE_MORE_BYTE (byte_after_cr); 4703 ONE_MORE_BYTE (byte_after_cr);
4747 charset = charset_roman; 4704 charset = charset_roman;
4748 } 4705 }
@@ -4824,7 +4781,7 @@ decode_coding_big5 (struct coding_system *coding)
4824 int char_offset = coding->produced_char; 4781 int char_offset = coding->produced_char;
4825 int last_offset = char_offset; 4782 int last_offset = char_offset;
4826 int last_id = charset_ascii; 4783 int last_id = charset_ascii;
4827 int eol_crlf = 4784 int eol_dos =
4828 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 4785 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4829 int byte_after_cr = -1; 4786 int byte_after_cr = -1;
4830 4787
@@ -4857,7 +4814,7 @@ decode_coding_big5 (struct coding_system *coding)
4857 goto invalid_code; 4814 goto invalid_code;
4858 if (c < 0x80) 4815 if (c < 0x80)
4859 { 4816 {
4860 if (eol_crlf && c == '\r') 4817 if (eol_dos && c == '\r')
4861 ONE_MORE_BYTE (byte_after_cr); 4818 ONE_MORE_BYTE (byte_after_cr);
4862 charset = charset_roman; 4819 charset = charset_roman;
4863 } 4820 }
@@ -5261,13 +5218,13 @@ encode_coding_ccl (struct coding_system *coding)
5261static void 5218static void
5262decode_coding_raw_text (struct coding_system *coding) 5219decode_coding_raw_text (struct coding_system *coding)
5263{ 5220{
5264 int eol_crlf = 5221 int eol_dos =
5265 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 5222 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5266 5223
5267 coding->chars_at_source = 1; 5224 coding->chars_at_source = 1;
5268 coding->consumed_char = coding->src_chars; 5225 coding->consumed_char = coding->src_chars;
5269 coding->consumed = coding->src_bytes; 5226 coding->consumed = coding->src_bytes;
5270 if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r') 5227 if (eol_dos && coding->source[coding->src_bytes - 1] == '\r')
5271 { 5228 {
5272 coding->consumed_char--; 5229 coding->consumed_char--;
5273 coding->consumed--; 5230 coding->consumed--;
@@ -5480,7 +5437,7 @@ decode_coding_charset (struct coding_system *coding)
5480 int char_offset = coding->produced_char; 5437 int char_offset = coding->produced_char;
5481 int last_offset = char_offset; 5438 int last_offset = char_offset;
5482 int last_id = charset_ascii; 5439 int last_id = charset_ascii;
5483 int eol_crlf = 5440 int eol_dos =
5484 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 5441 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5485 int byte_after_cr = -1; 5442 int byte_after_cr = -1;
5486 5443
@@ -5514,7 +5471,7 @@ decode_coding_charset (struct coding_system *coding)
5514 else 5471 else
5515 { 5472 {
5516 ONE_MORE_BYTE (c); 5473 ONE_MORE_BYTE (c);
5517 if (eol_crlf && c == '\r') 5474 if (eol_dos && c == '\r')
5518 ONE_MORE_BYTE (byte_after_cr); 5475 ONE_MORE_BYTE (byte_after_cr);
5519 } 5476 }
5520 if (c < 0) 5477 if (c < 0)
@@ -6607,15 +6564,15 @@ get_translation_table (Lisp_Object attrs, int encodep, int *max_lookup)
6607 } 6564 }
6608 else if (CONSP (translation_table)) 6565 else if (CONSP (translation_table))
6609 { 6566 {
6610 Lisp_Object tail, val; 6567 Lisp_Object tail;
6611 6568
6612 for (tail = translation_table; CONSP (tail); tail = XCDR (tail)) 6569 for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6613 if (CHAR_TABLE_P (XCAR (tail)) 6570 if (CHAR_TABLE_P (XCAR (tail))
6614 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1) 6571 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6615 { 6572 {
6616 val = XCHAR_TABLE (XCAR (tail))->extras[1]; 6573 Lisp_Object tailval = XCHAR_TABLE (XCAR (tail))->extras[1];
6617 if (NATNUMP (val) && *max_lookup < XFASTINT (val)) 6574 if (NATNUMP (tailval) && *max_lookup < XFASTINT (tailval))
6618 *max_lookup = XFASTINT (val); 6575 *max_lookup = XFASTINT (tailval);
6619 } 6576 }
6620 } 6577 }
6621 } 6578 }
@@ -7652,12 +7609,12 @@ decode_coding_object (struct coding_system *coding,
7652 Lisp_Object dst_object) 7609 Lisp_Object dst_object)
7653{ 7610{
7654 int count = SPECPDL_INDEX (); 7611 int count = SPECPDL_INDEX ();
7655 unsigned char *destination; 7612 unsigned char *destination IF_LINT (= NULL);
7656 EMACS_INT dst_bytes; 7613 EMACS_INT dst_bytes IF_LINT (= 0);
7657 EMACS_INT chars = to - from; 7614 EMACS_INT chars = to - from;
7658 EMACS_INT bytes = to_byte - from_byte; 7615 EMACS_INT bytes = to_byte - from_byte;
7659 Lisp_Object attrs; 7616 Lisp_Object attrs;
7660 int saved_pt = -1, saved_pt_byte; 7617 int saved_pt = -1, saved_pt_byte IF_LINT (= 0);
7661 int need_marker_adjustment = 0; 7618 int need_marker_adjustment = 0;
7662 Lisp_Object old_deactivate_mark; 7619 Lisp_Object old_deactivate_mark;
7663 7620
@@ -7845,7 +7802,7 @@ encode_coding_object (struct coding_system *coding,
7845 EMACS_INT chars = to - from; 7802 EMACS_INT chars = to - from;
7846 EMACS_INT bytes = to_byte - from_byte; 7803 EMACS_INT bytes = to_byte - from_byte;
7847 Lisp_Object attrs; 7804 Lisp_Object attrs;
7848 int saved_pt = -1, saved_pt_byte; 7805 int saved_pt = -1, saved_pt_byte IF_LINT (= 0);
7849 int need_marker_adjustment = 0; 7806 int need_marker_adjustment = 0;
7850 int kill_src_buffer = 0; 7807 int kill_src_buffer = 0;
7851 Lisp_Object old_deactivate_mark; 7808 Lisp_Object old_deactivate_mark;
@@ -8178,8 +8135,8 @@ detect_coding_system (const unsigned char *src,
8178 base_category = XINT (CODING_ATTR_CATEGORY (attrs)); 8135 base_category = XINT (CODING_ATTR_CATEGORY (attrs));
8179 if (base_category == coding_category_undecided) 8136 if (base_category == coding_category_undecided)
8180 { 8137 {
8181 enum coding_category category; 8138 enum coding_category category IF_LINT (= 0);
8182 struct coding_system *this; 8139 struct coding_system *this IF_LINT (= NULL);
8183 int c, i; 8140 int c, i;
8184 8141
8185 /* Skip all ASCII bytes except for a few ISO2022 controls. */ 8142 /* Skip all ASCII bytes except for a few ISO2022 controls. */
@@ -9112,10 +9069,10 @@ Return the corresponding character. */)
9112 } 9069 }
9113 else 9070 else
9114 { 9071 {
9115 int s1 = c >> 8, s2 = c & 0xFF; 9072 int c1 = c >> 8, c2 = c & 0xFF;
9116 9073
9117 if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF 9074 if (c1 < 0x81 || (c1 > 0x9F && c1 < 0xE0) || c1 > 0xEF
9118 || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC) 9075 || c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
9119 error ("Invalid code: %d", code); 9076 error ("Invalid code: %d", code);
9120 SJIS_TO_JIS (c); 9077 SJIS_TO_JIS (c);
9121 charset = charset_kanji; 9078 charset = charset_kanji;
@@ -9798,7 +9755,6 @@ usage: (define-coding-system-internal ...) */)
9798 else if (EQ (coding_type, Qiso_2022)) 9755 else if (EQ (coding_type, Qiso_2022))
9799 { 9756 {
9800 Lisp_Object initial, reg_usage, request, flags; 9757 Lisp_Object initial, reg_usage, request, flags;
9801 int i;
9802 9758
9803 if (nargs < coding_arg_iso2022_max) 9759 if (nargs < coding_arg_iso2022_max)
9804 goto short_args; 9760 goto short_args;
@@ -9830,12 +9786,12 @@ usage: (define-coding-system-internal ...) */)
9830 for (tail = request; ! NILP (tail); tail = Fcdr (tail)) 9786 for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9831 { 9787 {
9832 int id; 9788 int id;
9833 Lisp_Object tmp; 9789 Lisp_Object tmp1;
9834 9790
9835 val = Fcar (tail); 9791 val = Fcar (tail);
9836 CHECK_CONS (val); 9792 CHECK_CONS (val);
9837 tmp = XCAR (val); 9793 tmp1 = XCAR (val);
9838 CHECK_CHARSET_GET_ID (tmp, id); 9794 CHECK_CHARSET_GET_ID (tmp1, id);
9839 CHECK_NATNUM_CDR (val); 9795 CHECK_NATNUM_CDR (val);
9840 if (XINT (XCDR (val)) >= 4) 9796 if (XINT (XCDR (val)) >= 4)
9841 error ("Invalid graphic register number: %d", XINT (XCDR (val))); 9797 error ("Invalid graphic register number: %d", XINT (XCDR (val)));