about summary refs log tree commit diff stats
path: root/src/coding.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/coding.c')
-rw-r--r-- src/coding.c 61
1 file changed, 25 insertions, 36 deletions
diff --git a/src/coding.c b/src/coding.c
index ae9f6749792..28e7a3fafc1 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -219,14 +219,15 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
219 219
220 220
221/* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte 221/* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
222 form if MULTIBYTEP is nonzero. */ 222 form if MULTIBYTEP is nonzero. In addition, if SRC is not less
223 than SRC_END, return with RET. */
223 224
224#define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep) \ 225#define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep, ret) \
225 do { \ 226 do { \
226 if (src >= src_end) \ 227 if (src >= src_end) \
227 { \ 228 { \
228 coding->result = CODING_FINISH_INSUFFICIENT_SRC; \ 229 coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
229 goto label_end_of_loop; \ 230 return ret; \
230 } \ 231 } \
231 c1 = *src++; \ 232 c1 = *src++; \
232 if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL) \ 233 if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL) \
@@ -632,15 +633,15 @@ detect_coding_emacs_mule (src, src_end, multibytep)
632 633
633 while (1) 634 while (1)
634 { 635 {
635 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 636 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep,
636 637 CODING_CATEGORY_MASK_EMACS_MULE);
637 if (composing) 638 if (composing)
638 { 639 {
639 if (c < 0xA0) 640 if (c < 0xA0)
640 composing = 0; 641 composing = 0;
641 else if (c == 0xA0) 642 else if (c == 0xA0)
642 { 643 {
643 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 644 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
644 c &= 0x7F; 645 c &= 0x7F;
645 } 646 }
646 else 647 else
@@ -669,8 +670,6 @@ detect_coding_emacs_mule (src, src_end, multibytep)
669 } 670 }
670 } 671 }
671 } 672 }
672 label_end_of_loop:
673 return CODING_CATEGORY_MASK_EMACS_MULE;
674} 673}
675 674
676 675
@@ -1425,9 +1424,9 @@ detect_coding_iso2022 (src, src_end, multibytep)
1425 Lisp_Object safe_chars; 1424 Lisp_Object safe_chars;
1426 1425
1427 reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1; 1426 reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
1428 while (mask && src < src_end) 1427 while (mask)
1429 { 1428 {
1430 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 1429 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found);
1431 retry: 1430 retry:
1432 switch (c) 1431 switch (c)
1433 { 1432 {
@@ -1435,11 +1434,11 @@ detect_coding_iso2022 (src, src_end, multibytep)
1435 if (inhibit_iso_escape_detection) 1434 if (inhibit_iso_escape_detection)
1436 break; 1435 break;
1437 single_shifting = 0; 1436 single_shifting = 0;
1438 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 1437 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found);
1439 if (c >= '(' && c <= '/') 1438 if (c >= '(' && c <= '/')
1440 { 1439 {
1441 /* Designation sequence for a charset of dimension 1. */ 1440 /* Designation sequence for a charset of dimension 1. */
1442 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep); 1441 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep, mask & mask_found);
1443 if (c1 < ' ' || c1 >= 0x80 1442 if (c1 < ' ' || c1 >= 0x80
1444 || (charset = iso_charset_table[0][c >= ','][c1]) < 0) 1443 || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
1445 /* Invalid designation sequence. Just ignore. */ 1444 /* Invalid designation sequence. Just ignore. */
@@ -1449,13 +1448,14 @@ detect_coding_iso2022 (src, src_end, multibytep)
1449 else if (c == '$') 1448 else if (c == '$')
1450 { 1449 {
1451 /* Designation sequence for a charset of dimension 2. */ 1450 /* Designation sequence for a charset of dimension 2. */
1452 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 1451 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found);
1453 if (c >= '@' && c <= 'B') 1452 if (c >= '@' && c <= 'B')
1454 /* Designation for JISX0208.1978, GB2312, or JISX0208. */ 1453 /* Designation for JISX0208.1978, GB2312, or JISX0208. */
1455 reg[0] = charset = iso_charset_table[1][0][c]; 1454 reg[0] = charset = iso_charset_table[1][0][c];
1456 else if (c >= '(' && c <= '/') 1455 else if (c >= '(' && c <= '/')
1457 { 1456 {
1458 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep); 1457 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep,
1458 mask & mask_found);
1459 if (c1 < ' ' || c1 >= 0x80 1459 if (c1 < ' ' || c1 >= 0x80
1460 || (charset = iso_charset_table[1][c >= ','][c1]) < 0) 1460 || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
1461 /* Invalid designation sequence. Just ignore. */ 1461 /* Invalid designation sequence. Just ignore. */
@@ -1630,7 +1630,8 @@ detect_coding_iso2022 (src, src_end, multibytep)
1630 c = -1; 1630 c = -1;
1631 while (src < src_end) 1631 while (src < src_end)
1632 { 1632 {
1633 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 1633 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep,
1634 mask & mask_found);
1634 if (c < 0xA0) 1635 if (c < 0xA0)
1635 break; 1636 break;
1636 i++; 1637 i++;
@@ -1648,7 +1649,6 @@ detect_coding_iso2022 (src, src_end, multibytep)
1648 break; 1649 break;
1649 } 1650 }
1650 } 1651 }
1651 label_end_of_loop:
1652 return (mask & mask_found); 1652 return (mask & mask_found);
1653} 1653}
1654 1654
@@ -2919,20 +2919,18 @@ detect_coding_sjis (src, src_end, multibytep)
2919 2919
2920 while (1) 2920 while (1)
2921 { 2921 {
2922 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 2922 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_SJIS);
2923 if (c < 0x80) 2923 if (c < 0x80)
2924 continue; 2924 continue;
2925 if (c == 0x80 || c == 0xA0 || c > 0xEF) 2925 if (c == 0x80 || c == 0xA0 || c > 0xEF)
2926 return 0; 2926 return 0;
2927 if (c <= 0x9F || c >= 0xE0) 2927 if (c <= 0x9F || c >= 0xE0)
2928 { 2928 {
2929 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 2929 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
2930 if (c < 0x40 || c == 0x7F || c > 0xFC) 2930 if (c < 0x40 || c == 0x7F || c > 0xFC)
2931 return 0; 2931 return 0;
2932 } 2932 }
2933 } 2933 }
2934 label_end_of_loop:
2935 return CODING_CATEGORY_MASK_SJIS;
2936} 2934}
2937 2935
2938/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 2936/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
@@ -2951,17 +2949,15 @@ detect_coding_big5 (src, src_end, multibytep)
2951 2949
2952 while (1) 2950 while (1)
2953 { 2951 {
2954 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 2952 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_BIG5);
2955 if (c < 0x80) 2953 if (c < 0x80)
2956 continue; 2954 continue;
2957 if (c < 0xA1 || c > 0xFE) 2955 if (c < 0xA1 || c > 0xFE)
2958 return 0; 2956 return 0;
2959 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 2957 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
2960 if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE) 2958 if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
2961 return 0; 2959 return 0;
2962 } 2960 }
2963 label_end_of_loop:
2964 return CODING_CATEGORY_MASK_BIG5;
2965} 2961}
2966 2962
2967/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 2963/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
@@ -2989,7 +2985,7 @@ detect_coding_utf_8 (src, src_end, multibytep)
2989 2985
2990 while (1) 2986 while (1)
2991 { 2987 {
2992 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 2988 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_UTF_8);
2993 if (UTF_8_1_OCTET_P (c)) 2989 if (UTF_8_1_OCTET_P (c))
2994 continue; 2990 continue;
2995 else if (UTF_8_2_OCTET_LEADING_P (c)) 2991 else if (UTF_8_2_OCTET_LEADING_P (c))
@@ -3007,16 +3003,13 @@ detect_coding_utf_8 (src, src_end, multibytep)
3007 3003
3008 do 3004 do
3009 { 3005 {
3010 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 3006 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
3011 if (!UTF_8_EXTRA_OCTET_P (c)) 3007 if (!UTF_8_EXTRA_OCTET_P (c))
3012 return 0; 3008 return 0;
3013 seq_maybe_bytes--; 3009 seq_maybe_bytes--;
3014 } 3010 }
3015 while (seq_maybe_bytes > 0); 3011 while (seq_maybe_bytes > 0);
3016 } 3012 }
3017
3018 label_end_of_loop:
3019 return CODING_CATEGORY_MASK_UTF_8;
3020} 3013}
3021 3014
3022/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 3015/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
@@ -3045,15 +3038,13 @@ detect_coding_utf_16 (src, src_end, multibytep)
3045 struct coding_system dummy_coding; 3038 struct coding_system dummy_coding;
3046 struct coding_system *coding = &dummy_coding; 3039 struct coding_system *coding = &dummy_coding;
3047 3040
3048 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep); 3041 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep, 0);
3049 ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep); 3042 ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep, 0);
3050 3043
3051 if ((c1 == 0xFF) && (c2 == 0xFE)) 3044 if ((c1 == 0xFF) && (c2 == 0xFE))
3052 return CODING_CATEGORY_MASK_UTF_16_LE; 3045 return CODING_CATEGORY_MASK_UTF_16_LE;
3053 else if ((c1 == 0xFE) && (c2 == 0xFF)) 3046 else if ((c1 == 0xFE) && (c2 == 0xFF))
3054 return CODING_CATEGORY_MASK_UTF_16_BE; 3047 return CODING_CATEGORY_MASK_UTF_16_BE;
3055
3056 label_end_of_loop:
3057 return 0; 3048 return 0;
3058} 3049}
3059 3050
@@ -3322,12 +3313,10 @@ detect_coding_ccl (src, src_end, multibytep)
3322 valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes; 3313 valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
3323 while (1) 3314 while (1)
3324 { 3315 {
3325 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 3316 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_CCL);
3326 if (! valid[c]) 3317 if (! valid[c])
3327 return 0; 3318 return 0;
3328 } 3319 }
3329 label_end_of_loop:
3330 return CODING_CATEGORY_MASK_CCL;
3331} 3320}
3332 3321
3333 3322