diff options
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 71 |
1 file changed, 32 insertions, 39 deletions
diff --git a/src/coding.c b/src/coding.c index 5b067b9dd2f..53f37c580de 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -219,14 +219,15 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) | |||
| 219 | 219 | ||
| 220 | 220 | ||
| 221 | /* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte | 221 | /* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte |
| 222 | form if MULTIBYTEP is nonzero. */ | 222 | form if MULTIBYTEP is nonzero. In addition, if SRC is not less |
| 223 | than SRC_END, return with RET. */ | ||
| 223 | 224 | ||
| 224 | #define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep) \ | 225 | #define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep, ret) \ |
| 225 | do { \ | 226 | do { \ |
| 226 | if (src >= src_end) \ | 227 | if (src >= src_end) \ |
| 227 | { \ | 228 | { \ |
| 228 | coding->result = CODING_FINISH_INSUFFICIENT_SRC; \ | 229 | coding->result = CODING_FINISH_INSUFFICIENT_SRC; \ |
| 229 | goto label_end_of_loop; \ | 230 | return ret; \ |
| 230 | } \ | 231 | } \ |
| 231 | c1 = *src++; \ | 232 | c1 = *src++; \ |
| 232 | if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL) \ | 233 | if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL) \ |
| @@ -628,15 +629,15 @@ detect_coding_emacs_mule (src, src_end, multibytep) | |||
| 628 | 629 | ||
| 629 | while (1) | 630 | while (1) |
| 630 | { | 631 | { |
| 631 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | 632 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, |
| 632 | 633 | CODING_CATEGORY_MASK_EMACS_MULE); | |
| 633 | if (composing) | 634 | if (composing) |
| 634 | { | 635 | { |
| 635 | if (c < 0xA0) | 636 | if (c < 0xA0) |
| 636 | composing = 0; | 637 | composing = 0; |
| 637 | else if (c == 0xA0) | 638 | else if (c == 0xA0) |
| 638 | { | 639 | { |
| 639 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | 640 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0); |
| 640 | c &= 0x7F; | 641 | c &= 0x7F; |
| 641 | } | 642 | } |
| 642 | else | 643 | else |
| @@ -665,8 +666,6 @@ detect_coding_emacs_mule (src, src_end, multibytep) | |||
| 665 | } | 666 | } |
| 666 | } | 667 | } |
| 667 | } | 668 | } |
| 668 | label_end_of_loop: | ||
| 669 | return CODING_CATEGORY_MASK_EMACS_MULE; | ||
| 670 | } | 669 | } |
| 671 | 670 | ||
| 672 | 671 | ||
| @@ -1421,9 +1420,9 @@ detect_coding_iso2022 (src, src_end, multibytep) | |||
| 1421 | Lisp_Object safe_chars; | 1420 | Lisp_Object safe_chars; |
| 1422 | 1421 | ||
| 1423 | reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1; | 1422 | reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1; |
| 1424 | while (mask && src < src_end) | 1423 | while (mask) |
| 1425 | { | 1424 | { |
| 1426 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | 1425 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found); |
| 1427 | retry: | 1426 | retry: |
| 1428 | switch (c) | 1427 | switch (c) |
| 1429 | { | 1428 | { |
| @@ -1431,11 +1430,11 @@ detect_coding_iso2022 (src, src_end, multibytep) | |||
| 1431 | if (inhibit_iso_escape_detection) | 1430 | if (inhibit_iso_escape_detection) |
| 1432 | break; | 1431 | break; |
| 1433 | single_shifting = 0; | 1432 | single_shifting = 0; |
| 1434 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | 1433 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found); |
| 1435 | if (c >= '(' && c <= '/') | 1434 | if (c >= '(' && c <= '/') |
| 1436 | { | 1435 | { |
| 1437 | /* Designation sequence for a charset of dimension 1. */ | 1436 | /* Designation sequence for a charset of dimension 1. */ |
| 1438 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep); | 1437 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep, mask & mask_found); |
| 1439 | if (c1 < ' ' || c1 >= 0x80 | 1438 | if (c1 < ' ' || c1 >= 0x80 |
| 1440 | || (charset = iso_charset_table[0][c >= ','][c1]) < 0) | 1439 | || (charset = iso_charset_table[0][c >= ','][c1]) < 0) |
| 1441 | /* Invalid designation sequence. Just ignore. */ | 1440 | /* Invalid designation sequence. Just ignore. */ |
| @@ -1445,13 +1444,14 @@ detect_coding_iso2022 (src, src_end, multibytep) | |||
| 1445 | else if (c == '$') | 1444 | else if (c == '$') |
| 1446 | { | 1445 | { |
| 1447 | /* Designation sequence for a charset of dimension 2. */ | 1446 | /* Designation sequence for a charset of dimension 2. */ |
| 1448 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | 1447 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found); |
| 1449 | if (c >= '@' && c <= 'B') | 1448 | if (c >= '@' && c <= 'B') |
| 1450 | /* Designation for JISX0208.1978, GB2312, or JISX0208. */ | 1449 | /* Designation for JISX0208.1978, GB2312, or JISX0208. */ |
| 1451 | reg[0] = charset = iso_charset_table[1][0][c]; | 1450 | reg[0] = charset = iso_charset_table[1][0][c]; |
| 1452 | else if (c >= '(' && c <= '/') | 1451 | else if (c >= '(' && c <= '/') |
| 1453 | { | 1452 | { |
| 1454 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep); | 1453 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep, |
| 1454 | mask & mask_found); | ||
| 1455 | if (c1 < ' ' || c1 >= 0x80 | 1455 | if (c1 < ' ' || c1 >= 0x80 |
| 1456 | || (charset = iso_charset_table[1][c >= ','][c1]) < 0) | 1456 | || (charset = iso_charset_table[1][c >= ','][c1]) < 0) |
| 1457 | /* Invalid designation sequence. Just ignore. */ | 1457 | /* Invalid designation sequence. Just ignore. */ |
| @@ -1626,7 +1626,8 @@ detect_coding_iso2022 (src, src_end, multibytep) | |||
| 1626 | c = -1; | 1626 | c = -1; |
| 1627 | while (src < src_end) | 1627 | while (src < src_end) |
| 1628 | { | 1628 | { |
| 1629 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | 1629 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, |
| 1630 | mask & mask_found); | ||
| 1630 | if (c < 0xA0) | 1631 | if (c < 0xA0) |
| 1631 | break; | 1632 | break; |
| 1632 | i++; | 1633 | i++; |
| @@ -1644,7 +1645,6 @@ detect_coding_iso2022 (src, src_end, multibytep) | |||
| 1644 | break; | 1645 | break; |
| 1645 | } | 1646 | } |
| 1646 | } | 1647 | } |
| 1647 | label_end_of_loop: | ||
| 1648 | return (mask & mask_found); | 1648 | return (mask & mask_found); |
| 1649 | } | 1649 | } |
| 1650 | 1650 | ||
| @@ -2915,20 +2915,18 @@ detect_coding_sjis (src, src_end, multibytep) | |||
| 2915 | 2915 | ||
| 2916 | while (1) | 2916 | while (1) |
| 2917 | { | 2917 | { |
| 2918 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | 2918 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_SJIS); |
| 2919 | if (c < 0x80) | 2919 | if (c < 0x80) |
| 2920 | continue; | 2920 | continue; |
| 2921 | if (c == 0x80 || c == 0xA0 || c > 0xEF) | 2921 | if (c == 0x80 || c == 0xA0 || c > 0xEF) |
| 2922 | return 0; | 2922 | return 0; |
| 2923 | if (c <= 0x9F || c >= 0xE0) | 2923 | if (c <= 0x9F || c >= 0xE0) |
| 2924 | { | 2924 | { |
| 2925 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | 2925 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0); |
| 2926 | if (c < 0x40 || c == 0x7F || c > 0xFC) | 2926 | if (c < 0x40 || c == 0x7F || c > 0xFC) |
| 2927 | return 0; | 2927 | return 0; |
| 2928 | } | 2928 | } |
| 2929 | } | 2929 | } |
| 2930 | label_end_of_loop: | ||
| 2931 | return CODING_CATEGORY_MASK_SJIS; | ||
| 2932 | } | 2930 | } |
| 2933 | 2931 | ||
| 2934 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 2932 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| @@ -2947,17 +2945,15 @@ detect_coding_big5 (src, src_end, multibytep) | |||
| 2947 | 2945 | ||
| 2948 | while (1) | 2946 | while (1) |
| 2949 | { | 2947 | { |
| 2950 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | 2948 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_BIG5); |
| 2951 | if (c < 0x80) | 2949 | if (c < 0x80) |
| 2952 | continue; | 2950 | continue; |
| 2953 | if (c < 0xA1 || c > 0xFE) | 2951 | if (c < 0xA1 || c > 0xFE) |
| 2954 | return 0; | 2952 | return 0; |
| 2955 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | 2953 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0); |
| 2956 | if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE) | 2954 | if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE) |
| 2957 | return 0; | 2955 | return 0; |
| 2958 | } | 2956 | } |
| 2959 | label_end_of_loop: | ||
| 2960 | return CODING_CATEGORY_MASK_BIG5; | ||
| 2961 | } | 2957 | } |
| 2962 | 2958 | ||
| 2963 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 2959 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| @@ -2985,7 +2981,7 @@ detect_coding_utf_8 (src, src_end, multibytep) | |||
| 2985 | 2981 | ||
| 2986 | while (1) | 2982 | while (1) |
| 2987 | { | 2983 | { |
| 2988 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | 2984 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_UTF_8); |
| 2989 | if (UTF_8_1_OCTET_P (c)) | 2985 | if (UTF_8_1_OCTET_P (c)) |
| 2990 | continue; | 2986 | continue; |
| 2991 | else if (UTF_8_2_OCTET_LEADING_P (c)) | 2987 | else if (UTF_8_2_OCTET_LEADING_P (c)) |
| @@ -3003,16 +2999,13 @@ detect_coding_utf_8 (src, src_end, multibytep) | |||
| 3003 | 2999 | ||
| 3004 | do | 3000 | do |
| 3005 | { | 3001 | { |
| 3006 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | 3002 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0); |
| 3007 | if (!UTF_8_EXTRA_OCTET_P (c)) | 3003 | if (!UTF_8_EXTRA_OCTET_P (c)) |
| 3008 | return 0; | 3004 | return 0; |
| 3009 | seq_maybe_bytes--; | 3005 | seq_maybe_bytes--; |
| 3010 | } | 3006 | } |
| 3011 | while (seq_maybe_bytes > 0); | 3007 | while (seq_maybe_bytes > 0); |
| 3012 | } | 3008 | } |
| 3013 | |||
| 3014 | label_end_of_loop: | ||
| 3015 | return CODING_CATEGORY_MASK_UTF_8; | ||
| 3016 | } | 3009 | } |
| 3017 | 3010 | ||
| 3018 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 3011 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| @@ -3041,15 +3034,13 @@ detect_coding_utf_16 (src, src_end, multibytep) | |||
| 3041 | struct coding_system dummy_coding; | 3034 | struct coding_system dummy_coding; |
| 3042 | struct coding_system *coding = &dummy_coding; | 3035 | struct coding_system *coding = &dummy_coding; |
| 3043 | 3036 | ||
| 3044 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep); | 3037 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep, 0); |
| 3045 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep); | 3038 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep, 0); |
| 3046 | 3039 | ||
| 3047 | if ((c1 == 0xFF) && (c2 == 0xFE)) | 3040 | if ((c1 == 0xFF) && (c2 == 0xFE)) |
| 3048 | return CODING_CATEGORY_MASK_UTF_16_LE; | 3041 | return CODING_CATEGORY_MASK_UTF_16_LE; |
| 3049 | else if ((c1 == 0xFE) && (c2 == 0xFF)) | 3042 | else if ((c1 == 0xFE) && (c2 == 0xFF)) |
| 3050 | return CODING_CATEGORY_MASK_UTF_16_BE; | 3043 | return CODING_CATEGORY_MASK_UTF_16_BE; |
| 3051 | |||
| 3052 | label_end_of_loop: | ||
| 3053 | return 0; | 3044 | return 0; |
| 3054 | } | 3045 | } |
| 3055 | 3046 | ||
| @@ -3318,12 +3309,10 @@ detect_coding_ccl (src, src_end, multibytep) | |||
| 3318 | valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes; | 3309 | valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes; |
| 3319 | while (1) | 3310 | while (1) |
| 3320 | { | 3311 | { |
| 3321 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | 3312 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_CCL); |
| 3322 | if (! valid[c]) | 3313 | if (! valid[c]) |
| 3323 | return 0; | 3314 | return 0; |
| 3324 | } | 3315 | } |
| 3325 | label_end_of_loop: | ||
| 3326 | return CODING_CATEGORY_MASK_CCL; | ||
| 3327 | } | 3316 | } |
| 3328 | 3317 | ||
| 3329 | 3318 | ||
| @@ -7547,7 +7536,10 @@ usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */) | |||
| 7547 | return Fcons (val, val); | 7536 | return Fcons (val, val); |
| 7548 | if (! NILP (Ffboundp (val))) | 7537 | if (! NILP (Ffboundp (val))) |
| 7549 | { | 7538 | { |
| 7550 | val = safe_call1 (val, Flist (nargs, args)); | 7539 | /* We use call1 rather than safe_call1 |
| 7540 | so as to get bug reports about functions called here | ||
| 7541 | which don't handle the current interface. */ | ||
| 7542 | val = call1 (val, Flist (nargs, args)); | ||
| 7551 | if (CONSP (val)) | 7543 | if (CONSP (val)) |
| 7552 | return val; | 7544 | return val; |
| 7553 | if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val))) | 7545 | if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val))) |
| @@ -7971,8 +7963,9 @@ the file contents. | |||
| 7971 | If VAL is a cons of coding systems, the car part is used for decoding, | 7963 | If VAL is a cons of coding systems, the car part is used for decoding, |
| 7972 | and the cdr part is used for encoding. | 7964 | and the cdr part is used for encoding. |
| 7973 | If VAL is a function symbol, the function must return a coding system | 7965 | If VAL is a function symbol, the function must return a coding system |
| 7974 | or a cons of coding systems which are used as above. The function gets | 7966 | or a cons of coding systems which are used as above. The function is |
| 7975 | the arguments with which `find-operation-coding-system' was called. | 7967 | called with an argument that is a list of the arguments with which |
| 7968 | `find-operation-coding-system' was called. | ||
| 7976 | 7969 | ||
| 7977 | See also the function `find-operation-coding-system' | 7970 | See also the function `find-operation-coding-system' |
| 7978 | and the variable `auto-coding-alist'. */); | 7971 | and the variable `auto-coding-alist'. */); |