diff options
| author | Kenichi Handa | 2000-12-13 23:24:37 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2000-12-13 23:24:37 +0000 |
| commit | 0a28aafbbc877fa133a5028920aba3d6ad981402 (patch) | |
| tree | 0fcace4238886e79cdc98b5f9d3a65ff99d2c771 /src | |
| parent | 0544ef495833a49404cf63d11f95e715daebb8ff (diff) | |
| download | emacs-0a28aafbbc877fa133a5028920aba3d6ad981402.tar.gz emacs-0a28aafbbc877fa133a5028920aba3d6ad981402.zip | |
(ONE_MORE_BYTE_CHECK_MULTIBYTE): New macro.
(detect_coding_emacs_mule, detect_coding_iso2022)
(detect_coding_sjis, detect_coding_big5, detect_coding_utf_8)
(detect_coding_utf_16, detect_coding_ccl): Make them static. New
argument MULTIBYTEP. Callers changed.
(detect_coding_mask, detect_coding_system): New argument
MULTIBYTEP. Callers changed.
Diffstat (limited to 'src')
| -rw-r--r-- | src/coding.c | 140 |
1 file changed, 87 insertions, 53 deletions
diff --git a/src/coding.c b/src/coding.c index 62bb2223d22..44647abaf13 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -112,11 +112,13 @@ Boston, MA 02111-1307, USA. */ | |||
| 112 | in the coding system category XXX. Each returns an integer value in | 112 | in the coding system category XXX. Each returns an integer value in |
| 113 | which appropriate flag bits for the category XXX is set. The flag | 113 | which appropriate flag bits for the category XXX is set. The flag |
| 114 | bits are defined in macros CODING_CATEGORY_MASK_XXX. Below is the | 114 | bits are defined in macros CODING_CATEGORY_MASK_XXX. Below is the |
| 115 | template of these functions. */ | 115 | template of these functions. If MULTIBYTEP is nonzero, 8-bit codes |
| 116 | of the range 0x80..0x9F are in multibyte form. */ | ||
| 116 | #if 0 | 117 | #if 0 |
| 117 | int | 118 | int |
| 118 | detect_coding_emacs_mule (src, src_end) | 119 | detect_coding_emacs_mule (src, src_end, multibytep) |
| 119 | unsigned char *src, *src_end; | 120 | unsigned char *src, *src_end; |
| 121 | int multibytep; | ||
| 120 | { | 122 | { |
| 121 | ... | 123 | ... |
| 122 | } | 124 | } |
| @@ -210,6 +212,21 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) | |||
| 210 | } while (0) | 212 | } while (0) |
| 211 | 213 | ||
| 212 | 214 | ||
| 215 | /* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte | ||
| 216 | form if MULTIBYTEP is nonzero. */ | ||
| 217 | |||
| 218 | #define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep) \ | ||
| 219 | do { \ | ||
| 220 | if (src >= src_end) \ | ||
| 221 | { \ | ||
| 222 | coding->result = CODING_FINISH_INSUFFICIENT_SRC; \ | ||
| 223 | goto label_end_of_loop; \ | ||
| 224 | } \ | ||
| 225 | c1 = *src++; \ | ||
| 226 | if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL) \ | ||
| 227 | c1 = *src++ - 0x20; \ | ||
| 228 | } while (0) | ||
| 229 | |||
| 213 | /* Set C to the next character at the source text pointed by `src'. | 230 | /* Set C to the next character at the source text pointed by `src'. |
| 214 | If there are not enough characters in the source, jump to | 231 | If there are not enough characters in the source, jump to |
| 215 | `label_end_of_loop'. The caller should set variables `coding' | 232 | `label_end_of_loop'. The caller should set variables `coding' |
| @@ -536,9 +553,10 @@ enum emacs_code_class_type emacs_code_class[256]; | |||
| 536 | Check if a text is encoded in Emacs' internal format. If it is, | 553 | Check if a text is encoded in Emacs' internal format. If it is, |
| 537 | return CODING_CATEGORY_MASK_EMACS_MULE, else return 0. */ | 554 | return CODING_CATEGORY_MASK_EMACS_MULE, else return 0. */ |
| 538 | 555 | ||
| 539 | int | 556 | static int |
| 540 | detect_coding_emacs_mule (src, src_end) | 557 | detect_coding_emacs_mule (src, src_end, multibytep) |
| 541 | unsigned char *src, *src_end; | 558 | unsigned char *src, *src_end; |
| 559 | int multibytep; | ||
| 542 | { | 560 | { |
| 543 | unsigned char c; | 561 | unsigned char c; |
| 544 | int composing = 0; | 562 | int composing = 0; |
| @@ -548,7 +566,7 @@ detect_coding_emacs_mule (src, src_end) | |||
| 548 | 566 | ||
| 549 | while (1) | 567 | while (1) |
| 550 | { | 568 | { |
| 551 | ONE_MORE_BYTE (c); | 569 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
| 552 | 570 | ||
| 553 | if (composing) | 571 | if (composing) |
| 554 | { | 572 | { |
| @@ -556,7 +574,7 @@ detect_coding_emacs_mule (src, src_end) | |||
| 556 | composing = 0; | 574 | composing = 0; |
| 557 | else if (c == 0xA0) | 575 | else if (c == 0xA0) |
| 558 | { | 576 | { |
| 559 | ONE_MORE_BYTE (c); | 577 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
| 560 | c &= 0x7F; | 578 | c &= 0x7F; |
| 561 | } | 579 | } |
| 562 | else | 580 | else |
| @@ -881,9 +899,10 @@ enum iso_code_class_type iso_code_class[256]; | |||
| 881 | are set. If a code which should never appear in ISO2022 is found, | 899 | are set. If a code which should never appear in ISO2022 is found, |
| 882 | returns 0. */ | 900 | returns 0. */ |
| 883 | 901 | ||
| 884 | int | 902 | static int |
| 885 | detect_coding_iso2022 (src, src_end) | 903 | detect_coding_iso2022 (src, src_end, multibytep) |
| 886 | unsigned char *src, *src_end; | 904 | unsigned char *src, *src_end; |
| 905 | int multibytep; | ||
| 887 | { | 906 | { |
| 888 | int mask = CODING_CATEGORY_MASK_ISO; | 907 | int mask = CODING_CATEGORY_MASK_ISO; |
| 889 | int mask_found = 0; | 908 | int mask_found = 0; |
| @@ -897,18 +916,18 @@ detect_coding_iso2022 (src, src_end) | |||
| 897 | reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1; | 916 | reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1; |
| 898 | while (mask && src < src_end) | 917 | while (mask && src < src_end) |
| 899 | { | 918 | { |
| 900 | ONE_MORE_BYTE (c); | 919 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
| 901 | switch (c) | 920 | switch (c) |
| 902 | { | 921 | { |
| 903 | case ISO_CODE_ESC: | 922 | case ISO_CODE_ESC: |
| 904 | if (inhibit_iso_escape_detection) | 923 | if (inhibit_iso_escape_detection) |
| 905 | break; | 924 | break; |
| 906 | single_shifting = 0; | 925 | single_shifting = 0; |
| 907 | ONE_MORE_BYTE (c); | 926 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
| 908 | if (c >= '(' && c <= '/') | 927 | if (c >= '(' && c <= '/') |
| 909 | { | 928 | { |
| 910 | /* Designation sequence for a charset of dimension 1. */ | 929 | /* Designation sequence for a charset of dimension 1. */ |
| 911 | ONE_MORE_BYTE (c1); | 930 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep); |
| 912 | if (c1 < ' ' || c1 >= 0x80 | 931 | if (c1 < ' ' || c1 >= 0x80 |
| 913 | || (charset = iso_charset_table[0][c >= ','][c1]) < 0) | 932 | || (charset = iso_charset_table[0][c >= ','][c1]) < 0) |
| 914 | /* Invalid designation sequence. Just ignore. */ | 933 | /* Invalid designation sequence. Just ignore. */ |
| @@ -918,13 +937,13 @@ detect_coding_iso2022 (src, src_end) | |||
| 918 | else if (c == '$') | 937 | else if (c == '$') |
| 919 | { | 938 | { |
| 920 | /* Designation sequence for a charset of dimension 2. */ | 939 | /* Designation sequence for a charset of dimension 2. */ |
| 921 | ONE_MORE_BYTE (c); | 940 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
| 922 | if (c >= '@' && c <= 'B') | 941 | if (c >= '@' && c <= 'B') |
| 923 | /* Designation for JISX0208.1978, GB2312, or JISX0208. */ | 942 | /* Designation for JISX0208.1978, GB2312, or JISX0208. */ |
| 924 | reg[0] = charset = iso_charset_table[1][0][c]; | 943 | reg[0] = charset = iso_charset_table[1][0][c]; |
| 925 | else if (c >= '(' && c <= '/') | 944 | else if (c >= '(' && c <= '/') |
| 926 | { | 945 | { |
| 927 | ONE_MORE_BYTE (c1); | 946 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep); |
| 928 | if (c1 < ' ' || c1 >= 0x80 | 947 | if (c1 < ' ' || c1 >= 0x80 |
| 929 | || (charset = iso_charset_table[1][c >= ','][c1]) < 0) | 948 | || (charset = iso_charset_table[1][c >= ','][c1]) < 0) |
| 930 | /* Invalid designation sequence. Just ignore. */ | 949 | /* Invalid designation sequence. Just ignore. */ |
| @@ -1074,7 +1093,7 @@ detect_coding_iso2022 (src, src_end) | |||
| 1074 | int i = 1; | 1093 | int i = 1; |
| 1075 | while (src < src_end) | 1094 | while (src < src_end) |
| 1076 | { | 1095 | { |
| 1077 | ONE_MORE_BYTE (c); | 1096 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
| 1078 | if (c < 0xA0) | 1097 | if (c < 0xA0) |
| 1079 | break; | 1098 | break; |
| 1080 | i++; | 1099 | i++; |
| @@ -2292,9 +2311,10 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 2292 | Check if a text is encoded in SJIS. If it is, return | 2311 | Check if a text is encoded in SJIS. If it is, return |
| 2293 | CODING_CATEGORY_MASK_SJIS, else return 0. */ | 2312 | CODING_CATEGORY_MASK_SJIS, else return 0. */ |
| 2294 | 2313 | ||
| 2295 | int | 2314 | static int |
| 2296 | detect_coding_sjis (src, src_end) | 2315 | detect_coding_sjis (src, src_end, multibytep) |
| 2297 | unsigned char *src, *src_end; | 2316 | unsigned char *src, *src_end; |
| 2317 | int multibytep; | ||
| 2298 | { | 2318 | { |
| 2299 | int c; | 2319 | int c; |
| 2300 | /* Dummy for ONE_MORE_BYTE. */ | 2320 | /* Dummy for ONE_MORE_BYTE. */ |
| @@ -2303,12 +2323,12 @@ detect_coding_sjis (src, src_end) | |||
| 2303 | 2323 | ||
| 2304 | while (1) | 2324 | while (1) |
| 2305 | { | 2325 | { |
| 2306 | ONE_MORE_BYTE (c); | 2326 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
| 2307 | if (c >= 0x81) | 2327 | if (c >= 0x81) |
| 2308 | { | 2328 | { |
| 2309 | if (c <= 0x9F || (c >= 0xE0 && c <= 0xEF)) | 2329 | if (c <= 0x9F || (c >= 0xE0 && c <= 0xEF)) |
| 2310 | { | 2330 | { |
| 2311 | ONE_MORE_BYTE (c); | 2331 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
| 2312 | if (c < 0x40 || c == 0x7F || c > 0xFC) | 2332 | if (c < 0x40 || c == 0x7F || c > 0xFC) |
| 2313 | return 0; | 2333 | return 0; |
| 2314 | } | 2334 | } |
| @@ -2324,9 +2344,10 @@ detect_coding_sjis (src, src_end) | |||
| 2324 | Check if a text is encoded in BIG5. If it is, return | 2344 | Check if a text is encoded in BIG5. If it is, return |
| 2325 | CODING_CATEGORY_MASK_BIG5, else return 0. */ | 2345 | CODING_CATEGORY_MASK_BIG5, else return 0. */ |
| 2326 | 2346 | ||
| 2327 | int | 2347 | static int |
| 2328 | detect_coding_big5 (src, src_end) | 2348 | detect_coding_big5 (src, src_end, multibytep) |
| 2329 | unsigned char *src, *src_end; | 2349 | unsigned char *src, *src_end; |
| 2350 | int multibytep; | ||
| 2330 | { | 2351 | { |
| 2331 | int c; | 2352 | int c; |
| 2332 | /* Dummy for ONE_MORE_BYTE. */ | 2353 | /* Dummy for ONE_MORE_BYTE. */ |
| @@ -2335,10 +2356,10 @@ detect_coding_big5 (src, src_end) | |||
| 2335 | 2356 | ||
| 2336 | while (1) | 2357 | while (1) |
| 2337 | { | 2358 | { |
| 2338 | ONE_MORE_BYTE (c); | 2359 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
| 2339 | if (c >= 0xA1) | 2360 | if (c >= 0xA1) |
| 2340 | { | 2361 | { |
| 2341 | ONE_MORE_BYTE (c); | 2362 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
| 2342 | if (c < 0x40 || (c >= 0x7F && c <= 0xA0)) | 2363 | if (c < 0x40 || (c >= 0x7F && c <= 0xA0)) |
| 2343 | return 0; | 2364 | return 0; |
| 2344 | } | 2365 | } |
| @@ -2359,9 +2380,10 @@ detect_coding_big5 (src, src_end) | |||
| 2359 | #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8) | 2380 | #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8) |
| 2360 | #define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC) | 2381 | #define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC) |
| 2361 | 2382 | ||
| 2362 | int | 2383 | static int |
| 2363 | detect_coding_utf_8 (src, src_end) | 2384 | detect_coding_utf_8 (src, src_end, multibytep) |
| 2364 | unsigned char *src, *src_end; | 2385 | unsigned char *src, *src_end; |
| 2386 | int multibytep; | ||
| 2365 | { | 2387 | { |
| 2366 | unsigned char c; | 2388 | unsigned char c; |
| 2367 | int seq_maybe_bytes; | 2389 | int seq_maybe_bytes; |
| @@ -2371,7 +2393,7 @@ detect_coding_utf_8 (src, src_end) | |||
| 2371 | 2393 | ||
| 2372 | while (1) | 2394 | while (1) |
| 2373 | { | 2395 | { |
| 2374 | ONE_MORE_BYTE (c); | 2396 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
| 2375 | if (UTF_8_1_OCTET_P (c)) | 2397 | if (UTF_8_1_OCTET_P (c)) |
| 2376 | continue; | 2398 | continue; |
| 2377 | else if (UTF_8_2_OCTET_LEADING_P (c)) | 2399 | else if (UTF_8_2_OCTET_LEADING_P (c)) |
| @@ -2389,7 +2411,7 @@ detect_coding_utf_8 (src, src_end) | |||
| 2389 | 2411 | ||
| 2390 | do | 2412 | do |
| 2391 | { | 2413 | { |
| 2392 | ONE_MORE_BYTE (c); | 2414 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
| 2393 | if (!UTF_8_EXTRA_OCTET_P (c)) | 2415 | if (!UTF_8_EXTRA_OCTET_P (c)) |
| 2394 | return 0; | 2416 | return 0; |
| 2395 | seq_maybe_bytes--; | 2417 | seq_maybe_bytes--; |
| @@ -2417,16 +2439,18 @@ detect_coding_utf_8 (src, src_end) | |||
| 2417 | #define UTF_16_LOW_SURROGATE_P(val) \ | 2439 | #define UTF_16_LOW_SURROGATE_P(val) \ |
| 2418 | (((val) & 0xDC00) == 0xDC00) | 2440 | (((val) & 0xDC00) == 0xDC00) |
| 2419 | 2441 | ||
| 2420 | int | 2442 | static int |
| 2421 | detect_coding_utf_16 (src, src_end) | 2443 | detect_coding_utf_16 (src, src_end, multibytep) |
| 2422 | unsigned char *src, *src_end; | 2444 | unsigned char *src, *src_end; |
| 2445 | int multibytep; | ||
| 2423 | { | 2446 | { |
| 2424 | unsigned char c1, c2; | 2447 | unsigned char c1, c2; |
| 2425 | /* Dummy for TWO_MORE_BYTES. */ | 2448 | /* Dummy for TWO_MORE_BYTES. */ |
| 2426 | struct coding_system dummy_coding; | 2449 | struct coding_system dummy_coding; |
| 2427 | struct coding_system *coding = &dummy_coding; | 2450 | struct coding_system *coding = &dummy_coding; |
| 2428 | 2451 | ||
| 2429 | TWO_MORE_BYTES (c1, c2); | 2452 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep); |
| 2453 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep); | ||
| 2430 | 2454 | ||
| 2431 | if ((c1 == 0xFF) && (c2 == 0xFE)) | 2455 | if ((c1 == 0xFF) && (c2 == 0xFE)) |
| 2432 | return CODING_CATEGORY_MASK_UTF_16_LE; | 2456 | return CODING_CATEGORY_MASK_UTF_16_LE; |
| @@ -2677,9 +2701,10 @@ encode_coding_sjis_big5 (coding, source, destination, | |||
| 2677 | encoder/decoder are written in CCL program. If it is, return | 2701 | encoder/decoder are written in CCL program. If it is, return |
| 2678 | CODING_CATEGORY_MASK_CCL, else return 0. */ | 2702 | CODING_CATEGORY_MASK_CCL, else return 0. */ |
| 2679 | 2703 | ||
| 2680 | int | 2704 | static int |
| 2681 | detect_coding_ccl (src, src_end) | 2705 | detect_coding_ccl (src, src_end, multibytep) |
| 2682 | unsigned char *src, *src_end; | 2706 | unsigned char *src, *src_end; |
| 2707 | int multibytep; | ||
| 2683 | { | 2708 | { |
| 2684 | unsigned char *valid; | 2709 | unsigned char *valid; |
| 2685 | int c; | 2710 | int c; |
| @@ -2694,7 +2719,7 @@ detect_coding_ccl (src, src_end) | |||
| 2694 | valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes; | 2719 | valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes; |
| 2695 | while (1) | 2720 | while (1) |
| 2696 | { | 2721 | { |
| 2697 | ONE_MORE_BYTE (c); | 2722 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
| 2698 | if (! valid[c]) | 2723 | if (! valid[c]) |
| 2699 | return 0; | 2724 | return 0; |
| 2700 | } | 2725 | } |
| @@ -3484,14 +3509,16 @@ int ascii_skip_code[256]; | |||
| 3484 | CODING_CATEGORY_MASK_XXX in `coding.h'. If PRIORITIES is non-NULL, | 3509 | CODING_CATEGORY_MASK_XXX in `coding.h'. If PRIORITIES is non-NULL, |
| 3485 | it should point the table `coding_priorities'. In that case, only | 3510 | it should point the table `coding_priorities'. In that case, only |
| 3486 | the flag bit for a coding system of the highest priority is set in | 3511 | the flag bit for a coding system of the highest priority is set in |
| 3487 | the returned value. | 3512 | the returned value. If MULTIBYTEP is nonzero, 8-bit codes of the |
| 3513 | range 0x80..0x9F are in multibyte form. | ||
| 3488 | 3514 | ||
| 3489 | How many ASCII characters are at the head is returned as *SKIP. */ | 3515 | How many ASCII characters are at the head is returned as *SKIP. */ |
| 3490 | 3516 | ||
| 3491 | static int | 3517 | static int |
| 3492 | detect_coding_mask (source, src_bytes, priorities, skip) | 3518 | detect_coding_mask (source, src_bytes, priorities, skip, multibytep) |
| 3493 | unsigned char *source; | 3519 | unsigned char *source; |
| 3494 | int src_bytes, *priorities, *skip; | 3520 | int src_bytes, *priorities, *skip; |
| 3521 | int multibytep; | ||
| 3495 | { | 3522 | { |
| 3496 | register unsigned char c; | 3523 | register unsigned char c; |
| 3497 | unsigned char *src = source, *src_end = source + src_bytes; | 3524 | unsigned char *src = source, *src_end = source + src_bytes; |
| @@ -3519,7 +3546,7 @@ detect_coding_mask (source, src_bytes, priorities, skip) | |||
| 3519 | { | 3546 | { |
| 3520 | /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */ | 3547 | /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */ |
| 3521 | /* C is an ISO2022 specific control code of C0. */ | 3548 | /* C is an ISO2022 specific control code of C0. */ |
| 3522 | mask = detect_coding_iso2022 (src, src_end); | 3549 | mask = detect_coding_iso2022 (src, src_end, multibytep); |
| 3523 | if (mask == 0) | 3550 | if (mask == 0) |
| 3524 | { | 3551 | { |
| 3525 | /* No valid ISO2022 code follows C. Try again. */ | 3552 | /* No valid ISO2022 code follows C. Try again. */ |
| @@ -3544,6 +3571,9 @@ detect_coding_mask (source, src_bytes, priorities, skip) | |||
| 3544 | { | 3571 | { |
| 3545 | int try; | 3572 | int try; |
| 3546 | 3573 | ||
| 3574 | if (multibytep && c == LEADING_CODE_8_BIT_CONTROL) | ||
| 3575 | c = *src++ - 0x20; | ||
| 3576 | |||
| 3547 | if (c < 0xA0) | 3577 | if (c < 0xA0) |
| 3548 | { | 3578 | { |
| 3549 | /* C is the first byte of SJIS character code, | 3579 | /* C is the first byte of SJIS character code, |
| @@ -3602,22 +3632,22 @@ detect_coding_mask (source, src_bytes, priorities, skip) | |||
| 3602 | iso2022_examined_p = 1; | 3632 | iso2022_examined_p = 1; |
| 3603 | } | 3633 | } |
| 3604 | else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS) | 3634 | else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS) |
| 3605 | mask |= detect_coding_sjis (src, src_end); | 3635 | mask |= detect_coding_sjis (src, src_end, multibytep); |
| 3606 | else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8) | 3636 | else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8) |
| 3607 | mask |= detect_coding_utf_8 (src, src_end); | 3637 | mask |= detect_coding_utf_8 (src, src_end, multibytep); |
| 3608 | else if (!utf16_examined_p | 3638 | else if (!utf16_examined_p |
| 3609 | && (priorities[i] & try & | 3639 | && (priorities[i] & try & |
| 3610 | CODING_CATEGORY_MASK_UTF_16_BE_LE)) | 3640 | CODING_CATEGORY_MASK_UTF_16_BE_LE)) |
| 3611 | { | 3641 | { |
| 3612 | mask |= detect_coding_utf_16 (src, src_end); | 3642 | mask |= detect_coding_utf_16 (src, src_end, multibytep); |
| 3613 | utf16_examined_p = 1; | 3643 | utf16_examined_p = 1; |
| 3614 | } | 3644 | } |
| 3615 | else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5) | 3645 | else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5) |
| 3616 | mask |= detect_coding_big5 (src, src_end); | 3646 | mask |= detect_coding_big5 (src, src_end, multibytep); |
| 3617 | else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE) | 3647 | else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE) |
| 3618 | mask |= detect_coding_emacs_mule (src, src_end); | 3648 | mask |= detect_coding_emacs_mule (src, src_end, multibytep); |
| 3619 | else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL) | 3649 | else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL) |
| 3620 | mask |= detect_coding_ccl (src, src_end); | 3650 | mask |= detect_coding_ccl (src, src_end, multibytep); |
| 3621 | else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT) | 3651 | else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT) |
| 3622 | mask |= CODING_CATEGORY_MASK_RAW_TEXT; | 3652 | mask |= CODING_CATEGORY_MASK_RAW_TEXT; |
| 3623 | else if (priorities[i] & CODING_CATEGORY_MASK_BINARY) | 3653 | else if (priorities[i] & CODING_CATEGORY_MASK_BINARY) |
| @@ -3628,19 +3658,19 @@ detect_coding_mask (source, src_bytes, priorities, skip) | |||
| 3628 | return CODING_CATEGORY_MASK_RAW_TEXT; | 3658 | return CODING_CATEGORY_MASK_RAW_TEXT; |
| 3629 | } | 3659 | } |
| 3630 | if (try & CODING_CATEGORY_MASK_ISO) | 3660 | if (try & CODING_CATEGORY_MASK_ISO) |
| 3631 | mask |= detect_coding_iso2022 (src, src_end); | 3661 | mask |= detect_coding_iso2022 (src, src_end, multibytep); |
| 3632 | if (try & CODING_CATEGORY_MASK_SJIS) | 3662 | if (try & CODING_CATEGORY_MASK_SJIS) |
| 3633 | mask |= detect_coding_sjis (src, src_end); | 3663 | mask |= detect_coding_sjis (src, src_end, multibytep); |
| 3634 | if (try & CODING_CATEGORY_MASK_BIG5) | 3664 | if (try & CODING_CATEGORY_MASK_BIG5) |
| 3635 | mask |= detect_coding_big5 (src, src_end); | 3665 | mask |= detect_coding_big5 (src, src_end, multibytep); |
| 3636 | if (try & CODING_CATEGORY_MASK_UTF_8) | 3666 | if (try & CODING_CATEGORY_MASK_UTF_8) |
| 3637 | mask |= detect_coding_utf_8 (src, src_end); | 3667 | mask |= detect_coding_utf_8 (src, src_end, multibytep); |
| 3638 | if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE) | 3668 | if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE) |
| 3639 | mask |= detect_coding_utf_16 (src, src_end); | 3669 | mask |= detect_coding_utf_16 (src, src_end, multibytep); |
| 3640 | if (try & CODING_CATEGORY_MASK_EMACS_MULE) | 3670 | if (try & CODING_CATEGORY_MASK_EMACS_MULE) |
| 3641 | mask |= detect_coding_emacs_mule (src, src_end); | 3671 | mask |= detect_coding_emacs_mule (src, src_end, multibytep); |
| 3642 | if (try & CODING_CATEGORY_MASK_CCL) | 3672 | if (try & CODING_CATEGORY_MASK_CCL) |
| 3643 | mask |= detect_coding_ccl (src, src_end); | 3673 | mask |= detect_coding_ccl (src, src_end, multibytep); |
| 3644 | } | 3674 | } |
| 3645 | return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY); | 3675 | return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY); |
| 3646 | } | 3676 | } |
| @@ -3659,7 +3689,7 @@ detect_coding (coding, src, src_bytes) | |||
| 3659 | Lisp_Object val; | 3689 | Lisp_Object val; |
| 3660 | 3690 | ||
| 3661 | val = Vcoding_category_list; | 3691 | val = Vcoding_category_list; |
| 3662 | mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip); | 3692 | mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip, 0); |
| 3663 | coding->heading_ascii = skip; | 3693 | coding->heading_ascii = skip; |
| 3664 | 3694 | ||
| 3665 | if (!mask) return; | 3695 | if (!mask) return; |
| @@ -5607,15 +5637,16 @@ The value of property should be a vector of length 5.") | |||
| 5607 | } | 5637 | } |
| 5608 | 5638 | ||
| 5609 | Lisp_Object | 5639 | Lisp_Object |
| 5610 | detect_coding_system (src, src_bytes, highest) | 5640 | detect_coding_system (src, src_bytes, highest, multibytep) |
| 5611 | unsigned char *src; | 5641 | unsigned char *src; |
| 5612 | int src_bytes, highest; | 5642 | int src_bytes, highest; |
| 5643 | int multibytep; | ||
| 5613 | { | 5644 | { |
| 5614 | int coding_mask, eol_type; | 5645 | int coding_mask, eol_type; |
| 5615 | Lisp_Object val, tmp; | 5646 | Lisp_Object val, tmp; |
| 5616 | int dummy; | 5647 | int dummy; |
| 5617 | 5648 | ||
| 5618 | coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy); | 5649 | coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy, multibytep); |
| 5619 | eol_type = detect_eol_type (src, src_bytes, &dummy); | 5650 | eol_type = detect_eol_type (src, src_bytes, &dummy); |
| 5620 | if (eol_type == CODING_EOL_INCONSISTENT) | 5651 | if (eol_type == CODING_EOL_INCONSISTENT) |
| 5621 | eol_type = CODING_EOL_UNDECIDED; | 5652 | eol_type = CODING_EOL_UNDECIDED; |
| @@ -5698,7 +5729,9 @@ highest priority.") | |||
| 5698 | 5729 | ||
| 5699 | return detect_coding_system (BYTE_POS_ADDR (from_byte), | 5730 | return detect_coding_system (BYTE_POS_ADDR (from_byte), |
| 5700 | to_byte - from_byte, | 5731 | to_byte - from_byte, |
| 5701 | !NILP (highest)); | 5732 | !NILP (highest), |
| 5733 | !NILP (current_buffer | ||
| 5734 | ->enable_multibyte_characters)); | ||
| 5702 | } | 5735 | } |
| 5703 | 5736 | ||
| 5704 | DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string, | 5737 | DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string, |
| @@ -5719,7 +5752,8 @@ highest priority.") | |||
| 5719 | 5752 | ||
| 5720 | return detect_coding_system (XSTRING (string)->data, | 5753 | return detect_coding_system (XSTRING (string)->data, |
| 5721 | STRING_BYTES (XSTRING (string)), | 5754 | STRING_BYTES (XSTRING (string)), |
| 5722 | !NILP (highest)); | 5755 | !NILP (highest), |
| 5756 | STRING_MULTIBYTE (string)); | ||
| 5723 | } | 5757 | } |
| 5724 | 5758 | ||
| 5725 | /* Return an intersection of lists L1 and L2. */ | 5759 | /* Return an intersection of lists L1 and L2. */ |