diff options
| author | Kenichi Handa | 2000-03-07 06:17:54 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2000-03-07 06:17:54 +0000 |
| commit | fa42c37fe3359505d76a5e7416728f6802705363 (patch) | |
| tree | 9e30906fa4f3af5b04b5fa2435ff647e4dedcd35 /src/coding.c | |
| parent | 62537270d91ac38791353b1790730e1b6c18e1cf (diff) | |
| download | emacs-fa42c37fe3359505d76a5e7416728f6802705363.tar.gz emacs-fa42c37fe3359505d76a5e7416728f6802705363.zip | |
Add comments on coding-category-utf-8,
coding-category-utf-16-be, and coding-category-utf-16-le.
(coding_category_name): Include "coding-category-utf-8",
"coding-category-utf-16-be", and "coding-category-utf-16-le".
(UTF_8_1_OCTET_P) (UTF_8_EXTRA_OCTET_P) (UTF_8_2_OCTET_LEADING_P)
(UTF_8_3_OCTET_LEADING_P) (UTF_8_4_OCTET_LEADING_P)
(UTF_8_5_OCTET_LEADING_P) (UTF_8_6_OCTET_LEADING_P): New macros.
(detect_coding_utf_8): New function.
(UTF_16_INVALID_P) (UTF_16_HIGH_SURROGATE_P)
(UTF_16_LOW_SURROGATE_P): New macros.
(detect_coding_utf_16): New function.
(detect_coding_mask): Fix bug of returning wrong mask bits in the
case that detect_coding_XXX returns a mask not set in
priorities[i].
(detect_eol_type_in_2_octet_form): New function.
(detect_eol): If coding->category_idx is for UTF-16, call
detect_eol_type_in_2_octet_form instead of detect_eol_type.
(detect_coding_system): Don't include `nil' coding-system in the
result.
(Fupdate_coding_systems_internal): Update all coding-categories.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 299 |
1 files changed, 258 insertions, 41 deletions
diff --git a/src/coding.c b/src/coding.c index 40fe424fe05..fcd5e89e004 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -362,6 +362,9 @@ char *coding_category_name[CODING_CATEGORY_IDX_MAX] = { | |||
| 362 | "coding-category-iso-8-else", | 362 | "coding-category-iso-8-else", |
| 363 | "coding-category-ccl", | 363 | "coding-category-ccl", |
| 364 | "coding-category-big5", | 364 | "coding-category-big5", |
| 365 | "coding-category-utf-8", | ||
| 366 | "coding-category-utf-16-be", | ||
| 367 | "coding-category-utf-16-le", | ||
| 365 | "coding-category-raw-text", | 368 | "coding-category-raw-text", |
| 366 | "coding-category-binary" | 369 | "coding-category-binary" |
| 367 | }; | 370 | }; |
| @@ -2348,6 +2351,89 @@ detect_coding_big5 (src, src_end) | |||
| 2348 | return CODING_CATEGORY_MASK_BIG5; | 2351 | return CODING_CATEGORY_MASK_BIG5; |
| 2349 | } | 2352 | } |
| 2350 | 2353 | ||
| 2354 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | ||
| 2355 | Check if a text is encoded in UTF-8. If it is, return | ||
| 2356 | CODING_CATEGORY_MASK_UTF_8, else return 0. */ | ||
| 2357 | |||
| 2358 | #define UTF_8_1_OCTET_P(c) ((c) < 0x80) | ||
| 2359 | #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80) | ||
| 2360 | #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0) | ||
| 2361 | #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0) | ||
| 2362 | #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0) | ||
| 2363 | #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8) | ||
| 2364 | #define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC) | ||
| 2365 | |||
| 2366 | int | ||
| 2367 | detect_coding_utf_8 (src, src_end) | ||
| 2368 | unsigned char *src, *src_end; | ||
| 2369 | { | ||
| 2370 | unsigned char c; | ||
| 2371 | int seq_maybe_bytes; | ||
| 2372 | |||
| 2373 | while (src < src_end) | ||
| 2374 | { | ||
| 2375 | c = *src++; | ||
| 2376 | if (UTF_8_1_OCTET_P (c)) | ||
| 2377 | continue; | ||
| 2378 | else if (UTF_8_2_OCTET_LEADING_P (c)) | ||
| 2379 | seq_maybe_bytes = 1; | ||
| 2380 | else if (UTF_8_3_OCTET_LEADING_P (c)) | ||
| 2381 | seq_maybe_bytes = 2; | ||
| 2382 | else if (UTF_8_4_OCTET_LEADING_P (c)) | ||
| 2383 | seq_maybe_bytes = 3; | ||
| 2384 | else if (UTF_8_5_OCTET_LEADING_P (c)) | ||
| 2385 | seq_maybe_bytes = 4; | ||
| 2386 | else if (UTF_8_6_OCTET_LEADING_P (c)) | ||
| 2387 | seq_maybe_bytes = 5; | ||
| 2388 | else | ||
| 2389 | return 0; | ||
| 2390 | |||
| 2391 | do | ||
| 2392 | { | ||
| 2393 | if (src >= src_end) | ||
| 2394 | return CODING_CATEGORY_MASK_UTF_8; | ||
| 2395 | |||
| 2396 | c = *src++; | ||
| 2397 | if (!UTF_8_EXTRA_OCTET_P (c)) | ||
| 2398 | return 0; | ||
| 2399 | seq_maybe_bytes--; | ||
| 2400 | } | ||
| 2401 | while (seq_maybe_bytes > 0); | ||
| 2402 | } | ||
| 2403 | |||
| 2404 | return CODING_CATEGORY_MASK_UTF_8; | ||
| 2405 | } | ||
| 2406 | |||
| 2407 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | ||
| 2408 | Check if a text is encoded in UTF-16 Big Endian (endian == 1) or | ||
| 2409 | Little Endian (otherwise). If it is, return | ||
| 2410 | CODING_CATEGORY_MASK_UTF_16_BE or CODING_CATEGORY_MASK_UTF_16_LE, | ||
| 2411 | else return 0. */ | ||
| 2412 | |||
| 2413 | #define UTF_16_INVALID_P(val) \ | ||
| 2414 | (((val) == 0xFFFE) \ | ||
| 2415 | || ((val) == 0xFFFF)) | ||
| 2416 | |||
| 2417 | #define UTF_16_HIGH_SURROGATE_P(val) \ | ||
| 2418 | (((val) & 0xD800) == 0xD800) | ||
| 2419 | |||
| 2420 | #define UTF_16_LOW_SURROGATE_P(val) \ | ||
| 2421 | (((val) & 0xDC00) == 0xDC00) | ||
| 2422 | |||
| 2423 | int | ||
| 2424 | detect_coding_utf_16 (src, src_end) | ||
| 2425 | unsigned char *src, *src_end; | ||
| 2426 | { | ||
| 2427 | if ((src + 1) >= src_end) return 0; | ||
| 2428 | |||
| 2429 | if ((src[0] == 0xFF) && (src[1] == 0xFE)) | ||
| 2430 | return CODING_CATEGORY_MASK_UTF_16_LE; | ||
| 2431 | else if ((src[0] == 0xFE) && (src[1] == 0xFF)) | ||
| 2432 | return CODING_CATEGORY_MASK_UTF_16_BE; | ||
| 2433 | |||
| 2434 | return 0; | ||
| 2435 | } | ||
| 2436 | |||
| 2351 | /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". | 2437 | /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". |
| 2352 | If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */ | 2438 | If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */ |
| 2353 | 2439 | ||
| @@ -3453,6 +3539,26 @@ setup_raw_text_coding_system (coding) | |||
| 3453 | as BIG5. Assigned the coding-system (Lisp symbol) | 3539 | as BIG5. Assigned the coding-system (Lisp symbol) |
| 3454 | `cn-big5' by default. | 3540 | `cn-big5' by default. |
| 3455 | 3541 | ||
| 3542 | o coding-category-utf-8 | ||
| 3543 | |||
| 3544 | The category for a coding system which has the same code range | ||
| 3545 | as UTF-8 (cf. RFC2279). Assigned the coding-system (Lisp | ||
| 3546 | symbol) `utf-8' by default. | ||
| 3547 | |||
| 3548 | o coding-category-utf-16-be | ||
| 3549 | |||
| 3550 | The category for a coding system in which a text has an | ||
| 3551 | Unicode signature (cf. Unicode Standard) in the order of BIG | ||
| 3552 | endian at the head. Assigned the coding-system (Lisp symbol) | ||
| 3553 | `utf-16-be' by default. | ||
| 3554 | |||
| 3555 | o coding-category-utf-16-le | ||
| 3556 | |||
| 3557 | The category for a coding system in which a text has an | ||
| 3558 | Unicode signature (cf. Unicode Standard) in the order of | ||
| 3559 | LITTLE endian at the head. Assigned the coding-system (Lisp | ||
| 3560 | symbol) `utf-16-le' by default. | ||
| 3561 | |||
| 3456 | o coding-category-ccl | 3562 | o coding-category-ccl |
| 3457 | 3563 | ||
| 3458 | The category for a coding system of which encoder/decoder is | 3564 | The category for a coding system of which encoder/decoder is |
| @@ -3481,7 +3587,10 @@ int ascii_skip_code[256]; | |||
| 3481 | /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded. | 3587 | /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded. |
| 3482 | If it detects possible coding systems, return an integer in which | 3588 | If it detects possible coding systems, return an integer in which |
| 3483 | appropriate flag bits are set. Flag bits are defined by macros | 3589 | appropriate flag bits are set. Flag bits are defined by macros |
| 3484 | CODING_CATEGORY_MASK_XXX in `coding.h'. | 3590 | CODING_CATEGORY_MASK_XXX in `coding.h'. If PRIORITIES is non-NULL, |
| 3591 | it should point the table `coding_priorities'. In that case, only | ||
| 3592 | the flag bit for a coding system of the highest priority is set in | ||
| 3593 | the returned value. | ||
| 3485 | 3594 | ||
| 3486 | How many ASCII characters are at the head is returned as *SKIP. */ | 3595 | How many ASCII characters are at the head is returned as *SKIP. */ |
| 3487 | 3596 | ||
| @@ -3492,8 +3601,8 @@ detect_coding_mask (source, src_bytes, priorities, skip) | |||
| 3492 | { | 3601 | { |
| 3493 | register unsigned char c; | 3602 | register unsigned char c; |
| 3494 | unsigned char *src = source, *src_end = source + src_bytes; | 3603 | unsigned char *src = source, *src_end = source + src_bytes; |
| 3495 | unsigned int mask; | 3604 | unsigned int mask, utf16_examined_p, iso2022_examined_p; |
| 3496 | int i; | 3605 | int i, idx; |
| 3497 | 3606 | ||
| 3498 | /* At first, skip all ASCII characters and control characters except | 3607 | /* At first, skip all ASCII characters and control characters except |
| 3499 | for three ISO2022 specific control characters. */ | 3608 | for three ISO2022 specific control characters. */ |
| @@ -3528,7 +3637,14 @@ detect_coding_mask (source, src_bytes, priorities, skip) | |||
| 3528 | goto label_loop_detect_coding; | 3637 | goto label_loop_detect_coding; |
| 3529 | } | 3638 | } |
| 3530 | if (priorities) | 3639 | if (priorities) |
| 3531 | goto label_return_highest_only; | 3640 | { |
| 3641 | for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++) | ||
| 3642 | { | ||
| 3643 | if (mask & priorities[i]) | ||
| 3644 | return priorities[i]; | ||
| 3645 | } | ||
| 3646 | return CODING_CATEGORY_MASK_RAW_TEXT; | ||
| 3647 | } | ||
| 3532 | } | 3648 | } |
| 3533 | else | 3649 | else |
| 3534 | { | 3650 | { |
| @@ -3537,8 +3653,12 @@ detect_coding_mask (source, src_bytes, priorities, skip) | |||
| 3537 | if (c < 0xA0) | 3653 | if (c < 0xA0) |
| 3538 | { | 3654 | { |
| 3539 | /* C is the first byte of SJIS character code, | 3655 | /* C is the first byte of SJIS character code, |
| 3540 | or a leading-code of Emacs' internal format (emacs-mule). */ | 3656 | or a leading-code of Emacs' internal format (emacs-mule), |
| 3541 | try = CODING_CATEGORY_MASK_SJIS | CODING_CATEGORY_MASK_EMACS_MULE; | 3657 | or the first byte of UTF-16. */ |
| 3658 | try = (CODING_CATEGORY_MASK_SJIS | ||
| 3659 | | CODING_CATEGORY_MASK_EMACS_MULE | ||
| 3660 | | CODING_CATEGORY_MASK_UTF_16_BE | ||
| 3661 | | CODING_CATEGORY_MASK_UTF_16_LE); | ||
| 3542 | 3662 | ||
| 3543 | /* Or, if C is a special latin extra code, | 3663 | /* Or, if C is a special latin extra code, |
| 3544 | or is an ISO2022 specific control code of C1 (SS2 or SS3), | 3664 | or is an ISO2022 specific control code of C1 (SS2 or SS3), |
| @@ -3559,11 +3679,15 @@ detect_coding_mask (source, src_bytes, priorities, skip) | |||
| 3559 | else | 3679 | else |
| 3560 | /* C is a character of ISO2022 in graphic plane right, | 3680 | /* C is a character of ISO2022 in graphic plane right, |
| 3561 | or a SJIS's 1-byte character code (i.e. JISX0201), | 3681 | or a SJIS's 1-byte character code (i.e. JISX0201), |
| 3562 | or the first byte of BIG5's 2-byte code. */ | 3682 | or the first byte of BIG5's 2-byte code, |
| 3683 | or the first byte of UTF-8/16. */ | ||
| 3563 | try = (CODING_CATEGORY_MASK_ISO_8_ELSE | 3684 | try = (CODING_CATEGORY_MASK_ISO_8_ELSE |
| 3564 | | CODING_CATEGORY_MASK_ISO_8BIT | 3685 | | CODING_CATEGORY_MASK_ISO_8BIT |
| 3565 | | CODING_CATEGORY_MASK_SJIS | 3686 | | CODING_CATEGORY_MASK_SJIS |
| 3566 | | CODING_CATEGORY_MASK_BIG5); | 3687 | | CODING_CATEGORY_MASK_BIG5 |
| 3688 | | CODING_CATEGORY_MASK_UTF_8 | ||
| 3689 | | CODING_CATEGORY_MASK_UTF_16_BE | ||
| 3690 | | CODING_CATEGORY_MASK_UTF_16_LE); | ||
| 3567 | 3691 | ||
| 3568 | /* Or, we may have to consider the possibility of CCL. */ | 3692 | /* Or, we may have to consider the possibility of CCL. */ |
| 3569 | if (coding_system_table[CODING_CATEGORY_IDX_CCL] | 3693 | if (coding_system_table[CODING_CATEGORY_IDX_CCL] |
| @@ -3572,26 +3696,40 @@ detect_coding_mask (source, src_bytes, priorities, skip) | |||
| 3572 | try |= CODING_CATEGORY_MASK_CCL; | 3696 | try |= CODING_CATEGORY_MASK_CCL; |
| 3573 | 3697 | ||
| 3574 | mask = 0; | 3698 | mask = 0; |
| 3699 | utf16_examined_p = iso2022_examined_p = 0; | ||
| 3575 | if (priorities) | 3700 | if (priorities) |
| 3576 | { | 3701 | { |
| 3577 | for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++) | 3702 | for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++) |
| 3578 | { | 3703 | { |
| 3579 | if (priorities[i] & try & CODING_CATEGORY_MASK_ISO) | 3704 | if (!iso2022_examined_p |
| 3580 | mask = detect_coding_iso2022 (src, src_end); | 3705 | && (priorities[i] & try & CODING_CATEGORY_MASK_ISO)) |
| 3706 | { | ||
| 3707 | mask |= detect_coding_iso2022 (src, src_end); | ||
| 3708 | iso2022_examined_p = 1; | ||
| 3709 | } | ||
| 3581 | else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS) | 3710 | else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS) |
| 3582 | mask = detect_coding_sjis (src, src_end); | 3711 | mask |= detect_coding_sjis (src, src_end); |
| 3712 | else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8) | ||
| 3713 | mask |= detect_coding_utf_8 (src, src_end); | ||
| 3714 | else if (!utf16_examined_p | ||
| 3715 | && (priorities[i] & try & | ||
| 3716 | CODING_CATEGORY_MASK_UTF_16_BE_LE)) | ||
| 3717 | { | ||
| 3718 | mask |= detect_coding_utf_16 (src, src_end); | ||
| 3719 | utf16_examined_p = 1; | ||
| 3720 | } | ||
| 3583 | else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5) | 3721 | else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5) |
| 3584 | mask = detect_coding_big5 (src, src_end); | 3722 | mask |= detect_coding_big5 (src, src_end); |
| 3585 | else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE) | 3723 | else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE) |
| 3586 | mask = detect_coding_emacs_mule (src, src_end); | 3724 | mask |= detect_coding_emacs_mule (src, src_end); |
| 3587 | else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL) | 3725 | else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL) |
| 3588 | mask = detect_coding_ccl (src, src_end); | 3726 | mask |= detect_coding_ccl (src, src_end); |
| 3589 | else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT) | 3727 | else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT) |
| 3590 | mask = CODING_CATEGORY_MASK_RAW_TEXT; | 3728 | mask |= CODING_CATEGORY_MASK_RAW_TEXT; |
| 3591 | else if (priorities[i] & CODING_CATEGORY_MASK_BINARY) | 3729 | else if (priorities[i] & CODING_CATEGORY_MASK_BINARY) |
| 3592 | mask = CODING_CATEGORY_MASK_BINARY; | 3730 | mask |= CODING_CATEGORY_MASK_BINARY; |
| 3593 | if (mask) | 3731 | if (mask & priorities[i]) |
| 3594 | goto label_return_highest_only; | 3732 | return priorities[i]; |
| 3595 | } | 3733 | } |
| 3596 | return CODING_CATEGORY_MASK_RAW_TEXT; | 3734 | return CODING_CATEGORY_MASK_RAW_TEXT; |
| 3597 | } | 3735 | } |
| @@ -3601,20 +3739,16 @@ detect_coding_mask (source, src_bytes, priorities, skip) | |||
| 3601 | mask |= detect_coding_sjis (src, src_end); | 3739 | mask |= detect_coding_sjis (src, src_end); |
| 3602 | if (try & CODING_CATEGORY_MASK_BIG5) | 3740 | if (try & CODING_CATEGORY_MASK_BIG5) |
| 3603 | mask |= detect_coding_big5 (src, src_end); | 3741 | mask |= detect_coding_big5 (src, src_end); |
| 3742 | if (try & CODING_CATEGORY_MASK_UTF_8) | ||
| 3743 | mask |= detect_coding_utf_8 (src, src_end); | ||
| 3744 | if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE) | ||
| 3745 | mask |= detect_coding_utf_16 (src, src_end); | ||
| 3604 | if (try & CODING_CATEGORY_MASK_EMACS_MULE) | 3746 | if (try & CODING_CATEGORY_MASK_EMACS_MULE) |
| 3605 | mask |= detect_coding_emacs_mule (src, src_end); | 3747 | mask |= detect_coding_emacs_mule (src, src_end); |
| 3606 | if (try & CODING_CATEGORY_MASK_CCL) | 3748 | if (try & CODING_CATEGORY_MASK_CCL) |
| 3607 | mask |= detect_coding_ccl (src, src_end); | 3749 | mask |= detect_coding_ccl (src, src_end); |
| 3608 | } | 3750 | } |
| 3609 | return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY); | 3751 | return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY); |
| 3610 | |||
| 3611 | label_return_highest_only: | ||
| 3612 | for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++) | ||
| 3613 | { | ||
| 3614 | if (mask & priorities[i]) | ||
| 3615 | return priorities[i]; | ||
| 3616 | } | ||
| 3617 | return CODING_CATEGORY_MASK_RAW_TEXT; | ||
| 3618 | } | 3752 | } |
| 3619 | 3753 | ||
| 3620 | /* Detect how a text of length SRC_BYTES pointed by SRC is encoded. | 3754 | /* Detect how a text of length SRC_BYTES pointed by SRC is encoded. |
| @@ -3710,6 +3844,76 @@ detect_eol_type (source, src_bytes, skip) | |||
| 3710 | return eol_type; | 3844 | return eol_type; |
| 3711 | } | 3845 | } |
| 3712 | 3846 | ||
| 3847 | /* Like detect_eol_type, but detect EOL type in 2-octet | ||
| 3848 | big-endian/little-endian format for coding systems utf-16-be and | ||
| 3849 | utf-16-le. */ | ||
| 3850 | |||
| 3851 | static int | ||
| 3852 | detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p) | ||
| 3853 | unsigned char *source; | ||
| 3854 | int src_bytes, *skip; | ||
| 3855 | { | ||
| 3856 | unsigned char *src = source, *src_end = src + src_bytes; | ||
| 3857 | unsigned int c1, c2; | ||
| 3858 | int total = 0; /* How many end-of-lines are found so far. */ | ||
| 3859 | int eol_type = CODING_EOL_UNDECIDED; | ||
| 3860 | int this_eol_type; | ||
| 3861 | int msb, lsb; | ||
| 3862 | |||
| 3863 | if (big_endian_p) | ||
| 3864 | msb = 0, lsb = 1; | ||
| 3865 | else | ||
| 3866 | msb = 1, lsb = 0; | ||
| 3867 | |||
| 3868 | *skip = 0; | ||
| 3869 | |||
| 3870 | while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT) | ||
| 3871 | { | ||
| 3872 | c1 = (src[msb] << 8) | (src[lsb]); | ||
| 3873 | src += 2; | ||
| 3874 | |||
| 3875 | if (c1 == '\n' || c1 == '\r') | ||
| 3876 | { | ||
| 3877 | if (*skip == 0) | ||
| 3878 | *skip = src - 2 - source; | ||
| 3879 | total++; | ||
| 3880 | if (c1 == '\n') | ||
| 3881 | { | ||
| 3882 | this_eol_type = CODING_EOL_LF; | ||
| 3883 | } | ||
| 3884 | else | ||
| 3885 | { | ||
| 3886 | if ((src + 1) >= src_end) | ||
| 3887 | { | ||
| 3888 | this_eol_type = CODING_EOL_CR; | ||
| 3889 | } | ||
| 3890 | else | ||
| 3891 | { | ||
| 3892 | c2 = (src[msb] << 8) | (src[lsb]); | ||
| 3893 | if (c2 == '\n') | ||
| 3894 | this_eol_type = CODING_EOL_CRLF, src += 2; | ||
| 3895 | else | ||
| 3896 | this_eol_type = CODING_EOL_CR; | ||
| 3897 | } | ||
| 3898 | } | ||
| 3899 | |||
| 3900 | if (eol_type == CODING_EOL_UNDECIDED) | ||
| 3901 | /* This is the first end-of-line. */ | ||
| 3902 | eol_type = this_eol_type; | ||
| 3903 | else if (eol_type != this_eol_type) | ||
| 3904 | { | ||
| 3905 | /* The found type is different from what found before. */ | ||
| 3906 | eol_type = CODING_EOL_INCONSISTENT; | ||
| 3907 | break; | ||
| 3908 | } | ||
| 3909 | } | ||
| 3910 | } | ||
| 3911 | |||
| 3912 | if (*skip == 0) | ||
| 3913 | *skip = src_end - source; | ||
| 3914 | return eol_type; | ||
| 3915 | } | ||
| 3916 | |||
| 3713 | /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC | 3917 | /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC |
| 3714 | is encoded. If it detects an appropriate format of end-of-line, it | 3918 | is encoded. If it detects an appropriate format of end-of-line, it |
| 3715 | sets the information in *CODING. */ | 3919 | sets the information in *CODING. */ |
| @@ -3722,7 +3926,20 @@ detect_eol (coding, src, src_bytes) | |||
| 3722 | { | 3926 | { |
| 3723 | Lisp_Object val; | 3927 | Lisp_Object val; |
| 3724 | int skip; | 3928 | int skip; |
| 3725 | int eol_type = detect_eol_type (src, src_bytes, &skip); | 3929 | int eol_type; |
| 3930 | |||
| 3931 | switch (coding->category_idx) | ||
| 3932 | { | ||
| 3933 | case CODING_CATEGORY_IDX_UTF_16_BE: | ||
| 3934 | eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1); | ||
| 3935 | break; | ||
| 3936 | case CODING_CATEGORY_IDX_UTF_16_LE: | ||
| 3937 | eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0); | ||
| 3938 | break; | ||
| 3939 | default: | ||
| 3940 | eol_type = detect_eol_type (src, src_bytes, &skip); | ||
| 3941 | break; | ||
| 3942 | } | ||
| 3726 | 3943 | ||
| 3727 | if (coding->heading_ascii > skip) | 3944 | if (coding->heading_ascii > skip) |
| 3728 | coding->heading_ascii = skip; | 3945 | coding->heading_ascii = skip; |
| @@ -5216,13 +5433,17 @@ detect_coding_system (src, src_bytes, highest) | |||
| 5216 | 5433 | ||
| 5217 | /* At first, gather possible coding systems in VAL. */ | 5434 | /* At first, gather possible coding systems in VAL. */ |
| 5218 | val = Qnil; | 5435 | val = Qnil; |
| 5219 | for (tmp = Vcoding_category_list; !NILP (tmp); tmp = XCDR (tmp)) | 5436 | for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp)) |
| 5220 | { | 5437 | { |
| 5221 | int idx | 5438 | Lisp_Object category_val, category_index; |
| 5222 | = XFASTINT (Fget (XCAR (tmp), Qcoding_category_index)); | 5439 | |
| 5223 | if (coding_mask & (1 << idx)) | 5440 | category_index = Fget (XCAR (tmp), Qcoding_category_index); |
| 5441 | category_val = Fsymbol_value (XCAR (tmp)); | ||
| 5442 | if (!NILP (category_val) | ||
| 5443 | && NATNUMP (category_index) | ||
| 5444 | && (coding_mask & (1 << XFASTINT (category_index)))) | ||
| 5224 | { | 5445 | { |
| 5225 | val = Fcons (Fsymbol_value (XCAR (tmp)), val); | 5446 | val = Fcons (category_val, val); |
| 5226 | if (highest) | 5447 | if (highest) |
| 5227 | break; | 5448 | break; |
| 5228 | } | 5449 | } |
| @@ -5231,7 +5452,7 @@ detect_coding_system (src, src_bytes, highest) | |||
| 5231 | val = Fnreverse (val); | 5452 | val = Fnreverse (val); |
| 5232 | 5453 | ||
| 5233 | /* Then, replace the elements with subsidiary coding systems. */ | 5454 | /* Then, replace the elements with subsidiary coding systems. */ |
| 5234 | for (tmp = val; !NILP (tmp); tmp = XCDR (tmp)) | 5455 | for (tmp = val; CONSP (tmp); tmp = XCDR (tmp)) |
| 5235 | { | 5456 | { |
| 5236 | if (eol_type != CODING_EOL_UNDECIDED | 5457 | if (eol_type != CODING_EOL_UNDECIDED |
| 5237 | && eol_type != CODING_EOL_INCONSISTENT) | 5458 | && eol_type != CODING_EOL_INCONSISTENT) |
| @@ -5712,17 +5933,13 @@ which is a list of all the arguments given to this function.") | |||
| 5712 | DEFUN ("update-coding-systems-internal", Fupdate_coding_systems_internal, | 5933 | DEFUN ("update-coding-systems-internal", Fupdate_coding_systems_internal, |
| 5713 | Supdate_coding_systems_internal, 0, 0, 0, | 5934 | Supdate_coding_systems_internal, 0, 0, 0, |
| 5714 | "Update internal database for ISO2022 and CCL based coding systems.\n\ | 5935 | "Update internal database for ISO2022 and CCL based coding systems.\n\ |
| 5715 | When values of the following coding categories are changed, you must\n\ | 5936 | When values of any coding categories are changed, you must\n\ |
| 5716 | call this function:\n\ | 5937 | call this function") |
| 5717 | coding-category-iso-7, coding-category-iso-7-tight,\n\ | ||
| 5718 | coding-category-iso-8-1, coding-category-iso-8-2,\n\ | ||
| 5719 | coding-category-iso-7-else, coding-category-iso-8-else,\n\ | ||
| 5720 | coding-category-ccl") | ||
| 5721 | () | 5938 | () |
| 5722 | { | 5939 | { |
| 5723 | int i; | 5940 | int i; |
| 5724 | 5941 | ||
| 5725 | for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_CCL; i++) | 5942 | for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++) |
| 5726 | { | 5943 | { |
| 5727 | Lisp_Object val; | 5944 | Lisp_Object val; |
| 5728 | 5945 | ||
| @@ -5767,7 +5984,7 @@ This function is internal use only.") | |||
| 5767 | } | 5984 | } |
| 5768 | /* If coding-category-list is valid and contains all coding | 5985 | /* If coding-category-list is valid and contains all coding |
| 5769 | categories, `i' should be CODING_CATEGORY_IDX_MAX now. If not, | 5986 | categories, `i' should be CODING_CATEGORY_IDX_MAX now. If not, |
| 5770 | the following code saves Emacs from craching. */ | 5987 | the following code saves Emacs from crashing. */ |
| 5771 | while (i < CODING_CATEGORY_IDX_MAX) | 5988 | while (i < CODING_CATEGORY_IDX_MAX) |
| 5772 | coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT; | 5989 | coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT; |
| 5773 | 5990 | ||