diff options
| author | Kenichi Handa | 2001-03-08 02:01:00 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2001-03-08 02:01:00 +0000 |
| commit | 682169fe7f80e9c3b69e7ca35c88f1eb1c8a9d55 (patch) | |
| tree | 1c9087ab8aef4166219023cc669886c3270931eb /src | |
| parent | bb5c338d2c2a59432bf2f474961a89e151f84d89 (diff) | |
| download | emacs-682169fe7f80e9c3b69e7ca35c88f1eb1c8a9d55.tar.gz emacs-682169fe7f80e9c3b69e7ca35c88f1eb1c8a9d55.zip | |
(detect_coding_sjis): Do more rigid check.
(detect_coding_big5): Likewise.
(decode_coding_sjis_big5): Likewise.
(Fdetect_coding_region): Call detect_coding_system with trailing
anchor byte `\0' for more rigid detection.
(Fdetect_coding_string): Likewise.
Diffstat (limited to 'src')
| -rw-r--r-- | src/coding.c | 52 |
1 files changed, 31 insertions, 21 deletions
diff --git a/src/coding.c b/src/coding.c index 01762df59b7..7abf28647ad 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -2653,7 +2653,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 2653 | --- CODE RANGE of SJIS --- | 2653 | --- CODE RANGE of SJIS --- |
| 2654 | (character set) (range) | 2654 | (character set) (range) |
| 2655 | ASCII 0x00 .. 0x7F | 2655 | ASCII 0x00 .. 0x7F |
| 2656 | KATAKANA-JISX0201 0xA0 .. 0xDF | 2656 | KATAKANA-JISX0201 0xA1 .. 0xDF |
| 2657 | JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF | 2657 | JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF |
| 2658 | (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC | 2658 | (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC |
| 2659 | ------------------------------- | 2659 | ------------------------------- |
| @@ -2728,15 +2728,14 @@ detect_coding_sjis (src, src_end, multibytep) | |||
| 2728 | while (1) | 2728 | while (1) |
| 2729 | { | 2729 | { |
| 2730 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | 2730 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
| 2731 | if (c >= 0x81) | 2731 | if (c < 0x80) |
| 2732 | continue; | ||
| 2733 | if (c == 0x80 || c == 0xA0 || c > 0xEF) | ||
| 2734 | return 0; | ||
| 2735 | if (c <= 0x9F || c >= 0xE0) | ||
| 2732 | { | 2736 | { |
| 2733 | if (c <= 0x9F || (c >= 0xE0 && c <= 0xEF)) | 2737 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
| 2734 | { | 2738 | if (c < 0x40 || c == 0x7F || c > 0xFC) |
| 2735 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | ||
| 2736 | if (c < 0x40 || c == 0x7F || c > 0xFC) | ||
| 2737 | return 0; | ||
| 2738 | } | ||
| 2739 | else if (c > 0xDF) | ||
| 2740 | return 0; | 2739 | return 0; |
| 2741 | } | 2740 | } |
| 2742 | } | 2741 | } |
| @@ -2761,12 +2760,13 @@ detect_coding_big5 (src, src_end, multibytep) | |||
| 2761 | while (1) | 2760 | while (1) |
| 2762 | { | 2761 | { |
| 2763 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | 2762 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
| 2764 | if (c >= 0xA1) | 2763 | if (c < 0x80) |
| 2765 | { | 2764 | continue; |
| 2766 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); | 2765 | if (c < 0xA1 || c > 0xFE) |
| 2767 | if (c < 0x40 || (c >= 0x7F && c <= 0xA0)) | 2766 | return 0; |
| 2768 | return 0; | 2767 | ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); |
| 2769 | } | 2768 | if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE) |
| 2769 | return 0; | ||
| 2770 | } | 2770 | } |
| 2771 | label_end_of_loop: | 2771 | label_end_of_loop: |
| 2772 | return CODING_CATEGORY_MASK_BIG5; | 2772 | return CODING_CATEGORY_MASK_BIG5; |
| @@ -2944,9 +2944,9 @@ decode_coding_sjis_big5 (coding, source, destination, | |||
| 2944 | { | 2944 | { |
| 2945 | if (sjis_p) | 2945 | if (sjis_p) |
| 2946 | { | 2946 | { |
| 2947 | if (c1 >= 0xF0) | 2947 | if (c1 == 0x80 || c1 == 0xA0 || c1 > 0xEF) |
| 2948 | goto label_invalid_code; | 2948 | goto label_invalid_code; |
| 2949 | if (c1 < 0xA0 || c1 >= 0xE0) | 2949 | if (c1 <= 0x9F || c1 >= 0xE0) |
| 2950 | { | 2950 | { |
| 2951 | /* SJIS -> JISX0208 */ | 2951 | /* SJIS -> JISX0208 */ |
| 2952 | ONE_MORE_BYTE (c2); | 2952 | ONE_MORE_BYTE (c2); |
| @@ -2962,7 +2962,7 @@ decode_coding_sjis_big5 (coding, source, destination, | |||
| 2962 | else | 2962 | else |
| 2963 | { | 2963 | { |
| 2964 | /* BIG5 -> Big5 */ | 2964 | /* BIG5 -> Big5 */ |
| 2965 | if (c1 < 0xA1 || c1 > 0xFE) | 2965 | if (c1 < 0xA0 || c1 > 0xFE) |
| 2966 | goto label_invalid_code; | 2966 | goto label_invalid_code; |
| 2967 | ONE_MORE_BYTE (c2); | 2967 | ONE_MORE_BYTE (c2); |
| 2968 | if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE) | 2968 | if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE) |
| @@ -6217,6 +6217,7 @@ highest priority.") | |||
| 6217 | { | 6217 | { |
| 6218 | int from, to; | 6218 | int from, to; |
| 6219 | int from_byte, to_byte; | 6219 | int from_byte, to_byte; |
| 6220 | int include_anchor_byte = 0; | ||
| 6220 | 6221 | ||
| 6221 | CHECK_NUMBER_COERCE_MARKER (start, 0); | 6222 | CHECK_NUMBER_COERCE_MARKER (start, 0); |
| 6222 | CHECK_NUMBER_COERCE_MARKER (end, 1); | 6223 | CHECK_NUMBER_COERCE_MARKER (end, 1); |
| @@ -6228,9 +6229,14 @@ highest priority.") | |||
| 6228 | 6229 | ||
| 6229 | if (from < GPT && to >= GPT) | 6230 | if (from < GPT && to >= GPT) |
| 6230 | move_gap_both (to, to_byte); | 6231 | move_gap_both (to, to_byte); |
| 6231 | 6232 | if (to == Z || (to == GPT && GAP_SIZE > 0)) | |
| 6233 | include_anchor_byte = 1; | ||
| 6232 | return detect_coding_system (BYTE_POS_ADDR (from_byte), | 6234 | return detect_coding_system (BYTE_POS_ADDR (from_byte), |
| 6233 | to_byte - from_byte, | 6235 | /* "+ include_anchor_byte" is to |
| 6236 | include the anchor byte `\0'. With | ||
| 6237 | this, code detectors can check if | ||
| 6238 | trailing bytes are valid. */ | ||
| 6239 | to_byte - from_byte + include_anchor_byte, | ||
| 6234 | !NILP (highest), | 6240 | !NILP (highest), |
| 6235 | !NILP (current_buffer | 6241 | !NILP (current_buffer |
| 6236 | ->enable_multibyte_characters)); | 6242 | ->enable_multibyte_characters)); |
| @@ -6253,7 +6259,11 @@ highest priority.") | |||
| 6253 | CHECK_STRING (string, 0); | 6259 | CHECK_STRING (string, 0); |
| 6254 | 6260 | ||
| 6255 | return detect_coding_system (XSTRING (string)->data, | 6261 | return detect_coding_system (XSTRING (string)->data, |
| 6256 | STRING_BYTES (XSTRING (string)), | 6262 | /* "+ 1" is to include the anchor byte |
| 6263 | `\0'. With this, code detectors can | ||
| 6264 | check if trailing bytes are | ||
| 6265 | valid. */ | ||
| 6266 | STRING_BYTES (XSTRING (string)) + 1, | ||
| 6257 | !NILP (highest), | 6267 | !NILP (highest), |
| 6258 | STRING_MULTIBYTE (string)); | 6268 | STRING_MULTIBYTE (string)); |
| 6259 | } | 6269 | } |