aboutsummaryrefslogtreecommitdiffstats
path: root/src/coding.c
diff options
context:
space:
mode:
authorKenichi Handa2001-03-08 02:01:00 +0000
committerKenichi Handa2001-03-08 02:01:00 +0000
commit682169fe7f80e9c3b69e7ca35c88f1eb1c8a9d55 (patch)
tree1c9087ab8aef4166219023cc669886c3270931eb /src/coding.c
parentbb5c338d2c2a59432bf2f474961a89e151f84d89 (diff)
downloademacs-682169fe7f80e9c3b69e7ca35c88f1eb1c8a9d55.tar.gz
emacs-682169fe7f80e9c3b69e7ca35c88f1eb1c8a9d55.zip
(detect_coding_sjis): Do more rigid check.
(detect_coding_big5): Likewise. (decode_coding_sjis_big5): Likewise. (Fdetect_coding_region): Call detect_coding_system with trailing anchor byte `\0' for more rigid detection. (Fdetect_coding_string): Likewise.
Diffstat (limited to 'src/coding.c')
-rw-r--r--src/coding.c52
1 files changed, 31 insertions, 21 deletions
diff --git a/src/coding.c b/src/coding.c
index 01762df59b7..7abf28647ad 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -2653,7 +2653,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2653 --- CODE RANGE of SJIS --- 2653 --- CODE RANGE of SJIS ---
2654 (character set) (range) 2654 (character set) (range)
2655 ASCII 0x00 .. 0x7F 2655 ASCII 0x00 .. 0x7F
2656 KATAKANA-JISX0201 0xA0 .. 0xDF 2656 KATAKANA-JISX0201 0xA1 .. 0xDF
2657 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF 2657 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF
2658 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC 2658 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC
2659 ------------------------------- 2659 -------------------------------
@@ -2728,15 +2728,14 @@ detect_coding_sjis (src, src_end, multibytep)
2728 while (1) 2728 while (1)
2729 { 2729 {
2730 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 2730 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2731 if (c >= 0x81) 2731 if (c < 0x80)
2732 continue;
2733 if (c == 0x80 || c == 0xA0 || c > 0xEF)
2734 return 0;
2735 if (c <= 0x9F || c >= 0xE0)
2732 { 2736 {
2733 if (c <= 0x9F || (c >= 0xE0 && c <= 0xEF)) 2737 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2734 { 2738 if (c < 0x40 || c == 0x7F || c > 0xFC)
2735 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2736 if (c < 0x40 || c == 0x7F || c > 0xFC)
2737 return 0;
2738 }
2739 else if (c > 0xDF)
2740 return 0; 2739 return 0;
2741 } 2740 }
2742 } 2741 }
@@ -2761,12 +2760,13 @@ detect_coding_big5 (src, src_end, multibytep)
2761 while (1) 2760 while (1)
2762 { 2761 {
2763 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 2762 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2764 if (c >= 0xA1) 2763 if (c < 0x80)
2765 { 2764 continue;
2766 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); 2765 if (c < 0xA1 || c > 0xFE)
2767 if (c < 0x40 || (c >= 0x7F && c <= 0xA0)) 2766 return 0;
2768 return 0; 2767 ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2769 } 2768 if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
2769 return 0;
2770 } 2770 }
2771 label_end_of_loop: 2771 label_end_of_loop:
2772 return CODING_CATEGORY_MASK_BIG5; 2772 return CODING_CATEGORY_MASK_BIG5;
@@ -2944,9 +2944,9 @@ decode_coding_sjis_big5 (coding, source, destination,
2944 { 2944 {
2945 if (sjis_p) 2945 if (sjis_p)
2946 { 2946 {
2947 if (c1 >= 0xF0) 2947 if (c1 == 0x80 || c1 == 0xA0 || c1 > 0xEF)
2948 goto label_invalid_code; 2948 goto label_invalid_code;
2949 if (c1 < 0xA0 || c1 >= 0xE0) 2949 if (c1 <= 0x9F || c1 >= 0xE0)
2950 { 2950 {
2951 /* SJIS -> JISX0208 */ 2951 /* SJIS -> JISX0208 */
2952 ONE_MORE_BYTE (c2); 2952 ONE_MORE_BYTE (c2);
@@ -2962,7 +2962,7 @@ decode_coding_sjis_big5 (coding, source, destination,
2962 else 2962 else
2963 { 2963 {
2964 /* BIG5 -> Big5 */ 2964 /* BIG5 -> Big5 */
2965 if (c1 < 0xA1 || c1 > 0xFE) 2965 if (c1 < 0xA0 || c1 > 0xFE)
2966 goto label_invalid_code; 2966 goto label_invalid_code;
2967 ONE_MORE_BYTE (c2); 2967 ONE_MORE_BYTE (c2);
2968 if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE) 2968 if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
@@ -6217,6 +6217,7 @@ highest priority.")
6217{ 6217{
6218 int from, to; 6218 int from, to;
6219 int from_byte, to_byte; 6219 int from_byte, to_byte;
6220 int include_anchor_byte = 0;
6220 6221
6221 CHECK_NUMBER_COERCE_MARKER (start, 0); 6222 CHECK_NUMBER_COERCE_MARKER (start, 0);
6222 CHECK_NUMBER_COERCE_MARKER (end, 1); 6223 CHECK_NUMBER_COERCE_MARKER (end, 1);
@@ -6228,9 +6229,14 @@ highest priority.")
6228 6229
6229 if (from < GPT && to >= GPT) 6230 if (from < GPT && to >= GPT)
6230 move_gap_both (to, to_byte); 6231 move_gap_both (to, to_byte);
6231 6232 if (to == Z || (to == GPT && GAP_SIZE > 0))
6233 include_anchor_byte = 1;
6232 return detect_coding_system (BYTE_POS_ADDR (from_byte), 6234 return detect_coding_system (BYTE_POS_ADDR (from_byte),
6233 to_byte - from_byte, 6235 /* "+ include_anchor_byte" is to
6236 include the anchor byte `\0'. With
6237 this, code detectors can check if
6238 trailing bytes are valid. */
6239 to_byte - from_byte + include_anchor_byte,
6234 !NILP (highest), 6240 !NILP (highest),
6235 !NILP (current_buffer 6241 !NILP (current_buffer
6236 ->enable_multibyte_characters)); 6242 ->enable_multibyte_characters));
@@ -6253,7 +6259,11 @@ highest priority.")
6253 CHECK_STRING (string, 0); 6259 CHECK_STRING (string, 0);
6254 6260
6255 return detect_coding_system (XSTRING (string)->data, 6261 return detect_coding_system (XSTRING (string)->data,
6256 STRING_BYTES (XSTRING (string)), 6262 /* "+ 1" is to include the anchor byte
6263 `\0'. With this, code detectors can
6264 check if trailing bytes are
6265 valid. */
6266 STRING_BYTES (XSTRING (string)) + 1,
6257 !NILP (highest), 6267 !NILP (highest),
6258 STRING_MULTIBYTE (string)); 6268 STRING_MULTIBYTE (string));
6259} 6269}