diff options
| author | Kenichi Handa | 1998-06-26 03:29:15 +0000 |
|---|---|---|
| committer | Kenichi Handa | 1998-06-26 03:29:15 +0000 |
| commit | 54f7817141b0252d988849b107c2439847d0ed89 (patch) | |
| tree | b4f8e7924102a2ed3deeee34a5c04ef9d452d10f /src/coding.c | |
| parent | e5ff4bc29e09a46a72ac15e428dbf8365b639e50 (diff) | |
| download | emacs-54f7817141b0252d988849b107c2439847d0ed89.tar.gz emacs-54f7817141b0252d988849b107c2439847d0ed89.zip | |
(setup_raw_text_coding_system): New function.
(decode_coding_sjis_big5): Fix bug in the handling of invalid codes.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 106 |
1 files changed, 55 insertions, 51 deletions
diff --git a/src/coding.c b/src/coding.c index 2a986fd1d4e..6c537df830c 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -1990,7 +1990,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1990 | (character set) (range) | 1990 | (character set) (range) |
| 1991 | ASCII 0x00 .. 0x7F | 1991 | ASCII 0x00 .. 0x7F |
| 1992 | KATAKANA-JISX0201 0xA0 .. 0xDF | 1992 | KATAKANA-JISX0201 0xA0 .. 0xDF |
| 1993 | JISX0208 (1st byte) 0x80 .. 0x9F and 0xE0 .. 0xFF | 1993 | JISX0208 (1st byte) 0x80 .. 0x9F and 0xE0 .. 0xEF |
| 1994 | (2nd byte) 0x40 .. 0xFF | 1994 | (2nd byte) 0x40 .. 0xFF |
| 1995 | ------------------------------- | 1995 | ------------------------------- |
| 1996 | 1996 | ||
| @@ -2236,69 +2236,47 @@ decode_coding_sjis_big5 (coding, source, destination, | |||
| 2236 | } | 2236 | } |
| 2237 | else if (c1 < 0x80) | 2237 | else if (c1 < 0x80) |
| 2238 | DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2); | 2238 | DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2); |
| 2239 | else if (c1 < 0xA0) | 2239 | else |
| 2240 | { | 2240 | { |
| 2241 | /* SJIS -> JISX0208 */ | ||
| 2242 | if (sjis_p) | 2241 | if (sjis_p) |
| 2243 | { | 2242 | { |
| 2244 | ONE_MORE_BYTE (c2); | 2243 | if (c1 < 0xA0 || (c1 >= 0xE0 && c1 < 0xF0)) |
| 2245 | if (c2 >= 0x40) | ||
| 2246 | { | 2244 | { |
| 2247 | DECODE_SJIS (c1, c2, c3, c4); | 2245 | /* SJIS -> JISX0208 */ |
| 2248 | DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); | 2246 | ONE_MORE_BYTE (c2); |
| 2247 | if (c2 >= 0x40) | ||
| 2248 | { | ||
| 2249 | DECODE_SJIS (c1, c2, c3, c4); | ||
| 2250 | DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); | ||
| 2251 | } | ||
| 2252 | else | ||
| 2253 | goto label_invalid_code_2; | ||
| 2249 | } | 2254 | } |
| 2255 | else if (c1 < 0xE0) | ||
| 2256 | /* SJIS -> JISX0201-Kana */ | ||
| 2257 | DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1, | ||
| 2258 | /* dummy */ c2); | ||
| 2250 | else | 2259 | else |
| 2251 | goto label_invalid_code_2; | 2260 | goto label_invalid_code_1; |
| 2252 | } | 2261 | } |
| 2253 | else | 2262 | else |
| 2254 | goto label_invalid_code_1; | ||
| 2255 | } | ||
| 2256 | else if (c1 < 0xE0) | ||
| 2257 | { | ||
| 2258 | /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */ | ||
| 2259 | if (sjis_p) | ||
| 2260 | DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1, | ||
| 2261 | /* dummy */ c2); | ||
| 2262 | else | ||
| 2263 | { | 2263 | { |
| 2264 | int charset; | 2264 | /* BIG5 -> Big5 */ |
| 2265 | 2265 | if (c1 >= 0xA1 && c1 <= 0xFE) | |
| 2266 | ONE_MORE_BYTE (c2); | ||
| 2267 | if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) | ||
| 2268 | { | 2266 | { |
| 2269 | DECODE_BIG5 (c1, c2, charset, c3, c4); | 2267 | ONE_MORE_BYTE (c2); |
| 2270 | DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); | 2268 | if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) |
| 2271 | } | 2269 | { |
| 2272 | else | 2270 | int charset; |
| 2273 | goto label_invalid_code_2; | ||
| 2274 | } | ||
| 2275 | } | ||
| 2276 | else /* C1 >= 0xE0 */ | ||
| 2277 | { | ||
| 2278 | /* SJIS -> JISX0208, BIG5 -> Big5 */ | ||
| 2279 | if (sjis_p) | ||
| 2280 | { | ||
| 2281 | ONE_MORE_BYTE (c2); | ||
| 2282 | if (c2 >= 0x40) | ||
| 2283 | { | ||
| 2284 | DECODE_SJIS (c1, c2, c3, c4); | ||
| 2285 | DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); | ||
| 2286 | } | ||
| 2287 | else | ||
| 2288 | goto label_invalid_code_2; | ||
| 2289 | } | ||
| 2290 | else | ||
| 2291 | { | ||
| 2292 | int charset; | ||
| 2293 | 2271 | ||
| 2294 | ONE_MORE_BYTE (c2); | 2272 | DECODE_BIG5 (c1, c2, charset, c3, c4); |
| 2295 | if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) | 2273 | DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); |
| 2296 | { | 2274 | } |
| 2297 | DECODE_BIG5 (c1, c2, charset, c3, c4); | 2275 | else |
| 2298 | DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); | 2276 | goto label_invalid_code_2; |
| 2299 | } | 2277 | } |
| 2300 | else | 2278 | else |
| 2301 | goto label_invalid_code_2; | 2279 | goto label_invalid_code_1; |
| 2302 | } | 2280 | } |
| 2303 | } | 2281 | } |
| 2304 | continue; | 2282 | continue; |
| @@ -3087,6 +3065,32 @@ setup_coding_system (coding_system, coding) | |||
| 3087 | return -1; | 3065 | return -1; |
| 3088 | } | 3066 | } |
| 3089 | 3067 | ||
| 3068 | /* Set up raw-text or one of its subsidiaries in the structure | ||
| 3069 | coding_system CODING according to the eol_type value already set | ||
| 3070 | in CODING. CODING should be set up for some coding system in | ||
| 3071 | advance. */ | ||
| 3072 | |||
| 3073 | void | ||
| 3074 | setup_raw_text_coding_system (coding) | ||
| 3075 | struct coding_system *coding; | ||
| 3076 | { | ||
| 3077 | if (coding->type != coding_type_raw_text) | ||
| 3078 | { | ||
| 3079 | coding->symbol = Qraw_text; | ||
| 3080 | coding->type = coding_type_raw_text; | ||
| 3081 | if (coding->eol_type != CODING_EOL_UNDECIDED) | ||
| 3082 | { | ||
| 3083 | Lisp_Object subsidiaries = Fget (Qraw_text, Qeol_type); | ||
| 3084 | |||
| 3085 | if (VECTORP (subsidiaries) | ||
| 3086 | && XVECTOR (subsidiaries)->size == 3) | ||
| 3087 | coding->symbol | ||
| 3088 | = XVECTOR (subsidiaries)->contents[coding->eol_type]; | ||
| 3089 | } | ||
| 3090 | } | ||
| 3091 | return; | ||
| 3092 | } | ||
| 3093 | |||
| 3090 | /* Emacs has a mechanism to automatically detect a coding system if it | 3094 | /* Emacs has a mechanism to automatically detect a coding system if it |
| 3091 | is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But, | 3095 | is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But, |
| 3092 | it's impossible to distinguish some coding systems accurately | 3096 | it's impossible to distinguish some coding systems accurately |