diff options
| author | Kenichi Handa | 1998-09-26 04:20:48 +0000 |
|---|---|---|
| committer | Kenichi Handa | 1998-09-26 04:20:48 +0000 |
| commit | de79a6a5ed49e728d1ee62efd9b1542cb72c095d (patch) | |
| tree | 3f8e9f4c034b93a63065244b5ab0b68d37fa10a2 /src/coding.c | |
| parent | 450c60a5597beb1aea1a549f53baece4e7d26983 (diff) | |
| download | emacs-de79a6a5ed49e728d1ee62efd9b1542cb72c095d.tar.gz emacs-de79a6a5ed49e728d1ee62efd9b1542cb72c095d.zip | |
(check_composing_code): If the current composing
sequence doesn't end properly, return -1.
(DECODE_CHARACTER_ASCII): Update coding->composed_chars.
(DECODE_CHARACTER_DIMENSION1): Likewise.
(decode_coding_iso2022): Check validity of a composing sequence.
(code_convert_string): If the length of text to be converted is
shrunk to zero, don't perform code conversion.
(shrink_decoding_region): Fix previous change.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 123 |
1 files changed, 82 insertions, 41 deletions
diff --git a/src/coding.c b/src/coding.c index fa2bbc620a0..5c3299b6b56 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -213,15 +213,18 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) | |||
| 213 | 213 | ||
| 214 | /* Decode one ASCII character C. */ | 214 | /* Decode one ASCII character C. */ |
| 215 | 215 | ||
| 216 | #define DECODE_CHARACTER_ASCII(c) \ | 216 | #define DECODE_CHARACTER_ASCII(c) \ |
| 217 | do { \ | 217 | do { \ |
| 218 | if (COMPOSING_P (coding->composing)) \ | 218 | if (COMPOSING_P (coding->composing)) \ |
| 219 | *dst++ = 0xA0, *dst++ = (c) | 0x80; \ | 219 | { \ |
| 220 | else \ | 220 | *dst++ = 0xA0, *dst++ = (c) | 0x80; \ |
| 221 | { \ | 221 | coding->composed_chars++; \ |
| 222 | *dst++ = (c); \ | 222 | } \ |
| 223 | coding->produced_char++; \ | 223 | else \ |
| 224 | } \ | 224 | { \ |
| 225 | *dst++ = (c); \ | ||
| 226 | coding->produced_char++; \ | ||
| 227 | } \ | ||
| 225 | } while (0) | 228 | } while (0) |
| 226 | 229 | ||
| 227 | /* Decode one DIMENSION1 character whose charset is CHARSET and whose | 230 | /* Decode one DIMENSION1 character whose charset is CHARSET and whose |
| @@ -231,7 +234,10 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) | |||
| 231 | do { \ | 234 | do { \ |
| 232 | unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \ | 235 | unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \ |
| 233 | if (COMPOSING_P (coding->composing)) \ | 236 | if (COMPOSING_P (coding->composing)) \ |
| 234 | *dst++ = leading_code + 0x20; \ | 237 | { \ |
| 238 | *dst++ = leading_code + 0x20; \ | ||
| 239 | coding->composed_chars++; \ | ||
| 240 | } \ | ||
| 235 | else \ | 241 | else \ |
| 236 | { \ | 242 | { \ |
| 237 | *dst++ = leading_code; \ | 243 | *dst++ = leading_code; \ |
| @@ -997,9 +1003,7 @@ check_composing_code (coding, src, src_end) | |||
| 997 | invalid_code_found = 1; | 1003 | invalid_code_found = 1; |
| 998 | } | 1004 | } |
| 999 | } | 1005 | } |
| 1000 | return (invalid_code_found | 1006 | return (invalid_code_found ? src - src_start : -1); |
| 1001 | ? src - src_start | ||
| 1002 | : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1)); | ||
| 1003 | } | 1007 | } |
| 1004 | 1008 | ||
| 1005 | /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ | 1009 | /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ |
| @@ -1030,6 +1034,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1030 | translation_table = Vstandard_translation_table_for_decode; | 1034 | translation_table = Vstandard_translation_table_for_decode; |
| 1031 | 1035 | ||
| 1032 | coding->produced_char = 0; | 1036 | coding->produced_char = 0; |
| 1037 | coding->composed_chars = 0; | ||
| 1033 | coding->fake_multibyte = 0; | 1038 | coding->fake_multibyte = 0; |
| 1034 | while (src < src_end && (dst_bytes | 1039 | while (src < src_end && (dst_bytes |
| 1035 | ? (dst < adjusted_dst_end) | 1040 | ? (dst < adjusted_dst_end) |
| @@ -1243,7 +1248,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1243 | coding->composing = (c1 == '0' | 1248 | coding->composing = (c1 == '0' |
| 1244 | ? COMPOSING_NO_RULE_HEAD | 1249 | ? COMPOSING_NO_RULE_HEAD |
| 1245 | : COMPOSING_WITH_RULE_HEAD); | 1250 | : COMPOSING_WITH_RULE_HEAD); |
| 1246 | coding->produced_char++; | 1251 | coding->composed_chars = 0; |
| 1247 | } | 1252 | } |
| 1248 | else if (result1 > 0) | 1253 | else if (result1 > 0) |
| 1249 | { | 1254 | { |
| @@ -1253,6 +1258,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1253 | src += result1; | 1258 | src += result1; |
| 1254 | dst += result1 + 2; | 1259 | dst += result1 + 2; |
| 1255 | coding->produced_char += result1 + 2; | 1260 | coding->produced_char += result1 + 2; |
| 1261 | coding->fake_multibyte = 1; | ||
| 1256 | } | 1262 | } |
| 1257 | else | 1263 | else |
| 1258 | { | 1264 | { |
| @@ -1266,6 +1272,28 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1266 | break; | 1272 | break; |
| 1267 | 1273 | ||
| 1268 | case '1': /* end composing */ | 1274 | case '1': /* end composing */ |
| 1275 | if (coding->composed_chars > 0) | ||
| 1276 | { | ||
| 1277 | if (coding->composed_chars == 1) | ||
| 1278 | { | ||
| 1279 | unsigned char *this_char_start = dst; | ||
| 1280 | int this_bytes; | ||
| 1281 | |||
| 1282 | /* Only one character is in the composing | ||
| 1283 | sequence. Make it a normal character. */ | ||
| 1284 | while (*--this_char_start != LEADING_CODE_COMPOSITION); | ||
| 1285 | dst = (this_char_start | ||
| 1286 | + (coding->composing == COMPOSING_NO_RULE_TAIL | ||
| 1287 | ? 1 : 2)); | ||
| 1288 | *dst -= 0x20; | ||
| 1289 | if (*dst == 0x80) | ||
| 1290 | *++dst &= 0x7F; | ||
| 1291 | this_bytes = BYTES_BY_CHAR_HEAD (*dst); | ||
| 1292 | while (this_bytes--) *this_char_start++ = *dst++; | ||
| 1293 | dst = this_char_start; | ||
| 1294 | } | ||
| 1295 | coding->produced_char++; | ||
| 1296 | } | ||
| 1269 | coding->composing = COMPOSING_NO; | 1297 | coding->composing = COMPOSING_NO; |
| 1270 | break; | 1298 | break; |
| 1271 | 1299 | ||
| @@ -3938,30 +3966,45 @@ shrink_decoding_region (beg, end, coding, str) | |||
| 3938 | 3966 | ||
| 3939 | case CODING_CATEGORY_IDX_ISO_7: | 3967 | case CODING_CATEGORY_IDX_ISO_7: |
| 3940 | case CODING_CATEGORY_IDX_ISO_7_TIGHT: | 3968 | case CODING_CATEGORY_IDX_ISO_7_TIGHT: |
| 3941 | /* We can skip all charactes at the tail except for ESC and | 3969 | { |
| 3942 | the following 2-byte at the tail. */ | 3970 | /* We can skip all charactes at the tail except for 8-bit |
| 3943 | if (eol_conversion) | 3971 | codes and ESC and the following 2-byte at the tail. */ |
| 3944 | while (begp < endp | 3972 | unsigned char *eight_bit = NULL; |
| 3945 | && (c = endp[-1]) != ISO_CODE_ESC && c != '\r') | 3973 | |
| 3946 | endp--; | 3974 | if (eol_conversion) |
| 3947 | else | 3975 | while (begp < endp |
| 3948 | while (begp < endp | 3976 | && (c = endp[-1]) != ISO_CODE_ESC && c != '\r') |
| 3949 | && (c = endp[-1]) != ISO_CODE_ESC) | 3977 | { |
| 3950 | endp--; | 3978 | if (!eight_bit && c & 0x80) eight_bit = endp; |
| 3951 | /* Do not consider LF as ascii if preceded by CR, since that | 3979 | endp--; |
| 3952 | confuses eol decoding. */ | 3980 | } |
| 3953 | if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n') | 3981 | else |
| 3954 | endp++; | 3982 | while (begp < endp |
| 3955 | if (begp < endp && endp[-1] == ISO_CODE_ESC) | 3983 | && (c = endp[-1]) != ISO_CODE_ESC) |
| 3956 | { | 3984 | { |
| 3957 | if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B') | 3985 | if (!eight_bit && c & 0x80) eight_bit = endp; |
| 3958 | /* This is an ASCII designation sequence. We can | 3986 | endp--; |
| 3959 | surely skip the tail. */ | 3987 | } |
| 3960 | endp += 2; | 3988 | /* Do not consider LF as ascii if preceded by CR, since that |
| 3961 | else | 3989 | confuses eol decoding. */ |
| 3962 | /* Hmmm, we can't skip the tail. */ | 3990 | if (begp < endp && endp < endp_orig |
| 3963 | endp = endp_orig; | 3991 | && endp[-1] == '\r' && endp[0] == '\n') |
| 3964 | } | 3992 | endp++; |
| 3993 | if (begp < endp && endp[-1] == ISO_CODE_ESC) | ||
| 3994 | { | ||
| 3995 | if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B') | ||
| 3996 | /* This is an ASCII designation sequence. We can | ||
| 3997 | surely skip the tail. But, if we have | ||
| 3998 | encountered an 8-bit code, skip only the codes | ||
| 3999 | after that. */ | ||
| 4000 | endp = eight_bit ? eight_bit : endp + 2; | ||
| 4001 | else | ||
| 4002 | /* Hmmm, we can't skip the tail. */ | ||
| 4003 | endp = endp_orig; | ||
| 4004 | } | ||
| 4005 | else if (eight_bit) | ||
| 4006 | endp = eight_bit; | ||
| 4007 | } | ||
| 3965 | } | 4008 | } |
| 3966 | } | 4009 | } |
| 3967 | *beg += begp - begp_orig; | 4010 | *beg += begp - begp_orig; |
| @@ -4524,9 +4567,7 @@ code_convert_string (str, coding, encodep, nocopy) | |||
| 4524 | else | 4567 | else |
| 4525 | shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data); | 4568 | shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data); |
| 4526 | } | 4569 | } |
| 4527 | if (from == to_byte | 4570 | if (from == to_byte) |
| 4528 | && ! (coding->mode & CODING_MODE_LAST_BLOCK | ||
| 4529 | && CODING_REQUIRE_FLUSHING (coding))) | ||
| 4530 | return (nocopy ? str : Fcopy_sequence (str)); | 4571 | return (nocopy ? str : Fcopy_sequence (str)); |
| 4531 | 4572 | ||
| 4532 | if (encodep) | 4573 | if (encodep) |