about summary refs log tree commit diff stats
path: root/src/coding.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/coding.c')
-rw-r--r-- src/coding.c 123
1 files changed, 82 insertions, 41 deletions
diff --git a/src/coding.c b/src/coding.c
index fa2bbc620a0..5c3299b6b56 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -213,15 +213,18 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
213 213
214/* Decode one ASCII character C. */ 214/* Decode one ASCII character C. */
215 215
216#define DECODE_CHARACTER_ASCII(c) \ 216#define DECODE_CHARACTER_ASCII(c) \
217 do { \ 217 do { \
218 if (COMPOSING_P (coding->composing)) \ 218 if (COMPOSING_P (coding->composing)) \
219 *dst++ = 0xA0, *dst++ = (c) | 0x80; \ 219 { \
220 else \ 220 *dst++ = 0xA0, *dst++ = (c) | 0x80; \
221 { \ 221 coding->composed_chars++; \
222 *dst++ = (c); \ 222 } \
223 coding->produced_char++; \ 223 else \
224 } \ 224 { \
225 *dst++ = (c); \
226 coding->produced_char++; \
227 } \
225 } while (0) 228 } while (0)
226 229
227/* Decode one DIMENSION1 character whose charset is CHARSET and whose 230/* Decode one DIMENSION1 character whose charset is CHARSET and whose
@@ -231,7 +234,10 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
231 do { \ 234 do { \
232 unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \ 235 unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \
233 if (COMPOSING_P (coding->composing)) \ 236 if (COMPOSING_P (coding->composing)) \
234 *dst++ = leading_code + 0x20; \ 237 { \
238 *dst++ = leading_code + 0x20; \
239 coding->composed_chars++; \
240 } \
235 else \ 241 else \
236 { \ 242 { \
237 *dst++ = leading_code; \ 243 *dst++ = leading_code; \
@@ -997,9 +1003,7 @@ check_composing_code (coding, src, src_end)
997 invalid_code_found = 1; 1003 invalid_code_found = 1;
998 } 1004 }
999 } 1005 }
1000 return (invalid_code_found 1006 return (invalid_code_found ? src - src_start : -1);
1001 ? src - src_start
1002 : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1));
1003} 1007}
1004 1008
1005/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ 1009/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
@@ -1030,6 +1034,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1030 translation_table = Vstandard_translation_table_for_decode; 1034 translation_table = Vstandard_translation_table_for_decode;
1031 1035
1032 coding->produced_char = 0; 1036 coding->produced_char = 0;
1037 coding->composed_chars = 0;
1033 coding->fake_multibyte = 0; 1038 coding->fake_multibyte = 0;
1034 while (src < src_end && (dst_bytes 1039 while (src < src_end && (dst_bytes
1035 ? (dst < adjusted_dst_end) 1040 ? (dst < adjusted_dst_end)
@@ -1243,7 +1248,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1243 coding->composing = (c1 == '0' 1248 coding->composing = (c1 == '0'
1244 ? COMPOSING_NO_RULE_HEAD 1249 ? COMPOSING_NO_RULE_HEAD
1245 : COMPOSING_WITH_RULE_HEAD); 1250 : COMPOSING_WITH_RULE_HEAD);
1246 coding->produced_char++; 1251 coding->composed_chars = 0;
1247 } 1252 }
1248 else if (result1 > 0) 1253 else if (result1 > 0)
1249 { 1254 {
@@ -1253,6 +1258,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1253 src += result1; 1258 src += result1;
1254 dst += result1 + 2; 1259 dst += result1 + 2;
1255 coding->produced_char += result1 + 2; 1260 coding->produced_char += result1 + 2;
1261 coding->fake_multibyte = 1;
1256 } 1262 }
1257 else 1263 else
1258 { 1264 {
@@ -1266,6 +1272,28 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1266 break; 1272 break;
1267 1273
1268 case '1': /* end composing */ 1274 case '1': /* end composing */
1275 if (coding->composed_chars > 0)
1276 {
1277 if (coding->composed_chars == 1)
1278 {
1279 unsigned char *this_char_start = dst;
1280 int this_bytes;
1281
1282 /* Only one character is in the composing
1283 sequence. Make it a normal character. */
1284 while (*--this_char_start != LEADING_CODE_COMPOSITION);
1285 dst = (this_char_start
1286 + (coding->composing == COMPOSING_NO_RULE_TAIL
1287 ? 1 : 2));
1288 *dst -= 0x20;
1289 if (*dst == 0x80)
1290 *++dst &= 0x7F;
1291 this_bytes = BYTES_BY_CHAR_HEAD (*dst);
1292 while (this_bytes--) *this_char_start++ = *dst++;
1293 dst = this_char_start;
1294 }
1295 coding->produced_char++;
1296 }
1269 coding->composing = COMPOSING_NO; 1297 coding->composing = COMPOSING_NO;
1270 break; 1298 break;
1271 1299
@@ -3938,30 +3966,45 @@ shrink_decoding_region (beg, end, coding, str)
3938 3966
3939 case CODING_CATEGORY_IDX_ISO_7: 3967 case CODING_CATEGORY_IDX_ISO_7:
3940 case CODING_CATEGORY_IDX_ISO_7_TIGHT: 3968 case CODING_CATEGORY_IDX_ISO_7_TIGHT:
3941 /* We can skip all characters at the tail except for ESC and 3969 {
3942 the following 2-byte at the tail. */ 3970 /* We can skip all characters at the tail except for
3943 if (eol_conversion) 3971 codes and ESC and the following 2-byte at the tail. */
3944 while (begp < endp 3972 unsigned char *eight_bit = NULL;
3945 && (c = endp[-1]) != ISO_CODE_ESC && c != '\r') 3973
3946 endp--; 3974 if (eol_conversion)
3947 else 3975 while (begp < endp
3948 while (begp < endp 3976 && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
3949 && (c = endp[-1]) != ISO_CODE_ESC) 3977 {
3950 endp--; 3978 if (!eight_bit && c & 0x80) eight_bit = endp;
3951 /* Do not consider LF as ascii if preceded by CR, since that 3979 endp--;
3952 confuses eol decoding. */ 3980 }
3953 if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n') 3981 else
3954 endp++; 3982 while (begp < endp
3955 if (begp < endp && endp[-1] == ISO_CODE_ESC) 3983 && (c = endp[-1]) != ISO_CODE_ESC)
3956 { 3984 {
3957 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B') 3985 if (!eight_bit && c & 0x80) eight_bit = endp;
3958 /* This is an ASCII designation sequence. We can 3986 endp--;
3959 surely skip the tail. */ 3987 }
3960 endp += 2; 3988 /* Do not consider LF as ascii if preceded by CR, since that
3961 else 3989 confuses eol decoding. */
3962 /* Hmmm, we can't skip the tail. */ 3990 if (begp < endp && endp < endp_orig
3963 endp = endp_orig; 3991 && endp[-1] == '\r' && endp[0] == '\n')
3964 } 3992 endp++;
3993 if (begp < endp && endp[-1] == ISO_CODE_ESC)
3994 {
3995 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
3996 /* This is an ASCII designation sequence. We can
3997 surely skip the tail. But, if we have
3998 encountered an 8-bit code, skip only the codes
3999 after that. */
4000 endp = eight_bit ? eight_bit : endp + 2;
4001 else
4002 /* Hmmm, we can't skip the tail. */
4003 endp = endp_orig;
4004 }
4005 else if (eight_bit)
4006 endp = eight_bit;
4007 }
3965 } 4008 }
3966 } 4009 }
3967 *beg += begp - begp_orig; 4010 *beg += begp - begp_orig;
@@ -4524,9 +4567,7 @@ code_convert_string (str, coding, encodep, nocopy)
4524 else 4567 else
4525 shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data); 4568 shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4526 } 4569 }
4527 if (from == to_byte 4570 if (from == to_byte)
4528 && ! (coding->mode & CODING_MODE_LAST_BLOCK
4529 && CODING_REQUIRE_FLUSHING (coding)))
4530 return (nocopy ? str : Fcopy_sequence (str)); 4571 return (nocopy ? str : Fcopy_sequence (str));
4531 4572
4532 if (encodep) 4573 if (encodep)