diff options
| author | Kenichi Handa | 2008-03-16 01:24:55 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2008-03-16 01:24:55 +0000 |
| commit | db274c7a061fd40e54fdd3b4a590959bda03c48e (patch) | |
| tree | 439e71eda5dc606a0bf9a3cc54c4756f003df6da | |
| parent | b2fba0132f2714bbcf13984833aad43f24035b85 (diff) | |
| download | emacs-db274c7a061fd40e54fdd3b4a590959bda03c48e.tar.gz emacs-db274c7a061fd40e54fdd3b4a590959bda03c48e.zip | |
* coding.c (CHAR_STRING_ADVANCE_NO_UNIFY)
(STRING_CHAR_ADVANCE_NO_UNIFY): New macros.
(coding_alloc_by_making_gap): Fix how data in the gap is
preserved.
(alloc_destination): Fix the 2nd arg to
coding_alloc_by_making_gap.
(encode_coding_utf_8): Use CHAR_STRING_ADVANCE_NO_UNIFY instead of
CHAR_STRING_ADVANCE.
(produce_chars): Fix the case where the source and the
destination are the same buffer. Use CHAR_STRING_ADVANCE_NO_UNIFY
instead of CHAR_STRING_ADVANCE.
(consume_chars): Use STRING_CHAR_ADVANCE_NO_UNIFY instead of
STRING_CHAR_ADVANCE.
| -rw-r--r-- | src/coding.c | 125 |
1 file changed, 105 insertions, 20 deletions
diff --git a/src/coding.c b/src/coding.c index e6f544c8f87..901f81a9247 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -972,6 +972,66 @@ record_conversion_result (struct coding_system *coding, | |||
| 972 | } while (0) | 972 | } while (0) |
| 973 | 973 | ||
| 974 | 974 | ||
| 975 | /* Store the multibyte form of the character C at P, and advance P to | ||
| 976 | the end of the multibyte form. This is like CHAR_STRING_ADVANCE, but | ||
| 977 | it never calls MAYBE_UNIFY_CHAR. */ | ||
| 978 | |||
| 979 | #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p) \ | ||
| 980 | do { \ | ||
| 981 | if ((c) <= MAX_1_BYTE_CHAR) \ | ||
| 982 | *(p)++ = (c); \ | ||
| 983 | else if ((c) <= MAX_2_BYTE_CHAR) \ | ||
| 984 | *(p)++ = (0xC0 | ((c) >> 6)), \ | ||
| 985 | *(p)++ = (0x80 | ((c) & 0x3F)); \ | ||
| 986 | else if ((c) <= MAX_3_BYTE_CHAR) \ | ||
| 987 | *(p)++ = (0xE0 | ((c) >> 12)), \ | ||
| 988 | *(p)++ = (0x80 | (((c) >> 6) & 0x3F)), \ | ||
| 989 | *(p)++ = (0x80 | ((c) & 0x3F)); \ | ||
| 990 | else if ((c) <= MAX_4_BYTE_CHAR) \ | ||
| 991 | *(p)++ = (0xF0 | (c >> 18)), \ | ||
| 992 | *(p)++ = (0x80 | ((c >> 12) & 0x3F)), \ | ||
| 993 | *(p)++ = (0x80 | ((c >> 6) & 0x3F)), \ | ||
| 994 | *(p)++ = (0x80 | (c & 0x3F)); \ | ||
| 995 | else if ((c) <= MAX_5_BYTE_CHAR) \ | ||
| 996 | *(p)++ = 0xF8, \ | ||
| 997 | *(p)++ = (0x80 | ((c >> 18) & 0x0F)), \ | ||
| 998 | *(p)++ = (0x80 | ((c >> 12) & 0x3F)), \ | ||
| 999 | *(p)++ = (0x80 | ((c >> 6) & 0x3F)), \ | ||
| 1000 | *(p)++ = (0x80 | (c & 0x3F)); \ | ||
| 1001 | else \ | ||
| 1002 | (p) += BYTE8_STRING ((c) - 0x3FFF80, p); \ | ||
| 1003 | } while (0) | ||
| 1004 | |||
| 1005 | |||
| 1006 | /* Return the character code of the character whose multibyte form is | ||
| 1007 | at P, and advance P to the end of the multibyte form. This is like | ||
| 1008 | STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR. */ | ||
| 1009 | |||
| 1010 | #define STRING_CHAR_ADVANCE_NO_UNIFY(p) \ | ||
| 1011 | (!((p)[0] & 0x80) \ | ||
| 1012 | ? *(p)++ \ | ||
| 1013 | : ! ((p)[0] & 0x20) \ | ||
| 1014 | ? ((p) += 2, \ | ||
| 1015 | ((((p)[-2] & 0x1F) << 6) \ | ||
| 1016 | | ((p)[-1] & 0x3F) \ | ||
| 1017 | | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0))) \ | ||
| 1018 | : ! ((p)[0] & 0x10) \ | ||
| 1019 | ? ((p) += 3, \ | ||
| 1020 | ((((p)[-3] & 0x0F) << 12) \ | ||
| 1021 | | (((p)[-2] & 0x3F) << 6) \ | ||
| 1022 | | ((p)[-1] & 0x3F))) \ | ||
| 1023 | : ! ((p)[0] & 0x08) \ | ||
| 1024 | ? ((p) += 4, \ | ||
| 1025 | ((((p)[-4] & 0xF) << 18) \ | ||
| 1026 | | (((p)[-3] & 0x3F) << 12) \ | ||
| 1027 | | (((p)[-2] & 0x3F) << 6) \ | ||
| 1028 | | ((p)[-1] & 0x3F))) \ | ||
| 1029 | : ((p) += 5, \ | ||
| 1030 | ((((p)[-4] & 0x3F) << 18) \ | ||
| 1031 | | (((p)[-3] & 0x3F) << 12) \ | ||
| 1032 | | (((p)[-2] & 0x3F) << 6) \ | ||
| 1033 | | ((p)[-1] & 0x3F)))) | ||
| 1034 | |||
| 975 | 1035 | ||
| 976 | static void | 1036 | static void |
| 977 | coding_set_source (coding) | 1037 | coding_set_source (coding) |
| @@ -1037,20 +1097,23 @@ coding_alloc_by_realloc (coding, bytes) | |||
| 1037 | } | 1097 | } |
| 1038 | 1098 | ||
| 1039 | static void | 1099 | static void |
| 1040 | coding_alloc_by_making_gap (coding, offset, bytes) | 1100 | coding_alloc_by_making_gap (coding, gap_head_used, bytes) |
| 1041 | struct coding_system *coding; | 1101 | struct coding_system *coding; |
| 1042 | EMACS_INT offset, bytes; | 1102 | EMACS_INT gap_head_used, bytes; |
| 1043 | { | 1103 | { |
| 1044 | if (BUFFERP (coding->dst_object) | 1104 | if (EQ (coding->src_object, coding->dst_object)) |
| 1045 | && EQ (coding->src_object, coding->dst_object)) | ||
| 1046 | { | 1105 | { |
| 1047 | EMACS_INT add = offset + (coding->src_bytes - coding->consumed); | 1106 | /* The gap may contain produced data at its head and not-yet |
| 1107 | consumed data at its tail. To preserve that data, we first | ||
| 1108 | set the gap size to zero, and then increase the gap | ||
| 1109 | size. */ | ||
| 1110 | EMACS_INT add = GAP_SIZE; | ||
| 1048 | 1111 | ||
| 1049 | GPT += offset, GPT_BYTE += offset; | 1112 | GPT += gap_head_used, GPT_BYTE += gap_head_used; |
| 1050 | GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add; | 1113 | GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add; |
| 1051 | make_gap (bytes); | 1114 | make_gap (bytes); |
| 1052 | GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; | 1115 | GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; |
| 1053 | GPT -= offset, GPT_BYTE -= offset; | 1116 | GPT -= gap_head_used, GPT_BYTE -= gap_head_used; |
| 1054 | } | 1117 | } |
| 1055 | else | 1118 | else |
| 1056 | { | 1119 | { |
| @@ -1073,7 +1136,11 @@ alloc_destination (coding, nbytes, dst) | |||
| 1073 | EMACS_INT offset = dst - coding->destination; | 1136 | EMACS_INT offset = dst - coding->destination; |
| 1074 | 1137 | ||
| 1075 | if (BUFFERP (coding->dst_object)) | 1138 | if (BUFFERP (coding->dst_object)) |
| 1076 | coding_alloc_by_making_gap (coding, offset, nbytes); | 1139 | { |
| 1140 | struct buffer *buf = XBUFFER (coding->dst_object); | ||
| 1141 | |||
| 1142 | coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes); | ||
| 1143 | } | ||
| 1077 | else | 1144 | else |
| 1078 | coding_alloc_by_realloc (coding, nbytes); | 1145 | coding_alloc_by_realloc (coding, nbytes); |
| 1079 | record_conversion_result (coding, CODING_RESULT_SUCCESS); | 1146 | record_conversion_result (coding, CODING_RESULT_SUCCESS); |
| @@ -1365,7 +1432,7 @@ encode_coding_utf_8 (coding) | |||
| 1365 | } | 1432 | } |
| 1366 | else | 1433 | else |
| 1367 | { | 1434 | { |
| 1368 | CHAR_STRING_ADVANCE (c, pend); | 1435 | CHAR_STRING_ADVANCE_NO_UNIFY (c, pend); |
| 1369 | for (p = str; p < pend; p++) | 1436 | for (p = str; p < pend; p++) |
| 1370 | EMIT_ONE_BYTE (*p); | 1437 | EMIT_ONE_BYTE (*p); |
| 1371 | } | 1438 | } |
| @@ -1382,7 +1449,7 @@ encode_coding_utf_8 (coding) | |||
| 1382 | if (CHAR_BYTE8_P (c)) | 1449 | if (CHAR_BYTE8_P (c)) |
| 1383 | *dst++ = CHAR_TO_BYTE8 (c); | 1450 | *dst++ = CHAR_TO_BYTE8 (c); |
| 1384 | else | 1451 | else |
| 1385 | dst += CHAR_STRING (c, dst); | 1452 | CHAR_STRING_ADVANCE_NO_UNIFY (c, dst); |
| 1386 | produced_chars++; | 1453 | produced_chars++; |
| 1387 | } | 1454 | } |
| 1388 | } | 1455 | } |
| @@ -5971,9 +6038,11 @@ produce_chars (coding, translation_table, last_block) | |||
| 5971 | int *buf = coding->charbuf; | 6038 | int *buf = coding->charbuf; |
| 5972 | int *buf_end = buf + coding->charbuf_used; | 6039 | int *buf_end = buf + coding->charbuf_used; |
| 5973 | 6040 | ||
| 5974 | if (BUFFERP (coding->src_object) | 6041 | if (EQ (coding->src_object, coding->dst_object)) |
| 5975 | && EQ (coding->src_object, coding->dst_object)) | 6042 | { |
| 5976 | dst_end = ((unsigned char *) coding->source) + coding->consumed; | 6043 | coding_set_source (coding); |
| 6044 | dst_end = ((unsigned char *) coding->source) + coding->consumed; | ||
| 6045 | } | ||
| 5977 | 6046 | ||
| 5978 | while (buf < buf_end) | 6047 | while (buf < buf_end) |
| 5979 | { | 6048 | { |
| @@ -6000,7 +6069,13 @@ produce_chars (coding, translation_table, last_block) | |||
| 6000 | buf_end - buf | 6069 | buf_end - buf |
| 6001 | + MAX_MULTIBYTE_LENGTH * to_nchars, | 6070 | + MAX_MULTIBYTE_LENGTH * to_nchars, |
| 6002 | dst); | 6071 | dst); |
| 6003 | dst_end = coding->destination + coding->dst_bytes; | 6072 | if (EQ (coding->src_object, coding->dst_object)) |
| 6073 | { | ||
| 6074 | coding_set_source (coding); | ||
| 6075 | dst_end = ((unsigned char *) coding->source) + coding->consumed; | ||
| 6076 | } | ||
| 6077 | else | ||
| 6078 | dst_end = coding->destination + coding->dst_bytes; | ||
| 6004 | } | 6079 | } |
| 6005 | 6080 | ||
| 6006 | for (i = 0; i < to_nchars; i++) | 6081 | for (i = 0; i < to_nchars; i++) |
| @@ -6009,7 +6084,7 @@ produce_chars (coding, translation_table, last_block) | |||
| 6009 | c = XINT (AREF (trans, i)); | 6084 | c = XINT (AREF (trans, i)); |
| 6010 | if (coding->dst_multibyte | 6085 | if (coding->dst_multibyte |
| 6011 | || ! CHAR_BYTE8_P (c)) | 6086 | || ! CHAR_BYTE8_P (c)) |
| 6012 | CHAR_STRING_ADVANCE (c, dst); | 6087 | CHAR_STRING_ADVANCE_NO_UNIFY (c, dst); |
| 6013 | else | 6088 | else |
| 6014 | *dst++ = CHAR_TO_BYTE8 (c); | 6089 | *dst++ = CHAR_TO_BYTE8 (c); |
| 6015 | } | 6090 | } |
| @@ -6030,6 +6105,8 @@ produce_chars (coding, translation_table, last_block) | |||
| 6030 | const unsigned char *src = coding->source; | 6105 | const unsigned char *src = coding->source; |
| 6031 | const unsigned char *src_end = src + coding->consumed; | 6106 | const unsigned char *src_end = src + coding->consumed; |
| 6032 | 6107 | ||
| 6108 | if (EQ (coding->dst_object, coding->src_object)) | ||
| 6109 | dst_end = (unsigned char *) src; | ||
| 6033 | if (coding->src_multibyte != coding->dst_multibyte) | 6110 | if (coding->src_multibyte != coding->dst_multibyte) |
| 6034 | { | 6111 | { |
| 6035 | if (coding->src_multibyte) | 6112 | if (coding->src_multibyte) |
| @@ -6057,6 +6134,8 @@ produce_chars (coding, translation_table, last_block) | |||
| 6057 | coding_set_source (coding); | 6134 | coding_set_source (coding); |
| 6058 | src = coding->source + offset; | 6135 | src = coding->source + offset; |
| 6059 | src_end = coding->source + coding->src_bytes; | 6136 | src_end = coding->source + coding->src_bytes; |
| 6137 | if (EQ (coding->src_object, coding->dst_object)) | ||
| 6138 | dst_end = (unsigned char *) src; | ||
| 6060 | } | 6139 | } |
| 6061 | } | 6140 | } |
| 6062 | *dst++ = c; | 6141 | *dst++ = c; |
| @@ -6078,13 +6157,19 @@ produce_chars (coding, translation_table, last_block) | |||
| 6078 | if (dst >= dst_end - 1) | 6157 | if (dst >= dst_end - 1) |
| 6079 | { | 6158 | { |
| 6080 | EMACS_INT offset = src - coding->source; | 6159 | EMACS_INT offset = src - coding->source; |
| 6160 | EMACS_INT more_bytes; | ||
| 6081 | 6161 | ||
| 6082 | dst = alloc_destination (coding, src_end - src + 2, | 6162 | if (EQ (coding->src_object, coding->dst_object)) |
| 6083 | dst); | 6163 | more_bytes = ((src_end - src) / 2) + 2; |
| 6164 | else | ||
| 6165 | more_bytes = src_end - src + 2; | ||
| 6166 | dst = alloc_destination (coding, more_bytes, dst); | ||
| 6084 | dst_end = coding->destination + coding->dst_bytes; | 6167 | dst_end = coding->destination + coding->dst_bytes; |
| 6085 | coding_set_source (coding); | 6168 | coding_set_source (coding); |
| 6086 | src = coding->source + offset; | 6169 | src = coding->source + offset; |
| 6087 | src_end = coding->source + coding->src_bytes; | 6170 | src_end = coding->source + coding->src_bytes; |
| 6171 | if (EQ (coding->src_object, coding->dst_object)) | ||
| 6172 | dst_end = (unsigned char *) src; | ||
| 6088 | } | 6173 | } |
| 6089 | } | 6174 | } |
| 6090 | EMIT_ONE_BYTE (c); | 6175 | EMIT_ONE_BYTE (c); |
| @@ -6572,12 +6657,12 @@ consume_chars (coding, translation_table, max_lookup) | |||
| 6572 | if (coding->encoder == encode_coding_raw_text) | 6657 | if (coding->encoder == encode_coding_raw_text) |
| 6573 | c = *src++, pos++; | 6658 | c = *src++, pos++; |
| 6574 | else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0) | 6659 | else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0) |
| 6575 | c = STRING_CHAR_ADVANCE (src), pos += bytes; | 6660 | c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes; |
| 6576 | else | 6661 | else |
| 6577 | c = BYTE8_TO_CHAR (*src), src++, pos++; | 6662 | c = BYTE8_TO_CHAR (*src), src++, pos++; |
| 6578 | } | 6663 | } |
| 6579 | else | 6664 | else |
| 6580 | c = STRING_CHAR_ADVANCE (src), pos++; | 6665 | c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++; |
| 6581 | if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)) | 6666 | if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)) |
| 6582 | c = '\n'; | 6667 | c = '\n'; |
| 6583 | if (! EQ (eol_type, Qunix)) | 6668 | if (! EQ (eol_type, Qunix)) |