about summary refs log tree commit diff stats
path: root/src/coding.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/coding.c')
-rw-r--r-- src/coding.c 125
1 files changed, 105 insertions, 20 deletions
diff --git a/src/coding.c b/src/coding.c
index e6f544c8f87..901f81a9247 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -972,6 +972,66 @@ record_conversion_result (struct coding_system *coding,
972 } while (0) 972 } while (0)
973 973
974 974
/* Store the multibyte form of the character C at P, and advance P
   past the end of that multibyte form.  This is like
   CHAR_STRING_ADVANCE but it never calls MAYBE_UNIFY_CHAR.

   C is compared against MAX_1_BYTE_CHAR ... MAX_5_BYTE_CHAR in turn
   to choose a form of one to five bytes.  A larger C is passed to
   BYTE8_STRING as (C - 0x3FFF80), and P is advanced by the value
   that BYTE8_STRING yields.

   P must be a modifiable lvalue.  Both C and P are evaluated more
   than once, so neither argument may have side effects.  Every use
   of C is parenthesized so that an argument such as `a | b' is
   encoded as a whole rather than being split by operator
   precedence.  */

#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)                      \
  do {                                                          \
    if ((c) <= MAX_1_BYTE_CHAR)                                 \
      *(p)++ = (c);                                             \
    else if ((c) <= MAX_2_BYTE_CHAR)                            \
      {                                                         \
        *(p)++ = (0xC0 | ((c) >> 6));                           \
        *(p)++ = (0x80 | ((c) & 0x3F));                         \
      }                                                         \
    else if ((c) <= MAX_3_BYTE_CHAR)                            \
      {                                                         \
        *(p)++ = (0xE0 | ((c) >> 12));                          \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F));                  \
        *(p)++ = (0x80 | ((c) & 0x3F));                         \
      }                                                         \
    else if ((c) <= MAX_4_BYTE_CHAR)                            \
      {                                                         \
        *(p)++ = (0xF0 | ((c) >> 18));                          \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F));                 \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F));                  \
        *(p)++ = (0x80 | ((c) & 0x3F));                         \
      }                                                         \
    else if ((c) <= MAX_5_BYTE_CHAR)                            \
      {                                                         \
        /* The lead byte of a 5-byte form is always 0xF8.  */   \
        *(p)++ = 0xF8;                                          \
        *(p)++ = (0x80 | (((c) >> 18) & 0x0F));                 \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F));                 \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F));                  \
        *(p)++ = (0x80 | ((c) & 0x3F));                         \
      }                                                         \
    else                                                        \
      (p) += BYTE8_STRING ((c) - 0x3FFF80, p);                  \
  } while (0)
1004
1005
/* Return the character code of the character whose multibyte form
   starts at P, and advance P past the end of that multibyte form.
   This is like STRING_CHAR_ADVANCE, but it never calls
   MAYBE_UNIFY_CHAR.

   The length of the form (one to five bytes) is decided solely from
   the bits 0x80, 0x20, 0x10 and 0x08 of the first byte; the trailing
   bytes are not validated.  A two-byte form whose first byte is less
   than 0xC2 has 0x3FFF80 ORed into the result.

   P must be a modifiable lvalue and is evaluated several times, so
   it may not have side effects.  */

#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
  (!((p)[0] & 0x80)                                             \
   /* One byte: the byte itself is the character code.  */      \
   ? *(p)++                                                     \
   : !((p)[0] & 0x20)                                           \
   /* Two bytes.  */                                            \
   ? ((p) += 2,                                                 \
      ((((p)[-2] & 0x1F) << 6)                                  \
       | ((p)[-1] & 0x3F)                                       \
       | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
   : !((p)[0] & 0x10)                                           \
   /* Three bytes.  */                                          \
   ? ((p) += 3,                                                 \
      ((((p)[-3] & 0x0F) << 12)                                 \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : !((p)[0] & 0x08)                                           \
   /* Four bytes.  */                                           \
   ? ((p) += 4,                                                 \
      ((((p)[-4] & 0xF) << 18)                                  \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   /* Five bytes: the first byte carries no character bits.  */ \
   : ((p) += 5,                                                 \
      ((((p)[-4] & 0x3F) << 18)                                 \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F))))
1034
975 1035
976static void 1036static void
977coding_set_source (coding) 1037coding_set_source (coding)
@@ -1037,20 +1097,23 @@ coding_alloc_by_realloc (coding, bytes)
1037} 1097}
1038 1098
1039static void 1099static void
1040coding_alloc_by_making_gap (coding, offset, bytes) 1100coding_alloc_by_making_gap (coding, gap_head_used, bytes)
1041 struct coding_system *coding; 1101 struct coding_system *coding;
1042 EMACS_INT offset, bytes; 1102 EMACS_INT gap_head_used, bytes;
1043{ 1103{
1044 if (BUFFERP (coding->dst_object) 1104 if (EQ (coding->src_object, coding->dst_object))
1045 && EQ (coding->src_object, coding->dst_object))
1046 { 1105 {
1047 EMACS_INT add = offset + (coding->src_bytes - coding->consumed); 1106 /* The gap may contain the produced data at the head and not-yet
1107 consumed data at the tail. To preserve those data, we at
1108 first make the gap size to zero, then increase the gap
1109 size. */
1110 EMACS_INT add = GAP_SIZE;
1048 1111
1049 GPT += offset, GPT_BYTE += offset; 1112 GPT += gap_head_used, GPT_BYTE += gap_head_used;
1050 GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add; 1113 GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1051 make_gap (bytes); 1114 make_gap (bytes);
1052 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; 1115 GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1053 GPT -= offset, GPT_BYTE -= offset; 1116 GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1054 } 1117 }
1055 else 1118 else
1056 { 1119 {
@@ -1073,7 +1136,11 @@ alloc_destination (coding, nbytes, dst)
1073 EMACS_INT offset = dst - coding->destination; 1136 EMACS_INT offset = dst - coding->destination;
1074 1137
1075 if (BUFFERP (coding->dst_object)) 1138 if (BUFFERP (coding->dst_object))
1076 coding_alloc_by_making_gap (coding, offset, nbytes); 1139 {
1140 struct buffer *buf = XBUFFER (coding->dst_object);
1141
1142 coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1143 }
1077 else 1144 else
1078 coding_alloc_by_realloc (coding, nbytes); 1145 coding_alloc_by_realloc (coding, nbytes);
1079 record_conversion_result (coding, CODING_RESULT_SUCCESS); 1146 record_conversion_result (coding, CODING_RESULT_SUCCESS);
@@ -1365,7 +1432,7 @@ encode_coding_utf_8 (coding)
1365 } 1432 }
1366 else 1433 else
1367 { 1434 {
1368 CHAR_STRING_ADVANCE (c, pend); 1435 CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1369 for (p = str; p < pend; p++) 1436 for (p = str; p < pend; p++)
1370 EMIT_ONE_BYTE (*p); 1437 EMIT_ONE_BYTE (*p);
1371 } 1438 }
@@ -1382,7 +1449,7 @@ encode_coding_utf_8 (coding)
1382 if (CHAR_BYTE8_P (c)) 1449 if (CHAR_BYTE8_P (c))
1383 *dst++ = CHAR_TO_BYTE8 (c); 1450 *dst++ = CHAR_TO_BYTE8 (c);
1384 else 1451 else
1385 dst += CHAR_STRING (c, dst); 1452 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1386 produced_chars++; 1453 produced_chars++;
1387 } 1454 }
1388 } 1455 }
@@ -5971,9 +6038,11 @@ produce_chars (coding, translation_table, last_block)
5971 int *buf = coding->charbuf; 6038 int *buf = coding->charbuf;
5972 int *buf_end = buf + coding->charbuf_used; 6039 int *buf_end = buf + coding->charbuf_used;
5973 6040
5974 if (BUFFERP (coding->src_object) 6041 if (EQ (coding->src_object, coding->dst_object))
5975 && EQ (coding->src_object, coding->dst_object)) 6042 {
5976 dst_end = ((unsigned char *) coding->source) + coding->consumed; 6043 coding_set_source (coding);
6044 dst_end = ((unsigned char *) coding->source) + coding->consumed;
6045 }
5977 6046
5978 while (buf < buf_end) 6047 while (buf < buf_end)
5979 { 6048 {
@@ -6000,7 +6069,13 @@ produce_chars (coding, translation_table, last_block)
6000 buf_end - buf 6069 buf_end - buf
6001 + MAX_MULTIBYTE_LENGTH * to_nchars, 6070 + MAX_MULTIBYTE_LENGTH * to_nchars,
6002 dst); 6071 dst);
6003 dst_end = coding->destination + coding->dst_bytes; 6072 if (EQ (coding->src_object, coding->dst_object))
6073 {
6074 coding_set_source (coding);
6075 dst_end = ((unsigned char *) coding->source) + coding->consumed;
6076 }
6077 else
6078 dst_end = coding->destination + coding->dst_bytes;
6004 } 6079 }
6005 6080
6006 for (i = 0; i < to_nchars; i++) 6081 for (i = 0; i < to_nchars; i++)
@@ -6009,7 +6084,7 @@ produce_chars (coding, translation_table, last_block)
6009 c = XINT (AREF (trans, i)); 6084 c = XINT (AREF (trans, i));
6010 if (coding->dst_multibyte 6085 if (coding->dst_multibyte
6011 || ! CHAR_BYTE8_P (c)) 6086 || ! CHAR_BYTE8_P (c))
6012 CHAR_STRING_ADVANCE (c, dst); 6087 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6013 else 6088 else
6014 *dst++ = CHAR_TO_BYTE8 (c); 6089 *dst++ = CHAR_TO_BYTE8 (c);
6015 } 6090 }
@@ -6030,6 +6105,8 @@ produce_chars (coding, translation_table, last_block)
6030 const unsigned char *src = coding->source; 6105 const unsigned char *src = coding->source;
6031 const unsigned char *src_end = src + coding->consumed; 6106 const unsigned char *src_end = src + coding->consumed;
6032 6107
6108 if (EQ (coding->dst_object, coding->src_object))
6109 dst_end = (unsigned char *) src;
6033 if (coding->src_multibyte != coding->dst_multibyte) 6110 if (coding->src_multibyte != coding->dst_multibyte)
6034 { 6111 {
6035 if (coding->src_multibyte) 6112 if (coding->src_multibyte)
@@ -6057,6 +6134,8 @@ produce_chars (coding, translation_table, last_block)
6057 coding_set_source (coding); 6134 coding_set_source (coding);
6058 src = coding->source + offset; 6135 src = coding->source + offset;
6059 src_end = coding->source + coding->src_bytes; 6136 src_end = coding->source + coding->src_bytes;
6137 if (EQ (coding->src_object, coding->dst_object))
6138 dst_end = (unsigned char *) src;
6060 } 6139 }
6061 } 6140 }
6062 *dst++ = c; 6141 *dst++ = c;
@@ -6078,13 +6157,19 @@ produce_chars (coding, translation_table, last_block)
6078 if (dst >= dst_end - 1) 6157 if (dst >= dst_end - 1)
6079 { 6158 {
6080 EMACS_INT offset = src - coding->source; 6159 EMACS_INT offset = src - coding->source;
6160 EMACS_INT more_bytes;
6081 6161
6082 dst = alloc_destination (coding, src_end - src + 2, 6162 if (EQ (coding->src_object, coding->dst_object))
6083 dst); 6163 more_bytes = ((src_end - src) / 2) + 2;
6164 else
6165 more_bytes = src_end - src + 2;
6166 dst = alloc_destination (coding, more_bytes, dst);
6084 dst_end = coding->destination + coding->dst_bytes; 6167 dst_end = coding->destination + coding->dst_bytes;
6085 coding_set_source (coding); 6168 coding_set_source (coding);
6086 src = coding->source + offset; 6169 src = coding->source + offset;
6087 src_end = coding->source + coding->src_bytes; 6170 src_end = coding->source + coding->src_bytes;
6171 if (EQ (coding->src_object, coding->dst_object))
6172 dst_end = (unsigned char *) src;
6088 } 6173 }
6089 } 6174 }
6090 EMIT_ONE_BYTE (c); 6175 EMIT_ONE_BYTE (c);
@@ -6572,12 +6657,12 @@ consume_chars (coding, translation_table, max_lookup)
6572 if (coding->encoder == encode_coding_raw_text) 6657 if (coding->encoder == encode_coding_raw_text)
6573 c = *src++, pos++; 6658 c = *src++, pos++;
6574 else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0) 6659 else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6575 c = STRING_CHAR_ADVANCE (src), pos += bytes; 6660 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
6576 else 6661 else
6577 c = BYTE8_TO_CHAR (*src), src++, pos++; 6662 c = BYTE8_TO_CHAR (*src), src++, pos++;
6578 } 6663 }
6579 else 6664 else
6580 c = STRING_CHAR_ADVANCE (src), pos++; 6665 c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
6581 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)) 6666 if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6582 c = '\n'; 6667 c = '\n';
6583 if (! EQ (eol_type, Qunix)) 6668 if (! EQ (eol_type, Qunix))