aboutsummaryrefslogtreecommitdiffstats
path: root/src/coding.c
diff options
context:
space:
mode:
authorKenichi Handa2004-03-10 23:11:18 +0000
committerKenichi Handa2004-03-10 23:11:18 +0000
commit69a80ea3340f36a6a47c0fdc4c1d146d93810950 (patch)
tree65a003da81169383739b6b33c8fd12c8627f42d0 /src/coding.c
parent6e76ae9103e7dd95d4b690b512ed8728eacae676 (diff)
downloademacs-69a80ea3340f36a6a47c0fdc4c1d146d93810950.tar.gz
emacs-69a80ea3340f36a6a47c0fdc4c1d146d93810950.zip
(MAX_ANNOTATION_LENGTH): Adjust for the change in the annotation data format.
(ADD_ANNOTATION_DATA, ADD_COMPOSITION_DATA, ADD_CHARSET_DATA): Change arguments FROM and TO to a single argument NCHARS. Caller changed.
(decode_coding_utf_8): Pay attention to coding->charbuf_used.
(decode_coding_utf_16, decode_coding_emacs_mule) (decode_coding_iso_2022, decode_coding_sjis, decode_coding_big5) (decode_coding_ccl, decode_coding_charset): Likewise.
(get_translation): New function.
(produce_chars): New arguments translation_table and last_block. Translate characters here. Return the number of carryover chars. Caller changed.
(produce_composition): New argument pos. Caller changed. Adjust for the change in the annotation data format.
(produce_charset, produce_annotation): Likewise.
(decode_coding): Don't call translate_chars.
Diffstat (limited to 'src/coding.c')
-rw-r--r--src/coding.c269
1 files changed, 165 insertions, 104 deletions
diff --git a/src/coding.c b/src/coding.c
index bde7ee586e6..e18c6f6c511 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -212,8 +212,8 @@ decode_coding_XXXX (coding)
212 when there's no room in CHARBUF for a decoded character. */ 212 when there's no room in CHARBUF for a decoded character. */
213 unsigned char *src_base; 213 unsigned char *src_base;
214 /* A buffer to produce decoded characters. */ 214 /* A buffer to produce decoded characters. */
215 int *charbuf = coding->charbuf; 215 int *charbuf = coding->charbuf + coding->charbuf_used;
216 int *charbuf_end = charbuf + coding->charbuf_size; 216 int *charbuf_end = coding->charbuf + coding->charbuf_size;
217 int multibytep = coding->src_multibyte; 217 int multibytep = coding->src_multibyte;
218 218
219 while (1) 219 while (1)
@@ -1025,15 +1025,14 @@ alloc_destination (coding, nbytes, dst)
1025 1025
1026/* Maximum length of annotation data (sum of annotations for 1026/* Maximum length of annotation data (sum of annotations for
1027 composition and charset). */ 1027 composition and charset). */
1028#define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5) 1028#define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
1029 1029
1030/* An annotation data is stored in the array coding->charbuf in this 1030/* An annotation data is stored in the array coding->charbuf in this
1031 format: 1031 format:
1032 [ -LENGTH ANNOTATION_MASK FROM TO ... ] 1032 [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1033 LENGTH is the number of elements in the annotation. 1033 LENGTH is the number of elements in the annotation.
1034 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK. 1034 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1035 FROM and TO specify the range of text annotated. They are relative 1035 NCHARS is the number of characters in the text annotated.
1036 to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
1037 1036
1038 The format of the following elements depends on ANNOTATION_MASK. 1037 The format of the following elements depends on ANNOTATION_MASK.
1039 1038
@@ -1047,26 +1046,25 @@ alloc_destination (coding, nbytes, dst)
1047 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID 1046 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1048 follows. */ 1047 follows. */
1049 1048
1050#define ADD_ANNOTATION_DATA(buf, len, mask, from, to) \ 1049#define ADD_ANNOTATION_DATA(buf, len, mask, nchars) \
1051 do { \ 1050 do { \
1052 *(buf)++ = -(len); \ 1051 *(buf)++ = -(len); \
1053 *(buf)++ = (mask); \ 1052 *(buf)++ = (mask); \
1054 *(buf)++ = (from); \ 1053 *(buf)++ = (nchars); \
1055 *(buf)++ = (to); \
1056 coding->annotated = 1; \ 1054 coding->annotated = 1; \
1057 } while (0); 1055 } while (0);
1058 1056
1059#define ADD_COMPOSITION_DATA(buf, from, to, method) \ 1057#define ADD_COMPOSITION_DATA(buf, nchars, method) \
1060 do { \ 1058 do { \
1061 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \ 1059 ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
1062 *buf++ = method; \ 1060 *buf++ = method; \
1063 } while (0) 1061 } while (0)
1064 1062
1065 1063
1066#define ADD_CHARSET_DATA(buf, from, to, id) \ 1064#define ADD_CHARSET_DATA(buf, nchars, id) \
1067 do { \ 1065 do { \
1068 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \ 1066 ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
1069 *buf++ = id; \ 1067 *buf++ = id; \
1070 } while (0) 1068 } while (0)
1071 1069
1072 1070
@@ -1166,8 +1164,8 @@ decode_coding_utf_8 (coding)
1166 const unsigned char *src = coding->source + coding->consumed; 1164 const unsigned char *src = coding->source + coding->consumed;
1167 const unsigned char *src_end = coding->source + coding->src_bytes; 1165 const unsigned char *src_end = coding->source + coding->src_bytes;
1168 const unsigned char *src_base; 1166 const unsigned char *src_base;
1169 int *charbuf = coding->charbuf; 1167 int *charbuf = coding->charbuf + coding->charbuf_used;
1170 int *charbuf_end = charbuf + coding->charbuf_size; 1168 int *charbuf_end = coding->charbuf + coding->charbuf_size;
1171 int consumed_chars = 0, consumed_chars_base; 1169 int consumed_chars = 0, consumed_chars_base;
1172 int multibytep = coding->src_multibyte; 1170 int multibytep = coding->src_multibyte;
1173 Lisp_Object attr, charset_list; 1171 Lisp_Object attr, charset_list;
@@ -1413,8 +1411,8 @@ decode_coding_utf_16 (coding)
1413 const unsigned char *src = coding->source + coding->consumed; 1411 const unsigned char *src = coding->source + coding->consumed;
1414 const unsigned char *src_end = coding->source + coding->src_bytes; 1412 const unsigned char *src_end = coding->source + coding->src_bytes;
1415 const unsigned char *src_base; 1413 const unsigned char *src_base;
1416 int *charbuf = coding->charbuf; 1414 int *charbuf = coding->charbuf + coding->charbuf_used;
1417 int *charbuf_end = charbuf + coding->charbuf_size; 1415 int *charbuf_end = coding->charbuf + coding->charbuf_size;
1418 int consumed_chars = 0, consumed_chars_base; 1416 int consumed_chars = 0, consumed_chars_base;
1419 int multibytep = coding->src_multibyte; 1417 int multibytep = coding->src_multibyte;
1420 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding); 1418 enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
@@ -1921,7 +1919,6 @@ detect_coding_emacs_mule (coding, detect_info)
1921 number of characters composed by this composition. */ \ 1919 number of characters composed by this composition. */ \
1922 enum composition_method method = c - 0xF2; \ 1920 enum composition_method method = c - 0xF2; \
1923 int *charbuf_base = charbuf; \ 1921 int *charbuf_base = charbuf; \
1924 int from, to; \
1925 int consumed_chars_limit; \ 1922 int consumed_chars_limit; \
1926 int nbytes, nchars; \ 1923 int nbytes, nchars; \
1927 \ 1924 \
@@ -1935,9 +1932,7 @@ detect_coding_emacs_mule (coding, detect_info)
1935 if (c < 0) \ 1932 if (c < 0) \
1936 goto invalid_code; \ 1933 goto invalid_code; \
1937 nchars = c - 0xA0; \ 1934 nchars = c - 0xA0; \
1938 from = coding->produced + char_offset; \ 1935 ADD_COMPOSITION_DATA (charbuf, nchars, method); \
1939 to = from + nchars; \
1940 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
1941 consumed_chars_limit = consumed_chars_base + nbytes; \ 1936 consumed_chars_limit = consumed_chars_base + nbytes; \
1942 if (method != COMPOSITION_RELATIVE) \ 1937 if (method != COMPOSITION_RELATIVE) \
1943 { \ 1938 { \
@@ -1965,7 +1960,6 @@ detect_coding_emacs_mule (coding, detect_info)
1965 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \ 1960 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1966 int *buf = components; \ 1961 int *buf = components; \
1967 int i, j; \ 1962 int i, j; \
1968 int from, to; \
1969 \ 1963 \
1970 src = src_base; \ 1964 src = src_base; \
1971 ONE_MORE_BYTE (c); /* skip 0x80 */ \ 1965 ONE_MORE_BYTE (c); /* skip 0x80 */ \
@@ -1973,9 +1967,7 @@ detect_coding_emacs_mule (coding, detect_info)
1973 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ 1967 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1974 if (i < 2) \ 1968 if (i < 2) \
1975 goto invalid_code; \ 1969 goto invalid_code; \
1976 from = coding->produced_char + char_offset; \ 1970 ADD_COMPOSITION_DATA (charbuf, i, method); \
1977 to = from + i; \
1978 ADD_COMPOSITION_DATA (charbuf, from, to, method); \
1979 for (j = 0; j < i; j++) \ 1971 for (j = 0; j < i; j++) \
1980 *charbuf++ = components[j]; \ 1972 *charbuf++ = components[j]; \
1981 } while (0) 1973 } while (0)
@@ -1989,7 +1981,6 @@ detect_coding_emacs_mule (coding, detect_info)
1989 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \ 1981 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
1990 int *buf = components; \ 1982 int *buf = components; \
1991 int i, j; \ 1983 int i, j; \
1992 int from, to; \
1993 \ 1984 \
1994 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ 1985 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1995 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \ 1986 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
@@ -2001,9 +1992,7 @@ detect_coding_emacs_mule (coding, detect_info)
2001 goto invalid_code; \ 1992 goto invalid_code; \
2002 if (charbuf + i + (i / 2) + 1 < charbuf_end) \ 1993 if (charbuf + i + (i / 2) + 1 < charbuf_end) \
2003 goto no_more_source; \ 1994 goto no_more_source; \
2004 from = coding->produced_char + char_offset; \ 1995 ADD_COMPOSITION_DATA (buf, i, method); \
2005 to = from + i; \
2006 ADD_COMPOSITION_DATA (buf, from, to, method); \
2007 for (j = 0; j < i; j++) \ 1996 for (j = 0; j < i; j++) \
2008 *charbuf++ = components[j]; \ 1997 *charbuf++ = components[j]; \
2009 for (j = 0; j < i; j += 2) \ 1998 for (j = 0; j < i; j += 2) \
@@ -2018,8 +2007,9 @@ decode_coding_emacs_mule (coding)
2018 const unsigned char *src = coding->source + coding->consumed; 2007 const unsigned char *src = coding->source + coding->consumed;
2019 const unsigned char *src_end = coding->source + coding->src_bytes; 2008 const unsigned char *src_end = coding->source + coding->src_bytes;
2020 const unsigned char *src_base; 2009 const unsigned char *src_base;
2021 int *charbuf = coding->charbuf; 2010 int *charbuf = coding->charbuf + coding->charbuf_used;
2022 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; 2011 int *charbuf_end
2012 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
2023 int consumed_chars = 0, consumed_chars_base; 2013 int consumed_chars = 0, consumed_chars_base;
2024 int multibytep = coding->src_multibyte; 2014 int multibytep = coding->src_multibyte;
2025 Lisp_Object attrs, charset_list; 2015 Lisp_Object attrs, charset_list;
@@ -2082,7 +2072,7 @@ decode_coding_emacs_mule (coding)
2082 if (last_id != id) 2072 if (last_id != id)
2083 { 2073 {
2084 if (last_id != charset_ascii) 2074 if (last_id != charset_ascii)
2085 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); 2075 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2086 last_id = id; 2076 last_id = id;
2087 last_offset = char_offset; 2077 last_offset = char_offset;
2088 } 2078 }
@@ -2104,7 +2094,7 @@ decode_coding_emacs_mule (coding)
2104 2094
2105 no_more_source: 2095 no_more_source:
2106 if (last_id != charset_ascii) 2096 if (last_id != charset_ascii)
2107 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); 2097 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2108 coding->consumed_char += consumed_chars_base; 2098 coding->consumed_char += consumed_chars_base;
2109 coding->consumed = src_base - coding->source; 2099 coding->consumed = src_base - coding->source;
2110 coding->charbuf_used = charbuf - coding->charbuf; 2100 coding->charbuf_used = charbuf - coding->charbuf;
@@ -2810,10 +2800,8 @@ detect_coding_iso_2022 (coding, detect_info)
2810 : (component_idx + 1) / 2); \ 2800 : (component_idx + 1) / 2); \
2811 int i; \ 2801 int i; \
2812 int *saved_charbuf = charbuf; \ 2802 int *saved_charbuf = charbuf; \
2813 int from = char_offset; \
2814 int to = from + nchars; \
2815 \ 2803 \
2816 ADD_COMPOSITION_DATA (charbuf, from, to, method); \ 2804 ADD_COMPOSITION_DATA (charbuf, nchars, method); \
2817 if (method != COMPOSITION_RELATIVE) \ 2805 if (method != COMPOSITION_RELATIVE) \
2818 { \ 2806 { \
2819 if (component_len == 0) \ 2807 if (component_len == 0) \
@@ -2869,9 +2857,9 @@ decode_coding_iso_2022 (coding)
2869 const unsigned char *src = coding->source + coding->consumed; 2857 const unsigned char *src = coding->source + coding->consumed;
2870 const unsigned char *src_end = coding->source + coding->src_bytes; 2858 const unsigned char *src_end = coding->source + coding->src_bytes;
2871 const unsigned char *src_base; 2859 const unsigned char *src_base;
2872 int *charbuf = coding->charbuf; 2860 int *charbuf = coding->charbuf + coding->charbuf_used;
2873 int *charbuf_end 2861 int *charbuf_end
2874 = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH; 2862 = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
2875 int consumed_chars = 0, consumed_chars_base; 2863 int consumed_chars = 0, consumed_chars_base;
2876 int multibytep = coding->src_multibyte; 2864 int multibytep = coding->src_multibyte;
2877 /* Charsets invoked to graphic plane 0 and 1 respectively. */ 2865 /* Charsets invoked to graphic plane 0 and 1 respectively. */
@@ -3224,7 +3212,7 @@ decode_coding_iso_2022 (coding)
3224 && last_id != charset->id) 3212 && last_id != charset->id)
3225 { 3213 {
3226 if (last_id != charset_ascii) 3214 if (last_id != charset_ascii)
3227 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); 3215 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3228 last_id = charset->id; 3216 last_id = charset->id;
3229 last_offset = char_offset; 3217 last_offset = char_offset;
3230 } 3218 }
@@ -3293,7 +3281,7 @@ decode_coding_iso_2022 (coding)
3293 3281
3294 no_more_source: 3282 no_more_source:
3295 if (last_id != charset_ascii) 3283 if (last_id != charset_ascii)
3296 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); 3284 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3297 coding->consumed_char += consumed_chars_base; 3285 coding->consumed_char += consumed_chars_base;
3298 coding->consumed = src_base - coding->source; 3286 coding->consumed = src_base - coding->source;
3299 coding->charbuf_used = charbuf - coding->charbuf; 3287 coding->charbuf_used = charbuf - coding->charbuf;
@@ -3995,8 +3983,9 @@ decode_coding_sjis (coding)
3995 const unsigned char *src = coding->source + coding->consumed; 3983 const unsigned char *src = coding->source + coding->consumed;
3996 const unsigned char *src_end = coding->source + coding->src_bytes; 3984 const unsigned char *src_end = coding->source + coding->src_bytes;
3997 const unsigned char *src_base; 3985 const unsigned char *src_base;
3998 int *charbuf = coding->charbuf; 3986 int *charbuf = coding->charbuf + coding->charbuf_used;
3999 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; 3987 int *charbuf_end
3988 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4000 int consumed_chars = 0, consumed_chars_base; 3989 int consumed_chars = 0, consumed_chars_base;
4001 int multibytep = coding->src_multibyte; 3990 int multibytep = coding->src_multibyte;
4002 struct charset *charset_roman, *charset_kanji, *charset_kana; 3991 struct charset *charset_roman, *charset_kanji, *charset_kana;
@@ -4064,7 +4053,7 @@ decode_coding_sjis (coding)
4064 && last_id != charset->id) 4053 && last_id != charset->id)
4065 { 4054 {
4066 if (last_id != charset_ascii) 4055 if (last_id != charset_ascii)
4067 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); 4056 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4068 last_id = charset->id; 4057 last_id = charset->id;
4069 last_offset = char_offset; 4058 last_offset = char_offset;
4070 } 4059 }
@@ -4084,7 +4073,7 @@ decode_coding_sjis (coding)
4084 4073
4085 no_more_source: 4074 no_more_source:
4086 if (last_id != charset_ascii) 4075 if (last_id != charset_ascii)
4087 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); 4076 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4088 coding->consumed_char += consumed_chars_base; 4077 coding->consumed_char += consumed_chars_base;
4089 coding->consumed = src_base - coding->source; 4078 coding->consumed = src_base - coding->source;
4090 coding->charbuf_used = charbuf - coding->charbuf; 4079 coding->charbuf_used = charbuf - coding->charbuf;
@@ -4097,8 +4086,9 @@ decode_coding_big5 (coding)
4097 const unsigned char *src = coding->source + coding->consumed; 4086 const unsigned char *src = coding->source + coding->consumed;
4098 const unsigned char *src_end = coding->source + coding->src_bytes; 4087 const unsigned char *src_end = coding->source + coding->src_bytes;
4099 const unsigned char *src_base; 4088 const unsigned char *src_base;
4100 int *charbuf = coding->charbuf; 4089 int *charbuf = coding->charbuf + coding->charbuf_used;
4101 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; 4090 int *charbuf_end
4091 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4102 int consumed_chars = 0, consumed_chars_base; 4092 int consumed_chars = 0, consumed_chars_base;
4103 int multibytep = coding->src_multibyte; 4093 int multibytep = coding->src_multibyte;
4104 struct charset *charset_roman, *charset_big5; 4094 struct charset *charset_roman, *charset_big5;
@@ -4144,7 +4134,7 @@ decode_coding_big5 (coding)
4144 && last_id != charset->id) 4134 && last_id != charset->id)
4145 { 4135 {
4146 if (last_id != charset_ascii) 4136 if (last_id != charset_ascii)
4147 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); 4137 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4148 last_id = charset->id; 4138 last_id = charset->id;
4149 last_offset = char_offset; 4139 last_offset = char_offset;
4150 } 4140 }
@@ -4164,7 +4154,7 @@ decode_coding_big5 (coding)
4164 4154
4165 no_more_source: 4155 no_more_source:
4166 if (last_id != charset_ascii) 4156 if (last_id != charset_ascii)
4167 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); 4157 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4168 coding->consumed_char += consumed_chars_base; 4158 coding->consumed_char += consumed_chars_base;
4169 coding->consumed = src_base - coding->source; 4159 coding->consumed = src_base - coding->source;
4170 coding->charbuf_used = charbuf - coding->charbuf; 4160 coding->charbuf_used = charbuf - coding->charbuf;
@@ -4396,8 +4386,8 @@ decode_coding_ccl (coding)
4396{ 4386{
4397 const unsigned char *src = coding->source + coding->consumed; 4387 const unsigned char *src = coding->source + coding->consumed;
4398 const unsigned char *src_end = coding->source + coding->src_bytes; 4388 const unsigned char *src_end = coding->source + coding->src_bytes;
4399 int *charbuf = coding->charbuf; 4389 int *charbuf = coding->charbuf + coding->charbuf_used;
4400 int *charbuf_end = charbuf + coding->charbuf_size; 4390 int *charbuf_end = coding->charbuf + coding->charbuf_size;
4401 int consumed_chars = 0; 4391 int consumed_chars = 0;
4402 int multibytep = coding->src_multibyte; 4392 int multibytep = coding->src_multibyte;
4403 struct ccl_program ccl; 4393 struct ccl_program ccl;
@@ -4683,8 +4673,9 @@ decode_coding_charset (coding)
4683 const unsigned char *src = coding->source + coding->consumed; 4673 const unsigned char *src = coding->source + coding->consumed;
4684 const unsigned char *src_end = coding->source + coding->src_bytes; 4674 const unsigned char *src_end = coding->source + coding->src_bytes;
4685 const unsigned char *src_base; 4675 const unsigned char *src_base;
4686 int *charbuf = coding->charbuf; 4676 int *charbuf = coding->charbuf + coding->charbuf_used;
4687 int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; 4677 int *charbuf_end
4678 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4688 int consumed_chars = 0, consumed_chars_base; 4679 int consumed_chars = 0, consumed_chars_base;
4689 int multibytep = coding->src_multibyte; 4680 int multibytep = coding->src_multibyte;
4690 Lisp_Object attrs, charset_list, valids; 4681 Lisp_Object attrs, charset_list, valids;
@@ -4759,7 +4750,7 @@ decode_coding_charset (coding)
4759 && last_id != charset->id) 4750 && last_id != charset->id)
4760 { 4751 {
4761 if (last_id != charset_ascii) 4752 if (last_id != charset_ascii)
4762 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); 4753 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4763 last_id = charset->id; 4754 last_id = charset->id;
4764 last_offset = char_offset; 4755 last_offset = char_offset;
4765 } 4756 }
@@ -4779,7 +4770,7 @@ decode_coding_charset (coding)
4779 4770
4780 no_more_source: 4771 no_more_source:
4781 if (last_id != charset_ascii) 4772 if (last_id != charset_ascii)
4782 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); 4773 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4783 coding->consumed_char += consumed_chars_base; 4774 coding->consumed_char += consumed_chars_base;
4784 coding->consumed = src_base - coding->source; 4775 coding->consumed = src_base - coding->source;
4785 coding->charbuf_used = charbuf - coding->charbuf; 4776 coding->charbuf_used = charbuf - coding->charbuf;
@@ -5573,53 +5564,108 @@ translate_chars (coding, table)
5573 } 5564 }
5574} 5565}
5575 5566
5567static Lisp_Object
5568get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
5569 Lisp_Object val;
5570 int *buf, *buf_end;
5571 int last_block;
5572 int *from_nchars, *to_nchars;
5573{
5574 /* VAL is TO-CHAR, [TO-CHAR ...], ([FROM-CHAR ...] . TO-CHAR), or
5575 ([FROM-CHAR ...] . [TO-CHAR ...]). */
5576 if (CONSP (val))
5577 {
5578 Lisp_Object from;
5579 int i, len;
5580
5581 from = XCAR (val);
5582 val = XCDR (val);
5583 len = ASIZE (from);
5584 for (i = 0; i < len; i++)
5585 {
5586 if (buf + i == buf_end)
5587 return (last_block ? Qnil : Qt);
5588 if (XINT (AREF (from, i)) != buf[i])
5589 return Qnil;
5590 }
5591 *from_nchars = len;
5592 }
5593 if (VECTORP (val))
5594 *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
5595 else
5596 *buf = XINT (val);
5597 return val;
5598}
5599
5600
5576static int 5601static int
5577produce_chars (coding) 5602produce_chars (coding, translation_table, last_block)
5578 struct coding_system *coding; 5603 struct coding_system *coding;
5604 Lisp_Object translation_table;
5605 int last_block;
5579{ 5606{
5580 unsigned char *dst = coding->destination + coding->produced; 5607 unsigned char *dst = coding->destination + coding->produced;
5581 unsigned char *dst_end = coding->destination + coding->dst_bytes; 5608 unsigned char *dst_end = coding->destination + coding->dst_bytes;
5582 int produced; 5609 int produced;
5583 int produced_chars = 0; 5610 int produced_chars = 0;
5611 int carryover = 0;
5584 5612
5585 if (! coding->chars_at_source) 5613 if (! coding->chars_at_source)
5586 { 5614 {
5587 /* Characters are in coding->charbuf. */ 5615 /* Characters are in coding->charbuf. */
5588 int *buf = coding->charbuf; 5616 int *buf = coding->charbuf;
5589 int *buf_end = buf + coding->charbuf_used; 5617 int *buf_end = buf + coding->charbuf_used;
5590 unsigned char *adjusted_dst_end;
5591 5618
5592 if (BUFFERP (coding->src_object) 5619 if (BUFFERP (coding->src_object)
5593 && EQ (coding->src_object, coding->dst_object)) 5620 && EQ (coding->src_object, coding->dst_object))
5594 dst_end = ((unsigned char *) coding->source) + coding->consumed; 5621 dst_end = ((unsigned char *) coding->source) + coding->consumed;
5595 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5596 5622
5597 while (buf < buf_end) 5623 while (buf < buf_end)
5598 { 5624 {
5599 int c = *buf++; 5625 int c = *buf, i;
5600 5626
5601 if (dst >= adjusted_dst_end)
5602 {
5603 dst = alloc_destination (coding,
5604 buf_end - buf + MAX_MULTIBYTE_LENGTH,
5605 dst);
5606 dst_end = coding->destination + coding->dst_bytes;
5607 adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5608 }
5609 if (c >= 0) 5627 if (c >= 0)
5610 { 5628 {
5611 if (coding->dst_multibyte 5629 int from_nchars = 1, to_nchars = 1;
5612 || ! CHAR_BYTE8_P (c)) 5630 Lisp_Object trans = Qnil;
5613 CHAR_STRING_ADVANCE (c, dst); 5631
5614 else 5632 if (! NILP (translation_table)
5615 *dst++ = CHAR_TO_BYTE8 (c); 5633 && ! NILP (trans = CHAR_TABLE_REF (translation_table, c)))
5616 produced_chars++; 5634 {
5635 trans = get_translation (trans, buf, buf_end, last_block,
5636 &from_nchars, &to_nchars);
5637 if (EQ (trans, Qt))
5638 break;
5639 c = *buf;
5640 }
5641
5642 if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
5643 {
5644 dst = alloc_destination (coding,
5645 buf_end - buf
5646 + MAX_MULTIBYTE_LENGTH * to_nchars,
5647 dst);
5648 dst_end = coding->destination + coding->dst_bytes;
5649 }
5650
5651 for (i = 0; i < to_nchars; i++, c = XINT (AREF (trans, i)))
5652 {
5653 if (coding->dst_multibyte
5654 || ! CHAR_BYTE8_P (c))
5655 CHAR_STRING_ADVANCE (c, dst);
5656 else
5657 *dst++ = CHAR_TO_BYTE8 (c);
5658 }
5659 produced_chars += to_nchars;
5660 *buf++ = to_nchars;
5661 while (--from_nchars > 0)
5662 *buf++ = 0;
5617 } 5663 }
5618 else 5664 else
5619 /* This is an annotation datum. (-C) is the length of 5665 /* This is an annotation datum. (-C) is the length. */
5620 it. */ 5666 buf += -c;
5621 buf += -c - 1;
5622 } 5667 }
5668 carryover = buf_end - buf;
5623 } 5669 }
5624 else 5670 else
5625 { 5671 {
@@ -5761,7 +5807,7 @@ produce_chars (coding)
5761 insert_from_gap (produced_chars, produced); 5807 insert_from_gap (produced_chars, produced);
5762 coding->produced += produced; 5808 coding->produced += produced;
5763 coding->produced_char += produced_chars; 5809 coding->produced_char += produced_chars;
5764 return produced_chars; 5810 return carryover;
5765} 5811}
5766 5812
5767/* Compose text in CODING->object according to the annotation data at 5813/* Compose text in CODING->object according to the annotation data at
@@ -5770,19 +5816,19 @@ produce_chars (coding)
5770 */ 5816 */
5771 5817
5772static INLINE void 5818static INLINE void
5773produce_composition (coding, charbuf) 5819produce_composition (coding, charbuf, pos)
5774 struct coding_system *coding; 5820 struct coding_system *coding;
5775 int *charbuf; 5821 int *charbuf;
5822 EMACS_INT pos;
5776{ 5823{
5777 int len; 5824 int len;
5778 EMACS_INT from, to; 5825 EMACS_INT to;
5779 enum composition_method method; 5826 enum composition_method method;
5780 Lisp_Object components; 5827 Lisp_Object components;
5781 5828
5782 len = -charbuf[0]; 5829 len = -charbuf[0];
5783 from = coding->dst_pos + charbuf[2]; 5830 to = pos + charbuf[2];
5784 to = coding->dst_pos + charbuf[3]; 5831 method = (enum composition_method) (charbuf[3]);
5785 method = (enum composition_method) (charbuf[4]);
5786 5832
5787 if (method == COMPOSITION_RELATIVE) 5833 if (method == COMPOSITION_RELATIVE)
5788 components = Qnil; 5834 components = Qnil;
@@ -5791,32 +5837,32 @@ produce_composition (coding, charbuf)
5791 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1]; 5837 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5792 int i; 5838 int i;
5793 5839
5794 len -= 5; 5840 len -= 4;
5795 charbuf += 5; 5841 charbuf += 4;
5796 for (i = 0; i < len; i++) 5842 for (i = 0; i < len; i++)
5797 args[i] = make_number (charbuf[i]); 5843 args[i] = make_number (charbuf[i]);
5798 components = (method == COMPOSITION_WITH_ALTCHARS 5844 components = (method == COMPOSITION_WITH_ALTCHARS
5799 ? Fstring (len, args) : Fvector (len, args)); 5845 ? Fstring (len, args) : Fvector (len, args));
5800 } 5846 }
5801 compose_text (from, to, components, Qnil, coding->dst_object); 5847 compose_text (pos, to, components, Qnil, coding->dst_object);
5802} 5848}
5803 5849
5804 5850
5805/* Put `charset' property on text in CODING->object according to 5851/* Put `charset' property on text in CODING->object according to
5806 the annotation data at CHARBUF. CHARBUF is an array: 5852 the annotation data at CHARBUF. CHARBUF is an array:
5807 [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ] 5853 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
5808 */ 5854 */
5809 5855
5810static INLINE void 5856static INLINE void
5811produce_charset (coding, charbuf) 5857produce_charset (coding, charbuf, pos)
5812 struct coding_system *coding; 5858 struct coding_system *coding;
5813 int *charbuf; 5859 int *charbuf;
5860 EMACS_INT pos;
5814{ 5861{
5815 EMACS_INT from = coding->dst_pos + charbuf[2]; 5862 EMACS_INT from = pos - charbuf[2];
5816 EMACS_INT to = coding->dst_pos + charbuf[3]; 5863 struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
5817 struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
5818 5864
5819 Fput_text_property (make_number (from), make_number (to), 5865 Fput_text_property (make_number (from), make_number (pos),
5820 Qcharset, CHARSET_NAME (charset), 5866 Qcharset, CHARSET_NAME (charset),
5821 coding->dst_object); 5867 coding->dst_object);
5822} 5868}
@@ -5846,8 +5892,9 @@ produce_charset (coding, charbuf)
5846 5892
5847 5893
5848static void 5894static void
5849produce_annotation (coding) 5895produce_annotation (coding, pos)
5850 struct coding_system *coding; 5896 struct coding_system *coding;
5897 EMACS_INT pos;
5851{ 5898{
5852 int *charbuf = coding->charbuf; 5899 int *charbuf = coding->charbuf;
5853 int *charbuf_end = charbuf + coding->charbuf_used; 5900 int *charbuf_end = charbuf + coding->charbuf_used;
@@ -5858,17 +5905,17 @@ produce_annotation (coding)
5858 while (charbuf < charbuf_end) 5905 while (charbuf < charbuf_end)
5859 { 5906 {
5860 if (*charbuf >= 0) 5907 if (*charbuf >= 0)
5861 charbuf++; 5908 pos += *charbuf++;
5862 else 5909 else
5863 { 5910 {
5864 int len = -*charbuf; 5911 int len = -*charbuf;
5865 switch (charbuf[1]) 5912 switch (charbuf[1])
5866 { 5913 {
5867 case CODING_ANNOTATE_COMPOSITION_MASK: 5914 case CODING_ANNOTATE_COMPOSITION_MASK:
5868 produce_composition (coding, charbuf); 5915 produce_composition (coding, charbuf, pos);
5869 break; 5916 break;
5870 case CODING_ANNOTATE_CHARSET_MASK: 5917 case CODING_ANNOTATE_CHARSET_MASK:
5871 produce_charset (coding, charbuf); 5918 produce_charset (coding, charbuf, pos);
5872 break; 5919 break;
5873 default: 5920 default:
5874 abort (); 5921 abort ();
@@ -5908,6 +5955,8 @@ decode_coding (coding)
5908 Lisp_Object attrs; 5955 Lisp_Object attrs;
5909 Lisp_Object undo_list; 5956 Lisp_Object undo_list;
5910 Lisp_Object translation_table; 5957 Lisp_Object translation_table;
5958 int carryover;
5959 int i;
5911 5960
5912 if (BUFFERP (coding->src_object) 5961 if (BUFFERP (coding->src_object)
5913 && coding->src_pos > 0 5962 && coding->src_pos > 0
@@ -5937,21 +5986,33 @@ decode_coding (coding)
5937 attrs = CODING_ID_ATTRS (coding->id); 5986 attrs = CODING_ID_ATTRS (coding->id);
5938 translation_table = get_translation_table (attrs, 0); 5987 translation_table = get_translation_table (attrs, 0);
5939 5988
5989 carryover = 0;
5940 do 5990 do
5941 { 5991 {
5992 EMACS_INT pos = coding->dst_pos + coding->produced_char;
5993
5942 coding_set_source (coding); 5994 coding_set_source (coding);
5943 coding->annotated = 0; 5995 coding->annotated = 0;
5996 coding->charbuf_used = carryover;
5944 (*(coding->decoder)) (coding); 5997 (*(coding->decoder)) (coding);
5945 if (!NILP (translation_table))
5946 translate_chars (coding, translation_table);
5947 coding_set_destination (coding); 5998 coding_set_destination (coding);
5948 produce_chars (coding); 5999 carryover = produce_chars (coding, translation_table, 0);
5949 if (coding->annotated) 6000 if (coding->annotated)
5950 produce_annotation (coding); 6001 produce_annotation (coding, pos);
6002 for (i = 0; i < carryover; i++)
6003 coding->charbuf[i]
6004 = coding->charbuf[coding->charbuf_used - carryover + i];
5951 } 6005 }
5952 while (coding->consumed < coding->src_bytes 6006 while (coding->consumed < coding->src_bytes
5953 && ! coding->result); 6007 && ! coding->result);
5954 6008
6009 if (carryover > 0)
6010 {
6011 coding_set_destination (coding);
6012 coding->charbuf_used = carryover;
6013 produce_chars (coding, translation_table, 1);
6014 }
6015
5955 coding->carryover_bytes = 0; 6016 coding->carryover_bytes = 0;
5956 if (coding->consumed < coding->src_bytes) 6017 if (coding->consumed < coding->src_bytes)
5957 { 6018 {
@@ -6036,7 +6097,7 @@ handle_composition_annotation (pos, limit, coding, buf, stop)
6036 enum composition_method method = COMPOSITION_METHOD (prop); 6097 enum composition_method method = COMPOSITION_METHOD (prop);
6037 int nchars = COMPOSITION_LENGTH (prop); 6098 int nchars = COMPOSITION_LENGTH (prop);
6038 6099
6039 ADD_COMPOSITION_DATA (buf, 0, nchars, method); 6100 ADD_COMPOSITION_DATA (buf, nchars, method);
6040 if (method != COMPOSITION_RELATIVE) 6101 if (method != COMPOSITION_RELATIVE)
6041 { 6102 {
6042 Lisp_Object components; 6103 Lisp_Object components;
@@ -6111,7 +6172,7 @@ handle_charset_annotation (pos, limit, coding, buf, stop)
6111 id = XINT (CHARSET_SYMBOL_ID (val)); 6172 id = XINT (CHARSET_SYMBOL_ID (val));
6112 else 6173 else
6113 id = -1; 6174 id = -1;
6114 ADD_CHARSET_DATA (buf, 0, 0, id); 6175 ADD_CHARSET_DATA (buf, 0, id);
6115 next = Fnext_single_property_change (make_number (pos), Qcharset, 6176 next = Fnext_single_property_change (make_number (pos), Qcharset,
6116 coding->src_object, 6177 coding->src_object,
6117 make_number (limit)); 6178 make_number (limit));