aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorKenichi Handa2009-03-06 07:51:52 +0000
committerKenichi Handa2009-03-06 07:51:52 +0000
commite951386e3862e77b38b96e00652b8ce4ba788aff (patch)
tree73c3c8e41ed92ade080a6bcf3ce759e00bfb1860 /src
parentc8644de0d406a99a4a493a0b78a1f3df62d4cfda (diff)
downloademacs-e951386e3862e77b38b96e00652b8ce4ba788aff.tar.gz
emacs-e951386e3862e77b38b96e00652b8ce4ba788aff.zip
(CODING_ISO_CMP_STATUS): New macro.
(CODING_ISO_EXTSEGMENT_LEN, CODING_ISO_EMBEDDED_UTF_8): New macros. (MAX_ANNOTATION_LENGTH): Defined to 5. (ADD_COMPOSITION_DATA): New arg nbytes. (emacs_mule_char): New arg cmp_status. (DECODE_EMACS_MULE_COMPOSITION_CHAR): Delete it. (DECODE_EMACS_MULE_COMPOSITION_RULE_20): New arg c. (DECODE_EMACS_MULE_COMPOSITION_RULE_21): New arg c. (DECODE_EMACS_MULE_21_COMPOSITION): Delete the arg c. (DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION): Likewise. (DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION): Likewise. (DECODE_EMACS_MULE_COMPOSITION_START): New macro. (EMACS_MULE_COMPOSITION_END): New macro. (emacs_mule_finish_composition): New function. (EMACS_MULE_MAYBE_FINISH_COMPOSITION): New macro. (decode_coding_emacs_mule): Avoid long looking ahead while handling composition. (DECODE_COMPOSITION_RULE): Argument changed to rule and nbytes. (ENCODE_COMPOSITION_RULE): New macro. (finish_composition): New function. (MAYBE_FINISH_COMPOSITION): Call finish_composition. (DECODE_COMPOSITION_START): New implementation. (DECODE_COMPOSITION_END): Likewise. (STORE_COMPOSITION_RULE): New macro. (decode_coding_iso_2022): Avoid long looking ahead while handling composition, CTEXT extended segment, and embedded UTF-8. (setup_coding_system): For a coding of type iso-2022, reset CODING_ISO_EXTSEGMENT_LEN (coding) and CODING_ISO_EMBEDDED_UTF_8 (coding). (get_translation): Delete arguments last_block, from_nchars, to_nchars. Callers changed. (produce_chars): Don't modify charbuf. Adjusted for the change of get_translation. (produce_composition): Adjusted for the new annotation sequence. (handle_composition_annotation): Likewise. (consume_chars): Adjusted for the change of get_translation.
Diffstat (limited to 'src')
-rw-r--r--src/coding.c1443
1 files changed, 906 insertions, 537 deletions
diff --git a/src/coding.c b/src/coding.c
index 91811f79bfe..8d5304dc114 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -452,6 +452,12 @@ Lisp_Object Vbig5_coding_system;
452 ((coding)->spec.iso_2022.bol) 452 ((coding)->spec.iso_2022.bol)
453#define CODING_ISO_INVOKED_CHARSET(coding, plane) \ 453#define CODING_ISO_INVOKED_CHARSET(coding, plane) \
454 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane))) 454 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
455#define CODING_ISO_CMP_STATUS(coding) \
456 (&(coding)->spec.iso_2022.cmp_status)
457#define CODING_ISO_EXTSEGMENT_LEN(coding) \
458 ((coding)->spec.iso_2022.ctext_extended_segment_len)
459#define CODING_ISO_EMBEDDED_UTF_8(coding) \
460 ((coding)->spec.iso_2022.embedded_utf_8)
455 461
456/* Control characters of ISO2022. */ 462/* Control characters of ISO2022. */
457 /* code */ /* function */ 463 /* code */ /* function */
@@ -945,11 +951,8 @@ static int detect_eol P_ ((const unsigned char *,
945static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int)); 951static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
946static void decode_eol P_ ((struct coding_system *)); 952static void decode_eol P_ ((struct coding_system *));
947static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *)); 953static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
948static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *, 954static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *));
949 int, int *, int *));
950static int produce_chars P_ ((struct coding_system *, Lisp_Object, int)); 955static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
951static INLINE void produce_composition P_ ((struct coding_system *, int *,
952 EMACS_INT));
953static INLINE void produce_charset P_ ((struct coding_system *, int *, 956static INLINE void produce_charset P_ ((struct coding_system *, int *,
954 EMACS_INT)); 957 EMACS_INT));
955static void produce_annotation P_ ((struct coding_system *, EMACS_INT)); 958static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
@@ -1208,10 +1211,6 @@ alloc_destination (coding, nbytes, dst)
1208 1211
1209/** Macros for annotations. */ 1212/** Macros for annotations. */
1210 1213
1211/* Maximum length of annotation data (sum of annotations for
1212 composition and charset). */
1213#define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
1214
1215/* An annotation data is stored in the array coding->charbuf in this 1214/* An annotation data is stored in the array coding->charbuf in this
1216 format: 1215 format:
1217 [ -LENGTH ANNOTATION_MASK NCHARS ... ] 1216 [ -LENGTH ANNOTATION_MASK NCHARS ... ]
@@ -1223,13 +1222,26 @@ alloc_destination (coding, nbytes, dst)
1223 1222
1224 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements 1223 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1225 follow: 1224 follow:
1226 ... METHOD [ COMPOSITION-COMPONENTS ... ] 1225 ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1226
1227 NBYTES is the number of bytes specified in the header part of
1228 old-style emacs-mule encoding, or 0 for the other kind of
1229 composition.
1230
1227 METHOD is one of enum composition_method. 1231 METHOD is one of enum composition_method.
1232
1228 Optional COMPOSITION-COMPONENTS are characters and composition 1233 Optional COMPOSITION-COMPONENTS are characters and composition
1229 rules. 1234 rules.
1230 1235
1231 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID 1236 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1232 follows. */ 1237 follows.
1238
1239 If ANNOTATION_MASK is 0, this annotation is just a space holder to
1240 recover from an invalid annotation, and should be skipped by
1241 produce_annotation. */
1242
1243/* Maximum length of the header of annotation data. */
1244#define MAX_ANNOTATION_LENGTH 5
1233 1245
1234#define ADD_ANNOTATION_DATA(buf, len, mask, nchars) \ 1246#define ADD_ANNOTATION_DATA(buf, len, mask, nchars) \
1235 do { \ 1247 do { \
@@ -1239,9 +1251,10 @@ alloc_destination (coding, nbytes, dst)
1239 coding->annotated = 1; \ 1251 coding->annotated = 1; \
1240 } while (0); 1252 } while (0);
1241 1253
1242#define ADD_COMPOSITION_DATA(buf, nchars, method) \ 1254#define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method) \
1243 do { \ 1255 do { \
1244 ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \ 1256 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
1257 *buf++ = nbytes; \
1245 *buf++ = method; \ 1258 *buf++ = method; \
1246 } while (0) 1259 } while (0)
1247 1260
@@ -1920,12 +1933,12 @@ encode_coding_utf_16 (coding)
1920 Next, character composition data are represented by the byte 1933 Next, character composition data are represented by the byte
1921 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ..., 1934 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1922 where, 1935 where,
1923 METHOD is 0xF0 plus one of composition method (enum 1936 METHOD is 0xF2 plus one of composition method (enum
1924 composition_method), 1937 composition_method),
1925 1938
1926 BYTES is 0xA0 plus a byte length of this composition data, 1939 BYTES is 0xA0 plus a byte length of this composition data,
1927 1940
1928 CHARS is 0x20 plus a number of characters composed by this 1941 CHARS is 0xA0 plus a number of characters composed by this
1929 data, 1942 data,
1930 1943
1931 COMPONENTs are characters of multibyte form or composition 1944 COMPONENTs are characters of multibyte form or composition
@@ -1947,11 +1960,107 @@ encode_coding_utf_16 (coding)
1947 1960
1948char emacs_mule_bytes[256]; 1961char emacs_mule_bytes[256];
1949 1962
1963
1964/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1965 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1966 else return 0. */
1967
1968static int
1969detect_coding_emacs_mule (coding, detect_info)
1970 struct coding_system *coding;
1971 struct coding_detection_info *detect_info;
1972{
1973 const unsigned char *src = coding->source, *src_base;
1974 const unsigned char *src_end = coding->source + coding->src_bytes;
1975 int multibytep = coding->src_multibyte;
1976 int consumed_chars = 0;
1977 int c;
1978 int found = 0;
1979
1980 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1981 /* A coding system of this category is always ASCII compatible. */
1982 src += coding->head_ascii;
1983
1984 while (1)
1985 {
1986 src_base = src;
1987 ONE_MORE_BYTE (c);
1988 if (c < 0)
1989 continue;
1990 if (c == 0x80)
1991 {
1992 /* Perhaps the start of composite character. We simply skip
1993 it because analyzing it is too heavy for detecting. But,
1994 at least, we check that the composite character
 1995 consists of more than 4 bytes. */
1996 const unsigned char *src_base;
1997
1998 repeat:
1999 src_base = src;
2000 do
2001 {
2002 ONE_MORE_BYTE (c);
2003 }
2004 while (c >= 0xA0);
2005
2006 if (src - src_base <= 4)
2007 break;
2008 found = CATEGORY_MASK_EMACS_MULE;
2009 if (c == 0x80)
2010 goto repeat;
2011 }
2012
2013 if (c < 0x80)
2014 {
2015 if (c < 0x20
2016 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2017 break;
2018 }
2019 else
2020 {
2021 int more_bytes = emacs_mule_bytes[*src_base] - 1;
2022
2023 while (more_bytes > 0)
2024 {
2025 ONE_MORE_BYTE (c);
2026 if (c < 0xA0)
2027 {
2028 src--; /* Unread the last byte. */
2029 break;
2030 }
2031 more_bytes--;
2032 }
2033 if (more_bytes != 0)
2034 break;
2035 found = CATEGORY_MASK_EMACS_MULE;
2036 }
2037 }
2038 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2039 return 0;
2040
2041 no_more_source:
2042 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2043 {
2044 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2045 return 0;
2046 }
2047 detect_info->found |= found;
2048 return 1;
2049}
2050
2051
2052/* Parse emacs-mule multibyte sequence at SRC and return the decoded
2053 character. If CMP_STATUS indicates that we must expect MSEQ or
2054 RULE described above, decode it and return the negative value of
 2055 the decoded character or rule. If an invalid byte is found, return
2056 -1. If SRC is too short, return -2. */
2057
1950int 2058int
1951emacs_mule_char (coding, src, nbytes, nchars, id) 2059emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status)
1952 struct coding_system *coding; 2060 struct coding_system *coding;
1953 const unsigned char *src; 2061 const unsigned char *src;
1954 int *nbytes, *nchars, *id; 2062 int *nbytes, *nchars, *id;
2063 struct composition_status *cmp_status;
1955{ 2064{
1956 const unsigned char *src_end = coding->source + coding->src_bytes; 2065 const unsigned char *src_end = coding->source + coding->src_bytes;
1957 const unsigned char *src_base = src; 2066 const unsigned char *src_base = src;
@@ -1960,6 +2069,7 @@ emacs_mule_char (coding, src, nbytes, nchars, id)
1960 unsigned code; 2069 unsigned code;
1961 int c; 2070 int c;
1962 int consumed_chars = 0; 2071 int consumed_chars = 0;
2072 int mseq_found = 0;
1963 2073
1964 ONE_MORE_BYTE (c); 2074 ONE_MORE_BYTE (c);
1965 if (c < 0) 2075 if (c < 0)
@@ -1971,14 +2081,31 @@ emacs_mule_char (coding, src, nbytes, nchars, id)
1971 { 2081 {
1972 if (c >= 0xA0) 2082 if (c >= 0xA0)
1973 { 2083 {
1974 /* Old style component character of a composition. */ 2084 if (cmp_status->state != COMPOSING_NO
1975 if (c == 0xA0) 2085 && cmp_status->old_form)
1976 { 2086 {
1977 ONE_MORE_BYTE (c); 2087 if (cmp_status->state == COMPOSING_CHAR)
1978 c -= 0x80; 2088 {
2089 if (c == 0xA0)
2090 {
2091 ONE_MORE_BYTE (c);
2092 c -= 0x80;
2093 if (c < 0)
2094 goto invalid_code;
2095 }
2096 else
2097 c -= 0x20;
2098 mseq_found = 1;
2099 }
2100 else
2101 {
2102 *nbytes = src - src_base;
2103 *nchars = consumed_chars;
2104 return -c;
2105 }
1979 } 2106 }
1980 else 2107 else
1981 c -= 0x20; 2108 goto invalid_code;
1982 } 2109 }
1983 2110
1984 switch (emacs_mule_bytes[c]) 2111 switch (emacs_mule_bytes[c])
@@ -2050,7 +2177,7 @@ emacs_mule_char (coding, src, nbytes, nchars, id)
2050 *nchars = consumed_chars; 2177 *nchars = consumed_chars;
2051 if (id) 2178 if (id)
2052 *id = charset->id; 2179 *id = charset->id;
2053 return c; 2180 return (mseq_found ? -c : c);
2054 2181
2055 no_more_source: 2182 no_more_source:
2056 return -2; 2183 return -2;
@@ -2060,259 +2187,250 @@ emacs_mule_char (coding, src, nbytes, nchars, id)
2060} 2187}
2061 2188
2062 2189
2063/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 2190/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2064 Check if a text is encoded in `emacs-mule'. If it is, return 1,
2065 else return 0. */
2066 2191
2067static int 2192/* Handle these composition sequence ('|': the end of header elements,
2068detect_coding_emacs_mule (coding, detect_info) 2193 BYTES and CHARS >= 0xA0):
2069 struct coding_system *coding;
2070 struct coding_detection_info *detect_info;
2071{
2072 const unsigned char *src = coding->source, *src_base;
2073 const unsigned char *src_end = coding->source + coding->src_bytes;
2074 int multibytep = coding->src_multibyte;
2075 int consumed_chars = 0;
2076 int c;
2077 int found = 0;
2078 2194
2079 detect_info->checked |= CATEGORY_MASK_EMACS_MULE; 2195 (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2080 /* A coding system of this category is always ASCII compatible. */ 2196 (2) altchar composition: 0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2081 src += coding->head_ascii; 2197 (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
2082 2198
2083 while (1) 2199 and these old form:
2084 { 2200
2085 src_base = src; 2201 (4) relative composition: 0x80 | MSEQ ... MSEQ
2086 ONE_MORE_BYTE (c); 2202 (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
2087 if (c < 0)
2088 continue;
2089 if (c == 0x80)
2090 {
2091 /* Perhaps the start of composite character. We simple skip
2092 it because analyzing it is too heavy for detecting. But,
2093 at least, we check that the composite character
2094 constitutes of more than 4 bytes. */
2095 const unsigned char *src_base;
2096 2203
2097 repeat: 2204 When the starter 0x80 and the following header elements are found,
2098 src_base = src; 2205 this annotation header is produced.
2099 do
2100 {
2101 ONE_MORE_BYTE (c);
2102 }
2103 while (c >= 0xA0);
2104 2206
2105 if (src - src_base <= 4) 2207 [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
2106 break;
2107 found = CATEGORY_MASK_EMACS_MULE;
2108 if (c == 0x80)
2109 goto repeat;
2110 }
2111 2208
2112 if (c < 0x80) 2209 NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2113 { 2210 NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2114 if (c < 0x20
2115 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2116 break;
2117 }
2118 else
2119 {
2120 int more_bytes = emacs_mule_bytes[*src_base] - 1;
2121 2211
2122 while (more_bytes > 0) 2212 Then, upon reading the following elements, these codes are produced
2123 { 2213 until the composition end is found:
2124 ONE_MORE_BYTE (c);
2125 if (c < 0xA0)
2126 {
2127 src--; /* Unread the last byte. */
2128 break;
2129 }
2130 more_bytes--;
2131 }
2132 if (more_bytes != 0)
2133 break;
2134 found = CATEGORY_MASK_EMACS_MULE;
2135 }
2136 }
2137 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2138 return 0;
2139 2214
2140 no_more_source: 2215 (1) CHAR ... CHAR
2141 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK) 2216 (2) ALT ... ALT CHAR ... CHAR
2142 { 2217 (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2143 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE; 2218 (4) CHAR ... CHAR
2144 return 0; 2219 (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
2145 }
2146 detect_info->found |= found;
2147 return 1;
2148}
2149 2220
2221 When the composition end is found, LENGTH and NCHARS in the
 2222 annotation header are updated as below:
2150 2223
2151/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ 2224 (1) LENGTH: unchanged, NCHARS: unchanged
2225 (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2226 (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2227 (4) LENGTH: unchanged, NCHARS: number of CHARs
2228 (5) LENGTH: unchanged, NCHARS: number of CHARs
2152 2229
2153/* Decode a character represented as a component of composition 2230 If an error is found while composing, the annotation header is
2154 sequence of Emacs 20/21 style at SRC. Set C to that character and 2231 changed to the original composition header (plus filler -1s) as
2155 update SRC to the head of next character (or an encoded composition 2232 below:
2156 rule). If SRC doesn't points a composition component, set C to -1. 2233
2157 If SRC points an invalid byte sequence, global exit by a return 2234 (1),(2),(3) [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
2158 value 0. */ 2235 (5) [ 0x80 0xFF -1 -1 -1 ]
2159 2236
2160#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \ 2237 and the sequence [ -2 DECODED-RULE ] is changed to the original
2161 do \ 2238 byte sequence as below:
2162 { \ 2239 o the original byte sequence is B: [ B -1 ]
2163 int c; \ 2240 o the original byte sequence is B1 B2: [ B1 B2 ]
2164 int nbytes, nchars; \ 2241
2165 \ 2242 Most of the routines are implemented by macros because many
2166 if (src == src_end) \ 2243 variables and labels in the caller decode_coding_emacs_mule must be
2167 break; \ 2244 accessible, and they are usually called just once (thus doesn't
2168 c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\ 2245 increase the size of compiled object). */
2169 if (c < 0) \ 2246
2170 { \ 2247/* Decode a composition rule represented by C as a component of
2171 if (c == -2) \ 2248 composition sequence of Emacs 20 style. Set RULE to the decoded
2172 break; \ 2249 rule. */
2173 goto invalid_code; \ 2250
2174 } \ 2251#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule) \
2175 *buf++ = c; \
2176 src += nbytes; \
2177 consumed_chars += nchars; \
2178 } \
2179 while (0)
2180
2181
2182/* Decode a composition rule represented as a component of composition
2183 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
2184 and increment BUF. If SRC points an invalid byte sequence, set C
2185 to -1. */
2186
2187#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
2188 do { \ 2252 do { \
2189 int c, gref, nref; \ 2253 int gref, nref; \
2190 \ 2254 \
2191 if (src >= src_end) \
2192 goto invalid_code; \
2193 ONE_MORE_BYTE_NO_CHECK (c); \
2194 c -= 0xA0; \ 2255 c -= 0xA0; \
2195 if (c < 0 || c >= 81) \ 2256 if (c < 0 || c >= 81) \
2196 goto invalid_code; \ 2257 goto invalid_code; \
2197 \
2198 gref = c / 9, nref = c % 9; \ 2258 gref = c / 9, nref = c % 9; \
2199 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \ 2259 if (gref == 4) gref = 10; \
2260 if (nref == 4) nref = 10; \
2261 rule = COMPOSITION_ENCODE_RULE (gref, nref); \
2200 } while (0) 2262 } while (0)
2201 2263
2202 2264
2203/* Decode a composition rule represented as a component of composition 2265/* Decode a composition rule represented by C and the following byte
2204 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF, 2266 at SRC as a component of composition sequence of Emacs 21 style.
2205 and increment BUF. If SRC points an invalid byte sequence, set C 2267 Set RULE to the decoded rule. */
2206 to -1. */
2207 2268
2208#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \ 2269#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule) \
2209 do { \ 2270 do { \
2210 int gref, nref; \ 2271 int gref, nref; \
2211 \ 2272 \
2212 if (src + 1>= src_end) \ 2273 gref = c - 0x20; \
2274 if (gref < 0 || gref >= 81) \
2213 goto invalid_code; \ 2275 goto invalid_code; \
2214 ONE_MORE_BYTE_NO_CHECK (gref); \ 2276 ONE_MORE_BYTE (c); \
2215 gref -= 0x20; \ 2277 nref = c - 0x20; \
2216 ONE_MORE_BYTE_NO_CHECK (nref); \ 2278 if (nref < 0 || nref >= 81) \
2217 nref -= 0x20; \
2218 if (gref < 0 || gref >= 81 \
2219 || nref < 0 || nref >= 81) \
2220 goto invalid_code; \ 2279 goto invalid_code; \
2221 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \ 2280 rule = COMPOSITION_ENCODE_RULE (gref, nref); \
2222 } while (0) 2281 } while (0)
2223 2282
2224 2283
2225#define DECODE_EMACS_MULE_21_COMPOSITION(c) \ 2284/* Start of Emacs 21 style format. The first three bytes at SRC are
2285 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is the
2286 byte length of this composition information, CHARS is the number of
2287 characters composed by this composition. */
2288
2289#define DECODE_EMACS_MULE_21_COMPOSITION() \
2226 do { \ 2290 do { \
2227 /* Emacs 21 style format. The first three bytes at SRC are \
2228 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
2229 the byte length of this composition information, CHARS is the \
2230 number of characters composed by this composition. */ \
2231 enum composition_method method = c - 0xF2; \ 2291 enum composition_method method = c - 0xF2; \
2232 int *charbuf_base = charbuf; \ 2292 int *charbuf_base = charbuf; \
2233 int consumed_chars_limit; \
2234 int nbytes, nchars; \ 2293 int nbytes, nchars; \
2235 \ 2294 \
2236 ONE_MORE_BYTE (c); \ 2295 ONE_MORE_BYTE (c); \
2237 if (c < 0) \ 2296 if (c < 0) \
2238 goto invalid_code; \ 2297 goto invalid_code; \
2239 nbytes = c - 0xA0; \ 2298 nbytes = c - 0xA0; \
2240 if (nbytes < 3) \ 2299 if (nbytes < 3 || (method == COMPOSITION_RELATIVE && nbytes != 4)) \
2241 goto invalid_code; \ 2300 goto invalid_code; \
2242 ONE_MORE_BYTE (c); \ 2301 ONE_MORE_BYTE (c); \
2243 if (c < 0) \
2244 goto invalid_code; \
2245 nchars = c - 0xA0; \ 2302 nchars = c - 0xA0; \
2246 ADD_COMPOSITION_DATA (charbuf, nchars, method); \ 2303 if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS) \
2247 consumed_chars_limit = consumed_chars_base + nbytes; \ 2304 goto invalid_code; \
2248 if (method != COMPOSITION_RELATIVE) \ 2305 cmp_status->old_form = 0; \
2249 { \ 2306 cmp_status->method = method; \
2250 int i = 0; \ 2307 if (method == COMPOSITION_RELATIVE) \
2251 while (consumed_chars < consumed_chars_limit) \ 2308 cmp_status->state = COMPOSING_CHAR; \
2252 { \ 2309 else \
2253 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \ 2310 cmp_status->state = COMPOSING_COMPONENT_CHAR; \
2254 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \ 2311 cmp_status->length = MAX_ANNOTATION_LENGTH; \
2255 else \ 2312 cmp_status->nchars = nchars; \
2256 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \ 2313 cmp_status->ncomps = nbytes - 4; \
2257 i++; \ 2314 ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method); \
2258 } \
2259 if (consumed_chars < consumed_chars_limit) \
2260 goto invalid_code; \
2261 charbuf_base[0] -= i; \
2262 } \
2263 } while (0) 2315 } while (0)
2264 2316
2265 2317
2266#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \ 2318/* Start of Emacs 20 style format for relative composition. */
2267 do { \ 2319
2268 /* Emacs 20 style format for relative composition. */ \ 2320#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION() \
2269 /* Store multibyte form of characters to be composed. */ \ 2321 do { \
2270 enum composition_method method = COMPOSITION_RELATIVE; \ 2322 cmp_status->old_form = 1; \
2271 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \ 2323 cmp_status->method = COMPOSITION_RELATIVE; \
2272 int *buf = components; \ 2324 cmp_status->state = COMPOSING_CHAR; \
2273 int i, j; \ 2325 cmp_status->length = MAX_ANNOTATION_LENGTH; \
2274 \ 2326 cmp_status->nchars = cmp_status->ncomps = 0; \
2275 src = src_base; \ 2327 ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method); \
2276 ONE_MORE_BYTE (c); /* skip 0x80 */ \
2277 for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++) \
2278 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
2279 if (i < 2) \
2280 goto invalid_code; \
2281 ADD_COMPOSITION_DATA (charbuf, i, method); \
2282 for (j = 0; j < i; j++) \
2283 *charbuf++ = components[j]; \
2284 } while (0) 2328 } while (0)
2285 2329
2286 2330
2287#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \ 2331/* Start of Emacs 20 style format for rule-base composition. */
2332
2333#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION() \
2334 do { \
2335 cmp_status->old_form = 1; \
2336 cmp_status->method = COMPOSITION_WITH_RULE; \
2337 cmp_status->state = COMPOSING_CHAR; \
2338 cmp_status->length = MAX_ANNOTATION_LENGTH; \
2339 cmp_status->nchars = cmp_status->ncomps = 0; \
2340 ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method); \
2341 } while (0)
2342
2343
2344#define DECODE_EMACS_MULE_COMPOSITION_START() \
2345 do { \
2346 const unsigned char *current_src = src; \
2347 \
2348 ONE_MORE_BYTE (c); \
2349 if (c < 0) \
2350 goto invalid_code; \
2351 if (c - 0xF2 >= COMPOSITION_RELATIVE \
2352 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS) \
2353 DECODE_EMACS_MULE_21_COMPOSITION (); \
2354 else if (c < 0xA0) \
2355 goto invalid_code; \
2356 else if (c < 0xC0) \
2357 { \
2358 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (); \
2359 /* Re-read C as a composition component. */ \
2360 src = current_src; \
2361 } \
2362 else if (c == 0xFF) \
2363 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (); \
2364 else \
2365 goto invalid_code; \
2366 } while (0)
2367
2368#define EMACS_MULE_COMPOSITION_END() \
2288 do { \ 2369 do { \
2289 /* Emacs 20 style format for rule-base composition. */ \ 2370 int idx = - cmp_status->length; \
2290 /* Store multibyte form of characters to be composed. */ \
2291 enum composition_method method = COMPOSITION_WITH_RULE; \
2292 int *charbuf_base = charbuf; \
2293 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \
2294 int *buf = components; \
2295 int i, j; \
2296 \ 2371 \
2297 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ 2372 if (cmp_status->old_form) \
2298 for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++) \ 2373 charbuf[idx + 2] = cmp_status->nchars; \
2299 { \ 2374 else if (cmp_status->method > COMPOSITION_RELATIVE) \
2300 if (*src < 0xA0) \ 2375 charbuf[idx] = charbuf[idx + 2] - cmp_status->length; \
2301 break; \ 2376 cmp_status->state = COMPOSING_NO; \
2302 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \ 2377 } while (0)
2303 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ 2378
2304 } \ 2379
2305 if (i <= 1 || (buf - components) % 2 == 0) \ 2380static int
2306 goto invalid_code; \ 2381emacs_mule_finish_composition (charbuf, cmp_status)
2307 if (charbuf + i + (i / 2) + 1 >= charbuf_end) \ 2382 int *charbuf;
2308 goto no_more_source; \ 2383 struct composition_status *cmp_status;
2309 ADD_COMPOSITION_DATA (charbuf, i, method); \ 2384{
2310 i = i * 2 - 1; \ 2385 int idx = - cmp_status->length;
2311 for (j = 0; j < i; j++) \ 2386 int new_chars;
2312 *charbuf++ = components[j]; \ 2387
2313 charbuf_base[0] -= i; \ 2388 if (cmp_status->old_form && cmp_status->nchars > 0)
2314 for (j = 0; j < i; j += 2) \ 2389 {
2315 *charbuf++ = components[j]; \ 2390 charbuf[idx + 2] = cmp_status->nchars;
2391 new_chars = 0;
2392 if (cmp_status->method == COMPOSITION_WITH_RULE
2393 && cmp_status->state == COMPOSING_CHAR)
2394 {
2395 /* The last rule was invalid. */
2396 int rule = charbuf[-1] + 0xA0;
2397
2398 charbuf[-2] = BYTE8_TO_CHAR (rule);
2399 charbuf[-1] = -1;
2400 new_chars = 1;
2401 }
2402 }
2403 else
2404 {
2405 charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2406
2407 if (cmp_status->method == COMPOSITION_WITH_RULE)
2408 {
2409 charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2410 charbuf[idx++] = -3;
2411 charbuf[idx++] = 0;
2412 new_chars = 1;
2413 }
2414 else
2415 {
2416 int nchars = charbuf[idx + 1] + 0xA0;
2417 int nbytes = charbuf[idx + 2] + 0xA0;
2418
2419 charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2420 charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2421 charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2422 charbuf[idx++] = -1;
2423 new_chars = 4;
2424 }
2425 }
2426 cmp_status->state = COMPOSING_NO;
2427 return new_chars;
2428}
2429
2430#define EMACS_MULE_MAYBE_FINISH_COMPOSITION() \
2431 do { \
2432 if (cmp_status->state != COMPOSING_NO) \
2433 char_offset += emacs_mule_finish_composition (charbuf, cmp_status); \
2316 } while (0) 2434 } while (0)
2317 2435
2318 2436
@@ -2335,12 +2453,22 @@ decode_coding_emacs_mule (coding)
2335 int eol_crlf = 2453 int eol_crlf =
2336 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 2454 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2337 int byte_after_cr = -1; 2455 int byte_after_cr = -1;
2456 struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
2338 2457
2339 CODING_GET_INFO (coding, attrs, charset_list); 2458 CODING_GET_INFO (coding, attrs, charset_list);
2340 2459
2460 if (cmp_status->state != COMPOSING_NO)
2461 {
2462 int i;
2463
2464 for (i = 0; i < cmp_status->length; i++)
2465 *charbuf++ = cmp_status->carryover[i];
2466 coding->annotated = 1;
2467 }
2468
2341 while (1) 2469 while (1)
2342 { 2470 {
2343 int c; 2471 int c, id;
2344 2472
2345 src_base = src; 2473 src_base = src;
2346 consumed_chars_base = consumed_chars; 2474 consumed_chars_base = consumed_chars;
@@ -2356,64 +2484,160 @@ decode_coding_emacs_mule (coding)
2356 c = byte_after_cr, byte_after_cr = -1; 2484 c = byte_after_cr, byte_after_cr = -1;
2357 else 2485 else
2358 ONE_MORE_BYTE (c); 2486 ONE_MORE_BYTE (c);
2359 if (c < 0) 2487
2488 if (c < 0 || c == 0x80)
2360 { 2489 {
2361 *charbuf++ = -c; 2490 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2362 char_offset++; 2491 if (c < 0)
2492 {
2493 *charbuf++ = -c;
2494 char_offset++;
2495 }
2496 else
2497 DECODE_EMACS_MULE_COMPOSITION_START ();
2498 continue;
2363 } 2499 }
2364 else if (c < 0x80) 2500
2501 if (c < 0x80)
2365 { 2502 {
2366 if (eol_crlf && c == '\r') 2503 if (eol_crlf && c == '\r')
2367 ONE_MORE_BYTE (byte_after_cr); 2504 ONE_MORE_BYTE (byte_after_cr);
2368 *charbuf++ = c; 2505 id = charset_ascii;
2369 char_offset++; 2506 if (cmp_status->state != COMPOSING_NO)
2370 } 2507 {
2371 else if (c == 0x80) 2508 if (cmp_status->old_form)
2372 { 2509 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2373 ONE_MORE_BYTE (c); 2510 else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2374 if (c < 0) 2511 cmp_status->ncomps--;
2375 goto invalid_code; 2512 }
2376 if (c - 0xF2 >= COMPOSITION_RELATIVE
2377 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2378 DECODE_EMACS_MULE_21_COMPOSITION (c);
2379 else if (c < 0xC0)
2380 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2381 else if (c == 0xFF)
2382 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2383 else
2384 goto invalid_code;
2385 } 2513 }
2386 else if (c < 0xA0 && emacs_mule_bytes[c] > 1) 2514 else
2387 { 2515 {
2388 int nbytes, nchars; 2516 int nchars, nbytes;
2389 int id;
2390 2517
2391 src = src_base; 2518 c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2392 consumed_chars = consumed_chars_base; 2519 cmp_status);
2393 c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2394 if (c < 0) 2520 if (c < 0)
2395 { 2521 {
2522 if (c == -1)
2523 goto invalid_code;
2396 if (c == -2) 2524 if (c == -2)
2397 break; 2525 break;
2398 goto invalid_code;
2399 } 2526 }
2527 src = src_base + nbytes;
2528 consumed_chars = consumed_chars_base + nchars;
2529 if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2530 cmp_status->ncomps -= nchars;
2531 }
2532
2533 /* Now if C >= 0, we found a normally encoded character, if C <
2534 0, we found an old-style composition component character or
2535 rule. */
2536
2537 if (cmp_status->state == COMPOSING_NO)
2538 {
2400 if (last_id != id) 2539 if (last_id != id)
2401 { 2540 {
2402 if (last_id != charset_ascii) 2541 if (last_id != charset_ascii)
2403 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id); 2542 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2543 last_id);
2404 last_id = id; 2544 last_id = id;
2405 last_offset = char_offset; 2545 last_offset = char_offset;
2406 } 2546 }
2407 *charbuf++ = c; 2547 *charbuf++ = c;
2408 src += nbytes;
2409 consumed_chars += nchars;
2410 char_offset++; 2548 char_offset++;
2411 } 2549 }
2412 else 2550 else if (cmp_status->state == COMPOSING_CHAR)
2413 goto invalid_code; 2551 {
2552 if (cmp_status->old_form)
2553 {
2554 if (c >= 0)
2555 {
2556 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2557 *charbuf++ = c;
2558 char_offset++;
2559 }
2560 else
2561 {
2562 *charbuf++ = -c;
2563 cmp_status->nchars++;
2564 cmp_status->length++;
2565 if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2566 EMACS_MULE_COMPOSITION_END ();
2567 else if (cmp_status->method == COMPOSITION_WITH_RULE)
2568 cmp_status->state = COMPOSING_RULE;
2569 }
2570 }
2571 else
2572 {
2573 *charbuf++ = c;
2574 cmp_status->length++;
2575 cmp_status->nchars--;
2576 if (cmp_status->nchars == 0)
2577 EMACS_MULE_COMPOSITION_END ();
2578 }
2579 }
2580 else if (cmp_status->state == COMPOSING_RULE)
2581 {
2582 int rule;
2583
2584 if (c >= 0)
2585 {
2586 EMACS_MULE_COMPOSITION_END ();
2587 *charbuf++ = c;
2588 char_offset++;
2589 }
2590 else
2591 {
2592 c = -c;
2593 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2594 if (rule < 0)
2595 goto invalid_code;
2596 *charbuf++ = -2;
2597 *charbuf++ = rule;
2598 cmp_status->length += 2;
2599 cmp_status->state = COMPOSING_CHAR;
2600 }
2601 }
2602 else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2603 {
2604 *charbuf++ = c;
2605 cmp_status->length++;
2606 if (cmp_status->ncomps == 0)
2607 cmp_status->state = COMPOSING_CHAR;
2608 else if (cmp_status->ncomps > 0)
2609 {
2610 if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2611 cmp_status->state = COMPOSING_COMPONENT_RULE;
2612 }
2613 else
2614 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2615 }
2616 else /* COMPOSING_COMPONENT_RULE */
2617 {
2618 int rule;
2619
2620 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2621 if (rule < 0)
2622 goto invalid_code;
2623 *charbuf++ = -2;
2624 *charbuf++ = rule;
2625 cmp_status->length += 2;
2626 cmp_status->ncomps--;
2627 if (cmp_status->ncomps > 0)
2628 cmp_status->state = COMPOSING_COMPONENT_CHAR;
2629 else
2630 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2631 }
2632 continue;
2633
2634 retry:
2635 src = src_base;
2636 consumed_chars = consumed_chars_base;
2414 continue; 2637 continue;
2415 2638
2416 invalid_code: 2639 invalid_code:
2640 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2417 src = src_base; 2641 src = src_base;
2418 consumed_chars = consumed_chars_base; 2642 consumed_chars = consumed_chars_base;
2419 ONE_MORE_BYTE (c); 2643 ONE_MORE_BYTE (c);
@@ -2423,6 +2647,19 @@ decode_coding_emacs_mule (coding)
2423 } 2647 }
2424 2648
2425 no_more_source: 2649 no_more_source:
2650 if (cmp_status->state != COMPOSING_NO)
2651 {
2652 if (coding->mode & CODING_MODE_LAST_BLOCK)
2653 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2654 else
2655 {
2656 int i;
2657
2658 charbuf -= cmp_status->length;
2659 for (i = 0; i < cmp_status->length; i++)
2660 cmp_status->carryover[i] = charbuf[i];
2661 }
2662 }
2426 if (last_id != charset_ascii) 2663 if (last_id != charset_ascii)
2427 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id); 2664 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2428 coding->consumed_char += consumed_chars_base; 2665 coding->consumed_char += consumed_chars_base;
@@ -3077,134 +3314,237 @@ detect_coding_iso_2022 (coding, detect_info)
3077 } while (0) 3314 } while (0)
3078 3315
3079 3316
3080#define MAYBE_FINISH_COMPOSITION() \ 3317/* Handle these composition sequences (ALT: alternate char):
3318
3319 (1) relative composition: ESC 0 CHAR ... ESC 1
3320 (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3321 (3) altchar composition: ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3322 (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3323
3324 When the start sequence (ESC 0/2/3/4) is found, this annotation
3325 header is produced.
3326
3327 [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3328
3329 Then, upon reading CHAR or RULE (one or two bytes), these codes are
3330 produced until the end sequence (ESC 1) is found:
3331
3332 (1) CHAR ... CHAR
3333 (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3334 (3) ALT ... ALT -1 -1 CHAR ... CHAR
3335 (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3336
3337 When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
3338 annotation header are updated as below:
3339
3340 (1) LENGTH: unchanged, NCHARS: number of CHARs
3341 (2) LENGTH: unchanged, NCHARS: number of CHARs
3342 (3) LENGTH: += number of ALTs + 2, NCHARS: number of CHARs
3343 (4) LENGTH: += number of ALTs * 3, NCHARS: number of CHARs
3344
3345 If an error is found while composing, the annotation header is
3346 changed to:
3347
3348 [ ESC '0'/'2'/'3'/'4' -2 0 ]
3349
3350 and the sequence [ -2 DECODED-RULE ] is changed to the original
3351 byte sequence as below:
3352 o the original byte sequence is B: [ B -1 ]
3353 o the original byte sequence is B1 B2: [ B1 B2 ]
3354 and the sequence [ -1 -1 ] is changed to the original byte
3355 sequence:
3356 [ ESC '0' ]
3357*/
3358
3359/* Decode a composition rule C1 and maybe one more byte from the
3360 source, and set RULE to the encoded composition rule, NBYTES to the
3361 length of the composition rule. If the rule is invalid, set RULE
3362 to some negative value. */
3363
3364#define DECODE_COMPOSITION_RULE(rule, nbytes) \
3365 do { \
3366 rule = c1 - 32; \
3367 if (rule < 0) \
3368 break; \
3369 if (rule < 81) /* old format (before ver.21) */ \
3370 { \
3371 int gref = (rule) / 9; \
3372 int nref = (rule) % 9; \
3373 if (gref == 4) gref = 10; \
3374 if (nref == 4) nref = 10; \
3375 rule = COMPOSITION_ENCODE_RULE (gref, nref); \
3376 nbytes = 1; \
3377 } \
3378 else /* new format (after ver.21) */ \
3379 { \
3380 int c; \
3381 \
3382 ONE_MORE_BYTE (c); \
3383 rule = COMPOSITION_ENCODE_RULE (rule - 81, c - 32); \
3384 if (rule >= 0) \
3385 rule += 0x100; /* to distinguish it from the old format */ \
3386 nbytes = 2; \
3387 } \
3388 } while (0)
3389
3390#define ENCODE_COMPOSITION_RULE(rule) \
3081 do { \ 3391 do { \
3082 int i; \ 3392 int gref = (rule % 0x100) / 12, nref = (rule % 0x100) % 12; \
3083 if (composition_state == COMPOSING_NO) \ 3393 \
3084 break; \ 3394 if (rule < 0x100) /* old format */ \
3085 /* It is assured that we have enough room for producing \
3086 characters stored in the table `components'. */ \
3087 if (charbuf + component_idx > charbuf_end) \
3088 goto no_more_source; \
3089 composition_state = COMPOSING_NO; \
3090 if (method == COMPOSITION_RELATIVE \
3091 || method == COMPOSITION_WITH_ALTCHARS) \
3092 { \ 3395 { \
3093 for (i = 0; i < component_idx; i++) \ 3396 if (gref == 10) gref = 4; \
3094 *charbuf++ = components[i]; \ 3397 if (nref == 10) nref = 4; \
3095 char_offset += component_idx; \ 3398 charbuf[idx] = 32 + gref * 9 + nref; \
3399 charbuf[idx + 1] = -1; \
3400 new_chars++; \
3096 } \ 3401 } \
3097 else \ 3402 else /* new format */ \
3098 { \ 3403 { \
3099 for (i = 0; i < component_idx; i += 2) \ 3404 charbuf[idx] = 32 + 81 + gref; \
3100 *charbuf++ = components[i]; \ 3405 charbuf[idx + 1] = 32 + nref; \
3101 char_offset += (component_idx / 2) + 1; \ 3406 new_chars += 2; \
3102 } \ 3407 } \
3103 } while (0) 3408 } while (0)
3104 3409
3410/* Finish the current composition as invalid. */
3411
3412static int finish_composition P_ ((int *, struct composition_status *));
3413
3414static int
3415finish_composition (charbuf, cmp_status)
3416 int *charbuf;
3417 struct composition_status *cmp_status;
3418{
3419 int idx = - cmp_status->length;
3420 int new_chars;
3421
3422 /* Recover the original ESC sequence */
3423 charbuf[idx++] = ISO_CODE_ESC;
3424 charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3425 : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3426 : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3427 /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
3428 : '4');
3429 charbuf[idx++] = -2;
3430 charbuf[idx++] = 0;
3431 charbuf[idx++] = -1;
3432 new_chars = cmp_status->nchars;
3433 if (cmp_status->method >= COMPOSITION_WITH_RULE)
3434 for (; idx < 0; idx++)
3435 {
3436 int elt = charbuf[idx];
3437
3438 if (elt == -2)
3439 {
3440 ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3441 idx++;
3442 }
3443 else if (elt == -1)
3444 {
3445 charbuf[idx++] = ISO_CODE_ESC;
3446 charbuf[idx] = '0';
3447 new_chars += 2;
3448 }
3449 }
3450 cmp_status->state = COMPOSING_NO;
3451 return new_chars;
3452}
3453
3454/* If characters are under composition, finish the composition. */
3455#define MAYBE_FINISH_COMPOSITION() \
3456 do { \
3457 if (cmp_status->state != COMPOSING_NO) \
3458 char_offset += finish_composition (charbuf, cmp_status); \
3459 } while (0)
3105 3460
3106/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4. 3461/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
3462
3107 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1 3463 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
3108 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1 3464 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3109 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1 3465 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
3110 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1 3466 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
3111 */
3112 3467
3113#define DECODE_COMPOSITION_START(c1) \ 3468 Produce this annotation sequence now:
3114 do { \ 3469
3115 if (c1 == '0' \ 3470 [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3116 && composition_state == COMPOSING_COMPONENT_RULE) \ 3471*/
3117 { \ 3472
3118 component_len = component_idx; \ 3473#define DECODE_COMPOSITION_START(c1) \
3119 composition_state = COMPOSING_CHAR; \ 3474 do { \
3120 } \ 3475 if (c1 == '0' \
3121 else \ 3476 && ((cmp_status->state == COMPOSING_COMPONENT_CHAR \
3122 { \ 3477 && cmp_status->method == COMPOSITION_WITH_ALTCHARS) \
3123 const unsigned char *p; \ 3478 || (cmp_status->state == COMPOSING_COMPONENT_RULE \
3124 \ 3479 && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))) \
3125 MAYBE_FINISH_COMPOSITION (); \ 3480 { \
3126 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \ 3481 *charbuf++ = -1; \
3127 goto no_more_source; \ 3482 *charbuf++= -1; \
3128 for (p = src; p < src_end - 1; p++) \ 3483 cmp_status->state = COMPOSING_CHAR; \
3129 if (*p == ISO_CODE_ESC && p[1] == '1') \ 3484 cmp_status->length += 2; \
3130 break; \ 3485 } \
3131 if (p == src_end - 1) \ 3486 else \
3132 { \ 3487 { \
3133 if (coding->mode & CODING_MODE_LAST_BLOCK) \ 3488 MAYBE_FINISH_COMPOSITION (); \
3134 goto invalid_code; \ 3489 cmp_status->method = (c1 == '0' ? COMPOSITION_RELATIVE \
3135 /* The current composition doesn't end in the current \ 3490 : c1 == '2' ? COMPOSITION_WITH_RULE \
3136 source. */ \ 3491 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
3137 record_conversion_result \ 3492 : COMPOSITION_WITH_RULE_ALTCHARS); \
3138 (coding, CODING_RESULT_INSUFFICIENT_SRC); \ 3493 cmp_status->state \
3139 goto no_more_source; \ 3494 = (c1 <= '2' ? COMPOSING_CHAR : COMPOSING_COMPONENT_CHAR); \
3140 } \ 3495 ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method); \
3141 \ 3496 cmp_status->length = MAX_ANNOTATION_LENGTH; \
3142 /* This is surely the start of a composition. */ \ 3497 cmp_status->nchars = cmp_status->ncomps = 0; \
3143 method = (c1 == '0' ? COMPOSITION_RELATIVE \ 3498 coding->annotated = 1; \
3144 : c1 == '2' ? COMPOSITION_WITH_RULE \ 3499 } \
3145 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
3146 : COMPOSITION_WITH_RULE_ALTCHARS); \
3147 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
3148 : COMPOSING_COMPONENT_CHAR); \
3149 component_idx = component_len = 0; \
3150 } \
3151 } while (0) 3500 } while (0)
3152 3501
3153 3502
3154/* Handle compositoin end sequence ESC 1. */ 3503/* Handle composition end sequence ESC 1. */
3155 3504
3156#define DECODE_COMPOSITION_END() \ 3505#define DECODE_COMPOSITION_END() \
3157 do { \ 3506 do { \
3158 int nchars = (component_len > 0 ? component_idx - component_len \ 3507 if (cmp_status->nchars == 0 \
3159 : method == COMPOSITION_RELATIVE ? component_idx \ 3508 || ((cmp_status->state == COMPOSING_CHAR) \
3160 : (component_idx + 1) / 2); \ 3509 == (cmp_status->method == COMPOSITION_WITH_RULE))) \
3161 int i; \
3162 int *saved_charbuf = charbuf; \
3163 \
3164 ADD_COMPOSITION_DATA (charbuf, nchars, method); \
3165 if (method != COMPOSITION_RELATIVE) \
3166 { \ 3510 { \
3167 if (component_len == 0) \ 3511 MAYBE_FINISH_COMPOSITION (); \
3168 for (i = 0; i < component_idx; i++) \ 3512 goto invalid_code; \
3169 *charbuf++ = components[i]; \
3170 else \
3171 for (i = 0; i < component_len; i++) \
3172 *charbuf++ = components[i]; \
3173 *saved_charbuf = saved_charbuf - charbuf; \
3174 } \ 3513 } \
3175 if (method == COMPOSITION_WITH_RULE) \ 3514 if (cmp_status->method == COMPOSITION_WITH_ALTCHARS) \
3176 for (i = 0; i < component_idx; i += 2, char_offset++) \ 3515 charbuf[- cmp_status->length] -= cmp_status->ncomps + 2; \
3177 *charbuf++ = components[i]; \ 3516 else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS) \
3178 else \ 3517 charbuf[- cmp_status->length] -= cmp_status->ncomps * 3; \
3179 for (i = component_len; i < component_idx; i++, char_offset++) \ 3518 charbuf[- cmp_status->length + 2] = cmp_status->nchars; \
3180 *charbuf++ = components[i]; \ 3519 char_offset += cmp_status->nchars; \
3181 coding->annotated = 1; \ 3520 cmp_status->state = COMPOSING_NO; \
3182 composition_state = COMPOSING_NO; \
3183 } while (0) 3521 } while (0)
3184 3522
3523/* Store a composition rule RULE in charbuf, and update cmp_status. */
3185 3524
3186/* Decode a composition rule from the byte C1 (and maybe one more byte 3525#define STORE_COMPOSITION_RULE(rule) \
3187 from SRC) and store one encoded composition rule in 3526 do { \
3188 coding->cmp_data. */ 3527 *charbuf++ = -2; \
3528 *charbuf++ = rule; \
3529 cmp_status->length += 2; \
3530 cmp_status->state--; \
3531 } while (0)
3189 3532
3190#define DECODE_COMPOSITION_RULE(c1) \ 3533/* Store a composed char or a component char C in charbuf, and update
3534 cmp_status. */
3535
3536#define STORE_COMPOSITION_CHAR(c) \
3191 do { \ 3537 do { \
3192 (c1) -= 32; \ 3538 *charbuf++ = (c); \
3193 if (c1 < 81) /* old format (before ver.21) */ \ 3539 cmp_status->length++; \
3194 { \ 3540 if (cmp_status->state == COMPOSING_CHAR) \
3195 int gref = (c1) / 9; \ 3541 cmp_status->nchars++; \
3196 int nref = (c1) % 9; \
3197 if (gref == 4) gref = 10; \
3198 if (nref == 4) nref = 10; \
3199 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
3200 } \
3201 else if (c1 < 93) /* new format (after ver.21) */ \
3202 { \
3203 ONE_MORE_BYTE (c2); \
3204 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
3205 } \
3206 else \ 3542 else \
3207 c1 = 0; \ 3543 cmp_status->ncomps++; \
3544 if (cmp_status->method == COMPOSITION_WITH_RULE \
3545 || (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS \
3546 && cmp_status->state == COMPOSING_COMPONENT_CHAR)) \
3547 cmp_status->state++; \
3208 } while (0) 3548 } while (0)
3209 3549
3210 3550
@@ -3219,7 +3559,7 @@ decode_coding_iso_2022 (coding)
3219 const unsigned char *src_base; 3559 const unsigned char *src_base;
3220 int *charbuf = coding->charbuf + coding->charbuf_used; 3560 int *charbuf = coding->charbuf + coding->charbuf_used;
3221 int *charbuf_end 3561 int *charbuf_end
3222 = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH; 3562 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
3223 int consumed_chars = 0, consumed_chars_base; 3563 int consumed_chars = 0, consumed_chars_base;
3224 int multibytep = coding->src_multibyte; 3564 int multibytep = coding->src_multibyte;
3225 /* Charsets invoked to graphic plane 0 and 1 respectively. */ 3565 /* Charsets invoked to graphic plane 0 and 1 respectively. */
@@ -3228,18 +3568,7 @@ decode_coding_iso_2022 (coding)
3228 int charset_id_2, charset_id_3; 3568 int charset_id_2, charset_id_3;
3229 struct charset *charset; 3569 struct charset *charset;
3230 int c; 3570 int c;
3231 /* For handling composition sequence. */ 3571 struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
3232#define COMPOSING_NO 0
3233#define COMPOSING_CHAR 1
3234#define COMPOSING_RULE 2
3235#define COMPOSING_COMPONENT_CHAR 3
3236#define COMPOSING_COMPONENT_RULE 4
3237
3238 int composition_state = COMPOSING_NO;
3239 enum composition_method method;
3240 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
3241 int component_idx;
3242 int component_len;
3243 Lisp_Object attrs, charset_list; 3572 Lisp_Object attrs, charset_list;
3244 int char_offset = coding->produced_char; 3573 int char_offset = coding->produced_char;
3245 int last_offset = char_offset; 3574 int last_offset = char_offset;
@@ -3247,6 +3576,7 @@ decode_coding_iso_2022 (coding)
3247 int eol_crlf = 3576 int eol_crlf =
3248 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 3577 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3249 int byte_after_cr = -1; 3578 int byte_after_cr = -1;
3579 int i;
3250 3580
3251 CODING_GET_INFO (coding, attrs, charset_list); 3581 CODING_GET_INFO (coding, attrs, charset_list);
3252 setup_iso_safe_charsets (attrs); 3582 setup_iso_safe_charsets (attrs);
@@ -3254,6 +3584,13 @@ decode_coding_iso_2022 (coding)
3254 charset_list = CODING_ATTR_CHARSET_LIST (attrs); 3584 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3255 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs)); 3585 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
3256 3586
3587 if (cmp_status->state != COMPOSING_NO)
3588 {
3589 for (i = 0; i < cmp_status->length; i++)
3590 *charbuf++ = cmp_status->carryover[i];
3591 coding->annotated = 1;
3592 }
3593
3257 while (1) 3594 while (1)
3258 { 3595 {
3259 int c1, c2; 3596 int c1, c2;
@@ -3275,26 +3612,58 @@ decode_coding_iso_2022 (coding)
3275 if (c1 < 0) 3612 if (c1 < 0)
3276 goto invalid_code; 3613 goto invalid_code;
3277 3614
3278 /* We produce at most one character. */ 3615 if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
3279 switch (iso_code_class [c1])
3280 { 3616 {
3281 case ISO_0x20_or_0x7F: 3617 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3282 if (composition_state != COMPOSING_NO) 3618 char_offset++;
3619 CODING_ISO_EXTSEGMENT_LEN (coding)--;
3620 continue;
3621 }
3622
3623 if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3624 {
3625 if (c1 == ISO_CODE_ESC)
3283 { 3626 {
3284 if (composition_state == COMPOSING_RULE 3627 if (src + 1 >= src_end)
3285 || composition_state == COMPOSING_COMPONENT_RULE) 3628 goto no_more_source;
3629 *charbuf++ = ISO_CODE_ESC;
3630 char_offset++;
3631 if (src[0] == '%' && src[1] == '@')
3286 { 3632 {
3287 if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1) 3633 src += 2;
3288 { 3634 consumed_chars += 2;
3289 DECODE_COMPOSITION_RULE (c1); 3635 char_offset += 2;
3290 components[component_idx++] = c1; 3636 /* We are sure charbuf can contain two more chars. */
3291 composition_state--; 3637 *charbuf++ = '%';
3292 continue; 3638 *charbuf++ = '@';
3293 } 3639 CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
3294 /* Too long composition. */
3295 MAYBE_FINISH_COMPOSITION ();
3296 } 3640 }
3297 } 3641 }
3642 else
3643 {
3644 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3645 char_offset++;
3646 }
3647 continue;
3648 }
3649
3650 if ((cmp_status->state == COMPOSING_RULE
3651 || cmp_status->state == COMPOSING_COMPONENT_RULE)
3652 && c1 != ISO_CODE_ESC)
3653 {
3654 int rule, nbytes;
3655
3656 DECODE_COMPOSITION_RULE (rule, nbytes);
3657 if (rule < 0)
3658 goto invalid_code;
3659 STORE_COMPOSITION_RULE (rule);
3660 continue;
3661 }
3662
3663 /* We produce at most one character. */
3664 switch (iso_code_class [c1])
3665 {
3666 case ISO_0x20_or_0x7F:
3298 if (charset_id_0 < 0 3667 if (charset_id_0 < 0
3299 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0))) 3668 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3300 /* This is SPACE or DEL. */ 3669 /* This is SPACE or DEL. */
@@ -3304,21 +3673,6 @@ decode_coding_iso_2022 (coding)
3304 break; 3673 break;
3305 3674
3306 case ISO_graphic_plane_0: 3675 case ISO_graphic_plane_0:
3307 if (composition_state != COMPOSING_NO)
3308 {
3309 if (composition_state == COMPOSING_RULE
3310 || composition_state == COMPOSING_COMPONENT_RULE)
3311 {
3312 if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
3313 {
3314 DECODE_COMPOSITION_RULE (c1);
3315 components[component_idx++] = c1;
3316 composition_state--;
3317 continue;
3318 }
3319 MAYBE_FINISH_COMPOSITION ();
3320 }
3321 }
3322 if (charset_id_0 < 0) 3676 if (charset_id_0 < 0)
3323 charset = CHARSET_FROM_ID (charset_ascii); 3677 charset = CHARSET_FROM_ID (charset_ascii);
3324 else 3678 else
@@ -3346,7 +3700,6 @@ decode_coding_iso_2022 (coding)
3346 break; 3700 break;
3347 3701
3348 case ISO_control_1: 3702 case ISO_control_1:
3349 MAYBE_FINISH_COMPOSITION ();
3350 goto invalid_code; 3703 goto invalid_code;
3351 3704
3352 case ISO_shift_out: 3705 case ISO_shift_out:
@@ -3484,11 +3837,17 @@ decode_coding_iso_2022 (coding)
3484 case '0': case '2': case '3': case '4': /* start composition */ 3837 case '0': case '2': case '3': case '4': /* start composition */
3485 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)) 3838 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3486 goto invalid_code; 3839 goto invalid_code;
3840 if (last_id != charset_ascii)
3841 {
3842 ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3843 last_id = charset_ascii;
3844 last_offset = char_offset;
3845 }
3487 DECODE_COMPOSITION_START (c1); 3846 DECODE_COMPOSITION_START (c1);
3488 continue; 3847 continue;
3489 3848
3490 case '1': /* end composition */ 3849 case '1': /* end composition */
3491 if (composition_state == COMPOSING_NO) 3850 if (cmp_status->state == COMPOSING_NO)
3492 goto invalid_code; 3851 goto invalid_code;
3493 DECODE_COMPOSITION_END (); 3852 DECODE_COMPOSITION_END ();
3494 continue; 3853 continue;
@@ -3539,10 +3898,16 @@ decode_coding_iso_2022 (coding)
3539 int size; 3898 int size;
3540 3899
3541 ONE_MORE_BYTE (dim); 3900 ONE_MORE_BYTE (dim);
3901 if (dim < 0 || dim > 4)
3902 goto invalid_code;
3542 ONE_MORE_BYTE (M); 3903 ONE_MORE_BYTE (M);
3904 if (M < 128)
3905 goto invalid_code;
3543 ONE_MORE_BYTE (L); 3906 ONE_MORE_BYTE (L);
3907 if (L < 128)
3908 goto invalid_code;
3544 size = ((M - 128) * 128) + (L - 128); 3909 size = ((M - 128) * 128) + (L - 128);
3545 if (charbuf + 8 + size > charbuf_end) 3910 if (charbuf + 6 > charbuf_end)
3546 goto break_loop; 3911 goto break_loop;
3547 *charbuf++ = ISO_CODE_ESC; 3912 *charbuf++ = ISO_CODE_ESC;
3548 *charbuf++ = '%'; 3913 *charbuf++ = '%';
@@ -3550,11 +3915,7 @@ decode_coding_iso_2022 (coding)
3550 *charbuf++ = dim; 3915 *charbuf++ = dim;
3551 *charbuf++ = BYTE8_TO_CHAR (M); 3916 *charbuf++ = BYTE8_TO_CHAR (M);
3552 *charbuf++ = BYTE8_TO_CHAR (L); 3917 *charbuf++ = BYTE8_TO_CHAR (L);
3553 while (size-- > 0) 3918 CODING_ISO_EXTSEGMENT_LEN (coding) = size;
3554 {
3555 ONE_MORE_BYTE (c1);
3556 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3557 }
3558 } 3919 }
3559 else if (c1 == 'G') 3920 else if (c1 == 'G')
3560 { 3921 {
@@ -3562,32 +3923,12 @@ decode_coding_iso_2022 (coding)
3562 ESC % G --UTF-8-BYTES-- ESC % @ 3923 ESC % G --UTF-8-BYTES-- ESC % @
3563 We keep these bytes as is for the moment. 3924 We keep these bytes as is for the moment.
3564 They may be decoded by post-read-conversion. */ 3925 They may be decoded by post-read-conversion. */
3565 int *p = charbuf; 3926 if (charbuf + 3 > charbuf_end)
3566
3567 if (p + 6 > charbuf_end)
3568 goto break_loop; 3927 goto break_loop;
3569 *p++ = ISO_CODE_ESC; 3928 *charbuf++ = ISO_CODE_ESC;
3570 *p++ = '%'; 3929 *charbuf++ = '%';
3571 *p++ = 'G'; 3930 *charbuf++ = 'G';
3572 while (p < charbuf_end) 3931 CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
3573 {
3574 ONE_MORE_BYTE (c1);
3575 if (c1 == ISO_CODE_ESC
3576 && src + 1 < src_end
3577 && src[0] == '%'
3578 && src[1] == '@')
3579 {
3580 src += 2;
3581 break;
3582 }
3583 *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3584 }
3585 if (p + 3 > charbuf_end)
3586 goto break_loop;
3587 *p++ = ISO_CODE_ESC;
3588 *p++ = '%';
3589 *p++ = '@';
3590 charbuf = p;
3591 } 3932 }
3592 else 3933 else
3593 goto invalid_code; 3934 goto invalid_code;
@@ -3625,7 +3966,8 @@ decode_coding_iso_2022 (coding)
3625 } 3966 }
3626 } 3967 }
3627 3968
3628 if (charset->id != charset_ascii 3969 if (cmp_status->state == COMPOSING_NO
3970 && charset->id != charset_ascii
3629 && last_id != charset->id) 3971 && last_id != charset->id)
3630 { 3972 {
3631 if (last_id != charset_ascii) 3973 if (last_id != charset_ascii)
@@ -3667,28 +4009,23 @@ decode_coding_iso_2022 (coding)
3667 *charbuf++ = BYTE8_TO_CHAR (*src_base); 4009 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3668 } 4010 }
3669 } 4011 }
3670 else if (composition_state == COMPOSING_NO) 4012 else if (cmp_status->state == COMPOSING_NO)
3671 { 4013 {
3672 *charbuf++ = c; 4014 *charbuf++ = c;
3673 char_offset++; 4015 char_offset++;
3674 } 4016 }
3675 else 4017 else if ((cmp_status->state == COMPOSING_CHAR
4018 ? cmp_status->nchars
4019 : cmp_status->ncomps)
4020 >= MAX_COMPOSITION_COMPONENTS)
3676 { 4021 {
3677 if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1) 4022 /* Too long composition. */
3678 { 4023 MAYBE_FINISH_COMPOSITION ();
3679 components[component_idx++] = c; 4024 *charbuf++ = c;
3680 if (method == COMPOSITION_WITH_RULE 4025 char_offset++;
3681 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3682 && composition_state == COMPOSING_COMPONENT_CHAR))
3683 composition_state++;
3684 }
3685 else
3686 {
3687 MAYBE_FINISH_COMPOSITION ();
3688 *charbuf++ = c;
3689 char_offset++;
3690 }
3691 } 4026 }
4027 else
4028 STORE_COMPOSITION_CHAR (c);
3692 continue; 4029 continue;
3693 4030
3694 invalid_code: 4031 invalid_code:
@@ -3706,7 +4043,18 @@ decode_coding_iso_2022 (coding)
3706 } 4043 }
3707 4044
3708 no_more_source: 4045 no_more_source:
3709 if (last_id != charset_ascii) 4046 if (cmp_status->state != COMPOSING_NO)
4047 {
4048 if (coding->mode & CODING_MODE_LAST_BLOCK)
4049 MAYBE_FINISH_COMPOSITION ();
4050 else
4051 {
4052 charbuf -= cmp_status->length;
4053 for (i = 0; i < cmp_status->length; i++)
4054 cmp_status->carryover[i] = charbuf[i];
4055 }
4056 }
4057 else if (last_id != charset_ascii)
3710 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id); 4058 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3711 coding->consumed_char += consumed_chars_base; 4059 coding->consumed_char += consumed_chars_base;
3712 coding->consumed = src_base - coding->source; 4060 coding->consumed = src_base - coding->source;
@@ -5476,6 +5824,10 @@ setup_coding_system (coding_system, coding)
5476 coding->safe_charsets = SDATA (val); 5824 coding->safe_charsets = SDATA (val);
5477 } 5825 }
5478 CODING_ISO_FLAGS (coding) = flags; 5826 CODING_ISO_FLAGS (coding) = flags;
5827 CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5828 CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5829 CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5830 CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
5479 } 5831 }
5480 else if (EQ (coding_type, Qcharset)) 5832 else if (EQ (coding_type, Qcharset))
5481 { 5833 {
@@ -5533,6 +5885,7 @@ setup_coding_system (coding_system, coding)
5533 coding->encoder = encode_coding_emacs_mule; 5885 coding->encoder = encode_coding_emacs_mule;
5534 coding->common_flags 5886 coding->common_flags
5535 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK); 5887 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5888 coding->spec.emacs_mule.full_support = 1;
5536 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full)) 5889 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5537 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list)) 5890 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5538 { 5891 {
@@ -5550,7 +5903,10 @@ setup_coding_system (coding_system, coding)
5550 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0); 5903 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5551 coding->max_charset_id = max_charset_id; 5904 coding->max_charset_id = max_charset_id;
5552 coding->safe_charsets = SDATA (safe_charsets); 5905 coding->safe_charsets = SDATA (safe_charsets);
5906 coding->spec.emacs_mule.full_support = 1;
5553 } 5907 }
5908 coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5909 coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
5554 } 5910 }
5555 else if (EQ (coding_type, Qshift_jis)) 5911 else if (EQ (coding_type, Qshift_jis))
5556 { 5912 {
@@ -6338,51 +6694,39 @@ get_translation_table (attrs, encodep, max_lookup)
6338 } while (0) 6694 } while (0)
6339 6695
6340 6696
6697/* Return a translation of character(s) at BUF according to TRANS.
6698 TRANS is TO-CHAR or ((FROM . TO) ...) where
6699 FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6700 The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6701 translation is found, and Qnil if not found.
6702 If BUF is too short to look up characters in FROM, return Qt. */
6703
6341static Lisp_Object 6704static Lisp_Object
6342get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars) 6705get_translation (trans, buf, buf_end)
6343 Lisp_Object val; 6706 Lisp_Object trans;
6344 int *buf, *buf_end; 6707 int *buf, *buf_end;
6345 int last_block;
6346 int *from_nchars, *to_nchars;
6347{ 6708{
6348 /* VAL is TO or (([FROM-CHAR ...] . TO) ...) where TO is TO-CHAR or 6709
6349 [TO-CHAR ...]. */ 6710 if (INTEGERP (trans))
6350 if (CONSP (val)) 6711 return trans;
6712 for (; CONSP (trans); trans = XCDR (trans))
6351 { 6713 {
6352 Lisp_Object from, tail; 6714 Lisp_Object val = XCAR (trans);
6353 int i, len; 6715 Lisp_Object from = XCAR (val);
6716 int len = ASIZE (from);
6717 int i;
6354 6718
6355 for (tail = val; CONSP (tail); tail = XCDR (tail)) 6719 for (i = 0; i < len; i++)
6356 { 6720 {
6357 val = XCAR (tail); 6721 if (buf + i == buf_end)
6358 from = XCAR (val); 6722 return Qt;
6359 len = ASIZE (from); 6723 if (XINT (AREF (from, i)) != buf[i])
6360 for (i = 0; i < len; i++) 6724 break;
6361 {
6362 if (buf + i == buf_end)
6363 {
6364 if (! last_block)
6365 return Qt;
6366 break;
6367 }
6368 if (XINT (AREF (from, i)) != buf[i])
6369 break;
6370 }
6371 if (i == len)
6372 {
6373 val = XCDR (val);
6374 *from_nchars = len;
6375 break;
6376 }
6377 } 6725 }
6378 if (! CONSP (tail)) 6726 if (i == len)
6379 return Qnil; 6727 return val;
6380 } 6728 }
6381 if (VECTORP (val)) 6729 return Qnil;
6382 *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
6383 else
6384 *buf = XINT (val);
6385 return val;
6386} 6730}
6387 6731
6388 6732
@@ -6422,11 +6766,23 @@ produce_chars (coding, translation_table, last_block)
6422 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans); 6766 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6423 if (! NILP (trans)) 6767 if (! NILP (trans))
6424 { 6768 {
6425 trans = get_translation (trans, buf, buf_end, last_block, 6769 trans = get_translation (trans, buf, buf_end);
6426 &from_nchars, &to_nchars); 6770 if (INTEGERP (trans))
6427 if (EQ (trans, Qt)) 6771 c = XINT (trans);
6772 else if (CONSP (trans))
6773 {
6774 from_nchars = ASIZE (XCAR (trans));
6775 trans = XCDR (trans);
6776 if (INTEGERP (trans))
6777 c = XINT (trans);
6778 else
6779 {
6780 to_nchars = ASIZE (trans);
6781 c = XINT (AREF (trans, 0));
6782 }
6783 }
6784 else if (EQ (trans, Qt) && ! last_block)
6428 break; 6785 break;
6429 c = *buf;
6430 } 6786 }
6431 6787
6432 if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end) 6788 if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
@@ -6438,7 +6794,8 @@ produce_chars (coding, translation_table, last_block)
6438 if (EQ (coding->src_object, coding->dst_object)) 6794 if (EQ (coding->src_object, coding->dst_object))
6439 { 6795 {
6440 coding_set_source (coding); 6796 coding_set_source (coding);
6441 dst_end = ((unsigned char *) coding->source) + coding->consumed; 6797 dst_end = (((unsigned char *) coding->source)
6798 + coding->consumed);
6442 } 6799 }
6443 else 6800 else
6444 dst_end = coding->destination + coding->dst_bytes; 6801 dst_end = coding->destination + coding->dst_bytes;
@@ -6455,9 +6812,7 @@ produce_chars (coding, translation_table, last_block)
6455 *dst++ = CHAR_TO_BYTE8 (c); 6812 *dst++ = CHAR_TO_BYTE8 (c);
6456 } 6813 }
6457 produced_chars += to_nchars; 6814 produced_chars += to_nchars;
6458 *buf++ = to_nchars; 6815 buf += from_nchars;
6459 while (--from_nchars > 0)
6460 *buf++ = 0;
6461 } 6816 }
6462 else 6817 else
6463 /* This is an annotation datum. (-C) is the length. */ 6818 /* This is an annotation datum. (-C) is the length. */
@@ -6573,7 +6928,7 @@ produce_chars (coding, translation_table, last_block)
6573 6928
6574/* Compose text in CODING->object according to the annotation data at 6929/* Compose text in CODING->object according to the annotation data at
6575 CHARBUF. CHARBUF is an array: 6930 CHARBUF. CHARBUF is an array:
6576 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ] 6931 [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
6577 */ 6932 */
6578 6933
6579static INLINE void 6934static INLINE void
@@ -6587,33 +6942,33 @@ produce_composition (coding, charbuf, pos)
6587 enum composition_method method; 6942 enum composition_method method;
6588 Lisp_Object components; 6943 Lisp_Object components;
6589 6944
6590 len = -charbuf[0]; 6945 len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
6591 to = pos + charbuf[2]; 6946 to = pos + charbuf[2];
6592 if (to <= pos) 6947 method = (enum composition_method) (charbuf[4]);
6593 return;
6594 method = (enum composition_method) (charbuf[3]);
6595 6948
6596 if (method == COMPOSITION_RELATIVE) 6949 if (method == COMPOSITION_RELATIVE)
6597 components = Qnil; 6950 components = Qnil;
6598 else if (method >= COMPOSITION_WITH_RULE 6951 else
6599 && method <= COMPOSITION_WITH_RULE_ALTCHARS)
6600 { 6952 {
6601 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1]; 6953 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6602 int i; 6954 int i, j;
6603 6955
6604 len -= 4; 6956 if (method == COMPOSITION_WITH_RULE)
6605 charbuf += 4; 6957 len = charbuf[2] * 3 - 2;
6606 for (i = 0; i < len; i++) 6958 charbuf += MAX_ANNOTATION_LENGTH;
6959 /* charbuf = [ CHAR ... CHAR ] or [ CHAR -2 RULE ... CHAR ] */
6960 for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
6607 { 6961 {
6608 args[i] = make_number (charbuf[i]); 6962 if (charbuf[i] >= 0)
6609 if (charbuf[i] < 0) 6963 args[j] = make_number (charbuf[i]);
6610 return; 6964 else
6965 {
6966 i++;
6967 args[j] = make_number (charbuf[i] % 0x100);
6968 }
6611 } 6969 }
6612 components = (method == COMPOSITION_WITH_ALTCHARS 6970 components = (i == j ? Fstring (j, args) : Fvector (j, args));
6613 ? Fstring (len, args) : Fvector (len, args));
6614 } 6971 }
6615 else
6616 return;
6617 compose_text (pos, to, components, Qnil, coding->dst_object); 6972 compose_text (pos, to, components, Qnil, coding->dst_object);
6618} 6973}
6619 6974
@@ -6675,21 +7030,21 @@ produce_annotation (coding, pos)
6675 while (charbuf < charbuf_end) 7030 while (charbuf < charbuf_end)
6676 { 7031 {
6677 if (*charbuf >= 0) 7032 if (*charbuf >= 0)
6678 pos += *charbuf++; 7033 pos++, charbuf++;
6679 else 7034 else
6680 { 7035 {
6681 int len = -*charbuf; 7036 int len = -*charbuf;
6682 switch (charbuf[1]) 7037
6683 { 7038 if (len > 2)
6684 case CODING_ANNOTATE_COMPOSITION_MASK: 7039 switch (charbuf[1])
6685 produce_composition (coding, charbuf, pos); 7040 {
6686 break; 7041 case CODING_ANNOTATE_COMPOSITION_MASK:
6687 case CODING_ANNOTATE_CHARSET_MASK: 7042 produce_composition (coding, charbuf, pos);
6688 produce_charset (coding, charbuf, pos); 7043 break;
6689 break; 7044 case CODING_ANNOTATE_CHARSET_MASK:
6690 default: 7045 produce_charset (coding, charbuf, pos);
6691 abort (); 7046 break;
6692 } 7047 }
6693 charbuf += len; 7048 charbuf += len;
6694 } 7049 }
6695 } 7050 }
@@ -6875,7 +7230,7 @@ handle_composition_annotation (pos, limit, coding, buf, stop)
6875 enum composition_method method = COMPOSITION_METHOD (prop); 7230 enum composition_method method = COMPOSITION_METHOD (prop);
6876 int nchars = COMPOSITION_LENGTH (prop); 7231 int nchars = COMPOSITION_LENGTH (prop);
6877 7232
6878 ADD_COMPOSITION_DATA (buf, nchars, method); 7233 ADD_COMPOSITION_DATA (buf, nchars, 0, method);
6879 if (method != COMPOSITION_RELATIVE) 7234 if (method != COMPOSITION_RELATIVE)
6880 { 7235 {
6881 Lisp_Object components; 7236 Lisp_Object components;
@@ -7062,12 +7417,26 @@ consume_chars (coding, translation_table, max_lookup)
7062 for (i = 1; i < max_lookup && p < src_end; i++) 7417 for (i = 1; i < max_lookup && p < src_end; i++)
7063 lookup_buf[i] = STRING_CHAR_ADVANCE (p); 7418 lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7064 lookup_buf_end = lookup_buf + i; 7419 lookup_buf_end = lookup_buf + i;
7065 trans = get_translation (trans, lookup_buf, lookup_buf_end, 1, 7420 trans = get_translation (trans, lookup_buf, lookup_buf_end);
7066 &from_nchars, &to_nchars); 7421 if (INTEGERP (trans))
7067 if (EQ (trans, Qt) 7422 c = XINT (trans);
7068 || buf + to_nchars > buf_end) 7423 else if (CONSP (trans))
7424 {
7425 from_nchars = ASIZE (XCAR (trans));
7426 trans = XCDR (trans);
7427 if (INTEGERP (trans))
7428 c = XINT (trans);
7429 else
7430 {
7431 to_nchars = ASIZE (trans);
7432 if (buf + to_nchars > buf_end)
7433 break;
7434 c = XINT (AREF (trans, 0));
7435 }
7436 }
7437 else
7069 break; 7438 break;
7070 *buf++ = *lookup_buf; 7439 *buf++ = c;
7071 for (i = 1; i < to_nchars; i++) 7440 for (i = 1; i < to_nchars; i++)
7072 *buf++ = XINT (AREF (trans, i)); 7441 *buf++ = XINT (AREF (trans, i));
7073 for (i = 1; i < from_nchars; i++, pos++) 7442 for (i = 1; i < from_nchars; i++, pos++)