diff options
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 542 |
1 files changed, 382 insertions, 160 deletions
diff --git a/src/coding.c b/src/coding.c index 3b9661dfa63..62708fb60f4 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -1007,6 +1007,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1007 | unification_table = Vstandard_character_unification_table_for_decode; | 1007 | unification_table = Vstandard_character_unification_table_for_decode; |
| 1008 | 1008 | ||
| 1009 | coding->produced_char = 0; | 1009 | coding->produced_char = 0; |
| 1010 | coding->fake_multibyte = 0; | ||
| 1010 | while (src < src_end && (dst_bytes | 1011 | while (src < src_end && (dst_bytes |
| 1011 | ? (dst < adjusted_dst_end) | 1012 | ? (dst < adjusted_dst_end) |
| 1012 | : (dst < src - 6))) | 1013 | : (dst < src - 6))) |
| @@ -1046,21 +1047,12 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1046 | case ISO_0xA0_or_0xFF: | 1047 | case ISO_0xA0_or_0xFF: |
| 1047 | if (charset1 < 0 || CHARSET_CHARS (charset1) == 94 | 1048 | if (charset1 < 0 || CHARSET_CHARS (charset1) == 94 |
| 1048 | || coding->flags & CODING_FLAG_ISO_SEVEN_BITS) | 1049 | || coding->flags & CODING_FLAG_ISO_SEVEN_BITS) |
| 1049 | { | 1050 | goto label_invalid_code; |
| 1050 | /* Invalid code. */ | ||
| 1051 | *dst++ = c1; | ||
| 1052 | coding->produced_char++; | ||
| 1053 | break; | ||
| 1054 | } | ||
| 1055 | /* This is a graphic character, we fall down ... */ | 1051 | /* This is a graphic character, we fall down ... */ |
| 1056 | 1052 | ||
| 1057 | case ISO_graphic_plane_1: | 1053 | case ISO_graphic_plane_1: |
| 1058 | if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) | 1054 | if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) |
| 1059 | { | 1055 | goto label_invalid_code; |
| 1060 | /* Invalid code. */ | ||
| 1061 | *dst++ = c1; | ||
| 1062 | coding->produced_char++; | ||
| 1063 | } | ||
| 1064 | else | 1056 | else |
| 1065 | DECODE_ISO_CHARACTER (charset1, c1); | 1057 | DECODE_ISO_CHARACTER (charset1, c1); |
| 1066 | break; | 1058 | break; |
| @@ -1310,9 +1302,9 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1310 | break; | 1302 | break; |
| 1311 | 1303 | ||
| 1312 | label_invalid_code: | 1304 | label_invalid_code: |
| 1313 | coding->produced_char += src - src_base; | ||
| 1314 | while (src_base < src) | 1305 | while (src_base < src) |
| 1315 | *dst++ = *src_base++; | 1306 | *dst++ = *src_base++; |
| 1307 | coding->fake_multibyte = 1; | ||
| 1316 | } | 1308 | } |
| 1317 | continue; | 1309 | continue; |
| 1318 | 1310 | ||
| @@ -1323,19 +1315,26 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1323 | break; | 1315 | break; |
| 1324 | } | 1316 | } |
| 1325 | 1317 | ||
| 1326 | if (result == CODING_FINISH_NORMAL | 1318 | if (src < src_end) |
| 1327 | && src < src_end) | ||
| 1328 | result = CODING_FINISH_INSUFFICIENT_DST; | ||
| 1329 | |||
| 1330 | /* If this is the last block of the text to be decoded, we had | ||
| 1331 | better just flush out all remaining codes in the text although | ||
| 1332 | they are not valid characters. */ | ||
| 1333 | if (coding->mode & CODING_MODE_LAST_BLOCK) | ||
| 1334 | { | 1319 | { |
| 1335 | bcopy (src, dst, src_end - src); | 1320 | if (result == CODING_FINISH_NORMAL) |
| 1336 | dst += (src_end - src); | 1321 | result = CODING_FINISH_INSUFFICIENT_DST; |
| 1337 | src = src_end; | 1322 | else if (result != CODING_FINISH_INCONSISTENT_EOL |
| 1323 | && coding->mode & CODING_MODE_LAST_BLOCK) | ||
| 1324 | { | ||
| 1325 | /* This is the last block of the text to be decoded. We had | ||
| 1326 | better just flush out all remaining codes in the text | ||
| 1327 | although they are not valid characters. */ | ||
| 1328 | src_bytes = src_end - src; | ||
| 1329 | if (dst_bytes && (dst_end - dst < src_bytes)) | ||
| 1330 | src_bytes = dst_end - dst; | ||
| 1331 | bcopy (src, dst, src_bytes); | ||
| 1332 | dst += src_bytes; | ||
| 1333 | src += src_bytes; | ||
| 1334 | coding->fake_multibyte = 1; | ||
| 1335 | } | ||
| 1338 | } | 1336 | } |
| 1337 | |||
| 1339 | coding->consumed = coding->consumed_char = src - source; | 1338 | coding->consumed = coding->consumed_char = src - source; |
| 1340 | coding->produced = dst - destination; | 1339 | coding->produced = dst - destination; |
| 1341 | return result; | 1340 | return result; |
| @@ -1413,16 +1412,22 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1413 | if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ | 1412 | if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ |
| 1414 | *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \ | 1413 | *dst++ = ISO_CODE_ESC, *dst++ = 'N'; \ |
| 1415 | else \ | 1414 | else \ |
| 1416 | *dst++ = ISO_CODE_SS2; \ | 1415 | { \ |
| 1416 | *dst++ = ISO_CODE_SS2; \ | ||
| 1417 | coding->fake_multibyte = 1; \ | ||
| 1418 | } \ | ||
| 1417 | CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ | 1419 | CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ |
| 1418 | } while (0) | 1420 | } while (0) |
| 1419 | 1421 | ||
| 1420 | #define ENCODE_SINGLE_SHIFT_3 \ | 1422 | #define ENCODE_SINGLE_SHIFT_3 \ |
| 1421 | do { \ | 1423 | do { \ |
| 1422 | if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ | 1424 | if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \ |
| 1423 | *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \ | 1425 | *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \ |
| 1424 | else \ | 1426 | else \ |
| 1425 | *dst++ = ISO_CODE_SS3; \ | 1427 | { \ |
| 1428 | *dst++ = ISO_CODE_SS3; \ | ||
| 1429 | coding->fake_multibyte = 1; \ | ||
| 1430 | } \ | ||
| 1426 | CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ | 1431 | CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \ |
| 1427 | } while (0) | 1432 | } while (0) |
| 1428 | 1433 | ||
| @@ -1746,6 +1751,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1746 | unification_table = Vstandard_character_unification_table_for_encode; | 1751 | unification_table = Vstandard_character_unification_table_for_encode; |
| 1747 | 1752 | ||
| 1748 | coding->consumed_char = 0; | 1753 | coding->consumed_char = 0; |
| 1754 | coding->fake_multibyte = 0; | ||
| 1749 | while (src < src_end && (dst_bytes | 1755 | while (src < src_end && (dst_bytes |
| 1750 | ? (dst < adjusted_dst_end) | 1756 | ? (dst < adjusted_dst_end) |
| 1751 | : (dst < src - 19))) | 1757 | : (dst < src - 19))) |
| @@ -1933,15 +1939,17 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1933 | break; | 1939 | break; |
| 1934 | } | 1940 | } |
| 1935 | 1941 | ||
| 1936 | if (result == CODING_FINISH_NORMAL | 1942 | if (src < src_end) |
| 1937 | && src < src_end) | 1943 | { |
| 1938 | result = CODING_FINISH_INSUFFICIENT_DST; | 1944 | if (result == CODING_FINISH_NORMAL) |
| 1939 | 1945 | result = CODING_FINISH_INSUFFICIENT_DST; | |
| 1940 | /* If this is the last block of the text to be encoded, we must | 1946 | else |
| 1941 | reset graphic planes and registers to the initial state, and | 1947 | /* If this is the last block of the text to be encoded, we |
| 1942 | flush out the carryover if any. */ | 1948 | must reset graphic planes and registers to the initial |
| 1943 | if (coding->mode & CODING_MODE_LAST_BLOCK) | 1949 | state, and flush out the carryover if any. */ |
| 1944 | ENCODE_RESET_PLANE_AND_REGISTER; | 1950 | if (coding->mode & CODING_MODE_LAST_BLOCK) |
| 1951 | ENCODE_RESET_PLANE_AND_REGISTER; | ||
| 1952 | } | ||
| 1945 | 1953 | ||
| 1946 | coding->consumed = src - source; | 1954 | coding->consumed = src - source; |
| 1947 | coding->produced = coding->produced_char = dst - destination; | 1955 | coding->produced = coding->produced_char = dst - destination; |
| @@ -2054,7 +2062,10 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 2054 | if (sjis_p && charset_alt == charset_katakana_jisx0201) \ | 2062 | if (sjis_p && charset_alt == charset_katakana_jisx0201) \ |
| 2055 | *dst++ = c1; \ | 2063 | *dst++ = c1; \ |
| 2056 | else \ | 2064 | else \ |
| 2057 | *dst++ = charset_alt, *dst++ = c1; \ | 2065 | { \ |
| 2066 | *dst++ = charset_alt, *dst++ = c1; \ | ||
| 2067 | coding->fake_multibyte = 1; \ | ||
| 2068 | } \ | ||
| 2058 | } \ | 2069 | } \ |
| 2059 | else \ | 2070 | else \ |
| 2060 | { \ | 2071 | { \ |
| @@ -2062,21 +2073,25 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 2062 | if (sjis_p && charset_alt == charset_jisx0208) \ | 2073 | if (sjis_p && charset_alt == charset_jisx0208) \ |
| 2063 | { \ | 2074 | { \ |
| 2064 | unsigned char s1, s2; \ | 2075 | unsigned char s1, s2; \ |
| 2065 | \ | 2076 | \ |
| 2066 | ENCODE_SJIS (c1, c2, s1, s2); \ | 2077 | ENCODE_SJIS (c1, c2, s1, s2); \ |
| 2067 | *dst++ = s1, *dst++ = s2; \ | 2078 | *dst++ = s1, *dst++ = s2; \ |
| 2079 | coding->fake_multibyte = 1; \ | ||
| 2068 | } \ | 2080 | } \ |
| 2069 | else if (!sjis_p \ | 2081 | else if (!sjis_p \ |
| 2070 | && (charset_alt == charset_big5_1 \ | 2082 | && (charset_alt == charset_big5_1 \ |
| 2071 | || charset_alt == charset_big5_2)) \ | 2083 | || charset_alt == charset_big5_2)) \ |
| 2072 | { \ | 2084 | { \ |
| 2073 | unsigned char b1, b2; \ | 2085 | unsigned char b1, b2; \ |
| 2074 | \ | 2086 | \ |
| 2075 | ENCODE_BIG5 (charset_alt, c1, c2, b1, b2); \ | 2087 | ENCODE_BIG5 (charset_alt, c1, c2, b1, b2); \ |
| 2076 | *dst++ = b1, *dst++ = b2; \ | 2088 | *dst++ = b1, *dst++ = b2; \ |
| 2077 | } \ | 2089 | } \ |
| 2078 | else \ | 2090 | else \ |
| 2079 | *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \ | 2091 | { \ |
| 2092 | *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \ | ||
| 2093 | coding->fake_multibyte = 1; \ | ||
| 2094 | } \ | ||
| 2080 | } \ | 2095 | } \ |
| 2081 | coding->consumed_char++; \ | 2096 | coding->consumed_char++; \ |
| 2082 | } while (0); | 2097 | } while (0); |
| @@ -2155,6 +2170,7 @@ decode_coding_sjis_big5 (coding, source, destination, | |||
| 2155 | unification_table = Vstandard_character_unification_table_for_decode; | 2170 | unification_table = Vstandard_character_unification_table_for_decode; |
| 2156 | 2171 | ||
| 2157 | coding->produced_char = 0; | 2172 | coding->produced_char = 0; |
| 2173 | coding->fake_multibyte = 0; | ||
| 2158 | while (src < src_end && (dst_bytes | 2174 | while (src < src_end && (dst_bytes |
| 2159 | ? (dst < adjusted_dst_end) | 2175 | ? (dst < adjusted_dst_end) |
| 2160 | : (dst < src - 3))) | 2176 | : (dst < src - 3))) |
| @@ -2203,46 +2219,85 @@ decode_coding_sjis_big5 (coding, source, destination, | |||
| 2203 | } | 2219 | } |
| 2204 | else if (c1 < 0x80) | 2220 | else if (c1 < 0x80) |
| 2205 | DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2); | 2221 | DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2); |
| 2206 | else if (c1 < 0xA0 || c1 >= 0xE0) | 2222 | else if (c1 < 0xA0) |
| 2207 | { | 2223 | { |
| 2208 | /* SJIS -> JISX0208, BIG5 -> Big5 (only if 0xE0 <= c1 < 0xFF) */ | 2224 | /* SJIS -> JISX0208 */ |
| 2209 | if (sjis_p) | 2225 | if (sjis_p) |
| 2210 | { | 2226 | { |
| 2211 | ONE_MORE_BYTE (c2); | 2227 | ONE_MORE_BYTE (c2); |
| 2212 | DECODE_SJIS (c1, c2, c3, c4); | 2228 | if (c2 >= 0x40) |
| 2213 | DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); | 2229 | { |
| 2230 | DECODE_SJIS (c1, c2, c3, c4); | ||
| 2231 | DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); | ||
| 2232 | } | ||
| 2233 | else | ||
| 2234 | goto label_invalid_code_2; | ||
| 2214 | } | 2235 | } |
| 2215 | else if (c1 >= 0xE0 && c1 < 0xFF) | 2236 | else |
| 2237 | goto label_invalid_code_1; | ||
| 2238 | } | ||
| 2239 | else if (c1 < 0xE0) | ||
| 2240 | { | ||
| 2241 | /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */ | ||
| 2242 | if (sjis_p) | ||
| 2243 | DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1, | ||
| 2244 | /* dummy */ c2); | ||
| 2245 | else | ||
| 2216 | { | 2246 | { |
| 2217 | int charset; | 2247 | int charset; |
| 2218 | 2248 | ||
| 2219 | ONE_MORE_BYTE (c2); | 2249 | ONE_MORE_BYTE (c2); |
| 2220 | DECODE_BIG5 (c1, c2, charset, c3, c4); | 2250 | if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) |
| 2221 | DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); | 2251 | { |
| 2222 | } | 2252 | DECODE_BIG5 (c1, c2, charset, c3, c4); |
| 2223 | else /* Invalid code */ | 2253 | DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); |
| 2224 | { | 2254 | } |
| 2225 | *dst++ = c1; | 2255 | else |
| 2226 | coding->produced_char++; | 2256 | goto label_invalid_code_2; |
| 2227 | } | 2257 | } |
| 2228 | } | 2258 | } |
| 2229 | else | 2259 | else /* C1 >= 0xE0 */ |
| 2230 | { | 2260 | { |
| 2231 | /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */ | 2261 | /* SJIS -> JISX0208, BIG5 -> Big5 */ |
| 2232 | if (sjis_p) | 2262 | if (sjis_p) |
| 2233 | DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1, | 2263 | { |
| 2234 | /* dummy */ c2); | 2264 | ONE_MORE_BYTE (c2); |
| 2265 | if (c2 >= 0x40) | ||
| 2266 | { | ||
| 2267 | DECODE_SJIS (c1, c2, c3, c4); | ||
| 2268 | DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); | ||
| 2269 | } | ||
| 2270 | else | ||
| 2271 | goto label_invalid_code_2; | ||
| 2272 | } | ||
| 2235 | else | 2273 | else |
| 2236 | { | 2274 | { |
| 2237 | int charset; | 2275 | int charset; |
| 2238 | 2276 | ||
| 2239 | ONE_MORE_BYTE (c2); | 2277 | ONE_MORE_BYTE (c2); |
| 2240 | DECODE_BIG5 (c1, c2, charset, c3, c4); | 2278 | if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) |
| 2241 | DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); | 2279 | { |
| 2280 | DECODE_BIG5 (c1, c2, charset, c3, c4); | ||
| 2281 | DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); | ||
| 2282 | } | ||
| 2283 | else | ||
| 2284 | goto label_invalid_code_2; | ||
| 2242 | } | 2285 | } |
| 2243 | } | 2286 | } |
| 2244 | continue; | 2287 | continue; |
| 2245 | 2288 | ||
| 2289 | label_invalid_code_1: | ||
| 2290 | *dst++ = c1; | ||
| 2291 | coding->produced_char++; | ||
| 2292 | coding->fake_multibyte = 1; | ||
| 2293 | continue; | ||
| 2294 | |||
| 2295 | label_invalid_code_2: | ||
| 2296 | *dst++ = c1; *dst++= c2; | ||
| 2297 | coding->produced_char += 2; | ||
| 2298 | coding->fake_multibyte = 1; | ||
| 2299 | continue; | ||
| 2300 | |||
| 2246 | label_end_of_loop: | 2301 | label_end_of_loop: |
| 2247 | result = CODING_FINISH_INSUFFICIENT_SRC; | 2302 | result = CODING_FINISH_INSUFFICIENT_SRC; |
| 2248 | label_end_of_loop_2: | 2303 | label_end_of_loop_2: |
| @@ -2250,9 +2305,22 @@ decode_coding_sjis_big5 (coding, source, destination, | |||
| 2250 | break; | 2305 | break; |
| 2251 | } | 2306 | } |
| 2252 | 2307 | ||
| 2253 | if (result == CODING_FINISH_NORMAL | 2308 | if (src < src_end) |
| 2254 | && src < src_end) | 2309 | { |
| 2255 | result = CODING_FINISH_INSUFFICIENT_DST; | 2310 | if (result == CODING_FINISH_NORMAL) |
| 2311 | result = CODING_FINISH_INSUFFICIENT_DST; | ||
| 2312 | else if (result != CODING_FINISH_INCONSISTENT_EOL | ||
| 2313 | && coding->mode & CODING_MODE_LAST_BLOCK) | ||
| 2314 | { | ||
| 2315 | src_bytes = src_end - src; | ||
| 2316 | if (dst_bytes && (dst_end - dst < src_bytes)) | ||
| 2317 | src_bytes = dst_end - dst; | ||
| 2318 | bcopy (dst, src, src_bytes); | ||
| 2319 | src += src_bytes; | ||
| 2320 | dst += src_bytes; | ||
| 2321 | coding->fake_multibyte = 1; | ||
| 2322 | } | ||
| 2323 | } | ||
| 2256 | 2324 | ||
| 2257 | coding->consumed = coding->consumed_char = src - source; | 2325 | coding->consumed = coding->consumed_char = src - source; |
| 2258 | coding->produced = dst - destination; | 2326 | coding->produced = dst - destination; |
| @@ -2291,6 +2359,7 @@ encode_coding_sjis_big5 (coding, source, destination, | |||
| 2291 | unification_table = Vstandard_character_unification_table_for_encode; | 2359 | unification_table = Vstandard_character_unification_table_for_encode; |
| 2292 | 2360 | ||
| 2293 | coding->consumed_char = 0; | 2361 | coding->consumed_char = 0; |
| 2362 | coding->fake_multibyte = 0; | ||
| 2294 | while (src < src_end && (dst_bytes | 2363 | while (src < src_end && (dst_bytes |
| 2295 | ? (dst < adjusted_dst_end) | 2364 | ? (dst < adjusted_dst_end) |
| 2296 | : (dst < src - 1))) | 2365 | : (dst < src - 1))) |
| @@ -2402,8 +2471,11 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes) | |||
| 2402 | unsigned char *src_end = source + src_bytes; | 2471 | unsigned char *src_end = source + src_bytes; |
| 2403 | unsigned char *dst = destination; | 2472 | unsigned char *dst = destination; |
| 2404 | unsigned char *dst_end = destination + dst_bytes; | 2473 | unsigned char *dst_end = destination + dst_bytes; |
| 2474 | unsigned char c; | ||
| 2405 | int result = CODING_FINISH_NORMAL; | 2475 | int result = CODING_FINISH_NORMAL; |
| 2406 | 2476 | ||
| 2477 | coding->fake_multibyte = 0; | ||
| 2478 | |||
| 2407 | if (src_bytes <= 0) | 2479 | if (src_bytes <= 0) |
| 2408 | return result; | 2480 | return result; |
| 2409 | 2481 | ||
| @@ -2421,7 +2493,8 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes) | |||
| 2421 | : (dst < src - 1))) | 2493 | : (dst < src - 1))) |
| 2422 | { | 2494 | { |
| 2423 | unsigned char *src_base = src; | 2495 | unsigned char *src_base = src; |
| 2424 | unsigned char c = *src++; | 2496 | |
| 2497 | c = *src++; | ||
| 2425 | if (c == '\r') | 2498 | if (c == '\r') |
| 2426 | { | 2499 | { |
| 2427 | ONE_MORE_BYTE (c); | 2500 | ONE_MORE_BYTE (c); |
| @@ -2433,6 +2506,8 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes) | |||
| 2433 | goto label_end_of_loop_2; | 2506 | goto label_end_of_loop_2; |
| 2434 | } | 2507 | } |
| 2435 | *dst++ = '\r'; | 2508 | *dst++ = '\r'; |
| 2509 | if (BASE_LEADING_CODE_P (c)) | ||
| 2510 | coding->fake_multibyte = 1; | ||
| 2436 | } | 2511 | } |
| 2437 | *dst++ = c; | 2512 | *dst++ = c; |
| 2438 | } | 2513 | } |
| @@ -2443,7 +2518,11 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes) | |||
| 2443 | goto label_end_of_loop_2; | 2518 | goto label_end_of_loop_2; |
| 2444 | } | 2519 | } |
| 2445 | else | 2520 | else |
| 2446 | *dst++ = c; | 2521 | { |
| 2522 | *dst++ = c; | ||
| 2523 | if (BASE_LEADING_CODE_P (c)) | ||
| 2524 | coding->fake_multibyte = 1; | ||
| 2525 | } | ||
| 2447 | continue; | 2526 | continue; |
| 2448 | 2527 | ||
| 2449 | label_end_of_loop: | 2528 | label_end_of_loop: |
| @@ -2461,7 +2540,13 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes) | |||
| 2461 | case CODING_EOL_CR: | 2540 | case CODING_EOL_CR: |
| 2462 | if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) | 2541 | if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) |
| 2463 | { | 2542 | { |
| 2464 | while (src < src_end) if (*src++ == '\n') break; | 2543 | while (src < src_end) |
| 2544 | { | ||
| 2545 | if ((c = *src++) == '\n') | ||
| 2546 | break; | ||
| 2547 | if (BASE_LEADING_CODE_P (c)) | ||
| 2548 | coding->fake_multibyte = 1; | ||
| 2549 | } | ||
| 2465 | if (*--src == '\n') | 2550 | if (*--src == '\n') |
| 2466 | { | 2551 | { |
| 2467 | src_bytes = src - source; | 2552 | src_bytes = src - source; |
| @@ -2493,6 +2578,7 @@ decode_eol (coding, source, destination, src_bytes, dst_bytes) | |||
| 2493 | safe_bcopy (source, destination, src_bytes); | 2578 | safe_bcopy (source, destination, src_bytes); |
| 2494 | src += src_bytes; | 2579 | src += src_bytes; |
| 2495 | dst += dst_bytes; | 2580 | dst += dst_bytes; |
| 2581 | coding->fake_multibyte = 1; | ||
| 2496 | break; | 2582 | break; |
| 2497 | } | 2583 | } |
| 2498 | 2584 | ||
| @@ -2515,6 +2601,8 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes) | |||
| 2515 | unsigned char *dst = destination; | 2601 | unsigned char *dst = destination; |
| 2516 | int result = CODING_FINISH_NORMAL; | 2602 | int result = CODING_FINISH_NORMAL; |
| 2517 | 2603 | ||
| 2604 | coding->fake_multibyte = 0; | ||
| 2605 | |||
| 2518 | if (coding->eol_type == CODING_EOL_CRLF) | 2606 | if (coding->eol_type == CODING_EOL_CRLF) |
| 2519 | { | 2607 | { |
| 2520 | unsigned char c; | 2608 | unsigned char c; |
| @@ -2534,13 +2622,19 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes) | |||
| 2534 | || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))) | 2622 | || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))) |
| 2535 | *dst++ = '\r', *dst++ = '\n'; | 2623 | *dst++ = '\r', *dst++ = '\n'; |
| 2536 | else | 2624 | else |
| 2537 | *dst++ = c; | 2625 | { |
| 2626 | *dst++ = c; | ||
| 2627 | if (BASE_LEADING_CODE_P (c)) | ||
| 2628 | coding->fake_multibyte = 1; | ||
| 2629 | } | ||
| 2538 | } | 2630 | } |
| 2539 | if (src < src_end) | 2631 | if (src < src_end) |
| 2540 | result = CODING_FINISH_INSUFFICIENT_DST; | 2632 | result = CODING_FINISH_INSUFFICIENT_DST; |
| 2541 | } | 2633 | } |
| 2542 | else | 2634 | else |
| 2543 | { | 2635 | { |
| 2636 | unsigned char c; | ||
| 2637 | |||
| 2544 | if (dst_bytes && src_bytes > dst_bytes) | 2638 | if (dst_bytes && src_bytes > dst_bytes) |
| 2545 | { | 2639 | { |
| 2546 | src_bytes = dst_bytes; | 2640 | src_bytes = dst_bytes; |
| @@ -2549,19 +2643,31 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes) | |||
| 2549 | if (dst_bytes) | 2643 | if (dst_bytes) |
| 2550 | bcopy (source, destination, src_bytes); | 2644 | bcopy (source, destination, src_bytes); |
| 2551 | else | 2645 | else |
| 2552 | safe_bcopy (source, destination, src_bytes); | 2646 | { |
| 2647 | safe_bcopy (source, destination, src_bytes); | ||
| 2648 | dst_bytes = src_bytes; | ||
| 2649 | } | ||
| 2553 | if (coding->eol_type == CODING_EOL_CRLF) | 2650 | if (coding->eol_type == CODING_EOL_CRLF) |
| 2554 | { | 2651 | { |
| 2555 | while (src_bytes--) | 2652 | while (src_bytes--) |
| 2556 | if (*dst++ == '\n') dst[-1] = '\r'; | 2653 | { |
| 2654 | if ((c = *dst++) == '\n') | ||
| 2655 | dst[-1] = '\r'; | ||
| 2656 | else if (BASE_LEADING_CODE_P (c)) | ||
| 2657 | coding->fake_multibyte = 1; | ||
| 2658 | } | ||
| 2557 | } | 2659 | } |
| 2558 | else if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY) | 2660 | else |
| 2559 | { | 2661 | { |
| 2560 | while (src_bytes--) | 2662 | if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY) |
| 2561 | if (*dst++ == '\r') dst[-1] = '\n'; | 2663 | { |
| 2664 | while (src_bytes--) | ||
| 2665 | if (*dst++ == '\r') dst[-1] = '\n'; | ||
| 2666 | } | ||
| 2667 | coding->fake_multibyte = 1; | ||
| 2562 | } | 2668 | } |
| 2563 | src += src_bytes; | 2669 | src = source + dst_bytes; |
| 2564 | dst += src_bytes; | 2670 | dst = destination + dst_bytes; |
| 2565 | } | 2671 | } |
| 2566 | 2672 | ||
| 2567 | coding->consumed = coding->consumed_char = src - source; | 2673 | coding->consumed = coding->consumed_char = src - source; |
| @@ -3458,6 +3564,7 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes) | |||
| 3458 | { | 3564 | { |
| 3459 | coding->produced = coding->produced_char = 0; | 3565 | coding->produced = coding->produced_char = 0; |
| 3460 | coding->consumed = coding->consumed_char = 0; | 3566 | coding->consumed = coding->consumed_char = 0; |
| 3567 | coding->fake_multibyte = 0; | ||
| 3461 | return CODING_FINISH_NORMAL; | 3568 | return CODING_FINISH_NORMAL; |
| 3462 | } | 3569 | } |
| 3463 | 3570 | ||
| @@ -3514,6 +3621,7 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes) | |||
| 3514 | bcopy (source, destination, coding->produced); | 3621 | bcopy (source, destination, coding->produced); |
| 3515 | else | 3622 | else |
| 3516 | safe_bcopy (source, destination, coding->produced); | 3623 | safe_bcopy (source, destination, coding->produced); |
| 3624 | coding->fake_multibyte = 1; | ||
| 3517 | coding->consumed | 3625 | coding->consumed |
| 3518 | = coding->consumed_char = coding->produced_char = coding->produced; | 3626 | = coding->consumed_char = coding->produced_char = coding->produced; |
| 3519 | break; | 3627 | break; |
| @@ -3536,6 +3644,7 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes) | |||
| 3536 | { | 3644 | { |
| 3537 | coding->produced = coding->produced_char = 0; | 3645 | coding->produced = coding->produced_char = 0; |
| 3538 | coding->consumed = coding->consumed_char = 0; | 3646 | coding->consumed = coding->consumed_char = 0; |
| 3647 | coding->fake_multibyte = 0; | ||
| 3539 | return CODING_FINISH_NORMAL; | 3648 | return CODING_FINISH_NORMAL; |
| 3540 | } | 3649 | } |
| 3541 | 3650 | ||
| @@ -3592,6 +3701,7 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes) | |||
| 3592 | while (p < pend) | 3701 | while (p < pend) |
| 3593 | if (*p++ == '\015') p[-1] = '\n'; | 3702 | if (*p++ == '\015') p[-1] = '\n'; |
| 3594 | } | 3703 | } |
| 3704 | coding->fake_multibyte = 1; | ||
| 3595 | coding->consumed | 3705 | coding->consumed |
| 3596 | = coding->consumed_char = coding->produced_char = coding->produced; | 3706 | = coding->consumed_char = coding->produced_char = coding->produced; |
| 3597 | break; | 3707 | break; |
| @@ -3600,10 +3710,11 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes) | |||
| 3600 | return result; | 3710 | return result; |
| 3601 | } | 3711 | } |
| 3602 | 3712 | ||
| 3603 | /* Scan text in the region between *BEG and *END, skip characters | 3713 | /* Scan text in the region between *BEG and *END (byte positions), |
| 3604 | which we don't have to decode by coding system CODING at the head | 3714 | skip characters which we don't have to decode by coding system |
| 3605 | and tail, then set *BEG and *END to the region of the text we | 3715 | CODING at the head and tail, then set *BEG and *END to the region |
| 3606 | actually have to convert. | 3716 | of the text we actually have to convert. The caller should move |
| 3717 | the gap out of the region in advance. | ||
| 3607 | 3718 | ||
| 3608 | If STR is not NULL, *BEG and *END are indices into STR. */ | 3719 | If STR is not NULL, *BEG and *END are indices into STR. */ |
| 3609 | 3720 | ||
| @@ -3613,7 +3724,7 @@ shrink_decoding_region (beg, end, coding, str) | |||
| 3613 | struct coding_system *coding; | 3724 | struct coding_system *coding; |
| 3614 | unsigned char *str; | 3725 | unsigned char *str; |
| 3615 | { | 3726 | { |
| 3616 | unsigned char *begp_orig, *begp, *endp_orig, *endp; | 3727 | unsigned char *begp_orig, *begp, *endp_orig, *endp, c; |
| 3617 | int eol_conversion; | 3728 | int eol_conversion; |
| 3618 | 3729 | ||
| 3619 | if (coding->type == coding_type_ccl | 3730 | if (coding->type == coding_type_ccl |
| @@ -3625,8 +3736,8 @@ shrink_decoding_region (beg, end, coding, str) | |||
| 3625 | } | 3736 | } |
| 3626 | else if (coding->type == coding_type_no_conversion) | 3737 | else if (coding->type == coding_type_no_conversion) |
| 3627 | { | 3738 | { |
| 3628 | /* We need no conversion. */ | 3739 | /* We need no conversion, but don't have to skip any data here. |
| 3629 | *beg = *end; | 3740 | Decoding routine handles them effectively anyway. */ |
| 3630 | return; | 3741 | return; |
| 3631 | } | 3742 | } |
| 3632 | 3743 | ||
| @@ -3642,8 +3753,7 @@ shrink_decoding_region (beg, end, coding, str) | |||
| 3642 | } | 3753 | } |
| 3643 | else | 3754 | else |
| 3644 | { | 3755 | { |
| 3645 | move_gap (*beg); | 3756 | begp_orig = begp = BYTE_POS_ADDR (*beg); |
| 3646 | begp_orig = begp = GAP_END_ADDR; | ||
| 3647 | endp_orig = endp = begp + *end - *beg; | 3757 | endp_orig = endp = begp + *end - *beg; |
| 3648 | } | 3758 | } |
| 3649 | 3759 | ||
| @@ -3656,8 +3766,9 @@ shrink_decoding_region (beg, end, coding, str) | |||
| 3656 | if (eol_conversion) | 3766 | if (eol_conversion) |
| 3657 | { | 3767 | { |
| 3658 | if (coding->heading_ascii < 0) | 3768 | if (coding->heading_ascii < 0) |
| 3659 | while (begp < endp && *begp != '\r') begp++; | 3769 | while (begp < endp && *begp != '\r' && *begp < 0x80) begp++; |
| 3660 | while (begp < endp && *(endp - 1) != '\r') endp--; | 3770 | while (begp < endp && *(endp - 1) != '\r' && *(endp - 1) < 0x80) |
| 3771 | endp--; | ||
| 3661 | } | 3772 | } |
| 3662 | else | 3773 | else |
| 3663 | begp = endp; | 3774 | begp = endp; |
| @@ -3686,8 +3797,6 @@ shrink_decoding_region (beg, end, coding, str) | |||
| 3686 | default: /* i.e. case coding_type_iso2022: */ | 3797 | default: /* i.e. case coding_type_iso2022: */ |
| 3687 | if (coding->heading_ascii < 0) | 3798 | if (coding->heading_ascii < 0) |
| 3688 | { | 3799 | { |
| 3689 | unsigned char c; | ||
| 3690 | |||
| 3691 | /* We can skip all ASCII characters at the head except for a | 3800 | /* We can skip all ASCII characters at the head except for a |
| 3692 | few control codes. */ | 3801 | few control codes. */ |
| 3693 | while (begp < endp && (c = *begp) < 0x80 | 3802 | while (begp < endp && (c = *begp) < 0x80 |
| @@ -3702,7 +3811,7 @@ shrink_decoding_region (beg, end, coding, str) | |||
| 3702 | case CODING_CATEGORY_IDX_ISO_8_2: | 3811 | case CODING_CATEGORY_IDX_ISO_8_2: |
| 3703 | /* We can skip all ASCII characters at the tail. */ | 3812 | /* We can skip all ASCII characters at the tail. */ |
| 3704 | if (eol_conversion) | 3813 | if (eol_conversion) |
| 3705 | while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--; | 3814 | while (begp < endp && (c = endp[-1]) < 0x80 && c != '\n') endp--; |
| 3706 | else | 3815 | else |
| 3707 | while (begp < endp && endp[-1] < 0x80) endp--; | 3816 | while (begp < endp && endp[-1] < 0x80) endp--; |
| 3708 | break; | 3817 | break; |
| @@ -3712,10 +3821,12 @@ shrink_decoding_region (beg, end, coding, str) | |||
| 3712 | /* We can skip all characters at the tail except for ESC and | 3821 | /* We can skip all characters at the tail except for ESC and |
| 3713 | the following 2-byte at the tail. */ | 3822 | the following 2-byte at the tail. */ |
| 3714 | if (eol_conversion) | 3823 | if (eol_conversion) |
| 3715 | while (begp < endp && endp[-1] != ISO_CODE_ESC && endp[-1] != '\n') | 3824 | while (begp < endp |
| 3825 | && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC && c != '\n') | ||
| 3716 | endp--; | 3826 | endp--; |
| 3717 | else | 3827 | else |
| 3718 | while (begp < endp && endp[-1] != ISO_CODE_ESC) | 3828 | while (begp < endp |
| 3829 | && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC) | ||
| 3719 | endp--; | 3830 | endp--; |
| 3720 | if (begp < endp && endp[-1] == ISO_CODE_ESC) | 3831 | if (begp < endp && endp[-1] == ISO_CODE_ESC) |
| 3721 | { | 3832 | { |
| @@ -3762,8 +3873,7 @@ shrink_encoding_region (beg, end, coding, str) | |||
| 3762 | } | 3873 | } |
| 3763 | else | 3874 | else |
| 3764 | { | 3875 | { |
| 3765 | move_gap (*beg); | 3876 | begp_orig = begp = BYTE_POS_ADDR (*beg); |
| 3766 | begp_orig = begp = GAP_END_ADDR; | ||
| 3767 | endp_orig = endp = begp + *end - *beg; | 3877 | endp_orig = endp = begp + *end - *beg; |
| 3768 | } | 3878 | } |
| 3769 | 3879 | ||
| @@ -3821,8 +3931,13 @@ shrink_encoding_region (beg, end, coding, str) | |||
| 3821 | } | 3931 | } |
| 3822 | 3932 | ||
| 3823 | /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the | 3933 | /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the |
| 3824 | text from FROM to TO by coding system CODING, and return number of | 3934 | text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by |
| 3825 | characters in the resulting text. | 3935 | coding system CODING, and return the status code of code conversion |
| 3936 | (currently, this value has no meaning). | ||
| 3937 | |||
| 3938 | How many characters (and bytes) are converted to how many | ||
| 3939 | characters (and bytes) is recorded in members of the structure | ||
| 3940 | CODING. | ||
| 3826 | 3941 | ||
| 3827 | If ADJUST is nonzero, we do various things as if the original text | 3942 | If ADJUST is nonzero, we do various things as if the original text |
| 3828 | is deleted and a new text is inserted. See the comments in | 3943 | is deleted and a new text is inserted. See the comments in |
| @@ -3832,22 +3947,34 @@ shrink_encoding_region (beg, end, coding, str) | |||
| 3832 | pre-write-conversion functions (if any) should be processed. */ | 3947 | pre-write-conversion functions (if any) should be processed. */ |
| 3833 | 3948 | ||
| 3834 | int | 3949 | int |
| 3835 | code_convert_region (from, to, coding, encodep, adjust) | 3950 | code_convert_region (from, from_byte, to, to_byte, coding, encodep, adjust) |
| 3836 | int from, to, encodep, adjust; | 3951 | int from, from_byte, to, to_byte, encodep, adjust; |
| 3837 | struct coding_system *coding; | 3952 | struct coding_system *coding; |
| 3838 | { | 3953 | { |
| 3839 | int len = to - from, require, inserted, inserted_byte; | 3954 | int len = to - from, len_byte = to_byte - from_byte; |
| 3840 | int from_byte, to_byte, len_byte; | 3955 | int require, inserted, inserted_byte; |
| 3841 | int from_byte_orig, to_byte_orig; | 3956 | int from_byte_orig, to_byte_orig; |
| 3842 | Lisp_Object saved_coding_symbol = Qnil; | 3957 | Lisp_Object saved_coding_symbol = Qnil; |
| 3958 | int multibyte = !NILP (current_buffer->enable_multibyte_characters); | ||
| 3959 | int first = 1; | ||
| 3960 | int fake_multibyte = 0; | ||
| 3961 | unsigned char *src, *dst; | ||
| 3843 | 3962 | ||
| 3844 | if (adjust) | 3963 | if (adjust) |
| 3845 | { | 3964 | { |
| 3965 | int saved_from = from; | ||
| 3966 | |||
| 3846 | prepare_to_modify_buffer (from, to, &from); | 3967 | prepare_to_modify_buffer (from, to, &from); |
| 3847 | to = from + len; | 3968 | if (saved_from != from) |
| 3969 | { | ||
| 3970 | to = from + len; | ||
| 3971 | if (multibyte) | ||
| 3972 | from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to); | ||
| 3973 | else | ||
| 3974 | from_byte = from, to_byte = to; | ||
| 3975 | len_byte = to_byte - from_byte; | ||
| 3976 | } | ||
| 3848 | } | 3977 | } |
| 3849 | from_byte = CHAR_TO_BYTE (from); to_byte = CHAR_TO_BYTE (to); | ||
| 3850 | len_byte = to_byte - from_byte; | ||
| 3851 | 3978 | ||
| 3852 | if (! encodep && CODING_REQUIRE_DETECTION (coding)) | 3979 | if (! encodep && CODING_REQUIRE_DETECTION (coding)) |
| 3853 | { | 3980 | { |
| @@ -3860,7 +3987,7 @@ code_convert_region (from, to, coding, encodep, adjust) | |||
| 3860 | move_gap_both (from, from_byte); | 3987 | move_gap_both (from, from_byte); |
| 3861 | if (coding->type == coding_type_undecided) | 3988 | if (coding->type == coding_type_undecided) |
| 3862 | { | 3989 | { |
| 3863 | detect_coding (coding, BYTE_POS_ADDR (from), len); | 3990 | detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte); |
| 3864 | if (coding->type == coding_type_undecided) | 3991 | if (coding->type == coding_type_undecided) |
| 3865 | coding->type = coding_type_emacs_mule; | 3992 | coding->type = coding_type_emacs_mule; |
| 3866 | } | 3993 | } |
| @@ -3876,10 +4003,35 @@ code_convert_region (from, to, coding, encodep, adjust) | |||
| 3876 | } | 4003 | } |
| 3877 | } | 4004 | } |
| 3878 | 4005 | ||
| 4006 | coding->consumed_char = len, coding->consumed = len_byte; | ||
| 4007 | |||
| 3879 | if (encodep | 4008 | if (encodep |
| 3880 | ? ! CODING_REQUIRE_ENCODING (coding) | 4009 | ? ! CODING_REQUIRE_ENCODING (coding) |
| 3881 | : ! CODING_REQUIRE_DECODING (coding)) | 4010 | : ! CODING_REQUIRE_DECODING (coding)) |
| 3882 | return len; | 4011 | { |
| 4012 | coding->produced = len_byte; | ||
| 4013 | if (multibyte) | ||
| 4014 | { | ||
| 4015 | if (GPT < from || GPT > to) | ||
| 4016 | move_gap_both (from, from_byte); | ||
| 4017 | coding->produced_char | ||
| 4018 | = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte); | ||
| 4019 | if (coding->produced_char != len) | ||
| 4020 | { | ||
| 4021 | int diff = coding->produced_char - len; | ||
| 4022 | |||
| 4023 | if (adjust) | ||
| 4024 | adjust_before_replace (from, from_byte, to, to_byte); | ||
| 4025 | ZV += diff; Z += diff; GPT += diff; | ||
| 4026 | if (adjust) | ||
| 4027 | adjust_after_replace (from, from_byte, to, to_byte, | ||
| 4028 | diff, 0); | ||
| 4029 | } | ||
| 4030 | } | ||
| 4031 | else | ||
| 4032 | coding->produced_char = len_byte; | ||
| 4033 | return 0; | ||
| 4034 | } | ||
| 3883 | 4035 | ||
| 3884 | /* Now we convert the text. */ | 4036 | /* Now we convert the text. */ |
| 3885 | 4037 | ||
| @@ -3900,33 +4052,40 @@ code_convert_region (from, to, coding, encodep, adjust) | |||
| 3900 | len = ZV - BEGV; | 4052 | len = ZV - BEGV; |
| 3901 | new = current_buffer; | 4053 | new = current_buffer; |
| 3902 | set_buffer_internal_1 (prev); | 4054 | set_buffer_internal_1 (prev); |
| 3903 | del_range (from, to); | 4055 | del_range_2 (from, to, from_byte, to_byte); |
| 3904 | insert_from_buffer (new, BEG, len, 0); | 4056 | insert_from_buffer (new, BEG, len, 0); |
| 3905 | to = from + len; | 4057 | to = from + len; |
| 3906 | to_byte = CHAR_TO_BYTE (to); | 4058 | to_byte = multibyte ? CHAR_TO_BYTE (to) : to; |
| 3907 | len_byte = to_byte - from_byte; | 4059 | len_byte = to_byte - from_byte; |
| 3908 | } | 4060 | } |
| 3909 | } | 4061 | } |
| 3910 | 4062 | ||
| 3911 | /* Try to skip the leading and trailing ASCIIs. */ | 4063 | /* Try to skip the leading and trailing ASCIIs. */ |
| 3912 | from_byte_orig = from_byte; to_byte_orig = to_byte; | 4064 | from_byte_orig = from_byte; to_byte_orig = to_byte; |
| 4065 | if (from < GPT && GPT < to) | ||
| 4066 | move_gap (from); | ||
| 3913 | if (encodep) | 4067 | if (encodep) |
| 3914 | shrink_encoding_region (&from_byte, &to_byte, coding, NULL); | 4068 | shrink_encoding_region (&from_byte, &to_byte, coding, NULL); |
| 3915 | else | 4069 | else |
| 3916 | shrink_decoding_region (&from_byte, &to_byte, coding, NULL); | 4070 | shrink_decoding_region (&from_byte, &to_byte, coding, NULL); |
| 3917 | if (from_byte == to_byte) | 4071 | if (from_byte == to_byte) |
| 3918 | return len; | 4072 | { |
| 4073 | coding->produced = len_byte; | ||
| 4074 | coding->produced_char = multibyte ? len : len_byte; | ||
| 4075 | return 0; | ||
| 4076 | } | ||
| 4077 | |||
| 3919 | /* Here, the excluded region by shrinking contains only ASCIIs. */ | 4078 | /* Here, the excluded region by shrinking contains only ASCIIs. */ |
| 3920 | from += (from_byte - from_byte_orig); | 4079 | from += (from_byte - from_byte_orig); |
| 3921 | to += (to_byte - to_byte_orig); | 4080 | to += (to_byte - to_byte_orig); |
| 3922 | len = to - from; | 4081 | len = to - from; |
| 3923 | len_byte = to_byte - from_byte; | 4082 | len_byte = to_byte - from_byte; |
| 3924 | 4083 | ||
| 3925 | /* For conversion, we must put the gap before the text to be decoded | 4084 | /* For conversion, we must put the gap before the text in addition to |
| 3926 | in addition to make the gap larger for efficient decoding. The | 4085 | making the gap larger for efficient decoding. The required gap |
| 3927 | required gap size starts from 2000 which is the magic number used | 4086 | size starts from 2000 which is the magic number used in make_gap. |
| 3928 | in make_gap. But, after one batch of conversion, it will be | 4087 | But, after one batch of conversion, it will be incremented if we |
| 3929 | incremented if we find that it is not enough. */ | 4088 | find that it is not enough. */ |
| 3930 | require = 2000; | 4089 | require = 2000; |
| 3931 | 4090 | ||
| 3932 | if (GAP_SIZE < require) | 4091 | if (GAP_SIZE < require) |
| @@ -3942,38 +4101,44 @@ code_convert_region (from, to, coding, encodep, adjust) | |||
| 3942 | end_unchanged = Z - GPT; | 4101 | end_unchanged = Z - GPT; |
| 3943 | 4102 | ||
| 3944 | inserted = inserted_byte = 0; | 4103 | inserted = inserted_byte = 0; |
| 4104 | src = GAP_END_ADDR, dst = GPT_ADDR; | ||
| 4105 | |||
| 4106 | GAP_SIZE += len_byte; | ||
| 4107 | ZV -= len; | ||
| 4108 | Z -= len; | ||
| 4109 | ZV_BYTE -= len_byte; | ||
| 4110 | Z_BYTE -= len_byte; | ||
| 4111 | |||
| 3945 | for (;;) | 4112 | for (;;) |
| 3946 | { | 4113 | { |
| 3947 | int result, diff_char, diff_byte; | 4114 | int result; |
| 3948 | 4115 | ||
| 3949 | /* The buffer memory is changed from: | 4116 | /* The buffer memory is changed from: |
| 3950 | +--------+converted-text+------------+-----original-text-----+---+ | 4117 | +--------+converted-text+---------+-------original-text------+---+ |
| 3951 | |<-from->|<--inserted-->|<-GAP_SIZE->|<---------len--------->|---| */ | 4118 | |<-from->|<--inserted-->|---------|<-----------len---------->|---| |
| 3952 | 4119 | |<------------------- GAP_SIZE -------------------->| */ | |
| 3953 | if (encodep) | 4120 | if (encodep) |
| 3954 | result = encode_coding (coding, GAP_END_ADDR, GPT_ADDR, len_byte, 0); | 4121 | result = encode_coding (coding, src, dst, len_byte, 0); |
| 3955 | else | 4122 | else |
| 3956 | result = decode_coding (coding, GAP_END_ADDR, GPT_ADDR, len_byte, 0); | 4123 | result = decode_coding (coding, src, dst, len_byte, 0); |
| 3957 | /* to: | 4124 | /* to: |
| 3958 | +--------+-------converted-text--------+--+---original-text--+---+ | 4125 | +--------+-------converted-text--------+--+---original-text--+---+ |
| 3959 | |<-from->|<----(inserted+produced)---->|--|<-(len-consumed)->|---| */ | 4126 | |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---| |
| 3960 | 4127 | |<------------------- GAP_SIZE -------------------->| */ | |
| 3961 | diff_char = coding->produced_char - coding->consumed_char; | 4128 | if (coding->fake_multibyte) |
| 3962 | diff_byte = coding->produced - coding->consumed; | 4129 | fake_multibyte = 1; |
| 3963 | |||
| 3964 | GAP_SIZE -= diff_byte; | ||
| 3965 | ZV += diff_char; ZV_BYTE += diff_byte; | ||
| 3966 | Z += diff_char; Z_BYTE += diff_byte; | ||
| 3967 | GPT += coding->produced_char; GPT_BYTE += coding->produced; | ||
| 3968 | 4130 | ||
| 4131 | if (!encodep && !multibyte) | ||
| 4132 | coding->produced_char = coding->produced; | ||
| 3969 | inserted += coding->produced_char; | 4133 | inserted += coding->produced_char; |
| 3970 | inserted_byte += coding->produced; | 4134 | inserted_byte += coding->produced; |
| 3971 | len -= coding->consumed_char; | ||
| 3972 | len_byte -= coding->consumed; | 4135 | len_byte -= coding->consumed; |
| 4136 | src += coding->consumed; | ||
| 4137 | dst += inserted_byte; | ||
| 3973 | 4138 | ||
| 3974 | if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL) | 4139 | if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL) |
| 3975 | { | 4140 | { |
| 3976 | unsigned char *p = GPT_ADDR - inserted_byte, *pend = GPT_ADDR; | 4141 | unsigned char *pend = dst, *p = pend - inserted_byte; |
| 3977 | 4142 | ||
| 3978 | /* Encode LFs back to the original eol format (CR or CRLF). */ | 4143 | /* Encode LFs back to the original eol format (CR or CRLF). */ |
| 3979 | if (coding->eol_type == CODING_EOL_CR) | 4144 | if (coding->eol_type == CODING_EOL_CR) |
| @@ -3982,24 +4147,39 @@ code_convert_region (from, to, coding, encodep, adjust) | |||
| 3982 | } | 4147 | } |
| 3983 | else | 4148 | else |
| 3984 | { | 4149 | { |
| 3985 | unsigned char *p2 = p; | ||
| 3986 | int count = 0; | 4150 | int count = 0; |
| 3987 | 4151 | ||
| 3988 | while (p2 < pend) if (*p2++ == '\n') count++; | 4152 | while (p < pend) if (*p++ == '\n') count++; |
| 3989 | if (GAP_SIZE < count) | 4153 | if (src - dst < count) |
| 3990 | make_gap (count - GAP_SIZE); | ||
| 3991 | p2 = GPT_ADDR + count; | ||
| 3992 | while (p < pend) | ||
| 3993 | { | 4154 | { |
| 3994 | *--p2 = *--pend; | 4155 | /* We don't have sufficient room for putting LFs |
| 3995 | if (*pend == '\n') *--p2 = '\r'; | 4156 | back to CRLF. We must record converted and |
| 4157 | not-yet-converted text back to the buffer | ||
| 4158 | content, enlarge the gap, then record them out of | ||
| 4159 | the buffer contents again. */ | ||
| 4160 | int add = len_byte + inserted_byte; | ||
| 4161 | |||
| 4162 | GAP_SIZE -= add; | ||
| 4163 | ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add; | ||
| 4164 | GPT += inserted_byte; GPT_BYTE += inserted_byte; | ||
| 4165 | make_gap (count - GAP_SIZE); | ||
| 4166 | GAP_SIZE += add; | ||
| 4167 | ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; | ||
| 4168 | GPT -= inserted_byte; GPT_BYTE -= inserted_byte; | ||
| 4169 | /* Don't forget to update SRC, DST, and PEND. */ | ||
| 4170 | src = GAP_END_ADDR - len_byte; | ||
| 4171 | dst = GPT_ADDR + inserted_byte; | ||
| 4172 | pend = dst; | ||
| 3996 | } | 4173 | } |
| 3997 | GPT += count; GAP_SIZE -= count; ZV += count; Z += count; | ||
| 3998 | ZV_BYTE += count; Z_BYTE += count; | ||
| 3999 | coding->produced += count; | ||
| 4000 | coding->produced_char += count; | ||
| 4001 | inserted += count; | 4174 | inserted += count; |
| 4002 | inserted_byte += count; | 4175 | inserted_byte += count; |
| 4176 | coding->produced += count; | ||
| 4177 | p = dst = pend + count; | ||
| 4178 | while (count) | ||
| 4179 | { | ||
| 4180 | *--p = *--pend; | ||
| 4181 | if (*p == '\n') count--, *--p = '\r'; | ||
| 4182 | } | ||
| 4003 | } | 4183 | } |
| 4004 | 4184 | ||
| 4005 | /* Suppress eol-format conversion in the further conversion. */ | 4185 | /* Suppress eol-format conversion in the further conversion. */ |
| @@ -4007,6 +4187,8 @@ code_convert_region (from, to, coding, encodep, adjust) | |||
| 4007 | 4187 | ||
| 4008 | /* Restore the original symbol. */ | 4188 | /* Restore the original symbol. */ |
| 4009 | coding->symbol = saved_coding_symbol; | 4189 | coding->symbol = saved_coding_symbol; |
| 4190 | |||
| 4191 | continue; | ||
| 4010 | } | 4192 | } |
| 4011 | if (len_byte <= 0) | 4193 | if (len_byte <= 0) |
| 4012 | break; | 4194 | break; |
| @@ -4014,26 +4196,56 @@ code_convert_region (from, to, coding, encodep, adjust) | |||
| 4014 | { | 4196 | { |
| 4015 | /* The source text ends in invalid codes. Let's just | 4197 | /* The source text ends in invalid codes. Let's just |
| 4016 | make them valid buffer contents, and finish conversion. */ | 4198 | make them valid buffer contents, and finish conversion. */ |
| 4017 | inserted += len; | 4199 | inserted += len_byte; |
| 4018 | inserted_byte += len_byte; | 4200 | inserted_byte += len_byte; |
| 4201 | while (len_byte--) | ||
| 4202 | *src++ = *dst++; | ||
| 4203 | fake_multibyte = 1; | ||
| 4019 | break; | 4204 | break; |
| 4020 | } | 4205 | } |
| 4021 | if (inserted == coding->produced_char) | 4206 | if (first) |
| 4022 | /* We have just done the first batch of conversion. Let's | 4207 | { |
| 4023 | reconsider the required gap size now. | 4208 | /* We have just done the first batch of conversion which was |
| 4024 | 4209 | stopped because of insufficient gap. Let's reconsider the | |
| 4025 | We have converted CONSUMED bytes into PRODUCED bytes. To | 4210 | required gap size (i.e. SRC - DST) now. |
| 4026 | convert the remaining LEN bytes, we may need REQUIRE bytes | 4211 | |
| 4027 | of gap, where: | 4212 | We have converted ORIG bytes (== coding->consumed) into |
| 4028 | REQUIRE + LEN = (LEN * PRODUCED / CONSUMED) | 4213 | NEW bytes (coding->produced). To convert the remaining |
| 4029 | REQUIRE = LEN * (PRODUCED - CONSUMED) / CONSUMED | 4214 | LEN_BYTE bytes, we may need REQUIRE bytes of gap, where: |
| 4030 | = LEN * DIFF / CONSUMED | 4215 | REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG) |
| 4031 | Here, we are sure that DIFF is positive. */ | 4216 | REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG |
| 4032 | require = len_byte * diff_byte / coding->consumed; | 4217 | Here, we are sure that NEW >= ORIG. */ |
| 4033 | if (GAP_SIZE < require) | 4218 | require = (len_byte * (coding->produced - coding->consumed) |
| 4034 | make_gap (require - GAP_SIZE); | 4219 | / coding->consumed); |
| 4220 | first = 0; | ||
| 4221 | } | ||
| 4222 | if ((src - dst) < (require + 2000)) | ||
| 4223 | { | ||
| 4224 | /* See the comment above the previous call of make_gap. */ | ||
| 4225 | int add = len_byte + inserted_byte; | ||
| 4226 | |||
| 4227 | GAP_SIZE -= add; | ||
| 4228 | ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add; | ||
| 4229 | GPT += inserted_byte; GPT_BYTE += inserted_byte; | ||
| 4230 | make_gap (require + 2000); | ||
| 4231 | GAP_SIZE += add; | ||
| 4232 | ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; | ||
| 4233 | GPT -= inserted_byte; GPT_BYTE -= inserted_byte; | ||
| 4234 | /* Don't forget to update SRC, DST. */ | ||
| 4235 | src = GAP_END_ADDR - len_byte; | ||
| 4236 | dst = GPT_ADDR + inserted_byte; | ||
| 4237 | } | ||
| 4035 | } | 4238 | } |
| 4036 | if (GAP_SIZE > 0) *GPT_ADDR = 0; /* Put an anchor. */ | 4239 | if (src - dst > 0) *dst = 0; /* Put an anchor. */ |
| 4240 | |||
| 4241 | if (multibyte && (fake_multibyte || !encodep && (to - from) != (to_byte - from_byte))) | ||
| 4242 | inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte); | ||
| 4243 | |||
| 4244 | /* Update various buffer positions for the new text. */ | ||
| 4245 | GAP_SIZE -= inserted_byte; | ||
| 4246 | ZV += inserted; Z+= inserted; | ||
| 4247 | ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte; | ||
| 4248 | GPT += inserted; GPT_BYTE += inserted_byte; | ||
| 4037 | 4249 | ||
| 4038 | if (adjust) | 4250 | if (adjust) |
| 4039 | { | 4251 | { |
| @@ -4055,9 +4267,18 @@ code_convert_region (from, to, coding, encodep, adjust) | |||
| 4055 | if (pos >= from + orig_inserted) | 4267 | if (pos >= from + orig_inserted) |
| 4056 | temp_set_point (current_buffer, pos + (inserted - orig_inserted)); | 4268 | temp_set_point (current_buffer, pos + (inserted - orig_inserted)); |
| 4057 | } | 4269 | } |
| 4270 | signal_after_change (from, to - from, inserted); | ||
| 4058 | } | 4271 | } |
| 4059 | 4272 | ||
| 4060 | return ((from_byte - from_byte_orig) + inserted + (to_byte_orig - to_byte)); | 4273 | { |
| 4274 | int skip = (to_byte_orig - to_byte) + (from_byte - from_byte_orig); | ||
| 4275 | |||
| 4276 | coding->consumed = to_byte_orig - from_byte_orig; | ||
| 4277 | coding->consumed_char = skip + (to - from); | ||
| 4278 | coding->produced = skip + inserted_byte; | ||
| 4279 | coding->produced_char = skip + inserted; | ||
| 4280 | } | ||
| 4281 | return 0; | ||
| 4061 | } | 4282 | } |
| 4062 | 4283 | ||
| 4063 | Lisp_Object | 4284 | Lisp_Object |
| @@ -4095,7 +4316,7 @@ code_convert_string (str, coding, encodep, nocopy) | |||
| 4095 | insert_from_string (str, 0, 0, to_byte, to_byte, 0); | 4316 | insert_from_string (str, 0, 0, to_byte, to_byte, 0); |
| 4096 | current_buffer->enable_multibyte_characters = Qt; | 4317 | current_buffer->enable_multibyte_characters = Qt; |
| 4097 | } | 4318 | } |
| 4098 | code_convert_region (BEGV, ZV, coding, encodep, 1); | 4319 | code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1); |
| 4099 | if (encodep) | 4320 | if (encodep) |
| 4100 | /* We must return the buffer contents as unibyte string. */ | 4321 | /* We must return the buffer contents as unibyte string. */ |
| 4101 | current_buffer->enable_multibyte_characters = Qnil; | 4322 | current_buffer->enable_multibyte_characters = Qnil; |
| @@ -4377,8 +4598,9 @@ code_convert_region1 (start, end, coding_system, encodep) | |||
| 4377 | error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); | 4598 | error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); |
| 4378 | 4599 | ||
| 4379 | coding.mode |= CODING_MODE_LAST_BLOCK; | 4600 | coding.mode |= CODING_MODE_LAST_BLOCK; |
| 4380 | len = code_convert_region (from, to, &coding, encodep, 1); | 4601 | code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to), |
| 4381 | return make_number (len); | 4602 | &coding, encodep, 1); |
| 4603 | return make_number (coding.produced_char); | ||
| 4382 | } | 4604 | } |
| 4383 | 4605 | ||
| 4384 | DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region, | 4606 | DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region, |