diff options
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 809 |
1 files changed, 681 insertions, 128 deletions
diff --git a/src/coding.c b/src/coding.c index 868fb7df0ea..654e39c0e3d 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* Coding system handler (conversion, detection, etc). | 1 | /* Coding system handler (conversion, detection, etc). |
| 2 | Copyright (C) 2001-2013 Free Software Foundation, Inc. | 2 | Copyright (C) 2001-2014 Free Software Foundation, Inc. |
| 3 | Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, | 3 | Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
| 4 | 2005, 2006, 2007, 2008, 2009, 2010, 2011 | 4 | 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
| 5 | National Institute of Advanced Industrial Science and Technology (AIST) | 5 | National Institute of Advanced Industrial Science and Technology (AIST) |
| @@ -286,6 +286,10 @@ encode_coding_XXX (struct coding_system *coding) | |||
| 286 | #include <config.h> | 286 | #include <config.h> |
| 287 | #include <stdio.h> | 287 | #include <stdio.h> |
| 288 | 288 | ||
| 289 | #ifdef HAVE_WCHAR_H | ||
| 290 | #include <wchar.h> | ||
| 291 | #endif /* HAVE_WCHAR_H */ | ||
| 292 | |||
| 289 | #include "lisp.h" | 293 | #include "lisp.h" |
| 290 | #include "character.h" | 294 | #include "character.h" |
| 291 | #include "buffer.h" | 295 | #include "buffer.h" |
| @@ -322,8 +326,7 @@ Lisp_Object Qcall_process, Qcall_process_region; | |||
| 322 | Lisp_Object Qstart_process, Qopen_network_stream; | 326 | Lisp_Object Qstart_process, Qopen_network_stream; |
| 323 | static Lisp_Object Qtarget_idx; | 327 | static Lisp_Object Qtarget_idx; |
| 324 | 328 | ||
| 325 | static Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source; | 329 | static Lisp_Object Qinsufficient_source, Qinvalid_source, Qinterrupted; |
| 326 | static Lisp_Object Qinterrupted, Qinsufficient_memory; | ||
| 327 | 330 | ||
| 328 | /* If a symbol has this property, evaluate the value to define the | 331 | /* If a symbol has this property, evaluate the value to define the |
| 329 | symbol as a coding system. */ | 332 | symbol as a coding system. */ |
| @@ -490,6 +493,8 @@ enum iso_code_class_type | |||
| 490 | 493 | ||
| 491 | #define CODING_ISO_FLAG_USE_OLDJIS 0x10000 | 494 | #define CODING_ISO_FLAG_USE_OLDJIS 0x10000 |
| 492 | 495 | ||
| 496 | #define CODING_ISO_FLAG_LEVEL_4 0x20000 | ||
| 497 | |||
| 493 | #define CODING_ISO_FLAG_FULL_SUPPORT 0x100000 | 498 | #define CODING_ISO_FLAG_FULL_SUPPORT 0x100000 |
| 494 | 499 | ||
| 495 | /* A character to be produced on output if encoding of the original | 500 | /* A character to be produced on output if encoding of the original |
| @@ -646,12 +651,45 @@ static struct coding_system coding_categories[coding_category_max]; | |||
| 646 | #define max(a, b) ((a) > (b) ? (a) : (b)) | 651 | #define max(a, b) ((a) > (b) ? (a) : (b)) |
| 647 | #endif | 652 | #endif |
| 648 | 653 | ||
| 654 | /* Encode FLAG, which can be nil, t, or something else, as an int: -1 for nil, 1 for t, and 0 for anything else. */ | ||
| 655 | |||
| 656 | static int | ||
| 657 | encode_inhibit_flag (Lisp_Object flag) | ||
| 658 | { | ||
| 659 | return NILP (flag) ? -1 : EQ (flag, Qt); /* The EQ result supplies the 1 (t) or 0 (other) case. */ | ||
| 660 | } | ||
| 661 | |||
| 662 | /* Return true if the flag encoded as ENCODED_FLAG should be treated as set. | ||
| 663 | ENCODED_FLAG 1 means yes, -1 means no, and 0 means defer to the user variable VAR. */ | ||
| 664 | |||
| 665 | static bool | ||
| 666 | inhibit_flag (int encoded_flag, bool var) | ||
| 667 | { | ||
| 668 | return 0 < encoded_flag + var; /* 1 + VAR is always > 0; -1 + VAR never is; 0 + VAR is > 0 iff VAR. */ | ||
| 669 | } | ||
| 670 | |||
| 649 | #define CODING_GET_INFO(coding, attrs, charset_list) \ | 671 | #define CODING_GET_INFO(coding, attrs, charset_list) \ |
| 650 | do { \ | 672 | do { \ |
| 651 | (attrs) = CODING_ID_ATTRS ((coding)->id); \ | 673 | (attrs) = CODING_ID_ATTRS ((coding)->id); \ |
| 652 | (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \ | 674 | (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \ |
| 653 | } while (0) | 675 | } while (0) |
| 654 | 676 | ||
| 677 | /* Apply CHECK_NATNUM to the car of the cons X. */ static void | ||
| 678 | CHECK_NATNUM_CAR (Lisp_Object x) | ||
| 679 | { | ||
| 680 | Lisp_Object tmp = XCAR (x); /* CHECK_NATNUM is given a local copy of the car ... */ | ||
| 681 | CHECK_NATNUM (tmp); | ||
| 682 | XSETCAR (x, tmp); /* ... and the checked value is stored back into X. */ | ||
| 683 | } | ||
| 684 | |||
| 685 | /* Apply CHECK_NATNUM to the cdr of the cons X. */ static void | ||
| 686 | CHECK_NATNUM_CDR (Lisp_Object x) | ||
| 687 | { | ||
| 688 | Lisp_Object tmp = XCDR (x); /* CHECK_NATNUM is given a local copy of the cdr ... */ | ||
| 689 | CHECK_NATNUM (tmp); | ||
| 690 | XSETCDR (x, tmp); /* ... and the checked value is stored back into X. */ | ||
| 691 | } | ||
| 692 | |||
| 655 | 693 | ||
| 656 | /* Safely get one byte from the source text pointed by SRC which ends | 694 | /* Safely get one byte from the source text pointed by SRC which ends |
| 657 | at SRC_END, and set C to that byte. If there are not enough bytes | 695 | at SRC_END, and set C to that byte. If there are not enough bytes |
| @@ -820,18 +858,12 @@ record_conversion_result (struct coding_system *coding, | |||
| 820 | case CODING_RESULT_INSUFFICIENT_SRC: | 858 | case CODING_RESULT_INSUFFICIENT_SRC: |
| 821 | Vlast_code_conversion_error = Qinsufficient_source; | 859 | Vlast_code_conversion_error = Qinsufficient_source; |
| 822 | break; | 860 | break; |
| 823 | case CODING_RESULT_INCONSISTENT_EOL: | ||
| 824 | Vlast_code_conversion_error = Qinconsistent_eol; | ||
| 825 | break; | ||
| 826 | case CODING_RESULT_INVALID_SRC: | 861 | case CODING_RESULT_INVALID_SRC: |
| 827 | Vlast_code_conversion_error = Qinvalid_source; | 862 | Vlast_code_conversion_error = Qinvalid_source; |
| 828 | break; | 863 | break; |
| 829 | case CODING_RESULT_INTERRUPT: | 864 | case CODING_RESULT_INTERRUPT: |
| 830 | Vlast_code_conversion_error = Qinterrupted; | 865 | Vlast_code_conversion_error = Qinterrupted; |
| 831 | break; | 866 | break; |
| 832 | case CODING_RESULT_INSUFFICIENT_MEM: | ||
| 833 | Vlast_code_conversion_error = Qinsufficient_memory; | ||
| 834 | break; | ||
| 835 | case CODING_RESULT_INSUFFICIENT_DST: | 867 | case CODING_RESULT_INSUFFICIENT_DST: |
| 836 | /* Don't record this error in Vlast_code_conversion_error | 868 | /* Don't record this error in Vlast_code_conversion_error |
| 837 | because it happens just temporarily and is resolved when the | 869 | because it happens just temporarily and is resolved when the |
| @@ -1128,6 +1160,14 @@ alloc_destination (struct coding_system *coding, ptrdiff_t nbytes, | |||
| 1128 | *buf++ = id; \ | 1160 | *buf++ = id; \ |
| 1129 | } while (0) | 1161 | } while (0) |
| 1130 | 1162 | ||
| 1163 | |||
| 1164 | /* Bitmasks for coding->eol_seen. The non-zero values are OR-ed together as each end-of-line form is encountered. */ | ||
| 1165 | |||
| 1166 | #define EOL_SEEN_NONE 0 /* No end-of-line seen yet. */ | ||
| 1167 | #define EOL_SEEN_LF 1 /* A lone '\n' was seen. */ | ||
| 1168 | #define EOL_SEEN_CR 2 /* A '\r' not followed by '\n' was seen. */ | ||
| 1169 | #define EOL_SEEN_CRLF 4 /* A '\r' '\n' pair was seen. */ | ||
| 1170 | |||
| 1131 | 1171 | ||
| 1132 | /*** 2. Emacs' internal format (emacs-utf-8) ***/ | 1172 | /*** 2. Emacs' internal format (emacs-utf-8) ***/ |
| 1133 | 1173 | ||
| @@ -1150,6 +1190,9 @@ alloc_destination (struct coding_system *coding, ptrdiff_t nbytes, | |||
| 1150 | #define UTF_8_BOM_2 0xBB | 1190 | #define UTF_8_BOM_2 0xBB |
| 1151 | #define UTF_8_BOM_3 0xBF | 1191 | #define UTF_8_BOM_3 0xBF |
| 1152 | 1192 | ||
| 1193 | /* Unlike the other detect_coding_XXX, this function counts the number | ||
| 1194 | of characters and checks the EOL format. */ | ||
| 1195 | |||
| 1153 | static bool | 1196 | static bool |
| 1154 | detect_coding_utf_8 (struct coding_system *coding, | 1197 | detect_coding_utf_8 (struct coding_system *coding, |
| 1155 | struct coding_detection_info *detect_info) | 1198 | struct coding_detection_info *detect_info) |
| @@ -1159,11 +1202,23 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1159 | bool multibytep = coding->src_multibyte; | 1202 | bool multibytep = coding->src_multibyte; |
| 1160 | ptrdiff_t consumed_chars = 0; | 1203 | ptrdiff_t consumed_chars = 0; |
| 1161 | bool bom_found = 0; | 1204 | bool bom_found = 0; |
| 1162 | bool found = 0; | 1205 | ptrdiff_t nchars = coding->head_ascii; |
| 1206 | int eol_seen = coding->eol_seen; | ||
| 1163 | 1207 | ||
| 1164 | detect_info->checked |= CATEGORY_MASK_UTF_8; | 1208 | detect_info->checked |= CATEGORY_MASK_UTF_8; |
| 1165 | /* A coding system of this category is always ASCII compatible. */ | 1209 | /* A coding system of this category is always ASCII compatible. */ |
| 1166 | src += coding->head_ascii; | 1210 | src += nchars; |
| 1211 | |||
| 1212 | if (src == coding->source /* BOM should be at the head. */ | ||
| 1213 | && src + 3 < src_end /* BOM is 3-byte long. */ | ||
| 1214 | && src[0] == UTF_8_BOM_1 | ||
| 1215 | && src[1] == UTF_8_BOM_2 | ||
| 1216 | && src[2] == UTF_8_BOM_3) | ||
| 1217 | { | ||
| 1218 | bom_found = 1; | ||
| 1219 | src += 3; | ||
| 1220 | nchars++; | ||
| 1221 | } | ||
| 1167 | 1222 | ||
| 1168 | while (1) | 1223 | while (1) |
| 1169 | { | 1224 | { |
| @@ -1172,13 +1227,29 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1172 | src_base = src; | 1227 | src_base = src; |
| 1173 | ONE_MORE_BYTE (c); | 1228 | ONE_MORE_BYTE (c); |
| 1174 | if (c < 0 || UTF_8_1_OCTET_P (c)) | 1229 | if (c < 0 || UTF_8_1_OCTET_P (c)) |
| 1175 | continue; | 1230 | { |
| 1231 | nchars++; | ||
| 1232 | if (c == '\r') | ||
| 1233 | { | ||
| 1234 | if (src < src_end && *src == '\n') | ||
| 1235 | { | ||
| 1236 | eol_seen |= EOL_SEEN_CRLF; | ||
| 1237 | src++; | ||
| 1238 | nchars++; | ||
| 1239 | } | ||
| 1240 | else | ||
| 1241 | eol_seen |= EOL_SEEN_CR; | ||
| 1242 | } | ||
| 1243 | else if (c == '\n') | ||
| 1244 | eol_seen |= EOL_SEEN_LF; | ||
| 1245 | continue; | ||
| 1246 | } | ||
| 1176 | ONE_MORE_BYTE (c1); | 1247 | ONE_MORE_BYTE (c1); |
| 1177 | if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1)) | 1248 | if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1)) |
| 1178 | break; | 1249 | break; |
| 1179 | if (UTF_8_2_OCTET_LEADING_P (c)) | 1250 | if (UTF_8_2_OCTET_LEADING_P (c)) |
| 1180 | { | 1251 | { |
| 1181 | found = 1; | 1252 | nchars++; |
| 1182 | continue; | 1253 | continue; |
| 1183 | } | 1254 | } |
| 1184 | ONE_MORE_BYTE (c2); | 1255 | ONE_MORE_BYTE (c2); |
| @@ -1186,10 +1257,7 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1186 | break; | 1257 | break; |
| 1187 | if (UTF_8_3_OCTET_LEADING_P (c)) | 1258 | if (UTF_8_3_OCTET_LEADING_P (c)) |
| 1188 | { | 1259 | { |
| 1189 | found = 1; | 1260 | nchars++; |
| 1190 | if (src_base == coding->source | ||
| 1191 | && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3) | ||
| 1192 | bom_found = 1; | ||
| 1193 | continue; | 1261 | continue; |
| 1194 | } | 1262 | } |
| 1195 | ONE_MORE_BYTE (c3); | 1263 | ONE_MORE_BYTE (c3); |
| @@ -1197,7 +1265,7 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1197 | break; | 1265 | break; |
| 1198 | if (UTF_8_4_OCTET_LEADING_P (c)) | 1266 | if (UTF_8_4_OCTET_LEADING_P (c)) |
| 1199 | { | 1267 | { |
| 1200 | found = 1; | 1268 | nchars++; |
| 1201 | continue; | 1269 | continue; |
| 1202 | } | 1270 | } |
| 1203 | ONE_MORE_BYTE (c4); | 1271 | ONE_MORE_BYTE (c4); |
| @@ -1205,7 +1273,7 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1205 | break; | 1273 | break; |
| 1206 | if (UTF_8_5_OCTET_LEADING_P (c)) | 1274 | if (UTF_8_5_OCTET_LEADING_P (c)) |
| 1207 | { | 1275 | { |
| 1208 | found = 1; | 1276 | nchars++; |
| 1209 | continue; | 1277 | continue; |
| 1210 | } | 1278 | } |
| 1211 | break; | 1279 | break; |
| @@ -1222,14 +1290,18 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1222 | if (bom_found) | 1290 | if (bom_found) |
| 1223 | { | 1291 | { |
| 1224 | /* The first character 0xFFFE doesn't necessarily mean a BOM. */ | 1292 | /* The first character 0xFFFE doesn't necessarily mean a BOM. */ |
| 1225 | detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG; | 1293 | detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG; |
| 1226 | } | 1294 | } |
| 1227 | else | 1295 | else |
| 1228 | { | 1296 | { |
| 1229 | detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG; | 1297 | detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG; |
| 1230 | if (found) | 1298 | if (nchars < src_end - coding->source) |
| 1231 | detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG; | 1299 | /* The found characters are less than source bytes, which |
| 1300 | means that we found a valid non-ASCII characters. */ | ||
| 1301 | detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_NOSIG; | ||
| 1232 | } | 1302 | } |
| 1303 | coding->detected_utf8_bytes = src_base - coding->source; | ||
| 1304 | coding->detected_utf8_chars = nchars; | ||
| 1233 | return 1; | 1305 | return 1; |
| 1234 | } | 1306 | } |
| 1235 | 1307 | ||
| @@ -1294,6 +1366,45 @@ decode_coding_utf_8 (struct coding_system *coding) | |||
| 1294 | break; | 1366 | break; |
| 1295 | } | 1367 | } |
| 1296 | 1368 | ||
| 1369 | /* In the simple case, rapidly handle ordinary characters */ | ||
| 1370 | if (multibytep && ! eol_dos | ||
| 1371 | && charbuf < charbuf_end - 6 && src < src_end - 6) | ||
| 1372 | { | ||
| 1373 | while (charbuf < charbuf_end - 6 && src < src_end - 6) | ||
| 1374 | { | ||
| 1375 | c1 = *src; | ||
| 1376 | if (c1 & 0x80) | ||
| 1377 | break; | ||
| 1378 | src++; | ||
| 1379 | consumed_chars++; | ||
| 1380 | *charbuf++ = c1; | ||
| 1381 | |||
| 1382 | c1 = *src; | ||
| 1383 | if (c1 & 0x80) | ||
| 1384 | break; | ||
| 1385 | src++; | ||
| 1386 | consumed_chars++; | ||
| 1387 | *charbuf++ = c1; | ||
| 1388 | |||
| 1389 | c1 = *src; | ||
| 1390 | if (c1 & 0x80) | ||
| 1391 | break; | ||
| 1392 | src++; | ||
| 1393 | consumed_chars++; | ||
| 1394 | *charbuf++ = c1; | ||
| 1395 | |||
| 1396 | c1 = *src; | ||
| 1397 | if (c1 & 0x80) | ||
| 1398 | break; | ||
| 1399 | src++; | ||
| 1400 | consumed_chars++; | ||
| 1401 | *charbuf++ = c1; | ||
| 1402 | } | ||
| 1403 | /* If we handled at least one character, restart the main loop. */ | ||
| 1404 | if (src != src_base) | ||
| 1405 | continue; | ||
| 1406 | } | ||
| 1407 | |||
| 1297 | if (byte_after_cr >= 0) | 1408 | if (byte_after_cr >= 0) |
| 1298 | c1 = byte_after_cr, byte_after_cr = -1; | 1409 | c1 = byte_after_cr, byte_after_cr = -1; |
| 1299 | else | 1410 | else |
| @@ -1903,7 +2014,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src, | |||
| 1903 | int charset_ID; | 2014 | int charset_ID; |
| 1904 | unsigned code; | 2015 | unsigned code; |
| 1905 | int c; | 2016 | int c; |
| 1906 | int consumed_chars = 0; | 2017 | ptrdiff_t consumed_chars = 0; |
| 1907 | bool mseq_found = 0; | 2018 | bool mseq_found = 0; |
| 1908 | 2019 | ||
| 1909 | ONE_MORE_BYTE (c); | 2020 | ONE_MORE_BYTE (c); |
| @@ -3080,7 +3191,7 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 3080 | if (! single_shifting | 3191 | if (! single_shifting |
| 3081 | && ! (rejected & CATEGORY_MASK_ISO_8_2)) | 3192 | && ! (rejected & CATEGORY_MASK_ISO_8_2)) |
| 3082 | { | 3193 | { |
| 3083 | int len = 1; | 3194 | ptrdiff_t len = 1; |
| 3084 | while (src < src_end) | 3195 | while (src < src_end) |
| 3085 | { | 3196 | { |
| 3086 | src_base = src; | 3197 | src_base = src; |
| @@ -3664,7 +3775,10 @@ decode_coding_iso_2022 (struct coding_system *coding) | |||
| 3664 | else | 3775 | else |
| 3665 | charset = CHARSET_FROM_ID (charset_id_2); | 3776 | charset = CHARSET_FROM_ID (charset_id_2); |
| 3666 | ONE_MORE_BYTE (c1); | 3777 | ONE_MORE_BYTE (c1); |
| 3667 | if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)) | 3778 | if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0) |
| 3779 | || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) | ||
| 3780 | && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4) | ||
| 3781 | ? c1 >= 0x80 : c1 < 0x80))) | ||
| 3668 | goto invalid_code; | 3782 | goto invalid_code; |
| 3669 | break; | 3783 | break; |
| 3670 | 3784 | ||
| @@ -3678,7 +3792,10 @@ decode_coding_iso_2022 (struct coding_system *coding) | |||
| 3678 | else | 3792 | else |
| 3679 | charset = CHARSET_FROM_ID (charset_id_3); | 3793 | charset = CHARSET_FROM_ID (charset_id_3); |
| 3680 | ONE_MORE_BYTE (c1); | 3794 | ONE_MORE_BYTE (c1); |
| 3681 | if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)) | 3795 | if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0) |
| 3796 | || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) | ||
| 3797 | && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4) | ||
| 3798 | ? c1 >= 0x80 : c1 < 0x80))) | ||
| 3682 | goto invalid_code; | 3799 | goto invalid_code; |
| 3683 | break; | 3800 | break; |
| 3684 | 3801 | ||
| @@ -3890,6 +4007,14 @@ decode_coding_iso_2022 (struct coding_system *coding) | |||
| 3890 | *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); | 4007 | *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); |
| 3891 | char_offset++; | 4008 | char_offset++; |
| 3892 | coding->errors++; | 4009 | coding->errors++; |
| 4010 | /* Reset the invocation and designation status to the safest | ||
| 4011 | one; i.e. designate ASCII to the graphic register 0, and | ||
| 4012 | invoke that register to the graphic plane 0. This typically | ||
| 4013 | helps the case that a designation sequence for ASCII "ESC ( | ||
| 4014 | B" is somehow broken (e.g. broken by a newline). */ | ||
| 4015 | CODING_ISO_INVOCATION (coding, 0) = 0; | ||
| 4016 | CODING_ISO_DESIGNATION (coding, 0) = charset_ascii; | ||
| 4017 | charset_id_0 = charset_ascii; | ||
| 3893 | continue; | 4018 | continue; |
| 3894 | 4019 | ||
| 3895 | break_loop: | 4020 | break_loop: |
| @@ -4332,7 +4457,7 @@ encode_coding_iso_2022 (struct coding_system *coding) | |||
| 4332 | { | 4457 | { |
| 4333 | /* We have to produce designation sequences if any now. */ | 4458 | /* We have to produce designation sequences if any now. */ |
| 4334 | unsigned char desig_buf[16]; | 4459 | unsigned char desig_buf[16]; |
| 4335 | int nbytes; | 4460 | ptrdiff_t nbytes; |
| 4336 | ptrdiff_t offset; | 4461 | ptrdiff_t offset; |
| 4337 | 4462 | ||
| 4338 | charset_map_loaded = 0; | 4463 | charset_map_loaded = 0; |
| @@ -5075,7 +5200,7 @@ decode_coding_ccl (struct coding_system *coding) | |||
| 5075 | source_charbuf[i++] = *p++; | 5200 | source_charbuf[i++] = *p++; |
| 5076 | 5201 | ||
| 5077 | if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK) | 5202 | if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK) |
| 5078 | ccl->last_block = 1; | 5203 | ccl->last_block = true; |
| 5079 | /* As ccl_driver calls DECODE_CHAR, buffer may be relocated. */ | 5204 | /* As ccl_driver calls DECODE_CHAR, buffer may be relocated. */ |
| 5080 | charset_map_loaded = 0; | 5205 | charset_map_loaded = 0; |
| 5081 | ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf, | 5206 | ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf, |
| @@ -5135,7 +5260,7 @@ encode_coding_ccl (struct coding_system *coding) | |||
| 5135 | CODING_GET_INFO (coding, attrs, charset_list); | 5260 | CODING_GET_INFO (coding, attrs, charset_list); |
| 5136 | if (coding->consumed_char == coding->src_chars | 5261 | if (coding->consumed_char == coding->src_chars |
| 5137 | && coding->mode & CODING_MODE_LAST_BLOCK) | 5262 | && coding->mode & CODING_MODE_LAST_BLOCK) |
| 5138 | ccl->last_block = 1; | 5263 | ccl->last_block = true; |
| 5139 | 5264 | ||
| 5140 | do | 5265 | do |
| 5141 | { | 5266 | { |
| @@ -5617,7 +5742,6 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding) | |||
| 5617 | eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id); | 5742 | eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id); |
| 5618 | 5743 | ||
| 5619 | coding->mode = 0; | 5744 | coding->mode = 0; |
| 5620 | coding->head_ascii = -1; | ||
| 5621 | if (VECTORP (eol_type)) | 5745 | if (VECTORP (eol_type)) |
| 5622 | coding->common_flags = (CODING_REQUIRE_DECODING_MASK | 5746 | coding->common_flags = (CODING_REQUIRE_DECODING_MASK |
| 5623 | | CODING_REQUIRE_DETECTION_MASK); | 5747 | | CODING_REQUIRE_DETECTION_MASK); |
| @@ -5638,6 +5762,7 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding) | |||
| 5638 | coding->safe_charsets = SDATA (val); | 5762 | coding->safe_charsets = SDATA (val); |
| 5639 | coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs)); | 5763 | coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs)); |
| 5640 | coding->carryover_bytes = 0; | 5764 | coding->carryover_bytes = 0; |
| 5765 | coding->raw_destination = 0; | ||
| 5641 | 5766 | ||
| 5642 | coding_type = CODING_ATTR_TYPE (attrs); | 5767 | coding_type = CODING_ATTR_TYPE (attrs); |
| 5643 | if (EQ (coding_type, Qundecided)) | 5768 | if (EQ (coding_type, Qundecided)) |
| @@ -5646,6 +5771,14 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding) | |||
| 5646 | coding->decoder = decode_coding_raw_text; | 5771 | coding->decoder = decode_coding_raw_text; |
| 5647 | coding->encoder = encode_coding_raw_text; | 5772 | coding->encoder = encode_coding_raw_text; |
| 5648 | coding->common_flags |= CODING_REQUIRE_DETECTION_MASK; | 5773 | coding->common_flags |= CODING_REQUIRE_DETECTION_MASK; |
| 5774 | coding->spec.undecided.inhibit_nbd | ||
| 5775 | = (encode_inhibit_flag | ||
| 5776 | (AREF (attrs, coding_attr_undecided_inhibit_null_byte_detection))); | ||
| 5777 | coding->spec.undecided.inhibit_ied | ||
| 5778 | = (encode_inhibit_flag | ||
| 5779 | (AREF (attrs, coding_attr_undecided_inhibit_iso_escape_detection))); | ||
| 5780 | coding->spec.undecided.prefer_utf_8 | ||
| 5781 | = ! NILP (AREF (attrs, coding_attr_undecided_prefer_utf_8)); | ||
| 5649 | } | 5782 | } |
| 5650 | else if (EQ (coding_type, Qiso_2022)) | 5783 | else if (EQ (coding_type, Qiso_2022)) |
| 5651 | { | 5784 | { |
| @@ -6069,10 +6202,181 @@ complement_process_encoding_system (Lisp_Object coding_system) | |||
| 6069 | 6202 | ||
| 6070 | */ | 6203 | */ |
| 6071 | 6204 | ||
| 6072 | #define EOL_SEEN_NONE 0 | 6205 | static Lisp_Object adjust_coding_eol_type (struct coding_system *coding, |
| 6073 | #define EOL_SEEN_LF 1 | 6206 | int eol_seen); |
| 6074 | #define EOL_SEEN_CR 2 | 6207 | |
| 6075 | #define EOL_SEEN_CRLF 4 | 6208 | |
| 6209 | /* Return the number of ASCII characters (bytes with the high bit | ||
| 6210 | clear) at the head of the source. As side effects, store that count | ||
| 6211 | in coding->head_ascii and OR the end-of-line forms found into | ||
| 6212 | coding->eol_seen (EOL_SEEN_LF, EOL_SEEN_CR, EOL_SEEN_CRLF); that value | ||
| 6213 | is reliable only when all the source bytes are ASCII. */ | ||
| 6214 | |||
| 6215 | static ptrdiff_t | ||
| 6216 | check_ascii (struct coding_system *coding) | ||
| 6217 | { | ||
| 6218 | const unsigned char *src, *end; | ||
| 6219 | Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id); | ||
| 6220 | int eol_seen = coding->eol_seen; | ||
| 6221 | |||
| 6222 | coding_set_source (coding); | ||
| 6223 | src = coding->source; | ||
| 6224 | end = src + coding->src_bytes; | ||
| 6225 | |||
| 6226 | if (inhibit_eol_conversion | ||
| 6227 | || SYMBOLP (eol_type)) | ||
| 6228 | { | ||
| 6229 | /* EOL conversion is inhibited or EOL_TYPE is a symbol (presumably already decided), so EOL formats need not be told apart; only LF is recorded. */ | ||
| 6230 | while (src < end && !( *src & 0x80)) | ||
| 6231 | { | ||
| 6232 | if (*src++ == '\n') | ||
| 6233 | eol_seen |= EOL_SEEN_LF; | ||
| 6234 | } | ||
| 6235 | } | ||
| 6236 | else | ||
| 6237 | { | ||
| 6238 | end--; /* We look ahead one byte for "CR LF". */ | ||
| 6239 | while (src < end) | ||
| 6240 | { | ||
| 6241 | int c = *src; | ||
| 6242 | |||
| 6243 | if (c & 0x80) | ||
| 6244 | break; | ||
| 6245 | src++; | ||
| 6246 | if (c == '\r') | ||
| 6247 | { | ||
| 6248 | if (*src == '\n') /* Safe: SRC is at most END, the held-back last byte. */ | ||
| 6249 | { | ||
| 6250 | eol_seen |= EOL_SEEN_CRLF; | ||
| 6251 | src++; | ||
| 6252 | } | ||
| 6253 | else | ||
| 6254 | eol_seen |= EOL_SEEN_CR; | ||
| 6255 | } | ||
| 6256 | else if (c == '\n') | ||
| 6257 | eol_seen |= EOL_SEEN_LF; | ||
| 6258 | } | ||
| 6259 | if (src == end) /* The loop stopped exactly at the held-back last byte (not past it via CR LF, and not at a non-ASCII byte). */ | ||
| 6260 | { | ||
| 6261 | int c = *src; | ||
| 6262 | |||
| 6263 | /* All bytes but the last one C are ASCII. */ | ||
| 6264 | if (! (c & 0x80)) | ||
| 6265 | { | ||
| 6266 | if (c == '\r') | ||
| 6267 | eol_seen |= EOL_SEEN_CR; | ||
| 6268 | else if (c == '\n') | ||
| 6269 | eol_seen |= EOL_SEEN_LF; | ||
| 6270 | src++; | ||
| 6271 | } | ||
| 6272 | } | ||
| 6273 | } | ||
| 6274 | coding->head_ascii = src - coding->source; | ||
| 6275 | coding->eol_seen = eol_seen; | ||
| 6276 | return (coding->head_ascii); | ||
| 6277 | } | ||
| 6278 | |||
| 6279 | |||
| 6280 | /* Return the number of characters in the source if all the bytes are | ||
| 6281 | valid UTF-8 (of Unicode range); otherwise return -1. A source whose | ||
| 6282 | last byte is not a single-octet character is also rejected, because the | ||
| 6283 | scan keeps one byte of look-ahead for CR LF. As a side effect, OR | ||
| 6284 | EOL_SEEN_LF, EOL_SEEN_CR and EOL_SEEN_CRLF into coding->eol_seen; that | ||
| 6285 | value is reliable only when all the source bytes are valid UTF-8. */ | ||
| 6286 | |||
| 6287 | static ptrdiff_t | ||
| 6288 | check_utf_8 (struct coding_system *coding) | ||
| 6289 | { | ||
| 6290 | const unsigned char *src, *end; | ||
| 6291 | int eol_seen; | ||
| 6292 | ptrdiff_t nchars = coding->head_ascii; | ||
| 6293 | |||
| 6294 | if (nchars < 0) /* ASCII head not scanned yet: scan it now and start the count from the fresh value, not the stale negative one. */ | ||
| 6295 | nchars = check_ascii (coding); | ||
| 6296 | else | ||
| 6297 | coding_set_source (coding); | ||
| 6298 | src = coding->source + coding->head_ascii; | ||
| 6299 | /* We look ahead one byte for CR LF. */ | ||
| 6300 | end = coding->source + coding->src_bytes - 1; | ||
| 6301 | eol_seen = coding->eol_seen; | ||
| 6302 | while (src < end) | ||
| 6303 | { | ||
| 6304 | int c = *src; | ||
| 6305 | |||
| 6306 | if (UTF_8_1_OCTET_P (*src)) | ||
| 6307 | { | ||
| 6308 | src++; | ||
| 6309 | if (c < 0x20) | ||
| 6310 | { | ||
| 6311 | if (c == '\r') | ||
| 6312 | { | ||
| 6313 | if (*src == '\n') | ||
| 6314 | { | ||
| 6315 | eol_seen |= EOL_SEEN_CRLF; | ||
| 6316 | src++; | ||
| 6317 | nchars++; /* CR LF counts as two characters: this one plus the increment at the loop bottom. */ | ||
| 6318 | } | ||
| 6319 | else | ||
| 6320 | eol_seen |= EOL_SEEN_CR; | ||
| 6321 | } | ||
| 6322 | else if (c == '\n') | ||
| 6323 | eol_seen |= EOL_SEEN_LF; | ||
| 6324 | } | ||
| 6325 | } | ||
| 6326 | else if (UTF_8_2_OCTET_LEADING_P (c)) | ||
| 6327 | { | ||
| 6328 | if (c < 0xC2 /* overlong sequence */ | ||
| 6329 | || src + 1 >= end /* END is the last byte, so the sequence must finish before it. */ | ||
| 6330 | || ! UTF_8_EXTRA_OCTET_P (src[1])) | ||
| 6331 | return -1; | ||
| 6332 | src += 2; | ||
| 6333 | } | ||
| 6334 | else if (UTF_8_3_OCTET_LEADING_P (c)) | ||
| 6335 | { | ||
| 6336 | if (src + 2 >= end | ||
| 6337 | || ! (UTF_8_EXTRA_OCTET_P (src[1]) | ||
| 6338 | && UTF_8_EXTRA_OCTET_P (src[2]))) | ||
| 6339 | return -1; | ||
| 6340 | c = (((c & 0xF) << 12) | ||
| 6341 | | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F)); | ||
| 6342 | if (c < 0x800 /* overlong sequence */ | ||
| 6343 | || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */ | ||
| 6344 | return -1; | ||
| 6345 | src += 3; | ||
| 6346 | } | ||
| 6347 | else if (UTF_8_4_OCTET_LEADING_P (c)) | ||
| 6348 | { | ||
| 6349 | if (src + 3 >= end | ||
| 6350 | || ! (UTF_8_EXTRA_OCTET_P (src[1]) | ||
| 6351 | && UTF_8_EXTRA_OCTET_P (src[2]) | ||
| 6352 | && UTF_8_EXTRA_OCTET_P (src[3]))) | ||
| 6353 | return -1; | ||
| 6354 | c = (((c & 0x7) << 18) | ((src[1] & 0x3F) << 12) | ||
| 6355 | | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)); | ||
| 6356 | if (c < 0x10000 /* overlong sequence */ | ||
| 6357 | || c >= 0x110000) /* non-Unicode character */ | ||
| 6358 | return -1; | ||
| 6359 | src += 4; | ||
| 6360 | } | ||
| 6361 | else | ||
| 6362 | return -1; | ||
| 6363 | nchars++; | ||
| 6364 | } | ||
| 6365 | |||
| 6366 | if (src == end) /* Exactly the held-back last byte remains; it must be a single-octet character. */ | ||
| 6367 | { | ||
| 6368 | if (! UTF_8_1_OCTET_P (*src)) | ||
| 6369 | return -1; | ||
| 6370 | nchars++; | ||
| 6371 | if (*src == '\r') | ||
| 6372 | eol_seen |= EOL_SEEN_CR; | ||
| 6373 | else if (*src == '\n') | ||
| 6374 | eol_seen |= EOL_SEEN_LF; | ||
| 6375 | } | ||
| 6376 | coding->eol_seen = eol_seen; /* Stored only on success; the early -1 returns skip this. */ | ||
| 6377 | return nchars; | ||
| 6378 | } | ||
| 6379 | |||
| 6076 | 6380 | ||
| 6077 | /* Detect how end-of-line of a text of length SRC_BYTES pointed by | 6381 | /* Detect how end-of-line of a text of length SRC_BYTES pointed by |
| 6078 | SOURCE is encoded. If CATEGORY is one of | 6382 | SOURCE is encoded. If CATEGORY is one of |
| @@ -6185,6 +6489,9 @@ adjust_coding_eol_type (struct coding_system *coding, int eol_seen) | |||
| 6185 | Lisp_Object eol_type; | 6489 | Lisp_Object eol_type; |
| 6186 | 6490 | ||
| 6187 | eol_type = CODING_ID_EOL_TYPE (coding->id); | 6491 | eol_type = CODING_ID_EOL_TYPE (coding->id); |
| 6492 | if (! VECTORP (eol_type)) | ||
| 6493 | /* Already adjusted. */ | ||
| 6494 | return eol_type; | ||
| 6188 | if (eol_seen & EOL_SEEN_LF) | 6495 | if (eol_seen & EOL_SEEN_LF) |
| 6189 | { | 6496 | { |
| 6190 | coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0)); | 6497 | coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0)); |
| @@ -6212,14 +6519,16 @@ detect_coding (struct coding_system *coding) | |||
| 6212 | { | 6519 | { |
| 6213 | const unsigned char *src, *src_end; | 6520 | const unsigned char *src, *src_end; |
| 6214 | unsigned int saved_mode = coding->mode; | 6521 | unsigned int saved_mode = coding->mode; |
| 6522 | Lisp_Object found = Qnil; | ||
| 6523 | Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id); | ||
| 6215 | 6524 | ||
| 6216 | coding->consumed = coding->consumed_char = 0; | 6525 | coding->consumed = coding->consumed_char = 0; |
| 6217 | coding->produced = coding->produced_char = 0; | 6526 | coding->produced = coding->produced_char = 0; |
| 6218 | coding_set_source (coding); | 6527 | coding_set_source (coding); |
| 6219 | 6528 | ||
| 6220 | src_end = coding->source + coding->src_bytes; | 6529 | src_end = coding->source + coding->src_bytes; |
| 6221 | coding->head_ascii = 0; | ||
| 6222 | 6530 | ||
| 6531 | coding->eol_seen = EOL_SEEN_NONE; | ||
| 6223 | /* If we have not yet decided the text encoding type, detect it | 6532 | /* If we have not yet decided the text encoding type, detect it |
| 6224 | now. */ | 6533 | now. */ |
| 6225 | if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) | 6534 | if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) |
| @@ -6227,7 +6536,13 @@ detect_coding (struct coding_system *coding) | |||
| 6227 | int c, i; | 6536 | int c, i; |
| 6228 | struct coding_detection_info detect_info; | 6537 | struct coding_detection_info detect_info; |
| 6229 | bool null_byte_found = 0, eight_bit_found = 0; | 6538 | bool null_byte_found = 0, eight_bit_found = 0; |
| 6539 | bool inhibit_nbd = inhibit_flag (coding->spec.undecided.inhibit_nbd, | ||
| 6540 | inhibit_null_byte_detection); | ||
| 6541 | bool inhibit_ied = inhibit_flag (coding->spec.undecided.inhibit_ied, | ||
| 6542 | inhibit_iso_escape_detection); | ||
| 6543 | bool prefer_utf_8 = coding->spec.undecided.prefer_utf_8; | ||
| 6230 | 6544 | ||
| 6545 | coding->head_ascii = 0; | ||
| 6231 | detect_info.checked = detect_info.found = detect_info.rejected = 0; | 6546 | detect_info.checked = detect_info.found = detect_info.rejected = 0; |
| 6232 | for (src = coding->source; src < src_end; src++) | 6547 | for (src = coding->source; src < src_end; src++) |
| 6233 | { | 6548 | { |
| @@ -6241,7 +6556,7 @@ detect_coding (struct coding_system *coding) | |||
| 6241 | else if (c < 0x20) | 6556 | else if (c < 0x20) |
| 6242 | { | 6557 | { |
| 6243 | if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) | 6558 | if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) |
| 6244 | && ! inhibit_iso_escape_detection | 6559 | && ! inhibit_ied |
| 6245 | && ! detect_info.checked) | 6560 | && ! detect_info.checked) |
| 6246 | { | 6561 | { |
| 6247 | if (detect_coding_iso_2022 (coding, &detect_info)) | 6562 | if (detect_coding_iso_2022 (coding, &detect_info)) |
| @@ -6260,12 +6575,33 @@ detect_coding (struct coding_system *coding) | |||
| 6260 | break; | 6575 | break; |
| 6261 | } | 6576 | } |
| 6262 | } | 6577 | } |
| 6263 | else if (! c && !inhibit_null_byte_detection) | 6578 | else if (! c && !inhibit_nbd) |
| 6264 | { | 6579 | { |
| 6265 | null_byte_found = 1; | 6580 | null_byte_found = 1; |
| 6266 | if (eight_bit_found) | 6581 | if (eight_bit_found) |
| 6267 | break; | 6582 | break; |
| 6268 | } | 6583 | } |
| 6584 | else if (! disable_ascii_optimization | ||
| 6585 | && ! inhibit_eol_conversion) | ||
| 6586 | { | ||
| 6587 | if (c == '\r') | ||
| 6588 | { | ||
| 6589 | if (src < src_end && src[1] == '\n') | ||
| 6590 | { | ||
| 6591 | coding->eol_seen |= EOL_SEEN_CRLF; | ||
| 6592 | src++; | ||
| 6593 | if (! eight_bit_found) | ||
| 6594 | coding->head_ascii++; | ||
| 6595 | } | ||
| 6596 | else | ||
| 6597 | coding->eol_seen |= EOL_SEEN_CR; | ||
| 6598 | } | ||
| 6599 | else if (c == '\n') | ||
| 6600 | { | ||
| 6601 | coding->eol_seen |= EOL_SEEN_LF; | ||
| 6602 | } | ||
| 6603 | } | ||
| 6604 | |||
| 6269 | if (! eight_bit_found) | 6605 | if (! eight_bit_found) |
| 6270 | coding->head_ascii++; | 6606 | coding->head_ascii++; |
| 6271 | } | 6607 | } |
| @@ -6296,6 +6632,12 @@ detect_coding (struct coding_system *coding) | |||
| 6296 | detect_info.checked |= ~CATEGORY_MASK_UTF_16; | 6632 | detect_info.checked |= ~CATEGORY_MASK_UTF_16; |
| 6297 | detect_info.rejected |= ~CATEGORY_MASK_UTF_16; | 6633 | detect_info.rejected |= ~CATEGORY_MASK_UTF_16; |
| 6298 | } | 6634 | } |
| 6635 | else if (prefer_utf_8 | ||
| 6636 | && detect_coding_utf_8 (coding, &detect_info)) | ||
| 6637 | { | ||
| 6638 | detect_info.checked |= ~CATEGORY_MASK_UTF_8; | ||
| 6639 | detect_info.rejected |= ~CATEGORY_MASK_UTF_8; | ||
| 6640 | } | ||
| 6299 | for (i = 0; i < coding_category_raw_text; i++) | 6641 | for (i = 0; i < coding_category_raw_text; i++) |
| 6300 | { | 6642 | { |
| 6301 | category = coding_priorities[i]; | 6643 | category = coding_priorities[i]; |
| @@ -6317,32 +6659,58 @@ detect_coding (struct coding_system *coding) | |||
| 6317 | } | 6659 | } |
| 6318 | else if ((*(this->detector)) (coding, &detect_info) | 6660 | else if ((*(this->detector)) (coding, &detect_info) |
| 6319 | && detect_info.found & (1 << category)) | 6661 | && detect_info.found & (1 << category)) |
| 6320 | { | 6662 | break; |
| 6321 | if (category == coding_category_utf_16_auto) | ||
| 6322 | { | ||
| 6323 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | ||
| 6324 | category = coding_category_utf_16_le; | ||
| 6325 | else | ||
| 6326 | category = coding_category_utf_16_be; | ||
| 6327 | } | ||
| 6328 | break; | ||
| 6329 | } | ||
| 6330 | } | 6663 | } |
| 6331 | } | 6664 | } |
| 6332 | 6665 | ||
| 6333 | if (i < coding_category_raw_text) | 6666 | if (i < coding_category_raw_text) |
| 6334 | setup_coding_system (CODING_ID_NAME (this->id), coding); | 6667 | { |
| 6668 | if (category == coding_category_utf_8_auto) | ||
| 6669 | { | ||
| 6670 | Lisp_Object coding_systems; | ||
| 6671 | |||
| 6672 | coding_systems = AREF (CODING_ID_ATTRS (this->id), | ||
| 6673 | coding_attr_utf_bom); | ||
| 6674 | if (CONSP (coding_systems)) | ||
| 6675 | { | ||
| 6676 | if (detect_info.found & CATEGORY_MASK_UTF_8_SIG) | ||
| 6677 | found = XCAR (coding_systems); | ||
| 6678 | else | ||
| 6679 | found = XCDR (coding_systems); | ||
| 6680 | } | ||
| 6681 | else | ||
| 6682 | found = CODING_ID_NAME (this->id); | ||
| 6683 | } | ||
| 6684 | else if (category == coding_category_utf_16_auto) | ||
| 6685 | { | ||
| 6686 | Lisp_Object coding_systems; | ||
| 6687 | |||
| 6688 | coding_systems = AREF (CODING_ID_ATTRS (this->id), | ||
| 6689 | coding_attr_utf_bom); | ||
| 6690 | if (CONSP (coding_systems)) | ||
| 6691 | { | ||
| 6692 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | ||
| 6693 | found = XCAR (coding_systems); | ||
| 6694 | else if (detect_info.found & CATEGORY_MASK_UTF_16_BE) | ||
| 6695 | found = XCDR (coding_systems); | ||
| 6696 | } | ||
| 6697 | else | ||
| 6698 | found = CODING_ID_NAME (this->id); | ||
| 6699 | } | ||
| 6700 | else | ||
| 6701 | found = CODING_ID_NAME (this->id); | ||
| 6702 | } | ||
| 6335 | else if (null_byte_found) | 6703 | else if (null_byte_found) |
| 6336 | setup_coding_system (Qno_conversion, coding); | 6704 | found = Qno_conversion; |
| 6337 | else if ((detect_info.rejected & CATEGORY_MASK_ANY) | 6705 | else if ((detect_info.rejected & CATEGORY_MASK_ANY) |
| 6338 | == CATEGORY_MASK_ANY) | 6706 | == CATEGORY_MASK_ANY) |
| 6339 | setup_coding_system (Qraw_text, coding); | 6707 | found = Qraw_text; |
| 6340 | else if (detect_info.rejected) | 6708 | else if (detect_info.rejected) |
| 6341 | for (i = 0; i < coding_category_raw_text; i++) | 6709 | for (i = 0; i < coding_category_raw_text; i++) |
| 6342 | if (! (detect_info.rejected & (1 << coding_priorities[i]))) | 6710 | if (! (detect_info.rejected & (1 << coding_priorities[i]))) |
| 6343 | { | 6711 | { |
| 6344 | this = coding_categories + coding_priorities[i]; | 6712 | this = coding_categories + coding_priorities[i]; |
| 6345 | setup_coding_system (CODING_ID_NAME (this->id), coding); | 6713 | found = CODING_ID_NAME (this->id); |
| 6346 | break; | 6714 | break; |
| 6347 | } | 6715 | } |
| 6348 | } | 6716 | } |
| @@ -6356,14 +6724,21 @@ detect_coding (struct coding_system *coding) | |||
| 6356 | coding_systems | 6724 | coding_systems |
| 6357 | = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom); | 6725 | = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom); |
| 6358 | detect_info.found = detect_info.rejected = 0; | 6726 | detect_info.found = detect_info.rejected = 0; |
| 6359 | coding->head_ascii = 0; | 6727 | if (check_ascii (coding) == coding->src_bytes) |
| 6360 | if (CONSP (coding_systems) | ||
| 6361 | && detect_coding_utf_8 (coding, &detect_info)) | ||
| 6362 | { | 6728 | { |
| 6363 | if (detect_info.found & CATEGORY_MASK_UTF_8_SIG) | 6729 | if (CONSP (coding_systems)) |
| 6364 | setup_coding_system (XCAR (coding_systems), coding); | 6730 | found = XCDR (coding_systems); |
| 6365 | else | 6731 | } |
| 6366 | setup_coding_system (XCDR (coding_systems), coding); | 6732 | else |
| 6733 | { | ||
| 6734 | if (CONSP (coding_systems) | ||
| 6735 | && detect_coding_utf_8 (coding, &detect_info)) | ||
| 6736 | { | ||
| 6737 | if (detect_info.found & CATEGORY_MASK_UTF_8_SIG) | ||
| 6738 | found = XCAR (coding_systems); | ||
| 6739 | else | ||
| 6740 | found = XCDR (coding_systems); | ||
| 6741 | } | ||
| 6367 | } | 6742 | } |
| 6368 | } | 6743 | } |
| 6369 | else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) | 6744 | else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) |
| @@ -6380,11 +6755,24 @@ detect_coding (struct coding_system *coding) | |||
| 6380 | && detect_coding_utf_16 (coding, &detect_info)) | 6755 | && detect_coding_utf_16 (coding, &detect_info)) |
| 6381 | { | 6756 | { |
| 6382 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | 6757 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) |
| 6383 | setup_coding_system (XCAR (coding_systems), coding); | 6758 | found = XCAR (coding_systems); |
| 6384 | else if (detect_info.found & CATEGORY_MASK_UTF_16_BE) | 6759 | else if (detect_info.found & CATEGORY_MASK_UTF_16_BE) |
| 6385 | setup_coding_system (XCDR (coding_systems), coding); | 6760 | found = XCDR (coding_systems); |
| 6386 | } | 6761 | } |
| 6387 | } | 6762 | } |
| 6763 | |||
| 6764 | if (! NILP (found)) | ||
| 6765 | { | ||
| 6766 | int specified_eol = (VECTORP (eol_type) ? EOL_SEEN_NONE | ||
| 6767 | : EQ (eol_type, Qdos) ? EOL_SEEN_CRLF | ||
| 6768 | : EQ (eol_type, Qmac) ? EOL_SEEN_CR | ||
| 6769 | : EOL_SEEN_LF); | ||
| 6770 | |||
| 6771 | setup_coding_system (found, coding); | ||
| 6772 | if (specified_eol != EOL_SEEN_NONE) | ||
| 6773 | adjust_coding_eol_type (coding, specified_eol); | ||
| 6774 | } | ||
| 6775 | |||
| 6388 | coding->mode = saved_mode; | 6776 | coding->mode = saved_mode; |
| 6389 | } | 6777 | } |
| 6390 | 6778 | ||
| @@ -6525,11 +6913,9 @@ get_translation_table (Lisp_Object attrs, bool encodep, int *max_lookup) | |||
| 6525 | if (CHAR_TABLE_P (standard)) | 6913 | if (CHAR_TABLE_P (standard)) |
| 6526 | { | 6914 | { |
| 6527 | if (CONSP (translation_table)) | 6915 | if (CONSP (translation_table)) |
| 6528 | translation_table = nconc2 (translation_table, | 6916 | translation_table = nconc2 (translation_table, list1 (standard)); |
| 6529 | Fcons (standard, Qnil)); | ||
| 6530 | else | 6917 | else |
| 6531 | translation_table = Fcons (translation_table, | 6918 | translation_table = list2 (translation_table, standard); |
| 6532 | Fcons (standard, Qnil)); | ||
| 6533 | } | 6919 | } |
| 6534 | } | 6920 | } |
| 6535 | 6921 | ||
| @@ -6813,7 +7199,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table, | |||
| 6813 | 7199 | ||
| 6814 | produced = dst - (coding->destination + coding->produced); | 7200 | produced = dst - (coding->destination + coding->produced); |
| 6815 | if (BUFFERP (coding->dst_object) && produced_chars > 0) | 7201 | if (BUFFERP (coding->dst_object) && produced_chars > 0) |
| 6816 | insert_from_gap (produced_chars, produced); | 7202 | insert_from_gap (produced_chars, produced, 0); |
| 6817 | coding->produced += produced; | 7203 | coding->produced += produced; |
| 6818 | coding->produced_char += produced_chars; | 7204 | coding->produced_char += produced_chars; |
| 6819 | return carryover; | 7205 | return carryover; |
| @@ -6884,22 +7270,8 @@ produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos) | |||
| 6884 | 7270 | ||
| 6885 | #define ALLOC_CONVERSION_WORK_AREA(coding) \ | 7271 | #define ALLOC_CONVERSION_WORK_AREA(coding) \ |
| 6886 | do { \ | 7272 | do { \ |
| 6887 | int size = CHARBUF_SIZE; \ | 7273 | coding->charbuf = SAFE_ALLOCA (CHARBUF_SIZE * sizeof (int)); \ |
| 6888 | \ | 7274 | coding->charbuf_size = CHARBUF_SIZE; \ |
| 6889 | coding->charbuf = NULL; \ | ||
| 6890 | while (size > 1024) \ | ||
| 6891 | { \ | ||
| 6892 | coding->charbuf = alloca (sizeof (int) * size); \ | ||
| 6893 | if (coding->charbuf) \ | ||
| 6894 | break; \ | ||
| 6895 | size >>= 1; \ | ||
| 6896 | } \ | ||
| 6897 | if (! coding->charbuf) \ | ||
| 6898 | { \ | ||
| 6899 | record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \ | ||
| 6900 | return; \ | ||
| 6901 | } \ | ||
| 6902 | coding->charbuf_size = size; \ | ||
| 6903 | } while (0) | 7275 | } while (0) |
| 6904 | 7276 | ||
| 6905 | 7277 | ||
| @@ -6968,6 +7340,8 @@ decode_coding (struct coding_system *coding) | |||
| 6968 | int carryover; | 7340 | int carryover; |
| 6969 | int i; | 7341 | int i; |
| 6970 | 7342 | ||
| 7343 | USE_SAFE_ALLOCA; | ||
| 7344 | |||
| 6971 | if (BUFFERP (coding->src_object) | 7345 | if (BUFFERP (coding->src_object) |
| 6972 | && coding->src_pos > 0 | 7346 | && coding->src_pos > 0 |
| 6973 | && coding->src_pos < GPT | 7347 | && coding->src_pos < GPT |
| @@ -7041,7 +7415,7 @@ decode_coding (struct coding_system *coding) | |||
| 7041 | coding->carryover_bytes = 0; | 7415 | coding->carryover_bytes = 0; |
| 7042 | if (coding->consumed < coding->src_bytes) | 7416 | if (coding->consumed < coding->src_bytes) |
| 7043 | { | 7417 | { |
| 7044 | int nbytes = coding->src_bytes - coding->consumed; | 7418 | ptrdiff_t nbytes = coding->src_bytes - coding->consumed; |
| 7045 | const unsigned char *src; | 7419 | const unsigned char *src; |
| 7046 | 7420 | ||
| 7047 | coding_set_source (coding); | 7421 | coding_set_source (coding); |
| @@ -7090,6 +7464,8 @@ decode_coding (struct coding_system *coding) | |||
| 7090 | bset_undo_list (current_buffer, undo_list); | 7464 | bset_undo_list (current_buffer, undo_list); |
| 7091 | record_insert (coding->dst_pos, coding->produced_char); | 7465 | record_insert (coding->dst_pos, coding->produced_char); |
| 7092 | } | 7466 | } |
| 7467 | |||
| 7468 | SAFE_FREE (); | ||
| 7093 | } | 7469 | } |
| 7094 | 7470 | ||
| 7095 | 7471 | ||
| @@ -7123,7 +7499,7 @@ handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit, | |||
| 7123 | /* We found a composition. Store the corresponding | 7499 | /* We found a composition. Store the corresponding |
| 7124 | annotation data in BUF. */ | 7500 | annotation data in BUF. */ |
| 7125 | int *head = buf; | 7501 | int *head = buf; |
| 7126 | enum composition_method method = COMPOSITION_METHOD (prop); | 7502 | enum composition_method method = composition_method (prop); |
| 7127 | int nchars = COMPOSITION_LENGTH (prop); | 7503 | int nchars = COMPOSITION_LENGTH (prop); |
| 7128 | 7504 | ||
| 7129 | ADD_COMPOSITION_DATA (buf, nchars, 0, method); | 7505 | ADD_COMPOSITION_DATA (buf, nchars, 0, method); |
| @@ -7373,6 +7749,8 @@ encode_coding (struct coding_system *coding) | |||
| 7373 | int max_lookup; | 7749 | int max_lookup; |
| 7374 | struct ccl_spec cclspec; | 7750 | struct ccl_spec cclspec; |
| 7375 | 7751 | ||
| 7752 | USE_SAFE_ALLOCA; | ||
| 7753 | |||
| 7376 | attrs = CODING_ID_ATTRS (coding->id); | 7754 | attrs = CODING_ID_ATTRS (coding->id); |
| 7377 | if (coding->encoder == encode_coding_raw_text) | 7755 | if (coding->encoder == encode_coding_raw_text) |
| 7378 | translation_table = Qnil, max_lookup = 0; | 7756 | translation_table = Qnil, max_lookup = 0; |
| @@ -7406,7 +7784,9 @@ encode_coding (struct coding_system *coding) | |||
| 7406 | } while (coding->consumed_char < coding->src_chars); | 7784 | } while (coding->consumed_char < coding->src_chars); |
| 7407 | 7785 | ||
| 7408 | if (BUFFERP (coding->dst_object) && coding->produced_char > 0) | 7786 | if (BUFFERP (coding->dst_object) && coding->produced_char > 0) |
| 7409 | insert_from_gap (coding->produced_char, coding->produced); | 7787 | insert_from_gap (coding->produced_char, coding->produced, 0); |
| 7788 | |||
| 7789 | SAFE_FREE (); | ||
| 7410 | } | 7790 | } |
| 7411 | 7791 | ||
| 7412 | 7792 | ||
| @@ -7460,7 +7840,7 @@ make_conversion_work_buffer (bool multibyte) | |||
| 7460 | } | 7840 | } |
| 7461 | 7841 | ||
| 7462 | 7842 | ||
| 7463 | static Lisp_Object | 7843 | static void |
| 7464 | code_conversion_restore (Lisp_Object arg) | 7844 | code_conversion_restore (Lisp_Object arg) |
| 7465 | { | 7845 | { |
| 7466 | Lisp_Object current, workbuf; | 7846 | Lisp_Object current, workbuf; |
| @@ -7478,7 +7858,6 @@ code_conversion_restore (Lisp_Object arg) | |||
| 7478 | } | 7858 | } |
| 7479 | set_buffer_internal (XBUFFER (current)); | 7859 | set_buffer_internal (XBUFFER (current)); |
| 7480 | UNGCPRO; | 7860 | UNGCPRO; |
| 7481 | return Qnil; | ||
| 7482 | } | 7861 | } |
| 7483 | 7862 | ||
| 7484 | Lisp_Object | 7863 | Lisp_Object |
| @@ -7500,8 +7879,6 @@ decode_coding_gap (struct coding_system *coding, | |||
| 7500 | ptrdiff_t count = SPECPDL_INDEX (); | 7879 | ptrdiff_t count = SPECPDL_INDEX (); |
| 7501 | Lisp_Object attrs; | 7880 | Lisp_Object attrs; |
| 7502 | 7881 | ||
| 7503 | code_conversion_save (0, 0); | ||
| 7504 | |||
| 7505 | coding->src_object = Fcurrent_buffer (); | 7882 | coding->src_object = Fcurrent_buffer (); |
| 7506 | coding->src_chars = chars; | 7883 | coding->src_chars = chars; |
| 7507 | coding->src_bytes = bytes; | 7884 | coding->src_bytes = bytes; |
| @@ -7513,15 +7890,96 @@ decode_coding_gap (struct coding_system *coding, | |||
| 7513 | coding->dst_pos_byte = PT_BYTE; | 7890 | coding->dst_pos_byte = PT_BYTE; |
| 7514 | coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); | 7891 | coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); |
| 7515 | 7892 | ||
| 7893 | coding->head_ascii = -1; | ||
| 7894 | coding->detected_utf8_bytes = coding->detected_utf8_chars = -1; | ||
| 7895 | coding->eol_seen = EOL_SEEN_NONE; | ||
| 7516 | if (CODING_REQUIRE_DETECTION (coding)) | 7896 | if (CODING_REQUIRE_DETECTION (coding)) |
| 7517 | detect_coding (coding); | 7897 | detect_coding (coding); |
| 7898 | attrs = CODING_ID_ATTRS (coding->id); | ||
| 7899 | if (! disable_ascii_optimization | ||
| 7900 | && ! coding->src_multibyte | ||
| 7901 | && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) | ||
| 7902 | && NILP (CODING_ATTR_POST_READ (attrs)) | ||
| 7903 | && NILP (get_translation_table (attrs, 0, NULL))) | ||
| 7904 | { | ||
| 7905 | chars = coding->head_ascii; | ||
| 7906 | if (chars < 0) | ||
| 7907 | chars = check_ascii (coding); | ||
| 7908 | if (chars != bytes) | ||
| 7909 | { | ||
| 7910 | /* There exists a non-ASCII byte. */ | ||
| 7911 | if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8) | ||
| 7912 | && coding->detected_utf8_bytes == coding->src_bytes) | ||
| 7913 | { | ||
| 7914 | if (coding->detected_utf8_chars >= 0) | ||
| 7915 | chars = coding->detected_utf8_chars; | ||
| 7916 | else | ||
| 7917 | chars = check_utf_8 (coding); | ||
| 7918 | if (CODING_UTF_8_BOM (coding) != utf_without_bom | ||
| 7919 | && coding->head_ascii == 0 | ||
| 7920 | && coding->source[0] == UTF_8_BOM_1 | ||
| 7921 | && coding->source[1] == UTF_8_BOM_2 | ||
| 7922 | && coding->source[2] == UTF_8_BOM_3) | ||
| 7923 | { | ||
| 7924 | chars--; | ||
| 7925 | bytes -= 3; | ||
| 7926 | coding->src_bytes -= 3; | ||
| 7927 | } | ||
| 7928 | } | ||
| 7929 | else | ||
| 7930 | chars = -1; | ||
| 7931 | } | ||
| 7932 | if (chars >= 0) | ||
| 7933 | { | ||
| 7934 | Lisp_Object eol_type; | ||
| 7935 | |||
| 7936 | eol_type = CODING_ID_EOL_TYPE (coding->id); | ||
| 7937 | if (VECTORP (eol_type)) | ||
| 7938 | { | ||
| 7939 | if (coding->eol_seen != EOL_SEEN_NONE) | ||
| 7940 | eol_type = adjust_coding_eol_type (coding, coding->eol_seen); | ||
| 7941 | } | ||
| 7942 | if (EQ (eol_type, Qmac)) | ||
| 7943 | { | ||
| 7944 | unsigned char *src_end = GAP_END_ADDR; | ||
| 7945 | unsigned char *src = src_end - coding->src_bytes; | ||
| 7946 | |||
| 7947 | while (src < src_end) | ||
| 7948 | { | ||
| 7949 | if (*src++ == '\r') | ||
| 7950 | src[-1] = '\n'; | ||
| 7951 | } | ||
| 7952 | } | ||
| 7953 | else if (EQ (eol_type, Qdos)) | ||
| 7954 | { | ||
| 7955 | unsigned char *src = GAP_END_ADDR; | ||
| 7956 | unsigned char *src_beg = src - coding->src_bytes; | ||
| 7957 | unsigned char *dst = src; | ||
| 7958 | ptrdiff_t diff; | ||
| 7959 | |||
| 7960 | while (src_beg < src) | ||
| 7961 | { | ||
| 7962 | *--dst = *--src; | ||
| 7963 | if (*src == '\n' && src > src_beg && src[-1] == '\r') | ||
| 7964 | src--; | ||
| 7965 | } | ||
| 7966 | diff = dst - src; | ||
| 7967 | bytes -= diff; | ||
| 7968 | chars -= diff; | ||
| 7969 | } | ||
| 7970 | coding->produced = bytes; | ||
| 7971 | coding->produced_char = chars; | ||
| 7972 | insert_from_gap (chars, bytes, 1); | ||
| 7973 | return; | ||
| 7974 | } | ||
| 7975 | } | ||
| 7976 | code_conversion_save (0, 0); | ||
| 7518 | 7977 | ||
| 7519 | coding->mode |= CODING_MODE_LAST_BLOCK; | 7978 | coding->mode |= CODING_MODE_LAST_BLOCK; |
| 7520 | current_buffer->text->inhibit_shrinking = 1; | 7979 | current_buffer->text->inhibit_shrinking = 1; |
| 7521 | decode_coding (coding); | 7980 | decode_coding (coding); |
| 7522 | current_buffer->text->inhibit_shrinking = 0; | 7981 | current_buffer->text->inhibit_shrinking = 0; |
| 7523 | 7982 | ||
| 7524 | attrs = CODING_ID_ATTRS (coding->id); | ||
| 7525 | if (! NILP (CODING_ATTR_POST_READ (attrs))) | 7983 | if (! NILP (CODING_ATTR_POST_READ (attrs))) |
| 7526 | { | 7984 | { |
| 7527 | ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE; | 7985 | ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE; |
| @@ -7695,14 +8153,8 @@ decode_coding_object (struct coding_system *coding, | |||
| 7695 | set_buffer_internal (XBUFFER (coding->dst_object)); | 8153 | set_buffer_internal (XBUFFER (coding->dst_object)); |
| 7696 | if (dst_bytes < coding->produced) | 8154 | if (dst_bytes < coding->produced) |
| 7697 | { | 8155 | { |
| 8156 | eassert (coding->produced > 0); | ||
| 7698 | destination = xrealloc (destination, coding->produced); | 8157 | destination = xrealloc (destination, coding->produced); |
| 7699 | if (! destination) | ||
| 7700 | { | ||
| 7701 | record_conversion_result (coding, | ||
| 7702 | CODING_RESULT_INSUFFICIENT_MEM); | ||
| 7703 | unbind_to (count, Qnil); | ||
| 7704 | return; | ||
| 7705 | } | ||
| 7706 | if (BEGV < GPT && GPT < BEGV + coding->produced_char) | 8158 | if (BEGV < GPT && GPT < BEGV + coding->produced_char) |
| 7707 | move_gap_both (BEGV, BEGV_BYTE); | 8159 | move_gap_both (BEGV, BEGV_BYTE); |
| 7708 | memcpy (destination, BEGV_ADDR, coding->produced); | 8160 | memcpy (destination, BEGV_ADDR, coding->produced); |
| @@ -7903,6 +8355,11 @@ encode_coding_object (struct coding_system *coding, | |||
| 7903 | { | 8355 | { |
| 7904 | if (BUFFERP (coding->dst_object)) | 8356 | if (BUFFERP (coding->dst_object)) |
| 7905 | coding->dst_object = Fbuffer_string (); | 8357 | coding->dst_object = Fbuffer_string (); |
| 8358 | else if (coding->raw_destination) | ||
| 8359 | /* This is used to avoid creating huge Lisp string. | ||
| 8360 | NOTE: caller who sets `raw_destination' is also | ||
| 8361 | responsible for freeing `destination' buffer. */ | ||
| 8362 | coding->dst_object = Qnil; | ||
| 7906 | else | 8363 | else |
| 7907 | { | 8364 | { |
| 7908 | coding->dst_object | 8365 | coding->dst_object |
| @@ -7985,11 +8442,21 @@ from_unicode (Lisp_Object str) | |||
| 7985 | return code_convert_string_norecord (str, Qutf_16le, 0); | 8442 | return code_convert_string_norecord (str, Qutf_16le, 0); |
| 7986 | } | 8443 | } |
| 7987 | 8444 | ||
| 8445 | Lisp_Object | ||
| 8446 | from_unicode_buffer (const wchar_t* wstr) | ||
| 8447 | { | ||
| 8448 | return from_unicode ( | ||
| 8449 | make_unibyte_string ( | ||
| 8450 | (char*) wstr, | ||
| 8451 | /* we get one of the two final 0 bytes for free. */ | ||
| 8452 | 1 + sizeof (wchar_t) * wcslen (wstr))); | ||
| 8453 | } | ||
| 8454 | |||
| 7988 | wchar_t * | 8455 | wchar_t * |
| 7989 | to_unicode (Lisp_Object str, Lisp_Object *buf) | 8456 | to_unicode (Lisp_Object str, Lisp_Object *buf) |
| 7990 | { | 8457 | { |
| 7991 | *buf = code_convert_string_norecord (str, Qutf_16le, 1); | 8458 | *buf = code_convert_string_norecord (str, Qutf_16le, 1); |
| 7992 | /* We need to make a another copy (in addition to the one made by | 8459 | /* We need to make another copy (in addition to the one made by |
| 7993 | code_convert_string_norecord) to ensure that the final string is | 8460 | code_convert_string_norecord) to ensure that the final string is |
| 7994 | _doubly_ zero terminated --- that is, that the string is | 8461 | _doubly_ zero terminated --- that is, that the string is |
| 7995 | terminated by two zero bytes and one utf-16le null character. | 8462 | terminated by two zero bytes and one utf-16le null character. |
| @@ -8135,6 +8602,11 @@ detect_coding_system (const unsigned char *src, | |||
| 8135 | enum coding_category category IF_LINT (= 0); | 8602 | enum coding_category category IF_LINT (= 0); |
| 8136 | struct coding_system *this IF_LINT (= NULL); | 8603 | struct coding_system *this IF_LINT (= NULL); |
| 8137 | int c, i; | 8604 | int c, i; |
| 8605 | bool inhibit_nbd = inhibit_flag (coding.spec.undecided.inhibit_nbd, | ||
| 8606 | inhibit_null_byte_detection); | ||
| 8607 | bool inhibit_ied = inhibit_flag (coding.spec.undecided.inhibit_ied, | ||
| 8608 | inhibit_iso_escape_detection); | ||
| 8609 | bool prefer_utf_8 = coding.spec.undecided.prefer_utf_8; | ||
| 8138 | 8610 | ||
| 8139 | /* Skip all ASCII bytes except for a few ISO2022 controls. */ | 8611 | /* Skip all ASCII bytes except for a few ISO2022 controls. */ |
| 8140 | for (; src < src_end; src++) | 8612 | for (; src < src_end; src++) |
| @@ -8149,7 +8621,7 @@ detect_coding_system (const unsigned char *src, | |||
| 8149 | else if (c < 0x20) | 8621 | else if (c < 0x20) |
| 8150 | { | 8622 | { |
| 8151 | if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) | 8623 | if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) |
| 8152 | && ! inhibit_iso_escape_detection | 8624 | && ! inhibit_ied |
| 8153 | && ! detect_info.checked) | 8625 | && ! detect_info.checked) |
| 8154 | { | 8626 | { |
| 8155 | if (detect_coding_iso_2022 (&coding, &detect_info)) | 8627 | if (detect_coding_iso_2022 (&coding, &detect_info)) |
| @@ -8168,7 +8640,7 @@ detect_coding_system (const unsigned char *src, | |||
| 8168 | break; | 8640 | break; |
| 8169 | } | 8641 | } |
| 8170 | } | 8642 | } |
| 8171 | else if (! c && !inhibit_null_byte_detection) | 8643 | else if (! c && !inhibit_nbd) |
| 8172 | { | 8644 | { |
| 8173 | null_byte_found = 1; | 8645 | null_byte_found = 1; |
| 8174 | if (eight_bit_found) | 8646 | if (eight_bit_found) |
| @@ -8201,6 +8673,12 @@ detect_coding_system (const unsigned char *src, | |||
| 8201 | detect_info.checked |= ~CATEGORY_MASK_UTF_16; | 8673 | detect_info.checked |= ~CATEGORY_MASK_UTF_16; |
| 8202 | detect_info.rejected |= ~CATEGORY_MASK_UTF_16; | 8674 | detect_info.rejected |= ~CATEGORY_MASK_UTF_16; |
| 8203 | } | 8675 | } |
| 8676 | else if (prefer_utf_8 | ||
| 8677 | && detect_coding_utf_8 (&coding, &detect_info)) | ||
| 8678 | { | ||
| 8679 | detect_info.checked |= ~CATEGORY_MASK_UTF_8; | ||
| 8680 | detect_info.rejected |= ~CATEGORY_MASK_UTF_8; | ||
| 8681 | } | ||
| 8204 | for (i = 0; i < coding_category_raw_text; i++) | 8682 | for (i = 0; i < coding_category_raw_text; i++) |
| 8205 | { | 8683 | { |
| 8206 | category = coding_priorities[i]; | 8684 | category = coding_priorities[i]; |
| @@ -8241,20 +8719,20 @@ detect_coding_system (const unsigned char *src, | |||
| 8241 | { | 8719 | { |
| 8242 | detect_info.found = CATEGORY_MASK_RAW_TEXT; | 8720 | detect_info.found = CATEGORY_MASK_RAW_TEXT; |
| 8243 | id = CODING_SYSTEM_ID (Qno_conversion); | 8721 | id = CODING_SYSTEM_ID (Qno_conversion); |
| 8244 | val = Fcons (make_number (id), Qnil); | 8722 | val = list1 (make_number (id)); |
| 8245 | } | 8723 | } |
| 8246 | else if (! detect_info.rejected && ! detect_info.found) | 8724 | else if (! detect_info.rejected && ! detect_info.found) |
| 8247 | { | 8725 | { |
| 8248 | detect_info.found = CATEGORY_MASK_ANY; | 8726 | detect_info.found = CATEGORY_MASK_ANY; |
| 8249 | id = coding_categories[coding_category_undecided].id; | 8727 | id = coding_categories[coding_category_undecided].id; |
| 8250 | val = Fcons (make_number (id), Qnil); | 8728 | val = list1 (make_number (id)); |
| 8251 | } | 8729 | } |
| 8252 | else if (highest) | 8730 | else if (highest) |
| 8253 | { | 8731 | { |
| 8254 | if (detect_info.found) | 8732 | if (detect_info.found) |
| 8255 | { | 8733 | { |
| 8256 | detect_info.found = 1 << category; | 8734 | detect_info.found = 1 << category; |
| 8257 | val = Fcons (make_number (this->id), Qnil); | 8735 | val = list1 (make_number (this->id)); |
| 8258 | } | 8736 | } |
| 8259 | else | 8737 | else |
| 8260 | for (i = 0; i < coding_category_raw_text; i++) | 8738 | for (i = 0; i < coding_category_raw_text; i++) |
| @@ -8262,7 +8740,7 @@ detect_coding_system (const unsigned char *src, | |||
| 8262 | { | 8740 | { |
| 8263 | detect_info.found = 1 << coding_priorities[i]; | 8741 | detect_info.found = 1 << coding_priorities[i]; |
| 8264 | id = coding_categories[coding_priorities[i]].id; | 8742 | id = coding_categories[coding_priorities[i]].id; |
| 8265 | val = Fcons (make_number (id), Qnil); | 8743 | val = list1 (make_number (id)); |
| 8266 | break; | 8744 | break; |
| 8267 | } | 8745 | } |
| 8268 | } | 8746 | } |
| @@ -8279,7 +8757,7 @@ detect_coding_system (const unsigned char *src, | |||
| 8279 | found |= 1 << category; | 8757 | found |= 1 << category; |
| 8280 | id = coding_categories[category].id; | 8758 | id = coding_categories[category].id; |
| 8281 | if (id >= 0) | 8759 | if (id >= 0) |
| 8282 | val = Fcons (make_number (id), val); | 8760 | val = list1 (make_number (id)); |
| 8283 | } | 8761 | } |
| 8284 | } | 8762 | } |
| 8285 | for (i = coding_category_raw_text - 1; i >= 0; i--) | 8763 | for (i = coding_category_raw_text - 1; i >= 0; i--) |
| @@ -8304,7 +8782,7 @@ detect_coding_system (const unsigned char *src, | |||
| 8304 | this = coding_categories + coding_category_utf_8_sig; | 8782 | this = coding_categories + coding_category_utf_8_sig; |
| 8305 | else | 8783 | else |
| 8306 | this = coding_categories + coding_category_utf_8_nosig; | 8784 | this = coding_categories + coding_category_utf_8_nosig; |
| 8307 | val = Fcons (make_number (this->id), Qnil); | 8785 | val = list1 (make_number (this->id)); |
| 8308 | } | 8786 | } |
| 8309 | } | 8787 | } |
| 8310 | else if (base_category == coding_category_utf_16_auto) | 8788 | else if (base_category == coding_category_utf_16_auto) |
| @@ -8321,13 +8799,13 @@ detect_coding_system (const unsigned char *src, | |||
| 8321 | this = coding_categories + coding_category_utf_16_be_nosig; | 8799 | this = coding_categories + coding_category_utf_16_be_nosig; |
| 8322 | else | 8800 | else |
| 8323 | this = coding_categories + coding_category_utf_16_le_nosig; | 8801 | this = coding_categories + coding_category_utf_16_le_nosig; |
| 8324 | val = Fcons (make_number (this->id), Qnil); | 8802 | val = list1 (make_number (this->id)); |
| 8325 | } | 8803 | } |
| 8326 | } | 8804 | } |
| 8327 | else | 8805 | else |
| 8328 | { | 8806 | { |
| 8329 | detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs)); | 8807 | detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs)); |
| 8330 | val = Fcons (make_number (coding.id), Qnil); | 8808 | val = list1 (make_number (coding.id)); |
| 8331 | } | 8809 | } |
| 8332 | 8810 | ||
| 8333 | /* Then, detect eol-format if necessary. */ | 8811 | /* Then, detect eol-format if necessary. */ |
| @@ -8539,8 +9017,7 @@ DEFUN ("find-coding-systems-region-internal", | |||
| 8539 | Lisp_Object attrs; | 9017 | Lisp_Object attrs; |
| 8540 | 9018 | ||
| 8541 | attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0); | 9019 | attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0); |
| 8542 | if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs)) | 9020 | if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))) |
| 8543 | && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided)) | ||
| 8544 | { | 9021 | { |
| 8545 | ASET (attrs, coding_attr_trans_tbl, | 9022 | ASET (attrs, coding_attr_trans_tbl, |
| 8546 | get_translation_table (attrs, 1, NULL)); | 9023 | get_translation_table (attrs, 1, NULL)); |
| @@ -8799,7 +9276,7 @@ is nil. */) | |||
| 8799 | attrs = AREF (CODING_SYSTEM_SPEC (elt), 0); | 9276 | attrs = AREF (CODING_SYSTEM_SPEC (elt), 0); |
| 8800 | ASET (attrs, coding_attr_trans_tbl, | 9277 | ASET (attrs, coding_attr_trans_tbl, |
| 8801 | get_translation_table (attrs, 1, NULL)); | 9278 | get_translation_table (attrs, 1, NULL)); |
| 8802 | list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list); | 9279 | list = Fcons (list2 (elt, attrs), list); |
| 8803 | } | 9280 | } |
| 8804 | 9281 | ||
| 8805 | if (STRINGP (start)) | 9282 | if (STRINGP (start)) |
| @@ -8883,6 +9360,14 @@ code_convert_region (Lisp_Object start, Lisp_Object end, | |||
| 8883 | setup_coding_system (coding_system, &coding); | 9360 | setup_coding_system (coding_system, &coding); |
| 8884 | coding.mode |= CODING_MODE_LAST_BLOCK; | 9361 | coding.mode |= CODING_MODE_LAST_BLOCK; |
| 8885 | 9362 | ||
| 9363 | if (BUFFERP (dst_object) && !EQ (dst_object, src_object)) | ||
| 9364 | { | ||
| 9365 | struct buffer *buf = XBUFFER (dst_object); | ||
| 9366 | ptrdiff_t buf_pt = BUF_PT (buf); | ||
| 9367 | |||
| 9368 | invalidate_buffer_caches (buf, buf_pt, buf_pt); | ||
| 9369 | } | ||
| 9370 | |||
| 8886 | if (encodep) | 9371 | if (encodep) |
| 8887 | encode_coding_object (&coding, src_object, from, from_byte, to, to_byte, | 9372 | encode_coding_object (&coding, src_object, from, from_byte, to, to_byte, |
| 8888 | dst_object); | 9373 | dst_object); |
| @@ -8972,6 +9457,15 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system, | |||
| 8972 | coding.mode |= CODING_MODE_LAST_BLOCK; | 9457 | coding.mode |= CODING_MODE_LAST_BLOCK; |
| 8973 | chars = SCHARS (string); | 9458 | chars = SCHARS (string); |
| 8974 | bytes = SBYTES (string); | 9459 | bytes = SBYTES (string); |
| 9460 | |||
| 9461 | if (BUFFERP (dst_object)) | ||
| 9462 | { | ||
| 9463 | struct buffer *buf = XBUFFER (dst_object); | ||
| 9464 | ptrdiff_t buf_pt = BUF_PT (buf); | ||
| 9465 | |||
| 9466 | invalidate_buffer_caches (buf, buf_pt, buf_pt); | ||
| 9467 | } | ||
| 9468 | |||
| 8975 | if (encodep) | 9469 | if (encodep) |
| 8976 | encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object); | 9470 | encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object); |
| 8977 | else | 9471 | else |
| @@ -8998,6 +9492,55 @@ code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system, | |||
| 8998 | return code_convert_string (string, coding_system, Qt, encodep, 0, 1); | 9492 | return code_convert_string (string, coding_system, Qt, encodep, 0, 1); |
| 8999 | } | 9493 | } |
| 9000 | 9494 | ||
| 9495 | /* Encode or decode a file name, to or from a unibyte string suitable | ||
| 9496 | for passing to C library functions. */ | ||
| 9497 | Lisp_Object | ||
| 9498 | decode_file_name (Lisp_Object fname) | ||
| 9499 | { | ||
| 9500 | #ifdef WINDOWSNT | ||
| 9501 | /* The w32 build pretends to use UTF-8 for file-name encoding, and | ||
| 9502 | converts the file names either to UTF-16LE or to the system ANSI | ||
| 9503 | codepage internally, depending on the underlying OS; see w32.c. */ | ||
| 9504 | if (! NILP (Fcoding_system_p (Qutf_8))) | ||
| 9505 | return code_convert_string_norecord (fname, Qutf_8, 0); | ||
| 9506 | return fname; | ||
| 9507 | #else /* !WINDOWSNT */ | ||
| 9508 | if (! NILP (Vfile_name_coding_system)) | ||
| 9509 | return code_convert_string_norecord (fname, Vfile_name_coding_system, 0); | ||
| 9510 | else if (! NILP (Vdefault_file_name_coding_system)) | ||
| 9511 | return code_convert_string_norecord (fname, | ||
| 9512 | Vdefault_file_name_coding_system, 0); | ||
| 9513 | else | ||
| 9514 | return fname; | ||
| 9515 | #endif | ||
| 9516 | } | ||
| 9517 | |||
| 9518 | Lisp_Object | ||
| 9519 | encode_file_name (Lisp_Object fname) | ||
| 9520 | { | ||
| 9521 | /* This is especially important during bootstrap and dumping, when | ||
| 9522 | file-name encoding is not yet known, and therefore any non-ASCII | ||
| 9523 | file names are unibyte strings, and could only be thrashed if we | ||
| 9524 | try to encode them. */ | ||
| 9525 | if (!STRING_MULTIBYTE (fname)) | ||
| 9526 | return fname; | ||
| 9527 | #ifdef WINDOWSNT | ||
| 9528 | /* The w32 build pretends to use UTF-8 for file-name encoding, and | ||
| 9529 | converts the file names either to UTF-16LE or to the system ANSI | ||
| 9530 | codepage internally, depending on the underlying OS; see w32.c. */ | ||
| 9531 | if (! NILP (Fcoding_system_p (Qutf_8))) | ||
| 9532 | return code_convert_string_norecord (fname, Qutf_8, 1); | ||
| 9533 | return fname; | ||
| 9534 | #else /* !WINDOWSNT */ | ||
| 9535 | if (! NILP (Vfile_name_coding_system)) | ||
| 9536 | return code_convert_string_norecord (fname, Vfile_name_coding_system, 1); | ||
| 9537 | else if (! NILP (Vdefault_file_name_coding_system)) | ||
| 9538 | return code_convert_string_norecord (fname, | ||
| 9539 | Vdefault_file_name_coding_system, 1); | ||
| 9540 | else | ||
| 9541 | return fname; | ||
| 9542 | #endif | ||
| 9543 | } | ||
| 9001 | 9544 | ||
| 9002 | DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string, | 9545 | DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string, |
| 9003 | 2, 4, 0, | 9546 | 2, 4, 0, |
| @@ -9210,7 +9753,7 @@ DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_intern | |||
| 9210 | tset_charset_list | 9753 | tset_charset_list |
| 9211 | (term, (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK | 9754 | (term, (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK |
| 9212 | ? coding_charset_list (terminal_coding) | 9755 | ? coding_charset_list (terminal_coding) |
| 9213 | : Fcons (make_number (charset_ascii), Qnil))); | 9756 | : list1 (make_number (charset_ascii)))); |
| 9214 | return Qnil; | 9757 | return Qnil; |
| 9215 | } | 9758 | } |
| 9216 | 9759 | ||
| @@ -9655,9 +10198,9 @@ usage: (define-coding-system-internal ...) */) | |||
| 9655 | { | 10198 | { |
| 9656 | dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp))); | 10199 | dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp))); |
| 9657 | if (dim < dim2) | 10200 | if (dim < dim2) |
| 9658 | tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil)); | 10201 | tmp = list2 (XCAR (tail), tmp); |
| 9659 | else | 10202 | else |
| 9660 | tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil)); | 10203 | tmp = list2 (tmp, XCAR (tail)); |
| 9661 | } | 10204 | } |
| 9662 | else | 10205 | else |
| 9663 | { | 10206 | { |
| @@ -9668,7 +10211,7 @@ usage: (define-coding-system-internal ...) */) | |||
| 9668 | break; | 10211 | break; |
| 9669 | } | 10212 | } |
| 9670 | if (NILP (tmp2)) | 10213 | if (NILP (tmp2)) |
| 9671 | tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil)); | 10214 | tmp = nconc2 (tmp, list1 (XCAR (tail))); |
| 9672 | else | 10215 | else |
| 9673 | { | 10216 | { |
| 9674 | XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2))); | 10217 | XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2))); |
| @@ -9954,7 +10497,17 @@ usage: (define-coding-system-internal ...) */) | |||
| 9954 | : coding_category_utf_8_sig); | 10497 | : coding_category_utf_8_sig); |
| 9955 | } | 10498 | } |
| 9956 | else if (EQ (coding_type, Qundecided)) | 10499 | else if (EQ (coding_type, Qundecided)) |
| 9957 | category = coding_category_undecided; | 10500 | { |
| 10501 | if (nargs < coding_arg_undecided_max) | ||
| 10502 | goto short_args; | ||
| 10503 | ASET (attrs, coding_attr_undecided_inhibit_null_byte_detection, | ||
| 10504 | args[coding_arg_undecided_inhibit_null_byte_detection]); | ||
| 10505 | ASET (attrs, coding_attr_undecided_inhibit_iso_escape_detection, | ||
| 10506 | args[coding_arg_undecided_inhibit_iso_escape_detection]); | ||
| 10507 | ASET (attrs, coding_attr_undecided_prefer_utf_8, | ||
| 10508 | args[coding_arg_undecided_prefer_utf_8]); | ||
| 10509 | category = coding_category_undecided; | ||
| 10510 | } | ||
| 9958 | else | 10511 | else |
| 9959 | error ("Invalid coding system type: %s", | 10512 | error ("Invalid coding system type: %s", |
| 9960 | SDATA (SYMBOL_NAME (coding_type))); | 10513 | SDATA (SYMBOL_NAME (coding_type))); |
| @@ -9976,7 +10529,7 @@ usage: (define-coding-system-internal ...) */) | |||
| 9976 | && ! EQ (eol_type, Qmac)) | 10529 | && ! EQ (eol_type, Qmac)) |
| 9977 | error ("Invalid eol-type"); | 10530 | error ("Invalid eol-type"); |
| 9978 | 10531 | ||
| 9979 | aliases = Fcons (name, Qnil); | 10532 | aliases = list1 (name); |
| 9980 | 10533 | ||
| 9981 | if (NILP (eol_type)) | 10534 | if (NILP (eol_type)) |
| 9982 | { | 10535 | { |
| @@ -9986,7 +10539,7 @@ usage: (define-coding-system-internal ...) */) | |||
| 9986 | Lisp_Object this_spec, this_name, this_aliases, this_eol_type; | 10539 | Lisp_Object this_spec, this_name, this_aliases, this_eol_type; |
| 9987 | 10540 | ||
| 9988 | this_name = AREF (eol_type, i); | 10541 | this_name = AREF (eol_type, i); |
| 9989 | this_aliases = Fcons (this_name, Qnil); | 10542 | this_aliases = list1 (this_name); |
| 9990 | this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac); | 10543 | this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac); |
| 9991 | this_spec = make_uninit_vector (3); | 10544 | this_spec = make_uninit_vector (3); |
| 9992 | ASET (this_spec, 0, attrs); | 10545 | ASET (this_spec, 0, attrs); |
| @@ -10101,7 +10654,7 @@ DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, | |||
| 10101 | list. */ | 10654 | list. */ |
| 10102 | while (!NILP (XCDR (aliases))) | 10655 | while (!NILP (XCDR (aliases))) |
| 10103 | aliases = XCDR (aliases); | 10656 | aliases = XCDR (aliases); |
| 10104 | XSETCDR (aliases, Fcons (alias, Qnil)); | 10657 | XSETCDR (aliases, list1 (alias)); |
| 10105 | 10658 | ||
| 10106 | eol_type = AREF (spec, 2); | 10659 | eol_type = AREF (spec, 2); |
| 10107 | if (VECTORP (eol_type)) | 10660 | if (VECTORP (eol_type)) |
| @@ -10335,11 +10888,6 @@ syms_of_coding (void) | |||
| 10335 | Fput (Qcoding_system_error, Qerror_message, | 10888 | Fput (Qcoding_system_error, Qerror_message, |
| 10336 | build_pure_c_string ("Invalid coding system")); | 10889 | build_pure_c_string ("Invalid coding system")); |
| 10337 | 10890 | ||
| 10338 | /* Intern this now in case it isn't already done. | ||
| 10339 | Setting this variable twice is harmless. | ||
| 10340 | But don't staticpro it here--that is done in alloc.c. */ | ||
| 10341 | Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots"); | ||
| 10342 | |||
| 10343 | DEFSYM (Qtranslation_table, "translation-table"); | 10891 | DEFSYM (Qtranslation_table, "translation-table"); |
| 10344 | Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2)); | 10892 | Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2)); |
| 10345 | DEFSYM (Qtranslation_table_id, "translation-table-id"); | 10893 | DEFSYM (Qtranslation_table_id, "translation-table-id"); |
| @@ -10408,10 +10956,8 @@ syms_of_coding (void) | |||
| 10408 | intern_c_string ("coding-category-undecided")); | 10956 | intern_c_string ("coding-category-undecided")); |
| 10409 | 10957 | ||
| 10410 | DEFSYM (Qinsufficient_source, "insufficient-source"); | 10958 | DEFSYM (Qinsufficient_source, "insufficient-source"); |
| 10411 | DEFSYM (Qinconsistent_eol, "inconsistent-eol"); | ||
| 10412 | DEFSYM (Qinvalid_source, "invalid-source"); | 10959 | DEFSYM (Qinvalid_source, "invalid-source"); |
| 10413 | DEFSYM (Qinterrupted, "interrupted"); | 10960 | DEFSYM (Qinterrupted, "interrupted"); |
| 10414 | DEFSYM (Qinsufficient_memory, "insufficient-memory"); | ||
| 10415 | DEFSYM (Qcoding_system_define_form, "coding-system-define-form"); | 10961 | DEFSYM (Qcoding_system_define_form, "coding-system-define-form"); |
| 10416 | 10962 | ||
| 10417 | defsubr (&Scoding_system_p); | 10963 | defsubr (&Scoding_system_p); |
| @@ -10728,6 +11274,11 @@ from GNU Find and GNU Grep. Emacs will then ignore the null bytes and | |||
| 10728 | decode text as usual. */); | 11274 | decode text as usual. */); |
| 10729 | inhibit_null_byte_detection = 0; | 11275 | inhibit_null_byte_detection = 0; |
| 10730 | 11276 | ||
| 11277 | DEFVAR_BOOL ("disable-ascii-optimization", disable_ascii_optimization, | ||
| 11278 | doc: /* If non-nil, Emacs does not optimize code decoder for ASCII files. | ||
| 11279 | Internal use only. Removed after the experimental optimizer gets stable. */); | ||
| 11280 | disable_ascii_optimization = 0; | ||
| 11281 | |||
| 10731 | DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input, | 11282 | DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input, |
| 10732 | doc: /* Char table for translating self-inserting characters. | 11283 | doc: /* Char table for translating self-inserting characters. |
| 10733 | This is applied to the result of input methods, not their input. | 11284 | This is applied to the result of input methods, not their input. |
| @@ -10739,11 +11290,11 @@ internal character representation. */); | |||
| 10739 | Vtranslation_table_for_input = Qnil; | 11290 | Vtranslation_table_for_input = Qnil; |
| 10740 | 11291 | ||
| 10741 | { | 11292 | { |
| 10742 | Lisp_Object args[coding_arg_max]; | 11293 | Lisp_Object args[coding_arg_undecided_max]; |
| 10743 | Lisp_Object plist[16]; | 11294 | Lisp_Object plist[16]; |
| 10744 | int i; | 11295 | int i; |
| 10745 | 11296 | ||
| 10746 | for (i = 0; i < coding_arg_max; i++) | 11297 | for (i = 0; i < coding_arg_undecided_max; i++) |
| 10747 | args[i] = Qnil; | 11298 | args[i] = Qnil; |
| 10748 | 11299 | ||
| 10749 | plist[0] = intern_c_string (":name"); | 11300 | plist[0] = intern_c_string (":name"); |
| @@ -10780,7 +11331,9 @@ character."); | |||
| 10780 | plist[13] = build_pure_c_string ("No conversion on encoding, automatic conversion on decoding."); | 11331 | plist[13] = build_pure_c_string ("No conversion on encoding, automatic conversion on decoding."); |
| 10781 | plist[15] = args[coding_arg_eol_type] = Qnil; | 11332 | plist[15] = args[coding_arg_eol_type] = Qnil; |
| 10782 | args[coding_arg_plist] = Flist (16, plist); | 11333 | args[coding_arg_plist] = Flist (16, plist); |
| 10783 | Fdefine_coding_system_internal (coding_arg_max, args); | 11334 | args[coding_arg_undecided_inhibit_null_byte_detection] = make_number (0); |
| 11335 | args[coding_arg_undecided_inhibit_iso_escape_detection] = make_number (0); | ||
| 11336 | Fdefine_coding_system_internal (coding_arg_undecided_max, args); | ||
| 10784 | } | 11337 | } |
| 10785 | 11338 | ||
| 10786 | setup_coding_system (Qno_conversion, &safe_terminal_coding); | 11339 | setup_coding_system (Qno_conversion, &safe_terminal_coding); |