aboutsummaryrefslogtreecommitdiffstats
path: root/src/coding.c
diff options
context:
space:
mode:
authorBill Wohler2014-02-23 18:04:35 -0800
committerBill Wohler2014-02-23 18:04:35 -0800
commit3e93bafb95608467e438ba7f725fd1f020669f8c (patch)
treef2f90109f283e06a18caea3cb2a2623abcfb3a92 /src/coding.c
parent791c0d7634e44bb92ca85af605be84ff2ae08963 (diff)
parente918e27fdf331e89268fc2c9d7cf838d3ecf7aa7 (diff)
downloademacs-3e93bafb95608467e438ba7f725fd1f020669f8c.tar.gz
emacs-3e93bafb95608467e438ba7f725fd1f020669f8c.zip
Merge from trunk; up to 2014-02-23T23:41:17Z!lekktu@gmail.com.
Diffstat (limited to 'src/coding.c')
-rw-r--r--src/coding.c809
1 files changed, 681 insertions, 128 deletions
diff --git a/src/coding.c b/src/coding.c
index 868fb7df0ea..654e39c0e3d 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -1,5 +1,5 @@
1/* Coding system handler (conversion, detection, etc). 1/* Coding system handler (conversion, detection, etc).
2 Copyright (C) 2001-2013 Free Software Foundation, Inc. 2 Copyright (C) 2001-2014 Free Software Foundation, Inc.
3 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 3 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
4 2005, 2006, 2007, 2008, 2009, 2010, 2011 4 2005, 2006, 2007, 2008, 2009, 2010, 2011
5 National Institute of Advanced Industrial Science and Technology (AIST) 5 National Institute of Advanced Industrial Science and Technology (AIST)
@@ -286,6 +286,10 @@ encode_coding_XXX (struct coding_system *coding)
286#include <config.h> 286#include <config.h>
287#include <stdio.h> 287#include <stdio.h>
288 288
289#ifdef HAVE_WCHAR_H
290#include <wchar.h>
291#endif /* HAVE_WCHAR_H */
292
289#include "lisp.h" 293#include "lisp.h"
290#include "character.h" 294#include "character.h"
291#include "buffer.h" 295#include "buffer.h"
@@ -322,8 +326,7 @@ Lisp_Object Qcall_process, Qcall_process_region;
322Lisp_Object Qstart_process, Qopen_network_stream; 326Lisp_Object Qstart_process, Qopen_network_stream;
323static Lisp_Object Qtarget_idx; 327static Lisp_Object Qtarget_idx;
324 328
325static Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source; 329static Lisp_Object Qinsufficient_source, Qinvalid_source, Qinterrupted;
326static Lisp_Object Qinterrupted, Qinsufficient_memory;
327 330
328/* If a symbol has this property, evaluate the value to define the 331/* If a symbol has this property, evaluate the value to define the
329 symbol as a coding system. */ 332 symbol as a coding system. */
@@ -490,6 +493,8 @@ enum iso_code_class_type
490 493
491#define CODING_ISO_FLAG_USE_OLDJIS 0x10000 494#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
492 495
496#define CODING_ISO_FLAG_LEVEL_4 0x20000
497
493#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000 498#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
494 499
495/* A character to be produced on output if encoding of the original 500/* A character to be produced on output if encoding of the original
@@ -646,12 +651,45 @@ static struct coding_system coding_categories[coding_category_max];
646#define max(a, b) ((a) > (b) ? (a) : (b)) 651#define max(a, b) ((a) > (b) ? (a) : (b))
647#endif 652#endif
648 653
654/* Encode the tri-state Lisp value FLAG as an int: nil becomes -1, t becomes 1, and any other value becomes 0. */
655
656static int
657encode_inhibit_flag (Lisp_Object flag)
658{
659 return NILP (flag) ? -1 : EQ (flag, Qt); /* nil is tested first, so EQ only has to tell t apart from "something else". */
660}
661
662/* Return true if the flag encoded as ENCODED_FLAG should be treated as set.
663 ENCODED_FLAG is 1 for yes, -1 for no, and 0 to defer to the user variable VAR. */
664
665static bool
666inhibit_flag (int encoded_flag, bool var)
667{
668 return 0 < encoded_flag + var; /* VAR adds 0 or 1: 1 stays positive, -1 never becomes positive, and 0 yields VAR itself. */
669}
670
649#define CODING_GET_INFO(coding, attrs, charset_list) \ 671#define CODING_GET_INFO(coding, attrs, charset_list) \
650 do { \ 672 do { \
651 (attrs) = CODING_ID_ATTRS ((coding)->id); \ 673 (attrs) = CODING_ID_ATTRS ((coding)->id); \
652 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \ 674 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
653 } while (0) 675 } while (0)
654 676
677static void /* Apply CHECK_NATNUM to the car of X and store the result back into X. */
678CHECK_NATNUM_CAR (Lisp_Object x)
679{
680 Lisp_Object tmp = XCAR (x); /* Work on a local copy of the car. */
681 CHECK_NATNUM (tmp); /* NOTE(review): presumably signals an error unless TMP is a non-negative integer -- confirm against lisp.h. */
682 XSETCAR (x, tmp); /* Write the checked value back as the car of X. */
683}
684
685static void /* Apply CHECK_NATNUM to the cdr of X and store the result back into X. */
686CHECK_NATNUM_CDR (Lisp_Object x)
687{
688 Lisp_Object tmp = XCDR (x); /* Work on a local copy of the cdr. */
689 CHECK_NATNUM (tmp); /* NOTE(review): presumably signals an error unless TMP is a non-negative integer -- confirm against lisp.h. */
690 XSETCDR (x, tmp); /* Write the checked value back as the cdr of X. */
691}
692
655 693
656/* Safely get one byte from the source text pointed by SRC which ends 694/* Safely get one byte from the source text pointed by SRC which ends
657 at SRC_END, and set C to that byte. If there are not enough bytes 695 at SRC_END, and set C to that byte. If there are not enough bytes
@@ -820,18 +858,12 @@ record_conversion_result (struct coding_system *coding,
820 case CODING_RESULT_INSUFFICIENT_SRC: 858 case CODING_RESULT_INSUFFICIENT_SRC:
821 Vlast_code_conversion_error = Qinsufficient_source; 859 Vlast_code_conversion_error = Qinsufficient_source;
822 break; 860 break;
823 case CODING_RESULT_INCONSISTENT_EOL:
824 Vlast_code_conversion_error = Qinconsistent_eol;
825 break;
826 case CODING_RESULT_INVALID_SRC: 861 case CODING_RESULT_INVALID_SRC:
827 Vlast_code_conversion_error = Qinvalid_source; 862 Vlast_code_conversion_error = Qinvalid_source;
828 break; 863 break;
829 case CODING_RESULT_INTERRUPT: 864 case CODING_RESULT_INTERRUPT:
830 Vlast_code_conversion_error = Qinterrupted; 865 Vlast_code_conversion_error = Qinterrupted;
831 break; 866 break;
832 case CODING_RESULT_INSUFFICIENT_MEM:
833 Vlast_code_conversion_error = Qinsufficient_memory;
834 break;
835 case CODING_RESULT_INSUFFICIENT_DST: 867 case CODING_RESULT_INSUFFICIENT_DST:
836 /* Don't record this error in Vlast_code_conversion_error 868 /* Don't record this error in Vlast_code_conversion_error
837 because it happens just temporarily and is resolved when the 869 because it happens just temporarily and is resolved when the
@@ -1128,6 +1160,14 @@ alloc_destination (struct coding_system *coding, ptrdiff_t nbytes,
1128 *buf++ = id; \ 1160 *buf++ = id; \
1129 } while (0) 1161 } while (0)
1130 1162
1163
1164/* Bitmasks for coding->eol_seen. */
1165
1166#define EOL_SEEN_NONE 0
1167#define EOL_SEEN_LF 1
1168#define EOL_SEEN_CR 2
1169#define EOL_SEEN_CRLF 4
1170
1131 1171
1132/*** 2. Emacs' internal format (emacs-utf-8) ***/ 1172/*** 2. Emacs' internal format (emacs-utf-8) ***/
1133 1173
@@ -1150,6 +1190,9 @@ alloc_destination (struct coding_system *coding, ptrdiff_t nbytes,
1150#define UTF_8_BOM_2 0xBB 1190#define UTF_8_BOM_2 0xBB
1151#define UTF_8_BOM_3 0xBF 1191#define UTF_8_BOM_3 0xBF
1152 1192
1193/* Unlike the other detect_coding_XXX functions, this one also counts the
1194 number of characters and checks the EOL format. */
1195
1153static bool 1196static bool
1154detect_coding_utf_8 (struct coding_system *coding, 1197detect_coding_utf_8 (struct coding_system *coding,
1155 struct coding_detection_info *detect_info) 1198 struct coding_detection_info *detect_info)
@@ -1159,11 +1202,23 @@ detect_coding_utf_8 (struct coding_system *coding,
1159 bool multibytep = coding->src_multibyte; 1202 bool multibytep = coding->src_multibyte;
1160 ptrdiff_t consumed_chars = 0; 1203 ptrdiff_t consumed_chars = 0;
1161 bool bom_found = 0; 1204 bool bom_found = 0;
1162 bool found = 0; 1205 ptrdiff_t nchars = coding->head_ascii;
1206 int eol_seen = coding->eol_seen;
1163 1207
1164 detect_info->checked |= CATEGORY_MASK_UTF_8; 1208 detect_info->checked |= CATEGORY_MASK_UTF_8;
1165 /* A coding system of this category is always ASCII compatible. */ 1209 /* A coding system of this category is always ASCII compatible. */
1166 src += coding->head_ascii; 1210 src += nchars;
1211
1212 if (src == coding->source /* BOM should be at the head. */
1213 && src + 3 < src_end /* BOM is 3-byte long. */
1214 && src[0] == UTF_8_BOM_1
1215 && src[1] == UTF_8_BOM_2
1216 && src[2] == UTF_8_BOM_3)
1217 {
1218 bom_found = 1;
1219 src += 3;
1220 nchars++;
1221 }
1167 1222
1168 while (1) 1223 while (1)
1169 { 1224 {
@@ -1172,13 +1227,29 @@ detect_coding_utf_8 (struct coding_system *coding,
1172 src_base = src; 1227 src_base = src;
1173 ONE_MORE_BYTE (c); 1228 ONE_MORE_BYTE (c);
1174 if (c < 0 || UTF_8_1_OCTET_P (c)) 1229 if (c < 0 || UTF_8_1_OCTET_P (c))
1175 continue; 1230 {
1231 nchars++;
1232 if (c == '\r')
1233 {
1234 if (src < src_end && *src == '\n')
1235 {
1236 eol_seen |= EOL_SEEN_CRLF;
1237 src++;
1238 nchars++;
1239 }
1240 else
1241 eol_seen |= EOL_SEEN_CR;
1242 }
1243 else if (c == '\n')
1244 eol_seen |= EOL_SEEN_LF;
1245 continue;
1246 }
1176 ONE_MORE_BYTE (c1); 1247 ONE_MORE_BYTE (c1);
1177 if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1)) 1248 if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1178 break; 1249 break;
1179 if (UTF_8_2_OCTET_LEADING_P (c)) 1250 if (UTF_8_2_OCTET_LEADING_P (c))
1180 { 1251 {
1181 found = 1; 1252 nchars++;
1182 continue; 1253 continue;
1183 } 1254 }
1184 ONE_MORE_BYTE (c2); 1255 ONE_MORE_BYTE (c2);
@@ -1186,10 +1257,7 @@ detect_coding_utf_8 (struct coding_system *coding,
1186 break; 1257 break;
1187 if (UTF_8_3_OCTET_LEADING_P (c)) 1258 if (UTF_8_3_OCTET_LEADING_P (c))
1188 { 1259 {
1189 found = 1; 1260 nchars++;
1190 if (src_base == coding->source
1191 && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1192 bom_found = 1;
1193 continue; 1261 continue;
1194 } 1262 }
1195 ONE_MORE_BYTE (c3); 1263 ONE_MORE_BYTE (c3);
@@ -1197,7 +1265,7 @@ detect_coding_utf_8 (struct coding_system *coding,
1197 break; 1265 break;
1198 if (UTF_8_4_OCTET_LEADING_P (c)) 1266 if (UTF_8_4_OCTET_LEADING_P (c))
1199 { 1267 {
1200 found = 1; 1268 nchars++;
1201 continue; 1269 continue;
1202 } 1270 }
1203 ONE_MORE_BYTE (c4); 1271 ONE_MORE_BYTE (c4);
@@ -1205,7 +1273,7 @@ detect_coding_utf_8 (struct coding_system *coding,
1205 break; 1273 break;
1206 if (UTF_8_5_OCTET_LEADING_P (c)) 1274 if (UTF_8_5_OCTET_LEADING_P (c))
1207 { 1275 {
1208 found = 1; 1276 nchars++;
1209 continue; 1277 continue;
1210 } 1278 }
1211 break; 1279 break;
@@ -1222,14 +1290,18 @@ detect_coding_utf_8 (struct coding_system *coding,
1222 if (bom_found) 1290 if (bom_found)
1223 { 1291 {
1224 /* The first character 0xFFFE doesn't necessarily mean a BOM. */ 1292 /* The first character 0xFFFE doesn't necessarily mean a BOM. */
1225 detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG; 1293 detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1226 } 1294 }
1227 else 1295 else
1228 { 1296 {
1229 detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG; 1297 detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
1230 if (found) 1298 if (nchars < src_end - coding->source)
1231 detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG; 1299 /* The found characters are less than source bytes, which
1300 means that we found valid non-ASCII characters. */
1301 detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_NOSIG;
1232 } 1302 }
1303 coding->detected_utf8_bytes = src_base - coding->source;
1304 coding->detected_utf8_chars = nchars;
1233 return 1; 1305 return 1;
1234} 1306}
1235 1307
@@ -1294,6 +1366,45 @@ decode_coding_utf_8 (struct coding_system *coding)
1294 break; 1366 break;
1295 } 1367 }
1296 1368
1369 /* In the simple case, rapidly handle ordinary characters */
1370 if (multibytep && ! eol_dos
1371 && charbuf < charbuf_end - 6 && src < src_end - 6)
1372 {
1373 while (charbuf < charbuf_end - 6 && src < src_end - 6)
1374 {
1375 c1 = *src;
1376 if (c1 & 0x80)
1377 break;
1378 src++;
1379 consumed_chars++;
1380 *charbuf++ = c1;
1381
1382 c1 = *src;
1383 if (c1 & 0x80)
1384 break;
1385 src++;
1386 consumed_chars++;
1387 *charbuf++ = c1;
1388
1389 c1 = *src;
1390 if (c1 & 0x80)
1391 break;
1392 src++;
1393 consumed_chars++;
1394 *charbuf++ = c1;
1395
1396 c1 = *src;
1397 if (c1 & 0x80)
1398 break;
1399 src++;
1400 consumed_chars++;
1401 *charbuf++ = c1;
1402 }
1403 /* If we handled at least one character, restart the main loop. */
1404 if (src != src_base)
1405 continue;
1406 }
1407
1297 if (byte_after_cr >= 0) 1408 if (byte_after_cr >= 0)
1298 c1 = byte_after_cr, byte_after_cr = -1; 1409 c1 = byte_after_cr, byte_after_cr = -1;
1299 else 1410 else
@@ -1903,7 +2014,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
1903 int charset_ID; 2014 int charset_ID;
1904 unsigned code; 2015 unsigned code;
1905 int c; 2016 int c;
1906 int consumed_chars = 0; 2017 ptrdiff_t consumed_chars = 0;
1907 bool mseq_found = 0; 2018 bool mseq_found = 0;
1908 2019
1909 ONE_MORE_BYTE (c); 2020 ONE_MORE_BYTE (c);
@@ -3080,7 +3191,7 @@ detect_coding_iso_2022 (struct coding_system *coding,
3080 if (! single_shifting 3191 if (! single_shifting
3081 && ! (rejected & CATEGORY_MASK_ISO_8_2)) 3192 && ! (rejected & CATEGORY_MASK_ISO_8_2))
3082 { 3193 {
3083 int len = 1; 3194 ptrdiff_t len = 1;
3084 while (src < src_end) 3195 while (src < src_end)
3085 { 3196 {
3086 src_base = src; 3197 src_base = src;
@@ -3664,7 +3775,10 @@ decode_coding_iso_2022 (struct coding_system *coding)
3664 else 3775 else
3665 charset = CHARSET_FROM_ID (charset_id_2); 3776 charset = CHARSET_FROM_ID (charset_id_2);
3666 ONE_MORE_BYTE (c1); 3777 ONE_MORE_BYTE (c1);
3667 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)) 3778 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)
3779 || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3780 && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4)
3781 ? c1 >= 0x80 : c1 < 0x80)))
3668 goto invalid_code; 3782 goto invalid_code;
3669 break; 3783 break;
3670 3784
@@ -3678,7 +3792,10 @@ decode_coding_iso_2022 (struct coding_system *coding)
3678 else 3792 else
3679 charset = CHARSET_FROM_ID (charset_id_3); 3793 charset = CHARSET_FROM_ID (charset_id_3);
3680 ONE_MORE_BYTE (c1); 3794 ONE_MORE_BYTE (c1);
3681 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)) 3795 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)
3796 || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3797 && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4)
3798 ? c1 >= 0x80 : c1 < 0x80)))
3682 goto invalid_code; 3799 goto invalid_code;
3683 break; 3800 break;
3684 3801
@@ -3890,6 +4007,14 @@ decode_coding_iso_2022 (struct coding_system *coding)
3890 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); 4007 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3891 char_offset++; 4008 char_offset++;
3892 coding->errors++; 4009 coding->errors++;
4010 /* Reset the invocation and designation status to the safest
4011 one; i.e. designate ASCII to the graphic register 0, and
4012 invoke that register to the graphic plane 0. This typically
4013 helps the case that a designation sequence for ASCII "ESC (
4014 B" is somehow broken (e.g. broken by a newline). */
4015 CODING_ISO_INVOCATION (coding, 0) = 0;
4016 CODING_ISO_DESIGNATION (coding, 0) = charset_ascii;
4017 charset_id_0 = charset_ascii;
3893 continue; 4018 continue;
3894 4019
3895 break_loop: 4020 break_loop:
@@ -4332,7 +4457,7 @@ encode_coding_iso_2022 (struct coding_system *coding)
4332 { 4457 {
4333 /* We have to produce designation sequences if any now. */ 4458 /* We have to produce designation sequences if any now. */
4334 unsigned char desig_buf[16]; 4459 unsigned char desig_buf[16];
4335 int nbytes; 4460 ptrdiff_t nbytes;
4336 ptrdiff_t offset; 4461 ptrdiff_t offset;
4337 4462
4338 charset_map_loaded = 0; 4463 charset_map_loaded = 0;
@@ -5075,7 +5200,7 @@ decode_coding_ccl (struct coding_system *coding)
5075 source_charbuf[i++] = *p++; 5200 source_charbuf[i++] = *p++;
5076 5201
5077 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK) 5202 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5078 ccl->last_block = 1; 5203 ccl->last_block = true;
5079 /* As ccl_driver calls DECODE_CHAR, buffer may be relocated. */ 5204 /* As ccl_driver calls DECODE_CHAR, buffer may be relocated. */
5080 charset_map_loaded = 0; 5205 charset_map_loaded = 0;
5081 ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf, 5206 ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
@@ -5135,7 +5260,7 @@ encode_coding_ccl (struct coding_system *coding)
5135 CODING_GET_INFO (coding, attrs, charset_list); 5260 CODING_GET_INFO (coding, attrs, charset_list);
5136 if (coding->consumed_char == coding->src_chars 5261 if (coding->consumed_char == coding->src_chars
5137 && coding->mode & CODING_MODE_LAST_BLOCK) 5262 && coding->mode & CODING_MODE_LAST_BLOCK)
5138 ccl->last_block = 1; 5263 ccl->last_block = true;
5139 5264
5140 do 5265 do
5141 { 5266 {
@@ -5617,7 +5742,6 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
5617 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id); 5742 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
5618 5743
5619 coding->mode = 0; 5744 coding->mode = 0;
5620 coding->head_ascii = -1;
5621 if (VECTORP (eol_type)) 5745 if (VECTORP (eol_type))
5622 coding->common_flags = (CODING_REQUIRE_DECODING_MASK 5746 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5623 | CODING_REQUIRE_DETECTION_MASK); 5747 | CODING_REQUIRE_DETECTION_MASK);
@@ -5638,6 +5762,7 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
5638 coding->safe_charsets = SDATA (val); 5762 coding->safe_charsets = SDATA (val);
5639 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs)); 5763 coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5640 coding->carryover_bytes = 0; 5764 coding->carryover_bytes = 0;
5765 coding->raw_destination = 0;
5641 5766
5642 coding_type = CODING_ATTR_TYPE (attrs); 5767 coding_type = CODING_ATTR_TYPE (attrs);
5643 if (EQ (coding_type, Qundecided)) 5768 if (EQ (coding_type, Qundecided))
@@ -5646,6 +5771,14 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
5646 coding->decoder = decode_coding_raw_text; 5771 coding->decoder = decode_coding_raw_text;
5647 coding->encoder = encode_coding_raw_text; 5772 coding->encoder = encode_coding_raw_text;
5648 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK; 5773 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5774 coding->spec.undecided.inhibit_nbd
5775 = (encode_inhibit_flag
5776 (AREF (attrs, coding_attr_undecided_inhibit_null_byte_detection)));
5777 coding->spec.undecided.inhibit_ied
5778 = (encode_inhibit_flag
5779 (AREF (attrs, coding_attr_undecided_inhibit_iso_escape_detection)));
5780 coding->spec.undecided.prefer_utf_8
5781 = ! NILP (AREF (attrs, coding_attr_undecided_prefer_utf_8));
5649 } 5782 }
5650 else if (EQ (coding_type, Qiso_2022)) 5783 else if (EQ (coding_type, Qiso_2022))
5651 { 5784 {
@@ -6069,10 +6202,181 @@ complement_process_encoding_system (Lisp_Object coding_system)
6069 6202
6070*/ 6203*/
6071 6204
6072#define EOL_SEEN_NONE 0 6205static Lisp_Object adjust_coding_eol_type (struct coding_system *coding,
6073#define EOL_SEEN_LF 1 6206 int eol_seen);
6074#define EOL_SEEN_CR 2 6207
6075#define EOL_SEEN_CRLF 4 6208
6209/* Return the number of leading source bytes that are ASCII (high bit clear).
6210 As side effects, call coding_set_source, store the count in
6211 coding->head_ascii, and OR the EOL kinds met while scanning
6212 (EOL_SEEN_LF, EOL_SEEN_CR, EOL_SEEN_CRLF) into coding->eol_seen. That value
6213 is reliable only when all the source bytes are ASCII. */
6214
6215static ptrdiff_t
6216check_ascii (struct coding_system *coding)
6217{
6218 const unsigned char *src, *end;
6219 Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id);
6220 int eol_seen = coding->eol_seen; /* Start from the flags already recorded; bits are only ever added. */
6221
6222 coding_set_source (coding);
6223 src = coding->source;
6224 end = src + coding->src_bytes;
6225
6226 if (inhibit_eol_conversion
6227 || SYMBOLP (eol_type))
6228 {
6229 /* We don't have to check EOL format; only LF is recorded, CR is not examined in this branch. */
6230 while (src < end && !( *src & 0x80))
6231 {
6232 if (*src++ == '\n')
6233 eol_seen |= EOL_SEEN_LF;
6234 }
6235 }
6236 else
6237 {
6238 end--; /* We look ahead one byte for "CR LF". NOTE(review): with src_bytes == 0 this points before the source; both tests below are then false, but confirm callers never pass an empty source. */
6239 while (src < end)
6240 {
6241 int c = *src;
6242
6243 if (c & 0x80)
6244 break;
6245 src++;
6246 if (c == '\r')
6247 {
6248 if (*src == '\n') /* src is at most END here, i.e. still on the last source byte. */
6249 {
6250 eol_seen |= EOL_SEEN_CRLF;
6251 src++;
6252 }
6253 else
6254 eol_seen |= EOL_SEEN_CR;
6255 }
6256 else if (c == '\n')
6257 eol_seen |= EOL_SEEN_LF;
6258 }
6259 if (src == end) /* False when the loop stopped on a non-ASCII byte or consumed a trailing CR LF (src == end + 1). */
6260 {
6261 int c = *src;
6262
6263 /* All bytes before the last one, C, are ASCII. */
6264 if (! (c & 0x80))
6265 {
6266 if (c == '\r')
6267 eol_seen |= EOL_SEEN_CR;
6268 else if (c == '\n')
6269 eol_seen |= EOL_SEEN_LF;
6270 src++;
6271 }
6272 }
6273 }
6274 coding->head_ascii = src - coding->source; /* Byte count, so a CR LF pair contributes 2. */
6275 coding->eol_seen = eol_seen;
6276 return (coding->head_ascii);
6277}
6278
6279
6280/* Return the number of characters in the source if all the bytes are
6281 valid UTF-8 (of Unicode range). Otherwise, return -1. As a side
6282 effect on the success path, update coding->eol_seen. The value of coding->eol_seen is the
6283 "logical or" of EOL_SEEN_LF, EOL_SEEN_CR, and EOL_SEEN_CRLF, but
6284 the value is reliable only when all the source bytes are valid
6285 UTF-8. On the -1 paths only the local copy of the flags has been modified. */
6286
6287static ptrdiff_t
6288check_utf_8 (struct coding_system *coding)
6289{
6290 const unsigned char *src, *end;
6291 int eol_seen;
6292 ptrdiff_t nchars = coding->head_ascii; /* NOTE(review): read before check_ascii below can set head_ascii, so NCHARS starts negative when head_ascii < 0 -- confirm callers always set head_ascii first. */
6293
6294 if (coding->head_ascii < 0)
6295 check_ascii (coding);
6296 else
6297 coding_set_source (coding);
6298 src = coding->source + coding->head_ascii; /* Skip the ASCII prefix. */
6299 /* We look ahead one byte for CR LF, so END points at the last source byte. */
6300 end = coding->source + coding->src_bytes - 1;
6301 eol_seen = coding->eol_seen;
6302 while (src < end)
6303 {
6304 int c = *src;
6305
6306 if (UTF_8_1_OCTET_P (*src))
6307 {
6308 src++;
6309 if (c < 0x20)
6310 {
6311 if (c == '\r')
6312 {
6313 if (*src == '\n')
6314 {
6315 eol_seen |= EOL_SEEN_CRLF;
6316 src++;
6317 nchars++; /* Together with the increment at the loop bottom, CR LF counts as two characters. */
6318 }
6319 else
6320 eol_seen |= EOL_SEEN_CR;
6321 }
6322 else if (c == '\n')
6323 eol_seen |= EOL_SEEN_LF;
6324 }
6325 }
6326 else if (UTF_8_2_OCTET_LEADING_P (c))
6327 {
6328 if (c < 0xC2 /* overlong sequence */
6329 || src + 1 >= end /* NOTE(review): END is the last byte, so a sequence ending exactly on it is rejected too; looks conservative rather than required -- confirm. Same pattern in the 3- and 4-octet cases. */
6330 || ! UTF_8_EXTRA_OCTET_P (src[1]))
6331 return -1;
6332 src += 2;
6333 }
6334 else if (UTF_8_3_OCTET_LEADING_P (c))
6335 {
6336 if (src + 2 >= end
6337 || ! (UTF_8_EXTRA_OCTET_P (src[1])
6338 && UTF_8_EXTRA_OCTET_P (src[2])))
6339 return -1;
6340 c = (((c & 0xF) << 12)
6341 | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F)); /* Decode the code point so its range can be checked. */
6342 if (c < 0x800 /* overlong sequence */
6343 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
6344 return -1;
6345 src += 3;
6346 }
6347 else if (UTF_8_4_OCTET_LEADING_P (c))
6348 {
6349 if (src + 3 >= end
6350 || ! (UTF_8_EXTRA_OCTET_P (src[1])
6351 && UTF_8_EXTRA_OCTET_P (src[2])
6352 && UTF_8_EXTRA_OCTET_P (src[3])))
6353 return -1;
6354 c = (((c & 0x7) << 18) | ((src[1] & 0x3F) << 12)
6355 | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)); /* Decode the code point so its range can be checked. */
6356 if (c < 0x10000 /* overlong sequence */
6357 || c >= 0x110000) /* non-Unicode character */
6358 return -1;
6359 src += 4;
6360 }
6361 else
6362 return -1; /* Any other leading byte is rejected. */
6363 nchars++;
6364 }
6365
6366 if (src == end) /* Exactly one byte is left; it must be a single-octet character. */
6367 {
6368 if (! UTF_8_1_OCTET_P (*src))
6369 return -1;
6370 nchars++;
6371 if (*src == '\r')
6372 eol_seen |= EOL_SEEN_CR;
6373 else if (*src == '\n')
6374 eol_seen |= EOL_SEEN_LF;
6375 }
6376 coding->eol_seen = eol_seen;
6377 return nchars;
6378}
6379
6076 6380
6077/* Detect how end-of-line of a text of length SRC_BYTES pointed by 6381/* Detect how end-of-line of a text of length SRC_BYTES pointed by
6078 SOURCE is encoded. If CATEGORY is one of 6382 SOURCE is encoded. If CATEGORY is one of
@@ -6185,6 +6489,9 @@ adjust_coding_eol_type (struct coding_system *coding, int eol_seen)
6185 Lisp_Object eol_type; 6489 Lisp_Object eol_type;
6186 6490
6187 eol_type = CODING_ID_EOL_TYPE (coding->id); 6491 eol_type = CODING_ID_EOL_TYPE (coding->id);
6492 if (! VECTORP (eol_type))
6493 /* Already adjusted. */
6494 return eol_type;
6188 if (eol_seen & EOL_SEEN_LF) 6495 if (eol_seen & EOL_SEEN_LF)
6189 { 6496 {
6190 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0)); 6497 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
@@ -6212,14 +6519,16 @@ detect_coding (struct coding_system *coding)
6212{ 6519{
6213 const unsigned char *src, *src_end; 6520 const unsigned char *src, *src_end;
6214 unsigned int saved_mode = coding->mode; 6521 unsigned int saved_mode = coding->mode;
6522 Lisp_Object found = Qnil;
6523 Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id);
6215 6524
6216 coding->consumed = coding->consumed_char = 0; 6525 coding->consumed = coding->consumed_char = 0;
6217 coding->produced = coding->produced_char = 0; 6526 coding->produced = coding->produced_char = 0;
6218 coding_set_source (coding); 6527 coding_set_source (coding);
6219 6528
6220 src_end = coding->source + coding->src_bytes; 6529 src_end = coding->source + coding->src_bytes;
6221 coding->head_ascii = 0;
6222 6530
6531 coding->eol_seen = EOL_SEEN_NONE;
6223 /* If we have not yet decided the text encoding type, detect it 6532 /* If we have not yet decided the text encoding type, detect it
6224 now. */ 6533 now. */
6225 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) 6534 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
@@ -6227,7 +6536,13 @@ detect_coding (struct coding_system *coding)
6227 int c, i; 6536 int c, i;
6228 struct coding_detection_info detect_info; 6537 struct coding_detection_info detect_info;
6229 bool null_byte_found = 0, eight_bit_found = 0; 6538 bool null_byte_found = 0, eight_bit_found = 0;
6539 bool inhibit_nbd = inhibit_flag (coding->spec.undecided.inhibit_nbd,
6540 inhibit_null_byte_detection);
6541 bool inhibit_ied = inhibit_flag (coding->spec.undecided.inhibit_ied,
6542 inhibit_iso_escape_detection);
6543 bool prefer_utf_8 = coding->spec.undecided.prefer_utf_8;
6230 6544
6545 coding->head_ascii = 0;
6231 detect_info.checked = detect_info.found = detect_info.rejected = 0; 6546 detect_info.checked = detect_info.found = detect_info.rejected = 0;
6232 for (src = coding->source; src < src_end; src++) 6547 for (src = coding->source; src < src_end; src++)
6233 { 6548 {
@@ -6241,7 +6556,7 @@ detect_coding (struct coding_system *coding)
6241 else if (c < 0x20) 6556 else if (c < 0x20)
6242 { 6557 {
6243 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) 6558 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6244 && ! inhibit_iso_escape_detection 6559 && ! inhibit_ied
6245 && ! detect_info.checked) 6560 && ! detect_info.checked)
6246 { 6561 {
6247 if (detect_coding_iso_2022 (coding, &detect_info)) 6562 if (detect_coding_iso_2022 (coding, &detect_info))
@@ -6260,12 +6575,33 @@ detect_coding (struct coding_system *coding)
6260 break; 6575 break;
6261 } 6576 }
6262 } 6577 }
6263 else if (! c && !inhibit_null_byte_detection) 6578 else if (! c && !inhibit_nbd)
6264 { 6579 {
6265 null_byte_found = 1; 6580 null_byte_found = 1;
6266 if (eight_bit_found) 6581 if (eight_bit_found)
6267 break; 6582 break;
6268 } 6583 }
6584 else if (! disable_ascii_optimization
6585 && ! inhibit_eol_conversion)
6586 {
6587 if (c == '\r')
6588 {
6589 if (src < src_end && src[1] == '\n')
6590 {
6591 coding->eol_seen |= EOL_SEEN_CRLF;
6592 src++;
6593 if (! eight_bit_found)
6594 coding->head_ascii++;
6595 }
6596 else
6597 coding->eol_seen |= EOL_SEEN_CR;
6598 }
6599 else if (c == '\n')
6600 {
6601 coding->eol_seen |= EOL_SEEN_LF;
6602 }
6603 }
6604
6269 if (! eight_bit_found) 6605 if (! eight_bit_found)
6270 coding->head_ascii++; 6606 coding->head_ascii++;
6271 } 6607 }
@@ -6296,6 +6632,12 @@ detect_coding (struct coding_system *coding)
6296 detect_info.checked |= ~CATEGORY_MASK_UTF_16; 6632 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6297 detect_info.rejected |= ~CATEGORY_MASK_UTF_16; 6633 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
6298 } 6634 }
6635 else if (prefer_utf_8
6636 && detect_coding_utf_8 (coding, &detect_info))
6637 {
6638 detect_info.checked |= ~CATEGORY_MASK_UTF_8;
6639 detect_info.rejected |= ~CATEGORY_MASK_UTF_8;
6640 }
6299 for (i = 0; i < coding_category_raw_text; i++) 6641 for (i = 0; i < coding_category_raw_text; i++)
6300 { 6642 {
6301 category = coding_priorities[i]; 6643 category = coding_priorities[i];
@@ -6317,32 +6659,58 @@ detect_coding (struct coding_system *coding)
6317 } 6659 }
6318 else if ((*(this->detector)) (coding, &detect_info) 6660 else if ((*(this->detector)) (coding, &detect_info)
6319 && detect_info.found & (1 << category)) 6661 && detect_info.found & (1 << category))
6320 { 6662 break;
6321 if (category == coding_category_utf_16_auto)
6322 {
6323 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6324 category = coding_category_utf_16_le;
6325 else
6326 category = coding_category_utf_16_be;
6327 }
6328 break;
6329 }
6330 } 6663 }
6331 } 6664 }
6332 6665
6333 if (i < coding_category_raw_text) 6666 if (i < coding_category_raw_text)
6334 setup_coding_system (CODING_ID_NAME (this->id), coding); 6667 {
6668 if (category == coding_category_utf_8_auto)
6669 {
6670 Lisp_Object coding_systems;
6671
6672 coding_systems = AREF (CODING_ID_ATTRS (this->id),
6673 coding_attr_utf_bom);
6674 if (CONSP (coding_systems))
6675 {
6676 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6677 found = XCAR (coding_systems);
6678 else
6679 found = XCDR (coding_systems);
6680 }
6681 else
6682 found = CODING_ID_NAME (this->id);
6683 }
6684 else if (category == coding_category_utf_16_auto)
6685 {
6686 Lisp_Object coding_systems;
6687
6688 coding_systems = AREF (CODING_ID_ATTRS (this->id),
6689 coding_attr_utf_bom);
6690 if (CONSP (coding_systems))
6691 {
6692 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6693 found = XCAR (coding_systems);
6694 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6695 found = XCDR (coding_systems);
6696 }
6697 else
6698 found = CODING_ID_NAME (this->id);
6699 }
6700 else
6701 found = CODING_ID_NAME (this->id);
6702 }
6335 else if (null_byte_found) 6703 else if (null_byte_found)
6336 setup_coding_system (Qno_conversion, coding); 6704 found = Qno_conversion;
6337 else if ((detect_info.rejected & CATEGORY_MASK_ANY) 6705 else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6338 == CATEGORY_MASK_ANY) 6706 == CATEGORY_MASK_ANY)
6339 setup_coding_system (Qraw_text, coding); 6707 found = Qraw_text;
6340 else if (detect_info.rejected) 6708 else if (detect_info.rejected)
6341 for (i = 0; i < coding_category_raw_text; i++) 6709 for (i = 0; i < coding_category_raw_text; i++)
6342 if (! (detect_info.rejected & (1 << coding_priorities[i]))) 6710 if (! (detect_info.rejected & (1 << coding_priorities[i])))
6343 { 6711 {
6344 this = coding_categories + coding_priorities[i]; 6712 this = coding_categories + coding_priorities[i];
6345 setup_coding_system (CODING_ID_NAME (this->id), coding); 6713 found = CODING_ID_NAME (this->id);
6346 break; 6714 break;
6347 } 6715 }
6348 } 6716 }
@@ -6356,14 +6724,21 @@ detect_coding (struct coding_system *coding)
6356 coding_systems 6724 coding_systems
6357 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom); 6725 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6358 detect_info.found = detect_info.rejected = 0; 6726 detect_info.found = detect_info.rejected = 0;
6359 coding->head_ascii = 0; 6727 if (check_ascii (coding) == coding->src_bytes)
6360 if (CONSP (coding_systems)
6361 && detect_coding_utf_8 (coding, &detect_info))
6362 { 6728 {
6363 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG) 6729 if (CONSP (coding_systems))
6364 setup_coding_system (XCAR (coding_systems), coding); 6730 found = XCDR (coding_systems);
6365 else 6731 }
6366 setup_coding_system (XCDR (coding_systems), coding); 6732 else
6733 {
6734 if (CONSP (coding_systems)
6735 && detect_coding_utf_8 (coding, &detect_info))
6736 {
6737 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6738 found = XCAR (coding_systems);
6739 else
6740 found = XCDR (coding_systems);
6741 }
6367 } 6742 }
6368 } 6743 }
6369 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) 6744 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
@@ -6380,11 +6755,24 @@ detect_coding (struct coding_system *coding)
6380 && detect_coding_utf_16 (coding, &detect_info)) 6755 && detect_coding_utf_16 (coding, &detect_info))
6381 { 6756 {
6382 if (detect_info.found & CATEGORY_MASK_UTF_16_LE) 6757 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6383 setup_coding_system (XCAR (coding_systems), coding); 6758 found = XCAR (coding_systems);
6384 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE) 6759 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6385 setup_coding_system (XCDR (coding_systems), coding); 6760 found = XCDR (coding_systems);
6386 } 6761 }
6387 } 6762 }
6763
6764 if (! NILP (found))
6765 {
6766 int specified_eol = (VECTORP (eol_type) ? EOL_SEEN_NONE
6767 : EQ (eol_type, Qdos) ? EOL_SEEN_CRLF
6768 : EQ (eol_type, Qmac) ? EOL_SEEN_CR
6769 : EOL_SEEN_LF);
6770
6771 setup_coding_system (found, coding);
6772 if (specified_eol != EOL_SEEN_NONE)
6773 adjust_coding_eol_type (coding, specified_eol);
6774 }
6775
6388 coding->mode = saved_mode; 6776 coding->mode = saved_mode;
6389} 6777}
6390 6778
@@ -6525,11 +6913,9 @@ get_translation_table (Lisp_Object attrs, bool encodep, int *max_lookup)
6525 if (CHAR_TABLE_P (standard)) 6913 if (CHAR_TABLE_P (standard))
6526 { 6914 {
6527 if (CONSP (translation_table)) 6915 if (CONSP (translation_table))
6528 translation_table = nconc2 (translation_table, 6916 translation_table = nconc2 (translation_table, list1 (standard));
6529 Fcons (standard, Qnil));
6530 else 6917 else
6531 translation_table = Fcons (translation_table, 6918 translation_table = list2 (translation_table, standard);
6532 Fcons (standard, Qnil));
6533 } 6919 }
6534 } 6920 }
6535 6921
@@ -6813,7 +7199,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table,
6813 7199
6814 produced = dst - (coding->destination + coding->produced); 7200 produced = dst - (coding->destination + coding->produced);
6815 if (BUFFERP (coding->dst_object) && produced_chars > 0) 7201 if (BUFFERP (coding->dst_object) && produced_chars > 0)
6816 insert_from_gap (produced_chars, produced); 7202 insert_from_gap (produced_chars, produced, 0);
6817 coding->produced += produced; 7203 coding->produced += produced;
6818 coding->produced_char += produced_chars; 7204 coding->produced_char += produced_chars;
6819 return carryover; 7205 return carryover;
@@ -6884,22 +7270,8 @@ produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
6884 7270
6885#define ALLOC_CONVERSION_WORK_AREA(coding) \ 7271#define ALLOC_CONVERSION_WORK_AREA(coding) \
6886 do { \ 7272 do { \
6887 int size = CHARBUF_SIZE; \ 7273 coding->charbuf = SAFE_ALLOCA (CHARBUF_SIZE * sizeof (int)); \
6888 \ 7274 coding->charbuf_size = CHARBUF_SIZE; \
6889 coding->charbuf = NULL; \
6890 while (size > 1024) \
6891 { \
6892 coding->charbuf = alloca (sizeof (int) * size); \
6893 if (coding->charbuf) \
6894 break; \
6895 size >>= 1; \
6896 } \
6897 if (! coding->charbuf) \
6898 { \
6899 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
6900 return; \
6901 } \
6902 coding->charbuf_size = size; \
6903 } while (0) 7275 } while (0)
6904 7276
6905 7277
@@ -6968,6 +7340,8 @@ decode_coding (struct coding_system *coding)
6968 int carryover; 7340 int carryover;
6969 int i; 7341 int i;
6970 7342
7343 USE_SAFE_ALLOCA;
7344
6971 if (BUFFERP (coding->src_object) 7345 if (BUFFERP (coding->src_object)
6972 && coding->src_pos > 0 7346 && coding->src_pos > 0
6973 && coding->src_pos < GPT 7347 && coding->src_pos < GPT
@@ -7041,7 +7415,7 @@ decode_coding (struct coding_system *coding)
7041 coding->carryover_bytes = 0; 7415 coding->carryover_bytes = 0;
7042 if (coding->consumed < coding->src_bytes) 7416 if (coding->consumed < coding->src_bytes)
7043 { 7417 {
7044 int nbytes = coding->src_bytes - coding->consumed; 7418 ptrdiff_t nbytes = coding->src_bytes - coding->consumed;
7045 const unsigned char *src; 7419 const unsigned char *src;
7046 7420
7047 coding_set_source (coding); 7421 coding_set_source (coding);
@@ -7090,6 +7464,8 @@ decode_coding (struct coding_system *coding)
7090 bset_undo_list (current_buffer, undo_list); 7464 bset_undo_list (current_buffer, undo_list);
7091 record_insert (coding->dst_pos, coding->produced_char); 7465 record_insert (coding->dst_pos, coding->produced_char);
7092 } 7466 }
7467
7468 SAFE_FREE ();
7093} 7469}
7094 7470
7095 7471
@@ -7123,7 +7499,7 @@ handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit,
7123 /* We found a composition. Store the corresponding 7499 /* We found a composition. Store the corresponding
7124 annotation data in BUF. */ 7500 annotation data in BUF. */
7125 int *head = buf; 7501 int *head = buf;
7126 enum composition_method method = COMPOSITION_METHOD (prop); 7502 enum composition_method method = composition_method (prop);
7127 int nchars = COMPOSITION_LENGTH (prop); 7503 int nchars = COMPOSITION_LENGTH (prop);
7128 7504
7129 ADD_COMPOSITION_DATA (buf, nchars, 0, method); 7505 ADD_COMPOSITION_DATA (buf, nchars, 0, method);
@@ -7373,6 +7749,8 @@ encode_coding (struct coding_system *coding)
7373 int max_lookup; 7749 int max_lookup;
7374 struct ccl_spec cclspec; 7750 struct ccl_spec cclspec;
7375 7751
7752 USE_SAFE_ALLOCA;
7753
7376 attrs = CODING_ID_ATTRS (coding->id); 7754 attrs = CODING_ID_ATTRS (coding->id);
7377 if (coding->encoder == encode_coding_raw_text) 7755 if (coding->encoder == encode_coding_raw_text)
7378 translation_table = Qnil, max_lookup = 0; 7756 translation_table = Qnil, max_lookup = 0;
@@ -7406,7 +7784,9 @@ encode_coding (struct coding_system *coding)
7406 } while (coding->consumed_char < coding->src_chars); 7784 } while (coding->consumed_char < coding->src_chars);
7407 7785
7408 if (BUFFERP (coding->dst_object) && coding->produced_char > 0) 7786 if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7409 insert_from_gap (coding->produced_char, coding->produced); 7787 insert_from_gap (coding->produced_char, coding->produced, 0);
7788
7789 SAFE_FREE ();
7410} 7790}
7411 7791
7412 7792
@@ -7460,7 +7840,7 @@ make_conversion_work_buffer (bool multibyte)
7460} 7840}
7461 7841
7462 7842
7463static Lisp_Object 7843static void
7464code_conversion_restore (Lisp_Object arg) 7844code_conversion_restore (Lisp_Object arg)
7465{ 7845{
7466 Lisp_Object current, workbuf; 7846 Lisp_Object current, workbuf;
@@ -7478,7 +7858,6 @@ code_conversion_restore (Lisp_Object arg)
7478 } 7858 }
7479 set_buffer_internal (XBUFFER (current)); 7859 set_buffer_internal (XBUFFER (current));
7480 UNGCPRO; 7860 UNGCPRO;
7481 return Qnil;
7482} 7861}
7483 7862
7484Lisp_Object 7863Lisp_Object
@@ -7500,8 +7879,6 @@ decode_coding_gap (struct coding_system *coding,
7500 ptrdiff_t count = SPECPDL_INDEX (); 7879 ptrdiff_t count = SPECPDL_INDEX ();
7501 Lisp_Object attrs; 7880 Lisp_Object attrs;
7502 7881
7503 code_conversion_save (0, 0);
7504
7505 coding->src_object = Fcurrent_buffer (); 7882 coding->src_object = Fcurrent_buffer ();
7506 coding->src_chars = chars; 7883 coding->src_chars = chars;
7507 coding->src_bytes = bytes; 7884 coding->src_bytes = bytes;
@@ -7513,15 +7890,96 @@ decode_coding_gap (struct coding_system *coding,
7513 coding->dst_pos_byte = PT_BYTE; 7890 coding->dst_pos_byte = PT_BYTE;
7514 coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); 7891 coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
7515 7892
7893 coding->head_ascii = -1;
7894 coding->detected_utf8_bytes = coding->detected_utf8_chars = -1;
7895 coding->eol_seen = EOL_SEEN_NONE;
7516 if (CODING_REQUIRE_DETECTION (coding)) 7896 if (CODING_REQUIRE_DETECTION (coding))
7517 detect_coding (coding); 7897 detect_coding (coding);
7898 attrs = CODING_ID_ATTRS (coding->id);
7899 if (! disable_ascii_optimization
7900 && ! coding->src_multibyte
7901 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
7902 && NILP (CODING_ATTR_POST_READ (attrs))
7903 && NILP (get_translation_table (attrs, 0, NULL)))
7904 {
7905 chars = coding->head_ascii;
7906 if (chars < 0)
7907 chars = check_ascii (coding);
7908 if (chars != bytes)
7909 {
7910 /* There exists a non-ASCII byte. */
7911 if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8)
7912 && coding->detected_utf8_bytes == coding->src_bytes)
7913 {
7914 if (coding->detected_utf8_chars >= 0)
7915 chars = coding->detected_utf8_chars;
7916 else
7917 chars = check_utf_8 (coding);
7918 if (CODING_UTF_8_BOM (coding) != utf_without_bom
7919 && coding->head_ascii == 0
7920 && coding->source[0] == UTF_8_BOM_1
7921 && coding->source[1] == UTF_8_BOM_2
7922 && coding->source[2] == UTF_8_BOM_3)
7923 {
7924 chars--;
7925 bytes -= 3;
7926 coding->src_bytes -= 3;
7927 }
7928 }
7929 else
7930 chars = -1;
7931 }
7932 if (chars >= 0)
7933 {
7934 Lisp_Object eol_type;
7935
7936 eol_type = CODING_ID_EOL_TYPE (coding->id);
7937 if (VECTORP (eol_type))
7938 {
7939 if (coding->eol_seen != EOL_SEEN_NONE)
7940 eol_type = adjust_coding_eol_type (coding, coding->eol_seen);
7941 }
7942 if (EQ (eol_type, Qmac))
7943 {
7944 unsigned char *src_end = GAP_END_ADDR;
7945 unsigned char *src = src_end - coding->src_bytes;
7946
7947 while (src < src_end)
7948 {
7949 if (*src++ == '\r')
7950 src[-1] = '\n';
7951 }
7952 }
7953 else if (EQ (eol_type, Qdos))
7954 {
7955 unsigned char *src = GAP_END_ADDR;
7956 unsigned char *src_beg = src - coding->src_bytes;
7957 unsigned char *dst = src;
7958 ptrdiff_t diff;
7959
7960 while (src_beg < src)
7961 {
7962 *--dst = *--src;
7963 if (*src == '\n' && src > src_beg && src[-1] == '\r')
7964 src--;
7965 }
7966 diff = dst - src;
7967 bytes -= diff;
7968 chars -= diff;
7969 }
7970 coding->produced = bytes;
7971 coding->produced_char = chars;
7972 insert_from_gap (chars, bytes, 1);
7973 return;
7974 }
7975 }
7976 code_conversion_save (0, 0);
7518 7977
7519 coding->mode |= CODING_MODE_LAST_BLOCK; 7978 coding->mode |= CODING_MODE_LAST_BLOCK;
7520 current_buffer->text->inhibit_shrinking = 1; 7979 current_buffer->text->inhibit_shrinking = 1;
7521 decode_coding (coding); 7980 decode_coding (coding);
7522 current_buffer->text->inhibit_shrinking = 0; 7981 current_buffer->text->inhibit_shrinking = 0;
7523 7982
7524 attrs = CODING_ID_ATTRS (coding->id);
7525 if (! NILP (CODING_ATTR_POST_READ (attrs))) 7983 if (! NILP (CODING_ATTR_POST_READ (attrs)))
7526 { 7984 {
7527 ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE; 7985 ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE;
@@ -7695,14 +8153,8 @@ decode_coding_object (struct coding_system *coding,
7695 set_buffer_internal (XBUFFER (coding->dst_object)); 8153 set_buffer_internal (XBUFFER (coding->dst_object));
7696 if (dst_bytes < coding->produced) 8154 if (dst_bytes < coding->produced)
7697 { 8155 {
8156 eassert (coding->produced > 0);
7698 destination = xrealloc (destination, coding->produced); 8157 destination = xrealloc (destination, coding->produced);
7699 if (! destination)
7700 {
7701 record_conversion_result (coding,
7702 CODING_RESULT_INSUFFICIENT_MEM);
7703 unbind_to (count, Qnil);
7704 return;
7705 }
7706 if (BEGV < GPT && GPT < BEGV + coding->produced_char) 8158 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7707 move_gap_both (BEGV, BEGV_BYTE); 8159 move_gap_both (BEGV, BEGV_BYTE);
7708 memcpy (destination, BEGV_ADDR, coding->produced); 8160 memcpy (destination, BEGV_ADDR, coding->produced);
@@ -7903,6 +8355,11 @@ encode_coding_object (struct coding_system *coding,
7903 { 8355 {
7904 if (BUFFERP (coding->dst_object)) 8356 if (BUFFERP (coding->dst_object))
7905 coding->dst_object = Fbuffer_string (); 8357 coding->dst_object = Fbuffer_string ();
8358 else if (coding->raw_destination)
8359 /* This is used to avoid creating huge Lisp string.
8360 NOTE: caller who sets `raw_destination' is also
8361 responsible for freeing `destination' buffer. */
8362 coding->dst_object = Qnil;
7906 else 8363 else
7907 { 8364 {
7908 coding->dst_object 8365 coding->dst_object
@@ -7985,11 +8442,21 @@ from_unicode (Lisp_Object str)
7985 return code_convert_string_norecord (str, Qutf_16le, 0); 8442 return code_convert_string_norecord (str, Qutf_16le, 0);
7986} 8443}
7987 8444
8445Lisp_Object
8446from_unicode_buffer (const wchar_t* wstr)
8447{
8448 return from_unicode (
8449 make_unibyte_string (
8450 (char*) wstr,
8451 /* we get one of the two final 0 bytes for free. */
8452 1 + sizeof (wchar_t) * wcslen (wstr)));
8453}
8454
7988wchar_t * 8455wchar_t *
7989to_unicode (Lisp_Object str, Lisp_Object *buf) 8456to_unicode (Lisp_Object str, Lisp_Object *buf)
7990{ 8457{
7991 *buf = code_convert_string_norecord (str, Qutf_16le, 1); 8458 *buf = code_convert_string_norecord (str, Qutf_16le, 1);
7992 /* We need to make a another copy (in addition to the one made by 8459 /* We need to make another copy (in addition to the one made by
7993 code_convert_string_norecord) to ensure that the final string is 8460 code_convert_string_norecord) to ensure that the final string is
7994 _doubly_ zero terminated --- that is, that the string is 8461 _doubly_ zero terminated --- that is, that the string is
7995 terminated by two zero bytes and one utf-16le null character. 8462 terminated by two zero bytes and one utf-16le null character.
@@ -8135,6 +8602,11 @@ detect_coding_system (const unsigned char *src,
8135 enum coding_category category IF_LINT (= 0); 8602 enum coding_category category IF_LINT (= 0);
8136 struct coding_system *this IF_LINT (= NULL); 8603 struct coding_system *this IF_LINT (= NULL);
8137 int c, i; 8604 int c, i;
8605 bool inhibit_nbd = inhibit_flag (coding.spec.undecided.inhibit_nbd,
8606 inhibit_null_byte_detection);
8607 bool inhibit_ied = inhibit_flag (coding.spec.undecided.inhibit_ied,
8608 inhibit_iso_escape_detection);
8609 bool prefer_utf_8 = coding.spec.undecided.prefer_utf_8;
8138 8610
8139 /* Skip all ASCII bytes except for a few ISO2022 controls. */ 8611 /* Skip all ASCII bytes except for a few ISO2022 controls. */
8140 for (; src < src_end; src++) 8612 for (; src < src_end; src++)
@@ -8149,7 +8621,7 @@ detect_coding_system (const unsigned char *src,
8149 else if (c < 0x20) 8621 else if (c < 0x20)
8150 { 8622 {
8151 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) 8623 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8152 && ! inhibit_iso_escape_detection 8624 && ! inhibit_ied
8153 && ! detect_info.checked) 8625 && ! detect_info.checked)
8154 { 8626 {
8155 if (detect_coding_iso_2022 (&coding, &detect_info)) 8627 if (detect_coding_iso_2022 (&coding, &detect_info))
@@ -8168,7 +8640,7 @@ detect_coding_system (const unsigned char *src,
8168 break; 8640 break;
8169 } 8641 }
8170 } 8642 }
8171 else if (! c && !inhibit_null_byte_detection) 8643 else if (! c && !inhibit_nbd)
8172 { 8644 {
8173 null_byte_found = 1; 8645 null_byte_found = 1;
8174 if (eight_bit_found) 8646 if (eight_bit_found)
@@ -8201,6 +8673,12 @@ detect_coding_system (const unsigned char *src,
8201 detect_info.checked |= ~CATEGORY_MASK_UTF_16; 8673 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8202 detect_info.rejected |= ~CATEGORY_MASK_UTF_16; 8674 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8203 } 8675 }
8676 else if (prefer_utf_8
8677 && detect_coding_utf_8 (&coding, &detect_info))
8678 {
8679 detect_info.checked |= ~CATEGORY_MASK_UTF_8;
8680 detect_info.rejected |= ~CATEGORY_MASK_UTF_8;
8681 }
8204 for (i = 0; i < coding_category_raw_text; i++) 8682 for (i = 0; i < coding_category_raw_text; i++)
8205 { 8683 {
8206 category = coding_priorities[i]; 8684 category = coding_priorities[i];
@@ -8241,20 +8719,20 @@ detect_coding_system (const unsigned char *src,
8241 { 8719 {
8242 detect_info.found = CATEGORY_MASK_RAW_TEXT; 8720 detect_info.found = CATEGORY_MASK_RAW_TEXT;
8243 id = CODING_SYSTEM_ID (Qno_conversion); 8721 id = CODING_SYSTEM_ID (Qno_conversion);
8244 val = Fcons (make_number (id), Qnil); 8722 val = list1 (make_number (id));
8245 } 8723 }
8246 else if (! detect_info.rejected && ! detect_info.found) 8724 else if (! detect_info.rejected && ! detect_info.found)
8247 { 8725 {
8248 detect_info.found = CATEGORY_MASK_ANY; 8726 detect_info.found = CATEGORY_MASK_ANY;
8249 id = coding_categories[coding_category_undecided].id; 8727 id = coding_categories[coding_category_undecided].id;
8250 val = Fcons (make_number (id), Qnil); 8728 val = list1 (make_number (id));
8251 } 8729 }
8252 else if (highest) 8730 else if (highest)
8253 { 8731 {
8254 if (detect_info.found) 8732 if (detect_info.found)
8255 { 8733 {
8256 detect_info.found = 1 << category; 8734 detect_info.found = 1 << category;
8257 val = Fcons (make_number (this->id), Qnil); 8735 val = list1 (make_number (this->id));
8258 } 8736 }
8259 else 8737 else
8260 for (i = 0; i < coding_category_raw_text; i++) 8738 for (i = 0; i < coding_category_raw_text; i++)
@@ -8262,7 +8740,7 @@ detect_coding_system (const unsigned char *src,
8262 { 8740 {
8263 detect_info.found = 1 << coding_priorities[i]; 8741 detect_info.found = 1 << coding_priorities[i];
8264 id = coding_categories[coding_priorities[i]].id; 8742 id = coding_categories[coding_priorities[i]].id;
8265 val = Fcons (make_number (id), Qnil); 8743 val = list1 (make_number (id));
8266 break; 8744 break;
8267 } 8745 }
8268 } 8746 }
@@ -8279,7 +8757,7 @@ detect_coding_system (const unsigned char *src,
8279 found |= 1 << category; 8757 found |= 1 << category;
8280 id = coding_categories[category].id; 8758 id = coding_categories[category].id;
8281 if (id >= 0) 8759 if (id >= 0)
8282 val = Fcons (make_number (id), val); 8760 val = list1 (make_number (id));
8283 } 8761 }
8284 } 8762 }
8285 for (i = coding_category_raw_text - 1; i >= 0; i--) 8763 for (i = coding_category_raw_text - 1; i >= 0; i--)
@@ -8304,7 +8782,7 @@ detect_coding_system (const unsigned char *src,
8304 this = coding_categories + coding_category_utf_8_sig; 8782 this = coding_categories + coding_category_utf_8_sig;
8305 else 8783 else
8306 this = coding_categories + coding_category_utf_8_nosig; 8784 this = coding_categories + coding_category_utf_8_nosig;
8307 val = Fcons (make_number (this->id), Qnil); 8785 val = list1 (make_number (this->id));
8308 } 8786 }
8309 } 8787 }
8310 else if (base_category == coding_category_utf_16_auto) 8788 else if (base_category == coding_category_utf_16_auto)
@@ -8321,13 +8799,13 @@ detect_coding_system (const unsigned char *src,
8321 this = coding_categories + coding_category_utf_16_be_nosig; 8799 this = coding_categories + coding_category_utf_16_be_nosig;
8322 else 8800 else
8323 this = coding_categories + coding_category_utf_16_le_nosig; 8801 this = coding_categories + coding_category_utf_16_le_nosig;
8324 val = Fcons (make_number (this->id), Qnil); 8802 val = list1 (make_number (this->id));
8325 } 8803 }
8326 } 8804 }
8327 else 8805 else
8328 { 8806 {
8329 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs)); 8807 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
8330 val = Fcons (make_number (coding.id), Qnil); 8808 val = list1 (make_number (coding.id));
8331 } 8809 }
8332 8810
8333 /* Then, detect eol-format if necessary. */ 8811 /* Then, detect eol-format if necessary. */
@@ -8539,8 +9017,7 @@ DEFUN ("find-coding-systems-region-internal",
8539 Lisp_Object attrs; 9017 Lisp_Object attrs;
8540 9018
8541 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0); 9019 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8542 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs)) 9020 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs)))
8543 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8544 { 9021 {
8545 ASET (attrs, coding_attr_trans_tbl, 9022 ASET (attrs, coding_attr_trans_tbl,
8546 get_translation_table (attrs, 1, NULL)); 9023 get_translation_table (attrs, 1, NULL));
@@ -8799,7 +9276,7 @@ is nil. */)
8799 attrs = AREF (CODING_SYSTEM_SPEC (elt), 0); 9276 attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8800 ASET (attrs, coding_attr_trans_tbl, 9277 ASET (attrs, coding_attr_trans_tbl,
8801 get_translation_table (attrs, 1, NULL)); 9278 get_translation_table (attrs, 1, NULL));
8802 list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list); 9279 list = Fcons (list2 (elt, attrs), list);
8803 } 9280 }
8804 9281
8805 if (STRINGP (start)) 9282 if (STRINGP (start))
@@ -8883,6 +9360,14 @@ code_convert_region (Lisp_Object start, Lisp_Object end,
8883 setup_coding_system (coding_system, &coding); 9360 setup_coding_system (coding_system, &coding);
8884 coding.mode |= CODING_MODE_LAST_BLOCK; 9361 coding.mode |= CODING_MODE_LAST_BLOCK;
8885 9362
9363 if (BUFFERP (dst_object) && !EQ (dst_object, src_object))
9364 {
9365 struct buffer *buf = XBUFFER (dst_object);
9366 ptrdiff_t buf_pt = BUF_PT (buf);
9367
9368 invalidate_buffer_caches (buf, buf_pt, buf_pt);
9369 }
9370
8886 if (encodep) 9371 if (encodep)
8887 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte, 9372 encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8888 dst_object); 9373 dst_object);
@@ -8972,6 +9457,15 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system,
8972 coding.mode |= CODING_MODE_LAST_BLOCK; 9457 coding.mode |= CODING_MODE_LAST_BLOCK;
8973 chars = SCHARS (string); 9458 chars = SCHARS (string);
8974 bytes = SBYTES (string); 9459 bytes = SBYTES (string);
9460
9461 if (BUFFERP (dst_object))
9462 {
9463 struct buffer *buf = XBUFFER (dst_object);
9464 ptrdiff_t buf_pt = BUF_PT (buf);
9465
9466 invalidate_buffer_caches (buf, buf_pt, buf_pt);
9467 }
9468
8975 if (encodep) 9469 if (encodep)
8976 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object); 9470 encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8977 else 9471 else
@@ -8998,6 +9492,55 @@ code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
8998 return code_convert_string (string, coding_system, Qt, encodep, 0, 1); 9492 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
8999} 9493}
9000 9494
9495/* Encode or decode a file name, to or from a unibyte string suitable
9496 for passing to C library functions. */
9497Lisp_Object
9498decode_file_name (Lisp_Object fname)
9499{
9500#ifdef WINDOWSNT
9501 /* The w32 build pretends to use UTF-8 for file-name encoding, and
9502 converts the file names either to UTF-16LE or to the system ANSI
9503 codepage internally, depending on the underlying OS; see w32.c. */
9504 if (! NILP (Fcoding_system_p (Qutf_8)))
9505 return code_convert_string_norecord (fname, Qutf_8, 0);
9506 return fname;
9507#else /* !WINDOWSNT */
9508 if (! NILP (Vfile_name_coding_system))
9509 return code_convert_string_norecord (fname, Vfile_name_coding_system, 0);
9510 else if (! NILP (Vdefault_file_name_coding_system))
9511 return code_convert_string_norecord (fname,
9512 Vdefault_file_name_coding_system, 0);
9513 else
9514 return fname;
9515#endif
9516}
9517
9518Lisp_Object
9519encode_file_name (Lisp_Object fname)
9520{
9521 /* This is especially important during bootstrap and dumping, when
9522 file-name encoding is not yet known, and therefore any non-ASCII
9523 file names are unibyte strings, and could only be thrashed if we
9524 try to encode them. */
9525 if (!STRING_MULTIBYTE (fname))
9526 return fname;
9527#ifdef WINDOWSNT
9528 /* The w32 build pretends to use UTF-8 for file-name encoding, and
9529 converts the file names either to UTF-16LE or to the system ANSI
9530 codepage internally, depending on the underlying OS; see w32.c. */
9531 if (! NILP (Fcoding_system_p (Qutf_8)))
9532 return code_convert_string_norecord (fname, Qutf_8, 1);
9533 return fname;
9534#else /* !WINDOWSNT */
9535 if (! NILP (Vfile_name_coding_system))
9536 return code_convert_string_norecord (fname, Vfile_name_coding_system, 1);
9537 else if (! NILP (Vdefault_file_name_coding_system))
9538 return code_convert_string_norecord (fname,
9539 Vdefault_file_name_coding_system, 1);
9540 else
9541 return fname;
9542#endif
9543}
9001 9544
9002DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string, 9545DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
9003 2, 4, 0, 9546 2, 4, 0,
@@ -9210,7 +9753,7 @@ DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_intern
9210 tset_charset_list 9753 tset_charset_list
9211 (term, (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK 9754 (term, (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK
9212 ? coding_charset_list (terminal_coding) 9755 ? coding_charset_list (terminal_coding)
9213 : Fcons (make_number (charset_ascii), Qnil))); 9756 : list1 (make_number (charset_ascii))));
9214 return Qnil; 9757 return Qnil;
9215} 9758}
9216 9759
@@ -9655,9 +10198,9 @@ usage: (define-coding-system-internal ...) */)
9655 { 10198 {
9656 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp))); 10199 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9657 if (dim < dim2) 10200 if (dim < dim2)
9658 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil)); 10201 tmp = list2 (XCAR (tail), tmp);
9659 else 10202 else
9660 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil)); 10203 tmp = list2 (tmp, XCAR (tail));
9661 } 10204 }
9662 else 10205 else
9663 { 10206 {
@@ -9668,7 +10211,7 @@ usage: (define-coding-system-internal ...) */)
9668 break; 10211 break;
9669 } 10212 }
9670 if (NILP (tmp2)) 10213 if (NILP (tmp2))
9671 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil)); 10214 tmp = nconc2 (tmp, list1 (XCAR (tail)));
9672 else 10215 else
9673 { 10216 {
9674 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2))); 10217 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
@@ -9954,7 +10497,17 @@ usage: (define-coding-system-internal ...) */)
9954 : coding_category_utf_8_sig); 10497 : coding_category_utf_8_sig);
9955 } 10498 }
9956 else if (EQ (coding_type, Qundecided)) 10499 else if (EQ (coding_type, Qundecided))
9957 category = coding_category_undecided; 10500 {
10501 if (nargs < coding_arg_undecided_max)
10502 goto short_args;
10503 ASET (attrs, coding_attr_undecided_inhibit_null_byte_detection,
10504 args[coding_arg_undecided_inhibit_null_byte_detection]);
10505 ASET (attrs, coding_attr_undecided_inhibit_iso_escape_detection,
10506 args[coding_arg_undecided_inhibit_iso_escape_detection]);
10507 ASET (attrs, coding_attr_undecided_prefer_utf_8,
10508 args[coding_arg_undecided_prefer_utf_8]);
10509 category = coding_category_undecided;
10510 }
9958 else 10511 else
9959 error ("Invalid coding system type: %s", 10512 error ("Invalid coding system type: %s",
9960 SDATA (SYMBOL_NAME (coding_type))); 10513 SDATA (SYMBOL_NAME (coding_type)));
@@ -9976,7 +10529,7 @@ usage: (define-coding-system-internal ...) */)
9976 && ! EQ (eol_type, Qmac)) 10529 && ! EQ (eol_type, Qmac))
9977 error ("Invalid eol-type"); 10530 error ("Invalid eol-type");
9978 10531
9979 aliases = Fcons (name, Qnil); 10532 aliases = list1 (name);
9980 10533
9981 if (NILP (eol_type)) 10534 if (NILP (eol_type))
9982 { 10535 {
@@ -9986,7 +10539,7 @@ usage: (define-coding-system-internal ...) */)
9986 Lisp_Object this_spec, this_name, this_aliases, this_eol_type; 10539 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
9987 10540
9988 this_name = AREF (eol_type, i); 10541 this_name = AREF (eol_type, i);
9989 this_aliases = Fcons (this_name, Qnil); 10542 this_aliases = list1 (this_name);
9990 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac); 10543 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
9991 this_spec = make_uninit_vector (3); 10544 this_spec = make_uninit_vector (3);
9992 ASET (this_spec, 0, attrs); 10545 ASET (this_spec, 0, attrs);
@@ -10101,7 +10654,7 @@ DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10101 list. */ 10654 list. */
10102 while (!NILP (XCDR (aliases))) 10655 while (!NILP (XCDR (aliases)))
10103 aliases = XCDR (aliases); 10656 aliases = XCDR (aliases);
10104 XSETCDR (aliases, Fcons (alias, Qnil)); 10657 XSETCDR (aliases, list1 (alias));
10105 10658
10106 eol_type = AREF (spec, 2); 10659 eol_type = AREF (spec, 2);
10107 if (VECTORP (eol_type)) 10660 if (VECTORP (eol_type))
@@ -10335,11 +10888,6 @@ syms_of_coding (void)
10335 Fput (Qcoding_system_error, Qerror_message, 10888 Fput (Qcoding_system_error, Qerror_message,
10336 build_pure_c_string ("Invalid coding system")); 10889 build_pure_c_string ("Invalid coding system"));
10337 10890
10338 /* Intern this now in case it isn't already done.
10339 Setting this variable twice is harmless.
10340 But don't staticpro it here--that is done in alloc.c. */
10341 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
10342
10343 DEFSYM (Qtranslation_table, "translation-table"); 10891 DEFSYM (Qtranslation_table, "translation-table");
10344 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2)); 10892 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
10345 DEFSYM (Qtranslation_table_id, "translation-table-id"); 10893 DEFSYM (Qtranslation_table_id, "translation-table-id");
@@ -10408,10 +10956,8 @@ syms_of_coding (void)
10408 intern_c_string ("coding-category-undecided")); 10956 intern_c_string ("coding-category-undecided"));
10409 10957
10410 DEFSYM (Qinsufficient_source, "insufficient-source"); 10958 DEFSYM (Qinsufficient_source, "insufficient-source");
10411 DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10412 DEFSYM (Qinvalid_source, "invalid-source"); 10959 DEFSYM (Qinvalid_source, "invalid-source");
10413 DEFSYM (Qinterrupted, "interrupted"); 10960 DEFSYM (Qinterrupted, "interrupted");
10414 DEFSYM (Qinsufficient_memory, "insufficient-memory");
10415 DEFSYM (Qcoding_system_define_form, "coding-system-define-form"); 10961 DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
10416 10962
10417 defsubr (&Scoding_system_p); 10963 defsubr (&Scoding_system_p);
@@ -10728,6 +11274,11 @@ from GNU Find and GNU Grep. Emacs will then ignore the null bytes and
10728decode text as usual. */); 11274decode text as usual. */);
10729 inhibit_null_byte_detection = 0; 11275 inhibit_null_byte_detection = 0;
10730 11276
11277 DEFVAR_BOOL ("disable-ascii-optimization", disable_ascii_optimization,
11278 doc: /* If non-nil, Emacs does not optimize code decoder for ASCII files.
11279Internal use only. Removed after the experimental optimizer gets stable. */);
11280 disable_ascii_optimization = 0;
11281
10731 DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input, 11282 DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input,
10732 doc: /* Char table for translating self-inserting characters. 11283 doc: /* Char table for translating self-inserting characters.
10733This is applied to the result of input methods, not their input. 11284This is applied to the result of input methods, not their input.
@@ -10739,11 +11290,11 @@ internal character representation. */);
10739 Vtranslation_table_for_input = Qnil; 11290 Vtranslation_table_for_input = Qnil;
10740 11291
10741 { 11292 {
10742 Lisp_Object args[coding_arg_max]; 11293 Lisp_Object args[coding_arg_undecided_max];
10743 Lisp_Object plist[16]; 11294 Lisp_Object plist[16];
10744 int i; 11295 int i;
10745 11296
10746 for (i = 0; i < coding_arg_max; i++) 11297 for (i = 0; i < coding_arg_undecided_max; i++)
10747 args[i] = Qnil; 11298 args[i] = Qnil;
10748 11299
10749 plist[0] = intern_c_string (":name"); 11300 plist[0] = intern_c_string (":name");
@@ -10780,7 +11331,9 @@ character.");
10780 plist[13] = build_pure_c_string ("No conversion on encoding, automatic conversion on decoding."); 11331 plist[13] = build_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
10781 plist[15] = args[coding_arg_eol_type] = Qnil; 11332 plist[15] = args[coding_arg_eol_type] = Qnil;
10782 args[coding_arg_plist] = Flist (16, plist); 11333 args[coding_arg_plist] = Flist (16, plist);
10783 Fdefine_coding_system_internal (coding_arg_max, args); 11334 args[coding_arg_undecided_inhibit_null_byte_detection] = make_number (0);
11335 args[coding_arg_undecided_inhibit_iso_escape_detection] = make_number (0);
11336 Fdefine_coding_system_internal (coding_arg_undecided_max, args);
10784 } 11337 }
10785 11338
10786 setup_coding_system (Qno_conversion, &safe_terminal_coding); 11339 setup_coding_system (Qno_conversion, &safe_terminal_coding);