aboutsummaryrefslogtreecommitdiffstats
path: root/src/coding.c
diff options
context:
space:
mode:
authorEli Zaretskii2013-09-05 11:01:04 +0300
committerEli Zaretskii2013-09-05 11:01:04 +0300
commit41306318777a942420bc4feadbfacf662ea179dc (patch)
tree669e5cca02f95d6064ce73c0d3fbbf91b8c8b563 /src/coding.c
parent141f1ff7a40cda10f0558e891dd196a943a5082e (diff)
parent257b3b03cb1cff917e0b3b7832ad3eab5b59f257 (diff)
downloademacs-41306318777a942420bc4feadbfacf662ea179dc.tar.gz
emacs-41306318777a942420bc4feadbfacf662ea179dc.zip
Merge from trunk after a lot of time.
Diffstat (limited to 'src/coding.c')
-rw-r--r--src/coding.c1464
1 files changed, 932 insertions, 532 deletions
diff --git a/src/coding.c b/src/coding.c
index 17e342298b9..c10fb375672 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -1,5 +1,5 @@
1/* Coding system handler (conversion, detection, etc). 1/* Coding system handler (conversion, detection, etc).
2 Copyright (C) 2001-2012 Free Software Foundation, Inc. 2 Copyright (C) 2001-2013 Free Software Foundation, Inc.
3 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 3 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
4 2005, 2006, 2007, 2008, 2009, 2010, 2011 4 2005, 2006, 2007, 2008, 2009, 2010, 2011
5 National Institute of Advanced Industrial Science and Technology (AIST) 5 National Institute of Advanced Industrial Science and Technology (AIST)
@@ -147,18 +147,18 @@ STRUCT CODING_SYSTEM
147 CODING conforms to the format of XXX, and update the members of 147 CODING conforms to the format of XXX, and update the members of
148 DETECT_INFO. 148 DETECT_INFO.
149 149
150 Return 1 if the byte sequence conforms to XXX, otherwise return 0. 150 Return true if the byte sequence conforms to XXX.
151 151
152 Below is the template of these functions. */ 152 Below is the template of these functions. */
153 153
154#if 0 154#if 0
155static int 155static bool
156detect_coding_XXX (struct coding_system *coding, 156detect_coding_XXX (struct coding_system *coding,
157 struct coding_detection_info *detect_info) 157 struct coding_detection_info *detect_info)
158{ 158{
159 const unsigned char *src = coding->source; 159 const unsigned char *src = coding->source;
160 const unsigned char *src_end = coding->source + coding->src_bytes; 160 const unsigned char *src_end = coding->source + coding->src_bytes;
161 int multibytep = coding->src_multibyte; 161 bool multibytep = coding->src_multibyte;
162 ptrdiff_t consumed_chars = 0; 162 ptrdiff_t consumed_chars = 0;
163 int found = 0; 163 int found = 0;
164 ...; 164 ...;
@@ -212,7 +212,7 @@ decode_coding_XXXX (struct coding_system *coding)
212 /* A buffer to produce decoded characters. */ 212 /* A buffer to produce decoded characters. */
213 int *charbuf = coding->charbuf + coding->charbuf_used; 213 int *charbuf = coding->charbuf + coding->charbuf_used;
214 int *charbuf_end = coding->charbuf + coding->charbuf_size; 214 int *charbuf_end = coding->charbuf + coding->charbuf_size;
215 int multibytep = coding->src_multibyte; 215 bool multibytep = coding->src_multibyte;
216 216
217 while (1) 217 while (1)
218 { 218 {
@@ -260,7 +260,7 @@ decode_coding_XXXX (struct coding_system *coding)
260static void 260static void
261encode_coding_XXX (struct coding_system *coding) 261encode_coding_XXX (struct coding_system *coding)
262{ 262{
263 int multibytep = coding->dst_multibyte; 263 bool multibytep = coding->dst_multibyte;
264 int *charbuf = coding->charbuf; 264 int *charbuf = coding->charbuf;
265 int *charbuf_end = charbuf->charbuf + coding->charbuf_used; 265 int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
266 unsigned char *dst = coding->destination + coding->produced; 266 unsigned char *dst = coding->destination + coding->produced;
@@ -285,11 +285,14 @@ encode_coding_XXX (struct coding_system *coding)
285 285
286#include <config.h> 286#include <config.h>
287#include <stdio.h> 287#include <stdio.h>
288#include <setjmp.h> 288
289#ifdef HAVE_WCHAR_H
290#include <wchar.h>
291#endif /* HAVE_WCHAR_H */
289 292
290#include "lisp.h" 293#include "lisp.h"
291#include "buffer.h"
292#include "character.h" 294#include "character.h"
295#include "buffer.h"
293#include "charset.h" 296#include "charset.h"
294#include "ccl.h" 297#include "ccl.h"
295#include "composite.h" 298#include "composite.h"
@@ -303,6 +306,7 @@ Lisp_Object Vcoding_system_hash_table;
303static Lisp_Object Qcoding_system, Qeol_type; 306static Lisp_Object Qcoding_system, Qeol_type;
304static Lisp_Object Qcoding_aliases; 307static Lisp_Object Qcoding_aliases;
305Lisp_Object Qunix, Qdos; 308Lisp_Object Qunix, Qdos;
309static Lisp_Object Qmac;
306Lisp_Object Qbuffer_file_coding_system; 310Lisp_Object Qbuffer_file_coding_system;
307static Lisp_Object Qpost_read_conversion, Qpre_write_conversion; 311static Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
308static Lisp_Object Qdefault_char; 312static Lisp_Object Qdefault_char;
@@ -322,8 +326,7 @@ Lisp_Object Qcall_process, Qcall_process_region;
322Lisp_Object Qstart_process, Qopen_network_stream; 326Lisp_Object Qstart_process, Qopen_network_stream;
323static Lisp_Object Qtarget_idx; 327static Lisp_Object Qtarget_idx;
324 328
325static Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source; 329static Lisp_Object Qinsufficient_source, Qinvalid_source, Qinterrupted;
326static Lisp_Object Qinterrupted, Qinsufficient_memory;
327 330
328/* If a symbol has this property, evaluate the value to define the 331/* If a symbol has this property, evaluate the value to define the
329 symbol as a coding system. */ 332 symbol as a coding system. */
@@ -344,6 +347,10 @@ Lisp_Object Qcoding_system_p, Qcoding_system_error;
344Lisp_Object Qemacs_mule, Qraw_text; 347Lisp_Object Qemacs_mule, Qraw_text;
345Lisp_Object Qutf_8_emacs; 348Lisp_Object Qutf_8_emacs;
346 349
350#if defined (WINDOWSNT) || defined (CYGWIN)
351static Lisp_Object Qutf_16le;
352#endif
353
347/* Coding-systems are handed between Emacs Lisp programs and C internal 354/* Coding-systems are handed between Emacs Lisp programs and C internal
348 routines by the following three variables. */ 355 routines by the following three variables. */
349/* Coding system to be used to encode text for terminal display when 356/* Coding system to be used to encode text for terminal display when
@@ -416,7 +423,7 @@ enum iso_code_class_type
416 ISO_shift_out, /* ISO_CODE_SO (0x0E) */ 423 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
417 ISO_shift_in, /* ISO_CODE_SI (0x0F) */ 424 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
418 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */ 425 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
419 ISO_escape, /* ISO_CODE_SO (0x1B) */ 426 ISO_escape, /* ISO_CODE_ESC (0x1B) */
420 ISO_control_1, /* Control codes in the range 427 ISO_control_1, /* Control codes in the range
421 0x80..0x9F, except for the 428 0x80..0x9F, except for the
422 following 3 codes. */ 429 following 3 codes. */
@@ -486,6 +493,8 @@ enum iso_code_class_type
486 493
487#define CODING_ISO_FLAG_USE_OLDJIS 0x10000 494#define CODING_ISO_FLAG_USE_OLDJIS 0x10000
488 495
496#define CODING_ISO_FLAG_LEVEL_4 0x20000
497
489#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000 498#define CODING_ISO_FLAG_FULL_SUPPORT 0x100000
490 499
491/* A character to be produced on output if encoding of the original 500/* A character to be produced on output if encoding of the original
@@ -642,17 +651,50 @@ static struct coding_system coding_categories[coding_category_max];
642#define max(a, b) ((a) > (b) ? (a) : (b)) 651#define max(a, b) ((a) > (b) ? (a) : (b))
643#endif 652#endif
644 653
654/* Encode a flag that can be nil, something else, or t as -1, 0, 1. */
655
656static int
657encode_inhibit_flag (Lisp_Object flag)
658{
659 return NILP (flag) ? -1 : EQ (flag, Qt);
660}
661
662/* True if the value of ENCODED_FLAG says a flag should be treated as set.
663 1 means yes, -1 means no, 0 means ask the user variable VAR. */
664
665static bool
666inhibit_flag (int encoded_flag, bool var)
667{
668 return 0 < encoded_flag + var;
669}
670
645#define CODING_GET_INFO(coding, attrs, charset_list) \ 671#define CODING_GET_INFO(coding, attrs, charset_list) \
646 do { \ 672 do { \
647 (attrs) = CODING_ID_ATTRS ((coding)->id); \ 673 (attrs) = CODING_ID_ATTRS ((coding)->id); \
648 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \ 674 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
649 } while (0) 675 } while (0)
650 676
677static void
678CHECK_NATNUM_CAR (Lisp_Object x)
679{
680 Lisp_Object tmp = XCAR (x);
681 CHECK_NATNUM (tmp);
682 XSETCAR (x, tmp);
683}
684
685static void
686CHECK_NATNUM_CDR (Lisp_Object x)
687{
688 Lisp_Object tmp = XCDR (x);
689 CHECK_NATNUM (tmp);
690 XSETCDR (x, tmp);
691}
692
651 693
652/* Safely get one byte from the source text pointed by SRC which ends 694/* Safely get one byte from the source text pointed by SRC which ends
653 at SRC_END, and set C to that byte. If there are not enough bytes 695 at SRC_END, and set C to that byte. If there are not enough bytes
654 in the source, it jumps to `no_more_source'. If multibytep is 696 in the source, it jumps to 'no_more_source'. If MULTIBYTEP,
655 nonzero, and a multibyte character is found at SRC, set C to the 697 and a multibyte character is found at SRC, set C to the
656 negative value of the character code. The caller should declare 698 negative value of the character code. The caller should declare
657 and set these variables appropriately in advance: 699 and set these variables appropriately in advance:
658 src, src_end, multibytep */ 700 src, src_end, multibytep */
@@ -685,7 +727,7 @@ static struct coding_system coding_categories[coding_category_max];
685/* Safely get two bytes from the source text pointed by SRC which ends 727/* Safely get two bytes from the source text pointed by SRC which ends
686 at SRC_END, and set C1 and C2 to those bytes while skipping the 728 at SRC_END, and set C1 and C2 to those bytes while skipping the
687 heading multibyte characters. If there are not enough bytes in the 729 heading multibyte characters. If there are not enough bytes in the
688 source, it jumps to `no_more_source'. If multibytep is nonzero and 730 source, it jumps to 'no_more_source'. If MULTIBYTEP and
689 a multibyte character is found for C2, set C2 to the negative value 731 a multibyte character is found for C2, set C2 to the negative value
690 of the character code. The caller should declare and set these 732 of the character code. The caller should declare and set these
691 variables appropriately in advance: 733 variables appropriately in advance:
@@ -746,8 +788,8 @@ static struct coding_system coding_categories[coding_category_max];
746 788
747 789
748/* Store a byte C in the place pointed by DST and increment DST to the 790/* Store a byte C in the place pointed by DST and increment DST to the
749 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is 791 next free point, and increment PRODUCED_CHARS. If MULTIBYTEP,
750 nonzero, store in an appropriate multibyte from. The caller should 792 store in an appropriate multibyte form. The caller should
751 declare and set the variables `dst' and `multibytep' appropriately 793 declare and set the variables `dst' and `multibytep' appropriately
752 in advance. */ 794 in advance. */
753 795
@@ -806,83 +848,6 @@ static struct coding_system coding_categories[coding_category_max];
806 } while (0) 848 } while (0)
807 849
808 850
809/* Prototypes for static functions. */
810static void record_conversion_result (struct coding_system *coding,
811 enum coding_result_code result);
812static int detect_coding_utf_8 (struct coding_system *,
813 struct coding_detection_info *info);
814static void decode_coding_utf_8 (struct coding_system *);
815static int encode_coding_utf_8 (struct coding_system *);
816
817static int detect_coding_utf_16 (struct coding_system *,
818 struct coding_detection_info *info);
819static void decode_coding_utf_16 (struct coding_system *);
820static int encode_coding_utf_16 (struct coding_system *);
821
822static int detect_coding_iso_2022 (struct coding_system *,
823 struct coding_detection_info *info);
824static void decode_coding_iso_2022 (struct coding_system *);
825static int encode_coding_iso_2022 (struct coding_system *);
826
827static int detect_coding_emacs_mule (struct coding_system *,
828 struct coding_detection_info *info);
829static void decode_coding_emacs_mule (struct coding_system *);
830static int encode_coding_emacs_mule (struct coding_system *);
831
832static int detect_coding_sjis (struct coding_system *,
833 struct coding_detection_info *info);
834static void decode_coding_sjis (struct coding_system *);
835static int encode_coding_sjis (struct coding_system *);
836
837static int detect_coding_big5 (struct coding_system *,
838 struct coding_detection_info *info);
839static void decode_coding_big5 (struct coding_system *);
840static int encode_coding_big5 (struct coding_system *);
841
842static int detect_coding_ccl (struct coding_system *,
843 struct coding_detection_info *info);
844static void decode_coding_ccl (struct coding_system *);
845static int encode_coding_ccl (struct coding_system *);
846
847static void decode_coding_raw_text (struct coding_system *);
848static int encode_coding_raw_text (struct coding_system *);
849
850static void coding_set_source (struct coding_system *);
851static ptrdiff_t coding_change_source (struct coding_system *);
852static void coding_set_destination (struct coding_system *);
853static ptrdiff_t coding_change_destination (struct coding_system *);
854static void coding_alloc_by_realloc (struct coding_system *, ptrdiff_t);
855static void coding_alloc_by_making_gap (struct coding_system *,
856 ptrdiff_t, ptrdiff_t);
857static unsigned char *alloc_destination (struct coding_system *,
858 ptrdiff_t, unsigned char *);
859static void setup_iso_safe_charsets (Lisp_Object);
860static ptrdiff_t encode_designation_at_bol (struct coding_system *,
861 int *, int *, unsigned char *);
862static int detect_eol (const unsigned char *,
863 ptrdiff_t, enum coding_category);
864static Lisp_Object adjust_coding_eol_type (struct coding_system *, int);
865static void decode_eol (struct coding_system *);
866static Lisp_Object get_translation_table (Lisp_Object, int, int *);
867static Lisp_Object get_translation (Lisp_Object, int *, int *);
868static int produce_chars (struct coding_system *, Lisp_Object, int);
869static inline void produce_charset (struct coding_system *, int *,
870 ptrdiff_t);
871static void produce_annotation (struct coding_system *, ptrdiff_t);
872static int decode_coding (struct coding_system *);
873static inline int *handle_composition_annotation (ptrdiff_t, ptrdiff_t,
874 struct coding_system *,
875 int *, ptrdiff_t *);
876static inline int *handle_charset_annotation (ptrdiff_t, ptrdiff_t,
877 struct coding_system *,
878 int *, ptrdiff_t *);
879static void consume_chars (struct coding_system *, Lisp_Object, int);
880static int encode_coding (struct coding_system *);
881static Lisp_Object make_conversion_work_buffer (int);
882static Lisp_Object code_conversion_restore (Lisp_Object);
883static inline int char_encodable_p (int, Lisp_Object);
884static Lisp_Object make_subsidiaries (Lisp_Object);
885
886static void 851static void
887record_conversion_result (struct coding_system *coding, 852record_conversion_result (struct coding_system *coding,
888 enum coding_result_code result) 853 enum coding_result_code result)
@@ -893,18 +858,12 @@ record_conversion_result (struct coding_system *coding,
893 case CODING_RESULT_INSUFFICIENT_SRC: 858 case CODING_RESULT_INSUFFICIENT_SRC:
894 Vlast_code_conversion_error = Qinsufficient_source; 859 Vlast_code_conversion_error = Qinsufficient_source;
895 break; 860 break;
896 case CODING_RESULT_INCONSISTENT_EOL:
897 Vlast_code_conversion_error = Qinconsistent_eol;
898 break;
899 case CODING_RESULT_INVALID_SRC: 861 case CODING_RESULT_INVALID_SRC:
900 Vlast_code_conversion_error = Qinvalid_source; 862 Vlast_code_conversion_error = Qinvalid_source;
901 break; 863 break;
902 case CODING_RESULT_INTERRUPT: 864 case CODING_RESULT_INTERRUPT:
903 Vlast_code_conversion_error = Qinterrupted; 865 Vlast_code_conversion_error = Qinterrupted;
904 break; 866 break;
905 case CODING_RESULT_INSUFFICIENT_MEM:
906 Vlast_code_conversion_error = Qinsufficient_memory;
907 break;
908 case CODING_RESULT_INSUFFICIENT_DST: 867 case CODING_RESULT_INSUFFICIENT_DST:
909 /* Don't record this error in Vlast_code_conversion_error 868 /* Don't record this error in Vlast_code_conversion_error
910 because it happens just temporarily and is resolved when the 869 because it happens just temporarily and is resolved when the
@@ -998,65 +957,18 @@ record_conversion_result (struct coding_system *coding,
998 957
999 958
1000/* Store multibyte form of the character C in P, and advance P to the 959/* Store multibyte form of the character C in P, and advance P to the
1001 end of the multibyte form. This is like CHAR_STRING_ADVANCE but it 960 end of the multibyte form. This used to be like CHAR_STRING_ADVANCE
1002 never calls MAYBE_UNIFY_CHAR. */ 961 without ever calling MAYBE_UNIFY_CHAR, but nowadays we don't call
1003 962 MAYBE_UNIFY_CHAR in CHAR_STRING_ADVANCE. */
1004#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p) \
1005 do { \
1006 if ((c) <= MAX_1_BYTE_CHAR) \
1007 *(p)++ = (c); \
1008 else if ((c) <= MAX_2_BYTE_CHAR) \
1009 *(p)++ = (0xC0 | ((c) >> 6)), \
1010 *(p)++ = (0x80 | ((c) & 0x3F)); \
1011 else if ((c) <= MAX_3_BYTE_CHAR) \
1012 *(p)++ = (0xE0 | ((c) >> 12)), \
1013 *(p)++ = (0x80 | (((c) >> 6) & 0x3F)), \
1014 *(p)++ = (0x80 | ((c) & 0x3F)); \
1015 else if ((c) <= MAX_4_BYTE_CHAR) \
1016 *(p)++ = (0xF0 | (c >> 18)), \
1017 *(p)++ = (0x80 | ((c >> 12) & 0x3F)), \
1018 *(p)++ = (0x80 | ((c >> 6) & 0x3F)), \
1019 *(p)++ = (0x80 | (c & 0x3F)); \
1020 else if ((c) <= MAX_5_BYTE_CHAR) \
1021 *(p)++ = 0xF8, \
1022 *(p)++ = (0x80 | ((c >> 18) & 0x0F)), \
1023 *(p)++ = (0x80 | ((c >> 12) & 0x3F)), \
1024 *(p)++ = (0x80 | ((c >> 6) & 0x3F)), \
1025 *(p)++ = (0x80 | (c & 0x3F)); \
1026 else \
1027 (p) += BYTE8_STRING ((c) - 0x3FFF80, p); \
1028 } while (0)
1029 963
964#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p) CHAR_STRING_ADVANCE(c, p)
1030 965
1031/* Return the character code of character whose multibyte form is at 966/* Return the character code of character whose multibyte form is at
1032 P, and advance P to the end of the multibyte form. This is like 967 P, and advance P to the end of the multibyte form. This used to be
1033 STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR. */ 968 like STRING_CHAR_ADVANCE without ever calling MAYBE_UNIFY_CHAR, but
1034 969 nowadays STRING_CHAR_ADVANCE doesn't call MAYBE_UNIFY_CHAR. */
1035#define STRING_CHAR_ADVANCE_NO_UNIFY(p) \
1036 (!((p)[0] & 0x80) \
1037 ? *(p)++ \
1038 : ! ((p)[0] & 0x20) \
1039 ? ((p) += 2, \
1040 ((((p)[-2] & 0x1F) << 6) \
1041 | ((p)[-1] & 0x3F) \
1042 | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0))) \
1043 : ! ((p)[0] & 0x10) \
1044 ? ((p) += 3, \
1045 ((((p)[-3] & 0x0F) << 12) \
1046 | (((p)[-2] & 0x3F) << 6) \
1047 | ((p)[-1] & 0x3F))) \
1048 : ! ((p)[0] & 0x08) \
1049 ? ((p) += 4, \
1050 ((((p)[-4] & 0xF) << 18) \
1051 | (((p)[-3] & 0x3F) << 12) \
1052 | (((p)[-2] & 0x3F) << 6) \
1053 | ((p)[-1] & 0x3F))) \
1054 : ((p) += 5, \
1055 ((((p)[-4] & 0x3F) << 18) \
1056 | (((p)[-3] & 0x3F) << 12) \
1057 | (((p)[-2] & 0x3F) << 6) \
1058 | ((p)[-1] & 0x3F))))
1059 970
971#define STRING_CHAR_ADVANCE_NO_UNIFY(p) STRING_CHAR_ADVANCE(p)
1060 972
1061/* Set coding->source from coding->src_object. */ 973/* Set coding->source from coding->src_object. */
1062 974
@@ -1145,8 +1057,8 @@ coding_alloc_by_realloc (struct coding_system *coding, ptrdiff_t bytes)
1145{ 1057{
1146 if (STRING_BYTES_BOUND - coding->dst_bytes < bytes) 1058 if (STRING_BYTES_BOUND - coding->dst_bytes < bytes)
1147 string_overflow (); 1059 string_overflow ();
1148 coding->destination = (unsigned char *) xrealloc (coding->destination, 1060 coding->destination = xrealloc (coding->destination,
1149 coding->dst_bytes + bytes); 1061 coding->dst_bytes + bytes);
1150 coding->dst_bytes += bytes; 1062 coding->dst_bytes += bytes;
1151} 1063}
1152 1064
@@ -1169,14 +1081,7 @@ coding_alloc_by_making_gap (struct coding_system *coding,
1169 GPT -= gap_head_used, GPT_BYTE -= gap_head_used; 1081 GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1170 } 1082 }
1171 else 1083 else
1172 { 1084 make_gap_1 (XBUFFER (coding->dst_object), bytes);
1173 Lisp_Object this_buffer;
1174
1175 this_buffer = Fcurrent_buffer ();
1176 set_buffer_internal (XBUFFER (coding->dst_object));
1177 make_gap (bytes);
1178 set_buffer_internal (XBUFFER (this_buffer));
1179 }
1180} 1085}
1181 1086
1182 1087
@@ -1255,6 +1160,14 @@ alloc_destination (struct coding_system *coding, ptrdiff_t nbytes,
1255 *buf++ = id; \ 1160 *buf++ = id; \
1256 } while (0) 1161 } while (0)
1257 1162
1163
1164/* Bitmasks for coding->eol_seen. */
1165
1166#define EOL_SEEN_NONE 0
1167#define EOL_SEEN_LF 1
1168#define EOL_SEEN_CR 2
1169#define EOL_SEEN_CRLF 4
1170
1258 1171
1259/*** 2. Emacs' internal format (emacs-utf-8) ***/ 1172/*** 2. Emacs' internal format (emacs-utf-8) ***/
1260 1173
@@ -1264,8 +1177,7 @@ alloc_destination (struct coding_system *coding, ptrdiff_t nbytes,
1264/*** 3. UTF-8 ***/ 1177/*** 3. UTF-8 ***/
1265 1178
1266/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 1179/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1267 Check if a text is encoded in UTF-8. If it is, return 1, else 1180 Return true if a text is encoded in UTF-8. */
1268 return 0. */
1269 1181
1270#define UTF_8_1_OCTET_P(c) ((c) < 0x80) 1182#define UTF_8_1_OCTET_P(c) ((c) < 0x80)
1271#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80) 1183#define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80)
@@ -1278,20 +1190,35 @@ alloc_destination (struct coding_system *coding, ptrdiff_t nbytes,
1278#define UTF_8_BOM_2 0xBB 1190#define UTF_8_BOM_2 0xBB
1279#define UTF_8_BOM_3 0xBF 1191#define UTF_8_BOM_3 0xBF
1280 1192
1281static int 1193/* Unlike the other detect_coding_XXX, this function counts the number of
1194 characters and checks the EOL format. */
1195
1196static bool
1282detect_coding_utf_8 (struct coding_system *coding, 1197detect_coding_utf_8 (struct coding_system *coding,
1283 struct coding_detection_info *detect_info) 1198 struct coding_detection_info *detect_info)
1284{ 1199{
1285 const unsigned char *src = coding->source, *src_base; 1200 const unsigned char *src = coding->source, *src_base;
1286 const unsigned char *src_end = coding->source + coding->src_bytes; 1201 const unsigned char *src_end = coding->source + coding->src_bytes;
1287 int multibytep = coding->src_multibyte; 1202 bool multibytep = coding->src_multibyte;
1288 ptrdiff_t consumed_chars = 0; 1203 ptrdiff_t consumed_chars = 0;
1289 int bom_found = 0; 1204 bool bom_found = 0;
1290 int found = 0; 1205 int nchars = coding->head_ascii;
1206 int eol_seen = coding->eol_seen;
1291 1207
1292 detect_info->checked |= CATEGORY_MASK_UTF_8; 1208 detect_info->checked |= CATEGORY_MASK_UTF_8;
1293 /* A coding system of this category is always ASCII compatible. */ 1209 /* A coding system of this category is always ASCII compatible. */
1294 src += coding->head_ascii; 1210 src += nchars;
1211
1212 if (src == coding->source /* BOM should be at the head. */
1213 && src + 3 < src_end /* BOM is 3 bytes long. */
1214 && src[0] == UTF_8_BOM_1
1215 && src[1] == UTF_8_BOM_2
1216 && src[2] == UTF_8_BOM_3)
1217 {
1218 bom_found = 1;
1219 src += 3;
1220 nchars++;
1221 }
1295 1222
1296 while (1) 1223 while (1)
1297 { 1224 {
@@ -1300,13 +1227,29 @@ detect_coding_utf_8 (struct coding_system *coding,
1300 src_base = src; 1227 src_base = src;
1301 ONE_MORE_BYTE (c); 1228 ONE_MORE_BYTE (c);
1302 if (c < 0 || UTF_8_1_OCTET_P (c)) 1229 if (c < 0 || UTF_8_1_OCTET_P (c))
1303 continue; 1230 {
1231 nchars++;
1232 if (c == '\r')
1233 {
1234 if (src < src_end && *src == '\n')
1235 {
1236 eol_seen |= EOL_SEEN_CRLF;
1237 src++;
1238 nchars++;
1239 }
1240 else
1241 eol_seen |= EOL_SEEN_CR;
1242 }
1243 else if (c == '\n')
1244 eol_seen |= EOL_SEEN_LF;
1245 continue;
1246 }
1304 ONE_MORE_BYTE (c1); 1247 ONE_MORE_BYTE (c1);
1305 if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1)) 1248 if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1306 break; 1249 break;
1307 if (UTF_8_2_OCTET_LEADING_P (c)) 1250 if (UTF_8_2_OCTET_LEADING_P (c))
1308 { 1251 {
1309 found = 1; 1252 nchars++;
1310 continue; 1253 continue;
1311 } 1254 }
1312 ONE_MORE_BYTE (c2); 1255 ONE_MORE_BYTE (c2);
@@ -1314,10 +1257,7 @@ detect_coding_utf_8 (struct coding_system *coding,
1314 break; 1257 break;
1315 if (UTF_8_3_OCTET_LEADING_P (c)) 1258 if (UTF_8_3_OCTET_LEADING_P (c))
1316 { 1259 {
1317 found = 1; 1260 nchars++;
1318 if (src_base == coding->source
1319 && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1320 bom_found = 1;
1321 continue; 1261 continue;
1322 } 1262 }
1323 ONE_MORE_BYTE (c3); 1263 ONE_MORE_BYTE (c3);
@@ -1325,7 +1265,7 @@ detect_coding_utf_8 (struct coding_system *coding,
1325 break; 1265 break;
1326 if (UTF_8_4_OCTET_LEADING_P (c)) 1266 if (UTF_8_4_OCTET_LEADING_P (c))
1327 { 1267 {
1328 found = 1; 1268 nchars++;
1329 continue; 1269 continue;
1330 } 1270 }
1331 ONE_MORE_BYTE (c4); 1271 ONE_MORE_BYTE (c4);
@@ -1333,7 +1273,7 @@ detect_coding_utf_8 (struct coding_system *coding,
1333 break; 1273 break;
1334 if (UTF_8_5_OCTET_LEADING_P (c)) 1274 if (UTF_8_5_OCTET_LEADING_P (c))
1335 { 1275 {
1336 found = 1; 1276 nchars++;
1337 continue; 1277 continue;
1338 } 1278 }
1339 break; 1279 break;
@@ -1350,14 +1290,17 @@ detect_coding_utf_8 (struct coding_system *coding,
1350 if (bom_found) 1290 if (bom_found)
1351 { 1291 {
1352 /* The first character U+FEFF doesn't necessarily mean a BOM. */ 1292 /* The first character U+FEFF doesn't necessarily mean a BOM. */
1353 detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG; 1293 detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1354 } 1294 }
1355 else 1295 else
1356 { 1296 {
1357 detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG; 1297 detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
1358 if (found) 1298 if (nchars < src_end - coding->source)
1359 detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG; 1299 /* The found characters are less than source bytes, which
1300 means that we found valid non-ASCII characters. */
1301 detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_NOSIG;
1360 } 1302 }
1303 coding->detected_utf8_chars = nchars;
1361 return 1; 1304 return 1;
1362} 1305}
1363 1306
@@ -1371,10 +1314,10 @@ decode_coding_utf_8 (struct coding_system *coding)
1371 int *charbuf = coding->charbuf + coding->charbuf_used; 1314 int *charbuf = coding->charbuf + coding->charbuf_used;
1372 int *charbuf_end = coding->charbuf + coding->charbuf_size; 1315 int *charbuf_end = coding->charbuf + coding->charbuf_size;
1373 ptrdiff_t consumed_chars = 0, consumed_chars_base = 0; 1316 ptrdiff_t consumed_chars = 0, consumed_chars_base = 0;
1374 int multibytep = coding->src_multibyte; 1317 bool multibytep = coding->src_multibyte;
1375 enum utf_bom_type bom = CODING_UTF_8_BOM (coding); 1318 enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1376 int eol_dos = 1319 bool eol_dos
1377 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 1320 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1378 int byte_after_cr = -1; 1321 int byte_after_cr = -1;
1379 1322
1380 if (bom != utf_without_bom) 1323 if (bom != utf_without_bom)
@@ -1422,6 +1365,45 @@ decode_coding_utf_8 (struct coding_system *coding)
1422 break; 1365 break;
1423 } 1366 }
1424 1367
1368 /* In the simple case, rapidly handle ordinary characters */
1369 if (multibytep && ! eol_dos
1370 && charbuf < charbuf_end - 6 && src < src_end - 6)
1371 {
1372 while (charbuf < charbuf_end - 6 && src < src_end - 6)
1373 {
1374 c1 = *src;
1375 if (c1 & 0x80)
1376 break;
1377 src++;
1378 consumed_chars++;
1379 *charbuf++ = c1;
1380
1381 c1 = *src;
1382 if (c1 & 0x80)
1383 break;
1384 src++;
1385 consumed_chars++;
1386 *charbuf++ = c1;
1387
1388 c1 = *src;
1389 if (c1 & 0x80)
1390 break;
1391 src++;
1392 consumed_chars++;
1393 *charbuf++ = c1;
1394
1395 c1 = *src;
1396 if (c1 & 0x80)
1397 break;
1398 src++;
1399 consumed_chars++;
1400 *charbuf++ = c1;
1401 }
1402 /* If we handled at least one character, restart the main loop. */
1403 if (src != src_base)
1404 continue;
1405 }
1406
1425 if (byte_after_cr >= 0) 1407 if (byte_after_cr >= 0)
1426 c1 = byte_after_cr, byte_after_cr = -1; 1408 c1 = byte_after_cr, byte_after_cr = -1;
1427 else 1409 else
@@ -1513,10 +1495,10 @@ decode_coding_utf_8 (struct coding_system *coding)
1513} 1495}
1514 1496
1515 1497
1516static int 1498static bool
1517encode_coding_utf_8 (struct coding_system *coding) 1499encode_coding_utf_8 (struct coding_system *coding)
1518{ 1500{
1519 int multibytep = coding->dst_multibyte; 1501 bool multibytep = coding->dst_multibyte;
1520 int *charbuf = coding->charbuf; 1502 int *charbuf = coding->charbuf;
1521 int *charbuf_end = charbuf + coding->charbuf_used; 1503 int *charbuf_end = charbuf + coding->charbuf_used;
1522 unsigned char *dst = coding->destination + coding->produced; 1504 unsigned char *dst = coding->destination + coding->produced;
@@ -1577,8 +1559,7 @@ encode_coding_utf_8 (struct coding_system *coding)
1577 1559
1578 1560
1579/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 1561/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1580 Check if a text is encoded in one of UTF-16 based coding systems. 1562 Return true if a text is encoded in one of UTF-16 based coding systems. */
1581 If it is, return 1, else return 0. */
1582 1563
1583#define UTF_16_HIGH_SURROGATE_P(val) \ 1564#define UTF_16_HIGH_SURROGATE_P(val) \
1584 (((val) & 0xFC00) == 0xD800) 1565 (((val) & 0xFC00) == 0xD800)
@@ -1587,13 +1568,13 @@ encode_coding_utf_8 (struct coding_system *coding)
1587 (((val) & 0xFC00) == 0xDC00) 1568 (((val) & 0xFC00) == 0xDC00)
1588 1569
1589 1570
1590static int 1571static bool
1591detect_coding_utf_16 (struct coding_system *coding, 1572detect_coding_utf_16 (struct coding_system *coding,
1592 struct coding_detection_info *detect_info) 1573 struct coding_detection_info *detect_info)
1593{ 1574{
1594 const unsigned char *src = coding->source; 1575 const unsigned char *src = coding->source;
1595 const unsigned char *src_end = coding->source + coding->src_bytes; 1576 const unsigned char *src_end = coding->source + coding->src_bytes;
1596 int multibytep = coding->src_multibyte; 1577 bool multibytep = coding->src_multibyte;
1597 int c1, c2; 1578 int c1, c2;
1598 1579
1599 detect_info->checked |= CATEGORY_MASK_UTF_16; 1580 detect_info->checked |= CATEGORY_MASK_UTF_16;
@@ -1680,12 +1661,12 @@ decode_coding_utf_16 (struct coding_system *coding)
1680 /* We may produce at most 3 chars in one loop. */ 1661 /* We may produce at most 3 chars in one loop. */
1681 int *charbuf_end = coding->charbuf + coding->charbuf_size - 2; 1662 int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
1682 ptrdiff_t consumed_chars = 0, consumed_chars_base = 0; 1663 ptrdiff_t consumed_chars = 0, consumed_chars_base = 0;
1683 int multibytep = coding->src_multibyte; 1664 bool multibytep = coding->src_multibyte;
1684 enum utf_bom_type bom = CODING_UTF_16_BOM (coding); 1665 enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1685 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding); 1666 enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1686 int surrogate = CODING_UTF_16_SURROGATE (coding); 1667 int surrogate = CODING_UTF_16_SURROGATE (coding);
1687 int eol_dos = 1668 bool eol_dos
1688 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 1669 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1689 int byte_after_cr1 = -1, byte_after_cr2 = -1; 1670 int byte_after_cr1 = -1, byte_after_cr2 = -1;
1690 1671
1691 if (bom == utf_with_bom) 1672 if (bom == utf_with_bom)
@@ -1795,17 +1776,17 @@ decode_coding_utf_16 (struct coding_system *coding)
1795 coding->charbuf_used = charbuf - coding->charbuf; 1776 coding->charbuf_used = charbuf - coding->charbuf;
1796} 1777}
1797 1778
1798static int 1779static bool
1799encode_coding_utf_16 (struct coding_system *coding) 1780encode_coding_utf_16 (struct coding_system *coding)
1800{ 1781{
1801 int multibytep = coding->dst_multibyte; 1782 bool multibytep = coding->dst_multibyte;
1802 int *charbuf = coding->charbuf; 1783 int *charbuf = coding->charbuf;
1803 int *charbuf_end = charbuf + coding->charbuf_used; 1784 int *charbuf_end = charbuf + coding->charbuf_used;
1804 unsigned char *dst = coding->destination + coding->produced; 1785 unsigned char *dst = coding->destination + coding->produced;
1805 unsigned char *dst_end = coding->destination + coding->dst_bytes; 1786 unsigned char *dst_end = coding->destination + coding->dst_bytes;
1806 int safe_room = 8; 1787 int safe_room = 8;
1807 enum utf_bom_type bom = CODING_UTF_16_BOM (coding); 1788 enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1808 int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian; 1789 bool big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1809 ptrdiff_t produced_chars = 0; 1790 ptrdiff_t produced_chars = 0;
1810 int c; 1791 int c;
1811 1792
@@ -1930,16 +1911,15 @@ char emacs_mule_bytes[256];
1930 1911
1931 1912
1932/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 1913/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1933 Check if a text is encoded in `emacs-mule'. If it is, return 1, 1914 Return true if a text is encoded in 'emacs-mule'. */
1934 else return 0. */
1935 1915
1936static int 1916static bool
1937detect_coding_emacs_mule (struct coding_system *coding, 1917detect_coding_emacs_mule (struct coding_system *coding,
1938 struct coding_detection_info *detect_info) 1918 struct coding_detection_info *detect_info)
1939{ 1919{
1940 const unsigned char *src = coding->source, *src_base; 1920 const unsigned char *src = coding->source, *src_base;
1941 const unsigned char *src_end = coding->source + coding->src_bytes; 1921 const unsigned char *src_end = coding->source + coding->src_bytes;
1942 int multibytep = coding->src_multibyte; 1922 bool multibytep = coding->src_multibyte;
1943 ptrdiff_t consumed_chars = 0; 1923 ptrdiff_t consumed_chars = 0;
1944 int c; 1924 int c;
1945 int found = 0; 1925 int found = 0;
@@ -2029,12 +2009,12 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2029{ 2009{
2030 const unsigned char *src_end = coding->source + coding->src_bytes; 2010 const unsigned char *src_end = coding->source + coding->src_bytes;
2031 const unsigned char *src_base = src; 2011 const unsigned char *src_base = src;
2032 int multibytep = coding->src_multibyte; 2012 bool multibytep = coding->src_multibyte;
2033 int charset_ID; 2013 int charset_ID;
2034 unsigned code; 2014 unsigned code;
2035 int c; 2015 int c;
2036 int consumed_chars = 0; 2016 int consumed_chars = 0;
2037 int mseq_found = 0; 2017 bool mseq_found = 0;
2038 2018
2039 ONE_MORE_BYTE (c); 2019 ONE_MORE_BYTE (c);
2040 if (c < 0) 2020 if (c < 0)
@@ -2131,7 +2111,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2131 break; 2111 break;
2132 2112
2133 default: 2113 default:
2134 abort (); 2114 emacs_abort ();
2135 } 2115 }
2136 CODING_DECODE_CHAR (coding, src, src_base, src_end, 2116 CODING_DECODE_CHAR (coding, src, src_base, src_end,
2137 CHARSET_FROM_ID (charset_ID), code, c); 2117 CHARSET_FROM_ID (charset_ID), code, c);
@@ -2411,12 +2391,12 @@ decode_coding_emacs_mule (struct coding_system *coding)
2411 /* We can produce up to 2 characters in a loop. */ 2391 /* We can produce up to 2 characters in a loop. */
2412 - 1; 2392 - 1;
2413 ptrdiff_t consumed_chars = 0, consumed_chars_base; 2393 ptrdiff_t consumed_chars = 0, consumed_chars_base;
2414 int multibytep = coding->src_multibyte; 2394 bool multibytep = coding->src_multibyte;
2415 ptrdiff_t char_offset = coding->produced_char; 2395 ptrdiff_t char_offset = coding->produced_char;
2416 ptrdiff_t last_offset = char_offset; 2396 ptrdiff_t last_offset = char_offset;
2417 int last_id = charset_ascii; 2397 int last_id = charset_ascii;
2418 int eol_dos = 2398 bool eol_dos
2419 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 2399 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2420 int byte_after_cr = -1; 2400 int byte_after_cr = -1;
2421 struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status; 2401 struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
2422 2402
@@ -2425,7 +2405,7 @@ decode_coding_emacs_mule (struct coding_system *coding)
2425 int i; 2405 int i;
2426 2406
2427 if (charbuf_end - charbuf < cmp_status->length) 2407 if (charbuf_end - charbuf < cmp_status->length)
2428 abort (); 2408 emacs_abort ();
2429 for (i = 0; i < cmp_status->length; i++) 2409 for (i = 0; i < cmp_status->length; i++)
2430 *charbuf++ = cmp_status->carryover[i]; 2410 *charbuf++ = cmp_status->carryover[i];
2431 coding->annotated = 1; 2411 coding->annotated = 1;
@@ -2657,10 +2637,10 @@ decode_coding_emacs_mule (struct coding_system *coding)
2657 } while (0); 2637 } while (0);
2658 2638
2659 2639
2660static int 2640static bool
2661encode_coding_emacs_mule (struct coding_system *coding) 2641encode_coding_emacs_mule (struct coding_system *coding)
2662{ 2642{
2663 int multibytep = coding->dst_multibyte; 2643 bool multibytep = coding->dst_multibyte;
2664 int *charbuf = coding->charbuf; 2644 int *charbuf = coding->charbuf;
2665 int *charbuf_end = charbuf + coding->charbuf_used; 2645 int *charbuf_end = charbuf + coding->charbuf_used;
2666 unsigned char *dst = coding->destination + coding->produced; 2646 unsigned char *dst = coding->destination + coding->produced;
@@ -2674,8 +2654,8 @@ encode_coding_emacs_mule (struct coding_system *coding)
2674 CODING_GET_INFO (coding, attrs, charset_list); 2654 CODING_GET_INFO (coding, attrs, charset_list);
2675 if (! EQ (charset_list, Vemacs_mule_charset_list)) 2655 if (! EQ (charset_list, Vemacs_mule_charset_list))
2676 { 2656 {
2677 CODING_ATTR_CHARSET_LIST (attrs) 2657 charset_list = Vemacs_mule_charset_list;
2678 = charset_list = Vemacs_mule_charset_list; 2658 ASET (attrs, coding_attr_charset_list, charset_list);
2679 } 2659 }
2680 2660
2681 while (charbuf < charbuf_end) 2661 while (charbuf < charbuf_end)
@@ -2699,7 +2679,7 @@ encode_coding_emacs_mule (struct coding_system *coding)
2699 preferred_charset_id = -1; 2679 preferred_charset_id = -1;
2700 break; 2680 break;
2701 default: 2681 default:
2702 abort (); 2682 emacs_abort ();
2703 } 2683 }
2704 charbuf += -c - 1; 2684 charbuf += -c - 1;
2705 continue; 2685 continue;
@@ -2722,7 +2702,7 @@ encode_coding_emacs_mule (struct coding_system *coding)
2722 2702
2723 if (preferred_charset_id >= 0) 2703 if (preferred_charset_id >= 0)
2724 { 2704 {
2725 int result; 2705 bool result;
2726 2706
2727 charset = CHARSET_FROM_ID (preferred_charset_id); 2707 charset = CHARSET_FROM_ID (preferred_charset_id);
2728 CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result); 2708 CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
@@ -2967,8 +2947,8 @@ setup_iso_safe_charsets (Lisp_Object attrs)
2967 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT) 2947 if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2968 && ! EQ (charset_list, Viso_2022_charset_list)) 2948 && ! EQ (charset_list, Viso_2022_charset_list))
2969 { 2949 {
2970 CODING_ATTR_CHARSET_LIST (attrs) 2950 charset_list = Viso_2022_charset_list;
2971 = charset_list = Viso_2022_charset_list; 2951 ASET (attrs, coding_attr_charset_list, charset_list);
2972 ASET (attrs, coding_attr_safe_charsets, Qnil); 2952 ASET (attrs, coding_attr_safe_charsets, Qnil);
2973 } 2953 }
2974 2954
@@ -3017,17 +2997,17 @@ setup_iso_safe_charsets (Lisp_Object attrs)
3017 2997
3018 2998
3019/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 2999/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3020 Check if a text is encoded in one of ISO-2022 based coding systems. 3000 Return true if a text is encoded in one of ISO-2022 based coding
3021 If it is, return 1, else return 0. */ 3001 systems. */
3022 3002
3023static int 3003static bool
3024detect_coding_iso_2022 (struct coding_system *coding, 3004detect_coding_iso_2022 (struct coding_system *coding,
3025 struct coding_detection_info *detect_info) 3005 struct coding_detection_info *detect_info)
3026{ 3006{
3027 const unsigned char *src = coding->source, *src_base = src; 3007 const unsigned char *src = coding->source, *src_base = src;
3028 const unsigned char *src_end = coding->source + coding->src_bytes; 3008 const unsigned char *src_end = coding->source + coding->src_bytes;
3029 int multibytep = coding->src_multibyte; 3009 bool multibytep = coding->src_multibyte;
3030 int single_shifting = 0; 3010 bool single_shifting = 0;
3031 int id; 3011 int id;
3032 int c, c1; 3012 int c, c1;
3033 ptrdiff_t consumed_chars = 0; 3013 ptrdiff_t consumed_chars = 0;
@@ -3187,20 +3167,7 @@ detect_coding_iso_2022 (struct coding_system *coding,
3187 } 3167 }
3188 if (single_shifting) 3168 if (single_shifting)
3189 break; 3169 break;
3190 check_extra_latin: 3170 goto check_extra_latin;
3191 if (! VECTORP (Vlatin_extra_code_table)
3192 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3193 {
3194 rejected = CATEGORY_MASK_ISO;
3195 break;
3196 }
3197 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3198 & CODING_ISO_FLAG_LATIN_EXTRA)
3199 found |= CATEGORY_MASK_ISO_8_1;
3200 else
3201 rejected |= CATEGORY_MASK_ISO_8_1;
3202 rejected |= CATEGORY_MASK_ISO_8_2;
3203 break;
3204 3171
3205 default: 3172 default:
3206 if (c < 0) 3173 if (c < 0)
@@ -3251,6 +3218,20 @@ detect_coding_iso_2022 (struct coding_system *coding,
3251 } 3218 }
3252 break; 3219 break;
3253 } 3220 }
3221 check_extra_latin:
3222 if (! VECTORP (Vlatin_extra_code_table)
3223 || NILP (AREF (Vlatin_extra_code_table, c)))
3224 {
3225 rejected = CATEGORY_MASK_ISO;
3226 break;
3227 }
3228 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3229 & CODING_ISO_FLAG_LATIN_EXTRA)
3230 found |= CATEGORY_MASK_ISO_8_1;
3231 else
3232 rejected |= CATEGORY_MASK_ISO_8_1;
3233 rejected |= CATEGORY_MASK_ISO_8_2;
3234 break;
3254 } 3235 }
3255 } 3236 }
3256 detect_info->rejected |= CATEGORY_MASK_ISO; 3237 detect_info->rejected |= CATEGORY_MASK_ISO;
@@ -3390,8 +3371,6 @@ detect_coding_iso_2022 (struct coding_system *coding,
3390 3371
3391/* Finish the current composition as invalid. */ 3372/* Finish the current composition as invalid. */
3392 3373
3393static int finish_composition (int *, struct composition_status *);
3394
3395static int 3374static int
3396finish_composition (int *charbuf, struct composition_status *cmp_status) 3375finish_composition (int *charbuf, struct composition_status *cmp_status)
3397{ 3376{
@@ -3541,7 +3520,7 @@ decode_coding_iso_2022 (struct coding_system *coding)
3541 int *charbuf_end 3520 int *charbuf_end
3542 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3); 3521 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3543 ptrdiff_t consumed_chars = 0, consumed_chars_base; 3522 ptrdiff_t consumed_chars = 0, consumed_chars_base;
3544 int multibytep = coding->src_multibyte; 3523 bool multibytep = coding->src_multibyte;
3545 /* Charsets invoked to graphic plane 0 and 1 respectively. */ 3524 /* Charsets invoked to graphic plane 0 and 1 respectively. */
3546 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0); 3525 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3547 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1); 3526 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
@@ -3553,8 +3532,8 @@ decode_coding_iso_2022 (struct coding_system *coding)
3553 ptrdiff_t char_offset = coding->produced_char; 3532 ptrdiff_t char_offset = coding->produced_char;
3554 ptrdiff_t last_offset = char_offset; 3533 ptrdiff_t last_offset = char_offset;
3555 int last_id = charset_ascii; 3534 int last_id = charset_ascii;
3556 int eol_dos = 3535 bool eol_dos
3557 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 3536 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3558 int byte_after_cr = -1; 3537 int byte_after_cr = -1;
3559 int i; 3538 int i;
3560 3539
@@ -3564,7 +3543,7 @@ decode_coding_iso_2022 (struct coding_system *coding)
3564 if (cmp_status->state != COMPOSING_NO) 3543 if (cmp_status->state != COMPOSING_NO)
3565 { 3544 {
3566 if (charbuf_end - charbuf < cmp_status->length) 3545 if (charbuf_end - charbuf < cmp_status->length)
3567 abort (); 3546 emacs_abort ();
3568 for (i = 0; i < cmp_status->length; i++) 3547 for (i = 0; i < cmp_status->length; i++)
3569 *charbuf++ = cmp_status->carryover[i]; 3548 *charbuf++ = cmp_status->carryover[i];
3570 coding->annotated = 1; 3549 coding->annotated = 1;
@@ -3795,7 +3774,10 @@ decode_coding_iso_2022 (struct coding_system *coding)
3795 else 3774 else
3796 charset = CHARSET_FROM_ID (charset_id_2); 3775 charset = CHARSET_FROM_ID (charset_id_2);
3797 ONE_MORE_BYTE (c1); 3776 ONE_MORE_BYTE (c1);
3798 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)) 3777 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)
3778 || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3779 && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4)
3780 ? c1 >= 0x80 : c1 < 0x80)))
3799 goto invalid_code; 3781 goto invalid_code;
3800 break; 3782 break;
3801 3783
@@ -3809,7 +3791,10 @@ decode_coding_iso_2022 (struct coding_system *coding)
3809 else 3791 else
3810 charset = CHARSET_FROM_ID (charset_id_3); 3792 charset = CHARSET_FROM_ID (charset_id_3);
3811 ONE_MORE_BYTE (c1); 3793 ONE_MORE_BYTE (c1);
3812 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)) 3794 if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)
3795 || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3796 && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4)
3797 ? c1 >= 0x80 : c1 < 0x80)))
3813 goto invalid_code; 3798 goto invalid_code;
3814 break; 3799 break;
3815 3800
@@ -3946,7 +3931,7 @@ decode_coding_iso_2022 (struct coding_system *coding)
3946 break; 3931 break;
3947 3932
3948 default: 3933 default:
3949 abort (); 3934 emacs_abort ();
3950 } 3935 }
3951 3936
3952 if (cmp_status->state == COMPOSING_NO 3937 if (cmp_status->state == COMPOSING_NO
@@ -4021,6 +4006,14 @@ decode_coding_iso_2022 (struct coding_system *coding)
4021 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); 4006 *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4022 char_offset++; 4007 char_offset++;
4023 coding->errors++; 4008 coding->errors++;
4009 /* Reset the invocation and designation status to the safest
4010 one; i.e. designate ASCII to the graphic register 0, and
4011 invoke that register to the graphic plane 0. This typically
4012 helps the case that a designation sequence for ASCII "ESC (
4013 B" is somehow broken (e.g. broken by a newline). */
4014 CODING_ISO_INVOCATION (coding, 0) = 0;
4015 CODING_ISO_DESIGNATION (coding, 0) = charset_ascii;
4016 charset_id_0 = charset_ascii;
4024 continue; 4017 continue;
4025 4018
4026 break_loop: 4019 break_loop:
@@ -4282,7 +4275,7 @@ encode_invocation_designation (struct charset *charset,
4282 struct coding_system *coding, 4275 struct coding_system *coding,
4283 unsigned char *dst, ptrdiff_t *p_nchars) 4276 unsigned char *dst, ptrdiff_t *p_nchars)
4284{ 4277{
4285 int multibytep = coding->dst_multibyte; 4278 bool multibytep = coding->dst_multibyte;
4286 ptrdiff_t produced_chars = *p_nchars; 4279 ptrdiff_t produced_chars = *p_nchars;
4287 int reg; /* graphic register number */ 4280 int reg; /* graphic register number */
4288 int id = CHARSET_ID (charset); 4281 int id = CHARSET_ID (charset);
@@ -4380,7 +4373,7 @@ encode_designation_at_bol (struct coding_system *coding,
4380 int r[4]; 4373 int r[4];
4381 int c, found = 0, reg; 4374 int c, found = 0, reg;
4382 ptrdiff_t produced_chars = 0; 4375 ptrdiff_t produced_chars = 0;
4383 int multibytep = coding->dst_multibyte; 4376 bool multibytep = coding->dst_multibyte;
4384 Lisp_Object attrs; 4377 Lisp_Object attrs;
4385 Lisp_Object charset_list; 4378 Lisp_Object charset_list;
4386 4379
@@ -4422,21 +4415,21 @@ encode_designation_at_bol (struct coding_system *coding,
4422 4415
4423/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */ 4416/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
4424 4417
4425static int 4418static bool
4426encode_coding_iso_2022 (struct coding_system *coding) 4419encode_coding_iso_2022 (struct coding_system *coding)
4427{ 4420{
4428 int multibytep = coding->dst_multibyte; 4421 bool multibytep = coding->dst_multibyte;
4429 int *charbuf = coding->charbuf; 4422 int *charbuf = coding->charbuf;
4430 int *charbuf_end = charbuf + coding->charbuf_used; 4423 int *charbuf_end = charbuf + coding->charbuf_used;
4431 unsigned char *dst = coding->destination + coding->produced; 4424 unsigned char *dst = coding->destination + coding->produced;
4432 unsigned char *dst_end = coding->destination + coding->dst_bytes; 4425 unsigned char *dst_end = coding->destination + coding->dst_bytes;
4433 int safe_room = 16; 4426 int safe_room = 16;
4434 int bol_designation 4427 bool bol_designation
4435 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL 4428 = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4436 && CODING_ISO_BOL (coding)); 4429 && CODING_ISO_BOL (coding));
4437 ptrdiff_t produced_chars = 0; 4430 ptrdiff_t produced_chars = 0;
4438 Lisp_Object attrs, eol_type, charset_list; 4431 Lisp_Object attrs, eol_type, charset_list;
4439 int ascii_compatible; 4432 bool ascii_compatible;
4440 int c; 4433 int c;
4441 int preferred_charset_id = -1; 4434 int preferred_charset_id = -1;
4442 4435
@@ -4501,7 +4494,7 @@ encode_coding_iso_2022 (struct coding_system *coding)
4501 preferred_charset_id = -1; 4494 preferred_charset_id = -1;
4502 break; 4495 break;
4503 default: 4496 default:
4504 abort (); 4497 emacs_abort ();
4505 } 4498 }
4506 charbuf += -c - 1; 4499 charbuf += -c - 1;
4507 continue; 4500 continue;
@@ -4523,8 +4516,9 @@ encode_coding_iso_2022 (struct coding_system *coding)
4523 CODING_ISO_DESIGNATION (coding, i) 4516 CODING_ISO_DESIGNATION (coding, i)
4524 = CODING_ISO_INITIAL (coding, i); 4517 = CODING_ISO_INITIAL (coding, i);
4525 } 4518 }
4526 bol_designation 4519 bol_designation = ((CODING_ISO_FLAGS (coding)
4527 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL; 4520 & CODING_ISO_FLAG_DESIGNATE_AT_BOL)
4521 != 0);
4528 } 4522 }
4529 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL) 4523 else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4530 ENCODE_RESET_PLANE_AND_REGISTER (); 4524 ENCODE_RESET_PLANE_AND_REGISTER ();
@@ -4551,7 +4545,7 @@ encode_coding_iso_2022 (struct coding_system *coding)
4551 4545
4552 if (preferred_charset_id >= 0) 4546 if (preferred_charset_id >= 0)
4553 { 4547 {
4554 int result; 4548 bool result;
4555 4549
4556 charset = CHARSET_FROM_ID (preferred_charset_id); 4550 charset = CHARSET_FROM_ID (preferred_charset_id);
4557 CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result); 4551 CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
@@ -4631,16 +4625,15 @@ encode_coding_iso_2022 (struct coding_system *coding)
4631 */ 4625 */
4632 4626
4633/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 4627/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4634 Check if a text is encoded in SJIS. If it is, return 4628 Return true if a text is encoded in SJIS. */
4635 CATEGORY_MASK_SJIS, else return 0. */
4636 4629
4637static int 4630static bool
4638detect_coding_sjis (struct coding_system *coding, 4631detect_coding_sjis (struct coding_system *coding,
4639 struct coding_detection_info *detect_info) 4632 struct coding_detection_info *detect_info)
4640{ 4633{
4641 const unsigned char *src = coding->source, *src_base; 4634 const unsigned char *src = coding->source, *src_base;
4642 const unsigned char *src_end = coding->source + coding->src_bytes; 4635 const unsigned char *src_end = coding->source + coding->src_bytes;
4643 int multibytep = coding->src_multibyte; 4636 bool multibytep = coding->src_multibyte;
4644 ptrdiff_t consumed_chars = 0; 4637 ptrdiff_t consumed_chars = 0;
4645 int found = 0; 4638 int found = 0;
4646 int c; 4639 int c;
@@ -4688,16 +4681,15 @@ detect_coding_sjis (struct coding_system *coding,
4688} 4681}
4689 4682
4690/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 4683/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4691 Check if a text is encoded in BIG5. If it is, return 4684 Return true if a text is encoded in BIG5. */
4692 CATEGORY_MASK_BIG5, else return 0. */
4693 4685
4694static int 4686static bool
4695detect_coding_big5 (struct coding_system *coding, 4687detect_coding_big5 (struct coding_system *coding,
4696 struct coding_detection_info *detect_info) 4688 struct coding_detection_info *detect_info)
4697{ 4689{
4698 const unsigned char *src = coding->source, *src_base; 4690 const unsigned char *src = coding->source, *src_base;
4699 const unsigned char *src_end = coding->source + coding->src_bytes; 4691 const unsigned char *src_end = coding->source + coding->src_bytes;
4700 int multibytep = coding->src_multibyte; 4692 bool multibytep = coding->src_multibyte;
4701 ptrdiff_t consumed_chars = 0; 4693 ptrdiff_t consumed_chars = 0;
4702 int found = 0; 4694 int found = 0;
4703 int c; 4695 int c;
@@ -4735,8 +4727,7 @@ detect_coding_big5 (struct coding_system *coding,
4735 return 1; 4727 return 1;
4736} 4728}
4737 4729
4738/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". 4730/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
4739 If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
4740 4731
4741static void 4732static void
4742decode_coding_sjis (struct coding_system *coding) 4733decode_coding_sjis (struct coding_system *coding)
@@ -4750,15 +4741,15 @@ decode_coding_sjis (struct coding_system *coding)
4750 int *charbuf_end 4741 int *charbuf_end
4751 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); 4742 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4752 ptrdiff_t consumed_chars = 0, consumed_chars_base; 4743 ptrdiff_t consumed_chars = 0, consumed_chars_base;
4753 int multibytep = coding->src_multibyte; 4744 bool multibytep = coding->src_multibyte;
4754 struct charset *charset_roman, *charset_kanji, *charset_kana; 4745 struct charset *charset_roman, *charset_kanji, *charset_kana;
4755 struct charset *charset_kanji2; 4746 struct charset *charset_kanji2;
4756 Lisp_Object attrs, charset_list, val; 4747 Lisp_Object attrs, charset_list, val;
4757 ptrdiff_t char_offset = coding->produced_char; 4748 ptrdiff_t char_offset = coding->produced_char;
4758 ptrdiff_t last_offset = char_offset; 4749 ptrdiff_t last_offset = char_offset;
4759 int last_id = charset_ascii; 4750 int last_id = charset_ascii;
4760 int eol_dos = 4751 bool eol_dos
4761 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 4752 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4762 int byte_after_cr = -1; 4753 int byte_after_cr = -1;
4763 4754
4764 CODING_GET_INFO (coding, attrs, charset_list); 4755 CODING_GET_INFO (coding, attrs, charset_list);
@@ -4868,14 +4859,14 @@ decode_coding_big5 (struct coding_system *coding)
4868 int *charbuf_end 4859 int *charbuf_end
4869 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); 4860 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4870 ptrdiff_t consumed_chars = 0, consumed_chars_base; 4861 ptrdiff_t consumed_chars = 0, consumed_chars_base;
4871 int multibytep = coding->src_multibyte; 4862 bool multibytep = coding->src_multibyte;
4872 struct charset *charset_roman, *charset_big5; 4863 struct charset *charset_roman, *charset_big5;
4873 Lisp_Object attrs, charset_list, val; 4864 Lisp_Object attrs, charset_list, val;
4874 ptrdiff_t char_offset = coding->produced_char; 4865 ptrdiff_t char_offset = coding->produced_char;
4875 ptrdiff_t last_offset = char_offset; 4866 ptrdiff_t last_offset = char_offset;
4876 int last_id = charset_ascii; 4867 int last_id = charset_ascii;
4877 int eol_dos = 4868 bool eol_dos
4878 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 4869 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4879 int byte_after_cr = -1; 4870 int byte_after_cr = -1;
4880 4871
4881 CODING_GET_INFO (coding, attrs, charset_list); 4872 CODING_GET_INFO (coding, attrs, charset_list);
@@ -4957,13 +4948,12 @@ decode_coding_big5 (struct coding_system *coding)
4957 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We 4948 `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
4958 are sure that all these charsets are registered as official charsets 4949 are sure that all these charsets are registered as official charsets
4959 (i.e. do not have extended leading-codes). Characters of other 4950 (i.e. do not have extended leading-codes). Characters of other
4960 charsets are produced without any encoding. If SJIS_P is 1, encode 4951 charsets are produced without any encoding. */
4961 SJIS text, else encode BIG5 text. */
4962 4952
4963static int 4953static bool
4964encode_coding_sjis (struct coding_system *coding) 4954encode_coding_sjis (struct coding_system *coding)
4965{ 4955{
4966 int multibytep = coding->dst_multibyte; 4956 bool multibytep = coding->dst_multibyte;
4967 int *charbuf = coding->charbuf; 4957 int *charbuf = coding->charbuf;
4968 int *charbuf_end = charbuf + coding->charbuf_used; 4958 int *charbuf_end = charbuf + coding->charbuf_used;
4969 unsigned char *dst = coding->destination + coding->produced; 4959 unsigned char *dst = coding->destination + coding->produced;
@@ -4971,7 +4961,7 @@ encode_coding_sjis (struct coding_system *coding)
4971 int safe_room = 4; 4961 int safe_room = 4;
4972 ptrdiff_t produced_chars = 0; 4962 ptrdiff_t produced_chars = 0;
4973 Lisp_Object attrs, charset_list, val; 4963 Lisp_Object attrs, charset_list, val;
4974 int ascii_compatible; 4964 bool ascii_compatible;
4975 struct charset *charset_kanji, *charset_kana; 4965 struct charset *charset_kanji, *charset_kana;
4976 struct charset *charset_kanji2; 4966 struct charset *charset_kanji2;
4977 int c; 4967 int c;
@@ -5018,7 +5008,7 @@ encode_coding_sjis (struct coding_system *coding)
5018 } 5008 }
5019 } 5009 }
5020 if (code == CHARSET_INVALID_CODE (charset)) 5010 if (code == CHARSET_INVALID_CODE (charset))
5021 abort (); 5011 emacs_abort ();
5022 if (charset == charset_kanji) 5012 if (charset == charset_kanji)
5023 { 5013 {
5024 int c1, c2; 5014 int c1, c2;
@@ -5054,10 +5044,10 @@ encode_coding_sjis (struct coding_system *coding)
5054 return 0; 5044 return 0;
5055} 5045}
5056 5046
5057static int 5047static bool
5058encode_coding_big5 (struct coding_system *coding) 5048encode_coding_big5 (struct coding_system *coding)
5059{ 5049{
5060 int multibytep = coding->dst_multibyte; 5050 bool multibytep = coding->dst_multibyte;
5061 int *charbuf = coding->charbuf; 5051 int *charbuf = coding->charbuf;
5062 int *charbuf_end = charbuf + coding->charbuf_used; 5052 int *charbuf_end = charbuf + coding->charbuf_used;
5063 unsigned char *dst = coding->destination + coding->produced; 5053 unsigned char *dst = coding->destination + coding->produced;
@@ -5065,7 +5055,7 @@ encode_coding_big5 (struct coding_system *coding)
5065 int safe_room = 4; 5055 int safe_room = 4;
5066 ptrdiff_t produced_chars = 0; 5056 ptrdiff_t produced_chars = 0;
5067 Lisp_Object attrs, charset_list, val; 5057 Lisp_Object attrs, charset_list, val;
5068 int ascii_compatible; 5058 bool ascii_compatible;
5069 struct charset *charset_big5; 5059 struct charset *charset_big5;
5070 int c; 5060 int c;
5071 5061
@@ -5108,7 +5098,7 @@ encode_coding_big5 (struct coding_system *coding)
5108 } 5098 }
5109 } 5099 }
5110 if (code == CHARSET_INVALID_CODE (charset)) 5100 if (code == CHARSET_INVALID_CODE (charset))
5111 abort (); 5101 emacs_abort ();
5112 if (charset == charset_big5) 5102 if (charset == charset_big5)
5113 { 5103 {
5114 int c1, c2; 5104 int c1, c2;
@@ -5130,17 +5120,16 @@ encode_coding_big5 (struct coding_system *coding)
5130/*** 10. CCL handlers ***/ 5120/*** 10. CCL handlers ***/
5131 5121
5132/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 5122/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5133 Check if a text is encoded in a coding system of which 5123 Return true if a text is encoded in a coding system of which
5134 encoder/decoder are written in CCL program. If it is, return 5124 encoder/decoder are written in CCL program. */
5135 CATEGORY_MASK_CCL, else return 0. */
5136 5125
5137static int 5126static bool
5138detect_coding_ccl (struct coding_system *coding, 5127detect_coding_ccl (struct coding_system *coding,
5139 struct coding_detection_info *detect_info) 5128 struct coding_detection_info *detect_info)
5140{ 5129{
5141 const unsigned char *src = coding->source, *src_base; 5130 const unsigned char *src = coding->source, *src_base;
5142 const unsigned char *src_end = coding->source + coding->src_bytes; 5131 const unsigned char *src_end = coding->source + coding->src_bytes;
5143 int multibytep = coding->src_multibyte; 5132 bool multibytep = coding->src_multibyte;
5144 ptrdiff_t consumed_chars = 0; 5133 ptrdiff_t consumed_chars = 0;
5145 int found = 0; 5134 int found = 0;
5146 unsigned char *valids; 5135 unsigned char *valids;
@@ -5182,7 +5171,7 @@ decode_coding_ccl (struct coding_system *coding)
5182 int *charbuf = coding->charbuf + coding->charbuf_used; 5171 int *charbuf = coding->charbuf + coding->charbuf_used;
5183 int *charbuf_end = coding->charbuf + coding->charbuf_size; 5172 int *charbuf_end = coding->charbuf + coding->charbuf_size;
5184 ptrdiff_t consumed_chars = 0; 5173 ptrdiff_t consumed_chars = 0;
5185 int multibytep = coding->src_multibyte; 5174 bool multibytep = coding->src_multibyte;
5186 struct ccl_program *ccl = &coding->spec.ccl->ccl; 5175 struct ccl_program *ccl = &coding->spec.ccl->ccl;
5187 int source_charbuf[1024]; 5176 int source_charbuf[1024];
5188 int source_byteidx[1025]; 5177 int source_byteidx[1025];
@@ -5193,6 +5182,7 @@ decode_coding_ccl (struct coding_system *coding)
5193 while (1) 5182 while (1)
5194 { 5183 {
5195 const unsigned char *p = src; 5184 const unsigned char *p = src;
5185 ptrdiff_t offset;
5196 int i = 0; 5186 int i = 0;
5197 5187
5198 if (multibytep) 5188 if (multibytep)
@@ -5210,8 +5200,17 @@ decode_coding_ccl (struct coding_system *coding)
5210 5200
5211 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK) 5201 if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5212 ccl->last_block = 1; 5202 ccl->last_block = 1;
5203 /* As ccl_driver calls DECODE_CHAR, buffer may be relocated. */
5204 charset_map_loaded = 0;
5213 ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf, 5205 ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
5214 charset_list); 5206 charset_list);
5207 if (charset_map_loaded
5208 && (offset = coding_change_source (coding)))
5209 {
5210 p += offset;
5211 src += offset;
5212 src_end += offset;
5213 }
5215 charbuf += ccl->produced; 5214 charbuf += ccl->produced;
5216 if (multibytep) 5215 if (multibytep)
5217 src += source_byteidx[ccl->consumed]; 5216 src += source_byteidx[ccl->consumed];
@@ -5243,11 +5242,11 @@ decode_coding_ccl (struct coding_system *coding)
5243 coding->charbuf_used = charbuf - coding->charbuf; 5242 coding->charbuf_used = charbuf - coding->charbuf;
5244} 5243}
5245 5244
5246static int 5245static bool
5247encode_coding_ccl (struct coding_system *coding) 5246encode_coding_ccl (struct coding_system *coding)
5248{ 5247{
5249 struct ccl_program *ccl = &coding->spec.ccl->ccl; 5248 struct ccl_program *ccl = &coding->spec.ccl->ccl;
5250 int multibytep = coding->dst_multibyte; 5249 bool multibytep = coding->dst_multibyte;
5251 int *charbuf = coding->charbuf; 5250 int *charbuf = coding->charbuf;
5252 int *charbuf_end = charbuf + coding->charbuf_used; 5251 int *charbuf_end = charbuf + coding->charbuf_used;
5253 unsigned char *dst = coding->destination + coding->produced; 5252 unsigned char *dst = coding->destination + coding->produced;
@@ -5264,8 +5263,15 @@ encode_coding_ccl (struct coding_system *coding)
5264 5263
5265 do 5264 do
5266 { 5265 {
5266 ptrdiff_t offset;
5267
5268 /* As ccl_driver calls DECODE_CHAR, buffer may be relocated. */
5269 charset_map_loaded = 0;
5267 ccl_driver (ccl, charbuf, destination_charbuf, 5270 ccl_driver (ccl, charbuf, destination_charbuf,
5268 charbuf_end - charbuf, 1024, charset_list); 5271 charbuf_end - charbuf, 1024, charset_list);
5272 if (charset_map_loaded
5273 && (offset = coding_change_destination (coding)))
5274 dst += offset;
5269 if (multibytep) 5275 if (multibytep)
5270 { 5276 {
5271 ASSURE_DESTINATION (ccl->produced * 2); 5277 ASSURE_DESTINATION (ccl->produced * 2);
@@ -5308,7 +5314,6 @@ encode_coding_ccl (struct coding_system *coding)
5308 return 0; 5314 return 0;
5309} 5315}
5310 5316
5311
5312 5317
5313/*** 10, 11. no-conversion handlers ***/ 5318/*** 10, 11. no-conversion handlers ***/
5314 5319
@@ -5317,8 +5322,8 @@ encode_coding_ccl (struct coding_system *coding)
5317static void 5322static void
5318decode_coding_raw_text (struct coding_system *coding) 5323decode_coding_raw_text (struct coding_system *coding)
5319{ 5324{
5320 int eol_dos = 5325 bool eol_dos
5321 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 5326 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5322 5327
5323 coding->chars_at_source = 1; 5328 coding->chars_at_source = 1;
5324 coding->consumed_char = coding->src_chars; 5329 coding->consumed_char = coding->src_chars;
@@ -5333,10 +5338,10 @@ decode_coding_raw_text (struct coding_system *coding)
5333 record_conversion_result (coding, CODING_RESULT_SUCCESS); 5338 record_conversion_result (coding, CODING_RESULT_SUCCESS);
5334} 5339}
5335 5340
5336static int 5341static bool
5337encode_coding_raw_text (struct coding_system *coding) 5342encode_coding_raw_text (struct coding_system *coding)
5338{ 5343{
5339 int multibytep = coding->dst_multibyte; 5344 bool multibytep = coding->dst_multibyte;
5340 int *charbuf = coding->charbuf; 5345 int *charbuf = coding->charbuf;
5341 int *charbuf_end = coding->charbuf + coding->charbuf_used; 5346 int *charbuf_end = coding->charbuf + coding->charbuf_used;
5342 unsigned char *dst = coding->destination + coding->produced; 5347 unsigned char *dst = coding->destination + coding->produced;
@@ -5414,21 +5419,20 @@ encode_coding_raw_text (struct coding_system *coding)
5414} 5419}
5415 5420
5416/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". 5421/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5417 Check if a text is encoded in a charset-based coding system. If it 5422 Return true if a text is encoded in a charset-based coding system. */
5418 is, return 1, else return 0. */
5419 5423
5420static int 5424static bool
5421detect_coding_charset (struct coding_system *coding, 5425detect_coding_charset (struct coding_system *coding,
5422 struct coding_detection_info *detect_info) 5426 struct coding_detection_info *detect_info)
5423{ 5427{
5424 const unsigned char *src = coding->source, *src_base; 5428 const unsigned char *src = coding->source, *src_base;
5425 const unsigned char *src_end = coding->source + coding->src_bytes; 5429 const unsigned char *src_end = coding->source + coding->src_bytes;
5426 int multibytep = coding->src_multibyte; 5430 bool multibytep = coding->src_multibyte;
5427 ptrdiff_t consumed_chars = 0; 5431 ptrdiff_t consumed_chars = 0;
5428 Lisp_Object attrs, valids, name; 5432 Lisp_Object attrs, valids, name;
5429 int found = 0; 5433 int found = 0;
5430 ptrdiff_t head_ascii = coding->head_ascii; 5434 ptrdiff_t head_ascii = coding->head_ascii;
5431 int check_latin_extra = 0; 5435 bool check_latin_extra = 0;
5432 5436
5433 detect_info->checked |= CATEGORY_MASK_CHARSET; 5437 detect_info->checked |= CATEGORY_MASK_CHARSET;
5434 5438
@@ -5464,7 +5468,7 @@ detect_coding_charset (struct coding_system *coding,
5464 if (c < 0xA0 5468 if (c < 0xA0
5465 && check_latin_extra 5469 && check_latin_extra
5466 && (!VECTORP (Vlatin_extra_code_table) 5470 && (!VECTORP (Vlatin_extra_code_table)
5467 || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))) 5471 || NILP (AREF (Vlatin_extra_code_table, c))))
5468 break; 5472 break;
5469 found = CATEGORY_MASK_CHARSET; 5473 found = CATEGORY_MASK_CHARSET;
5470 } 5474 }
@@ -5532,14 +5536,14 @@ decode_coding_charset (struct coding_system *coding)
5532 int *charbuf_end 5536 int *charbuf_end
5533 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); 5537 = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
5534 ptrdiff_t consumed_chars = 0, consumed_chars_base; 5538 ptrdiff_t consumed_chars = 0, consumed_chars_base;
5535 int multibytep = coding->src_multibyte; 5539 bool multibytep = coding->src_multibyte;
5536 Lisp_Object attrs = CODING_ID_ATTRS (coding->id); 5540 Lisp_Object attrs = CODING_ID_ATTRS (coding->id);
5537 Lisp_Object valids; 5541 Lisp_Object valids;
5538 ptrdiff_t char_offset = coding->produced_char; 5542 ptrdiff_t char_offset = coding->produced_char;
5539 ptrdiff_t last_offset = char_offset; 5543 ptrdiff_t last_offset = char_offset;
5540 int last_id = charset_ascii; 5544 int last_id = charset_ascii;
5541 int eol_dos = 5545 bool eol_dos
5542 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 5546 = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5543 int byte_after_cr = -1; 5547 int byte_after_cr = -1;
5544 5548
5545 valids = AREF (attrs, coding_attr_charset_valids); 5549 valids = AREF (attrs, coding_attr_charset_valids);
@@ -5648,10 +5652,10 @@ decode_coding_charset (struct coding_system *coding)
5648 coding->charbuf_used = charbuf - coding->charbuf; 5652 coding->charbuf_used = charbuf - coding->charbuf;
5649} 5653}
5650 5654
5651static int 5655static bool
5652encode_coding_charset (struct coding_system *coding) 5656encode_coding_charset (struct coding_system *coding)
5653{ 5657{
5654 int multibytep = coding->dst_multibyte; 5658 bool multibytep = coding->dst_multibyte;
5655 int *charbuf = coding->charbuf; 5659 int *charbuf = coding->charbuf;
5656 int *charbuf_end = charbuf + coding->charbuf_used; 5660 int *charbuf_end = charbuf + coding->charbuf_used;
5657 unsigned char *dst = coding->destination + coding->produced; 5661 unsigned char *dst = coding->destination + coding->produced;
@@ -5659,7 +5663,7 @@ encode_coding_charset (struct coding_system *coding)
5659 int safe_room = MAX_MULTIBYTE_LENGTH; 5663 int safe_room = MAX_MULTIBYTE_LENGTH;
5660 ptrdiff_t produced_chars = 0; 5664 ptrdiff_t produced_chars = 0;
5661 Lisp_Object attrs, charset_list; 5665 Lisp_Object attrs, charset_list;
5662 int ascii_compatible; 5666 bool ascii_compatible;
5663 int c; 5667 int c;
5664 5668
5665 CODING_GET_INFO (coding, attrs, charset_list); 5669 CODING_GET_INFO (coding, attrs, charset_list);
@@ -5737,7 +5741,6 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
5737 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id); 5741 eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
5738 5742
5739 coding->mode = 0; 5743 coding->mode = 0;
5740 coding->head_ascii = -1;
5741 if (VECTORP (eol_type)) 5744 if (VECTORP (eol_type))
5742 coding->common_flags = (CODING_REQUIRE_DECODING_MASK 5745 coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5743 | CODING_REQUIRE_DETECTION_MASK); 5746 | CODING_REQUIRE_DETECTION_MASK);
@@ -5766,6 +5769,14 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
5766 coding->decoder = decode_coding_raw_text; 5769 coding->decoder = decode_coding_raw_text;
5767 coding->encoder = encode_coding_raw_text; 5770 coding->encoder = encode_coding_raw_text;
5768 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK; 5771 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5772 coding->spec.undecided.inhibit_nbd
5773 = (encode_inhibit_flag
5774 (AREF (attrs, coding_attr_undecided_inhibit_null_byte_detection)));
5775 coding->spec.undecided.inhibit_ied
5776 = (encode_inhibit_flag
5777 (AREF (attrs, coding_attr_undecided_inhibit_iso_escape_detection)));
5778 coding->spec.undecided.prefer_utf_8
5779 = ! NILP (AREF (attrs, coding_attr_undecided_prefer_utf_8));
5769 } 5780 }
5770 else if (EQ (coding_type, Qiso_2022)) 5781 else if (EQ (coding_type, Qiso_2022))
5771 { 5782 {
@@ -5865,7 +5876,6 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
5865 coding->encoder = encode_coding_emacs_mule; 5876 coding->encoder = encode_coding_emacs_mule;
5866 coding->common_flags 5877 coding->common_flags
5867 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK); 5878 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5868 coding->spec.emacs_mule.full_support = 1;
5869 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full)) 5879 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5870 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list)) 5880 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5871 { 5881 {
@@ -5883,7 +5893,6 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
5883 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0); 5893 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5884 coding->max_charset_id = max_charset_id; 5894 coding->max_charset_id = max_charset_id;
5885 coding->safe_charsets = SDATA (safe_charsets); 5895 coding->safe_charsets = SDATA (safe_charsets);
5886 coding->spec.emacs_mule.full_support = 1;
5887 } 5896 }
5888 coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO; 5897 coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5889 coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO; 5898 coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
@@ -6191,10 +6200,181 @@ complement_process_encoding_system (Lisp_Object coding_system)
6191 6200
6192*/ 6201*/
6193 6202
6194#define EOL_SEEN_NONE 0 6203static Lisp_Object adjust_coding_eol_type (struct coding_system *coding,
6195#define EOL_SEEN_LF 1 6204 int eol_seen);
6196#define EOL_SEEN_CR 2 6205
6197#define EOL_SEEN_CRLF 4 6206
6207/* Return the number of ASCII characters at the head of the source.
6208 By side effects, set coding->head_ascii and update
6209 coding->eol_seen. The value of coding->eol_seen is "logical or" of
6210 EOL_SEEN_LF, EOL_SEEN_CR, and EOL_SEEN_CRLF, but the value is
6211 reliable only when all the source bytes are ASCII. */
6212
6213static int
6214check_ascii (struct coding_system *coding)
6215{
6216 const unsigned char *src, *end;
6217 Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id);
6218 int eol_seen = coding->eol_seen;
6219
6220 coding_set_source (coding);
6221 src = coding->source;
6222 end = src + coding->src_bytes;
6223
6224 if (inhibit_eol_conversion
6225 || SYMBOLP (eol_type))
6226 {
6227 /* We don't have to check EOL format. */
6228 while (src < end && !( *src & 0x80))
6229 {
6230 if (*src++ == '\n')
6231 eol_seen |= EOL_SEEN_LF;
6232 }
6233 }
6234 else
6235 {
6236 end--; /* We look ahead one byte for "CR LF". */
6237 while (src < end)
6238 {
6239 int c = *src;
6240
6241 if (c & 0x80)
6242 break;
6243 src++;
6244 if (c == '\r')
6245 {
6246 if (*src == '\n')
6247 {
6248 eol_seen |= EOL_SEEN_CRLF;
6249 src++;
6250 }
6251 else
6252 eol_seen |= EOL_SEEN_CR;
6253 }
6254 else if (c == '\n')
6255 eol_seen |= EOL_SEEN_LF;
6256 }
6257 if (src == end)
6258 {
6259 int c = *src;
6260
6261 /* All bytes but the last one C are ASCII. */
6262 if (! (c & 0x80))
6263 {
6264 if (c == '\r')
6265 eol_seen |= EOL_SEEN_CR;
6266 else if (c == '\n')
6267 eol_seen |= EOL_SEEN_LF;
6268 src++;
6269 }
6270 }
6271 }
6272 coding->head_ascii = src - coding->source;
6273 coding->eol_seen = eol_seen;
6274 return (coding->head_ascii);
6275}
6276
6277
6278/* Return the number of characters at the source if all the bytes are
6279 valid UTF-8 (of Unicode range). Otherwise, return -1. By side
6280 effects, update coding->eol_seen. The value of coding->eol_seen is
6281 "logical or" of EOL_SEEN_LF, EOL_SEEN_CR, and EOL_SEEN_CRLF, but
6282 the value is reliable only when all the source bytes are valid
6283 UTF-8. */
6284
6285static int
6286check_utf_8 (struct coding_system *coding)
6287{
6288 const unsigned char *src, *end;
6289 int eol_seen;
6290 int nchars = coding->head_ascii;
6291
6292 if (coding->head_ascii < 0)
6293 check_ascii (coding);
6294 else
6295 coding_set_source (coding);
6296 src = coding->source + coding->head_ascii;
6297 /* We look ahead one byte for CR LF. */
6298 end = coding->source + coding->src_bytes - 1;
6299 eol_seen = coding->eol_seen;
6300 while (src < end)
6301 {
6302 int c = *src;
6303
6304 if (UTF_8_1_OCTET_P (*src))
6305 {
6306 src++;
6307 if (c < 0x20)
6308 {
6309 if (c == '\r')
6310 {
6311 if (*src == '\n')
6312 {
6313 eol_seen |= EOL_SEEN_CRLF;
6314 src++;
6315 nchars++;
6316 }
6317 else
6318 eol_seen |= EOL_SEEN_CR;
6319 }
6320 else if (c == '\n')
6321 eol_seen |= EOL_SEEN_LF;
6322 }
6323 }
6324 else if (UTF_8_2_OCTET_LEADING_P (c))
6325 {
6326 if (c < 0xC2 /* overlong sequence */
6327 || src + 1 >= end
6328 || ! UTF_8_EXTRA_OCTET_P (src[1]))
6329 return -1;
6330 src += 2;
6331 }
6332 else if (UTF_8_3_OCTET_LEADING_P (c))
6333 {
6334 if (src + 2 >= end
6335 || ! (UTF_8_EXTRA_OCTET_P (src[1])
6336 && UTF_8_EXTRA_OCTET_P (src[2])))
6337 return -1;
6338 c = (((c & 0xF) << 12)
6339 | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F));
6340 if (c < 0x800 /* overlong sequence */
6341 || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
6342 return -1;
6343 src += 3;
6344 }
6345 else if (UTF_8_4_OCTET_LEADING_P (c))
6346 {
6347 if (src + 3 >= end
6348 || ! (UTF_8_EXTRA_OCTET_P (src[1])
6349 && UTF_8_EXTRA_OCTET_P (src[2])
6350 && UTF_8_EXTRA_OCTET_P (src[3])))
6351 return -1;
6352 c = (((c & 0x7) << 18) | ((src[1] & 0x3F) << 12)
6353 | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
6354 if (c < 0x10000 /* overlong sequence */
6355 || c >= 0x110000) /* non-Unicode character */
6356 return -1;
6357 src += 4;
6358 }
6359 else
6360 return -1;
6361 nchars++;
6362 }
6363
6364 if (src == end)
6365 {
6366 if (! UTF_8_1_OCTET_P (*src))
6367 return -1;
6368 nchars++;
6369 if (*src == '\r')
6370 eol_seen |= EOL_SEEN_CR;
6371 else if (*src == '\n')
6372 eol_seen |= EOL_SEEN_LF;
6373 }
6374 coding->eol_seen = eol_seen;
6375 return nchars;
6376}
6377
6198 6378
6199/* Detect how end-of-line of a text of length SRC_BYTES pointed by 6379/* Detect how end-of-line of a text of length SRC_BYTES pointed by
6200 SOURCE is encoded. If CATEGORY is one of 6380 SOURCE is encoded. If CATEGORY is one of
@@ -6216,11 +6396,9 @@ detect_eol (const unsigned char *source, ptrdiff_t src_bytes,
6216 6396
6217 if ((1 << category) & CATEGORY_MASK_UTF_16) 6397 if ((1 << category) & CATEGORY_MASK_UTF_16)
6218 { 6398 {
6219 int msb, lsb; 6399 bool msb = category == (coding_category_utf_16_le
6220 6400 | coding_category_utf_16_le_nosig);
6221 msb = category == (coding_category_utf_16_le 6401 bool lsb = !msb;
6222 | coding_category_utf_16_le_nosig);
6223 lsb = 1 - msb;
6224 6402
6225 while (src + 1 < src_end) 6403 while (src + 1 < src_end)
6226 { 6404 {
@@ -6309,6 +6487,9 @@ adjust_coding_eol_type (struct coding_system *coding, int eol_seen)
6309 Lisp_Object eol_type; 6487 Lisp_Object eol_type;
6310 6488
6311 eol_type = CODING_ID_EOL_TYPE (coding->id); 6489 eol_type = CODING_ID_EOL_TYPE (coding->id);
6490 if (! VECTORP (eol_type))
6491 /* Already adjusted. */
6492 return eol_type;
6312 if (eol_seen & EOL_SEEN_LF) 6493 if (eol_seen & EOL_SEEN_LF)
6313 { 6494 {
6314 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0)); 6495 coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
@@ -6335,23 +6516,31 @@ static void
6335detect_coding (struct coding_system *coding) 6516detect_coding (struct coding_system *coding)
6336{ 6517{
6337 const unsigned char *src, *src_end; 6518 const unsigned char *src, *src_end;
6338 int saved_mode = coding->mode; 6519 unsigned int saved_mode = coding->mode;
6520 Lisp_Object found = Qnil;
6521 Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id);
6339 6522
6340 coding->consumed = coding->consumed_char = 0; 6523 coding->consumed = coding->consumed_char = 0;
6341 coding->produced = coding->produced_char = 0; 6524 coding->produced = coding->produced_char = 0;
6342 coding_set_source (coding); 6525 coding_set_source (coding);
6343 6526
6344 src_end = coding->source + coding->src_bytes; 6527 src_end = coding->source + coding->src_bytes;
6345 coding->head_ascii = 0;
6346 6528
6529 coding->eol_seen = EOL_SEEN_NONE;
6347 /* If we have not yet decided the text encoding type, detect it 6530 /* If we have not yet decided the text encoding type, detect it
6348 now. */ 6531 now. */
6349 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) 6532 if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
6350 { 6533 {
6351 int c, i; 6534 int c, i;
6352 struct coding_detection_info detect_info; 6535 struct coding_detection_info detect_info;
6353 int null_byte_found = 0, eight_bit_found = 0; 6536 bool null_byte_found = 0, eight_bit_found = 0;
6537 bool inhibit_nbd = inhibit_flag (coding->spec.undecided.inhibit_nbd,
6538 inhibit_null_byte_detection);
6539 bool inhibit_ied = inhibit_flag (coding->spec.undecided.inhibit_ied,
6540 inhibit_iso_escape_detection);
6541 bool prefer_utf_8 = coding->spec.undecided.prefer_utf_8;
6354 6542
6543 coding->head_ascii = 0;
6355 detect_info.checked = detect_info.found = detect_info.rejected = 0; 6544 detect_info.checked = detect_info.found = detect_info.rejected = 0;
6356 for (src = coding->source; src < src_end; src++) 6545 for (src = coding->source; src < src_end; src++)
6357 { 6546 {
@@ -6365,7 +6554,7 @@ detect_coding (struct coding_system *coding)
6365 else if (c < 0x20) 6554 else if (c < 0x20)
6366 { 6555 {
6367 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) 6556 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6368 && ! inhibit_iso_escape_detection 6557 && ! inhibit_ied
6369 && ! detect_info.checked) 6558 && ! detect_info.checked)
6370 { 6559 {
6371 if (detect_coding_iso_2022 (coding, &detect_info)) 6560 if (detect_coding_iso_2022 (coding, &detect_info))
@@ -6384,12 +6573,33 @@ detect_coding (struct coding_system *coding)
6384 break; 6573 break;
6385 } 6574 }
6386 } 6575 }
6387 else if (! c && !inhibit_null_byte_detection) 6576 else if (! c && !inhibit_nbd)
6388 { 6577 {
6389 null_byte_found = 1; 6578 null_byte_found = 1;
6390 if (eight_bit_found) 6579 if (eight_bit_found)
6391 break; 6580 break;
6392 } 6581 }
6582 else if (! disable_ascii_optimization
6583 && ! inhibit_eol_conversion)
6584 {
6585 if (c == '\r')
6586 {
6587 if (src < src_end && src[1] == '\n')
6588 {
6589 coding->eol_seen |= EOL_SEEN_CRLF;
6590 src++;
6591 if (! eight_bit_found)
6592 coding->head_ascii++;
6593 }
6594 else
6595 coding->eol_seen |= EOL_SEEN_CR;
6596 }
6597 else if (c == '\n')
6598 {
6599 coding->eol_seen |= EOL_SEEN_LF;
6600 }
6601 }
6602
6393 if (! eight_bit_found) 6603 if (! eight_bit_found)
6394 coding->head_ascii++; 6604 coding->head_ascii++;
6395 } 6605 }
@@ -6420,10 +6630,19 @@ detect_coding (struct coding_system *coding)
6420 detect_info.checked |= ~CATEGORY_MASK_UTF_16; 6630 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6421 detect_info.rejected |= ~CATEGORY_MASK_UTF_16; 6631 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
6422 } 6632 }
6633 else if (prefer_utf_8
6634 && detect_coding_utf_8 (coding, &detect_info))
6635 {
6636 detect_info.checked |= ~CATEGORY_MASK_UTF_8;
6637 detect_info.rejected |= ~CATEGORY_MASK_UTF_8;
6638 }
6423 for (i = 0; i < coding_category_raw_text; i++) 6639 for (i = 0; i < coding_category_raw_text; i++)
6424 { 6640 {
6425 category = coding_priorities[i]; 6641 category = coding_priorities[i];
6426 this = coding_categories + category; 6642 this = coding_categories + category;
6643 /* Some of this->detector (e.g. detect_coding_sjis)
6644 require this information. */
6645 coding->id = this->id;
6427 if (this->id < 0) 6646 if (this->id < 0)
6428 { 6647 {
6429 /* No coding system of this category is defined. */ 6648 /* No coding system of this category is defined. */
@@ -6438,32 +6657,58 @@ detect_coding (struct coding_system *coding)
6438 } 6657 }
6439 else if ((*(this->detector)) (coding, &detect_info) 6658 else if ((*(this->detector)) (coding, &detect_info)
6440 && detect_info.found & (1 << category)) 6659 && detect_info.found & (1 << category))
6441 { 6660 break;
6442 if (category == coding_category_utf_16_auto)
6443 {
6444 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6445 category = coding_category_utf_16_le;
6446 else
6447 category = coding_category_utf_16_be;
6448 }
6449 break;
6450 }
6451 } 6661 }
6452 } 6662 }
6453 6663
6454 if (i < coding_category_raw_text) 6664 if (i < coding_category_raw_text)
6455 setup_coding_system (CODING_ID_NAME (this->id), coding); 6665 {
6666 if (category == coding_category_utf_8_auto)
6667 {
6668 Lisp_Object coding_systems;
6669
6670 coding_systems = AREF (CODING_ID_ATTRS (this->id),
6671 coding_attr_utf_bom);
6672 if (CONSP (coding_systems))
6673 {
6674 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6675 found = XCAR (coding_systems);
6676 else
6677 found = XCDR (coding_systems);
6678 }
6679 else
6680 found = CODING_ID_NAME (this->id);
6681 }
6682 else if (category == coding_category_utf_16_auto)
6683 {
6684 Lisp_Object coding_systems;
6685
6686 coding_systems = AREF (CODING_ID_ATTRS (this->id),
6687 coding_attr_utf_bom);
6688 if (CONSP (coding_systems))
6689 {
6690 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6691 found = XCAR (coding_systems);
6692 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6693 found = XCDR (coding_systems);
6694 }
6695 else
6696 found = CODING_ID_NAME (this->id);
6697 }
6698 else
6699 found = CODING_ID_NAME (this->id);
6700 }
6456 else if (null_byte_found) 6701 else if (null_byte_found)
6457 setup_coding_system (Qno_conversion, coding); 6702 found = Qno_conversion;
6458 else if ((detect_info.rejected & CATEGORY_MASK_ANY) 6703 else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6459 == CATEGORY_MASK_ANY) 6704 == CATEGORY_MASK_ANY)
6460 setup_coding_system (Qraw_text, coding); 6705 found = Qraw_text;
6461 else if (detect_info.rejected) 6706 else if (detect_info.rejected)
6462 for (i = 0; i < coding_category_raw_text; i++) 6707 for (i = 0; i < coding_category_raw_text; i++)
6463 if (! (detect_info.rejected & (1 << coding_priorities[i]))) 6708 if (! (detect_info.rejected & (1 << coding_priorities[i])))
6464 { 6709 {
6465 this = coding_categories + coding_priorities[i]; 6710 this = coding_categories + coding_priorities[i];
6466 setup_coding_system (CODING_ID_NAME (this->id), coding); 6711 found = CODING_ID_NAME (this->id);
6467 break; 6712 break;
6468 } 6713 }
6469 } 6714 }
@@ -6477,14 +6722,21 @@ detect_coding (struct coding_system *coding)
6477 coding_systems 6722 coding_systems
6478 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom); 6723 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6479 detect_info.found = detect_info.rejected = 0; 6724 detect_info.found = detect_info.rejected = 0;
6480 coding->head_ascii = 0; 6725 if (check_ascii (coding) == coding->src_bytes)
6481 if (CONSP (coding_systems)
6482 && detect_coding_utf_8 (coding, &detect_info))
6483 { 6726 {
6484 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG) 6727 if (CONSP (coding_systems))
6485 setup_coding_system (XCAR (coding_systems), coding); 6728 found = XCDR (coding_systems);
6486 else 6729 }
6487 setup_coding_system (XCDR (coding_systems), coding); 6730 else
6731 {
6732 if (CONSP (coding_systems)
6733 && detect_coding_utf_8 (coding, &detect_info))
6734 {
6735 if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6736 found = XCAR (coding_systems);
6737 else
6738 found = XCDR (coding_systems);
6739 }
6488 } 6740 }
6489 } 6741 }
6490 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) 6742 else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
@@ -6501,11 +6753,24 @@ detect_coding (struct coding_system *coding)
6501 && detect_coding_utf_16 (coding, &detect_info)) 6753 && detect_coding_utf_16 (coding, &detect_info))
6502 { 6754 {
6503 if (detect_info.found & CATEGORY_MASK_UTF_16_LE) 6755 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6504 setup_coding_system (XCAR (coding_systems), coding); 6756 found = XCAR (coding_systems);
6505 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE) 6757 else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6506 setup_coding_system (XCDR (coding_systems), coding); 6758 found = XCDR (coding_systems);
6507 } 6759 }
6508 } 6760 }
6761
6762 if (! NILP (found))
6763 {
6764 int specified_eol = (VECTORP (eol_type) ? EOL_SEEN_NONE
6765 : EQ (eol_type, Qdos) ? EOL_SEEN_CRLF
6766 : EQ (eol_type, Qmac) ? EOL_SEEN_CR
6767 : EOL_SEEN_LF);
6768
6769 setup_coding_system (found, coding);
6770 if (specified_eol != EOL_SEEN_NONE)
6771 adjust_coding_eol_type (coding, specified_eol);
6772 }
6773
6509 coding->mode = saved_mode; 6774 coding->mode = saved_mode;
6510} 6775}
6511 6776
@@ -6609,11 +6874,11 @@ decode_eol (struct coding_system *coding)
6609 6874
6610 6875
6611/* Return a translation table (or list of them) from coding system 6876/* Return a translation table (or list of them) from coding system
6612 attribute vector ATTRS for encoding (ENCODEP is nonzero) or 6877 attribute vector ATTRS for encoding (if ENCODEP) or decoding (if
6613 decoding (ENCODEP is zero). */ 6878 not ENCODEP). */
6614 6879
6615static Lisp_Object 6880static Lisp_Object
6616get_translation_table (Lisp_Object attrs, int encodep, int *max_lookup) 6881get_translation_table (Lisp_Object attrs, bool encodep, int *max_lookup)
6617{ 6882{
6618 Lisp_Object standard, translation_table; 6883 Lisp_Object standard, translation_table;
6619 Lisp_Object val; 6884 Lisp_Object val;
@@ -6646,11 +6911,9 @@ get_translation_table (Lisp_Object attrs, int encodep, int *max_lookup)
6646 if (CHAR_TABLE_P (standard)) 6911 if (CHAR_TABLE_P (standard))
6647 { 6912 {
6648 if (CONSP (translation_table)) 6913 if (CONSP (translation_table))
6649 translation_table = nconc2 (translation_table, 6914 translation_table = nconc2 (translation_table, list1 (standard));
6650 Fcons (standard, Qnil));
6651 else 6915 else
6652 translation_table = Fcons (translation_table, 6916 translation_table = list2 (translation_table, standard);
6653 Fcons (standard, Qnil));
6654 } 6917 }
6655 } 6918 }
6656 6919
@@ -6743,7 +7006,7 @@ get_translation (Lisp_Object trans, int *buf, int *buf_end)
6743 7006
6744static int 7007static int
6745produce_chars (struct coding_system *coding, Lisp_Object translation_table, 7008produce_chars (struct coding_system *coding, Lisp_Object translation_table,
6746 int last_block) 7009 bool last_block)
6747{ 7010{
6748 unsigned char *dst = coding->destination + coding->produced; 7011 unsigned char *dst = coding->destination + coding->produced;
6749 unsigned char *dst_end = coding->destination + coding->dst_bytes; 7012 unsigned char *dst_end = coding->destination + coding->dst_bytes;
@@ -6765,7 +7028,8 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table,
6765 7028
6766 while (buf < buf_end) 7029 while (buf < buf_end)
6767 { 7030 {
6768 int c = *buf, i; 7031 int c = *buf;
7032 ptrdiff_t i;
6769 7033
6770 if (c >= 0) 7034 if (c >= 0)
6771 { 7035 {
@@ -6845,7 +7109,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table,
6845 { 7109 {
6846 if (coding->src_multibyte) 7110 if (coding->src_multibyte)
6847 { 7111 {
6848 int multibytep = 1; 7112 bool multibytep = 1;
6849 ptrdiff_t consumed_chars = 0; 7113 ptrdiff_t consumed_chars = 0;
6850 7114
6851 while (1) 7115 while (1)
@@ -6881,7 +7145,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table,
6881 else 7145 else
6882 while (src < src_end) 7146 while (src < src_end)
6883 { 7147 {
6884 int multibytep = 1; 7148 bool multibytep = 1;
6885 int c = *src++; 7149 int c = *src++;
6886 7150
6887 if (dst >= dst_end - 1) 7151 if (dst >= dst_end - 1)
@@ -6933,7 +7197,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table,
6933 7197
6934 produced = dst - (coding->destination + coding->produced); 7198 produced = dst - (coding->destination + coding->produced);
6935 if (BUFFERP (coding->dst_object) && produced_chars > 0) 7199 if (BUFFERP (coding->dst_object) && produced_chars > 0)
6936 insert_from_gap (produced_chars, produced); 7200 insert_from_gap (produced_chars, produced, 0);
6937 coding->produced += produced; 7201 coding->produced += produced;
6938 coding->produced_char += produced_chars; 7202 coding->produced_char += produced_chars;
6939 return carryover; 7203 return carryover;
@@ -6944,7 +7208,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table,
6944 [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ] 7208 [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
6945 */ 7209 */
6946 7210
6947static inline void 7211static void
6948produce_composition (struct coding_system *coding, int *charbuf, ptrdiff_t pos) 7212produce_composition (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
6949{ 7213{
6950 int len; 7214 int len;
@@ -6988,7 +7252,7 @@ produce_composition (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
6988 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ] 7252 [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6989 */ 7253 */
6990 7254
6991static inline void 7255static void
6992produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos) 7256produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
6993{ 7257{
6994 ptrdiff_t from = pos - charbuf[2]; 7258 ptrdiff_t from = pos - charbuf[2];
@@ -7004,22 +7268,8 @@ produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
7004 7268
7005#define ALLOC_CONVERSION_WORK_AREA(coding) \ 7269#define ALLOC_CONVERSION_WORK_AREA(coding) \
7006 do { \ 7270 do { \
7007 int size = CHARBUF_SIZE; \ 7271 coding->charbuf = SAFE_ALLOCA (CHARBUF_SIZE * sizeof (int)); \
7008 \ 7272 coding->charbuf_size = CHARBUF_SIZE; \
7009 coding->charbuf = NULL; \
7010 while (size > 1024) \
7011 { \
7012 coding->charbuf = (int *) alloca (sizeof (int) * size); \
7013 if (coding->charbuf) \
7014 break; \
7015 size >>= 1; \
7016 } \
7017 if (! coding->charbuf) \
7018 { \
7019 record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
7020 return coding->result; \
7021 } \
7022 coding->charbuf_size = size; \
7023 } while (0) 7273 } while (0)
7024 7274
7025 7275
@@ -7078,7 +7328,7 @@ produce_annotation (struct coding_system *coding, ptrdiff_t pos)
7078 CODING->dst_object. 7328 CODING->dst_object.
7079*/ 7329*/
7080 7330
7081static int 7331static void
7082decode_coding (struct coding_system *coding) 7332decode_coding (struct coding_system *coding)
7083{ 7333{
7084 Lisp_Object attrs; 7334 Lisp_Object attrs;
@@ -7088,6 +7338,8 @@ decode_coding (struct coding_system *coding)
7088 int carryover; 7338 int carryover;
7089 int i; 7339 int i;
7090 7340
7341 USE_SAFE_ALLOCA;
7342
7091 if (BUFFERP (coding->src_object) 7343 if (BUFFERP (coding->src_object)
7092 && coding->src_pos > 0 7344 && coding->src_pos > 0
7093 && coding->src_pos < GPT 7345 && coding->src_pos < GPT
@@ -7097,12 +7349,20 @@ decode_coding (struct coding_system *coding)
7097 undo_list = Qt; 7349 undo_list = Qt;
7098 if (BUFFERP (coding->dst_object)) 7350 if (BUFFERP (coding->dst_object))
7099 { 7351 {
7100 if (current_buffer != XBUFFER (coding->dst_object)) 7352 set_buffer_internal (XBUFFER (coding->dst_object));
7101 set_buffer_internal (XBUFFER (coding->dst_object));
7102 if (GPT != PT) 7353 if (GPT != PT)
7103 move_gap_both (PT, PT_BYTE); 7354 move_gap_both (PT, PT_BYTE);
7355
7356 /* We must disable undo_list in order to record the whole insert
7357 transaction via record_insert at the end. But doing so also
7358 disables the recording of the first change to the undo_list.
7359 Therefore we check for first change here and record it via
7360 record_first_change if needed. */
7361 if (MODIFF <= SAVE_MODIFF)
7362 record_first_change ();
7363
7104 undo_list = BVAR (current_buffer, undo_list); 7364 undo_list = BVAR (current_buffer, undo_list);
7105 BVAR (current_buffer, undo_list) = Qt; 7365 bset_undo_list (current_buffer, Qt);
7106 } 7366 }
7107 7367
7108 coding->consumed = coding->consumed_char = 0; 7368 coding->consumed = coding->consumed_char = 0;
@@ -7199,10 +7459,11 @@ decode_coding (struct coding_system *coding)
7199 decode_eol (coding); 7459 decode_eol (coding);
7200 if (BUFFERP (coding->dst_object)) 7460 if (BUFFERP (coding->dst_object))
7201 { 7461 {
7202 BVAR (current_buffer, undo_list) = undo_list; 7462 bset_undo_list (current_buffer, undo_list);
7203 record_insert (coding->dst_pos, coding->produced_char); 7463 record_insert (coding->dst_pos, coding->produced_char);
7204 } 7464 }
7205 return coding->result; 7465
7466 SAFE_FREE ();
7206} 7467}
7207 7468
7208 7469
@@ -7216,7 +7477,7 @@ decode_coding (struct coding_system *coding)
7216 position of a composition after POS (if any) or to LIMIT, and 7477 position of a composition after POS (if any) or to LIMIT, and
7217 return BUF. */ 7478 return BUF. */
7218 7479
7219static inline int * 7480static int *
7220handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit, 7481handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit,
7221 struct coding_system *coding, int *buf, 7482 struct coding_system *coding, int *buf,
7222 ptrdiff_t *stop) 7483 ptrdiff_t *stop)
@@ -7236,7 +7497,7 @@ handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit,
7236 /* We found a composition. Store the corresponding 7497 /* We found a composition. Store the corresponding
7237 annotation data in BUF. */ 7498 annotation data in BUF. */
7238 int *head = buf; 7499 int *head = buf;
7239 enum composition_method method = COMPOSITION_METHOD (prop); 7500 enum composition_method method = composition_method (prop);
7240 int nchars = COMPOSITION_LENGTH (prop); 7501 int nchars = COMPOSITION_LENGTH (prop);
7241 7502
7242 ADD_COMPOSITION_DATA (buf, nchars, 0, method); 7503 ADD_COMPOSITION_DATA (buf, nchars, 0, method);
@@ -7274,7 +7535,7 @@ handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit,
7274 *buf++ = XINT (XCAR (components)); 7535 *buf++ = XINT (XCAR (components));
7275 } 7536 }
7276 else 7537 else
7277 abort (); 7538 emacs_abort ();
7278 *head -= len; 7539 *head -= len;
7279 } 7540 }
7280 } 7541 }
@@ -7299,7 +7560,7 @@ handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit,
7299 If the property value is nil, set *STOP to the position where the 7560 If the property value is nil, set *STOP to the position where the
7300 property value is non-nil (limiting by LIMIT), and return BUF. */ 7561 property value is non-nil (limiting by LIMIT), and return BUF. */
7301 7562
7302static inline int * 7563static int *
7303handle_charset_annotation (ptrdiff_t pos, ptrdiff_t limit, 7564handle_charset_annotation (ptrdiff_t pos, ptrdiff_t limit,
7304 struct coding_system *coding, int *buf, 7565 struct coding_system *coding, int *buf,
7305 ptrdiff_t *stop) 7566 ptrdiff_t *stop)
@@ -7331,7 +7592,7 @@ consume_chars (struct coding_system *coding, Lisp_Object translation_table,
7331 const unsigned char *src_end = coding->source + coding->src_bytes; 7592 const unsigned char *src_end = coding->source + coding->src_bytes;
7332 ptrdiff_t pos = coding->src_pos + coding->consumed_char; 7593 ptrdiff_t pos = coding->src_pos + coding->consumed_char;
7333 ptrdiff_t end_pos = coding->src_pos + coding->src_chars; 7594 ptrdiff_t end_pos = coding->src_pos + coding->src_chars;
7334 int multibytep = coding->src_multibyte; 7595 bool multibytep = coding->src_multibyte;
7335 Lisp_Object eol_type; 7596 Lisp_Object eol_type;
7336 int c; 7597 int c;
7337 ptrdiff_t stop, stop_composition, stop_charset; 7598 ptrdiff_t stop, stop_composition, stop_charset;
@@ -7478,7 +7739,7 @@ consume_chars (struct coding_system *coding, Lisp_Object translation_table,
7478 If CODING->dst_object is nil, the encoded data is placed at the 7739 If CODING->dst_object is nil, the encoded data is placed at the
7479 memory area specified by CODING->destination. */ 7740 memory area specified by CODING->destination. */
7480 7741
7481static int 7742static void
7482encode_coding (struct coding_system *coding) 7743encode_coding (struct coding_system *coding)
7483{ 7744{
7484 Lisp_Object attrs; 7745 Lisp_Object attrs;
@@ -7486,6 +7747,8 @@ encode_coding (struct coding_system *coding)
7486 int max_lookup; 7747 int max_lookup;
7487 struct ccl_spec cclspec; 7748 struct ccl_spec cclspec;
7488 7749
7750 USE_SAFE_ALLOCA;
7751
7489 attrs = CODING_ID_ATTRS (coding->id); 7752 attrs = CODING_ID_ATTRS (coding->id);
7490 if (coding->encoder == encode_coding_raw_text) 7753 if (coding->encoder == encode_coding_raw_text)
7491 translation_table = Qnil, max_lookup = 0; 7754 translation_table = Qnil, max_lookup = 0;
@@ -7519,9 +7782,9 @@ encode_coding (struct coding_system *coding)
7519 } while (coding->consumed_char < coding->src_chars); 7782 } while (coding->consumed_char < coding->src_chars);
7520 7783
7521 if (BUFFERP (coding->dst_object) && coding->produced_char > 0) 7784 if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7522 insert_from_gap (coding->produced_char, coding->produced); 7785 insert_from_gap (coding->produced_char, coding->produced, 0);
7523 7786
7524 return (coding->result); 7787 SAFE_FREE ();
7525} 7788}
7526 7789
7527 7790
@@ -7535,26 +7798,27 @@ static Lisp_Object Vcode_conversion_workbuf_name;
7535 versions of Vcode_conversion_workbuf_name. */ 7798 versions of Vcode_conversion_workbuf_name. */
7536static Lisp_Object Vcode_conversion_reused_workbuf; 7799static Lisp_Object Vcode_conversion_reused_workbuf;
7537 7800
7538/* 1 iff Vcode_conversion_reused_workbuf is already in use. */ 7801/* True iff Vcode_conversion_reused_workbuf is already in use. */
7539static int reused_workbuf_in_use; 7802static bool reused_workbuf_in_use;
7540 7803
7541 7804
7542/* Return a working buffer of code conversion. MULTIBYTE specifies the 7805/* Return a working buffer of code conversion. MULTIBYTE specifies the
7543 multibyteness of returning buffer. */ 7806 multibyteness of returning buffer. */
7544 7807
7545static Lisp_Object 7808static Lisp_Object
7546make_conversion_work_buffer (int multibyte) 7809make_conversion_work_buffer (bool multibyte)
7547{ 7810{
7548 Lisp_Object name, workbuf; 7811 Lisp_Object name, workbuf;
7549 struct buffer *current; 7812 struct buffer *current;
7550 7813
7551 if (reused_workbuf_in_use++) 7814 if (reused_workbuf_in_use)
7552 { 7815 {
7553 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil); 7816 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7554 workbuf = Fget_buffer_create (name); 7817 workbuf = Fget_buffer_create (name);
7555 } 7818 }
7556 else 7819 else
7557 { 7820 {
7821 reused_workbuf_in_use = 1;
7558 if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf))) 7822 if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7559 Vcode_conversion_reused_workbuf 7823 Vcode_conversion_reused_workbuf
7560 = Fget_buffer_create (Vcode_conversion_workbuf_name); 7824 = Fget_buffer_create (Vcode_conversion_workbuf_name);
@@ -7567,14 +7831,14 @@ make_conversion_work_buffer (int multibyte)
7567 doesn't compile new regexps. */ 7831 doesn't compile new regexps. */
7568 Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt); 7832 Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7569 Ferase_buffer (); 7833 Ferase_buffer ();
7570 BVAR (current_buffer, undo_list) = Qt; 7834 bset_undo_list (current_buffer, Qt);
7571 BVAR (current_buffer, enable_multibyte_characters) = multibyte ? Qt : Qnil; 7835 bset_enable_multibyte_characters (current_buffer, multibyte ? Qt : Qnil);
7572 set_buffer_internal (current); 7836 set_buffer_internal (current);
7573 return workbuf; 7837 return workbuf;
7574} 7838}
7575 7839
7576 7840
7577static Lisp_Object 7841static void
7578code_conversion_restore (Lisp_Object arg) 7842code_conversion_restore (Lisp_Object arg)
7579{ 7843{
7580 Lisp_Object current, workbuf; 7844 Lisp_Object current, workbuf;
@@ -7587,16 +7851,15 @@ code_conversion_restore (Lisp_Object arg)
7587 { 7851 {
7588 if (EQ (workbuf, Vcode_conversion_reused_workbuf)) 7852 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7589 reused_workbuf_in_use = 0; 7853 reused_workbuf_in_use = 0;
7590 else if (! NILP (Fbuffer_live_p (workbuf))) 7854 else
7591 Fkill_buffer (workbuf); 7855 Fkill_buffer (workbuf);
7592 } 7856 }
7593 set_buffer_internal (XBUFFER (current)); 7857 set_buffer_internal (XBUFFER (current));
7594 UNGCPRO; 7858 UNGCPRO;
7595 return Qnil;
7596} 7859}
7597 7860
7598Lisp_Object 7861Lisp_Object
7599code_conversion_save (int with_work_buf, int multibyte) 7862code_conversion_save (bool with_work_buf, bool multibyte)
7600{ 7863{
7601 Lisp_Object workbuf = Qnil; 7864 Lisp_Object workbuf = Qnil;
7602 7865
@@ -7607,15 +7870,13 @@ code_conversion_save (int with_work_buf, int multibyte)
7607 return workbuf; 7870 return workbuf;
7608} 7871}
7609 7872
7610int 7873void
7611decode_coding_gap (struct coding_system *coding, 7874decode_coding_gap (struct coding_system *coding,
7612 ptrdiff_t chars, ptrdiff_t bytes) 7875 ptrdiff_t chars, ptrdiff_t bytes)
7613{ 7876{
7614 ptrdiff_t count = SPECPDL_INDEX (); 7877 ptrdiff_t count = SPECPDL_INDEX ();
7615 Lisp_Object attrs; 7878 Lisp_Object attrs;
7616 7879
7617 code_conversion_save (0, 0);
7618
7619 coding->src_object = Fcurrent_buffer (); 7880 coding->src_object = Fcurrent_buffer ();
7620 coding->src_chars = chars; 7881 coding->src_chars = chars;
7621 coding->src_bytes = bytes; 7882 coding->src_bytes = bytes;
@@ -7627,15 +7888,95 @@ decode_coding_gap (struct coding_system *coding,
7627 coding->dst_pos_byte = PT_BYTE; 7888 coding->dst_pos_byte = PT_BYTE;
7628 coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); 7889 coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
7629 7890
7891 coding->head_ascii = -1;
7892 coding->detected_utf8_chars = -1;
7893 coding->eol_seen = EOL_SEEN_NONE;
7630 if (CODING_REQUIRE_DETECTION (coding)) 7894 if (CODING_REQUIRE_DETECTION (coding))
7631 detect_coding (coding); 7895 detect_coding (coding);
7896 attrs = CODING_ID_ATTRS (coding->id);
7897 if (! disable_ascii_optimization
7898 && ! coding->src_multibyte
7899 && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
7900 && NILP (CODING_ATTR_POST_READ (attrs))
7901 && NILP (get_translation_table (attrs, 0, NULL)))
7902 {
7903 chars = coding->head_ascii;
7904 if (chars < 0)
7905 chars = check_ascii (coding);
7906 if (chars != bytes)
7907 {
7908 /* There exists a non-ASCII byte. */
7909 if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8))
7910 {
7911 if (coding->detected_utf8_chars >= 0)
7912 chars = coding->detected_utf8_chars;
7913 else
7914 chars = check_utf_8 (coding);
7915 if (CODING_UTF_8_BOM (coding) != utf_without_bom
7916 && coding->head_ascii == 0
7917 && coding->source[0] == UTF_8_BOM_1
7918 && coding->source[1] == UTF_8_BOM_2
7919 && coding->source[2] == UTF_8_BOM_3)
7920 {
7921 chars--;
7922 bytes -= 3;
7923 coding->src_bytes -= 3;
7924 }
7925 }
7926 else
7927 chars = -1;
7928 }
7929 if (chars >= 0)
7930 {
7931 Lisp_Object eol_type;
7932
7933 eol_type = CODING_ID_EOL_TYPE (coding->id);
7934 if (VECTORP (eol_type))
7935 {
7936 if (coding->eol_seen != EOL_SEEN_NONE)
7937 eol_type = adjust_coding_eol_type (coding, coding->eol_seen);
7938 }
7939 if (EQ (eol_type, Qmac))
7940 {
7941 unsigned char *src_end = GAP_END_ADDR;
7942 unsigned char *src = src_end - coding->src_bytes;
7943
7944 while (src < src_end)
7945 {
7946 if (*src++ == '\r')
7947 src[-1] = '\n';
7948 }
7949 }
7950 else if (EQ (eol_type, Qdos))
7951 {
7952 unsigned char *src = GAP_END_ADDR;
7953 unsigned char *src_beg = src - coding->src_bytes;
7954 unsigned char *dst = src;
7955 ptrdiff_t diff;
7956
7957 while (src_beg < src)
7958 {
7959 *--dst = *--src;
7960 if (*src == '\n' && src > src_beg && src[-1] == '\r')
7961 src--;
7962 }
7963 diff = dst - src;
7964 bytes -= diff;
7965 chars -= diff;
7966 }
7967 coding->produced = bytes;
7968 coding->produced_char = chars;
7969 insert_from_gap (chars, bytes, 1);
7970 return;
7971 }
7972 }
7973 code_conversion_save (0, 0);
7632 7974
7633 coding->mode |= CODING_MODE_LAST_BLOCK; 7975 coding->mode |= CODING_MODE_LAST_BLOCK;
7634 current_buffer->text->inhibit_shrinking = 1; 7976 current_buffer->text->inhibit_shrinking = 1;
7635 decode_coding (coding); 7977 decode_coding (coding);
7636 current_buffer->text->inhibit_shrinking = 0; 7978 current_buffer->text->inhibit_shrinking = 0;
7637 7979
7638 attrs = CODING_ID_ATTRS (coding->id);
7639 if (! NILP (CODING_ATTR_POST_READ (attrs))) 7980 if (! NILP (CODING_ATTR_POST_READ (attrs)))
7640 { 7981 {
7641 ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE; 7982 ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE;
@@ -7650,7 +7991,6 @@ decode_coding_gap (struct coding_system *coding,
7650 } 7991 }
7651 7992
7652 unbind_to (count, Qnil); 7993 unbind_to (count, Qnil);
7653 return coding->result;
7654} 7994}
7655 7995
7656 7996
@@ -7696,8 +8036,8 @@ decode_coding_object (struct coding_system *coding,
7696 ptrdiff_t chars = to - from; 8036 ptrdiff_t chars = to - from;
7697 ptrdiff_t bytes = to_byte - from_byte; 8037 ptrdiff_t bytes = to_byte - from_byte;
7698 Lisp_Object attrs; 8038 Lisp_Object attrs;
7699 int saved_pt = -1, saved_pt_byte IF_LINT (= 0); 8039 ptrdiff_t saved_pt = -1, saved_pt_byte IF_LINT (= 0);
7700 int need_marker_adjustment = 0; 8040 bool need_marker_adjustment = 0;
7701 Lisp_Object old_deactivate_mark; 8041 Lisp_Object old_deactivate_mark;
7702 8042
7703 old_deactivate_mark = Vdeactivate_mark; 8043 old_deactivate_mark = Vdeactivate_mark;
@@ -7810,14 +8150,8 @@ decode_coding_object (struct coding_system *coding,
7810 set_buffer_internal (XBUFFER (coding->dst_object)); 8150 set_buffer_internal (XBUFFER (coding->dst_object));
7811 if (dst_bytes < coding->produced) 8151 if (dst_bytes < coding->produced)
7812 { 8152 {
8153 eassert (coding->produced > 0);
7813 destination = xrealloc (destination, coding->produced); 8154 destination = xrealloc (destination, coding->produced);
7814 if (! destination)
7815 {
7816 record_conversion_result (coding,
7817 CODING_RESULT_INSUFFICIENT_MEM);
7818 unbind_to (count, Qnil);
7819 return;
7820 }
7821 if (BEGV < GPT && GPT < BEGV + coding->produced_char) 8155 if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7822 move_gap_both (BEGV, BEGV_BYTE); 8156 move_gap_both (BEGV, BEGV_BYTE);
7823 memcpy (destination, BEGV_ADDR, coding->produced); 8157 memcpy (destination, BEGV_ADDR, coding->produced);
@@ -7884,9 +8218,9 @@ encode_coding_object (struct coding_system *coding,
7884 ptrdiff_t chars = to - from; 8218 ptrdiff_t chars = to - from;
7885 ptrdiff_t bytes = to_byte - from_byte; 8219 ptrdiff_t bytes = to_byte - from_byte;
7886 Lisp_Object attrs; 8220 Lisp_Object attrs;
7887 int saved_pt = -1, saved_pt_byte IF_LINT (= 0); 8221 ptrdiff_t saved_pt = -1, saved_pt_byte IF_LINT (= 0);
7888 int need_marker_adjustment = 0; 8222 bool need_marker_adjustment = 0;
7889 int kill_src_buffer = 0; 8223 bool kill_src_buffer = 0;
7890 Lisp_Object old_deactivate_mark; 8224 Lisp_Object old_deactivate_mark;
7891 8225
7892 old_deactivate_mark = Vdeactivate_mark; 8226 old_deactivate_mark = Vdeactivate_mark;
@@ -7930,15 +8264,12 @@ encode_coding_object (struct coding_system *coding,
7930 } 8264 }
7931 8265
7932 { 8266 {
7933 Lisp_Object args[3];
7934 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5; 8267 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7935 8268
7936 GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object, 8269 GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7937 old_deactivate_mark); 8270 old_deactivate_mark);
7938 args[0] = CODING_ATTR_PRE_WRITE (attrs); 8271 safe_call2 (CODING_ATTR_PRE_WRITE (attrs),
7939 args[1] = make_number (BEG); 8272 make_number (BEG), make_number (Z));
7940 args[2] = make_number (Z);
7941 safe_call (3, args);
7942 UNGCPRO; 8273 UNGCPRO;
7943 } 8274 }
7944 if (XBUFFER (coding->src_object) != current_buffer) 8275 if (XBUFFER (coding->src_object) != current_buffer)
@@ -8005,7 +8336,7 @@ encode_coding_object (struct coding_system *coding,
8005 { 8336 {
8006 ptrdiff_t dst_bytes = max (1, coding->src_chars); 8337 ptrdiff_t dst_bytes = max (1, coding->src_chars);
8007 coding->dst_object = Qnil; 8338 coding->dst_object = Qnil;
8008 coding->destination = (unsigned char *) xmalloc (dst_bytes); 8339 coding->destination = xmalloc (dst_bytes);
8009 coding->dst_bytes = dst_bytes; 8340 coding->dst_bytes = dst_bytes;
8010 coding->dst_multibyte = 0; 8341 coding->dst_multibyte = 0;
8011 } 8342 }
@@ -8088,6 +8419,50 @@ preferred_coding_system (void)
8088 return CODING_ID_NAME (id); 8419 return CODING_ID_NAME (id);
8089} 8420}
8090 8421
8422#if defined (WINDOWSNT) || defined (CYGWIN)
8423
8424Lisp_Object
8425from_unicode (Lisp_Object str)
8426{
8427 CHECK_STRING (str);
8428 if (!STRING_MULTIBYTE (str) &&
8429 SBYTES (str) & 1)
8430 {
8431 str = Fsubstring (str, make_number (0), make_number (-1));
8432 }
8433
8434 return code_convert_string_norecord (str, Qutf_16le, 0);
8435}
8436
8437Lisp_Object
8438from_unicode_buffer (const wchar_t* wstr)
8439{
8440 return from_unicode (
8441 make_unibyte_string (
8442 (char*) wstr,
8443 /* we get one of the two final 0 bytes for free. */
8444 1 + sizeof (wchar_t) * wcslen (wstr)));
8445}
8446
8447wchar_t *
8448to_unicode (Lisp_Object str, Lisp_Object *buf)
8449{
8450 *buf = code_convert_string_norecord (str, Qutf_16le, 1);
8451 /* We need to make another copy (in addition to the one made by
8452 code_convert_string_norecord) to ensure that the final string is
8453 _doubly_ zero terminated --- that is, that the string is
8454 terminated by two zero bytes and one utf-16le null character.
8455 Because strings are already terminated with a single zero byte,
8456 we just add one additional zero. */
8457 str = make_uninit_string (SBYTES (*buf) + 1);
8458 memcpy (SDATA (str), SDATA (*buf), SBYTES (*buf));
8459 SDATA (str) [SBYTES (*buf)] = '\0';
8460 *buf = str;
8461 return WCSDATA (*buf);
8462}
8463
8464#endif /* WINDOWSNT || CYGWIN */
8465
8091 8466
8092#ifdef emacs 8467#ifdef emacs
8093/*** 8. Emacs Lisp library functions ***/ 8468/*** 8. Emacs Lisp library functions ***/
@@ -8165,10 +8540,10 @@ function `define-coding-system'. */)
8165 8540
8166 8541
8167/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If 8542/* Detect how the bytes at SRC of length SRC_BYTES are encoded. If
8168 HIGHEST is nonzero, return the coding system of the highest 8543 HIGHEST, return the coding system of the highest
8169 priority among the detected coding systems. Otherwise return a 8544 priority among the detected coding systems. Otherwise return a
8170 list of detected coding systems sorted by their priorities. If 8545 list of detected coding systems sorted by their priorities. If
8171 MULTIBYTEP is nonzero, it is assumed that the bytes are in correct 8546 MULTIBYTEP, it is assumed that the bytes are in correct
8172 multibyte form but contains only ASCII and eight-bit chars. 8547 multibyte form but contains only ASCII and eight-bit chars.
8173 Otherwise, the bytes are raw bytes. 8548 Otherwise, the bytes are raw bytes.
8174 8549
@@ -8183,7 +8558,7 @@ function `define-coding-system'. */)
8183Lisp_Object 8558Lisp_Object
8184detect_coding_system (const unsigned char *src, 8559detect_coding_system (const unsigned char *src,
8185 ptrdiff_t src_chars, ptrdiff_t src_bytes, 8560 ptrdiff_t src_chars, ptrdiff_t src_bytes,
8186 int highest, int multibytep, 8561 bool highest, bool multibytep,
8187 Lisp_Object coding_system) 8562 Lisp_Object coding_system)
8188{ 8563{
8189 const unsigned char *src_end = src + src_bytes; 8564 const unsigned char *src_end = src + src_bytes;
@@ -8193,7 +8568,7 @@ detect_coding_system (const unsigned char *src,
8193 ptrdiff_t id; 8568 ptrdiff_t id;
8194 struct coding_detection_info detect_info; 8569 struct coding_detection_info detect_info;
8195 enum coding_category base_category; 8570 enum coding_category base_category;
8196 int null_byte_found = 0, eight_bit_found = 0; 8571 bool null_byte_found = 0, eight_bit_found = 0;
8197 8572
8198 if (NILP (coding_system)) 8573 if (NILP (coding_system))
8199 coding_system = Qundecided; 8574 coding_system = Qundecided;
@@ -8219,6 +8594,11 @@ detect_coding_system (const unsigned char *src,
8219 enum coding_category category IF_LINT (= 0); 8594 enum coding_category category IF_LINT (= 0);
8220 struct coding_system *this IF_LINT (= NULL); 8595 struct coding_system *this IF_LINT (= NULL);
8221 int c, i; 8596 int c, i;
8597 bool inhibit_nbd = inhibit_flag (coding.spec.undecided.inhibit_nbd,
8598 inhibit_null_byte_detection);
8599 bool inhibit_ied = inhibit_flag (coding.spec.undecided.inhibit_ied,
8600 inhibit_iso_escape_detection);
8601 bool prefer_utf_8 = coding.spec.undecided.prefer_utf_8;
8222 8602
8223 /* Skip all ASCII bytes except for a few ISO2022 controls. */ 8603 /* Skip all ASCII bytes except for a few ISO2022 controls. */
8224 for (; src < src_end; src++) 8604 for (; src < src_end; src++)
@@ -8233,7 +8613,7 @@ detect_coding_system (const unsigned char *src,
8233 else if (c < 0x20) 8613 else if (c < 0x20)
8234 { 8614 {
8235 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) 8615 if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8236 && ! inhibit_iso_escape_detection 8616 && ! inhibit_ied
8237 && ! detect_info.checked) 8617 && ! detect_info.checked)
8238 { 8618 {
8239 if (detect_coding_iso_2022 (&coding, &detect_info)) 8619 if (detect_coding_iso_2022 (&coding, &detect_info))
@@ -8252,7 +8632,7 @@ detect_coding_system (const unsigned char *src,
8252 break; 8632 break;
8253 } 8633 }
8254 } 8634 }
8255 else if (! c && !inhibit_null_byte_detection) 8635 else if (! c && !inhibit_nbd)
8256 { 8636 {
8257 null_byte_found = 1; 8637 null_byte_found = 1;
8258 if (eight_bit_found) 8638 if (eight_bit_found)
@@ -8285,6 +8665,12 @@ detect_coding_system (const unsigned char *src,
8285 detect_info.checked |= ~CATEGORY_MASK_UTF_16; 8665 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8286 detect_info.rejected |= ~CATEGORY_MASK_UTF_16; 8666 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8287 } 8667 }
8668 else if (prefer_utf_8
8669 && detect_coding_utf_8 (&coding, &detect_info))
8670 {
8671 detect_info.checked |= ~CATEGORY_MASK_UTF_8;
8672 detect_info.rejected |= ~CATEGORY_MASK_UTF_8;
8673 }
8288 for (i = 0; i < coding_category_raw_text; i++) 8674 for (i = 0; i < coding_category_raw_text; i++)
8289 { 8675 {
8290 category = coding_priorities[i]; 8676 category = coding_priorities[i];
@@ -8325,20 +8711,20 @@ detect_coding_system (const unsigned char *src,
8325 { 8711 {
8326 detect_info.found = CATEGORY_MASK_RAW_TEXT; 8712 detect_info.found = CATEGORY_MASK_RAW_TEXT;
8327 id = CODING_SYSTEM_ID (Qno_conversion); 8713 id = CODING_SYSTEM_ID (Qno_conversion);
8328 val = Fcons (make_number (id), Qnil); 8714 val = list1 (make_number (id));
8329 } 8715 }
8330 else if (! detect_info.rejected && ! detect_info.found) 8716 else if (! detect_info.rejected && ! detect_info.found)
8331 { 8717 {
8332 detect_info.found = CATEGORY_MASK_ANY; 8718 detect_info.found = CATEGORY_MASK_ANY;
8333 id = coding_categories[coding_category_undecided].id; 8719 id = coding_categories[coding_category_undecided].id;
8334 val = Fcons (make_number (id), Qnil); 8720 val = list1 (make_number (id));
8335 } 8721 }
8336 else if (highest) 8722 else if (highest)
8337 { 8723 {
8338 if (detect_info.found) 8724 if (detect_info.found)
8339 { 8725 {
8340 detect_info.found = 1 << category; 8726 detect_info.found = 1 << category;
8341 val = Fcons (make_number (this->id), Qnil); 8727 val = list1 (make_number (this->id));
8342 } 8728 }
8343 else 8729 else
8344 for (i = 0; i < coding_category_raw_text; i++) 8730 for (i = 0; i < coding_category_raw_text; i++)
@@ -8346,7 +8732,7 @@ detect_coding_system (const unsigned char *src,
8346 { 8732 {
8347 detect_info.found = 1 << coding_priorities[i]; 8733 detect_info.found = 1 << coding_priorities[i];
8348 id = coding_categories[coding_priorities[i]].id; 8734 id = coding_categories[coding_priorities[i]].id;
8349 val = Fcons (make_number (id), Qnil); 8735 val = list1 (make_number (id));
8350 break; 8736 break;
8351 } 8737 }
8352 } 8738 }
@@ -8363,7 +8749,7 @@ detect_coding_system (const unsigned char *src,
8363 found |= 1 << category; 8749 found |= 1 << category;
8364 id = coding_categories[category].id; 8750 id = coding_categories[category].id;
8365 if (id >= 0) 8751 if (id >= 0)
8366 val = Fcons (make_number (id), val); 8752 val = list1 (make_number (id));
8367 } 8753 }
8368 } 8754 }
8369 for (i = coding_category_raw_text - 1; i >= 0; i--) 8755 for (i = coding_category_raw_text - 1; i >= 0; i--)
@@ -8388,7 +8774,7 @@ detect_coding_system (const unsigned char *src,
8388 this = coding_categories + coding_category_utf_8_sig; 8774 this = coding_categories + coding_category_utf_8_sig;
8389 else 8775 else
8390 this = coding_categories + coding_category_utf_8_nosig; 8776 this = coding_categories + coding_category_utf_8_nosig;
8391 val = Fcons (make_number (this->id), Qnil); 8777 val = list1 (make_number (this->id));
8392 } 8778 }
8393 } 8779 }
8394 else if (base_category == coding_category_utf_16_auto) 8780 else if (base_category == coding_category_utf_16_auto)
@@ -8405,13 +8791,13 @@ detect_coding_system (const unsigned char *src,
8405 this = coding_categories + coding_category_utf_16_be_nosig; 8791 this = coding_categories + coding_category_utf_16_be_nosig;
8406 else 8792 else
8407 this = coding_categories + coding_category_utf_16_le_nosig; 8793 this = coding_categories + coding_category_utf_16_le_nosig;
8408 val = Fcons (make_number (this->id), Qnil); 8794 val = list1 (make_number (this->id));
8409 } 8795 }
8410 } 8796 }
8411 else 8797 else
8412 { 8798 {
8413 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs)); 8799 detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
8414 val = Fcons (make_number (coding.id), Qnil); 8800 val = list1 (make_number (coding.id));
8415 } 8801 }
8416 8802
8417 /* Then, detect eol-format if necessary. */ 8803 /* Then, detect eol-format if necessary. */
@@ -8505,9 +8891,6 @@ highest priority. */)
8505 ptrdiff_t from, to; 8891 ptrdiff_t from, to;
8506 ptrdiff_t from_byte, to_byte; 8892 ptrdiff_t from_byte, to_byte;
8507 8893
8508 CHECK_NUMBER_COERCE_MARKER (start);
8509 CHECK_NUMBER_COERCE_MARKER (end);
8510
8511 validate_region (&start, &end); 8894 validate_region (&start, &end);
8512 from = XINT (start), to = XINT (end); 8895 from = XINT (start), to = XINT (end);
8513 from_byte = CHAR_TO_BYTE (from); 8896 from_byte = CHAR_TO_BYTE (from);
@@ -8549,7 +8932,7 @@ highest priority. */)
8549} 8932}
8550 8933
8551 8934
8552static inline int 8935static bool
8553char_encodable_p (int c, Lisp_Object attrs) 8936char_encodable_p (int c, Lisp_Object attrs)
8554{ 8937{
8555 Lisp_Object tail; 8938 Lisp_Object tail;
@@ -8626,8 +9009,7 @@ DEFUN ("find-coding-systems-region-internal",
8626 Lisp_Object attrs; 9009 Lisp_Object attrs;
8627 9010
8628 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0); 9011 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8629 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs)) 9012 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs)))
8630 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8631 { 9013 {
8632 ASET (attrs, coding_attr_trans_tbl, 9014 ASET (attrs, coding_attr_trans_tbl,
8633 get_translation_table (attrs, 1, NULL)); 9015 get_translation_table (attrs, 1, NULL));
@@ -8721,7 +9103,7 @@ to the string. */)
8721 Lisp_Object positions; 9103 Lisp_Object positions;
8722 ptrdiff_t from, to; 9104 ptrdiff_t from, to;
8723 const unsigned char *p, *stop, *pend; 9105 const unsigned char *p, *stop, *pend;
8724 int ascii_compatible; 9106 bool ascii_compatible;
8725 9107
8726 setup_coding_system (Fcheck_coding_system (coding_system), &coding); 9108 setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8727 attrs = CODING_ID_ATTRS (coding.id); 9109 attrs = CODING_ID_ATTRS (coding.id);
@@ -8886,7 +9268,7 @@ is nil. */)
8886 attrs = AREF (CODING_SYSTEM_SPEC (elt), 0); 9268 attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8887 ASET (attrs, coding_attr_trans_tbl, 9269 ASET (attrs, coding_attr_trans_tbl,
8888 get_translation_table (attrs, 1, NULL)); 9270 get_translation_table (attrs, 1, NULL));
8889 list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list); 9271 list = Fcons (list2 (elt, attrs), list);
8890 } 9272 }
8891 9273
8892 if (STRINGP (start)) 9274 if (STRINGP (start))
@@ -8945,14 +9327,12 @@ is nil. */)
8945static Lisp_Object 9327static Lisp_Object
8946code_convert_region (Lisp_Object start, Lisp_Object end, 9328code_convert_region (Lisp_Object start, Lisp_Object end,
8947 Lisp_Object coding_system, Lisp_Object dst_object, 9329 Lisp_Object coding_system, Lisp_Object dst_object,
8948 int encodep, int norecord) 9330 bool encodep, bool norecord)
8949{ 9331{
8950 struct coding_system coding; 9332 struct coding_system coding;
8951 ptrdiff_t from, from_byte, to, to_byte; 9333 ptrdiff_t from, from_byte, to, to_byte;
8952 Lisp_Object src_object; 9334 Lisp_Object src_object;
8953 9335
8954 CHECK_NUMBER_COERCE_MARKER (start);
8955 CHECK_NUMBER_COERCE_MARKER (end);
8956 if (NILP (coding_system)) 9336 if (NILP (coding_system))
8957 coding_system = Qno_conversion; 9337 coding_system = Qno_conversion;
8958 else 9338 else
@@ -9033,7 +9413,8 @@ not fully specified.) */)
9033 9413
9034Lisp_Object 9414Lisp_Object
9035code_convert_string (Lisp_Object string, Lisp_Object coding_system, 9415code_convert_string (Lisp_Object string, Lisp_Object coding_system,
9036 Lisp_Object dst_object, int encodep, int nocopy, int norecord) 9416 Lisp_Object dst_object, bool encodep, bool nocopy,
9417 bool norecord)
9037{ 9418{
9038 struct coding_system coding; 9419 struct coding_system coding;
9039 ptrdiff_t chars, bytes; 9420 ptrdiff_t chars, bytes;
@@ -9081,7 +9462,7 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system,
9081 9462
9082Lisp_Object 9463Lisp_Object
9083code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system, 9464code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
9084 int encodep) 9465 bool encodep)
9085{ 9466{
9086 return code_convert_string (string, coding_system, Qt, encodep, 0, 1); 9467 return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
9087} 9468}
@@ -9295,10 +9676,10 @@ DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_intern
9295 terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK; 9676 terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9296 terminal_coding->src_multibyte = 1; 9677 terminal_coding->src_multibyte = 1;
9297 terminal_coding->dst_multibyte = 0; 9678 terminal_coding->dst_multibyte = 0;
9298 if (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK) 9679 tset_charset_list
9299 term->charset_list = coding_charset_list (terminal_coding); 9680 (term, (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK
9300 else 9681 ? coding_charset_list (terminal_coding)
9301 term->charset_list = Fcons (make_number (charset_ascii), Qnil); 9682 : list1 (make_number (charset_ascii))));
9302 return Qnil; 9683 return Qnil;
9303} 9684}
9304 9685
@@ -9482,7 +9863,7 @@ usage: (set-coding-system-priority &rest coding-systems) */)
9482 (ptrdiff_t nargs, Lisp_Object *args) 9863 (ptrdiff_t nargs, Lisp_Object *args)
9483{ 9864{
9484 ptrdiff_t i, j; 9865 ptrdiff_t i, j;
9485 int changed[coding_category_max]; 9866 bool changed[coding_category_max];
9486 enum coding_category priorities[coding_category_max]; 9867 enum coding_category priorities[coding_category_max];
9487 9868
9488 memset (changed, 0, sizeof changed); 9869 memset (changed, 0, sizeof changed);
@@ -9516,7 +9897,7 @@ usage: (set-coding-system-priority &rest coding-systems) */)
9516 && changed[coding_priorities[j]]) 9897 && changed[coding_priorities[j]])
9517 j++; 9898 j++;
9518 if (j == coding_category_max) 9899 if (j == coding_category_max)
9519 abort (); 9900 emacs_abort ();
9520 priorities[i] = coding_priorities[j]; 9901 priorities[i] = coding_priorities[j];
9521 } 9902 }
9522 9903
@@ -9567,11 +9948,11 @@ make_subsidiaries (Lisp_Object base)
9567{ 9948{
9568 Lisp_Object subsidiaries; 9949 Lisp_Object subsidiaries;
9569 ptrdiff_t base_name_len = SBYTES (SYMBOL_NAME (base)); 9950 ptrdiff_t base_name_len = SBYTES (SYMBOL_NAME (base));
9570 char *buf = (char *) alloca (base_name_len + 6); 9951 char *buf = alloca (base_name_len + 6);
9571 int i; 9952 int i;
9572 9953
9573 memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len); 9954 memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len);
9574 subsidiaries = Fmake_vector (make_number (3), Qnil); 9955 subsidiaries = make_uninit_vector (3);
9575 for (i = 0; i < 3; i++) 9956 for (i = 0; i < 3; i++)
9576 { 9957 {
9577 strcpy (buf + base_name_len, suffixes[i]); 9958 strcpy (buf + base_name_len, suffixes[i]);
@@ -9605,16 +9986,16 @@ usage: (define-coding-system-internal ...) */)
9605 9986
9606 name = args[coding_arg_name]; 9987 name = args[coding_arg_name];
9607 CHECK_SYMBOL (name); 9988 CHECK_SYMBOL (name);
9608 CODING_ATTR_BASE_NAME (attrs) = name; 9989 ASET (attrs, coding_attr_base_name, name);
9609 9990
9610 val = args[coding_arg_mnemonic]; 9991 val = args[coding_arg_mnemonic];
9611 if (! STRINGP (val)) 9992 if (! STRINGP (val))
9612 CHECK_CHARACTER (val); 9993 CHECK_CHARACTER (val);
9613 CODING_ATTR_MNEMONIC (attrs) = val; 9994 ASET (attrs, coding_attr_mnemonic, val);
9614 9995
9615 coding_type = args[coding_arg_coding_type]; 9996 coding_type = args[coding_arg_coding_type];
9616 CHECK_SYMBOL (coding_type); 9997 CHECK_SYMBOL (coding_type);
9617 CODING_ATTR_TYPE (attrs) = coding_type; 9998 ASET (attrs, coding_attr_type, coding_type);
9618 9999
9619 charset_list = args[coding_arg_charset_list]; 10000 charset_list = args[coding_arg_charset_list];
9620 if (SYMBOLP (charset_list)) 10001 if (SYMBOLP (charset_list))
@@ -9661,49 +10042,49 @@ usage: (define-coding-system-internal ...) */)
9661 max_charset_id = charset->id; 10042 max_charset_id = charset->id;
9662 } 10043 }
9663 } 10044 }
9664 CODING_ATTR_CHARSET_LIST (attrs) = charset_list; 10045 ASET (attrs, coding_attr_charset_list, charset_list);
9665 10046
9666 safe_charsets = make_uninit_string (max_charset_id + 1); 10047 safe_charsets = make_uninit_string (max_charset_id + 1);
9667 memset (SDATA (safe_charsets), 255, max_charset_id + 1); 10048 memset (SDATA (safe_charsets), 255, max_charset_id + 1);
9668 for (tail = charset_list; CONSP (tail); tail = XCDR (tail)) 10049 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9669 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0); 10050 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9670 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets; 10051 ASET (attrs, coding_attr_safe_charsets, safe_charsets);
9671 10052
9672 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p]; 10053 ASET (attrs, coding_attr_ascii_compat, args[coding_arg_ascii_compatible_p]);
9673 10054
9674 val = args[coding_arg_decode_translation_table]; 10055 val = args[coding_arg_decode_translation_table];
9675 if (! CHAR_TABLE_P (val) && ! CONSP (val)) 10056 if (! CHAR_TABLE_P (val) && ! CONSP (val))
9676 CHECK_SYMBOL (val); 10057 CHECK_SYMBOL (val);
9677 CODING_ATTR_DECODE_TBL (attrs) = val; 10058 ASET (attrs, coding_attr_decode_tbl, val);
9678 10059
9679 val = args[coding_arg_encode_translation_table]; 10060 val = args[coding_arg_encode_translation_table];
9680 if (! CHAR_TABLE_P (val) && ! CONSP (val)) 10061 if (! CHAR_TABLE_P (val) && ! CONSP (val))
9681 CHECK_SYMBOL (val); 10062 CHECK_SYMBOL (val);
9682 CODING_ATTR_ENCODE_TBL (attrs) = val; 10063 ASET (attrs, coding_attr_encode_tbl, val);
9683 10064
9684 val = args[coding_arg_post_read_conversion]; 10065 val = args[coding_arg_post_read_conversion];
9685 CHECK_SYMBOL (val); 10066 CHECK_SYMBOL (val);
9686 CODING_ATTR_POST_READ (attrs) = val; 10067 ASET (attrs, coding_attr_post_read, val);
9687 10068
9688 val = args[coding_arg_pre_write_conversion]; 10069 val = args[coding_arg_pre_write_conversion];
9689 CHECK_SYMBOL (val); 10070 CHECK_SYMBOL (val);
9690 CODING_ATTR_PRE_WRITE (attrs) = val; 10071 ASET (attrs, coding_attr_pre_write, val);
9691 10072
9692 val = args[coding_arg_default_char]; 10073 val = args[coding_arg_default_char];
9693 if (NILP (val)) 10074 if (NILP (val))
9694 CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' '); 10075 ASET (attrs, coding_attr_default_char, make_number (' '));
9695 else 10076 else
9696 { 10077 {
9697 CHECK_CHARACTER (val); 10078 CHECK_CHARACTER (val);
9698 CODING_ATTR_DEFAULT_CHAR (attrs) = val; 10079 ASET (attrs, coding_attr_default_char, val);
9699 } 10080 }
9700 10081
9701 val = args[coding_arg_for_unibyte]; 10082 val = args[coding_arg_for_unibyte];
9702 CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt; 10083 ASET (attrs, coding_attr_for_unibyte, NILP (val) ? Qnil : Qt);
9703 10084
9704 val = args[coding_arg_plist]; 10085 val = args[coding_arg_plist];
9705 CHECK_LIST (val); 10086 CHECK_LIST (val);
9706 CODING_ATTR_PLIST (attrs) = val; 10087 ASET (attrs, coding_attr_plist, val);
9707 10088
9708 if (EQ (coding_type, Qcharset)) 10089 if (EQ (coding_type, Qcharset))
9709 { 10090 {
@@ -9728,7 +10109,7 @@ usage: (define-coding-system-internal ...) */)
9728 int idx = (dim - 1) * 4; 10109 int idx = (dim - 1) * 4;
9729 10110
9730 if (CHARSET_ASCII_COMPATIBLE_P (charset)) 10111 if (CHARSET_ASCII_COMPATIBLE_P (charset))
9731 CODING_ATTR_ASCII_COMPAT (attrs) = Qt; 10112 ASET (attrs, coding_attr_ascii_compat, Qt);
9732 10113
9733 for (i = charset->code_space[idx]; 10114 for (i = charset->code_space[idx];
9734 i <= charset->code_space[idx + 1]; i++) 10115 i <= charset->code_space[idx + 1]; i++)
@@ -9743,9 +10124,9 @@ usage: (define-coding-system-internal ...) */)
9743 { 10124 {
9744 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp))); 10125 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9745 if (dim < dim2) 10126 if (dim < dim2)
9746 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil)); 10127 tmp = list2 (XCAR (tail), tmp);
9747 else 10128 else
9748 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil)); 10129 tmp = list2 (tmp, XCAR (tail));
9749 } 10130 }
9750 else 10131 else
9751 { 10132 {
@@ -9756,7 +10137,7 @@ usage: (define-coding-system-internal ...) */)
9756 break; 10137 break;
9757 } 10138 }
9758 if (NILP (tmp2)) 10139 if (NILP (tmp2))
9759 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil)); 10140 tmp = nconc2 (tmp, list1 (XCAR (tail)));
9760 else 10141 else
9761 { 10142 {
9762 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2))); 10143 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
@@ -9790,11 +10171,11 @@ usage: (define-coding-system-internal ...) */)
9790 10171
9791 val = args[coding_arg_ccl_valids]; 10172 val = args[coding_arg_ccl_valids];
9792 valids = Fmake_string (make_number (256), make_number (0)); 10173 valids = Fmake_string (make_number (256), make_number (0));
9793 for (tail = val; !NILP (tail); tail = Fcdr (tail)) 10174 for (tail = val; CONSP (tail); tail = XCDR (tail))
9794 { 10175 {
9795 int from, to; 10176 int from, to;
9796 10177
9797 val = Fcar (tail); 10178 val = XCAR (tail);
9798 if (INTEGERP (val)) 10179 if (INTEGERP (val))
9799 { 10180 {
9800 if (! (0 <= XINT (val) && XINT (val) <= 255)) 10181 if (! (0 <= XINT (val) && XINT (val) <= 255))
@@ -9826,7 +10207,7 @@ usage: (define-coding-system-internal ...) */)
9826 { 10207 {
9827 Lisp_Object bom, endian; 10208 Lisp_Object bom, endian;
9828 10209
9829 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil; 10210 ASET (attrs, coding_attr_ascii_compat, Qnil);
9830 10211
9831 if (nargs < coding_arg_utf16_max) 10212 if (nargs < coding_arg_utf16_max)
9832 goto short_args; 10213 goto short_args;
@@ -9871,7 +10252,7 @@ usage: (define-coding-system-internal ...) */)
9871 CHECK_VECTOR (initial); 10252 CHECK_VECTOR (initial);
9872 for (i = 0; i < 4; i++) 10253 for (i = 0; i < 4; i++)
9873 { 10254 {
9874 val = Faref (initial, make_number (i)); 10255 val = AREF (initial, i);
9875 if (! NILP (val)) 10256 if (! NILP (val))
9876 { 10257 {
9877 struct charset *charset; 10258 struct charset *charset;
@@ -9879,7 +10260,7 @@ usage: (define-coding-system-internal ...) */)
9879 CHECK_CHARSET_GET_CHARSET (val, charset); 10260 CHECK_CHARSET_GET_CHARSET (val, charset);
9880 ASET (initial, i, make_number (CHARSET_ID (charset))); 10261 ASET (initial, i, make_number (CHARSET_ID (charset)));
9881 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset)) 10262 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9882 CODING_ATTR_ASCII_COMPAT (attrs) = Qt; 10263 ASET (attrs, coding_attr_ascii_compat, Qt);
9883 } 10264 }
9884 else 10265 else
9885 ASET (initial, i, make_number (-1)); 10266 ASET (initial, i, make_number (-1));
@@ -9891,12 +10272,12 @@ usage: (define-coding-system-internal ...) */)
9891 CHECK_NUMBER_CDR (reg_usage); 10272 CHECK_NUMBER_CDR (reg_usage);
9892 10273
9893 request = Fcopy_sequence (args[coding_arg_iso2022_request]); 10274 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9894 for (tail = request; ! NILP (tail); tail = Fcdr (tail)) 10275 for (tail = request; CONSP (tail); tail = XCDR (tail))
9895 { 10276 {
9896 int id; 10277 int id;
9897 Lisp_Object tmp1; 10278 Lisp_Object tmp1;
9898 10279
9899 val = Fcar (tail); 10280 val = XCAR (tail);
9900 CHECK_CONS (val); 10281 CHECK_CONS (val);
9901 tmp1 = XCAR (val); 10282 tmp1 = XCAR (val);
9902 CHECK_CHARSET_GET_ID (tmp1, id); 10283 CHECK_CHARSET_GET_ID (tmp1, id);
@@ -9940,13 +10321,13 @@ usage: (define-coding-system-internal ...) */)
9940 } 10321 }
9941 if (category != coding_category_iso_8_1 10322 if (category != coding_category_iso_8_1
9942 && category != coding_category_iso_8_2) 10323 && category != coding_category_iso_8_2)
9943 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil; 10324 ASET (attrs, coding_attr_ascii_compat, Qnil);
9944 } 10325 }
9945 else if (EQ (coding_type, Qemacs_mule)) 10326 else if (EQ (coding_type, Qemacs_mule))
9946 { 10327 {
9947 if (EQ (args[coding_arg_charset_list], Qemacs_mule)) 10328 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9948 ASET (attrs, coding_attr_emacs_mule_full, Qt); 10329 ASET (attrs, coding_attr_emacs_mule_full, Qt);
9949 CODING_ATTR_ASCII_COMPAT (attrs) = Qt; 10330 ASET (attrs, coding_attr_ascii_compat, Qt);
9950 category = coding_category_emacs_mule; 10331 category = coding_category_emacs_mule;
9951 } 10332 }
9952 else if (EQ (coding_type, Qshift_jis)) 10333 else if (EQ (coding_type, Qshift_jis))
@@ -9963,7 +10344,7 @@ usage: (define-coding-system-internal ...) */)
9963 error ("Dimension of charset %s is not one", 10344 error ("Dimension of charset %s is not one",
9964 SDATA (SYMBOL_NAME (CHARSET_NAME (charset)))); 10345 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9965 if (CHARSET_ASCII_COMPATIBLE_P (charset)) 10346 if (CHARSET_ASCII_COMPATIBLE_P (charset))
9966 CODING_ATTR_ASCII_COMPAT (attrs) = Qt; 10347 ASET (attrs, coding_attr_ascii_compat, Qt);
9967 10348
9968 charset_list = XCDR (charset_list); 10349 charset_list = XCDR (charset_list);
9969 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); 10350 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
@@ -10001,7 +10382,7 @@ usage: (define-coding-system-internal ...) */)
10001 error ("Dimension of charset %s is not one", 10382 error ("Dimension of charset %s is not one",
10002 SDATA (SYMBOL_NAME (CHARSET_NAME (charset)))); 10383 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10003 if (CHARSET_ASCII_COMPATIBLE_P (charset)) 10384 if (CHARSET_ASCII_COMPATIBLE_P (charset))
10004 CODING_ATTR_ASCII_COMPAT (attrs) = Qt; 10385 ASET (attrs, coding_attr_ascii_compat, Qt);
10005 10386
10006 charset_list = XCDR (charset_list); 10387 charset_list = XCDR (charset_list);
10007 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); 10388 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
@@ -10015,7 +10396,7 @@ usage: (define-coding-system-internal ...) */)
10015 else if (EQ (coding_type, Qraw_text)) 10396 else if (EQ (coding_type, Qraw_text))
10016 { 10397 {
10017 category = coding_category_raw_text; 10398 category = coding_category_raw_text;
10018 CODING_ATTR_ASCII_COMPAT (attrs) = Qt; 10399 ASET (attrs, coding_attr_ascii_compat, Qt);
10019 } 10400 }
10020 else if (EQ (coding_type, Qutf_8)) 10401 else if (EQ (coding_type, Qutf_8))
10021 { 10402 {
@@ -10035,26 +10416,37 @@ usage: (define-coding-system-internal ...) */)
10035 } 10416 }
10036 ASET (attrs, coding_attr_utf_bom, bom); 10417 ASET (attrs, coding_attr_utf_bom, bom);
10037 if (NILP (bom)) 10418 if (NILP (bom))
10038 CODING_ATTR_ASCII_COMPAT (attrs) = Qt; 10419 ASET (attrs, coding_attr_ascii_compat, Qt);
10039 10420
10040 category = (CONSP (bom) ? coding_category_utf_8_auto 10421 category = (CONSP (bom) ? coding_category_utf_8_auto
10041 : NILP (bom) ? coding_category_utf_8_nosig 10422 : NILP (bom) ? coding_category_utf_8_nosig
10042 : coding_category_utf_8_sig); 10423 : coding_category_utf_8_sig);
10043 } 10424 }
10044 else if (EQ (coding_type, Qundecided)) 10425 else if (EQ (coding_type, Qundecided))
10045 category = coding_category_undecided; 10426 {
10427 if (nargs < coding_arg_undecided_max)
10428 goto short_args;
10429 ASET (attrs, coding_attr_undecided_inhibit_null_byte_detection,
10430 args[coding_arg_undecided_inhibit_null_byte_detection]);
10431 ASET (attrs, coding_attr_undecided_inhibit_iso_escape_detection,
10432 args[coding_arg_undecided_inhibit_iso_escape_detection]);
10433 ASET (attrs, coding_attr_undecided_prefer_utf_8,
10434 args[coding_arg_undecided_prefer_utf_8]);
10435 category = coding_category_undecided;
10436 }
10046 else 10437 else
10047 error ("Invalid coding system type: %s", 10438 error ("Invalid coding system type: %s",
10048 SDATA (SYMBOL_NAME (coding_type))); 10439 SDATA (SYMBOL_NAME (coding_type)));
10049 10440
10050 CODING_ATTR_CATEGORY (attrs) = make_number (category); 10441 ASET (attrs, coding_attr_category, make_number (category));
10051 CODING_ATTR_PLIST (attrs) 10442 ASET (attrs, coding_attr_plist,
10052 = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category), 10443 Fcons (QCcategory,
10053 CODING_ATTR_PLIST (attrs))); 10444 Fcons (AREF (Vcoding_category_table, category),
10054 CODING_ATTR_PLIST (attrs) 10445 CODING_ATTR_PLIST (attrs))));
10055 = Fcons (QCascii_compatible_p, 10446 ASET (attrs, coding_attr_plist,
10056 Fcons (CODING_ATTR_ASCII_COMPAT (attrs), 10447 Fcons (QCascii_compatible_p,
10057 CODING_ATTR_PLIST (attrs))); 10448 Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
10449 CODING_ATTR_PLIST (attrs))));
10058 10450
10059 eol_type = args[coding_arg_eol_type]; 10451 eol_type = args[coding_arg_eol_type];
10060 if (! NILP (eol_type) 10452 if (! NILP (eol_type)
@@ -10063,7 +10455,7 @@ usage: (define-coding-system-internal ...) */)
10063 && ! EQ (eol_type, Qmac)) 10455 && ! EQ (eol_type, Qmac))
10064 error ("Invalid eol-type"); 10456 error ("Invalid eol-type");
10065 10457
10066 aliases = Fcons (name, Qnil); 10458 aliases = list1 (name);
10067 10459
10068 if (NILP (eol_type)) 10460 if (NILP (eol_type))
10069 { 10461 {
@@ -10073,9 +10465,10 @@ usage: (define-coding-system-internal ...) */)
10073 Lisp_Object this_spec, this_name, this_aliases, this_eol_type; 10465 Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
10074 10466
10075 this_name = AREF (eol_type, i); 10467 this_name = AREF (eol_type, i);
10076 this_aliases = Fcons (this_name, Qnil); 10468 this_aliases = list1 (this_name);
10077 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac); 10469 this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
10078 this_spec = Fmake_vector (make_number (3), attrs); 10470 this_spec = make_uninit_vector (3);
10471 ASET (this_spec, 0, attrs);
10079 ASET (this_spec, 1, this_aliases); 10472 ASET (this_spec, 1, this_aliases);
10080 ASET (this_spec, 2, this_eol_type); 10473 ASET (this_spec, 2, this_eol_type);
10081 Fputhash (this_name, this_spec, Vcoding_system_hash_table); 10474 Fputhash (this_name, this_spec, Vcoding_system_hash_table);
@@ -10088,7 +10481,8 @@ usage: (define-coding-system-internal ...) */)
10088 } 10481 }
10089 } 10482 }
10090 10483
10091 spec_vec = Fmake_vector (make_number (3), attrs); 10484 spec_vec = make_uninit_vector (3);
10485 ASET (spec_vec, 0, attrs);
10092 ASET (spec_vec, 1, aliases); 10486 ASET (spec_vec, 1, aliases);
10093 ASET (spec_vec, 2, eol_type); 10487 ASET (spec_vec, 2, eol_type);
10094 10488
@@ -10128,7 +10522,7 @@ DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10128 { 10522 {
10129 if (! STRINGP (val)) 10523 if (! STRINGP (val))
10130 CHECK_CHARACTER (val); 10524 CHECK_CHARACTER (val);
10131 CODING_ATTR_MNEMONIC (attrs) = val; 10525 ASET (attrs, coding_attr_mnemonic, val);
10132 } 10526 }
10133 else if (EQ (prop, QCdefault_char)) 10527 else if (EQ (prop, QCdefault_char))
10134 { 10528 {
@@ -10136,37 +10530,37 @@ DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10136 val = make_number (' '); 10530 val = make_number (' ');
10137 else 10531 else
10138 CHECK_CHARACTER (val); 10532 CHECK_CHARACTER (val);
10139 CODING_ATTR_DEFAULT_CHAR (attrs) = val; 10533 ASET (attrs, coding_attr_default_char, val);
10140 } 10534 }
10141 else if (EQ (prop, QCdecode_translation_table)) 10535 else if (EQ (prop, QCdecode_translation_table))
10142 { 10536 {
10143 if (! CHAR_TABLE_P (val) && ! CONSP (val)) 10537 if (! CHAR_TABLE_P (val) && ! CONSP (val))
10144 CHECK_SYMBOL (val); 10538 CHECK_SYMBOL (val);
10145 CODING_ATTR_DECODE_TBL (attrs) = val; 10539 ASET (attrs, coding_attr_decode_tbl, val);
10146 } 10540 }
10147 else if (EQ (prop, QCencode_translation_table)) 10541 else if (EQ (prop, QCencode_translation_table))
10148 { 10542 {
10149 if (! CHAR_TABLE_P (val) && ! CONSP (val)) 10543 if (! CHAR_TABLE_P (val) && ! CONSP (val))
10150 CHECK_SYMBOL (val); 10544 CHECK_SYMBOL (val);
10151 CODING_ATTR_ENCODE_TBL (attrs) = val; 10545 ASET (attrs, coding_attr_encode_tbl, val);
10152 } 10546 }
10153 else if (EQ (prop, QCpost_read_conversion)) 10547 else if (EQ (prop, QCpost_read_conversion))
10154 { 10548 {
10155 CHECK_SYMBOL (val); 10549 CHECK_SYMBOL (val);
10156 CODING_ATTR_POST_READ (attrs) = val; 10550 ASET (attrs, coding_attr_post_read, val);
10157 } 10551 }
10158 else if (EQ (prop, QCpre_write_conversion)) 10552 else if (EQ (prop, QCpre_write_conversion))
10159 { 10553 {
10160 CHECK_SYMBOL (val); 10554 CHECK_SYMBOL (val);
10161 CODING_ATTR_PRE_WRITE (attrs) = val; 10555 ASET (attrs, coding_attr_pre_write, val);
10162 } 10556 }
10163 else if (EQ (prop, QCascii_compatible_p)) 10557 else if (EQ (prop, QCascii_compatible_p))
10164 { 10558 {
10165 CODING_ATTR_ASCII_COMPAT (attrs) = val; 10559 ASET (attrs, coding_attr_ascii_compat, val);
10166 } 10560 }
10167 10561
10168 CODING_ATTR_PLIST (attrs) 10562 ASET (attrs, coding_attr_plist,
10169 = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val); 10563 Fplist_put (CODING_ATTR_PLIST (attrs), prop, val));
10170 return val; 10564 return val;
10171} 10565}
10172 10566
@@ -10186,7 +10580,7 @@ DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10186 list. */ 10580 list. */
10187 while (!NILP (XCDR (aliases))) 10581 while (!NILP (XCDR (aliases)))
10188 aliases = XCDR (aliases); 10582 aliases = XCDR (aliases);
10189 XSETCDR (aliases, Fcons (alias, Qnil)); 10583 XSETCDR (aliases, list1 (alias));
10190 10584
10191 eol_type = AREF (spec, 2); 10585 eol_type = AREF (spec, 2);
10192 if (VECTORP (eol_type)) 10586 if (VECTORP (eol_type))
@@ -10349,7 +10743,7 @@ syms_of_coding (void)
10349 Vcode_conversion_reused_workbuf = Qnil; 10743 Vcode_conversion_reused_workbuf = Qnil;
10350 10744
10351 staticpro (&Vcode_conversion_workbuf_name); 10745 staticpro (&Vcode_conversion_workbuf_name);
10352 Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*"); 10746 Vcode_conversion_workbuf_name = build_pure_c_string (" *code-conversion-work*");
10353 10747
10354 reused_workbuf_in_use = 0; 10748 reused_workbuf_in_use = 0;
10355 10749
@@ -10385,6 +10779,7 @@ syms_of_coding (void)
10385 DEFSYM (Qeol_type, "eol-type"); 10779 DEFSYM (Qeol_type, "eol-type");
10386 DEFSYM (Qunix, "unix"); 10780 DEFSYM (Qunix, "unix");
10387 DEFSYM (Qdos, "dos"); 10781 DEFSYM (Qdos, "dos");
10782 DEFSYM (Qmac, "mac");
10388 10783
10389 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system"); 10784 DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10390 DEFSYM (Qpost_read_conversion, "post-read-conversion"); 10785 DEFSYM (Qpost_read_conversion, "post-read-conversion");
@@ -10399,6 +10794,11 @@ syms_of_coding (void)
10399 DEFSYM (Qutf_8, "utf-8"); 10794 DEFSYM (Qutf_8, "utf-8");
10400 DEFSYM (Qutf_8_emacs, "utf-8-emacs"); 10795 DEFSYM (Qutf_8_emacs, "utf-8-emacs");
10401 10796
10797#if defined (WINDOWSNT) || defined (CYGWIN)
10798 /* No, not utf-16-le: that one has a BOM. */
10799 DEFSYM (Qutf_16le, "utf-16le");
10800#endif
10801
10402 DEFSYM (Qutf_16, "utf-16"); 10802 DEFSYM (Qutf_16, "utf-16");
10403 DEFSYM (Qbig, "big"); 10803 DEFSYM (Qbig, "big");
10404 DEFSYM (Qlittle, "little"); 10804 DEFSYM (Qlittle, "little");
@@ -10410,14 +10810,9 @@ syms_of_coding (void)
10410 10810
10411 DEFSYM (Qcoding_system_error, "coding-system-error"); 10811 DEFSYM (Qcoding_system_error, "coding-system-error");
10412 Fput (Qcoding_system_error, Qerror_conditions, 10812 Fput (Qcoding_system_error, Qerror_conditions,
10413 pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil))); 10813 listn (CONSTYPE_PURE, 2, Qcoding_system_error, Qerror));
10414 Fput (Qcoding_system_error, Qerror_message, 10814 Fput (Qcoding_system_error, Qerror_message,
10415 make_pure_c_string ("Invalid coding system")); 10815 build_pure_c_string ("Invalid coding system"));
10416
10417 /* Intern this now in case it isn't already done.
10418 Setting this variable twice is harmless.
10419 But don't staticpro it here--that is done in alloc.c. */
10420 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
10421 10816
10422 DEFSYM (Qtranslation_table, "translation-table"); 10817 DEFSYM (Qtranslation_table, "translation-table");
10423 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2)); 10818 Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
@@ -10487,10 +10882,8 @@ syms_of_coding (void)
10487 intern_c_string ("coding-category-undecided")); 10882 intern_c_string ("coding-category-undecided"));
10488 10883
10489 DEFSYM (Qinsufficient_source, "insufficient-source"); 10884 DEFSYM (Qinsufficient_source, "insufficient-source");
10490 DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10491 DEFSYM (Qinvalid_source, "invalid-source"); 10885 DEFSYM (Qinvalid_source, "invalid-source");
10492 DEFSYM (Qinterrupted, "interrupted"); 10886 DEFSYM (Qinterrupted, "interrupted");
10493 DEFSYM (Qinsufficient_memory, "insufficient-memory");
10494 DEFSYM (Qcoding_system_define_form, "coding-system-define-form"); 10887 DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
10495 10888
10496 defsubr (&Scoding_system_p); 10889 defsubr (&Scoding_system_p);
@@ -10559,7 +10952,7 @@ Don't modify this variable directly, but use `set-coding-system-priority'. */);
10559 Vcoding_category_list = Qnil; 10952 Vcoding_category_list = Qnil;
10560 for (i = coding_category_max - 1; i >= 0; i--) 10953 for (i = coding_category_max - 1; i >= 0; i--)
10561 Vcoding_category_list 10954 Vcoding_category_list
10562 = Fcons (XVECTOR (Vcoding_category_table)->contents[i], 10955 = Fcons (AREF (Vcoding_category_table, i),
10563 Vcoding_category_list); 10956 Vcoding_category_list);
10564 } 10957 }
10565 10958
@@ -10685,22 +11078,22 @@ Also used for decoding keyboard input on X Window system. */);
10685 DEFVAR_LISP ("eol-mnemonic-unix", eol_mnemonic_unix, 11078 DEFVAR_LISP ("eol-mnemonic-unix", eol_mnemonic_unix,
10686 doc: /* 11079 doc: /*
10687*String displayed in mode line for UNIX-like (LF) end-of-line format. */); 11080*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
10688 eol_mnemonic_unix = make_pure_c_string (":"); 11081 eol_mnemonic_unix = build_pure_c_string (":");
10689 11082
10690 DEFVAR_LISP ("eol-mnemonic-dos", eol_mnemonic_dos, 11083 DEFVAR_LISP ("eol-mnemonic-dos", eol_mnemonic_dos,
10691 doc: /* 11084 doc: /*
10692*String displayed in mode line for DOS-like (CRLF) end-of-line format. */); 11085*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
10693 eol_mnemonic_dos = make_pure_c_string ("\\"); 11086 eol_mnemonic_dos = build_pure_c_string ("\\");
10694 11087
10695 DEFVAR_LISP ("eol-mnemonic-mac", eol_mnemonic_mac, 11088 DEFVAR_LISP ("eol-mnemonic-mac", eol_mnemonic_mac,
10696 doc: /* 11089 doc: /*
10697*String displayed in mode line for MAC-like (CR) end-of-line format. */); 11090*String displayed in mode line for MAC-like (CR) end-of-line format. */);
10698 eol_mnemonic_mac = make_pure_c_string ("/"); 11091 eol_mnemonic_mac = build_pure_c_string ("/");
10699 11092
10700 DEFVAR_LISP ("eol-mnemonic-undecided", eol_mnemonic_undecided, 11093 DEFVAR_LISP ("eol-mnemonic-undecided", eol_mnemonic_undecided,
10701 doc: /* 11094 doc: /*
10702*String displayed in mode line when end-of-line format is not yet determined. */); 11095*String displayed in mode line when end-of-line format is not yet determined. */);
10703 eol_mnemonic_undecided = make_pure_c_string (":"); 11096 eol_mnemonic_undecided = build_pure_c_string (":");
10704 11097
10705 DEFVAR_LISP ("enable-character-translation", Venable_character_translation, 11098 DEFVAR_LISP ("enable-character-translation", Venable_character_translation,
10706 doc: /* 11099 doc: /*
@@ -10791,7 +11184,7 @@ reading if you suppress escape sequence detection.
10791 11184
10792The other way to read escape sequences in a file without decoding is 11185The other way to read escape sequences in a file without decoding is
10793to explicitly specify some coding system that doesn't use ISO-2022 11186to explicitly specify some coding system that doesn't use ISO-2022
10794escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */); 11187escape sequence (e.g., `latin-1') on reading by \\[universal-coding-system-argument]. */);
10795 inhibit_iso_escape_detection = 0; 11188 inhibit_iso_escape_detection = 0;
10796 11189
10797 DEFVAR_BOOL ("inhibit-null-byte-detection", 11190 DEFVAR_BOOL ("inhibit-null-byte-detection",
@@ -10807,6 +11200,11 @@ from GNU Find and GNU Grep. Emacs will then ignore the null bytes and
10807decode text as usual. */); 11200decode text as usual. */);
10808 inhibit_null_byte_detection = 0; 11201 inhibit_null_byte_detection = 0;
10809 11202
11203 DEFVAR_BOOL ("disable-ascii-optimization", disable_ascii_optimization,
11204 doc: /* If non-nil, Emacs does not optimize code decoder for ASCII files.
11205Internal use only. Removed after the experimental optimizer gets stable. */);
11206 disable_ascii_optimization = 0;
11207
10810 DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input, 11208 DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input,
10811 doc: /* Char table for translating self-inserting characters. 11209 doc: /* Char table for translating self-inserting characters.
10812This is applied to the result of input methods, not their input. 11210This is applied to the result of input methods, not their input.
@@ -10818,11 +11216,11 @@ internal character representation. */);
10818 Vtranslation_table_for_input = Qnil; 11216 Vtranslation_table_for_input = Qnil;
10819 11217
10820 { 11218 {
10821 Lisp_Object args[coding_arg_max]; 11219 Lisp_Object args[coding_arg_undecided_max];
10822 Lisp_Object plist[16]; 11220 Lisp_Object plist[16];
10823 int i; 11221 int i;
10824 11222
10825 for (i = 0; i < coding_arg_max; i++) 11223 for (i = 0; i < coding_arg_undecided_max; i++)
10826 args[i] = Qnil; 11224 args[i] = Qnil;
10827 11225
10828 plist[0] = intern_c_string (":name"); 11226 plist[0] = intern_c_string (":name");
@@ -10838,7 +11236,7 @@ internal character representation. */);
10838 plist[10] = intern_c_string (":for-unibyte"); 11236 plist[10] = intern_c_string (":for-unibyte");
10839 plist[11] = args[coding_arg_for_unibyte] = Qt; 11237 plist[11] = args[coding_arg_for_unibyte] = Qt;
10840 plist[12] = intern_c_string (":docstring"); 11238 plist[12] = intern_c_string (":docstring");
10841 plist[13] = make_pure_c_string ("Do no conversion.\n\ 11239 plist[13] = build_pure_c_string ("Do no conversion.\n\
10842\n\ 11240\n\
10843When you visit a file with this coding, the file is read into a\n\ 11241When you visit a file with this coding, the file is read into a\n\
10844unibyte buffer as is, thus each byte of a file is treated as a\n\ 11242unibyte buffer as is, thus each byte of a file is treated as a\n\
@@ -10856,10 +11254,12 @@ character.");
10856 plist[8] = intern_c_string (":charset-list"); 11254 plist[8] = intern_c_string (":charset-list");
10857 plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil); 11255 plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10858 plist[11] = args[coding_arg_for_unibyte] = Qnil; 11256 plist[11] = args[coding_arg_for_unibyte] = Qnil;
10859 plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding."); 11257 plist[13] = build_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
10860 plist[15] = args[coding_arg_eol_type] = Qnil; 11258 plist[15] = args[coding_arg_eol_type] = Qnil;
10861 args[coding_arg_plist] = Flist (16, plist); 11259 args[coding_arg_plist] = Flist (16, plist);
10862 Fdefine_coding_system_internal (coding_arg_max, args); 11260 args[coding_arg_undecided_inhibit_null_byte_detection] = make_number (0);
11261 args[coding_arg_undecided_inhibit_iso_escape_detection] = make_number (0);
11262 Fdefine_coding_system_internal (coding_arg_undecided_max, args);
10863 } 11263 }
10864 11264
10865 setup_coding_system (Qno_conversion, &safe_terminal_coding); 11265 setup_coding_system (Qno_conversion, &safe_terminal_coding);