about | summary | refs | log | tree | commit | diff | stats
path: root/src/coding.c
diff options
context:
space:
mode:
author YAMAMOTO Mitsuharu 2019-04-27 18:33:39 +0900
committer YAMAMOTO Mitsuharu 2019-04-27 18:33:39 +0900
commit 886bedb36c7b959b7e6fc8ce8e0c04e144b0ae28 (patch)
tree b5770d9fc10a704ad8aeb3474c6940121252c770 /src/coding.c
parent 015a6e1df2772bd43680df5cbeaffccf98a881da (diff)
parent 8dc00b2f1e6523c634df3e24379afbe712a32b27 (diff)
download emacs-886bedb36c7b959b7e6fc8ce8e0c04e144b0ae28.tar.gz
download emacs-886bedb36c7b959b7e6fc8ce8e0c04e144b0ae28.zip
Merge branch 'master' into harfbuzz
Diffstat (limited to 'src/coding.c')
-rw-r--r-- src/coding.c 294
1 files changed, 138 insertions, 156 deletions
diff --git a/src/coding.c b/src/coding.c
index 398691fc864..2c6b2c4d051 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -1,5 +1,5 @@
1/* Coding system handler (conversion, detection, etc). 1/* Coding system handler (conversion, detection, etc).
2 Copyright (C) 2001-2018 Free Software Foundation, Inc. 2 Copyright (C) 2001-2019 Free Software Foundation, Inc.
3 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 3 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
4 2005, 2006, 2007, 2008, 2009, 2010, 2011 4 2005, 2006, 2007, 2008, 2009, 2010, 2011
5 National Institute of Advanced Industrial Science and Technology (AIST) 5 National Institute of Advanced Industrial Science and Technology (AIST)
@@ -298,25 +298,16 @@ encode_coding_XXX (struct coding_system *coding)
298#include "composite.h" 298#include "composite.h"
299#include "coding.h" 299#include "coding.h"
300#include "termhooks.h" 300#include "termhooks.h"
301#include "pdumper.h"
301 302
302Lisp_Object Vcoding_system_hash_table; 303Lisp_Object Vcoding_system_hash_table;
303 304
304/* Format of end-of-line decided by system. This is Qunix on
305 Unix and Mac, Qdos on DOS/Windows.
306 This has an effect only for external encoding (i.e. for output to
307 file and process), not for in-buffer or Lisp string encoding. */
308static Lisp_Object system_eol_type;
309
310#ifdef emacs
311
312/* Coding-systems are handed between Emacs Lisp programs and C internal 305/* Coding-systems are handed between Emacs Lisp programs and C internal
313 routines by the following three variables. */ 306 routines by the following three variables. */
314/* Coding system to be used to encode text for terminal display when 307/* Coding system to be used to encode text for terminal display when
315 terminal coding system is nil. */ 308 terminal coding system is nil. */
316struct coding_system safe_terminal_coding; 309struct coding_system safe_terminal_coding;
317 310
318#endif /* emacs */
319
320/* Two special coding systems. */ 311/* Two special coding systems. */
321static Lisp_Object Vsjis_coding_system; 312static Lisp_Object Vsjis_coding_system;
322static Lisp_Object Vbig5_coding_system; 313static Lisp_Object Vbig5_coding_system;
@@ -617,23 +608,7 @@ inhibit_flag (int encoded_flag, bool var)
617 do { \ 608 do { \
618 (attrs) = CODING_ID_ATTRS ((coding)->id); \ 609 (attrs) = CODING_ID_ATTRS ((coding)->id); \
619 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \ 610 (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
620 } while (0) 611 } while (false)
621
622static void
623CHECK_FIXNAT_CAR (Lisp_Object x)
624{
625 Lisp_Object tmp = XCAR (x);
626 CHECK_FIXNAT (tmp);
627 XSETCAR (x, tmp);
628}
629
630static void
631CHECK_FIXNAT_CDR (Lisp_Object x)
632{
633 Lisp_Object tmp = XCDR (x);
634 CHECK_FIXNAT (tmp);
635 XSETCDR (x, tmp);
636}
637 612
638/* True if CODING's destination can be grown. */ 613/* True if CODING's destination can be grown. */
639 614
@@ -4611,8 +4586,7 @@ detect_coding_sjis (struct coding_system *coding,
4611 int max_first_byte_of_2_byte_code; 4586 int max_first_byte_of_2_byte_code;
4612 4587
4613 CODING_GET_INFO (coding, attrs, charset_list); 4588 CODING_GET_INFO (coding, attrs, charset_list);
4614 max_first_byte_of_2_byte_code 4589 max_first_byte_of_2_byte_code = list_length (charset_list) <= 3 ? 0xEF : 0xFC;
4615 = (XFIXNUM (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
4616 4590
4617 detect_info->checked |= CATEGORY_MASK_SJIS; 4591 detect_info->checked |= CATEGORY_MASK_SJIS;
4618 /* A coding system of this category is always ASCII compatible. */ 4592 /* A coding system of this category is always ASCII compatible. */
@@ -5739,7 +5713,7 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
5739 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK; 5713 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5740 coding->spec.undecided.inhibit_nbd 5714 coding->spec.undecided.inhibit_nbd
5741 = (encode_inhibit_flag 5715 = (encode_inhibit_flag
5742 (AREF (attrs, coding_attr_undecided_inhibit_null_byte_detection))); 5716 (AREF (attrs, coding_attr_undecided_inhibit_nul_byte_detection)));
5743 coding->spec.undecided.inhibit_ied 5717 coding->spec.undecided.inhibit_ied
5744 = (encode_inhibit_flag 5718 = (encode_inhibit_flag
5745 (AREF (attrs, coding_attr_undecided_inhibit_iso_escape_detection))); 5719 (AREF (attrs, coding_attr_undecided_inhibit_iso_escape_detection)));
@@ -5992,8 +5966,7 @@ raw_text_coding_system_p (struct coding_system *coding)
5992 5966
5993/* If CODING_SYSTEM doesn't specify end-of-line format, return one of 5967/* If CODING_SYSTEM doesn't specify end-of-line format, return one of
5994 the subsidiary that has the same eol-spec as PARENT (if it is not 5968 the subsidiary that has the same eol-spec as PARENT (if it is not
5995 nil and specifies end-of-line format) or the system's setting 5969 nil and specifies end-of-line format) or the system's setting. */
5996 (system_eol_type). */
5997 5970
5998Lisp_Object 5971Lisp_Object
5999coding_inherit_eol_type (Lisp_Object coding_system, Lisp_Object parent) 5972coding_inherit_eol_type (Lisp_Object coding_system, Lisp_Object parent)
@@ -6008,20 +5981,24 @@ coding_inherit_eol_type (Lisp_Object coding_system, Lisp_Object parent)
6008 eol_type = AREF (spec, 2); 5981 eol_type = AREF (spec, 2);
6009 if (VECTORP (eol_type)) 5982 if (VECTORP (eol_type))
6010 { 5983 {
6011 Lisp_Object parent_eol_type; 5984 /* Format of end-of-line decided by system.
5985 This is Qunix on Unix and Mac, Qdos on DOS/Windows.
5986 This has an effect only for external encoding (i.e., for output to
5987 file and process), not for in-buffer or Lisp string encoding. */
5988 Lisp_Object system_eol_type = Qunix;
5989 #ifdef DOS_NT
5990 system_eol_type = Qdos;
5991 #endif
6012 5992
5993 Lisp_Object parent_eol_type = system_eol_type;
6013 if (! NILP (parent)) 5994 if (! NILP (parent))
6014 { 5995 {
6015 Lisp_Object parent_spec;
6016
6017 CHECK_CODING_SYSTEM (parent); 5996 CHECK_CODING_SYSTEM (parent);
6018 parent_spec = CODING_SYSTEM_SPEC (parent); 5997 Lisp_Object parent_spec = CODING_SYSTEM_SPEC (parent);
6019 parent_eol_type = AREF (parent_spec, 2); 5998 Lisp_Object pspec_type = AREF (parent_spec, 2);
6020 if (VECTORP (parent_eol_type)) 5999 if (!VECTORP (pspec_type))
6021 parent_eol_type = system_eol_type; 6000 parent_eol_type = pspec_type;
6022 } 6001 }
6023 else
6024 parent_eol_type = system_eol_type;
6025 if (EQ (parent_eol_type, Qunix)) 6002 if (EQ (parent_eol_type, Qunix))
6026 coding_system = AREF (eol_type, 0); 6003 coding_system = AREF (eol_type, 0);
6027 else if (EQ (parent_eol_type, Qdos)) 6004 else if (EQ (parent_eol_type, Qdos))
@@ -6534,9 +6511,9 @@ detect_coding (struct coding_system *coding)
6534 { 6511 {
6535 int c, i; 6512 int c, i;
6536 struct coding_detection_info detect_info; 6513 struct coding_detection_info detect_info;
6537 bool null_byte_found = 0, eight_bit_found = 0; 6514 bool nul_byte_found = 0, eight_bit_found = 0;
6538 bool inhibit_nbd = inhibit_flag (coding->spec.undecided.inhibit_nbd, 6515 bool inhibit_nbd = inhibit_flag (coding->spec.undecided.inhibit_nbd,
6539 inhibit_null_byte_detection); 6516 inhibit_nul_byte_detection);
6540 bool inhibit_ied = inhibit_flag (coding->spec.undecided.inhibit_ied, 6517 bool inhibit_ied = inhibit_flag (coding->spec.undecided.inhibit_ied,
6541 inhibit_iso_escape_detection); 6518 inhibit_iso_escape_detection);
6542 bool prefer_utf_8 = coding->spec.undecided.prefer_utf_8; 6519 bool prefer_utf_8 = coding->spec.undecided.prefer_utf_8;
@@ -6549,7 +6526,7 @@ detect_coding (struct coding_system *coding)
6549 if (c & 0x80) 6526 if (c & 0x80)
6550 { 6527 {
6551 eight_bit_found = 1; 6528 eight_bit_found = 1;
6552 if (null_byte_found) 6529 if (nul_byte_found)
6553 break; 6530 break;
6554 } 6531 }
6555 else if (c < 0x20) 6532 else if (c < 0x20)
@@ -6564,7 +6541,7 @@ detect_coding (struct coding_system *coding)
6564 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE)) 6541 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
6565 { 6542 {
6566 /* We didn't find an 8-bit code. We may 6543 /* We didn't find an 8-bit code. We may
6567 have found a null-byte, but it's very 6544 have found a NUL-byte, but it's very
6568 rare that a binary file conforms to 6545 rare that a binary file conforms to
6569 ISO-2022. */ 6546 ISO-2022. */
6570 src = src_end; 6547 src = src_end;
@@ -6576,7 +6553,7 @@ detect_coding (struct coding_system *coding)
6576 } 6553 }
6577 else if (! c && !inhibit_nbd) 6554 else if (! c && !inhibit_nbd)
6578 { 6555 {
6579 null_byte_found = 1; 6556 nul_byte_found = 1;
6580 if (eight_bit_found) 6557 if (eight_bit_found)
6581 break; 6558 break;
6582 } 6559 }
@@ -6608,7 +6585,7 @@ detect_coding (struct coding_system *coding)
6608 coding->head_ascii++; 6585 coding->head_ascii++;
6609 } 6586 }
6610 6587
6611 if (null_byte_found || eight_bit_found 6588 if (nul_byte_found || eight_bit_found
6612 || coding->head_ascii < coding->src_bytes 6589 || coding->head_ascii < coding->src_bytes
6613 || detect_info.found) 6590 || detect_info.found)
6614 { 6591 {
@@ -6626,7 +6603,7 @@ detect_coding (struct coding_system *coding)
6626 } 6603 }
6627 else 6604 else
6628 { 6605 {
6629 if (null_byte_found) 6606 if (nul_byte_found)
6630 { 6607 {
6631 detect_info.checked |= ~CATEGORY_MASK_UTF_16; 6608 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6632 detect_info.rejected |= ~CATEGORY_MASK_UTF_16; 6609 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
@@ -6699,7 +6676,7 @@ detect_coding (struct coding_system *coding)
6699 else 6676 else
6700 found = CODING_ID_NAME (this->id); 6677 found = CODING_ID_NAME (this->id);
6701 } 6678 }
6702 else if (null_byte_found) 6679 else if (nul_byte_found)
6703 found = Qno_conversion; 6680 found = Qno_conversion;
6704 else if ((detect_info.rejected & CATEGORY_MASK_ANY) 6681 else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6705 == CATEGORY_MASK_ANY) 6682 == CATEGORY_MASK_ANY)
@@ -7805,7 +7782,7 @@ encode_coding (struct coding_system *coding)
7805 7782
7806 7783
7807/* Name (or base name) of work buffer for code conversion. */ 7784/* Name (or base name) of work buffer for code conversion. */
7808static Lisp_Object Vcode_conversion_workbuf_name; 7785Lisp_Object Vcode_conversion_workbuf_name;
7809 7786
7810/* A working buffer used by the top level conversion. Once it is 7787/* A working buffer used by the top level conversion. Once it is
7811 created, it is never destroyed. It has the name 7788 created, it is never destroyed. It has the name
@@ -7817,43 +7794,6 @@ static Lisp_Object Vcode_conversion_reused_workbuf;
7817/* True iff Vcode_conversion_reused_workbuf is already in use. */ 7794/* True iff Vcode_conversion_reused_workbuf is already in use. */
7818static bool reused_workbuf_in_use; 7795static bool reused_workbuf_in_use;
7819 7796
7820
7821/* Return a working buffer of code conversion. MULTIBYTE specifies the
7822 multibyteness of returning buffer. */
7823
7824static Lisp_Object
7825make_conversion_work_buffer (bool multibyte)
7826{
7827 Lisp_Object name, workbuf;
7828 struct buffer *current;
7829
7830 if (reused_workbuf_in_use)
7831 {
7832 name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7833 workbuf = Fget_buffer_create (name);
7834 }
7835 else
7836 {
7837 reused_workbuf_in_use = 1;
7838 if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7839 Vcode_conversion_reused_workbuf
7840 = Fget_buffer_create (Vcode_conversion_workbuf_name);
7841 workbuf = Vcode_conversion_reused_workbuf;
7842 }
7843 current = current_buffer;
7844 set_buffer_internal (XBUFFER (workbuf));
7845 /* We can't allow modification hooks to run in the work buffer. For
7846 instance, directory_files_internal assumes that file decoding
7847 doesn't compile new regexps. */
7848 Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7849 Ferase_buffer ();
7850 bset_undo_list (current_buffer, Qt);
7851 bset_enable_multibyte_characters (current_buffer, multibyte ? Qt : Qnil);
7852 set_buffer_internal (current);
7853 return workbuf;
7854}
7855
7856
7857static void 7797static void
7858code_conversion_restore (Lisp_Object arg) 7798code_conversion_restore (Lisp_Object arg)
7859{ 7799{
@@ -7877,9 +7817,39 @@ code_conversion_save (bool with_work_buf, bool multibyte)
7877 Lisp_Object workbuf = Qnil; 7817 Lisp_Object workbuf = Qnil;
7878 7818
7879 if (with_work_buf) 7819 if (with_work_buf)
7880 workbuf = make_conversion_work_buffer (multibyte); 7820 {
7821 if (reused_workbuf_in_use)
7822 {
7823 Lisp_Object name
7824 = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7825 workbuf = Fget_buffer_create (name);
7826 }
7827 else
7828 {
7829 if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7830 Vcode_conversion_reused_workbuf
7831 = Fget_buffer_create (Vcode_conversion_workbuf_name);
7832 workbuf = Vcode_conversion_reused_workbuf;
7833 }
7834 }
7881 record_unwind_protect (code_conversion_restore, 7835 record_unwind_protect (code_conversion_restore,
7882 Fcons (Fcurrent_buffer (), workbuf)); 7836 Fcons (Fcurrent_buffer (), workbuf));
7837 if (!NILP (workbuf))
7838 {
7839 struct buffer *current = current_buffer;
7840 set_buffer_internal (XBUFFER (workbuf));
7841 /* We can't allow modification hooks to run in the work buffer. For
7842 instance, directory_files_internal assumes that file decoding
7843 doesn't compile new regexps. */
7844 Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7845 Ferase_buffer ();
7846 bset_undo_list (current_buffer, Qt);
7847 bset_enable_multibyte_characters (current_buffer, multibyte ? Qt : Qnil);
7848 if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7849 reused_workbuf_in_use = 1;
7850 set_buffer_internal (current);
7851 }
7852
7883 return workbuf; 7853 return workbuf;
7884} 7854}
7885 7855
@@ -8468,7 +8438,7 @@ from_unicode (Lisp_Object str)
8468Lisp_Object 8438Lisp_Object
8469from_unicode_buffer (const wchar_t *wstr) 8439from_unicode_buffer (const wchar_t *wstr)
8470{ 8440{
8471 /* We get one of the two final null bytes for free. */ 8441 /* We get one of the two final NUL bytes for free. */
8472 ptrdiff_t len = 1 + sizeof (wchar_t) * wcslen (wstr); 8442 ptrdiff_t len = 1 + sizeof (wchar_t) * wcslen (wstr);
8473 AUTO_STRING_WITH_LEN (str, (char *) wstr, len); 8443 AUTO_STRING_WITH_LEN (str, (char *) wstr, len);
8474 return from_unicode (str); 8444 return from_unicode (str);
@@ -8481,7 +8451,7 @@ to_unicode (Lisp_Object str, Lisp_Object *buf)
8481 /* We need to make another copy (in addition to the one made by 8451 /* We need to make another copy (in addition to the one made by
8482 code_convert_string_norecord) to ensure that the final string is 8452 code_convert_string_norecord) to ensure that the final string is
8483 _doubly_ zero terminated --- that is, that the string is 8453 _doubly_ zero terminated --- that is, that the string is
8484 terminated by two zero bytes and one utf-16le null character. 8454 terminated by two zero bytes and one utf-16le NUL character.
8485 Because strings are already terminated with a single zero byte, 8455 Because strings are already terminated with a single zero byte,
8486 we just add one additional zero. */ 8456 we just add one additional zero. */
8487 str = make_uninit_string (SBYTES (*buf) + 1); 8457 str = make_uninit_string (SBYTES (*buf) + 1);
@@ -8494,7 +8464,6 @@ to_unicode (Lisp_Object str, Lisp_Object *buf)
8494#endif /* WINDOWSNT || CYGWIN */ 8464#endif /* WINDOWSNT || CYGWIN */
8495 8465
8496 8466
8497#ifdef emacs
8498/*** 8. Emacs Lisp library functions ***/ 8467/*** 8. Emacs Lisp library functions ***/
8499 8468
8500DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0, 8469DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
@@ -8598,7 +8567,7 @@ detect_coding_system (const unsigned char *src,
8598 ptrdiff_t id; 8567 ptrdiff_t id;
8599 struct coding_detection_info detect_info; 8568 struct coding_detection_info detect_info;
8600 enum coding_category base_category; 8569 enum coding_category base_category;
8601 bool null_byte_found = 0, eight_bit_found = 0; 8570 bool nul_byte_found = 0, eight_bit_found = 0;
8602 8571
8603 if (NILP (coding_system)) 8572 if (NILP (coding_system))
8604 coding_system = Qundecided; 8573 coding_system = Qundecided;
@@ -8625,7 +8594,7 @@ detect_coding_system (const unsigned char *src,
8625 struct coding_system *this UNINIT; 8594 struct coding_system *this UNINIT;
8626 int c, i; 8595 int c, i;
8627 bool inhibit_nbd = inhibit_flag (coding.spec.undecided.inhibit_nbd, 8596 bool inhibit_nbd = inhibit_flag (coding.spec.undecided.inhibit_nbd,
8628 inhibit_null_byte_detection); 8597 inhibit_nul_byte_detection);
8629 bool inhibit_ied = inhibit_flag (coding.spec.undecided.inhibit_ied, 8598 bool inhibit_ied = inhibit_flag (coding.spec.undecided.inhibit_ied,
8630 inhibit_iso_escape_detection); 8599 inhibit_iso_escape_detection);
8631 bool prefer_utf_8 = coding.spec.undecided.prefer_utf_8; 8600 bool prefer_utf_8 = coding.spec.undecided.prefer_utf_8;
@@ -8637,7 +8606,7 @@ detect_coding_system (const unsigned char *src,
8637 if (c & 0x80) 8606 if (c & 0x80)
8638 { 8607 {
8639 eight_bit_found = 1; 8608 eight_bit_found = 1;
8640 if (null_byte_found) 8609 if (nul_byte_found)
8641 break; 8610 break;
8642 } 8611 }
8643 else if (c < 0x20) 8612 else if (c < 0x20)
@@ -8652,7 +8621,7 @@ detect_coding_system (const unsigned char *src,
8652 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE)) 8621 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
8653 { 8622 {
8654 /* We didn't find an 8-bit code. We may 8623 /* We didn't find an 8-bit code. We may
8655 have found a null-byte, but it's very 8624 have found a NUL-byte, but it's very
8656 rare that a binary file confirm to 8625 rare that a binary file confirm to
8657 ISO-2022. */ 8626 ISO-2022. */
8658 src = src_end; 8627 src = src_end;
@@ -8664,7 +8633,7 @@ detect_coding_system (const unsigned char *src,
8664 } 8633 }
8665 else if (! c && !inhibit_nbd) 8634 else if (! c && !inhibit_nbd)
8666 { 8635 {
8667 null_byte_found = 1; 8636 nul_byte_found = 1;
8668 if (eight_bit_found) 8637 if (eight_bit_found)
8669 break; 8638 break;
8670 } 8639 }
@@ -8675,7 +8644,7 @@ detect_coding_system (const unsigned char *src,
8675 coding.head_ascii++; 8644 coding.head_ascii++;
8676 } 8645 }
8677 8646
8678 if (null_byte_found || eight_bit_found 8647 if (nul_byte_found || eight_bit_found
8679 || coding.head_ascii < coding.src_bytes 8648 || coding.head_ascii < coding.src_bytes
8680 || detect_info.found) 8649 || detect_info.found)
8681 { 8650 {
@@ -8690,7 +8659,7 @@ detect_coding_system (const unsigned char *src,
8690 } 8659 }
8691 else 8660 else
8692 { 8661 {
8693 if (null_byte_found) 8662 if (nul_byte_found)
8694 { 8663 {
8695 detect_info.checked |= ~CATEGORY_MASK_UTF_16; 8664 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8696 detect_info.rejected |= ~CATEGORY_MASK_UTF_16; 8665 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
@@ -8737,24 +8706,24 @@ detect_coding_system (const unsigned char *src,
8737 } 8706 }
8738 8707
8739 if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY 8708 if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
8740 || null_byte_found) 8709 || nul_byte_found)
8741 { 8710 {
8742 detect_info.found = CATEGORY_MASK_RAW_TEXT; 8711 detect_info.found = CATEGORY_MASK_RAW_TEXT;
8743 id = CODING_SYSTEM_ID (Qno_conversion); 8712 id = CODING_SYSTEM_ID (Qno_conversion);
8744 val = list1 (make_fixnum (id)); 8713 val = list1i (id);
8745 } 8714 }
8746 else if (! detect_info.rejected && ! detect_info.found) 8715 else if (! detect_info.rejected && ! detect_info.found)
8747 { 8716 {
8748 detect_info.found = CATEGORY_MASK_ANY; 8717 detect_info.found = CATEGORY_MASK_ANY;
8749 id = coding_categories[coding_category_undecided].id; 8718 id = coding_categories[coding_category_undecided].id;
8750 val = list1 (make_fixnum (id)); 8719 val = list1i (id);
8751 } 8720 }
8752 else if (highest) 8721 else if (highest)
8753 { 8722 {
8754 if (detect_info.found) 8723 if (detect_info.found)
8755 { 8724 {
8756 detect_info.found = 1 << category; 8725 detect_info.found = 1 << category;
8757 val = list1 (make_fixnum (this->id)); 8726 val = list1i (this->id);
8758 } 8727 }
8759 else 8728 else
8760 for (i = 0; i < coding_category_raw_text; i++) 8729 for (i = 0; i < coding_category_raw_text; i++)
@@ -8762,7 +8731,7 @@ detect_coding_system (const unsigned char *src,
8762 { 8731 {
8763 detect_info.found = 1 << coding_priorities[i]; 8732 detect_info.found = 1 << coding_priorities[i];
8764 id = coding_categories[coding_priorities[i]].id; 8733 id = coding_categories[coding_priorities[i]].id;
8765 val = list1 (make_fixnum (id)); 8734 val = list1i (id);
8766 break; 8735 break;
8767 } 8736 }
8768 } 8737 }
@@ -8779,7 +8748,7 @@ detect_coding_system (const unsigned char *src,
8779 found |= 1 << category; 8748 found |= 1 << category;
8780 id = coding_categories[category].id; 8749 id = coding_categories[category].id;
8781 if (id >= 0) 8750 if (id >= 0)
8782 val = list1 (make_fixnum (id)); 8751 val = list1i (id);
8783 } 8752 }
8784 } 8753 }
8785 for (i = coding_category_raw_text - 1; i >= 0; i--) 8754 for (i = coding_category_raw_text - 1; i >= 0; i--)
@@ -8804,7 +8773,7 @@ detect_coding_system (const unsigned char *src,
8804 this = coding_categories + coding_category_utf_8_sig; 8773 this = coding_categories + coding_category_utf_8_sig;
8805 else 8774 else
8806 this = coding_categories + coding_category_utf_8_nosig; 8775 this = coding_categories + coding_category_utf_8_nosig;
8807 val = list1 (make_fixnum (this->id)); 8776 val = list1i (this->id);
8808 } 8777 }
8809 } 8778 }
8810 else if (base_category == coding_category_utf_16_auto) 8779 else if (base_category == coding_category_utf_16_auto)
@@ -8821,13 +8790,13 @@ detect_coding_system (const unsigned char *src,
8821 this = coding_categories + coding_category_utf_16_be_nosig; 8790 this = coding_categories + coding_category_utf_16_be_nosig;
8822 else 8791 else
8823 this = coding_categories + coding_category_utf_16_le_nosig; 8792 this = coding_categories + coding_category_utf_16_le_nosig;
8824 val = list1 (make_fixnum (this->id)); 8793 val = list1i (this->id);
8825 } 8794 }
8826 } 8795 }
8827 else 8796 else
8828 { 8797 {
8829 detect_info.found = 1 << XFIXNUM (CODING_ATTR_CATEGORY (attrs)); 8798 detect_info.found = 1 << XFIXNUM (CODING_ATTR_CATEGORY (attrs));
8830 val = list1 (make_fixnum (coding.id)); 8799 val = list1i (coding.id);
8831 } 8800 }
8832 8801
8833 /* Then, detect eol-format if necessary. */ 8802 /* Then, detect eol-format if necessary. */
@@ -8839,7 +8808,7 @@ detect_coding_system (const unsigned char *src,
8839 { 8808 {
8840 if (detect_info.found & ~CATEGORY_MASK_UTF_16) 8809 if (detect_info.found & ~CATEGORY_MASK_UTF_16)
8841 { 8810 {
8842 if (null_byte_found) 8811 if (nul_byte_found)
8843 normal_eol = EOL_SEEN_LF; 8812 normal_eol = EOL_SEEN_LF;
8844 else 8813 else
8845 normal_eol = detect_eol (coding.source, src_bytes, 8814 normal_eol = detect_eol (coding.source, src_bytes,
@@ -9770,7 +9739,7 @@ DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_intern
9770 tset_charset_list 9739 tset_charset_list
9771 (term, (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK 9740 (term, (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK
9772 ? coding_charset_list (terminal_coding) 9741 ? coding_charset_list (terminal_coding)
9773 : list1 (make_fixnum (charset_ascii)))); 9742 : list1i (charset_ascii)));
9774 return Qnil; 9743 return Qnil;
9775} 9744}
9776 9745
@@ -10271,15 +10240,9 @@ usage: (define-coding-system-internal ...) */)
10271 else 10240 else
10272 { 10241 {
10273 CHECK_CONS (val); 10242 CHECK_CONS (val);
10274 CHECK_FIXNAT_CAR (val); 10243 CHECK_RANGED_INTEGER (XCAR (val), 0, 255);
10275 CHECK_FIXNUM_CDR (val);
10276 if (XFIXNUM (XCAR (val)) > 255)
10277 args_out_of_range_3 (XCAR (val),
10278 make_fixnum (0), make_fixnum (255));
10279 from = XFIXNUM (XCAR (val)); 10244 from = XFIXNUM (XCAR (val));
10280 if (! (from <= XFIXNUM (XCDR (val)) && XFIXNUM (XCDR (val)) <= 255)) 10245 CHECK_RANGED_INTEGER (XCDR (val), from, 255);
10281 args_out_of_range_3 (XCDR (val),
10282 XCAR (val), make_fixnum (255));
10283 to = XFIXNUM (XCDR (val)); 10246 to = XFIXNUM (XCDR (val));
10284 } 10247 }
10285 for (int i = from; i <= to; i++) 10248 for (int i = from; i <= to; i++)
@@ -10354,23 +10317,18 @@ usage: (define-coding-system-internal ...) */)
10354 10317
10355 reg_usage = args[coding_arg_iso2022_reg_usage]; 10318 reg_usage = args[coding_arg_iso2022_reg_usage];
10356 CHECK_CONS (reg_usage); 10319 CHECK_CONS (reg_usage);
10357 CHECK_FIXNUM_CAR (reg_usage); 10320 CHECK_FIXNUM (XCAR (reg_usage));
10358 CHECK_FIXNUM_CDR (reg_usage); 10321 CHECK_FIXNUM (XCDR (reg_usage));
10359 10322
10360 request = Fcopy_sequence (args[coding_arg_iso2022_request]); 10323 request = Fcopy_sequence (args[coding_arg_iso2022_request]);
10361 for (Lisp_Object tail = request; CONSP (tail); tail = XCDR (tail)) 10324 for (Lisp_Object tail = request; CONSP (tail); tail = XCDR (tail))
10362 { 10325 {
10363 int id; 10326 int id;
10364 Lisp_Object tmp1;
10365 10327
10366 val = XCAR (tail); 10328 val = XCAR (tail);
10367 CHECK_CONS (val); 10329 CHECK_CONS (val);
10368 tmp1 = XCAR (val); 10330 CHECK_CHARSET_GET_ID (XCAR (val), id);
10369 CHECK_CHARSET_GET_ID (tmp1, id); 10331 CHECK_RANGED_INTEGER (XCDR (val), 0, 3);
10370 CHECK_FIXNAT_CDR (val);
10371 if (XFIXNUM (XCDR (val)) >= 4)
10372 error ("Invalid graphic register number: %"pI"d",
10373 XFIXNUM (XCDR (val)));
10374 XSETCAR (val, make_fixnum (id)); 10332 XSETCAR (val, make_fixnum (id));
10375 } 10333 }
10376 10334
@@ -10419,14 +10377,11 @@ usage: (define-coding-system-internal ...) */)
10419 } 10377 }
10420 else if (EQ (coding_type, Qshift_jis)) 10378 else if (EQ (coding_type, Qshift_jis))
10421 { 10379 {
10422 10380 ptrdiff_t charset_list_len = list_length (charset_list);
10423 struct charset *charset; 10381 if (charset_list_len != 3 && charset_list_len != 4)
10424
10425 if (XFIXNUM (Flength (charset_list)) != 3
10426 && XFIXNUM (Flength (charset_list)) != 4)
10427 error ("There should be three or four charsets"); 10382 error ("There should be three or four charsets");
10428 10383
10429 charset = CHARSET_FROM_ID (XFIXNUM (XCAR (charset_list))); 10384 struct charset *charset = CHARSET_FROM_ID (XFIXNUM (XCAR (charset_list)));
10430 if (CHARSET_DIMENSION (charset) != 1) 10385 if (CHARSET_DIMENSION (charset) != 1)
10431 error ("Dimension of charset %s is not one", 10386 error ("Dimension of charset %s is not one",
10432 SDATA (SYMBOL_NAME (CHARSET_NAME (charset)))); 10387 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
@@ -10461,7 +10416,7 @@ usage: (define-coding-system-internal ...) */)
10461 { 10416 {
10462 struct charset *charset; 10417 struct charset *charset;
10463 10418
10464 if (XFIXNUM (Flength (charset_list)) != 2) 10419 if (list_length (charset_list) != 2)
10465 error ("There should be just two charsets"); 10420 error ("There should be just two charsets");
10466 10421
10467 charset = CHARSET_FROM_ID (XFIXNUM (XCAR (charset_list))); 10422 charset = CHARSET_FROM_ID (XFIXNUM (XCAR (charset_list)));
@@ -10513,8 +10468,8 @@ usage: (define-coding-system-internal ...) */)
10513 { 10468 {
10514 if (nargs < coding_arg_undecided_max) 10469 if (nargs < coding_arg_undecided_max)
10515 goto short_args; 10470 goto short_args;
10516 ASET (attrs, coding_attr_undecided_inhibit_null_byte_detection, 10471 ASET (attrs, coding_attr_undecided_inhibit_nul_byte_detection,
10517 args[coding_arg_undecided_inhibit_null_byte_detection]); 10472 args[coding_arg_undecided_inhibit_nul_byte_detection]);
10518 ASET (attrs, coding_attr_undecided_inhibit_iso_escape_detection, 10473 ASET (attrs, coding_attr_undecided_inhibit_iso_escape_detection,
10519 args[coding_arg_undecided_inhibit_iso_escape_detection]); 10474 args[coding_arg_undecided_inhibit_iso_escape_detection]);
10520 ASET (attrs, coding_attr_undecided_prefer_utf_8, 10475 ASET (attrs, coding_attr_undecided_prefer_utf_8,
@@ -10759,8 +10714,6 @@ coding system whose eol-type is N. */)
10759 return make_fixnum (n); 10714 return make_fixnum (n);
10760} 10715}
10761 10716
10762#endif /* emacs */
10763
10764 10717
10765/*** 9. Post-amble ***/ 10718/*** 9. Post-amble ***/
10766 10719
@@ -10775,6 +10728,9 @@ init_coding_once (void)
10775 coding_priorities[i] = i; 10728 coding_priorities[i] = i;
10776 } 10729 }
10777 10730
10731 PDUMPER_REMEMBER_SCALAR (coding_categories);
10732 PDUMPER_REMEMBER_SCALAR (coding_priorities);
10733
10778 /* ISO2022 specific initialize routine. */ 10734 /* ISO2022 specific initialize routine. */
10779 for (i = 0; i < 0x20; i++) 10735 for (i = 0; i < 0x20; i++)
10780 iso_code_class[i] = ISO_control_0; 10736 iso_code_class[i] = ISO_control_0;
@@ -10794,6 +10750,8 @@ init_coding_once (void)
10794 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3; 10750 iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10795 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer; 10751 iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10796 10752
10753 PDUMPER_REMEMBER_SCALAR (iso_code_class);
10754
10797 for (i = 0; i < 256; i++) 10755 for (i = 0; i < 256; i++)
10798 { 10756 {
10799 emacs_mule_bytes[i] = 1; 10757 emacs_mule_bytes[i] = 1;
@@ -10802,9 +10760,11 @@ init_coding_once (void)
10802 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3; 10760 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10803 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4; 10761 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10804 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4; 10762 emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
10763
10764 PDUMPER_REMEMBER_SCALAR (emacs_mule_bytes);
10805} 10765}
10806 10766
10807#ifdef emacs 10767static void reset_coding_after_pdumper_load (void);
10808 10768
10809void 10769void
10810syms_of_coding (void) 10770syms_of_coding (void)
@@ -10825,6 +10785,7 @@ syms_of_coding (void)
10825 Vcode_conversion_workbuf_name = build_pure_c_string (" *code-conversion-work*"); 10785 Vcode_conversion_workbuf_name = build_pure_c_string (" *code-conversion-work*");
10826 10786
10827 reused_workbuf_in_use = 0; 10787 reused_workbuf_in_use = 0;
10788 PDUMPER_REMEMBER_SCALAR (reused_workbuf_in_use);
10828 10789
10829 DEFSYM (Qcharset, "charset"); 10790 DEFSYM (Qcharset, "charset");
10830 DEFSYM (Qtarget_idx, "target-idx"); 10791 DEFSYM (Qtarget_idx, "target-idx");
@@ -10860,6 +10821,7 @@ syms_of_coding (void)
10860 DEFSYM (Qundecided, "undecided"); 10821 DEFSYM (Qundecided, "undecided");
10861 DEFSYM (Qno_conversion, "no-conversion"); 10822 DEFSYM (Qno_conversion, "no-conversion");
10862 DEFSYM (Qraw_text, "raw-text"); 10823 DEFSYM (Qraw_text, "raw-text");
10824 DEFSYM (Qus_ascii, "us-ascii");
10863 10825
10864 DEFSYM (Qiso_2022, "iso-2022"); 10826 DEFSYM (Qiso_2022, "iso-2022");
10865 10827
@@ -10884,7 +10846,7 @@ syms_of_coding (void)
10884 /* Error signaled when there's a problem with detecting a coding system. */ 10846 /* Error signaled when there's a problem with detecting a coding system. */
10885 DEFSYM (Qcoding_system_error, "coding-system-error"); 10847 DEFSYM (Qcoding_system_error, "coding-system-error");
10886 Fput (Qcoding_system_error, Qerror_conditions, 10848 Fput (Qcoding_system_error, Qerror_conditions,
10887 listn (CONSTYPE_PURE, 2, Qcoding_system_error, Qerror)); 10849 pure_list (Qcoding_system_error, Qerror));
10888 Fput (Qcoding_system_error, Qerror_message, 10850 Fput (Qcoding_system_error, Qerror_message,
10889 build_pure_c_string ("Invalid coding system")); 10851 build_pure_c_string ("Invalid coding system"));
10890 10852
@@ -11262,18 +11224,18 @@ to explicitly specify some coding system that doesn't use ISO-2022
11262escape sequence (e.g., `latin-1') on reading by \\[universal-coding-system-argument]. */); 11224escape sequence (e.g., `latin-1') on reading by \\[universal-coding-system-argument]. */);
11263 inhibit_iso_escape_detection = 0; 11225 inhibit_iso_escape_detection = 0;
11264 11226
11265 DEFVAR_BOOL ("inhibit-null-byte-detection", 11227 DEFVAR_BOOL ("inhibit-nul-byte-detection",
11266 inhibit_null_byte_detection, 11228 inhibit_nul_byte_detection,
11267 doc: /* If non-nil, Emacs ignores null bytes on code detection. 11229 doc: /* If non-nil, Emacs ignores NUL bytes on code detection.
11268By default, Emacs treats it as binary data, and does not attempt to 11230By default, Emacs treats it as binary data, and does not attempt to
11269decode it. The effect is as if you specified `no-conversion' for 11231decode it. The effect is as if you specified `no-conversion' for
11270reading that text. 11232reading that text.
11271 11233
11272Set this to non-nil when a regular text happens to include null bytes. 11234Set this to non-nil when a regular text happens to include NUL bytes.
11273Examples are Index nodes of Info files and null-byte delimited output 11235Examples are Index nodes of Info files and NUL-byte delimited output
11274from GNU Find and GNU Grep. Emacs will then ignore the null bytes and 11236from GNU Find and GNU Grep. Emacs will then ignore the NUL bytes and
11275decode text as usual. */); 11237decode text as usual. */);
11276 inhibit_null_byte_detection = 0; 11238 inhibit_nul_byte_detection = 0;
11277 11239
11278 DEFVAR_BOOL ("disable-ascii-optimization", disable_ascii_optimization, 11240 DEFVAR_BOOL ("disable-ascii-optimization", disable_ascii_optimization,
11279 doc: /* If non-nil, Emacs does not optimize code decoder for ASCII files. 11241 doc: /* If non-nil, Emacs does not optimize code decoder for ASCII files.
@@ -11326,13 +11288,13 @@ internal character representation. */);
11326 /* This is already set. 11288 /* This is already set.
11327 plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */ 11289 plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
11328 plist[8] = intern_c_string (":charset-list"); 11290 plist[8] = intern_c_string (":charset-list");
11329 plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil); 11291 plist[9] = args[coding_arg_charset_list] = list1 (Qascii);
11330 plist[11] = args[coding_arg_for_unibyte] = Qnil; 11292 plist[11] = args[coding_arg_for_unibyte] = Qnil;
11331 plist[13] = build_pure_c_string ("No conversion on encoding, " 11293 plist[13] = build_pure_c_string ("No conversion on encoding, "
11332 "automatic conversion on decoding."); 11294 "automatic conversion on decoding.");
11333 plist[15] = args[coding_arg_eol_type] = Qnil; 11295 plist[15] = args[coding_arg_eol_type] = Qnil;
11334 args[coding_arg_plist] = CALLMANY (Flist, plist); 11296 args[coding_arg_plist] = CALLMANY (Flist, plist);
11335 args[coding_arg_undecided_inhibit_null_byte_detection] = make_fixnum (0); 11297 args[coding_arg_undecided_inhibit_nul_byte_detection] = make_fixnum (0);
11336 args[coding_arg_undecided_inhibit_iso_escape_detection] = make_fixnum (0); 11298 args[coding_arg_undecided_inhibit_iso_escape_detection] = make_fixnum (0);
11337 Fdefine_coding_system_internal (coding_arg_undecided_max, args); 11299 Fdefine_coding_system_internal (coding_arg_undecided_max, args);
11338 11300
@@ -11341,11 +11303,31 @@ internal character representation. */);
11341 for (int i = 0; i < coding_category_max; i++) 11303 for (int i = 0; i < coding_category_max; i++)
11342 Fset (AREF (Vcoding_category_table, i), Qno_conversion); 11304 Fset (AREF (Vcoding_category_table, i), Qno_conversion);
11343 11305
11344#if defined (DOS_NT) 11306 pdumper_do_now_and_after_load (reset_coding_after_pdumper_load);
11345 system_eol_type = Qdos; 11307}
11346#else 11308
11347 system_eol_type = Qunix; 11309static void
11348#endif 11310reset_coding_after_pdumper_load (void)
11349 staticpro (&system_eol_type); 11311{
11312 if (!dumped_with_pdumper_p ())
11313 return;
11314 for (struct coding_system *this = &coding_categories[0];
11315 this < &coding_categories[coding_category_max];
11316 ++this)
11317 {
11318 int id = this->id;
11319 if (id >= 0)
11320 {
11321 /* Need to rebuild the coding system object because we
11322 persisted it as a scalar and it's full of gunk that's now
11323 invalid. */
11324 memset (this, 0, sizeof (*this));
11325 setup_coding_system (CODING_ID_NAME (id), this);
11326 }
11327 }
11328 /* In temacs the below is done by mule-conf.el, because we need to
11329 define us-ascii first. But in dumped Emacs us-ascii is restored
11330 by the above loop, and mule-conf.el will not be loaded, so we set
11331 it up now; otherwise safe_terminal_coding will remain zeroed. */
11332 Fset_safe_terminal_coding_system_internal (Qus_ascii);
11350} 11333}
11351#endif /* emacs */