diff options
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 1464 |
1 files changed, 932 insertions, 532 deletions
diff --git a/src/coding.c b/src/coding.c index 17e342298b9..c10fb375672 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* Coding system handler (conversion, detection, etc). | 1 | /* Coding system handler (conversion, detection, etc). |
| 2 | Copyright (C) 2001-2012 Free Software Foundation, Inc. | 2 | Copyright (C) 2001-2013 Free Software Foundation, Inc. |
| 3 | Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, | 3 | Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
| 4 | 2005, 2006, 2007, 2008, 2009, 2010, 2011 | 4 | 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
| 5 | National Institute of Advanced Industrial Science and Technology (AIST) | 5 | National Institute of Advanced Industrial Science and Technology (AIST) |
| @@ -147,18 +147,18 @@ STRUCT CODING_SYSTEM | |||
| 147 | CODING conforms to the format of XXX, and update the members of | 147 | CODING conforms to the format of XXX, and update the members of |
| 148 | DETECT_INFO. | 148 | DETECT_INFO. |
| 149 | 149 | ||
| 150 | Return 1 if the byte sequence conforms to XXX, otherwise return 0. | 150 | Return true if the byte sequence conforms to XXX. |
| 151 | 151 | ||
| 152 | Below is the template of these functions. */ | 152 | Below is the template of these functions. */ |
| 153 | 153 | ||
| 154 | #if 0 | 154 | #if 0 |
| 155 | static int | 155 | static bool |
| 156 | detect_coding_XXX (struct coding_system *coding, | 156 | detect_coding_XXX (struct coding_system *coding, |
| 157 | struct coding_detection_info *detect_info) | 157 | struct coding_detection_info *detect_info) |
| 158 | { | 158 | { |
| 159 | const unsigned char *src = coding->source; | 159 | const unsigned char *src = coding->source; |
| 160 | const unsigned char *src_end = coding->source + coding->src_bytes; | 160 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 161 | int multibytep = coding->src_multibyte; | 161 | bool multibytep = coding->src_multibyte; |
| 162 | ptrdiff_t consumed_chars = 0; | 162 | ptrdiff_t consumed_chars = 0; |
| 163 | int found = 0; | 163 | int found = 0; |
| 164 | ...; | 164 | ...; |
| @@ -212,7 +212,7 @@ decode_coding_XXXX (struct coding_system *coding) | |||
| 212 | /* A buffer to produce decoded characters. */ | 212 | /* A buffer to produce decoded characters. */ |
| 213 | int *charbuf = coding->charbuf + coding->charbuf_used; | 213 | int *charbuf = coding->charbuf + coding->charbuf_used; |
| 214 | int *charbuf_end = coding->charbuf + coding->charbuf_size; | 214 | int *charbuf_end = coding->charbuf + coding->charbuf_size; |
| 215 | int multibytep = coding->src_multibyte; | 215 | bool multibytep = coding->src_multibyte; |
| 216 | 216 | ||
| 217 | while (1) | 217 | while (1) |
| 218 | { | 218 | { |
| @@ -260,7 +260,7 @@ decode_coding_XXXX (struct coding_system *coding) | |||
| 260 | static void | 260 | static void |
| 261 | encode_coding_XXX (struct coding_system *coding) | 261 | encode_coding_XXX (struct coding_system *coding) |
| 262 | { | 262 | { |
| 263 | int multibytep = coding->dst_multibyte; | 263 | bool multibytep = coding->dst_multibyte; |
| 264 | int *charbuf = coding->charbuf; | 264 | int *charbuf = coding->charbuf; |
| 265 | int *charbuf_end = charbuf->charbuf + coding->charbuf_used; | 265 | int *charbuf_end = charbuf->charbuf + coding->charbuf_used; |
| 266 | unsigned char *dst = coding->destination + coding->produced; | 266 | unsigned char *dst = coding->destination + coding->produced; |
| @@ -285,11 +285,14 @@ encode_coding_XXX (struct coding_system *coding) | |||
| 285 | 285 | ||
| 286 | #include <config.h> | 286 | #include <config.h> |
| 287 | #include <stdio.h> | 287 | #include <stdio.h> |
| 288 | #include <setjmp.h> | 288 | |
| 289 | #ifdef HAVE_WCHAR_H | ||
| 290 | #include <wchar.h> | ||
| 291 | #endif /* HAVE_WCHAR_H */ | ||
| 289 | 292 | ||
| 290 | #include "lisp.h" | 293 | #include "lisp.h" |
| 291 | #include "buffer.h" | ||
| 292 | #include "character.h" | 294 | #include "character.h" |
| 295 | #include "buffer.h" | ||
| 293 | #include "charset.h" | 296 | #include "charset.h" |
| 294 | #include "ccl.h" | 297 | #include "ccl.h" |
| 295 | #include "composite.h" | 298 | #include "composite.h" |
| @@ -303,6 +306,7 @@ Lisp_Object Vcoding_system_hash_table; | |||
| 303 | static Lisp_Object Qcoding_system, Qeol_type; | 306 | static Lisp_Object Qcoding_system, Qeol_type; |
| 304 | static Lisp_Object Qcoding_aliases; | 307 | static Lisp_Object Qcoding_aliases; |
| 305 | Lisp_Object Qunix, Qdos; | 308 | Lisp_Object Qunix, Qdos; |
| 309 | static Lisp_Object Qmac; | ||
| 306 | Lisp_Object Qbuffer_file_coding_system; | 310 | Lisp_Object Qbuffer_file_coding_system; |
| 307 | static Lisp_Object Qpost_read_conversion, Qpre_write_conversion; | 311 | static Lisp_Object Qpost_read_conversion, Qpre_write_conversion; |
| 308 | static Lisp_Object Qdefault_char; | 312 | static Lisp_Object Qdefault_char; |
| @@ -322,8 +326,7 @@ Lisp_Object Qcall_process, Qcall_process_region; | |||
| 322 | Lisp_Object Qstart_process, Qopen_network_stream; | 326 | Lisp_Object Qstart_process, Qopen_network_stream; |
| 323 | static Lisp_Object Qtarget_idx; | 327 | static Lisp_Object Qtarget_idx; |
| 324 | 328 | ||
| 325 | static Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source; | 329 | static Lisp_Object Qinsufficient_source, Qinvalid_source, Qinterrupted; |
| 326 | static Lisp_Object Qinterrupted, Qinsufficient_memory; | ||
| 327 | 330 | ||
| 328 | /* If a symbol has this property, evaluate the value to define the | 331 | /* If a symbol has this property, evaluate the value to define the |
| 329 | symbol as a coding system. */ | 332 | symbol as a coding system. */ |
| @@ -344,6 +347,10 @@ Lisp_Object Qcoding_system_p, Qcoding_system_error; | |||
| 344 | Lisp_Object Qemacs_mule, Qraw_text; | 347 | Lisp_Object Qemacs_mule, Qraw_text; |
| 345 | Lisp_Object Qutf_8_emacs; | 348 | Lisp_Object Qutf_8_emacs; |
| 346 | 349 | ||
| 350 | #if defined (WINDOWSNT) || defined (CYGWIN) | ||
| 351 | static Lisp_Object Qutf_16le; | ||
| 352 | #endif | ||
| 353 | |||
| 347 | /* Coding-systems are handed between Emacs Lisp programs and C internal | 354 | /* Coding-systems are handed between Emacs Lisp programs and C internal |
| 348 | routines by the following three variables. */ | 355 | routines by the following three variables. */ |
| 349 | /* Coding system to be used to encode text for terminal display when | 356 | /* Coding system to be used to encode text for terminal display when |
| @@ -416,7 +423,7 @@ enum iso_code_class_type | |||
| 416 | ISO_shift_out, /* ISO_CODE_SO (0x0E) */ | 423 | ISO_shift_out, /* ISO_CODE_SO (0x0E) */ |
| 417 | ISO_shift_in, /* ISO_CODE_SI (0x0F) */ | 424 | ISO_shift_in, /* ISO_CODE_SI (0x0F) */ |
| 418 | ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */ | 425 | ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */ |
| 419 | ISO_escape, /* ISO_CODE_SO (0x1B) */ | 426 | ISO_escape, /* ISO_CODE_ESC (0x1B) */ |
| 420 | ISO_control_1, /* Control codes in the range | 427 | ISO_control_1, /* Control codes in the range |
| 421 | 0x80..0x9F, except for the | 428 | 0x80..0x9F, except for the |
| 422 | following 3 codes. */ | 429 | following 3 codes. */ |
| @@ -486,6 +493,8 @@ enum iso_code_class_type | |||
| 486 | 493 | ||
| 487 | #define CODING_ISO_FLAG_USE_OLDJIS 0x10000 | 494 | #define CODING_ISO_FLAG_USE_OLDJIS 0x10000 |
| 488 | 495 | ||
| 496 | #define CODING_ISO_FLAG_LEVEL_4 0x20000 | ||
| 497 | |||
| 489 | #define CODING_ISO_FLAG_FULL_SUPPORT 0x100000 | 498 | #define CODING_ISO_FLAG_FULL_SUPPORT 0x100000 |
| 490 | 499 | ||
| 491 | /* A character to be produced on output if encoding of the original | 500 | /* A character to be produced on output if encoding of the original |
| @@ -642,17 +651,50 @@ static struct coding_system coding_categories[coding_category_max]; | |||
| 642 | #define max(a, b) ((a) > (b) ? (a) : (b)) | 651 | #define max(a, b) ((a) > (b) ? (a) : (b)) |
| 643 | #endif | 652 | #endif |
| 644 | 653 | ||
| 654 | /* Encode a flag that can be nil, something else, or t as -1, 0, 1. */ | ||
| 655 | |||
| 656 | static int | ||
| 657 | encode_inhibit_flag (Lisp_Object flag) | ||
| 658 | { | ||
| 659 | return NILP (flag) ? -1 : EQ (flag, Qt); | ||
| 660 | } | ||
| 661 | |||
| 662 | /* True if the value of ENCODED_FLAG says a flag should be treated as set. | ||
| 663 | 1 means yes, -1 means no, 0 means ask the user variable VAR. */ | ||
| 664 | |||
| 665 | static bool | ||
| 666 | inhibit_flag (int encoded_flag, bool var) | ||
| 667 | { | ||
| 668 | return 0 < encoded_flag + var; | ||
| 669 | } | ||
| 670 | |||
| 645 | #define CODING_GET_INFO(coding, attrs, charset_list) \ | 671 | #define CODING_GET_INFO(coding, attrs, charset_list) \ |
| 646 | do { \ | 672 | do { \ |
| 647 | (attrs) = CODING_ID_ATTRS ((coding)->id); \ | 673 | (attrs) = CODING_ID_ATTRS ((coding)->id); \ |
| 648 | (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \ | 674 | (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \ |
| 649 | } while (0) | 675 | } while (0) |
| 650 | 676 | ||
| 677 | static void | ||
| 678 | CHECK_NATNUM_CAR (Lisp_Object x) | ||
| 679 | { | ||
| 680 | Lisp_Object tmp = XCAR (x); | ||
| 681 | CHECK_NATNUM (tmp); | ||
| 682 | XSETCAR (x, tmp); | ||
| 683 | } | ||
| 684 | |||
| 685 | static void | ||
| 686 | CHECK_NATNUM_CDR (Lisp_Object x) | ||
| 687 | { | ||
| 688 | Lisp_Object tmp = XCDR (x); | ||
| 689 | CHECK_NATNUM (tmp); | ||
| 690 | XSETCDR (x, tmp); | ||
| 691 | } | ||
| 692 | |||
| 651 | 693 | ||
| 652 | /* Safely get one byte from the source text pointed by SRC which ends | 694 | /* Safely get one byte from the source text pointed by SRC which ends |
| 653 | at SRC_END, and set C to that byte. If there are not enough bytes | 695 | at SRC_END, and set C to that byte. If there are not enough bytes |
| 654 | in the source, it jumps to `no_more_source'. If multibytep is | 696 | in the source, it jumps to 'no_more_source'. If MULTIBYTEP, |
| 655 | nonzero, and a multibyte character is found at SRC, set C to the | 697 | and a multibyte character is found at SRC, set C to the |
| 656 | negative value of the character code. The caller should declare | 698 | negative value of the character code. The caller should declare |
| 657 | and set these variables appropriately in advance: | 699 | and set these variables appropriately in advance: |
| 658 | src, src_end, multibytep */ | 700 | src, src_end, multibytep */ |
| @@ -685,7 +727,7 @@ static struct coding_system coding_categories[coding_category_max]; | |||
| 685 | /* Safely get two bytes from the source text pointed by SRC which ends | 727 | /* Safely get two bytes from the source text pointed by SRC which ends |
| 686 | at SRC_END, and set C1 and C2 to those bytes while skipping the | 728 | at SRC_END, and set C1 and C2 to those bytes while skipping the |
| 687 | heading multibyte characters. If there are not enough bytes in the | 729 | heading multibyte characters. If there are not enough bytes in the |
| 688 | source, it jumps to `no_more_source'. If multibytep is nonzero and | 730 | source, it jumps to 'no_more_source'. If MULTIBYTEP and |
| 689 | a multibyte character is found for C2, set C2 to the negative value | 731 | a multibyte character is found for C2, set C2 to the negative value |
| 690 | of the character code. The caller should declare and set these | 732 | of the character code. The caller should declare and set these |
| 691 | variables appropriately in advance: | 733 | variables appropriately in advance: |
| @@ -746,8 +788,8 @@ static struct coding_system coding_categories[coding_category_max]; | |||
| 746 | 788 | ||
| 747 | 789 | ||
| 748 | /* Store a byte C in the place pointed by DST and increment DST to the | 790 | /* Store a byte C in the place pointed by DST and increment DST to the |
| 749 | next free point, and increment PRODUCED_CHARS. If MULTIBYTEP is | 791 | next free point, and increment PRODUCED_CHARS. If MULTIBYTEP, |
| 750 | nonzero, store in an appropriate multibyte from. The caller should | 792 | store in an appropriate multibyte form. The caller should |
| 751 | declare and set the variables `dst' and `multibytep' appropriately | 793 | declare and set the variables `dst' and `multibytep' appropriately |
| 752 | in advance. */ | 794 | in advance. */ |
| 753 | 795 | ||
| @@ -806,83 +848,6 @@ static struct coding_system coding_categories[coding_category_max]; | |||
| 806 | } while (0) | 848 | } while (0) |
| 807 | 849 | ||
| 808 | 850 | ||
| 809 | /* Prototypes for static functions. */ | ||
| 810 | static void record_conversion_result (struct coding_system *coding, | ||
| 811 | enum coding_result_code result); | ||
| 812 | static int detect_coding_utf_8 (struct coding_system *, | ||
| 813 | struct coding_detection_info *info); | ||
| 814 | static void decode_coding_utf_8 (struct coding_system *); | ||
| 815 | static int encode_coding_utf_8 (struct coding_system *); | ||
| 816 | |||
| 817 | static int detect_coding_utf_16 (struct coding_system *, | ||
| 818 | struct coding_detection_info *info); | ||
| 819 | static void decode_coding_utf_16 (struct coding_system *); | ||
| 820 | static int encode_coding_utf_16 (struct coding_system *); | ||
| 821 | |||
| 822 | static int detect_coding_iso_2022 (struct coding_system *, | ||
| 823 | struct coding_detection_info *info); | ||
| 824 | static void decode_coding_iso_2022 (struct coding_system *); | ||
| 825 | static int encode_coding_iso_2022 (struct coding_system *); | ||
| 826 | |||
| 827 | static int detect_coding_emacs_mule (struct coding_system *, | ||
| 828 | struct coding_detection_info *info); | ||
| 829 | static void decode_coding_emacs_mule (struct coding_system *); | ||
| 830 | static int encode_coding_emacs_mule (struct coding_system *); | ||
| 831 | |||
| 832 | static int detect_coding_sjis (struct coding_system *, | ||
| 833 | struct coding_detection_info *info); | ||
| 834 | static void decode_coding_sjis (struct coding_system *); | ||
| 835 | static int encode_coding_sjis (struct coding_system *); | ||
| 836 | |||
| 837 | static int detect_coding_big5 (struct coding_system *, | ||
| 838 | struct coding_detection_info *info); | ||
| 839 | static void decode_coding_big5 (struct coding_system *); | ||
| 840 | static int encode_coding_big5 (struct coding_system *); | ||
| 841 | |||
| 842 | static int detect_coding_ccl (struct coding_system *, | ||
| 843 | struct coding_detection_info *info); | ||
| 844 | static void decode_coding_ccl (struct coding_system *); | ||
| 845 | static int encode_coding_ccl (struct coding_system *); | ||
| 846 | |||
| 847 | static void decode_coding_raw_text (struct coding_system *); | ||
| 848 | static int encode_coding_raw_text (struct coding_system *); | ||
| 849 | |||
| 850 | static void coding_set_source (struct coding_system *); | ||
| 851 | static ptrdiff_t coding_change_source (struct coding_system *); | ||
| 852 | static void coding_set_destination (struct coding_system *); | ||
| 853 | static ptrdiff_t coding_change_destination (struct coding_system *); | ||
| 854 | static void coding_alloc_by_realloc (struct coding_system *, ptrdiff_t); | ||
| 855 | static void coding_alloc_by_making_gap (struct coding_system *, | ||
| 856 | ptrdiff_t, ptrdiff_t); | ||
| 857 | static unsigned char *alloc_destination (struct coding_system *, | ||
| 858 | ptrdiff_t, unsigned char *); | ||
| 859 | static void setup_iso_safe_charsets (Lisp_Object); | ||
| 860 | static ptrdiff_t encode_designation_at_bol (struct coding_system *, | ||
| 861 | int *, int *, unsigned char *); | ||
| 862 | static int detect_eol (const unsigned char *, | ||
| 863 | ptrdiff_t, enum coding_category); | ||
| 864 | static Lisp_Object adjust_coding_eol_type (struct coding_system *, int); | ||
| 865 | static void decode_eol (struct coding_system *); | ||
| 866 | static Lisp_Object get_translation_table (Lisp_Object, int, int *); | ||
| 867 | static Lisp_Object get_translation (Lisp_Object, int *, int *); | ||
| 868 | static int produce_chars (struct coding_system *, Lisp_Object, int); | ||
| 869 | static inline void produce_charset (struct coding_system *, int *, | ||
| 870 | ptrdiff_t); | ||
| 871 | static void produce_annotation (struct coding_system *, ptrdiff_t); | ||
| 872 | static int decode_coding (struct coding_system *); | ||
| 873 | static inline int *handle_composition_annotation (ptrdiff_t, ptrdiff_t, | ||
| 874 | struct coding_system *, | ||
| 875 | int *, ptrdiff_t *); | ||
| 876 | static inline int *handle_charset_annotation (ptrdiff_t, ptrdiff_t, | ||
| 877 | struct coding_system *, | ||
| 878 | int *, ptrdiff_t *); | ||
| 879 | static void consume_chars (struct coding_system *, Lisp_Object, int); | ||
| 880 | static int encode_coding (struct coding_system *); | ||
| 881 | static Lisp_Object make_conversion_work_buffer (int); | ||
| 882 | static Lisp_Object code_conversion_restore (Lisp_Object); | ||
| 883 | static inline int char_encodable_p (int, Lisp_Object); | ||
| 884 | static Lisp_Object make_subsidiaries (Lisp_Object); | ||
| 885 | |||
| 886 | static void | 851 | static void |
| 887 | record_conversion_result (struct coding_system *coding, | 852 | record_conversion_result (struct coding_system *coding, |
| 888 | enum coding_result_code result) | 853 | enum coding_result_code result) |
| @@ -893,18 +858,12 @@ record_conversion_result (struct coding_system *coding, | |||
| 893 | case CODING_RESULT_INSUFFICIENT_SRC: | 858 | case CODING_RESULT_INSUFFICIENT_SRC: |
| 894 | Vlast_code_conversion_error = Qinsufficient_source; | 859 | Vlast_code_conversion_error = Qinsufficient_source; |
| 895 | break; | 860 | break; |
| 896 | case CODING_RESULT_INCONSISTENT_EOL: | ||
| 897 | Vlast_code_conversion_error = Qinconsistent_eol; | ||
| 898 | break; | ||
| 899 | case CODING_RESULT_INVALID_SRC: | 861 | case CODING_RESULT_INVALID_SRC: |
| 900 | Vlast_code_conversion_error = Qinvalid_source; | 862 | Vlast_code_conversion_error = Qinvalid_source; |
| 901 | break; | 863 | break; |
| 902 | case CODING_RESULT_INTERRUPT: | 864 | case CODING_RESULT_INTERRUPT: |
| 903 | Vlast_code_conversion_error = Qinterrupted; | 865 | Vlast_code_conversion_error = Qinterrupted; |
| 904 | break; | 866 | break; |
| 905 | case CODING_RESULT_INSUFFICIENT_MEM: | ||
| 906 | Vlast_code_conversion_error = Qinsufficient_memory; | ||
| 907 | break; | ||
| 908 | case CODING_RESULT_INSUFFICIENT_DST: | 867 | case CODING_RESULT_INSUFFICIENT_DST: |
| 909 | /* Don't record this error in Vlast_code_conversion_error | 868 | /* Don't record this error in Vlast_code_conversion_error |
| 910 | because it happens just temporarily and is resolved when the | 869 | because it happens just temporarily and is resolved when the |
| @@ -998,65 +957,18 @@ record_conversion_result (struct coding_system *coding, | |||
| 998 | 957 | ||
| 999 | 958 | ||
| 1000 | /* Store multibyte form of the character C in P, and advance P to the | 959 | /* Store multibyte form of the character C in P, and advance P to the |
| 1001 | end of the multibyte form. This is like CHAR_STRING_ADVANCE but it | 960 | end of the multibyte form. This used to be like CHAR_STRING_ADVANCE |
| 1002 | never calls MAYBE_UNIFY_CHAR. */ | 961 | without ever calling MAYBE_UNIFY_CHAR, but nowadays we don't call |
| 1003 | 962 | MAYBE_UNIFY_CHAR in CHAR_STRING_ADVANCE. */ | |
| 1004 | #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p) \ | ||
| 1005 | do { \ | ||
| 1006 | if ((c) <= MAX_1_BYTE_CHAR) \ | ||
| 1007 | *(p)++ = (c); \ | ||
| 1008 | else if ((c) <= MAX_2_BYTE_CHAR) \ | ||
| 1009 | *(p)++ = (0xC0 | ((c) >> 6)), \ | ||
| 1010 | *(p)++ = (0x80 | ((c) & 0x3F)); \ | ||
| 1011 | else if ((c) <= MAX_3_BYTE_CHAR) \ | ||
| 1012 | *(p)++ = (0xE0 | ((c) >> 12)), \ | ||
| 1013 | *(p)++ = (0x80 | (((c) >> 6) & 0x3F)), \ | ||
| 1014 | *(p)++ = (0x80 | ((c) & 0x3F)); \ | ||
| 1015 | else if ((c) <= MAX_4_BYTE_CHAR) \ | ||
| 1016 | *(p)++ = (0xF0 | (c >> 18)), \ | ||
| 1017 | *(p)++ = (0x80 | ((c >> 12) & 0x3F)), \ | ||
| 1018 | *(p)++ = (0x80 | ((c >> 6) & 0x3F)), \ | ||
| 1019 | *(p)++ = (0x80 | (c & 0x3F)); \ | ||
| 1020 | else if ((c) <= MAX_5_BYTE_CHAR) \ | ||
| 1021 | *(p)++ = 0xF8, \ | ||
| 1022 | *(p)++ = (0x80 | ((c >> 18) & 0x0F)), \ | ||
| 1023 | *(p)++ = (0x80 | ((c >> 12) & 0x3F)), \ | ||
| 1024 | *(p)++ = (0x80 | ((c >> 6) & 0x3F)), \ | ||
| 1025 | *(p)++ = (0x80 | (c & 0x3F)); \ | ||
| 1026 | else \ | ||
| 1027 | (p) += BYTE8_STRING ((c) - 0x3FFF80, p); \ | ||
| 1028 | } while (0) | ||
| 1029 | 963 | ||
| 964 | #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p) CHAR_STRING_ADVANCE(c, p) | ||
| 1030 | 965 | ||
| 1031 | /* Return the character code of character whose multibyte form is at | 966 | /* Return the character code of character whose multibyte form is at |
| 1032 | P, and advance P to the end of the multibyte form. This is like | 967 | P, and advance P to the end of the multibyte form. This used to be |
| 1033 | STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR. */ | 968 | like STRING_CHAR_ADVANCE without ever calling MAYBE_UNIFY_CHAR, but |
| 1034 | 969 | nowadays STRING_CHAR_ADVANCE doesn't call MAYBE_UNIFY_CHAR. */ | |
| 1035 | #define STRING_CHAR_ADVANCE_NO_UNIFY(p) \ | ||
| 1036 | (!((p)[0] & 0x80) \ | ||
| 1037 | ? *(p)++ \ | ||
| 1038 | : ! ((p)[0] & 0x20) \ | ||
| 1039 | ? ((p) += 2, \ | ||
| 1040 | ((((p)[-2] & 0x1F) << 6) \ | ||
| 1041 | | ((p)[-1] & 0x3F) \ | ||
| 1042 | | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0))) \ | ||
| 1043 | : ! ((p)[0] & 0x10) \ | ||
| 1044 | ? ((p) += 3, \ | ||
| 1045 | ((((p)[-3] & 0x0F) << 12) \ | ||
| 1046 | | (((p)[-2] & 0x3F) << 6) \ | ||
| 1047 | | ((p)[-1] & 0x3F))) \ | ||
| 1048 | : ! ((p)[0] & 0x08) \ | ||
| 1049 | ? ((p) += 4, \ | ||
| 1050 | ((((p)[-4] & 0xF) << 18) \ | ||
| 1051 | | (((p)[-3] & 0x3F) << 12) \ | ||
| 1052 | | (((p)[-2] & 0x3F) << 6) \ | ||
| 1053 | | ((p)[-1] & 0x3F))) \ | ||
| 1054 | : ((p) += 5, \ | ||
| 1055 | ((((p)[-4] & 0x3F) << 18) \ | ||
| 1056 | | (((p)[-3] & 0x3F) << 12) \ | ||
| 1057 | | (((p)[-2] & 0x3F) << 6) \ | ||
| 1058 | | ((p)[-1] & 0x3F)))) | ||
| 1059 | 970 | ||
| 971 | #define STRING_CHAR_ADVANCE_NO_UNIFY(p) STRING_CHAR_ADVANCE(p) | ||
| 1060 | 972 | ||
| 1061 | /* Set coding->source from coding->src_object. */ | 973 | /* Set coding->source from coding->src_object. */ |
| 1062 | 974 | ||
| @@ -1145,8 +1057,8 @@ coding_alloc_by_realloc (struct coding_system *coding, ptrdiff_t bytes) | |||
| 1145 | { | 1057 | { |
| 1146 | if (STRING_BYTES_BOUND - coding->dst_bytes < bytes) | 1058 | if (STRING_BYTES_BOUND - coding->dst_bytes < bytes) |
| 1147 | string_overflow (); | 1059 | string_overflow (); |
| 1148 | coding->destination = (unsigned char *) xrealloc (coding->destination, | 1060 | coding->destination = xrealloc (coding->destination, |
| 1149 | coding->dst_bytes + bytes); | 1061 | coding->dst_bytes + bytes); |
| 1150 | coding->dst_bytes += bytes; | 1062 | coding->dst_bytes += bytes; |
| 1151 | } | 1063 | } |
| 1152 | 1064 | ||
| @@ -1169,14 +1081,7 @@ coding_alloc_by_making_gap (struct coding_system *coding, | |||
| 1169 | GPT -= gap_head_used, GPT_BYTE -= gap_head_used; | 1081 | GPT -= gap_head_used, GPT_BYTE -= gap_head_used; |
| 1170 | } | 1082 | } |
| 1171 | else | 1083 | else |
| 1172 | { | 1084 | make_gap_1 (XBUFFER (coding->dst_object), bytes); |
| 1173 | Lisp_Object this_buffer; | ||
| 1174 | |||
| 1175 | this_buffer = Fcurrent_buffer (); | ||
| 1176 | set_buffer_internal (XBUFFER (coding->dst_object)); | ||
| 1177 | make_gap (bytes); | ||
| 1178 | set_buffer_internal (XBUFFER (this_buffer)); | ||
| 1179 | } | ||
| 1180 | } | 1085 | } |
| 1181 | 1086 | ||
| 1182 | 1087 | ||
| @@ -1255,6 +1160,14 @@ alloc_destination (struct coding_system *coding, ptrdiff_t nbytes, | |||
| 1255 | *buf++ = id; \ | 1160 | *buf++ = id; \ |
| 1256 | } while (0) | 1161 | } while (0) |
| 1257 | 1162 | ||
| 1163 | |||
| 1164 | /* Bitmasks for coding->eol_seen. */ | ||
| 1165 | |||
| 1166 | #define EOL_SEEN_NONE 0 | ||
| 1167 | #define EOL_SEEN_LF 1 | ||
| 1168 | #define EOL_SEEN_CR 2 | ||
| 1169 | #define EOL_SEEN_CRLF 4 | ||
| 1170 | |||
| 1258 | 1171 | ||
| 1259 | /*** 2. Emacs' internal format (emacs-utf-8) ***/ | 1172 | /*** 2. Emacs' internal format (emacs-utf-8) ***/ |
| 1260 | 1173 | ||
| @@ -1264,8 +1177,7 @@ alloc_destination (struct coding_system *coding, ptrdiff_t nbytes, | |||
| 1264 | /*** 3. UTF-8 ***/ | 1177 | /*** 3. UTF-8 ***/ |
| 1265 | 1178 | ||
| 1266 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 1179 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 1267 | Check if a text is encoded in UTF-8. If it is, return 1, else | 1180 | Return true if a text is encoded in UTF-8. */ |
| 1268 | return 0. */ | ||
| 1269 | 1181 | ||
| 1270 | #define UTF_8_1_OCTET_P(c) ((c) < 0x80) | 1182 | #define UTF_8_1_OCTET_P(c) ((c) < 0x80) |
| 1271 | #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80) | 1183 | #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80) |
| @@ -1278,20 +1190,35 @@ alloc_destination (struct coding_system *coding, ptrdiff_t nbytes, | |||
| 1278 | #define UTF_8_BOM_2 0xBB | 1190 | #define UTF_8_BOM_2 0xBB |
| 1279 | #define UTF_8_BOM_3 0xBF | 1191 | #define UTF_8_BOM_3 0xBF |
| 1280 | 1192 | ||
| 1281 | static int | 1193 | /* Unlike the other detect_coding_XXX, this function counts the number of |
| 1194 | characters and checks the EOL format. */ | ||
| 1195 | |||
| 1196 | static bool | ||
| 1282 | detect_coding_utf_8 (struct coding_system *coding, | 1197 | detect_coding_utf_8 (struct coding_system *coding, |
| 1283 | struct coding_detection_info *detect_info) | 1198 | struct coding_detection_info *detect_info) |
| 1284 | { | 1199 | { |
| 1285 | const unsigned char *src = coding->source, *src_base; | 1200 | const unsigned char *src = coding->source, *src_base; |
| 1286 | const unsigned char *src_end = coding->source + coding->src_bytes; | 1201 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 1287 | int multibytep = coding->src_multibyte; | 1202 | bool multibytep = coding->src_multibyte; |
| 1288 | ptrdiff_t consumed_chars = 0; | 1203 | ptrdiff_t consumed_chars = 0; |
| 1289 | int bom_found = 0; | 1204 | bool bom_found = 0; |
| 1290 | int found = 0; | 1205 | int nchars = coding->head_ascii; |
| 1206 | int eol_seen = coding->eol_seen; | ||
| 1291 | 1207 | ||
| 1292 | detect_info->checked |= CATEGORY_MASK_UTF_8; | 1208 | detect_info->checked |= CATEGORY_MASK_UTF_8; |
| 1293 | /* A coding system of this category is always ASCII compatible. */ | 1209 | /* A coding system of this category is always ASCII compatible. */ |
| 1294 | src += coding->head_ascii; | 1210 | src += nchars; |
| 1211 | |||
| 1212 | if (src == coding->source /* BOM should be at the head. */ | ||
| 1213 | && src + 3 < src_end /* BOM is 3-byte long. */ | ||
| 1214 | && src[0] == UTF_8_BOM_1 | ||
| 1215 | && src[1] == UTF_8_BOM_2 | ||
| 1216 | && src[2] == UTF_8_BOM_3) | ||
| 1217 | { | ||
| 1218 | bom_found = 1; | ||
| 1219 | src += 3; | ||
| 1220 | nchars++; | ||
| 1221 | } | ||
| 1295 | 1222 | ||
| 1296 | while (1) | 1223 | while (1) |
| 1297 | { | 1224 | { |
| @@ -1300,13 +1227,29 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1300 | src_base = src; | 1227 | src_base = src; |
| 1301 | ONE_MORE_BYTE (c); | 1228 | ONE_MORE_BYTE (c); |
| 1302 | if (c < 0 || UTF_8_1_OCTET_P (c)) | 1229 | if (c < 0 || UTF_8_1_OCTET_P (c)) |
| 1303 | continue; | 1230 | { |
| 1231 | nchars++; | ||
| 1232 | if (c == '\r') | ||
| 1233 | { | ||
| 1234 | if (src < src_end && *src == '\n') | ||
| 1235 | { | ||
| 1236 | eol_seen |= EOL_SEEN_CRLF; | ||
| 1237 | src++; | ||
| 1238 | nchars++; | ||
| 1239 | } | ||
| 1240 | else | ||
| 1241 | eol_seen |= EOL_SEEN_CR; | ||
| 1242 | } | ||
| 1243 | else if (c == '\n') | ||
| 1244 | eol_seen |= EOL_SEEN_LF; | ||
| 1245 | continue; | ||
| 1246 | } | ||
| 1304 | ONE_MORE_BYTE (c1); | 1247 | ONE_MORE_BYTE (c1); |
| 1305 | if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1)) | 1248 | if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1)) |
| 1306 | break; | 1249 | break; |
| 1307 | if (UTF_8_2_OCTET_LEADING_P (c)) | 1250 | if (UTF_8_2_OCTET_LEADING_P (c)) |
| 1308 | { | 1251 | { |
| 1309 | found = 1; | 1252 | nchars++; |
| 1310 | continue; | 1253 | continue; |
| 1311 | } | 1254 | } |
| 1312 | ONE_MORE_BYTE (c2); | 1255 | ONE_MORE_BYTE (c2); |
| @@ -1314,10 +1257,7 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1314 | break; | 1257 | break; |
| 1315 | if (UTF_8_3_OCTET_LEADING_P (c)) | 1258 | if (UTF_8_3_OCTET_LEADING_P (c)) |
| 1316 | { | 1259 | { |
| 1317 | found = 1; | 1260 | nchars++; |
| 1318 | if (src_base == coding->source | ||
| 1319 | && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3) | ||
| 1320 | bom_found = 1; | ||
| 1321 | continue; | 1261 | continue; |
| 1322 | } | 1262 | } |
| 1323 | ONE_MORE_BYTE (c3); | 1263 | ONE_MORE_BYTE (c3); |
| @@ -1325,7 +1265,7 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1325 | break; | 1265 | break; |
| 1326 | if (UTF_8_4_OCTET_LEADING_P (c)) | 1266 | if (UTF_8_4_OCTET_LEADING_P (c)) |
| 1327 | { | 1267 | { |
| 1328 | found = 1; | 1268 | nchars++; |
| 1329 | continue; | 1269 | continue; |
| 1330 | } | 1270 | } |
| 1331 | ONE_MORE_BYTE (c4); | 1271 | ONE_MORE_BYTE (c4); |
| @@ -1333,7 +1273,7 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1333 | break; | 1273 | break; |
| 1334 | if (UTF_8_5_OCTET_LEADING_P (c)) | 1274 | if (UTF_8_5_OCTET_LEADING_P (c)) |
| 1335 | { | 1275 | { |
| 1336 | found = 1; | 1276 | nchars++; |
| 1337 | continue; | 1277 | continue; |
| 1338 | } | 1278 | } |
| 1339 | break; | 1279 | break; |
| @@ -1350,14 +1290,17 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1350 | if (bom_found) | 1290 | if (bom_found) |
| 1351 | { | 1291 | { |
| 1352 | /* The first character 0xFFFE doesn't necessarily mean a BOM. */ | 1292 | /* The first character 0xFFFE doesn't necessarily mean a BOM. */ |
| 1353 | detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG; | 1293 | detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG; |
| 1354 | } | 1294 | } |
| 1355 | else | 1295 | else |
| 1356 | { | 1296 | { |
| 1357 | detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG; | 1297 | detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG; |
| 1358 | if (found) | 1298 | if (nchars < src_end - coding->source) |
| 1359 | detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG; | 1299 | /* The found characters are less than source bytes, which |
| 1300 | means that we found a valid non-ASCII characters. */ | ||
| 1301 | detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_NOSIG; | ||
| 1360 | } | 1302 | } |
| 1303 | coding->detected_utf8_chars = nchars; | ||
| 1361 | return 1; | 1304 | return 1; |
| 1362 | } | 1305 | } |
| 1363 | 1306 | ||
| @@ -1371,10 +1314,10 @@ decode_coding_utf_8 (struct coding_system *coding) | |||
| 1371 | int *charbuf = coding->charbuf + coding->charbuf_used; | 1314 | int *charbuf = coding->charbuf + coding->charbuf_used; |
| 1372 | int *charbuf_end = coding->charbuf + coding->charbuf_size; | 1315 | int *charbuf_end = coding->charbuf + coding->charbuf_size; |
| 1373 | ptrdiff_t consumed_chars = 0, consumed_chars_base = 0; | 1316 | ptrdiff_t consumed_chars = 0, consumed_chars_base = 0; |
| 1374 | int multibytep = coding->src_multibyte; | 1317 | bool multibytep = coding->src_multibyte; |
| 1375 | enum utf_bom_type bom = CODING_UTF_8_BOM (coding); | 1318 | enum utf_bom_type bom = CODING_UTF_8_BOM (coding); |
| 1376 | int eol_dos = | 1319 | bool eol_dos |
| 1377 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | 1320 | = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); |
| 1378 | int byte_after_cr = -1; | 1321 | int byte_after_cr = -1; |
| 1379 | 1322 | ||
| 1380 | if (bom != utf_without_bom) | 1323 | if (bom != utf_without_bom) |
| @@ -1422,6 +1365,45 @@ decode_coding_utf_8 (struct coding_system *coding) | |||
| 1422 | break; | 1365 | break; |
| 1423 | } | 1366 | } |
| 1424 | 1367 | ||
| 1368 | /* In the simple case, rapidly handle ordinary characters */ | ||
| 1369 | if (multibytep && ! eol_dos | ||
| 1370 | && charbuf < charbuf_end - 6 && src < src_end - 6) | ||
| 1371 | { | ||
| 1372 | while (charbuf < charbuf_end - 6 && src < src_end - 6) | ||
| 1373 | { | ||
| 1374 | c1 = *src; | ||
| 1375 | if (c1 & 0x80) | ||
| 1376 | break; | ||
| 1377 | src++; | ||
| 1378 | consumed_chars++; | ||
| 1379 | *charbuf++ = c1; | ||
| 1380 | |||
| 1381 | c1 = *src; | ||
| 1382 | if (c1 & 0x80) | ||
| 1383 | break; | ||
| 1384 | src++; | ||
| 1385 | consumed_chars++; | ||
| 1386 | *charbuf++ = c1; | ||
| 1387 | |||
| 1388 | c1 = *src; | ||
| 1389 | if (c1 & 0x80) | ||
| 1390 | break; | ||
| 1391 | src++; | ||
| 1392 | consumed_chars++; | ||
| 1393 | *charbuf++ = c1; | ||
| 1394 | |||
| 1395 | c1 = *src; | ||
| 1396 | if (c1 & 0x80) | ||
| 1397 | break; | ||
| 1398 | src++; | ||
| 1399 | consumed_chars++; | ||
| 1400 | *charbuf++ = c1; | ||
| 1401 | } | ||
| 1402 | /* If we handled at least one character, restart the main loop. */ | ||
| 1403 | if (src != src_base) | ||
| 1404 | continue; | ||
| 1405 | } | ||
| 1406 | |||
| 1425 | if (byte_after_cr >= 0) | 1407 | if (byte_after_cr >= 0) |
| 1426 | c1 = byte_after_cr, byte_after_cr = -1; | 1408 | c1 = byte_after_cr, byte_after_cr = -1; |
| 1427 | else | 1409 | else |
| @@ -1513,10 +1495,10 @@ decode_coding_utf_8 (struct coding_system *coding) | |||
| 1513 | } | 1495 | } |
| 1514 | 1496 | ||
| 1515 | 1497 | ||
| 1516 | static int | 1498 | static bool |
| 1517 | encode_coding_utf_8 (struct coding_system *coding) | 1499 | encode_coding_utf_8 (struct coding_system *coding) |
| 1518 | { | 1500 | { |
| 1519 | int multibytep = coding->dst_multibyte; | 1501 | bool multibytep = coding->dst_multibyte; |
| 1520 | int *charbuf = coding->charbuf; | 1502 | int *charbuf = coding->charbuf; |
| 1521 | int *charbuf_end = charbuf + coding->charbuf_used; | 1503 | int *charbuf_end = charbuf + coding->charbuf_used; |
| 1522 | unsigned char *dst = coding->destination + coding->produced; | 1504 | unsigned char *dst = coding->destination + coding->produced; |
| @@ -1577,8 +1559,7 @@ encode_coding_utf_8 (struct coding_system *coding) | |||
| 1577 | 1559 | ||
| 1578 | 1560 | ||
| 1579 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 1561 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 1580 | Check if a text is encoded in one of UTF-16 based coding systems. | 1562 | Return true if a text is encoded in one of UTF-16 based coding systems. */ |
| 1581 | If it is, return 1, else return 0. */ | ||
| 1582 | 1563 | ||
| 1583 | #define UTF_16_HIGH_SURROGATE_P(val) \ | 1564 | #define UTF_16_HIGH_SURROGATE_P(val) \ |
| 1584 | (((val) & 0xFC00) == 0xD800) | 1565 | (((val) & 0xFC00) == 0xD800) |
| @@ -1587,13 +1568,13 @@ encode_coding_utf_8 (struct coding_system *coding) | |||
| 1587 | (((val) & 0xFC00) == 0xDC00) | 1568 | (((val) & 0xFC00) == 0xDC00) |
| 1588 | 1569 | ||
| 1589 | 1570 | ||
| 1590 | static int | 1571 | static bool |
| 1591 | detect_coding_utf_16 (struct coding_system *coding, | 1572 | detect_coding_utf_16 (struct coding_system *coding, |
| 1592 | struct coding_detection_info *detect_info) | 1573 | struct coding_detection_info *detect_info) |
| 1593 | { | 1574 | { |
| 1594 | const unsigned char *src = coding->source; | 1575 | const unsigned char *src = coding->source; |
| 1595 | const unsigned char *src_end = coding->source + coding->src_bytes; | 1576 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 1596 | int multibytep = coding->src_multibyte; | 1577 | bool multibytep = coding->src_multibyte; |
| 1597 | int c1, c2; | 1578 | int c1, c2; |
| 1598 | 1579 | ||
| 1599 | detect_info->checked |= CATEGORY_MASK_UTF_16; | 1580 | detect_info->checked |= CATEGORY_MASK_UTF_16; |
| @@ -1680,12 +1661,12 @@ decode_coding_utf_16 (struct coding_system *coding) | |||
| 1680 | /* We may produce at most 3 chars in one loop. */ | 1661 | /* We may produce at most 3 chars in one loop. */ |
| 1681 | int *charbuf_end = coding->charbuf + coding->charbuf_size - 2; | 1662 | int *charbuf_end = coding->charbuf + coding->charbuf_size - 2; |
| 1682 | ptrdiff_t consumed_chars = 0, consumed_chars_base = 0; | 1663 | ptrdiff_t consumed_chars = 0, consumed_chars_base = 0; |
| 1683 | int multibytep = coding->src_multibyte; | 1664 | bool multibytep = coding->src_multibyte; |
| 1684 | enum utf_bom_type bom = CODING_UTF_16_BOM (coding); | 1665 | enum utf_bom_type bom = CODING_UTF_16_BOM (coding); |
| 1685 | enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding); | 1666 | enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding); |
| 1686 | int surrogate = CODING_UTF_16_SURROGATE (coding); | 1667 | int surrogate = CODING_UTF_16_SURROGATE (coding); |
| 1687 | int eol_dos = | 1668 | bool eol_dos |
| 1688 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | 1669 | = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); |
| 1689 | int byte_after_cr1 = -1, byte_after_cr2 = -1; | 1670 | int byte_after_cr1 = -1, byte_after_cr2 = -1; |
| 1690 | 1671 | ||
| 1691 | if (bom == utf_with_bom) | 1672 | if (bom == utf_with_bom) |
| @@ -1795,17 +1776,17 @@ decode_coding_utf_16 (struct coding_system *coding) | |||
| 1795 | coding->charbuf_used = charbuf - coding->charbuf; | 1776 | coding->charbuf_used = charbuf - coding->charbuf; |
| 1796 | } | 1777 | } |
| 1797 | 1778 | ||
| 1798 | static int | 1779 | static bool |
| 1799 | encode_coding_utf_16 (struct coding_system *coding) | 1780 | encode_coding_utf_16 (struct coding_system *coding) |
| 1800 | { | 1781 | { |
| 1801 | int multibytep = coding->dst_multibyte; | 1782 | bool multibytep = coding->dst_multibyte; |
| 1802 | int *charbuf = coding->charbuf; | 1783 | int *charbuf = coding->charbuf; |
| 1803 | int *charbuf_end = charbuf + coding->charbuf_used; | 1784 | int *charbuf_end = charbuf + coding->charbuf_used; |
| 1804 | unsigned char *dst = coding->destination + coding->produced; | 1785 | unsigned char *dst = coding->destination + coding->produced; |
| 1805 | unsigned char *dst_end = coding->destination + coding->dst_bytes; | 1786 | unsigned char *dst_end = coding->destination + coding->dst_bytes; |
| 1806 | int safe_room = 8; | 1787 | int safe_room = 8; |
| 1807 | enum utf_bom_type bom = CODING_UTF_16_BOM (coding); | 1788 | enum utf_bom_type bom = CODING_UTF_16_BOM (coding); |
| 1808 | int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian; | 1789 | bool big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian; |
| 1809 | ptrdiff_t produced_chars = 0; | 1790 | ptrdiff_t produced_chars = 0; |
| 1810 | int c; | 1791 | int c; |
| 1811 | 1792 | ||
| @@ -1930,16 +1911,15 @@ char emacs_mule_bytes[256]; | |||
| 1930 | 1911 | ||
| 1931 | 1912 | ||
| 1932 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 1913 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 1933 | Check if a text is encoded in `emacs-mule'. If it is, return 1, | 1914 | Return true if a text is encoded in 'emacs-mule'. */ |
| 1934 | else return 0. */ | ||
| 1935 | 1915 | ||
| 1936 | static int | 1916 | static bool |
| 1937 | detect_coding_emacs_mule (struct coding_system *coding, | 1917 | detect_coding_emacs_mule (struct coding_system *coding, |
| 1938 | struct coding_detection_info *detect_info) | 1918 | struct coding_detection_info *detect_info) |
| 1939 | { | 1919 | { |
| 1940 | const unsigned char *src = coding->source, *src_base; | 1920 | const unsigned char *src = coding->source, *src_base; |
| 1941 | const unsigned char *src_end = coding->source + coding->src_bytes; | 1921 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 1942 | int multibytep = coding->src_multibyte; | 1922 | bool multibytep = coding->src_multibyte; |
| 1943 | ptrdiff_t consumed_chars = 0; | 1923 | ptrdiff_t consumed_chars = 0; |
| 1944 | int c; | 1924 | int c; |
| 1945 | int found = 0; | 1925 | int found = 0; |
| @@ -2029,12 +2009,12 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src, | |||
| 2029 | { | 2009 | { |
| 2030 | const unsigned char *src_end = coding->source + coding->src_bytes; | 2010 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 2031 | const unsigned char *src_base = src; | 2011 | const unsigned char *src_base = src; |
| 2032 | int multibytep = coding->src_multibyte; | 2012 | bool multibytep = coding->src_multibyte; |
| 2033 | int charset_ID; | 2013 | int charset_ID; |
| 2034 | unsigned code; | 2014 | unsigned code; |
| 2035 | int c; | 2015 | int c; |
| 2036 | int consumed_chars = 0; | 2016 | int consumed_chars = 0; |
| 2037 | int mseq_found = 0; | 2017 | bool mseq_found = 0; |
| 2038 | 2018 | ||
| 2039 | ONE_MORE_BYTE (c); | 2019 | ONE_MORE_BYTE (c); |
| 2040 | if (c < 0) | 2020 | if (c < 0) |
| @@ -2131,7 +2111,7 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src, | |||
| 2131 | break; | 2111 | break; |
| 2132 | 2112 | ||
| 2133 | default: | 2113 | default: |
| 2134 | abort (); | 2114 | emacs_abort (); |
| 2135 | } | 2115 | } |
| 2136 | CODING_DECODE_CHAR (coding, src, src_base, src_end, | 2116 | CODING_DECODE_CHAR (coding, src, src_base, src_end, |
| 2137 | CHARSET_FROM_ID (charset_ID), code, c); | 2117 | CHARSET_FROM_ID (charset_ID), code, c); |
| @@ -2411,12 +2391,12 @@ decode_coding_emacs_mule (struct coding_system *coding) | |||
| 2411 | /* We can produce up to 2 characters in a loop. */ | 2391 | /* We can produce up to 2 characters in a loop. */ |
| 2412 | - 1; | 2392 | - 1; |
| 2413 | ptrdiff_t consumed_chars = 0, consumed_chars_base; | 2393 | ptrdiff_t consumed_chars = 0, consumed_chars_base; |
| 2414 | int multibytep = coding->src_multibyte; | 2394 | bool multibytep = coding->src_multibyte; |
| 2415 | ptrdiff_t char_offset = coding->produced_char; | 2395 | ptrdiff_t char_offset = coding->produced_char; |
| 2416 | ptrdiff_t last_offset = char_offset; | 2396 | ptrdiff_t last_offset = char_offset; |
| 2417 | int last_id = charset_ascii; | 2397 | int last_id = charset_ascii; |
| 2418 | int eol_dos = | 2398 | bool eol_dos |
| 2419 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | 2399 | = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); |
| 2420 | int byte_after_cr = -1; | 2400 | int byte_after_cr = -1; |
| 2421 | struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status; | 2401 | struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status; |
| 2422 | 2402 | ||
| @@ -2425,7 +2405,7 @@ decode_coding_emacs_mule (struct coding_system *coding) | |||
| 2425 | int i; | 2405 | int i; |
| 2426 | 2406 | ||
| 2427 | if (charbuf_end - charbuf < cmp_status->length) | 2407 | if (charbuf_end - charbuf < cmp_status->length) |
| 2428 | abort (); | 2408 | emacs_abort (); |
| 2429 | for (i = 0; i < cmp_status->length; i++) | 2409 | for (i = 0; i < cmp_status->length; i++) |
| 2430 | *charbuf++ = cmp_status->carryover[i]; | 2410 | *charbuf++ = cmp_status->carryover[i]; |
| 2431 | coding->annotated = 1; | 2411 | coding->annotated = 1; |
| @@ -2657,10 +2637,10 @@ decode_coding_emacs_mule (struct coding_system *coding) | |||
| 2657 | } while (0); | 2637 | } while (0); |
| 2658 | 2638 | ||
| 2659 | 2639 | ||
| 2660 | static int | 2640 | static bool |
| 2661 | encode_coding_emacs_mule (struct coding_system *coding) | 2641 | encode_coding_emacs_mule (struct coding_system *coding) |
| 2662 | { | 2642 | { |
| 2663 | int multibytep = coding->dst_multibyte; | 2643 | bool multibytep = coding->dst_multibyte; |
| 2664 | int *charbuf = coding->charbuf; | 2644 | int *charbuf = coding->charbuf; |
| 2665 | int *charbuf_end = charbuf + coding->charbuf_used; | 2645 | int *charbuf_end = charbuf + coding->charbuf_used; |
| 2666 | unsigned char *dst = coding->destination + coding->produced; | 2646 | unsigned char *dst = coding->destination + coding->produced; |
| @@ -2674,8 +2654,8 @@ encode_coding_emacs_mule (struct coding_system *coding) | |||
| 2674 | CODING_GET_INFO (coding, attrs, charset_list); | 2654 | CODING_GET_INFO (coding, attrs, charset_list); |
| 2675 | if (! EQ (charset_list, Vemacs_mule_charset_list)) | 2655 | if (! EQ (charset_list, Vemacs_mule_charset_list)) |
| 2676 | { | 2656 | { |
| 2677 | CODING_ATTR_CHARSET_LIST (attrs) | 2657 | charset_list = Vemacs_mule_charset_list; |
| 2678 | = charset_list = Vemacs_mule_charset_list; | 2658 | ASET (attrs, coding_attr_charset_list, charset_list); |
| 2679 | } | 2659 | } |
| 2680 | 2660 | ||
| 2681 | while (charbuf < charbuf_end) | 2661 | while (charbuf < charbuf_end) |
| @@ -2699,7 +2679,7 @@ encode_coding_emacs_mule (struct coding_system *coding) | |||
| 2699 | preferred_charset_id = -1; | 2679 | preferred_charset_id = -1; |
| 2700 | break; | 2680 | break; |
| 2701 | default: | 2681 | default: |
| 2702 | abort (); | 2682 | emacs_abort (); |
| 2703 | } | 2683 | } |
| 2704 | charbuf += -c - 1; | 2684 | charbuf += -c - 1; |
| 2705 | continue; | 2685 | continue; |
| @@ -2722,7 +2702,7 @@ encode_coding_emacs_mule (struct coding_system *coding) | |||
| 2722 | 2702 | ||
| 2723 | if (preferred_charset_id >= 0) | 2703 | if (preferred_charset_id >= 0) |
| 2724 | { | 2704 | { |
| 2725 | int result; | 2705 | bool result; |
| 2726 | 2706 | ||
| 2727 | charset = CHARSET_FROM_ID (preferred_charset_id); | 2707 | charset = CHARSET_FROM_ID (preferred_charset_id); |
| 2728 | CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result); | 2708 | CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result); |
| @@ -2967,8 +2947,8 @@ setup_iso_safe_charsets (Lisp_Object attrs) | |||
| 2967 | if ((flags & CODING_ISO_FLAG_FULL_SUPPORT) | 2947 | if ((flags & CODING_ISO_FLAG_FULL_SUPPORT) |
| 2968 | && ! EQ (charset_list, Viso_2022_charset_list)) | 2948 | && ! EQ (charset_list, Viso_2022_charset_list)) |
| 2969 | { | 2949 | { |
| 2970 | CODING_ATTR_CHARSET_LIST (attrs) | 2950 | charset_list = Viso_2022_charset_list; |
| 2971 | = charset_list = Viso_2022_charset_list; | 2951 | ASET (attrs, coding_attr_charset_list, charset_list); |
| 2972 | ASET (attrs, coding_attr_safe_charsets, Qnil); | 2952 | ASET (attrs, coding_attr_safe_charsets, Qnil); |
| 2973 | } | 2953 | } |
| 2974 | 2954 | ||
| @@ -3017,17 +2997,17 @@ setup_iso_safe_charsets (Lisp_Object attrs) | |||
| 3017 | 2997 | ||
| 3018 | 2998 | ||
| 3019 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 2999 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 3020 | Check if a text is encoded in one of ISO-2022 based coding systems. | 3000 | Return true if a text is encoded in one of ISO-2022 based coding |
| 3021 | If it is, return 1, else return 0. */ | 3001 | systems. */ |
| 3022 | 3002 | ||
| 3023 | static int | 3003 | static bool |
| 3024 | detect_coding_iso_2022 (struct coding_system *coding, | 3004 | detect_coding_iso_2022 (struct coding_system *coding, |
| 3025 | struct coding_detection_info *detect_info) | 3005 | struct coding_detection_info *detect_info) |
| 3026 | { | 3006 | { |
| 3027 | const unsigned char *src = coding->source, *src_base = src; | 3007 | const unsigned char *src = coding->source, *src_base = src; |
| 3028 | const unsigned char *src_end = coding->source + coding->src_bytes; | 3008 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 3029 | int multibytep = coding->src_multibyte; | 3009 | bool multibytep = coding->src_multibyte; |
| 3030 | int single_shifting = 0; | 3010 | bool single_shifting = 0; |
| 3031 | int id; | 3011 | int id; |
| 3032 | int c, c1; | 3012 | int c, c1; |
| 3033 | ptrdiff_t consumed_chars = 0; | 3013 | ptrdiff_t consumed_chars = 0; |
| @@ -3187,20 +3167,7 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 3187 | } | 3167 | } |
| 3188 | if (single_shifting) | 3168 | if (single_shifting) |
| 3189 | break; | 3169 | break; |
| 3190 | check_extra_latin: | 3170 | goto check_extra_latin; |
| 3191 | if (! VECTORP (Vlatin_extra_code_table) | ||
| 3192 | || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) | ||
| 3193 | { | ||
| 3194 | rejected = CATEGORY_MASK_ISO; | ||
| 3195 | break; | ||
| 3196 | } | ||
| 3197 | if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) | ||
| 3198 | & CODING_ISO_FLAG_LATIN_EXTRA) | ||
| 3199 | found |= CATEGORY_MASK_ISO_8_1; | ||
| 3200 | else | ||
| 3201 | rejected |= CATEGORY_MASK_ISO_8_1; | ||
| 3202 | rejected |= CATEGORY_MASK_ISO_8_2; | ||
| 3203 | break; | ||
| 3204 | 3171 | ||
| 3205 | default: | 3172 | default: |
| 3206 | if (c < 0) | 3173 | if (c < 0) |
| @@ -3251,6 +3218,20 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 3251 | } | 3218 | } |
| 3252 | break; | 3219 | break; |
| 3253 | } | 3220 | } |
| 3221 | check_extra_latin: | ||
| 3222 | if (! VECTORP (Vlatin_extra_code_table) | ||
| 3223 | || NILP (AREF (Vlatin_extra_code_table, c))) | ||
| 3224 | { | ||
| 3225 | rejected = CATEGORY_MASK_ISO; | ||
| 3226 | break; | ||
| 3227 | } | ||
| 3228 | if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) | ||
| 3229 | & CODING_ISO_FLAG_LATIN_EXTRA) | ||
| 3230 | found |= CATEGORY_MASK_ISO_8_1; | ||
| 3231 | else | ||
| 3232 | rejected |= CATEGORY_MASK_ISO_8_1; | ||
| 3233 | rejected |= CATEGORY_MASK_ISO_8_2; | ||
| 3234 | break; | ||
| 3254 | } | 3235 | } |
| 3255 | } | 3236 | } |
| 3256 | detect_info->rejected |= CATEGORY_MASK_ISO; | 3237 | detect_info->rejected |= CATEGORY_MASK_ISO; |
| @@ -3390,8 +3371,6 @@ detect_coding_iso_2022 (struct coding_system *coding, | |||
| 3390 | 3371 | ||
| 3391 | /* Finish the current composition as invalid. */ | 3372 | /* Finish the current composition as invalid. */ |
| 3392 | 3373 | ||
| 3393 | static int finish_composition (int *, struct composition_status *); | ||
| 3394 | |||
| 3395 | static int | 3374 | static int |
| 3396 | finish_composition (int *charbuf, struct composition_status *cmp_status) | 3375 | finish_composition (int *charbuf, struct composition_status *cmp_status) |
| 3397 | { | 3376 | { |
| @@ -3541,7 +3520,7 @@ decode_coding_iso_2022 (struct coding_system *coding) | |||
| 3541 | int *charbuf_end | 3520 | int *charbuf_end |
| 3542 | = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3); | 3521 | = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3); |
| 3543 | ptrdiff_t consumed_chars = 0, consumed_chars_base; | 3522 | ptrdiff_t consumed_chars = 0, consumed_chars_base; |
| 3544 | int multibytep = coding->src_multibyte; | 3523 | bool multibytep = coding->src_multibyte; |
| 3545 | /* Charsets invoked to graphic plane 0 and 1 respectively. */ | 3524 | /* Charsets invoked to graphic plane 0 and 1 respectively. */ |
| 3546 | int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0); | 3525 | int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0); |
| 3547 | int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1); | 3526 | int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1); |
| @@ -3553,8 +3532,8 @@ decode_coding_iso_2022 (struct coding_system *coding) | |||
| 3553 | ptrdiff_t char_offset = coding->produced_char; | 3532 | ptrdiff_t char_offset = coding->produced_char; |
| 3554 | ptrdiff_t last_offset = char_offset; | 3533 | ptrdiff_t last_offset = char_offset; |
| 3555 | int last_id = charset_ascii; | 3534 | int last_id = charset_ascii; |
| 3556 | int eol_dos = | 3535 | bool eol_dos |
| 3557 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | 3536 | = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); |
| 3558 | int byte_after_cr = -1; | 3537 | int byte_after_cr = -1; |
| 3559 | int i; | 3538 | int i; |
| 3560 | 3539 | ||
| @@ -3564,7 +3543,7 @@ decode_coding_iso_2022 (struct coding_system *coding) | |||
| 3564 | if (cmp_status->state != COMPOSING_NO) | 3543 | if (cmp_status->state != COMPOSING_NO) |
| 3565 | { | 3544 | { |
| 3566 | if (charbuf_end - charbuf < cmp_status->length) | 3545 | if (charbuf_end - charbuf < cmp_status->length) |
| 3567 | abort (); | 3546 | emacs_abort (); |
| 3568 | for (i = 0; i < cmp_status->length; i++) | 3547 | for (i = 0; i < cmp_status->length; i++) |
| 3569 | *charbuf++ = cmp_status->carryover[i]; | 3548 | *charbuf++ = cmp_status->carryover[i]; |
| 3570 | coding->annotated = 1; | 3549 | coding->annotated = 1; |
| @@ -3795,7 +3774,10 @@ decode_coding_iso_2022 (struct coding_system *coding) | |||
| 3795 | else | 3774 | else |
| 3796 | charset = CHARSET_FROM_ID (charset_id_2); | 3775 | charset = CHARSET_FROM_ID (charset_id_2); |
| 3797 | ONE_MORE_BYTE (c1); | 3776 | ONE_MORE_BYTE (c1); |
| 3798 | if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)) | 3777 | if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0) |
| 3778 | || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) | ||
| 3779 | && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4) | ||
| 3780 | ? c1 >= 0x80 : c1 < 0x80))) | ||
| 3799 | goto invalid_code; | 3781 | goto invalid_code; |
| 3800 | break; | 3782 | break; |
| 3801 | 3783 | ||
| @@ -3809,7 +3791,10 @@ decode_coding_iso_2022 (struct coding_system *coding) | |||
| 3809 | else | 3791 | else |
| 3810 | charset = CHARSET_FROM_ID (charset_id_3); | 3792 | charset = CHARSET_FROM_ID (charset_id_3); |
| 3811 | ONE_MORE_BYTE (c1); | 3793 | ONE_MORE_BYTE (c1); |
| 3812 | if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)) | 3794 | if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0) |
| 3795 | || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) | ||
| 3796 | && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4) | ||
| 3797 | ? c1 >= 0x80 : c1 < 0x80))) | ||
| 3813 | goto invalid_code; | 3798 | goto invalid_code; |
| 3814 | break; | 3799 | break; |
| 3815 | 3800 | ||
| @@ -3946,7 +3931,7 @@ decode_coding_iso_2022 (struct coding_system *coding) | |||
| 3946 | break; | 3931 | break; |
| 3947 | 3932 | ||
| 3948 | default: | 3933 | default: |
| 3949 | abort (); | 3934 | emacs_abort (); |
| 3950 | } | 3935 | } |
| 3951 | 3936 | ||
| 3952 | if (cmp_status->state == COMPOSING_NO | 3937 | if (cmp_status->state == COMPOSING_NO |
| @@ -4021,6 +4006,14 @@ decode_coding_iso_2022 (struct coding_system *coding) | |||
| 4021 | *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); | 4006 | *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); |
| 4022 | char_offset++; | 4007 | char_offset++; |
| 4023 | coding->errors++; | 4008 | coding->errors++; |
| 4009 | /* Reset the invocation and designation status to the safest | ||
| 4010 | one; i.e. designate ASCII to the graphic register 0, and | ||
| 4011 | invoke that register to the graphic plane 0. This typically | ||
| 4012 | helps the case that a designation sequence for ASCII "ESC ( | ||
| 4013 | B" is somehow broken (e.g. broken by a newline). */ | ||
| 4014 | CODING_ISO_INVOCATION (coding, 0) = 0; | ||
| 4015 | CODING_ISO_DESIGNATION (coding, 0) = charset_ascii; | ||
| 4016 | charset_id_0 = charset_ascii; | ||
| 4024 | continue; | 4017 | continue; |
| 4025 | 4018 | ||
| 4026 | break_loop: | 4019 | break_loop: |
| @@ -4282,7 +4275,7 @@ encode_invocation_designation (struct charset *charset, | |||
| 4282 | struct coding_system *coding, | 4275 | struct coding_system *coding, |
| 4283 | unsigned char *dst, ptrdiff_t *p_nchars) | 4276 | unsigned char *dst, ptrdiff_t *p_nchars) |
| 4284 | { | 4277 | { |
| 4285 | int multibytep = coding->dst_multibyte; | 4278 | bool multibytep = coding->dst_multibyte; |
| 4286 | ptrdiff_t produced_chars = *p_nchars; | 4279 | ptrdiff_t produced_chars = *p_nchars; |
| 4287 | int reg; /* graphic register number */ | 4280 | int reg; /* graphic register number */ |
| 4288 | int id = CHARSET_ID (charset); | 4281 | int id = CHARSET_ID (charset); |
| @@ -4380,7 +4373,7 @@ encode_designation_at_bol (struct coding_system *coding, | |||
| 4380 | int r[4]; | 4373 | int r[4]; |
| 4381 | int c, found = 0, reg; | 4374 | int c, found = 0, reg; |
| 4382 | ptrdiff_t produced_chars = 0; | 4375 | ptrdiff_t produced_chars = 0; |
| 4383 | int multibytep = coding->dst_multibyte; | 4376 | bool multibytep = coding->dst_multibyte; |
| 4384 | Lisp_Object attrs; | 4377 | Lisp_Object attrs; |
| 4385 | Lisp_Object charset_list; | 4378 | Lisp_Object charset_list; |
| 4386 | 4379 | ||
| @@ -4422,21 +4415,21 @@ encode_designation_at_bol (struct coding_system *coding, | |||
| 4422 | 4415 | ||
| 4423 | /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */ | 4416 | /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */ |
| 4424 | 4417 | ||
| 4425 | static int | 4418 | static bool |
| 4426 | encode_coding_iso_2022 (struct coding_system *coding) | 4419 | encode_coding_iso_2022 (struct coding_system *coding) |
| 4427 | { | 4420 | { |
| 4428 | int multibytep = coding->dst_multibyte; | 4421 | bool multibytep = coding->dst_multibyte; |
| 4429 | int *charbuf = coding->charbuf; | 4422 | int *charbuf = coding->charbuf; |
| 4430 | int *charbuf_end = charbuf + coding->charbuf_used; | 4423 | int *charbuf_end = charbuf + coding->charbuf_used; |
| 4431 | unsigned char *dst = coding->destination + coding->produced; | 4424 | unsigned char *dst = coding->destination + coding->produced; |
| 4432 | unsigned char *dst_end = coding->destination + coding->dst_bytes; | 4425 | unsigned char *dst_end = coding->destination + coding->dst_bytes; |
| 4433 | int safe_room = 16; | 4426 | int safe_room = 16; |
| 4434 | int bol_designation | 4427 | bool bol_designation |
| 4435 | = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL | 4428 | = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL |
| 4436 | && CODING_ISO_BOL (coding)); | 4429 | && CODING_ISO_BOL (coding)); |
| 4437 | ptrdiff_t produced_chars = 0; | 4430 | ptrdiff_t produced_chars = 0; |
| 4438 | Lisp_Object attrs, eol_type, charset_list; | 4431 | Lisp_Object attrs, eol_type, charset_list; |
| 4439 | int ascii_compatible; | 4432 | bool ascii_compatible; |
| 4440 | int c; | 4433 | int c; |
| 4441 | int preferred_charset_id = -1; | 4434 | int preferred_charset_id = -1; |
| 4442 | 4435 | ||
| @@ -4501,7 +4494,7 @@ encode_coding_iso_2022 (struct coding_system *coding) | |||
| 4501 | preferred_charset_id = -1; | 4494 | preferred_charset_id = -1; |
| 4502 | break; | 4495 | break; |
| 4503 | default: | 4496 | default: |
| 4504 | abort (); | 4497 | emacs_abort (); |
| 4505 | } | 4498 | } |
| 4506 | charbuf += -c - 1; | 4499 | charbuf += -c - 1; |
| 4507 | continue; | 4500 | continue; |
| @@ -4523,8 +4516,9 @@ encode_coding_iso_2022 (struct coding_system *coding) | |||
| 4523 | CODING_ISO_DESIGNATION (coding, i) | 4516 | CODING_ISO_DESIGNATION (coding, i) |
| 4524 | = CODING_ISO_INITIAL (coding, i); | 4517 | = CODING_ISO_INITIAL (coding, i); |
| 4525 | } | 4518 | } |
| 4526 | bol_designation | 4519 | bol_designation = ((CODING_ISO_FLAGS (coding) |
| 4527 | = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL; | 4520 | & CODING_ISO_FLAG_DESIGNATE_AT_BOL) |
| 4521 | != 0); | ||
| 4528 | } | 4522 | } |
| 4529 | else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL) | 4523 | else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL) |
| 4530 | ENCODE_RESET_PLANE_AND_REGISTER (); | 4524 | ENCODE_RESET_PLANE_AND_REGISTER (); |
| @@ -4551,7 +4545,7 @@ encode_coding_iso_2022 (struct coding_system *coding) | |||
| 4551 | 4545 | ||
| 4552 | if (preferred_charset_id >= 0) | 4546 | if (preferred_charset_id >= 0) |
| 4553 | { | 4547 | { |
| 4554 | int result; | 4548 | bool result; |
| 4555 | 4549 | ||
| 4556 | charset = CHARSET_FROM_ID (preferred_charset_id); | 4550 | charset = CHARSET_FROM_ID (preferred_charset_id); |
| 4557 | CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result); | 4551 | CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result); |
| @@ -4631,16 +4625,15 @@ encode_coding_iso_2022 (struct coding_system *coding) | |||
| 4631 | */ | 4625 | */ |
| 4632 | 4626 | ||
| 4633 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 4627 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 4634 | Check if a text is encoded in SJIS. If it is, return | 4628 | Return true if a text is encoded in SJIS. */ |
| 4635 | CATEGORY_MASK_SJIS, else return 0. */ | ||
| 4636 | 4629 | ||
| 4637 | static int | 4630 | static bool |
| 4638 | detect_coding_sjis (struct coding_system *coding, | 4631 | detect_coding_sjis (struct coding_system *coding, |
| 4639 | struct coding_detection_info *detect_info) | 4632 | struct coding_detection_info *detect_info) |
| 4640 | { | 4633 | { |
| 4641 | const unsigned char *src = coding->source, *src_base; | 4634 | const unsigned char *src = coding->source, *src_base; |
| 4642 | const unsigned char *src_end = coding->source + coding->src_bytes; | 4635 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 4643 | int multibytep = coding->src_multibyte; | 4636 | bool multibytep = coding->src_multibyte; |
| 4644 | ptrdiff_t consumed_chars = 0; | 4637 | ptrdiff_t consumed_chars = 0; |
| 4645 | int found = 0; | 4638 | int found = 0; |
| 4646 | int c; | 4639 | int c; |
| @@ -4688,16 +4681,15 @@ detect_coding_sjis (struct coding_system *coding, | |||
| 4688 | } | 4681 | } |
| 4689 | 4682 | ||
| 4690 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 4683 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 4691 | Check if a text is encoded in BIG5. If it is, return | 4684 | Return true if a text is encoded in BIG5. */ |
| 4692 | CATEGORY_MASK_BIG5, else return 0. */ | ||
| 4693 | 4685 | ||
| 4694 | static int | 4686 | static bool |
| 4695 | detect_coding_big5 (struct coding_system *coding, | 4687 | detect_coding_big5 (struct coding_system *coding, |
| 4696 | struct coding_detection_info *detect_info) | 4688 | struct coding_detection_info *detect_info) |
| 4697 | { | 4689 | { |
| 4698 | const unsigned char *src = coding->source, *src_base; | 4690 | const unsigned char *src = coding->source, *src_base; |
| 4699 | const unsigned char *src_end = coding->source + coding->src_bytes; | 4691 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 4700 | int multibytep = coding->src_multibyte; | 4692 | bool multibytep = coding->src_multibyte; |
| 4701 | ptrdiff_t consumed_chars = 0; | 4693 | ptrdiff_t consumed_chars = 0; |
| 4702 | int found = 0; | 4694 | int found = 0; |
| 4703 | int c; | 4695 | int c; |
| @@ -4735,8 +4727,7 @@ detect_coding_big5 (struct coding_system *coding, | |||
| 4735 | return 1; | 4727 | return 1; |
| 4736 | } | 4728 | } |
| 4737 | 4729 | ||
| 4738 | /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". | 4730 | /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ |
| 4739 | If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */ | ||
| 4740 | 4731 | ||
| 4741 | static void | 4732 | static void |
| 4742 | decode_coding_sjis (struct coding_system *coding) | 4733 | decode_coding_sjis (struct coding_system *coding) |
| @@ -4750,15 +4741,15 @@ decode_coding_sjis (struct coding_system *coding) | |||
| 4750 | int *charbuf_end | 4741 | int *charbuf_end |
| 4751 | = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); | 4742 | = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); |
| 4752 | ptrdiff_t consumed_chars = 0, consumed_chars_base; | 4743 | ptrdiff_t consumed_chars = 0, consumed_chars_base; |
| 4753 | int multibytep = coding->src_multibyte; | 4744 | bool multibytep = coding->src_multibyte; |
| 4754 | struct charset *charset_roman, *charset_kanji, *charset_kana; | 4745 | struct charset *charset_roman, *charset_kanji, *charset_kana; |
| 4755 | struct charset *charset_kanji2; | 4746 | struct charset *charset_kanji2; |
| 4756 | Lisp_Object attrs, charset_list, val; | 4747 | Lisp_Object attrs, charset_list, val; |
| 4757 | ptrdiff_t char_offset = coding->produced_char; | 4748 | ptrdiff_t char_offset = coding->produced_char; |
| 4758 | ptrdiff_t last_offset = char_offset; | 4749 | ptrdiff_t last_offset = char_offset; |
| 4759 | int last_id = charset_ascii; | 4750 | int last_id = charset_ascii; |
| 4760 | int eol_dos = | 4751 | bool eol_dos |
| 4761 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | 4752 | = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); |
| 4762 | int byte_after_cr = -1; | 4753 | int byte_after_cr = -1; |
| 4763 | 4754 | ||
| 4764 | CODING_GET_INFO (coding, attrs, charset_list); | 4755 | CODING_GET_INFO (coding, attrs, charset_list); |
| @@ -4868,14 +4859,14 @@ decode_coding_big5 (struct coding_system *coding) | |||
| 4868 | int *charbuf_end | 4859 | int *charbuf_end |
| 4869 | = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); | 4860 | = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); |
| 4870 | ptrdiff_t consumed_chars = 0, consumed_chars_base; | 4861 | ptrdiff_t consumed_chars = 0, consumed_chars_base; |
| 4871 | int multibytep = coding->src_multibyte; | 4862 | bool multibytep = coding->src_multibyte; |
| 4872 | struct charset *charset_roman, *charset_big5; | 4863 | struct charset *charset_roman, *charset_big5; |
| 4873 | Lisp_Object attrs, charset_list, val; | 4864 | Lisp_Object attrs, charset_list, val; |
| 4874 | ptrdiff_t char_offset = coding->produced_char; | 4865 | ptrdiff_t char_offset = coding->produced_char; |
| 4875 | ptrdiff_t last_offset = char_offset; | 4866 | ptrdiff_t last_offset = char_offset; |
| 4876 | int last_id = charset_ascii; | 4867 | int last_id = charset_ascii; |
| 4877 | int eol_dos = | 4868 | bool eol_dos |
| 4878 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | 4869 | = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); |
| 4879 | int byte_after_cr = -1; | 4870 | int byte_after_cr = -1; |
| 4880 | 4871 | ||
| 4881 | CODING_GET_INFO (coding, attrs, charset_list); | 4872 | CODING_GET_INFO (coding, attrs, charset_list); |
| @@ -4957,13 +4948,12 @@ decode_coding_big5 (struct coding_system *coding) | |||
| 4957 | `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We | 4948 | `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We |
| 4958 | are sure that all these charsets are registered as official charset | 4949 | are sure that all these charsets are registered as official charset |
| 4959 | (i.e. do not have extended leading-codes). Characters of other | 4950 | (i.e. do not have extended leading-codes). Characters of other |
| 4960 | charsets are produced without any encoding. If SJIS_P is 1, encode | 4951 | charsets are produced without any encoding. */ |
| 4961 | SJIS text, else encode BIG5 text. */ | ||
| 4962 | 4952 | ||
| 4963 | static int | 4953 | static bool |
| 4964 | encode_coding_sjis (struct coding_system *coding) | 4954 | encode_coding_sjis (struct coding_system *coding) |
| 4965 | { | 4955 | { |
| 4966 | int multibytep = coding->dst_multibyte; | 4956 | bool multibytep = coding->dst_multibyte; |
| 4967 | int *charbuf = coding->charbuf; | 4957 | int *charbuf = coding->charbuf; |
| 4968 | int *charbuf_end = charbuf + coding->charbuf_used; | 4958 | int *charbuf_end = charbuf + coding->charbuf_used; |
| 4969 | unsigned char *dst = coding->destination + coding->produced; | 4959 | unsigned char *dst = coding->destination + coding->produced; |
| @@ -4971,7 +4961,7 @@ encode_coding_sjis (struct coding_system *coding) | |||
| 4971 | int safe_room = 4; | 4961 | int safe_room = 4; |
| 4972 | ptrdiff_t produced_chars = 0; | 4962 | ptrdiff_t produced_chars = 0; |
| 4973 | Lisp_Object attrs, charset_list, val; | 4963 | Lisp_Object attrs, charset_list, val; |
| 4974 | int ascii_compatible; | 4964 | bool ascii_compatible; |
| 4975 | struct charset *charset_kanji, *charset_kana; | 4965 | struct charset *charset_kanji, *charset_kana; |
| 4976 | struct charset *charset_kanji2; | 4966 | struct charset *charset_kanji2; |
| 4977 | int c; | 4967 | int c; |
| @@ -5018,7 +5008,7 @@ encode_coding_sjis (struct coding_system *coding) | |||
| 5018 | } | 5008 | } |
| 5019 | } | 5009 | } |
| 5020 | if (code == CHARSET_INVALID_CODE (charset)) | 5010 | if (code == CHARSET_INVALID_CODE (charset)) |
| 5021 | abort (); | 5011 | emacs_abort (); |
| 5022 | if (charset == charset_kanji) | 5012 | if (charset == charset_kanji) |
| 5023 | { | 5013 | { |
| 5024 | int c1, c2; | 5014 | int c1, c2; |
| @@ -5054,10 +5044,10 @@ encode_coding_sjis (struct coding_system *coding) | |||
| 5054 | return 0; | 5044 | return 0; |
| 5055 | } | 5045 | } |
| 5056 | 5046 | ||
| 5057 | static int | 5047 | static bool |
| 5058 | encode_coding_big5 (struct coding_system *coding) | 5048 | encode_coding_big5 (struct coding_system *coding) |
| 5059 | { | 5049 | { |
| 5060 | int multibytep = coding->dst_multibyte; | 5050 | bool multibytep = coding->dst_multibyte; |
| 5061 | int *charbuf = coding->charbuf; | 5051 | int *charbuf = coding->charbuf; |
| 5062 | int *charbuf_end = charbuf + coding->charbuf_used; | 5052 | int *charbuf_end = charbuf + coding->charbuf_used; |
| 5063 | unsigned char *dst = coding->destination + coding->produced; | 5053 | unsigned char *dst = coding->destination + coding->produced; |
| @@ -5065,7 +5055,7 @@ encode_coding_big5 (struct coding_system *coding) | |||
| 5065 | int safe_room = 4; | 5055 | int safe_room = 4; |
| 5066 | ptrdiff_t produced_chars = 0; | 5056 | ptrdiff_t produced_chars = 0; |
| 5067 | Lisp_Object attrs, charset_list, val; | 5057 | Lisp_Object attrs, charset_list, val; |
| 5068 | int ascii_compatible; | 5058 | bool ascii_compatible; |
| 5069 | struct charset *charset_big5; | 5059 | struct charset *charset_big5; |
| 5070 | int c; | 5060 | int c; |
| 5071 | 5061 | ||
| @@ -5108,7 +5098,7 @@ encode_coding_big5 (struct coding_system *coding) | |||
| 5108 | } | 5098 | } |
| 5109 | } | 5099 | } |
| 5110 | if (code == CHARSET_INVALID_CODE (charset)) | 5100 | if (code == CHARSET_INVALID_CODE (charset)) |
| 5111 | abort (); | 5101 | emacs_abort (); |
| 5112 | if (charset == charset_big5) | 5102 | if (charset == charset_big5) |
| 5113 | { | 5103 | { |
| 5114 | int c1, c2; | 5104 | int c1, c2; |
| @@ -5130,17 +5120,16 @@ encode_coding_big5 (struct coding_system *coding) | |||
| 5130 | /*** 10. CCL handlers ***/ | 5120 | /*** 10. CCL handlers ***/ |
| 5131 | 5121 | ||
| 5132 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 5122 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 5133 | Check if a text is encoded in a coding system of which | 5123 | Return true if a text is encoded in a coding system of which |
| 5134 | encoder/decoder are written in CCL program. If it is, return | 5124 | encoder/decoder are written in CCL program. */ |
| 5135 | CATEGORY_MASK_CCL, else return 0. */ | ||
| 5136 | 5125 | ||
| 5137 | static int | 5126 | static bool |
| 5138 | detect_coding_ccl (struct coding_system *coding, | 5127 | detect_coding_ccl (struct coding_system *coding, |
| 5139 | struct coding_detection_info *detect_info) | 5128 | struct coding_detection_info *detect_info) |
| 5140 | { | 5129 | { |
| 5141 | const unsigned char *src = coding->source, *src_base; | 5130 | const unsigned char *src = coding->source, *src_base; |
| 5142 | const unsigned char *src_end = coding->source + coding->src_bytes; | 5131 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 5143 | int multibytep = coding->src_multibyte; | 5132 | bool multibytep = coding->src_multibyte; |
| 5144 | ptrdiff_t consumed_chars = 0; | 5133 | ptrdiff_t consumed_chars = 0; |
| 5145 | int found = 0; | 5134 | int found = 0; |
| 5146 | unsigned char *valids; | 5135 | unsigned char *valids; |
| @@ -5182,7 +5171,7 @@ decode_coding_ccl (struct coding_system *coding) | |||
| 5182 | int *charbuf = coding->charbuf + coding->charbuf_used; | 5171 | int *charbuf = coding->charbuf + coding->charbuf_used; |
| 5183 | int *charbuf_end = coding->charbuf + coding->charbuf_size; | 5172 | int *charbuf_end = coding->charbuf + coding->charbuf_size; |
| 5184 | ptrdiff_t consumed_chars = 0; | 5173 | ptrdiff_t consumed_chars = 0; |
| 5185 | int multibytep = coding->src_multibyte; | 5174 | bool multibytep = coding->src_multibyte; |
| 5186 | struct ccl_program *ccl = &coding->spec.ccl->ccl; | 5175 | struct ccl_program *ccl = &coding->spec.ccl->ccl; |
| 5187 | int source_charbuf[1024]; | 5176 | int source_charbuf[1024]; |
| 5188 | int source_byteidx[1025]; | 5177 | int source_byteidx[1025]; |
| @@ -5193,6 +5182,7 @@ decode_coding_ccl (struct coding_system *coding) | |||
| 5193 | while (1) | 5182 | while (1) |
| 5194 | { | 5183 | { |
| 5195 | const unsigned char *p = src; | 5184 | const unsigned char *p = src; |
| 5185 | ptrdiff_t offset; | ||
| 5196 | int i = 0; | 5186 | int i = 0; |
| 5197 | 5187 | ||
| 5198 | if (multibytep) | 5188 | if (multibytep) |
| @@ -5210,8 +5200,17 @@ decode_coding_ccl (struct coding_system *coding) | |||
| 5210 | 5200 | ||
| 5211 | if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK) | 5201 | if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK) |
| 5212 | ccl->last_block = 1; | 5202 | ccl->last_block = 1; |
| 5203 | /* As ccl_driver calls DECODE_CHAR, buffer may be relocated. */ | ||
| 5204 | charset_map_loaded = 0; | ||
| 5213 | ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf, | 5205 | ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf, |
| 5214 | charset_list); | 5206 | charset_list); |
| 5207 | if (charset_map_loaded | ||
| 5208 | && (offset = coding_change_source (coding))) | ||
| 5209 | { | ||
| 5210 | p += offset; | ||
| 5211 | src += offset; | ||
| 5212 | src_end += offset; | ||
| 5213 | } | ||
| 5215 | charbuf += ccl->produced; | 5214 | charbuf += ccl->produced; |
| 5216 | if (multibytep) | 5215 | if (multibytep) |
| 5217 | src += source_byteidx[ccl->consumed]; | 5216 | src += source_byteidx[ccl->consumed]; |
| @@ -5243,11 +5242,11 @@ decode_coding_ccl (struct coding_system *coding) | |||
| 5243 | coding->charbuf_used = charbuf - coding->charbuf; | 5242 | coding->charbuf_used = charbuf - coding->charbuf; |
| 5244 | } | 5243 | } |
| 5245 | 5244 | ||
| 5246 | static int | 5245 | static bool |
| 5247 | encode_coding_ccl (struct coding_system *coding) | 5246 | encode_coding_ccl (struct coding_system *coding) |
| 5248 | { | 5247 | { |
| 5249 | struct ccl_program *ccl = &coding->spec.ccl->ccl; | 5248 | struct ccl_program *ccl = &coding->spec.ccl->ccl; |
| 5250 | int multibytep = coding->dst_multibyte; | 5249 | bool multibytep = coding->dst_multibyte; |
| 5251 | int *charbuf = coding->charbuf; | 5250 | int *charbuf = coding->charbuf; |
| 5252 | int *charbuf_end = charbuf + coding->charbuf_used; | 5251 | int *charbuf_end = charbuf + coding->charbuf_used; |
| 5253 | unsigned char *dst = coding->destination + coding->produced; | 5252 | unsigned char *dst = coding->destination + coding->produced; |
| @@ -5264,8 +5263,15 @@ encode_coding_ccl (struct coding_system *coding) | |||
| 5264 | 5263 | ||
| 5265 | do | 5264 | do |
| 5266 | { | 5265 | { |
| 5266 | ptrdiff_t offset; | ||
| 5267 | |||
| 5268 | /* As ccl_driver calls DECODE_CHAR, buffer may be relocated. */ | ||
| 5269 | charset_map_loaded = 0; | ||
| 5267 | ccl_driver (ccl, charbuf, destination_charbuf, | 5270 | ccl_driver (ccl, charbuf, destination_charbuf, |
| 5268 | charbuf_end - charbuf, 1024, charset_list); | 5271 | charbuf_end - charbuf, 1024, charset_list); |
| 5272 | if (charset_map_loaded | ||
| 5273 | && (offset = coding_change_destination (coding))) | ||
| 5274 | dst += offset; | ||
| 5269 | if (multibytep) | 5275 | if (multibytep) |
| 5270 | { | 5276 | { |
| 5271 | ASSURE_DESTINATION (ccl->produced * 2); | 5277 | ASSURE_DESTINATION (ccl->produced * 2); |
| @@ -5308,7 +5314,6 @@ encode_coding_ccl (struct coding_system *coding) | |||
| 5308 | return 0; | 5314 | return 0; |
| 5309 | } | 5315 | } |
| 5310 | 5316 | ||
| 5311 | |||
| 5312 | 5317 | ||
| 5313 | /*** 10, 11. no-conversion handlers ***/ | 5318 | /*** 10, 11. no-conversion handlers ***/ |
| 5314 | 5319 | ||
| @@ -5317,8 +5322,8 @@ encode_coding_ccl (struct coding_system *coding) | |||
| 5317 | static void | 5322 | static void |
| 5318 | decode_coding_raw_text (struct coding_system *coding) | 5323 | decode_coding_raw_text (struct coding_system *coding) |
| 5319 | { | 5324 | { |
| 5320 | int eol_dos = | 5325 | bool eol_dos |
| 5321 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | 5326 | = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); |
| 5322 | 5327 | ||
| 5323 | coding->chars_at_source = 1; | 5328 | coding->chars_at_source = 1; |
| 5324 | coding->consumed_char = coding->src_chars; | 5329 | coding->consumed_char = coding->src_chars; |
| @@ -5333,10 +5338,10 @@ decode_coding_raw_text (struct coding_system *coding) | |||
| 5333 | record_conversion_result (coding, CODING_RESULT_SUCCESS); | 5338 | record_conversion_result (coding, CODING_RESULT_SUCCESS); |
| 5334 | } | 5339 | } |
| 5335 | 5340 | ||
| 5336 | static int | 5341 | static bool |
| 5337 | encode_coding_raw_text (struct coding_system *coding) | 5342 | encode_coding_raw_text (struct coding_system *coding) |
| 5338 | { | 5343 | { |
| 5339 | int multibytep = coding->dst_multibyte; | 5344 | bool multibytep = coding->dst_multibyte; |
| 5340 | int *charbuf = coding->charbuf; | 5345 | int *charbuf = coding->charbuf; |
| 5341 | int *charbuf_end = coding->charbuf + coding->charbuf_used; | 5346 | int *charbuf_end = coding->charbuf + coding->charbuf_used; |
| 5342 | unsigned char *dst = coding->destination + coding->produced; | 5347 | unsigned char *dst = coding->destination + coding->produced; |
| @@ -5414,21 +5419,20 @@ encode_coding_raw_text (struct coding_system *coding) | |||
| 5414 | } | 5419 | } |
| 5415 | 5420 | ||
| 5416 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 5421 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
| 5417 | Check if a text is encoded in a charset-based coding system. If it | 5422 | Return true if a text is encoded in a charset-based coding system. */ |
| 5418 | is, return 1, else return 0. */ | ||
| 5419 | 5423 | ||
| 5420 | static int | 5424 | static bool |
| 5421 | detect_coding_charset (struct coding_system *coding, | 5425 | detect_coding_charset (struct coding_system *coding, |
| 5422 | struct coding_detection_info *detect_info) | 5426 | struct coding_detection_info *detect_info) |
| 5423 | { | 5427 | { |
| 5424 | const unsigned char *src = coding->source, *src_base; | 5428 | const unsigned char *src = coding->source, *src_base; |
| 5425 | const unsigned char *src_end = coding->source + coding->src_bytes; | 5429 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 5426 | int multibytep = coding->src_multibyte; | 5430 | bool multibytep = coding->src_multibyte; |
| 5427 | ptrdiff_t consumed_chars = 0; | 5431 | ptrdiff_t consumed_chars = 0; |
| 5428 | Lisp_Object attrs, valids, name; | 5432 | Lisp_Object attrs, valids, name; |
| 5429 | int found = 0; | 5433 | int found = 0; |
| 5430 | ptrdiff_t head_ascii = coding->head_ascii; | 5434 | ptrdiff_t head_ascii = coding->head_ascii; |
| 5431 | int check_latin_extra = 0; | 5435 | bool check_latin_extra = 0; |
| 5432 | 5436 | ||
| 5433 | detect_info->checked |= CATEGORY_MASK_CHARSET; | 5437 | detect_info->checked |= CATEGORY_MASK_CHARSET; |
| 5434 | 5438 | ||
| @@ -5464,7 +5468,7 @@ detect_coding_charset (struct coding_system *coding, | |||
| 5464 | if (c < 0xA0 | 5468 | if (c < 0xA0 |
| 5465 | && check_latin_extra | 5469 | && check_latin_extra |
| 5466 | && (!VECTORP (Vlatin_extra_code_table) | 5470 | && (!VECTORP (Vlatin_extra_code_table) |
| 5467 | || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))) | 5471 | || NILP (AREF (Vlatin_extra_code_table, c)))) |
| 5468 | break; | 5472 | break; |
| 5469 | found = CATEGORY_MASK_CHARSET; | 5473 | found = CATEGORY_MASK_CHARSET; |
| 5470 | } | 5474 | } |
| @@ -5532,14 +5536,14 @@ decode_coding_charset (struct coding_system *coding) | |||
| 5532 | int *charbuf_end | 5536 | int *charbuf_end |
| 5533 | = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); | 5537 | = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2); |
| 5534 | ptrdiff_t consumed_chars = 0, consumed_chars_base; | 5538 | ptrdiff_t consumed_chars = 0, consumed_chars_base; |
| 5535 | int multibytep = coding->src_multibyte; | 5539 | bool multibytep = coding->src_multibyte; |
| 5536 | Lisp_Object attrs = CODING_ID_ATTRS (coding->id); | 5540 | Lisp_Object attrs = CODING_ID_ATTRS (coding->id); |
| 5537 | Lisp_Object valids; | 5541 | Lisp_Object valids; |
| 5538 | ptrdiff_t char_offset = coding->produced_char; | 5542 | ptrdiff_t char_offset = coding->produced_char; |
| 5539 | ptrdiff_t last_offset = char_offset; | 5543 | ptrdiff_t last_offset = char_offset; |
| 5540 | int last_id = charset_ascii; | 5544 | int last_id = charset_ascii; |
| 5541 | int eol_dos = | 5545 | bool eol_dos |
| 5542 | !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | 5546 | = !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); |
| 5543 | int byte_after_cr = -1; | 5547 | int byte_after_cr = -1; |
| 5544 | 5548 | ||
| 5545 | valids = AREF (attrs, coding_attr_charset_valids); | 5549 | valids = AREF (attrs, coding_attr_charset_valids); |
| @@ -5648,10 +5652,10 @@ decode_coding_charset (struct coding_system *coding) | |||
| 5648 | coding->charbuf_used = charbuf - coding->charbuf; | 5652 | coding->charbuf_used = charbuf - coding->charbuf; |
| 5649 | } | 5653 | } |
| 5650 | 5654 | ||
| 5651 | static int | 5655 | static bool |
| 5652 | encode_coding_charset (struct coding_system *coding) | 5656 | encode_coding_charset (struct coding_system *coding) |
| 5653 | { | 5657 | { |
| 5654 | int multibytep = coding->dst_multibyte; | 5658 | bool multibytep = coding->dst_multibyte; |
| 5655 | int *charbuf = coding->charbuf; | 5659 | int *charbuf = coding->charbuf; |
| 5656 | int *charbuf_end = charbuf + coding->charbuf_used; | 5660 | int *charbuf_end = charbuf + coding->charbuf_used; |
| 5657 | unsigned char *dst = coding->destination + coding->produced; | 5661 | unsigned char *dst = coding->destination + coding->produced; |
| @@ -5659,7 +5663,7 @@ encode_coding_charset (struct coding_system *coding) | |||
| 5659 | int safe_room = MAX_MULTIBYTE_LENGTH; | 5663 | int safe_room = MAX_MULTIBYTE_LENGTH; |
| 5660 | ptrdiff_t produced_chars = 0; | 5664 | ptrdiff_t produced_chars = 0; |
| 5661 | Lisp_Object attrs, charset_list; | 5665 | Lisp_Object attrs, charset_list; |
| 5662 | int ascii_compatible; | 5666 | bool ascii_compatible; |
| 5663 | int c; | 5667 | int c; |
| 5664 | 5668 | ||
| 5665 | CODING_GET_INFO (coding, attrs, charset_list); | 5669 | CODING_GET_INFO (coding, attrs, charset_list); |
| @@ -5737,7 +5741,6 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding) | |||
| 5737 | eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id); | 5741 | eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id); |
| 5738 | 5742 | ||
| 5739 | coding->mode = 0; | 5743 | coding->mode = 0; |
| 5740 | coding->head_ascii = -1; | ||
| 5741 | if (VECTORP (eol_type)) | 5744 | if (VECTORP (eol_type)) |
| 5742 | coding->common_flags = (CODING_REQUIRE_DECODING_MASK | 5745 | coding->common_flags = (CODING_REQUIRE_DECODING_MASK |
| 5743 | | CODING_REQUIRE_DETECTION_MASK); | 5746 | | CODING_REQUIRE_DETECTION_MASK); |
| @@ -5766,6 +5769,14 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding) | |||
| 5766 | coding->decoder = decode_coding_raw_text; | 5769 | coding->decoder = decode_coding_raw_text; |
| 5767 | coding->encoder = encode_coding_raw_text; | 5770 | coding->encoder = encode_coding_raw_text; |
| 5768 | coding->common_flags |= CODING_REQUIRE_DETECTION_MASK; | 5771 | coding->common_flags |= CODING_REQUIRE_DETECTION_MASK; |
| 5772 | coding->spec.undecided.inhibit_nbd | ||
| 5773 | = (encode_inhibit_flag | ||
| 5774 | (AREF (attrs, coding_attr_undecided_inhibit_null_byte_detection))); | ||
| 5775 | coding->spec.undecided.inhibit_ied | ||
| 5776 | = (encode_inhibit_flag | ||
| 5777 | (AREF (attrs, coding_attr_undecided_inhibit_iso_escape_detection))); | ||
| 5778 | coding->spec.undecided.prefer_utf_8 | ||
| 5779 | = ! NILP (AREF (attrs, coding_attr_undecided_prefer_utf_8)); | ||
| 5769 | } | 5780 | } |
| 5770 | else if (EQ (coding_type, Qiso_2022)) | 5781 | else if (EQ (coding_type, Qiso_2022)) |
| 5771 | { | 5782 | { |
| @@ -5865,7 +5876,6 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding) | |||
| 5865 | coding->encoder = encode_coding_emacs_mule; | 5876 | coding->encoder = encode_coding_emacs_mule; |
| 5866 | coding->common_flags | 5877 | coding->common_flags |
| 5867 | |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK); | 5878 | |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK); |
| 5868 | coding->spec.emacs_mule.full_support = 1; | ||
| 5869 | if (! NILP (AREF (attrs, coding_attr_emacs_mule_full)) | 5879 | if (! NILP (AREF (attrs, coding_attr_emacs_mule_full)) |
| 5870 | && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list)) | 5880 | && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list)) |
| 5871 | { | 5881 | { |
| @@ -5883,7 +5893,6 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding) | |||
| 5883 | SSET (safe_charsets, XFASTINT (XCAR (tail)), 0); | 5893 | SSET (safe_charsets, XFASTINT (XCAR (tail)), 0); |
| 5884 | coding->max_charset_id = max_charset_id; | 5894 | coding->max_charset_id = max_charset_id; |
| 5885 | coding->safe_charsets = SDATA (safe_charsets); | 5895 | coding->safe_charsets = SDATA (safe_charsets); |
| 5886 | coding->spec.emacs_mule.full_support = 1; | ||
| 5887 | } | 5896 | } |
| 5888 | coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO; | 5897 | coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO; |
| 5889 | coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO; | 5898 | coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO; |
| @@ -6191,10 +6200,181 @@ complement_process_encoding_system (Lisp_Object coding_system) | |||
| 6191 | 6200 | ||
| 6192 | */ | 6201 | */ |
| 6193 | 6202 | ||
| 6194 | #define EOL_SEEN_NONE 0 | 6203 | static Lisp_Object adjust_coding_eol_type (struct coding_system *coding, |
| 6195 | #define EOL_SEEN_LF 1 | 6204 | int eol_seen); |
| 6196 | #define EOL_SEEN_CR 2 | 6205 | |
| 6197 | #define EOL_SEEN_CRLF 4 | 6206 | |
| 6207 | /* Return the number of ASCII characters at the head of the source. | ||
| 6208 | By side effects, set coding->head_ascii and update | ||
| 6209 | coding->eol_seen. The value of coding->eol_seen is "logical or" of | ||
| 6210 | EOL_SEEN_LF, EOL_SEEN_CR, and EOL_SEEN_CRLF, but the value is | ||
| 6211 | reliable only when all the source bytes are ASCII. */ | ||
| 6212 | |||
| 6213 | static int | ||
| 6214 | check_ascii (struct coding_system *coding) | ||
| 6215 | { | ||
| 6216 | const unsigned char *src, *end; | ||
| 6217 | Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id); | ||
| 6218 | int eol_seen = coding->eol_seen; | ||
| 6219 | |||
| 6220 | coding_set_source (coding); | ||
| 6221 | src = coding->source; | ||
| 6222 | end = src + coding->src_bytes; | ||
| 6223 | |||
| 6224 | if (inhibit_eol_conversion | ||
| 6225 | || SYMBOLP (eol_type)) | ||
| 6226 | { | ||
| 6227 | /* We don't have to check EOL format. */ | ||
| 6228 | while (src < end && !( *src & 0x80)) | ||
| 6229 | { | ||
| 6230 | if (*src++ == '\n') | ||
| 6231 | eol_seen |= EOL_SEEN_LF; | ||
| 6232 | } | ||
| 6233 | } | ||
| 6234 | else | ||
| 6235 | { | ||
| 6236 | end--; /* We look ahead one byte for "CR LF". */ | ||
| 6237 | while (src < end) | ||
| 6238 | { | ||
| 6239 | int c = *src; | ||
| 6240 | |||
| 6241 | if (c & 0x80) | ||
| 6242 | break; | ||
| 6243 | src++; | ||
| 6244 | if (c == '\r') | ||
| 6245 | { | ||
| 6246 | if (*src == '\n') | ||
| 6247 | { | ||
| 6248 | eol_seen |= EOL_SEEN_CRLF; | ||
| 6249 | src++; | ||
| 6250 | } | ||
| 6251 | else | ||
| 6252 | eol_seen |= EOL_SEEN_CR; | ||
| 6253 | } | ||
| 6254 | else if (c == '\n') | ||
| 6255 | eol_seen |= EOL_SEEN_LF; | ||
| 6256 | } | ||
| 6257 | if (src == end) | ||
| 6258 | { | ||
| 6259 | int c = *src; | ||
| 6260 | |||
| 6261 | /* All bytes but the last one C are ASCII. */ | ||
| 6262 | if (! (c & 0x80)) | ||
| 6263 | { | ||
| 6264 | if (c == '\r') | ||
| 6265 | eol_seen |= EOL_SEEN_CR; | ||
| 6266 | else if (c == '\n') | ||
| 6267 | eol_seen |= EOL_SEEN_LF; | ||
| 6268 | src++; | ||
| 6269 | } | ||
| 6270 | } | ||
| 6271 | } | ||
| 6272 | coding->head_ascii = src - coding->source; | ||
| 6273 | coding->eol_seen = eol_seen; | ||
| 6274 | return (coding->head_ascii); | ||
| 6275 | } | ||
| 6276 | |||
| 6277 | |||
| 6278 | /* Return the number of characters at the source if all the bytes are | ||
| 6279 | valid UTF-8 (of Unicode range). Otherwise, return -1. By side | ||
| 6280 | effects, update coding->eol_seen. The value of coding->eol_seen is | ||
| 6281 | "logical or" of EOL_SEEN_LF, EOL_SEEN_CR, and EOL_SEEN_CRLF, but | ||
| 6282 | the value is reliable only when all the source bytes are valid | ||
| 6283 | UTF-8. */ | ||
| 6284 | |||
| 6285 | static int | ||
| 6286 | check_utf_8 (struct coding_system *coding) | ||
| 6287 | { | ||
| 6288 | const unsigned char *src, *end; | ||
| 6289 | int eol_seen; | ||
| 6290 | int nchars = coding->head_ascii; | ||
| 6291 | |||
| 6292 | if (coding->head_ascii < 0) | ||
| 6293 | check_ascii (coding); | ||
| 6294 | else | ||
| 6295 | coding_set_source (coding); | ||
| 6296 | src = coding->source + coding->head_ascii; | ||
| 6297 | /* We look ahead one byte for CR LF. */ | ||
| 6298 | end = coding->source + coding->src_bytes - 1; | ||
| 6299 | eol_seen = coding->eol_seen; | ||
| 6300 | while (src < end) | ||
| 6301 | { | ||
| 6302 | int c = *src; | ||
| 6303 | |||
| 6304 | if (UTF_8_1_OCTET_P (*src)) | ||
| 6305 | { | ||
| 6306 | src++; | ||
| 6307 | if (c < 0x20) | ||
| 6308 | { | ||
| 6309 | if (c == '\r') | ||
| 6310 | { | ||
| 6311 | if (*src == '\n') | ||
| 6312 | { | ||
| 6313 | eol_seen |= EOL_SEEN_CRLF; | ||
| 6314 | src++; | ||
| 6315 | nchars++; | ||
| 6316 | } | ||
| 6317 | else | ||
| 6318 | eol_seen |= EOL_SEEN_CR; | ||
| 6319 | } | ||
| 6320 | else if (c == '\n') | ||
| 6321 | eol_seen |= EOL_SEEN_LF; | ||
| 6322 | } | ||
| 6323 | } | ||
| 6324 | else if (UTF_8_2_OCTET_LEADING_P (c)) | ||
| 6325 | { | ||
| 6326 | if (c < 0xC2 /* overlong sequence */ | ||
| 6327 | || src + 1 >= end | ||
| 6328 | || ! UTF_8_EXTRA_OCTET_P (src[1])) | ||
| 6329 | return -1; | ||
| 6330 | src += 2; | ||
| 6331 | } | ||
| 6332 | else if (UTF_8_3_OCTET_LEADING_P (c)) | ||
| 6333 | { | ||
| 6334 | if (src + 2 >= end | ||
| 6335 | || ! (UTF_8_EXTRA_OCTET_P (src[1]) | ||
| 6336 | && UTF_8_EXTRA_OCTET_P (src[2]))) | ||
| 6337 | return -1; | ||
| 6338 | c = (((c & 0xF) << 12) | ||
| 6339 | | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F)); | ||
| 6340 | if (c < 0x800 /* overlong sequence */ | ||
| 6341 | || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */ | ||
| 6342 | return -1; | ||
| 6343 | src += 3; | ||
| 6344 | } | ||
| 6345 | else if (UTF_8_4_OCTET_LEADING_P (c)) | ||
| 6346 | { | ||
| 6347 | if (src + 3 >= end | ||
| 6348 | || ! (UTF_8_EXTRA_OCTET_P (src[1]) | ||
| 6349 | && UTF_8_EXTRA_OCTET_P (src[2]) | ||
| 6350 | && UTF_8_EXTRA_OCTET_P (src[3]))) | ||
| 6351 | return -1; | ||
| 6352 | c = (((c & 0x7) << 18) | ((src[1] & 0x3F) << 12) | ||
| 6353 | | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F)); | ||
| 6354 | if (c < 0x10000 /* overlong sequence */ | ||
| 6355 | || c >= 0x110000) /* non-Unicode character */ | ||
| 6356 | return -1; | ||
| 6357 | src += 4; | ||
| 6358 | } | ||
| 6359 | else | ||
| 6360 | return -1; | ||
| 6361 | nchars++; | ||
| 6362 | } | ||
| 6363 | |||
| 6364 | if (src == end) | ||
| 6365 | { | ||
| 6366 | if (! UTF_8_1_OCTET_P (*src)) | ||
| 6367 | return -1; | ||
| 6368 | nchars++; | ||
| 6369 | if (*src == '\r') | ||
| 6370 | eol_seen |= EOL_SEEN_CR; | ||
| 6371 | else if (*src == '\n') | ||
| 6372 | eol_seen |= EOL_SEEN_LF; | ||
| 6373 | } | ||
| 6374 | coding->eol_seen = eol_seen; | ||
| 6375 | return nchars; | ||
| 6376 | } | ||
| 6377 | |||
| 6198 | 6378 | ||
| 6199 | /* Detect how end-of-line of a text of length SRC_BYTES pointed by | 6379 | /* Detect how end-of-line of a text of length SRC_BYTES pointed by |
| 6200 | SOURCE is encoded. If CATEGORY is one of | 6380 | SOURCE is encoded. If CATEGORY is one of |
| @@ -6216,11 +6396,9 @@ detect_eol (const unsigned char *source, ptrdiff_t src_bytes, | |||
| 6216 | 6396 | ||
| 6217 | if ((1 << category) & CATEGORY_MASK_UTF_16) | 6397 | if ((1 << category) & CATEGORY_MASK_UTF_16) |
| 6218 | { | 6398 | { |
| 6219 | int msb, lsb; | 6399 | bool msb = category == (coding_category_utf_16_le |
| 6220 | 6400 | | coding_category_utf_16_le_nosig); | |
| 6221 | msb = category == (coding_category_utf_16_le | 6401 | bool lsb = !msb; |
| 6222 | | coding_category_utf_16_le_nosig); | ||
| 6223 | lsb = 1 - msb; | ||
| 6224 | 6402 | ||
| 6225 | while (src + 1 < src_end) | 6403 | while (src + 1 < src_end) |
| 6226 | { | 6404 | { |
| @@ -6309,6 +6487,9 @@ adjust_coding_eol_type (struct coding_system *coding, int eol_seen) | |||
| 6309 | Lisp_Object eol_type; | 6487 | Lisp_Object eol_type; |
| 6310 | 6488 | ||
| 6311 | eol_type = CODING_ID_EOL_TYPE (coding->id); | 6489 | eol_type = CODING_ID_EOL_TYPE (coding->id); |
| 6490 | if (! VECTORP (eol_type)) | ||
| 6491 | /* Already adjusted. */ | ||
| 6492 | return eol_type; | ||
| 6312 | if (eol_seen & EOL_SEEN_LF) | 6493 | if (eol_seen & EOL_SEEN_LF) |
| 6313 | { | 6494 | { |
| 6314 | coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0)); | 6495 | coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0)); |
| @@ -6335,23 +6516,31 @@ static void | |||
| 6335 | detect_coding (struct coding_system *coding) | 6516 | detect_coding (struct coding_system *coding) |
| 6336 | { | 6517 | { |
| 6337 | const unsigned char *src, *src_end; | 6518 | const unsigned char *src, *src_end; |
| 6338 | int saved_mode = coding->mode; | 6519 | unsigned int saved_mode = coding->mode; |
| 6520 | Lisp_Object found = Qnil; | ||
| 6521 | Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id); | ||
| 6339 | 6522 | ||
| 6340 | coding->consumed = coding->consumed_char = 0; | 6523 | coding->consumed = coding->consumed_char = 0; |
| 6341 | coding->produced = coding->produced_char = 0; | 6524 | coding->produced = coding->produced_char = 0; |
| 6342 | coding_set_source (coding); | 6525 | coding_set_source (coding); |
| 6343 | 6526 | ||
| 6344 | src_end = coding->source + coding->src_bytes; | 6527 | src_end = coding->source + coding->src_bytes; |
| 6345 | coding->head_ascii = 0; | ||
| 6346 | 6528 | ||
| 6529 | coding->eol_seen = EOL_SEEN_NONE; | ||
| 6347 | /* If we have not yet decided the text encoding type, detect it | 6530 | /* If we have not yet decided the text encoding type, detect it |
| 6348 | now. */ | 6531 | now. */ |
| 6349 | if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) | 6532 | if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) |
| 6350 | { | 6533 | { |
| 6351 | int c, i; | 6534 | int c, i; |
| 6352 | struct coding_detection_info detect_info; | 6535 | struct coding_detection_info detect_info; |
| 6353 | int null_byte_found = 0, eight_bit_found = 0; | 6536 | bool null_byte_found = 0, eight_bit_found = 0; |
| 6537 | bool inhibit_nbd = inhibit_flag (coding->spec.undecided.inhibit_nbd, | ||
| 6538 | inhibit_null_byte_detection); | ||
| 6539 | bool inhibit_ied = inhibit_flag (coding->spec.undecided.inhibit_ied, | ||
| 6540 | inhibit_iso_escape_detection); | ||
| 6541 | bool prefer_utf_8 = coding->spec.undecided.prefer_utf_8; | ||
| 6354 | 6542 | ||
| 6543 | coding->head_ascii = 0; | ||
| 6355 | detect_info.checked = detect_info.found = detect_info.rejected = 0; | 6544 | detect_info.checked = detect_info.found = detect_info.rejected = 0; |
| 6356 | for (src = coding->source; src < src_end; src++) | 6545 | for (src = coding->source; src < src_end; src++) |
| 6357 | { | 6546 | { |
| @@ -6365,7 +6554,7 @@ detect_coding (struct coding_system *coding) | |||
| 6365 | else if (c < 0x20) | 6554 | else if (c < 0x20) |
| 6366 | { | 6555 | { |
| 6367 | if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) | 6556 | if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) |
| 6368 | && ! inhibit_iso_escape_detection | 6557 | && ! inhibit_ied |
| 6369 | && ! detect_info.checked) | 6558 | && ! detect_info.checked) |
| 6370 | { | 6559 | { |
| 6371 | if (detect_coding_iso_2022 (coding, &detect_info)) | 6560 | if (detect_coding_iso_2022 (coding, &detect_info)) |
| @@ -6384,12 +6573,33 @@ detect_coding (struct coding_system *coding) | |||
| 6384 | break; | 6573 | break; |
| 6385 | } | 6574 | } |
| 6386 | } | 6575 | } |
| 6387 | else if (! c && !inhibit_null_byte_detection) | 6576 | else if (! c && !inhibit_nbd) |
| 6388 | { | 6577 | { |
| 6389 | null_byte_found = 1; | 6578 | null_byte_found = 1; |
| 6390 | if (eight_bit_found) | 6579 | if (eight_bit_found) |
| 6391 | break; | 6580 | break; |
| 6392 | } | 6581 | } |
| 6582 | else if (! disable_ascii_optimization | ||
| 6583 | && ! inhibit_eol_conversion) | ||
| 6584 | { | ||
| 6585 | if (c == '\r') | ||
| 6586 | { | ||
| 6587 | if (src < src_end && src[1] == '\n') | ||
| 6588 | { | ||
| 6589 | coding->eol_seen |= EOL_SEEN_CRLF; | ||
| 6590 | src++; | ||
| 6591 | if (! eight_bit_found) | ||
| 6592 | coding->head_ascii++; | ||
| 6593 | } | ||
| 6594 | else | ||
| 6595 | coding->eol_seen |= EOL_SEEN_CR; | ||
| 6596 | } | ||
| 6597 | else if (c == '\n') | ||
| 6598 | { | ||
| 6599 | coding->eol_seen |= EOL_SEEN_LF; | ||
| 6600 | } | ||
| 6601 | } | ||
| 6602 | |||
| 6393 | if (! eight_bit_found) | 6603 | if (! eight_bit_found) |
| 6394 | coding->head_ascii++; | 6604 | coding->head_ascii++; |
| 6395 | } | 6605 | } |
| @@ -6420,10 +6630,19 @@ detect_coding (struct coding_system *coding) | |||
| 6420 | detect_info.checked |= ~CATEGORY_MASK_UTF_16; | 6630 | detect_info.checked |= ~CATEGORY_MASK_UTF_16; |
| 6421 | detect_info.rejected |= ~CATEGORY_MASK_UTF_16; | 6631 | detect_info.rejected |= ~CATEGORY_MASK_UTF_16; |
| 6422 | } | 6632 | } |
| 6633 | else if (prefer_utf_8 | ||
| 6634 | && detect_coding_utf_8 (coding, &detect_info)) | ||
| 6635 | { | ||
| 6636 | detect_info.checked |= ~CATEGORY_MASK_UTF_8; | ||
| 6637 | detect_info.rejected |= ~CATEGORY_MASK_UTF_8; | ||
| 6638 | } | ||
| 6423 | for (i = 0; i < coding_category_raw_text; i++) | 6639 | for (i = 0; i < coding_category_raw_text; i++) |
| 6424 | { | 6640 | { |
| 6425 | category = coding_priorities[i]; | 6641 | category = coding_priorities[i]; |
| 6426 | this = coding_categories + category; | 6642 | this = coding_categories + category; |
| 6643 | /* Some of this->detector (e.g. detect_coding_sjis) | ||
| 6644 | require this information. */ | ||
| 6645 | coding->id = this->id; | ||
| 6427 | if (this->id < 0) | 6646 | if (this->id < 0) |
| 6428 | { | 6647 | { |
| 6429 | /* No coding system of this category is defined. */ | 6648 | /* No coding system of this category is defined. */ |
| @@ -6438,32 +6657,58 @@ detect_coding (struct coding_system *coding) | |||
| 6438 | } | 6657 | } |
| 6439 | else if ((*(this->detector)) (coding, &detect_info) | 6658 | else if ((*(this->detector)) (coding, &detect_info) |
| 6440 | && detect_info.found & (1 << category)) | 6659 | && detect_info.found & (1 << category)) |
| 6441 | { | 6660 | break; |
| 6442 | if (category == coding_category_utf_16_auto) | ||
| 6443 | { | ||
| 6444 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | ||
| 6445 | category = coding_category_utf_16_le; | ||
| 6446 | else | ||
| 6447 | category = coding_category_utf_16_be; | ||
| 6448 | } | ||
| 6449 | break; | ||
| 6450 | } | ||
| 6451 | } | 6661 | } |
| 6452 | } | 6662 | } |
| 6453 | 6663 | ||
| 6454 | if (i < coding_category_raw_text) | 6664 | if (i < coding_category_raw_text) |
| 6455 | setup_coding_system (CODING_ID_NAME (this->id), coding); | 6665 | { |
| 6666 | if (category == coding_category_utf_8_auto) | ||
| 6667 | { | ||
| 6668 | Lisp_Object coding_systems; | ||
| 6669 | |||
| 6670 | coding_systems = AREF (CODING_ID_ATTRS (this->id), | ||
| 6671 | coding_attr_utf_bom); | ||
| 6672 | if (CONSP (coding_systems)) | ||
| 6673 | { | ||
| 6674 | if (detect_info.found & CATEGORY_MASK_UTF_8_SIG) | ||
| 6675 | found = XCAR (coding_systems); | ||
| 6676 | else | ||
| 6677 | found = XCDR (coding_systems); | ||
| 6678 | } | ||
| 6679 | else | ||
| 6680 | found = CODING_ID_NAME (this->id); | ||
| 6681 | } | ||
| 6682 | else if (category == coding_category_utf_16_auto) | ||
| 6683 | { | ||
| 6684 | Lisp_Object coding_systems; | ||
| 6685 | |||
| 6686 | coding_systems = AREF (CODING_ID_ATTRS (this->id), | ||
| 6687 | coding_attr_utf_bom); | ||
| 6688 | if (CONSP (coding_systems)) | ||
| 6689 | { | ||
| 6690 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | ||
| 6691 | found = XCAR (coding_systems); | ||
| 6692 | else if (detect_info.found & CATEGORY_MASK_UTF_16_BE) | ||
| 6693 | found = XCDR (coding_systems); | ||
| 6694 | } | ||
| 6695 | else | ||
| 6696 | found = CODING_ID_NAME (this->id); | ||
| 6697 | } | ||
| 6698 | else | ||
| 6699 | found = CODING_ID_NAME (this->id); | ||
| 6700 | } | ||
| 6456 | else if (null_byte_found) | 6701 | else if (null_byte_found) |
| 6457 | setup_coding_system (Qno_conversion, coding); | 6702 | found = Qno_conversion; |
| 6458 | else if ((detect_info.rejected & CATEGORY_MASK_ANY) | 6703 | else if ((detect_info.rejected & CATEGORY_MASK_ANY) |
| 6459 | == CATEGORY_MASK_ANY) | 6704 | == CATEGORY_MASK_ANY) |
| 6460 | setup_coding_system (Qraw_text, coding); | 6705 | found = Qraw_text; |
| 6461 | else if (detect_info.rejected) | 6706 | else if (detect_info.rejected) |
| 6462 | for (i = 0; i < coding_category_raw_text; i++) | 6707 | for (i = 0; i < coding_category_raw_text; i++) |
| 6463 | if (! (detect_info.rejected & (1 << coding_priorities[i]))) | 6708 | if (! (detect_info.rejected & (1 << coding_priorities[i]))) |
| 6464 | { | 6709 | { |
| 6465 | this = coding_categories + coding_priorities[i]; | 6710 | this = coding_categories + coding_priorities[i]; |
| 6466 | setup_coding_system (CODING_ID_NAME (this->id), coding); | 6711 | found = CODING_ID_NAME (this->id); |
| 6467 | break; | 6712 | break; |
| 6468 | } | 6713 | } |
| 6469 | } | 6714 | } |
| @@ -6477,14 +6722,21 @@ detect_coding (struct coding_system *coding) | |||
| 6477 | coding_systems | 6722 | coding_systems |
| 6478 | = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom); | 6723 | = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom); |
| 6479 | detect_info.found = detect_info.rejected = 0; | 6724 | detect_info.found = detect_info.rejected = 0; |
| 6480 | coding->head_ascii = 0; | 6725 | if (check_ascii (coding) == coding->src_bytes) |
| 6481 | if (CONSP (coding_systems) | ||
| 6482 | && detect_coding_utf_8 (coding, &detect_info)) | ||
| 6483 | { | 6726 | { |
| 6484 | if (detect_info.found & CATEGORY_MASK_UTF_8_SIG) | 6727 | if (CONSP (coding_systems)) |
| 6485 | setup_coding_system (XCAR (coding_systems), coding); | 6728 | found = XCDR (coding_systems); |
| 6486 | else | 6729 | } |
| 6487 | setup_coding_system (XCDR (coding_systems), coding); | 6730 | else |
| 6731 | { | ||
| 6732 | if (CONSP (coding_systems) | ||
| 6733 | && detect_coding_utf_8 (coding, &detect_info)) | ||
| 6734 | { | ||
| 6735 | if (detect_info.found & CATEGORY_MASK_UTF_8_SIG) | ||
| 6736 | found = XCAR (coding_systems); | ||
| 6737 | else | ||
| 6738 | found = XCDR (coding_systems); | ||
| 6739 | } | ||
| 6488 | } | 6740 | } |
| 6489 | } | 6741 | } |
| 6490 | else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) | 6742 | else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id))) |
| @@ -6501,11 +6753,24 @@ detect_coding (struct coding_system *coding) | |||
| 6501 | && detect_coding_utf_16 (coding, &detect_info)) | 6753 | && detect_coding_utf_16 (coding, &detect_info)) |
| 6502 | { | 6754 | { |
| 6503 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | 6755 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) |
| 6504 | setup_coding_system (XCAR (coding_systems), coding); | 6756 | found = XCAR (coding_systems); |
| 6505 | else if (detect_info.found & CATEGORY_MASK_UTF_16_BE) | 6757 | else if (detect_info.found & CATEGORY_MASK_UTF_16_BE) |
| 6506 | setup_coding_system (XCDR (coding_systems), coding); | 6758 | found = XCDR (coding_systems); |
| 6507 | } | 6759 | } |
| 6508 | } | 6760 | } |
| 6761 | |||
| 6762 | if (! NILP (found)) | ||
| 6763 | { | ||
| 6764 | int specified_eol = (VECTORP (eol_type) ? EOL_SEEN_NONE | ||
| 6765 | : EQ (eol_type, Qdos) ? EOL_SEEN_CRLF | ||
| 6766 | : EQ (eol_type, Qmac) ? EOL_SEEN_CR | ||
| 6767 | : EOL_SEEN_LF); | ||
| 6768 | |||
| 6769 | setup_coding_system (found, coding); | ||
| 6770 | if (specified_eol != EOL_SEEN_NONE) | ||
| 6771 | adjust_coding_eol_type (coding, specified_eol); | ||
| 6772 | } | ||
| 6773 | |||
| 6509 | coding->mode = saved_mode; | 6774 | coding->mode = saved_mode; |
| 6510 | } | 6775 | } |
| 6511 | 6776 | ||
| @@ -6609,11 +6874,11 @@ decode_eol (struct coding_system *coding) | |||
| 6609 | 6874 | ||
| 6610 | 6875 | ||
| 6611 | /* Return a translation table (or list of them) from coding system | 6876 | /* Return a translation table (or list of them) from coding system |
| 6612 | attribute vector ATTRS for encoding (ENCODEP is nonzero) or | 6877 | attribute vector ATTRS for encoding (if ENCODEP) or decoding (if |
| 6613 | decoding (ENCODEP is zero). */ | 6878 | not ENCODEP). */ |
| 6614 | 6879 | ||
| 6615 | static Lisp_Object | 6880 | static Lisp_Object |
| 6616 | get_translation_table (Lisp_Object attrs, int encodep, int *max_lookup) | 6881 | get_translation_table (Lisp_Object attrs, bool encodep, int *max_lookup) |
| 6617 | { | 6882 | { |
| 6618 | Lisp_Object standard, translation_table; | 6883 | Lisp_Object standard, translation_table; |
| 6619 | Lisp_Object val; | 6884 | Lisp_Object val; |
| @@ -6646,11 +6911,9 @@ get_translation_table (Lisp_Object attrs, int encodep, int *max_lookup) | |||
| 6646 | if (CHAR_TABLE_P (standard)) | 6911 | if (CHAR_TABLE_P (standard)) |
| 6647 | { | 6912 | { |
| 6648 | if (CONSP (translation_table)) | 6913 | if (CONSP (translation_table)) |
| 6649 | translation_table = nconc2 (translation_table, | 6914 | translation_table = nconc2 (translation_table, list1 (standard)); |
| 6650 | Fcons (standard, Qnil)); | ||
| 6651 | else | 6915 | else |
| 6652 | translation_table = Fcons (translation_table, | 6916 | translation_table = list2 (translation_table, standard); |
| 6653 | Fcons (standard, Qnil)); | ||
| 6654 | } | 6917 | } |
| 6655 | } | 6918 | } |
| 6656 | 6919 | ||
| @@ -6743,7 +7006,7 @@ get_translation (Lisp_Object trans, int *buf, int *buf_end) | |||
| 6743 | 7006 | ||
| 6744 | static int | 7007 | static int |
| 6745 | produce_chars (struct coding_system *coding, Lisp_Object translation_table, | 7008 | produce_chars (struct coding_system *coding, Lisp_Object translation_table, |
| 6746 | int last_block) | 7009 | bool last_block) |
| 6747 | { | 7010 | { |
| 6748 | unsigned char *dst = coding->destination + coding->produced; | 7011 | unsigned char *dst = coding->destination + coding->produced; |
| 6749 | unsigned char *dst_end = coding->destination + coding->dst_bytes; | 7012 | unsigned char *dst_end = coding->destination + coding->dst_bytes; |
| @@ -6765,7 +7028,8 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table, | |||
| 6765 | 7028 | ||
| 6766 | while (buf < buf_end) | 7029 | while (buf < buf_end) |
| 6767 | { | 7030 | { |
| 6768 | int c = *buf, i; | 7031 | int c = *buf; |
| 7032 | ptrdiff_t i; | ||
| 6769 | 7033 | ||
| 6770 | if (c >= 0) | 7034 | if (c >= 0) |
| 6771 | { | 7035 | { |
| @@ -6845,7 +7109,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table, | |||
| 6845 | { | 7109 | { |
| 6846 | if (coding->src_multibyte) | 7110 | if (coding->src_multibyte) |
| 6847 | { | 7111 | { |
| 6848 | int multibytep = 1; | 7112 | bool multibytep = 1; |
| 6849 | ptrdiff_t consumed_chars = 0; | 7113 | ptrdiff_t consumed_chars = 0; |
| 6850 | 7114 | ||
| 6851 | while (1) | 7115 | while (1) |
| @@ -6881,7 +7145,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table, | |||
| 6881 | else | 7145 | else |
| 6882 | while (src < src_end) | 7146 | while (src < src_end) |
| 6883 | { | 7147 | { |
| 6884 | int multibytep = 1; | 7148 | bool multibytep = 1; |
| 6885 | int c = *src++; | 7149 | int c = *src++; |
| 6886 | 7150 | ||
| 6887 | if (dst >= dst_end - 1) | 7151 | if (dst >= dst_end - 1) |
| @@ -6933,7 +7197,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table, | |||
| 6933 | 7197 | ||
| 6934 | produced = dst - (coding->destination + coding->produced); | 7198 | produced = dst - (coding->destination + coding->produced); |
| 6935 | if (BUFFERP (coding->dst_object) && produced_chars > 0) | 7199 | if (BUFFERP (coding->dst_object) && produced_chars > 0) |
| 6936 | insert_from_gap (produced_chars, produced); | 7200 | insert_from_gap (produced_chars, produced, 0); |
| 6937 | coding->produced += produced; | 7201 | coding->produced += produced; |
| 6938 | coding->produced_char += produced_chars; | 7202 | coding->produced_char += produced_chars; |
| 6939 | return carryover; | 7203 | return carryover; |
| @@ -6944,7 +7208,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table, | |||
| 6944 | [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ] | 7208 | [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ] |
| 6945 | */ | 7209 | */ |
| 6946 | 7210 | ||
| 6947 | static inline void | 7211 | static void |
| 6948 | produce_composition (struct coding_system *coding, int *charbuf, ptrdiff_t pos) | 7212 | produce_composition (struct coding_system *coding, int *charbuf, ptrdiff_t pos) |
| 6949 | { | 7213 | { |
| 6950 | int len; | 7214 | int len; |
| @@ -6988,7 +7252,7 @@ produce_composition (struct coding_system *coding, int *charbuf, ptrdiff_t pos) | |||
| 6988 | [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ] | 7252 | [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ] |
| 6989 | */ | 7253 | */ |
| 6990 | 7254 | ||
| 6991 | static inline void | 7255 | static void |
| 6992 | produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos) | 7256 | produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos) |
| 6993 | { | 7257 | { |
| 6994 | ptrdiff_t from = pos - charbuf[2]; | 7258 | ptrdiff_t from = pos - charbuf[2]; |
| @@ -7004,22 +7268,8 @@ produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos) | |||
| 7004 | 7268 | ||
| 7005 | #define ALLOC_CONVERSION_WORK_AREA(coding) \ | 7269 | #define ALLOC_CONVERSION_WORK_AREA(coding) \ |
| 7006 | do { \ | 7270 | do { \ |
| 7007 | int size = CHARBUF_SIZE; \ | 7271 | coding->charbuf = SAFE_ALLOCA (CHARBUF_SIZE * sizeof (int)); \ |
| 7008 | \ | 7272 | coding->charbuf_size = CHARBUF_SIZE; \ |
| 7009 | coding->charbuf = NULL; \ | ||
| 7010 | while (size > 1024) \ | ||
| 7011 | { \ | ||
| 7012 | coding->charbuf = (int *) alloca (sizeof (int) * size); \ | ||
| 7013 | if (coding->charbuf) \ | ||
| 7014 | break; \ | ||
| 7015 | size >>= 1; \ | ||
| 7016 | } \ | ||
| 7017 | if (! coding->charbuf) \ | ||
| 7018 | { \ | ||
| 7019 | record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \ | ||
| 7020 | return coding->result; \ | ||
| 7021 | } \ | ||
| 7022 | coding->charbuf_size = size; \ | ||
| 7023 | } while (0) | 7273 | } while (0) |
| 7024 | 7274 | ||
| 7025 | 7275 | ||
| @@ -7078,7 +7328,7 @@ produce_annotation (struct coding_system *coding, ptrdiff_t pos) | |||
| 7078 | CODING->dst_object. | 7328 | CODING->dst_object. |
| 7079 | */ | 7329 | */ |
| 7080 | 7330 | ||
| 7081 | static int | 7331 | static void |
| 7082 | decode_coding (struct coding_system *coding) | 7332 | decode_coding (struct coding_system *coding) |
| 7083 | { | 7333 | { |
| 7084 | Lisp_Object attrs; | 7334 | Lisp_Object attrs; |
| @@ -7088,6 +7338,8 @@ decode_coding (struct coding_system *coding) | |||
| 7088 | int carryover; | 7338 | int carryover; |
| 7089 | int i; | 7339 | int i; |
| 7090 | 7340 | ||
| 7341 | USE_SAFE_ALLOCA; | ||
| 7342 | |||
| 7091 | if (BUFFERP (coding->src_object) | 7343 | if (BUFFERP (coding->src_object) |
| 7092 | && coding->src_pos > 0 | 7344 | && coding->src_pos > 0 |
| 7093 | && coding->src_pos < GPT | 7345 | && coding->src_pos < GPT |
| @@ -7097,12 +7349,20 @@ decode_coding (struct coding_system *coding) | |||
| 7097 | undo_list = Qt; | 7349 | undo_list = Qt; |
| 7098 | if (BUFFERP (coding->dst_object)) | 7350 | if (BUFFERP (coding->dst_object)) |
| 7099 | { | 7351 | { |
| 7100 | if (current_buffer != XBUFFER (coding->dst_object)) | 7352 | set_buffer_internal (XBUFFER (coding->dst_object)); |
| 7101 | set_buffer_internal (XBUFFER (coding->dst_object)); | ||
| 7102 | if (GPT != PT) | 7353 | if (GPT != PT) |
| 7103 | move_gap_both (PT, PT_BYTE); | 7354 | move_gap_both (PT, PT_BYTE); |
| 7355 | |||
| 7356 | /* We must disable undo_list in order to record the whole insert | ||
| 7357 | transaction via record_insert at the end. But doing so also | ||
| 7358 | disables the recording of the first change to the undo_list. | ||
| 7359 | Therefore we check for first change here and record it via | ||
| 7360 | record_first_change if needed. */ | ||
| 7361 | if (MODIFF <= SAVE_MODIFF) | ||
| 7362 | record_first_change (); | ||
| 7363 | |||
| 7104 | undo_list = BVAR (current_buffer, undo_list); | 7364 | undo_list = BVAR (current_buffer, undo_list); |
| 7105 | BVAR (current_buffer, undo_list) = Qt; | 7365 | bset_undo_list (current_buffer, Qt); |
| 7106 | } | 7366 | } |
| 7107 | 7367 | ||
| 7108 | coding->consumed = coding->consumed_char = 0; | 7368 | coding->consumed = coding->consumed_char = 0; |
| @@ -7199,10 +7459,11 @@ decode_coding (struct coding_system *coding) | |||
| 7199 | decode_eol (coding); | 7459 | decode_eol (coding); |
| 7200 | if (BUFFERP (coding->dst_object)) | 7460 | if (BUFFERP (coding->dst_object)) |
| 7201 | { | 7461 | { |
| 7202 | BVAR (current_buffer, undo_list) = undo_list; | 7462 | bset_undo_list (current_buffer, undo_list); |
| 7203 | record_insert (coding->dst_pos, coding->produced_char); | 7463 | record_insert (coding->dst_pos, coding->produced_char); |
| 7204 | } | 7464 | } |
| 7205 | return coding->result; | 7465 | |
| 7466 | SAFE_FREE (); | ||
| 7206 | } | 7467 | } |
| 7207 | 7468 | ||
| 7208 | 7469 | ||
| @@ -7216,7 +7477,7 @@ decode_coding (struct coding_system *coding) | |||
| 7216 | position of a composition after POS (if any) or to LIMIT, and | 7477 | position of a composition after POS (if any) or to LIMIT, and |
| 7217 | return BUF. */ | 7478 | return BUF. */ |
| 7218 | 7479 | ||
| 7219 | static inline int * | 7480 | static int * |
| 7220 | handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit, | 7481 | handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit, |
| 7221 | struct coding_system *coding, int *buf, | 7482 | struct coding_system *coding, int *buf, |
| 7222 | ptrdiff_t *stop) | 7483 | ptrdiff_t *stop) |
| @@ -7236,7 +7497,7 @@ handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit, | |||
| 7236 | /* We found a composition. Store the corresponding | 7497 | /* We found a composition. Store the corresponding |
| 7237 | annotation data in BUF. */ | 7498 | annotation data in BUF. */ |
| 7238 | int *head = buf; | 7499 | int *head = buf; |
| 7239 | enum composition_method method = COMPOSITION_METHOD (prop); | 7500 | enum composition_method method = composition_method (prop); |
| 7240 | int nchars = COMPOSITION_LENGTH (prop); | 7501 | int nchars = COMPOSITION_LENGTH (prop); |
| 7241 | 7502 | ||
| 7242 | ADD_COMPOSITION_DATA (buf, nchars, 0, method); | 7503 | ADD_COMPOSITION_DATA (buf, nchars, 0, method); |
| @@ -7274,7 +7535,7 @@ handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit, | |||
| 7274 | *buf++ = XINT (XCAR (components)); | 7535 | *buf++ = XINT (XCAR (components)); |
| 7275 | } | 7536 | } |
| 7276 | else | 7537 | else |
| 7277 | abort (); | 7538 | emacs_abort (); |
| 7278 | *head -= len; | 7539 | *head -= len; |
| 7279 | } | 7540 | } |
| 7280 | } | 7541 | } |
| @@ -7299,7 +7560,7 @@ handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit, | |||
| 7299 | If the property value is nil, set *STOP to the position where the | 7560 | If the property value is nil, set *STOP to the position where the |
| 7300 | property value is non-nil (limiting by LIMIT), and return BUF. */ | 7561 | property value is non-nil (limiting by LIMIT), and return BUF. */ |
| 7301 | 7562 | ||
| 7302 | static inline int * | 7563 | static int * |
| 7303 | handle_charset_annotation (ptrdiff_t pos, ptrdiff_t limit, | 7564 | handle_charset_annotation (ptrdiff_t pos, ptrdiff_t limit, |
| 7304 | struct coding_system *coding, int *buf, | 7565 | struct coding_system *coding, int *buf, |
| 7305 | ptrdiff_t *stop) | 7566 | ptrdiff_t *stop) |
| @@ -7331,7 +7592,7 @@ consume_chars (struct coding_system *coding, Lisp_Object translation_table, | |||
| 7331 | const unsigned char *src_end = coding->source + coding->src_bytes; | 7592 | const unsigned char *src_end = coding->source + coding->src_bytes; |
| 7332 | ptrdiff_t pos = coding->src_pos + coding->consumed_char; | 7593 | ptrdiff_t pos = coding->src_pos + coding->consumed_char; |
| 7333 | ptrdiff_t end_pos = coding->src_pos + coding->src_chars; | 7594 | ptrdiff_t end_pos = coding->src_pos + coding->src_chars; |
| 7334 | int multibytep = coding->src_multibyte; | 7595 | bool multibytep = coding->src_multibyte; |
| 7335 | Lisp_Object eol_type; | 7596 | Lisp_Object eol_type; |
| 7336 | int c; | 7597 | int c; |
| 7337 | ptrdiff_t stop, stop_composition, stop_charset; | 7598 | ptrdiff_t stop, stop_composition, stop_charset; |
| @@ -7478,7 +7739,7 @@ consume_chars (struct coding_system *coding, Lisp_Object translation_table, | |||
| 7478 | If CODING->dst_object is nil, the encoded data is placed at the | 7739 | If CODING->dst_object is nil, the encoded data is placed at the |
| 7479 | memory area specified by CODING->destination. */ | 7740 | memory area specified by CODING->destination. */ |
| 7480 | 7741 | ||
| 7481 | static int | 7742 | static void |
| 7482 | encode_coding (struct coding_system *coding) | 7743 | encode_coding (struct coding_system *coding) |
| 7483 | { | 7744 | { |
| 7484 | Lisp_Object attrs; | 7745 | Lisp_Object attrs; |
| @@ -7486,6 +7747,8 @@ encode_coding (struct coding_system *coding) | |||
| 7486 | int max_lookup; | 7747 | int max_lookup; |
| 7487 | struct ccl_spec cclspec; | 7748 | struct ccl_spec cclspec; |
| 7488 | 7749 | ||
| 7750 | USE_SAFE_ALLOCA; | ||
| 7751 | |||
| 7489 | attrs = CODING_ID_ATTRS (coding->id); | 7752 | attrs = CODING_ID_ATTRS (coding->id); |
| 7490 | if (coding->encoder == encode_coding_raw_text) | 7753 | if (coding->encoder == encode_coding_raw_text) |
| 7491 | translation_table = Qnil, max_lookup = 0; | 7754 | translation_table = Qnil, max_lookup = 0; |
| @@ -7519,9 +7782,9 @@ encode_coding (struct coding_system *coding) | |||
| 7519 | } while (coding->consumed_char < coding->src_chars); | 7782 | } while (coding->consumed_char < coding->src_chars); |
| 7520 | 7783 | ||
| 7521 | if (BUFFERP (coding->dst_object) && coding->produced_char > 0) | 7784 | if (BUFFERP (coding->dst_object) && coding->produced_char > 0) |
| 7522 | insert_from_gap (coding->produced_char, coding->produced); | 7785 | insert_from_gap (coding->produced_char, coding->produced, 0); |
| 7523 | 7786 | ||
| 7524 | return (coding->result); | 7787 | SAFE_FREE (); |
| 7525 | } | 7788 | } |
| 7526 | 7789 | ||
| 7527 | 7790 | ||
| @@ -7535,26 +7798,27 @@ static Lisp_Object Vcode_conversion_workbuf_name; | |||
| 7535 | versions of Vcode_conversion_workbuf_name. */ | 7798 | versions of Vcode_conversion_workbuf_name. */ |
| 7536 | static Lisp_Object Vcode_conversion_reused_workbuf; | 7799 | static Lisp_Object Vcode_conversion_reused_workbuf; |
| 7537 | 7800 | ||
| 7538 | /* 1 iff Vcode_conversion_reused_workbuf is already in use. */ | 7801 | /* True iff Vcode_conversion_reused_workbuf is already in use. */ |
| 7539 | static int reused_workbuf_in_use; | 7802 | static bool reused_workbuf_in_use; |
| 7540 | 7803 | ||
| 7541 | 7804 | ||
| 7542 | /* Return a working buffer of code conversion. MULTIBYTE specifies the | 7805 | /* Return a working buffer of code conversion. MULTIBYTE specifies the |
| 7543 | multibyteness of returning buffer. */ | 7806 | multibyteness of returning buffer. */ |
| 7544 | 7807 | ||
| 7545 | static Lisp_Object | 7808 | static Lisp_Object |
| 7546 | make_conversion_work_buffer (int multibyte) | 7809 | make_conversion_work_buffer (bool multibyte) |
| 7547 | { | 7810 | { |
| 7548 | Lisp_Object name, workbuf; | 7811 | Lisp_Object name, workbuf; |
| 7549 | struct buffer *current; | 7812 | struct buffer *current; |
| 7550 | 7813 | ||
| 7551 | if (reused_workbuf_in_use++) | 7814 | if (reused_workbuf_in_use) |
| 7552 | { | 7815 | { |
| 7553 | name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil); | 7816 | name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil); |
| 7554 | workbuf = Fget_buffer_create (name); | 7817 | workbuf = Fget_buffer_create (name); |
| 7555 | } | 7818 | } |
| 7556 | else | 7819 | else |
| 7557 | { | 7820 | { |
| 7821 | reused_workbuf_in_use = 1; | ||
| 7558 | if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf))) | 7822 | if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf))) |
| 7559 | Vcode_conversion_reused_workbuf | 7823 | Vcode_conversion_reused_workbuf |
| 7560 | = Fget_buffer_create (Vcode_conversion_workbuf_name); | 7824 | = Fget_buffer_create (Vcode_conversion_workbuf_name); |
| @@ -7567,14 +7831,14 @@ make_conversion_work_buffer (int multibyte) | |||
| 7567 | doesn't compile new regexps. */ | 7831 | doesn't compile new regexps. */ |
| 7568 | Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt); | 7832 | Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt); |
| 7569 | Ferase_buffer (); | 7833 | Ferase_buffer (); |
| 7570 | BVAR (current_buffer, undo_list) = Qt; | 7834 | bset_undo_list (current_buffer, Qt); |
| 7571 | BVAR (current_buffer, enable_multibyte_characters) = multibyte ? Qt : Qnil; | 7835 | bset_enable_multibyte_characters (current_buffer, multibyte ? Qt : Qnil); |
| 7572 | set_buffer_internal (current); | 7836 | set_buffer_internal (current); |
| 7573 | return workbuf; | 7837 | return workbuf; |
| 7574 | } | 7838 | } |
| 7575 | 7839 | ||
| 7576 | 7840 | ||
| 7577 | static Lisp_Object | 7841 | static void |
| 7578 | code_conversion_restore (Lisp_Object arg) | 7842 | code_conversion_restore (Lisp_Object arg) |
| 7579 | { | 7843 | { |
| 7580 | Lisp_Object current, workbuf; | 7844 | Lisp_Object current, workbuf; |
| @@ -7587,16 +7851,15 @@ code_conversion_restore (Lisp_Object arg) | |||
| 7587 | { | 7851 | { |
| 7588 | if (EQ (workbuf, Vcode_conversion_reused_workbuf)) | 7852 | if (EQ (workbuf, Vcode_conversion_reused_workbuf)) |
| 7589 | reused_workbuf_in_use = 0; | 7853 | reused_workbuf_in_use = 0; |
| 7590 | else if (! NILP (Fbuffer_live_p (workbuf))) | 7854 | else |
| 7591 | Fkill_buffer (workbuf); | 7855 | Fkill_buffer (workbuf); |
| 7592 | } | 7856 | } |
| 7593 | set_buffer_internal (XBUFFER (current)); | 7857 | set_buffer_internal (XBUFFER (current)); |
| 7594 | UNGCPRO; | 7858 | UNGCPRO; |
| 7595 | return Qnil; | ||
| 7596 | } | 7859 | } |
| 7597 | 7860 | ||
| 7598 | Lisp_Object | 7861 | Lisp_Object |
| 7599 | code_conversion_save (int with_work_buf, int multibyte) | 7862 | code_conversion_save (bool with_work_buf, bool multibyte) |
| 7600 | { | 7863 | { |
| 7601 | Lisp_Object workbuf = Qnil; | 7864 | Lisp_Object workbuf = Qnil; |
| 7602 | 7865 | ||
| @@ -7607,15 +7870,13 @@ code_conversion_save (int with_work_buf, int multibyte) | |||
| 7607 | return workbuf; | 7870 | return workbuf; |
| 7608 | } | 7871 | } |
| 7609 | 7872 | ||
| 7610 | int | 7873 | void |
| 7611 | decode_coding_gap (struct coding_system *coding, | 7874 | decode_coding_gap (struct coding_system *coding, |
| 7612 | ptrdiff_t chars, ptrdiff_t bytes) | 7875 | ptrdiff_t chars, ptrdiff_t bytes) |
| 7613 | { | 7876 | { |
| 7614 | ptrdiff_t count = SPECPDL_INDEX (); | 7877 | ptrdiff_t count = SPECPDL_INDEX (); |
| 7615 | Lisp_Object attrs; | 7878 | Lisp_Object attrs; |
| 7616 | 7879 | ||
| 7617 | code_conversion_save (0, 0); | ||
| 7618 | |||
| 7619 | coding->src_object = Fcurrent_buffer (); | 7880 | coding->src_object = Fcurrent_buffer (); |
| 7620 | coding->src_chars = chars; | 7881 | coding->src_chars = chars; |
| 7621 | coding->src_bytes = bytes; | 7882 | coding->src_bytes = bytes; |
| @@ -7627,15 +7888,95 @@ decode_coding_gap (struct coding_system *coding, | |||
| 7627 | coding->dst_pos_byte = PT_BYTE; | 7888 | coding->dst_pos_byte = PT_BYTE; |
| 7628 | coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); | 7889 | coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); |
| 7629 | 7890 | ||
| 7891 | coding->head_ascii = -1; | ||
| 7892 | coding->detected_utf8_chars = -1; | ||
| 7893 | coding->eol_seen = EOL_SEEN_NONE; | ||
| 7630 | if (CODING_REQUIRE_DETECTION (coding)) | 7894 | if (CODING_REQUIRE_DETECTION (coding)) |
| 7631 | detect_coding (coding); | 7895 | detect_coding (coding); |
| 7896 | attrs = CODING_ID_ATTRS (coding->id); | ||
| 7897 | if (! disable_ascii_optimization | ||
| 7898 | && ! coding->src_multibyte | ||
| 7899 | && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) | ||
| 7900 | && NILP (CODING_ATTR_POST_READ (attrs)) | ||
| 7901 | && NILP (get_translation_table (attrs, 0, NULL))) | ||
| 7902 | { | ||
| 7903 | chars = coding->head_ascii; | ||
| 7904 | if (chars < 0) | ||
| 7905 | chars = check_ascii (coding); | ||
| 7906 | if (chars != bytes) | ||
| 7907 | { | ||
| 7908 | /* There exists a non-ASCII byte. */ | ||
| 7909 | if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8)) | ||
| 7910 | { | ||
| 7911 | if (coding->detected_utf8_chars >= 0) | ||
| 7912 | chars = coding->detected_utf8_chars; | ||
| 7913 | else | ||
| 7914 | chars = check_utf_8 (coding); | ||
| 7915 | if (CODING_UTF_8_BOM (coding) != utf_without_bom | ||
| 7916 | && coding->head_ascii == 0 | ||
| 7917 | && coding->source[0] == UTF_8_BOM_1 | ||
| 7918 | && coding->source[1] == UTF_8_BOM_2 | ||
| 7919 | && coding->source[2] == UTF_8_BOM_3) | ||
| 7920 | { | ||
| 7921 | chars--; | ||
| 7922 | bytes -= 3; | ||
| 7923 | coding->src_bytes -= 3; | ||
| 7924 | } | ||
| 7925 | } | ||
| 7926 | else | ||
| 7927 | chars = -1; | ||
| 7928 | } | ||
| 7929 | if (chars >= 0) | ||
| 7930 | { | ||
| 7931 | Lisp_Object eol_type; | ||
| 7932 | |||
| 7933 | eol_type = CODING_ID_EOL_TYPE (coding->id); | ||
| 7934 | if (VECTORP (eol_type)) | ||
| 7935 | { | ||
| 7936 | if (coding->eol_seen != EOL_SEEN_NONE) | ||
| 7937 | eol_type = adjust_coding_eol_type (coding, coding->eol_seen); | ||
| 7938 | } | ||
| 7939 | if (EQ (eol_type, Qmac)) | ||
| 7940 | { | ||
| 7941 | unsigned char *src_end = GAP_END_ADDR; | ||
| 7942 | unsigned char *src = src_end - coding->src_bytes; | ||
| 7943 | |||
| 7944 | while (src < src_end) | ||
| 7945 | { | ||
| 7946 | if (*src++ == '\r') | ||
| 7947 | src[-1] = '\n'; | ||
| 7948 | } | ||
| 7949 | } | ||
| 7950 | else if (EQ (eol_type, Qdos)) | ||
| 7951 | { | ||
| 7952 | unsigned char *src = GAP_END_ADDR; | ||
| 7953 | unsigned char *src_beg = src - coding->src_bytes; | ||
| 7954 | unsigned char *dst = src; | ||
| 7955 | ptrdiff_t diff; | ||
| 7956 | |||
| 7957 | while (src_beg < src) | ||
| 7958 | { | ||
| 7959 | *--dst = *--src; | ||
| 7960 | if (*src == '\n' && src > src_beg && src[-1] == '\r') | ||
| 7961 | src--; | ||
| 7962 | } | ||
| 7963 | diff = dst - src; | ||
| 7964 | bytes -= diff; | ||
| 7965 | chars -= diff; | ||
| 7966 | } | ||
| 7967 | coding->produced = bytes; | ||
| 7968 | coding->produced_char = chars; | ||
| 7969 | insert_from_gap (chars, bytes, 1); | ||
| 7970 | return; | ||
| 7971 | } | ||
| 7972 | } | ||
| 7973 | code_conversion_save (0, 0); | ||
| 7632 | 7974 | ||
| 7633 | coding->mode |= CODING_MODE_LAST_BLOCK; | 7975 | coding->mode |= CODING_MODE_LAST_BLOCK; |
| 7634 | current_buffer->text->inhibit_shrinking = 1; | 7976 | current_buffer->text->inhibit_shrinking = 1; |
| 7635 | decode_coding (coding); | 7977 | decode_coding (coding); |
| 7636 | current_buffer->text->inhibit_shrinking = 0; | 7978 | current_buffer->text->inhibit_shrinking = 0; |
| 7637 | 7979 | ||
| 7638 | attrs = CODING_ID_ATTRS (coding->id); | ||
| 7639 | if (! NILP (CODING_ATTR_POST_READ (attrs))) | 7980 | if (! NILP (CODING_ATTR_POST_READ (attrs))) |
| 7640 | { | 7981 | { |
| 7641 | ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE; | 7982 | ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE; |
| @@ -7650,7 +7991,6 @@ decode_coding_gap (struct coding_system *coding, | |||
| 7650 | } | 7991 | } |
| 7651 | 7992 | ||
| 7652 | unbind_to (count, Qnil); | 7993 | unbind_to (count, Qnil); |
| 7653 | return coding->result; | ||
| 7654 | } | 7994 | } |
| 7655 | 7995 | ||
| 7656 | 7996 | ||
| @@ -7696,8 +8036,8 @@ decode_coding_object (struct coding_system *coding, | |||
| 7696 | ptrdiff_t chars = to - from; | 8036 | ptrdiff_t chars = to - from; |
| 7697 | ptrdiff_t bytes = to_byte - from_byte; | 8037 | ptrdiff_t bytes = to_byte - from_byte; |
| 7698 | Lisp_Object attrs; | 8038 | Lisp_Object attrs; |
| 7699 | int saved_pt = -1, saved_pt_byte IF_LINT (= 0); | 8039 | ptrdiff_t saved_pt = -1, saved_pt_byte IF_LINT (= 0); |
| 7700 | int need_marker_adjustment = 0; | 8040 | bool need_marker_adjustment = 0; |
| 7701 | Lisp_Object old_deactivate_mark; | 8041 | Lisp_Object old_deactivate_mark; |
| 7702 | 8042 | ||
| 7703 | old_deactivate_mark = Vdeactivate_mark; | 8043 | old_deactivate_mark = Vdeactivate_mark; |
| @@ -7810,14 +8150,8 @@ decode_coding_object (struct coding_system *coding, | |||
| 7810 | set_buffer_internal (XBUFFER (coding->dst_object)); | 8150 | set_buffer_internal (XBUFFER (coding->dst_object)); |
| 7811 | if (dst_bytes < coding->produced) | 8151 | if (dst_bytes < coding->produced) |
| 7812 | { | 8152 | { |
| 8153 | eassert (coding->produced > 0); | ||
| 7813 | destination = xrealloc (destination, coding->produced); | 8154 | destination = xrealloc (destination, coding->produced); |
| 7814 | if (! destination) | ||
| 7815 | { | ||
| 7816 | record_conversion_result (coding, | ||
| 7817 | CODING_RESULT_INSUFFICIENT_MEM); | ||
| 7818 | unbind_to (count, Qnil); | ||
| 7819 | return; | ||
| 7820 | } | ||
| 7821 | if (BEGV < GPT && GPT < BEGV + coding->produced_char) | 8155 | if (BEGV < GPT && GPT < BEGV + coding->produced_char) |
| 7822 | move_gap_both (BEGV, BEGV_BYTE); | 8156 | move_gap_both (BEGV, BEGV_BYTE); |
| 7823 | memcpy (destination, BEGV_ADDR, coding->produced); | 8157 | memcpy (destination, BEGV_ADDR, coding->produced); |
| @@ -7884,9 +8218,9 @@ encode_coding_object (struct coding_system *coding, | |||
| 7884 | ptrdiff_t chars = to - from; | 8218 | ptrdiff_t chars = to - from; |
| 7885 | ptrdiff_t bytes = to_byte - from_byte; | 8219 | ptrdiff_t bytes = to_byte - from_byte; |
| 7886 | Lisp_Object attrs; | 8220 | Lisp_Object attrs; |
| 7887 | int saved_pt = -1, saved_pt_byte IF_LINT (= 0); | 8221 | ptrdiff_t saved_pt = -1, saved_pt_byte IF_LINT (= 0); |
| 7888 | int need_marker_adjustment = 0; | 8222 | bool need_marker_adjustment = 0; |
| 7889 | int kill_src_buffer = 0; | 8223 | bool kill_src_buffer = 0; |
| 7890 | Lisp_Object old_deactivate_mark; | 8224 | Lisp_Object old_deactivate_mark; |
| 7891 | 8225 | ||
| 7892 | old_deactivate_mark = Vdeactivate_mark; | 8226 | old_deactivate_mark = Vdeactivate_mark; |
| @@ -7930,15 +8264,12 @@ encode_coding_object (struct coding_system *coding, | |||
| 7930 | } | 8264 | } |
| 7931 | 8265 | ||
| 7932 | { | 8266 | { |
| 7933 | Lisp_Object args[3]; | ||
| 7934 | struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5; | 8267 | struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5; |
| 7935 | 8268 | ||
| 7936 | GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object, | 8269 | GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object, |
| 7937 | old_deactivate_mark); | 8270 | old_deactivate_mark); |
| 7938 | args[0] = CODING_ATTR_PRE_WRITE (attrs); | 8271 | safe_call2 (CODING_ATTR_PRE_WRITE (attrs), |
| 7939 | args[1] = make_number (BEG); | 8272 | make_number (BEG), make_number (Z)); |
| 7940 | args[2] = make_number (Z); | ||
| 7941 | safe_call (3, args); | ||
| 7942 | UNGCPRO; | 8273 | UNGCPRO; |
| 7943 | } | 8274 | } |
| 7944 | if (XBUFFER (coding->src_object) != current_buffer) | 8275 | if (XBUFFER (coding->src_object) != current_buffer) |
| @@ -8005,7 +8336,7 @@ encode_coding_object (struct coding_system *coding, | |||
| 8005 | { | 8336 | { |
| 8006 | ptrdiff_t dst_bytes = max (1, coding->src_chars); | 8337 | ptrdiff_t dst_bytes = max (1, coding->src_chars); |
| 8007 | coding->dst_object = Qnil; | 8338 | coding->dst_object = Qnil; |
| 8008 | coding->destination = (unsigned char *) xmalloc (dst_bytes); | 8339 | coding->destination = xmalloc (dst_bytes); |
| 8009 | coding->dst_bytes = dst_bytes; | 8340 | coding->dst_bytes = dst_bytes; |
| 8010 | coding->dst_multibyte = 0; | 8341 | coding->dst_multibyte = 0; |
| 8011 | } | 8342 | } |
| @@ -8088,6 +8419,50 @@ preferred_coding_system (void) | |||
| 8088 | return CODING_ID_NAME (id); | 8419 | return CODING_ID_NAME (id); |
| 8089 | } | 8420 | } |
| 8090 | 8421 | ||
| 8422 | #if defined (WINDOWSNT) || defined (CYGWIN) | ||
| 8423 | |||
| 8424 | Lisp_Object | ||
| 8425 | from_unicode (Lisp_Object str) | ||
| 8426 | { | ||
| 8427 | CHECK_STRING (str); | ||
| 8428 | if (!STRING_MULTIBYTE (str) && | ||
| 8429 | SBYTES (str) & 1) | ||
| 8430 | { | ||
| 8431 | str = Fsubstring (str, make_number (0), make_number (-1)); | ||
| 8432 | } | ||
| 8433 | |||
| 8434 | return code_convert_string_norecord (str, Qutf_16le, 0); | ||
| 8435 | } | ||
| 8436 | |||
| 8437 | Lisp_Object | ||
| 8438 | from_unicode_buffer (const wchar_t* wstr) | ||
| 8439 | { | ||
| 8440 | return from_unicode ( | ||
| 8441 | make_unibyte_string ( | ||
| 8442 | (char*) wstr, | ||
| 8443 | /* we get one of the two final 0 bytes for free. */ | ||
| 8444 | 1 + sizeof (wchar_t) * wcslen (wstr))); | ||
| 8445 | } | ||
| 8446 | |||
| 8447 | wchar_t * | ||
| 8448 | to_unicode (Lisp_Object str, Lisp_Object *buf) | ||
| 8449 | { | ||
| 8450 | *buf = code_convert_string_norecord (str, Qutf_16le, 1); | ||
| 8451 | /* We need to make another copy (in addition to the one made by | ||
| 8452 | code_convert_string_norecord) to ensure that the final string is | ||
| 8453 | _doubly_ zero terminated --- that is, that the string is | ||
| 8454 | terminated by two zero bytes and one utf-16le null character. | ||
| 8455 | Because strings are already terminated with a single zero byte, | ||
| 8456 | we just add one additional zero. */ | ||
| 8457 | str = make_uninit_string (SBYTES (*buf) + 1); | ||
| 8458 | memcpy (SDATA (str), SDATA (*buf), SBYTES (*buf)); | ||
| 8459 | SDATA (str) [SBYTES (*buf)] = '\0'; | ||
| 8460 | *buf = str; | ||
| 8461 | return WCSDATA (*buf); | ||
| 8462 | } | ||
| 8463 | |||
| 8464 | #endif /* WINDOWSNT || CYGWIN */ | ||
| 8465 | |||
| 8091 | 8466 | ||
| 8092 | #ifdef emacs | 8467 | #ifdef emacs |
| 8093 | /*** 8. Emacs Lisp library functions ***/ | 8468 | /*** 8. Emacs Lisp library functions ***/ |
| @@ -8165,10 +8540,10 @@ function `define-coding-system'. */) | |||
| 8165 | 8540 | ||
| 8166 | 8541 | ||
| 8167 | /* Detect how the bytes at SRC of length SRC_BYTES are encoded. If | 8542 | /* Detect how the bytes at SRC of length SRC_BYTES are encoded. If |
| 8168 | HIGHEST is nonzero, return the coding system of the highest | 8543 | HIGHEST, return the coding system of the highest |
| 8169 | priority among the detected coding systems. Otherwise return a | 8544 | priority among the detected coding systems. Otherwise return a |
| 8170 | list of detected coding systems sorted by their priorities. If | 8545 | list of detected coding systems sorted by their priorities. If |
| 8171 | MULTIBYTEP is nonzero, it is assumed that the bytes are in correct | 8546 | MULTIBYTEP, it is assumed that the bytes are in correct |
| 8172 | multibyte form but contains only ASCII and eight-bit chars. | 8547 | multibyte form but contains only ASCII and eight-bit chars. |
| 8173 | Otherwise, the bytes are raw bytes. | 8548 | Otherwise, the bytes are raw bytes. |
| 8174 | 8549 | ||
| @@ -8183,7 +8558,7 @@ function `define-coding-system'. */) | |||
| 8183 | Lisp_Object | 8558 | Lisp_Object |
| 8184 | detect_coding_system (const unsigned char *src, | 8559 | detect_coding_system (const unsigned char *src, |
| 8185 | ptrdiff_t src_chars, ptrdiff_t src_bytes, | 8560 | ptrdiff_t src_chars, ptrdiff_t src_bytes, |
| 8186 | int highest, int multibytep, | 8561 | bool highest, bool multibytep, |
| 8187 | Lisp_Object coding_system) | 8562 | Lisp_Object coding_system) |
| 8188 | { | 8563 | { |
| 8189 | const unsigned char *src_end = src + src_bytes; | 8564 | const unsigned char *src_end = src + src_bytes; |
| @@ -8193,7 +8568,7 @@ detect_coding_system (const unsigned char *src, | |||
| 8193 | ptrdiff_t id; | 8568 | ptrdiff_t id; |
| 8194 | struct coding_detection_info detect_info; | 8569 | struct coding_detection_info detect_info; |
| 8195 | enum coding_category base_category; | 8570 | enum coding_category base_category; |
| 8196 | int null_byte_found = 0, eight_bit_found = 0; | 8571 | bool null_byte_found = 0, eight_bit_found = 0; |
| 8197 | 8572 | ||
| 8198 | if (NILP (coding_system)) | 8573 | if (NILP (coding_system)) |
| 8199 | coding_system = Qundecided; | 8574 | coding_system = Qundecided; |
| @@ -8219,6 +8594,11 @@ detect_coding_system (const unsigned char *src, | |||
| 8219 | enum coding_category category IF_LINT (= 0); | 8594 | enum coding_category category IF_LINT (= 0); |
| 8220 | struct coding_system *this IF_LINT (= NULL); | 8595 | struct coding_system *this IF_LINT (= NULL); |
| 8221 | int c, i; | 8596 | int c, i; |
| 8597 | bool inhibit_nbd = inhibit_flag (coding.spec.undecided.inhibit_nbd, | ||
| 8598 | inhibit_null_byte_detection); | ||
| 8599 | bool inhibit_ied = inhibit_flag (coding.spec.undecided.inhibit_ied, | ||
| 8600 | inhibit_iso_escape_detection); | ||
| 8601 | bool prefer_utf_8 = coding.spec.undecided.prefer_utf_8; | ||
| 8222 | 8602 | ||
| 8223 | /* Skip all ASCII bytes except for a few ISO2022 controls. */ | 8603 | /* Skip all ASCII bytes except for a few ISO2022 controls. */ |
| 8224 | for (; src < src_end; src++) | 8604 | for (; src < src_end; src++) |
| @@ -8233,7 +8613,7 @@ detect_coding_system (const unsigned char *src, | |||
| 8233 | else if (c < 0x20) | 8613 | else if (c < 0x20) |
| 8234 | { | 8614 | { |
| 8235 | if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) | 8615 | if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) |
| 8236 | && ! inhibit_iso_escape_detection | 8616 | && ! inhibit_ied |
| 8237 | && ! detect_info.checked) | 8617 | && ! detect_info.checked) |
| 8238 | { | 8618 | { |
| 8239 | if (detect_coding_iso_2022 (&coding, &detect_info)) | 8619 | if (detect_coding_iso_2022 (&coding, &detect_info)) |
| @@ -8252,7 +8632,7 @@ detect_coding_system (const unsigned char *src, | |||
| 8252 | break; | 8632 | break; |
| 8253 | } | 8633 | } |
| 8254 | } | 8634 | } |
| 8255 | else if (! c && !inhibit_null_byte_detection) | 8635 | else if (! c && !inhibit_nbd) |
| 8256 | { | 8636 | { |
| 8257 | null_byte_found = 1; | 8637 | null_byte_found = 1; |
| 8258 | if (eight_bit_found) | 8638 | if (eight_bit_found) |
| @@ -8285,6 +8665,12 @@ detect_coding_system (const unsigned char *src, | |||
| 8285 | detect_info.checked |= ~CATEGORY_MASK_UTF_16; | 8665 | detect_info.checked |= ~CATEGORY_MASK_UTF_16; |
| 8286 | detect_info.rejected |= ~CATEGORY_MASK_UTF_16; | 8666 | detect_info.rejected |= ~CATEGORY_MASK_UTF_16; |
| 8287 | } | 8667 | } |
| 8668 | else if (prefer_utf_8 | ||
| 8669 | && detect_coding_utf_8 (&coding, &detect_info)) | ||
| 8670 | { | ||
| 8671 | detect_info.checked |= ~CATEGORY_MASK_UTF_8; | ||
| 8672 | detect_info.rejected |= ~CATEGORY_MASK_UTF_8; | ||
| 8673 | } | ||
| 8288 | for (i = 0; i < coding_category_raw_text; i++) | 8674 | for (i = 0; i < coding_category_raw_text; i++) |
| 8289 | { | 8675 | { |
| 8290 | category = coding_priorities[i]; | 8676 | category = coding_priorities[i]; |
| @@ -8325,20 +8711,20 @@ detect_coding_system (const unsigned char *src, | |||
| 8325 | { | 8711 | { |
| 8326 | detect_info.found = CATEGORY_MASK_RAW_TEXT; | 8712 | detect_info.found = CATEGORY_MASK_RAW_TEXT; |
| 8327 | id = CODING_SYSTEM_ID (Qno_conversion); | 8713 | id = CODING_SYSTEM_ID (Qno_conversion); |
| 8328 | val = Fcons (make_number (id), Qnil); | 8714 | val = list1 (make_number (id)); |
| 8329 | } | 8715 | } |
| 8330 | else if (! detect_info.rejected && ! detect_info.found) | 8716 | else if (! detect_info.rejected && ! detect_info.found) |
| 8331 | { | 8717 | { |
| 8332 | detect_info.found = CATEGORY_MASK_ANY; | 8718 | detect_info.found = CATEGORY_MASK_ANY; |
| 8333 | id = coding_categories[coding_category_undecided].id; | 8719 | id = coding_categories[coding_category_undecided].id; |
| 8334 | val = Fcons (make_number (id), Qnil); | 8720 | val = list1 (make_number (id)); |
| 8335 | } | 8721 | } |
| 8336 | else if (highest) | 8722 | else if (highest) |
| 8337 | { | 8723 | { |
| 8338 | if (detect_info.found) | 8724 | if (detect_info.found) |
| 8339 | { | 8725 | { |
| 8340 | detect_info.found = 1 << category; | 8726 | detect_info.found = 1 << category; |
| 8341 | val = Fcons (make_number (this->id), Qnil); | 8727 | val = list1 (make_number (this->id)); |
| 8342 | } | 8728 | } |
| 8343 | else | 8729 | else |
| 8344 | for (i = 0; i < coding_category_raw_text; i++) | 8730 | for (i = 0; i < coding_category_raw_text; i++) |
| @@ -8346,7 +8732,7 @@ detect_coding_system (const unsigned char *src, | |||
| 8346 | { | 8732 | { |
| 8347 | detect_info.found = 1 << coding_priorities[i]; | 8733 | detect_info.found = 1 << coding_priorities[i]; |
| 8348 | id = coding_categories[coding_priorities[i]].id; | 8734 | id = coding_categories[coding_priorities[i]].id; |
| 8349 | val = Fcons (make_number (id), Qnil); | 8735 | val = list1 (make_number (id)); |
| 8350 | break; | 8736 | break; |
| 8351 | } | 8737 | } |
| 8352 | } | 8738 | } |
| @@ -8363,7 +8749,7 @@ detect_coding_system (const unsigned char *src, | |||
| 8363 | found |= 1 << category; | 8749 | found |= 1 << category; |
| 8364 | id = coding_categories[category].id; | 8750 | id = coding_categories[category].id; |
| 8365 | if (id >= 0) | 8751 | if (id >= 0) |
| 8366 | val = Fcons (make_number (id), val); | 8752 | val = list1 (make_number (id)); |
| 8367 | } | 8753 | } |
| 8368 | } | 8754 | } |
| 8369 | for (i = coding_category_raw_text - 1; i >= 0; i--) | 8755 | for (i = coding_category_raw_text - 1; i >= 0; i--) |
| @@ -8388,7 +8774,7 @@ detect_coding_system (const unsigned char *src, | |||
| 8388 | this = coding_categories + coding_category_utf_8_sig; | 8774 | this = coding_categories + coding_category_utf_8_sig; |
| 8389 | else | 8775 | else |
| 8390 | this = coding_categories + coding_category_utf_8_nosig; | 8776 | this = coding_categories + coding_category_utf_8_nosig; |
| 8391 | val = Fcons (make_number (this->id), Qnil); | 8777 | val = list1 (make_number (this->id)); |
| 8392 | } | 8778 | } |
| 8393 | } | 8779 | } |
| 8394 | else if (base_category == coding_category_utf_16_auto) | 8780 | else if (base_category == coding_category_utf_16_auto) |
| @@ -8405,13 +8791,13 @@ detect_coding_system (const unsigned char *src, | |||
| 8405 | this = coding_categories + coding_category_utf_16_be_nosig; | 8791 | this = coding_categories + coding_category_utf_16_be_nosig; |
| 8406 | else | 8792 | else |
| 8407 | this = coding_categories + coding_category_utf_16_le_nosig; | 8793 | this = coding_categories + coding_category_utf_16_le_nosig; |
| 8408 | val = Fcons (make_number (this->id), Qnil); | 8794 | val = list1 (make_number (this->id)); |
| 8409 | } | 8795 | } |
| 8410 | } | 8796 | } |
| 8411 | else | 8797 | else |
| 8412 | { | 8798 | { |
| 8413 | detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs)); | 8799 | detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs)); |
| 8414 | val = Fcons (make_number (coding.id), Qnil); | 8800 | val = list1 (make_number (coding.id)); |
| 8415 | } | 8801 | } |
| 8416 | 8802 | ||
| 8417 | /* Then, detect eol-format if necessary. */ | 8803 | /* Then, detect eol-format if necessary. */ |
| @@ -8505,9 +8891,6 @@ highest priority. */) | |||
| 8505 | ptrdiff_t from, to; | 8891 | ptrdiff_t from, to; |
| 8506 | ptrdiff_t from_byte, to_byte; | 8892 | ptrdiff_t from_byte, to_byte; |
| 8507 | 8893 | ||
| 8508 | CHECK_NUMBER_COERCE_MARKER (start); | ||
| 8509 | CHECK_NUMBER_COERCE_MARKER (end); | ||
| 8510 | |||
| 8511 | validate_region (&start, &end); | 8894 | validate_region (&start, &end); |
| 8512 | from = XINT (start), to = XINT (end); | 8895 | from = XINT (start), to = XINT (end); |
| 8513 | from_byte = CHAR_TO_BYTE (from); | 8896 | from_byte = CHAR_TO_BYTE (from); |
| @@ -8549,7 +8932,7 @@ highest priority. */) | |||
| 8549 | } | 8932 | } |
| 8550 | 8933 | ||
| 8551 | 8934 | ||
| 8552 | static inline int | 8935 | static bool |
| 8553 | char_encodable_p (int c, Lisp_Object attrs) | 8936 | char_encodable_p (int c, Lisp_Object attrs) |
| 8554 | { | 8937 | { |
| 8555 | Lisp_Object tail; | 8938 | Lisp_Object tail; |
| @@ -8626,8 +9009,7 @@ DEFUN ("find-coding-systems-region-internal", | |||
| 8626 | Lisp_Object attrs; | 9009 | Lisp_Object attrs; |
| 8627 | 9010 | ||
| 8628 | attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0); | 9011 | attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0); |
| 8629 | if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs)) | 9012 | if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))) |
| 8630 | && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided)) | ||
| 8631 | { | 9013 | { |
| 8632 | ASET (attrs, coding_attr_trans_tbl, | 9014 | ASET (attrs, coding_attr_trans_tbl, |
| 8633 | get_translation_table (attrs, 1, NULL)); | 9015 | get_translation_table (attrs, 1, NULL)); |
| @@ -8721,7 +9103,7 @@ to the string. */) | |||
| 8721 | Lisp_Object positions; | 9103 | Lisp_Object positions; |
| 8722 | ptrdiff_t from, to; | 9104 | ptrdiff_t from, to; |
| 8723 | const unsigned char *p, *stop, *pend; | 9105 | const unsigned char *p, *stop, *pend; |
| 8724 | int ascii_compatible; | 9106 | bool ascii_compatible; |
| 8725 | 9107 | ||
| 8726 | setup_coding_system (Fcheck_coding_system (coding_system), &coding); | 9108 | setup_coding_system (Fcheck_coding_system (coding_system), &coding); |
| 8727 | attrs = CODING_ID_ATTRS (coding.id); | 9109 | attrs = CODING_ID_ATTRS (coding.id); |
| @@ -8886,7 +9268,7 @@ is nil. */) | |||
| 8886 | attrs = AREF (CODING_SYSTEM_SPEC (elt), 0); | 9268 | attrs = AREF (CODING_SYSTEM_SPEC (elt), 0); |
| 8887 | ASET (attrs, coding_attr_trans_tbl, | 9269 | ASET (attrs, coding_attr_trans_tbl, |
| 8888 | get_translation_table (attrs, 1, NULL)); | 9270 | get_translation_table (attrs, 1, NULL)); |
| 8889 | list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list); | 9271 | list = Fcons (list2 (elt, attrs), list); |
| 8890 | } | 9272 | } |
| 8891 | 9273 | ||
| 8892 | if (STRINGP (start)) | 9274 | if (STRINGP (start)) |
| @@ -8945,14 +9327,12 @@ is nil. */) | |||
| 8945 | static Lisp_Object | 9327 | static Lisp_Object |
| 8946 | code_convert_region (Lisp_Object start, Lisp_Object end, | 9328 | code_convert_region (Lisp_Object start, Lisp_Object end, |
| 8947 | Lisp_Object coding_system, Lisp_Object dst_object, | 9329 | Lisp_Object coding_system, Lisp_Object dst_object, |
| 8948 | int encodep, int norecord) | 9330 | bool encodep, bool norecord) |
| 8949 | { | 9331 | { |
| 8950 | struct coding_system coding; | 9332 | struct coding_system coding; |
| 8951 | ptrdiff_t from, from_byte, to, to_byte; | 9333 | ptrdiff_t from, from_byte, to, to_byte; |
| 8952 | Lisp_Object src_object; | 9334 | Lisp_Object src_object; |
| 8953 | 9335 | ||
| 8954 | CHECK_NUMBER_COERCE_MARKER (start); | ||
| 8955 | CHECK_NUMBER_COERCE_MARKER (end); | ||
| 8956 | if (NILP (coding_system)) | 9336 | if (NILP (coding_system)) |
| 8957 | coding_system = Qno_conversion; | 9337 | coding_system = Qno_conversion; |
| 8958 | else | 9338 | else |
| @@ -9033,7 +9413,8 @@ not fully specified.) */) | |||
| 9033 | 9413 | ||
| 9034 | Lisp_Object | 9414 | Lisp_Object |
| 9035 | code_convert_string (Lisp_Object string, Lisp_Object coding_system, | 9415 | code_convert_string (Lisp_Object string, Lisp_Object coding_system, |
| 9036 | Lisp_Object dst_object, int encodep, int nocopy, int norecord) | 9416 | Lisp_Object dst_object, bool encodep, bool nocopy, |
| 9417 | bool norecord) | ||
| 9037 | { | 9418 | { |
| 9038 | struct coding_system coding; | 9419 | struct coding_system coding; |
| 9039 | ptrdiff_t chars, bytes; | 9420 | ptrdiff_t chars, bytes; |
| @@ -9081,7 +9462,7 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system, | |||
| 9081 | 9462 | ||
| 9082 | Lisp_Object | 9463 | Lisp_Object |
| 9083 | code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system, | 9464 | code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system, |
| 9084 | int encodep) | 9465 | bool encodep) |
| 9085 | { | 9466 | { |
| 9086 | return code_convert_string (string, coding_system, Qt, encodep, 0, 1); | 9467 | return code_convert_string (string, coding_system, Qt, encodep, 0, 1); |
| 9087 | } | 9468 | } |
| @@ -9295,10 +9676,10 @@ DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_intern | |||
| 9295 | terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK; | 9676 | terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK; |
| 9296 | terminal_coding->src_multibyte = 1; | 9677 | terminal_coding->src_multibyte = 1; |
| 9297 | terminal_coding->dst_multibyte = 0; | 9678 | terminal_coding->dst_multibyte = 0; |
| 9298 | if (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK) | 9679 | tset_charset_list |
| 9299 | term->charset_list = coding_charset_list (terminal_coding); | 9680 | (term, (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK |
| 9300 | else | 9681 | ? coding_charset_list (terminal_coding) |
| 9301 | term->charset_list = Fcons (make_number (charset_ascii), Qnil); | 9682 | : list1 (make_number (charset_ascii)))); |
| 9302 | return Qnil; | 9683 | return Qnil; |
| 9303 | } | 9684 | } |
| 9304 | 9685 | ||
| @@ -9482,7 +9863,7 @@ usage: (set-coding-system-priority &rest coding-systems) */) | |||
| 9482 | (ptrdiff_t nargs, Lisp_Object *args) | 9863 | (ptrdiff_t nargs, Lisp_Object *args) |
| 9483 | { | 9864 | { |
| 9484 | ptrdiff_t i, j; | 9865 | ptrdiff_t i, j; |
| 9485 | int changed[coding_category_max]; | 9866 | bool changed[coding_category_max]; |
| 9486 | enum coding_category priorities[coding_category_max]; | 9867 | enum coding_category priorities[coding_category_max]; |
| 9487 | 9868 | ||
| 9488 | memset (changed, 0, sizeof changed); | 9869 | memset (changed, 0, sizeof changed); |
| @@ -9516,7 +9897,7 @@ usage: (set-coding-system-priority &rest coding-systems) */) | |||
| 9516 | && changed[coding_priorities[j]]) | 9897 | && changed[coding_priorities[j]]) |
| 9517 | j++; | 9898 | j++; |
| 9518 | if (j == coding_category_max) | 9899 | if (j == coding_category_max) |
| 9519 | abort (); | 9900 | emacs_abort (); |
| 9520 | priorities[i] = coding_priorities[j]; | 9901 | priorities[i] = coding_priorities[j]; |
| 9521 | } | 9902 | } |
| 9522 | 9903 | ||
| @@ -9567,11 +9948,11 @@ make_subsidiaries (Lisp_Object base) | |||
| 9567 | { | 9948 | { |
| 9568 | Lisp_Object subsidiaries; | 9949 | Lisp_Object subsidiaries; |
| 9569 | ptrdiff_t base_name_len = SBYTES (SYMBOL_NAME (base)); | 9950 | ptrdiff_t base_name_len = SBYTES (SYMBOL_NAME (base)); |
| 9570 | char *buf = (char *) alloca (base_name_len + 6); | 9951 | char *buf = alloca (base_name_len + 6); |
| 9571 | int i; | 9952 | int i; |
| 9572 | 9953 | ||
| 9573 | memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len); | 9954 | memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len); |
| 9574 | subsidiaries = Fmake_vector (make_number (3), Qnil); | 9955 | subsidiaries = make_uninit_vector (3); |
| 9575 | for (i = 0; i < 3; i++) | 9956 | for (i = 0; i < 3; i++) |
| 9576 | { | 9957 | { |
| 9577 | strcpy (buf + base_name_len, suffixes[i]); | 9958 | strcpy (buf + base_name_len, suffixes[i]); |
| @@ -9605,16 +9986,16 @@ usage: (define-coding-system-internal ...) */) | |||
| 9605 | 9986 | ||
| 9606 | name = args[coding_arg_name]; | 9987 | name = args[coding_arg_name]; |
| 9607 | CHECK_SYMBOL (name); | 9988 | CHECK_SYMBOL (name); |
| 9608 | CODING_ATTR_BASE_NAME (attrs) = name; | 9989 | ASET (attrs, coding_attr_base_name, name); |
| 9609 | 9990 | ||
| 9610 | val = args[coding_arg_mnemonic]; | 9991 | val = args[coding_arg_mnemonic]; |
| 9611 | if (! STRINGP (val)) | 9992 | if (! STRINGP (val)) |
| 9612 | CHECK_CHARACTER (val); | 9993 | CHECK_CHARACTER (val); |
| 9613 | CODING_ATTR_MNEMONIC (attrs) = val; | 9994 | ASET (attrs, coding_attr_mnemonic, val); |
| 9614 | 9995 | ||
| 9615 | coding_type = args[coding_arg_coding_type]; | 9996 | coding_type = args[coding_arg_coding_type]; |
| 9616 | CHECK_SYMBOL (coding_type); | 9997 | CHECK_SYMBOL (coding_type); |
| 9617 | CODING_ATTR_TYPE (attrs) = coding_type; | 9998 | ASET (attrs, coding_attr_type, coding_type); |
| 9618 | 9999 | ||
| 9619 | charset_list = args[coding_arg_charset_list]; | 10000 | charset_list = args[coding_arg_charset_list]; |
| 9620 | if (SYMBOLP (charset_list)) | 10001 | if (SYMBOLP (charset_list)) |
| @@ -9661,49 +10042,49 @@ usage: (define-coding-system-internal ...) */) | |||
| 9661 | max_charset_id = charset->id; | 10042 | max_charset_id = charset->id; |
| 9662 | } | 10043 | } |
| 9663 | } | 10044 | } |
| 9664 | CODING_ATTR_CHARSET_LIST (attrs) = charset_list; | 10045 | ASET (attrs, coding_attr_charset_list, charset_list); |
| 9665 | 10046 | ||
| 9666 | safe_charsets = make_uninit_string (max_charset_id + 1); | 10047 | safe_charsets = make_uninit_string (max_charset_id + 1); |
| 9667 | memset (SDATA (safe_charsets), 255, max_charset_id + 1); | 10048 | memset (SDATA (safe_charsets), 255, max_charset_id + 1); |
| 9668 | for (tail = charset_list; CONSP (tail); tail = XCDR (tail)) | 10049 | for (tail = charset_list; CONSP (tail); tail = XCDR (tail)) |
| 9669 | SSET (safe_charsets, XFASTINT (XCAR (tail)), 0); | 10050 | SSET (safe_charsets, XFASTINT (XCAR (tail)), 0); |
| 9670 | CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets; | 10051 | ASET (attrs, coding_attr_safe_charsets, safe_charsets); |
| 9671 | 10052 | ||
| 9672 | CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p]; | 10053 | ASET (attrs, coding_attr_ascii_compat, args[coding_arg_ascii_compatible_p]); |
| 9673 | 10054 | ||
| 9674 | val = args[coding_arg_decode_translation_table]; | 10055 | val = args[coding_arg_decode_translation_table]; |
| 9675 | if (! CHAR_TABLE_P (val) && ! CONSP (val)) | 10056 | if (! CHAR_TABLE_P (val) && ! CONSP (val)) |
| 9676 | CHECK_SYMBOL (val); | 10057 | CHECK_SYMBOL (val); |
| 9677 | CODING_ATTR_DECODE_TBL (attrs) = val; | 10058 | ASET (attrs, coding_attr_decode_tbl, val); |
| 9678 | 10059 | ||
| 9679 | val = args[coding_arg_encode_translation_table]; | 10060 | val = args[coding_arg_encode_translation_table]; |
| 9680 | if (! CHAR_TABLE_P (val) && ! CONSP (val)) | 10061 | if (! CHAR_TABLE_P (val) && ! CONSP (val)) |
| 9681 | CHECK_SYMBOL (val); | 10062 | CHECK_SYMBOL (val); |
| 9682 | CODING_ATTR_ENCODE_TBL (attrs) = val; | 10063 | ASET (attrs, coding_attr_encode_tbl, val); |
| 9683 | 10064 | ||
| 9684 | val = args[coding_arg_post_read_conversion]; | 10065 | val = args[coding_arg_post_read_conversion]; |
| 9685 | CHECK_SYMBOL (val); | 10066 | CHECK_SYMBOL (val); |
| 9686 | CODING_ATTR_POST_READ (attrs) = val; | 10067 | ASET (attrs, coding_attr_post_read, val); |
| 9687 | 10068 | ||
| 9688 | val = args[coding_arg_pre_write_conversion]; | 10069 | val = args[coding_arg_pre_write_conversion]; |
| 9689 | CHECK_SYMBOL (val); | 10070 | CHECK_SYMBOL (val); |
| 9690 | CODING_ATTR_PRE_WRITE (attrs) = val; | 10071 | ASET (attrs, coding_attr_pre_write, val); |
| 9691 | 10072 | ||
| 9692 | val = args[coding_arg_default_char]; | 10073 | val = args[coding_arg_default_char]; |
| 9693 | if (NILP (val)) | 10074 | if (NILP (val)) |
| 9694 | CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' '); | 10075 | ASET (attrs, coding_attr_default_char, make_number (' ')); |
| 9695 | else | 10076 | else |
| 9696 | { | 10077 | { |
| 9697 | CHECK_CHARACTER (val); | 10078 | CHECK_CHARACTER (val); |
| 9698 | CODING_ATTR_DEFAULT_CHAR (attrs) = val; | 10079 | ASET (attrs, coding_attr_default_char, val); |
| 9699 | } | 10080 | } |
| 9700 | 10081 | ||
| 9701 | val = args[coding_arg_for_unibyte]; | 10082 | val = args[coding_arg_for_unibyte]; |
| 9702 | CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt; | 10083 | ASET (attrs, coding_attr_for_unibyte, NILP (val) ? Qnil : Qt); |
| 9703 | 10084 | ||
| 9704 | val = args[coding_arg_plist]; | 10085 | val = args[coding_arg_plist]; |
| 9705 | CHECK_LIST (val); | 10086 | CHECK_LIST (val); |
| 9706 | CODING_ATTR_PLIST (attrs) = val; | 10087 | ASET (attrs, coding_attr_plist, val); |
| 9707 | 10088 | ||
| 9708 | if (EQ (coding_type, Qcharset)) | 10089 | if (EQ (coding_type, Qcharset)) |
| 9709 | { | 10090 | { |
| @@ -9728,7 +10109,7 @@ usage: (define-coding-system-internal ...) */) | |||
| 9728 | int idx = (dim - 1) * 4; | 10109 | int idx = (dim - 1) * 4; |
| 9729 | 10110 | ||
| 9730 | if (CHARSET_ASCII_COMPATIBLE_P (charset)) | 10111 | if (CHARSET_ASCII_COMPATIBLE_P (charset)) |
| 9731 | CODING_ATTR_ASCII_COMPAT (attrs) = Qt; | 10112 | ASET (attrs, coding_attr_ascii_compat, Qt); |
| 9732 | 10113 | ||
| 9733 | for (i = charset->code_space[idx]; | 10114 | for (i = charset->code_space[idx]; |
| 9734 | i <= charset->code_space[idx + 1]; i++) | 10115 | i <= charset->code_space[idx + 1]; i++) |
| @@ -9743,9 +10124,9 @@ usage: (define-coding-system-internal ...) */) | |||
| 9743 | { | 10124 | { |
| 9744 | dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp))); | 10125 | dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp))); |
| 9745 | if (dim < dim2) | 10126 | if (dim < dim2) |
| 9746 | tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil)); | 10127 | tmp = list2 (XCAR (tail), tmp); |
| 9747 | else | 10128 | else |
| 9748 | tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil)); | 10129 | tmp = list2 (tmp, XCAR (tail)); |
| 9749 | } | 10130 | } |
| 9750 | else | 10131 | else |
| 9751 | { | 10132 | { |
| @@ -9756,7 +10137,7 @@ usage: (define-coding-system-internal ...) */) | |||
| 9756 | break; | 10137 | break; |
| 9757 | } | 10138 | } |
| 9758 | if (NILP (tmp2)) | 10139 | if (NILP (tmp2)) |
| 9759 | tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil)); | 10140 | tmp = nconc2 (tmp, list1 (XCAR (tail))); |
| 9760 | else | 10141 | else |
| 9761 | { | 10142 | { |
| 9762 | XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2))); | 10143 | XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2))); |
| @@ -9790,11 +10171,11 @@ usage: (define-coding-system-internal ...) */) | |||
| 9790 | 10171 | ||
| 9791 | val = args[coding_arg_ccl_valids]; | 10172 | val = args[coding_arg_ccl_valids]; |
| 9792 | valids = Fmake_string (make_number (256), make_number (0)); | 10173 | valids = Fmake_string (make_number (256), make_number (0)); |
| 9793 | for (tail = val; !NILP (tail); tail = Fcdr (tail)) | 10174 | for (tail = val; CONSP (tail); tail = XCDR (tail)) |
| 9794 | { | 10175 | { |
| 9795 | int from, to; | 10176 | int from, to; |
| 9796 | 10177 | ||
| 9797 | val = Fcar (tail); | 10178 | val = XCAR (tail); |
| 9798 | if (INTEGERP (val)) | 10179 | if (INTEGERP (val)) |
| 9799 | { | 10180 | { |
| 9800 | if (! (0 <= XINT (val) && XINT (val) <= 255)) | 10181 | if (! (0 <= XINT (val) && XINT (val) <= 255)) |
| @@ -9826,7 +10207,7 @@ usage: (define-coding-system-internal ...) */) | |||
| 9826 | { | 10207 | { |
| 9827 | Lisp_Object bom, endian; | 10208 | Lisp_Object bom, endian; |
| 9828 | 10209 | ||
| 9829 | CODING_ATTR_ASCII_COMPAT (attrs) = Qnil; | 10210 | ASET (attrs, coding_attr_ascii_compat, Qnil); |
| 9830 | 10211 | ||
| 9831 | if (nargs < coding_arg_utf16_max) | 10212 | if (nargs < coding_arg_utf16_max) |
| 9832 | goto short_args; | 10213 | goto short_args; |
| @@ -9871,7 +10252,7 @@ usage: (define-coding-system-internal ...) */) | |||
| 9871 | CHECK_VECTOR (initial); | 10252 | CHECK_VECTOR (initial); |
| 9872 | for (i = 0; i < 4; i++) | 10253 | for (i = 0; i < 4; i++) |
| 9873 | { | 10254 | { |
| 9874 | val = Faref (initial, make_number (i)); | 10255 | val = AREF (initial, i); |
| 9875 | if (! NILP (val)) | 10256 | if (! NILP (val)) |
| 9876 | { | 10257 | { |
| 9877 | struct charset *charset; | 10258 | struct charset *charset; |
| @@ -9879,7 +10260,7 @@ usage: (define-coding-system-internal ...) */) | |||
| 9879 | CHECK_CHARSET_GET_CHARSET (val, charset); | 10260 | CHECK_CHARSET_GET_CHARSET (val, charset); |
| 9880 | ASET (initial, i, make_number (CHARSET_ID (charset))); | 10261 | ASET (initial, i, make_number (CHARSET_ID (charset))); |
| 9881 | if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset)) | 10262 | if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset)) |
| 9882 | CODING_ATTR_ASCII_COMPAT (attrs) = Qt; | 10263 | ASET (attrs, coding_attr_ascii_compat, Qt); |
| 9883 | } | 10264 | } |
| 9884 | else | 10265 | else |
| 9885 | ASET (initial, i, make_number (-1)); | 10266 | ASET (initial, i, make_number (-1)); |
| @@ -9891,12 +10272,12 @@ usage: (define-coding-system-internal ...) */) | |||
| 9891 | CHECK_NUMBER_CDR (reg_usage); | 10272 | CHECK_NUMBER_CDR (reg_usage); |
| 9892 | 10273 | ||
| 9893 | request = Fcopy_sequence (args[coding_arg_iso2022_request]); | 10274 | request = Fcopy_sequence (args[coding_arg_iso2022_request]); |
| 9894 | for (tail = request; ! NILP (tail); tail = Fcdr (tail)) | 10275 | for (tail = request; CONSP (tail); tail = XCDR (tail)) |
| 9895 | { | 10276 | { |
| 9896 | int id; | 10277 | int id; |
| 9897 | Lisp_Object tmp1; | 10278 | Lisp_Object tmp1; |
| 9898 | 10279 | ||
| 9899 | val = Fcar (tail); | 10280 | val = XCAR (tail); |
| 9900 | CHECK_CONS (val); | 10281 | CHECK_CONS (val); |
| 9901 | tmp1 = XCAR (val); | 10282 | tmp1 = XCAR (val); |
| 9902 | CHECK_CHARSET_GET_ID (tmp1, id); | 10283 | CHECK_CHARSET_GET_ID (tmp1, id); |
| @@ -9940,13 +10321,13 @@ usage: (define-coding-system-internal ...) */) | |||
| 9940 | } | 10321 | } |
| 9941 | if (category != coding_category_iso_8_1 | 10322 | if (category != coding_category_iso_8_1 |
| 9942 | && category != coding_category_iso_8_2) | 10323 | && category != coding_category_iso_8_2) |
| 9943 | CODING_ATTR_ASCII_COMPAT (attrs) = Qnil; | 10324 | ASET (attrs, coding_attr_ascii_compat, Qnil); |
| 9944 | } | 10325 | } |
| 9945 | else if (EQ (coding_type, Qemacs_mule)) | 10326 | else if (EQ (coding_type, Qemacs_mule)) |
| 9946 | { | 10327 | { |
| 9947 | if (EQ (args[coding_arg_charset_list], Qemacs_mule)) | 10328 | if (EQ (args[coding_arg_charset_list], Qemacs_mule)) |
| 9948 | ASET (attrs, coding_attr_emacs_mule_full, Qt); | 10329 | ASET (attrs, coding_attr_emacs_mule_full, Qt); |
| 9949 | CODING_ATTR_ASCII_COMPAT (attrs) = Qt; | 10330 | ASET (attrs, coding_attr_ascii_compat, Qt); |
| 9950 | category = coding_category_emacs_mule; | 10331 | category = coding_category_emacs_mule; |
| 9951 | } | 10332 | } |
| 9952 | else if (EQ (coding_type, Qshift_jis)) | 10333 | else if (EQ (coding_type, Qshift_jis)) |
| @@ -9963,7 +10344,7 @@ usage: (define-coding-system-internal ...) */) | |||
| 9963 | error ("Dimension of charset %s is not one", | 10344 | error ("Dimension of charset %s is not one", |
| 9964 | SDATA (SYMBOL_NAME (CHARSET_NAME (charset)))); | 10345 | SDATA (SYMBOL_NAME (CHARSET_NAME (charset)))); |
| 9965 | if (CHARSET_ASCII_COMPATIBLE_P (charset)) | 10346 | if (CHARSET_ASCII_COMPATIBLE_P (charset)) |
| 9966 | CODING_ATTR_ASCII_COMPAT (attrs) = Qt; | 10347 | ASET (attrs, coding_attr_ascii_compat, Qt); |
| 9967 | 10348 | ||
| 9968 | charset_list = XCDR (charset_list); | 10349 | charset_list = XCDR (charset_list); |
| 9969 | charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); | 10350 | charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); |
| @@ -10001,7 +10382,7 @@ usage: (define-coding-system-internal ...) */) | |||
| 10001 | error ("Dimension of charset %s is not one", | 10382 | error ("Dimension of charset %s is not one", |
| 10002 | SDATA (SYMBOL_NAME (CHARSET_NAME (charset)))); | 10383 | SDATA (SYMBOL_NAME (CHARSET_NAME (charset)))); |
| 10003 | if (CHARSET_ASCII_COMPATIBLE_P (charset)) | 10384 | if (CHARSET_ASCII_COMPATIBLE_P (charset)) |
| 10004 | CODING_ATTR_ASCII_COMPAT (attrs) = Qt; | 10385 | ASET (attrs, coding_attr_ascii_compat, Qt); |
| 10005 | 10386 | ||
| 10006 | charset_list = XCDR (charset_list); | 10387 | charset_list = XCDR (charset_list); |
| 10007 | charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); | 10388 | charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); |
| @@ -10015,7 +10396,7 @@ usage: (define-coding-system-internal ...) */) | |||
| 10015 | else if (EQ (coding_type, Qraw_text)) | 10396 | else if (EQ (coding_type, Qraw_text)) |
| 10016 | { | 10397 | { |
| 10017 | category = coding_category_raw_text; | 10398 | category = coding_category_raw_text; |
| 10018 | CODING_ATTR_ASCII_COMPAT (attrs) = Qt; | 10399 | ASET (attrs, coding_attr_ascii_compat, Qt); |
| 10019 | } | 10400 | } |
| 10020 | else if (EQ (coding_type, Qutf_8)) | 10401 | else if (EQ (coding_type, Qutf_8)) |
| 10021 | { | 10402 | { |
| @@ -10035,26 +10416,37 @@ usage: (define-coding-system-internal ...) */) | |||
| 10035 | } | 10416 | } |
| 10036 | ASET (attrs, coding_attr_utf_bom, bom); | 10417 | ASET (attrs, coding_attr_utf_bom, bom); |
| 10037 | if (NILP (bom)) | 10418 | if (NILP (bom)) |
| 10038 | CODING_ATTR_ASCII_COMPAT (attrs) = Qt; | 10419 | ASET (attrs, coding_attr_ascii_compat, Qt); |
| 10039 | 10420 | ||
| 10040 | category = (CONSP (bom) ? coding_category_utf_8_auto | 10421 | category = (CONSP (bom) ? coding_category_utf_8_auto |
| 10041 | : NILP (bom) ? coding_category_utf_8_nosig | 10422 | : NILP (bom) ? coding_category_utf_8_nosig |
| 10042 | : coding_category_utf_8_sig); | 10423 | : coding_category_utf_8_sig); |
| 10043 | } | 10424 | } |
| 10044 | else if (EQ (coding_type, Qundecided)) | 10425 | else if (EQ (coding_type, Qundecided)) |
| 10045 | category = coding_category_undecided; | 10426 | { |
| 10427 | if (nargs < coding_arg_undecided_max) | ||
| 10428 | goto short_args; | ||
| 10429 | ASET (attrs, coding_attr_undecided_inhibit_null_byte_detection, | ||
| 10430 | args[coding_arg_undecided_inhibit_null_byte_detection]); | ||
| 10431 | ASET (attrs, coding_attr_undecided_inhibit_iso_escape_detection, | ||
| 10432 | args[coding_arg_undecided_inhibit_iso_escape_detection]); | ||
| 10433 | ASET (attrs, coding_attr_undecided_prefer_utf_8, | ||
| 10434 | args[coding_arg_undecided_prefer_utf_8]); | ||
| 10435 | category = coding_category_undecided; | ||
| 10436 | } | ||
| 10046 | else | 10437 | else |
| 10047 | error ("Invalid coding system type: %s", | 10438 | error ("Invalid coding system type: %s", |
| 10048 | SDATA (SYMBOL_NAME (coding_type))); | 10439 | SDATA (SYMBOL_NAME (coding_type))); |
| 10049 | 10440 | ||
| 10050 | CODING_ATTR_CATEGORY (attrs) = make_number (category); | 10441 | ASET (attrs, coding_attr_category, make_number (category)); |
| 10051 | CODING_ATTR_PLIST (attrs) | 10442 | ASET (attrs, coding_attr_plist, |
| 10052 | = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category), | 10443 | Fcons (QCcategory, |
| 10053 | CODING_ATTR_PLIST (attrs))); | 10444 | Fcons (AREF (Vcoding_category_table, category), |
| 10054 | CODING_ATTR_PLIST (attrs) | 10445 | CODING_ATTR_PLIST (attrs)))); |
| 10055 | = Fcons (QCascii_compatible_p, | 10446 | ASET (attrs, coding_attr_plist, |
| 10056 | Fcons (CODING_ATTR_ASCII_COMPAT (attrs), | 10447 | Fcons (QCascii_compatible_p, |
| 10057 | CODING_ATTR_PLIST (attrs))); | 10448 | Fcons (CODING_ATTR_ASCII_COMPAT (attrs), |
| 10449 | CODING_ATTR_PLIST (attrs)))); | ||
| 10058 | 10450 | ||
| 10059 | eol_type = args[coding_arg_eol_type]; | 10451 | eol_type = args[coding_arg_eol_type]; |
| 10060 | if (! NILP (eol_type) | 10452 | if (! NILP (eol_type) |
| @@ -10063,7 +10455,7 @@ usage: (define-coding-system-internal ...) */) | |||
| 10063 | && ! EQ (eol_type, Qmac)) | 10455 | && ! EQ (eol_type, Qmac)) |
| 10064 | error ("Invalid eol-type"); | 10456 | error ("Invalid eol-type"); |
| 10065 | 10457 | ||
| 10066 | aliases = Fcons (name, Qnil); | 10458 | aliases = list1 (name); |
| 10067 | 10459 | ||
| 10068 | if (NILP (eol_type)) | 10460 | if (NILP (eol_type)) |
| 10069 | { | 10461 | { |
| @@ -10073,9 +10465,10 @@ usage: (define-coding-system-internal ...) */) | |||
| 10073 | Lisp_Object this_spec, this_name, this_aliases, this_eol_type; | 10465 | Lisp_Object this_spec, this_name, this_aliases, this_eol_type; |
| 10074 | 10466 | ||
| 10075 | this_name = AREF (eol_type, i); | 10467 | this_name = AREF (eol_type, i); |
| 10076 | this_aliases = Fcons (this_name, Qnil); | 10468 | this_aliases = list1 (this_name); |
| 10077 | this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac); | 10469 | this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac); |
| 10078 | this_spec = Fmake_vector (make_number (3), attrs); | 10470 | this_spec = make_uninit_vector (3); |
| 10471 | ASET (this_spec, 0, attrs); | ||
| 10079 | ASET (this_spec, 1, this_aliases); | 10472 | ASET (this_spec, 1, this_aliases); |
| 10080 | ASET (this_spec, 2, this_eol_type); | 10473 | ASET (this_spec, 2, this_eol_type); |
| 10081 | Fputhash (this_name, this_spec, Vcoding_system_hash_table); | 10474 | Fputhash (this_name, this_spec, Vcoding_system_hash_table); |
| @@ -10088,7 +10481,8 @@ usage: (define-coding-system-internal ...) */) | |||
| 10088 | } | 10481 | } |
| 10089 | } | 10482 | } |
| 10090 | 10483 | ||
| 10091 | spec_vec = Fmake_vector (make_number (3), attrs); | 10484 | spec_vec = make_uninit_vector (3); |
| 10485 | ASET (spec_vec, 0, attrs); | ||
| 10092 | ASET (spec_vec, 1, aliases); | 10486 | ASET (spec_vec, 1, aliases); |
| 10093 | ASET (spec_vec, 2, eol_type); | 10487 | ASET (spec_vec, 2, eol_type); |
| 10094 | 10488 | ||
| @@ -10128,7 +10522,7 @@ DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put, | |||
| 10128 | { | 10522 | { |
| 10129 | if (! STRINGP (val)) | 10523 | if (! STRINGP (val)) |
| 10130 | CHECK_CHARACTER (val); | 10524 | CHECK_CHARACTER (val); |
| 10131 | CODING_ATTR_MNEMONIC (attrs) = val; | 10525 | ASET (attrs, coding_attr_mnemonic, val); |
| 10132 | } | 10526 | } |
| 10133 | else if (EQ (prop, QCdefault_char)) | 10527 | else if (EQ (prop, QCdefault_char)) |
| 10134 | { | 10528 | { |
| @@ -10136,37 +10530,37 @@ DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put, | |||
| 10136 | val = make_number (' '); | 10530 | val = make_number (' '); |
| 10137 | else | 10531 | else |
| 10138 | CHECK_CHARACTER (val); | 10532 | CHECK_CHARACTER (val); |
| 10139 | CODING_ATTR_DEFAULT_CHAR (attrs) = val; | 10533 | ASET (attrs, coding_attr_default_char, val); |
| 10140 | } | 10534 | } |
| 10141 | else if (EQ (prop, QCdecode_translation_table)) | 10535 | else if (EQ (prop, QCdecode_translation_table)) |
| 10142 | { | 10536 | { |
| 10143 | if (! CHAR_TABLE_P (val) && ! CONSP (val)) | 10537 | if (! CHAR_TABLE_P (val) && ! CONSP (val)) |
| 10144 | CHECK_SYMBOL (val); | 10538 | CHECK_SYMBOL (val); |
| 10145 | CODING_ATTR_DECODE_TBL (attrs) = val; | 10539 | ASET (attrs, coding_attr_decode_tbl, val); |
| 10146 | } | 10540 | } |
| 10147 | else if (EQ (prop, QCencode_translation_table)) | 10541 | else if (EQ (prop, QCencode_translation_table)) |
| 10148 | { | 10542 | { |
| 10149 | if (! CHAR_TABLE_P (val) && ! CONSP (val)) | 10543 | if (! CHAR_TABLE_P (val) && ! CONSP (val)) |
| 10150 | CHECK_SYMBOL (val); | 10544 | CHECK_SYMBOL (val); |
| 10151 | CODING_ATTR_ENCODE_TBL (attrs) = val; | 10545 | ASET (attrs, coding_attr_encode_tbl, val); |
| 10152 | } | 10546 | } |
| 10153 | else if (EQ (prop, QCpost_read_conversion)) | 10547 | else if (EQ (prop, QCpost_read_conversion)) |
| 10154 | { | 10548 | { |
| 10155 | CHECK_SYMBOL (val); | 10549 | CHECK_SYMBOL (val); |
| 10156 | CODING_ATTR_POST_READ (attrs) = val; | 10550 | ASET (attrs, coding_attr_post_read, val); |
| 10157 | } | 10551 | } |
| 10158 | else if (EQ (prop, QCpre_write_conversion)) | 10552 | else if (EQ (prop, QCpre_write_conversion)) |
| 10159 | { | 10553 | { |
| 10160 | CHECK_SYMBOL (val); | 10554 | CHECK_SYMBOL (val); |
| 10161 | CODING_ATTR_PRE_WRITE (attrs) = val; | 10555 | ASET (attrs, coding_attr_pre_write, val); |
| 10162 | } | 10556 | } |
| 10163 | else if (EQ (prop, QCascii_compatible_p)) | 10557 | else if (EQ (prop, QCascii_compatible_p)) |
| 10164 | { | 10558 | { |
| 10165 | CODING_ATTR_ASCII_COMPAT (attrs) = val; | 10559 | ASET (attrs, coding_attr_ascii_compat, val); |
| 10166 | } | 10560 | } |
| 10167 | 10561 | ||
| 10168 | CODING_ATTR_PLIST (attrs) | 10562 | ASET (attrs, coding_attr_plist, |
| 10169 | = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val); | 10563 | Fplist_put (CODING_ATTR_PLIST (attrs), prop, val)); |
| 10170 | return val; | 10564 | return val; |
| 10171 | } | 10565 | } |
| 10172 | 10566 | ||
| @@ -10186,7 +10580,7 @@ DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, | |||
| 10186 | list. */ | 10580 | list. */ |
| 10187 | while (!NILP (XCDR (aliases))) | 10581 | while (!NILP (XCDR (aliases))) |
| 10188 | aliases = XCDR (aliases); | 10582 | aliases = XCDR (aliases); |
| 10189 | XSETCDR (aliases, Fcons (alias, Qnil)); | 10583 | XSETCDR (aliases, list1 (alias)); |
| 10190 | 10584 | ||
| 10191 | eol_type = AREF (spec, 2); | 10585 | eol_type = AREF (spec, 2); |
| 10192 | if (VECTORP (eol_type)) | 10586 | if (VECTORP (eol_type)) |
| @@ -10349,7 +10743,7 @@ syms_of_coding (void) | |||
| 10349 | Vcode_conversion_reused_workbuf = Qnil; | 10743 | Vcode_conversion_reused_workbuf = Qnil; |
| 10350 | 10744 | ||
| 10351 | staticpro (&Vcode_conversion_workbuf_name); | 10745 | staticpro (&Vcode_conversion_workbuf_name); |
| 10352 | Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*"); | 10746 | Vcode_conversion_workbuf_name = build_pure_c_string (" *code-conversion-work*"); |
| 10353 | 10747 | ||
| 10354 | reused_workbuf_in_use = 0; | 10748 | reused_workbuf_in_use = 0; |
| 10355 | 10749 | ||
| @@ -10385,6 +10779,7 @@ syms_of_coding (void) | |||
| 10385 | DEFSYM (Qeol_type, "eol-type"); | 10779 | DEFSYM (Qeol_type, "eol-type"); |
| 10386 | DEFSYM (Qunix, "unix"); | 10780 | DEFSYM (Qunix, "unix"); |
| 10387 | DEFSYM (Qdos, "dos"); | 10781 | DEFSYM (Qdos, "dos"); |
| 10782 | DEFSYM (Qmac, "mac"); | ||
| 10388 | 10783 | ||
| 10389 | DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system"); | 10784 | DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system"); |
| 10390 | DEFSYM (Qpost_read_conversion, "post-read-conversion"); | 10785 | DEFSYM (Qpost_read_conversion, "post-read-conversion"); |
| @@ -10399,6 +10794,11 @@ syms_of_coding (void) | |||
| 10399 | DEFSYM (Qutf_8, "utf-8"); | 10794 | DEFSYM (Qutf_8, "utf-8"); |
| 10400 | DEFSYM (Qutf_8_emacs, "utf-8-emacs"); | 10795 | DEFSYM (Qutf_8_emacs, "utf-8-emacs"); |
| 10401 | 10796 | ||
| 10797 | #if defined (WINDOWSNT) || defined (CYGWIN) | ||
| 10798 | /* No, not utf-16-le: that one has a BOM. */ | ||
| 10799 | DEFSYM (Qutf_16le, "utf-16le"); | ||
| 10800 | #endif | ||
| 10801 | |||
| 10402 | DEFSYM (Qutf_16, "utf-16"); | 10802 | DEFSYM (Qutf_16, "utf-16"); |
| 10403 | DEFSYM (Qbig, "big"); | 10803 | DEFSYM (Qbig, "big"); |
| 10404 | DEFSYM (Qlittle, "little"); | 10804 | DEFSYM (Qlittle, "little"); |
| @@ -10410,14 +10810,9 @@ syms_of_coding (void) | |||
| 10410 | 10810 | ||
| 10411 | DEFSYM (Qcoding_system_error, "coding-system-error"); | 10811 | DEFSYM (Qcoding_system_error, "coding-system-error"); |
| 10412 | Fput (Qcoding_system_error, Qerror_conditions, | 10812 | Fput (Qcoding_system_error, Qerror_conditions, |
| 10413 | pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil))); | 10813 | listn (CONSTYPE_PURE, 2, Qcoding_system_error, Qerror)); |
| 10414 | Fput (Qcoding_system_error, Qerror_message, | 10814 | Fput (Qcoding_system_error, Qerror_message, |
| 10415 | make_pure_c_string ("Invalid coding system")); | 10815 | build_pure_c_string ("Invalid coding system")); |
| 10416 | |||
| 10417 | /* Intern this now in case it isn't already done. | ||
| 10418 | Setting this variable twice is harmless. | ||
| 10419 | But don't staticpro it here--that is done in alloc.c. */ | ||
| 10420 | Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots"); | ||
| 10421 | 10816 | ||
| 10422 | DEFSYM (Qtranslation_table, "translation-table"); | 10817 | DEFSYM (Qtranslation_table, "translation-table"); |
| 10423 | Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2)); | 10818 | Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2)); |
| @@ -10487,10 +10882,8 @@ syms_of_coding (void) | |||
| 10487 | intern_c_string ("coding-category-undecided")); | 10882 | intern_c_string ("coding-category-undecided")); |
| 10488 | 10883 | ||
| 10489 | DEFSYM (Qinsufficient_source, "insufficient-source"); | 10884 | DEFSYM (Qinsufficient_source, "insufficient-source"); |
| 10490 | DEFSYM (Qinconsistent_eol, "inconsistent-eol"); | ||
| 10491 | DEFSYM (Qinvalid_source, "invalid-source"); | 10885 | DEFSYM (Qinvalid_source, "invalid-source"); |
| 10492 | DEFSYM (Qinterrupted, "interrupted"); | 10886 | DEFSYM (Qinterrupted, "interrupted"); |
| 10493 | DEFSYM (Qinsufficient_memory, "insufficient-memory"); | ||
| 10494 | DEFSYM (Qcoding_system_define_form, "coding-system-define-form"); | 10887 | DEFSYM (Qcoding_system_define_form, "coding-system-define-form"); |
| 10495 | 10888 | ||
| 10496 | defsubr (&Scoding_system_p); | 10889 | defsubr (&Scoding_system_p); |
| @@ -10559,7 +10952,7 @@ Don't modify this variable directly, but use `set-coding-system-priority'. */); | |||
| 10559 | Vcoding_category_list = Qnil; | 10952 | Vcoding_category_list = Qnil; |
| 10560 | for (i = coding_category_max - 1; i >= 0; i--) | 10953 | for (i = coding_category_max - 1; i >= 0; i--) |
| 10561 | Vcoding_category_list | 10954 | Vcoding_category_list |
| 10562 | = Fcons (XVECTOR (Vcoding_category_table)->contents[i], | 10955 | = Fcons (AREF (Vcoding_category_table, i), |
| 10563 | Vcoding_category_list); | 10956 | Vcoding_category_list); |
| 10564 | } | 10957 | } |
| 10565 | 10958 | ||
| @@ -10685,22 +11078,22 @@ Also used for decoding keyboard input on X Window system. */); | |||
| 10685 | DEFVAR_LISP ("eol-mnemonic-unix", eol_mnemonic_unix, | 11078 | DEFVAR_LISP ("eol-mnemonic-unix", eol_mnemonic_unix, |
| 10686 | doc: /* | 11079 | doc: /* |
| 10687 | *String displayed in mode line for UNIX-like (LF) end-of-line format. */); | 11080 | *String displayed in mode line for UNIX-like (LF) end-of-line format. */); |
| 10688 | eol_mnemonic_unix = make_pure_c_string (":"); | 11081 | eol_mnemonic_unix = build_pure_c_string (":"); |
| 10689 | 11082 | ||
| 10690 | DEFVAR_LISP ("eol-mnemonic-dos", eol_mnemonic_dos, | 11083 | DEFVAR_LISP ("eol-mnemonic-dos", eol_mnemonic_dos, |
| 10691 | doc: /* | 11084 | doc: /* |
| 10692 | *String displayed in mode line for DOS-like (CRLF) end-of-line format. */); | 11085 | *String displayed in mode line for DOS-like (CRLF) end-of-line format. */); |
| 10693 | eol_mnemonic_dos = make_pure_c_string ("\\"); | 11086 | eol_mnemonic_dos = build_pure_c_string ("\\"); |
| 10694 | 11087 | ||
| 10695 | DEFVAR_LISP ("eol-mnemonic-mac", eol_mnemonic_mac, | 11088 | DEFVAR_LISP ("eol-mnemonic-mac", eol_mnemonic_mac, |
| 10696 | doc: /* | 11089 | doc: /* |
| 10697 | *String displayed in mode line for MAC-like (CR) end-of-line format. */); | 11090 | *String displayed in mode line for MAC-like (CR) end-of-line format. */); |
| 10698 | eol_mnemonic_mac = make_pure_c_string ("/"); | 11091 | eol_mnemonic_mac = build_pure_c_string ("/"); |
| 10699 | 11092 | ||
| 10700 | DEFVAR_LISP ("eol-mnemonic-undecided", eol_mnemonic_undecided, | 11093 | DEFVAR_LISP ("eol-mnemonic-undecided", eol_mnemonic_undecided, |
| 10701 | doc: /* | 11094 | doc: /* |
| 10702 | *String displayed in mode line when end-of-line format is not yet determined. */); | 11095 | *String displayed in mode line when end-of-line format is not yet determined. */); |
| 10703 | eol_mnemonic_undecided = make_pure_c_string (":"); | 11096 | eol_mnemonic_undecided = build_pure_c_string (":"); |
| 10704 | 11097 | ||
| 10705 | DEFVAR_LISP ("enable-character-translation", Venable_character_translation, | 11098 | DEFVAR_LISP ("enable-character-translation", Venable_character_translation, |
| 10706 | doc: /* | 11099 | doc: /* |
| @@ -10791,7 +11184,7 @@ reading if you suppress escape sequence detection. | |||
| 10791 | 11184 | ||
| 10792 | The other way to read escape sequences in a file without decoding is | 11185 | The other way to read escape sequences in a file without decoding is |
| 10793 | to explicitly specify some coding system that doesn't use ISO-2022 | 11186 | to explicitly specify some coding system that doesn't use ISO-2022 |
| 10794 | escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */); | 11187 | escape sequence (e.g., `latin-1') on reading by \\[universal-coding-system-argument]. */); |
| 10795 | inhibit_iso_escape_detection = 0; | 11188 | inhibit_iso_escape_detection = 0; |
| 10796 | 11189 | ||
| 10797 | DEFVAR_BOOL ("inhibit-null-byte-detection", | 11190 | DEFVAR_BOOL ("inhibit-null-byte-detection", |
| @@ -10807,6 +11200,11 @@ from GNU Find and GNU Grep. Emacs will then ignore the null bytes and | |||
| 10807 | decode text as usual. */); | 11200 | decode text as usual. */); |
| 10808 | inhibit_null_byte_detection = 0; | 11201 | inhibit_null_byte_detection = 0; |
| 10809 | 11202 | ||
| 11203 | DEFVAR_BOOL ("disable-ascii-optimization", disable_ascii_optimization, | ||
| 11204 | doc: /* If non-nil, Emacs does not optimize code decoder for ASCII files. | ||
| 11205 | Internal use only. Removed after the experimental optimizer gets stable. */); | ||
| 11206 | disable_ascii_optimization = 0; | ||
| 11207 | |||
| 10810 | DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input, | 11208 | DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input, |
| 10811 | doc: /* Char table for translating self-inserting characters. | 11209 | doc: /* Char table for translating self-inserting characters. |
| 10812 | This is applied to the result of input methods, not their input. | 11210 | This is applied to the result of input methods, not their input. |
| @@ -10818,11 +11216,11 @@ internal character representation. */); | |||
| 10818 | Vtranslation_table_for_input = Qnil; | 11216 | Vtranslation_table_for_input = Qnil; |
| 10819 | 11217 | ||
| 10820 | { | 11218 | { |
| 10821 | Lisp_Object args[coding_arg_max]; | 11219 | Lisp_Object args[coding_arg_undecided_max]; |
| 10822 | Lisp_Object plist[16]; | 11220 | Lisp_Object plist[16]; |
| 10823 | int i; | 11221 | int i; |
| 10824 | 11222 | ||
| 10825 | for (i = 0; i < coding_arg_max; i++) | 11223 | for (i = 0; i < coding_arg_undecided_max; i++) |
| 10826 | args[i] = Qnil; | 11224 | args[i] = Qnil; |
| 10827 | 11225 | ||
| 10828 | plist[0] = intern_c_string (":name"); | 11226 | plist[0] = intern_c_string (":name"); |
| @@ -10838,7 +11236,7 @@ internal character representation. */); | |||
| 10838 | plist[10] = intern_c_string (":for-unibyte"); | 11236 | plist[10] = intern_c_string (":for-unibyte"); |
| 10839 | plist[11] = args[coding_arg_for_unibyte] = Qt; | 11237 | plist[11] = args[coding_arg_for_unibyte] = Qt; |
| 10840 | plist[12] = intern_c_string (":docstring"); | 11238 | plist[12] = intern_c_string (":docstring"); |
| 10841 | plist[13] = make_pure_c_string ("Do no conversion.\n\ | 11239 | plist[13] = build_pure_c_string ("Do no conversion.\n\ |
| 10842 | \n\ | 11240 | \n\ |
| 10843 | When you visit a file with this coding, the file is read into a\n\ | 11241 | When you visit a file with this coding, the file is read into a\n\ |
| 10844 | unibyte buffer as is, thus each byte of a file is treated as a\n\ | 11242 | unibyte buffer as is, thus each byte of a file is treated as a\n\ |
| @@ -10856,10 +11254,12 @@ character."); | |||
| 10856 | plist[8] = intern_c_string (":charset-list"); | 11254 | plist[8] = intern_c_string (":charset-list"); |
| 10857 | plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil); | 11255 | plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil); |
| 10858 | plist[11] = args[coding_arg_for_unibyte] = Qnil; | 11256 | plist[11] = args[coding_arg_for_unibyte] = Qnil; |
| 10859 | plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding."); | 11257 | plist[13] = build_pure_c_string ("No conversion on encoding, automatic conversion on decoding."); |
| 10860 | plist[15] = args[coding_arg_eol_type] = Qnil; | 11258 | plist[15] = args[coding_arg_eol_type] = Qnil; |
| 10861 | args[coding_arg_plist] = Flist (16, plist); | 11259 | args[coding_arg_plist] = Flist (16, plist); |
| 10862 | Fdefine_coding_system_internal (coding_arg_max, args); | 11260 | args[coding_arg_undecided_inhibit_null_byte_detection] = make_number (0); |
| 11261 | args[coding_arg_undecided_inhibit_iso_escape_detection] = make_number (0); | ||
| 11262 | Fdefine_coding_system_internal (coding_arg_undecided_max, args); | ||
| 10863 | } | 11263 | } |
| 10864 | 11264 | ||
| 10865 | setup_coding_system (Qno_conversion, &safe_terminal_coding); | 11265 | setup_coding_system (Qno_conversion, &safe_terminal_coding); |