aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKenichi Handa2000-07-27 06:01:19 +0000
committerKenichi Handa2000-07-27 06:01:19 +0000
commit05e6f5dcf672b220bd759beded0cae38ceb8a10b (patch)
tree7329ea76197002e26401ff9a314285ebaaed3b9a
parentabbe1b451d5b8416bbcc75db5d5bfa1dc9e79b21 (diff)
downloademacs-05e6f5dcf672b220bd759beded0cae38ceb8a10b.tar.gz
emacs-05e6f5dcf672b220bd759beded0cae38ceb8a10b.zip
(Qsafe_charsets): This variable deleted.
(Qsafe_chars, Vchar_coding_system_table, Qchar_coding_system): New variables. (coding_safe_chars): New function. (CODING_SAFE_CHAR_P): New macro. (CHARSET_OK): New arg C. Call CODING_SAFE_CHAR_P instead of checking safe_charsets member of the coding system. Caller changed. (detect_coding_iso2022): New local variable safe_chars. (DECODE_DESIGNATION): Call CODING_SAFE_CHAR_P instead of checking safe_charsets member of the coding system. (decode_coding_iso2022): New local variable safe_chars. (ENCODE_ISO_CHARACTER_DIMENSION1): Don't check unsafe chars here. (ENCODE_ISO_CHARACTER_DIMENSION2): Likewise. (ENCODE_ISO_CHARACTER): Arguments changed. Caller changed. (ENCODE_UNSAFE_CHARACTER): New macro. (encode_coding_iso2022): New local variable safe_chars. Check unsafe chars. (setup_coding_system): Delete the code to initialize coding->safe_charsets. (intersection, find_safe_codings): New functions. (Ffind_coding_systems_region_internal): New function. (syms_of_coding): Defsubr it. Initialize Qsafe_chars, Qchar_coding_system. Make Vchar_coding_system_table a Lisp variable and initialize it.
-rw-r--r--src/coding.c354
1 files changed, 267 insertions, 87 deletions
diff --git a/src/coding.c b/src/coding.c
index cc636af0ba0..d67e07687cd 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -337,7 +337,7 @@ Lisp_Object Qbuffer_file_coding_system;
337Lisp_Object Qpost_read_conversion, Qpre_write_conversion; 337Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
338Lisp_Object Qno_conversion, Qundecided; 338Lisp_Object Qno_conversion, Qundecided;
339Lisp_Object Qcoding_system_history; 339Lisp_Object Qcoding_system_history;
340Lisp_Object Qsafe_charsets; 340Lisp_Object Qsafe_chars;
341Lisp_Object Qvalid_codes; 341Lisp_Object Qvalid_codes;
342 342
343extern Lisp_Object Qinsert_file_contents, Qwrite_region; 343extern Lisp_Object Qinsert_file_contents, Qwrite_region;
@@ -471,6 +471,28 @@ Lisp_Object Vdefault_process_coding_system;
471 to avoid infinite recursive call. */ 471 to avoid infinite recursive call. */
472static int inhibit_pre_post_conversion; 472static int inhibit_pre_post_conversion;
473 473
474/* Char-table containing safe coding systems of each character. */
475Lisp_Object Vchar_coding_system_table;
476Lisp_Object Qchar_coding_system;
477
478/* Return `safe-chars' property of coding system CODING. Don't check
479 validity of CODING. */
480
481Lisp_Object
482coding_safe_chars (coding)
483 struct coding_system *coding;
484{
485 Lisp_Object coding_spec, plist, safe_chars;
486
487 coding_spec = Fget (coding->symbol, Qcoding_system);
488 plist = XVECTOR (coding_spec)->contents[3];
489 safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
490 return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
491}
492
493#define CODING_SAFE_CHAR_P(safe_chars, c) \
494 (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
495
474 496
475/*** 2. Emacs internal format (emacs-mule) handlers ***/ 497/*** 2. Emacs internal format (emacs-mule) handlers ***/
476 498
@@ -797,12 +819,14 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
797 819
798enum iso_code_class_type iso_code_class[256]; 820enum iso_code_class_type iso_code_class[256];
799 821
800#define CHARSET_OK(idx, charset) \ 822#define CHARSET_OK(idx, charset, c) \
801 (coding_system_table[idx] \ 823 (coding_system_table[idx] \
802 && (coding_system_table[idx]->safe_charsets[charset] \ 824 && (charset == CHARSET_ASCII \
803 || (CODING_SPEC_ISO_REQUESTED_DESIGNATION \ 825 || (safe_chars = coding_safe_chars (coding_system_table[idx]), \
804 (coding_system_table[idx], charset) \ 826 CODING_SAFE_CHAR_P (safe_chars, c))) \
805 != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))) 827 && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
828 charset) \
829 != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
806 830
807#define SHIFT_OUT_OK(idx) \ 831#define SHIFT_OUT_OK(idx) \
808 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0) 832 (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
@@ -830,6 +854,7 @@ detect_coding_iso2022 (src, src_end)
830 /* Dummy for ONE_MORE_BYTE. */ 854 /* Dummy for ONE_MORE_BYTE. */
831 struct coding_system dummy_coding; 855 struct coding_system dummy_coding;
832 struct coding_system *coding = &dummy_coding; 856 struct coding_system *coding = &dummy_coding;
857 Lisp_Object safe_chars;
833 858
834 reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1; 859 reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
835 while (mask && src < src_end) 860 while (mask && src < src_end)
@@ -890,19 +915,20 @@ detect_coding_iso2022 (src, src_end)
890 915
891 /* We found a valid designation sequence for CHARSET. */ 916 /* We found a valid designation sequence for CHARSET. */
892 mask &= ~CODING_CATEGORY_MASK_ISO_8BIT; 917 mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
893 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset)) 918 c = MAKE_CHAR (charset, 0, 0);
919 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
894 mask_found |= CODING_CATEGORY_MASK_ISO_7; 920 mask_found |= CODING_CATEGORY_MASK_ISO_7;
895 else 921 else
896 mask &= ~CODING_CATEGORY_MASK_ISO_7; 922 mask &= ~CODING_CATEGORY_MASK_ISO_7;
897 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset)) 923 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
898 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT; 924 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
899 else 925 else
900 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT; 926 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
901 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset)) 927 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
902 mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE; 928 mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
903 else 929 else
904 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE; 930 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
905 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset)) 931 if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
906 mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE; 932 mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
907 else 933 else
908 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE; 934 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
@@ -1042,16 +1068,17 @@ detect_coding_iso2022 (src, src_end)
1042/* Set designation state into CODING. */ 1068/* Set designation state into CODING. */
1043#define DECODE_DESIGNATION(reg, dimension, chars, final_char) \ 1069#define DECODE_DESIGNATION(reg, dimension, chars, final_char) \
1044 do { \ 1070 do { \
1045 int charset; \ 1071 int charset, c; \
1046 \ 1072 \
1047 if (final_char < '0' || final_char >= 128) \ 1073 if (final_char < '0' || final_char >= 128) \
1048 goto label_invalid_code; \ 1074 goto label_invalid_code; \
1049 charset = ISO_CHARSET_TABLE (make_number (dimension), \ 1075 charset = ISO_CHARSET_TABLE (make_number (dimension), \
1050 make_number (chars), \ 1076 make_number (chars), \
1051 make_number (final_char)); \ 1077 make_number (final_char)); \
1078 c = MAKE_CHAR (charset, 0, 0); \
1052 if (charset >= 0 \ 1079 if (charset >= 0 \
1053 && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \ 1080 && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
1054 || coding->safe_charsets[charset])) \ 1081 || CODING_SAFE_CHAR_P (safe_chars, c))) \
1055 { \ 1082 { \
1056 if (coding->spec.iso2022.last_invalid_designation_register == 0 \ 1083 if (coding->spec.iso2022.last_invalid_designation_register == 0 \
1057 && reg == 0 \ 1084 && reg == 0 \
@@ -1238,6 +1265,9 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1238 unsigned char *src_base; 1265 unsigned char *src_base;
1239 int c, charset; 1266 int c, charset;
1240 Lisp_Object translation_table; 1267 Lisp_Object translation_table;
1268 Lisp_Object safe_chars;
1269
1270 safe_chars = coding_safe_chars (coding);
1241 1271
1242 if (NILP (Venable_character_translation)) 1272 if (NILP (Venable_character_translation))
1243 translation_table = Qnil; 1273 translation_table = Qnil;
@@ -1684,16 +1714,6 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1684 *dst++ = c1 | 0x80; \ 1714 *dst++ = c1 | 0x80; \
1685 break; \ 1715 break; \
1686 } \ 1716 } \
1687 else if (coding->flags & CODING_FLAG_ISO_SAFE \
1688 && !coding->safe_charsets[charset]) \
1689 { \
1690 /* We should not encode this character, instead produce one or \
1691 two `?'s. */ \
1692 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1693 if (CHARSET_WIDTH (charset) == 2) \
1694 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1695 break; \
1696 } \
1697 else \ 1717 else \
1698 /* Since CHARSET is not yet invoked to any graphic planes, we \ 1718 /* Since CHARSET is not yet invoked to any graphic planes, we \
1699 must invoke it, or, at first, designate it to some graphic \ 1719 must invoke it, or, at first, designate it to some graphic \
@@ -1727,16 +1747,6 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1727 *dst++ = c1 | 0x80, *dst++= c2 | 0x80; \ 1747 *dst++ = c1 | 0x80, *dst++= c2 | 0x80; \
1728 break; \ 1748 break; \
1729 } \ 1749 } \
1730 else if (coding->flags & CODING_FLAG_ISO_SAFE \
1731 && !coding->safe_charsets[charset]) \
1732 { \
1733 /* We should not encode this character, instead produce one or \
1734 two `?'s. */ \
1735 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1736 if (CHARSET_WIDTH (charset) == 2) \
1737 *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
1738 break; \
1739 } \
1740 else \ 1750 else \
1741 /* Since CHARSET is not yet invoked to any graphic planes, we \ 1751 /* Since CHARSET is not yet invoked to any graphic planes, we \
1742 must invoke it, or, at first, designate it to some graphic \ 1752 must invoke it, or, at first, designate it to some graphic \
@@ -1745,35 +1755,47 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1745 dst = encode_invocation_designation (charset, coding, dst); \ 1755 dst = encode_invocation_designation (charset, coding, dst); \
1746 } while (1) 1756 } while (1)
1747 1757
1748#define ENCODE_ISO_CHARACTER(charset, c1, c2) \ 1758#define ENCODE_ISO_CHARACTER(c) \
1759 do { \
1760 int charset, c1, c2; \
1761 \
1762 SPLIT_CHAR (c, charset, c1, c2); \
1763 if (CHARSET_DEFINED_P (charset)) \
1764 { \
1765 if (CHARSET_DIMENSION (charset) == 1) \
1766 { \
1767 if (charset == CHARSET_ASCII \
1768 && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \
1769 charset = charset_latin_jisx0201; \
1770 ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1); \
1771 } \
1772 else \
1773 { \
1774 if (charset == charset_jisx0208 \
1775 && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \
1776 charset = charset_jisx0208_1978; \
1777 ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2); \
1778 } \
1779 } \
1780 else \
1781 { \
1782 *dst++ = c1; \
1783 if (c2 >= 0) \
1784 *dst++ = c2; \
1785 } \
1786 } while (0)
1787
1788
1789/* Instead of encoding character C, produce one or two `?'s. */
1790
1791#define ENCODE_UNSAFE_CHARACTER(c) \
1749 do { \ 1792 do { \
1750 int alt_charset = charset; \ 1793 ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \
1751 \ 1794 if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1) \
1752 if (CHARSET_DEFINED_P (charset)) \ 1795 ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \
1753 { \
1754 if (CHARSET_DIMENSION (charset) == 1) \
1755 { \
1756 if (charset == CHARSET_ASCII \
1757 && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \
1758 alt_charset = charset_latin_jisx0201; \
1759 ENCODE_ISO_CHARACTER_DIMENSION1 (alt_charset, c1); \
1760 } \
1761 else \
1762 { \
1763 if (charset == charset_jisx0208 \
1764 && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \
1765 alt_charset = charset_jisx0208_1978; \
1766 ENCODE_ISO_CHARACTER_DIMENSION2 (alt_charset, c1, c2); \
1767 } \
1768 } \
1769 else \
1770 { \
1771 *dst++ = c1; \
1772 if (c2 >= 0) \
1773 *dst++ = c2; \
1774 } \
1775 } while (0) 1796 } while (0)
1776 1797
1798
1777/* Produce designation and invocation codes at a place pointed by DST 1799/* Produce designation and invocation codes at a place pointed by DST
1778 to use CHARSET. The element `spec.iso2022' of *CODING is updated. 1800 to use CHARSET. The element `spec.iso2022' of *CODING is updated.
1779 Return new DST. */ 1801 Return new DST. */
@@ -1997,6 +2019,9 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1997 unsigned char *src_base; 2019 unsigned char *src_base;
1998 int c; 2020 int c;
1999 Lisp_Object translation_table; 2021 Lisp_Object translation_table;
2022 Lisp_Object safe_chars;
2023
2024 safe_chars = coding_safe_chars (coding);
2000 2025
2001 if (NILP (Venable_character_translation)) 2026 if (NILP (Venable_character_translation))
2002 translation_table = Qnil; 2027 translation_table = Qnil;
@@ -2011,8 +2036,6 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2011 coding->errors = 0; 2036 coding->errors = 0;
2012 while (1) 2037 while (1)
2013 { 2038 {
2014 int charset, c1, c2;
2015
2016 src_base = src; 2039 src_base = src;
2017 2040
2018 if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19))) 2041 if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
@@ -2065,8 +2088,11 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2065 } 2088 }
2066 else 2089 else
2067 { 2090 {
2068 SPLIT_CHAR (c, charset, c1, c2); 2091 if (coding->flags & CODING_FLAG_ISO_SAFE
2069 ENCODE_ISO_CHARACTER (charset, c1, c2); 2092 && ! CODING_SAFE_CHAR_P (safe_chars, c))
2093 ENCODE_UNSAFE_CHARACTER (c);
2094 else
2095 ENCODE_ISO_CHARACTER (c);
2070 if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) 2096 if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2071 coding->composition_rule_follows = 1; 2097 coding->composition_rule_follows = 1;
2072 } 2098 }
@@ -2125,17 +2151,17 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2125 } 2151 }
2126 } 2152 }
2127 else if (ASCII_BYTE_P (c)) 2153 else if (ASCII_BYTE_P (c))
2128 ENCODE_ISO_CHARACTER (CHARSET_ASCII, c, /* dummy */ c1); 2154 ENCODE_ISO_CHARACTER (c);
2129 else if (SINGLE_BYTE_CHAR_P (c)) 2155 else if (SINGLE_BYTE_CHAR_P (c))
2130 { 2156 {
2131 *dst++ = c; 2157 *dst++ = c;
2132 coding->errors++; 2158 coding->errors++;
2133 } 2159 }
2160 else if (coding->flags & CODING_FLAG_ISO_SAFE
2161 && ! CODING_SAFE_CHAR_P (safe_chars, c))
2162 ENCODE_UNSAFE_CHARACTER (c);
2134 else 2163 else
2135 { 2164 ENCODE_ISO_CHARACTER (c);
2136 SPLIT_CHAR (c, charset, c1, c2);
2137 ENCODE_ISO_CHARACTER (charset, c1, c2);
2138 }
2139 2165
2140 coding->consumed_char++; 2166 coding->consumed_char++;
2141 } 2167 }
@@ -2970,23 +2996,6 @@ setup_coding_system (coding_system, coding)
2970 else 2996 else
2971 goto label_invalid_coding_system; 2997 goto label_invalid_coding_system;
2972 2998
2973 val = Fplist_get (plist, Qsafe_charsets);
2974 if (EQ (val, Qt))
2975 {
2976 for (i = 0; i <= MAX_CHARSET; i++)
2977 coding->safe_charsets[i] = 1;
2978 }
2979 else
2980 {
2981 bzero (coding->safe_charsets, MAX_CHARSET + 1);
2982 while (CONSP (val))
2983 {
2984 if ((i = get_charset_id (XCAR (val))) >= 0)
2985 coding->safe_charsets[i] = 1;
2986 val = XCDR (val);
2987 }
2988 }
2989
2990 /* If the coding system has non-nil `composition' property, enable 2999 /* If the coding system has non-nil `composition' property, enable
2991 composition handling. */ 3000 composition handling. */
2992 val = Fplist_get (plist, Qcomposition); 3001 val = Fplist_get (plist, Qcomposition);
@@ -5542,6 +5551,160 @@ highest priority.")
5542 !NILP (highest)); 5551 !NILP (highest));
5543} 5552}
5544 5553
5554/* Return an intersection of lists L1 and L2. */
5555
5556static Lisp_Object
5557intersection (l1, l2)
5558 Lisp_Object l1, l2;
5559{
5560 Lisp_Object val;
5561
5562 for (val = Qnil; CONSP (l1); l1 = XCDR (l1))
5563 {
5564 if (!NILP (Fmemq (XCAR (l1), l2)))
5565 val = Fcons (XCAR (l1), val);
5566 }
5567 return val;
5568}
5569
5570
5571/* Subroutine for Ffind_coding_systems_region_internal.
5572
5573 Return a list of coding systems that safely encode the multibyte
5574 text between P and PEND. SAFE_CODINGS, if non-nil, is a list of
5575 possible coding systems. If it is nil, it means that we have not
5576 yet found any coding systems.
5577
5578 WORK_TABLE is a copy of the char-table Vchar_coding_system_table. An
5579 element of WORK_TABLE is set to t once the element is looked up.
5580
5581 If a non-ASCII single byte char is found, set
5582 *single_byte_char_found to 1. */
5583
5584static Lisp_Object
5585find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
5586 unsigned char *p, *pend;
5587 Lisp_Object safe_codings, work_table;
5588 int *single_byte_char_found;
5589{
5590 int c, len, idx;
5591 Lisp_Object val;
5592
5593 while (p < pend)
5594 {
5595 c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
5596 p += len;
5597 if (ASCII_BYTE_P (c))
5598 /* We can ignore ASCII characters here. */
5599 continue;
5600 if (SINGLE_BYTE_CHAR_P (c))
5601 *single_byte_char_found = 1;
5602 if (NILP (safe_codings))
5603 continue;
5604 /* Check the safe coding systems for C. */
5605 val = char_table_ref_and_index (work_table, c, &idx);
5606 if (EQ (val, Qt))
5607 /* This element was already checked. Ignore it. */
5608 continue;
5609 /* Remember that we checked this element. */
5610 CHAR_TABLE_SET (work_table, idx, Qt);
5611
5612 /* If there are some safe coding systems for C and we have
5613 already found the other set of coding systems for the
5614 different characters, get the intersection of them. */
5615 if (!EQ (safe_codings, Qt) && !NILP (val))
5616 val = intersection (safe_codings, val);
5617 safe_codings = val;
5618 }
5619 return safe_codings;
5620}
5621
5622
5623/* Return a list of coding systems that safely encode the text between
5624 START and END. If the text contains only ASCII or is unibyte,
5625 return t. */
5626
5627DEFUN ("find-coding-systems-region-internal",
5628 Ffind_coding_systems_region_internal,
5629 Sfind_coding_systems_region_internal, 2, 2, 0,
5630 "Internal use only.")
5631 (start, end)
5632 Lisp_Object start, end;
5633{
5634 Lisp_Object work_table, safe_codings;
5635 int non_ascii_p = 0;
5636 int single_byte_char_found = 0;
5637 unsigned char *p1, *p1end, *p2, *p2end, *p;
5638 Lisp_Object args[2];
5639
5640 if (STRINGP (start))
5641 {
5642 if (!STRING_MULTIBYTE (start))
5643 return Qt;
5644 p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start));
5645 p2 = p2end = p1end;
5646 if (XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
5647 non_ascii_p = 1;
5648 }
5649 else
5650 {
5651 int from, to, stop;
5652
5653 CHECK_NUMBER_COERCE_MARKER (start, 0);
5654 CHECK_NUMBER_COERCE_MARKER (end, 1);
5655 if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
5656 args_out_of_range (start, end);
5657 if (NILP (current_buffer->enable_multibyte_characters))
5658 return Qt;
5659 from = CHAR_TO_BYTE (XINT (start));
5660 to = CHAR_TO_BYTE (XINT (end));
5661 stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
5662 p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
5663 if (stop == to)
5664 p2 = p2end = p1end;
5665 else
5666 p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
5667 if (XINT (end) - XINT (start) != to - from)
5668 non_ascii_p = 1;
5669 }
5670
5671 if (!non_ascii_p)
5672 {
5673 /* We are sure that the text contains no multibyte character.
5674 Check if it contains eight-bit-graphic. */
5675 p = p1;
5676 for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
5677 if (p == p1end)
5678 {
5679 for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
5680 if (p == p2end)
5681 return Qt;
5682 }
5683 }
5684
5685 /* The text contains non-ASCII characters. */
5686 work_table = Fcopy_sequence (Vchar_coding_system_table);
5687 safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
5688 &single_byte_char_found);
5689 if (p2 < p2end)
5690 safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
5691 &single_byte_char_found);
5692
5693 if (!single_byte_char_found)
5694 {
5695 /* Append generic coding systems. */
5696 Lisp_Object args[2];
5697 args[0] = safe_codings;
5698 args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
5699 make_number (0));
5700 safe_codings = Fappend (make_number (2), args);
5701 }
5702 else
5703 safe_codings = Fcons (Qraw_text, Fcons (Qemacs_mule, safe_codings));
5704 return safe_codings;
5705}
5706
5707
5545Lisp_Object 5708Lisp_Object
5546code_convert_region1 (start, end, coding_system, encodep) 5709code_convert_region1 (start, end, coding_system, encodep)
5547 Lisp_Object start, end, coding_system; 5710 Lisp_Object start, end, coding_system;
@@ -6196,8 +6359,18 @@ syms_of_coding ()
6196 Qtranslation_table_for_encode = intern ("translation-table-for-encode"); 6359 Qtranslation_table_for_encode = intern ("translation-table-for-encode");
6197 staticpro (&Qtranslation_table_for_encode); 6360 staticpro (&Qtranslation_table_for_encode);
6198 6361
6199 Qsafe_charsets = intern ("safe-charsets"); 6362 Qsafe_chars = intern ("safe-chars");
6200 staticpro (&Qsafe_charsets); 6363 staticpro (&Qsafe_chars);
6364
6365 Qchar_coding_system = intern ("char-coding-system");
6366 staticpro (&Qchar_coding_system);
6367
6368 /* Intern this now in case it isn't already done.
6369 Setting this variable twice is harmless.
6370 But don't staticpro it here--that is done in alloc.c. */
6371 Qchar_table_extra_slots = intern ("char-table-extra-slots");
6372 Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
6373 Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (1));
6201 6374
6202 Qvalid_codes = intern ("valid-codes"); 6375 Qvalid_codes = intern ("valid-codes");
6203 staticpro (&Qvalid_codes); 6376 staticpro (&Qvalid_codes);
@@ -6214,6 +6387,7 @@ syms_of_coding ()
6214 defsubr (&Scheck_coding_system); 6387 defsubr (&Scheck_coding_system);
6215 defsubr (&Sdetect_coding_region); 6388 defsubr (&Sdetect_coding_region);
6216 defsubr (&Sdetect_coding_string); 6389 defsubr (&Sdetect_coding_string);
6390 defsubr (&Sfind_coding_systems_region_internal);
6217 defsubr (&Sdecode_coding_region); 6391 defsubr (&Sdecode_coding_region);
6218 defsubr (&Sencode_coding_region); 6392 defsubr (&Sencode_coding_region);
6219 defsubr (&Sdecode_coding_string); 6393 defsubr (&Sdecode_coding_string);
@@ -6417,6 +6591,12 @@ coding system used in each operation can't encode the text.\n\
6417The default value is `select-safe-coding-system' (which see)."); 6591The default value is `select-safe-coding-system' (which see).");
6418 Vselect_safe_coding_system_function = Qnil; 6592 Vselect_safe_coding_system_function = Qnil;
6419 6593
6594 DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
6595 "Char-table containing safe coding systems of each characters.\n\
6596Each element doesn't include such generic coding systems that can\n\
6597encode any characters. They are in the first extra slot.");
6598 Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
6599
6420 DEFVAR_BOOL ("inhibit-iso-escape-detection", 6600 DEFVAR_BOOL ("inhibit-iso-escape-detection",
6421 &inhibit_iso_escape_detection, 6601 &inhibit_iso_escape_detection,
6422 "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\ 6602 "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\