diff options
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 354 |
1 files changed, 267 insertions, 87 deletions
diff --git a/src/coding.c b/src/coding.c index cc636af0ba0..d67e07687cd 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -337,7 +337,7 @@ Lisp_Object Qbuffer_file_coding_system; | |||
| 337 | Lisp_Object Qpost_read_conversion, Qpre_write_conversion; | 337 | Lisp_Object Qpost_read_conversion, Qpre_write_conversion; |
| 338 | Lisp_Object Qno_conversion, Qundecided; | 338 | Lisp_Object Qno_conversion, Qundecided; |
| 339 | Lisp_Object Qcoding_system_history; | 339 | Lisp_Object Qcoding_system_history; |
| 340 | Lisp_Object Qsafe_charsets; | 340 | Lisp_Object Qsafe_chars; |
| 341 | Lisp_Object Qvalid_codes; | 341 | Lisp_Object Qvalid_codes; |
| 342 | 342 | ||
| 343 | extern Lisp_Object Qinsert_file_contents, Qwrite_region; | 343 | extern Lisp_Object Qinsert_file_contents, Qwrite_region; |
| @@ -471,6 +471,28 @@ Lisp_Object Vdefault_process_coding_system; | |||
| 471 | to avoid infinite recursive call. */ | 471 | to avoid infinite recursive call. */ |
| 472 | static int inhibit_pre_post_conversion; | 472 | static int inhibit_pre_post_conversion; |
| 473 | 473 | ||
| 474 | /* Char-table containing safe coding systems of each character. */ | ||
| 475 | Lisp_Object Vchar_coding_system_table; | ||
| 476 | Lisp_Object Qchar_coding_system; | ||
| 477 | |||
| 478 | /* Return the `safe-chars' property of coding system CODING if that property is a char-table; | ||
| 479 | otherwise return t, which CODING_SAFE_CHAR_P treats as "every character is safe". Don't check validity of CODING. */ | ||
| 480 | |||
| 481 | Lisp_Object | ||
| 482 | coding_safe_chars (coding) | ||
| 483 | struct coding_system *coding; | ||
| 484 | { | ||
| 485 | Lisp_Object coding_spec, plist, safe_chars; | ||
| 486 | |||
| 487 | coding_spec = Fget (coding->symbol, Qcoding_system); | ||
| 488 | plist = XVECTOR (coding_spec)->contents[3]; /* NOTE(review): plist is assigned but never read; the next line re-reads contents[3] directly. */ | ||
| 489 | safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars); | ||
| 490 | return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt); | ||
| 491 | } | ||
| 492 | |||
| 493 | #define CODING_SAFE_CHAR_P(safe_chars, c) /* Non-zero if SAFE_CHARS is t or its char-table entry for C is non-nil.  SAFE_CHARS is evaluated twice. */ \ | ||
| 494 | (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c))) | ||
| 495 | |||
| 474 | 496 | ||
| 475 | /*** 2. Emacs internal format (emacs-mule) handlers ***/ | 497 | /*** 2. Emacs internal format (emacs-mule) handlers ***/ |
| 476 | 498 | ||
| @@ -797,12 +819,14 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) | |||
| 797 | 819 | ||
| 798 | enum iso_code_class_type iso_code_class[256]; | 820 | enum iso_code_class_type iso_code_class[256]; |
| 799 | 821 | ||
| 800 | #define CHARSET_OK(idx, charset) \ | 822 | #define CHARSET_OK(idx, charset, c) \ |
| 801 | (coding_system_table[idx] \ | 823 | (coding_system_table[idx] \ |
| 802 | && (coding_system_table[idx]->safe_charsets[charset] \ | 824 | && (charset == CHARSET_ASCII \ |
| 803 | || (CODING_SPEC_ISO_REQUESTED_DESIGNATION \ | 825 | || (safe_chars = coding_safe_chars (coding_system_table[idx]), \ |
| 804 | (coding_system_table[idx], charset) \ | 826 | CODING_SAFE_CHAR_P (safe_chars, c))) \ |
| 805 | != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))) | 827 | && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \ |
| 828 | charset) \ | ||
| 829 | != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)) | ||
| 806 | 830 | ||
| 807 | #define SHIFT_OUT_OK(idx) \ | 831 | #define SHIFT_OUT_OK(idx) \ |
| 808 | (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0) | 832 | (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0) |
| @@ -830,6 +854,7 @@ detect_coding_iso2022 (src, src_end) | |||
| 830 | /* Dummy for ONE_MORE_BYTE. */ | 854 | /* Dummy for ONE_MORE_BYTE. */ |
| 831 | struct coding_system dummy_coding; | 855 | struct coding_system dummy_coding; |
| 832 | struct coding_system *coding = &dummy_coding; | 856 | struct coding_system *coding = &dummy_coding; |
| 857 | Lisp_Object safe_chars; | ||
| 833 | 858 | ||
| 834 | reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1; | 859 | reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1; |
| 835 | while (mask && src < src_end) | 860 | while (mask && src < src_end) |
| @@ -890,19 +915,20 @@ detect_coding_iso2022 (src, src_end) | |||
| 890 | 915 | ||
| 891 | /* We found a valid designation sequence for CHARSET. */ | 916 | /* We found a valid designation sequence for CHARSET. */ |
| 892 | mask &= ~CODING_CATEGORY_MASK_ISO_8BIT; | 917 | mask &= ~CODING_CATEGORY_MASK_ISO_8BIT; |
| 893 | if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset)) | 918 | c = MAKE_CHAR (charset, 0, 0); |
| 919 | if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c)) | ||
| 894 | mask_found |= CODING_CATEGORY_MASK_ISO_7; | 920 | mask_found |= CODING_CATEGORY_MASK_ISO_7; |
| 895 | else | 921 | else |
| 896 | mask &= ~CODING_CATEGORY_MASK_ISO_7; | 922 | mask &= ~CODING_CATEGORY_MASK_ISO_7; |
| 897 | if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset)) | 923 | if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c)) |
| 898 | mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT; | 924 | mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT; |
| 899 | else | 925 | else |
| 900 | mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT; | 926 | mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT; |
| 901 | if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset)) | 927 | if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c)) |
| 902 | mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE; | 928 | mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE; |
| 903 | else | 929 | else |
| 904 | mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE; | 930 | mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE; |
| 905 | if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset)) | 931 | if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c)) |
| 906 | mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE; | 932 | mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE; |
| 907 | else | 933 | else |
| 908 | mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE; | 934 | mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE; |
| @@ -1042,16 +1068,17 @@ detect_coding_iso2022 (src, src_end) | |||
| 1042 | /* Set designation state into CODING. */ | 1068 | /* Set designation state into CODING. */ |
| 1043 | #define DECODE_DESIGNATION(reg, dimension, chars, final_char) \ | 1069 | #define DECODE_DESIGNATION(reg, dimension, chars, final_char) \ |
| 1044 | do { \ | 1070 | do { \ |
| 1045 | int charset; \ | 1071 | int charset, c; \ |
| 1046 | \ | 1072 | \ |
| 1047 | if (final_char < '0' || final_char >= 128) \ | 1073 | if (final_char < '0' || final_char >= 128) \ |
| 1048 | goto label_invalid_code; \ | 1074 | goto label_invalid_code; \ |
| 1049 | charset = ISO_CHARSET_TABLE (make_number (dimension), \ | 1075 | charset = ISO_CHARSET_TABLE (make_number (dimension), \ |
| 1050 | make_number (chars), \ | 1076 | make_number (chars), \ |
| 1051 | make_number (final_char)); \ | 1077 | make_number (final_char)); \ |
| 1078 | c = MAKE_CHAR (charset, 0, 0); \ | ||
| 1052 | if (charset >= 0 \ | 1079 | if (charset >= 0 \ |
| 1053 | && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \ | 1080 | && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \ |
| 1054 | || coding->safe_charsets[charset])) \ | 1081 | || CODING_SAFE_CHAR_P (safe_chars, c))) \ |
| 1055 | { \ | 1082 | { \ |
| 1056 | if (coding->spec.iso2022.last_invalid_designation_register == 0 \ | 1083 | if (coding->spec.iso2022.last_invalid_designation_register == 0 \ |
| 1057 | && reg == 0 \ | 1084 | && reg == 0 \ |
| @@ -1238,6 +1265,9 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1238 | unsigned char *src_base; | 1265 | unsigned char *src_base; |
| 1239 | int c, charset; | 1266 | int c, charset; |
| 1240 | Lisp_Object translation_table; | 1267 | Lisp_Object translation_table; |
| 1268 | Lisp_Object safe_chars; | ||
| 1269 | |||
| 1270 | safe_chars = coding_safe_chars (coding); | ||
| 1241 | 1271 | ||
| 1242 | if (NILP (Venable_character_translation)) | 1272 | if (NILP (Venable_character_translation)) |
| 1243 | translation_table = Qnil; | 1273 | translation_table = Qnil; |
| @@ -1684,16 +1714,6 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1684 | *dst++ = c1 | 0x80; \ | 1714 | *dst++ = c1 | 0x80; \ |
| 1685 | break; \ | 1715 | break; \ |
| 1686 | } \ | 1716 | } \ |
| 1687 | else if (coding->flags & CODING_FLAG_ISO_SAFE \ | ||
| 1688 | && !coding->safe_charsets[charset]) \ | ||
| 1689 | { \ | ||
| 1690 | /* We should not encode this character, instead produce one or \ | ||
| 1691 | two `?'s. */ \ | ||
| 1692 | *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \ | ||
| 1693 | if (CHARSET_WIDTH (charset) == 2) \ | ||
| 1694 | *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \ | ||
| 1695 | break; \ | ||
| 1696 | } \ | ||
| 1697 | else \ | 1717 | else \ |
| 1698 | /* Since CHARSET is not yet invoked to any graphic planes, we \ | 1718 | /* Since CHARSET is not yet invoked to any graphic planes, we \ |
| 1699 | must invoke it, or, at first, designate it to some graphic \ | 1719 | must invoke it, or, at first, designate it to some graphic \ |
| @@ -1727,16 +1747,6 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1727 | *dst++ = c1 | 0x80, *dst++= c2 | 0x80; \ | 1747 | *dst++ = c1 | 0x80, *dst++= c2 | 0x80; \ |
| 1728 | break; \ | 1748 | break; \ |
| 1729 | } \ | 1749 | } \ |
| 1730 | else if (coding->flags & CODING_FLAG_ISO_SAFE \ | ||
| 1731 | && !coding->safe_charsets[charset]) \ | ||
| 1732 | { \ | ||
| 1733 | /* We should not encode this character, instead produce one or \ | ||
| 1734 | two `?'s. */ \ | ||
| 1735 | *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \ | ||
| 1736 | if (CHARSET_WIDTH (charset) == 2) \ | ||
| 1737 | *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \ | ||
| 1738 | break; \ | ||
| 1739 | } \ | ||
| 1740 | else \ | 1750 | else \ |
| 1741 | /* Since CHARSET is not yet invoked to any graphic planes, we \ | 1751 | /* Since CHARSET is not yet invoked to any graphic planes, we \ |
| 1742 | must invoke it, or, at first, designate it to some graphic \ | 1752 | must invoke it, or, at first, designate it to some graphic \ |
| @@ -1745,35 +1755,47 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1745 | dst = encode_invocation_designation (charset, coding, dst); \ | 1755 | dst = encode_invocation_designation (charset, coding, dst); \ |
| 1746 | } while (1) | 1756 | } while (1) |
| 1747 | 1757 | ||
| 1748 | #define ENCODE_ISO_CHARACTER(charset, c1, c2) \ | 1758 | #define ENCODE_ISO_CHARACTER(c) \ |
| 1759 | do { \ | ||
| 1760 | int charset, c1, c2; \ | ||
| 1761 | \ | ||
| 1762 | SPLIT_CHAR (c, charset, c1, c2); \ | ||
| 1763 | if (CHARSET_DEFINED_P (charset)) \ | ||
| 1764 | { \ | ||
| 1765 | if (CHARSET_DIMENSION (charset) == 1) \ | ||
| 1766 | { \ | ||
| 1767 | if (charset == CHARSET_ASCII \ | ||
| 1768 | && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \ | ||
| 1769 | charset = charset_latin_jisx0201; \ | ||
| 1770 | ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1); \ | ||
| 1771 | } \ | ||
| 1772 | else \ | ||
| 1773 | { \ | ||
| 1774 | if (charset == charset_jisx0208 \ | ||
| 1775 | && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \ | ||
| 1776 | charset = charset_jisx0208_1978; \ | ||
| 1777 | ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2); \ | ||
| 1778 | } \ | ||
| 1779 | } \ | ||
| 1780 | else \ | ||
| 1781 | { \ | ||
| 1782 | *dst++ = c1; \ | ||
| 1783 | if (c2 >= 0) \ | ||
| 1784 | *dst++ = c2; \ | ||
| 1785 | } \ | ||
| 1786 | } while (0) | ||
| 1787 | |||
| 1788 | |||
| 1789 | /* Instead of encoding character C, produce one or two `?'s. */ | ||
| 1790 | |||
| 1791 | #define ENCODE_UNSAFE_CHARACTER(c) \ | ||
| 1749 | do { \ | 1792 | do { \ |
| 1750 | int alt_charset = charset; \ | 1793 | ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \ |
| 1751 | \ | 1794 | if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1) \ |
| 1752 | if (CHARSET_DEFINED_P (charset)) \ | 1795 | ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \ |
| 1753 | { \ | ||
| 1754 | if (CHARSET_DIMENSION (charset) == 1) \ | ||
| 1755 | { \ | ||
| 1756 | if (charset == CHARSET_ASCII \ | ||
| 1757 | && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \ | ||
| 1758 | alt_charset = charset_latin_jisx0201; \ | ||
| 1759 | ENCODE_ISO_CHARACTER_DIMENSION1 (alt_charset, c1); \ | ||
| 1760 | } \ | ||
| 1761 | else \ | ||
| 1762 | { \ | ||
| 1763 | if (charset == charset_jisx0208 \ | ||
| 1764 | && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \ | ||
| 1765 | alt_charset = charset_jisx0208_1978; \ | ||
| 1766 | ENCODE_ISO_CHARACTER_DIMENSION2 (alt_charset, c1, c2); \ | ||
| 1767 | } \ | ||
| 1768 | } \ | ||
| 1769 | else \ | ||
| 1770 | { \ | ||
| 1771 | *dst++ = c1; \ | ||
| 1772 | if (c2 >= 0) \ | ||
| 1773 | *dst++ = c2; \ | ||
| 1774 | } \ | ||
| 1775 | } while (0) | 1796 | } while (0) |
| 1776 | 1797 | ||
| 1798 | |||
| 1777 | /* Produce designation and invocation codes at a place pointed by DST | 1799 | /* Produce designation and invocation codes at a place pointed by DST |
| 1778 | to use CHARSET. The element `spec.iso2022' of *CODING is updated. | 1800 | to use CHARSET. The element `spec.iso2022' of *CODING is updated. |
| 1779 | Return new DST. */ | 1801 | Return new DST. */ |
| @@ -1997,6 +2019,9 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 1997 | unsigned char *src_base; | 2019 | unsigned char *src_base; |
| 1998 | int c; | 2020 | int c; |
| 1999 | Lisp_Object translation_table; | 2021 | Lisp_Object translation_table; |
| 2022 | Lisp_Object safe_chars; | ||
| 2023 | |||
| 2024 | safe_chars = coding_safe_chars (coding); | ||
| 2000 | 2025 | ||
| 2001 | if (NILP (Venable_character_translation)) | 2026 | if (NILP (Venable_character_translation)) |
| 2002 | translation_table = Qnil; | 2027 | translation_table = Qnil; |
| @@ -2011,8 +2036,6 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 2011 | coding->errors = 0; | 2036 | coding->errors = 0; |
| 2012 | while (1) | 2037 | while (1) |
| 2013 | { | 2038 | { |
| 2014 | int charset, c1, c2; | ||
| 2015 | |||
| 2016 | src_base = src; | 2039 | src_base = src; |
| 2017 | 2040 | ||
| 2018 | if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19))) | 2041 | if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19))) |
| @@ -2065,8 +2088,11 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 2065 | } | 2088 | } |
| 2066 | else | 2089 | else |
| 2067 | { | 2090 | { |
| 2068 | SPLIT_CHAR (c, charset, c1, c2); | 2091 | if (coding->flags & CODING_FLAG_ISO_SAFE |
| 2069 | ENCODE_ISO_CHARACTER (charset, c1, c2); | 2092 | && ! CODING_SAFE_CHAR_P (safe_chars, c)) |
| 2093 | ENCODE_UNSAFE_CHARACTER (c); | ||
| 2094 | else | ||
| 2095 | ENCODE_ISO_CHARACTER (c); | ||
| 2070 | if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) | 2096 | if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) |
| 2071 | coding->composition_rule_follows = 1; | 2097 | coding->composition_rule_follows = 1; |
| 2072 | } | 2098 | } |
| @@ -2125,17 +2151,17 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) | |||
| 2125 | } | 2151 | } |
| 2126 | } | 2152 | } |
| 2127 | else if (ASCII_BYTE_P (c)) | 2153 | else if (ASCII_BYTE_P (c)) |
| 2128 | ENCODE_ISO_CHARACTER (CHARSET_ASCII, c, /* dummy */ c1); | 2154 | ENCODE_ISO_CHARACTER (c); |
| 2129 | else if (SINGLE_BYTE_CHAR_P (c)) | 2155 | else if (SINGLE_BYTE_CHAR_P (c)) |
| 2130 | { | 2156 | { |
| 2131 | *dst++ = c; | 2157 | *dst++ = c; |
| 2132 | coding->errors++; | 2158 | coding->errors++; |
| 2133 | } | 2159 | } |
| 2160 | else if (coding->flags & CODING_FLAG_ISO_SAFE | ||
| 2161 | && ! CODING_SAFE_CHAR_P (safe_chars, c)) | ||
| 2162 | ENCODE_UNSAFE_CHARACTER (c); | ||
| 2134 | else | 2163 | else |
| 2135 | { | 2164 | ENCODE_ISO_CHARACTER (c); |
| 2136 | SPLIT_CHAR (c, charset, c1, c2); | ||
| 2137 | ENCODE_ISO_CHARACTER (charset, c1, c2); | ||
| 2138 | } | ||
| 2139 | 2165 | ||
| 2140 | coding->consumed_char++; | 2166 | coding->consumed_char++; |
| 2141 | } | 2167 | } |
| @@ -2970,23 +2996,6 @@ setup_coding_system (coding_system, coding) | |||
| 2970 | else | 2996 | else |
| 2971 | goto label_invalid_coding_system; | 2997 | goto label_invalid_coding_system; |
| 2972 | 2998 | ||
| 2973 | val = Fplist_get (plist, Qsafe_charsets); | ||
| 2974 | if (EQ (val, Qt)) | ||
| 2975 | { | ||
| 2976 | for (i = 0; i <= MAX_CHARSET; i++) | ||
| 2977 | coding->safe_charsets[i] = 1; | ||
| 2978 | } | ||
| 2979 | else | ||
| 2980 | { | ||
| 2981 | bzero (coding->safe_charsets, MAX_CHARSET + 1); | ||
| 2982 | while (CONSP (val)) | ||
| 2983 | { | ||
| 2984 | if ((i = get_charset_id (XCAR (val))) >= 0) | ||
| 2985 | coding->safe_charsets[i] = 1; | ||
| 2986 | val = XCDR (val); | ||
| 2987 | } | ||
| 2988 | } | ||
| 2989 | |||
| 2990 | /* If the coding system has non-nil `composition' property, enable | 2999 | /* If the coding system has non-nil `composition' property, enable |
| 2991 | composition handling. */ | 3000 | composition handling. */ |
| 2992 | val = Fplist_get (plist, Qcomposition); | 3001 | val = Fplist_get (plist, Qcomposition); |
| @@ -5542,6 +5551,160 @@ highest priority.") | |||
| 5542 | !NILP (highest)); | 5551 | !NILP (highest)); |
| 5543 | } | 5552 | } |
| 5544 | 5553 | ||
| 5554 | /* Return a new list of the elements of list L1 that are also found in list L2 by Fmemq.  The result is in reverse order of L1. */ | ||
| 5555 | |||
| 5556 | static Lisp_Object | ||
| 5557 | intersection (l1, l2) | ||
| 5558 | Lisp_Object l1, l2; | ||
| 5559 | { | ||
| 5560 | Lisp_Object val; | ||
| 5561 | |||
| 5562 | for (val = Qnil; CONSP (l1); l1 = XCDR (l1)) /* Stops at the first non-cons tail of L1. */ | ||
| 5563 | { | ||
| 5564 | if (!NILP (Fmemq (XCAR (l1), l2))) | ||
| 5565 | val = Fcons (XCAR (l1), val); /* Pushing onto the front is what reverses the order. */ | ||
| 5566 | } | ||
| 5567 | return val; | ||
| 5568 | } | ||
| 5569 | |||
| 5570 | |||
| 5571 | /* Subroutine for Ffind_coding_systems_region_internal. | ||
| 5572 | |||
| 5573 | Return a list of coding systems that safely encode the multibyte | ||
| 5574 | text between P and PEND. SAFE_CODINGS is t if no character has | ||
| 5575 | constrained the result yet; otherwise it is the list of coding systems | ||
| 5576 | still possible. Once it is nil, the text is scanned only for single byte chars. | ||
| 5577 | |||
| 5578 | WORK_TABLE is a copy of the char-table Vchar_coding_system_table. An | ||
| 5579 | element of WORK_TABLE is set to t once the element is looked up. | ||
| 5580 | |||
| 5581 | If a non-ASCII single byte char is found, set | ||
| 5582 | *single_byte_char_found to 1. */ | ||
| 5583 | |||
| 5584 | static Lisp_Object | ||
| 5585 | find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found) | ||
| 5586 | unsigned char *p, *pend; | ||
| 5587 | Lisp_Object safe_codings, work_table; | ||
| 5588 | int *single_byte_char_found; | ||
| 5589 | { | ||
| 5590 | int c, len, idx; | ||
| 5591 | Lisp_Object val; | ||
| 5592 | |||
| 5593 | while (p < pend) | ||
| 5594 | { | ||
| 5595 | c = STRING_CHAR_AND_LENGTH (p, pend - p, len); | ||
| 5596 | p += len; | ||
| 5597 | if (ASCII_BYTE_P (c)) | ||
| 5598 | /* We can ignore ASCII characters here. */ | ||
| 5599 | continue; | ||
| 5600 | if (SINGLE_BYTE_CHAR_P (c)) | ||
| 5601 | *single_byte_char_found = 1; | ||
| 5602 | if (NILP (safe_codings)) /* No candidate is left; skip the table lookup but keep scanning for single byte chars. */ | ||
| 5603 | continue; | ||
| 5604 | /* Check the safe coding systems for C. */ | ||
| 5605 | val = char_table_ref_and_index (work_table, c, &idx); | ||
| 5606 | if (EQ (val, Qt)) | ||
| 5607 | /* This element was already checked. Ignore it. */ | ||
| 5608 | continue; | ||
| 5609 | /* Remember that we checked this element. */ | ||
| 5610 | CHAR_TABLE_SET (work_table, idx, Qt); | ||
| 5611 | |||
| 5612 | /* If there are some safe coding systems for C and we have | ||
| 5613 | already found the other set of coding systems for the | ||
| 5614 | different characters, get the intersection of them. */ | ||
| 5615 | if (!EQ (safe_codings, Qt) && !NILP (val)) | ||
| 5616 | val = intersection (safe_codings, val); | ||
| 5617 | safe_codings = val; /* A nil VAL (no coding system listed for C) makes the result nil. */ | ||
| 5618 | } | ||
| 5619 | return safe_codings; | ||
| 5620 | } | ||
| 5621 | |||
| 5622 | |||
| 5623 | /* Return a list of coding systems that safely encode the text between | ||
| 5624 | START and END. If START is a string, examine that string and do not use END. If the text contains only ASCII or is unibyte, | ||
| 5625 | return t. */ | ||
| 5626 | |||
| 5627 | DEFUN ("find-coding-systems-region-internal", | ||
| 5628 | Ffind_coding_systems_region_internal, | ||
| 5629 | Sfind_coding_systems_region_internal, 2, 2, 0, | ||
| 5630 | "Internal use only.") | ||
| 5631 | (start, end) | ||
| 5632 | Lisp_Object start, end; | ||
| 5633 | { | ||
| 5634 | Lisp_Object work_table, safe_codings; | ||
| 5635 | int non_ascii_p = 0; | ||
| 5636 | int single_byte_char_found = 0; | ||
| 5637 | unsigned char *p1, *p1end, *p2, *p2end, *p; | ||
| 5638 | Lisp_Object args[2]; /* NOTE(review): never used at this scope; shadowed by the args declared in the block near the end. */ | ||
| 5639 | |||
| 5640 | if (STRINGP (start)) | ||
| 5641 | { | ||
| 5642 | if (!STRING_MULTIBYTE (start)) | ||
| 5643 | return Qt; | ||
| 5644 | p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start)); | ||
| 5645 | p2 = p2end = p1end; /* A string is one segment, so the second segment is empty. */ | ||
| 5646 | if (XSTRING (start)->size != STRING_BYTES (XSTRING (start))) /* Size and byte count differ, so not every char is one byte. */ | ||
| 5647 | non_ascii_p = 1; | ||
| 5648 | } | ||
| 5649 | else | ||
| 5650 | { | ||
| 5651 | int from, to, stop; | ||
| 5652 | |||
| 5653 | CHECK_NUMBER_COERCE_MARKER (start, 0); | ||
| 5654 | CHECK_NUMBER_COERCE_MARKER (end, 1); | ||
| 5655 | if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end)) | ||
| 5656 | args_out_of_range (start, end); | ||
| 5657 | if (NILP (current_buffer->enable_multibyte_characters)) | ||
| 5658 | return Qt; | ||
| 5659 | from = CHAR_TO_BYTE (XINT (start)); | ||
| 5660 | to = CHAR_TO_BYTE (XINT (end)); | ||
| 5661 | stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to; /* Split the range at GPT_BYTE when it lies strictly inside; presumably that is the buffer gap -- TODO confirm. */ | ||
| 5662 | p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from); | ||
| 5663 | if (stop == to) | ||
| 5664 | p2 = p2end = p1end; | ||
| 5665 | else | ||
| 5666 | p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop); | ||
| 5667 | if (XINT (end) - XINT (start) != to - from) /* Character count and byte count differ, so not every char is one byte. */ | ||
| 5668 | non_ascii_p = 1; | ||
| 5669 | } | ||
| 5670 | |||
| 5671 | if (!non_ascii_p) | ||
| 5672 | { | ||
| 5673 | /* We are sure that the text contains no multibyte character. | ||
| 5674 | Check if it contains eight-bit-graphic. */ | ||
| 5675 | p = p1; /* NOTE(review): redundant; the for loop below initializes p again. */ | ||
| 5676 | for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++); | ||
| 5677 | if (p == p1end) | ||
| 5678 | { | ||
| 5679 | for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++); | ||
| 5680 | if (p == p2end) | ||
| 5681 | return Qt; | ||
| 5682 | } | ||
| 5683 | } | ||
| 5684 | |||
| 5685 | /* The text contains non-ASCII characters. */ | ||
| 5686 | work_table = Fcopy_sequence (Vchar_coding_system_table); | ||
| 5687 | safe_codings = find_safe_codings (p1, p1end, Qt, work_table, | ||
| 5688 | &single_byte_char_found); | ||
| 5689 | if (p2 < p2end) | ||
| 5690 | safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table, | ||
| 5691 | &single_byte_char_found); | ||
| 5692 | |||
| 5693 | if (!single_byte_char_found) | ||
| 5694 | { | ||
| 5695 | /* Append generic coding systems, taken from extra slot 0 of Vchar_coding_system_table. */ | ||
| 5696 | Lisp_Object args[2]; | ||
| 5697 | args[0] = safe_codings; | ||
| 5698 | args[1] = Fchar_table_extra_slot (Vchar_coding_system_table, | ||
| 5699 | make_number (0)); | ||
| 5700 | safe_codings = Fappend (make_number (2), args); | ||
| 5701 | } | ||
| 5702 | else | ||
| 5703 | safe_codings = Fcons (Qraw_text, Fcons (Qemacs_mule, safe_codings)); /* With a non-ASCII single byte char, prepend raw-text and emacs-mule instead of the generic list. */ | ||
| 5704 | return safe_codings; | ||
| 5705 | } | ||
| 5706 | |||
| 5707 | |||
| 5545 | Lisp_Object | 5708 | Lisp_Object |
| 5546 | code_convert_region1 (start, end, coding_system, encodep) | 5709 | code_convert_region1 (start, end, coding_system, encodep) |
| 5547 | Lisp_Object start, end, coding_system; | 5710 | Lisp_Object start, end, coding_system; |
| @@ -6196,8 +6359,18 @@ syms_of_coding () | |||
| 6196 | Qtranslation_table_for_encode = intern ("translation-table-for-encode"); | 6359 | Qtranslation_table_for_encode = intern ("translation-table-for-encode"); |
| 6197 | staticpro (&Qtranslation_table_for_encode); | 6360 | staticpro (&Qtranslation_table_for_encode); |
| 6198 | 6361 | ||
| 6199 | Qsafe_charsets = intern ("safe-charsets"); | 6362 | Qsafe_chars = intern ("safe-chars"); |
| 6200 | staticpro (&Qsafe_charsets); | 6363 | staticpro (&Qsafe_chars); |
| 6364 | |||
| 6365 | Qchar_coding_system = intern ("char-coding-system"); | ||
| 6366 | staticpro (&Qchar_coding_system); | ||
| 6367 | |||
| 6368 | /* Intern this now in case it isn't already done. | ||
| 6369 | Setting this variable twice is harmless. | ||
| 6370 | But don't staticpro it here--that is done in alloc.c. */ | ||
| 6371 | Qchar_table_extra_slots = intern ("char-table-extra-slots"); | ||
| 6372 | Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0)); | ||
| 6373 | Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (1)); | ||
| 6201 | 6374 | ||
| 6202 | Qvalid_codes = intern ("valid-codes"); | 6375 | Qvalid_codes = intern ("valid-codes"); |
| 6203 | staticpro (&Qvalid_codes); | 6376 | staticpro (&Qvalid_codes); |
| @@ -6214,6 +6387,7 @@ syms_of_coding () | |||
| 6214 | defsubr (&Scheck_coding_system); | 6387 | defsubr (&Scheck_coding_system); |
| 6215 | defsubr (&Sdetect_coding_region); | 6388 | defsubr (&Sdetect_coding_region); |
| 6216 | defsubr (&Sdetect_coding_string); | 6389 | defsubr (&Sdetect_coding_string); |
| 6390 | defsubr (&Sfind_coding_systems_region_internal); | ||
| 6217 | defsubr (&Sdecode_coding_region); | 6391 | defsubr (&Sdecode_coding_region); |
| 6218 | defsubr (&Sencode_coding_region); | 6392 | defsubr (&Sencode_coding_region); |
| 6219 | defsubr (&Sdecode_coding_string); | 6393 | defsubr (&Sdecode_coding_string); |
| @@ -6417,6 +6591,12 @@ coding system used in each operation can't encode the text.\n\ | |||
| 6417 | The default value is `select-safe-coding-system' (which see)."); | 6591 | The default value is `select-safe-coding-system' (which see)."); |
| 6418 | Vselect_safe_coding_system_function = Qnil; | 6592 | Vselect_safe_coding_system_function = Qnil; |
| 6419 | 6593 | ||
| 6594 | DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table, | ||
| 6595 | "Char-table containing safe coding systems of each characters.\n\ | ||
| 6596 | Each element doesn't include such generic coding systems that can\n\ | ||
| 6597 | encode any characters. They are in the first extra slot."); | ||
| 6598 | Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil); | ||
| 6599 | |||
| 6420 | DEFVAR_BOOL ("inhibit-iso-escape-detection", | 6600 | DEFVAR_BOOL ("inhibit-iso-escape-detection", |
| 6421 | &inhibit_iso_escape_detection, | 6601 | &inhibit_iso_escape_detection, |
| 6422 | "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\ | 6602 | "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\ |