diff options
| author | Kenichi Handa | 1998-12-15 04:35:38 +0000 |
|---|---|---|
| committer | Kenichi Handa | 1998-12-15 04:35:38 +0000 |
| commit | 0282eb691536b39247884452ac76473d05cdf369 (patch) | |
| tree | 9c5d34176df50a1a7e1a6bd77f838ae580ca01ad /src | |
| parent | 8c5b01cc4be108b4084e63f143f95b6d7131e1ae (diff) | |
| download | emacs-0282eb691536b39247884452ac76473d05cdf369.tar.gz emacs-0282eb691536b39247884452ac76473d05cdf369.zip | |
(Qunknown): New variable.
(init_charset_once): Intern and staticpro Qunknown. Initialize
all elements of Vcharset_symbol_table to Qunknown.
(find_charset_in_str): New arg MULTIBYTE. If it is zero, check
unibyte characters only. For an invalid composition sequence, set
CHARSETS[1] to 1.
(Ffind_charset_region): Call find_charset_in_str with an
appropriate MULTIBYTE arg. If undefined charsets are found,
include `unknown' in the return value.
(Ffind_charset_string): Likewise.
(Fsplit_char): If CHAR is invalid, return `(unknown CHAR)'.
(str_cmpchar_id): Max composite character code should be less than
GENERIC_COMPOSITION_CHAR.
Diffstat (limited to 'src')
| -rw-r--r-- | src/charset.c | 113 |
1 files changed, 86 insertions, 27 deletions
diff --git a/src/charset.c b/src/charset.c index bf3944d47fa..ecf70b0ec1b 100644 --- a/src/charset.c +++ b/src/charset.c | |||
| @@ -41,6 +41,7 @@ Boston, MA 02111-1307, USA. */ | |||
| 41 | #endif /* emacs */ | 41 | #endif /* emacs */ |
| 42 | 42 | ||
| 43 | Lisp_Object Qcharset, Qascii, Qcomposition; | 43 | Lisp_Object Qcharset, Qascii, Qcomposition; |
| 44 | Lisp_Object Qunknown; | ||
| 44 | 45 | ||
| 45 | /* Declaration of special leading-codes. */ | 46 | /* Declaration of special leading-codes. */ |
| 46 | int leading_code_composition; /* for composite characters */ | 47 | int leading_code_composition; /* for composite characters */ |
| @@ -141,7 +142,7 @@ non_ascii_char_to_string (c, workbuf, str) | |||
| 141 | { | 142 | { |
| 142 | int charset, c1, c2; | 143 | int charset, c1, c2; |
| 143 | 144 | ||
| 144 | if (c & ~GLYPH_MASK_CHAR) | 145 | if (c & ~GLYPH_MASK_CHAR) /* This includes the case C is negative. */ |
| 145 | { | 146 | { |
| 146 | if (c & CHAR_META) | 147 | if (c & CHAR_META) |
| 147 | /* Move the meta bit to the right place for a string. */ | 148 | /* Move the meta bit to the right place for a string. */ |
| @@ -735,17 +736,42 @@ CHARSET should be defined by `defined-charset' in advance.") | |||
| 735 | 736 | ||
| 736 | If CMPCHARP is nonzero and some composite character is found, | 737 | If CMPCHARP is nonzero and some composite character is found, |
| 737 | CHARSETS[128] is also set 1 and the returned number is incremented | 738 | CHARSETS[128] is also set 1 and the returned number is incremented |
| 738 | by 1. */ | 739 | by 1. |
| 740 | |||
| 741 | If MULTIBYTE is zero, do not check multibyte characters, i.e. if | ||
| 742 | any ASCII codes (7-bit) are found, CHARSET[0] is set to 1, if any | ||
| 743 | 8-bit codes are found CHARSET[1] is set to 1. */ | ||
| 739 | 744 | ||
| 740 | int | 745 | int |
| 741 | find_charset_in_str (str, len, charsets, table, cmpcharp) | 746 | find_charset_in_str (str, len, charsets, table, cmpcharp, multibyte) |
| 742 | unsigned char *str; | 747 | unsigned char *str; |
| 743 | int len, *charsets; | 748 | int len, *charsets; |
| 744 | Lisp_Object table; | 749 | Lisp_Object table; |
| 745 | int cmpcharp; | 750 | int cmpcharp; |
| 751 | int multibyte; | ||
| 746 | { | 752 | { |
| 747 | register int num = 0, c; | 753 | register int num = 0, c; |
| 748 | 754 | ||
| 755 | if (! multibyte) | ||
| 756 | { | ||
| 757 | unsigned char *endp = str + len; | ||
| 758 | int maskbits = 0; | ||
| 759 | |||
| 760 | while (str < endp && maskbits != 3) | ||
| 761 | maskbits |= (*str++ < 0x80 ? 1 : 2); | ||
| 762 | if (maskbits & 1) | ||
| 763 | { | ||
| 764 | charsets[0] = 1; | ||
| 765 | num++; | ||
| 766 | } | ||
| 767 | if (maskbits & 2) | ||
| 768 | { | ||
| 769 | charsets[1] = 1; | ||
| 770 | num++; | ||
| 771 | } | ||
| 772 | return num; | ||
| 773 | } | ||
| 774 | |||
| 749 | if (! CHAR_TABLE_P (table)) | 775 | if (! CHAR_TABLE_P (table)) |
| 750 | table = Qnil; | 776 | table = Qnil; |
| 751 | 777 | ||
| @@ -790,7 +816,7 @@ find_charset_in_str (str, len, charsets, table, cmpcharp) | |||
| 790 | continue; | 816 | continue; |
| 791 | } | 817 | } |
| 792 | 818 | ||
| 793 | charset = CHARSET_ASCII; | 819 | charset = 1; /* This leads to `unknown' charset. */ |
| 794 | bytes = 1; | 820 | bytes = 1; |
| 795 | } | 821 | } |
| 796 | else | 822 | else |
| @@ -822,23 +848,27 @@ DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region, | |||
| 822 | BEG and END are buffer positions.\n\ | 848 | BEG and END are buffer positions.\n\ |
| 823 | If the region contains any composite character,\n\ | 849 | If the region contains any composite character,\n\ |
| 824 | `composition' is included in the returned list.\n\ | 850 | `composition' is included in the returned list.\n\ |
| 825 | Optional arg TABLE if non-nil is a translation table to look up.") | 851 | Optional arg TABLE if non-nil is a translation table to look up.\n\ |
| 852 | \n\ | ||
| 853 | If the region contains invalid multiybte characters,\n\ | ||
| 854 | `unknown' is included in the returned list. | ||
| 855 | \n\ | ||
| 856 | If the current buffer is unibyte, the returned list contains\n\ | ||
| 857 | `ascii' if any 7-bit characters are found,\n\ | ||
| 858 | and `unknown' if any 8-bit characters are found.") | ||
| 826 | (beg, end, table) | 859 | (beg, end, table) |
| 827 | Lisp_Object beg, end, table; | 860 | Lisp_Object beg, end, table; |
| 828 | { | 861 | { |
| 829 | int charsets[MAX_CHARSET + 1]; | 862 | int charsets[MAX_CHARSET + 1]; |
| 830 | int from, from_byte, to, stop, stop_byte, i; | 863 | int from, from_byte, to, stop, stop_byte, i; |
| 831 | Lisp_Object val; | 864 | Lisp_Object val; |
| 865 | int undefined; | ||
| 866 | int multibyte = !NILP (current_buffer->enable_multibyte_characters); | ||
| 832 | 867 | ||
| 833 | validate_region (&beg, &end); | 868 | validate_region (&beg, &end); |
| 834 | from = XFASTINT (beg); | 869 | from = XFASTINT (beg); |
| 835 | stop = to = XFASTINT (end); | 870 | stop = to = XFASTINT (end); |
| 836 | 871 | ||
| 837 | if (NILP (current_buffer->enable_multibyte_characters)) | ||
| 838 | return (from == to | ||
| 839 | ? Qnil | ||
| 840 | : Fcons (Qascii, Qnil)); | ||
| 841 | |||
| 842 | if (from < GPT && GPT < to) | 872 | if (from < GPT && GPT < to) |
| 843 | { | 873 | { |
| 844 | stop = GPT; | 874 | stop = GPT; |
| @@ -853,7 +883,7 @@ Optional arg TABLE if non-nil is a translation table to look up.") | |||
| 853 | while (1) | 883 | while (1) |
| 854 | { | 884 | { |
| 855 | find_charset_in_str (BYTE_POS_ADDR (from_byte), stop_byte - from_byte, | 885 | find_charset_in_str (BYTE_POS_ADDR (from_byte), stop_byte - from_byte, |
| 856 | charsets, table, 1); | 886 | charsets, table, 1, multibyte); |
| 857 | if (stop < to) | 887 | if (stop < to) |
| 858 | { | 888 | { |
| 859 | from = stop, from_byte = stop_byte; | 889 | from = stop, from_byte = stop_byte; |
| @@ -864,9 +894,17 @@ Optional arg TABLE if non-nil is a translation table to look up.") | |||
| 864 | } | 894 | } |
| 865 | 895 | ||
| 866 | val = Qnil; | 896 | val = Qnil; |
| 867 | for (i = MAX_CHARSET; i >= 0; i--) | 897 | undefined = 0; |
| 898 | for (i = (multibyte ? MAX_CHARSET : 1); i >= 0; i--) | ||
| 868 | if (charsets[i]) | 899 | if (charsets[i]) |
| 869 | val = Fcons (CHARSET_SYMBOL (i), val); | 900 | { |
| 901 | if (CHARSET_DEFINED_P (i) || i == CHARSET_COMPOSITION) | ||
| 902 | val = Fcons (CHARSET_SYMBOL (i), val); | ||
| 903 | else | ||
| 904 | undefined = 1; | ||
| 905 | } | ||
| 906 | if (undefined) | ||
| 907 | val = Fcons (Qunknown, val); | ||
| 870 | return val; | 908 | return val; |
| 871 | } | 909 | } |
| 872 | 910 | ||
| @@ -875,28 +913,41 @@ DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string, | |||
| 875 | "Return a list of charsets in STR.\n\ | 913 | "Return a list of charsets in STR.\n\ |
| 876 | If the string contains any composite characters,\n\ | 914 | If the string contains any composite characters,\n\ |
| 877 | `composition' is included in the returned list.\n\ | 915 | `composition' is included in the returned list.\n\ |
| 878 | Optional arg TABLE if non-nil is a translation table to look up.") | 916 | Optional arg TABLE if non-nil is a translation table to look up.\n\ |
| 917 | \n\ | ||
| 918 | If the region contains invalid multiybte characters,\n\ | ||
| 919 | `unknown' is included in the returned list.\n\ | ||
| 920 | \n\ | ||
| 921 | If STR is unibyte, the returned list contains\n\ | ||
| 922 | `ascii' if any 7-bit characters are found,\n\ | ||
| 923 | and `unknown' if any 8-bit characters are found.") | ||
| 879 | (str, table) | 924 | (str, table) |
| 880 | Lisp_Object str, table; | 925 | Lisp_Object str, table; |
| 881 | { | 926 | { |
| 882 | int charsets[MAX_CHARSET + 1]; | 927 | int charsets[MAX_CHARSET + 1]; |
| 883 | int i; | 928 | int i; |
| 884 | Lisp_Object val; | 929 | Lisp_Object val; |
| 930 | int undefined; | ||
| 931 | int multibyte; | ||
| 885 | 932 | ||
| 886 | CHECK_STRING (str, 0); | 933 | CHECK_STRING (str, 0); |
| 887 | 934 | multibyte = STRING_MULTIBYTE (str); | |
| 888 | if (! STRING_MULTIBYTE (str)) | ||
| 889 | return (XSTRING (str)->size == 0 | ||
| 890 | ? Qnil | ||
| 891 | : Fcons (Qascii, Qnil)); | ||
| 892 | 935 | ||
| 893 | bzero (charsets, (MAX_CHARSET + 1) * sizeof (int)); | 936 | bzero (charsets, (MAX_CHARSET + 1) * sizeof (int)); |
| 894 | find_charset_in_str (XSTRING (str)->data, STRING_BYTES (XSTRING (str)), | 937 | find_charset_in_str (XSTRING (str)->data, STRING_BYTES (XSTRING (str)), |
| 895 | charsets, table, 1); | 938 | charsets, table, 1, multibyte); |
| 896 | val = Qnil; | 939 | val = Qnil; |
| 897 | for (i = MAX_CHARSET; i >= 0; i--) | 940 | undefined = 0; |
| 941 | for (i = (multibyte ? MAX_CHARSET : 1); i >= 0; i--) | ||
| 898 | if (charsets[i]) | 942 | if (charsets[i]) |
| 899 | val = Fcons (CHARSET_SYMBOL (i), val); | 943 | { |
| 944 | if (CHARSET_DEFINED_P (i) || i == CHARSET_COMPOSITION) | ||
| 945 | val = Fcons (CHARSET_SYMBOL (i), val); | ||
| 946 | else | ||
| 947 | undefined = 1; | ||
| 948 | } | ||
| 949 | if (undefined) | ||
| 950 | val = Fcons (Qunknown, val); | ||
| 900 | return val; | 951 | return val; |
| 901 | } | 952 | } |
| 902 | 953 | ||
| @@ -923,14 +974,19 @@ DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0, | |||
| 923 | } | 974 | } |
| 924 | 975 | ||
| 925 | DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0, | 976 | DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0, |
| 926 | "Return list of charset and one or two position-codes of CHAR.") | 977 | "Return list of charset and one or two position-codes of CHAR.\n\ |
| 978 | If CHAR is invalid as a character code,\n\ | ||
| 979 | return a list of symbol `unknown' and CHAR.") | ||
| 927 | (ch) | 980 | (ch) |
| 928 | Lisp_Object ch; | 981 | Lisp_Object ch; |
| 929 | { | 982 | { |
| 930 | Lisp_Object val; | 983 | Lisp_Object val; |
| 931 | int charset, c1, c2; | 984 | int c, charset, c1, c2; |
| 932 | 985 | ||
| 933 | CHECK_NUMBER (ch, 0); | 986 | CHECK_NUMBER (ch, 0); |
| 987 | c = XFASTINT (ch); | ||
| 988 | if (!CHAR_VALID_P (c, 1)) | ||
| 989 | return Fcons (Qunknown, Fcons (ch, Qnil)); | ||
| 934 | SPLIT_CHAR (XFASTINT (ch), charset, c1, c2); | 990 | SPLIT_CHAR (XFASTINT (ch), charset, c1, c2); |
| 935 | return (c2 >= 0 | 991 | return (c2 >= 0 |
| 936 | ? Fcons (CHARSET_SYMBOL (charset), | 992 | ? Fcons (CHARSET_SYMBOL (charset), |
| @@ -1153,7 +1209,7 @@ The width is measured by how many columns it occupies on the screen.") | |||
| 1153 | else if (COMPOSITE_CHAR_P (c)) | 1209 | else if (COMPOSITE_CHAR_P (c)) |
| 1154 | { | 1210 | { |
| 1155 | int id = COMPOSITE_CHAR_ID (XFASTINT (ch)); | 1211 | int id = COMPOSITE_CHAR_ID (XFASTINT (ch)); |
| 1156 | XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 0)); | 1212 | XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 1)); |
| 1157 | } | 1213 | } |
| 1158 | else | 1214 | else |
| 1159 | { | 1215 | { |
| @@ -1469,7 +1525,7 @@ str_cmpchar_id (str, len) | |||
| 1469 | } | 1525 | } |
| 1470 | 1526 | ||
| 1471 | /* We have to register the composite character in cmpchar_table. */ | 1527 | /* We have to register the composite character in cmpchar_table. */ |
| 1472 | if (n_cmpchars > (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK)) | 1528 | if (n_cmpchars >= (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK)) |
| 1473 | /* No, we have no more room for a new composite character. */ | 1529 | /* No, we have no more room for a new composite character. */ |
| 1474 | return -1; | 1530 | return -1; |
| 1475 | 1531 | ||
| @@ -1846,7 +1902,10 @@ init_charset_once () | |||
| 1846 | Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0)); | 1902 | Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0)); |
| 1847 | Vcharset_table = Fmake_char_table (Qcharset_table, Qnil); | 1903 | Vcharset_table = Fmake_char_table (Qcharset_table, Qnil); |
| 1848 | 1904 | ||
| 1849 | Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1), Qnil); | 1905 | Qunknown = intern ("unknown"); |
| 1906 | staticpro (&Qunknown); | ||
| 1907 | Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1), | ||
| 1908 | Qunknown); | ||
| 1850 | 1909 | ||
| 1851 | /* Setup tables. */ | 1910 | /* Setup tables. */ |
| 1852 | for (i = 0; i < 2; i++) | 1911 | for (i = 0; i < 2; i++) |