aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorKenichi Handa1998-12-15 04:35:38 +0000
committerKenichi Handa1998-12-15 04:35:38 +0000
commit0282eb691536b39247884452ac76473d05cdf369 (patch)
tree9c5d34176df50a1a7e1a6bd77f838ae580ca01ad /src
parent8c5b01cc4be108b4084e63f143f95b6d7131e1ae (diff)
downloademacs-0282eb691536b39247884452ac76473d05cdf369.tar.gz
emacs-0282eb691536b39247884452ac76473d05cdf369.zip
(Qunknown): New variable.
(init_charset_once): Intern and staticpro Qunknown. Initialize all elements of Vcharset_symbol_table to Qunknown. (find_charset_in_str): New arg MULTIBYTE. If it is zero, check unibyte characters only. For an invalid composition sequence, set CHARSETS[1] to 1. (Ffind_charset_region): Call find_charset_in_str with an appropriate MULTIBYTE arg. If undefined charsets are found, include `unknown' in the return value. (Ffind_charset_string): Likewise. (Fsplit_char): If CHAR is invalid, return `(unknown CHAR)'. (str_cmpchar_id): Max composite character code should be less than GENERIC_COMPOSITION_CHAR.
Diffstat (limited to 'src')
-rw-r--r--src/charset.c113
1 files changed, 86 insertions, 27 deletions
diff --git a/src/charset.c b/src/charset.c
index bf3944d47fa..ecf70b0ec1b 100644
--- a/src/charset.c
+++ b/src/charset.c
@@ -41,6 +41,7 @@ Boston, MA 02111-1307, USA. */
41#endif /* emacs */ 41#endif /* emacs */
42 42
43Lisp_Object Qcharset, Qascii, Qcomposition; 43Lisp_Object Qcharset, Qascii, Qcomposition;
44Lisp_Object Qunknown;
44 45
45/* Declaration of special leading-codes. */ 46/* Declaration of special leading-codes. */
46int leading_code_composition; /* for composite characters */ 47int leading_code_composition; /* for composite characters */
@@ -141,7 +142,7 @@ non_ascii_char_to_string (c, workbuf, str)
141{ 142{
142 int charset, c1, c2; 143 int charset, c1, c2;
143 144
144 if (c & ~GLYPH_MASK_CHAR) 145 if (c & ~GLYPH_MASK_CHAR) /* This includes the case C is negative. */
145 { 146 {
146 if (c & CHAR_META) 147 if (c & CHAR_META)
147 /* Move the meta bit to the right place for a string. */ 148 /* Move the meta bit to the right place for a string. */
@@ -735,17 +736,42 @@ CHARSET should be defined by `defined-charset' in advance.")
735 736
736 If CMPCHARP is nonzero and some composite character is found, 737 If CMPCHARP is nonzero and some composite character is found,
737 CHARSETS[128] is also set 1 and the returned number is incremented 738 CHARSETS[128] is also set 1 and the returned number is incremented
738 by 1. */ 739 by 1.
740
741 If MULTIBYTE is zero, do not check multibyte characters, i.e. if
742 any ASCII codes (7-bit) are found, CHARSET[0] is set to 1, if any
743 8-bit codes are found CHARSET[1] is set to 1. */
739 744
740int 745int
741find_charset_in_str (str, len, charsets, table, cmpcharp) 746find_charset_in_str (str, len, charsets, table, cmpcharp, multibyte)
742 unsigned char *str; 747 unsigned char *str;
743 int len, *charsets; 748 int len, *charsets;
744 Lisp_Object table; 749 Lisp_Object table;
745 int cmpcharp; 750 int cmpcharp;
751 int multibyte;
746{ 752{
747 register int num = 0, c; 753 register int num = 0, c;
748 754
755 if (! multibyte)
756 {
757 unsigned char *endp = str + len;
758 int maskbits = 0;
759
760 while (str < endp && maskbits != 3)
761 maskbits |= (*str++ < 0x80 ? 1 : 2);
762 if (maskbits & 1)
763 {
764 charsets[0] = 1;
765 num++;
766 }
767 if (maskbits & 2)
768 {
769 charsets[1] = 1;
770 num++;
771 }
772 return num;
773 }
774
749 if (! CHAR_TABLE_P (table)) 775 if (! CHAR_TABLE_P (table))
750 table = Qnil; 776 table = Qnil;
751 777
@@ -790,7 +816,7 @@ find_charset_in_str (str, len, charsets, table, cmpcharp)
790 continue; 816 continue;
791 } 817 }
792 818
793 charset = CHARSET_ASCII; 819 charset = 1; /* This leads to `unknown' charset. */
794 bytes = 1; 820 bytes = 1;
795 } 821 }
796 else 822 else
@@ -822,23 +848,27 @@ DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
822BEG and END are buffer positions.\n\ 848BEG and END are buffer positions.\n\
823If the region contains any composite character,\n\ 849If the region contains any composite character,\n\
824`composition' is included in the returned list.\n\ 850`composition' is included in the returned list.\n\
825Optional arg TABLE if non-nil is a translation table to look up.") 851Optional arg TABLE if non-nil is a translation table to look up.\n\
852\n\
853If the region contains invalid multiybte characters,\n\
854`unknown' is included in the returned list.
855\n\
856If the current buffer is unibyte, the returned list contains\n\
857`ascii' if any 7-bit characters are found,\n\
858and `unknown' if any 8-bit characters are found.")
826 (beg, end, table) 859 (beg, end, table)
827 Lisp_Object beg, end, table; 860 Lisp_Object beg, end, table;
828{ 861{
829 int charsets[MAX_CHARSET + 1]; 862 int charsets[MAX_CHARSET + 1];
830 int from, from_byte, to, stop, stop_byte, i; 863 int from, from_byte, to, stop, stop_byte, i;
831 Lisp_Object val; 864 Lisp_Object val;
865 int undefined;
866 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
832 867
833 validate_region (&beg, &end); 868 validate_region (&beg, &end);
834 from = XFASTINT (beg); 869 from = XFASTINT (beg);
835 stop = to = XFASTINT (end); 870 stop = to = XFASTINT (end);
836 871
837 if (NILP (current_buffer->enable_multibyte_characters))
838 return (from == to
839 ? Qnil
840 : Fcons (Qascii, Qnil));
841
842 if (from < GPT && GPT < to) 872 if (from < GPT && GPT < to)
843 { 873 {
844 stop = GPT; 874 stop = GPT;
@@ -853,7 +883,7 @@ Optional arg TABLE if non-nil is a translation table to look up.")
853 while (1) 883 while (1)
854 { 884 {
855 find_charset_in_str (BYTE_POS_ADDR (from_byte), stop_byte - from_byte, 885 find_charset_in_str (BYTE_POS_ADDR (from_byte), stop_byte - from_byte,
856 charsets, table, 1); 886 charsets, table, 1, multibyte);
857 if (stop < to) 887 if (stop < to)
858 { 888 {
859 from = stop, from_byte = stop_byte; 889 from = stop, from_byte = stop_byte;
@@ -864,9 +894,17 @@ Optional arg TABLE if non-nil is a translation table to look up.")
864 } 894 }
865 895
866 val = Qnil; 896 val = Qnil;
867 for (i = MAX_CHARSET; i >= 0; i--) 897 undefined = 0;
898 for (i = (multibyte ? MAX_CHARSET : 1); i >= 0; i--)
868 if (charsets[i]) 899 if (charsets[i])
869 val = Fcons (CHARSET_SYMBOL (i), val); 900 {
901 if (CHARSET_DEFINED_P (i) || i == CHARSET_COMPOSITION)
902 val = Fcons (CHARSET_SYMBOL (i), val);
903 else
904 undefined = 1;
905 }
906 if (undefined)
907 val = Fcons (Qunknown, val);
870 return val; 908 return val;
871} 909}
872 910
@@ -875,28 +913,41 @@ DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
875 "Return a list of charsets in STR.\n\ 913 "Return a list of charsets in STR.\n\
876If the string contains any composite characters,\n\ 914If the string contains any composite characters,\n\
877`composition' is included in the returned list.\n\ 915`composition' is included in the returned list.\n\
878Optional arg TABLE if non-nil is a translation table to look up.") 916Optional arg TABLE if non-nil is a translation table to look up.\n\
917\n\
918If the region contains invalid multiybte characters,\n\
919`unknown' is included in the returned list.\n\
920\n\
921If STR is unibyte, the returned list contains\n\
922`ascii' if any 7-bit characters are found,\n\
923and `unknown' if any 8-bit characters are found.")
879 (str, table) 924 (str, table)
880 Lisp_Object str, table; 925 Lisp_Object str, table;
881{ 926{
882 int charsets[MAX_CHARSET + 1]; 927 int charsets[MAX_CHARSET + 1];
883 int i; 928 int i;
884 Lisp_Object val; 929 Lisp_Object val;
930 int undefined;
931 int multibyte;
885 932
886 CHECK_STRING (str, 0); 933 CHECK_STRING (str, 0);
887 934 multibyte = STRING_MULTIBYTE (str);
888 if (! STRING_MULTIBYTE (str))
889 return (XSTRING (str)->size == 0
890 ? Qnil
891 : Fcons (Qascii, Qnil));
892 935
893 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int)); 936 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
894 find_charset_in_str (XSTRING (str)->data, STRING_BYTES (XSTRING (str)), 937 find_charset_in_str (XSTRING (str)->data, STRING_BYTES (XSTRING (str)),
895 charsets, table, 1); 938 charsets, table, 1, multibyte);
896 val = Qnil; 939 val = Qnil;
897 for (i = MAX_CHARSET; i >= 0; i--) 940 undefined = 0;
941 for (i = (multibyte ? MAX_CHARSET : 1); i >= 0; i--)
898 if (charsets[i]) 942 if (charsets[i])
899 val = Fcons (CHARSET_SYMBOL (i), val); 943 {
944 if (CHARSET_DEFINED_P (i) || i == CHARSET_COMPOSITION)
945 val = Fcons (CHARSET_SYMBOL (i), val);
946 else
947 undefined = 1;
948 }
949 if (undefined)
950 val = Fcons (Qunknown, val);
900 return val; 951 return val;
901} 952}
902 953
@@ -923,14 +974,19 @@ DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
923} 974}
924 975
925DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0, 976DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
926 "Return list of charset and one or two position-codes of CHAR.") 977 "Return list of charset and one or two position-codes of CHAR.\n\
978If CHAR is invalid as a character code,\n\
979return a list of symbol `unknown' and CHAR.")
927 (ch) 980 (ch)
928 Lisp_Object ch; 981 Lisp_Object ch;
929{ 982{
930 Lisp_Object val; 983 Lisp_Object val;
931 int charset, c1, c2; 984 int c, charset, c1, c2;
932 985
933 CHECK_NUMBER (ch, 0); 986 CHECK_NUMBER (ch, 0);
987 c = XFASTINT (ch);
988 if (!CHAR_VALID_P (c, 1))
989 return Fcons (Qunknown, Fcons (ch, Qnil));
934 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2); 990 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
935 return (c2 >= 0 991 return (c2 >= 0
936 ? Fcons (CHARSET_SYMBOL (charset), 992 ? Fcons (CHARSET_SYMBOL (charset),
@@ -1153,7 +1209,7 @@ The width is measured by how many columns it occupies on the screen.")
1153 else if (COMPOSITE_CHAR_P (c)) 1209 else if (COMPOSITE_CHAR_P (c))
1154 { 1210 {
1155 int id = COMPOSITE_CHAR_ID (XFASTINT (ch)); 1211 int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
1156 XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 0)); 1212 XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 1));
1157 } 1213 }
1158 else 1214 else
1159 { 1215 {
@@ -1469,7 +1525,7 @@ str_cmpchar_id (str, len)
1469 } 1525 }
1470 1526
1471 /* We have to register the composite character in cmpchar_table. */ 1527 /* We have to register the composite character in cmpchar_table. */
1472 if (n_cmpchars > (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK)) 1528 if (n_cmpchars >= (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK))
1473 /* No, we have no more room for a new composite character. */ 1529 /* No, we have no more room for a new composite character. */
1474 return -1; 1530 return -1;
1475 1531
@@ -1846,7 +1902,10 @@ init_charset_once ()
1846 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0)); 1902 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1847 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil); 1903 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1848 1904
1849 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1), Qnil); 1905 Qunknown = intern ("unknown");
1906 staticpro (&Qunknown);
1907 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
1908 Qunknown);
1850 1909
1851 /* Setup tables. */ 1910 /* Setup tables. */
1852 for (i = 0; i < 2; i++) 1911 for (i = 0; i < 2; i++)