diff options
| author | Kenichi Handa | 1999-12-15 00:04:59 +0000 |
|---|---|---|
| committer | Kenichi Handa | 1999-12-15 00:04:59 +0000 |
| commit | 99529c2c73b5de1b887f6c0d95fab016821dff52 (patch) | |
| tree | d76772dde874da0e7df0c144ad9e0674adcab7e8 /src | |
| parent | f49b37c9a3ab3b8d4cee577dbe3b224e648a6c85 (diff) | |
| download | emacs-99529c2c73b5de1b887f6c0d95fab016821dff52.tar.gz emacs-99529c2c73b5de1b887f6c0d95fab016821dff52.zip | |
In this entry, just `Modified' means that the code for handling
composite characters has been deleted.
(Qcomposition) (leading_code_composition)
(charset_composition) (min_composite_char) (cmpchar_table)
(cmpchar_table_size) (n_cmpchars): Deleted.
(SPLIT_COMPOSITE_SEQ): Deleted.
(SPLIT_MULTIBYTE_SEQ): Modified.
(char_to_string): Renamed from non_ascii_char_to_string.
Modified.
(string_to_char): Renamed from string_to_non_ascii_char.
(split_string): Renamed from split_non_ascii_string.
(char_printable_p) (Fsplit_char)
(Ffind_charset_region) (Ffind_charset_string) (char_valid_p)
(char_bytes) (Fchar_width) (strwidth): Modified.
(find_charset_in_str): Argument CMPCHARP deleted. Modified.
(Fstring): Adjusted for the change of CHAR_STRING. Modified.
(hash_string) (CMPCHAR_HASH_TABLE_SIZE) (cmpchar_hash_table)
(CMPCHAR_HASH_SIZE) (CMPCHAR_HASH_USED) (CMPCHAR_HASH_CMPCHAR_ID)
(str_cmpchar_id) (cmpchar_component) (Fcmpcharp)
(Fcmpchar_component) (Fcmpchar_cmp_rule) (Fcmpchar_cmp_rule_p)
(Fcmpchar_cmp_count): Deleted.
(Fcompose_string): Now implemented in Emacs Lisp in composite.el.
(init_charset_once): Modified.
(syms_of_charset): Modified.
Diffstat (limited to 'src')
| -rw-r--r-- | src/charset.c | 779 |
1 files changed, 75 insertions, 704 deletions
diff --git a/src/charset.c b/src/charset.c index ef426943861..302f243ad5a 100644 --- a/src/charset.c +++ b/src/charset.c | |||
| @@ -43,11 +43,10 @@ Boston, MA 02111-1307, USA. */ | |||
| 43 | 43 | ||
| 44 | #endif /* emacs */ | 44 | #endif /* emacs */ |
| 45 | 45 | ||
| 46 | Lisp_Object Qcharset, Qascii, Qcomposition; | 46 | Lisp_Object Qcharset, Qascii; |
| 47 | Lisp_Object Qunknown; | 47 | Lisp_Object Qunknown; |
| 48 | 48 | ||
| 49 | /* Declaration of special leading-codes. */ | 49 | /* Declaration of special leading-codes. */ |
| 50 | int leading_code_composition; /* for composite characters */ | ||
| 51 | int leading_code_private_11; /* for private DIMENSION1 of 1-column */ | 50 | int leading_code_private_11; /* for private DIMENSION1 of 1-column */ |
| 52 | int leading_code_private_12; /* for private DIMENSION1 of 2-column */ | 51 | int leading_code_private_12; /* for private DIMENSION1 of 2-column */ |
| 53 | int leading_code_private_21; /* for private DIMENSION2 of 1-column */ | 52 | int leading_code_private_21; /* for private DIMENSION2 of 1-column */ |
| @@ -55,7 +54,6 @@ int leading_code_private_22; /* for private DIMENSION2 of 2-column */ | |||
| 55 | 54 | ||
| 56 | /* Declaration of special charsets. */ | 55 | /* Declaration of special charsets. */ |
| 57 | int charset_ascii; /* ASCII */ | 56 | int charset_ascii; /* ASCII */ |
| 58 | int charset_composition; /* for a composite character */ | ||
| 59 | int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */ | 57 | int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */ |
| 60 | int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */ | 58 | int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */ |
| 61 | int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */ | 59 | int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */ |
| @@ -64,8 +62,6 @@ int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */ | |||
| 64 | int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */ | 62 | int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */ |
| 65 | int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ | 63 | int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ |
| 66 | 64 | ||
| 67 | int min_composite_char; | ||
| 68 | |||
| 69 | Lisp_Object Qcharset_table; | 65 | Lisp_Object Qcharset_table; |
| 70 | 66 | ||
| 71 | /* A char-table containing information of each character set. */ | 67 | /* A char-table containing information of each character set. */ |
| @@ -95,14 +91,6 @@ int width_by_char_head[256]; | |||
| 95 | CHARS, and FINAL-CHAR) to Emacs' charset. */ | 91 | CHARS, and FINAL-CHAR) to Emacs' charset. */ |
| 96 | int iso_charset_table[2][2][128]; | 92 | int iso_charset_table[2][2][128]; |
| 97 | 93 | ||
| 98 | /* Table of pointers to the structure `cmpchar_info' indexed by | ||
| 99 | CMPCHAR-ID. */ | ||
| 100 | struct cmpchar_info **cmpchar_table; | ||
| 101 | /* The current size of `cmpchar_table'. */ | ||
| 102 | static int cmpchar_table_size; | ||
| 103 | /* Number of the current composite characters. */ | ||
| 104 | int n_cmpchars; | ||
| 105 | |||
| 106 | /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */ | 94 | /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */ |
| 107 | unsigned char *_fetch_multibyte_char_p; | 95 | unsigned char *_fetch_multibyte_char_p; |
| 108 | int _fetch_multibyte_char_len; | 96 | int _fetch_multibyte_char_len; |
| @@ -127,48 +115,14 @@ invalid_character (c) | |||
| 127 | error ("Invalid character: 0%o, %d, 0x%x", c, c, c); | 115 | error ("Invalid character: 0%o, %d, 0x%x", c, c, c); |
| 128 | } | 116 | } |
| 129 | 117 | ||
| 130 | /* Parse composite character string STR of length LENGTH (>= 2) and | 118 | /* Parse a multibyte character string STR of length LENGTH (>= 2) set |
| 131 | set BYTES, CHARSET, C1, and C2 as below. | 119 | BYTES to the length of actual multibyte sequence, CHARSET, C1, and |
| 120 | C2 to such values that MAKE_CHAR can make the multibyte character | ||
| 121 | from them. | ||
| 132 | 122 | ||
| 133 | It is assumed that *STR is LEADING_CODE_COMPOSITION and the | 123 | It is assumed that *STR is one of base leading codes and the |
| 134 | following (LENGTH - 1) bytes satisfy !CHAR_HEAD_P. | 124 | following (LENGTH - 1) bytes satisfy !CHAR_HEAD_P. |
| 135 | 125 | ||
| 136 | If there is a valid composite character, set CHARSET, C1, and C2 to | ||
| 137 | such values that MAKE_CHAR can make the composite character from | ||
| 138 | them. Otherwise, set CHARSET to CHARSET_COMPOSITION, set C1 to the | ||
| 139 | second byte of the sequence, C2 to -1 so that MAKE_CHAR can make | ||
| 140 | the invalid multibyte character whose string representation is two | ||
| 141 | bytes of STR[0] and STR[1]. In any case, set BYTES to LENGTH. | ||
| 142 | |||
| 143 | This macro should be called only from SPLIT_MULTIBYTE_SEQ. */ | ||
| 144 | |||
| 145 | #define SPLIT_COMPOSITE_SEQ(str, length, bytes, charset, c1, c2) \ | ||
| 146 | do { \ | ||
| 147 | int cmpchar_id = str_cmpchar_id ((str), (length)); \ | ||
| 148 | \ | ||
| 149 | (charset) = CHARSET_COMPOSITION; \ | ||
| 150 | (bytes) = (length); \ | ||
| 151 | if (cmpchar_id >= 0) \ | ||
| 152 | { \ | ||
| 153 | (c1) = CHAR_FIELD2 (cmpchar_id); \ | ||
| 154 | (c2) = CHAR_FIELD3 (cmpchar_id); \ | ||
| 155 | } \ | ||
| 156 | else \ | ||
| 157 | { \ | ||
| 158 | (c1) = (str)[1] & 0x7F; \ | ||
| 159 | (c2) = -1; \ | ||
| 160 | } \ | ||
| 161 | } while (0) | ||
| 162 | |||
| 163 | /* Parse non-composite multibyte character string STR of length LENGTH | ||
| 164 | (>= 2) and set BYTES to the length of actual multibyte sequence, | ||
| 165 | CHARSET, C1, and C2 to such values that MAKE_CHAR can make the | ||
| 166 | multibyte character from them. | ||
| 167 | |||
| 168 | It is assumed that *STR is one of base leading codes (excluding | ||
| 169 | LEADING_CODE_COMPOSITION) and the following (LENGTH - 1) bytes | ||
| 170 | satisfy !CHAR_HEAD_P. | ||
| 171 | |||
| 172 | This macro should be called only from SPLIT_MULTIBYTE_SEQ. */ | 126 | This macro should be called only from SPLIT_MULTIBYTE_SEQ. */ |
| 173 | 127 | ||
| 174 | #define SPLIT_CHARACTER_SEQ(str, length, bytes, charset, c1, c2) \ | 128 | #define SPLIT_CHARACTER_SEQ(str, length, bytes, charset, c1, c2) \ |
| @@ -191,7 +145,7 @@ invalid_character (c) | |||
| 191 | } while (0) | 145 | } while (0) |
| 192 | 146 | ||
| 193 | /* Parse string STR of length LENGTH and check if a multibyte | 147 | /* Parse string STR of length LENGTH and check if a multibyte |
| 194 | characters is at STR. set BYTES to the actual length, CHARSET, C1, | 148 | characters is at STR. Set BYTES to the actual length, CHARSET, C1, |
| 195 | C2 to proper values for that character. */ | 149 | C2 to proper values for that character. */ |
| 196 | 150 | ||
| 197 | #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2) \ | 151 | #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2) \ |
| @@ -203,8 +157,6 @@ invalid_character (c) | |||
| 203 | for (i = 1; i < (length) && ! CHAR_HEAD_P ((str)[i]); i++); \ | 157 | for (i = 1; i < (length) && ! CHAR_HEAD_P ((str)[i]); i++); \ |
| 204 | if (i == 1) \ | 158 | if (i == 1) \ |
| 205 | (bytes) = 1, (charset) = CHARSET_ASCII, (c1) = (str)[0] ; \ | 159 | (bytes) = 1, (charset) = CHARSET_ASCII, (c1) = (str)[0] ; \ |
| 206 | else if ((str)[0] == LEADING_CODE_COMPOSITION) \ | ||
| 207 | SPLIT_COMPOSITE_SEQ (str, i, bytes, charset, c1, c2); \ | ||
| 208 | else \ | 160 | else \ |
| 209 | { \ | 161 | { \ |
| 210 | if (i > BYTES_BY_CHAR_HEAD ((str)[0])) \ | 162 | if (i > BYTES_BY_CHAR_HEAD ((str)[0])) \ |
| @@ -221,21 +173,21 @@ invalid_character (c) | |||
| 221 | ? ((c1) >= 0x20 && (c1) <= 0x7F) \ | 173 | ? ((c1) >= 0x20 && (c1) <= 0x7F) \ |
| 222 | : ((c1) >= 0x20 && (c1) <= 0x7F && (c2) >= 0x20 && (c2) <= 0x7F))) | 174 | : ((c1) >= 0x20 && (c1) <= 0x7F && (c2) >= 0x20 && (c2) <= 0x7F))) |
| 223 | 175 | ||
| 224 | /* Set STR a pointer to the multi-byte form of the character C. If C | 176 | /* Store multi-byte form of the character C in STR. The caller should |
| 225 | is not a composite character, the multi-byte form is set in WORKBUF | 177 | allocate at least 4-byte area at STR in advance. Returns the |
| 226 | and STR points WORKBUF. The caller should allocate at least 4-byte | 178 | length of the multi-byte form. If C is an invalid character code, |
| 227 | area at WORKBUF in advance. Returns the length of the multi-byte | 179 | signal an error. |
| 228 | form. If C is an invalid character, store (C & 0xFF) in WORKBUF[0] | ||
| 229 | and return 1. | ||
| 230 | 180 | ||
| 231 | Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this | 181 | Use macro `CHAR_STRING (C, STR)' instead of calling this function |
| 232 | function directly if C can be an ASCII character. */ | 182 | directly if C can be an ASCII character. */ |
| 233 | 183 | ||
| 234 | int | 184 | int |
| 235 | non_ascii_char_to_string (c, workbuf, str) | 185 | char_to_string (c, str) |
| 236 | int c; | 186 | int c; |
| 237 | unsigned char *workbuf, **str; | 187 | unsigned char *str; |
| 238 | { | 188 | { |
| 189 | unsigned char *p = str; | ||
| 190 | |||
| 239 | if (c & CHAR_MODIFIER_MASK) /* This includes the case C is negative. */ | 191 | if (c & CHAR_MODIFIER_MASK) /* This includes the case C is negative. */ |
| 240 | { | 192 | { |
| 241 | /* Multibyte character can't have a modifier bit. */ | 193 | /* Multibyte character can't have a modifier bit. */ |
| @@ -276,71 +228,46 @@ non_ascii_char_to_string (c, workbuf, str) | |||
| 276 | if (c & CHAR_MODIFIER_MASK) | 228 | if (c & CHAR_MODIFIER_MASK) |
| 277 | invalid_character (c); | 229 | invalid_character (c); |
| 278 | 230 | ||
| 279 | *str = workbuf; | 231 | *p++ = c; |
| 280 | *workbuf++ = c; | ||
| 281 | } | 232 | } |
| 282 | else | 233 | else if (c < MAX_CHAR) |
| 283 | { | 234 | { |
| 284 | int charset, c1, c2; | 235 | int charset, c1, c2; |
| 285 | 236 | ||
| 286 | SPLIT_NON_ASCII_CHAR (c, charset, c1, c2); | 237 | SPLIT_NON_ASCII_CHAR (c, charset, c1, c2); |
| 287 | if (charset == CHARSET_COMPOSITION) | 238 | |
| 288 | { | 239 | if (charset >= LEADING_CODE_EXT_11) |
| 289 | if (c >= MAX_CHAR) | 240 | *p++ = (charset < LEADING_CODE_EXT_12 |
| 290 | invalid_character (c); | 241 | ? LEADING_CODE_PRIVATE_11 |
| 291 | if (c >= MIN_CHAR_COMPOSITION) | 242 | : (charset < LEADING_CODE_EXT_21 |
| 292 | { | 243 | ? LEADING_CODE_PRIVATE_12 |
| 293 | /* Valid composite character. */ | 244 | : (charset < LEADING_CODE_EXT_22 |
| 294 | *str = cmpchar_table[COMPOSITE_CHAR_ID (c)]->data; | 245 | ? LEADING_CODE_PRIVATE_21 |
| 295 | workbuf = *str + cmpchar_table[COMPOSITE_CHAR_ID (c)]->len; | 246 | : LEADING_CODE_PRIVATE_22))); |
| 296 | } | 247 | *p++ = charset; |
| 297 | else | 248 | if (c1 > 0 && c1 < 32 || c2 > 0 && c2 < 32) |
| 298 | { | 249 | invalid_character (c); |
| 299 | /* Invalid but can have multibyte form. */ | 250 | if (c1) |
| 300 | *str = workbuf; | ||
| 301 | *workbuf++ = LEADING_CODE_COMPOSITION; | ||
| 302 | *workbuf++ = c1 | 0x80; | ||
| 303 | } | ||
| 304 | } | ||
| 305 | else if (charset > CHARSET_COMPOSITION) | ||
| 306 | { | 251 | { |
| 307 | *str = workbuf; | 252 | *p++ = c1 | 0x80; |
| 308 | if (charset >= LEADING_CODE_EXT_11) | 253 | if (c2 > 0) |
| 309 | *workbuf++ = (charset < LEADING_CODE_EXT_12 | 254 | *p++ = c2 | 0x80; |
| 310 | ? LEADING_CODE_PRIVATE_11 | ||
| 311 | : (charset < LEADING_CODE_EXT_21 | ||
| 312 | ? LEADING_CODE_PRIVATE_12 | ||
| 313 | : (charset < LEADING_CODE_EXT_22 | ||
| 314 | ? LEADING_CODE_PRIVATE_21 | ||
| 315 | : LEADING_CODE_PRIVATE_22))); | ||
| 316 | *workbuf++ = charset; | ||
| 317 | if (c1 > 0 && c1 < 32 || c2 > 0 && c2 < 32) | ||
| 318 | invalid_character (c); | ||
| 319 | if (c1) | ||
| 320 | { | ||
| 321 | *workbuf++ = c1 | 0x80; | ||
| 322 | if (c2 > 0) | ||
| 323 | *workbuf++ = c2 | 0x80; | ||
| 324 | } | ||
| 325 | } | 255 | } |
| 326 | else if (charset == CHARSET_ASCII) | ||
| 327 | *workbuf++= c & 0x7F; | ||
| 328 | else | ||
| 329 | invalid_character (c); | ||
| 330 | } | 256 | } |
| 331 | 257 | ||
| 332 | return (workbuf - *str); | 258 | return (p -str); |
| 333 | } | 259 | } |
| 334 | 260 | ||
| 335 | /* Return the non-ASCII character corresponding to multi-byte form at | 261 | /* Return the non-ASCII character corresponding to multi-byte form at |
| 336 | STR of length LEN. If ACTUAL_LEN is not NULL, store the byte | 262 | STR of length LEN. If ACTUAL_LEN is not NULL, store the byte |
| 337 | length of the multibyte form in *ACTUAL_LEN. | 263 | length of the multibyte form in *ACTUAL_LEN. |
| 338 | 264 | ||
| 339 | Use macro `STRING_CHAR (STR, LEN)' instead of calling this function | 265 | Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling |
| 340 | directly if you want ot handle ASCII characters as well. */ | 266 | this function directly if you want ot handle ASCII characters as |
| 267 | well. */ | ||
| 341 | 268 | ||
| 342 | int | 269 | int |
| 343 | string_to_non_ascii_char (str, len, actual_len) | 270 | string_to_char (str, len, actual_len) |
| 344 | const unsigned char *str; | 271 | const unsigned char *str; |
| 345 | int len, *actual_len; | 272 | int len, *actual_len; |
| 346 | { | 273 | { |
| @@ -373,7 +300,7 @@ multibyte_form_length (str, len) | |||
| 373 | which checks range of STR in advance. */ | 300 | which checks range of STR in advance. */ |
| 374 | 301 | ||
| 375 | int | 302 | int |
| 376 | split_non_ascii_string (str, len, charset, c1, c2) | 303 | split_string (str, len, charset, c1, c2) |
| 377 | const unsigned char *str; | 304 | const unsigned char *str; |
| 378 | unsigned char *c1, *c2; | 305 | unsigned char *c1, *c2; |
| 379 | int len, *charset; | 306 | int len, *charset; |
| @@ -399,8 +326,8 @@ char_printable_p (c) | |||
| 399 | 326 | ||
| 400 | if (SINGLE_BYTE_CHAR_P (c)) | 327 | if (SINGLE_BYTE_CHAR_P (c)) |
| 401 | return 1; | 328 | return 1; |
| 402 | if (c >= MIN_CHAR_COMPOSITION) | 329 | if (c >= MAX_CHAR) |
| 403 | return (c < MAX_CHAR); | 330 | return 0; |
| 404 | 331 | ||
| 405 | SPLIT_NON_ASCII_CHAR (c, charset, c1, c2); | 332 | SPLIT_NON_ASCII_CHAR (c, charset, c1, c2); |
| 406 | if (! CHARSET_DEFINED_P (charset)) | 333 | if (! CHARSET_DEFINED_P (charset)) |
| @@ -835,20 +762,15 @@ CHARSET should be defined by `defined-charset' in advance.") | |||
| 835 | caller should allocate CHARSETS (MAX_CHARSET + 1 elements) in advance. | 762 | caller should allocate CHARSETS (MAX_CHARSET + 1 elements) in advance. |
| 836 | It may lookup a translation table TABLE if supplied. | 763 | It may lookup a translation table TABLE if supplied. |
| 837 | 764 | ||
| 838 | If CMPCHARP is nonzero and some composite character is found, | ||
| 839 | CHARSETS[128] is also set 1 and the returned number is incremented | ||
| 840 | by 1. | ||
| 841 | |||
| 842 | If MULTIBYTE is zero, do not check multibyte characters, i.e. if | 765 | If MULTIBYTE is zero, do not check multibyte characters, i.e. if |
| 843 | any ASCII codes (7-bit) are found, CHARSET[0] is set to 1, if any | 766 | any ASCII codes (7-bit) are found, CHARSET[0] is set to 1, if any |
| 844 | 8-bit codes are found CHARSET[1] is set to 1. */ | 767 | 8-bit codes are found CHARSET[1] is set to 1. */ |
| 845 | 768 | ||
| 846 | int | 769 | int |
| 847 | find_charset_in_str (str, len, charsets, table, cmpcharp, multibyte) | 770 | find_charset_in_str (str, len, charsets, table, multibyte) |
| 848 | unsigned char *str; | 771 | unsigned char *str; |
| 849 | int len, *charsets; | 772 | int len, *charsets; |
| 850 | Lisp_Object table; | 773 | Lisp_Object table; |
| 851 | int cmpcharp; | ||
| 852 | int multibyte; | 774 | int multibyte; |
| 853 | { | 775 | { |
| 854 | register int num = 0, c; | 776 | register int num = 0, c; |
| @@ -878,58 +800,15 @@ find_charset_in_str (str, len, charsets, table, cmpcharp, multibyte) | |||
| 878 | 800 | ||
| 879 | while (len > 0) | 801 | while (len > 0) |
| 880 | { | 802 | { |
| 881 | int bytes, charset; | 803 | int bytes, charset, c1, c2; |
| 882 | c = *str; | ||
| 883 | |||
| 884 | if (c == LEADING_CODE_COMPOSITION) | ||
| 885 | { | ||
| 886 | int cmpchar_id = str_cmpchar_id (str, len); | ||
| 887 | GLYPH *glyph; | ||
| 888 | 804 | ||
| 889 | if (cmpchar_id >= 0) | 805 | SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2); |
| 890 | { | ||
| 891 | struct cmpchar_info *cmp_p = cmpchar_table[cmpchar_id]; | ||
| 892 | int i; | ||
| 893 | |||
| 894 | for (i = 0; i < cmp_p->glyph_len; i++) | ||
| 895 | { | ||
| 896 | c = cmp_p->glyph[i]; | ||
| 897 | if (!NILP (table)) | ||
| 898 | { | ||
| 899 | if ((c = translate_char (table, c, 0, 0, 0)) < 0) | ||
| 900 | c = cmp_p->glyph[i]; | ||
| 901 | } | ||
| 902 | if ((charset = CHAR_CHARSET (c)) < 0) | ||
| 903 | charset = CHARSET_ASCII; | ||
| 904 | if (!charsets[charset]) | ||
| 905 | { | ||
| 906 | charsets[charset] = 1; | ||
| 907 | num += 1; | ||
| 908 | } | ||
| 909 | } | ||
| 910 | str += cmp_p->len; | ||
| 911 | len -= cmp_p->len; | ||
| 912 | if (cmpcharp && !charsets[CHARSET_COMPOSITION]) | ||
| 913 | { | ||
| 914 | charsets[CHARSET_COMPOSITION] = 1; | ||
| 915 | num += 1; | ||
| 916 | } | ||
| 917 | continue; | ||
| 918 | } | ||
| 919 | 806 | ||
| 920 | charset = 1; /* This leads to `unknown' charset. */ | 807 | if (! NILP (table)) |
| 921 | bytes = 1; | ||
| 922 | } | ||
| 923 | else | ||
| 924 | { | 808 | { |
| 925 | c = STRING_CHAR_AND_LENGTH (str, len, bytes); | 809 | int c1 = translate_char (table, -1, charset, c1, c2); |
| 926 | if (! NILP (table)) | 810 | if (c1 >= 0) |
| 927 | { | 811 | charset = CHAR_CHARSET (c); |
| 928 | int c1 = translate_char (table, c, 0, 0, 0); | ||
| 929 | if (c1 >= 0) | ||
| 930 | c = c1; | ||
| 931 | } | ||
| 932 | charset = CHAR_CHARSET (c); | ||
| 933 | } | 812 | } |
| 934 | 813 | ||
| 935 | if (!charsets[charset]) | 814 | if (!charsets[charset]) |
| @@ -947,8 +826,6 @@ DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region, | |||
| 947 | 2, 3, 0, | 826 | 2, 3, 0, |
| 948 | "Return a list of charsets in the region between BEG and END.\n\ | 827 | "Return a list of charsets in the region between BEG and END.\n\ |
| 949 | BEG and END are buffer positions.\n\ | 828 | BEG and END are buffer positions.\n\ |
| 950 | If the region contains any composite character,\n\ | ||
| 951 | `composition' is included in the returned list.\n\ | ||
| 952 | Optional arg TABLE if non-nil is a translation table to look up.\n\ | 829 | Optional arg TABLE if non-nil is a translation table to look up.\n\ |
| 953 | \n\ | 830 | \n\ |
| 954 | If the region contains invalid multiybte characters,\n\ | 831 | If the region contains invalid multiybte characters,\n\ |
| @@ -984,7 +861,7 @@ and `unknown' if any 8-bit characters are found.") | |||
| 984 | while (1) | 861 | while (1) |
| 985 | { | 862 | { |
| 986 | find_charset_in_str (BYTE_POS_ADDR (from_byte), stop_byte - from_byte, | 863 | find_charset_in_str (BYTE_POS_ADDR (from_byte), stop_byte - from_byte, |
| 987 | charsets, table, 1, multibyte); | 864 | charsets, table, multibyte); |
| 988 | if (stop < to) | 865 | if (stop < to) |
| 989 | { | 866 | { |
| 990 | from = stop, from_byte = stop_byte; | 867 | from = stop, from_byte = stop_byte; |
| @@ -999,7 +876,7 @@ and `unknown' if any 8-bit characters are found.") | |||
| 999 | for (i = (multibyte ? MAX_CHARSET : 1); i >= 0; i--) | 876 | for (i = (multibyte ? MAX_CHARSET : 1); i >= 0; i--) |
| 1000 | if (charsets[i]) | 877 | if (charsets[i]) |
| 1001 | { | 878 | { |
| 1002 | if (CHARSET_DEFINED_P (i) || i == CHARSET_COMPOSITION) | 879 | if (CHARSET_DEFINED_P (i)) |
| 1003 | val = Fcons (CHARSET_SYMBOL (i), val); | 880 | val = Fcons (CHARSET_SYMBOL (i), val); |
| 1004 | else | 881 | else |
| 1005 | undefined = 1; | 882 | undefined = 1; |
| @@ -1012,8 +889,6 @@ and `unknown' if any 8-bit characters are found.") | |||
| 1012 | DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string, | 889 | DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string, |
| 1013 | 1, 2, 0, | 890 | 1, 2, 0, |
| 1014 | "Return a list of charsets in STR.\n\ | 891 | "Return a list of charsets in STR.\n\ |
| 1015 | If the string contains any composite characters,\n\ | ||
| 1016 | `composition' is included in the returned list.\n\ | ||
| 1017 | Optional arg TABLE if non-nil is a translation table to look up.\n\ | 892 | Optional arg TABLE if non-nil is a translation table to look up.\n\ |
| 1018 | \n\ | 893 | \n\ |
| 1019 | If the region contains invalid multiybte characters,\n\ | 894 | If the region contains invalid multiybte characters,\n\ |
| @@ -1036,13 +911,13 @@ and `unknown' if any 8-bit characters are found.") | |||
| 1036 | 911 | ||
| 1037 | bzero (charsets, (MAX_CHARSET + 1) * sizeof (int)); | 912 | bzero (charsets, (MAX_CHARSET + 1) * sizeof (int)); |
| 1038 | find_charset_in_str (XSTRING (str)->data, STRING_BYTES (XSTRING (str)), | 913 | find_charset_in_str (XSTRING (str)->data, STRING_BYTES (XSTRING (str)), |
| 1039 | charsets, table, 1, multibyte); | 914 | charsets, table, multibyte); |
| 1040 | val = Qnil; | 915 | val = Qnil; |
| 1041 | undefined = 0; | 916 | undefined = 0; |
| 1042 | for (i = (multibyte ? MAX_CHARSET : 1); i >= 0; i--) | 917 | for (i = (multibyte ? MAX_CHARSET : 1); i >= 0; i--) |
| 1043 | if (charsets[i]) | 918 | if (charsets[i]) |
| 1044 | { | 919 | { |
| 1045 | if (CHARSET_DEFINED_P (i) || i == CHARSET_COMPOSITION) | 920 | if (CHARSET_DEFINED_P (i)) |
| 1046 | val = Fcons (CHARSET_SYMBOL (i), val); | 921 | val = Fcons (CHARSET_SYMBOL (i), val); |
| 1047 | else | 922 | else |
| 1048 | undefined = 1; | 923 | undefined = 1; |
| @@ -1199,10 +1074,6 @@ char_valid_p (c, genericp) | |||
| 1199 | if (SINGLE_BYTE_CHAR_P (c)) | 1074 | if (SINGLE_BYTE_CHAR_P (c)) |
| 1200 | return 1; | 1075 | return 1; |
| 1201 | SPLIT_NON_ASCII_CHAR (c, charset, c1, c2); | 1076 | SPLIT_NON_ASCII_CHAR (c, charset, c1, c2); |
| 1202 | if (charset == CHARSET_COMPOSITION) | ||
| 1203 | return ((c >= MIN_CHAR_COMPOSITION | ||
| 1204 | && c < MIN_CHAR_COMPOSITION + n_cmpchars) | ||
| 1205 | || (genericp && c == GENERIC_COMPOSITION_CHAR)); | ||
| 1206 | if (genericp) | 1077 | if (genericp) |
| 1207 | { | 1078 | { |
| 1208 | if (c1) | 1079 | if (c1) |
| @@ -1288,25 +1159,13 @@ int | |||
| 1288 | char_bytes (c) | 1159 | char_bytes (c) |
| 1289 | int c; | 1160 | int c; |
| 1290 | { | 1161 | { |
| 1291 | int bytes; | 1162 | int charset; |
| 1292 | 1163 | ||
| 1293 | if (SINGLE_BYTE_CHAR_P (c) || (c & ~GLYPH_MASK_CHAR)) | 1164 | if (SINGLE_BYTE_CHAR_P (c) || (c & ~GLYPH_MASK_CHAR)) |
| 1294 | return 1; | 1165 | return 1; |
| 1295 | 1166 | ||
| 1296 | if (COMPOSITE_CHAR_P (c)) | 1167 | charset = CHAR_CHARSET (c); |
| 1297 | { | 1168 | return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1); |
| 1298 | unsigned int id = COMPOSITE_CHAR_ID (c); | ||
| 1299 | |||
| 1300 | bytes = (id < n_cmpchars ? cmpchar_table[id]->len : 1); | ||
| 1301 | } | ||
| 1302 | else | ||
| 1303 | { | ||
| 1304 | int charset = CHAR_CHARSET (c); | ||
| 1305 | |||
| 1306 | bytes = CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1; | ||
| 1307 | } | ||
| 1308 | |||
| 1309 | return bytes; | ||
| 1310 | } | 1169 | } |
| 1311 | 1170 | ||
| 1312 | /* Return the width of character of which multi-byte form starts with | 1171 | /* Return the width of character of which multi-byte form starts with |
| @@ -1348,11 +1207,6 @@ The width is measured by how many columns it occupies on the screen.") | |||
| 1348 | XSETINT (val, XVECTOR (disp)->size); | 1207 | XSETINT (val, XVECTOR (disp)->size); |
| 1349 | else if (SINGLE_BYTE_CHAR_P (c)) | 1208 | else if (SINGLE_BYTE_CHAR_P (c)) |
| 1350 | XSETINT (val, ONE_BYTE_CHAR_WIDTH (c)); | 1209 | XSETINT (val, ONE_BYTE_CHAR_WIDTH (c)); |
| 1351 | else if (COMPOSITE_CHAR_P (c)) | ||
| 1352 | { | ||
| 1353 | int id = COMPOSITE_CHAR_ID (XFASTINT (ch)); | ||
| 1354 | XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 1)); | ||
| 1355 | } | ||
| 1356 | else | 1210 | else |
| 1357 | { | 1211 | { |
| 1358 | int charset = CHAR_CHARSET (c); | 1212 | int charset = CHAR_CHARSET (c); |
| @@ -1377,40 +1231,22 @@ strwidth (str, len) | |||
| 1377 | 1231 | ||
| 1378 | while (str < endp) | 1232 | while (str < endp) |
| 1379 | { | 1233 | { |
| 1380 | if (*str == LEADING_CODE_COMPOSITION) | 1234 | Lisp_Object disp; |
| 1381 | { | 1235 | int thislen; |
| 1382 | int id = str_cmpchar_id (str, endp - str); | 1236 | int c = STRING_CHAR_AND_LENGTH (str, endp - str, thislen); |
| 1383 | 1237 | ||
| 1384 | if (id < 0) | 1238 | /* Get the way the display table would display it. */ |
| 1385 | { | 1239 | if (dp) |
| 1386 | width += 4; | 1240 | disp = DISP_CHAR_VECTOR (dp, c); |
| 1387 | str++; | ||
| 1388 | } | ||
| 1389 | else | ||
| 1390 | { | ||
| 1391 | width += cmpchar_table[id]->width; | ||
| 1392 | str += cmpchar_table[id]->len; | ||
| 1393 | } | ||
| 1394 | } | ||
| 1395 | else | 1241 | else |
| 1396 | { | 1242 | disp = Qnil; |
| 1397 | Lisp_Object disp; | 1243 | |
| 1398 | int thislen; | 1244 | if (VECTORP (disp)) |
| 1399 | int c = STRING_CHAR_AND_LENGTH (str, endp - str, thislen); | 1245 | width += XVECTOR (disp)->size; |
| 1400 | 1246 | else | |
| 1401 | /* Get the way the display table would display it. */ | 1247 | width += ONE_BYTE_CHAR_WIDTH (*str); |
| 1402 | if (dp) | 1248 | |
| 1403 | disp = DISP_CHAR_VECTOR (dp, c); | 1249 | str += thislen; |
| 1404 | else | ||
| 1405 | disp = Qnil; | ||
| 1406 | |||
| 1407 | if (VECTORP (disp)) | ||
| 1408 | width += XVECTOR (disp)->size; | ||
| 1409 | else | ||
| 1410 | width += ONE_BYTE_CHAR_WIDTH (*str); | ||
| 1411 | |||
| 1412 | str += thislen; | ||
| 1413 | } | ||
| 1414 | } | 1250 | } |
| 1415 | return width; | 1251 | return width; |
| 1416 | } | 1252 | } |
| @@ -1522,24 +1358,17 @@ DEFUN ("string", Fstring, Sstring, 1, MANY, 0, | |||
| 1522 | Lisp_Object *args; | 1358 | Lisp_Object *args; |
| 1523 | { | 1359 | { |
| 1524 | int i; | 1360 | int i; |
| 1525 | unsigned char *buf | 1361 | unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n); |
| 1526 | = (unsigned char *) alloca (MAX_LENGTH_OF_MULTI_BYTE_FORM * n); | ||
| 1527 | unsigned char *p = buf; | 1362 | unsigned char *p = buf; |
| 1528 | Lisp_Object val; | 1363 | Lisp_Object val; |
| 1364 | int c; | ||
| 1529 | 1365 | ||
| 1530 | for (i = 0; i < n; i++) | 1366 | for (i = 0; i < n; i++) |
| 1531 | { | 1367 | { |
| 1532 | int c, len; | ||
| 1533 | unsigned char *str; | ||
| 1534 | |||
| 1535 | if (!INTEGERP (args[i])) | 1368 | if (!INTEGERP (args[i])) |
| 1536 | CHECK_NUMBER (args[i], 0); | 1369 | CHECK_NUMBER (args[i], 0); |
| 1537 | c = XINT (args[i]); | 1370 | c = XINT (args[i]); |
| 1538 | len = CHAR_STRING (c, p, str); | 1371 | p += CHAR_STRING (c, p); |
| 1539 | if (p != str) | ||
| 1540 | /* C is a composite character. */ | ||
| 1541 | bcopy (str, p, len); | ||
| 1542 | p += len; | ||
| 1543 | } | 1372 | } |
| 1544 | 1373 | ||
| 1545 | /* Here, we can't use make_string_from_bytes because of byte | 1374 | /* Here, we can't use make_string_from_bytes because of byte |
| @@ -1550,442 +1379,6 @@ DEFUN ("string", Fstring, Sstring, 1, MANY, 0, | |||
| 1550 | 1379 | ||
| 1551 | #endif /* emacs */ | 1380 | #endif /* emacs */ |
| 1552 | 1381 | ||
| 1553 | /*** Composite characters staffs ***/ | ||
| 1554 | |||
| 1555 | /* Each composite character is identified by CMPCHAR-ID which is | ||
| 1556 | assigned when Emacs needs the character code of the composite | ||
| 1557 | character (e.g. when displaying it on the screen). See the | ||
| 1558 | document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a | ||
| 1559 | composite character is represented in Emacs. */ | ||
| 1560 | |||
| 1561 | /* If `static' is defined, it means that it is defined to null string. */ | ||
| 1562 | #ifndef static | ||
| 1563 | /* The following function is copied from lread.c. */ | ||
| 1564 | static int | ||
| 1565 | hash_string (ptr, len) | ||
| 1566 | unsigned char *ptr; | ||
| 1567 | int len; | ||
| 1568 | { | ||
| 1569 | register unsigned char *p = ptr; | ||
| 1570 | register unsigned char *end = p + len; | ||
| 1571 | register unsigned char c; | ||
| 1572 | register int hash = 0; | ||
| 1573 | |||
| 1574 | while (p != end) | ||
| 1575 | { | ||
| 1576 | c = *p++; | ||
| 1577 | if (c >= 0140) c -= 40; | ||
| 1578 | hash = ((hash<<3) + (hash>>28) + c); | ||
| 1579 | } | ||
| 1580 | return hash & 07777777777; | ||
| 1581 | } | ||
| 1582 | #endif | ||
| 1583 | |||
| 1584 | #define CMPCHAR_HASH_TABLE_SIZE 0xFFF | ||
| 1585 | |||
| 1586 | static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE]; | ||
| 1587 | |||
| 1588 | /* Each element of `cmpchar_hash_table' is a pointer to an array of | ||
| 1589 | integer, where the 1st element is the size of the array, the 2nd | ||
| 1590 | element is how many elements are actually used in the array, and | ||
| 1591 | the remaining elements are CMPCHAR-IDs of composite characters of | ||
| 1592 | the same hash value. */ | ||
| 1593 | #define CMPCHAR_HASH_SIZE(table) table[0] | ||
| 1594 | #define CMPCHAR_HASH_USED(table) table[1] | ||
| 1595 | #define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i] | ||
| 1596 | |||
| 1597 | /* Return CMPCHAR-ID of the composite character in STR of the length | ||
| 1598 | LEN. If the composite character has not yet been registered, | ||
| 1599 | register it in `cmpchar_table' and assign new CMPCHAR-ID. This | ||
| 1600 | is the sole function for assigning CMPCHAR-ID. */ | ||
| 1601 | int | ||
| 1602 | str_cmpchar_id (str, len) | ||
| 1603 | const unsigned char *str; | ||
| 1604 | int len; | ||
| 1605 | { | ||
| 1606 | int hash_idx, *hashp; | ||
| 1607 | unsigned char *buf; | ||
| 1608 | int embedded_rule; /* 1 if composition rule is embedded. */ | ||
| 1609 | int chars; /* number of components. */ | ||
| 1610 | int i; | ||
| 1611 | struct cmpchar_info *cmpcharp; | ||
| 1612 | |||
| 1613 | /* The second byte 0xFF means COMPOSITION rule is embedded. */ | ||
| 1614 | embedded_rule = (str[1] == 0xFF); | ||
| 1615 | |||
| 1616 | /* At first, get the actual length of the composite character. */ | ||
| 1617 | { | ||
| 1618 | const unsigned char *p, *endp = str + 1, *lastp = str + len; | ||
| 1619 | int bytes; | ||
| 1620 | |||
| 1621 | while (endp < lastp && ! CHAR_HEAD_P (*endp)) endp++; | ||
| 1622 | if (endp - str < 5) | ||
| 1623 | /* Every composite char is at least 5 bytes long. */ | ||
| 1624 | return -1; | ||
| 1625 | |||
| 1626 | chars = 0; | ||
| 1627 | p = str + 1; | ||
| 1628 | while (p < endp) | ||
| 1629 | { | ||
| 1630 | if (embedded_rule) | ||
| 1631 | { | ||
| 1632 | p++; | ||
| 1633 | if (p >= endp) | ||
| 1634 | return -1; | ||
| 1635 | } | ||
| 1636 | /* No need of checking if *P is 0xA0 because | ||
| 1637 | BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */ | ||
| 1638 | p += BYTES_BY_CHAR_HEAD (*p - 0x20); | ||
| 1639 | chars++; | ||
| 1640 | } | ||
| 1641 | if (p > endp || chars < 2 || chars > MAX_COMPONENT_COUNT) | ||
| 1642 | /* Invalid components. */ | ||
| 1643 | return -1; | ||
| 1644 | len = p - str; | ||
| 1645 | } | ||
| 1646 | hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE; | ||
| 1647 | hashp = cmpchar_hash_table[hash_idx]; | ||
| 1648 | |||
| 1649 | /* Then, look into the hash table. */ | ||
| 1650 | if (hashp != NULL) | ||
| 1651 | /* Find the correct one among composite characters of the same | ||
| 1652 | hash value. */ | ||
| 1653 | for (i = 2; i < CMPCHAR_HASH_USED (hashp); i++) | ||
| 1654 | { | ||
| 1655 | cmpcharp = cmpchar_table[CMPCHAR_HASH_CMPCHAR_ID (hashp, i)]; | ||
| 1656 | if (len == cmpcharp->len | ||
| 1657 | && ! bcmp (str, cmpcharp->data, len)) | ||
| 1658 | return CMPCHAR_HASH_CMPCHAR_ID (hashp, i); | ||
| 1659 | } | ||
| 1660 | |||
| 1661 | /* We have to register the composite character in cmpchar_table. */ | ||
| 1662 | if (n_cmpchars >= (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK)) | ||
| 1663 | /* No, we have no more room for a new composite character. */ | ||
| 1664 | return -1; | ||
| 1665 | |||
| 1666 | /* Make the entry in hash table. */ | ||
| 1667 | if (hashp == NULL) | ||
| 1668 | { | ||
| 1669 | /* Make a table for 8 composite characters initially. */ | ||
| 1670 | hashp = (cmpchar_hash_table[hash_idx] | ||
| 1671 | = (int *) xmalloc (sizeof (int) * (2 + 8))); | ||
| 1672 | CMPCHAR_HASH_SIZE (hashp) = 10; | ||
| 1673 | CMPCHAR_HASH_USED (hashp) = 2; | ||
| 1674 | } | ||
| 1675 | else if (CMPCHAR_HASH_USED (hashp) >= CMPCHAR_HASH_SIZE (hashp)) | ||
| 1676 | { | ||
| 1677 | CMPCHAR_HASH_SIZE (hashp) += 8; | ||
| 1678 | hashp = (cmpchar_hash_table[hash_idx] | ||
| 1679 | = (int *) xrealloc (hashp, | ||
| 1680 | sizeof (int) * CMPCHAR_HASH_SIZE (hashp))); | ||
| 1681 | } | ||
| 1682 | CMPCHAR_HASH_CMPCHAR_ID (hashp, CMPCHAR_HASH_USED (hashp)) = n_cmpchars; | ||
| 1683 | CMPCHAR_HASH_USED (hashp)++; | ||
| 1684 | |||
| 1685 | /* Set information of the composite character in cmpchar_table. */ | ||
| 1686 | if (cmpchar_table_size == 0) | ||
| 1687 | { | ||
| 1688 | /* This is the first composite character to be registered. */ | ||
| 1689 | cmpchar_table_size = 256; | ||
| 1690 | cmpchar_table | ||
| 1691 | = (struct cmpchar_info **) xmalloc (sizeof (cmpchar_table[0]) | ||
| 1692 | * cmpchar_table_size); | ||
| 1693 | } | ||
| 1694 | else if (cmpchar_table_size <= n_cmpchars) | ||
| 1695 | { | ||
| 1696 | cmpchar_table_size += 256; | ||
| 1697 | cmpchar_table | ||
| 1698 | = (struct cmpchar_info **) xrealloc (cmpchar_table, | ||
| 1699 | sizeof (cmpchar_table[0]) | ||
| 1700 | * cmpchar_table_size); | ||
| 1701 | } | ||
| 1702 | |||
| 1703 | cmpcharp = (struct cmpchar_info *) xmalloc (sizeof (struct cmpchar_info)); | ||
| 1704 | |||
| 1705 | cmpcharp->len = len; | ||
| 1706 | cmpcharp->data = (unsigned char *) xmalloc (len + 1); | ||
| 1707 | bcopy (str, cmpcharp->data, len); | ||
| 1708 | cmpcharp->data[len] = 0; | ||
| 1709 | cmpcharp->glyph_len = chars; | ||
| 1710 | cmpcharp->glyph = (GLYPH *) xmalloc (sizeof (GLYPH) * chars); | ||
| 1711 | if (embedded_rule) | ||
| 1712 | { | ||
| 1713 | cmpcharp->cmp_rule = (unsigned char *) xmalloc (chars); | ||
| 1714 | cmpcharp->col_offset = (float *) xmalloc (sizeof (float) * chars); | ||
| 1715 | } | ||
| 1716 | else | ||
| 1717 | { | ||
| 1718 | cmpcharp->cmp_rule = NULL; | ||
| 1719 | cmpcharp->col_offset = NULL; | ||
| 1720 | } | ||
| 1721 | |||
| 1722 | /* Setup GLYPH data and composition rules (if any) so as not to make | ||
| 1723 | them every time on displaying. */ | ||
| 1724 | { | ||
| 1725 | unsigned char *bufp; | ||
| 1726 | int width; | ||
| 1727 | float leftmost = 0.0, rightmost = 1.0; | ||
| 1728 | |||
| 1729 | if (embedded_rule) | ||
| 1730 | /* At first, col_offset[N] is set to relative to col_offset[0]. */ | ||
| 1731 | cmpcharp->col_offset[0] = 0; | ||
| 1732 | |||
| 1733 | for (i = 0, bufp = cmpcharp->data + 1; i < chars; i++) | ||
| 1734 | { | ||
| 1735 | if (embedded_rule) | ||
| 1736 | cmpcharp->cmp_rule[i] = *bufp++; | ||
| 1737 | |||
| 1738 | if (*bufp == 0xA0) /* This is an ASCII character. */ | ||
| 1739 | { | ||
| 1740 | cmpcharp->glyph[i] = FAST_MAKE_GLYPH ((*++bufp & 0x7F), 0); | ||
| 1741 | width = 1; | ||
| 1742 | bufp++; | ||
| 1743 | } | ||
| 1744 | else /* Multibyte character. */ | ||
| 1745 | { | ||
| 1746 | /* Make `bufp' point normal multi-byte form temporarily. */ | ||
| 1747 | *bufp -= 0x20; | ||
| 1748 | cmpcharp->glyph[i] | ||
| 1749 | = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0), 0); | ||
| 1750 | width = WIDTH_BY_CHAR_HEAD (*bufp); | ||
| 1751 | *bufp += 0x20; | ||
| 1752 | bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20); | ||
| 1753 | } | ||
| 1754 | |||
| 1755 | if (embedded_rule && i > 0) | ||
| 1756 | { | ||
| 1757 | /* Reference points (global_ref and new_ref) are | ||
| 1758 | encoded as below: | ||
| 1759 | |||
| 1760 | 0--1--2 -- ascent | ||
| 1761 | | | | ||
| 1762 | | | | ||
| 1763 | | 4 -+--- center | ||
| 1764 | -- 3 5 -- baseline | ||
| 1765 | | | | ||
| 1766 | 6--7--8 -- descent | ||
| 1767 | |||
| 1768 | Now, we calculate the column offset of the new glyph | ||
| 1769 | from the left edge of the first glyph. This avoids | ||
| 1770 | repeating the same calculation every time this | ||
| 1771 | composite character is displayed. */ | ||
| 1772 | |||
| 1773 | /* Reference points of global glyph and new glyph. */ | ||
| 1774 | int global_ref = (cmpcharp->cmp_rule[i] - 0xA0) / 9; | ||
| 1775 | int new_ref = (cmpcharp->cmp_rule[i] - 0xA0) % 9; | ||
| 1776 | /* Column offset relative to the first glyph. */ | ||
| 1777 | float left = (leftmost | ||
| 1778 | + (global_ref % 3) * (rightmost - leftmost) / 2.0 | ||
| 1779 | - (new_ref % 3) * width / 2.0); | ||
| 1780 | |||
| 1781 | cmpcharp->col_offset[i] = left; | ||
| 1782 | if (left < leftmost) | ||
| 1783 | leftmost = left; | ||
| 1784 | if (left + width > rightmost) | ||
| 1785 | rightmost = left + width; | ||
| 1786 | } | ||
| 1787 | else | ||
| 1788 | { | ||
| 1789 | if (width > rightmost) | ||
| 1790 | rightmost = width; | ||
| 1791 | } | ||
| 1792 | } | ||
| 1793 | if (embedded_rule) | ||
| 1794 | { | ||
| 1795 | /* Now col_offset[N] are relative to the left edge of the | ||
| 1796 | first component. Make them relative to the left edge of | ||
| 1797 | overall glyph. */ | ||
| 1798 | for (i = 0; i < chars; i++) | ||
| 1799 | cmpcharp->col_offset[i] -= leftmost; | ||
| 1800 | /* Make rightmost hold the width of the overall glyph. */ | ||
| 1801 | rightmost -= leftmost; | ||
| 1802 | } | ||
| 1803 | |||
| 1804 | cmpcharp->width = rightmost; | ||
| 1805 | if (cmpcharp->width < rightmost) | ||
| 1806 | /* To get a ceiling integer value. */ | ||
| 1807 | cmpcharp->width++; | ||
| 1808 | } | ||
| 1809 | |||
| 1810 | cmpchar_table[n_cmpchars] = cmpcharp; | ||
| 1811 | |||
| 1812 | return n_cmpchars++; | ||
| 1813 | } | ||
| 1814 | |||
| 1815 | /* Return the Nth element of the composite character C. If NOERROR is | ||
| 1816 | nonzero, return 0 on error condition (C is an invalid composite | ||
| 1817 | character, or N is out of range). */ | ||
| 1818 | int | ||
| 1819 | cmpchar_component (c, n, noerror) | ||
| 1820 | int c, n, noerror; | ||
| 1821 | { | ||
| 1822 | int id = COMPOSITE_CHAR_ID (c); | ||
| 1823 | |||
| 1824 | if (id < 0 || id >= n_cmpchars) | ||
| 1825 | { | ||
| 1826 | /* C is not a valid composite character. */ | ||
| 1827 | if (noerror) return 0; | ||
| 1828 | error ("Invalid composite character: %d", c) ; | ||
| 1829 | } | ||
| 1830 | if (n >= cmpchar_table[id]->glyph_len) | ||
| 1831 | { | ||
| 1832 | /* No such component. */ | ||
| 1833 | if (noerror) return 0; | ||
| 1834 | args_out_of_range (make_number (c), make_number (n)); | ||
| 1835 | } | ||
| 1836 | /* No face data is stored in glyph code. */ | ||
| 1837 | return ((int) (cmpchar_table[id]->glyph[n])); | ||
| 1838 | } | ||
| 1839 | |||
| 1840 | DEFUN ("cmpcharp", Fcmpcharp, Scmpcharp, 1, 1, 0, | ||
| 1841 | "T if CHAR is a composite character.") | ||
| 1842 | (ch) | ||
| 1843 | Lisp_Object ch; | ||
| 1844 | { | ||
| 1845 | CHECK_NUMBER (ch, 0); | ||
| 1846 | return (COMPOSITE_CHAR_P (XINT (ch)) ? Qt : Qnil); | ||
| 1847 | } | ||
| 1848 | |||
| 1849 | DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component, | ||
| 1850 | 2, 2, 0, | ||
| 1851 | "Return the Nth component character of composite character CHARACTER.") | ||
| 1852 | (character, n) | ||
| 1853 | Lisp_Object character, n; | ||
| 1854 | { | ||
| 1855 | int id; | ||
| 1856 | |||
| 1857 | CHECK_NUMBER (character, 0); | ||
| 1858 | CHECK_NUMBER (n, 1); | ||
| 1859 | |||
| 1860 | return (make_number (cmpchar_component (XINT (character), XINT (n), 0))); | ||
| 1861 | } | ||
| 1862 | |||
| 1863 | DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule, | ||
| 1864 | 2, 2, 0, | ||
| 1865 | "Return the Nth composition rule of composite character CHARACTER.\n\ | ||
| 1866 | The returned rule is for composing the Nth component\n\ | ||
| 1867 | on the (N-1)th component.\n\ | ||
| 1868 | If CHARACTER should be composed relatively or N is 0, return 255.") | ||
| 1869 | (character, n) | ||
| 1870 | Lisp_Object character, n; | ||
| 1871 | { | ||
| 1872 | int id; | ||
| 1873 | |||
| 1874 | CHECK_NUMBER (character, 0); | ||
| 1875 | CHECK_NUMBER (n, 1); | ||
| 1876 | |||
| 1877 | id = COMPOSITE_CHAR_ID (XINT (character)); | ||
| 1878 | if (id < 0 || id >= n_cmpchars) | ||
| 1879 | error ("Invalid composite character: %d", XINT (character)); | ||
| 1880 | if (XINT (n) < 0 || XINT (n) >= cmpchar_table[id]->glyph_len) | ||
| 1881 | args_out_of_range (character, n); | ||
| 1882 | |||
| 1883 | return make_number (cmpchar_table[id]->cmp_rule | ||
| 1884 | ? cmpchar_table[id]->cmp_rule[XINT (n)] | ||
| 1885 | : 255); | ||
| 1886 | } | ||
| 1887 | |||
| 1888 | DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p, | ||
| 1889 | Scmpchar_cmp_rule_p, 1, 1, 0, | ||
| 1890 | "Return non-nil if composite character CHARACTER contains a embedded rule.") | ||
| 1891 | (character) | ||
| 1892 | Lisp_Object character; | ||
| 1893 | { | ||
| 1894 | int id; | ||
| 1895 | |||
| 1896 | CHECK_NUMBER (character, 0); | ||
| 1897 | id = COMPOSITE_CHAR_ID (XINT (character)); | ||
| 1898 | if (id < 0 || id >= n_cmpchars) | ||
| 1899 | error ("Invalid composite character: %d", XINT (character)); | ||
| 1900 | |||
| 1901 | return (cmpchar_table[id]->cmp_rule ? Qt : Qnil); | ||
| 1902 | } | ||
| 1903 | |||
| 1904 | DEFUN ("composite-char-component-count", Fcmpchar_cmp_count, | ||
| 1905 | Scmpchar_cmp_count, 1, 1, 0, | ||
| 1906 | "Return number of compoents of composite character CHARACTER.") | ||
| 1907 | (character) | ||
| 1908 | Lisp_Object character; | ||
| 1909 | { | ||
| 1910 | int id; | ||
| 1911 | |||
| 1912 | CHECK_NUMBER (character, 0); | ||
| 1913 | id = COMPOSITE_CHAR_ID (XINT (character)); | ||
| 1914 | if (id < 0 || id >= n_cmpchars) | ||
| 1915 | error ("Invalid composite character: %d", XINT (character)); | ||
| 1916 | |||
| 1917 | return (make_number (cmpchar_table[id]->glyph_len)); | ||
| 1918 | } | ||
| 1919 | |||
| 1920 | DEFUN ("compose-string", Fcompose_string, Scompose_string, | ||
| 1921 | 1, 1, 0, | ||
| 1922 | "Return one char string composed from all characters in STRING.") | ||
| 1923 | (str) | ||
| 1924 | Lisp_Object str; | ||
| 1925 | { | ||
| 1926 | unsigned char buf[MAX_LENGTH_OF_MULTI_BYTE_FORM], *p, *pend, *ptemp; | ||
| 1927 | int len, i; | ||
| 1928 | |||
| 1929 | CHECK_STRING (str, 0); | ||
| 1930 | |||
| 1931 | buf[0] = LEADING_CODE_COMPOSITION; | ||
| 1932 | p = XSTRING (str)->data; | ||
| 1933 | pend = p + STRING_BYTES (XSTRING (str)); | ||
| 1934 | i = 1; | ||
| 1935 | while (p < pend) | ||
| 1936 | { | ||
| 1937 | if (*p < 0x20) /* control code */ | ||
| 1938 | error ("Invalid component character: %d", *p); | ||
| 1939 | else if (*p < 0x80) /* ASCII */ | ||
| 1940 | { | ||
| 1941 | if (i + 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM) | ||
| 1942 | error ("Too long string to be composed: %s", XSTRING (str)->data); | ||
| 1943 | /* Prepend an ASCII charset indicator 0xA0, set MSB of the | ||
| 1944 | code itself. */ | ||
| 1945 | buf[i++] = 0xA0; | ||
| 1946 | buf[i++] = *p++ + 0x80; | ||
| 1947 | } | ||
| 1948 | else if (*p == LEADING_CODE_COMPOSITION) /* composite char */ | ||
| 1949 | { | ||
| 1950 | /* Already composed. Eliminate the heading | ||
| 1951 | LEADING_CODE_COMPOSITION, keep the remaining bytes | ||
| 1952 | unchanged. */ | ||
| 1953 | p++; | ||
| 1954 | if (*p == 255) | ||
| 1955 | error ("Can't compose a rule-based composition character"); | ||
| 1956 | ptemp = p; | ||
| 1957 | while (! CHAR_HEAD_P (*p)) p++; | ||
| 1958 | if (str_cmpchar_id (ptemp - 1, p - ptemp + 1) < 0) | ||
| 1959 | error ("Can't compose an invalid composition character"); | ||
| 1960 | if (i + (p - ptemp) >= MAX_LENGTH_OF_MULTI_BYTE_FORM) | ||
| 1961 | error ("Too long string to be composed: %s", XSTRING (str)->data); | ||
| 1962 | bcopy (ptemp, buf + i, p - ptemp); | ||
| 1963 | i += p - ptemp; | ||
| 1964 | } | ||
| 1965 | else /* multibyte char */ | ||
| 1966 | { | ||
| 1967 | /* Add 0x20 to the base leading-code, keep the remaining | ||
| 1968 | bytes unchanged. */ | ||
| 1969 | int c = STRING_CHAR_AND_LENGTH (p, pend - p, len); | ||
| 1970 | |||
| 1971 | if (len <= 1 || ! CHAR_VALID_P (c, 0)) | ||
| 1972 | error ("Can't compose an invalid character"); | ||
| 1973 | if (i + len >= MAX_LENGTH_OF_MULTI_BYTE_FORM) | ||
| 1974 | error ("Too long string to be composed: %s", XSTRING (str)->data); | ||
| 1975 | bcopy (p, buf + i, len); | ||
| 1976 | buf[i] += 0x20; | ||
| 1977 | p += len, i += len; | ||
| 1978 | } | ||
| 1979 | } | ||
| 1980 | |||
| 1981 | if (i < 5) | ||
| 1982 | /* STR contains only one character, which can't be composed. */ | ||
| 1983 | error ("Too short string to be composed: %s", XSTRING (str)->data); | ||
| 1984 | |||
| 1985 | return make_string_from_bytes (buf, 1, i); | ||
| 1986 | } | ||
| 1987 | |||
| 1988 | |||
| 1989 | int | 1382 | int |
| 1990 | charset_id_internal (charset_name) | 1383 | charset_id_internal (charset_name) |
| 1991 | char *charset_name; | 1384 | char *charset_name; |
| @@ -2047,9 +1440,6 @@ init_charset_once () | |||
| 2047 | for (k = 0; k < 128; k++) | 1440 | for (k = 0; k < 128; k++) |
| 2048 | iso_charset_table [i][j][k] = -1; | 1441 | iso_charset_table [i][j][k] = -1; |
| 2049 | 1442 | ||
| 2050 | bzero (cmpchar_hash_table, sizeof cmpchar_hash_table); | ||
| 2051 | cmpchar_table_size = n_cmpchars = 0; | ||
| 2052 | |||
| 2053 | for (i = 0; i < 256; i++) | 1443 | for (i = 0; i < 256; i++) |
| 2054 | BYTES_BY_CHAR_HEAD (i) = 1; | 1444 | BYTES_BY_CHAR_HEAD (i) = 1; |
| 2055 | for (i = MIN_CHARSET_OFFICIAL_DIMENSION1; | 1445 | for (i = MIN_CHARSET_OFFICIAL_DIMENSION1; |
| @@ -2064,7 +1454,7 @@ init_charset_once () | |||
| 2064 | BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4; | 1454 | BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4; |
| 2065 | /* The following don't reflect the actual bytes, but just tell | 1455 | /* The following don't reflect the actual bytes, but just tell |
| 2066 | that it is a start of a multibyte character. */ | 1456 | that it is a start of a multibyte character. */ |
| 2067 | BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION) = 2; | 1457 | BYTES_BY_CHAR_HEAD (0x80) = 2; |
| 2068 | BYTES_BY_CHAR_HEAD (0x9E) = 2; | 1458 | BYTES_BY_CHAR_HEAD (0x9E) = 2; |
| 2069 | BYTES_BY_CHAR_HEAD (0x9F) = 2; | 1459 | BYTES_BY_CHAR_HEAD (0x9F) = 2; |
| 2070 | 1460 | ||
| @@ -2089,7 +1479,6 @@ init_charset_once () | |||
| 2089 | val = Fcons (make_number ((i - 0x70) << 7), val); | 1479 | val = Fcons (make_number ((i - 0x70) << 7), val); |
| 2090 | for (; i < 0xFF; i++) | 1480 | for (; i < 0xFF; i++) |
| 2091 | val = Fcons (make_number ((i - 0xE0) << 14), val); | 1481 | val = Fcons (make_number ((i - 0xE0) << 14), val); |
| 2092 | val = Fcons (make_number (GENERIC_COMPOSITION_CHAR), val); | ||
| 2093 | Vgeneric_character_list = Fnreverse (val); | 1482 | Vgeneric_character_list = Fnreverse (val); |
| 2094 | } | 1483 | } |
| 2095 | 1484 | ||
| @@ -2121,10 +1510,6 @@ syms_of_charset () | |||
| 2121 | CHARSET_SYMBOL (CHARSET_ASCII) = Qascii; | 1510 | CHARSET_SYMBOL (CHARSET_ASCII) = Qascii; |
| 2122 | Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII)); | 1511 | Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII)); |
| 2123 | 1512 | ||
| 2124 | Qcomposition = intern ("composition"); | ||
| 2125 | staticpro (&Qcomposition); | ||
| 2126 | CHARSET_SYMBOL (CHARSET_COMPOSITION) = Qcomposition; | ||
| 2127 | |||
| 2128 | Qauto_fill_chars = intern ("auto-fill-chars"); | 1513 | Qauto_fill_chars = intern ("auto-fill-chars"); |
| 2129 | staticpro (&Qauto_fill_chars); | 1514 | staticpro (&Qauto_fill_chars); |
| 2130 | Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0)); | 1515 | Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0)); |
| @@ -2149,12 +1534,6 @@ syms_of_charset () | |||
| 2149 | defsubr (&Schar_direction); | 1534 | defsubr (&Schar_direction); |
| 2150 | defsubr (&Schars_in_region); | 1535 | defsubr (&Schars_in_region); |
| 2151 | defsubr (&Sstring); | 1536 | defsubr (&Sstring); |
| 2152 | defsubr (&Scmpcharp); | ||
| 2153 | defsubr (&Scmpchar_component); | ||
| 2154 | defsubr (&Scmpchar_cmp_rule); | ||
| 2155 | defsubr (&Scmpchar_cmp_rule_p); | ||
| 2156 | defsubr (&Scmpchar_cmp_count); | ||
| 2157 | defsubr (&Scompose_string); | ||
| 2158 | defsubr (&Ssetup_special_charsets); | 1537 | defsubr (&Ssetup_special_charsets); |
| 2159 | 1538 | ||
| 2160 | DEFVAR_LISP ("charset-list", &Vcharset_list, | 1539 | DEFVAR_LISP ("charset-list", &Vcharset_list, |
| @@ -2166,10 +1545,6 @@ syms_of_charset () | |||
| 2166 | An ID of a translation table is an index of this vector."); | 1545 | An ID of a translation table is an index of this vector."); |
| 2167 | Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil); | 1546 | Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil); |
| 2168 | 1547 | ||
| 2169 | DEFVAR_INT ("leading-code-composition", &leading_code_composition, | ||
| 2170 | "Leading-code of composite characters."); | ||
| 2171 | leading_code_composition = LEADING_CODE_COMPOSITION; | ||
| 2172 | |||
| 2173 | DEFVAR_INT ("leading-code-private-11", &leading_code_private_11, | 1548 | DEFVAR_INT ("leading-code-private-11", &leading_code_private_11, |
| 2174 | "Leading-code of private TYPE9N charset of column-width 1."); | 1549 | "Leading-code of private TYPE9N charset of column-width 1."); |
| 2175 | leading_code_private_11 = LEADING_CODE_PRIVATE_11; | 1550 | leading_code_private_11 = LEADING_CODE_PRIVATE_11; |
| @@ -2208,10 +1583,6 @@ If this is nil, `nonascii-insert-offset' is used instead.\n\ | |||
| 2208 | See also the docstring of `make-translation-table'."); | 1583 | See also the docstring of `make-translation-table'."); |
| 2209 | Vnonascii_translation_table = Qnil; | 1584 | Vnonascii_translation_table = Qnil; |
| 2210 | 1585 | ||
| 2211 | DEFVAR_INT ("min-composite-char", &min_composite_char, | ||
| 2212 | "Minimum character code of a composite character."); | ||
| 2213 | min_composite_char = MIN_CHAR_COMPOSITION; | ||
| 2214 | |||
| 2215 | DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars, | 1586 | DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars, |
| 2216 | "A char-table for characters which invoke auto-filling.\n\ | 1587 | "A char-table for characters which invoke auto-filling.\n\ |
| 2217 | Such characters has value t in this table."); | 1588 | Such characters has value t in this table."); |