diff options
| author | Kenichi Handa | 1999-12-15 00:04:14 +0000 |
|---|---|---|
| committer | Kenichi Handa | 1999-12-15 00:04:14 +0000 |
| commit | f49b37c9a3ab3b8d4cee577dbe3b224e648a6c85 (patch) | |
| tree | cac527955f26dd3dbee3617f985aeec1d7025132 /src | |
| parent | 6982083b65de7526b4ff88815524addbbbceee80 (diff) | |
| download | emacs-f49b37c9a3ab3b8d4cee577dbe3b224e648a6c85.tar.gz emacs-f49b37c9a3ab3b8d4cee577dbe3b224e648a6c85.zip | |
In this entry, just `Modified' means that the code for
composite characters is deleted.
(LEADING_CODE_COMPOSITION) (CHARSET_COMPOSITION)
(charset_composition) (MIN_CHAR_COMPOSITION)
(MAX_CHAR_COMPOSITION) (GENERIC_COMPOSITION_CHAR)
(COMPOSITE_CHAR_P) (MAKE_COMPOSITE_CHAR) (COMPOSITE_CHAR_ID)
(PARSE_COMPOSITE_SEQ) (PARSE_CHARACTER_SEQ): Deleted.
(MAX_CHAR) (CHARSET_VALID_P) (CHARSET_DEFINED_P) (CHARSET_AT)
(FIRST_CHARSET_AT) (SAME_CHARSET_P) (MAKE_NON_ASCII_CHAR)
(PARSE_MULTIBYTE_SEQ) (SPLIT_NON_ASCII_CHAR) (CHAR_PRINTABLE_P):
Modified.
(SPLIT_STRING): Call split_string, not split_non_ascii_string.
(CHAR_STRING): Delete WORKBUF argument. Call char_to_string, not
non_ascii_char_to_string.
(STRING_CHAR): Call string_to_char, not string_to_non_ascii_char.
(STRING_CHAR_AND_LENGTH): Likewise.
(FETCH_CHAR_ADVANCE): New macro.
(MAX_COMPONENT_COUNT) (struct cmpchar_info): Deleted.
(MAX_MULTIBYTE_LENGTH): New macro.
(MAX_LENGTH_OF_MULTI_BYTE_FORM): Deleted.
(find_charset_in_str): Argument adjusted.
(CHAR_LEN): Modified.
Diffstat (limited to 'src')
| -rw-r--r-- | src/charset.h | 361 |
1 files changed, 106 insertions, 255 deletions
diff --git a/src/charset.h b/src/charset.h index ed5b3a7d2cf..c0b7e4d9eb4 100644 --- a/src/charset.h +++ b/src/charset.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* Header for multilingual character handler. | 1 | /* Header for multibyte character handler. |
| 2 | Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN. | 2 | Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN. |
| 3 | Licensed to the Free Software Foundation. | 3 | Licensed to the Free Software Foundation. |
| 4 | 4 | ||
| @@ -27,21 +27,20 @@ Boston, MA 02111-1307, USA. */ | |||
| 27 | A character set ("charset" hereafter) is a meaningful collection | 27 | A character set ("charset" hereafter) is a meaningful collection |
| 28 | (i.e. language, culture, functionality, etc) of characters. Emacs | 28 | (i.e. language, culture, functionality, etc) of characters. Emacs |
| 29 | handles multiple charsets at once. Each charset corresponds to one | 29 | handles multiple charsets at once. Each charset corresponds to one |
| 30 | of ISO charsets (except for a special charset for composition | 30 | of ISO charsets. Emacs identifies a charset by a unique |
| 31 | characters). Emacs identifies a charset by a unique identification | 31 | identification number, whereas ISO identifies a charset by a triplet |
| 32 | number, whereas ISO identifies a charset by a triplet of DIMENSION, | 32 | of DIMENSION, CHARS and FINAL-CHAR. So, hereafter, just saying |
| 33 | CHARS and FINAL-CHAR. So, hereafter, just saying "charset" means an | 33 | "charset" means an identification number (integer value). |
| 34 | identification number (integer value). | ||
| 35 | 34 | ||
| 36 | The value range of charset is 0x00, 0x80..0xFE. There are four | 35 | The value range of charset is 0x00, 0x81..0xFE. There are four |
| 37 | kinds of charset depending on DIMENSION (1 or 2) and CHARS (94 or | 36 | kinds of charset depending on DIMENSION (1 or 2) and CHARS (94 or |
| 38 | 96). For instance, a charset of DIMENSION2_CHARS94 contains 94x94 | 37 | 96). For instance, a charset of DIMENSION2_CHARS94 contains 94x94 |
| 39 | 38 | characters. | |
| 40 | 39 | ||
| 41 | Within Emacs Lisp, a charset is treated as a symbol which has a | 40 | Within Emacs Lisp, a charset is treated as a symbol which has a |
| 42 | property `charset'. The property value is a vector containing | 41 | property `charset'. The property value is a vector containing |
| 43 | various information about the charset. For readability of C codes, | 42 | various information about the charset. For readability of C codes, |
| 44 | we use the following convention on C variable names: | 43 | we use the following convention for C variable names: |
| 45 | charset_symbol: Emacs Lisp symbol of a charset | 44 | charset_symbol: Emacs Lisp symbol of a charset |
| 46 | charset_id: Emacs Lisp integer of an identification number of a charset | 45 | charset_id: Emacs Lisp integer of an identification number of a charset |
| 47 | charset: C integer of an identification number of a charset | 46 | charset: C integer of an identification number of a charset |
| @@ -55,7 +54,7 @@ Boston, MA 02111-1307, USA. */ | |||
| 55 | character in Emacs' buffer and string. | 54 | character in Emacs' buffer and string. |
| 56 | 55 | ||
| 57 | We call a charset which has extended leading-code as "private | 56 | We call a charset which has extended leading-code as "private |
| 58 | charset" because those are mainly for a charset which is not | 57 | charset" because those are mainly for a charset which is not yet |
| 59 | registered by ISO. On the contrary, we call a charset which does | 58 | registered by ISO. On the contrary, we call a charset which does |
| 60 | not have extended leading-code as "official charset". | 59 | not have extended leading-code as "official charset". |
| 61 | 60 | ||
| @@ -65,7 +64,7 @@ Boston, MA 02111-1307, USA. */ | |||
| 65 | 0x00 official dim1 -- none -- -- none -- | 64 | 0x00 official dim1 -- none -- -- none -- |
| 66 | (ASCII) | 65 | (ASCII) |
| 67 | 0x01..0x7F --never used-- | 66 | 0x01..0x7F --never used-- |
| 68 | 0x80 COMPOSITION same as charset -- none -- | 67 | 0x80 --never used-- |
| 69 | 0x81..0x8F official dim1 same as charset -- none -- | 68 | 0x81..0x8F official dim1 same as charset -- none -- |
| 70 | 0x90..0x99 official dim2 same as charset -- none -- | 69 | 0x90..0x99 official dim2 same as charset -- none -- |
| 71 | 0x9A..0x9F --never used-- | 70 | 0x9A..0x9F --never used-- |
| @@ -80,20 +79,9 @@ Boston, MA 02111-1307, USA. */ | |||
| 80 | 0xFF --never used-- | 79 | 0xFF --never used-- |
| 81 | --------------------------------------------------------------------------- | 80 | --------------------------------------------------------------------------- |
| 82 | 81 | ||
| 83 | In the table, "COMPOSITION" means a charset for a composite | ||
| 84 | character which is a character composed from several (up to 16) | ||
| 85 | non-composite characters (components). Although a composite | ||
| 86 | character can contain components of many charsets, a composite | ||
| 87 | character itself belongs to the charset CHARSET-COMPOSITION. See | ||
| 88 | the document "GENERAL NOTE on COMPOSITE CHARACTER" below for more | ||
| 89 | detail. | ||
| 90 | |||
| 91 | */ | 82 | */ |
| 92 | 83 | ||
| 93 | /* Definition of special leading-codes. */ | 84 | /* Definition of special leading-codes. */ |
| 94 | /* Base leading-code. */ | ||
| 95 | /* Special leading-code followed by components of a composite character. */ | ||
| 96 | #define LEADING_CODE_COMPOSITION 0x80 | ||
| 97 | /* Leading-code followed by extended leading-code. */ | 85 | /* Leading-code followed by extended leading-code. */ |
| 98 | #define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */ | 86 | #define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */ |
| 99 | #define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */ | 87 | #define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */ |
| @@ -122,10 +110,8 @@ Boston, MA 02111-1307, USA. */ | |||
| 122 | 110 | ||
| 123 | /* Definition of special charsets. */ | 111 | /* Definition of special charsets. */ |
| 124 | #define CHARSET_ASCII 0 | 112 | #define CHARSET_ASCII 0 |
| 125 | #define CHARSET_COMPOSITION 0x80 | ||
| 126 | 113 | ||
| 127 | extern int charset_ascii; /* ASCII */ | 114 | extern int charset_ascii; /* ASCII */ |
| 128 | extern int charset_composition; /* for a composite character */ | ||
| 129 | extern int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */ | 115 | extern int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */ |
| 130 | extern int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */ | 116 | extern int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */ |
| 131 | extern int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */ | 117 | extern int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */ |
| @@ -194,35 +180,6 @@ extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ | |||
| 194 | 180 | ||
| 195 | */ | 181 | */ |
| 196 | 182 | ||
| 197 | /*** GENERAL NOTE on COMPOSITE CHARACTER *** | ||
| 198 | |||
| 199 | A composite character is a character composed from several (up to | ||
| 200 | 16) non-composite characters (components). Although each component | ||
| 201 | can belong to any charset, a composite character itself belongs to | ||
| 202 | the charset `charset-composition' and is assigned a special | ||
| 203 | leading-code `LEADING_CODE_COMPOSITION' for multi-byte form. See | ||
| 204 | the document "2. Emacs internal format handlers" in `coding.c' for | ||
| 205 | more detail about multi-byte form. | ||
| 206 | |||
| 207 | A character code of composite character has special format. In the | ||
| 208 | above document, FIELD1 of a composite character is 0x1F. Each | ||
| 209 | composite character is assigned a sequential number CMPCHAR-ID. | ||
| 210 | FIELD2 and FIELD3 are combined to make 14bits field for holding | ||
| 211 | CMPCHAR-ID, which means that Emacs can handle at most 2^14 (= 16384) | ||
| 212 | composite characters at once. | ||
| 213 | |||
| 214 | ----------------------------------------------------------------------- | ||
| 215 | charset FIELD1 (5-bit) FIELD2&3 (14-bit) | ||
| 216 | ----------------------------------------------------------------------- | ||
| 217 | CHARSET-COMPOSITION 0x1F CMPCHAR-ID | ||
| 218 | ----------------------------------------------------------------------- | ||
| 219 | |||
| 220 | Emacs assigns CMPCHAR-ID to a composite character only when it | ||
| 221 | requires the character code of the composite character (e.g. while | ||
| 222 | displaying the composite character). | ||
| 223 | |||
| 224 | */ | ||
| 225 | |||
| 226 | /* Masks of each field of character code. */ | 183 | /* Masks of each field of character code. */ |
| 227 | #define CHAR_FIELD1_MASK (0x1F << 14) | 184 | #define CHAR_FIELD1_MASK (0x1F << 14) |
| 228 | #define CHAR_FIELD2_MASK (0x7F << 7) | 185 | #define CHAR_FIELD2_MASK (0x7F << 7) |
| @@ -242,17 +199,11 @@ extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ | |||
| 242 | ((MIN_CHARSET_OFFICIAL_DIMENSION2 - 0x8F) << 14) | 199 | ((MIN_CHARSET_OFFICIAL_DIMENSION2 - 0x8F) << 14) |
| 243 | #define MIN_CHAR_PRIVATE_DIMENSION2 \ | 200 | #define MIN_CHAR_PRIVATE_DIMENSION2 \ |
| 244 | ((MIN_CHARSET_PRIVATE_DIMENSION2 - 0xE0) << 14) | 201 | ((MIN_CHARSET_PRIVATE_DIMENSION2 - 0xE0) << 14) |
| 245 | #define MIN_CHAR_COMPOSITION \ | 202 | /* Maximum character code currently used plus 1. */ |
| 246 | (0x1F << 14) | 203 | #define MAX_CHAR (0x1F << 14) |
| 247 | #define MAX_CHAR_COMPOSITION (GLYPH_MASK_CHAR - 1) | ||
| 248 | |||
| 249 | /* A generic character for composition characters. */ | ||
| 250 | #define GENERIC_COMPOSITION_CHAR (GLYPH_MASK_CHAR) | ||
| 251 | 204 | ||
| 252 | /* 1 if C is an ASCII character, else 0. */ | 205 | /* 1 if C is an ASCII character, else 0. */ |
| 253 | #define SINGLE_BYTE_CHAR_P(c) ((c) >= 0 && (c) < 0x100) | 206 | #define SINGLE_BYTE_CHAR_P(c) ((c) >= 0 && (c) < 0x100) |
| 254 | /* 1 if C is an composite character, else 0. */ | ||
| 255 | #define COMPOSITE_CHAR_P(c) ((c) >= MIN_CHAR_COMPOSITION) | ||
| 256 | 207 | ||
| 257 | /* 1 if BYTE is a character in itself, in multibyte mode. */ | 208 | /* 1 if BYTE is a character in itself, in multibyte mode. */ |
| 258 | #define ASCII_BYTE_P(byte) ((byte) < 0x80) | 209 | #define ASCII_BYTE_P(byte) ((byte) < 0x80) |
| @@ -379,10 +330,10 @@ extern Lisp_Object Vcharset_symbol_table; | |||
| 379 | /* 1 if CHARSET is in valid value range, else 0. */ | 330 | /* 1 if CHARSET is in valid value range, else 0. */ |
| 380 | #define CHARSET_VALID_P(charset) \ | 331 | #define CHARSET_VALID_P(charset) \ |
| 381 | ((charset) == 0 \ | 332 | ((charset) == 0 \ |
| 382 | || ((charset) >= 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \ | 333 | || ((charset) > 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \ |
| 383 | || ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 && (charset) <= MAX_CHARSET)) | 334 | || ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 && (charset) <= MAX_CHARSET)) |
| 384 | 335 | ||
| 385 | /* 1 if CHARSET is already defined (and not CHARSET_COMPOSITION), else 0. */ | 336 | /* 1 if CHARSET is already defined, else 0. */ |
| 386 | #define CHARSET_DEFINED_P(charset) \ | 337 | #define CHARSET_DEFINED_P(charset) \ |
| 387 | (((charset) >= 0) && ((charset) <= MAX_CHARSET) \ | 338 | (((charset) >= 0) && ((charset) <= MAX_CHARSET) \ |
| 388 | && !NILP (CHARSET_TABLE_ENTRY (charset))) | 339 | && !NILP (CHARSET_TABLE_ENTRY (charset))) |
| @@ -406,67 +357,47 @@ extern int width_by_char_head[256]; | |||
| 406 | ? CHAR_FIELD2 (c) + 0x70 \ | 357 | ? CHAR_FIELD2 (c) + 0x70 \ |
| 407 | : ((c) < MIN_CHAR_PRIVATE_DIMENSION2 \ | 358 | : ((c) < MIN_CHAR_PRIVATE_DIMENSION2 \ |
| 408 | ? CHAR_FIELD1 (c) + 0x8F \ | 359 | ? CHAR_FIELD1 (c) + 0x8F \ |
| 409 | : ((c) < MIN_CHAR_COMPOSITION \ | 360 | : CHAR_FIELD1 (c) + 0xE0))) |
| 410 | ? CHAR_FIELD1 (c) + 0xE0 \ | ||
| 411 | : ((c) <= MAX_CHAR_COMPOSITION \ | ||
| 412 | ? CHARSET_COMPOSITION \ | ||
| 413 | : CHARSET_ASCII))))) | ||
| 414 | 361 | ||
| 415 | /* Return charset at the place pointed by P. */ | 362 | /* Return charset at the place pointed by P. */ |
| 416 | #define CHARSET_AT(p) \ | 363 | #define CHARSET_AT(p) \ |
| 417 | (*(p) < 0x80 \ | 364 | (*(p) < 0x80 \ |
| 418 | ? CHARSET_ASCII \ | 365 | ? CHARSET_ASCII \ |
| 419 | : (*(p) == LEADING_CODE_COMPOSITION \ | 366 | : (*(p) < LEADING_CODE_PRIVATE_11 \ |
| 420 | ? CHARSET_COMPOSITION \ | 367 | ? (int)*(p) \ |
| 421 | : (*(p) < LEADING_CODE_PRIVATE_11 \ | 368 | : (*(p) <= LEADING_CODE_PRIVATE_22 \ |
| 422 | ? (int)*(p) \ | 369 | ? (int)*((p) + 1) \ |
| 423 | : (*(p) <= LEADING_CODE_PRIVATE_22 \ | 370 | : -1))) |
| 424 | ? (int)*((p) + 1) \ | ||
| 425 | : -1)))) | ||
| 426 | 371 | ||
| 427 | /* Same as `CHARSET_AT ()' but perhaps runs faster because of an | 372 | /* Same as `CHARSET_AT ()' but perhaps runs faster because of an |
| 428 | additional argument C which is the code (byte) at P. */ | 373 | additional argument C which is the code (byte) at P. */ |
| 429 | #define FIRST_CHARSET_AT(p, c) \ | 374 | #define FIRST_CHARSET_AT(p, c) \ |
| 430 | ((c) < 0x80 \ | 375 | ((c) < 0x80 \ |
| 431 | ? CHARSET_ASCII \ | 376 | ? CHARSET_ASCII \ |
| 432 | : ((c) == LEADING_CODE_COMPOSITION \ | 377 | : ((c) < LEADING_CODE_PRIVATE_11 \ |
| 433 | ? CHARSET_COMPOSITION \ | 378 | ? (int)(c) \ |
| 434 | : ((c) < LEADING_CODE_PRIVATE_11 \ | 379 | : ((c) <= LEADING_CODE_PRIVATE_22 \ |
| 435 | ? (int)(c) \ | 380 | ? (int)*((p) + 1) \ |
| 436 | : ((c) <= LEADING_CODE_PRIVATE_22 \ | 381 | : -1))) |
| 437 | ? (int)*((p) + 1) \ | 382 | |
| 438 | : -1)))) | 383 | /* Check if two characters C1 and C2 belong to the same charset. */ |
| 439 | 384 | #define SAME_CHARSET_P(c1, c2) \ | |
| 440 | /* Check if two characters C1 and C2 belong to the same charset. | 385 | (SINGLE_BYTE_CHAR_P (c1) \ |
| 441 | Always return 0 for composite characters. */ | 386 | ? SINGLE_BYTE_CHAR_P (c2) \ |
| 442 | #define SAME_CHARSET_P(c1, c2) \ | 387 | : (c1 < MIN_CHAR_OFFICIAL_DIMENSION2 \ |
| 443 | (c1 < MIN_CHAR_COMPOSITION \ | 388 | ? (c1 & CHAR_FIELD2_MASK) == (c2 & CHAR_FIELD2_MASK) \ |
| 444 | && (SINGLE_BYTE_CHAR_P (c1) \ | 389 | : (c1 & CHAR_FIELD1_MASK) == (c2 & CHAR_FIELD1_MASK))) |
| 445 | ? SINGLE_BYTE_CHAR_P (c2) \ | ||
| 446 | : (c1 < MIN_CHAR_OFFICIAL_DIMENSION2 \ | ||
| 447 | ? (c1 & CHAR_FIELD2_MASK) == (c2 & CHAR_FIELD2_MASK) \ | ||
| 448 | : (c1 & CHAR_FIELD1_MASK) == (c2 & CHAR_FIELD1_MASK)))) | ||
| 449 | 390 | ||
| 450 | /* Return a non-ASCII character of which charset is CHARSET and | 391 | /* Return a non-ASCII character of which charset is CHARSET and |
| 451 | position-codes are C1 and C2. DIMENSION1 character ignores C2. */ | 392 | position-codes are C1 and C2. DIMENSION1 character ignores C2. */ |
| 452 | #define MAKE_NON_ASCII_CHAR(charset, c1, c2) \ | 393 | #define MAKE_NON_ASCII_CHAR(charset, c1, c2) \ |
| 453 | ((charset) == CHARSET_COMPOSITION \ | 394 | (! CHARSET_DEFINED_P (charset) || CHARSET_DIMENSION (charset) == 1 \ |
| 454 | ? ((c2) < 0 \ | 395 | ? (((charset) - 0x70) << 7) | ((c1) <= 0 ? 0 : (c1)) \ |
| 455 | ? (((charset) - 0x70) << 7) + (c1) \ | 396 | : ((charset) < MIN_CHARSET_PRIVATE_DIMENSION2 \ |
| 456 | : MAKE_COMPOSITE_CHAR (((c1) << 7) + (c2))) \ | 397 | ? ((((charset) - 0x8F) << 14) \ |
| 457 | : (! CHARSET_DEFINED_P (charset) || CHARSET_DIMENSION (charset) == 1 \ | 398 | | ((c1) <= 0 ? 0 : ((c1) << 7)) | ((c2) <= 0 ? 0 : (c2))) \ |
| 458 | ? (((charset) - 0x70) << 7) | ((c1) <= 0 ? 0 : (c1)) \ | 399 | : ((((charset) - 0xE0) << 14) \ |
| 459 | : ((charset) < MIN_CHARSET_PRIVATE_DIMENSION2 \ | 400 | | ((c1) <= 0 ? 0 : ((c1) << 7)) | ((c2) <= 0 ? 0 : (c2))))) |
| 460 | ? ((((charset) - 0x8F) << 14) \ | ||
| 461 | | ((c1) <= 0 ? 0 : ((c1) << 7)) | ((c2) <= 0 ? 0 : (c2))) \ | ||
| 462 | : ((((charset) - 0xE0) << 14) \ | ||
| 463 | | ((c1) <= 0 ? 0 : ((c1) << 7)) | ((c2) <= 0 ? 0 : (c2)))))) | ||
| 464 | |||
| 465 | /* Return a composite character of which CMPCHAR-ID is ID. */ | ||
| 466 | #define MAKE_COMPOSITE_CHAR(id) (MIN_CHAR_COMPOSITION + (id)) | ||
| 467 | |||
| 468 | /* Return CMPCHAR-ID of a composite character C. */ | ||
| 469 | #define COMPOSITE_CHAR_ID(c) ((c) - MIN_CHAR_COMPOSITION) | ||
| 470 | 401 | ||
| 471 | /* Return a character of which charset is CHARSET and position-codes | 402 | /* Return a character of which charset is CHARSET and position-codes |
| 472 | are C1 and C2. DIMENSION1 character ignores C2. */ | 403 | are C1 and C2. DIMENSION1 character ignores C2. */ |
| @@ -488,41 +419,6 @@ extern int width_by_char_head[256]; | |||
| 488 | 419 | ||
| 489 | #define DEFAULT_NONASCII_INSERT_OFFSET 0x800 | 420 | #define DEFAULT_NONASCII_INSERT_OFFSET 0x800 |
| 490 | 421 | ||
| 491 | /* Parse composite character string STR of length LENGTH (>= 2) and | ||
| 492 | set BYTES to the length of actual multibyte sequence. | ||
| 493 | |||
| 494 | It is assumed that *STR is LEADING_CODE_COMPOSITION and the | ||
| 495 | following (LENGTH - 1) bytes satisfy !CHAR_HEAD_P. | ||
| 496 | |||
| 497 | Actually, the whole multibyte sequence starting with | ||
| 498 | LEADING_CODE_COMPOSITION is treated as a single multibyte | ||
| 499 | character. So, here, we just set BYTES to LENGTH. | ||
| 500 | |||
| 501 | This macro should be called only from PARSE_MULTIBYTE_SEQ. */ | ||
| 502 | |||
| 503 | #define PARSE_COMPOSITE_SEQ(str, length, bytes) \ | ||
| 504 | do { \ | ||
| 505 | (bytes) = (length); \ | ||
| 506 | } while (0) | ||
| 507 | |||
| 508 | |||
| 509 | /* Parse non-composite multibyte character string STR of length | ||
| 510 | LENGTH (>= 2) and set BYTES to the length of actual multibyte | ||
| 511 | sequence. | ||
| 512 | |||
| 513 | It is assumed that *STR is one of base leading codes (excluding | ||
| 514 | LEADING_CODE_COMPOSITION) and the following (LENGTH - 1) bytes | ||
| 515 | satisfy !CHAR_HEAD_P. | ||
| 516 | |||
| 517 | This macro should be called only from PARSE_MULTIBYTE_SEQ. */ | ||
| 518 | |||
| 519 | #define PARSE_CHARACTER_SEQ(str, length, bytes) \ | ||
| 520 | do { \ | ||
| 521 | (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]); \ | ||
| 522 | if ((bytes) > (length)) \ | ||
| 523 | (bytes) = (length); \ | ||
| 524 | } while (0) | ||
| 525 | |||
| 526 | /* Parse string STR of length LENGTH and check if a multibyte | 422 | /* Parse string STR of length LENGTH and check if a multibyte |
| 527 | characters is at STR. If so, set BYTES for that character, else | 423 | characters is at STR. If so, set BYTES for that character, else |
| 528 | set BYTES to 1. */ | 424 | set BYTES to 1. */ |
| @@ -530,17 +426,14 @@ extern int width_by_char_head[256]; | |||
| 530 | #define PARSE_MULTIBYTE_SEQ(str, length, bytes) \ | 426 | #define PARSE_MULTIBYTE_SEQ(str, length, bytes) \ |
| 531 | do { \ | 427 | do { \ |
| 532 | int i = 1; \ | 428 | int i = 1; \ |
| 533 | if (ASCII_BYTE_P (*str)) \ | 429 | while (i < (length) && ! CHAR_HEAD_P ((str)[i])) i++; \ |
| 534 | bytes = 1; \ | 430 | if (i == 1) \ |
| 431 | (bytes) = 1; \ | ||
| 535 | else \ | 432 | else \ |
| 536 | { \ | 433 | { \ |
| 537 | while (i < (length) && ! CHAR_HEAD_P ((str)[i])) i++; \ | 434 | (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]); \ |
| 538 | if (i == 1) \ | 435 | if ((bytes) > (length)) \ |
| 539 | (bytes) = 1; \ | 436 | (bytes) = (length); \ |
| 540 | else if ((str)[0] == LEADING_CODE_COMPOSITION) \ | ||
| 541 | PARSE_COMPOSITE_SEQ (str, i, bytes); \ | ||
| 542 | else \ | ||
| 543 | PARSE_CHARACTER_SEQ (str, i, bytes); \ | ||
| 544 | } \ | 437 | } \ |
| 545 | } while (0) | 438 | } while (0) |
| 546 | 439 | ||
| @@ -550,16 +443,14 @@ extern int width_by_char_head[256]; | |||
| 550 | 443 | ||
| 551 | Do not use this macro for an ASCII character. */ | 444 | Do not use this macro for an ASCII character. */ |
| 552 | 445 | ||
| 553 | #define SPLIT_NON_ASCII_CHAR(c, charset, c1, c2) \ | 446 | #define SPLIT_NON_ASCII_CHAR(c, charset, c1, c2) \ |
| 554 | ((c) & CHAR_FIELD1_MASK \ | 447 | ((c) & CHAR_FIELD1_MASK \ |
| 555 | ? (charset = ((c) < MIN_CHAR_COMPOSITION \ | 448 | ? (charset = (CHAR_FIELD1 (c) \ |
| 556 | ? (CHAR_FIELD1 (c) \ | 449 | + ((c) < MIN_CHAR_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)), \ |
| 557 | + ((c) < MIN_CHAR_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)) \ | 450 | c1 = CHAR_FIELD2 (c), \ |
| 558 | : CHARSET_COMPOSITION), \ | 451 | c2 = CHAR_FIELD3 (c)) \ |
| 559 | c1 = CHAR_FIELD2 (c), \ | 452 | : (charset = CHAR_FIELD2 (c) + 0x70, \ |
| 560 | c2 = CHAR_FIELD3 (c)) \ | 453 | c1 = CHAR_FIELD3 (c), \ |
| 561 | : (charset = CHAR_FIELD2 (c) + 0x70, \ | ||
| 562 | c1 = CHAR_FIELD3 (c), \ | ||
| 563 | c2 = -1)) | 454 | c2 = -1)) |
| 564 | 455 | ||
| 565 | /* The charset of character C is stored in CHARSET, and the | 456 | /* The charset of character C is stored in CHARSET, and the |
| @@ -572,25 +463,19 @@ extern int width_by_char_head[256]; | |||
| 572 | : SPLIT_NON_ASCII_CHAR (c, charset, c1, c2)) | 463 | : SPLIT_NON_ASCII_CHAR (c, charset, c1, c2)) |
| 573 | 464 | ||
| 574 | /* Return 1 iff character C has valid printable glyph. */ | 465 | /* Return 1 iff character C has valid printable glyph. */ |
| 575 | #define CHAR_PRINTABLE_P(c) \ | 466 | #define CHAR_PRINTABLE_P(c) \ |
| 576 | (SINGLE_BYTE_CHAR_P (c) \ | 467 | (SINGLE_BYTE_CHAR_P (c) \ |
| 577 | || ((c) >= MIN_CHAR_COMPOSITION \ | 468 | || char_printable_p (c)) |
| 578 | ? (c) < MAX_CHAR \ | ||
| 579 | : char_printable_p (c))) | ||
| 580 | 469 | ||
| 581 | /* The charset of the character at STR is stored in CHARSET, and the | 470 | /* The charset of the character at STR is stored in CHARSET, and the |
| 582 | position-codes are stored in C1 and C2. | 471 | position-codes are stored in C1 and C2. |
| 583 | We store -1 in C2 if the character is just 2 bytes. | 472 | We store -1 in C2 if the character is just 2 bytes. */ |
| 584 | |||
| 585 | If the character is a composite character, the upper 7-bit and | ||
| 586 | lower 7-bit of CMPCHAR-ID are set in C1 and C2 respectively. No | ||
| 587 | range checking. */ | ||
| 588 | 473 | ||
| 589 | #define SPLIT_STRING(str, len, charset, c1, c2) \ | 474 | #define SPLIT_STRING(str, len, charset, c1, c2) \ |
| 590 | ((BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) < 2 \ | 475 | ((BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) < 2 \ |
| 591 | || BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) > len \ | 476 | || BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) > len \ |
| 592 | || split_non_ascii_string (str, len, &charset, &c1, &c2) < 0) \ | 477 | || split_string (str, len, &charset, &c1, &c2) < 0) \ |
| 593 | ? c1 = *(str), charset = CHARSET_ASCII \ | 478 | ? c1 = *(str), charset = CHARSET_ASCII \ |
| 594 | : charset) | 479 | : charset) |
| 595 | 480 | ||
| 596 | /* Mapping table from ISO2022's charset (specified by DIMENSION, | 481 | /* Mapping table from ISO2022's charset (specified by DIMENSION, |
| @@ -612,16 +497,15 @@ extern int iso_charset_table[2][2][128]; | |||
| 612 | representations: multi-byte form and single-word form (character | 497 | representations: multi-byte form and single-word form (character |
| 613 | code). */ | 498 | code). */ |
| 614 | 499 | ||
| 615 | /* Set STR a pointer to the multi-byte form of the character C. If C | 500 | /* Store multi-byte form of the character C in STR. The caller should |
| 616 | is not a composite character, the multi-byte form is set in WORKBUF | 501 | allocate at least 4-byte area at STR in advance. Returns the |
| 617 | and STR points WORKBUF. The caller should allocate at least 4-byte | 502 | length of the multi-byte form. If C is an invalid character code, |
| 618 | area at WORKBUF in advance. Returns the length of the multi-byte | 503 | signal an error. */ |
| 619 | form. If C is an invalid character code, signal an error. */ | ||
| 620 | 504 | ||
| 621 | #define CHAR_STRING(c, workbuf, str) \ | 505 | #define CHAR_STRING(c, str) \ |
| 622 | (SINGLE_BYTE_CHAR_P (c) \ | 506 | (SINGLE_BYTE_CHAR_P (c) \ |
| 623 | ? *(str = workbuf) = (unsigned char)(c), 1 \ | 507 | ? *(str) = (unsigned char)(c), 1 \ |
| 624 | : non_ascii_char_to_string (c, workbuf, (unsigned char **)&str)) | 508 | : char_to_string (c, (unsigned char *)str)) |
| 625 | 509 | ||
| 626 | /* Return a character code of the character of which multi-byte form | 510 | /* Return a character code of the character of which multi-byte form |
| 627 | is at STR and the length is LEN. If STR doesn't contain valid | 511 | is at STR and the length is LEN. If STR doesn't contain valid |
| @@ -630,7 +514,7 @@ extern int iso_charset_table[2][2][128]; | |||
| 630 | #define STRING_CHAR(str, len) \ | 514 | #define STRING_CHAR(str, len) \ |
| 631 | (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \ | 515 | (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \ |
| 632 | ? (unsigned char) *(str) \ | 516 | ? (unsigned char) *(str) \ |
| 633 | : string_to_non_ascii_char (str, len, 0)) | 517 | : string_to_char (str, len, 0)) |
| 634 | 518 | ||
| 635 | /* This is like STRING_CHAR but the third arg ACTUAL_LEN is set to the | 519 | /* This is like STRING_CHAR but the third arg ACTUAL_LEN is set to the |
| 636 | length of the multi-byte form. Just to know the length, use | 520 | length of the multi-byte form. Just to know the length, use |
| @@ -639,7 +523,7 @@ extern int iso_charset_table[2][2][128]; | |||
| 639 | #define STRING_CHAR_AND_LENGTH(str, len, actual_len) \ | 523 | #define STRING_CHAR_AND_LENGTH(str, len, actual_len) \ |
| 640 | (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \ | 524 | (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \ |
| 641 | ? ((actual_len) = 1), (unsigned char) *(str) \ | 525 | ? ((actual_len) = 1), (unsigned char) *(str) \ |
| 642 | : string_to_non_ascii_char (str, len, &(actual_len))) | 526 | : string_to_char (str, len, &(actual_len))) |
| 643 | 527 | ||
| 644 | /* Fetch the "next" multibyte character from Lisp string STRING | 528 | /* Fetch the "next" multibyte character from Lisp string STRING |
| 645 | at byte position BYTEIDX, character position CHARIDX. | 529 | at byte position BYTEIDX, character position CHARIDX. |
| @@ -665,6 +549,26 @@ if (1) \ | |||
| 665 | } \ | 549 | } \ |
| 666 | else | 550 | else |
| 667 | 551 | ||
| 552 | /* Like FETCH_STRING_CHAR_SPACE_LEFT but fetch character from the | ||
| 553 | current buffer. */ | ||
| 554 | |||
| 555 | #define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \ | ||
| 556 | if (1) \ | ||
| 557 | { \ | ||
| 558 | unsigned char *fetch_buf_char_ptr = BYTE_POS_ADDR (BYTEIDX); \ | ||
| 559 | int fetch_buf_char_space_left = ((CHARIDX < GPT ? GPT_BYTE : Z_BYTE) \ | ||
| 560 | - BYTEIDX); \ | ||
| 561 | int actual_len; \ | ||
| 562 | \ | ||
| 563 | OUTPUT \ | ||
| 564 | = STRING_CHAR_AND_LENGTH (fetch_buf_char_ptr, \ | ||
| 565 | fetch_buf_char_space_left, actual_len); \ | ||
| 566 | \ | ||
| 567 | BYTEIDX += actual_len; \ | ||
| 568 | CHARIDX++; \ | ||
| 569 | } \ | ||
| 570 | else | ||
| 571 | |||
| 668 | /* Return the length of the multi-byte form at string STR of length LEN. */ | 572 | /* Return the length of the multi-byte form at string STR of length LEN. */ |
| 669 | 573 | ||
| 670 | #define MULTIBYTE_FORM_LENGTH(str, len) \ | 574 | #define MULTIBYTE_FORM_LENGTH(str, len) \ |
| @@ -812,70 +716,21 @@ while (0) | |||
| 812 | 716 | ||
| 813 | #endif /* emacs */ | 717 | #endif /* emacs */ |
| 814 | 718 | ||
| 815 | /* Maximum counts of components in one composite character. */ | 719 | /* This is the maximum byte length of multi-byte sequence. */ |
| 816 | #define MAX_COMPONENT_COUNT 16 | 720 | #define MAX_MULTIBYTE_LENGTH 4 |
| 817 | |||
| 818 | /* Structure to hold information of a composite character. */ | ||
| 819 | struct cmpchar_info { | ||
| 820 | /* Byte length of the composite character. */ | ||
| 821 | int len; | ||
| 822 | |||
| 823 | /* Multi-byte form of the composite character. */ | ||
| 824 | unsigned char *data; | ||
| 825 | |||
| 826 | /* Length of glyph codes. */ | ||
| 827 | int glyph_len; | ||
| 828 | |||
| 829 | /* Width of the overall glyph of the composite character. */ | ||
| 830 | int width; | ||
| 831 | |||
| 832 | /* Pointer to an array of glyph codes of the composite character. | ||
| 833 | This actually contains only character code, no face. */ | ||
| 834 | GLYPH *glyph; | ||
| 835 | |||
| 836 | /* Pointer to an array of composition rules. The value has the form: | ||
| 837 | (0xA0 + ((GLOBAL-REF-POINT << 2) | NEW-REF-POINT)) | ||
| 838 | where each XXX-REF-POINT is 0..8. */ | ||
| 839 | unsigned char *cmp_rule; | ||
| 840 | |||
| 841 | /* Pointer to an array of x-axis offset of left edge of glyphs | ||
| 842 | relative to the left of of glyph[0] except for the first element | ||
| 843 | which is the absolute offset from the left edge of overall glyph. | ||
| 844 | The actual pixel offset should be calculated by multiplying each | ||
| 845 | frame's one column width by this value: | ||
| 846 | (i.e. FONT_WIDTH (f->output_data.x->font) * col_offset[N]). */ | ||
| 847 | float *col_offset; | ||
| 848 | |||
| 849 | /* Work slot used by `dumpglyphs' (xterm.c). */ | ||
| 850 | int face_work; | ||
| 851 | }; | ||
| 852 | |||
| 853 | /* Table of pointers to the structure `cmpchar_info' indexed by | ||
| 854 | CMPCHAR-ID. */ | ||
| 855 | extern struct cmpchar_info **cmpchar_table; | ||
| 856 | /* Number of the current composite characters. */ | ||
| 857 | extern int n_cmpchars; | ||
| 858 | |||
| 859 | /* This is the maximum length of multi-byte form. */ | ||
| 860 | #define MAX_LENGTH_OF_MULTI_BYTE_FORM (MAX_COMPONENT_COUNT * 6) | ||
| 861 | |||
| 862 | /* Maximum character code currently used. */ | ||
| 863 | #define MAX_CHAR (MIN_CHAR_COMPOSITION + n_cmpchars) | ||
| 864 | 721 | ||
| 865 | extern void invalid_character P_ ((int)); | 722 | extern void invalid_character P_ ((int)); |
| 866 | 723 | ||
| 867 | extern int translate_char P_ ((Lisp_Object, int, int, int, int)); | 724 | extern int translate_char P_ ((Lisp_Object, int, int, int, int)); |
| 868 | extern int split_non_ascii_string P_ ((const unsigned char *, int, int *, | 725 | extern int split_string P_ ((const unsigned char *, int, int *, |
| 869 | unsigned char *, unsigned char *)); | 726 | unsigned char *, unsigned char *)); |
| 870 | extern int string_to_non_ascii_char P_ ((const unsigned char *, int, int *)); | 727 | extern int char_to_string P_ ((int, unsigned char *)); |
| 871 | extern int non_ascii_char_to_string P_ ((int, unsigned char *, unsigned char **)); | 728 | extern int string_to_char P_ ((const unsigned char *, int, int *)); |
| 872 | extern int char_printable_p P_ ((int c)); | 729 | extern int char_printable_p P_ ((int c)); |
| 873 | extern int multibyte_form_length P_ ((const unsigned char *, int)); | 730 | extern int multibyte_form_length P_ ((const unsigned char *, int)); |
| 874 | extern int str_cmpchar_id P_ ((const unsigned char *, int)); | ||
| 875 | extern int get_charset_id P_ ((Lisp_Object)); | 731 | extern int get_charset_id P_ ((Lisp_Object)); |
| 876 | extern int cmpchar_component P_ ((int, int, int)); | ||
| 877 | extern int find_charset_in_str P_ ((unsigned char *, int, int *, | 732 | extern int find_charset_in_str P_ ((unsigned char *, int, int *, |
| 878 | Lisp_Object, int, int)); | 733 | Lisp_Object, int)); |
| 879 | extern int strwidth P_ ((unsigned char *, int)); | 734 | extern int strwidth P_ ((unsigned char *, int)); |
| 880 | extern int char_bytes P_ ((int)); | 735 | extern int char_bytes P_ ((int)); |
| 881 | extern int char_valid_p P_ ((int, int)); | 736 | extern int char_valid_p P_ ((int, int)); |
| @@ -902,10 +757,6 @@ extern Lisp_Object Vauto_fill_chars; | |||
| 902 | 757 | ||
| 903 | /* Length of C in bytes. */ | 758 | /* Length of C in bytes. */ |
| 904 | 759 | ||
| 905 | #define CHAR_LEN(C) \ | 760 | #define CHAR_LEN(C) CHARSET_BYTES (CHAR_CHARSET ((C))) |
| 906 | (CHAR_CHARSET ((C)) == CHARSET_COMPOSITION \ | ||
| 907 | ? cmpchar_table[COMPOSITE_CHAR_ID ((C))]->len \ | ||
| 908 | : CHARSET_BYTES (CHAR_CHARSET ((C)))) | ||
| 909 | |||
| 910 | 761 | ||
| 911 | #endif /* _CHARSET_H */ | 762 | #endif /* _CHARSET_H */ |