diff options
| author | Kenichi Handa | 2000-05-19 23:54:05 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2000-05-19 23:54:05 +0000 |
| commit | 6e4dc3e1c3f1fce6ec2c5174bb0a9c90d41dca7b (patch) | |
| tree | 6248b6f665fd43aadf368dc70f4dd726c2b07a99 /src | |
| parent | 2e344af3e731463b6733239d0c9520645072ce11 (diff) | |
| download | emacs-6e4dc3e1c3f1fce6ec2c5174bb0a9c90d41dca7b.tar.gz emacs-6e4dc3e1c3f1fce6ec2c5174bb0a9c90d41dca7b.zip | |
(LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
CHARSET_8_BIT_GRAPHIC): New macros.
(SINGLE_BYTE_CHAR_P): Make it faster by using casting.
(CHARSET_ISO_FINAL_CHAR): Use XINT instead of XFASTINT.
(CHARSET_ISO_GRAPHIC_PLANE): Likewise.
(CHARSET_VALID_P): Handle new charsets: eight-bit-control and
eight-bit-graphic.
(BYTES_BY_CHAR_HEAD, WIDTH_BY_CHAR_HEAD): Optimize for ASCII.
(CHAR_CHARSET, MAKE_CHAR, SPLIT_CHAR, CHAR_BYTES): Likewise.
(PARSE_MULTIBYTE_SEQ) [BYTE_COMBINING_DEBUG]: Abort if we
encounter an invalid multibyte sequence.
(PARSE_MULTIBYTE_SEQ) [not BYTE_COMBINING_DEBUG]: Assume multibyte
sequence is always valid.
(MAKE_NON_ASCII_CHAR, SPLIT_NON_ASCII_CHAR): These macros deleted.
(UNIBYTE_STR_AS_MULTIBYTE_P, MULTIBYTE_STR_AS_UNIBYTE_P): New
macros.
(CHAR_STRING): For 8-bit characters, call char_to_string.
(INC_POS) [not BYTE_COMBINING_DEBUG]: Faster version. Assume
multibyte sequence is always valid.
(BUF_INC_POS) [not BYTE_COMBINING_DEBUG]: Likewise.
(parse_str_as_multibyte, str_as_multibyte, str_to_multibyte,
str_as_unibyte): Extern them.
(BCOPY_SHORT): Fix a bug.
(CHAR_LEN): This macro deleted. Callers changed to use
CHAR_BYTES.
(FETCH_STRING_CHAR_ADVANCE): Check multibyteness of STRING.
(FETCH_STRING_CHAR_ADVANCE_NO_CHECK): New macro.
(FETCH_CHAR_ADVANCE): Check multibyteness of the current buffer.
Diffstat (limited to 'src')
| -rw-r--r-- | src/charset.h | 358 |
1 files changed, 212 insertions, 146 deletions
diff --git a/src/charset.h b/src/charset.h index c00bf161e7c..60cd120897c 100644 --- a/src/charset.h +++ b/src/charset.h | |||
| @@ -22,6 +22,8 @@ Boston, MA 02111-1307, USA. */ | |||
| 22 | #ifndef _CHARSET_H | 22 | #ifndef _CHARSET_H |
| 23 | #define _CHARSET_H | 23 | #define _CHARSET_H |
| 24 | 24 | ||
| 25 | /* #define BYTE_COMBINING_DEBUG */ | ||
| 26 | |||
| 25 | /*** GENERAL NOTE on CHARACTER SET (CHARSET) *** | 27 | /*** GENERAL NOTE on CHARACTER SET (CHARSET) *** |
| 26 | 28 | ||
| 27 | A character set ("charset" hereafter) is a meaningful collection | 29 | A character set ("charset" hereafter) is a meaningful collection |
| @@ -45,8 +47,8 @@ Boston, MA 02111-1307, USA. */ | |||
| 45 | charset_id: Emacs Lisp integer of an identification number of a charset | 47 | charset_id: Emacs Lisp integer of an identification number of a charset |
| 46 | charset: C integer of an identification number of a charset | 48 | charset: C integer of an identification number of a charset |
| 47 | 49 | ||
| 48 | Each charset (except for ASCII) is assigned a base leading-code | 50 | Each charset (except for ascii) is assigned a base leading-code |
| 49 | (range 0x80..0x9D). In addition, a charset of greater than 0xA0 | 51 | (range 0x80..0x9E). In addition, a charset of greater than 0xA0 |
| 50 | (whose base leading-code is 0x9A..0x9D) is assigned an extended | 52 | (whose base leading-code is 0x9A..0x9D) is assigned an extended |
| 51 | leading-code (range 0xA0..0xFE). In this case, each base | 53 | leading-code (range 0xA0..0xFE). In this case, each base |
| 52 | leading-code specify the allowable range of extended leading-code as | 54 | leading-code specify the allowable range of extended leading-code as |
| @@ -67,7 +69,11 @@ Boston, MA 02111-1307, USA. */ | |||
| 67 | 0x80 --never used-- | 69 | 0x80 --never used-- |
| 68 | 0x81..0x8F official dim1 same as charset -- none -- | 70 | 0x81..0x8F official dim1 same as charset -- none -- |
| 69 | 0x90..0x99 official dim2 same as charset -- none -- | 71 | 0x90..0x99 official dim2 same as charset -- none -- |
| 70 | 0x9A..0x9F --never used-- | 72 | 0x9A..0x9D --never used-- |
| 73 | 0x9E official dim1 same as charset -- none -- | ||
| 74 | (eight-bit-control) | ||
| 75 | 0x9F official dim1 -- none -- -- none -- | ||
| 76 | (eight-bit-graphic) | ||
| 71 | 0xA0..0xDF private dim1 0x9A same as charset | 77 | 0xA0..0xDF private dim1 0x9A same as charset |
| 72 | of 1-column width | 78 | of 1-column width |
| 73 | 0xE0..0xEF private dim1 0x9B same as charset | 79 | 0xE0..0xEF private dim1 0x9B same as charset |
| @@ -88,6 +94,8 @@ Boston, MA 02111-1307, USA. */ | |||
| 88 | #define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */ | 94 | #define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */ |
| 89 | #define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */ | 95 | #define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */ |
| 90 | 96 | ||
| 97 | #define LEADING_CODE_8_BIT_CONTROL 0x9E /* for `eight-bit-control' */ | ||
| 98 | |||
| 91 | /* Extended leading-code. */ | 99 | /* Extended leading-code. */ |
| 92 | /* Start of each extended leading-codes. */ | 100 | /* Start of each extended leading-codes. */ |
| 93 | #define LEADING_CODE_EXT_11 0xA0 /* follows LEADING_CODE_PRIVATE_11 */ | 101 | #define LEADING_CODE_EXT_11 0xA0 /* follows LEADING_CODE_PRIVATE_11 */ |
| @@ -109,9 +117,10 @@ Boston, MA 02111-1307, USA. */ | |||
| 109 | #define MAX_CHARSET 0xFE | 117 | #define MAX_CHARSET 0xFE |
| 110 | 118 | ||
| 111 | /* Definition of special charsets. */ | 119 | /* Definition of special charsets. */ |
| 112 | #define CHARSET_ASCII 0 | 120 | #define CHARSET_ASCII 0 /* 0x00..0x7F */ |
| 121 | #define CHARSET_8_BIT_CONTROL 0x9E /* 0x80..0x9F */ | ||
| 122 | #define CHARSET_8_BIT_GRAPHIC 0x9F /* 0xA0..0xFF */ | ||
| 113 | 123 | ||
| 114 | extern int charset_ascii; /* ASCII */ | ||
| 115 | extern int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */ | 124 | extern int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */ |
| 116 | extern int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */ | 125 | extern int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */ |
| 117 | extern int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */ | 126 | extern int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */ |
| @@ -120,8 +129,9 @@ extern int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */ | |||
| 120 | extern int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */ | 129 | extern int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */ |
| 121 | extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ | 130 | extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ |
| 122 | 131 | ||
| 123 | /* Check if CH is the head of multi-byte form, i.e., | 132 | /* Check if CH is an ASCII character or a base leading-code. |
| 124 | an ASCII character or a base leading-code. */ | 133 | Nowadays, any byte can be the first byte of a character in a |
| 134 | multibyte buffer/string. So this macro name is not appropriate. */ | ||
| 125 | #define CHAR_HEAD_P(ch) ((unsigned char) (ch) < 0xA0) | 135 | #define CHAR_HEAD_P(ch) ((unsigned char) (ch) < 0xA0) |
| 126 | 136 | ||
| 127 | /*** GENERAL NOTE on CHARACTER REPRESENTATION *** | 137 | /*** GENERAL NOTE on CHARACTER REPRESENTATION *** |
| @@ -158,26 +168,28 @@ extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ | |||
| 158 | 168 | ||
| 159 | More precisely... | 169 | More precisely... |
| 160 | 170 | ||
| 161 | FIELD2 of DIMENSION1 character (except for ASCII) is "charset - 0x70". | 171 | FIELD2 of DIMENSION1 character (except for ascii, eight-bit-control, |
| 162 | This is to make all character codes except for ASCII greater than | 172 | and eight-bit-graphic) is "charset - 0x70". This is to make all |
| 163 | 256 (ASCII's FIELD2 is 0). So, the range of FIELD2 of DIMENSION1 | 173 | character codes except for ASCII and 8-bit codes greater than 256. |
| 164 | character is 0 or 0x11..0x7F. | 174 | So, the range of FIELD2 of DIMENSION1 character is 0, 1, or |
| 175 | 0x11..0x7F. | ||
| 165 | 176 | ||
| 166 | FIELD1 of DIMENSION2 character is "charset - 0x8F" for official | 177 | FIELD1 of DIMENSION2 character is "charset - 0x8F" for official |
| 167 | charset and "charset - 0xE0" for private charset. So, the range of | 178 | charset and "charset - 0xE0" for private charset. So, the range of |
| 168 | FIELD1 of DIMENSION2 character is 0x01..0x1E. | 179 | FIELD1 of DIMENSION2 character is 0x01..0x1E. |
| 169 | 180 | ||
| 170 | ----------------------------------------------------------------------- | 181 | ----------------------------------------------------------------------------- |
| 171 | charset FIELD1 (5-bit) FIELD2 (7-bit) FIELD3 (7-bit) | 182 | charset FIELD1 (5-bit) FIELD2 (7-bit) FIELD3 (7-bit) |
| 172 | ----------------------------------------------------------------------- | 183 | ----------------------------------------------------------------------------- |
| 173 | ASCII 0 0 POSITION-CODE-1 | 184 | ascii 0 0 0x00..0x7F |
| 174 | DIMENSION1 0 charset - 0x70 POSITION-CODE-1 | 185 | eight-bit-control 0 1 0x00..0x1F |
| 175 | DIMENSION2(o) charset - 0x8F POSITION-CODE-1 POSITION-CODE-2 | 186 | eight-bit-graphic 0 1 0x20..0x7F |
| 176 | DIMENSION2(p) charset - 0xE0 POSITION-CODE-1 POSITION-CODE-2 | 187 | DIMENSION1 0 charset - 0x70 POSITION-CODE-1 |
| 177 | ----------------------------------------------------------------------- | 188 | DIMENSION2(o) charset - 0x8F POSITION-CODE-1 POSITION-CODE-2 |
| 189 | DIMENSION2(p) charset - 0xE0 POSITION-CODE-1 POSITION-CODE-2 | ||
| 190 | ----------------------------------------------------------------------------- | ||
| 178 | "(o)": official, "(p)": private | 191 | "(o)": official, "(p)": private |
| 179 | ----------------------------------------------------------------------- | 192 | ----------------------------------------------------------------------------- |
| 180 | |||
| 181 | */ | 193 | */ |
| 182 | 194 | ||
| 183 | /* Masks of each field of character code. */ | 195 | /* Masks of each field of character code. */ |
| @@ -202,10 +214,10 @@ extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ | |||
| 202 | /* Maximum character code currently used plus 1. */ | 214 | /* Maximum character code currently used plus 1. */ |
| 203 | #define MAX_CHAR (0x1F << 14) | 215 | #define MAX_CHAR (0x1F << 14) |
| 204 | 216 | ||
| 205 | /* 1 if C is an ASCII character, else 0. */ | 217 | /* 1 if C is a single byte character, else 0. */ |
| 206 | #define SINGLE_BYTE_CHAR_P(c) ((c) >= 0 && (c) < 0x100) | 218 | #define SINGLE_BYTE_CHAR_P(c) ((unsigned) (c) < 0x100) |
| 207 | 219 | ||
| 208 | /* 1 if BYTE is a character in itself, in multibyte mode. */ | 220 | /* 1 if BYTE is an ASCII character in itself, in multibyte mode. */ |
| 209 | #define ASCII_BYTE_P(byte) ((byte) < 0x80) | 221 | #define ASCII_BYTE_P(byte) ((byte) < 0x80) |
| 210 | 222 | ||
| 211 | /* A char-table containing information of each character set. | 223 | /* A char-table containing information of each character set. |
| @@ -229,7 +241,7 @@ extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ | |||
| 229 | CHARS (integer) is the number of characters in a dimension: 94 or 96. | 241 | CHARS (integer) is the number of characters in a dimension: 94 or 96. |
| 230 | 242 | ||
| 231 | WIDTH (integer) is the number of columns a character in the charset | 243 | WIDTH (integer) is the number of columns a character in the charset |
| 232 | occupies on the screen: one of 0, 1, and 2. | 244 | occupies on the screen: one of 0, 1, and 2.. |
| 233 | 245 | ||
| 234 | DIRECTION (integer) is the rendering direction of characters in the | 246 | DIRECTION (integer) is the rendering direction of characters in the |
| 235 | charset when rendering. If 0, render from left to right, else | 247 | charset when rendering. If 0, render from left to right, else |
| @@ -242,11 +254,14 @@ extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ | |||
| 242 | charset. All charsets of less than 0xA0 has the value 0. | 254 | charset. All charsets of less than 0xA0 has the value 0. |
| 243 | 255 | ||
| 244 | ISO-FINAL-CHAR (character) is the final character of the | 256 | ISO-FINAL-CHAR (character) is the final character of the |
| 245 | corresponding ISO 2022 charset. | 257 | corresponding ISO 2022 charset. It is -1 for such a character |
| 258 | that is used only internally (e.g. `eight-bit-control'). | ||
| 246 | 259 | ||
| 247 | ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked | 260 | ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked |
| 248 | while encoding to variants of ISO 2022 coding system, one of the | 261 | while encoding to variants of ISO 2022 coding system, one of the |
| 249 | following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR). | 262 | following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR). It |
| 263 | is -1 for such a character that is used only internally | ||
| 264 | (e.g. `eight-bit-control'). | ||
| 250 | 265 | ||
| 251 | REVERSE-CHARSET (integer) is the charset which differs only in | 266 | REVERSE-CHARSET (integer) is the charset which differs only in |
| 252 | LEFT-TO-RIGHT value from the charset. If there's no such a | 267 | LEFT-TO-RIGHT value from the charset. If there's no such a |
| @@ -309,9 +324,9 @@ extern Lisp_Object Vcharset_table; | |||
| 309 | #define CHARSET_LEADING_CODE_EXT(charset) \ | 324 | #define CHARSET_LEADING_CODE_EXT(charset) \ |
| 310 | XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)) | 325 | XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)) |
| 311 | #define CHARSET_ISO_FINAL_CHAR(charset) \ | 326 | #define CHARSET_ISO_FINAL_CHAR(charset) \ |
| 312 | XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX)) | 327 | XINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX)) |
| 313 | #define CHARSET_ISO_GRAPHIC_PLANE(charset) \ | 328 | #define CHARSET_ISO_GRAPHIC_PLANE(charset) \ |
| 314 | XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)) | 329 | XINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)) |
| 315 | #define CHARSET_REVERSE_CHARSET(charset) \ | 330 | #define CHARSET_REVERSE_CHARSET(charset) \ |
| 316 | XINT (CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)) | 331 | XINT (CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)) |
| 317 | 332 | ||
| @@ -331,7 +346,10 @@ extern Lisp_Object Vcharset_symbol_table; | |||
| 331 | #define CHARSET_VALID_P(charset) \ | 346 | #define CHARSET_VALID_P(charset) \ |
| 332 | ((charset) == 0 \ | 347 | ((charset) == 0 \ |
| 333 | || ((charset) > 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \ | 348 | || ((charset) > 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \ |
| 334 | || ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 && (charset) <= MAX_CHARSET)) | 349 | || ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 \ |
| 350 | && (charset) <= MAX_CHARSET) \ | ||
| 351 | || ((charset) == CHARSET_8_BIT_CONTROL) \ | ||
| 352 | || ((charset) == CHARSET_8_BIT_GRAPHIC)) | ||
| 335 | 353 | ||
| 336 | /* 1 if CHARSET is already defined, else 0. */ | 354 | /* 1 if CHARSET is already defined, else 0. */ |
| 337 | #define CHARSET_DEFINED_P(charset) \ | 355 | #define CHARSET_DEFINED_P(charset) \ |
| @@ -339,72 +357,52 @@ extern Lisp_Object Vcharset_symbol_table; | |||
| 339 | && !NILP (CHARSET_TABLE_ENTRY (charset))) | 357 | && !NILP (CHARSET_TABLE_ENTRY (charset))) |
| 340 | 358 | ||
| 341 | /* Since the information CHARSET-BYTES and CHARSET-WIDTH of | 359 | /* Since the information CHARSET-BYTES and CHARSET-WIDTH of |
| 342 | Vcharset_table can be retrieved only the first byte of | 360 | Vcharset_table can be retrieved only by the first byte of |
| 343 | multi-byte form (an ASCII code or a base leading-code), we provide | 361 | multi-byte form (an ASCII code or a base leading-code), we provide |
| 344 | here tables to be used by macros BYTES_BY_CHAR_HEAD and | 362 | here tables to be used by macros BYTES_BY_CHAR_HEAD and |
| 345 | WIDTH_BY_CHAR_HEAD for faster information retrieval. */ | 363 | WIDTH_BY_CHAR_HEAD for faster information retrieval. */ |
| 346 | extern int bytes_by_char_head[256]; | 364 | extern int bytes_by_char_head[256]; |
| 347 | extern int width_by_char_head[256]; | 365 | extern int width_by_char_head[256]; |
| 348 | 366 | ||
| 349 | #define BYTES_BY_CHAR_HEAD(char_head) bytes_by_char_head[char_head] | 367 | #define BYTES_BY_CHAR_HEAD(char_head) \ |
| 350 | #define WIDTH_BY_CHAR_HEAD(char_head) width_by_char_head[char_head] | 368 | (ASCII_BYTE_P (char_head) ? 1 : bytes_by_char_head[char_head]) |
| 369 | #define WIDTH_BY_CHAR_HEAD(char_head) \ | ||
| 370 | (ASCII_BYTE_P (char_head) ? 1 : width_by_char_head[char_head]) | ||
| 351 | 371 | ||
| 352 | /* Charset of the character C. */ | 372 | /* Charset of the character C. */ |
| 353 | #define CHAR_CHARSET(c) \ | 373 | #define CHAR_CHARSET(c) \ |
| 354 | (SINGLE_BYTE_CHAR_P (c) \ | 374 | (SINGLE_BYTE_CHAR_P (c) \ |
| 355 | ? CHARSET_ASCII \ | 375 | ? (ASCII_BYTE_P (c) \ |
| 356 | : ((c) < MIN_CHAR_OFFICIAL_DIMENSION2 \ | 376 | ? CHARSET_ASCII \ |
| 357 | ? CHAR_FIELD2 (c) + 0x70 \ | 377 | : (c) < 0xA0 ? CHARSET_8_BIT_CONTROL : CHARSET_8_BIT_GRAPHIC) \ |
| 358 | : ((c) < MIN_CHAR_PRIVATE_DIMENSION2 \ | 378 | : ((c) < MIN_CHAR_OFFICIAL_DIMENSION2 \ |
| 359 | ? CHAR_FIELD1 (c) + 0x8F \ | 379 | ? CHAR_FIELD2 (c) + 0x70 \ |
| 380 | : ((c) < MIN_CHAR_PRIVATE_DIMENSION2 \ | ||
| 381 | ? CHAR_FIELD1 (c) + 0x8F \ | ||
| 360 | : CHAR_FIELD1 (c) + 0xE0))) | 382 | : CHAR_FIELD1 (c) + 0xE0))) |
| 361 | 383 | ||
| 362 | /* Return charset at the place pointed by P. */ | ||
| 363 | #define CHARSET_AT(p) \ | ||
| 364 | (*(p) < 0x80 \ | ||
| 365 | ? CHARSET_ASCII \ | ||
| 366 | : (*(p) < LEADING_CODE_PRIVATE_11 \ | ||
| 367 | ? (int)*(p) \ | ||
| 368 | : (*(p) <= LEADING_CODE_PRIVATE_22 \ | ||
| 369 | ? (int)*((p) + 1) \ | ||
| 370 | : -1))) | ||
| 371 | |||
| 372 | /* Same as `CHARSET_AT ()' but perhaps runs faster because of an | ||
| 373 | additional argument C which is the code (byte) at P. */ | ||
| 374 | #define FIRST_CHARSET_AT(p, c) \ | ||
| 375 | ((c) < 0x80 \ | ||
| 376 | ? CHARSET_ASCII \ | ||
| 377 | : ((c) < LEADING_CODE_PRIVATE_11 \ | ||
| 378 | ? (int)(c) \ | ||
| 379 | : ((c) <= LEADING_CODE_PRIVATE_22 \ | ||
| 380 | ? (int)*((p) + 1) \ | ||
| 381 | : -1))) | ||
| 382 | |||
| 383 | /* Check if two characters C1 and C2 belong to the same charset. */ | 384 | /* Check if two characters C1 and C2 belong to the same charset. */ |
| 384 | #define SAME_CHARSET_P(c1, c2) \ | 385 | #define SAME_CHARSET_P(c1, c2) \ |
| 385 | (SINGLE_BYTE_CHAR_P (c1) \ | 386 | (c1 < MIN_CHAR_OFFICIAL_DIMENSION2 \ |
| 386 | ? SINGLE_BYTE_CHAR_P (c2) \ | 387 | ? (c1 & CHAR_FIELD2_MASK) == (c2 & CHAR_FIELD2_MASK) \ |
| 387 | : (c1 < MIN_CHAR_OFFICIAL_DIMENSION2 \ | 388 | : (c1 & CHAR_FIELD1_MASK) == (c2 & CHAR_FIELD1_MASK)) |
| 388 | ? (c1 & CHAR_FIELD2_MASK) == (c2 & CHAR_FIELD2_MASK) \ | ||
| 389 | : (c1 & CHAR_FIELD1_MASK) == (c2 & CHAR_FIELD1_MASK))) | ||
| 390 | |||
| 391 | /* Return a non-ASCII character of which charset is CHARSET and | ||
| 392 | position-codes are C1 and C2. DIMENSION1 character ignores C2. */ | ||
| 393 | #define MAKE_NON_ASCII_CHAR(charset, c1, c2) \ | ||
| 394 | (! CHARSET_DEFINED_P (charset) || CHARSET_DIMENSION (charset) == 1 \ | ||
| 395 | ? (((charset) - 0x70) << 7) | ((c1) <= 0 ? 0 : (c1)) \ | ||
| 396 | : ((charset) < MIN_CHARSET_PRIVATE_DIMENSION2 \ | ||
| 397 | ? ((((charset) - 0x8F) << 14) \ | ||
| 398 | | ((c1) <= 0 ? 0 : ((c1) << 7)) | ((c2) <= 0 ? 0 : (c2))) \ | ||
| 399 | : ((((charset) - 0xE0) << 14) \ | ||
| 400 | | ((c1) <= 0 ? 0 : ((c1) << 7)) | ((c2) <= 0 ? 0 : (c2))))) | ||
| 401 | 389 | ||
| 402 | /* Return a character of which charset is CHARSET and position-codes | 390 | /* Return a character of which charset is CHARSET and position-codes |
| 403 | are C1 and C2. DIMENSION1 character ignores C2. */ | 391 | are C1 and C2. DIMENSION1 character ignores C2. */ |
| 404 | #define MAKE_CHAR(charset, c1, c2) \ | 392 | #define MAKE_CHAR(charset, c1, c2) \ |
| 405 | ((charset) == CHARSET_ASCII \ | 393 | ((charset) == CHARSET_ASCII \ |
| 406 | ? (c1) \ | 394 | ? (c1) & 0x7F \ |
| 407 | : MAKE_NON_ASCII_CHAR ((charset), (c1), (c2))) | 395 | : (((charset) == CHARSET_8_BIT_CONTROL \ |
| 396 | || (charset) == CHARSET_8_BIT_GRAPHIC) \ | ||
| 397 | ? ((c1) & 0x7F) | 0x80 \ | ||
| 398 | : (! CHARSET_DEFINED_P (charset) || CHARSET_DIMENSION (charset) == 1 \ | ||
| 399 | ? (((charset) - 0x70) << 7) | ((c1) <= 0 ? 0 : (c1)) \ | ||
| 400 | : ((((charset) \ | ||
| 401 | - ((charset) < MIN_CHARSET_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)) \ | ||
| 402 | << 14) \ | ||
| 403 | | ((c2) <= 0 ? 0 : ((c2) & 0x7F)) \ | ||
| 404 | | ((c1) <= 0 ? 0 : (((c1) & 0x7F) << 7)))))) | ||
| 405 | |||
| 408 | 406 | ||
| 409 | /* If GENERICP is nonzero, return nonzero iff C is a valid normal or | 407 | /* If GENERICP is nonzero, return nonzero iff C is a valid normal or |
| 410 | generic character. If GENERICP is zero, return nonzero iff C is a | 408 | generic character. If GENERICP is zero, return nonzero iff C is a |
| @@ -419,53 +417,70 @@ extern int width_by_char_head[256]; | |||
| 419 | 417 | ||
| 420 | #define DEFAULT_NONASCII_INSERT_OFFSET 0x800 | 418 | #define DEFAULT_NONASCII_INSERT_OFFSET 0x800 |
| 421 | 419 | ||
| 422 | /* Parse string STR of length LENGTH and check if a multibyte | 420 | /* Parse multibyte string STR of length LENGTH and set BYTES to the |
| 423 | characters is at STR. If so, set BYTES for that character, else | 421 | byte length of a character at STR. */ |
| 424 | set BYTES to 1. */ | 422 | |
| 423 | #ifdef BYTE_COMBINING_DEBUG | ||
| 425 | 424 | ||
| 426 | #define PARSE_MULTIBYTE_SEQ(str, length, bytes) \ | 425 | #define PARSE_MULTIBYTE_SEQ(str, length, bytes) \ |
| 427 | do { \ | 426 | do { \ |
| 428 | int i = 1; \ | 427 | int i = 1; \ |
| 429 | while (i < (length) && ! CHAR_HEAD_P ((str)[i])) i++; \ | 428 | while (i < (length) && ! CHAR_HEAD_P ((str)[i])) i++; \ |
| 430 | if (i == 1) \ | 429 | (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]); \ |
| 431 | (bytes) = 1; \ | 430 | if ((bytes) > i) \ |
| 432 | else \ | 431 | abort (); \ |
| 433 | { \ | ||
| 434 | (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]); \ | ||
| 435 | if ((bytes) > (length)) \ | ||
| 436 | (bytes) = (length); \ | ||
| 437 | } \ | ||
| 438 | } while (0) | 432 | } while (0) |
| 439 | 433 | ||
| 440 | /* The charset of non-ASCII character C is stored in CHARSET, and the | 434 | #else /* not BYTE_COMBINING_DEBUG */ |
| 441 | position-codes of C are stored in C1 and C2. | ||
| 442 | We store -1 in C2 if the character is just 2 bytes. | ||
| 443 | 435 | ||
| 444 | Do not use this macro for an ASCII character. */ | 436 | #define PARSE_MULTIBYTE_SEQ(str, length, bytes) \ |
| 437 | (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]) | ||
| 445 | 438 | ||
| 446 | #define SPLIT_NON_ASCII_CHAR(c, charset, c1, c2) \ | 439 | #endif /* not BYTE_COMBINING_DEBUG */ |
| 447 | ((c) & CHAR_FIELD1_MASK \ | 440 | |
| 448 | ? (charset = (CHAR_FIELD1 (c) \ | 441 | /* Return 1 iff the byte sequence at unibyte string STR (LENGTH bytes) |
| 449 | + ((c) < MIN_CHAR_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)), \ | 442 | is valid as a multibyte form. If valid, by a side effect, BYTES is |
| 450 | c1 = CHAR_FIELD2 (c), \ | 443 | set to the byte length of the multibyte form. */ |
| 451 | c2 = CHAR_FIELD3 (c)) \ | 444 | |
| 452 | : (charset = CHAR_FIELD2 (c) + 0x70, \ | 445 | #define UNIBYTE_STR_AS_MULTIBYTE_P(str, length, bytes) \ |
| 453 | c1 = CHAR_FIELD3 (c), \ | 446 | (((bytes) = BYTES_BY_CHAR_HEAD ((str)[0])) == 1 \ |
| 454 | c2 = -1)) | 447 | || ((str)[0] != LEADING_CODE_8_BIT_CONTROL \ |
| 448 | && (bytes) <= (length) \ | ||
| 449 | && !CHAR_HEAD_P ((str)[1]) \ | ||
| 450 | && ((bytes) == 2 \ | ||
| 451 | || (!CHAR_HEAD_P ((str)[2]) \ | ||
| 452 | && ((bytes) == 3 \ | ||
| 453 | || !CHAR_HEAD_P ((str)[3])))))) | ||
| 454 | |||
| 455 | /* Return 1 iff the byte sequence at multibyte string STR is valid as | ||
| 456 | a unibyte form. By a side effect, BYTES is set to the byte length | ||
| 457 | of one character at STR. */ | ||
| 458 | |||
| 459 | #define MULTIBYTE_STR_AS_UNIBYTE_P(str, bytes) \ | ||
| 460 | ((bytes) = BYTES_BY_CHAR_HEAD ((str)[0]), \ | ||
| 461 | (str)[0] != LEADING_CODE_8_BIT_CONTROL) | ||
| 455 | 462 | ||
| 456 | /* The charset of character C is stored in CHARSET, and the | 463 | /* The charset of character C is stored in CHARSET, and the |
| 457 | position-codes of C are stored in C1 and C2. | 464 | position-codes of C are stored in C1 and C2. |
| 458 | We store -1 in C2 if the dimension of the charset is 1. */ | 465 | We store -1 in C2 if the dimension of the charset is 1. */ |
| 459 | 466 | ||
| 460 | #define SPLIT_CHAR(c, charset, c1, c2) \ | 467 | #define SPLIT_CHAR(c, charset, c1, c2) \ |
| 461 | (SINGLE_BYTE_CHAR_P (c) \ | 468 | (SINGLE_BYTE_CHAR_P (c) \ |
| 462 | ? charset = CHARSET_ASCII, c1 = (c), c2 = -1 \ | 469 | ? ((charset = ASCII_BYTE_P (c) \ |
| 463 | : SPLIT_NON_ASCII_CHAR (c, charset, c1, c2)) | 470 | ? CHARSET_ASCII \ |
| 471 | : (c) < 0xA0 ? CHARSET_8_BIT_CONTROL : CHARSET_8_BIT_GRAPHIC), \ | ||
| 472 | c1 = (c), c2 = -1) \ | ||
| 473 | : ((c) & CHAR_FIELD1_MASK \ | ||
| 474 | ? (charset = (CHAR_FIELD1 (c) \ | ||
| 475 | + ((c) < MIN_CHAR_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)), \ | ||
| 476 | c1 = CHAR_FIELD2 (c), \ | ||
| 477 | c2 = CHAR_FIELD3 (c)) \ | ||
| 478 | : (charset = CHAR_FIELD2 (c) + 0x70, \ | ||
| 479 | c1 = CHAR_FIELD3 (c), \ | ||
| 480 | c2 = -1))) | ||
| 464 | 481 | ||
| 465 | /* Return 1 iff character C has valid printable glyph. */ | 482 | /* Return 1 iff character C has valid printable glyph. */ |
| 466 | #define CHAR_PRINTABLE_P(c) \ | 483 | #define CHAR_PRINTABLE_P(c) (ASCII_BYTE_P (c) || char_printable_p (c)) |
| 467 | (SINGLE_BYTE_CHAR_P (c) \ | ||
| 468 | || char_printable_p (c)) | ||
| 469 | 484 | ||
| 470 | /* The charset of the character at STR is stored in CHARSET, and the | 485 | /* The charset of the character at STR is stored in CHARSET, and the |
| 471 | position-codes are stored in C1 and C2. | 486 | position-codes are stored in C1 and C2. |
| @@ -489,9 +504,10 @@ extern int iso_charset_table[2][2][128]; | |||
| 489 | #define BASE_LEADING_CODE_P(c) (BYTES_BY_CHAR_HEAD ((unsigned char) (c)) > 1) | 504 | #define BASE_LEADING_CODE_P(c) (BYTES_BY_CHAR_HEAD ((unsigned char) (c)) > 1) |
| 490 | 505 | ||
| 491 | /* Return how many bytes C will occupy in a multibyte buffer. */ | 506 | /* Return how many bytes C will occupy in a multibyte buffer. */ |
| 492 | #define CHAR_BYTES(c) \ | 507 | #define CHAR_BYTES(c) \ |
| 493 | ((SINGLE_BYTE_CHAR_P ((c)) || ((c) & ~((1 << CHARACTERBITS) - 1))) \ | 508 | (SINGLE_BYTE_CHAR_P (c) \ |
| 494 | ? 1 : char_bytes (c)) | 509 | ? ((ASCII_BYTE_P (c) || (c) >= 0xA0) ? 1 : 2) \ |
| 510 | : char_bytes (c)) | ||
| 495 | 511 | ||
| 496 | /* The following two macros CHAR_STRING and STRING_CHAR are the main | 512 | /* The following two macros CHAR_STRING and STRING_CHAR are the main |
| 497 | entry points to convert between Emacs two types of character | 513 | entry points to convert between Emacs two types of character |
| @@ -499,14 +515,14 @@ extern int iso_charset_table[2][2][128]; | |||
| 499 | code). */ | 515 | code). */ |
| 500 | 516 | ||
| 501 | /* Store multi-byte form of the character C in STR. The caller should | 517 | /* Store multi-byte form of the character C in STR. The caller should |
| 502 | allocate at least 4-byte area at STR in advance. Returns the | 518 | allocate at least MAX_MULTIBYTE_LENGTH bytes area at STR in |
| 503 | length of the multi-byte form. If C is an invalid character code, | 519 | advance. Returns the length of the multi-byte form. If C is an |
| 504 | signal an error. */ | 520 | invalid character code, signal an error. */ |
| 505 | 521 | ||
| 506 | #define CHAR_STRING(c, str) \ | 522 | #define CHAR_STRING(c, str) \ |
| 507 | (SINGLE_BYTE_CHAR_P (c) \ | 523 | (ASCII_BYTE_P (c) \ |
| 508 | ? *(str) = (unsigned char)(c), 1 \ | 524 | ? (*(str) = (unsigned char)(c), 1) \ |
| 509 | : char_to_string (c, (unsigned char *)str)) | 525 | : char_to_string (c, (unsigned char *) str)) |
| 510 | 526 | ||
| 511 | /* Return a character code of the character of which multi-byte form | 527 | /* Return a character code of the character of which multi-byte form |
| 512 | is at STR and the length is LEN. If STR doesn't contain valid | 528 | is at STR and the length is LEN. If STR doesn't contain valid |
| @@ -526,15 +542,34 @@ extern int iso_charset_table[2][2][128]; | |||
| 526 | ? ((actual_len) = 1), (unsigned char) *(str) \ | 542 | ? ((actual_len) = 1), (unsigned char) *(str) \ |
| 527 | : string_to_char (str, len, &(actual_len))) | 543 | : string_to_char (str, len, &(actual_len))) |
| 528 | 544 | ||
| 529 | /* Fetch the "next" multibyte character from Lisp string STRING | 545 | /* Fetch the "next" character from Lisp string STRING at byte position |
| 530 | at byte position BYTEIDX, character position CHARIDX. | 546 | BYTEIDX, character position CHARIDX. Store it into OUTPUT. |
| 531 | Store it into OUTPUT. | ||
| 532 | 547 | ||
| 533 | All the args must be side-effect-free. | 548 | All the args must be side-effect-free. |
| 534 | BYTEIDX and CHARIDX must be lvalues; | 549 | BYTEIDX and CHARIDX must be lvalues; |
| 535 | we increment them past the character fetched. */ | 550 | we increment them past the character fetched. */ |
| 536 | 551 | ||
| 537 | #define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \ | 552 | #define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \ |
| 553 | if (1) \ | ||
| 554 | { \ | ||
| 555 | CHARIDX++; \ | ||
| 556 | if (STRING_MULTIBYTE (STRING)) \ | ||
| 557 | { \ | ||
| 558 | unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \ | ||
| 559 | int space_left = XSTRING (STRING)->size_byte - BYTEIDX; \ | ||
| 560 | int actual_len; \ | ||
| 561 | \ | ||
| 562 | OUTPUT = STRING_CHAR_AND_LENGTH (ptr, space_left, actual_len); \ | ||
| 563 | BYTEIDX += actual_len; \ | ||
| 564 | } \ | ||
| 565 | else \ | ||
| 566 | OUTPUT = XSTRING (STRING)->data[BYTEIDX++]; \ | ||
| 567 | } \ | ||
| 568 | else | ||
| 569 | |||
| 570 | /* Like FETCH_STRING_CHAR_ADVANCE but assume STRING is multibyte. */ | ||
| 571 | |||
| 572 | #define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \ | ||
| 538 | if (1) \ | 573 | if (1) \ |
| 539 | { \ | 574 | { \ |
| 540 | unsigned char *fetch_string_char_ptr = &XSTRING (STRING)->data[BYTEIDX]; \ | 575 | unsigned char *fetch_string_char_ptr = &XSTRING (STRING)->data[BYTEIDX]; \ |
| @@ -550,23 +585,27 @@ if (1) \ | |||
| 550 | } \ | 585 | } \ |
| 551 | else | 586 | else |
| 552 | 587 | ||
| 553 | /* Like FETCH_STRING_CHAR_SPACE_LEFT but fetch character from the | 588 | /* Like FETCH_STRING_CHAR_ADVANCE but fetch character from the current |
| 554 | current buffer. */ | 589 | buffer. */ |
| 555 | 590 | ||
| 556 | #define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \ | 591 | #define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \ |
| 557 | if (1) \ | 592 | if (1) \ |
| 558 | { \ | 593 | { \ |
| 559 | unsigned char *fetch_buf_char_ptr = BYTE_POS_ADDR (BYTEIDX); \ | ||
| 560 | int fetch_buf_char_space_left = ((CHARIDX < GPT ? GPT_BYTE : Z_BYTE) \ | ||
| 561 | - BYTEIDX); \ | ||
| 562 | int actual_len; \ | ||
| 563 | \ | ||
| 564 | OUTPUT \ | ||
| 565 | = STRING_CHAR_AND_LENGTH (fetch_buf_char_ptr, \ | ||
| 566 | fetch_buf_char_space_left, actual_len); \ | ||
| 567 | \ | ||
| 568 | BYTEIDX += actual_len; \ | ||
| 569 | CHARIDX++; \ | 594 | CHARIDX++; \ |
| 595 | if (!NILP (current_buffer->enable_multibyte_characters)) \ | ||
| 596 | { \ | ||
| 597 | unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \ | ||
| 598 | int space_left = ((CHARIDX < GPT ? GPT_BYTE : Z_BYTE) - BYTEIDX); \ | ||
| 599 | int actual_len; \ | ||
| 600 | \ | ||
| 601 | OUTPUT= STRING_CHAR_AND_LENGTH (ptr, space_left, actual_len); \ | ||
| 602 | BYTEIDX += actual_len; \ | ||
| 603 | } \ | ||
| 604 | else \ | ||
| 605 | { \ | ||
| 606 | OUTPUT = *(BYTE_POS_ADDR (BYTEIDX)); \ | ||
| 607 | BYTEIDX++; \ | ||
| 608 | } \ | ||
| 570 | } \ | 609 | } \ |
| 571 | else | 610 | else |
| 572 | 611 | ||
| @@ -583,6 +622,9 @@ else | |||
| 583 | the next character boundary. This macro relies on the fact that | 622 | the next character boundary. This macro relies on the fact that |
| 584 | *GPT_ADDR and *Z_ADDR are always accessible and the values are | 623 | *GPT_ADDR and *Z_ADDR are always accessible and the values are |
| 585 | '\0'. No range checking of POS_BYTE. */ | 624 | '\0'. No range checking of POS_BYTE. */ |
| 625 | |||
| 626 | #ifdef BYTE_COMBINING_DEBUG | ||
| 627 | |||
| 586 | #define INC_POS(pos_byte) \ | 628 | #define INC_POS(pos_byte) \ |
| 587 | do { \ | 629 | do { \ |
| 588 | unsigned char *p = BYTE_POS_ADDR (pos_byte); \ | 630 | unsigned char *p = BYTE_POS_ADDR (pos_byte); \ |
| @@ -597,6 +639,16 @@ else | |||
| 597 | pos_byte++; \ | 639 | pos_byte++; \ |
| 598 | } while (0) | 640 | } while (0) |
| 599 | 641 | ||
| 642 | #else /* not BYTE_COMBINING_DEBUG */ | ||
| 643 | |||
| 644 | #define INC_POS(pos_byte) \ | ||
| 645 | do { \ | ||
| 646 | unsigned char *p = BYTE_POS_ADDR (pos_byte); \ | ||
| 647 | pos_byte += BYTES_BY_CHAR_HEAD (*p); \ | ||
| 648 | } while (0) | ||
| 649 | |||
| 650 | #endif /* not BYTE_COMBINING_DEBUG */ | ||
| 651 | |||
| 600 | /* Decrease the buffer byte position POS_BYTE of the current buffer to | 652 | /* Decrease the buffer byte position POS_BYTE of the current buffer to |
| 601 | the previous character boundary. No range checking of POS_BYTE. */ | 653 | the previous character boundary. No range checking of POS_BYTE. */ |
| 602 | #define DEC_POS(pos_byte) \ | 654 | #define DEC_POS(pos_byte) \ |
| @@ -650,6 +702,9 @@ while (0) | |||
| 650 | the next character boundary. This macro relies on the fact that | 702 | the next character boundary. This macro relies on the fact that |
| 651 | *GPT_ADDR and *Z_ADDR are always accessible and the values are | 703 | *GPT_ADDR and *Z_ADDR are always accessible and the values are |
| 652 | '\0'. No range checking of POS_BYTE. */ | 704 | '\0'. No range checking of POS_BYTE. */ |
| 705 | |||
| 706 | #ifdef BYTE_COMBINING_DEBUG | ||
| 707 | |||
| 653 | #define BUF_INC_POS(buf, pos_byte) \ | 708 | #define BUF_INC_POS(buf, pos_byte) \ |
| 654 | do { \ | 709 | do { \ |
| 655 | unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \ | 710 | unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \ |
| @@ -664,6 +719,16 @@ while (0) | |||
| 664 | pos_byte++; \ | 719 | pos_byte++; \ |
| 665 | } while (0) | 720 | } while (0) |
| 666 | 721 | ||
| 722 | #else /* not BYTE_COMBINING_DEBUG */ | ||
| 723 | |||
| 724 | #define BUF_INC_POS(buf, pos_byte) \ | ||
| 725 | do { \ | ||
| 726 | unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \ | ||
| 727 | pos_byte += BYTES_BY_CHAR_HEAD (*p); \ | ||
| 728 | } while (0) | ||
| 729 | |||
| 730 | #endif /* not BYTE_COMBINING_DEBUG */ | ||
| 731 | |||
| 667 | /* Decrease the buffer byte position POS_BYTE of the buffer BUF to | 732 | /* Decrease the buffer byte position POS_BYTE of the buffer BUF to |
| 668 | the previous character boundary. No range checking of POS_BYTE. */ | 733 | the previous character boundary. No range checking of POS_BYTE. */ |
| 669 | #define BUF_DEC_POS(buf, pos_byte) \ | 734 | #define BUF_DEC_POS(buf, pos_byte) \ |
| @@ -706,9 +771,13 @@ extern int char_to_string P_ ((int, unsigned char *)); | |||
| 706 | extern int string_to_char P_ ((const unsigned char *, int, int *)); | 771 | extern int string_to_char P_ ((const unsigned char *, int, int *)); |
| 707 | extern int char_printable_p P_ ((int c)); | 772 | extern int char_printable_p P_ ((int c)); |
| 708 | extern int multibyte_form_length P_ ((const unsigned char *, int)); | 773 | extern int multibyte_form_length P_ ((const unsigned char *, int)); |
| 774 | extern void parse_str_as_multibyte P_ ((unsigned char *, int, int *, int *)); | ||
| 775 | extern int str_as_multibyte P_ ((unsigned char *, int, int, int *)); | ||
| 776 | extern int str_to_multibyte P_ ((unsigned char *, int, int)); | ||
| 777 | extern int str_as_unibyte P_ ((unsigned char *, int)); | ||
| 709 | extern int get_charset_id P_ ((Lisp_Object)); | 778 | extern int get_charset_id P_ ((Lisp_Object)); |
| 710 | extern int find_charset_in_str P_ ((unsigned char *, int, int *, | 779 | extern int find_charset_in_text P_ ((unsigned char *, int, int, int *, |
| 711 | Lisp_Object, int)); | 780 | Lisp_Object)); |
| 712 | extern int strwidth P_ ((unsigned char *, int)); | 781 | extern int strwidth P_ ((unsigned char *, int)); |
| 713 | extern int char_bytes P_ ((int)); | 782 | extern int char_bytes P_ ((int)); |
| 714 | extern int char_valid_p P_ ((int, int)); | 783 | extern int char_valid_p P_ ((int, int)); |
| @@ -724,17 +793,14 @@ extern Lisp_Object Vauto_fill_chars; | |||
| 724 | 793 | ||
| 725 | /* Copy LEN bytes from FROM to TO. This macro should be used only | 794 | /* Copy LEN bytes from FROM to TO. This macro should be used only |
| 726 | when a caller knows that LEN is short and the obvious copy loop is | 795 | when a caller knows that LEN is short and the obvious copy loop is |
| 727 | faster than calling bcopy which has some overhead. */ | 796 | faster than calling bcopy which has some overhead. Copying a |
| 797 | multibyte sequence of a multibyte character is the typical case. */ | ||
| 728 | 798 | ||
| 729 | #define BCOPY_SHORT(from, to, len) \ | 799 | #define BCOPY_SHORT(from, to, len) \ |
| 730 | do { \ | 800 | do { \ |
| 731 | int i = len; \ | 801 | int i = len; \ |
| 732 | unsigned char *from_p = from, *to_p = to; \ | 802 | unsigned char *from_p = from, *to_p = to; \ |
| 733 | while (i--) *from_p++ = *to_p++; \ | 803 | while (i--) *to_p++ = *from_p++; \ |
| 734 | } while (0) | 804 | } while (0) |
| 735 | 805 | ||
| 736 | /* Length of C in bytes. */ | ||
| 737 | |||
| 738 | #define CHAR_LEN(C) CHARSET_BYTES (CHAR_CHARSET ((C))) | ||
| 739 | |||
| 740 | #endif /* _CHARSET_H */ | 806 | #endif /* _CHARSET_H */ |