diff options
| author | Kenichi Handa | 2000-05-19 23:52:27 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2000-05-19 23:52:27 +0000 |
| commit | 2e344af3e731463b6733239d0c9520645072ce11 (patch) | |
| tree | 93361dd3c2f08fe274ebc46d83a5a48fd86b5272 /src | |
| parent | bd045987c20414ae1de41518d39dcb34e652b696 (diff) | |
| download | emacs-2e344af3e731463b6733239d0c9520645072ce11.tar.gz emacs-2e344af3e731463b6733239d0c9520645072ce11.zip | |
(Qeight_bit_control, Qeight_bit_graphic): New
variables.
(SPLIT_CHARACTER_SEQ): This macro deleted.
(SPLIT_MULTIBYTE_SEQ): Assume that multibyte sequence at STR is
valid.
(CHAR_COMPONENTS_VALID_P): Handle new charsets; eight-bit-control
and eight-bit-graphic.
(char_to_string): Likewise. Signal an error for too large
character code.
(char_printable_p): Return 0 for 8-bit characters.
(update_charset_table): Update iso_charset_table only when a final
character is non-negative.
(find_charset_in_text): Renamed from find_charset_in_str.
Arguments and return value changed. Callers changed.
(Fdefine_charset): Args ISO-FINAL-CHAR and ISO-GRAPHIC-PLANE can
be -1 if CHARSET is used only internally.
(Fmake_char_internal): Handle new charsets; eight-bit-control and
eight-bit-graphic.
(Fcharset_after): Simplified.
(char_valid_p): Use SPLIT_CHAR, not SPLIT_NON_ASCII_CHAR.
(char_bytes): Return 2 for chars of the range 0xA0..0xFF.
(multibyte_chars_in_text): Simplified by assuming there's no
invalid multibyte sequence.
(parse_str_as_multibyte, str_as_multibyte, str_to_multibyte,
str_as_unibyte): New functions.
(Fstring): Simplified by assuming that byte combining never
happens.
(init_charset_once): Initialization for
LEADING_CODE_8_BIT_CONTROL.
(syms_of_charset): Intern and staticpro Qeight_bit_control and
Qeight_bit_graphic. Include them in Vcharset_list. Make charsets
eight-bit-control and eight-bit-graphic.
Diffstat (limited to 'src')
| -rw-r--r-- | src/charset.c | 595 |
1 files changed, 370 insertions, 225 deletions
diff --git a/src/charset.c b/src/charset.c index cea0c57207c..bf9ce66d927 100644 --- a/src/charset.c +++ b/src/charset.c | |||
| @@ -43,7 +43,7 @@ Boston, MA 02111-1307, USA. */ | |||
| 43 | 43 | ||
| 44 | #endif /* emacs */ | 44 | #endif /* emacs */ |
| 45 | 45 | ||
| 46 | Lisp_Object Qcharset, Qascii; | 46 | Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic; |
| 47 | Lisp_Object Qunknown; | 47 | Lisp_Object Qunknown; |
| 48 | 48 | ||
| 49 | /* Declaration of special leading-codes. */ | 49 | /* Declaration of special leading-codes. */ |
| @@ -52,8 +52,8 @@ int leading_code_private_12; /* for private DIMENSION1 of 2-column */ | |||
| 52 | int leading_code_private_21; /* for private DIMENSION2 of 1-column */ | 52 | int leading_code_private_21; /* for private DIMENSION2 of 1-column */ |
| 53 | int leading_code_private_22; /* for private DIMENSION2 of 2-column */ | 53 | int leading_code_private_22; /* for private DIMENSION2 of 2-column */ |
| 54 | 54 | ||
| 55 | /* Declaration of special charsets. */ | 55 | /* Declaration of special charsets. The values are set by |
| 56 | int charset_ascii; /* ASCII */ | 56 | Fsetup_special_charsets. */ |
| 57 | int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */ | 57 | int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */ |
| 58 | int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */ | 58 | int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */ |
| 59 | int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */ | 59 | int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */ |
| @@ -115,63 +115,46 @@ invalid_character (c) | |||
| 115 | error ("Invalid character: 0%o, %d, 0x%x", c, c, c); | 115 | error ("Invalid character: 0%o, %d, 0x%x", c, c, c); |
| 116 | } | 116 | } |
| 117 | 117 | ||
| 118 | /* Parse a multibyte character string STR of length LENGTH (>= 2) set | 118 | /* Parse string STR of length LENGTH and fetch information of a |
| 119 | BYTES to the length of actual multibyte sequence, CHARSET, C1, and | 119 | character at STR. Set BYTES to the byte length the character |
| 120 | C2 to such values that MAKE_CHAR can make the multibyte character | 120 | occupies, CHARSET, C1, C2 to proper values of the character. */ |
| 121 | from them. | 121 | |
| 122 | 122 | #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2) \ | |
| 123 | It is assumed that *STR is one of base leading codes and the | 123 | do { \ |
| 124 | following (LENGTH - 1) bytes satisfy !CHAR_HEAD_P. | 124 | (c1) = *(str); \ |
| 125 | 125 | (bytes) = BYTES_BY_CHAR_HEAD (c1); \ | |
| 126 | This macro should be called only from SPLIT_MULTIBYTE_SEQ. */ | 126 | if ((bytes) == 1) \ |
| 127 | 127 | (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \ | |
| 128 | #define SPLIT_CHARACTER_SEQ(str, length, bytes, charset, c1, c2) \ | 128 | else if ((bytes) == 2) \ |
| 129 | do { \ | 129 | { \ |
| 130 | (bytes) = 1; \ | 130 | if ((c1) == LEADING_CODE_8_BIT_CONTROL) \ |
| 131 | (charset) = (str)[0]; \ | 131 | (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20; \ |
| 132 | if ((charset) >= LEADING_CODE_PRIVATE_11 \ | 132 | else \ |
| 133 | && (charset) <= LEADING_CODE_PRIVATE_22) \ | 133 | (charset) = (c1), (c1) = (str)[1] & 0x7F; \ |
| 134 | (charset) = (str)[(bytes)++]; \ | 134 | } \ |
| 135 | if ((bytes) < (length)) \ | 135 | else if ((bytes) == 3) \ |
| 136 | { \ | 136 | { \ |
| 137 | (c1) = (str)[(bytes)++] & 0x7F; \ | 137 | if ((c1) < LEADING_CODE_PRIVATE_11) \ |
| 138 | if ((bytes) < (length)) \ | 138 | (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F; \ |
| 139 | (c2) = (str)[(bytes)++] & 0x7F; \ | 139 | else \ |
| 140 | else \ | 140 | (charset) = (str)[1], (c1) = (str)[2] & 0x7F; \ |
| 141 | (c2) = -1; \ | 141 | } \ |
| 142 | } \ | 142 | else \ |
| 143 | else \ | 143 | (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F; \ |
| 144 | (c1) = (c2) = -1; \ | ||
| 145 | } while (0) | ||
| 146 | |||
| 147 | /* Parse string STR of length LENGTH and check if a multibyte | ||
| 148 | characters is at STR. Set BYTES to the actual length, CHARSET, C1, | ||
| 149 | C2 to proper values for that character. */ | ||
| 150 | |||
| 151 | #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2) \ | ||
| 152 | do { \ | ||
| 153 | int i; \ | ||
| 154 | if (ASCII_BYTE_P ((str)[0])) \ | ||
| 155 | i = 1; \ | ||
| 156 | else \ | ||
| 157 | for (i = 1; i < (length) && ! CHAR_HEAD_P ((str)[i]); i++); \ | ||
| 158 | if (i == 1) \ | ||
| 159 | (bytes) = 1, (charset) = CHARSET_ASCII, (c1) = (str)[0] ; \ | ||
| 160 | else \ | ||
| 161 | { \ | ||
| 162 | if (i > BYTES_BY_CHAR_HEAD ((str)[0])) \ | ||
| 163 | i = BYTES_BY_CHAR_HEAD ((str)[0]); \ | ||
| 164 | SPLIT_CHARACTER_SEQ (str, i, bytes, charset, c1, c2); \ | ||
| 165 | } \ | ||
| 166 | } while (0) | 144 | } while (0) |
| 167 | 145 | ||
| 168 | /* 1 if CHARSET, C1, and C2 compose a valid character, else 0. */ | 146 | /* 1 if CHARSET, C1, and C2 compose a valid character, else 0. */ |
| 169 | #define CHAR_COMPONENTS_VALID_P(charset, c1, c2) \ | 147 | #define CHAR_COMPONENTS_VALID_P(charset, c1, c2) \ |
| 170 | (charset == CHARSET_ASCII \ | 148 | ((charset) == CHARSET_ASCII \ |
| 171 | ? ((c1) >= 0 && (c1) <= 0x7F) \ | 149 | ? ((c1) >= 0 && (c1) <= 0x7F) \ |
| 172 | : (CHARSET_DIMENSION (charset) == 1 \ | 150 | : ((charset) == CHARSET_8_BIT_CONTROL \ |
| 173 | ? ((c1) >= 0x20 && (c1) <= 0x7F) \ | 151 | ? ((c1) >= 0x80 && (c1) <= 0x9F) \ |
| 174 | : ((c1) >= 0x20 && (c1) <= 0x7F && (c2) >= 0x20 && (c2) <= 0x7F))) | 152 | : ((charset) == CHARSET_8_BIT_GRAPHIC \ |
| 153 | ? ((c1) >= 0x80 && (c1) <= 0xFF) \ | ||
| 154 | : (CHARSET_DIMENSION (charset) == 1 \ | ||
| 155 | ? ((c1) >= 0x20 && (c1) <= 0x7F) \ | ||
| 156 | : ((c1) >= 0x20 && (c1) <= 0x7F \ | ||
| 157 | && (c2) >= 0x20 && (c2) <= 0x7F))))) | ||
| 175 | 158 | ||
| 176 | /* Store multi-byte form of the character C in STR. The caller should | 159 | /* Store multi-byte form of the character C in STR. The caller should |
| 177 | allocate at least 4-byte area at STR in advance. Returns the | 160 | allocate at least 4-byte area at STR in advance. Returns the |
| @@ -227,14 +210,22 @@ char_to_string (c, str) | |||
| 227 | /* If C still has any modifier bits, it is an invalid character. */ | 210 | /* If C still has any modifier bits, it is an invalid character. */ |
| 228 | if (c & CHAR_MODIFIER_MASK) | 211 | if (c & CHAR_MODIFIER_MASK) |
| 229 | invalid_character (c); | 212 | invalid_character (c); |
| 230 | 213 | } | |
| 231 | *p++ = c; | 214 | if (SINGLE_BYTE_CHAR_P (c)) |
| 215 | { | ||
| 216 | if (ASCII_BYTE_P (c) || c >= 0xA0) | ||
| 217 | *p++ = c; | ||
| 218 | else | ||
| 219 | { | ||
| 220 | *p++ = LEADING_CODE_8_BIT_CONTROL; | ||
| 221 | *p++ = c + 0x20; | ||
| 222 | } | ||
| 232 | } | 223 | } |
| 233 | else if (c < MAX_CHAR) | 224 | else if (c < MAX_CHAR) |
| 234 | { | 225 | { |
| 235 | int charset, c1, c2; | 226 | int charset, c1, c2; |
| 236 | 227 | ||
| 237 | SPLIT_NON_ASCII_CHAR (c, charset, c1, c2); | 228 | SPLIT_CHAR (c, charset, c1, c2); |
| 238 | 229 | ||
| 239 | if (charset >= LEADING_CODE_EXT_11) | 230 | if (charset >= LEADING_CODE_EXT_11) |
| 240 | *p++ = (charset < LEADING_CODE_EXT_12 | 231 | *p++ = (charset < LEADING_CODE_EXT_12 |
| @@ -254,8 +245,10 @@ char_to_string (c, str) | |||
| 254 | *p++ = c2 | 0x80; | 245 | *p++ = c2 | 0x80; |
| 255 | } | 246 | } |
| 256 | } | 247 | } |
| 248 | else | ||
| 249 | invalid_character (c); | ||
| 257 | 250 | ||
| 258 | return (p -str); | 251 | return (p - str); |
| 259 | } | 252 | } |
| 260 | 253 | ||
| 261 | /* Return the non-ASCII character corresponding to multi-byte form at | 254 | /* Return the non-ASCII character corresponding to multi-byte form at |
| @@ -324,12 +317,14 @@ char_printable_p (c) | |||
| 324 | { | 317 | { |
| 325 | int charset, c1, c2, chars; | 318 | int charset, c1, c2, chars; |
| 326 | 319 | ||
| 327 | if (SINGLE_BYTE_CHAR_P (c)) | 320 | if (ASCII_BYTE_P (c)) |
| 328 | return 1; | 321 | return 1; |
| 329 | if (c >= MAX_CHAR) | 322 | else if (SINGLE_BYTE_CHAR_P (c)) |
| 323 | return 0; | ||
| 324 | else if (c >= MAX_CHAR) | ||
| 330 | return 0; | 325 | return 0; |
| 331 | 326 | ||
| 332 | SPLIT_NON_ASCII_CHAR (c, charset, c1, c2); | 327 | SPLIT_CHAR (c, charset, c1, c2); |
| 333 | if (! CHARSET_DEFINED_P (charset)) | 328 | if (! CHARSET_DEFINED_P (charset)) |
| 334 | return 0; | 329 | return 0; |
| 335 | if (CHARSET_CHARS (charset) == 94 | 330 | if (CHARSET_CHARS (charset) == 94 |
| @@ -479,7 +474,7 @@ update_charset_table (charset_id, dimension, chars, width, direction, | |||
| 479 | if (charset < MIN_CHARSET_PRIVATE_DIMENSION1) | 474 | if (charset < MIN_CHARSET_PRIVATE_DIMENSION1) |
| 480 | { | 475 | { |
| 481 | /* Official charset, it doesn't have an extended leading-code. */ | 476 | /* Official charset, it doesn't have an extended leading-code. */ |
| 482 | if (charset != CHARSET_ASCII) | 477 | if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC) |
| 483 | bytes += 1; /* For a base leading-code. */ | 478 | bytes += 1; /* For a base leading-code. */ |
| 484 | leading_code_base = charset; | 479 | leading_code_base = charset; |
| 485 | leading_code_ext = 0; | 480 | leading_code_ext = 0; |
| @@ -499,7 +494,8 @@ update_charset_table (charset_id, dimension, chars, width, direction, | |||
| 499 | leading_code_ext = charset; | 494 | leading_code_ext = charset; |
| 500 | } | 495 | } |
| 501 | 496 | ||
| 502 | if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes) | 497 | if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC |
| 498 | &&BYTES_BY_CHAR_HEAD (leading_code_base) != bytes) | ||
| 503 | error ("Invalid dimension for the charset-ID %d", charset); | 499 | error ("Invalid dimension for the charset-ID %d", charset); |
| 504 | 500 | ||
| 505 | CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id; | 501 | CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id; |
| @@ -562,7 +558,8 @@ update_charset_table (charset_id, dimension, chars, width, direction, | |||
| 562 | } | 558 | } |
| 563 | 559 | ||
| 564 | /* Update table iso_charset_table. */ | 560 | /* Update table iso_charset_table. */ |
| 565 | if (ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0) | 561 | if (iso_final_char >= 0 |
| 562 | && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0) | ||
| 566 | ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset; | 563 | ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset; |
| 567 | } | 564 | } |
| 568 | 565 | ||
| @@ -633,10 +630,12 @@ render from right to left.\n\ | |||
| 633 | \n\ | 630 | \n\ |
| 634 | ISO-FINAL-CHAR (character) is the final character of the\n\ | 631 | ISO-FINAL-CHAR (character) is the final character of the\n\ |
| 635 | corresponding ISO 2022 charset.\n\ | 632 | corresponding ISO 2022 charset.\n\ |
| 633 | It may be -1 if the charset is internal use only.\n\ | ||
| 636 | \n\ | 634 | \n\ |
| 637 | ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\ | 635 | ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\ |
| 638 | while encoding to variants of ISO 2022 coding system, one of the\n\ | 636 | while encoding to variants of ISO 2022 coding system, one of the\n\ |
| 639 | following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\ | 637 | following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\ |
| 638 | It may be -1 if the charset is internal use only.\n\ | ||
| 640 | \n\ | 639 | \n\ |
| 641 | SHORT-NAME (string) is the short name to refer to the charset.\n\ | 640 | SHORT-NAME (string) is the short name to refer to the charset.\n\ |
| 642 | \n\ | 641 | \n\ |
| @@ -667,8 +666,10 @@ DESCRIPTION (string) is the description string of the charset.") | |||
| 667 | || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96) | 666 | || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96) |
| 668 | || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2) | 667 | || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2) |
| 669 | || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1) | 668 | || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1) |
| 670 | || !INTEGERP (vec[4]) || !(XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~') | 669 | || !INTEGERP (vec[4]) |
| 671 | || !INTEGERP (vec[5]) || !(XINT (vec[5]) == 0 || XINT (vec[5]) == 1) | 670 | || !(XINT (vec[4]) == -1 || XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~') |
| 671 | || !INTEGERP (vec[5]) | ||
| 672 | || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1) | ||
| 672 | || !STRINGP (vec[6]) | 673 | || !STRINGP (vec[6]) |
| 673 | || !STRINGP (vec[7]) | 674 | || !STRINGP (vec[7]) |
| 674 | || !STRINGP (vec[8])) | 675 | || !STRINGP (vec[8])) |
| @@ -757,69 +758,84 @@ CHARSET should be defined by `defined-charset' in advance.") | |||
| 757 | return Qnil; | 758 | return Qnil; |
| 758 | } | 759 | } |
| 759 | 760 | ||
| 760 | /* Return number of different charsets in STR of length LEN. In | 761 | /* Return information about charsets in the text at PTR of NBYTES |
| 761 | addition, for each found charset N, CHARSETS[N] is set 1. The | 762 | bytes, which are NCHARS characters. The value is: |
| 762 | caller should allocate CHARSETS (MAX_CHARSET + 1 elements) in advance. | 763 | 0: No multibyte characters (including 8-bit code of range 0x80..0x9F) |
| 763 | It may lookup a translation table TABLE if supplied. | 764 | are found. |
| 765 | 1: No charsets other than ascii eight-bit-control, | ||
| 766 | eight-bit-graphic, and latin-1 are found. | ||
| 767 | 2: Otherwise. | ||
| 764 | 768 | ||
| 765 | If MULTIBYTE is zero, do not check multibyte characters, i.e. if | 769 | In addition, if CHARSETS is nonzero, for each found charset N, set |
| 766 | any ASCII codes (7-bit) are found, CHARSET[0] is set to 1, if any | 770 | CHARSETS[N] to 1. For that, callers should allocate CHARSETS |
| 767 | 8-bit codes are found CHARSET[1] is set to 1. */ | 771 | (MAX_CHARSET + 1 elements) in advance. It may lookup a translation |
| 772 | table TABLE if supplied. For invalid charsets, set CHARSETS[1] to | ||
| 773 | 1 (note that there's no charset whose ID is 1). */ | ||
| 768 | 774 | ||
| 769 | int | 775 | int |
| 770 | find_charset_in_str (str, len, charsets, table, multibyte) | 776 | find_charset_in_text (ptr, nchars, nbytes, charsets, table) |
| 771 | unsigned char *str; | 777 | unsigned char *ptr; |
| 772 | int len, *charsets; | 778 | int nchars, nbytes, *charsets; |
| 773 | Lisp_Object table; | 779 | Lisp_Object table; |
| 774 | int multibyte; | ||
| 775 | { | 780 | { |
| 776 | register int num = 0; | 781 | if (nchars == nbytes) |
| 777 | |||
| 778 | if (! multibyte) | ||
| 779 | { | 782 | { |
| 780 | unsigned char *endp = str + len; | 783 | if (charsets && nbytes > 0) |
| 781 | int maskbits = 0; | ||
| 782 | |||
| 783 | while (str < endp && maskbits != 3) | ||
| 784 | maskbits |= (*str++ < 0x80 ? 1 : 2); | ||
| 785 | if (maskbits & 1) | ||
| 786 | { | ||
| 787 | charsets[0] = 1; | ||
| 788 | num++; | ||
| 789 | } | ||
| 790 | if (maskbits & 2) | ||
| 791 | { | 784 | { |
| 792 | charsets[1] = 1; | 785 | unsigned char *endp = ptr + nbytes; |
| 793 | num++; | 786 | int maskbits = 0; |
| 787 | |||
| 788 | while (ptr < endp && maskbits != 7) | ||
| 789 | { | ||
| 790 | maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4); | ||
| 791 | ptr++; | ||
| 792 | } | ||
| 793 | |||
| 794 | if (maskbits & 1) | ||
| 795 | charsets[CHARSET_ASCII] = 1; | ||
| 796 | if (maskbits & 2) | ||
| 797 | charsets[CHARSET_8_BIT_CONTROL] = 1; | ||
| 798 | if (maskbits & 4) | ||
| 799 | charsets[CHARSET_8_BIT_GRAPHIC] = 1; | ||
| 794 | } | 800 | } |
| 795 | return num; | 801 | return 0; |
| 796 | } | 802 | } |
| 797 | 803 | else | |
| 798 | if (! CHAR_TABLE_P (table)) | ||
| 799 | table = Qnil; | ||
| 800 | |||
| 801 | while (len > 0) | ||
| 802 | { | 804 | { |
| 805 | int return_val = 1; | ||
| 803 | int bytes, charset, c1, c2; | 806 | int bytes, charset, c1, c2; |
| 804 | 807 | ||
| 805 | SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2); | 808 | if (! CHAR_TABLE_P (table)) |
| 809 | table = Qnil; | ||
| 806 | 810 | ||
| 807 | if (! NILP (table)) | 811 | while (nchars-- > 0) |
| 808 | { | 812 | { |
| 809 | int c1 = translate_char (table, -1, charset, c1, c2); | 813 | SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2); |
| 810 | if (c1 >= 0) | 814 | ptr += bytes; |
| 811 | charset = CHAR_CHARSET (c1); | ||
| 812 | } | ||
| 813 | 815 | ||
| 814 | if (!charsets[charset]) | 816 | if (!CHARSET_DEFINED_P (charset)) |
| 815 | { | 817 | charset = 1; |
| 816 | charsets[charset] = 1; | 818 | else if (! NILP (table)) |
| 817 | num += 1; | 819 | { |
| 820 | int c = translate_char (table, -1, charset, c1, c2); | ||
| 821 | if (c >= 0) | ||
| 822 | charset = CHAR_CHARSET (c); | ||
| 823 | } | ||
| 824 | |||
| 825 | if (return_val == 1 | ||
| 826 | && charset != CHARSET_ASCII | ||
| 827 | && charset != CHARSET_8_BIT_CONTROL | ||
| 828 | && charset != CHARSET_8_BIT_GRAPHIC | ||
| 829 | && charset != charset_latin_iso8859_1) | ||
| 830 | return_val = 2; | ||
| 831 | |||
| 832 | if (charsets) | ||
| 833 | charsets[charset] = 1; | ||
| 834 | else if (return_val == 2) | ||
| 835 | break; | ||
| 818 | } | 836 | } |
| 819 | str += bytes; | 837 | return return_val; |
| 820 | len -= bytes; | ||
| 821 | } | 838 | } |
| 822 | return num; | ||
| 823 | } | 839 | } |
| 824 | 840 | ||
| 825 | DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region, | 841 | DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region, |
| @@ -831,17 +847,14 @@ Optional arg TABLE if non-nil is a translation table to look up.\n\ | |||
| 831 | If the region contains invalid multiybte characters,\n\ | 847 | If the region contains invalid multiybte characters,\n\ |
| 832 | `unknown' is included in the returned list.\n\ | 848 | `unknown' is included in the returned list.\n\ |
| 833 | \n\ | 849 | \n\ |
| 834 | If the current buffer is unibyte, the returned list contains\n\ | 850 | If the current buffer is unibyte, the returned list may contain\n\ |
| 835 | `ascii' if any 7-bit characters are found,\n\ | 851 | only `ascii', `eight-bit-control', and `eight-bit-graphic'.") |
| 836 | and `unknown' if any 8-bit characters are found.") | ||
| 837 | (beg, end, table) | 852 | (beg, end, table) |
| 838 | Lisp_Object beg, end, table; | 853 | Lisp_Object beg, end, table; |
| 839 | { | 854 | { |
| 840 | int charsets[MAX_CHARSET + 1]; | 855 | int charsets[MAX_CHARSET + 1]; |
| 841 | int from, from_byte, to, stop, stop_byte, i; | 856 | int from, from_byte, to, stop, stop_byte, i; |
| 842 | Lisp_Object val; | 857 | Lisp_Object val; |
| 843 | int undefined; | ||
| 844 | int multibyte = !NILP (current_buffer->enable_multibyte_characters); | ||
| 845 | 858 | ||
| 846 | validate_region (&beg, &end); | 859 | validate_region (&beg, &end); |
| 847 | from = XFASTINT (beg); | 860 | from = XFASTINT (beg); |
| @@ -860,8 +873,8 @@ and `unknown' if any 8-bit characters are found.") | |||
| 860 | bzero (charsets, (MAX_CHARSET + 1) * sizeof (int)); | 873 | bzero (charsets, (MAX_CHARSET + 1) * sizeof (int)); |
| 861 | while (1) | 874 | while (1) |
| 862 | { | 875 | { |
| 863 | find_charset_in_str (BYTE_POS_ADDR (from_byte), stop_byte - from_byte, | 876 | find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from, |
| 864 | charsets, table, multibyte); | 877 | stop_byte - from_byte, charsets, table); |
| 865 | if (stop < to) | 878 | if (stop < to) |
| 866 | { | 879 | { |
| 867 | from = stop, from_byte = stop_byte; | 880 | from = stop, from_byte = stop_byte; |
| @@ -872,17 +885,13 @@ and `unknown' if any 8-bit characters are found.") | |||
| 872 | } | 885 | } |
| 873 | 886 | ||
| 874 | val = Qnil; | 887 | val = Qnil; |
| 875 | undefined = 0; | 888 | if (charsets[1]) |
| 876 | for (i = (multibyte ? MAX_CHARSET : 1); i >= 0; i--) | ||
| 877 | if (charsets[i]) | ||
| 878 | { | ||
| 879 | if (CHARSET_DEFINED_P (i)) | ||
| 880 | val = Fcons (CHARSET_SYMBOL (i), val); | ||
| 881 | else | ||
| 882 | undefined = 1; | ||
| 883 | } | ||
| 884 | if (undefined) | ||
| 885 | val = Fcons (Qunknown, val); | 889 | val = Fcons (Qunknown, val); |
| 890 | for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--) | ||
| 891 | if (charsets[i]) | ||
| 892 | val = Fcons (CHARSET_SYMBOL (i), val); | ||
| 893 | if (charsets[0]) | ||
| 894 | val = Fcons (Qascii, val); | ||
| 886 | return val; | 895 | return val; |
| 887 | } | 896 | } |
| 888 | 897 | ||
| @@ -894,38 +903,32 @@ Optional arg TABLE if non-nil is a translation table to look up.\n\ | |||
| 894 | If the region contains invalid multiybte characters,\n\ | 903 | If the region contains invalid multiybte characters,\n\ |
| 895 | `unknown' is included in the returned list.\n\ | 904 | `unknown' is included in the returned list.\n\ |
| 896 | \n\ | 905 | \n\ |
| 897 | If STR is unibyte, the returned list contains\n\ | 906 | If STR is unibyte, the returned list may contain\n\ |
| 898 | `ascii' if any 7-bit characters are found,\n\ | 907 | only `ascii', `eight-bit-control', and `eight-bit-graphic'.") |
| 899 | and `unknown' if any 8-bit characters are found.") | ||
| 900 | (str, table) | 908 | (str, table) |
| 901 | Lisp_Object str, table; | 909 | Lisp_Object str, table; |
| 902 | { | 910 | { |
| 903 | int charsets[MAX_CHARSET + 1]; | 911 | int charsets[MAX_CHARSET + 1]; |
| 904 | int i; | 912 | int i; |
| 905 | Lisp_Object val; | 913 | Lisp_Object val; |
| 906 | int undefined; | ||
| 907 | int multibyte; | ||
| 908 | 914 | ||
| 909 | CHECK_STRING (str, 0); | 915 | CHECK_STRING (str, 0); |
| 910 | multibyte = STRING_MULTIBYTE (str); | ||
| 911 | 916 | ||
| 912 | bzero (charsets, (MAX_CHARSET + 1) * sizeof (int)); | 917 | bzero (charsets, (MAX_CHARSET + 1) * sizeof (int)); |
| 913 | find_charset_in_str (XSTRING (str)->data, STRING_BYTES (XSTRING (str)), | 918 | find_charset_in_text (XSTRING (str)->data, XSTRING (str)->size, |
| 914 | charsets, table, multibyte); | 919 | STRING_BYTES (XSTRING (str)), charsets, table); |
| 920 | |||
| 915 | val = Qnil; | 921 | val = Qnil; |
| 916 | undefined = 0; | 922 | if (charsets[1]) |
| 917 | for (i = (multibyte ? MAX_CHARSET : 1); i >= 0; i--) | ||
| 918 | if (charsets[i]) | ||
| 919 | { | ||
| 920 | if (CHARSET_DEFINED_P (i)) | ||
| 921 | val = Fcons (CHARSET_SYMBOL (i), val); | ||
| 922 | else | ||
| 923 | undefined = 1; | ||
| 924 | } | ||
| 925 | if (undefined) | ||
| 926 | val = Fcons (Qunknown, val); | 923 | val = Fcons (Qunknown, val); |
| 924 | for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--) | ||
| 925 | if (charsets[i]) | ||
| 926 | val = Fcons (CHARSET_SYMBOL (i), val); | ||
| 927 | if (charsets[0]) | ||
| 928 | val = Fcons (Qascii, val); | ||
| 927 | return val; | 929 | return val; |
| 928 | } | 930 | } |
| 931 | |||
| 929 | 932 | ||
| 930 | DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0, | 933 | DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0, |
| 931 | "") | 934 | "") |
| @@ -954,8 +957,26 @@ DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0, | |||
| 954 | c2 = XINT (code2); | 957 | c2 = XINT (code2); |
| 955 | } | 958 | } |
| 956 | 959 | ||
| 957 | if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF) | 960 | if (charset_id == CHARSET_ASCII) |
| 958 | error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2); | 961 | { |
| 962 | if (c1 < 0 || c1 > 0x7F) | ||
| 963 | goto invalid_code_posints; | ||
| 964 | return make_number (c1); | ||
| 965 | } | ||
| 966 | else if (charset_id == CHARSET_8_BIT_CONTROL) | ||
| 967 | { | ||
| 968 | if (c1 < 0x80 || c1 > 0x9F) | ||
| 969 | goto invalid_code_posints; | ||
| 970 | return make_number (c1); | ||
| 971 | } | ||
| 972 | else if (charset_id == CHARSET_8_BIT_GRAPHIC) | ||
| 973 | { | ||
| 974 | if (c1 < 0xA0 || c1 > 0xFF) | ||
| 975 | goto invalid_code_posints; | ||
| 976 | return make_number (c1); | ||
| 977 | } | ||
| 978 | else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF) | ||
| 979 | goto invalid_code_posints; | ||
| 959 | c1 &= 0x7F; | 980 | c1 &= 0x7F; |
| 960 | c2 &= 0x7F; | 981 | c2 &= 0x7F; |
| 961 | if (c1 == 0 | 982 | if (c1 == 0 |
| @@ -963,9 +984,11 @@ DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0, | |||
| 963 | : (c2 == 0 | 984 | : (c2 == 0 |
| 964 | ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20) | 985 | ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20) |
| 965 | : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2))) | 986 | : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2))) |
| 966 | error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2); | 987 | goto invalid_code_posints; |
| 967 | |||
| 968 | return make_number (MAKE_CHAR (charset_id, c1, c2)); | 988 | return make_number (MAKE_CHAR (charset_id, c1, c2)); |
| 989 | |||
| 990 | invalid_code_posints: | ||
| 991 | error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2); | ||
| 969 | } | 992 | } |
| 970 | 993 | ||
| 971 | DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0, | 994 | DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0, |
| @@ -1006,34 +1029,13 @@ If POS is out of range, the value is nil.") | |||
| 1006 | (pos) | 1029 | (pos) |
| 1007 | Lisp_Object pos; | 1030 | Lisp_Object pos; |
| 1008 | { | 1031 | { |
| 1009 | register int pos_byte, bytes, charset, c1, c2; | 1032 | Lisp_Object ch; |
| 1010 | register unsigned char *p; | 1033 | int charset; |
| 1011 | |||
| 1012 | if (NILP (pos)) | ||
| 1013 | pos_byte = PT_BYTE; | ||
| 1014 | else if (MARKERP (pos)) | ||
| 1015 | { | ||
| 1016 | pos_byte = marker_byte_position (pos); | ||
| 1017 | if (pos_byte < BEGV_BYTE || pos_byte >= ZV_BYTE) | ||
| 1018 | return Qnil; | ||
| 1019 | } | ||
| 1020 | else | ||
| 1021 | { | ||
| 1022 | CHECK_NUMBER (pos, 0); | ||
| 1023 | if (XINT (pos) < BEGV || XINT (pos) >= ZV) | ||
| 1024 | return Qnil; | ||
| 1025 | pos_byte = CHAR_TO_BYTE (XINT (pos)); | ||
| 1026 | } | ||
| 1027 | p = BYTE_POS_ADDR (pos_byte); | ||
| 1028 | if (BASE_LEADING_CODE_P (*p)) | ||
| 1029 | { | ||
| 1030 | SPLIT_MULTIBYTE_SEQ (p, Z_BYTE - pos_byte, bytes, charset, c1, c2); | ||
| 1031 | if (charset < 0) | ||
| 1032 | charset = 1; | ||
| 1033 | } | ||
| 1034 | else | ||
| 1035 | charset = CHARSET_ASCII; | ||
| 1036 | 1034 | ||
| 1035 | ch = Fchar_after (pos); | ||
| 1036 | if (! INTEGERP (ch)) | ||
| 1037 | return ch; | ||
| 1038 | charset = CHAR_CHARSET (XINT (ch)); | ||
| 1037 | return CHARSET_SYMBOL (charset); | 1039 | return CHARSET_SYMBOL (charset); |
| 1038 | } | 1040 | } |
| 1039 | 1041 | ||
| @@ -1073,7 +1075,7 @@ char_valid_p (c, genericp) | |||
| 1073 | return 0; | 1075 | return 0; |
| 1074 | if (SINGLE_BYTE_CHAR_P (c)) | 1076 | if (SINGLE_BYTE_CHAR_P (c)) |
| 1075 | return 1; | 1077 | return 1; |
| 1076 | SPLIT_NON_ASCII_CHAR (c, charset, c1, c2); | 1078 | SPLIT_CHAR (c, charset, c1, c2); |
| 1077 | if (genericp) | 1079 | if (genericp) |
| 1078 | { | 1080 | { |
| 1079 | if (c1) | 1081 | if (c1) |
| @@ -1161,7 +1163,9 @@ char_bytes (c) | |||
| 1161 | { | 1163 | { |
| 1162 | int charset; | 1164 | int charset; |
| 1163 | 1165 | ||
| 1164 | if (SINGLE_BYTE_CHAR_P (c) || (c & ~((1 << CHARACTERBITS) - 1))) | 1166 | if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1))) |
| 1167 | return 1; | ||
| 1168 | if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0) | ||
| 1165 | return 1; | 1169 | return 1; |
| 1166 | 1170 | ||
| 1167 | charset = CHAR_CHARSET (c); | 1171 | charset = CHAR_CHARSET (c); |
| @@ -1334,23 +1338,146 @@ multibyte_chars_in_text (ptr, nbytes) | |||
| 1334 | 1338 | ||
| 1335 | while (ptr < endp) | 1339 | while (ptr < endp) |
| 1336 | { | 1340 | { |
| 1337 | if (BASE_LEADING_CODE_P (*ptr)) | 1341 | PARSE_MULTIBYTE_SEQ (ptr, endp - ptr, bytes); |
| 1342 | ptr += bytes; | ||
| 1343 | chars++; | ||
| 1344 | } | ||
| 1345 | |||
| 1346 | return chars; | ||
| 1347 | } | ||
| 1348 | |||
| 1349 | /* Parse unibyte text at STR of LEN bytes as a multibyte text, and | ||
| 1350 | count the numbers of characters and bytes in it. On counting | ||
| 1351 | bytes, pay attention to that 8-bit characters in the range | ||
| 1352 | 0x80..0x9F are represented by 2-byte in a multibyte text. */ | ||
| 1353 | void | ||
| 1354 | parse_str_as_multibyte (str, len, nchars, nbytes) | ||
| 1355 | unsigned char *str; | ||
| 1356 | int len, *nchars, *nbytes; | ||
| 1357 | { | ||
| 1358 | unsigned char *endp = str + len; | ||
| 1359 | int n, chars = 0, bytes = 0; | ||
| 1360 | |||
| 1361 | while (str < endp) | ||
| 1362 | { | ||
| 1363 | if (UNIBYTE_STR_AS_MULTIBYTE_P (str, endp - str, n)) | ||
| 1364 | str += n, bytes += n; | ||
| 1365 | else | ||
| 1366 | str++, bytes += 2; | ||
| 1367 | chars++; | ||
| 1368 | } | ||
| 1369 | *nchars = chars; | ||
| 1370 | *nbytes = bytes; | ||
| 1371 | return; | ||
| 1372 | } | ||
| 1373 | |||
| 1374 | /* Arrange unibyte text at STR of NBYTES bytes as a multibyte text. | ||
| 1375 | It actually converts only 8-bit characters in the range 0x80..0x9F | ||
| 1376 | that don't construct multibyte characters to multibyte forms. If | ||
| 1377 | NCHARS is nonzero, set *NCHARS to the number of characters in the | ||
| 1378 | text. It is assured that we can use LEN bytes at STR as a work | ||
| 1379 | area and that is enough. Return the number of bytes of the | ||
| 1380 | resulting text. */ | ||
| 1381 | |||
| 1382 | int | ||
| 1383 | str_as_multibyte (str, len, nbytes, nchars) | ||
| 1384 | unsigned char *str; | ||
| 1385 | int len, nbytes, *nchars; | ||
| 1386 | { | ||
| 1387 | unsigned char *p = str, *endp = str + nbytes; | ||
| 1388 | unsigned char *to; | ||
| 1389 | int chars = 0; | ||
| 1390 | int n; | ||
| 1391 | |||
| 1392 | while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n)) | ||
| 1393 | p += n, chars++; | ||
| 1394 | if (nchars) | ||
| 1395 | *nchars = chars; | ||
| 1396 | if (p == endp) | ||
| 1397 | return nbytes; | ||
| 1398 | |||
| 1399 | to = p; | ||
| 1400 | nbytes = endp - p; | ||
| 1401 | endp = str + len; | ||
| 1402 | safe_bcopy (p, endp - nbytes, nbytes); | ||
| 1403 | p = endp - nbytes; | ||
| 1404 | while (p < endp) | ||
| 1405 | { | ||
| 1406 | if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n)) | ||
| 1338 | { | 1407 | { |
| 1339 | PARSE_MULTIBYTE_SEQ (ptr, nbytes, bytes); | 1408 | while (n--) |
| 1340 | ptr += bytes; | 1409 | *to++ = *p++; |
| 1341 | nbytes -= bytes; | 1410 | } |
| 1342 | } | ||
| 1343 | else | 1411 | else |
| 1344 | { | 1412 | { |
| 1345 | ptr++; | 1413 | *to++ = LEADING_CODE_8_BIT_CONTROL; |
| 1346 | nbytes--; | 1414 | *to++ = *p++ + 0x20; |
| 1347 | } | 1415 | } |
| 1348 | chars++; | 1416 | chars++; |
| 1349 | } | 1417 | } |
| 1418 | if (nchars) | ||
| 1419 | *nchars = chars; | ||
| 1420 | return (to - str); | ||
| 1421 | } | ||
| 1350 | 1422 | ||
| 1351 | return chars; | 1423 | /* Convert unibyte text at STR of NBYTES bytes to a multibyte text |
| 1424 | that contains the same single-byte characters. It actually | ||
| 1425 | converts all 8-bit characters to multibyte forms. It is assured | ||
| 1426 | that we can use LEN bytes at STR as a work area and that is | ||
| 1427 | enough. */ | ||
| 1428 | |||
| 1429 | int | ||
| 1430 | str_to_multibyte (str, len, bytes) | ||
| 1431 | unsigned char *str; | ||
| 1432 | int len, bytes; | ||
| 1433 | { | ||
| 1434 | unsigned char *p = str, *endp = str + bytes; | ||
| 1435 | unsigned char *to; | ||
| 1436 | int c; | ||
| 1437 | |||
| 1438 | while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++; | ||
| 1439 | if (p == endp) | ||
| 1440 | return bytes; | ||
| 1441 | to = p; | ||
| 1442 | bytes = endp - p; | ||
| 1443 | endp = str + len; | ||
| 1444 | safe_bcopy (p, endp - bytes, bytes); | ||
| 1445 | p = endp - bytes; | ||
| 1446 | while (p < endp) | ||
| 1447 | { | ||
| 1448 | if (*p < 0x80 || *p >= 0xA0) | ||
| 1449 | *to++ = *p++; | ||
| 1450 | else | ||
| 1451 | *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20; | ||
| 1452 | } | ||
| 1453 | return (to - str); | ||
| 1352 | } | 1454 | } |
| 1353 | 1455 | ||
| 1456 | /* Arrange multibyte text at STR of LEN bytes as a unibyte text. It | ||
| 1457 | actually converts only 8-bit characters in the range 0x80..0x9F to | ||
| 1458 | unibyte forms. */ | ||
| 1459 | |||
| 1460 | int | ||
| 1461 | str_as_unibyte (str, bytes) | ||
| 1462 | unsigned char *str; | ||
| 1463 | int bytes; | ||
| 1464 | { | ||
| 1465 | unsigned char *p = str, *endp = str + bytes; | ||
| 1466 | unsigned char *to = str; | ||
| 1467 | |||
| 1468 | while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++; | ||
| 1469 | to = p; | ||
| 1470 | while (p < endp) | ||
| 1471 | { | ||
| 1472 | if (*p == LEADING_CODE_8_BIT_CONTROL) | ||
| 1473 | *to++ = *(p + 1) - 0x20, p += 2; | ||
| 1474 | else | ||
| 1475 | *to++ = *p++; | ||
| 1476 | } | ||
| 1477 | return (to - str); | ||
| 1478 | } | ||
| 1479 | |||
| 1480 | |||
| 1354 | DEFUN ("string", Fstring, Sstring, 1, MANY, 0, | 1481 | DEFUN ("string", Fstring, Sstring, 1, MANY, 0, |
| 1355 | "Concatenate all the argument characters and make the result a string.") | 1482 | "Concatenate all the argument characters and make the result a string.") |
| 1356 | (n, args) | 1483 | (n, args) |
| @@ -1360,28 +1487,16 @@ DEFUN ("string", Fstring, Sstring, 1, MANY, 0, | |||
| 1360 | int i; | 1487 | int i; |
| 1361 | unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n); | 1488 | unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n); |
| 1362 | unsigned char *p = buf; | 1489 | unsigned char *p = buf; |
| 1363 | Lisp_Object val; | 1490 | int c; |
| 1364 | int c, multibyte_p = 0; | ||
| 1365 | 1491 | ||
| 1366 | for (i = 0; i < n; i++) | 1492 | for (i = 0; i < n; i++) |
| 1367 | { | 1493 | { |
| 1368 | CHECK_NUMBER (args[i], 0); | 1494 | CHECK_NUMBER (args[i], 0); |
| 1369 | c = XINT (args[i]); | 1495 | c = XINT (args[i]); |
| 1370 | p += CHAR_STRING (c, p); | 1496 | p += CHAR_STRING (c, p); |
| 1371 | |||
| 1372 | if (!SINGLE_BYTE_CHAR_P (c)) | ||
| 1373 | multibyte_p = 1; | ||
| 1374 | } | 1497 | } |
| 1375 | 1498 | ||
| 1376 | /* Here, we can't use make_string_from_bytes because of the byte | 1499 | return make_string_from_bytes (buf, n, p - buf); |
| 1377 | combining problem. Make a multibyte string if there is any | ||
| 1378 | multibyte character in ARGS to make sure that `(string 2276)' | ||
| 1379 | returns a multibyte string if running --unibyte. */ | ||
| 1380 | if (multibyte_p) | ||
| 1381 | val = make_multibyte_string (buf, n, p - buf); | ||
| 1382 | else | ||
| 1383 | val = make_unibyte_string (buf, p - buf); | ||
| 1384 | return val; | ||
| 1385 | } | 1500 | } |
| 1386 | 1501 | ||
| 1387 | #endif /* emacs */ | 1502 | #endif /* emacs */ |
| @@ -1448,31 +1563,27 @@ init_charset_once () | |||
| 1448 | iso_charset_table [i][j][k] = -1; | 1563 | iso_charset_table [i][j][k] = -1; |
| 1449 | 1564 | ||
| 1450 | for (i = 0; i < 256; i++) | 1565 | for (i = 0; i < 256; i++) |
| 1451 | BYTES_BY_CHAR_HEAD (i) = 1; | 1566 | bytes_by_char_head[i] = 1; |
| 1452 | for (i = MIN_CHARSET_OFFICIAL_DIMENSION1; | 1567 | for (i = MIN_CHARSET_OFFICIAL_DIMENSION1; |
| 1453 | i <= MAX_CHARSET_OFFICIAL_DIMENSION1; i++) | 1568 | i <= MAX_CHARSET_OFFICIAL_DIMENSION1; i++) |
| 1454 | BYTES_BY_CHAR_HEAD (i) = 2; | 1569 | bytes_by_char_head[i] = 2; |
| 1455 | for (i = MIN_CHARSET_OFFICIAL_DIMENSION2; | 1570 | for (i = MIN_CHARSET_OFFICIAL_DIMENSION2; |
| 1456 | i <= MAX_CHARSET_OFFICIAL_DIMENSION2; i++) | 1571 | i <= MAX_CHARSET_OFFICIAL_DIMENSION2; i++) |
| 1457 | BYTES_BY_CHAR_HEAD (i) = 3; | 1572 | bytes_by_char_head[i] = 3; |
| 1458 | BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 3; | 1573 | bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3; |
| 1459 | BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3; | 1574 | bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3; |
| 1460 | BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4; | 1575 | bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4; |
| 1461 | BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4; | 1576 | bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4; |
| 1462 | /* The followings don't reflect the actual bytes, but just to tell | 1577 | bytes_by_char_head[LEADING_CODE_8_BIT_CONTROL] = 2; |
| 1463 | that it is a start of a multibyte character. */ | ||
| 1464 | BYTES_BY_CHAR_HEAD (0x80) = 2; | ||
| 1465 | BYTES_BY_CHAR_HEAD (0x9E) = 2; | ||
| 1466 | BYTES_BY_CHAR_HEAD (0x9F) = 2; | ||
| 1467 | 1578 | ||
| 1468 | for (i = 0; i < 128; i++) | 1579 | for (i = 0; i < 128; i++) |
| 1469 | WIDTH_BY_CHAR_HEAD (i) = 1; | 1580 | width_by_char_head[i] = 1; |
| 1470 | for (; i < 256; i++) | 1581 | for (; i < 256; i++) |
| 1471 | WIDTH_BY_CHAR_HEAD (i) = 4; | 1582 | width_by_char_head[i] = 4; |
| 1472 | WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 1; | 1583 | width_by_char_head[LEADING_CODE_PRIVATE_11] = 1; |
| 1473 | WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 2; | 1584 | width_by_char_head[LEADING_CODE_PRIVATE_12] = 2; |
| 1474 | WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 1; | 1585 | width_by_char_head[LEADING_CODE_PRIVATE_21] = 1; |
| 1475 | WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 2; | 1586 | width_by_char_head[LEADING_CODE_PRIVATE_22] = 2; |
| 1476 | 1587 | ||
| 1477 | { | 1588 | { |
| 1478 | Lisp_Object val; | 1589 | Lisp_Object val; |
| @@ -1498,13 +1609,20 @@ init_charset_once () | |||
| 1498 | void | 1609 | void |
| 1499 | syms_of_charset () | 1610 | syms_of_charset () |
| 1500 | { | 1611 | { |
| 1612 | Qcharset = intern ("charset"); | ||
| 1613 | staticpro (&Qcharset); | ||
| 1614 | |||
| 1501 | Qascii = intern ("ascii"); | 1615 | Qascii = intern ("ascii"); |
| 1502 | staticpro (&Qascii); | 1616 | staticpro (&Qascii); |
| 1503 | 1617 | ||
| 1504 | Qcharset = intern ("charset"); | 1618 | Qeight_bit_control = intern ("eight-bit-control"); |
| 1505 | staticpro (&Qcharset); | 1619 | staticpro (&Qeight_bit_control); |
| 1620 | |||
| 1621 | Qeight_bit_graphic = intern ("eight-bit-graphic"); | ||
| 1622 | staticpro (&Qeight_bit_graphic); | ||
| 1506 | 1623 | ||
| 1507 | /* Define ASCII charset now. */ | 1624 | /* Define special charsets ascii, eight-bit-control, and |
| 1625 | eight-bit-graphic. */ | ||
| 1508 | update_charset_table (make_number (CHARSET_ASCII), | 1626 | update_charset_table (make_number (CHARSET_ASCII), |
| 1509 | make_number (1), make_number (94), | 1627 | make_number (1), make_number (94), |
| 1510 | make_number (1), | 1628 | make_number (1), |
| @@ -1517,6 +1635,32 @@ syms_of_charset () | |||
| 1517 | CHARSET_SYMBOL (CHARSET_ASCII) = Qascii; | 1635 | CHARSET_SYMBOL (CHARSET_ASCII) = Qascii; |
| 1518 | Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII)); | 1636 | Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII)); |
| 1519 | 1637 | ||
| 1638 | update_charset_table (make_number (CHARSET_8_BIT_CONTROL), | ||
| 1639 | make_number (1), make_number (96), | ||
| 1640 | make_number (4), | ||
| 1641 | make_number (0), | ||
| 1642 | make_number (-1), | ||
| 1643 | make_number (-1), | ||
| 1644 | build_string ("8-bit control code (0x80..0x9F)"), | ||
| 1645 | build_string ("8-bit control code (0x80..0x9F)"), | ||
| 1646 | build_string ("8-bit control code (0x80..0x9F)")); | ||
| 1647 | CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control; | ||
| 1648 | Fput (Qeight_bit_control, Qcharset, | ||
| 1649 | CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL)); | ||
| 1650 | |||
| 1651 | update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC), | ||
| 1652 | make_number (1), make_number (96), | ||
| 1653 | make_number (4), | ||
| 1654 | make_number (0), | ||
| 1655 | make_number (-1), | ||
| 1656 | make_number (-1), | ||
| 1657 | build_string ("8-bit graphic char"), | ||
| 1658 | build_string ("8-bit graphic char (0xA0..0xFF)"), | ||
| 1659 | build_string ("8-bit graphic char (0xA0..0xFF)")); | ||
| 1660 | CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic; | ||
| 1661 | Fput (Qeight_bit_graphic, Qcharset, | ||
| 1662 | CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC)); | ||
| 1663 | |||
| 1520 | Qauto_fill_chars = intern ("auto-fill-chars"); | 1664 | Qauto_fill_chars = intern ("auto-fill-chars"); |
| 1521 | staticpro (&Qauto_fill_chars); | 1665 | staticpro (&Qauto_fill_chars); |
| 1522 | Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0)); | 1666 | Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0)); |
| @@ -1545,7 +1689,8 @@ syms_of_charset () | |||
| 1545 | 1689 | ||
| 1546 | DEFVAR_LISP ("charset-list", &Vcharset_list, | 1690 | DEFVAR_LISP ("charset-list", &Vcharset_list, |
| 1547 | "List of charsets ever defined."); | 1691 | "List of charsets ever defined."); |
| 1548 | Vcharset_list = Fcons (Qascii, Qnil); | 1692 | Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control, |
| 1693 | Fcons (Qeight_bit_graphic, Qnil))); | ||
| 1549 | 1694 | ||
| 1550 | DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector, | 1695 | DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector, |
| 1551 | "Vector of cons cell of a symbol and translation table ever defined.\n\ | 1696 | "Vector of cons cell of a symbol and translation table ever defined.\n\ |