diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/charset.c | 2788 | ||||
| -rw-r--r-- | src/charset.h | 1179 |
2 files changed, 1700 insertions, 2267 deletions
diff --git a/src/charset.c b/src/charset.c index ff177a6cb29..72a30b410d4 100644 --- a/src/charset.c +++ b/src/charset.c | |||
| @@ -1,7 +1,10 @@ | |||
| 1 | /* Basic multilingual character support. | 1 | /* Basic character set support. |
| 2 | Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN. | 2 | Copyright (C) 1995, 97, 98, 2000, 2001 Electrotechnical Laboratory, JAPAN. |
| 3 | Licensed to the Free Software Foundation. | 3 | Licensed to the Free Software Foundation. |
| 4 | Copyright (C) 2001 Free Software Foundation, Inc. | 4 | Copyright (C) 2001 Free Software Foundation, Inc. |
| 5 | Copyright (C) 2001, 2002 | ||
| 6 | National Institute of Advanced Industrial Science and Technology (AIST) | ||
| 7 | Registration Number H13PRO009 | ||
| 5 | 8 | ||
| 6 | This file is part of GNU Emacs. | 9 | This file is part of GNU Emacs. |
| 7 | 10 | ||
| @@ -20,24 +23,23 @@ along with GNU Emacs; see the file COPYING. If not, write to | |||
| 20 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 23 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
| 21 | Boston, MA 02111-1307, USA. */ | 24 | Boston, MA 02111-1307, USA. */ |
| 22 | 25 | ||
| 23 | /* At first, see the document in `charset.h' to understand the code in | ||
| 24 | this file. */ | ||
| 25 | |||
| 26 | #ifdef emacs | 26 | #ifdef emacs |
| 27 | #include <config.h> | 27 | #include <config.h> |
| 28 | #endif | 28 | #endif |
| 29 | 29 | ||
| 30 | #include <stdio.h> | 30 | #include <stdio.h> |
| 31 | #include <unistd.h> | ||
| 32 | #include <ctype.h> | ||
| 31 | 33 | ||
| 32 | #ifdef emacs | 34 | #ifdef emacs |
| 33 | 35 | ||
| 34 | #include <sys/types.h> | 36 | #include <sys/types.h> |
| 35 | #include "lisp.h" | 37 | #include "lisp.h" |
| 36 | #include "buffer.h" | 38 | #include "character.h" |
| 37 | #include "charset.h" | 39 | #include "charset.h" |
| 38 | #include "composite.h" | ||
| 39 | #include "coding.h" | 40 | #include "coding.h" |
| 40 | #include "disptab.h" | 41 | #include "disptab.h" |
| 42 | #include "buffer.h" | ||
| 41 | 43 | ||
| 42 | #else /* not emacs */ | 44 | #else /* not emacs */ |
| 43 | 45 | ||
| @@ -45,694 +47,873 @@ Boston, MA 02111-1307, USA. */ | |||
| 45 | 47 | ||
| 46 | #endif /* emacs */ | 48 | #endif /* emacs */ |
| 47 | 49 | ||
| 48 | Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic; | ||
| 49 | Lisp_Object Qunknown; | ||
| 50 | 50 | ||
| 51 | /* Declaration of special leading-codes. */ | 51 | /*** GENERAL NOTE on CODED CHARACTER SET (CHARSET) *** |
| 52 | int leading_code_private_11; /* for private DIMENSION1 of 1-column */ | ||
| 53 | int leading_code_private_12; /* for private DIMENSION1 of 2-column */ | ||
| 54 | int leading_code_private_21; /* for private DIMENSION2 of 1-column */ | ||
| 55 | int leading_code_private_22; /* for private DIMENSION2 of 2-column */ | ||
| 56 | 52 | ||
| 57 | /* Declaration of special charsets. The values are set by | 53 | A coded character set ("charset" hereafter) is a meaningful |
| 58 | Fsetup_special_charsets. */ | 54 | collection (i.e. language, culture, functionality, etc) of |
| 59 | int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */ | 55 | characters. Emacs handles multiple charsets at once. In Emacs Lisp |
| 60 | int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */ | 56 | code, a charset is represented by a symbol. In C code, a charset is |
| 61 | int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */ | 57 | represented by its ID number or by a pointer to the struct charset. |
| 62 | int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */ | ||
| 63 | int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */ | ||
| 64 | int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */ | ||
| 65 | int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ | ||
| 66 | 58 | ||
| 67 | Lisp_Object Qcharset_table; | 59 | The actual information about each charset is stored in two places. |
| 60 | Lispy information is stored in the hash table Vcharset_hash_table as | ||
| 61 | a vector (charset attributes). The other information is stored in | ||
| 62 | charset_table as struct charset. | ||
| 68 | 63 | ||
| 69 | /* A char-table containing information of each character set. */ | 64 | */ |
| 70 | Lisp_Object Vcharset_table; | ||
| 71 | 65 | ||
| 72 | /* A vector of charset symbol indexed by charset-id. This is used | 66 | /* List of all charsets. This variable is used only from Emacs |
| 73 | only for returning charset symbol from C functions. */ | 67 | Lisp. */ |
| 74 | Lisp_Object Vcharset_symbol_table; | ||
| 75 | |||
| 76 | /* A list of charset symbols ever defined. */ | ||
| 77 | Lisp_Object Vcharset_list; | 68 | Lisp_Object Vcharset_list; |
| 78 | 69 | ||
| 79 | /* Vector of translation table ever defined. | 70 | /* Hash table that contains attributes of each charset. Keys are |
| 80 | ID of a translation table is used to index this vector. */ | 71 | charset symbols, and values are vectors of charset attributes. */ |
| 81 | Lisp_Object Vtranslation_table_vector; | 72 | Lisp_Object Vcharset_hash_table; |
| 73 | |||
| 74 | /* Table of struct charset. */ | ||
| 75 | struct charset *charset_table; | ||
| 76 | |||
| 77 | static int charset_table_size; | ||
| 78 | int charset_table_used; | ||
| 79 | |||
| 80 | Lisp_Object Qcharsetp; | ||
| 81 | |||
| 82 | /* Special charset symbols. */ | ||
| 83 | Lisp_Object Qascii; | ||
| 84 | Lisp_Object Qeight_bit_control; | ||
| 85 | Lisp_Object Qeight_bit_graphic; | ||
| 86 | Lisp_Object Qiso_8859_1; | ||
| 87 | Lisp_Object Qunicode; | ||
| 88 | |||
| 89 | /* The corresponding charsets. */ | ||
| 90 | int charset_ascii; | ||
| 91 | int charset_8_bit_control; | ||
| 92 | int charset_8_bit_graphic; | ||
| 93 | int charset_iso_8859_1; | ||
| 94 | int charset_unicode; | ||
| 82 | 95 | ||
| 83 | /* A char-table for characters which may invoke auto-filling. */ | 96 | /* Value of charset attribute `charset-iso-plane'. */ |
| 84 | Lisp_Object Vauto_fill_chars; | 97 | Lisp_Object Qgl, Qgr; |
| 85 | 98 | ||
| 86 | Lisp_Object Qauto_fill_chars; | 99 | /* The primary charset. It is a charset of unibyte characters. */ |
| 100 | int charset_primary; | ||
| 87 | 101 | ||
| 88 | /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */ | 102 | /* List of charsets ordered by the priority. */ |
| 89 | int bytes_by_char_head[256]; | 103 | Lisp_Object Vcharset_ordered_list; |
| 90 | int width_by_char_head[256]; | 104 | |
| 105 | /* List of iso-2022 charsets. */ | ||
| 106 | Lisp_Object Viso_2022_charset_list; | ||
| 107 | |||
| 108 | /* List of emacs-mule charsets. */ | ||
| 109 | Lisp_Object Vemacs_mule_charset_list; | ||
| 110 | |||
| 111 | struct charset *emacs_mule_charset[256]; | ||
| 91 | 112 | ||
| 92 | /* Mapping table from ISO2022's charset (specified by DIMENSION, | 113 | /* Mapping table from ISO2022's charset (specified by DIMENSION, |
| 93 | CHARS, and FINAL-CHAR) to Emacs' charset. */ | 114 | CHARS, and FINAL-CHAR) to Emacs' charset. */ |
| 94 | int iso_charset_table[2][2][128]; | 115 | int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL]; |
| 116 | |||
| 117 | Lisp_Object Vcharset_map_directory; | ||
| 118 | |||
| 119 | Lisp_Object Vchar_unified_charset_table; | ||
| 120 | |||
| 121 | #define CODE_POINT_TO_INDEX(charset, code) \ | ||
| 122 | ((charset)->code_linear_p \ | ||
| 123 | ? (code) - (charset)->min_code \ | ||
| 124 | : ((((code) >> 24) <= (charset)->code_space[13]) \ | ||
| 125 | && ((((code) >> 16) & 0xFF) <= (charset)->code_space[9]) \ | ||
| 126 | && ((((code) >> 8) & 0xFF) <= (charset)->code_space[5]) \ | ||
| 127 | && (((code) & 0xFF) <= (charset)->code_space[1])) \ | ||
| 128 | ? (((((code) >> 24) - (charset)->code_space[12]) \ | ||
| 129 | * (charset)->code_space[11]) \ | ||
| 130 | + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \ | ||
| 131 | * (charset)->code_space[7]) \ | ||
| 132 | + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \ | ||
| 133 | * (charset)->code_space[3]) \ | ||
| 134 | + (((code) & 0xFF) - (charset)->code_space[0])) \ | ||
| 135 | : -1) | ||
| 136 | |||
| 137 | |||
| 138 | /* Convert the character index IDX to code-point CODE for CHARSET. | ||
| 139 | It is assumed that IDX is in a valid range. */ | ||
| 140 | |||
| 141 | #define INDEX_TO_CODE_POINT(charset, idx) \ | ||
| 142 | ((charset)->code_linear_p \ | ||
| 143 | ? (idx) + (charset)->min_code \ | ||
| 144 | : (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \ | ||
| 145 | | (((charset)->code_space[4] \ | ||
| 146 | + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \ | ||
| 147 | << 8) \ | ||
| 148 | | (((charset)->code_space[8] \ | ||
| 149 | + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \ | ||
| 150 | << 16) \ | ||
| 151 | | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \ | ||
| 152 | << 24))) | ||
| 95 | 153 | ||
| 96 | /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */ | 154 | |
| 97 | unsigned char *_fetch_multibyte_char_p; | ||
| 98 | int _fetch_multibyte_char_len; | ||
| 99 | 155 | ||
| 100 | /* Offset to add to a non-ASCII value when inserting it. */ | 156 | /* Set to 1 when a charset map is loaded to warn that a buffer text |
| 101 | int nonascii_insert_offset; | 157 | and a string data may be relocated. */ |
| 158 | int charset_map_loaded; | ||
| 102 | 159 | ||
| 103 | /* Translation table for converting non-ASCII unibyte characters | 160 | /* Parse the mapping vector MAP which has this form: |
| 104 | to multibyte codes, or nil. */ | 161 | [CODE0 CHAR0 CODE1 CHAR1 ... ] |
| 105 | Lisp_Object Vnonascii_translation_table; | ||
| 106 | 162 | ||
| 107 | /* List of all possible generic characters. */ | 163 | If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char. |
| 108 | Lisp_Object Vgeneric_character_list; | ||
| 109 | 164 | ||
| 110 | 165 | If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char, | |
| 111 | void | 166 | CHARSET->decoder, and CHARSET->encoder. |
| 112 | invalid_character (c) | ||
| 113 | int c; | ||
| 114 | { | ||
| 115 | error ("Invalid character: 0%o, %d, 0x%x", c, c, c); | ||
| 116 | } | ||
| 117 | 167 | ||
| 118 | /* Parse string STR of length LENGTH and fetch information of a | 168 | If CONTROL_FLAG is 2, setup CHARSET->deunifier and |
| 119 | character at STR. Set BYTES to the byte length the character | 169 | Vchar_unify_table. If Vchar_unified_charset_table is non-nil, |
| 120 | occupies, CHARSET, C1, C2 to proper values of the character. */ | 170 | setup it too. */ |
| 121 | |||
| 122 | #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2) \ | ||
| 123 | do { \ | ||
| 124 | (c1) = *(str); \ | ||
| 125 | (bytes) = BYTES_BY_CHAR_HEAD (c1); \ | ||
| 126 | if ((bytes) == 1) \ | ||
| 127 | (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \ | ||
| 128 | else if ((bytes) == 2) \ | ||
| 129 | { \ | ||
| 130 | if ((c1) == LEADING_CODE_8_BIT_CONTROL) \ | ||
| 131 | (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20; \ | ||
| 132 | else \ | ||
| 133 | (charset) = (c1), (c1) = (str)[1] & 0x7F; \ | ||
| 134 | } \ | ||
| 135 | else if ((bytes) == 3) \ | ||
| 136 | { \ | ||
| 137 | if ((c1) < LEADING_CODE_PRIVATE_11) \ | ||
| 138 | (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F; \ | ||
| 139 | else \ | ||
| 140 | (charset) = (str)[1], (c1) = (str)[2] & 0x7F; \ | ||
| 141 | } \ | ||
| 142 | else \ | ||
| 143 | (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F; \ | ||
| 144 | } while (0) | ||
| 145 | |||
| 146 | /* 1 if CHARSET, C1, and C2 compose a valid character, else 0. */ | ||
| 147 | #define CHAR_COMPONENTS_VALID_P(charset, c1, c2) \ | ||
| 148 | ((charset) == CHARSET_ASCII \ | ||
| 149 | ? ((c1) >= 0 && (c1) <= 0x7F) \ | ||
| 150 | : ((charset) == CHARSET_8_BIT_CONTROL \ | ||
| 151 | ? ((c1) >= 0x80 && (c1) <= 0x9F) \ | ||
| 152 | : ((charset) == CHARSET_8_BIT_GRAPHIC \ | ||
| 153 | ? ((c1) >= 0x80 && (c1) <= 0xFF) \ | ||
| 154 | : (CHARSET_DIMENSION (charset) == 1 \ | ||
| 155 | ? ((c1) >= 0x20 && (c1) <= 0x7F) \ | ||
| 156 | : ((c1) >= 0x20 && (c1) <= 0x7F \ | ||
| 157 | && (c2) >= 0x20 && (c2) <= 0x7F))))) | ||
| 158 | |||
| 159 | /* Store multi-byte form of the character C in STR. The caller should | ||
| 160 | allocate at least 4-byte area at STR in advance. Returns the | ||
| 161 | length of the multi-byte form. If C is an invalid character code, | ||
| 162 | return -1. */ | ||
| 163 | 171 | ||
| 164 | int | 172 | static void |
| 165 | char_to_string_1 (c, str) | 173 | parse_charset_map (charset, map, control_flag) |
| 166 | int c; | 174 | struct charset *charset; |
| 167 | unsigned char *str; | 175 | Lisp_Object map; |
| 176 | int control_flag; | ||
| 168 | { | 177 | { |
| 169 | unsigned char *p = str; | 178 | Lisp_Object vec, table; |
| 179 | unsigned min_code = CHARSET_MIN_CODE (charset); | ||
| 180 | unsigned max_code = CHARSET_MAX_CODE (charset); | ||
| 181 | int ascii_compatible_p = charset->ascii_compatible_p; | ||
| 182 | int min_char, max_char, nonascii_min_char; | ||
| 183 | int size; | ||
| 184 | int i; | ||
| 185 | int first; | ||
| 186 | unsigned char *fast_map = charset->fast_map; | ||
| 170 | 187 | ||
| 171 | if (c & CHAR_MODIFIER_MASK) /* This includes the case C is negative. */ | 188 | if (control_flag) |
| 172 | { | 189 | { |
| 173 | /* Multibyte character can't have a modifier bit. */ | 190 | int n = CODE_POINT_TO_INDEX (charset, max_code) + 1; |
| 174 | if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK))) | 191 | unsigned invalid_code = CHARSET_INVALID_CODE (charset); |
| 175 | return -1; | ||
| 176 | 192 | ||
| 177 | /* For Meta, Shift, and Control modifiers, we need special care. */ | 193 | table = Fmake_char_table (Qnil, make_number (invalid_code)); |
| 178 | if (c & CHAR_META) | 194 | if (control_flag == 1) |
| 179 | { | 195 | vec = Fmake_vector (make_number (n), make_number (-1)); |
| 180 | /* Move the meta bit to the right place for a string. */ | 196 | else if (! CHAR_TABLE_P (Vchar_unify_table)) |
| 181 | c = (c & ~CHAR_META) | 0x80; | 197 | Vchar_unify_table = Fmake_char_table (Qnil, make_number (-1)); |
| 182 | } | ||
| 183 | if (c & CHAR_SHIFT) | ||
| 184 | { | ||
| 185 | /* Shift modifier is valid only with [A-Za-z]. */ | ||
| 186 | if ((c & 0377) >= 'A' && (c & 0377) <= 'Z') | ||
| 187 | c &= ~CHAR_SHIFT; | ||
| 188 | else if ((c & 0377) >= 'a' && (c & 0377) <= 'z') | ||
| 189 | c = (c & ~CHAR_SHIFT) - ('a' - 'A'); | ||
| 190 | } | ||
| 191 | if (c & CHAR_CTL) | ||
| 192 | { | ||
| 193 | /* Simulate the code in lread.c. */ | ||
| 194 | /* Allow `\C- ' and `\C-?'. */ | ||
| 195 | if (c == (CHAR_CTL | ' ')) | ||
| 196 | c = 0; | ||
| 197 | else if (c == (CHAR_CTL | '?')) | ||
| 198 | c = 127; | ||
| 199 | /* ASCII control chars are made from letters (both cases), | ||
| 200 | as well as the non-letters within 0100...0137. */ | ||
| 201 | else if ((c & 0137) >= 0101 && (c & 0137) <= 0132) | ||
| 202 | c &= (037 | (~0177 & ~CHAR_CTL)); | ||
| 203 | else if ((c & 0177) >= 0100 && (c & 0177) <= 0137) | ||
| 204 | c &= (037 | (~0177 & ~CHAR_CTL)); | ||
| 205 | } | ||
| 206 | 198 | ||
| 207 | /* If C still has any modifier bits, just ignore it. */ | 199 | charset_map_loaded = 1; |
| 208 | c &= ~CHAR_MODIFIER_MASK; | ||
| 209 | } | 200 | } |
| 210 | 201 | ||
| 211 | if (SINGLE_BYTE_CHAR_P (c)) | 202 | size = ASIZE (map); |
| 203 | nonascii_min_char = MAX_CHAR; | ||
| 204 | CHARSET_COMPACT_CODES_P (charset) = 1; | ||
| 205 | for (first = 1, i = 0; i < size; i += 2) | ||
| 212 | { | 206 | { |
| 213 | if (ASCII_BYTE_P (c) || c >= 0xA0) | 207 | Lisp_Object val; |
| 214 | *p++ = c; | 208 | unsigned code, temp; |
| 215 | else | 209 | int c, char_index; |
| 210 | |||
| 211 | val = AREF (map, i); | ||
| 212 | CHECK_NATNUM (val); | ||
| 213 | code = XFASTINT (val); | ||
| 214 | val = AREF (map, i + 1); | ||
| 215 | CHECK_NATNUM (val); | ||
| 216 | c = XFASTINT (val); | ||
| 217 | |||
| 218 | if (code < min_code || code > max_code) | ||
| 219 | continue; | ||
| 220 | char_index = CODE_POINT_TO_INDEX (charset, code); | ||
| 221 | if (char_index < 0 | ||
| 222 | || c > MAX_CHAR) | ||
| 223 | continue; | ||
| 224 | |||
| 225 | if (control_flag < 2) | ||
| 216 | { | 226 | { |
| 217 | *p++ = LEADING_CODE_8_BIT_CONTROL; | 227 | if (first) |
| 218 | *p++ = c + 0x20; | 228 | { |
| 229 | min_char = max_char = c; | ||
| 230 | first = 0; | ||
| 231 | } | ||
| 232 | else if (c > max_char) | ||
| 233 | max_char = c; | ||
| 234 | else if (c < min_char) | ||
| 235 | min_char = c; | ||
| 236 | if (ascii_compatible_p && ! ASCII_BYTE_P (c) | ||
| 237 | && c < nonascii_min_char) | ||
| 238 | nonascii_min_char = c; | ||
| 239 | |||
| 240 | CHARSET_FAST_MAP_SET (c, fast_map); | ||
| 241 | } | ||
| 242 | |||
| 243 | if (control_flag) | ||
| 244 | { | ||
| 245 | if (control_flag == 1) | ||
| 246 | { | ||
| 247 | if (char_index >= ASIZE (vec)) | ||
| 248 | abort (); | ||
| 249 | ASET (vec, char_index, make_number (c)); | ||
| 250 | if (code > 0x7FFFFFF) | ||
| 251 | { | ||
| 252 | CHAR_TABLE_SET (table, c, | ||
| 253 | Fcons (make_number (code >> 16), | ||
| 254 | make_number (code & 0xFFFF))); | ||
| 255 | CHARSET_COMPACT_CODES_P (charset) = 0; | ||
| 256 | } | ||
| 257 | else | ||
| 258 | CHAR_TABLE_SET (table, c, make_number (code)); | ||
| 259 | } | ||
| 260 | else | ||
| 261 | { | ||
| 262 | int c1 = DECODE_CHAR (charset, code); | ||
| 263 | if (c1 >= 0) | ||
| 264 | { | ||
| 265 | CHAR_TABLE_SET (table, c, make_number (c1)); | ||
| 266 | CHAR_TABLE_SET (Vchar_unify_table, c1, c); | ||
| 267 | if (CHAR_TABLE_P (Vchar_unified_charset_table)) | ||
| 268 | CHAR_TABLE_SET (Vchar_unified_charset_table, c1, | ||
| 269 | CHARSET_NAME (charset)); | ||
| 270 | } | ||
| 271 | } | ||
| 219 | } | 272 | } |
| 220 | } | 273 | } |
| 221 | else if (CHAR_VALID_P (c, 0)) | 274 | |
| 275 | if (control_flag < 2) | ||
| 222 | { | 276 | { |
| 223 | int charset, c1, c2; | 277 | CHARSET_MIN_CHAR (charset) = (ascii_compatible_p |
| 224 | 278 | ? nonascii_min_char : min_char); | |
| 225 | SPLIT_CHAR (c, charset, c1, c2); | 279 | CHARSET_MAX_CHAR (charset) = max_char; |
| 226 | 280 | if (control_flag) | |
| 227 | if (charset >= LEADING_CODE_EXT_11) | ||
| 228 | *p++ = (charset < LEADING_CODE_EXT_12 | ||
| 229 | ? LEADING_CODE_PRIVATE_11 | ||
| 230 | : (charset < LEADING_CODE_EXT_21 | ||
| 231 | ? LEADING_CODE_PRIVATE_12 | ||
| 232 | : (charset < LEADING_CODE_EXT_22 | ||
| 233 | ? LEADING_CODE_PRIVATE_21 | ||
| 234 | : LEADING_CODE_PRIVATE_22))); | ||
| 235 | *p++ = charset; | ||
| 236 | if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32)) | ||
| 237 | return -1; | ||
| 238 | if (c1) | ||
| 239 | { | 281 | { |
| 240 | *p++ = c1 | 0x80; | 282 | CHARSET_DECODER (charset) = vec; |
| 241 | if (c2 > 0) | 283 | CHARSET_ENCODER (charset) = table; |
| 242 | *p++ = c2 | 0x80; | ||
| 243 | } | 284 | } |
| 244 | } | 285 | } |
| 245 | else | 286 | else |
| 246 | return -1; | 287 | CHARSET_DEUNIFIER (charset) = table; |
| 247 | |||
| 248 | return (p - str); | ||
| 249 | } | 288 | } |
| 250 | 289 | ||
| 251 | 290 | ||
| 252 | /* Store multi-byte form of the character C in STR. The caller should | 291 | /* Read a hexadecimal number (preceded by "0x") from the file FP while |
| 253 | allocate at least 4-byte area at STR in advance. Returns the | 292 | paying attention to comment character '#'. */ |
| 254 | length of the multi-byte form. If C is an invalid character code, | ||
| 255 | signal an error. | ||
| 256 | |||
| 257 | Use macro `CHAR_STRING (C, STR)' instead of calling this function | ||
| 258 | directly if C can be an ASCII character. */ | ||
| 259 | 293 | ||
| 260 | int | 294 | static INLINE unsigned |
| 261 | char_to_string (c, str) | 295 | read_hex (fp, eof) |
| 262 | int c; | 296 | FILE *fp; |
| 263 | unsigned char *str; | 297 | int *eof; |
| 264 | { | 298 | { |
| 265 | int len; | 299 | int c; |
| 266 | len = char_to_string_1 (c, str); | 300 | unsigned n; |
| 267 | if (len == -1) | ||
| 268 | invalid_character (c); | ||
| 269 | return len; | ||
| 270 | } | ||
| 271 | 301 | ||
| 302 | while ((c = getc (fp)) != EOF) | ||
| 303 | { | ||
| 304 | if (c == '#' || c == ' ') | ||
| 305 | { | ||
| 306 | while ((c = getc (fp)) != EOF && c != '\n'); | ||
| 307 | } | ||
| 308 | else if (c == '0') | ||
| 309 | { | ||
| 310 | if ((c = getc (fp)) == EOF || c == 'x') | ||
| 311 | break; | ||
| 312 | } | ||
| 313 | } | ||
| 314 | if (c == EOF) | ||
| 315 | { | ||
| 316 | *eof = 1; | ||
| 317 | return 0; | ||
| 318 | } | ||
| 319 | *eof = 0; | ||
| 320 | n = 0; | ||
| 321 | if (c == 'x') | ||
| 322 | while ((c = getc (fp)) != EOF && isxdigit (c)) | ||
| 323 | n = ((n << 4) | ||
| 324 | | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10)); | ||
| 325 | else | ||
| 326 | while ((c = getc (fp)) != EOF && isdigit (c)) | ||
| 327 | n = (n * 10) + c - '0'; | ||
| 328 | return n; | ||
| 329 | } | ||
| 272 | 330 | ||
| 273 | /* Return the non-ASCII character corresponding to multi-byte form at | ||
| 274 | STR of length LEN. If ACTUAL_LEN is not NULL, store the byte | ||
| 275 | length of the multibyte form in *ACTUAL_LEN. | ||
| 276 | 331 | ||
| 277 | Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling | 332 | /* Return a mapping vector for CHARSET loaded from MAPFILE. |
| 278 | this function directly if you want to handle ASCII characters as | 333 | Each line of MAPFILE has this form: |
| 279 | well. */ | 334 | 0xAAAA 0xBBBB |
| 335 | where 0xAAAA is a code-point and 0xBBBB is the corresponding | ||
| 336 | character code. | ||
| 337 | The returned vector has this form: | ||
| 338 | [ CODE1 CHAR1 CODE2 CHAR2 .... ] | ||
| 339 | */ | ||
| 280 | 340 | ||
| 281 | int | 341 | static Lisp_Object |
| 282 | string_to_char (str, len, actual_len) | 342 | load_charset_map (charset, mapfile) |
| 283 | const unsigned char *str; | 343 | struct charset *charset; |
| 284 | int len, *actual_len; | 344 | Lisp_Object mapfile; |
| 285 | { | 345 | { |
| 286 | int c, bytes, charset, c1, c2; | 346 | int fd; |
| 347 | FILE *fp; | ||
| 348 | int num; | ||
| 349 | unsigned *numbers_table[256]; | ||
| 350 | int numbers_table_used; | ||
| 351 | unsigned *numbers; | ||
| 352 | int eof; | ||
| 353 | Lisp_Object suffixes; | ||
| 354 | Lisp_Object vec; | ||
| 355 | int i; | ||
| 287 | 356 | ||
| 288 | SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2); | 357 | suffixes = Fcons (build_string (".map"), |
| 289 | c = MAKE_CHAR (charset, c1, c2); | 358 | Fcons (build_string (".TXT"), Qnil)); |
| 290 | if (actual_len) | ||
| 291 | *actual_len = bytes; | ||
| 292 | return c; | ||
| 293 | } | ||
| 294 | 359 | ||
| 295 | /* Return the length of the multi-byte form at string STR of length LEN. | 360 | fd = openp (Fcons (Vcharset_map_directory, Qnil), mapfile, suffixes, |
| 296 | Use the macro MULTIBYTE_FORM_LENGTH instead. */ | 361 | NULL, 0); |
| 297 | int | 362 | if (fd < 0 |
| 298 | multibyte_form_length (str, len) | 363 | || ! (fp = fdopen (fd, "r"))) |
| 299 | const unsigned char *str; | 364 | { |
| 300 | int len; | 365 | add_to_log ("Failure in loading charset map: %S", mapfile, Qnil); |
| 301 | { | 366 | return Qnil; |
| 302 | int bytes; | 367 | } |
| 303 | 368 | ||
| 304 | PARSE_MULTIBYTE_SEQ (str, len, bytes); | 369 | numbers_table_used = 0; |
| 305 | return bytes; | 370 | num = 0; |
| 306 | } | 371 | eof = 0; |
| 372 | while (1) | ||
| 373 | { | ||
| 374 | unsigned n = read_hex (fp, &eof); | ||
| 307 | 375 | ||
| 308 | /* Check multibyte form at string STR of length LEN and set variables | 376 | if (eof) |
| 309 | pointed by CHARSET, C1, and C2 to charset and position codes of the | 377 | break; |
| 310 | character at STR, and return 0. If there's no multibyte character, | 378 | if ((num % 0x10000) == 0) |
| 311 | return -1. This should be used only in the macro SPLIT_STRING | 379 | { |
| 312 | which checks range of STR in advance. */ | 380 | if (numbers_table_used == 256) |
| 381 | break; | ||
| 382 | numbers = (unsigned *) alloca (sizeof (unsigned) * 0x10000); | ||
| 383 | numbers_table[numbers_table_used++] = numbers; | ||
| 384 | } | ||
| 385 | *numbers++ = n; | ||
| 386 | num++; | ||
| 387 | } | ||
| 388 | fclose (fp); | ||
| 389 | close (fd); | ||
| 313 | 390 | ||
| 314 | int | 391 | vec = Fmake_vector (make_number (num), Qnil); |
| 315 | split_string (str, len, charset, c1, c2) | 392 | for (i = 0; i < num; i++, numbers++) |
| 316 | const unsigned char *str; | 393 | { |
| 317 | unsigned char *c1, *c2; | 394 | if ((i % 0x10000) == 0) |
| 318 | int len, *charset; | 395 | numbers = numbers_table[i / 0x10000]; |
| 319 | { | 396 | ASET (vec, i, make_number (*numbers)); |
| 320 | register int bytes, cs, code1, code2 = -1; | 397 | } |
| 321 | 398 | ||
| 322 | SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2); | 399 | charset_map_loaded = 1; |
| 323 | if (cs == CHARSET_ASCII) | 400 | |
| 324 | return -1; | 401 | return vec; |
| 325 | *charset = cs; | ||
| 326 | *c1 = code1; | ||
| 327 | *c2 = code2; | ||
| 328 | return 0; | ||
| 329 | } | 402 | } |
| 330 | 403 | ||
| 331 | /* Return 1 iff character C has valid printable glyph. | 404 | static void |
| 332 | Use the macro CHAR_PRINTABLE_P instead. */ | 405 | load_charset (charset) |
| 333 | int | 406 | struct charset *charset; |
| 334 | char_printable_p (c) | ||
| 335 | int c; | ||
| 336 | { | 407 | { |
| 337 | int charset, c1, c2; | 408 | if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED) |
| 409 | { | ||
| 410 | Lisp_Object map; | ||
| 338 | 411 | ||
| 339 | if (ASCII_BYTE_P (c)) | 412 | map = CHARSET_MAP (charset); |
| 340 | return 1; | 413 | if (STRINGP (map)) |
| 341 | else if (SINGLE_BYTE_CHAR_P (c)) | 414 | map = load_charset_map (charset, map); |
| 342 | return 0; | 415 | parse_charset_map (charset, map, 1); |
| 343 | else if (c >= MAX_CHAR) | 416 | CHARSET_METHOD (charset) = CHARSET_METHOD_MAP; |
| 344 | return 0; | 417 | } |
| 345 | |||
| 346 | SPLIT_CHAR (c, charset, c1, c2); | ||
| 347 | if (! CHARSET_DEFINED_P (charset)) | ||
| 348 | return 0; | ||
| 349 | if (CHARSET_CHARS (charset) == 94 | ||
| 350 | ? c1 <= 32 || c1 >= 127 | ||
| 351 | : c1 < 32) | ||
| 352 | return 0; | ||
| 353 | if (CHARSET_DIMENSION (charset) == 2 | ||
| 354 | && (CHARSET_CHARS (charset) == 94 | ||
| 355 | ? c2 <= 32 || c2 >= 127 | ||
| 356 | : c2 < 32)) | ||
| 357 | return 0; | ||
| 358 | return 1; | ||
| 359 | } | 418 | } |
| 360 | 419 | ||
| 361 | /* Translate character C by translation table TABLE. If C | 420 | |
| 362 | is negative, translate a character specified by CHARSET, C1, and C2 | 421 | DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0, |
| 363 | (C1 and C2 are code points of the character). If no translation is | 422 | doc: /* Return non-nil if and only if OBJECT is a charset.*/) |
| 364 | found in TABLE, return C. */ | 423 | (object) |
| 365 | int | 424 | Lisp_Object object; |
| 366 | translate_char (table, c, charset, c1, c2) | ||
| 367 | Lisp_Object table; | ||
| 368 | int c, charset, c1, c2; | ||
| 369 | { | 425 | { |
| 370 | Lisp_Object ch; | 426 | return (CHARSETP (object) ? Qt : Qnil); |
| 371 | int alt_charset, alt_c1, alt_c2, dimension; | ||
| 372 | |||
| 373 | if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F)); | ||
| 374 | if (!CHAR_TABLE_P (table) | ||
| 375 | || (ch = Faref (table, make_number (c)), !NATNUMP (ch))) | ||
| 376 | return c; | ||
| 377 | |||
| 378 | SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2); | ||
| 379 | dimension = CHARSET_DIMENSION (alt_charset); | ||
| 380 | if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0)) | ||
| 381 | /* CH is not a generic character, just return it. */ | ||
| 382 | return XFASTINT (ch); | ||
| 383 | |||
| 384 | /* Since CH is a generic character, we must return a specific | ||
| 385 | character which has the same position codes as C from CH. */ | | |
| 386 | if (charset < 0) | ||
| 387 | SPLIT_CHAR (c, charset, c1, c2); | ||
| 388 | if (dimension != CHARSET_DIMENSION (charset)) | ||
| 389 | /* We can't make such a character because of dimension mismatch. */ | ||
| 390 | return c; | ||
| 391 | return MAKE_CHAR (alt_charset, c1, c2); | ||
| 392 | } | 427 | } |
| 393 | 428 | ||
| 394 | /* Convert the unibyte character C to multibyte based on | ||
| 395 | Vnonascii_translation_table or nonascii_insert_offset. If they can't | ||
| 396 | convert C to a valid multibyte character, convert it based on | ||
| 397 | DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */ | ||
| 398 | 429 | ||
| 399 | int | 430 | void |
| 400 | unibyte_char_to_multibyte (c) | 431 | map_charset_chars (c_function, function, charset_symbol, arg) |
| 401 | int c; | 432 | void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object); |
| 433 | Lisp_Object function, charset_symbol, arg; | ||
| 402 | { | 434 | { |
| 403 | if (c < 0400 && c >= 0200) | 435 | int id; |
| 404 | { | 436 | struct charset *charset; |
| 405 | int c_save = c; | 437 | Lisp_Object range; |
| 438 | |||
| 439 | CHECK_CHARSET_GET_ID (charset_symbol, id); | ||
| 440 | charset = CHARSET_FROM_ID (id); | ||
| 441 | |||
| 442 | if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED) | ||
| 443 | load_charset (charset); | ||
| 406 | 444 | ||
| 407 | if (! NILP (Vnonascii_translation_table)) | 445 | if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET) |
| 446 | { | ||
| 447 | range = Fcons (make_number (CHARSET_MIN_CHAR (charset)), | ||
| 448 | make_number (CHARSET_MAX_CHAR (charset))); | ||
| 449 | if (NILP (function)) | ||
| 450 | (*c_function) (arg, range, Qnil); | ||
| 451 | else | ||
| 452 | call2 (function, range, arg); | ||
| 453 | } | ||
| 454 | else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP) | ||
| 455 | { | ||
| 456 | if (! CHAR_TABLE_P (CHARSET_ENCODER (charset))) | ||
| 457 | return; | ||
| 458 | if (CHARSET_ASCII_COMPATIBLE_P (charset)) | ||
| 408 | { | 459 | { |
| 409 | c = XINT (Faref (Vnonascii_translation_table, make_number (c))); | 460 | range = Fcons (make_number (0), make_number (127)); |
| 410 | if (c >= 0400 && ! char_valid_p (c, 0)) | 461 | if (NILP (function)) |
| 411 | c = c_save + DEFAULT_NONASCII_INSERT_OFFSET; | 462 | (*c_function) (arg, range, Qnil); |
| 463 | else | ||
| 464 | call2 (function, range, arg); | ||
| 412 | } | 465 | } |
| 413 | else if (c >= 0240 && nonascii_insert_offset > 0) | 466 | map_char_table (c_function, function, CHARSET_ENCODER (charset), arg, |
| 467 | 0, NULL); | ||
| 468 | } | ||
| 469 | else /* i.e. CHARSET_METHOD_PARENT */ | ||
| 470 | { | ||
| 471 | int from, to, c; | ||
| 472 | unsigned code; | ||
| 473 | int i, j, k, l; | ||
| 474 | int *code_space = CHARSET_CODE_SPACE (charset); | ||
| 475 | Lisp_Object val; | ||
| 476 | |||
| 477 | range = Fcons (Qnil, Qnil); | ||
| 478 | from = to = -2; | ||
| 479 | for (i = code_space[12]; i <= code_space[13]; i++) | ||
| 480 | for (j = code_space[8]; j <= code_space[9]; j++) | ||
| 481 | for (k = code_space[4]; k <= code_space[5]; k++) | ||
| 482 | for (l = code_space[0]; l <= code_space[1]; l++) | ||
| 483 | { | ||
| 484 | code = (i << 24) | (j << 16) | (k << 8) | l; | ||
| 485 | c = DECODE_CHAR (charset, code); | ||
| 486 | if (c == to + 1) | ||
| 487 | { | ||
| 488 | to++; | ||
| 489 | continue; | ||
| 490 | } | ||
| 491 | if (from >= 0) | ||
| 492 | { | ||
| 493 | if (from < to) | ||
| 494 | { | ||
| 495 | XSETCAR (range, make_number (from)); | ||
| 496 | XSETCDR (range, make_number (to)); | ||
| 497 | val = range; | ||
| 498 | } | ||
| 499 | else | ||
| 500 | val = make_number (from); | ||
| 501 | if (NILP (function)) | ||
| 502 | (*c_function) (arg, val, Qnil); | ||
| 503 | else | ||
| 504 | call2 (function, val, arg); | ||
| 505 | } | ||
| 506 | from = to = (c < 0 ? -2 : c); | ||
| 507 | } | ||
| 508 | if (from >= 0) | ||
| 414 | { | 509 | { |
| 415 | c += nonascii_insert_offset; | 510 | if (from < to) |
| 416 | if (c < 0400 || ! char_valid_p (c, 0)) | 511 | { |
| 417 | c = c_save + DEFAULT_NONASCII_INSERT_OFFSET; | 512 | XSETCAR (range, make_number (from)); |
| 513 | XSETCDR (range, make_number (to)); | ||
| 514 | val = range; | ||
| 515 | } | ||
| 516 | else | ||
| 517 | val = make_number (from); | ||
| 518 | if (NILP (function)) | ||
| 519 | (*c_function) (arg, val, Qnil); | ||
| 520 | else | ||
| 521 | call2 (function, val, arg); | ||
| 418 | } | 522 | } |
| 419 | else if (c >= 0240) | ||
| 420 | c = c_save + DEFAULT_NONASCII_INSERT_OFFSET; | ||
| 421 | } | 523 | } |
| 422 | return c; | 524 | } |
| 525 | |||
| 526 | DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 3, 0, | ||
| 527 | doc: /* Call FUNCTION for each characters in CHARSET. | ||
| 528 | FUNCTION is called with three arguments; FROM, TO, and the 3rd optional | ||
| 529 | argument ARG. | ||
| 530 | FROM and TO indicates a range of character sequence that are contained | ||
| 531 | in CHARSET. */) | ||
| 532 | (function, charset, arg) | ||
| 533 | Lisp_Object function, charset, arg; | ||
| 534 | { | ||
| 535 | map_charset_chars (NULL, function, charset, arg); | ||
| 536 | return Qnil; | ||
| 423 | } | 537 | } |
| 424 | 538 | ||
| 425 | 539 | ||
| 426 | /* Convert the multibyte character C to unibyte 8-bit character based | 540 | /* Define a charset according to the arguments. The Nth argument is |
| 427 | on Vnonascii_translation_table or nonascii_insert_offset. If | 541 | the Nth attribute of the charset (the last attribute `charset-id' |
| 428 | REV_TBL is non-nil, it should be a reverse table of | 542 | is not included). See the docstring of `define-charset' for the |
| 429 | Vnonascii_translation_table, i.e. what given by: | 543 | detail. */ |
| 430 | Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0)) */ | ||
| 431 | 544 | ||
| 432 | int | 545 | DEFUN ("define-charset-internal", Fdefine_charset_internal, |
| 433 | multibyte_char_to_unibyte (c, rev_tbl) | 546 | Sdefine_charset_internal, charset_arg_max, MANY, 0, |
| 434 | int c; | 547 | doc: /* For internal use only. */) |
| 435 | Lisp_Object rev_tbl; | 548 | (nargs, args) |
| 549 | int nargs; | ||
| 550 | Lisp_Object *args; | ||
| 436 | { | 551 | { |
| 437 | if (!SINGLE_BYTE_CHAR_P (c)) | 552 | /* Charset attr vector. */ |
| 553 | Lisp_Object attrs; | ||
| 554 | Lisp_Object val; | ||
| 555 | unsigned hash_code; | ||
| 556 | struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table); | ||
| 557 | int i; | ||
| 558 | struct charset charset; | ||
| 559 | int id; | ||
| 560 | int dimension; | ||
| 561 | int new_definition_p; | ||
| 562 | int nchars; | ||
| 563 | |||
| 564 | if (nargs != charset_arg_max) | ||
| 565 | return Fsignal (Qwrong_number_of_arguments, | ||
| 566 | Fcons (intern ("define-charset-internal"), | ||
| 567 | make_number (nargs))); | ||
| 568 | |||
| 569 | attrs = Fmake_vector (make_number (charset_attr_max), Qnil); | ||
| 570 | |||
| 571 | CHECK_SYMBOL (args[charset_arg_name]); | ||
| 572 | ASET (attrs, charset_name, args[charset_arg_name]); | ||
| 573 | |||
| 574 | val = args[charset_arg_code_space]; | ||
| 575 | for (i = 0, dimension = 0, nchars = 1; i < 4; i++) | ||
| 438 | { | 576 | { |
| 439 | int c_save = c; | 577 | int min_byte, max_byte; |
| 578 | |||
| 579 | min_byte = XINT (Faref (val, make_number (i * 2))); | ||
| 580 | max_byte = XINT (Faref (val, make_number (i * 2 + 1))); | ||
| 581 | if (min_byte < 0 || min_byte > max_byte || max_byte >= 256) | ||
| 582 | error ("Invalid :code-space value"); | ||
| 583 | charset.code_space[i * 4] = min_byte; | ||
| 584 | charset.code_space[i * 4 + 1] = max_byte; | ||
| 585 | charset.code_space[i * 4 + 2] = max_byte - min_byte + 1; | ||
| 586 | nchars *= charset.code_space[i * 4 + 2]; | ||
| 587 | charset.code_space[i * 4 + 3] = nchars; | ||
| 588 | if (max_byte > 0) | ||
| 589 | dimension = i + 1; | ||
| 590 | } | ||
| 440 | 591 | ||
| 441 | if (! CHAR_TABLE_P (rev_tbl) | 592 | val = args[charset_arg_dimension]; |
| 442 | && CHAR_TABLE_P (Vnonascii_translation_table)) | 593 | if (NILP (val)) |
| 443 | rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table, | 594 | charset.dimension = dimension; |
| 444 | make_number (0)); | 595 | else |
| 445 | if (CHAR_TABLE_P (rev_tbl)) | 596 | { |
| 446 | { | 597 | CHECK_NATNUM (val); |
| 447 | Lisp_Object temp; | 598 | charset.dimension = XINT (val); |
| 448 | temp = Faref (rev_tbl, make_number (c)); | 599 | if (charset.dimension < 1 || charset.dimension > 4) |
| 449 | if (INTEGERP (temp)) | 600 | args_out_of_range_3 (val, make_number (1), make_number (4)); |
| 450 | c = XINT (temp); | 601 | } |
| 451 | if (c >= 256) | 602 | |
| 452 | c = (c_save & 0177) + 0200; | 603 | charset.code_linear_p |
| 453 | } | 604 | = (charset.dimension == 1 |
| 605 | || (charset.code_space[2] == 256 | ||
| 606 | && (charset.dimension == 2 | ||
| 607 | || (charset.code_space[6] == 256 | ||
| 608 | && (charset.dimension == 3 | ||
| 609 | || charset.code_space[10] == 256))))); | ||
| 610 | |||
| 611 | charset.iso_chars_96 = charset.code_space[2] == 96; | ||
| 612 | |||
| 613 | charset.min_code = (charset.code_space[0] | ||
| 614 | | (charset.code_space[4] << 8) | ||
| 615 | | (charset.code_space[8] << 16) | ||
| 616 | | (charset.code_space[12] << 24)); | ||
| 617 | charset.max_code = (charset.code_space[1] | ||
| 618 | | (charset.code_space[5] << 8) | ||
| 619 | | (charset.code_space[9] << 16) | ||
| 620 | | (charset.code_space[13] << 24)); | ||
| 621 | |||
| 622 | val = args[charset_arg_invalid_code]; | ||
| 623 | if (NILP (val)) | ||
| 624 | { | ||
| 625 | if (charset.min_code > 0) | ||
| 626 | charset.invalid_code = 0; | ||
| 454 | else | 627 | else |
| 455 | { | 628 | { |
| 456 | if (nonascii_insert_offset > 0) | 629 | XSETINT (val, charset.max_code + 1); |
| 457 | c -= nonascii_insert_offset; | 630 | if (XINT (val) == charset.max_code + 1) |
| 458 | if (c < 128 || c >= 256) | 631 | charset.invalid_code = charset.max_code + 1; |
| 459 | c = (c_save & 0177) + 0200; | 632 | else |
| 633 | error ("Attribute :invalid-code must be specified"); | ||
| 460 | } | 634 | } |
| 461 | } | 635 | } |
| 636 | else | ||
| 637 | { | ||
| 638 | CHECK_NATNUM (val); | ||
| 639 | charset.invalid_code = XFASTINT (val); | ||
| 640 | } | ||
| 462 | 641 | ||
| 463 | return c; | 642 | val = args[charset_arg_iso_final]; |
| 464 | } | 643 | if (NILP (val)) |
| 465 | 644 | charset.iso_final = -1; | |
| 466 | 645 | else | |
| 467 | /* Update the table Vcharset_table with the given arguments (see the | 646 | { |
| 468 | document of `define-charset' for the meaning of each argument). | 647 | CHECK_NUMBER (val); |
| 469 | Several other table contents are also updated. The caller should | 648 | if (XINT (val) < '0' || XINT (val) > 127) |
| 470 | check the validity of CHARSET-ID and the remaining arguments in | 649 | error ("Invalid iso-final-char: %d", XINT (val)); |
| 471 | advance. */ | 650 | charset.iso_final = XINT (val); |
| 472 | 651 | } | |
| 473 | void | 652 | |
| 474 | update_charset_table (charset_id, dimension, chars, width, direction, | 653 | val = args[charset_arg_iso_revision]; |
| 475 | iso_final_char, iso_graphic_plane, | 654 | if (NILP (val)) |
| 476 | short_name, long_name, description) | 655 | charset.iso_revision = -1; |
| 477 | Lisp_Object charset_id, dimension, chars, width, direction; | 656 | else |
| 478 | Lisp_Object iso_final_char, iso_graphic_plane; | ||
| 479 | Lisp_Object short_name, long_name, description; | ||
| 480 | { | ||
| 481 | int charset = XINT (charset_id); | ||
| 482 | int bytes; | ||
| 483 | unsigned char leading_code_base, leading_code_ext; | ||
| 484 | |||
| 485 | if (NILP (CHARSET_TABLE_ENTRY (charset))) | ||
| 486 | CHARSET_TABLE_ENTRY (charset) | ||
| 487 | = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil); | ||
| 488 | |||
| 489 | if (NILP (long_name)) | ||
| 490 | long_name = short_name; | ||
| 491 | if (NILP (description)) | ||
| 492 | description = long_name; | ||
| 493 | |||
| 494 | /* Get byte length of multibyte form, base leading-code, and | ||
| 495 | extended leading-code of the charset. See the comment under the | ||
| 496 | title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */ | ||
| 497 | bytes = XINT (dimension); | ||
| 498 | if (charset < MIN_CHARSET_PRIVATE_DIMENSION1) | ||
| 499 | { | 657 | { |
| 500 | /* Official charset, it doesn't have an extended leading-code. */ | 658 | CHECK_NUMBER (val); |
| 501 | if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC) | 659 | if (XINT (val) > 63) |
| 502 | bytes += 1; /* For a base leading-code. */ | 660 | args_out_of_range (make_number (63), val); |
| 503 | leading_code_base = charset; | 661 | charset.iso_revision = XINT (val); |
| 504 | leading_code_ext = 0; | ||
| 505 | } | 662 | } |
| 663 | |||
| 664 | val = args[charset_arg_emacs_mule_id]; | ||
| 665 | if (NILP (val)) | ||
| 666 | charset.emacs_mule_id = -1; | ||
| 506 | else | 667 | else |
| 507 | { | 668 | { |
| 508 | /* Private charset. */ | 669 | CHECK_NATNUM (val); |
| 509 | bytes += 2; /* For base and extended leading-codes. */ | 670 | if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256) |
| 510 | leading_code_base | 671 | error ("Invalid emacs-mule-id: %d", XINT (val)); |
| 511 | = (charset < LEADING_CODE_EXT_12 | 672 | charset.emacs_mule_id = XINT (val); |
| 512 | ? LEADING_CODE_PRIVATE_11 | ||
| 513 | : (charset < LEADING_CODE_EXT_21 | ||
| 514 | ? LEADING_CODE_PRIVATE_12 | ||
| 515 | : (charset < LEADING_CODE_EXT_22 | ||
| 516 | ? LEADING_CODE_PRIVATE_21 | ||
| 517 | : LEADING_CODE_PRIVATE_22))); | ||
| 518 | leading_code_ext = charset; | ||
| 519 | if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes) | ||
| 520 | error ("Invalid dimension for the charset-ID %d", charset); | ||
| 521 | } | 673 | } |
| 522 | 674 | ||
| 523 | CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id; | 675 | charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]); |
| 524 | CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes); | ||
| 525 | CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension; | ||
| 526 | CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars; | ||
| 527 | CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width; | ||
| 528 | CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction; | ||
| 529 | CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX) | ||
| 530 | = make_number (leading_code_base); | ||
| 531 | CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX) | ||
| 532 | = make_number (leading_code_ext); | ||
| 533 | CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char; | ||
| 534 | CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX) | ||
| 535 | = iso_graphic_plane; | ||
| 536 | CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name; | ||
| 537 | CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name; | ||
| 538 | CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description; | ||
| 539 | CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil; | ||
| 540 | 676 | ||
| 541 | { | 677 | charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]); |
| 542 | /* If we have already defined a charset which has the same | 678 | |
| 543 | DIMENSION, CHARS and ISO-FINAL-CHAR but the different | 679 | charset.unified_p = 0; |
| 544 | DIRECTION, we must update the entry REVERSE-CHARSET of both | 680 | |
| 545 | charsets. If there's no such charset, the value of the entry | 681 | bzero (charset.fast_map, sizeof (charset.fast_map)); |
| 546 | is set to nil. */ | 682 | |
| 547 | int i; | 683 | if (! NILP (args[charset_arg_code_offset])) |
| 548 | 684 | { | |
| 549 | for (i = 0; i <= MAX_CHARSET; i++) | 685 | val = args[charset_arg_code_offset]; |
| 550 | if (!NILP (CHARSET_TABLE_ENTRY (i))) | 686 | CHECK_NUMBER (val); |
| 687 | |||
| 688 | charset.method = CHARSET_METHOD_OFFSET; | ||
| 689 | charset.code_offset = XINT (val); | ||
| 690 | |||
| 691 | i = CODE_POINT_TO_INDEX (&charset, charset.min_code); | ||
| 692 | charset.min_char = i + charset.code_offset; | ||
| 693 | i = CODE_POINT_TO_INDEX (&charset, charset.max_code); | ||
| 694 | charset.max_char = i + charset.code_offset; | ||
| 695 | if (charset.max_char > MAX_CHAR) | ||
| 696 | error ("Unsupported max char: %d", charset.max_char); | ||
| 697 | |||
| 698 | for (i = charset.min_char; i < 0x10000 && i <= charset.max_char; | ||
| 699 | i += 128) | ||
| 700 | CHARSET_FAST_MAP_SET (i, charset.fast_map); | ||
| 701 | for (; i <= charset.max_char; i += 0x1000) | ||
| 702 | CHARSET_FAST_MAP_SET (i, charset.fast_map); | ||
| 703 | } | ||
| 704 | else if (! NILP (args[charset_arg_map])) | ||
| 705 | { | ||
| 706 | val = args[charset_arg_map]; | ||
| 707 | ASET (attrs, charset_map, val); | ||
| 708 | if (STRINGP (val)) | ||
| 709 | val = load_charset_map (&charset, val); | ||
| 710 | CHECK_VECTOR (val); | ||
| 711 | parse_charset_map (&charset, val, 0); | ||
| 712 | charset.method = CHARSET_METHOD_MAP_DEFERRED; | ||
| 713 | } | ||
| 714 | else if (! NILP (args[charset_arg_parents])) | ||
| 715 | { | ||
| 716 | val = args[charset_arg_parents]; | ||
| 717 | CHECK_LIST (val); | ||
| 718 | charset.method = CHARSET_METHOD_INHERIT; | ||
| 719 | val = Fcopy_sequence (val); | ||
| 720 | ASET (attrs, charset_parents, val); | ||
| 721 | |||
| 722 | charset.min_char = MAX_CHAR; | ||
| 723 | charset.max_char = 0; | ||
| 724 | for (; ! NILP (val); val = Fcdr (val)) | ||
| 551 | { | 725 | { |
| 552 | if (CHARSET_DIMENSION (i) == XINT (dimension) | 726 | Lisp_Object elt, car_part, cdr_part; |
| 553 | && CHARSET_CHARS (i) == XINT (chars) | 727 | int this_id, offset; |
| 554 | && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char) | 728 | struct charset *this_charset; |
| 555 | && CHARSET_DIRECTION (i) != XINT (direction)) | 729 | |
| 730 | elt = Fcar (val); | ||
| 731 | if (CONSP (elt)) | ||
| 556 | { | 732 | { |
| 557 | CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX) | 733 | car_part = XCAR (elt); |
| 558 | = make_number (i); | 734 | cdr_part = XCDR (elt); |
| 559 | CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id; | 735 | CHECK_CHARSET_GET_ID (car_part, this_id); |
| 560 | break; | 736 | CHECK_NUMBER (cdr_part); |
| 737 | offset = XINT (cdr_part); | ||
| 561 | } | 738 | } |
| 739 | else | ||
| 740 | { | ||
| 741 | CHECK_CHARSET_GET_ID (elt, this_id); | ||
| 742 | offset = 0; | ||
| 743 | } | ||
| 744 | XSETCAR (val, Fcons (make_number (this_id), make_number (offset))); | ||
| 745 | |||
| 746 | this_charset = CHARSET_FROM_ID (this_id); | ||
| 747 | if (charset.min_char > this_charset->min_char) | ||
| 748 | charset.min_char = this_charset->min_char; | ||
| 749 | if (charset.max_char < this_charset->max_char) | ||
| 750 | charset.max_char = this_charset->max_char; | ||
| 751 | for (i = 0; i < 190; i++) | ||
| 752 | charset.fast_map[i] |= this_charset->fast_map[i]; | ||
| 562 | } | 753 | } |
| 563 | if (i > MAX_CHARSET) | ||
| 564 | /* No such a charset. */ | ||
| 565 | CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX) | ||
| 566 | = make_number (-1); | ||
| 567 | } | ||
| 568 | |||
| 569 | if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC | ||
| 570 | && charset < MIN_CHARSET_PRIVATE_DIMENSION1) | ||
| 571 | { | ||
| 572 | bytes_by_char_head[leading_code_base] = bytes; | ||
| 573 | width_by_char_head[leading_code_base] = XINT (width); | ||
| 574 | |||
| 575 | /* Update table emacs_code_class. */ | ||
| 576 | emacs_code_class[charset] = (bytes == 2 | ||
| 577 | ? EMACS_leading_code_2 | ||
| 578 | : (bytes == 3 | ||
| 579 | ? EMACS_leading_code_3 | ||
| 580 | : EMACS_leading_code_4)); | ||
| 581 | } | 754 | } |
| 755 | else | ||
| 756 | error ("None of :code-offset, :map, :parents are specified"); | ||
| 582 | 757 | ||
| 583 | /* Update table iso_charset_table. */ | 758 | val = args[charset_arg_unify_map]; |
| 584 | if (XINT (iso_final_char) >= 0 | 759 | if (! NILP (val) && !STRINGP (val)) |
| 585 | && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0) | 760 | CHECK_VECTOR (val); |
| 586 | ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset; | 761 | ASET (attrs, charset_unify_map, val); |
| 587 | } | ||
| 588 | 762 | ||
| 589 | #ifdef emacs | 763 | CHECK_LIST (args[charset_arg_plist]); |
| 764 | ASET (attrs, charset_plist, args[charset_arg_plist]); | ||
| 590 | 765 | ||
| 591 | /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL | 766 | charset.hash_index = hash_lookup (hash_table, args[charset_arg_name], |
| 592 | is invalid. */ | 767 | &hash_code); |
| 593 | int | 768 | if (charset.hash_index >= 0) |
| 594 | get_charset_id (charset_symbol) | 769 | { |
| 595 | Lisp_Object charset_symbol; | 770 | new_definition_p = 0; |
| 596 | { | 771 | HASH_VALUE (hash_table, charset.hash_index) = attrs; |
| 597 | Lisp_Object val; | 772 | } |
| 598 | int charset; | ||
| 599 | |||
| 600 | /* This originally used a ?: operator, but reportedly the HP-UX | ||
| 601 | compiler version HP92453-01 A.10.32.22 miscompiles that. */ | ||
| 602 | if (SYMBOLP (charset_symbol) | ||
| 603 | && VECTORP (val = Fget (charset_symbol, Qcharset)) | ||
| 604 | && CHARSET_VALID_P (charset = | ||
| 605 | XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]))) | ||
| 606 | return charset; | ||
| 607 | else | 773 | else |
| 608 | return -1; | 774 | { |
| 609 | } | 775 | charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs, |
| 776 | hash_code); | ||
| 777 | if (charset_table_used == charset_table_size) | ||
| 778 | { | ||
| 779 | charset_table_size += 256; | ||
| 780 | charset_table | ||
| 781 | = ((struct charset *) | ||
| 782 | xrealloc (charset_table, | ||
| 783 | sizeof (struct charset) * charset_table_size)); | ||
| 784 | } | ||
| 785 | id = charset_table_used++; | ||
| 786 | ASET (attrs, charset_id, make_number (id)); | ||
| 787 | new_definition_p = 1; | ||
| 788 | } | ||
| 610 | 789 | ||
| 611 | /* Return an identification number for a new private charset of | ||
| 612 | DIMENSION and WIDTH. If there's no more room for the new charset, | ||
| 613 | return 0. */ | ||
| 614 | Lisp_Object | ||
| 615 | get_new_private_charset_id (dimension, width) | ||
| 616 | int dimension, width; | ||
| 617 | { | ||
| 618 | int charset, from, to; | ||
| 619 | 790 | ||
| 620 | if (dimension == 1) | 791 | charset.id = id; |
| 792 | charset_table[id] = charset; | ||
| 793 | |||
| 794 | if (charset.iso_final >= 0) | ||
| 621 | { | 795 | { |
| 622 | from = LEADING_CODE_EXT_11; | 796 | ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96, |
| 623 | to = LEADING_CODE_EXT_21; | 797 | charset.iso_final) = id; |
| 798 | if (new_definition_p) | ||
| 799 | Viso_2022_charset_list = nconc2 (Viso_2022_charset_list, | ||
| 800 | Fcons (make_number (id), Qnil)); | ||
| 624 | } | 801 | } |
| 625 | else | 802 | |
| 803 | if (charset.emacs_mule_id >= 0) | ||
| 626 | { | 804 | { |
| 627 | from = LEADING_CODE_EXT_21; | 805 | emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id); |
| 628 | to = LEADING_CODE_EXT_MAX + 1; | 806 | if (new_definition_p) |
| 807 | Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list, | ||
| 808 | Fcons (make_number (id), Qnil)); | ||
| 629 | } | 809 | } |
| 630 | 810 | ||
| 631 | for (charset = from; charset < to; charset++) | 811 | if (new_definition_p) |
| 632 | if (!CHARSET_DEFINED_P (charset)) break; | 812 | { |
| 813 | Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list); | ||
| 814 | Vcharset_ordered_list = nconc2 (Vcharset_ordered_list, | ||
| 815 | Fcons (make_number (id), Qnil)); | ||
| 816 | } | ||
| 633 | 817 | ||
| 634 | return make_number (charset < to ? charset : 0); | 818 | return Qnil; |
| 635 | } | 819 | } |
| 636 | 820 | ||
| 637 | DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0, | 821 | |
| 638 | doc: /* Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR. | 822 | DEFUN ("define-charset-alias", Fdefine_charset_alias, |
| 639 | If CHARSET-ID is nil, it is decided automatically, which means CHARSET is | 823 | Sdefine_charset_alias, 2, 2, 0, |
| 640 | treated as a private charset. | 824 | doc: /* Define ALIAS as an alias for charset CHARSET. */) |
| 641 | INFO-VECTOR is a vector of the format: | 825 | (alias, charset) |
| 642 | [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE | 826 | Lisp_Object alias, charset; |
| 643 | SHORT-NAME LONG-NAME DESCRIPTION] | ||
| 644 | The meanings of each elements is as follows: | ||
| 645 | DIMENSION (integer) is the number of bytes to represent a character: 1 or 2. | ||
| 646 | CHARS (integer) is the number of characters in a dimension: 94 or 96. | ||
| 647 | WIDTH (integer) is the number of columns a character in the charset | ||
| 648 | occupies on the screen: one of 0, 1, and 2. | ||
| 649 | |||
| 650 | DIRECTION (integer) is the rendering direction of characters in the | ||
| 651 | charset when rendering. If 0, render from left to right, else | ||
| 652 | render from right to left. | ||
| 653 | |||
| 654 | ISO-FINAL-CHAR (character) is the final character of the | ||
| 655 | corresponding ISO 2022 charset. | ||
| 656 | It may be -1 if the charset is internal use only. | ||
| 657 | |||
| 658 | ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked | ||
| 659 | while encoding to variants of ISO 2022 coding system, one of the | ||
| 660 | following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR). | ||
| 661 | It may be -1 if the charset is internal use only. | ||
| 662 | |||
| 663 | SHORT-NAME (string) is the short name to refer to the charset. | ||
| 664 | |||
| 665 | LONG-NAME (string) is the long name to refer to the charset. | ||
| 666 | |||
| 667 | DESCRIPTION (string) is the description string of the charset. */) | ||
| 668 | (charset_id, charset_symbol, info_vector) | ||
| 669 | Lisp_Object charset_id, charset_symbol, info_vector; | ||
| 670 | { | 827 | { |
| 671 | Lisp_Object *vec; | 828 | Lisp_Object attr; |
| 672 | 829 | ||
| 673 | if (!NILP (charset_id)) | 830 | CHECK_CHARSET_GET_ATTR (charset, attr); |
| 674 | CHECK_NUMBER (charset_id); | 831 | Fputhash (alias, attr, Vcharset_hash_table); |
| 675 | CHECK_SYMBOL (charset_symbol); | 832 | return Qnil; |
| 676 | CHECK_VECTOR (info_vector); | 833 | } |
| 677 | 834 | ||
| 678 | if (! NILP (charset_id)) | ||
| 679 | { | ||
| 680 | if (! CHARSET_VALID_P (XINT (charset_id))) | ||
| 681 | error ("Invalid CHARSET: %d", XINT (charset_id)); | ||
| 682 | else if (CHARSET_DEFINED_P (XINT (charset_id))) | ||
| 683 | error ("Already defined charset: %d", XINT (charset_id)); | ||
| 684 | } | ||
| 685 | 835 | ||
| 686 | vec = XVECTOR (info_vector)->contents; | 836 | DEFUN ("primary-charset", Fprimary_charset, Sprimary_charset, 0, 0, 0, |
| 687 | if (XVECTOR (info_vector)->size != 9 | 837 | doc: /* Return the primary charset. */) |
| 688 | || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2) | 838 | () |
| 689 | || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96) | 839 | { |
| 690 | || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2) | 840 | return CHARSET_NAME (CHARSET_FROM_ID (charset_primary)); |
| 691 | || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1) | 841 | } |
| 692 | || !INTEGERP (vec[4]) | 842 | |
| 693 | || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')) | ||
| 694 | || !INTEGERP (vec[5]) | ||
| 695 | || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1) | ||
| 696 | || !STRINGP (vec[6]) | ||
| 697 | || !STRINGP (vec[7]) | ||
| 698 | || !STRINGP (vec[8])) | ||
| 699 | error ("Invalid info-vector argument for defining charset %s", | ||
| 700 | XSYMBOL (charset_symbol)->name->data); | ||
| 701 | |||
| 702 | if (NILP (charset_id)) | ||
| 703 | { | ||
| 704 | charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2])); | ||
| 705 | if (XINT (charset_id) == 0) | ||
| 706 | error ("There's no room for a new private charset %s", | ||
| 707 | XSYMBOL (charset_symbol)->name->data); | ||
| 708 | } | ||
| 709 | 843 | ||
| 710 | update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3], | 844 | DEFUN ("set-primary-charset", Fset_primary_charset, Sset_primary_charset, |
| 711 | vec[4], vec[5], vec[6], vec[7], vec[8]); | 845 | 1, 1, 0, |
| 712 | Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id))); | 846 | doc: /* Set the primary charset to CHARSET. */) |
| 713 | CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol; | 847 | (charset) |
| 714 | Vcharset_list = Fcons (charset_symbol, Vcharset_list); | 848 | Lisp_Object charset; |
| 715 | Fupdate_coding_systems_internal (); | 849 | { |
| 850 | int id; | ||
| 851 | |||
| 852 | CHECK_CHARSET_GET_ID (charset, id); | ||
| 853 | charset_primary = id; | ||
| 716 | return Qnil; | 854 | return Qnil; |
| 717 | } | 855 | } |
| 718 | 856 | ||
| 719 | DEFUN ("generic-character-list", Fgeneric_character_list, | 857 | |
| 720 | Sgeneric_character_list, 0, 0, 0, | 858 | DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0, |
| 721 | doc: /* Return a list of all possible generic characters. | 859 | doc: /* Return a property list of CHARSET. */) |
| 722 | It includes a generic character for a charset not yet defined. */) | 860 | (charset) |
| 723 | () | 861 | Lisp_Object charset; |
| 724 | { | 862 | { |
| 725 | return Vgeneric_character_list; | 863 | Lisp_Object attrs; |
| 864 | |||
| 865 | CHECK_CHARSET_GET_ATTR (charset, attrs); | ||
| 866 | return CHARSET_ATTR_PLIST (attrs); | ||
| 867 | } | ||
| 868 | |||
| 869 | |||
| 870 | DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0, | ||
| 871 | doc: /* Set CHARSET's property list to PLIST. */) | ||
| 872 | (charset, plist) | ||
| 873 | Lisp_Object charset, plist; | ||
| 874 | { | ||
| 875 | Lisp_Object attrs; | ||
| 876 | |||
| 877 | CHECK_CHARSET_GET_ATTR (charset, attrs); | ||
| 878 | CHARSET_ATTR_PLIST (attrs) = plist; | ||
| 879 | return plist; | ||
| 880 | } | ||
| 881 | |||
| 882 | |||
| 883 | DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 2, 0, | ||
| 884 | doc: /* Unify characters of CHARSET with Unicode. */) | ||
| 885 | (charset, unify_map) | ||
| 886 | Lisp_Object charset, unify_map; | ||
| 887 | { | ||
| 888 | int id; | ||
| 889 | struct charset *cs; | ||
| 890 | |||
| 891 | CHECK_CHARSET_GET_ID (charset, id); | ||
| 892 | cs = CHARSET_FROM_ID (id); | ||
| 893 | if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED) | ||
| 894 | load_charset (cs); | ||
| 895 | if (CHARSET_UNIFIED_P (cs) | ||
| 896 | && CHAR_TABLE_P (CHARSET_DEUNIFIER (cs))) | ||
| 897 | return Qnil; | ||
| 898 | CHARSET_UNIFIED_P (cs) = 0; | ||
| 899 | if (NILP (unify_map)) | ||
| 900 | unify_map = CHARSET_UNIFY_MAP (cs); | ||
| 901 | if (STRINGP (unify_map)) | ||
| 902 | unify_map = load_charset_map (cs, unify_map); | ||
| 903 | parse_charset_map (cs, unify_map, 2); | ||
| 904 | CHARSET_UNIFIED_P (cs) = 1; | ||
| 905 | return Qnil; | ||
| 726 | } | 906 | } |
| 727 | 907 | ||
| 728 | DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char, | 908 | DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char, |
| 729 | Sget_unused_iso_final_char, 2, 2, 0, | 909 | Sget_unused_iso_final_char, 2, 2, 0, |
| 730 | doc: /* Return an unsed ISO's final char for a charset of DIMENISION and CHARS. | 910 | doc: /* |
| 911 | Return an unsed ISO's final char for a charset of DIMENISION and CHARS. | ||
| 731 | DIMENSION is the number of bytes to represent a character: 1 or 2. | 912 | DIMENSION is the number of bytes to represent a character: 1 or 2. |
| 732 | CHARS is the number of characters in a dimension: 94 or 96. | 913 | CHARS is the number of characters in a dimension: 94 or 96. |
| 733 | 914 | ||
| 734 | This final char is for private use, thus the range is `0' (48) .. `?' (63). | 915 | This final char is for private use, thus the range is `0' (48) .. `?' (63). |
| 735 | If there's no unused final char for the specified kind of charset, | 916 | If there's no unused final char for the attrified kind of charset, |
| 736 | return nil. */) | 917 | return nil. */) |
| 737 | (dimension, chars) | 918 | (dimension, chars) |
| 738 | Lisp_Object dimension, chars; | 919 | Lisp_Object dimension, chars; |
| @@ -741,128 +922,136 @@ return nil. */) | |||
| 741 | 922 | ||
| 742 | CHECK_NUMBER (dimension); | 923 | CHECK_NUMBER (dimension); |
| 743 | CHECK_NUMBER (chars); | 924 | CHECK_NUMBER (chars); |
| 744 | if (XINT (dimension) != 1 && XINT (dimension) != 2) | 925 | if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3) |
| 745 | error ("Invalid charset dimension %d, it should be 1 or 2", | 926 | args_out_of_range_3 (dimension, make_number (1), make_number (3)); |
| 746 | XINT (dimension)); | ||
| 747 | if (XINT (chars) != 94 && XINT (chars) != 96) | 927 | if (XINT (chars) != 94 && XINT (chars) != 96) |
| 748 | error ("Invalid charset chars %d, it should be 94 or 96", | 928 | args_out_of_range_3 (chars, make_number (94), make_number (96)); |
| 749 | XINT (chars)); | ||
| 750 | for (final_char = '0'; final_char <= '?'; final_char++) | 929 | for (final_char = '0'; final_char <= '?'; final_char++) |
| 751 | { | 930 | if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0) |
| 752 | if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0) | 931 | break; |
| 753 | break; | ||
| 754 | } | ||
| 755 | return (final_char <= '?' ? make_number (final_char) : Qnil); | 932 | return (final_char <= '?' ? make_number (final_char) : Qnil); |
| 756 | } | 933 | } |
| 757 | 934 | ||
| 758 | DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset, | 935 | static void |
| 759 | 4, 4, 0, | 936 | check_iso_charset_parameter (dimension, chars, final_char) |
| 760 | doc: /* Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET. | 937 | Lisp_Object dimension, chars, final_char; |
| 761 | CHARSET should be defined by `defined-charset' in advance. */) | ||
| 762 | (dimension, chars, final_char, charset_symbol) | ||
| 763 | Lisp_Object dimension, chars, final_char, charset_symbol; | ||
| 764 | { | 938 | { |
| 765 | int charset; | 939 | CHECK_NATNUM (dimension); |
| 940 | CHECK_NATNUM (chars); | ||
| 941 | CHECK_NATNUM (final_char); | ||
| 766 | 942 | ||
| 767 | CHECK_NUMBER (dimension); | 943 | if (XINT (dimension) > 3) |
| 768 | CHECK_NUMBER (chars); | 944 | error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension)); |
| 769 | CHECK_NUMBER (final_char); | ||
| 770 | CHECK_SYMBOL (charset_symbol); | ||
| 771 | |||
| 772 | if (XINT (dimension) != 1 && XINT (dimension) != 2) | ||
| 773 | error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension)); | ||
| 774 | if (XINT (chars) != 94 && XINT (chars) != 96) | 945 | if (XINT (chars) != 94 && XINT (chars) != 96) |
| 775 | error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars)); | 946 | error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars)); |
| 776 | if (XINT (final_char) < '0' || XFASTINT (final_char) > '~') | 947 | if (XINT (final_char) < '0' || XINT (final_char) > '~') |
| 777 | error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars)); | 948 | error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars)); |
| 778 | if ((charset = get_charset_id (charset_symbol)) < 0) | 949 | } |
| 779 | error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data); | ||
| 780 | 950 | ||
| 781 | ISO_CHARSET_TABLE (dimension, chars, final_char) = charset; | 951 | |
| 952 | DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset, | ||
| 953 | 4, 4, 0, | ||
| 954 | doc: /* | ||
| 955 | Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET. | ||
| 956 | CHARSET should be defined by `defined-charset' in advance. */) | ||
| 957 | (dimension, chars, final_char, charset) | ||
| 958 | Lisp_Object dimension, chars, final_char, charset; | ||
| 959 | { | ||
| 960 | int id; | ||
| 961 | |||
| 962 | CHECK_CHARSET_GET_ID (charset, id); | ||
| 963 | check_iso_charset_parameter (dimension, chars, final_char); | ||
| 964 | |||
| 965 | ISO_CHARSET_TABLE (dimension, chars, final_char) = id; | ||
| 782 | return Qnil; | 966 | return Qnil; |
| 783 | } | 967 | } |
| 784 | 968 | ||
| 969 | |||
| 785 | /* Return information about charsets in the text at PTR of NBYTES | 970 | /* Return information about charsets in the text at PTR of NBYTES |
| 786 | bytes, which are NCHARS characters. The value is: | 971 | bytes, which are NCHARS characters. The value is: |
| 787 | 972 | ||
| 788 | 0: Each character is represented by one byte. This is always | 973 | 0: Each character is represented by one byte. This is always |
| 789 | true for unibyte text. | 974 | true for a unibyte string. For a multibyte string, true if |
| 790 | 1: No charsets other than ascii eight-bit-control, | 975 | it contains only ASCII characters. |
| 791 | eight-bit-graphic, and latin-1 are found. | ||
| 792 | 2: Otherwise. | ||
| 793 | 976 | ||
| 794 | In addition, if CHARSETS is nonzero, for each found charset N, set | 977 | 1: No charsets other than ascii, eight-bit-control, and |
| 795 | CHARSETS[N] to 1. For that, callers should allocate CHARSETS | 978 | latin-1 are found. |
| 796 | (MAX_CHARSET + 1 elements) in advance. It may lookup a translation | 979 | |
| 797 | table TABLE if supplied. For invalid charsets, set CHARSETS[1] to | 980 | 2: Otherwise. |
| 798 | 1 (note that there's no charset whose ID is 1). */ | 981 | */ |
| 799 | 982 | ||
| 800 | int | 983 | int |
| 801 | find_charset_in_text (ptr, nchars, nbytes, charsets, table) | 984 | string_xstring_p (string) |
| 802 | unsigned char *ptr; | 985 | Lisp_Object string; |
| 803 | int nchars, nbytes, *charsets; | ||
| 804 | Lisp_Object table; | ||
| 805 | { | 986 | { |
| 806 | if (nchars == nbytes) | 987 | unsigned char *p = XSTRING (string)->data; |
| 988 | unsigned char *endp = p + STRING_BYTES (XSTRING (string)); | ||
| 989 | struct charset *charset; | ||
| 990 | |||
| 991 | if (XSTRING (string)->size == STRING_BYTES (XSTRING (string))) | ||
| 992 | return 0; | ||
| 993 | |||
| 994 | charset = CHARSET_FROM_ID (charset_iso_8859_1); | ||
| 995 | while (p < endp) | ||
| 807 | { | 996 | { |
| 808 | if (charsets && nbytes > 0) | 997 | int c = STRING_CHAR_ADVANCE (p); |
| 809 | { | ||
| 810 | unsigned char *endp = ptr + nbytes; | ||
| 811 | int maskbits = 0; | ||
| 812 | 998 | ||
| 813 | while (ptr < endp && maskbits != 7) | 999 | if (ENCODE_CHAR (charset, c) < 0) |
| 814 | { | 1000 | return 2; |
| 815 | maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4); | ||
| 816 | ptr++; | ||
| 817 | } | ||
| 818 | |||
| 819 | if (maskbits & 1) | ||
| 820 | charsets[CHARSET_ASCII] = 1; | ||
| 821 | if (maskbits & 2) | ||
| 822 | charsets[CHARSET_8_BIT_CONTROL] = 1; | ||
| 823 | if (maskbits & 4) | ||
| 824 | charsets[CHARSET_8_BIT_GRAPHIC] = 1; | ||
| 825 | } | ||
| 826 | return 0; | ||
| 827 | } | 1001 | } |
| 828 | else | 1002 | return 1; |
| 829 | { | 1003 | } |
| 830 | int return_val = 1; | ||
| 831 | int bytes, charset, c1, c2; | ||
| 832 | 1004 | ||
| 833 | if (! CHAR_TABLE_P (table)) | ||
| 834 | table = Qnil; | ||
| 835 | 1005 | ||
| 836 | while (nchars-- > 0) | 1006 | /* Find charsets in the string at PTR of NCHARS and NBYTES. |
| 837 | { | ||
| 838 | SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2); | ||
| 839 | ptr += bytes; | ||
| 840 | 1007 | ||
| 841 | if (!CHARSET_DEFINED_P (charset)) | 1008 | CHARSETS is a vector. Each element is a cons of CHARSET and |
| 842 | charset = 1; | 1009 | FOUND-FLAG. CHARSET is a charset id, and FOUND-FLAG is nil or t. |
| 843 | else if (! NILP (table)) | 1010 | FOUND-FLAG t (or nil) means that the corresponding charset is |
| 844 | { | 1011 | already found (or not yet found). |
| 845 | int c = translate_char (table, -1, charset, c1, c2); | ||
| 846 | if (c >= 0) | ||
| 847 | charset = CHAR_CHARSET (c); | ||
| 848 | } | ||
| 849 | 1012 | ||
| 850 | if (return_val == 1 | 1013 | It may look up a translation table TABLE if supplied. */ |
| 851 | && charset != CHARSET_ASCII | ||
| 852 | && charset != CHARSET_8_BIT_CONTROL | ||
| 853 | && charset != CHARSET_8_BIT_GRAPHIC | ||
| 854 | && charset != charset_latin_iso8859_1) | ||
| 855 | return_val = 2; | ||
| 856 | 1014 | ||
| 857 | if (charsets) | 1015 | static void |
| 858 | charsets[charset] = 1; | 1016 | find_charsets_in_text (ptr, nchars, nbytes, charsets, table) |
| 859 | else if (return_val == 2) | 1017 | unsigned char *ptr; |
| 860 | break; | 1018 | int nchars, nbytes; |
| 1019 | Lisp_Object charsets, table; | ||
| 1020 | { | ||
| 1021 | unsigned char *pend = ptr + nbytes; | ||
| 1022 | int ncharsets = ASIZE (charsets); | ||
| 1023 | |||
| 1024 | if (nchars == nbytes) | ||
| 1025 | return; | ||
| 1026 | |||
| 1027 | while (ptr < pend) | ||
| 1028 | { | ||
| 1029 | int c = STRING_CHAR_ADVANCE (ptr); | ||
| 1030 | int i; | ||
| 1031 | int all_found = 1; | ||
| 1032 | Lisp_Object elt; | ||
| 1033 | |||
| 1034 | if (!NILP (table)) | ||
| 1035 | c = translate_char (table, c); | ||
| 1036 | for (i = 0; i < ncharsets; i++) | ||
| 1037 | { | ||
| 1038 | elt = AREF (charsets, i); | ||
| 1039 | if (NILP (XCDR (elt))) | ||
| 1040 | { | ||
| 1041 | struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (elt))); | ||
| 1042 | |||
| 1043 | if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset)) | ||
| 1044 | XCDR (elt) = Qt; | ||
| 1045 | else | ||
| 1046 | all_found = 0; | ||
| 1047 | } | ||
| 861 | } | 1048 | } |
| 862 | return return_val; | 1049 | if (all_found) |
| 1050 | break; | ||
| 863 | } | 1051 | } |
| 864 | } | 1052 | } |
| 865 | 1053 | ||
| 1054 | |||
| 866 | DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region, | 1055 | DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region, |
| 867 | 2, 3, 0, | 1056 | 2, 3, 0, |
| 868 | doc: /* Return a list of charsets in the region between BEG and END. | 1057 | doc: /* Return a list of charsets in the region between BEG and END. |
| @@ -877,7 +1066,7 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */) | |||
| 877 | (beg, end, table) | 1066 | (beg, end, table) |
| 878 | Lisp_Object beg, end, table; | 1067 | Lisp_Object beg, end, table; |
| 879 | { | 1068 | { |
| 880 | int charsets[MAX_CHARSET + 1]; | 1069 | Lisp_Object charsets; |
| 881 | int from, from_byte, to, stop, stop_byte, i; | 1070 | int from, from_byte, to, stop, stop_byte, i; |
| 882 | Lisp_Object val; | 1071 | Lisp_Object val; |
| 883 | 1072 | ||
| @@ -895,11 +1084,14 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */) | |||
| 895 | 1084 | ||
| 896 | from_byte = CHAR_TO_BYTE (from); | 1085 | from_byte = CHAR_TO_BYTE (from); |
| 897 | 1086 | ||
| 898 | bzero (charsets, (MAX_CHARSET + 1) * sizeof (int)); | 1087 | charsets = Fmake_vector (make_number (charset_table_used), Qnil); |
| 1088 | for (i = 0; i < charset_table_used; i++) | ||
| 1089 | ASET (charsets, i, Fcons (make_number (i), Qnil)); | ||
| 1090 | |||
| 899 | while (1) | 1091 | while (1) |
| 900 | { | 1092 | { |
| 901 | find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from, | 1093 | find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from, |
| 902 | stop_byte - from_byte, charsets, table); | 1094 | stop_byte - from_byte, charsets, table); |
| 903 | if (stop < to) | 1095 | if (stop < to) |
| 904 | { | 1096 | { |
| 905 | from = stop, from_byte = stop_byte; | 1097 | from = stop, from_byte = stop_byte; |
| @@ -910,13 +1102,9 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */) | |||
| 910 | } | 1102 | } |
| 911 | 1103 | ||
| 912 | val = Qnil; | 1104 | val = Qnil; |
| 913 | if (charsets[1]) | 1105 | for (i = charset_table_used - 1; i >= 0; i--) |
| 914 | val = Fcons (Qunknown, val); | 1106 | if (!NILP (XCDR (AREF (charsets, i)))) |
| 915 | for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--) | 1107 | val = Fcons (CHARSET_NAME (charset_table + i), val); |
| 916 | if (charsets[i]) | ||
| 917 | val = Fcons (CHARSET_SYMBOL (i), val); | ||
| 918 | if (charsets[0]) | ||
| 919 | val = Fcons (Qascii, val); | ||
| 920 | return val; | 1108 | return val; |
| 921 | } | 1109 | } |
| 922 | 1110 | ||
| @@ -929,838 +1117,471 @@ If the string contains invalid multibyte characters, | |||
| 929 | `unknown' is included in the returned list. | 1117 | `unknown' is included in the returned list. |
| 930 | 1118 | ||
| 931 | If STR is unibyte, the returned list may contain | 1119 | If STR is unibyte, the returned list may contain |
| 932 | only `ascii', `eight-bit-control', and `eight-bit-graphic'. */) | 1120 | only `ascii', `eight-bit-control', and `eight-bit-graphic'. */) |
| 933 | (str, table) | 1121 | (str, table) |
| 934 | Lisp_Object str, table; | 1122 | Lisp_Object str, table; |
| 935 | { | 1123 | { |
| 936 | int charsets[MAX_CHARSET + 1]; | 1124 | Lisp_Object charsets; |
| 937 | int i; | 1125 | int i; |
| 938 | Lisp_Object val; | 1126 | Lisp_Object val; |
| 939 | 1127 | ||
| 940 | CHECK_STRING (str); | 1128 | CHECK_STRING (str); |
| 941 | 1129 | ||
| 942 | bzero (charsets, (MAX_CHARSET + 1) * sizeof (int)); | 1130 | charsets = Fmake_vector (make_number (charset_table_used), Qnil); |
| 943 | find_charset_in_text (XSTRING (str)->data, XSTRING (str)->size, | 1131 | find_charsets_in_text (XSTRING (str)->data, XSTRING (str)->size, |
| 944 | STRING_BYTES (XSTRING (str)), charsets, table); | 1132 | STRING_BYTES (XSTRING (str)), charsets, table); |
| 945 | 1133 | ||
| 946 | val = Qnil; | 1134 | val = Qnil; |
| 947 | if (charsets[1]) | 1135 | for (i = charset_table_used - 1; i >= 0; i--) |
| 948 | val = Fcons (Qunknown, val); | 1136 | if (!NILP (XCDR (AREF (charsets, i)))) |
| 949 | for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--) | 1137 | val = Fcons (CHARSET_NAME (charset_table + i), val); |
| 950 | if (charsets[i]) | ||
| 951 | val = Fcons (CHARSET_SYMBOL (i), val); | ||
| 952 | if (charsets[0]) | ||
| 953 | val = Fcons (Qascii, val); | ||
| 954 | return val; | 1138 | return val; |
| 955 | } | 1139 | } |
| 956 | 1140 | ||
| 957 | 1141 | ||
| 958 | DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0, | 1142 | |
| 959 | doc: /* Return a character made from arguments. | 1143 | /* Return a character corresponding to the code-point CODE of |
| 960 | Internal use only. */) | 1144 | CHARSET. */ |
| 961 | (charset, code1, code2) | 1145 | |
| 962 | Lisp_Object charset, code1, code2; | 1146 | int |
| 1147 | decode_char (charset, code) | ||
| 1148 | struct charset *charset; | ||
| 1149 | unsigned code; | ||
| 963 | { | 1150 | { |
| 964 | int charset_id, c1, c2; | 1151 | int c, char_index; |
| 1152 | enum charset_method method = CHARSET_METHOD (charset); | ||
| 965 | 1153 | ||
| 966 | CHECK_NUMBER (charset); | 1154 | if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset)) |
| 967 | charset_id = XINT (charset); | 1155 | return -1; |
| 968 | if (!CHARSET_DEFINED_P (charset_id)) | ||
| 969 | error ("Invalid charset ID: %d", XINT (charset)); | ||
| 970 | 1156 | ||
| 971 | if (NILP (code1)) | 1157 | if (method == CHARSET_METHOD_MAP_DEFERRED) |
| 972 | c1 = 0; | ||
| 973 | else | ||
| 974 | { | ||
| 975 | CHECK_NUMBER (code1); | ||
| 976 | c1 = XINT (code1); | ||
| 977 | } | ||
| 978 | if (NILP (code2)) | ||
| 979 | c2 = 0; | ||
| 980 | else | ||
| 981 | { | 1158 | { |
| 982 | CHECK_NUMBER (code2); | 1159 | load_charset (charset); |
| 983 | c2 = XINT (code2); | 1160 | method = CHARSET_METHOD (charset); |
| 984 | } | 1161 | } |
| 985 | 1162 | ||
| 986 | if (charset_id == CHARSET_ASCII) | 1163 | if (method == CHARSET_METHOD_INHERIT) |
| 987 | { | ||
| 988 | if (c1 < 0 || c1 > 0x7F) | ||
| 989 | goto invalid_code_posints; | ||
| 990 | return make_number (c1); | ||
| 991 | } | ||
| 992 | else if (charset_id == CHARSET_8_BIT_CONTROL) | ||
| 993 | { | 1164 | { |
| 994 | if (NILP (code1)) | 1165 | Lisp_Object parents; |
| 995 | c1 = 0x80; | ||
| 996 | else if (c1 < 0x80 || c1 > 0x9F) | ||
| 997 | goto invalid_code_posints; | ||
| 998 | return make_number (c1); | ||
| 999 | } | ||
| 1000 | else if (charset_id == CHARSET_8_BIT_GRAPHIC) | ||
| 1001 | { | ||
| 1002 | if (NILP (code1)) | ||
| 1003 | c1 = 0xA0; | ||
| 1004 | else if (c1 < 0xA0 || c1 > 0xFF) | ||
| 1005 | goto invalid_code_posints; | ||
| 1006 | return make_number (c1); | ||
| 1007 | } | ||
| 1008 | else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF) | ||
| 1009 | goto invalid_code_posints; | ||
| 1010 | c1 &= 0x7F; | ||
| 1011 | c2 &= 0x7F; | ||
| 1012 | if (c1 == 0 | ||
| 1013 | ? c2 != 0 | ||
| 1014 | : (c2 == 0 | ||
| 1015 | ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20) | ||
| 1016 | : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2))) | ||
| 1017 | goto invalid_code_posints; | ||
| 1018 | return make_number (MAKE_CHAR (charset_id, c1, c2)); | ||
| 1019 | |||
| 1020 | invalid_code_posints: | ||
| 1021 | error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2); | ||
| 1022 | } | ||
| 1023 | |||
| 1024 | DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0, | ||
| 1025 | doc: /* Return list of charset and one or two position-codes of CHAR. | ||
| 1026 | If CHAR is invalid as a character code, | ||
| 1027 | return a list of symbol `unknown' and CHAR. */) | ||
| 1028 | (ch) | ||
| 1029 | Lisp_Object ch; | ||
| 1030 | { | ||
| 1031 | int c, charset, c1, c2; | ||
| 1032 | |||
| 1033 | CHECK_NUMBER (ch); | ||
| 1034 | c = XFASTINT (ch); | ||
| 1035 | if (!CHAR_VALID_P (c, 1)) | ||
| 1036 | return Fcons (Qunknown, Fcons (ch, Qnil)); | ||
| 1037 | SPLIT_CHAR (XFASTINT (ch), charset, c1, c2); | ||
| 1038 | return (c2 >= 0 | ||
| 1039 | ? Fcons (CHARSET_SYMBOL (charset), | ||
| 1040 | Fcons (make_number (c1), Fcons (make_number (c2), Qnil))) | ||
| 1041 | : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil))); | ||
| 1042 | } | ||
| 1043 | |||
| 1044 | DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0, | ||
| 1045 | doc: /* Return charset of CHAR. */) | ||
| 1046 | (ch) | ||
| 1047 | Lisp_Object ch; | ||
| 1048 | { | ||
| 1049 | CHECK_NUMBER (ch); | ||
| 1050 | |||
| 1051 | return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch))); | ||
| 1052 | } | ||
| 1053 | |||
| 1054 | DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0, | ||
| 1055 | doc: /* Return charset of a character in the current buffer at position POS. | ||
| 1056 | If POS is nil, it defaults to the current point. | ||
| 1057 | If POS is out of range, the value is nil. */) | ||
| 1058 | (pos) | ||
| 1059 | Lisp_Object pos; | ||
| 1060 | { | ||
| 1061 | Lisp_Object ch; | ||
| 1062 | int charset; | ||
| 1063 | 1166 | ||
| 1064 | ch = Fchar_after (pos); | 1167 | parents = CHARSET_PARENTS (charset); |
| 1065 | if (! INTEGERP (ch)) | 1168 | c = -1; |
| 1066 | return ch; | 1169 | for (; CONSP (parents); parents = XCDR (parents)) |
| 1067 | charset = CHAR_CHARSET (XINT (ch)); | 1170 | { |
| 1068 | return CHARSET_SYMBOL (charset); | 1171 | int id = XINT (XCAR (XCAR (parents))); |
| 1069 | } | 1172 | int code_offset = XINT (XCDR (XCAR (parents))); |
| 1070 | 1173 | unsigned this_code = code + code_offset; | |
| 1071 | DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0, | ||
| 1072 | doc: /* Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR. | ||
| 1073 | |||
| 1074 | ISO 2022's designation sequence (escape sequence) distinguishes charsets | ||
| 1075 | by their DIMENSION, CHARS, and FINAL-CHAR, | ||
| 1076 | whereas Emacs distinguishes them by charset symbol. | ||
| 1077 | See the documentation of the function `charset-info' for the meanings of | ||
| 1078 | DIMENSION, CHARS, and FINAL-CHAR. */) | ||
| 1079 | (dimension, chars, final_char) | ||
| 1080 | Lisp_Object dimension, chars, final_char; | ||
| 1081 | { | ||
| 1082 | int charset; | ||
| 1083 | |||
| 1084 | CHECK_NUMBER (dimension); | ||
| 1085 | CHECK_NUMBER (chars); | ||
| 1086 | CHECK_NUMBER (final_char); | ||
| 1087 | |||
| 1088 | if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0) | ||
| 1089 | return Qnil; | ||
| 1090 | return CHARSET_SYMBOL (charset); | ||
| 1091 | } | ||
| 1092 | |||
| 1093 | /* If GENERICP is nonzero, return nonzero iff C is a valid normal or | ||
| 1094 | generic character. If GENERICP is zero, return nonzero iff C is a | ||
| 1095 | valid normal character. Do not call this function directly, | ||
| 1096 | instead use macro CHAR_VALID_P. */ | ||
| 1097 | int | ||
| 1098 | char_valid_p (c, genericp) | ||
| 1099 | int c, genericp; | ||
| 1100 | { | ||
| 1101 | int charset, c1, c2; | ||
| 1102 | 1174 | ||
| 1103 | if (c < 0 || c >= MAX_CHAR) | 1175 | charset = CHARSET_FROM_ID (id); |
| 1104 | return 0; | 1176 | if ((c = DECODE_CHAR (charset, this_code)) >= 0) |
| 1105 | if (SINGLE_BYTE_CHAR_P (c)) | 1177 | break; |
| 1106 | return 1; | 1178 | } |
| 1107 | SPLIT_CHAR (c, charset, c1, c2); | 1179 | } |
| 1108 | if (genericp) | 1180 | else |
| 1109 | { | 1181 | { |
| 1110 | if (c1) | 1182 | char_index = CODE_POINT_TO_INDEX (charset, code); |
| 1183 | |||
| 1184 | if (method == CHARSET_METHOD_MAP) | ||
| 1111 | { | 1185 | { |
| 1112 | if (c2 <= 0) c2 = 0x20; | 1186 | Lisp_Object decoder; |
| 1187 | |||
| 1188 | decoder = CHARSET_DECODER (charset); | ||
| 1189 | if (! VECTORP (decoder)) | ||
| 1190 | return -1; | ||
| 1191 | c = XINT (AREF (decoder, char_index)); | ||
| 1113 | } | 1192 | } |
| 1114 | else | 1193 | else |
| 1115 | { | 1194 | { |
| 1116 | if (c2 <= 0) c1 = c2 = 0x20; | 1195 | c = char_index + CHARSET_CODE_OFFSET (charset); |
| 1117 | } | 1196 | } |
| 1118 | } | 1197 | } |
| 1119 | return (CHARSET_DEFINED_P (charset) | ||
| 1120 | && CHAR_COMPONENTS_VALID_P (charset, c1, c2)); | ||
| 1121 | } | ||
| 1122 | 1198 | ||
| 1123 | DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0, | 1199 | if (CHARSET_UNIFIED_P (charset) |
| 1124 | doc: /* Return t if OBJECT is a valid normal character. | 1200 | && c >= 0) |
| 1125 | If optional arg GENERICP is non-nil, also return t if OBJECT is | 1201 | MAYBE_UNIFY_CHAR (c); |
| 1126 | a valid generic character. */) | ||
| 1127 | (object, genericp) | ||
| 1128 | Lisp_Object object, genericp; | ||
| 1129 | { | ||
| 1130 | if (! NATNUMP (object)) | ||
| 1131 | return Qnil; | ||
| 1132 | return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil); | ||
| 1133 | } | ||
| 1134 | |||
| 1135 | DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte, | ||
| 1136 | Sunibyte_char_to_multibyte, 1, 1, 0, | ||
| 1137 | doc: /* Convert the unibyte character CH to multibyte character. | ||
| 1138 | The conversion is done based on `nonascii-translation-table' (which see) | ||
| 1139 | or `nonascii-insert-offset' (which see). */) | ||
| 1140 | (ch) | ||
| 1141 | Lisp_Object ch; | ||
| 1142 | { | ||
| 1143 | int c; | ||
| 1144 | 1202 | ||
| 1145 | CHECK_NUMBER (ch); | 1203 | return c; |
| 1146 | c = XINT (ch); | ||
| 1147 | if (c < 0 || c >= 0400) | ||
| 1148 | error ("Invalid unibyte character: %d", c); | ||
| 1149 | c = unibyte_char_to_multibyte (c); | ||
| 1150 | if (c < 0) | ||
| 1151 | error ("Can't convert to multibyte character: %d", XINT (ch)); | ||
| 1152 | return make_number (c); | ||
| 1153 | } | 1204 | } |
| 1154 | 1205 | ||
| 1155 | DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte, | ||
| 1156 | Smultibyte_char_to_unibyte, 1, 1, 0, | ||
| 1157 | doc: /* Convert the multibyte character CH to unibyte character. | ||
| 1158 | The conversion is done based on `nonascii-translation-table' (which see) | ||
| 1159 | or `nonascii-insert-offset' (which see). */) | ||
| 1160 | (ch) | ||
| 1161 | Lisp_Object ch; | ||
| 1162 | { | ||
| 1163 | int c; | ||
| 1164 | |||
| 1165 | CHECK_NUMBER (ch); | ||
| 1166 | c = XINT (ch); | ||
| 1167 | if (! CHAR_VALID_P (c, 0)) | ||
| 1168 | error ("Invalid multibyte character: %d", c); | ||
| 1169 | c = multibyte_char_to_unibyte (c, Qnil); | ||
| 1170 | if (c < 0) | ||
| 1171 | error ("Can't convert to unibyte character: %d", XINT (ch)); | ||
| 1172 | return make_number (c); | ||
| 1173 | } | ||
| 1174 | 1206 | ||
| 1175 | DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0, | 1207 | /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to |
| 1176 | doc: /* Return 1 regardless of the argument CHAR. | 1208 | CHARSET, return CHARSET_INVALID_CODE (CHARSET). */ |
| 1177 | This is now an obsolete function. We keep it just for backward compatibility. */) | ||
| 1178 | (ch) | ||
| 1179 | Lisp_Object ch; | ||
| 1180 | { | ||
| 1181 | CHECK_NUMBER (ch); | ||
| 1182 | return make_number (1); | ||
| 1183 | } | ||
| 1184 | 1209 | ||
| 1185 | /* Return how many bytes C will occupy in a multibyte buffer. | 1210 | unsigned |
| 1186 | Don't call this function directly, instead use macro CHAR_BYTES. */ | 1211 | encode_char (charset, c) |
| 1187 | int | 1212 | struct charset *charset; |
| 1188 | char_bytes (c) | ||
| 1189 | int c; | 1213 | int c; |
| 1190 | { | 1214 | { |
| 1191 | int charset; | 1215 | unsigned code; |
| 1216 | enum charset_method method = CHARSET_METHOD (charset); | ||
| 1192 | 1217 | ||
| 1193 | if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1))) | 1218 | if (CHARSET_UNIFIED_P (charset)) |
| 1194 | return 1; | ||
| 1195 | if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0) | ||
| 1196 | return 1; | ||
| 1197 | |||
| 1198 | charset = CHAR_CHARSET (c); | ||
| 1199 | return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1); | ||
| 1200 | } | ||
| 1201 | |||
| 1202 | /* Return the width of character of which multi-byte form starts with | ||
| 1203 | C. The width is measured by how many columns occupied on the | ||
| 1204 | screen when displayed in the current buffer. */ | ||
| 1205 | |||
| 1206 | #define ONE_BYTE_CHAR_WIDTH(c) \ | ||
| 1207 | (c < 0x20 \ | ||
| 1208 | ? (c == '\t' \ | ||
| 1209 | ? XFASTINT (current_buffer->tab_width) \ | ||
| 1210 | : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \ | ||
| 1211 | : (c < 0x7f \ | ||
| 1212 | ? 1 \ | ||
| 1213 | : (c == 0x7F \ | ||
| 1214 | ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \ | ||
| 1215 | : ((! NILP (current_buffer->enable_multibyte_characters) \ | ||
| 1216 | && BASE_LEADING_CODE_P (c)) \ | ||
| 1217 | ? WIDTH_BY_CHAR_HEAD (c) \ | ||
| 1218 | : 4)))) | ||
| 1219 | |||
| 1220 | DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0, | ||
| 1221 | doc: /* Return width of CHAR when displayed in the current buffer. | ||
| 1222 | The width is measured by how many columns it occupies on the screen. | ||
| 1223 | Tab is taken to occupy `tab-width' columns. */) | ||
| 1224 | (ch) | ||
| 1225 | Lisp_Object ch; | ||
| 1226 | { | ||
| 1227 | Lisp_Object val, disp; | ||
| 1228 | int c; | ||
| 1229 | struct Lisp_Char_Table *dp = buffer_display_table (); | ||
| 1230 | |||
| 1231 | CHECK_NUMBER (ch); | ||
| 1232 | |||
| 1233 | c = XINT (ch); | ||
| 1234 | |||
| 1235 | /* Get the way the display table would display it. */ | ||
| 1236 | disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil; | ||
| 1237 | |||
| 1238 | if (VECTORP (disp)) | ||
| 1239 | XSETINT (val, XVECTOR (disp)->size); | ||
| 1240 | else if (SINGLE_BYTE_CHAR_P (c)) | ||
| 1241 | XSETINT (val, ONE_BYTE_CHAR_WIDTH (c)); | ||
| 1242 | else | ||
| 1243 | { | 1219 | { |
| 1244 | int charset = CHAR_CHARSET (c); | 1220 | Lisp_Object deunifier; |
| 1221 | int deunified; | ||
| 1245 | 1222 | ||
| 1246 | XSETFASTINT (val, CHARSET_WIDTH (charset)); | 1223 | deunifier = CHARSET_DEUNIFIER (charset); |
| 1224 | if (! CHAR_TABLE_P (deunifier)) | ||
| 1225 | { | ||
| 1226 | Funify_charset (CHARSET_NAME (charset), Qnil); | ||
| 1227 | deunifier = CHARSET_DEUNIFIER (charset); | ||
| 1228 | } | ||
| 1229 | deunified = XINT (CHAR_TABLE_REF (deunifier, c)); | ||
| 1230 | if (deunified > 0) | ||
| 1231 | c = deunified; | ||
| 1247 | } | 1232 | } |
| 1248 | return val; | ||
| 1249 | } | ||
| 1250 | |||
| 1251 | /* Return width of string STR of length LEN when displayed in the | ||
| 1252 | current buffer. The width is measured by how many columns it | ||
| 1253 | occupies on the screen. */ | ||
| 1254 | 1233 | ||
| 1255 | int | 1234 | if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map) |
| 1256 | strwidth (str, len) | 1235 | || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset)) |
| 1257 | unsigned char *str; | 1236 | return CHARSET_INVALID_CODE (charset); |
| 1258 | int len; | ||
| 1259 | { | ||
| 1260 | return c_string_width (str, len, -1, NULL, NULL); | ||
| 1261 | } | ||
| 1262 | 1237 | ||
| 1263 | /* Return width of string STR of length LEN when displayed in the | 1238 | if (method == CHARSET_METHOD_INHERIT) |
| 1264 | current buffer. The width is measured by how many columns it | ||
| 1265 | occupies on the screen. If PRECISION > 0, return the width of | ||
| 1266 | longest substring that doesn't exceed PRECISION, and set number of | ||
| 1267 | characters and bytes of the substring in *NCHARS and *NBYTES | ||
| 1268 | respectively. */ | ||
| 1269 | |||
| 1270 | int | ||
| 1271 | c_string_width (str, len, precision, nchars, nbytes) | ||
| 1272 | unsigned char *str; | ||
| 1273 | int precision, *nchars, *nbytes; | ||
| 1274 | { | ||
| 1275 | int i = 0, i_byte = 0; | ||
| 1276 | int width = 0; | ||
| 1277 | int chars; | ||
| 1278 | struct Lisp_Char_Table *dp = buffer_display_table (); | ||
| 1279 | |||
| 1280 | while (i_byte < len) | ||
| 1281 | { | 1239 | { |
| 1282 | int bytes, thiswidth; | 1240 | Lisp_Object parents; |
| 1283 | Lisp_Object val; | ||
| 1284 | |||
| 1285 | if (dp) | ||
| 1286 | { | ||
| 1287 | int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes); | ||
| 1288 | 1241 | ||
| 1289 | chars = 1; | 1242 | parents = CHARSET_PARENTS (charset); |
| 1290 | val = DISP_CHAR_VECTOR (dp, c); | 1243 | for (; CONSP (parents); parents = XCDR (parents)) |
| 1291 | if (VECTORP (val)) | ||
| 1292 | thiswidth = XVECTOR (val)->size; | ||
| 1293 | else | ||
| 1294 | thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]); | ||
| 1295 | } | ||
| 1296 | else | ||
| 1297 | { | 1244 | { |
| 1298 | chars = 1; | 1245 | int id = XINT (XCAR (XCAR (parents))); |
| 1299 | PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes); | 1246 | int code_offset = XINT (XCDR (XCAR (parents))); |
| 1300 | thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]); | 1247 | struct charset *this_charset = CHARSET_FROM_ID (id); |
| 1301 | } | ||
| 1302 | 1248 | ||
| 1303 | if (precision > 0 | 1249 | code = ENCODE_CHAR (this_charset, c); |
| 1304 | && (width + thiswidth > precision)) | 1250 | if (code != CHARSET_INVALID_CODE (this_charset) |
| 1305 | { | 1251 | && (code_offset < 0 || code >= code_offset)) |
| 1306 | *nchars = i; | 1252 | { |
| 1307 | *nbytes = i_byte; | 1253 | code -= code_offset; |
| 1308 | return width; | 1254 | if (CODE_POINT_TO_INDEX (charset, code) >= 0) |
| 1255 | return code; | ||
| 1256 | } | ||
| 1309 | } | 1257 | } |
| 1310 | i++; | 1258 | return CHARSET_INVALID_CODE (charset); |
| 1311 | i_byte += bytes; | 1259 | } |
| 1312 | width += thiswidth; | ||
| 1313 | } | ||
| 1314 | 1260 | ||
| 1315 | if (precision > 0) | 1261 | if (method == CHARSET_METHOD_MAP_DEFERRED) |
| 1316 | { | 1262 | { |
| 1317 | *nchars = i; | 1263 | load_charset (charset); |
| 1318 | *nbytes = i_byte; | 1264 | method = CHARSET_METHOD (charset); |
| 1319 | } | 1265 | } |
| 1320 | 1266 | ||
| 1321 | return width; | 1267 | if (method == CHARSET_METHOD_MAP) |
| 1322 | } | ||
| 1323 | |||
| 1324 | /* Return width of Lisp string STRING when displayed in the current | ||
| 1325 | buffer. The width is measured by how many columns it occupies on | ||
| 1326 | the screen while paying attention to compositions. If PRECISION > | ||
| 1327 | 0, return the width of longest substring that doesn't exceed | ||
| 1328 | PRECISION, and set number of characters and bytes of the substring | ||
| 1329 | in *NCHARS and *NBYTES respectively. */ | ||
| 1330 | |||
| 1331 | int | ||
| 1332 | lisp_string_width (string, precision, nchars, nbytes) | ||
| 1333 | Lisp_Object string; | ||
| 1334 | int precision, *nchars, *nbytes; | ||
| 1335 | { | ||
| 1336 | int len = XSTRING (string)->size; | ||
| 1337 | int len_byte = STRING_BYTES (XSTRING (string)); | ||
| 1338 | unsigned char *str = XSTRING (string)->data; | ||
| 1339 | int i = 0, i_byte = 0; | ||
| 1340 | int width = 0; | ||
| 1341 | struct Lisp_Char_Table *dp = buffer_display_table (); | ||
| 1342 | |||
| 1343 | while (i < len) | ||
| 1344 | { | 1268 | { |
| 1345 | int chars, bytes, thiswidth; | 1269 | Lisp_Object encoder; |
| 1346 | Lisp_Object val; | 1270 | Lisp_Object val; |
| 1347 | int cmp_id; | ||
| 1348 | int ignore, end; | ||
| 1349 | 1271 | ||
| 1350 | if (find_composition (i, -1, &ignore, &end, &val, string) | 1272 | encoder = CHARSET_ENCODER (charset); |
| 1351 | && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string)) | 1273 | if (! CHAR_TABLE_P (CHARSET_ENCODER (charset))) |
| 1352 | >= 0)) | 1274 | return CHARSET_INVALID_CODE (charset); |
| 1353 | { | 1275 | val = CHAR_TABLE_REF (encoder, c); |
| 1354 | thiswidth = composition_table[cmp_id]->width; | 1276 | if (CONSP (val)) |
| 1355 | chars = end - i; | 1277 | code = (XINT (XCAR (val)) << 16) | XINT (XCDR (val)); |
| 1356 | bytes = string_char_to_byte (string, end) - i_byte; | ||
| 1357 | } | ||
| 1358 | else if (dp) | ||
| 1359 | { | ||
| 1360 | int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes); | ||
| 1361 | |||
| 1362 | chars = 1; | ||
| 1363 | val = DISP_CHAR_VECTOR (dp, c); | ||
| 1364 | if (VECTORP (val)) | ||
| 1365 | thiswidth = XVECTOR (val)->size; | ||
| 1366 | else | ||
| 1367 | thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]); | ||
| 1368 | } | ||
| 1369 | else | 1278 | else |
| 1370 | { | 1279 | code = XINT (val); |
| 1371 | chars = 1; | 1280 | } |
| 1372 | PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes); | 1281 | else |
| 1373 | thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]); | ||
| 1374 | } | ||
| 1375 | |||
| 1376 | if (precision > 0 | ||
| 1377 | && (width + thiswidth > precision)) | ||
| 1378 | { | ||
| 1379 | *nchars = i; | ||
| 1380 | *nbytes = i_byte; | ||
| 1381 | return width; | ||
| 1382 | } | ||
| 1383 | i += chars; | ||
| 1384 | i_byte += bytes; | ||
| 1385 | width += thiswidth; | ||
| 1386 | } | ||
| 1387 | |||
| 1388 | if (precision > 0) | ||
| 1389 | { | 1282 | { |
| 1390 | *nchars = i; | 1283 | code = c - CHARSET_CODE_OFFSET (charset); |
| 1391 | *nbytes = i_byte; | 1284 | code = INDEX_TO_CODE_POINT (charset, code); |
| 1392 | } | 1285 | } |
| 1393 | 1286 | ||
| 1394 | return width; | 1287 | return code; |
| 1395 | } | 1288 | } |
| 1396 | 1289 | ||
| 1397 | DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0, | ||
| 1398 | doc: /* Return width of STRING when displayed in the current buffer. | ||
| 1399 | Width is measured by how many columns it occupies on the screen. | ||
| 1400 | When calculating width of a multibyte character in STRING, | ||
| 1401 | only the base leading-code is considered; the validity of | ||
| 1402 | the following bytes is not checked. Tabs in STRING are always | ||
| 1403 | taken to occupy `tab-width' columns. */) | ||
| 1404 | (str) | ||
| 1405 | Lisp_Object str; | ||
| 1406 | { | ||
| 1407 | Lisp_Object val; | ||
| 1408 | 1290 | ||
| 1409 | CHECK_STRING (str); | 1291 | DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0, |
| 1410 | XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL)); | 1292 | doc: /* Decode the pair of CHARSET and CODE-POINT into a character. |
| 1411 | return val; | 1293 | Return nil if CODE-POINT is not valid in CHARSET. |
| 1412 | } | ||
| 1413 | 1294 | ||
| 1414 | DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0, | 1295 | CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE). |
| 1415 | doc: /* Return the direction of CHAR. | 1296 | |
| 1416 | The returned value is 0 for left-to-right and 1 for right-to-left. */) | 1297 | Optional argument RESTRICTION specifies a way to map the pair of CCS |
| 1417 | (ch) | 1298 | and CODE-POINT to a chracter. Currently not supported and just ignored. */) |
| 1418 | Lisp_Object ch; | 1299 | (charset, code_point, restriction) |
| 1300 | Lisp_Object charset, code_point, restriction; | ||
| 1419 | { | 1301 | { |
| 1420 | int charset; | 1302 | int c, id; |
| 1303 | unsigned code; | ||
| 1304 | struct charset *charsetp; | ||
| 1421 | 1305 | ||
| 1422 | CHECK_NUMBER (ch); | 1306 | CHECK_CHARSET_GET_ID (charset, id); |
| 1423 | charset = CHAR_CHARSET (XFASTINT (ch)); | 1307 | if (CONSP (code_point)) |
| 1424 | if (!CHARSET_DEFINED_P (charset)) | 1308 | { |
| 1425 | invalid_character (XINT (ch)); | 1309 | CHECK_NATNUM (XCAR (code_point)); |
| 1426 | return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX); | 1310 | CHECK_NATNUM (XCDR (code_point)); |
| 1311 | code = (XINT (XCAR (code_point)) << 16) | (XINT (XCAR (code_point))); | ||
| 1312 | } | ||
| 1313 | else | ||
| 1314 | { | ||
| 1315 | CHECK_NATNUM (code_point); | ||
| 1316 | code = XINT (code_point); | ||
| 1317 | } | ||
| 1318 | charsetp = CHARSET_FROM_ID (id); | ||
| 1319 | c = DECODE_CHAR (charsetp, code); | ||
| 1320 | return (c >= 0 ? make_number (c) : Qnil); | ||
| 1427 | } | 1321 | } |
| 1428 | 1322 | ||
| 1429 | DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0, | ||
| 1430 | doc: /* Return number of characters between BEG and END. */) | ||
| 1431 | (beg, end) | ||
| 1432 | Lisp_Object beg, end; | ||
| 1433 | { | ||
| 1434 | int from, to; | ||
| 1435 | 1323 | ||
| 1436 | CHECK_NUMBER_COERCE_MARKER (beg); | 1324 | DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0, |
| 1437 | CHECK_NUMBER_COERCE_MARKER (end); | 1325 | doc: /* Encode the character CH into a code-point of CHARSET. |
| 1326 | Return nil if CHARSET doesn't include CH. | ||
| 1438 | 1327 | ||
| 1439 | from = min (XFASTINT (beg), XFASTINT (end)); | 1328 | Optional argument RESTRICTION specifies a way to map CHAR to a |
| 1440 | to = max (XFASTINT (beg), XFASTINT (end)); | 1329 | code-point in CCS. Currently not supported and just ignored. */) |
| 1330 | (ch, charset, restriction) | ||
| 1331 | Lisp_Object ch, charset, restriction; | ||
| 1332 | { | ||
| 1333 | int c, id; | ||
| 1334 | unsigned code; | ||
| 1335 | struct charset *charsetp; | ||
| 1441 | 1336 | ||
| 1442 | return make_number (to - from); | 1337 | CHECK_CHARSET_GET_ID (charset, id); |
| 1338 | CHECK_NATNUM (ch); | ||
| 1339 | c = XINT (ch); | ||
| 1340 | charsetp = CHARSET_FROM_ID (id); | ||
| 1341 | code = ENCODE_CHAR (charsetp, ch); | ||
| 1342 | if (code == CHARSET_INVALID_CODE (charsetp)) | ||
| 1343 | return Qnil; | ||
| 1344 | if (code > 0x7FFFFFF) | ||
| 1345 | return Fcons (make_number (code >> 16), make_number (code & 0xFFFF)); | ||
| 1346 | return make_number (code); | ||
| 1443 | } | 1347 | } |
| 1444 | 1348 | ||
| 1445 | /* Return the number of characters in the NBYTES bytes at PTR. | ||
| 1446 | This works by looking at the contents and checking for multibyte sequences. | ||
| 1447 | However, if the current buffer has enable-multibyte-characters = nil, | ||
| 1448 | we treat each byte as a character. */ | ||
| 1449 | 1349 | ||
| 1450 | int | 1350 | DEFUN ("make-char", Fmake_char, Smake_char, 1, 4, 0, |
| 1451 | chars_in_text (ptr, nbytes) | 1351 | doc: /* Return a character of CHARSET whose position code is CODE. |
| 1452 | unsigned char *ptr; | ||
| 1453 | int nbytes; | ||
| 1454 | { | ||
| 1455 | /* current_buffer is null at early stages of Emacs initialization. */ | ||
| 1456 | if (current_buffer == 0 | ||
| 1457 | || NILP (current_buffer->enable_multibyte_characters)) | ||
| 1458 | return nbytes; | ||
| 1459 | |||
| 1460 | return multibyte_chars_in_text (ptr, nbytes); | ||
| 1461 | } | ||
| 1462 | 1352 | ||
| 1463 | /* Return the number of characters in the NBYTES bytes at PTR. | 1353 | If dimension of CHARSET is two, and the third optional arg CODE2 is |
| 1464 | This works by looking at the contents and checking for multibyte sequences. | 1354 | non-nil, CODE actually specifies the first byte of the position code, |
| 1465 | It ignores enable-multibyte-characters. */ | 1355 | and CODE2 specifies the second byte. |
| 1466 | 1356 | ||
| 1467 | int | 1357 | If dimension of CHARSET is three, and the third optional arg CODE2 and |
| 1468 | multibyte_chars_in_text (ptr, nbytes) | 1358 | the fourth optional arg CODE3 are both non-nil, CODE actually |
| 1469 | unsigned char *ptr; | 1359 | specifies the first byte of the position code, CODE2 the second byte, |
| 1470 | int nbytes; | 1360 | and CODE3 the third byte. */) |
| 1361 | (charset, code, code2, code3) | ||
| 1362 | Lisp_Object charset, code, code2, code3; | ||
| 1471 | { | 1363 | { |
| 1472 | unsigned char *endp; | 1364 | int id, dimension; |
| 1473 | int chars, bytes; | 1365 | struct charset *charsetp; |
| 1366 | unsigned c; | ||
| 1474 | 1367 | ||
| 1475 | endp = ptr + nbytes; | 1368 | CHECK_CHARSET_GET_ID (charset, id); |
| 1476 | chars = 0; | 1369 | charsetp = CHARSET_FROM_ID (id); |
| 1477 | 1370 | ||
| 1478 | while (ptr < endp) | 1371 | if (NILP (code)) |
| 1372 | code = make_number (CHARSET_MIN_CODE (charsetp)); | ||
| 1373 | else | ||
| 1479 | { | 1374 | { |
| 1480 | PARSE_MULTIBYTE_SEQ (ptr, endp - ptr, bytes); | 1375 | CHECK_NATNUM (code); |
| 1481 | ptr += bytes; | 1376 | dimension = CHARSET_DIMENSION (charsetp); |
| 1482 | chars++; | 1377 | |
| 1378 | if (!NILP (code2)) | ||
| 1379 | { | ||
| 1380 | CHECK_NATNUM (code2); | ||
| 1381 | if (dimension == 3) | ||
| 1382 | CHECK_NATNUM (code3); | ||
| 1383 | } | ||
| 1483 | } | 1384 | } |
| 1484 | 1385 | ||
| 1485 | return chars; | 1386 | if (dimension == 1 || NILP (code2)) |
| 1387 | c = XFASTINT (code); | ||
| 1388 | else if (dimension == 2) | ||
| 1389 | c = (XFASTINT (code) << 8) | XFASTINT (code2); | ||
| 1390 | else if (dimension == 3) | ||
| 1391 | c = (XFASTINT (code) << 16) | (XFASTINT (code2) << 8) | XFASTINT (code3); | ||
| 1392 | |||
| 1393 | c = DECODE_CHAR (charsetp, c); | ||
| 1394 | return make_number (c); | ||
| 1486 | } | 1395 | } |
| 1487 | 1396 | ||
| 1488 | /* Parse unibyte text at STR of LEN bytes as multibyte text, and | 1397 | |
| 1489 | count the numbers of characters and bytes in it. On counting | 1398 | /* Return the first charset in CHARSET_LIST that contains C. |
| 1490 | bytes, pay attention to the fact that 8-bit characters in the range | 1399 | CHARSET_LIST is a list of charset IDs. If it is nil, use |
| 1491 | 0x80..0x9F are represented by 2 bytes in multibyte text. */ | 1400 | Vcharset_ordered_list. */ |
| 1492 | void | 1401 | |
| 1493 | parse_str_as_multibyte (str, len, nchars, nbytes) | 1402 | struct charset * |
| 1494 | unsigned char *str; | 1403 | char_charset (c, charset_list, code_return) |
| 1495 | int len, *nchars, *nbytes; | 1404 | int c; |
| 1405 | Lisp_Object charset_list; | ||
| 1406 | unsigned *code_return; | ||
| 1496 | { | 1407 | { |
| 1497 | unsigned char *endp = str + len; | 1408 | if (NILP (charset_list)) |
| 1498 | int n, chars = 0, bytes = 0; | 1409 | charset_list = Vcharset_ordered_list; |
| 1499 | 1410 | ||
| 1500 | while (str < endp) | 1411 | while (CONSP (charset_list)) |
| 1501 | { | 1412 | { |
| 1502 | if (UNIBYTE_STR_AS_MULTIBYTE_P (str, endp - str, n)) | 1413 | struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); |
| 1503 | str += n, bytes += n; | 1414 | unsigned code = ENCODE_CHAR (charset, c); |
| 1504 | else | 1415 | |
| 1505 | str++, bytes += 2; | 1416 | if (code != CHARSET_INVALID_CODE (charset)) |
| 1506 | chars++; | 1417 | { |
| 1418 | if (code_return) | ||
| 1419 | *code_return = code; | ||
| 1420 | return charset; | ||
| 1421 | } | ||
| 1422 | charset_list = XCDR (charset_list); | ||
| 1507 | } | 1423 | } |
| 1508 | *nchars = chars; | 1424 | return NULL; |
| 1509 | *nbytes = bytes; | ||
| 1510 | return; | ||
| 1511 | } | 1425 | } |
| 1512 | 1426 | ||
| 1513 | /* Arrange unibyte text at STR of NBYTES bytes as multibyte text. | ||
| 1514 | It actually converts only 8-bit characters in the range 0x80..0x9F | ||
| 1515 | that don't contruct multibyte characters to multibyte forms. If | ||
| 1516 | NCHARS is nonzero, set *NCHARS to the number of characters in the | ||
| 1517 | text. It is assured that we can use LEN bytes at STR as a work | ||
| 1518 | area and that is enough. Return the number of bytes of the | ||
| 1519 | resulting text. */ | ||
| 1520 | 1427 | ||
| 1521 | int | 1428 | DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0, |
| 1522 | str_as_multibyte (str, len, nbytes, nchars) | 1429 | doc: /*Return list of charset and one or two position-codes of CHAR. |
| 1523 | unsigned char *str; | 1430 | If CHAR is invalid as a character code, |
| 1524 | int len, nbytes, *nchars; | 1431 | return a list of symbol `unknown' and CHAR. */) |
| 1432 | (ch) | ||
| 1433 | Lisp_Object ch; | ||
| 1525 | { | 1434 | { |
| 1526 | unsigned char *p = str, *endp = str + nbytes; | 1435 | struct charset *charset; |
| 1527 | unsigned char *to; | 1436 | int c, dimension; |
| 1528 | int chars = 0; | 1437 | unsigned code; |
| 1529 | int n; | 1438 | Lisp_Object val; |
| 1530 | 1439 | ||
| 1531 | while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n)) | 1440 | CHECK_CHARACTER (ch); |
| 1532 | p += n, chars++; | 1441 | c = XFASTINT (ch); |
| 1533 | if (nchars) | 1442 | charset = CHAR_CHARSET (c); |
| 1534 | *nchars = chars; | 1443 | if (! charset) |
| 1535 | if (p == endp) | 1444 | return Fcons (intern ("unknown"), Fcons (ch, Qnil)); |
| 1536 | return nbytes; | 1445 | |
| 1537 | 1446 | code = ENCODE_CHAR (charset, c); | |
| 1538 | to = p; | 1447 | if (code == CHARSET_INVALID_CODE (charset)) |
| 1539 | nbytes = endp - p; | 1448 | abort (); |
| 1540 | endp = str + len; | 1449 | dimension = CHARSET_DIMENSION (charset); |
| 1541 | safe_bcopy (p, endp - nbytes, nbytes); | 1450 | val = (dimension == 1 ? Fcons (make_number (code), Qnil) |
| 1542 | p = endp - nbytes; | 1451 | : dimension == 2 ? Fcons (make_number (code >> 8), |
| 1543 | while (p < endp) | 1452 | Fcons (make_number (code & 0xFF), Qnil)) |
| 1544 | { | 1453 | : Fcons (make_number (code >> 16), |
| 1545 | if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n)) | 1454 | Fcons (make_number ((code >> 8) & 0xFF), |
| 1546 | { | 1455 | Fcons (make_number (code & 0xFF), Qnil)))); |
| 1547 | while (n--) | 1456 | return Fcons (CHARSET_NAME (charset), val); |
| 1548 | *to++ = *p++; | ||
| 1549 | } | ||
| 1550 | else | ||
| 1551 | { | ||
| 1552 | *to++ = LEADING_CODE_8_BIT_CONTROL; | ||
| 1553 | *to++ = *p++ + 0x20; | ||
| 1554 | } | ||
| 1555 | chars++; | ||
| 1556 | } | ||
| 1557 | if (nchars) | ||
| 1558 | *nchars = chars; | ||
| 1559 | return (to - str); | ||
| 1560 | } | 1457 | } |
| 1561 | 1458 | ||
| 1562 | /* Parse unibyte string at STR of LEN bytes, and return the number of | ||
| 1563 | bytes it may ocupy when converted to multibyte string by | ||
| 1564 | `str_to_multibyte'. */ | ||
| 1565 | 1459 | ||
| 1566 | int | 1460 | DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0, |
| 1567 | parse_str_to_multibyte (str, len) | 1461 | doc: /* Return the charset of highest priority that contains CHAR. */) |
| 1568 | unsigned char *str; | 1462 | (ch) |
| 1569 | int len; | 1463 | Lisp_Object ch; |
| 1570 | { | 1464 | { |
| 1571 | unsigned char *endp = str + len; | 1465 | struct charset *charset; |
| 1572 | int bytes; | ||
| 1573 | 1466 | ||
| 1574 | for (bytes = 0; str < endp; str++) | 1467 | CHECK_CHARACTER (ch); |
| 1575 | bytes += (*str < 0x80 || *str >= 0xA0) ? 1 : 2; | 1468 | charset = CHAR_CHARSET (XINT (ch)); |
| 1576 | return bytes; | 1469 | return (CHARSET_NAME (charset)); |
| 1577 | } | 1470 | } |
| 1578 | 1471 | ||
| 1579 | /* Convert unibyte text at STR of NBYTES bytes to multibyte text | ||
| 1580 | that contains the same single-byte characters. It actually | ||
| 1581 | converts all 8-bit characters to multibyte forms. It is assured | ||
| 1582 | that we can use LEN bytes at STR as a work area and that is | ||
| 1583 | enough. */ | ||
| 1584 | 1472 | ||
| 1585 | int | 1473 | DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0, |
| 1586 | str_to_multibyte (str, len, bytes) | 1474 | doc: /* |
| 1587 | unsigned char *str; | 1475 | Return charset of a character in the current buffer at position POS. |
| 1588 | int len, bytes; | 1476 | If POS is nil, it defauls to the current point. |
| 1477 | If POS is out of range, the value is nil. */) | ||
| 1478 | (pos) | ||
| 1479 | Lisp_Object pos; | ||
| 1589 | { | 1480 | { |
| 1590 | unsigned char *p = str, *endp = str + bytes; | 1481 | Lisp_Object ch; |
| 1591 | unsigned char *to; | 1482 | struct charset *charset; |
| 1592 | 1483 | ||
| 1593 | while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++; | 1484 | ch = Fchar_after (pos); |
| 1594 | if (p == endp) | 1485 | if (! INTEGERP (ch)) |
| 1595 | return bytes; | 1486 | return ch; |
| 1596 | to = p; | 1487 | charset = CHAR_CHARSET (XINT (ch)); |
| 1597 | bytes = endp - p; | 1488 | return (CHARSET_NAME (charset)); |
| 1598 | endp = str + len; | ||
| 1599 | safe_bcopy (p, endp - bytes, bytes); | ||
| 1600 | p = endp - bytes; | ||
| 1601 | while (p < endp) | ||
| 1602 | { | ||
| 1603 | if (*p < 0x80 || *p >= 0xA0) | ||
| 1604 | *to++ = *p++; | ||
| 1605 | else | ||
| 1606 | *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20; | ||
| 1607 | } | ||
| 1608 | return (to - str); | ||
| 1609 | } | 1489 | } |
| 1610 | 1490 | ||
| 1611 | /* Arrange multibyte text at STR of LEN bytes as a unibyte text. It | ||
| 1612 | actually converts only 8-bit characters in the range 0x80..0x9F to | ||
| 1613 | unibyte forms. */ | ||
| 1614 | 1491 | ||
| 1615 | int | 1492 | DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0, |
| 1616 | str_as_unibyte (str, bytes) | 1493 | doc: /* |
| 1617 | unsigned char *str; | 1494 | Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR. |
| 1618 | int bytes; | 1495 | |
| 1496 | ISO 2022's designation sequence (escape sequence) distinguishes charsets | ||
| 1497 | by their DIMENSION, CHARS, and FINAL-CHAR, | ||
| 1498 | where as Emacs distinguishes them by charset symbol. | ||
| 1499 | See the documentation of the function `charset-info' for the meanings of | ||
| 1500 | DIMENSION, CHARS, and FINAL-CHAR. */) | ||
| 1501 | (dimension, chars, final_char) | ||
| 1502 | Lisp_Object dimension, chars, final_char; | ||
| 1619 | { | 1503 | { |
| 1620 | unsigned char *p = str, *endp = str + bytes; | 1504 | int id; |
| 1621 | unsigned char *to = str; | ||
| 1622 | 1505 | ||
| 1623 | while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++; | 1506 | check_iso_charset_parameter (dimension, chars, final_char); |
| 1624 | to = p; | 1507 | id = ISO_CHARSET_TABLE (XFASTINT (dimension), XFASTINT (chars), |
| 1625 | while (p < endp) | 1508 | XFASTINT (final_char)); |
| 1626 | { | 1509 | return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil); |
| 1627 | if (*p == LEADING_CODE_8_BIT_CONTROL) | ||
| 1628 | *to++ = *(p + 1) - 0x20, p += 2; | ||
| 1629 | else | ||
| 1630 | *to++ = *p++; | ||
| 1631 | } | ||
| 1632 | return (to - str); | ||
| 1633 | } | 1510 | } |
| 1634 | 1511 | ||
| 1635 | 1512 | ||
| 1636 | DEFUN ("string", Fstring, Sstring, 1, MANY, 0, | 1513 | DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps, |
| 1637 | doc: /* Concatenate all the argument characters and make the result a string. | 1514 | 0, 0, 0, |
| 1638 | usage: (string &rest CHARACTERS) */) | 1515 | doc: /* |
| 1639 | (n, args) | 1516 | Clear encoder and decoder of charsets that are loaded from mapfiles. */) |
| 1640 | int n; | 1517 | () |
| 1641 | Lisp_Object *args; | ||
| 1642 | { | 1518 | { |
| 1643 | int i; | 1519 | int i; |
| 1644 | unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n); | 1520 | struct charset *charset; |
| 1645 | unsigned char *p = buf; | 1521 | Lisp_Object attrs; |
| 1646 | int c; | ||
| 1647 | int multibyte = 0; | ||
| 1648 | 1522 | ||
| 1649 | for (i = 0; i < n; i++) | 1523 | for (i = 0; i < charset_table_used; i++) |
| 1650 | { | 1524 | { |
| 1651 | CHECK_NUMBER (args[i]); | 1525 | charset = CHARSET_FROM_ID (i); |
| 1652 | if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i]))) | 1526 | attrs = CHARSET_ATTRIBUTES (charset); |
| 1653 | multibyte = 1; | 1527 | |
| 1528 | if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP) | ||
| 1529 | { | ||
| 1530 | CHARSET_ATTR_DECODER (attrs) = Qnil; | ||
| 1531 | CHARSET_ATTR_ENCODER (attrs) = Qnil; | ||
| 1532 | CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED; | ||
| 1533 | } | ||
| 1534 | |||
| 1535 | if (CHARSET_UNIFIED_P (charset)) | ||
| 1536 | CHARSET_ATTR_DEUNIFIER (attrs) = Qnil; | ||
| 1654 | } | 1537 | } |
| 1655 | 1538 | ||
| 1656 | for (i = 0; i < n; i++) | 1539 | if (CHAR_TABLE_P (Vchar_unified_charset_table)) |
| 1657 | { | 1540 | { |
| 1658 | c = XINT (args[i]); | 1541 | Foptimize_char_table (Vchar_unified_charset_table); |
| 1659 | if (multibyte) | 1542 | Vchar_unify_table = Vchar_unified_charset_table; |
| 1660 | p += CHAR_STRING (c, p); | 1543 | Vchar_unified_charset_table = Qnil; |
| 1661 | else | ||
| 1662 | *p++ = c; | ||
| 1663 | } | 1544 | } |
| 1664 | 1545 | ||
| 1665 | return make_string_from_bytes (buf, n, p - buf); | 1546 | return Qnil; |
| 1666 | } | 1547 | } |
| 1667 | 1548 | ||
| 1668 | #endif /* emacs */ | ||
| 1669 | 1549 | ||
| 1670 | int | 1550 | void |
| 1671 | charset_id_internal (charset_name) | 1551 | init_charset () |
| 1672 | char *charset_name; | ||
| 1673 | { | 1552 | { |
| 1674 | Lisp_Object val; | ||
| 1675 | |||
| 1676 | val= Fget (intern (charset_name), Qcharset); | ||
| 1677 | if (!VECTORP (val)) | ||
| 1678 | error ("Charset %s is not defined", charset_name); | ||
| 1679 | 1553 | ||
| 1680 | return (XINT (XVECTOR (val)->contents[0])); | ||
| 1681 | } | 1554 | } |
| 1682 | 1555 | ||
| 1683 | DEFUN ("setup-special-charsets", Fsetup_special_charsets, | ||
| 1684 | Ssetup_special_charsets, 0, 0, 0, doc: /* Internal use only. */) | ||
| 1685 | () | ||
| 1686 | { | ||
| 1687 | charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1"); | ||
| 1688 | charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978"); | ||
| 1689 | charset_jisx0208 = charset_id_internal ("japanese-jisx0208"); | ||
| 1690 | charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201"); | ||
| 1691 | charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201"); | ||
| 1692 | charset_big5_1 = charset_id_internal ("chinese-big5-1"); | ||
| 1693 | charset_big5_2 = charset_id_internal ("chinese-big5-2"); | ||
| 1694 | return Qnil; | ||
| 1695 | } | ||
| 1696 | 1556 | ||
| 1697 | void | 1557 | void |
| 1698 | init_charset_once () | 1558 | init_charset_once () |
| 1699 | { | 1559 | { |
| 1700 | int i, j, k; | 1560 | int i, j, k; |
| 1701 | 1561 | ||
| 1702 | staticpro (&Vcharset_table); | 1562 | for (i = 0; i < ISO_MAX_DIMENSION; i++) |
| 1703 | staticpro (&Vcharset_symbol_table); | 1563 | for (j = 0; j < ISO_MAX_CHARS; j++) |
| 1704 | staticpro (&Vgeneric_character_list); | 1564 | for (k = 0; k < ISO_MAX_FINAL; k++) |
| 1565 | iso_charset_table[i][j][k] = -1; | ||
| 1705 | 1566 | ||
| 1706 | /* This has to be done here, before we call Fmake_char_table. */ | 1567 | for (i = 0; i < 255; i++) |
| 1707 | Qcharset_table = intern ("charset-table"); | 1568 | emacs_mule_charset[i] = NULL; |
| 1708 | staticpro (&Qcharset_table); | 1569 | |
| 1570 | #if 0 | ||
| 1571 | Vchar_charset_set = Fmake_char_table (Qnil, Qnil); | ||
| 1572 | CHAR_TABLE_SET (Vchar_charset_set, make_number (97), Qnil); | ||
| 1573 | |||
| 1574 | DEFSYM (Qcharset_encode_table, "charset-encode-table"); | ||
| 1709 | 1575 | ||
| 1710 | /* Intern this now in case it isn't already done. | 1576 | /* Intern this now in case it isn't already done. |
| 1711 | Setting this variable twice is harmless. | 1577 | Setting this variable twice is harmless. |
| 1712 | But don't staticpro it here--that is done in alloc.c. */ | 1578 | But don't staticpro it here--that is done in alloc.c. */ |
| 1713 | Qchar_table_extra_slots = intern ("char-table-extra-slots"); | 1579 | Qchar_table_extra_slots = intern ("char-table-extra-slots"); |
| 1714 | 1580 | ||
| 1715 | /* Now we are ready to set up this property, so we can | 1581 | /* Now we are ready to set up this property, so we can create syntax |
| 1716 | create the charset table. */ | 1582 | tables. */ |
| 1717 | Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0)); | 1583 | Fput (Qcharset_encode_table, Qchar_table_extra_slots, make_number (0)); |
| 1718 | Vcharset_table = Fmake_char_table (Qcharset_table, Qnil); | 1584 | #endif |
| 1719 | |||
| 1720 | Qunknown = intern ("unknown"); | ||
| 1721 | staticpro (&Qunknown); | ||
| 1722 | Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1), | ||
| 1723 | Qunknown); | ||
| 1724 | |||
| 1725 | /* Setup tables. */ | ||
| 1726 | for (i = 0; i < 2; i++) | ||
| 1727 | for (j = 0; j < 2; j++) | ||
| 1728 | for (k = 0; k < 128; k++) | ||
| 1729 | iso_charset_table [i][j][k] = -1; | ||
| 1730 | |||
| 1731 | for (i = 0; i < 256; i++) | ||
| 1732 | bytes_by_char_head[i] = 1; | ||
| 1733 | bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3; | ||
| 1734 | bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3; | ||
| 1735 | bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4; | ||
| 1736 | bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4; | ||
| 1737 | |||
| 1738 | for (i = 0; i < 128; i++) | ||
| 1739 | width_by_char_head[i] = 1; | ||
| 1740 | for (; i < 256; i++) | ||
| 1741 | width_by_char_head[i] = 4; | ||
| 1742 | width_by_char_head[LEADING_CODE_PRIVATE_11] = 1; | ||
| 1743 | width_by_char_head[LEADING_CODE_PRIVATE_12] = 2; | ||
| 1744 | width_by_char_head[LEADING_CODE_PRIVATE_21] = 1; | ||
| 1745 | width_by_char_head[LEADING_CODE_PRIVATE_22] = 2; | ||
| 1746 | |||
| 1747 | { | ||
| 1748 | Lisp_Object val; | ||
| 1749 | |||
| 1750 | val = Qnil; | ||
| 1751 | for (i = 0x81; i < 0x90; i++) | ||
| 1752 | val = Fcons (make_number ((i - 0x70) << 7), val); | ||
| 1753 | for (; i < 0x9A; i++) | ||
| 1754 | val = Fcons (make_number ((i - 0x8F) << 14), val); | ||
| 1755 | for (i = 0xA0; i < 0xF0; i++) | ||
| 1756 | val = Fcons (make_number ((i - 0x70) << 7), val); | ||
| 1757 | for (; i < 0xFF; i++) | ||
| 1758 | val = Fcons (make_number ((i - 0xE0) << 14), val); | ||
| 1759 | Vgeneric_character_list = Fnreverse (val); | ||
| 1760 | } | ||
| 1761 | |||
| 1762 | nonascii_insert_offset = 0; | ||
| 1763 | Vnonascii_translation_table = Qnil; | ||
| 1764 | } | 1585 | } |
| 1765 | 1586 | ||
| 1766 | #ifdef emacs | 1587 | #ifdef emacs |
| @@ -1768,141 +1589,128 @@ init_charset_once () | |||
| 1768 | void | 1589 | void |
| 1769 | syms_of_charset () | 1590 | syms_of_charset () |
| 1770 | { | 1591 | { |
| 1771 | Qcharset = intern ("charset"); | 1592 | char *p; |
| 1772 | staticpro (&Qcharset); | 1593 | |
| 1773 | 1594 | DEFSYM (Qcharsetp, "charsetp"); | |
| 1774 | Qascii = intern ("ascii"); | 1595 | |
| 1775 | staticpro (&Qascii); | 1596 | DEFSYM (Qascii, "ascii"); |
| 1776 | 1597 | DEFSYM (Qunicode, "unicode"); | |
| 1777 | Qeight_bit_control = intern ("eight-bit-control"); | 1598 | DEFSYM (Qeight_bit_control, "eight-bit-control"); |
| 1778 | staticpro (&Qeight_bit_control); | 1599 | DEFSYM (Qeight_bit_graphic, "eight-bit-graphic"); |
| 1779 | 1600 | DEFSYM (Qiso_8859_1, "iso-8859-1"); | |
| 1780 | Qeight_bit_graphic = intern ("eight-bit-graphic"); | 1601 | |
| 1781 | staticpro (&Qeight_bit_graphic); | 1602 | DEFSYM (Qgl, "gl"); |
| 1782 | 1603 | DEFSYM (Qgr, "gr"); | |
| 1783 | /* Define special charsets ascii, eight-bit-control, and | 1604 | |
| 1784 | eight-bit-graphic. */ | 1605 | p = (char *) xmalloc (30000); |
| 1785 | update_charset_table (make_number (CHARSET_ASCII), | 1606 | |
| 1786 | make_number (1), make_number (94), | 1607 | staticpro (&Vcharset_ordered_list); |
| 1787 | make_number (1), | 1608 | Vcharset_ordered_list = Qnil; |
| 1788 | make_number (0), | 1609 | |
| 1789 | make_number ('B'), | 1610 | staticpro (&Viso_2022_charset_list); |
| 1790 | make_number (0), | 1611 | Viso_2022_charset_list = Qnil; |
| 1791 | build_string ("ASCII"), | 1612 | |
| 1792 | Qnil, /* same as above */ | 1613 | staticpro (&Vemacs_mule_charset_list); |
| 1793 | build_string ("ASCII (ISO646 IRV)")); | 1614 | Vemacs_mule_charset_list = Qnil; |
| 1794 | CHARSET_SYMBOL (CHARSET_ASCII) = Qascii; | 1615 | |
| 1795 | Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII)); | 1616 | staticpro (&Vcharset_hash_table); |
| 1796 | 1617 | Vcharset_hash_table = Fmakehash (Qeq); | |
| 1797 | update_charset_table (make_number (CHARSET_8_BIT_CONTROL), | 1618 | |
| 1798 | make_number (1), make_number (96), | 1619 | charset_table_size = 128; |
| 1799 | make_number (4), | 1620 | charset_table = ((struct charset *) |
| 1800 | make_number (0), | 1621 | xmalloc (sizeof (struct charset) * charset_table_size)); |
| 1801 | make_number (-1), | 1622 | charset_table_used = 0; |
| 1802 | make_number (-1), | 1623 | |
| 1803 | build_string ("8-bit control code (0x80..0x9F)"), | 1624 | staticpro (&Vchar_unified_charset_table); |
| 1804 | Qnil, /* same as above */ | 1625 | Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1)); |
| 1805 | Qnil); /* same as above */ | 1626 | |
| 1806 | CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control; | 1627 | defsubr (&Scharsetp); |
| 1807 | Fput (Qeight_bit_control, Qcharset, | 1628 | defsubr (&Smap_charset_chars); |
| 1808 | CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL)); | 1629 | defsubr (&Sdefine_charset_internal); |
| 1809 | 1630 | defsubr (&Sdefine_charset_alias); | |
| 1810 | update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC), | 1631 | defsubr (&Sprimary_charset); |
| 1811 | make_number (1), make_number (96), | 1632 | defsubr (&Sset_primary_charset); |
| 1812 | make_number (4), | 1633 | defsubr (&Scharset_plist); |
| 1813 | make_number (0), | 1634 | defsubr (&Sset_charset_plist); |
| 1814 | make_number (-1), | 1635 | defsubr (&Sunify_charset); |
| 1815 | make_number (-1), | ||
| 1816 | build_string ("8-bit graphic char (0xA0..0xFF)"), | ||
| 1817 | Qnil, /* same as above */ | ||
| 1818 | Qnil); /* same as above */ | ||
| 1819 | CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic; | ||
| 1820 | Fput (Qeight_bit_graphic, Qcharset, | ||
| 1821 | CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC)); | ||
| 1822 | |||
| 1823 | Qauto_fill_chars = intern ("auto-fill-chars"); | ||
| 1824 | staticpro (&Qauto_fill_chars); | ||
| 1825 | Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0)); | ||
| 1826 | |||
| 1827 | defsubr (&Sdefine_charset); | ||
| 1828 | defsubr (&Sgeneric_character_list); | ||
| 1829 | defsubr (&Sget_unused_iso_final_char); | 1636 | defsubr (&Sget_unused_iso_final_char); |
| 1830 | defsubr (&Sdeclare_equiv_charset); | 1637 | defsubr (&Sdeclare_equiv_charset); |
| 1831 | defsubr (&Sfind_charset_region); | 1638 | defsubr (&Sfind_charset_region); |
| 1832 | defsubr (&Sfind_charset_string); | 1639 | defsubr (&Sfind_charset_string); |
| 1833 | defsubr (&Smake_char_internal); | 1640 | defsubr (&Sdecode_char); |
| 1641 | defsubr (&Sencode_char); | ||
| 1834 | defsubr (&Ssplit_char); | 1642 | defsubr (&Ssplit_char); |
| 1643 | defsubr (&Smake_char); | ||
| 1835 | defsubr (&Schar_charset); | 1644 | defsubr (&Schar_charset); |
| 1836 | defsubr (&Scharset_after); | 1645 | defsubr (&Scharset_after); |
| 1837 | defsubr (&Siso_charset); | 1646 | defsubr (&Siso_charset); |
| 1838 | defsubr (&Schar_valid_p); | 1647 | defsubr (&Sclear_charset_maps); |
| 1839 | defsubr (&Sunibyte_char_to_multibyte); | 1648 | |
| 1840 | defsubr (&Smultibyte_char_to_unibyte); | 1649 | DEFVAR_LISP ("charset-map-directory", &Vcharset_map_directory, |
| 1841 | defsubr (&Schar_bytes); | 1650 | doc: /* Directory of charset map files that come with GNU Emacs. |
| 1842 | defsubr (&Schar_width); | 1651 | The default value is \"\\[data-directory]/charsets\". */); |
| 1843 | defsubr (&Sstring_width); | 1652 | Vcharset_map_directory = Fexpand_file_name (build_string ("charsets"), |
| 1844 | defsubr (&Schar_direction); | 1653 | Vdata_directory); |
| 1845 | defsubr (&Schars_in_region); | ||
| 1846 | defsubr (&Sstring); | ||
| 1847 | defsubr (&Ssetup_special_charsets); | ||
| 1848 | 1654 | ||
| 1849 | DEFVAR_LISP ("charset-list", &Vcharset_list, | 1655 | DEFVAR_LISP ("charset-list", &Vcharset_list, |
| 1850 | doc: /* List of charsets ever defined. */); | 1656 | doc: /* List of charsets ever defined. */); |
| 1851 | Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control, | 1657 | Vcharset_list = Qnil; |
| 1852 | Fcons (Qeight_bit_graphic, Qnil))); | 1658 | |
| 1853 | 1659 | /* Make the prerequisite charset `ascii' and `unicode'. */ | |
| 1854 | DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector, | 1660 | { |
| 1855 | doc: /* Vector of cons cell of a symbol and translation table ever defined. | 1661 | Lisp_Object args[charset_arg_max]; |
| 1856 | An ID of a translation table is an index of this vector. */); | 1662 | Lisp_Object plist[14]; |
| 1857 | Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil); | 1663 | Lisp_Object val; |
| 1858 | 1664 | ||
| 1859 | DEFVAR_INT ("leading-code-private-11", &leading_code_private_11, | 1665 | plist[0] = intern (":name"); |
| 1860 | doc: /* Leading-code of private TYPE9N charset of column-width 1. */); | 1666 | plist[1] = args[charset_arg_name] = Qascii; |
| 1861 | leading_code_private_11 = LEADING_CODE_PRIVATE_11; | 1667 | plist[2] = intern (":dimension"); |
| 1862 | 1668 | plist[3] = args[charset_arg_dimension] = make_number (1); | |
| 1863 | DEFVAR_INT ("leading-code-private-12", &leading_code_private_12, | 1669 | val = Fmake_vector (make_number (8), make_number (0)); |
| 1864 | doc: /* Leading-code of private TYPE9N charset of column-width 2. */); | 1670 | ASET (val, 1, make_number (127)); |
| 1865 | leading_code_private_12 = LEADING_CODE_PRIVATE_12; | 1671 | plist[4] = intern (":code-space"); |
| 1866 | 1672 | plist[5] = args[charset_arg_code_space] = val; | |
| 1867 | DEFVAR_INT ("leading-code-private-21", &leading_code_private_21, | 1673 | plist[6] = intern (":iso-final-char"); |
| 1868 | doc: /* Leading-code of private TYPE9Nx9N charset of column-width 1. */); | 1674 | plist[7] = args[charset_arg_iso_final] = make_number ('B'); |
| 1869 | leading_code_private_21 = LEADING_CODE_PRIVATE_21; | 1675 | args[charset_arg_iso_revision] = Qnil; |
| 1870 | 1676 | plist[8] = intern (":emacs-mule-id"); | |
| 1871 | DEFVAR_INT ("leading-code-private-22", &leading_code_private_22, | 1677 | plist[9] = args[charset_arg_emacs_mule_id] = make_number (0); |
| 1872 | doc: /* Leading-code of private TYPE9Nx9N charset of column-width 2. */); | 1678 | plist[10] = intern (":ascii-compatible-p"); |
| 1873 | leading_code_private_22 = LEADING_CODE_PRIVATE_22; | 1679 | plist[11] = args[charset_arg_ascii_compatible_p] = Qt; |
| 1874 | 1680 | args[charset_arg_supplementary_p] = Qnil; | |
| 1875 | DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset, | 1681 | args[charset_arg_invalid_code] = Qnil; |
| 1876 | doc: /* Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte. | 1682 | plist[12] = intern (":code-offset"); |
| 1877 | This is used for converting unibyte text to multibyte, | 1683 | plist[13] = args[charset_arg_code_offset] = make_number (0); |
| 1878 | and for inserting character codes specified by number. | 1684 | args[charset_arg_map] = Qnil; |
| 1879 | 1685 | args[charset_arg_parents] = Qnil; | |
| 1880 | This serves to convert a Latin-1 or similar 8-bit character code | 1686 | args[charset_arg_unify_map] = Qnil; |
| 1881 | to the corresponding Emacs multibyte character code. | 1687 | /* The actual plist is set by mule-conf.el. */ |
| 1882 | Typically the value should be (- (make-char CHARSET 0) 128), | 1688 | args[charset_arg_plist] = Flist (14, plist); |
| 1883 | for your choice of character set. | 1689 | Fdefine_charset_internal (charset_arg_max, args); |
| 1884 | If `nonascii-translation-table' is non-nil, it overrides this variable. */); | 1690 | charset_ascii = CHARSET_SYMBOL_ID (Qascii); |
| 1885 | nonascii_insert_offset = 0; | 1691 | |
| 1886 | 1692 | plist[1] = args[charset_arg_name] = Qunicode; | |
| 1887 | DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table, | 1693 | plist[3] = args[charset_arg_dimension] = make_number (3); |
| 1888 | doc: /* Translation table to convert non-ASCII unibyte codes to multibyte. | 1694 | val = Fmake_vector (make_number (8), make_number (0)); |
| 1889 | This is used for converting unibyte text to multibyte, | 1695 | ASET (val, 1, make_number (255)); |
| 1890 | and for inserting character codes specified by number. | 1696 | ASET (val, 3, make_number (255)); |
| 1891 | 1697 | ASET (val, 5, make_number (16)); | |
| 1892 | Conversion is performed only when multibyte characters are enabled, | 1698 | plist[5] = args[charset_arg_code_space] = val; |
| 1893 | and it serves to convert a Latin-1 or similar 8-bit character code | 1699 | plist[7] = args[charset_arg_iso_final] = Qnil; |
| 1894 | to the corresponding Emacs character code. | 1700 | args[charset_arg_iso_revision] = Qnil; |
| 1895 | 1701 | plist[9] = args[charset_arg_emacs_mule_id] = Qnil; | |
| 1896 | If this is nil, `nonascii-insert-offset' is used instead. | 1702 | plist[11] = args[charset_arg_ascii_compatible_p] = Qt; |
| 1897 | See also the docstring of `make-translation-table'. */); | 1703 | args[charset_arg_supplementary_p] = Qnil; |
| 1898 | Vnonascii_translation_table = Qnil; | 1704 | args[charset_arg_invalid_code] = Qnil; |
| 1899 | 1705 | plist[13] = args[charset_arg_code_offset] = make_number (0); | |
| 1900 | DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars, | 1706 | args[charset_arg_map] = Qnil; |
| 1901 | doc: /* A char-table for characters which invoke auto-filling. | 1707 | args[charset_arg_parents] = Qnil; |
| 1902 | Such characters have value t in this table. */); | 1708 | args[charset_arg_unify_map] = Qnil; |
| 1903 | Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil); | 1709 | /* The actual plist is set by mule-conf.el. */ |
| 1904 | CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt); | 1710 | args[charset_arg_plist] = Flist (14, plist); |
| 1905 | CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt); | 1711 | Fdefine_charset_internal (charset_arg_max, args); |
| 1712 | charset_unicode = CHARSET_SYMBOL_ID (Qunicode); | ||
| 1713 | } | ||
| 1906 | } | 1714 | } |
| 1907 | 1715 | ||
| 1908 | #endif /* emacs */ | 1716 | #endif /* emacs */ |
diff --git a/src/charset.h b/src/charset.h index d4e85d91ebf..58649eabb51 100644 --- a/src/charset.h +++ b/src/charset.h | |||
| @@ -1,7 +1,10 @@ | |||
| 1 | /* Header for multibyte character handler. | 1 | /* Header for charset handler. |
| 2 | Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN. | 2 | Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN. |
| 3 | Licensed to the Free Software Foundation. | 3 | Licensed to the Free Software Foundation. |
| 4 | Copyright (C) 2001 Free Software Foundation, Inc. | 4 | Copyright (C) 2001 Free Software Foundation, Inc. |
| 5 | Copyright (C) 2001, 2002 | ||
| 6 | National Institute of Advanced Industrial Science and Technology (AIST) | ||
| 7 | Registration Number H13PRO009 | ||
| 5 | 8 | ||
| 6 | This file is part of GNU Emacs. | 9 | This file is part of GNU Emacs. |
| 7 | 10 | ||
| @@ -23,806 +26,428 @@ Boston, MA 02111-1307, USA. */ | |||
| 23 | #ifndef EMACS_CHARSET_H | 26 | #ifndef EMACS_CHARSET_H |
| 24 | #define EMACS_CHARSET_H | 27 | #define EMACS_CHARSET_H |
| 25 | 28 | ||
| 26 | /* #define BYTE_COMBINING_DEBUG */ | 29 | /* Index to arguments of Fdefine_charset_internal. */ |
| 27 | 30 | ||
| 28 | /*** GENERAL NOTE on CHARACTER SET (CHARSET) *** | 31 | enum define_charset_arg_index |
| 29 | 32 | { | |
| 30 | A character set ("charset" hereafter) is a meaningful collection | 33 | charset_arg_name, |
| 31 | (i.e. language, culture, functionality, etc) of characters. Emacs | 34 | charset_arg_dimension, |
| 32 | handles multiple charsets at once. Each charset corresponds to one | 35 | charset_arg_code_space, |
| 33 | of the ISO charsets. Emacs identifies a charset by a unique | 36 | charset_arg_iso_final, |
| 34 | identification number, whereas ISO identifies a charset by a triplet | 37 | charset_arg_iso_revision, |
| 35 | of DIMENSION, CHARS and FINAL-CHAR. So, hereafter, just saying | 38 | charset_arg_emacs_mule_id, |
| 36 | "charset" means an identification number (integer value). | 39 | charset_arg_ascii_compatible_p, |
| 37 | 40 | charset_arg_supplementary_p, | |
| 38 | The value range of charsets is 0x00, 0x81..0xFE. There are four | 41 | charset_arg_invalid_code, |
| 39 | kinds of charset depending on DIMENSION (1 or 2) and CHARS (94 or | 42 | charset_arg_code_offset, |
| 40 | 96). For instance, a charset of DIMENSION2_CHARS94 contains 94x94 | 43 | charset_arg_map, |
| 41 | characters. | 44 | charset_arg_parents, |
| 42 | 45 | charset_arg_unify_map, | |
| 43 | Within Emacs Lisp, a charset is treated as a symbol which has a | 46 | charset_arg_plist, |
| 44 | property `charset'. The property value is a vector containing | 47 | charset_arg_max |
| 45 | various information about the charset. For readability of C code, | 48 | }; |
| 46 | we use the following convention for C variable names: | 49 | |
| 47 | charset_symbol: Emacs Lisp symbol of a charset | 50 | |
| 48 | charset_id: Emacs Lisp integer of an identification number of a charset | 51 | /* Indices to charset attributes vector. */ |
| 49 | charset: C integer of an identification number of a charset | 52 | |
| 50 | 53 | enum charset_attr_index | |
| 51 | Each charset (except for ascii) is assigned a base leading-code | 54 | { |
| 52 | (range 0x80..0x9E). In addition, a charset of greater than 0xA0 | 55 | /* ID number of the charset. */ |
| 53 | (whose base leading-code is 0x9A..0x9D) is assigned an extended | 56 | charset_id, |
| 54 | leading-code (range 0xA0..0xFE). In this case, each base | ||
| 55 | leading-code specifies the allowable range of extended leading-code | ||
| 56 | as shown in the table below. A leading-code is used to represent a | ||
| 57 | character in Emacs' buffer and string. | ||
| 58 | |||
| 59 | We call a charset which has extended leading-code a "private | ||
| 60 | charset" because those are mainly for a charset which is not yet | ||
| 61 | registered by ISO. On the contrary, we call a charset which does | ||
| 62 | not have extended leading-code an "official charset". | ||
| 63 | |||
| 64 | --------------------------------------------------------------------------- | ||
| 65 | charset dimension base leading-code extended leading-code | ||
| 66 | --------------------------------------------------------------------------- | ||
| 67 | 0x00 official dim1 -- none -- -- none -- | ||
| 68 | (ASCII) | ||
| 69 | 0x01..0x7F --never used-- | ||
| 70 | 0x80 official dim1 -- none -- -- none -- | ||
| 71 | (eight-bit-graphic) | ||
| 72 | 0x81..0x8F official dim1 same as charset -- none -- | ||
| 73 | 0x90..0x99 official dim2 same as charset -- none -- | ||
| 74 | 0x9A..0x9D --never used-- | ||
| 75 | 0x9E official dim1 same as charset -- none -- | ||
| 76 | (eight-bit-control) | ||
| 77 | 0x9F --never used-- | ||
| 78 | 0xA0..0xDF private dim1 0x9A same as charset | ||
| 79 | of 1-column width | ||
| 80 | 0xE0..0xEF private dim1 0x9B same as charset | ||
| 81 | of 2-column width | ||
| 82 | 0xF0..0xF4 private dim2 0x9C same as charset | ||
| 83 | of 1-column width | ||
| 84 | 0xF5..0xFE private dim2 0x9D same as charset | ||
| 85 | of 2-column width | ||
| 86 | 0xFF --never used-- | ||
| 87 | --------------------------------------------------------------------------- | ||
| 88 | |||
| 89 | */ | ||
| 90 | |||
| 91 | /* Definition of special leading-codes. */ | ||
| 92 | /* Leading-code followed by extended leading-code. */ | ||
| 93 | #define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */ | ||
| 94 | #define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */ | ||
| 95 | #define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */ | ||
| 96 | #define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */ | ||
| 97 | |||
| 98 | #define LEADING_CODE_8_BIT_CONTROL 0x9E /* for `eight-bit-control' */ | ||
| 99 | |||
| 100 | /* Extended leading-code. */ | ||
| 101 | /* Start of each extended leading-codes. */ | ||
| 102 | #define LEADING_CODE_EXT_11 0xA0 /* follows LEADING_CODE_PRIVATE_11 */ | ||
| 103 | #define LEADING_CODE_EXT_12 0xE0 /* follows LEADING_CODE_PRIVATE_12 */ | ||
| 104 | #define LEADING_CODE_EXT_21 0xF0 /* follows LEADING_CODE_PRIVATE_21 */ | ||
| 105 | #define LEADING_CODE_EXT_22 0xF5 /* follows LEADING_CODE_PRIVATE_22 */ | ||
| 106 | /* Maximum value of extended leading-codes. */ | ||
| 107 | #define LEADING_CODE_EXT_MAX 0xFE | ||
| 108 | |||
| 109 | /* Definition of minimum/maximum charset of each DIMENSION. */ | ||
| 110 | #define MIN_CHARSET_OFFICIAL_DIMENSION1 0x80 | ||
| 111 | #define MAX_CHARSET_OFFICIAL_DIMENSION1 0x8F | ||
| 112 | #define MIN_CHARSET_OFFICIAL_DIMENSION2 0x90 | ||
| 113 | #define MAX_CHARSET_OFFICIAL_DIMENSION2 0x99 | ||
| 114 | #define MIN_CHARSET_PRIVATE_DIMENSION1 LEADING_CODE_EXT_11 | ||
| 115 | #define MIN_CHARSET_PRIVATE_DIMENSION2 LEADING_CODE_EXT_21 | ||
| 116 | |||
| 117 | /* Maximum value of overall charset identification number. */ | ||
| 118 | #define MAX_CHARSET 0xFE | ||
| 119 | |||
| 120 | /* Definition of special charsets. */ | ||
| 121 | #define CHARSET_ASCII 0 /* 0x00..0x7F */ | ||
| 122 | #define CHARSET_8_BIT_CONTROL 0x9E /* 0x80..0x9F */ | ||
| 123 | #define CHARSET_8_BIT_GRAPHIC 0x80 /* 0xA0..0xFF */ | ||
| 124 | |||
| 125 | extern int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */ | ||
| 126 | extern int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */ | ||
| 127 | extern int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */ | ||
| 128 | extern int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */ | ||
| 129 | extern int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */ | ||
| 130 | extern int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */ | ||
| 131 | extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ | ||
| 132 | |||
| 133 | /* Check if CH is an ASCII character or a base leading-code. | ||
| 134 | Nowadays, any byte can be the first byte of a character in a | ||
| 135 | multibyte buffer/string. So this macro name is not appropriate. */ | ||
| 136 | #define CHAR_HEAD_P(ch) ((unsigned char) (ch) < 0xA0) | ||
| 137 | |||
| 138 | /*** GENERAL NOTE on CHARACTER REPRESENTATION *** | ||
| 139 | |||
| 140 | Firstly, the term "character" or "char" is used for a multilingual | ||
| 141 | character (of course, including ASCII characters), not for a byte in | ||
| 142 | computer memory. We use the term "code" or "byte" for the latter | ||
| 143 | case. | ||
| 144 | |||
| 145 | A character is identified by charset and one or two POSITION-CODEs. | ||
| 146 | POSITION-CODE is the position of the character in the charset. A | ||
| 147 | character of DIMENSION1 charset has one POSITION-CODE: POSITION-CODE-1. | ||
| 148 | A character of DIMENSION2 charset has two POSITION-CODE: | ||
| 149 | POSITION-CODE-1 and POSITION-CODE-2. The code range of | ||
| 150 | POSITION-CODE is 0x20..0x7F. | ||
| 151 | |||
| 152 | Emacs has two kinds of representation of a character: multi-byte | ||
| 153 | form (for buffers and strings) and single-word form (for character | ||
| 154 | objects in Emacs Lisp). The latter is called "character code" | ||
| 155 | hereafter. Both representations encode the information of charset | ||
| 156 | and POSITION-CODE but in a different way (for instance, the MSB of | ||
| 157 | POSITION-CODE is set in multi-byte form). | ||
| 158 | |||
| 159 | For details of the multi-byte form, see the section "2. Emacs | ||
| 160 | internal format handlers" of `coding.c'. | ||
| 161 | |||
| 162 | Emacs uses 19 bits for a character code. The bits are divided into | ||
| 163 | 3 fields: FIELD1(5bits):FIELD2(7bits):FIELD3(7bits). | ||
| 164 | |||
| 165 | A character code of DIMENSION1 character uses FIELD2 to hold charset | ||
| 166 | and FIELD3 to hold POSITION-CODE-1. A character code of DIMENSION2 | ||
| 167 | character uses FIELD1 to hold charset, FIELD2 and FIELD3 to hold | ||
| 168 | POSITION-CODE-1 and POSITION-CODE-2 respectively. | ||
| 169 | |||
| 170 | More precisely... | ||
| 171 | |||
| 172 | FIELD2 of DIMENSION1 character (except for ascii, eight-bit-control, | ||
| 173 | and eight-bit-graphic) is "charset - 0x70". This is to make all | ||
| 174 | character codes except for ASCII and 8-bit codes greater than 256. | ||
| 175 | So, the range of FIELD2 of DIMENSION1 character is 0, 1, or | ||
| 176 | 0x11..0x7F. | ||
| 177 | |||
| 178 | FIELD1 of DIMENSION2 character is "charset - 0x8F" for official | ||
| 179 | charset and "charset - 0xE0" for private charset. So, the range of | ||
| 180 | FIELD1 of DIMENSION2 character is 0x01..0x1E. | ||
| 181 | |||
| 182 | ----------------------------------------------------------------------------- | ||
| 183 | charset FIELD1 (5-bit) FIELD2 (7-bit) FIELD3 (7-bit) | ||
| 184 | ----------------------------------------------------------------------------- | ||
| 185 | ascii 0 0 0x00..0x7F | ||
| 186 | eight-bit-control 0 1 0x00..0x1F | ||
| 187 | eight-bit-graphic 0 1 0x20..0x7F | ||
| 188 | DIMENSION1 0 charset - 0x70 POSITION-CODE-1 | ||
| 189 | DIMENSION2(o) charset - 0x8F POSITION-CODE-1 POSITION-CODE-2 | ||
| 190 | DIMENSION2(p) charset - 0xE0 POSITION-CODE-1 POSITION-CODE-2 | ||
| 191 | ----------------------------------------------------------------------------- | ||
| 192 | "(o)": official, "(p)": private | ||
| 193 | ----------------------------------------------------------------------------- | ||
| 194 | */ | ||
| 195 | |||
| 196 | /* Masks of each field of character code. */ | ||
| 197 | #define CHAR_FIELD1_MASK (0x1F << 14) | ||
| 198 | #define CHAR_FIELD2_MASK (0x7F << 7) | ||
| 199 | #define CHAR_FIELD3_MASK 0x7F | ||
| 200 | |||
| 201 | /* Macros to access each field of character C. */ | ||
| 202 | #define CHAR_FIELD1(c) (((c) & CHAR_FIELD1_MASK) >> 14) | ||
| 203 | #define CHAR_FIELD2(c) (((c) & CHAR_FIELD2_MASK) >> 7) | ||
| 204 | #define CHAR_FIELD3(c) ((c) & CHAR_FIELD3_MASK) | ||
| 205 | |||
| 206 | /* Minimum character code of character of each DIMENSION. */ | ||
| 207 | #define MIN_CHAR_OFFICIAL_DIMENSION1 \ | ||
| 208 | ((0x81 - 0x70) << 7) | ||
| 209 | #define MIN_CHAR_PRIVATE_DIMENSION1 \ | ||
| 210 | ((MIN_CHARSET_PRIVATE_DIMENSION1 - 0x70) << 7) | ||
| 211 | #define MIN_CHAR_OFFICIAL_DIMENSION2 \ | ||
| 212 | ((MIN_CHARSET_OFFICIAL_DIMENSION2 - 0x8F) << 14) | ||
| 213 | #define MIN_CHAR_PRIVATE_DIMENSION2 \ | ||
| 214 | ((MIN_CHARSET_PRIVATE_DIMENSION2 - 0xE0) << 14) | ||
| 215 | /* Maximum character code currently used plus 1. */ | ||
| 216 | #define MAX_CHAR (0x1F << 14) | ||
| 217 | |||
| 218 | /* 1 if C is a single byte character, else 0. */ | ||
| 219 | #define SINGLE_BYTE_CHAR_P(c) ((unsigned) (c) < 0x100) | ||
| 220 | |||
| 221 | /* 1 if BYTE is an ASCII character in itself, in multibyte mode. */ | ||
| 222 | #define ASCII_BYTE_P(byte) ((byte) < 0x80) | ||
| 223 | |||
| 224 | /* A char-table containing information on each character set. | ||
| 225 | |||
| 226 | Unlike ordinary char-tables, this doesn't contain any nested tables. | ||
| 227 | Only the top level elements are used. Each element is a vector of | ||
| 228 | the following information: | ||
| 229 | CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION, | ||
| 230 | LEADING-CODE-BASE, LEADING-CODE-EXT, | ||
| 231 | ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE, | ||
| 232 | REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION, | ||
| 233 | PLIST. | ||
| 234 | |||
| 235 | CHARSET-ID (integer) is the identification number of the charset. | ||
| 236 | |||
| 237 | BYTES (integer) is the length of the multi-byte form of a character | ||
| 238 | in the charset: one of 1, 2, 3, and 4. | ||
| 239 | |||
| 240 | DIMENSION (integer) is the number of bytes to represent a character: 1 or 2. | ||
| 241 | |||
| 242 | CHARS (integer) is the number of characters in a dimension: 94 or 96. | ||
| 243 | |||
| 244 | WIDTH (integer) is the number of columns a character in the charset | ||
| 245 | occupies on the screen: one of 0, 1, and 2.. | ||
| 246 | |||
| 247 | DIRECTION (integer) is the rendering direction of characters in the | ||
| 248 | charset when rendering. If 0, render from left to right, else | ||
| 249 | render from right to left. | ||
| 250 | |||
| 251 | LEADING-CODE-BASE (integer) is the base leading-code for the | ||
| 252 | charset. | ||
| 253 | |||
| 254 | LEADING-CODE-EXT (integer) is the extended leading-code for the | ||
| 255 | charset. All charsets of less than 0xA0 have the value 0. | ||
| 256 | |||
| 257 | ISO-FINAL-CHAR (character) is the final character of the | ||
| 258 | corresponding ISO 2022 charset. It is -1 for such a character | ||
| 259 | that is used only internally (e.g. `eight-bit-control'). | ||
| 260 | |||
| 261 | ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked | ||
| 262 | while encoding to variants of ISO 2022 coding system, one of the | ||
| 263 | following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR). It | ||
| 264 | is -1 for such a character that is used only internally | ||
| 265 | (e.g. `eight-bit-control'). | ||
| 266 | |||
| 267 | REVERSE-CHARSET (integer) is the charset which differs only in | ||
| 268 | LEFT-TO-RIGHT value from the charset. If there's no such a | ||
| 269 | charset, the value is -1. | ||
| 270 | |||
| 271 | SHORT-NAME (string) is the short name to refer to the charset. | ||
| 272 | |||
| 273 | LONG-NAME (string) is the long name to refer to the charset. | ||
| 274 | |||
| 275 | DESCRIPTION (string) is the description string of the charset. | ||
| 276 | |||
| 277 | PLIST (property list) may contain any type of information a user | ||
| 278 | wants to put and get by functions `put-charset-property' and | ||
| 279 | `get-charset-property' respectively. */ | ||
| 280 | extern Lisp_Object Vcharset_table; | ||
| 281 | |||
| 282 | /* Macros to access various information of CHARSET in Vcharset_table. | ||
| 283 | We provide these macros for efficiency. No range check of CHARSET. */ | ||
| 284 | |||
| 285 | /* Return entry of CHARSET (C integer) in Vcharset_table. */ | ||
| 286 | #define CHARSET_TABLE_ENTRY(charset) \ | ||
| 287 | XCHAR_TABLE (Vcharset_table)->contents[((charset) == CHARSET_ASCII \ | ||
| 288 | ? 0 : (charset) + 128)] | ||
| 289 | |||
| 290 | /* Return information INFO-IDX of CHARSET. */ | ||
| 291 | #define CHARSET_TABLE_INFO(charset, info_idx) \ | ||
| 292 | XVECTOR (CHARSET_TABLE_ENTRY (charset))->contents[info_idx] | ||
| 293 | |||
| 294 | #define CHARSET_ID_IDX (0) | ||
| 295 | #define CHARSET_BYTES_IDX (1) | ||
| 296 | #define CHARSET_DIMENSION_IDX (2) | ||
| 297 | #define CHARSET_CHARS_IDX (3) | ||
| 298 | #define CHARSET_WIDTH_IDX (4) | ||
| 299 | #define CHARSET_DIRECTION_IDX (5) | ||
| 300 | #define CHARSET_LEADING_CODE_BASE_IDX (6) | ||
| 301 | #define CHARSET_LEADING_CODE_EXT_IDX (7) | ||
| 302 | #define CHARSET_ISO_FINAL_CHAR_IDX (8) | ||
| 303 | #define CHARSET_ISO_GRAPHIC_PLANE_IDX (9) | ||
| 304 | #define CHARSET_REVERSE_CHARSET_IDX (10) | ||
| 305 | #define CHARSET_SHORT_NAME_IDX (11) | ||
| 306 | #define CHARSET_LONG_NAME_IDX (12) | ||
| 307 | #define CHARSET_DESCRIPTION_IDX (13) | ||
| 308 | #define CHARSET_PLIST_IDX (14) | ||
| 309 | /* Size of a vector of each entry of Vcharset_table. */ | ||
| 310 | #define CHARSET_MAX_IDX (15) | ||
| 311 | |||
| 312 | /* And several more macros to be used frequently. */ | ||
| 313 | #define CHARSET_BYTES(charset) \ | ||
| 314 | XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX)) | ||
| 315 | #define CHARSET_DIMENSION(charset) \ | ||
| 316 | XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX)) | ||
| 317 | #define CHARSET_CHARS(charset) \ | ||
| 318 | XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX)) | ||
| 319 | #define CHARSET_WIDTH(charset) \ | ||
| 320 | XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX)) | ||
| 321 | #define CHARSET_DIRECTION(charset) \ | ||
| 322 | XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX)) | ||
| 323 | #define CHARSET_LEADING_CODE_BASE(charset) \ | ||
| 324 | XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)) | ||
| 325 | #define CHARSET_LEADING_CODE_EXT(charset) \ | ||
| 326 | XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)) | ||
| 327 | #define CHARSET_ISO_FINAL_CHAR(charset) \ | ||
| 328 | XINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX)) | ||
| 329 | #define CHARSET_ISO_GRAPHIC_PLANE(charset) \ | ||
| 330 | XINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)) | ||
| 331 | #define CHARSET_REVERSE_CHARSET(charset) \ | ||
| 332 | XINT (CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)) | ||
| 333 | |||
| 334 | /* Macros to specify direction of a charset. */ | ||
| 335 | #define CHARSET_DIRECTION_LEFT_TO_RIGHT 0 | ||
| 336 | #define CHARSET_DIRECTION_RIGHT_TO_LEFT 1 | ||
| 337 | |||
| 338 | /* A vector of charset symbol indexed by charset-id. This is used | ||
| 339 | only for returning charset symbol from C functions. */ | ||
| 340 | extern Lisp_Object Vcharset_symbol_table; | ||
| 341 | |||
| 342 | /* Return symbol of CHARSET. */ | ||
| 343 | #define CHARSET_SYMBOL(charset) \ | ||
| 344 | XVECTOR (Vcharset_symbol_table)->contents[charset] | ||
| 345 | |||
| 346 | /* 1 if CHARSET is in valid value range, else 0. */ | ||
| 347 | #define CHARSET_VALID_P(charset) \ | ||
| 348 | ((charset) == 0 \ | ||
| 349 | || ((charset) > 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \ | ||
| 350 | || ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 \ | ||
| 351 | && (charset) <= MAX_CHARSET) \ | ||
| 352 | || ((charset) == CHARSET_8_BIT_CONTROL) \ | ||
| 353 | || ((charset) == CHARSET_8_BIT_GRAPHIC)) | ||
| 354 | |||
| 355 | /* 1 if CHARSET is already defined, else 0. */ | ||
| 356 | #define CHARSET_DEFINED_P(charset) \ | ||
| 357 | (((charset) >= 0) && ((charset) <= MAX_CHARSET) \ | ||
| 358 | && !NILP (CHARSET_TABLE_ENTRY (charset))) | ||
| 359 | |||
| 360 | /* Since the information CHARSET-BYTES and CHARSET-WIDTH of | ||
| 361 | Vcharset_table can be retrieved only by the first byte of | ||
| 362 | multi-byte form (an ASCII code or a base leading-code), we provide | ||
| 363 | here tables to be used by macros BYTES_BY_CHAR_HEAD and | ||
| 364 | WIDTH_BY_CHAR_HEAD for faster information retrieval. */ | ||
| 365 | extern int bytes_by_char_head[256]; | ||
| 366 | extern int width_by_char_head[256]; | ||
| 367 | |||
| 368 | #define BYTES_BY_CHAR_HEAD(char_head) \ | ||
| 369 | (ASCII_BYTE_P (char_head) ? 1 : bytes_by_char_head[char_head]) | ||
| 370 | #define WIDTH_BY_CHAR_HEAD(char_head) \ | ||
| 371 | (ASCII_BYTE_P (char_head) ? 1 : width_by_char_head[char_head]) | ||
| 372 | |||
| 373 | /* Charset of the character C. */ | ||
| 374 | #define CHAR_CHARSET(c) \ | ||
| 375 | (SINGLE_BYTE_CHAR_P (c) \ | ||
| 376 | ? (ASCII_BYTE_P (c) \ | ||
| 377 | ? CHARSET_ASCII \ | ||
| 378 | : (c) < 0xA0 ? CHARSET_8_BIT_CONTROL : CHARSET_8_BIT_GRAPHIC) \ | ||
| 379 | : ((c) < MIN_CHAR_OFFICIAL_DIMENSION2 \ | ||
| 380 | ? CHAR_FIELD2 (c) + 0x70 \ | ||
| 381 | : ((c) < MIN_CHAR_PRIVATE_DIMENSION2 \ | ||
| 382 | ? CHAR_FIELD1 (c) + 0x8F \ | ||
| 383 | : CHAR_FIELD1 (c) + 0xE0))) | ||
| 384 | 57 | ||
| 385 | /* Check if two characters C1 and C2 belong to the same charset. */ | 58 | /* Name of the charset (symbol). */ |
| 386 | #define SAME_CHARSET_P(c1, c2) \ | 59 | charset_name, |
| 387 | (c1 < MIN_CHAR_OFFICIAL_DIMENSION2 \ | 60 | |
| 388 | ? (c1 & CHAR_FIELD2_MASK) == (c2 & CHAR_FIELD2_MASK) \ | 61 | /* Property list of the charset. */ |
| 389 | : (c1 & CHAR_FIELD1_MASK) == (c2 & CHAR_FIELD1_MASK)) | 62 | charset_plist, |
| 390 | 63 | ||
| 391 | /* Return a character of which charset is CHARSET and position-codes | 64 | /* If the method of the charset is `MAP_DEFERRED', the value is a |
| 392 | are C1 and C2. DIMENSION1 character ignores C2. */ | 65 | mapping vector or a file name that contains mapping vector. |
| 393 | #define MAKE_CHAR(charset, c1, c2) \ | 66 | Otherwise, nil. */ |
| 394 | ((charset) == CHARSET_ASCII \ | 67 | charset_map, |
| 395 | ? (c1) & 0x7F \ | 68 | |
| 396 | : (((charset) == CHARSET_8_BIT_CONTROL \ | 69 | /* If the method of the charset is `MAP', the value is a vector |
| 397 | || (charset) == CHARSET_8_BIT_GRAPHIC) \ | 70 | that maps code points of the charset to characters. The vector |
| 398 | ? ((c1) & 0x7F) | 0x80 \ | 71 | is indexed by a character index. A character index is |
| 399 | : ((CHARSET_DEFINED_P (charset) \ | 72 | calculated from a code point and the code-space table of the |
| 400 | ? CHARSET_DIMENSION (charset) == 1 \ | 73 | charset. */ |
| 401 | : (charset) < MIN_CHARSET_PRIVATE_DIMENSION2) \ | 74 | charset_decoder, |
| 402 | ? (((charset) - 0x70) << 7) | ((c1) <= 0 ? 0 : ((c1) & 0x7F)) \ | 75 | |
| 403 | : ((((charset) \ | 76 | /* If the method of the charset is `MAP', the value is a |
| 404 | - ((charset) < MIN_CHARSET_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)) \ | 77 | char-table that maps characters of the charset to code |
| 405 | << 14) \ | 78 | points. */ |
| 406 | | ((c2) <= 0 ? 0 : ((c2) & 0x7F)) \ | 79 | charset_encoder, |
| 407 | | ((c1) <= 0 ? 0 : (((c1) & 0x7F) << 7)))))) | 80 | |
| 408 | 81 | /* If the method of the charset is `INHERIT', the value is a list | |
| 409 | 82 | of the form (PARENT-CHARSET-ID . CODE-OFFSET). */ | |
| 410 | /* If GENERICP is nonzero, return nonzero iff C is a valid normal or | 83 | charset_parents, |
| 411 | generic character. If GENERICP is zero, return nonzero iff C is a | 84 | |
| 412 | valid normal character. */ | 85 | /* */ |
| 413 | #define CHAR_VALID_P(c, genericp) \ | 86 | charset_unify_map, |
| 414 | ((c) >= 0 \ | 87 | |
| 415 | && (SINGLE_BYTE_CHAR_P (c) || char_valid_p (c, genericp))) | 88 | /* */ |
| 416 | 89 | charset_deunifier, | |
| 417 | /* This default value is used when nonascii-translation-table or | 90 | |
| 418 | nonascii-insert-offset fail to convert unibyte character to a valid | 91 | /* The length of charset attribute vector. */ |
| 419 | multibyte character. This makes a Latin-1 character. */ | 92 | charset_attr_max |
| 420 | 93 | }; | |
| 421 | #define DEFAULT_NONASCII_INSERT_OFFSET 0x800 | 94 | |
| 422 | 95 | /* Methods for converting code points and characters of charsets. */ | |
| 423 | /* Parse multibyte string STR of length LENGTH and set BYTES to the | 96 | |
| 424 | byte length of a character at STR. */ | 97 | enum charset_method |
| 425 | 98 | { | |
| 426 | #ifdef BYTE_COMBINING_DEBUG | 99 | /* For a charset of this method, a character code is calculated |
| 427 | 100 | from a character index (which is calculated from a code point) | |
| 428 | #define PARSE_MULTIBYTE_SEQ(str, length, bytes) \ | 101 | simply by adding an offset value. */ |
| 102 | CHARSET_METHOD_OFFSET, | ||
| 103 | |||
| 104 | /* For a charset of this method, a decoder vector and an encoder | ||
| 105 | char-table are used for code point <-> character code | ||
| 106 | conversion. */ | ||
| 107 | CHARSET_METHOD_MAP, | ||
| 108 | |||
| 109 | /* Same as above but decoder and encoder are loaded from a file on | ||
| 110 | demand. Once loaded, the method is changed to | ||
| 111 | CHARSET_METHOD_MAP. */ | ||
| 112 | CHARSET_METHOD_MAP_DEFERRED, | ||
| 113 | |||
| 114 | /* A charset of this method inherits characters from the other | ||
| 115 | charsets. */ | ||
| 116 | CHARSET_METHOD_INHERIT, | ||
| 117 | }; | ||
| 118 | |||
| 119 | struct charset | ||
| 120 | { | ||
| 121 | int id; | ||
| 122 | |||
| 123 | int hash_index; | ||
| 124 | |||
| 125 | /* Dimension of the charset: 1, 2, 3, or 4. */ | ||
| 126 | int dimension; | ||
| 127 | |||
| 128 | /* Minimum byte code in each dimension. */ | ||
| 129 | int code_space[16]; | ||
| 130 | |||
| 131 | /* 1 if there's no gap in code-points. */ | ||
| 132 | int code_linear_p; | ||
| 133 | |||
| 134 | /* If the charset is treated as 94-chars in ISO-2022, the value is 0. | ||
| 135 | If the charset is treated as 96-chars in ISO-2022, the value is 1. */ | ||
| 136 | int iso_chars_96; | ||
| 137 | |||
| 138 | /* ISO final character code for the charset: 48..127. | ||
| 139 | It may be 0 if the charset doesn't conform to ISO-2022. */ | ||
| 140 | int iso_final; | ||
| 141 | |||
| 142 | int iso_revision; | ||
| 143 | |||
| 144 | /* If the charset is identical to one supported by Emacs 21 and | ||
| 145 | earlier, the identification number of the charset used in those | ||
| 146 | versions. Otherwise, -1. */ | ||
| 147 | int emacs_mule_id; | ||
| 148 | |||
| 149 | /* Nonzero iff the charset is compatible with ASCII. */ | ||
| 150 | int ascii_compatible_p; | ||
| 151 | |||
| 152 | /* Nonzero iff the charset is supplementary. */ | ||
| 153 | int supplementary_p; | ||
| 154 | |||
| 155 | /* Nonzero iff all the code points are representable by Lisp_Int. */ | ||
| 156 | int compact_codes_p; | ||
| 157 | |||
| 158 | /* The method for encoding/decoding characters of the charset. */ | ||
| 159 | enum charset_method method; | ||
| 160 | |||
| 161 | /* Minimum and Maximum code points of the charset. */ | ||
| 162 | unsigned min_code, max_code; | ||
| 163 | |||
| 164 | /* Minimum and Maximum character codes of the charset. If the | ||
| 165 | charset is compatible with ASCII, min_char is a minimum non-ASCII | ||
| 166 | character of the charset. */ | ||
| 167 | int min_char, max_char; | ||
| 168 | |||
| 169 | /* The code returned by ENCODE_CHAR if a character is not encodable | ||
| 170 | by the charset. */ | ||
| 171 | unsigned invalid_code; | ||
| 172 | |||
| 173 | /* If the method of the charset is CHARSET_METHOD_MAP, this is a | ||
| 174 | table of bits used to quickly and roughly guess if a character | ||
| 175 | belongs to the charset. | ||
| 176 | |||
| 177 | The first 64 elements are 512 bits for characters less than | ||
| 178 | 0x10000. Each bit corresponds to 128-character block. The last | ||
| 179 | 126 elements are 1008 bits for the greater characters | ||
| 180 | (0x10000..0x3FFFFF). Each bit corresponds to 4096-character | ||
| 181 | block. | ||
| 182 | |||
| 183 | If a bit is 1, at least one character in the corresponding block is | ||
| 184 | in this charset. */ | ||
| 185 | unsigned char fast_map[190]; | ||
| 186 | |||
| 187 | /* Offset value to calculate a character code from code-point, and | ||
| 188 | vice versa. */ | ||
| 189 | int code_offset; | ||
| 190 | |||
| 191 | int unified_p; | ||
| 192 | }; | ||
| 193 | |||
| 194 | /* Hash table of charset symbols vs. the corresponding attribute | ||
| 195 | vectors. */ | ||
| 196 | extern Lisp_Object Vcharset_hash_table; | ||
| 197 | |||
| 198 | /* Table of struct charset. */ | ||
| 199 | extern struct charset *charset_table; | ||
| 200 | extern int charset_table_used; | ||
| 201 | |||
| 202 | #define CHARSET_FROM_ID(id) (charset_table + (id)) | ||
| 203 | |||
| 204 | extern Lisp_Object Vcharset_list; | ||
| 205 | extern Lisp_Object Viso_2022_charset_list; | ||
| 206 | extern Lisp_Object Vemacs_mule_charset_list; | ||
| 207 | |||
| 208 | extern struct charset *emacs_mule_charset[256]; | ||
| 209 | |||
| 210 | |||
| 211 | /* Macros to access information about charset. */ | ||
| 212 | |||
| 213 | /* Return the attribute vector of charset whose symbol is SYMBOL. */ | ||
| 214 | #define CHARSET_SYMBOL_ATTRIBUTES(symbol) \ | ||
| 215 | Fgethash ((symbol), Vcharset_hash_table, Qnil) | ||
| 216 | |||
| 217 | #define CHARSET_ATTR_ID(attrs) AREF ((attrs), charset_id) | ||
| 218 | #define CHARSET_ATTR_NAME(attrs) AREF ((attrs), charset_name) | ||
| 219 | #define CHARSET_ATTR_PLIST(attrs) AREF ((attrs), charset_plist) | ||
| 220 | #define CHARSET_ATTR_MAP(attrs) AREF ((attrs), charset_map) | ||
| 221 | #define CHARSET_ATTR_DECODER(attrs) AREF ((attrs), charset_decoder) | ||
| 222 | #define CHARSET_ATTR_ENCODER(attrs) AREF ((attrs), charset_encoder) | ||
| 223 | #define CHARSET_ATTR_PARENTS(attrs) AREF ((attrs), charset_parents) | ||
| 224 | #define CHARSET_ATTR_UNIFY_MAP(attrs) AREF ((attrs), charset_unify_map) | ||
| 225 | #define CHARSET_ATTR_DEUNIFIER(attrs) AREF ((attrs), charset_deunifier) | ||
| 226 | |||
| 227 | #define CHARSET_SYMBOL_ID(symbol) \ | ||
| 228 | CHARSET_ATTR_ID (CHARSET_SYMBOL_ATTRIBUTES (symbol)) | ||
| 229 | |||
| 230 | /* Return an index to Vcharset_hash_table of the charset whose symbol | ||
| 231 | is SYMBOL. */ | ||
| 232 | #define CHARSET_SYMBOL_HASH_INDEX(symbol) \ | ||
| 233 | hash_lookup (XHASH_TABLE (Vcharset_hash_table), symbol, NULL) | ||
| 234 | |||
| 235 | /* Return the attribute vector of CHARSET. */ | ||
| 236 | #define CHARSET_ATTRIBUTES(charset) \ | ||
| 237 | (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), (charset)->hash_index)) | ||
| 238 | |||
| 239 | #define CHARSET_ID(charset) ((charset)->id) | ||
| 240 | #define CHARSET_HASH_INDEX(charset) ((charset)->hash_index) | ||
| 241 | #define CHARSET_DIMENSION(charset) ((charset)->dimension) | ||
| 242 | #define CHARSET_CODE_SPACE(charset) ((charset)->code_space) | ||
| 243 | #define CHARSET_CODE_LINEAR_P(charset) ((charset)->code_linear_p) | ||
| 244 | #define CHARSET_ISO_CHARS_96(charset) ((charset)->iso_chars_96) | ||
| 245 | #define CHARSET_ISO_FINAL(charset) ((charset)->iso_final) | ||
| 246 | #define CHARSET_ISO_PLANE(charset) ((charset)->iso_plane) | ||
| 247 | #define CHARSET_ISO_REVISION(charset) ((charset)->iso_revision) | ||
| 248 | #define CHARSET_EMACS_MULE_ID(charset) ((charset)->emacs_mule_id) | ||
| 249 | #define CHARSET_ASCII_COMPATIBLE_P(charset) ((charset)->ascii_compatible_p) | ||
| 250 | #define CHARSET_COMPACT_CODES_P(charset) ((charset)->compact_codes_p) | ||
| 251 | #define CHARSET_METHOD(charset) ((charset)->method) | ||
| 252 | #define CHARSET_MIN_CODE(charset) ((charset)->min_code) | ||
| 253 | #define CHARSET_MAX_CODE(charset) ((charset)->max_code) | ||
| 254 | #define CHARSET_INVALID_CODE(charset) ((charset)->invalid_code) | ||
| 255 | #define CHARSET_MIN_CHAR(charset) ((charset)->min_char) | ||
| 256 | #define CHARSET_MAX_CHAR(charset) ((charset)->max_char) | ||
| 257 | #define CHARSET_CODE_OFFSET(charset) ((charset)->code_offset) | ||
| 258 | #define CHARSET_UNIFIED_P(charset) ((charset)->unified_p) | ||
| 259 | |||
| 260 | #define CHARSET_NAME(charset) \ | ||
| 261 | (CHARSET_ATTR_NAME (CHARSET_ATTRIBUTES (charset))) | ||
| 262 | #define CHARSET_MAP(charset) \ | ||
| 263 | (CHARSET_ATTR_MAP (CHARSET_ATTRIBUTES (charset))) | ||
| 264 | #define CHARSET_DECODER(charset) \ | ||
| 265 | (CHARSET_ATTR_DECODER (CHARSET_ATTRIBUTES (charset))) | ||
| 266 | #define CHARSET_ENCODER(charset) \ | ||
| 267 | (CHARSET_ATTR_ENCODER (CHARSET_ATTRIBUTES (charset))) | ||
| 268 | #define CHARSET_PARENTS(charset) \ | ||
| 269 | (CHARSET_ATTR_PARENTS (CHARSET_ATTRIBUTES (charset))) | ||
| 270 | #define CHARSET_UNIFY_MAP(charset) \ | ||
| 271 | (CHARSET_ATTR_UNIFY_MAP (CHARSET_ATTRIBUTES (charset))) | ||
| 272 | #define CHARSET_DEUNIFIER(charset) \ | ||
| 273 | (CHARSET_ATTR_DEUNIFIER (CHARSET_ATTRIBUTES (charset))) | ||
| 274 | |||
| 275 | |||
| 276 | /* Nonzero iff OBJ is a valid charset symbol. */ | ||
| 277 | #define CHARSETP(obj) (CHARSET_SYMBOL_HASH_INDEX (obj) >= 0) | ||
| 278 | |||
| 279 | /* Check if X is a valid charset symbol. If not, signal an error. */ | ||
| 280 | #define CHECK_CHARSET(x) \ | ||
| 429 | do { \ | 281 | do { \ |
| 430 | int i = 1; \ | 282 | if (! SYMBOLP (x) || CHARSET_SYMBOL_HASH_INDEX (x) < 0) \ |
| 431 | while (i < (length) && ! CHAR_HEAD_P ((str)[i])) i++; \ | 283 | x = wrong_type_argument (Qcharsetp, (x)); \ |
| 432 | (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]); \ | ||
| 433 | if ((bytes) > i) \ | ||
| 434 | abort (); \ | ||
| 435 | } while (0) | 284 | } while (0) |
| 436 | 285 | ||
| 437 | #else /* not BYTE_COMBINING_DEBUG */ | ||
| 438 | |||
| 439 | #define PARSE_MULTIBYTE_SEQ(str, length, bytes) \ | ||
| 440 | (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]) | ||
| 441 | |||
| 442 | #endif /* not BYTE_COMBINING_DEBUG */ | ||
| 443 | |||
| 444 | /* Return 1 iff the byte sequence at unibyte string STR (LENGTH bytes) | ||
| 445 | is valid as a multibyte form. If valid, by a side effect, BYTES is | ||
| 446 | set to the byte length of the multibyte form. */ | ||
| 447 | |||
| 448 | #define UNIBYTE_STR_AS_MULTIBYTE_P(str, length, bytes) \ | ||
| 449 | (((str)[0] < 0x80 || (str)[0] >= 0xA0) \ | ||
| 450 | ? ((bytes) = 1) \ | ||
| 451 | : (((bytes) = BYTES_BY_CHAR_HEAD ((str)[0])), \ | ||
| 452 | ((bytes) > 1 && (bytes) <= (length) \ | ||
| 453 | && (str)[0] != LEADING_CODE_8_BIT_CONTROL \ | ||
| 454 | && !CHAR_HEAD_P ((str)[1]) \ | ||
| 455 | && ((bytes) == 2 \ | ||
| 456 | || (!CHAR_HEAD_P ((str)[2]) \ | ||
| 457 | && ((bytes) == 3 \ | ||
| 458 | || !CHAR_HEAD_P ((str)[3]))))))) | ||
| 459 | |||
| 460 | /* Return 1 iff the byte sequence at multibyte string STR is valid as | ||
| 461 | a unibyte form. By a side effect, BYTES is set to the byte length | ||
| 462 | of one character at STR. */ | ||
| 463 | |||
| 464 | #define MULTIBYTE_STR_AS_UNIBYTE_P(str, bytes) \ | ||
| 465 | ((bytes) = BYTES_BY_CHAR_HEAD ((str)[0]), \ | ||
| 466 | (str)[0] != LEADING_CODE_8_BIT_CONTROL) | ||
| 467 | |||
| 468 | /* The charset of character C is stored in CHARSET, and the | ||
| 469 | position-codes of C are stored in C1 and C2. | ||
| 470 | We store -1 in C2 if the dimension of the charset is 1. */ | ||
| 471 | |||
| 472 | #define SPLIT_CHAR(c, charset, c1, c2) \ | ||
| 473 | (SINGLE_BYTE_CHAR_P (c) \ | ||
| 474 | ? ((charset \ | ||
| 475 | = (ASCII_BYTE_P (c) \ | ||
| 476 | ? CHARSET_ASCII \ | ||
| 477 | : ((c) < 0xA0 ? CHARSET_8_BIT_CONTROL : CHARSET_8_BIT_GRAPHIC))), \ | ||
| 478 | c1 = (c), c2 = -1) \ | ||
| 479 | : ((c) & CHAR_FIELD1_MASK \ | ||
| 480 | ? (charset = (CHAR_FIELD1 (c) \ | ||
| 481 | + ((c) < MIN_CHAR_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)), \ | ||
| 482 | c1 = CHAR_FIELD2 (c), \ | ||
| 483 | c2 = CHAR_FIELD3 (c)) \ | ||
| 484 | : (charset = CHAR_FIELD2 (c) + 0x70, \ | ||
| 485 | c1 = CHAR_FIELD3 (c), \ | ||
| 486 | c2 = -1))) | ||
| 487 | |||
| 488 | /* Return 1 iff character C has valid printable glyph. */ | ||
| 489 | #define CHAR_PRINTABLE_P(c) (ASCII_BYTE_P (c) || char_printable_p (c)) | ||
| 490 | |||
| 491 | /* The charset of the character at STR is stored in CHARSET, and the | ||
| 492 | position-codes are stored in C1 and C2. | ||
| 493 | We store -1 in C2 if the character is just 2 bytes. */ | ||
| 494 | |||
| 495 | #define SPLIT_STRING(str, len, charset, c1, c2) \ | ||
| 496 | ((BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) < 2 \ | ||
| 497 | || BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) > len \ | ||
| 498 | || split_string (str, len, &charset, &c1, &c2) < 0) \ | ||
| 499 | ? c1 = *(str), charset = CHARSET_ASCII \ | ||
| 500 | : charset) | ||
| 501 | 286 | ||
| 502 | /* Mapping table from ISO2022's charset (specified by DIMENSION, | 287 | /* Check if X is a valid charset symbol. If valid, set ID to the id |
| 503 | CHARS, and FINAL_CHAR) to Emacs' charset. Should be accessed by | 288 | number of the charset. Otherwise, signal an error. */ |
| 504 | macro ISO_CHARSET_TABLE (DIMENSION, CHARS, FINAL_CHAR). */ | 289 | #define CHECK_CHARSET_GET_ID(x, id) \ |
| 505 | extern int iso_charset_table[2][2][128]; | 290 | do { \ |
| 506 | 291 | int idx; \ | |
| 507 | #define ISO_CHARSET_TABLE(dimension, chars, final_char) \ | 292 | \ |
| 508 | iso_charset_table[XINT (dimension) - 1][XINT (chars) > 94][XINT (final_char)] | 293 | if (! SYMBOLP (x) || (idx = CHARSET_SYMBOL_HASH_INDEX (x)) < 0) \ |
| 509 | 294 | x = wrong_type_argument (Qcharsetp, (x)); \ | |
| 510 | #define BASE_LEADING_CODE_P(c) (BYTES_BY_CHAR_HEAD ((unsigned char) (c)) > 1) | 295 | id = AREF (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), idx), \ |
| 511 | 296 | charset_id); \ | |
| 512 | /* Return how many bytes C will occupy in a multibyte buffer. */ | ||
| 513 | #define CHAR_BYTES(c) \ | ||
| 514 | (SINGLE_BYTE_CHAR_P (c) \ | ||
| 515 | ? ((ASCII_BYTE_P (c) || (c) >= 0xA0) ? 1 : 2) \ | ||
| 516 | : char_bytes (c)) | ||
| 517 | |||
| 518 | /* The following two macros CHAR_STRING and STRING_CHAR are the main | ||
| 519 | entry points to convert between Emacs's two types of character | ||
| 520 | representations: multi-byte form and single-word form (character | ||
| 521 | code). */ | ||
| 522 | |||
| 523 | /* Store multi-byte form of the character C in STR. The caller should | ||
| 524 | allocate at least MAX_MULTIBYTE_LENGTH bytes area at STR in | ||
| 525 | advance. Returns the length of the multi-byte form. If C is an | ||
| 526 | invalid character code, signal an error. */ | ||
| 527 | |||
| 528 | #define CHAR_STRING(c, str) \ | ||
| 529 | (SINGLE_BYTE_CHAR_P (c) \ | ||
| 530 | ? ((ASCII_BYTE_P (c) || c >= 0xA0) \ | ||
| 531 | ? (*(str) = (unsigned char)(c), 1) \ | ||
| 532 | : (*(str) = LEADING_CODE_8_BIT_CONTROL, *((str)+ 1) = c + 0x20, 2)) \ | ||
| 533 | : char_to_string (c, (unsigned char *) str)) | ||
| 534 | |||
| 535 | /* Like CHAR_STRING but don't signal an error if C is invalid. | ||
| 536 | Value is -1 in this case. */ | ||
| 537 | |||
| 538 | #define CHAR_STRING_NO_SIGNAL(c, str) \ | ||
| 539 | (SINGLE_BYTE_CHAR_P (c) \ | ||
| 540 | ? ((ASCII_BYTE_P (c) || c >= 0xA0) \ | ||
| 541 | ? (*(str) = (unsigned char)(c), 1) \ | ||
| 542 | : (*(str) = LEADING_CODE_8_BIT_CONTROL, *((str)+ 1) = c + 0x20, 2)) \ | ||
| 543 | : char_to_string_1 (c, (unsigned char *) str)) | ||
| 544 | |||
| 545 | /* Return a character code of the character of which multi-byte form | ||
| 546 | is at STR and the length is LEN. If STR doesn't contain valid | ||
| 547 | multi-byte form, only the first byte in STR is returned. */ | ||
| 548 | |||
| 549 | #define STRING_CHAR(str, len) \ | ||
| 550 | (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \ | ||
| 551 | ? (unsigned char) *(str) \ | ||
| 552 | : string_to_char (str, len, 0)) | ||
| 553 | |||
| 554 | /* This is like STRING_CHAR but the third arg ACTUAL_LEN is set to the | ||
| 555 | length of the multi-byte form. Just to know the length, use | ||
| 556 | MULTIBYTE_FORM_LENGTH. */ | ||
| 557 | |||
| 558 | #define STRING_CHAR_AND_LENGTH(str, len, actual_len) \ | ||
| 559 | (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \ | ||
| 560 | ? ((actual_len) = 1), (unsigned char) *(str) \ | ||
| 561 | : string_to_char (str, len, &(actual_len))) | ||
| 562 | |||
| 563 | /* Fetch the "next" character from Lisp string STRING at byte position | ||
| 564 | BYTEIDX, character position CHARIDX. Store it into OUTPUT. | ||
| 565 | |||
| 566 | All the args must be side-effect-free. | ||
| 567 | BYTEIDX and CHARIDX must be lvalues; | ||
| 568 | we increment them past the character fetched. */ | ||
| 569 | |||
| 570 | #define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \ | ||
| 571 | if (1) \ | ||
| 572 | { \ | ||
| 573 | CHARIDX++; \ | ||
| 574 | if (STRING_MULTIBYTE (STRING)) \ | ||
| 575 | { \ | ||
| 576 | unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \ | ||
| 577 | int space_left = XSTRING (STRING)->size_byte - BYTEIDX; \ | ||
| 578 | int actual_len; \ | ||
| 579 | \ | ||
| 580 | OUTPUT = STRING_CHAR_AND_LENGTH (ptr, space_left, actual_len); \ | ||
| 581 | BYTEIDX += actual_len; \ | ||
| 582 | } \ | ||
| 583 | else \ | ||
| 584 | OUTPUT = XSTRING (STRING)->data[BYTEIDX++]; \ | ||
| 585 | } \ | ||
| 586 | else | ||
| 587 | |||
| 588 | /* Like FETCH_STRING_CHAR_ADVANCE but assume STRING is multibyte. */ | ||
| 589 | |||
| 590 | #define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \ | ||
| 591 | if (1) \ | ||
| 592 | { \ | ||
| 593 | unsigned char *fetch_string_char_ptr = &XSTRING (STRING)->data[BYTEIDX]; \ | ||
| 594 | int fetch_string_char_space_left = XSTRING (STRING)->size_byte - BYTEIDX; \ | ||
| 595 | int actual_len; \ | ||
| 596 | \ | ||
| 597 | OUTPUT \ | ||
| 598 | = STRING_CHAR_AND_LENGTH (fetch_string_char_ptr, \ | ||
| 599 | fetch_string_char_space_left, actual_len); \ | ||
| 600 | \ | ||
| 601 | BYTEIDX += actual_len; \ | ||
| 602 | CHARIDX++; \ | ||
| 603 | } \ | ||
| 604 | else | ||
| 605 | |||
| 606 | /* Like FETCH_STRING_CHAR_ADVANCE but fetch character from the current | ||
| 607 | buffer. */ | ||
| 608 | |||
| 609 | #define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \ | ||
| 610 | if (1) \ | ||
| 611 | { \ | ||
| 612 | CHARIDX++; \ | ||
| 613 | if (!NILP (current_buffer->enable_multibyte_characters)) \ | ||
| 614 | { \ | ||
| 615 | unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \ | ||
| 616 | int space_left = ((CHARIDX < GPT ? GPT_BYTE : Z_BYTE) - BYTEIDX); \ | ||
| 617 | int actual_len; \ | ||
| 618 | \ | ||
| 619 | OUTPUT= STRING_CHAR_AND_LENGTH (ptr, space_left, actual_len); \ | ||
| 620 | BYTEIDX += actual_len; \ | ||
| 621 | } \ | ||
| 622 | else \ | ||
| 623 | { \ | ||
| 624 | OUTPUT = *(BYTE_POS_ADDR (BYTEIDX)); \ | ||
| 625 | BYTEIDX++; \ | ||
| 626 | } \ | ||
| 627 | } \ | ||
| 628 | else | ||
| 629 | |||
| 630 | /* Return the length of the multi-byte form at string STR of length LEN. */ | ||
| 631 | |||
| 632 | #define MULTIBYTE_FORM_LENGTH(str, len) \ | ||
| 633 | (BYTES_BY_CHAR_HEAD (*(unsigned char *)(str)) == 1 \ | ||
| 634 | ? 1 \ | ||
| 635 | : multibyte_form_length (str, len)) | ||
| 636 | |||
| 637 | #ifdef emacs | ||
| 638 | |||
| 639 | /* Increase the buffer byte position POS_BYTE of the current buffer to | ||
| 640 | the next character boundary. This macro relies on the fact that | ||
| 641 | *GPT_ADDR and *Z_ADDR are always accessible and the values are | ||
| 642 | '\0'. No range checking of POS. */ | ||
| 643 | |||
| 644 | #ifdef BYTE_COMBINING_DEBUG | ||
| 645 | |||
| 646 | #define INC_POS(pos_byte) \ | ||
| 647 | do { \ | ||
| 648 | unsigned char *p = BYTE_POS_ADDR (pos_byte); \ | ||
| 649 | if (BASE_LEADING_CODE_P (*p)) \ | ||
| 650 | { \ | ||
| 651 | int len, bytes; \ | ||
| 652 | len = Z_BYTE - pos_byte; \ | ||
| 653 | PARSE_MULTIBYTE_SEQ (p, len, bytes); \ | ||
| 654 | pos_byte += bytes; \ | ||
| 655 | } \ | ||
| 656 | else \ | ||
| 657 | pos_byte++; \ | ||
| 658 | } while (0) | 297 | } while (0) |
| 659 | 298 | ||
| 660 | #else /* not BYTE_COMBINING_DEBUG */ | ||
| 661 | 299 | ||
| 662 | #define INC_POS(pos_byte) \ | 300 | /* Check if X is a valid charset symbol. If valid, set ATTR to the |
| 663 | do { \ | 301 | attr vector of the charset. Otherwise, signal an error. */ |
| 664 | unsigned char *p = BYTE_POS_ADDR (pos_byte); \ | 302 | #define CHECK_CHARSET_GET_ATTR(x, attr) \ |
| 665 | pos_byte += BYTES_BY_CHAR_HEAD (*p); \ | 303 | do { \ |
| 304 | if (!SYMBOLP (x) || NILP (attr = CHARSET_SYMBOL_ATTRIBUTES (x))) \ | ||
| 305 | x = wrong_type_argument (Qcharsetp, (x)); \ | ||
| 666 | } while (0) | 306 | } while (0) |
| 667 | 307 | ||
| 668 | #endif /* not BYTE_COMBINING_DEBUG */ | ||
| 669 | 308 | ||
| 670 | /* Decrease the buffer byte position POS_BYTE of the current buffer to | 309 | #define CHECK_CHARSET_GET_CHARSET(x, charset) \ |
| 671 | the previous character boundary. No range checking of POS. */ | 310 | do { \ |
| 672 | #define DEC_POS(pos_byte) \ | 311 | int id; \ |
| 673 | do { \ | 312 | CHECK_CHARSET_GET_ID (x, id); \ |
| 674 | unsigned char *p, *p_min; \ | 313 | charset = CHARSET_FROM_ID (id); \ |
| 675 | \ | ||
| 676 | pos_byte--; \ | ||
| 677 | if (pos_byte < GPT_BYTE) \ | ||
| 678 | p = BEG_ADDR + pos_byte - 1, p_min = BEG_ADDR; \ | ||
| 679 | else \ | ||
| 680 | p = BEG_ADDR + GAP_SIZE + pos_byte - 1, p_min = GAP_END_ADDR; \ | ||
| 681 | if (p > p_min && !CHAR_HEAD_P (*p)) \ | ||
| 682 | { \ | ||
| 683 | unsigned char *pend = p--; \ | ||
| 684 | int len, bytes; \ | ||
| 685 | while (p > p_min && !CHAR_HEAD_P (*p)) p--; \ | ||
| 686 | len = pend + 1 - p; \ | ||
| 687 | PARSE_MULTIBYTE_SEQ (p, len, bytes); \ | ||
| 688 | if (bytes == len) \ | ||
| 689 | pos_byte -= len - 1; \ | ||
| 690 | } \ | ||
| 691 | } while (0) | 314 | } while (0) |
| 692 | 315 | ||
| 693 | /* Increment both CHARPOS and BYTEPOS, each in the appropriate way. */ | ||
| 694 | 316 | ||
| 695 | #define INC_BOTH(charpos, bytepos) \ | 317 | /* Lookup Vcharset_order_list and return the first charset that |
| 696 | do \ | 318 | contains the character C. */ |
| 697 | { \ | 319 | #define CHAR_CHARSET(c) \ |
| 698 | (charpos)++; \ | 320 | char_charset ((c), Qnil, NULL) |
| 699 | if (NILP (current_buffer->enable_multibyte_characters)) \ | 321 | |
| 700 | (bytepos)++; \ | 322 | #if 0 |
| 701 | else \ | 323 | /* Char-table of charset-sets. Each element is a bool vector indexed |
| 702 | INC_POS ((bytepos)); \ | 324 | by a charset ID. */ |
| 703 | } \ | 325 | extern Lisp_Object Vchar_charset_set; |
| 704 | while (0) | 326 | |
| 705 | 327 | /* Charset-bag of character C. */ | |
| 706 | /* Decrement both CHARPOS and BYTEPOS, each in the appropriate way. */ | 328 | #define CHAR_CHARSET_SET(c) \ |
| 707 | 329 | CHAR_TABLE_REF (Vchar_charset_set, c) | |
| 708 | #define DEC_BOTH(charpos, bytepos) \ | 330 | |
| 709 | do \ | 331 | /* Check if two characters C1 and C2 belong to the same charset. */ |
| 710 | { \ | 332 | #define SAME_CHARSET_P(c1, c2) \ |
| 711 | (charpos)--; \ | 333 | intersection_p (CHAR_CHARSET_SET (c1), CHAR_CHARSET_SET (c2)) |
| 712 | if (NILP (current_buffer->enable_multibyte_characters)) \ | 334 | |
| 713 | (bytepos)--; \ | 335 | #endif |
| 714 | else \ | 336 | |
| 715 | DEC_POS ((bytepos)); \ | 337 | |
| 716 | } \ | 338 | /* Return a character corresponding to the code-point CODE of CHARSET. |
| 717 | while (0) | 339 | Try some optimization before calling decode_char. */ |
| 340 | |||
| 341 | #define DECODE_CHAR(charset, code) \ | ||
| 342 | ((ASCII_BYTE_P (code) && (charset)->ascii_compatible_p) \ | ||
| 343 | ? (code) \ | ||
| 344 | : ((code) < (charset)->min_code || (code) > (charset)->max_code) \ | ||
| 345 | ? -1 \ | ||
| 346 | : (charset)->unified_p \ | ||
| 347 | ? decode_char ((charset), (code)) \ | ||
| 348 | : (charset)->method == CHARSET_METHOD_OFFSET \ | ||
| 349 | ? ((charset)->code_linear_p \ | ||
| 350 | ? (code) - (charset)->min_code + (charset)->code_offset \ | ||
| 351 | : decode_char ((charset), (code))) \ | ||
| 352 | : (charset)->method == CHARSET_METHOD_MAP \ | ||
| 353 | ? ((charset)->code_linear_p \ | ||
| 354 | ? XINT (AREF (CHARSET_DECODER (charset), \ | ||
| 355 | (code) - (charset)->min_code)) \ | ||
| 356 | : decode_char ((charset), (code))) \ | ||
| 357 | : decode_char ((charset), (code))) | ||
| 358 | |||
| 359 | |||
| 360 | /* Return the code point of character C in CHARSET. | ||
| 361 | Try some optimization before calling encode_char. */ | ||
| 362 | |||
| 363 | #define ENCODE_CHAR(charset, c) \ | ||
| 364 | ((ASCII_CHAR_P (c) && (charset)->ascii_compatible_p) \ | ||
| 365 | ? (c) \ | ||
| 366 | : (charset)->unified_p \ | ||
| 367 | ? encode_char ((charset), (c)) \ | ||
| 368 | : ((c) < (charset)->min_char || (c) > (charset)->max_char) \ | ||
| 369 | ? (charset)->invalid_code \ | ||
| 370 | : (charset)->method == CHARSET_METHOD_OFFSET \ | ||
| 371 | ? ((charset)->code_linear_p \ | ||
| 372 | ? (c) - (charset)->code_offset + (charset)->min_code \ | ||
| 373 | : encode_char ((charset), (c))) \ | ||
| 374 | : (charset)->method == CHARSET_METHOD_MAP \ | ||
| 375 | ? ((charset)->compact_codes_p \ | ||
| 376 | ? XFASTINT (CHAR_TABLE_REF (CHARSET_ENCODER (charset), (c))) \ | ||
| 377 | : encode_char ((charset), (c))) \ | ||
| 378 | : encode_char ((charset), (c))) | ||
| 379 | |||
| 380 | |||
| 381 | /* Set to 1 when a charset map is loaded to warn that a buffer text | ||
| 382 | and a string data may be relocated. */ | ||
| 383 | extern int charset_map_loaded; | ||
| 384 | |||
| 385 | |||
| 386 | /* Set CHARSET to the highest-priority charset of C, and CODE to the | ||
| 387 | code-point of C in CHARSET. */ | ||
| 388 | #define SPLIT_CHAR(c, charset, code) \ | ||
| 389 | ((charset) = char_charset ((c), Qnil, &(code))) | ||
| 390 | |||
| 391 | |||
| 392 | #define ISO_MAX_DIMENSION 3 | ||
| 393 | #define ISO_MAX_CHARS 2 | ||
| 394 | #define ISO_MAX_FINAL 0x80 /* only 0x30..0x7F are used */ | ||
| 395 | |||
| 396 | /* Mapping table from ISO2022's charset (specified by DIMENSION, | ||
| 397 | CHARS, and FINAL_CHAR) to Emacs' charset ID. Should be accessed by | ||
| 398 | macro ISO_CHARSET_TABLE (DIMENSION, CHARS, FINAL_CHAR). */ | ||
| 399 | extern int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL]; | ||
| 718 | 400 | ||
| 719 | /* Increase the buffer byte position POS_BYTE of the current buffer to | 401 | /* A charset of type iso2022 that has DIMENSION, CHARS, and FINAL |
| 720 | the next character boundary. This macro relies on the fact that | 402 | (final character). */ |
| 721 | *GPT_ADDR and *Z_ADDR are always accessible and the values are | 403 | #define ISO_CHARSET_TABLE(dimension, chars_96, final) \ |
| 722 | '\0'. No range checking of POS_BYTE. */ | 404 | iso_charset_table[(dimension) - 1][(chars_96)][(final)] |
| 723 | 405 | ||
| 724 | #ifdef BYTE_COMBINING_DEBUG | 406 | /* Nonzero iff the charset whose fast map is FAST_MAP may contain C. */ |
| 407 | #define CHARSET_FAST_MAP_REF(c, fast_map) \ | ||
| 408 | ((c) < 0x10000 \ | ||
| 409 | ? fast_map[(c) >> 10] & (1 << (((c) >> 7) & 7)) \ | ||
| 410 | : fast_map[((c) >> 15) + 62] & (1 << (((c) >> 12) & 7))) | ||
| 725 | 411 | ||
| 726 | #define BUF_INC_POS(buf, pos_byte) \ | 412 | #define CHARSET_FAST_MAP_SET(c, fast_map) \ |
| 727 | do { \ | 413 | do { \ |
| 728 | unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \ | 414 | if ((c) < 0x10000) \ |
| 729 | if (BASE_LEADING_CODE_P (*p)) \ | 415 | (fast_map)[(c) >> 10] |= 1 << (((c) >> 7) & 7); \ |
| 730 | { \ | ||
| 731 | int len, bytes; \ | ||
| 732 | len = BUF_Z_BYTE (buf) - pos_byte; \ | ||
| 733 | PARSE_MULTIBYTE_SEQ (p, len, bytes); \ | ||
| 734 | pos_byte += bytes; \ | ||
| 735 | } \ | ||
| 736 | else \ | 416 | else \ |
| 737 | pos_byte++; \ | 417 | (fast_map)[((c) >> 15) + 62] |= 1 << (((c) >> 12) & 7); \ |
| 738 | } while (0) | 418 | } while (0) |
| 739 | 419 | ||
| 740 | #else /* not BYTE_COMBINING_DEBUG */ | ||
| 741 | 420 | ||
| 742 | #define BUF_INC_POS(buf, pos_byte) \ | ||
| 743 | do { \ | ||
| 744 | unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \ | ||
| 745 | pos_byte += BYTES_BY_CHAR_HEAD (*p); \ | ||
| 746 | } while (0) | ||
| 747 | 421 | ||
| 748 | #endif /* not BYTE_COMBINING_DEBUG */ | 422 | /* 1 iff CHARSET may contain the character C. */ |
| 423 | #define CHAR_CHARSET_P(c, charset) \ | ||
| 424 | ((ASCII_CHAR_P (c) && (charset)->ascii_compatible_p) \ | ||
| 425 | || (CHARSET_UNIFIED_P (charset) \ | ||
| 426 | ? encode_char ((charset), (c)) != (charset)->invalid_code \ | ||
| 427 | : (CHARSET_FAST_MAP_REF ((c), (charset)->fast_map) \ | ||
| 428 | && ((charset)->method == CHARSET_METHOD_OFFSET \ | ||
| 429 | ? (c) >= (charset)->min_char && (c) <= (charset)->max_char \ | ||
| 430 | : ((charset)->method == CHARSET_METHOD_MAP \ | ||
| 431 | && (charset)->compact_codes_p) \ | ||
| 432 | ? (XFASTINT (CHAR_TABLE_REF (CHARSET_ENCODER (charset), (c))) \ | ||
| 433 | != (charset)->invalid_code) \ | ||
| 434 | : encode_char ((charset), (c)) != (charset)->invalid_code)))) | ||
| 749 | 435 | ||
| 750 | /* Decrease the buffer byte position POS_BYTE of the current buffer to | ||
| 751 | the previous character boundary. No range checking of POS_BYTE. */ | ||
| 752 | #define BUF_DEC_POS(buf, pos_byte) \ | ||
| 753 | do { \ | ||
| 754 | unsigned char *p, *p_min; \ | ||
| 755 | pos_byte--; \ | ||
| 756 | if (pos_byte < BUF_GPT_BYTE (buf)) \ | ||
| 757 | { \ | ||
| 758 | p = BUF_BEG_ADDR (buf) + pos_byte - 1; \ | ||
| 759 | p_min = BUF_BEG_ADDR (buf); \ | ||
| 760 | } \ | ||
| 761 | else \ | ||
| 762 | { \ | ||
| 763 | p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - 1; \ | ||
| 764 | p_min = BUF_GAP_END_ADDR (buf); \ | ||
| 765 | } \ | ||
| 766 | if (p > p_min && !CHAR_HEAD_P (*p)) \ | ||
| 767 | { \ | ||
| 768 | unsigned char *pend = p--; \ | ||
| 769 | int len, bytes; \ | ||
| 770 | while (p > p_min && !CHAR_HEAD_P (*p)) p--; \ | ||
| 771 | len = pend + 1 - p; \ | ||
| 772 | PARSE_MULTIBYTE_SEQ (p, len, bytes); \ | ||
| 773 | if (bytes == len) \ | ||
| 774 | pos_byte -= len - 1; \ | ||
| 775 | } \ | ||
| 776 | } while (0) | ||
| 777 | 436 | ||
| 778 | #endif /* emacs */ | 437 | extern Lisp_Object Qcharsetp; |
| 779 | 438 | ||
| 780 | /* This is the maximum byte length of multi-byte sequence. */ | 439 | extern Lisp_Object Qascii, Qunicode; |
| 781 | #define MAX_MULTIBYTE_LENGTH 4 | 440 | extern int charset_ascii, charset_8_bit_control, charset_8_bit_graphic; |
| 782 | 441 | extern int charset_iso_8859_1; | |
| 783 | extern void invalid_character P_ ((int)); | 442 | extern int charset_primary; |
| 784 | 443 | ||
| 785 | extern int translate_char P_ ((Lisp_Object, int, int, int, int)); | 444 | extern struct charset *char_charset P_ ((int, Lisp_Object, unsigned *)); |
| 786 | extern int split_string P_ ((const unsigned char *, int, int *, | 445 | extern Lisp_Object charset_attributes P_ ((int)); |
| 787 | unsigned char *, unsigned char *)); | 446 | |
| 788 | extern int char_to_string P_ ((int, unsigned char *)); | 447 | extern int decode_char P_ ((struct charset *, unsigned)); |
| 789 | extern int char_to_string_1 P_ ((int, unsigned char *)); | 448 | extern unsigned encode_char P_ ((struct charset *, int)); |
| 790 | extern int string_to_char P_ ((const unsigned char *, int, int *)); | 449 | extern int string_xstring_p P_ ((Lisp_Object)); |
| 791 | extern int char_printable_p P_ ((int c)); | 450 | |
| 792 | extern int multibyte_form_length P_ ((const unsigned char *, int)); | 451 | EXFUN (Funify_charset, 2); |
| 793 | extern void parse_str_as_multibyte P_ ((unsigned char *, int, int *, int *)); | ||
| 794 | extern int str_as_multibyte P_ ((unsigned char *, int, int, int *)); | ||
| 795 | extern int parse_str_to_multibyte P_ ((unsigned char *, int)); | ||
| 796 | extern int str_to_multibyte P_ ((unsigned char *, int, int)); | ||
| 797 | extern int str_as_unibyte P_ ((unsigned char *, int)); | ||
| 798 | extern int get_charset_id P_ ((Lisp_Object)); | ||
| 799 | extern int find_charset_in_text P_ ((unsigned char *, int, int, int *, | ||
| 800 | Lisp_Object)); | ||
| 801 | extern int strwidth P_ ((unsigned char *, int)); | ||
| 802 | extern int c_string_width P_ ((unsigned char *, int, int, int *, int *)); | ||
| 803 | extern int lisp_string_width P_ ((Lisp_Object, int, int *, int *)); | ||
| 804 | extern int char_bytes P_ ((int)); | ||
| 805 | extern int char_valid_p P_ ((int, int)); | ||
| 806 | |||
| 807 | extern Lisp_Object Vtranslation_table_vector; | ||
| 808 | |||
| 809 | /* Return a translation table of id number ID. */ | ||
| 810 | #define GET_TRANSLATION_TABLE(id) \ | ||
| 811 | (XCDR(XVECTOR(Vtranslation_table_vector)->contents[(id)])) | ||
| 812 | |||
| 813 | /* A char-table for characters which may invoke auto-filling. */ | ||
| 814 | extern Lisp_Object Vauto_fill_chars; | ||
| 815 | |||
| 816 | /* Copy LEN bytes from FROM to TO. This macro should be used only | ||
| 817 | when a caller knows that LEN is short and the obvious copy loop is | ||
| 818 | faster than calling bcopy which has some overhead. Copying a | ||
| 819 | multibyte sequence of a multibyte character is the typical case. */ | ||
| 820 | |||
| 821 | #define BCOPY_SHORT(from, to, len) \ | ||
| 822 | do { \ | ||
| 823 | int i = len; \ | ||
| 824 | unsigned char *from_p = from, *to_p = to; \ | ||
| 825 | while (i--) *to_p++ = *from_p++; \ | ||
| 826 | } while (0) | ||
| 827 | 452 | ||
| 828 | #endif /* EMACS_CHARSET_H */ | 453 | #endif /* EMACS_CHARSET_H */ |