aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Kenichi Handa 1999-09-03 01:28:42 +0000
committer Kenichi Handa 1999-09-03 01:28:42 +0000
commit ac4137cca636cd011ce43a595c9cf57126c0a558 (patch)
tree a30b7c701ff4729f3dd766e0f22fafe9109fbbe1 /src
parent 384107f281bd6a43a4c66e9bbb1826e1bc8cec05 (diff)
download emacs-ac4137cca636cd011ce43a595c9cf57126c0a558.tar.gz
emacs-ac4137cca636cd011ce43a595c9cf57126c0a558.zip
(SPLIT_COMPOSITE_SEQ): New macro.
(SPLIT_CHARACTER_SEQ): New macro. (SPLIT_MULTIBYTE_SEQ): New macro. (CHAR_COMPONENT_VALID_P): New macro. (non_ascii_char_to_string): Generate a multibyte sequence as far as possible. (string_to_non_ascii_char): The 4th arg exclude_tail_garbage is deleted. Caller changed. Use the macro SPLIT_MULTIBYTE_SEQ. (split_non_ascii_string): Likewise. (multibyte_form_length): Use the macro PARSE_MULTIBYTE_SEQ. (char_printable_p): New function. (translate_char): Check character by NATNUMP instead of INTEGERP. (unibyte_char_to_multibyte): Call char_valid_p instead of VALID_MULTIBYTE_CHAR_P. (Fmake_char_internal): Check the arguments more rigidly. (Fcharset_after): Use the macro SPLIT_MULTIBYTE_SEQ. (char_valid_p): Check the validity by CHAR_COMPONENT_VALID_P. (Fmultibyte_char_to_unibyte): Check the validity of character by CHAR_VALID_P. (chars_in_text): Call multibyte_chars_in_text. (multibyte_chars_in_text): Use the macro PARSE_MULTIBYTE_SEQ. (Fcompose_string): Use the macro STRING_CHAR_AND_LENGTH instead of STRING_CHAR_AND_CHAR_LENGTH (which is obsolete now).
Diffstat (limited to 'src')
-rw-r--r-- src/charset.c 417
1 files changed, 245 insertions, 172 deletions
diff --git a/src/charset.c b/src/charset.c
index 9c6da218436..0876644ee94 100644
--- a/src/charset.c
+++ b/src/charset.c
@@ -124,13 +124,89 @@ invalid_character (c)
124 error ("Invalid character: 0%o, %d, 0x%x", c, c, c); 124 error ("Invalid character: 0%o, %d, 0x%x", c, c, c);
125} 125}
126 126
127/* Parse string STR of length LENGTH (>= 2) and check if a composite
128 character is at STR. If there is a valid composite character, set
129 CHARSET, C1, and C2 to proper values so that MAKE_CHAR can compose
130 the composite character from them. Otherwise, set CHARSET to
131 CHARSET_COMPOSITION, but set C1 to the second byte of the sequence,
132 C2 to -1 so that MAKE_CHAR can compose the invalid multibyte
133 character whose string representation is two bytes of STR[0] and
134 STR[1]. In any case, set BYTES to LENGTH. */
135
136#define SPLIT_COMPOSITE_SEQ(str, length, bytes, charset, c1, c2) \
137 do { \
138 int cmpchar_id = str_cmpchar_id ((str), (length)); \
139 \
140 (charset) = CHARSET_COMPOSITION; \
141 (bytes) = (length); \
142 if (cmpchar_id >= 0) \
143 { \
144 (c1) = CHAR_FIELD2 (cmpchar_id); \
145 (c2) = CHAR_FIELD3 (cmpchar_id); \
146 } \
147 else \
148 { \
149 (c1) = (str)[1] & 0x7F; \
150 (c2) = -1; \
151 } \
152 } while (0)
153
154/* Parse string STR of length LENGTH (>= 2) and check if a
155 non-composite multibyte character is at STR. Set BYTES to the
156 actual length, CHARSET, C1, and C2 to proper values so that
157 MAKE_CHAR can compose the multibyte character from them. */
158
159#define SPLIT_CHARACTER_SEQ(str, length, bytes, charset, c1, c2) \
160 do { \
161 (bytes) = 1; \
162 (charset) = (str)[0]; \
163 if ((charset) >= LEADING_CODE_PRIVATE_11 \
164 && (charset) <= LEADING_CODE_PRIVATE_22) \
165 (charset) = (str)[(bytes)++]; \
166 if ((bytes) < (length)) \
167 { \
168 (c1) = (str)[(bytes)++] & 0x7F; \
169 if ((bytes) < (length)) \
170 (c2) = (str)[(bytes)++] & 0x7F; \
171 else \
172 (c2) = -1; \
173 } \
174 else \
175 (c1) = (c2) = -1; \
176 } while (0)
177
178/* Parse string STR of length LENGTH and check if a multibyte
179 character is at STR. Set BYTES to the actual length, and CHARSET, C1,
180 and C2 to proper values for that character. */
181
182#define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2) \
183 do { \
184 int i; \
185 for (i = 1; i < (length) && ! CHAR_HEAD_P ((str)[i]); i++); \
186 if (i == 1) \
187 (bytes) = 1, (charset) = CHARSET_ASCII, (c1) = (str)[0] ; \
188 else if ((str)[0] == LEADING_CODE_COMPOSITION) \
189 SPLIT_COMPOSITE_SEQ (str, i, bytes, charset, c1, c2); \
190 else \
191 { \
192 if (i > BYTES_BY_CHAR_HEAD ((str)[0])) \
193 i = BYTES_BY_CHAR_HEAD ((str)[0]); \
194 SPLIT_CHARACTER_SEQ (str, i, bytes, charset, c1, c2); \
195 } \
196 } while (0)
197
198/* 1 if CHARSET, C1, and C2 compose a valid character, else 0. */
199#define CHAR_COMPONENT_VALID_P(charset, c1, c2) \
200 (CHARSET_DIMENSION (charset) == 1 \
201 ? ((c1) >= 0x20 && (c1) <= 0x7F) \
202 : ((c1) >= 0x20 && (c1) <= 0x7F && (c2) >= 0x20 && (c2) <= 0x7F))
127 203
128/* Set STR a pointer to the multi-byte form of the character C. If C 204/* Set STR a pointer to the multi-byte form of the character C. If C
129 is not a composite character, the multi-byte form is set in WORKBUF 205 is not a composite character, the multi-byte form is set in WORKBUF
130 and STR points WORKBUF. The caller should allocate at least 4-byte 206 and STR points WORKBUF. The caller should allocate at least 4-byte
131 area at WORKBUF in advance. Returns the length of the multi-byte 207 area at WORKBUF in advance. Returns the length of the multi-byte
132 form. If C is an invalid character to have a multi-byte form, 208 form. If C is an invalid character, store (C & 0xFF) in WORKBUF[0]
133 signal an error. 209 and return 1.
134 210
135 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this 211 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
136 function directly if C can be an ASCII character. */ 212 function directly if C can be an ASCII character. */
@@ -140,8 +216,6 @@ non_ascii_char_to_string (c, workbuf, str)
140 int c; 216 int c;
141 unsigned char *workbuf, **str; 217 unsigned char *workbuf, **str;
142{ 218{
143 int charset, c1, c2;
144
145 if (c & CHAR_MODIFIER_MASK) /* This includes the case C is negative. */ 219 if (c & CHAR_MODIFIER_MASK) /* This includes the case C is negative. */
146 { 220 {
147 /* Multibyte character can't have a modifier bit. */ 221 /* Multibyte character can't have a modifier bit. */
@@ -183,111 +257,79 @@ non_ascii_char_to_string (c, workbuf, str)
183 invalid_character (c); 257 invalid_character (c);
184 258
185 *str = workbuf; 259 *str = workbuf;
186 *workbuf = c; 260 *workbuf++ = c;
187 return 1;
188 } 261 }
189 262 else
190 if (c < 0)
191 invalid_character (c);
192
193 if (COMPOSITE_CHAR_P (c))
194 { 263 {
195 int cmpchar_id = COMPOSITE_CHAR_ID (c); 264 int charset, c1, c2;
196 265
197 if (cmpchar_id < n_cmpchars) 266 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
267 if (charset == CHARSET_COMPOSITION)
198 { 268 {
199 *str = cmpchar_table[cmpchar_id]->data; 269 if (c >= MAX_CHAR)
200 return cmpchar_table[cmpchar_id]->len; 270 invalid_character (c);
271 if (c >= MIN_CHAR_COMPOSITION)
272 {
273 /* Valid composite character. */
274 *str = cmpchar_table[COMPOSITE_CHAR_ID (c)]->data;
275 workbuf = *str + cmpchar_table[COMPOSITE_CHAR_ID (c)]->len;
276 }
277 else
278 {
279 /* Invalid but can have multibyte form. */
280 *str = workbuf;
281 *workbuf++ = LEADING_CODE_COMPOSITION;
282 *workbuf++ = c1 | 0x80;
283 }
201 } 284 }
202 else 285 else if (charset > CHARSET_COMPOSITION)
203 { 286 {
204 invalid_character (c); 287 *str = workbuf;
288 if (charset >= LEADING_CODE_EXT_11)
289 *workbuf++ = (charset < LEADING_CODE_EXT_12
290 ? LEADING_CODE_PRIVATE_11
291 : (charset < LEADING_CODE_EXT_21
292 ? LEADING_CODE_PRIVATE_12
293 : (charset < LEADING_CODE_EXT_22
294 ? LEADING_CODE_PRIVATE_21
295 : LEADING_CODE_PRIVATE_22)));
296 *workbuf++ = charset;
297 if (c1 > 0 && c1 < 32 || c2 > 0 && c2 < 32)
298 invalid_character (c);
299 if (c1)
300 {
301 *workbuf++ = c1 | 0x80;
302 if (c2 > 0)
303 *workbuf++ = c2 | 0x80;
304 }
205 } 305 }
306 else if (charset == CHARSET_ASCII)
307 *workbuf++= c & 0x7F;
308 else
309 invalid_character (c);
206 } 310 }
207 311
208 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
209 if (!charset
210 || ! CHARSET_DEFINED_P (charset)
211 || c1 >= 0 && c1 < 32
212 || c2 >= 0 && c2 < 32)
213 invalid_character (c);
214
215 *str = workbuf;
216 *workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
217 if (*workbuf = CHARSET_LEADING_CODE_EXT (charset))
218 workbuf++;
219 *workbuf++ = c1 | 0x80;
220 if (c2 >= 0)
221 *workbuf++ = c2 | 0x80;
222
223 return (workbuf - *str); 312 return (workbuf - *str);
224} 313}
225 314
226/* Return a non-ASCII character of which multi-byte form is at STR of 315/* Return a non-ASCII character of which multi-byte form is at STR of
227 length LEN. If ACTUAL_LEN is not NULL, the actual length of the 316 length LEN. If ACTUAL_LEN is not NULL, the byte length of the
228 multibyte form is set to the address ACTUAL_LEN. 317 multibyte form is set to the address ACTUAL_LEN.
229 318
230 If exclude_tail_garbage is nonzero, ACTUAL_LEN excludes gabage
231 bytes following the non-ASCII character.
232
233 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function 319 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
234 directly if STR can hold an ASCII character. */ 320 directly if STR can hold an ASCII character. */
235 321
236int 322int
237string_to_non_ascii_char (str, len, actual_len, exclude_tail_garbage) 323string_to_non_ascii_char (str, len, actual_len)
238 const unsigned char *str; 324 const unsigned char *str;
239 int len, *actual_len, exclude_tail_garbage; 325 int len, *actual_len;
240{ 326{
241 int charset; 327 int c, bytes, charset, c1, c2;
242 unsigned char c1, c2;
243 int c, bytes;
244 const unsigned char *begp = str;
245
246 c = *str++;
247 bytes = 1;
248
249 if (BASE_LEADING_CODE_P (c))
250 do {
251 while (bytes < len && ! CHAR_HEAD_P (begp[bytes])) bytes++;
252
253 if (c == LEADING_CODE_COMPOSITION)
254 {
255 int cmpchar_id = str_cmpchar_id (begp, bytes);
256
257 if (cmpchar_id >= 0)
258 {
259 c = MAKE_COMPOSITE_CHAR (cmpchar_id);
260 str += cmpchar_table[cmpchar_id]->len - 1;
261 }
262 else
263 str += bytes - 1;
264 }
265 else
266 {
267 const unsigned char *endp = begp + bytes;
268 int charset = c, c1, c2 = 0;
269
270 if (str >= endp) break;
271 if (c >= LEADING_CODE_PRIVATE_11 && c <= LEADING_CODE_PRIVATE_22)
272 {
273 charset = *str++;
274 if (str < endp)
275 c1 = *str++ & 0x7F;
276 else
277 c1 = charset, charset = c;
278 }
279 else
280 c1 = *str++ & 0x7f;
281 if (CHARSET_DEFINED_P (charset)
282 && CHARSET_DIMENSION (charset) == 2
283 && str < endp)
284 c2 = *str++ & 0x7F;
285 c = MAKE_NON_ASCII_CHAR (charset, c1, c2);
286 }
287 } while (0);
288 328
329 SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
330 c = MAKE_CHAR (charset, c1, c2);
289 if (actual_len) 331 if (actual_len)
290 *actual_len = exclude_tail_garbage ? str - begp : bytes; 332 *actual_len = bytes;
291 return c; 333 return c;
292} 334}
293 335
@@ -297,53 +339,59 @@ multibyte_form_length (str, len)
297 const unsigned char *str; 339 const unsigned char *str;
298 int len; 340 int len;
299{ 341{
300 int bytes = 1; 342 int bytes;
301
302 if (BASE_LEADING_CODE_P (*str))
303 while (bytes < len && ! CHAR_HEAD_P (str[bytes])) bytes++;
304 343
344 PARSE_MULTIBYTE_SEQ (str, len, bytes);
305 return bytes; 345 return bytes;
306} 346}
307 347
308/* Check if string STR of length LEN contains valid multi-byte form of 348/* Check multibyte form at string STR of length LEN and set variables
309 a character. If valid, charset and position codes of the character 349 pointed by CHARSET, C1, and C2 to charset and position codes of the
310 is set at *CHARSET, *C1, and *C2, and return 0. If not valid, 350 character at STR, and return 0. If there's no multibyte character,
311 return -1. This should be used only in the macro SPLIT_STRING 351 return -1. This should be used only in the macro SPLIT_STRING
312 which checks range of STR in advance. */ 352 which checks range of STR in advance. */
313 353
314int 354int
315split_non_ascii_string (str, len, charset, c1, c2) 355split_non_ascii_string (str, len, charset, c1, c2)
316 register const unsigned char *str; 356 const unsigned char *str;
317 register unsigned char *c1, *c2; 357 unsigned char *c1, *c2;
318 register int len, *charset; 358 int len, *charset;
319{ 359{
320 register unsigned int cs = *str++; 360 register int bytes, cs, code1, code2 = -1;
321
322 if (cs == LEADING_CODE_COMPOSITION)
323 {
324 int cmpchar_id = str_cmpchar_id (str - 1, len);
325 361
326 if (cmpchar_id < 0) 362 SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
327 return -1; 363 if (cs == CHARSET_ASCII)
328 *charset = cs, *c1 = cmpchar_id >> 7, *c2 = cmpchar_id & 0x7F;
329 }
330 else if ((cs < LEADING_CODE_PRIVATE_11 || (cs = *str++) >= 0xA0)
331 && CHARSET_DEFINED_P (cs))
332 {
333 *charset = cs;
334 if (*str < 0xA0)
335 return -1;
336 *c1 = (*str++) & 0x7F;
337 if (CHARSET_DIMENSION (cs) == 2)
338 {
339 if (*str < 0xA0)
340 return -1;
341 *c2 = (*str++) & 0x7F;
342 }
343 }
344 else
345 return -1; 364 return -1;
346 return 0; 365 *charset = cs;
366 *c1 = code1;
367 *c2 = code2;
368}
369
370/* Return 1 iff character C has valid printable glyph. */
371int
372char_printable_p (c)
373 int c;
374{
375 int charset, c1, c2, chars;
376
377 if (SINGLE_BYTE_CHAR_P (c))
378 return 1;
379 if (c >= MIN_CHAR_COMPOSITION)
380 return (c < MAX_CHAR);
381
382 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
383 if (! CHARSET_DEFINED_P (charset))
384 return 0;
385 if (CHARSET_CHARS (charset) == 94
386 ? c1 <= 32 || c1 >= 127
387 : c1 < 32)
388 return 0;
389 if (CHARSET_DIMENSION (charset) == 2
390 && (CHARSET_CHARS (charset) == 94
391 ? c2 <= 32 || c2 >= 127
392 : c2 < 32))
393 return 0;
394 return 1;
347} 395}
348 396
349/* Translate character C by translation table TABLE. If C 397/* Translate character C by translation table TABLE. If C
@@ -360,8 +408,7 @@ translate_char (table, c, charset, c1, c2)
360 408
361 if (c < 0) c = MAKE_CHAR (charset, c1, c2); 409 if (c < 0) c = MAKE_CHAR (charset, c1, c2);
362 if (!CHAR_TABLE_P (table) 410 if (!CHAR_TABLE_P (table)
363 || (ch = Faref (table, make_number (c)), !INTEGERP (ch)) 411 || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
364 || XINT (ch) < 0)
365 return c; 412 return c;
366 413
367 SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2); 414 SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
@@ -396,13 +443,13 @@ unibyte_char_to_multibyte (c)
396 if (! NILP (Vnonascii_translation_table)) 443 if (! NILP (Vnonascii_translation_table))
397 { 444 {
398 c = XINT (Faref (Vnonascii_translation_table, make_number (c))); 445 c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
399 if (c >= 0400 && ! VALID_MULTIBYTE_CHAR_P (c)) 446 if (c >= 0400 && ! char_valid_p (c, 0))
400 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET; 447 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
401 } 448 }
402 else if (c >= 0240 && nonascii_insert_offset > 0) 449 else if (c >= 0240 && nonascii_insert_offset > 0)
403 { 450 {
404 c += nonascii_insert_offset; 451 c += nonascii_insert_offset;
405 if (c < 0400 || ! VALID_MULTIBYTE_CHAR_P (c)) 452 if (c < 0400 || ! char_valid_p (c, 0))
406 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET; 453 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
407 } 454 }
408 else if (c >= 0240) 455 else if (c >= 0240)
@@ -987,21 +1034,40 @@ DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
987 (charset, code1, code2) 1034 (charset, code1, code2)
988 Lisp_Object charset, code1, code2; 1035 Lisp_Object charset, code1, code2;
989{ 1036{
1037 int charset_id, c1, c2;
1038
990 CHECK_NUMBER (charset, 0); 1039 CHECK_NUMBER (charset, 0);
1040 charset_id = XINT (charset);
1041 if (!CHARSET_DEFINED_P (charset_id))
1042 error ("Invalid charset ID: %d", XINT (charset));
991 1043
992 if (NILP (code1)) 1044 if (NILP (code1))
993 XSETFASTINT (code1, 0); 1045 c1 = 0;
994 else 1046 else
995 CHECK_NUMBER (code1, 1); 1047 {
1048 CHECK_NUMBER (code1, 1);
1049 c1 = XINT (code1);
1050 }
996 if (NILP (code2)) 1051 if (NILP (code2))
997 XSETFASTINT (code2, 0); 1052 c2 = 0;
998 else 1053 else
999 CHECK_NUMBER (code2, 2); 1054 {
1000 1055 CHECK_NUMBER (code2, 2);
1001 if (!CHARSET_DEFINED_P (XINT (charset))) 1056 c2 = XINT (code2);
1002 error ("Invalid charset: %d", XINT (charset)); 1057 }
1003 1058
1004 return make_number (MAKE_CHAR (XINT (charset), XINT (code1), XINT (code2))); 1059 if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1060 error ("Invalid code points: %d %d", c1, c2);
1061 c1 &= 0x7F;
1062 c2 &= 0x7F;
1063 if (c1 == 0
1064 ? c2 != 0
1065 : (c2 == 0
1066 ? !CHAR_COMPONENT_VALID_P (charset, c1, 0x20)
1067 : !CHAR_COMPONENT_VALID_P (charset, c1, c2)))
1068 error ("Invalid code points: %d %d", c1, c2);
1069
1070 return make_number (MAKE_CHAR (charset_id, c1, c2));
1005} 1071}
1006 1072
1007DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0, 1073DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
@@ -1036,13 +1102,13 @@ DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1036} 1102}
1037 1103
1038DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0, 1104DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1039 "Return charset of a character in current buffer at position POS.\n\ 1105 "Return charset of a character in the current buffer at position POS.\n\
1040If POS is nil, it defauls to the current point.\n\ 1106If POS is nil, it defauls to the current point.\n\
1041If POS is out of range, the value is nil.") 1107If POS is out of range, the value is nil.")
1042 (pos) 1108 (pos)
1043 Lisp_Object pos; 1109 Lisp_Object pos;
1044{ 1110{
1045 register int pos_byte, c, charset; 1111 register int pos_byte, bytes, charset, c1, c2;
1046 register unsigned char *p; 1112 register unsigned char *p;
1047 1113
1048 if (NILP (pos)) 1114 if (NILP (pos))
@@ -1061,8 +1127,15 @@ If POS is out of range, the value is nil.")
1061 pos_byte = CHAR_TO_BYTE (XINT (pos)); 1127 pos_byte = CHAR_TO_BYTE (XINT (pos));
1062 } 1128 }
1063 p = BYTE_POS_ADDR (pos_byte); 1129 p = BYTE_POS_ADDR (pos_byte);
1064 c = STRING_CHAR (p, Z_BYTE - pos_byte); 1130 if (BASE_LEADING_CODE_P (*p))
1065 charset = CHAR_CHARSET (c); 1131 {
1132 SPLIT_MULTIBYTE_SEQ (p, Z_BYTE - pos_byte, bytes, charset, c1, c2);
1133 if (charset < 0)
1134 charset = 1;
1135 }
1136 else
1137 charset = CHARSET_ASCII;
1138
1066 return CHARSET_SYMBOL (charset); 1139 return CHARSET_SYMBOL (charset);
1067} 1140}
1068 1141
@@ -1103,15 +1176,23 @@ char_valid_p (c, genericp)
1103 if (SINGLE_BYTE_CHAR_P (c)) 1176 if (SINGLE_BYTE_CHAR_P (c))
1104 return 1; 1177 return 1;
1105 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2); 1178 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
1106 if (charset != CHARSET_COMPOSITION && !CHARSET_DEFINED_P (charset)) 1179 if (charset == CHARSET_COMPOSITION)
1107 return 0; 1180 return ((c >= MIN_CHAR_COMPOSITION
1108 return (c < MIN_CHAR_COMPOSITION 1181 && c < MIN_CHAR_COMPOSITION + n_cmpchars)
1109 ? ((c & CHAR_FIELD1_MASK) /* i.e. dimension of C is two. */ 1182 || (genericp && c == GENERIC_COMPOSITION_CHAR));
1110 ? (genericp && c1 == 0 && c2 == 0 1183 if (genericp)
1111 || c1 >= 32 && c2 >= 32) 1184 {
1112 : (genericp && c1 == 0 1185 if (c1)
1113 || c1 >= 32)) 1186 {
1114 : c < MIN_CHAR_COMPOSITION + n_cmpchars); 1187 if (c2 <= 0) c2 = 0x20;
1188 }
1189 else
1190 {
1191 if (c2 <= 0) c1 = c2 = 0x20;
1192 }
1193 }
1194 return (CHARSET_DEFINED_P (charset)
1195 && CHAR_COMPONENT_VALID_P (charset, c1, c2));
1115} 1196}
1116 1197
1117DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0, 1198DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
@@ -1158,7 +1239,7 @@ The conversion is done based on `nonascii-translation-table' (which see)\n\
1158 1239
1159 CHECK_NUMBER (ch, 0); 1240 CHECK_NUMBER (ch, 0);
1160 c = XINT (ch); 1241 c = XINT (ch);
1161 if (c < 0) 1242 if (! CHAR_VALID_P (c, 0))
1162 error ("Invalid multibyte character: %d", c); 1243 error ("Invalid multibyte character: %d", c);
1163 c = multibyte_char_to_unibyte (c, Qnil); 1244 c = multibyte_char_to_unibyte (c, Qnil);
1164 if (c < 0) 1245 if (c < 0)
@@ -1369,27 +1450,12 @@ chars_in_text (ptr, nbytes)
1369 unsigned char *ptr; 1450 unsigned char *ptr;
1370 int nbytes; 1451 int nbytes;
1371{ 1452{
1372 unsigned char *endp, c;
1373 int chars;
1374
1375 /* current_buffer is null at early stages of Emacs initialization. */ 1453 /* current_buffer is null at early stages of Emacs initialization. */
1376 if (current_buffer == 0 1454 if (current_buffer == 0
1377 || NILP (current_buffer->enable_multibyte_characters)) 1455 || NILP (current_buffer->enable_multibyte_characters))
1378 return nbytes; 1456 return nbytes;
1379 1457
1380 endp = ptr + nbytes; 1458 return multibyte_chars_in_text (ptr, nbytes);
1381 chars = 0;
1382
1383 while (ptr < endp)
1384 {
1385 c = *ptr++;
1386
1387 if (BASE_LEADING_CODE_P (c))
1388 while (ptr < endp && ! CHAR_HEAD_P (*ptr)) ptr++;
1389 chars++;
1390 }
1391
1392 return chars;
1393} 1459}
1394 1460
1395/* Return the number of characters in the NBYTES bytes at PTR. 1461/* Return the number of characters in the NBYTES bytes at PTR.
@@ -1401,18 +1467,25 @@ multibyte_chars_in_text (ptr, nbytes)
1401 unsigned char *ptr; 1467 unsigned char *ptr;
1402 int nbytes; 1468 int nbytes;
1403{ 1469{
1404 unsigned char *endp, c; 1470 unsigned char *endp;
1405 int chars; 1471 int chars, bytes;
1406 1472
1407 endp = ptr + nbytes; 1473 endp = ptr + nbytes;
1408 chars = 0; 1474 chars = 0;
1409 1475
1410 while (ptr < endp) 1476 while (ptr < endp)
1411 { 1477 {
1412 c = *ptr++; 1478 if (BASE_LEADING_CODE_P (*ptr))
1413 1479 {
1414 if (BASE_LEADING_CODE_P (c)) 1480 PARSE_MULTIBYTE_SEQ (ptr, nbytes, bytes);
1415 while (ptr < endp && ! CHAR_HEAD_P (*ptr)) ptr++; 1481 ptr += bytes;
1482 nbytes -= bytes;
1483 }
1484 else
1485 {
1486 ptr++;
1487 nbytes--;
1488 }
1416 chars++; 1489 chars++;
1417 } 1490 }
1418 1491
@@ -1514,7 +1587,7 @@ str_cmpchar_id (str, len)
1514 int i; 1587 int i;
1515 struct cmpchar_info *cmpcharp; 1588 struct cmpchar_info *cmpcharp;
1516 1589
1517 /* The second byte 0xFF means compostion rule is embedded. */ 1590 /* The second byte 0xFF means COMPOSITION rule is embedded. */
1518 embedded_rule = (str[1] == 0xFF); 1591 embedded_rule = (str[1] == 0xFF);
1519 1592
1520 /* At first, get the actual length of the composite character. */ 1593 /* At first, get the actual length of the composite character. */
@@ -1650,7 +1723,7 @@ str_cmpchar_id (str, len)
1650 /* Make `bufp' point normal multi-byte form temporally. */ 1723 /* Make `bufp' point normal multi-byte form temporally. */
1651 *bufp -= 0x20; 1724 *bufp -= 0x20;
1652 cmpcharp->glyph[i] 1725 cmpcharp->glyph[i]
1653 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0, 0), 0); 1726 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0), 0);
1654 width = WIDTH_BY_CHAR_HEAD (*bufp); 1727 width = WIDTH_BY_CHAR_HEAD (*bufp);
1655 *bufp += 0x20; 1728 *bufp += 0x20;
1656 bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20); 1729 bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20);
@@ -1870,7 +1943,7 @@ DEFUN ("compose-string", Fcompose_string, Scompose_string,
1870 { 1943 {
1871 /* Add 0x20 to the base leading-code, keep the remaining 1944 /* Add 0x20 to the base leading-code, keep the remaining
1872 bytes unchanged. */ 1945 bytes unchanged. */
1873 int c = STRING_CHAR_AND_CHAR_LENGTH (p, pend - p, len); 1946 int c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
1874 1947
1875 if (len <= 1 || ! CHAR_VALID_P (c, 0)) 1948 if (len <= 1 || ! CHAR_VALID_P (c, 0))
1876 error ("Can't compose an invalid character"); 1949 error ("Can't compose an invalid character");