diff options
| author | Kenichi Handa | 2003-10-06 11:19:39 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2003-10-06 11:19:39 +0000 |
| commit | 3cc67a4dc3672a38049698a9246c0664b83413cd (patch) | |
| tree | b7f5eb4b5fa8b88febdad985fc7a448b3c7f0705 /src | |
| parent | ec480361d4f8a22ade0830a7f7738fbf68394083 (diff) | |
| download | emacs-3cc67a4dc3672a38049698a9246c0664b83413cd.tar.gz emacs-3cc67a4dc3672a38049698a9246c0664b83413cd.zip | |
(string_xstring_p): Check by (C >= 0x100).
(find_charsets_in_text): Format of the arg CHARSETS changed. New
arg MULTIBYTE.
(Ffind_charset_region, Ffind_charset_string): Adjusted for the
change of find_charsets_in_text.
(Fsplit_char): Fix doc. Never return unknown.
Diffstat (limited to 'src')
| -rw-r--r-- | src/charset.c | 101 |
1 files changed, 47 insertions, 54 deletions
diff --git a/src/charset.c b/src/charset.c index 88752a80b79..65e233fd498 100644 --- a/src/charset.c +++ b/src/charset.c | |||
| @@ -1345,18 +1345,15 @@ string_xstring_p (string) | |||
| 1345 | { | 1345 | { |
| 1346 | const unsigned char *p = SDATA (string); | 1346 | const unsigned char *p = SDATA (string); |
| 1347 | const unsigned char *endp = p + SBYTES (string); | 1347 | const unsigned char *endp = p + SBYTES (string); |
| 1348 | struct charset *charset; | ||
| 1349 | 1348 | ||
| 1350 | if (SCHARS (string) == SBYTES (string)) | 1349 | if (SCHARS (string) == SBYTES (string)) |
| 1351 | return 0; | 1350 | return 0; |
| 1352 | 1351 | ||
| 1353 | charset = CHARSET_FROM_ID (charset_iso_8859_1); | ||
| 1354 | while (p < endp) | 1352 | while (p < endp) |
| 1355 | { | 1353 | { |
| 1356 | int c = STRING_CHAR_ADVANCE (p); | 1354 | int c = STRING_CHAR_ADVANCE (p); |
| 1357 | 1355 | ||
| 1358 | /* Fixme: comparison of unsigned expression < 0 is always false */ | 1356 | if (c >= 0x100) |
| 1359 | if (ENCODE_CHAR (charset, c) < 0) | ||
| 1360 | return 2; | 1357 | return 2; |
| 1361 | } | 1358 | } |
| 1362 | return 1; | 1359 | return 1; |
| @@ -1365,53 +1362,53 @@ string_xstring_p (string) | |||
| 1365 | 1362 | ||
| 1366 | /* Find charsets in the string at PTR of NCHARS and NBYTES. | 1363 | /* Find charsets in the string at PTR of NCHARS and NBYTES. |
| 1367 | 1364 | ||
| 1368 | CHARSETS is a vector. Each element is a cons of CHARSET and | 1365 | CHARSETS is a vector. If Nth element is non-nil, it means the |
| 1369 | FOUND-FLAG. CHARSET is a charset id, and FOUND-FLAG is nil or t. | 1366 | charset whose id is N is already found. |
| 1370 | FOUND-FLAG t (or nil) means that the corresponding charset is | ||
| 1371 | already found (or not yet found). | ||
| 1372 | 1367 | ||
| 1373 | It may lookup a translation table TABLE if supplied. */ | 1368 | It may lookup a translation table TABLE if supplied. */ |
| 1374 | 1369 | ||
| 1375 | static void | 1370 | static void |
| 1376 | find_charsets_in_text (ptr, nchars, nbytes, charsets, table) | 1371 | find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte) |
| 1377 | const unsigned char *ptr; | 1372 | const unsigned char *ptr; |
| 1378 | EMACS_INT nchars, nbytes; | 1373 | EMACS_INT nchars, nbytes; |
| 1379 | Lisp_Object charsets, table; | 1374 | Lisp_Object charsets, table; |
| 1375 | int multibyte; | ||
| 1380 | { | 1376 | { |
| 1381 | const unsigned char *pend = ptr + nbytes; | 1377 | const unsigned char *pend = ptr + nbytes; |
| 1382 | int ncharsets = ASIZE (charsets); | 1378 | int ncharsets = ASIZE (charsets); |
| 1383 | 1379 | ||
| 1384 | if (nchars == nbytes) | 1380 | if (nchars == nbytes) |
| 1385 | return; | ||
| 1386 | |||
| 1387 | while (ptr < pend) | ||
| 1388 | { | 1381 | { |
| 1389 | int c = STRING_CHAR_ADVANCE (ptr); | 1382 | if (multibyte) |
| 1390 | int i; | 1383 | ASET (charsets, charset_ascii, Qt); |
| 1391 | int all_found = 1; | 1384 | else |
| 1392 | Lisp_Object elt; | 1385 | while (ptr < pend) |
| 1393 | 1386 | { | |
| 1394 | if (!NILP (table)) | 1387 | int c = *ptr++; |
| 1395 | c = translate_char (table, c); | 1388 | |
| 1396 | for (i = 0; i < ncharsets; i++) | 1389 | if (!NILP (table)) |
| 1390 | c = translate_char (table, c); | ||
| 1391 | if (ASCII_BYTE_P (c)) | ||
| 1392 | ASET (charsets, charset_ascii, Qt); | ||
| 1393 | else | ||
| 1394 | ASET (charsets, charset_eight_bit, Qt); | ||
| 1395 | } | ||
| 1396 | } | ||
| 1397 | else | ||
| 1398 | { | ||
| 1399 | while (ptr < pend) | ||
| 1397 | { | 1400 | { |
| 1398 | elt = AREF (charsets, i); | 1401 | int c = STRING_CHAR_ADVANCE (ptr); |
| 1399 | if (NILP (XCDR (elt))) | 1402 | struct charset *charset; |
| 1400 | { | ||
| 1401 | struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (elt))); | ||
| 1402 | 1403 | ||
| 1403 | if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset)) | 1404 | if (!NILP (table)) |
| 1404 | XSETCDR (elt, Qt); | 1405 | c = translate_char (table, c); |
| 1405 | else | 1406 | charset = CHAR_CHARSET (c); |
| 1406 | all_found = 0; | 1407 | ASET (charsets, CHARSET_ID (charset), Qt); |
| 1407 | } | ||
| 1408 | } | 1408 | } |
| 1409 | if (all_found) | ||
| 1410 | break; | ||
| 1411 | } | 1409 | } |
| 1412 | } | 1410 | } |
| 1413 | 1411 | ||
| 1414 | /* Fixme: returns nil for unibyte. */ | ||
| 1415 | DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region, | 1412 | DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region, |
| 1416 | 2, 3, 0, | 1413 | 2, 3, 0, |
| 1417 | doc: /* Return a list of charsets in the region between BEG and END. | 1414 | doc: /* Return a list of charsets in the region between BEG and END. |
| @@ -1427,6 +1424,7 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */) | |||
| 1427 | EMACS_INT from, from_byte, to, stop, stop_byte; | 1424 | EMACS_INT from, from_byte, to, stop, stop_byte; |
| 1428 | int i; | 1425 | int i; |
| 1429 | Lisp_Object val; | 1426 | Lisp_Object val; |
| 1427 | int multibyte = ! NILP (current_buffer->enable_multibyte_characters); | ||
| 1430 | 1428 | ||
| 1431 | validate_region (&beg, &end); | 1429 | validate_region (&beg, &end); |
| 1432 | from = XFASTINT (beg); | 1430 | from = XFASTINT (beg); |
| @@ -1443,13 +1441,11 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */) | |||
| 1443 | from_byte = CHAR_TO_BYTE (from); | 1441 | from_byte = CHAR_TO_BYTE (from); |
| 1444 | 1442 | ||
| 1445 | charsets = Fmake_vector (make_number (charset_table_used), Qnil); | 1443 | charsets = Fmake_vector (make_number (charset_table_used), Qnil); |
| 1446 | for (i = 0; i < charset_table_used; i++) | ||
| 1447 | ASET (charsets, i, Fcons (make_number (i), Qnil)); | ||
| 1448 | |||
| 1449 | while (1) | 1444 | while (1) |
| 1450 | { | 1445 | { |
| 1451 | find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from, | 1446 | find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from, |
| 1452 | stop_byte - from_byte, charsets, table); | 1447 | stop_byte - from_byte, charsets, table, |
| 1448 | multibyte); | ||
| 1453 | if (stop < to) | 1449 | if (stop < to) |
| 1454 | { | 1450 | { |
| 1455 | from = stop, from_byte = stop_byte; | 1451 | from = stop, from_byte = stop_byte; |
| @@ -1461,12 +1457,11 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */) | |||
| 1461 | 1457 | ||
| 1462 | val = Qnil; | 1458 | val = Qnil; |
| 1463 | for (i = charset_table_used - 1; i >= 0; i--) | 1459 | for (i = charset_table_used - 1; i >= 0; i--) |
| 1464 | if (!NILP (XCDR (AREF (charsets, i)))) | 1460 | if (!NILP (AREF (charsets, i))) |
| 1465 | val = Fcons (CHARSET_NAME (charset_table + i), val); | 1461 | val = Fcons (CHARSET_NAME (charset_table + i), val); |
| 1466 | return val; | 1462 | return val; |
| 1467 | } | 1463 | } |
| 1468 | 1464 | ||
| 1469 | /* Fixme: returns nil for unibyte. */ | ||
| 1470 | DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string, | 1465 | DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string, |
| 1471 | 1, 2, 0, | 1466 | 1, 2, 0, |
| 1472 | doc: /* Return a list of charsets in STR. | 1467 | doc: /* Return a list of charsets in STR. |
| @@ -1484,14 +1479,12 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */) | |||
| 1484 | CHECK_STRING (str); | 1479 | CHECK_STRING (str); |
| 1485 | 1480 | ||
| 1486 | charsets = Fmake_vector (make_number (charset_table_used), Qnil); | 1481 | charsets = Fmake_vector (make_number (charset_table_used), Qnil); |
| 1487 | for (i = 0; i < charset_table_used; i++) | ||
| 1488 | ASET (charsets, i, Fcons (make_number (i), Qnil)); | ||
| 1489 | find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str), | 1482 | find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str), |
| 1490 | charsets, table); | 1483 | charsets, table, |
| 1491 | 1484 | STRING_MULTIBYTE (str)); | |
| 1492 | val = Qnil; | 1485 | val = Qnil; |
| 1493 | for (i = charset_table_used - 1; i >= 0; i--) | 1486 | for (i = charset_table_used - 1; i >= 0; i--) |
| 1494 | if (!NILP (XCDR (AREF (charsets, i)))) | 1487 | if (!NILP (AREF (charsets, i))) |
| 1495 | val = Fcons (CHARSET_NAME (charset_table + i), val); | 1488 | val = Fcons (CHARSET_NAME (charset_table + i), val); |
| 1496 | return val; | 1489 | return val; |
| 1497 | } | 1490 | } |
| @@ -1846,10 +1839,12 @@ char_charset (c, charset_list, code_return) | |||
| 1846 | } | 1839 | } |
| 1847 | 1840 | ||
| 1848 | 1841 | ||
| 1849 | /* Fixme: `unknown' can't happen now? */ | ||
| 1850 | DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0, | 1842 | DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0, |
| 1851 | doc: /*Return list of charset and one to three position-codes of CHAR. | 1843 | doc: |
| 1852 | If CHAR is invalid as a character code, return a list `(unknown CHAR)'. */) | 1844 | /*Return list of charset and one to four position-codes of CHAR. |
| 1845 | The charset is decided by the current priority order of charsets. | ||
| 1846 | A position-code is a byte value of each dimension of the code-point of | ||
| 1847 | CHAR in the charset. */) | ||
| 1853 | (ch) | 1848 | (ch) |
| 1854 | Lisp_Object ch; | 1849 | Lisp_Object ch; |
| 1855 | { | 1850 | { |
| @@ -1862,18 +1857,16 @@ If CHAR is invalid as a character code, return a list `(unknown CHAR)'. */) | |||
| 1862 | c = XFASTINT (ch); | 1857 | c = XFASTINT (ch); |
| 1863 | charset = CHAR_CHARSET (c); | 1858 | charset = CHAR_CHARSET (c); |
| 1864 | if (! charset) | 1859 | if (! charset) |
| 1865 | return Fcons (intern ("unknown"), Fcons (ch, Qnil)); | 1860 | abort (); |
| 1866 | |||
| 1867 | code = ENCODE_CHAR (charset, c); | 1861 | code = ENCODE_CHAR (charset, c); |
| 1868 | if (code == CHARSET_INVALID_CODE (charset)) | 1862 | if (code == CHARSET_INVALID_CODE (charset)) |
| 1869 | abort (); | 1863 | abort (); |
| 1870 | dimension = CHARSET_DIMENSION (charset); | 1864 | dimension = CHARSET_DIMENSION (charset); |
| 1871 | val = (dimension == 1 ? Fcons (make_number (code), Qnil) | 1865 | for (val = Qnil; dimension > 0; dimension--) |
| 1872 | : dimension == 2 ? Fcons (make_number (code >> 8), | 1866 | { |
| 1873 | Fcons (make_number (code & 0xFF), Qnil)) | 1867 | val = Fcons (make_number (code & 0xFF), val); |
| 1874 | : Fcons (make_number (code >> 16), | 1868 | code >>= 8; |
| 1875 | Fcons (make_number ((code >> 8) & 0xFF), | 1869 | } |
| 1876 | Fcons (make_number (code & 0xFF), Qnil)))); | ||
| 1877 | return Fcons (CHARSET_NAME (charset), val); | 1870 | return Fcons (CHARSET_NAME (charset), val); |
| 1878 | } | 1871 | } |
| 1879 | 1872 | ||