aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorKenichi Handa2003-10-06 11:19:39 +0000
committerKenichi Handa2003-10-06 11:19:39 +0000
commit3cc67a4dc3672a38049698a9246c0664b83413cd (patch)
treeb7f5eb4b5fa8b88febdad985fc7a448b3c7f0705 /src
parentec480361d4f8a22ade0830a7f7738fbf68394083 (diff)
downloademacs-3cc67a4dc3672a38049698a9246c0664b83413cd.tar.gz
emacs-3cc67a4dc3672a38049698a9246c0664b83413cd.zip
(string_xstring_p): Check by (C >= 0x100).
(find_charsets_in_text): Format of the arg CHARSETS changed. New arg MULTIBYTE. (Ffind_charset_region, Ffind_charset_string): Adjusted for the change of find_charsets_in_text. (Fsplit_char): Fix doc. Never return unknown.
Diffstat (limited to 'src')
-rw-r--r--src/charset.c101
1 files changed, 47 insertions, 54 deletions
diff --git a/src/charset.c b/src/charset.c
index 88752a80b79..65e233fd498 100644
--- a/src/charset.c
+++ b/src/charset.c
@@ -1345,18 +1345,15 @@ string_xstring_p (string)
1345{ 1345{
1346 const unsigned char *p = SDATA (string); 1346 const unsigned char *p = SDATA (string);
1347 const unsigned char *endp = p + SBYTES (string); 1347 const unsigned char *endp = p + SBYTES (string);
1348 struct charset *charset;
1349 1348
1350 if (SCHARS (string) == SBYTES (string)) 1349 if (SCHARS (string) == SBYTES (string))
1351 return 0; 1350 return 0;
1352 1351
1353 charset = CHARSET_FROM_ID (charset_iso_8859_1);
1354 while (p < endp) 1352 while (p < endp)
1355 { 1353 {
1356 int c = STRING_CHAR_ADVANCE (p); 1354 int c = STRING_CHAR_ADVANCE (p);
1357 1355
1358 /* Fixme: comparison of unsigned expression < 0 is always false */ 1356 if (c >= 0x100)
1359 if (ENCODE_CHAR (charset, c) < 0)
1360 return 2; 1357 return 2;
1361 } 1358 }
1362 return 1; 1359 return 1;
@@ -1365,53 +1362,53 @@ string_xstring_p (string)
1365 1362
1366/* Find charsets in the string at PTR of NCHARS and NBYTES. 1363/* Find charsets in the string at PTR of NCHARS and NBYTES.
1367 1364
1368 CHARSETS is a vector. Each element is a cons of CHARSET and 1365 CHARSETS is a vector. If Nth element is non-nil, it means the
1369 FOUND-FLAG. CHARSET is a charset id, and FOUND-FLAG is nil or t. 1366 charset whose id is N is already found.
1370 FOUND-FLAG t (or nil) means that the corresponding charset is
1371 already found (or not yet found).
1372 1367
1373 It may lookup a translation table TABLE if supplied. */ 1368 It may lookup a translation table TABLE if supplied. */
1374 1369
1375static void 1370static void
1376find_charsets_in_text (ptr, nchars, nbytes, charsets, table) 1371find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
1377 const unsigned char *ptr; 1372 const unsigned char *ptr;
1378 EMACS_INT nchars, nbytes; 1373 EMACS_INT nchars, nbytes;
1379 Lisp_Object charsets, table; 1374 Lisp_Object charsets, table;
1375 int multibyte;
1380{ 1376{
1381 const unsigned char *pend = ptr + nbytes; 1377 const unsigned char *pend = ptr + nbytes;
1382 int ncharsets = ASIZE (charsets); 1378 int ncharsets = ASIZE (charsets);
1383 1379
1384 if (nchars == nbytes) 1380 if (nchars == nbytes)
1385 return;
1386
1387 while (ptr < pend)
1388 { 1381 {
1389 int c = STRING_CHAR_ADVANCE (ptr); 1382 if (multibyte)
1390 int i; 1383 ASET (charsets, charset_ascii, Qt);
1391 int all_found = 1; 1384 else
1392 Lisp_Object elt; 1385 while (ptr < pend)
1393 1386 {
1394 if (!NILP (table)) 1387 int c = *ptr++;
1395 c = translate_char (table, c); 1388
1396 for (i = 0; i < ncharsets; i++) 1389 if (!NILP (table))
1390 c = translate_char (table, c);
1391 if (ASCII_BYTE_P (c))
1392 ASET (charsets, charset_ascii, Qt);
1393 else
1394 ASET (charsets, charset_eight_bit, Qt);
1395 }
1396 }
1397 else
1398 {
1399 while (ptr < pend)
1397 { 1400 {
1398 elt = AREF (charsets, i); 1401 int c = STRING_CHAR_ADVANCE (ptr);
1399 if (NILP (XCDR (elt))) 1402 struct charset *charset;
1400 {
1401 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (elt)));
1402 1403
1403 if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset)) 1404 if (!NILP (table))
1404 XSETCDR (elt, Qt); 1405 c = translate_char (table, c);
1405 else 1406 charset = CHAR_CHARSET (c);
1406 all_found = 0; 1407 ASET (charsets, CHARSET_ID (charset), Qt);
1407 }
1408 } 1408 }
1409 if (all_found)
1410 break;
1411 } 1409 }
1412} 1410}
1413 1411
1414/* Fixme: returns nil for unibyte. */
1415DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region, 1412DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1416 2, 3, 0, 1413 2, 3, 0,
1417 doc: /* Return a list of charsets in the region between BEG and END. 1414 doc: /* Return a list of charsets in the region between BEG and END.
@@ -1427,6 +1424,7 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1427 EMACS_INT from, from_byte, to, stop, stop_byte; 1424 EMACS_INT from, from_byte, to, stop, stop_byte;
1428 int i; 1425 int i;
1429 Lisp_Object val; 1426 Lisp_Object val;
1427 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1430 1428
1431 validate_region (&beg, &end); 1429 validate_region (&beg, &end);
1432 from = XFASTINT (beg); 1430 from = XFASTINT (beg);
@@ -1443,13 +1441,11 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1443 from_byte = CHAR_TO_BYTE (from); 1441 from_byte = CHAR_TO_BYTE (from);
1444 1442
1445 charsets = Fmake_vector (make_number (charset_table_used), Qnil); 1443 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1446 for (i = 0; i < charset_table_used; i++)
1447 ASET (charsets, i, Fcons (make_number (i), Qnil));
1448
1449 while (1) 1444 while (1)
1450 { 1445 {
1451 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from, 1446 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1452 stop_byte - from_byte, charsets, table); 1447 stop_byte - from_byte, charsets, table,
1448 multibyte);
1453 if (stop < to) 1449 if (stop < to)
1454 { 1450 {
1455 from = stop, from_byte = stop_byte; 1451 from = stop, from_byte = stop_byte;
@@ -1461,12 +1457,11 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1461 1457
1462 val = Qnil; 1458 val = Qnil;
1463 for (i = charset_table_used - 1; i >= 0; i--) 1459 for (i = charset_table_used - 1; i >= 0; i--)
1464 if (!NILP (XCDR (AREF (charsets, i)))) 1460 if (!NILP (AREF (charsets, i)))
1465 val = Fcons (CHARSET_NAME (charset_table + i), val); 1461 val = Fcons (CHARSET_NAME (charset_table + i), val);
1466 return val; 1462 return val;
1467} 1463}
1468 1464
1469/* Fixme: returns nil for unibyte. */
1470DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string, 1465DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1471 1, 2, 0, 1466 1, 2, 0,
1472 doc: /* Return a list of charsets in STR. 1467 doc: /* Return a list of charsets in STR.
@@ -1484,14 +1479,12 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1484 CHECK_STRING (str); 1479 CHECK_STRING (str);
1485 1480
1486 charsets = Fmake_vector (make_number (charset_table_used), Qnil); 1481 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1487 for (i = 0; i < charset_table_used; i++)
1488 ASET (charsets, i, Fcons (make_number (i), Qnil));
1489 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str), 1482 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1490 charsets, table); 1483 charsets, table,
1491 1484 STRING_MULTIBYTE (str));
1492 val = Qnil; 1485 val = Qnil;
1493 for (i = charset_table_used - 1; i >= 0; i--) 1486 for (i = charset_table_used - 1; i >= 0; i--)
1494 if (!NILP (XCDR (AREF (charsets, i)))) 1487 if (!NILP (AREF (charsets, i)))
1495 val = Fcons (CHARSET_NAME (charset_table + i), val); 1488 val = Fcons (CHARSET_NAME (charset_table + i), val);
1496 return val; 1489 return val;
1497} 1490}
@@ -1846,10 +1839,12 @@ char_charset (c, charset_list, code_return)
1846} 1839}
1847 1840
1848 1841
1849/* Fixme: `unknown' can't happen now? */
1850DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0, 1842DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1851 doc: /*Return list of charset and one to three position-codes of CHAR. 1843 doc:
1852If CHAR is invalid as a character code, return a list `(unknown CHAR)'. */) 1844 /*Return list of charset and one to four position-codes of CHAR.
1845The charset is decided by the current priority order of charsets.
1846A position-code is a byte value of each dimension of the code-point of
1847CHAR in the charset. */)
1853 (ch) 1848 (ch)
1854 Lisp_Object ch; 1849 Lisp_Object ch;
1855{ 1850{
@@ -1862,18 +1857,16 @@ If CHAR is invalid as a character code, return a list `(unknown CHAR)'. */)
1862 c = XFASTINT (ch); 1857 c = XFASTINT (ch);
1863 charset = CHAR_CHARSET (c); 1858 charset = CHAR_CHARSET (c);
1864 if (! charset) 1859 if (! charset)
1865 return Fcons (intern ("unknown"), Fcons (ch, Qnil)); 1860 abort ();
1866
1867 code = ENCODE_CHAR (charset, c); 1861 code = ENCODE_CHAR (charset, c);
1868 if (code == CHARSET_INVALID_CODE (charset)) 1862 if (code == CHARSET_INVALID_CODE (charset))
1869 abort (); 1863 abort ();
1870 dimension = CHARSET_DIMENSION (charset); 1864 dimension = CHARSET_DIMENSION (charset);
1871 val = (dimension == 1 ? Fcons (make_number (code), Qnil) 1865 for (val = Qnil; dimension > 0; dimension--)
1872 : dimension == 2 ? Fcons (make_number (code >> 8), 1866 {
1873 Fcons (make_number (code & 0xFF), Qnil)) 1867 val = Fcons (make_number (code & 0xFF), val);
1874 : Fcons (make_number (code >> 16), 1868 code >>= 8;
1875 Fcons (make_number ((code >> 8) & 0xFF), 1869 }
1876 Fcons (make_number (code & 0xFF), Qnil))));
1877 return Fcons (CHARSET_NAME (charset), val); 1870 return Fcons (CHARSET_NAME (charset), val);
1878} 1871}
1879 1872