diff options
| author | Richard M. Stallman | 1998-03-09 00:25:30 +0000 |
|---|---|---|
| committer | Richard M. Stallman | 1998-03-09 00:25:30 +0000 |
| commit | ab228c24d550d34f768077d6c1d047b12c8ff62d (patch) | |
| tree | 126ba7a2cc9a12398e449f7ca3189731b7cf6ea2 /src | |
| parent | b8bc6df2ee9537bc601dff9f26634407ef80cfea (diff) | |
| download | emacs-ab228c24d550d34f768077d6c1d047b12c8ff62d.tar.gz emacs-ab228c24d550d34f768077d6c1d047b12c8ff62d.zip | |
(simple_search): Don't count a character until it matches!
Call set_search_regs differently in a forward search.
(boyer_moore): Fix up the code that translates the pattern
and loops through equivalent characters.
Diffstat (limited to 'src')
| -rw-r--r-- | src/search.c | 94 |
1 files changed, 59 insertions, 35 deletions
diff --git a/src/search.c b/src/search.c
index 419b20d3b8a..b6a1ba5b6f8 100644
--- a/src/search.c
+++ b/src/search.c
| @@ -1304,6 +1304,7 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte) | |||
| 1304 | int lim, lim_byte; | 1304 | int lim, lim_byte; |
| 1305 | { | 1305 | { |
| 1306 | int multibyte = ! NILP (current_buffer->enable_multibyte_characters); | 1306 | int multibyte = ! NILP (current_buffer->enable_multibyte_characters); |
| 1307 | int forward = n > 0; | ||
| 1307 | 1308 | ||
| 1308 | if (lim > pos && multibyte) | 1309 | if (lim > pos && multibyte) |
| 1309 | while (n > 0) | 1310 | while (n > 0) |
| @@ -1322,22 +1323,23 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte) | |||
| 1322 | while (this_len > 0) | 1323 | while (this_len > 0) |
| 1323 | { | 1324 | { |
| 1324 | int charlen, buf_charlen; | 1325 | int charlen, buf_charlen; |
| 1325 | int pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); | 1326 | int pat_ch, buf_ch; |
| 1326 | int buf_ch; | ||
| 1327 | |||
| 1328 | this_len_byte -= charlen; | ||
| 1329 | this_len--; | ||
| 1330 | p += charlen; | ||
| 1331 | 1327 | ||
| 1328 | pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); | ||
| 1332 | buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), | 1329 | buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), |
| 1333 | ZV_BYTE - this_pos_byte, | 1330 | ZV_BYTE - this_pos_byte, |
| 1334 | buf_charlen); | 1331 | buf_charlen); |
| 1335 | this_pos_byte += buf_charlen; | ||
| 1336 | this_pos++; | ||
| 1337 | TRANSLATE (buf_ch, trt, buf_ch); | 1332 | TRANSLATE (buf_ch, trt, buf_ch); |
| 1338 | 1333 | ||
| 1339 | if (buf_ch != pat_ch) | 1334 | if (buf_ch != pat_ch) |
| 1340 | break; | 1335 | break; |
| 1336 | |||
| 1337 | this_len_byte -= charlen; | ||
| 1338 | this_len--; | ||
| 1339 | p += charlen; | ||
| 1340 | |||
| 1341 | this_pos_byte += buf_charlen; | ||
| 1342 | this_pos++; | ||
| 1341 | } | 1343 | } |
| 1342 | 1344 | ||
| 1343 | if (this_len == 0) | 1345 | if (this_len == 0) |
| @@ -1369,12 +1371,13 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte) | |||
| 1369 | { | 1371 | { |
| 1370 | int pat_ch = *p++; | 1372 | int pat_ch = *p++; |
| 1371 | int buf_ch = FETCH_BYTE (this_pos); | 1373 | int buf_ch = FETCH_BYTE (this_pos); |
| 1372 | this_len--; | ||
| 1373 | this_pos++; | ||
| 1374 | TRANSLATE (buf_ch, trt, buf_ch); | 1374 | TRANSLATE (buf_ch, trt, buf_ch); |
| 1375 | 1375 | ||
| 1376 | if (buf_ch != pat_ch) | 1376 | if (buf_ch != pat_ch) |
| 1377 | break; | 1377 | break; |
| 1378 | |||
| 1379 | this_len--; | ||
| 1380 | this_pos++; | ||
| 1378 | } | 1381 | } |
| 1379 | 1382 | ||
| 1380 | if (this_len == 0) | 1383 | if (this_len == 0) |
| @@ -1407,22 +1410,22 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte) | |||
| 1407 | while (this_len > 0) | 1410 | while (this_len > 0) |
| 1408 | { | 1411 | { |
| 1409 | int charlen, buf_charlen; | 1412 | int charlen, buf_charlen; |
| 1410 | int pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); | 1413 | int pat_ch, buf_ch; |
| 1411 | int buf_ch; | ||
| 1412 | |||
| 1413 | this_len_byte -= charlen; | ||
| 1414 | this_len--; | ||
| 1415 | p += charlen; | ||
| 1416 | 1414 | ||
| 1415 | pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); | ||
| 1417 | buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), | 1416 | buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), |
| 1418 | ZV_BYTE - this_pos_byte, | 1417 | ZV_BYTE - this_pos_byte, |
| 1419 | buf_charlen); | 1418 | buf_charlen); |
| 1420 | this_pos_byte += buf_charlen; | ||
| 1421 | this_pos++; | ||
| 1422 | TRANSLATE (buf_ch, trt, buf_ch); | 1419 | TRANSLATE (buf_ch, trt, buf_ch); |
| 1423 | 1420 | ||
| 1424 | if (buf_ch != pat_ch) | 1421 | if (buf_ch != pat_ch) |
| 1425 | break; | 1422 | break; |
| 1423 | |||
| 1424 | this_len_byte -= charlen; | ||
| 1425 | this_len--; | ||
| 1426 | p += charlen; | ||
| 1427 | this_pos_byte += buf_charlen; | ||
| 1428 | this_pos++; | ||
| 1426 | } | 1429 | } |
| 1427 | 1430 | ||
| 1428 | if (this_len == 0) | 1431 | if (this_len == 0) |
| @@ -1454,12 +1457,12 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte) | |||
| 1454 | { | 1457 | { |
| 1455 | int pat_ch = *p++; | 1458 | int pat_ch = *p++; |
| 1456 | int buf_ch = FETCH_BYTE (this_pos); | 1459 | int buf_ch = FETCH_BYTE (this_pos); |
| 1457 | this_len--; | ||
| 1458 | this_pos++; | ||
| 1459 | TRANSLATE (buf_ch, trt, buf_ch); | 1460 | TRANSLATE (buf_ch, trt, buf_ch); |
| 1460 | 1461 | ||
| 1461 | if (buf_ch != pat_ch) | 1462 | if (buf_ch != pat_ch) |
| 1462 | break; | 1463 | break; |
| 1464 | this_len--; | ||
| 1465 | this_pos++; | ||
| 1463 | } | 1466 | } |
| 1464 | 1467 | ||
| 1465 | if (this_len == 0) | 1468 | if (this_len == 0) |
| @@ -1477,7 +1480,10 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte) | |||
| 1477 | stop: | 1480 | stop: |
| 1478 | if (n == 0) | 1481 | if (n == 0) |
| 1479 | { | 1482 | { |
| 1480 | set_search_regs (multibyte ? pos_byte : pos, len_byte); | 1483 | if (forward) |
| 1484 | set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte); | ||
| 1485 | else | ||
| 1486 | set_search_regs (multibyte ? pos_byte : pos, len_byte); | ||
| 1481 | 1487 | ||
| 1482 | return pos; | 1488 | return pos; |
| 1483 | } | 1489 | } |
| @@ -1605,9 +1611,9 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, | |||
| 1605 | while (! CHAR_HEAD_P (*charstart)) | 1611 | while (! CHAR_HEAD_P (*charstart)) |
| 1606 | charstart--; | 1612 | charstart--; |
| 1607 | untranslated = STRING_CHAR (charstart, ptr - charstart + 1); | 1613 | untranslated = STRING_CHAR (charstart, ptr - charstart + 1); |
| 1608 | TRANSLATE (ch, trt, untranslated); | 1614 | if (charset_base == (untranslated & ~0xff)) |
| 1609 | if (charset_base == (ch & ~0xff)) | ||
| 1610 | { | 1615 | { |
| 1616 | TRANSLATE (ch, trt, untranslated); | ||
| 1611 | if (! CHAR_HEAD_P (*ptr)) | 1617 | if (! CHAR_HEAD_P (*ptr)) |
| 1612 | { | 1618 | { |
| 1613 | translate_prev_byte = ptr[-1]; | 1619 | translate_prev_byte = ptr[-1]; |
| @@ -1616,7 +1622,10 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, | |||
| 1616 | } | 1622 | } |
| 1617 | } | 1623 | } |
| 1618 | else | 1624 | else |
| 1619 | this_translated = 0; | 1625 | { |
| 1626 | this_translated = 0; | ||
| 1627 | ch = *ptr; | ||
| 1628 | } | ||
| 1620 | } | 1629 | } |
| 1621 | else if (!multibyte) | 1630 | else if (!multibyte) |
| 1622 | TRANSLATE (ch, trt, *ptr); | 1631 | TRANSLATE (ch, trt, *ptr); |
| @@ -1626,23 +1635,38 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, | |||
| 1626 | this_translated = 0; | 1635 | this_translated = 0; |
| 1627 | } | 1636 | } |
| 1628 | 1637 | ||
| 1629 | k = j = (unsigned char) ch; | 1638 | if (ch > 0400) |
| 1639 | j = ((unsigned char) ch) | 0200; | ||
| 1640 | else | ||
| 1641 | j = (unsigned char) ch; | ||
| 1642 | |||
| 1630 | if (i == infinity) | 1643 | if (i == infinity) |
| 1631 | stride_for_teases = BM_tab[j]; | 1644 | stride_for_teases = BM_tab[j]; |
| 1645 | |||
| 1632 | BM_tab[j] = dirlen - i; | 1646 | BM_tab[j] = dirlen - i; |
| 1633 | /* A translation table is accompanied by its inverse -- see */ | 1647 | /* A translation table is accompanied by its inverse -- see */ |
| 1634 | /* comment following downcase_table for details */ | 1648 | /* comment following downcase_table for details */ |
| 1635 | if (this_translated) | 1649 | if (this_translated) |
| 1636 | while (1) | 1650 | { |
| 1637 | { | 1651 | int starting_ch = ch; |
| 1638 | TRANSLATE (ch, inverse_trt, ch); | 1652 | int starting_j = j; |
| 1639 | /* For all the characters that map into K, | 1653 | while (1) |
| 1640 | set up simple_translate to map them into K. */ | 1654 | { |
| 1641 | simple_translate[(unsigned char) ch] = k; | 1655 | TRANSLATE (ch, inverse_trt, ch); |
| 1642 | if ((unsigned char) ch == k) | 1656 | if (ch > 0400) |
| 1643 | break; | 1657 | j = ((unsigned char) ch) | 0200; |
| 1644 | BM_tab[(unsigned char) ch] = dirlen - i; | 1658 | else |
| 1645 | } | 1659 | j = (unsigned char) ch; |
| 1660 | |||
| 1661 | /* For all the characters that map into CH, | ||
| 1662 | set up simple_translate to map the last byte | ||
| 1663 | into STARTING_J. */ | ||
| 1664 | simple_translate[j] = starting_j; | ||
| 1665 | if (ch == starting_ch) | ||
| 1666 | break; | ||
| 1667 | BM_tab[j] = dirlen - i; | ||
| 1668 | } | ||
| 1669 | } | ||
| 1646 | } | 1670 | } |
| 1647 | else | 1671 | else |
| 1648 | { | 1672 | { |