about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Richard M. Stallman, 1998-03-09 00:25:30 +0000
committer: Richard M. Stallman, 1998-03-09 00:25:30 +0000
commit: ab228c24d550d34f768077d6c1d047b12c8ff62d (patch)
tree: 126ba7a2cc9a12398e449f7ca3189731b7cf6ea2 /src
parent: b8bc6df2ee9537bc601dff9f26634407ef80cfea (diff)
download: emacs-ab228c24d550d34f768077d6c1d047b12c8ff62d.tar.gz
emacs-ab228c24d550d34f768077d6c1d047b12c8ff62d.zip
(simple_search): Don't count a character until it matches!
Call set_search_regs differently in a forward search.
(boyer_moore): Fix up the code that translates the pattern and loops through equivalent characters.
Diffstat (limited to 'src')
-rw-r--r-- src/search.c 94
1 file changed, 59 insertions, 35 deletions
diff --git a/src/search.c b/src/search.c
index 419b20d3b8a..b6a1ba5b6f8 100644
--- a/src/search.c
+++ b/src/search.c
@@ -1304,6 +1304,7 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1304 int lim, lim_byte; 1304 int lim, lim_byte;
1305{ 1305{
1306 int multibyte = ! NILP (current_buffer->enable_multibyte_characters); 1306 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1307 int forward = n > 0;
1307 1308
1308 if (lim > pos && multibyte) 1309 if (lim > pos && multibyte)
1309 while (n > 0) 1310 while (n > 0)
@@ -1322,22 +1323,23 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1322 while (this_len > 0) 1323 while (this_len > 0)
1323 { 1324 {
1324 int charlen, buf_charlen; 1325 int charlen, buf_charlen;
1325 int pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); 1326 int pat_ch, buf_ch;
1326 int buf_ch;
1327
1328 this_len_byte -= charlen;
1329 this_len--;
1330 p += charlen;
1331 1327
1328 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1332 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), 1329 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1333 ZV_BYTE - this_pos_byte, 1330 ZV_BYTE - this_pos_byte,
1334 buf_charlen); 1331 buf_charlen);
1335 this_pos_byte += buf_charlen;
1336 this_pos++;
1337 TRANSLATE (buf_ch, trt, buf_ch); 1332 TRANSLATE (buf_ch, trt, buf_ch);
1338 1333
1339 if (buf_ch != pat_ch) 1334 if (buf_ch != pat_ch)
1340 break; 1335 break;
1336
1337 this_len_byte -= charlen;
1338 this_len--;
1339 p += charlen;
1340
1341 this_pos_byte += buf_charlen;
1342 this_pos++;
1341 } 1343 }
1342 1344
1343 if (this_len == 0) 1345 if (this_len == 0)
@@ -1369,12 +1371,13 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1369 { 1371 {
1370 int pat_ch = *p++; 1372 int pat_ch = *p++;
1371 int buf_ch = FETCH_BYTE (this_pos); 1373 int buf_ch = FETCH_BYTE (this_pos);
1372 this_len--;
1373 this_pos++;
1374 TRANSLATE (buf_ch, trt, buf_ch); 1374 TRANSLATE (buf_ch, trt, buf_ch);
1375 1375
1376 if (buf_ch != pat_ch) 1376 if (buf_ch != pat_ch)
1377 break; 1377 break;
1378
1379 this_len--;
1380 this_pos++;
1378 } 1381 }
1379 1382
1380 if (this_len == 0) 1383 if (this_len == 0)
@@ -1407,22 +1410,22 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1407 while (this_len > 0) 1410 while (this_len > 0)
1408 { 1411 {
1409 int charlen, buf_charlen; 1412 int charlen, buf_charlen;
1410 int pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); 1413 int pat_ch, buf_ch;
1411 int buf_ch;
1412
1413 this_len_byte -= charlen;
1414 this_len--;
1415 p += charlen;
1416 1414
1415 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1417 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), 1416 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1418 ZV_BYTE - this_pos_byte, 1417 ZV_BYTE - this_pos_byte,
1419 buf_charlen); 1418 buf_charlen);
1420 this_pos_byte += buf_charlen;
1421 this_pos++;
1422 TRANSLATE (buf_ch, trt, buf_ch); 1419 TRANSLATE (buf_ch, trt, buf_ch);
1423 1420
1424 if (buf_ch != pat_ch) 1421 if (buf_ch != pat_ch)
1425 break; 1422 break;
1423
1424 this_len_byte -= charlen;
1425 this_len--;
1426 p += charlen;
1427 this_pos_byte += buf_charlen;
1428 this_pos++;
1426 } 1429 }
1427 1430
1428 if (this_len == 0) 1431 if (this_len == 0)
@@ -1454,12 +1457,12 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1454 { 1457 {
1455 int pat_ch = *p++; 1458 int pat_ch = *p++;
1456 int buf_ch = FETCH_BYTE (this_pos); 1459 int buf_ch = FETCH_BYTE (this_pos);
1457 this_len--;
1458 this_pos++;
1459 TRANSLATE (buf_ch, trt, buf_ch); 1460 TRANSLATE (buf_ch, trt, buf_ch);
1460 1461
1461 if (buf_ch != pat_ch) 1462 if (buf_ch != pat_ch)
1462 break; 1463 break;
1464 this_len--;
1465 this_pos++;
1463 } 1466 }
1464 1467
1465 if (this_len == 0) 1468 if (this_len == 0)
@@ -1477,7 +1480,10 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1477 stop: 1480 stop:
1478 if (n == 0) 1481 if (n == 0)
1479 { 1482 {
1480 set_search_regs (multibyte ? pos_byte : pos, len_byte); 1483 if (forward)
1484 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1485 else
1486 set_search_regs (multibyte ? pos_byte : pos, len_byte);
1481 1487
1482 return pos; 1488 return pos;
1483 } 1489 }
@@ -1605,9 +1611,9 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1605 while (! CHAR_HEAD_P (*charstart)) 1611 while (! CHAR_HEAD_P (*charstart))
1606 charstart--; 1612 charstart--;
1607 untranslated = STRING_CHAR (charstart, ptr - charstart + 1); 1613 untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1608 TRANSLATE (ch, trt, untranslated); 1614 if (charset_base == (untranslated & ~0xff))
1609 if (charset_base == (ch & ~0xff))
1610 { 1615 {
1616 TRANSLATE (ch, trt, untranslated);
1611 if (! CHAR_HEAD_P (*ptr)) 1617 if (! CHAR_HEAD_P (*ptr))
1612 { 1618 {
1613 translate_prev_byte = ptr[-1]; 1619 translate_prev_byte = ptr[-1];
@@ -1616,7 +1622,10 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1616 } 1622 }
1617 } 1623 }
1618 else 1624 else
1619 this_translated = 0; 1625 {
1626 this_translated = 0;
1627 ch = *ptr;
1628 }
1620 } 1629 }
1621 else if (!multibyte) 1630 else if (!multibyte)
1622 TRANSLATE (ch, trt, *ptr); 1631 TRANSLATE (ch, trt, *ptr);
@@ -1626,23 +1635,38 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1626 this_translated = 0; 1635 this_translated = 0;
1627 } 1636 }
1628 1637
1629 k = j = (unsigned char) ch; 1638 if (ch > 0400)
1639 j = ((unsigned char) ch) | 0200;
1640 else
1641 j = (unsigned char) ch;
1642
1630 if (i == infinity) 1643 if (i == infinity)
1631 stride_for_teases = BM_tab[j]; 1644 stride_for_teases = BM_tab[j];
1645
1632 BM_tab[j] = dirlen - i; 1646 BM_tab[j] = dirlen - i;
1633 /* A translation table is accompanied by its inverse -- see */ 1647 /* A translation table is accompanied by its inverse -- see */
1634 /* comment following downcase_table for details */ 1648 /* comment following downcase_table for details */
1635 if (this_translated) 1649 if (this_translated)
1636 while (1) 1650 {
1637 { 1651 int starting_ch = ch;
1638 TRANSLATE (ch, inverse_trt, ch); 1652 int starting_j = j;
1639 /* For all the characters that map into K, 1653 while (1)
1640 set up simple_translate to map them into K. */ 1654 {
1641 simple_translate[(unsigned char) ch] = k; 1655 TRANSLATE (ch, inverse_trt, ch);
1642 if ((unsigned char) ch == k) 1656 if (ch > 0400)
1643 break; 1657 j = ((unsigned char) ch) | 0200;
1644 BM_tab[(unsigned char) ch] = dirlen - i; 1658 else
1645 } 1659 j = (unsigned char) ch;
1660
1661 /* For all the characters that map into CH,
1662 set up simple_translate to map the last byte
1663 into STARTING_J. */
1664 simple_translate[j] = starting_j;
1665 if (ch == starting_ch)
1666 break;
1667 BM_tab[j] = dirlen - i;
1668 }
1669 }
1646 } 1670 }
1647 else 1671 else
1648 { 1672 {