diff options
| author | Richard M. Stallman | 1998-02-14 08:43:17 +0000 |
|---|---|---|
| committer | Richard M. Stallman | 1998-02-14 08:43:17 +0000 |
| commit | aff2ce94e2a60d5bb769fb23e14c6d1027088989 (patch) | |
| tree | 73c35503b9e76e234a538cee9df864e0ea20c69e | |
| parent | 62f555a5af8a020b6a86f57fa440050fa429cf22 (diff) | |
| download | emacs-aff2ce94e2a60d5bb769fb23e14c6d1027088989.tar.gz emacs-aff2ce94e2a60d5bb769fb23e14c6d1027088989.zip | |
(simple_search): Call set_search_regs.
(boyer_moore): New arg CHARSET_BASE says which chars
to use the translate tables for.
(search_buffer): Properly test which chars participate in translation.
(TRANSLATE): New arg OUT. Handle non-integer in TRT.
All calls changed.
| -rw-r--r-- | src/search.c | 81 |
1 file changed, 54 insertions, 27 deletions
diff --git a/src/search.c b/src/search.c index 9781c65d8d0..d1a57887c25 100644 --- a/src/search.c +++ b/src/search.c | |||
| @@ -972,8 +972,22 @@ trivial_regexp_p (regexp) | |||
| 972 | POSIX is nonzero if we want full backtracking (POSIX style) | 972 | POSIX is nonzero if we want full backtracking (POSIX style) |
| 973 | for this pattern. 0 means backtrack only enough to get a valid match. */ | 973 | for this pattern. 0 means backtrack only enough to get a valid match. */ |
| 974 | 974 | ||
| 975 | #define TRANSLATE(trt, d) \ | 975 | #define TRANSLATE(out, trt, d) \ |
| 976 | (! NILP (trt) ? XINT (Faref (trt, make_number (d))) : (d)) | 976 | do \ |
| 977 | { \ | ||
| 978 | if (! NILP (trt)) \ | ||
| 979 | { \ | ||
| 980 | Lisp_Object temp; \ | ||
| 981 | temp = Faref (trt, make_number (d)); \ | ||
| 982 | if (INTEGERP (temp)) \ | ||
| 983 | out = XINT (temp); \ | ||
| 984 | else \ | ||
| 985 | out = d; \ | ||
| 986 | } \ | ||
| 987 | else \ | ||
| 988 | out = d; \ | ||
| 989 | } \ | ||
| 990 | while (0) | ||
| 977 | 991 | ||
| 978 | static int | 992 | static int |
| 979 | search_buffer (string, pos, pos_byte, lim, lim_byte, n, | 993 | search_buffer (string, pos, pos_byte, lim, lim_byte, n, |
| @@ -1165,7 +1179,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, | |||
| 1165 | while (--len >= 0) | 1179 | while (--len >= 0) |
| 1166 | { | 1180 | { |
| 1167 | unsigned char workbuf[4], *str; | 1181 | unsigned char workbuf[4], *str; |
| 1168 | int c, translated; | 1182 | int c, translated, inverse; |
| 1169 | int in_charlen, charlen; | 1183 | int in_charlen, charlen; |
| 1170 | 1184 | ||
| 1171 | /* If we got here and the RE flag is set, it's because we're | 1185 | /* If we got here and the RE flag is set, it's because we're |
| @@ -1180,7 +1194,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, | |||
| 1180 | 1194 | ||
| 1181 | c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen); | 1195 | c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen); |
| 1182 | /* Translate the character, if requested. */ | 1196 | /* Translate the character, if requested. */ |
| 1183 | translated = TRANSLATE (trt, c); | 1197 | TRANSLATE (translated, trt, c); |
| 1184 | /* If translation changed the byte-length, go back | 1198 | /* If translation changed the byte-length, go back |
| 1185 | to the original character. */ | 1199 | to the original character. */ |
| 1186 | charlen = CHAR_STRING (translated, workbuf, str); | 1200 | charlen = CHAR_STRING (translated, workbuf, str); |
| @@ -1190,10 +1204,11 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, | |||
| 1190 | charlen = CHAR_STRING (c, workbuf, str); | 1204 | charlen = CHAR_STRING (c, workbuf, str); |
| 1191 | } | 1205 | } |
| 1192 | 1206 | ||
| 1207 | TRANSLATE (inverse, inverse_trt, c); | ||
| 1208 | |||
| 1193 | /* Did this char actually get translated? | 1209 | /* Did this char actually get translated? |
| 1194 | Would any other char get translated into it? */ | 1210 | Would any other char get translated into it? */ |
| 1195 | if (translated != c | 1211 | if (translated != c || inverse != c) |
| 1196 | || TRANSLATE (inverse_trt, c) != c) | ||
| 1197 | { | 1212 | { |
| 1198 | /* Keep track of which character set row | 1213 | /* Keep track of which character set row |
| 1199 | contains the characters that need translation. */ | 1214 | contains the characters that need translation. */ |
| @@ -1206,7 +1221,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, | |||
| 1206 | simple = 0; | 1221 | simple = 0; |
| 1207 | /* ??? Handa: this must do simple = 0 | 1222 | /* ??? Handa: this must do simple = 0 |
| 1208 | if c is a composite character. */ | 1223 | if c is a composite character. */ |
| 1209 | } | 1224 | } |
| 1210 | 1225 | ||
| 1211 | /* Store this character into the translated pattern. */ | 1226 | /* Store this character into the translated pattern. */ |
| 1212 | bcopy (str, pat, charlen); | 1227 | bcopy (str, pat, charlen); |
| @@ -1219,7 +1234,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, | |||
| 1219 | { | 1234 | { |
| 1220 | while (--len >= 0) | 1235 | while (--len >= 0) |
| 1221 | { | 1236 | { |
| 1222 | int c, translated; | 1237 | int c, translated, inverse; |
| 1223 | 1238 | ||
| 1224 | /* If we got here and the RE flag is set, it's because we're | 1239 | /* If we got here and the RE flag is set, it's because we're |
| 1225 | dealing with a regexp known to be trivial, so the backslash | 1240 | dealing with a regexp known to be trivial, so the backslash |
| @@ -1230,12 +1245,12 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, | |||
| 1230 | base_pat++; | 1245 | base_pat++; |
| 1231 | } | 1246 | } |
| 1232 | c = *base_pat++; | 1247 | c = *base_pat++; |
| 1233 | translated = TRANSLATE (trt, c); | 1248 | TRANSLATE (translated, trt, c); |
| 1249 | TRANSLATE (inverse, inverse_trt, c); | ||
| 1234 | 1250 | ||
| 1235 | /* Did this char actually get translated? | 1251 | /* Did this char actually get translated? |
| 1236 | Would any other char get translated into it? */ | 1252 | Would any other char get translated into it? */ |
| 1237 | if (translated != c | 1253 | if (translated != c || inverse != c) |
| 1238 | || TRANSLATE (inverse_trt, c) != c) | ||
| 1239 | { | 1254 | { |
| 1240 | /* Keep track of which character set row | 1255 | /* Keep track of which character set row |
| 1241 | contains the characters that need translation. */ | 1256 | contains the characters that need translation. */ |
| @@ -1246,7 +1261,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, | |||
| 1246 | /* If two different rows appear, needing translation, | 1261 | /* If two different rows appear, needing translation, |
| 1247 | then we cannot use boyer_moore search. */ | 1262 | then we cannot use boyer_moore search. */ |
| 1248 | simple = 0; | 1263 | simple = 0; |
| 1249 | } | 1264 | } |
| 1250 | *pat++ = translated; | 1265 | *pat++ = translated; |
| 1251 | } | 1266 | } |
| 1252 | } | 1267 | } |
| @@ -1257,7 +1272,8 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, | |||
| 1257 | 1272 | ||
| 1258 | if (simple) | 1273 | if (simple) |
| 1259 | return boyer_moore (n, pat, len, len_byte, trt, inverse_trt, | 1274 | return boyer_moore (n, pat, len, len_byte, trt, inverse_trt, |
| 1260 | pos, pos_byte, lim, lim_byte); | 1275 | pos, pos_byte, lim, lim_byte, |
| 1276 | charset_base); | ||
| 1261 | else | 1277 | else |
| 1262 | return simple_search (n, pat, len, len_byte, trt, | 1278 | return simple_search (n, pat, len, len_byte, trt, |
| 1263 | pos, pos_byte, lim, lim_byte); | 1279 | pos, pos_byte, lim, lim_byte); |
| @@ -1316,7 +1332,7 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte) | |||
| 1316 | buf_charlen); | 1332 | buf_charlen); |
| 1317 | this_pos_byte += buf_charlen; | 1333 | this_pos_byte += buf_charlen; |
| 1318 | this_pos++; | 1334 | this_pos++; |
| 1319 | buf_ch = TRANSLATE (trt, buf_ch); | 1335 | TRANSLATE (buf_ch, trt, buf_ch); |
| 1320 | 1336 | ||
| 1321 | if (buf_ch != pat_ch) | 1337 | if (buf_ch != pat_ch) |
| 1322 | break; | 1338 | break; |
| @@ -1353,7 +1369,7 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte) | |||
| 1353 | int buf_ch = FETCH_BYTE (this_pos); | 1369 | int buf_ch = FETCH_BYTE (this_pos); |
| 1354 | this_len--; | 1370 | this_len--; |
| 1355 | this_pos++; | 1371 | this_pos++; |
| 1356 | buf_ch = TRANSLATE (trt, buf_ch); | 1372 | TRANSLATE (buf_ch, trt, buf_ch); |
| 1357 | 1373 | ||
| 1358 | if (buf_ch != pat_ch) | 1374 | if (buf_ch != pat_ch) |
| 1359 | break; | 1375 | break; |
| @@ -1401,7 +1417,7 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte) | |||
| 1401 | buf_charlen); | 1417 | buf_charlen); |
| 1402 | this_pos_byte += buf_charlen; | 1418 | this_pos_byte += buf_charlen; |
| 1403 | this_pos++; | 1419 | this_pos++; |
| 1404 | buf_ch = TRANSLATE (trt, buf_ch); | 1420 | TRANSLATE (buf_ch, trt, buf_ch); |
| 1405 | 1421 | ||
| 1406 | if (buf_ch != pat_ch) | 1422 | if (buf_ch != pat_ch) |
| 1407 | break; | 1423 | break; |
| @@ -1438,7 +1454,7 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte) | |||
| 1438 | int buf_ch = FETCH_BYTE (this_pos); | 1454 | int buf_ch = FETCH_BYTE (this_pos); |
| 1439 | this_len--; | 1455 | this_len--; |
| 1440 | this_pos++; | 1456 | this_pos++; |
| 1441 | buf_ch = TRANSLATE (trt, buf_ch); | 1457 | TRANSLATE (buf_ch, trt, buf_ch); |
| 1442 | 1458 | ||
| 1443 | if (buf_ch != pat_ch) | 1459 | if (buf_ch != pat_ch) |
| 1444 | break; | 1460 | break; |
| @@ -1458,7 +1474,11 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte) | |||
| 1458 | 1474 | ||
| 1459 | stop: | 1475 | stop: |
| 1460 | if (n == 0) | 1476 | if (n == 0) |
| 1461 | return pos; | 1477 | { |
| 1478 | set_search_regs (multibyte ? pos_byte : pos, len_byte); | ||
| 1479 | |||
| 1480 | return pos; | ||
| 1481 | } | ||
| 1462 | else if (n > 0) | 1482 | else if (n > 0) |
| 1463 | return -n; | 1483 | return -n; |
| 1464 | else | 1484 | else |
| @@ -1480,7 +1500,7 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte) | |||
| 1480 | 1500 | ||
| 1481 | static int | 1501 | static int |
| 1482 | boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, | 1502 | boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, |
| 1483 | pos, pos_byte, lim, lim_byte) | 1503 | pos, pos_byte, lim, lim_byte, charset_base) |
| 1484 | int n; | 1504 | int n; |
| 1485 | unsigned char *base_pat; | 1505 | unsigned char *base_pat; |
| 1486 | int len, len_byte; | 1506 | int len, len_byte; |
| @@ -1488,6 +1508,7 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, | |||
| 1488 | Lisp_Object inverse_trt; | 1508 | Lisp_Object inverse_trt; |
| 1489 | int pos, pos_byte; | 1509 | int pos, pos_byte; |
| 1490 | int lim, lim_byte; | 1510 | int lim, lim_byte; |
| 1511 | int charset_base; | ||
| 1491 | { | 1512 | { |
| 1492 | int direction = ((n > 0) ? 1 : -1); | 1513 | int direction = ((n > 0) ? 1 : -1); |
| 1493 | register int dirlen; | 1514 | register int dirlen; |
| @@ -1572,6 +1593,7 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, | |||
| 1572 | if (! NILP (trt)) | 1593 | if (! NILP (trt)) |
| 1573 | { | 1594 | { |
| 1574 | int ch; | 1595 | int ch; |
| 1596 | int untranslated; | ||
| 1575 | int this_translated = 1; | 1597 | int this_translated = 1; |
| 1576 | 1598 | ||
| 1577 | if (multibyte | 1599 | if (multibyte |
| @@ -1580,17 +1602,22 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, | |||
| 1580 | unsigned char *charstart = ptr; | 1602 | unsigned char *charstart = ptr; |
| 1581 | while (! CHAR_HEAD_P (*charstart)) | 1603 | while (! CHAR_HEAD_P (*charstart)) |
| 1582 | charstart--; | 1604 | charstart--; |
| 1583 | if (! CHAR_HEAD_P (*ptr)) | 1605 | untranslated = STRING_CHAR (charstart, ptr - charstart + 1); |
| 1606 | TRANSLATE (ch, trt, untranslated); | ||
| 1607 | if (charset_base == (ch & ~0xff)) | ||
| 1584 | { | 1608 | { |
| 1585 | translate_prev_byte = ptr[-1]; | 1609 | if (! CHAR_HEAD_P (*ptr)) |
| 1586 | if (! CHAR_HEAD_P (translate_prev_byte)) | 1610 | { |
| 1587 | translate_anteprev_byte = ptr[-2]; | 1611 | translate_prev_byte = ptr[-1]; |
| 1612 | if (! CHAR_HEAD_P (translate_prev_byte)) | ||
| 1613 | translate_anteprev_byte = ptr[-2]; | ||
| 1614 | } | ||
| 1588 | } | 1615 | } |
| 1589 | ch = STRING_CHAR (charstart, ptr - charstart + 1); | 1616 | else |
| 1590 | ch = TRANSLATE (trt, ch); | 1617 | this_translated = 0; |
| 1591 | } | 1618 | } |
| 1592 | else if (!multibyte) | 1619 | else if (!multibyte) |
| 1593 | ch = TRANSLATE (trt, *ptr); | 1620 | TRANSLATE (ch, trt, *ptr); |
| 1594 | else | 1621 | else |
| 1595 | { | 1622 | { |
| 1596 | ch = *ptr; | 1623 | ch = *ptr; |
| @@ -1606,7 +1633,7 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, | |||
| 1606 | if (this_translated) | 1633 | if (this_translated) |
| 1607 | while (1) | 1634 | while (1) |
| 1608 | { | 1635 | { |
| 1609 | ch = TRANSLATE (inverse_trt, ch); | 1636 | TRANSLATE (ch, inverse_trt, ch); |
| 1610 | /* For all the characters that map into K, | 1637 | /* For all the characters that map into K, |
| 1611 | set up simple_translate to map them into K. */ | 1638 | set up simple_translate to map them into K. */ |
| 1612 | simple_translate[(unsigned char) ch] = k; | 1639 | simple_translate[(unsigned char) ch] = k; |