aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Richard M. Stallman, 1998-02-14 08:43:17 +0000
committer: Richard M. Stallman, 1998-02-14 08:43:17 +0000
commit: aff2ce94e2a60d5bb769fb23e14c6d1027088989 (patch)
tree: 73c35503b9e76e234a538cee9df864e0ea20c69e /src
parent: 62f555a5af8a020b6a86f57fa440050fa429cf22 (diff)
download: emacs-aff2ce94e2a60d5bb769fb23e14c6d1027088989.tar.gz
emacs-aff2ce94e2a60d5bb769fb23e14c6d1027088989.zip
(simple_search): Call set_search_regs.
(boyer_moore): New arg CHARSET_BASE says which chars to use the translate tables for. (search_buffer): Properly test which chars participate in translation. (TRANSLATE): New arg OUT. Handle non-integer in TRT. All calls changed.
Diffstat (limited to 'src')
-rw-r--r-- src/search.c 81
1 file changed, 54 insertions, 27 deletions
diff --git a/src/search.c b/src/search.c
index 9781c65d8d0..d1a57887c25 100644
--- a/src/search.c
+++ b/src/search.c
@@ -972,8 +972,22 @@ trivial_regexp_p (regexp)
972 POSIX is nonzero if we want full backtracking (POSIX style) 972 POSIX is nonzero if we want full backtracking (POSIX style)
973 for this pattern. 0 means backtrack only enough to get a valid match. */ 973 for this pattern. 0 means backtrack only enough to get a valid match. */
974 974
975#define TRANSLATE(trt, d) \ 975#define TRANSLATE(out, trt, d) \
976 (! NILP (trt) ? XINT (Faref (trt, make_number (d))) : (d)) 976do \
977 { \
978 if (! NILP (trt)) \
979 { \
980 Lisp_Object temp; \
981 temp = Faref (trt, make_number (d)); \
982 if (INTEGERP (temp)) \
983 out = XINT (temp); \
984 else \
985 out = d; \
986 } \
987 else \
988 out = d; \
989 } \
990while (0)
977 991
978static int 992static int
979search_buffer (string, pos, pos_byte, lim, lim_byte, n, 993search_buffer (string, pos, pos_byte, lim, lim_byte, n,
@@ -1165,7 +1179,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1165 while (--len >= 0) 1179 while (--len >= 0)
1166 { 1180 {
1167 unsigned char workbuf[4], *str; 1181 unsigned char workbuf[4], *str;
1168 int c, translated; 1182 int c, translated, inverse;
1169 int in_charlen, charlen; 1183 int in_charlen, charlen;
1170 1184
1171 /* If we got here and the RE flag is set, it's because we're 1185 /* If we got here and the RE flag is set, it's because we're
@@ -1180,7 +1194,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1180 1194
1181 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen); 1195 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1182 /* Translate the character, if requested. */ 1196 /* Translate the character, if requested. */
1183 translated = TRANSLATE (trt, c); 1197 TRANSLATE (translated, trt, c);
1184 /* If translation changed the byte-length, go back 1198 /* If translation changed the byte-length, go back
1185 to the original character. */ 1199 to the original character. */
1186 charlen = CHAR_STRING (translated, workbuf, str); 1200 charlen = CHAR_STRING (translated, workbuf, str);
@@ -1190,10 +1204,11 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1190 charlen = CHAR_STRING (c, workbuf, str); 1204 charlen = CHAR_STRING (c, workbuf, str);
1191 } 1205 }
1192 1206
1207 TRANSLATE (inverse, inverse_trt, c);
1208
1193 /* Did this char actually get translated? 1209 /* Did this char actually get translated?
1194 Would any other char get translated into it? */ 1210 Would any other char get translated into it? */
1195 if (translated != c 1211 if (translated != c || inverse != c)
1196 || TRANSLATE (inverse_trt, c) != c)
1197 { 1212 {
1198 /* Keep track of which character set row 1213 /* Keep track of which character set row
1199 contains the characters that need translation. */ 1214 contains the characters that need translation. */
@@ -1206,7 +1221,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1206 simple = 0; 1221 simple = 0;
1207 /* ??? Handa: this must do simple = 0 1222 /* ??? Handa: this must do simple = 0
1208 if c is a composite character. */ 1223 if c is a composite character. */
1209 } 1224 }
1210 1225
1211 /* Store this character into the translated pattern. */ 1226 /* Store this character into the translated pattern. */
1212 bcopy (str, pat, charlen); 1227 bcopy (str, pat, charlen);
@@ -1219,7 +1234,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1219 { 1234 {
1220 while (--len >= 0) 1235 while (--len >= 0)
1221 { 1236 {
1222 int c, translated; 1237 int c, translated, inverse;
1223 1238
1224 /* If we got here and the RE flag is set, it's because we're 1239 /* If we got here and the RE flag is set, it's because we're
1225 dealing with a regexp known to be trivial, so the backslash 1240 dealing with a regexp known to be trivial, so the backslash
@@ -1230,12 +1245,12 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1230 base_pat++; 1245 base_pat++;
1231 } 1246 }
1232 c = *base_pat++; 1247 c = *base_pat++;
1233 translated = TRANSLATE (trt, c); 1248 TRANSLATE (translated, trt, c);
1249 TRANSLATE (inverse, inverse_trt, c);
1234 1250
1235 /* Did this char actually get translated? 1251 /* Did this char actually get translated?
1236 Would any other char get translated into it? */ 1252 Would any other char get translated into it? */
1237 if (translated != c 1253 if (translated != c || inverse != c)
1238 || TRANSLATE (inverse_trt, c) != c)
1239 { 1254 {
1240 /* Keep track of which character set row 1255 /* Keep track of which character set row
1241 contains the characters that need translation. */ 1256 contains the characters that need translation. */
@@ -1246,7 +1261,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1246 /* If two different rows appear, needing translation, 1261 /* If two different rows appear, needing translation,
1247 then we cannot use boyer_moore search. */ 1262 then we cannot use boyer_moore search. */
1248 simple = 0; 1263 simple = 0;
1249 } 1264 }
1250 *pat++ = translated; 1265 *pat++ = translated;
1251 } 1266 }
1252 } 1267 }
@@ -1257,7 +1272,8 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1257 1272
1258 if (simple) 1273 if (simple)
1259 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt, 1274 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1260 pos, pos_byte, lim, lim_byte); 1275 pos, pos_byte, lim, lim_byte,
1276 charset_base);
1261 else 1277 else
1262 return simple_search (n, pat, len, len_byte, trt, 1278 return simple_search (n, pat, len, len_byte, trt,
1263 pos, pos_byte, lim, lim_byte); 1279 pos, pos_byte, lim, lim_byte);
@@ -1316,7 +1332,7 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1316 buf_charlen); 1332 buf_charlen);
1317 this_pos_byte += buf_charlen; 1333 this_pos_byte += buf_charlen;
1318 this_pos++; 1334 this_pos++;
1319 buf_ch = TRANSLATE (trt, buf_ch); 1335 TRANSLATE (buf_ch, trt, buf_ch);
1320 1336
1321 if (buf_ch != pat_ch) 1337 if (buf_ch != pat_ch)
1322 break; 1338 break;
@@ -1353,7 +1369,7 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1353 int buf_ch = FETCH_BYTE (this_pos); 1369 int buf_ch = FETCH_BYTE (this_pos);
1354 this_len--; 1370 this_len--;
1355 this_pos++; 1371 this_pos++;
1356 buf_ch = TRANSLATE (trt, buf_ch); 1372 TRANSLATE (buf_ch, trt, buf_ch);
1357 1373
1358 if (buf_ch != pat_ch) 1374 if (buf_ch != pat_ch)
1359 break; 1375 break;
@@ -1401,7 +1417,7 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1401 buf_charlen); 1417 buf_charlen);
1402 this_pos_byte += buf_charlen; 1418 this_pos_byte += buf_charlen;
1403 this_pos++; 1419 this_pos++;
1404 buf_ch = TRANSLATE (trt, buf_ch); 1420 TRANSLATE (buf_ch, trt, buf_ch);
1405 1421
1406 if (buf_ch != pat_ch) 1422 if (buf_ch != pat_ch)
1407 break; 1423 break;
@@ -1438,7 +1454,7 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1438 int buf_ch = FETCH_BYTE (this_pos); 1454 int buf_ch = FETCH_BYTE (this_pos);
1439 this_len--; 1455 this_len--;
1440 this_pos++; 1456 this_pos++;
1441 buf_ch = TRANSLATE (trt, buf_ch); 1457 TRANSLATE (buf_ch, trt, buf_ch);
1442 1458
1443 if (buf_ch != pat_ch) 1459 if (buf_ch != pat_ch)
1444 break; 1460 break;
@@ -1458,7 +1474,11 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1458 1474
1459 stop: 1475 stop:
1460 if (n == 0) 1476 if (n == 0)
1461 return pos; 1477 {
1478 set_search_regs (multibyte ? pos_byte : pos, len_byte);
1479
1480 return pos;
1481 }
1462 else if (n > 0) 1482 else if (n > 0)
1463 return -n; 1483 return -n;
1464 else 1484 else
@@ -1480,7 +1500,7 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1480 1500
1481static int 1501static int
1482boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, 1502boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1483 pos, pos_byte, lim, lim_byte) 1503 pos, pos_byte, lim, lim_byte, charset_base)
1484 int n; 1504 int n;
1485 unsigned char *base_pat; 1505 unsigned char *base_pat;
1486 int len, len_byte; 1506 int len, len_byte;
@@ -1488,6 +1508,7 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1488 Lisp_Object inverse_trt; 1508 Lisp_Object inverse_trt;
1489 int pos, pos_byte; 1509 int pos, pos_byte;
1490 int lim, lim_byte; 1510 int lim, lim_byte;
1511 int charset_base;
1491{ 1512{
1492 int direction = ((n > 0) ? 1 : -1); 1513 int direction = ((n > 0) ? 1 : -1);
1493 register int dirlen; 1514 register int dirlen;
@@ -1572,6 +1593,7 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1572 if (! NILP (trt)) 1593 if (! NILP (trt))
1573 { 1594 {
1574 int ch; 1595 int ch;
1596 int untranslated;
1575 int this_translated = 1; 1597 int this_translated = 1;
1576 1598
1577 if (multibyte 1599 if (multibyte
@@ -1580,17 +1602,22 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1580 unsigned char *charstart = ptr; 1602 unsigned char *charstart = ptr;
1581 while (! CHAR_HEAD_P (*charstart)) 1603 while (! CHAR_HEAD_P (*charstart))
1582 charstart--; 1604 charstart--;
1583 if (! CHAR_HEAD_P (*ptr)) 1605 untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1606 TRANSLATE (ch, trt, untranslated);
1607 if (charset_base == (ch & ~0xff))
1584 { 1608 {
1585 translate_prev_byte = ptr[-1]; 1609 if (! CHAR_HEAD_P (*ptr))
1586 if (! CHAR_HEAD_P (translate_prev_byte)) 1610 {
1587 translate_anteprev_byte = ptr[-2]; 1611 translate_prev_byte = ptr[-1];
1612 if (! CHAR_HEAD_P (translate_prev_byte))
1613 translate_anteprev_byte = ptr[-2];
1614 }
1588 } 1615 }
1589 ch = STRING_CHAR (charstart, ptr - charstart + 1); 1616 else
1590 ch = TRANSLATE (trt, ch); 1617 this_translated = 0;
1591 } 1618 }
1592 else if (!multibyte) 1619 else if (!multibyte)
1593 ch = TRANSLATE (trt, *ptr); 1620 TRANSLATE (ch, trt, *ptr);
1594 else 1621 else
1595 { 1622 {
1596 ch = *ptr; 1623 ch = *ptr;
@@ -1606,7 +1633,7 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1606 if (this_translated) 1633 if (this_translated)
1607 while (1) 1634 while (1)
1608 { 1635 {
1609 ch = TRANSLATE (inverse_trt, ch); 1636 TRANSLATE (ch, inverse_trt, ch);
1610 /* For all the characters that map into K, 1637 /* For all the characters that map into K,
1611 set up simple_translate to map them into K. */ 1638 set up simple_translate to map them into K. */
1612 simple_translate[(unsigned char) ch] = k; 1639 simple_translate[(unsigned char) ch] = k;