diff options
| author | Kenichi Handa | 1998-12-15 04:35:38 +0000 |
|---|---|---|
| committer | Kenichi Handa | 1998-12-15 04:35:38 +0000 |
| commit | 040272ceb2552a84171db18185f53fccbb9fb12d (patch) | |
| tree | e5625147f556c58b8644a544195717a53494f089 /src | |
| parent | 42912be3607fc4f841e612c50e94ab2e7ca1ad9e (diff) | |
| download | emacs-040272ceb2552a84171db18185f53fccbb9fb12d.tar.gz emacs-040272ceb2552a84171db18185f53fccbb9fb12d.zip | |
(search_buffer): Don't use Boyer-Moore
to search for an invalid multibyte code.
In the unibyte case, there is no need to check whether there are
translations in more than one charset; just set charset_base to 0.
Diffstat (limited to 'src')
| -rw-r--r-- | src/search.c | 34 |
1 file changed, 13 insertions, 21 deletions
diff --git a/src/search.c b/src/search.c index e4877b5f498..fee6b835bd3 100644 --- a/src/search.c +++ b/src/search.c | |||
| @@ -1142,7 +1142,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, | |||
| 1142 | int multibyte = !NILP (current_buffer->enable_multibyte_characters); | 1142 | int multibyte = !NILP (current_buffer->enable_multibyte_characters); |
| 1143 | unsigned char *base_pat = XSTRING (string)->data; | 1143 | unsigned char *base_pat = XSTRING (string)->data; |
| 1144 | int charset_base = -1; | 1144 | int charset_base = -1; |
| 1145 | int simple = 1; | 1145 | int boyer_moore_ok = 1; |
| 1146 | 1146 | ||
| 1147 | /* MULTIBYTE says whether the text to be searched is multibyte. | 1147 | /* MULTIBYTE says whether the text to be searched is multibyte. |
| 1148 | We must convert PATTERN to match that, or we will not really | 1148 | We must convert PATTERN to match that, or we will not really |
| @@ -1204,6 +1204,12 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, | |||
| 1204 | } | 1204 | } |
| 1205 | 1205 | ||
| 1206 | c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen); | 1206 | c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen); |
| 1207 | |||
| 1208 | /* If we are searching for something strange, | ||
| 1209 | an invalid multibyte code, don't use boyer-moore. */ | ||
| 1210 | if (! ASCII_BYTE_P (c)) | ||
| 1211 | boyer_moore_ok = 0; | ||
| 1212 | |||
| 1207 | /* Translate the character, if requested. */ | 1213 | /* Translate the character, if requested. */ |
| 1208 | TRANSLATE (translated, trt, c); | 1214 | TRANSLATE (translated, trt, c); |
| 1209 | /* If translation changed the byte-length, go back | 1215 | /* If translation changed the byte-length, go back |
| @@ -1229,8 +1235,8 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, | |||
| 1229 | else if (charset_base != charset_base_code) | 1235 | else if (charset_base != charset_base_code) |
| 1230 | /* If two different rows appear, needing translation, | 1236 | /* If two different rows appear, needing translation, |
| 1231 | then we cannot use boyer_moore search. */ | 1237 | then we cannot use boyer_moore search. */ |
| 1232 | simple = 0; | 1238 | boyer_moore_ok = 0; |
| 1233 | /* ??? Handa: this must do simple = 0 | 1239 | /* ??? Handa: this must do boyer_moore_ok = 0 |
| 1234 | if c is a composite character. */ | 1240 | if c is a composite character. */ |
| 1235 | } | 1241 | } |
| 1236 | 1242 | ||
| @@ -1243,9 +1249,11 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, | |||
| 1243 | } | 1249 | } |
| 1244 | else | 1250 | else |
| 1245 | { | 1251 | { |
| 1252 | /* Unibyte buffer. */ | ||
| 1253 | charset_base = 0; | ||
| 1246 | while (--len >= 0) | 1254 | while (--len >= 0) |
| 1247 | { | 1255 | { |
| 1248 | int c, translated, inverse; | 1256 | int c, translated; |
| 1249 | 1257 | ||
| 1250 | /* If we got here and the RE flag is set, it's because we're | 1258 | /* If we got here and the RE flag is set, it's because we're |
| 1251 | dealing with a regexp known to be trivial, so the backslash | 1259 | dealing with a regexp known to be trivial, so the backslash |
| @@ -1257,22 +1265,6 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, | |||
| 1257 | } | 1265 | } |
| 1258 | c = *base_pat++; | 1266 | c = *base_pat++; |
| 1259 | TRANSLATE (translated, trt, c); | 1267 | TRANSLATE (translated, trt, c); |
| 1260 | TRANSLATE (inverse, inverse_trt, c); | ||
| 1261 | |||
| 1262 | /* Did this char actually get translated? | ||
| 1263 | Would any other char get translated into it? */ | ||
| 1264 | if (translated != c || inverse != c) | ||
| 1265 | { | ||
| 1266 | /* Keep track of which character set row | ||
| 1267 | contains the characters that need translation. */ | ||
| 1268 | int charset_base_code = c & ~0xff; | ||
| 1269 | if (charset_base == -1) | ||
| 1270 | charset_base = charset_base_code; | ||
| 1271 | else if (charset_base != charset_base_code) | ||
| 1272 | /* If two different rows appear, needing translation, | ||
| 1273 | then we cannot use boyer_moore search. */ | ||
| 1274 | simple = 0; | ||
| 1275 | } | ||
| 1276 | *pat++ = translated; | 1268 | *pat++ = translated; |
| 1277 | } | 1269 | } |
| 1278 | } | 1270 | } |
| @@ -1281,7 +1273,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, | |||
| 1281 | len = raw_pattern_size; | 1273 | len = raw_pattern_size; |
| 1282 | pat = base_pat = patbuf; | 1274 | pat = base_pat = patbuf; |
| 1283 | 1275 | ||
| 1284 | if (simple) | 1276 | if (boyer_moore_ok) |
| 1285 | return boyer_moore (n, pat, len, len_byte, trt, inverse_trt, | 1277 | return boyer_moore (n, pat, len, len_byte, trt, inverse_trt, |
| 1286 | pos, pos_byte, lim, lim_byte, | 1278 | pos, pos_byte, lim, lim_byte, |
| 1287 | charset_base); | 1279 | charset_base); |