about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Kenichi Handa, 1998-12-15 04:35:38 +0000
committer: Kenichi Handa, 1998-12-15 04:35:38 +0000
commit: 040272ceb2552a84171db18185f53fccbb9fb12d (patch)
tree: e5625147f556c58b8644a544195717a53494f089 /src
parent: 42912be3607fc4f841e612c50e94ab2e7ca1ad9e (diff)
download: emacs-040272ceb2552a84171db18185f53fccbb9fb12d.tar.gz
download: emacs-040272ceb2552a84171db18185f53fccbb9fb12d.zip
(search_buffer): Don't use Boyer-Moore to search for an invalid multibyte code.
In the unibyte case, there is no need to check whether there are translations in more than one charset; just set charset_base to 0.
Diffstat (limited to 'src')
-rw-r--r-- src/search.c | 34
1 file changed, 13 insertions, 21 deletions
diff --git a/src/search.c b/src/search.c
index e4877b5f498..fee6b835bd3 100644
--- a/src/search.c
+++ b/src/search.c
@@ -1142,7 +1142,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1142 int multibyte = !NILP (current_buffer->enable_multibyte_characters); 1142 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1143 unsigned char *base_pat = XSTRING (string)->data; 1143 unsigned char *base_pat = XSTRING (string)->data;
1144 int charset_base = -1; 1144 int charset_base = -1;
1145 int simple = 1; 1145 int boyer_moore_ok = 1;
1146 1146
1147 /* MULTIBYTE says whether the text to be searched is multibyte. 1147 /* MULTIBYTE says whether the text to be searched is multibyte.
1148 We must convert PATTERN to match that, or we will not really 1148 We must convert PATTERN to match that, or we will not really
@@ -1204,6 +1204,12 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1204 } 1204 }
1205 1205
1206 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen); 1206 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1207
1208 /* If we are searching for something strange,
1209 an invalid multibyte code, don't use boyer-moore. */
1210 if (! ASCII_BYTE_P (c))
1211 boyer_moore_ok = 0;
1212
1207 /* Translate the character, if requested. */ 1213 /* Translate the character, if requested. */
1208 TRANSLATE (translated, trt, c); 1214 TRANSLATE (translated, trt, c);
1209 /* If translation changed the byte-length, go back 1215 /* If translation changed the byte-length, go back
@@ -1229,8 +1235,8 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1229 else if (charset_base != charset_base_code) 1235 else if (charset_base != charset_base_code)
1230 /* If two different rows appear, needing translation, 1236 /* If two different rows appear, needing translation,
1231 then we cannot use boyer_moore search. */ 1237 then we cannot use boyer_moore search. */
1232 simple = 0; 1238 boyer_moore_ok = 0;
1233 /* ??? Handa: this must do simple = 0 1239 /* ??? Handa: this must do boyer_moore_ok = 0
1234 if c is a composite character. */ 1240 if c is a composite character. */
1235 } 1241 }
1236 1242
@@ -1243,9 +1249,11 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1243 } 1249 }
1244 else 1250 else
1245 { 1251 {
1252 /* Unibyte buffer. */
1253 charset_base = 0;
1246 while (--len >= 0) 1254 while (--len >= 0)
1247 { 1255 {
1248 int c, translated, inverse; 1256 int c, translated;
1249 1257
1250 /* If we got here and the RE flag is set, it's because we're 1258 /* If we got here and the RE flag is set, it's because we're
1251 dealing with a regexp known to be trivial, so the backslash 1259 dealing with a regexp known to be trivial, so the backslash
@@ -1257,22 +1265,6 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1257 } 1265 }
1258 c = *base_pat++; 1266 c = *base_pat++;
1259 TRANSLATE (translated, trt, c); 1267 TRANSLATE (translated, trt, c);
1260 TRANSLATE (inverse, inverse_trt, c);
1261
1262 /* Did this char actually get translated?
1263 Would any other char get translated into it? */
1264 if (translated != c || inverse != c)
1265 {
1266 /* Keep track of which character set row
1267 contains the characters that need translation. */
1268 int charset_base_code = c & ~0xff;
1269 if (charset_base == -1)
1270 charset_base = charset_base_code;
1271 else if (charset_base != charset_base_code)
1272 /* If two different rows appear, needing translation,
1273 then we cannot use boyer_moore search. */
1274 simple = 0;
1275 }
1276 *pat++ = translated; 1268 *pat++ = translated;
1277 } 1269 }
1278 } 1270 }
@@ -1281,7 +1273,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1281 len = raw_pattern_size; 1273 len = raw_pattern_size;
1282 pat = base_pat = patbuf; 1274 pat = base_pat = patbuf;
1283 1275
1284 if (simple) 1276 if (boyer_moore_ok)
1285 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt, 1277 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1286 pos, pos_byte, lim, lim_byte, 1278 pos, pos_byte, lim, lim_byte,
1287 charset_base); 1279 charset_base);