diff options
| author | Richard M. Stallman | 1998-02-08 21:33:56 +0000 |
|---|---|---|
| committer | Richard M. Stallman | 1998-02-08 21:33:56 +0000 |
| commit | facdc7504890785787642585215c4193ccd4476e (patch) | |
| tree | 20ff266a268440bb22b1ead13f94d9fc5af59050 /src | |
| parent | b05525fa0df8a7a71a1b66c1e22860194bd6421c (diff) | |
| download | emacs-facdc7504890785787642585215c4193ccd4476e.tar.gz emacs-facdc7504890785787642585215c4193ccd4476e.zip | |
(boyer_moore, simple_search): New subroutines.
(search_buffer): For non-regexp, use one of those subroutines.
Args TRT and INVERSE_TRT are now Lisp_Object. Callers changed.
(compile_pattern_1): Arg TRANSLATE is now Lisp_Object. Calls changed.
(compile_pattern): Arg TRANSLATE is now Lisp_Object. Calls changed.
Diffstat (limited to 'src')
| -rw-r--r-- | src/search.c | 987 |
1 files changed, 704 insertions, 283 deletions
diff --git a/src/search.c b/src/search.c index 4404fdbe467..c3081f95657 100644 --- a/src/search.c +++ b/src/search.c | |||
| @@ -84,7 +84,8 @@ Lisp_Object Qinvalid_regexp; | |||
| 84 | 84 | ||
| 85 | static void set_search_regs (); | 85 | static void set_search_regs (); |
| 86 | static void save_search_regs (); | 86 | static void save_search_regs (); |
| 87 | 87 | static int simple_search (); | |
| 88 | static int boyer_moore (); | ||
| 88 | static int search_buffer (); | 89 | static int search_buffer (); |
| 89 | 90 | ||
| 90 | static void | 91 | static void |
| @@ -102,7 +103,7 @@ matcher_overflow () | |||
| 102 | /* Compile a regexp and signal a Lisp error if anything goes wrong. | 103 | /* Compile a regexp and signal a Lisp error if anything goes wrong. |
| 103 | PATTERN is the pattern to compile. | 104 | PATTERN is the pattern to compile. |
| 104 | CP is the place to put the result. | 105 | CP is the place to put the result. |
| 105 | TRANSLATE is a translation table for ignoring case, or NULL for none. | 106 | TRANSLATE is a translation table for ignoring case, or nil for none. |
| 106 | REGP is the structure that says where to store the "register" | 107 | REGP is the structure that says where to store the "register" |
| 107 | values that will result from matching this pattern. | 108 | values that will result from matching this pattern. |
| 108 | If it is 0, we should compile the pattern not to record any | 109 | If it is 0, we should compile the pattern not to record any |
| @@ -117,7 +118,7 @@ static void | |||
| 117 | compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte) | 118 | compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte) |
| 118 | struct regexp_cache *cp; | 119 | struct regexp_cache *cp; |
| 119 | Lisp_Object pattern; | 120 | Lisp_Object pattern; |
| 120 | Lisp_Object *translate; | 121 | Lisp_Object translate; |
| 121 | struct re_registers *regp; | 122 | struct re_registers *regp; |
| 122 | int posix; | 123 | int posix; |
| 123 | int multibyte; | 124 | int multibyte; |
| @@ -155,11 +156,11 @@ compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte) | |||
| 155 | raw_pattern_size = XSTRING (pattern)->size; | 156 | raw_pattern_size = XSTRING (pattern)->size; |
| 156 | raw_pattern = (char *) alloca (raw_pattern_size + 1); | 157 | raw_pattern = (char *) alloca (raw_pattern_size + 1); |
| 157 | copy_text (XSTRING (pattern)->data, raw_pattern, | 158 | copy_text (XSTRING (pattern)->data, raw_pattern, |
| 158 | XSTRING (pattern)->size, 1, 0); | 159 | XSTRING (pattern)->size_byte, 1, 0); |
| 159 | } | 160 | } |
| 160 | 161 | ||
| 161 | cp->regexp = Qnil; | 162 | cp->regexp = Qnil; |
| 162 | cp->buf.translate = translate; | 163 | cp->buf.translate = (! NILP (translate) ? translate : 0); |
| 163 | cp->posix = posix; | 164 | cp->posix = posix; |
| 164 | cp->buf.multibyte = multibyte; | 165 | cp->buf.multibyte = multibyte; |
| 165 | BLOCK_INPUT; | 166 | BLOCK_INPUT; |
| @@ -177,7 +178,7 @@ compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte) | |||
| 177 | /* Compile a regexp if necessary, but first check to see if there's one in | 178 | /* Compile a regexp if necessary, but first check to see if there's one in |
| 178 | the cache. | 179 | the cache. |
| 179 | PATTERN is the pattern to compile. | 180 | PATTERN is the pattern to compile. |
| 180 | TRANSLATE is a translation table for ignoring case, or NULL for none. | 181 | TRANSLATE is a translation table for ignoring case, or nil for none. |
| 181 | REGP is the structure that says where to store the "register" | 182 | REGP is the structure that says where to store the "register" |
| 182 | values that will result from matching this pattern. | 183 | values that will result from matching this pattern. |
| 183 | If it is 0, we should compile the pattern not to record any | 184 | If it is 0, we should compile the pattern not to record any |
| @@ -189,7 +190,7 @@ struct re_pattern_buffer * | |||
| 189 | compile_pattern (pattern, regp, translate, posix, multibyte) | 190 | compile_pattern (pattern, regp, translate, posix, multibyte) |
| 190 | Lisp_Object pattern; | 191 | Lisp_Object pattern; |
| 191 | struct re_registers *regp; | 192 | struct re_registers *regp; |
| 192 | Lisp_Object *translate; | 193 | Lisp_Object translate; |
| 193 | int posix, multibyte; | 194 | int posix, multibyte; |
| 194 | { | 195 | { |
| 195 | struct regexp_cache *cp, **cpp; | 196 | struct regexp_cache *cp, **cpp; |
| @@ -199,7 +200,7 @@ compile_pattern (pattern, regp, translate, posix, multibyte) | |||
| 199 | cp = *cpp; | 200 | cp = *cpp; |
| 200 | if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size | 201 | if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size |
| 201 | && !NILP (Fstring_equal (cp->regexp, pattern)) | 202 | && !NILP (Fstring_equal (cp->regexp, pattern)) |
| 202 | && cp->buf.translate == translate | 203 | && cp->buf.translate == (! NILP (translate) ? translate : 0) |
| 203 | && cp->posix == posix | 204 | && cp->posix == posix |
| 204 | && cp->buf.multibyte == multibyte) | 205 | && cp->buf.multibyte == multibyte) |
| 205 | break; | 206 | break; |
| @@ -255,7 +256,7 @@ looking_at_1 (string, posix) | |||
| 255 | CHECK_STRING (string, 0); | 256 | CHECK_STRING (string, 0); |
| 256 | bufp = compile_pattern (string, &search_regs, | 257 | bufp = compile_pattern (string, &search_regs, |
| 257 | (!NILP (current_buffer->case_fold_search) | 258 | (!NILP (current_buffer->case_fold_search) |
| 258 | ? XCHAR_TABLE (DOWNCASE_TABLE)->contents : 0), | 259 | ? DOWNCASE_TABLE : Qnil), |
| 259 | posix, | 260 | posix, |
| 260 | !NILP (current_buffer->enable_multibyte_characters)); | 261 | !NILP (current_buffer->enable_multibyte_characters)); |
| 261 | 262 | ||
| @@ -360,7 +361,7 @@ string_match_1 (regexp, string, start, posix) | |||
| 360 | 361 | ||
| 361 | bufp = compile_pattern (regexp, &search_regs, | 362 | bufp = compile_pattern (regexp, &search_regs, |
| 362 | (!NILP (current_buffer->case_fold_search) | 363 | (!NILP (current_buffer->case_fold_search) |
| 363 | ? XCHAR_TABLE (DOWNCASE_TABLE)->contents : 0), | 364 | ? DOWNCASE_TABLE : Qnil), |
| 364 | posix, | 365 | posix, |
| 365 | STRING_MULTIBYTE (string)); | 366 | STRING_MULTIBYTE (string)); |
| 366 | immediate_quit = 1; | 367 | immediate_quit = 1; |
| @@ -424,7 +425,8 @@ fast_string_match (regexp, string) | |||
| 424 | int val; | 425 | int val; |
| 425 | struct re_pattern_buffer *bufp; | 426 | struct re_pattern_buffer *bufp; |
| 426 | 427 | ||
| 427 | bufp = compile_pattern (regexp, 0, 0, 0, STRING_MULTIBYTE (string)); | 428 | bufp = compile_pattern (regexp, 0, Qnil, |
| 429 | 0, STRING_MULTIBYTE (string)); | ||
| 428 | immediate_quit = 1; | 430 | immediate_quit = 1; |
| 429 | re_match_object = string; | 431 | re_match_object = string; |
| 430 | 432 | ||
| @@ -454,7 +456,7 @@ fast_c_string_match_ignore_case (regexp, string) | |||
| 454 | regexp = string_make_unibyte (regexp); | 456 | regexp = string_make_unibyte (regexp); |
| 455 | re_match_object = Qt; | 457 | re_match_object = Qt; |
| 456 | bufp = compile_pattern (regexp, 0, | 458 | bufp = compile_pattern (regexp, 0, |
| 457 | XCHAR_TABLE (Vascii_downcase_table)->contents, 0, | 459 | Vascii_downcase_table, 0, |
| 458 | 0); | 460 | 0); |
| 459 | immediate_quit = 1; | 461 | immediate_quit = 1; |
| 460 | val = re_search (bufp, string, len, 0, len, 0); | 462 | val = re_search (bufp, string, len, 0, len, 0); |
| @@ -889,11 +891,11 @@ search_command (string, bound, noerror, count, direction, RE, posix) | |||
| 889 | 891 | ||
| 890 | np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE, | 892 | np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE, |
| 891 | (!NILP (current_buffer->case_fold_search) | 893 | (!NILP (current_buffer->case_fold_search) |
| 892 | ? XCHAR_TABLE (current_buffer->case_canon_table)->contents | 894 | ? current_buffer->case_canon_table |
| 893 | : 0), | 895 | : make_number (0)), |
| 894 | (!NILP (current_buffer->case_fold_search) | 896 | (!NILP (current_buffer->case_fold_search) |
| 895 | ? XCHAR_TABLE (current_buffer->case_eqv_table)->contents | 897 | ? current_buffer->case_eqv_table |
| 896 | : 0), | 898 | : make_number (0)), |
| 897 | posix); | 899 | posix); |
| 898 | if (np <= 0) | 900 | if (np <= 0) |
| 899 | { | 901 | { |
| @@ -963,13 +965,16 @@ trivial_regexp_p (regexp) | |||
| 963 | If N is positive, searching is forward and LIM must be greater than POS. | 965 | If N is positive, searching is forward and LIM must be greater than POS. |
| 964 | If N is negative, searching is backward and LIM must be less than POS. | 966 | If N is negative, searching is backward and LIM must be less than POS. |
| 965 | 967 | ||
| 966 | Returns -x if only N-x occurrences found (x > 0), | 968 | Returns -x if x occurrences remain to be found (x > 0), |
| 967 | or else the position at the beginning of the Nth occurrence | 969 | or else the position at the beginning of the Nth occurrence |
| 968 | (if searching backward) or the end (if searching forward). | 970 | (if searching backward) or the end (if searching forward). |
| 969 | 971 | ||
| 970 | POSIX is nonzero if we want full backtracking (POSIX style) | 972 | POSIX is nonzero if we want full backtracking (POSIX style) |
| 971 | for this pattern. 0 means backtrack only enough to get a valid match. */ | 973 | for this pattern. 0 means backtrack only enough to get a valid match. */ |
| 972 | 974 | ||
| 975 | #define TRANSLATE(trt, d) \ | ||
| 976 | (! NILP (trt) ? XINT (Faref (trt, make_number (d))) : (d)) | ||
| 977 | |||
| 973 | static int | 978 | static int |
| 974 | search_buffer (string, pos, pos_byte, lim, lim_byte, n, | 979 | search_buffer (string, pos, pos_byte, lim, lim_byte, n, |
| 975 | RE, trt, inverse_trt, posix) | 980 | RE, trt, inverse_trt, posix) |
| @@ -980,22 +985,13 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, | |||
| 980 | int lim_byte; | 985 | int lim_byte; |
| 981 | int n; | 986 | int n; |
| 982 | int RE; | 987 | int RE; |
| 983 | Lisp_Object *trt; | 988 | Lisp_Object trt; |
| 984 | Lisp_Object *inverse_trt; | 989 | Lisp_Object inverse_trt; |
| 985 | int posix; | 990 | int posix; |
| 986 | { | 991 | { |
| 987 | int len = XSTRING (string)->size; | 992 | int len = XSTRING (string)->size; |
| 988 | int len_byte = XSTRING (string)->size_byte; | 993 | int len_byte = XSTRING (string)->size_byte; |
| 989 | unsigned char *base_pat = XSTRING (string)->data; | 994 | register int i; |
| 990 | register int *BM_tab; | ||
| 991 | int *BM_tab_base; | ||
| 992 | register int direction = ((n > 0) ? 1 : -1); | ||
| 993 | register int dirlen; | ||
| 994 | int infinity, limit, k, stride_for_teases; | ||
| 995 | register unsigned char *pat, *cursor, *p_limit; | ||
| 996 | register int i, j; | ||
| 997 | unsigned char *p1, *p2; | ||
| 998 | int s1, s2; | ||
| 999 | 995 | ||
| 1000 | if (running_asynch_code) | 996 | if (running_asynch_code) |
| 1001 | save_search_regs (); | 997 | save_search_regs (); |
| @@ -1013,6 +1009,8 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, | |||
| 1013 | 1009 | ||
| 1014 | if (RE && !trivial_regexp_p (string)) | 1010 | if (RE && !trivial_regexp_p (string)) |
| 1015 | { | 1011 | { |
| 1012 | unsigned char *p1, *p2; | ||
| 1013 | int s1, s2; | ||
| 1016 | struct re_pattern_buffer *bufp; | 1014 | struct re_pattern_buffer *bufp; |
| 1017 | 1015 | ||
| 1018 | bufp = compile_pattern (string, &search_regs, trt, posix, | 1016 | bufp = compile_pattern (string, &search_regs, trt, posix, |
| @@ -1112,304 +1110,727 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, | |||
| 1112 | } | 1110 | } |
| 1113 | else /* non-RE case */ | 1111 | else /* non-RE case */ |
| 1114 | { | 1112 | { |
| 1115 | #ifdef C_ALLOCA | 1113 | unsigned char *raw_pattern, *pat; |
| 1116 | int BM_tab_space[0400]; | 1114 | int raw_pattern_size; |
| 1117 | BM_tab = &BM_tab_space[0]; | 1115 | int raw_pattern_size_byte; |
| 1118 | #else | 1116 | unsigned char *patbuf; |
| 1119 | BM_tab = (int *) alloca (0400 * sizeof (int)); | 1117 | int multibyte = !NILP (current_buffer->enable_multibyte_characters); |
| 1120 | #endif | 1118 | unsigned char *base_pat = XSTRING (string)->data; |
| 1121 | { | 1119 | int charset_base = -1; |
| 1122 | unsigned char *raw_pattern; | 1120 | int simple = 1; |
| 1123 | int raw_pattern_size; | 1121 | |
| 1124 | unsigned char *patbuf; | 1122 | /* MULTIBYTE says whether the text to be searched is multibyte. |
| 1125 | int multibyte = !NILP (current_buffer->enable_multibyte_characters); | 1123 | We must convert PATTERN to match that, or we will not really |
| 1124 | find things right. */ | ||
| 1125 | |||
| 1126 | if (multibyte == STRING_MULTIBYTE (string)) | ||
| 1127 | { | ||
| 1128 | raw_pattern = (char *) XSTRING (string)->data; | ||
| 1129 | raw_pattern_size = XSTRING (string)->size; | ||
| 1130 | raw_pattern_size_byte = XSTRING (string)->size_byte; | ||
| 1131 | } | ||
| 1132 | else if (multibyte) | ||
| 1133 | { | ||
| 1134 | raw_pattern_size = XSTRING (string)->size; | ||
| 1135 | raw_pattern_size_byte | ||
| 1136 | = count_size_as_multibyte (XSTRING (string)->data, | ||
| 1137 | raw_pattern_size); | ||
| 1138 | raw_pattern = (char *) alloca (raw_pattern_size_byte + 1); | ||
| 1139 | copy_text (XSTRING (string)->data, raw_pattern, | ||
| 1140 | XSTRING (string)->size, 0, 1); | ||
| 1141 | } | ||
| 1142 | else | ||
| 1143 | { | ||
| 1144 | /* Converting multibyte to single-byte. | ||
| 1145 | |||
| 1146 | ??? Perhaps this conversion should be done in a special way | ||
| 1147 | by subtracting nonascii-insert-offset from each non-ASCII char, | ||
| 1148 | so that only the multibyte chars which really correspond to | ||
| 1149 | the chosen single-byte character set can possibly match. */ | ||
| 1150 | raw_pattern_size = XSTRING (string)->size; | ||
| 1151 | raw_pattern_size_byte = XSTRING (string)->size; | ||
| 1152 | raw_pattern = (char *) alloca (raw_pattern_size + 1); | ||
| 1153 | copy_text (XSTRING (string)->data, raw_pattern, | ||
| 1154 | XSTRING (string)->size_byte, 1, 0); | ||
| 1155 | } | ||
| 1156 | |||
| 1157 | /* Copy and optionally translate the pattern. */ | ||
| 1158 | len = raw_pattern_size; | ||
| 1159 | len_byte = raw_pattern_size_byte; | ||
| 1160 | patbuf = (unsigned char *) alloca (len_byte); | ||
| 1161 | pat = patbuf; | ||
| 1162 | base_pat = raw_pattern; | ||
| 1163 | if (multibyte) | ||
| 1164 | { | ||
| 1165 | while (--len >= 0) | ||
| 1166 | { | ||
| 1167 | unsigned char workbuf[4], *str; | ||
| 1168 | int c, translated; | ||
| 1169 | int in_charlen, charlen; | ||
| 1170 | |||
| 1171 | /* If we got here and the RE flag is set, it's because we're | ||
| 1172 | dealing with a regexp known to be trivial, so the backslash | ||
| 1173 | just quotes the next character. */ | ||
| 1174 | if (RE && *base_pat == '\\') | ||
| 1175 | { | ||
| 1176 | len--; | ||
| 1177 | len_byte--; | ||
| 1178 | base_pat++; | ||
| 1179 | } | ||
| 1180 | |||
| 1181 | c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen); | ||
| 1182 | /* Translate the character, if requested. */ | ||
| 1183 | translated = TRANSLATE (trt, c); | ||
| 1184 | /* If translation changed the byte-length, go back | ||
| 1185 | to the original character. */ | ||
| 1186 | charlen = CHAR_STRING (translated, workbuf, str); | ||
| 1187 | if (in_charlen != charlen) | ||
| 1188 | { | ||
| 1189 | translated = c; | ||
| 1190 | charlen = CHAR_STRING (c, workbuf, str); | ||
| 1191 | } | ||
| 1192 | |||
| 1193 | /* Did this char actually get translated? | ||
| 1194 | Would any other char get translated into it? */ | ||
| 1195 | if (translated != c | ||
| 1196 | || TRANSLATE (inverse_trt, c) != c) | ||
| 1197 | { | ||
| 1198 | /* Keep track of which character set row | ||
| 1199 | contains the characters that need translation. */ | ||
| 1200 | int charset_base_code = c & ~0xff; | ||
| 1201 | if (charset_base == -1) | ||
| 1202 | charset_base = charset_base_code; | ||
| 1203 | else if (charset_base != charset_base_code) | ||
| 1204 | /* If two different rows appear, needing translation, | ||
| 1205 | then we cannot use boyer_moore search. */ | ||
| 1206 | simple = 0; | ||
| 1207 | /* ??? Handa: this must do simple = 0 | ||
| 1208 | if c is a composite character. */ | ||
| 1209 | } | ||
| 1210 | |||
| 1211 | /* Store this character into the translated pattern. */ | ||
| 1212 | bcopy (str, pat, charlen); | ||
| 1213 | pat += charlen; | ||
| 1214 | base_pat += in_charlen; | ||
| 1215 | len_byte -= in_charlen; | ||
| 1216 | } | ||
| 1217 | } | ||
| 1218 | else | ||
| 1219 | { | ||
| 1220 | while (--len >= 0) | ||
| 1221 | { | ||
| 1222 | int c, translated; | ||
| 1223 | |||
| 1224 | /* If we got here and the RE flag is set, it's because we're | ||
| 1225 | dealing with a regexp known to be trivial, so the backslash | ||
| 1226 | just quotes the next character. */ | ||
| 1227 | if (RE && *base_pat == '\\') | ||
| 1228 | { | ||
| 1229 | len--; | ||
| 1230 | base_pat++; | ||
| 1231 | } | ||
| 1232 | c = *base_pat++; | ||
| 1233 | translated = TRANSLATE (trt, c); | ||
| 1234 | |||
| 1235 | /* Did this char actually get translated? | ||
| 1236 | Would any other char get translated into it? */ | ||
| 1237 | if (translated != c | ||
| 1238 | || TRANSLATE (inverse_trt, c) != c) | ||
| 1239 | { | ||
| 1240 | /* Keep track of which character set row | ||
| 1241 | contains the characters that need translation. */ | ||
| 1242 | int charset_base_code = c & ~0xff; | ||
| 1243 | if (charset_base == -1) | ||
| 1244 | charset_base = charset_base_code; | ||
| 1245 | else if (charset_base != charset_base_code) | ||
| 1246 | /* If two different rows appear, needing translation, | ||
| 1247 | then we cannot use boyer_moore search. */ | ||
| 1248 | simple = 0; | ||
| 1249 | } | ||
| 1250 | *pat++ = translated; | ||
| 1251 | } | ||
| 1252 | } | ||
| 1253 | |||
| 1254 | len_byte = pat - patbuf; | ||
| 1255 | len = raw_pattern_size; | ||
| 1256 | pat = base_pat = patbuf; | ||
| 1257 | |||
| 1258 | if (simple) | ||
| 1259 | return boyer_moore (n, pat, len, len_byte, trt, inverse_trt, | ||
| 1260 | pos, pos_byte, lim, lim_byte); | ||
| 1261 | else | ||
| 1262 | return simple_search (n, pat, len, len_byte, trt, | ||
| 1263 | pos, pos_byte, lim, lim_byte); | ||
| 1264 | } | ||
| 1265 | } | ||
| 1266 | |||
| 1267 | /* Do a simple string search N times for the string PAT, | ||
| 1268 | whose length is LEN/LEN_BYTE, | ||
| 1269 | from buffer position POS/POS_BYTE until LIM/LIM_BYTE. | ||
| 1270 | TRT is the translation table. | ||
| 1126 | 1271 | ||
| 1127 | /* MULTIBYTE says whether the text to be searched is multibyte. | 1272 | Return the character position where the match is found. |
| 1128 | We must convert PATTERN to match that, or we will not really | 1273 | Otherwise, if M matches remained to be found, return -M. |
| 1129 | find things right. */ | ||
| 1130 | 1274 | ||
| 1131 | if (multibyte == STRING_MULTIBYTE (string)) | 1275 | This kind of search works regardless of what is in PAT and |
| 1276 | regardless of what is in TRT. It is used in cases where | ||
| 1277 | boyer_moore cannot work. */ | ||
| 1278 | |||
| 1279 | static int | ||
| 1280 | simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte) | ||
| 1281 | int n; | ||
| 1282 | unsigned char *pat; | ||
| 1283 | int len, len_byte; | ||
| 1284 | Lisp_Object trt; | ||
| 1285 | int pos, pos_byte; | ||
| 1286 | int lim, lim_byte; | ||
| 1287 | { | ||
| 1288 | int multibyte = ! NILP (current_buffer->enable_multibyte_characters); | ||
| 1289 | |||
| 1290 | if (lim > pos && multibyte) | ||
| 1291 | while (n > 0) | ||
| 1292 | { | ||
| 1293 | while (1) | ||
| 1132 | { | 1294 | { |
| 1133 | raw_pattern = (char *) XSTRING (string)->data; | 1295 | /* Try matching at position POS. */ |
| 1134 | raw_pattern_size = XSTRING (string)->size_byte; | 1296 | int this_pos = pos; |
| 1297 | int this_pos_byte = pos_byte; | ||
| 1298 | int this_len = len; | ||
| 1299 | int this_len_byte = len_byte; | ||
| 1300 | unsigned char *p = pat; | ||
| 1301 | if (pos + len > lim) | ||
| 1302 | goto stop; | ||
| 1303 | |||
| 1304 | while (this_len > 0) | ||
| 1305 | { | ||
| 1306 | int charlen, buf_charlen; | ||
| 1307 | int pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); | ||
| 1308 | int buf_ch; | ||
| 1309 | |||
| 1310 | this_len_byte -= charlen; | ||
| 1311 | this_len--; | ||
| 1312 | p += charlen; | ||
| 1313 | |||
| 1314 | buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), | ||
| 1315 | ZV_BYTE - this_pos_byte, | ||
| 1316 | buf_charlen); | ||
| 1317 | this_pos_byte += buf_charlen; | ||
| 1318 | this_pos++; | ||
| 1319 | buf_ch = TRANSLATE (trt, buf_ch); | ||
| 1320 | |||
| 1321 | if (buf_ch != pat_ch) | ||
| 1322 | break; | ||
| 1323 | } | ||
| 1324 | |||
| 1325 | if (this_len == 0) | ||
| 1326 | { | ||
| 1327 | pos += len; | ||
| 1328 | pos_byte += len_byte; | ||
| 1329 | break; | ||
| 1330 | } | ||
| 1331 | |||
| 1332 | INC_BOTH (pos, pos_byte); | ||
| 1135 | } | 1333 | } |
| 1136 | else if (multibyte) | 1334 | |
| 1335 | n--; | ||
| 1336 | } | ||
| 1337 | else if (lim > pos) | ||
| 1338 | while (n > 0) | ||
| 1339 | { | ||
| 1340 | while (1) | ||
| 1137 | { | 1341 | { |
| 1138 | raw_pattern_size = count_size_as_multibyte (XSTRING (string)->data, | 1342 | /* Try matching at position POS. */ |
| 1139 | XSTRING (string)->size); | 1343 | int this_pos = pos; |
| 1140 | raw_pattern = (char *) alloca (raw_pattern_size + 1); | 1344 | int this_len = len; |
| 1141 | copy_text (XSTRING (string)->data, raw_pattern, | 1345 | unsigned char *p = pat; |
| 1142 | XSTRING (string)->size, 0, 1); | 1346 | |
| 1347 | if (pos + len > lim) | ||
| 1348 | goto stop; | ||
| 1349 | |||
| 1350 | while (this_len > 0) | ||
| 1351 | { | ||
| 1352 | int pat_ch = *p++; | ||
| 1353 | int buf_ch = FETCH_BYTE (this_pos); | ||
| 1354 | this_len--; | ||
| 1355 | this_pos++; | ||
| 1356 | buf_ch = TRANSLATE (trt, buf_ch); | ||
| 1357 | |||
| 1358 | if (buf_ch != pat_ch) | ||
| 1359 | break; | ||
| 1360 | } | ||
| 1361 | |||
| 1362 | if (this_len == 0) | ||
| 1363 | { | ||
| 1364 | pos += len; | ||
| 1365 | break; | ||
| 1366 | } | ||
| 1367 | |||
| 1368 | pos++; | ||
| 1143 | } | 1369 | } |
| 1144 | else | 1370 | |
| 1371 | n--; | ||
| 1372 | } | ||
| 1373 | /* Backwards search. */ | ||
| 1374 | else if (lim < pos && multibyte) | ||
| 1375 | while (n < 0) | ||
| 1376 | { | ||
| 1377 | while (1) | ||
| 1145 | { | 1378 | { |
| 1146 | /* Converting multibyte to single-byte. | 1379 | /* Try matching at position POS. */ |
| 1147 | 1380 | int this_pos = pos - len; | |
| 1148 | ??? Perhaps this conversion should be done in a special way | 1381 | int this_pos_byte = pos_byte - len_byte; |
| 1149 | by subtracting nonascii-insert-offset from each non-ASCII char, | 1382 | int this_len = len; |
| 1150 | so that only the multibyte chars which really correspond to | 1383 | int this_len_byte = len_byte; |
| 1151 | the chosen single-byte character set can possibly match. */ | 1384 | unsigned char *p = pat; |
| 1152 | raw_pattern_size = XSTRING (string)->size; | 1385 | |
| 1153 | raw_pattern = (char *) alloca (raw_pattern_size + 1); | 1386 | if (pos - len < lim) |
| 1154 | copy_text (XSTRING (string)->data, raw_pattern, | 1387 | goto stop; |
| 1155 | XSTRING (string)->size, 1, 0); | 1388 | |
| 1389 | while (this_len > 0) | ||
| 1390 | { | ||
| 1391 | int charlen, buf_charlen; | ||
| 1392 | int pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); | ||
| 1393 | int buf_ch; | ||
| 1394 | |||
| 1395 | this_len_byte -= charlen; | ||
| 1396 | this_len--; | ||
| 1397 | p += charlen; | ||
| 1398 | |||
| 1399 | buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), | ||
| 1400 | ZV_BYTE - this_pos_byte, | ||
| 1401 | buf_charlen); | ||
| 1402 | this_pos_byte += buf_charlen; | ||
| 1403 | this_pos++; | ||
| 1404 | buf_ch = TRANSLATE (trt, buf_ch); | ||
| 1405 | |||
| 1406 | if (buf_ch != pat_ch) | ||
| 1407 | break; | ||
| 1408 | } | ||
| 1409 | |||
| 1410 | if (this_len == 0) | ||
| 1411 | { | ||
| 1412 | pos -= len; | ||
| 1413 | pos_byte -= len_byte; | ||
| 1414 | break; | ||
| 1415 | } | ||
| 1416 | |||
| 1417 | DEC_BOTH (pos, pos_byte); | ||
| 1156 | } | 1418 | } |
| 1157 | 1419 | ||
| 1158 | len_byte = raw_pattern_size; | 1420 | n++; |
| 1159 | patbuf = (unsigned char *) alloca (len_byte); | 1421 | } |
| 1160 | pat = patbuf; | 1422 | else if (lim < pos) |
| 1161 | base_pat = raw_pattern; | 1423 | while (n < 0) |
| 1162 | while (--len_byte >= 0) | 1424 | { |
| 1425 | while (1) | ||
| 1163 | { | 1426 | { |
| 1164 | /* If we got here and the RE flag is set, it's because we're | 1427 | /* Try matching at position POS. */ |
| 1165 | dealing with a regexp known to be trivial, so the backslash | 1428 | int this_pos = pos - len; |
| 1166 | just quotes the next character. */ | 1429 | int this_len = len; |
| 1167 | if (RE && *base_pat == '\\') | 1430 | unsigned char *p = pat; |
| 1431 | |||
| 1432 | if (pos - len < lim) | ||
| 1433 | goto stop; | ||
| 1434 | |||
| 1435 | while (this_len > 0) | ||
| 1168 | { | 1436 | { |
| 1169 | len_byte--; | 1437 | int pat_ch = *p++; |
| 1170 | base_pat++; | 1438 | int buf_ch = FETCH_BYTE (this_pos); |
| 1439 | this_len--; | ||
| 1440 | this_pos++; | ||
| 1441 | buf_ch = TRANSLATE (trt, buf_ch); | ||
| 1442 | |||
| 1443 | if (buf_ch != pat_ch) | ||
| 1444 | break; | ||
| 1171 | } | 1445 | } |
| 1172 | *pat++ = (trt ? XINT (trt[*base_pat++]) : *base_pat++); | 1446 | |
| 1447 | if (this_len == 0) | ||
| 1448 | { | ||
| 1449 | pos -= len; | ||
| 1450 | break; | ||
| 1451 | } | ||
| 1452 | |||
| 1453 | pos--; | ||
| 1173 | } | 1454 | } |
| 1174 | len_byte = pat - patbuf; | 1455 | |
| 1175 | pat = base_pat = patbuf; | 1456 | n++; |
| 1176 | } | 1457 | } |
| 1177 | /* The general approach is that we are going to maintain that we know */ | 1458 | |
| 1178 | /* the first (closest to the present position, in whatever direction */ | 1459 | stop: |
| 1179 | /* we're searching) character that could possibly be the last */ | 1460 | if (n == 0) |
| 1180 | /* (furthest from present position) character of a valid match. We */ | 1461 | return pos; |
| 1181 | /* advance the state of our knowledge by looking at that character */ | 1462 | else if (n > 0) |
| 1182 | /* and seeing whether it indeed matches the last character of the */ | 1463 | return -n; |
| 1183 | /* pattern. If it does, we take a closer look. If it does not, we */ | 1464 | else |
| 1184 | /* move our pointer (to putative last characters) as far as is */ | 1465 | return n; |
| 1185 | /* logically possible. This amount of movement, which I call a */ | 1466 | } |
| 1186 | /* stride, will be the length of the pattern if the actual character */ | 1467 | |
| 1187 | /* appears nowhere in the pattern, otherwise it will be the distance */ | 1468 | /* Do Boyer-Moore search N times for the string PAT, |
| 1188 | /* from the last occurrence of that character to the end of the */ | 1469 | whose length is LEN/LEN_BYTE, |
| 1189 | /* pattern. */ | 1470 | from buffer position POS/POS_BYTE until LIM/LIM_BYTE. |
| 1190 | /* As a coding trick, an enormous stride is coded into the table for */ | 1471 | DIRECTION says which direction we search in. |
| 1191 | /* characters that match the last character. This allows use of only */ | 1472 | TRT and INVERSE_TRT are translation tables. |
| 1192 | /* a single test, a test for having gone past the end of the */ | 1473 | |
| 1193 | /* permissible match region, to test for both possible matches (when */ | 1474 | This kind of search works if all the characters in PAT that have |
| 1194 | /* the stride goes past the end immediately) and failure to */ | 1475 | nontrivial translation are the same aside from the last byte. This |
| 1195 | /* match (where you get nudged past the end one stride at a time). */ | 1476 | makes it possible to translate just the last byte of a character, |
| 1196 | 1477 | and do so after just a simple test of the context. | |
| 1197 | /* Here we make a "mickey mouse" BM table. The stride of the search */ | 1478 | |
| 1198 | /* is determined only by the last character of the putative match. */ | 1479 | If that criterion is not satisfied, do not call this function. */ |
| 1199 | /* If that character does not match, we will stride the proper */ | 1480 | |
| 1200 | /* distance to propose a match that superimposes it on the last */ | 1481 | static int |
| 1201 | /* instance of a character that matches it (per trt), or misses */ | 1482 | boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, |
| 1202 | /* it entirely if there is none. */ | 1483 | pos, pos_byte, lim, lim_byte) |
| 1203 | 1484 | int n; | |
| 1204 | dirlen = len_byte * direction; | 1485 | unsigned char *base_pat; |
| 1205 | infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction; | 1486 | int len, len_byte; |
| 1206 | if (direction < 0) | 1487 | Lisp_Object trt; |
| 1207 | pat = (base_pat += len_byte - 1); | 1488 | Lisp_Object inverse_trt; |
| 1208 | BM_tab_base = BM_tab; | 1489 | int pos, pos_byte; |
| 1209 | BM_tab += 0400; | 1490 | int lim, lim_byte; |
| 1210 | j = dirlen; /* to get it in a register */ | 1491 | { |
| 1211 | /* A character that does not appear in the pattern induces a */ | 1492 | int direction = ((n > 0) ? 1 : -1); |
| 1212 | /* stride equal to the pattern length. */ | 1493 | register int dirlen; |
| 1213 | while (BM_tab_base != BM_tab) | 1494 | int infinity, limit, k, stride_for_teases; |
| 1214 | { | 1495 | register int *BM_tab; |
| 1215 | *--BM_tab = j; | 1496 | int *BM_tab_base; |
| 1216 | *--BM_tab = j; | 1497 | register unsigned char *cursor, *p_limit; |
| 1217 | *--BM_tab = j; | 1498 | register int i, j; |
| 1218 | *--BM_tab = j; | 1499 | unsigned char *pat; |
| 1219 | } | 1500 | int multibyte = ! NILP (current_buffer->enable_multibyte_characters); |
| 1220 | i = 0; | 1501 | |
| 1221 | while (i != infinity) | 1502 | unsigned char simple_translate[0400]; |
| 1503 | int translate_prev_byte; | ||
| 1504 | int translate_anteprev_byte; | ||
| 1505 | |||
| 1506 | #ifdef C_ALLOCA | ||
| 1507 | int BM_tab_space[0400]; | ||
| 1508 | BM_tab = &BM_tab_space[0]; | ||
| 1509 | #else | ||
| 1510 | BM_tab = (int *) alloca (0400 * sizeof (int)); | ||
| 1511 | #endif | ||
| 1512 | /* The general approach is that we are going to maintain that we know */ | ||
| 1513 | /* the first (closest to the present position, in whatever direction */ | ||
| 1514 | /* we're searching) character that could possibly be the last */ | ||
| 1515 | /* (furthest from present position) character of a valid match. We */ | ||
| 1516 | /* advance the state of our knowledge by looking at that character */ | ||
| 1517 | /* and seeing whether it indeed matches the last character of the */ | ||
| 1518 | /* pattern. If it does, we take a closer look. If it does not, we */ | ||
| 1519 | /* move our pointer (to putative last characters) as far as is */ | ||
| 1520 | /* logically possible. This amount of movement, which I call a */ | ||
| 1521 | /* stride, will be the length of the pattern if the actual character */ | ||
| 1522 | /* appears nowhere in the pattern, otherwise it will be the distance */ | ||
| 1523 | /* from the last occurrence of that character to the end of the */ | ||
| 1524 | /* pattern. */ | ||
| 1525 | /* As a coding trick, an enormous stride is coded into the table for */ | ||
| 1526 | /* characters that match the last character. This allows use of only */ | ||
| 1527 | /* a single test, a test for having gone past the end of the */ | ||
| 1528 | /* permissible match region, to test for both possible matches (when */ | ||
| 1529 | /* the stride goes past the end immediately) and failure to */ | ||
| 1530 | /* match (where you get nudged past the end one stride at a time). */ | ||
| 1531 | |||
| 1532 | /* Here we make a "mickey mouse" BM table. The stride of the search */ | ||
| 1533 | /* is determined only by the last character of the putative match. */ | ||
| 1534 | /* If that character does not match, we will stride the proper */ | ||
| 1535 | /* distance to propose a match that superimposes it on the last */ | ||
| 1536 | /* instance of a character that matches it (per trt), or misses */ | ||
| 1537 | /* it entirely if there is none. */ | ||
| 1538 | |||
| 1539 | dirlen = len_byte * direction; | ||
| 1540 | infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction; | ||
| 1541 | if (direction < 0) | ||
| 1542 | pat = (base_pat += len_byte - 1); | ||
| 1543 | else | ||
| 1544 | pat = base_pat; | ||
| 1545 | BM_tab_base = BM_tab; | ||
| 1546 | BM_tab += 0400; | ||
| 1547 | j = dirlen; /* to get it in a register */ | ||
| 1548 | /* A character that does not appear in the pattern induces a */ | ||
| 1549 | /* stride equal to the pattern length. */ | ||
| 1550 | while (BM_tab_base != BM_tab) | ||
| 1551 | { | ||
| 1552 | *--BM_tab = j; | ||
| 1553 | *--BM_tab = j; | ||
| 1554 | *--BM_tab = j; | ||
| 1555 | *--BM_tab = j; | ||
| 1556 | } | ||
| 1557 | |||
| 1558 | /* We use this for translation, instead of TRT itself. | ||
| 1559 | We fill this in to handle the characters that actually | ||
| 1560 | occur in the pattern. Others don't matter anyway! */ | ||
| 1561 | bzero (simple_translate, sizeof simple_translate); | ||
| 1562 | for (i = 0; i < 0400; i++) | ||
| 1563 | simple_translate[i] = i; | ||
| 1564 | |||
| 1565 | i = 0; | ||
| 1566 | while (i != infinity) | ||
| 1567 | { | ||
| 1568 | unsigned char *ptr = pat + i; | ||
| 1569 | i += direction; | ||
| 1570 | if (i == dirlen) | ||
| 1571 | i = infinity; | ||
| 1572 | if (! NILP (trt)) | ||
| 1222 | { | 1573 | { |
| 1223 | j = pat[i]; i += direction; | 1574 | int ch; |
| 1224 | if (i == dirlen) i = infinity; | 1575 | int this_translated = 1; |
| 1225 | if (trt != 0) | 1576 | |
| 1577 | if (multibyte | ||
| 1578 | && (ptr + 1 == pat + len_byte || CHAR_HEAD_P (ptr[1]))) | ||
| 1226 | { | 1579 | { |
| 1227 | k = (j = XINT (trt[j])); | 1580 | unsigned char *charstart = ptr; |
| 1228 | if (i == infinity) | 1581 | while (! CHAR_HEAD_P (*charstart)) |
| 1229 | stride_for_teases = BM_tab[j]; | 1582 | charstart--; |
| 1230 | BM_tab[j] = dirlen - i; | 1583 | if (! CHAR_HEAD_P (*ptr)) |
| 1231 | /* A translation table is accompanied by its inverse -- see */ | 1584 | { |
| 1232 | /* comment following downcase_table for details */ | 1585 | translate_prev_byte = ptr[-1]; |
| 1233 | while ((j = (unsigned char) XINT (inverse_trt[j])) != k) | 1586 | if (! CHAR_HEAD_P (translate_prev_byte)) |
| 1234 | BM_tab[j] = dirlen - i; | 1587 | translate_anteprev_byte = ptr[-2]; |
| 1588 | } | ||
| 1589 | ch = STRING_CHAR (charstart, ptr - charstart + 1); | ||
| 1590 | ch = TRANSLATE (trt, ch); | ||
| 1235 | } | 1591 | } |
| 1592 | else if (!multibyte) | ||
| 1593 | ch = TRANSLATE (trt, *ptr); | ||
| 1236 | else | 1594 | else |
| 1237 | { | 1595 | { |
| 1238 | if (i == infinity) | 1596 | ch = *ptr; |
| 1239 | stride_for_teases = BM_tab[j]; | 1597 | this_translated = 0; |
| 1240 | BM_tab[j] = dirlen - i; | ||
| 1241 | } | 1598 | } |
| 1242 | /* stride_for_teases tells how much to stride if we get a */ | 1599 | |
| 1243 | /* match on the far character but are subsequently */ | 1600 | k = j = (unsigned char) ch; |
| 1244 | /* disappointed, by recording what the stride would have been */ | 1601 | if (i == infinity) |
| 1245 | /* for that character if the last character had been */ | 1602 | stride_for_teases = BM_tab[j]; |
| 1246 | /* different. */ | 1603 | BM_tab[j] = dirlen - i; |
| 1604 | /* A translation table is accompanied by its inverse -- see */ | ||
| 1605 | /* comment following downcase_table for details */ | ||
| 1606 | if (this_translated) | ||
| 1607 | while (1) | ||
| 1608 | { | ||
| 1609 | ch = TRANSLATE (inverse_trt, ch); | ||
| 1610 | /* For all the characters that map into K, | ||
| 1611 | set up simple_translate to map them into K. */ | ||
| 1612 | simple_translate[(unsigned char) ch] = k; | ||
| 1613 | if ((unsigned char) ch == k) | ||
| 1614 | break; | ||
| 1615 | BM_tab[(unsigned char) ch] = dirlen - i; | ||
| 1616 | } | ||
| 1617 | } | ||
| 1618 | else | ||
| 1619 | { | ||
| 1620 | j = *ptr; | ||
| 1621 | |||
| 1622 | if (i == infinity) | ||
| 1623 | stride_for_teases = BM_tab[j]; | ||
| 1624 | BM_tab[j] = dirlen - i; | ||
| 1247 | } | 1625 | } |
| 1248 | infinity = dirlen - infinity; | 1626 | /* stride_for_teases tells how much to stride if we get a */ |
| 1249 | pos_byte += dirlen - ((direction > 0) ? direction : 0); | 1627 | /* match on the far character but are subsequently */ |
| 1250 | /* loop invariant - POS_BYTE points at where last char (first | 1628 | /* disappointed, by recording what the stride would have been */ |
| 1251 | char if reverse) of pattern would align in a possible match. */ | 1629 | /* for that character if the last character had been */ |
| 1252 | while (n != 0) | 1630 | /* different. */ |
| 1631 | } | ||
| 1632 | infinity = dirlen - infinity; | ||
| 1633 | pos_byte += dirlen - ((direction > 0) ? direction : 0); | ||
| 1634 | /* loop invariant - POS_BYTE points at where last char (first | ||
| 1635 | char if reverse) of pattern would align in a possible match. */ | ||
| 1636 | while (n != 0) | ||
| 1637 | { | ||
| 1638 | int tail_end; | ||
| 1639 | unsigned char *tail_end_ptr; | ||
| 1640 | |||
| 1641 | /* It's been reported that some (broken) compiler thinks that | ||
| 1642 | Boolean expressions in an arithmetic context are unsigned. | ||
| 1643 | Using an explicit ?1:0 prevents this. */ | ||
| 1644 | if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction | ||
| 1645 | < 0) | ||
| 1646 | return (n * (0 - direction)); | ||
| 1647 | /* First we do the part we can by pointers (maybe nothing) */ | ||
| 1648 | QUIT; | ||
| 1649 | pat = base_pat; | ||
| 1650 | limit = pos_byte - dirlen + direction; | ||
| 1651 | limit = ((direction > 0) | ||
| 1652 | ? BUFFER_CEILING_OF (limit) | ||
| 1653 | : BUFFER_FLOOR_OF (limit)); | ||
| 1654 | /* LIMIT is now the last (not beyond-last!) value POS_BYTE | ||
| 1655 | can take on without hitting edge of buffer or the gap. */ | ||
| 1656 | limit = ((direction > 0) | ||
| 1657 | ? min (lim_byte - 1, min (limit, pos_byte + 20000)) | ||
| 1658 | : max (lim_byte, max (limit, pos_byte - 20000))); | ||
| 1659 | tail_end = BUFFER_CEILING_OF (pos_byte) + 1; | ||
| 1660 | tail_end_ptr = BYTE_POS_ADDR (tail_end); | ||
| 1661 | |||
| 1662 | if ((limit - pos_byte) * direction > 20) | ||
| 1253 | { | 1663 | { |
| 1254 | /* It's been reported that some (broken) compiler thinks that | 1664 | unsigned char *p2; |
| 1255 | Boolean expressions in an arithmetic context are unsigned. | 1665 | |
| 1256 | Using an explicit ?1:0 prevents this. */ | 1666 | p_limit = BYTE_POS_ADDR (limit); |
| 1257 | if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction | 1667 | p2 = (cursor = BYTE_POS_ADDR (pos_byte)); |
| 1258 | < 0) | 1668 | /* In this loop, pos + cursor - p2 is the surrogate for pos */ |
| 1259 | return (n * (0 - direction)); | 1669 | while (1) /* use one cursor setting as long as i can */ |
| 1260 | /* First we do the part we can by pointers (maybe nothing) */ | ||
| 1261 | QUIT; | ||
| 1262 | pat = base_pat; | ||
| 1263 | limit = pos_byte - dirlen + direction; | ||
| 1264 | limit = ((direction > 0) | ||
| 1265 | ? BUFFER_CEILING_OF (limit) | ||
| 1266 | : BUFFER_FLOOR_OF (limit)); | ||
| 1267 | /* LIMIT is now the last (not beyond-last!) value POS_BYTE | ||
| 1268 | can take on without hitting edge of buffer or the gap. */ | ||
| 1269 | limit = ((direction > 0) | ||
| 1270 | ? min (lim_byte - 1, min (limit, pos_byte + 20000)) | ||
| 1271 | : max (lim_byte, max (limit, pos_byte - 20000))); | ||
| 1272 | if ((limit - pos_byte) * direction > 20) | ||
| 1273 | { | 1670 | { |
| 1274 | p_limit = BYTE_POS_ADDR (limit); | 1671 | if (direction > 0) /* worth duplicating */ |
| 1275 | p2 = (cursor = BYTE_POS_ADDR (pos_byte)); | ||
| 1276 | /* In this loop, pos + cursor - p2 is the surrogate for pos */ | ||
| 1277 | while (1) /* use one cursor setting as long as i can */ | ||
| 1278 | { | 1672 | { |
| 1279 | if (direction > 0) /* worth duplicating */ | 1673 | /* Use signed comparison if appropriate |
| 1280 | { | 1674 | to make cursor+infinity sure to be > p_limit. |
| 1281 | /* Use signed comparison if appropriate | 1675 | Assuming that the buffer lies in a range of addresses |
| 1282 | to make cursor+infinity sure to be > p_limit. | 1676 | that are all "positive" (as ints) or all "negative", |
| 1283 | Assuming that the buffer lies in a range of addresses | 1677 | either kind of comparison will work as long |
| 1284 | that are all "positive" (as ints) or all "negative", | 1678 | as we don't step by infinity. So pick the kind |
| 1285 | either kind of comparison will work as long | 1679 | that works when we do step by infinity. */ |
| 1286 | as we don't step by infinity. So pick the kind | 1680 | if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit) |
| 1287 | that works when we do step by infinity. */ | 1681 | while ((EMACS_INT) cursor <= (EMACS_INT) p_limit) |
| 1288 | if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit) | 1682 | cursor += BM_tab[*cursor]; |
| 1289 | while ((EMACS_INT) cursor <= (EMACS_INT) p_limit) | ||
| 1290 | cursor += BM_tab[*cursor]; | ||
| 1291 | else | ||
| 1292 | while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit) | ||
| 1293 | cursor += BM_tab[*cursor]; | ||
| 1294 | } | ||
| 1295 | else | 1683 | else |
| 1296 | { | 1684 | while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit) |
| 1297 | if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit) | 1685 | cursor += BM_tab[*cursor]; |
| 1298 | while ((EMACS_INT) cursor >= (EMACS_INT) p_limit) | 1686 | } |
| 1299 | cursor += BM_tab[*cursor]; | 1687 | else |
| 1300 | else | 1688 | { |
| 1301 | while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit) | 1689 | if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit) |
| 1302 | cursor += BM_tab[*cursor]; | 1690 | while ((EMACS_INT) cursor >= (EMACS_INT) p_limit) |
| 1303 | } | 1691 | cursor += BM_tab[*cursor]; |
| 1692 | else | ||
| 1693 | while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit) | ||
| 1694 | cursor += BM_tab[*cursor]; | ||
| 1695 | } | ||
| 1304 | /* If you are here, cursor is beyond the end of the searched region. */ | 1696 | /* If you are here, cursor is beyond the end of the searched region. */ |
| 1305 | /* This can happen if you match on the far character of the pattern, */ | 1697 | /* This can happen if you match on the far character of the pattern, */ |
| 1306 | /* because the "stride" of that character is infinity, a number able */ | 1698 | /* because the "stride" of that character is infinity, a number able */ |
| 1307 | /* to throw you well beyond the end of the search. It can also */ | 1699 | /* to throw you well beyond the end of the search. It can also */ |
| 1308 | /* happen if you fail to match within the permitted region and would */ | 1700 | /* happen if you fail to match within the permitted region and would */ |
| 1309 | /* otherwise try a character beyond that region */ | 1701 | /* otherwise try a character beyond that region */ |
| 1310 | if ((cursor - p_limit) * direction <= len_byte) | 1702 | if ((cursor - p_limit) * direction <= len_byte) |
| 1311 | break; /* a small overrun is genuine */ | 1703 | break; /* a small overrun is genuine */ |
| 1312 | cursor -= infinity; /* large overrun = hit */ | 1704 | cursor -= infinity; /* large overrun = hit */ |
| 1313 | i = dirlen - direction; | 1705 | i = dirlen - direction; |
| 1314 | if (trt != 0) | 1706 | if (! NILP (trt)) |
| 1707 | { | ||
| 1708 | while ((i -= direction) + direction != 0) | ||
| 1315 | { | 1709 | { |
| 1316 | while ((i -= direction) + direction != 0) | 1710 | int ch; |
| 1317 | if (pat[i] != XINT (trt[*(cursor -= direction)])) | 1711 | cursor -= direction; |
| 1318 | break; | 1712 | /* Translate only the last byte of a character. */ |
| 1713 | if (! multibyte | ||
| 1714 | || ((cursor == tail_end_ptr | ||
| 1715 | || CHAR_HEAD_P (cursor[1])) | ||
| 1716 | && (CHAR_HEAD_P (cursor[0]) | ||
| 1717 | || (translate_prev_byte == cursor[-1] | ||
| 1718 | && (CHAR_HEAD_P (translate_prev_byte) | ||
| 1719 | || translate_anteprev_byte == cursor[-2]))))) | ||
| 1720 | ch = simple_translate[*cursor]; | ||
| 1721 | else | ||
| 1722 | ch = *cursor; | ||
| 1723 | if (pat[i] != ch) | ||
| 1724 | break; | ||
| 1319 | } | 1725 | } |
| 1320 | else | 1726 | } |
| 1727 | else | ||
| 1728 | { | ||
| 1729 | while ((i -= direction) + direction != 0) | ||
| 1321 | { | 1730 | { |
| 1322 | while ((i -= direction) + direction != 0) | 1731 | cursor -= direction; |
| 1323 | if (pat[i] != *(cursor -= direction)) | 1732 | if (pat[i] != *cursor) |
| 1324 | break; | 1733 | break; |
| 1325 | } | 1734 | } |
| 1326 | cursor += dirlen - i - direction; /* fix cursor */ | 1735 | } |
| 1327 | if (i + direction == 0) | 1736 | cursor += dirlen - i - direction; /* fix cursor */ |
| 1328 | { | 1737 | if (i + direction == 0) |
| 1329 | int position; | 1738 | { |
| 1739 | int position; | ||
| 1330 | 1740 | ||
| 1331 | cursor -= direction; | 1741 | cursor -= direction; |
| 1332 | 1742 | ||
| 1333 | position = pos_byte + cursor - p2 + ((direction > 0) | 1743 | position = pos_byte + cursor - p2 + ((direction > 0) |
| 1334 | ? 1 - len_byte : 0); | 1744 | ? 1 - len_byte : 0); |
| 1335 | set_search_regs (position, len_byte); | 1745 | set_search_regs (position, len_byte); |
| 1336 | 1746 | ||
| 1337 | if ((n -= direction) != 0) | 1747 | if ((n -= direction) != 0) |
| 1338 | cursor += dirlen; /* to resume search */ | 1748 | cursor += dirlen; /* to resume search */ |
| 1339 | else | ||
| 1340 | return ((direction > 0) | ||
| 1341 | ? search_regs.end[0] : search_regs.start[0]); | ||
| 1342 | } | ||
| 1343 | else | 1749 | else |
| 1344 | cursor += stride_for_teases; /* <sigh> we lose - */ | 1750 | return ((direction > 0) |
| 1751 | ? search_regs.end[0] : search_regs.start[0]); | ||
| 1345 | } | 1752 | } |
| 1346 | pos_byte += cursor - p2; | 1753 | else |
| 1754 | cursor += stride_for_teases; /* <sigh> we lose - */ | ||
| 1347 | } | 1755 | } |
| 1348 | else | 1756 | pos_byte += cursor - p2; |
| 1349 | /* Now we'll pick up a clump that has to be done the hard */ | 1757 | } |
| 1350 | /* way because it covers a discontinuity */ | 1758 | else |
| 1759 | /* Now we'll pick up a clump that has to be done the hard */ | ||
| 1760 | /* way because it covers a discontinuity */ | ||
| 1761 | { | ||
| 1762 | limit = ((direction > 0) | ||
| 1763 | ? BUFFER_CEILING_OF (pos_byte - dirlen + 1) | ||
| 1764 | : BUFFER_FLOOR_OF (pos_byte - dirlen - 1)); | ||
| 1765 | limit = ((direction > 0) | ||
| 1766 | ? min (limit + len_byte, lim_byte - 1) | ||
| 1767 | : max (limit - len_byte, lim_byte)); | ||
| 1768 | /* LIMIT is now the last value POS_BYTE can have | ||
| 1769 | and still be valid for a possible match. */ | ||
| 1770 | while (1) | ||
| 1351 | { | 1771 | { |
| 1352 | limit = ((direction > 0) | 1772 | /* This loop can be coded for space rather than */ |
| 1353 | ? BUFFER_CEILING_OF (pos_byte - dirlen + 1) | 1773 | /* speed because it will usually run only once. */ |
| 1354 | : BUFFER_FLOOR_OF (pos_byte - dirlen - 1)); | 1774 | /* (the reach is at most len + 21, and typically */ |
| 1355 | limit = ((direction > 0) | 1775 | /* does not exceed len) */ |
| 1356 | ? min (limit + len_byte, lim_byte - 1) | 1776 | while ((limit - pos_byte) * direction >= 0) |
| 1357 | : max (limit - len_byte, lim_byte)); | 1777 | pos_byte += BM_tab[FETCH_BYTE (pos_byte)]; |
| 1358 | /* LIMIT is now the last value POS_BYTE can have | 1778 | /* now run the same tests to distinguish going off the */ |
| 1359 | and still be valid for a possible match. */ | 1779 | /* end, a match or a phony match. */ |
| 1360 | while (1) | 1780 | if ((pos_byte - limit) * direction <= len_byte) |
| 1781 | break; /* ran off the end */ | ||
| 1782 | /* Found what might be a match. | ||
| 1783 | Set POS_BYTE back to last (first if reverse) pos. */ | ||
| 1784 | pos_byte -= infinity; | ||
| 1785 | i = dirlen - direction; | ||
| 1786 | while ((i -= direction) + direction != 0) | ||
| 1361 | { | 1787 | { |
| 1362 | /* This loop can be coded for space rather than */ | 1788 | int ch; |
| 1363 | /* speed because it will usually run only once. */ | 1789 | unsigned char *ptr; |
| 1364 | /* (the reach is at most len + 21, and typically */ | 1790 | pos_byte -= direction; |
| 1365 | /* does not exceed len) */ | 1791 | ptr = BYTE_POS_ADDR (pos_byte); |
| 1366 | while ((limit - pos_byte) * direction >= 0) | 1792 | /* Translate only the last byte of a character. */ |
| 1367 | pos_byte += BM_tab[FETCH_BYTE (pos_byte)]; | 1793 | if (! multibyte |
| 1368 | /* now run the same tests to distinguish going off the */ | 1794 | || ((ptr == tail_end_ptr |
| 1369 | /* end, a match or a phony match. */ | 1795 | || CHAR_HEAD_P (ptr[1])) |
| 1370 | if ((pos_byte - limit) * direction <= len_byte) | 1796 | && (CHAR_HEAD_P (ptr[0]) |
| 1371 | break; /* ran off the end */ | 1797 | || (translate_prev_byte == ptr[-1] |
| 1372 | /* Found what might be a match. | 1798 | && (CHAR_HEAD_P (translate_prev_byte) |
| 1373 | Set POS_BYTE back to last (first if reverse) pos. */ | 1799 | || translate_anteprev_byte == ptr[-2]))))) |
| 1374 | pos_byte -= infinity; | 1800 | ch = simple_translate[*ptr]; |
| 1375 | i = dirlen - direction; | 1801 | else |
| 1376 | while ((i -= direction) + direction != 0) | 1802 | ch = *ptr; |
| 1377 | { | 1803 | if (pat[i] != ch) |
| 1378 | pos_byte -= direction; | 1804 | break; |
| 1379 | if (pat[i] != (trt != 0 | 1805 | } |
| 1380 | ? XINT (trt[FETCH_BYTE (pos_byte)]) | 1806 | /* Above loop has moved POS_BYTE part or all the way |
| 1381 | : FETCH_BYTE (pos_byte))) | 1807 | back to the first pos (last pos if reverse). |
| 1382 | break; | 1808 | Set it once again at the last (first if reverse) char. */ |
| 1383 | } | 1809 | pos_byte += dirlen - i- direction; |
| 1384 | /* Above loop has moved POS_BYTE part or all the way | 1810 | if (i + direction == 0) |
| 1385 | back to the first pos (last pos if reverse). | 1811 | { |
| 1386 | Set it once again at the last (first if reverse) char. */ | 1812 | int position; |
| 1387 | pos_byte += dirlen - i- direction; | 1813 | pos_byte -= direction; |
| 1388 | if (i + direction == 0) | ||
| 1389 | { | ||
| 1390 | int position; | ||
| 1391 | pos_byte -= direction; | ||
| 1392 | 1814 | ||
| 1393 | position = pos_byte + ((direction > 0) ? 1 - len_byte : 0); | 1815 | position = pos_byte + ((direction > 0) ? 1 - len_byte : 0); |
| 1394 | 1816 | ||
| 1395 | set_search_regs (position, len_byte); | 1817 | set_search_regs (position, len_byte); |
| 1396 | 1818 | ||
| 1397 | if ((n -= direction) != 0) | 1819 | if ((n -= direction) != 0) |
| 1398 | pos_byte += dirlen; /* to resume search */ | 1820 | pos_byte += dirlen; /* to resume search */ |
| 1399 | else | ||
| 1400 | return ((direction > 0) | ||
| 1401 | ? search_regs.end[0] : search_regs.start[0]); | ||
| 1402 | } | ||
| 1403 | else | 1821 | else |
| 1404 | pos_byte += stride_for_teases; | 1822 | return ((direction > 0) |
| 1823 | ? search_regs.end[0] : search_regs.start[0]); | ||
| 1405 | } | 1824 | } |
| 1406 | } | 1825 | else |
| 1407 | /* We have done one clump. Can we continue? */ | 1826 | pos_byte += stride_for_teases; |
| 1408 | if ((lim_byte - pos_byte) * direction < 0) | 1827 | } |
| 1409 | return ((0 - n) * direction); | 1828 | } |
| 1410 | } | 1829 | /* We have done one clump. Can we continue? */ |
| 1411 | return BYTE_TO_CHAR (pos_byte); | 1830 | if ((lim_byte - pos_byte) * direction < 0) |
| 1831 | return ((0 - n) * direction); | ||
| 1412 | } | 1832 | } |
| 1833 | return BYTE_TO_CHAR (pos_byte); | ||
| 1413 | } | 1834 | } |
| 1414 | 1835 | ||
| 1415 | /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES | 1836 | /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES |