aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Daniel Colascione 2018-06-16 13:46:10 -0700
committer Daniel Colascione 2018-06-16 13:46:38 -0700
commit 938d252d1c6c5e2027aa250c649deb024154f936 (patch)
tree c0ee02c8cb40d0b7fc9bb0fa55ac8569491197c9 /src
parent 1502b377d35d6db623301829549ebcab9a2777e6 (diff)
download emacs-938d252d1c6c5e2027aa250c649deb024154f936.tar.gz
emacs-938d252d1c6c5e2027aa250c649deb024154f936.zip
Make regex matching reentrant; update syntax during match
* src/lisp.h (compile_pattern): Remove prototype of now-internal function.
* src/regex.c (POS_AS_IN_BUFFER): Consult gl_state instead of re_match_object: the latter can change in Lisp. (re_match_2_internal): Switch back to UPDATE_SYNTAX_* from UPDATE_SYNTAX_FAST*, allowing calls into Lisp.
* src/regex.h (re_match_object): Uncomment declaration.
* src/search.c (struct regexp_cache): Add `busy' field. (thaw_buffer_relocation): Delete; rely on unbind. (compile_pattern_1): Assert pattern isn't busy. (shrink_regexp_cache): Don't shrink busy patterns. (clear_regexp_cache): Don't nuke busy patterns. (unfreeze_pattern, freeze_pattern): New functions. (compile_pattern): Return a regexp_cache pointer instead of the re_pattern_buffer, allowing callers to use `freeze_pattern' if needed. Do not consider busy patterns as cache hit candidates; error if we run out of non-busy cache entries. (looking_at_1, fast_looking_at): Snapshot Vinhibit_changing_match_data; mark pattern busy while we're matching it; unbind. (string_match_1, fast_string_match_internal) (fast_c_string_match_ignore_case): Adjust for compile_pattern return type. (search_buffer_re): Regex code from old search_buffer moved here; snapshot Vinhibit_changing_match_data; mark pattern busy while we're matching it; unbind. (search_buffer_non_re): Non-regex code from old search_buffer moved here. (search_buffer): Split into search_buffer_re, search_buffer_non_re. (syms_of_search): Staticpro re_match_object, even though we really shouldn't have to.
* src/syntax.h (UPDATE_SYNTAX_TABLE_FORWARD_FAST, UPDATE_SYNTAX_TABLE_FAST): Remove.
* src/thread.h (struct thread_state): Remove m_re_match_object, which is global again. (It never needs to be preserved across thread switch.)
Diffstat (limited to 'src')
-rw-r--r-- src/lisp.h 4
-rw-r--r-- src/regex.c 30
-rw-r--r-- src/regex.h 11
-rw-r--r-- src/search.c 795
-rw-r--r-- src/syntax.h 14
-rw-r--r-- src/thread.h 9
6 files changed, 446 insertions, 417 deletions
diff --git a/src/lisp.h b/src/lisp.h
index ff708ebf60e..d0c52d85672 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -4029,10 +4029,6 @@ extern void restore_search_regs (void);
4029extern void update_search_regs (ptrdiff_t oldstart, 4029extern void update_search_regs (ptrdiff_t oldstart,
4030 ptrdiff_t oldend, ptrdiff_t newend); 4030 ptrdiff_t oldend, ptrdiff_t newend);
4031extern void record_unwind_save_match_data (void); 4031extern void record_unwind_save_match_data (void);
4032struct re_registers;
4033extern struct re_pattern_buffer *compile_pattern (Lisp_Object,
4034 struct re_registers *,
4035 Lisp_Object, bool, bool);
4036extern ptrdiff_t fast_string_match_internal (Lisp_Object, Lisp_Object, 4032extern ptrdiff_t fast_string_match_internal (Lisp_Object, Lisp_Object,
4037 Lisp_Object); 4033 Lisp_Object);
4038 4034
diff --git a/src/regex.c b/src/regex.c
index 85e63feea10..b8c6f3f19b2 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -155,7 +155,8 @@
155# define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d)) 155# define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
156/* Strings are 0-indexed, buffers are 1-indexed; we pun on the boolean 156/* Strings are 0-indexed, buffers are 1-indexed; we pun on the boolean
157 result to get the right base index. */ 157 result to get the right base index. */
158# define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object))) 158# define POS_AS_IN_BUFFER(p) \
159 ((p) + (NILP (gl_state.object) || BUFFERP (gl_state.object)))
159 160
160# define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) 161# define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
161# define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte) 162# define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
@@ -1233,6 +1234,15 @@ static const char *re_error_msgid[] =
1233# undef MATCH_MAY_ALLOCATE 1234# undef MATCH_MAY_ALLOCATE
1234#endif 1235#endif
1235 1236
1237/* While regex matching of a single compiled pattern isn't reentrant
1238 (because we compile regexes to bytecode programs, and the bytecode
1239 programs are self-modifying), the regex machinery must nevertheless
1240 be reentrant with respect to _different_ patterns, and we do that
1241 by avoiding global variables and using MATCH_MAY_ALLOCATE. */
1242#if !defined MATCH_MAY_ALLOCATE && defined emacs
1243# error "Emacs requires MATCH_MAY_ALLOCATE"
1244#endif
1245
1236 1246
1237/* Failure stack declarations and macros; both re_compile_fastmap and 1247/* Failure stack declarations and macros; both re_compile_fastmap and
1238 re_match_2 use a failure stack. These have to be macros because of 1248 re_match_2 use a failure stack. These have to be macros because of
@@ -5895,12 +5905,12 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5895#ifdef emacs 5905#ifdef emacs
5896 ssize_t offset = PTR_TO_OFFSET (d - 1); 5906 ssize_t offset = PTR_TO_OFFSET (d - 1);
5897 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); 5907 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5898 UPDATE_SYNTAX_TABLE_FAST (charpos); 5908 UPDATE_SYNTAX_TABLE (charpos);
5899#endif 5909#endif
5900 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); 5910 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5901 s1 = SYNTAX (c1); 5911 s1 = SYNTAX (c1);
5902#ifdef emacs 5912#ifdef emacs
5903 UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos + 1); 5913 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
5904#endif 5914#endif
5905 PREFETCH_NOLIMIT (); 5915 PREFETCH_NOLIMIT ();
5906 GET_CHAR_AFTER (c2, d, dummy); 5916 GET_CHAR_AFTER (c2, d, dummy);
@@ -5937,7 +5947,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5937#ifdef emacs 5947#ifdef emacs
5938 ssize_t offset = PTR_TO_OFFSET (d); 5948 ssize_t offset = PTR_TO_OFFSET (d);
5939 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); 5949 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5940 UPDATE_SYNTAX_TABLE_FAST (charpos); 5950 UPDATE_SYNTAX_TABLE (charpos);
5941#endif 5951#endif
5942 PREFETCH (); 5952 PREFETCH ();
5943 GET_CHAR_AFTER (c2, d, dummy); 5953 GET_CHAR_AFTER (c2, d, dummy);
@@ -5982,7 +5992,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5982#ifdef emacs 5992#ifdef emacs
5983 ssize_t offset = PTR_TO_OFFSET (d) - 1; 5993 ssize_t offset = PTR_TO_OFFSET (d) - 1;
5984 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); 5994 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5985 UPDATE_SYNTAX_TABLE_FAST (charpos); 5995 UPDATE_SYNTAX_TABLE (charpos);
5986#endif 5996#endif
5987 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); 5997 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5988 s1 = SYNTAX (c1); 5998 s1 = SYNTAX (c1);
@@ -5997,7 +6007,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
5997 PREFETCH_NOLIMIT (); 6007 PREFETCH_NOLIMIT ();
5998 GET_CHAR_AFTER (c2, d, dummy); 6008 GET_CHAR_AFTER (c2, d, dummy);
5999#ifdef emacs 6009#ifdef emacs
6000 UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos); 6010 UPDATE_SYNTAX_TABLE_FORWARD (charpos);
6001#endif 6011#endif
6002 s2 = SYNTAX (c2); 6012 s2 = SYNTAX (c2);
6003 6013
@@ -6026,7 +6036,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
6026#ifdef emacs 6036#ifdef emacs
6027 ssize_t offset = PTR_TO_OFFSET (d); 6037 ssize_t offset = PTR_TO_OFFSET (d);
6028 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); 6038 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6029 UPDATE_SYNTAX_TABLE_FAST (charpos); 6039 UPDATE_SYNTAX_TABLE (charpos);
6030#endif 6040#endif
6031 PREFETCH (); 6041 PREFETCH ();
6032 c2 = RE_STRING_CHAR (d, target_multibyte); 6042 c2 = RE_STRING_CHAR (d, target_multibyte);
@@ -6069,7 +6079,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
6069#ifdef emacs 6079#ifdef emacs
6070 ssize_t offset = PTR_TO_OFFSET (d) - 1; 6080 ssize_t offset = PTR_TO_OFFSET (d) - 1;
6071 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); 6081 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6072 UPDATE_SYNTAX_TABLE_FAST (charpos); 6082 UPDATE_SYNTAX_TABLE (charpos);
6073#endif 6083#endif
6074 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); 6084 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
6075 s1 = SYNTAX (c1); 6085 s1 = SYNTAX (c1);
@@ -6084,7 +6094,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
6084 PREFETCH_NOLIMIT (); 6094 PREFETCH_NOLIMIT ();
6085 c2 = RE_STRING_CHAR (d, target_multibyte); 6095 c2 = RE_STRING_CHAR (d, target_multibyte);
6086#ifdef emacs 6096#ifdef emacs
6087 UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos + 1); 6097 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
6088#endif 6098#endif
6089 s2 = SYNTAX (c2); 6099 s2 = SYNTAX (c2);
6090 6100
@@ -6107,7 +6117,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
6107 { 6117 {
6108 ssize_t offset = PTR_TO_OFFSET (d); 6118 ssize_t offset = PTR_TO_OFFSET (d);
6109 ssize_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset); 6119 ssize_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6110 UPDATE_SYNTAX_TABLE_FAST (pos1); 6120 UPDATE_SYNTAX_TABLE (pos1);
6111 } 6121 }
6112#endif 6122#endif
6113 { 6123 {
diff --git a/src/regex.h b/src/regex.h
index 082f7e010d8..3a2d74d86a1 100644
--- a/src/regex.h
+++ b/src/regex.h
@@ -181,8 +181,15 @@ typedef unsigned long reg_syntax_t;
181 string; if it's nil, we are matching text in the current buffer; if 181 string; if it's nil, we are matching text in the current buffer; if
182 it's t, we are matching text in a C string. 182 it's t, we are matching text in a C string.
183 183
184 This is defined as a macro in thread.h, which see. */ 184 This value is effectively another parameter to re_search_2 and
185/* extern Lisp_Object re_match_object; */ 185 re_match_2. No calls into Lisp or thread switches are allowed
186 before setting re_match_object and calling into the regex search
187 and match functions. These functions capture the current value of
188 re_match_object into gl_state on entry.
189
190 TODO: once we get rid of the !emacs case in this code, turn into an
191 actual function parameter. */
192extern Lisp_Object re_match_object;
186#endif 193#endif
187 194
188/* Roughly the maximum number of failure points on the stack. */ 195/* Roughly the maximum number of failure points on the stack. */
diff --git a/src/search.c b/src/search.c
index a21c01ca4b4..ccdb659776d 100644
--- a/src/search.c
+++ b/src/search.c
@@ -48,6 +48,8 @@ struct regexp_cache
48 char fastmap[0400]; 48 char fastmap[0400];
49 /* True means regexp was compiled to do full POSIX backtracking. */ 49 /* True means regexp was compiled to do full POSIX backtracking. */
50 bool posix; 50 bool posix;
51 /* True means we're inside a buffer match. */
52 bool busy;
51}; 53};
52 54
53/* The instances of that struct. */ 55/* The instances of that struct. */
@@ -93,6 +95,8 @@ static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
93 ptrdiff_t, ptrdiff_t, EMACS_INT, int, 95 ptrdiff_t, ptrdiff_t, EMACS_INT, int,
94 Lisp_Object, Lisp_Object, bool); 96 Lisp_Object, Lisp_Object, bool);
95 97
98Lisp_Object re_match_object;
99
96static _Noreturn void 100static _Noreturn void
97matcher_overflow (void) 101matcher_overflow (void)
98{ 102{
@@ -110,14 +114,6 @@ freeze_buffer_relocation (void)
110#endif 114#endif
111} 115}
112 116
113static void
114thaw_buffer_relocation (void)
115{
116#ifdef REL_ALLOC
117 unbind_to (SPECPDL_INDEX () - 1, Qnil);
118#endif
119}
120
121/* Compile a regexp and signal a Lisp error if anything goes wrong. 117/* Compile a regexp and signal a Lisp error if anything goes wrong.
122 PATTERN is the pattern to compile. 118 PATTERN is the pattern to compile.
123 CP is the place to put the result. 119 CP is the place to put the result.
@@ -134,6 +130,7 @@ compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
134 const char *whitespace_regexp; 130 const char *whitespace_regexp;
135 char *val; 131 char *val;
136 132
133 eassert (!cp->busy);
137 cp->regexp = Qnil; 134 cp->regexp = Qnil;
138 cp->buf.translate = (! NILP (translate) ? translate : make_number (0)); 135 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
139 cp->posix = posix; 136 cp->posix = posix;
@@ -170,10 +167,11 @@ shrink_regexp_cache (void)
170 struct regexp_cache *cp; 167 struct regexp_cache *cp;
171 168
172 for (cp = searchbuf_head; cp != 0; cp = cp->next) 169 for (cp = searchbuf_head; cp != 0; cp = cp->next)
173 { 170 if (!cp->busy)
174 cp->buf.allocated = cp->buf.used; 171 {
175 cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used); 172 cp->buf.allocated = cp->buf.used;
176 } 173 cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
174 }
177} 175}
178 176
179/* Clear the regexp cache w.r.t. a particular syntax table, 177/* Clear the regexp cache w.r.t. a particular syntax table,
@@ -190,10 +188,25 @@ clear_regexp_cache (void)
190 /* It's tempting to compare with the syntax-table we've actually changed, 188 /* It's tempting to compare with the syntax-table we've actually changed,
191 but it's not sufficient because char-table inheritance means that 189 but it's not sufficient because char-table inheritance means that
192 modifying one syntax-table can change others at the same time. */ 190 modifying one syntax-table can change others at the same time. */
193 if (!EQ (searchbufs[i].syntax_table, Qt)) 191 if (!searchbufs[i].busy && !EQ (searchbufs[i].syntax_table, Qt))
194 searchbufs[i].regexp = Qnil; 192 searchbufs[i].regexp = Qnil;
195} 193}
196 194
195static void
196unfreeze_pattern (void *arg)
197{
198 struct regexp_cache *searchbuf = arg;
199 searchbuf->busy = false;
200}
201
202static void
203freeze_pattern (struct regexp_cache *searchbuf)
204{
205 eassert (!searchbuf->busy);
206 record_unwind_protect_ptr (unfreeze_pattern, searchbuf);
207 searchbuf->busy = true;
208}
209
197/* Compile a regexp if necessary, but first check to see if there's one in 210/* Compile a regexp if necessary, but first check to see if there's one in
198 the cache. 211 the cache.
199 PATTERN is the pattern to compile. 212 PATTERN is the pattern to compile.
@@ -205,7 +218,7 @@ clear_regexp_cache (void)
205 POSIX is true if we want full backtracking (POSIX style) for this pattern. 218 POSIX is true if we want full backtracking (POSIX style) for this pattern.
206 False means backtrack only enough to get a valid match. */ 219 False means backtrack only enough to get a valid match. */
207 220
208struct re_pattern_buffer * 221static struct regexp_cache *
209compile_pattern (Lisp_Object pattern, struct re_registers *regp, 222compile_pattern (Lisp_Object pattern, struct re_registers *regp,
210 Lisp_Object translate, bool posix, bool multibyte) 223 Lisp_Object translate, bool posix, bool multibyte)
211{ 224{
@@ -222,6 +235,7 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp,
222 if (NILP (cp->regexp)) 235 if (NILP (cp->regexp))
223 goto compile_it; 236 goto compile_it;
224 if (SCHARS (cp->regexp) == SCHARS (pattern) 237 if (SCHARS (cp->regexp) == SCHARS (pattern)
238 && !cp->busy
225 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern) 239 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
226 && !NILP (Fstring_equal (cp->regexp, pattern)) 240 && !NILP (Fstring_equal (cp->regexp, pattern))
227 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0))) 241 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
@@ -237,7 +251,10 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp,
237 string value. */ 251 string value. */
238 if (cp->next == 0) 252 if (cp->next == 0)
239 { 253 {
254 if (cp->busy)
255 error ("Too much matching reentrancy");
240 compile_it: 256 compile_it:
257 eassert (!cp->busy);
241 compile_pattern_1 (cp, pattern, translate, posix); 258 compile_pattern_1 (cp, pattern, translate, posix);
242 break; 259 break;
243 } 260 }
@@ -258,8 +275,7 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp,
258 /* The compiled pattern can be used both for multibyte and unibyte 275 /* The compiled pattern can be used both for multibyte and unibyte
259 target. But, we have to tell which the pattern is used for. */ 276 target. But, we have to tell which the pattern is used for. */
260 cp->buf.target_multibyte = multibyte; 277 cp->buf.target_multibyte = multibyte;
261 278 return cp;
262 return &cp->buf;
263} 279}
264 280
265 281
@@ -270,7 +286,6 @@ looking_at_1 (Lisp_Object string, bool posix)
270 unsigned char *p1, *p2; 286 unsigned char *p1, *p2;
271 ptrdiff_t s1, s2; 287 ptrdiff_t s1, s2;
272 register ptrdiff_t i; 288 register ptrdiff_t i;
273 struct re_pattern_buffer *bufp;
274 289
275 if (running_asynch_code) 290 if (running_asynch_code)
276 save_search_regs (); 291 save_search_regs ();
@@ -280,13 +295,17 @@ looking_at_1 (Lisp_Object string, bool posix)
280 BVAR (current_buffer, case_eqv_table)); 295 BVAR (current_buffer, case_eqv_table));
281 296
282 CHECK_STRING (string); 297 CHECK_STRING (string);
283 bufp = compile_pattern (string, 298
284 (NILP (Vinhibit_changing_match_data) 299 /* Snapshot in case Lisp changes the value. */
285 ? &search_regs : NULL), 300 bool preserve_match_data = NILP (Vinhibit_changing_match_data);
286 (!NILP (BVAR (current_buffer, case_fold_search)) 301
287 ? BVAR (current_buffer, case_canon_table) : Qnil), 302 struct regexp_cache *cache_entry = compile_pattern (
288 posix, 303 string,
289 !NILP (BVAR (current_buffer, enable_multibyte_characters))); 304 preserve_match_data ? &search_regs : NULL,
305 (!NILP (BVAR (current_buffer, case_fold_search))
306 ? BVAR (current_buffer, case_canon_table) : Qnil),
307 posix,
308 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
290 309
291 /* Do a pending quit right away, to avoid paradoxical behavior */ 310 /* Do a pending quit right away, to avoid paradoxical behavior */
292 maybe_quit (); 311 maybe_quit ();
@@ -310,21 +329,20 @@ looking_at_1 (Lisp_Object string, bool posix)
310 s2 = 0; 329 s2 = 0;
311 } 330 }
312 331
313 re_match_object = Qnil; 332 ptrdiff_t count = SPECPDL_INDEX ();
314
315 freeze_buffer_relocation (); 333 freeze_buffer_relocation ();
316 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2, 334 freeze_pattern (cache_entry);
335 re_match_object = Qnil;
336 i = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
317 PT_BYTE - BEGV_BYTE, 337 PT_BYTE - BEGV_BYTE,
318 (NILP (Vinhibit_changing_match_data) 338 preserve_match_data ? &search_regs : NULL,
319 ? &search_regs : NULL),
320 ZV_BYTE - BEGV_BYTE); 339 ZV_BYTE - BEGV_BYTE);
321 thaw_buffer_relocation ();
322 340
323 if (i == -2) 341 if (i == -2)
324 matcher_overflow (); 342 matcher_overflow ();
325 343
326 val = (i >= 0 ? Qt : Qnil); 344 val = (i >= 0 ? Qt : Qnil);
327 if (NILP (Vinhibit_changing_match_data) && i >= 0) 345 if (preserve_match_data && i >= 0)
328 { 346 {
329 for (i = 0; i < search_regs.num_regs; i++) 347 for (i = 0; i < search_regs.num_regs; i++)
330 if (search_regs.start[i] >= 0) 348 if (search_regs.start[i] >= 0)
@@ -338,7 +356,7 @@ looking_at_1 (Lisp_Object string, bool posix)
338 XSETBUFFER (last_thing_searched, current_buffer); 356 XSETBUFFER (last_thing_searched, current_buffer);
339 } 357 }
340 358
341 return val; 359 return unbind_to (count, val);
342} 360}
343 361
344DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0, 362DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
@@ -396,15 +414,14 @@ string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
396 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2, 414 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
397 BVAR (current_buffer, case_eqv_table)); 415 BVAR (current_buffer, case_eqv_table));
398 416
399 bufp = compile_pattern (regexp, 417 bufp = &compile_pattern (regexp,
400 (NILP (Vinhibit_changing_match_data) 418 (NILP (Vinhibit_changing_match_data)
401 ? &search_regs : NULL), 419 ? &search_regs : NULL),
402 (!NILP (BVAR (current_buffer, case_fold_search)) 420 (!NILP (BVAR (current_buffer, case_fold_search))
403 ? BVAR (current_buffer, case_canon_table) : Qnil), 421 ? BVAR (current_buffer, case_canon_table) : Qnil),
404 posix, 422 posix,
405 STRING_MULTIBYTE (string)); 423 STRING_MULTIBYTE (string))->buf;
406 re_match_object = string; 424 re_match_object = string;
407
408 val = re_search (bufp, SSDATA (string), 425 val = re_search (bufp, SSDATA (string),
409 SBYTES (string), pos_byte, 426 SBYTES (string), pos_byte,
410 SBYTES (string) - pos_byte, 427 SBYTES (string) - pos_byte,
@@ -471,10 +488,9 @@ fast_string_match_internal (Lisp_Object regexp, Lisp_Object string,
471 ptrdiff_t val; 488 ptrdiff_t val;
472 struct re_pattern_buffer *bufp; 489 struct re_pattern_buffer *bufp;
473 490
474 bufp = compile_pattern (regexp, 0, table, 491 bufp = &compile_pattern (regexp, 0, table,
475 0, STRING_MULTIBYTE (string)); 492 0, STRING_MULTIBYTE (string))->buf;
476 re_match_object = string; 493 re_match_object = string;
477
478 val = re_search (bufp, SSDATA (string), 494 val = re_search (bufp, SSDATA (string),
479 SBYTES (string), 0, 495 SBYTES (string), 0,
480 SBYTES (string), 0); 496 SBYTES (string), 0);
@@ -494,10 +510,10 @@ fast_c_string_match_ignore_case (Lisp_Object regexp,
494 struct re_pattern_buffer *bufp; 510 struct re_pattern_buffer *bufp;
495 511
496 regexp = string_make_unibyte (regexp); 512 regexp = string_make_unibyte (regexp);
513 bufp = &compile_pattern (regexp, 0,
514 Vascii_canon_table, 0,
515 0)->buf;
497 re_match_object = Qt; 516 re_match_object = Qt;
498 bufp = compile_pattern (regexp, 0,
499 Vascii_canon_table, 0,
500 0);
501 val = re_search (bufp, string, len, 0, len, 0); 517 val = re_search (bufp, string, len, 0, len, 0);
502 return val; 518 return val;
503} 519}
@@ -513,7 +529,6 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
513 ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string) 529 ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
514{ 530{
515 bool multibyte; 531 bool multibyte;
516 struct re_pattern_buffer *buf;
517 unsigned char *p1, *p2; 532 unsigned char *p1, *p2;
518 ptrdiff_t s1, s2; 533 ptrdiff_t s1, s2;
519 ptrdiff_t len; 534 ptrdiff_t len;
@@ -528,7 +543,6 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
528 s1 = 0; 543 s1 = 0;
529 p2 = SDATA (string); 544 p2 = SDATA (string);
530 s2 = SBYTES (string); 545 s2 = SBYTES (string);
531 re_match_object = string;
532 multibyte = STRING_MULTIBYTE (string); 546 multibyte = STRING_MULTIBYTE (string);
533 } 547 }
534 else 548 else
@@ -554,16 +568,19 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
554 s1 = ZV_BYTE - BEGV_BYTE; 568 s1 = ZV_BYTE - BEGV_BYTE;
555 s2 = 0; 569 s2 = 0;
556 } 570 }
557 re_match_object = Qnil;
558 multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters)); 571 multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
559 } 572 }
560 573
561 buf = compile_pattern (regexp, 0, Qnil, 0, multibyte); 574 struct regexp_cache *cache_entry =
575 compile_pattern (regexp, 0, Qnil, 0, multibyte);
576 ptrdiff_t count = SPECPDL_INDEX ();
562 freeze_buffer_relocation (); 577 freeze_buffer_relocation ();
563 len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2, 578 freeze_pattern (cache_entry);
579 re_match_object = STRINGP (string) ? string : Qnil;
580 len = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
564 pos_byte, NULL, limit_byte); 581 pos_byte, NULL, limit_byte);
565 thaw_buffer_relocation ();
566 582
583 unbind_to (count, Qnil);
567 return len; 584 return len;
568} 585}
569 586
@@ -1151,355 +1168,372 @@ while (0)
1151static struct re_registers search_regs_1; 1168static struct re_registers search_regs_1;
1152 1169
1153static EMACS_INT 1170static EMACS_INT
1154search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, 1171search_buffer_re (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1155 ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n, 1172 ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1156 int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix) 1173 Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
1157{ 1174{
1158 ptrdiff_t len = SCHARS (string); 1175 unsigned char *p1, *p2;
1159 ptrdiff_t len_byte = SBYTES (string); 1176 ptrdiff_t s1, s2;
1160 register ptrdiff_t i;
1161 1177
1162 if (running_asynch_code) 1178 /* Snapshot in case Lisp changes the value. */
1163 save_search_regs (); 1179 bool preserve_match_data = NILP (Vinhibit_changing_match_data);
1164 1180
1165 /* Searching 0 times means don't move. */ 1181 struct regexp_cache *cache_entry =
1166 /* Null string is found at starting position. */ 1182 compile_pattern (string,
1167 if (len == 0 || n == 0) 1183 preserve_match_data ? &search_regs : &search_regs_1,
1184 trt, posix,
1185 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
1186 struct re_pattern_buffer *bufp = &cache_entry->buf;
1187
1188 maybe_quit (); /* Do a pending quit right away,
1189 to avoid paradoxical behavior */
1190 /* Get pointers and sizes of the two strings
1191 that make up the visible portion of the buffer. */
1192
1193 p1 = BEGV_ADDR;
1194 s1 = GPT_BYTE - BEGV_BYTE;
1195 p2 = GAP_END_ADDR;
1196 s2 = ZV_BYTE - GPT_BYTE;
1197 if (s1 < 0)
1168 { 1198 {
1169 set_search_regs (pos_byte, 0); 1199 p2 = p1;
1170 return pos; 1200 s2 = ZV_BYTE - BEGV_BYTE;
1201 s1 = 0;
1171 } 1202 }
1172 1203 if (s2 < 0)
1173 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1174 { 1204 {
1175 unsigned char *p1, *p2; 1205 s1 = ZV_BYTE - BEGV_BYTE;
1176 ptrdiff_t s1, s2; 1206 s2 = 0;
1177 struct re_pattern_buffer *bufp; 1207 }
1178 1208
1179 bufp = compile_pattern (string, 1209 ptrdiff_t count = SPECPDL_INDEX ();
1180 (NILP (Vinhibit_changing_match_data) 1210 freeze_buffer_relocation ();
1181 ? &search_regs : &search_regs_1), 1211 freeze_pattern (cache_entry);
1182 trt, posix,
1183 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
1184 1212
1185 maybe_quit (); /* Do a pending quit right away, 1213 while (n < 0)
1186 to avoid paradoxical behavior */ 1214 {
1187 /* Get pointers and sizes of the two strings 1215 ptrdiff_t val;
1188 that make up the visible portion of the buffer. */
1189 1216
1190 p1 = BEGV_ADDR;
1191 s1 = GPT_BYTE - BEGV_BYTE;
1192 p2 = GAP_END_ADDR;
1193 s2 = ZV_BYTE - GPT_BYTE;
1194 if (s1 < 0)
1195 {
1196 p2 = p1;
1197 s2 = ZV_BYTE - BEGV_BYTE;
1198 s1 = 0;
1199 }
1200 if (s2 < 0)
1201 {
1202 s1 = ZV_BYTE - BEGV_BYTE;
1203 s2 = 0;
1204 }
1205 re_match_object = Qnil; 1217 re_match_object = Qnil;
1218 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1219 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1220 preserve_match_data ? &search_regs : &search_regs_1,
1221 /* Don't allow match past current point */
1222 pos_byte - BEGV_BYTE);
1223 if (val == -2)
1224 {
1225 matcher_overflow ();
1226 }
1227 if (val >= 0)
1228 {
1229 if (preserve_match_data)
1230 {
1231 pos_byte = search_regs.start[0] + BEGV_BYTE;
1232 for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
1233 if (search_regs.start[i] >= 0)
1234 {
1235 search_regs.start[i]
1236 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1237 search_regs.end[i]
1238 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1239 }
1240 XSETBUFFER (last_thing_searched, current_buffer);
1241 /* Set pos to the new position. */
1242 pos = search_regs.start[0];
1243 }
1244 else
1245 {
1246 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1247 /* Set pos to the new position. */
1248 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1249 }
1250 }
1251 else
1252 {
1253 unbind_to (count, Qnil);
1254 return (n);
1255 }
1256 n++;
1257 maybe_quit ();
1258 }
1259 while (n > 0)
1260 {
1261 ptrdiff_t val;
1206 1262
1207 freeze_buffer_relocation (); 1263 re_match_object = Qnil;
1264 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1265 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1266 preserve_match_data ? &search_regs : &search_regs_1,
1267 lim_byte - BEGV_BYTE);
1268 if (val == -2)
1269 {
1270 matcher_overflow ();
1271 }
1272 if (val >= 0)
1273 {
1274 if (preserve_match_data)
1275 {
1276 pos_byte = search_regs.end[0] + BEGV_BYTE;
1277 for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
1278 if (search_regs.start[i] >= 0)
1279 {
1280 search_regs.start[i]
1281 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1282 search_regs.end[i]
1283 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1284 }
1285 XSETBUFFER (last_thing_searched, current_buffer);
1286 pos = search_regs.end[0];
1287 }
1288 else
1289 {
1290 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1291 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1292 }
1293 }
1294 else
1295 {
1296 unbind_to (count, Qnil);
1297 return (0 - n);
1298 }
1299 n--;
1300 maybe_quit ();
1301 }
1302 unbind_to (count, Qnil);
1303 return (pos);
1304}
1208 1305
1209 while (n < 0) 1306static EMACS_INT
1210 { 1307search_buffer_non_re (Lisp_Object string, ptrdiff_t pos,
1211 ptrdiff_t val; 1308 ptrdiff_t pos_byte, ptrdiff_t lim, ptrdiff_t lim_byte,
1212 1309 EMACS_INT n, int RE, Lisp_Object trt, Lisp_Object inverse_trt,
1213 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, 1310 bool posix)
1214 pos_byte - BEGV_BYTE, lim_byte - pos_byte, 1311{
1215 (NILP (Vinhibit_changing_match_data) 1312 unsigned char *raw_pattern, *pat;
1216 ? &search_regs : &search_regs_1), 1313 ptrdiff_t raw_pattern_size;
1217 /* Don't allow match past current point */ 1314 ptrdiff_t raw_pattern_size_byte;
1218 pos_byte - BEGV_BYTE); 1315 unsigned char *patbuf;
1219 if (val == -2) 1316 bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
1220 { 1317 unsigned char *base_pat;
1221 matcher_overflow (); 1318 /* Set to positive if we find a non-ASCII char that need
1222 } 1319 translation. Otherwise set to zero later. */
1223 if (val >= 0) 1320 int char_base = -1;
1224 { 1321 bool boyer_moore_ok = 1;
1225 if (NILP (Vinhibit_changing_match_data)) 1322 USE_SAFE_ALLOCA;
1226 { 1323
1227 pos_byte = search_regs.start[0] + BEGV_BYTE; 1324 /* MULTIBYTE says whether the text to be searched is multibyte.
1228 for (i = 0; i < search_regs.num_regs; i++) 1325 We must convert PATTERN to match that, or we will not really
1229 if (search_regs.start[i] >= 0) 1326 find things right. */
1230 { 1327
1231 search_regs.start[i] 1328 if (multibyte == STRING_MULTIBYTE (string))
1232 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE); 1329 {
1233 search_regs.end[i] 1330 raw_pattern = SDATA (string);
1234 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE); 1331 raw_pattern_size = SCHARS (string);
1235 } 1332 raw_pattern_size_byte = SBYTES (string);
1236 XSETBUFFER (last_thing_searched, current_buffer);
1237 /* Set pos to the new position. */
1238 pos = search_regs.start[0];
1239 }
1240 else
1241 {
1242 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1243 /* Set pos to the new position. */
1244 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1245 }
1246 }
1247 else
1248 {
1249 thaw_buffer_relocation ();
1250 return (n);
1251 }
1252 n++;
1253 maybe_quit ();
1254 }
1255 while (n > 0)
1256 {
1257 ptrdiff_t val;
1258
1259 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1260 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1261 (NILP (Vinhibit_changing_match_data)
1262 ? &search_regs : &search_regs_1),
1263 lim_byte - BEGV_BYTE);
1264 if (val == -2)
1265 {
1266 matcher_overflow ();
1267 }
1268 if (val >= 0)
1269 {
1270 if (NILP (Vinhibit_changing_match_data))
1271 {
1272 pos_byte = search_regs.end[0] + BEGV_BYTE;
1273 for (i = 0; i < search_regs.num_regs; i++)
1274 if (search_regs.start[i] >= 0)
1275 {
1276 search_regs.start[i]
1277 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1278 search_regs.end[i]
1279 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1280 }
1281 XSETBUFFER (last_thing_searched, current_buffer);
1282 pos = search_regs.end[0];
1283 }
1284 else
1285 {
1286 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1287 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1288 }
1289 }
1290 else
1291 {
1292 thaw_buffer_relocation ();
1293 return (0 - n);
1294 }
1295 n--;
1296 maybe_quit ();
1297 }
1298 thaw_buffer_relocation ();
1299 return (pos);
1300 } 1333 }
1301 else /* non-RE case */ 1334 else if (multibyte)
1302 { 1335 {
1303 unsigned char *raw_pattern, *pat; 1336 raw_pattern_size = SCHARS (string);
1304 ptrdiff_t raw_pattern_size; 1337 raw_pattern_size_byte
1305 ptrdiff_t raw_pattern_size_byte; 1338 = count_size_as_multibyte (SDATA (string),
1306 unsigned char *patbuf; 1339 raw_pattern_size);
1307 bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters)); 1340 raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
1308 unsigned char *base_pat; 1341 copy_text (SDATA (string), raw_pattern,
1309 /* Set to positive if we find a non-ASCII char that need 1342 SCHARS (string), 0, 1);
1310 translation. Otherwise set to zero later. */ 1343 }
1311 int char_base = -1; 1344 else
1312 bool boyer_moore_ok = 1; 1345 {
1313 USE_SAFE_ALLOCA; 1346 /* Converting multibyte to single-byte.
1314 1347
1315 /* MULTIBYTE says whether the text to be searched is multibyte. 1348 ??? Perhaps this conversion should be done in a special way
1316 We must convert PATTERN to match that, or we will not really 1349 by subtracting nonascii-insert-offset from each non-ASCII char,
1317 find things right. */ 1350 so that only the multibyte chars which really correspond to
1318 1351 the chosen single-byte character set can possibly match. */
1319 if (multibyte == STRING_MULTIBYTE (string)) 1352 raw_pattern_size = SCHARS (string);
1320 { 1353 raw_pattern_size_byte = SCHARS (string);
1321 raw_pattern = SDATA (string); 1354 raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
1322 raw_pattern_size = SCHARS (string); 1355 copy_text (SDATA (string), raw_pattern,
1323 raw_pattern_size_byte = SBYTES (string); 1356 SBYTES (string), 1, 0);
1324 } 1357 }
1325 else if (multibyte)
1326 {
1327 raw_pattern_size = SCHARS (string);
1328 raw_pattern_size_byte
1329 = count_size_as_multibyte (SDATA (string),
1330 raw_pattern_size);
1331 raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
1332 copy_text (SDATA (string), raw_pattern,
1333 SCHARS (string), 0, 1);
1334 }
1335 else
1336 {
1337 /* Converting multibyte to single-byte.
1338
1339 ??? Perhaps this conversion should be done in a special way
1340 by subtracting nonascii-insert-offset from each non-ASCII char,
1341 so that only the multibyte chars which really correspond to
1342 the chosen single-byte character set can possibly match. */
1343 raw_pattern_size = SCHARS (string);
1344 raw_pattern_size_byte = SCHARS (string);
1345 raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
1346 copy_text (SDATA (string), raw_pattern,
1347 SBYTES (string), 1, 0);
1348 }
1349 1358
1350 /* Copy and optionally translate the pattern. */ 1359 /* Copy and optionally translate the pattern. */
1351 len = raw_pattern_size; 1360 ptrdiff_t len = raw_pattern_size;
1352 len_byte = raw_pattern_size_byte; 1361 ptrdiff_t len_byte = raw_pattern_size_byte;
1353 SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len); 1362 SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
1354 pat = patbuf; 1363 pat = patbuf;
1355 base_pat = raw_pattern; 1364 base_pat = raw_pattern;
1356 if (multibyte) 1365 if (multibyte)
1357 { 1366 {
1358 /* Fill patbuf by translated characters in STRING while 1367 /* Fill patbuf by translated characters in STRING while
1359 checking if we can use boyer-moore search. If TRT is 1368 checking if we can use boyer-moore search. If TRT is
1360 non-nil, we can use boyer-moore search only if TRT can be 1369 non-nil, we can use boyer-moore search only if TRT can be
1361 represented by the byte array of 256 elements. For that, 1370 represented by the byte array of 256 elements. For that,
1362 all non-ASCII case-equivalents of all case-sensitive 1371 all non-ASCII case-equivalents of all case-sensitive
1363 characters in STRING must belong to the same character 1372 characters in STRING must belong to the same character
1364 group (two characters belong to the same group iff their 1373 group (two characters belong to the same group iff their
1365 multibyte forms are the same except for the last byte; 1374 multibyte forms are the same except for the last byte;
1366 i.e. every 64 characters form a group; U+0000..U+003F, 1375 i.e. every 64 characters form a group; U+0000..U+003F,
1367 U+0040..U+007F, U+0080..U+00BF, ...). */ 1376 U+0040..U+007F, U+0080..U+00BF, ...). */
1368 1377
1369 while (--len >= 0) 1378 while (--len >= 0)
1370 { 1379 {
1371 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str; 1380 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1372 int c, translated, inverse; 1381 int c, translated, inverse;
1373 int in_charlen, charlen; 1382 int in_charlen, charlen;
1374 1383
1375 /* If we got here and the RE flag is set, it's because we're 1384 /* If we got here and the RE flag is set, it's because we're
1376 dealing with a regexp known to be trivial, so the backslash 1385 dealing with a regexp known to be trivial, so the backslash
1377 just quotes the next character. */ 1386 just quotes the next character. */
1378 if (RE && *base_pat == '\\') 1387 if (RE && *base_pat == '\\')
1379 { 1388 {
1380 len--; 1389 len--;
1381 raw_pattern_size--; 1390 raw_pattern_size--;
1382 len_byte--; 1391 len_byte--;
1383 base_pat++; 1392 base_pat++;
1384 } 1393 }
1385 1394
1386 c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen); 1395 c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1387 1396
1388 if (NILP (trt)) 1397 if (NILP (trt))
1389 { 1398 {
1390 str = base_pat; 1399 str = base_pat;
1391 charlen = in_charlen; 1400 charlen = in_charlen;
1392 } 1401 }
1393 else 1402 else
1394 { 1403 {
1395 /* Translate the character. */ 1404 /* Translate the character. */
1396 TRANSLATE (translated, trt, c); 1405 TRANSLATE (translated, trt, c);
1397 charlen = CHAR_STRING (translated, str_base); 1406 charlen = CHAR_STRING (translated, str_base);
1398 str = str_base; 1407 str = str_base;
1399 1408
1400 /* Check if C has any other case-equivalents. */ 1409 /* Check if C has any other case-equivalents. */
1401 TRANSLATE (inverse, inverse_trt, c); 1410 TRANSLATE (inverse, inverse_trt, c);
1402 /* If so, check if we can use boyer-moore. */ 1411 /* If so, check if we can use boyer-moore. */
1403 if (c != inverse && boyer_moore_ok) 1412 if (c != inverse && boyer_moore_ok)
1404 { 1413 {
1405 /* Check if all equivalents belong to the same 1414 /* Check if all equivalents belong to the same
1406 group of characters. Note that the check of C 1415 group of characters. Note that the check of C
1407 itself is done by the last iteration. */ 1416 itself is done by the last iteration. */
1408 int this_char_base = -1; 1417 int this_char_base = -1;
1418
1419 while (boyer_moore_ok)
1420 {
1421 if (ASCII_CHAR_P (inverse))
1422 {
1423 if (this_char_base > 0)
1424 boyer_moore_ok = 0;
1425 else
1426 this_char_base = 0;
1427 }
1428 else if (CHAR_BYTE8_P (inverse))
1429 /* Boyer-moore search can't handle a
1430 translation of an eight-bit
1431 character. */
1432 boyer_moore_ok = 0;
1433 else if (this_char_base < 0)
1434 {
1435 this_char_base = inverse & ~0x3F;
1436 if (char_base < 0)
1437 char_base = this_char_base;
1438 else if (this_char_base != char_base)
1439 boyer_moore_ok = 0;
1440 }
1441 else if ((inverse & ~0x3F) != this_char_base)
1442 boyer_moore_ok = 0;
1443 if (c == inverse)
1444 break;
1445 TRANSLATE (inverse, inverse_trt, inverse);
1446 }
1447 }
1448 }
1409 1449
1410 while (boyer_moore_ok) 1450 /* Store this character into the translated pattern. */
1411 { 1451 memcpy (pat, str, charlen);
1412 if (ASCII_CHAR_P (inverse)) 1452 pat += charlen;
1413 { 1453 base_pat += in_charlen;
1414 if (this_char_base > 0) 1454 len_byte -= in_charlen;
1415 boyer_moore_ok = 0; 1455 }
1416 else
1417 this_char_base = 0;
1418 }
1419 else if (CHAR_BYTE8_P (inverse))
1420 /* Boyer-moore search can't handle a
1421 translation of an eight-bit
1422 character. */
1423 boyer_moore_ok = 0;
1424 else if (this_char_base < 0)
1425 {
1426 this_char_base = inverse & ~0x3F;
1427 if (char_base < 0)
1428 char_base = this_char_base;
1429 else if (this_char_base != char_base)
1430 boyer_moore_ok = 0;
1431 }
1432 else if ((inverse & ~0x3F) != this_char_base)
1433 boyer_moore_ok = 0;
1434 if (c == inverse)
1435 break;
1436 TRANSLATE (inverse, inverse_trt, inverse);
1437 }
1438 }
1439 }
1440 1456
1441 /* Store this character into the translated pattern. */ 1457 /* If char_base is still negative we didn't find any translated
1442 memcpy (pat, str, charlen); 1458 non-ASCII characters. */
1443 pat += charlen; 1459 if (char_base < 0)
1444 base_pat += in_charlen; 1460 char_base = 0;
1445 len_byte -= in_charlen; 1461 }
1446 } 1462 else
1463 {
1464 /* Unibyte buffer. */
1465 char_base = 0;
1466 while (--len >= 0)
1467 {
1468 int c, translated, inverse;
1447 1469
1448 /* If char_base is still negative we didn't find any translated 1470 /* If we got here and the RE flag is set, it's because we're
1449 non-ASCII characters. */ 1471 dealing with a regexp known to be trivial, so the backslash
1450 if (char_base < 0) 1472 just quotes the next character. */
1451 char_base = 0; 1473 if (RE && *base_pat == '\\')
1452 } 1474 {
1453 else 1475 len--;
1454 { 1476 raw_pattern_size--;
1455 /* Unibyte buffer. */ 1477 base_pat++;
1456 char_base = 0; 1478 }
1457 while (--len >= 0) 1479 c = *base_pat++;
1458 { 1480 TRANSLATE (translated, trt, c);
1459 int c, translated, inverse; 1481 *pat++ = translated;
1482 /* Check that none of C's equivalents violates the
1483 assumptions of boyer_moore. */
1484 TRANSLATE (inverse, inverse_trt, c);
1485 while (1)
1486 {
1487 if (inverse >= 0200)
1488 {
1489 boyer_moore_ok = 0;
1490 break;
1491 }
1492 if (c == inverse)
1493 break;
1494 TRANSLATE (inverse, inverse_trt, inverse);
1495 }
1496 }
1497 }
1460 1498
1461 /* If we got here and the RE flag is set, it's because we're 1499 len_byte = pat - patbuf;
1462 dealing with a regexp known to be trivial, so the backslash 1500 pat = base_pat = patbuf;
1463 just quotes the next character. */ 1501
1464 if (RE && *base_pat == '\\') 1502 EMACS_INT result
1465 { 1503 = (boyer_moore_ok
1466 len--; 1504 ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
1467 raw_pattern_size--; 1505 pos_byte, lim_byte,
1468 base_pat++; 1506 char_base)
1469 } 1507 : simple_search (n, pat, raw_pattern_size, len_byte, trt,
1470 c = *base_pat++; 1508 pos, pos_byte, lim, lim_byte));
1471 TRANSLATE (translated, trt, c); 1509 SAFE_FREE ();
1472 *pat++ = translated; 1510 return result;
1473 /* Check that none of C's equivalents violates the 1511}
1474 assumptions of boyer_moore. */ 1512
1475 TRANSLATE (inverse, inverse_trt, c); 1513static EMACS_INT
1476 while (1) 1514search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1477 { 1515 ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1478 if (inverse >= 0200) 1516 int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
1479 { 1517{
1480 boyer_moore_ok = 0; 1518 if (running_asynch_code)
1481 break; 1519 save_search_regs ();
1482 }
1483 if (c == inverse)
1484 break;
1485 TRANSLATE (inverse, inverse_trt, inverse);
1486 }
1487 }
1488 }
1489 1520
1490 len_byte = pat - patbuf; 1521 /* Searching 0 times means don't move. */
1491 pat = base_pat = patbuf; 1522 /* Null string is found at starting position. */
1492 1523 if (n == 0 || SCHARS (string) == 0)
1493 EMACS_INT result 1524 {
1494 = (boyer_moore_ok 1525 set_search_regs (pos_byte, 0);
1495 ? boyer_moore (n, pat, len_byte, trt, inverse_trt, 1526 return pos;
1496 pos_byte, lim_byte,
1497 char_base)
1498 : simple_search (n, pat, raw_pattern_size, len_byte, trt,
1499 pos, pos_byte, lim, lim_byte));
1500 SAFE_FREE ();
1501 return result;
1502 } 1527 }
1528
1529 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1530 pos = search_buffer_re (string, pos, pos_byte, lim, lim_byte,
1531 n, trt, inverse_trt, posix);
1532 else
1533 pos = search_buffer_non_re (string, pos, pos_byte, lim, lim_byte,
1534 n, RE, trt, inverse_trt, posix);
1535
1536 return pos;
1503} 1537}
1504 1538
1505/* Do a simple string search N times for the string PAT, 1539/* Do a simple string search N times for the string PAT,
@@ -3353,6 +3387,7 @@ the buffer. If the buffer doesn't have a cache, the value is nil. */)
3353 return val; 3387 return val;
3354} 3388}
3355 3389
3390
3356void 3391void
3357syms_of_search (void) 3392syms_of_search (void)
3358{ 3393{
@@ -3365,6 +3400,7 @@ syms_of_search (void)
3365 searchbufs[i].buf.fastmap = searchbufs[i].fastmap; 3400 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3366 searchbufs[i].regexp = Qnil; 3401 searchbufs[i].regexp = Qnil;
3367 searchbufs[i].f_whitespace_regexp = Qnil; 3402 searchbufs[i].f_whitespace_regexp = Qnil;
3403 searchbufs[i].busy = false;
3368 searchbufs[i].syntax_table = Qnil; 3404 searchbufs[i].syntax_table = Qnil;
3369 staticpro (&searchbufs[i].regexp); 3405 staticpro (&searchbufs[i].regexp);
3370 staticpro (&searchbufs[i].f_whitespace_regexp); 3406 staticpro (&searchbufs[i].f_whitespace_regexp);
@@ -3405,6 +3441,9 @@ syms_of_search (void)
3405 saved_last_thing_searched = Qnil; 3441 saved_last_thing_searched = Qnil;
3406 staticpro (&saved_last_thing_searched); 3442 staticpro (&saved_last_thing_searched);
3407 3443
3444 re_match_object = Qnil;
3445 staticpro (&re_match_object);
3446
3408 DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp, 3447 DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
3409 doc: /* Regexp to substitute for bunches of spaces in regexp search. 3448 doc: /* Regexp to substitute for bunches of spaces in regexp search.
3410Some commands use this for user-specified regexps. 3449Some commands use this for user-specified regexps.
diff --git a/src/syntax.h b/src/syntax.h
index 2171cbbba45..f02a17ce8d6 100644
--- a/src/syntax.h
+++ b/src/syntax.h
@@ -186,13 +186,6 @@ UPDATE_SYNTAX_TABLE_FORWARD (ptrdiff_t charpos)
186 false, gl_state.object); 186 false, gl_state.object);
187} 187}
188 188
189INLINE void
190UPDATE_SYNTAX_TABLE_FORWARD_FAST (ptrdiff_t charpos)
191{
192 if (parse_sexp_lookup_properties && charpos >= gl_state.e_property)
193 update_syntax_table (charpos + gl_state.offset, 1, false, gl_state.object);
194}
195
196/* Make syntax table state (gl_state) good for CHARPOS, assuming it is 189/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
197 currently good for a position after CHARPOS. */ 190 currently good for a position after CHARPOS. */
198 191
@@ -212,13 +205,6 @@ UPDATE_SYNTAX_TABLE (ptrdiff_t charpos)
212 UPDATE_SYNTAX_TABLE_FORWARD (charpos); 205 UPDATE_SYNTAX_TABLE_FORWARD (charpos);
213} 206}
214 207
215INLINE void
216UPDATE_SYNTAX_TABLE_FAST (ptrdiff_t charpos)
217{
218 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
219 UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos);
220}
221
222/* Set up the buffer-global syntax table. */ 208/* Set up the buffer-global syntax table. */
223 209
224INLINE void 210INLINE void
diff --git a/src/thread.h b/src/thread.h
index 2c8914e1b28..c10e5ecb758 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -137,15 +137,6 @@ struct thread_state
137 struct re_registers m_saved_search_regs; 137 struct re_registers m_saved_search_regs;
138#define saved_search_regs (current_thread->m_saved_search_regs) 138#define saved_search_regs (current_thread->m_saved_search_regs)
139 139
140 /* This is the string or buffer in which we
141 are matching. It is used for looking up syntax properties.
142
143 If the value is a Lisp string object, we are matching text in that
144 string; if it's nil, we are matching text in the current buffer; if
145 it's t, we are matching text in a C string. */
146 Lisp_Object m_re_match_object;
147#define re_match_object (current_thread->m_re_match_object)
148
149 /* This member is different from waiting_for_input. 140 /* This member is different from waiting_for_input.
150 It is used to communicate to a lisp process-filter/sentinel (via the 141 It is used to communicate to a lisp process-filter/sentinel (via the
151 function Fwaiting_for_user_input_p) whether Emacs was waiting 142 function Fwaiting_for_user_input_p) whether Emacs was waiting