aboutsummaryrefslogtreecommitdiffstats
path: root/src/regex.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regex.c')
-rw-r--r--src/regex.c259
1 files changed, 127 insertions, 132 deletions
diff --git a/src/regex.c b/src/regex.c
index 43351b380de..b4f2d8d8049 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -19,9 +19,7 @@
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
20 USA. */ 20 USA. */
21 21
22/* BUGS: 22/* TODO:
23 - (x?)*y\1z should match both xxxxyxz and xxxyz.
24 TODO:
25 - structure the opcode space into opcode+flag. 23 - structure the opcode space into opcode+flag.
26 - merge with glibc's regex.[ch]. 24 - merge with glibc's regex.[ch].
27 - replace (succeed_n + jump_n + set_number_at) with something that doesn't 25 - replace (succeed_n + jump_n + set_number_at) with something that doesn't
@@ -1520,26 +1518,6 @@ do { \
1520 } \ 1518 } \
1521} while (0) 1519} while (0)
1522 1520
1523/* Discard a saved register off the stack. */
1524#define DISCARD_FAILURE_REG_OR_COUNT() \
1525do { \
1526 int reg = POP_FAILURE_INT (); \
1527 if (reg == -1) \
1528 { \
1529 /* It's a counter. */ \
1530 POP_FAILURE_POINTER (); \
1531 reg = POP_FAILURE_INT (); \
1532 DEBUG_PRINT3 (" Discard counter %p = %d\n", ptr, reg); \
1533 } \
1534 else \
1535 { \
1536 POP_FAILURE_POINTER (); \
1537 POP_FAILURE_POINTER (); \
1538 DEBUG_PRINT4 (" Discard reg %d (spanning %p -> %p)\n", \
1539 reg, regstart[reg], regend[reg]); \
1540 } \
1541} while (0)
1542
1543/* Check that we are not stuck in an infinite loop. */ 1521/* Check that we are not stuck in an infinite loop. */
1544#define CHECK_INFINITE_LOOP(pat_cur, string_place) \ 1522#define CHECK_INFINITE_LOOP(pat_cur, string_place) \
1545do { \ 1523do { \
@@ -1553,16 +1531,15 @@ do { \
1553 && FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \ 1531 && FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \
1554 if (FAILURE_PAT (failure) == pat_cur) \ 1532 if (FAILURE_PAT (failure) == pat_cur) \
1555 { \ 1533 { \
1556 while (fail_stack.frame < fail_stack.avail) \ 1534 cycle = 1; \
1557 DISCARD_FAILURE_REG_OR_COUNT (); \ 1535 break; \
1558 goto fail; \
1559 } \ 1536 } \
1560 DEBUG_PRINT2 (" Other pattern: %p\n", FAILURE_PAT (failure)); \ 1537 DEBUG_PRINT2 (" Other pattern: %p\n", FAILURE_PAT (failure)); \
1561 failure = NEXT_FAILURE_HANDLE(failure); \ 1538 failure = NEXT_FAILURE_HANDLE(failure); \
1562 } \ 1539 } \
1563 DEBUG_PRINT2 (" Other string: %p\n", FAILURE_STR (failure)); \ 1540 DEBUG_PRINT2 (" Other string: %p\n", FAILURE_STR (failure)); \
1564} while (0) 1541} while (0)
1565 1542
1566/* Push the information about the state we will need 1543/* Push the information about the state we will need
1567 if we ever fail back to it. 1544 if we ever fail back to it.
1568 1545
@@ -2645,8 +2622,8 @@ regex_compile (pattern, size, syntax, bufp)
2645 unsigned int startoffset = 0; 2622 unsigned int startoffset = 0;
2646 re_opcode_t ofj = 2623 re_opcode_t ofj =
2647 /* Check if the loop can match the empty string. */ 2624 /* Check if the loop can match the empty string. */
2648 (simple || !analyse_first (laststart, b, NULL, 0)) ? 2625 (simple || !analyse_first (laststart, b, NULL, 0))
2649 on_failure_jump : on_failure_jump_loop; 2626 ? on_failure_jump : on_failure_jump_loop;
2650 assert (skip_one_char (laststart) <= b); 2627 assert (skip_one_char (laststart) <= b);
2651 2628
2652 if (!zero_times_ok && simple) 2629 if (!zero_times_ok && simple)
@@ -2694,8 +2671,9 @@ regex_compile (pattern, size, syntax, bufp)
2694 { 2671 {
2695 boolean emptyp = analyse_first (laststart, b, NULL, 0); 2672 boolean emptyp = analyse_first (laststart, b, NULL, 0);
2696 2673
2697 /* The non-greedy multiple match looks like a repeat..until: 2674 /* The non-greedy multiple match looks like
2698 we only need a conditional jump at the end of the loop */ 2675 a repeat..until: we only need a conditional jump
2676 at the end of the loop. */
2699 if (emptyp) BUF_PUSH (no_op); 2677 if (emptyp) BUF_PUSH (no_op);
2700 STORE_JUMP (emptyp ? on_failure_jump_nastyloop 2678 STORE_JUMP (emptyp ? on_failure_jump_nastyloop
2701 : on_failure_jump, b, laststart); 2679 : on_failure_jump, b, laststart);
@@ -2704,7 +2682,7 @@ regex_compile (pattern, size, syntax, bufp)
2704 { 2682 {
2705 /* The repeat...until naturally matches one or more. 2683 /* The repeat...until naturally matches one or more.
2706 To also match zero times, we need to first jump to 2684 To also match zero times, we need to first jump to
2707 the end of the loop (its conditional jump). */ 2685 the end of the loop (its conditional jump). */
2708 INSERT_JUMP (jump, laststart, b); 2686 INSERT_JUMP (jump, laststart, b);
2709 b += 3; 2687 b += 3;
2710 } 2688 }
@@ -3241,99 +3219,99 @@ regex_compile (pattern, size, syntax, bufp)
3241 goto unfetch_interval; 3219 goto unfetch_interval;
3242 } 3220 }
3243 3221
3244 if (upper_bound == 0) 3222 if (upper_bound == 0)
3245 /* If the upper bound is zero, just drop the sub pattern 3223 /* If the upper bound is zero, just drop the sub pattern
3246 altogether. */ 3224 altogether. */
3247 b = laststart; 3225 b = laststart;
3248 else if (lower_bound == 1 && upper_bound == 1) 3226 else if (lower_bound == 1 && upper_bound == 1)
3249 /* Just match it once: nothing to do here. */ 3227 /* Just match it once: nothing to do here. */
3250 ; 3228 ;
3251 3229
3252 /* Otherwise, we have a nontrivial interval. When 3230 /* Otherwise, we have a nontrivial interval. When
3253 we're all done, the pattern will look like: 3231 we're all done, the pattern will look like:
3254 set_number_at <jump count> <upper bound> 3232 set_number_at <jump count> <upper bound>
3255 set_number_at <succeed_n count> <lower bound> 3233 set_number_at <succeed_n count> <lower bound>
3256 succeed_n <after jump addr> <succeed_n count> 3234 succeed_n <after jump addr> <succeed_n count>
3257 <body of loop> 3235 <body of loop>
3258 jump_n <succeed_n addr> <jump count> 3236 jump_n <succeed_n addr> <jump count>
3259 (The upper bound and `jump_n' are omitted if 3237 (The upper bound and `jump_n' are omitted if
3260 `upper_bound' is 1, though.) */ 3238 `upper_bound' is 1, though.) */
3261 else 3239 else
3262 { /* If the upper bound is > 1, we need to insert 3240 { /* If the upper bound is > 1, we need to insert
3263 more at the end of the loop. */ 3241 more at the end of the loop. */
3264 unsigned int nbytes = (upper_bound < 0 ? 3 3242 unsigned int nbytes = (upper_bound < 0 ? 3
3265 : upper_bound > 1 ? 5 : 0); 3243 : upper_bound > 1 ? 5 : 0);
3266 unsigned int startoffset = 0; 3244 unsigned int startoffset = 0;
3267 3245
3268 GET_BUFFER_SPACE (20); /* We might use less. */ 3246 GET_BUFFER_SPACE (20); /* We might use less. */
3269 3247
3270 if (lower_bound == 0) 3248 if (lower_bound == 0)
3271 { 3249 {
3272 /* A succeed_n that starts with 0 is really a 3250 /* A succeed_n that starts with 0 is really a
3273 a simple on_failure_jump_loop. */ 3251 a simple on_failure_jump_loop. */
3274 INSERT_JUMP (on_failure_jump_loop, laststart, 3252 INSERT_JUMP (on_failure_jump_loop, laststart,
3275 b + 3 + nbytes); 3253 b + 3 + nbytes);
3276 b += 3; 3254 b += 3;
3277 } 3255 }
3278 else 3256 else
3279 { 3257 {
3280 /* Initialize lower bound of the `succeed_n', even 3258 /* Initialize lower bound of the `succeed_n', even
3281 though it will be set during matching by its 3259 though it will be set during matching by its
3282 attendant `set_number_at' (inserted next), 3260 attendant `set_number_at' (inserted next),
3283 because `re_compile_fastmap' needs to know. 3261 because `re_compile_fastmap' needs to know.
3284 Jump to the `jump_n' we might insert below. */ 3262 Jump to the `jump_n' we might insert below. */
3285 INSERT_JUMP2 (succeed_n, laststart, 3263 INSERT_JUMP2 (succeed_n, laststart,
3286 b + 5 + nbytes, 3264 b + 5 + nbytes,
3287 lower_bound); 3265 lower_bound);
3288 b += 5; 3266 b += 5;
3289 3267
3290 /* Code to initialize the lower bound. Insert 3268 /* Code to initialize the lower bound. Insert
3291 before the `succeed_n'. The `5' is the last two 3269 before the `succeed_n'. The `5' is the last two
3292 bytes of this `set_number_at', plus 3 bytes of 3270 bytes of this `set_number_at', plus 3 bytes of
3293 the following `succeed_n'. */ 3271 the following `succeed_n'. */
3294 insert_op2 (set_number_at, laststart, 5, lower_bound, b); 3272 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
3295 b += 5; 3273 b += 5;
3296 startoffset += 5; 3274 startoffset += 5;
3297 } 3275 }
3298 3276
3299 if (upper_bound < 0) 3277 if (upper_bound < 0)
3300 { 3278 {
3301 /* A negative upper bound stands for infinity, 3279 /* A negative upper bound stands for infinity,
3302 in which case it degenerates to a plain jump. */ 3280 in which case it degenerates to a plain jump. */
3303 STORE_JUMP (jump, b, laststart + startoffset); 3281 STORE_JUMP (jump, b, laststart + startoffset);
3304 b += 3; 3282 b += 3;
3305 } 3283 }
3306 else if (upper_bound > 1) 3284 else if (upper_bound > 1)
3307 { /* More than one repetition is allowed, so 3285 { /* More than one repetition is allowed, so
3308 append a backward jump to the `succeed_n' 3286 append a backward jump to the `succeed_n'
3309 that starts this interval. 3287 that starts this interval.
3310 3288
3311 When we've reached this during matching, 3289 When we've reached this during matching,
3312 we'll have matched the interval once, so 3290 we'll have matched the interval once, so
3313 jump back only `upper_bound - 1' times. */ 3291 jump back only `upper_bound - 1' times. */
3314 STORE_JUMP2 (jump_n, b, laststart + startoffset, 3292 STORE_JUMP2 (jump_n, b, laststart + startoffset,
3315 upper_bound - 1); 3293 upper_bound - 1);
3316 b += 5; 3294 b += 5;
3317 3295
3318 /* The location we want to set is the second 3296 /* The location we want to set is the second
3319 parameter of the `jump_n'; that is `b-2' as 3297 parameter of the `jump_n'; that is `b-2' as
3320 an absolute address. `laststart' will be 3298 an absolute address. `laststart' will be
3321 the `set_number_at' we're about to insert; 3299 the `set_number_at' we're about to insert;
3322 `laststart+3' the number to set, the source 3300 `laststart+3' the number to set, the source
3323 for the relative address. But we are 3301 for the relative address. But we are
3324 inserting into the middle of the pattern -- 3302 inserting into the middle of the pattern --
3325 so everything is getting moved up by 5. 3303 so everything is getting moved up by 5.
3326 Conclusion: (b - 2) - (laststart + 3) + 5, 3304 Conclusion: (b - 2) - (laststart + 3) + 5,
3327 i.e., b - laststart. 3305 i.e., b - laststart.
3328 3306
3329 We insert this at the beginning of the loop 3307 We insert this at the beginning of the loop
3330 so that if we fail during matching, we'll 3308 so that if we fail during matching, we'll
3331 reinitialize the bounds. */ 3309 reinitialize the bounds. */
3332 insert_op2 (set_number_at, laststart, b - laststart, 3310 insert_op2 (set_number_at, laststart, b - laststart,
3333 upper_bound - 1, b); 3311 upper_bound - 1, b);
3334 b += 5; 3312 b += 5;
3335 } 3313 }
3336 } 3314 }
3337 pending_exact = 0; 3315 pending_exact = 0;
3338 beg_interval = NULL; 3316 beg_interval = NULL;
3339 } 3317 }
@@ -5518,7 +5496,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5518 cycle detection cannot work. Worse yet, such a detection 5496 cycle detection cannot work. Worse yet, such a detection
5519 can not only fail to detect a cycle, but it can also wrongly 5497 can not only fail to detect a cycle, but it can also wrongly
5520 detect a cycle (between different instantiations of the same 5498 detect a cycle (between different instantiations of the same
5521 loop. 5499 loop).
5522 So the method used for those nasty loops is a little different: 5500 So the method used for those nasty loops is a little different:
5523 We use a special cycle-detection-stack-frame which is pushed 5501 We use a special cycle-detection-stack-frame which is pushed
5524 when the on_failure_jump_nastyloop failure-point is *popped*. 5502 when the on_failure_jump_nastyloop failure-point is *popped*.
@@ -5532,11 +5510,18 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5532 mcnt, p + mcnt); 5510 mcnt, p + mcnt);
5533 5511
5534 assert ((re_opcode_t)p[-4] == no_op); 5512 assert ((re_opcode_t)p[-4] == no_op);
5535 CHECK_INFINITE_LOOP (p - 4, d); 5513 {
5536 PUSH_FAILURE_POINT (p - 3, d); 5514 int cycle = 0;
5515 CHECK_INFINITE_LOOP (p - 4, d);
5516 if (!cycle)
5517 /* If there's a cycle, just continue without pushing
5518 this failure point. The failure point is the "try again"
5519 option, which shouldn't be tried.
5520 We want (x?)*?y\1z to match both xxyz and xxyxz. */
5521 PUSH_FAILURE_POINT (p - 3, d);
5522 }
5537 break; 5523 break;
5538 5524
5539
5540 /* Simple loop detecting on_failure_jump: just check on the 5525 /* Simple loop detecting on_failure_jump: just check on the
5541 failure stack if the same spot was already hit earlier. */ 5526 failure stack if the same spot was already hit earlier. */
5542 case on_failure_jump_loop: 5527 case on_failure_jump_loop:
@@ -5544,9 +5529,19 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5544 EXTRACT_NUMBER_AND_INCR (mcnt, p); 5529 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5545 DEBUG_PRINT3 ("EXECUTING on_failure_jump_loop %d (to %p):\n", 5530 DEBUG_PRINT3 ("EXECUTING on_failure_jump_loop %d (to %p):\n",
5546 mcnt, p + mcnt); 5531 mcnt, p + mcnt);
5547 5532 {
5548 CHECK_INFINITE_LOOP (p - 3, d); 5533 int cycle = 0;
5549 PUSH_FAILURE_POINT (p - 3, d); 5534 CHECK_INFINITE_LOOP (p - 3, d);
5535 if (cycle)
5536 /* If there's a cycle, get out of the loop, as if the matching
5537 had failed. We used to just `goto fail' here, but that was
5538 aborting the search a bit too early: we want to keep the
5539 empty-loop-match and keep matching after the loop.
5540 We want (x?)*y\1z to match both xxyz and xxyxz. */
5541 p += mcnt;
5542 else
5543 PUSH_FAILURE_POINT (p - 3, d);
5544 }
5550 break; 5545 break;
5551 5546
5552 5547