diff options
| author | Stefan Monnier | 2000-03-26 23:05:51 +0000 |
|---|---|---|
| committer | Stefan Monnier | 2000-03-26 23:05:51 +0000 |
| commit | 0683b6fa90db2a43ca13bb2774255e22d40c4758 (patch) | |
| tree | 1dff5bc1e63e301b759ad20fd454bed4b7e2a0f9 /src | |
| parent | 3831af621accfbda7ea055db82046759a572987a (diff) | |
| download | emacs-0683b6fa90db2a43ca13bb2774255e22d40c4758.tar.gz emacs-0683b6fa90db2a43ca13bb2774255e22d40c4758.zip | |
(enum re_opcode_t): New opcode on_failure_jump_nastyloop.
(print_partial_compiled_pattern, re_compile_fastmap): Handle new opcode.
(regex_compile): Use on_failure_jump_nastyloop for non-greedy loops.
(re_match_2_internal): Add code for on_failure_jump_nastyloop when
executing it as well as when popping it off the stack to find infinite
loops in non-greedy repetition operators.
Diffstat (limited to 'src')
| -rw-r--r-- | src/regex.c | 56 |
1 files changed, 50 insertions, 6 deletions
diff --git a/src/regex.c b/src/regex.c index 227a52b46e8..671f6c152f3 100644 --- a/src/regex.c +++ b/src/regex.c | |||
| @@ -20,7 +20,6 @@ | |||
| 20 | USA. */ | 20 | USA. */ |
| 21 | 21 | ||
| 22 | /* TODO: | 22 | /* TODO: |
| 23 | - detect nasty infinite loops like "\\(\\)+?ab" when matching "ac". | ||
| 24 | - use analyze_first to optimize non-empty loops | 23 | - use analyze_first to optimize non-empty loops |
| 25 | - optimize succeed_n and jump_n away when possible | 24 | - optimize succeed_n and jump_n away when possible |
| 26 | - clean up multibyte issues | 25 | - clean up multibyte issues |
| @@ -532,6 +531,12 @@ typedef enum | |||
| 532 | indefinitely). */ | 531 | indefinitely). */ |
| 533 | on_failure_jump_loop, | 532 | on_failure_jump_loop, |
| 534 | 533 | ||
| 534 | /* Just like `on_failure_jump_loop', except that it checks for | ||
| 535 | a different kind of loop (the kind that shows up with non-greedy | ||
| 536 | operators). This operation has to be immediately preceded | ||
| 537 | by a `no_op'. */ | ||
| 538 | on_failure_jump_nastyloop, | ||
| 539 | |||
| 535 | /* A smart `on_failure_jump' used for greedy * and + operators. | 540 | /* A smart `on_failure_jump' used for greedy * and + operators. |
| 536 | It analyses the loop before which it is put and if the | 541 | It analyses the loop before which it is put and if the |
| 537 | loop does not require backtracking, it changes itself to | 542 | loop does not require backtracking, it changes itself to |
| @@ -942,6 +947,11 @@ print_partial_compiled_pattern (start, end) | |||
| 942 | printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); | 947 | printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); |
| 943 | break; | 948 | break; |
| 944 | 949 | ||
| 950 | case on_failure_jump_nastyloop: | ||
| 951 | extract_number_and_incr (&mcnt, &p); | ||
| 952 | printf ("/on_failure_jump_nastyloop to %d", p + mcnt - start); | ||
| 953 | break; | ||
| 954 | |||
| 945 | case on_failure_jump_loop: | 955 | case on_failure_jump_loop: |
| 946 | extract_number_and_incr (&mcnt, &p); | 956 | extract_number_and_incr (&mcnt, &p); |
| 947 | printf ("/on_failure_jump_loop to %d", p + mcnt - start); | 957 | printf ("/on_failure_jump_loop to %d", p + mcnt - start); |
| @@ -2179,19 +2189,19 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2179 | else /* not greedy */ | 2189 | else /* not greedy */ |
| 2180 | { /* I wish the greedy and non-greedy cases could be merged. */ | 2190 | { /* I wish the greedy and non-greedy cases could be merged. */ |
| 2181 | 2191 | ||
| 2192 | GET_BUFFER_SPACE (7); /* We might use less. */ | ||
| 2182 | if (many_times_ok) | 2193 | if (many_times_ok) |
| 2183 | { | 2194 | { |
| 2184 | /* The non-greedy multiple match looks like a repeat..until: | 2195 | /* The non-greedy multiple match looks like a repeat..until: |
| 2185 | we only need a conditional jump at the end of the loop */ | 2196 | we only need a conditional jump at the end of the loop */ |
| 2186 | GET_BUFFER_SPACE (3); | 2197 | BUF_PUSH (no_op); |
| 2187 | STORE_JUMP (on_failure_jump, b, laststart); | 2198 | STORE_JUMP (on_failure_jump_nastyloop, b, laststart); |
| 2188 | b += 3; | 2199 | b += 3; |
| 2189 | if (zero_times_ok) | 2200 | if (zero_times_ok) |
| 2190 | { | 2201 | { |
| 2191 | /* The repeat...until naturally matches one or more. | 2202 | /* The repeat...until naturally matches one or more. |
| 2192 | To also match zero times, we need to first jump to | 2203 | To also match zero times, we need to first jump to |
| 2193 | the end of the loop (its conditional jump). */ | 2204 | the end of the loop (its conditional jump). */ |
| 2194 | GET_BUFFER_SPACE (3); | ||
| 2195 | INSERT_JUMP (jump, laststart, b); | 2205 | INSERT_JUMP (jump, laststart, b); |
| 2196 | b += 3; | 2206 | b += 3; |
| 2197 | } | 2207 | } |
| @@ -2199,7 +2209,6 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2199 | else | 2209 | else |
| 2200 | { | 2210 | { |
| 2201 | /* non-greedy a?? */ | 2211 | /* non-greedy a?? */ |
| 2202 | GET_BUFFER_SPACE (6); | ||
| 2203 | INSERT_JUMP (jump, laststart, b + 3); | 2212 | INSERT_JUMP (jump, laststart, b + 3); |
| 2204 | b += 3; | 2213 | b += 3; |
| 2205 | INSERT_JUMP (on_failure_jump, laststart, laststart + 6); | 2214 | INSERT_JUMP (on_failure_jump, laststart, laststart + 6); |
| @@ -3514,6 +3523,7 @@ re_compile_fastmap (bufp) | |||
| 3514 | case on_failure_jump: | 3523 | case on_failure_jump: |
| 3515 | case on_failure_keep_string_jump: | 3524 | case on_failure_keep_string_jump: |
| 3516 | case on_failure_jump_loop: | 3525 | case on_failure_jump_loop: |
| 3526 | case on_failure_jump_nastyloop: | ||
| 3517 | case on_failure_jump_smart: | 3527 | case on_failure_jump_smart: |
| 3518 | p++; | 3528 | p++; |
| 3519 | break; | 3529 | break; |
| @@ -3526,6 +3536,7 @@ re_compile_fastmap (bufp) | |||
| 3526 | 3536 | ||
| 3527 | case on_failure_jump: | 3537 | case on_failure_jump: |
| 3528 | case on_failure_keep_string_jump: | 3538 | case on_failure_keep_string_jump: |
| 3539 | case on_failure_jump_nastyloop: | ||
| 3529 | case on_failure_jump_loop: | 3540 | case on_failure_jump_loop: |
| 3530 | case on_failure_jump_smart: | 3541 | case on_failure_jump_smart: |
| 3531 | handle_on_failure_jump: | 3542 | handle_on_failure_jump: |
| @@ -3708,7 +3719,7 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) | |||
| 3708 | int anchored_start = 0; | 3719 | int anchored_start = 0; |
| 3709 | 3720 | ||
| 3710 | /* Nonzero if we have to concern multibyte character. */ | 3721 | /* Nonzero if we have to concern multibyte character. */ |
| 3711 | int multibyte = bufp->multibyte; | 3722 | const boolean multibyte = bufp->multibyte; |
| 3712 | 3723 | ||
| 3713 | /* Check for out-of-range STARTPOS. */ | 3724 | /* Check for out-of-range STARTPOS. */ |
| 3714 | if (startpos < 0 || startpos > total_size) | 3725 | if (startpos < 0 || startpos > total_size) |
| @@ -5067,6 +5078,30 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5067 | PUSH_FAILURE_POINT (p - 3, NULL); | 5078 | PUSH_FAILURE_POINT (p - 3, NULL); |
| 5068 | break; | 5079 | break; |
| 5069 | 5080 | ||
| 5081 | /* A nasty loop is introduced by the non-greedy *? and +?. | ||
| 5082 | With such loops, the stack only ever contains one failure point | ||
| 5083 | at a time, so that a plain on_failure_jump_loop kind of | ||
| 5084 | cycle detection cannot work. Worse yet, such a detection | ||
| 5085 | can not only fail to detect a cycle, but it can also wrongly | ||
| 5086 | detect a cycle (between different instantiations of the same | ||
| 5087 | loop. | ||
| 5088 | So the method used for those nasty loops is a little different: | ||
| 5089 | We use a special cycle-detection-stack-frame which is pushed | ||
| 5090 | when the on_failure_jump_nastyloop failure-point is *popped*. | ||
| 5091 | This special frame thus marks the beginning of one iteration | ||
| 5092 | through the loop and we can hence easily check right here | ||
| 5093 | whether something matched between the beginning and the end of | ||
| 5094 | the loop. */ | ||
| 5095 | case on_failure_jump_nastyloop: | ||
| 5096 | EXTRACT_NUMBER_AND_INCR (mcnt, p); | ||
| 5097 | DEBUG_PRINT3 ("EXECUTING on_failure_jump_nastyloop %d (to %p):\n", | ||
| 5098 | mcnt, p + mcnt); | ||
| 5099 | |||
| 5100 | assert ((re_opcode_t)p[-4] == no_op); | ||
| 5101 | CHECK_INFINITE_LOOP (p - 4, d); | ||
| 5102 | PUSH_FAILURE_POINT (p - 3, d); | ||
| 5103 | break; | ||
| 5104 | |||
| 5070 | 5105 | ||
| 5071 | /* Simple loop detecting on_failure_jump: just check on the | 5106 | /* Simple loop detecting on_failure_jump: just check on the |
| 5072 | failure stack if the same spot was already hit earlier. */ | 5107 | failure stack if the same spot was already hit earlier. */ |
| @@ -5428,6 +5463,11 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5428 | assert (str == NULL); | 5463 | assert (str == NULL); |
| 5429 | goto continue_failure_jump; | 5464 | goto continue_failure_jump; |
| 5430 | 5465 | ||
| 5466 | case on_failure_jump_nastyloop: | ||
| 5467 | assert ((re_opcode_t)pat[-2] == no_op); | ||
| 5468 | PUSH_FAILURE_POINT (pat - 2, str); | ||
| 5469 | /* Fallthrough */ | ||
| 5470 | |||
| 5431 | case on_failure_jump_loop: | 5471 | case on_failure_jump_loop: |
| 5432 | case on_failure_jump: | 5472 | case on_failure_jump: |
| 5433 | case succeed_n: | 5473 | case succeed_n: |
| @@ -5437,6 +5477,10 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) | |||
| 5437 | p = pat + mcnt; | 5477 | p = pat + mcnt; |
| 5438 | break; | 5478 | break; |
| 5439 | 5479 | ||
| 5480 | case no_op: | ||
| 5481 | /* A special frame used for nastyloops. */ | ||
| 5482 | goto fail; | ||
| 5483 | |||
| 5440 | default: | 5484 | default: |
| 5441 | abort(); | 5485 | abort(); |
| 5442 | } | 5486 | } |