diff options
| author | Stefan Monnier | 2000-03-27 22:26:54 +0000 |
|---|---|---|
| committer | Stefan Monnier | 2000-03-27 22:26:54 +0000 |
| commit | ed0767d80f1d3ae00b4a89576b009e44398cc1ba (patch) | |
| tree | 3628c75f0a930d59d45d3ba1b0dc9c38b1c4e7a5 /src | |
| parent | e11e7e46bcd0f5310d5a8ed56908993d8662866f (diff) | |
| download | emacs-ed0767d80f1d3ae00b4a89576b009e44398cc1ba.tar.gz emacs-ed0767d80f1d3ae00b4a89576b009e44398cc1ba.zip | |
(REGEX_FREE_STACK, RESET_FAIL_STACK): Make them usable as an expression.
(enum re_opcode_t): Update description of succeed_n.
(PATFETCH): Always define.
(regex_compile): Use lookahead rather than PATUNFETCH (for repetition
operators, char classes, shy-groups and intervals).
Optimize special cases of intervals so as to only use succeed_n and
jump_n when really needed.
(re_compile_fastmap): Simplify handling of jump_n and succeed_n now
that we don't have to handle the special cases any more.
Simplify on_failure_jump handling as well.
Diffstat (limited to 'src')
| -rw-r--r-- | src/ChangeLog | 14 | ||||
| -rw-r--r-- | src/regex.c | 227 |
2 files changed, 118 insertions, 123 deletions
diff --git a/src/ChangeLog b/src/ChangeLog index 3a01bac0160..d3ea1be941d 100644 --- a/src/ChangeLog +++ b/src/ChangeLog | |||
| @@ -1,3 +1,17 @@ | |||
| 1 | 2000-03-27 Stefan Monnier <monnier@cs.yale.edu> | ||
| 2 | |||
| 3 | * regex.c (REGEX_FREE_STACK, RESET_FAIL_STACK): Make them usable as | ||
| 4 | an expression. | ||
| 5 | (enum re_opcode_t): Update description of succeed_n. | ||
| 6 | (PATFETCH): Always define. | ||
| 7 | (regex_compile): Use lookahead rather than PATUNFETCH (for repetition | ||
| 8 | operators, char classes, shy-groups and intervals). | ||
| 9 | Optimize special cases of intervals so as to only use succeed_n and | ||
| 10 | jump_n when really needed. | ||
| 11 | (re_compile_fastmap): Simplify handling of jump_n and succeed_n now | ||
| 12 | that we don't have to handle the special cases any more. | ||
| 13 | Simplify on_failure_jump handling as well. | ||
| 14 | |||
| 1 | 2000-03-28 Jason Rumney <jasonr@gnu.org> | 15 | 2000-03-28 Jason Rumney <jasonr@gnu.org> |
| 2 | 16 | ||
| 3 | * lread.c (Fload): Move safe_p definition to above #ifdef DOS_NT. | 17 | * lread.c (Fload): Move safe_p definition to above #ifdef DOS_NT. |
diff --git a/src/regex.c b/src/regex.c index 671f6c152f3..f7ca3a7b8b4 100644 --- a/src/regex.c +++ b/src/regex.c | |||
| @@ -21,7 +21,6 @@ | |||
| 21 | 21 | ||
| 22 | /* TODO: | 22 | /* TODO: |
| 23 | - use analyze_first to optimize non-empty loops | 23 | - use analyze_first to optimize non-empty loops |
| 24 | - optimize succeed_n and jump_n away when possible | ||
| 25 | - clean up multibyte issues | 24 | - clean up multibyte issues |
| 26 | - structure the opcode space into opcode+flag. | 25 | - structure the opcode space into opcode+flag. |
| 27 | - merge with glic's regex.[ch] | 26 | - merge with glic's regex.[ch] |
| @@ -411,7 +410,7 @@ char *alloca (); | |||
| 411 | #define REGEX_REALLOCATE_STACK(source, osize, nsize) \ | 410 | #define REGEX_REALLOCATE_STACK(source, osize, nsize) \ |
| 412 | REGEX_REALLOCATE (source, osize, nsize) | 411 | REGEX_REALLOCATE (source, osize, nsize) |
| 413 | /* No need to explicitly free anything. */ | 412 | /* No need to explicitly free anything. */ |
| 414 | #define REGEX_FREE_STACK(arg) | 413 | #define REGEX_FREE_STACK(arg) ((void)0) |
| 415 | 414 | ||
| 416 | #endif /* not REGEX_MALLOC */ | 415 | #endif /* not REGEX_MALLOC */ |
| 417 | #endif /* not using relocating allocator */ | 416 | #endif /* not using relocating allocator */ |
| @@ -546,7 +545,9 @@ typedef enum | |||
| 546 | on_failure_jump_smart, | 545 | on_failure_jump_smart, |
| 547 | 546 | ||
| 548 | /* Followed by two-byte relative address and two-byte number n. | 547 | /* Followed by two-byte relative address and two-byte number n. |
| 549 | After matching N times, jump to the address upon failure. */ | 548 | After matching N times, jump to the address upon failure. |
| 549 | Does not work if N starts at 0: use on_failure_jump_loop | ||
| 550 | instead. */ | ||
| 550 | succeed_n, | 551 | succeed_n, |
| 551 | 552 | ||
| 552 | /* Followed by two-byte relative address, and two-byte number n. | 553 | /* Followed by two-byte relative address, and two-byte number n. |
| @@ -1289,7 +1290,7 @@ typedef struct | |||
| 1289 | fail_stack.frame = 0; \ | 1290 | fail_stack.frame = 0; \ |
| 1290 | } while (0) | 1291 | } while (0) |
| 1291 | 1292 | ||
| 1292 | #define RESET_FAIL_STACK() | 1293 | #define RESET_FAIL_STACK() ((void)0) |
| 1293 | #endif | 1294 | #endif |
| 1294 | 1295 | ||
| 1295 | 1296 | ||
| @@ -1546,13 +1547,11 @@ static unsigned char *skip_one_char _RE_ARGS((unsigned char *p)); | |||
| 1546 | if necessary. Also cast from a signed character in the constant | 1547 | if necessary. Also cast from a signed character in the constant |
| 1547 | string passed to us by the user to an unsigned char that we can use | 1548 | string passed to us by the user to an unsigned char that we can use |
| 1548 | as an array index (in, e.g., `translate'). */ | 1549 | as an array index (in, e.g., `translate'). */ |
| 1549 | #ifndef PATFETCH | ||
| 1550 | #define PATFETCH(c) \ | 1550 | #define PATFETCH(c) \ |
| 1551 | do { \ | 1551 | do { \ |
| 1552 | PATFETCH_RAW (c); \ | 1552 | PATFETCH_RAW (c); \ |
| 1553 | if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c); \ | 1553 | if (RE_TRANSLATE_P (translate)) c = RE_TRANSLATE (translate, c); \ |
| 1554 | } while (0) | 1554 | } while (0) |
| 1555 | #endif | ||
| 1556 | 1555 | ||
| 1557 | /* Fetch the next character in the uncompiled pattern, with no | 1556 | /* Fetch the next character in the uncompiled pattern, with no |
| 1558 | translation. */ | 1557 | translation. */ |
| @@ -2103,35 +2102,24 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2103 | 2102 | ||
| 2104 | if (p == pend) | 2103 | if (p == pend) |
| 2105 | break; | 2104 | break; |
| 2106 | 2105 | else if (*p == '*' | |
| 2107 | PATFETCH (c); | 2106 | || (!(syntax & RE_BK_PLUS_QM) |
| 2108 | 2107 | && (*p == '+' || *p == '?'))) | |
| 2109 | if (c == '*' | ||
| 2110 | || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) | ||
| 2111 | ; | 2108 | ; |
| 2112 | 2109 | else if (syntax & RE_BK_PLUS_QM && *p == '\\') | |
| 2113 | else if (syntax & RE_BK_PLUS_QM && c == '\\') | ||
| 2114 | { | 2110 | { |
| 2115 | if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); | 2111 | if (p+1 == pend) |
| 2116 | 2112 | FREE_STACK_RETURN (REG_EESCAPE); | |
| 2117 | PATFETCH (c1); | 2113 | if (p[1] == '+' || p[1] == '?') |
| 2118 | if (!(c1 == '+' || c1 == '?')) | 2114 | PATFETCH (c); /* Gobble up the backslash. */ |
| 2119 | { | 2115 | else |
| 2120 | PATUNFETCH; | 2116 | break; |
| 2121 | PATUNFETCH; | ||
| 2122 | break; | ||
| 2123 | } | ||
| 2124 | |||
| 2125 | c = c1; | ||
| 2126 | } | 2117 | } |
| 2127 | else | 2118 | else |
| 2128 | { | 2119 | break; |
| 2129 | PATUNFETCH; | ||
| 2130 | break; | ||
| 2131 | } | ||
| 2132 | |||
| 2133 | /* If we get here, we found another repeat character. */ | 2120 | /* If we get here, we found another repeat character. */ |
| 2134 | } | 2121 | PATFETCH (c); |
| 2122 | } | ||
| 2135 | 2123 | ||
| 2136 | /* Star, etc. applied to an empty pattern is equivalent | 2124 | /* Star, etc. applied to an empty pattern is equivalent |
| 2137 | to an empty pattern. */ | 2125 | to an empty pattern. */ |
| @@ -2306,9 +2294,11 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2306 | { | 2294 | { |
| 2307 | /* Leave room for the null. */ | 2295 | /* Leave room for the null. */ |
| 2308 | char str[CHAR_CLASS_MAX_LENGTH + 1]; | 2296 | char str[CHAR_CLASS_MAX_LENGTH + 1]; |
| 2297 | const unsigned char *class_beg; | ||
| 2309 | 2298 | ||
| 2310 | PATFETCH (c); | 2299 | PATFETCH (c); |
| 2311 | c1 = 0; | 2300 | c1 = 0; |
| 2301 | class_beg = p; | ||
| 2312 | 2302 | ||
| 2313 | /* If pattern is `[[:'. */ | 2303 | /* If pattern is `[[:'. */ |
| 2314 | if (p == pend) FREE_STACK_RETURN (REG_EBRACK); | 2304 | if (p == pend) FREE_STACK_RETURN (REG_EBRACK); |
| @@ -2421,9 +2411,8 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2421 | } | 2411 | } |
| 2422 | else | 2412 | else |
| 2423 | { | 2413 | { |
| 2424 | c1++; | 2414 | /* Go back to right after the "[:". */ |
| 2425 | while (c1--) | 2415 | p = class_beg; |
| 2426 | PATUNFETCH; | ||
| 2427 | SET_LIST_BIT ('['); | 2416 | SET_LIST_BIT ('['); |
| 2428 | 2417 | ||
| 2429 | /* Because the `:' may starts the range, we | 2418 | /* Because the `:' may starts the range, we |
| @@ -2584,9 +2573,9 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2584 | if (p+1 < pend) | 2573 | if (p+1 < pend) |
| 2585 | { | 2574 | { |
| 2586 | /* Look for a special (?...) construct */ | 2575 | /* Look for a special (?...) construct */ |
| 2587 | PATFETCH (c); | 2576 | if ((syntax & RE_SHY_GROUPS) && *p == '?') |
| 2588 | if ((syntax & RE_SHY_GROUPS) && c == '?') | ||
| 2589 | { | 2577 | { |
| 2578 | PATFETCH (c); /* Gobble up the '?'. */ | ||
| 2590 | PATFETCH (c); | 2579 | PATFETCH (c); |
| 2591 | switch (c) | 2580 | switch (c) |
| 2592 | { | 2581 | { |
| @@ -2596,7 +2585,6 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2596 | FREE_STACK_RETURN (REG_BADPAT); | 2585 | FREE_STACK_RETURN (REG_BADPAT); |
| 2597 | } | 2586 | } |
| 2598 | } | 2587 | } |
| 2599 | else PATUNFETCH; | ||
| 2600 | } | 2588 | } |
| 2601 | 2589 | ||
| 2602 | if (!shy) | 2590 | if (!shy) |
| @@ -2744,8 +2732,9 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2744 | if (!(syntax & RE_INTERVALS) | 2732 | if (!(syntax & RE_INTERVALS) |
| 2745 | /* If we're at `\{' and it's not the open-interval | 2733 | /* If we're at `\{' and it's not the open-interval |
| 2746 | operator. */ | 2734 | operator. */ |
| 2747 | || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) | 2735 | || (syntax & RE_NO_BK_BRACES) |
| 2748 | || (p - 2 == pattern && p == pend)) | 2736 | /* What is that? -sm */ |
| 2737 | /* || (p - 2 == pattern && p == pend) */) | ||
| 2749 | goto normal_backslash; | 2738 | goto normal_backslash; |
| 2750 | 2739 | ||
| 2751 | handle_interval: | 2740 | handle_interval: |
| @@ -2755,7 +2744,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2755 | /* At least (most) this many matches must be made. */ | 2744 | /* At least (most) this many matches must be made. */ |
| 2756 | int lower_bound = 0, upper_bound = -1; | 2745 | int lower_bound = 0, upper_bound = -1; |
| 2757 | 2746 | ||
| 2758 | beg_interval = p - 1; | 2747 | beg_interval = p; |
| 2759 | 2748 | ||
| 2760 | if (p == pend) | 2749 | if (p == pend) |
| 2761 | { | 2750 | { |
| @@ -2768,16 +2757,13 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2768 | GET_UNSIGNED_NUMBER (lower_bound); | 2757 | GET_UNSIGNED_NUMBER (lower_bound); |
| 2769 | 2758 | ||
| 2770 | if (c == ',') | 2759 | if (c == ',') |
| 2771 | { | 2760 | GET_UNSIGNED_NUMBER (upper_bound); |
| 2772 | GET_UNSIGNED_NUMBER (upper_bound); | ||
| 2773 | if (upper_bound < 0) upper_bound = RE_DUP_MAX; | ||
| 2774 | } | ||
| 2775 | else | 2761 | else |
| 2776 | /* Interval such as `{1}' => match exactly once. */ | 2762 | /* Interval such as `{1}' => match exactly once. */ |
| 2777 | upper_bound = lower_bound; | 2763 | upper_bound = lower_bound; |
| 2778 | 2764 | ||
| 2779 | if (lower_bound < 0 || upper_bound > RE_DUP_MAX | 2765 | if (lower_bound < 0 || upper_bound > RE_DUP_MAX |
| 2780 | || lower_bound > upper_bound) | 2766 | || (upper_bound >= 0 && lower_bound > upper_bound)) |
| 2781 | { | 2767 | { |
| 2782 | if (syntax & RE_NO_BK_BRACES) | 2768 | if (syntax & RE_NO_BK_BRACES) |
| 2783 | goto unfetch_interval; | 2769 | goto unfetch_interval; |
| @@ -2813,15 +2799,13 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2813 | goto unfetch_interval; | 2799 | goto unfetch_interval; |
| 2814 | } | 2800 | } |
| 2815 | 2801 | ||
| 2816 | /* If the upper bound is zero, don't want to succeed at | ||
| 2817 | all; jump from `laststart' to `b + 3', which will be | ||
| 2818 | the end of the buffer after we insert the jump. */ | ||
| 2819 | if (upper_bound == 0) | 2802 | if (upper_bound == 0) |
| 2820 | { | 2803 | /* If the upper bound is zero, just drop the sub pattern |
| 2821 | GET_BUFFER_SPACE (3); | 2804 | altogether. */ |
| 2822 | INSERT_JUMP (jump, laststart, b + 3); | 2805 | b = laststart; |
| 2823 | b += 3; | 2806 | else if (lower_bound == 1 && upper_bound == 1) |
| 2824 | } | 2807 | /* Just match it once: nothing to do here. */ |
| 2808 | ; | ||
| 2825 | 2809 | ||
| 2826 | /* Otherwise, we have a nontrivial interval. When | 2810 | /* Otherwise, we have a nontrivial interval. When |
| 2827 | we're all done, the pattern will look like: | 2811 | we're all done, the pattern will look like: |
| @@ -2835,28 +2819,49 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2835 | else | 2819 | else |
| 2836 | { /* If the upper bound is > 1, we need to insert | 2820 | { /* If the upper bound is > 1, we need to insert |
| 2837 | more at the end of the loop. */ | 2821 | more at the end of the loop. */ |
| 2838 | unsigned nbytes = 10 + (upper_bound > 1) * 10; | 2822 | unsigned int nbytes = (upper_bound < 0 ? 3 |
| 2839 | 2823 | : upper_bound > 1 ? 5 : 0); | |
| 2840 | GET_BUFFER_SPACE (nbytes); | 2824 | unsigned int startoffset = 0; |
| 2841 | 2825 | ||
| 2842 | /* Initialize lower bound of the `succeed_n', even | 2826 | GET_BUFFER_SPACE (20); /* We might use less. */ |
| 2843 | though it will be set during matching by its | 2827 | |
| 2844 | attendant `set_number_at' (inserted next), | 2828 | if (lower_bound == 0) |
| 2845 | because `re_compile_fastmap' needs to know. | 2829 | { |
| 2846 | Jump to the `jump_n' we might insert below. */ | 2830 | /* A succeed_n that starts with 0 is really a |
| 2847 | INSERT_JUMP2 (succeed_n, laststart, | 2831 | a simple on_failure_jump_loop. */ |
| 2848 | b + 5 + (upper_bound > 1) * 5, | 2832 | INSERT_JUMP (on_failure_jump_loop, laststart, |
| 2849 | lower_bound); | 2833 | b + 3 + nbytes); |
| 2850 | b += 5; | 2834 | b += 3; |
| 2851 | 2835 | } | |
| 2852 | /* Code to initialize the lower bound. Insert | 2836 | else |
| 2853 | before the `succeed_n'. The `5' is the last two | 2837 | { |
| 2854 | bytes of this `set_number_at', plus 3 bytes of | 2838 | /* Initialize lower bound of the `succeed_n', even |
| 2855 | the following `succeed_n'. */ | 2839 | though it will be set during matching by its |
| 2856 | insert_op2 (set_number_at, laststart, 5, lower_bound, b); | 2840 | attendant `set_number_at' (inserted next), |
| 2857 | b += 5; | 2841 | because `re_compile_fastmap' needs to know. |
| 2858 | 2842 | Jump to the `jump_n' we might insert below. */ | |
| 2859 | if (upper_bound > 1) | 2843 | INSERT_JUMP2 (succeed_n, laststart, |
| 2844 | b + 5 + nbytes, | ||
| 2845 | lower_bound); | ||
| 2846 | b += 5; | ||
| 2847 | |||
| 2848 | /* Code to initialize the lower bound. Insert | ||
| 2849 | before the `succeed_n'. The `5' is the last two | ||
| 2850 | bytes of this `set_number_at', plus 3 bytes of | ||
| 2851 | the following `succeed_n'. */ | ||
| 2852 | insert_op2 (set_number_at, laststart, 5, lower_bound, b); | ||
| 2853 | b += 5; | ||
| 2854 | startoffset += 5; | ||
| 2855 | } | ||
| 2856 | |||
| 2857 | if (upper_bound < 0) | ||
| 2858 | { | ||
| 2859 | /* A negative upper bound stands for infinity, | ||
| 2860 | in which case it degenerates to a plain jump. */ | ||
| 2861 | STORE_JUMP (jump, b, laststart + startoffset); | ||
| 2862 | b += 3; | ||
| 2863 | } | ||
| 2864 | else if (upper_bound > 1) | ||
| 2860 | { /* More than one repetition is allowed, so | 2865 | { /* More than one repetition is allowed, so |
| 2861 | append a backward jump to the `succeed_n' | 2866 | append a backward jump to the `succeed_n' |
| 2862 | that starts this interval. | 2867 | that starts this interval. |
| @@ -2864,7 +2869,7 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2864 | When we've reached this during matching, | 2869 | When we've reached this during matching, |
| 2865 | we'll have matched the interval once, so | 2870 | we'll have matched the interval once, so |
| 2866 | jump back only `upper_bound - 1' times. */ | 2871 | jump back only `upper_bound - 1' times. */ |
| 2867 | STORE_JUMP2 (jump_n, b, laststart + 5, | 2872 | STORE_JUMP2 (jump_n, b, laststart + startoffset, |
| 2868 | upper_bound - 1); | 2873 | upper_bound - 1); |
| 2869 | b += 5; | 2874 | b += 5; |
| 2870 | 2875 | ||
| @@ -2899,14 +2904,15 @@ regex_compile (pattern, size, syntax, bufp) | |||
| 2899 | beg_interval = NULL; | 2904 | beg_interval = NULL; |
| 2900 | 2905 | ||
| 2901 | /* normal_char and normal_backslash need `c'. */ | 2906 | /* normal_char and normal_backslash need `c'. */ |
| 2902 | PATFETCH (c); | 2907 | c = '{'; |
| 2903 | 2908 | ||
| 2904 | if (!(syntax & RE_NO_BK_BRACES)) | 2909 | if (!(syntax & RE_NO_BK_BRACES)) |
| 2905 | { | 2910 | { |
| 2906 | if (p > pattern && p[-1] == '\\') | 2911 | assert (p > pattern && p[-1] == '\\'); |
| 2907 | goto normal_backslash; | 2912 | goto normal_backslash; |
| 2908 | } | 2913 | } |
| 2909 | goto normal_char; | 2914 | else |
| 2915 | goto normal_char; | ||
| 2910 | 2916 | ||
| 2911 | #ifdef emacs | 2917 | #ifdef emacs |
| 2912 | /* There is no way to specify the before_dot and after_dot | 2918 | /* There is no way to specify the before_dot and after_dot |
| @@ -3311,9 +3317,6 @@ re_compile_fastmap (bufp) | |||
| 3311 | match the empty string. */ | 3317 | match the empty string. */ |
| 3312 | boolean path_can_be_null = true; | 3318 | boolean path_can_be_null = true; |
| 3313 | 3319 | ||
| 3314 | /* We aren't doing a `succeed_n' to begin with. */ | ||
| 3315 | boolean succeed_n_p = false; | ||
| 3316 | |||
| 3317 | /* If all elements for base leading-codes in fastmap is set, this | 3320 | /* If all elements for base leading-codes in fastmap is set, this |
| 3318 | flag is set true. */ | 3321 | flag is set true. */ |
| 3319 | boolean match_any_multibyte_characters = false; | 3322 | boolean match_any_multibyte_characters = false; |
| @@ -3353,7 +3356,7 @@ re_compile_fastmap (bufp) | |||
| 3353 | as used for the *? operator. */ | 3356 | as used for the *? operator. */ |
| 3354 | unsigned char *p1 = p; | 3357 | unsigned char *p1 = p; |
| 3355 | 3358 | ||
| 3356 | if (p == pend || *p == succeed) | 3359 | if (p >= pend || *p == succeed) |
| 3357 | { | 3360 | { |
| 3358 | /* We have reached the (effective) end of pattern. */ | 3361 | /* We have reached the (effective) end of pattern. */ |
| 3359 | if (!PATTERN_STACK_EMPTY ()) | 3362 | if (!PATTERN_STACK_EMPTY ()) |
| @@ -3510,7 +3513,6 @@ re_compile_fastmap (bufp) | |||
| 3510 | continue; | 3513 | continue; |
| 3511 | 3514 | ||
| 3512 | 3515 | ||
| 3513 | case jump_n: | ||
| 3514 | case jump: | 3516 | case jump: |
| 3515 | EXTRACT_NUMBER_AND_INCR (j, p); | 3517 | EXTRACT_NUMBER_AND_INCR (j, p); |
| 3516 | if (j < 0) | 3518 | if (j < 0) |
| @@ -3539,51 +3541,30 @@ re_compile_fastmap (bufp) | |||
| 3539 | case on_failure_jump_nastyloop: | 3541 | case on_failure_jump_nastyloop: |
| 3540 | case on_failure_jump_loop: | 3542 | case on_failure_jump_loop: |
| 3541 | case on_failure_jump_smart: | 3543 | case on_failure_jump_smart: |
| 3542 | handle_on_failure_jump: | ||
| 3543 | EXTRACT_NUMBER_AND_INCR (j, p); | 3544 | EXTRACT_NUMBER_AND_INCR (j, p); |
| 3544 | |||
| 3545 | /* For some patterns, e.g., `(a?)?', `p+j' here points to the | ||
| 3546 | end of the pattern. We don't want to push such a point, | ||
| 3547 | since when we restore it above, entering the switch will | ||
| 3548 | increment `p' past the end of the pattern. We don't need | ||
| 3549 | to push such a point since we obviously won't find any more | ||
| 3550 | fastmap entries beyond `pend'. Such a pattern can match | ||
| 3551 | the null string, though. */ | ||
| 3552 | if (p + j <= p1) | 3545 | if (p + j <= p1) |
| 3553 | /* Backward jump to be ignored. */ | 3546 | ; /* Backward jump to be ignored. */ |
| 3554 | ; | 3547 | else if (!PUSH_PATTERN_OP (p + j, fail_stack)) |
| 3555 | else if (p + j < pend) | 3548 | return (RESET_FAIL_STACK (), -2); |
| 3556 | { | ||
| 3557 | if (!PUSH_PATTERN_OP (p + j, fail_stack)) | ||
| 3558 | { | ||
| 3559 | RESET_FAIL_STACK (); | ||
| 3560 | return -2; | ||
| 3561 | } | ||
| 3562 | } | ||
| 3563 | else | ||
| 3564 | bufp->can_be_null = 1; | ||
| 3565 | |||
| 3566 | if (succeed_n_p) | ||
| 3567 | { | ||
| 3568 | EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ | ||
| 3569 | succeed_n_p = false; | ||
| 3570 | } | ||
| 3571 | |||
| 3572 | continue; | 3549 | continue; |
| 3573 | 3550 | ||
| 3574 | 3551 | ||
| 3552 | case jump_n: | ||
| 3553 | /* This code simply does not properly handle forward jump_n. */ | ||
| 3554 | DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); assert (j < 0)); | ||
| 3555 | p += 4; | ||
| 3556 | /* jump_n can either jump or fall through. The (backward) jump | ||
| 3557 | case has already been handled, so we only need to look at the | ||
| 3558 | fallthrough case. */ | ||
| 3559 | continue; | ||
| 3560 | |||
| 3575 | case succeed_n: | 3561 | case succeed_n: |
| 3576 | /* Get to the number of times to succeed. */ | 3562 | /* If N == 0, it should be an on_failure_jump_loop instead. */ |
| 3577 | p += 2; | 3563 | DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); assert (j > 0)); |
| 3578 | 3564 | p += 4; | |
| 3579 | /* Increment p past the n for when k != 0. */ | 3565 | /* We only care about one iteration of the loop, so we don't |
| 3580 | EXTRACT_NUMBER_AND_INCR (k, p); | 3566 | need to consider the case where this behaves like an |
| 3581 | if (k == 0) | 3567 | on_failure_jump. */ |
| 3582 | { | ||
| 3583 | p -= 4; | ||
| 3584 | succeed_n_p = true; /* Spaghetti code alert. */ | ||
| 3585 | goto handle_on_failure_jump; | ||
| 3586 | } | ||
| 3587 | continue; | 3568 | continue; |
| 3588 | 3569 | ||
| 3589 | 3570 | ||