diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/regex-emacs.c | 53 |
1 files changed, 45 insertions, 8 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c index 52f240bdaf6..55d0a6e8df8 100644 --- a/src/regex-emacs.c +++ b/src/regex-emacs.c | |||
| @@ -3743,9 +3743,20 @@ mutually_exclusive_charset (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 3743 | } | 3743 | } |
| 3744 | 3744 | ||
| 3745 | /* True if "p1 matches something" implies "p2 fails". */ | 3745 | /* True if "p1 matches something" implies "p2 fails". */ |
| 3746 | /* Avoiding inf-loops: | ||
| 3747 | We're trying to follow all paths reachable from `p2`, but since some | ||
| 3748 | loops can match the empty string, this can loop back to `p2`. | ||
| 3749 | To avoid inf-looping, we keep track of points that have been considered | ||
| 3750 | "already". Instead of keeping a list of such points, `done_beg` and | ||
| 3751 | `done_end` delimit a chunk of bytecode we already considered. | ||
| 3752 | To guarantee termination, a lexical ordering between `done_*` and `p2` | ||
| 3753 | should be obeyed: | ||
| 3754 | At each recursion, either `done_beg` gets smaller, | ||
| 3755 | or `done_beg` is unchanged and `done_end` gets larger | ||
| 3756 | or they're both unchanged and `p2` gets larger. */ | ||
| 3746 | static bool | 3757 | static bool |
| 3747 | mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | 3758 | mutually_exclusive_aux (struct re_pattern_buffer *bufp, re_char *p1, |
| 3748 | re_char *p2) | 3759 | re_char *p2, re_char *done_beg, re_char *done_end) |
| 3749 | { | 3760 | { |
| 3750 | re_opcode_t op2; | 3761 | re_opcode_t op2; |
| 3751 | unsigned char *pend = bufp->buffer + bufp->used; | 3762 | unsigned char *pend = bufp->buffer + bufp->used; |
| @@ -3754,6 +3765,9 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 3754 | eassert (p1 >= bufp->buffer && p1 < pend | 3765 | eassert (p1 >= bufp->buffer && p1 < pend |
| 3755 | && p2 >= bufp->buffer && p2 <= pend); | 3766 | && p2 >= bufp->buffer && p2 <= pend); |
| 3756 | 3767 | ||
| 3768 | eassert (done_beg <= done_end); | ||
| 3769 | eassert (done_end <= p2); | ||
| 3770 | |||
| 3757 | /* Skip over open/close-group commands. | 3771 | /* Skip over open/close-group commands. |
| 3758 | If what follows this loop is a ...+ construct, | 3772 | If what follows this loop is a ...+ construct, |
| 3759 | look at what begins its body, since we will have to | 3773 | look at what begins its body, since we will have to |
| @@ -3844,12 +3858,29 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 3844 | int mcnt; | 3858 | int mcnt; |
| 3845 | p2++; | 3859 | p2++; |
| 3846 | EXTRACT_NUMBER_AND_INCR (mcnt, p2); | 3860 | EXTRACT_NUMBER_AND_INCR (mcnt, p2); |
| 3847 | /* Don't just test `mcnt > 0` because non-greedy loops have | 3861 | re_char *p2_other = p2 + mcnt; |
| 3848 | their test at the end with an unconditional jump at the start. */ | 3862 | |
| 3849 | if (p2 > p2_orig && mcnt >= 0) /* Ensure forward progress. */ | 3863 | /* When we jump backward we bump `done_end` up to `p3` under |
| 3850 | return (mutually_exclusive_p (bufp, p1, p2) | 3864 | the assumption that any other position between `done_end` |
| 3851 | && mutually_exclusive_p (bufp, p1, p2 + mcnt)); | 3865 | and `p3` is either: |
| 3852 | break; | 3866 | - checked by the other call to RECURSE. |
| 3867 | - not reachable from here (e.g. for positions before the | ||
| 3868 | `on_failure_jump`), or at least not without first | ||
| 3869 | jumping before `done_beg`. | ||
| 3870 | This should hold because our state machines are not arbitrary: | ||
| 3871 | they consists of syntaxically nested loops with limited | ||
| 3872 | control flow. | ||
| 3873 | FIXME: This can fail (i.e. return true when it shouldn't) | ||
| 3874 | if we start generating bytecode with a different shape, | ||
| 3875 | so maybe we should bite the bullet and replace done_beg/end | ||
| 3876 | with an actual list of positions we've already processed. */ | ||
| 3877 | #define RECURSE(p3) \ | ||
| 3878 | ((p3) < done_beg ? mutually_exclusive_aux (bufp, p1, p3, p3, p3) \ | ||
| 3879 | : (p3) <= done_end ? true \ | ||
| 3880 | : mutually_exclusive_aux (bufp, p1, p3, done_beg, \ | ||
| 3881 | (p3) > p2_orig ? done_end : (p3))) | ||
| 3882 | |||
| 3883 | return RECURSE (p2) && RECURSE (p2_other); | ||
| 3853 | } | 3884 | } |
| 3854 | 3885 | ||
| 3855 | default: | 3886 | default: |
| @@ -3860,6 +3891,12 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 3860 | return false; | 3891 | return false; |
| 3861 | } | 3892 | } |
| 3862 | 3893 | ||
| 3894 | static bool | ||
| 3895 | mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | ||
| 3896 | re_char *p2) | ||
| 3897 | { | ||
| 3898 | return mutually_exclusive_aux (bufp, p1, p2, p2, p2); | ||
| 3899 | } | ||
| 3863 | 3900 | ||
| 3864 | /* Matching routines. */ | 3901 | /* Matching routines. */ |
| 3865 | 3902 | ||