aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/regex-emacs.c53
1 files changed, 45 insertions, 8 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index 52f240bdaf6..55d0a6e8df8 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
@@ -3743,9 +3743,20 @@ mutually_exclusive_charset (struct re_pattern_buffer *bufp, re_char *p1,
3743} 3743}
3744 3744
3745/* True if "p1 matches something" implies "p2 fails". */ 3745/* True if "p1 matches something" implies "p2 fails". */
3746/* Avoiding inf-loops:
3747 We're trying to follow all paths reachable from `p2`, but since some
3748 loops can match the empty string, this can loop back to `p2`.
3749 To avoid inf-looping, we keep track of points that have been considered
3750 "already". Instead of keeping a list of such points, `done_beg` and
3751 `done_end` delimit a chunk of bytecode we already considered.
3752 To guarantee termination, a lexical ordering between `done_*` and `p2`
3753 should be obeyed:
3754 At each recursion, either `done_beg` gets smaller,
3755 or `done_beg` is unchanged and `done_end` gets larger
3756 or they're both unchanged and `p2` gets larger. */
3746static bool 3757static bool
3747mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, 3758mutually_exclusive_aux (struct re_pattern_buffer *bufp, re_char *p1,
3748 re_char *p2) 3759 re_char *p2, re_char *done_beg, re_char *done_end)
3749{ 3760{
3750 re_opcode_t op2; 3761 re_opcode_t op2;
3751 unsigned char *pend = bufp->buffer + bufp->used; 3762 unsigned char *pend = bufp->buffer + bufp->used;
@@ -3754,6 +3765,9 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
3754 eassert (p1 >= bufp->buffer && p1 < pend 3765 eassert (p1 >= bufp->buffer && p1 < pend
3755 && p2 >= bufp->buffer && p2 <= pend); 3766 && p2 >= bufp->buffer && p2 <= pend);
3756 3767
3768 eassert (done_beg <= done_end);
3769 eassert (done_end <= p2);
3770
3757 /* Skip over open/close-group commands. 3771 /* Skip over open/close-group commands.
3758 If what follows this loop is a ...+ construct, 3772 If what follows this loop is a ...+ construct,
3759 look at what begins its body, since we will have to 3773 look at what begins its body, since we will have to
@@ -3844,12 +3858,29 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
3844 int mcnt; 3858 int mcnt;
3845 p2++; 3859 p2++;
3846 EXTRACT_NUMBER_AND_INCR (mcnt, p2); 3860 EXTRACT_NUMBER_AND_INCR (mcnt, p2);
3847 /* Don't just test `mcnt > 0` because non-greedy loops have 3861 re_char *p2_other = p2 + mcnt;
3848 their test at the end with an unconditional jump at the start. */ 3862
3849 if (p2 > p2_orig && mcnt >= 0) /* Ensure forward progress. */ 3863 /* When we jump backward we bump `done_end` up to `p3` under
3850 return (mutually_exclusive_p (bufp, p1, p2) 3864 the assumption that any other position between `done_end`
3851 && mutually_exclusive_p (bufp, p1, p2 + mcnt)); 3865 and `p3` is either:
3852 break; 3866 - checked by the other call to RECURSE.
3867 - not reachable from here (e.g. for positions before the
3868 `on_failure_jump`), or at least not without first
3869 jumping before `done_beg`.
3870 This should hold because our state machines are not arbitrary:
3871 they consists of syntaxically nested loops with limited
3872 control flow.
3873 FIXME: This can fail (i.e. return true when it shouldn't)
3874 if we start generating bytecode with a different shape,
3875 so maybe we should bite the bullet and replace done_beg/end
3876 with an actual list of positions we've already processed. */
3877#define RECURSE(p3) \
3878 ((p3) < done_beg ? mutually_exclusive_aux (bufp, p1, p3, p3, p3) \
3879 : (p3) <= done_end ? true \
3880 : mutually_exclusive_aux (bufp, p1, p3, done_beg, \
3881 (p3) > p2_orig ? done_end : (p3)))
3882
3883 return RECURSE (p2) && RECURSE (p2_other);
3853 } 3884 }
3854 3885
3855 default: 3886 default:
@@ -3860,6 +3891,12 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
3860 return false; 3891 return false;
3861} 3892}
3862 3893
3894static bool
3895mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
3896 re_char *p2)
3897{
3898 return mutually_exclusive_aux (bufp, p1, p2, p2, p2);
3899}
3863 3900
3864/* Matching routines. */ 3901/* Matching routines. */
3865 3902