aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorStefan Monnier2023-10-03 10:10:57 -0400
committerStefan Monnier2023-10-03 10:10:57 -0400
commit37130fd500fbf78ff0d0037aa6275f0f70a415dd (patch)
treeb17e7bc371bfff085b5edc2db2344f83d943d577 /src
parent849de5aa1a42cae6ae1504804acf0c7fb8b13860 (diff)
downloademacs-37130fd500fbf78ff0d0037aa6275f0f70a415dd.tar.gz
emacs-37130fd500fbf78ff0d0037aa6275f0f70a415dd.zip
regex.c: Fix recent regression with mutually_exclusive_p
The new analysis code ended up increasing the scope of an optimization a bit too far. Reign it in. * src/regex-emacs.c (struct mutexcl_data): Add `unconstrained` field. (mutually_exclusive_one): Use and set it. (mutually_exclusive_p): Initialize it. * test/src/regex-emacs-tests.el (regexp-tests-backtrack-optimization): Add test.
Diffstat (limited to 'src')
-rw-r--r--src/regex-emacs.c41
1 files changed, 33 insertions, 8 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index ffb8891d3a6..95c3366652d 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
@@ -3899,6 +3899,7 @@ mutually_exclusive_charset (struct re_pattern_buffer *bufp, re_char *p1,
3899struct mutexcl_data { 3899struct mutexcl_data {
3900 struct re_pattern_buffer *bufp; 3900 struct re_pattern_buffer *bufp;
3901 re_char *p1; 3901 re_char *p1;
3902 bool unconstrained;
3902}; 3903};
3903 3904
3904static bool 3905static bool
@@ -3907,7 +3908,32 @@ mutually_exclusive_one (re_char *p2, void *arg)
3907 struct mutexcl_data *data = arg; 3908 struct mutexcl_data *data = arg;
3908 switch (*p2) 3909 switch (*p2)
3909 { 3910 {
3911 case succeed:
3912 /* If `p1` matches, `succeed` can still match, so we should return
3913 `false`. *BUT* when N iterations of `p1` and N+1 iterations of `p1`
3914 match, the `succeed` that comes after N+1 always takes precedence
3915 over the one after N because we always prefer a longer match, so
3916 the succeed after N can actually be replaced by a "fail" without
3917 changing the end result.
3918 In this sense, "if `p1` matches, `succeed` can't match".
3919 So we can return `true`.
3920 *BUT* this only holds if we're sure that the N+1 will indeed succeed,
3921 so we need to make sure there is no other matching operator between
3922 the exit of the iteration and the `succeed`. */
3923 return data->unconstrained;
3924
3925/* Remember that there may be an empty matching operator on the way.
3926 If we return true, this is the "end" of this control flow path,
3927 so it can't get in the way of a subsequent `succeed. */
3928#define RETURN_CONSTRAIN(v) \
3929 { bool tmp = (v); \
3930 if (!tmp) \
3931 data->unconstrained = false; \
3932 return tmp; \
3933 }
3934
3910 case endline: 3935 case endline:
3936 RETURN_CONSTRAIN (mutually_exclusive_exactn (data->bufp, data->p1, p2));
3911 case exactn: 3937 case exactn:
3912 return mutually_exclusive_exactn (data->bufp, data->p1, p2); 3938 return mutually_exclusive_exactn (data->bufp, data->p1, p2);
3913 case charset: 3939 case charset:
@@ -3945,18 +3971,17 @@ mutually_exclusive_one (re_char *p2, void *arg)
3945 return (*data->p1 == categoryspec && data->p1[1] == p2[1]); 3971 return (*data->p1 == categoryspec && data->p1[1] == p2[1]);
3946 3972
3947 case endbuf: 3973 case endbuf:
3948 case succeed:
3949 return true; 3974 return true;
3950 case wordbeg: 3975 case wordbeg:
3951 return (*data->p1 == notsyntaxspec && data->p1[1] == Sword); 3976 RETURN_CONSTRAIN (*data->p1 == notsyntaxspec && data->p1[1] == Sword);
3952 case wordend: 3977 case wordend:
3953 return (*data->p1 == syntaxspec && data->p1[1] == Sword); 3978 RETURN_CONSTRAIN (*data->p1 == syntaxspec && data->p1[1] == Sword);
3954 case symbeg: 3979 case symbeg:
3955 return (*data->p1 == notsyntaxspec 3980 RETURN_CONSTRAIN (*data->p1 == notsyntaxspec
3956 && (data->p1[1] == Ssymbol || data->p1[1] == Sword)); 3981 && (data->p1[1] == Ssymbol || data->p1[1] == Sword));
3957 case symend: 3982 case symend:
3958 return (*data->p1 == syntaxspec 3983 RETURN_CONSTRAIN (*data->p1 == syntaxspec
3959 && (data->p1[1] == Ssymbol || data->p1[1] == Sword)); 3984 && (data->p1[1] == Ssymbol || data->p1[1] == Sword));
3960 3985
3961 case duplicate: 3986 case duplicate:
3962 /* At this point, we know nothing about what this can match, sadly. */ 3987 /* At this point, we know nothing about what this can match, sadly. */
@@ -3976,7 +4001,7 @@ static bool
3976mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, 4001mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
3977 re_char *p2) 4002 re_char *p2)
3978{ 4003{
3979 struct mutexcl_data data = { bufp, p1 }; 4004 struct mutexcl_data data = { bufp, p1, true };
3980 return forall_firstchar (bufp, p2, NULL, mutually_exclusive_one, &data); 4005 return forall_firstchar (bufp, p2, NULL, mutually_exclusive_one, &data);
3981} 4006}
3982 4007