diff options
| author | Stefan Monnier | 2023-10-03 10:10:57 -0400 |
|---|---|---|
| committer | Stefan Monnier | 2023-10-03 10:10:57 -0400 |
| commit | 37130fd500fbf78ff0d0037aa6275f0f70a415dd (patch) | |
| tree | b17e7bc371bfff085b5edc2db2344f83d943d577 /src | |
| parent | 849de5aa1a42cae6ae1504804acf0c7fb8b13860 (diff) | |
| download | emacs-37130fd500fbf78ff0d0037aa6275f0f70a415dd.tar.gz emacs-37130fd500fbf78ff0d0037aa6275f0f70a415dd.zip | |
regex.c: Fix recent regression with mutually_exclusive_p
The new analysis code ended up increasing the scope of an optimization
a bit too far. Reign it in.
* src/regex-emacs.c (struct mutexcl_data): Add `unconstrained` field.
(mutually_exclusive_one): Use and set it.
(mutually_exclusive_p): Initialize it.
* test/src/regex-emacs-tests.el (regexp-tests-backtrack-optimization):
Add test.
Diffstat (limited to 'src')
| -rw-r--r-- | src/regex-emacs.c | 41 |
1 files changed, 33 insertions, 8 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c index ffb8891d3a6..95c3366652d 100644 --- a/src/regex-emacs.c +++ b/src/regex-emacs.c | |||
| @@ -3899,6 +3899,7 @@ mutually_exclusive_charset (struct re_pattern_buffer *bufp, re_char *p1, | |||
| 3899 | struct mutexcl_data { | 3899 | struct mutexcl_data { |
| 3900 | struct re_pattern_buffer *bufp; | 3900 | struct re_pattern_buffer *bufp; |
| 3901 | re_char *p1; | 3901 | re_char *p1; |
| 3902 | bool unconstrained; | ||
| 3902 | }; | 3903 | }; |
| 3903 | 3904 | ||
| 3904 | static bool | 3905 | static bool |
| @@ -3907,7 +3908,32 @@ mutually_exclusive_one (re_char *p2, void *arg) | |||
| 3907 | struct mutexcl_data *data = arg; | 3908 | struct mutexcl_data *data = arg; |
| 3908 | switch (*p2) | 3909 | switch (*p2) |
| 3909 | { | 3910 | { |
| 3911 | case succeed: | ||
| 3912 | /* If `p1` matches, `succeed` can still match, so we should return | ||
| 3913 | `false`. *BUT* when N iterations of `p1` and N+1 iterations of `p1` | ||
| 3914 | match, the `succeed` that comes after N+1 always takes precedence | ||
| 3915 | over the one after N because we always prefer a longer match, so | ||
| 3916 | the succeed after N can actually be replaced by a "fail" without | ||
| 3917 | changing the end result. | ||
| 3918 | In this sense, "if `p1` matches, `succeed` can't match". | ||
| 3919 | So we can return `true`. | ||
| 3920 | *BUT* this only holds if we're sure that the N+1 will indeed succeed, | ||
| 3921 | so we need to make sure there is no other matching operator between | ||
| 3922 | the exit of the iteration and the `succeed`. */ | ||
| 3923 | return data->unconstrained; | ||
| 3924 | |||
| 3925 | /* Remember that there may be an empty matching operator on the way. | ||
| 3926 | If we return true, this is the "end" of this control flow path, | ||
| 3927 | so it can't get in the way of a subsequent `succeed. */ | ||
| 3928 | #define RETURN_CONSTRAIN(v) \ | ||
| 3929 | { bool tmp = (v); \ | ||
| 3930 | if (!tmp) \ | ||
| 3931 | data->unconstrained = false; \ | ||
| 3932 | return tmp; \ | ||
| 3933 | } | ||
| 3934 | |||
| 3910 | case endline: | 3935 | case endline: |
| 3936 | RETURN_CONSTRAIN (mutually_exclusive_exactn (data->bufp, data->p1, p2)); | ||
| 3911 | case exactn: | 3937 | case exactn: |
| 3912 | return mutually_exclusive_exactn (data->bufp, data->p1, p2); | 3938 | return mutually_exclusive_exactn (data->bufp, data->p1, p2); |
| 3913 | case charset: | 3939 | case charset: |
| @@ -3945,18 +3971,17 @@ mutually_exclusive_one (re_char *p2, void *arg) | |||
| 3945 | return (*data->p1 == categoryspec && data->p1[1] == p2[1]); | 3971 | return (*data->p1 == categoryspec && data->p1[1] == p2[1]); |
| 3946 | 3972 | ||
| 3947 | case endbuf: | 3973 | case endbuf: |
| 3948 | case succeed: | ||
| 3949 | return true; | 3974 | return true; |
| 3950 | case wordbeg: | 3975 | case wordbeg: |
| 3951 | return (*data->p1 == notsyntaxspec && data->p1[1] == Sword); | 3976 | RETURN_CONSTRAIN (*data->p1 == notsyntaxspec && data->p1[1] == Sword); |
| 3952 | case wordend: | 3977 | case wordend: |
| 3953 | return (*data->p1 == syntaxspec && data->p1[1] == Sword); | 3978 | RETURN_CONSTRAIN (*data->p1 == syntaxspec && data->p1[1] == Sword); |
| 3954 | case symbeg: | 3979 | case symbeg: |
| 3955 | return (*data->p1 == notsyntaxspec | 3980 | RETURN_CONSTRAIN (*data->p1 == notsyntaxspec |
| 3956 | && (data->p1[1] == Ssymbol || data->p1[1] == Sword)); | 3981 | && (data->p1[1] == Ssymbol || data->p1[1] == Sword)); |
| 3957 | case symend: | 3982 | case symend: |
| 3958 | return (*data->p1 == syntaxspec | 3983 | RETURN_CONSTRAIN (*data->p1 == syntaxspec |
| 3959 | && (data->p1[1] == Ssymbol || data->p1[1] == Sword)); | 3984 | && (data->p1[1] == Ssymbol || data->p1[1] == Sword)); |
| 3960 | 3985 | ||
| 3961 | case duplicate: | 3986 | case duplicate: |
| 3962 | /* At this point, we know nothing about what this can match, sadly. */ | 3987 | /* At this point, we know nothing about what this can match, sadly. */ |
| @@ -3976,7 +4001,7 @@ static bool | |||
| 3976 | mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, | 4001 | mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1, |
| 3977 | re_char *p2) | 4002 | re_char *p2) |
| 3978 | { | 4003 | { |
| 3979 | struct mutexcl_data data = { bufp, p1 }; | 4004 | struct mutexcl_data data = { bufp, p1, true }; |
| 3980 | return forall_firstchar (bufp, p2, NULL, mutually_exclusive_one, &data); | 4005 | return forall_firstchar (bufp, p2, NULL, mutually_exclusive_one, &data); |
| 3981 | } | 4006 | } |
| 3982 | 4007 | ||