aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Mattias Engdegård 2023-06-20 12:12:50 +0200
committer Mattias Engdegård 2023-06-21 18:00:26 +0200
commit be91192ecb1e0dff794582cd463f0a6480d160ef (patch)
tree 156031723d39088ce4e2dd52a3a7b7068fbc5880 /src
parent dae8aab52874441a70a94435d50f25b27301d9b0 (diff)
download emacs-be91192ecb1e0dff794582cd463f0a6480d160ef.tar.gz
emacs-be91192ecb1e0dff794582cd463f0a6480d160ef.zip
Straighten regexp postfix operator after zero-width assertion parse
The zero-width assertions \` \' \b \B were parsed in a sloppy way so that a following postfix repetition operator could yield surprising results. For instance, "\\b*" would act as "\\b\\*", and "xy\\b*" would act as "\\(?:xy\\b\\)*".

Except for \` and ^, any following postfix operator now applies to the zero-width assertion itself only, which is predictable and consistent with other assertions, if useless in practice. For historical compatibility, an operator character following \` and ^ always becomes a literal. (Bug#64128)

* src/regex-emacs.c (regex_compile): Set `laststart` appropriately for each zero-width assertion instead of leaving it with whatever value it had before. Remove a redundant condition.
* test/src/regex-emacs-tests.el (regexp-tests-zero-width-assertion-repetition): New test.
* doc/lispref/searching.texi (Regexp Special): Say that repetition operators are not special after \`, and that they work as expected after other backslash escapes.
* etc/NEWS: Announce.
Diffstat (limited to 'src')
-rw-r--r-- src/regex-emacs.c 15
1 file changed, 13 insertions, 2 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index fea34df991b..9e298b81ebb 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
@@ -1716,7 +1716,8 @@ regex_compile (re_char *pattern, ptrdiff_t size,
1716 1716
1717 /* Address of start of the most recently finished expression. 1717 /* Address of start of the most recently finished expression.
1718 This tells, e.g., postfix * where to find the start of its 1718 This tells, e.g., postfix * where to find the start of its
1719 operand. Reset at the beginning of groups and alternatives. */ 1719 operand. Reset at the beginning of groups and alternatives,
1720 and after ^ and \` for dusty-deck compatibility. */
1720 unsigned char *laststart = 0; 1721 unsigned char *laststart = 0;
1721 1722
1722 /* Address of beginning of regexp, or inside of last group. */ 1723 /* Address of beginning of regexp, or inside of last group. */
@@ -1847,12 +1848,16 @@ regex_compile (re_char *pattern, ptrdiff_t size,
1847 case '^': 1848 case '^':
1848 if (! (p == pattern + 1 || at_begline_loc_p (pattern, p))) 1849 if (! (p == pattern + 1 || at_begline_loc_p (pattern, p)))
1849 goto normal_char; 1850 goto normal_char;
1851 /* Special case for compatibility: postfix ops after ^ become
1852 literals. */
1853 laststart = 0;
1850 BUF_PUSH (begline); 1854 BUF_PUSH (begline);
1851 break; 1855 break;
1852 1856
1853 case '$': 1857 case '$':
1854 if (! (p == pend || at_endline_loc_p (p, pend))) 1858 if (! (p == pend || at_endline_loc_p (p, pend)))
1855 goto normal_char; 1859 goto normal_char;
1860 laststart = b;
1856 BUF_PUSH (endline); 1861 BUF_PUSH (endline);
1857 break; 1862 break;
1858 1863
@@ -1892,7 +1897,7 @@ regex_compile (re_char *pattern, ptrdiff_t size,
1892 1897
1893 /* Star, etc. applied to an empty pattern is equivalent 1898 /* Star, etc. applied to an empty pattern is equivalent
1894 to an empty pattern. */ 1899 to an empty pattern. */
1895 if (!laststart || laststart == b) 1900 if (laststart == b)
1896 break; 1901 break;
1897 1902
1898 /* Now we know whether or not zero matches is allowed 1903 /* Now we know whether or not zero matches is allowed
@@ -2544,18 +2549,24 @@ regex_compile (re_char *pattern, ptrdiff_t size,
2544 break; 2549 break;
2545 2550
2546 case 'b': 2551 case 'b':
2552 laststart = b;
2547 BUF_PUSH (wordbound); 2553 BUF_PUSH (wordbound);
2548 break; 2554 break;
2549 2555
2550 case 'B': 2556 case 'B':
2557 laststart = b;
2551 BUF_PUSH (notwordbound); 2558 BUF_PUSH (notwordbound);
2552 break; 2559 break;
2553 2560
2554 case '`': 2561 case '`':
2562 /* Special case for compatibility: postfix ops after \` become
2563 literals, as for ^ (see above). */
2564 laststart = 0;
2555 BUF_PUSH (begbuf); 2565 BUF_PUSH (begbuf);
2556 break; 2566 break;
2557 2567
2558 case '\'': 2568 case '\'':
2569 laststart = b;
2559 BUF_PUSH (endbuf); 2570 BUF_PUSH (endbuf);
2560 break; 2571 break;
2561 2572