aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Mattias Engdegård 2023-07-22 17:26:11 +0200
committer Mattias Engdegård 2023-07-22 18:26:57 +0200
commit 5d2d28458d0eb378a7e94363ef716e8648ef129a (patch)
tree f2ca6c379a81372444e2b5841c12cef7c84f6ed3 /src
parent cfdce1a19fa8a845b78e535b510932df945598ad (diff)
download emacs-5d2d28458d0eb378a7e94363ef716e8648ef129a.tar.gz
emacs-5d2d28458d0eb378a7e94363ef716e8648ef129a.zip
Fix regexp character class syntax property ghost matching bug
The syntax-table-dependent regexp character classes [:space:], [:word:]
and [:punct:] always use the buffer-local syntax table for performance
reasons.  Fix a bug that could cause ghost (mis)matches from use of
lingering state by constructs that do use syntax properties, such as
`\sX`.

* src/regex-emacs.c (BUFFER_SYNTAX): New macro.
(ISPUNCT, ISSPACE, ISWORD): Use BUFFER_SYNTAX instead of SYNTAX.
(regex_compile): Delete syntax table setup code that is no longer
needed.
* test/src/regex-emacs-tests.el (regex-emacs-syntax-properties): New
regression test.
Diffstat (limited to 'src')
-rw-r--r-- src/regex-emacs.c 24
1 files changed, 12 insertions, 12 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index 51fc2b0558d..7e75f0ac597 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
@@ -47,6 +47,9 @@
47/* Make syntax table lookup grant data in gl_state. */ 47/* Make syntax table lookup grant data in gl_state. */
48#define SYNTAX(c) syntax_property (c, 1) 48#define SYNTAX(c) syntax_property (c, 1)
49 49
50/* Explicit syntax lookup using the buffer-local table. */
51#define BUFFER_SYNTAX(c) syntax_property (c, 0)
52
50#define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) 53#define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
51#define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte) 54#define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
52#define RE_STRING_CHAR(p, multibyte) \ 55#define RE_STRING_CHAR(p, multibyte) \
@@ -132,18 +135,22 @@
132 135
133#define ISLOWER(c) lowercasep (c) 136#define ISLOWER(c) lowercasep (c)
134 137
138#define ISUPPER(c) uppercasep (c)
139
140/* The following predicates use the buffer-local syntax table and
141 ignore syntax properties, for consistency with the up-front
142 assumptions made at compile time. */
143
135#define ISPUNCT(c) (IS_REAL_ASCII (c) \ 144#define ISPUNCT(c) (IS_REAL_ASCII (c) \
136 ? ((c) > ' ' && (c) < 0177 \ 145 ? ((c) > ' ' && (c) < 0177 \
137 && !(((c) >= 'a' && (c) <= 'z') \ 146 && !(((c) >= 'a' && (c) <= 'z') \
138 || ((c) >= 'A' && (c) <= 'Z') \ 147 || ((c) >= 'A' && (c) <= 'Z') \
139 || ((c) >= '0' && (c) <= '9'))) \ 148 || ((c) >= '0' && (c) <= '9'))) \
140 : SYNTAX (c) != Sword) 149 : BUFFER_SYNTAX (c) != Sword)
141 150
142#define ISSPACE(c) (SYNTAX (c) == Swhitespace) 151#define ISSPACE(c) (BUFFER_SYNTAX (c) == Swhitespace)
143 152
144#define ISUPPER(c) uppercasep (c) 153#define ISWORD(c) (BUFFER_SYNTAX (c) == Sword)
145
146#define ISWORD(c) (SYNTAX (c) == Sword)
147 154
148/* Use alloca instead of malloc. This is because using malloc in 155/* Use alloca instead of malloc. This is because using malloc in
149 re_search* or re_match* could cause memory leaks when C-g is used 156 re_search* or re_match* could cause memory leaks when C-g is used
@@ -2048,13 +2055,6 @@ regex_compile (re_char *pattern, ptrdiff_t size,
2048 is_xdigit, since they can only match ASCII characters. 2055 is_xdigit, since they can only match ASCII characters.
2049 We don't need to handle them for multibyte. */ 2056 We don't need to handle them for multibyte. */
2050 2057
2051 /* Setup the gl_state object to its buffer-defined value.
2052 This hardcodes the buffer-global syntax-table for ASCII
2053 chars, while the other chars will obey syntax-table
2054 properties. It's not ideal, but it's the way it's been
2055 done until now. */
2056 SETUP_BUFFER_SYNTAX_TABLE ();
2057
2058 for (c = 0; c < 0x80; ++c) 2058 for (c = 0; c < 0x80; ++c)
2059 if (re_iswctype (c, cc)) 2059 if (re_iswctype (c, cc))
2060 { 2060 {