diff options
| author | Mattias Engdegård | 2023-07-22 17:26:11 +0200 |
|---|---|---|
| committer | Mattias Engdegård | 2023-07-22 18:26:57 +0200 |
| commit | 5d2d28458d0eb378a7e94363ef716e8648ef129a (patch) | |
| tree | f2ca6c379a81372444e2b5841c12cef7c84f6ed3 /src | |
| parent | cfdce1a19fa8a845b78e535b510932df945598ad (diff) | |
| download | emacs-5d2d28458d0eb378a7e94363ef716e8648ef129a.tar.gz emacs-5d2d28458d0eb378a7e94363ef716e8648ef129a.zip | |
Fix regexp character class syntax property ghost matching bug
The syntax-table-dependent regexp character classes [:space:],
[:word:] and [:punct:] always use the buffer-local syntax table for
performance reasons. Fix a bug that could cause ghost (mis)matches
from use of lingering state by constructs that do use syntax
properties, such as `\sX`.
* src/regex-emacs.c (BUFFER_SYNTAX): New macro.
(ISPUNCT, ISSPACE, ISWORD): Use BUFFER_SYNTAX instead of SYNTAX.
(regex_compile): Delete syntax table setup code that is no longer
needed.
* test/src/regex-emacs-tests.el (regex-emacs-syntax-properties):
New regression test.
Diffstat (limited to 'src')
| -rw-r--r-- | src/regex-emacs.c | 24 |
1 files changed, 12 insertions, 12 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c index 51fc2b0558d..7e75f0ac597 100644 --- a/src/regex-emacs.c +++ b/src/regex-emacs.c | |||
| @@ -47,6 +47,9 @@ | |||
| 47 | /* Make syntax table lookup grant data in gl_state. */ | 47 | /* Make syntax table lookup grant data in gl_state. */ |
| 48 | #define SYNTAX(c) syntax_property (c, 1) | 48 | #define SYNTAX(c) syntax_property (c, 1) |
| 49 | 49 | ||
| 50 | /* Explicit syntax lookup using the buffer-local table. */ | ||
| 51 | #define BUFFER_SYNTAX(c) syntax_property (c, 0) | ||
| 52 | |||
| 50 | #define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) | 53 | #define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) |
| 51 | #define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte) | 54 | #define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte) |
| 52 | #define RE_STRING_CHAR(p, multibyte) \ | 55 | #define RE_STRING_CHAR(p, multibyte) \ |
| @@ -132,18 +135,22 @@ | |||
| 132 | 135 | ||
| 133 | #define ISLOWER(c) lowercasep (c) | 136 | #define ISLOWER(c) lowercasep (c) |
| 134 | 137 | ||
| 138 | #define ISUPPER(c) uppercasep (c) | ||
| 139 | |||
| 140 | /* The following predicates use the buffer-local syntax table and | ||
| 141 | ignore syntax properties, for consistency with the up-front | ||
| 142 | assumptions made at compile time. */ | ||
| 143 | |||
| 135 | #define ISPUNCT(c) (IS_REAL_ASCII (c) \ | 144 | #define ISPUNCT(c) (IS_REAL_ASCII (c) \ |
| 136 | ? ((c) > ' ' && (c) < 0177 \ | 145 | ? ((c) > ' ' && (c) < 0177 \ |
| 137 | && !(((c) >= 'a' && (c) <= 'z') \ | 146 | && !(((c) >= 'a' && (c) <= 'z') \ |
| 138 | || ((c) >= 'A' && (c) <= 'Z') \ | 147 | || ((c) >= 'A' && (c) <= 'Z') \ |
| 139 | || ((c) >= '0' && (c) <= '9'))) \ | 148 | || ((c) >= '0' && (c) <= '9'))) \ |
| 140 | : SYNTAX (c) != Sword) | 149 | : BUFFER_SYNTAX (c) != Sword) |
| 141 | 150 | ||
| 142 | #define ISSPACE(c) (SYNTAX (c) == Swhitespace) | 151 | #define ISSPACE(c) (BUFFER_SYNTAX (c) == Swhitespace) |
| 143 | 152 | ||
| 144 | #define ISUPPER(c) uppercasep (c) | 153 | #define ISWORD(c) (BUFFER_SYNTAX (c) == Sword) |
| 145 | |||
| 146 | #define ISWORD(c) (SYNTAX (c) == Sword) | ||
| 147 | 154 | ||
| 148 | /* Use alloca instead of malloc. This is because using malloc in | 155 | /* Use alloca instead of malloc. This is because using malloc in |
| 149 | re_search* or re_match* could cause memory leaks when C-g is used | 156 | re_search* or re_match* could cause memory leaks when C-g is used |
| @@ -2048,13 +2055,6 @@ regex_compile (re_char *pattern, ptrdiff_t size, | |||
| 2048 | is_xdigit, since they can only match ASCII characters. | 2055 | is_xdigit, since they can only match ASCII characters. |
| 2049 | We don't need to handle them for multibyte. */ | 2056 | We don't need to handle them for multibyte. */ |
| 2050 | 2057 | ||
| 2051 | /* Setup the gl_state object to its buffer-defined value. | ||
| 2052 | This hardcodes the buffer-global syntax-table for ASCII | ||
| 2053 | chars, while the other chars will obey syntax-table | ||
| 2054 | properties. It's not ideal, but it's the way it's been | ||
| 2055 | done until now. */ | ||
| 2056 | SETUP_BUFFER_SYNTAX_TABLE (); | ||
| 2057 | |||
| 2058 | for (c = 0; c < 0x80; ++c) | 2058 | for (c = 0; c < 0x80; ++c) |
| 2059 | if (re_iswctype (c, cc)) | 2059 | if (re_iswctype (c, cc)) |
| 2060 | { | 2060 | { |