diff options
| author | Mattias EngdegÄrd | 2021-02-28 13:06:24 +0100 |
|---|---|---|
| committer | Mattias EngdegÄrd | 2021-02-28 13:06:24 +0100 |
| commit | bdea1883cc8feb8a607c3d05191e7dc8d12f0aa0 (patch) | |
| tree | 93d93a3c7f2fc7b0aa821a128be8c306645a7519 | |
| parent | aad8ffafa89fe46ff5d63bd0127274f74019d50f (diff) | |
| download | emacs-bdea1883cc8feb8a607c3d05191e7dc8d12f0aa0.tar.gz emacs-bdea1883cc8feb8a607c3d05191e7dc8d12f0aa0.zip | |
Fix pcase 'rx' pattern match-data bug
The pcase 'rx' pattern would in some cases allow the match data to be
clobbered before it is read. For example:
(pcase "PQR"
((and (rx (let a nonl)) (rx ?z)) (list 'one a))
((rx (let b ?Q)) (list 'two b)))
The above returned (two "P") instead of the correct (two "Q").
This occurred because the calls to string-match and match-string were
presented as separate patterns to pcase, which would interleave them
with other patterns.
As a remedy, combine string matching and match-data extraction into a
single pcase pattern. This introduces a slight inefficiency for two
or more submatches as they are grouped into a list structure which
then has to be destructured.
Found by Stefan Monnier. See discussion at
https://lists.gnu.org/archive/html/emacs-devel/2021-02/msg02010.html
* lisp/emacs-lisp/rx.el (rx--reduce-right): New helper.
(rx [pcase macro]): Combine string-match and match-string calls into a
single pcase pattern.
* test/lisp/emacs-lisp/rx-tests.el (rx-pcase): Add test cases.
| -rw-r--r-- | lisp/emacs-lisp/rx.el | 37 | ||||
| -rw-r--r-- | test/lisp/emacs-lisp/rx-tests.el | 8 |
2 files changed, 35 insertions, 10 deletions
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el index ffc21951b64..56e588ee0d5 100644 --- a/lisp/emacs-lisp/rx.el +++ b/lisp/emacs-lisp/rx.el | |||
| @@ -1418,6 +1418,12 @@ into a plain rx-expression, collecting names into `rx--pcase-vars'." | |||
| 1418 | (cons head (mapcar #'rx--pcase-transform rest))) | 1418 | (cons head (mapcar #'rx--pcase-transform rest))) |
| 1419 | (_ rx))) | 1419 | (_ rx))) |
| 1420 | 1420 | ||
| 1421 | (defun rx--reduce-right (f l) | ||
| 1422 | "Right-reduction on L by F. L must be non-empty." | ||
| 1423 | (if (cdr l) | ||
| 1424 | (funcall f (car l) (rx--reduce-right f (cdr l))) | ||
| 1425 | (car l))) | ||
| 1426 | |||
| 1421 | ;;;###autoload | 1427 | ;;;###autoload |
| 1422 | (pcase-defmacro rx (&rest regexps) | 1428 | (pcase-defmacro rx (&rest regexps) |
| 1423 | "A pattern that matches strings against `rx' REGEXPS in sexp form. | 1429 | "A pattern that matches strings against `rx' REGEXPS in sexp form. |
| @@ -1436,17 +1442,28 @@ following constructs: | |||
| 1436 | introduced by a previous (let REF ...) | 1442 | introduced by a previous (let REF ...) |
| 1437 | construct." | 1443 | construct." |
| 1438 | (let* ((rx--pcase-vars nil) | 1444 | (let* ((rx--pcase-vars nil) |
| 1439 | (regexp (rx--to-expr (rx--pcase-transform (cons 'seq regexps))))) | 1445 | (regexp (rx--to-expr (rx--pcase-transform (cons 'seq regexps)))) |
| 1446 | (nvars (length rx--pcase-vars))) | ||
| 1440 | `(and (pred stringp) | 1447 | `(and (pred stringp) |
| 1441 | ;; `pcase-let' takes a match for granted and discards all unnecessary | 1448 | ,(if (zerop nvars) |
| 1442 | ;; conditions, which means that a `pred' clause cannot be used for | 1449 | ;; No variables bound: a single predicate suffices. |
| 1443 | ;; the match condition. The following construct seems to survive. | 1450 | `(pred (string-match ,regexp)) |
| 1444 | (app (lambda (s) (string-match ,regexp s)) (pred identity)) | 1451 | ;; Pack the submatches into a dotted list which is then |
| 1445 | ,@(let ((i 0)) | 1452 | ;; immediately destructured into individual variables again. |
| 1446 | (mapcar (lambda (name) | 1453 | ;; This is of course slightly inefficient when NVARS > 1. |
| 1447 | (setq i (1+ i)) | 1454 | ;; A dotted list is used to reduce the number of conses |
| 1448 | `(app (match-string ,i) ,name)) | 1455 | ;; to create and take apart. |
| 1449 | (reverse rx--pcase-vars)))))) | 1456 | `(app (lambda (s) |
| 1457 | (and (string-match ,regexp s) | ||
| 1458 | ,(rx--reduce-right | ||
| 1459 | (lambda (a b) `(cons ,a ,b)) | ||
| 1460 | (mapcar (lambda (i) `(match-string ,i s)) | ||
| 1461 | (number-sequence 1 nvars))))) | ||
| 1462 | ,(list '\` | ||
| 1463 | (rx--reduce-right | ||
| 1464 | #'cons | ||
| 1465 | (mapcar (lambda (name) (list '\, name)) | ||
| 1466 | (reverse rx--pcase-vars))))))))) | ||
| 1450 | 1467 | ||
| 1451 | ;; Obsolete internal symbol, used in old versions of the `flycheck' package. | 1468 | ;; Obsolete internal symbol, used in old versions of the `flycheck' package. |
| 1452 | (define-obsolete-function-alias 'rx-submatch-n 'rx-to-string "27.1") | 1469 | (define-obsolete-function-alias 'rx-submatch-n 'rx-to-string "27.1") |
diff --git a/test/lisp/emacs-lisp/rx-tests.el b/test/lisp/emacs-lisp/rx-tests.el index fecdcf55aff..2dd1bca22d1 100644 --- a/test/lisp/emacs-lisp/rx-tests.el +++ b/test/lisp/emacs-lisp/rx-tests.el | |||
| @@ -156,6 +156,8 @@ | |||
| 156 | "....."))) | 156 | "....."))) |
| 157 | 157 | ||
| 158 | (ert-deftest rx-pcase () | 158 | (ert-deftest rx-pcase () |
| 159 | (should (equal (pcase "i18n" ((rx (let x (+ digit))) (list 'ok x))) | ||
| 160 | '(ok "18"))) | ||
| 159 | (should (equal (pcase "a 1 2 3 1 1 b" | 161 | (should (equal (pcase "a 1 2 3 1 1 b" |
| 160 | ((rx (let u (+ digit)) space | 162 | ((rx (let u (+ digit)) space |
| 161 | (let v (+ digit)) space | 163 | (let v (+ digit)) space |
| @@ -176,6 +178,12 @@ | |||
| 176 | ((rx nonl) 'wrong) | 178 | ((rx nonl) 'wrong) |
| 177 | (_ 'correct)) | 179 | (_ 'correct)) |
| 178 | 'correct)) | 180 | 'correct)) |
| 181 | (should (equal (pcase "PQR" | ||
| 182 | ((and (rx (let a nonl)) (rx ?z)) | ||
| 183 | (list 'one a)) | ||
| 184 | ((rx (let b ?Q)) | ||
| 185 | (list 'two b))) | ||
| 186 | '(two "Q"))) | ||
| 179 | (should (equal (pcase-let (((rx ?B (let z nonl)) "ABC")) | 187 | (should (equal (pcase-let (((rx ?B (let z nonl)) "ABC")) |
| 180 | (list 'ok z)) | 188 | (list 'ok z)) |
| 181 | '(ok "C"))) | 189 | '(ok "C"))) |