diff options
| author | Mattias Engdegård | 2019-02-15 19:27:48 +0100 |
|---|---|---|
| committer | Mattias Engdegård | 2019-02-16 12:43:32 +0100 |
| commit | 478bbf7c80e71ff84f0e4e1363bf86e93d9c51c3 (patch) | |
| tree | 7d05c376a0299282d291eff879eedcc6f3d2651d | |
| parent | aff0c585060b7cc92d52a32978c6aa64cf7e2a5e (diff) | |
| download | emacs-478bbf7c80e71ff84f0e4e1363bf86e93d9c51c3.tar.gz emacs-478bbf7c80e71ff84f0e4e1363bf86e93d9c51c3.zip | |
Prevent over-eager rx character range condensation
`rx' incorrectly considers character ranges between ASCII and raw bytes to
cover all codes in between, which includes all non-ASCII Unicode characters.
This causes (any "\000-\377" ?Å) to be simplified to (any "\000-\377"),
which is not at all the same thing: [\000-\377] really means
[\000-\177\200-\377], i.e. ASCII plus the raw bytes, without the Unicode
characters whose codes lie between them (Bug#34492).
* lisp/emacs-lisp/rx.el (rx-any-condense-range): Split ranges going
from ASCII to raw bytes.
* test/lisp/emacs-lisp/rx-tests.el (rx-char-any-raw-byte): Add test case.
* etc/NEWS: Mention the overall change (Bug#33205).
| -rw-r--r-- | etc/NEWS | 8 | ||||
| -rw-r--r-- | lisp/emacs-lisp/rx.el | 7 | ||||
| -rw-r--r-- | test/lisp/emacs-lisp/rx-tests.el | 6 |
3 files changed, 20 insertions, 1 deletion
| @@ -1101,6 +1101,14 @@ subexpression. | |||
| 1101 | When there is no menu for a mode, display the mode name after the | 1101 | When there is no menu for a mode, display the mode name after the |
| 1102 | indicator instead of just the indicator (which is sometimes cryptic). | 1102 | indicator instead of just the indicator (which is sometimes cryptic). |
| 1103 | 1103 | ||
| 1104 | ** rx | ||
| 1105 | |||
| 1106 | --- | ||
| 1107 | *** rx now handles raw bytes in character alternatives correctly, | ||
| 1108 | when given in a string. Previously, '(any "\x80-\xff")' would match | ||
| 1109 | characters U+0080...U+00FF. Now the expression matches raw bytes in | ||
| 1110 | the 128...255 range, as expected. | ||
| 1111 | |||
| 1104 | 1112 | ||
| 1105 | * New Modes and Packages in Emacs 27.1 | 1113 | * New Modes and Packages in Emacs 27.1 |
| 1106 | 1114 | ||
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el index b2299030a1b..715cd608c46 100644 --- a/lisp/emacs-lisp/rx.el +++ b/lisp/emacs-lisp/rx.el | |||
| @@ -429,6 +429,13 @@ Only both edges of each range is checked." | |||
| 429 | ;; set L list of all ranges | 429 | ;; set L list of all ranges |
| 430 | (mapc (lambda (e) (cond ((stringp e) (push e str)) | 430 | (mapc (lambda (e) (cond ((stringp e) (push e str)) |
| 431 | ((numberp e) (push (cons e e) l)) | 431 | ((numberp e) (push (cons e e) l)) |
| 432 | ;; Ranges between ASCII and raw bytes are split, | ||
| 433 | ;; to prevent accidental inclusion of Unicode | ||
| 434 | ;; characters later on. | ||
| 435 | ((and (<= (car e) #x7f) | ||
| 436 | (>= (cdr e) #x3fff80)) | ||
| 437 | (push (cons (car e) #x7f) l) | ||
| 438 | (push (cons #x3fff80 (cdr e)) l)) | ||
| 432 | (t (push e l)))) | 439 | (t (push e l)))) |
| 433 | args) | 440 | args) |
| 434 | ;; condense overlapped ranges in L | 441 | ;; condense overlapped ranges in L |
diff --git a/test/lisp/emacs-lisp/rx-tests.el b/test/lisp/emacs-lisp/rx-tests.el index f15e1016f7c..e14feda347f 100644 --- a/test/lisp/emacs-lisp/rx-tests.el +++ b/test/lisp/emacs-lisp/rx-tests.el | |||
| @@ -53,7 +53,11 @@ | |||
| 53 | ;; Range of raw characters, multibyte. | 53 | ;; Range of raw characters, multibyte. |
| 54 | (should (equal (string-match-p (rx (any "Å\211\326-\377\177")) | 54 | (should (equal (string-match-p (rx (any "Å\211\326-\377\177")) |
| 55 | "XY\355\177\327") | 55 | "XY\355\177\327") |
| 56 | 2))) | 56 | 2)) |
| 57 | ;; Split range; \177-\377ÿ should not be optimised to \177-\377. | ||
| 58 | (should (equal (string-match-p (rx (any "\177-\377" ?ÿ)) | ||
| 59 | "ÿA\310B") | ||
| 60 | 0))) | ||
| 57 | 61 | ||
| 58 | (ert-deftest rx-pcase () | 62 | (ert-deftest rx-pcase () |
| 59 | (should (equal (pcase "a 1 2 3 1 1 b" | 63 | (should (equal (pcase "a 1 2 3 1 1 b" |