aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Mattias Engdegård 2019-02-15 19:27:48 +0100
committer Mattias Engdegård 2019-02-16 12:43:32 +0100
commit 478bbf7c80e71ff84f0e4e1363bf86e93d9c51c3 (patch)
tree 7d05c376a0299282d291eff879eedcc6f3d2651d
parent aff0c585060b7cc92d52a32978c6aa64cf7e2a5e (diff)
download emacs-478bbf7c80e71ff84f0e4e1363bf86e93d9c51c3.tar.gz
emacs-478bbf7c80e71ff84f0e4e1363bf86e93d9c51c3.zip
Prevent over-eager rx character range condensation
`rx' incorrectly considers character ranges between ASCII and raw bytes to
cover all codes in-between, which includes all non-ASCII Unicode chars.
This causes (any "\000-\377" ?Å) to be simplified to (any "\000-\377"),
which is not at all the same thing: [\000-\377] really means
[\000-\177\200-\377] (Bug#34492).

* lisp/emacs-lisp/rx.el (rx-any-condense-range): Split ranges going
from ASCII to raw bytes.
* test/lisp/emacs-lisp/rx-tests.el (rx-char-any-raw-byte): Add test case.
* etc/NEWS: Mention the overall change (Bug#33205).
-rw-r--r-- etc/NEWS 8
-rw-r--r-- lisp/emacs-lisp/rx.el 7
-rw-r--r-- test/lisp/emacs-lisp/rx-tests.el 6
3 files changed, 20 insertions, 1 deletions
diff --git a/etc/NEWS b/etc/NEWS
index 70a50c02c4e..0cafbaae96c 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -1101,6 +1101,14 @@ subexpression.
1101When there is no menu for a mode, display the mode name after the 1101When there is no menu for a mode, display the mode name after the
1102indicator instead of just the indicator (which is sometimes cryptic). 1102indicator instead of just the indicator (which is sometimes cryptic).
1103 1103
1104** rx
1105
1106---
1107*** rx now handles raw bytes in character alternatives correctly,
1108when given in a string. Previously, '(any "\x80-\xff")' would match
1109characters U+0080...U+00FF. Now the expression matches raw bytes in
1110the 128...255 range, as expected.
1111
1104 1112
1105* New Modes and Packages in Emacs 27.1 1113* New Modes and Packages in Emacs 27.1
1106 1114
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
index b2299030a1b..715cd608c46 100644
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -429,6 +429,13 @@ Only both edges of each range is checked."
429 ;; set L list of all ranges 429 ;; set L list of all ranges
430 (mapc (lambda (e) (cond ((stringp e) (push e str)) 430 (mapc (lambda (e) (cond ((stringp e) (push e str))
431 ((numberp e) (push (cons e e) l)) 431 ((numberp e) (push (cons e e) l))
432 ;; Ranges between ASCII and raw bytes are split,
433 ;; to prevent accidental inclusion of Unicode
434 ;; characters later on.
435 ((and (<= (car e) #x7f)
436 (>= (cdr e) #x3fff80))
437 (push (cons (car e) #x7f) l)
438 (push (cons #x3fff80 (cdr e)) l))
432 (t (push e l)))) 439 (t (push e l))))
433 args) 440 args)
434 ;; condense overlapped ranges in L 441 ;; condense overlapped ranges in L
diff --git a/test/lisp/emacs-lisp/rx-tests.el b/test/lisp/emacs-lisp/rx-tests.el
index f15e1016f7c..e14feda347f 100644
--- a/test/lisp/emacs-lisp/rx-tests.el
+++ b/test/lisp/emacs-lisp/rx-tests.el
@@ -53,7 +53,11 @@
53 ;; Range of raw characters, multibyte. 53 ;; Range of raw characters, multibyte.
54 (should (equal (string-match-p (rx (any "Å\211\326-\377\177")) 54 (should (equal (string-match-p (rx (any "Å\211\326-\377\177"))
55 "XY\355\177\327") 55 "XY\355\177\327")
56 2))) 56 2))
57 ;; Split range; \177-\377ÿ should not be optimised to \177-\377.
58 (should (equal (string-match-p (rx (any "\177-\377" ?ÿ))
59 "ÿA\310B")
60 0)))
57 61
58(ert-deftest rx-pcase () 62(ert-deftest rx-pcase ()
59 (should (equal (pcase "a 1 2 3 1 1 b" 63 (should (equal (pcase "a 1 2 3 1 1 b"