aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Mattias Engdegård 2018-12-29 11:09:27 +0100
committer Eli Zaretskii 2018-12-29 16:53:27 +0200
commit b71d4ce056ca291594682b2a8536a4a768a97330 (patch)
tree a3a2094b1611d1039cc3d728816e9652c83f2a22
parent fb10834a602416f8422131d5ce9dabcc28e57be4 (diff)
download emacs-b71d4ce056ca291594682b2a8536a4a768a97330.tar.gz
emacs-b71d4ce056ca291594682b2a8536a4a768a97330.zip
Handle raw bytes, and LF in ranges, in rx `any' argument strings
* lisp/emacs-lisp/rx.el (rx-check-any-string): Rewrite to handle raw bytes in unibyte strings and accept LF as range endpoints (Bug#33205).
* test/lisp/emacs-lisp/rx-tests.el: Add tests for the above.
-rw-r--r-- lisp/emacs-lisp/rx.el 51
-rw-r--r-- test/lisp/emacs-lisp/rx-tests.el 22
2 files changed, 51 insertions, 22 deletions
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
index 1230df4f15d..1cae22f870a 100644
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -449,28 +449,35 @@ Only both edges of each range is checked."
449 449
450 450
451(defun rx-check-any-string (str) 451(defun rx-check-any-string (str)
452 "Check string argument STR for Rx `any'." 452 "Turn the `any' argument string STR into a list of characters.
453 (let ((i 0) 453The original order is not preserved. Ranges, \"A-Z\", become pairs, (?A . ?Z)."
454 c1 c2 l) 454 (let ((decode-char
455 (if (= 0 (length str)) 455 ;; Make sure raw bytes are decoded as such, to avoid confusion with
456 (error "String arg for Rx `any' must not be empty")) 456 ;; U+0080..U+00FF.
457 (while (string-match ".-." str i) 457 (if (multibyte-string-p str)
458 ;; string before range: convert it to characters 458 #'identity
459 (if (< i (match-beginning 0)) 459 (lambda (c) (if (<= #x80 c #xff)
460 (setq l (nconc 460 (+ c #x3fff00)
461 l 461 c))))
462 (append (substring str i (match-beginning 0)) nil)))) 462 (len (length str))
463 ;; range 463 (i 0)
464 (setq i (match-end 0) 464 (ret nil))
465 c1 (aref str (match-beginning 0)) 465 (if (= 0 len)
466 c2 (aref str (1- i))) 466 (error "String arg for Rx `any' must not be empty"))
467 (cond 467 (while (< i len)
468 ((< c1 c2) (setq l (nconc l (list (cons c1 c2))))) 468 (cond ((and (< i (- len 2))
469 ((= c1 c2) (setq l (nconc l (list c1)))))) 469 (= (aref str (+ i 1)) ?-))
470 ;; rest? 470 ;; Range.
471 (if (< i (length str)) 471 (let ((start (funcall decode-char (aref str i)))
472 (setq l (nconc l (append (substring str i) nil)))) 472 (end (funcall decode-char (aref str (+ i 2)))))
473 l)) 473 (cond ((< start end) (push (cons start end) ret))
474 ((= start end) (push start ret)))
475 (setq i (+ i 3))))
476 (t
477 ;; Single character.
478 (push (funcall decode-char (aref str i)) ret)
479 (setq i (+ i 1)))))
480 ret))
474 481
475 482
476(defun rx-check-any (arg) 483(defun rx-check-any (arg)
diff --git a/test/lisp/emacs-lisp/rx-tests.el b/test/lisp/emacs-lisp/rx-tests.el
index d15e3d77199..8b3ce6cb01f 100644
--- a/test/lisp/emacs-lisp/rx-tests.el
+++ b/test/lisp/emacs-lisp/rx-tests.el
@@ -33,6 +33,28 @@
33 (number-sequence ?< ?\]) 33 (number-sequence ?< ?\])
34 (number-sequence ?- ?:)))))) 34 (number-sequence ?- ?:))))))
35 35
36(ert-deftest rx-char-any-range-nl ()
37 "Test character alternatives with LF as a range endpoint."
38 (should (equal (rx (any "\n-\r"))
39 "[\n-\r]"))
40 (should (equal (rx (any "\a-\n"))
41 "[\a-\n]")))
42
43(ert-deftest rx-char-any-raw-byte ()
44 "Test raw bytes in character alternatives."
45 ;; Separate raw characters.
46 (should (equal (string-match-p (rx (any "\326A\333B"))
47 "X\326\333")
48 1))
49 ;; Range of raw characters, unibyte.
50 (should (equal (string-match-p (rx (any "\200-\377"))
51 "ÿA\310B")
52 2))
53 ;; Range of raw characters, multibyte.
54 (should (equal (string-match-p (rx (any "Å\211\326-\377\177"))
55 "XY\355\177\327")
56 2)))
57
36(ert-deftest rx-pcase () 58(ert-deftest rx-pcase ()
37 (should (equal (pcase "a 1 2 3 1 1 b" 59 (should (equal (pcase "a 1 2 3 1 1 b"
38 ((rx (let u (+ digit)) space 60 ((rx (let u (+ digit)) space