diff options
| author | Mattias Engdegård | 2018-12-29 11:09:27 +0100 |
|---|---|---|
| committer | Eli Zaretskii | 2018-12-29 16:53:27 +0200 |
| commit | b71d4ce056ca291594682b2a8536a4a768a97330 (patch) | |
| tree | a3a2094b1611d1039cc3d728816e9652c83f2a22 | |
| parent | fb10834a602416f8422131d5ce9dabcc28e57be4 (diff) | |
| download | emacs-b71d4ce056ca291594682b2a8536a4a768a97330.tar.gz emacs-b71d4ce056ca291594682b2a8536a4a768a97330.zip | |
Handle raw bytes, and LF in ranges, in rx `any' argument strings
* lisp/emacs-lisp/rx.el (rx-check-any-string): Rewrite to handle raw bytes
in unibyte strings and accept LF as range endpoints (Bug#33205).
* test/lisp/emacs-lisp/rx-tests.el: Add tests for the above.
| -rw-r--r-- | lisp/emacs-lisp/rx.el | 51 | ||||
| -rw-r--r-- | test/lisp/emacs-lisp/rx-tests.el | 22 |
2 files changed, 51 insertions, 22 deletions
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el index 1230df4f15d..1cae22f870a 100644 --- a/lisp/emacs-lisp/rx.el +++ b/lisp/emacs-lisp/rx.el | |||
| @@ -449,28 +449,35 @@ Only both edges of each range is checked." | |||
| 449 | 449 | ||
| 450 | 450 | ||
| 451 | (defun rx-check-any-string (str) | 451 | (defun rx-check-any-string (str) |
| 452 | "Check string argument STR for Rx `any'." | 452 | "Turn the `any' argument string STR into a list of characters. |
| 453 | (let ((i 0) | 453 | The original order is not preserved. Ranges, \"A-Z\", become pairs, (?A . ?Z)." |
| 454 | c1 c2 l) | 454 | (let ((decode-char |
| 455 | (if (= 0 (length str)) | 455 | ;; Make sure raw bytes are decoded as such, to avoid confusion with |
| 456 | (error "String arg for Rx `any' must not be empty")) | 456 | ;; U+0080..U+00FF. |
| 457 | (while (string-match ".-." str i) | 457 | (if (multibyte-string-p str) |
| 458 | ;; string before range: convert it to characters | 458 | #'identity |
| 459 | (if (< i (match-beginning 0)) | 459 | (lambda (c) (if (<= #x80 c #xff) |
| 460 | (setq l (nconc | 460 | (+ c #x3fff00) |
| 461 | l | 461 | c)))) |
| 462 | (append (substring str i (match-beginning 0)) nil)))) | 462 | (len (length str)) |
| 463 | ;; range | 463 | (i 0) |
| 464 | (setq i (match-end 0) | 464 | (ret nil)) |
| 465 | c1 (aref str (match-beginning 0)) | 465 | (if (= 0 len) |
| 466 | c2 (aref str (1- i))) | 466 | (error "String arg for Rx `any' must not be empty")) |
| 467 | (cond | 467 | (while (< i len) |
| 468 | ((< c1 c2) (setq l (nconc l (list (cons c1 c2))))) | 468 | (cond ((and (< i (- len 2)) |
| 469 | ((= c1 c2) (setq l (nconc l (list c1)))))) | 469 | (= (aref str (+ i 1)) ?-)) |
| 470 | ;; rest? | 470 | ;; Range. |
| 471 | (if (< i (length str)) | 471 | (let ((start (funcall decode-char (aref str i))) |
| 472 | (setq l (nconc l (append (substring str i) nil)))) | 472 | (end (funcall decode-char (aref str (+ i 2))))) |
| 473 | l)) | 473 | (cond ((< start end) (push (cons start end) ret)) |
| 474 | ((= start end) (push start ret))) | ||
| 475 | (setq i (+ i 3)))) | ||
| 476 | (t | ||
| 477 | ;; Single character. | ||
| 478 | (push (funcall decode-char (aref str i)) ret) | ||
| 479 | (setq i (+ i 1))))) | ||
| 480 | ret)) | ||
| 474 | 481 | ||
| 475 | 482 | ||
| 476 | (defun rx-check-any (arg) | 483 | (defun rx-check-any (arg) |
diff --git a/test/lisp/emacs-lisp/rx-tests.el b/test/lisp/emacs-lisp/rx-tests.el index d15e3d77199..8b3ce6cb01f 100644 --- a/test/lisp/emacs-lisp/rx-tests.el +++ b/test/lisp/emacs-lisp/rx-tests.el | |||
| @@ -33,6 +33,28 @@ | |||
| 33 | (number-sequence ?< ?\]) | 33 | (number-sequence ?< ?\]) |
| 34 | (number-sequence ?- ?:)))))) | 34 | (number-sequence ?- ?:)))))) |
| 35 | 35 | ||
| 36 | (ert-deftest rx-char-any-range-nl () | ||
| 37 | "Test character alternatives with LF as a range endpoint." | ||
| 38 | (should (equal (rx (any "\n-\r")) | ||
| 39 | "[\n-\r]")) | ||
| 40 | (should (equal (rx (any "\a-\n")) | ||
| 41 | "[\a-\n]"))) | ||
| 42 | |||
| 43 | (ert-deftest rx-char-any-raw-byte () | ||
| 44 | "Test raw bytes in character alternatives." | ||
| 45 | ;; Separate raw characters. | ||
| 46 | (should (equal (string-match-p (rx (any "\326A\333B")) | ||
| 47 | "X\326\333") | ||
| 48 | 1)) | ||
| 49 | ;; Range of raw characters, unibyte. | ||
| 50 | (should (equal (string-match-p (rx (any "\200-\377")) | ||
| 51 | "ÿA\310B") | ||
| 52 | 2)) | ||
| 53 | ;; Range of raw characters, multibyte. | ||
| 54 | (should (equal (string-match-p (rx (any "Å\211\326-\377\177")) | ||
| 55 | "XY\355\177\327") | ||
| 56 | 2))) | ||
| 57 | |||
| 36 | (ert-deftest rx-pcase () | 58 | (ert-deftest rx-pcase () |
| 37 | (should (equal (pcase "a 1 2 3 1 1 b" | 59 | (should (equal (pcase "a 1 2 3 1 1 b" |
| 38 | ((rx (let u (+ digit)) space | 60 | ((rx (let u (+ digit)) space |