about | summary | refs | log | tree | commit | diff | stats
path: root/test
diff options
context:
space:
mode:
author: Mattias Engdegård, 2019-09-25 14:29:50 -0700
committer: Paul Eggert, 2019-09-25 14:29:50 -0700
commit: 2ed71227c626c6cfdc684948644ccf3d9eaeb15b (patch)
tree: 2a4043ce8036206c7138b9bf5b149da8c66ec811 /test
parent: a773a6474897356cd78aeea092d2c1a51ede23f9 (diff)
download: emacs-2ed71227c626c6cfdc684948644ccf3d9eaeb15b.tar.gz
download: emacs-2ed71227c626c6cfdc684948644ccf3d9eaeb15b.zip
New rx implementation
* lisp/emacs-lisp/rx.el:
* test/lisp/emacs-lisp/rx-tests.el:
* doc/lispref/searching.texi (Rx Constructs):
Rewrite rx for correctness, clarity, and performance. The new
implementation retains full compatibility and has more comprehensive
tests.
* lisp/emacs-lisp/re-builder.el (reb-rx-font-lock-keywords):
Adapt to changes in internal variables in rx.el.
Diffstat (limited to 'test')
-rw-r--r--  test/lisp/emacs-lisp/rx-tests.el  336
1 file changed, 247 insertions, 89 deletions
diff --git a/test/lisp/emacs-lisp/rx-tests.el b/test/lisp/emacs-lisp/rx-tests.el
index 8845ebf46d1..fec046dd991 100644
--- a/test/lisp/emacs-lisp/rx-tests.el
+++ b/test/lisp/emacs-lisp/rx-tests.el
@@ -1,4 +1,4 @@
1;;; rx-tests.el --- test for rx.el functions -*- lexical-binding: t -*- 1;;; rx-tests.el --- tests for rx.el -*- lexical-binding: t -*-
2 2
3;; Copyright (C) 2016-2019 Free Software Foundation, Inc. 3;; Copyright (C) 2016-2019 Free Software Foundation, Inc.
4 4
@@ -17,21 +17,44 @@
17;; You should have received a copy of the GNU General Public License 17;; You should have received a copy of the GNU General Public License
18;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. 18;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
19 19
20;;; Commentary:
21
22(require 'ert) 20(require 'ert)
23(require 'rx) 21(require 'rx)
24 22
25;;; Code: 23(ert-deftest rx-seq ()
24 (should (equal (rx "a.b" "*" "c")
25 "a\\.b\\*c"))
26 (should (equal (rx (seq "a" (: "b" (and "c" (sequence "d" nonl)
27 "e")
28 "f")
29 "g"))
30 "abcd.efg"))
31 (should (equal (rx "a$" "b")
32 "a\\$b"))
33 (should (equal (rx bol "a" "b" ?c eol)
34 "^abc$"))
35 (should (equal (rx "a" "" "b")
36 "ab"))
37 (should (equal (rx (seq))
38 ""))
39 (should (equal (rx "" (or "ab" nonl) "")
40 "ab\\|.")))
41
42(ert-deftest rx-or ()
43 (should (equal (rx (or "ab" (| "c" nonl) "de"))
44 "ab\\|c\\|.\\|de"))
45 (should (equal (rx (or "ab" "abc" "a"))
46 "\\(?:ab\\|abc\\|a\\)"))
47 (should (equal (rx (| nonl "a") (| "b" blank))
48 "\\(?:.\\|a\\)\\(?:b\\|[[:blank:]]\\)"))
49 (should (equal (rx (|))
50 "\\`a\\`")))
26 51
27(ert-deftest rx-char-any () 52(ert-deftest rx-char-any ()
28 "Test character alternatives with `]' and `-' (Bug#25123)." 53 "Test character alternatives with `]' and `-' (Bug#25123)."
29 (should (string-match 54 (should (equal
30 (rx string-start (1+ (char (?\] . ?\{) (?< . ?\]) (?- . ?:))) 55 (rx string-start (1+ (char (?\] . ?\{) (?< . ?\]) (?- . ?:)))
31 string-end) 56 string-end)
32 (apply #'string (nconc (number-sequence ?\] ?\{) 57 "\\`[.-:<-{-]+\\'")))
33 (number-sequence ?< ?\])
34 (number-sequence ?- ?:))))))
35 58
36(ert-deftest rx-char-any-range-nl () 59(ert-deftest rx-char-any-range-nl ()
37 "Test character alternatives with LF as a range endpoint." 60 "Test character alternatives with LF as a range endpoint."
@@ -40,28 +63,72 @@
40 (should (equal (rx (any "\a-\n")) 63 (should (equal (rx (any "\a-\n"))
41 "[\a-\n]"))) 64 "[\a-\n]")))
42 65
43(ert-deftest rx-char-any-range-bad ()
44 (should-error (rx (any "0-9a-Z")))
45 (should-error (rx (any (?0 . ?9) (?a . ?Z)))))
46
47(ert-deftest rx-char-any-raw-byte () 66(ert-deftest rx-char-any-raw-byte ()
48 "Test raw bytes in character alternatives." 67 "Test raw bytes in character alternatives."
68
69 ;; The multibyteness of the rx return value sometimes depends on whether
70 ;; the test had been byte-compiled or not, so we add explicit conversions.
71
49 ;; Separate raw characters. 72 ;; Separate raw characters.
50 (should (equal (string-match-p (rx (any "\326A\333B")) 73 (should (equal (string-to-multibyte (rx (any "\326A\333B")))
51 "X\326\333") 74 (string-to-multibyte "[AB\326\333]")))
52 1))
53 ;; Range of raw characters, unibyte. 75 ;; Range of raw characters, unibyte.
54 (should (equal (string-match-p (rx (any "\200-\377")) 76 (should (equal (string-to-multibyte (rx (any "\200-\377")))
55 "ÿA\310B") 77 (string-to-multibyte "[\200-\377]")))
56 2)) 78
57 ;; Range of raw characters, multibyte. 79 ;; Range of raw characters, multibyte.
58 (should (equal (string-match-p (rx (any "Å\211\326-\377\177")) 80 (should (equal (rx (any "Å\211\326-\377\177"))
59 "XY\355\177\327") 81 "[\177Å\211\326-\377]"))
60 2))
61 ;; Split range; \177-\377ÿ should not be optimised to \177-\377. 82 ;; Split range; \177-\377ÿ should not be optimised to \177-\377.
62 (should (equal (string-match-p (rx (any "\177-\377" ?ÿ)) 83 (should (equal (rx (any "\177-\377" ?ÿ))
63 "ÿA\310B") 84 "[\177ÿ\200-\377]")))
64 0))) 85
86(ert-deftest rx-any ()
87 (should (equal (rx (any ?A (?C . ?D) "F-H" "J-L" "M" "N-P" "Q" "RS"))
88 "[ACDF-HJ-S]"))
89 (should (equal (rx (in "a!f" ?c) (char "q-z" "0-3")
90 (not-char "a-e1-5") (not (in "A-M" ?q)))
91 "[!acf][0-3q-z][^1-5a-e][^A-Mq]"))
92 (should (equal (rx (any "^") (any "]") (any "-")
93 (not (any "^")) (not (any "]")) (not (any "-")))
94 "\\^]-[^^][^]][^-]"))
95 (should (equal (rx (any "]" "^") (any "]" "-") (any "-" "^")
96 (not (any "]" "^")) (not (any "]" "-"))
97 (not (any "-" "^")))
98 "[]^][]-][-^][^]^][^]-][^-^]"))
99 (should (equal (rx (any "]" "^" "-") (not (any "]" "^" "-")))
100 "[]^-][^]^-]"))
101 (should (equal (rx (any "-" ascii) (any "^" ascii) (any "]" ascii))
102 "[[:ascii:]-][[:ascii:]^][][:ascii:]]"))
103 (should (equal (rx (not (any "-" ascii)) (not (any "^" ascii))
104 (not (any "]" ascii)))
105 "[^[:ascii:]-][^[:ascii:]^][^][:ascii:]]"))
106 (should (equal (rx (any "-]" ascii) (any "^]" ascii) (any "-^" ascii))
107 "[][:ascii:]-][]^[:ascii:]][[:ascii:]^-]"))
108 (should (equal (rx (not (any "-]" ascii)) (not (any "^]" ascii))
109 (not (any "-^" ascii)))
110 "[^][:ascii:]-][^]^[:ascii:]][^[:ascii:]^-]"))
111 (should (equal (rx (any "-]^" ascii) (not (any "-]^" ascii)))
112 "[]^[:ascii:]-][^]^[:ascii:]-]"))
113 (should (equal (rx (any "^" lower upper) (not (any "^" lower upper)))
114 "[[:lower:]^[:upper:]][^[:lower:]^[:upper:]]"))
115 (should (equal (rx (any "-" lower upper) (not (any "-" lower upper)))
116 "[[:lower:][:upper:]-][^[:lower:][:upper:]-]"))
117 (should (equal (rx (any "]" lower upper) (not (any "]" lower upper)))
118 "[][:lower:][:upper:]][^][:lower:][:upper:]]"))
119 (should (equal (rx (any "-a" "c-" "f-f" "--/*--"))
120 "[*-/acf]"))
121 (should (equal (rx (any "]-a" ?-) (not (any "]-a" ?-)))
122 "[]-a-][^]-a-]"))
123 (should (equal (rx (any "--]") (not (any "--]"))
124 (any "-" "^-a") (not (any "-" "^-a")))
125 "[].-\\-][^].-\\-][-^-a][^-^-a]"))
126 (should (equal (rx (not (any "!a" "0-8" digit nonascii)))
127 "[^!0-8a[:digit:][:nonascii:]]"))
128 (should (equal (rx (any) (not (any)))
129 "\\`a\\`\\(?:.\\|\n\\)"))
130 (should (equal (rx (any "") (not (any "")))
131 "\\`a\\`\\(?:.\\|\n\\)")))
65 132
66(ert-deftest rx-pcase () 133(ert-deftest rx-pcase ()
67 (should (equal (pcase "a 1 2 3 1 1 b" 134 (should (equal (pcase "a 1 2 3 1 1 b"
@@ -71,7 +138,11 @@
71 (backref u) space 138 (backref u) space
72 (backref 1)) 139 (backref 1))
73 (list u v))) 140 (list u v)))
74 '("1" "3")))) 141 '("1" "3")))
142 (let ((k "blue"))
143 (should (equal (pcase "<blue>"
144 ((rx "<" (literal k) ">") 'ok))
145 'ok))))
75 146
76(ert-deftest rx-kleene () 147(ert-deftest rx-kleene ()
77 "Test greedy and non-greedy repetition operators." 148 "Test greedy and non-greedy repetition operators."
@@ -94,71 +165,158 @@
94 (should (equal (rx (maximal-match 165 (should (equal (rx (maximal-match
95 (seq (* "a") (+ "b") (\? "c") (?\s "d") 166 (seq (* "a") (+ "b") (\? "c") (?\s "d")
96 (*? "e") (+? "f") (\?? "g") (?? "h")))) 167 (*? "e") (+? "f") (\?? "g") (?? "h"))))
97 "a*b+c?d?e*?f+?g??h??"))) 168 "a*b+c?d?e*?f+?g??h??"))
169 (should (equal (rx "a" (*) (+ (*)) (? (*) (+)) "b")
170 "ab")))
98 171
99(ert-deftest rx-or () 172(ert-deftest rx-repeat ()
100 ;; Test or-pattern reordering (Bug#34641). 173 (should (equal (rx (= 3 "a") (>= 51 "b")
101 (let ((s "abc")) 174 (** 2 11 "c") (repeat 6 "d") (repeat 4 8 "e"))
102 (should (equal (and (string-match (rx (or "abc" "ab" "a")) s) 175 "a\\{3\\}b\\{51,\\}c\\{2,11\\}d\\{6\\}e\\{4,8\\}"))
103 (match-string 0 s)) 176 (should (equal (rx (= 0 "k") (>= 0 "l") (** 0 0 "m") (repeat 0 "n")
104 "abc")) 177 (repeat 0 0 "o"))
105 (should (equal (and (string-match (rx (or "ab" "abc" "a")) s) 178 "k\\{0\\}l\\{0,\\}m\\{0\\}n\\{0\\}o\\{0\\}"))
106 (match-string 0 s)) 179 (should (equal (rx (opt (0+ "a")))
107 "ab")) 180 "\\(?:a*\\)?"))
108 (should (equal (and (string-match (rx (or "a" "ab" "abc")) s) 181 (should (equal (rx (opt (= 4 "a")))
109 (match-string 0 s)) 182 "a\\{4\\}?"))
110 "a"))) 183 (should (equal (rx "a" (** 3 7) (= 4) (>= 3) (= 4 (>= 7) (= 2)) "b")
111 ;; Test zero-argument `or'. 184 "ab")))
112 (should (equal (rx (or)) regexp-unmatchable))) 185
186(ert-deftest rx-atoms ()
187 (should (equal (rx anything)
188 ".\\|\n"))
189 (should (equal (rx line-start not-newline nonl any line-end)
190 "^...$"))
191 (should (equal (rx bol string-start string-end buffer-start buffer-end
192 bos eos bot eot eol)
193 "^\\`\\'\\`\\'\\`\\'\\`\\'$"))
194 (should (equal (rx point word-start word-end bow eow symbol-start symbol-end
195 word-boundary not-word-boundary not-wordchar)
196 "\\=\\<\\>\\<\\>\\_<\\_>\\b\\B\\W"))
197 (should (equal (rx digit numeric num control cntrl)
198 "[[:digit:]][[:digit:]][[:digit:]][[:cntrl:]][[:cntrl:]]"))
199 (should (equal (rx hex-digit hex xdigit blank)
200 "[[:xdigit:]][[:xdigit:]][[:xdigit:]][[:blank:]]"))
201 (should (equal (rx graph graphic print printing)
202 "[[:graph:]][[:graph:]][[:print:]][[:print:]]"))
203 (should (equal (rx alphanumeric alnum letter alphabetic alpha)
204 "[[:alnum:]][[:alnum:]][[:alpha:]][[:alpha:]][[:alpha:]]"))
205 (should (equal (rx ascii nonascii lower lower-case)
206 "[[:ascii:]][[:nonascii:]][[:lower:]][[:lower:]]"))
207 (should (equal (rx punctuation punct space whitespace white)
208 "[[:punct:]][[:punct:]][[:space:]][[:space:]][[:space:]]"))
209 (should (equal (rx upper upper-case word wordchar)
210 "[[:upper:]][[:upper:]][[:word:]][[:word:]]"))
211 (should (equal (rx unibyte multibyte)
212 "[[:unibyte:]][[:multibyte:]]")))
213
214(ert-deftest rx-syntax ()
215 (should (equal (rx (syntax whitespace) (syntax punctuation)
216 (syntax word) (syntax symbol)
217 (syntax open-parenthesis) (syntax close-parenthesis))
218 "\\s-\\s.\\sw\\s_\\s(\\s)"))
219 (should (equal (rx (syntax string-quote) (syntax paired-delimiter)
220 (syntax escape) (syntax character-quote)
221 (syntax comment-start) (syntax comment-end)
222 (syntax string-delimiter) (syntax comment-delimiter))
223 "\\s\"\\s$\\s\\\\s/\\s<\\s>\\s|\\s!")))
224
225(ert-deftest rx-category ()
226 (should (equal (rx (category space-for-indent) (category base)
227 (category consonant) (category base-vowel)
228 (category upper-diacritical-mark)
229 (category lower-diacritical-mark)
230 (category tone-mark) (category symbol)
231 (category digit)
232 (category vowel-modifying-diacritical-mark)
233 (category vowel-sign) (category semivowel-lower)
234 (category not-at-end-of-line)
235 (category not-at-beginning-of-line))
236 "\\c \\c.\\c0\\c1\\c2\\c3\\c4\\c5\\c6\\c7\\c8\\c9\\c<\\c>"))
237 (should (equal (rx (category alpha-numeric-two-byte)
238 (category chinese-two-byte) (category greek-two-byte)
239 (category japanese-hiragana-two-byte)
240 (category indian-two-byte)
241 (category japanese-katakana-two-byte)
242 (category strong-left-to-right)
243 (category korean-hangul-two-byte)
244 (category strong-right-to-left)
245 (category cyrillic-two-byte)
246 (category combining-diacritic))
247 "\\cA\\cC\\cG\\cH\\cI\\cK\\cL\\cN\\cR\\cY\\c^"))
248 (should (equal (rx (category ascii) (category arabic) (category chinese)
249 (category ethiopic) (category greek) (category korean)
250 (category indian) (category japanese)
251 (category japanese-katakana) (category latin)
252 (category lao) (category tibetan))
253 "\\ca\\cb\\cc\\ce\\cg\\ch\\ci\\cj\\ck\\cl\\co\\cq"))
254 (should (equal (rx (category japanese-roman) (category thai)
255 (category vietnamese) (category hebrew)
256 (category cyrillic) (category can-break))
257 "\\cr\\ct\\cv\\cw\\cy\\c|"))
258 (should (equal (rx (category ?g) (not (category ?~)))
259 "\\cg\\C~")))
260
261(ert-deftest rx-not ()
262 (should (equal (rx (not word-boundary))
263 "\\B"))
264 (should (equal (rx (not ascii) (not lower-case) (not wordchar))
265 "[^[:ascii:]][^[:lower:]][^[:word:]]"))
266 (should (equal (rx (not (syntax punctuation)) (not (syntax escape)))
267 "\\S.\\S\\"))
268 (should (equal (rx (not (category tone-mark)) (not (category lao)))
269 "\\C4\\Co")))
270
271(ert-deftest rx-group ()
272 (should (equal (rx (group nonl) (submatch "x")
273 (group-n 3 "y") (submatch-n 13 "z") (backref 1))
274 "\\(.\\)\\(x\\)\\(?3:y\\)\\(?13:z\\)\\1"))
275 (should (equal (rx (group) (group-n 2))
276 "\\(\\)\\(?2:\\)")))
277
278(ert-deftest rx-regexp ()
279 (should (equal (rx (regexp "abc") (regex "[de]"))
280 "\\(?:abc\\)[de]"))
281 (let ((x "a*"))
282 (should (equal (rx (regexp x) "b")
283 "\\(?:a*\\)b"))
284 (should (equal (rx "" (regexp x) (eval ""))
285 "a*"))))
286
287(ert-deftest rx-eval ()
288 (should (equal (rx (eval (list 'syntax 'symbol)))
289 "\\s_"))
290 (should (equal (rx "a" (eval (concat)) "b")
291 "ab")))
292
293(ert-deftest rx-literal ()
294 (should (equal (rx (literal (char-to-string 42)) nonl)
295 "\\*."))
296 (let ((x "a+b"))
297 (should (equal (rx (opt (literal (upcase x))))
298 "\\(?:A\\+B\\)?"))))
299
300(ert-deftest rx-to-string ()
301 (should (equal (rx-to-string '(or nonl "\nx"))
302 "\\(?:.\\|\nx\\)"))
303 (should (equal (rx-to-string '(or nonl "\nx") t)
304 ".\\|\nx")))
305
306
307(ert-deftest rx-constituents ()
308 (let ((rx-constituents
309 (append '((beta . gamma)
310 (gamma . "a*b")
311 (delta . ((lambda (form)
312 (regexp-quote (format "<%S>" form)))
313 1 nil symbolp))
314 (epsilon . delta))
315 rx-constituents)))
316 (should (equal (rx-to-string '(seq (+ beta) nonl gamma) t)
317 "\\(?:a*b\\)+.\\(?:a*b\\)"))
318 (should (equal (rx-to-string '(seq (delta a b c) (* (epsilon d e))) t)
319 "\\(?:<(delta a b c)>\\)\\(?:<(epsilon d e)>\\)*"))))
113 320
114(ert-deftest rx-seq ()
115 ;; Test zero-argument `seq'.
116 (should (equal (rx (seq)) "")))
117
118(defmacro rx-tests--match (regexp string &optional match)
119 (macroexp-let2 nil strexp string
120 `(ert-info ((format "Matching %S to %S" ',regexp ,strexp))
121 (should (string-match ,regexp ,strexp))
122 ,@(when match
123 `((should (equal (match-string 0 ,strexp) ,match)))))))
124
125(ert-deftest rx-nonstring-expr ()
126 (let ((bee "b")
127 (vowel "[aeiou]"))
128 (rx-tests--match (rx "a" (literal bee) "c") "abc")
129 (rx-tests--match (rx "a" (regexp bee) "c") "abc")
130 (rx-tests--match (rx "a" (or (regexp bee) "xy") "c") "abc")
131 (rx-tests--match (rx "a" (or "xy" (regexp bee)) "c") "abc")
132 (should-not (string-match (rx (or (regexp bee) "xy")) ""))
133 (rx-tests--match (rx "a" (= 3 (regexp bee)) "c") "abbbc")
134 (rx-tests--match (rx "x" (= 3 (regexp vowel)) "z") "xeoez")
135 (should-not (string-match (rx "x" (= 3 (regexp vowel)) "z") "xe[]z"))
136 (rx-tests--match (rx "x" (= 3 (literal vowel)) "z")
137 "x[aeiou][aeiou][aeiou]z")
138 (rx-tests--match (rx "x" (repeat 1 (regexp vowel)) "z") "xaz")
139 (rx-tests--match (rx "x" (repeat 1 2 (regexp vowel)) "z") "xaz")
140 (rx-tests--match (rx "x" (repeat 1 2 (regexp vowel)) "z") "xauz")
141 (rx-tests--match (rx "x" (>= 1 (regexp vowel)) "z") "xaiiz")
142 (rx-tests--match (rx "x" (** 1 2 (regexp vowel)) "z") "xaiz")
143 (rx-tests--match (rx "x" (group (regexp vowel)) "z") "xaz")
144 (rx-tests--match (rx "x" (group-n 1 (regexp vowel)) "z") "xaz")
145 (rx-tests--match (rx "x" (? (regexp vowel)) "z") "xz")))
146
147(ert-deftest rx-nonstring-expr-non-greedy ()
148 "`rx's greediness can't affect runtime regexp parts."
149 (let ((ad-min "[ad]*?")
150 (ad-max "[ad]*")
151 (ad "[ad]"))
152 (rx-tests--match (rx "c" (regexp ad-min) "a") "cdaaada" "cda")
153 (rx-tests--match (rx "c" (regexp ad-max) "a") "cdaaada" "cdaaada")
154 (rx-tests--match (rx "c" (minimal-match (regexp ad-max)) "a") "cdaaada" "cdaaada")
155 (rx-tests--match (rx "c" (maximal-match (regexp ad-min)) "a") "cdaaada" "cda")
156 (rx-tests--match (rx "c" (minimal-match (0+ (regexp ad))) "a") "cdaaada" "cda")
157 (rx-tests--match (rx "c" (maximal-match (0+ (regexp ad))) "a") "cdaaada" "cdaaada")))
158
159(ert-deftest rx-to-string-lisp-forms ()
160 (rx-tests--match (rx-to-string '(seq "a" (literal "b") "c")) "abc")
161 (rx-tests--match (rx-to-string '(seq "a" (regexp "b") "c")) "abc"))
162 321
163(provide 'rx-tests) 322(provide 'rx-tests)
164;; rx-tests.el ends here.