aboutsummaryrefslogtreecommitdiffstats
path: root/test
diff options
context:
space:
mode:
authorEric Abrahamsen2022-12-05 21:59:03 -0800
committerEric Abrahamsen2024-03-30 15:19:47 -0700
commit8bee4060ea42c61e52ebe6487ff97bc095261050 (patch)
tree2d9d9e7ef775fdb885cb53ead590103f599a893e /test
parent0df8dadde2edaee406c76d639a22c70d0b03426b (diff)
downloademacs-8bee4060ea42c61e52ebe6487ff97bc095261050.tar.gz
emacs-8bee4060ea42c61e52ebe6487ff97bc095261050.zip
Add peg.el as a built-in library
* lisp/progmodes/peg.el: New file, taken from ELPA package. * test/lisp/peg-tests.el: Package tests. * doc/lispref/peg.texi: Documentation.
Diffstat (limited to 'test')
-rw-r--r--test/lisp/peg-tests.el367
1 files changed, 367 insertions, 0 deletions
diff --git a/test/lisp/peg-tests.el b/test/lisp/peg-tests.el
new file mode 100644
index 00000000000..864e09b4200
--- /dev/null
+++ b/test/lisp/peg-tests.el
@@ -0,0 +1,367 @@
1;;; peg-tests.el --- Tests of PEG parsers -*- lexical-binding: t; -*-
2
3;; Copyright (C) 2008-2023 Free Software Foundation, Inc.
4
5;; This program is free software; you can redistribute it and/or modify
6;; it under the terms of the GNU General Public License as published by
7;; the Free Software Foundation, either version 3 of the License, or
8;; (at your option) any later version.
9
10;; This program is distributed in the hope that it will be useful,
11;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13;; GNU General Public License for more details.
14
15;; You should have received a copy of the GNU General Public License
16;; along with this program. If not, see <https://www.gnu.org/licenses/>.
17
18;;; Commentary:
19
20;; Tests and examples, that used to live in peg.el wrapped inside an `eval'.
21
22;;; Code:
23
24(require 'peg)
25(require 'ert)
26
27;;; Tests:
28
29(defmacro peg-parse-string (pex string &optional noerror)
30 "Parse STRING according to PEX.
31If NOERROR is non-nil, push nil resp. t if the parse failed
32resp. succeeded instead of signaling an error."
33 (let ((oldstyle (consp (car-safe pex)))) ;PEX is really a list of rules.
34 `(with-temp-buffer
35 (insert ,string)
36 (goto-char (point-min))
37 ,(if oldstyle
38 `(with-peg-rules ,pex
39 (peg-run (peg ,(caar pex))
40 ,(unless noerror '#'peg-signal-failure)))
41 `(peg-run (peg ,pex)
42 ,(unless noerror '#'peg-signal-failure))))))
43
44(define-peg-rule peg-test-natural ()
45 [0-9] (* [0-9]))
46
47(ert-deftest peg-test ()
48 (should (peg-parse-string peg-test-natural "99 bottles" t))
49 (should (peg-parse-string ((s "a")) "a" t))
50 (should (not (peg-parse-string ((s "a")) "b" t)))
51 (should (peg-parse-string ((s (not "a"))) "b" t))
52 (should (not (peg-parse-string ((s (not "a"))) "a" t)))
53 (should (peg-parse-string ((s (if "a"))) "a" t))
54 (should (not (peg-parse-string ((s (if "a"))) "b" t)))
55 (should (peg-parse-string ((s "ab")) "ab" t))
56 (should (not (peg-parse-string ((s "ab")) "ba" t)))
57 (should (not (peg-parse-string ((s "ab")) "a" t)))
58 (should (peg-parse-string ((s (range ?0 ?9))) "0" t))
59 (should (not (peg-parse-string ((s (range ?0 ?9))) "a" t)))
60 (should (peg-parse-string ((s [0-9])) "0" t))
61 (should (not (peg-parse-string ((s [0-9])) "a" t)))
62 (should (not (peg-parse-string ((s [0-9])) "" t)))
63 (should (peg-parse-string ((s (any))) "0" t))
64 (should (not (peg-parse-string ((s (any))) "" t)))
65 (should (peg-parse-string ((s (eob))) "" t))
66 (should (peg-parse-string ((s (not (eob)))) "a" t))
67 (should (peg-parse-string ((s (or "a" "b"))) "a" t))
68 (should (peg-parse-string ((s (or "a" "b"))) "b" t))
69 (should (not (peg-parse-string ((s (or "a" "b"))) "c" t)))
70 (should (peg-parse-string (and "a" "b") "ab" t))
71 (should (peg-parse-string ((s (and "a" "b"))) "abc" t))
72 (should (not (peg-parse-string (and "a" "b") "ba" t)))
73 (should (peg-parse-string ((s (and "a" "b" "c"))) "abc" t))
74 (should (peg-parse-string ((s (* "a") "b" (eob))) "b" t))
75 (should (peg-parse-string ((s (* "a") "b" (eob))) "ab" t))
76 (should (peg-parse-string ((s (* "a") "b" (eob))) "aaab" t))
77 (should (not (peg-parse-string ((s (* "a") "b" (eob))) "abc" t)))
78 (should (peg-parse-string ((s "")) "abc" t))
79 (should (peg-parse-string ((s "" (eob))) "" t))
80 (should (peg-parse-string ((s (opt "a") "b")) "abc" t))
81 (should (peg-parse-string ((s (opt "a") "b")) "bc" t))
82 (should (not (peg-parse-string ((s (or))) "ab" t)))
83 (should (peg-parse-string ((s (and))) "ab" t))
84 (should (peg-parse-string ((s (and))) "" t))
85 (should (peg-parse-string ((s ["^"])) "^" t))
86 (should (peg-parse-string ((s ["^a"])) "a" t))
87 (should (peg-parse-string ["-"] "-" t))
88 (should (peg-parse-string ((s ["]-"])) "]" t))
89 (should (peg-parse-string ((s ["^]"])) "^" t))
90 (should (peg-parse-string ((s [alpha])) "z" t))
91 (should (not (peg-parse-string ((s [alpha])) "0" t)))
92 (should (not (peg-parse-string ((s [alpha])) "" t)))
93 (should (not (peg-parse-string ((s ["][:alpha:]"])) "z" t)))
94 (should (peg-parse-string ((s (bob))) "" t))
95 (should (peg-parse-string ((s (bos))) "x" t))
96 (should (not (peg-parse-string ((s (bos))) " x" t)))
97 (should (peg-parse-string ((s "x" (eos))) "x" t))
98 (should (peg-parse-string ((s (syntax-class whitespace))) " " t))
99 (should (peg-parse-string ((s (= "foo"))) "foo" t))
100 (should (let ((f "foo")) (peg-parse-string ((s (= f))) "foo" t)))
101 (should (not (peg-parse-string ((s (= "foo"))) "xfoo" t)))
102 (should (equal (peg-parse-string ((s `(-- 1 2))) "") '(2 1)))
103 (should (equal (peg-parse-string ((s `(-- 1 2) `(a b -- a b))) "") '(2 1)))
104 (should (equal (peg-parse-string ((s (or (and (any) s)
105 (substring [0-9]))))
106 "ab0cd1ef2gh")
107 '("2")))
108 ;; The PEG rule `other' doesn't exist, which will cause a byte-compiler
109 ;; warning, but not an error at run time because the rule is not actually
110 ;; used in this particular case.
111 (should (equal (peg-parse-string ((s (substring (or "a" other)))
112 ;; Unused left-recursive rule, should
113 ;; cause a byte-compiler warning.
114 (r (* "a") r))
115 "af")
116 '("a")))
117 (should (equal (peg-parse-string ((s (list x y))
118 (x `(-- 1))
119 (y `(-- 2)))
120 "")
121 '((1 2))))
122 (should (equal (peg-parse-string ((s (list (* x)))
123 (x "" `(-- 'x)))
124 "xxx")
125 ;; The empty loop body should be matched once!
126 '((x))))
127 (should (equal (peg-parse-string ((s (list (* x)))
128 (x "x" `(-- 'x)))
129 "xxx")
130 '((x x x))))
131 (should (equal (peg-parse-string ((s (region (* x)))
132 (x "x" `(-- 'x)))
133 "xxx")
134 ;; FIXME: Since string positions start at 0, this should
135 ;; really be '(3 x x x 0) !!
136 '(4 x x x 1)))
137 (should (equal (peg-parse-string ((s (region (list (* x))))
138 (x "x" `(-- 'x 'y)))
139 "xxx")
140 '(4 (x y x y x y) 1)))
141 (should (equal (with-temp-buffer
142 (save-excursion (insert "abcdef"))
143 (list
144 (peg-run (peg "a"
145 (replace "bc" "x")
146 (replace "de" "y")
147 "f"))
148 (buffer-string)))
149 '(t "axyf")))
150 (with-temp-buffer
151 (insert "toro")
152 (goto-char (point-min))
153 (should (peg-run (peg "to")))
154 (should-not (peg-run (peg "to")))
155 (should (peg-run (peg "ro")))
156 (should (eobp)))
157 (with-temp-buffer
158 (insert " ")
159 (goto-char (point-min))
160 (peg-run (peg (+ (syntax-class whitespace))))
161 (should (eobp)))
162 )
163
164;;; Examples:
165
166;; peg-ex-recognize-int recognizes integers. An integer begins with a
167;; optional sign, then follows one or more digits. Digits are all
168;; characters from 0 to 9.
169;;
170;; Notes:
171;; 1) "" matches the empty sequence, i.e. matches without consuming
172;; input.
173;; 2) [0-9] is the character range from 0 to 9. This can also be
174;; written as (range ?0 ?9). Note that 0-9 is a symbol.
175(defun peg-ex-recognize-int ()
176 (with-peg-rules ((number sign digit (* digit))
177 (sign (or "+" "-" ""))
178 (digit [0-9]))
179 (peg-run (peg number))))
180
181;; peg-ex-parse-int recognizes integers and computes the corresponding
182;; value. The grammar is the same as for `peg-ex-recognize-int'
183;; augmented with parsing actions. Unfortunaletly, the actions add
184;; quite a bit of clutter.
185;;
186;; The actions for the sign rule push -1 on the stack for a minus sign
187;; and 1 for plus or no sign.
188;;
189;; The action for the digit rule pushes the value for a single digit.
190;;
191;; The action `(a b -- (+ (* a 10) b)), takes two items from the stack
192;; and pushes the first digit times 10 added to the second digit.
193;;
194;; The action `(sign val -- (* sign val)), multiplies val with the
195;; sign (1 or -1).
196(defun peg-ex-parse-int ()
197 (with-peg-rules ((number sign digit (* digit
198 `(a b -- (+ (* a 10) b)))
199 `(sign val -- (* sign val)))
200 (sign (or (and "+" `(-- 1))
201 (and "-" `(-- -1))
202 (and "" `(-- 1))))
203 (digit [0-9] `(-- (- (char-before) ?0))))
204 (peg-run (peg number))))
205
206;; Put point after the ) and press C-x C-e
207;; (peg-ex-parse-int)-234234
208
209;; Parse arithmetic expressions and compute the result as side effect.
210(defun peg-ex-arith ()
211 (peg-parse
212 (expr _ sum eol)
213 (sum product (* (or (and "+" _ product `(a b -- (+ a b)))
214 (and "-" _ product `(a b -- (- a b))))))
215 (product value (* (or (and "*" _ value `(a b -- (* a b)))
216 (and "/" _ value `(a b -- (/ a b))))))
217 (value (or (and (substring number) `(string -- (string-to-number string)))
218 (and "(" _ sum ")" _)))
219 (number (+ [0-9]) _)
220 (_ (* [" \t"]))
221 (eol (or "\n" "\r\n" "\r"))))
222
223;; (peg-ex-arith) 1 + 2 * 3 * (4 + 5)
224;; (peg-ex-arith) 1 + 2 ^ 3 * (4 + 5) ; fails to parse
225
226;; Parse URI according to RFC 2396.
227(defun peg-ex-uri ()
228 (peg-parse
229 (URI-reference (or absoluteURI relativeURI)
230 (or (and "#" (substring fragment))
231 `(-- nil))
232 `(scheme user host port path query fragment --
233 (list :scheme scheme :user user
234 :host host :port port
235 :path path :query query
236 :fragment fragment)))
237 (absoluteURI (substring scheme) ":" (or hier-part opaque-part))
238 (hier-part ;(-- user host port path query)
239 (or net-path
240 (and `(-- nil nil nil)
241 abs-path))
242 (or (and "?" (substring query))
243 `(-- nil)))
244 (net-path "//" authority (or abs-path `(-- nil)))
245 (abs-path "/" path-segments)
246 (path-segments segment (list (* "/" segment)) `(s l -- (cons s l)))
247 (segment (substring (* pchar) (* ";" param)))
248 (param (* pchar))
249 (pchar (or unreserved escaped [":@&=+$,"]))
250 (query (* uric))
251 (fragment (* uric))
252 (relativeURI (or net-path abs-path rel-path) (opt "?" query))
253 (rel-path rel-segment (opt abs-path))
254 (rel-segment (+ unreserved escaped [";@&=+$,"]))
255 (authority (or server reg-name))
256 (server (or (and (or (and (substring userinfo) "@")
257 `(-- nil))
258 hostport)
259 `(-- nil nil nil)))
260 (userinfo (* (or unreserved escaped [";:&=+$,"])))
261 (hostport (substring host) (or (and ":" (substring port))
262 `(-- nil)))
263 (host (or hostname ipv4address))
264 (hostname (* domainlabel ".") toplabel (opt "."))
265 (domainlabel alphanum
266 (opt (* (or alphanum "-") (if alphanum))
267 alphanum))
268 (toplabel alpha
269 (* (or alphanum "-") (if alphanum))
270 alphanum)
271 (ipv4address (+ digit) "." (+ digit) "." (+ digit) "." (+ digit))
272 (port (* digit))
273 (scheme alpha (* (or alpha digit ["+-."])))
274 (reg-name (or unreserved escaped ["$,;:@&=+"]))
275 (opaque-part uric-no-slash (* uric))
276 (uric (or reserved unreserved escaped))
277 (uric-no-slash (or unreserved escaped [";?:@&=+$,"]))
278 (reserved (set ";/?:@&=+$,"))
279 (unreserved (or alphanum mark))
280 (escaped "%" hex hex)
281 (hex (or digit [A-F] [a-f]))
282 (mark (set "-_.!~*'()"))
283 (alphanum (or alpha digit))
284 (alpha (or lowalpha upalpha))
285 (lowalpha [a-z])
286 (upalpha [A-Z])
287 (digit [0-9])))
288
289;; (peg-ex-uri)http://luser@www.foo.com:8080/bar/baz.html?x=1#foo
290;; (peg-ex-uri)file:/bar/baz.html?foo=df#x
291
292;; Split STRING where SEPARATOR occurs.
293(defun peg-ex-split (string separator)
294 (peg-parse-string ((s (list (* (* sep) elt)))
295 (elt (substring (+ (not sep) (any))))
296 (sep (= separator)))
297 string))
298
299;; (peg-ex-split "-abc-cd-" "-")
300
301;; Parse a lisp style Sexp.
302;; [To keep the example short, ' and . are handled as ordinary symbol.]
303(defun peg-ex-lisp ()
304 (peg-parse
305 (sexp _ (or string list number symbol))
306 (_ (* (or [" \n\t"] comment)))
307 (comment ";" (* (not (or "\n" (eob))) (any)))
308 (string "\"" (substring (* (not "\"") (any))) "\"")
309 (number (substring (opt (set "+-")) (+ digit))
310 (if terminating)
311 `(string -- (string-to-number string)))
312 (symbol (substring (and symchar (* (not terminating) symchar)))
313 `(s -- (intern s)))
314 (symchar [a-z A-Z 0-9 "-;!#%&'*+,./:;<=>?@[]^_`{|}~"])
315 (list "(" `(-- (cons nil nil)) `(hd -- hd hd)
316 (* sexp `(tl e -- (setcdr tl (list e))))
317 _ ")" `(hd _tl -- (cdr hd)))
318 (digit [0-9])
319 (terminating (or (set " \n\t();\"'") (eob)))))
320
321;; (peg-ex-lisp)
322
323;; We try to detect left recursion and report it as error.
324(defun peg-ex-left-recursion ()
325 (eval '(peg-parse (exp (or term
326 (and exp "+" exp)))
327 (term (or digit
328 (and term "*" term)))
329 (digit [0-9]))
330 t))
331
332(defun peg-ex-infinite-loop ()
333 (eval '(peg-parse (exp (* (or "x"
334 "y"
335 (action (foo))))))
336 t))
337
338;; Some efficiency problems:
339
340;; Find the last digit in a string.
341;; Recursive definition with excessive stack usage.
342(defun peg-ex-last-digit (string)
343 (peg-parse-string ((s (or (and (any) s)
344 (substring [0-9]))))
345 string))
346
347;; (peg-ex-last-digit "ab0cd1ef2gh")
348;; (peg-ex-last-digit (make-string 50 ?-))
349;; (peg-ex-last-digit (make-string 1000 ?-))
350
351;; Find the last digit without recursion. Doesn't run out of stack,
352;; but probably still too inefficient for large inputs.
353(defun peg-ex-last-digit2 (string)
354 (peg-parse-string ((s `(-- nil)
355 (+ (* (not digit) (any))
356 (substring digit)
357 `(_d1 d2 -- d2)))
358 (digit [0-9]))
359 string))
360
361;; (peg-ex-last-digit2 "ab0cd1ef2gh")
362;; (peg-ex-last-digit2 (concat (make-string 500000 ?-) "8a9b"))
363;; (peg-ex-last-digit2 (make-string 500000 ?-))
364;; (peg-ex-last-digit2 (make-string 500000 ?5))
365
366(provide 'peg-tests)
367;;; peg-tests.el ends here