diff options
| author | Eric Abrahamsen | 2022-12-05 21:59:03 -0800 |
|---|---|---|
| committer | Eric Abrahamsen | 2024-03-30 15:19:47 -0700 |
| commit | 8bee4060ea42c61e52ebe6487ff97bc095261050 (patch) | |
| tree | 2d9d9e7ef775fdb885cb53ead590103f599a893e /test | |
| parent | 0df8dadde2edaee406c76d639a22c70d0b03426b (diff) | |
| download | emacs-8bee4060ea42c61e52ebe6487ff97bc095261050.tar.gz emacs-8bee4060ea42c61e52ebe6487ff97bc095261050.zip | |
Add peg.el as a built-in library
* lisp/progmodes/peg.el: New file, taken from ELPA package.
* test/lisp/peg-tests.el: Package tests.
* doc/lispref/peg.texi: Documentation.
Diffstat (limited to 'test')
| -rw-r--r-- | test/lisp/peg-tests.el | 367 |
1 files changed, 367 insertions, 0 deletions
diff --git a/test/lisp/peg-tests.el b/test/lisp/peg-tests.el new file mode 100644 index 00000000000..864e09b4200 --- /dev/null +++ b/test/lisp/peg-tests.el | |||
| @@ -0,0 +1,367 @@ | |||
| 1 | ;;; peg-tests.el --- Tests of PEG parsers -*- lexical-binding: t; -*- | ||
| 2 | |||
| 3 | ;; Copyright (C) 2008-2023 Free Software Foundation, Inc. | ||
| 4 | |||
| 5 | ;; This program is free software; you can redistribute it and/or modify | ||
| 6 | ;; it under the terms of the GNU General Public License as published by | ||
| 7 | ;; the Free Software Foundation, either version 3 of the License, or | ||
| 8 | ;; (at your option) any later version. | ||
| 9 | |||
| 10 | ;; This program is distributed in the hope that it will be useful, | ||
| 11 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 12 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 13 | ;; GNU General Public License for more details. | ||
| 14 | |||
| 15 | ;; You should have received a copy of the GNU General Public License | ||
| 16 | ;; along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
| 17 | |||
| 18 | ;;; Commentary: | ||
| 19 | |||
| 20 | ;; Tests and examples, that used to live in peg.el wrapped inside an `eval'. | ||
| 21 | |||
| 22 | ;;; Code: | ||
| 23 | |||
| 24 | (require 'peg) | ||
| 25 | (require 'ert) | ||
| 26 | |||
| 27 | ;;; Tests: | ||
| 28 | |||
| 29 | (defmacro peg-parse-string (pex string &optional noerror) | ||
| 30 | "Parse STRING according to PEX. | ||
| 31 | If NOERROR is non-nil, push nil resp. t if the parse failed | ||
| 32 | resp. succeeded instead of signaling an error." | ||
| 33 | (let ((oldstyle (consp (car-safe pex)))) ;PEX is really a list of rules. | ||
| 34 | `(with-temp-buffer | ||
| 35 | (insert ,string) | ||
| 36 | (goto-char (point-min)) | ||
| 37 | ,(if oldstyle | ||
| 38 | `(with-peg-rules ,pex | ||
| 39 | (peg-run (peg ,(caar pex)) | ||
| 40 | ,(unless noerror '#'peg-signal-failure))) | ||
| 41 | `(peg-run (peg ,pex) | ||
| 42 | ,(unless noerror '#'peg-signal-failure)))))) | ||
| 43 | |||
| 44 | (define-peg-rule peg-test-natural () | ||
| 45 | [0-9] (* [0-9])) | ||
| 46 | |||
| 47 | (ert-deftest peg-test () | ||
| 48 | (should (peg-parse-string peg-test-natural "99 bottles" t)) | ||
| 49 | (should (peg-parse-string ((s "a")) "a" t)) | ||
| 50 | (should (not (peg-parse-string ((s "a")) "b" t))) | ||
| 51 | (should (peg-parse-string ((s (not "a"))) "b" t)) | ||
| 52 | (should (not (peg-parse-string ((s (not "a"))) "a" t))) | ||
| 53 | (should (peg-parse-string ((s (if "a"))) "a" t)) | ||
| 54 | (should (not (peg-parse-string ((s (if "a"))) "b" t))) | ||
| 55 | (should (peg-parse-string ((s "ab")) "ab" t)) | ||
| 56 | (should (not (peg-parse-string ((s "ab")) "ba" t))) | ||
| 57 | (should (not (peg-parse-string ((s "ab")) "a" t))) | ||
| 58 | (should (peg-parse-string ((s (range ?0 ?9))) "0" t)) | ||
| 59 | (should (not (peg-parse-string ((s (range ?0 ?9))) "a" t))) | ||
| 60 | (should (peg-parse-string ((s [0-9])) "0" t)) | ||
| 61 | (should (not (peg-parse-string ((s [0-9])) "a" t))) | ||
| 62 | (should (not (peg-parse-string ((s [0-9])) "" t))) | ||
| 63 | (should (peg-parse-string ((s (any))) "0" t)) | ||
| 64 | (should (not (peg-parse-string ((s (any))) "" t))) | ||
| 65 | (should (peg-parse-string ((s (eob))) "" t)) | ||
| 66 | (should (peg-parse-string ((s (not (eob)))) "a" t)) | ||
| 67 | (should (peg-parse-string ((s (or "a" "b"))) "a" t)) | ||
| 68 | (should (peg-parse-string ((s (or "a" "b"))) "b" t)) | ||
| 69 | (should (not (peg-parse-string ((s (or "a" "b"))) "c" t))) | ||
| 70 | (should (peg-parse-string (and "a" "b") "ab" t)) | ||
| 71 | (should (peg-parse-string ((s (and "a" "b"))) "abc" t)) | ||
| 72 | (should (not (peg-parse-string (and "a" "b") "ba" t))) | ||
| 73 | (should (peg-parse-string ((s (and "a" "b" "c"))) "abc" t)) | ||
| 74 | (should (peg-parse-string ((s (* "a") "b" (eob))) "b" t)) | ||
| 75 | (should (peg-parse-string ((s (* "a") "b" (eob))) "ab" t)) | ||
| 76 | (should (peg-parse-string ((s (* "a") "b" (eob))) "aaab" t)) | ||
| 77 | (should (not (peg-parse-string ((s (* "a") "b" (eob))) "abc" t))) | ||
| 78 | (should (peg-parse-string ((s "")) "abc" t)) | ||
| 79 | (should (peg-parse-string ((s "" (eob))) "" t)) | ||
| 80 | (should (peg-parse-string ((s (opt "a") "b")) "abc" t)) | ||
| 81 | (should (peg-parse-string ((s (opt "a") "b")) "bc" t)) | ||
| 82 | (should (not (peg-parse-string ((s (or))) "ab" t))) | ||
| 83 | (should (peg-parse-string ((s (and))) "ab" t)) | ||
| 84 | (should (peg-parse-string ((s (and))) "" t)) | ||
| 85 | (should (peg-parse-string ((s ["^"])) "^" t)) | ||
| 86 | (should (peg-parse-string ((s ["^a"])) "a" t)) | ||
| 87 | (should (peg-parse-string ["-"] "-" t)) | ||
| 88 | (should (peg-parse-string ((s ["]-"])) "]" t)) | ||
| 89 | (should (peg-parse-string ((s ["^]"])) "^" t)) | ||
| 90 | (should (peg-parse-string ((s [alpha])) "z" t)) | ||
| 91 | (should (not (peg-parse-string ((s [alpha])) "0" t))) | ||
| 92 | (should (not (peg-parse-string ((s [alpha])) "" t))) | ||
| 93 | (should (not (peg-parse-string ((s ["][:alpha:]"])) "z" t))) | ||
| 94 | (should (peg-parse-string ((s (bob))) "" t)) | ||
| 95 | (should (peg-parse-string ((s (bos))) "x" t)) | ||
| 96 | (should (not (peg-parse-string ((s (bos))) " x" t))) | ||
| 97 | (should (peg-parse-string ((s "x" (eos))) "x" t)) | ||
| 98 | (should (peg-parse-string ((s (syntax-class whitespace))) " " t)) | ||
| 99 | (should (peg-parse-string ((s (= "foo"))) "foo" t)) | ||
| 100 | (should (let ((f "foo")) (peg-parse-string ((s (= f))) "foo" t))) | ||
| 101 | (should (not (peg-parse-string ((s (= "foo"))) "xfoo" t))) | ||
| 102 | (should (equal (peg-parse-string ((s `(-- 1 2))) "") '(2 1))) | ||
| 103 | (should (equal (peg-parse-string ((s `(-- 1 2) `(a b -- a b))) "") '(2 1))) | ||
| 104 | (should (equal (peg-parse-string ((s (or (and (any) s) | ||
| 105 | (substring [0-9])))) | ||
| 106 | "ab0cd1ef2gh") | ||
| 107 | '("2"))) | ||
| 108 | ;; The PEG rule `other' doesn't exist, which will cause a byte-compiler | ||
| 109 | ;; warning, but not an error at run time because the rule is not actually | ||
| 110 | ;; used in this particular case. | ||
| 111 | (should (equal (peg-parse-string ((s (substring (or "a" other))) | ||
| 112 | ;; Unused left-recursive rule, should | ||
| 113 | ;; cause a byte-compiler warning. | ||
| 114 | (r (* "a") r)) | ||
| 115 | "af") | ||
| 116 | '("a"))) | ||
| 117 | (should (equal (peg-parse-string ((s (list x y)) | ||
| 118 | (x `(-- 1)) | ||
| 119 | (y `(-- 2))) | ||
| 120 | "") | ||
| 121 | '((1 2)))) | ||
| 122 | (should (equal (peg-parse-string ((s (list (* x))) | ||
| 123 | (x "" `(-- 'x))) | ||
| 124 | "xxx") | ||
| 125 | ;; The empty loop body should be matched once! | ||
| 126 | '((x)))) | ||
| 127 | (should (equal (peg-parse-string ((s (list (* x))) | ||
| 128 | (x "x" `(-- 'x))) | ||
| 129 | "xxx") | ||
| 130 | '((x x x)))) | ||
| 131 | (should (equal (peg-parse-string ((s (region (* x))) | ||
| 132 | (x "x" `(-- 'x))) | ||
| 133 | "xxx") | ||
| 134 | ;; FIXME: Since string positions start at 0, this should | ||
| 135 | ;; really be '(3 x x x 0) !! | ||
| 136 | '(4 x x x 1))) | ||
| 137 | (should (equal (peg-parse-string ((s (region (list (* x)))) | ||
| 138 | (x "x" `(-- 'x 'y))) | ||
| 139 | "xxx") | ||
| 140 | '(4 (x y x y x y) 1))) | ||
| 141 | (should (equal (with-temp-buffer | ||
| 142 | (save-excursion (insert "abcdef")) | ||
| 143 | (list | ||
| 144 | (peg-run (peg "a" | ||
| 145 | (replace "bc" "x") | ||
| 146 | (replace "de" "y") | ||
| 147 | "f")) | ||
| 148 | (buffer-string))) | ||
| 149 | '(t "axyf"))) | ||
| 150 | (with-temp-buffer | ||
| 151 | (insert "toro") | ||
| 152 | (goto-char (point-min)) | ||
| 153 | (should (peg-run (peg "to"))) | ||
| 154 | (should-not (peg-run (peg "to"))) | ||
| 155 | (should (peg-run (peg "ro"))) | ||
| 156 | (should (eobp))) | ||
| 157 | (with-temp-buffer | ||
| 158 | (insert " ") | ||
| 159 | (goto-char (point-min)) | ||
| 160 | (peg-run (peg (+ (syntax-class whitespace)))) | ||
| 161 | (should (eobp))) | ||
| 162 | ) | ||
| 163 | |||
| 164 | ;;; Examples: | ||
| 165 | |||
| 166 | ;; peg-ex-recognize-int recognizes integers. An integer begins with a | ||
| 167 | ;; optional sign, then follows one or more digits. Digits are all | ||
| 168 | ;; characters from 0 to 9. | ||
| 169 | ;; | ||
| 170 | ;; Notes: | ||
| 171 | ;; 1) "" matches the empty sequence, i.e. matches without consuming | ||
| 172 | ;; input. | ||
| 173 | ;; 2) [0-9] is the character range from 0 to 9. This can also be | ||
| 174 | ;; written as (range ?0 ?9). Note that 0-9 is a symbol. | ||
| 175 | (defun peg-ex-recognize-int () | ||
| 176 | (with-peg-rules ((number sign digit (* digit)) | ||
| 177 | (sign (or "+" "-" "")) | ||
| 178 | (digit [0-9])) | ||
| 179 | (peg-run (peg number)))) | ||
| 180 | |||
| 181 | ;; peg-ex-parse-int recognizes integers and computes the corresponding | ||
| 182 | ;; value. The grammar is the same as for `peg-ex-recognize-int' | ||
| 183 | ;; augmented with parsing actions. Unfortunaletly, the actions add | ||
| 184 | ;; quite a bit of clutter. | ||
| 185 | ;; | ||
| 186 | ;; The actions for the sign rule push -1 on the stack for a minus sign | ||
| 187 | ;; and 1 for plus or no sign. | ||
| 188 | ;; | ||
| 189 | ;; The action for the digit rule pushes the value for a single digit. | ||
| 190 | ;; | ||
| 191 | ;; The action `(a b -- (+ (* a 10) b)), takes two items from the stack | ||
| 192 | ;; and pushes the first digit times 10 added to the second digit. | ||
| 193 | ;; | ||
| 194 | ;; The action `(sign val -- (* sign val)), multiplies val with the | ||
| 195 | ;; sign (1 or -1). | ||
| 196 | (defun peg-ex-parse-int () | ||
| 197 | (with-peg-rules ((number sign digit (* digit | ||
| 198 | `(a b -- (+ (* a 10) b))) | ||
| 199 | `(sign val -- (* sign val))) | ||
| 200 | (sign (or (and "+" `(-- 1)) | ||
| 201 | (and "-" `(-- -1)) | ||
| 202 | (and "" `(-- 1)))) | ||
| 203 | (digit [0-9] `(-- (- (char-before) ?0)))) | ||
| 204 | (peg-run (peg number)))) | ||
| 205 | |||
| 206 | ;; Put point after the ) and press C-x C-e | ||
| 207 | ;; (peg-ex-parse-int)-234234 | ||
| 208 | |||
| 209 | ;; Parse arithmetic expressions and compute the result as side effect. | ||
| 210 | (defun peg-ex-arith () | ||
| 211 | (peg-parse | ||
| 212 | (expr _ sum eol) | ||
| 213 | (sum product (* (or (and "+" _ product `(a b -- (+ a b))) | ||
| 214 | (and "-" _ product `(a b -- (- a b)))))) | ||
| 215 | (product value (* (or (and "*" _ value `(a b -- (* a b))) | ||
| 216 | (and "/" _ value `(a b -- (/ a b)))))) | ||
| 217 | (value (or (and (substring number) `(string -- (string-to-number string))) | ||
| 218 | (and "(" _ sum ")" _))) | ||
| 219 | (number (+ [0-9]) _) | ||
| 220 | (_ (* [" \t"])) | ||
| 221 | (eol (or "\n" "\r\n" "\r")))) | ||
| 222 | |||
| 223 | ;; (peg-ex-arith) 1 + 2 * 3 * (4 + 5) | ||
| 224 | ;; (peg-ex-arith) 1 + 2 ^ 3 * (4 + 5) ; fails to parse | ||
| 225 | |||
| 226 | ;; Parse URI according to RFC 2396. | ||
| 227 | (defun peg-ex-uri () | ||
| 228 | (peg-parse | ||
| 229 | (URI-reference (or absoluteURI relativeURI) | ||
| 230 | (or (and "#" (substring fragment)) | ||
| 231 | `(-- nil)) | ||
| 232 | `(scheme user host port path query fragment -- | ||
| 233 | (list :scheme scheme :user user | ||
| 234 | :host host :port port | ||
| 235 | :path path :query query | ||
| 236 | :fragment fragment))) | ||
| 237 | (absoluteURI (substring scheme) ":" (or hier-part opaque-part)) | ||
| 238 | (hier-part ;(-- user host port path query) | ||
| 239 | (or net-path | ||
| 240 | (and `(-- nil nil nil) | ||
| 241 | abs-path)) | ||
| 242 | (or (and "?" (substring query)) | ||
| 243 | `(-- nil))) | ||
| 244 | (net-path "//" authority (or abs-path `(-- nil))) | ||
| 245 | (abs-path "/" path-segments) | ||
| 246 | (path-segments segment (list (* "/" segment)) `(s l -- (cons s l))) | ||
| 247 | (segment (substring (* pchar) (* ";" param))) | ||
| 248 | (param (* pchar)) | ||
| 249 | (pchar (or unreserved escaped [":@&=+$,"])) | ||
| 250 | (query (* uric)) | ||
| 251 | (fragment (* uric)) | ||
| 252 | (relativeURI (or net-path abs-path rel-path) (opt "?" query)) | ||
| 253 | (rel-path rel-segment (opt abs-path)) | ||
| 254 | (rel-segment (+ unreserved escaped [";@&=+$,"])) | ||
| 255 | (authority (or server reg-name)) | ||
| 256 | (server (or (and (or (and (substring userinfo) "@") | ||
| 257 | `(-- nil)) | ||
| 258 | hostport) | ||
| 259 | `(-- nil nil nil))) | ||
| 260 | (userinfo (* (or unreserved escaped [";:&=+$,"]))) | ||
| 261 | (hostport (substring host) (or (and ":" (substring port)) | ||
| 262 | `(-- nil))) | ||
| 263 | (host (or hostname ipv4address)) | ||
| 264 | (hostname (* domainlabel ".") toplabel (opt ".")) | ||
| 265 | (domainlabel alphanum | ||
| 266 | (opt (* (or alphanum "-") (if alphanum)) | ||
| 267 | alphanum)) | ||
| 268 | (toplabel alpha | ||
| 269 | (* (or alphanum "-") (if alphanum)) | ||
| 270 | alphanum) | ||
| 271 | (ipv4address (+ digit) "." (+ digit) "." (+ digit) "." (+ digit)) | ||
| 272 | (port (* digit)) | ||
| 273 | (scheme alpha (* (or alpha digit ["+-."]))) | ||
| 274 | (reg-name (or unreserved escaped ["$,;:@&=+"])) | ||
| 275 | (opaque-part uric-no-slash (* uric)) | ||
| 276 | (uric (or reserved unreserved escaped)) | ||
| 277 | (uric-no-slash (or unreserved escaped [";?:@&=+$,"])) | ||
| 278 | (reserved (set ";/?:@&=+$,")) | ||
| 279 | (unreserved (or alphanum mark)) | ||
| 280 | (escaped "%" hex hex) | ||
| 281 | (hex (or digit [A-F] [a-f])) | ||
| 282 | (mark (set "-_.!~*'()")) | ||
| 283 | (alphanum (or alpha digit)) | ||
| 284 | (alpha (or lowalpha upalpha)) | ||
| 285 | (lowalpha [a-z]) | ||
| 286 | (upalpha [A-Z]) | ||
| 287 | (digit [0-9]))) | ||
| 288 | |||
| 289 | ;; (peg-ex-uri)http://luser@www.foo.com:8080/bar/baz.html?x=1#foo | ||
| 290 | ;; (peg-ex-uri)file:/bar/baz.html?foo=df#x | ||
| 291 | |||
| 292 | ;; Split STRING where SEPARATOR occurs. | ||
| 293 | (defun peg-ex-split (string separator) | ||
| 294 | (peg-parse-string ((s (list (* (* sep) elt))) | ||
| 295 | (elt (substring (+ (not sep) (any)))) | ||
| 296 | (sep (= separator))) | ||
| 297 | string)) | ||
| 298 | |||
| 299 | ;; (peg-ex-split "-abc-cd-" "-") | ||
| 300 | |||
| 301 | ;; Parse a lisp style Sexp. | ||
| 302 | ;; [To keep the example short, ' and . are handled as ordinary symbol.] | ||
| 303 | (defun peg-ex-lisp () | ||
| 304 | (peg-parse | ||
| 305 | (sexp _ (or string list number symbol)) | ||
| 306 | (_ (* (or [" \n\t"] comment))) | ||
| 307 | (comment ";" (* (not (or "\n" (eob))) (any))) | ||
| 308 | (string "\"" (substring (* (not "\"") (any))) "\"") | ||
| 309 | (number (substring (opt (set "+-")) (+ digit)) | ||
| 310 | (if terminating) | ||
| 311 | `(string -- (string-to-number string))) | ||
| 312 | (symbol (substring (and symchar (* (not terminating) symchar))) | ||
| 313 | `(s -- (intern s))) | ||
| 314 | (symchar [a-z A-Z 0-9 "-;!#%&'*+,./:;<=>?@[]^_`{|}~"]) | ||
| 315 | (list "(" `(-- (cons nil nil)) `(hd -- hd hd) | ||
| 316 | (* sexp `(tl e -- (setcdr tl (list e)))) | ||
| 317 | _ ")" `(hd _tl -- (cdr hd))) | ||
| 318 | (digit [0-9]) | ||
| 319 | (terminating (or (set " \n\t();\"'") (eob))))) | ||
| 320 | |||
| 321 | ;; (peg-ex-lisp) | ||
| 322 | |||
| 323 | ;; We try to detect left recursion and report it as error. | ||
| 324 | (defun peg-ex-left-recursion () | ||
| 325 | (eval '(peg-parse (exp (or term | ||
| 326 | (and exp "+" exp))) | ||
| 327 | (term (or digit | ||
| 328 | (and term "*" term))) | ||
| 329 | (digit [0-9])) | ||
| 330 | t)) | ||
| 331 | |||
| 332 | (defun peg-ex-infinite-loop () | ||
| 333 | (eval '(peg-parse (exp (* (or "x" | ||
| 334 | "y" | ||
| 335 | (action (foo)))))) | ||
| 336 | t)) | ||
| 337 | |||
| 338 | ;; Some efficiency problems: | ||
| 339 | |||
| 340 | ;; Find the last digit in a string. | ||
| 341 | ;; Recursive definition with excessive stack usage. | ||
| 342 | (defun peg-ex-last-digit (string) | ||
| 343 | (peg-parse-string ((s (or (and (any) s) | ||
| 344 | (substring [0-9])))) | ||
| 345 | string)) | ||
| 346 | |||
| 347 | ;; (peg-ex-last-digit "ab0cd1ef2gh") | ||
| 348 | ;; (peg-ex-last-digit (make-string 50 ?-)) | ||
| 349 | ;; (peg-ex-last-digit (make-string 1000 ?-)) | ||
| 350 | |||
| 351 | ;; Find the last digit without recursion. Doesn't run out of stack, | ||
| 352 | ;; but probably still too inefficient for large inputs. | ||
| 353 | (defun peg-ex-last-digit2 (string) | ||
| 354 | (peg-parse-string ((s `(-- nil) | ||
| 355 | (+ (* (not digit) (any)) | ||
| 356 | (substring digit) | ||
| 357 | `(_d1 d2 -- d2))) | ||
| 358 | (digit [0-9])) | ||
| 359 | string)) | ||
| 360 | |||
| 361 | ;; (peg-ex-last-digit2 "ab0cd1ef2gh") | ||
| 362 | ;; (peg-ex-last-digit2 (concat (make-string 500000 ?-) "8a9b")) | ||
| 363 | ;; (peg-ex-last-digit2 (make-string 500000 ?-)) | ||
| 364 | ;; (peg-ex-last-digit2 (make-string 500000 ?5)) | ||
| 365 | |||
| 366 | (provide 'peg-tests) | ||
| 367 | ;;; peg-tests.el ends here | ||