diff options
| author | Eric Abrahamsen | 2022-12-05 21:59:03 -0800 |
|---|---|---|
| committer | Eric Abrahamsen | 2024-03-30 15:19:47 -0700 |
| commit | 8bee4060ea42c61e52ebe6487ff97bc095261050 (patch) | |
| tree | 2d9d9e7ef775fdb885cb53ead590103f599a893e | |
| parent | 0df8dadde2edaee406c76d639a22c70d0b03426b (diff) | |
| download | emacs-8bee4060ea42c61e52ebe6487ff97bc095261050.tar.gz emacs-8bee4060ea42c61e52ebe6487ff97bc095261050.zip | |
Add peg.el as a built-in library
* lisp/progmodes/peg.el: New file, taken from ELPA package.
* test/lisp/peg-tests.el: Package tests.
* doc/lispref/peg.texi: Documentation.
| -rw-r--r-- | doc/lispref/Makefile.in | 1 | ||||
| -rw-r--r-- | doc/lispref/elisp.texi | 8 | ||||
| -rw-r--r-- | doc/lispref/peg.texi | 351 | ||||
| -rw-r--r-- | etc/NEWS | 8 | ||||
| -rw-r--r-- | lisp/progmodes/peg.el | 944 | ||||
| -rw-r--r-- | test/lisp/peg-tests.el | 367 |
6 files changed, 1679 insertions, 0 deletions
diff --git a/doc/lispref/Makefile.in b/doc/lispref/Makefile.in index 0a228271be3..4ceffd7d7d3 100644 --- a/doc/lispref/Makefile.in +++ b/doc/lispref/Makefile.in | |||
| @@ -112,6 +112,7 @@ srcs = \ | |||
| 112 | $(srcdir)/os.texi \ | 112 | $(srcdir)/os.texi \ |
| 113 | $(srcdir)/package.texi \ | 113 | $(srcdir)/package.texi \ |
| 114 | $(srcdir)/parsing.texi \ | 114 | $(srcdir)/parsing.texi \ |
| 115 | $(srcdir)/peg.texi \ | ||
| 115 | $(srcdir)/positions.texi \ | 116 | $(srcdir)/positions.texi \ |
| 116 | $(srcdir)/processes.texi \ | 117 | $(srcdir)/processes.texi \ |
| 117 | $(srcdir)/records.texi \ | 118 | $(srcdir)/records.texi \ |
diff --git a/doc/lispref/elisp.texi b/doc/lispref/elisp.texi index 71139db4359..ec93a0b9c8a 100644 --- a/doc/lispref/elisp.texi +++ b/doc/lispref/elisp.texi | |||
| @@ -222,6 +222,7 @@ To view this manual in other formats, click | |||
| 222 | * Non-ASCII Characters:: Non-ASCII text in buffers and strings. | 222 | * Non-ASCII Characters:: Non-ASCII text in buffers and strings. |
| 223 | * Searching and Matching:: Searching buffers for strings or regexps. | 223 | * Searching and Matching:: Searching buffers for strings or regexps. |
| 224 | * Syntax Tables:: The syntax table controls word and list parsing. | 224 | * Syntax Tables:: The syntax table controls word and list parsing. |
| 225 | * Parsing Expression Grammars:: Parsing structured buffer text. | ||
| 225 | * Parsing Program Source:: Generate syntax tree for program sources. | 226 | * Parsing Program Source:: Generate syntax tree for program sources. |
| 226 | * Abbrevs:: How Abbrev mode works, and its data structures. | 227 | * Abbrevs:: How Abbrev mode works, and its data structures. |
| 227 | 228 | ||
| @@ -1365,6 +1366,12 @@ Syntax Tables | |||
| 1365 | * Syntax Table Internals:: How syntax table information is stored. | 1366 | * Syntax Table Internals:: How syntax table information is stored. |
| 1366 | * Categories:: Another way of classifying character syntax. | 1367 | * Categories:: Another way of classifying character syntax. |
| 1367 | 1368 | ||
| 1369 | Parsing Expression Grammars | ||
| 1370 | |||
| 1371 | * PEX Definitions:: The syntax of PEX rules | ||
| 1372 | * Parsing Actions:: Running actions upon successful parsing. | ||
| 1373 | * Writing PEG Rules:: Tips for writing parsing rules. | ||
| 1374 | |||
| 1368 | Parsing Program Source | 1375 | Parsing Program Source |
| 1369 | 1376 | ||
| 1370 | * Language Grammar:: Loading tree-sitter language grammar. | 1377 | * Language Grammar:: Loading tree-sitter language grammar. |
| @@ -1720,6 +1727,7 @@ Object Internals | |||
| 1720 | 1727 | ||
| 1721 | @include searching.texi | 1728 | @include searching.texi |
| 1722 | @include syntax.texi | 1729 | @include syntax.texi |
| 1730 | @include peg.texi | ||
| 1723 | @include parsing.texi | 1731 | @include parsing.texi |
| 1724 | @include abbrevs.texi | 1732 | @include abbrevs.texi |
| 1725 | @include threads.texi | 1733 | @include threads.texi |
diff --git a/doc/lispref/peg.texi b/doc/lispref/peg.texi new file mode 100644 index 00000000000..ef4dfa7653e --- /dev/null +++ b/doc/lispref/peg.texi | |||
| @@ -0,0 +1,351 @@ | |||
| 1 | @c -*-texinfo-*- | ||
| 2 | @c This is part of the GNU Emacs Lisp Reference Manual. | ||
| 3 | @c Copyright (C) 1990--1995, 1998--1999, 2001--2023 Free Software | ||
| 4 | @c Foundation, Inc. | ||
| 5 | @c See the file elisp.texi for copying conditions. | ||
| 6 | @node Parsing Expression Grammars | ||
| 7 | @chapter Parsing Expression Grammars | ||
| 8 | @cindex text parsing | ||
| 9 | @cindex parsing expression grammar | ||
| 10 | |||
| 11 | Emacs Lisp provides several tools for parsing and matching text, | ||
| 12 | from regular expressions (@pxref{Regular Expressions}) to full | ||
| 13 | @acronym{LL} grammar parsers (@pxref{Top,, Bovine parser | ||
| 14 | development,bovine}). @dfn{Parsing Expression Grammars} | ||
| 15 | (@acronym{PEG}) are another approach to text parsing that offer more | ||
| 16 | structure and composibility than regular expressions, but less | ||
| 17 | complexity than context-free grammars. | ||
| 18 | |||
| 19 | A @acronym{PEG} parser is defined as a list of named rules, each of | ||
| 20 | which matches text patterns, and/or contains references to other | ||
| 21 | rules. Parsing is initiated with the function @code{peg-run} or the | ||
| 22 | macro @code{peg-parse} (see below), and parses text after point in the | ||
| 23 | current buffer, using a given set of rules. | ||
| 24 | |||
| 25 | @cindex parsing expression | ||
| 26 | The definition of each rule is referred to as a @dfn{parsing | ||
| 27 | expression} (@acronym{PEX}), and can consist of a literal string, a | ||
| 28 | regexp-like character range or set, a peg-specific construct | ||
| 29 | resembling an elisp function call, a reference to another rule, or a | ||
| 30 | combination of any of these. A grammar is expressed as a tree of | ||
| 31 | rules in which one rule is typically treated as a ``root'' or | ||
| 32 | ``entry-point'' rule. For instance: | ||
| 33 | |||
| 34 | @example | ||
| 35 | @group | ||
| 36 | ((number sign digit (* digit)) | ||
| 37 | (sign (or "+" "-" "")) | ||
| 38 | (digit [0-9])) | ||
| 39 | @end group | ||
| 40 | @end example | ||
| 41 | |||
| 42 | Once defined, grammars can be used to parse text after point in the | ||
| 43 | current buffer, in the following ways: | ||
| 44 | |||
| 45 | @defmac peg-parse &rest pexs | ||
| 46 | Match @var{pexs} at point. If @var{pexs} is a list of PEG rules, the | ||
| 47 | first rule is considered the ``entry-point'': | ||
| 48 | @end defmac | ||
| 49 | |||
| 50 | @example | ||
| 51 | @group | ||
| 52 | (peg-parse | ||
| 53 | ((number sign digit (* digit)) | ||
| 54 | (sign (or "+" "-" "")) | ||
| 55 | (digit [0-9]))) | ||
| 56 | @end group | ||
| 57 | @end example | ||
| 58 | |||
| 59 | This macro represents the simplest use of the @acronym{PEG} library, | ||
| 60 | but also the least flexible, as the rules must be written directly | ||
| 61 | into the source code. A more flexible approach involves use of three | ||
| 62 | macros in conjunction: @code{with-peg-rules}, a @code{let}-like | ||
| 63 | construct that makes a set of rules available within the macro body; | ||
| 64 | @code{peg-run}, which initiates parsing given a single rule; and | ||
| 65 | @code{peg}, which is used to wrap the entry-point rule name. In fact, | ||
| 66 | a call to @code{peg-parse} expands to just this set of calls. The | ||
| 67 | above example could be written as: | ||
| 68 | |||
| 69 | @example | ||
| 70 | @group | ||
| 71 | (with-peg-rules | ||
| 72 | ((number sign digit (* digit)) | ||
| 73 | (sign (or "+" "-" "")) | ||
| 74 | (digit [0-9])) | ||
| 75 | (peg-run (peg number))) | ||
| 76 | @end group | ||
| 77 | @end example | ||
| 78 | |||
| 79 | This allows more explicit control over the ``entry-point'' of parsing, | ||
| 80 | and allows the combination of rules from different sources. | ||
| 81 | |||
| 82 | Individual rules can also be defined using a more @code{defun}-like | ||
| 83 | syntax, using the macro @code{define-peg-rule}: | ||
| 84 | |||
| 85 | @example | ||
| 86 | (define-peg-rule digit () | ||
| 87 | [0-9]) | ||
| 88 | @end example | ||
| 89 | |||
| 90 | This also allows for rules that accept an argument (supplied by the | ||
| 91 | @code{funcall} PEG rule). | ||
| 92 | |||
| 93 | Another possibility is to define a named set of rules with | ||
| 94 | @code{define-peg-ruleset}: | ||
| 95 | |||
| 96 | @example | ||
| 97 | (define-peg-ruleset number-grammar | ||
| 98 | '((number sign digit (* digit)) | ||
| 99 | digit ;; A reference to the definition above. | ||
| 100 | (sign (or "+" "-" "")))) | ||
| 101 | @end example | ||
| 102 | |||
| 103 | Rules and rulesets defined this way can be referred to by name in | ||
| 104 | later calls to @code{peg-run} or @code{with-peg-rules}: | ||
| 105 | |||
| 106 | @example | ||
| 107 | (with-peg-rules number-grammar | ||
| 108 | (peg-run (peg number))) | ||
| 109 | @end example | ||
| 110 | |||
| 111 | By default, calls to @code{peg-run} or @code{peg-parse} produce no | ||
| 112 | output: parsing simply moves point. In order to return or otherwise | ||
| 113 | act upon parsed strings, rules can include @dfn{actions}, see | ||
| 114 | @ref{Parsing Actions}. | ||
| 115 | |||
| 116 | @menu | ||
| 117 | * PEX Definitions:: The syntax of PEX rules. | ||
| 118 | * Parsing Actions:: Running actions upon successful parsing. | ||
| 119 | * Writing PEG Rules:: Tips for writing parsing rules. | ||
| 120 | @end menu | ||
| 121 | |||
| 122 | @node PEX Definitions | ||
| 123 | @section PEX Definitions | ||
| 124 | |||
| 125 | Parsing expressions can be defined using the following syntax: | ||
| 126 | |||
| 127 | @table @code | ||
| 128 | @item (and E1 E2 ...) | ||
| 129 | A sequence of @acronym{PEX}s that must all be matched. The @code{and} form is | ||
| 130 | optional and implicit. | ||
| 131 | |||
| 132 | @item (or E1 E2 ...) | ||
| 133 | Prioritized choices, meaning that, as in Elisp, the choices are tried | ||
| 134 | in order, and the first successful match is used. Note that this is | ||
| 135 | distinct from context-free grammars, in which selection between | ||
| 136 | multiple matches is indeterminate. | ||
| 137 | |||
| 138 | @item (any) | ||
| 139 | Matches any single character, as the regexp ``.''. | ||
| 140 | |||
| 141 | @item @var{string} | ||
| 142 | A literal string. | ||
| 143 | |||
| 144 | @item (char @var{C}) | ||
| 145 | A single character @var{C}, as an Elisp character literal. | ||
| 146 | |||
| 147 | @item (* @var{E}) | ||
| 148 | Zero or more instances of expression @var{E}, as the regexp @samp{*}. | ||
| 149 | Matching is always ``greedy''. | ||
| 150 | |||
| 151 | @item (+ @var{E}) | ||
| 152 | One or more instances of expression @var{E}, as the regexp @samp{+}. | ||
| 153 | Matching is always ``greedy''. | ||
| 154 | |||
| 155 | @item (opt @var{E}) | ||
| 156 | Zero or one instance of expression @var{E}, as the regexp @samp{?}. | ||
| 157 | |||
| 158 | @item SYMBOL | ||
| 159 | A symbol representing a previously-defined PEG rule. | ||
| 160 | |||
| 161 | @item (range CH1 CH2) | ||
| 162 | The character range between CH1 and CH2, as the regexp @samp{[CH1-CH2]}. | ||
| 163 | |||
| 164 | @item [CH1-CH2 "+*" ?x] | ||
| 165 | A character set, which can include ranges, character literals, or | ||
| 166 | strings of characters. | ||
| 167 | |||
| 168 | @item [ascii cntrl] | ||
| 169 | A list of named character classes. | ||
| 170 | |||
| 171 | @item (syntax-class @var{NAME}) | ||
| 172 | A single syntax class. | ||
| 173 | |||
| 174 | @item (funcall E ARGS...) | ||
| 175 | Call @acronym{PEX} E (previously defined with @code{define-peg-rule}) | ||
| 176 | with arguments @var{ARGS}. | ||
| 177 | |||
| 178 | @item (null) | ||
| 179 | The empty string. | ||
| 180 | |||
| 181 | @end table | ||
| 182 | |||
| 183 | The following expressions are used as anchors or tests -- they do not | ||
| 184 | move point, but return a boolean value which can be used to constrain | ||
| 185 | matches as a way of controlling the parsing process (@pxref{Writing | ||
| 186 | PEG Rules}). | ||
| 187 | |||
| 188 | @table @code | ||
| 189 | @item (bob) | ||
| 190 | Beginning of buffer. | ||
| 191 | |||
| 192 | @item (eob) | ||
| 193 | End of buffer. | ||
| 194 | |||
| 195 | @item (bol) | ||
| 196 | Beginning of line. | ||
| 197 | |||
| 198 | @item (eol) | ||
| 199 | End of line. | ||
| 200 | |||
| 201 | @item (bow) | ||
| 202 | Beginning of word. | ||
| 203 | |||
| 204 | @item (eow) | ||
| 205 | End of word. | ||
| 206 | |||
| 207 | @item (bos) | ||
| 208 | Beginning of symbol. | ||
| 209 | |||
| 210 | @item (eos) | ||
| 211 | End of symbol. | ||
| 212 | |||
| 213 | @item (if E) | ||
| 214 | Returns non-@code{nil} if parsing @acronym{PEX} E from point succeeds (point | ||
| 215 | is not moved). | ||
| 216 | |||
| 217 | @item (not E) | ||
| 218 | Returns non-@code{nil} if parsing @acronym{PEX} E from point fails (point | ||
| 219 | is not moved). | ||
| 220 | |||
| 221 | @item (guard EXP) | ||
| 222 | Treats the value of the Lisp expression EXP as a boolean. | ||
| 223 | |||
| 224 | @end table | ||
| 225 | |||
| 226 | @vindex peg-char-classes | ||
| 227 | Character class matching can use the same named character classes as | ||
| 228 | in regular expressions (@pxref{Top,, Character Classes,elisp}) | ||
| 229 | |||
| 230 | @node Parsing Actions | ||
| 231 | @section Parsing Actions | ||
| 232 | |||
| 233 | @cindex parsing actions | ||
| 234 | @cindex parsing stack | ||
| 235 | By default the process of parsing simply moves point in the current | ||
| 236 | buffer, ultimately returning @code{t} if the parsing succeeds, and | ||
| 237 | @code{nil} if it doesn't. It's also possible to define ``actions'' | ||
| 238 | that can run arbitrary Elisp at certain points in the parsed text. | ||
| 239 | These actions can optionally affect something called the @dfn{parsing | ||
| 240 | stack}, which is a list of values returned by the parsing process. | ||
| 241 | These actions only run (and only return values) if the parsing process | ||
| 242 | ultimately succeeds; if it fails the action code is not run at all. | ||
| 243 | |||
| 244 | Actions can be added anywhere in the definition of a rule. They are | ||
| 245 | distinguished from parsing expressions by an initial backquote | ||
| 246 | (@samp{`}), followed by a parenthetical form that must contain a pair | ||
| 247 | of hyphens (@samp{--}) somewhere within it. Symbols to the left of | ||
| 248 | the hyphens are bound to values popped from the stack (they are | ||
| 249 | somewhat analogous to the argument list of a lambda form). Values | ||
| 250 | produced by code to the right are pushed to the stack (analogous to | ||
| 251 | the return value of the lambda). For instance, the previous grammar | ||
| 252 | can be augmented with actions to return the parsed number as an actual | ||
| 253 | integer: | ||
| 254 | |||
| 255 | @example | ||
| 256 | (with-peg-rules ((number sign digit (* digit | ||
| 257 | `(a b -- (+ (* a 10) b))) | ||
| 258 | `(sign val -- (* sign val))) | ||
| 259 | (sign (or (and "+" `(-- 1)) | ||
| 260 | (and "-" `(-- -1)) | ||
| 261 | (and "" `(-- 1)))) | ||
| 262 | (digit [0-9] `(-- (- (char-before) ?0)))) | ||
| 263 | (peg-run (peg number))) | ||
| 264 | @end example | ||
| 265 | |||
| 266 | There must be values on the stack before they can be popped and | ||
| 267 | returned -- if there aren't enough stack values to bind to an action's | ||
| 268 | left-hand terms, they will be bound to @code{nil}. An action with | ||
| 269 | only right-hand terms will push values to the stack; an action with | ||
| 270 | only left-hand terms will consume (and discard) values from the stack. | ||
| 271 | At the end of parsing, stack values are returned as a flat list. | ||
| 272 | |||
| 273 | To return the string matched by a @acronym{PEX} (instead of simply | ||
| 274 | moving point over it), a rule like this can be used: | ||
| 275 | |||
| 276 | @example | ||
| 277 | (one-word | ||
| 278 | `(-- (point)) | ||
| 279 | (+ [word]) | ||
| 280 | `(start -- (buffer-substring start (point)))) | ||
| 281 | @end example | ||
| 282 | |||
| 283 | The first action pushes the initial value of point to the stack. The | ||
| 284 | intervening @acronym{PEX} moves point over the next word. The second | ||
| 285 | action pops the previous value from the stack (binding it to the | ||
| 286 | variable @code{start}), and uses that value to extract a substring | ||
| 287 | from the buffer and push it to the stack. This pattern is so common | ||
| 288 | that @acronym{PEG} provides a shorthand function that does exactly the | ||
| 289 | above, along with a few other shorthands for common scenarios: | ||
| 290 | |||
| 291 | @table @code | ||
| 292 | @item (substring @var{E}) | ||
| 293 | Match @acronym{PEX} @var{E} and push the matched string to the stack. | ||
| 294 | |||
| 295 | @item (region @var{E}) | ||
| 296 | Match @var{E} and push the start and end positions of the matched | ||
| 297 | region to the stack. | ||
| 298 | |||
| 299 | @item (replace @var{E} @var{replacement}) | ||
| 300 | Match @var{E} and replaced the matched region with the string @var{replacement}. | ||
| 301 | |||
| 302 | @item (list @var{E}) | ||
| 303 | Match @var{E}, collect all values produced by @var{E} (and its | ||
| 304 | sub-expressions) into a list, and push that list to the stack. Stack | ||
| 305 | values are typically returned as a flat list; this is a way of | ||
| 306 | ``grouping'' values together. | ||
| 307 | @end table | ||
| 308 | |||
| 309 | @node Writing PEG Rules | ||
| 310 | @section Writing PEG Rules | ||
| 311 | |||
| 312 | Something to be aware of when writing PEG rules is that they are | ||
| 313 | greedy. Rules which can consume a variable amount of text will always | ||
| 314 | consume the maximum amount possible, even if that causes a rule that | ||
| 315 | might otherwise have matched to fail later on -- there is no | ||
| 316 | backtracking. For instance, this rule will never succeed: | ||
| 317 | |||
| 318 | @example | ||
| 319 | (forest (+ "tree" (* [blank])) "tree" (eol)) | ||
| 320 | @end example | ||
| 321 | |||
| 322 | The @acronym{PEX} @code{(+ "tree" (* [blank]))} will consume all | ||
| 323 | repetitions of the word ``tree'', leaving none to match the final | ||
| 324 | @code{"tree"}. | ||
| 325 | |||
| 326 | In these situations, the desired result can be obtained by using | ||
| 327 | predicates and guards -- namely the @code{not}, @code{if} and | ||
| 328 | @code{guard} expressions -- to constrain behavior. For instance: | ||
| 329 | |||
| 330 | @example | ||
| 331 | (forest (+ "tree" (* [blank])) (not (eol)) "tree" (eol)) | ||
| 332 | @end example | ||
| 333 | |||
| 334 | The @code{if} and @code{not} operators accept a parsing expression and | ||
| 335 | interpret it as a boolean, without moving point. The contents of a | ||
| 336 | @code{guard} operator are evaluated as regular Lisp (not a | ||
| 337 | @acronym{PEX}) and should return a boolean value. A @code{nil} value | ||
| 338 | causes the match to fail. | ||
| 339 | |||
| 340 | Another potentially unexpected behavior is that parsing will move | ||
| 341 | point as far as possible, even if the parsing ultimately fails. This | ||
| 342 | rule: | ||
| 343 | |||
| 344 | @example | ||
| 345 | (end-game "game" (eob)) | ||
| 346 | @end example | ||
| 347 | |||
| 348 | when run in a buffer containing the text ``game over'' after point, | ||
| 349 | will move point to just after ``game'' then halt parsing, returning | ||
| 350 | @code{nil}. Successful parsing will always return @code{t}, or the | ||
| 351 | contexts of the parsing stack. | ||
| @@ -1585,6 +1585,14 @@ forwards-compatibility Compat package from GNU ELPA. This allows | |||
| 1585 | built-in packages to use the library more effectively, and helps | 1585 | built-in packages to use the library more effectively, and helps |
| 1586 | preventing the installation of Compat if unnecessary. | 1586 | preventing the installation of Compat if unnecessary. |
| 1587 | 1587 | ||
| 1588 | +++ | ||
| 1589 | ** New package PEG. | ||
| 1590 | Emacs now includes a library for writing (P)arsing (E)xpression | ||
| 1591 | (G)rammars, an approach to text parsing that provides more structure | ||
| 1592 | than regular expressions, but less complexity than context-free | ||
| 1593 | grammars. The Info manual "(elisp) Parsing Expression Grammars" has | ||
| 1594 | documentation and examples. | ||
| 1595 | |||
| 1588 | 1596 | ||
| 1589 | * Incompatible Lisp Changes in Emacs 30.1 | 1597 | * Incompatible Lisp Changes in Emacs 30.1 |
| 1590 | 1598 | ||
diff --git a/lisp/progmodes/peg.el b/lisp/progmodes/peg.el new file mode 100644 index 00000000000..2eb4a7384d0 --- /dev/null +++ b/lisp/progmodes/peg.el | |||
| @@ -0,0 +1,944 @@ | |||
| 1 | ;;; peg.el --- Parsing Expression Grammars in Emacs Lisp -*- lexical-binding:t -*- | ||
| 2 | |||
| 3 | ;; Copyright (C) 2008-2023 Free Software Foundation, Inc. | ||
| 4 | ;; | ||
| 5 | ;; Author: Helmut Eller <eller.helmut@gmail.com> | ||
| 6 | ;; Maintainer: Stefan Monnier <monnier@iro.umontreal.ca> | ||
| 7 | ;; Version: 1.0.1 | ||
| 8 | ;; | ||
| 9 | ;; This program is free software: you can redistribute it and/or modify | ||
| 10 | ;; it under the terms of the GNU General Public License as published by | ||
| 11 | ;; the Free Software Foundation, either version 3 of the License, or | ||
| 12 | ;; (at your option) any later version. | ||
| 13 | ;; | ||
| 14 | ;; This program is distributed in the hope that it will be useful, | ||
| 15 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 16 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 17 | ;; GNU General Public License for more details. | ||
| 18 | ;; | ||
| 19 | ;; You should have received a copy of the GNU General Public License | ||
| 20 | ;; along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
| 21 | ;; | ||
| 22 | ;;; Commentary: | ||
| 23 | ;; | ||
| 24 | ;; This package implements Parsing Expression Grammars for Emacs Lisp. | ||
| 25 | |||
| 26 | ;; Parsing Expression Grammars (PEG) are a formalism in the spirit of | ||
| 27 | ;; Context Free Grammars (CFG) with some simplifications which makes | ||
| 28 | ;; the implementation of PEGs as recursive descent parsers particularly | ||
| 29 | ;; simple and easy to understand [Ford, Baker]. | ||
| 30 | ;; PEGs are more expressive than regexps and potentially easier to use. | ||
| 31 | ;; | ||
| 32 | ;; This file implements the macros `define-peg-rule', `with-peg-rules', and | ||
| 33 | ;; `peg-parse' which parses the current buffer according to a PEG. | ||
| 34 | ;; E.g. we can match integers with: | ||
| 35 | ;; | ||
| 36 | ;; (with-peg-rules | ||
| 37 | ;; ((number sign digit (* digit)) | ||
| 38 | ;; (sign (or "+" "-" "")) | ||
| 39 | ;; (digit [0-9])) | ||
| 40 | ;; (peg-run (peg number))) | ||
| 41 | ;; or | ||
| 42 | ;; (define-peg-rule digit () | ||
| 43 | ;; [0-9]) | ||
| 44 | ;; (peg-parse (number sign digit (* digit)) | ||
| 45 | ;; (sign (or "+" "-" ""))) | ||
| 46 | ;; | ||
| 47 | ;; In contrast to regexps, PEGs allow us to define recursive "rules". | ||
| 48 | ;; A "grammar" is a set of rules. A rule is written as (NAME PEX...) | ||
| 49 | ;; E.g. (sign (or "+" "-" "")) is a rule with the name "sign". | ||
| 50 | ;; The syntax for PEX (Parsing Expression) is a follows: | ||
| 51 | ;; | ||
| 52 | ;; Description Lisp Traditional, as in Ford's paper | ||
| 53 | ;; =========== ==== =========== | ||
| 54 | ;; Sequence (and E1 E2) e1 e2 | ||
| 55 | ;; Prioritized Choice (or E1 E2) e1 / e2 | ||
| 56 | ;; Not-predicate (not E) !e | ||
| 57 | ;; And-predicate (if E) &e | ||
| 58 | ;; Any character (any) . | ||
| 59 | ;; Literal string "abc" "abc" | ||
| 60 | ;; Character C (char C) 'c' | ||
| 61 | ;; Zero-or-more (* E) e* | ||
| 62 | ;; One-or-more (+ E) e+ | ||
| 63 | ;; Optional (opt E) e? | ||
| 64 | ;; Non-terminal SYMBOL A | ||
| 65 | ;; Character range (range A B) [a-b] | ||
| 66 | ;; Character set [a-b "+*" ?x] [a-b+*x] ;Note: it's a vector | ||
| 67 | ;; Character classes [ascii cntrl] | ||
| 68 | ;; Boolean-guard (guard EXP) | ||
| 69 | ;; Syntax-Class (syntax-class NAME) | ||
| 70 | ;; Local definitions (with RULES PEX...) | ||
| 71 | ;; Indirect call (funcall EXP ARGS...) | ||
| 72 | ;; and | ||
| 73 | ;; Empty-string (null) ε | ||
| 74 | ;; Beginning-of-Buffer (bob) | ||
| 75 | ;; End-of-Buffer (eob) | ||
| 76 | ;; Beginning-of-Line (bol) | ||
| 77 | ;; End-of-Line (eol) | ||
| 78 | ;; Beginning-of-Word (bow) | ||
| 79 | ;; End-of-Word (eow) | ||
| 80 | ;; Beginning-of-Symbol (bos) | ||
| 81 | ;; End-of-Symbol (eos) | ||
| 82 | ;; | ||
| 83 | ;; Rules can refer to other rules, and a grammar is often structured | ||
| 84 | ;; as a tree, with a root rule referring to one or more "branch | ||
| 85 | ;; rules", all the way down to the "leaf rules" that deal with actual | ||
| 86 | ;; buffer text. Rules can be recursive or mutually referential, | ||
| 87 | ;; though care must be taken not to create infinite loops. | ||
| 88 | ;; | ||
| 89 | ;;;; Named rulesets: | ||
| 90 | ;; | ||
| 91 | ;; You can define a set of rules for later use with: | ||
| 92 | ;; | ||
| 93 | ;; (define-peg-ruleset myrules | ||
| 94 | ;; (sign () (or "+" "-" "")) | ||
| 95 | ;; (digit () [0-9]) | ||
| 96 | ;; (nat () digit (* digit)) | ||
| 97 | ;; (int () sign digit (* digit)) | ||
| 98 | ;; (float () int "." nat)) | ||
| 99 | ;; | ||
| 100 | ;; and later refer to it: | ||
| 101 | ;; | ||
| 102 | ;; (with-peg-rules | ||
| 103 | ;; (myrules | ||
| 104 | ;; (complex float "+i" float)) | ||
| 105 | ;; ... (peg-parse nat "," nat "," complex) ...) | ||
| 106 | ;; | ||
| 107 | ;;;; Parsing actions: | ||
| 108 | ;; | ||
| 109 | ;; PEXs also support parsing actions, i.e. Lisp snippets which are | ||
| 110 | ;; executed when a pex matches. This can be used to construct syntax | ||
| 111 | ;; trees or for similar tasks. The most basic form of action is | ||
| 112 | ;; written as: | ||
| 113 | ;; | ||
| 114 | ;; (action FORM) ; evaluate FORM for its side-effects | ||
| 115 | ;; | ||
| 116 | ;; Actions don't consume input, but are executed at the point of | ||
| 117 | ;; match. Another kind of action is called a "stack action", and | ||
| 118 | ;; looks like this: | ||
| 119 | ;; | ||
| 120 | ;; `(VAR... -- FORM...) ; stack action | ||
| 121 | ;; | ||
| 122 | ;; A stack action takes VARs from the "value stack" and pushes the | ||
| 123 | ;; results of evaluating FORMs to that stack. | ||
| 124 | |||
| 125 | ;; The value stack is created during the course of parsing. Certain | ||
| 126 | ;; operators (see below) that match buffer text can push values onto | ||
| 127 | ;; this stack. "Upstream" rules can then draw values from the stack, | ||
| 128 | ;; and optionally push new ones back. For instance, consider this | ||
| 129 | ;; very simple grammar: | ||
| 130 | ;; | ||
| 131 | ;; (with-peg-rules | ||
| 132 | ;; ((query (+ term) (eol)) | ||
| 133 | ;; (term key ":" value (opt (+ [space])) | ||
| 134 | ;; `(k v -- (cons (intern k) v))) | ||
| 135 | ;; (key (substring (and (not ":") (+ [word])))) | ||
| 136 | ;; (value (or string-value number-value)) | ||
| 137 | ;; (string-value (substring (+ [alpha]))) | ||
| 138 | ;; (number-value (substring (+ [digit])) | ||
| 139 | ;; `(val -- (string-to-number val)))) | ||
| 140 | ;; (peg-run (peg query))) | ||
| 141 | ;; | ||
| 142 | ;; This invocation of `peg-run' would parse this buffer text: | ||
| 143 | ;; | ||
| 144 | ;; name:Jane age:30 | ||
| 145 | ;; | ||
| 146 | ;; And return this Elisp sexp: | ||
| 147 | ;; | ||
| 148 | ;; ((age . 30) (name . "Jane")) | ||
| 149 | ;; | ||
| 150 | ;; Note that, in complex grammars, some care must be taken to make | ||
| 151 | ;; sure that the number and type of values drawn from the stack always | ||
| 152 | ;; match those pushed. In the example above, both `string-value' and | ||
| 153 | ;; `number-value' push a single value to the stack. Since the `value' | ||
| 154 | ;; rule only includes these two sub-rules, any upstream rule that | ||
| 155 | ;; makes use of `value' can be confident it will always and only push | ||
| 156 | ;; a single value to the stack. | ||
| 157 | ;; | ||
| 158 | ;; Stack action forms are in a sense analogous to lambda forms: the | ||
| 159 | ;; symbols before the "--" are the equivalent of lambda arguments, | ||
| 160 | ;; while the forms after the "--" are return values. The difference | ||
| 161 | ;; being that a lambda form can only return a single value, while a | ||
| 162 | ;; stack action can push multiple values onto the stack. It's also | ||
| 163 | ;; perfectly valid to use `(-- FORM...)' or `(VAR... --)': the former | ||
| 164 | ;; pushes values to the stack without consuming any, and the latter | ||
| 165 | ;; pops values from the stack and discards them. | ||
| 166 | ;; | ||
| 167 | ;;;; Derived Operators: | ||
| 168 | ;; | ||
| 169 | ;; The following operators are implemented as combinations of | ||
| 170 | ;; primitive expressions: | ||
| 171 | ;; | ||
| 172 | ;; (substring E) ; Match E and push the substring for the matched region. | ||
| 173 | ;; (region E) ; Match E and push the start and end positions. | ||
| 174 | ;; (replace E RPL); Match E and replace the matched region with RPL. | ||
| 175 | ;; (list E) ; Match E and push a list of the items that E produced. | ||
| 176 | ;; | ||
| 177 | ;; See `peg-ex-parse-int' in `peg-tests.el' for further examples. | ||
| 178 | ;; | ||
| 179 | ;; Regexp equivalents: | ||
| 180 | ;; | ||
| 181 | ;; Here a some examples for regexps and how those could be written as pex. | ||
| 182 | ;; [Most are taken from rx.el] | ||
| 183 | ;; | ||
| 184 | ;; "^[a-z]*" | ||
| 185 | ;; (and (bol) (* [a-z])) | ||
| 186 | ;; | ||
| 187 | ;; "\n[^ \t]" | ||
| 188 | ;; (and "\n" (not [" \t"]) (any)) | ||
| 189 | ;; | ||
| 190 | ;; "\\*\\*\\* EOOH \\*\\*\\*\n" | ||
| 191 | ;; "*** EOOH ***\n" | ||
| 192 | ;; | ||
| 193 | ;; "\\<\\(catch\\|finally\\)\\>[^_]" | ||
| 194 | ;; (and (bow) (or "catch" "finally") (eow) (not "_") (any)) | ||
| 195 | ;; | ||
| 196 | ;; "[ \t\n]*:\\([^:]+\\|$\\)" | ||
| 197 | ;; (and (* [" \t\n"]) ":" (or (+ (not ":") (any)) (eol))) | ||
| 198 | ;; | ||
| 199 | ;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*" | ||
| 200 | ;; (and (bol) | ||
| 201 | ;; "content-transfer-encoding:" | ||
| 202 | ;; (* (opt "\n") ["\t "]) | ||
| 203 | ;; "quoted-printable" | ||
| 204 | ;; (* (opt "\n") ["\t "])) | ||
| 205 | ;; | ||
| 206 | ;; "\\$[I]d: [^ ]+ \\([^ ]+\\) " | ||
| 207 | ;; (and "$Id: " (+ (not " ") (any)) " " (+ (not " ") (any)) " ") | ||
| 208 | ;; | ||
| 209 | ;; "^;;\\s-*\n\\|^\n" | ||
| 210 | ;; (or (and (bol) ";;" (* (syntax-class whitespace)) "\n") | ||
| 211 | ;; (and (bol) "\n")) | ||
| 212 | ;; | ||
| 213 | ;; "\\\\\\\\\\[\\w+" | ||
| 214 | ;; (and "\\\\[" (+ (syntax-class word))) | ||
| 215 | ;; | ||
| 216 | ;; See ";;; Examples" in `peg-tests.el' for other examples. | ||
| 217 | ;; | ||
| 218 | ;;;; Rule argument and indirect calls: | ||
| 219 | ;; | ||
| 220 | ;; Rules can take arguments and those arguments can themselves be PEGs. | ||
| 221 | ;; For example: | ||
| 222 | ;; | ||
| 223 | ;; (define-peg-rule 2-or-more (peg) | ||
| 224 | ;; (funcall peg) | ||
| 225 | ;; (funcall peg) | ||
| 226 | ;; (* (funcall peg))) | ||
| 227 | ;; | ||
| 228 | ;; ... (peg-parse | ||
| 229 | ;; ... | ||
| 230 | ;; (2-or-more (peg foo)) | ||
| 231 | ;; ... | ||
| 232 | ;; (2-or-more (peg bar)) | ||
| 233 | ;; ...) | ||
| 234 | ;; | ||
| 235 | ;;;; References: | ||
| 236 | ;; | ||
| 237 | ;; [Ford] Bryan Ford. Parsing Expression Grammars: a Recognition-Based | ||
| 238 | ;; Syntactic Foundation. In POPL'04: Proceedings of the 31st ACM | ||
| 239 | ;; SIGPLAN-SIGACT symposium on Principles of Programming Languages, | ||
| 240 | ;; pages 111-122, New York, NY, USA, 2004. ACM Press. | ||
| 241 | ;; http://pdos.csail.mit.edu/~baford/packrat/ | ||
| 242 | ;; | ||
| 243 | ;; [Baker] Baker, Henry G. "Pragmatic Parsing in Common Lisp". ACM Lisp | ||
| 244 | ;; Pointers 4(2), April--June 1991, pp. 3--15. | ||
| 245 | ;; http://home.pipeline.com/~hbaker1/Prag-Parse.html | ||
| 246 | ;; | ||
| 247 | ;; Roman Redziejowski does good PEG related research | ||
| 248 | ;; http://www.romanredz.se/pubs.htm | ||
| 249 | |||
| 250 | ;;;; Todo: | ||
| 251 | |||
| 252 | ;; - Fix the exponential blowup in `peg-translate-exp'. | ||
| 253 | ;; - Add a proper debug-spec for PEXs. | ||
| 254 | |||
| 255 | ;;; News: | ||
| 256 | |||
| 257 | ;; Since 1.0.1: | ||
| 258 | ;; - Use OClosures to represent PEG rules when available, and let cl-print | ||
| 259 | ;; display their source code. | ||
| 260 | ;; - New PEX form (with RULES PEX...). | ||
| 261 | ;; - Named rulesets. | ||
| 262 | ;; - You can pass arguments to rules. | ||
| 263 | ;; - New `funcall' rule to call rules indirectly (e.g. a peg you received | ||
| 264 | ;; as argument). | ||
| 265 | |||
| 266 | ;; Version 1.0: | ||
| 267 | ;; - New official entry points `peg` and `peg-run`. | ||
| 268 | |||
| 269 | ;;; Code: | ||
| 270 | |||
| 271 | (eval-when-compile (require 'cl-lib)) | ||
| 272 | |||
| 273 | (defvar peg--actions nil | ||
| 274 | "Actions collected along the current parse. | ||
| 275 | Used at runtime for backtracking. It's a list ((POS . THUNK)...). | ||
| 276 | Each THUNK is executed at the corresponding POS. Thunks are | ||
| 277 | executed in a postprocessing step, not during parsing.") | ||
| 278 | |||
| 279 | (defvar peg--errors nil | ||
| 280 | "Data keeping track of the rightmost parse failure location. | ||
| 281 | It's a pair (POSITION . EXPS ...). POSITION is the buffer position and | ||
| 282 | EXPS is a list of rules/expressions that failed.") | ||
| 283 | |||
| 284 | ;;;; Main entry points | ||
| 285 | |||
| 286 | (defmacro peg--when-fboundp (f &rest body) | ||
| 287 | (declare (indent 1) (debug (sexp body))) | ||
| 288 | (when (fboundp f) | ||
| 289 | (macroexp-progn body))) | ||
| 290 | |||
| 291 | (peg--when-fboundp oclosure-define | ||
| 292 | (oclosure-define peg-function | ||
| 293 | "Parsing function built from PEG rule." | ||
| 294 | pexs) | ||
| 295 | |||
| 296 | (cl-defmethod cl-print-object ((peg peg-function) stream) | ||
| 297 | (princ "#f<peg " stream) | ||
| 298 | (let ((args (help-function-arglist peg 'preserve-names))) | ||
| 299 | (if args | ||
| 300 | (prin1 args stream) | ||
| 301 | (princ "()" stream))) | ||
| 302 | (princ " " stream) | ||
| 303 | (prin1 (peg-function--pexs peg) stream) | ||
| 304 | (princ ">" stream))) | ||
| 305 | |||
| 306 | (defmacro peg--lambda (pexs args &rest body) | ||
| 307 | (declare (indent 2) | ||
| 308 | (debug (&define form lambda-list def-body))) | ||
| 309 | (if (fboundp 'oclosure-lambda) | ||
| 310 | `(oclosure-lambda (peg-function (pexs ,pexs)) ,args . ,body) | ||
| 311 | `(lambda ,args . ,body))) | ||
| 312 | |||
| 313 | ;; Sometimes (with-peg-rules ... (peg-run (peg ...))) is too | ||
| 314 | ;; longwinded for the task at hand, so `peg-parse' comes in handy. | ||
| 315 | (defmacro peg-parse (&rest pexs) | ||
| 316 | "Match PEXS at point. | ||
| 317 | PEXS is a sequence of PEG expressions, implicitly combined with `and'. | ||
| 318 | Returns STACK if the match succeed and signals an error on failure, | ||
| 319 | moving point along the way. | ||
| 320 | PEXS can also be a list of PEG rules, in which case the first rule is used." | ||
| 321 | (if (and (consp (car pexs)) | ||
| 322 | (symbolp (caar pexs)) | ||
| 323 | (not (ignore-errors (peg-normalize (car pexs))))) | ||
| 324 | ;; `pexs' is a list of rules: use the first rule as entry point. | ||
| 325 | `(with-peg-rules ,pexs (peg-run (peg ,(caar pexs)) #'peg-signal-failure)) | ||
| 326 | `(peg-run (peg ,@pexs) #'peg-signal-failure))) | ||
| 327 | |||
| 328 | (defmacro peg (&rest pexs) | ||
| 329 | "Return a PEG-matcher that matches PEXS." | ||
| 330 | (pcase (peg-normalize `(and . ,pexs)) | ||
| 331 | (`(call ,name) `#',(peg--rule-id name)) ;Optimize this case by η-reduction! | ||
| 332 | (exp `(peg--lambda ',pexs () ,(peg-translate-exp exp))))) | ||
| 333 | |||
| 334 | ;; There are several "infos we want to return" when parsing a given PEX: | ||
| 335 | ;; 1- We want to return the success/failure of the parse. | ||
| 336 | ;; 2- We want to return the data of the successful parse (the stack). | ||
| 337 | ;; 3- We want to return the diagnostic of the failures. | ||
| 338 | ;; 4- We want to perform the actions (upon parse success)! | ||
| 339 | ;; `peg-parse' used an error signal to encode the (1) boolean, which | ||
| 340 | ;; lets it return all the info conveniently but the error signal was sometimes | ||
| 341 | ;; inconvenient. Other times one wants to just know (1) maybe without even | ||
| 342 | ;; performing (4). | ||
| 343 | ;; `peg-run' lets you choose all that, and by default gives you | ||
| 344 | ;; (1) as a simple boolean, while also doing (2), and (4). | ||
| 345 | |||
| 346 | (defun peg-run (peg-matcher &optional failure-function success-function) | ||
| 347 | "Parse with PEG-MATCHER at point and run the success/failure function. | ||
| 348 | If a match was found, move to the end of the match and call SUCCESS-FUNCTION | ||
| 349 | with one argument: a function which will perform all the actions collected | ||
| 350 | during the parse and then return the resulting stack (or t if empty). | ||
| 351 | If no match was found, move to the (rightmost) point of parse failure and call | ||
| 352 | FAILURE-FUNCTION with one argument, which is a list of PEG expressions that | ||
| 353 | failed at this point. | ||
| 354 | SUCCESS-FUNCTION defaults to `funcall' and FAILURE-FUNCTION | ||
| 355 | defaults to `ignore'." | ||
| 356 | (let ((peg--actions '()) (peg--errors '(-1))) | ||
| 357 | (if (funcall peg-matcher) | ||
| 358 | ;; Found a parse: run the actions collected along the way. | ||
| 359 | (funcall (or success-function #'funcall) | ||
| 360 | (lambda () | ||
| 361 | (save-excursion (peg-postprocess peg--actions)))) | ||
| 362 | (goto-char (car peg--errors)) | ||
| 363 | (when failure-function | ||
| 364 | (funcall failure-function (peg-merge-errors (cdr peg--errors))))))) | ||
| 365 | |||
| 366 | (defmacro define-peg-rule (name args &rest pexs) | ||
| 367 | "Define PEG rule NAME as equivalent to PEXS. | ||
| 368 | The PEG expressions in PEXS are implicitly combined with the | ||
| 369 | sequencing `and' operator of PEG grammars." | ||
| 370 | (declare (indent 1)) | ||
| 371 | (let ((inline nil)) | ||
| 372 | (while (keywordp (car pexs)) | ||
| 373 | (pcase (pop pexs) | ||
| 374 | (:inline (setq inline (car pexs)))) | ||
| 375 | (setq pexs (cdr pexs))) | ||
| 376 | (let ((id (peg--rule-id name)) | ||
| 377 | (exp (peg-normalize `(and . ,pexs)))) | ||
| 378 | `(progn | ||
| 379 | (defalias ',id | ||
| 380 | (peg--lambda ',pexs ,args | ||
| 381 | ,(if inline | ||
| 382 | ;; Short-circuit to peg--translate in order to skip | ||
| 383 | ;; the extra failure-recording of `peg-translate-exp'. | ||
| 384 | ;; It also skips the cycle detection of | ||
| 385 | ;; `peg--translate-rule-body', which is not the main | ||
| 386 | ;; purpose but we can live with it. | ||
| 387 | (apply #'peg--translate exp) | ||
| 388 | (peg--translate-rule-body name exp)))) | ||
| 389 | (eval-and-compile | ||
| 390 | ;; FIXME: We shouldn't need this any more since the info is now | ||
| 391 | ;; stored in the function, but sadly we need to find a name's EXP | ||
| 392 | ;; during compilation (i.e. before the `defalias' is executed) | ||
| 393 | ;; as part of cycle-detection! | ||
| 394 | (put ',id 'peg--rule-definition ',exp) | ||
| 395 | ,@(when inline | ||
| 396 | ;; FIXME: Copied from `defsubst'. | ||
| 397 | `(;; Never native-compile defsubsts as we need the byte | ||
| 398 | ;; definition in `byte-compile-unfold-bcf' to perform the | ||
| 399 | ;; inlining (Bug#42664, Bug#43280, Bug#44209). | ||
| 400 | ,(byte-run--set-speed id nil -1) | ||
| 401 | (put ',id 'byte-optimizer #'byte-compile-inline-expand)))))))) | ||
| 402 | |||
| 403 | (defmacro define-peg-ruleset (name &rest rules) | ||
| 404 | "Define a set of PEG rules for later use, e.g., in `with-peg-rules'." | ||
| 405 | (declare (indent 1)) | ||
| 406 | (let ((defs ()) | ||
| 407 | (aliases ())) | ||
| 408 | (dolist (rule rules) | ||
| 409 | (let* ((rname (car rule)) | ||
| 410 | (full-rname (format "%s %s" name rname))) | ||
| 411 | (push `(define-peg-rule ,full-rname . ,(cdr rule)) defs) | ||
| 412 | (push `(,(peg--rule-id rname) #',(peg--rule-id full-rname)) aliases))) | ||
| 413 | `(cl-flet ,aliases | ||
| 414 | ,@defs | ||
| 415 | (eval-and-compile (put ',name 'peg--rules ',aliases))))) | ||
| 416 | |||
| 417 | (defmacro with-peg-rules (rules &rest body) | ||
| 418 | "Make PEG rules RULES available within the scope of BODY. | ||
| 419 | RULES is a list of rules of the form (NAME . PEXS), where PEXS is a sequence | ||
| 420 | of PEG expressions, implicitly combined with `and'. | ||
| 421 | RULES can also contain symbols in which case these must name | ||
| 422 | rulesets defined previously with `define-peg-ruleset'." | ||
| 423 | (declare (indent 1) (debug (sexp form))) ;FIXME: `sexp' is not good enough! | ||
| 424 | (let* ((rulesets nil) | ||
| 425 | (rules | ||
| 426 | ;; First, macroexpand the rules. | ||
| 427 | (delq nil | ||
| 428 | (mapcar (lambda (rule) | ||
| 429 | (if (symbolp rule) | ||
| 430 | (progn (push rule rulesets) nil) | ||
| 431 | (cons (car rule) (peg-normalize `(and . ,(cdr rule)))))) | ||
| 432 | rules))) | ||
| 433 | (ctx (assq :peg-rules macroexpand-all-environment))) | ||
| 434 | (macroexpand-all | ||
| 435 | `(cl-labels | ||
| 436 | ,(mapcar (lambda (rule) | ||
| 437 | ;; FIXME: Use `peg--lambda' as well. | ||
| 438 | `(,(peg--rule-id (car rule)) | ||
| 439 | () | ||
| 440 | ,(peg--translate-rule-body (car rule) (cdr rule)))) | ||
| 441 | rules) | ||
| 442 | ,@body) | ||
| 443 | `((:peg-rules ,@(append rules (cdr ctx))) | ||
| 444 | ,@macroexpand-all-environment)))) | ||
| 445 | |||
| 446 | ;;;;; Old entry points | ||
| 447 | |||
| 448 | (defmacro peg-parse-exp (exp) | ||
| 449 | "Match the parsing expression EXP at point." | ||
| 450 | (declare (obsolete peg-parse "peg-0.9")) | ||
| 451 | `(peg-run (peg ,exp))) | ||
| 452 | |||
| 453 | ;;;; The actual implementation | ||
| 454 | |||
| 455 | (defun peg--lookup-rule (name) | ||
| 456 | (or (cdr (assq name (cdr (assq :peg-rules macroexpand-all-environment)))) | ||
| 457 | ;; With `peg-function' objects, we can recover the PEG from which it was | ||
| 458 | ;; defined, but this info is not yet available at compile-time. :-( | ||
| 459 | ;;(let ((id (peg--rule-id name))) | ||
| 460 | ;; (peg-function--pexs (symbol-function id))) | ||
| 461 | (get (peg--rule-id name) 'peg--rule-definition))) | ||
| 462 | |||
| 463 | (defun peg--rule-id (name) | ||
| 464 | (intern (format "peg-rule %s" name))) | ||
| 465 | |||
| 466 | (define-error 'peg-search-failed "Parse error at %d (expecting %S)") | ||
| 467 | |||
| 468 | (defun peg-signal-failure (failures) | ||
| 469 | (signal 'peg-search-failed (list (point) failures))) | ||
| 470 | |||
| 471 | (defun peg-parse-at-point (peg-matcher) | ||
| 472 | "Parse text at point according to the PEG rule PEG-MATCHER." | ||
| 473 | (declare (obsolete peg-run "peg-1.0")) | ||
| 474 | (peg-run peg-matcher | ||
| 475 | #'peg-signal-failure | ||
| 476 | (lambda (f) (let ((r (funcall f))) (if (listp r) r))))) | ||
| 477 | |||
| 478 | ;; Internally we use a regularized syntax, e.g. we only have binary OR | ||
| 479 | ;; nodes. Regularized nodes are lists of the form (OP ARGS...). | ||
| 480 | (cl-defgeneric peg-normalize (exp) | ||
| 481 | "Return a \"normalized\" form of EXP." | ||
| 482 | (error "Invalid parsing expression: %S" exp)) | ||
| 483 | |||
| 484 | (cl-defmethod peg-normalize ((exp string)) | ||
| 485 | (let ((len (length exp))) | ||
| 486 | (cond ((zerop len) '(guard t)) | ||
| 487 | ((= len 1) `(char ,(aref exp 0))) | ||
| 488 | (t `(str ,exp))))) | ||
| 489 | |||
| 490 | (cl-defmethod peg-normalize ((exp symbol)) | ||
| 491 | ;; (peg--lookup-rule exp) | ||
| 492 | `(call ,exp)) | ||
| 493 | |||
| 494 | (cl-defmethod peg-normalize ((exp vector)) | ||
| 495 | (peg-normalize `(set . ,(append exp '())))) | ||
| 496 | |||
| 497 | (cl-defmethod peg-normalize ((exp cons)) | ||
| 498 | (apply #'peg--macroexpand exp)) | ||
| 499 | |||
| 500 | (defconst peg-leaf-types '(any call action char range str set | ||
| 501 | guard syntax-class = funcall)) | ||
| 502 | |||
| 503 | (cl-defgeneric peg--macroexpand (head &rest args) | ||
| 504 | (cond | ||
| 505 | ((memq head peg-leaf-types) (cons head args)) | ||
| 506 | (t `(call ,head ,@args)))) | ||
| 507 | |||
| 508 | (cl-defmethod peg--macroexpand ((_ (eql or)) &rest args) | ||
| 509 | (cond ((null args) '(guard nil)) | ||
| 510 | ((null (cdr args)) (peg-normalize (car args))) | ||
| 511 | (t `(or ,(peg-normalize (car args)) | ||
| 512 | ,(peg-normalize `(or . ,(cdr args))))))) | ||
| 513 | |||
| 514 | (cl-defmethod peg--macroexpand ((_ (eql and)) &rest args) | ||
| 515 | (cond ((null args) '(guard t)) | ||
| 516 | ((null (cdr args)) (peg-normalize (car args))) | ||
| 517 | (t `(and ,(peg-normalize (car args)) | ||
| 518 | ,(peg-normalize `(and . ,(cdr args))))))) | ||
| 519 | |||
| 520 | (cl-defmethod peg--macroexpand ((_ (eql *)) &rest args) | ||
| 521 | `(* ,(peg-normalize `(and . ,args)))) | ||
| 522 | |||
| 523 | ;; FIXME: this duplicates code; could use some loop to avoid that | ||
| 524 | (cl-defmethod peg--macroexpand ((_ (eql +)) &rest args) | ||
| 525 | (let ((e (peg-normalize `(and . ,args)))) | ||
| 526 | `(and ,e (* ,e)))) | ||
| 527 | |||
| 528 | (cl-defmethod peg--macroexpand ((_ (eql opt)) &rest args) | ||
| 529 | (let ((e (peg-normalize `(and . ,args)))) | ||
| 530 | `(or ,e (guard t)))) | ||
| 531 | |||
| 532 | (cl-defmethod peg--macroexpand ((_ (eql if)) &rest args) | ||
| 533 | `(if ,(peg-normalize `(and . ,args)))) | ||
| 534 | |||
| 535 | (cl-defmethod peg--macroexpand ((_ (eql not)) &rest args) | ||
| 536 | `(not ,(peg-normalize `(and . ,args)))) | ||
| 537 | |||
| 538 | (cl-defmethod peg--macroexpand ((_ (eql \`)) form) | ||
| 539 | (peg-normalize `(stack-action ,form))) | ||
| 540 | |||
| 541 | (cl-defmethod peg--macroexpand ((_ (eql stack-action)) form) | ||
| 542 | (unless (member '-- form) | ||
| 543 | (error "Malformed stack action: %S" form)) | ||
| 544 | (let ((args (cdr (member '-- (reverse form)))) | ||
| 545 | (values (cdr (member '-- form)))) | ||
| 546 | (let ((form `(let ,(mapcar (lambda (var) `(,var (pop peg--stack))) args) | ||
| 547 | ,@(mapcar (lambda (val) `(push ,val peg--stack)) values)))) | ||
| 548 | `(action ,form)))) | ||
| 549 | |||
| 550 | (defvar peg-char-classes | ||
| 551 | '(ascii alnum alpha blank cntrl digit graph lower multibyte nonascii print | ||
| 552 | punct space unibyte upper word xdigit)) | ||
| 553 | |||
| 554 | (cl-defmethod peg--macroexpand ((_ (eql set)) &rest specs) | ||
| 555 | (cond ((null specs) '(guard nil)) | ||
| 556 | ((and (null (cdr specs)) | ||
| 557 | (let ((range (peg-range-designator (car specs)))) | ||
| 558 | (and range `(range ,(car range) ,(cdr range)))))) | ||
| 559 | (t | ||
| 560 | (let ((chars '()) (ranges '()) (classes '())) | ||
| 561 | (while specs | ||
| 562 | (let* ((spec (pop specs)) | ||
| 563 | (range (peg-range-designator spec))) | ||
| 564 | (cond (range | ||
| 565 | (push range ranges)) | ||
| 566 | ((peg-characterp spec) | ||
| 567 | (push spec chars)) | ||
| 568 | ((stringp spec) | ||
| 569 | (setq chars (append (reverse (append spec ())) chars))) | ||
| 570 | ((memq spec peg-char-classes) | ||
| 571 | (push spec classes)) | ||
| 572 | (t (error "Invalid set specifier: %S" spec))))) | ||
| 573 | (setq ranges (reverse ranges)) | ||
| 574 | (setq chars (delete-dups (reverse chars))) | ||
| 575 | (setq classes (reverse classes)) | ||
| 576 | (cond ((and (null ranges) | ||
| 577 | (null classes) | ||
| 578 | (cond ((null chars) '(guard nil)) | ||
| 579 | ((null (cdr chars)) `(char ,(car chars)))))) | ||
| 580 | (t `(set ,ranges ,chars ,classes))))))) | ||
| 581 | |||
| 582 | (defun peg-range-designator (x) | ||
| 583 | (and (symbolp x) | ||
| 584 | (let ((str (symbol-name x))) | ||
| 585 | (and (= (length str) 3) | ||
| 586 | (eq (aref str 1) ?-) | ||
| 587 | (< (aref str 0) (aref str 2)) | ||
| 588 | (cons (aref str 0) (aref str 2)))))) | ||
| 589 | |||
| 590 | ;; characterp is new in Emacs 23. | ||
| 591 | (defun peg-characterp (x) | ||
| 592 | (if (fboundp 'characterp) | ||
| 593 | (characterp x) | ||
| 594 | (integerp x))) | ||
| 595 | |||
| 596 | (cl-defmethod peg--macroexpand ((_ (eql list)) &rest args) | ||
| 597 | (peg-normalize | ||
| 598 | (let ((marker (make-symbol "magic-marker"))) | ||
| 599 | `(and (stack-action (-- ',marker)) | ||
| 600 | ,@args | ||
| 601 | (stack-action (-- | ||
| 602 | (let ((l '())) | ||
| 603 | (while | ||
| 604 | (let ((e (pop peg--stack))) | ||
| 605 | (cond ((eq e ',marker) nil) | ||
| 606 | ((null peg--stack) | ||
| 607 | (error "No marker on stack")) | ||
| 608 | (t (push e l) t)))) | ||
| 609 | l))))))) | ||
| 610 | |||
| 611 | (cl-defmethod peg--macroexpand ((_ (eql substring)) &rest args) | ||
| 612 | (peg-normalize | ||
| 613 | `(and `(-- (point)) | ||
| 614 | ,@args | ||
| 615 | `(start -- (buffer-substring-no-properties start (point)))))) | ||
| 616 | |||
| 617 | (cl-defmethod peg--macroexpand ((_ (eql region)) &rest args) | ||
| 618 | (peg-normalize | ||
| 619 | `(and `(-- (point)) | ||
| 620 | ,@args | ||
| 621 | `(-- (point))))) | ||
| 622 | |||
| 623 | (cl-defmethod peg--macroexpand ((_ (eql replace)) pe replacement) | ||
| 624 | (peg-normalize | ||
| 625 | `(and (stack-action (-- (point))) | ||
| 626 | ,pe | ||
| 627 | (stack-action (start -- (progn | ||
| 628 | (delete-region start (point)) | ||
| 629 | (insert-before-markers ,replacement)))) | ||
| 630 | (stack-action (_ --))))) | ||
| 631 | |||
| 632 | (cl-defmethod peg--macroexpand ((_ (eql quote)) _form) | ||
| 633 | (error "quote is reserved for future use")) | ||
| 634 | |||
| 635 | (cl-defgeneric peg--translate (head &rest args) | ||
| 636 | (error "No translator for: %S" (cons head args))) | ||
| 637 | |||
| 638 | (defun peg--translate-rule-body (name exp) | ||
| 639 | (let ((msg (condition-case err | ||
| 640 | (progn (peg-detect-cycles exp (list name)) nil) | ||
| 641 | (error (error-message-string err)))) | ||
| 642 | (code (peg-translate-exp exp))) | ||
| 643 | (cond | ||
| 644 | ((null msg) code) | ||
| 645 | ((fboundp 'macroexp--warn-and-return) | ||
| 646 | (macroexp--warn-and-return msg code)) | ||
| 647 | (t | ||
| 648 | (message "%s" msg) | ||
| 649 | code)))) | ||
| 650 | |||
| 651 | ;; This is the main translation function. | ||
| 652 | (defun peg-translate-exp (exp) | ||
| 653 | "Return the ELisp code to match the PE EXP." | ||
| 654 | ;; FIXME: This expansion basically duplicates `exp' in the output, which is | ||
| 655 | ;; a serious problem because it's done recursively, so it makes the output | ||
| 656 | ;; code's size exponentially larger than the input! | ||
| 657 | `(or ,(apply #'peg--translate exp) | ||
| 658 | (peg--record-failure ',exp))) ; for error reporting | ||
| 659 | |||
| 660 | (define-obsolete-function-alias 'peg-record-failure | ||
| 661 | #'peg--record-failure "peg-1.0") | ||
| 662 | (defun peg--record-failure (exp) | ||
| 663 | (cond ((= (point) (car peg--errors)) | ||
| 664 | (setcdr peg--errors (cons exp (cdr peg--errors)))) | ||
| 665 | ((> (point) (car peg--errors)) | ||
| 666 | (setq peg--errors (list (point) exp)))) | ||
| 667 | nil) | ||
| 668 | |||
| 669 | (cl-defmethod peg--translate ((_ (eql and)) e1 e2) | ||
| 670 | `(and ,(peg-translate-exp e1) | ||
| 671 | ,(peg-translate-exp e2))) | ||
| 672 | |||
| 673 | ;; Choicepoints are used for backtracking. At a choicepoint we save | ||
| 674 | ;; enough state, so that we can continue from there if needed. | ||
| 675 | (defun peg--choicepoint-moved-p (choicepoint) | ||
| 676 | `(/= ,(car choicepoint) (point))) | ||
| 677 | |||
| 678 | (defun peg--choicepoint-restore (choicepoint) | ||
| 679 | `(progn | ||
| 680 | (goto-char ,(car choicepoint)) | ||
| 681 | (setq peg--actions ,(cdr choicepoint)))) | ||
| 682 | |||
| 683 | (defmacro peg--with-choicepoint (var &rest body) | ||
| 684 | (declare (indent 1) (debug (symbolp form))) | ||
| 685 | `(let ((,var (cons (make-symbol "point") (make-symbol "actions")))) | ||
| 686 | `(let ((,(car ,var) (point)) | ||
| 687 | (,(cdr ,var) peg--actions)) | ||
| 688 | ,@(list ,@body)))) | ||
| 689 | |||
| 690 | (cl-defmethod peg--translate ((_ (eql or)) e1 e2) | ||
| 691 | (peg--with-choicepoint cp | ||
| 692 | `(or ,(peg-translate-exp e1) | ||
| 693 | (,@(peg--choicepoint-restore cp) | ||
| 694 | ,(peg-translate-exp e2))))) | ||
| 695 | |||
| 696 | (cl-defmethod peg--translate ((_ (eql with)) rules &rest exps) | ||
| 697 | `(with-peg-rules ,rules ,(peg--translate `(and . ,exps)))) | ||
| 698 | |||
| 699 | (cl-defmethod peg--translate ((_ (eql guard)) exp) exp) | ||
| 700 | |||
| 701 | (defvar peg-syntax-classes | ||
| 702 | '((whitespace ?-) (word ?w) (symbol ?s) (punctuation ?.) | ||
| 703 | (open ?\() (close ?\)) (string ?\") (escape ?\\) (charquote ?/) | ||
| 704 | (math ?$) (prefix ?') (comment ?<) (endcomment ?>) | ||
| 705 | (comment-fence ?!) (string-fence ?|))) | ||
| 706 | |||
| 707 | (cl-defmethod peg--translate ((_ (eql syntax-class)) class) | ||
| 708 | (let ((probe (assoc class peg-syntax-classes))) | ||
| 709 | (cond (probe `(when (looking-at ,(format "\\s%c" (cadr probe))) | ||
| 710 | (forward-char) | ||
| 711 | t)) | ||
| 712 | (t (error "Invalid syntax class: %S\nMust be one of: %s" class | ||
| 713 | (mapcar #'car peg-syntax-classes)))))) | ||
| 714 | |||
| 715 | (cl-defmethod peg--translate ((_ (eql =)) string) | ||
| 716 | `(let ((str ,string)) | ||
| 717 | (when (zerop (length str)) | ||
| 718 | (error "Empty strings not allowed for =")) | ||
| 719 | (search-forward str (+ (point) (length str)) t))) | ||
| 720 | |||
| 721 | (cl-defmethod peg--translate ((_ (eql *)) e) | ||
| 722 | `(progn (while ,(peg--with-choicepoint cp | ||
| 723 | `(if ,(peg-translate-exp e) | ||
| 724 | ;; Just as regexps do for the `*' operator, | ||
| 725 | ;; we allow the body of `*' loops to match | ||
| 726 | ;; the empty string, but we don't repeat the loop if | ||
| 727 | ;; we haven't moved, to avoid inf-loops. | ||
| 728 | ,(peg--choicepoint-moved-p cp) | ||
| 729 | ,(peg--choicepoint-restore cp) | ||
| 730 | nil))) | ||
| 731 | t)) | ||
| 732 | |||
| 733 | (cl-defmethod peg--translate ((_ (eql if)) e) | ||
| 734 | (peg--with-choicepoint cp | ||
| 735 | `(when ,(peg-translate-exp e) | ||
| 736 | ,(peg--choicepoint-restore cp) | ||
| 737 | t))) | ||
| 738 | |||
| 739 | (cl-defmethod peg--translate ((_ (eql not)) e) | ||
| 740 | (peg--with-choicepoint cp | ||
| 741 | `(unless ,(peg-translate-exp e) | ||
| 742 | ,(peg--choicepoint-restore cp) | ||
| 743 | t))) | ||
| 744 | |||
| 745 | (cl-defmethod peg--translate ((_ (eql any)) ) | ||
| 746 | '(when (not (eobp)) | ||
| 747 | (forward-char) | ||
| 748 | t)) | ||
| 749 | |||
| 750 | (cl-defmethod peg--translate ((_ (eql char)) c) | ||
| 751 | `(when (eq (char-after) ',c) | ||
| 752 | (forward-char) | ||
| 753 | t)) | ||
| 754 | |||
| 755 | (cl-defmethod peg--translate ((_ (eql set)) ranges chars classes) | ||
| 756 | `(when (looking-at ',(peg-make-charset-regexp ranges chars classes)) | ||
| 757 | (forward-char) | ||
| 758 | t)) | ||
| 759 | |||
| 760 | (defun peg-make-charset-regexp (ranges chars classes) | ||
| 761 | (when (and (not ranges) (not classes) (<= (length chars) 1)) | ||
| 762 | (error "Bug")) | ||
| 763 | (let ((rbracket (member ?\] chars)) | ||
| 764 | (minus (member ?- chars)) | ||
| 765 | (hat (member ?^ chars))) | ||
| 766 | (dolist (c '(?\] ?- ?^)) | ||
| 767 | (setq chars (remove c chars))) | ||
| 768 | (format "[%s%s%s%s%s%s]" | ||
| 769 | (if rbracket "]" "") | ||
| 770 | (if minus "-" "") | ||
| 771 | (mapconcat (lambda (x) (format "%c-%c" (car x) (cdr x))) ranges "") | ||
| 772 | (mapconcat (lambda (c) (format "[:%s:]" c)) classes "") | ||
| 773 | (mapconcat (lambda (c) (format "%c" c)) chars "") | ||
| 774 | (if hat "^" "")))) | ||
| 775 | |||
| 776 | (cl-defmethod peg--translate ((_ (eql range)) from to) | ||
| 777 | `(when (and (char-after) | ||
| 778 | (<= ',from (char-after)) | ||
| 779 | (<= (char-after) ',to)) | ||
| 780 | (forward-char) | ||
| 781 | t)) | ||
| 782 | |||
| 783 | (cl-defmethod peg--translate ((_ (eql str)) str) | ||
| 784 | `(when (looking-at ',(regexp-quote str)) | ||
| 785 | (goto-char (match-end 0)) | ||
| 786 | t)) | ||
| 787 | |||
| 788 | (cl-defmethod peg--translate ((_ (eql call)) name &rest args) | ||
| 789 | `(,(peg--rule-id name) ,@args)) | ||
| 790 | |||
| 791 | (cl-defmethod peg--translate ((_ (eql funcall)) exp &rest args) | ||
| 792 | `(funcall ,exp ,@args)) | ||
| 793 | |||
| 794 | (cl-defmethod peg--translate ((_ (eql action)) form) | ||
| 795 | `(progn | ||
| 796 | (push (cons (point) (lambda () ,form)) peg--actions) | ||
| 797 | t)) | ||
| 798 | |||
| 799 | (defvar peg--stack nil) | ||
| 800 | (defun peg-postprocess (actions) | ||
| 801 | "Execute \"actions\"." | ||
| 802 | (let ((peg--stack '()) | ||
| 803 | (forw-actions ())) | ||
| 804 | (pcase-dolist (`(,pos . ,thunk) actions) | ||
| 805 | (push (cons (copy-marker pos) thunk) forw-actions)) | ||
| 806 | (pcase-dolist (`(,pos . ,thunk) forw-actions) | ||
| 807 | (goto-char pos) | ||
| 808 | (funcall thunk)) | ||
| 809 | (or peg--stack t))) | ||
| 810 | |||
| 811 | ;; Left recursion is presumably a common mistake when using PEGs. | ||
| 812 | ;; Here we try to detect such mistakes. Essentially we traverse the | ||
| 813 | ;; graph as long as we can without consuming input. When we find a | ||
| 814 | ;; recursive call we signal an error. | ||
| 815 | |||
| 816 | (defun peg-detect-cycles (exp path) | ||
| 817 | "Signal an error on a cycle. | ||
| 818 | Otherwise traverse EXP recursively and return T if EXP can match | ||
| 819 | without consuming input. Return nil if EXP definitely consumes | ||
| 820 | input. PATH is the list of rules that we have visited so far." | ||
| 821 | (apply #'peg--detect-cycles path exp)) | ||
| 822 | |||
| 823 | (cl-defgeneric peg--detect-cycles (head _path &rest args) | ||
| 824 | (error "No detect-cycle method for: %S" (cons head args))) | ||
| 825 | |||
| 826 | (cl-defmethod peg--detect-cycles (path (_ (eql call)) name) | ||
| 827 | (if (member name path) | ||
| 828 | (error "Possible left recursion: %s" | ||
| 829 | (mapconcat (lambda (x) (format "%s" x)) | ||
| 830 | (reverse (cons name path)) " -> ")) | ||
| 831 | (let ((exp (peg--lookup-rule name))) | ||
| 832 | (if (null exp) | ||
| 833 | ;; If there's no rule by that name, either we'll fail at | ||
| 834 | ;; run-time or it will be defined later. In any case, at this | ||
| 835 | ;; point there's no evidence of a cycle, and if a cycle appears | ||
| 836 | ;; later we'll hopefully catch it when the rule gets defined. | ||
| 837 | ;; FIXME: In practice, if `name' is part of the cycle, we will | ||
| 838 | ;; indeed detect it when it gets defined, but OTOH if `name' | ||
| 839 | ;; is not part of a cycle but it *enables* a cycle because | ||
| 840 | ;; it matches the empty string (i.e. we should have returned t | ||
| 841 | ;; here), then we may not catch the problem at all :-( | ||
| 842 | nil | ||
| 843 | (peg-detect-cycles exp (cons name path)))))) | ||
| 844 | |||
| 845 | (cl-defmethod peg--detect-cycles (path (_ (eql and)) e1 e2) | ||
| 846 | (and (peg-detect-cycles e1 path) | ||
| 847 | (peg-detect-cycles e2 path))) | ||
| 848 | |||
| 849 | (cl-defmethod peg--detect-cycles (path (_ (eql or)) e1 e2) | ||
| 850 | (or (peg-detect-cycles e1 path) | ||
| 851 | (peg-detect-cycles e2 path))) | ||
| 852 | |||
| 853 | (cl-defmethod peg--detect-cycles (path (_ (eql *)) e) | ||
| 854 | (peg-detect-cycles e path) | ||
| 855 | t) | ||
| 856 | |||
| 857 | (cl-defmethod peg--detect-cycles (path (_ (eql if)) e) | ||
| 858 | (peg-unary-nullable e path)) | ||
| 859 | (cl-defmethod peg--detect-cycles (path (_ (eql not)) e) | ||
| 860 | (peg-unary-nullable e path)) | ||
| 861 | |||
| 862 | (defun peg-unary-nullable (exp path) | ||
| 863 | (peg-detect-cycles exp path) | ||
| 864 | t) | ||
| 865 | |||
| 866 | (cl-defmethod peg--detect-cycles (_path (_ (eql any))) nil) | ||
| 867 | (cl-defmethod peg--detect-cycles (_path (_ (eql char)) _c) nil) | ||
| 868 | (cl-defmethod peg--detect-cycles (_path (_ (eql set)) _r _c _k) nil) | ||
| 869 | (cl-defmethod peg--detect-cycles (_path (_ (eql range)) _c1 _c2) nil) | ||
| 870 | (cl-defmethod peg--detect-cycles (_path (_ (eql str)) s) (equal s "")) | ||
| 871 | (cl-defmethod peg--detect-cycles (_path (_ (eql guard)) _e) t) | ||
| 872 | (cl-defmethod peg--detect-cycles (_path (_ (eql =)) _s) nil) | ||
| 873 | (cl-defmethod peg--detect-cycles (_path (_ (eql syntax-class)) _n) nil) | ||
| 874 | (cl-defmethod peg--detect-cycles (_path (_ (eql action)) _form) t) | ||
| 875 | |||
| 876 | (defun peg-merge-errors (exps) | ||
| 877 | "Build a more readable error message out of failed expression." | ||
| 878 | (let ((merged '())) | ||
| 879 | (dolist (exp exps) | ||
| 880 | (setq merged (peg-merge-error exp merged))) | ||
| 881 | merged)) | ||
| 882 | |||
| 883 | (defun peg-merge-error (exp merged) | ||
| 884 | (apply #'peg--merge-error merged exp)) | ||
| 885 | |||
| 886 | (cl-defgeneric peg--merge-error (_merged head &rest args) | ||
| 887 | (error "No merge-error method for: %S" (cons head args))) | ||
| 888 | |||
| 889 | (cl-defmethod peg--merge-error (merged (_ (eql or)) e1 e2) | ||
| 890 | (peg-merge-error e2 (peg-merge-error e1 merged))) | ||
| 891 | |||
| 892 | (cl-defmethod peg--merge-error (merged (_ (eql and)) e1 _e2) | ||
| 893 | ;; FIXME: Why is `e2' not used? | ||
| 894 | (peg-merge-error e1 merged)) | ||
| 895 | |||
| 896 | (cl-defmethod peg--merge-error (merged (_ (eql str)) str) | ||
| 897 | ;;(add-to-list 'merged str) | ||
| 898 | (cl-adjoin str merged :test #'equal)) | ||
| 899 | |||
| 900 | (cl-defmethod peg--merge-error (merged (_ (eql call)) rule) | ||
| 901 | ;; (add-to-list 'merged rule) | ||
| 902 | (cl-adjoin rule merged :test #'equal)) | ||
| 903 | |||
| 904 | (cl-defmethod peg--merge-error (merged (_ (eql char)) char) | ||
| 905 | ;; (add-to-list 'merged (string char)) | ||
| 906 | (cl-adjoin (string char) merged :test #'equal)) | ||
| 907 | |||
| 908 | (cl-defmethod peg--merge-error (merged (_ (eql set)) r c k) | ||
| 909 | ;; (add-to-list 'merged (peg-make-charset-regexp r c k)) | ||
| 910 | (cl-adjoin (peg-make-charset-regexp r c k) merged :test #'equal)) | ||
| 911 | |||
| 912 | (cl-defmethod peg--merge-error (merged (_ (eql range)) from to) | ||
| 913 | ;; (add-to-list 'merged (format "[%c-%c]" from to)) | ||
| 914 | (cl-adjoin (format "[%c-%c]" from to) merged :test #'equal)) | ||
| 915 | |||
| 916 | (cl-defmethod peg--merge-error (merged (_ (eql *)) exp) | ||
| 917 | (peg-merge-error exp merged)) | ||
| 918 | |||
| 919 | (cl-defmethod peg--merge-error (merged (_ (eql any))) | ||
| 920 | ;; (add-to-list 'merged '(any)) | ||
| 921 | (cl-adjoin '(any) merged :test #'equal)) | ||
| 922 | |||
| 923 | (cl-defmethod peg--merge-error (merged (_ (eql not)) x) | ||
| 924 | ;; (add-to-list 'merged `(not ,x)) | ||
| 925 | (cl-adjoin `(not ,x) merged :test #'equal)) | ||
| 926 | |||
| 927 | (cl-defmethod peg--merge-error (merged (_ (eql action)) _action) merged) | ||
| 928 | (cl-defmethod peg--merge-error (merged (_ (eql null))) merged) | ||
| 929 | |||
| 930 | (provide 'peg) | ||
| 931 | (require 'peg) | ||
| 932 | |||
| 933 | (define-peg-rule null () :inline t (guard t)) | ||
| 934 | (define-peg-rule fail () :inline t (guard nil)) | ||
| 935 | (define-peg-rule bob () :inline t (guard (bobp))) | ||
| 936 | (define-peg-rule eob () :inline t (guard (eobp))) | ||
| 937 | (define-peg-rule bol () :inline t (guard (bolp))) | ||
| 938 | (define-peg-rule eol () :inline t (guard (eolp))) | ||
| 939 | (define-peg-rule bow () :inline t (guard (looking-at "\\<"))) | ||
| 940 | (define-peg-rule eow () :inline t (guard (looking-at "\\>"))) | ||
| 941 | (define-peg-rule bos () :inline t (guard (looking-at "\\_<"))) | ||
| 942 | (define-peg-rule eos () :inline t (guard (looking-at "\\_>"))) | ||
| 943 | |||
| 944 | ;;; peg.el ends here | ||
diff --git a/test/lisp/peg-tests.el b/test/lisp/peg-tests.el new file mode 100644 index 00000000000..864e09b4200 --- /dev/null +++ b/test/lisp/peg-tests.el | |||
| @@ -0,0 +1,367 @@ | |||
| 1 | ;;; peg-tests.el --- Tests of PEG parsers -*- lexical-binding: t; -*- | ||
| 2 | |||
| 3 | ;; Copyright (C) 2008-2023 Free Software Foundation, Inc. | ||
| 4 | |||
| 5 | ;; This program is free software; you can redistribute it and/or modify | ||
| 6 | ;; it under the terms of the GNU General Public License as published by | ||
| 7 | ;; the Free Software Foundation, either version 3 of the License, or | ||
| 8 | ;; (at your option) any later version. | ||
| 9 | |||
| 10 | ;; This program is distributed in the hope that it will be useful, | ||
| 11 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 12 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 13 | ;; GNU General Public License for more details. | ||
| 14 | |||
| 15 | ;; You should have received a copy of the GNU General Public License | ||
| 16 | ;; along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
| 17 | |||
| 18 | ;;; Commentary: | ||
| 19 | |||
| 20 | ;; Tests and examples, that used to live in peg.el wrapped inside an `eval'. | ||
| 21 | |||
| 22 | ;;; Code: | ||
| 23 | |||
| 24 | (require 'peg) | ||
| 25 | (require 'ert) | ||
| 26 | |||
| 27 | ;;; Tests: | ||
| 28 | |||
| 29 | (defmacro peg-parse-string (pex string &optional noerror) | ||
| 30 | "Parse STRING according to PEX. | ||
| 31 | If NOERROR is non-nil, push nil resp. t if the parse failed | ||
| 32 | resp. succeeded instead of signaling an error." | ||
| 33 | (let ((oldstyle (consp (car-safe pex)))) ;PEX is really a list of rules. | ||
| 34 | `(with-temp-buffer | ||
| 35 | (insert ,string) | ||
| 36 | (goto-char (point-min)) | ||
| 37 | ,(if oldstyle | ||
| 38 | `(with-peg-rules ,pex | ||
| 39 | (peg-run (peg ,(caar pex)) | ||
| 40 | ,(unless noerror '#'peg-signal-failure))) | ||
| 41 | `(peg-run (peg ,pex) | ||
| 42 | ,(unless noerror '#'peg-signal-failure)))))) | ||
| 43 | |||
| 44 | (define-peg-rule peg-test-natural () | ||
| 45 | [0-9] (* [0-9])) | ||
| 46 | |||
| 47 | (ert-deftest peg-test () | ||
| 48 | (should (peg-parse-string peg-test-natural "99 bottles" t)) | ||
| 49 | (should (peg-parse-string ((s "a")) "a" t)) | ||
| 50 | (should (not (peg-parse-string ((s "a")) "b" t))) | ||
| 51 | (should (peg-parse-string ((s (not "a"))) "b" t)) | ||
| 52 | (should (not (peg-parse-string ((s (not "a"))) "a" t))) | ||
| 53 | (should (peg-parse-string ((s (if "a"))) "a" t)) | ||
| 54 | (should (not (peg-parse-string ((s (if "a"))) "b" t))) | ||
| 55 | (should (peg-parse-string ((s "ab")) "ab" t)) | ||
| 56 | (should (not (peg-parse-string ((s "ab")) "ba" t))) | ||
| 57 | (should (not (peg-parse-string ((s "ab")) "a" t))) | ||
| 58 | (should (peg-parse-string ((s (range ?0 ?9))) "0" t)) | ||
| 59 | (should (not (peg-parse-string ((s (range ?0 ?9))) "a" t))) | ||
| 60 | (should (peg-parse-string ((s [0-9])) "0" t)) | ||
| 61 | (should (not (peg-parse-string ((s [0-9])) "a" t))) | ||
| 62 | (should (not (peg-parse-string ((s [0-9])) "" t))) | ||
| 63 | (should (peg-parse-string ((s (any))) "0" t)) | ||
| 64 | (should (not (peg-parse-string ((s (any))) "" t))) | ||
| 65 | (should (peg-parse-string ((s (eob))) "" t)) | ||
| 66 | (should (peg-parse-string ((s (not (eob)))) "a" t)) | ||
| 67 | (should (peg-parse-string ((s (or "a" "b"))) "a" t)) | ||
| 68 | (should (peg-parse-string ((s (or "a" "b"))) "b" t)) | ||
| 69 | (should (not (peg-parse-string ((s (or "a" "b"))) "c" t))) | ||
| 70 | (should (peg-parse-string (and "a" "b") "ab" t)) | ||
| 71 | (should (peg-parse-string ((s (and "a" "b"))) "abc" t)) | ||
| 72 | (should (not (peg-parse-string (and "a" "b") "ba" t))) | ||
| 73 | (should (peg-parse-string ((s (and "a" "b" "c"))) "abc" t)) | ||
| 74 | (should (peg-parse-string ((s (* "a") "b" (eob))) "b" t)) | ||
| 75 | (should (peg-parse-string ((s (* "a") "b" (eob))) "ab" t)) | ||
| 76 | (should (peg-parse-string ((s (* "a") "b" (eob))) "aaab" t)) | ||
| 77 | (should (not (peg-parse-string ((s (* "a") "b" (eob))) "abc" t))) | ||
| 78 | (should (peg-parse-string ((s "")) "abc" t)) | ||
| 79 | (should (peg-parse-string ((s "" (eob))) "" t)) | ||
| 80 | (should (peg-parse-string ((s (opt "a") "b")) "abc" t)) | ||
| 81 | (should (peg-parse-string ((s (opt "a") "b")) "bc" t)) | ||
| 82 | (should (not (peg-parse-string ((s (or))) "ab" t))) | ||
| 83 | (should (peg-parse-string ((s (and))) "ab" t)) | ||
| 84 | (should (peg-parse-string ((s (and))) "" t)) | ||
| 85 | (should (peg-parse-string ((s ["^"])) "^" t)) | ||
| 86 | (should (peg-parse-string ((s ["^a"])) "a" t)) | ||
| 87 | (should (peg-parse-string ["-"] "-" t)) | ||
| 88 | (should (peg-parse-string ((s ["]-"])) "]" t)) | ||
| 89 | (should (peg-parse-string ((s ["^]"])) "^" t)) | ||
| 90 | (should (peg-parse-string ((s [alpha])) "z" t)) | ||
| 91 | (should (not (peg-parse-string ((s [alpha])) "0" t))) | ||
| 92 | (should (not (peg-parse-string ((s [alpha])) "" t))) | ||
| 93 | (should (not (peg-parse-string ((s ["][:alpha:]"])) "z" t))) | ||
| 94 | (should (peg-parse-string ((s (bob))) "" t)) | ||
| 95 | (should (peg-parse-string ((s (bos))) "x" t)) | ||
| 96 | (should (not (peg-parse-string ((s (bos))) " x" t))) | ||
| 97 | (should (peg-parse-string ((s "x" (eos))) "x" t)) | ||
| 98 | (should (peg-parse-string ((s (syntax-class whitespace))) " " t)) | ||
| 99 | (should (peg-parse-string ((s (= "foo"))) "foo" t)) | ||
| 100 | (should (let ((f "foo")) (peg-parse-string ((s (= f))) "foo" t))) | ||
| 101 | (should (not (peg-parse-string ((s (= "foo"))) "xfoo" t))) | ||
| 102 | (should (equal (peg-parse-string ((s `(-- 1 2))) "") '(2 1))) | ||
| 103 | (should (equal (peg-parse-string ((s `(-- 1 2) `(a b -- a b))) "") '(2 1))) | ||
| 104 | (should (equal (peg-parse-string ((s (or (and (any) s) | ||
| 105 | (substring [0-9])))) | ||
| 106 | "ab0cd1ef2gh") | ||
| 107 | '("2"))) | ||
| 108 | ;; The PEG rule `other' doesn't exist, which will cause a byte-compiler | ||
| 109 | ;; warning, but not an error at run time because the rule is not actually | ||
| 110 | ;; used in this particular case. | ||
| 111 | (should (equal (peg-parse-string ((s (substring (or "a" other))) | ||
| 112 | ;; Unused left-recursive rule, should | ||
| 113 | ;; cause a byte-compiler warning. | ||
| 114 | (r (* "a") r)) | ||
| 115 | "af") | ||
| 116 | '("a"))) | ||
| 117 | (should (equal (peg-parse-string ((s (list x y)) | ||
| 118 | (x `(-- 1)) | ||
| 119 | (y `(-- 2))) | ||
| 120 | "") | ||
| 121 | '((1 2)))) | ||
| 122 | (should (equal (peg-parse-string ((s (list (* x))) | ||
| 123 | (x "" `(-- 'x))) | ||
| 124 | "xxx") | ||
| 125 | ;; The empty loop body should be matched once! | ||
| 126 | '((x)))) | ||
| 127 | (should (equal (peg-parse-string ((s (list (* x))) | ||
| 128 | (x "x" `(-- 'x))) | ||
| 129 | "xxx") | ||
| 130 | '((x x x)))) | ||
| 131 | (should (equal (peg-parse-string ((s (region (* x))) | ||
| 132 | (x "x" `(-- 'x))) | ||
| 133 | "xxx") | ||
| 134 | ;; FIXME: Since string positions start at 0, this should | ||
| 135 | ;; really be '(3 x x x 0) !! | ||
| 136 | '(4 x x x 1))) | ||
| 137 | (should (equal (peg-parse-string ((s (region (list (* x)))) | ||
| 138 | (x "x" `(-- 'x 'y))) | ||
| 139 | "xxx") | ||
| 140 | '(4 (x y x y x y) 1))) | ||
| 141 | (should (equal (with-temp-buffer | ||
| 142 | (save-excursion (insert "abcdef")) | ||
| 143 | (list | ||
| 144 | (peg-run (peg "a" | ||
| 145 | (replace "bc" "x") | ||
| 146 | (replace "de" "y") | ||
| 147 | "f")) | ||
| 148 | (buffer-string))) | ||
| 149 | '(t "axyf"))) | ||
| 150 | (with-temp-buffer | ||
| 151 | (insert "toro") | ||
| 152 | (goto-char (point-min)) | ||
| 153 | (should (peg-run (peg "to"))) | ||
| 154 | (should-not (peg-run (peg "to"))) | ||
| 155 | (should (peg-run (peg "ro"))) | ||
| 156 | (should (eobp))) | ||
| 157 | (with-temp-buffer | ||
| 158 | (insert " ") | ||
| 159 | (goto-char (point-min)) | ||
| 160 | (peg-run (peg (+ (syntax-class whitespace)))) | ||
| 161 | (should (eobp))) | ||
| 162 | ) | ||
| 163 | |||
| 164 | ;;; Examples: | ||
| 165 | |||
| 166 | ;; peg-ex-recognize-int recognizes integers. An integer begins with a | ||
| 167 | ;; optional sign, then follows one or more digits. Digits are all | ||
| 168 | ;; characters from 0 to 9. | ||
| 169 | ;; | ||
| 170 | ;; Notes: | ||
| 171 | ;; 1) "" matches the empty sequence, i.e. matches without consuming | ||
| 172 | ;; input. | ||
| 173 | ;; 2) [0-9] is the character range from 0 to 9. This can also be | ||
| 174 | ;; written as (range ?0 ?9). Note that 0-9 is a symbol. | ||
| 175 | (defun peg-ex-recognize-int () | ||
| 176 | (with-peg-rules ((number sign digit (* digit)) | ||
| 177 | (sign (or "+" "-" "")) | ||
| 178 | (digit [0-9])) | ||
| 179 | (peg-run (peg number)))) | ||
| 180 | |||
| 181 | ;; peg-ex-parse-int recognizes integers and computes the corresponding | ||
| 182 | ;; value. The grammar is the same as for `peg-ex-recognize-int' | ||
| 183 | ;; augmented with parsing actions. Unfortunaletly, the actions add | ||
| 184 | ;; quite a bit of clutter. | ||
| 185 | ;; | ||
| 186 | ;; The actions for the sign rule push -1 on the stack for a minus sign | ||
| 187 | ;; and 1 for plus or no sign. | ||
| 188 | ;; | ||
| 189 | ;; The action for the digit rule pushes the value for a single digit. | ||
| 190 | ;; | ||
| 191 | ;; The action `(a b -- (+ (* a 10) b)), takes two items from the stack | ||
| 192 | ;; and pushes the first digit times 10 added to the second digit. | ||
| 193 | ;; | ||
| 194 | ;; The action `(sign val -- (* sign val)), multiplies val with the | ||
| 195 | ;; sign (1 or -1). | ||
| 196 | (defun peg-ex-parse-int () | ||
| 197 | (with-peg-rules ((number sign digit (* digit | ||
| 198 | `(a b -- (+ (* a 10) b))) | ||
| 199 | `(sign val -- (* sign val))) | ||
| 200 | (sign (or (and "+" `(-- 1)) | ||
| 201 | (and "-" `(-- -1)) | ||
| 202 | (and "" `(-- 1)))) | ||
| 203 | (digit [0-9] `(-- (- (char-before) ?0)))) | ||
| 204 | (peg-run (peg number)))) | ||
| 205 | |||
| 206 | ;; Put point after the ) and press C-x C-e | ||
| 207 | ;; (peg-ex-parse-int)-234234 | ||
| 208 | |||
| 209 | ;; Parse arithmetic expressions and compute the result as side effect. | ||
| 210 | (defun peg-ex-arith () | ||
| 211 | (peg-parse | ||
| 212 | (expr _ sum eol) | ||
| 213 | (sum product (* (or (and "+" _ product `(a b -- (+ a b))) | ||
| 214 | (and "-" _ product `(a b -- (- a b)))))) | ||
| 215 | (product value (* (or (and "*" _ value `(a b -- (* a b))) | ||
| 216 | (and "/" _ value `(a b -- (/ a b)))))) | ||
| 217 | (value (or (and (substring number) `(string -- (string-to-number string))) | ||
| 218 | (and "(" _ sum ")" _))) | ||
| 219 | (number (+ [0-9]) _) | ||
| 220 | (_ (* [" \t"])) | ||
| 221 | (eol (or "\n" "\r\n" "\r")))) | ||
| 222 | |||
| 223 | ;; (peg-ex-arith) 1 + 2 * 3 * (4 + 5) | ||
| 224 | ;; (peg-ex-arith) 1 + 2 ^ 3 * (4 + 5) ; fails to parse | ||
| 225 | |||
| 226 | ;; Parse URI according to RFC 2396. | ||
| 227 | (defun peg-ex-uri () | ||
| 228 | (peg-parse | ||
| 229 | (URI-reference (or absoluteURI relativeURI) | ||
| 230 | (or (and "#" (substring fragment)) | ||
| 231 | `(-- nil)) | ||
| 232 | `(scheme user host port path query fragment -- | ||
| 233 | (list :scheme scheme :user user | ||
| 234 | :host host :port port | ||
| 235 | :path path :query query | ||
| 236 | :fragment fragment))) | ||
| 237 | (absoluteURI (substring scheme) ":" (or hier-part opaque-part)) | ||
| 238 | (hier-part ;(-- user host port path query) | ||
| 239 | (or net-path | ||
| 240 | (and `(-- nil nil nil) | ||
| 241 | abs-path)) | ||
| 242 | (or (and "?" (substring query)) | ||
| 243 | `(-- nil))) | ||
| 244 | (net-path "//" authority (or abs-path `(-- nil))) | ||
| 245 | (abs-path "/" path-segments) | ||
| 246 | (path-segments segment (list (* "/" segment)) `(s l -- (cons s l))) | ||
| 247 | (segment (substring (* pchar) (* ";" param))) | ||
| 248 | (param (* pchar)) | ||
| 249 | (pchar (or unreserved escaped [":@&=+$,"])) | ||
| 250 | (query (* uric)) | ||
| 251 | (fragment (* uric)) | ||
| 252 | (relativeURI (or net-path abs-path rel-path) (opt "?" query)) | ||
| 253 | (rel-path rel-segment (opt abs-path)) | ||
| 254 | (rel-segment (+ unreserved escaped [";@&=+$,"])) | ||
| 255 | (authority (or server reg-name)) | ||
| 256 | (server (or (and (or (and (substring userinfo) "@") | ||
| 257 | `(-- nil)) | ||
| 258 | hostport) | ||
| 259 | `(-- nil nil nil))) | ||
| 260 | (userinfo (* (or unreserved escaped [";:&=+$,"]))) | ||
| 261 | (hostport (substring host) (or (and ":" (substring port)) | ||
| 262 | `(-- nil))) | ||
| 263 | (host (or hostname ipv4address)) | ||
| 264 | (hostname (* domainlabel ".") toplabel (opt ".")) | ||
| 265 | (domainlabel alphanum | ||
| 266 | (opt (* (or alphanum "-") (if alphanum)) | ||
| 267 | alphanum)) | ||
| 268 | (toplabel alpha | ||
| 269 | (* (or alphanum "-") (if alphanum)) | ||
| 270 | alphanum) | ||
| 271 | (ipv4address (+ digit) "." (+ digit) "." (+ digit) "." (+ digit)) | ||
| 272 | (port (* digit)) | ||
| 273 | (scheme alpha (* (or alpha digit ["+-."]))) | ||
| 274 | (reg-name (or unreserved escaped ["$,;:@&=+"])) | ||
| 275 | (opaque-part uric-no-slash (* uric)) | ||
| 276 | (uric (or reserved unreserved escaped)) | ||
| 277 | (uric-no-slash (or unreserved escaped [";?:@&=+$,"])) | ||
| 278 | (reserved (set ";/?:@&=+$,")) | ||
| 279 | (unreserved (or alphanum mark)) | ||
| 280 | (escaped "%" hex hex) | ||
| 281 | (hex (or digit [A-F] [a-f])) | ||
| 282 | (mark (set "-_.!~*'()")) | ||
| 283 | (alphanum (or alpha digit)) | ||
| 284 | (alpha (or lowalpha upalpha)) | ||
| 285 | (lowalpha [a-z]) | ||
| 286 | (upalpha [A-Z]) | ||
| 287 | (digit [0-9]))) | ||
| 288 | |||
| 289 | ;; (peg-ex-uri)http://luser@www.foo.com:8080/bar/baz.html?x=1#foo | ||
| 290 | ;; (peg-ex-uri)file:/bar/baz.html?foo=df#x | ||
| 291 | |||
| 292 | ;; Split STRING where SEPARATOR occurs. | ||
| 293 | (defun peg-ex-split (string separator) | ||
| 294 | (peg-parse-string ((s (list (* (* sep) elt))) | ||
| 295 | (elt (substring (+ (not sep) (any)))) | ||
| 296 | (sep (= separator))) | ||
| 297 | string)) | ||
| 298 | |||
| 299 | ;; (peg-ex-split "-abc-cd-" "-") | ||
| 300 | |||
| 301 | ;; Parse a lisp style Sexp. | ||
| 302 | ;; [To keep the example short, ' and . are handled as ordinary symbol.] | ||
| 303 | (defun peg-ex-lisp () | ||
| 304 | (peg-parse | ||
| 305 | (sexp _ (or string list number symbol)) | ||
| 306 | (_ (* (or [" \n\t"] comment))) | ||
| 307 | (comment ";" (* (not (or "\n" (eob))) (any))) | ||
| 308 | (string "\"" (substring (* (not "\"") (any))) "\"") | ||
| 309 | (number (substring (opt (set "+-")) (+ digit)) | ||
| 310 | (if terminating) | ||
| 311 | `(string -- (string-to-number string))) | ||
| 312 | (symbol (substring (and symchar (* (not terminating) symchar))) | ||
| 313 | `(s -- (intern s))) | ||
| 314 | (symchar [a-z A-Z 0-9 "-;!#%&'*+,./:;<=>?@[]^_`{|}~"]) | ||
| 315 | (list "(" `(-- (cons nil nil)) `(hd -- hd hd) | ||
| 316 | (* sexp `(tl e -- (setcdr tl (list e)))) | ||
| 317 | _ ")" `(hd _tl -- (cdr hd))) | ||
| 318 | (digit [0-9]) | ||
| 319 | (terminating (or (set " \n\t();\"'") (eob))))) | ||
| 320 | |||
| 321 | ;; (peg-ex-lisp) | ||
| 322 | |||
| 323 | ;; We try to detect left recursion and report it as error. | ||
| 324 | (defun peg-ex-left-recursion () | ||
| 325 | (eval '(peg-parse (exp (or term | ||
| 326 | (and exp "+" exp))) | ||
| 327 | (term (or digit | ||
| 328 | (and term "*" term))) | ||
| 329 | (digit [0-9])) | ||
| 330 | t)) | ||
| 331 | |||
| 332 | (defun peg-ex-infinite-loop () | ||
| 333 | (eval '(peg-parse (exp (* (or "x" | ||
| 334 | "y" | ||
| 335 | (action (foo)))))) | ||
| 336 | t)) | ||
| 337 | |||
| 338 | ;; Some efficiency problems: | ||
| 339 | |||
| 340 | ;; Find the last digit in a string. | ||
| 341 | ;; Recursive definition with excessive stack usage. | ||
| 342 | (defun peg-ex-last-digit (string) | ||
| 343 | (peg-parse-string ((s (or (and (any) s) | ||
| 344 | (substring [0-9])))) | ||
| 345 | string)) | ||
| 346 | |||
| 347 | ;; (peg-ex-last-digit "ab0cd1ef2gh") | ||
| 348 | ;; (peg-ex-last-digit (make-string 50 ?-)) | ||
| 349 | ;; (peg-ex-last-digit (make-string 1000 ?-)) | ||
| 350 | |||
| 351 | ;; Find the last digit without recursion. Doesn't run out of stack, | ||
| 352 | ;; but probably still too inefficient for large inputs. | ||
| 353 | (defun peg-ex-last-digit2 (string) | ||
| 354 | (peg-parse-string ((s `(-- nil) | ||
| 355 | (+ (* (not digit) (any)) | ||
| 356 | (substring digit) | ||
| 357 | `(_d1 d2 -- d2))) | ||
| 358 | (digit [0-9])) | ||
| 359 | string)) | ||
| 360 | |||
| 361 | ;; (peg-ex-last-digit2 "ab0cd1ef2gh") | ||
| 362 | ;; (peg-ex-last-digit2 (concat (make-string 500000 ?-) "8a9b")) | ||
| 363 | ;; (peg-ex-last-digit2 (make-string 500000 ?-)) | ||
| 364 | ;; (peg-ex-last-digit2 (make-string 500000 ?5)) | ||
| 365 | |||
| 366 | (provide 'peg-tests) | ||
| 367 | ;;; peg-tests.el ends here | ||