diff options
| author | Mattias EngdegÄrd | 2019-09-25 14:29:50 -0700 |
|---|---|---|
| committer | Paul Eggert | 2019-09-25 14:29:50 -0700 |
| commit | 07367e5b95fe31f3d4e994b42b081075501b9b60 (patch) | |
| tree | 7d26251a300462083d971aa3aa9880cc23c423a1 | |
| parent | 2ed71227c626c6cfdc684948644ccf3d9eaeb15b (diff) | |
| download | emacs-07367e5b95fe31f3d4e994b42b081075501b9b60.tar.gz emacs-07367e5b95fe31f3d4e994b42b081075501b9b60.zip | |
Add rx extension mechanism
Add a built-in set of extension macros: `rx-define', `rx-let' and
`rx-let-eval'.
* lisp/emacs-lisp/rx.el (rx-constituents, rx-to-string): Doc updates.
(rx--builtin-symbols, rx--builtin-names, rx--local-definitions)
(rx--lookup-def, rx--substitute, rx--expand-template)
(rx--make-binding, rx--make-named-binding, rx--extend-local-defs)
(rx-let-eval, rx-let, rx-define): New.
(rx--translate-symbol, rx--translate-form): Use extensions if any.
(rx): Use local definitions.
* test/lisp/emacs-lisp/rx-tests.el (rx-let, rx-define)
(rx-to-string-define, rx-let-define, rx-let-eval): New.
* etc/NEWS (Changes in Specialized Modes and Packages):
* doc/lispref/searching.texi (Rx Notation, Rx Functions, Extending Rx):
Add node about rx extensions.
| -rw-r--r-- | doc/lispref/searching.texi | 157 | ||||
| -rw-r--r-- | etc/NEWS | 4 | ||||
| -rw-r--r-- | lisp/emacs-lisp/rx.el | 299 | ||||
| -rw-r--r-- | test/lisp/emacs-lisp/rx-tests.el | 98 |
4 files changed, 538 insertions, 20 deletions
diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi index 2d94e5659de..a4b65334126 100644 --- a/doc/lispref/searching.texi +++ b/doc/lispref/searching.texi | |||
| @@ -1037,6 +1037,7 @@ customisation. | |||
| 1037 | @menu | 1037 | @menu |
| 1038 | * Rx Constructs:: Constructs valid in rx forms. | 1038 | * Rx Constructs:: Constructs valid in rx forms. |
| 1039 | * Rx Functions:: Functions and macros that use rx forms. | 1039 | * Rx Functions:: Functions and macros that use rx forms. |
| 1040 | * Extending Rx:: How to define your own rx forms. | ||
| 1040 | @end menu | 1041 | @end menu |
| 1041 | 1042 | ||
| 1042 | @node Rx Constructs | 1043 | @node Rx Constructs |
| @@ -1524,6 +1525,162 @@ must be string literals. | |||
| 1524 | 1525 | ||
| 1525 | The @code{pcase} macro can use @code{rx} expressions as patterns | 1526 | The @code{pcase} macro can use @code{rx} expressions as patterns |
| 1526 | directly; @pxref{rx in pcase}. | 1527 | directly; @pxref{rx in pcase}. |
| 1528 | |||
| 1529 | For mechanisms to add user-defined extensions to the @code{rx} | ||
| 1530 | notation, @pxref{Extending Rx}. | ||
| 1531 | |||
| 1532 | @node Extending Rx | ||
| 1533 | @subsubsection Defining new @code{rx} forms | ||
| 1534 | |||
| 1535 | The @code{rx} notation can be extended by defining new symbols and | ||
| 1536 | parametrised forms in terms of other @code{rx} expressions. This is | ||
| 1537 | handy for sharing parts between several regexps, and for making | ||
| 1538 | complex ones easier to build and understand by putting them together | ||
| 1539 | from smaller pieces. | ||
| 1540 | |||
| 1541 | For example, you could define @code{name} to mean | ||
| 1542 | @code{(one-or-more letter)}, and @code{(quoted @var{x})} to mean | ||
| 1543 | @code{(seq ?' @var{x} ?')} for any @var{x}. These forms could then be | ||
| 1544 | used in @code{rx} expressions like any other: @code{(rx (quoted name))} | ||
| 1545 | would match a nonempty sequence of letters inside single quotes. | ||
| 1546 | |||
| 1547 | The Lisp macros below provide different ways of binding names to | ||
| 1548 | definitions. Common to all of them are the following rules: | ||
| 1549 | |||
| 1550 | @itemize | ||
| 1551 | @item | ||
| 1552 | Built-in @code{rx} forms, like @code{digit} and @code{group}, cannot | ||
| 1553 | be redefined. | ||
| 1554 | |||
| 1555 | @item | ||
| 1556 | The definitions live in a name space of their own, separate from that | ||
| 1557 | of Lisp variables. There is thus no need to attach a suffix like | ||
| 1558 | @code{-regexp} to names; they cannot collide with anything else. | ||
| 1559 | |||
| 1560 | @item | ||
| 1561 | Definitions cannot refer to themselves recursively, directly or | ||
| 1562 | indirectly. If you find yourself needing this, you want a parser, not | ||
| 1563 | a regular expression. | ||
| 1564 | |||
| 1565 | @item | ||
| 1566 | Definitions are only ever expanded in calls to @code{rx} or | ||
| 1567 | @code{rx-to-string}, not merely by their presence in definition | ||
| 1568 | macros. This means that the order of definitions doesn't matter, even | ||
| 1569 | when they refer to each other, and that syntax errors only show up | ||
| 1570 | when they are used, not when they are defined. | ||
| 1571 | |||
| 1572 | @item | ||
| 1573 | User-defined forms are allowed wherever arbitrary @code{rx} | ||
| 1574 | expressions are expected; for example, in the body of a | ||
| 1575 | @code{zero-or-one} form, but not inside @code{any} or @code{category} | ||
| 1576 | forms. | ||
| 1577 | @end itemize | ||
| 1578 | |||
| 1579 | @defmac rx-define name [arglist] rx-form | ||
| 1580 | Define @var{name} globally in all subsequent calls to @code{rx} and | ||
| 1581 | @code{rx-to-string}. If @var{arglist} is absent, then @var{name} is | ||
| 1582 | defined as a plain symbol to be replaced with @var{rx-form}. Example: | ||
| 1583 | |||
| 1584 | @example | ||
| 1585 | @group | ||
| 1586 | (rx-define haskell-comment (seq "--" (zero-or-more nonl))) | ||
| 1587 | (rx haskell-comment) | ||
| 1588 | @result{} "--.*" | ||
| 1589 | @end group | ||
| 1590 | @end example | ||
| 1591 | |||
| 1592 | If @var{arglist} is present, it must be a list of zero or more | ||
| 1593 | argument names, and @var{name} is then defined as a parametrised form. | ||
| 1594 | When used in an @code{rx} expression as @code{(@var{name} @var{arg}@dots{})}, | ||
| 1595 | each @var{arg} will replace the corresponding argument name inside | ||
| 1596 | @var{rx-form}. | ||
| 1597 | |||
| 1598 | @var{arglist} may end in @code{&rest} and one final argument name, | ||
| 1599 | denoting a rest parameter. The rest parameter will expand to all | ||
| 1600 | extra actual argument values not matched by any other parameter in | ||
| 1601 | @var{arglist}, spliced into @var{rx-form} where it occurs. Example: | ||
| 1602 | |||
| 1603 | @example | ||
| 1604 | @group | ||
| 1605 | (rx-define moan (x y &rest r) (seq x (one-or-more y) r "!")) | ||
| 1606 | (rx (moan "MOO" "A" "MEE" "OW")) | ||
| 1607 | @result{} "MOOA+MEEOW!" | ||
| 1608 | @end group | ||
| 1609 | @end example | ||
| 1610 | |||
| 1611 | Since the definition is global, it is recommended to give @var{name} a | ||
| 1612 | package prefix to avoid name clashes with definitions elsewhere, as is | ||
| 1613 | usual when naming non-local variables and functions. | ||
| 1614 | @end defmac | ||
| 1615 | |||
| 1616 | @defmac rx-let (bindings@dots{}) body@dots{} | ||
| 1617 | Make the @code{rx} definitions in @var{bindings} available locally for | ||
| 1618 | @code{rx} macro invocations in @var{body}, which is then evaluated. | ||
| 1619 | |||
| 1620 | Each element of @var{bindings} is on the form | ||
| 1621 | @w{@code{(@var{name} [@var{arglist}] @var{rx-form})}}, where the parts | ||
| 1622 | have the same meaning as in @code{rx-define} above. Example: | ||
| 1623 | |||
| 1624 | @example | ||
| 1625 | @group | ||
| 1626 | (rx-let ((comma-separated (item) (seq item (0+ "," item))) | ||
| 1627 | (number (1+ digit)) | ||
| 1628 | (numbers (comma-separated number))) | ||
| 1629 | (re-search-forward (rx "(" numbers ")"))) | ||
| 1630 | @end group | ||
| 1631 | @end example | ||
| 1632 | |||
| 1633 | The definitions are only available during the macro-expansion of | ||
| 1634 | @var{body}, and are thus not present during execution of compiled | ||
| 1635 | code. | ||
| 1636 | |||
| 1637 | @code{rx-let} can be used not only inside a function, but also at top | ||
| 1638 | level to include global variable and function definitions that need | ||
| 1639 | to share a common set of @code{rx} forms. Since the names are local | ||
| 1640 | inside @var{body}, there is no need for any package prefixes. | ||
| 1641 | Example: | ||
| 1642 | |||
| 1643 | @example | ||
| 1644 | @group | ||
| 1645 | (rx-let ((phone-number (seq (opt ?+) (1+ (any digit ?-))))) | ||
| 1646 | (defun find-next-phone-number () | ||
| 1647 | (re-search-forward (rx phone-number))) | ||
| 1648 | (defun phone-number-p (string) | ||
| 1649 | (string-match-p (rx bos phone-number eos) string))) | ||
| 1650 | @end group | ||
| 1651 | @end example | ||
| 1652 | |||
| 1653 | The scope of the @code{rx-let} bindings is lexical, which means that | ||
| 1654 | they are not visible outside @var{body} itself, even in functions | ||
| 1655 | called from @var{body}. | ||
| 1656 | @end defmac | ||
| 1657 | |||
| 1658 | @defmac rx-let-eval bindings body@dots{} | ||
| 1659 | Evaluate @var{bindings} to a list of bindings as in @code{rx-let}, | ||
| 1660 | and evaluate @var{body} with those bindings in effect for calls | ||
| 1661 | to @code{rx-to-string}. | ||
| 1662 | |||
| 1663 | This macro is similar to @code{rx-let}, except that the @var{bindings} | ||
| 1664 | argument is evaluated (and thus needs to be quoted if it is a list | ||
| 1665 | literal), and the definitions are substituted at run time, which is | ||
| 1666 | required for @code{rx-to-string} to work. Example: | ||
| 1667 | |||
| 1668 | @example | ||
| 1669 | @group | ||
| 1670 | (rx-let-eval | ||
| 1671 | '((ponder (x) (seq "Where have all the " x " gone?"))) | ||
| 1672 | (looking-at (rx-to-string | ||
| 1673 | '(ponder (or "flowers" "young girls" | ||
| 1674 | "left socks"))))) | ||
| 1675 | @end group | ||
| 1676 | @end example | ||
| 1677 | |||
| 1678 | Another difference from @code{rx-let} is that the @var{bindings} are | ||
| 1679 | dynamically scoped, and thus also available in functions called from | ||
| 1680 | @var{body}. However, they are not visible inside functions defined in | ||
| 1681 | @var{body}. | ||
| 1682 | @end defmac | ||
| 1683 | |||
| 1527 | @end ifnottex | 1684 | @end ifnottex |
| 1528 | 1685 | ||
| 1529 | @node Regexp Functions | 1686 | @node Regexp Functions |
| @@ -1725,6 +1725,10 @@ This also works for their aliases: '|' for 'or'; ':', 'and' and | |||
| 1725 | In this case, 'rx' will generate code which produces a regexp string | 1725 | In this case, 'rx' will generate code which produces a regexp string |
| 1726 | at run time, instead of a constant string. | 1726 | at run time, instead of a constant string. |
| 1727 | 1727 | ||
| 1728 | --- | ||
| 1729 | *** New rx extension mechanism: 'rx-define', 'rx-let', 'rx-let-eval'. | ||
| 1730 | These macros add new forms to the rx notation. | ||
| 1731 | |||
| 1728 | ** Frames | 1732 | ** Frames |
| 1729 | 1733 | ||
| 1730 | +++ | 1734 | +++ |
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el index 9b3419e1c88..a192ed1ad27 100644 --- a/lisp/emacs-lisp/rx.el +++ b/lisp/emacs-lisp/rx.el | |||
| @@ -97,6 +97,7 @@ Most of the names are from SRE.") | |||
| 97 | 97 | ||
| 98 | (defvar rx-constituents nil | 98 | (defvar rx-constituents nil |
| 99 | "Alist of old-style rx extensions, for compatibility. | 99 | "Alist of old-style rx extensions, for compatibility. |
| 100 | For new code, use `rx-define', `rx-let' or `rx-let-eval'. | ||
| 100 | 101 | ||
| 101 | Each element is (SYMBOL . DEF). | 102 | Each element is (SYMBOL . DEF). |
| 102 | 103 | ||
| @@ -113,6 +114,17 @@ If DEF is a list on the form (FUN MIN-ARGS MAX-ARGS PRED), then | |||
| 113 | If PRED is non-nil, it is a predicate that all actual arguments must | 114 | If PRED is non-nil, it is a predicate that all actual arguments must |
| 114 | satisfy.") | 115 | satisfy.") |
| 115 | 116 | ||
| 117 | (defvar rx--local-definitions nil | ||
| 118 | "Alist of dynamic local rx definitions. | ||
| 119 | Each entry is: | ||
| 120 | (NAME DEF) -- NAME is an rx symbol defined as the rx form DEF. | ||
| 121 | (NAME ARGS DEF) -- NAME is an rx form with arglist ARGS, defined | ||
| 122 | as the rx form DEF (which can contain members of ARGS).") | ||
| 123 | |||
| 124 | (defsubst rx--lookup-def (name) | ||
| 125 | (or (cdr (assq name rx--local-definitions)) | ||
| 126 | (get name 'rx-definition))) | ||
| 127 | |||
| 116 | ;; TODO: Additions to consider: | 128 | ;; TODO: Additions to consider: |
| 117 | ;; - A better name for `anything', like `any-char' or `anychar'. | 129 | ;; - A better name for `anything', like `any-char' or `anychar'. |
| 118 | ;; - A name for (or), maybe `unmatchable'. | 130 | ;; - A name for (or), maybe `unmatchable'. |
| @@ -144,6 +156,12 @@ If DEF is a list on the form (FUN MIN-ARGS MAX-ARGS PRED), then | |||
| 144 | ((let ((class (cdr (assq sym rx--char-classes)))) | 156 | ((let ((class (cdr (assq sym rx--char-classes)))) |
| 145 | (and class (cons (list (concat "[[:" (symbol-name class) ":]]")) t)))) | 157 | (and class (cons (list (concat "[[:" (symbol-name class) ":]]")) t)))) |
| 146 | 158 | ||
| 159 | ((let ((definition (rx--lookup-def sym))) | ||
| 160 | (and definition | ||
| 161 | (if (cdr definition) | ||
| 162 | (error "Not an `rx' symbol definition: %s" sym) | ||
| 163 | (rx--translate (nth 0 definition)))))) | ||
| 164 | |||
| 147 | ;; For compatibility with old rx. | 165 | ;; For compatibility with old rx. |
| 148 | ((let ((entry (assq sym rx-constituents))) | 166 | ((let ((entry (assq sym rx-constituents))) |
| 149 | (and (progn | 167 | (and (progn |
| @@ -310,6 +328,19 @@ INTERVALS is a list of (START . END) with START †END, sorted by START." | |||
| 310 | (setq tail d))) | 328 | (setq tail d))) |
| 311 | intervals)) | 329 | intervals)) |
| 312 | 330 | ||
| 331 | ;; FIXME: Consider expanding definitions inside (any ...) and (not ...), | ||
| 332 | ;; and perhaps allow (any ...) inside (any ...). | ||
| 333 | ;; It would be benefit composability (build a character alternative by pieces) | ||
| 334 | ;; and be handy for obtaining the complement of a defined set of | ||
| 335 | ;; characters. (See, for example, python.el:421, `not-simple-operator'.) | ||
| 336 | ;; (Expansion in other non-rx positions is probably not a good idea: | ||
| 337 | ;; syntax, category, backref, and the integer parameters of group-n, | ||
| 338 | ;; =, >=, **, repeat) | ||
| 339 | ;; Similar effect could be attained by ensuring that | ||
| 340 | ;; (or (any X) (any Y)) -> (any X Y), and find a way to compose negative | ||
| 341 | ;; sets. `and' is taken, but we could add | ||
| 342 | ;; (intersection (not (any X)) (not (any Y))) -> (not (any X Y)). | ||
| 343 | |||
| 313 | (defun rx--translate-any (negated body) | 344 | (defun rx--translate-any (negated body) |
| 314 | "Translate an (any ...) construct. Return (REGEXP . PRECEDENCE). | 345 | "Translate an (any ...) construct. Return (REGEXP . PRECEDENCE). |
| 315 | If NEGATED, negate the sense." | 346 | If NEGATED, negate the sense." |
| @@ -712,6 +743,94 @@ DEF is the definition tuple. Return (REGEXP . PRECEDENCE)." | |||
| 712 | (error "The `%s' form did not expand to a string" (car form))) | 743 | (error "The `%s' form did not expand to a string" (car form))) |
| 713 | (cons (list regexp) nil)))) | 744 | (cons (list regexp) nil)))) |
| 714 | 745 | ||
| 746 | (defun rx--substitute (bindings form) | ||
| 747 | "Substitute BINDINGS in FORM. BINDINGS is an alist of (NAME . VALUES) | ||
| 748 | where VALUES is a list to splice into FORM wherever NAME occurs. | ||
| 749 | Return the substitution result wrapped in a list, since a single value | ||
| 750 | can expand to any number of values." | ||
| 751 | (cond ((symbolp form) | ||
| 752 | (let ((binding (assq form bindings))) | ||
| 753 | (if binding | ||
| 754 | (cdr binding) | ||
| 755 | (list form)))) | ||
| 756 | ((consp form) | ||
| 757 | (if (listp (cdr form)) | ||
| 758 | ;; Proper list. We substitute variables even in the head | ||
| 759 | ;; position -- who knows, might be handy one day. | ||
| 760 | (list (mapcan (lambda (x) (copy-sequence | ||
| 761 | (rx--substitute bindings x))) | ||
| 762 | form)) | ||
| 763 | ;; Cons pair (presumably an interval). | ||
| 764 | (let ((first (rx--substitute bindings (car form))) | ||
| 765 | (second (rx--substitute bindings (cdr form)))) | ||
| 766 | (if (and first (not (cdr first)) | ||
| 767 | second (not (cdr second))) | ||
| 768 | (list (cons (car first) (car second))) | ||
| 769 | (error | ||
| 770 | "Cannot substitute a &rest parameter into a dotted pair"))))) | ||
| 771 | (t (list form)))) | ||
| 772 | |||
| 773 | ;; FIXME: Consider adding extensions in Lisp macro style, where | ||
| 774 | ;; arguments are passed unevaluated to code that returns the rx form | ||
| 775 | ;; to use. Example: | ||
| 776 | ;; | ||
| 777 | ;; (rx-let ((radix-digit (radix) | ||
| 778 | ;; :lisp (list 'any (cons ?0 (+ ?0 (eval radix) -1))))) | ||
| 779 | ;; (rx (radix-digit (+ 5 3)))) | ||
| 780 | ;; => | ||
| 781 | ;; "[0-7]" | ||
| 782 | ;; | ||
| 783 | ;; While this would permit more powerful extensions, it's unclear just | ||
| 784 | ;; how often they would be used in practice. Let's wait until there is | ||
| 785 | ;; demand for it. | ||
| 786 | |||
| 787 | ;; FIXME: An alternative binding syntax would be | ||
| 788 | ;; | ||
| 789 | ;; (NAME RXs...) | ||
| 790 | ;; and | ||
| 791 | ;; ((NAME ARGS...) RXs...) | ||
| 792 | ;; | ||
| 793 | ;; which would have two minor advantages: multiple RXs with implicit | ||
| 794 | ;; `seq' in the definition, and the arglist is no longer an optional | ||
| 795 | ;; element in the middle of the list. On the other hand, it's less | ||
| 796 | ;; like traditional lisp arglist constructs (defun, defmacro). | ||
| 797 | ;; Since it's a Scheme-like syntax, &rest parameters could be done using | ||
| 798 | ;; dotted lists: | ||
| 799 | ;; (rx-let (((name arg1 arg2 . rest) ...definition...)) ...) | ||
| 800 | |||
| 801 | (defun rx--expand-template (op values arglist template) | ||
| 802 | "Return TEMPLATE with variables in ARGLIST replaced with VALUES." | ||
| 803 | (let ((bindings nil) | ||
| 804 | (value-tail values) | ||
| 805 | (formals arglist)) | ||
| 806 | (while formals | ||
| 807 | (pcase (car formals) | ||
| 808 | ('&rest | ||
| 809 | (unless (cdr formals) | ||
| 810 | (error | ||
| 811 | "Expanding rx def `%s': missing &rest parameter name" op)) | ||
| 812 | (push (cons (cadr formals) value-tail) bindings) | ||
| 813 | (setq formals nil) | ||
| 814 | (setq value-tail nil)) | ||
| 815 | (name | ||
| 816 | (unless value-tail | ||
| 817 | (error | ||
| 818 | "Expanding rx def `%s': too few arguments (got %d, need %s%d)" | ||
| 819 | op (length values) | ||
| 820 | (if (memq '&rest arglist) "at least " "") | ||
| 821 | (- (length arglist) (length (memq '&rest arglist))))) | ||
| 822 | (push (cons name (list (car value-tail))) bindings) | ||
| 823 | (setq value-tail (cdr value-tail)))) | ||
| 824 | (setq formals (cdr formals))) | ||
| 825 | (when value-tail | ||
| 826 | (error | ||
| 827 | "Expanding rx def `%s': too many arguments (got %d, need %d)" | ||
| 828 | op (length values) (length arglist))) | ||
| 829 | (let ((subst (rx--substitute bindings template))) | ||
| 830 | (if (and subst (not (cdr subst))) | ||
| 831 | (car subst) | ||
| 832 | (error "Expanding rx def `%s': must result in a single value" op))))) | ||
| 833 | |||
| 715 | (defun rx--translate-form (form) | 834 | (defun rx--translate-form (form) |
| 716 | "Translate an rx form (list structure). Return (REGEXP . PRECEDENCE)." | 835 | "Translate an rx form (list structure). Return (REGEXP . PRECEDENCE)." |
| 717 | (let ((body (cdr form))) | 836 | (let ((body (cdr form))) |
| @@ -757,24 +876,29 @@ DEF is the definition tuple. Return (REGEXP . PRECEDENCE)." | |||
| 757 | (op | 876 | (op |
| 758 | (unless (symbolp op) | 877 | (unless (symbolp op) |
| 759 | (error "Bad rx operator `%S'" op)) | 878 | (error "Bad rx operator `%S'" op)) |
| 879 | (let ((definition (rx--lookup-def op))) | ||
| 880 | (if definition | ||
| 881 | (if (cdr definition) | ||
| 882 | (rx--translate | ||
| 883 | (rx--expand-template | ||
| 884 | op body (nth 0 definition) (nth 1 definition))) | ||
| 885 | (error "Not an `rx' form definition: %s" op)) | ||
| 886 | |||
| 887 | ;; For compatibility with old rx. | ||
| 888 | (let ((entry (assq op rx-constituents))) | ||
| 889 | (if (progn | ||
| 890 | (while (and entry (not (consp (cdr entry)))) | ||
| 891 | (setq entry | ||
| 892 | (if (symbolp (cdr entry)) | ||
| 893 | ;; Alias for another entry. | ||
| 894 | (assq (cdr entry) rx-constituents) | ||
| 895 | ;; Wrong type, try further down the list. | ||
| 896 | (assq (car entry) | ||
| 897 | (cdr (memq entry rx-constituents)))))) | ||
| 898 | entry) | ||
| 899 | (rx--translate-compat-form (cdr entry) form) | ||
| 900 | (error "Unknown rx form `%s'" op))))))))) | ||
| 760 | 901 | ||
| 761 | ;; For compatibility with old rx. | ||
| 762 | (let ((entry (assq op rx-constituents))) | ||
| 763 | (if (progn | ||
| 764 | (while (and entry (not (consp (cdr entry)))) | ||
| 765 | (setq entry | ||
| 766 | (if (symbolp (cdr entry)) | ||
| 767 | ;; Alias for another entry. | ||
| 768 | (assq (cdr entry) rx-constituents) | ||
| 769 | ;; Wrong type, try further down the list. | ||
| 770 | (assq (car entry) | ||
| 771 | (cdr (memq entry rx-constituents)))))) | ||
| 772 | entry) | ||
| 773 | (rx--translate-compat-form (cdr entry) form) | ||
| 774 | (error "Unknown rx form `%s'" op))))))) | ||
| 775 | |||
| 776 | ;; Defined here rather than in re-builder to lower the odds that it | ||
| 777 | ;; will be kept in sync with changes. | ||
| 778 | (defconst rx--builtin-forms | 902 | (defconst rx--builtin-forms |
| 779 | '(seq sequence : and or | any in char not-char not | 903 | '(seq sequence : and or | any in char not-char not |
| 780 | repeat = >= ** | 904 | repeat = >= ** |
| @@ -786,7 +910,21 @@ DEF is the definition tuple. Return (REGEXP . PRECEDENCE)." | |||
| 786 | group submatch group-n submatch-n backref | 910 | group submatch group-n submatch-n backref |
| 787 | syntax not-syntax category | 911 | syntax not-syntax category |
| 788 | literal eval regexp regex) | 912 | literal eval regexp regex) |
| 789 | "List of built-in rx forms. For use in re-builder only.") | 913 | "List of built-in rx function-like symbols.") |
| 914 | |||
| 915 | (defconst rx--builtin-symbols | ||
| 916 | (append '(nonl not-newline any anything | ||
| 917 | bol eol line-start line-end | ||
| 918 | bos eos string-start string-end | ||
| 919 | bow eow word-start word-end | ||
| 920 | symbol-start symbol-end | ||
| 921 | point word-boundary not-word-boundary not-wordchar) | ||
| 922 | (mapcar #'car rx--char-classes)) | ||
| 923 | "List of built-in rx variable-like symbols.") | ||
| 924 | |||
| 925 | (defconst rx--builtin-names | ||
| 926 | (append rx--builtin-forms rx--builtin-symbols) | ||
| 927 | "List of built-in rx names. These cannot be redefined by the user.") | ||
| 790 | 928 | ||
| 791 | (defun rx--translate (item) | 929 | (defun rx--translate (item) |
| 792 | "Translate the rx-expression ITEM. Return (REGEXP . PRECEDENCE)." | 930 | "Translate the rx-expression ITEM. Return (REGEXP . PRECEDENCE)." |
| @@ -810,7 +948,9 @@ DEF is the definition tuple. Return (REGEXP . PRECEDENCE)." | |||
| 810 | The arguments to `literal' and `regexp' forms inside FORM must be | 948 | The arguments to `literal' and `regexp' forms inside FORM must be |
| 811 | constant strings. | 949 | constant strings. |
| 812 | If NO-GROUP is non-nil, don't bracket the result in a non-capturing | 950 | If NO-GROUP is non-nil, don't bracket the result in a non-capturing |
| 813 | group." | 951 | group. |
| 952 | |||
| 953 | For extending the `rx' notation in FORM, use `rx-define' or `rx-let-eval'." | ||
| 814 | (let* ((item (rx--translate form)) | 954 | (let* ((item (rx--translate form)) |
| 815 | (exprs (if no-group | 955 | (exprs (if no-group |
| 816 | (car item) | 956 | (car item) |
| @@ -939,14 +1079,133 @@ Zero-width assertions: these all match the empty string in specific places. | |||
| 939 | (regexp EXPR) Match the string regexp from evaluating EXPR at run time. | 1079 | (regexp EXPR) Match the string regexp from evaluating EXPR at run time. |
| 940 | (eval EXPR) Match the rx sexp from evaluating EXPR at compile time. | 1080 | (eval EXPR) Match the rx sexp from evaluating EXPR at compile time. |
| 941 | 1081 | ||
| 1082 | Additional constructs can be defined using `rx-define' and `rx-let', | ||
| 1083 | which see. | ||
| 1084 | |||
| 942 | \(fn REGEXPS...)" | 1085 | \(fn REGEXPS...)" |
| 943 | (rx--to-expr (cons 'seq regexps))) | 1086 | ;; Retrieve local definitions from the macroexpansion environment. |
| 1087 | ;; (It's unclear whether the previous value of `rx--local-definitions' | ||
| 1088 | ;; should be included, and if so, in which order.) | ||
| 1089 | (let ((rx--local-definitions | ||
| 1090 | (cdr (assq :rx-locals macroexpand-all-environment)))) | ||
| 1091 | (rx--to-expr (cons 'seq regexps)))) | ||
| 1092 | |||
| 1093 | (defun rx--make-binding (name tail) | ||
| 1094 | "Make a definitions entry out of TAIL. | ||
| 1095 | TAIL is on the form ([ARGLIST] DEFINITION)." | ||
| 1096 | (unless (symbolp name) | ||
| 1097 | (error "Bad `rx' definition name: %S" name)) | ||
| 1098 | ;; FIXME: Consider using a hash table or symbol property, for speed. | ||
| 1099 | (when (memq name rx--builtin-names) | ||
| 1100 | (error "Cannot redefine built-in rx name `%s'" name)) | ||
| 1101 | (pcase tail | ||
| 1102 | (`(,def) | ||
| 1103 | (list def)) | ||
| 1104 | (`(,args ,def) | ||
| 1105 | (unless (and (listp args) (rx--every #'symbolp args)) | ||
| 1106 | (error "Bad argument list for `rx' definition %s: %S" name args)) | ||
| 1107 | (list args def)) | ||
| 1108 | (_ (error "Bad `rx' definition of %s: %S" name tail)))) | ||
| 1109 | |||
| 1110 | (defun rx--make-named-binding (bindspec) | ||
| 1111 | "Make a definitions entry out of BINDSPEC. | ||
| 1112 | BINDSPEC is on the form (NAME [ARGLIST] DEFINITION)." | ||
| 1113 | (unless (consp bindspec) | ||
| 1114 | (error "Bad `rx-let' binding: %S" bindspec)) | ||
| 1115 | (cons (car bindspec) | ||
| 1116 | (rx--make-binding (car bindspec) (cdr bindspec)))) | ||
| 1117 | |||
| 1118 | (defun rx--extend-local-defs (bindspecs) | ||
| 1119 | (append (mapcar #'rx--make-named-binding bindspecs) | ||
| 1120 | rx--local-definitions)) | ||
| 944 | 1121 | ||
| 1122 | ;;;###autoload | ||
| 1123 | (defmacro rx-let-eval (bindings &rest body) | ||
| 1124 | "Evaluate BODY with local BINDINGS for `rx-to-string'. | ||
| 1125 | BINDINGS, after evaluation, is a list of definitions each on the form | ||
| 1126 | (NAME [(ARGS...)] RX), in effect for calls to `rx-to-string' | ||
| 1127 | in BODY. | ||
| 1128 | |||
| 1129 | For bindings without an ARGS list, NAME is defined as an alias | ||
| 1130 | for the `rx' expression RX. Where ARGS is supplied, NAME is | ||
| 1131 | defined as an `rx' form with ARGS as argument list. The | ||
| 1132 | parameters are bound from the values in the (NAME ...) form and | ||
| 1133 | are substituted in RX. ARGS can contain `&rest' parameters, | ||
| 1134 | whose values are spliced into RX where the parameter name occurs. | ||
| 1135 | |||
| 1136 | Any previous definitions with the same names are shadowed during | ||
| 1137 | the expansion of BODY only. | ||
| 1138 | For extensions when using the `rx' macro, use `rx-let'. | ||
| 1139 | To make global rx extensions, use `rx-define'. | ||
| 1140 | For more details, see Info node `(elisp) Extending Rx'. | ||
| 1141 | |||
| 1142 | \(fn BINDINGS BODY...)" | ||
| 1143 | (declare (indent 1) (debug (form body))) | ||
| 1144 | ;; FIXME: this way, `rx--extend-local-defs' may need to be autoloaded. | ||
| 1145 | `(let ((rx--local-definitions (rx--extend-local-defs ,bindings))) | ||
| 1146 | ,@body)) | ||
| 1147 | |||
| 1148 | ;;;###autoload | ||
| 1149 | (defmacro rx-let (bindings &rest body) | ||
| 1150 | "Evaluate BODY with local BINDINGS for `rx'. | ||
| 1151 | BINDINGS is an unevaluated list of bindings each on the form | ||
| 1152 | (NAME [(ARGS...)] RX). | ||
| 1153 | They are bound lexically and are available in `rx' expressions in | ||
| 1154 | BODY only. | ||
| 1155 | |||
| 1156 | For bindings without an ARGS list, NAME is defined as an alias | ||
| 1157 | for the `rx' expression RX. Where ARGS is supplied, NAME is | ||
| 1158 | defined as an `rx' form with ARGS as argument list. The | ||
| 1159 | parameters are bound from the values in the (NAME ...) form and | ||
| 1160 | are substituted in RX. ARGS can contain `&rest' parameters, | ||
| 1161 | whose values are spliced into RX where the parameter name occurs. | ||
| 1162 | |||
| 1163 | Any previous definitions with the same names are shadowed during | ||
| 1164 | the expansion of BODY only. | ||
| 1165 | For local extensions to `rx-to-string', use `rx-let-eval'. | ||
| 1166 | To make global rx extensions, use `rx-define'. | ||
| 1167 | For more details, see Info node `(elisp) Extending Rx'. | ||
| 1168 | |||
| 1169 | \(fn BINDINGS BODY...)" | ||
| 1170 | (declare (indent 1) (debug (sexp body))) | ||
| 1171 | (let ((prev-locals (cdr (assq :rx-locals macroexpand-all-environment))) | ||
| 1172 | (new-locals (mapcar #'rx--make-named-binding bindings))) | ||
| 1173 | (macroexpand-all (cons 'progn body) | ||
| 1174 | (cons (cons :rx-locals (append new-locals prev-locals)) | ||
| 1175 | macroexpand-all-environment)))) | ||
| 1176 | |||
| 1177 | ;;;###autoload | ||
| 1178 | (defmacro rx-define (name &rest definition) | ||
| 1179 | "Define NAME as a global `rx' definition. | ||
| 1180 | If the ARGS list is omitted, define NAME as an alias for the `rx' | ||
| 1181 | expression RX. | ||
| 1182 | |||
| 1183 | If the ARGS list is supplied, define NAME as an `rx' form with | ||
| 1184 | ARGS as argument list. The parameters are bound from the values | ||
| 1185 | in the (NAME ...) form and are substituted in RX. | ||
| 1186 | ARGS can contain `&rest' parameters, whose values are spliced | ||
| 1187 | into RX where the parameter name occurs. | ||
| 1188 | |||
| 1189 | Any previous global definition of NAME is overwritten with the new one. | ||
| 1190 | To make local rx extensions, use `rx-let' for `rx', | ||
| 1191 | `rx-let-eval' for `rx-to-string'. | ||
| 1192 | For more details, see Info node `(elisp) Extending Rx'. | ||
| 1193 | |||
| 1194 | \(fn NAME [(ARGS...)] RX)" | ||
| 1195 | (declare (indent 1)) | ||
| 1196 | `(eval-and-compile | ||
| 1197 | (put ',name 'rx-definition ',(rx--make-binding name definition)) | ||
| 1198 | ',name)) | ||
| 945 | 1199 | ||
| 946 | ;; During `rx--pcase-transform', list of defined variables in right-to-left | 1200 | ;; During `rx--pcase-transform', list of defined variables in right-to-left |
| 947 | ;; order. | 1201 | ;; order. |
| 948 | (defvar rx--pcase-vars) | 1202 | (defvar rx--pcase-vars) |
| 949 | 1203 | ||
| 1204 | ;; FIXME: The rewriting strategy for pcase works so-so with extensions; | ||
| 1205 | ;; definitions cannot expand to `let' or named `backref'. If this ever | ||
| 1206 | ;; becomes a problem, we can handle those forms in the ordinary parser, | ||
| 1207 | ;; using a dynamic variable for activating the augmented forms. | ||
| 1208 | |||
| 950 | (defun rx--pcase-transform (rx) | 1209 | (defun rx--pcase-transform (rx) |
| 951 | "Transform RX, an rx-expression augmented with `let' and named `backref', | 1210 | "Transform RX, an rx-expression augmented with `let' and named `backref', |
| 952 | into a plain rx-expression, collecting names into `rx--pcase-vars'." | 1211 | into a plain rx-expression, collecting names into `rx--pcase-vars'." |
diff --git a/test/lisp/emacs-lisp/rx-tests.el b/test/lisp/emacs-lisp/rx-tests.el index fec046dd991..11de4771dea 100644 --- a/test/lisp/emacs-lisp/rx-tests.el +++ b/test/lisp/emacs-lisp/rx-tests.el | |||
| @@ -303,6 +303,104 @@ | |||
| 303 | (should (equal (rx-to-string '(or nonl "\nx") t) | 303 | (should (equal (rx-to-string '(or nonl "\nx") t) |
| 304 | ".\\|\nx"))) | 304 | ".\\|\nx"))) |
| 305 | 305 | ||
| 306 | (ert-deftest rx-let () | ||
| 307 | (rx-let ((beta gamma) | ||
| 308 | (gamma delta) | ||
| 309 | (delta (+ digit)) | ||
| 310 | (epsilon (or gamma nonl))) | ||
| 311 | (should (equal (rx bol delta epsilon) | ||
| 312 | "^[[:digit:]]+\\(?:[[:digit:]]+\\|.\\)"))) | ||
| 313 | (rx-let ((p () point) | ||
| 314 | (separated (x sep) (seq x (* sep x))) | ||
| 315 | (comma-separated (x) (separated x ",")) | ||
| 316 | (semi-separated (x) (separated x ";")) | ||
| 317 | (matrix (v) (semi-separated (comma-separated v)))) | ||
| 318 | (should (equal (rx (p) (matrix (+ "a")) eos) | ||
| 319 | "\\=a+\\(?:,a+\\)*\\(?:;a+\\(?:,a+\\)*\\)*\\'"))) | ||
| 320 | (rx-let ((b bol) | ||
| 321 | (z "B") | ||
| 322 | (three (x) (= 3 x))) | ||
| 323 | (rx-let ((two (x) (seq x x)) | ||
| 324 | (z "A") | ||
| 325 | (e eol)) | ||
| 326 | (should (equal (rx b (two (three z)) e) | ||
| 327 | "^A\\{3\\}A\\{3\\}$")))) | ||
| 328 | (rx-let ((f (a b &rest r) (seq "<" a ";" b ":" r ">"))) | ||
| 329 | (should (equal (rx bol (f ?x ?y) ?! (f ?u ?v ?w) ?! (f ?k ?l ?m ?n) eol) | ||
| 330 | "^<x;y:>!<u;v:w>!<k;l:mn>$"))) | ||
| 331 | |||
| 332 | ;; Rest parameters are expanded by splicing. | ||
| 333 | (rx-let ((f (&rest r) (or bol r eol))) | ||
| 334 | (should (equal (rx (f "ab" nonl)) | ||
| 335 | "^\\|ab\\|.\\|$"))) | ||
| 336 | |||
| 337 | ;; Substitution is done in number positions. | ||
| 338 | (rx-let ((stars (n) (= n ?*))) | ||
| 339 | (should (equal (rx (stars 4)) | ||
| 340 | "\\*\\{4\\}"))) | ||
| 341 | |||
| 342 | ;; Substitution is done inside dotted pairs. | ||
| 343 | (rx-let ((f (x y z) (any x (y . z)))) | ||
| 344 | (should (equal (rx (f ?* ?a ?t)) | ||
| 345 | "[*a-t]"))) | ||
| 346 | |||
| 347 | ;; Substitution is done in the head position of forms. | ||
| 348 | (rx-let ((f (x) (x "a"))) | ||
| 349 | (should (equal (rx (f +)) | ||
| 350 | "a+")))) | ||
| 351 | |||
| 352 | (ert-deftest rx-define () | ||
| 353 | (rx-define rx--a (seq "x" (opt "y"))) | ||
| 354 | (should (equal (rx bol rx--a eol) | ||
| 355 | "^xy?$")) | ||
| 356 | (rx-define rx--c (lb rb &rest stuff) (seq lb stuff rb)) | ||
| 357 | (should (equal (rx bol (rx--c "<" ">" rx--a nonl) eol) | ||
| 358 | "^<xy?.>$")) | ||
| 359 | (rx-define rx--b (* rx--a)) | ||
| 360 | (should (equal (rx rx--b) | ||
| 361 | "\\(?:xy?\\)*")) | ||
| 362 | (rx-define rx--a "z") | ||
| 363 | (should (equal (rx rx--b) | ||
| 364 | "z*"))) | ||
| 365 | |||
| 366 | (defun rx--test-rx-to-string-define () | ||
| 367 | ;; `rx-define' won't expand to code inside `ert-deftest' since we use | ||
| 368 | ;; `eval-and-compile'. Put it into a defun as a workaround. | ||
| 369 | (rx-define rx--d "Q") | ||
| 370 | (rx-to-string '(seq bol rx--d) t)) | ||
| 371 | |||
| 372 | (ert-deftest rx-to-string-define () | ||
| 373 | "Check that `rx-to-string' uses definitions made by `rx-define'." | ||
| 374 | (should (equal (rx--test-rx-to-string-define) | ||
| 375 | "^Q"))) | ||
| 376 | |||
| 377 | (ert-deftest rx-let-define () | ||
| 378 | "Test interaction between `rx-let' and `rx-define'." | ||
| 379 | (rx-define rx--e "one") | ||
| 380 | (rx-define rx--f "eins") | ||
| 381 | (rx-let ((rx--e "two")) | ||
| 382 | (should (equal (rx rx--e nonl rx--f) "two.eins")) | ||
| 383 | (rx-define rx--e "three") | ||
| 384 | (should (equal (rx rx--e) "two")) | ||
| 385 | (rx-define rx--f "zwei") | ||
| 386 | (should (equal (rx rx--f) "zwei"))) | ||
| 387 | (should (equal (rx rx--e nonl rx--f) "three.zwei"))) | ||
| 388 | |||
| 389 | (ert-deftest rx-let-eval () | ||
| 390 | (rx-let-eval '((a (* digit)) | ||
| 391 | (f (x &rest r) (seq x nonl r))) | ||
| 392 | (should (equal (rx-to-string '(seq a (f bow a ?b)) t) | ||
| 393 | "[[:digit:]]*\\<.[[:digit:]]*b")))) | ||
| 394 | |||
| 395 | (ert-deftest rx-redefine-builtin () | ||
| 396 | (should-error (rx-define sequence () "x")) | ||
| 397 | (should-error (rx-define sequence "x")) | ||
| 398 | (should-error (rx-define nonl () "x")) | ||
| 399 | (should-error (rx-define nonl "x")) | ||
| 400 | (should-error (rx-let ((punctuation () "x")) nil)) | ||
| 401 | (should-error (rx-let ((punctuation "x")) nil)) | ||
| 402 | (should-error (rx-let-eval '((not-char () "x")) nil)) | ||
| 403 | (should-error (rx-let-eval '((not-char "x")) nil))) | ||
| 306 | 404 | ||
| 307 | (ert-deftest rx-constituents () | 405 | (ert-deftest rx-constituents () |
| 308 | (let ((rx-constituents | 406 | (let ((rx-constituents |