aboutsummaryrefslogtreecommitdiffstats
path: root/lisp
diff options
context:
space:
mode:
authorMattias Engdegård2019-09-25 14:29:50 -0700
committerPaul Eggert2019-09-25 14:29:50 -0700
commit07367e5b95fe31f3d4e994b42b081075501b9b60 (patch)
tree7d26251a300462083d971aa3aa9880cc23c423a1 /lisp
parent2ed71227c626c6cfdc684948644ccf3d9eaeb15b (diff)
downloademacs-07367e5b95fe31f3d4e994b42b081075501b9b60.tar.gz
emacs-07367e5b95fe31f3d4e994b42b081075501b9b60.zip
Add rx extension mechanism
Add a built-in set of extension macros: `rx-define', `rx-let' and `rx-let-eval'. * lisp/emacs-lisp/rx.el (rx-constituents, rx-to-string): Doc updates. (rx--builtin-symbols, rx--builtin-names, rx--local-definitions) (rx--lookup-def, rx--substitute, rx--expand-template) (rx--make-binding, rx--make-named-binding, rx--extend-local-defs) (rx-let-eval, rx-let, rx-define): New. (rx--translate-symbol, rx--translate-form): Use extensions if any. (rx): Use local definitions. * test/lisp/emacs-lisp/rx-tests.el (rx-let, rx-define) (rx-to-string-define, rx-let-define, rx-let-eval): New. * etc/NEWS (Changes in Specialized Modes and Packages): * doc/lispref/searching.texi (Rx Notation, Rx Functions, Extending Rx): Add node about rx extensions.
Diffstat (limited to 'lisp')
-rw-r--r--lisp/emacs-lisp/rx.el299
1 files changed, 279 insertions, 20 deletions
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
index 9b3419e1c88..a192ed1ad27 100644
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -97,6 +97,7 @@ Most of the names are from SRE.")
97 97
98(defvar rx-constituents nil 98(defvar rx-constituents nil
99 "Alist of old-style rx extensions, for compatibility. 99 "Alist of old-style rx extensions, for compatibility.
100For new code, use `rx-define', `rx-let' or `rx-let-eval'.
100 101
101Each element is (SYMBOL . DEF). 102Each element is (SYMBOL . DEF).
102 103
@@ -113,6 +114,17 @@ If DEF is a list on the form (FUN MIN-ARGS MAX-ARGS PRED), then
113 If PRED is non-nil, it is a predicate that all actual arguments must 114 If PRED is non-nil, it is a predicate that all actual arguments must
114 satisfy.") 115 satisfy.")
115 116
117(defvar rx--local-definitions nil
118 "Alist of dynamic local rx definitions.
119Each entry is:
120 (NAME DEF) -- NAME is an rx symbol defined as the rx form DEF.
121 (NAME ARGS DEF) -- NAME is an rx form with arglist ARGS, defined
122 as the rx form DEF (which can contain members of ARGS).")
123
124(defsubst rx--lookup-def (name)
125 (or (cdr (assq name rx--local-definitions))
126 (get name 'rx-definition)))
127
116;; TODO: Additions to consider: 128;; TODO: Additions to consider:
117;; - A better name for `anything', like `any-char' or `anychar'. 129;; - A better name for `anything', like `any-char' or `anychar'.
118;; - A name for (or), maybe `unmatchable'. 130;; - A name for (or), maybe `unmatchable'.
@@ -144,6 +156,12 @@ If DEF is a list on the form (FUN MIN-ARGS MAX-ARGS PRED), then
144 ((let ((class (cdr (assq sym rx--char-classes)))) 156 ((let ((class (cdr (assq sym rx--char-classes))))
145 (and class (cons (list (concat "[[:" (symbol-name class) ":]]")) t)))) 157 (and class (cons (list (concat "[[:" (symbol-name class) ":]]")) t))))
146 158
159 ((let ((definition (rx--lookup-def sym)))
160 (and definition
161 (if (cdr definition)
162 (error "Not an `rx' symbol definition: %s" sym)
163 (rx--translate (nth 0 definition))))))
164
147 ;; For compatibility with old rx. 165 ;; For compatibility with old rx.
148 ((let ((entry (assq sym rx-constituents))) 166 ((let ((entry (assq sym rx-constituents)))
149 (and (progn 167 (and (progn
@@ -310,6 +328,19 @@ INTERVALS is a list of (START . END) with START ≤ END, sorted by START."
310 (setq tail d))) 328 (setq tail d)))
311 intervals)) 329 intervals))
312 330
331;; FIXME: Consider expanding definitions inside (any ...) and (not ...),
332;; and perhaps allow (any ...) inside (any ...).
333;; It would be benefit composability (build a character alternative by pieces)
334;; and be handy for obtaining the complement of a defined set of
335;; characters. (See, for example, python.el:421, `not-simple-operator'.)
336;; (Expansion in other non-rx positions is probably not a good idea:
337;; syntax, category, backref, and the integer parameters of group-n,
338;; =, >=, **, repeat)
339;; Similar effect could be attained by ensuring that
340;; (or (any X) (any Y)) -> (any X Y), and find a way to compose negative
341;; sets. `and' is taken, but we could add
342;; (intersection (not (any X)) (not (any Y))) -> (not (any X Y)).
343
313(defun rx--translate-any (negated body) 344(defun rx--translate-any (negated body)
314 "Translate an (any ...) construct. Return (REGEXP . PRECEDENCE). 345 "Translate an (any ...) construct. Return (REGEXP . PRECEDENCE).
315If NEGATED, negate the sense." 346If NEGATED, negate the sense."
@@ -712,6 +743,94 @@ DEF is the definition tuple. Return (REGEXP . PRECEDENCE)."
712 (error "The `%s' form did not expand to a string" (car form))) 743 (error "The `%s' form did not expand to a string" (car form)))
713 (cons (list regexp) nil)))) 744 (cons (list regexp) nil))))
714 745
746(defun rx--substitute (bindings form)
747 "Substitute BINDINGS in FORM. BINDINGS is an alist of (NAME . VALUES)
748where VALUES is a list to splice into FORM wherever NAME occurs.
749Return the substitution result wrapped in a list, since a single value
750can expand to any number of values."
751 (cond ((symbolp form)
752 (let ((binding (assq form bindings)))
753 (if binding
754 (cdr binding)
755 (list form))))
756 ((consp form)
757 (if (listp (cdr form))
758 ;; Proper list. We substitute variables even in the head
759 ;; position -- who knows, might be handy one day.
760 (list (mapcan (lambda (x) (copy-sequence
761 (rx--substitute bindings x)))
762 form))
763 ;; Cons pair (presumably an interval).
764 (let ((first (rx--substitute bindings (car form)))
765 (second (rx--substitute bindings (cdr form))))
766 (if (and first (not (cdr first))
767 second (not (cdr second)))
768 (list (cons (car first) (car second)))
769 (error
770 "Cannot substitute a &rest parameter into a dotted pair")))))
771 (t (list form))))
772
773;; FIXME: Consider adding extensions in Lisp macro style, where
774;; arguments are passed unevaluated to code that returns the rx form
775;; to use. Example:
776;;
777;; (rx-let ((radix-digit (radix)
778;; :lisp (list 'any (cons ?0 (+ ?0 (eval radix) -1)))))
779;; (rx (radix-digit (+ 5 3))))
780;; =>
781;; "[0-7]"
782;;
783;; While this would permit more powerful extensions, it's unclear just
784;; how often they would be used in practice. Let's wait until there is
785;; demand for it.
786
787;; FIXME: An alternative binding syntax would be
788;;
789;; (NAME RXs...)
790;; and
791;; ((NAME ARGS...) RXs...)
792;;
793;; which would have two minor advantages: multiple RXs with implicit
794;; `seq' in the definition, and the arglist is no longer an optional
795;; element in the middle of the list. On the other hand, it's less
796;; like traditional lisp arglist constructs (defun, defmacro).
797;; Since it's a Scheme-like syntax, &rest parameters could be done using
798;; dotted lists:
799;; (rx-let (((name arg1 arg2 . rest) ...definition...)) ...)
800
801(defun rx--expand-template (op values arglist template)
802 "Return TEMPLATE with variables in ARGLIST replaced with VALUES."
803 (let ((bindings nil)
804 (value-tail values)
805 (formals arglist))
806 (while formals
807 (pcase (car formals)
808 ('&rest
809 (unless (cdr formals)
810 (error
811 "Expanding rx def `%s': missing &rest parameter name" op))
812 (push (cons (cadr formals) value-tail) bindings)
813 (setq formals nil)
814 (setq value-tail nil))
815 (name
816 (unless value-tail
817 (error
818 "Expanding rx def `%s': too few arguments (got %d, need %s%d)"
819 op (length values)
820 (if (memq '&rest arglist) "at least " "")
821 (- (length arglist) (length (memq '&rest arglist)))))
822 (push (cons name (list (car value-tail))) bindings)
823 (setq value-tail (cdr value-tail))))
824 (setq formals (cdr formals)))
825 (when value-tail
826 (error
827 "Expanding rx def `%s': too many arguments (got %d, need %d)"
828 op (length values) (length arglist)))
829 (let ((subst (rx--substitute bindings template)))
830 (if (and subst (not (cdr subst)))
831 (car subst)
832 (error "Expanding rx def `%s': must result in a single value" op)))))
833
715(defun rx--translate-form (form) 834(defun rx--translate-form (form)
716 "Translate an rx form (list structure). Return (REGEXP . PRECEDENCE)." 835 "Translate an rx form (list structure). Return (REGEXP . PRECEDENCE)."
717 (let ((body (cdr form))) 836 (let ((body (cdr form)))
@@ -757,24 +876,29 @@ DEF is the definition tuple. Return (REGEXP . PRECEDENCE)."
757 (op 876 (op
758 (unless (symbolp op) 877 (unless (symbolp op)
759 (error "Bad rx operator `%S'" op)) 878 (error "Bad rx operator `%S'" op))
879 (let ((definition (rx--lookup-def op)))
880 (if definition
881 (if (cdr definition)
882 (rx--translate
883 (rx--expand-template
884 op body (nth 0 definition) (nth 1 definition)))
885 (error "Not an `rx' form definition: %s" op))
886
887 ;; For compatibility with old rx.
888 (let ((entry (assq op rx-constituents)))
889 (if (progn
890 (while (and entry (not (consp (cdr entry))))
891 (setq entry
892 (if (symbolp (cdr entry))
893 ;; Alias for another entry.
894 (assq (cdr entry) rx-constituents)
895 ;; Wrong type, try further down the list.
896 (assq (car entry)
897 (cdr (memq entry rx-constituents))))))
898 entry)
899 (rx--translate-compat-form (cdr entry) form)
900 (error "Unknown rx form `%s'" op)))))))))
760 901
761 ;; For compatibility with old rx.
762 (let ((entry (assq op rx-constituents)))
763 (if (progn
764 (while (and entry (not (consp (cdr entry))))
765 (setq entry
766 (if (symbolp (cdr entry))
767 ;; Alias for another entry.
768 (assq (cdr entry) rx-constituents)
769 ;; Wrong type, try further down the list.
770 (assq (car entry)
771 (cdr (memq entry rx-constituents))))))
772 entry)
773 (rx--translate-compat-form (cdr entry) form)
774 (error "Unknown rx form `%s'" op)))))))
775
776;; Defined here rather than in re-builder to lower the odds that it
777;; will be kept in sync with changes.
778(defconst rx--builtin-forms 902(defconst rx--builtin-forms
779 '(seq sequence : and or | any in char not-char not 903 '(seq sequence : and or | any in char not-char not
780 repeat = >= ** 904 repeat = >= **
@@ -786,7 +910,21 @@ DEF is the definition tuple. Return (REGEXP . PRECEDENCE)."
786 group submatch group-n submatch-n backref 910 group submatch group-n submatch-n backref
787 syntax not-syntax category 911 syntax not-syntax category
788 literal eval regexp regex) 912 literal eval regexp regex)
789 "List of built-in rx forms. For use in re-builder only.") 913 "List of built-in rx function-like symbols.")
914
915(defconst rx--builtin-symbols
916 (append '(nonl not-newline any anything
917 bol eol line-start line-end
918 bos eos string-start string-end
919 bow eow word-start word-end
920 symbol-start symbol-end
921 point word-boundary not-word-boundary not-wordchar)
922 (mapcar #'car rx--char-classes))
923 "List of built-in rx variable-like symbols.")
924
925(defconst rx--builtin-names
926 (append rx--builtin-forms rx--builtin-symbols)
927 "List of built-in rx names. These cannot be redefined by the user.")
790 928
791(defun rx--translate (item) 929(defun rx--translate (item)
792 "Translate the rx-expression ITEM. Return (REGEXP . PRECEDENCE)." 930 "Translate the rx-expression ITEM. Return (REGEXP . PRECEDENCE)."
@@ -810,7 +948,9 @@ DEF is the definition tuple. Return (REGEXP . PRECEDENCE)."
810The arguments to `literal' and `regexp' forms inside FORM must be 948The arguments to `literal' and `regexp' forms inside FORM must be
811constant strings. 949constant strings.
812If NO-GROUP is non-nil, don't bracket the result in a non-capturing 950If NO-GROUP is non-nil, don't bracket the result in a non-capturing
813group." 951group.
952
953For extending the `rx' notation in FORM, use `rx-define' or `rx-let-eval'."
814 (let* ((item (rx--translate form)) 954 (let* ((item (rx--translate form))
815 (exprs (if no-group 955 (exprs (if no-group
816 (car item) 956 (car item)
@@ -939,14 +1079,133 @@ Zero-width assertions: these all match the empty string in specific places.
939(regexp EXPR) Match the string regexp from evaluating EXPR at run time. 1079(regexp EXPR) Match the string regexp from evaluating EXPR at run time.
940(eval EXPR) Match the rx sexp from evaluating EXPR at compile time. 1080(eval EXPR) Match the rx sexp from evaluating EXPR at compile time.
941 1081
1082Additional constructs can be defined using `rx-define' and `rx-let',
1083which see.
1084
942\(fn REGEXPS...)" 1085\(fn REGEXPS...)"
943 (rx--to-expr (cons 'seq regexps))) 1086 ;; Retrieve local definitions from the macroexpansion environment.
1087 ;; (It's unclear whether the previous value of `rx--local-definitions'
1088 ;; should be included, and if so, in which order.)
1089 (let ((rx--local-definitions
1090 (cdr (assq :rx-locals macroexpand-all-environment))))
1091 (rx--to-expr (cons 'seq regexps))))
1092
1093(defun rx--make-binding (name tail)
1094 "Make a definitions entry out of TAIL.
1095TAIL is on the form ([ARGLIST] DEFINITION)."
1096 (unless (symbolp name)
1097 (error "Bad `rx' definition name: %S" name))
1098 ;; FIXME: Consider using a hash table or symbol property, for speed.
1099 (when (memq name rx--builtin-names)
1100 (error "Cannot redefine built-in rx name `%s'" name))
1101 (pcase tail
1102 (`(,def)
1103 (list def))
1104 (`(,args ,def)
1105 (unless (and (listp args) (rx--every #'symbolp args))
1106 (error "Bad argument list for `rx' definition %s: %S" name args))
1107 (list args def))
1108 (_ (error "Bad `rx' definition of %s: %S" name tail))))
1109
1110(defun rx--make-named-binding (bindspec)
1111 "Make a definitions entry out of BINDSPEC.
1112BINDSPEC is on the form (NAME [ARGLIST] DEFINITION)."
1113 (unless (consp bindspec)
1114 (error "Bad `rx-let' binding: %S" bindspec))
1115 (cons (car bindspec)
1116 (rx--make-binding (car bindspec) (cdr bindspec))))
1117
1118(defun rx--extend-local-defs (bindspecs)
1119 (append (mapcar #'rx--make-named-binding bindspecs)
1120 rx--local-definitions))
944 1121
1122;;;###autoload
1123(defmacro rx-let-eval (bindings &rest body)
1124 "Evaluate BODY with local BINDINGS for `rx-to-string'.
1125BINDINGS, after evaluation, is a list of definitions each on the form
1126(NAME [(ARGS...)] RX), in effect for calls to `rx-to-string'
1127in BODY.
1128
1129For bindings without an ARGS list, NAME is defined as an alias
1130for the `rx' expression RX. Where ARGS is supplied, NAME is
1131defined as an `rx' form with ARGS as argument list. The
1132parameters are bound from the values in the (NAME ...) form and
1133are substituted in RX. ARGS can contain `&rest' parameters,
1134whose values are spliced into RX where the parameter name occurs.
1135
1136Any previous definitions with the same names are shadowed during
1137the expansion of BODY only.
1138For extensions when using the `rx' macro, use `rx-let'.
1139To make global rx extensions, use `rx-define'.
1140For more details, see Info node `(elisp) Extending Rx'.
1141
1142\(fn BINDINGS BODY...)"
1143 (declare (indent 1) (debug (form body)))
1144 ;; FIXME: this way, `rx--extend-local-defs' may need to be autoloaded.
1145 `(let ((rx--local-definitions (rx--extend-local-defs ,bindings)))
1146 ,@body))
1147
1148;;;###autoload
1149(defmacro rx-let (bindings &rest body)
1150 "Evaluate BODY with local BINDINGS for `rx'.
1151BINDINGS is an unevaluated list of bindings each on the form
1152(NAME [(ARGS...)] RX).
1153They are bound lexically and are available in `rx' expressions in
1154BODY only.
1155
1156For bindings without an ARGS list, NAME is defined as an alias
1157for the `rx' expression RX. Where ARGS is supplied, NAME is
1158defined as an `rx' form with ARGS as argument list. The
1159parameters are bound from the values in the (NAME ...) form and
1160are substituted in RX. ARGS can contain `&rest' parameters,
1161whose values are spliced into RX where the parameter name occurs.
1162
1163Any previous definitions with the same names are shadowed during
1164the expansion of BODY only.
1165For local extensions to `rx-to-string', use `rx-let-eval'.
1166To make global rx extensions, use `rx-define'.
1167For more details, see Info node `(elisp) Extending Rx'.
1168
1169\(fn BINDINGS BODY...)"
1170 (declare (indent 1) (debug (sexp body)))
1171 (let ((prev-locals (cdr (assq :rx-locals macroexpand-all-environment)))
1172 (new-locals (mapcar #'rx--make-named-binding bindings)))
1173 (macroexpand-all (cons 'progn body)
1174 (cons (cons :rx-locals (append new-locals prev-locals))
1175 macroexpand-all-environment))))
1176
1177;;;###autoload
1178(defmacro rx-define (name &rest definition)
1179 "Define NAME as a global `rx' definition.
1180If the ARGS list is omitted, define NAME as an alias for the `rx'
1181expression RX.
1182
1183If the ARGS list is supplied, define NAME as an `rx' form with
1184ARGS as argument list. The parameters are bound from the values
1185in the (NAME ...) form and are substituted in RX.
1186ARGS can contain `&rest' parameters, whose values are spliced
1187into RX where the parameter name occurs.
1188
1189Any previous global definition of NAME is overwritten with the new one.
1190To make local rx extensions, use `rx-let' for `rx',
1191`rx-let-eval' for `rx-to-string'.
1192For more details, see Info node `(elisp) Extending Rx'.
1193
1194\(fn NAME [(ARGS...)] RX)"
1195 (declare (indent 1))
1196 `(eval-and-compile
1197 (put ',name 'rx-definition ',(rx--make-binding name definition))
1198 ',name))
945 1199
946;; During `rx--pcase-transform', list of defined variables in right-to-left 1200;; During `rx--pcase-transform', list of defined variables in right-to-left
947;; order. 1201;; order.
948(defvar rx--pcase-vars) 1202(defvar rx--pcase-vars)
949 1203
1204;; FIXME: The rewriting strategy for pcase works so-so with extensions;
1205;; definitions cannot expand to `let' or named `backref'. If this ever
1206;; becomes a problem, we can handle those forms in the ordinary parser,
1207;; using a dynamic variable for activating the augmented forms.
1208
950(defun rx--pcase-transform (rx) 1209(defun rx--pcase-transform (rx)
951 "Transform RX, an rx-expression augmented with `let' and named `backref', 1210 "Transform RX, an rx-expression augmented with `let' and named `backref',
952into a plain rx-expression, collecting names into `rx--pcase-vars'." 1211into a plain rx-expression, collecting names into `rx--pcase-vars'."