aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNoam Postavsky2019-06-14 08:43:17 -0400
committerNoam Postavsky2019-06-25 22:00:03 -0400
commitb59ffd2290ff744ca4e7cc2748ba6b66fb2f99f1 (patch)
tree650ab12b77ba2cf9918ebc9bce586ce22ab7d52a
parent29babad7286bff235407e883a4ff61bae49a2e5e (diff)
downloademacs-b59ffd2290ff744ca4e7cc2748ba6b66fb2f99f1.tar.gz
emacs-b59ffd2290ff744ca4e7cc2748ba6b66fb2f99f1.zip
Support (rx (and (regexp EXPR) (literal EXPR))) (Bug#36237)
* lisp/emacs-lisp/rx.el (rx-regexp): Allow non-string forms. (rx-constituents): Add literal constituent, which is like a plain STRING form, but allows arbitrary lisp expressions. (rx-literal): New function. (rx-compile-to-lisp): New variable. (rx--subforms): New helper function for handling subforms, including non-constant case. (rx-group-if, rx-and, rx-or, rx-=, rx->=, rx-repeat, rx-submatch) (rx-submatch-n, rx-kleene, rx-atomic-p): Use it to handle non-constant subforms. (rx): Document new form, wrap non-constant forms with concat call. * test/lisp/emacs-lisp/rx-tests.el (rx-tests--match): New macro. (rx-nonstring-expr, rx-nonstring-expr-non-greedy): New tests. * etc/NEWS: Announce changes.
-rw-r--r--etc/NEWS6
-rw-r--r--lisp/emacs-lisp/rx.el242
-rw-r--r--test/lisp/emacs-lisp/rx-tests.el41
3 files changed, 201 insertions, 88 deletions
diff --git a/etc/NEWS b/etc/NEWS
index 7de94a7b11d..29b74ee682b 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -1441,12 +1441,18 @@ when given in a string. Previously, '(any "\x80-\xff")' would match
1441characters U+0080...U+00FF. Now the expression matches raw bytes in 1441characters U+0080...U+00FF. Now the expression matches raw bytes in
1442the 128...255 range, as expected. 1442the 128...255 range, as expected.
1443 1443
1444---
1444*** The rx 'or' and 'seq' forms no longer require any arguments. 1445*** The rx 'or' and 'seq' forms no longer require any arguments.
1445(or) produces a regexp that never matches anything, while (seq) 1446(or) produces a regexp that never matches anything, while (seq)
1446matches the empty string, each being an identity for the operation. 1447matches the empty string, each being an identity for the operation.
1447This also works for their aliases: '|' for 'or'; ':', 'and' and 1448This also works for their aliases: '|' for 'or'; ':', 'and' and
1448'sequence' for 'seq'. 1449'sequence' for 'seq'.
1449 1450
1451---
1452*** 'regexp' and new 'literal' accept arbitrary lisp as arguments.
1453In this case, 'rx' will generate code which produces a regexp string
1454at run time, instead of a constant string.
1455
1450** Frames 1456** Frames
1451 1457
1452+++ 1458+++
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
index 2130e3e1aea..1b5afe73b45 100644
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -47,57 +47,58 @@
47 47
48;; Rx translates a sexp notation for regular expressions into the 48;; Rx translates a sexp notation for regular expressions into the
49;; usual string notation. The translation can be done at compile-time 49;; usual string notation. The translation can be done at compile-time
50;; by using the `rx' macro. It can be done at run-time by calling 50;; by using the `rx' macro. The `regexp' and `literal' forms accept
51;; function `rx-to-string'. See the documentation of `rx' for a 51;; non-constant expressions, in which case `rx' will translate to a
52;; complete description of the sexp notation. 52;; `concat' expression. Translation can be done fully at run time by
53;; calling function `rx-to-string'. See the documentation of `rx' for
54;; a complete description of the sexp notation.
53;; 55;;
54;; Some examples of string regexps and their sexp counterparts: 56;; Some examples of string regexps and their sexp counterparts:
55;; 57;;
56;; "^[a-z]*" 58;; "^[a-z]*"
57;; (rx (and line-start (0+ (in "a-z")))) 59;; (rx line-start (0+ (in "a-z")))
58;; 60;;
59;; "\n[^ \t]" 61;; "\n[^ \t]"
60;; (rx (and "\n" (not (any " \t")))) 62;; (rx ?\n (not (in " \t")))
61;; 63;;
62;; "\\*\\*\\* EOOH \\*\\*\\*\n" 64;; "\\*\\*\\* EOOH \\*\\*\\*\n"
63;; (rx "*** EOOH ***\n") 65;; (rx "*** EOOH ***\n")
64;; 66;;
65;; "\\<\\(catch\\|finally\\)\\>[^_]" 67;; "\\<\\(catch\\|finally\\)\\>[^_]"
66;; (rx (and word-start (submatch (or "catch" "finally")) word-end 68;; (rx word-start (submatch (or "catch" "finally")) word-end
67;; (not (any ?_)))) 69;; (not (in ?_)))
68;; 70;;
69;; "[ \t\n]*:\\([^:]+\\|$\\)" 71;; "[ \t\n]*:\\($\\|[^:]+\\)"
70;; (rx (and (zero-or-more (in " \t\n")) ":" 72;; (rx (* (in " \t\n")) ":"
71;; (submatch (or line-end (one-or-more (not (any ?:))))))) 73;; (submatch (or line-end (+ (not (in ?:))))))
72;; 74;;
73;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*" 75;; "^content-transfer-encoding:\\(?:\n?[\t ]\\)*quoted-printable\\(?:\n?[\t ]\\)*"
74;; (rx (and line-start 76;; (rx line-start
75;; "content-transfer-encoding:" 77;; "content-transfer-encoding:"
76;; (+ (? ?\n)) (any " \t") 78;; (* (? ?\n) (in " \t"))
77;; "quoted-printable" 79;; "quoted-printable"
78;; (+ (? ?\n)) (any " \t")) 80;; (* (? ?\n) (in " \t")))
79;; 81;;
80;; (concat "^\\(?:" something-else "\\)") 82;; (concat "^\\(?:" something-else "\\)")
81;; (rx (and line-start (eval something-else))), statically or 83;; (rx line-start (regexp something-else))
82;; (rx-to-string '(and line-start ,something-else)), dynamically.
83;; 84;;
84;; (regexp-opt '(STRING1 STRING2 ...)) 85;; (regexp-opt '(STRING1 STRING2 ...))
85;; (rx (or STRING1 STRING2 ...)), or in other words, `or' automatically 86;; (rx (or STRING1 STRING2 ...)), or in other words, `or' automatically
86;; calls `regexp-opt' as needed. 87;; calls `regexp-opt' as needed.
87;; 88;;
88;; "^;;\\s-*\n\\|^\n" 89;; "^;;\\s-*\n\\|^\n"
89;; (rx (or (and line-start ";;" (0+ space) ?\n) 90;; (rx (or (seq line-start ";;" (0+ space) ?\n)
90;; (and line-start ?\n))) 91;; (seq line-start ?\n)))
91;; 92;;
92;; "\\$[I]d: [^ ]+ \\([^ ]+\\) " 93;; "\\$[I]d: [^ ]+ \\([^ ]+\\) "
93;; (rx (and "$Id: " 94;; (rx "$Id: "
94;; (1+ (not (in " "))) 95;; (1+ (not (in " ")))
95;; " " 96;; " "
96;; (submatch (1+ (not (in " ")))) 97;; (submatch (1+ (not (in " "))))
97;; " ")) 98;; " ")
98;; 99;;
99;; "\\\\\\\\\\[\\w+" 100;; "\\\\\\\\\\[\\w+"
100;; (rx (and ?\\ ?\\ ?\[ (1+ word))) 101;; (rx "\\\\[" (1+ word))
101;; 102;;
102;; etc. 103;; etc.
103 104
@@ -176,6 +177,7 @@
176 (not-syntax . (rx-not-syntax 1 1)) ; sregex 177 (not-syntax . (rx-not-syntax 1 1)) ; sregex
177 (category . (rx-category 1 1 rx-check-category)) 178 (category . (rx-category 1 1 rx-check-category))
178 (eval . (rx-eval 1 1)) 179 (eval . (rx-eval 1 1))
180 (literal . (rx-literal 1 1 stringp))
179 (regexp . (rx-regexp 1 1 stringp)) 181 (regexp . (rx-regexp 1 1 stringp))
180 (regex . regexp) ; sregex 182 (regex . regexp) ; sregex
181 (digit . "[[:digit:]]") 183 (digit . "[[:digit:]]")
@@ -302,6 +304,10 @@ regular expression strings.")
302 "Non-nil means produce greedy regular expressions for `zero-or-one', 304 "Non-nil means produce greedy regular expressions for `zero-or-one',
303`zero-or-more', and `one-or-more'. Dynamically bound.") 305`zero-or-more', and `one-or-more'. Dynamically bound.")
304 306
307(defvar rx--compile-to-lisp nil
308 "Nil means return a regexp as a string.
309Non-nil means we may return a lisp form which produces a
310string (used for `rx' macro).")
305 311
306(defun rx-info (op head) 312(defun rx-info (op head)
307 "Return parsing/code generation info for OP. 313 "Return parsing/code generation info for OP.
@@ -344,7 +350,7 @@ a standalone symbol."
344 (> nargs max-args)) 350 (> nargs max-args))
345 (error "rx form `%s' accepts at most %d args" 351 (error "rx form `%s' accepts at most %d args"
346 (car form) max-args)) 352 (car form) max-args))
347 (when (not (null type-pred)) 353 (when type-pred
348 (dolist (sub-form (cdr form)) 354 (dolist (sub-form (cdr form))
349 (unless (funcall type-pred sub-form) 355 (unless (funcall type-pred sub-form)
350 (error "rx form `%s' requires args satisfying `%s'" 356 (error "rx form `%s' requires args satisfying `%s'"
@@ -360,8 +366,9 @@ is non-nil."
360 ;; for concatenation 366 ;; for concatenation
361 ((eq group ':) 367 ((eq group ':)
362 (if (rx-atomic-p 368 (if (rx-atomic-p
363 (if (string-match 369 (if (and (stringp regexp)
364 "\\(?:[?*+]\\??\\|\\\\{[0-9]*,?[0-9]*\\\\}\\)\\'" regexp) 370 (string-match
371 "\\(?:[?*+]\\??\\|\\\\{[0-9]*,?[0-9]*\\\\}\\)\\'" regexp))
365 (substring regexp 0 (match-beginning 0)) 372 (substring regexp 0 (match-beginning 0))
366 regexp)) 373 regexp))
367 (setq group nil))) 374 (setq group nil)))
@@ -370,9 +377,10 @@ is non-nil."
370 ;; do anyway 377 ;; do anyway
371 ((eq group t)) 378 ((eq group t))
372 ((rx-atomic-p regexp t) (setq group nil))) 379 ((rx-atomic-p regexp t) (setq group nil)))
373 (if group 380 (cond ((and group (stringp regexp))
374 (concat "\\(?:" regexp "\\)") 381 (concat "\\(?:" regexp "\\)"))
375 regexp)) 382 (group `("\\(?:" ,@regexp "\\)"))
383 (t regexp)))
376 384
377 385
378(defvar rx-parent) 386(defvar rx-parent)
@@ -384,7 +392,7 @@ is non-nil."
384FORM is of the form `(and FORM1 ...)'." 392FORM is of the form `(and FORM1 ...)'."
385 (rx-check form) 393 (rx-check form)
386 (rx-group-if 394 (rx-group-if
387 (mapconcat (lambda (x) (rx-form x ':)) (cdr form) nil) 395 (rx--subforms (cdr form) ':)
388 (and (memq rx-parent '(* t)) rx-parent))) 396 (and (memq rx-parent '(* t)) rx-parent)))
389 397
390 398
@@ -396,7 +404,7 @@ FORM is of the form `(and FORM1 ...)'."
396 ((null (cdr form)) regexp-unmatchable) 404 ((null (cdr form)) regexp-unmatchable)
397 ((cl-every #'stringp (cdr form)) 405 ((cl-every #'stringp (cdr form))
398 (regexp-opt (cdr form) nil t)) 406 (regexp-opt (cdr form) nil t))
399 (t (mapconcat (lambda (x) (rx-form x '|)) (cdr form) "\\|"))) 407 (t (rx--subforms (cdr form) '| "\\|")))
400 (and (memq rx-parent '(: * t)) rx-parent))) 408 (and (memq rx-parent '(: * t)) rx-parent)))
401 409
402 410
@@ -669,7 +677,10 @@ If SKIP is non-nil, allow that number of items after the head, i.e.
669 (unless (and (integerp (nth 1 form)) 677 (unless (and (integerp (nth 1 form))
670 (> (nth 1 form) 0)) 678 (> (nth 1 form) 0))
671 (error "rx `=' requires positive integer first arg")) 679 (error "rx `=' requires positive integer first arg"))
672 (format "%s\\{%d\\}" (rx-form (nth 2 form) '*) (nth 1 form))) 680 (let ((subform (rx-form (nth 2 form) '*)))
681 (if (stringp subform)
682 (format "%s\\{%d\\}" subform (nth 1 form))
683 `(,@subform ,(format "\\{%d\\}" (nth 1 form))))))
673 684
674 685
675(defun rx->= (form) 686(defun rx->= (form)
@@ -679,7 +690,10 @@ If SKIP is non-nil, allow that number of items after the head, i.e.
679 (unless (and (integerp (nth 1 form)) 690 (unless (and (integerp (nth 1 form))
680 (> (nth 1 form) 0)) 691 (> (nth 1 form) 0))
681 (error "rx `>=' requires positive integer first arg")) 692 (error "rx `>=' requires positive integer first arg"))
682 (format "%s\\{%d,\\}" (rx-form (nth 2 form) '*) (nth 1 form))) 693 (let ((subform (rx-form (nth 2 form) '*)))
694 (if (stringp subform)
695 (format "%s\\{%d,\\}" subform (nth 1 form))
696 `(,@subform ,(format "\\{%d,\\}" (nth 1 form))))))
683 697
684 698
685(defun rx-** (form) 699(defun rx-** (form)
@@ -700,7 +714,10 @@ FORM is either `(repeat N FORM1)' or `(repeat N M FORMS...)'."
700 (unless (and (integerp (nth 1 form)) 714 (unless (and (integerp (nth 1 form))
701 (> (nth 1 form) 0)) 715 (> (nth 1 form) 0))
702 (error "rx `repeat' requires positive integer first arg")) 716 (error "rx `repeat' requires positive integer first arg"))
703 (format "%s\\{%d\\}" (rx-form (nth 2 form) '*) (nth 1 form))) 717 (let ((subform (rx-form (nth 2 form) '*)))
718 (if (stringp subform)
719 (format "%s\\{%d\\}" subform (nth 1 form))
720 `(,@subform ,(format "\\{%d\\}" (nth 1 form))))))
704 ((or (not (integerp (nth 2 form))) 721 ((or (not (integerp (nth 2 form)))
705 (< (nth 2 form) 0) 722 (< (nth 2 form) 0)
706 (not (integerp (nth 1 form))) 723 (not (integerp (nth 1 form)))
@@ -708,32 +725,28 @@ FORM is either `(repeat N FORM1)' or `(repeat N M FORMS...)'."
708 (< (nth 2 form) (nth 1 form))) 725 (< (nth 2 form) (nth 1 form)))
709 (error "rx `repeat' range error")) 726 (error "rx `repeat' range error"))
710 (t 727 (t
711 (format "%s\\{%d,%d\\}" (rx-form (nth 3 form) '*) 728 (let ((subform (rx-form (nth 3 form) '*)))
712 (nth 1 form) (nth 2 form))))) 729 (if (stringp subform)
730 (format "%s\\{%d,%d\\}" subform (nth 1 form) (nth 2 form))
731 `(,@subform ,(format "\\{%d,%d\\}" (nth 1 form) (nth 2 form))))))))
713 732
714 733
715(defun rx-submatch (form) 734(defun rx-submatch (form)
716 "Parse and produce code from FORM, which is `(submatch ...)'." 735 "Parse and produce code from FORM, which is `(submatch ...)'."
717 (concat "\\(" 736 (let ((subforms (rx--subforms (cdr form) ':)))
718 (if (= 2 (length form)) 737 (if (stringp subforms)
719 ;; Only one sub-form. 738 (concat "\\(" subforms "\\)")
720 (rx-form (cadr form)) 739 `("\\(" ,@subforms "\\)"))))
721 ;; Several sub-forms implicitly concatenated.
722 (mapconcat (lambda (re) (rx-form re ':)) (cdr form) nil))
723 "\\)"))
724 740
725(defun rx-submatch-n (form) 741(defun rx-submatch-n (form)
726 "Parse and produce code from FORM, which is `(submatch-n N ...)'." 742 "Parse and produce code from FORM, which is `(submatch-n N ...)'."
727 (let ((n (nth 1 form))) 743 (let ((n (nth 1 form))
744 (subforms (rx--subforms (cddr form) ':)))
728 (unless (and (integerp n) (> n 0)) 745 (unless (and (integerp n) (> n 0))
729 (error "rx `submatch-n' argument must be positive")) 746 (error "rx `submatch-n' argument must be positive"))
730 (concat "\\(?" (number-to-string n) ":" 747 (if (stringp subforms)
731 (if (= 3 (length form)) 748 (concat "\\(?" (number-to-string n) ":" subforms "\\)")
732 ;; Only one sub-form. 749 `("\\(?" ,(number-to-string n) ":" ,@subforms "\\)"))))
733 (rx-form (nth 2 form))
734 ;; Several sub-forms implicitly concatenated.
735 (mapconcat (lambda (re) (rx-form re ':)) (cddr form) nil))
736 "\\)")))
737 750
738(defun rx-backref (form) 751(defun rx-backref (form)
739 "Parse and produce code from FORM, which is `(backref N)'." 752 "Parse and produce code from FORM, which is `(backref N)'."
@@ -761,9 +774,12 @@ is non-nil."
761 (t "?"))) 774 (t "?")))
762 (op (cond ((memq (car form) '(* *? 0+ zero-or-more)) "*") 775 (op (cond ((memq (car form) '(* *? 0+ zero-or-more)) "*")
763 ((memq (car form) '(+ +? 1+ one-or-more)) "+") 776 ((memq (car form) '(+ +? 1+ one-or-more)) "+")
764 (t "?")))) 777 (t "?")))
778 (subform (rx-form (cadr form) '*)))
765 (rx-group-if 779 (rx-group-if
766 (concat (rx-form (cadr form) '*) op suffix) 780 (if (stringp subform)
781 (concat subform op suffix)
782 `(,@subform ,(concat op suffix)))
767 (and (memq rx-parent '(t *)) rx-parent)))) 783 (and (memq rx-parent '(t *)) rx-parent))))
768 784
769 785
@@ -791,15 +807,18 @@ regexps that are atomic but end in operators, such as
791be detected without much effort. A guarantee of no false 807be detected without much effort. A guarantee of no false
792negatives would require a theoretic specification of the set 808negatives would require a theoretic specification of the set
793of all atomic regexps." 809of all atomic regexps."
794 (let ((l (length r))) 810 (if (and rx--compile-to-lisp
795 (cond 811 (not (stringp r)))
796 ((<= l 1)) 812 nil ;; Runtime value, we must assume non-atomic.
797 ((= l 2) (= (aref r 0) ?\\)) 813 (let ((l (length r)))
798 ((= l 3) (string-match "\\`\\(?:\\\\[cCsS_]\\|\\[[^^]\\]\\)" r))
799 ((null lax)
800 (cond 814 (cond
801 ((string-match "\\`\\[\\^?]?\\(?:\\[:[a-z]+:]\\|[^]]\\)*]\\'" r)) 815 ((<= l 1))
802 ((string-match "\\`\\\\(\\(?:[^\\]\\|\\\\[^)]\\)*\\\\)\\'" r))))))) 816 ((= l 2) (= (aref r 0) ?\\))
817 ((= l 3) (string-match "\\`\\(?:\\\\[cCsS_]\\|\\[[^^]\\]\\)" r))
818 ((null lax)
819 (cond
820 ((string-match "\\`\\[\\^?]?\\(?:\\[:[a-z]+:]\\|[^]]\\)*]\\'" r))
821 ((string-match "\\`\\\\(\\(?:[^\\]\\|\\\\[^)]\\)*\\\\)\\'" r))))))))
803 822
804 823
805(defun rx-syntax (form) 824(defun rx-syntax (form)
@@ -855,9 +874,23 @@ If FORM is `(minimal-match FORM1)', non-greedy versions of `*',
855 874
856(defun rx-regexp (form) 875(defun rx-regexp (form)
857 "Parse and produce code from FORM, which is `(regexp STRING)'." 876 "Parse and produce code from FORM, which is `(regexp STRING)'."
858 (rx-check form) 877 (cond ((stringp form)
859 (rx-group-if (cadr form) rx-parent)) 878 (rx-group-if (cadr form) rx-parent))
860 879 (rx--compile-to-lisp
880 ;; Always group non-string forms, since we can't be sure they
881 ;; are atomic.
882 (rx-group-if (cdr form) t))
883 (t (rx-check form))))
884
885(defun rx-literal (form)
886 "Parse and produce code from FORM, which is `(literal STRING-EXP)'."
887 (cond ((stringp form)
888 ;; This is allowed, but makes little sense, you could just
889 ;; use STRING directly.
890 (rx-group-if (regexp-quote (cadr form)) rx-parent))
891 (rx--compile-to-lisp
892 (rx-group-if `((regexp-quote ,(cadr form))) rx-parent))
893 (t (rx-check form))))
861 894
862(defun rx-form (form &optional parent) 895(defun rx-form (form &optional parent)
863 "Parse and produce code for regular expression FORM. 896 "Parse and produce code for regular expression FORM.
@@ -888,12 +921,38 @@ shy groups around the result and some more in other functions."
888 (t 921 (t
889 (error "rx syntax error at `%s'" form))))) 922 (error "rx syntax error at `%s'" form)))))
890 923
924(defun rx--subforms (subforms &optional parent separator)
925 "Produce code for regular expressions SUBFORMS.
926SUBFORMS is a list of regular expression sexps.
927PARENT controls grouping, as in `rx-form'.
928Insert SEPARATOR between the code from each of SUBFORMS."
929 (if (null (cdr subforms))
930 ;; Zero or one forms, no need for grouping.
931 (and subforms (rx-form (car subforms)))
932 (let ((listify (lambda (x)
933 (if (listp x) (copy-sequence x)
934 (list x)))))
935 (setq subforms (mapcar (lambda (x) (rx-form x parent)) subforms))
936 (cond ((or (not rx--compile-to-lisp)
937 (cl-every #'stringp subforms))
938 (mapconcat #'identity subforms separator))
939 (separator
940 (nconc (funcall listify (car subforms))
941 (mapcan (lambda (x)
942 (cons separator (funcall listify x)))
943 (cdr subforms))))
944 (t (mapcan listify subforms))))))
945
891 946
892;;;###autoload 947;;;###autoload
893(defun rx-to-string (form &optional no-group) 948(defun rx-to-string (form &optional no-group)
894 "Parse and produce code for regular expression FORM. 949 "Parse and produce code for regular expression FORM.
895FORM is a regular expression in sexp form. 950FORM is a regular expression in sexp form.
896NO-GROUP non-nil means don't put shy groups around the result." 951NO-GROUP non-nil means don't put shy groups around the result.
952
953In contrast to the `rx' macro, subforms `literal' and `regexp'
954will not accept non-string arguments, i.e., (literal STRING)
955becomes just a more verbose version of STRING."
897 (rx-group-if (rx-form form) (null no-group))) 956 (rx-group-if (rx-form form) (null no-group)))
898 957
899 958
@@ -903,8 +962,12 @@ NO-GROUP non-nil means don't put shy groups around the result."
903REGEXPS is a non-empty sequence of forms of the sort listed below. 962REGEXPS is a non-empty sequence of forms of the sort listed below.
904 963
905Note that `rx' is a Lisp macro; when used in a Lisp program being 964Note that `rx' is a Lisp macro; when used in a Lisp program being
906compiled, the translation is performed by the compiler. 965compiled, the translation is performed by the compiler. The
907See `rx-to-string' for how to do such a translation at run-time. 966`literal' and `regexp' forms accept subforms that will evaluate
967to strings, in addition to constant strings. If REGEXPS include
968such forms, then the result is an expression which returns a
969regexp string, rather than a regexp string directly. See
970`rx-to-string' for performing translation completely at run time.
908 971
909The following are valid subforms of regular expressions in sexp 972The following are valid subforms of regular expressions in sexp
910notation. 973notation.
@@ -1204,18 +1267,29 @@ enclosed in `(and ...)'.
1204`(backref N)' 1267`(backref N)'
1205 matches what was matched previously by submatch N. 1268 matches what was matched previously by submatch N.
1206 1269
1270`(literal STRING-EXPR)'
1271 matches STRING-EXPR literally, where STRING-EXPR is any lisp
1272 expression that evaluates to a string.
1273
1274`(regexp REGEXP-EXPR)'
1275 include REGEXP-EXPR in string notation in the result, where
1276 REGEXP-EXPR is any lisp expression that evaluates to a
1277 string containing a valid regexp.
1278
1207`(eval FORM)' 1279`(eval FORM)'
1208 evaluate FORM and insert result. If result is a string, 1280 evaluate FORM and insert result. If result is a string,
1209 `regexp-quote' it. 1281 `regexp-quote' it. Note that FORM is evaluated during
1210 1282 macroexpansion."
1211`(regexp REGEXP)' 1283 (let* ((rx--compile-to-lisp t)
1212 include REGEXP in string notation in the result." 1284 (re (cond ((null regexps)
1213 (cond ((null regexps) 1285 (error "No regexp"))
1214 (error "No regexp")) 1286 ((cdr regexps)
1215 ((cdr regexps) 1287 (rx-to-string `(and ,@regexps) t))
1216 (rx-to-string `(and ,@regexps) t)) 1288 (t
1217 (t 1289 (rx-to-string (car regexps) t)))))
1218 (rx-to-string (car regexps) t)))) 1290 (if (stringp re)
1291 re
1292 `(concat ,@re))))
1219 1293
1220 1294
1221(pcase-defmacro rx (&rest regexps) 1295(pcase-defmacro rx (&rest regexps)
@@ -1277,14 +1351,6 @@ string as argument to `match-string'."
1277 for var in vars 1351 for var in vars
1278 collect `(app (match-string ,i) ,var))))) 1352 collect `(app (match-string ,i) ,var)))))
1279 1353
1280;; ;; sregex.el replacement
1281
1282;; ;;;###autoload (provide 'sregex)
1283;; ;;;###autoload (autoload 'sregex "rx")
1284;; (defalias 'sregex 'rx-to-string)
1285;; ;;;###autoload (autoload 'sregexq "rx" nil nil 'macro)
1286;; (defalias 'sregexq 'rx)
1287
1288(provide 'rx) 1354(provide 'rx)
1289 1355
1290;;; rx.el ends here 1356;;; rx.el ends here
diff --git a/test/lisp/emacs-lisp/rx-tests.el b/test/lisp/emacs-lisp/rx-tests.el
index 6f392d616d1..bab71b522bb 100644
--- a/test/lisp/emacs-lisp/rx-tests.el
+++ b/test/lisp/emacs-lisp/rx-tests.el
@@ -115,5 +115,46 @@
115 ;; Test zero-argument `seq'. 115 ;; Test zero-argument `seq'.
116 (should (equal (rx (seq)) ""))) 116 (should (equal (rx (seq)) "")))
117 117
118(defmacro rx-tests--match (regexp string &optional match)
119 (macroexp-let2 nil strexp string
120 `(ert-info ((format "Matching %S to %S" ',regexp ,strexp))
121 (should (string-match ,regexp ,strexp))
122 ,@(when match
123 `((should (equal (match-string 0 ,strexp) ,match)))))))
124
125(ert-deftest rx-nonstring-expr ()
126 (let ((bee "b")
127 (vowel "[aeiou]"))
128 (rx-tests--match (rx "a" (literal bee) "c") "abc")
129 (rx-tests--match (rx "a" (regexp bee) "c") "abc")
130 (rx-tests--match (rx "a" (or (regexp bee) "xy") "c") "abc")
131 (rx-tests--match (rx "a" (or "xy" (regexp bee)) "c") "abc")
132 (should-not (string-match (rx (or (regexp bee) "xy")) ""))
133 (rx-tests--match (rx "a" (= 3 (regexp bee)) "c") "abbbc")
134 (rx-tests--match (rx "x" (= 3 (regexp vowel)) "z") "xeoez")
135 (should-not (string-match (rx "x" (= 3 (regexp vowel)) "z") "xe[]z"))
136 (rx-tests--match (rx "x" (= 3 (literal vowel)) "z")
137 "x[aeiou][aeiou][aeiou]z")
138 (rx-tests--match (rx "x" (repeat 1 (regexp vowel)) "z") "xaz")
139 (rx-tests--match (rx "x" (repeat 1 2 (regexp vowel)) "z") "xaz")
140 (rx-tests--match (rx "x" (repeat 1 2 (regexp vowel)) "z") "xauz")
141 (rx-tests--match (rx "x" (>= 1 (regexp vowel)) "z") "xaiiz")
142 (rx-tests--match (rx "x" (** 1 2 (regexp vowel)) "z") "xaiz")
143 (rx-tests--match (rx "x" (group (regexp vowel)) "z") "xaz")
144 (rx-tests--match (rx "x" (group-n 1 (regexp vowel)) "z") "xaz")
145 (rx-tests--match (rx "x" (? (regexp vowel)) "z") "xz")))
146
147(ert-deftest rx-nonstring-expr-non-greedy ()
148 "`rx's greediness can't affect runtime regexp parts."
149 (let ((ad-min "[ad]*?")
150 (ad-max "[ad]*")
151 (ad "[ad]"))
152 (rx-tests--match (rx "c" (regexp ad-min) "a") "cdaaada" "cda")
153 (rx-tests--match (rx "c" (regexp ad-max) "a") "cdaaada" "cdaaada")
154 (rx-tests--match (rx "c" (minimal-match (regexp ad-max)) "a") "cdaaada" "cdaaada")
155 (rx-tests--match (rx "c" (maximal-match (regexp ad-min)) "a") "cdaaada" "cda")
156 (rx-tests--match (rx "c" (minimal-match (0+ (regexp ad))) "a") "cdaaada" "cda")
157 (rx-tests--match (rx "c" (maximal-match (0+ (regexp ad))) "a") "cdaaada" "cdaaada")))
158
118(provide 'rx-tests) 159(provide 'rx-tests)
119;; rx-tests.el ends here. 160;; rx-tests.el ends here.