diff options
| author | Noam Postavsky | 2019-06-14 08:43:17 -0400 |
|---|---|---|
| committer | Noam Postavsky | 2019-06-25 22:00:03 -0400 |
| commit | b59ffd2290ff744ca4e7cc2748ba6b66fb2f99f1 (patch) | |
| tree | 650ab12b77ba2cf9918ebc9bce586ce22ab7d52a | |
| parent | 29babad7286bff235407e883a4ff61bae49a2e5e (diff) | |
| download | emacs-b59ffd2290ff744ca4e7cc2748ba6b66fb2f99f1.tar.gz emacs-b59ffd2290ff744ca4e7cc2748ba6b66fb2f99f1.zip | |
Support (rx (and (regexp EXPR) (literal EXPR))) (Bug#36237)
* lisp/emacs-lisp/rx.el (rx-regexp): Allow non-string forms.
(rx-constituents): Add literal constituent, which is like a plain
STRING form, but allows arbitrary lisp expressions.
(rx-literal): New function.
(rx-compile-to-lisp): New variable.
(rx--subforms): New helper function for handling subforms, including
non-constant case.
(rx-group-if, rx-and, rx-or, rx-=, rx->=, rx-repeat, rx-submatch)
(rx-submatch-n, rx-kleene, rx-atomic-p): Use it to handle non-constant
subforms.
(rx): Document new form, wrap non-constant forms with concat call.
* test/lisp/emacs-lisp/rx-tests.el (rx-tests--match): New macro.
(rx-nonstring-expr, rx-nonstring-expr-non-greedy): New tests.
* etc/NEWS: Announce changes.
| -rw-r--r-- | etc/NEWS | 6 | ||||
| -rw-r--r-- | lisp/emacs-lisp/rx.el | 242 | ||||
| -rw-r--r-- | test/lisp/emacs-lisp/rx-tests.el | 41 |
3 files changed, 201 insertions, 88 deletions
| @@ -1441,12 +1441,18 @@ when given in a string. Previously, '(any "\x80-\xff")' would match | |||
| 1441 | characters U+0080...U+00FF. Now the expression matches raw bytes in | 1441 | characters U+0080...U+00FF. Now the expression matches raw bytes in |
| 1442 | the 128...255 range, as expected. | 1442 | the 128...255 range, as expected. |
| 1443 | 1443 | ||
| 1444 | --- | ||
| 1444 | *** The rx 'or' and 'seq' forms no longer require any arguments. | 1445 | *** The rx 'or' and 'seq' forms no longer require any arguments. |
| 1445 | (or) produces a regexp that never matches anything, while (seq) | 1446 | (or) produces a regexp that never matches anything, while (seq) |
| 1446 | matches the empty string, each being an identity for the operation. | 1447 | matches the empty string, each being an identity for the operation. |
| 1447 | This also works for their aliases: '|' for 'or'; ':', 'and' and | 1448 | This also works for their aliases: '|' for 'or'; ':', 'and' and |
| 1448 | 'sequence' for 'seq'. | 1449 | 'sequence' for 'seq'. |
| 1449 | 1450 | ||
| 1451 | --- | ||
| 1452 | *** 'regexp' and new 'literal' accept arbitrary lisp as arguments. | ||
| 1453 | In this case, 'rx' will generate code which produces a regexp string | ||
| 1454 | at run time, instead of a constant string. | ||
| 1455 | |||
| 1450 | ** Frames | 1456 | ** Frames |
| 1451 | 1457 | ||
| 1452 | +++ | 1458 | +++ |
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el index 2130e3e1aea..1b5afe73b45 100644 --- a/lisp/emacs-lisp/rx.el +++ b/lisp/emacs-lisp/rx.el | |||
| @@ -47,57 +47,58 @@ | |||
| 47 | 47 | ||
| 48 | ;; Rx translates a sexp notation for regular expressions into the | 48 | ;; Rx translates a sexp notation for regular expressions into the |
| 49 | ;; usual string notation. The translation can be done at compile-time | 49 | ;; usual string notation. The translation can be done at compile-time |
| 50 | ;; by using the `rx' macro. It can be done at run-time by calling | 50 | ;; by using the `rx' macro. The `regexp' and `literal' forms accept |
| 51 | ;; function `rx-to-string'. See the documentation of `rx' for a | 51 | ;; non-constant expressions, in which case `rx' will translate to a |
| 52 | ;; complete description of the sexp notation. | 52 | ;; `concat' expression. Translation can be done fully at run time by |
| 53 | ;; calling function `rx-to-string'. See the documentation of `rx' for | ||
| 54 | ;; a complete description of the sexp notation. | ||
| 53 | ;; | 55 | ;; |
| 54 | ;; Some examples of string regexps and their sexp counterparts: | 56 | ;; Some examples of string regexps and their sexp counterparts: |
| 55 | ;; | 57 | ;; |
| 56 | ;; "^[a-z]*" | 58 | ;; "^[a-z]*" |
| 57 | ;; (rx (and line-start (0+ (in "a-z")))) | 59 | ;; (rx line-start (0+ (in "a-z"))) |
| 58 | ;; | 60 | ;; |
| 59 | ;; "\n[^ \t]" | 61 | ;; "\n[^ \t]" |
| 60 | ;; (rx (and "\n" (not (any " \t")))) | 62 | ;; (rx ?\n (not (in " \t"))) |
| 61 | ;; | 63 | ;; |
| 62 | ;; "\\*\\*\\* EOOH \\*\\*\\*\n" | 64 | ;; "\\*\\*\\* EOOH \\*\\*\\*\n" |
| 63 | ;; (rx "*** EOOH ***\n") | 65 | ;; (rx "*** EOOH ***\n") |
| 64 | ;; | 66 | ;; |
| 65 | ;; "\\<\\(catch\\|finally\\)\\>[^_]" | 67 | ;; "\\<\\(catch\\|finally\\)\\>[^_]" |
| 66 | ;; (rx (and word-start (submatch (or "catch" "finally")) word-end | 68 | ;; (rx word-start (submatch (or "catch" "finally")) word-end |
| 67 | ;; (not (any ?_)))) | 69 | ;; (not (in ?_))) |
| 68 | ;; | 70 | ;; |
| 69 | ;; "[ \t\n]*:\\([^:]+\\|$\\)" | 71 | ;; "[ \t\n]*:\\($\\|[^:]+\\)" |
| 70 | ;; (rx (and (zero-or-more (in " \t\n")) ":" | 72 | ;; (rx (* (in " \t\n")) ":" |
| 71 | ;; (submatch (or line-end (one-or-more (not (any ?:))))))) | 73 | ;; (submatch (or line-end (+ (not (in ?:)))))) |
| 72 | ;; | 74 | ;; |
| 73 | ;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*" | 75 | ;; "^content-transfer-encoding:\\(?:\n?[\t ]\\)*quoted-printable\\(?:\n?[\t ]\\)*" |
| 74 | ;; (rx (and line-start | 76 | ;; (rx line-start |
| 75 | ;; "content-transfer-encoding:" | 77 | ;; "content-transfer-encoding:" |
| 76 | ;; (+ (? ?\n)) (any " \t") | 78 | ;; (* (? ?\n) (in " \t")) |
| 77 | ;; "quoted-printable" | 79 | ;; "quoted-printable" |
| 78 | ;; (+ (? ?\n)) (any " \t")) | 80 | ;; (* (? ?\n) (in " \t"))) |
| 79 | ;; | 81 | ;; |
| 80 | ;; (concat "^\\(?:" something-else "\\)") | 82 | ;; (concat "^\\(?:" something-else "\\)") |
| 81 | ;; (rx (and line-start (eval something-else))), statically or | 83 | ;; (rx line-start (regexp something-else)) |
| 82 | ;; (rx-to-string '(and line-start ,something-else)), dynamically. | ||
| 83 | ;; | 84 | ;; |
| 84 | ;; (regexp-opt '(STRING1 STRING2 ...)) | 85 | ;; (regexp-opt '(STRING1 STRING2 ...)) |
| 85 | ;; (rx (or STRING1 STRING2 ...)), or in other words, `or' automatically | 86 | ;; (rx (or STRING1 STRING2 ...)), or in other words, `or' automatically |
| 86 | ;; calls `regexp-opt' as needed. | 87 | ;; calls `regexp-opt' as needed. |
| 87 | ;; | 88 | ;; |
| 88 | ;; "^;;\\s-*\n\\|^\n" | 89 | ;; "^;;\\s-*\n\\|^\n" |
| 89 | ;; (rx (or (and line-start ";;" (0+ space) ?\n) | 90 | ;; (rx (or (seq line-start ";;" (0+ space) ?\n) |
| 90 | ;; (and line-start ?\n))) | 91 | ;; (seq line-start ?\n))) |
| 91 | ;; | 92 | ;; |
| 92 | ;; "\\$[I]d: [^ ]+ \\([^ ]+\\) " | 93 | ;; "\\$[I]d: [^ ]+ \\([^ ]+\\) " |
| 93 | ;; (rx (and "$Id: " | 94 | ;; (rx "$Id: " |
| 94 | ;; (1+ (not (in " "))) | 95 | ;; (1+ (not (in " "))) |
| 95 | ;; " " | 96 | ;; " " |
| 96 | ;; (submatch (1+ (not (in " ")))) | 97 | ;; (submatch (1+ (not (in " ")))) |
| 97 | ;; " ")) | 98 | ;; " ") |
| 98 | ;; | 99 | ;; |
| 99 | ;; "\\\\\\\\\\[\\w+" | 100 | ;; "\\\\\\\\\\[\\w+" |
| 100 | ;; (rx (and ?\\ ?\\ ?\[ (1+ word))) | 101 | ;; (rx "\\\\[" (1+ word)) |
| 101 | ;; | 102 | ;; |
| 102 | ;; etc. | 103 | ;; etc. |
| 103 | 104 | ||
| @@ -176,6 +177,7 @@ | |||
| 176 | (not-syntax . (rx-not-syntax 1 1)) ; sregex | 177 | (not-syntax . (rx-not-syntax 1 1)) ; sregex |
| 177 | (category . (rx-category 1 1 rx-check-category)) | 178 | (category . (rx-category 1 1 rx-check-category)) |
| 178 | (eval . (rx-eval 1 1)) | 179 | (eval . (rx-eval 1 1)) |
| 180 | (literal . (rx-literal 1 1 stringp)) | ||
| 179 | (regexp . (rx-regexp 1 1 stringp)) | 181 | (regexp . (rx-regexp 1 1 stringp)) |
| 180 | (regex . regexp) ; sregex | 182 | (regex . regexp) ; sregex |
| 181 | (digit . "[[:digit:]]") | 183 | (digit . "[[:digit:]]") |
| @@ -302,6 +304,10 @@ regular expression strings.") | |||
| 302 | "Non-nil means produce greedy regular expressions for `zero-or-one', | 304 | "Non-nil means produce greedy regular expressions for `zero-or-one', |
| 303 | `zero-or-more', and `one-or-more'. Dynamically bound.") | 305 | `zero-or-more', and `one-or-more'. Dynamically bound.") |
| 304 | 306 | ||
| 307 | (defvar rx--compile-to-lisp nil | ||
| 308 | "Nil means return a regexp as a string. | ||
| 309 | Non-nil means we may return a lisp form which produces a | ||
| 310 | string (used for `rx' macro).") | ||
| 305 | 311 | ||
| 306 | (defun rx-info (op head) | 312 | (defun rx-info (op head) |
| 307 | "Return parsing/code generation info for OP. | 313 | "Return parsing/code generation info for OP. |
| @@ -344,7 +350,7 @@ a standalone symbol." | |||
| 344 | (> nargs max-args)) | 350 | (> nargs max-args)) |
| 345 | (error "rx form `%s' accepts at most %d args" | 351 | (error "rx form `%s' accepts at most %d args" |
| 346 | (car form) max-args)) | 352 | (car form) max-args)) |
| 347 | (when (not (null type-pred)) | 353 | (when type-pred |
| 348 | (dolist (sub-form (cdr form)) | 354 | (dolist (sub-form (cdr form)) |
| 349 | (unless (funcall type-pred sub-form) | 355 | (unless (funcall type-pred sub-form) |
| 350 | (error "rx form `%s' requires args satisfying `%s'" | 356 | (error "rx form `%s' requires args satisfying `%s'" |
| @@ -360,8 +366,9 @@ is non-nil." | |||
| 360 | ;; for concatenation | 366 | ;; for concatenation |
| 361 | ((eq group ':) | 367 | ((eq group ':) |
| 362 | (if (rx-atomic-p | 368 | (if (rx-atomic-p |
| 363 | (if (string-match | 369 | (if (and (stringp regexp) |
| 364 | "\\(?:[?*+]\\??\\|\\\\{[0-9]*,?[0-9]*\\\\}\\)\\'" regexp) | 370 | (string-match |
| 371 | "\\(?:[?*+]\\??\\|\\\\{[0-9]*,?[0-9]*\\\\}\\)\\'" regexp)) | ||
| 365 | (substring regexp 0 (match-beginning 0)) | 372 | (substring regexp 0 (match-beginning 0)) |
| 366 | regexp)) | 373 | regexp)) |
| 367 | (setq group nil))) | 374 | (setq group nil))) |
| @@ -370,9 +377,10 @@ is non-nil." | |||
| 370 | ;; do anyway | 377 | ;; do anyway |
| 371 | ((eq group t)) | 378 | ((eq group t)) |
| 372 | ((rx-atomic-p regexp t) (setq group nil))) | 379 | ((rx-atomic-p regexp t) (setq group nil))) |
| 373 | (if group | 380 | (cond ((and group (stringp regexp)) |
| 374 | (concat "\\(?:" regexp "\\)") | 381 | (concat "\\(?:" regexp "\\)")) |
| 375 | regexp)) | 382 | (group `("\\(?:" ,@regexp "\\)")) |
| 383 | (t regexp))) | ||
| 376 | 384 | ||
| 377 | 385 | ||
| 378 | (defvar rx-parent) | 386 | (defvar rx-parent) |
| @@ -384,7 +392,7 @@ is non-nil." | |||
| 384 | FORM is of the form `(and FORM1 ...)'." | 392 | FORM is of the form `(and FORM1 ...)'." |
| 385 | (rx-check form) | 393 | (rx-check form) |
| 386 | (rx-group-if | 394 | (rx-group-if |
| 387 | (mapconcat (lambda (x) (rx-form x ':)) (cdr form) nil) | 395 | (rx--subforms (cdr form) ':) |
| 388 | (and (memq rx-parent '(* t)) rx-parent))) | 396 | (and (memq rx-parent '(* t)) rx-parent))) |
| 389 | 397 | ||
| 390 | 398 | ||
| @@ -396,7 +404,7 @@ FORM is of the form `(and FORM1 ...)'." | |||
| 396 | ((null (cdr form)) regexp-unmatchable) | 404 | ((null (cdr form)) regexp-unmatchable) |
| 397 | ((cl-every #'stringp (cdr form)) | 405 | ((cl-every #'stringp (cdr form)) |
| 398 | (regexp-opt (cdr form) nil t)) | 406 | (regexp-opt (cdr form) nil t)) |
| 399 | (t (mapconcat (lambda (x) (rx-form x '|)) (cdr form) "\\|"))) | 407 | (t (rx--subforms (cdr form) '| "\\|"))) |
| 400 | (and (memq rx-parent '(: * t)) rx-parent))) | 408 | (and (memq rx-parent '(: * t)) rx-parent))) |
| 401 | 409 | ||
| 402 | 410 | ||
| @@ -669,7 +677,10 @@ If SKIP is non-nil, allow that number of items after the head, i.e. | |||
| 669 | (unless (and (integerp (nth 1 form)) | 677 | (unless (and (integerp (nth 1 form)) |
| 670 | (> (nth 1 form) 0)) | 678 | (> (nth 1 form) 0)) |
| 671 | (error "rx `=' requires positive integer first arg")) | 679 | (error "rx `=' requires positive integer first arg")) |
| 672 | (format "%s\\{%d\\}" (rx-form (nth 2 form) '*) (nth 1 form))) | 680 | (let ((subform (rx-form (nth 2 form) '*))) |
| 681 | (if (stringp subform) | ||
| 682 | (format "%s\\{%d\\}" subform (nth 1 form)) | ||
| 683 | `(,@subform ,(format "\\{%d\\}" (nth 1 form)))))) | ||
| 673 | 684 | ||
| 674 | 685 | ||
| 675 | (defun rx->= (form) | 686 | (defun rx->= (form) |
| @@ -679,7 +690,10 @@ If SKIP is non-nil, allow that number of items after the head, i.e. | |||
| 679 | (unless (and (integerp (nth 1 form)) | 690 | (unless (and (integerp (nth 1 form)) |
| 680 | (> (nth 1 form) 0)) | 691 | (> (nth 1 form) 0)) |
| 681 | (error "rx `>=' requires positive integer first arg")) | 692 | (error "rx `>=' requires positive integer first arg")) |
| 682 | (format "%s\\{%d,\\}" (rx-form (nth 2 form) '*) (nth 1 form))) | 693 | (let ((subform (rx-form (nth 2 form) '*))) |
| 694 | (if (stringp subform) | ||
| 695 | (format "%s\\{%d,\\}" subform (nth 1 form)) | ||
| 696 | `(,@subform ,(format "\\{%d,\\}" (nth 1 form)))))) | ||
| 683 | 697 | ||
| 684 | 698 | ||
| 685 | (defun rx-** (form) | 699 | (defun rx-** (form) |
| @@ -700,7 +714,10 @@ FORM is either `(repeat N FORM1)' or `(repeat N M FORMS...)'." | |||
| 700 | (unless (and (integerp (nth 1 form)) | 714 | (unless (and (integerp (nth 1 form)) |
| 701 | (> (nth 1 form) 0)) | 715 | (> (nth 1 form) 0)) |
| 702 | (error "rx `repeat' requires positive integer first arg")) | 716 | (error "rx `repeat' requires positive integer first arg")) |
| 703 | (format "%s\\{%d\\}" (rx-form (nth 2 form) '*) (nth 1 form))) | 717 | (let ((subform (rx-form (nth 2 form) '*))) |
| 718 | (if (stringp subform) | ||
| 719 | (format "%s\\{%d\\}" subform (nth 1 form)) | ||
| 720 | `(,@subform ,(format "\\{%d\\}" (nth 1 form)))))) | ||
| 704 | ((or (not (integerp (nth 2 form))) | 721 | ((or (not (integerp (nth 2 form))) |
| 705 | (< (nth 2 form) 0) | 722 | (< (nth 2 form) 0) |
| 706 | (not (integerp (nth 1 form))) | 723 | (not (integerp (nth 1 form))) |
| @@ -708,32 +725,28 @@ FORM is either `(repeat N FORM1)' or `(repeat N M FORMS...)'." | |||
| 708 | (< (nth 2 form) (nth 1 form))) | 725 | (< (nth 2 form) (nth 1 form))) |
| 709 | (error "rx `repeat' range error")) | 726 | (error "rx `repeat' range error")) |
| 710 | (t | 727 | (t |
| 711 | (format "%s\\{%d,%d\\}" (rx-form (nth 3 form) '*) | 728 | (let ((subform (rx-form (nth 3 form) '*))) |
| 712 | (nth 1 form) (nth 2 form))))) | 729 | (if (stringp subform) |
| 730 | (format "%s\\{%d,%d\\}" subform (nth 1 form) (nth 2 form)) | ||
| 731 | `(,@subform ,(format "\\{%d,%d\\}" (nth 1 form) (nth 2 form)))))))) | ||
| 713 | 732 | ||
| 714 | 733 | ||
| 715 | (defun rx-submatch (form) | 734 | (defun rx-submatch (form) |
| 716 | "Parse and produce code from FORM, which is `(submatch ...)'." | 735 | "Parse and produce code from FORM, which is `(submatch ...)'." |
| 717 | (concat "\\(" | 736 | (let ((subforms (rx--subforms (cdr form) ':))) |
| 718 | (if (= 2 (length form)) | 737 | (if (stringp subforms) |
| 719 | ;; Only one sub-form. | 738 | (concat "\\(" subforms "\\)") |
| 720 | (rx-form (cadr form)) | 739 | `("\\(" ,@subforms "\\)")))) |
| 721 | ;; Several sub-forms implicitly concatenated. | ||
| 722 | (mapconcat (lambda (re) (rx-form re ':)) (cdr form) nil)) | ||
| 723 | "\\)")) | ||
| 724 | 740 | ||
| 725 | (defun rx-submatch-n (form) | 741 | (defun rx-submatch-n (form) |
| 726 | "Parse and produce code from FORM, which is `(submatch-n N ...)'." | 742 | "Parse and produce code from FORM, which is `(submatch-n N ...)'." |
| 727 | (let ((n (nth 1 form))) | 743 | (let ((n (nth 1 form)) |
| 744 | (subforms (rx--subforms (cddr form) ':))) | ||
| 728 | (unless (and (integerp n) (> n 0)) | 745 | (unless (and (integerp n) (> n 0)) |
| 729 | (error "rx `submatch-n' argument must be positive")) | 746 | (error "rx `submatch-n' argument must be positive")) |
| 730 | (concat "\\(?" (number-to-string n) ":" | 747 | (if (stringp subforms) |
| 731 | (if (= 3 (length form)) | 748 | (concat "\\(?" (number-to-string n) ":" subforms "\\)") |
| 732 | ;; Only one sub-form. | 749 | `("\\(?" ,(number-to-string n) ":" ,@subforms "\\)")))) |
| 733 | (rx-form (nth 2 form)) | ||
| 734 | ;; Several sub-forms implicitly concatenated. | ||
| 735 | (mapconcat (lambda (re) (rx-form re ':)) (cddr form) nil)) | ||
| 736 | "\\)"))) | ||
| 737 | 750 | ||
| 738 | (defun rx-backref (form) | 751 | (defun rx-backref (form) |
| 739 | "Parse and produce code from FORM, which is `(backref N)'." | 752 | "Parse and produce code from FORM, which is `(backref N)'." |
| @@ -761,9 +774,12 @@ is non-nil." | |||
| 761 | (t "?"))) | 774 | (t "?"))) |
| 762 | (op (cond ((memq (car form) '(* *? 0+ zero-or-more)) "*") | 775 | (op (cond ((memq (car form) '(* *? 0+ zero-or-more)) "*") |
| 763 | ((memq (car form) '(+ +? 1+ one-or-more)) "+") | 776 | ((memq (car form) '(+ +? 1+ one-or-more)) "+") |
| 764 | (t "?")))) | 777 | (t "?"))) |
| 778 | (subform (rx-form (cadr form) '*))) | ||
| 765 | (rx-group-if | 779 | (rx-group-if |
| 766 | (concat (rx-form (cadr form) '*) op suffix) | 780 | (if (stringp subform) |
| 781 | (concat subform op suffix) | ||
| 782 | `(,@subform ,(concat op suffix))) | ||
| 767 | (and (memq rx-parent '(t *)) rx-parent)))) | 783 | (and (memq rx-parent '(t *)) rx-parent)))) |
| 768 | 784 | ||
| 769 | 785 | ||
| @@ -791,15 +807,18 @@ regexps that are atomic but end in operators, such as | |||
| 791 | be detected without much effort. A guarantee of no false | 807 | be detected without much effort. A guarantee of no false |
| 792 | negatives would require a theoretic specification of the set | 808 | negatives would require a theoretic specification of the set |
| 793 | of all atomic regexps." | 809 | of all atomic regexps." |
| 794 | (let ((l (length r))) | 810 | (if (and rx--compile-to-lisp |
| 795 | (cond | 811 | (not (stringp r))) |
| 796 | ((<= l 1)) | 812 | nil ;; Runtime value, we must assume non-atomic. |
| 797 | ((= l 2) (= (aref r 0) ?\\)) | 813 | (let ((l (length r))) |
| 798 | ((= l 3) (string-match "\\`\\(?:\\\\[cCsS_]\\|\\[[^^]\\]\\)" r)) | ||
| 799 | ((null lax) | ||
| 800 | (cond | 814 | (cond |
| 801 | ((string-match "\\`\\[\\^?]?\\(?:\\[:[a-z]+:]\\|[^]]\\)*]\\'" r)) | 815 | ((<= l 1)) |
| 802 | ((string-match "\\`\\\\(\\(?:[^\\]\\|\\\\[^)]\\)*\\\\)\\'" r))))))) | 816 | ((= l 2) (= (aref r 0) ?\\)) |
| 817 | ((= l 3) (string-match "\\`\\(?:\\\\[cCsS_]\\|\\[[^^]\\]\\)" r)) | ||
| 818 | ((null lax) | ||
| 819 | (cond | ||
| 820 | ((string-match "\\`\\[\\^?]?\\(?:\\[:[a-z]+:]\\|[^]]\\)*]\\'" r)) | ||
| 821 | ((string-match "\\`\\\\(\\(?:[^\\]\\|\\\\[^)]\\)*\\\\)\\'" r)))))))) | ||
| 803 | 822 | ||
| 804 | 823 | ||
| 805 | (defun rx-syntax (form) | 824 | (defun rx-syntax (form) |
| @@ -855,9 +874,23 @@ If FORM is `(minimal-match FORM1)', non-greedy versions of `*', | |||
| 855 | 874 | ||
| 856 | (defun rx-regexp (form) | 875 | (defun rx-regexp (form) |
| 857 | "Parse and produce code from FORM, which is `(regexp STRING)'." | 876 | "Parse and produce code from FORM, which is `(regexp STRING)'." |
| 858 | (rx-check form) | 877 | (cond ((stringp form) |
| 859 | (rx-group-if (cadr form) rx-parent)) | 878 | (rx-group-if (cadr form) rx-parent)) |
| 860 | 879 | (rx--compile-to-lisp | |
| 880 | ;; Always group non-string forms, since we can't be sure they | ||
| 881 | ;; are atomic. | ||
| 882 | (rx-group-if (cdr form) t)) | ||
| 883 | (t (rx-check form)))) | ||
| 884 | |||
| 885 | (defun rx-literal (form) | ||
| 886 | "Parse and produce code from FORM, which is `(literal STRING-EXP)'." | ||
| 887 | (cond ((stringp form) | ||
| 888 | ;; This is allowed, but makes little sense, you could just | ||
| 889 | ;; use STRING directly. | ||
| 890 | (rx-group-if (regexp-quote (cadr form)) rx-parent)) | ||
| 891 | (rx--compile-to-lisp | ||
| 892 | (rx-group-if `((regexp-quote ,(cadr form))) rx-parent)) | ||
| 893 | (t (rx-check form)))) | ||
| 861 | 894 | ||
| 862 | (defun rx-form (form &optional parent) | 895 | (defun rx-form (form &optional parent) |
| 863 | "Parse and produce code for regular expression FORM. | 896 | "Parse and produce code for regular expression FORM. |
| @@ -888,12 +921,38 @@ shy groups around the result and some more in other functions." | |||
| 888 | (t | 921 | (t |
| 889 | (error "rx syntax error at `%s'" form))))) | 922 | (error "rx syntax error at `%s'" form))))) |
| 890 | 923 | ||
| 924 | (defun rx--subforms (subforms &optional parent separator) | ||
| 925 | "Produce code for regular expressions SUBFORMS. | ||
| 926 | SUBFORMS is a list of regular expression sexps. | ||
| 927 | PARENT controls grouping, as in `rx-form'. | ||
| 928 | Insert SEPARATOR between the code from each of SUBFORMS." | ||
| 929 | (if (null (cdr subforms)) | ||
| 930 | ;; Zero or one forms, no need for grouping. | ||
| 931 | (and subforms (rx-form (car subforms))) | ||
| 932 | (let ((listify (lambda (x) | ||
| 933 | (if (listp x) (copy-sequence x) | ||
| 934 | (list x))))) | ||
| 935 | (setq subforms (mapcar (lambda (x) (rx-form x parent)) subforms)) | ||
| 936 | (cond ((or (not rx--compile-to-lisp) | ||
| 937 | (cl-every #'stringp subforms)) | ||
| 938 | (mapconcat #'identity subforms separator)) | ||
| 939 | (separator | ||
| 940 | (nconc (funcall listify (car subforms)) | ||
| 941 | (mapcan (lambda (x) | ||
| 942 | (cons separator (funcall listify x))) | ||
| 943 | (cdr subforms)))) | ||
| 944 | (t (mapcan listify subforms)))))) | ||
| 945 | |||
| 891 | 946 | ||
| 892 | ;;;###autoload | 947 | ;;;###autoload |
| 893 | (defun rx-to-string (form &optional no-group) | 948 | (defun rx-to-string (form &optional no-group) |
| 894 | "Parse and produce code for regular expression FORM. | 949 | "Parse and produce code for regular expression FORM. |
| 895 | FORM is a regular expression in sexp form. | 950 | FORM is a regular expression in sexp form. |
| 896 | NO-GROUP non-nil means don't put shy groups around the result." | 951 | NO-GROUP non-nil means don't put shy groups around the result. |
| 952 | |||
| 953 | In contrast to the `rx' macro, subforms `literal' and `regexp' | ||
| 954 | will not accept non-string arguments, i.e., (literal STRING) | ||
| 955 | becomes just a more verbose version of STRING." | ||
| 897 | (rx-group-if (rx-form form) (null no-group))) | 956 | (rx-group-if (rx-form form) (null no-group))) |
| 898 | 957 | ||
| 899 | 958 | ||
| @@ -903,8 +962,12 @@ NO-GROUP non-nil means don't put shy groups around the result." | |||
| 903 | REGEXPS is a non-empty sequence of forms of the sort listed below. | 962 | REGEXPS is a non-empty sequence of forms of the sort listed below. |
| 904 | 963 | ||
| 905 | Note that `rx' is a Lisp macro; when used in a Lisp program being | 964 | Note that `rx' is a Lisp macro; when used in a Lisp program being |
| 906 | compiled, the translation is performed by the compiler. | 965 | compiled, the translation is performed by the compiler. The |
| 907 | See `rx-to-string' for how to do such a translation at run-time. | 966 | `literal' and `regexp' forms accept subforms that will evaluate |
| 967 | to strings, in addition to constant strings. If REGEXPS include | ||
| 968 | such forms, then the result is an expression which returns a | ||
| 969 | regexp string, rather than a regexp string directly. See | ||
| 970 | `rx-to-string' for performing translation completely at run time. | ||
| 908 | 971 | ||
| 909 | The following are valid subforms of regular expressions in sexp | 972 | The following are valid subforms of regular expressions in sexp |
| 910 | notation. | 973 | notation. |
| @@ -1204,18 +1267,29 @@ enclosed in `(and ...)'. | |||
| 1204 | `(backref N)' | 1267 | `(backref N)' |
| 1205 | matches what was matched previously by submatch N. | 1268 | matches what was matched previously by submatch N. |
| 1206 | 1269 | ||
| 1270 | `(literal STRING-EXPR)' | ||
| 1271 | matches STRING-EXPR literally, where STRING-EXPR is any lisp | ||
| 1272 | expression that evaluates to a string. | ||
| 1273 | |||
| 1274 | `(regexp REGEXP-EXPR)' | ||
| 1275 | include REGEXP-EXPR in string notation in the result, where | ||
| 1276 | REGEXP-EXPR is any lisp expression that evaluates to a | ||
| 1277 | string containing a valid regexp. | ||
| 1278 | |||
| 1207 | `(eval FORM)' | 1279 | `(eval FORM)' |
| 1208 | evaluate FORM and insert result. If result is a string, | 1280 | evaluate FORM and insert result. If result is a string, |
| 1209 | `regexp-quote' it. | 1281 | `regexp-quote' it. Note that FORM is evaluated during |
| 1210 | 1282 | macroexpansion." | |
| 1211 | `(regexp REGEXP)' | 1283 | (let* ((rx--compile-to-lisp t) |
| 1212 | include REGEXP in string notation in the result." | 1284 | (re (cond ((null regexps) |
| 1213 | (cond ((null regexps) | 1285 | (error "No regexp")) |
| 1214 | (error "No regexp")) | 1286 | ((cdr regexps) |
| 1215 | ((cdr regexps) | 1287 | (rx-to-string `(and ,@regexps) t)) |
| 1216 | (rx-to-string `(and ,@regexps) t)) | 1288 | (t |
| 1217 | (t | 1289 | (rx-to-string (car regexps) t))))) |
| 1218 | (rx-to-string (car regexps) t)))) | 1290 | (if (stringp re) |
| 1291 | re | ||
| 1292 | `(concat ,@re)))) | ||
| 1219 | 1293 | ||
| 1220 | 1294 | ||
| 1221 | (pcase-defmacro rx (&rest regexps) | 1295 | (pcase-defmacro rx (&rest regexps) |
| @@ -1277,14 +1351,6 @@ string as argument to `match-string'." | |||
| 1277 | for var in vars | 1351 | for var in vars |
| 1278 | collect `(app (match-string ,i) ,var))))) | 1352 | collect `(app (match-string ,i) ,var))))) |
| 1279 | 1353 | ||
| 1280 | ;; ;; sregex.el replacement | ||
| 1281 | |||
| 1282 | ;; ;;;###autoload (provide 'sregex) | ||
| 1283 | ;; ;;;###autoload (autoload 'sregex "rx") | ||
| 1284 | ;; (defalias 'sregex 'rx-to-string) | ||
| 1285 | ;; ;;;###autoload (autoload 'sregexq "rx" nil nil 'macro) | ||
| 1286 | ;; (defalias 'sregexq 'rx) | ||
| 1287 | |||
| 1288 | (provide 'rx) | 1354 | (provide 'rx) |
| 1289 | 1355 | ||
| 1290 | ;;; rx.el ends here | 1356 | ;;; rx.el ends here |
diff --git a/test/lisp/emacs-lisp/rx-tests.el b/test/lisp/emacs-lisp/rx-tests.el index 6f392d616d1..bab71b522bb 100644 --- a/test/lisp/emacs-lisp/rx-tests.el +++ b/test/lisp/emacs-lisp/rx-tests.el | |||
| @@ -115,5 +115,46 @@ | |||
| 115 | ;; Test zero-argument `seq'. | 115 | ;; Test zero-argument `seq'. |
| 116 | (should (equal (rx (seq)) ""))) | 116 | (should (equal (rx (seq)) ""))) |
| 117 | 117 | ||
| 118 | (defmacro rx-tests--match (regexp string &optional match) | ||
| 119 | (macroexp-let2 nil strexp string | ||
| 120 | `(ert-info ((format "Matching %S to %S" ',regexp ,strexp)) | ||
| 121 | (should (string-match ,regexp ,strexp)) | ||
| 122 | ,@(when match | ||
| 123 | `((should (equal (match-string 0 ,strexp) ,match))))))) | ||
| 124 | |||
| 125 | (ert-deftest rx-nonstring-expr () | ||
| 126 | (let ((bee "b") | ||
| 127 | (vowel "[aeiou]")) | ||
| 128 | (rx-tests--match (rx "a" (literal bee) "c") "abc") | ||
| 129 | (rx-tests--match (rx "a" (regexp bee) "c") "abc") | ||
| 130 | (rx-tests--match (rx "a" (or (regexp bee) "xy") "c") "abc") | ||
| 131 | (rx-tests--match (rx "a" (or "xy" (regexp bee)) "c") "abc") | ||
| 132 | (should-not (string-match (rx (or (regexp bee) "xy")) "")) | ||
| 133 | (rx-tests--match (rx "a" (= 3 (regexp bee)) "c") "abbbc") | ||
| 134 | (rx-tests--match (rx "x" (= 3 (regexp vowel)) "z") "xeoez") | ||
| 135 | (should-not (string-match (rx "x" (= 3 (regexp vowel)) "z") "xe[]z")) | ||
| 136 | (rx-tests--match (rx "x" (= 3 (literal vowel)) "z") | ||
| 137 | "x[aeiou][aeiou][aeiou]z") | ||
| 138 | (rx-tests--match (rx "x" (repeat 1 (regexp vowel)) "z") "xaz") | ||
| 139 | (rx-tests--match (rx "x" (repeat 1 2 (regexp vowel)) "z") "xaz") | ||
| 140 | (rx-tests--match (rx "x" (repeat 1 2 (regexp vowel)) "z") "xauz") | ||
| 141 | (rx-tests--match (rx "x" (>= 1 (regexp vowel)) "z") "xaiiz") | ||
| 142 | (rx-tests--match (rx "x" (** 1 2 (regexp vowel)) "z") "xaiz") | ||
| 143 | (rx-tests--match (rx "x" (group (regexp vowel)) "z") "xaz") | ||
| 144 | (rx-tests--match (rx "x" (group-n 1 (regexp vowel)) "z") "xaz") | ||
| 145 | (rx-tests--match (rx "x" (? (regexp vowel)) "z") "xz"))) | ||
| 146 | |||
| 147 | (ert-deftest rx-nonstring-expr-non-greedy () | ||
| 148 | "`rx's greediness can't affect runtime regexp parts." | ||
| 149 | (let ((ad-min "[ad]*?") | ||
| 150 | (ad-max "[ad]*") | ||
| 151 | (ad "[ad]")) | ||
| 152 | (rx-tests--match (rx "c" (regexp ad-min) "a") "cdaaada" "cda") | ||
| 153 | (rx-tests--match (rx "c" (regexp ad-max) "a") "cdaaada" "cdaaada") | ||
| 154 | (rx-tests--match (rx "c" (minimal-match (regexp ad-max)) "a") "cdaaada" "cdaaada") | ||
| 155 | (rx-tests--match (rx "c" (maximal-match (regexp ad-min)) "a") "cdaaada" "cda") | ||
| 156 | (rx-tests--match (rx "c" (minimal-match (0+ (regexp ad))) "a") "cdaaada" "cda") | ||
| 157 | (rx-tests--match (rx "c" (maximal-match (0+ (regexp ad))) "a") "cdaaada" "cdaaada"))) | ||
| 158 | |||
| 118 | (provide 'rx-tests) | 159 | (provide 'rx-tests) |
| 119 | ;; rx-tests.el ends here. | 160 | ;; rx-tests.el ends here. |