diff options
| author | Mattias Engdegård | 2020-02-13 20:06:48 +0100 |
|---|---|---|
| committer | Mattias Engdegård | 2020-02-13 20:43:42 +0100 |
| commit | 9f6a4bbcc96bef451c75a8a78e442dec87a0ddf0 (patch) | |
| tree | bde1d7ad47c8cea91a80c4391593515b3fd0d2ea | |
| parent | d1e8ce8bb6fadf3d034ae437ff1c1b81be7d5209 (diff) | |
| download | emacs-9f6a4bbcc96bef451c75a8a78e442dec87a0ddf0.tar.gz emacs-9f6a4bbcc96bef451c75a8a78e442dec87a0ddf0.zip | |
Remove the optional KEEP-ORDER argument to regexp-opt
This argument was added for the 'or' clause in rx, but it turned out
to be a bad idea (bug#37659), and there seems to be little other use
for it.
* lisp/emacs-lisp/regexp-opt.el (regexp-opt): Remove KEEP-ORDER.
* doc/lispref/searching.texi (Regexp Functions):
* etc/NEWS: Remove it from the documentation.
* test/lisp/emacs-lisp/regexp-opt-tests.el (regexp-opt-test--match-all)
(regexp-opt-test--check-perm, regexp-opt-test--explain-perm)
(regexp-opt-keep-order, regexp-opt-longest-match): Simplify test.
| -rw-r--r-- | doc/lispref/searching.texi | 9 | ||||
| -rw-r--r-- | etc/NEWS | 7 | ||||
| -rw-r--r-- | lisp/emacs-lisp/regexp-opt.el | 43 | ||||
| -rw-r--r-- | test/lisp/emacs-lisp/regexp-opt-tests.el | 44 |
4 files changed, 19 insertions, 84 deletions
diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi index 5f4509a8b43..1f6db0643e8 100644 --- a/doc/lispref/searching.texi +++ b/doc/lispref/searching.texi | |||
| @@ -1745,7 +1745,7 @@ any special characters. | |||
| 1745 | @end defun | 1745 | @end defun |
| 1746 | 1746 | ||
| 1747 | @cindex optimize regexp | 1747 | @cindex optimize regexp |
| 1748 | @defun regexp-opt strings &optional paren keep-order | 1748 | @defun regexp-opt strings &optional paren |
| 1749 | This function returns an efficient regular expression that will match | 1749 | This function returns an efficient regular expression that will match |
| 1750 | any of the strings in the list @var{strings}. This is useful when you | 1750 | any of the strings in the list @var{strings}. This is useful when you |
| 1751 | need to make matching or searching as fast as possible---for example, | 1751 | need to make matching or searching as fast as possible---for example, |
| @@ -1783,11 +1783,8 @@ if it is necessary to ensure that a postfix operator appended to | |||
| 1783 | it will apply to the whole expression. | 1783 | it will apply to the whole expression. |
| 1784 | @end table | 1784 | @end table |
| 1785 | 1785 | ||
| 1786 | The optional argument @var{keep-order}, if non-@code{nil}, forces the | 1786 | The returned regexp is ordered in such a way that it will always match |
| 1787 | match to be performed in the order given, as if the strings were made | 1787 | the longest string possible. |
| 1788 | into a regexp by joining them with the @samp{\|} operator. If nil or | ||
| 1789 | omitted, the returned regexp will always match the longest string | ||
| 1790 | possible. | ||
| 1791 | 1788 | ||
| 1792 | Up to reordering, the resulting regexp of @code{regexp-opt} is | 1789 | Up to reordering, the resulting regexp of @code{regexp-opt} is |
| 1793 | equivalent to but usually more efficient than that of a simplified | 1790 | equivalent to but usually more efficient than that of a simplified |
| @@ -3527,13 +3527,6 @@ This is currently supported on GNUish hosts and on modern versions of | |||
| 3527 | MS-Windows. | 3527 | MS-Windows. |
| 3528 | 3528 | ||
| 3529 | +++ | 3529 | +++ |
| 3530 | ** The function 'regexp-opt' accepts an additional optional argument. | ||
| 3531 | By default, the regexp returned by 'regexp-opt' may match the strings | ||
| 3532 | in any order. If the new third argument is non-nil, the match is | ||
| 3533 | guaranteed to be performed in the order given, as if the strings were | ||
| 3534 | made into a regexp by joining them with '\|'. | ||
| 3535 | |||
| 3536 | +++ | ||
| 3537 | ** The function 'regexp-opt', when given an empty list of strings, now | 3530 | ** The function 'regexp-opt', when given an empty list of strings, now |
| 3538 | returns a regexp that never matches anything, which is an identity for | 3531 | returns a regexp that never matches anything, which is an identity for |
| 3539 | this operation. Previously, the empty string was returned in this | 3532 | this operation. Previously, the empty string was returned in this |
diff --git a/lisp/emacs-lisp/regexp-opt.el b/lisp/emacs-lisp/regexp-opt.el index 2cce4e63539..35a5fda184f 100644 --- a/lisp/emacs-lisp/regexp-opt.el +++ b/lisp/emacs-lisp/regexp-opt.el | |||
| @@ -84,7 +84,7 @@ | |||
| 84 | ;;; Code: | 84 | ;;; Code: |
| 85 | 85 | ||
| 86 | ;;;###autoload | 86 | ;;;###autoload |
| 87 | (defun regexp-opt (strings &optional paren keep-order) | 87 | (defun regexp-opt (strings &optional paren) |
| 88 | "Return a regexp to match a string in the list STRINGS. | 88 | "Return a regexp to match a string in the list STRINGS. |
| 89 | Each member of STRINGS is treated as a fixed string, not as a regexp. | 89 | Each member of STRINGS is treated as a fixed string, not as a regexp. |
| 90 | Optional PAREN specifies how the returned regexp is surrounded by | 90 | Optional PAREN specifies how the returned regexp is surrounded by |
| @@ -114,11 +114,8 @@ nil | |||
| 114 | necessary to ensure that a postfix operator appended to it will | 114 | necessary to ensure that a postfix operator appended to it will |
| 115 | apply to the whole expression. | 115 | apply to the whole expression. |
| 116 | 116 | ||
| 117 | The optional argument KEEP-ORDER, if non-nil, forces the match to | 117 | The returned regexp is ordered in such a way that it will always |
| 118 | be performed in the order given, as if the strings were made into | 118 | match the longest string possible. |
| 119 | a regexp by joining them with the `\\|' operator. If nil or | ||
| 120 | omitted, the returned regexp is will always match the longest | ||
| 121 | string possible. | ||
| 122 | 119 | ||
| 123 | Up to reordering, the resulting regexp is equivalent to but | 120 | Up to reordering, the resulting regexp is equivalent to but |
| 124 | usually more efficient than that of a simplified version: | 121 | usually more efficient than that of a simplified version: |
| @@ -140,34 +137,12 @@ usually more efficient than that of a simplified version: | |||
| 140 | (completion-ignore-case nil) | 137 | (completion-ignore-case nil) |
| 141 | (completion-regexp-list nil) | 138 | (completion-regexp-list nil) |
| 142 | (open (cond ((stringp paren) paren) (paren "\\("))) | 139 | (open (cond ((stringp paren) paren) (paren "\\("))) |
| 143 | (re | 140 | (re (if strings |
| 144 | (cond | 141 | (regexp-opt-group |
| 145 | ;; No strings: return an unmatchable regexp. | 142 | (delete-dups (sort (copy-sequence strings) 'string-lessp)) |
| 146 | ((null strings) | 143 | (or open t) (not open)) |
| 147 | (concat (or open "\\(?:") regexp-unmatchable "\\)")) | 144 | ;; No strings: return an unmatchable regexp. |
| 148 | 145 | (concat (or open "\\(?:") regexp-unmatchable "\\)")))) | |
| 149 | ;; The algorithm will generate a pattern that matches | ||
| 150 | ;; longer strings in the list before shorter. If the | ||
| 151 | ;; list order matters, then no string must come after a | ||
| 152 | ;; proper prefix of that string. To check this, verify | ||
| 153 | ;; that a straight or-pattern matches each string | ||
| 154 | ;; entirely. | ||
| 155 | ((and keep-order | ||
| 156 | (let* ((case-fold-search nil) | ||
| 157 | (alts (mapconcat #'regexp-quote strings "\\|"))) | ||
| 158 | (and (let ((s strings)) | ||
| 159 | (while (and s | ||
| 160 | (string-match alts (car s)) | ||
| 161 | (= (match-end 0) (length (car s)))) | ||
| 162 | (setq s (cdr s))) | ||
| 163 | ;; If we exited early, we found evidence that | ||
| 164 | ;; regexp-opt-group cannot be used. | ||
| 165 | s) | ||
| 166 | (concat (or open "\\(?:") alts "\\)"))))) | ||
| 167 | (t | ||
| 168 | (regexp-opt-group | ||
| 169 | (delete-dups (sort (copy-sequence strings) 'string-lessp)) | ||
| 170 | (or open t) (not open)))))) | ||
| 171 | (cond ((eq paren 'words) | 146 | (cond ((eq paren 'words) |
| 172 | (concat "\\<" re "\\>")) | 147 | (concat "\\<" re "\\>")) |
| 173 | ((eq paren 'symbols) | 148 | ((eq paren 'symbols) |
diff --git a/test/lisp/emacs-lisp/regexp-opt-tests.el b/test/lisp/emacs-lisp/regexp-opt-tests.el index 9b4567c72cc..0179ac4f1f4 100644 --- a/test/lisp/emacs-lisp/regexp-opt-tests.el +++ b/test/lisp/emacs-lisp/regexp-opt-tests.el | |||
| @@ -47,43 +47,13 @@ | |||
| 47 | (mapcar (lambda (i) (regexp-opt-test--permutation i list)) | 47 | (mapcar (lambda (i) (regexp-opt-test--permutation i list)) |
| 48 | (number-sequence 0 (1- (regexp-opt-test--factorial (length list)))))) | 48 | (number-sequence 0 (1- (regexp-opt-test--factorial (length list)))))) |
| 49 | 49 | ||
| 50 | (defun regexp-opt-test--match-all (words re) | 50 | (ert-deftest regexp-opt-longest-match () |
| 51 | (mapcar (lambda (w) (and (string-match re w) | 51 | "Check that the regexp always matches as much as possible." |
| 52 | (match-string 0 w))) | 52 | (let ((s "abcd")) |
| 53 | words)) | 53 | (dolist (perm (regexp-opt-test--permutations '("a" "ab" "ac" "abc"))) |
| 54 | 54 | (should (equal (and (string-match (regexp-opt perm) s) | |
| 55 | (defun regexp-opt-test--check-perm (perm) | 55 | (match-string 0 s)) |
| 56 | (let* ((ref-re (mapconcat #'regexp-quote perm "\\|")) | 56 | "abc"))))) |
| 57 | (opt-re (regexp-opt perm nil t)) | ||
| 58 | (ref (regexp-opt-test--match-all perm ref-re)) | ||
| 59 | (opt (regexp-opt-test--match-all perm opt-re))) | ||
| 60 | (equal opt ref))) | ||
| 61 | |||
| 62 | (defun regexp-opt-test--explain-perm (perm) | ||
| 63 | (let* ((ref-re (mapconcat #'regexp-quote perm "\\|")) | ||
| 64 | (opt-re (regexp-opt perm nil t)) | ||
| 65 | (ref (regexp-opt-test--match-all perm ref-re)) | ||
| 66 | (opt (regexp-opt-test--match-all perm opt-re))) | ||
| 67 | (concat "\n" | ||
| 68 | (format "Naïve regexp: %s\n" ref-re) | ||
| 69 | (format "Optimized regexp: %s\n" opt-re) | ||
| 70 | (format "Got: %s\n" opt) | ||
| 71 | (format "Expected: %s\n" ref)))) | ||
| 72 | |||
| 73 | (put 'regexp-opt-test--check-perm 'ert-explainer 'regexp-opt-test--explain-perm) | ||
| 74 | |||
| 75 | (ert-deftest regexp-opt-keep-order () | ||
| 76 | "Check that KEEP-ORDER works." | ||
| 77 | (dolist (perm (regexp-opt-test--permutations '("abc" "bca" "cab"))) | ||
| 78 | (should (regexp-opt-test--check-perm perm))) | ||
| 79 | (dolist (perm (regexp-opt-test--permutations '("abc" "ab" "bca" "bc"))) | ||
| 80 | (should (regexp-opt-test--check-perm perm))) | ||
| 81 | (dolist (perm (regexp-opt-test--permutations '("abxy" "cdxy"))) | ||
| 82 | (should (regexp-opt-test--check-perm perm))) | ||
| 83 | (dolist (perm (regexp-opt-test--permutations '("afgx" "bfgx" "afgy" "bfgy"))) | ||
| 84 | (should (regexp-opt-test--check-perm perm))) | ||
| 85 | (dolist (perm (regexp-opt-test--permutations '("a" "ab" "ac" "abc"))) | ||
| 86 | (should (regexp-opt-test--check-perm perm)))) | ||
| 87 | 57 | ||
| 88 | (ert-deftest regexp-opt-charset () | 58 | (ert-deftest regexp-opt-charset () |
| 89 | (should (equal (regexp-opt-charset '(?a ?b ?a)) "[ab]")) | 59 | (should (equal (regexp-opt-charset '(?a ?b ?a)) "[ab]")) |