aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMattias Engdegård2020-02-13 20:06:48 +0100
committerMattias Engdegård2020-02-13 20:43:42 +0100
commit9f6a4bbcc96bef451c75a8a78e442dec87a0ddf0 (patch)
treebde1d7ad47c8cea91a80c4391593515b3fd0d2ea
parentd1e8ce8bb6fadf3d034ae437ff1c1b81be7d5209 (diff)
downloademacs-9f6a4bbcc96bef451c75a8a78e442dec87a0ddf0.tar.gz
emacs-9f6a4bbcc96bef451c75a8a78e442dec87a0ddf0.zip
Remove the optional KEEP-ORDER argument to regexp-opt
This argument was added for the 'or' clause in rx, but it turned out to be a bad idea (bug#37659), and there seems to be little other use for it. * lisp/emacs-lisp/regexp-opt.el (regexp-opt): Remove KEEP-ORDER. * doc/lispref/searching.texi (Regexp Functions): * etc/NEWS: Remove it from the documentation. * test/lisp/emacs-lisp/regexp-opt-tests.el (regexp-opt-test--match-all) (regexp-opt-test--check-perm, regexp-opt-test--explain-perm) (regexp-opt-keep-order, regexp-opt-longest-match): Simplify test.
-rw-r--r--doc/lispref/searching.texi9
-rw-r--r--etc/NEWS7
-rw-r--r--lisp/emacs-lisp/regexp-opt.el43
-rw-r--r--test/lisp/emacs-lisp/regexp-opt-tests.el44
4 files changed, 19 insertions, 84 deletions
diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi
index 5f4509a8b43..1f6db0643e8 100644
--- a/doc/lispref/searching.texi
+++ b/doc/lispref/searching.texi
@@ -1745,7 +1745,7 @@ any special characters.
1745@end defun 1745@end defun
1746 1746
1747@cindex optimize regexp 1747@cindex optimize regexp
1748@defun regexp-opt strings &optional paren keep-order 1748@defun regexp-opt strings &optional paren
1749This function returns an efficient regular expression that will match 1749This function returns an efficient regular expression that will match
1750any of the strings in the list @var{strings}. This is useful when you 1750any of the strings in the list @var{strings}. This is useful when you
1751need to make matching or searching as fast as possible---for example, 1751need to make matching or searching as fast as possible---for example,
@@ -1783,11 +1783,8 @@ if it is necessary to ensure that a postfix operator appended to
1783it will apply to the whole expression. 1783it will apply to the whole expression.
1784@end table 1784@end table
1785 1785
1786The optional argument @var{keep-order}, if non-@code{nil}, forces the 1786The returned regexp is ordered in such a way that it will always match
1787match to be performed in the order given, as if the strings were made 1787the longest string possible.
1788into a regexp by joining them with the @samp{\|} operator. If nil or
1789omitted, the returned regexp will always match the longest string
1790possible.
1791 1788
1792Up to reordering, the resulting regexp of @code{regexp-opt} is 1789Up to reordering, the resulting regexp of @code{regexp-opt} is
1793equivalent to but usually more efficient than that of a simplified 1790equivalent to but usually more efficient than that of a simplified
diff --git a/etc/NEWS b/etc/NEWS
index 312869fe57e..380ac71260d 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -3527,13 +3527,6 @@ This is currently supported on GNUish hosts and on modern versions of
3527MS-Windows. 3527MS-Windows.
3528 3528
3529+++ 3529+++
3530** The function 'regexp-opt' accepts an additional optional argument.
3531By default, the regexp returned by 'regexp-opt' may match the strings
3532in any order. If the new third argument is non-nil, the match is
3533guaranteed to be performed in the order given, as if the strings were
3534made into a regexp by joining them with '\|'.
3535
3536+++
3537** The function 'regexp-opt', when given an empty list of strings, now 3530** The function 'regexp-opt', when given an empty list of strings, now
3538returns a regexp that never matches anything, which is an identity for 3531returns a regexp that never matches anything, which is an identity for
3539this operation. Previously, the empty string was returned in this 3532this operation. Previously, the empty string was returned in this
diff --git a/lisp/emacs-lisp/regexp-opt.el b/lisp/emacs-lisp/regexp-opt.el
index 2cce4e63539..35a5fda184f 100644
--- a/lisp/emacs-lisp/regexp-opt.el
+++ b/lisp/emacs-lisp/regexp-opt.el
@@ -84,7 +84,7 @@
84;;; Code: 84;;; Code:
85 85
86;;;###autoload 86;;;###autoload
87(defun regexp-opt (strings &optional paren keep-order) 87(defun regexp-opt (strings &optional paren)
88 "Return a regexp to match a string in the list STRINGS. 88 "Return a regexp to match a string in the list STRINGS.
89Each member of STRINGS is treated as a fixed string, not as a regexp. 89Each member of STRINGS is treated as a fixed string, not as a regexp.
90Optional PAREN specifies how the returned regexp is surrounded by 90Optional PAREN specifies how the returned regexp is surrounded by
@@ -114,11 +114,8 @@ nil
114 necessary to ensure that a postfix operator appended to it will 114 necessary to ensure that a postfix operator appended to it will
115 apply to the whole expression. 115 apply to the whole expression.
116 116
117The optional argument KEEP-ORDER, if non-nil, forces the match to 117The returned regexp is ordered in such a way that it will always
118be performed in the order given, as if the strings were made into 118match the longest string possible.
119a regexp by joining them with the `\\|' operator. If nil or
120omitted, the returned regexp is will always match the longest
121string possible.
122 119
123Up to reordering, the resulting regexp is equivalent to but 120Up to reordering, the resulting regexp is equivalent to but
124usually more efficient than that of a simplified version: 121usually more efficient than that of a simplified version:
@@ -140,34 +137,12 @@ usually more efficient than that of a simplified version:
140 (completion-ignore-case nil) 137 (completion-ignore-case nil)
141 (completion-regexp-list nil) 138 (completion-regexp-list nil)
142 (open (cond ((stringp paren) paren) (paren "\\("))) 139 (open (cond ((stringp paren) paren) (paren "\\(")))
143 (re 140 (re (if strings
144 (cond 141 (regexp-opt-group
145 ;; No strings: return an unmatchable regexp. 142 (delete-dups (sort (copy-sequence strings) 'string-lessp))
146 ((null strings) 143 (or open t) (not open))
147 (concat (or open "\\(?:") regexp-unmatchable "\\)")) 144 ;; No strings: return an unmatchable regexp.
148 145 (concat (or open "\\(?:") regexp-unmatchable "\\)"))))
149 ;; The algorithm will generate a pattern that matches
150 ;; longer strings in the list before shorter. If the
151 ;; list order matters, then no string must come after a
152 ;; proper prefix of that string. To check this, verify
153 ;; that a straight or-pattern matches each string
154 ;; entirely.
155 ((and keep-order
156 (let* ((case-fold-search nil)
157 (alts (mapconcat #'regexp-quote strings "\\|")))
158 (and (let ((s strings))
159 (while (and s
160 (string-match alts (car s))
161 (= (match-end 0) (length (car s))))
162 (setq s (cdr s)))
163 ;; If we exited early, we found evidence that
164 ;; regexp-opt-group cannot be used.
165 s)
166 (concat (or open "\\(?:") alts "\\)")))))
167 (t
168 (regexp-opt-group
169 (delete-dups (sort (copy-sequence strings) 'string-lessp))
170 (or open t) (not open))))))
171 (cond ((eq paren 'words) 146 (cond ((eq paren 'words)
172 (concat "\\<" re "\\>")) 147 (concat "\\<" re "\\>"))
173 ((eq paren 'symbols) 148 ((eq paren 'symbols)
diff --git a/test/lisp/emacs-lisp/regexp-opt-tests.el b/test/lisp/emacs-lisp/regexp-opt-tests.el
index 9b4567c72cc..0179ac4f1f4 100644
--- a/test/lisp/emacs-lisp/regexp-opt-tests.el
+++ b/test/lisp/emacs-lisp/regexp-opt-tests.el
@@ -47,43 +47,13 @@
47 (mapcar (lambda (i) (regexp-opt-test--permutation i list)) 47 (mapcar (lambda (i) (regexp-opt-test--permutation i list))
48 (number-sequence 0 (1- (regexp-opt-test--factorial (length list)))))) 48 (number-sequence 0 (1- (regexp-opt-test--factorial (length list))))))
49 49
50(defun regexp-opt-test--match-all (words re) 50(ert-deftest regexp-opt-longest-match ()
51 (mapcar (lambda (w) (and (string-match re w) 51 "Check that the regexp always matches as much as possible."
52 (match-string 0 w))) 52 (let ((s "abcd"))
53 words)) 53 (dolist (perm (regexp-opt-test--permutations '("a" "ab" "ac" "abc")))
54 54 (should (equal (and (string-match (regexp-opt perm) s)
55(defun regexp-opt-test--check-perm (perm) 55 (match-string 0 s))
56 (let* ((ref-re (mapconcat #'regexp-quote perm "\\|")) 56 "abc")))))
57 (opt-re (regexp-opt perm nil t))
58 (ref (regexp-opt-test--match-all perm ref-re))
59 (opt (regexp-opt-test--match-all perm opt-re)))
60 (equal opt ref)))
61
62(defun regexp-opt-test--explain-perm (perm)
63 (let* ((ref-re (mapconcat #'regexp-quote perm "\\|"))
64 (opt-re (regexp-opt perm nil t))
65 (ref (regexp-opt-test--match-all perm ref-re))
66 (opt (regexp-opt-test--match-all perm opt-re)))
67 (concat "\n"
68 (format "Naïve regexp: %s\n" ref-re)
69 (format "Optimized regexp: %s\n" opt-re)
70 (format "Got: %s\n" opt)
71 (format "Expected: %s\n" ref))))
72
73(put 'regexp-opt-test--check-perm 'ert-explainer 'regexp-opt-test--explain-perm)
74
75(ert-deftest regexp-opt-keep-order ()
76 "Check that KEEP-ORDER works."
77 (dolist (perm (regexp-opt-test--permutations '("abc" "bca" "cab")))
78 (should (regexp-opt-test--check-perm perm)))
79 (dolist (perm (regexp-opt-test--permutations '("abc" "ab" "bca" "bc")))
80 (should (regexp-opt-test--check-perm perm)))
81 (dolist (perm (regexp-opt-test--permutations '("abxy" "cdxy")))
82 (should (regexp-opt-test--check-perm perm)))
83 (dolist (perm (regexp-opt-test--permutations '("afgx" "bfgx" "afgy" "bfgy")))
84 (should (regexp-opt-test--check-perm perm)))
85 (dolist (perm (regexp-opt-test--permutations '("a" "ab" "ac" "abc")))
86 (should (regexp-opt-test--check-perm perm))))
87 57
88(ert-deftest regexp-opt-charset () 58(ert-deftest regexp-opt-charset ()
89 (should (equal (regexp-opt-charset '(?a ?b ?a)) "[ab]")) 59 (should (equal (regexp-opt-charset '(?a ?b ?a)) "[ab]"))