aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMattias Engdegård2019-06-30 12:53:52 +0200
committerMattias Engdegård2019-07-04 17:18:15 +0200
commit3fd74915121a3eac265170e20bd19b3cde6a2589 (patch)
tree7abac1b1a1c5241dc9226edf2e99373dd0e966f8
parent2bc90e0ce0f349b8c80aa8df782f991b64aa7398 (diff)
downloademacs-3fd74915121a3eac265170e20bd19b3cde6a2589.tar.gz
emacs-3fd74915121a3eac265170e20bd19b3cde6a2589.zip
Optimise more inputs to `regexp-opt' (bug#36444)
Use a more precise test to determine whether the input to `regexp-opt' is safe to optimise when KEEP-ORDER is non-nil, permitting more inputs to be optimised than before. For example, ("good" "goal" "go") is now accepted. * lisp/emacs-lisp/regexp-opt.el (regexp-opt): More precise test for whether the list is safe w.r.t. KEEP-ORDER. (regexp-opt--contains-prefix): Remove. * test/lisp/emacs-lisp/regexp-opt-tests.el: Use lexical-binding. (regexp-opt-test--permutation, regexp-opt-test--factorial) (regexp-opt-test--permutations, regexp-opt-test--match-all) (regexp-opt-test--check-perm, regexp-opt-test--explain-perm) (regexp-opt-keep-order): Test KEEP-ORDER.
-rw-r--r--lisp/emacs-lisp/regexp-opt.el46
-rw-r--r--test/lisp/emacs-lisp/regexp-opt-tests.el62
2 files changed, 83 insertions, 25 deletions
diff --git a/lisp/emacs-lisp/regexp-opt.el b/lisp/emacs-lisp/regexp-opt.el
index b6104f22e7d..ab52003cdf7 100644
--- a/lisp/emacs-lisp/regexp-opt.el
+++ b/lisp/emacs-lisp/regexp-opt.el
@@ -140,21 +140,34 @@ usually more efficient than that of a simplified version:
140 (completion-ignore-case nil) 140 (completion-ignore-case nil)
141 (completion-regexp-list nil) 141 (completion-regexp-list nil)
142 (open (cond ((stringp paren) paren) (paren "\\("))) 142 (open (cond ((stringp paren) paren) (paren "\\(")))
143 (sorted-strings (delete-dups
144 (sort (copy-sequence strings) 'string-lessp)))
145 (re 143 (re
146 (cond 144 (cond
147 ;; No strings: return an unmatchable regexp. 145 ;; No strings: return an unmatchable regexp.
148 ((null strings) 146 ((null strings)
149 (concat (or open "\\(?:") regexp-unmatchable "\\)")) 147 (concat (or open "\\(?:") regexp-unmatchable "\\)"))
150 ;; If we cannot reorder, give up all attempts at 148
151 ;; optimisation. There is room for improvement (Bug#34641). 149 ;; The algorithm will generate a pattern that matches
152 ((and keep-order (regexp-opt--contains-prefix sorted-strings)) 150 ;; longer strings in the list before shorter. If the
153 (concat (or open "\\(?:") 151 ;; list order matters, then no string must come after a
154 (mapconcat #'regexp-quote strings "\\|") 152 ;; proper prefix of that string. To check this, verify
155 "\\)")) 153 ;; that a straight or-pattern matches each string
154 ;; entirely.
155 ((and keep-order
156 (let* ((case-fold-search nil)
157 (alts (mapconcat #'regexp-quote strings "\\|")))
158 (and (let ((s strings))
159 (while (and s
160 (string-match alts (car s))
161 (= (match-end 0) (length (car s))))
162 (setq s (cdr s)))
163 ;; If we exited early, we found evidence that
164 ;; regexp-opt-group cannot be used.
165 s)
166 (concat (or open "\\(?:") alts "\\)")))))
156 (t 167 (t
157 (regexp-opt-group sorted-strings (or open t) (not open)))))) 168 (regexp-opt-group
169 (delete-dups (sort (copy-sequence strings) 'string-lessp))
170 (or open t) (not open))))))
158 (cond ((eq paren 'words) 171 (cond ((eq paren 'words)
159 (concat "\\<" re "\\>")) 172 (concat "\\<" re "\\>"))
160 ((eq paren 'symbols) 173 ((eq paren 'symbols)
@@ -339,21 +352,6 @@ never matches anything."
339 (concat "[" all "]"))))))) 352 (concat "[" all "]")))))))
340 353
341 354
342(defun regexp-opt--contains-prefix (strings)
343 "Whether STRINGS contains a proper prefix of one of its other elements.
344STRINGS must be a list of sorted strings without duplicates."
345 (let ((s strings))
346 ;; In a lexicographically sorted list, a string always immediately
347 ;; succeeds one of its prefixes.
348 (while (and (cdr s)
349 (not (string-equal
350 (car s)
351 (substring (cadr s) 0 (min (length (car s))
352 (length (cadr s)))))))
353 (setq s (cdr s)))
354 (cdr s)))
355
356
357(provide 'regexp-opt) 355(provide 'regexp-opt)
358 356
359;;; regexp-opt.el ends here 357;;; regexp-opt.el ends here
diff --git a/test/lisp/emacs-lisp/regexp-opt-tests.el b/test/lisp/emacs-lisp/regexp-opt-tests.el
index 927de8c6a5f..3658964faac 100644
--- a/test/lisp/emacs-lisp/regexp-opt-tests.el
+++ b/test/lisp/emacs-lisp/regexp-opt-tests.el
@@ -1,4 +1,4 @@
1;;; regexp-opt-tests.el --- Tests for regexp-opt.el 1;;; regexp-opt-tests.el --- Tests for regexp-opt.el -*- lexical-binding: t -*-
2 2
3;; Copyright (C) 2013-2019 Free Software Foundation, Inc. 3;; Copyright (C) 2013-2019 Free Software Foundation, Inc.
4 4
@@ -25,6 +25,66 @@
25 25
26(require 'regexp-opt) 26(require 'regexp-opt)
27 27
28(defun regexp-opt-test--permutation (n list)
29 "The Nth permutation of LIST, 0 ≤ N < (length LIST)!."
30 (let ((len (length list))
31 (perm-list nil))
32 (dotimes (i len)
33 (let* ((d (- len i))
34 (k (mod n d)))
35 (push (nth k list) perm-list)
36 (setq list (append (butlast list (- (length list) k))
37 (nthcdr (1+ k) list)))
38 (setq n (/ n d))))
39 (nreverse perm-list)))
40
41(defun regexp-opt-test--factorial (n)
42 "N!"
43 (apply #'* (number-sequence 1 n)))
44
45(defun regexp-opt-test--permutations (list)
46 "All permutations of LIST."
47 (mapcar (lambda (i) (regexp-opt-test--permutation i list))
48 (number-sequence 0 (1- (regexp-opt-test--factorial (length list))))))
49
50(defun regexp-opt-test--match-all (words re)
51 (mapcar (lambda (w) (and (string-match re w)
52 (match-string 0 w)))
53 words))
54
55(defun regexp-opt-test--check-perm (perm)
56 (let* ((ref-re (mapconcat #'regexp-quote perm "\\|"))
57 (opt-re (regexp-opt perm nil t))
58 (ref (regexp-opt-test--match-all perm ref-re))
59 (opt (regexp-opt-test--match-all perm opt-re)))
60 (equal opt ref)))
61
62(defun regexp-opt-test--explain-perm (perm)
63 (let* ((ref-re (mapconcat #'regexp-quote perm "\\|"))
64 (opt-re (regexp-opt perm nil t))
65 (ref (regexp-opt-test--match-all perm ref-re))
66 (opt (regexp-opt-test--match-all perm opt-re)))
67 (concat "\n"
68 (format "Naïve regexp: %s\n" ref-re)
69 (format "Optimised regexp: %s\n" opt-re)
70 (format "Got: %s\n" opt)
71 (format "Expected: %s\n" ref))))
72
73(put 'regexp-opt-test--check-perm 'ert-explainer 'regexp-opt-test--explain-perm)
74
75(ert-deftest regexp-opt-keep-order ()
76 "Check that KEEP-ORDER works."
77 (dolist (perm (regexp-opt-test--permutations '("abc" "bca" "cab")))
78 (should (regexp-opt-test--check-perm perm)))
79 (dolist (perm (regexp-opt-test--permutations '("abc" "ab" "bca" "bc")))
80 (should (regexp-opt-test--check-perm perm)))
81 (dolist (perm (regexp-opt-test--permutations '("abxy" "cdxy")))
82 (should (regexp-opt-test--check-perm perm)))
83 (dolist (perm (regexp-opt-test--permutations '("afgx" "bfgx" "afgy" "bfgy")))
84 (should (regexp-opt-test--check-perm perm)))
85 (dolist (perm (regexp-opt-test--permutations '("a" "ab" "ac" "abc")))
86 (should (regexp-opt-test--check-perm perm))))
87
28(ert-deftest regexp-opt-charset () 88(ert-deftest regexp-opt-charset ()
29 (should (equal (regexp-opt-charset '(?a ?b ?a)) "[ab]")) 89 (should (equal (regexp-opt-charset '(?a ?b ?a)) "[ab]"))
30 (should (equal (regexp-opt-charset '(?D ?d ?B ?a ?b ?C ?7 ?a ?c ?A)) 90 (should (equal (regexp-opt-charset '(?D ?d ?B ?a ?b ?C ?7 ?a ?c ?A))