diff options
| author | Mattias Engdegård | 2019-06-30 12:53:52 +0200 |
|---|---|---|
| committer | Mattias Engdegård | 2019-07-04 17:18:15 +0200 |
| commit | 3fd74915121a3eac265170e20bd19b3cde6a2589 (patch) | |
| tree | 7abac1b1a1c5241dc9226edf2e99373dd0e966f8 | |
| parent | 2bc90e0ce0f349b8c80aa8df782f991b64aa7398 (diff) | |
| download | emacs-3fd74915121a3eac265170e20bd19b3cde6a2589.tar.gz emacs-3fd74915121a3eac265170e20bd19b3cde6a2589.zip | |
Optimise more inputs to `regexp-opt' (bug#36444)
Use a more precise test to determine whether the input to `regexp-opt'
is safe to optimise when KEEP-ORDER is non-nil, permitting more inputs
to be optimised than before. For example, ("good" "goal" "go") is now
accepted.
* lisp/emacs-lisp/regexp-opt.el (regexp-opt):
More precise test for whether the list is safe w.r.t. KEEP-ORDER.
(regexp-opt--contains-prefix): Remove.
* test/lisp/emacs-lisp/regexp-opt-tests.el: Use lexical-binding.
(regexp-opt-test--permutation, regexp-opt-test--factorial)
(regexp-opt-test--permutations, regexp-opt-test--match-all)
(regexp-opt-test--check-perm, regexp-opt-test--explain-perm)
(regexp-opt-keep-order): Test KEEP-ORDER.
| -rw-r--r-- | lisp/emacs-lisp/regexp-opt.el | 46 | ||||
| -rw-r--r-- | test/lisp/emacs-lisp/regexp-opt-tests.el | 62 |
2 files changed, 83 insertions, 25 deletions
diff --git a/lisp/emacs-lisp/regexp-opt.el b/lisp/emacs-lisp/regexp-opt.el index b6104f22e7d..ab52003cdf7 100644 --- a/lisp/emacs-lisp/regexp-opt.el +++ b/lisp/emacs-lisp/regexp-opt.el | |||
| @@ -140,21 +140,34 @@ usually more efficient than that of a simplified version: | |||
| 140 | (completion-ignore-case nil) | 140 | (completion-ignore-case nil) |
| 141 | (completion-regexp-list nil) | 141 | (completion-regexp-list nil) |
| 142 | (open (cond ((stringp paren) paren) (paren "\\("))) | 142 | (open (cond ((stringp paren) paren) (paren "\\("))) |
| 143 | (sorted-strings (delete-dups | ||
| 144 | (sort (copy-sequence strings) 'string-lessp))) | ||
| 145 | (re | 143 | (re |
| 146 | (cond | 144 | (cond |
| 147 | ;; No strings: return an unmatchable regexp. | 145 | ;; No strings: return an unmatchable regexp. |
| 148 | ((null strings) | 146 | ((null strings) |
| 149 | (concat (or open "\\(?:") regexp-unmatchable "\\)")) | 147 | (concat (or open "\\(?:") regexp-unmatchable "\\)")) |
| 150 | ;; If we cannot reorder, give up all attempts at | 148 | |
| 151 | ;; optimisation. There is room for improvement (Bug#34641). | 149 | ;; The algorithm will generate a pattern that matches |
| 152 | ((and keep-order (regexp-opt--contains-prefix sorted-strings)) | 150 | ;; longer strings in the list before shorter. If the |
| 153 | (concat (or open "\\(?:") | 151 | ;; list order matters, then no string must come after a |
| 154 | (mapconcat #'regexp-quote strings "\\|") | 152 | ;; proper prefix of that string. To check this, verify |
| 155 | "\\)")) | 153 | ;; that a straight or-pattern matches each string |
| 154 | ;; entirely. | ||
| 155 | ((and keep-order | ||
| 156 | (let* ((case-fold-search nil) | ||
| 157 | (alts (mapconcat #'regexp-quote strings "\\|"))) | ||
| 158 | (and (let ((s strings)) | ||
| 159 | (while (and s | ||
| 160 | (string-match alts (car s)) | ||
| 161 | (= (match-end 0) (length (car s)))) | ||
| 162 | (setq s (cdr s))) | ||
| 163 | ;; If we exited early, we found evidence that | ||
| 164 | ;; regexp-opt-group cannot be used. | ||
| 165 | s) | ||
| 166 | (concat (or open "\\(?:") alts "\\)"))))) | ||
| 156 | (t | 167 | (t |
| 157 | (regexp-opt-group sorted-strings (or open t) (not open)))))) | 168 | (regexp-opt-group |
| 169 | (delete-dups (sort (copy-sequence strings) 'string-lessp)) | ||
| 170 | (or open t) (not open)))))) | ||
| 158 | (cond ((eq paren 'words) | 171 | (cond ((eq paren 'words) |
| 159 | (concat "\\<" re "\\>")) | 172 | (concat "\\<" re "\\>")) |
| 160 | ((eq paren 'symbols) | 173 | ((eq paren 'symbols) |
| @@ -339,21 +352,6 @@ never matches anything." | |||
| 339 | (concat "[" all "]"))))))) | 352 | (concat "[" all "]"))))))) |
| 340 | 353 | ||
| 341 | 354 | ||
| 342 | (defun regexp-opt--contains-prefix (strings) | ||
| 343 | "Whether STRINGS contains a proper prefix of one of its other elements. | ||
| 344 | STRINGS must be a list of sorted strings without duplicates." | ||
| 345 | (let ((s strings)) | ||
| 346 | ;; In a lexicographically sorted list, a string always immediately | ||
| 347 | ;; succeeds one of its prefixes. | ||
| 348 | (while (and (cdr s) | ||
| 349 | (not (string-equal | ||
| 350 | (car s) | ||
| 351 | (substring (cadr s) 0 (min (length (car s)) | ||
| 352 | (length (cadr s))))))) | ||
| 353 | (setq s (cdr s))) | ||
| 354 | (cdr s))) | ||
| 355 | |||
| 356 | |||
| 357 | (provide 'regexp-opt) | 355 | (provide 'regexp-opt) |
| 358 | 356 | ||
| 359 | ;;; regexp-opt.el ends here | 357 | ;;; regexp-opt.el ends here |
diff --git a/test/lisp/emacs-lisp/regexp-opt-tests.el b/test/lisp/emacs-lisp/regexp-opt-tests.el index 927de8c6a5f..3658964faac 100644 --- a/test/lisp/emacs-lisp/regexp-opt-tests.el +++ b/test/lisp/emacs-lisp/regexp-opt-tests.el | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | ;;; regexp-opt-tests.el --- Tests for regexp-opt.el | 1 | ;;; regexp-opt-tests.el --- Tests for regexp-opt.el -*- lexical-binding: t -*- |
| 2 | 2 | ||
| 3 | ;; Copyright (C) 2013-2019 Free Software Foundation, Inc. | 3 | ;; Copyright (C) 2013-2019 Free Software Foundation, Inc. |
| 4 | 4 | ||
| @@ -25,6 +25,66 @@ | |||
| 25 | 25 | ||
| 26 | (require 'regexp-opt) | 26 | (require 'regexp-opt) |
| 27 | 27 | ||
| 28 | (defun regexp-opt-test--permutation (n list) | ||
| 29 | "The Nth permutation of LIST, 0 ≤ N < (length LIST)!." | ||
| 30 | (let ((len (length list)) | ||
| 31 | (perm-list nil)) | ||
| 32 | (dotimes (i len) | ||
| 33 | (let* ((d (- len i)) | ||
| 34 | (k (mod n d))) | ||
| 35 | (push (nth k list) perm-list) | ||
| 36 | (setq list (append (butlast list (- (length list) k)) | ||
| 37 | (nthcdr (1+ k) list))) | ||
| 38 | (setq n (/ n d)))) | ||
| 39 | (nreverse perm-list))) | ||
| 40 | |||
| 41 | (defun regexp-opt-test--factorial (n) | ||
| 42 | "N!" | ||
| 43 | (apply #'* (number-sequence 1 n))) | ||
| 44 | |||
| 45 | (defun regexp-opt-test--permutations (list) | ||
| 46 | "All permutations of LIST." | ||
| 47 | (mapcar (lambda (i) (regexp-opt-test--permutation i list)) | ||
| 48 | (number-sequence 0 (1- (regexp-opt-test--factorial (length list)))))) | ||
| 49 | |||
| 50 | (defun regexp-opt-test--match-all (words re) | ||
| 51 | (mapcar (lambda (w) (and (string-match re w) | ||
| 52 | (match-string 0 w))) | ||
| 53 | words)) | ||
| 54 | |||
| 55 | (defun regexp-opt-test--check-perm (perm) | ||
| 56 | (let* ((ref-re (mapconcat #'regexp-quote perm "\\|")) | ||
| 57 | (opt-re (regexp-opt perm nil t)) | ||
| 58 | (ref (regexp-opt-test--match-all perm ref-re)) | ||
| 59 | (opt (regexp-opt-test--match-all perm opt-re))) | ||
| 60 | (equal opt ref))) | ||
| 61 | |||
| 62 | (defun regexp-opt-test--explain-perm (perm) | ||
| 63 | (let* ((ref-re (mapconcat #'regexp-quote perm "\\|")) | ||
| 64 | (opt-re (regexp-opt perm nil t)) | ||
| 65 | (ref (regexp-opt-test--match-all perm ref-re)) | ||
| 66 | (opt (regexp-opt-test--match-all perm opt-re))) | ||
| 67 | (concat "\n" | ||
| 68 | (format "Naïve regexp: %s\n" ref-re) | ||
| 69 | (format "Optimised regexp: %s\n" opt-re) | ||
| 70 | (format "Got: %s\n" opt) | ||
| 71 | (format "Expected: %s\n" ref)))) | ||
| 72 | |||
| 73 | (put 'regexp-opt-test--check-perm 'ert-explainer 'regexp-opt-test--explain-perm) | ||
| 74 | |||
| 75 | (ert-deftest regexp-opt-keep-order () | ||
| 76 | "Check that KEEP-ORDER works." | ||
| 77 | (dolist (perm (regexp-opt-test--permutations '("abc" "bca" "cab"))) | ||
| 78 | (should (regexp-opt-test--check-perm perm))) | ||
| 79 | (dolist (perm (regexp-opt-test--permutations '("abc" "ab" "bca" "bc"))) | ||
| 80 | (should (regexp-opt-test--check-perm perm))) | ||
| 81 | (dolist (perm (regexp-opt-test--permutations '("abxy" "cdxy"))) | ||
| 82 | (should (regexp-opt-test--check-perm perm))) | ||
| 83 | (dolist (perm (regexp-opt-test--permutations '("afgx" "bfgx" "afgy" "bfgy"))) | ||
| 84 | (should (regexp-opt-test--check-perm perm))) | ||
| 85 | (dolist (perm (regexp-opt-test--permutations '("a" "ab" "ac" "abc"))) | ||
| 86 | (should (regexp-opt-test--check-perm perm)))) | ||
| 87 | |||
| 28 | (ert-deftest regexp-opt-charset () | 88 | (ert-deftest regexp-opt-charset () |
| 29 | (should (equal (regexp-opt-charset '(?a ?b ?a)) "[ab]")) | 89 | (should (equal (regexp-opt-charset '(?a ?b ?a)) "[ab]")) |
| 30 | (should (equal (regexp-opt-charset '(?D ?d ?B ?a ?b ?C ?7 ?a ?c ?A)) | 90 | (should (equal (regexp-opt-charset '(?D ?d ?B ?a ?b ?C ?7 ?a ?c ?A)) |