diff options
| author | Artur Malabarba | 2015-12-04 15:12:10 +0000 |
|---|---|---|
| committer | Artur Malabarba | 2015-12-04 15:12:34 +0000 |
| commit | 30f3432e9519f61882faa303e7851e761d2d18ea (patch) | |
| tree | fbb216d979d5c7af77f4c4018ab4c4bf06b2a85e | |
| parent | 3a9df7589ae189fc34a5fab98e82d85d2d40433f (diff) | |
| download | emacs-30f3432e9519f61882faa303e7851e761d2d18ea.tar.gz emacs-30f3432e9519f61882faa303e7851e761d2d18ea.zip | |
* lisp/character-fold.el: Remove special case-folding support
(character-fold-to-regexp): Remove special code for
case-folding. Char-fold search still respects the
`case-fold-search' variable (i.e., f matches F). This only
removes the code that was added to ensure that f also matched
all chars that F matched. For instance, after this commit, f
no longer matches 𝔽.
This was necessary because the logic created a regexp with
2^(length of the string) redundant paths. So, when a very
long string "almost" matched, Emacs took a very long time to
figure out that it didn't. This became particularly relevant
because isearch's lazy-highlight does a search bounded by (1-
match-end) (which, in most circumstances, is a search that
almost matches). A recipe for this can be found in bug#22090.
| -rw-r--r-- | lisp/character-fold.el | 22 | ||||
| -rw-r--r-- | test/automated/character-fold-tests.el | 21 |
2 files changed, 25 insertions, 18 deletions
diff --git a/lisp/character-fold.el b/lisp/character-fold.el index fb28bae7281..1e49fe2f0e5 100644 --- a/lisp/character-fold.el +++ b/lisp/character-fold.el | |||
| @@ -157,8 +157,6 @@ FROM is for internal use. It specifies an index in the STRING | |||
| 157 | from which to start." | 157 | from which to start." |
| 158 | (let* ((spaces 0) | 158 | (let* ((spaces 0) |
| 159 | (multi-char-table (char-table-extra-slot character-fold-table 0)) | 159 | (multi-char-table (char-table-extra-slot character-fold-table 0)) |
| 160 | (lower-case-table (current-case-table)) | ||
| 161 | (upper-case-table (char-table-extra-slot lower-case-table 0)) | ||
| 162 | (i (or from 0)) | 160 | (i (or from 0)) |
| 163 | (end (length string)) | 161 | (end (length string)) |
| 164 | (out nil)) | 162 | (out nil)) |
| @@ -178,21 +176,9 @@ from which to start." | |||
| 178 | (setq spaces 0)) | 176 | (setq spaces 0)) |
| 179 | (let ((regexp (or (aref character-fold-table c) | 177 | (let ((regexp (or (aref character-fold-table c) |
| 180 | (regexp-quote (string c)))) | 178 | (regexp-quote (string c)))) |
| 181 | (alist nil)) | 179 | ;; Long string. The regexp would probably be too long. |
| 182 | ;; Long string. The regexp would probably be too long. | 180 | (alist (unless (> end 50) |
| 183 | (unless (> end 50) | 181 | (aref multi-char-table c)))) |
| 184 | (setq alist (aref multi-char-table c)) | ||
| 185 | (when case-fold-search | ||
| 186 | (let ((other-c (aref lower-case-table c))) | ||
| 187 | (when (or (not other-c) | ||
| 188 | (eq other-c c)) | ||
| 189 | (setq other-c (aref upper-case-table c))) | ||
| 190 | (when other-c | ||
| 191 | (setq alist (append alist (aref multi-char-table other-c))) | ||
| 192 | (setq regexp (concat "\\(?:" regexp "\\|" | ||
| 193 | (or (aref character-fold-table other-c) | ||
| 194 | (regexp-quote (string other-c))) | ||
| 195 | "\\)")))))) | ||
| 196 | (push (let ((matched-entries nil) | 182 | (push (let ((matched-entries nil) |
| 197 | (max-length 0)) | 183 | (max-length 0)) |
| 198 | (dolist (entry alist) | 184 | (dolist (entry alist) |
| @@ -229,7 +215,7 @@ from which to start." | |||
| 229 | (push (character-fold--make-space-string spaces) out)) | 215 | (push (character-fold--make-space-string spaces) out)) |
| 230 | (let ((regexp (apply #'concat (nreverse out)))) | 216 | (let ((regexp (apply #'concat (nreverse out)))) |
| 231 | ;; Limited by `MAX_BUF_SIZE' in `regex.c'. | 217 | ;; Limited by `MAX_BUF_SIZE' in `regex.c'. |
| 232 | (if (> (length regexp) 10000) | 218 | (if (> (length regexp) 5000) |
| 233 | (regexp-quote string) | 219 | (regexp-quote string) |
| 234 | regexp)))) | 220 | regexp)))) |
| 235 | 221 | ||
diff --git a/test/automated/character-fold-tests.el b/test/automated/character-fold-tests.el index 40735e5df7f..4e8761e6f7b 100644 --- a/test/automated/character-fold-tests.el +++ b/test/automated/character-fold-tests.el | |||
| @@ -98,6 +98,27 @@ | |||
| 98 | ;; (character-fold--test-match-exactly "a12" "xxyy") | 98 | ;; (character-fold--test-match-exactly "a12" "xxyy") |
| 99 | )) | 99 | )) |
| 100 | 100 | ||
| 101 | (ert-deftest character-fold--speed-test () | ||
| 102 | (dolist (string (append '("tty-set-up-initial-frame-face" | ||
| 103 | "tty-set-up-initial-frame-face-frame-faceframe-faceframe-faceframe-face") | ||
| 104 | (mapcar #'character-fold--random-word '(10 50 100 | ||
| 105 | 50 100)))) | ||
| 106 | (message "Testing %s" string) | ||
| 107 | ;; Make sure we didn't just fallback on the trivial search. | ||
| 108 | (should-not (string= (regexp-quote string) | ||
| 109 | (character-fold-to-regexp string))) | ||
| 110 | (with-temp-buffer | ||
| 111 | (save-excursion (insert string)) | ||
| 112 | (let ((time (time-to-seconds (current-time)))) | ||
| 113 | ;; Our initial implementation of case-folding in char-folding | ||
| 114 | ;; created a lot of redundant paths in the regexp. Because of | ||
| 115 | ;; that, if a really long string "almost" matches, the regexp | ||
| 116 | ;; engine took a long time to realise that it doesn't match. | ||
| 117 | (should-not (character-fold-search-forward (concat string "c") nil 'noerror)) | ||
| 118 | ;; Ensure it took less than a second. | ||
| 119 | (should (< (- (time-to-seconds (current-time)) | ||
| 120 | time) | ||
| 121 | 1)))))) | ||
| 101 | 122 | ||
| 102 | (provide 'character-fold-tests) | 123 | (provide 'character-fold-tests) |
| 103 | ;;; character-fold-tests.el ends here | 124 | ;;; character-fold-tests.el ends here |