diff options
| author | Juri Linkov | 2019-07-04 23:49:33 +0300 |
|---|---|---|
| committer | Juri Linkov | 2019-07-04 23:49:33 +0300 |
| commit | 19b1cefa3ba00ea383bd0910523c6e972fedbe02 (patch) | |
| tree | 2ca0649d576434fb9604593ea8845e422577134b | |
| parent | 4a754df8858dc7acec9413f4f11064230d6741cf (diff) | |
| download | emacs-19b1cefa3ba00ea383bd0910523c6e972fedbe02.tar.gz emacs-19b1cefa3ba00ea383bd0910523c6e972fedbe02.zip | |
* lisp/char-fold.el (char-fold-to-regexp): Implement arg LAX (bug#36398).
* test/lisp/char-fold-tests.el (char-fold--test-multi-lax): New test.
| -rw-r--r-- | lisp/char-fold.el | 64 | ||||
| -rw-r--r-- | test/lisp/char-fold-tests.el | 8 |
2 files changed, 45 insertions, 27 deletions
diff --git a/lisp/char-fold.el b/lisp/char-fold.el index 7223ecf738c..9d3ea17b413 100644 --- a/lisp/char-fold.el +++ b/lisp/char-fold.el | |||
| @@ -148,12 +148,18 @@ Exceptionally for the space character (32), ALIST is ignored.") | |||
| 148 | (make-list n (or (aref char-fold-table ?\s) " "))))) | 148 | (make-list n (or (aref char-fold-table ?\s) " "))))) |
| 149 | 149 | ||
| 150 | ;;;###autoload | 150 | ;;;###autoload |
| 151 | (defun char-fold-to-regexp (string &optional _lax from) | 151 | (defun char-fold-to-regexp (string &optional lax from) |
| 152 | "Return a regexp matching anything that char-folds into STRING. | 152 | "Return a regexp matching anything that char-folds into STRING. |
| 153 | Any character in STRING that has an entry in | 153 | Any character in STRING that has an entry in |
| 154 | `char-fold-table' is replaced with that entry (which is a | 154 | `char-fold-table' is replaced with that entry (which is a |
| 155 | regexp) and other characters are `regexp-quote'd. | 155 | regexp) and other characters are `regexp-quote'd. |
| 156 | 156 | ||
| 157 | When LAX is non-nil, then the final character also matches ligatures | ||
| 158 | partially, for instance, the search string \"f\" will match \"fi\", | ||
| 159 | so when typing the search string in isearch while the cursor is on | ||
| 160 | a ligature, the search won't try to immediately advance to the next | ||
| 161 | complete match, but will stay on the partially matched ligature. | ||
| 162 | |||
| 157 | If the resulting regexp would be too long for Emacs to handle, | 163 | If the resulting regexp would be too long for Emacs to handle, |
| 158 | just return the result of calling `regexp-quote' on STRING. | 164 | just return the result of calling `regexp-quote' on STRING. |
| 159 | 165 | ||
| @@ -183,36 +189,40 @@ from which to start." | |||
| 183 | ;; Long string. The regexp would probably be too long. | 189 | ;; Long string. The regexp would probably be too long. |
| 184 | (alist (unless (> end 50) | 190 | (alist (unless (> end 50) |
| 185 | (aref multi-char-table c)))) | 191 | (aref multi-char-table c)))) |
| 186 | (push (let ((matched-entries nil) | 192 | (push (if (and lax alist (= (1+ i) end)) |
| 187 | (max-length 0)) | 193 | (concat "\\(?:" regexp "\\|" |
| 188 | (dolist (entry alist) | 194 | (mapconcat (lambda (entry) |
| 189 | (let* ((suffix (car entry)) | 195 | (cdr entry)) alist "\\|") "\\)") |
| 190 | (len-suf (length suffix))) | 196 | (let ((matched-entries nil) |
| 191 | (when (eq (compare-strings suffix 0 nil | 197 | (max-length 0)) |
| 192 | string (1+ i) (+ i 1 len-suf) | 198 | (dolist (entry alist) |
| 193 | nil) | 199 | (let* ((suffix (car entry)) |
| 194 | t) | 200 | (len-suf (length suffix))) |
| 195 | (push (cons len-suf (cdr entry)) matched-entries) | 201 | (when (eq (compare-strings suffix 0 nil |
| 196 | (setq max-length (max max-length len-suf))))) | 202 | string (1+ i) (+ i 1 len-suf) |
| 197 | ;; If no suffixes matched, just go on. | 203 | nil) |
| 198 | (if (not matched-entries) | 204 | t) |
| 199 | regexp | 205 | (push (cons len-suf (cdr entry)) matched-entries) |
| 206 | (setq max-length (max max-length len-suf))))) | ||
| 207 | ;; If no suffixes matched, just go on. | ||
| 208 | (if (not matched-entries) | ||
| 209 | regexp | ||
| 200 | ;;; If N suffixes match, we "branch" out into N+1 executions for the | 210 | ;;; If N suffixes match, we "branch" out into N+1 executions for the |
| 201 | ;;; length of the longest match. This means "fix" will match "fix" but | 211 | ;;; length of the longest match. This means "fix" will match "fix" but |
| 202 | ;;; not "fⅸ", but it's necessary to keep the regexp size from scaling | 212 | ;;; not "fⅸ", but it's necessary to keep the regexp size from scaling |
| 203 | ;;; exponentially. See https://lists.gnu.org/r/emacs-devel/2015-11/msg02562.html | 213 | ;;; exponentially. See https://lists.gnu.org/r/emacs-devel/2015-11/msg02562.html |
| 204 | (let ((subs (substring string (1+ i) (+ i 1 max-length)))) | 214 | (let ((subs (substring string (1+ i) (+ i 1 max-length)))) |
| 205 | ;; `i' is still going to inc by 1 below. | 215 | ;; `i' is still going to inc by 1 below. |
| 206 | (setq i (+ i max-length)) | 216 | (setq i (+ i max-length)) |
| 207 | (concat | 217 | (concat |
| 208 | "\\(?:" | 218 | "\\(?:" |
| 209 | (mapconcat (lambda (entry) | 219 | (mapconcat (lambda (entry) |
| 210 | (let ((length (car entry)) | 220 | (let ((length (car entry)) |
| 211 | (suffix-regexp (cdr entry))) | 221 | (suffix-regexp (cdr entry))) |
| 212 | (concat suffix-regexp | 222 | (concat suffix-regexp |
| 213 | (char-fold-to-regexp subs nil length)))) | 223 | (char-fold-to-regexp subs nil length)))) |
| 214 | `((0 . ,regexp) . ,matched-entries) "\\|") | 224 | `((0 . ,regexp) . ,matched-entries) "\\|") |
| 215 | "\\)")))) | 225 | "\\)"))))) |
| 216 | out)))) | 226 | out)))) |
| 217 | (setq i (1+ i))) | 227 | (setq i (1+ i))) |
| 218 | (when (> spaces 0) | 228 | (when (> spaces 0) |
diff --git a/test/lisp/char-fold-tests.el b/test/lisp/char-fold-tests.el index 3fde312a133..e9dfd2b7336 100644 --- a/test/lisp/char-fold-tests.el +++ b/test/lisp/char-fold-tests.el | |||
| @@ -82,6 +82,14 @@ | |||
| 82 | (set-char-table-extra-slot char-fold-table 0 multi) | 82 | (set-char-table-extra-slot char-fold-table 0 multi) |
| 83 | (char-fold--test-match-exactly (car it) (cdr it))))) | 83 | (char-fold--test-match-exactly (car it) (cdr it))))) |
| 84 | 84 | ||
| 85 | (ert-deftest char-fold--test-multi-lax () | ||
| 86 | (dolist (it '(("f" . "fi") ("f" . "ff"))) | ||
| 87 | (with-temp-buffer | ||
| 88 | (insert (cdr it)) | ||
| 89 | (goto-char (point-min)) | ||
| 90 | (should (search-forward-regexp | ||
| 91 | (char-fold-to-regexp (car it) 'lax) nil 'noerror))))) | ||
| 92 | |||
| 85 | (ert-deftest char-fold--test-fold-to-regexp () | 93 | (ert-deftest char-fold--test-fold-to-regexp () |
| 86 | (let ((char-fold-table (make-char-table 'char-fold-table)) | 94 | (let ((char-fold-table (make-char-table 'char-fold-table)) |
| 87 | (multi (make-char-table 'char-fold-table))) | 95 | (multi (make-char-table 'char-fold-table))) |