aboutsummaryrefslogtreecommitdiffstats
path: root/lisp
diff options
context:
space:
mode:
authorJuri Linkov2019-07-04 23:49:33 +0300
committerJuri Linkov2019-07-04 23:49:33 +0300
commit19b1cefa3ba00ea383bd0910523c6e972fedbe02 (patch)
tree2ca0649d576434fb9604593ea8845e422577134b /lisp
parent4a754df8858dc7acec9413f4f11064230d6741cf (diff)
downloademacs-19b1cefa3ba00ea383bd0910523c6e972fedbe02.tar.gz
emacs-19b1cefa3ba00ea383bd0910523c6e972fedbe02.zip
* lisp/char-fold.el (char-fold-to-regexp): Implement arg LAX (bug#36398).
* test/lisp/char-fold-tests.el (char-fold--test-multi-lax): New test.
Diffstat (limited to 'lisp')
-rw-r--r--lisp/char-fold.el64
1 files changed, 37 insertions, 27 deletions
diff --git a/lisp/char-fold.el b/lisp/char-fold.el
index 7223ecf738c..9d3ea17b413 100644
--- a/lisp/char-fold.el
+++ b/lisp/char-fold.el
@@ -148,12 +148,18 @@ Exceptionally for the space character (32), ALIST is ignored.")
148 (make-list n (or (aref char-fold-table ?\s) " "))))) 148 (make-list n (or (aref char-fold-table ?\s) " ")))))
149 149
150;;;###autoload 150;;;###autoload
151(defun char-fold-to-regexp (string &optional _lax from) 151(defun char-fold-to-regexp (string &optional lax from)
152 "Return a regexp matching anything that char-folds into STRING. 152 "Return a regexp matching anything that char-folds into STRING.
153Any character in STRING that has an entry in 153Any character in STRING that has an entry in
154`char-fold-table' is replaced with that entry (which is a 154`char-fold-table' is replaced with that entry (which is a
155regexp) and other characters are `regexp-quote'd. 155regexp) and other characters are `regexp-quote'd.
156 156
157When LAX is non-nil, then the final character also matches ligatures
158partially, for instance, the search string \"f\" will match \"fi\",
159so when typing the search string in isearch while the cursor is on
160a ligature, the search won't try to immediately advance to the next
161complete match, but will stay on the partially matched ligature.
162
157If the resulting regexp would be too long for Emacs to handle, 163If the resulting regexp would be too long for Emacs to handle,
158just return the result of calling `regexp-quote' on STRING. 164just return the result of calling `regexp-quote' on STRING.
159 165
@@ -183,36 +189,40 @@ from which to start."
183 ;; Long string. The regexp would probably be too long. 189 ;; Long string. The regexp would probably be too long.
184 (alist (unless (> end 50) 190 (alist (unless (> end 50)
185 (aref multi-char-table c)))) 191 (aref multi-char-table c))))
186 (push (let ((matched-entries nil) 192 (push (if (and lax alist (= (1+ i) end))
187 (max-length 0)) 193 (concat "\\(?:" regexp "\\|"
188 (dolist (entry alist) 194 (mapconcat (lambda (entry)
189 (let* ((suffix (car entry)) 195 (cdr entry)) alist "\\|") "\\)")
190 (len-suf (length suffix))) 196 (let ((matched-entries nil)
191 (when (eq (compare-strings suffix 0 nil 197 (max-length 0))
192 string (1+ i) (+ i 1 len-suf) 198 (dolist (entry alist)
193 nil) 199 (let* ((suffix (car entry))
194 t) 200 (len-suf (length suffix)))
195 (push (cons len-suf (cdr entry)) matched-entries) 201 (when (eq (compare-strings suffix 0 nil
196 (setq max-length (max max-length len-suf))))) 202 string (1+ i) (+ i 1 len-suf)
197 ;; If no suffixes matched, just go on. 203 nil)
198 (if (not matched-entries) 204 t)
199 regexp 205 (push (cons len-suf (cdr entry)) matched-entries)
206 (setq max-length (max max-length len-suf)))))
207 ;; If no suffixes matched, just go on.
208 (if (not matched-entries)
209 regexp
200;;; If N suffixes match, we "branch" out into N+1 executions for the 210;;; If N suffixes match, we "branch" out into N+1 executions for the
201;;; length of the longest match. This means "fix" will match "fix" but 211;;; length of the longest match. This means "fix" will match "fix" but
202;;; not "fⅸ", but it's necessary to keep the regexp size from scaling 212;;; not "fⅸ", but it's necessary to keep the regexp size from scaling
203;;; exponentially. See https://lists.gnu.org/r/emacs-devel/2015-11/msg02562.html 213;;; exponentially. See https://lists.gnu.org/r/emacs-devel/2015-11/msg02562.html
204 (let ((subs (substring string (1+ i) (+ i 1 max-length)))) 214 (let ((subs (substring string (1+ i) (+ i 1 max-length))))
205 ;; `i' is still going to inc by 1 below. 215 ;; `i' is still going to inc by 1 below.
206 (setq i (+ i max-length)) 216 (setq i (+ i max-length))
207 (concat 217 (concat
208 "\\(?:" 218 "\\(?:"
209 (mapconcat (lambda (entry) 219 (mapconcat (lambda (entry)
210 (let ((length (car entry)) 220 (let ((length (car entry))
211 (suffix-regexp (cdr entry))) 221 (suffix-regexp (cdr entry)))
212 (concat suffix-regexp 222 (concat suffix-regexp
213 (char-fold-to-regexp subs nil length)))) 223 (char-fold-to-regexp subs nil length))))
214 `((0 . ,regexp) . ,matched-entries) "\\|") 224 `((0 . ,regexp) . ,matched-entries) "\\|")
215 "\\)")))) 225 "\\)")))))
216 out)))) 226 out))))
217 (setq i (1+ i))) 227 (setq i (1+ i)))
218 (when (> spaces 0) 228 (when (> spaces 0)