aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArtur Malabarba2015-01-27 14:08:01 -0200
committerArtur Malabarba2015-06-23 20:09:06 +0100
commitc7a19e0c80ec6134ab6fb1950d3e1ac59a7b986f (patch)
tree0efcd7a931162664e9992a34277709e31b003c0b
parent2ca5558395c92b25b58478627b426c66f8e4f170 (diff)
downloademacs-c7a19e0c80ec6134ab6fb1950d3e1ac59a7b986f.tar.gz
emacs-c7a19e0c80ec6134ab6fb1950d3e1ac59a7b986f.zip
* lisp/isearch.el: Fold many unicode characters to ASCII
(isearch-character-fold-search, isearch--character-fold-extras) (isearch--character-fold-table): New variable. (isearch--character-folded-regexp): New function. (isearch-search-fun-default): Use them. * lisp/replace.el (replace-character-fold): New variable. (replace-search): Use it. * etc/NEWS: Document it.
-rw-r--r--etc/NEWS14
-rw-r--r--lisp/isearch.el78
-rw-r--r--lisp/replace.el9
3 files changed, 101 insertions, 0 deletions
diff --git a/etc/NEWS b/etc/NEWS
index 31055ac5303..e4cf2d65e09 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -84,6 +84,20 @@ command line when `initial-buffer-choice' is non-nil.
84 84
85* Changes in Emacs 25.1 85* Changes in Emacs 25.1
86 86
87** `isearch' and `query-replace' now perform character folding in matches.
88This is analogous to case-folding, but applies between Unicode
89characters and their ASCII counterparts. This means many characters
90will match entire groups of characters.
91
92For instance, the character " will match all variants of Unicode double quotes
93(like “ and ”), and the letter a will match all of its accented
94cousins, even those composed of multiple characters, as well as many
95other symbols like ℀, ℁, ⒜, and ⓐ.
96
97** New function `isearch--character-folded-regexp' can be used
98by searching commands to produce a regexp matching anything that
99character-folds into STRING.
100
87** New command `checkdoc-package-keywords' checks if the 101** New command `checkdoc-package-keywords' checks if the
88current package keywords are recognized. Set the new option 102current package keywords are recognized. Set the new option
89`checkdoc-package-keywords-flag' to non-nil to make 103`checkdoc-package-keywords-flag' to non-nil to make
diff --git a/lisp/isearch.el b/lisp/isearch.el
index d1b92bd6a9d..44ce9023d8a 100644
--- a/lisp/isearch.el
+++ b/lisp/isearch.el
@@ -272,6 +272,79 @@ Default value, nil, means edit the string instead."
272 :version "23.1" 272 :version "23.1"
273 :group 'isearch) 273 :group 'isearch)
274 274
275(defvar isearch-character-fold-search t
276 "Non-nil if isearch should fold similar characters.
277This means some characters will match entire groups of characters.
278For instance, \" will match all variants of double quotes, and
279the letter a will match all of its accented versions (and then
280some).")
281
282(defconst isearch--character-fold-extras
283 '((?\" "＂" "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»")
284 (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "󠀢" "❮" "❯" "‹" "›")
285 (?` "❛" "‘" "‛" "󠀢" "❮" "‹")
286 ;; `isearch-character-fold-search' doesn't interact with
287 ;; `isearch-lax-whitespace' yet. So we need to add this here.
288 (?\s " " "\r" "\n"))
289 "Extra entries to add to `isearch--character-fold-table'.
290Used to specify character folding not covered by Unicode
291decomposition. Each car is a character and each cdr is a list of
292strings that it should match (itself excluded).")
293
294(defvar isearch--character-fold-table
295 (eval-when-compile
296 (require 'subr-x)
297 (let ((equiv (make-char-table 'character-fold-table)))
298 ;; Compile a list of all complex characters that each simple
299 ;; character should match.
300 (dotimes (i (length equiv))
301 (let ((dd (get-char-code-property i 'decomposition))
302 d k found)
303 ;; Skip trivial cases (?a decomposes to (?a)).
304 (unless (and (eq i (car dd)))
305 ;; Discard a possible formatting tag.
306 (when (symbolp (car-safe dd))
307 (setq dd (cdr dd)))
308 ;; Find the first k in the decomposition that is a letter or number (by general category).
309 (setq d dd)
310 (while (and d (not found))
311 (setq k (pop d))
312 (setq found (and (characterp k)
313 (memq (get-char-code-property k 'general-category)
314 '(Lu Ll Lt Lm Lo Nd Nl No)))))
315 ;; If there's no number or letter in the
316 ;; decomposition, fall back to the first character in it.
317 (setq d dd)
318 (while (and d (not found))
319 (setq k (pop d))
320 (setq found (characterp k)))
321 ;; Add i to the list of characters that k can
322 ;; represent. Also add its decomposition, so we can
323 ;; match multi-char representations like (format "a%c" 769).
324 (when (and found (not (eq i k)))
325 (aset equiv k (cons (apply #'string dd)
326 (cons (char-to-string i)
327 (aref equiv k))))))))
328 (dotimes (i (length equiv))
329 (when-let ((chars (append (cdr (assq i isearch--character-fold-extras))
330 (aref equiv i))))
331 (aset equiv i (regexp-opt (cons (char-to-string i) chars)))))
332 equiv))
333 "Char-table mapping characters to regexps, used for folding characters during search.")
334
335(defun isearch--character-folded-regexp (string)
336 "Return a regexp matching anything that character-folds into STRING.
337If `isearch-character-fold-search' is nil, `regexp-quote' STRING.
338Otherwise, any character in STRING that has an entry in
339`isearch--character-fold-table' is replaced with that entry
340\(which is a regexp) and other characters are `regexp-quote'd."
341 (if isearch-character-fold-search
342 (apply #'concat
343 (mapcar (lambda (c) (or (aref isearch--character-fold-table c)
344 (regexp-quote (string c))))
345 string))
346 (regexp-quote string)))
347
275(defcustom isearch-lazy-highlight t 348(defcustom isearch-lazy-highlight t
276 "Controls the lazy-highlighting during incremental search. 349 "Controls the lazy-highlighting during incremental search.
277When non-nil, all text in the buffer matching the current search 350When non-nil, all text in the buffer matching the current search
@@ -2607,6 +2680,11 @@ Can be changed via `isearch-search-fun-function' for special needs."
2607 're-search-backward-lax-whitespace)) 2680 're-search-backward-lax-whitespace))
2608 (isearch-regexp 2681 (isearch-regexp
2609 (if isearch-forward 're-search-forward 're-search-backward)) 2682 (if isearch-forward 're-search-forward 're-search-backward))
2683 (isearch-character-fold-search
2684 (lambda (string &optional bound noerror count)
2685 (funcall (if isearch-forward #'re-search-forward #'re-search-backward)
2686 (isearch--character-folded-regexp string)
2687 bound noerror count)))
2610 ((and isearch-lax-whitespace search-whitespace-regexp) 2688 ((and isearch-lax-whitespace search-whitespace-regexp)
2611 (if isearch-forward 2689 (if isearch-forward
2612 'search-forward-lax-whitespace 2690 'search-forward-lax-whitespace
diff --git a/lisp/replace.el b/lisp/replace.el
index 74909efa380..5e3ddc551fb 100644
--- a/lisp/replace.el
+++ b/lisp/replace.el
@@ -33,6 +33,14 @@
33 :type 'boolean 33 :type 'boolean
34 :group 'matching) 34 :group 'matching)
35 35
36(defcustom replace-character-fold t
37 "Non-nil means `query-replace' should do character folding in matches.
38This means, for instance, that ' will match a large variety of
39Unicode quotes."
40 :type 'boolean
41 :group 'matching
42 :version "25.1")
43
36(defcustom replace-lax-whitespace nil 44(defcustom replace-lax-whitespace nil
37 "Non-nil means `query-replace' matches a sequence of whitespace chars. 45 "Non-nil means `query-replace' matches a sequence of whitespace chars.
38When you enter a space or spaces in the strings to be replaced, 46When you enter a space or spaces in the strings to be replaced,
@@ -2005,6 +2013,7 @@ It is called with three arguments, as if it were
2005 ;; used after `recursive-edit' might override them. 2013 ;; used after `recursive-edit' might override them.
2006 (let* ((isearch-regexp regexp-flag) 2014 (let* ((isearch-regexp regexp-flag)
2007 (isearch-word delimited-flag) 2015 (isearch-word delimited-flag)
2016 (isearch-character-fold-search replace-character-fold)
2008 (isearch-lax-whitespace 2017 (isearch-lax-whitespace
2009 replace-lax-whitespace) 2018 replace-lax-whitespace)
2010 (isearch-regexp-lax-whitespace 2019 (isearch-regexp-lax-whitespace