diff options
| author | Artur Malabarba | 2015-01-27 14:08:01 -0200 |
|---|---|---|
| committer | Artur Malabarba | 2015-06-23 20:09:06 +0100 |
| commit | c7a19e0c80ec6134ab6fb1950d3e1ac59a7b986f (patch) | |
| tree | 0efcd7a931162664e9992a34277709e31b003c0b | |
| parent | 2ca5558395c92b25b58478627b426c66f8e4f170 (diff) | |
| download | emacs-c7a19e0c80ec6134ab6fb1950d3e1ac59a7b986f.tar.gz emacs-c7a19e0c80ec6134ab6fb1950d3e1ac59a7b986f.zip | |
* lisp/isearch.el: Fold many unicode characters to ASCII
(isearch-character-fold-search, isearch--character-fold-extras)
(isearch--character-fold-table): New variable.
(isearch--character-folded-regexp): New function.
(isearch-search-fun-default): Use them.
* lisp/replace.el (replace-character-fold): New variable.
(replace-search): Use it.
* etc/NEWS: Document it.
| -rw-r--r-- | etc/NEWS | 14 | ||||
| -rw-r--r-- | lisp/isearch.el | 78 | ||||
| -rw-r--r-- | lisp/replace.el | 9 |
3 files changed, 101 insertions, 0 deletions
| @@ -84,6 +84,20 @@ command line when `initial-buffer-choice' is non-nil. | |||
| 84 | 84 | ||
| 85 | * Changes in Emacs 25.1 | 85 | * Changes in Emacs 25.1 |
| 86 | 86 | ||
| 87 | ** `isearch' and `query-replace' now perform character folding in matches. | ||
| 88 | This is analogous to case-folding, but applies between Unicode | ||
| 89 | characters and their ASCII counterparts. This means many characters | ||
| 90 | will match entire groups of characters. | ||
| 91 | |||
| 92 | For instance, the " will match all variants of unicode double quotes | ||
| 93 | (like “ and ”), and the letter a will match all of its accented | ||
| 94 | cousins, even those composed of multiple characters, as well as many | ||
| 95 | other symbols like ℀, ℁, ⒜, and ⓐ. | ||
| 96 | |||
| 97 | ** New function `isearch--character-folded-regexp' can be used | ||
| 98 | by searching commands to produce a regexp matching anything that | ||
| 99 | character-folds into STRING. | ||
| 100 | |||
| 87 | ** New command `checkdoc-package-keywords' checks if the | 101 | ** New command `checkdoc-package-keywords' checks if the |
| 88 | current package keywords are recognized. Set the new option | 102 | current package keywords are recognized. Set the new option |
| 89 | `checkdoc-package-keywords-flag' to non-nil to make | 103 | `checkdoc-package-keywords-flag' to non-nil to make |
diff --git a/lisp/isearch.el b/lisp/isearch.el index d1b92bd6a9d..44ce9023d8a 100644 --- a/lisp/isearch.el +++ b/lisp/isearch.el | |||
| @@ -272,6 +272,79 @@ Default value, nil, means edit the string instead." | |||
| 272 | :version "23.1" | 272 | :version "23.1" |
| 273 | :group 'isearch) | 273 | :group 'isearch) |
| 274 | 274 | ||
| 275 | (defvar isearch-character-fold-search t | ||
| 276 | "Non-nil if isearch should fold similar characters. | ||
| 277 | This means some characters will match entire groups of characters. | ||
| 278 | For instance, \" will match all variants of double quotes, and | ||
| 279 | the letter a will match all of its accented versions (and then | ||
| 280 | some).") | ||
| 281 | |||
| 282 | (defconst isearch--character-fold-extras | ||
| 283 | '((?\" """ "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»") | ||
| 284 | (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "" "❮" "❯" "‹" "›") | ||
| 285 | (?` "❛" "‘" "‛" "" "❮" "‹") | ||
| 286 | ;; `isearch-character-fold-search' doesn't interact with | ||
| 287 | ;; `isearch-lax-whitespace' yet. So we need to add this here. | ||
| 288 | (?\s " " "\r" "\n")) | ||
| 289 | "Extra entries to add to `isearch--character-fold-table'. | ||
| 290 | Used to specify character folding not covered by unicode | ||
| 291 | decomposition. Each car is a character and each cdr is a list of | ||
| 292 | strings that it should match (itself excluded).") | ||
| 293 | |||
| 294 | (defvar isearch--character-fold-table | ||
| 295 | (eval-when-compile | ||
| 296 | (require 'subr-x) | ||
| 297 | (let ((equiv (make-char-table 'character-fold-table))) | ||
| 298 | ;; Compile a list of all complex characters that each simple | ||
| 299 | ;; character should match. | ||
| 300 | (dotimes (i (length equiv)) | ||
| 301 | (let ((dd (get-char-code-property i 'decomposition)) | ||
| 302 | d k found) | ||
| 303 | ;; Skip trivial cases (?a decomposes to (?a)). | ||
| 304 | (unless (and (eq i (car dd))) | ||
| 305 | ;; Discard a possible formatting tag. | ||
| 306 | (when (symbolp (car-safe dd)) | ||
| 307 | (setq dd (cdr dd))) | ||
| 308 | ;; Is k a number or letter, per unicode standard? | ||
| 309 | (setq d dd) | ||
| 310 | (while (and d (not found)) | ||
| 311 | (setq k (pop d)) | ||
| 312 | (setq found (and (characterp k) | ||
| 313 | (memq (get-char-code-property k 'general-category) | ||
| 314 | '(Lu Ll Lt Lm Lo Nd Nl No))))) | ||
| 315 | ;; If there's no number or letter on the | ||
| 316 | ;; decomposition, find the first character in it. | ||
| 317 | (setq d dd) | ||
| 318 | (while (and d (not found)) | ||
| 319 | (setq k (pop d)) | ||
| 320 | (setq found (characterp k))) | ||
| 321 | ;; Add i to the list of characters that k can | ||
| 322 | ;; represent. Also add its decomposition, so we can | ||
| 323 | ;; match multi-char representations like (format "a%c" 769) | ||
| 324 | (when (and found (not (eq i k))) | ||
| 325 | (aset equiv k (cons (apply #'string dd) | ||
| 326 | (cons (char-to-string i) | ||
| 327 | (aref equiv k)))))))) | ||
| 328 | (dotimes (i (length equiv)) | ||
| 329 | (when-let ((chars (append (cdr (assq i isearch--character-fold-extras)) | ||
| 330 | (aref equiv i)))) | ||
| 331 | (aset equiv i (regexp-opt (cons (char-to-string i) chars))))) | ||
| 332 | equiv)) | ||
| 333 | "Used for folding characters of the same group during search.") | ||
| 334 | |||
| 335 | (defun isearch--character-folded-regexp (string) | ||
| 336 | "Return a regexp matching anything that character-folds into STRING. | ||
| 337 | If `isearch-character-fold-search' is nil, `regexp-quote' string. | ||
| 338 | Otherwise, any character in STRING that has an entry in | ||
| 339 | `isearch--character-fold-table' is replaced with that entry | ||
| 340 | \(which is a regexp) and other characters are `regexp-quote'd." | ||
| 341 | (if isearch-character-fold-search | ||
| 342 | (apply #'concat | ||
| 343 | (mapcar (lambda (c) (or (aref isearch--character-fold-table c) | ||
| 344 | (regexp-quote (string c)))) | ||
| 345 | string)) | ||
| 346 | (regexp-quote string))) | ||
| 347 | |||
| 275 | (defcustom isearch-lazy-highlight t | 348 | (defcustom isearch-lazy-highlight t |
| 276 | "Controls the lazy-highlighting during incremental search. | 349 | "Controls the lazy-highlighting during incremental search. |
| 277 | When non-nil, all text in the buffer matching the current search | 350 | When non-nil, all text in the buffer matching the current search |
| @@ -2607,6 +2680,11 @@ Can be changed via `isearch-search-fun-function' for special needs." | |||
| 2607 | 're-search-backward-lax-whitespace)) | 2680 | 're-search-backward-lax-whitespace)) |
| 2608 | (isearch-regexp | 2681 | (isearch-regexp |
| 2609 | (if isearch-forward 're-search-forward 're-search-backward)) | 2682 | (if isearch-forward 're-search-forward 're-search-backward)) |
| 2683 | (isearch-character-fold-search | ||
| 2684 | (lambda (string &optional bound noerror count) | ||
| 2685 | (funcall (if isearch-forward #'re-search-forward #'re-search-backward) | ||
| 2686 | (isearch--character-folded-regexp string) | ||
| 2687 | bound noerror count))) | ||
| 2610 | ((and isearch-lax-whitespace search-whitespace-regexp) | 2688 | ((and isearch-lax-whitespace search-whitespace-regexp) |
| 2611 | (if isearch-forward | 2689 | (if isearch-forward |
| 2612 | 'search-forward-lax-whitespace | 2690 | 'search-forward-lax-whitespace |
diff --git a/lisp/replace.el b/lisp/replace.el index 74909efa380..5e3ddc551fb 100644 --- a/lisp/replace.el +++ b/lisp/replace.el | |||
| @@ -33,6 +33,14 @@ | |||
| 33 | :type 'boolean | 33 | :type 'boolean |
| 34 | :group 'matching) | 34 | :group 'matching) |
| 35 | 35 | ||
| 36 | (defcustom replace-character-fold t | ||
| 37 | "Non-nil means `query-replace' should do character folding in matches. | ||
| 38 | This means, for instance, that ' will match a large variety of | ||
| 39 | unicode quotes." | ||
| 40 | :type 'boolean | ||
| 41 | :group 'matching | ||
| 42 | :version "25.1") | ||
| 43 | |||
| 36 | (defcustom replace-lax-whitespace nil | 44 | (defcustom replace-lax-whitespace nil |
| 37 | "Non-nil means `query-replace' matches a sequence of whitespace chars. | 45 | "Non-nil means `query-replace' matches a sequence of whitespace chars. |
| 38 | When you enter a space or spaces in the strings to be replaced, | 46 | When you enter a space or spaces in the strings to be replaced, |
| @@ -2005,6 +2013,7 @@ It is called with three arguments, as if it were | |||
| 2005 | ;; used after `recursive-edit' might override them. | 2013 | ;; used after `recursive-edit' might override them. |
| 2006 | (let* ((isearch-regexp regexp-flag) | 2014 | (let* ((isearch-regexp regexp-flag) |
| 2007 | (isearch-word delimited-flag) | 2015 | (isearch-word delimited-flag) |
| 2016 | (isearch-character-fold-search replace-character-fold) | ||
| 2008 | (isearch-lax-whitespace | 2017 | (isearch-lax-whitespace |
| 2009 | replace-lax-whitespace) | 2018 | replace-lax-whitespace) |
| 2010 | (isearch-regexp-lax-whitespace | 2019 | (isearch-regexp-lax-whitespace |