diff options
| author | Robert Pluim | 2019-07-22 20:27:59 +0200 |
|---|---|---|
| committer | Robert Pluim | 2019-07-23 19:39:06 +0200 |
| commit | f9337bc36d17a8819c0d05be8d3a1edcc34c6c79 (patch) | |
| tree | e4c06ceea14068f3b90035e6db484c4baa42bcaa | |
| parent | 9a83ecb60a0dd280fe892adfe3bbefd2d55d13bd (diff) | |
| download | emacs-f9337bc36d17a8819c0d05be8d3a1edcc34c6c79.tar.gz emacs-f9337bc36d17a8819c0d05be8d3a1edcc34c6c79.zip | |
Follow decomposition chains when constructing char-fold-table
* lisp/char-fold.el (char-fold-make-table): Decompose the
decomposition of each character, adding equivalences to the original
character, until no more decompositions are left.
| -rw-r--r-- | etc/NEWS | 8 | ||||
| -rw-r--r-- | lisp/char-fold.el | 19 |
2 files changed, 27 insertions, 0 deletions
| @@ -1167,6 +1167,14 @@ and case-sensitivity together with search strings in the search ring. | |||
| 1167 | +++ | 1167 | +++ |
| 1168 | *** 'flush-lines' prints and returns the number of deleted matching lines. | 1168 | *** 'flush-lines' prints and returns the number of deleted matching lines. |
| 1169 | 1169 | ||
| 1170 | --- | ||
| 1171 | *** 'char-fold-to-regexp' now matches more variants of a base character. | ||
| 1172 | The table used to check for equivalence of characters is now built | ||
| 1173 | using the complete chain of Unicode decompositions of a character, | ||
| 1174 | rather than stopping after one level, such that searching for | ||
| 1175 | e.g. GREEK SMALL LETTER IOTA will now also find GREEK SMALL LETTER | ||
| 1176 | IOTA WITH OXIA. | ||
| 1177 | |||
| 1170 | ** Debugger | 1178 | ** Debugger |
| 1171 | 1179 | ||
| 1172 | +++ | 1180 | +++ |
diff --git a/lisp/char-fold.el b/lisp/char-fold.el index 9d3ea17b413..a5c4e5e411b 100644 --- a/lisp/char-fold.el +++ b/lisp/char-fold.el | |||
| @@ -78,6 +78,25 @@ | |||
| 78 | (cons (char-to-string char) | 78 | (cons (char-to-string char) |
| 79 | (aref equiv (car decomp)))))))) | 79 | (aref equiv (car decomp)))))))) |
| 80 | (funcall make-decomp-match-char decomp char) | 80 | (funcall make-decomp-match-char decomp char) |
| 81 | ;; Check to see if the first char of the decomposition | ||
| 82 | ;; has a further decomposition. If so, add a mapping | ||
| 83 | ;; back from that second decomposition to the original | ||
| 84 | ;; character. This allows e.g. 'ι' (GREEK SMALL LETTER | ||
| 85 | ;; IOTA) to match both the Basic Greek block and | ||
| 86 | ;; Extended Greek block variants of IOTA + | ||
| 87 | ;; diacritical(s). Repeat until there are no more | ||
| 88 | ;; decompositions. | ||
| 89 | (let ((dec decomp) | ||
| 90 | next-decomp) | ||
| 91 | (while dec | ||
| 92 | (setq next-decomp (char-table-range table (car dec))) | ||
| 93 | (when (consp next-decomp) | ||
| 94 | (when (symbolp (car next-decomp)) | ||
| 95 | (setq next-decomp (cdr next-decomp))) | ||
| 96 | (if (not (eq (car dec) | ||
| 97 | (car next-decomp))) | ||
| 98 | (funcall make-decomp-match-char (list (car next-decomp)) char))) | ||
| 99 | (setq dec next-decomp))) | ||
| 81 | ;; Do it again, without the non-spacing characters. | 100 | ;; Do it again, without the non-spacing characters. |
| 82 | ;; This allows 'a' to match 'ä'. | 101 | ;; This allows 'a' to match 'ä'. |
| 83 | (let ((simpler-decomp nil) | 102 | (let ((simpler-decomp nil) |