aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJuri Linkov2019-07-23 23:27:28 +0300
committerJuri Linkov2019-07-23 23:27:28 +0300
commit376f5df3cca0dbf186823e5b329d76b52019473d (patch)
treed13fed57ed0b5d306d27846e87ee1e21b2f9900a
parenta48726ebae2f44ed15b97cb72bc7eca199d8de47 (diff)
downloademacs-376f5df3cca0dbf186823e5b329d76b52019473d.tar.gz
emacs-376f5df3cca0dbf186823e5b329d76b52019473d.zip
Customizable char-fold with char-fold-symmetric, char-fold-include (bug#35689)
* doc/emacs/search.texi (Lax Search): Document char-fold-symmetric, char-fold-include, char-fold-exclude. * lisp/char-fold.el (char-fold--default-include) (char-fold--default-exclude, char-fold--default-symmetric) (char-fold--previous): New defconsts. (char-fold-include, char-fold-exclude, char-fold-symmetric): New defcustoms. (char-fold-make-table): Use them. (char-fold-update-table): New function called at top-level. * test/lisp/char-fold-tests.el (char-fold--test-no-match-exactly) (char-fold--permutation): New functions. (char-fold--test-without-customization) (char-fold--test-with-customization): New tests.
-rw-r--r--doc/emacs/search.texi19
-rw-r--r--etc/NEWS9
-rw-r--r--lisp/char-fold.el131
-rw-r--r--test/lisp/char-fold-tests.el75
4 files changed, 222 insertions, 12 deletions
diff --git a/doc/emacs/search.texi b/doc/emacs/search.texi
index b47d51a2b66..66af5d40162 100644
--- a/doc/emacs/search.texi
+++ b/doc/emacs/search.texi
@@ -1354,10 +1354,21 @@ folding, but only for that search. (Replace commands have a different
1354default, controlled by a separate option; see @ref{Replacement and Lax 1354default, controlled by a separate option; see @ref{Replacement and Lax
1355Matches}.) 1355Matches}.)
1356 1356
1357 Like with case folding, typing an explicit variant of a character, 1357@vindex char-fold-symmetric
1358such as @code{@"a}, as part of the search string disables character 1358 By default, typing an explicit variant of a character, such as
1359folding for that search. If you delete such a character from the 1359@code{@"a}, as part of the search string doesn't match its base
1360search string, this effect ceases. 1360character, such as @code{a}. But if you customize the variable
1361@code{char-fold-symmetric} to @code{t}, then search commands treat
1362equivalent characters the same and use of any of a set of equivalent
1363characters in a search string finds any of them in the text being
1364searched, so typing an accented character @code{@"a} matches the
1365letter @code{a} as well as all the other variants like @code{@'a}.
1366
1367@vindex char-fold-include
1368@vindex char-fold-exclude
1369 You can add new foldings using the customizable variable
1370@code{char-fold-include}, or remove the existing ones using the
1371customizable variable @code{char-fold-exclude}.
1361 1372
1362@node Replace 1373@node Replace
1363@section Replacement Commands 1374@section Replacement Commands
diff --git a/etc/NEWS b/etc/NEWS
index 6a02c386960..5313270411c 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -1175,6 +1175,15 @@ rather than stopping after one level, such that searching for
1175e.g. GREEK SMALL LETTER IOTA will now also find GREEK SMALL LETTER 1175e.g. GREEK SMALL LETTER IOTA will now also find GREEK SMALL LETTER
1176IOTA WITH OXIA. 1176IOTA WITH OXIA.
1177 1177
1178+++
1179*** New char-folding options: 'char-fold-include' lets you add ad hoc
1180foldings, 'char-fold-exclude' to remove foldings from default decomposition,
1181and 'char-fold-symmetric' to search for any of an equivalence class of
1182characters. For example, with a 'nil' value of 'char-fold-symmetric'
1183you can search for "e" to find "é", but not vice versa. With a non-nil
1184value you can search for either, for example, you can search for "é"
1185to find "e".
1186
1178** Debugger 1187** Debugger
1179 1188
1180+++ 1189+++
diff --git a/lisp/char-fold.el b/lisp/char-fold.el
index a5c4e5e411b..f379229e6c4 100644
--- a/lisp/char-fold.el
+++ b/lisp/char-fold.el
@@ -22,7 +22,18 @@
22 22
23;;; Code: 23;;; Code:
24 24
25(eval-and-compile (put 'char-fold-table 'char-table-extra-slots 1)) 25(eval-and-compile
26 (put 'char-fold-table 'char-table-extra-slots 1)
27 (defconst char-fold--default-include
28 '((?\" """ "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»")
29 (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "󠀢" "❮" "❯" "‹" "›")
30 (?` "❛" "‘" "‛" "󠀢" "❮" "‹")))
31 (defconst char-fold--default-exclude nil)
32 (defconst char-fold--default-symmetric nil)
33 (defconst char-fold--previous (list char-fold--default-include
34 char-fold--default-exclude
35 char-fold--default-symmetric)))
36
26 37
27(eval-and-compile 38(eval-and-compile
28 (defun char-fold-make-table () 39 (defun char-fold-make-table ()
@@ -116,21 +127,70 @@
116 (aref equiv (car simpler-decomp))))))))))) 127 (aref equiv (car simpler-decomp)))))))))))
117 table) 128 table)
118 129
119 ;; Add some manual entries. 130 ;; Add some entries to default decomposition
120 (dolist (it '((?\" """ "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»") 131 (dolist (it (or (bound-and-true-p char-fold-include)
121 (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "󠀢" "❮" "❯" "‹" "›") 132 char-fold--default-include))
122 (?` "❛" "‘" "‛" "󠀢" "❮" "‹")))
123 (let ((idx (car it)) 133 (let ((idx (car it))
124 (chars (cdr it))) 134 (chars (cdr it)))
125 (aset equiv idx (append chars (aref equiv idx))))) 135 (aset equiv idx (append chars (aref equiv idx)))))
126 136
137 ;; Remove some entries from default decomposition
138 (dolist (it (or (bound-and-true-p char-fold-exclude)
139 char-fold--default-exclude))
140 (let ((idx (car it))
141 (chars (cdr it)))
142 (when (aref equiv idx)
143 (dolist (char chars)
144 (aset equiv idx (remove char (aref equiv idx)))))))
145
146 ;; Add symmetric entries
147 (when (or (bound-and-true-p char-fold-symmetric)
148 char-fold--default-symmetric)
149 (let ((symmetric (make-hash-table :test 'eq)))
150 ;; Initialize hashes
151 (map-char-table
152 (lambda (char decomp-list)
153 (puthash char (make-hash-table :test 'equal) symmetric)
154 (dolist (decomp decomp-list)
155 (puthash (string-to-char decomp) (make-hash-table :test 'equal) symmetric)))
156 equiv)
157
158 (map-char-table
159 (lambda (char decomp-list)
160 (dolist (decomp decomp-list)
161 (if (< (length decomp) 2)
162 ;; Add single-char symmetric pairs to hash
163 (let ((decomp-list (cons (char-to-string char) decomp-list))
164 (decomp-hash (gethash (string-to-char decomp) symmetric)))
165 (dolist (decomp2 decomp-list)
166 (unless (equal decomp decomp2)
167 (puthash decomp2 t decomp-hash)
168 (puthash decomp t (gethash (string-to-char decomp2) symmetric)))))
169 ;; Add multi-char symmetric pairs to equiv-multi char-table
170 (let ((decomp-list (cons (char-to-string char) decomp-list))
171 (prefix (string-to-char decomp))
172 (suffix (substring decomp 1)))
173 (puthash decomp t (gethash char symmetric))
174 (dolist (decomp2 decomp-list)
175 (if (< (length decomp2) 2)
176 (aset equiv-multi prefix
177 (cons (cons suffix (regexp-quote decomp2))
178 (aref equiv-multi prefix)))))))))
179 equiv)
180
181 ;; Update equiv char-table from hash
182 (maphash
183 (lambda (char decomp-hash)
184 (let (schars)
185 (maphash (lambda (schar _) (push schar schars)) decomp-hash)
186 (aset equiv char schars)))
187 symmetric)))
188
127 ;; Convert the lists of characters we compiled into regexps. 189 ;; Convert the lists of characters we compiled into regexps.
128 (map-char-table 190 (map-char-table
129 (lambda (char decomp-list) 191 (lambda (char decomp-list)
130 (let ((re (regexp-opt (cons (char-to-string char) decomp-list)))) 192 (let ((re (regexp-opt (cons (char-to-string char) decomp-list))))
131 (if (consp char) ; FIXME: char never is consp? 193 (aset equiv char re)))
132 (set-char-table-range equiv char re)
133 (aset equiv char re))))
134 equiv) 194 equiv)
135 equiv))) 195 equiv)))
136 196
@@ -159,6 +219,61 @@ For instance, the default alist for ?f includes:
159 219
160Exceptionally for the space character (32), ALIST is ignored.") 220Exceptionally for the space character (32), ALIST is ignored.")
161 221
222
223(defun char-fold-update-table ()
224 (let ((new (list (or (bound-and-true-p char-fold-include)
225 char-fold--default-include)
226 (or (bound-and-true-p char-fold-exclude)
227 char-fold--default-exclude)
228 (or (bound-and-true-p char-fold-symmetric)
229 char-fold--default-symmetric))))
230 (unless (equal char-fold--previous new)
231 (setq char-fold-table (char-fold-make-table)
232 char-fold--previous new))))
233
234(defcustom char-fold-include char-fold--default-include
235 "Additional character foldings to include.
236Each entry is a list of a character and the strings that fold into it."
237 :type '(alist :key-type (character :tag "Fold to character")
238 :value-type (repeat (string :tag "Fold from string")))
239 :initialize #'custom-initialize-default
240 :set (lambda (sym val)
241 (custom-set-default sym val)
242 (char-fold-update-table))
243 :group 'isearch
244 :version "27.1")
245
246(defcustom char-fold-exclude char-fold--default-exclude
247 "Character foldings to remove from default decompisitions.
248Each entry is a list of a character and the strings to remove from folding."
249 :type '(alist :key-type (character :tag "Fold to character")
250 :value-type (repeat (string :tag "Fold from string")))
251 :initialize #'custom-initialize-default
252 :set (lambda (sym val)
253 (custom-set-default sym val)
254 (char-fold-update-table))
255 :group 'isearch
256 :version "27.1")
257
258(defcustom char-fold-symmetric char-fold--default-symmetric
259 "Non-nil means char-fold searching treats equivalent chars the same.
260That is, use of any of a set of char-fold equivalent chars in a search
261string finds any of them in the text being searched.
262
263If nil then only the \"base\" or \"canonical\" char of the set matches
264any of them. The others match only themselves, even when char-folding
265is turned on."
266 :type 'boolean
267 :initialize #'custom-initialize-default
268 :set (lambda (sym val)
269 (custom-set-default sym val)
270 (char-fold-update-table))
271 :group 'isearch
272 :version "27.1")
273
274(char-fold-update-table)
275
276
162(defun char-fold--make-space-string (n) 277(defun char-fold--make-space-string (n)
163 "Return a string that matches N spaces." 278 "Return a string that matches N spaces."
164 (format "\\(?:%s\\|%s\\)" 279 (format "\\(?:%s\\|%s\\)"
diff --git a/test/lisp/char-fold-tests.el b/test/lisp/char-fold-tests.el
index e9dfd2b7336..e519435ef05 100644
--- a/test/lisp/char-fold-tests.el
+++ b/test/lisp/char-fold-tests.el
@@ -44,6 +44,16 @@
44 (should (string-match (char-fold--ascii-upcase re) (downcase it))) 44 (should (string-match (char-fold--ascii-upcase re) (downcase it)))
45 (should (string-match (char-fold--ascii-downcase re) (upcase it))))))) 45 (should (string-match (char-fold--ascii-downcase re) (upcase it)))))))
46 46
47(defun char-fold--test-no-match-exactly (string &rest strings-to-match)
48 (let ((re (concat "\\`" (char-fold-to-regexp string) "\\'")))
49 (dolist (it strings-to-match)
50 (should-not (string-match re it)))
51 ;; Case folding
52 (let ((case-fold-search t))
53 (dolist (it strings-to-match)
54 (should-not (string-match (char-fold--ascii-upcase re) (downcase it)))
55 (should-not (string-match (char-fold--ascii-downcase re) (upcase it)))))))
56
47(defun char-fold--test-search-with-contents (contents string) 57(defun char-fold--test-search-with-contents (contents string)
48 (with-temp-buffer 58 (with-temp-buffer
49 (insert contents) 59 (insert contents)
@@ -53,6 +63,11 @@
53 (should (char-fold-search-forward string nil 'noerror)) 63 (should (char-fold-search-forward string nil 'noerror))
54 (should (char-fold-search-backward string nil 'noerror)))) 64 (should (char-fold-search-backward string nil 'noerror))))
55 65
66(defun char-fold--permutation (strings)
67 (mapcar (lambda (string)
68 (cons string (remove string strings)))
69 strings))
70
56 71
57(ert-deftest char-fold--test-consistency () 72(ert-deftest char-fold--test-consistency ()
58 (dotimes (n 30) 73 (dotimes (n 30)
@@ -132,5 +147,65 @@
132 ;; Ensure it took less than a second. 147 ;; Ensure it took less than a second.
133 (should (< (- (time-to-seconds) time) 1)))))) 148 (should (< (- (time-to-seconds) time) 1))))))
134 149
150(ert-deftest char-fold--test-without-customization ()
151 (let* ((matches
152 '(
153 ("e" "ℯ" "ḗ" "ë" "ë")
154 ("ι"
155 "ί" ;; 1 level decomposition
156 "ί" ;; 2 level decomposition
157 ;; FIXME:
158 ;; "ΐ" ;; 3 level decomposition
159 )
160 )))
161 (dolist (strings matches)
162 (apply 'char-fold--test-match-exactly strings))))
163
164(ert-deftest char-fold--test-with-customization ()
165 :tags '(:expensive-test)
166 (let* ((char-fold-include
167 '(
168 (?ß "ss") ;; de
169 (?o "ø") ;; da no nb nn
170 (?l "ł") ;; pl
171 ))
172 ;; FIXME: move language-specific settings to defaults
173 (char-fold-exclude
174 '(
175 (?a "å") ;; sv da no nb nn
176 (?a "ä") ;; sv fi et
177 (?o "ö") ;; sv fi et
178 (?n "ñ") ;; es
179 (?и "й") ;; ru
180 ))
181 (char-fold-symmetric t)
182 (char-fold-table (char-fold-make-table))
183 (matches
184 '(
185 ("e" "ℯ" "ḗ" "ë" "ë")
186 ("е" "ё" "ё")
187 ("ι" "ί" "ί"
188 ;; FIXME: "ΐ"
189 )
190 ("ß" "ss")
191 ("o" "ø")
192 ("l" "ł")
193
194 ))
195 (no-matches
196 '(
197 ("a" "å")
198 ("a" "ä")
199 ("o" "ö")
200 ("n" "ñ")
201 ("и" "й")
202 )))
203 (dolist (strings matches)
204 (dolist (permutation (char-fold--permutation strings))
205 (apply 'char-fold--test-match-exactly permutation)))
206 (dolist (strings no-matches)
207 (dolist (permutation (char-fold--permutation strings))
208 (apply 'char-fold--test-no-match-exactly permutation)))))
209
135(provide 'char-fold-tests) 210(provide 'char-fold-tests)
136;;; char-fold-tests.el ends here 211;;; char-fold-tests.el ends here