diff options
| author | Juri Linkov | 2019-07-23 23:27:28 +0300 |
|---|---|---|
| committer | Juri Linkov | 2019-07-23 23:27:28 +0300 |
| commit | 376f5df3cca0dbf186823e5b329d76b52019473d (patch) | |
| tree | d13fed57ed0b5d306d27846e87ee1e21b2f9900a | |
| parent | a48726ebae2f44ed15b97cb72bc7eca199d8de47 (diff) | |
| download | emacs-376f5df3cca0dbf186823e5b329d76b52019473d.tar.gz emacs-376f5df3cca0dbf186823e5b329d76b52019473d.zip | |
Customizable char-fold with char-fold-symmetric, char-fold-include (bug#35689)
* doc/emacs/search.texi (Lax Search): Document
char-fold-symmetric, char-fold-include, char-fold-exclude.
* lisp/char-fold.el (char-fold--default-include)
(char-fold--default-exclude, char-fold--default-symmetric)
(char-fold--previous): New defconsts.
(char-fold-include, char-fold-exclude, char-fold-symmetric):
New defcustoms.
(char-fold-make-table): Use them.
(char-fold-update-table): New function called at top-level.
* test/lisp/char-fold-tests.el (char-fold--test-no-match-exactly)
(char-fold--permutation): New functions.
(char-fold--test-without-customization)
(char-fold--test-with-customization): New tests.
| -rw-r--r-- | doc/emacs/search.texi | 19 | ||||
| -rw-r--r-- | etc/NEWS | 9 | ||||
| -rw-r--r-- | lisp/char-fold.el | 131 | ||||
| -rw-r--r-- | test/lisp/char-fold-tests.el | 75 |
4 files changed, 222 insertions, 12 deletions
diff --git a/doc/emacs/search.texi b/doc/emacs/search.texi index b47d51a2b66..66af5d40162 100644 --- a/doc/emacs/search.texi +++ b/doc/emacs/search.texi | |||
| @@ -1354,10 +1354,21 @@ folding, but only for that search. (Replace commands have a different | |||
| 1354 | default, controlled by a separate option; see @ref{Replacement and Lax | 1354 | default, controlled by a separate option; see @ref{Replacement and Lax |
| 1355 | Matches}.) | 1355 | Matches}.) |
| 1356 | 1356 | ||
| 1357 | Like with case folding, typing an explicit variant of a character, | 1357 | @vindex char-fold-symmetric |
| 1358 | such as @code{@"a}, as part of the search string disables character | 1358 | By default, typing an explicit variant of a character, such as |
| 1359 | folding for that search. If you delete such a character from the | 1359 | @code{@"a}, as part of the search string doesn't match its base |
| 1360 | search string, this effect ceases. | 1360 | character, such as @code{a}. But if you customize the variable |
| 1361 | @code{char-fold-symmetric} to @code{t}, then search commands treat | ||
| 1362 | equivalent characters the same and use of any of a set of equivalent | ||
| 1363 | characters in a search string finds any of them in the text being | ||
| 1364 | searched, so typing an accented character @code{@"a} matches the | ||
| 1365 | letter @code{a} as well as all the other variants like @code{@'a}. | ||
| 1366 | |||
| 1367 | @vindex char-fold-include | ||
| 1368 | @vindex char-fold-exclude | ||
| 1369 | You can add new foldings using the customizable variable | ||
| 1370 | @code{char-fold-include}, or remove the existing ones using the | ||
| 1371 | customizable variable @code{char-fold-exclude}. | ||
| 1361 | 1372 | ||
| 1362 | @node Replace | 1373 | @node Replace |
| 1363 | @section Replacement Commands | 1374 | @section Replacement Commands |
| @@ -1175,6 +1175,15 @@ rather than stopping after one level, such that searching for | |||
| 1175 | e.g. GREEK SMALL LETTER IOTA will now also find GREEK SMALL LETTER | 1175 | e.g. GREEK SMALL LETTER IOTA will now also find GREEK SMALL LETTER |
| 1176 | IOTA WITH OXIA. | 1176 | IOTA WITH OXIA. |
| 1177 | 1177 | ||
| 1178 | +++ | ||
| 1179 | *** New char-folding options: 'char-fold-include' lets you add ad hoc | ||
| 1180 | foldings, 'char-fold-exclude' to remove foldings from default decomposition, | ||
| 1181 | and 'char-fold-symmetric' to search for any of an equivalence class of | ||
| 1182 | characters. For example, with a 'nil' value of 'char-fold-symmetric' | ||
| 1183 | you can search for "e" to find "é", but not vice versa. With a non-nil | ||
| 1184 | value you can search for either, for example, you can search for "é" | ||
| 1185 | to find "e". | ||
| 1186 | |||
| 1178 | ** Debugger | 1187 | ** Debugger |
| 1179 | 1188 | ||
| 1180 | +++ | 1189 | +++ |
diff --git a/lisp/char-fold.el b/lisp/char-fold.el index a5c4e5e411b..f379229e6c4 100644 --- a/lisp/char-fold.el +++ b/lisp/char-fold.el | |||
| @@ -22,7 +22,18 @@ | |||
| 22 | 22 | ||
| 23 | ;;; Code: | 23 | ;;; Code: |
| 24 | 24 | ||
| 25 | (eval-and-compile (put 'char-fold-table 'char-table-extra-slots 1)) | 25 | (eval-and-compile |
| 26 | (put 'char-fold-table 'char-table-extra-slots 1) | ||
| 27 | (defconst char-fold--default-include | ||
| 28 | '((?\" """ "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»") | ||
| 29 | (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "" "❮" "❯" "‹" "›") | ||
| 30 | (?` "❛" "‘" "‛" "" "❮" "‹"))) | ||
| 31 | (defconst char-fold--default-exclude nil) | ||
| 32 | (defconst char-fold--default-symmetric nil) | ||
| 33 | (defconst char-fold--previous (list char-fold--default-include | ||
| 34 | char-fold--default-exclude | ||
| 35 | char-fold--default-symmetric))) | ||
| 36 | |||
| 26 | 37 | ||
| 27 | (eval-and-compile | 38 | (eval-and-compile |
| 28 | (defun char-fold-make-table () | 39 | (defun char-fold-make-table () |
| @@ -116,21 +127,70 @@ | |||
| 116 | (aref equiv (car simpler-decomp))))))))))) | 127 | (aref equiv (car simpler-decomp))))))))))) |
| 117 | table) | 128 | table) |
| 118 | 129 | ||
| 119 | ;; Add some manual entries. | 130 | ;; Add some entries to default decomposition |
| 120 | (dolist (it '((?\" """ "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»") | 131 | (dolist (it (or (bound-and-true-p char-fold-include) |
| 121 | (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "" "❮" "❯" "‹" "›") | 132 | char-fold--default-include)) |
| 122 | (?` "❛" "‘" "‛" "" "❮" "‹"))) | ||
| 123 | (let ((idx (car it)) | 133 | (let ((idx (car it)) |
| 124 | (chars (cdr it))) | 134 | (chars (cdr it))) |
| 125 | (aset equiv idx (append chars (aref equiv idx))))) | 135 | (aset equiv idx (append chars (aref equiv idx))))) |
| 126 | 136 | ||
| 137 | ;; Remove some entries from default decomposition | ||
| 138 | (dolist (it (or (bound-and-true-p char-fold-exclude) | ||
| 139 | char-fold--default-exclude)) | ||
| 140 | (let ((idx (car it)) | ||
| 141 | (chars (cdr it))) | ||
| 142 | (when (aref equiv idx) | ||
| 143 | (dolist (char chars) | ||
| 144 | (aset equiv idx (remove char (aref equiv idx))))))) | ||
| 145 | |||
| 146 | ;; Add symmetric entries | ||
| 147 | (when (or (bound-and-true-p char-fold-symmetric) | ||
| 148 | char-fold--default-symmetric) | ||
| 149 | (let ((symmetric (make-hash-table :test 'eq))) | ||
| 150 | ;; Initialize hashes | ||
| 151 | (map-char-table | ||
| 152 | (lambda (char decomp-list) | ||
| 153 | (puthash char (make-hash-table :test 'equal) symmetric) | ||
| 154 | (dolist (decomp decomp-list) | ||
| 155 | (puthash (string-to-char decomp) (make-hash-table :test 'equal) symmetric))) | ||
| 156 | equiv) | ||
| 157 | |||
| 158 | (map-char-table | ||
| 159 | (lambda (char decomp-list) | ||
| 160 | (dolist (decomp decomp-list) | ||
| 161 | (if (< (length decomp) 2) | ||
| 162 | ;; Add single-char symmetric pairs to hash | ||
| 163 | (let ((decomp-list (cons (char-to-string char) decomp-list)) | ||
| 164 | (decomp-hash (gethash (string-to-char decomp) symmetric))) | ||
| 165 | (dolist (decomp2 decomp-list) | ||
| 166 | (unless (equal decomp decomp2) | ||
| 167 | (puthash decomp2 t decomp-hash) | ||
| 168 | (puthash decomp t (gethash (string-to-char decomp2) symmetric))))) | ||
| 169 | ;; Add multi-char symmetric pairs to equiv-multi char-table | ||
| 170 | (let ((decomp-list (cons (char-to-string char) decomp-list)) | ||
| 171 | (prefix (string-to-char decomp)) | ||
| 172 | (suffix (substring decomp 1))) | ||
| 173 | (puthash decomp t (gethash char symmetric)) | ||
| 174 | (dolist (decomp2 decomp-list) | ||
| 175 | (if (< (length decomp2) 2) | ||
| 176 | (aset equiv-multi prefix | ||
| 177 | (cons (cons suffix (regexp-quote decomp2)) | ||
| 178 | (aref equiv-multi prefix))))))))) | ||
| 179 | equiv) | ||
| 180 | |||
| 181 | ;; Update equiv char-table from hash | ||
| 182 | (maphash | ||
| 183 | (lambda (char decomp-hash) | ||
| 184 | (let (schars) | ||
| 185 | (maphash (lambda (schar _) (push schar schars)) decomp-hash) | ||
| 186 | (aset equiv char schars))) | ||
| 187 | symmetric))) | ||
| 188 | |||
| 127 | ;; Convert the lists of characters we compiled into regexps. | 189 | ;; Convert the lists of characters we compiled into regexps. |
| 128 | (map-char-table | 190 | (map-char-table |
| 129 | (lambda (char decomp-list) | 191 | (lambda (char decomp-list) |
| 130 | (let ((re (regexp-opt (cons (char-to-string char) decomp-list)))) | 192 | (let ((re (regexp-opt (cons (char-to-string char) decomp-list)))) |
| 131 | (if (consp char) ; FIXME: char never is consp? | 193 | (aset equiv char re))) |
| 132 | (set-char-table-range equiv char re) | ||
| 133 | (aset equiv char re)))) | ||
| 134 | equiv) | 194 | equiv) |
| 135 | equiv))) | 195 | equiv))) |
| 136 | 196 | ||
| @@ -159,6 +219,61 @@ For instance, the default alist for ?f includes: | |||
| 159 | 219 | ||
| 160 | Exceptionally for the space character (32), ALIST is ignored.") | 220 | Exceptionally for the space character (32), ALIST is ignored.") |
| 161 | 221 | ||
| 222 | |||
| 223 | (defun char-fold-update-table () | ||
| 224 | (let ((new (list (or (bound-and-true-p char-fold-include) | ||
| 225 | char-fold--default-include) | ||
| 226 | (or (bound-and-true-p char-fold-exclude) | ||
| 227 | char-fold--default-exclude) | ||
| 228 | (or (bound-and-true-p char-fold-symmetric) | ||
| 229 | char-fold--default-symmetric)))) | ||
| 230 | (unless (equal char-fold--previous new) | ||
| 231 | (setq char-fold-table (char-fold-make-table) | ||
| 232 | char-fold--previous new)))) | ||
| 233 | |||
| 234 | (defcustom char-fold-include char-fold--default-include | ||
| 235 | "Additional character foldings to include. | ||
| 236 | Each entry is a list of a character and the strings that fold into it." | ||
| 237 | :type '(alist :key-type (character :tag "Fold to character") | ||
| 238 | :value-type (repeat (string :tag "Fold from string"))) | ||
| 239 | :initialize #'custom-initialize-default | ||
| 240 | :set (lambda (sym val) | ||
| 241 | (custom-set-default sym val) | ||
| 242 | (char-fold-update-table)) | ||
| 243 | :group 'isearch | ||
| 244 | :version "27.1") | ||
| 245 | |||
| 246 | (defcustom char-fold-exclude char-fold--default-exclude | ||
| 247 | "Character foldings to remove from default decompisitions. | ||
| 248 | Each entry is a list of a character and the strings to remove from folding." | ||
| 249 | :type '(alist :key-type (character :tag "Fold to character") | ||
| 250 | :value-type (repeat (string :tag "Fold from string"))) | ||
| 251 | :initialize #'custom-initialize-default | ||
| 252 | :set (lambda (sym val) | ||
| 253 | (custom-set-default sym val) | ||
| 254 | (char-fold-update-table)) | ||
| 255 | :group 'isearch | ||
| 256 | :version "27.1") | ||
| 257 | |||
| 258 | (defcustom char-fold-symmetric char-fold--default-symmetric | ||
| 259 | "Non-nil means char-fold searching treats equivalent chars the same. | ||
| 260 | That is, use of any of a set of char-fold equivalent chars in a search | ||
| 261 | string finds any of them in the text being searched. | ||
| 262 | |||
| 263 | If nil then only the \"base\" or \"canonical\" char of the set matches | ||
| 264 | any of them. The others match only themselves, even when char-folding | ||
| 265 | is turned on." | ||
| 266 | :type 'boolean | ||
| 267 | :initialize #'custom-initialize-default | ||
| 268 | :set (lambda (sym val) | ||
| 269 | (custom-set-default sym val) | ||
| 270 | (char-fold-update-table)) | ||
| 271 | :group 'isearch | ||
| 272 | :version "27.1") | ||
| 273 | |||
| 274 | (char-fold-update-table) | ||
| 275 | |||
| 276 | |||
| 162 | (defun char-fold--make-space-string (n) | 277 | (defun char-fold--make-space-string (n) |
| 163 | "Return a string that matches N spaces." | 278 | "Return a string that matches N spaces." |
| 164 | (format "\\(?:%s\\|%s\\)" | 279 | (format "\\(?:%s\\|%s\\)" |
diff --git a/test/lisp/char-fold-tests.el b/test/lisp/char-fold-tests.el index e9dfd2b7336..e519435ef05 100644 --- a/test/lisp/char-fold-tests.el +++ b/test/lisp/char-fold-tests.el | |||
| @@ -44,6 +44,16 @@ | |||
| 44 | (should (string-match (char-fold--ascii-upcase re) (downcase it))) | 44 | (should (string-match (char-fold--ascii-upcase re) (downcase it))) |
| 45 | (should (string-match (char-fold--ascii-downcase re) (upcase it))))))) | 45 | (should (string-match (char-fold--ascii-downcase re) (upcase it))))))) |
| 46 | 46 | ||
| 47 | (defun char-fold--test-no-match-exactly (string &rest strings-to-match) | ||
| 48 | (let ((re (concat "\\`" (char-fold-to-regexp string) "\\'"))) | ||
| 49 | (dolist (it strings-to-match) | ||
| 50 | (should-not (string-match re it))) | ||
| 51 | ;; Case folding | ||
| 52 | (let ((case-fold-search t)) | ||
| 53 | (dolist (it strings-to-match) | ||
| 54 | (should-not (string-match (char-fold--ascii-upcase re) (downcase it))) | ||
| 55 | (should-not (string-match (char-fold--ascii-downcase re) (upcase it))))))) | ||
| 56 | |||
| 47 | (defun char-fold--test-search-with-contents (contents string) | 57 | (defun char-fold--test-search-with-contents (contents string) |
| 48 | (with-temp-buffer | 58 | (with-temp-buffer |
| 49 | (insert contents) | 59 | (insert contents) |
| @@ -53,6 +63,11 @@ | |||
| 53 | (should (char-fold-search-forward string nil 'noerror)) | 63 | (should (char-fold-search-forward string nil 'noerror)) |
| 54 | (should (char-fold-search-backward string nil 'noerror)))) | 64 | (should (char-fold-search-backward string nil 'noerror)))) |
| 55 | 65 | ||
| 66 | (defun char-fold--permutation (strings) | ||
| 67 | (mapcar (lambda (string) | ||
| 68 | (cons string (remove string strings))) | ||
| 69 | strings)) | ||
| 70 | |||
| 56 | 71 | ||
| 57 | (ert-deftest char-fold--test-consistency () | 72 | (ert-deftest char-fold--test-consistency () |
| 58 | (dotimes (n 30) | 73 | (dotimes (n 30) |
| @@ -132,5 +147,65 @@ | |||
| 132 | ;; Ensure it took less than a second. | 147 | ;; Ensure it took less than a second. |
| 133 | (should (< (- (time-to-seconds) time) 1)))))) | 148 | (should (< (- (time-to-seconds) time) 1)))))) |
| 134 | 149 | ||
| 150 | (ert-deftest char-fold--test-without-customization () | ||
| 151 | (let* ((matches | ||
| 152 | '( | ||
| 153 | ("e" "ℯ" "ḗ" "ë" "ë") | ||
| 154 | ("ι" | ||
| 155 | "ί" ;; 1 level decomposition | ||
| 156 | "ί" ;; 2 level decomposition | ||
| 157 | ;; FIXME: | ||
| 158 | ;; "ΐ" ;; 3 level decomposition | ||
| 159 | ) | ||
| 160 | ))) | ||
| 161 | (dolist (strings matches) | ||
| 162 | (apply 'char-fold--test-match-exactly strings)))) | ||
| 163 | |||
| 164 | (ert-deftest char-fold--test-with-customization () | ||
| 165 | :tags '(:expensive-test) | ||
| 166 | (let* ((char-fold-include | ||
| 167 | '( | ||
| 168 | (?ß "ss") ;; de | ||
| 169 | (?o "ø") ;; da no nb nn | ||
| 170 | (?l "ł") ;; pl | ||
| 171 | )) | ||
| 172 | ;; FIXME: move language-specific settings to defaults | ||
| 173 | (char-fold-exclude | ||
| 174 | '( | ||
| 175 | (?a "å") ;; sv da no nb nn | ||
| 176 | (?a "ä") ;; sv fi et | ||
| 177 | (?o "ö") ;; sv fi et | ||
| 178 | (?n "ñ") ;; es | ||
| 179 | (?и "й") ;; ru | ||
| 180 | )) | ||
| 181 | (char-fold-symmetric t) | ||
| 182 | (char-fold-table (char-fold-make-table)) | ||
| 183 | (matches | ||
| 184 | '( | ||
| 185 | ("e" "ℯ" "ḗ" "ë" "ë") | ||
| 186 | ("е" "ё" "ё") | ||
| 187 | ("ι" "ί" "ί" | ||
| 188 | ;; FIXME: "ΐ" | ||
| 189 | ) | ||
| 190 | ("ß" "ss") | ||
| 191 | ("o" "ø") | ||
| 192 | ("l" "ł") | ||
| 193 | |||
| 194 | )) | ||
| 195 | (no-matches | ||
| 196 | '( | ||
| 197 | ("a" "å") | ||
| 198 | ("a" "ä") | ||
| 199 | ("o" "ö") | ||
| 200 | ("n" "ñ") | ||
| 201 | ("и" "й") | ||
| 202 | ))) | ||
| 203 | (dolist (strings matches) | ||
| 204 | (dolist (permutation (char-fold--permutation strings)) | ||
| 205 | (apply 'char-fold--test-match-exactly permutation))) | ||
| 206 | (dolist (strings no-matches) | ||
| 207 | (dolist (permutation (char-fold--permutation strings)) | ||
| 208 | (apply 'char-fold--test-no-match-exactly permutation))))) | ||
| 209 | |||
| 135 | (provide 'char-fold-tests) | 210 | (provide 'char-fold-tests) |
| 136 | ;;; char-fold-tests.el ends here | 211 | ;;; char-fold-tests.el ends here |