diff options
| author | Lars Ingebrigtsen | 2022-01-18 10:24:32 +0100 |
|---|---|---|
| committer | Lars Ingebrigtsen | 2022-01-18 10:24:32 +0100 |
| commit | 4eebf528fca6f6f16168c4f76a653353f3598a35 (patch) | |
| tree | ed76cf47fff33570e36280836ccf142d7cdc1596 | |
| parent | 19fefea1ca567cc08c67a2167f8e366483b1c013 (diff) | |
| download | emacs-4eebf528fca6f6f16168c4f76a653353f3598a35.tar.gz emacs-4eebf528fca6f6f16168c4f76a653353f3598a35.zip | |
Add textsec predicates for different types of confusability
* lisp/international/textsec.el (textsec-resolved-script-set)
(textsec-single-script-confusable-p)
(textsec-mixed-script-confusable-p)
(textsec-whole-script-confusable-p): New functions.
| -rw-r--r-- | lisp/international/textsec.el | 30 | ||||
| -rw-r--r-- | test/lisp/international/textsec-tests.el | 19 |
2 files changed, 48 insertions, 1 deletion
diff --git a/lisp/international/textsec.el b/lisp/international/textsec.el index 304d69cb894..8095733e097 100644 --- a/lisp/international/textsec.el +++ b/lisp/international/textsec.el | |||
| @@ -192,6 +192,36 @@ This algorithm is described in: | |||
| 192 | (string char))) | 192 | (string char))) |
| 193 | (ucs-normalize-NFD-string string))))) | 193 | (ucs-normalize-NFD-string string))))) |
| 194 | 194 | ||
| 195 | (defun textsec-resolved-script-set (string) | ||
| 196 | "Return the resolved script set for STRING. | ||
| 197 | This is the minimal covering script set for STRING, but is nil if | ||
| 198 | STRING isn't a single script string." | ||
| 199 | (and (textsec-single-script-p string) | ||
| 200 | (textsec-covering-scripts string))) | ||
| 201 | |||
| 202 | (defun textsec-single-script-confusable-p (string1 string2) | ||
| 203 | "Say whether STRING1 and STRING2 are single script confusables." | ||
| 204 | (and (equal (textsec-unconfuse-string string1) | ||
| 205 | (textsec-unconfuse-string string2)) | ||
| 206 | ;; And they have to have at least one resolved script in | ||
| 207 | ;; common. | ||
| 208 | (seq-intersection (textsec-resolved-script-set string1) | ||
| 209 | (textsec-resolved-script-set string2)))) | ||
| 210 | |||
| 211 | (defun textsec-mixed-script-confusable-p (string1 string2) | ||
| 212 | "Say whether STRING1 and STRING2 are mixed script confusables." | ||
| 213 | (and (equal (textsec-unconfuse-string string1) | ||
| 214 | (textsec-unconfuse-string string2)) | ||
| 215 | ;; And they have no resolved scripts in common. | ||
| 216 | (null (seq-intersection (textsec-resolved-script-set string1) | ||
| 217 | (textsec-resolved-script-set string2))))) | ||
| 218 | |||
| 219 | (defun textsec-whole-script-confusable-p (string1 string2) | ||
| 220 | "Say whether STRING1 and STRING2 are whole script confusables." | ||
| 221 | (and (textsec-mixed-script-confusable-p string1 string2) | ||
| 222 | (textsec-single-script-p string1) | ||
| 223 | (textsec-single-script-p string2))) | ||
| 224 | |||
| 195 | (provide 'textsec) | 225 | (provide 'textsec) |
| 196 | 226 | ||
| 197 | ;;; textsec.el ends here | 227 | ;;; textsec.el ends here |
diff --git a/test/lisp/international/textsec-tests.el b/test/lisp/international/textsec-tests.el index 50106bb955e..15b6b21b348 100644 --- a/test/lisp/international/textsec-tests.el +++ b/test/lisp/international/textsec-tests.el | |||
| @@ -86,11 +86,28 @@ | |||
| 86 | (should-not (textsec-mixed-numbers-p "8foo8")) | 86 | (should-not (textsec-mixed-numbers-p "8foo8")) |
| 87 | (should (textsec-mixed-numbers-p "8foo৪"))) | 87 | (should (textsec-mixed-numbers-p "8foo৪"))) |
| 88 | 88 | ||
| 89 | (ert-deftest test-resolved () | ||
| 90 | (should (equal (textsec-resolved-script-set "ljeto") | ||
| 91 | '(latin))) | ||
| 92 | (should-not (textsec-resolved-script-set "Сirсlе"))) | ||
| 93 | |||
| 89 | (ert-deftest test-confusable () | 94 | (ert-deftest test-confusable () |
| 90 | (should (equal (textsec-unconfuse-string "ljeto") "ljeto")) | 95 | (should (equal (textsec-unconfuse-string "ljeto") "ljeto")) |
| 91 | (should (textsec-ascii-confusable-p "ljeto")) | 96 | (should (textsec-ascii-confusable-p "ljeto")) |
| 92 | (should-not (textsec-ascii-confusable-p "ljeto")) | 97 | (should-not (textsec-ascii-confusable-p "ljeto")) |
| 93 | (should (equal (textsec-unconfuse-string "~") "〜")) | 98 | (should (equal (textsec-unconfuse-string "~") "〜")) |
| 94 | (should-not (textsec-ascii-confusable-p "~"))) | 99 | (should-not (textsec-ascii-confusable-p "~")) |
| 100 | |||
| 101 | (should (textsec-single-script-confusable-p "ljeto" "ljeto")) | ||
| 102 | (should-not (textsec-single-script-confusable-p "paypal" "pаypаl")) | ||
| 103 | (should-not (textsec-single-script-confusable-p "scope""ѕсоре")) | ||
| 104 | |||
| 105 | (should-not (textsec-mixed-script-confusable-p "ljeto" "ljeto")) | ||
| 106 | (should (textsec-mixed-script-confusable-p "paypal" "pаypаl")) | ||
| 107 | (should (textsec-mixed-script-confusable-p "scope""ѕсоре")) | ||
| 108 | |||
| 109 | (should-not (textsec-whole-script-confusable-p "ljeto" "ljeto")) | ||
| 110 | (should-not (textsec-whole-script-confusable-p "paypal" "pаypаl")) | ||
| 111 | (should (textsec-whole-script-confusable-p "scope""ѕсоре"))) | ||
| 95 | 112 | ||
| 96 | ;;; textsec-tests.el ends here | 113 | ;;; textsec-tests.el ends here |