aboutsummaryrefslogtreecommitdiffstats
path: root/test/src
diff options
context:
space:
mode:
authorMichal Nazarewicz2016-10-05 00:06:01 +0200
committerMichal Nazarewicz2017-04-06 20:54:58 +0200
commitb3b9b258c4026baa1cad3f2e617f1a637fc8d205 (patch)
tree1520ef9f5a3204784c597fcf2bf7a7c7fc1b8d7c /test/src
parent2c87dabd0460cce83d2345b4ddff159969674fef (diff)
downloademacs-b3b9b258c4026baa1cad3f2e617f1a637fc8d205.tar.gz
emacs-b3b9b258c4026baa1cad3f2e617f1a637fc8d205.zip
Support casing characters which map into multiple code points (bug#24603)
Implement the unconditional special casing rules defined in the Unicode standard. Among other things, they deal with cases when a single code point is replaced by multiple ones because a single character does not exist (e.g. the ‘ﬁ’ ligature turning into ‘FI’) or is not commonly used (e.g. ß turning into SS). * admin/unidata/SpecialCasing.txt: New data file pulled from the Unicode standard distribution. * admin/unidata/README: Mention SpecialCasing.txt. * admin/unidata/unidata-gen.el (unidata-gen-table-special-casing, unidata-gen-table-special-casing--do-load): New functions generating the ‘special-uppercase’, ‘special-lowercase’ and ‘special-titlecase’ character Unicode properties built from the SpecialCasing.txt Unicode data file. * src/casefiddle.c (struct casing_str_buf): New structure for representing short strings used to handle one-to-many character mappings. (case_character_impl): New function which can handle one-to-many character mappings. (case_character, case_single_character): Wrappers for the above function. The former may map one character to multiple (or no) code points while the latter does what the former used to do (i.e. handles one-to-one mappings only). (do_casify_natnum, do_casify_unibyte_string, do_casify_unibyte_region): Use case_single_character. (do_casify_multibyte_string, do_casify_multibyte_region): Support new features of case_character. (do_casify_region): Update to reflect do_casify_multibyte_string changes. (casify_word): Handle the situation where the length in characters of a word changes during casing, which affects where the end of the word is. (upcase, capitalize, upcase-initials): Update documentation to mention limitations when working on characters. 
* test/lisp/char-fold-tests.el (char-fold--ascii-upcase, char-fold--ascii-downcase): New functions which behave like the old ‘upcase’ and ‘downcase’. (char-fold--test-match-exactly): Use the new functions. This is needed because otherwise ‘ﬁ’ and similar characters are turned into their multi-character representation. * doc/lispref/strings.texi: Describe the issue with casing characters versus strings. * doc/lispref/nonascii.texi: Describe the new character properties.
Diffstat (limited to 'test/src')
-rw-r--r--test/src/casefiddle-tests.el73
1 files changed, 37 insertions, 36 deletions
diff --git a/test/src/casefiddle-tests.el b/test/src/casefiddle-tests.el
index e83cb00059b..dd260633f4c 100644
--- a/test/src/casefiddle-tests.el
+++ b/test/src/casefiddle-tests.el
@@ -24,36 +24,40 @@
24 24
25(ert-deftest casefiddle-tests-char-properties () 25(ert-deftest casefiddle-tests-char-properties ()
26 "Sanity check of character Unicode properties." 26 "Sanity check of character Unicode properties."
27 (should-not 27 (let ((props '(uppercase lowercase titlecase
28 (let (errors) 28 special-uppercase special-lowercase special-titlecase))
29 ;; character uppercase lowercase titlecase 29 (tests '((?A nil ?a nil nil nil nil)
30 (dolist (test '((?A nil ?a nil) 30 (?a ?A nil ?A nil nil nil)
31 (?a ?A nil ?A) 31 (?Ł nil ?ł nil nil nil nil)
32 (?Ł nil ?ł nil) 32 (?ł ?Ł nil ?Ł nil nil nil)
33 (?ł ?Ł nil ?Ł) 33
34 34 (?DŽ nil ?dž ?Dž nil nil nil)
35 (?DŽ nil ?dž ?Dž) 35 (?Dž ?DŽ ?dž ?Dž nil nil nil)
36 (?Dž ?DŽ ?dž ?Dž) 36 (?dž ?DŽ nil ?Dž nil nil nil)
37 (?dž ?DŽ nil ?Dž) 37
38 38 (?Σ nil ?σ nil nil nil nil)
39 (?Σ nil ?σ nil) 39 (?σ ?Σ nil ?Σ nil nil nil)
40 (?σ ?Σ nil ?Σ) 40 (?ς ?Σ nil ?Σ nil nil nil)
41 (?ς ?Σ nil ?Σ) 41
42 42 (?ⅷ ?Ⅷ nil ?Ⅷ nil nil nil)
43 (?ⅷ ?Ⅷ nil ?Ⅷ) 43 (?Ⅷ nil ?ⅷ nil nil nil nil)
44 (?Ⅷ nil ?ⅷ nil))) 44
45 (let ((ch (car test)) 45 (?fi nil nil nil "FI" nil "Fi")
46 (expected (cdr test)) 46 (?ß nil nil nil "SS" nil "Ss")
47 (props '(uppercase lowercase titlecase))) 47 (?İ nil ?i nil nil "i\u0307" nil)))
48 (while props 48 errors)
49 (let ((got (get-char-code-property ch (car props)))) 49 (dolist (test tests)
50 (unless (equal (car expected) got) 50 (let ((ch (car test))
51 (push (format "\n%c %s; expected: %s but got: %s" 51 (expected (cdr test)))
52 ch (car props) (car expected) got) 52 (dolist (prop props)
53 errors))) 53 (let ((got (get-char-code-property ch prop)))
54 (setq props (cdr props) expected (cdr expected))))) 54 (unless (equal (car expected) got)
55 (when errors 55 (push (format "\n%c %s; expected: %s but got: %s"
56 (mapconcat (lambda (line) line) (nreverse errors) ""))))) 56 ch prop (car expected) got)
57 errors)))
58 (setq expected (cdr expected)))))
59 (when errors
60 (ert-fail (mapconcat (lambda (line) line) (nreverse errors) "")))))
57 61
58 62
59(defconst casefiddle-tests--characters 63(defconst casefiddle-tests--characters
@@ -188,16 +192,13 @@
188 ("DŽUNGLA" "DŽUNGLA" "džungla" "Džungla" "DžUNGLA") 192 ("DŽUNGLA" "DŽUNGLA" "džungla" "Džungla" "DžUNGLA")
189 ("Džungla" "DŽUNGLA" "džungla" "Džungla" "Džungla") 193 ("Džungla" "DŽUNGLA" "džungla" "Džungla" "Džungla")
190 ("džungla" "DŽUNGLA" "džungla" "Džungla" "Džungla") 194 ("džungla" "DŽUNGLA" "džungla" "Džungla" "Džungla")
195 ("define" "DEFINE" "define" "Define" "Define")
196 ("fish" "FISH" "fish" "Fish" "Fish")
197 ("Straße" "STRASSE" "straße" "Straße" "Straße")
191 ;; FIXME(bug#24603): Everything below is broken at the moment. 198 ;; FIXME(bug#24603): Everything below is broken at the moment.
192 ;; Here’s what should happen: 199 ;; Here’s what should happen:
193 ;;("define" "DEFINE" "define" "Define" "Define")
194 ;;("fish" "FIsh" "fish" "Fish" "Fish")
195 ;;("Straße" "STRASSE" "straße" "Straße" "Straße")
196 ;;("ΌΣΟΣ" "ΌΣΟΣ" "όσος" "Όσος" "Όσος") 200 ;;("ΌΣΟΣ" "ΌΣΟΣ" "όσος" "Όσος" "Όσος")
197 ;; And here’s what is actually happening: 201 ;; And here’s what is actually happening:
198 ("define" "DEfiNE" "define" "Define" "Define")
199 ("fish" "fiSH" "fish" "fish" "fish")
200 ("Straße" "STRAßE" "straße" "Straße" "Straße")
201 ("ΌΣΟΣ" "ΌΣΟΣ" "όσοσ" "Όσοσ" "ΌΣΟΣ") 202 ("ΌΣΟΣ" "ΌΣΟΣ" "όσοσ" "Όσοσ" "ΌΣΟΣ")
202 203
203 ("όσος" "ΌΣΟΣ" "όσος" "Όσος" "Όσος")))))) 204 ("όσος" "ΌΣΟΣ" "όσος" "Όσος" "Όσος"))))))