diff options
| author | Michal Nazarewicz | 2016-10-05 00:06:01 +0200 |
|---|---|---|
| committer | Michal Nazarewicz | 2017-04-06 20:54:58 +0200 |
| commit | b3b9b258c4026baa1cad3f2e617f1a637fc8d205 (patch) | |
| tree | 1520ef9f5a3204784c597fcf2bf7a7c7fc1b8d7c /test/src | |
| parent | 2c87dabd0460cce83d2345b4ddff159969674fef (diff) | |
| download | emacs-b3b9b258c4026baa1cad3f2e617f1a637fc8d205.tar.gz emacs-b3b9b258c4026baa1cad3f2e617f1a637fc8d205.zip | |
Support casing characters which map into multiple code points (bug#24603)
Implement unconditional special casing rules defined in Unicode standard.
Among other things, they deal with cases when a single code point is
replaced by multiple ones because single character does not exist (e.g.
‘ﬁ’ ligature turning into ‘FI’) or is not commonly used (e.g. ß turning
into SS).
* admin/unidata/SpecialCasing.txt: New data file pulled from Unicode
standard distribution.
* admin/unidata/README: Mention SpecialCasing.txt.
* admin/unidata/unidata-gen.el (unidata-gen-table-special-casing,
unidata-gen-table-special-casing--do-load): New functions generating
‘special-uppercase’, ‘special-lowercase’ and ‘special-titlecase’
character Unicode properties built from the SpecialCasing.txt Unicode
data file.
* src/casefiddle.c (struct casing_str_buf): New structure for
representing short strings used to handle one-to-many character
mappings.
(case_character_impl): New function which can handle one-to-many
character mappings.
(case_character, case_single_character): Wrappers for the above
function. The former may map one character to multiple (or no)
code points while the latter does what the former used to do (i.e.
handles one-to-one mappings only).
(do_casify_natnum, do_casify_unibyte_string,
do_casify_unibyte_region): Use case_single_character.
(do_casify_multibyte_string, do_casify_multibyte_region): Support new
features of case_character.
(do_casify_region): Update to reflect do_casify_multibyte_string
changes.
(casify_word): Handle the situation where the character length of a word
can change, affecting where the end of the word is.
(upcase, capitalize, upcase-initials): Update documentation to mention
limitations when working on characters.
* test/src/casefiddle-tests.el (casefiddle-tests-char-properties):
Add test cases for the newly introduced character properties.
(casefiddle-tests-casing): Update test cases which are now passing.
* test/lisp/char-fold-tests.el (char-fold--ascii-upcase,
char-fold--ascii-downcase): New functions which behave like old ‘upcase’
and ‘downcase’.
(char-fold--test-match-exactly): Use the new functions. This is needed
because otherwise ﬁ and similar characters are turned into their
multi-character representation.
* doc/lispref/strings.texi: Describe issue with casing characters versus
strings.
* doc/lispref/nonascii.texi: Describe the new character properties.
Diffstat (limited to 'test/src')
| -rw-r--r-- | test/src/casefiddle-tests.el | 73 |
1 files changed, 37 insertions, 36 deletions
diff --git a/test/src/casefiddle-tests.el b/test/src/casefiddle-tests.el index e83cb00059b..dd260633f4c 100644 --- a/test/src/casefiddle-tests.el +++ b/test/src/casefiddle-tests.el | |||
| @@ -24,36 +24,40 @@ | |||
| 24 | 24 | ||
| 25 | (ert-deftest casefiddle-tests-char-properties () | 25 | (ert-deftest casefiddle-tests-char-properties () |
| 26 | "Sanity check of character Unicode properties." | 26 | "Sanity check of character Unicode properties." |
| 27 | (should-not | 27 | (let ((props '(uppercase lowercase titlecase |
| 28 | (let (errors) | 28 | special-uppercase special-lowercase special-titlecase)) |
| 29 | ;; character uppercase lowercase titlecase | 29 | (tests '((?A nil ?a nil nil nil nil) |
| 30 | (dolist (test '((?A nil ?a nil) | 30 | (?a ?A nil ?A nil nil nil) |
| 31 | (?a ?A nil ?A) | 31 | (?Ł nil ?ł nil nil nil nil) |
| 32 | (?Ł nil ?ł nil) | 32 | (?ł ?Ł nil ?Ł nil nil nil) |
| 33 | (?ł ?Ł nil ?Ł) | 33 | |
| 34 | 34 | (?DŽ nil ?dž ?Dž nil nil nil) | |
| 35 | (?DŽ nil ?dž ?Dž) | 35 | (?Dž ?DŽ ?dž ?Dž nil nil nil) |
| 36 | (?Dž ?DŽ ?dž ?Dž) | 36 | (?dž ?DŽ nil ?Dž nil nil nil) |
| 37 | (?dž ?DŽ nil ?Dž) | 37 | |
| 38 | 38 | (?Σ nil ?σ nil nil nil nil) | |
| 39 | (?Σ nil ?σ nil) | 39 | (?σ ?Σ nil ?Σ nil nil nil) |
| 40 | (?σ ?Σ nil ?Σ) | 40 | (?ς ?Σ nil ?Σ nil nil nil) |
| 41 | (?ς ?Σ nil ?Σ) | 41 | |
| 42 | 42 | (?ⅷ ?Ⅷ nil ?Ⅷ nil nil nil) | |
| 43 | (?ⅷ ?Ⅷ nil ?Ⅷ) | 43 | (?Ⅷ nil ?ⅷ nil nil nil nil) |
| 44 | (?Ⅷ nil ?ⅷ nil))) | 44 | |
| 45 | (let ((ch (car test)) | 45 | (?fi nil nil nil "FI" nil "Fi") |
| 46 | (expected (cdr test)) | 46 | (?ß nil nil nil "SS" nil "Ss") |
| 47 | (props '(uppercase lowercase titlecase))) | 47 | (?İ nil ?i nil nil "i\u0307" nil))) |
| 48 | (while props | 48 | errors) |
| 49 | (let ((got (get-char-code-property ch (car props)))) | 49 | (dolist (test tests) |
| 50 | (unless (equal (car expected) got) | 50 | (let ((ch (car test)) |
| 51 | (push (format "\n%c %s; expected: %s but got: %s" | 51 | (expected (cdr test))) |
| 52 | ch (car props) (car expected) got) | 52 | (dolist (prop props) |
| 53 | errors))) | 53 | (let ((got (get-char-code-property ch prop))) |
| 54 | (setq props (cdr props) expected (cdr expected))))) | 54 | (unless (equal (car expected) got) |
| 55 | (when errors | 55 | (push (format "\n%c %s; expected: %s but got: %s" |
| 56 | (mapconcat (lambda (line) line) (nreverse errors) ""))))) | 56 | ch prop (car expected) got) |
| 57 | errors))) | ||
| 58 | (setq expected (cdr expected))))) | ||
| 59 | (when errors | ||
| 60 | (ert-fail (mapconcat (lambda (line) line) (nreverse errors) ""))))) | ||
| 57 | 61 | ||
| 58 | 62 | ||
| 59 | (defconst casefiddle-tests--characters | 63 | (defconst casefiddle-tests--characters |
| @@ -188,16 +192,13 @@ | |||
| 188 | ("DŽUNGLA" "DŽUNGLA" "džungla" "Džungla" "DžUNGLA") | 192 | ("DŽUNGLA" "DŽUNGLA" "džungla" "Džungla" "DžUNGLA") |
| 189 | ("Džungla" "DŽUNGLA" "džungla" "Džungla" "Džungla") | 193 | ("Džungla" "DŽUNGLA" "džungla" "Džungla" "Džungla") |
| 190 | ("džungla" "DŽUNGLA" "džungla" "Džungla" "Džungla") | 194 | ("džungla" "DŽUNGLA" "džungla" "Džungla" "Džungla") |
| 195 | ("define" "DEFINE" "define" "Define" "Define") | ||
| 196 | ("fish" "FISH" "fish" "Fish" "Fish") | ||
| 197 | ("Straße" "STRASSE" "straße" "Straße" "Straße") | ||
| 191 | ;; FIXME(bug#24603): Everything below is broken at the moment. | 198 | ;; FIXME(bug#24603): Everything below is broken at the moment. |
| 192 | ;; Here’s what should happen: | 199 | ;; Here’s what should happen: |
| 193 | ;;("define" "DEFINE" "define" "Define" "Define") | ||
| 194 | ;;("fish" "FIsh" "fish" "Fish" "Fish") | ||
| 195 | ;;("Straße" "STRASSE" "straße" "Straße" "Straße") | ||
| 196 | ;;("ΌΣΟΣ" "ΌΣΟΣ" "όσος" "Όσος" "Όσος") | 200 | ;;("ΌΣΟΣ" "ΌΣΟΣ" "όσος" "Όσος" "Όσος") |
| 197 | ;; And here’s what is actually happening: | 201 | ;; And here’s what is actually happening: |
| 198 | ("define" "DEfiNE" "define" "Define" "Define") | ||
| 199 | ("fish" "fiSH" "fish" "fish" "fish") | ||
| 200 | ("Straße" "STRAßE" "straße" "Straße" "Straße") | ||
| 201 | ("ΌΣΟΣ" "ΌΣΟΣ" "όσοσ" "Όσοσ" "ΌΣΟΣ") | 202 | ("ΌΣΟΣ" "ΌΣΟΣ" "όσοσ" "Όσοσ" "ΌΣΟΣ") |
| 202 | 203 | ||
| 203 | ("όσος" "ΌΣΟΣ" "όσος" "Όσος" "Όσος")))))) | 204 | ("όσος" "ΌΣΟΣ" "όσος" "Όσος" "Όσος")))))) |