diff options
| author | Mattias Engdegård | 2020-12-09 13:27:16 +0100 |
|---|---|---|
| committer | Mattias Engdegård | 2020-12-09 15:29:48 +0100 |
| commit | beebd2a85eeab5b977ca2de8ad32784f9d8bdd51 (patch) | |
| tree | 2abf190d7e51d435c05c771b67316e4e8f86c3af | |
| parent | 445ab5cce95aee4cd5fee8ef67c2ee24c1c8850a (diff) | |
| download | emacs-beebd2a85eeab5b977ca2de8ad32784f9d8bdd51.tar.gz emacs-beebd2a85eeab5b977ca2de8ad32784f9d8bdd51.zip | |
Recognise ß properly as a lower-case letter (bug#11309)
ß was incorrectly treated as a caseless character and thus not matched
by the regexp [[:lower:]] (or, in case-folding mode, [[:upper:]]).
The reason is that the upcase table maps it to itself, which can be
remedied by mapping it to ẞ (U+1E9E) instead. Doing so does not
affect upcasing since the special-uppercase property maps it to SS.
* lisp/international/characters.el (tbl): Map ß to ẞ in the upcase
table.
* test/src/regex-emacs-tests.el (regexp-eszett): Uncomment previously
failing tests. Add checks to make sure that case transformations
remain valid.
| -rw-r--r-- | lisp/international/characters.el | 9 | ||||
| -rw-r--r-- | test/src/regex-emacs-tests.el | 15 |
2 files changed, 19 insertions, 5 deletions
diff --git a/lisp/international/characters.el b/lisp/international/characters.el index 0b6920cf180..5f610ddf670 100644 --- a/lisp/international/characters.el +++ b/lisp/international/characters.el | |||
| @@ -759,7 +759,14 @@ with L, LRE, or LRO Unicode bidi character type.") | |||
| 759 | (funcall map-unicode-property 'uppercase | 759 | (funcall map-unicode-property 'uppercase |
| 760 | (lambda (lc uc) (aset up lc uc) (aset up uc uc))) | 760 | (lambda (lc uc) (aset up lc uc) (aset up uc uc))) |
| 761 | (funcall map-unicode-property 'lowercase | 761 | (funcall map-unicode-property 'lowercase |
| 762 | (lambda (uc lc) (aset down uc lc) (aset down lc lc)))))) | 762 | (lambda (uc lc) (aset down uc lc) (aset down lc lc))) |
| 763 | |||
| 764 | ;; Override the Unicode uppercase property for ß, since we are | ||
| 765 | ;; using our case tables for determining the case of a | ||
| 766 | ;; character (see uppercasep and lowercasep in buffer.h). | ||
| 767 | ;; The special-uppercase property of ß ensures that it is | ||
| 768 | ;; still upcased to SS per the usual convention. | ||
| 769 | (aset up ?ß ?ẞ)))) | ||
| 763 | 770 | ||
| 764 | ;; Clear out the extra slots so that they will be recomputed from the main | 771 | ;; Clear out the extra slots so that they will be recomputed from the main |
| 765 | ;; (downcase) table and upcase table. Since we’re side-stepping the usual | 772 | ;; (downcase) table and upcase table. Since we’re side-stepping the usual |
diff --git a/test/src/regex-emacs-tests.el b/test/src/regex-emacs-tests.el index 576630aa5af..34d4067db47 100644 --- a/test/src/regex-emacs-tests.el +++ b/test/src/regex-emacs-tests.el | |||
| @@ -834,6 +834,13 @@ This evaluates the TESTS test cases from glibc." | |||
| 834 | 834 | ||
| 835 | (ert-deftest regexp-eszett () | 835 | (ert-deftest regexp-eszett () |
| 836 | "Test matching of ß and ẞ." | 836 | "Test matching of ß and ẞ." |
| 837 | ;; Sanity checks. | ||
| 838 | (should (equal (upcase "ß") "SS")) | ||
| 839 | (should (equal (downcase "ß") "ß")) | ||
| 840 | (should (equal (capitalize "ß") "Ss")) ; undeutsch... | ||
| 841 | (should (equal (upcase "ẞ") "ẞ")) | ||
| 842 | (should (equal (downcase "ẞ") "ß")) | ||
| 843 | (should (equal (capitalize "ẞ") "ẞ")) | ||
| 837 | ;; ß is a lower-case letter (Ll); ẞ is an upper-case letter (Lu). | 844 | ;; ß is a lower-case letter (Ll); ẞ is an upper-case letter (Lu). |
| 838 | (let ((case-fold-search nil)) | 845 | (let ((case-fold-search nil)) |
| 839 | (should (equal (string-match "ß" "ß") 0)) | 846 | (should (equal (string-match "ß" "ß") 0)) |
| @@ -842,8 +849,8 @@ This evaluates the TESTS test cases from glibc." | |||
| 842 | (should (equal (string-match "ẞ" "ẞ") 0)) | 849 | (should (equal (string-match "ẞ" "ẞ") 0)) |
| 843 | (should (equal (string-match "[[:alpha:]]" "ß") 0)) | 850 | (should (equal (string-match "[[:alpha:]]" "ß") 0)) |
| 844 | ;; bug#11309 | 851 | ;; bug#11309 |
| 845 | ;;(should (equal (string-match "[[:lower:]]" "ß") 0)) | 852 | (should (equal (string-match "[[:lower:]]" "ß") 0)) |
| 846 | ;;(should (equal (string-match "[[:upper:]]" "ß") nil)) | 853 | (should (equal (string-match "[[:upper:]]" "ß") nil)) |
| 847 | (should (equal (string-match "[[:alpha:]]" "ẞ") 0)) | 854 | (should (equal (string-match "[[:alpha:]]" "ẞ") 0)) |
| 848 | (should (equal (string-match "[[:lower:]]" "ẞ") nil)) | 855 | (should (equal (string-match "[[:lower:]]" "ẞ") nil)) |
| 849 | (should (equal (string-match "[[:upper:]]" "ẞ") 0))) | 856 | (should (equal (string-match "[[:upper:]]" "ẞ") 0))) |
| @@ -854,8 +861,8 @@ This evaluates the TESTS test cases from glibc." | |||
| 854 | (should (equal (string-match "ẞ" "ẞ") 0)) | 861 | (should (equal (string-match "ẞ" "ẞ") 0)) |
| 855 | (should (equal (string-match "[[:alpha:]]" "ß") 0)) | 862 | (should (equal (string-match "[[:alpha:]]" "ß") 0)) |
| 856 | ;; bug#11309 | 863 | ;; bug#11309 |
| 857 | ;;(should (equal (string-match "[[:lower:]]" "ß") 0)) | 864 | (should (equal (string-match "[[:lower:]]" "ß") 0)) |
| 858 | ;;(should (equal (string-match "[[:upper:]]" "ß") 0)) | 865 | (should (equal (string-match "[[:upper:]]" "ß") 0)) |
| 859 | (should (equal (string-match "[[:alpha:]]" "ẞ") 0)) | 866 | (should (equal (string-match "[[:alpha:]]" "ẞ") 0)) |
| 860 | (should (equal (string-match "[[:lower:]]" "ẞ") 0)) | 867 | (should (equal (string-match "[[:lower:]]" "ẞ") 0)) |
| 861 | (should (equal (string-match "[[:upper:]]" "ẞ") 0)))) | 868 | (should (equal (string-match "[[:upper:]]" "ẞ") 0)))) |