aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMattias Engdegård2020-12-09 13:27:16 +0100
committerMattias Engdegård2020-12-09 15:29:48 +0100
commitbeebd2a85eeab5b977ca2de8ad32784f9d8bdd51 (patch)
tree2abf190d7e51d435c05c771b67316e4e8f86c3af
parent445ab5cce95aee4cd5fee8ef67c2ee24c1c8850a (diff)
downloademacs-beebd2a85eeab5b977ca2de8ad32784f9d8bdd51.tar.gz
emacs-beebd2a85eeab5b977ca2de8ad32784f9d8bdd51.zip
Recognise ß properly as a lower-case letter (bug#11309)
ß was incorrectly treated as a caseless character and thus not matched by the regexp [[:lower:]] (or, in case-folding mode, [[:upper:]]). The reason is that the upcase table maps it to itself, which can be remedied by mapping it to ẞ (U+1E9E) instead. Doing so does not affect upcasing since the special-uppercase property maps it to SS. * lisp/international/characters.el (tbl): Map ß to ẞ in the upcase table. * test/src/regex-emacs-tests.el (regexp-eszett): Uncomment previously failing tests. Add checks to make sure that case transformations remain valid.
-rw-r--r--lisp/international/characters.el9
-rw-r--r--test/src/regex-emacs-tests.el15
2 files changed, 19 insertions, 5 deletions
diff --git a/lisp/international/characters.el b/lisp/international/characters.el
index 0b6920cf180..5f610ddf670 100644
--- a/lisp/international/characters.el
+++ b/lisp/international/characters.el
@@ -759,7 +759,14 @@ with L, LRE, or LRO Unicode bidi character type.")
759 (funcall map-unicode-property 'uppercase 759 (funcall map-unicode-property 'uppercase
760 (lambda (lc uc) (aset up lc uc) (aset up uc uc))) 760 (lambda (lc uc) (aset up lc uc) (aset up uc uc)))
761 (funcall map-unicode-property 'lowercase 761 (funcall map-unicode-property 'lowercase
762 (lambda (uc lc) (aset down uc lc) (aset down lc lc)))))) 762 (lambda (uc lc) (aset down uc lc) (aset down lc lc)))
763
764 ;; Override the Unicode uppercase property for ß, since we are
765 ;; using our case tables for determining the case of a
766 ;; character (see uppercasep and lowercasep in buffer.h).
767 ;; The special-uppercase property of ß ensures that it is
768 ;; still upcased to SS per the usual convention.
769 (aset up ?ß ?ẞ))))
763 770
764 ;; Clear out the extra slots so that they will be recomputed from the main 771 ;; Clear out the extra slots so that they will be recomputed from the main
765 ;; (downcase) table and upcase table. Since we’re side-stepping the usual 772 ;; (downcase) table and upcase table. Since we’re side-stepping the usual
diff --git a/test/src/regex-emacs-tests.el b/test/src/regex-emacs-tests.el
index 576630aa5af..34d4067db47 100644
--- a/test/src/regex-emacs-tests.el
+++ b/test/src/regex-emacs-tests.el
@@ -834,6 +834,13 @@ This evaluates the TESTS test cases from glibc."
834 834
835(ert-deftest regexp-eszett () 835(ert-deftest regexp-eszett ()
836 "Test matching of ß and ẞ." 836 "Test matching of ß and ẞ."
837 ;; Sanity checks.
838 (should (equal (upcase "ß") "SS"))
839 (should (equal (downcase "ß") "ß"))
840 (should (equal (capitalize "ß") "Ss")) ; undeutsch...
841 (should (equal (upcase "ẞ") "ẞ"))
842 (should (equal (downcase "ẞ") "ß"))
843 (should (equal (capitalize "ẞ") "ẞ"))
837 ;; ß is a lower-case letter (Ll); ẞ is an upper-case letter (Lu). 844 ;; ß is a lower-case letter (Ll); ẞ is an upper-case letter (Lu).
838 (let ((case-fold-search nil)) 845 (let ((case-fold-search nil))
839 (should (equal (string-match "ß" "ß") 0)) 846 (should (equal (string-match "ß" "ß") 0))
@@ -842,8 +849,8 @@ This evaluates the TESTS test cases from glibc."
842 (should (equal (string-match "ẞ" "ẞ") 0)) 849 (should (equal (string-match "ẞ" "ẞ") 0))
843 (should (equal (string-match "[[:alpha:]]" "ß") 0)) 850 (should (equal (string-match "[[:alpha:]]" "ß") 0))
844 ;; bug#11309 851 ;; bug#11309
845 ;;(should (equal (string-match "[[:lower:]]" "ß") 0)) 852 (should (equal (string-match "[[:lower:]]" "ß") 0))
846 ;;(should (equal (string-match "[[:upper:]]" "ß") nil)) 853 (should (equal (string-match "[[:upper:]]" "ß") nil))
847 (should (equal (string-match "[[:alpha:]]" "ẞ") 0)) 854 (should (equal (string-match "[[:alpha:]]" "ẞ") 0))
848 (should (equal (string-match "[[:lower:]]" "ẞ") nil)) 855 (should (equal (string-match "[[:lower:]]" "ẞ") nil))
849 (should (equal (string-match "[[:upper:]]" "ẞ") 0))) 856 (should (equal (string-match "[[:upper:]]" "ẞ") 0)))
@@ -854,8 +861,8 @@ This evaluates the TESTS test cases from glibc."
854 (should (equal (string-match "ẞ" "ẞ") 0)) 861 (should (equal (string-match "ẞ" "ẞ") 0))
855 (should (equal (string-match "[[:alpha:]]" "ß") 0)) 862 (should (equal (string-match "[[:alpha:]]" "ß") 0))
856 ;; bug#11309 863 ;; bug#11309
857 ;;(should (equal (string-match "[[:lower:]]" "ß") 0)) 864 (should (equal (string-match "[[:lower:]]" "ß") 0))
858 ;;(should (equal (string-match "[[:upper:]]" "ß") 0)) 865 (should (equal (string-match "[[:upper:]]" "ß") 0))
859 (should (equal (string-match "[[:alpha:]]" "ẞ") 0)) 866 (should (equal (string-match "[[:alpha:]]" "ẞ") 0))
860 (should (equal (string-match "[[:lower:]]" "ẞ") 0)) 867 (should (equal (string-match "[[:lower:]]" "ẞ") 0))
861 (should (equal (string-match "[[:upper:]]" "ẞ") 0)))) 868 (should (equal (string-match "[[:upper:]]" "ẞ") 0))))