aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichal Nazarewicz2016-09-07 21:00:57 +0200
committerMichal Nazarewicz2017-02-15 16:54:07 +0100
commit6220faeb4e9be16b9dec728e72ea8dff2cfe35ba (patch)
treed329bc3c65eb858ea8f03a2705ea5de696abac05
parent5ec3a58462e99533ea5200de356302181d634d0b (diff)
downloademacs-6220faeb4e9be16b9dec728e72ea8dff2cfe35ba.tar.gz
emacs-6220faeb4e9be16b9dec728e72ea8dff2cfe35ba.zip
casing: don’t assume letters are *either* upper- or lower-case (bug#24603)
Compatibility digraph characters, such as Dž, are neither upper- nor lower-case. At the moment however, those are reported as upper-case¹ despite the fact that they change when upper-cased. Stop checking if a character is upper-case before trying to up-case it so that title-case characters are handled correctly. This fixes one of the issues mentioned in bug#24603. ¹ Because they change when converted to lower-case. Notice an asymmetry in that for a character to be considered lower-case it must not be upper-case (plus the usual condition of changing when upper-cased). * src/buffer.h (upcase1): Delete. (upcase): Change to upcase character unconditionally just like downcase does it. This is what upcase1 was. * src/casefiddle.c (casify_object, casify_region): Use upcase instead of upcase1 and don’t check !uppercasep(x) before calling upcase. * src/keyboard.c (read_key_sequence): Don’t check if uppercasep(x), just downcase(x) and see if it changed. * test/src/casefiddle-tests.el (casefiddle-tests--characters, casefiddle-tests-casing): Update test cases which are now passing.
-rw-r--r--etc/NEWS8
-rw-r--r--src/buffer.h18
-rw-r--r--src/casefiddle.c20
-rw-r--r--src/keyboard.c25
-rw-r--r--test/src/casefiddle-tests.el8
5 files changed, 42 insertions, 37 deletions
diff --git a/etc/NEWS b/etc/NEWS
index 421e5daa3e4..a54c655c36d 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -338,6 +338,12 @@ same as in modes where the character is not whitespace.
338Instead of only checking the modification time, Emacs now also checks 338Instead of only checking the modification time, Emacs now also checks
339the file's actual content before prompting the user. 339the file's actual content before prompting the user.
340 340
341** Title case characters are properly converted to upper case.
342'upcase', 'upcase-region' et al. convert title case characters (such
343as Dz) into their upper case form (such as DZ). As a downside,
344'capitalize' and 'upcase-initials' produce awkward words where the first
345two letters are upper case, e.g. DŽungla (instead of Džungla).
346
341 347
342* Changes in Specialized Modes and Packages in Emacs 26.1 348* Changes in Specialized Modes and Packages in Emacs 26.1
343 349
@@ -1028,7 +1034,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
1028 1034
1029 1035
1030Local variables: 1036Local variables:
1031coding: us-ascii 1037coding: utf-8
1032mode: outline 1038mode: outline
1033paragraph-separate: "[ ]*$" 1039paragraph-separate: "[ ]*$"
1034end: 1040end:
diff --git a/src/buffer.h b/src/buffer.h
index 4a23e4fdd2e..f53212e3120 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -1365,28 +1365,28 @@ downcase (int c)
1365 return NATNUMP (down) ? XFASTINT (down) : c; 1365 return NATNUMP (down) ? XFASTINT (down) : c;
1366} 1366}
1367 1367
1368/* True if C is upper case. */ 1368/* Upcase a character C, or make no change if that cannot be done. */
1369INLINE bool uppercasep (int c) { return downcase (c) != c; }
1370
1371/* Upcase a character C known to be not upper case. */
1372INLINE int 1369INLINE int
1373upcase1 (int c) 1370upcase (int c)
1374{ 1371{
1375 Lisp_Object upcase_table = BVAR (current_buffer, upcase_table); 1372 Lisp_Object upcase_table = BVAR (current_buffer, upcase_table);
1376 Lisp_Object up = CHAR_TABLE_REF (upcase_table, c); 1373 Lisp_Object up = CHAR_TABLE_REF (upcase_table, c);
1377 return NATNUMP (up) ? XFASTINT (up) : c; 1374 return NATNUMP (up) ? XFASTINT (up) : c;
1378} 1375}
1379 1376
1377/* True if C is upper case. */
1378INLINE bool uppercasep (int c)
1379{
1380 return downcase (c) != c;
1381}
1382
1380/* True if C is lower case. */ 1383/* True if C is lower case. */
1381INLINE bool 1384INLINE bool
1382lowercasep (int c) 1385lowercasep (int c)
1383{ 1386{
1384 return !uppercasep (c) && upcase1 (c) != c; 1387 return !uppercasep (c) && upcase (c) != c;
1385} 1388}
1386 1389
1387/* Upcase a character C, or make no change if that cannot be done. */
1388INLINE int upcase (int c) { return uppercasep (c) ? c : upcase1 (c); }
1389
1390INLINE_HEADER_END 1390INLINE_HEADER_END
1391 1391
1392#endif /* EMACS_BUFFER_H */ 1392#endif /* EMACS_BUFFER_H */
diff --git a/src/casefiddle.c b/src/casefiddle.c
index 28ffcb298ff..b2b87e7a858 100644
--- a/src/casefiddle.c
+++ b/src/casefiddle.c
@@ -64,13 +64,9 @@ casify_object (enum case_action flag, Lisp_Object obj)
64 multibyte = 1; 64 multibyte = 1;
65 if (! multibyte) 65 if (! multibyte)
66 MAKE_CHAR_MULTIBYTE (c1); 66 MAKE_CHAR_MULTIBYTE (c1);
67 c = downcase (c1); 67 c = flag == CASE_DOWN ? downcase (c1) : upcase (c1);
68 if (inword) 68 if (c != c1)
69 XSETFASTINT (obj, c | flags);
70 else if (c == (XFASTINT (obj) & ~flagbits))
71 { 69 {
72 if (! inword)
73 c = upcase1 (c1);
74 if (! multibyte) 70 if (! multibyte)
75 MAKE_CHAR_UNIBYTE (c); 71 MAKE_CHAR_UNIBYTE (c);
76 XSETFASTINT (obj, c | flags); 72 XSETFASTINT (obj, c | flags);
@@ -95,7 +91,7 @@ casify_object (enum case_action flag, Lisp_Object obj)
95 c = downcase (c); 91 c = downcase (c);
96 else if (!uppercasep (c) 92 else if (!uppercasep (c)
97 && (!inword || flag != CASE_CAPITALIZE_UP)) 93 && (!inword || flag != CASE_CAPITALIZE_UP))
98 c = upcase1 (c1); 94 c = upcase (c1);
99 if ((int) flag >= (int) CASE_CAPITALIZE) 95 if ((int) flag >= (int) CASE_CAPITALIZE)
100 inword = (SYNTAX (c) == Sword); 96 inword = (SYNTAX (c) == Sword);
101 if (c != c1) 97 if (c != c1)
@@ -127,9 +123,8 @@ casify_object (enum case_action flag, Lisp_Object obj)
127 c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len); 123 c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len);
128 if (inword && flag != CASE_CAPITALIZE_UP) 124 if (inword && flag != CASE_CAPITALIZE_UP)
129 c = downcase (c); 125 c = downcase (c);
130 else if (!uppercasep (c) 126 else if (!inword || flag != CASE_CAPITALIZE_UP)
131 && (!inword || flag != CASE_CAPITALIZE_UP)) 127 c = upcase (c);
132 c = upcase1 (c);
133 if ((int) flag >= (int) CASE_CAPITALIZE) 128 if ((int) flag >= (int) CASE_CAPITALIZE)
134 inword = (SYNTAX (c) == Sword); 129 inword = (SYNTAX (c) == Sword);
135 o += CHAR_STRING (c, o); 130 o += CHAR_STRING (c, o);
@@ -236,9 +231,8 @@ casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
236 c2 = c; 231 c2 = c;
237 if (inword && flag != CASE_CAPITALIZE_UP) 232 if (inword && flag != CASE_CAPITALIZE_UP)
238 c = downcase (c); 233 c = downcase (c);
239 else if (!uppercasep (c) 234 else if (!inword || flag != CASE_CAPITALIZE_UP)
240 && (!inword || flag != CASE_CAPITALIZE_UP)) 235 c = upcase (c);
241 c = upcase1 (c);
242 if ((int) flag >= (int) CASE_CAPITALIZE) 236 if ((int) flag >= (int) CASE_CAPITALIZE)
243 inword = ((SYNTAX (c) == Sword) 237 inword = ((SYNTAX (c) == Sword)
244 && (inword || !syntax_prefix_flag_p (c))); 238 && (inword || !syntax_prefix_flag_p (c)));
diff --git a/src/keyboard.c b/src/keyboard.c
index ed8e71fd0a7..0fad633581d 100644
--- a/src/keyboard.c
+++ b/src/keyboard.c
@@ -9642,22 +9642,26 @@ read_key_sequence (Lisp_Object *keybuf, int bufsize, Lisp_Object prompt,
9642 use the corresponding lower-case letter instead. */ 9642 use the corresponding lower-case letter instead. */
9643 if (NILP (current_binding) 9643 if (NILP (current_binding)
9644 && /* indec.start >= t && fkey.start >= t && */ keytran.start >= t 9644 && /* indec.start >= t && fkey.start >= t && */ keytran.start >= t
9645 && INTEGERP (key) 9645 && INTEGERP (key))
9646 && ((CHARACTERP (make_number (XINT (key) & ~CHAR_MODIFIER_MASK))
9647 && uppercasep (XINT (key) & ~CHAR_MODIFIER_MASK))
9648 || (XINT (key) & shift_modifier)))
9649 { 9646 {
9650 Lisp_Object new_key; 9647 Lisp_Object new_key;
9648 int k = XINT (key);
9649
9650 if (k & shift_modifier)
9651 XSETINT (new_key, k & ~shift_modifier);
9652 else if (CHARACTERP (make_number (k & ~CHAR_MODIFIER_MASK)))
9653 {
9654 int dc = downcase(k & ~CHAR_MODIFIER_MASK);
9655 if (dc == (k & ~CHAR_MODIFIER_MASK))
9656 goto not_upcase;
9657 XSETINT (new_key, dc | (k & CHAR_MODIFIER_MASK));
9658 }
9659 else
9660 goto not_upcase;
9651 9661
9652 original_uppercase = key; 9662 original_uppercase = key;
9653 original_uppercase_position = t - 1; 9663 original_uppercase_position = t - 1;
9654 9664
9655 if (XINT (key) & shift_modifier)
9656 XSETINT (new_key, XINT (key) & ~shift_modifier);
9657 else
9658 XSETINT (new_key, (downcase (XINT (key) & ~CHAR_MODIFIER_MASK)
9659 | (XINT (key) & CHAR_MODIFIER_MASK)));
9660
9661 /* We have to do this unconditionally, regardless of whether 9665 /* We have to do this unconditionally, regardless of whether
9662 the lower-case char is defined in the keymaps, because they 9666 the lower-case char is defined in the keymaps, because they
9663 might get translated through function-key-map. */ 9667 might get translated through function-key-map. */
@@ -9668,6 +9672,7 @@ read_key_sequence (Lisp_Object *keybuf, int bufsize, Lisp_Object prompt,
9668 goto replay_sequence; 9672 goto replay_sequence;
9669 } 9673 }
9670 9674
9675 not_upcase:
9671 if (NILP (current_binding) 9676 if (NILP (current_binding)
9672 && help_char_p (EVENT_HEAD (key)) && t > 1) 9677 && help_char_p (EVENT_HEAD (key)) && t > 1)
9673 { 9678 {
diff --git a/test/src/casefiddle-tests.el b/test/src/casefiddle-tests.el
index c752bb09172..152d85de006 100644
--- a/test/src/casefiddle-tests.el
+++ b/test/src/casefiddle-tests.el
@@ -63,13 +63,13 @@
63 (?Ł ?Ł ?ł ?Ł) 63 (?Ł ?Ł ?ł ?Ł)
64 (?ł ?Ł ?ł ?Ł) 64 (?ł ?Ł ?ł ?Ł)
65 65
66 ;; FIXME(bug#24603): We should have: 66 ;; FIXME(bug#24603): Commented ones are what we want.
67 ;;(?DŽ ?DŽ ?dž ?Dž) 67 ;;(?DŽ ?DŽ ?dž ?Dž)
68 ;; but instead we have:
69 (?DŽ ?DŽ ?dž ?DŽ) 68 (?DŽ ?DŽ ?dž ?DŽ)
70 ;; FIXME(bug#24603): Those two are broken at the moment:
71 ;;(?Dž ?DŽ ?dž ?Dž) 69 ;;(?Dž ?DŽ ?dž ?Dž)
70 (?Dž ?DŽ ?dž ?DŽ)
72 ;;(?dž ?DŽ ?dž ?Dž) 71 ;;(?dž ?DŽ ?dž ?Dž)
72 (?dž ?DŽ ?dž ?DŽ)
73 73
74 (?Σ ?Σ ?σ ?Σ) 74 (?Σ ?Σ ?σ ?Σ)
75 (?σ ?Σ ?σ ?Σ) 75 (?σ ?Σ ?σ ?Σ)
@@ -197,7 +197,7 @@
197 ;;("ΌΣΟΣ" "ΌΣΟΣ" "όσος" "Όσος" "Όσος") 197 ;;("ΌΣΟΣ" "ΌΣΟΣ" "όσος" "Όσος" "Όσος")
198 ;; And here’s what is actually happening: 198 ;; And here’s what is actually happening:
199 ("DŽUNGLA" "DŽUNGLA" "džungla" "DŽungla" "DŽUNGLA") 199 ("DŽUNGLA" "DŽUNGLA" "džungla" "DŽungla" "DŽUNGLA")
200 ("Džungla" "UNGLA" "džungla" "ungla" "ungla") 200 ("Džungla" "UNGLA" "džungla" "ungla" "ungla")
201 ("džungla" "DŽUNGLA" "džungla" "DŽungla" "DŽungla") 201 ("džungla" "DŽUNGLA" "džungla" "DŽungla" "DŽungla")
202 ("define" "DEfiNE" "define" "Define" "Define") 202 ("define" "DEfiNE" "define" "Define" "Define")
203 ("fish" "fiSH" "fish" "fish" "fish") 203 ("fish" "fiSH" "fish" "fish" "fish")