diff options
| author | Michal Nazarewicz | 2016-09-07 21:00:57 +0200 |
|---|---|---|
| committer | Michal Nazarewicz | 2017-02-15 16:54:07 +0100 |
| commit | 6220faeb4e9be16b9dec728e72ea8dff2cfe35ba (patch) | |
| tree | d329bc3c65eb858ea8f03a2705ea5de696abac05 | |
| parent | 5ec3a58462e99533ea5200de356302181d634d0b (diff) | |
| download | emacs-6220faeb4e9be16b9dec728e72ea8dff2cfe35ba.tar.gz emacs-6220faeb4e9be16b9dec728e72ea8dff2cfe35ba.zip | |
casing: don’t assume letters are *either* upper- or lower-case (bug#24603)
Compatibility digraph characters, such as Dž, are neither upper- nor
lower-case. At the moment however, those are reported as upper-case¹
despite the fact that they change when upper-cased.
Stop checking if a character is upper-case before trying to up-case it
so that title-case characters are handled correctly. This fixes one of
the issues mentioned in bug#24603.
¹ Because they change when converted to lower-case. Notice an asymmetry
in that for a character to be considered lower-case it must not be
upper-case (plus the usual condition of changing when upper-cased).
* src/buffer.h (upcase1): Delete.
(upcase): Change to upcase character unconditionally just like downcase
does it. This is what upcase1 was.
* src/casefiddle.c (casify_object, casify_region): Use upcase instead
of upcase1 and don’t check !uppercasep(x) before calling upcase.
* src/keyboard.c (read_key_sequence): Don’t check if uppercasep(x), just
downcase(x) and see if it changed.
* test/src/casefiddle-tests.el (casefiddle-tests--characters,
casefiddle-tests-casing): Update test cases which are now passing.
| -rw-r--r-- | etc/NEWS | 8 | ||||
| -rw-r--r-- | src/buffer.h | 18 | ||||
| -rw-r--r-- | src/casefiddle.c | 20 | ||||
| -rw-r--r-- | src/keyboard.c | 25 | ||||
| -rw-r--r-- | test/src/casefiddle-tests.el | 8 |
5 files changed, 42 insertions, 37 deletions
| @@ -338,6 +338,12 @@ same as in modes where the character is not whitespace. | |||
| 338 | Instead of only checking the modification time, Emacs now also checks | 338 | Instead of only checking the modification time, Emacs now also checks |
| 339 | the file's actual content before prompting the user. | 339 | the file's actual content before prompting the user. |
| 340 | 340 | ||
| 341 | ** Title case characters are properly converted to upper case. | ||
| 342 | 'upcase', 'upcase-region' et al. convert title case characters (such | ||
| 343 | as Dz) into their upper case form (such as DZ). As a downside, | ||
| 344 | 'capitalize' and 'upcase-initials' produce awkward words where the first | ||
| 345 | two letters are upper case, e.g. DŽungla (instead of Džungla). | ||
| 346 | |||
| 341 | 347 | ||
| 342 | * Changes in Specialized Modes and Packages in Emacs 26.1 | 348 | * Changes in Specialized Modes and Packages in Emacs 26.1 |
| 343 | 349 | ||
| @@ -1028,7 +1034,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. | |||
| 1028 | 1034 | ||
| 1029 | 1035 | ||
| 1030 | Local variables: | 1036 | Local variables: |
| 1031 | coding: us-ascii | 1037 | coding: utf-8 |
| 1032 | mode: outline | 1038 | mode: outline |
| 1033 | paragraph-separate: "[ ]*$" | 1039 | paragraph-separate: "[ ]*$" |
| 1034 | end: | 1040 | end: |
diff --git a/src/buffer.h b/src/buffer.h index 4a23e4fdd2e..f53212e3120 100644 --- a/src/buffer.h +++ b/src/buffer.h | |||
| @@ -1365,28 +1365,28 @@ downcase (int c) | |||
| 1365 | return NATNUMP (down) ? XFASTINT (down) : c; | 1365 | return NATNUMP (down) ? XFASTINT (down) : c; |
| 1366 | } | 1366 | } |
| 1367 | 1367 | ||
| 1368 | /* True if C is upper case. */ | 1368 | /* Upcase a character C, or make no change if that cannot be done. */ |
| 1369 | INLINE bool uppercasep (int c) { return downcase (c) != c; } | ||
| 1370 | |||
| 1371 | /* Upcase a character C known to be not upper case. */ | ||
| 1372 | INLINE int | 1369 | INLINE int |
| 1373 | upcase1 (int c) | 1370 | upcase (int c) |
| 1374 | { | 1371 | { |
| 1375 | Lisp_Object upcase_table = BVAR (current_buffer, upcase_table); | 1372 | Lisp_Object upcase_table = BVAR (current_buffer, upcase_table); |
| 1376 | Lisp_Object up = CHAR_TABLE_REF (upcase_table, c); | 1373 | Lisp_Object up = CHAR_TABLE_REF (upcase_table, c); |
| 1377 | return NATNUMP (up) ? XFASTINT (up) : c; | 1374 | return NATNUMP (up) ? XFASTINT (up) : c; |
| 1378 | } | 1375 | } |
| 1379 | 1376 | ||
| 1377 | /* True if C is upper case. */ | ||
| 1378 | INLINE bool uppercasep (int c) | ||
| 1379 | { | ||
| 1380 | return downcase (c) != c; | ||
| 1381 | } | ||
| 1382 | |||
| 1380 | /* True if C is lower case. */ | 1383 | /* True if C is lower case. */ |
| 1381 | INLINE bool | 1384 | INLINE bool |
| 1382 | lowercasep (int c) | 1385 | lowercasep (int c) |
| 1383 | { | 1386 | { |
| 1384 | return !uppercasep (c) && upcase1 (c) != c; | 1387 | return !uppercasep (c) && upcase (c) != c; |
| 1385 | } | 1388 | } |
| 1386 | 1389 | ||
| 1387 | /* Upcase a character C, or make no change if that cannot be done. */ | ||
| 1388 | INLINE int upcase (int c) { return uppercasep (c) ? c : upcase1 (c); } | ||
| 1389 | |||
| 1390 | INLINE_HEADER_END | 1390 | INLINE_HEADER_END |
| 1391 | 1391 | ||
| 1392 | #endif /* EMACS_BUFFER_H */ | 1392 | #endif /* EMACS_BUFFER_H */ |
diff --git a/src/casefiddle.c b/src/casefiddle.c index 28ffcb298ff..b2b87e7a858 100644 --- a/src/casefiddle.c +++ b/src/casefiddle.c | |||
| @@ -64,13 +64,9 @@ casify_object (enum case_action flag, Lisp_Object obj) | |||
| 64 | multibyte = 1; | 64 | multibyte = 1; |
| 65 | if (! multibyte) | 65 | if (! multibyte) |
| 66 | MAKE_CHAR_MULTIBYTE (c1); | 66 | MAKE_CHAR_MULTIBYTE (c1); |
| 67 | c = downcase (c1); | 67 | c = flag == CASE_DOWN ? downcase (c1) : upcase (c1); |
| 68 | if (inword) | 68 | if (c != c1) |
| 69 | XSETFASTINT (obj, c | flags); | ||
| 70 | else if (c == (XFASTINT (obj) & ~flagbits)) | ||
| 71 | { | 69 | { |
| 72 | if (! inword) | ||
| 73 | c = upcase1 (c1); | ||
| 74 | if (! multibyte) | 70 | if (! multibyte) |
| 75 | MAKE_CHAR_UNIBYTE (c); | 71 | MAKE_CHAR_UNIBYTE (c); |
| 76 | XSETFASTINT (obj, c | flags); | 72 | XSETFASTINT (obj, c | flags); |
| @@ -95,7 +91,7 @@ casify_object (enum case_action flag, Lisp_Object obj) | |||
| 95 | c = downcase (c); | 91 | c = downcase (c); |
| 96 | else if (!uppercasep (c) | 92 | else if (!uppercasep (c) |
| 97 | && (!inword || flag != CASE_CAPITALIZE_UP)) | 93 | && (!inword || flag != CASE_CAPITALIZE_UP)) |
| 98 | c = upcase1 (c1); | 94 | c = upcase (c1); |
| 99 | if ((int) flag >= (int) CASE_CAPITALIZE) | 95 | if ((int) flag >= (int) CASE_CAPITALIZE) |
| 100 | inword = (SYNTAX (c) == Sword); | 96 | inword = (SYNTAX (c) == Sword); |
| 101 | if (c != c1) | 97 | if (c != c1) |
| @@ -127,9 +123,8 @@ casify_object (enum case_action flag, Lisp_Object obj) | |||
| 127 | c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len); | 123 | c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len); |
| 128 | if (inword && flag != CASE_CAPITALIZE_UP) | 124 | if (inword && flag != CASE_CAPITALIZE_UP) |
| 129 | c = downcase (c); | 125 | c = downcase (c); |
| 130 | else if (!uppercasep (c) | 126 | else if (!inword || flag != CASE_CAPITALIZE_UP) |
| 131 | && (!inword || flag != CASE_CAPITALIZE_UP)) | 127 | c = upcase (c); |
| 132 | c = upcase1 (c); | ||
| 133 | if ((int) flag >= (int) CASE_CAPITALIZE) | 128 | if ((int) flag >= (int) CASE_CAPITALIZE) |
| 134 | inword = (SYNTAX (c) == Sword); | 129 | inword = (SYNTAX (c) == Sword); |
| 135 | o += CHAR_STRING (c, o); | 130 | o += CHAR_STRING (c, o); |
| @@ -236,9 +231,8 @@ casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e) | |||
| 236 | c2 = c; | 231 | c2 = c; |
| 237 | if (inword && flag != CASE_CAPITALIZE_UP) | 232 | if (inword && flag != CASE_CAPITALIZE_UP) |
| 238 | c = downcase (c); | 233 | c = downcase (c); |
| 239 | else if (!uppercasep (c) | 234 | else if (!inword || flag != CASE_CAPITALIZE_UP) |
| 240 | && (!inword || flag != CASE_CAPITALIZE_UP)) | 235 | c = upcase (c); |
| 241 | c = upcase1 (c); | ||
| 242 | if ((int) flag >= (int) CASE_CAPITALIZE) | 236 | if ((int) flag >= (int) CASE_CAPITALIZE) |
| 243 | inword = ((SYNTAX (c) == Sword) | 237 | inword = ((SYNTAX (c) == Sword) |
| 244 | && (inword || !syntax_prefix_flag_p (c))); | 238 | && (inword || !syntax_prefix_flag_p (c))); |
diff --git a/src/keyboard.c b/src/keyboard.c index ed8e71fd0a7..0fad633581d 100644 --- a/src/keyboard.c +++ b/src/keyboard.c | |||
| @@ -9642,22 +9642,26 @@ read_key_sequence (Lisp_Object *keybuf, int bufsize, Lisp_Object prompt, | |||
| 9642 | use the corresponding lower-case letter instead. */ | 9642 | use the corresponding lower-case letter instead. */ |
| 9643 | if (NILP (current_binding) | 9643 | if (NILP (current_binding) |
| 9644 | && /* indec.start >= t && fkey.start >= t && */ keytran.start >= t | 9644 | && /* indec.start >= t && fkey.start >= t && */ keytran.start >= t |
| 9645 | && INTEGERP (key) | 9645 | && INTEGERP (key)) |
| 9646 | && ((CHARACTERP (make_number (XINT (key) & ~CHAR_MODIFIER_MASK)) | ||
| 9647 | && uppercasep (XINT (key) & ~CHAR_MODIFIER_MASK)) | ||
| 9648 | || (XINT (key) & shift_modifier))) | ||
| 9649 | { | 9646 | { |
| 9650 | Lisp_Object new_key; | 9647 | Lisp_Object new_key; |
| 9648 | int k = XINT (key); | ||
| 9649 | |||
| 9650 | if (k & shift_modifier) | ||
| 9651 | XSETINT (new_key, k & ~shift_modifier); | ||
| 9652 | else if (CHARACTERP (make_number (k & ~CHAR_MODIFIER_MASK))) | ||
| 9653 | { | ||
| 9654 | int dc = downcase(k & ~CHAR_MODIFIER_MASK); | ||
| 9655 | if (dc == (k & ~CHAR_MODIFIER_MASK)) | ||
| 9656 | goto not_upcase; | ||
| 9657 | XSETINT (new_key, dc | (k & CHAR_MODIFIER_MASK)); | ||
| 9658 | } | ||
| 9659 | else | ||
| 9660 | goto not_upcase; | ||
| 9651 | 9661 | ||
| 9652 | original_uppercase = key; | 9662 | original_uppercase = key; |
| 9653 | original_uppercase_position = t - 1; | 9663 | original_uppercase_position = t - 1; |
| 9654 | 9664 | ||
| 9655 | if (XINT (key) & shift_modifier) | ||
| 9656 | XSETINT (new_key, XINT (key) & ~shift_modifier); | ||
| 9657 | else | ||
| 9658 | XSETINT (new_key, (downcase (XINT (key) & ~CHAR_MODIFIER_MASK) | ||
| 9659 | | (XINT (key) & CHAR_MODIFIER_MASK))); | ||
| 9660 | |||
| 9661 | /* We have to do this unconditionally, regardless of whether | 9665 | /* We have to do this unconditionally, regardless of whether |
| 9662 | the lower-case char is defined in the keymaps, because they | 9666 | the lower-case char is defined in the keymaps, because they |
| 9663 | might get translated through function-key-map. */ | 9667 | might get translated through function-key-map. */ |
| @@ -9668,6 +9672,7 @@ read_key_sequence (Lisp_Object *keybuf, int bufsize, Lisp_Object prompt, | |||
| 9668 | goto replay_sequence; | 9672 | goto replay_sequence; |
| 9669 | } | 9673 | } |
| 9670 | 9674 | ||
| 9675 | not_upcase: | ||
| 9671 | if (NILP (current_binding) | 9676 | if (NILP (current_binding) |
| 9672 | && help_char_p (EVENT_HEAD (key)) && t > 1) | 9677 | && help_char_p (EVENT_HEAD (key)) && t > 1) |
| 9673 | { | 9678 | { |
diff --git a/test/src/casefiddle-tests.el b/test/src/casefiddle-tests.el index c752bb09172..152d85de006 100644 --- a/test/src/casefiddle-tests.el +++ b/test/src/casefiddle-tests.el | |||
| @@ -63,13 +63,13 @@ | |||
| 63 | (?Ł ?Ł ?ł ?Ł) | 63 | (?Ł ?Ł ?ł ?Ł) |
| 64 | (?ł ?Ł ?ł ?Ł) | 64 | (?ł ?Ł ?ł ?Ł) |
| 65 | 65 | ||
| 66 | ;; FIXME(bug#24603): We should have: | 66 | ;; FIXME(bug#24603): Commented ones are what we want. |
| 67 | ;;(?DŽ ?DŽ ?dž ?Dž) | 67 | ;;(?DŽ ?DŽ ?dž ?Dž) |
| 68 | ;; but instead we have: | ||
| 69 | (?DŽ ?DŽ ?dž ?DŽ) | 68 | (?DŽ ?DŽ ?dž ?DŽ) |
| 70 | ;; FIXME(bug#24603): Those two are broken at the moment: | ||
| 71 | ;;(?Dž ?DŽ ?dž ?Dž) | 69 | ;;(?Dž ?DŽ ?dž ?Dž) |
| 70 | (?Dž ?DŽ ?dž ?DŽ) | ||
| 72 | ;;(?dž ?DŽ ?dž ?Dž) | 71 | ;;(?dž ?DŽ ?dž ?Dž) |
| 72 | (?dž ?DŽ ?dž ?DŽ) | ||
| 73 | 73 | ||
| 74 | (?Σ ?Σ ?σ ?Σ) | 74 | (?Σ ?Σ ?σ ?Σ) |
| 75 | (?σ ?Σ ?σ ?Σ) | 75 | (?σ ?Σ ?σ ?Σ) |
| @@ -197,7 +197,7 @@ | |||
| 197 | ;;("ΌΣΟΣ" "ΌΣΟΣ" "όσος" "Όσος" "Όσος") | 197 | ;;("ΌΣΟΣ" "ΌΣΟΣ" "όσος" "Όσος" "Όσος") |
| 198 | ;; And here’s what is actually happening: | 198 | ;; And here’s what is actually happening: |
| 199 | ("DŽUNGLA" "DŽUNGLA" "džungla" "DŽungla" "DŽUNGLA") | 199 | ("DŽUNGLA" "DŽUNGLA" "džungla" "DŽungla" "DŽUNGLA") |
| 200 | ("Džungla" "DžUNGLA" "džungla" "Džungla" "Džungla") | 200 | ("Džungla" "DŽUNGLA" "džungla" "DŽungla" "DŽungla") |
| 201 | ("džungla" "DŽUNGLA" "džungla" "DŽungla" "DŽungla") | 201 | ("džungla" "DŽUNGLA" "džungla" "DŽungla" "DŽungla") |
| 202 | ("define" "DEfiNE" "define" "Define" "Define") | 202 | ("define" "DEfiNE" "define" "Define" "Define") |
| 203 | ("fish" "fiSH" "fish" "fish" "fish") | 203 | ("fish" "fiSH" "fish" "fish" "fish") |