diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/casefiddle.c | 196 |
1 files changed, 104 insertions, 92 deletions
diff --git a/src/casefiddle.c b/src/casefiddle.c index 11d59444916..72661979b4d 100644 --- a/src/casefiddle.c +++ b/src/casefiddle.c | |||
| @@ -32,108 +32,120 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ | |||
| 32 | enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP}; | 32 | enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP}; |
| 33 | 33 | ||
| 34 | static Lisp_Object | 34 | static Lisp_Object |
| 35 | casify_object (enum case_action flag, Lisp_Object obj) | 35 | do_casify_natnum (enum case_action flag, Lisp_Object obj) |
| 36 | { | ||
| 37 | int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER | ||
| 38 | | CHAR_SHIFT | CHAR_CTL | CHAR_META); | ||
| 39 | int flags, ch = XFASTINT (obj), cased; | ||
| 40 | bool multibyte; | ||
| 41 | |||
| 42 | /* If the character has higher bits set above the flags, return it unchanged. | ||
| 43 | It is not a real character. */ | ||
| 44 | if (UNSIGNED_CMP (ch, >, flagbits)) | ||
| 45 | return obj; | ||
| 46 | |||
| 47 | flags = ch & flagbits; | ||
| 48 | ch = ch & ~flagbits; | ||
| 49 | |||
| 50 | /* FIXME: Even if enable-multibyte-characters is nil, we may manipulate | ||
| 51 | multibyte chars. This means we have a bug for latin-1 chars since when we | ||
| 52 | receive an int 128-255 we can't tell whether it's an eight-bit byte or | ||
| 53 | a latin-1 char. */ | ||
| 54 | multibyte = ch >= 256 | ||
| 55 | || !NILP (BVAR (current_buffer, enable_multibyte_characters)); | ||
| 56 | if (! multibyte) | ||
| 57 | MAKE_CHAR_MULTIBYTE (ch); | ||
| 58 | cased = flag == CASE_DOWN ? downcase (ch) : upcase (ch); | ||
| 59 | if (cased == ch) | ||
| 60 | return obj; | ||
| 61 | |||
| 62 | if (! multibyte) | ||
| 63 | MAKE_CHAR_UNIBYTE (cased); | ||
| 64 | XSETFASTINT (obj, cased | flags); | ||
| 65 | return obj; | ||
| 66 | } | ||
| 67 | |||
| 68 | static Lisp_Object | ||
| 69 | do_casify_multibyte_string (enum case_action flag, Lisp_Object obj) | ||
| 70 | { | ||
| 71 | ptrdiff_t i, i_byte, size = SCHARS (obj); | ||
| 72 | bool inword = flag == CASE_DOWN; | ||
| 73 | int len, ch, cased; | ||
| 74 | USE_SAFE_ALLOCA; | ||
| 75 | ptrdiff_t o_size; | ||
| 76 | if (INT_MULTIPLY_WRAPV (size, MAX_MULTIBYTE_LENGTH, &o_size)) | ||
| 77 | o_size = PTRDIFF_MAX; | ||
| 78 | unsigned char *dst = SAFE_ALLOCA (o_size); | ||
| 79 | unsigned char *o = dst; | ||
| 80 | |||
| 81 | for (i = i_byte = 0; i < size; i++, i_byte += len) | ||
| 82 | { | ||
| 83 | if (o_size - MAX_MULTIBYTE_LENGTH < o - dst) | ||
| 84 | string_overflow (); | ||
| 85 | ch = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len); | ||
| 86 | if (inword && flag != CASE_CAPITALIZE_UP) | ||
| 87 | cased = downcase (ch); | ||
| 88 | else if (!inword || flag != CASE_CAPITALIZE_UP) | ||
| 89 | cased = upcase (ch); | ||
| 90 | else | ||
| 91 | cased = ch; | ||
| 92 | if ((int) flag >= (int) CASE_CAPITALIZE) | ||
| 93 | inword = (SYNTAX (ch) == Sword); | ||
| 94 | o += CHAR_STRING (cased, o); | ||
| 95 | } | ||
| 96 | eassert (o - dst <= o_size); | ||
| 97 | obj = make_multibyte_string ((char *) dst, size, o - dst); | ||
| 98 | SAFE_FREE (); | ||
| 99 | return obj; | ||
| 100 | } | ||
| 101 | |||
| 102 | static Lisp_Object | ||
| 103 | do_casify_unibyte_string (enum case_action flag, Lisp_Object obj) | ||
| 36 | { | 104 | { |
| 37 | int c, c1; | 105 | ptrdiff_t i, size = SCHARS (obj); |
| 38 | bool inword = flag == CASE_DOWN; | 106 | bool inword = flag == CASE_DOWN; |
| 107 | int ch, cased; | ||
| 108 | |||
| 109 | obj = Fcopy_sequence (obj); | ||
| 110 | for (i = 0; i < size; i++) | ||
| 111 | { | ||
| 112 | ch = SREF (obj, i); | ||
| 113 | MAKE_CHAR_MULTIBYTE (ch); | ||
| 114 | cased = ch; | ||
| 115 | if (inword && flag != CASE_CAPITALIZE_UP) | ||
| 116 | ch = downcase (ch); | ||
| 117 | else if (!uppercasep (ch) | ||
| 118 | && (!inword || flag != CASE_CAPITALIZE_UP)) | ||
| 119 | ch = upcase (cased); | ||
| 120 | if ((int) flag >= (int) CASE_CAPITALIZE) | ||
| 121 | inword = (SYNTAX (ch) == Sword); | ||
| 122 | if (ch == cased) | ||
| 123 | continue; | ||
| 124 | MAKE_CHAR_UNIBYTE (ch); | ||
| 125 | /* If the char can't be converted to a valid byte, just don't change it */ | ||
| 126 | if (ch >= 0 && ch < 256) | ||
| 127 | SSET (obj, i, ch); | ||
| 128 | } | ||
| 129 | return obj; | ||
| 130 | } | ||
| 39 | 131 | ||
| 132 | static Lisp_Object | ||
| 133 | casify_object (enum case_action flag, Lisp_Object obj) | ||
| 134 | { | ||
| 40 | /* If the case table is flagged as modified, rescan it. */ | 135 | /* If the case table is flagged as modified, rescan it. */ |
| 41 | if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1])) | 136 | if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1])) |
| 42 | Fset_case_table (BVAR (current_buffer, downcase_table)); | 137 | Fset_case_table (BVAR (current_buffer, downcase_table)); |
| 43 | 138 | ||
| 44 | if (NATNUMP (obj)) | 139 | if (NATNUMP (obj)) |
| 45 | { | 140 | return do_casify_natnum (flag, obj); |
| 46 | int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER | 141 | else if (!STRINGP (obj)) |
| 47 | | CHAR_SHIFT | CHAR_CTL | CHAR_META); | ||
| 48 | int flags = XINT (obj) & flagbits; | ||
| 49 | bool multibyte = ! NILP (BVAR (current_buffer, | ||
| 50 | enable_multibyte_characters)); | ||
| 51 | |||
| 52 | /* If the character has higher bits set | ||
| 53 | above the flags, return it unchanged. | ||
| 54 | It is not a real character. */ | ||
| 55 | if (UNSIGNED_CMP (XFASTINT (obj), >, flagbits)) | ||
| 56 | return obj; | ||
| 57 | |||
| 58 | c1 = XFASTINT (obj) & ~flagbits; | ||
| 59 | /* FIXME: Even if enable-multibyte-characters is nil, we may | ||
| 60 | manipulate multibyte chars. This means we have a bug for latin-1 | ||
| 61 | chars since when we receive an int 128-255 we can't tell whether | ||
| 62 | it's an eight-bit byte or a latin-1 char. */ | ||
| 63 | if (c1 >= 256) | ||
| 64 | multibyte = 1; | ||
| 65 | if (! multibyte) | ||
| 66 | MAKE_CHAR_MULTIBYTE (c1); | ||
| 67 | c = flag == CASE_DOWN ? downcase (c1) : upcase (c1); | ||
| 68 | if (c != c1) | ||
| 69 | { | ||
| 70 | if (! multibyte) | ||
| 71 | MAKE_CHAR_UNIBYTE (c); | ||
| 72 | XSETFASTINT (obj, c | flags); | ||
| 73 | } | ||
| 74 | return obj; | ||
| 75 | } | ||
| 76 | |||
| 77 | if (!STRINGP (obj)) | ||
| 78 | wrong_type_argument (Qchar_or_string_p, obj); | 142 | wrong_type_argument (Qchar_or_string_p, obj); |
| 79 | else if (!STRING_MULTIBYTE (obj)) | 143 | else if (!SCHARS (obj)) |
| 80 | { | 144 | return obj; |
| 81 | ptrdiff_t i; | 145 | else if (STRING_MULTIBYTE (obj)) |
| 82 | ptrdiff_t size = SCHARS (obj); | 146 | return do_casify_multibyte_string (flag, obj); |
| 83 | |||
| 84 | obj = Fcopy_sequence (obj); | ||
| 85 | for (i = 0; i < size; i++) | ||
| 86 | { | ||
| 87 | c = SREF (obj, i); | ||
| 88 | MAKE_CHAR_MULTIBYTE (c); | ||
| 89 | c1 = c; | ||
| 90 | if (inword && flag != CASE_CAPITALIZE_UP) | ||
| 91 | c = downcase (c); | ||
| 92 | else if (!uppercasep (c) | ||
| 93 | && (!inword || flag != CASE_CAPITALIZE_UP)) | ||
| 94 | c = upcase (c1); | ||
| 95 | if ((int) flag >= (int) CASE_CAPITALIZE) | ||
| 96 | inword = (SYNTAX (c) == Sword); | ||
| 97 | if (c != c1) | ||
| 98 | { | ||
| 99 | MAKE_CHAR_UNIBYTE (c); | ||
| 100 | /* If the char can't be converted to a valid byte, just don't | ||
| 101 | change it. */ | ||
| 102 | if (c >= 0 && c < 256) | ||
| 103 | SSET (obj, i, c); | ||
| 104 | } | ||
| 105 | } | ||
| 106 | return obj; | ||
| 107 | } | ||
| 108 | else | 147 | else |
| 109 | { | 148 | return do_casify_unibyte_string (flag, obj); |
| 110 | ptrdiff_t i, i_byte, size = SCHARS (obj); | ||
| 111 | int len; | ||
| 112 | USE_SAFE_ALLOCA; | ||
| 113 | ptrdiff_t o_size; | ||
| 114 | if (INT_MULTIPLY_WRAPV (size, MAX_MULTIBYTE_LENGTH, &o_size)) | ||
| 115 | o_size = PTRDIFF_MAX; | ||
| 116 | unsigned char *dst = SAFE_ALLOCA (o_size); | ||
| 117 | unsigned char *o = dst; | ||
| 118 | |||
| 119 | for (i = i_byte = 0; i < size; i++, i_byte += len) | ||
| 120 | { | ||
| 121 | if (o_size - MAX_MULTIBYTE_LENGTH < o - dst) | ||
| 122 | string_overflow (); | ||
| 123 | c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len); | ||
| 124 | if (inword && flag != CASE_CAPITALIZE_UP) | ||
| 125 | c = downcase (c); | ||
| 126 | else if (!inword || flag != CASE_CAPITALIZE_UP) | ||
| 127 | c = upcase (c); | ||
| 128 | if ((int) flag >= (int) CASE_CAPITALIZE) | ||
| 129 | inword = (SYNTAX (c) == Sword); | ||
| 130 | o += CHAR_STRING (c, o); | ||
| 131 | } | ||
| 132 | eassert (o - dst <= o_size); | ||
| 133 | obj = make_multibyte_string ((char *) dst, size, o - dst); | ||
| 134 | SAFE_FREE (); | ||
| 135 | return obj; | ||
| 136 | } | ||
| 137 | } | 149 | } |
| 138 | 150 | ||
| 139 | DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0, | 151 | DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0, |