diff options
| author | Michael R. Mauger | 2017-07-03 15:32:41 -0400 |
|---|---|---|
| committer | Michael R. Mauger | 2017-07-03 15:32:41 -0400 |
| commit | 776635c01abd4aa759e7aa9584b513146978568c (patch) | |
| tree | 554f444bc96cb6b05435e8bf195de4df1b00df8f /src/casefiddle.c | |
| parent | 77083e2d34ba5559ae2899d3b03cf08c2e6c5ad4 (diff) | |
| parent | 4cd0db3d6e6e4d5bd49283483bdafbbfc0f583f1 (diff) | |
| download | emacs-776635c01abd4aa759e7aa9584b513146978568c.tar.gz emacs-776635c01abd4aa759e7aa9584b513146978568c.zip | |
Merge branch 'master' of git.sv.gnu.org:/srv/git/emacs
Diffstat (limited to 'src/casefiddle.c')
| -rw-r--r-- | src/casefiddle.c | 600 |
1 files changed, 416 insertions, 184 deletions
diff --git a/src/casefiddle.c b/src/casefiddle.c index 11d59444916..443d62b6259 100644 --- a/src/casefiddle.c +++ b/src/casefiddle.c | |||
| @@ -1,3 +1,4 @@ | |||
| 1 | /* -*- coding: utf-8 -*- */ | ||
| 1 | /* GNU Emacs case conversion functions. | 2 | /* GNU Emacs case conversion functions. |
| 2 | 3 | ||
| 3 | Copyright (C) 1985, 1994, 1997-1999, 2001-2017 Free Software Foundation, | 4 | Copyright (C) 1985, 1994, 1997-1999, 2001-2017 Free Software Foundation, |
| @@ -30,116 +31,312 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ | |||
| 30 | #include "keymap.h" | 31 | #include "keymap.h" |
| 31 | 32 | ||
| 32 | enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP}; | 33 | enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP}; |
| 33 | 34 | ||
| 34 | static Lisp_Object | 35 | /* State for casing individual characters. */ |
| 35 | casify_object (enum case_action flag, Lisp_Object obj) | 36 | struct casing_context |
| 37 | { | ||
| 38 | /* A char-table with title-case character mappings or nil. Non-nil implies | ||
| 39 | flag is CASE_CAPITALIZE or CASE_CAPITALIZE_UP. */ | ||
| 40 | Lisp_Object titlecase_char_table; | ||
| 41 | |||
| 42 | /* The unconditional special-casing Unicode property char tables for upper | ||
| 43 | casing, lower casing and title casing respectively. */ | ||
| 44 | Lisp_Object specialcase_char_tables[3]; | ||
| 45 | |||
| 46 | /* User-requested action. */ | ||
| 47 | enum case_action flag; | ||
| 48 | |||
| 49 | /* If true, the function operates on a buffer as opposed to a string | ||
| 50 | or character. When run on a buffer, syntax_prefix_flag_p is | ||
| 51 | taken into account when determining whether the context is within | ||
| 52 | a word. */ | ||
| 53 | bool inbuffer; | ||
| 54 | |||
| 55 | /* Whether the context is within a word. */ | ||
| 56 | bool inword; | ||
| 57 | }; | ||
| 58 | |||
| 59 | /* Initialize CTX structure for casing characters. */ | ||
| 60 | static void | ||
| 61 | prepare_casing_context (struct casing_context *ctx, | ||
| 62 | enum case_action flag, bool inbuffer) | ||
| 36 | { | 63 | { |
| 37 | int c, c1; | 64 | ctx->flag = flag; |
| 38 | bool inword = flag == CASE_DOWN; | 65 | ctx->inbuffer = inbuffer; |
| 66 | ctx->inword = false; | ||
| 67 | ctx->titlecase_char_table | ||
| 68 | = (flag < CASE_CAPITALIZE ? Qnil | ||
| 69 | : uniprop_table (Qtitlecase)); | ||
| 70 | ctx->specialcase_char_tables[CASE_UP] | ||
| 71 | = (flag == CASE_DOWN ? Qnil | ||
| 72 | : uniprop_table (Qspecial_uppercase)); | ||
| 73 | ctx->specialcase_char_tables[CASE_DOWN] | ||
| 74 | = (flag == CASE_UP ? Qnil | ||
| 75 | : uniprop_table (Qspecial_lowercase)); | ||
| 76 | ctx->specialcase_char_tables[CASE_CAPITALIZE] | ||
| 77 | = (flag < CASE_CAPITALIZE ? Qnil | ||
| 78 | : uniprop_table (Qspecial_titlecase)); | ||
| 39 | 79 | ||
| 40 | /* If the case table is flagged as modified, rescan it. */ | 80 | /* If the case table is flagged as modified, rescan it. */ |
| 41 | if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1])) | 81 | if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1])) |
| 42 | Fset_case_table (BVAR (current_buffer, downcase_table)); | 82 | Fset_case_table (BVAR (current_buffer, downcase_table)); |
| 43 | 83 | ||
| 44 | if (NATNUMP (obj)) | 84 | if (inbuffer && flag >= CASE_CAPITALIZE) |
| 85 | SETUP_BUFFER_SYNTAX_TABLE (); /* For syntax_prefix_flag_p. */ | ||
| 86 | } | ||
| 87 | |||
| 88 | struct casing_str_buf | ||
| 89 | { | ||
| 90 | unsigned char data[max (6, MAX_MULTIBYTE_LENGTH)]; | ||
| 91 | unsigned char len_chars; | ||
| 92 | unsigned char len_bytes; | ||
| 93 | }; | ||
| 94 | |||
| 95 | /* Based on CTX, case character CH. If BUF is NULL, return cased character. | ||
| 96 | Otherwise, if BUF is non-NULL, save result in it and return whether the | ||
| 97 | character has been changed. | ||
| 98 | |||
| 99 | Since meaning of return value depends on arguments, it’s more convenient to | ||
| 100 | use case_single_character or case_character instead. */ | ||
| 101 | static int | ||
| 102 | case_character_impl (struct casing_str_buf *buf, | ||
| 103 | struct casing_context *ctx, int ch) | ||
| 104 | { | ||
| 105 | enum case_action flag; | ||
| 106 | Lisp_Object prop; | ||
| 107 | int cased; | ||
| 108 | |||
| 109 | /* Update inword state */ | ||
| 110 | bool was_inword = ctx->inword; | ||
| 111 | ctx->inword = SYNTAX (ch) == Sword && | ||
| 112 | (!ctx->inbuffer || was_inword || !syntax_prefix_flag_p (ch)); | ||
| 113 | |||
| 114 | /* Normalize flag so its one of CASE_UP, CASE_DOWN or CASE_CAPITALIZE. */ | ||
| 115 | if (ctx->flag == CASE_CAPITALIZE) | ||
| 116 | flag = ctx->flag - was_inword; | ||
| 117 | else if (ctx->flag != CASE_CAPITALIZE_UP) | ||
| 118 | flag = ctx->flag; | ||
| 119 | else if (!was_inword) | ||
| 120 | flag = CASE_CAPITALIZE; | ||
| 121 | else | ||
| 45 | { | 122 | { |
| 46 | int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER | 123 | cased = ch; |
| 47 | | CHAR_SHIFT | CHAR_CTL | CHAR_META); | 124 | goto done; |
| 48 | int flags = XINT (obj) & flagbits; | ||
| 49 | bool multibyte = ! NILP (BVAR (current_buffer, | ||
| 50 | enable_multibyte_characters)); | ||
| 51 | |||
| 52 | /* If the character has higher bits set | ||
| 53 | above the flags, return it unchanged. | ||
| 54 | It is not a real character. */ | ||
| 55 | if (UNSIGNED_CMP (XFASTINT (obj), >, flagbits)) | ||
| 56 | return obj; | ||
| 57 | |||
| 58 | c1 = XFASTINT (obj) & ~flagbits; | ||
| 59 | /* FIXME: Even if enable-multibyte-characters is nil, we may | ||
| 60 | manipulate multibyte chars. This means we have a bug for latin-1 | ||
| 61 | chars since when we receive an int 128-255 we can't tell whether | ||
| 62 | it's an eight-bit byte or a latin-1 char. */ | ||
| 63 | if (c1 >= 256) | ||
| 64 | multibyte = 1; | ||
| 65 | if (! multibyte) | ||
| 66 | MAKE_CHAR_MULTIBYTE (c1); | ||
| 67 | c = flag == CASE_DOWN ? downcase (c1) : upcase (c1); | ||
| 68 | if (c != c1) | ||
| 69 | { | ||
| 70 | if (! multibyte) | ||
| 71 | MAKE_CHAR_UNIBYTE (c); | ||
| 72 | XSETFASTINT (obj, c | flags); | ||
| 73 | } | ||
| 74 | return obj; | ||
| 75 | } | 125 | } |
| 76 | 126 | ||
| 77 | if (!STRINGP (obj)) | 127 | /* Look through the special casing entries. */ |
| 78 | wrong_type_argument (Qchar_or_string_p, obj); | 128 | if (buf && !NILP (ctx->specialcase_char_tables[flag])) |
| 79 | else if (!STRING_MULTIBYTE (obj)) | ||
| 80 | { | 129 | { |
| 81 | ptrdiff_t i; | 130 | prop = CHAR_TABLE_REF (ctx->specialcase_char_tables[flag], ch); |
| 82 | ptrdiff_t size = SCHARS (obj); | 131 | if (STRINGP (prop)) |
| 83 | 132 | { | |
| 84 | obj = Fcopy_sequence (obj); | 133 | struct Lisp_String *str = XSTRING (prop); |
| 85 | for (i = 0; i < size; i++) | 134 | if (STRING_BYTES (str) <= sizeof buf->data) |
| 86 | { | ||
| 87 | c = SREF (obj, i); | ||
| 88 | MAKE_CHAR_MULTIBYTE (c); | ||
| 89 | c1 = c; | ||
| 90 | if (inword && flag != CASE_CAPITALIZE_UP) | ||
| 91 | c = downcase (c); | ||
| 92 | else if (!uppercasep (c) | ||
| 93 | && (!inword || flag != CASE_CAPITALIZE_UP)) | ||
| 94 | c = upcase (c1); | ||
| 95 | if ((int) flag >= (int) CASE_CAPITALIZE) | ||
| 96 | inword = (SYNTAX (c) == Sword); | ||
| 97 | if (c != c1) | ||
| 98 | { | 135 | { |
| 99 | MAKE_CHAR_UNIBYTE (c); | 136 | buf->len_chars = str->size; |
| 100 | /* If the char can't be converted to a valid byte, just don't | 137 | buf->len_bytes = STRING_BYTES (str); |
| 101 | change it. */ | 138 | memcpy (buf->data, str->data, buf->len_bytes); |
| 102 | if (c >= 0 && c < 256) | 139 | return 1; |
| 103 | SSET (obj, i, c); | ||
| 104 | } | 140 | } |
| 105 | } | 141 | } |
| 106 | return obj; | ||
| 107 | } | 142 | } |
| 143 | |||
| 144 | /* Handle simple, one-to-one case. */ | ||
| 145 | if (flag == CASE_DOWN) | ||
| 146 | cased = downcase (ch); | ||
| 108 | else | 147 | else |
| 109 | { | 148 | { |
| 110 | ptrdiff_t i, i_byte, size = SCHARS (obj); | 149 | bool cased_is_set = false; |
| 111 | int len; | 150 | if (!NILP (ctx->titlecase_char_table)) |
| 112 | USE_SAFE_ALLOCA; | ||
| 113 | ptrdiff_t o_size; | ||
| 114 | if (INT_MULTIPLY_WRAPV (size, MAX_MULTIBYTE_LENGTH, &o_size)) | ||
| 115 | o_size = PTRDIFF_MAX; | ||
| 116 | unsigned char *dst = SAFE_ALLOCA (o_size); | ||
| 117 | unsigned char *o = dst; | ||
| 118 | |||
| 119 | for (i = i_byte = 0; i < size; i++, i_byte += len) | ||
| 120 | { | 151 | { |
| 121 | if (o_size - MAX_MULTIBYTE_LENGTH < o - dst) | 152 | prop = CHAR_TABLE_REF (ctx->titlecase_char_table, ch); |
| 122 | string_overflow (); | 153 | if (CHARACTERP (prop)) |
| 123 | c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len); | 154 | { |
| 124 | if (inword && flag != CASE_CAPITALIZE_UP) | 155 | cased = XFASTINT (prop); |
| 125 | c = downcase (c); | 156 | cased_is_set = true; |
| 126 | else if (!inword || flag != CASE_CAPITALIZE_UP) | 157 | } |
| 127 | c = upcase (c); | ||
| 128 | if ((int) flag >= (int) CASE_CAPITALIZE) | ||
| 129 | inword = (SYNTAX (c) == Sword); | ||
| 130 | o += CHAR_STRING (c, o); | ||
| 131 | } | 158 | } |
| 132 | eassert (o - dst <= o_size); | 159 | if (!cased_is_set) |
| 133 | obj = make_multibyte_string ((char *) dst, size, o - dst); | 160 | cased = upcase (ch); |
| 134 | SAFE_FREE (); | ||
| 135 | return obj; | ||
| 136 | } | 161 | } |
| 162 | |||
| 163 | /* And we’re done. */ | ||
| 164 | done: | ||
| 165 | if (!buf) | ||
| 166 | return cased; | ||
| 167 | buf->len_chars = 1; | ||
| 168 | buf->len_bytes = CHAR_STRING (cased, buf->data); | ||
| 169 | return cased != ch; | ||
| 170 | } | ||
| 171 | |||
| 172 | /* In Greek, lower case sigma has two forms: one when used in the middle and one | ||
| 173 | when used at the end of a word. Below is to help handle those cases when | ||
| 174 | casing. | ||
| 175 | |||
| 176 | The rule does not conflict with any other casing rules so while it is | ||
| 177 | a conditional one, it is independent of language. */ | ||
| 178 | |||
| 179 | enum { GREEK_CAPITAL_LETTER_SIGMA = 0x03A3 }; /* Σ */ | ||
| 180 | enum { GREEK_SMALL_LETTER_FINAL_SIGMA = 0x03C2 }; /* ς */ | ||
| 181 | |||
| 182 | /* Based on CTX, case character CH accordingly. Update CTX as necessary. | ||
| 183 | Return cased character. | ||
| 184 | |||
| 185 | Special casing rules (such as upcase(fi) = FI) are not handled. For | ||
| 186 | characters whose casing results in multiple code points, the character is | ||
| 187 | returned unchanged. */ | ||
| 188 | static inline int | ||
| 189 | case_single_character (struct casing_context *ctx, int ch) | ||
| 190 | { | ||
| 191 | return case_character_impl (NULL, ctx, ch); | ||
| 192 | } | ||
| 193 | |||
| 194 | /* Save in BUF result of casing character CH. Return whether casing changed the | ||
| 195 | character. | ||
| 196 | |||
| 197 | If not-NULL, NEXT points to the next character in the cased string. If NULL, | ||
| 198 | it is assumed current character is the last one being cased. This is used to | ||
| 199 | apply some rules which depend on proceeding state. | ||
| 200 | |||
| 201 | This is like case_single_character but also handles one-to-many casing | ||
| 202 | rules. */ | ||
| 203 | static bool | ||
| 204 | case_character (struct casing_str_buf *buf, struct casing_context *ctx, | ||
| 205 | int ch, const unsigned char *next) | ||
| 206 | { | ||
| 207 | bool was_inword = ctx->inword; | ||
| 208 | bool changed = case_character_impl (buf, ctx, ch); | ||
| 209 | |||
| 210 | /* If we have just down-cased a capital sigma and the next character no longer | ||
| 211 | has a word syntax (i.e. current character is end of word), use final | ||
| 212 | sigma. */ | ||
| 213 | if (was_inword && ch == GREEK_CAPITAL_LETTER_SIGMA && changed | ||
| 214 | && (!next || SYNTAX (STRING_CHAR (next)) != Sword)) | ||
| 215 | { | ||
| 216 | buf->len_bytes = CHAR_STRING (GREEK_SMALL_LETTER_FINAL_SIGMA, buf->data); | ||
| 217 | buf->len_chars = 1; | ||
| 218 | } | ||
| 219 | |||
| 220 | return changed; | ||
| 221 | } | ||
| 222 | |||
| 223 | static Lisp_Object | ||
| 224 | do_casify_natnum (struct casing_context *ctx, Lisp_Object obj) | ||
| 225 | { | ||
| 226 | int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER | ||
| 227 | | CHAR_SHIFT | CHAR_CTL | CHAR_META); | ||
| 228 | int ch = XFASTINT (obj); | ||
| 229 | |||
| 230 | /* If the character has higher bits set above the flags, return it unchanged. | ||
| 231 | It is not a real character. */ | ||
| 232 | if (UNSIGNED_CMP (ch, >, flagbits)) | ||
| 233 | return obj; | ||
| 234 | |||
| 235 | int flags = ch & flagbits; | ||
| 236 | ch = ch & ~flagbits; | ||
| 237 | |||
| 238 | /* FIXME: Even if enable-multibyte-characters is nil, we may manipulate | ||
| 239 | multibyte chars. This means we have a bug for latin-1 chars since when we | ||
| 240 | receive an int 128-255 we can't tell whether it's an eight-bit byte or | ||
| 241 | a latin-1 char. */ | ||
| 242 | bool multibyte = (ch >= 256 | ||
| 243 | || !NILP (BVAR (current_buffer, | ||
| 244 | enable_multibyte_characters))); | ||
| 245 | if (! multibyte) | ||
| 246 | MAKE_CHAR_MULTIBYTE (ch); | ||
| 247 | int cased = case_single_character (ctx, ch); | ||
| 248 | if (cased == ch) | ||
| 249 | return obj; | ||
| 250 | |||
| 251 | if (! multibyte) | ||
| 252 | MAKE_CHAR_UNIBYTE (cased); | ||
| 253 | return make_natnum (cased | flags); | ||
| 254 | } | ||
| 255 | |||
| 256 | static Lisp_Object | ||
| 257 | do_casify_multibyte_string (struct casing_context *ctx, Lisp_Object obj) | ||
| 258 | { | ||
| 259 | /* Verify that ‘data’ is the first member of struct casing_str_buf | ||
| 260 | so that when casting char * to struct casing_str_buf *, the | ||
| 261 | representation of the character is at the beginning of the | ||
| 262 | buffer. This is why we don’t need a separate struct | ||
| 263 | casing_str_buf object, and can write directly to the destination. */ | ||
| 264 | verify (offsetof (struct casing_str_buf, data) == 0); | ||
| 265 | |||
| 266 | ptrdiff_t size = SCHARS (obj), n; | ||
| 267 | USE_SAFE_ALLOCA; | ||
| 268 | if (INT_MULTIPLY_WRAPV (size, MAX_MULTIBYTE_LENGTH, &n) | ||
| 269 | || INT_ADD_WRAPV (n, sizeof (struct casing_str_buf), &n)) | ||
| 270 | n = PTRDIFF_MAX; | ||
| 271 | unsigned char *dst = SAFE_ALLOCA (n); | ||
| 272 | unsigned char *dst_end = dst + n; | ||
| 273 | unsigned char *o = dst; | ||
| 274 | |||
| 275 | const unsigned char *src = SDATA (obj); | ||
| 276 | |||
| 277 | for (n = 0; size; --size) | ||
| 278 | { | ||
| 279 | if (dst_end - o < sizeof (struct casing_str_buf)) | ||
| 280 | string_overflow (); | ||
| 281 | int ch = STRING_CHAR_ADVANCE (src); | ||
| 282 | case_character ((struct casing_str_buf *) o, ctx, ch, | ||
| 283 | size > 1 ? src : NULL); | ||
| 284 | n += ((struct casing_str_buf *) o)->len_chars; | ||
| 285 | o += ((struct casing_str_buf *) o)->len_bytes; | ||
| 286 | } | ||
| 287 | eassert (o <= dst_end); | ||
| 288 | obj = make_multibyte_string ((char *) dst, n, o - dst); | ||
| 289 | SAFE_FREE (); | ||
| 290 | return obj; | ||
| 291 | } | ||
| 292 | |||
| 293 | static Lisp_Object | ||
| 294 | do_casify_unibyte_string (struct casing_context *ctx, Lisp_Object obj) | ||
| 295 | { | ||
| 296 | ptrdiff_t i, size = SCHARS (obj); | ||
| 297 | int ch, cased; | ||
| 298 | |||
| 299 | obj = Fcopy_sequence (obj); | ||
| 300 | for (i = 0; i < size; i++) | ||
| 301 | { | ||
| 302 | ch = SREF (obj, i); | ||
| 303 | MAKE_CHAR_MULTIBYTE (ch); | ||
| 304 | cased = case_single_character (ctx, ch); | ||
| 305 | if (ch == cased) | ||
| 306 | continue; | ||
| 307 | MAKE_CHAR_UNIBYTE (cased); | ||
| 308 | /* If the char can't be converted to a valid byte, just don't | ||
| 309 | change it. */ | ||
| 310 | if (cased >= 0 && cased < 256) | ||
| 311 | SSET (obj, i, cased); | ||
| 312 | } | ||
| 313 | return obj; | ||
| 314 | } | ||
| 315 | |||
| 316 | static Lisp_Object | ||
| 317 | casify_object (enum case_action flag, Lisp_Object obj) | ||
| 318 | { | ||
| 319 | struct casing_context ctx; | ||
| 320 | prepare_casing_context (&ctx, flag, false); | ||
| 321 | |||
| 322 | if (NATNUMP (obj)) | ||
| 323 | return do_casify_natnum (&ctx, obj); | ||
| 324 | else if (!STRINGP (obj)) | ||
| 325 | wrong_type_argument (Qchar_or_string_p, obj); | ||
| 326 | else if (!SCHARS (obj)) | ||
| 327 | return obj; | ||
| 328 | else if (STRING_MULTIBYTE (obj)) | ||
| 329 | return do_casify_multibyte_string (&ctx, obj); | ||
| 330 | else | ||
| 331 | return do_casify_unibyte_string (&ctx, obj); | ||
| 137 | } | 332 | } |
| 138 | 333 | ||
| 139 | DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0, | 334 | DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0, |
| 140 | doc: /* Convert argument to upper case and return that. | 335 | doc: /* Convert argument to upper case and return that. |
| 141 | The argument may be a character or string. The result has the same type. | 336 | The argument may be a character or string. The result has the same type. |
| 142 | The argument object is not altered--the value is a copy. | 337 | The argument object is not altered--the value is a copy. If argument |
| 338 | is a character, characters which map to multiple code points when | ||
| 339 | cased, e.g. fi, are returned unchanged. | ||
| 143 | See also `capitalize', `downcase' and `upcase-initials'. */) | 340 | See also `capitalize', `downcase' and `upcase-initials'. */) |
| 144 | (Lisp_Object obj) | 341 | (Lisp_Object obj) |
| 145 | { | 342 | { |
| @@ -157,10 +354,12 @@ The argument object is not altered--the value is a copy. */) | |||
| 157 | 354 | ||
| 158 | DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0, | 355 | DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0, |
| 159 | doc: /* Convert argument to capitalized form and return that. | 356 | doc: /* Convert argument to capitalized form and return that. |
| 160 | This means that each word's first character is upper case | 357 | This means that each word's first character is converted to either |
| 161 | and the rest is lower case. | 358 | title case or upper case, and the rest to lower case. |
| 162 | The argument may be a character or string. The result has the same type. | 359 | The argument may be a character or string. The result has the same type. |
| 163 | The argument object is not altered--the value is a copy. */) | 360 | The argument object is not altered--the value is a copy. If argument |
| 361 | is a character, characters which map to multiple code points when | ||
| 362 | cased, e.g. fi, are returned unchanged. */) | ||
| 164 | (Lisp_Object obj) | 363 | (Lisp_Object obj) |
| 165 | { | 364 | { |
| 166 | return casify_object (CASE_CAPITALIZE, obj); | 365 | return casify_object (CASE_CAPITALIZE, obj); |
| @@ -170,122 +369,151 @@ The argument object is not altered--the value is a copy. */) | |||
| 170 | 369 | ||
| 171 | DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0, | 370 | DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0, |
| 172 | doc: /* Convert the initial of each word in the argument to upper case. | 371 | doc: /* Convert the initial of each word in the argument to upper case. |
| 173 | Do not change the other letters of each word. | 372 | This means that each word's first character is converted to either |
| 373 | title case or upper case, and the rest are left unchanged. | ||
| 174 | The argument may be a character or string. The result has the same type. | 374 | The argument may be a character or string. The result has the same type. |
| 175 | The argument object is not altered--the value is a copy. */) | 375 | The argument object is not altered--the value is a copy. If argument |
| 376 | is a character, characters which map to multiple code points when | ||
| 377 | cased, e.g. fi, are returned unchanged. */) | ||
| 176 | (Lisp_Object obj) | 378 | (Lisp_Object obj) |
| 177 | { | 379 | { |
| 178 | return casify_object (CASE_CAPITALIZE_UP, obj); | 380 | return casify_object (CASE_CAPITALIZE_UP, obj); |
| 179 | } | 381 | } |
| 180 | 382 | ||
| 181 | /* flag is CASE_UP, CASE_DOWN or CASE_CAPITALIZE or CASE_CAPITALIZE_UP. | 383 | /* Based on CTX, case region in a unibyte buffer from *STARTP to *ENDP. |
| 182 | b and e specify range of buffer to operate on. */ | ||
| 183 | 384 | ||
| 184 | static void | 385 | Save first and last positions that has changed in *STARTP and *ENDP |
| 185 | casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e) | 386 | respectively. If no characters were changed, save -1 to *STARTP and leave |
| 387 | *ENDP unspecified. | ||
| 388 | |||
| 389 | Always return 0. This is so that interface of this function is the same as | ||
| 390 | do_casify_multibyte_region. */ | ||
| 391 | static ptrdiff_t | ||
| 392 | do_casify_unibyte_region (struct casing_context *ctx, | ||
| 393 | ptrdiff_t *startp, ptrdiff_t *endp) | ||
| 186 | { | 394 | { |
| 187 | int c; | 395 | ptrdiff_t first = -1, last = -1; /* Position of first and last changes. */ |
| 188 | bool inword = flag == CASE_DOWN; | 396 | ptrdiff_t end = *endp; |
| 189 | bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters)); | ||
| 190 | ptrdiff_t start, end; | ||
| 191 | ptrdiff_t start_byte; | ||
| 192 | 397 | ||
| 193 | /* Position of first and last changes. */ | 398 | for (ptrdiff_t pos = *startp; pos < end; ++pos) |
| 194 | ptrdiff_t first = -1, last; | 399 | { |
| 400 | int ch = FETCH_BYTE (pos); | ||
| 401 | MAKE_CHAR_MULTIBYTE (ch); | ||
| 195 | 402 | ||
| 196 | ptrdiff_t opoint = PT; | 403 | int cased = case_single_character (ctx, ch); |
| 197 | ptrdiff_t opoint_byte = PT_BYTE; | 404 | if (cased == ch) |
| 405 | continue; | ||
| 198 | 406 | ||
| 199 | if (EQ (b, e)) | 407 | last = pos + 1; |
| 200 | /* Not modifying because nothing marked */ | 408 | if (first < 0) |
| 201 | return; | 409 | first = pos; |
| 202 | 410 | ||
| 203 | /* If the case table is flagged as modified, rescan it. */ | 411 | MAKE_CHAR_UNIBYTE (cased); |
| 204 | if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1])) | 412 | FETCH_BYTE (pos) = cased; |
| 205 | Fset_case_table (BVAR (current_buffer, downcase_table)); | 413 | } |
| 206 | 414 | ||
| 207 | validate_region (&b, &e); | 415 | *startp = first; |
| 208 | start = XFASTINT (b); | 416 | *endp = last; |
| 209 | end = XFASTINT (e); | 417 | return 0; |
| 210 | modify_text (start, end); | 418 | } |
| 211 | record_change (start, end - start); | ||
| 212 | start_byte = CHAR_TO_BYTE (start); | ||
| 213 | 419 | ||
| 214 | SETUP_BUFFER_SYNTAX_TABLE (); /* For syntax_prefix_flag_p. */ | 420 | /* Based on CTX, case region in a multibyte buffer from *STARTP to *ENDP. |
| 215 | 421 | ||
| 216 | while (start < end) | 422 | Return number of added characters (may be negative if more characters were |
| 217 | { | 423 | deleted then inserted), save first and last positions that has changed in |
| 218 | int c2, len; | 424 | *STARTP and *ENDP respectively. If no characters were changed, return 0, |
| 425 | save -1 to *STARTP and leave *ENDP unspecified. */ | ||
| 426 | static ptrdiff_t | ||
| 427 | do_casify_multibyte_region (struct casing_context *ctx, | ||
| 428 | ptrdiff_t *startp, ptrdiff_t *endp) | ||
| 429 | { | ||
| 430 | ptrdiff_t first = -1, last = -1; /* Position of first and last changes. */ | ||
| 431 | ptrdiff_t pos = *startp, pos_byte = CHAR_TO_BYTE (pos), size = *endp - pos; | ||
| 432 | ptrdiff_t opoint = PT, added = 0; | ||
| 219 | 433 | ||
| 220 | if (multibyte) | 434 | for (; size; --size) |
| 435 | { | ||
| 436 | int len; | ||
| 437 | int ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (pos_byte), len); | ||
| 438 | struct casing_str_buf buf; | ||
| 439 | if (!case_character (&buf, ctx, ch, | ||
| 440 | size > 1 ? BYTE_POS_ADDR (pos_byte + len) : NULL)) | ||
| 221 | { | 441 | { |
| 222 | c = FETCH_MULTIBYTE_CHAR (start_byte); | 442 | pos_byte += len; |
| 223 | len = CHAR_BYTES (c); | 443 | ++pos; |
| 444 | continue; | ||
| 224 | } | 445 | } |
| 446 | |||
| 447 | last = pos + buf.len_chars; | ||
| 448 | if (first < 0) | ||
| 449 | first = pos; | ||
| 450 | |||
| 451 | if (buf.len_chars == 1 && buf.len_bytes == len) | ||
| 452 | memcpy (BYTE_POS_ADDR (pos_byte), buf.data, len); | ||
| 225 | else | 453 | else |
| 226 | { | 454 | { |
| 227 | c = FETCH_BYTE (start_byte); | 455 | /* Replace one character with the other(s), keeping text |
| 228 | MAKE_CHAR_MULTIBYTE (c); | 456 | properties the same. */ |
| 229 | len = 1; | 457 | replace_range_2 (pos, pos_byte, pos + 1, pos_byte + len, |
| 458 | (const char *) buf.data, buf.len_chars, | ||
| 459 | buf.len_bytes, | ||
| 460 | 0); | ||
| 461 | added += (ptrdiff_t) buf.len_chars - 1; | ||
| 462 | if (opoint > pos) | ||
| 463 | opoint += (ptrdiff_t) buf.len_chars - 1; | ||
| 230 | } | 464 | } |
| 231 | c2 = c; | ||
| 232 | if (inword && flag != CASE_CAPITALIZE_UP) | ||
| 233 | c = downcase (c); | ||
| 234 | else if (!inword || flag != CASE_CAPITALIZE_UP) | ||
| 235 | c = upcase (c); | ||
| 236 | if ((int) flag >= (int) CASE_CAPITALIZE) | ||
| 237 | inword = ((SYNTAX (c) == Sword) | ||
| 238 | && (inword || !syntax_prefix_flag_p (c))); | ||
| 239 | if (c != c2) | ||
| 240 | { | ||
| 241 | last = start; | ||
| 242 | if (first < 0) | ||
| 243 | first = start; | ||
| 244 | 465 | ||
| 245 | if (! multibyte) | 466 | pos_byte += buf.len_bytes; |
| 246 | { | 467 | pos += buf.len_chars; |
| 247 | MAKE_CHAR_UNIBYTE (c); | ||
| 248 | FETCH_BYTE (start_byte) = c; | ||
| 249 | } | ||
| 250 | else if (ASCII_CHAR_P (c2) && ASCII_CHAR_P (c)) | ||
| 251 | FETCH_BYTE (start_byte) = c; | ||
| 252 | else | ||
| 253 | { | ||
| 254 | int tolen = CHAR_BYTES (c); | ||
| 255 | int j; | ||
| 256 | unsigned char str[MAX_MULTIBYTE_LENGTH]; | ||
| 257 | |||
| 258 | CHAR_STRING (c, str); | ||
| 259 | if (len == tolen) | ||
| 260 | { | ||
| 261 | /* Length is unchanged. */ | ||
| 262 | for (j = 0; j < len; ++j) | ||
| 263 | FETCH_BYTE (start_byte + j) = str[j]; | ||
| 264 | } | ||
| 265 | else | ||
| 266 | { | ||
| 267 | /* Replace one character with the other, | ||
| 268 | keeping text properties the same. */ | ||
| 269 | replace_range_2 (start, start_byte, | ||
| 270 | start + 1, start_byte + len, | ||
| 271 | (char *) str, 1, tolen, | ||
| 272 | 0); | ||
| 273 | len = tolen; | ||
| 274 | } | ||
| 275 | } | ||
| 276 | } | ||
| 277 | start++; | ||
| 278 | start_byte += len; | ||
| 279 | } | 468 | } |
| 280 | 469 | ||
| 281 | if (PT != opoint) | 470 | if (PT != opoint) |
| 282 | TEMP_SET_PT_BOTH (opoint, opoint_byte); | 471 | TEMP_SET_PT_BOTH (opoint, CHAR_TO_BYTE (opoint)); |
| 283 | 472 | ||
| 284 | if (first >= 0) | 473 | *startp = first; |
| 474 | *endp = last; | ||
| 475 | return added; | ||
| 476 | } | ||
| 477 | |||
| 478 | /* flag is CASE_UP, CASE_DOWN or CASE_CAPITALIZE or CASE_CAPITALIZE_UP. b and | ||
| 479 | e specify range of buffer to operate on. Return character position of the | ||
| 480 | end of the region after changes. */ | ||
| 481 | static ptrdiff_t | ||
| 482 | casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e) | ||
| 483 | { | ||
| 484 | ptrdiff_t added; | ||
| 485 | struct casing_context ctx; | ||
| 486 | |||
| 487 | validate_region (&b, &e); | ||
| 488 | ptrdiff_t start = XFASTINT (b); | ||
| 489 | ptrdiff_t end = XFASTINT (e); | ||
| 490 | if (start == end) | ||
| 491 | /* Not modifying because nothing marked. */ | ||
| 492 | return end; | ||
| 493 | modify_text (start, end); | ||
| 494 | prepare_casing_context (&ctx, flag, true); | ||
| 495 | |||
| 496 | ptrdiff_t orig_end = end; | ||
| 497 | record_delete (start, make_buffer_string (start, end, true), false); | ||
| 498 | if (NILP (BVAR (current_buffer, enable_multibyte_characters))) | ||
| 285 | { | 499 | { |
| 286 | signal_after_change (first, last + 1 - first, last + 1 - first); | 500 | record_insert (start, end - start); |
| 287 | update_compositions (first, last + 1, CHECK_ALL); | 501 | added = do_casify_unibyte_region (&ctx, &start, &end); |
| 288 | } | 502 | } |
| 503 | else | ||
| 504 | { | ||
| 505 | ptrdiff_t len = end - start, ostart = start; | ||
| 506 | added = do_casify_multibyte_region (&ctx, &start, &end); | ||
| 507 | record_insert (ostart, len + added); | ||
| 508 | } | ||
| 509 | |||
| 510 | if (start >= 0) | ||
| 511 | { | ||
| 512 | signal_after_change (start, end - start - added, end - start); | ||
| 513 | update_compositions (start, end, CHECK_ALL); | ||
| 514 | } | ||
| 515 | |||
| 516 | return orig_end + added; | ||
| 289 | } | 517 | } |
| 290 | 518 | ||
| 291 | DEFUN ("upcase-region", Fupcase_region, Supcase_region, 2, 3, | 519 | DEFUN ("upcase-region", Fupcase_region, Supcase_region, 2, 3, |
| @@ -345,8 +573,8 @@ point and the mark is operated on. */) | |||
| 345 | 573 | ||
| 346 | DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 2, "r", | 574 | DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 2, "r", |
| 347 | doc: /* Convert the region to capitalized form. | 575 | doc: /* Convert the region to capitalized form. |
| 348 | Capitalized form means each word's first character is upper case | 576 | This means that each word's first character is converted to either |
| 349 | and the rest of it is lower case. | 577 | title case or upper case, and the rest to lower case. |
| 350 | In programs, give two arguments, the starting and ending | 578 | In programs, give two arguments, the starting and ending |
| 351 | character positions to operate on. */) | 579 | character positions to operate on. */) |
| 352 | (Lisp_Object beg, Lisp_Object end) | 580 | (Lisp_Object beg, Lisp_Object end) |
| @@ -360,7 +588,8 @@ character positions to operate on. */) | |||
| 360 | DEFUN ("upcase-initials-region", Fupcase_initials_region, | 588 | DEFUN ("upcase-initials-region", Fupcase_initials_region, |
| 361 | Supcase_initials_region, 2, 2, "r", | 589 | Supcase_initials_region, 2, 2, "r", |
| 362 | doc: /* Upcase the initial of each word in the region. | 590 | doc: /* Upcase the initial of each word in the region. |
| 363 | Subsequent letters of each word are not changed. | 591 | This means that each word's first character is converted to either |
| 592 | title case or upper case, and the rest are left unchanged. | ||
| 364 | In programs, give two arguments, the starting and ending | 593 | In programs, give two arguments, the starting and ending |
| 365 | character positions to operate on. */) | 594 | character positions to operate on. */) |
| 366 | (Lisp_Object beg, Lisp_Object end) | 595 | (Lisp_Object beg, Lisp_Object end) |
| @@ -376,9 +605,7 @@ casify_word (enum case_action flag, Lisp_Object arg) | |||
| 376 | ptrdiff_t farend = scan_words (PT, XINT (arg)); | 605 | ptrdiff_t farend = scan_words (PT, XINT (arg)); |
| 377 | if (!farend) | 606 | if (!farend) |
| 378 | farend = XINT (arg) <= 0 ? BEGV : ZV; | 607 | farend = XINT (arg) <= 0 ? BEGV : ZV; |
| 379 | ptrdiff_t newpoint = max (PT, farend); | 608 | SET_PT (casify_region (flag, make_number (PT), make_number (farend))); |
| 380 | casify_region (flag, make_number (PT), make_number (farend)); | ||
| 381 | SET_PT (newpoint); | ||
| 382 | return Qnil; | 609 | return Qnil; |
| 383 | } | 610 | } |
| 384 | 611 | ||
| @@ -426,6 +653,11 @@ void | |||
| 426 | syms_of_casefiddle (void) | 653 | syms_of_casefiddle (void) |
| 427 | { | 654 | { |
| 428 | DEFSYM (Qidentity, "identity"); | 655 | DEFSYM (Qidentity, "identity"); |
| 656 | DEFSYM (Qtitlecase, "titlecase"); | ||
| 657 | DEFSYM (Qspecial_uppercase, "special-uppercase"); | ||
| 658 | DEFSYM (Qspecial_lowercase, "special-lowercase"); | ||
| 659 | DEFSYM (Qspecial_titlecase, "special-titlecase"); | ||
| 660 | |||
| 429 | defsubr (&Supcase); | 661 | defsubr (&Supcase); |
| 430 | defsubr (&Sdowncase); | 662 | defsubr (&Sdowncase); |
| 431 | defsubr (&Scapitalize); | 663 | defsubr (&Scapitalize); |