aboutsummaryrefslogtreecommitdiffstats
path: root/src/casefiddle.c
diff options
context:
space:
mode:
authorMichael R. Mauger2017-07-03 15:32:41 -0400
committerMichael R. Mauger2017-07-03 15:32:41 -0400
commit776635c01abd4aa759e7aa9584b513146978568c (patch)
tree554f444bc96cb6b05435e8bf195de4df1b00df8f /src/casefiddle.c
parent77083e2d34ba5559ae2899d3b03cf08c2e6c5ad4 (diff)
parent4cd0db3d6e6e4d5bd49283483bdafbbfc0f583f1 (diff)
downloademacs-776635c01abd4aa759e7aa9584b513146978568c.tar.gz
emacs-776635c01abd4aa759e7aa9584b513146978568c.zip
Merge branch 'master' of git.sv.gnu.org:/srv/git/emacs
Diffstat (limited to 'src/casefiddle.c')
-rw-r--r--src/casefiddle.c600
1 files changed, 416 insertions, 184 deletions
diff --git a/src/casefiddle.c b/src/casefiddle.c
index 11d59444916..443d62b6259 100644
--- a/src/casefiddle.c
+++ b/src/casefiddle.c
@@ -1,3 +1,4 @@
1/* -*- coding: utf-8 -*- */
1/* GNU Emacs case conversion functions. 2/* GNU Emacs case conversion functions.
2 3
3Copyright (C) 1985, 1994, 1997-1999, 2001-2017 Free Software Foundation, 4Copyright (C) 1985, 1994, 1997-1999, 2001-2017 Free Software Foundation,
@@ -30,116 +31,312 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
30#include "keymap.h" 31#include "keymap.h"
31 32
32enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP}; 33enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP};
33 34
34static Lisp_Object 35/* State for casing individual characters. */
35casify_object (enum case_action flag, Lisp_Object obj) 36struct casing_context
37{
38 /* A char-table with title-case character mappings or nil. Non-nil implies
39 flag is CASE_CAPITALIZE or CASE_CAPITALIZE_UP. */
40 Lisp_Object titlecase_char_table;
41
42 /* The unconditional special-casing Unicode property char tables for upper
43 casing, lower casing and title casing respectively. */
44 Lisp_Object specialcase_char_tables[3];
45
46 /* User-requested action. */
47 enum case_action flag;
48
49 /* If true, the function operates on a buffer as opposed to a string
50 or character. When run on a buffer, syntax_prefix_flag_p is
51 taken into account when determining whether the context is within
52 a word. */
53 bool inbuffer;
54
55 /* Whether the context is within a word. */
56 bool inword;
57};
58
59/* Initialize CTX structure for casing characters. */
60static void
61prepare_casing_context (struct casing_context *ctx,
62 enum case_action flag, bool inbuffer)
36{ 63{
37 int c, c1; 64 ctx->flag = flag;
38 bool inword = flag == CASE_DOWN; 65 ctx->inbuffer = inbuffer;
66 ctx->inword = false;
67 ctx->titlecase_char_table
68 = (flag < CASE_CAPITALIZE ? Qnil
69 : uniprop_table (Qtitlecase));
70 ctx->specialcase_char_tables[CASE_UP]
71 = (flag == CASE_DOWN ? Qnil
72 : uniprop_table (Qspecial_uppercase));
73 ctx->specialcase_char_tables[CASE_DOWN]
74 = (flag == CASE_UP ? Qnil
75 : uniprop_table (Qspecial_lowercase));
76 ctx->specialcase_char_tables[CASE_CAPITALIZE]
77 = (flag < CASE_CAPITALIZE ? Qnil
78 : uniprop_table (Qspecial_titlecase));
39 79
40 /* If the case table is flagged as modified, rescan it. */ 80 /* If the case table is flagged as modified, rescan it. */
41 if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1])) 81 if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
42 Fset_case_table (BVAR (current_buffer, downcase_table)); 82 Fset_case_table (BVAR (current_buffer, downcase_table));
43 83
44 if (NATNUMP (obj)) 84 if (inbuffer && flag >= CASE_CAPITALIZE)
85 SETUP_BUFFER_SYNTAX_TABLE (); /* For syntax_prefix_flag_p. */
86}
87
88struct casing_str_buf
89{
90 unsigned char data[max (6, MAX_MULTIBYTE_LENGTH)];
91 unsigned char len_chars;
92 unsigned char len_bytes;
93};
94
95/* Based on CTX, case character CH. If BUF is NULL, return cased character.
96 Otherwise, if BUF is non-NULL, save result in it and return whether the
97 character has been changed.
98
99 Since meaning of return value depends on arguments, it’s more convenient to
100 use case_single_character or case_character instead. */
101static int
102case_character_impl (struct casing_str_buf *buf,
103 struct casing_context *ctx, int ch)
104{
105 enum case_action flag;
106 Lisp_Object prop;
107 int cased;
108
109 /* Update inword state */
110 bool was_inword = ctx->inword;
111 ctx->inword = SYNTAX (ch) == Sword &&
112 (!ctx->inbuffer || was_inword || !syntax_prefix_flag_p (ch));
113
114 /* Normalize flag so its one of CASE_UP, CASE_DOWN or CASE_CAPITALIZE. */
115 if (ctx->flag == CASE_CAPITALIZE)
116 flag = ctx->flag - was_inword;
117 else if (ctx->flag != CASE_CAPITALIZE_UP)
118 flag = ctx->flag;
119 else if (!was_inword)
120 flag = CASE_CAPITALIZE;
121 else
45 { 122 {
46 int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER 123 cased = ch;
47 | CHAR_SHIFT | CHAR_CTL | CHAR_META); 124 goto done;
48 int flags = XINT (obj) & flagbits;
49 bool multibyte = ! NILP (BVAR (current_buffer,
50 enable_multibyte_characters));
51
52 /* If the character has higher bits set
53 above the flags, return it unchanged.
54 It is not a real character. */
55 if (UNSIGNED_CMP (XFASTINT (obj), >, flagbits))
56 return obj;
57
58 c1 = XFASTINT (obj) & ~flagbits;
59 /* FIXME: Even if enable-multibyte-characters is nil, we may
60 manipulate multibyte chars. This means we have a bug for latin-1
61 chars since when we receive an int 128-255 we can't tell whether
62 it's an eight-bit byte or a latin-1 char. */
63 if (c1 >= 256)
64 multibyte = 1;
65 if (! multibyte)
66 MAKE_CHAR_MULTIBYTE (c1);
67 c = flag == CASE_DOWN ? downcase (c1) : upcase (c1);
68 if (c != c1)
69 {
70 if (! multibyte)
71 MAKE_CHAR_UNIBYTE (c);
72 XSETFASTINT (obj, c | flags);
73 }
74 return obj;
75 } 125 }
76 126
77 if (!STRINGP (obj)) 127 /* Look through the special casing entries. */
78 wrong_type_argument (Qchar_or_string_p, obj); 128 if (buf && !NILP (ctx->specialcase_char_tables[flag]))
79 else if (!STRING_MULTIBYTE (obj))
80 { 129 {
81 ptrdiff_t i; 130 prop = CHAR_TABLE_REF (ctx->specialcase_char_tables[flag], ch);
82 ptrdiff_t size = SCHARS (obj); 131 if (STRINGP (prop))
83 132 {
84 obj = Fcopy_sequence (obj); 133 struct Lisp_String *str = XSTRING (prop);
85 for (i = 0; i < size; i++) 134 if (STRING_BYTES (str) <= sizeof buf->data)
86 {
87 c = SREF (obj, i);
88 MAKE_CHAR_MULTIBYTE (c);
89 c1 = c;
90 if (inword && flag != CASE_CAPITALIZE_UP)
91 c = downcase (c);
92 else if (!uppercasep (c)
93 && (!inword || flag != CASE_CAPITALIZE_UP))
94 c = upcase (c1);
95 if ((int) flag >= (int) CASE_CAPITALIZE)
96 inword = (SYNTAX (c) == Sword);
97 if (c != c1)
98 { 135 {
99 MAKE_CHAR_UNIBYTE (c); 136 buf->len_chars = str->size;
100 /* If the char can't be converted to a valid byte, just don't 137 buf->len_bytes = STRING_BYTES (str);
101 change it. */ 138 memcpy (buf->data, str->data, buf->len_bytes);
102 if (c >= 0 && c < 256) 139 return 1;
103 SSET (obj, i, c);
104 } 140 }
105 } 141 }
106 return obj;
107 } 142 }
143
144 /* Handle simple, one-to-one case. */
145 if (flag == CASE_DOWN)
146 cased = downcase (ch);
108 else 147 else
109 { 148 {
110 ptrdiff_t i, i_byte, size = SCHARS (obj); 149 bool cased_is_set = false;
111 int len; 150 if (!NILP (ctx->titlecase_char_table))
112 USE_SAFE_ALLOCA;
113 ptrdiff_t o_size;
114 if (INT_MULTIPLY_WRAPV (size, MAX_MULTIBYTE_LENGTH, &o_size))
115 o_size = PTRDIFF_MAX;
116 unsigned char *dst = SAFE_ALLOCA (o_size);
117 unsigned char *o = dst;
118
119 for (i = i_byte = 0; i < size; i++, i_byte += len)
120 { 151 {
121 if (o_size - MAX_MULTIBYTE_LENGTH < o - dst) 152 prop = CHAR_TABLE_REF (ctx->titlecase_char_table, ch);
122 string_overflow (); 153 if (CHARACTERP (prop))
123 c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len); 154 {
124 if (inword && flag != CASE_CAPITALIZE_UP) 155 cased = XFASTINT (prop);
125 c = downcase (c); 156 cased_is_set = true;
126 else if (!inword || flag != CASE_CAPITALIZE_UP) 157 }
127 c = upcase (c);
128 if ((int) flag >= (int) CASE_CAPITALIZE)
129 inword = (SYNTAX (c) == Sword);
130 o += CHAR_STRING (c, o);
131 } 158 }
132 eassert (o - dst <= o_size); 159 if (!cased_is_set)
133 obj = make_multibyte_string ((char *) dst, size, o - dst); 160 cased = upcase (ch);
134 SAFE_FREE ();
135 return obj;
136 } 161 }
162
163 /* And we’re done. */
164 done:
165 if (!buf)
166 return cased;
167 buf->len_chars = 1;
168 buf->len_bytes = CHAR_STRING (cased, buf->data);
169 return cased != ch;
170}
171
172/* In Greek, lower case sigma has two forms: one when used in the middle and one
173 when used at the end of a word. Below is to help handle those cases when
174 casing.
175
176 The rule does not conflict with any other casing rules so while it is
177 a conditional one, it is independent of language. */
178
179enum { GREEK_CAPITAL_LETTER_SIGMA = 0x03A3 }; /* Σ */
180enum { GREEK_SMALL_LETTER_FINAL_SIGMA = 0x03C2 }; /* ς */
181
182/* Based on CTX, case character CH accordingly. Update CTX as necessary.
183 Return cased character.
184
185 Special casing rules (such as upcase(fi) = FI) are not handled. For
186 characters whose casing results in multiple code points, the character is
187 returned unchanged. */
188static inline int
189case_single_character (struct casing_context *ctx, int ch)
190{
191 return case_character_impl (NULL, ctx, ch);
192}
193
194/* Save in BUF result of casing character CH. Return whether casing changed the
195 character.
196
197 If not-NULL, NEXT points to the next character in the cased string. If NULL,
198 it is assumed current character is the last one being cased. This is used to
199 apply some rules which depend on proceeding state.
200
201 This is like case_single_character but also handles one-to-many casing
202 rules. */
203static bool
204case_character (struct casing_str_buf *buf, struct casing_context *ctx,
205 int ch, const unsigned char *next)
206{
207 bool was_inword = ctx->inword;
208 bool changed = case_character_impl (buf, ctx, ch);
209
210 /* If we have just down-cased a capital sigma and the next character no longer
211 has a word syntax (i.e. current character is end of word), use final
212 sigma. */
213 if (was_inword && ch == GREEK_CAPITAL_LETTER_SIGMA && changed
214 && (!next || SYNTAX (STRING_CHAR (next)) != Sword))
215 {
216 buf->len_bytes = CHAR_STRING (GREEK_SMALL_LETTER_FINAL_SIGMA, buf->data);
217 buf->len_chars = 1;
218 }
219
220 return changed;
221}
222
223static Lisp_Object
224do_casify_natnum (struct casing_context *ctx, Lisp_Object obj)
225{
226 int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER
227 | CHAR_SHIFT | CHAR_CTL | CHAR_META);
228 int ch = XFASTINT (obj);
229
230 /* If the character has higher bits set above the flags, return it unchanged.
231 It is not a real character. */
232 if (UNSIGNED_CMP (ch, >, flagbits))
233 return obj;
234
235 int flags = ch & flagbits;
236 ch = ch & ~flagbits;
237
238 /* FIXME: Even if enable-multibyte-characters is nil, we may manipulate
239 multibyte chars. This means we have a bug for latin-1 chars since when we
240 receive an int 128-255 we can't tell whether it's an eight-bit byte or
241 a latin-1 char. */
242 bool multibyte = (ch >= 256
243 || !NILP (BVAR (current_buffer,
244 enable_multibyte_characters)));
245 if (! multibyte)
246 MAKE_CHAR_MULTIBYTE (ch);
247 int cased = case_single_character (ctx, ch);
248 if (cased == ch)
249 return obj;
250
251 if (! multibyte)
252 MAKE_CHAR_UNIBYTE (cased);
253 return make_natnum (cased | flags);
254}
255
256static Lisp_Object
257do_casify_multibyte_string (struct casing_context *ctx, Lisp_Object obj)
258{
259 /* Verify that ‘data’ is the first member of struct casing_str_buf
260 so that when casting char * to struct casing_str_buf *, the
261 representation of the character is at the beginning of the
262 buffer. This is why we don’t need a separate struct
263 casing_str_buf object, and can write directly to the destination. */
264 verify (offsetof (struct casing_str_buf, data) == 0);
265
266 ptrdiff_t size = SCHARS (obj), n;
267 USE_SAFE_ALLOCA;
268 if (INT_MULTIPLY_WRAPV (size, MAX_MULTIBYTE_LENGTH, &n)
269 || INT_ADD_WRAPV (n, sizeof (struct casing_str_buf), &n))
270 n = PTRDIFF_MAX;
271 unsigned char *dst = SAFE_ALLOCA (n);
272 unsigned char *dst_end = dst + n;
273 unsigned char *o = dst;
274
275 const unsigned char *src = SDATA (obj);
276
277 for (n = 0; size; --size)
278 {
279 if (dst_end - o < sizeof (struct casing_str_buf))
280 string_overflow ();
281 int ch = STRING_CHAR_ADVANCE (src);
282 case_character ((struct casing_str_buf *) o, ctx, ch,
283 size > 1 ? src : NULL);
284 n += ((struct casing_str_buf *) o)->len_chars;
285 o += ((struct casing_str_buf *) o)->len_bytes;
286 }
287 eassert (o <= dst_end);
288 obj = make_multibyte_string ((char *) dst, n, o - dst);
289 SAFE_FREE ();
290 return obj;
291}
292
293static Lisp_Object
294do_casify_unibyte_string (struct casing_context *ctx, Lisp_Object obj)
295{
296 ptrdiff_t i, size = SCHARS (obj);
297 int ch, cased;
298
299 obj = Fcopy_sequence (obj);
300 for (i = 0; i < size; i++)
301 {
302 ch = SREF (obj, i);
303 MAKE_CHAR_MULTIBYTE (ch);
304 cased = case_single_character (ctx, ch);
305 if (ch == cased)
306 continue;
307 MAKE_CHAR_UNIBYTE (cased);
308 /* If the char can't be converted to a valid byte, just don't
309 change it. */
310 if (cased >= 0 && cased < 256)
311 SSET (obj, i, cased);
312 }
313 return obj;
314}
315
316static Lisp_Object
317casify_object (enum case_action flag, Lisp_Object obj)
318{
319 struct casing_context ctx;
320 prepare_casing_context (&ctx, flag, false);
321
322 if (NATNUMP (obj))
323 return do_casify_natnum (&ctx, obj);
324 else if (!STRINGP (obj))
325 wrong_type_argument (Qchar_or_string_p, obj);
326 else if (!SCHARS (obj))
327 return obj;
328 else if (STRING_MULTIBYTE (obj))
329 return do_casify_multibyte_string (&ctx, obj);
330 else
331 return do_casify_unibyte_string (&ctx, obj);
137} 332}
138 333
139DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0, 334DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
140 doc: /* Convert argument to upper case and return that. 335 doc: /* Convert argument to upper case and return that.
141The argument may be a character or string. The result has the same type. 336The argument may be a character or string. The result has the same type.
142The argument object is not altered--the value is a copy. 337The argument object is not altered--the value is a copy. If argument
338is a character, characters which map to multiple code points when
339cased, e.g. fi, are returned unchanged.
143See also `capitalize', `downcase' and `upcase-initials'. */) 340See also `capitalize', `downcase' and `upcase-initials'. */)
144 (Lisp_Object obj) 341 (Lisp_Object obj)
145{ 342{
@@ -157,10 +354,12 @@ The argument object is not altered--the value is a copy. */)
157 354
158DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0, 355DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0,
159 doc: /* Convert argument to capitalized form and return that. 356 doc: /* Convert argument to capitalized form and return that.
160This means that each word's first character is upper case 357This means that each word's first character is converted to either
161and the rest is lower case. 358title case or upper case, and the rest to lower case.
162The argument may be a character or string. The result has the same type. 359The argument may be a character or string. The result has the same type.
163The argument object is not altered--the value is a copy. */) 360The argument object is not altered--the value is a copy. If argument
361is a character, characters which map to multiple code points when
362cased, e.g. fi, are returned unchanged. */)
164 (Lisp_Object obj) 363 (Lisp_Object obj)
165{ 364{
166 return casify_object (CASE_CAPITALIZE, obj); 365 return casify_object (CASE_CAPITALIZE, obj);
@@ -170,122 +369,151 @@ The argument object is not altered--the value is a copy. */)
170 369
171DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0, 370DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0,
172 doc: /* Convert the initial of each word in the argument to upper case. 371 doc: /* Convert the initial of each word in the argument to upper case.
173Do not change the other letters of each word. 372This means that each word's first character is converted to either
373title case or upper case, and the rest are left unchanged.
174The argument may be a character or string. The result has the same type. 374The argument may be a character or string. The result has the same type.
175The argument object is not altered--the value is a copy. */) 375The argument object is not altered--the value is a copy. If argument
376is a character, characters which map to multiple code points when
377cased, e.g. fi, are returned unchanged. */)
176 (Lisp_Object obj) 378 (Lisp_Object obj)
177{ 379{
178 return casify_object (CASE_CAPITALIZE_UP, obj); 380 return casify_object (CASE_CAPITALIZE_UP, obj);
179} 381}
180 382
181/* flag is CASE_UP, CASE_DOWN or CASE_CAPITALIZE or CASE_CAPITALIZE_UP. 383/* Based on CTX, case region in a unibyte buffer from *STARTP to *ENDP.
182 b and e specify range of buffer to operate on. */
183 384
184static void 385 Save first and last positions that has changed in *STARTP and *ENDP
185casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e) 386 respectively. If no characters were changed, save -1 to *STARTP and leave
387 *ENDP unspecified.
388
389 Always return 0. This is so that interface of this function is the same as
390 do_casify_multibyte_region. */
391static ptrdiff_t
392do_casify_unibyte_region (struct casing_context *ctx,
393 ptrdiff_t *startp, ptrdiff_t *endp)
186{ 394{
187 int c; 395 ptrdiff_t first = -1, last = -1; /* Position of first and last changes. */
188 bool inword = flag == CASE_DOWN; 396 ptrdiff_t end = *endp;
189 bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
190 ptrdiff_t start, end;
191 ptrdiff_t start_byte;
192 397
193 /* Position of first and last changes. */ 398 for (ptrdiff_t pos = *startp; pos < end; ++pos)
194 ptrdiff_t first = -1, last; 399 {
400 int ch = FETCH_BYTE (pos);
401 MAKE_CHAR_MULTIBYTE (ch);
195 402
196 ptrdiff_t opoint = PT; 403 int cased = case_single_character (ctx, ch);
197 ptrdiff_t opoint_byte = PT_BYTE; 404 if (cased == ch)
405 continue;
198 406
199 if (EQ (b, e)) 407 last = pos + 1;
200 /* Not modifying because nothing marked */ 408 if (first < 0)
201 return; 409 first = pos;
202 410
203 /* If the case table is flagged as modified, rescan it. */ 411 MAKE_CHAR_UNIBYTE (cased);
204 if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1])) 412 FETCH_BYTE (pos) = cased;
205 Fset_case_table (BVAR (current_buffer, downcase_table)); 413 }
206 414
207 validate_region (&b, &e); 415 *startp = first;
208 start = XFASTINT (b); 416 *endp = last;
209 end = XFASTINT (e); 417 return 0;
210 modify_text (start, end); 418}
211 record_change (start, end - start);
212 start_byte = CHAR_TO_BYTE (start);
213 419
214 SETUP_BUFFER_SYNTAX_TABLE (); /* For syntax_prefix_flag_p. */ 420/* Based on CTX, case region in a multibyte buffer from *STARTP to *ENDP.
215 421
216 while (start < end) 422 Return number of added characters (may be negative if more characters were
217 { 423 deleted then inserted), save first and last positions that has changed in
218 int c2, len; 424 *STARTP and *ENDP respectively. If no characters were changed, return 0,
425 save -1 to *STARTP and leave *ENDP unspecified. */
426static ptrdiff_t
427do_casify_multibyte_region (struct casing_context *ctx,
428 ptrdiff_t *startp, ptrdiff_t *endp)
429{
430 ptrdiff_t first = -1, last = -1; /* Position of first and last changes. */
431 ptrdiff_t pos = *startp, pos_byte = CHAR_TO_BYTE (pos), size = *endp - pos;
432 ptrdiff_t opoint = PT, added = 0;
219 433
220 if (multibyte) 434 for (; size; --size)
435 {
436 int len;
437 int ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (pos_byte), len);
438 struct casing_str_buf buf;
439 if (!case_character (&buf, ctx, ch,
440 size > 1 ? BYTE_POS_ADDR (pos_byte + len) : NULL))
221 { 441 {
222 c = FETCH_MULTIBYTE_CHAR (start_byte); 442 pos_byte += len;
223 len = CHAR_BYTES (c); 443 ++pos;
444 continue;
224 } 445 }
446
447 last = pos + buf.len_chars;
448 if (first < 0)
449 first = pos;
450
451 if (buf.len_chars == 1 && buf.len_bytes == len)
452 memcpy (BYTE_POS_ADDR (pos_byte), buf.data, len);
225 else 453 else
226 { 454 {
227 c = FETCH_BYTE (start_byte); 455 /* Replace one character with the other(s), keeping text
228 MAKE_CHAR_MULTIBYTE (c); 456 properties the same. */
229 len = 1; 457 replace_range_2 (pos, pos_byte, pos + 1, pos_byte + len,
458 (const char *) buf.data, buf.len_chars,
459 buf.len_bytes,
460 0);
461 added += (ptrdiff_t) buf.len_chars - 1;
462 if (opoint > pos)
463 opoint += (ptrdiff_t) buf.len_chars - 1;
230 } 464 }
231 c2 = c;
232 if (inword && flag != CASE_CAPITALIZE_UP)
233 c = downcase (c);
234 else if (!inword || flag != CASE_CAPITALIZE_UP)
235 c = upcase (c);
236 if ((int) flag >= (int) CASE_CAPITALIZE)
237 inword = ((SYNTAX (c) == Sword)
238 && (inword || !syntax_prefix_flag_p (c)));
239 if (c != c2)
240 {
241 last = start;
242 if (first < 0)
243 first = start;
244 465
245 if (! multibyte) 466 pos_byte += buf.len_bytes;
246 { 467 pos += buf.len_chars;
247 MAKE_CHAR_UNIBYTE (c);
248 FETCH_BYTE (start_byte) = c;
249 }
250 else if (ASCII_CHAR_P (c2) && ASCII_CHAR_P (c))
251 FETCH_BYTE (start_byte) = c;
252 else
253 {
254 int tolen = CHAR_BYTES (c);
255 int j;
256 unsigned char str[MAX_MULTIBYTE_LENGTH];
257
258 CHAR_STRING (c, str);
259 if (len == tolen)
260 {
261 /* Length is unchanged. */
262 for (j = 0; j < len; ++j)
263 FETCH_BYTE (start_byte + j) = str[j];
264 }
265 else
266 {
267 /* Replace one character with the other,
268 keeping text properties the same. */
269 replace_range_2 (start, start_byte,
270 start + 1, start_byte + len,
271 (char *) str, 1, tolen,
272 0);
273 len = tolen;
274 }
275 }
276 }
277 start++;
278 start_byte += len;
279 } 468 }
280 469
281 if (PT != opoint) 470 if (PT != opoint)
282 TEMP_SET_PT_BOTH (opoint, opoint_byte); 471 TEMP_SET_PT_BOTH (opoint, CHAR_TO_BYTE (opoint));
283 472
284 if (first >= 0) 473 *startp = first;
474 *endp = last;
475 return added;
476}
477
478/* flag is CASE_UP, CASE_DOWN or CASE_CAPITALIZE or CASE_CAPITALIZE_UP. b and
479 e specify range of buffer to operate on. Return character position of the
480 end of the region after changes. */
481static ptrdiff_t
482casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
483{
484 ptrdiff_t added;
485 struct casing_context ctx;
486
487 validate_region (&b, &e);
488 ptrdiff_t start = XFASTINT (b);
489 ptrdiff_t end = XFASTINT (e);
490 if (start == end)
491 /* Not modifying because nothing marked. */
492 return end;
493 modify_text (start, end);
494 prepare_casing_context (&ctx, flag, true);
495
496 ptrdiff_t orig_end = end;
497 record_delete (start, make_buffer_string (start, end, true), false);
498 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
285 { 499 {
286 signal_after_change (first, last + 1 - first, last + 1 - first); 500 record_insert (start, end - start);
287 update_compositions (first, last + 1, CHECK_ALL); 501 added = do_casify_unibyte_region (&ctx, &start, &end);
288 } 502 }
503 else
504 {
505 ptrdiff_t len = end - start, ostart = start;
506 added = do_casify_multibyte_region (&ctx, &start, &end);
507 record_insert (ostart, len + added);
508 }
509
510 if (start >= 0)
511 {
512 signal_after_change (start, end - start - added, end - start);
513 update_compositions (start, end, CHECK_ALL);
514 }
515
516 return orig_end + added;
289} 517}
290 518
291DEFUN ("upcase-region", Fupcase_region, Supcase_region, 2, 3, 519DEFUN ("upcase-region", Fupcase_region, Supcase_region, 2, 3,
@@ -345,8 +573,8 @@ point and the mark is operated on. */)
345 573
346DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 2, "r", 574DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 2, "r",
347 doc: /* Convert the region to capitalized form. 575 doc: /* Convert the region to capitalized form.
348Capitalized form means each word's first character is upper case 576This means that each word's first character is converted to either
349and the rest of it is lower case. 577title case or upper case, and the rest to lower case.
350In programs, give two arguments, the starting and ending 578In programs, give two arguments, the starting and ending
351character positions to operate on. */) 579character positions to operate on. */)
352 (Lisp_Object beg, Lisp_Object end) 580 (Lisp_Object beg, Lisp_Object end)
@@ -360,7 +588,8 @@ character positions to operate on. */)
360DEFUN ("upcase-initials-region", Fupcase_initials_region, 588DEFUN ("upcase-initials-region", Fupcase_initials_region,
361 Supcase_initials_region, 2, 2, "r", 589 Supcase_initials_region, 2, 2, "r",
362 doc: /* Upcase the initial of each word in the region. 590 doc: /* Upcase the initial of each word in the region.
363Subsequent letters of each word are not changed. 591This means that each word's first character is converted to either
592title case or upper case, and the rest are left unchanged.
364In programs, give two arguments, the starting and ending 593In programs, give two arguments, the starting and ending
365character positions to operate on. */) 594character positions to operate on. */)
366 (Lisp_Object beg, Lisp_Object end) 595 (Lisp_Object beg, Lisp_Object end)
@@ -376,9 +605,7 @@ casify_word (enum case_action flag, Lisp_Object arg)
376 ptrdiff_t farend = scan_words (PT, XINT (arg)); 605 ptrdiff_t farend = scan_words (PT, XINT (arg));
377 if (!farend) 606 if (!farend)
378 farend = XINT (arg) <= 0 ? BEGV : ZV; 607 farend = XINT (arg) <= 0 ? BEGV : ZV;
379 ptrdiff_t newpoint = max (PT, farend); 608 SET_PT (casify_region (flag, make_number (PT), make_number (farend)));
380 casify_region (flag, make_number (PT), make_number (farend));
381 SET_PT (newpoint);
382 return Qnil; 609 return Qnil;
383} 610}
384 611
@@ -426,6 +653,11 @@ void
426syms_of_casefiddle (void) 653syms_of_casefiddle (void)
427{ 654{
428 DEFSYM (Qidentity, "identity"); 655 DEFSYM (Qidentity, "identity");
656 DEFSYM (Qtitlecase, "titlecase");
657 DEFSYM (Qspecial_uppercase, "special-uppercase");
658 DEFSYM (Qspecial_lowercase, "special-lowercase");
659 DEFSYM (Qspecial_titlecase, "special-titlecase");
660
429 defsubr (&Supcase); 661 defsubr (&Supcase);
430 defsubr (&Sdowncase); 662 defsubr (&Sdowncase);
431 defsubr (&Scapitalize); 663 defsubr (&Scapitalize);