aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorMichal Nazarewicz2016-09-19 00:52:47 +0200
committerMichal Nazarewicz2017-04-06 20:54:58 +0200
commitc1fa07222e9c76964d1261c31b50f1e399554fa2 (patch)
treed1d48a5fd44b2d2abd049a70d17984a76c022c4f /src
parentb3b9b258c4026baa1cad3f2e617f1a637fc8d205 (diff)
downloademacs-c1fa07222e9c76964d1261c31b50f1e399554fa2.tar.gz
emacs-c1fa07222e9c76964d1261c31b50f1e399554fa2.zip
Implement special sigma casing rule (bug#24603)
In Greek, a sigma character has two lower case forms which depend on their position in the word. Implement logic determining it. * src/casefiddle.c (struct casing_context, case_character_impl): Don’t assume inword is true when flag is CASE_UP and false when flag is CASE_DOWN. For final sigma detection we need this information tracked reliably. (CAPITAL_SIGMA, SMALL_SIGMA, SMALL_FINAL_SIGMA): New macros defining Unicode code point of different forms of sigma letter. (case_character): Implement support for final sigma casing. (do_casify_multibyte_string, do_casify_multibyte_region): Update after changes to case_character. * test/src/casefiddle-tests.el (casefiddle-tests-casing): Add test cases for final sigma.
Diffstat (limited to 'src')
-rw-r--r--src/casefiddle.c73
1 files changed, 55 insertions, 18 deletions
diff --git a/src/casefiddle.c b/src/casefiddle.c
index 10674d963ec..6fe584b8302 100644
--- a/src/casefiddle.c
+++ b/src/casefiddle.c
@@ -46,9 +46,7 @@ struct casing_context {
46 When run on a buffer, syntax_prefix_flag_p is taken into account when 46 When run on a buffer, syntax_prefix_flag_p is taken into account when
47 determining inword flag. */ 47 determining inword flag. */
48 bool inbuffer; 48 bool inbuffer;
49 /* Conceptually, this denotes whether we are inside of a word except 49 /* Whether we are inside of a word. */
50 that if flag is CASE_UP it’s always false and if flag is CASE_DOWN
51 this is always true. */
52 bool inword; 50 bool inword;
53}; 51};
54 52
@@ -59,7 +57,7 @@ prepare_casing_context (struct casing_context *ctx,
59{ 57{
60 ctx->flag = flag; 58 ctx->flag = flag;
61 ctx->inbuffer = inbuffer; 59 ctx->inbuffer = inbuffer;
62 ctx->inword = flag == CASE_DOWN; 60 ctx->inword = false;
63 ctx->titlecase_char_table = (int)flag < (int)CASE_CAPITALIZE ? Qnil : 61 ctx->titlecase_char_table = (int)flag < (int)CASE_CAPITALIZE ? Qnil :
64 uniprop_table (intern_c_string ("titlecase")); 62 uniprop_table (intern_c_string ("titlecase"));
65 ctx->specialcase_char_tables[CASE_UP] = flag == CASE_DOWN ? Qnil : 63 ctx->specialcase_char_tables[CASE_UP] = flag == CASE_DOWN ? Qnil :
@@ -101,15 +99,16 @@ case_character_impl (struct casing_str_buf *buf,
101 99
102 /* Update inword state */ 100 /* Update inword state */
103 was_inword = ctx->inword; 101 was_inword = ctx->inword;
104 if ((int) ctx->flag >= (int) CASE_CAPITALIZE) 102 ctx->inword = SYNTAX (ch) == Sword &&
105 ctx->inword = SYNTAX (ch) == Sword && 103 (!ctx->inbuffer || was_inword || !syntax_prefix_flag_p (ch));
106 (!ctx->inbuffer || was_inword || !syntax_prefix_flag_p (ch));
107 104
108 /* Normalise flag so it's one of CASE_UP, CASE_DOWN or CASE_CAPITALIZE. */ 105 /* Normalise flag so it's one of CASE_UP, CASE_DOWN or CASE_CAPITALIZE. */
109 if (!was_inword) 106 if (ctx->flag == CASE_CAPITALIZE)
110 flag = ctx->flag == CASE_UP ? CASE_UP : CASE_CAPITALIZE; 107 flag = (enum case_action)((int)ctx->flag - was_inword);
111 else if (ctx->flag != CASE_CAPITALIZE_UP) 108 else if (ctx->flag != CASE_CAPITALIZE_UP)
112 flag = CASE_DOWN; 109 flag = ctx->flag;
110 else if (!was_inword)
111 flag = CASE_CAPITALIZE;
113 else 112 else
114 { 113 {
115 cased = ch; 114 cased = ch;
@@ -150,7 +149,18 @@ case_character_impl (struct casing_str_buf *buf,
150 buf->len_bytes = CHAR_STRING (cased, buf->data); 149 buf->len_bytes = CHAR_STRING (cased, buf->data);
151 return cased != ch; 150 return cased != ch;
152} 151}
152
153/* In Greek, lower case sigma has two forms: one when used in the middle and one
154 when used at the end of a word. Below is to help handle those cases when
155 casing.
156
157 The rule does not conflict with any other casing rules so while it is
158 a conditional one, it is independent of language. */
153 159
160#define CAPITAL_SIGMA 0x03A3
161#define SMALL_SIGMA 0x03C3
162#define SMALL_FINAL_SIGMA 0x03C2
163
154/* Based on CTX, case character CH accordingly. Update CTX as necessary. 164/* Based on CTX, case character CH accordingly. Update CTX as necessary.
155 Return cased character. 165 Return cased character.
156 166
@@ -164,12 +174,34 @@ case_single_character (struct casing_context *ctx, int ch)
164} 174}
165 175
166/* Save in BUF result of casing character CH. Return whether casing changed the 176/* Save in BUF result of casing character CH. Return whether casing changed the
167 character. This is like case_single_character but also handles one-to-many 177 character.
168 casing rules. */ 178
169static inline bool 179 If not-NULL, NEXT points to the next character in the cased string. If NULL,
170case_character (struct casing_str_buf *buf, struct casing_context *ctx, int ch) 180 it is assumed current character is the last one being cased. This is used to
181 apply some rules which depend on the following state.
182
183 This is like case_single_character but also handles one-to-many casing
184 rules. */
185static bool
186case_character (struct casing_str_buf *buf, struct casing_context *ctx,
187 int ch, const unsigned char *next)
171{ 188{
172 return case_character_impl (buf, ctx, ch); 189 bool changed, was_inword;
190
191 was_inword = ctx->inword;
192 changed = case_character_impl (buf, ctx, ch);
193
194 /* If we have just down-cased a capital sigma and the next character no longer
195 has a word syntax (i.e. current character is end of word), use final
196 sigma. */
197 if (was_inword && ch == CAPITAL_SIGMA && changed &&
198 (!next || SYNTAX (STRING_CHAR (next)) != Sword))
199 {
200 buf->len_bytes = CHAR_STRING (SMALL_FINAL_SIGMA, buf->data);
201 buf->len_chars = 1;
202 }
203
204 return changed;
173} 205}
174 206
175static Lisp_Object 207static Lisp_Object
@@ -231,7 +263,7 @@ do_casify_multibyte_string (struct casing_context *ctx, Lisp_Object obj)
231 if (dst_end - o < sizeof(struct casing_str_buf)) 263 if (dst_end - o < sizeof(struct casing_str_buf))
232 string_overflow (); 264 string_overflow ();
233 ch = STRING_CHAR_ADVANCE (src); 265 ch = STRING_CHAR_ADVANCE (src);
234 case_character ((void *)o, ctx, ch); 266 case_character ((void *)o, ctx, ch, size > 1 ? src : NULL);
235 n += ((struct casing_str_buf *)o)->len_chars; 267 n += ((struct casing_str_buf *)o)->len_chars;
236 o += ((struct casing_str_buf *)o)->len_bytes; 268 o += ((struct casing_str_buf *)o)->len_bytes;
237 } 269 }
@@ -382,12 +414,17 @@ do_casify_multibyte_region (struct casing_context *ctx,
382 ptrdiff_t pos = *startp, pos_byte = CHAR_TO_BYTE (pos), size = *endp - pos; 414 ptrdiff_t pos = *startp, pos_byte = CHAR_TO_BYTE (pos), size = *endp - pos;
383 ptrdiff_t opoint = PT, added = 0; 415 ptrdiff_t opoint = PT, added = 0;
384 struct casing_str_buf buf; 416 struct casing_str_buf buf;
385 int ch, cased, len; 417 bool changed;
418 int ch, len;
386 419
387 for (; size; --size) 420 for (; size; --size)
388 { 421 {
389 ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (pos_byte), len); 422 ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (pos_byte), len);
390 if (!case_character (&buf, ctx, ch)) 423 changed = case_character (
424 &buf, ctx, ch,
425 size > 1 ? BYTE_POS_ADDR (pos_byte + len) : NULL);
426
427 if (!changed)
391 { 428 {
392 pos_byte += len; 429 pos_byte += len;
393 ++pos; 430 ++pos;