aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Spencer Baugh, 2023-10-21 11:09:39 -0400
committer: Eli Zaretskii, 2023-10-29 13:32:43 +0200
commit: 5c8fc0b0594b1e3af43d86c0bc96e10d03bc75a2 (patch)
tree: 37e56deacbaaa4492eff5c0211ec362dbfc1a409 /src
parent: 3dca52dd422c50ebf24a304e7c3d36cf5f1c55cf (diff)
download: emacs-5c8fc0b0594b1e3af43d86c0bc96e10d03bc75a2.tar.gz
emacs-5c8fc0b0594b1e3af43d86c0bc96e10d03bc75a2.zip
Add 'case-symbols-as-words' to configure symbol case behavior
In some programming languages and styles, a symbol (or every symbol in a sequence of symbols) might be capitalized, but the individual words making up the symbol should never be capitalized.

For example, in OCaml, type names Look_like_this and variable names look_like_this, but it is basically never correct for something to Look_Like_This. And one might have "aa_bb cc_dd ee_ff" or "Aa_bb Cc_dd Ee_ff", but never "Aa_Bb Cc_Dd Ee_Ff".

To support this, the new variable 'case-symbols-as-words' causes symbol constituents to be treated as part of words only for case operations.

* src/casefiddle.c (case_ch_is_word): New function.
(case_character_impl, case_character): Use 'case_ch_is_word'.
(syms_of_casefiddle): Define 'case-symbols-as-words'.
* src/search.c (Freplace_match): Use 'case-symbols-as-words' when calculating case pattern.
* test/src/casefiddle-tests.el (casefiddle-tests--check-syms)
(casefiddle-case-symbols-as-words): Test 'case-symbols-as-words'.
* etc/NEWS: Announce 'case-symbols-as-words'.
* doc/lispref/strings.texi (Case Conversion): Document 'case-symbols-as-words'.
(Bug#66614)
Diffstat (limited to 'src')
-rw-r--r-- src/casefiddle.c 25
-rw-r--r-- src/search.c 11
2 files changed, 30 insertions, 6 deletions
diff --git a/src/casefiddle.c b/src/casefiddle.c
index d567a5e353a..3afb131c50e 100644
--- a/src/casefiddle.c
+++ b/src/casefiddle.c
@@ -92,6 +92,12 @@ prepare_casing_context (struct casing_context *ctx,
92 SETUP_BUFFER_SYNTAX_TABLE (); /* For syntax_prefix_flag_p. */ 92 SETUP_BUFFER_SYNTAX_TABLE (); /* For syntax_prefix_flag_p. */
93} 93}
94 94
95static bool
96case_ch_is_word (enum syntaxcode syntax)
97{
98 return syntax == Sword || (case_symbols_as_words && syntax == Ssymbol);
99}
100
95struct casing_str_buf 101struct casing_str_buf
96{ 102{
97 unsigned char data[max (6, MAX_MULTIBYTE_LENGTH)]; 103 unsigned char data[max (6, MAX_MULTIBYTE_LENGTH)];
@@ -115,7 +121,7 @@ case_character_impl (struct casing_str_buf *buf,
115 121
116 /* Update inword state */ 122 /* Update inword state */
117 bool was_inword = ctx->inword; 123 bool was_inword = ctx->inword;
118 ctx->inword = SYNTAX (ch) == Sword && 124 ctx->inword = case_ch_is_word (SYNTAX (ch)) &&
119 (!ctx->inbuffer || was_inword || !syntax_prefix_flag_p (ch)); 125 (!ctx->inbuffer || was_inword || !syntax_prefix_flag_p (ch));
120 126
121 /* Normalize flag so its one of CASE_UP, CASE_DOWN or CASE_CAPITALIZE. */ 127 /* Normalize flag so its one of CASE_UP, CASE_DOWN or CASE_CAPITALIZE. */
@@ -222,7 +228,7 @@ case_character (struct casing_str_buf *buf, struct casing_context *ctx,
222 has a word syntax (i.e. current character is end of word), use final 228 has a word syntax (i.e. current character is end of word), use final
223 sigma. */ 229 sigma. */
224 if (was_inword && ch == GREEK_CAPITAL_LETTER_SIGMA && changed 230 if (was_inword && ch == GREEK_CAPITAL_LETTER_SIGMA && changed
225 && (!next || SYNTAX (STRING_CHAR (next)) != Sword)) 231 && (!next || !case_ch_is_word (SYNTAX (STRING_CHAR (next)))))
226 { 232 {
227 buf->len_bytes = CHAR_STRING (GREEK_SMALL_LETTER_FINAL_SIGMA, buf->data); 233 buf->len_bytes = CHAR_STRING (GREEK_SMALL_LETTER_FINAL_SIGMA, buf->data);
228 buf->len_chars = 1; 234 buf->len_chars = 1;
@@ -720,6 +726,21 @@ Called with one argument METHOD which can be:
720 3rd argument. */); 726 3rd argument. */);
721 Vregion_extract_function = Qnil; /* simple.el sets this. */ 727 Vregion_extract_function = Qnil; /* simple.el sets this. */
722 728
729 DEFVAR_BOOL ("case-symbols-as-words", case_symbols_as_words,
730 doc: /* If non-nil, case functions treat symbol syntax as part of words.
731
732Functions such as `upcase-initials' and `replace-match' check or modify
733the case pattern of sequences of characters. Normally, these operate on
734sequences of characters whose syntax is word constituent. If this
735variable is non-nil, then they operate on sequences of characters whose
736syntax is either word constituent or symbol constituent.
737
738This is useful for programming languages and styles where only the first
739letter of a symbol's name is ever capitalized.*/);
740 case_symbols_as_words = 0;
741 DEFSYM (Qcase_symbols_as_words, "case-symbols-as-words");
742 Fmake_variable_buffer_local (Qcase_symbols_as_words);
743
723 defsubr (&Supcase); 744 defsubr (&Supcase);
724 defsubr (&Sdowncase); 745 defsubr (&Sdowncase);
725 defsubr (&Scapitalize); 746 defsubr (&Scapitalize);
diff --git a/src/search.c b/src/search.c
index e9b29bb7179..692d8488049 100644
--- a/src/search.c
+++ b/src/search.c
@@ -2365,7 +2365,7 @@ text has only capital letters and has at least one multiletter word,
2365convert NEWTEXT to all caps. Otherwise if all words are capitalized 2365convert NEWTEXT to all caps. Otherwise if all words are capitalized
2366in the replaced text, capitalize each word in NEWTEXT. Note that 2366in the replaced text, capitalize each word in NEWTEXT. Note that
2367what exactly is a word is determined by the syntax tables in effect 2367what exactly is a word is determined by the syntax tables in effect
2368in the current buffer. 2368in the current buffer, and the variable `case-symbols-as-words'.
2369 2369
2370If optional third arg LITERAL is non-nil, insert NEWTEXT literally. 2370If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
2371Otherwise treat `\\' as special: 2371Otherwise treat `\\' as special:
@@ -2479,7 +2479,8 @@ since only regular expressions have distinguished subexpressions. */)
2479 /* Cannot be all caps if any original char is lower case */ 2479 /* Cannot be all caps if any original char is lower case */
2480 2480
2481 some_lowercase = 1; 2481 some_lowercase = 1;
2482 if (SYNTAX (prevc) != Sword) 2482 if (SYNTAX (prevc) != Sword
2483 && !(case_symbols_as_words && SYNTAX (prevc) == Ssymbol))
2483 some_nonuppercase_initial = 1; 2484 some_nonuppercase_initial = 1;
2484 else 2485 else
2485 some_multiletter_word = 1; 2486 some_multiletter_word = 1;
@@ -2487,7 +2488,8 @@ since only regular expressions have distinguished subexpressions. */)
2487 else if (uppercasep (c)) 2488 else if (uppercasep (c))
2488 { 2489 {
2489 some_uppercase = 1; 2490 some_uppercase = 1;
2490 if (SYNTAX (prevc) != Sword) 2491 if (SYNTAX (prevc) != Sword
2492 && !(case_symbols_as_words && SYNTAX (prevc) == Ssymbol))
2491 ; 2493 ;
2492 else 2494 else
2493 some_multiletter_word = 1; 2495 some_multiletter_word = 1;
@@ -2496,7 +2498,8 @@ since only regular expressions have distinguished subexpressions. */)
2496 { 2498 {
2497 /* If the initial is a caseless word constituent, 2499 /* If the initial is a caseless word constituent,
2498 treat that like a lowercase initial. */ 2500 treat that like a lowercase initial. */
2499 if (SYNTAX (prevc) != Sword) 2501 if (SYNTAX (prevc) != Sword
2502 && !(case_symbols_as_words && SYNTAX (prevc) == Ssymbol))
2500 some_nonuppercase_initial = 1; 2503 some_nonuppercase_initial = 1;
2501 } 2504 }
2502 2505