aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSpencer Baugh2023-10-21 11:09:39 -0400
committerEli Zaretskii2023-10-29 13:32:43 +0200
commit5c8fc0b0594b1e3af43d86c0bc96e10d03bc75a2 (patch)
tree37e56deacbaaa4492eff5c0211ec362dbfc1a409
parent3dca52dd422c50ebf24a304e7c3d36cf5f1c55cf (diff)
downloademacs-5c8fc0b0594b1e3af43d86c0bc96e10d03bc75a2.tar.gz
emacs-5c8fc0b0594b1e3af43d86c0bc96e10d03bc75a2.zip
Add 'case-symbols-as-words' to configure symbol case behavior
In some programming languages and styles, a symbol (or every symbol in a sequence of symbols) might be capitalized, but the individual words making up the symbol should never be capitalized. For example, in OCaml, type names Look_like_this and variable names look_like_this, but it is basically never correct for something to Look_Like_This. And one might have "aa_bb cc_dd ee_ff" or "Aa_bb Cc_dd Ee_ff", but never "Aa_Bb Cc_Dd Ee_Ff". To support this, the new variable 'case-symbols-as-words' causes symbol constituents to be treated as part of words only for case operations. * src/casefiddle.c (case_ch_is_word): New function. (case_character_impl, case_character): Use 'case_ch_is_word'. (syms_of_casefiddle): Define 'case-symbols-as-words'. * src/search.c (Freplace_match): Use 'case-symbols-as-words' when calculating case pattern. * test/src/casefiddle-tests.el (casefiddle-tests--check-syms) (casefiddle-case-symbols-as-words): Test 'case-symbols-as-words'. * etc/NEWS: Announce 'case-symbols-as-words'. * doc/lispref/strings.texi (Case Conversion): Document 'case-symbols-as-words'. (Bug#66614)
-rw-r--r--doc/lispref/strings.texi8
-rw-r--r--etc/NEWS8
-rw-r--r--src/casefiddle.c25
-rw-r--r--src/search.c11
-rw-r--r--test/src/casefiddle-tests.el12
5 files changed, 56 insertions, 8 deletions
diff --git a/doc/lispref/strings.texi b/doc/lispref/strings.texi
index 7d11db49def..665d4f9a8dc 100644
--- a/doc/lispref/strings.texi
+++ b/doc/lispref/strings.texi
@@ -1510,7 +1510,9 @@ case.
1510 1510
1511The definition of a word is any sequence of consecutive characters that 1511The definition of a word is any sequence of consecutive characters that
1512are assigned to the word constituent syntax class in the current syntax 1512are assigned to the word constituent syntax class in the current syntax
1513table (@pxref{Syntax Class Table}). 1513table (@pxref{Syntax Class Table}), or if @code{case-symbols-as-words}
1514is non-@code{nil}, also characters assigned to the symbol constituent syntax
1515class.
1514 1516
1515When @var{string-or-char} is a character, this function does the same 1517When @var{string-or-char} is a character, this function does the same
1516thing as @code{upcase}. 1518thing as @code{upcase}.
@@ -1542,7 +1544,9 @@ had its initial letter converted to upper case.
1542 1544
1543The definition of a word is any sequence of consecutive characters that 1545The definition of a word is any sequence of consecutive characters that
1544are assigned to the word constituent syntax class in the current syntax 1546are assigned to the word constituent syntax class in the current syntax
1545table (@pxref{Syntax Class Table}). 1547table (@pxref{Syntax Class Table}), or if @code{case-symbols-as-words}
1548is non-@code{nil}, also characters assigned to the symbol constituent syntax
1549class.
1546 1550
1547When the argument to @code{upcase-initials} is a character, 1551When the argument to @code{upcase-initials} is a character,
1548@code{upcase-initials} has the same result as @code{upcase}. 1552@code{upcase-initials} has the same result as @code{upcase}.
diff --git a/etc/NEWS b/etc/NEWS
index ed9f1a2124c..269346b5917 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -1194,6 +1194,14 @@ instead of "ctags", "ebrowse", "etags", "hexl", "emacsclient", and
1194subprocess. 1194subprocess.
1195 1195
1196+++ 1196+++
1197** New variable 'case-symbols-as-words' affects case operations for symbols.
1198If non-nil, then case operations such as 'upcase-initials' or
1199'replace-match' (with nil FIXEDCASE) will treat the entire symbol name
1200as a single word. This is useful for programming languages and styles
1201where only the first letter of a symbol's name is ever capitalized.
1202It defaults to nil.
1203
1204+++
1197** 'x-popup-menu' now understands touch screen events. 1205** 'x-popup-menu' now understands touch screen events.
1198When a 'touchscreen-begin' or 'touchscreen-end' event is passed as the 1206When a 'touchscreen-begin' or 'touchscreen-end' event is passed as the
1199POSITION argument, it will behave as if that event was a mouse event. 1207POSITION argument, it will behave as if that event was a mouse event.
diff --git a/src/casefiddle.c b/src/casefiddle.c
index d567a5e353a..3afb131c50e 100644
--- a/src/casefiddle.c
+++ b/src/casefiddle.c
@@ -92,6 +92,12 @@ prepare_casing_context (struct casing_context *ctx,
92 SETUP_BUFFER_SYNTAX_TABLE (); /* For syntax_prefix_flag_p. */ 92 SETUP_BUFFER_SYNTAX_TABLE (); /* For syntax_prefix_flag_p. */
93} 93}
94 94
95static bool
96case_ch_is_word (enum syntaxcode syntax)
97{
98 return syntax == Sword || (case_symbols_as_words && syntax == Ssymbol);
99}
100
95struct casing_str_buf 101struct casing_str_buf
96{ 102{
97 unsigned char data[max (6, MAX_MULTIBYTE_LENGTH)]; 103 unsigned char data[max (6, MAX_MULTIBYTE_LENGTH)];
@@ -115,7 +121,7 @@ case_character_impl (struct casing_str_buf *buf,
115 121
116 /* Update inword state */ 122 /* Update inword state */
117 bool was_inword = ctx->inword; 123 bool was_inword = ctx->inword;
118 ctx->inword = SYNTAX (ch) == Sword && 124 ctx->inword = case_ch_is_word (SYNTAX (ch)) &&
119 (!ctx->inbuffer || was_inword || !syntax_prefix_flag_p (ch)); 125 (!ctx->inbuffer || was_inword || !syntax_prefix_flag_p (ch));
120 126
121 /* Normalize flag so it's one of CASE_UP, CASE_DOWN or CASE_CAPITALIZE. */ 127 /* Normalize flag so it's one of CASE_UP, CASE_DOWN or CASE_CAPITALIZE. */
@@ -222,7 +228,7 @@ case_character (struct casing_str_buf *buf, struct casing_context *ctx,
222 has a word syntax (i.e. current character is end of word), use final 228 has a word syntax (i.e. current character is end of word), use final
223 sigma. */ 229 sigma. */
224 if (was_inword && ch == GREEK_CAPITAL_LETTER_SIGMA && changed 230 if (was_inword && ch == GREEK_CAPITAL_LETTER_SIGMA && changed
225 && (!next || SYNTAX (STRING_CHAR (next)) != Sword)) 231 && (!next || !case_ch_is_word (SYNTAX (STRING_CHAR (next)))))
226 { 232 {
227 buf->len_bytes = CHAR_STRING (GREEK_SMALL_LETTER_FINAL_SIGMA, buf->data); 233 buf->len_bytes = CHAR_STRING (GREEK_SMALL_LETTER_FINAL_SIGMA, buf->data);
228 buf->len_chars = 1; 234 buf->len_chars = 1;
@@ -720,6 +726,21 @@ Called with one argument METHOD which can be:
720 3rd argument. */); 726 3rd argument. */);
721 Vregion_extract_function = Qnil; /* simple.el sets this. */ 727 Vregion_extract_function = Qnil; /* simple.el sets this. */
722 728
729 DEFVAR_BOOL ("case-symbols-as-words", case_symbols_as_words,
730 doc: /* If non-nil, case functions treat symbol syntax as part of words.
731
732Functions such as `upcase-initials' and `replace-match' check or modify
733the case pattern of sequences of characters. Normally, these operate on
734sequences of characters whose syntax is word constituent. If this
735variable is non-nil, then they operate on sequences of characters whose
736syntax is either word constituent or symbol constituent.
737
738This is useful for programming languages and styles where only the first
739letter of a symbol's name is ever capitalized.*/);
740 case_symbols_as_words = 0;
741 DEFSYM (Qcase_symbols_as_words, "case-symbols-as-words");
742 Fmake_variable_buffer_local (Qcase_symbols_as_words);
743
723 defsubr (&Supcase); 744 defsubr (&Supcase);
724 defsubr (&Sdowncase); 745 defsubr (&Sdowncase);
725 defsubr (&Scapitalize); 746 defsubr (&Scapitalize);
diff --git a/src/search.c b/src/search.c
index e9b29bb7179..692d8488049 100644
--- a/src/search.c
+++ b/src/search.c
@@ -2365,7 +2365,7 @@ text has only capital letters and has at least one multiletter word,
2365convert NEWTEXT to all caps. Otherwise if all words are capitalized 2365convert NEWTEXT to all caps. Otherwise if all words are capitalized
2366in the replaced text, capitalize each word in NEWTEXT. Note that 2366in the replaced text, capitalize each word in NEWTEXT. Note that
2367what exactly is a word is determined by the syntax tables in effect 2367what exactly is a word is determined by the syntax tables in effect
2368in the current buffer. 2368in the current buffer, and the variable `case-symbols-as-words'.
2369 2369
2370If optional third arg LITERAL is non-nil, insert NEWTEXT literally. 2370If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
2371Otherwise treat `\\' as special: 2371Otherwise treat `\\' as special:
@@ -2479,7 +2479,8 @@ since only regular expressions have distinguished subexpressions. */)
2479 /* Cannot be all caps if any original char is lower case */ 2479 /* Cannot be all caps if any original char is lower case */
2480 2480
2481 some_lowercase = 1; 2481 some_lowercase = 1;
2482 if (SYNTAX (prevc) != Sword) 2482 if (SYNTAX (prevc) != Sword
2483 && !(case_symbols_as_words && SYNTAX (prevc) == Ssymbol))
2483 some_nonuppercase_initial = 1; 2484 some_nonuppercase_initial = 1;
2484 else 2485 else
2485 some_multiletter_word = 1; 2486 some_multiletter_word = 1;
@@ -2487,7 +2488,8 @@ since only regular expressions have distinguished subexpressions. */)
2487 else if (uppercasep (c)) 2488 else if (uppercasep (c))
2488 { 2489 {
2489 some_uppercase = 1; 2490 some_uppercase = 1;
2490 if (SYNTAX (prevc) != Sword) 2491 if (SYNTAX (prevc) != Sword
2492 && !(case_symbols_as_words && SYNTAX (prevc) == Ssymbol))
2491 ; 2493 ;
2492 else 2494 else
2493 some_multiletter_word = 1; 2495 some_multiletter_word = 1;
@@ -2496,7 +2498,8 @@ since only regular expressions have distinguished subexpressions. */)
2496 { 2498 {
2497 /* If the initial is a caseless word constituent, 2499 /* If the initial is a caseless word constituent,
2498 treat that like a lowercase initial. */ 2500 treat that like a lowercase initial. */
2499 if (SYNTAX (prevc) != Sword) 2501 if (SYNTAX (prevc) != Sword
2502 && !(case_symbols_as_words && SYNTAX (prevc) == Ssymbol))
2500 some_nonuppercase_initial = 1; 2503 some_nonuppercase_initial = 1;
2501 } 2504 }
2502 2505
diff --git a/test/src/casefiddle-tests.el b/test/src/casefiddle-tests.el
index e7f4348b0c6..12984d898b9 100644
--- a/test/src/casefiddle-tests.el
+++ b/test/src/casefiddle-tests.el
@@ -294,4 +294,16 @@
294 ;;(should (string-equal (capitalize "indIá") "İndıa")) 294 ;;(should (string-equal (capitalize "indIá") "İndıa"))
295 )) 295 ))
296 296
297(defun casefiddle-tests--check-syms (init with-words with-symbols)
298 (let ((case-symbols-as-words nil))
299 (should (string-equal (upcase-initials init) with-words)))
300 (let ((case-symbols-as-words t))
301 (should (string-equal (upcase-initials init) with-symbols))))
302
303(ert-deftest casefiddle-case-symbols-as-words ()
304 (casefiddle-tests--check-syms "Aa_bb Cc_dd" "Aa_Bb Cc_Dd" "Aa_bb Cc_dd")
305 (casefiddle-tests--check-syms "Aa_bb cc_DD" "Aa_Bb Cc_DD" "Aa_bb Cc_DD")
306 (casefiddle-tests--check-syms "aa_bb cc_dd" "Aa_Bb Cc_Dd" "Aa_bb Cc_dd")
307 (casefiddle-tests--check-syms "Aa_Bb Cc_Dd" "Aa_Bb Cc_Dd" "Aa_Bb Cc_Dd"))
308
297;;; casefiddle-tests.el ends here 309;;; casefiddle-tests.el ends here