diff options
| author | Spencer Baugh | 2023-10-21 11:09:39 -0400 |
|---|---|---|
| committer | Eli Zaretskii | 2023-10-29 13:32:43 +0200 |
| commit | 5c8fc0b0594b1e3af43d86c0bc96e10d03bc75a2 (patch) | |
| tree | 37e56deacbaaa4492eff5c0211ec362dbfc1a409 | |
| parent | 3dca52dd422c50ebf24a304e7c3d36cf5f1c55cf (diff) | |
| download | emacs-5c8fc0b0594b1e3af43d86c0bc96e10d03bc75a2.tar.gz emacs-5c8fc0b0594b1e3af43d86c0bc96e10d03bc75a2.zip | |
Add 'case-symbols-as-words' to configure symbol case behavior

In some programming languages and styles, a symbol (or every
symbol in a sequence of symbols) might be capitalized, but the
individual words making up the symbol should never be capitalized.
For example, in OCaml, type names Look_like_this and variable names
look_like_this, but it is basically never correct for something to
Look_Like_This. And one might have "aa_bb cc_dd ee_ff" or "Aa_bb
Cc_dd Ee_ff", but never "Aa_Bb Cc_Dd Ee_Ff".

To support this, the new variable 'case-symbols-as-words' causes
symbol constituents to be treated as part of words only for case
operations.

* src/casefiddle.c (case_ch_is_word): New function.
(case_character_impl, case_character): Use 'case_ch_is_word'.
(syms_of_casefiddle): Define 'case-symbols-as-words'.
* src/search.c (Freplace_match): Use 'case-symbols-as-words'
when calculating case pattern.
* test/src/casefiddle-tests.el (casefiddle-tests--check-syms)
(casefiddle-case-symbols-as-words): Test 'case-symbols-as-words'.
* etc/NEWS: Announce 'case-symbols-as-words'.
* doc/lispref/strings.texi (Case Conversion): Document
'case-symbols-as-words'.
(Bug#66614)
| -rw-r--r-- | doc/lispref/strings.texi | 8 | ||||
| -rw-r--r-- | etc/NEWS | 8 | ||||
| -rw-r--r-- | src/casefiddle.c | 25 | ||||
| -rw-r--r-- | src/search.c | 11 | ||||
| -rw-r--r-- | test/src/casefiddle-tests.el | 12 |
5 files changed, 56 insertions, 8 deletions
diff --git a/doc/lispref/strings.texi b/doc/lispref/strings.texi index 7d11db49def..665d4f9a8dc 100644 --- a/doc/lispref/strings.texi +++ b/doc/lispref/strings.texi | |||
| @@ -1510,7 +1510,9 @@ case. | |||
| 1510 | 1510 | ||
| 1511 | The definition of a word is any sequence of consecutive characters that | 1511 | The definition of a word is any sequence of consecutive characters that |
| 1512 | are assigned to the word constituent syntax class in the current syntax | 1512 | are assigned to the word constituent syntax class in the current syntax |
| 1513 | table (@pxref{Syntax Class Table}). | 1513 | table (@pxref{Syntax Class Table}), or if @code{case-symbols-as-words} |
| 1514 | is non-nil, also characters assigned to the symbol constituent syntax | ||
| 1515 | class. | ||
| 1514 | 1516 | ||
| 1515 | When @var{string-or-char} is a character, this function does the same | 1517 | When @var{string-or-char} is a character, this function does the same |
| 1516 | thing as @code{upcase}. | 1518 | thing as @code{upcase}. |
| @@ -1542,7 +1544,9 @@ had its initial letter converted to upper case. | |||
| 1542 | 1544 | ||
| 1543 | The definition of a word is any sequence of consecutive characters that | 1545 | The definition of a word is any sequence of consecutive characters that |
| 1544 | are assigned to the word constituent syntax class in the current syntax | 1546 | are assigned to the word constituent syntax class in the current syntax |
| 1545 | table (@pxref{Syntax Class Table}). | 1547 | table (@pxref{Syntax Class Table}), or if @code{case-symbols-as-words} |
| 1548 | is non-nil, also characters assigned to the symbol constituent syntax | ||
| 1549 | class. | ||
| 1546 | 1550 | ||
| 1547 | When the argument to @code{upcase-initials} is a character, | 1551 | When the argument to @code{upcase-initials} is a character, |
| 1548 | @code{upcase-initials} has the same result as @code{upcase}. | 1552 | @code{upcase-initials} has the same result as @code{upcase}. |
diff --git a/etc/NEWS b/etc/NEWS --- a/etc/NEWS +++ b/etc/NEWS | |||
| @@ -1194,6 +1194,14 @@ instead of "ctags", "ebrowse", "etags", "hexl", "emacsclient", and | |||
| 1194 | subprocess. | 1194 | subprocess. |
| 1195 | 1195 | ||
| 1196 | +++ | 1196 | +++ |
| 1197 | ** New variable 'case-symbols-as-words' affects case operations for symbols. | ||
| 1198 | If non-nil, then case operations such as 'upcase-initials' or | ||
| 1199 | 'replace-match' (with nil FIXEDCASE) will treat the entire symbol name | ||
| 1200 | as a single word. This is useful for programming languages and styles | ||
| 1201 | where only the first letter of a symbol's name is ever capitalized. | ||
| 1202 | It defaults to nil. | ||
| 1203 | |||
| 1204 | +++ | ||
| 1197 | ** 'x-popup-menu' now understands touch screen events. | 1205 | ** 'x-popup-menu' now understands touch screen events. |
| 1198 | When a 'touchscreen-begin' or 'touchscreen-end' event is passed as the | 1206 | When a 'touchscreen-begin' or 'touchscreen-end' event is passed as the |
| 1199 | POSITION argument, it will behave as if that event was a mouse event. | 1207 | POSITION argument, it will behave as if that event was a mouse event. |
diff --git a/src/casefiddle.c b/src/casefiddle.c index d567a5e353a..3afb131c50e 100644 --- a/src/casefiddle.c +++ b/src/casefiddle.c | |||
| @@ -92,6 +92,12 @@ prepare_casing_context (struct casing_context *ctx, | |||
| 92 | SETUP_BUFFER_SYNTAX_TABLE (); /* For syntax_prefix_flag_p. */ | 92 | SETUP_BUFFER_SYNTAX_TABLE (); /* For syntax_prefix_flag_p. */ |
| 93 | } | 93 | } |
| 94 | 94 | ||
| 95 | static bool | ||
| 96 | case_ch_is_word (enum syntaxcode syntax) | ||
| 97 | { | ||
| 98 | return syntax == Sword || (case_symbols_as_words && syntax == Ssymbol); | ||
| 99 | } | ||
| 100 | |||
| 95 | struct casing_str_buf | 101 | struct casing_str_buf |
| 96 | { | 102 | { |
| 97 | unsigned char data[max (6, MAX_MULTIBYTE_LENGTH)]; | 103 | unsigned char data[max (6, MAX_MULTIBYTE_LENGTH)]; |
| @@ -115,7 +121,7 @@ case_character_impl (struct casing_str_buf *buf, | |||
| 115 | 121 | ||
| 116 | /* Update inword state */ | 122 | /* Update inword state */ |
| 117 | bool was_inword = ctx->inword; | 123 | bool was_inword = ctx->inword; |
| 118 | ctx->inword = SYNTAX (ch) == Sword && | 124 | ctx->inword = case_ch_is_word (SYNTAX (ch)) && |
| 119 | (!ctx->inbuffer || was_inword || !syntax_prefix_flag_p (ch)); | 125 | (!ctx->inbuffer || was_inword || !syntax_prefix_flag_p (ch)); |
| 120 | 126 | ||
| 121 | /* Normalize flag so its one of CASE_UP, CASE_DOWN or CASE_CAPITALIZE. */ | 127 | /* Normalize flag so its one of CASE_UP, CASE_DOWN or CASE_CAPITALIZE. */ |
| @@ -222,7 +228,7 @@ case_character (struct casing_str_buf *buf, struct casing_context *ctx, | |||
| 222 | has a word syntax (i.e. current character is end of word), use final | 228 | has a word syntax (i.e. current character is end of word), use final |
| 223 | sigma. */ | 229 | sigma. */ |
| 224 | if (was_inword && ch == GREEK_CAPITAL_LETTER_SIGMA && changed | 230 | if (was_inword && ch == GREEK_CAPITAL_LETTER_SIGMA && changed |
| 225 | && (!next || SYNTAX (STRING_CHAR (next)) != Sword)) | 231 | && (!next || !case_ch_is_word (SYNTAX (STRING_CHAR (next))))) |
| 226 | { | 232 | { |
| 227 | buf->len_bytes = CHAR_STRING (GREEK_SMALL_LETTER_FINAL_SIGMA, buf->data); | 233 | buf->len_bytes = CHAR_STRING (GREEK_SMALL_LETTER_FINAL_SIGMA, buf->data); |
| 228 | buf->len_chars = 1; | 234 | buf->len_chars = 1; |
| @@ -720,6 +726,21 @@ Called with one argument METHOD which can be: | |||
| 720 | 3rd argument. */); | 726 | 3rd argument. */); |
| 721 | Vregion_extract_function = Qnil; /* simple.el sets this. */ | 727 | Vregion_extract_function = Qnil; /* simple.el sets this. */ |
| 722 | 728 | ||
| 729 | DEFVAR_BOOL ("case-symbols-as-words", case_symbols_as_words, | ||
| 730 | doc: /* If non-nil, case functions treat symbol syntax as part of words. | ||
| 731 | |||
| 732 | Functions such as `upcase-initials' and `replace-match' check or modify | ||
| 733 | the case pattern of sequences of characters. Normally, these operate on | ||
| 734 | sequences of characters whose syntax is word constituent. If this | ||
| 735 | variable is non-nil, then they operate on sequences of characters whose | ||
| 736 | syntax is either word constituent or symbol constituent. | ||
| 737 | |||
| 738 | This is useful for programming languages and styles where only the first | ||
| 739 | letter of a symbol's name is ever capitalized.*/); | ||
| 740 | case_symbols_as_words = 0; | ||
| 741 | DEFSYM (Qcase_symbols_as_words, "case-symbols-as-words"); | ||
| 742 | Fmake_variable_buffer_local (Qcase_symbols_as_words); | ||
| 743 | |||
| 723 | defsubr (&Supcase); | 744 | defsubr (&Supcase); |
| 724 | defsubr (&Sdowncase); | 745 | defsubr (&Sdowncase); |
| 725 | defsubr (&Scapitalize); | 746 | defsubr (&Scapitalize); |
diff --git a/src/search.c b/src/search.c index e9b29bb7179..692d8488049 100644 --- a/src/search.c +++ b/src/search.c | |||
| @@ -2365,7 +2365,7 @@ text has only capital letters and has at least one multiletter word, | |||
| 2365 | convert NEWTEXT to all caps. Otherwise if all words are capitalized | 2365 | convert NEWTEXT to all caps. Otherwise if all words are capitalized |
| 2366 | in the replaced text, capitalize each word in NEWTEXT. Note that | 2366 | in the replaced text, capitalize each word in NEWTEXT. Note that |
| 2367 | what exactly is a word is determined by the syntax tables in effect | 2367 | what exactly is a word is determined by the syntax tables in effect |
| 2368 | in the current buffer. | 2368 | in the current buffer, and the variable `case-symbols-as-words'. |
| 2369 | 2369 | ||
| 2370 | If optional third arg LITERAL is non-nil, insert NEWTEXT literally. | 2370 | If optional third arg LITERAL is non-nil, insert NEWTEXT literally. |
| 2371 | Otherwise treat `\\' as special: | 2371 | Otherwise treat `\\' as special: |
| @@ -2479,7 +2479,8 @@ since only regular expressions have distinguished subexpressions. */) | |||
| 2479 | /* Cannot be all caps if any original char is lower case */ | 2479 | /* Cannot be all caps if any original char is lower case */ |
| 2480 | 2480 | ||
| 2481 | some_lowercase = 1; | 2481 | some_lowercase = 1; |
| 2482 | if (SYNTAX (prevc) != Sword) | 2482 | if (SYNTAX (prevc) != Sword |
| 2483 | && !(case_symbols_as_words && SYNTAX (prevc) == Ssymbol)) | ||
| 2483 | some_nonuppercase_initial = 1; | 2484 | some_nonuppercase_initial = 1; |
| 2484 | else | 2485 | else |
| 2485 | some_multiletter_word = 1; | 2486 | some_multiletter_word = 1; |
| @@ -2487,7 +2488,8 @@ since only regular expressions have distinguished subexpressions. */) | |||
| 2487 | else if (uppercasep (c)) | 2488 | else if (uppercasep (c)) |
| 2488 | { | 2489 | { |
| 2489 | some_uppercase = 1; | 2490 | some_uppercase = 1; |
| 2490 | if (SYNTAX (prevc) != Sword) | 2491 | if (SYNTAX (prevc) != Sword |
| 2492 | && !(case_symbols_as_words && SYNTAX (prevc) == Ssymbol)) | ||
| 2491 | ; | 2493 | ; |
| 2492 | else | 2494 | else |
| 2493 | some_multiletter_word = 1; | 2495 | some_multiletter_word = 1; |
| @@ -2496,7 +2498,8 @@ since only regular expressions have distinguished subexpressions. */) | |||
| 2496 | { | 2498 | { |
| 2497 | /* If the initial is a caseless word constituent, | 2499 | /* If the initial is a caseless word constituent, |
| 2498 | treat that like a lowercase initial. */ | 2500 | treat that like a lowercase initial. */ |
| 2499 | if (SYNTAX (prevc) != Sword) | 2501 | if (SYNTAX (prevc) != Sword |
| 2502 | && !(case_symbols_as_words && SYNTAX (prevc) == Ssymbol)) | ||
| 2500 | some_nonuppercase_initial = 1; | 2503 | some_nonuppercase_initial = 1; |
| 2501 | } | 2504 | } |
| 2502 | 2505 | ||
diff --git a/test/src/casefiddle-tests.el b/test/src/casefiddle-tests.el index e7f4348b0c6..12984d898b9 100644 --- a/test/src/casefiddle-tests.el +++ b/test/src/casefiddle-tests.el | |||
| @@ -294,4 +294,16 @@ | |||
| 294 | ;;(should (string-equal (capitalize "indIá") "İndıa")) | 294 | ;;(should (string-equal (capitalize "indIá") "İndıa")) |
| 295 | )) | 295 | )) |
| 296 | 296 | ||
| 297 | (defun casefiddle-tests--check-syms (init with-words with-symbols) | ||
| 298 | (let ((case-symbols-as-words nil)) | ||
| 299 | (should (string-equal (upcase-initials init) with-words))) | ||
| 300 | (let ((case-symbols-as-words t)) | ||
| 301 | (should (string-equal (upcase-initials init) with-symbols)))) | ||
| 302 | |||
| 303 | (ert-deftest casefiddle-case-symbols-as-words () | ||
| 304 | (casefiddle-tests--check-syms "Aa_bb Cc_dd" "Aa_Bb Cc_Dd" "Aa_bb Cc_dd") | ||
| 305 | (casefiddle-tests--check-syms "Aa_bb cc_DD" "Aa_Bb Cc_DD" "Aa_bb Cc_DD") | ||
| 306 | (casefiddle-tests--check-syms "aa_bb cc_dd" "Aa_Bb Cc_Dd" "Aa_bb Cc_dd") | ||
| 307 | (casefiddle-tests--check-syms "Aa_Bb Cc_Dd" "Aa_Bb Cc_Dd" "Aa_Bb Cc_Dd")) | ||
| 308 | |||
| 297 | ;;; casefiddle-tests.el ends here | 309 | ;;; casefiddle-tests.el ends here |