diff options
| author | Po Lu | 2023-06-20 09:18:27 +0800 |
|---|---|---|
| committer | Po Lu | 2023-06-20 09:18:27 +0800 |
| commit | 8f3fee7dffadfce25c8f47fb71674f77417b42e5 (patch) | |
| tree | 69bc7643e0c6bd2ee13247e10835b8a3da36e42b | |
| parent | d067b2fca0005f3032d82f80f5c4d88750395dfc (diff) | |
| parent | 1e13610b75718e7904f8af181fb73571639e1211 (diff) | |
| download | emacs-8f3fee7dffadfce25c8f47fb71674f77417b42e5.tar.gz emacs-8f3fee7dffadfce25c8f47fb71674f77417b42e5.zip | |
Merge remote-tracking branch 'origin/master' into feature/android
| -rw-r--r-- | doc/emacs/search.texi | 12 | ||||
| -rw-r--r-- | doc/lispref/searching.texi | 201 | ||||
| -rw-r--r-- | lisp/emacs-lisp/lisp-mode.el | 2 | ||||
| -rw-r--r-- | lisp/gnus/gnus-sum.el | 4 | ||||
| -rw-r--r-- | lisp/textmodes/picture.el | 2 | ||||
| -rw-r--r-- | src/regex-emacs.c | 2 | ||||
| -rw-r--r-- | test/lisp/eshell/esh-util-tests.el | 2 | ||||
| -rw-r--r-- | test/lisp/progmodes/eglot-tests.el | 5 |
8 files changed, 168 insertions, 62 deletions
diff --git a/doc/emacs/search.texi b/doc/emacs/search.texi index 45378d95f65..2a816221235 100644 --- a/doc/emacs/search.texi +++ b/doc/emacs/search.texi | |||
| @@ -950,8 +950,8 @@ features used mainly in Lisp programs. | |||
| 950 | @dfn{special constructs} and the rest are @dfn{ordinary}. An ordinary | 950 | @dfn{special constructs} and the rest are @dfn{ordinary}. An ordinary |
| 951 | character matches that same character and nothing else. The special | 951 | character matches that same character and nothing else. The special |
| 952 | characters are @samp{$^.*+?[\}. The character @samp{]} is special if | 952 | characters are @samp{$^.*+?[\}. The character @samp{]} is special if |
| 953 | it ends a character alternative (see below). The character @samp{-} | 953 | it ends a bracket expression (see below). The character @samp{-} |
| 954 | is special inside a character alternative. Any other character | 954 | is special inside a bracket expression. Any other character |
| 955 | appearing in a regular expression is ordinary, unless a @samp{\} | 955 | appearing in a regular expression is ordinary, unless a @samp{\} |
| 956 | precedes it. (When you use regular expressions in a Lisp program, | 956 | precedes it. (When you use regular expressions in a Lisp program, |
| 957 | each @samp{\} must be doubled, see the example near the end of this | 957 | each @samp{\} must be doubled, see the example near the end of this |
| @@ -1033,11 +1033,11 @@ you search for @samp{a.*?$} against the text @samp{abbab} followed by | |||
| 1033 | a newline, it matches the whole string. Since it @emph{can} match | 1033 | a newline, it matches the whole string. Since it @emph{can} match |
| 1034 | starting at the first @samp{a}, it does. | 1034 | starting at the first @samp{a}, it does. |
| 1035 | 1035 | ||
| 1036 | @cindex bracket expression | ||
| 1036 | @cindex set of alternative characters, in regular expressions | 1037 | @cindex set of alternative characters, in regular expressions |
| 1037 | @cindex character set, in regular expressions | 1038 | @cindex character set, in regular expressions |
| 1038 | @item @kbd{[ @dots{} ]} | 1039 | @item @kbd{[ @dots{} ]} |
| 1039 | is a @dfn{set of alternative characters}, or a @dfn{character set}, | 1040 | is a @dfn{bracket expression}, which matches one of a set of characters. |
| 1040 | beginning with @samp{[} and terminated by @samp{]}. | ||
| 1041 | 1041 | ||
| 1042 | In the simplest case, the characters between the two brackets are what | 1042 | In the simplest case, the characters between the two brackets are what |
| 1043 | this set can match. Thus, @samp{[ad]} matches either one @samp{a} or | 1043 | this set can match. Thus, @samp{[ad]} matches either one @samp{a} or |
| @@ -1057,7 +1057,7 @@ Greek letters. | |||
| 1057 | @cindex character classes, in regular expressions | 1057 | @cindex character classes, in regular expressions |
| 1058 | You can also include certain special @dfn{character classes} in a | 1058 | You can also include certain special @dfn{character classes} in a |
| 1059 | character set. A @samp{[:} and balancing @samp{:]} enclose a | 1059 | character set. A @samp{[:} and balancing @samp{:]} enclose a |
| 1060 | character class inside a set of alternative characters. For instance, | 1060 | character class inside a bracket expression. For instance, |
| 1061 | @samp{[[:alnum:]]} matches any letter or digit. @xref{Char Classes,,, | 1061 | @samp{[[:alnum:]]} matches any letter or digit. @xref{Char Classes,,, |
| 1062 | elisp, The Emacs Lisp Reference Manual}, for a list of character | 1062 | elisp, The Emacs Lisp Reference Manual}, for a list of character |
| 1063 | classes. | 1063 | classes. |
| @@ -1125,7 +1125,7 @@ no preceding expression on which the @samp{*} can act. It is poor practice | |||
| 1125 | to depend on this behavior; it is better to quote the special character anyway, | 1125 | to depend on this behavior; it is better to quote the special character anyway, |
| 1126 | regardless of where it appears. | 1126 | regardless of where it appears. |
| 1127 | 1127 | ||
| 1128 | As a @samp{\} is not special inside a set of alternative characters, it can | 1128 | As a @samp{\} is not special inside a bracket expression, it can |
| 1129 | never remove the special meaning of @samp{-}, @samp{^} or @samp{]}. | 1129 | never remove the special meaning of @samp{-}, @samp{^} or @samp{]}. |
| 1130 | You should not quote these characters when they have no special | 1130 | You should not quote these characters when they have no special |
| 1131 | meaning. This would not clarify anything, since backslashes | 1131 | meaning. This would not clarify anything, since backslashes |
diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi index b8d9094b28d..28230cea643 100644 --- a/doc/lispref/searching.texi +++ b/doc/lispref/searching.texi | |||
| @@ -18,11 +18,12 @@ portions of it. | |||
| 18 | * Searching and Case:: Case-independent or case-significant searching. | 18 | * Searching and Case:: Case-independent or case-significant searching. |
| 19 | * Regular Expressions:: Describing classes of strings. | 19 | * Regular Expressions:: Describing classes of strings. |
| 20 | * Regexp Search:: Searching for a match for a regexp. | 20 | * Regexp Search:: Searching for a match for a regexp. |
| 21 | * POSIX Regexps:: Searching POSIX-style for the longest match. | 21 | * Longest Match:: Searching for the longest match. |
| 22 | * Match Data:: Finding out which part of the text matched, | 22 | * Match Data:: Finding out which part of the text matched, |
| 23 | after a string or regexp search. | 23 | after a string or regexp search. |
| 24 | * Search and Replace:: Commands that loop, searching and replacing. | 24 | * Search and Replace:: Commands that loop, searching and replacing. |
| 25 | * Standard Regexps:: Useful regexps for finding sentences, pages,... | 25 | * Standard Regexps:: Useful regexps for finding sentences, pages,... |
| 26 | * POSIX Regexps:: Emacs regexps vs POSIX regexps. | ||
| 26 | @end menu | 27 | @end menu |
| 27 | 28 | ||
| 28 | The @samp{skip-chars@dots{}} functions also perform a kind of searching. | 29 | The @samp{skip-chars@dots{}} functions also perform a kind of searching. |
| @@ -277,10 +278,10 @@ character is a simple regular expression that matches that character | |||
| 277 | and nothing else. The special characters are @samp{.}, @samp{*}, | 278 | and nothing else. The special characters are @samp{.}, @samp{*}, |
| 278 | @samp{+}, @samp{?}, @samp{[}, @samp{^}, @samp{$}, and @samp{\}; no new | 279 | @samp{+}, @samp{?}, @samp{[}, @samp{^}, @samp{$}, and @samp{\}; no new |
| 279 | special characters will be defined in the future. The character | 280 | special characters will be defined in the future. The character |
| 280 | @samp{]} is special if it ends a character alternative (see later). | 281 | @samp{]} is special if it ends a bracket expression (see later). |
| 281 | The character @samp{-} is special inside a character alternative. A | 282 | The character @samp{-} is special inside a bracket expression. A |
| 282 | @samp{[:} and balancing @samp{:]} enclose a character class inside a | 283 | @samp{[:} and balancing @samp{:]} enclose a character class inside a |
| 283 | character alternative. Any other character appearing in a regular | 284 | bracket expression. Any other character appearing in a regular |
| 284 | expression is ordinary, unless a @samp{\} precedes it. | 285 | expression is ordinary, unless a @samp{\} precedes it. |
| 285 | 286 | ||
| 286 | For example, @samp{f} is not a special character, so it is ordinary, and | 287 | For example, @samp{f} is not a special character, so it is ordinary, and |
| @@ -373,19 +374,19 @@ expression @samp{c[ad]*?a}, applied to that same string, matches just | |||
| 373 | permits the whole expression to match is @samp{d}.) | 374 | permits the whole expression to match is @samp{d}.) |
| 374 | 375 | ||
| 375 | @item @samp{[ @dots{} ]} | 376 | @item @samp{[ @dots{} ]} |
| 376 | @cindex character alternative (in regexp) | 377 | @cindex bracket expression (in regexp) |
| 377 | @cindex @samp{[} in regexp | 378 | @cindex @samp{[} in regexp |
| 378 | @cindex @samp{]} in regexp | 379 | @cindex @samp{]} in regexp |
| 379 | is a @dfn{character alternative}, which begins with @samp{[} and is | 380 | is a @dfn{bracket expression}, which begins with @samp{[} and is |
| 380 | terminated by @samp{]}. In the simplest case, the characters between | 381 | terminated by @samp{]}. In the simplest case, the characters between |
| 381 | the two brackets are what this character alternative can match. | 382 | the two brackets are what this bracket expression can match. |
| 382 | 383 | ||
| 383 | Thus, @samp{[ad]} matches either one @samp{a} or one @samp{d}, and | 384 | Thus, @samp{[ad]} matches either one @samp{a} or one @samp{d}, and |
| 384 | @samp{[ad]*} matches any string composed of just @samp{a}s and @samp{d}s | 385 | @samp{[ad]*} matches any string composed of just @samp{a}s and @samp{d}s |
| 385 | (including the empty string). It follows that @samp{c[ad]*r} | 386 | (including the empty string). It follows that @samp{c[ad]*r} |
| 386 | matches @samp{cr}, @samp{car}, @samp{cdr}, @samp{caddaar}, etc. | 387 | matches @samp{cr}, @samp{car}, @samp{cdr}, @samp{caddaar}, etc. |
| 387 | 388 | ||
| 388 | You can also include character ranges in a character alternative, by | 389 | You can also include character ranges in a bracket expression, by |
| 389 | writing the starting and ending characters with a @samp{-} between them. | 390 | writing the starting and ending characters with a @samp{-} between them. |
| 390 | Thus, @samp{[a-z]} matches any lower-case @acronym{ASCII} letter. | 391 | Thus, @samp{[a-z]} matches any lower-case @acronym{ASCII} letter. |
| 391 | Ranges may be intermixed freely with individual characters, as in | 392 | Ranges may be intermixed freely with individual characters, as in |
| @@ -394,7 +395,7 @@ or @samp{$}, @samp{%} or period. However, the ending character of one | |||
| 394 | range should not be the starting point of another one; for example, | 395 | range should not be the starting point of another one; for example, |
| 395 | @samp{[a-m-z]} should be avoided. | 396 | @samp{[a-m-z]} should be avoided. |
| 396 | 397 | ||
| 397 | A character alternative can also specify named character classes | 398 | A bracket expression can also specify named character classes |
| 398 | (@pxref{Char Classes}). For example, @samp{[[:ascii:]]} matches any | 399 | (@pxref{Char Classes}). For example, @samp{[[:ascii:]]} matches any |
| 399 | @acronym{ASCII} character. Using a character class is equivalent to | 400 | @acronym{ASCII} character. Using a character class is equivalent to |
| 400 | mentioning each of the characters in that class; but the latter is not | 401 | mentioning each of the characters in that class; but the latter is not |
| @@ -403,9 +404,9 @@ different characters. A character class should not appear as the | |||
| 403 | lower or upper bound of a range. | 404 | lower or upper bound of a range. |
| 404 | 405 | ||
| 405 | The usual regexp special characters are not special inside a | 406 | The usual regexp special characters are not special inside a |
| 406 | character alternative. A completely different set of characters is | 407 | bracket expression. A completely different set of characters is |
| 407 | special: @samp{]}, @samp{-} and @samp{^}. | 408 | special: @samp{]}, @samp{-} and @samp{^}. |
| 408 | To include @samp{]} in a character alternative, put it at the | 409 | To include @samp{]} in a bracket expression, put it at the |
| 409 | beginning. To include @samp{^}, put it anywhere but at the beginning. | 410 | beginning. To include @samp{^}, put it anywhere but at the beginning. |
| 410 | To include @samp{-}, put it at the end. Thus, @samp{[]^-]} matches | 411 | To include @samp{-}, put it at the end. Thus, @samp{[]^-]} matches |
| 411 | all three of these special characters. You cannot use @samp{\} to | 412 | all three of these special characters. You cannot use @samp{\} to |
| @@ -443,7 +444,7 @@ characters and raw 8-bit bytes, but not non-ASCII characters. This | |||
| 443 | feature is intended for searching text in unibyte buffers and strings. | 444 | feature is intended for searching text in unibyte buffers and strings. |
| 444 | @end enumerate | 445 | @end enumerate |
| 445 | 446 | ||
| 446 | Some kinds of character alternatives are not the best style even | 447 | Some kinds of bracket expressions are not the best style even |
| 447 | though they have a well-defined meaning in Emacs. They include: | 448 | though they have a well-defined meaning in Emacs. They include: |
| 448 | 449 | ||
| 449 | @enumerate | 450 | @enumerate |
| @@ -457,7 +458,7 @@ Unicode character escapes can help here; for example, for most programmers | |||
| 457 | @samp{[ก-ฺ฿-๛]} is less clear than @samp{[\u0E01-\u0E3A\u0E3F-\u0E5B]}. | 458 | @samp{[ก-ฺ฿-๛]} is less clear than @samp{[\u0E01-\u0E3A\u0E3F-\u0E5B]}. |
| 458 | 459 | ||
| 459 | @item | 460 | @item |
| 460 | Although a character alternative can include duplicates, it is better | 461 | Although a bracket expression can include duplicates, it is better |
| 461 | style to avoid them. For example, @samp{[XYa-yYb-zX]} is less clear | 462 | style to avoid them. For example, @samp{[XYa-yYb-zX]} is less clear |
| 462 | than @samp{[XYa-z]}. | 463 | than @samp{[XYa-z]}. |
| 463 | 464 | ||
| @@ -468,30 +469,30 @@ is simpler to list the characters. For example, | |||
| 468 | than @samp{[ij]}, and @samp{[i-k]} is less clear than @samp{[ijk]}. | 469 | than @samp{[ij]}, and @samp{[i-k]} is less clear than @samp{[ijk]}. |
| 469 | 470 | ||
| 470 | @item | 471 | @item |
| 471 | Although a @samp{-} can appear at the beginning of a character | 472 | Although a @samp{-} can appear at the beginning of a bracket |
| 472 | alternative or as the upper bound of a range, it is better style to | 473 | expression or as the upper bound of a range, it is better style to |
| 473 | put @samp{-} by itself at the end of a character alternative. For | 474 | put @samp{-} by itself at the end of a bracket expression. For |
| 474 | example, although @samp{[-a-z]} is valid, @samp{[a-z-]} is better | 475 | example, although @samp{[-a-z]} is valid, @samp{[a-z-]} is better |
| 475 | style; and although @samp{[*--]} is valid, @samp{[*+,-]} is clearer. | 476 | style; and although @samp{[*--]} is valid, @samp{[*+,-]} is clearer. |
| 476 | @end enumerate | 477 | @end enumerate |
| 477 | 478 | ||
| 478 | @item @samp{[^ @dots{} ]} | 479 | @item @samp{[^ @dots{} ]} |
| 479 | @cindex @samp{^} in regexp | 480 | @cindex @samp{^} in regexp |
| 480 | @samp{[^} begins a @dfn{complemented character alternative}. This | 481 | @samp{[^} begins a @dfn{complemented bracket expression}. This |
| 481 | matches any character except the ones specified. Thus, | 482 | matches any character except the ones specified. Thus, |
| 482 | @samp{[^a-z0-9A-Z]} matches all characters @emph{except} ASCII letters and | 483 | @samp{[^a-z0-9A-Z]} matches all characters @emph{except} ASCII letters and |
| 483 | digits. | 484 | digits. |
| 484 | 485 | ||
| 485 | @samp{^} is not special in a character alternative unless it is the first | 486 | @samp{^} is not special in a bracket expression unless it is the first |
| 486 | character. The character following the @samp{^} is treated as if it | 487 | character. The character following the @samp{^} is treated as if it |
| 487 | were first (in other words, @samp{-} and @samp{]} are not special there). | 488 | were first (in other words, @samp{-} and @samp{]} are not special there). |
| 488 | 489 | ||
| 489 | A complemented character alternative can match a newline, unless newline is | 490 | A complemented bracket expression can match a newline, unless newline is |
| 490 | mentioned as one of the characters not to match. This is in contrast to | 491 | mentioned as one of the characters not to match. This is in contrast to |
| 491 | the handling of regexps in programs such as @code{grep}. | 492 | the handling of regexps in programs such as @code{grep}. |
| 492 | 493 | ||
| 493 | You can specify named character classes, just like in character | 494 | You can specify named character classes, just like in bracket |
| 494 | alternatives. For instance, @samp{[^[:ascii:]]} matches any | 495 | expressions. For instance, @samp{[^[:ascii:]]} matches any |
| 495 | non-@acronym{ASCII} character. @xref{Char Classes}. | 496 | non-@acronym{ASCII} character. @xref{Char Classes}. |
| 496 | 497 | ||
| 497 | @item @samp{^} | 498 | @item @samp{^} |
| @@ -505,9 +506,10 @@ beginning of a line. | |||
| 505 | When matching a string instead of a buffer, @samp{^} matches at the | 506 | When matching a string instead of a buffer, @samp{^} matches at the |
| 506 | beginning of the string or after a newline character. | 507 | beginning of the string or after a newline character. |
| 507 | 508 | ||
| 508 | For historical compatibility reasons, @samp{^} can be used only at the | 509 | For historical compatibility, @samp{^} is special only at the beginning |
| 509 | beginning of the regular expression, or after @samp{\(}, @samp{\(?:} | 510 | of the regular expression, or after @samp{\(}, @samp{\(?:} or @samp{\|}. |
| 510 | or @samp{\|}. | 511 | Although @samp{^} is an ordinary character in other contexts, |
| 512 | it is good practice to use @samp{\^} even then. | ||
| 511 | 513 | ||
| 512 | @item @samp{$} | 514 | @item @samp{$} |
| 513 | @cindex @samp{$} in regexp | 515 | @cindex @samp{$} in regexp |
| @@ -519,8 +521,10 @@ matches a string of one @samp{x} or more at the end of a line. | |||
| 519 | When matching a string instead of a buffer, @samp{$} matches at the end | 521 | When matching a string instead of a buffer, @samp{$} matches at the end |
| 520 | of the string or before a newline character. | 522 | of the string or before a newline character. |
| 521 | 523 | ||
| 522 | For historical compatibility reasons, @samp{$} can be used only at the | 524 | For historical compatibility, @samp{$} is special only at the |
| 523 | end of the regular expression, or before @samp{\)} or @samp{\|}. | 525 | end of the regular expression, or before @samp{\)} or @samp{\|}. |
| 526 | Although @samp{$} is an ordinary character in other contexts, | ||
| 527 | it is good practice to use @samp{\$} even then. | ||
| 524 | 528 | ||
| 525 | @item @samp{\} | 529 | @item @samp{\} |
| 526 | @cindex @samp{\} in regexp | 530 | @cindex @samp{\} in regexp |
| @@ -540,14 +544,19 @@ example, the regular expression that matches the @samp{\} character is | |||
| 540 | @samp{\} is @code{"\\\\"}. | 544 | @samp{\} is @code{"\\\\"}. |
| 541 | @end table | 545 | @end table |
| 542 | 546 | ||
| 543 | @strong{Please note:} For historical compatibility, special characters | 547 | For historical compatibility, a repetition operator is treated as ordinary |
| 544 | are treated as ordinary ones if they are in contexts where their special | 548 | if it appears at the start of a regular expression |
| 545 | meanings make no sense. For example, @samp{*foo} treats @samp{*} as | 549 | or after @samp{^}, @samp{\(}, @samp{\(?:} or @samp{\|}. |
| 546 | ordinary since there is no preceding expression on which the @samp{*} | 550 | For example, @samp{*foo} is treated as @samp{\*foo}, and |
| 547 | can act. It is poor practice to depend on this behavior; quote the | 551 | @samp{two\|^\@{2\@}} is treated as @samp{two\|^@{2@}}. |
| 548 | special character anyway, regardless of where it appears. | 552 | It is poor practice to depend on this behavior; use proper backslash |
| 549 | 553 | escaping anyway, regardless of where the repetition operator appears. | |
| 550 | As a @samp{\} is not special inside a character alternative, it can | 554 | Also, a repetition operator should not immediately follow a backslash escape |
| 555 | that matches only empty strings, as Emacs has bugs in this area. | ||
| 556 | For example, it is unwise to use @samp{\b*}, which can be omitted | ||
| 557 | without changing the documented meaning of the regular expression. | ||
| 558 | |||
| 559 | As a @samp{\} is not special inside a bracket expression, it can | ||
| 551 | never remove the special meaning of @samp{-}, @samp{^} or @samp{]}. | 560 | never remove the special meaning of @samp{-}, @samp{^} or @samp{]}. |
| 552 | You should not quote these characters when they have no special | 561 | You should not quote these characters when they have no special |
| 553 | meaning. This would not clarify anything, since backslashes | 562 | meaning. This would not clarify anything, since backslashes |
| @@ -556,23 +565,23 @@ special meaning, as in @samp{[^\]} (@code{"[^\\]"} for Lisp string | |||
| 556 | syntax), which matches any single character except a backslash. | 565 | syntax), which matches any single character except a backslash. |
| 557 | 566 | ||
| 558 | In practice, most @samp{]} that occur in regular expressions close a | 567 | In practice, most @samp{]} that occur in regular expressions close a |
| 559 | character alternative and hence are special. However, occasionally a | 568 | bracket expression and hence are special. However, occasionally a |
| 560 | regular expression may try to match a complex pattern of literal | 569 | regular expression may try to match a complex pattern of literal |
| 561 | @samp{[} and @samp{]}. In such situations, it sometimes may be | 570 | @samp{[} and @samp{]}. In such situations, it sometimes may be |
| 562 | necessary to carefully parse the regexp from the start to determine | 571 | necessary to carefully parse the regexp from the start to determine |
| 563 | which square brackets enclose a character alternative. For example, | 572 | which square brackets enclose a bracket expression. For example, |
| 564 | @samp{[^][]]} consists of the complemented character alternative | 573 | @samp{[^][]]} consists of the complemented bracket expression |
| 565 | @samp{[^][]} (which matches any single character that is not a square | 574 | @samp{[^][]} (which matches any single character that is not a square |
| 566 | bracket), followed by a literal @samp{]}. | 575 | bracket), followed by a literal @samp{]}. |
| 567 | 576 | ||
| 568 | The exact rules are that at the beginning of a regexp, @samp{[} is | 577 | The exact rules are that at the beginning of a regexp, @samp{[} is |
| 569 | special and @samp{]} not. This lasts until the first unquoted | 578 | special and @samp{]} not. This lasts until the first unquoted |
| 570 | @samp{[}, after which we are in a character alternative; @samp{[} is | 579 | @samp{[}, after which we are in a bracket expression; @samp{[} is |
| 571 | no longer special (except when it starts a character class) but @samp{]} | 580 | no longer special (except when it starts a character class) but @samp{]} |
| 572 | is special, unless it immediately follows the special @samp{[} or that | 581 | is special, unless it immediately follows the special @samp{[} or that |
| 573 | @samp{[} followed by a @samp{^}. This lasts until the next special | 582 | @samp{[} followed by a @samp{^}. This lasts until the next special |
| 574 | @samp{]} that does not end a character class. This ends the character | 583 | @samp{]} that does not end a character class. This ends the bracket |
| 575 | alternative and restores the ordinary syntax of regular expressions; | 584 | expression and restores the ordinary syntax of regular expressions; |
| 576 | an unquoted @samp{[} is special again and a @samp{]} not. | 585 | an unquoted @samp{[} is special again and a @samp{]} not. |
| 577 | 586 | ||
| 578 | @node Char Classes | 587 | @node Char Classes |
| @@ -583,8 +592,8 @@ an unquoted @samp{[} is special again and a @samp{]} not. | |||
| 583 | @cindex alpha character class, regexp | 592 | @cindex alpha character class, regexp |
| 584 | @cindex xdigit character class, regexp | 593 | @cindex xdigit character class, regexp |
| 585 | 594 | ||
| 586 | Below is a table of the classes you can use in a character | 595 | Below is a table of the classes you can use in a bracket |
| 587 | alternative, and what they mean. Note that the @samp{[} and @samp{]} | 596 | expression, and what they mean. Note that the @samp{[} and @samp{]} |
| 588 | characters that enclose the class name are part of the name, so a | 597 | characters that enclose the class name are part of the name, so a |
| 589 | regular expression using these classes needs one more pair of | 598 | regular expression using these classes needs one more pair of |
| 590 | brackets. For example, a regular expression matching a sequence of | 599 | brackets. For example, a regular expression matching a sequence of |
| @@ -911,7 +920,7 @@ with a symbol-constituent character. | |||
| 911 | 920 | ||
| 912 | @kindex invalid-regexp | 921 | @kindex invalid-regexp |
| 913 | Not every string is a valid regular expression. For example, a string | 922 | Not every string is a valid regular expression. For example, a string |
| 914 | that ends inside a character alternative without a terminating @samp{]} | 923 | that ends inside a bracket expression without a terminating @samp{]} |
| 915 | is invalid, and so is a string that ends with a single @samp{\}. If | 924 | is invalid, and so is a string that ends with a single @samp{\}. If |
| 916 | an invalid regular expression is passed to any of the search functions, | 925 | an invalid regular expression is passed to any of the search functions, |
| 917 | an @code{invalid-regexp} error is signaled. | 926 | an @code{invalid-regexp} error is signaled. |
| @@ -948,7 +957,7 @@ deciphered as follows: | |||
| 948 | 957 | ||
| 949 | @table @code | 958 | @table @code |
| 950 | @item [.?!] | 959 | @item [.?!] |
| 951 | The first part of the pattern is a character alternative that matches | 960 | The first part of the pattern is a bracket expression that matches |
| 952 | any one of three characters: period, question mark, and exclamation | 961 | any one of three characters: period, question mark, and exclamation |
| 953 | mark. The match must begin with one of these three characters. (This | 962 | mark. The match must begin with one of these three characters. (This |
| 954 | is one point where the new default regexp used by Emacs differs from | 963 | is one point where the new default regexp used by Emacs differs from |
| @@ -960,7 +969,7 @@ The second part of the pattern matches any closing braces and quotation | |||
| 960 | marks, zero or more of them, that may follow the period, question mark | 969 | marks, zero or more of them, that may follow the period, question mark |
| 961 | or exclamation mark. The @code{\"} is Lisp syntax for a double-quote in | 970 | or exclamation mark. The @code{\"} is Lisp syntax for a double-quote in |
| 962 | a string. The @samp{*} at the end indicates that the immediately | 971 | a string. The @samp{*} at the end indicates that the immediately |
| 963 | preceding regular expression (a character alternative, in this case) may be | 972 | preceding regular expression (a bracket expression, in this case) may be |
| 964 | repeated zero or more times. | 973 | repeated zero or more times. |
| 965 | 974 | ||
| 966 | @item \\($\\|@ $\\|\t\\|@ @ \\) | 975 | @item \\($\\|@ $\\|\t\\|@ @ \\) |
| @@ -1911,7 +1920,7 @@ attempts. Other zero-width assertions may also bring benefits by | |||
| 1911 | causing a match to fail early. | 1920 | causing a match to fail early. |
| 1912 | 1921 | ||
| 1913 | @item | 1922 | @item |
| 1914 | Avoid or-patterns in favor of character alternatives: write | 1923 | Avoid or-patterns in favor of bracket expressions: write |
| 1915 | @samp{[ab]} instead of @samp{a\|b}. Recall that @samp{\s-} and @samp{\sw} | 1924 | @samp{[ab]} instead of @samp{a\|b}. Recall that @samp{\s-} and @samp{\sw} |
| 1916 | are equivalent to @samp{[[:space:]]} and @samp{[[:word:]]}, respectively. | 1925 | are equivalent to @samp{[[:space:]]} and @samp{[[:word:]]}, respectively. |
| 1917 | 1926 | ||
| @@ -2193,8 +2202,8 @@ constructs, you should bind it temporarily for as small as possible | |||
| 2193 | a part of the code. | 2202 | a part of the code. |
| 2194 | @end defvar | 2203 | @end defvar |
| 2195 | 2204 | ||
| 2196 | @node POSIX Regexps | 2205 | @node Longest Match |
| 2197 | @section POSIX Regular Expression Searching | 2206 | @section Longest-match searching for regular expression matches |
| 2198 | 2207 | ||
| 2199 | @cindex backtracking and POSIX regular expressions | 2208 | @cindex backtracking and POSIX regular expressions |
| 2200 | The usual regular expression functions do backtracking when necessary | 2209 | The usual regular expression functions do backtracking when necessary |
| @@ -2209,7 +2218,9 @@ possibilities and found all matches, so they can report the longest | |||
| 2209 | match, as required by POSIX@. This is much slower, so use these | 2218 | match, as required by POSIX@. This is much slower, so use these |
| 2210 | functions only when you really need the longest match. | 2219 | functions only when you really need the longest match. |
| 2211 | 2220 | ||
| 2212 | The POSIX search and match functions do not properly support the | 2221 | Despite their names, the POSIX search and match functions |
| 2222 | use Emacs regular expressions, not POSIX regular expressions. | ||
| 2223 | @xref{POSIX Regexps}. Also, they do not properly support the | ||
| 2213 | non-greedy repetition operators (@pxref{Regexp Special, non-greedy}). | 2224 | non-greedy repetition operators (@pxref{Regexp Special, non-greedy}). |
| 2214 | This is because POSIX backtracking conflicts with the semantics of | 2225 | This is because POSIX backtracking conflicts with the semantics of |
| 2215 | non-greedy repetition. | 2226 | non-greedy repetition. |
| @@ -2957,3 +2968,97 @@ values of the variables @code{sentence-end-double-space} | |||
| 2957 | @code{sentence-end-without-period}, and | 2968 | @code{sentence-end-without-period}, and |
| 2958 | @code{sentence-end-without-space}. | 2969 | @code{sentence-end-without-space}. |
| 2959 | @end defun | 2970 | @end defun |
| 2971 | |||
| 2972 | @node POSIX Regexps | ||
| 2973 | @section Emacs versus POSIX Regular Expressions | ||
| 2974 | @cindex POSIX regular expressions | ||
| 2975 | |||
| 2976 | Regular expression syntax varies significantly among computer programs. | ||
| 2977 | When writing Elisp code that generates regular expressions for use by other | ||
| 2978 | programs, it is helpful to know how syntax variants differ. | ||
| 2979 | To give a feel for the variation, this section discusses how | ||
| 2980 | Emacs regular expressions differ from two syntax variants standardized by POSIX: | ||
| 2981 | basic regular expressions (BREs) and extended regular expressions (EREs). | ||
| 2982 | Plain @command{grep} uses BREs, and @samp{grep -E} uses EREs. | ||
| 2983 | |||
| 2984 | Emacs regular expressions have a syntax closer to EREs than to BREs, | ||
| 2985 | with some extensions. Here is a summary of how POSIX BREs and EREs | ||
| 2986 | differ from Emacs regular expressions. | ||
| 2987 | |||
| 2988 | @itemize @bullet | ||
| 2989 | @item | ||
| 2990 | In POSIX BREs @samp{+} and @samp{?} are not special. | ||
| 2991 | The only backslash escape sequences are @samp{\(@dots{}\)}, | ||
| 2992 | @samp{\@{@dots{}\@}}, @samp{\1} through @samp{\9}, along with the | ||
| 2993 | escaped special characters @samp{\$}, @samp{\*}, @samp{\.}, @samp{\[}, | ||
| 2994 | @samp{\\}, and @samp{\^}. | ||
| 2995 | Therefore @samp{\(?:} acts like @samp{\([?]:}. | ||
| 2996 | POSIX does not define how other BRE escapes behave; | ||
| 2997 | for example, GNU @command{grep} treats @samp{\|} like Emacs does, | ||
| 2998 | but does not support all the Emacs escapes. | ||
| 2999 | |||
| 3000 | @item | ||
| 3001 | In POSIX EREs @samp{@{}, @samp{(} and @samp{|} are special, | ||
| 3002 | and @samp{)} is special when matched with a preceding @samp{(}. | ||
| 3003 | These special characters do not use preceding backslashes; | ||
| 3004 | @samp{(?} produces undefined results. | ||
| 3005 | The only backslash escape sequences are the escaped special characters | ||
| 3006 | @samp{\$}, @samp{\(}, @samp{\)}, @samp{\*}, @samp{\+}, @samp{\.}, | ||
| 3007 | @samp{\?}, @samp{\[}, @samp{\\}, @samp{\^}, @samp{\@{} and @samp{\|}. | ||
| 3008 | POSIX does not define how other ERE escapes behave; | ||
| 3009 | for example, GNU @samp{grep -E} treats @samp{\1} like Emacs does, | ||
| 3010 | but does not support all the Emacs escapes. | ||
| 3011 | |||
| 3012 | @item | ||
| 3013 | In POSIX BREs, it is an implementation option whether @samp{^} is special | ||
| 3014 | after @samp{\(}; GNU @command{grep} treats it like Emacs does. | ||
| 3015 | In POSIX EREs, @samp{^} is always special outside of bracket expressions, | ||
| 3016 | which means the ERE @samp{x^} never matches. | ||
| 3017 | In Emacs regular expressions, @samp{^} is special only at the | ||
| 3018 | beginning of the regular expression, or after @samp{\(}, @samp{\(?:} | ||
| 3019 | or @samp{\|}. | ||
| 3020 | |||
| 3021 | @item | ||
| 3022 | In POSIX BREs, it is an implementation option whether @samp{$} is special | ||
| 3023 | before @samp{\)}; GNU @command{grep} treats it like Emacs does. | ||
| 3024 | In POSIX EREs, @samp{$} is always special outside of bracket expressions, | ||
| 3025 | which means the ERE @samp{$x} never matches. | ||
| 3026 | In Emacs regular expressions, @samp{$} is special only at the | ||
| 3027 | end of the regular expression, or before @samp{\)} or @samp{\|}. | ||
| 3028 | |||
| 3029 | @item | ||
| 3030 | In POSIX BREs and EREs, undefined results are produced by repetition | ||
| 3031 | operators at the start of a regular expression or subexpression | ||
| 3032 | (possibly preceded by @samp{^}), except that the repetition operator | ||
| 3033 | @samp{*} has the same behavior in BREs as in Emacs. | ||
| 3034 | In Emacs, these operators are treated as ordinary. | ||
| 3035 | |||
| 3036 | @item | ||
| 3037 | In BREs and EREs, undefined results are produced by two repetition | ||
| 3038 | operators in sequence. In Emacs, these have well-defined behavior, | ||
| 3039 | e.g., @samp{a**} is equivalent to @samp{a*}. | ||
| 3040 | |||
| 3041 | @item | ||
| 3042 | In BREs and EREs, undefined results are produced by empty regular | ||
| 3043 | expressions or subexpressions. In Emacs these have well-defined | ||
| 3044 | behavior, e.g., @samp{\(\)*} matches the empty string. | ||
| 3045 | |||
| 3046 | @item | ||
| 3047 | In BREs and EREs, undefined results are produced for the named | ||
| 3048 | character classes @samp{[:ascii:]}, @samp{[:multibyte:]}, | ||
| 3049 | @samp{[:nonascii:]}, @samp{[:unibyte:]}, and @samp{[:word:]}. | ||
| 3050 | |||
| 3051 | @item | ||
| 3052 | BREs and EREs can contain collating symbols and equivalence | ||
| 3053 | class expressions within bracket expressions, e.g., @samp{[[.ch.]d[=a=]]}. | ||
| 3054 | Emacs regular expressions do not support this. | ||
| 3055 | |||
| 3056 | @item | ||
| 3057 | BREs, EREs, and the strings they match cannot contain encoding errors | ||
| 3058 | or NUL bytes. In Emacs these constructs simply match themselves. | ||
| 3059 | |||
| 3060 | @item | ||
| 3061 | BRE and ERE searching always finds the longest match. | ||
| 3062 | Emacs searching by default does not necessarily do so. | ||
| 3063 | @xref{Longest Match}. | ||
| 3064 | @end itemize | ||
diff --git a/lisp/emacs-lisp/lisp-mode.el b/lisp/emacs-lisp/lisp-mode.el index 9914ededb85..1990630608d 100644 --- a/lisp/emacs-lisp/lisp-mode.el +++ b/lisp/emacs-lisp/lisp-mode.el | |||
| @@ -1453,7 +1453,7 @@ and initial semicolons." | |||
| 1453 | ;; are buffer-local, but we avoid changing them so that they can be set | 1453 | ;; are buffer-local, but we avoid changing them so that they can be set |
| 1454 | ;; to make `forward-paragraph' and friends do something the user wants. | 1454 | ;; to make `forward-paragraph' and friends do something the user wants. |
| 1455 | ;; | 1455 | ;; |
| 1456 | ;; `paragraph-start': The `(' in the character alternative and the | 1456 | ;; `paragraph-start': The `(' in the bracket expression and the |
| 1457 | ;; left-singlequote plus `(' sequence after the \\| alternative prevent | 1457 | ;; left-singlequote plus `(' sequence after the \\| alternative prevent |
| 1458 | ;; sexps and backquoted sexps that follow a docstring from being filled | 1458 | ;; sexps and backquoted sexps that follow a docstring from being filled |
| 1459 | ;; with the docstring. This setting has the consequence of inhibiting | 1459 | ;; with the docstring. This setting has the consequence of inhibiting |
diff --git a/lisp/gnus/gnus-sum.el b/lisp/gnus/gnus-sum.el index 4effaa981ec..a3be5577f7a 100644 --- a/lisp/gnus/gnus-sum.el +++ b/lisp/gnus/gnus-sum.el | |||
| @@ -9029,7 +9029,6 @@ is non-numeric or nil fetch the number specified by the | |||
| 9029 | (id (mail-header-id header)) | 9029 | (id (mail-header-id header)) |
| 9030 | (gnus-inhibit-demon t) | 9030 | (gnus-inhibit-demon t) |
| 9031 | (gnus-summary-ignore-duplicates t) | 9031 | (gnus-summary-ignore-duplicates t) |
| 9032 | (gnus-read-all-available-headers t) | ||
| 9033 | (gnus-refer-thread-use-search | 9032 | (gnus-refer-thread-use-search |
| 9034 | (if (or (null limit) (numberp limit)) | 9033 | (if (or (null limit) (numberp limit)) |
| 9035 | gnus-refer-thread-use-search | 9034 | gnus-refer-thread-use-search |
| @@ -9049,7 +9048,8 @@ is non-numeric or nil fetch the number specified by the | |||
| 9049 | (gnus-search-thread header)) | 9048 | (gnus-search-thread header)) |
| 9050 | ;; Otherwise just retrieve some headers. | 9049 | ;; Otherwise just retrieve some headers. |
| 9051 | (t | 9050 | (t |
| 9052 | (let* ((limit (if (numberp limit) | 9051 | (let* ((gnus-read-all-available-headers t) |
| 9052 | (limit (if (numberp limit) | ||
| 9053 | limit | 9053 | limit |
| 9054 | gnus-refer-thread-limit)) | 9054 | gnus-refer-thread-limit)) |
| 9055 | (last (if (numberp limit) | 9055 | (last (if (numberp limit) |
diff --git a/lisp/textmodes/picture.el b/lisp/textmodes/picture.el index 9aa9b72c513..f98c3963b6f 100644 --- a/lisp/textmodes/picture.el +++ b/lisp/textmodes/picture.el | |||
| @@ -383,7 +383,7 @@ Interactively, ARG is the numeric argument, and defaults to 1." | |||
| 383 | The syntax for this variable is like the syntax used inside of `[...]' | 383 | The syntax for this variable is like the syntax used inside of `[...]' |
| 384 | in a regular expression--but without the `[' and the `]'. | 384 | in a regular expression--but without the `[' and the `]'. |
| 385 | It is NOT a regular expression, and should follow the usual | 385 | It is NOT a regular expression, and should follow the usual |
| 386 | rules for the contents of a character alternative. | 386 | rules for the contents of a bracket expression. |
| 387 | It defines a set of \"interesting characters\" to look for when setting | 387 | It defines a set of \"interesting characters\" to look for when setting |
| 388 | \(or searching for) tab stops, initially \"!-~\" (all printing characters). | 388 | \(or searching for) tab stops, initially \"!-~\" (all printing characters). |
| 389 | For example, suppose that you are editing a table which is formatted thus: | 389 | For example, suppose that you are editing a table which is formatted thus: |
diff --git a/src/regex-emacs.c b/src/regex-emacs.c index e3237cd425a..fea34df991b 100644 --- a/src/regex-emacs.c +++ b/src/regex-emacs.c | |||
| @@ -2597,7 +2597,7 @@ regex_compile (re_char *pattern, ptrdiff_t size, | |||
| 2597 | 2597 | ||
| 2598 | /* If followed by a repetition operator. */ | 2598 | /* If followed by a repetition operator. */ |
| 2599 | || (p != pend | 2599 | || (p != pend |
| 2600 | && (*p == '*' || *p == '+' || *p == '?' || *p == '^')) | 2600 | && (*p == '*' || *p == '+' || *p == '?')) |
| 2601 | || (p + 1 < pend && p[0] == '\\' && p[1] == '{')) | 2601 | || (p + 1 < pend && p[0] == '\\' && p[1] == '{')) |
| 2602 | { | 2602 | { |
| 2603 | /* Start building a new exactn. */ | 2603 | /* Start building a new exactn. */ |
diff --git a/test/lisp/eshell/esh-util-tests.el b/test/lisp/eshell/esh-util-tests.el index 52b42fe915c..8585677e14e 100644 --- a/test/lisp/eshell/esh-util-tests.el +++ b/test/lisp/eshell/esh-util-tests.el | |||
| @@ -52,7 +52,7 @@ | |||
| 52 | ;; no leading/trailing whitespace. | 52 | ;; no leading/trailing whitespace. |
| 53 | (should (equal (eshell-stringify '(1 2 3)) "(1 2 3)")) | 53 | (should (equal (eshell-stringify '(1 2 3)) "(1 2 3)")) |
| 54 | (should (equal (replace-regexp-in-string | 54 | (should (equal (replace-regexp-in-string |
| 55 | (rx (+ (or space "\n"))) " " | 55 | (rx (+ (any space "\n"))) " " |
| 56 | (eshell-stringify '((1 2) (3 . 4)))) | 56 | (eshell-stringify '((1 2) (3 . 4)))) |
| 57 | "((1 2) (3 . 4))"))) | 57 | "((1 2) (3 . 4))"))) |
| 58 | 58 | ||
diff --git a/test/lisp/progmodes/eglot-tests.el b/test/lisp/progmodes/eglot-tests.el index 518f8810bdf..725b877fd3c 100644 --- a/test/lisp/progmodes/eglot-tests.el +++ b/test/lisp/progmodes/eglot-tests.el | |||
| @@ -1237,8 +1237,6 @@ GUESSED-MAJOR-MODES-SYM are bound to the useful return values of | |||
| 1237 | 1237 | ||
| 1238 | (defvar tramp-histfile-override) | 1238 | (defvar tramp-histfile-override) |
| 1239 | (defun eglot--call-with-tramp-test (fn) | 1239 | (defun eglot--call-with-tramp-test (fn) |
| 1240 | (unless (>= emacs-major-version 27) | ||
| 1241 | (ert-skip "Eglot Tramp support only on Emacs >= 27")) | ||
| 1242 | ;; Set up a Tramp method that’s just a shell so the remote host is | 1240 | ;; Set up a Tramp method that’s just a shell so the remote host is |
| 1243 | ;; really just the local host. | 1241 | ;; really just the local host. |
| 1244 | (let* ((tramp-remote-path (cons 'tramp-own-remote-path | 1242 | (let* ((tramp-remote-path (cons 'tramp-own-remote-path |
| @@ -1260,6 +1258,9 @@ GUESSED-MAJOR-MODES-SYM are bound to the useful return values of | |||
| 1260 | (when (and noninteractive (not (file-directory-p "~/"))) | 1258 | (when (and noninteractive (not (file-directory-p "~/"))) |
| 1261 | (setenv "HOME" temporary-file-directory))))) | 1259 | (setenv "HOME" temporary-file-directory))))) |
| 1262 | (default-directory temporary-file-directory)) | 1260 | (default-directory temporary-file-directory)) |
| 1261 | ;; We must check the remote LSP server. So far, just "clangd" is used. | ||
| 1262 | (unless (ignore-errors (executable-find "clangd" 'remote)) | ||
| 1263 | (ert-skip "Remote clangd not found")) | ||
| 1263 | (funcall fn))) | 1264 | (funcall fn))) |
| 1264 | 1265 | ||
| 1265 | (ert-deftest eglot-test-tramp-test () | 1266 | (ert-deftest eglot-test-tramp-test () |