aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPo Lu2023-06-20 09:18:27 +0800
committerPo Lu2023-06-20 09:18:27 +0800
commit8f3fee7dffadfce25c8f47fb71674f77417b42e5 (patch)
tree69bc7643e0c6bd2ee13247e10835b8a3da36e42b
parentd067b2fca0005f3032d82f80f5c4d88750395dfc (diff)
parent1e13610b75718e7904f8af181fb73571639e1211 (diff)
downloademacs-8f3fee7dffadfce25c8f47fb71674f77417b42e5.tar.gz
emacs-8f3fee7dffadfce25c8f47fb71674f77417b42e5.zip
Merge remote-tracking branch 'origin/master' into feature/android
-rw-r--r--doc/emacs/search.texi12
-rw-r--r--doc/lispref/searching.texi201
-rw-r--r--lisp/emacs-lisp/lisp-mode.el2
-rw-r--r--lisp/gnus/gnus-sum.el4
-rw-r--r--lisp/textmodes/picture.el2
-rw-r--r--src/regex-emacs.c2
-rw-r--r--test/lisp/eshell/esh-util-tests.el2
-rw-r--r--test/lisp/progmodes/eglot-tests.el5
8 files changed, 168 insertions, 62 deletions
diff --git a/doc/emacs/search.texi b/doc/emacs/search.texi
index 45378d95f65..2a816221235 100644
--- a/doc/emacs/search.texi
+++ b/doc/emacs/search.texi
@@ -950,8 +950,8 @@ features used mainly in Lisp programs.
950@dfn{special constructs} and the rest are @dfn{ordinary}. An ordinary 950@dfn{special constructs} and the rest are @dfn{ordinary}. An ordinary
951character matches that same character and nothing else. The special 951character matches that same character and nothing else. The special
952characters are @samp{$^.*+?[\}. The character @samp{]} is special if 952characters are @samp{$^.*+?[\}. The character @samp{]} is special if
953it ends a character alternative (see below). The character @samp{-} 953it ends a bracket expression (see below). The character @samp{-}
954is special inside a character alternative. Any other character 954is special inside a bracket expression. Any other character
955appearing in a regular expression is ordinary, unless a @samp{\} 955appearing in a regular expression is ordinary, unless a @samp{\}
956precedes it. (When you use regular expressions in a Lisp program, 956precedes it. (When you use regular expressions in a Lisp program,
957each @samp{\} must be doubled, see the example near the end of this 957each @samp{\} must be doubled, see the example near the end of this
@@ -1033,11 +1033,11 @@ you search for @samp{a.*?$} against the text @samp{abbab} followed by
1033a newline, it matches the whole string. Since it @emph{can} match 1033a newline, it matches the whole string. Since it @emph{can} match
1034starting at the first @samp{a}, it does. 1034starting at the first @samp{a}, it does.
1035 1035
1036@cindex bracket expression
1036@cindex set of alternative characters, in regular expressions 1037@cindex set of alternative characters, in regular expressions
1037@cindex character set, in regular expressions 1038@cindex character set, in regular expressions
1038@item @kbd{[ @dots{} ]} 1039@item @kbd{[ @dots{} ]}
1039is a @dfn{set of alternative characters}, or a @dfn{character set}, 1040is a @dfn{bracket expression}, which matches one of a set of characters.
1040beginning with @samp{[} and terminated by @samp{]}.
1041 1041
1042In the simplest case, the characters between the two brackets are what 1042In the simplest case, the characters between the two brackets are what
1043this set can match. Thus, @samp{[ad]} matches either one @samp{a} or 1043this set can match. Thus, @samp{[ad]} matches either one @samp{a} or
@@ -1057,7 +1057,7 @@ Greek letters.
1057@cindex character classes, in regular expressions 1057@cindex character classes, in regular expressions
1058You can also include certain special @dfn{character classes} in a 1058You can also include certain special @dfn{character classes} in a
1059character set. A @samp{[:} and balancing @samp{:]} enclose a 1059character set. A @samp{[:} and balancing @samp{:]} enclose a
1060character class inside a set of alternative characters. For instance, 1060character class inside a bracket expression. For instance,
1061@samp{[[:alnum:]]} matches any letter or digit. @xref{Char Classes,,, 1061@samp{[[:alnum:]]} matches any letter or digit. @xref{Char Classes,,,
1062elisp, The Emacs Lisp Reference Manual}, for a list of character 1062elisp, The Emacs Lisp Reference Manual}, for a list of character
1063classes. 1063classes.
@@ -1125,7 +1125,7 @@ no preceding expression on which the @samp{*} can act. It is poor practice
1125to depend on this behavior; it is better to quote the special character anyway, 1125to depend on this behavior; it is better to quote the special character anyway,
1126regardless of where it appears. 1126regardless of where it appears.
1127 1127
1128As a @samp{\} is not special inside a set of alternative characters, it can 1128As a @samp{\} is not special inside a bracket expression, it can
1129never remove the special meaning of @samp{-}, @samp{^} or @samp{]}. 1129never remove the special meaning of @samp{-}, @samp{^} or @samp{]}.
1130You should not quote these characters when they have no special 1130You should not quote these characters when they have no special
1131meaning. This would not clarify anything, since backslashes 1131meaning. This would not clarify anything, since backslashes
diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi
index b8d9094b28d..28230cea643 100644
--- a/doc/lispref/searching.texi
+++ b/doc/lispref/searching.texi
@@ -18,11 +18,12 @@ portions of it.
18* Searching and Case:: Case-independent or case-significant searching. 18* Searching and Case:: Case-independent or case-significant searching.
19* Regular Expressions:: Describing classes of strings. 19* Regular Expressions:: Describing classes of strings.
20* Regexp Search:: Searching for a match for a regexp. 20* Regexp Search:: Searching for a match for a regexp.
21* POSIX Regexps:: Searching POSIX-style for the longest match. 21* Longest Match:: Searching for the longest match.
22* Match Data:: Finding out which part of the text matched, 22* Match Data:: Finding out which part of the text matched,
23 after a string or regexp search. 23 after a string or regexp search.
24* Search and Replace:: Commands that loop, searching and replacing. 24* Search and Replace:: Commands that loop, searching and replacing.
25* Standard Regexps:: Useful regexps for finding sentences, pages,... 25* Standard Regexps:: Useful regexps for finding sentences, pages,...
26* POSIX Regexps:: Emacs regexps vs POSIX regexps.
26@end menu 27@end menu
27 28
28 The @samp{skip-chars@dots{}} functions also perform a kind of searching. 29 The @samp{skip-chars@dots{}} functions also perform a kind of searching.
@@ -277,10 +278,10 @@ character is a simple regular expression that matches that character
277and nothing else. The special characters are @samp{.}, @samp{*}, 278and nothing else. The special characters are @samp{.}, @samp{*},
278@samp{+}, @samp{?}, @samp{[}, @samp{^}, @samp{$}, and @samp{\}; no new 279@samp{+}, @samp{?}, @samp{[}, @samp{^}, @samp{$}, and @samp{\}; no new
279special characters will be defined in the future. The character 280special characters will be defined in the future. The character
280@samp{]} is special if it ends a character alternative (see later). 281@samp{]} is special if it ends a bracket expression (see later).
281The character @samp{-} is special inside a character alternative. A 282The character @samp{-} is special inside a bracket expression. A
282@samp{[:} and balancing @samp{:]} enclose a character class inside a 283@samp{[:} and balancing @samp{:]} enclose a character class inside a
283character alternative. Any other character appearing in a regular 284bracket expression. Any other character appearing in a regular
284expression is ordinary, unless a @samp{\} precedes it. 285expression is ordinary, unless a @samp{\} precedes it.
285 286
286 For example, @samp{f} is not a special character, so it is ordinary, and 287 For example, @samp{f} is not a special character, so it is ordinary, and
@@ -373,19 +374,19 @@ expression @samp{c[ad]*?a}, applied to that same string, matches just
373permits the whole expression to match is @samp{d}.) 374permits the whole expression to match is @samp{d}.)
374 375
375@item @samp{[ @dots{} ]} 376@item @samp{[ @dots{} ]}
376@cindex character alternative (in regexp) 377@cindex bracket expression (in regexp)
377@cindex @samp{[} in regexp 378@cindex @samp{[} in regexp
378@cindex @samp{]} in regexp 379@cindex @samp{]} in regexp
379is a @dfn{character alternative}, which begins with @samp{[} and is 380is a @dfn{bracket expression}, which begins with @samp{[} and is
380terminated by @samp{]}. In the simplest case, the characters between 381terminated by @samp{]}. In the simplest case, the characters between
381the two brackets are what this character alternative can match. 382the two brackets are what this bracket expression can match.
382 383
383Thus, @samp{[ad]} matches either one @samp{a} or one @samp{d}, and 384Thus, @samp{[ad]} matches either one @samp{a} or one @samp{d}, and
384@samp{[ad]*} matches any string composed of just @samp{a}s and @samp{d}s 385@samp{[ad]*} matches any string composed of just @samp{a}s and @samp{d}s
385(including the empty string). It follows that @samp{c[ad]*r} 386(including the empty string). It follows that @samp{c[ad]*r}
386matches @samp{cr}, @samp{car}, @samp{cdr}, @samp{caddaar}, etc. 387matches @samp{cr}, @samp{car}, @samp{cdr}, @samp{caddaar}, etc.
387 388
388You can also include character ranges in a character alternative, by 389You can also include character ranges in a bracket expression, by
389writing the starting and ending characters with a @samp{-} between them. 390writing the starting and ending characters with a @samp{-} between them.
390Thus, @samp{[a-z]} matches any lower-case @acronym{ASCII} letter. 391Thus, @samp{[a-z]} matches any lower-case @acronym{ASCII} letter.
391Ranges may be intermixed freely with individual characters, as in 392Ranges may be intermixed freely with individual characters, as in
@@ -394,7 +395,7 @@ or @samp{$}, @samp{%} or period. However, the ending character of one
394range should not be the starting point of another one; for example, 395range should not be the starting point of another one; for example,
395@samp{[a-m-z]} should be avoided. 396@samp{[a-m-z]} should be avoided.
396 397
397A character alternative can also specify named character classes 398A bracket expression can also specify named character classes
398(@pxref{Char Classes}). For example, @samp{[[:ascii:]]} matches any 399(@pxref{Char Classes}). For example, @samp{[[:ascii:]]} matches any
399@acronym{ASCII} character. Using a character class is equivalent to 400@acronym{ASCII} character. Using a character class is equivalent to
400mentioning each of the characters in that class; but the latter is not 401mentioning each of the characters in that class; but the latter is not
@@ -403,9 +404,9 @@ different characters. A character class should not appear as the
403lower or upper bound of a range. 404lower or upper bound of a range.
404 405
405The usual regexp special characters are not special inside a 406The usual regexp special characters are not special inside a
406character alternative. A completely different set of characters is 407bracket expression. A completely different set of characters is
407special: @samp{]}, @samp{-} and @samp{^}. 408special: @samp{]}, @samp{-} and @samp{^}.
408To include @samp{]} in a character alternative, put it at the 409To include @samp{]} in a bracket expression, put it at the
409beginning. To include @samp{^}, put it anywhere but at the beginning. 410beginning. To include @samp{^}, put it anywhere but at the beginning.
410To include @samp{-}, put it at the end. Thus, @samp{[]^-]} matches 411To include @samp{-}, put it at the end. Thus, @samp{[]^-]} matches
411all three of these special characters. You cannot use @samp{\} to 412all three of these special characters. You cannot use @samp{\} to
@@ -443,7 +444,7 @@ characters and raw 8-bit bytes, but not non-ASCII characters. This
443feature is intended for searching text in unibyte buffers and strings. 444feature is intended for searching text in unibyte buffers and strings.
444@end enumerate 445@end enumerate
445 446
446Some kinds of character alternatives are not the best style even 447Some kinds of bracket expressions are not the best style even
447though they have a well-defined meaning in Emacs. They include: 448though they have a well-defined meaning in Emacs. They include:
448 449
449@enumerate 450@enumerate
@@ -457,7 +458,7 @@ Unicode character escapes can help here; for example, for most programmers
457@samp{[ก-ฺ฿-๛]} is less clear than @samp{[\u0E01-\u0E3A\u0E3F-\u0E5B]}. 458@samp{[ก-ฺ฿-๛]} is less clear than @samp{[\u0E01-\u0E3A\u0E3F-\u0E5B]}.
458 459
459@item 460@item
460Although a character alternative can include duplicates, it is better 461Although a bracket expression can include duplicates, it is better
461style to avoid them. For example, @samp{[XYa-yYb-zX]} is less clear 462style to avoid them. For example, @samp{[XYa-yYb-zX]} is less clear
462than @samp{[XYa-z]}. 463than @samp{[XYa-z]}.
463 464
@@ -468,30 +469,30 @@ is simpler to list the characters. For example,
468than @samp{[ij]}, and @samp{[i-k]} is less clear than @samp{[ijk]}. 469than @samp{[ij]}, and @samp{[i-k]} is less clear than @samp{[ijk]}.
469 470
470@item 471@item
471Although a @samp{-} can appear at the beginning of a character 472Although a @samp{-} can appear at the beginning of a bracket
472alternative or as the upper bound of a range, it is better style to 473expression or as the upper bound of a range, it is better style to
473put @samp{-} by itself at the end of a character alternative. For 474put @samp{-} by itself at the end of a bracket expression. For
474example, although @samp{[-a-z]} is valid, @samp{[a-z-]} is better 475example, although @samp{[-a-z]} is valid, @samp{[a-z-]} is better
475style; and although @samp{[*--]} is valid, @samp{[*+,-]} is clearer. 476style; and although @samp{[*--]} is valid, @samp{[*+,-]} is clearer.
476@end enumerate 477@end enumerate
477 478
478@item @samp{[^ @dots{} ]} 479@item @samp{[^ @dots{} ]}
479@cindex @samp{^} in regexp 480@cindex @samp{^} in regexp
480@samp{[^} begins a @dfn{complemented character alternative}. This 481@samp{[^} begins a @dfn{complemented bracket expression}. This
481matches any character except the ones specified. Thus, 482matches any character except the ones specified. Thus,
482@samp{[^a-z0-9A-Z]} matches all characters @emph{except} ASCII letters and 483@samp{[^a-z0-9A-Z]} matches all characters @emph{except} ASCII letters and
483digits. 484digits.
484 485
485@samp{^} is not special in a character alternative unless it is the first 486@samp{^} is not special in a bracket expression unless it is the first
486character. The character following the @samp{^} is treated as if it 487character. The character following the @samp{^} is treated as if it
487were first (in other words, @samp{-} and @samp{]} are not special there). 488were first (in other words, @samp{-} and @samp{]} are not special there).
488 489
489A complemented character alternative can match a newline, unless newline is 490A complemented bracket expression can match a newline, unless newline is
490mentioned as one of the characters not to match. This is in contrast to 491mentioned as one of the characters not to match. This is in contrast to
491the handling of regexps in programs such as @code{grep}. 492the handling of regexps in programs such as @code{grep}.
492 493
493You can specify named character classes, just like in character 494You can specify named character classes, just like in bracket
494alternatives. For instance, @samp{[^[:ascii:]]} matches any 495expressions. For instance, @samp{[^[:ascii:]]} matches any
495non-@acronym{ASCII} character. @xref{Char Classes}. 496non-@acronym{ASCII} character. @xref{Char Classes}.
496 497
497@item @samp{^} 498@item @samp{^}
@@ -505,9 +506,10 @@ beginning of a line.
505When matching a string instead of a buffer, @samp{^} matches at the 506When matching a string instead of a buffer, @samp{^} matches at the
506beginning of the string or after a newline character. 507beginning of the string or after a newline character.
507 508
508For historical compatibility reasons, @samp{^} can be used only at the 509For historical compatibility, @samp{^} is special only at the beginning
509beginning of the regular expression, or after @samp{\(}, @samp{\(?:} 510of the regular expression, or after @samp{\(}, @samp{\(?:} or @samp{\|}.
510or @samp{\|}. 511Although @samp{^} is an ordinary character in other contexts,
512it is good practice to use @samp{\^} even then.
511 513
512@item @samp{$} 514@item @samp{$}
513@cindex @samp{$} in regexp 515@cindex @samp{$} in regexp
@@ -519,8 +521,10 @@ matches a string of one @samp{x} or more at the end of a line.
519When matching a string instead of a buffer, @samp{$} matches at the end 521When matching a string instead of a buffer, @samp{$} matches at the end
520of the string or before a newline character. 522of the string or before a newline character.
521 523
522For historical compatibility reasons, @samp{$} can be used only at the 524For historical compatibility, @samp{$} is special only at the
523end of the regular expression, or before @samp{\)} or @samp{\|}. 525end of the regular expression, or before @samp{\)} or @samp{\|}.
526Although @samp{$} is an ordinary character in other contexts,
527it is good practice to use @samp{\$} even then.
524 528
525@item @samp{\} 529@item @samp{\}
526@cindex @samp{\} in regexp 530@cindex @samp{\} in regexp
@@ -540,14 +544,19 @@ example, the regular expression that matches the @samp{\} character is
540@samp{\} is @code{"\\\\"}. 544@samp{\} is @code{"\\\\"}.
541@end table 545@end table
542 546
543@strong{Please note:} For historical compatibility, special characters 547For historical compatibility, a repetition operator is treated as ordinary
544are treated as ordinary ones if they are in contexts where their special 548if it appears at the start of a regular expression
545meanings make no sense. For example, @samp{*foo} treats @samp{*} as 549or after @samp{^}, @samp{\(}, @samp{\(?:} or @samp{\|}.
546ordinary since there is no preceding expression on which the @samp{*} 550For example, @samp{*foo} is treated as @samp{\*foo}, and
547can act. It is poor practice to depend on this behavior; quote the 551@samp{two\|^\@{2\@}} is treated as @samp{two\|^@{2@}}.
548special character anyway, regardless of where it appears. 552It is poor practice to depend on this behavior; use proper backslash
549 553escaping anyway, regardless of where the repetition operator appears.
550As a @samp{\} is not special inside a character alternative, it can 554Also, a repetition operator should not immediately follow a backslash escape
555that matches only empty strings, as Emacs has bugs in this area.
556For example, it is unwise to use @samp{\b*}, which can be omitted
557without changing the documented meaning of the regular expression.
558
559As a @samp{\} is not special inside a bracket expression, it can
551never remove the special meaning of @samp{-}, @samp{^} or @samp{]}. 560never remove the special meaning of @samp{-}, @samp{^} or @samp{]}.
552You should not quote these characters when they have no special 561You should not quote these characters when they have no special
553meaning. This would not clarify anything, since backslashes 562meaning. This would not clarify anything, since backslashes
@@ -556,23 +565,23 @@ special meaning, as in @samp{[^\]} (@code{"[^\\]"} for Lisp string
556syntax), which matches any single character except a backslash. 565syntax), which matches any single character except a backslash.
557 566
558In practice, most @samp{]} that occur in regular expressions close a 567In practice, most @samp{]} that occur in regular expressions close a
559character alternative and hence are special. However, occasionally a 568bracket expression and hence are special. However, occasionally a
560regular expression may try to match a complex pattern of literal 569regular expression may try to match a complex pattern of literal
561@samp{[} and @samp{]}. In such situations, it sometimes may be 570@samp{[} and @samp{]}. In such situations, it sometimes may be
562necessary to carefully parse the regexp from the start to determine 571necessary to carefully parse the regexp from the start to determine
563which square brackets enclose a character alternative. For example, 572which square brackets enclose a bracket expression. For example,
564@samp{[^][]]} consists of the complemented character alternative 573@samp{[^][]]} consists of the complemented bracket expression
565@samp{[^][]} (which matches any single character that is not a square 574@samp{[^][]} (which matches any single character that is not a square
566bracket), followed by a literal @samp{]}. 575bracket), followed by a literal @samp{]}.
567 576
568The exact rules are that at the beginning of a regexp, @samp{[} is 577The exact rules are that at the beginning of a regexp, @samp{[} is
569special and @samp{]} not. This lasts until the first unquoted 578special and @samp{]} not. This lasts until the first unquoted
570@samp{[}, after which we are in a character alternative; @samp{[} is 579@samp{[}, after which we are in a bracket expression; @samp{[} is
571no longer special (except when it starts a character class) but @samp{]} 580no longer special (except when it starts a character class) but @samp{]}
572is special, unless it immediately follows the special @samp{[} or that 581is special, unless it immediately follows the special @samp{[} or that
573@samp{[} followed by a @samp{^}. This lasts until the next special 582@samp{[} followed by a @samp{^}. This lasts until the next special
574@samp{]} that does not end a character class. This ends the character 583@samp{]} that does not end a character class. This ends the bracket
575alternative and restores the ordinary syntax of regular expressions; 584expression and restores the ordinary syntax of regular expressions;
576an unquoted @samp{[} is special again and a @samp{]} not. 585an unquoted @samp{[} is special again and a @samp{]} not.
577 586
578@node Char Classes 587@node Char Classes
@@ -583,8 +592,8 @@ an unquoted @samp{[} is special again and a @samp{]} not.
583@cindex alpha character class, regexp 592@cindex alpha character class, regexp
584@cindex xdigit character class, regexp 593@cindex xdigit character class, regexp
585 594
586 Below is a table of the classes you can use in a character 595 Below is a table of the classes you can use in a bracket
587alternative, and what they mean. Note that the @samp{[} and @samp{]} 596expression, and what they mean. Note that the @samp{[} and @samp{]}
588characters that enclose the class name are part of the name, so a 597characters that enclose the class name are part of the name, so a
589regular expression using these classes needs one more pair of 598regular expression using these classes needs one more pair of
590brackets. For example, a regular expression matching a sequence of 599brackets. For example, a regular expression matching a sequence of
@@ -911,7 +920,7 @@ with a symbol-constituent character.
911 920
912@kindex invalid-regexp 921@kindex invalid-regexp
913 Not every string is a valid regular expression. For example, a string 922 Not every string is a valid regular expression. For example, a string
914that ends inside a character alternative without a terminating @samp{]} 923that ends inside a bracket expression without a terminating @samp{]}
915is invalid, and so is a string that ends with a single @samp{\}. If 924is invalid, and so is a string that ends with a single @samp{\}. If
916an invalid regular expression is passed to any of the search functions, 925an invalid regular expression is passed to any of the search functions,
917an @code{invalid-regexp} error is signaled. 926an @code{invalid-regexp} error is signaled.
@@ -948,7 +957,7 @@ deciphered as follows:
948 957
949@table @code 958@table @code
950@item [.?!] 959@item [.?!]
951The first part of the pattern is a character alternative that matches 960The first part of the pattern is a bracket expression that matches
952any one of three characters: period, question mark, and exclamation 961any one of three characters: period, question mark, and exclamation
953mark. The match must begin with one of these three characters. (This 962mark. The match must begin with one of these three characters. (This
954is one point where the new default regexp used by Emacs differs from 963is one point where the new default regexp used by Emacs differs from
@@ -960,7 +969,7 @@ The second part of the pattern matches any closing braces and quotation
960marks, zero or more of them, that may follow the period, question mark 969marks, zero or more of them, that may follow the period, question mark
961or exclamation mark. The @code{\"} is Lisp syntax for a double-quote in 970or exclamation mark. The @code{\"} is Lisp syntax for a double-quote in
962a string. The @samp{*} at the end indicates that the immediately 971a string. The @samp{*} at the end indicates that the immediately
963preceding regular expression (a character alternative, in this case) may be 972preceding regular expression (a bracket expression, in this case) may be
964repeated zero or more times. 973repeated zero or more times.
965 974
966@item \\($\\|@ $\\|\t\\|@ @ \\) 975@item \\($\\|@ $\\|\t\\|@ @ \\)
@@ -1911,7 +1920,7 @@ attempts. Other zero-width assertions may also bring benefits by
1911causing a match to fail early. 1920causing a match to fail early.
1912 1921
1913@item 1922@item
1914Avoid or-patterns in favor of character alternatives: write 1923Avoid or-patterns in favor of bracket expressions: write
1915@samp{[ab]} instead of @samp{a\|b}. Recall that @samp{\s-} and @samp{\sw} 1924@samp{[ab]} instead of @samp{a\|b}. Recall that @samp{\s-} and @samp{\sw}
1916are equivalent to @samp{[[:space:]]} and @samp{[[:word:]]}, respectively. 1925are equivalent to @samp{[[:space:]]} and @samp{[[:word:]]}, respectively.
1917 1926
@@ -2193,8 +2202,8 @@ constructs, you should bind it temporarily for as small as possible
2193a part of the code. 2202a part of the code.
2194@end defvar 2203@end defvar
2195 2204
2196@node POSIX Regexps 2205@node Longest Match
2197@section POSIX Regular Expression Searching 2206@section Longest-match searching for regular expression matches
2198 2207
2199@cindex backtracking and POSIX regular expressions 2208@cindex backtracking and POSIX regular expressions
2200 The usual regular expression functions do backtracking when necessary 2209 The usual regular expression functions do backtracking when necessary
@@ -2209,7 +2218,9 @@ possibilities and found all matches, so they can report the longest
2209match, as required by POSIX@. This is much slower, so use these 2218match, as required by POSIX@. This is much slower, so use these
2210functions only when you really need the longest match. 2219functions only when you really need the longest match.
2211 2220
2212 The POSIX search and match functions do not properly support the 2221 Despite their names, the POSIX search and match functions
2222use Emacs regular expressions, not POSIX regular expressions.
2223@xref{POSIX Regexps}. Also, they do not properly support the
2213non-greedy repetition operators (@pxref{Regexp Special, non-greedy}). 2224non-greedy repetition operators (@pxref{Regexp Special, non-greedy}).
2214This is because POSIX backtracking conflicts with the semantics of 2225This is because POSIX backtracking conflicts with the semantics of
2215non-greedy repetition. 2226non-greedy repetition.
@@ -2957,3 +2968,97 @@ values of the variables @code{sentence-end-double-space}
2957@code{sentence-end-without-period}, and 2968@code{sentence-end-without-period}, and
2958@code{sentence-end-without-space}. 2969@code{sentence-end-without-space}.
2959@end defun 2970@end defun
2971
2972@node POSIX Regexps
2973@section Emacs versus POSIX Regular Expressions
2974@cindex POSIX regular expressions
2975
2976Regular expression syntax varies significantly among computer programs.
2977When writing Elisp code that generates regular expressions for use by other
2978programs, it is helpful to know how syntax variants differ.
2979To give a feel for the variation, this section discusses how
2980Emacs regular expressions differ from two syntax variants standardized by POSIX:
2981basic regular expressions (BREs) and extended regular expressions (EREs).
2982Plain @command{grep} uses BREs, and @samp{grep -E} uses EREs.
2983
2984Emacs regular expressions have a syntax closer to EREs than to BREs,
2985with some extensions. Here is a summary of how POSIX BREs and EREs
2986differ from Emacs regular expressions.
2987
2988@itemize @bullet
2989@item
2990In POSIX BREs @samp{+} and @samp{?} are not special.
2991The only backslash escape sequences are @samp{\(@dots{}\)},
2992@samp{\@{@dots{}\@}}, @samp{\1} through @samp{\9}, along with the
2993escaped special characters @samp{\$}, @samp{\*}, @samp{\.}, @samp{\[},
2994@samp{\\}, and @samp{\^}.
2995Therefore @samp{\(?:} acts like @samp{\([?]:}.
2996POSIX does not define how other BRE escapes behave;
2997for example, GNU @command{grep} treats @samp{\|} like Emacs does,
2998but does not support all the Emacs escapes.
2999
3000@item
3001In POSIX EREs @samp{@{}, @samp{(} and @samp{|} are special,
3002and @samp{)} is special when matched with a preceding @samp{(}.
3003These special characters do not use preceding backslashes;
3004@samp{(?} produces undefined results.
3005The only backslash escape sequences are the escaped special characters
3006@samp{\$}, @samp{\(}, @samp{\)}, @samp{\*}, @samp{\+}, @samp{\.},
3007@samp{\?}, @samp{\[}, @samp{\\}, @samp{\^}, @samp{\@{} and @samp{\|}.
3008POSIX does not define how other ERE escapes behave;
3009for example, GNU @samp{grep -E} treats @samp{\1} like Emacs does,
3010but does not support all the Emacs escapes.
3011
3012@item
3013In POSIX BREs, it is an implementation option whether @samp{^} is special
3014after @samp{\(}; GNU @command{grep} treats it like Emacs does.
3015In POSIX EREs, @samp{^} is always special outside of bracket expressions,
3016which means the ERE @samp{x^} never matches.
3017In Emacs regular expressions, @samp{^} is special only at the
3018beginning of the regular expression, or after @samp{\(}, @samp{\(?:}
3019or @samp{\|}.
3020
3021@item
3022In POSIX BREs, it is an implementation option whether @samp{$} is special
3023before @samp{\)}; GNU @command{grep} treats it like Emacs does.
3024In POSIX EREs, @samp{$} is always special outside of bracket expressions,
3025which means the ERE @samp{$x} never matches.
3026In Emacs regular expressions, @samp{$} is special only at the
3027end of the regular expression, or before @samp{\)} or @samp{\|}.
3028
3029@item
3030In POSIX BREs and EREs, undefined results are produced by repetition
3031operators at the start of a regular expression or subexpression
3032(possibly preceded by @samp{^}), except that the repetition operator
3033@samp{*} has the same behavior in BREs as in Emacs.
3034In Emacs, these operators are treated as ordinary.
3035
3036@item
3037In BREs and EREs, undefined results are produced by two repetition
3038operators in sequence. In Emacs, these have well-defined behavior,
3039e.g., @samp{a**} is equivalent to @samp{a*}.
3040
3041@item
3042In BREs and EREs, undefined results are produced by empty regular
3043expressions or subexpressions. In Emacs these have well-defined
3044behavior, e.g., @samp{\(\)*} matches the empty string.
3045
3046@item
3047In BREs and EREs, undefined results are produced for the named
3048character classes @samp{[:ascii:]}, @samp{[:multibyte:]},
3049@samp{[:nonascii:]}, @samp{[:unibyte:]}, and @samp{[:word:]}.
3050
3051@item
3052BREs and EREs can contain collating symbols and equivalence
3053class expressions within bracket expressions, e.g., @samp{[[.ch.]d[=a=]]}.
3054Emacs regular expressions do not support this.
3055
3056@item
3057BREs, EREs, and the strings they match cannot contain encoding errors
3058or NUL bytes. In Emacs these constructs simply match themselves.
3059
3060@item
3061BRE and ERE searching always finds the longest match.
3062Emacs searching by default does not necessarily do so.
3063@xref{Longest Match}.
3064@end itemize
diff --git a/lisp/emacs-lisp/lisp-mode.el b/lisp/emacs-lisp/lisp-mode.el
index 9914ededb85..1990630608d 100644
--- a/lisp/emacs-lisp/lisp-mode.el
+++ b/lisp/emacs-lisp/lisp-mode.el
@@ -1453,7 +1453,7 @@ and initial semicolons."
1453 ;; are buffer-local, but we avoid changing them so that they can be set 1453 ;; are buffer-local, but we avoid changing them so that they can be set
1454 ;; to make `forward-paragraph' and friends do something the user wants. 1454 ;; to make `forward-paragraph' and friends do something the user wants.
1455 ;; 1455 ;;
1456 ;; `paragraph-start': The `(' in the character alternative and the 1456 ;; `paragraph-start': The `(' in the bracket expression and the
1457 ;; left-singlequote plus `(' sequence after the \\| alternative prevent 1457 ;; left-singlequote plus `(' sequence after the \\| alternative prevent
1458 ;; sexps and backquoted sexps that follow a docstring from being filled 1458 ;; sexps and backquoted sexps that follow a docstring from being filled
1459 ;; with the docstring. This setting has the consequence of inhibiting 1459 ;; with the docstring. This setting has the consequence of inhibiting
diff --git a/lisp/gnus/gnus-sum.el b/lisp/gnus/gnus-sum.el
index 4effaa981ec..a3be5577f7a 100644
--- a/lisp/gnus/gnus-sum.el
+++ b/lisp/gnus/gnus-sum.el
@@ -9029,7 +9029,6 @@ is non-numeric or nil fetch the number specified by the
9029 (id (mail-header-id header)) 9029 (id (mail-header-id header))
9030 (gnus-inhibit-demon t) 9030 (gnus-inhibit-demon t)
9031 (gnus-summary-ignore-duplicates t) 9031 (gnus-summary-ignore-duplicates t)
9032 (gnus-read-all-available-headers t)
9033 (gnus-refer-thread-use-search 9032 (gnus-refer-thread-use-search
9034 (if (or (null limit) (numberp limit)) 9033 (if (or (null limit) (numberp limit))
9035 gnus-refer-thread-use-search 9034 gnus-refer-thread-use-search
@@ -9049,7 +9048,8 @@ is non-numeric or nil fetch the number specified by the
9049 (gnus-search-thread header)) 9048 (gnus-search-thread header))
9050 ;; Otherwise just retrieve some headers. 9049 ;; Otherwise just retrieve some headers.
9051 (t 9050 (t
9052 (let* ((limit (if (numberp limit) 9051 (let* ((gnus-read-all-available-headers t)
9052 (limit (if (numberp limit)
9053 limit 9053 limit
9054 gnus-refer-thread-limit)) 9054 gnus-refer-thread-limit))
9055 (last (if (numberp limit) 9055 (last (if (numberp limit)
diff --git a/lisp/textmodes/picture.el b/lisp/textmodes/picture.el
index 9aa9b72c513..f98c3963b6f 100644
--- a/lisp/textmodes/picture.el
+++ b/lisp/textmodes/picture.el
@@ -383,7 +383,7 @@ Interactively, ARG is the numeric argument, and defaults to 1."
383The syntax for this variable is like the syntax used inside of `[...]' 383The syntax for this variable is like the syntax used inside of `[...]'
384in a regular expression--but without the `[' and the `]'. 384in a regular expression--but without the `[' and the `]'.
385It is NOT a regular expression, and should follow the usual 385It is NOT a regular expression, and should follow the usual
386rules for the contents of a character alternative. 386rules for the contents of a bracket expression.
387It defines a set of \"interesting characters\" to look for when setting 387It defines a set of \"interesting characters\" to look for when setting
388\(or searching for) tab stops, initially \"!-~\" (all printing characters). 388\(or searching for) tab stops, initially \"!-~\" (all printing characters).
389For example, suppose that you are editing a table which is formatted thus: 389For example, suppose that you are editing a table which is formatted thus:
diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index e3237cd425a..fea34df991b 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
@@ -2597,7 +2597,7 @@ regex_compile (re_char *pattern, ptrdiff_t size,
2597 2597
2598 /* If followed by a repetition operator. */ 2598 /* If followed by a repetition operator. */
2599 || (p != pend 2599 || (p != pend
2600 && (*p == '*' || *p == '+' || *p == '?' || *p == '^')) 2600 && (*p == '*' || *p == '+' || *p == '?'))
2601 || (p + 1 < pend && p[0] == '\\' && p[1] == '{')) 2601 || (p + 1 < pend && p[0] == '\\' && p[1] == '{'))
2602 { 2602 {
2603 /* Start building a new exactn. */ 2603 /* Start building a new exactn. */
diff --git a/test/lisp/eshell/esh-util-tests.el b/test/lisp/eshell/esh-util-tests.el
index 52b42fe915c..8585677e14e 100644
--- a/test/lisp/eshell/esh-util-tests.el
+++ b/test/lisp/eshell/esh-util-tests.el
@@ -52,7 +52,7 @@
52 ;; no leading/trailing whitespace. 52 ;; no leading/trailing whitespace.
53 (should (equal (eshell-stringify '(1 2 3)) "(1 2 3)")) 53 (should (equal (eshell-stringify '(1 2 3)) "(1 2 3)"))
54 (should (equal (replace-regexp-in-string 54 (should (equal (replace-regexp-in-string
55 (rx (+ (or space "\n"))) " " 55 (rx (+ (any space "\n"))) " "
56 (eshell-stringify '((1 2) (3 . 4)))) 56 (eshell-stringify '((1 2) (3 . 4))))
57 "((1 2) (3 . 4))"))) 57 "((1 2) (3 . 4))")))
58 58
diff --git a/test/lisp/progmodes/eglot-tests.el b/test/lisp/progmodes/eglot-tests.el
index 518f8810bdf..725b877fd3c 100644
--- a/test/lisp/progmodes/eglot-tests.el
+++ b/test/lisp/progmodes/eglot-tests.el
@@ -1237,8 +1237,6 @@ GUESSED-MAJOR-MODES-SYM are bound to the useful return values of
1237 1237
1238(defvar tramp-histfile-override) 1238(defvar tramp-histfile-override)
1239(defun eglot--call-with-tramp-test (fn) 1239(defun eglot--call-with-tramp-test (fn)
1240 (unless (>= emacs-major-version 27)
1241 (ert-skip "Eglot Tramp support only on Emacs >= 27"))
1242 ;; Set up a Tramp method that’s just a shell so the remote host is 1240 ;; Set up a Tramp method that’s just a shell so the remote host is
1243 ;; really just the local host. 1241 ;; really just the local host.
1244 (let* ((tramp-remote-path (cons 'tramp-own-remote-path 1242 (let* ((tramp-remote-path (cons 'tramp-own-remote-path
@@ -1260,6 +1258,9 @@ GUESSED-MAJOR-MODES-SYM are bound to the useful return values of
1260 (when (and noninteractive (not (file-directory-p "~/"))) 1258 (when (and noninteractive (not (file-directory-p "~/")))
1261 (setenv "HOME" temporary-file-directory))))) 1259 (setenv "HOME" temporary-file-directory)))))
1262 (default-directory temporary-file-directory)) 1260 (default-directory temporary-file-directory))
1261 ;; We must check the remote LSP server. So far, just "clangd" is used.
1262 (unless (ignore-errors (executable-find "clangd" 'remote))
1263 (ert-skip "Remote clangd not found"))
1263 (funcall fn))) 1264 (funcall fn)))
1264 1265
1265(ert-deftest eglot-test-tramp-test () 1266(ert-deftest eglot-test-tramp-test ()