Shorter `rx' doc string (bug#36496)

* lisp/emacs-lisp/rx.el (rx): Replace long description with a condensed summary of the rx syntax, with reference to the manual section.
author: Mattias Engdegård 2019-07-06 13:22:15 +0200
committer: Mattias Engdegård 2019-07-07 11:49:22 +0200
commit: 72e21777d0c3940465351fb86d9b7dbce20ace63 (patch)
tree: ab66c2f8b6047160a89ad4d30ef8cdc427d7c8c7
parent: ac1ad3e49abd57a3e39b817864ea379354119d08 (diff)
download: emacs-72e21777d0c3940465351fb86d9b7dbce20ace63.tar.gz
emacs-72e21777d0c3940465351fb86d9b7dbce20ace63.zip
1 file changed, 96 insertions, 321 deletions
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
index 24dd6cbf1d6..249529e54e3 100644
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -959,327 +959,102 @@ becomes just a more verbose version of STRING."
 ;;;###autoload
 (defmacro rx (&rest regexps)
  "Translate regular expressions REGEXPS in sexp form to a regexp string.
-REGEXPS is a non-empty sequence of forms of the sort listed below.
+Each argument is one of the forms below; RX is a subform, and RX... stands
+for one or more RXs.  For details, see Info node `(elisp) Rx Notation'.
-Note that `rx' is a Lisp macro; when used in a Lisp program being
+See `rx-to-string' for the corresponding function.
-compiled, the translation is performed by the compiler.  The
-`literal' and `regexp' forms accept subforms that will evaluate
+STRING         Match a literal string.
-to strings, in addition to constant strings.  If REGEXPS include
+CHAR           Match a literal character.
-such forms, then the result is an expression which returns a
-regexp string, rather than a regexp string directly.  See
+(seq RX...)    Match the RXs in sequence.  Alias: :, sequence, and.
-`rx-to-string' for performing translation completely at run time.
+(or RX...)     Match one of the RXs.  Alias: |.
-The following are valid subforms of regular expressions in sexp
+(zero-or-more RX...) Match RXs zero or more times.  Alias: 0+.
-notation.
+(one-or-more RX...)  Match RXs one or more times.  Alias: 1+.
+(zero-or-one RX...)  Match RXs or the empty string.  Alias: opt, optional.
-STRING
+(* RX...)       Match RXs zero or more times; greedy.
-     matches string STRING literally.
+(+ RX...)       Match RXs one or more times; greedy.
+(? RX...)       Match RXs or the empty string; greedy.
-CHAR
+(*? RX...)      Match RXs zero or more times; non-greedy.
-     matches character CHAR literally.
+(+? RX...)      Match RXs one or more times; non-greedy.
+(?? RX...)      Match RXs or the empty string; non-greedy.
-`not-newline', `nonl'
+(= N RX...)     Match RXs exactly N times.
-     matches any character except a newline.
+(>= N RX...)    Match RXs N or more times.
+(** N M RX...)  Match RXs N to M times.  Alias: repeat.
-`anything'
+(minimal-match RX)  Match RX, with zero-or-more, one-or-more, zero-or-one
-     matches any character
+                and aliases using non-greedy matching.
+(maximal-match RX)  Match RX, with zero-or-more, one-or-more, zero-or-one
-`(any SET ...)'
+                and aliases using greedy matching, which is the default.
-`(in SET ...)'
-`(char SET ...)'
+(any SET...)    Match a character from one of the SETs.  Each SET is a
-     matches any character in SET ....  SET may be a character or string.
+                character, a string, a range as string \"A-Z\" or cons
-     Ranges of characters can be specified as `A-Z' in strings.
+                (?A . ?Z), or a character class (see below).  Alias: in, char.
-     Ranges may also be specified as conses like `(?A . ?Z)'.
+(not CHARSPEC)  Match one character not matched by CHARSPEC.  CHARSPEC
-     Reversed ranges like `Z-A' and `(?Z . ?A)' are not permitted.
+                can be (any ...), (syntax ...), (category ...),
+                or a character class.
-     SET may also be the name of a character class: `digit',
+not-newline     Match any character except a newline.  Alias: nonl.
-     `control', `hex-digit', `blank', `graph', `print', `alnum',
+anything        Match any character.
-     `alpha', `ascii', `nonascii', `lower', `punct', `space', `upper',
-     `word', or one of their synonyms.
+CHARCLASS       Match a character from a character class.  One of:
+ alpha, alphabetic, letter   Alphabetic characters (defined by Unicode).
-`(not (any SET ...))'
+ alnum, alphanumeric         Alphabetic or decimal digit chars (Unicode).
-     matches any character not in SET ...
+ digit, numeric, num         0-9.
+ xdigit, hex-digit, hex      0-9, A-F, a-f.
-`line-start', `bol'
+ cntrl, control              ASCII codes 0-31.
-     matches the empty string, but only at the beginning of a line
+ blank                       Horizontal whitespace (Unicode).
-     in the text being matched
+ space, whitespace, white    Chars with whitespace syntax.
+ lower, lower-case           Lower-case chars, from current case table.
-`line-end', `eol'
+ upper, upper-case           Upper-case chars, from current case table.
-     is similar to `line-start' but matches only at the end of a line
+ graph, graphic              Graphic characters (Unicode).
+ print, printing             Whitespace or graphic (Unicode).
-`string-start', `bos', `bot'
+ punct, punctuation          Not control, space, letter or digit (ASCII);
-     matches the empty string, but only at the beginning of the
+                              not word syntax (non-ASCII).
-     string being matched against.
+ word, wordchar              Characters with word syntax.
+ ascii                       ASCII characters (codes 0-127).
-`string-end', `eos', `eot'
+ nonascii                    Non-ASCII characters (but not raw bytes).
-     matches the empty string, but only at the end of the
-     string being matched against.
+(syntax SYNTAX)  Match a character with syntax SYNTAX, being one of:
+  whitespace, punctuation, word, symbol, open-parenthesis,
-`buffer-start'
+  close-parenthesis, expression-prefix, string-quote,
-     matches the empty string, but only at the beginning of the
+  paired-delimiter, escape, character-quote, comment-start,
-     buffer being matched against.  Actually equivalent to `string-start'.
+  comment-end, string-delimiter, comment-delimiter
-`buffer-end'
+(category CAT)   Match a character in category CAT, being one of:
-     matches the empty string, but only at the end of the
+  space-for-indent, base, consonant, base-vowel,
-     buffer being matched against.  Actually equivalent to `string-end'.
+  upper-diacritical-mark, lower-diacritical-mark, tone-mark, symbol,
+  digit, vowel-modifying-diacritical-mark, vowel-sign,
-`point'
+  semivowel-lower, not-at-end-of-line, not-at-beginning-of-line,
-     matches the empty string, but only at point.
+  alpha-numeric-two-byte, chinese-two-byte, greek-two-byte,
+  japanese-hiragana-two-byte, indian-two-byte,
-`word-start', `bow'
+  japanese-katakana-two-byte, strong-left-to-right,
-     matches the empty string, but only at the beginning of a word.
+  korean-hangul-two-byte, strong-right-to-left, cyrillic-two-byte,
+  combining-diacritic, ascii, arabic, chinese, ethiopic, greek,
-`word-end', `eow'
+  korean, indian, japanese, japanese-katakana, latin, lao,
-     matches the empty string, but only at the end of a word.
+  tibetan, japanese-roman, thai, vietnamese, hebrew, cyrillic,
+  can-break
-`word-boundary'
-     matches the empty string, but only at the beginning or end of a
+Zero-width assertions: these all match the empty string in specific places.
-     word.
+ line-start         At the beginning of a line.  Alias: bol.
+ line-end           At the end of a line.  Alias: eol.
-`(not word-boundary)'
+ string-start       At the start of the string or buffer.
-`not-word-boundary'
+                     Alias: buffer-start, bos, bot.
-     matches the empty string, but not at the beginning or end of a
+ string-end         At the end of the string or buffer.
-     word.
+                     Alias: buffer-end, eos, eot.
+ point              At point.
-`symbol-start'
+ word-start         At the beginning of a word.
-     matches the empty string, but only at the beginning of a symbol.
+ word-end           At the end of a word.
+ word-boundary      At the beginning or end of a word.
-`symbol-end'
+ not-word-boundary  Not at the beginning or end of a word.
-     matches the empty string, but only at the end of a symbol.
+ symbol-start       At the beginning of a symbol.
+ symbol-end         At the end of a symbol.
-`digit', `numeric', `num'
-     matches 0 through 9.
+(group RX...)  Match RXs and define a capture group.  Alias: submatch.
+(group-n N RX...) Match RXs and define capture group N.  Alias: submatch-n.
-`control', `cntrl'
+(backref N)    Match the text that capture group N matched.
-     matches any character whose code is in the range 0-31.
+(literal EXPR) Match the literal string from evaluating EXPR at run time.
-`hex-digit', `hex', `xdigit'
+(regexp EXPR)  Match the string regexp from evaluating EXPR at run time.
-     matches 0 through 9, a through f and A through F.
+(eval EXPR)    Match the rx sexp from evaluating EXPR at compile time."
-`blank'
-     matches horizontal whitespace, as defined by Annex C of the
-     Unicode Technical Standard #18.  In particular, it matches
-     spaces, tabs, and other characters whose Unicode
-     `general-category' property indicates they are spacing
-     separators.
-`graphic', `graph'
-     matches graphic characters--everything except whitespace, ASCII
-     and non-ASCII control characters, surrogates, and codepoints
-     unassigned by Unicode.
-`printing', `print'
-     matches whitespace and graphic characters.
-`alphanumeric', `alnum'
-     matches alphabetic characters and digits.  For multibyte characters,
-     it matches characters whose Unicode `general-category' property
-     indicates they are alphabetic or decimal number characters.
-`letter', `alphabetic', `alpha'
-     matches alphabetic characters.  For multibyte characters,
-     it matches characters whose Unicode `general-category' property
-     indicates they are alphabetic characters.
-`ascii'
-     matches ASCII (unibyte) characters.
-`nonascii'
-     matches non-ASCII (multibyte) characters.
-`lower', `lower-case'
-     matches anything lower-case, as determined by the current case
-     table.  If `case-fold-search' is non-nil, this also matches any
-     upper-case letter.
-`upper', `upper-case'
-     matches anything upper-case, as determined by the current case
-     table.  If `case-fold-search' is non-nil, this also matches any
-     lower-case letter.
-`punctuation', `punct'
-     matches punctuation.  (But at present, for multibyte characters,
-     it matches anything that has non-word syntax.)
-`space', `whitespace', `white'
-     matches anything that has whitespace syntax.
-`word', `wordchar'
-     matches anything that has word syntax.
-`not-wordchar'
-     matches anything that has non-word syntax.
-`(syntax SYNTAX)'
-     matches a character with syntax SYNTAX.  SYNTAX must be one
-     of the following symbols, or a symbol corresponding to the syntax
-     character, e.g. `\\.' for `\\s.'.
-     `whitespace'               (\\s- in string notation)
-     `punctuation'              (\\s.)
-     `word'                     (\\sw)
-     `symbol'                   (\\s_)
-     `open-parenthesis'         (\\s()
-     `close-parenthesis'        (\\s))
-     `expression-prefix'        (\\s')
-     `string-quote'             (\\s\")
-     `paired-delimiter'         (\\s$)
-     `escape'                   (\\s\\)
-     `character-quote'          (\\s/)
-     `comment-start'            (\\s<)
-     `comment-end'              (\\s>)
-     `string-delimiter'         (\\s|)
-     `comment-delimiter'        (\\s!)
-`(not (syntax SYNTAX))'
-     matches a character that doesn't have syntax SYNTAX.
-`(category CATEGORY)'
-     matches a character with category CATEGORY.  CATEGORY must be
-     either a character to use for C, or one of the following symbols.
-     `space-for-indent'                 (\\c\\s in string notation)
-     `base'                             (\\c.)
-     `consonant'                        (\\c0)
-     `base-vowel'                       (\\c1)
-     `upper-diacritical-mark'           (\\c2)
-     `lower-diacritical-mark'           (\\c3)
-     `tone-mark'                        (\\c4)
-     `symbol'                           (\\c5)
-     `digit'                            (\\c6)
-     `vowel-modifying-diacritical-mark' (\\c7)
-     `vowel-sign'                       (\\c8)
-     `semivowel-lower'                  (\\c9)
-     `not-at-end-of-line'               (\\c<)
-     `not-at-beginning-of-line'         (\\c>)
-     `alpha-numeric-two-byte'           (\\cA)
-     `chinese-two-byte'                 (\\cC)
-     `greek-two-byte'                   (\\cG)
-     `japanese-hiragana-two-byte'       (\\cH)
-     `indian-two-byte'                  (\\cI)
-     `japanese-katakana-two-byte'       (\\cK)
-     `strong-left-to-right'             (\\cL)
-     `korean-hangul-two-byte'           (\\cN)
-     `strong-right-to-left'             (\\cR)
-     `cyrillic-two-byte'                (\\cY)
-     `combining-diacritic'              (\\c^)
-     `ascii'                            (\\ca)
-     `arabic'                           (\\cb)
-     `chinese'                          (\\cc)
-     `ethiopic'                         (\\ce)
-     `greek'                            (\\cg)
-     `korean'                           (\\ch)
-     `indian'                           (\\ci)
-     `japanese'                         (\\cj)
-     `japanese-katakana'                (\\ck)
-     `latin'                            (\\cl)
-     `lao'                              (\\co)
-     `tibetan'                          (\\cq)
-     `japanese-roman'                   (\\cr)
-     `thai'                             (\\ct)
-     `vietnamese'                       (\\cv)
-     `hebrew'                           (\\cw)
-     `cyrillic'                         (\\cy)
-     `can-break'                        (\\c|)
-`(not (category CATEGORY))'
-     matches a character that doesn't have category CATEGORY.
-`(and SEXP1 SEXP2 ...)'
-`(: SEXP1 SEXP2 ...)'
-`(seq SEXP1 SEXP2 ...)'
-`(sequence SEXP1 SEXP2 ...)'
-     matches what SEXP1 matches, followed by what SEXP2 matches, etc.
-     Without arguments, matches the empty string.
-`(submatch SEXP1 SEXP2 ...)'
-`(group SEXP1 SEXP2 ...)'
-     like `and', but makes the match accessible with `match-end',
-     `match-beginning', and `match-string'.
-`(submatch-n N SEXP1 SEXP2 ...)'
-`(group-n N SEXP1 SEXP2 ...)'
-     like `group', but make it an explicitly-numbered group with
-     group number N.
-`(or SEXP1 SEXP2 ...)'
-`(| SEXP1 SEXP2 ...)'
-     matches anything that matches SEXP1 or SEXP2, etc.  If all
-     args are strings, use `regexp-opt' to optimize the resulting
-     regular expression.  Without arguments, never matches anything.
-`(minimal-match SEXP)'
-     produce a non-greedy regexp for SEXP.  Normally, regexps matching
-     zero or more occurrences of something are \"greedy\" in that they
-     match as much as they can, as long as the overall regexp can
-     still match.  A non-greedy regexp matches as little as possible.
-`(maximal-match SEXP)'
-     produce a greedy regexp for SEXP.  This is the default.
-Below, `SEXP ...' represents a sequence of regexp forms, treated as if
-enclosed in `(and ...)'.
-`(zero-or-more SEXP ...)'
-`(0+ SEXP ...)'
-     matches zero or more occurrences of what SEXP ... matches.
-`(* SEXP ...)'
-     like `zero-or-more', but always produces a greedy regexp, independent
-     of `rx-greedy-flag'.
-`(*? SEXP ...)'
-     like `zero-or-more', but always produces a non-greedy regexp,
-     independent of `rx-greedy-flag'.
-`(one-or-more SEXP ...)'
-`(1+ SEXP ...)'
-     matches one or more occurrences of SEXP ...
-`(+ SEXP ...)'
-     like `one-or-more', but always produces a greedy regexp.
-`(+? SEXP ...)'
-     like `one-or-more', but always produces a non-greedy regexp.
-`(zero-or-one SEXP ...)'
-`(optional SEXP ...)'
-`(opt SEXP ...)'
-     matches zero or one occurrences of A.
-`(? SEXP ...)'
-     like `zero-or-one', but always produces a greedy regexp.
-`(?? SEXP ...)'
-     like `zero-or-one', but always produces a non-greedy regexp.
-`(repeat N SEXP)'
-`(= N SEXP ...)'
-     matches N occurrences.
-`(>= N SEXP ...)'
-     matches N or more occurrences.
-`(repeat N M SEXP)'
-`(** N M SEXP ...)'
-     matches N to M occurrences.
-`(backref N)'
-     matches what was matched previously by submatch N.
-`(literal STRING-EXPR)'
-     matches STRING-EXPR literally, where STRING-EXPR is any lisp
-     expression that evaluates to a string.
-`(regexp REGEXP-EXPR)'
-     include REGEXP-EXPR in string notation in the result, where
-     REGEXP-EXPR is any lisp expression that evaluates to a
-     string containing a valid regexp.
-`(eval FORM)'
-     evaluate FORM and insert result.  If result is a string,
-     `regexp-quote' it.  Note that FORM is evaluated during
-     macroexpansion."
  (let* ((rx--compile-to-lisp t)
         (re (cond ((null regexps)
                    (error "No regexp"))
author	Mattias Engdegård	2019-07-06 13:22:15 +0200
committer	Mattias Engdegård	2019-07-07 11:49:22 +0200
commit	72e21777d0c3940465351fb86d9b7dbce20ace63 (patch)
tree	ab66c2f8b6047160a89ad4d30ef8cdc427d7c8c7
parent	ac1ad3e49abd57a3e39b817864ea379354119d08 (diff)
download	emacs-72e21777d0c3940465351fb86d9b7dbce20ace63.tar.gz emacs-72e21777d0c3940465351fb86d9b7dbce20ace63.zip

diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
index 24dd6cbf1d6..249529e54e3 100644
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -959,327 +959,102 @@ becomes just a more verbose version of STRING."
959	;;;###autoload	959	;;;###autoload
960	(defmacro rx (&rest regexps)	960	(defmacro rx (&rest regexps)
961	"Translate regular expressions REGEXPS in sexp form to a regexp string.	961	"Translate regular expressions REGEXPS in sexp form to a regexp string.
962	REGEXPS is a non-empty sequence of forms of the sort listed below.	962	Each argument is one of the forms below; RX is a subform, and RX... stands
963		963	for one or more RXs. For details, see Info node `(elisp) Rx Notation'.
964	Note that `rx' is a Lisp macro; when used in a Lisp program being	964	See `rx-to-string' for the corresponding function.
965	compiled, the translation is performed by the compiler. The	965
966	`literal' and `regexp' forms accept subforms that will evaluate	966	STRING Match a literal string.
967	to strings, in addition to constant strings. If REGEXPS include	967	CHAR Match a literal character.
968	such forms, then the result is an expression which returns a	968
969	regexp string, rather than a regexp string directly. See	969	(seq RX...) Match the RXs in sequence. Alias: :, sequence, and.
970	`rx-to-string' for performing translation completely at run time.	970	(or RX...) Match one of the RXs. Alias: |.
971		971
972	The following are valid subforms of regular expressions in sexp	972	(zero-or-more RX...) Match RXs zero or more times. Alias: 0+.
973	notation.	973	(one-or-more RX...) Match RXs one or more times. Alias: 1+.
974		974	(zero-or-one RX...) Match RXs or the empty string. Alias: opt, optional.
975	STRING	975	(* RX...) Match RXs zero or more times; greedy.
976	matches string STRING literally.	976	(+ RX...) Match RXs one or more times; greedy.
977		977	(? RX...) Match RXs or the empty string; greedy.
978	CHAR	978	(*? RX...) Match RXs zero or more times; non-greedy.
979	matches character CHAR literally.	979	(+? RX...) Match RXs one or more times; non-greedy.
980		980	(?? RX...) Match RXs or the empty string; non-greedy.
981	`not-newline', `nonl'	981	(= N RX...) Match RXs exactly N times.
982	matches any character except a newline.	982	(>= N RX...) Match RXs N or more times.
983		983	(** N M RX...) Match RXs N to M times. Alias: repeat.
984	`anything'	984	(minimal-match RX) Match RX, with zero-or-more, one-or-more, zero-or-one
985	matches any character	985	and aliases using non-greedy matching.
986		986	(maximal-match RX) Match RX, with zero-or-more, one-or-more, zero-or-one
987	`(any SET ...)'	987	and aliases using greedy matching, which is the default.
988	`(in SET ...)'	988
989	`(char SET ...)'	989	(any SET...) Match a character from one of the SETs. Each SET is a
990	matches any character in SET .... SET may be a character or string.	990	character, a string, a range as string \"A-Z\" or cons
991	Ranges of characters can be specified as `A-Z' in strings.	991	(?A . ?Z), or a character class (see below). Alias: in, char.
992	Ranges may also be specified as conses like `(?A . ?Z)'.	992	(not CHARSPEC) Match one character not matched by CHARSPEC. CHARSPEC
993	Reversed ranges like `Z-A' and `(?Z . ?A)' are not permitted.	993	can be (any ...), (syntax ...), (category ...),
994		994	or a character class.
995	SET may also be the name of a character class: `digit',	995	not-newline Match any character except a newline. Alias: nonl.
996	`control', `hex-digit', `blank', `graph', `print', `alnum',	996	anything Match any character.
997	`alpha', `ascii', `nonascii', `lower', `punct', `space', `upper',	997
998	`word', or one of their synonyms.	998	CHARCLASS Match a character from a character class. One of:
999		999	alpha, alphabetic, letter Alphabetic characters (defined by Unicode).
1000	`(not (any SET ...))'	1000	alnum, alphanumeric Alphabetic or decimal digit chars (Unicode).
1001	`(not (any SET ...))'	1001	digit, numeric, num 0-9.
1002		1002	xdigit, hex-digit, hex 0-9, A-F, a-f.
1003	`line-start', `bol'	1003	cntrl, control ASCII codes 0-31.
1004	matches the empty string, but only at the beginning of a line	1004	blank Horizontal whitespace (Unicode).
1005	in the text being matched	1005	space, whitespace, white Chars with whitespace syntax.
1006		1006	lower, lower-case Lower-case chars, from current case table.
1007	`line-end', `eol'	1007	upper, upper-case Upper-case chars, from current case table.
1008	is similar to `line-start' but matches only at the end of a line	1008	graph, graphic Graphic characters (Unicode).
1009		1009	print, printing Whitespace or graphic (Unicode).
1010	`string-start', `bos', `bot'	1010	punct, punctuation Not control, space, letter or digit (ASCII);
1011	matches the empty string, but only at the beginning of the	1011	not word syntax (non-ASCII).
1012	string being matched against.	1012	word, wordchar Characters with word syntax.
1013		1013	ascii ASCII characters (codes 0-127).
1014	`string-end', `eos', `eot'	1014	nonascii Non-ASCII characters (but not raw bytes).
1015	matches the empty string, but only at the end of the	1015
1016	string being matched against.	1016	(syntax SYNTAX) Match a character with syntax SYNTAX, being one of:
1017		1017	whitespace, punctuation, word, symbol, open-parenthesis,
1018	`buffer-start'	1018	close-parenthesis, expression-prefix, string-quote,
1019	matches the empty string, but only at the beginning of the	1019	paired-delimiter, escape, character-quote, comment-start,
1020	buffer being matched against. Actually equivalent to `string-start'.	1020	comment-end, string-delimiter, comment-delimiter
1021		1021
1022	`buffer-end'	1022	(category CAT) Match a character in category CAT, being one of:
1023	matches the empty string, but only at the end of the	1023	space-for-indent, base, consonant, base-vowel,
1024	buffer being matched against. Actually equivalent to `string-end'.	1024	upper-diacritical-mark, lower-diacritical-mark, tone-mark, symbol,
1025		1025	digit, vowel-modifying-diacritical-mark, vowel-sign,
1026	`point'	1026	semivowel-lower, not-at-end-of-line, not-at-beginning-of-line,
1027	matches the empty string, but only at point.	1027	alpha-numeric-two-byte, chinese-two-byte, greek-two-byte,
1028		1028	japanese-hiragana-two-byte, indian-two-byte,
1029	`word-start', `bow'	1029	japanese-katakana-two-byte, strong-left-to-right,
1030	matches the empty string, but only at the beginning of a word.	1030	korean-hangul-two-byte, strong-right-to-left, cyrillic-two-byte,
1031		1031	combining-diacritic, ascii, arabic, chinese, ethiopic, greek,
1032	`word-end', `eow'	1032	korean, indian, japanese, japanese-katakana, latin, lao,
1033	matches the empty string, but only at the end of a word.	1033	tibetan, japanese-roman, thai, vietnamese, hebrew, cyrillic,
1034		1034	can-break
1035	`word-boundary'	1035
1036	matches the empty string, but only at the beginning or end of a	1036	Zero-width assertions: these all match the empty string in specific places.
1037	word.	1037	line-start At the beginning of a line. Alias: bol.
1038		1038	line-end At the end of a line. Alias: eol.
1039	`(not word-boundary)'	1039	string-start At the start of the string or buffer.
1040	`not-word-boundary'	1040	Alias: buffer-start, bos, bot.
1041	matches the empty string, but not at the beginning or end of a	1041	string-end At the end of the string or buffer.
1042	word.	1042	Alias: buffer-end, eos, eot.
1043		1043	point At point.
1044	`symbol-start'	1044	word-start At the beginning of a word.
1045	matches the empty string, but only at the beginning of a symbol.	1045	word-end At the end of a word.
1046		1046	word-boundary At the beginning or end of a word.
1047	`symbol-end'	1047	not-word-boundary Not at the beginning or end of a word.
1048	matches the empty string, but only at the end of a symbol.	1048	symbol-start At the beginning of a symbol.
1049		1049	symbol-end At the end of a symbol.
1050	`digit', `numeric', `num'	1050
1051	matches 0 through 9.	1051	(group RX...) Match RXs and define a capture group. Alias: submatch.
1052		1052	(group-n N RX...) Match RXs and define capture group N. Alias: submatch-n.
1053	`control', `cntrl'	1053	(backref N) Match the text that capture group N matched.
1054	matches any character whose code is in the range 0-31.	1054
1055		1055	(literal EXPR) Match the literal string from evaluating EXPR at run time.
1056	`hex-digit', `hex', `xdigit'	1056	(regexp EXPR) Match the string regexp from evaluating EXPR at run time.
1057	matches 0 through 9, a through f and A through F.	1057	(eval EXPR) Match the rx sexp from evaluating EXPR at compile time."
1058
1059	`blank'
1060	matches horizontal whitespace, as defined by Annex C of the
1061	Unicode Technical Standard #18. In particular, it matches
1062	spaces, tabs, and other characters whose Unicode
1063	`general-category' property indicates they are spacing
1064	separators.
1065
1066	`graphic', `graph'
1067	matches graphic characters--everything except whitespace, ASCII
1068	and non-ASCII control characters, surrogates, and codepoints
1069	unassigned by Unicode.
1070
1071	`printing', `print'
1072	matches whitespace and graphic characters.
1073
1074	`alphanumeric', `alnum'
1075	matches alphabetic characters and digits. For multibyte characters,
1076	it matches characters whose Unicode `general-category' property
1077	indicates they are alphabetic or decimal number characters.
1078
1079	`letter', `alphabetic', `alpha'
1080	matches alphabetic characters. For multibyte characters,
1081	it matches characters whose Unicode `general-category' property
1082	indicates they are alphabetic characters.
1083
1084	`ascii'
1085	matches ASCII (unibyte) characters.
1086
1087	`nonascii'
1088	matches non-ASCII (multibyte) characters.
1089
1090	`lower', `lower-case'
1091	matches anything lower-case, as determined by the current case
1092	table. If `case-fold-search' is non-nil, this also matches any
1093	upper-case letter.
1094
1095	`upper', `upper-case'
1096	matches anything upper-case, as determined by the current case
1097	table. If `case-fold-search' is non-nil, this also matches any
1098	lower-case letter.
1099
1100	`punctuation', `punct'
1101	matches punctuation. (But at present, for multibyte characters,
1102	it matches anything that has non-word syntax.)
1103
1104	`space', `whitespace', `white'
1105	matches anything that has whitespace syntax.
1106
1107	`word', `wordchar'
1108	matches anything that has word syntax.
1109
1110	`not-wordchar'
1111	matches anything that has non-word syntax.
1112
1113	`(syntax SYNTAX)'
1114	matches a character with syntax SYNTAX. SYNTAX must be one
1115	of the following symbols, or a symbol corresponding to the syntax
1116	character, e.g. `\\.' for `\\s.'.
1117
1118	`whitespace' (\\s- in string notation)
1119	`punctuation' (\\s.)
1120	`word' (\\sw)
1121	`symbol' (\\s_)
1122	`open-parenthesis' (\\s()
1123	`close-parenthesis' (\\s))
1124	`expression-prefix' (\\s')
1125	`string-quote' (\\s\")
1126	`paired-delimiter' (\\s$)
1127	`escape' (\\s\\)
1128	`character-quote' (\\s/)
1129	`comment-start' (\\s<)
1130	`comment-end' (\\s>)
1131	`string-delimiter' (\\s|)
1132	`comment-delimiter' (\\s!)
1133
1134	`(not (syntax SYNTAX))'
1135	matches a character that doesn't have syntax SYNTAX.
1136
1137	`(category CATEGORY)'
1138	matches a character with category CATEGORY. CATEGORY must be
1139	either a character to use for C, or one of the following symbols.
1140
1141	`space-for-indent' (\\c\\s in string notation)
1142	`base' (\\c.)
1143	`consonant' (\\c0)
1144	`base-vowel' (\\c1)
1145	`upper-diacritical-mark' (\\c2)
1146	`lower-diacritical-mark' (\\c3)
1147	`tone-mark' (\\c4)
1148	`symbol' (\\c5)
1149	`digit' (\\c6)
1150	`vowel-modifying-diacritical-mark' (\\c7)
1151	`vowel-sign' (\\c8)
1152	`semivowel-lower' (\\c9)
1153	`not-at-end-of-line' (\\c<)
1154	`not-at-beginning-of-line' (\\c>)
1155	`alpha-numeric-two-byte' (\\cA)
1156	`chinese-two-byte' (\\cC)
1157	`greek-two-byte' (\\cG)
1158	`japanese-hiragana-two-byte' (\\cH)
1159	`indian-two-byte' (\\cI)
1160	`japanese-katakana-two-byte' (\\cK)
1161	`strong-left-to-right' (\\cL)
1162	`korean-hangul-two-byte' (\\cN)
1163	`strong-right-to-left' (\\cR)
1164	`cyrillic-two-byte' (\\cY)
1165	`combining-diacritic' (\\c^)
1166	`ascii' (\\ca)
1167	`arabic' (\\cb)
1168	`chinese' (\\cc)
1169	`ethiopic' (\\ce)
1170	`greek' (\\cg)
1171	`korean' (\\ch)
1172	`indian' (\\ci)
1173	`japanese' (\\cj)
1174	`japanese-katakana' (\\ck)
1175	`latin' (\\cl)
1176	`lao' (\\co)
1177	`tibetan' (\\cq)
1178	`japanese-roman' (\\cr)
1179	`thai' (\\ct)
1180	`vietnamese' (\\cv)
1181	`hebrew' (\\cw)
1182	`cyrillic' (\\cy)
1183	`can-break' (\\c|)
1184
1185	`(not (category CATEGORY))'
1186	matches a character that doesn't have category CATEGORY.
1187
1188	`(and SEXP1 SEXP2 ...)'
1189	`(: SEXP1 SEXP2 ...)'
1190	`(seq SEXP1 SEXP2 ...)'
1191	`(sequence SEXP1 SEXP2 ...)'
1192	matches what SEXP1 matches, followed by what SEXP2 matches, etc.
1193	Without arguments, matches the empty string.
1194
1195	`(submatch SEXP1 SEXP2 ...)'
1196	`(group SEXP1 SEXP2 ...)'
1197	like `and', but makes the match accessible with `match-end',
1198	`match-beginning', and `match-string'.
1199
1200	`(submatch-n N SEXP1 SEXP2 ...)'
1201	`(group-n N SEXP1 SEXP2 ...)'
1202	like `group', but make it an explicitly-numbered group with
1203	group number N.
1204
1205	`(or SEXP1 SEXP2 ...)'
1206	`(| SEXP1 SEXP2 ...)'
1207	matches anything that matches SEXP1 or SEXP2, etc. If all
1208	args are strings, use `regexp-opt' to optimize the resulting
1209	regular expression. Without arguments, never matches anything.
1210
1211	`(minimal-match SEXP)'
1212	produce a non-greedy regexp for SEXP. Normally, regexps matching
1213	zero or more occurrences of something are \"greedy\" in that they
1214	match as much as they can, as long as the overall regexp can
1215	still match. A non-greedy regexp matches as little as possible.
1216
1217	`(maximal-match SEXP)'
1218	produce a greedy regexp for SEXP. This is the default.
1219
1220	Below, `SEXP ...' represents a sequence of regexp forms, treated as if
1221	enclosed in `(and ...)'.
1222
1223	`(zero-or-more SEXP ...)'
1224	`(0+ SEXP ...)'
1225	matches zero or more occurrences of what SEXP ... matches.
1226
1227	`(* SEXP ...)'
1228	like `zero-or-more', but always produces a greedy regexp, independent
1229	of `rx-greedy-flag'.
1230
1231	`(*? SEXP ...)'
1232	like `zero-or-more', but always produces a non-greedy regexp,
1233	independent of `rx-greedy-flag'.
1234
1235	`(one-or-more SEXP ...)'
1236	`(1+ SEXP ...)'
1237	matches one or more occurrences of SEXP ...
1238
1239	`(+ SEXP ...)'
1240	like `one-or-more', but always produces a greedy regexp.
1241
1242	`(+? SEXP ...)'
1243	like `one-or-more', but always produces a non-greedy regexp.
1244
1245	`(zero-or-one SEXP ...)'
1246	`(optional SEXP ...)'
1247	`(opt SEXP ...)'
1248	matches zero or one occurrences of A.
1249
1250	`(? SEXP ...)'
1251	like `zero-or-one', but always produces a greedy regexp.
1252
1253	`(?? SEXP ...)'
1254	like `zero-or-one', but always produces a non-greedy regexp.
1255
1256	`(repeat N SEXP)'
1257	`(= N SEXP ...)'
1258	matches N occurrences.
1259
1260	`(>= N SEXP ...)'
1261	matches N or more occurrences.
1262
1263	`(repeat N M SEXP)'
1264	`(** N M SEXP ...)'
1265	matches N to M occurrences.
1266
1267	`(backref N)'
1268	matches what was matched previously by submatch N.
1269
1270	`(literal STRING-EXPR)'
1271	matches STRING-EXPR literally, where STRING-EXPR is any lisp
1272	expression that evaluates to a string.
1273
1274	`(regexp REGEXP-EXPR)'
1275	include REGEXP-EXPR in string notation in the result, where
1276	REGEXP-EXPR is any lisp expression that evaluates to a
1277	string containing a valid regexp.
1278
1279	`(eval FORM)'
1280	evaluate FORM and insert result. If result is a string,
1281	`regexp-quote' it. Note that FORM is evaluated during
1282	macroexpansion."
1283	(let* ((rx--compile-to-lisp t)	1058	(let* ((rx--compile-to-lisp t)
1284	(re (cond ((null regexps)	1059	(re (cond ((null regexps)
1285	(error "No regexp"))	1060	(error "No regexp"))