diff options
| author | Mattias Engdegård | 2019-07-06 13:22:15 +0200 |
|---|---|---|
| committer | Mattias Engdegård | 2019-07-07 11:49:22 +0200 |
| commit | 72e21777d0c3940465351fb86d9b7dbce20ace63 (patch) | |
| tree | ab66c2f8b6047160a89ad4d30ef8cdc427d7c8c7 | |
| parent | ac1ad3e49abd57a3e39b817864ea379354119d08 (diff) | |
| download | emacs-72e21777d0c3940465351fb86d9b7dbce20ace63.tar.gz emacs-72e21777d0c3940465351fb86d9b7dbce20ace63.zip | |
Shorter `rx' doc string (bug#36496)
* lisp/emacs-lisp/rx.el (rx): Replace long description with a condensed
summary of the rx syntax, with reference to the manual section.
| -rw-r--r-- | lisp/emacs-lisp/rx.el | 417 |
1 files changed, 96 insertions, 321 deletions
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el index 24dd6cbf1d6..249529e54e3 100644 --- a/lisp/emacs-lisp/rx.el +++ b/lisp/emacs-lisp/rx.el | |||
| @@ -959,327 +959,102 @@ becomes just a more verbose version of STRING." | |||
| 959 | ;;;###autoload | 959 | ;;;###autoload |
| 960 | (defmacro rx (&rest regexps) | 960 | (defmacro rx (&rest regexps) |
| 961 | "Translate regular expressions REGEXPS in sexp form to a regexp string. | 961 | "Translate regular expressions REGEXPS in sexp form to a regexp string. |
| 962 | REGEXPS is a non-empty sequence of forms of the sort listed below. | 962 | Each argument is one of the forms below; RX is a subform, and RX... stands |
| 963 | 963 | for zero or more RXs. For details, see Info node `(elisp) Rx Notation'. |
| 964 | Note that `rx' is a Lisp macro; when used in a Lisp program being | 964 | See `rx-to-string' for the corresponding function. |
| 965 | compiled, the translation is performed by the compiler. The | 965 | |
| 966 | `literal' and `regexp' forms accept subforms that will evaluate | 966 | STRING Match a literal string. |
| 967 | to strings, in addition to constant strings. If REGEXPS include | 967 | CHAR Match a literal character. |
| 968 | such forms, then the result is an expression which returns a | 968 | |
| 969 | regexp string, rather than a regexp string directly. See | 969 | (seq RX...) Match the RXs in sequence. Alias: :, sequence, and. |
| 970 | `rx-to-string' for performing translation completely at run time. | 970 | (or RX...) Match one of the RXs. Alias: |. |
| 971 | 971 | ||
| 972 | The following are valid subforms of regular expressions in sexp | 972 | (zero-or-more RX...) Match RXs zero or more times. Alias: 0+. |
| 973 | notation. | 973 | (one-or-more RX...) Match RXs one or more times. Alias: 1+. |
| 974 | 974 | (zero-or-one RX...) Match RXs or the empty string. Alias: opt, optional. | |
| 975 | STRING | 975 | (* RX...) Match RXs zero or more times; greedy. |
| 976 | matches string STRING literally. | 976 | (+ RX...) Match RXs one or more times; greedy. |
| 977 | 977 | (? RX...) Match RXs or the empty string; greedy. | |
| 978 | CHAR | 978 | (*? RX...) Match RXs zero or more times; non-greedy. |
| 979 | matches character CHAR literally. | 979 | (+? RX...) Match RXs one or more times; non-greedy. |
| 980 | 980 | (?? RX...) Match RXs or the empty string; non-greedy. | |
| 981 | `not-newline', `nonl' | 981 | (= N RX...) Match RXs exactly N times. |
| 982 | matches any character except a newline. | 982 | (>= N RX...) Match RXs N or more times. |
| 983 | 983 | (** N M RX...) Match RXs N to M times. Alias: repeat. | |
| 984 | `anything' | 984 | (minimal-match RX) Match RX, with zero-or-more, one-or-more, zero-or-one |
| 985 | matches any character | 985 | and aliases using non-greedy matching. |
| 986 | 986 | (maximal-match RX) Match RX, with zero-or-more, one-or-more, zero-or-one | |
| 987 | `(any SET ...)' | 987 | and aliases using greedy matching, which is the default. |
| 988 | `(in SET ...)' | 988 | |
| 989 | `(char SET ...)' | 989 | (any SET...) Match a character from one of the SETs. Each SET is a |
| 990 | matches any character in SET .... SET may be a character or string. | 990 | character, a string, a range as string \"A-Z\" or cons |
| 991 | Ranges of characters can be specified as `A-Z' in strings. | 991 | (?A . ?Z), or a character class (see below). Alias: in, char. |
| 992 | Ranges may also be specified as conses like `(?A . ?Z)'. | 992 | (not CHARSPEC) Match one character not matched by CHARSPEC. CHARSPEC |
| 993 | Reversed ranges like `Z-A' and `(?Z . ?A)' are not permitted. | 993 | can be (any ...), (syntax ...), (category ...), |
| 994 | 994 | or a character class. | |
| 995 | SET may also be the name of a character class: `digit', | 995 | not-newline Match any character except a newline. Alias: nonl. |
| 996 | `control', `hex-digit', `blank', `graph', `print', `alnum', | 996 | anything Match any character. |
| 997 | `alpha', `ascii', `nonascii', `lower', `punct', `space', `upper', | 997 | |
| 998 | `word', or one of their synonyms. | 998 | CHARCLASS Match a character from a character class. One of: |
| 999 | 999 | alpha, alphabetic, letter Alphabetic characters (defined by Unicode). | |
| 1000 | `(not (any SET ...))' | 1000 | alnum, alphanumeric Alphabetic or decimal digit chars (Unicode). |
| 1001 | matches any character not in SET ... | 1001 | digit, numeric, num 0-9. |
| 1002 | 1002 | xdigit, hex-digit, hex 0-9, A-F, a-f. | |
| 1003 | `line-start', `bol' | 1003 | cntrl, control ASCII codes 0-31. |
| 1004 | matches the empty string, but only at the beginning of a line | 1004 | blank Horizontal whitespace (Unicode). |
| 1005 | in the text being matched | 1005 | space, whitespace, white Chars with whitespace syntax. |
| 1006 | 1006 | lower, lower-case Lower-case chars, from current case table. | |
| 1007 | `line-end', `eol' | 1007 | upper, upper-case Upper-case chars, from current case table. |
| 1008 | is similar to `line-start' but matches only at the end of a line | 1008 | graph, graphic Graphic characters (Unicode). |
| 1009 | 1009 | print, printing Whitespace or graphic (Unicode). | |
| 1010 | `string-start', `bos', `bot' | 1010 | punct, punctuation Not control, space, letter or digit (ASCII); |
| 1011 | matches the empty string, but only at the beginning of the | 1011 | not word syntax (non-ASCII). |
| 1012 | string being matched against. | 1012 | word, wordchar Characters with word syntax. |
| 1013 | 1013 | ascii ASCII characters (codes 0-127). | |
| 1014 | `string-end', `eos', `eot' | 1014 | nonascii Non-ASCII characters (but not raw bytes). |
| 1015 | matches the empty string, but only at the end of the | 1015 | |
| 1016 | string being matched against. | 1016 | (syntax SYNTAX) Match a character with syntax SYNTAX, being one of: |
| 1017 | 1017 | whitespace, punctuation, word, symbol, open-parenthesis, | |
| 1018 | `buffer-start' | 1018 | close-parenthesis, expression-prefix, string-quote, |
| 1019 | matches the empty string, but only at the beginning of the | 1019 | paired-delimiter, escape, character-quote, comment-start, |
| 1020 | buffer being matched against. Actually equivalent to `string-start'. | 1020 | comment-end, string-delimiter, comment-delimiter |
| 1021 | 1021 | ||
| 1022 | `buffer-end' | 1022 | (category CAT) Match a character in category CAT, being one of: |
| 1023 | matches the empty string, but only at the end of the | 1023 | space-for-indent, base, consonant, base-vowel, |
| 1024 | buffer being matched against. Actually equivalent to `string-end'. | 1024 | upper-diacritical-mark, lower-diacritical-mark, tone-mark, symbol, |
| 1025 | 1025 | digit, vowel-modifying-diacritical-mark, vowel-sign, | |
| 1026 | `point' | 1026 | semivowel-lower, not-at-end-of-line, not-at-beginning-of-line, |
| 1027 | matches the empty string, but only at point. | 1027 | alpha-numeric-two-byte, chinese-two-byte, greek-two-byte, |
| 1028 | 1028 | japanese-hiragana-two-byte, indian-two-byte, | |
| 1029 | `word-start', `bow' | 1029 | japanese-katakana-two-byte, strong-left-to-right, |
| 1030 | matches the empty string, but only at the beginning of a word. | 1030 | korean-hangul-two-byte, strong-right-to-left, cyrillic-two-byte, |
| 1031 | 1031 | combining-diacritic, ascii, arabic, chinese, ethiopic, greek, | |
| 1032 | `word-end', `eow' | 1032 | korean, indian, japanese, japanese-katakana, latin, lao, |
| 1033 | matches the empty string, but only at the end of a word. | 1033 | tibetan, japanese-roman, thai, vietnamese, hebrew, cyrillic, |
| 1034 | 1034 | can-break | |
| 1035 | `word-boundary' | 1035 | |
| 1036 | matches the empty string, but only at the beginning or end of a | 1036 | Zero-width assertions: these all match the empty string in specific places. |
| 1037 | word. | 1037 | line-start At the beginning of a line. Alias: bol. |
| 1038 | 1038 | line-end At the end of a line. Alias: eol. | |
| 1039 | `(not word-boundary)' | 1039 | string-start At the start of the string or buffer. |
| 1040 | `not-word-boundary' | 1040 | Alias: buffer-start, bos, bot. |
| 1041 | matches the empty string, but not at the beginning or end of a | 1041 | string-end At the end of the string or buffer. |
| 1042 | word. | 1042 | Alias: buffer-end, eos, eot. |
| 1043 | 1043 | point At point. | |
| 1044 | `symbol-start' | 1044 | word-start At the beginning of a word. |
| 1045 | matches the empty string, but only at the beginning of a symbol. | 1045 | word-end At the end of a word. |
| 1046 | 1046 | word-boundary At the beginning or end of a word. | |
| 1047 | `symbol-end' | 1047 | not-word-boundary Not at the beginning or end of a word. |
| 1048 | matches the empty string, but only at the end of a symbol. | 1048 | symbol-start At the beginning of a symbol. |
| 1049 | 1049 | symbol-end At the end of a symbol. | |
| 1050 | `digit', `numeric', `num' | 1050 | |
| 1051 | matches 0 through 9. | 1051 | (group RX...) Match RXs and define a capture group. Alias: submatch. |
| 1052 | 1052 | (group-n N RX...) Match RXs and define capture group N. Alias: submatch-n. | |
| 1053 | `control', `cntrl' | 1053 | (backref N) Match the text that capture group N matched. |
| 1054 | matches any character whose code is in the range 0-31. | 1054 | |
| 1055 | 1055 | (literal EXPR) Match the literal string from evaluating EXPR at run time. | |
| 1056 | `hex-digit', `hex', `xdigit' | 1056 | (regexp EXPR) Match the string regexp from evaluating EXPR at run time. |
| 1057 | matches 0 through 9, a through f and A through F. | 1057 | (eval EXPR) Match the rx sexp from evaluating EXPR at compile time." |
| 1058 | |||
| 1059 | `blank' | ||
| 1060 | matches horizontal whitespace, as defined by Annex C of the | ||
| 1061 | Unicode Technical Standard #18. In particular, it matches | ||
| 1062 | spaces, tabs, and other characters whose Unicode | ||
| 1063 | `general-category' property indicates they are spacing | ||
| 1064 | separators. | ||
| 1065 | |||
| 1066 | `graphic', `graph' | ||
| 1067 | matches graphic characters--everything except whitespace, ASCII | ||
| 1068 | and non-ASCII control characters, surrogates, and codepoints | ||
| 1069 | unassigned by Unicode. | ||
| 1070 | |||
| 1071 | `printing', `print' | ||
| 1072 | matches whitespace and graphic characters. | ||
| 1073 | |||
| 1074 | `alphanumeric', `alnum' | ||
| 1075 | matches alphabetic characters and digits. For multibyte characters, | ||
| 1076 | it matches characters whose Unicode `general-category' property | ||
| 1077 | indicates they are alphabetic or decimal number characters. | ||
| 1078 | |||
| 1079 | `letter', `alphabetic', `alpha' | ||
| 1080 | matches alphabetic characters. For multibyte characters, | ||
| 1081 | it matches characters whose Unicode `general-category' property | ||
| 1082 | indicates they are alphabetic characters. | ||
| 1083 | |||
| 1084 | `ascii' | ||
| 1085 | matches ASCII (unibyte) characters. | ||
| 1086 | |||
| 1087 | `nonascii' | ||
| 1088 | matches non-ASCII (multibyte) characters. | ||
| 1089 | |||
| 1090 | `lower', `lower-case' | ||
| 1091 | matches anything lower-case, as determined by the current case | ||
| 1092 | table. If `case-fold-search' is non-nil, this also matches any | ||
| 1093 | upper-case letter. | ||
| 1094 | |||
| 1095 | `upper', `upper-case' | ||
| 1096 | matches anything upper-case, as determined by the current case | ||
| 1097 | table. If `case-fold-search' is non-nil, this also matches any | ||
| 1098 | lower-case letter. | ||
| 1099 | |||
| 1100 | `punctuation', `punct' | ||
| 1101 | matches punctuation. (But at present, for multibyte characters, | ||
| 1102 | it matches anything that has non-word syntax.) | ||
| 1103 | |||
| 1104 | `space', `whitespace', `white' | ||
| 1105 | matches anything that has whitespace syntax. | ||
| 1106 | |||
| 1107 | `word', `wordchar' | ||
| 1108 | matches anything that has word syntax. | ||
| 1109 | |||
| 1110 | `not-wordchar' | ||
| 1111 | matches anything that has non-word syntax. | ||
| 1112 | |||
| 1113 | `(syntax SYNTAX)' | ||
| 1114 | matches a character with syntax SYNTAX. SYNTAX must be one | ||
| 1115 | of the following symbols, or a symbol corresponding to the syntax | ||
| 1116 | character, e.g. `\\.' for `\\s.'. | ||
| 1117 | |||
| 1118 | `whitespace' (\\s- in string notation) | ||
| 1119 | `punctuation' (\\s.) | ||
| 1120 | `word' (\\sw) | ||
| 1121 | `symbol' (\\s_) | ||
| 1122 | `open-parenthesis' (\\s() | ||
| 1123 | `close-parenthesis' (\\s)) | ||
| 1124 | `expression-prefix' (\\s') | ||
| 1125 | `string-quote' (\\s\") | ||
| 1126 | `paired-delimiter' (\\s$) | ||
| 1127 | `escape' (\\s\\) | ||
| 1128 | `character-quote' (\\s/) | ||
| 1129 | `comment-start' (\\s<) | ||
| 1130 | `comment-end' (\\s>) | ||
| 1131 | `string-delimiter' (\\s|) | ||
| 1132 | `comment-delimiter' (\\s!) | ||
| 1133 | |||
| 1134 | `(not (syntax SYNTAX))' | ||
| 1135 | matches a character that doesn't have syntax SYNTAX. | ||
| 1136 | |||
| 1137 | `(category CATEGORY)' | ||
| 1138 | matches a character with category CATEGORY. CATEGORY must be | ||
| 1139 | either a character to use for C, or one of the following symbols. | ||
| 1140 | |||
| 1141 | `space-for-indent' (\\c\\s in string notation) | ||
| 1142 | `base' (\\c.) | ||
| 1143 | `consonant' (\\c0) | ||
| 1144 | `base-vowel' (\\c1) | ||
| 1145 | `upper-diacritical-mark' (\\c2) | ||
| 1146 | `lower-diacritical-mark' (\\c3) | ||
| 1147 | `tone-mark' (\\c4) | ||
| 1148 | `symbol' (\\c5) | ||
| 1149 | `digit' (\\c6) | ||
| 1150 | `vowel-modifying-diacritical-mark' (\\c7) | ||
| 1151 | `vowel-sign' (\\c8) | ||
| 1152 | `semivowel-lower' (\\c9) | ||
| 1153 | `not-at-end-of-line' (\\c<) | ||
| 1154 | `not-at-beginning-of-line' (\\c>) | ||
| 1155 | `alpha-numeric-two-byte' (\\cA) | ||
| 1156 | `chinese-two-byte' (\\cC) | ||
| 1157 | `greek-two-byte' (\\cG) | ||
| 1158 | `japanese-hiragana-two-byte' (\\cH) | ||
| 1159 | `indian-two-byte' (\\cI) | ||
| 1160 | `japanese-katakana-two-byte' (\\cK) | ||
| 1161 | `strong-left-to-right' (\\cL) | ||
| 1162 | `korean-hangul-two-byte' (\\cN) | ||
| 1163 | `strong-right-to-left' (\\cR) | ||
| 1164 | `cyrillic-two-byte' (\\cY) | ||
| 1165 | `combining-diacritic' (\\c^) | ||
| 1166 | `ascii' (\\ca) | ||
| 1167 | `arabic' (\\cb) | ||
| 1168 | `chinese' (\\cc) | ||
| 1169 | `ethiopic' (\\ce) | ||
| 1170 | `greek' (\\cg) | ||
| 1171 | `korean' (\\ch) | ||
| 1172 | `indian' (\\ci) | ||
| 1173 | `japanese' (\\cj) | ||
| 1174 | `japanese-katakana' (\\ck) | ||
| 1175 | `latin' (\\cl) | ||
| 1176 | `lao' (\\co) | ||
| 1177 | `tibetan' (\\cq) | ||
| 1178 | `japanese-roman' (\\cr) | ||
| 1179 | `thai' (\\ct) | ||
| 1180 | `vietnamese' (\\cv) | ||
| 1181 | `hebrew' (\\cw) | ||
| 1182 | `cyrillic' (\\cy) | ||
| 1183 | `can-break' (\\c|) | ||
| 1184 | |||
| 1185 | `(not (category CATEGORY))' | ||
| 1186 | matches a character that doesn't have category CATEGORY. | ||
| 1187 | |||
| 1188 | `(and SEXP1 SEXP2 ...)' | ||
| 1189 | `(: SEXP1 SEXP2 ...)' | ||
| 1190 | `(seq SEXP1 SEXP2 ...)' | ||
| 1191 | `(sequence SEXP1 SEXP2 ...)' | ||
| 1192 | matches what SEXP1 matches, followed by what SEXP2 matches, etc. | ||
| 1193 | Without arguments, matches the empty string. | ||
| 1194 | |||
| 1195 | `(submatch SEXP1 SEXP2 ...)' | ||
| 1196 | `(group SEXP1 SEXP2 ...)' | ||
| 1197 | like `and', but makes the match accessible with `match-end', | ||
| 1198 | `match-beginning', and `match-string'. | ||
| 1199 | |||
| 1200 | `(submatch-n N SEXP1 SEXP2 ...)' | ||
| 1201 | `(group-n N SEXP1 SEXP2 ...)' | ||
| 1202 | like `group', but make it an explicitly-numbered group with | ||
| 1203 | group number N. | ||
| 1204 | |||
| 1205 | `(or SEXP1 SEXP2 ...)' | ||
| 1206 | `(| SEXP1 SEXP2 ...)' | ||
| 1207 | matches anything that matches SEXP1 or SEXP2, etc. If all | ||
| 1208 | args are strings, use `regexp-opt' to optimize the resulting | ||
| 1209 | regular expression. Without arguments, never matches anything. | ||
| 1210 | |||
| 1211 | `(minimal-match SEXP)' | ||
| 1212 | produce a non-greedy regexp for SEXP. Normally, regexps matching | ||
| 1213 | zero or more occurrences of something are \"greedy\" in that they | ||
| 1214 | match as much as they can, as long as the overall regexp can | ||
| 1215 | still match. A non-greedy regexp matches as little as possible. | ||
| 1216 | |||
| 1217 | `(maximal-match SEXP)' | ||
| 1218 | produce a greedy regexp for SEXP. This is the default. | ||
| 1219 | |||
| 1220 | Below, `SEXP ...' represents a sequence of regexp forms, treated as if | ||
| 1221 | enclosed in `(and ...)'. | ||
| 1222 | |||
| 1223 | `(zero-or-more SEXP ...)' | ||
| 1224 | `(0+ SEXP ...)' | ||
| 1225 | matches zero or more occurrences of what SEXP ... matches. | ||
| 1226 | |||
| 1227 | `(* SEXP ...)' | ||
| 1228 | like `zero-or-more', but always produces a greedy regexp, independent | ||
| 1229 | of `rx-greedy-flag'. | ||
| 1230 | |||
| 1231 | `(*? SEXP ...)' | ||
| 1232 | like `zero-or-more', but always produces a non-greedy regexp, | ||
| 1233 | independent of `rx-greedy-flag'. | ||
| 1234 | |||
| 1235 | `(one-or-more SEXP ...)' | ||
| 1236 | `(1+ SEXP ...)' | ||
| 1237 | matches one or more occurrences of SEXP ... | ||
| 1238 | |||
| 1239 | `(+ SEXP ...)' | ||
| 1240 | like `one-or-more', but always produces a greedy regexp. | ||
| 1241 | |||
| 1242 | `(+? SEXP ...)' | ||
| 1243 | like `one-or-more', but always produces a non-greedy regexp. | ||
| 1244 | |||
| 1245 | `(zero-or-one SEXP ...)' | ||
| 1246 | `(optional SEXP ...)' | ||
| 1247 | `(opt SEXP ...)' | ||
| 1248 | matches zero or one occurrences of A. | ||
| 1249 | |||
| 1250 | `(? SEXP ...)' | ||
| 1251 | like `zero-or-one', but always produces a greedy regexp. | ||
| 1252 | |||
| 1253 | `(?? SEXP ...)' | ||
| 1254 | like `zero-or-one', but always produces a non-greedy regexp. | ||
| 1255 | |||
| 1256 | `(repeat N SEXP)' | ||
| 1257 | `(= N SEXP ...)' | ||
| 1258 | matches N occurrences. | ||
| 1259 | |||
| 1260 | `(>= N SEXP ...)' | ||
| 1261 | matches N or more occurrences. | ||
| 1262 | |||
| 1263 | `(repeat N M SEXP)' | ||
| 1264 | `(** N M SEXP ...)' | ||
| 1265 | matches N to M occurrences. | ||
| 1266 | |||
| 1267 | `(backref N)' | ||
| 1268 | matches what was matched previously by submatch N. | ||
| 1269 | |||
| 1270 | `(literal STRING-EXPR)' | ||
| 1271 | matches STRING-EXPR literally, where STRING-EXPR is any lisp | ||
| 1272 | expression that evaluates to a string. | ||
| 1273 | |||
| 1274 | `(regexp REGEXP-EXPR)' | ||
| 1275 | include REGEXP-EXPR in string notation in the result, where | ||
| 1276 | REGEXP-EXPR is any lisp expression that evaluates to a | ||
| 1277 | string containing a valid regexp. | ||
| 1278 | |||
| 1279 | `(eval FORM)' | ||
| 1280 | evaluate FORM and insert result. If result is a string, | ||
| 1281 | `regexp-quote' it. Note that FORM is evaluated during | ||
| 1282 | macroexpansion." | ||
| 1283 | (let* ((rx--compile-to-lisp t) | 1058 | (let* ((rx--compile-to-lisp t) |
| 1284 | (re (cond ((null regexps) | 1059 | (re (cond ((null regexps) |
| 1285 | (error "No regexp")) | 1060 | (error "No regexp")) |