aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStefan Monnier2004-04-23 21:23:29 +0000
committerStefan Monnier2004-04-23 21:23:29 +0000
commitccfbe679888d8c3431b9946ff2e42d4e4c1c0816 (patch)
treea4b613537ad83dac6ec7ee397b913aaf243ebd8b
parent4b284383bb198e1c5ea311431bdc46947b4a46ef (diff)
downloademacs-ccfbe679888d8c3431b9946ff2e42d4e4c1c0816.tar.gz
emacs-ccfbe679888d8c3431b9946ff2e42d4e4c1c0816.zip
Doc fixes.
(rx-constituents): Add/extend many forms. (rx-check): Check form is a list. (bracket): Defvar. (rx-check-any, rx-any, rx-check-not): Modify. (rx-not): Simplify. (rx-trans-forms, rx-=, rx->=, rx-**, rx-not-char, rx-not-syntax): New. (rx-kleene): Use rx-trans-forms. (rx-quote-for-set): Delete. (rx): Allow multiple args.
-rw-r--r--lisp/emacs-lisp/rx.el427
1 files changed, 295 insertions, 132 deletions
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
index 6656cf5ed3c..042d711ee3d 100644
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -32,6 +32,22 @@
32;; from the bugs mentioned in the commentary section of Sregex, and 32;; from the bugs mentioned in the commentary section of Sregex, and
33;; uses a nicer syntax (IMHO, of course :-). 33;; uses a nicer syntax (IMHO, of course :-).
34 34
35;; This significantly extended version of the original is almost
36;; compatible with Sregex. The only incompatibility I (fx) know of is
37;; that the `repeat' form can't have multiple regexp args.
38
39;; Now alternative forms are provided for a degree of compatibility
40;; with Shivers' attempted definitive SRE notation
41;; <URL:http://www.ai.mit.edu/~shivers/sre.txt>. SRE forms not
42;; catered for include: dsm, uncase, w/case, w/nocase, ,@<exp>,
43;; ,<exp>, (word ...), word+, posix-string, and character class forms.
44;; Some forms are inconsistent with SRE, either for historical reasons
45;; or because of the implementation -- simple translation into Emacs
46;; regexp strings. These include: any, word. Also, case-sensitivity
47;; and greediness are controlled by variables external to the regexp,
48;; and you need to feed the forms to the `posix-' functions to get
49;; SRE's POSIX semantics. There are probably more difficulties.
50
35;; Rx translates a sexp notation for regular expressions into the 51;; Rx translates a sexp notation for regular expressions into the
36;; usual string notation. The translation can be done at compile-time 52;; usual string notation. The translation can be done at compile-time
37;; by using the `rx' macro. It can be done at run-time by calling 53;; by using the `rx' macro. It can be done at run-time by calling
@@ -94,62 +110,103 @@
94 110
95;;; Code: 111;;; Code:
96 112
97
98(defconst rx-constituents 113(defconst rx-constituents
99 '((and . (rx-and 1 nil)) 114 '((and . (rx-and 1 nil))
115 (seq . and) ; SRE
116 (: . and) ; SRE
117 (sequence . and) ; sregex
100 (or . (rx-or 1 nil)) 118 (or . (rx-or 1 nil))
119 (| . or) ; SRE
101 (not-newline . ".") 120 (not-newline . ".")
121 (nonl . not-newline) ; SRE
102 (anything . ".\\|\n") 122 (anything . ".\\|\n")
103 (any . (rx-any 1 1 rx-check-any)) 123 (any . (rx-any 1 nil rx-check-any)) ; inconsistent with SRE
104 (in . any) 124 (in . any)
125 (char . any) ; sregex
126 (not-char . (rx-not-char 1 nil rx-check-any)) ; sregex
105 (not . (rx-not 1 1 rx-check-not)) 127 (not . (rx-not 1 1 rx-check-not))
128 ;; Partially consistent with sregex, whose `repeat' is like our
129 ;; `**'. (`repeat' with optional max arg and multiple sexp forms
130 ;; is ambiguous.)
106 (repeat . (rx-repeat 2 3)) 131 (repeat . (rx-repeat 2 3))
107 (submatch . (rx-submatch 1 nil)) 132 (= . (rx-= 2 nil)) ; SRE
133 (>= . (rx->= 2 nil)) ; SRE
134 (** . (rx-** 2 nil)) ; SRE
135 (submatch . (rx-submatch 1 nil)) ; SRE
108 (group . submatch) 136 (group . submatch)
109 (zero-or-more . (rx-kleene 1 1)) 137 (zero-or-more . (rx-kleene 1 nil))
110 (one-or-more . (rx-kleene 1 1)) 138 (one-or-more . (rx-kleene 1 nil))
111 (zero-or-one . (rx-kleene 1 1)) 139 (zero-or-one . (rx-kleene 1 nil))
112 (\? . zero-or-one) 140 (\? . zero-or-one) ; SRE
113 (\?? . zero-or-one) 141 (\?? . zero-or-one)
114 (* . zero-or-more) 142 (* . zero-or-more) ; SRE
115 (*? . zero-or-more) 143 (*? . zero-or-more)
116 (0+ . zero-or-more) 144 (0+ . zero-or-more)
117 (+ . one-or-more) 145 (+ . one-or-more) ; SRE
118 (+? . one-or-more) 146 (+? . one-or-more)
119 (1+ . one-or-more) 147 (1+ . one-or-more)
120 (optional . zero-or-one) 148 (optional . zero-or-one)
149 (opt . zero-or-one) ; sregex
121 (minimal-match . (rx-greedy 1 1)) 150 (minimal-match . (rx-greedy 1 1))
122 (maximal-match . (rx-greedy 1 1)) 151 (maximal-match . (rx-greedy 1 1))
123 (backref . (rx-backref 1 1 rx-check-backref)) 152 (backref . (rx-backref 1 1 rx-check-backref))
124 (line-start . "^") 153 (line-start . "^")
154 (bol . line-start) ; SRE
125 (line-end . "$") 155 (line-end . "$")
156 (eol . line-end) ; SRE
126 (string-start . "\\`") 157 (string-start . "\\`")
158 (bos . string-start) ; SRE
159 (bot . string-start) ; sregex
127 (string-end . "\\'") 160 (string-end . "\\'")
161 (eos . string-end) ; SRE
162 (eot . string-end) ; sregex
128 (buffer-start . "\\`") 163 (buffer-start . "\\`")
129 (buffer-end . "\\'") 164 (buffer-end . "\\'")
130 (point . "\\=") 165 (point . "\\=")
131 (word-start . "\\<") 166 (word-start . "\\<")
167 (bow . word-start) ; SRE
132 (word-end . "\\>") 168 (word-end . "\\>")
169 (eow . word-end) ; SRE
133 (word-boundary . "\\b") 170 (word-boundary . "\\b")
171 (not-word-boundary . "\\B") ; sregex
134 (syntax . (rx-syntax 1 1)) 172 (syntax . (rx-syntax 1 1))
173 (not-syntax . (rx-not-syntax 1 1)) ; sregex
135 (category . (rx-category 1 1 rx-check-category)) 174 (category . (rx-category 1 1 rx-check-category))
136 (eval . (rx-eval 1 1)) 175 (eval . (rx-eval 1 1))
137 (regexp . (rx-regexp 1 1 stringp)) 176 (regexp . (rx-regexp 1 1 stringp))
138 (digit . "[[:digit:]]") 177 (digit . "[[:digit:]]")
139 (control . "[[:cntrl:]]") 178 (numeric . digit) ; SRE
140 (hex-digit . "[[:xdigit:]]") 179 (num . digit) ; SRE
141 (blank . "[[:blank:]]") 180 (control . "[[:cntrl:]]") ; SRE
142 (graphic . "[[:graph:]]") 181 (cntrl . control) ; SRE
143 (printing . "[[:print:]]") 182 (hex-digit . "[[:xdigit:]]") ; SRE
144 (alphanumeric . "[[:alnum:]]") 183 (hex . hex-digit) ; SRE
184 (xdigit . hex-digit) ; SRE
185 (blank . "[[:blank:]]") ; SRE
186 (graphic . "[[:graph:]]") ; SRE
187 (graph . graphic) ; SRE
188 (printing . "[[:print:]]") ; SRE
189 (print . printing) ; SRE
190 (alphanumeric . "[[:alnum:]]") ; SRE
191 (alnum . alphanumeric) ; SRE
145 (letter . "[[:alpha:]]") 192 (letter . "[[:alpha:]]")
146 (ascii . "[[:ascii:]]") 193 (alphabetic . letter) ; SRE
194 (alpha . letter) ; SRE
195 (ascii . "[[:ascii:]]") ; SRE
147 (nonascii . "[[:nonascii:]]") 196 (nonascii . "[[:nonascii:]]")
148 (lower . "[[:lower:]]") 197 (lower . "[[:lower:]]") ; SRE
149 (punctuation . "[[:punct:]]") 198 (lower-case . lower) ; SRE
150 (space . "[[:space:]]") 199 (punctuation . "[[:punct:]]") ; SRE
151 (upper . "[[:upper:]]") 200 (punct . punctuation) ; SRE
152 (word . "[[:word:]]")) 201 (space . "[[:space:]]") ; SRE
202 (whitespace . space) ; SRE
203 (white . space) ; SRE
204 (upper . "[[:upper:]]") ; SRE
205 (upper-case . upper) ; SRE
206 (word . "[[:word:]]") ; inconsistent with SRE
207 (wordchar . word) ; sregex
208 (not-wordchar . "[^[:word:]]") ; sregex (use \\W?)
209 )
153 "Alist of sexp form regexp constituents. 210 "Alist of sexp form regexp constituents.
154Each element of the alist has the form (SYMBOL . DEFN). 211Each element of the alist has the form (SYMBOL . DEFN).
155SYMBOL is a valid constituent of sexp regular expressions. 212SYMBOL is a valid constituent of sexp regular expressions.
@@ -178,7 +235,23 @@ all arguments must satisfy PREDICATE.")
178 (comment-start . ?<) 235 (comment-start . ?<)
179 (comment-end . ?>) 236 (comment-end . ?>)
180 (string-delimiter . ?|) 237 (string-delimiter . ?|)
181 (comment-delimiter . ?!)) 238 (comment-delimiter . ?!)
239 ;; sregex compatibility
240 (- . ?-)
241 (\. . ?.)
242 (w . ?w)
243 (_ . ?_)
244 (\( . ?\()
245 (\) . ?\))
246 (\' . ?\')
247 (\" . ?\")
248 (\$ . ?$)
249 (\\ . ?\\)
250 (/ . ?/)
251 (< . ?<)
252 (> . ?>)
253 (| . ?|)
254 (! . ?!))
182 "Alist mapping Rx syntax symbols to syntax characters. 255 "Alist mapping Rx syntax symbols to syntax characters.
183Each entry has the form (SYMBOL . CHAR), where SYMBOL is a valid 256Each entry has the form (SYMBOL . CHAR), where SYMBOL is a valid
184symbol in `(syntax SYMBOL)', and CHAR is the syntax character 257symbol in `(syntax SYMBOL)', and CHAR is the syntax character
@@ -252,6 +325,8 @@ See also `rx-constituents'."
252 325
253(defun rx-check (form) 326(defun rx-check (form)
254 "Check FORM according to its car's parsing info." 327 "Check FORM according to its car's parsing info."
328 (unless (listp form)
329 (error "rx `%s' needs argument(s)" form))
255 (let* ((rx (rx-info (car form))) 330 (let* ((rx (rx-info (car form)))
256 (nargs (1- (length form))) 331 (nargs (1- (length form)))
257 (min-args (nth 1 rx)) 332 (min-args (nth 1 rx))
@@ -297,53 +372,61 @@ FORM is of the form `(and FORM1 ...)'."
297 "\\)"))) 372 "\\)")))
298 373
299 374
300(defun rx-quote-for-set (string) 375(defvar bracket) ; dynamically bound in `rx-any'
301 "Transform STRING for use in a character set.
302If STRING contains a `]', move it to the front.
303If STRING starts with a '^', move it to the end."
304 (when (string-match "\\`\\(\\(?:.\\|\n\\)+\\)\\]\\(\\(?:.\\|\n\\)\\)*\\'"
305 string)
306 (setq string (concat "]" (match-string 1 string)
307 (match-string 2 string))))
308 (when (string-match "\\`^\\(\\(?:.\\|\n\\)+\\)\\'" string)
309 (setq string (concat (substring string 1) "^")))
310 string)
311
312 376
313(defun rx-check-any (arg) 377(defun rx-check-any (arg)
314 "Check arg ARG for Rx `any'." 378 "Check arg ARG for Rx `any'."
315 (cond ((integerp arg) t) 379 (if (integerp arg)
316 ((and (stringp arg) (zerop (length arg))) 380 (setq arg (string arg)))
317 (error "String arg for rx `any' must not be empty")) 381 (when (stringp arg)
318 ((stringp arg) t) 382 (if (zerop (length arg))
319 (t 383 (error "String arg for Rx `any' must not be empty"))
320 (error "rx `any' requires string or character arg")))) 384 ;; Quote ^ at start; don't bother to check whether this is first arg.
321 385 (if (eq ?^ (aref arg 0))
386 (setq arg (concat "\\" arg)))
387 ;; Remove ] and set flag for adding it to start of overall result.
388 (when (string-match "]" arg)
389 (setq arg (replace-regexp-in-string "]" "" arg)
390 bracket "]")))
391 (when (symbolp arg)
392 (let ((translation (condition-case nil
393 (rx-to-string arg 'no-group)
394 (error nil))))
395 (unless translation (error "Invalid char class `%s' in Rx `any'" arg))
396 (setq arg (substring translation 1 -1)))) ; strip outer brackets
397 ;; sregex compatibility
398 (when (and (integerp (car-safe arg))
399 (integerp (cdr-safe arg)))
400 (setq arg (string (car arg) ?- (cdr arg))))
401 (unless (stringp arg)
402 (error "rx `any' requires string, character, char pair or char class args"))
403 arg)
322 404
323(defun rx-any (form) 405(defun rx-any (form)
324 "Parse and produce code from FORM, which is `(any STRING)'. 406 "Parse and produce code from FORM, which is `(any ARG ...)'.
325STRING is optional. If it is omitted, build a regexp that 407ARG is optional."
326matches anything."
327 (rx-check form) 408 (rx-check form)
328 (let ((arg (cadr form))) 409 (let* (bracket
329 (cond ((integerp arg) 410 (args (mapcar #'rx-check-any (cdr form)))) ; side-effects `bracket'
330 (char-to-string arg)) 411 ;; If there was a ?- in the form, move it to the front to avoid
331 ((= (length arg) 1) 412 ;; accidental range.
332 arg) 413 (if (member "-" args)
333 (t 414 (setq args (cons "-" (delete "-" args))))
334 (concat "[" (rx-quote-for-set (cadr form)) "]"))))) 415 (apply #'concat "[" bracket (append args '("]")))))
335 416
336 417
337(defun rx-check-not (arg) 418(defun rx-check-not (arg)
338 "Check arg ARG for Rx `not'." 419 "Check arg ARG for Rx `not'."
339 (unless (or (memq form 420 (unless (or (and (symbolp arg)
340 '(digit control hex-digit blank graphic printing 421 (string-match "\\`\\[\\[:[-a-z]:]]\\'"
341 alphanumeric letter ascii nonascii lower 422 (condition-case nil
342 punctuation space upper word)) 423 (rx-to-string arg 'no-group)
343 (and (consp form) 424 (error ""))))
344 (memq (car form) '(not any in syntax category:)))) 425 (eq arg 'word-boundary)
345 (error "rx `not' syntax error: %s" form)) 426 (and (consp arg)
346 t) 427 (memq (car arg) '(not any in syntax category))))
428 (error "rx `not' syntax error: %s" arg))
429 t)
347 430
348 431
349(defun rx-not (form) 432(defun rx-not (form)
@@ -355,24 +438,67 @@ matches anything."
355 (if (= (length result) 4) 438 (if (= (length result) 4)
356 (substring result 2 3) 439 (substring result 2 3)
357 (concat "[" (substring result 2)))) 440 (concat "[" (substring result 2))))
358 ((string-match "\\`\\[" result) 441 ((eq ?\[ (aref result 0))
359 (concat "[^" (substring result 1))) 442 (concat "[^" (substring result 1)))
360 ((string-match "\\`\\\\s." result) 443 ((string-match "\\`\\\\[scb]" result)
361 (concat "\\S" (substring result 2))) 444 (concat (capitalize (substring result 0 2)) (substring result 2)))
362 ((string-match "\\`\\\\S." result)
363 (concat "\\s" (substring result 2)))
364 ((string-match "\\`\\\\c." result)
365 (concat "\\C" (substring result 2)))
366 ((string-match "\\`\\\\C." result)
367 (concat "\\c" (substring result 2)))
368 ((string-match "\\`\\\\B" result)
369 (concat "\\b" (substring result 2)))
370 ((string-match "\\`\\\\b" result)
371 (concat "\\B" (substring result 2)))
372 (t 445 (t
373 (concat "[^" result "]"))))) 446 (concat "[^" result "]")))))
374 447
375 448
449(defun rx-not-char (form)
450 "Parse and produce code from FORM. FORM is `(not-char ...)'."
451 (rx-check form)
452 (rx-not `(not (in ,@(cdr form)))))
453
454
455(defun rx-not-syntax (form)
456 "Parse and produce code from FORM. FORM is `(not-syntax SYNTAX)'."
457 (rx-check form)
458 (rx-not `(not (syntax ,@(cdr form)))))
459
460
461(defun rx-trans-forms (form &optional skip)
462 "If FORM's length is greater than two, transform it to length two.
463A form (HEAD REST ...) becomes (HEAD (and REST ...)).
464If SKIP is non-nil, allow that number of items after the head, i.e.
465`(= N REST ...)' becomes `(= N (and REST ...))' if SKIP is 1."
466 (unless skip (setq skip 0))
467 (let ((tail (nthcdr (1+ skip) form)))
468 (if (= (length tail) 1)
469 form
470 (let ((form (copy-sequence form)))
471 (setcdr (nthcdr skip form) (list (cons 'and tail)))
472 form))))
473
474
475(defun rx-= (form)
476 "Parse and produce code from FORM `(= N ...)'."
477 (rx-check form)
478 (setq form (rx-trans-forms form 1))
479 (unless (and (integerp (nth 1 form))
480 (> (nth 1 form) 0))
481 (error "rx `=' requires positive integer first arg"))
482 (format "%s\\{%d\\}" (rx-to-string (nth 2 form)) (nth 1 form)))
483
484
485(defun rx->= (form)
486 "Parse and produce code from FORM `(>= N ...)'."
487 (rx-check form)
488 (setq form (rx-trans-forms form 1))
489 (unless (and (integerp (nth 1 form))
490 (> (nth 1 form) 0))
491 (error "rx `>=' requires positive integer first arg"))
492 (format "%s\\{%d,\\}" (rx-to-string (nth 2 form)) (nth 1 form)))
493
494
495(defun rx-** (form)
496 "Parse and produce code from FORM `(** N M ...)'."
497 (rx-check form)
498 (setq form (cons 'repeat (cdr (rx-trans-forms form 2))))
499 (rx-to-string form))
500
501
376(defun rx-repeat (form) 502(defun rx-repeat (form)
377 "Parse and produce code from FORM. 503 "Parse and produce code from FORM.
378FORM is either `(repeat N FORM1)' or `(repeat N M FORM1)'." 504FORM is either `(repeat N FORM1)' or `(repeat N M FORM1)'."
@@ -419,6 +545,7 @@ If OP is one of `*?', `+?', `??', produce a non-greedy regexp.
419If OP is anything else, produce a greedy regexp if `rx-greedy-flag' 545If OP is anything else, produce a greedy regexp if `rx-greedy-flag'
420is non-nil." 546is non-nil."
421 (rx-check form) 547 (rx-check form)
548 (setq form (rx-trans-forms form))
422 (let ((suffix (cond ((memq (car form) '(* + ? )) "") 549 (let ((suffix (cond ((memq (car form) '(* + ? )) "")
423 ((memq (car form) '(*? +? ??)) "?") 550 ((memq (car form) '(*? +? ??)) "?")
424 (rx-greedy-flag "") 551 (rx-greedy-flag "")
@@ -483,7 +610,7 @@ of all atomic regexps."
483 610
484 611
485(defun rx-category (form) 612(defun rx-category (form)
486 "Parse and produce code from FORM, which is `(category SYMBOL ...)'." 613 "Parse and produce code from FORM, which is `(category SYMBOL)'."
487 (rx-check form) 614 (rx-check form)
488 (let ((char (if (integerp (cadr form)) 615 (let ((char (if (integerp (cadr form))
489 (cadr form) 616 (cadr form)
@@ -543,8 +670,9 @@ NO-GROUP non-nil means don't put shy groups around the result."
543 670
544 671
545;;;###autoload 672;;;###autoload
546(defmacro rx (regexp) 673(defmacro rx (&rest regexps)
547 "Translate a regular expression REGEXP in sexp form to a regexp string. 674 "Translate regular expressions REGEXPS in sexp form to a regexp string.
675REGEXPS is a non-empty sequence of forms of the sort listed below.
548See also `rx-to-string' for how to do such a translation at run-time. 676See also `rx-to-string' for how to do such a translation at run-time.
549 677
550The following are valid subforms of regular expressions in sexp 678The following are valid subforms of regular expressions in sexp
@@ -556,53 +684,58 @@ STRING
556CHAR 684CHAR
557 matches character CHAR literally. 685 matches character CHAR literally.
558 686
559`not-newline' 687`not-newline', `nonl'
560 matches any character except a newline. 688 matches any character except a newline.
561 . 689 .
562`anything' 690`anything'
563 matches any character 691 matches any character
564 692
565`(any SET)' 693`(any SET ...)'
566 matches any character in SET. SET may be a character or string. 694`(in SET ...)'
695`(char SET ...)'
696 matches any character in SET .... SET may be a character or string.
567 Ranges of characters can be specified as `A-Z' in strings. 697 Ranges of characters can be specified as `A-Z' in strings.
698 Ranges may also be specified as conses like `(?A . ?Z)'.
568 699
569'(in SET)' 700 SET may also be the name of a character class: `digit',
570 like `any'. 701 `control', `hex-digit', `blank', `graph', `print', `alnum',
702 `alpha', `ascii', `nonascii', `lower', `punct', `space', `upper',
703 `word', or one of their synonyms.
571 704
572`(not (any SET))' 705`(not (any SET ...))'
573 matches any character not in SET 706 matches any character not in SET ...
574 707
575`line-start' 708`line-start', `bol'
576 matches the empty string, but only at the beginning of a line 709 matches the empty string, but only at the beginning of a line
577 in the text being matched 710 in the text being matched
578 711
579`line-end' 712`line-end', `eol'
580 is similar to `line-start' but matches only at the end of a line 713 is similar to `line-start' but matches only at the end of a line
581 714
582`string-start' 715`string-start', `bos', `bot'
583 matches the empty string, but only at the beginning of the 716 matches the empty string, but only at the beginning of the
584 string being matched against. 717 string being matched against.
585 718
586`string-end' 719`string-end', `eos', `eot'
587 matches the empty string, but only at the end of the 720 matches the empty string, but only at the end of the
588 string being matched against. 721 string being matched against.
589 722
590`buffer-start' 723`buffer-start'
591 matches the empty string, but only at the beginning of the 724 matches the empty string, but only at the beginning of the
592 buffer being matched against. 725 buffer being matched against. Actually equivalent to `string-start'.
593 726
594`buffer-end' 727`buffer-end'
595 matches the empty string, but only at the end of the 728 matches the empty string, but only at the end of the
596 buffer being matched against. 729 buffer being matched against. Actually equivalent to `string-end'.
597 730
598`point' 731`point'
599 matches the empty string, but only at point. 732 matches the empty string, but only at point.
600 733
601`word-start' 734`word-start', `bow'
602 matches the empty string, but only at the beginning or end of a 735 matches the empty string, but only at the beginning or end of a
603 word. 736 word.
604 737
605`word-end' 738`word-end', `eow'
606 matches the empty string, but only at the end of a word. 739 matches the empty string, but only at the end of a word.
607 740
608`word-boundary' 741`word-boundary'
@@ -610,34 +743,35 @@ CHAR
610 word. 743 word.
611 744
612`(not word-boundary)' 745`(not word-boundary)'
746`not-word-boundary'
613 matches the empty string, but not at the beginning or end of a 747 matches the empty string, but not at the beginning or end of a
614 word. 748 word.
615 749
616`digit' 750`digit', `numeric', `num'
617 matches 0 through 9. 751 matches 0 through 9.
618 752
619`control' 753`control', `cntrl'
620 matches ASCII control characters. 754 matches ASCII control characters.
621 755
622`hex-digit' 756`hex-digit', `hex', `xdigit'
623 matches 0 through 9, a through f and A through F. 757 matches 0 through 9, a through f and A through F.
624 758
625`blank' 759`blank'
626 matches space and tab only. 760 matches space and tab only.
627 761
628`graphic' 762`graphic', `graph'
629 matches graphic characters--everything except ASCII control chars, 763 matches graphic characters--everything except ASCII control chars,
630 space, and DEL. 764 space, and DEL.
631 765
632`printing' 766`printing', `print'
633 matches printing characters--everything except ASCII control chars 767 matches printing characters--everything except ASCII control chars
634 and DEL. 768 and DEL.
635 769
636`alphanumeric' 770`alphanumeric', `alnum'
637 matches letters and digits. (But at present, for multibyte characters, 771 matches letters and digits. (But at present, for multibyte characters,
638 it matches anything that has word syntax.) 772 it matches anything that has word syntax.)
639 773
640`letter' 774`letter', `alphabetic', `alpha'
641 matches letters. (But at present, for multibyte characters, 775 matches letters. (But at present, for multibyte characters,
642 it matches anything that has word syntax.) 776 it matches anything that has word syntax.)
643 777
@@ -647,25 +781,29 @@ CHAR
647`nonascii' 781`nonascii'
648 matches non-ASCII (multibyte) characters. 782 matches non-ASCII (multibyte) characters.
649 783
650`lower' 784`lower', `lower-case'
651 matches anything lower-case. 785 matches anything lower-case.
652 786
653`upper' 787`upper', `upper-case'
654 matches anything upper-case. 788 matches anything upper-case.
655 789
656`punctuation' 790`punctuation', `punct'
657 matches punctuation. (But at present, for multibyte characters, 791 matches punctuation. (But at present, for multibyte characters,
658 it matches anything that has non-word syntax.) 792 it matches anything that has non-word syntax.)
659 793
660`space' 794`space', `whitespace', `white'
661 matches anything that has whitespace syntax. 795 matches anything that has whitespace syntax.
662 796
663`word' 797`word', `wordchar'
664 matches anything that has word syntax. 798 matches anything that has word syntax.
665 799
800`not-wordchar'
801 matches anything that has non-word syntax.
802
666`(syntax SYNTAX)' 803`(syntax SYNTAX)'
667 matches a character with syntax SYNTAX. SYNTAX must be one 804 matches a character with syntax SYNTAX. SYNTAX must be one
668 of the following symbols. 805 of the following symbols, or a symbol corresponding to the syntax
806 character, e.g. `\\.' for `\\s.'.
669 807
670 `whitespace' (\\s- in string notation) 808 `whitespace' (\\s- in string notation)
671 `punctuation' (\\s.) 809 `punctuation' (\\s.)
@@ -684,7 +822,7 @@ CHAR
684 `comment-delimiter' (\\s!) 822 `comment-delimiter' (\\s!)
685 823
686`(not (syntax SYNTAX))' 824`(not (syntax SYNTAX))'
687 matches a character that has not syntax SYNTAX. 825 matches a character that doesn't have syntax SYNTAX.
688 826
689`(category CATEGORY)' 827`(category CATEGORY)'
690 matches a character with category CATEGORY. CATEGORY must be 828 matches a character with category CATEGORY. CATEGORY must be
@@ -710,7 +848,7 @@ CHAR
710 `japanese-katakana-two-byte' (\\cK) 848 `japanese-katakana-two-byte' (\\cK)
711 `korean-hangul-two-byte' (\\cN) 849 `korean-hangul-two-byte' (\\cN)
712 `cyrillic-two-byte' (\\cY) 850 `cyrillic-two-byte' (\\cY)
713 `combining-diacritic' (\\c^) 851 `combining-diacritic' (\\c^)
714 `ascii' (\\ca) 852 `ascii' (\\ca)
715 `arabic' (\\cb) 853 `arabic' (\\cb)
716 `chinese' (\\cc) 854 `chinese' (\\cc)
@@ -731,12 +869,16 @@ CHAR
731 `can-break' (\\c|) 869 `can-break' (\\c|)
732 870
733`(not (category CATEGORY))' 871`(not (category CATEGORY))'
734 matches a character that has not category CATEGORY. 872 matches a character that doesn't have category CATEGORY.
735 873
736`(and SEXP1 SEXP2 ...)' 874`(and SEXP1 SEXP2 ...)'
875`(: SEXP1 SEXP2 ...)'
876`(seq SEXP1 SEXP2 ...)'
877`(sequence SEXP1 SEXP2 ...)'
737 matches what SEXP1 matches, followed by what SEXP2 matches, etc. 878 matches what SEXP1 matches, followed by what SEXP2 matches, etc.
738 879
739`(submatch SEXP1 SEXP2 ...)' 880`(submatch SEXP1 SEXP2 ...)'
881`(group SEXP1 SEXP2 ...)'
740 like `and', but makes the match accessible with `match-end', 882 like `and', but makes the match accessible with `match-end',
741 `match-beginning', and `match-string'. 883 `match-beginning', and `match-string'.
742 884
@@ -744,6 +886,7 @@ CHAR
744 another name for `submatch'. 886 another name for `submatch'.
745 887
746`(or SEXP1 SEXP2 ...)' 888`(or SEXP1 SEXP2 ...)'
889`(| SEXP1 SEXP2 ...)'
747 matches anything that matches SEXP1 or SEXP2, etc. If all 890 matches anything that matches SEXP1 or SEXP2, etc. If all
748 args are strings, use `regexp-opt' to optimize the resulting 891 args are strings, use `regexp-opt' to optimize the resulting
749 regular expression. 892 regular expression.
@@ -757,47 +900,55 @@ CHAR
757`(maximal-match SEXP)' 900`(maximal-match SEXP)'
758 produce a greedy regexp for SEXP. This is the default. 901 produce a greedy regexp for SEXP. This is the default.
759 902
760`(zero-or-more SEXP)' 903Below, `SEXP ...' represents a sequence of regexp forms, treated as if
761 matches zero or more occurrences of what SEXP matches. 904enclosed in `(and ...)'.
762
763`(0+ SEXP)'
764 like `zero-or-more'.
765 905
766`(* SEXP)' 906`(zero-or-more SEXP ...)'
767 like `zero-or-more', but always produces a greedy regexp. 907`(0+ SEXP ...)'
908 matches zero or more occurrences of what SEXP ... matches.
768 909
769`(*? SEXP)' 910`(* SEXP ...)'
770 like `zero-or-more', but always produces a non-greedy regexp. 911 like `zero-or-more', but always produces a greedy regexp, independent
912 of `rx-greedy-flag'.
771 913
772`(one-or-more SEXP)' 914`(*? SEXP ...)'
773 matches one or more occurrences of A. 915 like `zero-or-more', but always produces a non-greedy regexp,
916 independent of `rx-greedy-flag'.
774 917
775`(1+ SEXP)' 918`(one-or-more SEXP ...)'
776 like `one-or-more'. 919`(1+ SEXP ...)'
920 matches one or more occurrences of SEXP ...
777 921
778`(+ SEXP)' 922`(+ SEXP ...)'
779 like `one-or-more', but always produces a greedy regexp. 923 like `one-or-more', but always produces a greedy regexp.
780 924
781`(+? SEXP)' 925`(+? SEXP ...)'
782 like `one-or-more', but always produces a non-greedy regexp. 926 like `one-or-more', but always produces a non-greedy regexp.
783 927
784`(zero-or-one SEXP)' 928`(zero-or-one SEXP ...)'
929`(optional SEXP ...)'
930`(opt SEXP ...)'
785 matches zero or one occurrences of A. 931 matches zero or one occurrences of A.
786 932
787`(optional SEXP)' 933`(? SEXP ...)'
788 like `zero-or-one'.
789
790`(? SEXP)'
791 like `zero-or-one', but always produces a greedy regexp. 934 like `zero-or-one', but always produces a greedy regexp.
792 935
793`(?? SEXP)' 936`(?? SEXP ...)'
794 like `zero-or-one', but always produces a non-greedy regexp. 937 like `zero-or-one', but always produces a non-greedy regexp.
795 938
796`(repeat N SEXP)' 939`(repeat N SEXP)'
797 matches N occurrences of what SEXP matches. 940`(= N SEXP ...)'
941 matches N occurrences.
942
943`(>= N SEXP ...)'
944 matches N or more occurrences.
798 945
799`(repeat N M SEXP)' 946`(repeat N M SEXP)'
800 matches N to M occurrences of what SEXP matches. 947`(** N M SEXP ...)'
948 matches N to M occurrences.
949
950`(backref N)'
951 matches what was matched previously by submatch N.
801 952
802`(backref N)' 953`(backref N)'
803 matches what was matched previously by submatch N. 954 matches what was matched previously by submatch N.
@@ -811,9 +962,21 @@ CHAR
811 962
812`(regexp REGEXP)' 963`(regexp REGEXP)'
813 include REGEXP in string notation in the result." 964 include REGEXP in string notation in the result."
814 965 (cond ((null regexps)
815 (rx-to-string regexp)) 966 (error "No regexp"))
816 967 ((cdr regexps)
968 (rx-to-string `(and ,@regexps) t))
969 (t
970 (rx-to-string (car regexps) t))))
971
972;; ;; sregex.el replacement
973
974;; ;;;###autoload (provide 'sregex)
975;; ;;;###autoload (autoload 'sregex "rx")
976;; (defalias 'sregex 'rx-to-string)
977;; ;;;###autoload (autoload 'sregexq "rx" nil nil 'macro)
978;; (defalias 'sregexq 'rx)
979
817(provide 'rx) 980(provide 'rx)
818 981
819;;; arch-tag: 12d01a63-0008-42bb-ab8c-1c7d63be370b 982;;; arch-tag: 12d01a63-0008-42bb-ab8c-1c7d63be370b