aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChong Yidong2008-10-07 18:08:26 +0000
committerChong Yidong2008-10-07 18:08:26 +0000
commit5dbe5c8fb72de9ecb3b19697180c9144f4d49494 (patch)
tree43eb6cf64ccbe9592865a3616a32ebf17bb4dbb9
parenta469adf3e61bdead12a52dd3dce294a9fde37aef (diff)
downloademacs-5dbe5c8fb72de9ecb3b19697180c9144f4d49494.tar.gz
emacs-5dbe5c8fb72de9ecb3b19697180c9144f4d49494.zip
(rx-constituents): Change `anything' to call
rx-anything. Change `not-wordchar' assignment to "\\W" from "[^[:word:]]". (rx-group-if): New function. (rx-parent): New variable. (rx-and, rx-or): Put shy groups only when necessary. (rx-bracket): Remove. (rx-anything): New function. (rx-any-delete-from-range, rx-any-condense-range) (rx-check-any-string): New functions. (rx-check-any): Return result as a list. Don't convert chars to strings. Don't prepend "\\" to "^". Don't search for close bracket. Check char category string. Call rx-form instead of rx-to-string. (rx-any): Rebuid to complete the function. (rx-check-not): Fix char category regexp pattern string. Call rx-form instead of rx-to-string. (rx-not): Call rx-form instead of rx-to-string. Convert "[^]" to "[^^]". Call regexp-quote for one char string when not called from rx-not. Add "\\w", and toggle to upcase. Add the case of "\\[SCBW]" to toggle. (rx-=, rx->=, rx -**, rx-repeat, rx-submatch): Call rx-form instead of rx-to-string. (rx-kleene): Call rx-form instead of rx-to-string. Call rx-group-if to adjust putting of shy groups. (rx-atomic-p): Make check more precisely. (rx-eval, rx-greedy): Call rx-form instead of rx-to-string. (rx-regexp): Call rx-group-if. (rx-form): New function. (rx-to-string): Call rx-form, rx-group-if. Refine definition of NO-GROUP.
-rw-r--r--lisp/emacs-lisp/rx.el381
1 files changed, 275 insertions, 106 deletions
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
index 5e76256cfe6..c5e94874793 100644
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -118,7 +118,7 @@
118 (| . or) ; SRE 118 (| . or) ; SRE
119 (not-newline . ".") 119 (not-newline . ".")
120 (nonl . not-newline) ; SRE 120 (nonl . not-newline) ; SRE
121 (anything . "\\(?:.\\|\n\\)") 121 (anything . (rx-anything 0 nil))
122 (any . (rx-any 1 nil rx-check-any)) ; inconsistent with SRE 122 (any . (rx-any 1 nil rx-check-any)) ; inconsistent with SRE
123 (in . any) 123 (in . any)
124 (char . any) ; sregex 124 (char . any) ; sregex
@@ -206,8 +206,7 @@
206 (upper-case . upper) ; SRE 206 (upper-case . upper) ; SRE
207 (word . "[[:word:]]") ; inconsistent with SRE 207 (word . "[[:word:]]") ; inconsistent with SRE
208 (wordchar . word) ; sregex 208 (wordchar . word) ; sregex
209 (not-wordchar . "[^[:word:]]") ; sregex (use \\W?) 209 (not-wordchar . "\\W"))
210 )
211 "Alist of sexp form regexp constituents. 210 "Alist of sexp form regexp constituents.
212Each element of the alist has the form (SYMBOL . DEFN). 211Each element of the alist has the form (SYMBOL . DEFN).
213SYMBOL is a valid constituent of sexp regular expressions. 212SYMBOL is a valid constituent of sexp regular expressions.
@@ -332,82 +331,237 @@ See also `rx-constituents'."
332 (car form) type-pred)))))) 331 (car form) type-pred))))))
333 332
334 333
334(defun rx-group-if (regexp group)
335 "Put shy groups around REGEXP if seemingly necessary when GROUP
336is non-nil."
337 (cond
338 ;; for some repetition
339 ((eq group '*) (if (rx-atomic-p regexp) (setq group nil)))
340 ;; for concatenation
341 ((eq group ':)
342 (if (rx-atomic-p
343 (if (string-match
344 "\\(?:[?*+]\\??\\|\\\\{[0-9]*,?[0-9]*\\\\}\\)\\'" regexp)
345 (substring regexp 0 (match-beginning 0))
346 regexp))
347 (setq group nil)))
348 ;; for OR
349 ((eq group '|) (setq group nil))
350 ;; do anyway
351 ((eq group t))
352 ((rx-atomic-p regexp t) (setq group nil)))
353 (if group
354 (concat "\\(?:" regexp "\\)")
355 regexp))
356
357
358(defvar rx-parent)
359;; dynamically bound in some functions.
360
361
335(defun rx-and (form) 362(defun rx-and (form)
336 "Parse and produce code from FORM. 363 "Parse and produce code from FORM.
337FORM is of the form `(and FORM1 ...)'." 364FORM is of the form `(and FORM1 ...)'."
338 (rx-check form) 365 (rx-check form)
339 (concat "\\(?:" 366 (rx-group-if
340 (mapconcat 367 (mapconcat (lambda (x) (rx-form x ':)) (cdr form) nil)
341 (function (lambda (x) (rx-to-string x 'no-group))) 368 (and (memq rx-parent '(* t)) rx-parent)))
342 (cdr form) nil)
343 "\\)"))
344 369
345 370
346(defun rx-or (form) 371(defun rx-or (form)
347 "Parse and produce code from FORM, which is `(or FORM1 ...)'." 372 "Parse and produce code from FORM, which is `(or FORM1 ...)'."
348 (rx-check form) 373 (rx-check form)
349 (let ((all-args-strings t)) 374 (rx-group-if
350 (dolist (arg (cdr form)) 375 (if (memq nil (mapcar 'stringp (cdr form)))
351 (unless (stringp arg) 376 (mapconcat (lambda (x) (rx-form x '|)) (cdr form) "\\|")
352 (setq all-args-strings nil))) 377 (regexp-opt (cdr form)))
353 (concat "\\(?:" 378 (and (memq rx-parent '(: * t)) rx-parent)))
354 (if all-args-strings 379
355 (regexp-opt (cdr form)) 380
356 (mapconcat #'rx-to-string (cdr form) "\\|")) 381(defun rx-anything (form)
357 "\\)"))) 382 "Match any character."
358 383 (if (consp form)
384 (error "rx `anythng' syntax error: %s" form))
385 (rx-or (list 'or 'not-newline ?\n)))
386
387
388(defun rx-any-delete-from-range (char ranges)
389 "Delete by side effect character CHAR from RANGES.
390Only both edges of each range is checked."
391 (let (m)
392 (cond
393 ((memq char ranges) (setq ranges (delq char ranges)))
394 ((setq m (assq char ranges))
395 (if (eq (1+ char) (cdr m))
396 (setcar (memq m ranges) (1+ char))
397 (setcar m (1+ char))))
398 ((setq m (rassq char ranges))
399 (if (eq (1- char) (car m))
400 (setcar (memq m ranges) (1- char))
401 (setcdr m (1- char)))))
402 ranges))
403
404
405(defun rx-any-condense-range (args)
406 "Condense by side effect ARGS as range for Rx `any'."
407 (let (str
408 l)
409 ;; set STR list of all strings
410 ;; set L list of all ranges
411 (mapc (lambda (e) (cond ((stringp e) (push e str))
412 ((numberp e) (push (cons e e) l))
413 (t (push e l))))
414 args)
415 ;; condense overlapped ranges in L
416 (let ((tail (setq l (sort l #'car-less-than-car)))
417 d)
418 (while (setq d (cdr tail))
419 (if (>= (cdar tail) (1- (caar d)))
420 (progn
421 (setcdr (car tail) (max (cdar tail) (cdar d)))
422 (setcdr tail (cdr d)))
423 (setq tail d))))
424 ;; Separate small ranges to single number, and delete dups.
425 (nconc
426 (apply #'nconc
427 (mapcar (lambda (e)
428 (cond
429 ((= (car e) (cdr e)) (list (car e)))
430 ;; ((= (1+ (car e)) (cdr e)) (list (car e) (cdr e)))
431 ((list e))))
432 l))
433 (delete-dups str))))
434
435
436(defun rx-check-any-string (str)
437 "Check string argument STR for Rx `any'."
438 (let ((i 0)
439 c1 c2 l)
440 (if (= 0 (length str))
441 (error "String arg for Rx `any' must not be empty"))
442 (while (string-match ".-." str i)
443 ;; string before range: convert it to characters
444 (if (< i (match-beginning 0))
445 (setq l (nconc
446 l
447 (append (substring str i (match-beginning 0)) nil))))
448 ;; range
449 (setq i (match-end 0)
450 c1 (aref str (match-beginning 0))
451 c2 (aref str (1- i)))
452 (cond
453 ((< c1 c2) (setq l (nconc l (list (cons c1 c2)))))
454 ((= c1 c2) (setq l (nconc l (list c1))))))
455 ;; rest?
456 (if (< i (length str))
457 (setq l (nconc l (append (substring str i) nil))))
458 l))
359 459
360(defvar rx-bracket) ; dynamically bound in `rx-any'
361 460
362(defun rx-check-any (arg) 461(defun rx-check-any (arg)
363 "Check arg ARG for Rx `any'." 462 "Check arg ARG for Rx `any'."
364 (if (integerp arg) 463 (cond
365 (setq arg (string arg))) 464 ((integerp arg) (list arg))
366 (when (stringp arg) 465 ((symbolp arg)
367 (if (zerop (length arg))
368 (error "String arg for Rx `any' must not be empty"))
369 ;; Quote ^ at start; don't bother to check whether this is first arg.
370 (if (eq ?^ (aref arg 0))
371 (setq arg (concat "\\" arg)))
372 ;; Remove ] and set flag for adding it to start of overall result.
373 (when (string-match "\\]" arg)
374 (setq arg (replace-regexp-in-string "\\]" "" arg)
375 rx-bracket "]")))
376 (when (symbolp arg)
377 (let ((translation (condition-case nil 466 (let ((translation (condition-case nil
378 (rx-to-string arg 'no-group) 467 (rx-form arg)
379 (error nil)))) 468 (error nil))))
380 (unless translation (error "Invalid char class `%s' in Rx `any'" arg)) 469 (if (or (null translation)
381 (setq arg (substring translation 1 -1)))) ; strip outer brackets 470 (null (string-match "\\`\\[\\[:[-a-z]+:\\]\\]\\'" translation)))
382 ;; sregex compatibility 471 (error "Invalid char class `%s' in Rx `any'" arg))
383 (when (and (integerp (car-safe arg)) 472 (list (substring translation 1 -1)))) ; strip outer brackets
384 (integerp (cdr-safe arg))) 473 ((and (integerp (car-safe arg)) (integerp (cdr-safe arg)))
385 (setq arg (string (car arg) ?- (cdr arg)))) 474 (list arg))
386 (unless (stringp arg) 475 ((stringp arg) (rx-check-any-string arg))
387 (error "rx `any' requires string, character, char pair or char class args")) 476 ((error
388 arg) 477 "rx `any' requires string, character, char pair or char class args"))))
478
389 479
390(defun rx-any (form) 480(defun rx-any (form)
391 "Parse and produce code from FORM, which is `(any ARG ...)'. 481 "Parse and produce code from FORM, which is `(any ARG ...)'.
392ARG is optional." 482ARG is optional."
393 (rx-check form) 483 (rx-check form)
394 (let* ((rx-bracket nil) 484 (let* ((args (rx-any-condense-range
395 (args (mapcar #'rx-check-any (cdr form)))) ; side-effects `rx-bracket' 485 (apply
396 ;; If there was a ?- in the form, move it to the front to avoid 486 #'nconc
397 ;; accidental range. 487 (mapcar #'rx-check-any (cdr form)))))
398 (if (member "-" args) 488 m
399 (setq args (cons "-" (delete "-" args)))) 489 s)
400 (apply #'concat "[" rx-bracket (append args '("]"))))) 490 (cond
491 ;; single close bracket
492 ;; => "[]...-]" or "[]...--.]"
493 ((memq ?\] args)
494 ;; set ] at the beginning
495 (setq args (cons ?\] (delq ?\] args)))
496 ;; set - at the end
497 (if (or (memq ?- args) (assq ?- args))
498 (setq args (nconc (rx-any-delete-from-range ?- args)
499 (list ?-)))))
500 ;; close bracket starts a range
501 ;; => "[]-....-]" or "[]-.--....]"
502 ((setq m (assq ?\] args))
503 ;; bring it to the beginning
504 (setq args (cons m (delq m args)))
505 (cond ((memq ?- args)
506 ;; to the end
507 (setq args (nconc (delq ?- args) (list ?-))))
508 ((setq m (assq ?- args))
509 ;; next to the bracket's range, make the second range
510 (setcdr args (cons m (delq m args))))))
511 ;; bracket in the end range
512 ;; => "[]...-]"
513 ((setq m (rassq ?\] args))
514 ;; set ] at the beginning
515 (setq args (cons ?\] (rx-any-delete-from-range ?\] args)))
516 ;; set - at the end
517 (if (or (memq ?- args) (assq ?- args))
518 (setq args (nconc (rx-any-delete-from-range ?- args)
519 (list ?-)))))
520 ;; {no close bracket appears}
521 ;;
522 ;; bring single bar to the beginning
523 ((memq ?- args)
524 (setq args (cons ?- (delq ?- args))))
525 ;; bar start a range, bring it to the beginning
526 ((setq m (assq ?- args))
527 (setq args (cons m (delq m args))))
528 ;;
529 ;; hat at the beginning?
530 ((or (eq (car args) ?^) (eq (car-safe (car args)) ?^))
531 (setq args (if (cdr args)
532 `(,(cadr args) ,(car args) ,@(cddr args))
533 (nconc (rx-any-delete-from-range ?^ args)
534 (list ?^))))))
535 ;; some 1-char?
536 (if (and (null (cdr args)) (numberp (car args))
537 (or (= 1 (length
538 (setq s (regexp-quote (string (car args))))))
539 (and (equal (car args) ?^) ;; unnecessary predicate?
540 (null (eq rx-parent '!)))))
541 s
542 (concat "["
543 (mapconcat
544 (lambda (e) (cond
545 ((numberp e) (string e))
546 ((consp e)
547 (if (and (= (1+ (car e)) (cdr e))
548 (null (memq (car e) '(?\] ?-))))
549 (string (car e) (cdr e))
550 (string (car e) ?- (cdr e))))
551 (e)))
552 args
553 nil)
554 "]"))))
401 555
402 556
403(defun rx-check-not (arg) 557(defun rx-check-not (arg)
404 "Check arg ARG for Rx `not'." 558 "Check arg ARG for Rx `not'."
405 (unless (or (and (symbolp arg) 559 (unless (or (and (symbolp arg)
406 (string-match "\\`\\[\\[:[-a-z]:\\]\\]\\'" 560 (string-match "\\`\\[\\[:[-a-z]+:\\]\\]\\'"
407 (condition-case nil 561 (condition-case nil
408 (rx-to-string arg 'no-group) 562 (rx-form arg)
409 (error "")))) 563 (error ""))))
410 (eq arg 'word-boundary) 564 (eq arg 'word-boundary)
411 (and (consp arg) 565 (and (consp arg)
412 (memq (car arg) '(not any in syntax category)))) 566 (memq (car arg) '(not any in syntax category))))
413 (error "rx `not' syntax error: %s" arg)) 567 (error "rx `not' syntax error: %s" arg))
@@ -417,16 +571,22 @@ ARG is optional."
417(defun rx-not (form) 571(defun rx-not (form)
418 "Parse and produce code from FORM. FORM is `(not ...)'." 572 "Parse and produce code from FORM. FORM is `(not ...)'."
419 (rx-check form) 573 (rx-check form)
420 (let ((result (rx-to-string (cadr form) 'no-group)) 574 (let ((result (rx-form (cadr form) '!))
421 case-fold-search) 575 case-fold-search)
422 (cond ((string-match "\\`\\[^" result) 576 (cond ((string-match "\\`\\[^" result)
423 (if (= (length result) 4) 577 (cond
424 (substring result 2 3) 578 ((equal result "[^]") "[^^]")
425 (concat "[" (substring result 2)))) 579 ((and (= (length result) 4) (null (eq rx-parent '!)))
580 (regexp-quote (substring result 2 3)))
581 ((concat "[" (substring result 2)))))
426 ((eq ?\[ (aref result 0)) 582 ((eq ?\[ (aref result 0))
427 (concat "[^" (substring result 1))) 583 (concat "[^" (substring result 1)))
428 ((string-match "\\`\\\\[scb]" result) 584 ((string-match "\\`\\\\[scbw]" result)
429 (concat (capitalize (substring result 0 2)) (substring result 2))) 585 (concat (upcase (substring result 0 2))
586 (substring result 2)))
587 ((string-match "\\`\\\\[SCBW]" result)
588 (concat (downcase (substring result 0 2))
589 (substring result 2)))
430 (t 590 (t
431 (concat "[^" result "]"))))) 591 (concat "[^" result "]")))))
432 592
@@ -464,7 +624,7 @@ If SKIP is non-nil, allow that number of items after the head, i.e.
464 (unless (and (integerp (nth 1 form)) 624 (unless (and (integerp (nth 1 form))
465 (> (nth 1 form) 0)) 625 (> (nth 1 form) 0))
466 (error "rx `=' requires positive integer first arg")) 626 (error "rx `=' requires positive integer first arg"))
467 (format "%s\\{%d\\}" (rx-to-string (nth 2 form)) (nth 1 form))) 627 (format "%s\\{%d\\}" (rx-form (nth 2 form) '*) (nth 1 form)))
468 628
469 629
470(defun rx->= (form) 630(defun rx->= (form)
@@ -474,14 +634,14 @@ If SKIP is non-nil, allow that number of items after the head, i.e.
474 (unless (and (integerp (nth 1 form)) 634 (unless (and (integerp (nth 1 form))
475 (> (nth 1 form) 0)) 635 (> (nth 1 form) 0))
476 (error "rx `>=' requires positive integer first arg")) 636 (error "rx `>=' requires positive integer first arg"))
477 (format "%s\\{%d,\\}" (rx-to-string (nth 2 form)) (nth 1 form))) 637 (format "%s\\{%d,\\}" (rx-form (nth 2 form) '*) (nth 1 form)))
478 638
479 639
480(defun rx-** (form) 640(defun rx-** (form)
481 "Parse and produce code from FORM `(** N M ...)'." 641 "Parse and produce code from FORM `(** N M ...)'."
482 (rx-check form) 642 (rx-check form)
483 (setq form (cons 'repeat (cdr (rx-trans-forms form 2)))) 643 (setq form (cons 'repeat (cdr (rx-trans-forms form 2))))
484 (rx-to-string form)) 644 (rx-form form '*))
485 645
486 646
487(defun rx-repeat (form) 647(defun rx-repeat (form)
@@ -492,7 +652,7 @@ FORM is either `(repeat N FORM1)' or `(repeat N M FORM1)'."
492 (unless (and (integerp (nth 1 form)) 652 (unless (and (integerp (nth 1 form))
493 (> (nth 1 form) 0)) 653 (> (nth 1 form) 0))
494 (error "rx `repeat' requires positive integer first arg")) 654 (error "rx `repeat' requires positive integer first arg"))
495 (format "%s\\{%d\\}" (rx-to-string (nth 2 form)) (nth 1 form))) 655 (format "%s\\{%d\\}" (rx-form (nth 2 form) '*) (nth 1 form)))
496 ((or (not (integerp (nth 2 form))) 656 ((or (not (integerp (nth 2 form)))
497 (< (nth 2 form) 0) 657 (< (nth 2 form) 0)
498 (not (integerp (nth 1 form))) 658 (not (integerp (nth 1 form)))
@@ -500,16 +660,14 @@ FORM is either `(repeat N FORM1)' or `(repeat N M FORM1)'."
500 (< (nth 2 form) (nth 1 form))) 660 (< (nth 2 form) (nth 1 form)))
501 (error "rx `repeat' range error")) 661 (error "rx `repeat' range error"))
502 (t 662 (t
503 (format "%s\\{%d,%d\\}" (rx-to-string (nth 3 form)) 663 (format "%s\\{%d,%d\\}" (rx-form (nth 3 form) '*)
504 (nth 1 form) (nth 2 form))))) 664 (nth 1 form) (nth 2 form)))))
505 665
506 666
507(defun rx-submatch (form) 667(defun rx-submatch (form)
508 "Parse and produce code from FORM, which is `(submatch ...)'." 668 "Parse and produce code from FORM, which is `(submatch ...)'."
509 (concat "\\(" 669 (concat "\\(" (mapconcat #'rx-form (cdr form) nil) "\\)"))
510 (mapconcat (function (lambda (x) (rx-to-string x 'no-group))) 670
511 (cdr form) nil)
512 "\\)"))
513 671
514(defun rx-backref (form) 672(defun rx-backref (form)
515 "Parse and produce code from FORM, which is `(backref N)'." 673 "Parse and produce code from FORM, which is `(backref N)'."
@@ -531,19 +689,19 @@ If OP is anything else, produce a greedy regexp if `rx-greedy-flag'
531is non-nil." 689is non-nil."
532 (rx-check form) 690 (rx-check form)
533 (setq form (rx-trans-forms form)) 691 (setq form (rx-trans-forms form))
534 (let ((suffix (cond ((memq (car form) '(* + ? )) "") 692 (let ((suffix (cond ((memq (car form) '(* + ?\s)) "")
535 ((memq (car form) '(*? +? ??)) "?") 693 ((memq (car form) '(*? +? ??)) "?")
536 (rx-greedy-flag "") 694 (rx-greedy-flag "")
537 (t "?"))) 695 (t "?")))
538 (op (cond ((memq (car form) '(* *? 0+ zero-or-more)) "*") 696 (op (cond ((memq (car form) '(* *? 0+ zero-or-more)) "*")
539 ((memq (car form) '(+ +? 1+ one-or-more)) "+") 697 ((memq (car form) '(+ +? 1+ one-or-more)) "+")
540 (t "?"))) 698 (t "?"))))
541 (result (rx-to-string (cadr form) 'no-group))) 699 (rx-group-if
542 (if (not (rx-atomic-p result)) 700 (concat (rx-form (cadr form) '*) op suffix)
543 (setq result (concat "\\(?:" result "\\)"))) 701 (and (memq rx-parent '(t *)) rx-parent))))
544 (concat result op suffix)))
545 702
546(defun rx-atomic-p (r) 703
704(defun rx-atomic-p (r &optional lax)
547 "Return non-nil if regexp string R is atomic. 705 "Return non-nil if regexp string R is atomic.
548An atomic regexp R is one such that a suffix operator 706An atomic regexp R is one such that a suffix operator
549appended to R will apply to all of R. For example, \"a\" 707appended to R will apply to all of R. For example, \"a\"
@@ -568,13 +726,14 @@ be detected without much effort. A guarantee of no false
568negatives would require a theoretic specification of the set 726negatives would require a theoretic specification of the set
569of all atomic regexps." 727of all atomic regexps."
570 (let ((l (length r))) 728 (let ((l (length r)))
571 (or (equal l 1) 729 (cond
572 (and (>= l 6) 730 ((<= l 1))
573 (equal (substring r 0 2) "\\(") 731 ((= l 2) (= (aref r 0) ?\\))
574 (equal (substring r -2) "\\)")) 732 ((= l 3) (string-match "\\`\\(?:\\\\[cCsS_]\\|\\[[^^]\\]\\)" r))
575 (and (>= l 2) 733 ((null lax)
576 (equal (substring r 0 1) "[") 734 (cond
577 (equal (substring r -1) "]"))))) 735 ((string-match "\\`\\[^?\]?\\(?:\\[:[a-z]+:]\\|[^\]]\\)*\\]\\'" r))
736 ((string-match "\\`\\\\(\\(?:[^\\]\\|\\\\[^\)]\\)*\\\\)\\'" r)))))))
578 737
579 738
580(defun rx-syntax (form) 739(defun rx-syntax (form)
@@ -612,7 +771,7 @@ of all atomic regexps."
612(defun rx-eval (form) 771(defun rx-eval (form)
613 "Parse and produce code from FORM, which is `(eval FORM)'." 772 "Parse and produce code from FORM, which is `(eval FORM)'."
614 (rx-check form) 773 (rx-check form)
615 (rx-to-string (eval (cadr form)))) 774 (rx-form (eval (cadr form)) rx-parent))
616 775
617 776
618(defun rx-greedy (form) 777(defun rx-greedy (form)
@@ -622,13 +781,41 @@ If FORM is '(minimal-match FORM1)', non-greedy versions of `*',
622'(maximal-match FORM1)', greedy operators will be used." 781'(maximal-match FORM1)', greedy operators will be used."
623 (rx-check form) 782 (rx-check form)
624 (let ((rx-greedy-flag (eq (car form) 'maximal-match))) 783 (let ((rx-greedy-flag (eq (car form) 'maximal-match)))
625 (rx-to-string (cadr form)))) 784 (rx-form (cadr form) rx-parent)))
626 785
627 786
628(defun rx-regexp (form) 787(defun rx-regexp (form)
629 "Parse and produce code from FORM, which is `(regexp STRING)'." 788 "Parse and produce code from FORM, which is `(regexp STRING)'."
630 (rx-check form) 789 (rx-check form)
631 (concat "\\(?:" (cadr form) "\\)")) 790 (rx-group-if (cadr form) rx-parent))
791
792
793(defun rx-form (form &optional rx-parent)
794 "Parse and produce code for regular expression FORM.
795FORM is a regular expression in sexp form.
796RX-PARENT shows which type of expression calls and controls putting of
797shy groups around the result and some more in other functions."
798 (if (stringp form)
799 (rx-group-if (regexp-quote form)
800 (if (and (eq rx-parent '*) (< 1 (length form)))
801 rx-parent))
802 (cond ((integerp form)
803 (regexp-quote (char-to-string form)))
804 ((symbolp form)
805 (let ((info (rx-info form)))
806 (cond ((stringp info)
807 info)
808 ((null info)
809 (error "Unknown rx form `%s'" form))
810 (t
811 (funcall (nth 0 info) form)))))
812 ((consp form)
813 (let ((info (rx-info (car form))))
814 (unless (consp info)
815 (error "Unknown rx form `%s'" (car form)))
816 (funcall (nth 0 info) form)))
817 (t
818 (error "rx syntax error at `%s'" form)))))
632 819
633 820
634;;;###autoload 821;;;###autoload
@@ -636,28 +823,7 @@ If FORM is '(minimal-match FORM1)', non-greedy versions of `*',
636 "Parse and produce code for regular expression FORM. 823 "Parse and produce code for regular expression FORM.
637FORM is a regular expression in sexp form. 824FORM is a regular expression in sexp form.
638NO-GROUP non-nil means don't put shy groups around the result." 825NO-GROUP non-nil means don't put shy groups around the result."
639 (cond ((stringp form) 826 (rx-group-if (rx-form form) (null no-group)))
640 (regexp-quote form))
641 ((integerp form)
642 (regexp-quote (char-to-string form)))
643 ((symbolp form)
644 (let ((info (rx-info form)))
645 (cond ((stringp info)
646 info)
647 ((null info)
648 (error "Unknown rx form `%s'" form))
649 (t
650 (funcall (nth 0 info) form)))))
651 ((consp form)
652 (let ((info (rx-info (car form))))
653 (unless (consp info)
654 (error "Unknown rx form `%s'" (car form)))
655 (let ((result (funcall (nth 0 info) form)))
656 (if (or no-group (string-match "\\`\\\\[(]" result))
657 result
658 (concat "\\(?:" result "\\)")))))
659 (t
660 (error "rx syntax error at `%s'" form))))
661 827
662 828
663;;;###autoload 829;;;###autoload
@@ -878,6 +1044,9 @@ CHAR
878 like `and', but makes the match accessible with `match-end', 1044 like `and', but makes the match accessible with `match-end',
879 `match-beginning', and `match-string'. 1045 `match-beginning', and `match-string'.
880 1046
1047`(group SEXP1 SEXP2 ...)'
1048 another name for `submatch'.
1049
881`(or SEXP1 SEXP2 ...)' 1050`(or SEXP1 SEXP2 ...)'
882`(| SEXP1 SEXP2 ...)' 1051`(| SEXP1 SEXP2 ...)'
883 matches anything that matches SEXP1 or SEXP2, etc. If all 1052 matches anything that matches SEXP1 or SEXP2, etc. If all