diff options
| author | Chong Yidong | 2008-10-07 18:08:26 +0000 |
|---|---|---|
| committer | Chong Yidong | 2008-10-07 18:08:26 +0000 |
| commit | 5dbe5c8fb72de9ecb3b19697180c9144f4d49494 (patch) | |
| tree | 43eb6cf64ccbe9592865a3616a32ebf17bb4dbb9 | |
| parent | a469adf3e61bdead12a52dd3dce294a9fde37aef (diff) | |
| download | emacs-5dbe5c8fb72de9ecb3b19697180c9144f4d49494.tar.gz emacs-5dbe5c8fb72de9ecb3b19697180c9144f4d49494.zip | |
(rx-constituents): Change `anything' to call
rx-anything. Change `not-wordchar' assignment to "\\W" from
"[^[:word:]]".
(rx-group-if): New function.
(rx-parent): New variable.
(rx-and, rx-or): Put shy groups only when necessary.
(rx-bracket): Remove.
(rx-anything): New function.
(rx-any-delete-from-range, rx-any-condense-range)
(rx-check-any-string): New functions.
(rx-check-any): Return result as a list. Don't convert chars to
strings. Don't prepend "\\" to "^". Don't search for close
bracket. Check char category string. Call rx-form instead of
rx-to-string.
(rx-any): Rebuid to complete the function.
(rx-check-not): Fix char category regexp pattern string. Call
rx-form instead of rx-to-string.
(rx-not): Call rx-form instead of rx-to-string. Convert "[^]" to
"[^^]". Call regexp-quote for one char string when not called from
rx-not. Add "\\w", and toggle to upcase. Add the case of
"\\[SCBW]" to toggle.
(rx-=, rx->=, rx -**, rx-repeat, rx-submatch): Call rx-form
instead of rx-to-string.
(rx-kleene): Call rx-form instead of rx-to-string. Call
rx-group-if to adjust putting of shy groups.
(rx-atomic-p): Make check more precisely.
(rx-eval, rx-greedy): Call rx-form instead of rx-to-string.
(rx-regexp): Call rx-group-if.
(rx-form): New function.
(rx-to-string): Call rx-form, rx-group-if. Refine definition of
NO-GROUP.
| -rw-r--r-- | lisp/emacs-lisp/rx.el | 381 |
1 files changed, 275 insertions, 106 deletions
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el index 5e76256cfe6..c5e94874793 100644 --- a/lisp/emacs-lisp/rx.el +++ b/lisp/emacs-lisp/rx.el | |||
| @@ -118,7 +118,7 @@ | |||
| 118 | (| . or) ; SRE | 118 | (| . or) ; SRE |
| 119 | (not-newline . ".") | 119 | (not-newline . ".") |
| 120 | (nonl . not-newline) ; SRE | 120 | (nonl . not-newline) ; SRE |
| 121 | (anything . "\\(?:.\\|\n\\)") | 121 | (anything . (rx-anything 0 nil)) |
| 122 | (any . (rx-any 1 nil rx-check-any)) ; inconsistent with SRE | 122 | (any . (rx-any 1 nil rx-check-any)) ; inconsistent with SRE |
| 123 | (in . any) | 123 | (in . any) |
| 124 | (char . any) ; sregex | 124 | (char . any) ; sregex |
| @@ -206,8 +206,7 @@ | |||
| 206 | (upper-case . upper) ; SRE | 206 | (upper-case . upper) ; SRE |
| 207 | (word . "[[:word:]]") ; inconsistent with SRE | 207 | (word . "[[:word:]]") ; inconsistent with SRE |
| 208 | (wordchar . word) ; sregex | 208 | (wordchar . word) ; sregex |
| 209 | (not-wordchar . "[^[:word:]]") ; sregex (use \\W?) | 209 | (not-wordchar . "\\W")) |
| 210 | ) | ||
| 211 | "Alist of sexp form regexp constituents. | 210 | "Alist of sexp form regexp constituents. |
| 212 | Each element of the alist has the form (SYMBOL . DEFN). | 211 | Each element of the alist has the form (SYMBOL . DEFN). |
| 213 | SYMBOL is a valid constituent of sexp regular expressions. | 212 | SYMBOL is a valid constituent of sexp regular expressions. |
| @@ -332,82 +331,237 @@ See also `rx-constituents'." | |||
| 332 | (car form) type-pred)))))) | 331 | (car form) type-pred)))))) |
| 333 | 332 | ||
| 334 | 333 | ||
| 334 | (defun rx-group-if (regexp group) | ||
| 335 | "Put shy groups around REGEXP if seemingly necessary when GROUP | ||
| 336 | is non-nil." | ||
| 337 | (cond | ||
| 338 | ;; for some repetition | ||
| 339 | ((eq group '*) (if (rx-atomic-p regexp) (setq group nil))) | ||
| 340 | ;; for concatenation | ||
| 341 | ((eq group ':) | ||
| 342 | (if (rx-atomic-p | ||
| 343 | (if (string-match | ||
| 344 | "\\(?:[?*+]\\??\\|\\\\{[0-9]*,?[0-9]*\\\\}\\)\\'" regexp) | ||
| 345 | (substring regexp 0 (match-beginning 0)) | ||
| 346 | regexp)) | ||
| 347 | (setq group nil))) | ||
| 348 | ;; for OR | ||
| 349 | ((eq group '|) (setq group nil)) | ||
| 350 | ;; do anyway | ||
| 351 | ((eq group t)) | ||
| 352 | ((rx-atomic-p regexp t) (setq group nil))) | ||
| 353 | (if group | ||
| 354 | (concat "\\(?:" regexp "\\)") | ||
| 355 | regexp)) | ||
| 356 | |||
| 357 | |||
| 358 | (defvar rx-parent) | ||
| 359 | ;; dynamically bound in some functions. | ||
| 360 | |||
| 361 | |||
| 335 | (defun rx-and (form) | 362 | (defun rx-and (form) |
| 336 | "Parse and produce code from FORM. | 363 | "Parse and produce code from FORM. |
| 337 | FORM is of the form `(and FORM1 ...)'." | 364 | FORM is of the form `(and FORM1 ...)'." |
| 338 | (rx-check form) | 365 | (rx-check form) |
| 339 | (concat "\\(?:" | 366 | (rx-group-if |
| 340 | (mapconcat | 367 | (mapconcat (lambda (x) (rx-form x ':)) (cdr form) nil) |
| 341 | (function (lambda (x) (rx-to-string x 'no-group))) | 368 | (and (memq rx-parent '(* t)) rx-parent))) |
| 342 | (cdr form) nil) | ||
| 343 | "\\)")) | ||
| 344 | 369 | ||
| 345 | 370 | ||
| 346 | (defun rx-or (form) | 371 | (defun rx-or (form) |
| 347 | "Parse and produce code from FORM, which is `(or FORM1 ...)'." | 372 | "Parse and produce code from FORM, which is `(or FORM1 ...)'." |
| 348 | (rx-check form) | 373 | (rx-check form) |
| 349 | (let ((all-args-strings t)) | 374 | (rx-group-if |
| 350 | (dolist (arg (cdr form)) | 375 | (if (memq nil (mapcar 'stringp (cdr form))) |
| 351 | (unless (stringp arg) | 376 | (mapconcat (lambda (x) (rx-form x '|)) (cdr form) "\\|") |
| 352 | (setq all-args-strings nil))) | 377 | (regexp-opt (cdr form))) |
| 353 | (concat "\\(?:" | 378 | (and (memq rx-parent '(: * t)) rx-parent))) |
| 354 | (if all-args-strings | 379 | |
| 355 | (regexp-opt (cdr form)) | 380 | |
| 356 | (mapconcat #'rx-to-string (cdr form) "\\|")) | 381 | (defun rx-anything (form) |
| 357 | "\\)"))) | 382 | "Match any character." |
| 358 | 383 | (if (consp form) | |
| 384 | (error "rx `anythng' syntax error: %s" form)) | ||
| 385 | (rx-or (list 'or 'not-newline ?\n))) | ||
| 386 | |||
| 387 | |||
| 388 | (defun rx-any-delete-from-range (char ranges) | ||
| 389 | "Delete by side effect character CHAR from RANGES. | ||
| 390 | Only both edges of each range is checked." | ||
| 391 | (let (m) | ||
| 392 | (cond | ||
| 393 | ((memq char ranges) (setq ranges (delq char ranges))) | ||
| 394 | ((setq m (assq char ranges)) | ||
| 395 | (if (eq (1+ char) (cdr m)) | ||
| 396 | (setcar (memq m ranges) (1+ char)) | ||
| 397 | (setcar m (1+ char)))) | ||
| 398 | ((setq m (rassq char ranges)) | ||
| 399 | (if (eq (1- char) (car m)) | ||
| 400 | (setcar (memq m ranges) (1- char)) | ||
| 401 | (setcdr m (1- char))))) | ||
| 402 | ranges)) | ||
| 403 | |||
| 404 | |||
| 405 | (defun rx-any-condense-range (args) | ||
| 406 | "Condense by side effect ARGS as range for Rx `any'." | ||
| 407 | (let (str | ||
| 408 | l) | ||
| 409 | ;; set STR list of all strings | ||
| 410 | ;; set L list of all ranges | ||
| 411 | (mapc (lambda (e) (cond ((stringp e) (push e str)) | ||
| 412 | ((numberp e) (push (cons e e) l)) | ||
| 413 | (t (push e l)))) | ||
| 414 | args) | ||
| 415 | ;; condense overlapped ranges in L | ||
| 416 | (let ((tail (setq l (sort l #'car-less-than-car))) | ||
| 417 | d) | ||
| 418 | (while (setq d (cdr tail)) | ||
| 419 | (if (>= (cdar tail) (1- (caar d))) | ||
| 420 | (progn | ||
| 421 | (setcdr (car tail) (max (cdar tail) (cdar d))) | ||
| 422 | (setcdr tail (cdr d))) | ||
| 423 | (setq tail d)))) | ||
| 424 | ;; Separate small ranges to single number, and delete dups. | ||
| 425 | (nconc | ||
| 426 | (apply #'nconc | ||
| 427 | (mapcar (lambda (e) | ||
| 428 | (cond | ||
| 429 | ((= (car e) (cdr e)) (list (car e))) | ||
| 430 | ;; ((= (1+ (car e)) (cdr e)) (list (car e) (cdr e))) | ||
| 431 | ((list e)))) | ||
| 432 | l)) | ||
| 433 | (delete-dups str)))) | ||
| 434 | |||
| 435 | |||
| 436 | (defun rx-check-any-string (str) | ||
| 437 | "Check string argument STR for Rx `any'." | ||
| 438 | (let ((i 0) | ||
| 439 | c1 c2 l) | ||
| 440 | (if (= 0 (length str)) | ||
| 441 | (error "String arg for Rx `any' must not be empty")) | ||
| 442 | (while (string-match ".-." str i) | ||
| 443 | ;; string before range: convert it to characters | ||
| 444 | (if (< i (match-beginning 0)) | ||
| 445 | (setq l (nconc | ||
| 446 | l | ||
| 447 | (append (substring str i (match-beginning 0)) nil)))) | ||
| 448 | ;; range | ||
| 449 | (setq i (match-end 0) | ||
| 450 | c1 (aref str (match-beginning 0)) | ||
| 451 | c2 (aref str (1- i))) | ||
| 452 | (cond | ||
| 453 | ((< c1 c2) (setq l (nconc l (list (cons c1 c2))))) | ||
| 454 | ((= c1 c2) (setq l (nconc l (list c1)))))) | ||
| 455 | ;; rest? | ||
| 456 | (if (< i (length str)) | ||
| 457 | (setq l (nconc l (append (substring str i) nil)))) | ||
| 458 | l)) | ||
| 359 | 459 | ||
| 360 | (defvar rx-bracket) ; dynamically bound in `rx-any' | ||
| 361 | 460 | ||
| 362 | (defun rx-check-any (arg) | 461 | (defun rx-check-any (arg) |
| 363 | "Check arg ARG for Rx `any'." | 462 | "Check arg ARG for Rx `any'." |
| 364 | (if (integerp arg) | 463 | (cond |
| 365 | (setq arg (string arg))) | 464 | ((integerp arg) (list arg)) |
| 366 | (when (stringp arg) | 465 | ((symbolp arg) |
| 367 | (if (zerop (length arg)) | ||
| 368 | (error "String arg for Rx `any' must not be empty")) | ||
| 369 | ;; Quote ^ at start; don't bother to check whether this is first arg. | ||
| 370 | (if (eq ?^ (aref arg 0)) | ||
| 371 | (setq arg (concat "\\" arg))) | ||
| 372 | ;; Remove ] and set flag for adding it to start of overall result. | ||
| 373 | (when (string-match "\\]" arg) | ||
| 374 | (setq arg (replace-regexp-in-string "\\]" "" arg) | ||
| 375 | rx-bracket "]"))) | ||
| 376 | (when (symbolp arg) | ||
| 377 | (let ((translation (condition-case nil | 466 | (let ((translation (condition-case nil |
| 378 | (rx-to-string arg 'no-group) | 467 | (rx-form arg) |
| 379 | (error nil)))) | 468 | (error nil)))) |
| 380 | (unless translation (error "Invalid char class `%s' in Rx `any'" arg)) | 469 | (if (or (null translation) |
| 381 | (setq arg (substring translation 1 -1)))) ; strip outer brackets | 470 | (null (string-match "\\`\\[\\[:[-a-z]+:\\]\\]\\'" translation))) |
| 382 | ;; sregex compatibility | 471 | (error "Invalid char class `%s' in Rx `any'" arg)) |
| 383 | (when (and (integerp (car-safe arg)) | 472 | (list (substring translation 1 -1)))) ; strip outer brackets |
| 384 | (integerp (cdr-safe arg))) | 473 | ((and (integerp (car-safe arg)) (integerp (cdr-safe arg))) |
| 385 | (setq arg (string (car arg) ?- (cdr arg)))) | 474 | (list arg)) |
| 386 | (unless (stringp arg) | 475 | ((stringp arg) (rx-check-any-string arg)) |
| 387 | (error "rx `any' requires string, character, char pair or char class args")) | 476 | ((error |
| 388 | arg) | 477 | "rx `any' requires string, character, char pair or char class args")))) |
| 478 | |||
| 389 | 479 | ||
| 390 | (defun rx-any (form) | 480 | (defun rx-any (form) |
| 391 | "Parse and produce code from FORM, which is `(any ARG ...)'. | 481 | "Parse and produce code from FORM, which is `(any ARG ...)'. |
| 392 | ARG is optional." | 482 | ARG is optional." |
| 393 | (rx-check form) | 483 | (rx-check form) |
| 394 | (let* ((rx-bracket nil) | 484 | (let* ((args (rx-any-condense-range |
| 395 | (args (mapcar #'rx-check-any (cdr form)))) ; side-effects `rx-bracket' | 485 | (apply |
| 396 | ;; If there was a ?- in the form, move it to the front to avoid | 486 | #'nconc |
| 397 | ;; accidental range. | 487 | (mapcar #'rx-check-any (cdr form))))) |
| 398 | (if (member "-" args) | 488 | m |
| 399 | (setq args (cons "-" (delete "-" args)))) | 489 | s) |
| 400 | (apply #'concat "[" rx-bracket (append args '("]"))))) | 490 | (cond |
| 491 | ;; single close bracket | ||
| 492 | ;; => "[]...-]" or "[]...--.]" | ||
| 493 | ((memq ?\] args) | ||
| 494 | ;; set ] at the beginning | ||
| 495 | (setq args (cons ?\] (delq ?\] args))) | ||
| 496 | ;; set - at the end | ||
| 497 | (if (or (memq ?- args) (assq ?- args)) | ||
| 498 | (setq args (nconc (rx-any-delete-from-range ?- args) | ||
| 499 | (list ?-))))) | ||
| 500 | ;; close bracket starts a range | ||
| 501 | ;; => "[]-....-]" or "[]-.--....]" | ||
| 502 | ((setq m (assq ?\] args)) | ||
| 503 | ;; bring it to the beginning | ||
| 504 | (setq args (cons m (delq m args))) | ||
| 505 | (cond ((memq ?- args) | ||
| 506 | ;; to the end | ||
| 507 | (setq args (nconc (delq ?- args) (list ?-)))) | ||
| 508 | ((setq m (assq ?- args)) | ||
| 509 | ;; next to the bracket's range, make the second range | ||
| 510 | (setcdr args (cons m (delq m args)))))) | ||
| 511 | ;; bracket in the end range | ||
| 512 | ;; => "[]...-]" | ||
| 513 | ((setq m (rassq ?\] args)) | ||
| 514 | ;; set ] at the beginning | ||
| 515 | (setq args (cons ?\] (rx-any-delete-from-range ?\] args))) | ||
| 516 | ;; set - at the end | ||
| 517 | (if (or (memq ?- args) (assq ?- args)) | ||
| 518 | (setq args (nconc (rx-any-delete-from-range ?- args) | ||
| 519 | (list ?-))))) | ||
| 520 | ;; {no close bracket appears} | ||
| 521 | ;; | ||
| 522 | ;; bring single bar to the beginning | ||
| 523 | ((memq ?- args) | ||
| 524 | (setq args (cons ?- (delq ?- args)))) | ||
| 525 | ;; bar start a range, bring it to the beginning | ||
| 526 | ((setq m (assq ?- args)) | ||
| 527 | (setq args (cons m (delq m args)))) | ||
| 528 | ;; | ||
| 529 | ;; hat at the beginning? | ||
| 530 | ((or (eq (car args) ?^) (eq (car-safe (car args)) ?^)) | ||
| 531 | (setq args (if (cdr args) | ||
| 532 | `(,(cadr args) ,(car args) ,@(cddr args)) | ||
| 533 | (nconc (rx-any-delete-from-range ?^ args) | ||
| 534 | (list ?^)))))) | ||
| 535 | ;; some 1-char? | ||
| 536 | (if (and (null (cdr args)) (numberp (car args)) | ||
| 537 | (or (= 1 (length | ||
| 538 | (setq s (regexp-quote (string (car args)))))) | ||
| 539 | (and (equal (car args) ?^) ;; unnecessary predicate? | ||
| 540 | (null (eq rx-parent '!))))) | ||
| 541 | s | ||
| 542 | (concat "[" | ||
| 543 | (mapconcat | ||
| 544 | (lambda (e) (cond | ||
| 545 | ((numberp e) (string e)) | ||
| 546 | ((consp e) | ||
| 547 | (if (and (= (1+ (car e)) (cdr e)) | ||
| 548 | (null (memq (car e) '(?\] ?-)))) | ||
| 549 | (string (car e) (cdr e)) | ||
| 550 | (string (car e) ?- (cdr e)))) | ||
| 551 | (e))) | ||
| 552 | args | ||
| 553 | nil) | ||
| 554 | "]")))) | ||
| 401 | 555 | ||
| 402 | 556 | ||
| 403 | (defun rx-check-not (arg) | 557 | (defun rx-check-not (arg) |
| 404 | "Check arg ARG for Rx `not'." | 558 | "Check arg ARG for Rx `not'." |
| 405 | (unless (or (and (symbolp arg) | 559 | (unless (or (and (symbolp arg) |
| 406 | (string-match "\\`\\[\\[:[-a-z]:\\]\\]\\'" | 560 | (string-match "\\`\\[\\[:[-a-z]+:\\]\\]\\'" |
| 407 | (condition-case nil | 561 | (condition-case nil |
| 408 | (rx-to-string arg 'no-group) | 562 | (rx-form arg) |
| 409 | (error "")))) | 563 | (error "")))) |
| 410 | (eq arg 'word-boundary) | 564 | (eq arg 'word-boundary) |
| 411 | (and (consp arg) | 565 | (and (consp arg) |
| 412 | (memq (car arg) '(not any in syntax category)))) | 566 | (memq (car arg) '(not any in syntax category)))) |
| 413 | (error "rx `not' syntax error: %s" arg)) | 567 | (error "rx `not' syntax error: %s" arg)) |
| @@ -417,16 +571,22 @@ ARG is optional." | |||
| 417 | (defun rx-not (form) | 571 | (defun rx-not (form) |
| 418 | "Parse and produce code from FORM. FORM is `(not ...)'." | 572 | "Parse and produce code from FORM. FORM is `(not ...)'." |
| 419 | (rx-check form) | 573 | (rx-check form) |
| 420 | (let ((result (rx-to-string (cadr form) 'no-group)) | 574 | (let ((result (rx-form (cadr form) '!)) |
| 421 | case-fold-search) | 575 | case-fold-search) |
| 422 | (cond ((string-match "\\`\\[^" result) | 576 | (cond ((string-match "\\`\\[^" result) |
| 423 | (if (= (length result) 4) | 577 | (cond |
| 424 | (substring result 2 3) | 578 | ((equal result "[^]") "[^^]") |
| 425 | (concat "[" (substring result 2)))) | 579 | ((and (= (length result) 4) (null (eq rx-parent '!))) |
| 580 | (regexp-quote (substring result 2 3))) | ||
| 581 | ((concat "[" (substring result 2))))) | ||
| 426 | ((eq ?\[ (aref result 0)) | 582 | ((eq ?\[ (aref result 0)) |
| 427 | (concat "[^" (substring result 1))) | 583 | (concat "[^" (substring result 1))) |
| 428 | ((string-match "\\`\\\\[scb]" result) | 584 | ((string-match "\\`\\\\[scbw]" result) |
| 429 | (concat (capitalize (substring result 0 2)) (substring result 2))) | 585 | (concat (upcase (substring result 0 2)) |
| 586 | (substring result 2))) | ||
| 587 | ((string-match "\\`\\\\[SCBW]" result) | ||
| 588 | (concat (downcase (substring result 0 2)) | ||
| 589 | (substring result 2))) | ||
| 430 | (t | 590 | (t |
| 431 | (concat "[^" result "]"))))) | 591 | (concat "[^" result "]"))))) |
| 432 | 592 | ||
| @@ -464,7 +624,7 @@ If SKIP is non-nil, allow that number of items after the head, i.e. | |||
| 464 | (unless (and (integerp (nth 1 form)) | 624 | (unless (and (integerp (nth 1 form)) |
| 465 | (> (nth 1 form) 0)) | 625 | (> (nth 1 form) 0)) |
| 466 | (error "rx `=' requires positive integer first arg")) | 626 | (error "rx `=' requires positive integer first arg")) |
| 467 | (format "%s\\{%d\\}" (rx-to-string (nth 2 form)) (nth 1 form))) | 627 | (format "%s\\{%d\\}" (rx-form (nth 2 form) '*) (nth 1 form))) |
| 468 | 628 | ||
| 469 | 629 | ||
| 470 | (defun rx->= (form) | 630 | (defun rx->= (form) |
| @@ -474,14 +634,14 @@ If SKIP is non-nil, allow that number of items after the head, i.e. | |||
| 474 | (unless (and (integerp (nth 1 form)) | 634 | (unless (and (integerp (nth 1 form)) |
| 475 | (> (nth 1 form) 0)) | 635 | (> (nth 1 form) 0)) |
| 476 | (error "rx `>=' requires positive integer first arg")) | 636 | (error "rx `>=' requires positive integer first arg")) |
| 477 | (format "%s\\{%d,\\}" (rx-to-string (nth 2 form)) (nth 1 form))) | 637 | (format "%s\\{%d,\\}" (rx-form (nth 2 form) '*) (nth 1 form))) |
| 478 | 638 | ||
| 479 | 639 | ||
| 480 | (defun rx-** (form) | 640 | (defun rx-** (form) |
| 481 | "Parse and produce code from FORM `(** N M ...)'." | 641 | "Parse and produce code from FORM `(** N M ...)'." |
| 482 | (rx-check form) | 642 | (rx-check form) |
| 483 | (setq form (cons 'repeat (cdr (rx-trans-forms form 2)))) | 643 | (setq form (cons 'repeat (cdr (rx-trans-forms form 2)))) |
| 484 | (rx-to-string form)) | 644 | (rx-form form '*)) |
| 485 | 645 | ||
| 486 | 646 | ||
| 487 | (defun rx-repeat (form) | 647 | (defun rx-repeat (form) |
| @@ -492,7 +652,7 @@ FORM is either `(repeat N FORM1)' or `(repeat N M FORM1)'." | |||
| 492 | (unless (and (integerp (nth 1 form)) | 652 | (unless (and (integerp (nth 1 form)) |
| 493 | (> (nth 1 form) 0)) | 653 | (> (nth 1 form) 0)) |
| 494 | (error "rx `repeat' requires positive integer first arg")) | 654 | (error "rx `repeat' requires positive integer first arg")) |
| 495 | (format "%s\\{%d\\}" (rx-to-string (nth 2 form)) (nth 1 form))) | 655 | (format "%s\\{%d\\}" (rx-form (nth 2 form) '*) (nth 1 form))) |
| 496 | ((or (not (integerp (nth 2 form))) | 656 | ((or (not (integerp (nth 2 form))) |
| 497 | (< (nth 2 form) 0) | 657 | (< (nth 2 form) 0) |
| 498 | (not (integerp (nth 1 form))) | 658 | (not (integerp (nth 1 form))) |
| @@ -500,16 +660,14 @@ FORM is either `(repeat N FORM1)' or `(repeat N M FORM1)'." | |||
| 500 | (< (nth 2 form) (nth 1 form))) | 660 | (< (nth 2 form) (nth 1 form))) |
| 501 | (error "rx `repeat' range error")) | 661 | (error "rx `repeat' range error")) |
| 502 | (t | 662 | (t |
| 503 | (format "%s\\{%d,%d\\}" (rx-to-string (nth 3 form)) | 663 | (format "%s\\{%d,%d\\}" (rx-form (nth 3 form) '*) |
| 504 | (nth 1 form) (nth 2 form))))) | 664 | (nth 1 form) (nth 2 form))))) |
| 505 | 665 | ||
| 506 | 666 | ||
| 507 | (defun rx-submatch (form) | 667 | (defun rx-submatch (form) |
| 508 | "Parse and produce code from FORM, which is `(submatch ...)'." | 668 | "Parse and produce code from FORM, which is `(submatch ...)'." |
| 509 | (concat "\\(" | 669 | (concat "\\(" (mapconcat #'rx-form (cdr form) nil) "\\)")) |
| 510 | (mapconcat (function (lambda (x) (rx-to-string x 'no-group))) | 670 | |
| 511 | (cdr form) nil) | ||
| 512 | "\\)")) | ||
| 513 | 671 | ||
| 514 | (defun rx-backref (form) | 672 | (defun rx-backref (form) |
| 515 | "Parse and produce code from FORM, which is `(backref N)'." | 673 | "Parse and produce code from FORM, which is `(backref N)'." |
| @@ -531,19 +689,19 @@ If OP is anything else, produce a greedy regexp if `rx-greedy-flag' | |||
| 531 | is non-nil." | 689 | is non-nil." |
| 532 | (rx-check form) | 690 | (rx-check form) |
| 533 | (setq form (rx-trans-forms form)) | 691 | (setq form (rx-trans-forms form)) |
| 534 | (let ((suffix (cond ((memq (car form) '(* + ? )) "") | 692 | (let ((suffix (cond ((memq (car form) '(* + ?\s)) "") |
| 535 | ((memq (car form) '(*? +? ??)) "?") | 693 | ((memq (car form) '(*? +? ??)) "?") |
| 536 | (rx-greedy-flag "") | 694 | (rx-greedy-flag "") |
| 537 | (t "?"))) | 695 | (t "?"))) |
| 538 | (op (cond ((memq (car form) '(* *? 0+ zero-or-more)) "*") | 696 | (op (cond ((memq (car form) '(* *? 0+ zero-or-more)) "*") |
| 539 | ((memq (car form) '(+ +? 1+ one-or-more)) "+") | 697 | ((memq (car form) '(+ +? 1+ one-or-more)) "+") |
| 540 | (t "?"))) | 698 | (t "?")))) |
| 541 | (result (rx-to-string (cadr form) 'no-group))) | 699 | (rx-group-if |
| 542 | (if (not (rx-atomic-p result)) | 700 | (concat (rx-form (cadr form) '*) op suffix) |
| 543 | (setq result (concat "\\(?:" result "\\)"))) | 701 | (and (memq rx-parent '(t *)) rx-parent)))) |
| 544 | (concat result op suffix))) | ||
| 545 | 702 | ||
| 546 | (defun rx-atomic-p (r) | 703 | |
| 704 | (defun rx-atomic-p (r &optional lax) | ||
| 547 | "Return non-nil if regexp string R is atomic. | 705 | "Return non-nil if regexp string R is atomic. |
| 548 | An atomic regexp R is one such that a suffix operator | 706 | An atomic regexp R is one such that a suffix operator |
| 549 | appended to R will apply to all of R. For example, \"a\" | 707 | appended to R will apply to all of R. For example, \"a\" |
| @@ -568,13 +726,14 @@ be detected without much effort. A guarantee of no false | |||
| 568 | negatives would require a theoretic specification of the set | 726 | negatives would require a theoretic specification of the set |
| 569 | of all atomic regexps." | 727 | of all atomic regexps." |
| 570 | (let ((l (length r))) | 728 | (let ((l (length r))) |
| 571 | (or (equal l 1) | 729 | (cond |
| 572 | (and (>= l 6) | 730 | ((<= l 1)) |
| 573 | (equal (substring r 0 2) "\\(") | 731 | ((= l 2) (= (aref r 0) ?\\)) |
| 574 | (equal (substring r -2) "\\)")) | 732 | ((= l 3) (string-match "\\`\\(?:\\\\[cCsS_]\\|\\[[^^]\\]\\)" r)) |
| 575 | (and (>= l 2) | 733 | ((null lax) |
| 576 | (equal (substring r 0 1) "[") | 734 | (cond |
| 577 | (equal (substring r -1) "]"))))) | 735 | ((string-match "\\`\\[^?\]?\\(?:\\[:[a-z]+:]\\|[^\]]\\)*\\]\\'" r)) |
| 736 | ((string-match "\\`\\\\(\\(?:[^\\]\\|\\\\[^\)]\\)*\\\\)\\'" r))))))) | ||
| 578 | 737 | ||
| 579 | 738 | ||
| 580 | (defun rx-syntax (form) | 739 | (defun rx-syntax (form) |
| @@ -612,7 +771,7 @@ of all atomic regexps." | |||
| 612 | (defun rx-eval (form) | 771 | (defun rx-eval (form) |
| 613 | "Parse and produce code from FORM, which is `(eval FORM)'." | 772 | "Parse and produce code from FORM, which is `(eval FORM)'." |
| 614 | (rx-check form) | 773 | (rx-check form) |
| 615 | (rx-to-string (eval (cadr form)))) | 774 | (rx-form (eval (cadr form)) rx-parent)) |
| 616 | 775 | ||
| 617 | 776 | ||
| 618 | (defun rx-greedy (form) | 777 | (defun rx-greedy (form) |
| @@ -622,13 +781,41 @@ If FORM is '(minimal-match FORM1)', non-greedy versions of `*', | |||
| 622 | '(maximal-match FORM1)', greedy operators will be used." | 781 | '(maximal-match FORM1)', greedy operators will be used." |
| 623 | (rx-check form) | 782 | (rx-check form) |
| 624 | (let ((rx-greedy-flag (eq (car form) 'maximal-match))) | 783 | (let ((rx-greedy-flag (eq (car form) 'maximal-match))) |
| 625 | (rx-to-string (cadr form)))) | 784 | (rx-form (cadr form) rx-parent))) |
| 626 | 785 | ||
| 627 | 786 | ||
| 628 | (defun rx-regexp (form) | 787 | (defun rx-regexp (form) |
| 629 | "Parse and produce code from FORM, which is `(regexp STRING)'." | 788 | "Parse and produce code from FORM, which is `(regexp STRING)'." |
| 630 | (rx-check form) | 789 | (rx-check form) |
| 631 | (concat "\\(?:" (cadr form) "\\)")) | 790 | (rx-group-if (cadr form) rx-parent)) |
| 791 | |||
| 792 | |||
| 793 | (defun rx-form (form &optional rx-parent) | ||
| 794 | "Parse and produce code for regular expression FORM. | ||
| 795 | FORM is a regular expression in sexp form. | ||
| 796 | RX-PARENT shows which type of expression calls and controls putting of | ||
| 797 | shy groups around the result and some more in other functions." | ||
| 798 | (if (stringp form) | ||
| 799 | (rx-group-if (regexp-quote form) | ||
| 800 | (if (and (eq rx-parent '*) (< 1 (length form))) | ||
| 801 | rx-parent)) | ||
| 802 | (cond ((integerp form) | ||
| 803 | (regexp-quote (char-to-string form))) | ||
| 804 | ((symbolp form) | ||
| 805 | (let ((info (rx-info form))) | ||
| 806 | (cond ((stringp info) | ||
| 807 | info) | ||
| 808 | ((null info) | ||
| 809 | (error "Unknown rx form `%s'" form)) | ||
| 810 | (t | ||
| 811 | (funcall (nth 0 info) form))))) | ||
| 812 | ((consp form) | ||
| 813 | (let ((info (rx-info (car form)))) | ||
| 814 | (unless (consp info) | ||
| 815 | (error "Unknown rx form `%s'" (car form))) | ||
| 816 | (funcall (nth 0 info) form))) | ||
| 817 | (t | ||
| 818 | (error "rx syntax error at `%s'" form))))) | ||
| 632 | 819 | ||
| 633 | 820 | ||
| 634 | ;;;###autoload | 821 | ;;;###autoload |
| @@ -636,28 +823,7 @@ If FORM is '(minimal-match FORM1)', non-greedy versions of `*', | |||
| 636 | "Parse and produce code for regular expression FORM. | 823 | "Parse and produce code for regular expression FORM. |
| 637 | FORM is a regular expression in sexp form. | 824 | FORM is a regular expression in sexp form. |
| 638 | NO-GROUP non-nil means don't put shy groups around the result." | 825 | NO-GROUP non-nil means don't put shy groups around the result." |
| 639 | (cond ((stringp form) | 826 | (rx-group-if (rx-form form) (null no-group))) |
| 640 | (regexp-quote form)) | ||
| 641 | ((integerp form) | ||
| 642 | (regexp-quote (char-to-string form))) | ||
| 643 | ((symbolp form) | ||
| 644 | (let ((info (rx-info form))) | ||
| 645 | (cond ((stringp info) | ||
| 646 | info) | ||
| 647 | ((null info) | ||
| 648 | (error "Unknown rx form `%s'" form)) | ||
| 649 | (t | ||
| 650 | (funcall (nth 0 info) form))))) | ||
| 651 | ((consp form) | ||
| 652 | (let ((info (rx-info (car form)))) | ||
| 653 | (unless (consp info) | ||
| 654 | (error "Unknown rx form `%s'" (car form))) | ||
| 655 | (let ((result (funcall (nth 0 info) form))) | ||
| 656 | (if (or no-group (string-match "\\`\\\\[(]" result)) | ||
| 657 | result | ||
| 658 | (concat "\\(?:" result "\\)"))))) | ||
| 659 | (t | ||
| 660 | (error "rx syntax error at `%s'" form)))) | ||
| 661 | 827 | ||
| 662 | 828 | ||
| 663 | ;;;###autoload | 829 | ;;;###autoload |
| @@ -878,6 +1044,9 @@ CHAR | |||
| 878 | like `and', but makes the match accessible with `match-end', | 1044 | like `and', but makes the match accessible with `match-end', |
| 879 | `match-beginning', and `match-string'. | 1045 | `match-beginning', and `match-string'. |
| 880 | 1046 | ||
| 1047 | `(group SEXP1 SEXP2 ...)' | ||
| 1048 | another name for `submatch'. | ||
| 1049 | |||
| 881 | `(or SEXP1 SEXP2 ...)' | 1050 | `(or SEXP1 SEXP2 ...)' |
| 882 | `(| SEXP1 SEXP2 ...)' | 1051 | `(| SEXP1 SEXP2 ...)' |
| 883 | matches anything that matches SEXP1 or SEXP2, etc. If all | 1052 | matches anything that matches SEXP1 or SEXP2, etc. If all |