diff options
| author | Richard M. Stallman | 2002-12-23 17:43:24 +0000 |
|---|---|---|
| committer | Richard M. Stallman | 2002-12-23 17:43:24 +0000 |
| commit | c53f9b3b9ce4d4a4c54bdb82cbea1d0f46ec1ba6 (patch) | |
| tree | 05cfd5065939361d58e05fbb97a50782c7f95731 | |
| parent | 61f1d295a28526a92ca52740197e5b9e31393899 (diff) | |
| download | emacs-c53f9b3b9ce4d4a4c54bdb82cbea1d0f46ec1ba6.tar.gz emacs-c53f9b3b9ce4d4a4c54bdb82cbea1d0f46ec1ba6.zip | |
(rx-and): Generate a shy group.
Specify `no-group' when calling rx-to-string.
(rx-submatch): Specify `no-group' when calling rx-to-string.
(rx-kleene): Use rx-atomic-p to decide whether to make a group.
(rx-atomic-p): New function.
| -rw-r--r-- | lisp/emacs-lisp/rx.el | 61 |
1 file changed, 51 insertions, 10 deletions
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
index 938564b5dde..a58a38bf213 100644
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
| @@ -61,9 +61,9 @@ | |||
| 61 | ;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*" | 61 | ;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*" |
| 62 | ;; (rx (and line-start | 62 | ;; (rx (and line-start |
| 63 | ;; "content-transfer-encoding:" | 63 | ;; "content-transfer-encoding:" |
| 64 | ;; (+ (? ?\n) blank) | 64 | ;; (+ (? ?\n)) blank |
| 65 | ;; "quoted-printable" | 65 | ;; "quoted-printable" |
| 66 | ;; (+ (? ?\n) blank)) | 66 | ;; (+ (? ?\n)) blank)) |
| 67 | ;; | 67 | ;; |
| 68 | ;; (concat "^\\(?:" something-else "\\)") | 68 | ;; (concat "^\\(?:" something-else "\\)") |
| 69 | ;; (rx (and line-start (eval something-else))), statically or | 69 | ;; (rx (and line-start (eval something-else))), statically or |
| @@ -78,11 +78,11 @@ | |||
| 78 | ;; (and line-start ?\n))) | 78 | ;; (and line-start ?\n))) |
| 79 | ;; | 79 | ;; |
| 80 | ;; "\\$[I]d: [^ ]+ \\([^ ]+\\) " | 80 | ;; "\\$[I]d: [^ ]+ \\([^ ]+\\) " |
| 81 | ;; (rx (and "$Id": " | 81 | ;; (rx (and "$Id: " |
| 82 | ;; (1+ (not (in " "))) | 82 | ;; (1+ (not (in " "))) |
| 83 | ;; " " | 83 | ;; " " |
| 84 | ;; (submatch (1+ (not (in " ")))) | 84 | ;; (submatch (1+ (not (in " ")))) |
| 85 | ;; " "))) | 85 | ;; " ")) |
| 86 | ;; | 86 | ;; |
| 87 | ;; "\\\\\\\\\\[\\w+" | 87 | ;; "\\\\\\\\\\[\\w+" |
| 88 | ;; (rx (and ?\\ ?\\ ?\[ (1+ word))) | 88 | ;; (rx (and ?\\ ?\\ ?\[ (1+ word))) |
| @@ -272,7 +272,11 @@ See also `rx-constituents'." | |||
| 272 | "Parse and produce code from FORM. | 272 | "Parse and produce code from FORM. |
| 273 | FORM is of the form `(and FORM1 ...)'." | 273 | FORM is of the form `(and FORM1 ...)'." |
| 274 | (rx-check form) | 274 | (rx-check form) |
| 275 | (mapconcat #'rx-to-string (cdr form) nil)) | 275 | (concat "\\(?:" |
| 276 | (mapconcat | ||
| 277 | (function (lambda (x) (rx-to-string x 'no-group))) | ||
| 278 | (cdr form) nil) | ||
| 279 | "\\)")) | ||
| 276 | 280 | ||
| 277 | 281 | ||
| 278 | (defun rx-or (form) | 282 | (defun rx-or (form) |
| @@ -384,8 +388,10 @@ FORM is either `(repeat N FORM1)' or `(repeat N M FORM1)'." | |||
| 384 | 388 | ||
| 385 | (defun rx-submatch (form) | 389 | (defun rx-submatch (form) |
| 386 | "Parse and produce code from FORM, which is `(submatch ...)'." | 390 | "Parse and produce code from FORM, which is `(submatch ...)'." |
| 387 | (concat "\\(" (mapconcat #'rx-to-string (cdr form) nil) "\\)")) | 391 | (concat "\\(" |
| 388 | 392 | (mapconcat (function (lambda (x) (rx-to-string x 'no-group))) | |
| 393 | (cdr form) nil) | ||
| 394 | "\\)")) | ||
| 389 | 395 | ||
| 390 | (defun rx-kleene (form) | 396 | (defun rx-kleene (form) |
| 391 | "Parse and produce code from FORM. | 397 | "Parse and produce code from FORM. |
| @@ -402,9 +408,44 @@ is non-nil." | |||
| 402 | (t "?"))) | 408 | (t "?"))) |
| 403 | (op (cond ((memq (car form) '(* *? 0+ zero-or-more)) "*") | 409 | (op (cond ((memq (car form) '(* *? 0+ zero-or-more)) "*") |
| 404 | ((memq (car form) '(+ +? 1+ one-or-more)) "+") | 410 | ((memq (car form) '(+ +? 1+ one-or-more)) "+") |
| 405 | (t "?")))) | 411 | (t "?"))) |
| 406 | (format "\\(?:%s\\)%s%s" (rx-to-string (cadr form) 'no-group) | 412 | (result (rx-to-string (cadr form) 'no-group))) |
| 407 | op suffix))) | 413 | (if (not (rx-atomic-p result)) |
| 414 | (setq result (concat "\\(?:" result "\\)"))) | ||
| 415 | (concat result op suffix))) | ||
| 416 | |||
| 417 | (defun rx-atomic-p (r) | ||
| 418 | "Return non-nil if regexp string R is atomic. | ||
| 419 | An atomic regexp R is one such that a suffix operator | ||
| 420 | appended to R will apply to all of R. For example, \"a\" | ||
| 421 | \"[abc]\" and \"\\(ab\\|ab*c\\)\" are atomic and \"ab\", | ||
| 422 | \"[ab]c\", and \"ab\\|ab*c\" are not atomic. | ||
| 423 | |||
| 424 | This function may return false negatives, but it will not | ||
| 425 | return false positives. It is nevertheless useful in | ||
| 426 | situations where an efficiency shortcut can be taken iff a | ||
| 427 | regexp is atomic. The function can be improved to detect | ||
| 428 | more cases of atomic regexps. Presently, this function | ||
| 429 | detects the following categories of atomic regexp; | ||
| 430 | |||
| 431 | a group or shy group: \\(...\\) | ||
| 432 | a character class: [...] | ||
| 433 | a single character: a | ||
| 434 | |||
| 435 | On the other hand, false negatives will be returned for | ||
| 436 | regexps that are atomic but end in operators, such as | ||
| 437 | \"a+\". I think these are rare. Probably such cases could | ||
| 438 | be detected without much effort. A guarantee of no false | ||
| 439 | negatives would require a theoretic specification of the set | ||
| 440 | of all atomic regexps." | ||
| 441 | (let ((l (length r))) | ||
| 442 | (or (equal l 1) | ||
| 443 | (and (>= l 6) | ||
| 444 | (equal (substring r 0 2) "\\(") | ||
| 445 | (equal (substring r -2) "\\)")) | ||
| 446 | (and (>= l 2) | ||
| 447 | (equal (substring r 0 1) "[") | ||
| 448 | (equal (substring r -1) "]"))))) | ||
| 408 | 449 | ||
| 409 | 450 | ||
| 410 | (defun rx-syntax (form) | 451 | (defun rx-syntax (form) |