about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Richard M. Stallman, 2002-12-23 17:43:24 +0000
committer: Richard M. Stallman, 2002-12-23 17:43:24 +0000
commit: c53f9b3b9ce4d4a4c54bdb82cbea1d0f46ec1ba6 (patch)
tree: 05cfd5065939361d58e05fbb97a50782c7f95731
parent: 61f1d295a28526a92ca52740197e5b9e31393899 (diff)
download: emacs-c53f9b3b9ce4d4a4c54bdb82cbea1d0f46ec1ba6.tar.gz
emacs-c53f9b3b9ce4d4a4c54bdb82cbea1d0f46ec1ba6.zip
(rx-and): Generate a shy group.
Specify `no-group' when calling rx-to-string. (rx-submatch): Specify `no-group' when calling rx-to-string. (rx-kleene): Use rx-atomic-p to decide whether to make a group. (rx-atomic-p): New function.
-rw-r--r-- lisp/emacs-lisp/rx.el 61
1 file changed, 51 insertions, 10 deletions
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
index 938564b5dde..a58a38bf213 100644
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -61,9 +61,9 @@
61;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*" 61;; "^content-transfer-encoding:\\(\n?[\t ]\\)*quoted-printable\\(\n?[\t ]\\)*"
62;; (rx (and line-start 62;; (rx (and line-start
63;; "content-transfer-encoding:" 63;; "content-transfer-encoding:"
64;; (+ (? ?\n) blank) 64;; (+ (? ?\n)) blank
65;; "quoted-printable" 65;; "quoted-printable"
66;; (+ (? ?\n) blank)) 66;; (+ (? ?\n)) blank))
67;; 67;;
68;; (concat "^\\(?:" something-else "\\)") 68;; (concat "^\\(?:" something-else "\\)")
69;; (rx (and line-start (eval something-else))), statically or 69;; (rx (and line-start (eval something-else))), statically or
@@ -78,11 +78,11 @@
78;; (and line-start ?\n))) 78;; (and line-start ?\n)))
79;; 79;;
80;; "\\$[I]d: [^ ]+ \\([^ ]+\\) " 80;; "\\$[I]d: [^ ]+ \\([^ ]+\\) "
81;; (rx (and "$Id": " 81;; (rx (and "$Id: "
82;; (1+ (not (in " "))) 82;; (1+ (not (in " ")))
83;; " " 83;; " "
84;; (submatch (1+ (not (in " ")))) 84;; (submatch (1+ (not (in " "))))
85;; " "))) 85;; " "))
86;; 86;;
87;; "\\\\\\\\\\[\\w+" 87;; "\\\\\\\\\\[\\w+"
88;; (rx (and ?\\ ?\\ ?\[ (1+ word))) 88;; (rx (and ?\\ ?\\ ?\[ (1+ word)))
@@ -272,7 +272,11 @@ See also `rx-constituents'."
272 "Parse and produce code from FORM. 272 "Parse and produce code from FORM.
273FORM is of the form `(and FORM1 ...)'." 273FORM is of the form `(and FORM1 ...)'."
274 (rx-check form) 274 (rx-check form)
275 (mapconcat #'rx-to-string (cdr form) nil)) 275 (concat "\\(?:"
276 (mapconcat
277 (function (lambda (x) (rx-to-string x 'no-group)))
278 (cdr form) nil)
279 "\\)"))
276 280
277 281
278(defun rx-or (form) 282(defun rx-or (form)
@@ -384,8 +388,10 @@ FORM is either `(repeat N FORM1)' or `(repeat N M FORM1)'."
384 388
385(defun rx-submatch (form) 389(defun rx-submatch (form)
386 "Parse and produce code from FORM, which is `(submatch ...)'." 390 "Parse and produce code from FORM, which is `(submatch ...)'."
387 (concat "\\(" (mapconcat #'rx-to-string (cdr form) nil) "\\)")) 391 (concat "\\("
388 392 (mapconcat (function (lambda (x) (rx-to-string x 'no-group)))
393 (cdr form) nil)
394 "\\)"))
389 395
390(defun rx-kleene (form) 396(defun rx-kleene (form)
391 "Parse and produce code from FORM. 397 "Parse and produce code from FORM.
@@ -402,9 +408,44 @@ is non-nil."
402 (t "?"))) 408 (t "?")))
403 (op (cond ((memq (car form) '(* *? 0+ zero-or-more)) "*") 409 (op (cond ((memq (car form) '(* *? 0+ zero-or-more)) "*")
404 ((memq (car form) '(+ +? 1+ one-or-more)) "+") 410 ((memq (car form) '(+ +? 1+ one-or-more)) "+")
405 (t "?")))) 411 (t "?")))
406 (format "\\(?:%s\\)%s%s" (rx-to-string (cadr form) 'no-group) 412 (result (rx-to-string (cadr form) 'no-group)))
407 op suffix))) 413 (if (not (rx-atomic-p result))
414 (setq result (concat "\\(?:" result "\\)")))
415 (concat result op suffix)))
416
417(defun rx-atomic-p (r)
418 "Return non-nil if regexp string R is atomic.
419An atomic regexp R is one such that a suffix operator
420appended to R will apply to all of R. For example, \"a\",
421\"[abc]\" and \"\\(ab\\|ab*c\\)\" are atomic and \"ab\",
422\"[ab]c\", and \"ab\\|ab*c\" are not atomic.
423
424This function may return false negatives, but it will not
425return false positives. It is nevertheless useful in
426situations where an efficiency shortcut can be taken iff a
427regexp is atomic. The function can be improved to detect
428more cases of atomic regexps. Presently, this function
429detects the following categories of atomic regexp:
430
431 a group or shy group: \\(...\\)
432 a character class: [...]
433 a single character: a
434
435On the other hand, false negatives will be returned for
436regexps that are atomic but end in operators, such as
437\"a+\". I think these are rare. Probably such cases could
438be detected without much effort. A guarantee of no false
439negatives would require a theoretic specification of the set
440of all atomic regexps."
441 (let ((l (length r)))
442 (or (equal l 1)
443 (and (>= l 6)
444 (equal (substring r 0 2) "\\(")
445 (equal (substring r -2) "\\)"))
446 (and (>= l 2)
447 (equal (substring r 0 1) "[")
448 (equal (substring r -1) "]")))))
408 449
409 450
410(defun rx-syntax (form) 451(defun rx-syntax (form)