aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStefan Monnier2000-05-22 04:29:52 +0000
committerStefan Monnier2000-05-22 04:29:52 +0000
commit40aeecadb8fc54c941eb36c6584654627c300c39 (patch)
tree6d7351b72361f557263738ede14be73163765d20
parentc9d80d38167190dcf2c1d049b723e7a70e0d7538 (diff)
downloademacs-40aeecadb8fc54c941eb36c6584654627c300c39.tar.gz
emacs-40aeecadb8fc54c941eb36c6584654627c300c39.zip
Rewritten to take advantage of shy-groups and
intervals which makes it heaps simpler.
-rw-r--r--lisp/ChangeLog3
-rw-r--r--lisp/emacs-lisp/sregex.el628
2 files changed, 146 insertions, 485 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index 737524baa43..04aa3be2b30 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,5 +1,8 @@
12000-05-22 Stefan Monnier <monnier@cs.yale.edu> 12000-05-22 Stefan Monnier <monnier@cs.yale.edu>
2 2
3 * emacs-lisp/sregex.el: Rewritten to take advantage of shy-groups and
4 intervals which makes it heaps simpler.
5
3 * newcomment.el (comment-region-internal): Go back to BEG after quoting 6 * newcomment.el (comment-region-internal): Go back to BEG after quoting
4 the nested comment markers. 7 the nested comment markers.
5 8
diff --git a/lisp/emacs-lisp/sregex.el b/lisp/emacs-lisp/sregex.el
index 09fc2313675..2c808eba5be 100644
--- a/lisp/emacs-lisp/sregex.el
+++ b/lisp/emacs-lisp/sregex.el
@@ -1,6 +1,6 @@
1;;; sregex.el --- symbolic regular expressions 1;;; sregex.el --- symbolic regular expressions
2 2
3;; Copyright (C) 1997, 1998 Free Software Foundation, Inc. 3;; Copyright (C) 1997, 1998, 2000 Free Software Foundation, Inc.
4 4
5;; Author: Bob Glickstein <bobg+sregex@zanshin.com> 5;; Author: Bob Glickstein <bobg+sregex@zanshin.com>
6;; Maintainer: Bob Glickstein <bobg+sregex@zanshin.com> 6;; Maintainer: Bob Glickstein <bobg+sregex@zanshin.com>
@@ -48,7 +48,7 @@
48;; to overcome operator precedence; that also happens automatically. 48;; to overcome operator precedence; that also happens automatically.
49;; For example: 49;; For example:
50 50
51;; (sregexq (opt (or "Bob" "Robert"))) => "\\(Bob\\|Robert\\)?" 51;; (sregexq (opt (or "Bob" "Robert"))) => "\\(?:Bob\\|Robert\\)?"
52 52
53;; It *is* possible to group parts of the expression in order to refer 53;; It *is* possible to group parts of the expression in order to refer
54;; to them with numbered backreferences: 54;; to them with numbered backreferences:
@@ -57,14 +57,6 @@
57;; ", Spot, " 57;; ", Spot, "
58;; (backref 1)) => "\\(Go\\|Run\\), Spot, \\1" 58;; (backref 1)) => "\\(Go\\|Run\\), Spot, \\1"
59 59
60;; If `sregexq' needs to introduce its own grouping parentheses, it
61;; will automatically renumber your backreferences:
62
63;; (sregexq (opt "resent-")
64;; (group (or "to" "cc" "bcc"))
65;; ": "
66;; (backref 1)) => "\\(resent-\\)?\\(to\\|cc\\|bcc\\): \\2"
67
68;; `sregexq' is a macro. Each time it is used, it constructs a simple 60;; `sregexq' is a macro. Each time it is used, it constructs a simple
69;; Lisp expression that then invokes a moderately complex engine to 61;; Lisp expression that then invokes a moderately complex engine to
70;; interpret the sregex and render the string form. Because of this, 62;; interpret the sregex and render the string form. Because of this,
@@ -99,47 +91,6 @@
99;; (digits '(1+ (char (?0 . ?9))))) 91;; (digits '(1+ (char (?0 . ?9)))))
100;; (sregex 'bol dotstar ":" whitespace digits)) => "^.*:\\s-+[0-9]+" 92;; (sregex 'bol dotstar ":" whitespace digits)) => "^.*:\\s-+[0-9]+"
101 93
102;; This package also provides sregex-specific versions of the Emacs
103;; functions `replace-match', `match-string',
104;; `match-string-no-properties', `match-beginning', `match-end', and
105;; `match-data'. In each case, the sregex version's name begins with
106;; `sregex-' and takes one additional optional parameter, an sregex
107;; "info" object. Each of these functions is concerned with numbered
108;; submatches. Since sregex may renumber submatches, alternate
109;; versions of these functions are needed that know how to adjust the
110;; supplied number.
111
112;; The sregex info object for the most recently evaluated sregex can
113;; be obtained with `sregex-info'; so if you precompute your sregexes
114;; and you plan to use `replace-match' or one of the others with it,
115;; you need to record the info object for later use:
116
117;; (let* ((regex (sregexq (opt "resent-")
118;; (group (or "to" "cc" "bcc"))
119;; ":"))
120;; (regex-info (sregex-info)))
121;; ...
122;; (if (re-search-forward regex ...)
123;; (let ((which (sregex-match-string 1 nil regex-info)))
124;; ...)))
125
126;; In this example, `regex' is "\\(resent-\\)?\\(to\\|cc\\|bcc\\):",
127;; so the call to (sregex-match-string 1 ...) is automatically turned
128;; into a call to (match-string 2 ...).
129
130;; If the sregex info argument to `sregex-replace-match',
131;; `sregex-match-string', `sregex-match-string-no-properties',
132;; `sregex-match-beginning', `sregex-match-end', or
133;; `sregex-match-data' is omitted, the current value of (sregex-info)
134;; is used.
135
136;; You can do your own sregex submatch renumbering with
137;; `sregex-backref-num'.
138
139;; Finally, `sregex-save-match-data' is like `save-match-data' but
140;; also saves and restores the information maintained by
141;; `sregex-info'.
142
143;; To use this package in a Lisp program, simply (require 'sregex). 94;; To use this package in a Lisp program, simply (require 'sregex).
144 95
145;; Here are the clauses allowed in an `sregex' or `sregexq' 96;; Here are the clauses allowed in an `sregex' or `sregexq'
@@ -165,23 +116,21 @@
165 116
166;; - (sequence CLAUSE ...) 117;; - (sequence CLAUSE ...)
167 118
168;; Groups the given CLAUSEs; may or may not use "\\(" and "\\)". 119;; Groups the given CLAUSEs; may or may not use "\\(?:" and "\\)".
169;; Clauses groups by `sequence' do not count for purposes of 120;; Clauses grouped by `sequence' do not count for purposes of
170;; numbering backreferences. Use `sequence' in situations like 121;; numbering backreferences. Use `sequence' in situations like
171;; this: 122;; this:
172 123
173;; (sregexq (or "dog" "cat" 124;; (sregexq (or "dog" "cat"
174;; (sequence (opt "sea ") "monkey"))) 125;; (sequence (opt "sea ") "monkey")))
175;; => "dog\\|cat\\|\\(sea \\)?monkey" 126;; => "dog\\|cat\\|\\(?:sea \\)?monkey"
176 127
177;; where a single `or' alternate needs to contain multiple 128;; where a single `or' alternate needs to contain multiple
178;; subclauses. 129;; subclauses.
179 130
180;; - (backref N) 131;; - (backref N)
181;; Matches the same string previously matched by the Nth "group" in 132;; Matches the same string previously matched by the Nth "group" in
182;; the same sregex. N is a positive integer. In the resulting 133;; the same sregex. N is a positive integer.
183;; regex, N may be adjusted to account for automatically introduced
184;; groups.
185 134
186;; - (or CLAUSE ...) 135;; - (or CLAUSE ...)
187;; Matches any one of the CLAUSEs by separating them with "\\|". 136;; Matches any one of the CLAUSEs by separating them with "\\|".
@@ -276,158 +225,37 @@
276 225
277;;; To do: 226;;; To do:
278 227
279;; Make (sregexq (or "a" (sequence "b" "c"))) return "a\\|bc" instead
280;; of "a\\|\\(bc\\)"
281
282;; An earlier version of this package could optionally translate the 228;; An earlier version of this package could optionally translate the
283;; symbolic regex into other languages' syntaxes, e.g. Perl. For 229;; symbolic regex into other languages' syntaxes, e.g. Perl. For
284;; instance, with Perl syntax selected, (sregexq (or "ab" "cd")) would 230;; instance, with Perl syntax selected, (sregexq (or "ab" "cd")) would
285;; yield "ab|cd" instead of "ab\\|cd". It might be useful to restore 231;; yield "ab|cd" instead of "ab\\|cd". It might be useful to restore
286;; such a facility. 232;; such a facility.
287 233
288;;; Bugs: 234;; - handle multibyte chars in sregex--char-aux
235;; - add support for character classes ([:blank:], ...)
236;; - add support for non-greedy operators *? and +?
237;; - bug: (sregexq (opt (opt ?a))) returns "a??" which is a non-greedy "a?"
289 238
290;; The (regex REGEX) form can confuse the code that distinguishes 239;;; Bugs:
291;; introduced groups from user-specified groups. Try to avoid using
292;; grouping within a `regex' form. Failing that, try to avoid using
293;; backrefs if you're using `regex'.
294 240
295;;; Code: 241;;; Code:
296 242
297(defsubst sregex--value-unitp (val) (nth 0 val)) 243(eval-when-compile (require 'cl))
298(defsubst sregex--value-groups (val) (nth 1 val))
299(defsubst sregex--value-tree (val) (nth 2 val))
300
301(defun sregex--make-value (unitp groups tree)
302 (list unitp groups tree))
303
304(defvar sregex--current-sregex nil
305 "Global state for `sregex-info'.")
306
307(defun sregex-info ()
308 "Return extra information about the latest call to `sregex'.
309This extra information is needed in order to adjust user-requested
310backreference numbers to numbers suitable for the generated regexp.
311See e.g. `sregex-match-string' and `sregex-backref-num'."
312 sregex--current-sregex)
313
314; (require 'advice)
315; (defadvice save-match-data (around sregex-save-match-data protect)
316; (let ((sregex--saved-sregex sregex--current-sregex))
317; (unwind-protect
318; ad-do-it
319; (setq sregex--current-sregex sregex--saved-sregex))))
320(defmacro sregex-save-match-data (&rest forms)
321 "Like `save-match-data', but also saves and restores `sregex-info' data."
322 `(let ((sregex--saved-sregex sregex--current-sregex))
323 (unwind-protect
324 (save-match-data ,@forms)
325 (setq sregex--current-sregex sregex--saved-sregex))))
326
327(defun sregex-replace-match (replacement
328 &optional fixedcase literal string subexp sregex)
329 "Like `replace-match', for a regexp made with `sregex'.
330This takes one additional optional argument, the `sregex' info, which
331can be obtained with `sregex-info'. The SUBEXP argument is adjusted
332to allow for \"introduced groups\". If the extra argument is omitted
333or nil, it defaults to the current value of (sregex-info)."
334 (replace-match replacement fixedcase literal string
335 (and subexp
336 (sregex-backref-num subexp sregex))))
337
338(defun sregex-match-string (count &optional in-string sregex)
339 "Like `match-string', for a regexp made with `sregex'.
340This takes one additional optional argument, the `sregex' info, which
341can be obtained with `sregex-info'. The COUNT argument is adjusted to
342allow for \"introduced groups\". If the extra argument is omitted or
343nil, it defaults to the current value of (sregex-info)."
344 (match-string (and count
345 (sregex-backref-num count sregex))
346 in-string))
347 244
245;; Compatibility code for when we didn't have shy-groups
246(defvar sregex--current-sregex nil)
247(defun sregex-info () nil)
248(defmacro sregex-save-match-data (&rest forms) (cons 'save-match-data forms))
249(defun sregex-replace-match (r &optional f l str subexp x)
250 (replace-match r f l str subexp))
251(defun sregex-match-string (c &optional i x) (match-string c i))
348(defun sregex-match-string-no-properties (count &optional in-string sregex) 252(defun sregex-match-string-no-properties (count &optional in-string sregex)
349 "Like `match-string-no-properties', for a regexp made with `sregex'. 253 (match-string-no-properties count in-string))
350This takes one additional optional argument, the `sregex' info, which 254(defun sregex-match-beginning (count &optional sregex) (match-beginning count))
351can be obtained with `sregex-info'. The COUNT argument is adjusted to 255(defun sregex-match-end (count &optional sregex) (match-end count))
352allow for \"introduced groups\". If the extra argument is omitted or 256(defun sregex-match-data (&optional sregex) (match-data))
353nil, it defaults to the current value of (sregex-info)." 257(defun sregex-backref-num (n &optional sregex) n)
354 (match-string-no-properties 258
355 (and count
356 (sregex-backref-num count sregex))
357 in-string))
358
359(defun sregex-match-beginning (count &optional sregex)
360 "Like `match-beginning', for a regexp made with `sregex'.
361This takes one additional optional argument, the `sregex' info, which
362can be obtained with `sregex-info'. The COUNT argument is adjusted to
363allow for \"introduced groups\". If the extra argument is omitted or
364nil, it defaults to the current value of (sregex-info)."
365 (match-beginning (sregex-backref-num count sregex)))
366
367(defun sregex-match-end (count &optional sregex)
368 "Like `match-end', for a regexp made with `sregex'.
369This takes one additional optional argument, the `sregex' info, which
370can be obtained with `sregex-info'. The COUNT argument is adjusted to
371allow for \"introduced groups\". If the extra argument is omitted or
372nil, it defaults to the current value of (sregex-info)."
373 (match-end (sregex-backref-num count sregex)))
374
375(defun sregex-match-data (&optional sregex)
376 "Like `match-data', for a regexp made with `sregex'.
377This takes one additional optional argument, the `sregex' info, which
378can be obtained with `sregex-info'. \"Introduced groups\" are removed
379from the result. If the extra argument is omitted or nil, it defaults
380to the current value of (sregex-info)."
381 (let* ((data (match-data))
382 (groups (sregex--value-groups (or sregex
383 sregex--current-sregex)))
384 (result (list (car (cdr data))
385 (car data))))
386 (setq data (cdr (cdr data)))
387 (while data
388 (if (car groups)
389 (setq result (append (list (car (cdr data))
390 (car data))
391 result)))
392 (setq groups (cdr groups)
393 data (cdr (cdr data))))
394 (reverse result)))
395
396(defun sregex--render-tree (tree sregex)
397 (let ((key (car tree)))
398 (cond ((eq key 'str)
399 (cdr tree))
400 ((eq key 'or)
401 (mapconcat '(lambda (x)
402 (sregex--render-tree x sregex))
403 (cdr tree)
404 "\\|"))
405 ((eq key 'sequence)
406 (apply 'concat
407 (mapcar '(lambda (x)
408 (sregex--render-tree x sregex))
409 (cdr tree))))
410 ((eq key 'group)
411 (concat "\\("
412 (sregex--render-tree (cdr tree) sregex)
413 "\\)"))
414 ((eq key 'opt)
415 (concat (sregex--render-tree (cdr tree) sregex)
416 "?"))
417 ((eq key '0+)
418 (concat (sregex--render-tree (cdr tree) sregex)
419 "*"))
420 ((eq key '1+)
421 (concat (sregex--render-tree (cdr tree) sregex)
422 "+"))
423 ((eq key 'backref)
424 (let ((num (sregex-backref-num (cdr tree) sregex)))
425 (if (> num 9)
426 (error "sregex: backref number %d too high after adjustment"
427 num)
428 (concat "\\" (int-to-string num)))))
429 (t (error "sregex internal error: unknown tree type %S"
430 key)))))
431 259
432(defun sregex (&rest exps) 260(defun sregex (&rest exps)
433 "Symbolic regular expression interpreter. 261 "Symbolic regular expression interpreter.
@@ -443,10 +271,7 @@ subexpressions:
443 (whitespace '(1+ (syntax ?-))) 271 (whitespace '(1+ (syntax ?-)))
444 (digits '(1+ (char (?0 . ?9))))) 272 (digits '(1+ (char (?0 . ?9)))))
445 (sregex 'bol dotstar \":\" whitespace digits)) => \"^.*:\\\\s-+[0-9]+\"" 273 (sregex 'bol dotstar \":\" whitespace digits)) => \"^.*:\\\\s-+[0-9]+\""
446 (progn 274 (sregex--sequence exps nil))
447 (setq sregex--current-sregex (sregex--sequence exps nil))
448 (sregex--render-tree (sregex--value-tree sregex--current-sregex)
449 sregex--current-sregex)))
450 275
451(defmacro sregexq (&rest exps) 276(defmacro sregexq (&rest exps)
452 "Symbolic regular expression interpreter. 277 "Symbolic regular expression interpreter.
@@ -546,22 +371,20 @@ Here are the clauses allowed in an `sregex' or `sregexq' expression:
546- (sequence CLAUSE ...) 371- (sequence CLAUSE ...)
547 372
548 Groups the given CLAUSEs; may or may not use \"\\\\(\" and \"\\\\)\". 373 Groups the given CLAUSEs; may or may not use \"\\\\(\" and \"\\\\)\".
549 Clauses groups by `sequence' do not count for purposes of 374 Clauses grouped by `sequence' do not count for purposes of
550 numbering backreferences. Use `sequence' in situations like 375 numbering backreferences. Use `sequence' in situations like
551 this: 376 this:
552 377
553 (sregexq (or \"dog\" \"cat\" 378 (sregexq (or \"dog\" \"cat\"
554 (sequence (opt \"sea \") \"monkey\"))) 379 (sequence (opt \"sea \") \"monkey\")))
555 => \"dog\\\\|cat\\\\|\\\\(sea \\\\)?monkey\" 380 => \"dog\\\\|cat\\\\|\\\\(?:sea \\\\)?monkey\"
556 381
557 where a single `or' alternate needs to contain multiple 382 where a single `or' alternate needs to contain multiple
558 subclauses. 383 subclauses.
559 384
560- (backref N) 385- (backref N)
561 Matches the same string previously matched by the Nth \"group\" in 386 Matches the same string previously matched by the Nth \"group\" in
562 the same sregex. N is a positive integer. In the resulting 387 the same sregex. N is a positive integer.
563 regex, N may be adjusted to account for automatically introduced
564 groups.
565 388
566- (or CLAUSE ...) 389- (or CLAUSE ...)
567 Matches any one of the CLAUSEs by separating them with \"\\\\|\". 390 Matches any one of the CLAUSEs by separating them with \"\\\\|\".
@@ -639,10 +462,7 @@ Here are the clauses allowed in an `sregex' or `sregexq' expression:
639 This is a \"trapdoor\" for including ordinary regular expression 462 This is a \"trapdoor\" for including ordinary regular expression
640 strings in the result. Some regular expressions are clearer when 463 strings in the result. Some regular expressions are clearer when
641 written the old way: \"[a-z]\" vs. (sregexq (char (?a . ?z))), for 464 written the old way: \"[a-z]\" vs. (sregexq (char (?a . ?z))), for
642 instance. However, using this can confuse the code that 465 instance.
643 distinguishes introduced groups from user-specified groups. Avoid
644 using grouping within a `regex' form. Failing that, avoid using
645 backrefs if you're using `regex'.
646 466
647Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...) 467Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...)
648has one of the following forms: 468has one of the following forms:
@@ -659,290 +479,128 @@ has one of the following forms:
659 `(apply 'sregex ',exps)) 479 `(apply 'sregex ',exps))
660 480
661(defun sregex--engine (exp combine) 481(defun sregex--engine (exp combine)
662 (let* ((val (cond ((stringp exp) 482 (cond
663 (sregex--make-value (or (not (eq combine 'suffix)) 483 ((stringp exp)
664 (= (length exp) 1)) 484 (if (and combine
665 nil 485 (eq combine 'suffix)
666 (cons 'str 486 (/= (length exp) 1))
667 (regexp-quote exp)))) 487 (concat "\\(?:" (regexp-quote exp) "\\)")
668 ((symbolp exp) 488 (regexp-quote exp)))
669 (funcall (intern (concat "sregex--" 489 ((symbolp exp)
670 (symbol-name exp))) 490 (ecase exp
671 combine)) 491 (any ".")
672 ((consp exp) 492 (bol "^")
673 (funcall (intern (concat "sregex--" 493 (eol "$")
674 (symbol-name (car exp)))) 494 (wordchar "\\w")
675 (cdr exp) 495 (not-wordchar "\\W")
676 combine)) 496 (bot "\\`")
677 (t (error "Invalid expression: %s" exp)))) 497 (eot "\\'")
678 (unitp (sregex--value-unitp val)) 498 (point "\\=")
679 (groups (sregex--value-groups val)) 499 (word-boundary "\\b")
680 (tree (sregex--value-tree val))) 500 (not-word-boundary "\\B")
681 (if (and combine (not unitp)) 501 (bow "\\<")
682 (sregex--make-value t 502 (eow "\\>")))
683 (cons nil groups) 503 ((consp exp)
684 (cons 'group tree)) 504 (funcall (intern (concat "sregex--"
685 (sregex--make-value unitp groups tree)))) 505 (symbol-name (car exp))))
506 (cdr exp)
507 combine))
508 (t (error "Invalid expression: %s" exp))))
686 509
687(defun sregex--sequence (exps combine) 510(defun sregex--sequence (exps combine)
688 (if (= (length exps) 1) 511 (if (= (length exps) 1) (sregex--engine (car exps) combine)
689 (sregex--engine (car exps) combine) 512 (let ((re (mapconcat
690 (let ((groups nil) 513 (lambda (e) (sregex--engine e 'concat))
691 (trees nil)) ;grows in reverse 514 exps "")))
692 (while exps
693 (let ((val (sregex--engine (car exps) 'concat)))
694 (setq groups (append groups
695 (sregex--value-groups val))
696 trees (cons (sregex--value-tree val) trees)
697 exps (cdr exps))))
698 (setq trees (nreverse trees))
699 (if (eq combine 'suffix) 515 (if (eq combine 'suffix)
700 (sregex--make-value t 516 (concat "\\(?:" re "\\)")
701 (cons nil groups) 517 re))))
702 (cons 'group
703 (cons 'sequence trees)))
704 (sregex--make-value (not (eq combine 'suffix))
705 groups
706 (cons 'sequence trees))))))
707
708(defun sregex--group (exps combine)
709 (let ((val (sregex--sequence exps nil)))
710 (sregex--make-value t
711 (cons t (sregex--value-groups val))
712 (cons 'group (sregex--value-tree val)))))
713
714(defun sregex-backref-num (n &optional sregex)
715 "Adjust backreference number N according to SREGEX.
716When `sregex' introduces parenthesized groups that the user didn't ask
717for, the numbering of the groups that the user *did* ask for gets all
718out of whack. This function accounts for introduced groups. Example:
719
720 (sregexq (opt \"ab\")
721 (group (or \"c\" \"d\"))) => \"\\\\(ab\\\\)?\\\\(c\\\\|d\\\\)\"
722 (setq info (sregex-info))
723 (sregex-backref-num 1 info) => 2
724
725The SREGEX parameter is optional and defaults to the current value of
726`sregex-info'."
727 (let ((groups (sregex--value-groups (or sregex
728 sregex--current-sregex)))
729 (result 0))
730 (while (and groups (> n 0))
731 (if (car groups)
732 (setq n (1- n)))
733 (setq result (1+ result)
734 groups (cdr groups)))
735 result))
736
737(defun sregex--backref (exps combine)
738 (sregex--make-value t nil (cons 'backref (car exps))))
739
740(defun sregex--any (combine)
741 (sregex--make-value t nil '(str . ".")))
742
743(defun sregex--opt (exps combine)
744 (let ((val (sregex--sequence exps 'suffix)))
745 (sregex--make-value t
746 (sregex--value-groups val)
747 (cons 'opt (sregex--value-tree val)))))
748
749(defun sregex--0+ (exps combine)
750 (let ((val (sregex--sequence exps 'suffix)))
751 (sregex--make-value t
752 (sregex--value-groups val)
753 (cons '0+ (sregex--value-tree val)))))
754(defun sregex--1+ (exps combine)
755 (let ((val (sregex--sequence exps 'suffix)))
756 (sregex--make-value t
757 (sregex--value-groups val)
758 (cons '1+ (sregex--value-tree val)))))
759
760(defun sregex--repeat (exps combine)
761 (let ((min (or (car exps) 0))
762 (max (car (cdr exps))))
763 (setq exps (cdr (cdr exps)))
764 (cond ((zerop min)
765 (cond ((equal max 0) ;degenerate
766 (sregex--make-value t nil nil))
767 ((equal max 1)
768 (sregex--opt exps combine))
769 ((not max)
770 (sregex--0+ exps combine))
771 (t (sregex--sequence (make-list max
772 (cons 'opt exps))
773 combine))))
774 ((= min 1)
775 (cond ((equal max 1)
776 (sregex--sequence exps combine))
777 ((not max)
778 (sregex--1+ exps combine))
779 (t (sregex--sequence (append exps
780 (make-list (1- max)
781 (cons 'opt exps)))
782 combine))))
783 (t (sregex--sequence (append exps
784 (list (append (list 'repeat
785 (1- min)
786 (and max
787 (1- max)))
788 exps)))
789 combine)))))
790 518
791(defun sregex--or (exps combine) 519(defun sregex--or (exps combine)
792 (if (= (length exps) 1) 520 (if (= (length exps) 1) (sregex--engine (car exps) combine)
793 (sregex--engine (car exps) combine) 521 (let ((re (mapconcat
794 (let ((groups nil) 522 (lambda (e) (sregex--engine e 'or))
795 (trees nil)) 523 exps "\\|")))
796 (while exps 524 (if (not (eq combine 'or))
797 (let ((val (sregex--engine (car exps) 'or))) 525 (concat "\\(?:" re "\\)")
798 (setq groups (append groups 526 re))))
799 (sregex--value-groups val)) 527
800 trees (cons (sregex--value-tree val) trees) 528(defun sregex--group (exps combine) (concat "\\(" (sregex--sequence exps nil) "\\)"))
801 exps (cdr exps)))) 529
802 (sregex--make-value (eq combine 'or) 530(defun sregex--backref (exps combine) (concat "\\" (int-to-string (car exps))))
803 groups 531(defun sregex--opt (exps combine) (concat (sregex--sequence exps 'suffix) "?"))
804 (cons 'or (nreverse trees)))))) 532(defun sregex--0+ (exps combine) (concat (sregex--sequence exps 'suffix) "*"))
805 533(defun sregex--1+ (exps combine) (concat (sregex--sequence exps 'suffix) "+"))
806(defmacro sregex--char-range-aux () 534
807 '(if start 535(defun sregex--char (exps combine) (sregex--char-aux nil exps))
808 (let (startc endc) 536(defun sregex--not-char (exps combine) (sregex--char-aux t exps))
809 (if (and (<= 32 start) 537
810 (<= start 127)) 538(defun sregex--syntax (exps combine) (format "\\s%c" (car exps)))
811 (setq startc (char-to-string start) 539(defun sregex--not-syntax (exps combine) (format "\\S%c" (car exps)))
812 endc (char-to-string end)) 540
813 (setq startc (format "\\%03o" start) 541(defun sregex--regex (exps combine)
814 endc (format "\\%03o" end))) 542 (if combine (concat "\\(?:" (car exps) "\\)") (car exps)))
815 (if (> end start) 543
816 (if (> end (+ start 1)) 544(defun sregex--repeat (exps combine)
817 (setq class (concat class startc "-" endc)) 545 (let* ((min (or (pop exps) 0))
818 (setq class (concat class startc endc))) 546 (minstr (number-to-string min))
819 (setq class (concat class startc)))))) 547 (max (pop exps)))
820 548 (concat (sregex--sequence exps 'suffix)
821(defmacro sregex--char-range (rstart rend) 549 (concat "\\{" minstr ","
822 `(let ((i ,rstart) 550 (when max (number-to-string max)) "\\}"))))
823 start end) 551
824 (while (<= i ,rend) 552(defun sregex--char-range (start end)
825 (if (aref chars i) 553 (let ((startc (char-to-string start))
826 (progn 554 (endc (char-to-string end)))
827 (if start 555 (cond
828 (setq end i) 556 ((> end (+ start 2)) (concat startc "-" endc))
829 (setq start i 557 ((> end (+ start 1)) (concat startc (char-to-string (1+ start)) endc))
830 end i)) 558 ((> end start) (concat startc endc))
831 (aset chars i nil)) 559 (t startc))))
832 (sregex--char-range-aux)
833 (setq start nil
834 end nil))
835 (setq i (1+ i)))
836 (sregex--char-range-aux)))
837 560
838(defun sregex--char-aux (complement args) 561(defun sregex--char-aux (complement args)
839 (let ((chars (make-vector 256 nil))) 562 ;; regex-opt does the same, we should join effort.
840 (while args 563 (let ((chars (make-bool-vector 256 nil))) ; Yeah, right!
841 (let ((arg (car args))) 564 (dolist (arg args)
842 (cond ((integerp arg) 565 (cond ((integerp arg) (aset chars arg t))
843 (aset chars arg t)) 566 ((stringp arg) (mapcar (lambda (c) (aset chars c t)) arg))
844 ((stringp arg) 567 ((consp arg)
845 (mapcar (function 568 (let ((start (car arg))
846 (lambda (c) 569 (end (cdr arg)))
847 (aset chars c t))) 570 (when (> start end)
848 arg)) 571 (let ((tmp start)) (setq start end) (setq end tmp)))
849 ((consp arg) 572 ;; now start <= end
850 (let ((start (car arg)) 573 (let ((i start))
851 (end (cdr arg))) 574 (while (<= i end)
852 (if (> start end) 575 (aset chars i t)
853 (let ((tmp start)) 576 (setq i (1+ i))))))))
854 (setq start end
855 end tmp)))
856 ;; now start <= end
857 (let ((i start))
858 (while (<= i end)
859 (aset chars i t)
860 (setq i (1+ i))))))))
861 (setq args (cdr args)))
862 ;; now chars is a map of the characters in the class 577 ;; now chars is a map of the characters in the class
863 (let ((class "") 578 (let ((caret (aref chars ?^))
864 (caret (aref chars ?^))) 579 (dash (aref chars ?-))
580 (class (if (aref chars ?\]) "]" "")))
865 (aset chars ?^ nil) 581 (aset chars ?^ nil)
866 (if (aref chars ?\]) 582 (aset chars ?- nil)
867 (progn 583 (aset chars ?\] nil)
868 (setq class (concat class "]")) 584
869 (aset chars ?\] nil))) 585 (let (start end)
870 (if (aref chars ?-) 586 (dotimes (i 256)
871 (progn 587 (if (aref chars i)
872 (setq class (concat class "-")) 588 (progn
873 (aset chars ?- nil))) 589 (unless start (setq start i))
874 (if (aref chars ?\\) 590 (setq end i)
875 (progn 591 (aset chars i nil))
876 (setq class (concat class "\\\\")) 592 (when start
877 (aset chars ?\\ nil))) 593 (setq class (concat class (sregex--char-range start end)))
878 594 (setq start nil))))
879 (sregex--char-range ?A ?Z) 595 (if start
880 (sregex--char-range ?a ?z) 596 (setq class (concat class (sregex--char-range start end)))))
881 (sregex--char-range ?0 ?9) 597
882 598 (if (> (length class) 0)
883 (let ((i 32)) 599 (setq class (concat class (if caret "^") (if dash "-")))
884 (while (< i 128) 600 (setq class (concat class (if dash "-") (if caret "^"))))
885 (if (aref chars i) 601 (if (and (not complement) (= (length class) 1))
886 (progn 602 (regexp-quote class)
887 (setq class (concat class (char-to-string i))) 603 (concat "[" (if complement "^") class "]")))))
888 (aset chars i nil)))
889 (setq i (1+ i))))
890
891 (sregex--char-range 0 31)
892 (sregex--char-range 128 255)
893
894 (let ((i 0))
895 (while (< i 256)
896 (if (aref chars i)
897 (setq class (concat class (format "\\%03o" i))))
898 (setq i (1+ i))))
899
900 (if caret
901 (setq class (concat class "^")))
902 (concat "[" (if complement "^") class "]"))))
903
904(defun sregex--char (exps combine)
905 (sregex--make-value t nil (cons 'str (sregex--char-aux nil exps))))
906(defun sregex--not-char (exps combine)
907 (sregex--make-value t nil (cons 'str (sregex--char-aux t exps))))
908
909(defun sregex--bol (combine)
910 (sregex--make-value t nil '(str . "^")))
911(defun sregex--eol (combine)
912 (sregex--make-value t nil '(str . "$")))
913
914(defun sregex--wordchar (combine)
915 (sregex--make-value t nil '(str . "\\w")))
916(defun sregex--not-wordchar (combine)
917 (sregex--make-value t nil '(str . "\\W")))
918
919(defun sregex--syntax (exps combine)
920 (sregex--make-value t nil (cons 'str (format "\\s%c" (car exps)))))
921(defun sregex--not-syntax (exps combine)
922 (sregex--make-value t nil (cons 'str (format "\\S%c" (car exps)))))
923
924(defun sregex--bot (combine)
925 (sregex--make-value t nil (cons 'str "\\`")))
926(defun sregex--eot (combine)
927 (sregex--make-value t nil (cons 'str "\\'")))
928
929(defun sregex--point (combine)
930 (sregex--make-value t nil '(str . "\\=")))
931
932(defun sregex--word-boundary (combine)
933 (sregex--make-value t nil '(str . "\\b")))
934(defun sregex--not-word-boundary (combine)
935 (sregex--make-value t nil '(str . "\\B")))
936
937(defun sregex--bow (combine)
938 (sregex--make-value t nil '(str . "\\<")))
939(defun sregex--eow (combine)
940 (sregex--make-value t nil '(str . "\\>")))
941
942
943;; trapdoor - usage discouraged
944(defun sregex--regex (exps combine)
945 (sregex--make-value nil nil (car exps)))
946 604
947(provide 'sregex) 605(provide 'sregex)
948 606