aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Marshall1997-06-06 07:10:24 +0000
committerSimon Marshall1997-06-06 07:10:24 +0000
commit25544ce1bd4992b990365f8a09070f963aefb67d (patch)
tree60ee4e61f351617d86afedcfa0003da85151c279
parent14aa11f4dae9557d582f4f66b233ae84fdf925e1 (diff)
downloademacs-25544ce1bd4992b990365f8a09070f963aefb67d.tar.gz
emacs-25544ce1bd4992b990365f8a09070f963aefb67d.zip
emit charsets after strings so that the final regexp finds the longest match.
-rw-r--r--lisp/emacs-lisp/regexp-opt.el82
1 files changed, 45 insertions, 37 deletions
diff --git a/lisp/emacs-lisp/regexp-opt.el b/lisp/emacs-lisp/regexp-opt.el
index adf8e9e8da7..140fd8005dd 100644
--- a/lisp/emacs-lisp/regexp-opt.el
+++ b/lisp/emacs-lisp/regexp-opt.el
@@ -4,7 +4,7 @@
4 4
5;; Author: Simon Marshall <simon@gnu.ai.mit.edu> 5;; Author: Simon Marshall <simon@gnu.ai.mit.edu>
6;; Keywords: strings, regexps 6;; Keywords: strings, regexps
7;; Version: 1.04.01 7;; Version: 1.05
8 8
9;; This file is part of GNU Emacs. 9;; This file is part of GNU Emacs.
10 10
@@ -27,10 +27,14 @@
27 27
28;; The "opt" in "regexp-opt" stands for "optim\\(al\\|i\\(se\\|ze\\)\\)". 28;; The "opt" in "regexp-opt" stands for "optim\\(al\\|i\\(se\\|ze\\)\\)".
29;; 29;;
30;; This package generates a regexp from a given list of strings (that matches 30;; This package generates a regexp from a given list of strings (which matches
31;; one of those strings) that is equivalent to but more efficient than: 31;; one of those strings) so that the regexp generated by:
32;; 32;;
33;; (mapconcat 'identity (mapcar 'regexp-quote strings) "\\|") 33;; (regexp-opt strings)
34;;
35;; is equivalent to, but more efficient than, the regexp generated by:
36;;
37;; (mapconcat 'regexp-quote strings "\\|")
34;; 38;;
35;; For example: 39;; For example:
36;; 40;;
@@ -42,10 +46,9 @@
42;; (concat "(" (regexp-opt strings t) "\\>")) 46;; (concat "(" (regexp-opt strings t) "\\>"))
43;; => "(\\(c\\(atch\\|ond\\(ition-case\\)?\\)\\|if\\|let\\*?\\|prog[12n]\\|save-\\(current-buffer\\|excursion\\|match-data\\|restriction\\|window-excursion\\)\\|throw\\|un\\(less\\|wind-protect\\)\\|wh\\(en\\|ile\\)\\)\\>" 47;; => "(\\(c\\(atch\\|ond\\(ition-case\\)?\\)\\|if\\|let\\*?\\|prog[12n]\\|save-\\(current-buffer\\|excursion\\|match-data\\|restriction\\|window-excursion\\)\\|throw\\|un\\(less\\|wind-protect\\)\\|wh\\(en\\|ile\\)\\)\\>"
44;; 48;;
45;; Searching using the above example `regexp-opt' regexp is significantly 49;; Searching using the above example `regexp-opt' regexp takes approximately
46;; faster than searching using the equivalent `mapconcat' regexp, taking 50;; two-thirds of the time taken using the equivalent `mapconcat' regexp.
47;; approximately two-thirds of the time. 51
48;;
49;; Since this package was written to produce efficient regexps, not regexps 52;; Since this package was written to produce efficient regexps, not regexps
50;; efficiently, it is probably not a good idea to in-line too many calls in 53;; efficiently, it is probably not a good idea to in-line too many calls in
51;; your code, unless you use the following trick with `eval-when-compile': 54;; your code, unless you use the following trick with `eval-when-compile':
@@ -62,7 +65,15 @@
62;; (defvar definition-regexp 65;; (defvar definition-regexp
63;; "^(\\(def\\(alias\\|const\\|macro\\|subst\\|un\\|var\\)\\)\\>") 66;; "^(\\(def\\(alias\\|const\\|macro\\|subst\\|un\\|var\\)\\)\\>")
64;; 67;;
65;; Originally written for font-lock.el, from an idea from Stig's hl319.el. 68;; Note that if you use this trick for all instances of `regexp-opt' and
69;; `regexp-opt-depth' in your code, regexp-opt.el would only have to be loaded
70;; at compile time. But note also that using this trick means that should
71;; regexp-opt.el be changed, perhaps to fix a bug or to add a feature to
72;; improve the efficiency of `regexp-opt' regexps, you would have to recompile
73;; your code for such changes to have effect in your code.
74
75;; Originally written for font-lock.el, from an idea from Stig's hl319.el, with
76;; thanks for ideas also to Michael Ernst, Bob Glickstein and Dan Nicolaescu.
66;; Please don't tell me that it doesn't produce optimal regexps; I know that 77;; Please don't tell me that it doesn't produce optimal regexps; I know that
67;; already. For example, the above explanation for the meaning of "opt" would 78;; already. For example, the above explanation for the meaning of "opt" would
68;; be more efficient as "optim\\(al\\|i[sz]e\\)", but this requires complex 79;; be more efficient as "optim\\(al\\|i[sz]e\\)", but this requires complex
@@ -73,15 +84,16 @@
73;;;###autoload 84;;;###autoload
74(defun regexp-opt (strings &optional paren) 85(defun regexp-opt (strings &optional paren)
75 "Return a regexp to match a string in STRINGS. 86 "Return a regexp to match a string in STRINGS.
87Each string should be unique in STRINGS and should not contain any regexps.
76If optional PAREN non-nil, ensure that the returned regexp is enclosed by at 88If optional PAREN non-nil, ensure that the returned regexp is enclosed by at
77least one regexp grouping construct. 89least one regexp grouping construct.
78Each string in STRINGS should be unique and should not contain any regexps.
79The returned regexp is typically more efficient than the equivalent regexp: 90The returned regexp is typically more efficient than the equivalent regexp:
80 91
81 (mapconcat 'identity (mapcar 'regexp-quote STRINGS) \"\\\\|\") 92 (let ((open-paren (if PAREN \"\\\\(\" \"\")) (close-paren (if PAREN \"\\\\)\" \"\")))
93 (concat open-paren (mapconcat 'regexp-quote STRINGS \"\\\\|\") close-paren))
82 94
83but typically contains regexp grouping constructs. Use `regexp-opt-depth' to 95but typically contains more regexp grouping constructs.
84count them." 96Use `regexp-opt-depth' to count them."
85 (save-match-data 97 (save-match-data
86 ;; Recurse on the sorted list. 98 ;; Recurse on the sorted list.
87 (let ((max-lisp-eval-depth (* 1024 1024)) 99 (let ((max-lisp-eval-depth (* 1024 1024))
@@ -165,12 +177,12 @@ in REGEXP."
165 close-group))) 177 close-group)))
166 ;; 178 ;;
167 ;; If there are several one-character strings, remove them and recurse 179 ;; If there are several one-character strings, remove them and recurse
168 ;; on the rest. 180 ;; on the rest (first so the final regexp finds the longest match).
169 ((> (length letters) 1) 181 ((> (length letters) 1)
170 (let ((rest (let ((completion-regexp-list '("^..+$"))) 182 (let ((rest (let ((completion-regexp-list '("^..+$")))
171 (all-completions "" (mapcar 'list strings))))) 183 (all-completions "" (mapcar 'list strings)))))
172 (concat open-group 184 (concat open-group
173 (regexp-opt-charset letters) "\\|" (regexp-opt-group rest) 185 (regexp-opt-group rest) "\\|" (regexp-opt-charset letters)
174 close-group))) 186 close-group)))
175 ;; 187 ;;
176 ;; Otherwise, divide the list into those that start with a particular 188 ;; Otherwise, divide the list into those that start with a particular
@@ -196,30 +208,26 @@ in REGEXP."
196 (bracket "") (dash "") (caret "")) 208 (bracket "") (dash "") (caret ""))
197 ;; 209 ;;
198 ;; Make a character map but extract character set meta characters. 210 ;; Make a character map but extract character set meta characters.
199 (let (char) 211 (dolist (char (mapcar 'string-to-char chars))
200 (while chars 212 (case char
201 (setq char (string-to-char (pop chars))) 213 (?\]
202 (cond ((eq char ?\]) 214 (setq bracket "]"))
203 (setq bracket "]")) 215 (?^
204 ((eq char ?^) 216 (setq caret "^"))
205 (setq caret "^")) 217 (?-
206 ((eq char ?-) 218 (setq dash "-"))
207 (setq dash "-")) 219 (otherwise
208 (t 220 (aset charmap char t))))
209 (aset charmap char t)))))
210 ;; 221 ;;
211 ;; Make a character set from the map using ranges where applicable. 222 ;; Make a character set from the map using ranges where applicable.
212 (let ((elt 0) start) 223 (dotimes (elt charwidth)
213 (while (< elt charwidth) 224 (when (aref charmap elt)
214 (when (aref charmap elt) 225 (let ((start elt))
215 (setq start (1+ elt)) 226 (while (and (< elt charwidth) (aref charmap elt))
216 (while (and (< start charwidth) (aref charmap start)) 227 (incf elt))
217 (incf start)) 228 (if (> (- elt start) 3)
218 (if (< (- start elt) 4) 229 (setq charset (format "%s%c-%c" charset start (1- elt)))
219 (setq charset (format "%s%c" charset elt)) 230 (setq charset (format "%s%c" charset (setq elt start)))))))
220 (setq charset (format "%s%c-%c" charset elt (1- start))
221 elt start)))
222 (incf elt)))
223 ;; 231 ;;
224 ;; Make sure a caret is not first and a dash is first or last. 232 ;; Make sure a caret is not first and a dash is first or last.
225 (if (and (string-equal charset "") (string-equal bracket "")) 233 (if (and (string-equal charset "") (string-equal bracket ""))