aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGerd Moellmann1999-10-08 23:06:15 +0000
committerGerd Moellmann1999-10-08 23:06:15 +0000
commitb02b54a8ba4e958fd103c0ae15874b3010c07860 (patch)
tree8f6b071f1d8da77c99f73bb5a2d31182a4a84c32
parent57f707e72d04e232cf2e3c0b25d9380f9106f6dd (diff)
downloademacs-b02b54a8ba4e958fd103c0ae15874b3010c07860.tar.gz
emacs-b02b54a8ba4e958fd103c0ae15874b3010c07860.zip
(regexp-opt-try-suffix): New function.
(regexp-opt-group): Use it to get common suffixes in STRINGS. If STRINGS is nil, return "" rather than nil.
-rw-r--r--lisp/ChangeLog6
-rw-r--r--lisp/emacs-lisp/regexp-opt.el78
2 files changed, 64 insertions, 20 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index b81771d8f36..412edf885d8 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,9 @@
11999-10-08 Simon Marshall <simon@gnu.org>
2
3 * emacs-lisp/regexp-opt.el (regexp-opt-try-suffix): New function.
4 (regexp-opt-group): Use it to get common suffixes in STRINGS.
5 If STRINGS is nil, return "" rather than nil.
6
11999-10-08 Gerd Moellmann <gerd@gnu.org> 71999-10-08 Gerd Moellmann <gerd@gnu.org>
2 8
3 * Makefile (compile): Compile files one by one. Set load-path to 9 * Makefile (compile): Compile files one by one. Set load-path to
diff --git a/lisp/emacs-lisp/regexp-opt.el b/lisp/emacs-lisp/regexp-opt.el
index a6ed0734f1d..fde30cdd3c4 100644
--- a/lisp/emacs-lisp/regexp-opt.el
+++ b/lisp/emacs-lisp/regexp-opt.el
@@ -1,9 +1,10 @@
1;;; regexp-opt.el --- generate efficient regexps to match strings. 1;;; regexp-opt.el --- generate efficient regexps to match strings.
2 2
3;; Copyright (C) 1994, 1995, 1996, 1997 Free Software Foundation, Inc. 3;; Copyright (C) 1994, 95, 96, 97, 98, 1999 Free Software Foundation, Inc.
4 4
5;; Author: Simon Marshall <simon@gnu.org> 5;; Author: Simon Marshall <simon@gnu.org>
6;; Keywords: strings, regexps 6;; Keywords: strings, regexps
7;; Version: 1.07
7 8
8;; This file is part of GNU Emacs. 9;; This file is part of GNU Emacs.
9 10
@@ -24,7 +25,7 @@
24 25
25;;; Commentary: 26;;; Commentary:
26 27
27;; The "opt" in "regexp-opt" stands for "optim\\(al\\|i\\(se\\|ze\\)\\)". 28;; The "opt" in "regexp-opt" stands for "optim\\(al\\|i[sz]e\\)".
28;; 29;;
29;; This package generates a regexp from a given list of strings (which matches 30;; This package generates a regexp from a given list of strings (which matches
30;; one of those strings) so that the regexp generated by: 31;; one of those strings) so that the regexp generated by:
@@ -47,6 +48,17 @@
47;; 48;;
48;; Searching using the above example `regexp-opt' regexp takes approximately 49;; Searching using the above example `regexp-opt' regexp takes approximately
49;; two-thirds of the time taken using the equivalent `mapconcat' regexp. 50;; two-thirds of the time taken using the equivalent `mapconcat' regexp.
51;;
52;; Note that this package will also find common suffix strings if this does not
53;; increase the number of grouping constructs. For example:
54;;
55;; (regexp-opt '("these" "those"))
56;; => "th[eo]se"
57;;
58;; but:
59;;
60;; (regexp-opt '("barfly" "housefly"))
61;; => "barfly\\|housefly" rather than "\\(bar\\|house\\)fly"
50 62
51;; Since this package was written to produce efficient regexps, not regexps 63;; Since this package was written to produce efficient regexps, not regexps
52;; efficiently, it is probably not a good idea to in-line too many calls in 64;; efficiently, it is probably not a good idea to in-line too many calls in
@@ -72,13 +84,12 @@
72;; your code for such changes to have effect in your code. 84;; your code for such changes to have effect in your code.
73 85
74;; Originally written for font-lock.el, from an idea from Stig's hl319.el, with 86;; Originally written for font-lock.el, from an idea from Stig's hl319.el, with
75;; thanks for ideas also to Michael Ernst, Bob Glickstein and Dan Nicolaescu. 87;; thanks for ideas also to Michael Ernst, Bob Glickstein, Dan Nicolaescu and
76;; Please don't tell me that it doesn't produce optimal regexps; I know that 88;; Stefan Monnier.
77;; already. For example, the above explanation for the meaning of "opt" would 89;; No doubt `regexp-opt' doesn't always produce optimal regexps, so code, ideas
78;; be more efficient as "optim\\(al\\|i[sz]e\\)", but this requires complex 90;; or any other information to improve things are welcome.
79;; forward looking. But (ideas or) code to improve things (are) is welcome.
80 91
81;;; Code: 92;;; Code.
82 93
83;;;###autoload 94;;;###autoload
84(defun regexp-opt (strings &optional paren) 95(defun regexp-opt (strings &optional paren)
@@ -128,9 +139,9 @@ in REGEXP."
128 ;; If LAX non-nil, don't output parentheses if it doesn't require them. 139 ;; If LAX non-nil, don't output parentheses if it doesn't require them.
129 ;; Merges keywords to avoid backtracking in Emacs' regexp matcher. 140 ;; Merges keywords to avoid backtracking in Emacs' regexp matcher.
130 ;; 141 ;;
131 ;; The basic idea is to find the shortest common prefix, remove it and 142 ;; The basic idea is to find the shortest common prefix or suffix, remove it
132 ;; recurse. If there is no prefix, we divide the list into two so that (at 143 ;; and recurse. If there is no prefix, we divide the list into two so that
133 ;; least) one half will have at least a one-character common prefix. 144 ;; (at least) one half will have at least a one-character common prefix.
134 ;; 145 ;;
135 ;; Also we delay the addition of grouping parenthesis as long as possible 146 ;; Also we delay the addition of grouping parenthesis as long as possible
136 ;; until we're sure we need them, and try to remove one-character sequences 147 ;; until we're sure we need them, and try to remove one-character sequences
@@ -139,11 +150,15 @@ in REGEXP."
139 (let* ((open-group (if paren "\\(" "")) 150 (let* ((open-group (if paren "\\(" ""))
140 (close-group (if paren "\\)" "")) 151 (close-group (if paren "\\)" ""))
141 (open-charset (if lax "" open-group)) 152 (open-charset (if lax "" open-group))
142 (close-charset (if lax "" close-group))) 153 (close-charset (if lax "" close-group))
154 (open-presuf open-charset)
155 (close-presuf close-charset))
143 (cond 156 (cond
144 ;; Protect against user-stupidity... could call error here 157 ;;
145 ((null strings) 158 ;; If there are no strings, just return the empty string.
146 nil) 159 ((= (length strings) 0)
160 "")
161 ;;
147 ;; If there is only one string, just return it. 162 ;; If there is only one string, just return it.
148 ((= (length strings) 1) 163 ((= (length strings) 1)
149 (if (= (length (car strings)) 1) 164 (if (= (length (car strings)) 1)
@@ -157,7 +172,7 @@ in REGEXP."
157 close-charset)) 172 close-charset))
158 ;; 173 ;;
159 ;; If all are one-character strings, just return a character set. 174 ;; If all are one-character strings, just return a character set.
160 ((= (length strings) (apply '+ (mapcar 'length strings))) 175 ((= (apply 'max (mapcar 'length strings)) 1)
161 (concat open-charset 176 (concat open-charset
162 (regexp-opt-charset strings) 177 (regexp-opt-charset strings)
163 close-charset)) 178 close-charset))
@@ -165,17 +180,30 @@ in REGEXP."
165 ;; We have a list of different length strings. 180 ;; We have a list of different length strings.
166 (t 181 (t
167 (let ((prefix (try-completion "" (mapcar 'list strings))) 182 (let ((prefix (try-completion "" (mapcar 'list strings)))
183 (suffix (regexp-opt-try-suffix strings))
168 (letters (let ((completion-regexp-list '("^.$"))) 184 (letters (let ((completion-regexp-list '("^.$")))
169 (all-completions "" (mapcar 'list strings))))) 185 (all-completions "" (mapcar 'list strings)))))
170 (cond 186 (cond
171 ;; 187 ;;
172 ;; If there is a common prefix, remove it and recurse on the suffixes. 188 ;; If there is a common prefix, remove it and recurse on the suffixes.
173 ((> (length prefix) 0) 189 ((> (length prefix) 0)
174 (let* ((length (length prefix)) 190 (let* ((end (length prefix))
175 (suffixes (mapcar (lambda (s) (substring s length)) strings))) 191 (suffixes (mapcar (lambda (s) (substring s end)) strings)))
176 (concat open-group 192 (concat open-presuf
177 (regexp-quote prefix) (regexp-opt-group suffixes t t) 193 (regexp-quote prefix) (regexp-opt-group suffixes t t)
178 close-group))) 194 close-presuf)))
195 ;;
196 ;; If there is a common suffix, remove it and recurse on the prefixes.
197 ((> (length suffix) (if lax
198 0
199 (- (apply 'max (mapcar 'length strings)) 2)))
200 (let* ((end (- (length suffix)))
201 (prefixes (sort (mapcar (lambda (s) (substring s 0 end))
202 strings)
203 'string-lessp)))
204 (concat open-presuf
205 (regexp-opt-group prefixes t t) (regexp-quote suffix)
206 close-presuf)))
179 ;; 207 ;;
180 ;; If there are several one-character strings, remove them and recurse 208 ;; If there are several one-character strings, remove them and recurse
181 ;; on the rest (first so the final regexp finds the longest match). 209 ;; on the rest (first so the final regexp finds the longest match).
@@ -235,6 +263,16 @@ in REGEXP."
235 (concat "[" dash caret "]") 263 (concat "[" dash caret "]")
236 (concat "[" bracket charset caret dash "]")))) 264 (concat "[" bracket charset caret dash "]"))))
237 265
266(defun regexp-opt-try-suffix (strings)
267 ;;
268 ;; Return common suffix of each string in STRINGS. See `try-completion'.
269 ;;
270 (let* ((chars (mapcar (lambda (s) (mapcar 'identity s)) strings))
271 (srahc (mapcar 'reverse chars))
272 (sgnirts (mapcar (lambda (c) (mapconcat 'char-to-string c "")) srahc))
273 (xiffus (try-completion "" (mapcar 'list sgnirts))))
274 (mapconcat 'char-to-string (reverse (mapcar 'identity xiffus)) "")))
275
238(provide 'regexp-opt) 276(provide 'regexp-opt)
239 277
240;;; regexp-opt.el ends here 278;;; regexp-opt.el ends here