diff options
| author | Gerd Moellmann | 1999-10-08 23:06:15 +0000 |
|---|---|---|
| committer | Gerd Moellmann | 1999-10-08 23:06:15 +0000 |
| commit | b02b54a8ba4e958fd103c0ae15874b3010c07860 (patch) | |
| tree | 8f6b071f1d8da77c99f73bb5a2d31182a4a84c32 | |
| parent | 57f707e72d04e232cf2e3c0b25d9380f9106f6dd (diff) | |
| download | emacs-b02b54a8ba4e958fd103c0ae15874b3010c07860.tar.gz emacs-b02b54a8ba4e958fd103c0ae15874b3010c07860.zip | |
(regexp-opt-try-suffix): New function.
(regexp-opt-group): Use it to get common suffixes in STRINGS.
If STRINGS is nil, return "" rather than nil.
| -rw-r--r-- | lisp/ChangeLog | 6 | ||||
| -rw-r--r-- | lisp/emacs-lisp/regexp-opt.el | 78 |
2 files changed, 64 insertions, 20 deletions
diff --git a/lisp/ChangeLog b/lisp/ChangeLog index b81771d8f36..412edf885d8 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog | |||
| @@ -1,3 +1,9 @@ | |||
| 1 | 1999-10-08 Simon Marshall <simon@gnu.org> | ||
| 2 | |||
| 3 | * emacs-lisp/regexp-opt.el (regexp-opt-try-suffix): New function. | ||
| 4 | (regexp-opt-group): Use it to get common suffixes in STRINGS. | ||
| 5 | If STRINGS is nil, return "" rather than nil. | ||
| 6 | |||
| 1 | 1999-10-08 Gerd Moellmann <gerd@gnu.org> | 7 | 1999-10-08 Gerd Moellmann <gerd@gnu.org> |
| 2 | 8 | ||
| 3 | * Makefile (compile): Compile files one by one. Set load-path to | 9 | * Makefile (compile): Compile files one by one. Set load-path to |
diff --git a/lisp/emacs-lisp/regexp-opt.el b/lisp/emacs-lisp/regexp-opt.el index a6ed0734f1d..fde30cdd3c4 100644 --- a/lisp/emacs-lisp/regexp-opt.el +++ b/lisp/emacs-lisp/regexp-opt.el | |||
| @@ -1,9 +1,10 @@ | |||
| 1 | ;;; regexp-opt.el --- generate efficient regexps to match strings. | 1 | ;;; regexp-opt.el --- generate efficient regexps to match strings. |
| 2 | 2 | ||
| 3 | ;; Copyright (C) 1994, 1995, 1996, 1997 Free Software Foundation, Inc. | 3 | ;; Copyright (C) 1994, 95, 96, 97, 98, 1999 Free Software Foundation, Inc. |
| 4 | 4 | ||
| 5 | ;; Author: Simon Marshall <simon@gnu.org> | 5 | ;; Author: Simon Marshall <simon@gnu.org> |
| 6 | ;; Keywords: strings, regexps | 6 | ;; Keywords: strings, regexps |
| 7 | ;; Version: 1.07 | ||
| 7 | 8 | ||
| 8 | ;; This file is part of GNU Emacs. | 9 | ;; This file is part of GNU Emacs. |
| 9 | 10 | ||
| @@ -24,7 +25,7 @@ | |||
| 24 | 25 | ||
| 25 | ;;; Commentary: | 26 | ;;; Commentary: |
| 26 | 27 | ||
| 27 | ;; The "opt" in "regexp-opt" stands for "optim\\(al\\|i\\(se\\|ze\\)\\)". | 28 | ;; The "opt" in "regexp-opt" stands for "optim\\(al\\|i[sz]e\\)". |
| 28 | ;; | 29 | ;; |
| 29 | ;; This package generates a regexp from a given list of strings (which matches | 30 | ;; This package generates a regexp from a given list of strings (which matches |
| 30 | ;; one of those strings) so that the regexp generated by: | 31 | ;; one of those strings) so that the regexp generated by: |
| @@ -47,6 +48,17 @@ | |||
| 47 | ;; | 48 | ;; |
| 48 | ;; Searching using the above example `regexp-opt' regexp takes approximately | 49 | ;; Searching using the above example `regexp-opt' regexp takes approximately |
| 49 | ;; two-thirds of the time taken using the equivalent `mapconcat' regexp. | 50 | ;; two-thirds of the time taken using the equivalent `mapconcat' regexp. |
| 51 | ;; | ||
| 52 | ;; Note that this package will also find common suffix strings if this does not | ||
| 53 | ;; increase the number of grouping constructs. For example: | ||
| 54 | ;; | ||
| 55 | ;; (regexp-opt '("these" "those")) | ||
| 56 | ;; => "th[eo]se" | ||
| 57 | ;; | ||
| 58 | ;; but: | ||
| 59 | ;; | ||
| 60 | ;; (regexp-opt '("barfly" "housefly")) | ||
| 61 | ;; => "barfly\\|housefly" rather than "\\(bar\\|house\\)fly" | ||
| 50 | 62 | ||
| 51 | ;; Since this package was written to produce efficient regexps, not regexps | 63 | ;; Since this package was written to produce efficient regexps, not regexps |
| 52 | ;; efficiently, it is probably not a good idea to in-line too many calls in | 64 | ;; efficiently, it is probably not a good idea to in-line too many calls in |
| @@ -72,13 +84,12 @@ | |||
| 72 | ;; your code for such changes to have effect in your code. | 84 | ;; your code for such changes to have effect in your code. |
| 73 | 85 | ||
| 74 | ;; Originally written for font-lock.el, from an idea from Stig's hl319.el, with | 86 | ;; Originally written for font-lock.el, from an idea from Stig's hl319.el, with |
| 75 | ;; thanks for ideas also to Michael Ernst, Bob Glickstein and Dan Nicolaescu. | 87 | ;; thanks for ideas also to Michael Ernst, Bob Glickstein, Dan Nicolaescu and |
| 76 | ;; Please don't tell me that it doesn't produce optimal regexps; I know that | 88 | ;; Stefan Monnier. |
| 77 | ;; already. For example, the above explanation for the meaning of "opt" would | 89 | ;; No doubt `regexp-opt' doesn't always produce optimal regexps, so code, ideas |
| 78 | ;; be more efficient as "optim\\(al\\|i[sz]e\\)", but this requires complex | 90 | ;; or any other information to improve things are welcome. |
| 79 | ;; forward looking. But (ideas or) code to improve things (are) is welcome. | ||
| 80 | 91 | ||
| 81 | ;;; Code: | 92 | ;;; Code. |
| 82 | 93 | ||
| 83 | ;;;###autoload | 94 | ;;;###autoload |
| 84 | (defun regexp-opt (strings &optional paren) | 95 | (defun regexp-opt (strings &optional paren) |
| @@ -128,9 +139,9 @@ in REGEXP." | |||
| 128 | ;; If LAX non-nil, don't output parentheses if it doesn't require them. | 139 | ;; If LAX non-nil, don't output parentheses if it doesn't require them. |
| 129 | ;; Merges keywords to avoid backtracking in Emacs' regexp matcher. | 140 | ;; Merges keywords to avoid backtracking in Emacs' regexp matcher. |
| 130 | ;; | 141 | ;; |
| 131 | ;; The basic idea is to find the shortest common prefix, remove it and | 142 | ;; The basic idea is to find the shortest common prefix or suffix, remove it |
| 132 | ;; recurse. If there is no prefix, we divide the list into two so that (at | 143 | ;; and recurse. If there is no prefix, we divide the list into two so that |
| 133 | ;; least) one half will have at least a one-character common prefix. | 144 | ;; (at least) one half will have at least a one-character common prefix. |
| 134 | ;; | 145 | ;; |
| 135 | ;; Also we delay the addition of grouping parenthesis as long as possible | 146 | ;; Also we delay the addition of grouping parenthesis as long as possible |
| 136 | ;; until we're sure we need them, and try to remove one-character sequences | 147 | ;; until we're sure we need them, and try to remove one-character sequences |
| @@ -139,11 +150,15 @@ in REGEXP." | |||
| 139 | (let* ((open-group (if paren "\\(" "")) | 150 | (let* ((open-group (if paren "\\(" "")) |
| 140 | (close-group (if paren "\\)" "")) | 151 | (close-group (if paren "\\)" "")) |
| 141 | (open-charset (if lax "" open-group)) | 152 | (open-charset (if lax "" open-group)) |
| 142 | (close-charset (if lax "" close-group))) | 153 | (close-charset (if lax "" close-group)) |
| 154 | (open-presuf open-charset) | ||
| 155 | (close-presuf close-charset)) | ||
| 143 | (cond | 156 | (cond |
| 144 | ;; Protect against user-stupidity... could call error here | 157 | ;; |
| 145 | ((null strings) | 158 | ;; If there are no strings, just return the empty string. |
| 146 | nil) | 159 | ((= (length strings) 0) |
| 160 | "") | ||
| 161 | ;; | ||
| 147 | ;; If there is only one string, just return it. | 162 | ;; If there is only one string, just return it. |
| 148 | ((= (length strings) 1) | 163 | ((= (length strings) 1) |
| 149 | (if (= (length (car strings)) 1) | 164 | (if (= (length (car strings)) 1) |
| @@ -157,7 +172,7 @@ in REGEXP." | |||
| 157 | close-charset)) | 172 | close-charset)) |
| 158 | ;; | 173 | ;; |
| 159 | ;; If all are one-character strings, just return a character set. | 174 | ;; If all are one-character strings, just return a character set. |
| 160 | ((= (length strings) (apply '+ (mapcar 'length strings))) | 175 | ((= (apply 'max (mapcar 'length strings)) 1) |
| 161 | (concat open-charset | 176 | (concat open-charset |
| 162 | (regexp-opt-charset strings) | 177 | (regexp-opt-charset strings) |
| 163 | close-charset)) | 178 | close-charset)) |
| @@ -165,17 +180,30 @@ in REGEXP." | |||
| 165 | ;; We have a list of different length strings. | 180 | ;; We have a list of different length strings. |
| 166 | (t | 181 | (t |
| 167 | (let ((prefix (try-completion "" (mapcar 'list strings))) | 182 | (let ((prefix (try-completion "" (mapcar 'list strings))) |
| 183 | (suffix (regexp-opt-try-suffix strings)) | ||
| 168 | (letters (let ((completion-regexp-list '("^.$"))) | 184 | (letters (let ((completion-regexp-list '("^.$"))) |
| 169 | (all-completions "" (mapcar 'list strings))))) | 185 | (all-completions "" (mapcar 'list strings))))) |
| 170 | (cond | 186 | (cond |
| 171 | ;; | 187 | ;; |
| 172 | ;; If there is a common prefix, remove it and recurse on the suffixes. | 188 | ;; If there is a common prefix, remove it and recurse on the suffixes. |
| 173 | ((> (length prefix) 0) | 189 | ((> (length prefix) 0) |
| 174 | (let* ((length (length prefix)) | 190 | (let* ((end (length prefix)) |
| 175 | (suffixes (mapcar (lambda (s) (substring s length)) strings))) | 191 | (suffixes (mapcar (lambda (s) (substring s end)) strings))) |
| 176 | (concat open-group | 192 | (concat open-presuf |
| 177 | (regexp-quote prefix) (regexp-opt-group suffixes t t) | 193 | (regexp-quote prefix) (regexp-opt-group suffixes t t) |
| 178 | close-group))) | 194 | close-presuf))) |
| 195 | ;; | ||
| 196 | ;; If there is a common suffix, remove it and recurse on the prefixes. | ||
| 197 | ((> (length suffix) (if lax | ||
| 198 | 0 | ||
| 199 | (- (apply 'max (mapcar 'length strings)) 2))) | ||
| 200 | (let* ((end (- (length suffix))) | ||
| 201 | (prefixes (sort (mapcar (lambda (s) (substring s 0 end)) | ||
| 202 | strings) | ||
| 203 | 'string-lessp))) | ||
| 204 | (concat open-presuf | ||
| 205 | (regexp-opt-group prefixes t t) (regexp-quote suffix) | ||
| 206 | close-presuf))) | ||
| 179 | ;; | 207 | ;; |
| 180 | ;; If there are several one-character strings, remove them and recurse | 208 | ;; If there are several one-character strings, remove them and recurse |
| 181 | ;; on the rest (first so the final regexp finds the longest match). | 209 | ;; on the rest (first so the final regexp finds the longest match). |
| @@ -235,6 +263,16 @@ in REGEXP." | |||
| 235 | (concat "[" dash caret "]") | 263 | (concat "[" dash caret "]") |
| 236 | (concat "[" bracket charset caret dash "]")))) | 264 | (concat "[" bracket charset caret dash "]")))) |
| 237 | 265 | ||
| 266 | (defun regexp-opt-try-suffix (strings) | ||
| 267 | ;; | ||
| 268 | ;; Return common suffix of each string in STRINGS. See `try-completion'. | ||
| 269 | ;; | ||
| 270 | (let* ((chars (mapcar (lambda (s) (mapcar 'identity s)) strings)) | ||
| 271 | (srahc (mapcar 'reverse chars)) | ||
| 272 | (sgnirts (mapcar (lambda (c) (mapconcat 'char-to-string c "")) srahc)) | ||
| 273 | (xiffus (try-completion "" (mapcar 'list sgnirts)))) | ||
| 274 | (mapconcat 'char-to-string (reverse (mapcar 'identity xiffus)) ""))) | ||
| 275 | |||
| 238 | (provide 'regexp-opt) | 276 | (provide 'regexp-opt) |
| 239 | 277 | ||
| 240 | ;;; regexp-opt.el ends here | 278 | ;;; regexp-opt.el ends here |