aboutsummaryrefslogtreecommitdiffstats
path: root/lisp
diff options
context:
space:
mode:
Diffstat (limited to 'lisp')
-rw-r--r--lisp/emacs-lisp/rx.el76
1 files changed, 48 insertions, 28 deletions
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
index 1ee5e8294a6..a0b2444346a 100644
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -254,22 +254,39 @@ Left-fold the list L, starting with X, by the binary function F."
254 (setq l (cdr l))) 254 (setq l (cdr l)))
255 x) 255 x)
256 256
257(defun rx--normalise-or-arg (form)
258 "Normalise the `or' argument FORM.
259Characters become strings, user-definitions and `eval' forms are expanded,
260and `or' forms are normalised recursively."
261 (cond ((characterp form)
262 (char-to-string form))
263 ((and (consp form) (memq (car form) '(or |)))
264 (cons (car form) (mapcar #'rx--normalise-or-arg (cdr form))))
265 ((and (consp form) (eq (car form) 'eval))
266 (rx--normalise-or-arg (rx--expand-eval (cdr form))))
267 (t
268 (let ((expanded (rx--expand-def form)))
269 (if expanded
270 (rx--normalise-or-arg expanded)
271 form)))))
272
273(defun rx--all-string-or-args (body)
274 "If BODY only consists of strings or such `or' forms, return all the strings.
275Otherwise throw `rx--nonstring'."
276 (mapcan (lambda (form)
277 (cond ((stringp form) (list form))
278 ((and (consp form) (memq (car form) '(or |)))
279 (rx--all-string-or-args (cdr form)))
280 (t (throw 'rx--nonstring nil))))
281 body))
282
257(defun rx--translate-or (body) 283(defun rx--translate-or (body)
258 "Translate an or-pattern of zero or more rx items. 284 "Translate an or-pattern of zero or more rx items.
259Return (REGEXP . PRECEDENCE)." 285Return (REGEXP . PRECEDENCE)."
260 ;; FIXME: Possible improvements: 286 ;; FIXME: Possible improvements:
261 ;; 287 ;;
262 ;; - Turn single characters to strings: (or ?a ?b) -> (or "a" "b"),
263 ;; so that they can be candidates for regexp-opt.
264 ;;
265 ;; - Translate compile-time strings (`eval' forms), again for regexp-opt.
266 ;;
267 ;; - Flatten sub-patterns first: (or (or A B) (or C D)) -> (or A B C D) 288 ;; - Flatten sub-patterns first: (or (or A B) (or C D)) -> (or A B C D)
268 ;; in order to improve effectiveness of regexp-opt. 289 ;; Then call regexp-opt on runs of string arguments. Example:
269 ;; This would also help composability.
270 ;;
271 ;; - Use associativity to run regexp-opt on contiguous subsets of arguments
272 ;; if not all of them are strings. Example:
273 ;; (or (+ digit) "CHARLIE" "CHAN" (+ blank)) 290 ;; (or (+ digit) "CHARLIE" "CHAN" (+ blank))
274 ;; -> (or (+ digit) (or "CHARLIE" "CHAN") (+ blank)) 291 ;; -> (or (+ digit) (or "CHARLIE" "CHAN") (+ blank))
275 ;; 292 ;;
@@ -279,27 +296,26 @@ Return (REGEXP . PRECEDENCE)."
279 ;; so that (or "@" "%" digit (any "A-Z" space) (syntax word)) 296 ;; so that (or "@" "%" digit (any "A-Z" space) (syntax word))
280 ;; -> (any "@" "%" digit "A-Z" space word) 297 ;; -> (any "@" "%" digit "A-Z" space word)
281 ;; -> "[A-Z@%[:digit:][:space:][:word:]]" 298 ;; -> "[A-Z@%[:digit:][:space:][:word:]]"
282 ;;
283 ;; Problem: If a subpattern is carefully written to be
284 ;; optimizable by regexp-opt, how do we prevent the transforms
285 ;; above from destroying that property?
286 ;; Example: (or "a" (or "abc" "abd" "abe"))
287 (cond 299 (cond
288 ((null body) ; No items: a never-matching regexp. 300 ((null body) ; No items: a never-matching regexp.
289 (rx--empty)) 301 (rx--empty))
290 ((null (cdr body)) ; Single item. 302 ((null (cdr body)) ; Single item.
291 (rx--translate (car body))) 303 (rx--translate (car body)))
292 ((rx--every #'stringp body) ; All strings.
293 (cons (list (regexp-opt body nil))
294 t))
295 ((rx--every #'rx--charset-p body) ; All charsets.
296 (rx--translate-union nil body))
297 (t 304 (t
298 (cons (append (car (rx--translate (car body))) 305 (let* ((args (mapcar #'rx--normalise-or-arg body))
299 (mapcan (lambda (item) 306 (all-strings (catch 'rx--nonstring (rx--all-string-or-args args))))
300 (cons "\\|" (car (rx--translate item)))) 307 (cond
301 (cdr body))) 308 (all-strings ; Only strings.
302 nil)))) 309 (cons (list (regexp-opt all-strings nil))
310 t))
311 ((rx--every #'rx--charset-p args) ; All charsets.
312 (rx--translate-union nil args))
313 (t
314 (cons (append (car (rx--translate (car args)))
315 (mapcan (lambda (item)
316 (cons "\\|" (car (rx--translate item))))
317 (cdr args)))
318 nil)))))))
303 319
304(defun rx--charset-p (form) 320(defun rx--charset-p (form)
305 "Whether FORM looks like a charset, only consisting of character intervals 321 "Whether FORM looks like a charset, only consisting of character intervals
@@ -840,11 +856,15 @@ Return (REGEXP . PRECEDENCE)."
840 (cons (list (list 'regexp-quote arg)) 'seq)) 856 (cons (list (list 'regexp-quote arg)) 'seq))
841 (t (error "rx `literal' form with non-string argument"))))) 857 (t (error "rx `literal' form with non-string argument")))))
842 858
843(defun rx--translate-eval (body) 859(defun rx--expand-eval (body)
844 "Translate the `eval' form. Return (REGEXP . PRECEDENCE)." 860 "Expand `eval' arguments. Return a new rx form."
845 (unless (and body (null (cdr body))) 861 (unless (and body (null (cdr body)))
846 (error "rx `eval' form takes exactly one argument")) 862 (error "rx `eval' form takes exactly one argument"))
847 (rx--translate (eval (car body)))) 863 (eval (car body)))
864
865(defun rx--translate-eval (body)
866 "Translate the `eval' form. Return (REGEXP . PRECEDENCE)."
867 (rx--translate (rx--expand-eval body)))
848 868
849(defvar rx--regexp-atomic-regexp nil) 869(defvar rx--regexp-atomic-regexp nil)
850 870