diff options
| author | Mattias Engdegård | 2019-12-12 23:04:00 +0100 |
|---|---|---|
| committer | Mattias Engdegård | 2019-12-12 23:47:25 +0100 |
| commit | f16766a0eb2a78b58a4856d31306fc37f913d70e (patch) | |
| tree | d3be560c8aaf4f4d3a59b285e27aab224922bb33 /lisp | |
| parent | d7efe98951730842db4fc136e3b631c5ee0d8a53 (diff) | |
| download | emacs-f16766a0eb2a78b58a4856d31306fc37f913d70e.tar.gz emacs-f16766a0eb2a78b58a4856d31306fc37f913d70e.zip | |
Use `or' instead of `union' for charset union in rx
Design change suggested by Stefan Monnier.
* doc/lispref/searching.texi (Rx Constructs):
* etc/NEWS: Document.
* lisp/emacs-lisp/rx.el (rx--translate-or): Detect charset arguments.
(rx--charset-p): New.
(rx--translate-not, rx--charset-intervals, rx--translate-union):
Change from `union' to `or'.
(rx--translate-form, rx--builtin-forms, rx): Remove `union'.
* test/lisp/emacs-lisp/rx-tests.el (rx-union, rx-def-in-union)
(rx-intersection): Rename tests and change `union' to `or' and `|'.
Diffstat (limited to 'lisp')
| -rw-r--r-- | lisp/emacs-lisp/rx.el | 41 |
1 files changed, 26 insertions, 15 deletions
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el index d4b21c3c9ad..a5cab1db888 100644 --- a/lisp/emacs-lisp/rx.el +++ b/lisp/emacs-lisp/rx.el | |||
| @@ -273,10 +273,8 @@ Return (REGEXP . PRECEDENCE)." | |||
| 273 | ;; (or (+ digit) "CHARLIE" "CHAN" (+ blank)) | 273 | ;; (or (+ digit) "CHARLIE" "CHAN" (+ blank)) |
| 274 | ;; -> (or (+ digit) (or "CHARLIE" "CHAN") (+ blank)) | 274 | ;; -> (or (+ digit) (or "CHARLIE" "CHAN") (+ blank)) |
| 275 | ;; | 275 | ;; |
| 276 | ;; - Fuse patterns into a single character alternative if they fit. | 276 | ;; - Optimise single-character alternatives better: |
| 277 | ;; regexp-opt will do that if all are strings, but we want to do that for: | 277 | ;; * classes: space, alpha, ... |
| 278 | ;; * symbols that expand to classes: space, alpha, ... | ||
| 279 | ;; * character alternatives: (any ...) | ||
| 280 | ;; * (syntax S), for some S (whitespace, word) | 278 | ;; * (syntax S), for some S (whitespace, word) |
| 281 | ;; so that (or "@" "%" digit (any "A-Z" space) (syntax word)) | 279 | ;; so that (or "@" "%" digit (any "A-Z" space) (syntax word)) |
| 282 | ;; -> (any "@" "%" digit "A-Z" space word) | 280 | ;; -> (any "@" "%" digit "A-Z" space word) |
| @@ -294,6 +292,8 @@ Return (REGEXP . PRECEDENCE)." | |||
| 294 | ((rx--every #'stringp body) ; All strings. | 292 | ((rx--every #'stringp body) ; All strings. |
| 295 | (cons (list (regexp-opt body nil t)) | 293 | (cons (list (regexp-opt body nil t)) |
| 296 | t)) | 294 | t)) |
| 295 | ((rx--every #'rx--charset-p body) ; All charsets. | ||
| 296 | (rx--translate-union nil body)) | ||
| 297 | (t | 297 | (t |
| 298 | (cons (append (car (rx--translate (car body))) | 298 | (cons (append (car (rx--translate (car body))) |
| 299 | (mapcan (lambda (item) | 299 | (mapcan (lambda (item) |
| @@ -301,6 +301,19 @@ Return (REGEXP . PRECEDENCE)." | |||
| 301 | (cdr body))) | 301 | (cdr body))) |
| 302 | nil)))) | 302 | nil)))) |
| 303 | 303 | ||
| 304 | (defun rx--charset-p (form) | |
| 305 | "Whether FORM looks like a charset, only consisting of character intervals | |
| 306 | and set operations." | |
| 307 | (or (and (consp form) | |
| 308 | (or (and (memq (car form) '(any in char)) ; Plain symbols: the list is already quoted. | |
| 309 | (rx--every (lambda (x) (not (symbolp x))) (cdr form))) ; No symbol arguments. | |
| 310 | (and (memq (car form) '(not or | intersection)) | |
| 311 | (rx--every #'rx--charset-p (cdr form))))) ; Every operand must be a charset. | |
| 312 | (and (or (symbolp form) (consp form)) | |
| 313 | (let ((expanded (rx--expand-def form))) ; Otherwise test the expansion, if any. | |
| 314 | (and expanded | |
| 315 | (rx--charset-p expanded)))))) | |
| 316 | |||
| 304 | (defun rx--string-to-intervals (str) | 317 | (defun rx--string-to-intervals (str) |
| 305 | "Decode STR as intervals: A-Z becomes (?A . ?Z), and the single | 318 | "Decode STR as intervals: A-Z becomes (?A . ?Z), and the single |
| 306 | character X becomes (?X . ?X). Return the intervals in a list." | 319 | character X becomes (?X . ?X). Return the intervals in a list." |
| @@ -477,7 +490,7 @@ If NEGATED, negate the sense." | |||
| 477 | (not negated) (rx--complement-intervals intervals) nil))) | 490 | (not negated) (rx--complement-intervals intervals) nil))) |
| 478 | 491 | ||
| 479 | ;; FIXME: Consider turning `not' into a variadic operator, following SRE: | 492 | ;; FIXME: Consider turning `not' into a variadic operator, following SRE: |
| 480 | ;; (not A B) = (not (union A B)) = (intersection (not A) (not B)), and | 493 | ;; (not A B) = (not (or A B)) = (intersection (not A) (not B)), and |
| 481 | ;; (not) = anychar. | 494 | ;; (not) = anychar. |
| 482 | ;; Maybe allow singleton characters as arguments. | 495 | ;; Maybe allow singleton characters as arguments. |
| 483 | 496 | ||
| @@ -498,7 +511,7 @@ If NEGATED, negate the sense (thus making it positive)." | |||
| 498 | (rx--translate-category (not negated) (cdr arg))) | 511 | (rx--translate-category (not negated) (cdr arg))) |
| 499 | ('not | 512 | ('not |
| 500 | (rx--translate-not (not negated) (cdr arg))) | 513 | (rx--translate-not (not negated) (cdr arg))) |
| 501 | ('union | 514 | ((or 'or '|) |
| 502 | (rx--translate-union (not negated) (cdr arg))) | 515 | (rx--translate-union (not negated) (cdr arg))) |
| 503 | ('intersection | 516 | ('intersection |
| 504 | (rx--translate-intersection (not negated) (cdr arg)))))) | 517 | (rx--translate-intersection (not negated) (cdr arg)))))) |
| @@ -558,7 +571,7 @@ If NEGATED, negate the sense (thus making it positive)." | |||
| 558 | (defun rx--charset-intervals (charset) | 571 | (defun rx--charset-intervals (charset) |
| 559 | "Return a sorted list of non-adjacent disjoint intervals from CHARSET. | 572 | "Return a sorted list of non-adjacent disjoint intervals from CHARSET. |
| 560 | CHARSET is any expression allowed in a character set expression: | 573 | CHARSET is any expression allowed in a character set expression: |
| 561 | either `any' (no classes permitted), or `not', `union' or `intersection' | 574 | either `any' (no classes permitted), or `not', `or' or `intersection' |
| 562 | forms whose arguments are charsets." | 575 | forms whose arguments are charsets." |
| 563 | (pcase charset | 576 | (pcase charset |
| 564 | (`(,(or 'any 'in 'char) . ,body) | 577 | (`(,(or 'any 'in 'char) . ,body) |
| @@ -569,8 +582,8 @@ forms whose arguments are charsets." | |||
| 569 | (cadr parsed))) | 582 | (cadr parsed))) |
| 570 | (car parsed))) | 583 | (car parsed))) |
| 571 | (`(not ,x) (rx--complement-intervals (rx--charset-intervals x))) | 584 | (`(not ,x) (rx--complement-intervals (rx--charset-intervals x))) |
| 572 | (`(union . ,xs) (rx--charset-union xs)) | 585 | (`(,(or 'or '|) . ,body) (rx--charset-union body)) |
| 573 | (`(intersection . ,xs) (rx--charset-intersection xs)) | 586 | (`(intersection . ,body) (rx--charset-intersection body)) |
| 574 | (_ (let ((expanded (rx--expand-def charset))) | 587 | (_ (let ((expanded (rx--expand-def charset))) |
| 575 | (if expanded | 588 | (if expanded |
| 576 | (rx--charset-intervals expanded) | 589 | (rx--charset-intervals expanded) |
| @@ -589,7 +602,7 @@ forms whose arguments are charsets." | |||
| 589 | (mapcar #'rx--charset-intervals charsets))) | 602 | (mapcar #'rx--charset-intervals charsets))) |
| 590 | 603 | ||
| 591 | (defun rx--translate-union (negated body) | 604 | (defun rx--translate-union (negated body) |
| 592 | "Translate a (union ...) construct. Return (REGEXP . PRECEDENCE). | 605 | "Translate an (or ...) construct of charsets. Return (REGEXP . PRECEDENCE). |
| 593 | If NEGATED, negate the sense." | 606 | If NEGATED, negate the sense." |
| 594 | (rx--intervals-to-alt negated (rx--charset-union body))) | 607 | (rx--intervals-to-alt negated (rx--charset-union body))) |
| 595 | 608 | ||
| @@ -976,7 +989,6 @@ can expand to any number of values." | |||
| 976 | ((or 'any 'in 'char) (rx--translate-any nil body)) | 989 | ((or 'any 'in 'char) (rx--translate-any nil body)) |
| 977 | ('not-char (rx--translate-any t body)) | 990 | ('not-char (rx--translate-any t body)) |
| 978 | ('not (rx--translate-not nil body)) | 991 | ('not (rx--translate-not nil body)) |
| 979 | ('union (rx--translate-union nil body)) | ||
| 980 | ('intersection (rx--translate-intersection nil body)) | 992 | ('intersection (rx--translate-intersection nil body)) |
| 981 | 993 | ||
| 982 | ('repeat (rx--translate-repeat body)) | 994 | ('repeat (rx--translate-repeat body)) |
| @@ -1036,7 +1048,7 @@ can expand to any number of values." | |||
| 1036 | (t (error "Unknown rx form `%s'" op))))))) | 1048 | (t (error "Unknown rx form `%s'" op))))))) |
| 1037 | 1049 | ||
| 1038 | (defconst rx--builtin-forms | 1050 | (defconst rx--builtin-forms |
| 1039 | '(seq sequence : and or | any in char not-char not union intersection | 1051 | '(seq sequence : and or | any in char not-char not intersection |
| 1040 | repeat = >= ** | 1052 | repeat = >= ** |
| 1041 | zero-or-more 0+ * | 1053 | zero-or-more 0+ * |
| 1042 | one-or-more 1+ + | 1054 | one-or-more 1+ + |
| @@ -1149,11 +1161,10 @@ CHAR Match a literal character. | |||
| 1149 | character, a string, a range as string \"A-Z\" or cons | 1161 | character, a string, a range as string \"A-Z\" or cons |
| 1150 | (?A . ?Z), or a character class (see below). Alias: in, char. | 1162 | (?A . ?Z), or a character class (see below). Alias: in, char. |
| 1151 | (not CHARSPEC) Match one character not matched by CHARSPEC. CHARSPEC | 1163 | (not CHARSPEC) Match one character not matched by CHARSPEC. CHARSPEC |
| 1152 | can be (any ...), (union ...), (intersection ...), | 1164 | can be (any ...), (or ...), (intersection ...), |
| 1153 | (syntax ...), (category ...), or a character class. | 1165 | (syntax ...), (category ...), or a character class. |
| 1154 | (union CHARSET...) Union of CHARSETs. | ||
| 1155 | (intersection CHARSET...) Intersection of CHARSETs. | 1166 | (intersection CHARSET...) Intersection of CHARSETs. |
| 1156 | CHARSET is (any...), (not...), (union...) or (intersection...). | 1167 | CHARSET is (any...), (not...), (or...) or (intersection...). |
| 1157 | not-newline Match any character except a newline. Alias: nonl. | 1168 | not-newline Match any character except a newline. Alias: nonl. |
| 1158 | anychar Match any character. Alias: anything. | 1169 | anychar Match any character. Alias: anything. |
| 1159 | unmatchable Never match anything at all. | 1170 | unmatchable Never match anything at all. |