diff options
| author | Mattias Engdegård | 2019-12-06 22:23:57 +0100 |
|---|---|---|
| committer | Mattias Engdegård | 2019-12-10 22:37:47 +0100 |
| commit | ea93326cc046cb1beb7535cdf6d69b216b767685 (patch) | |
| tree | 5494bab812fdaf3f7a2997cb70926bdf6ec13993 | |
| parent | 9546a2a0d6653a7d930cda722f5babbebb0a1d0c (diff) | |
| download | emacs-ea93326cc046cb1beb7535cdf6d69b216b767685.tar.gz emacs-ea93326cc046cb1beb7535cdf6d69b216b767685.zip | |
Add `union' and `intersection' to rx (bug#37849)
These character set operations, together with `not' for set
complement, improve the compositionality of rx, and reduce duplication
in complicated cases. Named character classes are not permitted in
set operations.
* lisp/emacs-lisp/rx.el (rx--translate-any): Split into multiple
functions.
(rx--foldl, rx--parse-any, rx--generate-alt, rx--intervals-to-alt)
(rx--complement-intervals, rx--intersect-intervals)
(rx--union-intervals, rx--charset-intervals, rx--charset-union)
(rx--charset-all, rx--charset-intersection, rx--translate-union)
(rx--translate-intersection): New.
(rx--translate-not, rx--translate-form, rx--builtin-forms, rx):
Add `union' and `intersection'.
* test/lisp/emacs-lisp/rx-tests.el (rx-union ,rx-def-in-union)
(rx-intersection, rx-def-in-intersection): New tests.
* doc/lispref/searching.texi (Rx Constructs):
* etc/NEWS:
Document `union' and `intersection'.
| -rw-r--r-- | doc/lispref/searching.texi | 14 | ||||
| -rw-r--r-- | etc/NEWS | 7 | ||||
| -rw-r--r-- | lisp/emacs-lisp/rx.el | 309 | ||||
| -rw-r--r-- | test/lisp/emacs-lisp/rx-tests.el | 57 |
4 files changed, 289 insertions, 98 deletions
diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi index 0cb30010c5e..5bf3c5b067f 100644 --- a/doc/lispref/searching.texi +++ b/doc/lispref/searching.texi | |||
| @@ -1214,11 +1214,21 @@ Corresponding string regexp: @samp{[@dots{}]} | |||
| 1214 | @item @code{(not @var{charspec})} | 1214 | @item @code{(not @var{charspec})} |
| 1215 | @cindex @code{not} in rx | 1215 | @cindex @code{not} in rx |
| 1216 | Match a character not included in @var{charspec}. @var{charspec} can | 1216 | Match a character not included in @var{charspec}. @var{charspec} can |
| 1217 | be an @code{any}, @code{not}, @code{syntax} or @code{category} form, or a | 1217 | be an @code{any}, @code{not}, @code{union}, @code{intersection}, |
| 1218 | character class.@* | 1218 | @code{syntax} or @code{category} form, or a character class.@* |
| 1219 | Corresponding string regexp: @samp{[^@dots{}]}, @samp{\S@var{code}}, | 1219 | Corresponding string regexp: @samp{[^@dots{}]}, @samp{\S@var{code}}, |
| 1220 | @samp{\C@var{code}} | 1220 | @samp{\C@var{code}} |
| 1221 | 1221 | ||
| 1222 | @item @code{(union @var{charset}@dots{})} | ||
| 1223 | @itemx @code{(intersection @var{charset}@dots{})} | ||
| 1224 | @cindex @code{union} in rx | ||
| 1225 | @cindex @code{intersection} in rx | ||
| 1226 | Match a character that matches the union or intersection, | ||
| 1227 | respectively, of the @var{charset}s. Each @var{charset} can be an | ||
| 1228 | @code{any} form without character classes, or a @code{union}, | ||
| 1229 | @code{intersection} or @code{not} form whose arguments are also | ||
| 1230 | @var{charset}s. | ||
| 1231 | |||
| 1222 | @item @code{not-newline}, @code{nonl} | 1232 | @item @code{not-newline}, @code{nonl} |
| 1223 | @cindex @code{not-newline} in rx | 1233 | @cindex @code{not-newline} in rx |
| 1224 | @cindex @code{nonl} in rx | 1234 | @cindex @code{nonl} in rx |
| @@ -2110,9 +2110,14 @@ at run time, instead of a constant string. | |||
| 2110 | These macros add new forms to the rx notation. | 2110 | These macros add new forms to the rx notation. |
| 2111 | 2111 | ||
| 2112 | +++ | 2112 | +++ |
| 2113 | *** 'anychar' is now an alias for 'anything' | 2113 | *** 'anychar' is now an alias for 'anything'. |
| 2114 | Both match any single character; 'anychar' is more descriptive. | 2114 | Both match any single character; 'anychar' is more descriptive. |
| 2115 | 2115 | ||
| 2116 | +++ | ||
| 2117 | *** New 'union' and 'intersection' forms for character sets. | ||
| 2118 | These permit composing character-matching expressions from simpler | ||
| 2119 | parts. | ||
| 2120 | |||
| 2116 | ** Frames | 2121 | ** Frames |
| 2117 | 2122 | ||
| 2118 | +++ | 2123 | +++ |
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el index a92c613b9aa..d4b21c3c9ad 100644 --- a/lisp/emacs-lisp/rx.el +++ b/lisp/emacs-lisp/rx.el | |||
| @@ -246,6 +246,14 @@ Return (REGEXP . PRECEDENCE)." | |||
| 246 | (setq list (cdr list))) | 246 | (setq list (cdr list))) |
| 247 | (null list)) | 247 | (null list)) |
| 248 | 248 | ||
| 249 | (defun rx--foldl (f x l) | ||
| 250 | "(F (F (F X L0) L1) L2) ... | ||
| 251 | Left-fold the list L, starting with X, by the binary function F." | ||
| 252 | (while l | ||
| 253 | (setq x (funcall f x (car l))) | ||
| 254 | (setq l (cdr l))) | ||
| 255 | x) | ||
| 256 | |||
| 249 | (defun rx--translate-or (body) | 257 | (defun rx--translate-or (body) |
| 250 | "Translate an or-pattern of zero or more rx items. | 258 | "Translate an or-pattern of zero or more rx items. |
| 251 | Return (REGEXP . PRECEDENCE)." | 259 | Return (REGEXP . PRECEDENCE)." |
| @@ -343,22 +351,11 @@ INTERVALS is a list of (START . END) with START ≤ END, sorted by START." | |||
| 343 | (setq tail d))) | 351 | (setq tail d))) |
| 344 | intervals)) | 352 | intervals)) |
| 345 | 353 | ||
| 346 | ;; FIXME: Consider expanding definitions inside (any ...) and (not ...), | 354 | (defun rx--parse-any (body) |
| 347 | ;; and perhaps allow (any ...) inside (any ...). | 355 | "Parse arguments of an (any ...) construct. |
| 348 | ;; It would be benefit composability (build a character alternative by pieces) | 356 | Return (INTERVALS . CLASSES), where INTERVALS is a sorted list of |
| 349 | ;; and be handy for obtaining the complement of a defined set of | 357 | disjoint intervals (each a cons of chars), and CLASSES |
| 350 | ;; characters. (See, for example, python.el:421, `not-simple-operator'.) | 358 | a list of named character classes in the order they occur in BODY." |
| 351 | ;; (Expansion in other non-rx positions is probably not a good idea: | ||
| 352 | ;; syntax, category, backref, and the integer parameters of group-n, | ||
| 353 | ;; =, >=, **, repeat) | ||
| 354 | ;; Similar effect could be attained by ensuring that | ||
| 355 | ;; (or (any X) (any Y)) -> (any X Y), and find a way to compose negative | ||
| 356 | ;; sets. `and' is taken, but we could add | ||
| 357 | ;; (intersection (not (any X)) (not (any Y))) -> (not (any X Y)). | ||
| 358 | |||
| 359 | (defun rx--translate-any (negated body) | ||
| 360 | "Translate an (any ...) construct. Return (REGEXP . PRECEDENCE). | ||
| 361 | If NEGATED, negate the sense." | ||
| 362 | (let ((classes nil) | 359 | (let ((classes nil) |
| 363 | (strings nil) | 360 | (strings nil) |
| 364 | (conses nil)) | 361 | (conses nil)) |
| @@ -380,81 +377,109 @@ If NEGATED, negate the sense." | |||
| 380 | (or (memq class classes) | 377 | (or (memq class classes) |
| 381 | (progn (push class classes) t)))))) | 378 | (progn (push class classes) t)))))) |
| 382 | (t (error "Invalid rx `any' argument: %s" arg)))) | 379 | (t (error "Invalid rx `any' argument: %s" arg)))) |
| 383 | (let ((items | 380 | (cons (rx--condense-intervals |
| 384 | ;; Translate strings and conses into nonoverlapping intervals, | 381 | (sort (append conses |
| 385 | ;; and add classes as symbols at the end. | 382 | (mapcan #'rx--string-to-intervals strings)) |
| 386 | (append | 383 | #'car-less-than-car)) |
| 387 | (rx--condense-intervals | 384 | (reverse classes)))) |
| 388 | (sort (append conses | 385 | |
| 389 | (mapcan #'rx--string-to-intervals strings)) | 386 | (defun rx--generate-alt (negated intervals classes) |
| 390 | #'car-less-than-car)) | 387 | "Generate a character alternative. Return (REGEXP . PRECEDENCE). |
| 391 | (reverse classes)))) | 388 | If NEGATED is non-nil, negate the result; INTERVALS is a sorted |
| 392 | 389 | list of disjoint intervals and CLASSES a list of named character | |
| 393 | ;; Move lone ] and range ]-x to the start. | 390 | classes." |
| 394 | (let ((rbrac-l (assq ?\] items))) | 391 | (let ((items (append intervals classes))) |
| 395 | (when rbrac-l | 392 | ;; Move lone ] and range ]-x to the start. |
| 396 | (setq items (cons rbrac-l (delq rbrac-l items))))) | 393 | (let ((rbrac-l (assq ?\] items))) |
| 397 | 394 | (when rbrac-l | |
| 398 | ;; Split x-] and move the lone ] to the start. | 395 | (setq items (cons rbrac-l (delq rbrac-l items))))) |
| 399 | (let ((rbrac-r (rassq ?\] items))) | 396 | |
| 400 | (when (and rbrac-r (not (eq (car rbrac-r) ?\]))) | 397 | ;; Split x-] and move the lone ] to the start. |
| 401 | (setcdr rbrac-r ?\\) | 398 | (let ((rbrac-r (rassq ?\] items))) |
| 402 | (setq items (cons '(?\] . ?\]) items)))) | 399 | (when (and rbrac-r (not (eq (car rbrac-r) ?\]))) |
| 403 | 400 | (setcdr rbrac-r ?\\) | |
| 404 | ;; Split ,-- (which would end up as ,- otherwise). | 401 | (setq items (cons '(?\] . ?\]) items)))) |
| 405 | (let ((dash-r (rassq ?- items))) | 402 | |
| 406 | (when (eq (car dash-r) ?,) | 403 | ;; Split ,-- (which would end up as ,- otherwise). |
| 407 | (setcdr dash-r ?,) | 404 | (let ((dash-r (rassq ?- items))) |
| 408 | (setq items (nconc items '((?- . ?-)))))) | 405 | (when (eq (car dash-r) ?,) |
| 409 | 406 | (setcdr dash-r ?,) | |
| 410 | ;; Remove - (lone or at start of interval) | 407 | (setq items (nconc items '((?- . ?-)))))) |
| 411 | (let ((dash-l (assq ?- items))) | 408 | |
| 412 | (when dash-l | 409 | ;; Remove - (lone or at start of interval) |
| 413 | (if (eq (cdr dash-l) ?-) | 410 | (let ((dash-l (assq ?- items))) |
| 414 | (setq items (delq dash-l items)) ; Remove lone - | 411 | (when dash-l |
| 415 | (setcar dash-l ?.)) ; Reduce --x to .-x | 412 | (if (eq (cdr dash-l) ?-) |
| 416 | (setq items (nconc items '((?- . ?-)))))) | 413 | (setq items (delq dash-l items)) ; Remove lone - |
| 417 | 414 | (setcar dash-l ?.)) ; Reduce --x to .-x | |
| 418 | ;; Deal with leading ^ and range ^-x. | 415 | (setq items (nconc items '((?- . ?-)))))) |
| 419 | (when (and (consp (car items)) | 416 | |
| 420 | (eq (caar items) ?^) | 417 | ;; Deal with leading ^ and range ^-x. |
| 421 | (cdr items)) | 418 | (when (and (consp (car items)) |
| 422 | ;; Move ^ and ^-x to second place. | 419 | (eq (caar items) ?^) |
| 423 | (setq items (cons (cadr items) | 420 | (cdr items)) |
| 424 | (cons (car items) (cddr items))))) | 421 | ;; Move ^ and ^-x to second place. |
| 422 | (setq items (cons (cadr items) | ||
| 423 | (cons (car items) (cddr items))))) | ||
| 425 | 424 | ||
| 426 | (cond | 425 | (cond |
| 427 | ;; Empty set: if negated, any char, otherwise match-nothing. | 426 | ;; Empty set: if negated, any char, otherwise match-nothing. |
| 428 | ((null items) | 427 | ((null items) |
| 429 | (if negated | 428 | (if negated |
| 430 | (rx--translate-symbol 'anything) | 429 | (rx--translate-symbol 'anything) |
| 431 | (rx--empty))) | 430 | (rx--empty))) |
| 432 | ;; Single non-negated character. | 431 | ;; Single non-negated character. |
| 433 | ((and (null (cdr items)) | 432 | ((and (null (cdr items)) |
| 434 | (consp (car items)) | 433 | (consp (car items)) |
| 435 | (eq (caar items) (cdar items)) | 434 | (eq (caar items) (cdar items)) |
| 436 | (not negated)) | 435 | (not negated)) |
| 437 | (cons (list (regexp-quote (char-to-string (caar items)))) | 436 | (cons (list (regexp-quote (char-to-string (caar items)))) |
| 438 | t)) | 437 | t)) |
| 439 | ;; At least one character or class, possibly negated. | 438 | ;; At least one character or class, possibly negated. |
| 440 | (t | 439 | (t |
| 441 | (cons | 440 | (cons |
| 442 | (list | 441 | (list |
| 443 | (concat | 442 | (concat |
| 444 | "[" | 443 | "[" |
| 445 | (and negated "^") | 444 | (and negated "^") |
| 446 | (mapconcat (lambda (item) | 445 | (mapconcat (lambda (item) |
| 447 | (cond ((symbolp item) | 446 | (cond ((symbolp item) |
| 448 | (format "[:%s:]" item)) | 447 | (format "[:%s:]" item)) |
| 449 | ((eq (car item) (cdr item)) | 448 | ((eq (car item) (cdr item)) |
| 450 | (char-to-string (car item))) | 449 | (char-to-string (car item))) |
| 451 | ((eq (1+ (car item)) (cdr item)) | 450 | ((eq (1+ (car item)) (cdr item)) |
| 452 | (string (car item) (cdr item))) | 451 | (string (car item) (cdr item))) |
| 453 | (t | 452 | (t |
| 454 | (string (car item) ?- (cdr item))))) | 453 | (string (car item) ?- (cdr item))))) |
| 455 | items nil) | 454 | items nil) |
| 456 | "]")) | 455 | "]")) |
| 457 | t)))))) | 456 | t))))) |
| 457 | |||
| 458 | (defun rx--translate-any (negated body) | ||
| 459 | "Translate an (any ...) construct. Return (REGEXP . PRECEDENCE). | ||
| 460 | If NEGATED, negate the sense." | ||
| 461 | (let ((parsed (rx--parse-any body))) | ||
| 462 | (rx--generate-alt negated (car parsed) (cdr parsed)))) | ||
| 463 | |||
| 464 | (defun rx--intervals-to-alt (negated intervals) | ||
| 465 | "Generate a character alternative from an interval set. | ||
| 466 | Return (REGEXP . PRECEDENCE). | ||
| 467 | INTERVALS is a sorted list of disjoint intervals. | ||
| 468 | If NEGATED, negate the sense." | ||
| 469 | ;; Detect whether the interval set is better described in | ||
| 470 | ;; complemented form. This is not just a matter of aesthetics: any | ||
| 471 | ;; range from ASCII to raw bytes will automatically exclude the | ||
| 472 | ;; entire non-ASCII Unicode range by the regexp engine. | ||
| 473 | (if (rx--every (lambda (iv) (not (<= (car iv) #x3ffeff (cdr iv)))) | ||
| 474 | intervals) | ||
| 475 | (rx--generate-alt negated intervals nil) | ||
| 476 | (rx--generate-alt | ||
| 477 | (not negated) (rx--complement-intervals intervals) nil))) | ||
| 478 | |||
| 479 | ;; FIXME: Consider turning `not' into a variadic operator, following SRE: | ||
| 480 | ;; (not A B) = (not (union A B)) = (intersection (not A) (not B)), and | ||
| 481 | ;; (not) = anychar. | ||
| 482 | ;; Maybe allow singleton characters as arguments. | ||
| 458 | 483 | ||
| 459 | (defun rx--translate-not (negated body) | 484 | (defun rx--translate-not (negated body) |
| 460 | "Translate a (not ...) construct. Return (REGEXP . PRECEDENCE). | 485 | "Translate a (not ...) construct. Return (REGEXP . PRECEDENCE). |
| @@ -472,10 +497,14 @@ If NEGATED, negate the sense (thus making it positive)." | |||
| 472 | ('category | 497 | ('category |
| 473 | (rx--translate-category (not negated) (cdr arg))) | 498 | (rx--translate-category (not negated) (cdr arg))) |
| 474 | ('not | 499 | ('not |
| 475 | (rx--translate-not (not negated) (cdr arg)))))) | 500 | (rx--translate-not (not negated) (cdr arg))) |
| 501 | ('union | ||
| 502 | (rx--translate-union (not negated) (cdr arg))) | ||
| 503 | ('intersection | ||
| 504 | (rx--translate-intersection (not negated) (cdr arg)))))) | ||
| 476 | ((let ((class (cdr (assq arg rx--char-classes)))) | 505 | ((let ((class (cdr (assq arg rx--char-classes)))) |
| 477 | (and class | 506 | (and class |
| 478 | (rx--translate-any (not negated) (list class))))) | 507 | (rx--generate-alt (not negated) nil (list class))))) |
| 479 | ((eq arg 'word-boundary) | 508 | ((eq arg 'word-boundary) |
| 480 | (rx--translate-symbol | 509 | (rx--translate-symbol |
| 481 | (if negated 'word-boundary 'not-word-boundary))) | 510 | (if negated 'word-boundary 'not-word-boundary))) |
| @@ -484,6 +513,91 @@ If NEGATED, negate the sense (thus making it positive)." | |||
| 484 | (rx--translate-not negated (list expanded))))) | 513 | (rx--translate-not negated (list expanded))))) |
| 485 | (t (error "Illegal argument to rx `not': %S" arg))))) | 514 | (t (error "Illegal argument to rx `not': %S" arg))))) |
| 486 | 515 | ||
| 516 | (defun rx--complement-intervals (intervals) | ||
| 517 | "Complement of the interval list INTERVALS." | ||
| 518 | (let ((compl nil) | ||
| 519 | (c 0)) | ||
| 520 | (dolist (iv intervals) | ||
| 521 | (when (< c (car iv)) | ||
| 522 | (push (cons c (1- (car iv))) compl)) | ||
| 523 | (setq c (1+ (cdr iv)))) | ||
| 524 | (when (< c (max-char)) | ||
| 525 | (push (cons c (max-char)) compl)) | ||
| 526 | (nreverse compl))) | ||
| 527 | |||
| 528 | (defun rx--intersect-intervals (ivs-a ivs-b) | ||
| 529 | "Intersection of the interval lists IVS-A and IVS-B." | ||
| 530 | (let ((isect nil)) | ||
| 531 | (while (and ivs-a ivs-b) | ||
| 532 | (let ((a (car ivs-a)) | ||
| 533 | (b (car ivs-b))) | ||
| 534 | (cond | ||
| 535 | ((< (cdr a) (car b)) (setq ivs-a (cdr ivs-a))) | ||
| 536 | ((> (car a) (cdr b)) (setq ivs-b (cdr ivs-b))) | ||
| 537 | (t | ||
| 538 | (push (cons (max (car a) (car b)) | ||
| 539 | (min (cdr a) (cdr b))) | ||
| 540 | isect) | ||
| 541 | (setq ivs-a (cdr ivs-a)) | ||
| 542 | (setq ivs-b (cdr ivs-b)) | ||
| 543 | (cond ((< (cdr a) (cdr b)) | ||
| 544 | (push (cons (1+ (cdr a)) (cdr b)) | ||
| 545 | ivs-b)) | ||
| 546 | ((> (cdr a) (cdr b)) | ||
| 547 | (push (cons (1+ (cdr b)) (cdr a)) | ||
| 548 | ivs-a))))))) | ||
| 549 | (nreverse isect))) | ||
| 550 | |||
| 551 | (defun rx--union-intervals (ivs-a ivs-b) | ||
| 552 | "Union of the interval lists IVS-A and IVS-B." | ||
| 553 | (rx--complement-intervals | ||
| 554 | (rx--intersect-intervals | ||
| 555 | (rx--complement-intervals ivs-a) | ||
| 556 | (rx--complement-intervals ivs-b)))) | ||
| 557 | |||
| 558 | (defun rx--charset-intervals (charset) | ||
| 559 | "Return a sorted list of non-adjacent disjoint intervals from CHARSET. | ||
| 560 | CHARSET is any expression allowed in a character set expression: | ||
| 561 | either `any' (no classes permitted), or `not', `union' or `intersection' | ||
| 562 | forms whose arguments are charsets." | ||
| 563 | (pcase charset | ||
| 564 | (`(,(or 'any 'in 'char) . ,body) | ||
| 565 | (let ((parsed (rx--parse-any body))) | ||
| 566 | (when (cdr parsed) | ||
| 567 | (error | ||
| 568 | "Character class not permitted in set operations: %S" | ||
| 569 | (cadr parsed))) | ||
| 570 | (car parsed))) | ||
| 571 | (`(not ,x) (rx--complement-intervals (rx--charset-intervals x))) | ||
| 572 | (`(union . ,xs) (rx--charset-union xs)) | ||
| 573 | (`(intersection . ,xs) (rx--charset-intersection xs)) | ||
| 574 | (_ (let ((expanded (rx--expand-def charset))) | ||
| 575 | (if expanded | ||
| 576 | (rx--charset-intervals expanded) | ||
| 577 | (error "Bad character set: %S" charset)))))) | ||
| 578 | |||
| 579 | (defun rx--charset-union (charsets) | ||
| 580 | "Union of CHARSETS, as a set of intervals." | ||
| 581 | (rx--foldl #'rx--union-intervals nil | ||
| 582 | (mapcar #'rx--charset-intervals charsets))) | ||
| 583 | |||
| 584 | (defconst rx--charset-all (list (cons 0 (max-char)))) | ||
| 585 | |||
| 586 | (defun rx--charset-intersection (charsets) | ||
| 587 | "Intersection of CHARSETS, as a set of intervals." | ||
| 588 | (rx--foldl #'rx--intersect-intervals rx--charset-all | ||
| 589 | (mapcar #'rx--charset-intervals charsets))) | ||
| 590 | |||
| 591 | (defun rx--translate-union (negated body) | ||
| 592 | "Translate a (union ...) construct. Return (REGEXP . PRECEDENCE). | ||
| 593 | If NEGATED, negate the sense." | ||
| 594 | (rx--intervals-to-alt negated (rx--charset-union body))) | ||
| 595 | |||
| 596 | (defun rx--translate-intersection (negated body) | ||
| 597 | "Translate an (intersection ...) construct. Return (REGEXP . PRECEDENCE). | ||
| 598 | If NEGATED, negate the sense." | ||
| 599 | (rx--intervals-to-alt negated (rx--charset-intersection body))) | ||
| 600 | |||
| 487 | (defun rx--atomic-regexp (item) | 601 | (defun rx--atomic-regexp (item) |
| 488 | "ITEM is (REGEXP . PRECEDENCE); return a regexp of precedence t." | 602 | "ITEM is (REGEXP . PRECEDENCE); return a regexp of precedence t." |
| 489 | (if (eq (cdr item) t) | 603 | (if (eq (cdr item) t) |
| @@ -862,6 +976,8 @@ can expand to any number of values." | |||
| 862 | ((or 'any 'in 'char) (rx--translate-any nil body)) | 976 | ((or 'any 'in 'char) (rx--translate-any nil body)) |
| 863 | ('not-char (rx--translate-any t body)) | 977 | ('not-char (rx--translate-any t body)) |
| 864 | ('not (rx--translate-not nil body)) | 978 | ('not (rx--translate-not nil body)) |
| 979 | ('union (rx--translate-union nil body)) | ||
| 980 | ('intersection (rx--translate-intersection nil body)) | ||
| 865 | 981 | ||
| 866 | ('repeat (rx--translate-repeat body)) | 982 | ('repeat (rx--translate-repeat body)) |
| 867 | ('= (rx--translate-= body)) | 983 | ('= (rx--translate-= body)) |
| @@ -920,7 +1036,7 @@ can expand to any number of values." | |||
| 920 | (t (error "Unknown rx form `%s'" op))))))) | 1036 | (t (error "Unknown rx form `%s'" op))))))) |
| 921 | 1037 | ||
| 922 | (defconst rx--builtin-forms | 1038 | (defconst rx--builtin-forms |
| 923 | '(seq sequence : and or | any in char not-char not | 1039 | '(seq sequence : and or | any in char not-char not union intersection |
| 924 | repeat = >= ** | 1040 | repeat = >= ** |
| 925 | zero-or-more 0+ * | 1041 | zero-or-more 0+ * |
| 926 | one-or-more 1+ + | 1042 | one-or-more 1+ + |
| @@ -1033,8 +1149,11 @@ CHAR Match a literal character. | |||
| 1033 | character, a string, a range as string \"A-Z\" or cons | 1149 | character, a string, a range as string \"A-Z\" or cons |
| 1034 | (?A . ?Z), or a character class (see below). Alias: in, char. | 1150 | (?A . ?Z), or a character class (see below). Alias: in, char. |
| 1035 | (not CHARSPEC) Match one character not matched by CHARSPEC. CHARSPEC | 1151 | (not CHARSPEC) Match one character not matched by CHARSPEC. CHARSPEC |
| 1036 | can be (any ...), (syntax ...), (category ...), | 1152 | can be (any ...), (union ...), (intersection ...), |
| 1037 | or a character class. | 1153 | (syntax ...), (category ...), or a character class. |
| 1154 | (union CHARSET...) Union of CHARSETs. | ||
| 1155 | (intersection CHARSET...) Intersection of CHARSETs. | ||
| 1156 | CHARSET is (any...), (not...), (union...) or (intersection...). | ||
| 1038 | not-newline Match any character except a newline. Alias: nonl. | 1157 | not-newline Match any character except a newline. Alias: nonl. |
| 1039 | anychar Match any character. Alias: anything. | 1158 | anychar Match any character. Alias: anything. |
| 1040 | unmatchable Never match anything at all. | 1159 | unmatchable Never match anything at all. |
diff --git a/test/lisp/emacs-lisp/rx-tests.el b/test/lisp/emacs-lisp/rx-tests.el index 317dae2990b..0cd2c9590b7 100644 --- a/test/lisp/emacs-lisp/rx-tests.el +++ b/test/lisp/emacs-lisp/rx-tests.el | |||
| @@ -274,6 +274,63 @@ | |||
| 274 | (should (equal (rx (not (not ascii)) (not (not (not (any "a-z"))))) | 274 | (should (equal (rx (not (not ascii)) (not (not (not (any "a-z"))))) |
| 275 | "[[:ascii:]][^a-z]"))) | 275 | "[[:ascii:]][^a-z]"))) |
| 276 | 276 | ||
| 277 | (ert-deftest rx-union () | ||
| 278 | (should (equal (rx (union)) | ||
| 279 | "\\`a\\`")) | ||
| 280 | (should (equal (rx (union (any "ba"))) | ||
| 281 | "[ab]")) | ||
| 282 | (should (equal (rx (union (any "a-f") (any "c-k" ?y) (any ?r "x-z"))) | ||
| 283 | "[a-krx-z]")) | ||
| 284 | (should (equal (rx (union (not (any "a-m")) (not (any "f-p")))) | ||
| 285 | "[^f-m]")) | ||
| 286 | (should (equal (rx (union (any "e-m") (not (any "a-z")))) | ||
| 287 | "[^a-dn-z]")) | ||
| 288 | (should (equal (rx (union (not (any "g-r")) (not (any "t")))) | ||
| 289 | "[^z-a]")) | ||
| 290 | (should (equal (rx (not (union (not (any "g-r")) (not (any "t"))))) | ||
| 291 | "\\`a\\`")) | ||
| 292 | (should (equal (rx (union (union (any "a-f") (any "u-z")) | ||
| 293 | (any "g-r"))) | ||
| 294 | "[a-ru-z]")) | ||
| 295 | (should (equal (rx (union (intersection (any "c-z") (any "a-g")) | ||
| 296 | (not (any "a-k")))) | ||
| 297 | "[^abh-k]"))) | ||
| 298 | |||
| 299 | (ert-deftest rx-def-in-union () | ||
| 300 | (rx-let ((a (any "badc")) | ||
| 301 | (b (union a (any "def")))) | ||
| 302 | (should (equal(rx (union b (any "q"))) | ||
| 303 | "[a-fq]")))) | ||
| 304 | |||
| 305 | (ert-deftest rx-intersection () | ||
| 306 | (should (equal (rx (intersection)) | ||
| 307 | "[^z-a]")) | ||
| 308 | (should (equal (rx (intersection (any "ba"))) | ||
| 309 | "[ab]")) | ||
| 310 | (should (equal (rx (intersection (any "a-j" "u-z") (any "c-k" ?y) | ||
| 311 | (any "a-i" "x-z"))) | ||
| 312 | "[c-iy]")) | ||
| 313 | (should (equal (rx (intersection (not (any "a-m")) (not (any "f-p")))) | ||
| 314 | "[^a-p]")) | ||
| 315 | (should (equal (rx (intersection (any "a-z") (not (any "g-q")))) | ||
| 316 | "[a-fr-z]")) | ||
| 317 | (should (equal (rx (intersection (any "a-d") (any "e"))) | ||
| 318 | "\\`a\\`")) | ||
| 319 | (should (equal (rx (not (intersection (any "a-d") (any "e")))) | ||
| 320 | "[^z-a]")) | ||
| 321 | (should (equal (rx (intersection (any "d-u") | ||
| 322 | (intersection (any "e-z") (any "a-m")))) | ||
| 323 | "[e-m]")) | ||
| 324 | (should (equal (rx (intersection (union (any "a-f") (any "f-t")) | ||
| 325 | (any "e-w"))) | ||
| 326 | "[e-t]"))) | ||
| 327 | |||
| 328 | (ert-deftest rx-def-in-intersection () | ||
| 329 | (rx-let ((a (any "a-g")) | ||
| 330 | (b (intersection a (any "d-j")))) | ||
| 331 | (should (equal(rx (intersection b (any "e-k"))) | ||
| 332 | "[e-g]")))) | ||
| 333 | |||
| 277 | (ert-deftest rx-group () | 334 | (ert-deftest rx-group () |
| 278 | (should (equal (rx (group nonl) (submatch "x") | 335 | (should (equal (rx (group nonl) (submatch "x") |
| 279 | (group-n 3 "y") (submatch-n 13 "z") (backref 1)) | 336 | (group-n 3 "y") (submatch-n 13 "z") (backref 1)) |