diff options
| author | Mattias Engdegård | 2019-12-13 13:10:58 +0100 |
|---|---|---|
| committer | Mattias Engdegård | 2019-12-13 13:30:14 +0100 |
| commit | 82b4e48c590cf2c0448a751e641b0ee7a6a02438 (patch) | |
| tree | 55da830604ce9ebe4a5aa626bec285fb688578a3 | |
| parent | b04086adf649b18cf5309dd43aa638fc7b3cd4a0 (diff) | |
| download | emacs-82b4e48c590cf2c0448a751e641b0ee7a6a02438.tar.gz emacs-82b4e48c590cf2c0448a751e641b0ee7a6a02438.zip | |
Allow characters and single-char strings in rx charsets
The `not' and `intersection' forms, and `or' inside these forms,
now accept characters and single-character strings as arguments.
Previously, they had to be wrapped in `any' forms.
This does not add expressive power but is a convenience and is easily
understood.
* doc/lispref/searching.texi (Rx Constructs): Amend the documentation.
* etc/NEWS: Announce the change.
* lisp/emacs-lisp/rx.el (rx--charset-p, rx--translate-not)
(rx--charset-intervals, rx): Accept characters and 1-char strings in
more places.
* test/lisp/emacs-lisp/rx-tests.el (rx-not, rx-charset-or)
(rx-def-in-charset-or, rx-intersection): Test the change.
| -rw-r--r-- | doc/lispref/searching.texi | 11 | ||||
| -rw-r--r-- | etc/NEWS | 3 | ||||
| -rw-r--r-- | lisp/emacs-lisp/rx.el | 26 | ||||
| -rw-r--r-- | test/lisp/emacs-lisp/rx-tests.el | 20 |
4 files changed, 43 insertions, 17 deletions
diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi index 0c6c7cc68b5..700880c2289 100644 --- a/doc/lispref/searching.texi +++ b/doc/lispref/searching.texi | |||
| @@ -1214,8 +1214,9 @@ Corresponding string regexp: @samp{[@dots{}]} | |||
| 1214 | @item @code{(not @var{charspec})} | 1214 | @item @code{(not @var{charspec})} |
| 1215 | @cindex @code{not} in rx | 1215 | @cindex @code{not} in rx |
| 1216 | Match a character not included in @var{charspec}. @var{charspec} can | 1216 | Match a character not included in @var{charspec}. @var{charspec} can |
| 1217 | be an @code{any}, @code{not}, @code{or}, @code{intersection}, | 1217 | be a character, a single-character string, an @code{any}, @code{not}, |
| 1218 | @code{syntax} or @code{category} form, or a character class. | 1218 | @code{or}, @code{intersection}, @code{syntax} or @code{category} form, |
| 1219 | or a character class. | ||
| 1219 | If @var{charspec} is an @code{or} form, its arguments have the same | 1220 | If @var{charspec} is an @code{or} form, its arguments have the same |
| 1220 | restrictions as those of @code{intersection}; see below.@* | 1221 | restrictions as those of @code{intersection}; see below.@* |
| 1221 | Corresponding string regexp: @samp{[^@dots{}]}, @samp{\S@var{code}}, | 1222 | Corresponding string regexp: @samp{[^@dots{}]}, @samp{\S@var{code}}, |
| @@ -1224,9 +1225,9 @@ Corresponding string regexp: @samp{[^@dots{}]}, @samp{\S@var{code}}, | |||
| 1224 | @item @code{(intersection @var{charset}@dots{})} | 1225 | @item @code{(intersection @var{charset}@dots{})} |
| 1225 | @cindex @code{intersection} in rx | 1226 | @cindex @code{intersection} in rx |
| 1226 | Match a character included in all of the @var{charset}s. | 1227 | Match a character included in all of the @var{charset}s. |
| 1227 | Each @var{charset} can be an @code{any} form without character | 1228 | Each @var{charset} can be a character, a single-character string, an |
| 1228 | classes, or an @code{intersection}, @code{or} or @code{not} form whose | 1229 | @code{any} form without character classes, or an @code{intersection}, |
| 1229 | arguments are also @var{charset}s. | 1230 | @code{or} or @code{not} form whose arguments are also @var{charset}s. |
| 1230 | 1231 | ||
| 1231 | @item @code{not-newline}, @code{nonl} | 1232 | @item @code{not-newline}, @code{nonl} |
| 1232 | @cindex @code{not-newline} in rx | 1233 | @cindex @code{not-newline} in rx |
| @@ -2124,6 +2124,9 @@ Both match any single character; 'anychar' is more descriptive. | |||
| 2124 | With 'or' and 'not', it can be used to compose character-matching | 2124 | With 'or' and 'not', it can be used to compose character-matching |
| 2125 | expressions from simpler parts. | 2125 | expressions from simpler parts. |
| 2126 | 2126 | ||
| 2127 | +++ | ||
| 2128 | *** 'not' argument can now be a character or single-char string. | ||
| 2129 | |||
| 2127 | ** Frames | 2130 | ** Frames |
| 2128 | 2131 | ||
| 2129 | +++ | 2132 | +++ |
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el index a5cab1db888..43f7a4e2752 100644 --- a/lisp/emacs-lisp/rx.el +++ b/lisp/emacs-lisp/rx.el | |||
| @@ -309,6 +309,8 @@ and set operations." | |||
| 309 | (rx--every (lambda (x) (not (symbolp x))) (cdr form))) | 309 | (rx--every (lambda (x) (not (symbolp x))) (cdr form))) |
| 310 | (and (memq (car form) '(not or | intersection)) | 310 | (and (memq (car form) '(not or | intersection)) |
| 311 | (rx--every #'rx--charset-p (cdr form))))) | 311 | (rx--every #'rx--charset-p (cdr form))))) |
| 312 | (characterp form) | ||
| 313 | (and (stringp form) (= (length form) 1)) | ||
| 312 | (and (or (symbolp form) (consp form)) | 314 | (and (or (symbolp form) (consp form)) |
| 313 | (let ((expanded (rx--expand-def form))) | 315 | (let ((expanded (rx--expand-def form))) |
| 314 | (and expanded | 316 | (and expanded |
| @@ -521,6 +523,11 @@ If NEGATED, negate the sense (thus making it positive)." | |||
| 521 | ((eq arg 'word-boundary) | 523 | ((eq arg 'word-boundary) |
| 522 | (rx--translate-symbol | 524 | (rx--translate-symbol |
| 523 | (if negated 'word-boundary 'not-word-boundary))) | 525 | (if negated 'word-boundary 'not-word-boundary))) |
| 526 | ((characterp arg) | ||
| 527 | (rx--generate-alt (not negated) (list (cons arg arg)) nil)) | ||
| 528 | ((and (stringp arg) (= (length arg) 1)) | ||
| 529 | (let ((char (string-to-char arg))) | ||
| 530 | (rx--generate-alt (not negated) (list (cons char char)) nil))) | ||
| 524 | ((let ((expanded (rx--expand-def arg))) | 531 | ((let ((expanded (rx--expand-def arg))) |
| 525 | (and expanded | 532 | (and expanded |
| 526 | (rx--translate-not negated (list expanded))))) | 533 | (rx--translate-not negated (list expanded))))) |
| @@ -571,8 +578,8 @@ If NEGATED, negate the sense (thus making it positive)." | |||
| 571 | (defun rx--charset-intervals (charset) | 578 | (defun rx--charset-intervals (charset) |
| 572 | "Return a sorted list of non-adjacent disjoint intervals from CHARSET. | 579 | "Return a sorted list of non-adjacent disjoint intervals from CHARSET. |
| 573 | CHARSET is any expression allowed in a character set expression: | 580 | CHARSET is any expression allowed in a character set expression: |
| 574 | either `any' (no classes permitted), or `not', `or' or `intersection' | 581 | characters, single-char strings, `any' forms (no classes permitted), |
| 575 | forms whose arguments are charsets." | 582 | or `not', `or' or `intersection' forms whose arguments are charsets." |
| 576 | (pcase charset | 583 | (pcase charset |
| 577 | (`(,(or 'any 'in 'char) . ,body) | 584 | (`(,(or 'any 'in 'char) . ,body) |
| 578 | (let ((parsed (rx--parse-any body))) | 585 | (let ((parsed (rx--parse-any body))) |
| @@ -584,6 +591,11 @@ forms whose arguments are charsets." | |||
| 584 | (`(not ,x) (rx--complement-intervals (rx--charset-intervals x))) | 591 | (`(not ,x) (rx--complement-intervals (rx--charset-intervals x))) |
| 585 | (`(,(or 'or '|) . ,body) (rx--charset-union body)) | 592 | (`(,(or 'or '|) . ,body) (rx--charset-union body)) |
| 586 | (`(intersection . ,body) (rx--charset-intersection body)) | 593 | (`(intersection . ,body) (rx--charset-intersection body)) |
| 594 | ((pred characterp) | ||
| 595 | (list (cons charset charset))) | ||
| 596 | ((guard (and (stringp charset) (= (length charset) 1))) | ||
| 597 | (let ((char (string-to-char charset))) | ||
| 598 | (list (cons char char)))) | ||
| 587 | (_ (let ((expanded (rx--expand-def charset))) | 599 | (_ (let ((expanded (rx--expand-def charset))) |
| 588 | (if expanded | 600 | (if expanded |
| 589 | (rx--charset-intervals expanded) | 601 | (rx--charset-intervals expanded) |
| @@ -1161,10 +1173,12 @@ CHAR Match a literal character. | |||
| 1161 | character, a string, a range as string \"A-Z\" or cons | 1173 | character, a string, a range as string \"A-Z\" or cons |
| 1162 | (?A . ?Z), or a character class (see below). Alias: in, char. | 1174 | (?A . ?Z), or a character class (see below). Alias: in, char. |
| 1163 | (not CHARSPEC) Match one character not matched by CHARSPEC. CHARSPEC | 1175 | (not CHARSPEC) Match one character not matched by CHARSPEC. CHARSPEC |
| 1164 | can be (any ...), (or ...), (intersection ...), | 1176 | can be a character, single-char string, (any ...), (or ...), |
| 1165 | (syntax ...), (category ...), or a character class. | 1177 | (intersection ...), (syntax ...), (category ...), |
| 1166 | (intersection CHARSET...) Intersection of CHARSETs. | 1178 | or a character class. |
| 1167 | CHARSET is (any...), (not...), (or...) or (intersection...). | 1179 | (intersection CHARSET...) Match all CHARSETs. |
| 1180 | CHARSET is (any...), (not...), (or...) or (intersection...), | ||
| 1181 | a character or a single-char string. | ||
| 1168 | not-newline Match any character except a newline. Alias: nonl. | 1182 | not-newline Match any character except a newline. Alias: nonl. |
| 1169 | anychar Match any character. Alias: anything. | 1183 | anychar Match any character. Alias: anything. |
| 1170 | unmatchable Never match anything at all. | 1184 | unmatchable Never match anything at all. |
diff --git a/test/lisp/emacs-lisp/rx-tests.el b/test/lisp/emacs-lisp/rx-tests.el index 344f46764c8..a82f1f83645 100644 --- a/test/lisp/emacs-lisp/rx-tests.el +++ b/test/lisp/emacs-lisp/rx-tests.el | |||
| @@ -272,7 +272,9 @@ | |||
| 272 | (should (equal (rx (not (category tone-mark)) (not (category lao))) | 272 | (should (equal (rx (not (category tone-mark)) (not (category lao))) |
| 273 | "\\C4\\Co")) | 273 | "\\C4\\Co")) |
| 274 | (should (equal (rx (not (not ascii)) (not (not (not (any "a-z"))))) | 274 | (should (equal (rx (not (not ascii)) (not (not (not (any "a-z"))))) |
| 275 | "[[:ascii:]][^a-z]"))) | 275 | "[[:ascii:]][^a-z]")) |
| 276 | (should (equal (rx (not ?a) (not "b") (not (not "c")) (not (not ?d))) | ||
| 277 | "[^a][^b]cd"))) | ||
| 276 | 278 | ||
| 277 | (ert-deftest rx-charset-or () | 279 | (ert-deftest rx-charset-or () |
| 278 | (should (equal (rx (or)) | 280 | (should (equal (rx (or)) |
| @@ -294,13 +296,17 @@ | |||
| 294 | "[a-ru-z]")) | 296 | "[a-ru-z]")) |
| 295 | (should (equal (rx (or (intersection (any "c-z") (any "a-g")) | 297 | (should (equal (rx (or (intersection (any "c-z") (any "a-g")) |
| 296 | (not (any "a-k")))) | 298 | (not (any "a-k")))) |
| 297 | "[^abh-k]"))) | 299 | "[^abh-k]")) |
| 300 | (should (equal (rx (or ?f (any "b-e") "a") (not (or ?x "y" (any "s-w")))) | ||
| 301 | "[a-f][^s-y]"))) | ||
| 298 | 302 | ||
| 299 | (ert-deftest rx-def-in-charset-or () | 303 | (ert-deftest rx-def-in-charset-or () |
| 300 | (rx-let ((a (any "badc")) | 304 | (rx-let ((a (any "badc")) |
| 301 | (b (| a (any "def")))) | 305 | (b (| a (any "def"))) |
| 302 | (should (equal (rx (or b (any "q"))) | 306 | (c ?a) |
| 303 | "[a-fq]"))) | 307 | (d "b")) |
| 308 | (should (equal (rx (or b (any "q")) (or c d)) | ||
| 309 | "[a-fq][ab]"))) | ||
| 304 | (rx-let ((diff-| (a b) (not (or (not a) b)))) | 310 | (rx-let ((diff-| (a b) (not (or (not a) b)))) |
| 305 | (should (equal (rx (diff-| (any "a-z") (any "gr"))) | 311 | (should (equal (rx (diff-| (any "a-z") (any "gr"))) |
| 306 | "[a-fh-qs-z]")))) | 312 | "[a-fh-qs-z]")))) |
| @@ -326,7 +332,9 @@ | |||
| 326 | "[e-m]")) | 332 | "[e-m]")) |
| 327 | (should (equal (rx (intersection (or (any "a-f") (any "f-t")) | 333 | (should (equal (rx (intersection (or (any "a-f") (any "f-t")) |
| 328 | (any "e-w"))) | 334 | (any "e-w"))) |
| 329 | "[e-t]"))) | 335 | "[e-t]")) |
| 336 | (should (equal (rx (intersection ?m (any "a-z") "m")) | ||
| 337 | "m"))) | ||
| 330 | 338 | ||
| 331 | (ert-deftest rx-def-in-intersection () | 339 | (ert-deftest rx-def-in-intersection () |
| 332 | (rx-let ((a (any "a-g")) | 340 | (rx-let ((a (any "a-g")) |