aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Mattias Engdegård 2019-12-13 13:10:58 +0100
committer Mattias Engdegård 2019-12-13 13:30:14 +0100
commit82b4e48c590cf2c0448a751e641b0ee7a6a02438 (patch)
tree55da830604ce9ebe4a5aa626bec285fb688578a3
parentb04086adf649b18cf5309dd43aa638fc7b3cd4a0 (diff)
downloademacs-82b4e48c590cf2c0448a751e641b0ee7a6a02438.tar.gz
emacs-82b4e48c590cf2c0448a751e641b0ee7a6a02438.zip
Allow characters and single-char strings in rx charsets
The `not' and `intersection' forms, and `or' inside these forms, now accept characters and single-character strings as arguments. Previously, they had to be wrapped in `any' forms. This does not add expressive power but is a convenience and is easily understood. * doc/lispref/searching.texi (Rx Constructs): Amend the documentation. * etc/NEWS: Announce the change. * lisp/emacs-lisp/rx.el (rx--charset-p, rx--translate-not) (rx--charset-intervals, rx): Accept characters and 1-char strings in more places. * test/lisp/emacs-lisp/rx-tests.el (rx-not, rx-charset-or) (rx-def-in-charset-or, rx-intersection): Test the change.
-rw-r--r--doc/lispref/searching.texi11
-rw-r--r--etc/NEWS3
-rw-r--r--lisp/emacs-lisp/rx.el26
-rw-r--r--test/lisp/emacs-lisp/rx-tests.el20
4 files changed, 43 insertions, 17 deletions
diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi
index 0c6c7cc68b5..700880c2289 100644
--- a/doc/lispref/searching.texi
+++ b/doc/lispref/searching.texi
@@ -1214,8 +1214,9 @@ Corresponding string regexp: @samp{[@dots{}]}
1214@item @code{(not @var{charspec})} 1214@item @code{(not @var{charspec})}
1215@cindex @code{not} in rx 1215@cindex @code{not} in rx
1216Match a character not included in @var{charspec}. @var{charspec} can 1216Match a character not included in @var{charspec}. @var{charspec} can
1217be an @code{any}, @code{not}, @code{or}, @code{intersection}, 1217be a character, a single-character string, an @code{any}, @code{not},
1218@code{syntax} or @code{category} form, or a character class. 1218@code{or}, @code{intersection}, @code{syntax} or @code{category} form,
1219or a character class.
1219If @var{charspec} is an @code{or} form, its arguments have the same 1220If @var{charspec} is an @code{or} form, its arguments have the same
1220restrictions as those of @code{intersection}; see below.@* 1221restrictions as those of @code{intersection}; see below.@*
1221Corresponding string regexp: @samp{[^@dots{}]}, @samp{\S@var{code}}, 1222Corresponding string regexp: @samp{[^@dots{}]}, @samp{\S@var{code}},
@@ -1224,9 +1225,9 @@ Corresponding string regexp: @samp{[^@dots{}]}, @samp{\S@var{code}},
1224@item @code{(intersection @var{charset}@dots{})} 1225@item @code{(intersection @var{charset}@dots{})}
1225@cindex @code{intersection} in rx 1226@cindex @code{intersection} in rx
1226Match a character included in all of the @var{charset}s. 1227Match a character included in all of the @var{charset}s.
1227Each @var{charset} can be an @code{any} form without character 1228Each @var{charset} can be a character, a single-character string, an
1228classes, or an @code{intersection}, @code{or} or @code{not} form whose 1229@code{any} form without character classes, or an @code{intersection},
1229arguments are also @var{charset}s. 1230@code{or} or @code{not} form whose arguments are also @var{charset}s.
1230 1231
1231@item @code{not-newline}, @code{nonl} 1232@item @code{not-newline}, @code{nonl}
1232@cindex @code{not-newline} in rx 1233@cindex @code{not-newline} in rx
diff --git a/etc/NEWS b/etc/NEWS
index 1e0422c761f..a7f3c3d2fe8 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -2124,6 +2124,9 @@ Both match any single character; 'anychar' is more descriptive.
2124With 'or' and 'not', it can be used to compose character-matching 2124With 'or' and 'not', it can be used to compose character-matching
2125expressions from simpler parts. 2125expressions from simpler parts.
2126 2126
2127+++
2128*** 'not' argument can now be a character or single-char string.
2129
2127** Frames 2130** Frames
2128 2131
2129+++ 2132+++
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
index a5cab1db888..43f7a4e2752 100644
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -309,6 +309,8 @@ and set operations."
309 (rx--every (lambda (x) (not (symbolp x))) (cdr form))) 309 (rx--every (lambda (x) (not (symbolp x))) (cdr form)))
310 (and (memq (car form) '(not or | intersection)) 310 (and (memq (car form) '(not or | intersection))
311 (rx--every #'rx--charset-p (cdr form))))) 311 (rx--every #'rx--charset-p (cdr form)))))
312 (characterp form)
313 (and (stringp form) (= (length form) 1))
312 (and (or (symbolp form) (consp form)) 314 (and (or (symbolp form) (consp form))
313 (let ((expanded (rx--expand-def form))) 315 (let ((expanded (rx--expand-def form)))
314 (and expanded 316 (and expanded
@@ -521,6 +523,11 @@ If NEGATED, negate the sense (thus making it positive)."
521 ((eq arg 'word-boundary) 523 ((eq arg 'word-boundary)
522 (rx--translate-symbol 524 (rx--translate-symbol
523 (if negated 'word-boundary 'not-word-boundary))) 525 (if negated 'word-boundary 'not-word-boundary)))
526 ((characterp arg)
527 (rx--generate-alt (not negated) (list (cons arg arg)) nil))
528 ((and (stringp arg) (= (length arg) 1))
529 (let ((char (string-to-char arg)))
530 (rx--generate-alt (not negated) (list (cons char char)) nil)))
524 ((let ((expanded (rx--expand-def arg))) 531 ((let ((expanded (rx--expand-def arg)))
525 (and expanded 532 (and expanded
526 (rx--translate-not negated (list expanded))))) 533 (rx--translate-not negated (list expanded)))))
@@ -571,8 +578,8 @@ If NEGATED, negate the sense (thus making it positive)."
571(defun rx--charset-intervals (charset) 578(defun rx--charset-intervals (charset)
572 "Return a sorted list of non-adjacent disjoint intervals from CHARSET. 579 "Return a sorted list of non-adjacent disjoint intervals from CHARSET.
573CHARSET is any expression allowed in a character set expression: 580CHARSET is any expression allowed in a character set expression:
574either `any' (no classes permitted), or `not', `or' or `intersection' 581characters, single-char strings, `any' forms (no classes permitted),
575forms whose arguments are charsets." 582or `not', `or' or `intersection' forms whose arguments are charsets."
576 (pcase charset 583 (pcase charset
577 (`(,(or 'any 'in 'char) . ,body) 584 (`(,(or 'any 'in 'char) . ,body)
578 (let ((parsed (rx--parse-any body))) 585 (let ((parsed (rx--parse-any body)))
@@ -584,6 +591,11 @@ forms whose arguments are charsets."
584 (`(not ,x) (rx--complement-intervals (rx--charset-intervals x))) 591 (`(not ,x) (rx--complement-intervals (rx--charset-intervals x)))
585 (`(,(or 'or '|) . ,body) (rx--charset-union body)) 592 (`(,(or 'or '|) . ,body) (rx--charset-union body))
586 (`(intersection . ,body) (rx--charset-intersection body)) 593 (`(intersection . ,body) (rx--charset-intersection body))
594 ((pred characterp)
595 (list (cons charset charset)))
596 ((guard (and (stringp charset) (= (length charset) 1)))
597 (let ((char (string-to-char charset)))
598 (list (cons char char))))
587 (_ (let ((expanded (rx--expand-def charset))) 599 (_ (let ((expanded (rx--expand-def charset)))
588 (if expanded 600 (if expanded
589 (rx--charset-intervals expanded) 601 (rx--charset-intervals expanded)
@@ -1161,10 +1173,12 @@ CHAR Match a literal character.
1161 character, a string, a range as string \"A-Z\" or cons 1173 character, a string, a range as string \"A-Z\" or cons
1162 (?A . ?Z), or a character class (see below). Alias: in, char. 1174 (?A . ?Z), or a character class (see below). Alias: in, char.
1163(not CHARSPEC) Match one character not matched by CHARSPEC. CHARSPEC 1175(not CHARSPEC) Match one character not matched by CHARSPEC. CHARSPEC
1164 can be (any ...), (or ...), (intersection ...), 1176 can be a character, single-char string, (any ...), (or ...),
1165 (syntax ...), (category ...), or a character class. 1177 (intersection ...), (syntax ...), (category ...),
1166(intersection CHARSET...) Intersection of CHARSETs. 1178 or a character class.
1167 CHARSET is (any...), (not...), (or...) or (intersection...). 1179(intersection CHARSET...) Match all CHARSETs.
1180 CHARSET is (any...), (not...), (or...) or (intersection...),
1181 a character or a single-char string.
1168not-newline Match any character except a newline. Alias: nonl. 1182not-newline Match any character except a newline. Alias: nonl.
1169anychar Match any character. Alias: anything. 1183anychar Match any character. Alias: anything.
1170unmatchable Never match anything at all. 1184unmatchable Never match anything at all.
diff --git a/test/lisp/emacs-lisp/rx-tests.el b/test/lisp/emacs-lisp/rx-tests.el
index 344f46764c8..a82f1f83645 100644
--- a/test/lisp/emacs-lisp/rx-tests.el
+++ b/test/lisp/emacs-lisp/rx-tests.el
@@ -272,7 +272,9 @@
272 (should (equal (rx (not (category tone-mark)) (not (category lao))) 272 (should (equal (rx (not (category tone-mark)) (not (category lao)))
273 "\\C4\\Co")) 273 "\\C4\\Co"))
274 (should (equal (rx (not (not ascii)) (not (not (not (any "a-z"))))) 274 (should (equal (rx (not (not ascii)) (not (not (not (any "a-z")))))
275 "[[:ascii:]][^a-z]"))) 275 "[[:ascii:]][^a-z]"))
276 (should (equal (rx (not ?a) (not "b") (not (not "c")) (not (not ?d)))
277 "[^a][^b]cd")))
276 278
277(ert-deftest rx-charset-or () 279(ert-deftest rx-charset-or ()
278 (should (equal (rx (or)) 280 (should (equal (rx (or))
@@ -294,13 +296,17 @@
294 "[a-ru-z]")) 296 "[a-ru-z]"))
295 (should (equal (rx (or (intersection (any "c-z") (any "a-g")) 297 (should (equal (rx (or (intersection (any "c-z") (any "a-g"))
296 (not (any "a-k")))) 298 (not (any "a-k"))))
297 "[^abh-k]"))) 299 "[^abh-k]"))
300 (should (equal (rx (or ?f (any "b-e") "a") (not (or ?x "y" (any "s-w"))))
301 "[a-f][^s-y]")))
298 302
299(ert-deftest rx-def-in-charset-or () 303(ert-deftest rx-def-in-charset-or ()
300 (rx-let ((a (any "badc")) 304 (rx-let ((a (any "badc"))
301 (b (| a (any "def")))) 305 (b (| a (any "def")))
302 (should (equal (rx (or b (any "q"))) 306 (c ?a)
303 "[a-fq]"))) 307 (d "b"))
308 (should (equal (rx (or b (any "q")) (or c d))
309 "[a-fq][ab]")))
304 (rx-let ((diff-| (a b) (not (or (not a) b)))) 310 (rx-let ((diff-| (a b) (not (or (not a) b))))
305 (should (equal (rx (diff-| (any "a-z") (any "gr"))) 311 (should (equal (rx (diff-| (any "a-z") (any "gr")))
306 "[a-fh-qs-z]")))) 312 "[a-fh-qs-z]"))))
@@ -326,7 +332,9 @@
326 "[e-m]")) 332 "[e-m]"))
327 (should (equal (rx (intersection (or (any "a-f") (any "f-t")) 333 (should (equal (rx (intersection (or (any "a-f") (any "f-t"))
328 (any "e-w"))) 334 (any "e-w")))
329 "[e-t]"))) 335 "[e-t]"))
336 (should (equal (rx (intersection ?m (any "a-z") "m"))
337 "m")))
330 338
331(ert-deftest rx-def-in-intersection () 339(ert-deftest rx-def-in-intersection ()
332 (rx-let ((a (any "a-g")) 340 (rx-let ((a (any "a-g"))