aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Paul Eggert, 2015-04-15 00:26:32 -0700
committer: Paul Eggert, 2015-04-15 00:27:18 -0700
commit: a122a0276bddbda8ca84f9b94250a5a5f4e0582a (patch)
tree: 4d0368943f2d0c53504e1e5e4727adaafd602d7e
parent: 45d75c0b758cf152698e83e180dfc8eed5d355ba (diff)
download: emacs-a122a0276bddbda8ca84f9b94250a5a5f4e0582a.tar.gz
download: emacs-a122a0276bddbda8ca84f9b94250a5a5f4e0582a.zip
Make [:graph:] act like [:print:] sans space
In POSIX [[:print:]] is equivalent to [ [:graph:]], so change
[:graph:] so that it matches everything that [:print:] does, except
for space.
* doc/lispref/searching.texi (Char Classes):
* etc/NEWS:
* lisp/emacs-lisp/rx.el (rx): Document [:graph:] to be [:print:] sans ' '.
* src/character.c, src/character.h (graphicp): New function.
* src/regex.c (ISGRAPH) [emacs]: Use it.
(BIT_GRAPH): New macro.
(BIT_PRINT): Increase to 0x200, to make room for BIT_GRAPH.
(re_wctype_to_bit) [! WIDE_CHAR_SUPPORT]: Return BIT_GRAPH for RECC_GRAPH.
(re_match_2_internal) [emacs]: Use ISGRAPH if BIT_GRAPH, and ISPRINT if BIT_PRINT.
-rw-r--r--  doc/lispref/searching.texi | 14
-rw-r--r--  etc/NEWS                   | 10
-rw-r--r--  lisp/emacs-lisp/rx.el      |  8
-rw-r--r--  src/character.c            |  8
-rw-r--r--  src/character.h            |  1
-rw-r--r--  src/regex.c                | 12
6 files changed, 33 insertions, 20 deletions
diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi
index 238d814a9dc..10ea411d436 100644
--- a/doc/lispref/searching.texi
+++ b/doc/lispref/searching.texi
@@ -558,8 +558,11 @@ This matches any @acronym{ASCII} control character.
558This matches @samp{0} through @samp{9}. Thus, @samp{[-+[:digit:]]} 558This matches @samp{0} through @samp{9}. Thus, @samp{[-+[:digit:]]}
559matches any digit, as well as @samp{+} and @samp{-}. 559matches any digit, as well as @samp{+} and @samp{-}.
560@item [:graph:] 560@item [:graph:]
561This matches graphic characters---everything except @acronym{ASCII} control 561This matches graphic characters---everything except space,
562characters, space, and the delete character. 562@acronym{ASCII} and non-@acronym{ASCII} control characters,
563surrogates, and codepoints unassigned by Unicode, as indicated by the
564Unicode @samp{general-category} property (@pxref{Character
565Properties}).
563@item [:lower:] 566@item [:lower:]
564This matches any lower-case letter, as determined by the current case 567This matches any lower-case letter, as determined by the current case
565table (@pxref{Case Tables}). If @code{case-fold-search} is 568table (@pxref{Case Tables}). If @code{case-fold-search} is
@@ -569,11 +572,8 @@ This matches any multibyte character (@pxref{Text Representations}).
569@item [:nonascii:] 572@item [:nonascii:]
570This matches any non-@acronym{ASCII} character. 573This matches any non-@acronym{ASCII} character.
571@item [:print:] 574@item [:print:]
572This matches printing characters---everything except @acronym{ASCII} 575This matches any printing character---either space, or a graphic
573and non-@acronym{ASCII} control characters (including the delete 576character matched by @samp{[:graph:]}.
574character), surrogates, and codepoints unassigned by Unicode, as
575indicated by the Unicode @samp{general-category} property
576(@pxref{Character Properties}).
577@item [:punct:] 577@item [:punct:]
578This matches any punctuation character. (At present, for multibyte 578This matches any punctuation character. (At present, for multibyte
579characters, it matches anything that has non-word syntax.) 579characters, it matches anything that has non-word syntax.)
diff --git a/etc/NEWS b/etc/NEWS
index 907787a1f3e..d97e80a7171 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -629,12 +629,12 @@ notifications, if Emacs is compiled with file notification support.
629*** gulp.el 629*** gulp.el
630 630
631+++ 631+++
632** The character class [:print:] in regular expressions 632** The character classes [:graph:] and [:print:] in regular expressions
633no longer matches any multibyte character. Instead, Emacs now 633no longer match every multibyte character. Instead, Emacs now
634consults the Unicode character properties to determine which 634consults the Unicode character properties to determine which
635characters are printable. In particular, surrogates and unassigned 635characters are graphic or printable. In particular, surrogates and
636codepoints are now rejected by this class. If you want the old 636unassigned codepoints are now rejected. If you want the old behavior,
637behavior, use [:multibyte:] instead. 637use [:multibyte:] instead.
638 638
639 639
640* New Modes and Packages in Emacs 25.1 640* New Modes and Packages in Emacs 25.1
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
index a5a228e5876..ab9beb60928 100644
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -965,12 +965,12 @@ CHAR
965 matches space and tab only. 965 matches space and tab only.
966 966
967`graphic', `graph' 967`graphic', `graph'
968 matches graphic characters--everything except ASCII control chars, 968 matches graphic characters--everything except space, ASCII
969 space, and DEL. 969 and non-ASCII control characters, surrogates, and codepoints
970 unassigned by Unicode.
970 971
971`printing', `print' 972`printing', `print'
972 matches printing characters--everything except ASCII and non-ASCII 973 matches space and graphic characters.
973 control characters, surrogates, and codepoints unassigned by Unicode.
974 974
975`alphanumeric', `alnum' 975`alphanumeric', `alnum'
976 matches alphabetic characters and digits. (For multibyte characters, 976 matches alphabetic characters and digits. (For multibyte characters,
diff --git a/src/character.c b/src/character.c
index b357dd5a334..ea98cf68e6c 100644
--- a/src/character.c
+++ b/src/character.c
@@ -1022,6 +1022,14 @@ decimalnump (int c)
1022 return gen_cat == UNICODE_CATEGORY_Nd; 1022 return gen_cat == UNICODE_CATEGORY_Nd;
1023} 1023}
1024 1024
1025/* Return 'true' if C is a graphic character as defined by its
1026 Unicode properties. */
1027bool
1028graphicp (int c)
1029{
1030 return c == ' ' || printablep (c);
1031}
1032
1025/* Return 'true' if C is a printable character as defined by its 1033/* Return 'true' if C is a printable character as defined by its
1026 Unicode properties. */ 1034 Unicode properties. */
1027bool 1035bool
diff --git a/src/character.h b/src/character.h
index 1a5d2c8a670..859d717a0ba 100644
--- a/src/character.h
+++ b/src/character.h
@@ -662,6 +662,7 @@ extern Lisp_Object string_escape_byte8 (Lisp_Object);
662 662
663extern bool alphabeticp (int); 663extern bool alphabeticp (int);
664extern bool decimalnump (int); 664extern bool decimalnump (int);
665extern bool graphicp (int);
665extern bool printablep (int); 666extern bool printablep (int);
666 667
667/* Return a translation table of id number ID. */ 668/* Return a translation table of id number ID. */
diff --git a/src/regex.c b/src/regex.c
index b9d09d02c22..4af70c62cf5 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -314,7 +314,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
314 314
315# define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \ 315# define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
316 ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \ 316 ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \
317 : 1) 317 : graphicp (c))
318 318
319# define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \ 319# define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \
320 ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \ 320 ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \
@@ -1875,7 +1875,8 @@ struct range_table_work_area
1875#define BIT_MULTIBYTE 0x20 1875#define BIT_MULTIBYTE 0x20
1876#define BIT_ALPHA 0x40 1876#define BIT_ALPHA 0x40
1877#define BIT_ALNUM 0x80 1877#define BIT_ALNUM 0x80
1878#define BIT_PRINT 0x100 1878#define BIT_GRAPH 0x100
1879#define BIT_PRINT 0x200
1879 1880
1880 1881
1881/* Set the bit for character C in a list. */ 1882/* Set the bit for character C in a list. */
@@ -2074,7 +2075,7 @@ re_wctype_to_bit (re_wctype_t cc)
2074{ 2075{
2075 switch (cc) 2076 switch (cc)
2076 { 2077 {
2077 case RECC_NONASCII: case RECC_GRAPH: 2078 case RECC_NONASCII:
2078 case RECC_MULTIBYTE: return BIT_MULTIBYTE; 2079 case RECC_MULTIBYTE: return BIT_MULTIBYTE;
2079 case RECC_ALPHA: return BIT_ALPHA; 2080 case RECC_ALPHA: return BIT_ALPHA;
2080 case RECC_ALNUM: return BIT_ALNUM; 2081 case RECC_ALNUM: return BIT_ALNUM;
@@ -2083,6 +2084,7 @@ re_wctype_to_bit (re_wctype_t cc)
2083 case RECC_UPPER: return BIT_UPPER; 2084 case RECC_UPPER: return BIT_UPPER;
2084 case RECC_PUNCT: return BIT_PUNCT; 2085 case RECC_PUNCT: return BIT_PUNCT;
2085 case RECC_SPACE: return BIT_SPACE; 2086 case RECC_SPACE: return BIT_SPACE;
2087 case RECC_GRAPH: return BIT_GRAPH;
2086 case RECC_PRINT: return BIT_PRINT; 2088 case RECC_PRINT: return BIT_PRINT;
2087 case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: 2089 case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
2088 case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; 2090 case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
@@ -5522,7 +5524,9 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const_re_char *string1,
5522 | (class_bits & BIT_UPPER && ISUPPER (c)) 5524 | (class_bits & BIT_UPPER && ISUPPER (c))
5523 | (class_bits & BIT_WORD && ISWORD (c)) 5525 | (class_bits & BIT_WORD && ISWORD (c))
5524 | (class_bits & BIT_ALPHA && ISALPHA (c)) 5526 | (class_bits & BIT_ALPHA && ISALPHA (c))
5525 | (class_bits & BIT_ALNUM && ISALNUM (c))) 5527 | (class_bits & BIT_ALNUM && ISALNUM (c))
5528 | (class_bits & BIT_GRAPH && ISGRAPH (c))
5529 | (class_bits & BIT_PRINT && ISPRINT (c)))
5526 not = !not; 5530 not = !not;
5527 else 5531 else
5528 CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count); 5532 CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);