diff options
| author | Paul Eggert | 2015-04-15 00:26:32 -0700 |
|---|---|---|
| committer | Paul Eggert | 2015-04-15 00:27:18 -0700 |
| commit | a122a0276bddbda8ca84f9b94250a5a5f4e0582a (patch) | |
| tree | 4d0368943f2d0c53504e1e5e4727adaafd602d7e | |
| parent | 45d75c0b758cf152698e83e180dfc8eed5d355ba (diff) | |
| download | emacs-a122a0276bddbda8ca84f9b94250a5a5f4e0582a.tar.gz emacs-a122a0276bddbda8ca84f9b94250a5a5f4e0582a.zip | |
Make [:graph:] act like [:print:] sans space
In POSIX [[:print:]] is equivalent to [ [:graph:]], so change
[:graph:] so that it matches everything that [:print:] does,
except for space.
* doc/lispref/searching.texi (Char Classes):
* etc/NEWS:
* lisp/emacs-lisp/rx.el (rx):
Document [:graph:] to be [:print:] sans ' '.
* src/character.c, src/character.h (graphicp): New function.
* src/regex.c (ISGRAPH) [emacs]: Use it.
(BIT_GRAPH): New macro.
(BIT_PRINT): Increase to 0x200, to make room for BIT_GRAPH.
(re_wctype_to_bit) [! WIDE_CHAR_SUPPORT]:
Return BIT_GRAPH for RECC_GRAPH.
(re_match_2_internal) [emacs]: Use ISGRAPH if BIT_GRAPH,
and ISPRINT if BIT_PRINT.
| -rw-r--r-- | doc/lispref/searching.texi | 14 | ||||
| -rw-r--r-- | etc/NEWS | 10 | ||||
| -rw-r--r-- | lisp/emacs-lisp/rx.el | 8 | ||||
| -rw-r--r-- | src/character.c | 8 | ||||
| -rw-r--r-- | src/character.h | 1 | ||||
| -rw-r--r-- | src/regex.c | 12 |
6 files changed, 33 insertions, 20 deletions
diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi index 238d814a9dc..10ea411d436 100644 --- a/doc/lispref/searching.texi +++ b/doc/lispref/searching.texi | |||
| @@ -558,8 +558,11 @@ This matches any @acronym{ASCII} control character. | |||
| 558 | This matches @samp{0} through @samp{9}. Thus, @samp{[-+[:digit:]]} | 558 | This matches @samp{0} through @samp{9}. Thus, @samp{[-+[:digit:]]} |
| 559 | matches any digit, as well as @samp{+} and @samp{-}. | 559 | matches any digit, as well as @samp{+} and @samp{-}. |
| 560 | @item [:graph:] | 560 | @item [:graph:] |
| 561 | This matches graphic characters---everything except @acronym{ASCII} control | 561 | This matches graphic characters---everything except space, |
| 562 | characters, space, and the delete character. | 562 | @acronym{ASCII} and non-@acronym{ASCII} control characters, |
| 563 | surrogates, and codepoints unassigned by Unicode, as indicated by the | ||
| 564 | Unicode @samp{general-category} property (@pxref{Character | ||
| 565 | Properties}). | ||
| 563 | @item [:lower:] | 566 | @item [:lower:] |
| 564 | This matches any lower-case letter, as determined by the current case | 567 | This matches any lower-case letter, as determined by the current case |
| 565 | table (@pxref{Case Tables}). If @code{case-fold-search} is | 568 | table (@pxref{Case Tables}). If @code{case-fold-search} is |
| @@ -569,11 +572,8 @@ This matches any multibyte character (@pxref{Text Representations}). | |||
| 569 | @item [:nonascii:] | 572 | @item [:nonascii:] |
| 570 | This matches any non-@acronym{ASCII} character. | 573 | This matches any non-@acronym{ASCII} character. |
| 571 | @item [:print:] | 574 | @item [:print:] |
| 572 | This matches printing characters---everything except @acronym{ASCII} | 575 | This matches any printing character---either space, or a graphic |
| 573 | and non-@acronym{ASCII} control characters (including the delete | 576 | character matched by @samp{[:graph:]}. |
| 574 | character), surrogates, and codepoints unassigned by Unicode, as | ||
| 575 | indicated by the Unicode @samp{general-category} property | ||
| 576 | (@pxref{Character Properties}). | ||
| 577 | @item [:punct:] | 577 | @item [:punct:] |
| 578 | This matches any punctuation character. (At present, for multibyte | 578 | This matches any punctuation character. (At present, for multibyte |
| 579 | characters, it matches anything that has non-word syntax.) | 579 | characters, it matches anything that has non-word syntax.) |
diff --git a/etc/NEWS b/etc/NEWS --- a/etc/NEWS +++ b/etc/NEWS | |||
| @@ -629,12 +629,12 @@ notifications, if Emacs is compiled with file notification support. | |||
| 629 | *** gulp.el | 629 | *** gulp.el |
| 630 | 630 | ||
| 631 | +++ | 631 | +++ |
| 632 | ** The character class [:print:] in regular expressions | 632 | ** The character classes [:graph:] and [:print:] in regular expressions |
| 633 | no longer matches any multibyte character. Instead, Emacs now | 633 | no longer match every multibyte character. Instead, Emacs now |
| 634 | consults the Unicode character properties to determine which | 634 | consults the Unicode character properties to determine which |
| 635 | characters are printable. In particular, surrogates and unassigned | 635 | characters are graphic or printable. In particular, surrogates and |
| 636 | codepoints are now rejected by this class. If you want the old | 636 | unassigned codepoints are now rejected. If you want the old behavior, |
| 637 | behavior, use [:multibyte:] instead. | 637 | use [:multibyte:] instead. |
| 638 | 638 | ||
| 639 | 639 | ||
| 640 | * New Modes and Packages in Emacs 25.1 | 640 | * New Modes and Packages in Emacs 25.1 |
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el index a5a228e5876..ab9beb60928 100644 --- a/lisp/emacs-lisp/rx.el +++ b/lisp/emacs-lisp/rx.el | |||
| @@ -965,12 +965,12 @@ CHAR | |||
| 965 | matches space and tab only. | 965 | matches space and tab only. |
| 966 | 966 | ||
| 967 | `graphic', `graph' | 967 | `graphic', `graph' |
| 968 | matches graphic characters--everything except ASCII control chars, | 968 | matches graphic characters--everything except space, ASCII |
| 969 | space, and DEL. | 969 | and non-ASCII control characters, surrogates, and codepoints |
| 970 | unassigned by Unicode. | ||
| 970 | 971 | ||
| 971 | `printing', `print' | 972 | `printing', `print' |
| 972 | matches printing characters--everything except ASCII and non-ASCII | 973 | matches space and graphic characters. |
| 973 | control characters, surrogates, and codepoints unassigned by Unicode. | ||
| 974 | 974 | ||
| 975 | `alphanumeric', `alnum' | 975 | `alphanumeric', `alnum' |
| 976 | matches alphabetic characters and digits. (For multibyte characters, | 976 | matches alphabetic characters and digits. (For multibyte characters, |
diff --git a/src/character.c b/src/character.c index b357dd5a334..ea98cf68e6c 100644 --- a/src/character.c +++ b/src/character.c | |||
| @@ -1022,6 +1022,14 @@ decimalnump (int c) | |||
| 1022 | return gen_cat == UNICODE_CATEGORY_Nd; | 1022 | return gen_cat == UNICODE_CATEGORY_Nd; |
| 1023 | } | 1023 | } |
| 1024 | 1024 | ||
| 1025 | /* Return 'true' if C is a graphic character as defined by its Unicode | ||
| 1026 | properties: any character accepted by printablep, except space. */ | ||
| 1027 | bool | ||
| 1028 | graphicp (int c) | ||
| 1029 | { | ||
| 1030 | return c != ' ' && printablep (c); | ||
| 1031 | } | ||
| 1032 | |||
| 1025 | /* Return 'true' if C is a printable character as defined by its | 1033 | /* Return 'true' if C is a printable character as defined by its |
| 1026 | Unicode properties. */ | 1034 | Unicode properties. */ |
| 1027 | bool | 1035 | bool |
diff --git a/src/character.h b/src/character.h index 1a5d2c8a670..859d717a0ba 100644 --- a/src/character.h +++ b/src/character.h | |||
| @@ -662,6 +662,7 @@ extern Lisp_Object string_escape_byte8 (Lisp_Object); | |||
| 662 | 662 | ||
| 663 | extern bool alphabeticp (int); | 663 | extern bool alphabeticp (int); |
| 664 | extern bool decimalnump (int); | 664 | extern bool decimalnump (int); |
| 665 | extern bool graphicp (int); | ||
| 665 | extern bool printablep (int); | 666 | extern bool printablep (int); |
| 666 | 667 | ||
| 667 | /* Return a translation table of id number ID. */ | 668 | /* Return a translation table of id number ID. */ |
diff --git a/src/regex.c b/src/regex.c index b9d09d02c22..4af70c62cf5 100644 --- a/src/regex.c +++ b/src/regex.c | |||
| @@ -314,7 +314,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 }; | |||
| 314 | 314 | ||
| 315 | # define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \ | 315 | # define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \ |
| 316 | ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \ | 316 | ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \ |
| 317 | : 1) | 317 | : graphicp (c)) |
| 318 | 318 | ||
| 319 | # define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \ | 319 | # define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \ |
| 320 | ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \ | 320 | ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \ |
| @@ -1875,7 +1875,8 @@ struct range_table_work_area | |||
| 1875 | #define BIT_MULTIBYTE 0x20 | 1875 | #define BIT_MULTIBYTE 0x20 |
| 1876 | #define BIT_ALPHA 0x40 | 1876 | #define BIT_ALPHA 0x40 |
| 1877 | #define BIT_ALNUM 0x80 | 1877 | #define BIT_ALNUM 0x80 |
| 1878 | #define BIT_PRINT 0x100 | 1878 | #define BIT_GRAPH 0x100 |
| 1879 | #define BIT_PRINT 0x200 | ||
| 1879 | 1880 | ||
| 1880 | 1881 | ||
| 1881 | /* Set the bit for character C in a list. */ | 1882 | /* Set the bit for character C in a list. */ |
| @@ -2074,7 +2075,7 @@ re_wctype_to_bit (re_wctype_t cc) | |||
| 2074 | { | 2075 | { |
| 2075 | switch (cc) | 2076 | switch (cc) |
| 2076 | { | 2077 | { |
| 2077 | case RECC_NONASCII: case RECC_GRAPH: | 2078 | case RECC_NONASCII: |
| 2078 | case RECC_MULTIBYTE: return BIT_MULTIBYTE; | 2079 | case RECC_MULTIBYTE: return BIT_MULTIBYTE; |
| 2079 | case RECC_ALPHA: return BIT_ALPHA; | 2080 | case RECC_ALPHA: return BIT_ALPHA; |
| 2080 | case RECC_ALNUM: return BIT_ALNUM; | 2081 | case RECC_ALNUM: return BIT_ALNUM; |
| @@ -2083,6 +2084,7 @@ re_wctype_to_bit (re_wctype_t cc) | |||
| 2083 | case RECC_UPPER: return BIT_UPPER; | 2084 | case RECC_UPPER: return BIT_UPPER; |
| 2084 | case RECC_PUNCT: return BIT_PUNCT; | 2085 | case RECC_PUNCT: return BIT_PUNCT; |
| 2085 | case RECC_SPACE: return BIT_SPACE; | 2086 | case RECC_SPACE: return BIT_SPACE; |
| 2087 | case RECC_GRAPH: return BIT_GRAPH; | ||
| 2086 | case RECC_PRINT: return BIT_PRINT; | 2088 | case RECC_PRINT: return BIT_PRINT; |
| 2087 | case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: | 2089 | case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: |
| 2088 | case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; | 2090 | case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; |
| @@ -5522,7 +5524,9 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const_re_char *string1, | |||
| 5522 | | (class_bits & BIT_UPPER && ISUPPER (c)) | 5524 | | (class_bits & BIT_UPPER && ISUPPER (c)) |
| 5523 | | (class_bits & BIT_WORD && ISWORD (c)) | 5525 | | (class_bits & BIT_WORD && ISWORD (c)) |
| 5524 | | (class_bits & BIT_ALPHA && ISALPHA (c)) | 5526 | | (class_bits & BIT_ALPHA && ISALPHA (c)) |
| 5525 | | (class_bits & BIT_ALNUM && ISALNUM (c))) | 5527 | | (class_bits & BIT_ALNUM && ISALNUM (c)) |
| 5528 | | (class_bits & BIT_GRAPH && ISGRAPH (c)) | ||
| 5529 | | (class_bits & BIT_PRINT && ISPRINT (c))) | ||
| 5526 | not = !not; | 5530 | not = !not; |
| 5527 | else | 5531 | else |
| 5528 | CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count); | 5532 | CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count); |