diff options
| author | Eli Zaretskii | 2015-04-14 18:47:04 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2015-04-14 18:47:04 +0300 |
| commit | 6c284c6b5828bc4407f7201499e0507ce0e5a0a0 (patch) | |
| tree | 0e89f736a55245dc7e59725e57effb3b36b6dfe8 /src | |
| parent | 8802474a219ad3be01825466a8837d3775f8b31b (diff) | |
| download | emacs-6c284c6b5828bc4407f7201499e0507ce0e5a0a0.tar.gz emacs-6c284c6b5828bc4407f7201499e0507ce0e5a0a0.zip | |
Make [:print:] support non-ASCII characters correctly

* src/regex.c (ISPRINT): Call 'printablep' for multibyte characters.
(BIT_PRINT): New bit mask.
(re_wctype_to_bit): Return BIT_PRINT for RECC_PRINT.
* src/character.c (printablep): New function.
* src/character.h (printablep): Add prototype.
* lisp/emacs-lisp/rx.el (rx): Doc fix: document the new behavior
of 'print', 'alnum', and 'alphabetic'.
* doc/lispref/searching.texi (Char Classes): Document the new
behavior of [:print:].
* etc/NEWS: Mention the new behavior of [:print:].
Diffstat (limited to 'src')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/character.c | 16 |
| -rw-r--r-- | src/character.h | 1 |
| -rw-r--r-- | src/regex.c | 9 |

3 files changed, 23 insertions, 3 deletions

diff --git a/src/character.c b/src/character.c index ad78f512f43..b357dd5a334 100644 --- a/src/character.c +++ b/src/character.c | |||
| @@ -1022,6 +1022,22 @@ decimalnump (int c) | |||
| 1022 | return gen_cat == UNICODE_CATEGORY_Nd; | 1022 | return gen_cat == UNICODE_CATEGORY_Nd; |
| 1023 | } | 1023 | } |
| 1024 | 1024 | ||
| 1025 | /* Return 'true' if C is a printable character as defined by its | ||
| 1026 | Unicode properties. */ | ||
| 1027 | bool | ||
| 1028 | printablep (int c) | ||
| 1029 | { | ||
| 1030 | Lisp_Object category = CHAR_TABLE_REF (Vunicode_category_table, c); | ||
| 1031 | if (! INTEGERP (category)) | ||
| 1032 | return false; | ||
| 1033 | EMACS_INT gen_cat = XINT (category); | ||
| 1034 | |||
| 1035 | /* See UTS #18. */ | ||
| 1036 | return (!(gen_cat == UNICODE_CATEGORY_Cc /* control */ | ||
| 1037 | || gen_cat == UNICODE_CATEGORY_Cs /* surrogate */ | ||
| 1038 | || gen_cat == UNICODE_CATEGORY_Cn)); /* unassigned */ | ||
| 1039 | } | ||
| 1040 | |||
| 1025 | void | 1041 | void |
| 1026 | syms_of_character (void) | 1042 | syms_of_character (void) |
| 1027 | { | 1043 | { |
diff --git a/src/character.h b/src/character.h index 7d902952db6..1a5d2c8a670 100644 --- a/src/character.h +++ b/src/character.h | |||
| @@ -662,6 +662,7 @@ extern Lisp_Object string_escape_byte8 (Lisp_Object); | |||
| 662 | 662 | ||
| 663 | extern bool alphabeticp (int); | 663 | extern bool alphabeticp (int); |
| 664 | extern bool decimalnump (int); | 664 | extern bool decimalnump (int); |
| 665 | extern bool printablep (int); | ||
| 665 | 666 | ||
| 666 | /* Return a translation table of id number ID. */ | 667 | /* Return a translation table of id number ID. */ |
| 667 | #define GET_TRANSLATION_TABLE(id) \ | 668 | #define GET_TRANSLATION_TABLE(id) \ |
diff --git a/src/regex.c b/src/regex.c index 1afc5037594..b9d09d02c22 100644 --- a/src/regex.c +++ b/src/regex.c | |||
| @@ -318,7 +318,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 }; | |||
| 318 | 318 | ||
| 319 | # define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \ | 319 | # define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \ |
| 320 | ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \ | 320 | ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \ |
| 321 | : 1) | 321 | : printablep (c)) |
| 322 | 322 | ||
| 323 | # define ISALNUM(c) (IS_REAL_ASCII (c) \ | 323 | # define ISALNUM(c) (IS_REAL_ASCII (c) \ |
| 324 | ? (((c) >= 'a' && (c) <= 'z') \ | 324 | ? (((c) >= 'a' && (c) <= 'z') \ |
| @@ -1865,7 +1865,8 @@ struct range_table_work_area | |||
| 1865 | #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i]) | 1865 | #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i]) |
| 1866 | 1866 | ||
| 1867 | /* Bits used to implement the multibyte-part of the various character classes | 1867 | /* Bits used to implement the multibyte-part of the various character classes |
| 1868 | such as [:alnum:] in a charset's range table. */ | 1868 | such as [:alnum:] in a charset's range table. The code currently assumes |
| 1869 | that only the low 16 bits are used. */ | ||
| 1869 | #define BIT_WORD 0x1 | 1870 | #define BIT_WORD 0x1 |
| 1870 | #define BIT_LOWER 0x2 | 1871 | #define BIT_LOWER 0x2 |
| 1871 | #define BIT_PUNCT 0x4 | 1872 | #define BIT_PUNCT 0x4 |
| @@ -1874,6 +1875,7 @@ struct range_table_work_area | |||
| 1874 | #define BIT_MULTIBYTE 0x20 | 1875 | #define BIT_MULTIBYTE 0x20 |
| 1875 | #define BIT_ALPHA 0x40 | 1876 | #define BIT_ALPHA 0x40 |
| 1876 | #define BIT_ALNUM 0x80 | 1877 | #define BIT_ALNUM 0x80 |
| 1878 | #define BIT_PRINT 0x100 | ||
| 1877 | 1879 | ||
| 1878 | 1880 | ||
| 1879 | /* Set the bit for character C in a list. */ | 1881 | /* Set the bit for character C in a list. */ |
| @@ -2072,7 +2074,7 @@ re_wctype_to_bit (re_wctype_t cc) | |||
| 2072 | { | 2074 | { |
| 2073 | switch (cc) | 2075 | switch (cc) |
| 2074 | { | 2076 | { |
| 2075 | case RECC_NONASCII: case RECC_PRINT: case RECC_GRAPH: | 2077 | case RECC_NONASCII: case RECC_GRAPH: |
| 2076 | case RECC_MULTIBYTE: return BIT_MULTIBYTE; | 2078 | case RECC_MULTIBYTE: return BIT_MULTIBYTE; |
| 2077 | case RECC_ALPHA: return BIT_ALPHA; | 2079 | case RECC_ALPHA: return BIT_ALPHA; |
| 2078 | case RECC_ALNUM: return BIT_ALNUM; | 2080 | case RECC_ALNUM: return BIT_ALNUM; |
| @@ -2081,6 +2083,7 @@ re_wctype_to_bit (re_wctype_t cc) | |||
| 2081 | case RECC_UPPER: return BIT_UPPER; | 2083 | case RECC_UPPER: return BIT_UPPER; |
| 2082 | case RECC_PUNCT: return BIT_PUNCT; | 2084 | case RECC_PUNCT: return BIT_PUNCT; |
| 2083 | case RECC_SPACE: return BIT_SPACE; | 2085 | case RECC_SPACE: return BIT_SPACE; |
| 2086 | case RECC_PRINT: return BIT_PRINT; | ||
| 2084 | case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: | 2087 | case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: |
| 2085 | case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; | 2088 | case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; |
| 2086 | default: | 2089 | default: |