diff options
| author | Paul Eggert | 2020-04-17 07:57:25 -0700 |
|---|---|---|
| committer | Paul Eggert | 2020-04-17 09:17:35 -0700 |
| commit | 3e46a2315f1a999f5811f57a60a2a55f95d8fbb0 (patch) | |
| tree | 64e35b78bb77f3eba5148650fb7a98bbda7f14d6 /src | |
| parent | 7f1dae114dffbf4bdec60e38ada4eb0673cfb4e2 (diff) | |
| download | emacs-3e46a2315f1a999f5811f57a60a2a55f95d8fbb0.tar.gz emacs-3e46a2315f1a999f5811f57a60a2a55f95d8fbb0.zip | |
Prefer inline functions in character.h

In character.h, replace macros with inline functions or enums
when this is easy. This improves maintainability and, on my
platform (Fedora 31 x86-64, gcc -O2), it improved CPU
performance very slightly (0.3%) on ‘make compile-always’.
* src/buffer.h (SANE_TAB_WIDTH, CHARACTER_WIDTH):
Move here from character.h, and make them inline functions.
Tune CHARACTER_WIDTH so that ASCII_CHAR_WIDTH is no longer needed.
(sanitize_tab_width, sanitize_char_width):
Move here from character.h.
* src/character.h (MAX_CHAR, MAX_UNICODE_CHAR, MAX_1_BYTE_CHAR)
(MAX_2_BYTE_CHAR, MAX_3_BYTE_CHAR, MAX_4_BYTE_CHAR)
(MAX_5_BYTE_CHAR, MIN_MULTIBYTE_LEADING_CODE)
(MAX_MULTIBYTE_LEADING_CODE, MAX_MULTIBYTE_LENGTH):
Now enum constants instead of macros.
* src/character.h (CHAR_BYTES): Redo to avoid conditional branches.
(CHAR_BYTE8_P, BYTE8_TO_CHAR, UNIBYTE_TO_CHAR, CHAR_TO_BYTE8)
(CHAR_TO_BYTE_SAFE, CHAR_BYTE8_HEAD_P, CHARACTERP)
(CHECK_CHARACTER, CHECK_CHARACTER_CAR, CHECK_CHARACTER_CDR)
(CHAR_PRINTABLE_P, CHAR_BYTES, CHAR_LEADING_CODE, BYTE8_STRING)
(LEADING_CODE_P, TRAILING_CODE_P, CHAR_HEAD_P)
(BYTES_BY_CHAR_HEAD, MULTIBYTE_LENGTH, MULTIBYTE_LENGTH_NO_CHECK)
(STRING_CHAR, CHAR_VARIATION_SELECTOR_P):
Now inline functions instead of macros.
(string_char): Move declaration before its use in STRING_CHAR.
(ASCII_CHAR_WIDTH): Remove; no longer used.
* src/conf_post.h (ATTRIBUTE_PURE): New macro.
* src/lisp.h (char_table_ref): Use it, for better inlining.
* src/fns.c (base64_decode_1): Add now-necessary casts.
Diffstat (limited to 'src')
| -rw-r--r-- | src/buffer.h | 39 | ||||
| -rw-r--r-- | src/character.h | 337 | ||||
| -rw-r--r-- | src/conf_post.h | 1 | ||||
| -rw-r--r-- | src/fns.c | 6 | ||||
| -rw-r--r-- | src/lisp.h | 2 |
5 files changed, 224 insertions, 161 deletions
diff --git a/src/buffer.h b/src/buffer.h index abb1294d038..9875b8a447b 100644 --- a/src/buffer.h +++ b/src/buffer.h | |||
| @@ -1523,6 +1523,45 @@ lowercasep (int c) | |||
| 1523 | return !uppercasep (c) && upcase (c) != c; | 1523 | return !uppercasep (c) && upcase (c) != c; |
| 1524 | } | 1524 | } |
| 1525 | 1525 | ||
| 1526 | /* Return a non-outlandish value for the tab width. */ | ||
| 1527 | |||
| 1528 | INLINE int | ||
| 1529 | sanitize_tab_width (Lisp_Object width) | ||
| 1530 | { | ||
| 1531 | return (FIXNUMP (width) && 0 < XFIXNUM (width) && XFIXNUM (width) <= 1000 | ||
| 1532 | ? XFIXNUM (width) : 8); | ||
| 1533 | } | ||
| 1534 | |||
| 1535 | INLINE int | ||
| 1536 | SANE_TAB_WIDTH (struct buffer *buf) | ||
| 1537 | { | ||
| 1538 | return sanitize_tab_width (BVAR (buf, tab_width)); | ||
| 1539 | } | ||
| 1540 | |||
| 1541 | /* Return a non-outlandish value for a character width. */ | ||
| 1542 | |||
| 1543 | INLINE int | ||
| 1544 | sanitize_char_width (EMACS_INT width) | ||
| 1545 | { | ||
| 1546 | return 0 <= width && width <= 1000 ? width : 1000; | ||
| 1547 | } | ||
| 1548 | |||
| 1549 | /* Return the width of character C. The width is measured by how many | ||
| 1550 | columns C will occupy on the screen when displayed in the current | ||
| 1551 | buffer. The name CHARACTER_WIDTH avoids a collision with <limits.h> | ||
| 1552 | CHAR_WIDTH. */ | ||
| 1553 | |||
| 1554 | INLINE int | ||
| 1555 | CHARACTER_WIDTH (int c) | ||
| 1556 | { | ||
| 1557 | return (0x20 <= c && c < 0x7f ? 1 | ||
| 1558 | : 0x7f < c ? (sanitize_char_width | ||
| 1559 | (XFIXNUM (CHAR_TABLE_REF (Vchar_width_table, c)))) | ||
| 1560 | : c == '\t' ? SANE_TAB_WIDTH (current_buffer) | ||
| 1561 | : c == '\n' ? 0 | ||
| 1562 | : !NILP (BVAR (current_buffer, ctl_arrow)) ? 2 : 4); | ||
| 1563 | } | ||
| 1564 | |||
| 1526 | INLINE_HEADER_END | 1565 | INLINE_HEADER_END |
| 1527 | 1566 | ||
| 1528 | #endif /* EMACS_BUFFER_H */ | 1567 | #endif /* EMACS_BUFFER_H */ |
diff --git a/src/character.h b/src/character.h index 7639b018cb0..d4bc718af72 100644 --- a/src/character.h +++ b/src/character.h | |||
| @@ -43,23 +43,23 @@ INLINE_HEADER_BEGIN | |||
| 43 | */ | 43 | */ |
| 44 | 44 | ||
| 45 | /* Maximum character code ((1 << CHARACTERBITS) - 1). */ | 45 | /* Maximum character code ((1 << CHARACTERBITS) - 1). */ |
| 46 | #define MAX_CHAR 0x3FFFFF | 46 | enum { MAX_CHAR = 0x3FFFFF }; |
| 47 | 47 | ||
| 48 | /* Maximum Unicode character code. */ | 48 | /* Maximum Unicode character code. */ |
| 49 | #define MAX_UNICODE_CHAR 0x10FFFF | 49 | enum { MAX_UNICODE_CHAR = 0x10FFFF }; |
| 50 | 50 | ||
| 51 | /* Maximum N-byte character codes. */ | 51 | /* Maximum N-byte character codes. */ |
| 52 | #define MAX_1_BYTE_CHAR 0x7F | 52 | enum { MAX_1_BYTE_CHAR = 0x7F }; |
| 53 | #define MAX_2_BYTE_CHAR 0x7FF | 53 | enum { MAX_2_BYTE_CHAR = 0x7FF }; |
| 54 | #define MAX_3_BYTE_CHAR 0xFFFF | 54 | enum { MAX_3_BYTE_CHAR = 0xFFFF }; |
| 55 | #define MAX_4_BYTE_CHAR 0x1FFFFF | 55 | enum { MAX_4_BYTE_CHAR = 0x1FFFFF }; |
| 56 | #define MAX_5_BYTE_CHAR 0x3FFF7F | 56 | enum { MAX_5_BYTE_CHAR = 0x3FFF7F }; |
| 57 | 57 | ||
| 58 | /* Minimum leading code of multibyte characters. */ | 58 | /* Minimum leading code of multibyte characters. */ |
| 59 | #define MIN_MULTIBYTE_LEADING_CODE 0xC0 | 59 | enum { MIN_MULTIBYTE_LEADING_CODE = 0xC0 }; |
| 60 | /* Maximum leading code of multibyte characters. Note: this must be | 60 | /* Maximum leading code of multibyte characters. Note: this must be |
| 61 | updated if we ever increase MAX_CHAR above. */ | 61 | updated if we ever increase MAX_CHAR above. */ |
| 62 | #define MAX_MULTIBYTE_LEADING_CODE 0xF8 | 62 | enum { MAX_MULTIBYTE_LEADING_CODE = 0xF8 }; |
| 63 | 63 | ||
| 64 | /* Unicode character values. */ | 64 | /* Unicode character values. */ |
| 65 | enum | 65 | enum |
| @@ -81,33 +81,57 @@ enum | |||
| 81 | }; | 81 | }; |
| 82 | 82 | ||
| 83 | extern int char_string (unsigned, unsigned char *); | 83 | extern int char_string (unsigned, unsigned char *); |
| 84 | extern int string_char (const unsigned char *, | ||
| 85 | const unsigned char **, int *); | ||
| 84 | 86 | ||
| 85 | /* UTF-8 encodings. Use \x escapes, so they are portable to pre-C11 | 87 | /* UTF-8 encodings. Use \x escapes, so they are portable to pre-C11 |
| 86 | compilers and can be concatenated with ordinary string literals. */ | 88 | compilers and can be concatenated with ordinary string literals. */ |
| 87 | #define uLSQM "\xE2\x80\x98" /* U+2018 LEFT SINGLE QUOTATION MARK */ | 89 | #define uLSQM "\xE2\x80\x98" /* U+2018 LEFT SINGLE QUOTATION MARK */ |
| 88 | #define uRSQM "\xE2\x80\x99" /* U+2019 RIGHT SINGLE QUOTATION MARK */ | 90 | #define uRSQM "\xE2\x80\x99" /* U+2019 RIGHT SINGLE QUOTATION MARK */ |
| 89 | 91 | ||
| 90 | /* Nonzero iff C is a character that corresponds to a raw 8-bit | 92 | /* True iff C is a character that corresponds to a raw 8-bit |
| 91 | byte. */ | 93 | byte. */ |
| 92 | #define CHAR_BYTE8_P(c) ((c) > MAX_5_BYTE_CHAR) | 94 | INLINE bool |
| 95 | CHAR_BYTE8_P (int c) | ||
| 96 | { | ||
| 97 | return MAX_5_BYTE_CHAR < c; | ||
| 98 | } | ||
| 93 | 99 | ||
| 94 | /* Return the character code for raw 8-bit byte BYTE. */ | 100 | /* Return the character code for raw 8-bit byte BYTE. */ |
| 95 | #define BYTE8_TO_CHAR(byte) ((byte) + 0x3FFF00) | 101 | INLINE int |
| 102 | BYTE8_TO_CHAR (int byte) | ||
| 103 | { | ||
| 104 | return byte + 0x3FFF00; | ||
| 105 | } | ||
| 96 | 106 | ||
| 97 | #define UNIBYTE_TO_CHAR(byte) \ | 107 | INLINE int |
| 98 | (ASCII_CHAR_P (byte) ? (byte) : BYTE8_TO_CHAR (byte)) | 108 | UNIBYTE_TO_CHAR (int byte) |
| 109 | { | ||
| 110 | return ASCII_CHAR_P (byte) ? byte : BYTE8_TO_CHAR (byte); | ||
| 111 | } | ||
| 99 | 112 | ||
| 100 | /* Return the raw 8-bit byte for character C. */ | 113 | /* Return the raw 8-bit byte for character C. */ |
| 101 | #define CHAR_TO_BYTE8(c) (CHAR_BYTE8_P (c) ? (c) - 0x3FFF00 : (c & 0xFF)) | 114 | INLINE int |
| 115 | CHAR_TO_BYTE8 (int c) | ||
| 116 | { | ||
| 117 | return CHAR_BYTE8_P (c) ? c - 0x3FFF00 : c & 0xFF; | ||
| 118 | } | ||
| 102 | 119 | ||
| 103 | /* Return the raw 8-bit byte for character C, | 120 | /* Return the raw 8-bit byte for character C, |
| 104 | or -1 if C doesn't correspond to a byte. */ | 121 | or -1 if C doesn't correspond to a byte. */ |
| 105 | #define CHAR_TO_BYTE_SAFE(c) \ | 122 | INLINE int |
| 106 | (ASCII_CHAR_P (c) ? c : (CHAR_BYTE8_P (c) ? (c) - 0x3FFF00 : -1)) | 123 | CHAR_TO_BYTE_SAFE (int c) |
| 124 | { | ||
| 125 | return ASCII_CHAR_P (c) ? c : CHAR_BYTE8_P (c) ? c - 0x3FFF00 : -1; | ||
| 126 | } | ||
| 107 | 127 | ||
| 108 | /* Nonzero iff BYTE is the 1st byte of a multibyte form of a character | 128 | /* True iff BYTE is the 1st byte of a multibyte form of a character |
| 109 | that corresponds to a raw 8-bit byte. */ | 129 | that corresponds to a raw 8-bit byte. */ |
| 110 | #define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 || (byte) == 0xC1) | 130 | INLINE bool |
| 131 | CHAR_BYTE8_HEAD_P (int byte) | ||
| 132 | { | ||
| 133 | return byte == 0xC0 || byte == 0xC1; | ||
| 134 | } | ||
| 111 | 135 | ||
| 112 | /* If C is not ASCII, make it unibyte. */ | 136 | /* If C is not ASCII, make it unibyte. */ |
| 113 | #define MAKE_CHAR_UNIBYTE(c) \ | 137 | #define MAKE_CHAR_UNIBYTE(c) \ |
| @@ -122,10 +146,7 @@ extern int char_string (unsigned, unsigned char *); | |||
| 122 | (eassert ((c) >= 0 && (c) < 256), (c) = UNIBYTE_TO_CHAR (c)) | 146 | (eassert ((c) >= 0 && (c) < 256), (c) = UNIBYTE_TO_CHAR (c)) |
| 123 | 147 | ||
| 124 | /* This is the maximum byte length of multibyte form. */ | 148 | /* This is the maximum byte length of multibyte form. */ |
| 125 | #define MAX_MULTIBYTE_LENGTH 5 | 149 | enum { MAX_MULTIBYTE_LENGTH = 5 }; |
| 126 | |||
| 127 | /* Nonzero iff X is a character. */ | ||
| 128 | #define CHARACTERP(x) (FIXNATP (x) && XFIXNAT (x) <= MAX_CHAR) | ||
| 129 | 150 | ||
| 130 | /* Nonzero iff C is valid as a character code. */ | 151 | /* Nonzero iff C is valid as a character code. */ |
| 131 | INLINE bool | 152 | INLINE bool |
| @@ -134,52 +155,69 @@ CHAR_VALID_P (intmax_t c) | |||
| 134 | return 0 <= c && c <= MAX_CHAR; | 155 | return 0 <= c && c <= MAX_CHAR; |
| 135 | } | 156 | } |
| 136 | 157 | ||
| 158 | /* Nonzero iff X is a character. */ | ||
| 159 | INLINE bool | ||
| 160 | CHARACTERP (Lisp_Object x) | ||
| 161 | { | ||
| 162 | return FIXNUMP (x) && CHAR_VALID_P (XFIXNUM (x)); | ||
| 163 | } | ||
| 164 | |||
| 137 | /* Check if Lisp object X is a character or not. */ | 165 | /* Check if Lisp object X is a character or not. */ |
| 138 | #define CHECK_CHARACTER(x) \ | 166 | INLINE void |
| 139 | CHECK_TYPE (CHARACTERP (x), Qcharacterp, x) | 167 | CHECK_CHARACTER (Lisp_Object x) |
| 168 | { | ||
| 169 | CHECK_TYPE (CHARACTERP (x), Qcharacterp, x); | ||
| 170 | } | ||
| 140 | 171 | ||
| 141 | #define CHECK_CHARACTER_CAR(x) \ | 172 | INLINE void |
| 142 | do { \ | 173 | CHECK_CHARACTER_CAR (Lisp_Object x) |
| 143 | Lisp_Object tmp = XCAR (x); \ | 174 | { |
| 144 | CHECK_CHARACTER (tmp); \ | 175 | CHECK_CHARACTER (XCAR (x)); |
| 145 | } while (false) | 176 | } |
| 146 | 177 | ||
| 147 | #define CHECK_CHARACTER_CDR(x) \ | 178 | INLINE void |
| 148 | do { \ | 179 | CHECK_CHARACTER_CDR (Lisp_Object x) |
| 149 | Lisp_Object tmp = XCDR (x); \ | 180 | { |
| 150 | CHECK_CHARACTER (tmp); \ | 181 | CHECK_CHARACTER (XCDR (x)); |
| 151 | } while (false) | 182 | } |
| 152 | 183 | ||
| 153 | /* Nonzero iff C is a character of code less than 0x100. */ | 184 | /* True iff C is a character of code less than 0x100. */ |
| 154 | INLINE bool | 185 | INLINE bool |
| 155 | SINGLE_BYTE_CHAR_P (intmax_t c) | 186 | SINGLE_BYTE_CHAR_P (intmax_t c) |
| 156 | { | 187 | { |
| 157 | return 0 <= c && c < 0x100; | 188 | return 0 <= c && c < 0x100; |
| 158 | } | 189 | } |
| 159 | 190 | ||
| 160 | /* Nonzero if character C has a printable glyph. */ | 191 | /* True if character C has a printable glyph. */ |
| 161 | #define CHAR_PRINTABLE_P(c) \ | 192 | INLINE bool |
| 162 | (((c) >= 32 && (c) < 127) \ | 193 | CHAR_PRINTABLE_P (int c) |
| 163 | || ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c)))) | 194 | { |
| 195 | return ((32 <= c && c < 127) | ||
| 196 | || ! NILP (CHAR_TABLE_REF (Vprintable_chars, c))); | ||
| 197 | } | ||
| 164 | 198 | ||
| 165 | /* Return byte length of multibyte form for character C. */ | 199 | /* Return byte length of multibyte form for character C. */ |
| 166 | #define CHAR_BYTES(c) \ | 200 | INLINE int |
| 167 | ( (c) <= MAX_1_BYTE_CHAR ? 1 \ | 201 | CHAR_BYTES (int c) |
| 168 | : (c) <= MAX_2_BYTE_CHAR ? 2 \ | 202 | { |
| 169 | : (c) <= MAX_3_BYTE_CHAR ? 3 \ | 203 | return ((MAX_5_BYTE_CHAR < c ? -2 : 1) |
| 170 | : (c) <= MAX_4_BYTE_CHAR ? 4 \ | 204 | + (MAX_1_BYTE_CHAR < c) |
| 171 | : (c) <= MAX_5_BYTE_CHAR ? 5 \ | 205 | + (MAX_2_BYTE_CHAR < c) |
| 172 | : 2) | 206 | + (MAX_3_BYTE_CHAR < c) |
| 173 | 207 | + (MAX_4_BYTE_CHAR < c)); | |
| 208 | } | ||
| 174 | 209 | ||
| 175 | /* Return the leading code of multibyte form of C. */ | 210 | /* Return the leading code of multibyte form of C. */ |
| 176 | #define CHAR_LEADING_CODE(c) \ | 211 | INLINE int |
| 177 | ((c) <= MAX_1_BYTE_CHAR ? c \ | 212 | CHAR_LEADING_CODE (int c) |
| 178 | : (c) <= MAX_2_BYTE_CHAR ? (0xC0 | ((c) >> 6)) \ | 213 | { |
| 179 | : (c) <= MAX_3_BYTE_CHAR ? (0xE0 | ((c) >> 12)) \ | 214 | return (c <= MAX_1_BYTE_CHAR ? c |
| 180 | : (c) <= MAX_4_BYTE_CHAR ? (0xF0 | ((c) >> 18)) \ | 215 | : c <= MAX_2_BYTE_CHAR ? 0xC0 | (c >> 6) |
| 181 | : (c) <= MAX_5_BYTE_CHAR ? 0xF8 \ | 216 | : c <= MAX_3_BYTE_CHAR ? 0xE0 | (c >> 12) |
| 182 | : (0xC0 | (((c) >> 6) & 0x01))) | 217 | : c <= MAX_4_BYTE_CHAR ? 0xF0 | (c >> 18) |
| 218 | : c <= MAX_5_BYTE_CHAR ? 0xF8 | ||
| 219 | : 0xC0 | ((c >> 6) & 0x01)); | ||
| 220 | } | ||
| 183 | 221 | ||
| 184 | 222 | ||
| 185 | /* Store multibyte form of the character C in P. The caller should | 223 | /* Store multibyte form of the character C in P. The caller should |
| @@ -217,10 +255,13 @@ CHAR_STRING (int c, unsigned char *p) | |||
| 217 | least MAX_MULTIBYTE_LENGTH bytes area at P in advance. Returns the | 255 | least MAX_MULTIBYTE_LENGTH bytes area at P in advance. Returns the |
| 218 | length of the multibyte form. */ | 256 | length of the multibyte form. */ |
| 219 | 257 | ||
| 220 | #define BYTE8_STRING(b, p) \ | 258 | INLINE int |
| 221 | ((p)[0] = (0xC0 | (((b) >> 6) & 0x01)), \ | 259 | BYTE8_STRING (int b, unsigned char *p) |
| 222 | (p)[1] = (0x80 | ((b) & 0x3F)), \ | 260 | { |
| 223 | 2) | 261 | p[0] = 0xC0 | ((b >> 6) & 0x01); |
| 262 | p[1] = 0x80 | (b & 0x3F); | ||
| 263 | return 2; | ||
| 264 | } | ||
| 224 | 265 | ||
| 225 | 266 | ||
| 226 | /* Store multibyte form of the character C in P and advance P to the | 267 | /* Store multibyte form of the character C in P and advance P to the |
| @@ -246,28 +287,42 @@ CHAR_STRING (int c, unsigned char *p) | |||
| 246 | } while (false) | 287 | } while (false) |
| 247 | 288 | ||
| 248 | 289 | ||
| 249 | /* Nonzero iff BYTE starts a non-ASCII character in a multibyte | 290 | /* True iff BYTE starts a non-ASCII character in a multibyte form. */ |
| 250 | form. */ | 291 | INLINE bool |
| 251 | #define LEADING_CODE_P(byte) (((byte) & 0xC0) == 0xC0) | 292 | LEADING_CODE_P (int byte) |
| 293 | { | ||
| 294 | return (byte & 0xC0) == 0xC0; | ||
| 295 | } | ||
| 252 | 296 | ||
| 253 | /* Nonzero iff BYTE is a trailing code of a non-ASCII character in a | 297 | /* True iff BYTE is a trailing code of a non-ASCII character in a |
| 254 | multibyte form. */ | 298 | multibyte form. */ |
| 255 | #define TRAILING_CODE_P(byte) (((byte) & 0xC0) == 0x80) | 299 | INLINE bool |
| 300 | TRAILING_CODE_P (int byte) | ||
| 301 | { | ||
| 302 | return (byte & 0xC0) == 0x80; | ||
| 303 | } | ||
| 256 | 304 | ||
| 257 | /* Nonzero iff BYTE starts a character in a multibyte form. | 305 | /* True iff BYTE starts a character in a multibyte form. |
| 258 | This is equivalent to: | 306 | This is equivalent to: |
| 259 | (ASCII_CHAR_P (byte) || LEADING_CODE_P (byte)) */ | 307 | (ASCII_CHAR_P (byte) || LEADING_CODE_P (byte)) */ |
| 260 | #define CHAR_HEAD_P(byte) (((byte) & 0xC0) != 0x80) | 308 | INLINE bool |
| 309 | CHAR_HEAD_P (int byte) | ||
| 310 | { | ||
| 311 | return (byte & 0xC0) != 0x80; | ||
| 312 | } | ||
| 261 | 313 | ||
| 262 | /* How many bytes a character that starts with BYTE occupies in a | 314 | /* How many bytes a character that starts with BYTE occupies in a |
| 263 | multibyte form. Unlike MULTIBYTE_LENGTH below, this macro does not | 315 | multibyte form. Unlike MULTIBYTE_LENGTH below, this function does not |
| 264 | validate the multibyte form, but looks only at its first byte. */ | 316 | validate the multibyte form, but looks only at its first byte. */ |
| 265 | #define BYTES_BY_CHAR_HEAD(byte) \ | 317 | INLINE int |
| 266 | (!((byte) & 0x80) ? 1 \ | 318 | BYTES_BY_CHAR_HEAD (int byte) |
| 267 | : !((byte) & 0x20) ? 2 \ | 319 | { |
| 268 | : !((byte) & 0x10) ? 3 \ | 320 | return (!(byte & 0x80) ? 1 |
| 269 | : !((byte) & 0x08) ? 4 \ | 321 | : !(byte & 0x20) ? 2 |
| 270 | : 5) | 322 | : !(byte & 0x10) ? 3 |
| 323 | : !(byte & 0x08) ? 4 | ||
| 324 | : 5); | ||
| 325 | } | ||
| 271 | 326 | ||
| 272 | 327 | ||
| 273 | /* The byte length of multibyte form at unibyte string P ending at | 328 | /* The byte length of multibyte form at unibyte string P ending at |
| @@ -275,34 +330,40 @@ CHAR_STRING (int c, unsigned char *p) | |||
| 275 | return 0. Unlike BYTES_BY_CHAR_HEAD, this macro validates the | 330 | return 0. Unlike BYTES_BY_CHAR_HEAD, this macro validates the |
| 276 | multibyte form. */ | 331 | multibyte form. */ |
| 277 | 332 | ||
| 278 | #define MULTIBYTE_LENGTH(p, pend) \ | 333 | INLINE int |
| 279 | (p >= pend ? 0 \ | 334 | MULTIBYTE_LENGTH (unsigned char const *p, unsigned char const *pend) |
| 280 | : !((p)[0] & 0x80) ? 1 \ | 335 | { |
| 281 | : ((p + 1 >= pend) || (((p)[1] & 0xC0) != 0x80)) ? 0 \ | 336 | return (! (p < pend) ? 0 |
| 282 | : ((p)[0] & 0xE0) == 0xC0 ? 2 \ | 337 | : ! (p[0] & 0x80) ? 1 |
| 283 | : ((p + 2 >= pend) || (((p)[2] & 0xC0) != 0x80)) ? 0 \ | 338 | : ! (p + 1 < pend && (p[1] & 0xC0) == 0x80) ? 0 |
| 284 | : ((p)[0] & 0xF0) == 0xE0 ? 3 \ | 339 | : (p[0] & 0xE0) == 0xC0 ? 2 |
| 285 | : ((p + 3 >= pend) || (((p)[3] & 0xC0) != 0x80)) ? 0 \ | 340 | : ! (p + 2 < pend && (p[2] & 0xC0) == 0x80) ? 0 |
| 286 | : ((p)[0] & 0xF8) == 0xF0 ? 4 \ | 341 | : (p[0] & 0xF0) == 0xE0 ? 3 |
| 287 | : ((p + 4 >= pend) || (((p)[4] & 0xC0) != 0x80)) ? 0 \ | 342 | : ! (p + 3 < pend && (p[3] & 0xC0) == 0x80) ? 0 |
| 288 | : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \ | 343 | : (p[0] & 0xF8) == 0xF0 ? 4 |
| 289 | : 0) | 344 | : ! (p + 4 < pend && (p[4] & 0xC0) == 0x80) ? 0 |
| 345 | : p[0] == 0xF8 && (p[1] & 0xF0) == 0x80 ? 5 | ||
| 346 | : 0); | ||
| 347 | } | ||
| 290 | 348 | ||
| 291 | 349 | ||
| 292 | /* Like MULTIBYTE_LENGTH, but don't check the ending address. The | 350 | /* Like MULTIBYTE_LENGTH, but don't check the ending address. The |
| 293 | multibyte form is still validated, unlike BYTES_BY_CHAR_HEAD. */ | 351 | multibyte form is still validated, unlike BYTES_BY_CHAR_HEAD. */ |
| 294 | 352 | ||
| 295 | #define MULTIBYTE_LENGTH_NO_CHECK(p) \ | 353 | INLINE int |
| 296 | (!((p)[0] & 0x80) ? 1 \ | 354 | MULTIBYTE_LENGTH_NO_CHECK (unsigned char const *p) |
| 297 | : ((p)[1] & 0xC0) != 0x80 ? 0 \ | 355 | { |
| 298 | : ((p)[0] & 0xE0) == 0xC0 ? 2 \ | 356 | return (!(p[0] & 0x80) ? 1 |
| 299 | : ((p)[2] & 0xC0) != 0x80 ? 0 \ | 357 | : (p[1] & 0xC0) != 0x80 ? 0 |
| 300 | : ((p)[0] & 0xF0) == 0xE0 ? 3 \ | 358 | : (p[0] & 0xE0) == 0xC0 ? 2 |
| 301 | : ((p)[3] & 0xC0) != 0x80 ? 0 \ | 359 | : (p[2] & 0xC0) != 0x80 ? 0 |
| 302 | : ((p)[0] & 0xF8) == 0xF0 ? 4 \ | 360 | : (p[0] & 0xF0) == 0xE0 ? 3 |
| 303 | : ((p)[4] & 0xC0) != 0x80 ? 0 \ | 361 | : (p[3] & 0xC0) != 0x80 ? 0 |
| 304 | : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \ | 362 | : (p[0] & 0xF8) == 0xF0 ? 4 |
| 305 | : 0) | 363 | : (p[4] & 0xC0) != 0x80 ? 0 |
| 364 | : p[0] == 0xF8 && (p[1] & 0xF0) == 0x80 ? 5 | ||
| 365 | : 0); | ||
| 366 | } | ||
| 306 | 367 | ||
| 307 | /* If P is before LIMIT, advance P to the next character boundary. | 368 | /* If P is before LIMIT, advance P to the next character boundary. |
| 308 | Assumes that P is already at a character boundary of the same | 369 | Assumes that P is already at a character boundary of the same |
| @@ -333,18 +394,21 @@ CHAR_STRING (int c, unsigned char *p) | |||
| 333 | 394 | ||
| 334 | /* Return the character code of character whose multibyte form is at P. */ | 395 | /* Return the character code of character whose multibyte form is at P. */ |
| 335 | 396 | ||
| 336 | #define STRING_CHAR(p) \ | 397 | INLINE int |
| 337 | (!((p)[0] & 0x80) \ | 398 | STRING_CHAR (unsigned char const *p) |
| 338 | ? (p)[0] \ | 399 | { |
| 339 | : ! ((p)[0] & 0x20) \ | 400 | return (!(p[0] & 0x80) |
| 340 | ? (((((p)[0] & 0x1F) << 6) \ | 401 | ? p[0] |
| 341 | | ((p)[1] & 0x3F)) \ | 402 | : ! (p[0] & 0x20) |
| 342 | + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)) \ | 403 | ? ((((p[0] & 0x1F) << 6) |
| 343 | : ! ((p)[0] & 0x10) \ | 404 | | (p[1] & 0x3F)) |
| 344 | ? ((((p)[0] & 0x0F) << 12) \ | 405 | + (p[0] < 0xC2 ? 0x3FFF80 : 0)) |
| 345 | | (((p)[1] & 0x3F) << 6) \ | 406 | : ! (p[0] & 0x10) |
| 346 | | ((p)[2] & 0x3F)) \ | 407 | ? (((p[0] & 0x0F) << 12) |
| 347 | : string_char ((p), NULL, NULL)) | 408 | | ((p[1] & 0x3F) << 6) |
| 409 | | (p[2] & 0x3F)) | ||
| 410 | : string_char (p, NULL, NULL)); | ||
| 411 | } | ||
| 348 | 412 | ||
| 349 | 413 | ||
| 350 | /* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte | 414 | /* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte |
| @@ -578,58 +642,19 @@ CHAR_STRING (int c, unsigned char *p) | |||
| 578 | } while (false) | 642 | } while (false) |
| 579 | 643 | ||
| 580 | 644 | ||
| 581 | /* Return a non-outlandish value for the tab width. */ | 645 | /* If C is a variation selector, return the index of the |
| 582 | 646 | variation selector (1..256). Otherwise, return 0. */ | |
| 583 | #define SANE_TAB_WIDTH(buf) sanitize_tab_width (BVAR (buf, tab_width)) | ||
| 584 | |||
| 585 | INLINE int | ||
| 586 | sanitize_tab_width (Lisp_Object width) | ||
| 587 | { | ||
| 588 | return (FIXNUMP (width) && 0 < XFIXNUM (width) && XFIXNUM (width) <= 1000 | ||
| 589 | ? XFIXNUM (width) : 8); | ||
| 590 | } | ||
| 591 | |||
| 592 | /* Return the width of ASCII character C. The width is measured by | ||
| 593 | how many columns C will occupy on the screen when displayed in the | ||
| 594 | current buffer. */ | ||
| 595 | |||
| 596 | #define ASCII_CHAR_WIDTH(c) \ | ||
| 597 | (c < 0x20 \ | ||
| 598 | ? (c == '\t' \ | ||
| 599 | ? SANE_TAB_WIDTH (current_buffer) \ | ||
| 600 | : (c == '\n' ? 0 : (NILP (BVAR (current_buffer, ctl_arrow)) ? 4 : 2))) \ | ||
| 601 | : (c < 0x7f \ | ||
| 602 | ? 1 \ | ||
| 603 | : ((NILP (BVAR (current_buffer, ctl_arrow)) ? 4 : 2)))) | ||
| 604 | |||
| 605 | /* Return a non-outlandish value for a character width. */ | ||
| 606 | 647 | ||
| 607 | INLINE int | 648 | INLINE int |
| 608 | sanitize_char_width (EMACS_INT width) | 649 | CHAR_VARIATION_SELECTOR_P (int c) |
| 609 | { | 650 | { |
| 610 | return 0 <= width && width <= 1000 ? width : 1000; | 651 | return (c < 0xFE00 ? 0 |
| 652 | : c <= 0xFE0F ? c - 0xFE00 + 1 | ||
| 653 | : c < 0xE0100 ? 0 | ||
| 654 | : c <= 0xE01EF ? c - 0xE0100 + 17 | ||
| 655 | : 0); | ||
| 611 | } | 656 | } |
| 612 | 657 | ||
| 613 | /* Return the width of character C. The width is measured by how many | ||
| 614 | columns C will occupy on the screen when displayed in the current | ||
| 615 | buffer. The name CHARACTER_WIDTH avoids a collision with <limits.h> | ||
| 616 | CHAR_WIDTH when enabled; see ISO/IEC TS 18661-1:2014. */ | ||
| 617 | |||
| 618 | #define CHARACTER_WIDTH(c) \ | ||
| 619 | (ASCII_CHAR_P (c) \ | ||
| 620 | ? ASCII_CHAR_WIDTH (c) \ | ||
| 621 | : sanitize_char_width (XFIXNUM (CHAR_TABLE_REF (Vchar_width_table, c)))) | ||
| 622 | |||
| 623 | /* If C is a variation selector, return the index of the | ||
| 624 | variation selector (1..256). Otherwise, return 0. */ | ||
| 625 | |||
| 626 | #define CHAR_VARIATION_SELECTOR_P(c) \ | ||
| 627 | ((c) < 0xFE00 ? 0 \ | ||
| 628 | : (c) <= 0xFE0F ? (c) - 0xFE00 + 1 \ | ||
| 629 | : (c) < 0xE0100 ? 0 \ | ||
| 630 | : (c) <= 0xE01EF ? (c) - 0xE0100 + 17 \ | ||
| 631 | : 0) | ||
| 632 | |||
| 633 | /* Return true if C is a surrogate. */ | 658 | /* Return true if C is a surrogate. */ |
| 634 | 659 | ||
| 635 | INLINE bool | 660 | INLINE bool |
| @@ -679,8 +704,6 @@ typedef enum { | |||
| 679 | } unicode_category_t; | 704 | } unicode_category_t; |
| 680 | 705 | ||
| 681 | extern EMACS_INT char_resolve_modifier_mask (EMACS_INT) ATTRIBUTE_CONST; | 706 | extern EMACS_INT char_resolve_modifier_mask (EMACS_INT) ATTRIBUTE_CONST; |
| 682 | extern int string_char (const unsigned char *, | ||
| 683 | const unsigned char **, int *); | ||
| 684 | 707 | ||
| 685 | extern int translate_char (Lisp_Object, int c); | 708 | extern int translate_char (Lisp_Object, int c); |
| 686 | extern ptrdiff_t count_size_as_multibyte (const unsigned char *, ptrdiff_t); | 709 | extern ptrdiff_t count_size_as_multibyte (const unsigned char *, ptrdiff_t); |
diff --git a/src/conf_post.h b/src/conf_post.h index 79fb4cfe8a7..8a6b32998d8 100644 --- a/src/conf_post.h +++ b/src/conf_post.h | |||
| @@ -290,6 +290,7 @@ extern int emacs_setenv_TZ (char const *); | |||
| 290 | 290 | ||
| 291 | #define ARG_NONNULL _GL_ARG_NONNULL | 291 | #define ARG_NONNULL _GL_ARG_NONNULL |
| 292 | #define ATTRIBUTE_CONST _GL_ATTRIBUTE_CONST | 292 | #define ATTRIBUTE_CONST _GL_ATTRIBUTE_CONST |
| 293 | #define ATTRIBUTE_PURE _GL_ATTRIBUTE_PURE | ||
| 293 | #define ATTRIBUTE_UNUSED _GL_UNUSED | 294 | #define ATTRIBUTE_UNUSED _GL_UNUSED |
| 294 | 295 | ||
| 295 | #if GNUC_PREREQ (3, 3, 0) && !defined __ICC | 296 | #if GNUC_PREREQ (3, 3, 0) && !defined __ICC |
diff --git a/src/fns.c b/src/fns.c --- a/src/fns.c +++ b/src/fns.c | |||
| @@ -3700,7 +3700,7 @@ base64_decode_1 (const char *from, char *to, ptrdiff_t length, | |||
| 3700 | 3700 | ||
| 3701 | c = value >> 16 & 0xff; | 3701 | c = value >> 16 & 0xff; |
| 3702 | if (c & multibyte_bit) | 3702 | if (c & multibyte_bit) |
| 3703 | e += BYTE8_STRING (c, e); | 3703 | e += BYTE8_STRING (c, (unsigned char *) e); |
| 3704 | else | 3704 | else |
| 3705 | *e++ = c; | 3705 | *e++ = c; |
| 3706 | nchars++; | 3706 | nchars++; |
| @@ -3742,7 +3742,7 @@ base64_decode_1 (const char *from, char *to, ptrdiff_t length, | |||
| 3742 | 3742 | ||
| 3743 | c = value >> 8 & 0xff; | 3743 | c = value >> 8 & 0xff; |
| 3744 | if (c & multibyte_bit) | 3744 | if (c & multibyte_bit) |
| 3745 | e += BYTE8_STRING (c, e); | 3745 | e += BYTE8_STRING (c, (unsigned char *) e); |
| 3746 | else | 3746 | else |
| 3747 | *e++ = c; | 3747 | *e++ = c; |
| 3748 | nchars++; | 3748 | nchars++; |
| @@ -3772,7 +3772,7 @@ base64_decode_1 (const char *from, char *to, ptrdiff_t length, | |||
| 3772 | 3772 | ||
| 3773 | c = value & 0xff; | 3773 | c = value & 0xff; |
| 3774 | if (c & multibyte_bit) | 3774 | if (c & multibyte_bit) |
| 3775 | e += BYTE8_STRING (c, e); | 3775 | e += BYTE8_STRING (c, (unsigned char *) e); |
| 3776 | else | 3776 | else |
| 3777 | *e++ = c; | 3777 | *e++ = c; |
| 3778 | nchars++; | 3778 | nchars++; |
diff --git a/src/lisp.h b/src/lisp.h index 706ca6b9a82..b4ac017dcf5 100644 --- a/src/lisp.h +++ b/src/lisp.h | |||
| @@ -597,7 +597,7 @@ extern Lisp_Object make_biguint (uintmax_t); | |||
| 597 | extern uintmax_t check_uinteger_max (Lisp_Object, uintmax_t); | 597 | extern uintmax_t check_uinteger_max (Lisp_Object, uintmax_t); |
| 598 | 598 | ||
| 599 | /* Defined in chartab.c. */ | 599 | /* Defined in chartab.c. */ |
| 600 | extern Lisp_Object char_table_ref (Lisp_Object, int); | 600 | extern Lisp_Object char_table_ref (Lisp_Object, int) ATTRIBUTE_PURE; |
| 601 | extern void char_table_set (Lisp_Object, int, Lisp_Object); | 601 | extern void char_table_set (Lisp_Object, int, Lisp_Object); |
| 602 | 602 | ||
| 603 | /* Defined in data.c. */ | 603 | /* Defined in data.c. */ |