diff options
| author | Paul Eggert | 2020-04-26 15:18:49 -0700 |
|---|---|---|
| committer | Paul Eggert | 2020-04-26 19:31:54 -0700 |
| commit | ed2def7d5e423388ca75c6e10fd7b42e0c4789c7 (patch) | |
| tree | a488de7c0a4729937cfa8fca01093433a609374f /src | |
| parent | 895a18eafb84bca68045e552437dbb00a15a9f56 (diff) | |
| download | emacs-ed2def7d5e423388ca75c6e10fd7b42e0c4789c7.tar.gz emacs-ed2def7d5e423388ca75c6e10fd7b42e0c4789c7.zip | |
Improve string_char_and_length speed
This tweak improved the CPU time performance of
‘make compile-always’ by about 1.7% on my platform.
* src/character.c (string_char): Remove; no longer used.
* src/character.h (string_char_and_length): Redo so that it
needn’t call string_char. This helps the caller, which can now
become a leaf function.
Diffstat (limited to 'src')
| -rw-r--r-- | src/character.c | 45 | ||||
| -rw-r--r-- | src/character.h | 47 |
2 files changed, 27 insertions, 65 deletions
diff --git a/src/character.c b/src/character.c
index edcec5f1c79..4902e564b1d 100644
--- a/src/character.c
+++ b/src/character.c
| @@ -141,51 +141,6 @@ char_string (unsigned int c, unsigned char *p) | |||
| 141 | } | 141 | } |
| 142 | 142 | ||
| 143 | 143 | ||
| 144 | /* Return a character whose multibyte form is at P. Set *LEN to the | ||
| 145 | byte length of the multibyte form. */ | ||
| 146 | |||
| 147 | int | ||
| 148 | string_char (const unsigned char *p, int *len) | ||
| 149 | { | ||
| 150 | int c; | ||
| 151 | const unsigned char *saved_p = p; | ||
| 152 | |||
| 153 | if (*p < 0x80 || ! (*p & 0x20) || ! (*p & 0x10)) | ||
| 154 | { | ||
| 155 | /* 1-, 2-, and 3-byte sequences can be handled by the macro. */ | ||
| 156 | c = string_char_advance (&p); | ||
| 157 | } | ||
| 158 | else if (! (*p & 0x08)) | ||
| 159 | { | ||
| 160 | /* A 4-byte sequence of this form: | ||
| 161 | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ | ||
| 162 | c = ((((p)[0] & 0x7) << 18) | ||
| 163 | | (((p)[1] & 0x3F) << 12) | ||
| 164 | | (((p)[2] & 0x3F) << 6) | ||
| 165 | | ((p)[3] & 0x3F)); | ||
| 166 | p += 4; | ||
| 167 | } | ||
| 168 | else | ||
| 169 | { | ||
| 170 | /* A 5-byte sequence of this form: | ||
| 171 | |||
| 172 | 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx | ||
| 173 | |||
| 174 | Note that the top 4 `x's are always 0, so shifting p[1] can | ||
| 175 | never exceed the maximum valid character codepoint. */ | ||
| 176 | c = (/* (((p)[0] & 0x3) << 24) ... always 0, so no need to shift. */ | ||
| 177 | (((p)[1] & 0x3F) << 18) | ||
| 178 | | (((p)[2] & 0x3F) << 12) | ||
| 179 | | (((p)[3] & 0x3F) << 6) | ||
| 180 | | ((p)[4] & 0x3F)); | ||
| 181 | p += 5; | ||
| 182 | } | ||
| 183 | |||
| 184 | *len = p - saved_p; | ||
| 185 | return c; | ||
| 186 | } | ||
| 187 | |||
| 188 | |||
| 189 | /* Translate character C by translation table TABLE. If no translation is | 144 | /* Translate character C by translation table TABLE. If no translation is |
| 190 | found in TABLE, return the untranslated character. If TABLE is a list, | 145 | found in TABLE, return the untranslated character. If TABLE is a list, |
| 191 | elements are char tables. In that case, recursively translate C by all the | 146 | elements are char tables. In that case, recursively translate C by all the |
diff --git a/src/character.h b/src/character.h
index 4887473b27e..d4d77504426 100644
--- a/src/character.h
+++ b/src/character.h
| @@ -85,7 +85,6 @@ enum | |||
| 85 | }; | 85 | }; |
| 86 | 86 | ||
| 87 | extern int char_string (unsigned, unsigned char *); | 87 | extern int char_string (unsigned, unsigned char *); |
| 88 | extern int string_char (const unsigned char *, int *); | ||
| 89 | 88 | ||
| 90 | /* UTF-8 encodings. Use \x escapes, so they are portable to pre-C11 | 89 | /* UTF-8 encodings. Use \x escapes, so they are portable to pre-C11 |
| 91 | compilers and can be concatenated with ordinary string literals. */ | 90 | compilers and can be concatenated with ordinary string literals. */ |
| @@ -371,33 +370,41 @@ raw_prev_char_len (unsigned char const *p) | |||
| 371 | INLINE int | 370 | INLINE int |
| 372 | string_char_and_length (unsigned char const *p, int *length) | 371 | string_char_and_length (unsigned char const *p, int *length) |
| 373 | { | 372 | { |
| 374 | int c, len; | 373 | int c = p[0]; |
| 374 | if (! (c & 0x80)) | ||
| 375 | { | ||
| 376 | *length = 1; | ||
| 377 | return c; | ||
| 378 | } | ||
| 379 | eassume (0xC0 <= c); | ||
| 375 | 380 | ||
| 376 | if (! (p[0] & 0x80)) | 381 | int d = (c << 6) + p[1] - ((0xC0 << 6) + 0x80); |
| 382 | if (! (c & 0x20)) | ||
| 377 | { | 383 | { |
| 378 | len = 1; | 384 | *length = 2; |
| 379 | c = p[0]; | 385 | return d + (c < 0xC2 ? 0x3FFF80 : 0); |
| 380 | } | 386 | } |
| 381 | else if (! (p[0] & 0x20)) | 387 | |
| 388 | d = (d << 6) + p[2] - ((0x20 << 12) + 0x80); | ||
| 389 | if (! (c & 0x10)) | ||
| 382 | { | 390 | { |
| 383 | len = 2; | 391 | *length = 3; |
| 384 | c = ((((p[0] & 0x1F) << 6) | 392 | eassume (MAX_2_BYTE_CHAR < d && d <= MAX_3_BYTE_CHAR); |
| 385 | | (p[1] & 0x3F)) | 393 | return d; |
| 386 | + (p[0] < 0xC2 ? 0x3FFF80 : 0)); | ||
| 387 | } | 394 | } |
| 388 | else if (! (p[0] & 0x10)) | 395 | |
| 396 | d = (d << 6) + p[3] - ((0x10 << 18) + 0x80); | ||
| 397 | if (! (c & 0x08)) | ||
| 389 | { | 398 | { |
| 390 | len = 3; | 399 | *length = 4; |
| 391 | c = (((p[0] & 0x0F) << 12) | 400 | eassume (MAX_3_BYTE_CHAR < d && d <= MAX_4_BYTE_CHAR); |
| 392 | | ((p[1] & 0x3F) << 6) | 401 | return d; |
| 393 | | (p[2] & 0x3F)); | ||
| 394 | } | 402 | } |
| 395 | else | ||
| 396 | c = string_char (p, &len); | ||
| 397 | 403 | ||
| 398 | eassume (0 < len && len <= MAX_MULTIBYTE_LENGTH); | 404 | d = (d << 6) + p[4] - ((0x08 << 24) + 0x80); |
| 399 | *length = len; | 405 | *length = 5; |
| 400 | return c; | 406 | eassume (MAX_4_BYTE_CHAR < d && d <= MAX_5_BYTE_CHAR); |
| 407 | return d; | ||
| 401 | } | 408 | } |
| 402 | 409 | ||
| 403 | /* Return the character code of character whose multibyte form is at P. */ | 410 | /* Return the character code of character whose multibyte form is at P. */ |