aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Paul Eggert 2020-04-26 15:18:49 -0700
committer Paul Eggert 2020-04-26 19:31:54 -0700
commit ed2def7d5e423388ca75c6e10fd7b42e0c4789c7 (patch)
tree a488de7c0a4729937cfa8fca01093433a609374f /src
parent 895a18eafb84bca68045e552437dbb00a15a9f56 (diff)
download emacs-ed2def7d5e423388ca75c6e10fd7b42e0c4789c7.tar.gz
emacs-ed2def7d5e423388ca75c6e10fd7b42e0c4789c7.zip
Improve string_char_and_length speed
This tweak improved the CPU time performance of ‘make compile-always’ by about 1.7% on my platform.
* src/character.c (string_char): Remove; no longer used.
* src/character.h (string_char_and_length): Redo so that it needn’t call string_char. This helps the caller, which can now become a leaf function.
Diffstat (limited to 'src')
-rw-r--r-- src/character.c 45
-rw-r--r-- src/character.h 47
2 files changed, 27 insertions, 65 deletions
diff --git a/src/character.c b/src/character.c
index edcec5f1c79..4902e564b1d 100644
--- a/src/character.c
+++ b/src/character.c
@@ -141,51 +141,6 @@ char_string (unsigned int c, unsigned char *p)
141} 141}
142 142
143 143
144/* Return a character whose multibyte form is at P. Set *LEN to the
145 byte length of the multibyte form. */
146
147int
148string_char (const unsigned char *p, int *len)
149{
150 int c;
151 const unsigned char *saved_p = p;
152
153 if (*p < 0x80 || ! (*p & 0x20) || ! (*p & 0x10))
154 {
155 /* 1-, 2-, and 3-byte sequences can be handled by the macro. */
156 c = string_char_advance (&p);
157 }
158 else if (! (*p & 0x08))
159 {
160 /* A 4-byte sequence of this form:
161 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
162 c = ((((p)[0] & 0x7) << 18)
163 | (((p)[1] & 0x3F) << 12)
164 | (((p)[2] & 0x3F) << 6)
165 | ((p)[3] & 0x3F));
166 p += 4;
167 }
168 else
169 {
170 /* A 5-byte sequence of this form:
171
172 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
173
174 Note that the top 4 `x's are always 0, so shifting p[1] can
175 never exceed the maximum valid character codepoint. */
176 c = (/* (((p)[0] & 0x3) << 24) ... always 0, so no need to shift. */
177 (((p)[1] & 0x3F) << 18)
178 | (((p)[2] & 0x3F) << 12)
179 | (((p)[3] & 0x3F) << 6)
180 | ((p)[4] & 0x3F));
181 p += 5;
182 }
183
184 *len = p - saved_p;
185 return c;
186}
187
188
189/* Translate character C by translation table TABLE. If no translation is 144/* Translate character C by translation table TABLE. If no translation is
190 found in TABLE, return the untranslated character. If TABLE is a list, 145 found in TABLE, return the untranslated character. If TABLE is a list,
191 elements are char tables. In that case, recursively translate C by all the 146 elements are char tables. In that case, recursively translate C by all the
diff --git a/src/character.h b/src/character.h
index 4887473b27e..d4d77504426 100644
--- a/src/character.h
+++ b/src/character.h
@@ -85,7 +85,6 @@ enum
85}; 85};
86 86
87extern int char_string (unsigned, unsigned char *); 87extern int char_string (unsigned, unsigned char *);
88extern int string_char (const unsigned char *, int *);
89 88
90/* UTF-8 encodings. Use \x escapes, so they are portable to pre-C11 89/* UTF-8 encodings. Use \x escapes, so they are portable to pre-C11
91 compilers and can be concatenated with ordinary string literals. */ 90 compilers and can be concatenated with ordinary string literals. */
@@ -371,33 +370,41 @@ raw_prev_char_len (unsigned char const *p)
371INLINE int 370INLINE int
372string_char_and_length (unsigned char const *p, int *length) 371string_char_and_length (unsigned char const *p, int *length)
373{ 372{
374 int c, len; 373 int c = p[0];
374 if (! (c & 0x80))
375 {
376 *length = 1;
377 return c;
378 }
379 eassume (0xC0 <= c);
375 380
376 if (! (p[0] & 0x80)) 381 int d = (c << 6) + p[1] - ((0xC0 << 6) + 0x80);
382 if (! (c & 0x20))
377 { 383 {
378 len = 1; 384 *length = 2;
379 c = p[0]; 385 return d + (c < 0xC2 ? 0x3FFF80 : 0);
380 } 386 }
381 else if (! (p[0] & 0x20)) 387
388 d = (d << 6) + p[2] - ((0x20 << 12) + 0x80);
389 if (! (c & 0x10))
382 { 390 {
383 len = 2; 391 *length = 3;
384 c = ((((p[0] & 0x1F) << 6) 392 eassume (MAX_2_BYTE_CHAR < d && d <= MAX_3_BYTE_CHAR);
385 | (p[1] & 0x3F)) 393 return d;
386 + (p[0] < 0xC2 ? 0x3FFF80 : 0));
387 } 394 }
388 else if (! (p[0] & 0x10)) 395
396 d = (d << 6) + p[3] - ((0x10 << 18) + 0x80);
397 if (! (c & 0x08))
389 { 398 {
390 len = 3; 399 *length = 4;
391 c = (((p[0] & 0x0F) << 12) 400 eassume (MAX_3_BYTE_CHAR < d && d <= MAX_4_BYTE_CHAR);
392 | ((p[1] & 0x3F) << 6) 401 return d;
393 | (p[2] & 0x3F));
394 } 402 }
395 else
396 c = string_char (p, &len);
397 403
398 eassume (0 < len && len <= MAX_MULTIBYTE_LENGTH); 404 d = (d << 6) + p[4] - ((0x08 << 24) + 0x80);
399 *length = len; 405 *length = 5;
400 return c; 406 eassume (MAX_4_BYTE_CHAR < d && d <= MAX_5_BYTE_CHAR);
407 return d;
401} 408}
402 409
403/* Return the character code of character whose multibyte form is at P. */ 410/* Return the character code of character whose multibyte form is at P. */