about summary refs log tree commit diff stats
path: root/src/character.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/character.h')
-rw-r--r-- src/character.h 47
1 files changed, 27 insertions, 20 deletions
diff --git a/src/character.h b/src/character.h
index 4887473b27e..d4d77504426 100644
--- a/src/character.h
+++ b/src/character.h
@@ -85,7 +85,6 @@ enum
85}; 85};
86 86
87extern int char_string (unsigned, unsigned char *); 87extern int char_string (unsigned, unsigned char *);
88extern int string_char (const unsigned char *, int *);
89 88
90/* UTF-8 encodings. Use \x escapes, so they are portable to pre-C11 89/* UTF-8 encodings. Use \x escapes, so they are portable to pre-C11
91 compilers and can be concatenated with ordinary string literals. */ 90 compilers and can be concatenated with ordinary string literals. */
@@ -371,33 +370,41 @@ raw_prev_char_len (unsigned char const *p)
371INLINE int 370INLINE int
372string_char_and_length (unsigned char const *p, int *length) 371string_char_and_length (unsigned char const *p, int *length)
373{ 372{
374 int c, len; 373 int c = p[0];
374 if (! (c & 0x80))
375 {
376 *length = 1;
377 return c;
378 }
379 eassume (0xC0 <= c);
375 380
376 if (! (p[0] & 0x80)) 381 int d = (c << 6) + p[1] - ((0xC0 << 6) + 0x80);
382 if (! (c & 0x20))
377 { 383 {
378 len = 1; 384 *length = 2;
379 c = p[0]; 385 return d + (c < 0xC2 ? 0x3FFF80 : 0);
380 } 386 }
381 else if (! (p[0] & 0x20)) 387
388 d = (d << 6) + p[2] - ((0x20 << 12) + 0x80);
389 if (! (c & 0x10))
382 { 390 {
383 len = 2; 391 *length = 3;
384 c = ((((p[0] & 0x1F) << 6) 392 eassume (MAX_2_BYTE_CHAR < d && d <= MAX_3_BYTE_CHAR);
385 | (p[1] & 0x3F)) 393 return d;
386 + (p[0] < 0xC2 ? 0x3FFF80 : 0));
387 } 394 }
388 else if (! (p[0] & 0x10)) 395
396 d = (d << 6) + p[3] - ((0x10 << 18) + 0x80);
397 if (! (c & 0x08))
389 { 398 {
390 len = 3; 399 *length = 4;
391 c = (((p[0] & 0x0F) << 12) 400 eassume (MAX_3_BYTE_CHAR < d && d <= MAX_4_BYTE_CHAR);
392 | ((p[1] & 0x3F) << 6) 401 return d;
393 | (p[2] & 0x3F));
394 } 402 }
395 else
396 c = string_char (p, &len);
397 403
398 eassume (0 < len && len <= MAX_MULTIBYTE_LENGTH); 404 d = (d << 6) + p[4] - ((0x08 << 24) + 0x80);
399 *length = len; 405 *length = 5;
400 return c; 406 eassume (MAX_4_BYTE_CHAR < d && d <= MAX_5_BYTE_CHAR);
407 return d;
401} 408}
402 409
403/* Return the character code of character whose multibyte form is at P. */ 410/* Return the character code of character whose multibyte form is at P. */