diff options
| author | Eli Zaretskii | 2018-06-15 17:39:34 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2018-06-15 17:39:34 +0300 |
| commit | 22aa665c9b536775a28ff2e4907afc31b69ccb21 (patch) | |
| tree | 4866771b8bf81174d9a9c7e2c23fa320c948468f | |
| parent | 0d3c35807d0b0a3aaa4c4ebd2f040bb78013879d (diff) | |
| download | emacs-22aa665c9b536775a28ff2e4907afc31b69ccb21.tar.gz emacs-22aa665c9b536775a28ff2e4907afc31b69ccb21.zip | |
Reject invalid 5-byte sequences when detecting UTF-8 encoding
* src/coding.c (detect_coding_utf_8): Reject multibyte sequences
whose leading byte is greater than MAX_MULTIBYTE_LEADING_CODE.
(Bug#31829)
* src/character.h (MAX_MULTIBYTE_LEADING_CODE): Add commentary
about the connection between the value of this macro and MAX_CHAR.
| -rw-r--r-- | src/character.h | 3 | ||||
| -rw-r--r-- | src/coding.c | 5 |
2 files changed, 6 insertions, 2 deletions
diff --git a/src/character.h b/src/character.h
index 1f21b2ad330..bc65759aa2a 100644
--- a/src/character.h
+++ b/src/character.h
| @@ -57,7 +57,8 @@ INLINE_HEADER_BEGIN | |||
| 57 | 57 | ||
| 58 | /* Minimum leading code of multibyte characters. */ | 58 | /* Minimum leading code of multibyte characters. */ |
| 59 | #define MIN_MULTIBYTE_LEADING_CODE 0xC0 | 59 | #define MIN_MULTIBYTE_LEADING_CODE 0xC0 |
| 60 | /* Maximum leading code of multibyte characters. */ | 60 | /* Maximum leading code of multibyte characters. Note: this must be |
| 61 | updated if we ever increase MAX_CHAR above. */ | ||
| 61 | #define MAX_MULTIBYTE_LEADING_CODE 0xF8 | 62 | #define MAX_MULTIBYTE_LEADING_CODE 0xF8 |
| 62 | 63 | ||
| 63 | /* Unicode character values. */ | 64 | /* Unicode character values. */ |
diff --git a/src/coding.c b/src/coding.c
index e756ba169dd..b1eb2edb497 100644
--- a/src/coding.c
+++ b/src/coding.c
| @@ -1225,7 +1225,10 @@ detect_coding_utf_8 (struct coding_system *coding, | |||
| 1225 | ONE_MORE_BYTE (c4); | 1225 | ONE_MORE_BYTE (c4); |
| 1226 | if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4)) | 1226 | if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4)) |
| 1227 | break; | 1227 | break; |
| 1228 | if (UTF_8_5_OCTET_LEADING_P (c)) | 1228 | if (UTF_8_5_OCTET_LEADING_P (c) |
| 1229 | /* If we ever need to increase MAX_CHAR, the below may need | ||
| 1230 | to be reviewed. */ | ||
| 1231 | && c < MAX_MULTIBYTE_LEADING_CODE) | ||
| 1229 | { | 1232 | { |
| 1230 | nchars++; | 1233 | nchars++; |
| 1231 | continue; | 1234 | continue; |