diff options
| author | Eli Zaretskii | 2020-04-09 12:18:30 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2020-04-09 12:18:30 +0300 |
| commit | faf996dc6e963a8dd74e9e794ded0467dd78ea18 (patch) | |
| tree | 05414568c1e99a0226101e37fb94dc4ade687699 /src/coding.c | |
| parent | 1aeb1819353418ebed635f18a009048700ba1ad0 (diff) | |
| download | emacs-faf996dc6e963a8dd74e9e794ded0467dd78ea18.tar.gz emacs-faf996dc6e963a8dd74e9e794ded0467dd78ea18.zip | |
Fix decoding ASCII strings with embedded CR characters
* src/coding.c (string_ascii_p): Return a negative value if an
all-ASCII string STR includes the CR character, otherwise a
positive value.
(code_convert_string): If the string is ASCII, but includes CR
characters, use the fast path only if EOL doesn't need to be
decoded. (Bug#40519)
* test/src/coding-tests.el (coding-nocopy-ascii): Add tests for
bug#40519.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 37 |
1 files changed, 26 insertions, 11 deletions
diff --git a/src/coding.c b/src/coding.c index 49c1e625d57..24a832ff3ee 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -9471,15 +9471,22 @@ not fully specified.) */) | |||
| 9471 | return code_convert_region (start, end, coding_system, destination, 1, 0); | 9471 | return code_convert_region (start, end, coding_system, destination, 1, 0); |
| 9472 | } | 9472 | } |
| 9473 | 9473 | ||
| 9474 | /* Whether a string only contains chars in the 0..127 range. */ | 9474 | /* Non-zero if STR contains only characters in the 0..127 range. |
| 9475 | static bool | 9475 | Positive if STR includes characters that don't need EOL conversion |
| 9476 | on decoding, negative otherwise. */ | ||
| 9477 | static int | ||
| 9476 | string_ascii_p (Lisp_Object str) | 9478 | string_ascii_p (Lisp_Object str) |
| 9477 | { | 9479 | { |
| 9478 | ptrdiff_t nbytes = SBYTES (str); | 9480 | ptrdiff_t nbytes = SBYTES (str); |
| 9481 | bool CR_Seen = false; | ||
| 9479 | for (ptrdiff_t i = 0; i < nbytes; i++) | 9482 | for (ptrdiff_t i = 0; i < nbytes; i++) |
| 9480 | if (SREF (str, i) > 127) | 9483 | { |
| 9481 | return false; | 9484 | if (SREF (str, i) > 127) |
| 9482 | return true; | 9485 | return 0; |
| 9486 | if (SREF (str, i) == '\r') | ||
| 9487 | CR_Seen = true; | ||
| 9488 | } | ||
| 9489 | return CR_Seen ? -1 : 1; | ||
| 9483 | } | 9490 | } |
| 9484 | 9491 | ||
| 9485 | Lisp_Object | 9492 | Lisp_Object |
| @@ -9517,15 +9524,23 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system, | |||
| 9517 | { | 9524 | { |
| 9518 | /* Fast path for ASCII-only input and an ASCII-compatible coding: | 9525 | /* Fast path for ASCII-only input and an ASCII-compatible coding: |
| 9519 | act as identity. */ | 9526 | act as identity. */ |
| 9527 | int ascii_p; | ||
| 9520 | Lisp_Object attrs = CODING_ID_ATTRS (coding.id); | 9528 | Lisp_Object attrs = CODING_ID_ATTRS (coding.id); |
| 9521 | if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) | 9529 | if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) |
| 9522 | && (STRING_MULTIBYTE (string) | 9530 | && (STRING_MULTIBYTE (string) |
| 9523 | ? (chars == bytes) : string_ascii_p (string))) | 9531 | ? (chars == bytes) : ((ascii_p = string_ascii_p (string)) != 0))) |
| 9524 | return (nocopy | 9532 | { |
| 9525 | ? string | 9533 | if (ascii_p > 0 |
| 9526 | : (encodep | 9534 | || (ascii_p < 0 |
| 9527 | ? make_unibyte_string (SSDATA (string), bytes) | 9535 | && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix) |
| 9528 | : make_multibyte_string (SSDATA (string), bytes, bytes))); | 9536 | || inhibit_eol_conversion))) |
| 9537 | return (nocopy | ||
| 9538 | ? string | ||
| 9539 | : (encodep | ||
| 9540 | ? make_unibyte_string (SSDATA (string), bytes) | ||
| 9541 | : make_multibyte_string (SSDATA (string), | ||
| 9542 | bytes, bytes))); | ||
| 9543 | } | ||
| 9529 | } | 9544 | } |
| 9530 | else if (BUFFERP (dst_object)) | 9545 | else if (BUFFERP (dst_object)) |
| 9531 | { | 9546 | { |