diff options
| author | Eli Zaretskii | 2020-04-09 12:18:30 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2020-04-09 12:18:30 +0300 |
| commit | faf996dc6e963a8dd74e9e794ded0467dd78ea18 (patch) | |
| tree | 05414568c1e99a0226101e37fb94dc4ade687699 | |
| parent | 1aeb1819353418ebed635f18a009048700ba1ad0 (diff) | |
| download | emacs-faf996dc6e963a8dd74e9e794ded0467dd78ea18.tar.gz emacs-faf996dc6e963a8dd74e9e794ded0467dd78ea18.zip | |
Fix decoding ASCII strings with embedded CR characters
* src/coding.c (string_ascii_p): Return a negative value if an
all-ASCII string STR includes the CR character, otherwise a
positive value.
(code_convert_string): If the string is ASCII, but includes CR
characters, use the fast path only if EOL doesn't need to be
decoded. (Bug#40519)
* test/src/coding-tests.el (coding-nocopy-ascii): Add tests for
bug#40519.
| -rw-r--r-- | src/coding.c | 37 | ||||
| -rw-r--r-- | test/src/coding-tests.el | 17 |
2 files changed, 43 insertions, 11 deletions
diff --git a/src/coding.c b/src/coding.c
index 49c1e625d57..24a832ff3ee 100644
--- a/src/coding.c
+++ b/src/coding.c
| @@ -9471,15 +9471,22 @@ not fully specified.) */) | |||
| 9471 | return code_convert_region (start, end, coding_system, destination, 1, 0); | 9471 | return code_convert_region (start, end, coding_system, destination, 1, 0); |
| 9472 | } | 9472 | } |
| 9473 | 9473 | ||
| 9474 | /* Whether a string only contains chars in the 0..127 range. */ | 9474 | /* Non-zero if STR contains only characters in the 0..127 range. |
| 9475 | static bool | 9475 | Positive if STR includes characters that don't need EOL conversion |
| 9476 | on decoding, negative otherwise. */ | ||
| 9477 | static int | ||
| 9476 | string_ascii_p (Lisp_Object str) | 9478 | string_ascii_p (Lisp_Object str) |
| 9477 | { | 9479 | { |
| 9478 | ptrdiff_t nbytes = SBYTES (str); | 9480 | ptrdiff_t nbytes = SBYTES (str); |
| 9481 | bool CR_Seen = false; | ||
| 9479 | for (ptrdiff_t i = 0; i < nbytes; i++) | 9482 | for (ptrdiff_t i = 0; i < nbytes; i++) |
| 9480 | if (SREF (str, i) > 127) | 9483 | { |
| 9481 | return false; | 9484 | if (SREF (str, i) > 127) |
| 9482 | return true; | 9485 | return 0; |
| 9486 | if (SREF (str, i) == '\r') | ||
| 9487 | CR_Seen = true; | ||
| 9488 | } | ||
| 9489 | return CR_Seen ? -1 : 1; | ||
| 9483 | } | 9490 | } |
| 9484 | 9491 | ||
| 9485 | Lisp_Object | 9492 | Lisp_Object |
| @@ -9517,15 +9524,23 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system, | |||
| 9517 | { | 9524 | { |
| 9518 | /* Fast path for ASCII-only input and an ASCII-compatible coding: | 9525 | /* Fast path for ASCII-only input and an ASCII-compatible coding: |
| 9519 | act as identity. */ | 9526 | act as identity. */ |
| 9527 | int ascii_p; | ||
| 9520 | Lisp_Object attrs = CODING_ID_ATTRS (coding.id); | 9528 | Lisp_Object attrs = CODING_ID_ATTRS (coding.id); |
| 9521 | if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) | 9529 | if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) |
| 9522 | && (STRING_MULTIBYTE (string) | 9530 | && (STRING_MULTIBYTE (string) |
| 9523 | ? (chars == bytes) : string_ascii_p (string))) | 9531 | ? (chars == bytes) : ((ascii_p = string_ascii_p (string)) != 0))) |
| 9524 | return (nocopy | 9532 | { |
| 9525 | ? string | 9533 | if (ascii_p > 0 |
| 9526 | : (encodep | 9534 | || (ascii_p < 0 |
| 9527 | ? make_unibyte_string (SSDATA (string), bytes) | 9535 | && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix) |
| 9528 | : make_multibyte_string (SSDATA (string), bytes, bytes))); | 9536 | || inhibit_eol_conversion))) |
| 9537 | return (nocopy | ||
| 9538 | ? string | ||
| 9539 | : (encodep | ||
| 9540 | ? make_unibyte_string (SSDATA (string), bytes) | ||
| 9541 | : make_multibyte_string (SSDATA (string), | ||
| 9542 | bytes, bytes))); | ||
| 9543 | } | ||
| 9529 | } | 9544 | } |
| 9530 | else if (BUFFERP (dst_object)) | 9545 | else if (BUFFERP (dst_object)) |
| 9531 | { | 9546 | { |
diff --git a/test/src/coding-tests.el b/test/src/coding-tests.el
index 93e6709d442..83a06b8179e 100644
--- a/test/src/coding-tests.el
+++ b/test/src/coding-tests.el
| @@ -388,6 +388,23 @@ | |||
| 388 | (let* ((uni (apply #'string (number-sequence 0 127))) | 388 | (let* ((uni (apply #'string (number-sequence 0 127))) |
| 389 | (multi (string-to-multibyte uni))) | 389 | (multi (string-to-multibyte uni))) |
| 390 | (dolist (s (list uni multi)) | 390 | (dolist (s (list uni multi)) |
| 391 | (dolist (coding '(us-ascii-unix iso-latin-1-unix utf-8-unix)) | ||
| 392 | (should-not (eq (decode-coding-string s coding nil) s)) | ||
| 393 | (should-not (eq (encode-coding-string s coding nil) s)) | ||
| 394 | (should (eq (decode-coding-string s coding t) s)) | ||
| 395 | (should (eq (encode-coding-string s coding t) s))))) | ||
| 396 | (let* ((uni (apply #'string (number-sequence 15 127))) | ||
| 397 | (multi (string-to-multibyte uni))) | ||
| 398 | (dolist (s (list uni multi)) | ||
| 399 | (dolist (coding '(us-ascii iso-latin-1 utf-8)) | ||
| 400 | (should-not (eq (decode-coding-string s coding nil) s)) | ||
| 401 | (should-not (eq (encode-coding-string s coding nil) s)) | ||
| 402 | (should (eq (decode-coding-string s coding t) s)) | ||
| 403 | (should (eq (encode-coding-string s coding t) s))))) | ||
| 404 | (let* ((uni (apply #'string (number-sequence 0 127))) | ||
| 405 | (multi (string-to-multibyte uni)) | ||
| 406 | (inhibit-eol-conversion t)) | ||
| 407 | (dolist (s (list uni multi)) | ||
| 391 | (dolist (coding '(us-ascii iso-latin-1 utf-8)) | 408 | (dolist (coding '(us-ascii iso-latin-1 utf-8)) |
| 392 | (should-not (eq (decode-coding-string s coding nil) s)) | 409 | (should-not (eq (decode-coding-string s coding nil) s)) |
| 393 | (should-not (eq (encode-coding-string s coding nil) s)) | 410 | (should-not (eq (encode-coding-string s coding nil) s)) |