diff options
| author | Mattias Engdegård | 2020-04-09 12:04:22 +0200 |
|---|---|---|
| committer | Mattias Engdegård | 2020-04-09 12:50:35 +0200 |
| commit | d3e2c88041b4844422bda64b1ee51678dc8a2e88 (patch) | |
| tree | 5108104aceb42394dcf860759ab0b85a140521a4 | |
| parent | 95dd8de1df19a8529efb66257ac78789be62ca37 (diff) | |
| download | emacs-d3e2c88041b4844422bda64b1ee51678dc8a2e88.tar.gz emacs-d3e2c88041b4844422bda64b1ee51678dc8a2e88.zip | |
Fix ASCII-only conversion logic (bug#40407)
To sidestep conversion altogether when EOL conversion applies, we must
either be encoding a string without NL, or decoding without CR.
* src/coding.c (string_ascii_p): Revert to a pure predicate.
(code_convert_string): Fix logic. Don't use uninitialised
ascii_p (removed). Use memchr to detect CR or LF in string when needed.
* test/src/coding-tests.el (coding-nocopy-ascii):
Update tests to include encodings with explicit EOL conversions.
| -rw-r--r-- | src/coding.c | 46 | ||||
| -rw-r--r-- | test/src/coding-tests.el | 33 |
2 files changed, 38 insertions, 41 deletions
diff --git a/src/coding.c b/src/coding.c
index ffcb9cf0a1a..450c498f1e8 100644
--- a/src/coding.c
+++ b/src/coding.c
| @@ -9474,22 +9474,15 @@ not fully specified.) */) | |||
| 9474 | return code_convert_region (start, end, coding_system, destination, 1, 0); | 9474 | return code_convert_region (start, end, coding_system, destination, 1, 0); |
| 9475 | } | 9475 | } |
| 9476 | 9476 | ||
| 9477 | /* Non-zero if STR contains only characters in the 0..127 range. | 9477 | /* Whether STRING only contains chars in the 0..127 range. */ |
| 9478 | Positive if STR includes characters that don't need EOL conversion | 9478 | static bool |
| 9479 | on decoding, negative otherwise. */ | 9479 | string_ascii_p (Lisp_Object string) |
| 9480 | static int | ||
| 9481 | string_ascii_p (Lisp_Object str) | ||
| 9482 | { | 9480 | { |
| 9483 | ptrdiff_t nbytes = SBYTES (str); | 9481 | ptrdiff_t nbytes = SBYTES (string); |
| 9484 | bool CR_Seen = false; | ||
| 9485 | for (ptrdiff_t i = 0; i < nbytes; i++) | 9482 | for (ptrdiff_t i = 0; i < nbytes; i++) |
| 9486 | { | 9483 | if (SREF (string, i) > 127) |
| 9487 | if (SREF (str, i) > 127) | 9484 | return false; |
| 9488 | return 0; | 9485 | return true; |
| 9489 | if (SREF (str, i) == '\r') | ||
| 9490 | CR_Seen = true; | ||
| 9491 | } | ||
| 9492 | return CR_Seen ? -1 : 1; | ||
| 9493 | } | 9486 | } |
| 9494 | 9487 | ||
| 9495 | Lisp_Object | 9488 | Lisp_Object |
| @@ -9526,24 +9519,19 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system, | |||
| 9526 | if (EQ (dst_object, Qt)) | 9519 | if (EQ (dst_object, Qt)) |
| 9527 | { | 9520 | { |
| 9528 | /* Fast path for ASCII-only input and an ASCII-compatible coding: | 9521 | /* Fast path for ASCII-only input and an ASCII-compatible coding: |
| 9529 | act as identity if no EOL conversion is neede. */ | 9522 | act as identity if no EOL conversion is needed. */ |
| 9530 | int ascii_p; | ||
| 9531 | Lisp_Object attrs = CODING_ID_ATTRS (coding.id); | 9523 | Lisp_Object attrs = CODING_ID_ATTRS (coding.id); |
| 9532 | if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) | 9524 | if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) |
| 9533 | && (STRING_MULTIBYTE (string) | 9525 | && (STRING_MULTIBYTE (string) |
| 9534 | ? (chars == bytes) : ((ascii_p = string_ascii_p (string)) != 0))) | 9526 | ? (chars == bytes) : string_ascii_p (string)) |
| 9535 | { | 9527 | && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix) |
| 9536 | if (ascii_p > 0 | 9528 | || inhibit_eol_conversion |
| 9537 | || (ascii_p < 0 | 9529 | || ! memchr (SDATA (string), encodep ? '\n' : '\r', bytes))) |
| 9538 | && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix) | 9530 | return (nocopy |
| 9539 | || inhibit_eol_conversion))) | 9531 | ? string |
| 9540 | return (nocopy | 9532 | : (encodep |
| 9541 | ? string | 9533 | ? make_unibyte_string (SSDATA (string), bytes) |
| 9542 | : (encodep | 9534 | : make_multibyte_string (SSDATA (string), bytes, bytes))); |
| 9543 | ? make_unibyte_string (SSDATA (string), bytes) | ||
| 9544 | : make_multibyte_string (SSDATA (string), | ||
| 9545 | bytes, bytes))); | ||
| 9546 | } | ||
| 9547 | } | 9535 | } |
| 9548 | else if (BUFFERP (dst_object)) | 9536 | else if (BUFFERP (dst_object)) |
| 9549 | { | 9537 | { |
diff --git a/test/src/coding-tests.el b/test/src/coding-tests.el
index 8d92bcdcd1a..9f6fac3edd8 100644
--- a/test/src/coding-tests.el
+++ b/test/src/coding-tests.el
| @@ -388,29 +388,38 @@ | |||
| 388 | (let* ((uni (apply #'string (number-sequence 0 127))) | 388 | (let* ((uni (apply #'string (number-sequence 0 127))) |
| 389 | (multi (string-to-multibyte uni))) | 389 | (multi (string-to-multibyte uni))) |
| 390 | (dolist (s (list uni multi)) | 390 | (dolist (s (list uni multi)) |
| 391 | ;; Encodings without EOL conversion. | ||
| 391 | (dolist (coding '(us-ascii-unix iso-latin-1-unix utf-8-unix)) | 392 | (dolist (coding '(us-ascii-unix iso-latin-1-unix utf-8-unix)) |
| 392 | (should-not (eq (decode-coding-string s coding nil) s)) | 393 | (should-not (eq (decode-coding-string s coding nil) s)) |
| 393 | (should-not (eq (encode-coding-string s coding nil) s)) | 394 | (should-not (eq (encode-coding-string s coding nil) s)) |
| 394 | (should (eq (decode-coding-string s coding t) s)) | 395 | (should (eq (decode-coding-string s coding t) s)) |
| 395 | (should (eq (encode-coding-string s coding t) s))))) | 396 | (should (eq (encode-coding-string s coding t) s))) |
| 396 | (let* ((uni (apply #'string (number-sequence 15 127))) | 397 | |
| 398 | ;; With EOL conversion inhibited. | ||
| 399 | (let ((inhibit-eol-conversion t)) | ||
| 400 | (dolist (coding '(us-ascii iso-latin-1 utf-8)) | ||
| 401 | (should-not (eq (decode-coding-string s coding nil) s)) | ||
| 402 | (should-not (eq (encode-coding-string s coding nil) s)) | ||
| 403 | (should (eq (decode-coding-string s coding t) s)) | ||
| 404 | (should (eq (encode-coding-string s coding t) s)))))) | ||
| 405 | |||
| 406 | ;; Check identity decoding with EOL conversion for ASCII except CR. | ||
| 407 | (let* ((uni (apply #'string (delq ?\r (number-sequence 0 127)))) | ||
| 397 | (multi (string-to-multibyte uni))) | 408 | (multi (string-to-multibyte uni))) |
| 398 | (dolist (s (list uni multi)) | 409 | (dolist (s (list uni multi)) |
| 399 | (dolist (coding '(us-ascii iso-latin-1 utf-8)) | 410 | (dolist (coding '(us-ascii-dos iso-latin-1-dos utf-8-dos mac-roman-mac)) |
| 400 | (should-not (eq (decode-coding-string s coding nil) s)) | 411 | (should-not (eq (decode-coding-string s coding nil) s)) |
| 401 | (should-not (eq (encode-coding-string s coding nil) s)) | 412 | (should (eq (decode-coding-string s coding t) s))))) |
| 402 | (should (eq (decode-coding-string s coding t) s)) | 413 | |
| 403 | (should (eq (encode-coding-string s coding t) s))))) | 414 | ;; Check identity encoding with EOL conversion for ASCII except LF. |
| 404 | (let* ((uni (apply #'string (number-sequence 0 127))) | 415 | (let* ((uni (apply #'string (delq ?\n (number-sequence 0 127)))) |
| 405 | (multi (string-to-multibyte uni)) | 416 | (multi (string-to-multibyte uni))) |
| 406 | (inhibit-eol-conversion t)) | ||
| 407 | (dolist (s (list uni multi)) | 417 | (dolist (s (list uni multi)) |
| 408 | (dolist (coding '(us-ascii iso-latin-1 utf-8)) | 418 | (dolist (coding '(us-ascii-dos iso-latin-1-dos utf-8-dos mac-roman-mac)) |
| 409 | (should-not (eq (decode-coding-string s coding nil) s)) | ||
| 410 | (should-not (eq (encode-coding-string s coding nil) s)) | 419 | (should-not (eq (encode-coding-string s coding nil) s)) |
| 411 | (should (eq (decode-coding-string s coding t) s)) | ||
| 412 | (should (eq (encode-coding-string s coding t) s)))))) | 420 | (should (eq (encode-coding-string s coding t) s)))))) |
| 413 | 421 | ||
| 422 | |||
| 414 | (ert-deftest coding-check-coding-systems-region () | 423 | (ert-deftest coding-check-coding-systems-region () |
| 415 | (should (equal (check-coding-systems-region "aå" nil '(utf-8)) | 424 | (should (equal (check-coding-systems-region "aå" nil '(utf-8)) |
| 416 | nil)) | 425 | nil)) |