about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Mattias Engdegård, 2020-04-09 12:04:22 +0200
committer: Mattias Engdegård, 2020-04-09 12:50:35 +0200
commit: d3e2c88041b4844422bda64b1ee51678dc8a2e88 (patch)
tree: 5108104aceb42394dcf860759ab0b85a140521a4
parent: 95dd8de1df19a8529efb66257ac78789be62ca37 (diff)
download: emacs-d3e2c88041b4844422bda64b1ee51678dc8a2e88.tar.gz
download: emacs-d3e2c88041b4844422bda64b1ee51678dc8a2e88.zip
Fix ASCII-only conversion logic (bug#40407)
To sidestep conversion altogether when EOL conversion applies, we must
either be encoding a string without NL, or decoding without CR.

* src/coding.c (string_ascii_p): Revert to a pure predicate.
(code_convert_string): Fix logic.  Don't use uninitialised
ascii_p (removed).  Use memchr to detect CR or LF in string when needed.
* test/src/coding-tests.el (coding-nocopy-ascii): Update tests to
include encodings with explicit EOL conversions.
-rw-r--r--  src/coding.c  46
-rw-r--r--  test/src/coding-tests.el  33
2 files changed, 38 insertions, 41 deletions
diff --git a/src/coding.c b/src/coding.c
index ffcb9cf0a1a..450c498f1e8 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -9474,22 +9474,15 @@ not fully specified.) */)
9474 return code_convert_region (start, end, coding_system, destination, 1, 0); 9474 return code_convert_region (start, end, coding_system, destination, 1, 0);
9475} 9475}
9476 9476
9477/* Non-zero if STR contains only characters in the 0..127 range. 9477/* Whether STRING only contains chars in the 0..127 range. */
9478 Positive if STR includes characters that don't need EOL conversion 9478static bool
9479 on decoding, negative otherwise. */ 9479string_ascii_p (Lisp_Object string)
9480static int
9481string_ascii_p (Lisp_Object str)
9482{ 9480{
9483 ptrdiff_t nbytes = SBYTES (str); 9481 ptrdiff_t nbytes = SBYTES (string);
9484 bool CR_Seen = false;
9485 for (ptrdiff_t i = 0; i < nbytes; i++) 9482 for (ptrdiff_t i = 0; i < nbytes; i++)
9486 { 9483 if (SREF (string, i) > 127)
9487 if (SREF (str, i) > 127) 9484 return false;
9488 return 0; 9485 return true;
9489 if (SREF (str, i) == '\r')
9490 CR_Seen = true;
9491 }
9492 return CR_Seen ? -1 : 1;
9493} 9486}
9494 9487
9495Lisp_Object 9488Lisp_Object
@@ -9526,24 +9519,19 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system,
9526 if (EQ (dst_object, Qt)) 9519 if (EQ (dst_object, Qt))
9527 { 9520 {
9528 /* Fast path for ASCII-only input and an ASCII-compatible coding: 9521 /* Fast path for ASCII-only input and an ASCII-compatible coding:
9529 act as identity if no EOL conversion is neede. */ 9522 act as identity if no EOL conversion is needed. */
9530 int ascii_p;
9531 Lisp_Object attrs = CODING_ID_ATTRS (coding.id); 9523 Lisp_Object attrs = CODING_ID_ATTRS (coding.id);
9532 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) 9524 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
9533 && (STRING_MULTIBYTE (string) 9525 && (STRING_MULTIBYTE (string)
9534 ? (chars == bytes) : ((ascii_p = string_ascii_p (string)) != 0))) 9526 ? (chars == bytes) : string_ascii_p (string))
9535 { 9527 && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix)
9536 if (ascii_p > 0 9528 || inhibit_eol_conversion
9537 || (ascii_p < 0 9529 || ! memchr (SDATA (string), encodep ? '\n' : '\r', bytes)))
9538 && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix) 9530 return (nocopy
9539 || inhibit_eol_conversion))) 9531 ? string
9540 return (nocopy 9532 : (encodep
9541 ? string 9533 ? make_unibyte_string (SSDATA (string), bytes)
9542 : (encodep 9534 : make_multibyte_string (SSDATA (string), bytes, bytes)));
9543 ? make_unibyte_string (SSDATA (string), bytes)
9544 : make_multibyte_string (SSDATA (string),
9545 bytes, bytes)));
9546 }
9547 } 9535 }
9548 else if (BUFFERP (dst_object)) 9536 else if (BUFFERP (dst_object))
9549 { 9537 {
diff --git a/test/src/coding-tests.el b/test/src/coding-tests.el
index 8d92bcdcd1a..9f6fac3edd8 100644
--- a/test/src/coding-tests.el
+++ b/test/src/coding-tests.el
@@ -388,29 +388,38 @@
388 (let* ((uni (apply #'string (number-sequence 0 127))) 388 (let* ((uni (apply #'string (number-sequence 0 127)))
389 (multi (string-to-multibyte uni))) 389 (multi (string-to-multibyte uni)))
390 (dolist (s (list uni multi)) 390 (dolist (s (list uni multi))
391 ;; Encodings without EOL conversion.
391 (dolist (coding '(us-ascii-unix iso-latin-1-unix utf-8-unix)) 392 (dolist (coding '(us-ascii-unix iso-latin-1-unix utf-8-unix))
392 (should-not (eq (decode-coding-string s coding nil) s)) 393 (should-not (eq (decode-coding-string s coding nil) s))
393 (should-not (eq (encode-coding-string s coding nil) s)) 394 (should-not (eq (encode-coding-string s coding nil) s))
394 (should (eq (decode-coding-string s coding t) s)) 395 (should (eq (decode-coding-string s coding t) s))
395 (should (eq (encode-coding-string s coding t) s))))) 396 (should (eq (encode-coding-string s coding t) s)))
396 (let* ((uni (apply #'string (number-sequence 15 127))) 397
398 ;; With EOL conversion inhibited.
399 (let ((inhibit-eol-conversion t))
400 (dolist (coding '(us-ascii iso-latin-1 utf-8))
401 (should-not (eq (decode-coding-string s coding nil) s))
402 (should-not (eq (encode-coding-string s coding nil) s))
403 (should (eq (decode-coding-string s coding t) s))
404 (should (eq (encode-coding-string s coding t) s))))))
405
406 ;; Check identity decoding with EOL conversion for ASCII except CR.
407 (let* ((uni (apply #'string (delq ?\r (number-sequence 0 127))))
397 (multi (string-to-multibyte uni))) 408 (multi (string-to-multibyte uni)))
398 (dolist (s (list uni multi)) 409 (dolist (s (list uni multi))
399 (dolist (coding '(us-ascii iso-latin-1 utf-8)) 410 (dolist (coding '(us-ascii-dos iso-latin-1-dos utf-8-dos mac-roman-mac))
400 (should-not (eq (decode-coding-string s coding nil) s)) 411 (should-not (eq (decode-coding-string s coding nil) s))
401 (should-not (eq (encode-coding-string s coding nil) s)) 412 (should (eq (decode-coding-string s coding t) s)))))
402 (should (eq (decode-coding-string s coding t) s)) 413
403 (should (eq (encode-coding-string s coding t) s))))) 414 ;; Check identity encoding with EOL conversion for ASCII except LF.
404 (let* ((uni (apply #'string (number-sequence 0 127))) 415 (let* ((uni (apply #'string (delq ?\n (number-sequence 0 127))))
405 (multi (string-to-multibyte uni)) 416 (multi (string-to-multibyte uni)))
406 (inhibit-eol-conversion t))
407 (dolist (s (list uni multi)) 417 (dolist (s (list uni multi))
408 (dolist (coding '(us-ascii iso-latin-1 utf-8)) 418 (dolist (coding '(us-ascii-dos iso-latin-1-dos utf-8-dos mac-roman-mac))
409 (should-not (eq (decode-coding-string s coding nil) s))
410 (should-not (eq (encode-coding-string s coding nil) s)) 419 (should-not (eq (encode-coding-string s coding nil) s))
411 (should (eq (decode-coding-string s coding t) s))
412 (should (eq (encode-coding-string s coding t) s)))))) 420 (should (eq (encode-coding-string s coding t) s))))))
413 421
422
414(ert-deftest coding-check-coding-systems-region () 423(ert-deftest coding-check-coding-systems-region ()
415 (should (equal (check-coding-systems-region "aå" nil '(utf-8)) 424 (should (equal (check-coding-systems-region "aå" nil '(utf-8))
416 nil)) 425 nil))