aboutsummaryrefslogtreecommitdiffstats
path: root/src/coding.c
diff options
context:
space:
mode:
author Mattias Engdegård 2020-04-09 12:04:22 +0200
committer Mattias Engdegård 2020-04-09 12:50:35 +0200
commit d3e2c88041b4844422bda64b1ee51678dc8a2e88 (patch)
tree 5108104aceb42394dcf860759ab0b85a140521a4 /src/coding.c
parent 95dd8de1df19a8529efb66257ac78789be62ca37 (diff)
download emacs-d3e2c88041b4844422bda64b1ee51678dc8a2e88.tar.gz
emacs-d3e2c88041b4844422bda64b1ee51678dc8a2e88.zip
Fix ASCII-only conversion logic (bug#40407)
To sidestep conversion altogether when EOL conversion applies, we must
either be encoding a string without NL, or decoding without CR.

* src/coding.c (string_ascii_p): Revert to a pure predicate.
(code_convert_string): Fix logic.  Don't use uninitialised
ascii_p (removed).  Use memchr to detect CR or LF in string when needed.
* test/src/coding-tests.el (coding-nocopy-ascii): Update tests to
include encodings with explicit EOL conversions.
Diffstat (limited to 'src/coding.c')
-rw-r--r-- src/coding.c 46
1 files changed, 17 insertions, 29 deletions
diff --git a/src/coding.c b/src/coding.c
index ffcb9cf0a1a..450c498f1e8 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -9474,22 +9474,15 @@ not fully specified.) */)
9474 return code_convert_region (start, end, coding_system, destination, 1, 0); 9474 return code_convert_region (start, end, coding_system, destination, 1, 0);
9475} 9475}
9476 9476
9477/* Non-zero if STR contains only characters in the 0..127 range. 9477/* Whether STRING only contains chars in the 0..127 range. */
9478 Positive if STR includes characters that don't need EOL conversion 9478static bool
9479 on decoding, negative otherwise. */ 9479string_ascii_p (Lisp_Object string)
9480static int
9481string_ascii_p (Lisp_Object str)
9482{ 9480{
9483 ptrdiff_t nbytes = SBYTES (str); 9481 ptrdiff_t nbytes = SBYTES (string);
9484 bool CR_Seen = false;
9485 for (ptrdiff_t i = 0; i < nbytes; i++) 9482 for (ptrdiff_t i = 0; i < nbytes; i++)
9486 { 9483 if (SREF (string, i) > 127)
9487 if (SREF (str, i) > 127) 9484 return false;
9488 return 0; 9485 return true;
9489 if (SREF (str, i) == '\r')
9490 CR_Seen = true;
9491 }
9492 return CR_Seen ? -1 : 1;
9493} 9486}
9494 9487
9495Lisp_Object 9488Lisp_Object
@@ -9526,24 +9519,19 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system,
9526 if (EQ (dst_object, Qt)) 9519 if (EQ (dst_object, Qt))
9527 { 9520 {
9528 /* Fast path for ASCII-only input and an ASCII-compatible coding: 9521 /* Fast path for ASCII-only input and an ASCII-compatible coding:
9529 act as identity if no EOL conversion is neede. */ 9522 act as identity if no EOL conversion is needed. */
9530 int ascii_p;
9531 Lisp_Object attrs = CODING_ID_ATTRS (coding.id); 9523 Lisp_Object attrs = CODING_ID_ATTRS (coding.id);
9532 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) 9524 if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
9533 && (STRING_MULTIBYTE (string) 9525 && (STRING_MULTIBYTE (string)
9534 ? (chars == bytes) : ((ascii_p = string_ascii_p (string)) != 0))) 9526 ? (chars == bytes) : string_ascii_p (string))
9535 { 9527 && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix)
9536 if (ascii_p > 0 9528 || inhibit_eol_conversion
9537 || (ascii_p < 0 9529 || ! memchr (SDATA (string), encodep ? '\n' : '\r', bytes)))
9538 && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix) 9530 return (nocopy
9539 || inhibit_eol_conversion))) 9531 ? string
9540 return (nocopy 9532 : (encodep
9541 ? string 9533 ? make_unibyte_string (SSDATA (string), bytes)
9542 : (encodep 9534 : make_multibyte_string (SSDATA (string), bytes, bytes)));
9543 ? make_unibyte_string (SSDATA (string), bytes)
9544 : make_multibyte_string (SSDATA (string),
9545 bytes, bytes)));
9546 }
9547 } 9535 }
9548 else if (BUFFERP (dst_object)) 9536 else if (BUFFERP (dst_object))
9549 { 9537 {