aboutsummaryrefslogtreecommitdiffstats
path: root/src/data.c
diff options
context:
space:
mode:
authorMattias Engdegård2024-05-02 18:05:21 +0200
committerMattias Engdegård2025-08-24 10:28:27 +0200
commit308e3ab1dbd9633b843541af55d77c82b725df02 (patch)
tree7036ce2267ad670d95e1b2702f4554e97395e233 /src/data.c
parent230ed1f9b6da42515735970c370424c37bda5d59 (diff)
downloademacs-308e3ab1dbd9633b843541af55d77c82b725df02.tar.gz
emacs-308e3ab1dbd9633b843541af55d77c82b725df02.zip
Disallow string data resizing (bug#79784)
Only allow string mutation that is certain not to require string data to be resized and reallocated: writing bytes into a unibyte string, and changing ASCII to ASCII in a multibyte string.

This ensures that mutation will never transform a unibyte string to multibyte, that the size of a string in bytes never changes, and that the byte offsets of characters remain the same. Most importantly, it removes a long-standing obstacle to reform of string representation and allows for future performance improvements.

* src/data.c (Faset): Disallow resizing string mutation.
* src/fns.c (clear_string_char_byte_cache):
* src/alloc.c (resize_string_data): Remove.
* test/src/data-tests.el (data-aset-string): New test.
* test/lisp/subr-tests.el (subr--subst-char-in-string): Skip error cases.
* test/src/alloc-tests.el (aset-nbytes-change): Remove test that is no longer relevant.
* doc/lispref/strings.texi (Modifying Strings):
* doc/lispref/sequences.texi (Array Functions):
* doc/lispref/text.texi (Substitution): Update manual.
* etc/NEWS: Announce.
Diffstat (limited to 'src/data.c')
-rw-r--r--src/data.c43
1 files changed, 14 insertions, 29 deletions
diff --git a/src/data.c b/src/data.c
index 493a8dd63fc..b8a48203bcf 100644
--- a/src/data.c
+++ b/src/data.c
@@ -2574,7 +2574,10 @@ or a byte-code object. IDX starts at 0. */)
2574DEFUN ("aset", Faset, Saset, 3, 3, 0, 2574DEFUN ("aset", Faset, Saset, 3, 3, 0,
2575 doc: /* Store into the element of ARRAY at index IDX the value NEWELT. 2575 doc: /* Store into the element of ARRAY at index IDX the value NEWELT.
2576Return NEWELT. ARRAY may be a vector, a string, a char-table or a 2576Return NEWELT. ARRAY may be a vector, a string, a char-table or a
2577bool-vector. IDX starts at 0. */) 2577bool-vector. IDX starts at 0.
2578If ARRAY is a unibyte string, NEWELT must be a single byte (0-255).
2579If ARRAY is a multibyte string, NEWELT and the previous character at
2580index IDX must both be ASCII (0-127). */)
2578 (register Lisp_Object array, Lisp_Object idx, Lisp_Object newelt) 2581 (register Lisp_Object array, Lisp_Object idx, Lisp_Object newelt)
2579{ 2582{
2580 register EMACS_INT idxval; 2583 register EMACS_INT idxval;
@@ -2613,42 +2616,24 @@ bool-vector. IDX starts at 0. */)
2613 args_out_of_range (array, idx); 2616 args_out_of_range (array, idx);
2614 CHECK_CHARACTER (newelt); 2617 CHECK_CHARACTER (newelt);
2615 int c = XFIXNAT (newelt); 2618 int c = XFIXNAT (newelt);
2616 ptrdiff_t idxval_byte;
2617 int prev_bytes;
2618 unsigned char workbuf[MAX_MULTIBYTE_LENGTH], *p0 = workbuf, *p1;
2619 2619
2620 if (STRING_MULTIBYTE (array)) 2620 if (STRING_MULTIBYTE (array))
2621 { 2621 {
2622 idxval_byte = string_char_to_byte (array, idxval); 2622 if (c > 0x7f)
2623 p1 = SDATA (array) + idxval_byte; 2623 error ("Attempt to store non-ASCII char into multibyte string");
2624 prev_bytes = BYTES_BY_CHAR_HEAD (*p1); 2624 ptrdiff_t idxval_byte = string_char_to_byte (array, idxval);
2625 } 2625 unsigned char *p = SDATA (array) + idxval_byte;
2626 else if (SINGLE_BYTE_CHAR_P (c)) 2626 if (*p > 0x7f)
2627 { 2627 error ("Attempt to replace non-ASCII char in multibyte string");
2628 SSET (array, idxval, c); 2628 *p = c;
2629 return newelt;
2630 } 2629 }
2631 else 2630 else
2632 { 2631 {
2633 for (ptrdiff_t i = SBYTES (array) - 1; i >= 0; i--) 2632 if (c > 0xff)
2634 if (!ASCII_CHAR_P (SREF (array, i))) 2633 error ("Attempt to store non-byte value into unibyte string");
2635 args_out_of_range (array, newelt); 2634 SSET (array, idxval, c);
2636 /* ARRAY is an ASCII string. Convert it to a multibyte string. */
2637 STRING_SET_MULTIBYTE (array);
2638 idxval_byte = idxval;
2639 p1 = SDATA (array) + idxval_byte;
2640 prev_bytes = 1;
2641 } 2635 }
2642
2643 int new_bytes = CHAR_STRING (c, p0);
2644 if (prev_bytes != new_bytes)
2645 p1 = resize_string_data (array, idxval_byte, prev_bytes, new_bytes);
2646
2647 do
2648 *p1++ = *p0++;
2649 while (--new_bytes != 0);
2650 } 2636 }
2651
2652 return newelt; 2637 return newelt;
2653} 2638}
2654 2639