diff options
| author | Mattias Engdegård | 2024-05-02 18:05:21 +0200 |
|---|---|---|
| committer | Mattias Engdegård | 2025-08-24 10:28:27 +0200 |
| commit | 308e3ab1dbd9633b843541af55d77c82b725df02 (patch) | |
| tree | 7036ce2267ad670d95e1b2702f4554e97395e233 /src/data.c | |
| parent | 230ed1f9b6da42515735970c370424c37bda5d59 (diff) | |
| download | emacs-308e3ab1dbd9633b843541af55d77c82b725df02.tar.gz emacs-308e3ab1dbd9633b843541af55d77c82b725df02.zip | |
Disallow string data resizing (bug#79784)
Only allow string mutation that is certain not to require string data to
be resized and reallocated: writing bytes into a unibyte string, and
changing ASCII to ASCII in a multibyte string.
This ensures that mutation will never transform a unibyte string to
multibyte, that the size of a string in bytes never changes, and that
the byte offsets of characters remain the same. Most importantly, it
removes a long-standing obstacle to reform of string representation and
allows for future performance improvements.
* src/data.c (Faset): Disallow resizing string mutation.
* src/fns.c (clear_string_char_byte_cache):
* src/alloc.c (resize_string_data): Remove.
* test/src/data-tests.el (data-aset-string): New test.
* test/lisp/subr-tests.el (subr--subst-char-in-string):
Skip error cases.
* test/src/alloc-tests.el (aset-nbytes-change):
Remove test that is no longer relevant.
* doc/lispref/strings.texi (Modifying Strings):
* doc/lispref/sequences.texi (Array Functions):
* doc/lispref/text.texi (Substitution): Update manual.
* etc/NEWS: Announce.
Diffstat (limited to 'src/data.c')
| -rw-r--r-- | src/data.c | 43 |
1 files changed, 14 insertions, 29 deletions
diff --git a/src/data.c b/src/data.c index 493a8dd63fc..b8a48203bcf 100644 --- a/src/data.c +++ b/src/data.c | |||
| @@ -2574,7 +2574,10 @@ or a byte-code object. IDX starts at 0. */) | |||
| 2574 | DEFUN ("aset", Faset, Saset, 3, 3, 0, | 2574 | DEFUN ("aset", Faset, Saset, 3, 3, 0, |
| 2575 | doc: /* Store into the element of ARRAY at index IDX the value NEWELT. | 2575 | doc: /* Store into the element of ARRAY at index IDX the value NEWELT. |
| 2576 | Return NEWELT. ARRAY may be a vector, a string, a char-table or a | 2576 | Return NEWELT. ARRAY may be a vector, a string, a char-table or a |
| 2577 | bool-vector. IDX starts at 0. */) | 2577 | bool-vector. IDX starts at 0. |
| 2578 | If ARRAY is a unibyte string, NEWELT must be a single byte (0-255). | ||
| 2579 | If ARRAY is a multibyte string, NEWELT and the previous character at | ||
| 2580 | index IDX must both be ASCII (0-127). */) | ||
| 2578 | (register Lisp_Object array, Lisp_Object idx, Lisp_Object newelt) | 2581 | (register Lisp_Object array, Lisp_Object idx, Lisp_Object newelt) |
| 2579 | { | 2582 | { |
| 2580 | register EMACS_INT idxval; | 2583 | register EMACS_INT idxval; |
| @@ -2613,42 +2616,24 @@ bool-vector. IDX starts at 0. */) | |||
| 2613 | args_out_of_range (array, idx); | 2616 | args_out_of_range (array, idx); |
| 2614 | CHECK_CHARACTER (newelt); | 2617 | CHECK_CHARACTER (newelt); |
| 2615 | int c = XFIXNAT (newelt); | 2618 | int c = XFIXNAT (newelt); |
| 2616 | ptrdiff_t idxval_byte; | ||
| 2617 | int prev_bytes; | ||
| 2618 | unsigned char workbuf[MAX_MULTIBYTE_LENGTH], *p0 = workbuf, *p1; | ||
| 2619 | 2619 | ||
| 2620 | if (STRING_MULTIBYTE (array)) | 2620 | if (STRING_MULTIBYTE (array)) |
| 2621 | { | 2621 | { |
| 2622 | idxval_byte = string_char_to_byte (array, idxval); | 2622 | if (c > 0x7f) |
| 2623 | p1 = SDATA (array) + idxval_byte; | 2623 | error ("Attempt to store non-ASCII char into multibyte string"); |
| 2624 | prev_bytes = BYTES_BY_CHAR_HEAD (*p1); | 2624 | ptrdiff_t idxval_byte = string_char_to_byte (array, idxval); |
| 2625 | } | 2625 | unsigned char *p = SDATA (array) + idxval_byte; |
| 2626 | else if (SINGLE_BYTE_CHAR_P (c)) | 2626 | if (*p > 0x7f) |
| 2627 | { | 2627 | error ("Attempt to replace non-ASCII char in multibyte string"); |
| 2628 | SSET (array, idxval, c); | 2628 | *p = c; |
| 2629 | return newelt; | ||
| 2630 | } | 2629 | } |
| 2631 | else | 2630 | else |
| 2632 | { | 2631 | { |
| 2633 | for (ptrdiff_t i = SBYTES (array) - 1; i >= 0; i--) | 2632 | if (c > 0xff) |
| 2634 | if (!ASCII_CHAR_P (SREF (array, i))) | 2633 | error ("Attempt to store non-byte value into unibyte string"); |
| 2635 | args_out_of_range (array, newelt); | 2634 | SSET (array, idxval, c); |
| 2636 | /* ARRAY is an ASCII string. Convert it to a multibyte string. */ | ||
| 2637 | STRING_SET_MULTIBYTE (array); | ||
| 2638 | idxval_byte = idxval; | ||
| 2639 | p1 = SDATA (array) + idxval_byte; | ||
| 2640 | prev_bytes = 1; | ||
| 2641 | } | 2635 | } |
| 2642 | |||
| 2643 | int new_bytes = CHAR_STRING (c, p0); | ||
| 2644 | if (prev_bytes != new_bytes) | ||
| 2645 | p1 = resize_string_data (array, idxval_byte, prev_bytes, new_bytes); | ||
| 2646 | |||
| 2647 | do | ||
| 2648 | *p1++ = *p0++; | ||
| 2649 | while (--new_bytes != 0); | ||
| 2650 | } | 2636 | } |
| 2651 | |||
| 2652 | return newelt; | 2637 | return newelt; |
| 2653 | } | 2638 | } |
| 2654 | 2639 | ||