diff options
| author | Eli Zaretskii | 2025-06-21 11:08:05 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2025-06-21 11:08:05 +0300 |
| commit | 4607d3e426115ded71f0d8c19e7187ed45c19734 (patch) | |
| tree | 0c5db58576e0a2aa44c367af54685e027811b260 | |
| parent | 8201e7e7a765e2c74076cdddb4fdbe854e672438 (diff) | |
| download | emacs-4607d3e426115ded71f0d8c19e7187ed45c19734.tar.gz emacs-4607d3e426115ded71f0d8c19e7187ed45c19734.zip | |
Fix 'split-string' when TRIM is used
* lisp/subr.el (split-string): Support the case where STRING
begins with a match for SEPARATORS, and a match for SEPARATORS
also matches TRIM. Doc fix. (Bug#78690)
* test/lisp/subr-tests.el (subr-test-split-string): New test.
| -rw-r--r-- | lisp/subr.el | 35 | ||||
| -rw-r--r-- | test/lisp/subr-tests.el | 11 |
2 files changed, 31 insertions, 15 deletions
diff --git a/lisp/subr.el b/lisp/subr.el
index 729f8b3e09b..76a28ddaacc 100644
--- a/lisp/subr.el
+++ b/lisp/subr.el
| @@ -5755,9 +5755,9 @@ the substrings between the splitting points are collected as a list, | |||
| 5755 | which is returned. | 5755 | which is returned. |
| 5756 | 5756 | ||
| 5757 | If SEPARATORS is non-nil, it should be a regular expression matching text | 5757 | If SEPARATORS is non-nil, it should be a regular expression matching text |
| 5758 | that separates, but is not part of, the substrings. If nil it defaults to | 5758 | that separates, but is not part of, the substrings. If omitted or nil, |
| 5759 | `split-string-default-separators', normally \"[ \\f\\t\\n\\r\\v]+\", and | 5759 | it defaults to `split-string-default-separators', whose value is |
| 5760 | OMIT-NULLS is forced to t. | 5760 | normally \"[ \\f\\t\\n\\r\\v]+\", and OMIT-NULLS is then forced to t. |
| 5761 | 5761 | ||
| 5762 | If OMIT-NULLS is t, zero-length substrings are omitted from the list (so | 5762 | If OMIT-NULLS is t, zero-length substrings are omitted from the list (so |
| 5763 | that for the default value of SEPARATORS leading and trailing whitespace | 5763 | that for the default value of SEPARATORS leading and trailing whitespace |
| @@ -5768,11 +5768,6 @@ If TRIM is non-nil, it should be a regular expression to match | |||
| 5768 | text to trim from the beginning and end of each substring. If trimming | 5768 | text to trim from the beginning and end of each substring. If trimming |
| 5769 | makes the substring empty, it is treated as null. | 5769 | makes the substring empty, it is treated as null. |
| 5770 | 5770 | ||
| 5771 | If you want to trim whitespace from the substrings, the reliably correct | ||
| 5772 | way is using TRIM. Making SEPARATORS match that whitespace gives incorrect | ||
| 5773 | results when there is whitespace at the start or end of STRING. If you | ||
| 5774 | see such calls to `split-string', please fix them. | ||
| 5775 | |||
| 5776 | Note that the effect of `(split-string STRING)' is the same as | 5771 | Note that the effect of `(split-string STRING)' is the same as |
| 5777 | `(split-string STRING split-string-default-separators t)'. In the rare | 5772 | `(split-string STRING split-string-default-separators t)'. In the rare |
| 5778 | case that you wish to retain zero-length substrings when splitting on | 5773 | case that you wish to retain zero-length substrings when splitting on |
| @@ -5785,7 +5780,9 @@ Modifies the match data; use `save-match-data' if necessary." | |||
| 5785 | (start 0) | 5780 | (start 0) |
| 5786 | this-start this-end | 5781 | this-start this-end |
| 5787 | notfirst | 5782 | notfirst |
| 5783 | match-beg | ||
| 5788 | (list nil) | 5784 | (list nil) |
| 5785 | (strlen (length string)) | ||
| 5789 | (push-one | 5786 | (push-one |
| 5790 | ;; Push the substring in range THIS-START to THIS-END | 5787 | ;; Push the substring in range THIS-START to THIS-END |
| 5791 | ;; onto LIST, trimming it and perhaps discarding it. | 5788 | ;; onto LIST, trimming it and perhaps discarding it. |
| @@ -5794,6 +5791,7 @@ Modifies the match data; use `save-match-data' if necessary." | |||
| 5794 | ;; Discard the trim from start of this substring. | 5791 | ;; Discard the trim from start of this substring. |
| 5795 | (let ((tem (string-match trim string this-start))) | 5792 | (let ((tem (string-match trim string this-start))) |
| 5796 | (and (eq tem this-start) | 5793 | (and (eq tem this-start) |
| 5794 | (<= (match-end 0) this-end) | ||
| 5797 | (setq this-start (match-end 0))))) | 5795 | (setq this-start (match-end 0))))) |
| 5798 | 5796 | ||
| 5799 | (when (or keep-nulls (< this-start this-end)) | 5797 | (when (or keep-nulls (< this-start this-end)) |
| @@ -5811,18 +5809,25 @@ Modifies the match data; use `save-match-data' if necessary." | |||
| 5811 | 5809 | ||
| 5812 | (while (and (string-match rexp string | 5810 | (while (and (string-match rexp string |
| 5813 | (if (and notfirst | 5811 | (if (and notfirst |
| 5814 | (= start (match-beginning 0)) | 5812 | (= start match-beg) ; empty match |
| 5815 | (< start (length string))) | 5813 | (< start strlen)) |
| 5816 | (1+ start) start)) | 5814 | (1+ start) start)) |
| 5817 | (< start (length string))) | 5815 | (< start strlen)) |
| 5818 | (setq notfirst t) | 5816 | (setq notfirst t |
| 5819 | (setq this-start start this-end (match-beginning 0) | 5817 | match-beg (match-beginning 0)) |
| 5820 | start (match-end 0)) | 5818 | ;; If the separator is right at the beginning, produce an empty |
| 5819 | ;; substring in the result list. | ||
| 5820 | (if (= start match-beg) | ||
| 5821 | (setq this-start (match-end 0) | ||
| 5822 | this-end this-start) | ||
| 5823 | ;; Otherwise produce a substring from start to the separator. | ||
| 5824 | (setq this-start start this-end match-beg)) | ||
| 5825 | (setq start (match-end 0)) | ||
| 5821 | 5826 | ||
| 5822 | (funcall push-one)) | 5827 | (funcall push-one)) |
| 5823 | 5828 | ||
| 5824 | ;; Handle the substring at the end of STRING. | 5829 | ;; Handle the substring at the end of STRING. |
| 5825 | (setq this-start start this-end (length string)) | 5830 | (setq this-start start this-end strlen) |
| 5826 | (funcall push-one) | 5831 | (funcall push-one) |
| 5827 | 5832 | ||
| 5828 | (nreverse list))) | 5833 | (nreverse list))) |
diff --git a/test/lisp/subr-tests.el b/test/lisp/subr-tests.el
index 024cbe85bba..f598ee57f8d 100644
--- a/test/lisp/subr-tests.el
+++ b/test/lisp/subr-tests.el
| @@ -1505,5 +1505,16 @@ final or penultimate step during initialization.")) | |||
| 1505 | (should (hash-table-contains-p 'cookie h)) | 1505 | (should (hash-table-contains-p 'cookie h)) |
| 1506 | (should (hash-table-contains-p 'milk h)))) | 1506 | (should (hash-table-contains-p 'milk h)))) |
| 1507 | 1507 | ||
| 1508 | (ert-deftest subr-test-split-string () | ||
| 1509 | (let ((text "-*- lexical-binding: t; -*-") | ||
| 1510 | (seps "-\\*-") | ||
| 1511 | (trim "[ \t\n\r-]+")) | ||
| 1512 | (should (equal (split-string text seps nil trim) | ||
| 1513 | '("" "lexical-binding: t;" ""))) | ||
| 1514 | (should (equal (split-string text seps t trim) | ||
| 1515 | '("lexical-binding: t;"))) | ||
| 1516 | (should (equal (split-string text "[ \t\n\r-]*-\\*-[ \t\n\r-]*") | ||
| 1517 | '("" "lexical-binding: t;" ""))))) | ||
| 1518 | |||
| 1508 | (provide 'subr-tests) | 1519 | (provide 'subr-tests) |
| 1509 | ;;; subr-tests.el ends here | 1520 | ;;; subr-tests.el ends here |