diff options
| author | Mattias Engdegård | 2020-11-25 15:32:08 +0100 |
|---|---|---|
| committer | Mattias Engdegård | 2020-11-26 14:20:13 +0100 |
| commit | 558b6dbca7bc933fe01255be9ebeffebd44a2645 (patch) | |
| tree | 32f4bc6b85c11d31a513afb4b804e7502d00808d | |
| parent | 0287c5176867628e7acb834b3d5f26a150cfaf85 (diff) | |
| download | emacs-558b6dbca7bc933fe01255be9ebeffebd44a2645.tar.gz emacs-558b6dbca7bc933fe01255be9ebeffebd44a2645.zip | |
Fix replace-regexp-in-string substring match data translation
For certain patterns, re-matching the same regexp on the matched
substring does not produce correctly translated match data
(bug#15107 and bug#44861).
Using a new builtin function also improves performance since the
number of calls to string-match is halved.
Reported by Kevin Ryde and Shigeru Fukaya.
* lisp/subr.el (replace-regexp-in-string): Translate the match data
using match-data--translate instead of trusting a call to string-match
on the matched string to do the job.
* test/lisp/subr-tests.el (subr-replace-regexp-in-string):
Add test cases.
* src/search.c (Fmatch_data__translate): New internal function.
(syms_of_search): Register it as a subroutine.
| -rw-r--r-- | lisp/subr.el | 7 | ||||
| -rw-r--r-- | src/search.c | 18 | ||||
| -rw-r--r-- | test/lisp/subr-tests.el | 6 |
3 files changed, 26 insertions, 5 deletions
diff --git a/lisp/subr.el b/lisp/subr.el index 1fb0f9ab7e6..e009dcc2b9a 100644 --- a/lisp/subr.el +++ b/lisp/subr.el | |||
| @@ -4546,10 +4546,9 @@ and replace a sub-expression, e.g. | |||
| 4546 | (when (= me mb) (setq me (min l (1+ mb)))) | 4546 | (when (= me mb) (setq me (min l (1+ mb)))) |
| 4547 | ;; Generate a replacement for the matched substring. | 4547 | ;; Generate a replacement for the matched substring. |
| 4548 | ;; Operate on only the substring to minimize string consing. | 4548 | ;; Operate on only the substring to minimize string consing. |
| 4549 | ;; Set up match data for the substring for replacement; | 4549 | ;; Translate the match data so that it applies to the matched substring. |
| 4550 | ;; presumably this is likely to be faster than munging the | 4550 | (match-data--translate (- mb)) |
| 4551 | ;; match data directly in Lisp. | 4551 | (setq str (substring string mb me)) |
| 4552 | (string-match regexp (setq str (substring string mb me))) | ||
| 4553 | (setq matches | 4552 | (setq matches |
| 4554 | (cons (replace-match (if (stringp rep) | 4553 | (cons (replace-match (if (stringp rep) |
| 4555 | rep | 4554 | rep |
diff --git a/src/search.c b/src/search.c index e7f90949464..4eb634a3c03 100644 --- a/src/search.c +++ b/src/search.c | |||
| @@ -3031,6 +3031,23 @@ If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */) | |||
| 3031 | return Qnil; | 3031 | return Qnil; |
| 3032 | } | 3032 | } |
| 3033 | 3033 | ||
| 3034 | DEFUN ("match-data--translate", Fmatch_data__translate, Smatch_data__translate, | ||
| 3035 | 1, 1, 0, | ||
| 3036 | doc: /* Add N to all string positions in the match data. Internal. */) | ||
| 3037 | (Lisp_Object n) | ||
| 3038 | { | ||
| 3039 | CHECK_FIXNUM (n); | ||
| 3040 | EMACS_INT delta = XFIXNUM (n); | ||
| 3041 | if (EQ (last_thing_searched, Qt)) /* String match data only. */ | ||
| 3042 | for (ptrdiff_t i = 0; i < search_regs.num_regs; i++) | ||
| 3043 | if (search_regs.start[i] >= 0) | ||
| 3044 | { | ||
| 3045 | search_regs.start[i] = max (0, search_regs.start[i] + delta); | ||
| 3046 | search_regs.end[i] = max (0, search_regs.end[i] + delta); | ||
| 3047 | } | ||
| 3048 | return Qnil; | ||
| 3049 | } | ||
| 3050 | |||
| 3034 | /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data | 3051 | /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data |
| 3035 | if asynchronous code (filter or sentinel) is running. */ | 3052 | if asynchronous code (filter or sentinel) is running. */ |
| 3036 | static void | 3053 | static void |
| @@ -3388,6 +3405,7 @@ is to bind it with `let' around a small expression. */); | |||
| 3388 | defsubr (&Smatch_end); | 3405 | defsubr (&Smatch_end); |
| 3389 | defsubr (&Smatch_data); | 3406 | defsubr (&Smatch_data); |
| 3390 | defsubr (&Sset_match_data); | 3407 | defsubr (&Sset_match_data); |
| 3408 | defsubr (&Smatch_data__translate); | ||
| 3391 | defsubr (&Sregexp_quote); | 3409 | defsubr (&Sregexp_quote); |
| 3392 | defsubr (&Snewline_cache_check); | 3410 | defsubr (&Snewline_cache_check); |
| 3393 | 3411 | ||
diff --git a/test/lisp/subr-tests.el b/test/lisp/subr-tests.el index c77be511dc2..67f7fc97496 100644 --- a/test/lisp/subr-tests.el +++ b/test/lisp/subr-tests.el | |||
| @@ -545,7 +545,11 @@ See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=19350." | |||
| 545 | (match-beginning 1) (match-end 1))) | 545 | (match-beginning 1) (match-end 1))) |
| 546 | "babbcaacabc") | 546 | "babbcaacabc") |
| 547 | "b<abbc,0,4,1,3>a<ac,0,2,1,1><abc,0,3,1,2>")) | 547 | "b<abbc,0,4,1,3>a<ac,0,2,1,1><abc,0,3,1,2>")) |
| 548 | ) | 548 | ;; anchors (bug#15107, bug#44861) |
| 549 | (should (equal (replace-regexp-in-string "a\\B" "b" "a aaaa") | ||
| 550 | "a bbba")) | ||
| 551 | (should (equal (replace-regexp-in-string "\\`\\|x" "z" "--xx--") | ||
| 552 | "z--zz--"))) | ||
| 549 | 553 | ||
| 550 | (provide 'subr-tests) | 554 | (provide 'subr-tests) |
| 551 | ;;; subr-tests.el ends here | 555 | ;;; subr-tests.el ends here |