aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTassilo Horn2019-02-23 21:18:36 +0100
committerTassilo Horn2019-02-23 21:31:15 +0100
commite96923c188a2a38d09917c5b7f606187a1413a96 (patch)
tree6ad7b9a1549bf520747db72e36eb62ceb6fcc720
parent5f640bfdf84753322763be23ebaa8ded92dc1c5d (diff)
downloademacs-e96923c188a2a38d09917c5b7f606187a1413a96.tar.gz
emacs-e96923c188a2a38d09917c5b7f606187a1413a96.zip
Improve replace-buffer-contents/replace-region-contents
* src/editfns.c (Freplace_buffer_contents): Add two optional arguments for mitigating performance issues. * lisp/emacs-lisp/subr-x.el (replace-region-contents): Move from subr.el. Add the same two arguments as for replace-buffer-contents. * lisp/json.el (json-pretty-print-max-secs): New variable holding the default MAX-SECS value json-pretty-print passes to replace-buffer-contents. (json-pretty-print): Use it. * doc/lispref/text.texi (Replacing): Add documentation for replace-buffer-contents two new optional arguments. Document replace-region-contents.
-rw-r--r--doc/lispref/text.texi55
-rw-r--r--etc/NEWS10
-rw-r--r--lisp/emacs-lisp/subr-x.el29
-rw-r--r--lisp/json.el15
-rw-r--r--lisp/subr.el26
-rw-r--r--src/editfns.c95
6 files changed, 174 insertions, 56 deletions
diff --git a/doc/lispref/text.texi b/doc/lispref/text.texi
index 6dfd211d1a0..88843c3764f 100644
--- a/doc/lispref/text.texi
+++ b/doc/lispref/text.texi
@@ -4436,20 +4436,57 @@ all markers unrelocated.
4436 You can use the following function to replace the text of one buffer 4436 You can use the following function to replace the text of one buffer
4437with the text of another buffer: 4437with the text of another buffer:
4438 4438
4439@deffn Command replace-buffer-contents source 4439@deffn Command replace-buffer-contents source &optional max-secs max-costs
4440This function replaces the accessible portion of the current buffer 4440This function replaces the accessible portion of the current buffer
4441with the accessible portion of the buffer @var{source}. @var{source} 4441with the accessible portion of the buffer @var{source}. @var{source}
4442may either be a buffer object or the name of a buffer. When 4442may either be a buffer object or the name of a buffer. When
4443@code{replace-buffer-contents} succeeds, the text of the accessible 4443@code{replace-buffer-contents} succeeds, the text of the accessible
4444portion of the current buffer will be equal to the text of the 4444portion of the current buffer will be equal to the text of the
4445accessible portion of the @var{source} buffer. This function attempts 4445accessible portion of the @var{source} buffer.
4446to keep point, markers, text properties, and overlays in the current 4446
4447buffer intact. One potential case where this behavior is useful is 4447This function attempts to keep point, markers, text properties, and
4448external code formatting programs: they typically write the 4448overlays in the current buffer intact. One potential case where this
4449reformatted text into a temporary buffer or file, and using 4449behavior is useful is external code formatting programs: they
4450@code{delete-region} and @code{insert-buffer-substring} would destroy 4450typically write the reformatted text into a temporary buffer or file,
4451these properties. However, the latter combination is typically 4451and using @code{delete-region} and @code{insert-buffer-substring}
4452faster. @xref{Deletion}, and @ref{Insertion}. 4452would destroy these properties. However, the latter combination is
4453typically faster (@xref{Deletion}, and @ref{Insertion}).
4454
4455For its working, @code{replace-buffer-contents} needs to compare the
4456contents of the original buffer with that of @code{source} which is a
4457costly operation if the buffers are huge and there is a high number of
4458differences between them. In order to keep
4459@code{replace-buffer-contents}'s runtime in bounds, it has two
4460optional arguments.
4461
4462@code{max-secs} defines a hard boundary in terms of seconds. If given
4463and exceeded, it will fall back to @code{delete-region} and
4464@code{insert-buffer-substring}.
4465
4466@code{max-costs} defines the quality of the difference computation.
4467If the actual costs exceed this limit, heuristics are used to provide
4468a faster but suboptimal solution. The default value is 1000000.
4469
4470@code{replace-buffer-contents} returns t if a non-destructive
4471replacement could be performed. Otherwise, i.e., if MAX-SECS was
4472exceeded, it returns nil.
4473@end deffn
4474
4475@defun Command replace-region-contents beg end replace-fn &optional max-secs max-costs
4476This function replaces the region between @code{beg} and @code{end}
4477using the given @code{replace-fn}. The function @code{replace-fn} is
4478run in the current buffer narrowed to the specified region and it
4479should return either a string or a buffer replacing the region.
4480
4481The replacement is performed using @code{replace-buffer-contents}
4482which also describes the @code{max-secs} and @code{max-costs}
4483arguments and the return value.
4484
4485Note: If the replacement is a string, it will be placed in a temporary
4486buffer so that @code{replace-buffer-contents} can operate on it.
4487Therefore, if you already have the replacement in a buffer, it makes
4488no sense to convert it to a string using @code{buffer-substring} or
4489similar.
4453@end deffn 4490@end deffn
4454 4491
4455@node Decompression 4492@node Decompression
diff --git a/etc/NEWS b/etc/NEWS
index 3c5fb24b0e4..67e376d9b38 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -335,6 +335,16 @@ the node "(emacs) Directory Variables" of the user manual.
335'make-network-process' now uses the correct loopback address when 335'make-network-process' now uses the correct loopback address when
336asked to use :host 'local and :family 'ipv6. 336asked to use :host 'local and :family 'ipv6.
337 337
338+++
339** The new function `replace-region-contents' replaces the current
340region using a given replacement-function in a non-destructive manner
341(in terms of `replace-buffer-contents').
342
343+++
344** The command `replace-buffer-contents' now has two optional
345arguments mitigating performance issues when operating on huge
346buffers.
347
338 348
339* Changes in Specialized Modes and Packages in Emacs 27.1 349* Changes in Specialized Modes and Packages in Emacs 27.1
340 350
diff --git a/lisp/emacs-lisp/subr-x.el b/lisp/emacs-lisp/subr-x.el
index 7d9f0bba4c7..b9ffe6a6fc6 100644
--- a/lisp/emacs-lisp/subr-x.el
+++ b/lisp/emacs-lisp/subr-x.el
@@ -250,6 +250,35 @@ TRIM-LEFT and TRIM-RIGHT default to \"[ \\t\\n\\r]+\"."
250 (substring string 0 (- (length string) (length suffix))) 250 (substring string 0 (- (length string) (length suffix)))
251 string)) 251 string))
252 252
253(defun replace-region-contents (beg end replace-fn
254 &optional max-secs max-costs)
255 "Replace the region between BEG and END using REPLACE-FN.
256REPLACE-FN runs on the current buffer narrowed to the region. It
257should return either a string or a buffer replacing the region.
258
259The replacement is performed using `replace-buffer-contents'
260which also describes the MAX-SECS and MAX-COSTS arguments and the
261return value.
262
263Note: If the replacement is a string, it'll be placed in a
264temporary buffer so that `replace-buffer-contents' can operate on
265it. Therefore, if you already have the replacement in a buffer,
266it makes no sense to convert it to a string using
267`buffer-substring' or similar."
268 (save-excursion
269 (save-restriction
270 (narrow-to-region beg end)
271 (goto-char (point-min))
272 (let ((repl (funcall replace-fn)))
273 (if (bufferp repl)
274 (replace-buffer-contents repl max-secs max-costs)
275 (let ((source-buffer (current-buffer)))
276 (with-temp-buffer
277 (insert repl)
278 (let ((tmp-buffer (current-buffer)))
279 (set-buffer source-buffer)
280 (replace-buffer-contents tmp-buffer max-secs max-costs)))))))))
281
253(provide 'subr-x) 282(provide 'subr-x)
254 283
255;;; subr-x.el ends here 284;;; subr-x.el ends here
diff --git a/lisp/json.el b/lisp/json.el
index 19b8f09dcda..44b3c33df7c 100644
--- a/lisp/json.el
+++ b/lisp/json.el
@@ -49,10 +49,13 @@
49;; 2008-02-21 - Installed in GNU Emacs. 49;; 2008-02-21 - Installed in GNU Emacs.
50;; 2011-10-17 - Patch `json-alist-p' and `json-plist-p' to avoid recursion -tzz 50;; 2011-10-17 - Patch `json-alist-p' and `json-plist-p' to avoid recursion -tzz
51;; 2012-10-25 - Added pretty-printed reformatting -Ryan Crum (ryan@ryancrum.org) 51;; 2012-10-25 - Added pretty-printed reformatting -Ryan Crum (ryan@ryancrum.org)
52;; 2019-02-02 - Pretty-printing now uses replace-region-contents and support for
53;; minimization -tsdh
52 54
53;;; Code: 55;;; Code:
54 56
55(require 'map) 57(require 'map)
58(require 'subr-x)
56 59
57;; Parameters 60;; Parameters
58 61
@@ -738,6 +741,12 @@ With prefix argument MINIMIZE, minimize it instead."
738 (interactive "P") 741 (interactive "P")
739 (json-pretty-print (point-min) (point-max) minimize)) 742 (json-pretty-print (point-min) (point-max) minimize))
740 743
744(defvar json-pretty-print-max-secs 2.0
745 "Maximum time for `json-pretty-print's comparison.
746The function `json-pretty-print' uses `replace-region-contents'
747(which see) passing the value of this variable as argument
748MAX-SECS.")
749
741(defun json-pretty-print (begin end &optional minimize) 750(defun json-pretty-print (begin end &optional minimize)
742 "Pretty-print selected region. 751 "Pretty-print selected region.
743With prefix argument MINIMIZE, minimize it instead." 752With prefix argument MINIMIZE, minimize it instead."
@@ -749,7 +758,11 @@ With prefix argument MINIMIZE, minimize it instead."
749 (json-object-type 'alist)) 758 (json-object-type 'alist))
750 (replace-region-contents 759 (replace-region-contents
751 begin end 760 begin end
752 (lambda () (json-encode (json-read)))))) 761 (lambda () (json-encode (json-read)))
762 json-pretty-print-max-secs
763 ;; FIXME: What's a good value here? Can we use something better,
764 ;; e.g., by deriving a value from the size of the region?
765 64)))
753 766
754(defun json-pretty-print-buffer-ordered (&optional minimize) 767(defun json-pretty-print-buffer-ordered (&optional minimize)
755 "Pretty-print current buffer with object keys ordered. 768 "Pretty-print current buffer with object keys ordered.
diff --git a/lisp/subr.el b/lisp/subr.el
index 69ae804e200..5c8b84b8e9c 100644
--- a/lisp/subr.el
+++ b/lisp/subr.el
@@ -5476,30 +5476,4 @@ returned list are in the same order as in TREE.
5476;; for discoverability: 5476;; for discoverability:
5477(defalias 'flatten-list 'flatten-tree) 5477(defalias 'flatten-list 'flatten-tree)
5478 5478
5479(defun replace-region-contents (beg end replace-fn)
5480 "Replace the region between BEG and END using REPLACE-FN.
5481REPLACE-FN runs on the current buffer narrowed to the region. It
5482should return either a string or a buffer replacing the region.
5483
5484The replacement is performed using `replace-buffer-contents'.
5485
5486Note: If the replacement is a string, it'll be placed in a
5487temporary buffer so that `replace-buffer-contents' can operate on
5488it. Therefore, if you already have the replacement in a buffer,
5489it makes no sense to convert it to a string using
5490`buffer-substring' or similar."
5491 (save-excursion
5492 (save-restriction
5493 (narrow-to-region beg end)
5494 (goto-char (point-min))
5495 (let ((repl (funcall replace-fn)))
5496 (if (bufferp repl)
5497 (replace-buffer-contents repl)
5498 (let ((source-buffer (current-buffer)))
5499 (with-temp-buffer
5500 (insert repl)
5501 (let ((tmp-buffer (current-buffer)))
5502 (set-buffer source-buffer)
5503 (replace-buffer-contents tmp-buffer)))))))))
5504
5505;;; subr.el ends here 5479;;; subr.el ends here
diff --git a/src/editfns.c b/src/editfns.c
index 7a600bacf18..8f21f8a677e 100644
--- a/src/editfns.c
+++ b/src/editfns.c
@@ -20,6 +20,7 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
20 20
21#include <config.h> 21#include <config.h>
22#include <sys/types.h> 22#include <sys/types.h>
23#include <sys/time.h>
23#include <stdio.h> 24#include <stdio.h>
24 25
25#ifdef HAVE_PWD_H 26#ifdef HAVE_PWD_H
@@ -1912,10 +1913,6 @@ determines whether case is significant or ignored. */)
1912#undef EQUAL 1913#undef EQUAL
1913#define USE_HEURISTIC 1914#define USE_HEURISTIC
1914 1915
1915#ifdef USE_HEURISTIC
1916#define DIFFSEQ_HEURISTIC
1917#endif
1918
1919/* Counter used to rarely_quit in replace-buffer-contents. */ 1916/* Counter used to rarely_quit in replace-buffer-contents. */
1920static unsigned short rbc_quitcounter; 1917static unsigned short rbc_quitcounter;
1921 1918
@@ -1937,30 +1934,54 @@ static unsigned short rbc_quitcounter;
1937 /* Bit vectors recording for each character whether it was deleted 1934 /* Bit vectors recording for each character whether it was deleted
1938 or inserted. */ \ 1935 or inserted. */ \
1939 unsigned char *deletions; \ 1936 unsigned char *deletions; \
1940 unsigned char *insertions; 1937 unsigned char *insertions; \
1938 struct timeval start; \
1939 double max_secs; \
1940 unsigned int early_abort_tests;
1941 1941
1942#define NOTE_DELETE(ctx, xoff) set_bit ((ctx)->deletions, (xoff)) 1942#define NOTE_DELETE(ctx, xoff) set_bit ((ctx)->deletions, (xoff))
1943#define NOTE_INSERT(ctx, yoff) set_bit ((ctx)->insertions, (yoff)) 1943#define NOTE_INSERT(ctx, yoff) set_bit ((ctx)->insertions, (yoff))
1944#define EARLY_ABORT(ctx) compareseq_early_abort (ctx)
1944 1945
1945struct context; 1946struct context;
1946static void set_bit (unsigned char *, OFFSET); 1947static void set_bit (unsigned char *, OFFSET);
1947static bool bit_is_set (const unsigned char *, OFFSET); 1948static bool bit_is_set (const unsigned char *, OFFSET);
1948static bool buffer_chars_equal (struct context *, OFFSET, OFFSET); 1949static bool buffer_chars_equal (struct context *, OFFSET, OFFSET);
1950static bool compareseq_early_abort (struct context *);
1949 1951
1950#include "minmax.h" 1952#include "minmax.h"
1951#include "diffseq.h" 1953#include "diffseq.h"
1952 1954
1953DEFUN ("replace-buffer-contents", Freplace_buffer_contents, 1955DEFUN ("replace-buffer-contents", Freplace_buffer_contents,
1954 Sreplace_buffer_contents, 1, 1, "bSource buffer: ", 1956 Sreplace_buffer_contents, 1, 3, "bSource buffer: ",
1955 doc: /* Replace accessible portion of current buffer with that of SOURCE. 1957 doc: /* Replace accessible portion of current buffer with that of SOURCE.
1956SOURCE can be a buffer or a string that names a buffer. 1958SOURCE can be a buffer or a string that names a buffer.
1957Interactively, prompt for SOURCE. 1959Interactively, prompt for SOURCE.
1960
1958As far as possible the replacement is non-destructive, i.e. existing 1961As far as possible the replacement is non-destructive, i.e. existing
1959buffer contents, markers, properties, and overlays in the current 1962buffer contents, markers, properties, and overlays in the current
1960buffer stay intact. 1963buffer stay intact.
1961Warning: this function can be slow if there's a large number of small 1964
1962differences between the two buffers. */) 1965Because this function can be very slow if there is a large number of
1963 (Lisp_Object source) 1966differences between the two buffers, there are two optional arguments
1967mitigating this issue.
1968
1969The MAX-SECS argument, if given, defines a hard limit on the time used
1970for comparing the buffers. If it takes longer than MAX-SECS, the
1971function falls back to a plain `delete-region' and
1972`insert-buffer-substring'. (Note that the checks are not performed
1973too evenly over time, so in some cases it may run a bit longer than
1974allowed).
1975
1976The optional argument MAX-COSTS defines the quality of the difference
1977computation. If the actual costs exceed this limit, heuristics are
1978used to provide a faster but suboptimal solution. The default value
1979is 1000000.
1980
1981This function returns t if a non-destructive replacement could be
1982performed. Otherwise, i.e., if MAX-SECS was exceeded, it returns
1983nil. */)
1984 (Lisp_Object source, Lisp_Object max_secs, Lisp_Object max_costs)
1964{ 1985{
1965 struct buffer *a = current_buffer; 1986 struct buffer *a = current_buffer;
1966 Lisp_Object source_buffer = Fget_buffer (source); 1987 Lisp_Object source_buffer = Fget_buffer (source);
@@ -1985,15 +2006,18 @@ differences between the two buffers. */)
1985 empty. */ 2006 empty. */
1986 2007
1987 if (a_empty && b_empty) 2008 if (a_empty && b_empty)
1988 return Qnil; 2009 return Qt;
1989 2010
1990 if (a_empty) 2011 if (a_empty)
1991 return Finsert_buffer_substring (source, Qnil, Qnil); 2012 {
2013 Finsert_buffer_substring (source, Qnil, Qnil);
2014 return Qt;
2015 }
1992 2016
1993 if (b_empty) 2017 if (b_empty)
1994 { 2018 {
1995 del_range_both (BEGV, BEGV_BYTE, ZV, ZV_BYTE, true); 2019 del_range_both (BEGV, BEGV_BYTE, ZV, ZV_BYTE, true);
1996 return Qnil; 2020 return Qt;
1997 } 2021 }
1998 2022
1999 ptrdiff_t count = SPECPDL_INDEX (); 2023 ptrdiff_t count = SPECPDL_INDEX ();
@@ -2007,6 +2031,12 @@ differences between the two buffers. */)
2007 ptrdiff_t *buffer; 2031 ptrdiff_t *buffer;
2008 USE_SAFE_ALLOCA; 2032 USE_SAFE_ALLOCA;
2009 SAFE_NALLOCA (buffer, 2, diags); 2033 SAFE_NALLOCA (buffer, 2, diags);
2034
2035 if (NILP (max_costs))
2036 XSETFASTINT (max_costs, 1000000);
2037 else
2038 CHECK_FIXNUM (max_costs);
2039
2010 /* Micro-optimization: Casting to size_t generates much better 2040 /* Micro-optimization: Casting to size_t generates much better
2011 code. */ 2041 code. */
2012 ptrdiff_t del_bytes = (size_t) size_a / CHAR_BIT + 1; 2042 ptrdiff_t del_bytes = (size_t) size_a / CHAR_BIT + 1;
@@ -2022,20 +2052,26 @@ differences between the two buffers. */)
2022 .insertions = SAFE_ALLOCA (ins_bytes), 2052 .insertions = SAFE_ALLOCA (ins_bytes),
2023 .fdiag = buffer + size_b + 1, 2053 .fdiag = buffer + size_b + 1,
2024 .bdiag = buffer + diags + size_b + 1, 2054 .bdiag = buffer + diags + size_b + 1,
2025#ifdef DIFFSEQ_HEURISTIC
2026 .heuristic = true, 2055 .heuristic = true,
2027#endif 2056 .too_expensive = XFIXNUM (max_costs),
2028 /* FIXME: Find a good number for .too_expensive. */ 2057 .max_secs = FLOATP (max_secs) ? XFLOAT_DATA (max_secs) : -1.0,
2029 .too_expensive = 64, 2058 .early_abort_tests = 0
2030 }; 2059 };
2031 memclear (ctx.deletions, del_bytes); 2060 memclear (ctx.deletions, del_bytes);
2032 memclear (ctx.insertions, ins_bytes); 2061 memclear (ctx.insertions, ins_bytes);
2062
2063 gettimeofday (&ctx.start, NULL);
2033 /* compareseq requires indices to be zero-based. We add BEGV back 2064 /* compareseq requires indices to be zero-based. We add BEGV back
2034 later. */ 2065 later. */
2035 bool early_abort = compareseq (0, size_a, 0, size_b, false, &ctx); 2066 bool early_abort = compareseq (0, size_a, 0, size_b, false, &ctx);
2036 /* Since we didn’t define EARLY_ABORT, we should never abort 2067
2037 early. */ 2068 if (early_abort)
2038 eassert (! early_abort); 2069 {
2070 del_range (min_a, ZV);
2071 Finsert_buffer_substring (source, Qnil,Qnil);
2072 SAFE_FREE_UNBIND_TO (count, Qnil);
2073 return Qnil;
2074 }
2039 2075
2040 rbc_quitcounter = 0; 2076 rbc_quitcounter = 0;
2041 2077
@@ -2097,6 +2133,7 @@ differences between the two buffers. */)
2097 --i; 2133 --i;
2098 --j; 2134 --j;
2099 } 2135 }
2136
2100 SAFE_FREE_UNBIND_TO (count, Qnil); 2137 SAFE_FREE_UNBIND_TO (count, Qnil);
2101 rbc_quitcounter = 0; 2138 rbc_quitcounter = 0;
2102 2139
@@ -2106,7 +2143,7 @@ differences between the two buffers. */)
2106 update_compositions (BEGV, ZV, CHECK_INSIDE); 2143 update_compositions (BEGV, ZV, CHECK_INSIDE);
2107 } 2144 }
2108 2145
2109 return Qnil; 2146 return Qt;
2110} 2147}
2111 2148
2112static void 2149static void
@@ -2173,6 +2210,18 @@ buffer_chars_equal (struct context *ctx,
2173 == BUF_FETCH_MULTIBYTE_CHAR (ctx->buffer_b, bpos_b); 2210 == BUF_FETCH_MULTIBYTE_CHAR (ctx->buffer_b, bpos_b);
2174} 2211}
2175 2212
2213static bool
2214compareseq_early_abort (struct context *ctx)
2215{
2216 if (ctx->max_secs < 0.0)
2217 return false;
2218
2219 struct timeval now, diff;
2220 gettimeofday (&now, NULL);
2221 timersub (&now, &ctx->start, &diff);
2222 return diff.tv_sec + diff.tv_usec / 1000000.0 > ctx->max_secs;
2223}
2224
2176 2225
2177static void 2226static void
2178subst_char_in_region_unwind (Lisp_Object arg) 2227subst_char_in_region_unwind (Lisp_Object arg)
@@ -4441,6 +4490,12 @@ it to be non-nil. */);
4441 binary_as_unsigned = true; 4490 binary_as_unsigned = true;
4442#endif 4491#endif
4443 4492
4493 DEFVAR_LISP ("replace-buffer-contents-max-secs",
4494 Vreplace_buffer_contents_max_secs,
4495 doc: /* If differencing the two buffers takes longer than this,
4496`replace-buffer-contents' falls back to a plain delete and insert. */);
4497 Vreplace_buffer_contents_max_secs = Qnil;
4498
4444 defsubr (&Spropertize); 4499 defsubr (&Spropertize);
4445 defsubr (&Schar_equal); 4500 defsubr (&Schar_equal);
4446 defsubr (&Sgoto_char); 4501 defsubr (&Sgoto_char);