insert-file-contents 1 KiB seek fix

This improves on recent fixes to Bug#77315.
When sampling the first 1 KiB and last 3 KiB, do not seek before BEG if given.
Instead, sample starting at BEG, to be consistent with the non-optimized version.
* src/fileio.c (xlseek): Return POS, for convenience.
(Finsert_file_contents): Sample the first 1 KiB correctly when BEG is given.
In a CURPOS local, keep track of the input file offset, or for nonseekable files the number of bytes read, while this value is important.
This lets us avoid some unnecessary seeks.
Report an error earlier if the file is not seekable and BEG is nonzero, to save work and simplify the code.
When sampling, discard less data, as this is simpler and there’s little point to discarding it.
author: Paul Eggert 2025-07-18 17:29:25 -0700
committer: Paul Eggert 2025-07-21 16:23:18 -0700
commit: 2903b0b92cfdf87fbbb764c4f202479e9a1ac941 (patch)
tree: a038a9545e482afa635a5ad21b6334f21941ee73 /src
parent: ffe6d33ef4ef4afce98a1521a03acd6934cc53d2 (diff)
download: emacs-2903b0b92cfdf87fbbb764c4f202479e9a1ac941.tar.gz
emacs-2903b0b92cfdf87fbbb764c4f202479e9a1ac941.zip
1 files changed, 47 insertions, 46 deletions
diff --git a/src/fileio.c b/src/fileio.c
index 9d777c7415b..dbd9bd4ce55 100644
--- a/src/fileio.c
+++ b/src/fileio.c
@@ -4026,12 +4026,14 @@ maybe_move_gap (struct buffer *b)
    }
 }
-/* In FD, position to POS.  If this fails, report an error with FILENAME.  */
+/* In FD, position to POS.  Return POS if successful, otherwise signal
-static void
+   an error with FILENAME.  */
+static off_t
 xlseek (emacs_fd fd, off_t pos, Lisp_Object filename)
 {
  if (emacs_fd_lseek (fd, pos, SEEK_SET) < 0)
    report_file_error ("Setting file position", filename);
+  return pos;
 }
 /* A good blocksize to minimize system call overhead across most systems.
@@ -4224,16 +4226,21 @@ by calling `format-decode', which see.  */)
             : get_stat_mtime (&st));
  }
-  /* The initial offset can be nonzero, e.g., /dev/stdin.
+  /* The initial input position, or -1 if the file is not seekable.  */
-     If SEEK_CUR works, later code assumes SEEK_SET also works,
+  off_t begpos = emacs_fd_lseek (fd, beg_offset,
-     but tests SEEK_END rather than relying on it
+                                 !NILP (beg) ? SEEK_SET : SEEK_CUR);
-     as SEEK_END can fail on Linux /proc files.  */
-  off_t initial_offset = emacs_fd_lseek (fd, 0, SEEK_CUR);
+  /* Whether the file is seekable via SEEK_CUR and SEEK_SET.
-  bool seekable = 0 <= initial_offset;
+     SEEK_END is trickier as it is not reliable on /proc files,
-  if (seekable && NILP (beg))
+     so it is tested separately below.  */
-    beg_offset = initial_offset;
+  bool seekable = 0 <= begpos;
-  if (end_offset <= beg_offset)
-    goto handled;
+  /* The current input position if the file is seekable,
+     otherwise the number of bytes read.  */
+  off_t curpos = seekable ? begpos : 0;
+  if (!seekable && beg_offset != 0)
+    report_file_error ("Setting file position", orig_filename);
  /* The REPLACE code will need to be changed in order to work on
     named pipes, and it's probably just not worth it.  So we should
@@ -4263,6 +4270,9 @@ by calling `format-decode', which see.  */)
                  orig_filename);
    }
+  if (end_offset <= beg_offset)
+    goto handled;
  /* Check now whether the buffer will become too large,
     in the likely case where the file's length is not changing.
     This saves a lot of needless work before a buffer overflow.
@@ -4310,31 +4320,25 @@ by calling `format-decode', which see.  */)
                 do not use st_size or report any SEEK_END failure.  */
              static_assert (4 * 1024 < sizeof read_buf);
              ptrdiff_t nread = emacs_full_read (fd, read_buf, 4 * 1024);
-              if (4 * 1024 <= nread)
+              if (nread < 4 * 1024)
+                curpos = nread;
+              else
                {
-                  off_t tailoff = emacs_fd_lseek (fd, - 3 * 1024, SEEK_END);
+                  curpos = emacs_fd_lseek (fd, - 3 * 1024, SEEK_END);
-                  if (tailoff < 0)
+                  if (curpos < 0)
-                    tailoff = nread;
+                    curpos = nread;
                  /* When appending the last 3 KiB, read extra bytes
-                     without trusting tailoff, as the file may be growing.  */
+                     without trusting SEEK_END, as the file may be growing.
+                     Although this may yield more than 4 KiB of data total,
+                     and the trailing data may not be from file end if
+                     the file is growing, it is good enough.  */
                  nread = emacs_full_read (fd, read_buf + 1024,
                                           sizeof read_buf - 1024);
-                  if (nread == sizeof read_buf - 1024)
+                  if (0 <= nread)
-                    {
-                      /* Give up reading the last 3 KiB; the file is
-                         growing too rapidly.  */
-                      nread = 1024;
-                    }
-                  else if (0 <= nread)
                    {
+                      curpos += nread;
                      nread += 1024;
-                      if (4 * 1024 < nread)
-                        {
-                          memmove (read_buf + 1024,
-                                   read_buf + nread - 3 * 1024, 3 * 1024);
-                          nread = 4 * 1024;
-                        }
                    }
                }
@@ -4372,9 +4376,6 @@ by calling `format-decode', which see.  */)
                  /* Discard the unwind protect for recovering the
                     current buffer.  */
                  specpdl_ptr--;
-                  /* Rewind the file for the actual read done later.  */
-                  xlseek (fd, initial_offset, orig_filename);
                }
            }
@@ -4430,15 +4431,14 @@ by calling `format-decode', which see.  */)
         give up on handling REPLACE in the optimized way.  */
      bool giveup_match_end = false;
-      if (beg_offset != initial_offset)
+      if (beg_offset != curpos)
-        xlseek (fd, beg_offset, orig_filename);
+        curpos = xlseek (fd, beg_offset, orig_filename);
      /* Count how many chars at the start of the file
         match the text at the beginning of the buffer.  */
      while (true)
        {
          off_t bytes_to_read = sizeof read_buf;
-          off_t curpos = beg_offset + (same_at_start - BEGV_BYTE);
          bytes_to_read = min (bytes_to_read, end_offset - curpos);
          ptrdiff_t nread = (bytes_to_read <= 0
                             ? 0
@@ -4448,6 +4448,8 @@ by calling `format-decode', which see.  */)
          if (0 < nread)
            {
+              curpos += nread;
              if (CODING_REQUIRE_DETECTION (&coding))
                {
                  coding_system
@@ -4507,7 +4509,7 @@ by calling `format-decode', which see.  */)
                  ptrdiff_t n = emacs_full_read (fd, read_buf, sizeof read_buf);
                  if (n < 0)
                    report_file_error ("Read error", orig_filename);
-                  endpos += n;
+                  curpos = endpos += n;
                  /* Give up if the file grew more than even the test read.  */
                  giveup_match_end = n == sizeof read_buf;
@@ -4532,10 +4534,6 @@ by calling `format-decode', which see.  */)
      while (!giveup_match_end)
        {
          ptrdiff_t nread, bufpos, trial;
-          off_t curpos;
-          /* At what file position are we now scanning?  */
-          curpos = endpos - (ZV_BYTE - same_at_end);
          /* How much can we scan in the next step?  Compare with poslim
             to prevent overlap of the matching head with the matching tail.
@@ -4550,10 +4548,10 @@ by calling `format-decode', which see.  */)
          if (trial == 0)
            break;
-          curpos -= trial;
+          curpos = xlseek (fd, curpos - trial, orig_filename);
-          xlseek (fd, curpos, orig_filename);
          nread = emacs_full_read (fd, read_buf, trial);
+          curpos += nread;
          if (nread < trial)
            {
              if (nread < 0)
@@ -4670,7 +4668,8 @@ by calling `format-decode', which see.  */)
      /* First read the whole file, performing code conversion into
         CONVERSION_BUFFER.  */
-      xlseek (fd, beg_offset, orig_filename);
+      if (beg_offset != curpos)
+        curpos = xlseek (fd, beg_offset, orig_filename);
      inserted = 0;             /* Bytes put into CONVERSION_BUFFER so far.  */
      unprocessed = 0;          /* Bytes not processed in previous loop.  */
@@ -4686,6 +4685,7 @@ by calling `format-decode', which see.  */)
            report_file_error ("Read error", orig_filename);
          if (this == 0)
            break;
+          curpos += this;
          BUF_TEMP_SET_PT (XBUFFER (conversion_buffer),
                           BUF_Z (XBUFFER (conversion_buffer)));
@@ -4856,9 +4856,10 @@ by calling `format-decode', which see.  */)
      make_gap (growth);
    }
-  if (beg_offset != 0 || (!NILP (replace)
+  if (beg_offset != curpos)
-                          && !BASE_EQ (replace, Qunbound)))
    xlseek (fd, beg_offset, orig_filename);
+  /* curpos effectively goes out of scope now, as it is no longer needed,
+     so do not bother to update curpos from now on.  */
  /* Total bytes inserted.  */
  inserted = 0;
author	Paul Eggert	2025-07-18 17:29:25 -0700
committer	Paul Eggert	2025-07-21 16:23:18 -0700
commit	2903b0b92cfdf87fbbb764c4f202479e9a1ac941 (patch)
tree	a038a9545e482afa635a5ad21b6334f21941ee73 /src
parent	ffe6d33ef4ef4afce98a1521a03acd6934cc53d2 (diff)
download	emacs-2903b0b92cfdf87fbbb764c4f202479e9a1ac941.tar.gz emacs-2903b0b92cfdf87fbbb764c4f202479e9a1ac941.zip

diff --git a/src/fileio.c b/src/fileio.c index 9d777c7415b..dbd9bd4ce55 100644 --- a/src/fileio.c +++ b/src/fileio.c
@@ -4026,12 +4026,14 @@ maybe_move_gap (struct buffer *b)
4026	}	4026	}
4027	}	4027	}
4028		4028
4029	/* In FD, position to POS. If this fails, report an error with FILENAME. */	4029	/* In FD, position to POS. Return POS if successful, otherwise signal
4030	static void	4030	an error with FILENAME. */
		4031	static off_t
4031	xlseek (emacs_fd fd, off_t pos, Lisp_Object filename)	4032	xlseek (emacs_fd fd, off_t pos, Lisp_Object filename)
4032	{	4033	{
4033	if (emacs_fd_lseek (fd, pos, SEEK_SET) < 0)	4034	if (emacs_fd_lseek (fd, pos, SEEK_SET) < 0)
4034	report_file_error ("Setting file position", filename);	4035	report_file_error ("Setting file position", filename);
		4036	return pos;
4035	}	4037	}
4036		4038
4037	/* A good blocksize to minimize system call overhead across most systems.	4039	/* A good blocksize to minimize system call overhead across most systems.
@@ -4224,16 +4226,21 @@ by calling `format-decode', which see. */)
4224	: get_stat_mtime (&st));	4226	: get_stat_mtime (&st));
4225	}	4227	}
4226		4228
4227	/* The initial offset can be nonzero, e.g., /dev/stdin.	4229	/* The initial input position, or -1 if the file is not seekable. */
4228	If SEEK_CUR works, later code assumes SEEK_SET also works,	4230	off_t begpos = emacs_fd_lseek (fd, beg_offset,
4229	but tests SEEK_END rather than relying on it	4231	!NILP (beg) ? SEEK_SET : SEEK_CUR);
4230	as SEEK_END can fail on Linux /proc files. */	4232
4231	off_t initial_offset = emacs_fd_lseek (fd, 0, SEEK_CUR);	4233	/* Whether the file is seekable via SEEK_CUR and SEEK_SET.
4232	bool seekable = 0 <= initial_offset;	4234	SEEK_END is trickier as it is not reliable on /proc files,
4233	if (seekable && NILP (beg))	4235	so it is tested separately below. */
4234	beg_offset = initial_offset;	4236	bool seekable = 0 <= begpos;
4235	if (end_offset <= beg_offset)	4237
4236	goto handled;	4238	/* The current input position if the file is seekable,
		4239	otherwise the number of bytes read. */
		4240	off_t curpos = seekable ? begpos : 0;
		4241
		4242	if (!seekable && beg_offset != 0)
		4243	report_file_error ("Setting file position", orig_filename);
4237		4244
4238	/* The REPLACE code will need to be changed in order to work on	4245	/* The REPLACE code will need to be changed in order to work on
4239	named pipes, and it's probably just not worth it. So we should	4246	named pipes, and it's probably just not worth it. So we should
@@ -4263,6 +4270,9 @@ by calling `format-decode', which see. */)
4263	orig_filename);	4270	orig_filename);
4264	}	4271	}
4265		4272
		4273	if (end_offset <= beg_offset)
		4274	goto handled;
		4275
4266	/* Check now whether the buffer will become too large,	4276	/* Check now whether the buffer will become too large,
4267	in the likely case where the file's length is not changing.	4277	in the likely case where the file's length is not changing.
4268	This saves a lot of needless work before a buffer overflow.	4278	This saves a lot of needless work before a buffer overflow.
@@ -4310,31 +4320,25 @@ by calling `format-decode', which see. */)
4310	do not use st_size or report any SEEK_END failure. */	4320	do not use st_size or report any SEEK_END failure. */
4311	static_assert (4 * 1024 < sizeof read_buf);	4321	static_assert (4 * 1024 < sizeof read_buf);
4312	ptrdiff_t nread = emacs_full_read (fd, read_buf, 4 * 1024);	4322	ptrdiff_t nread = emacs_full_read (fd, read_buf, 4 * 1024);
4313	if (4 * 1024 <= nread)	4323	if (nread < 4 * 1024)
		4324	curpos = nread;
		4325	else
4314	{	4326	{
4315	off_t tailoff = emacs_fd_lseek (fd, - 3 * 1024, SEEK_END);	4327	curpos = emacs_fd_lseek (fd, - 3 * 1024, SEEK_END);
4316	if (tailoff < 0)	4328	if (curpos < 0)
4317	tailoff = nread;	4329	curpos = nread;
4318		4330
4319	/* When appending the last 3 KiB, read extra bytes	4331	/* When appending the last 3 KiB, read extra bytes
4320	without trusting tailoff, as the file may be growing. */	4332	without trusting SEEK_END, as the file may be growing.
		4333	Although this may yield more than 4 KiB of data total,
		4334	and the trailing data may not be from file end if
		4335	the file is growing, it is good enough. */
4321	nread = emacs_full_read (fd, read_buf + 1024,	4336	nread = emacs_full_read (fd, read_buf + 1024,
4322	sizeof read_buf - 1024);	4337	sizeof read_buf - 1024);
4323	if (nread == sizeof read_buf - 1024)	4338	if (0 <= nread)
4324	{
4325	/* Give up reading the last 3 KiB; the file is
4326	growing too rapidly. */
4327	nread = 1024;
4328	}
4329	else if (0 <= nread)
4330	{	4339	{
		4340	curpos += nread;
4331	nread += 1024;	4341	nread += 1024;
4332	if (4 * 1024 < nread)
4333	{
4334	memmove (read_buf + 1024,
4335	read_buf + nread - 3 * 1024, 3 * 1024);
4336	nread = 4 * 1024;
4337	}
4338	}	4342	}
4339	}	4343	}
4340		4344
@@ -4372,9 +4376,6 @@ by calling `format-decode', which see. */)
4372	/* Discard the unwind protect for recovering the	4376	/* Discard the unwind protect for recovering the
4373	current buffer. */	4377	current buffer. */
4374	specpdl_ptr--;	4378	specpdl_ptr--;
4375
4376	/* Rewind the file for the actual read done later. */
4377	xlseek (fd, initial_offset, orig_filename);
4378	}	4379	}
4379	}	4380	}
4380		4381
@@ -4430,15 +4431,14 @@ by calling `format-decode', which see. */)
4430	give up on handling REPLACE in the optimized way. */	4431	give up on handling REPLACE in the optimized way. */
4431	bool giveup_match_end = false;	4432	bool giveup_match_end = false;
4432		4433
4433	if (beg_offset != initial_offset)	4434	if (beg_offset != curpos)
4434	xlseek (fd, beg_offset, orig_filename);	4435	curpos = xlseek (fd, beg_offset, orig_filename);
4435		4436
4436	/* Count how many chars at the start of the file	4437	/* Count how many chars at the start of the file
4437	match the text at the beginning of the buffer. */	4438	match the text at the beginning of the buffer. */
4438	while (true)	4439	while (true)
4439	{	4440	{
4440	off_t bytes_to_read = sizeof read_buf;	4441	off_t bytes_to_read = sizeof read_buf;
4441	off_t curpos = beg_offset + (same_at_start - BEGV_BYTE);
4442	bytes_to_read = min (bytes_to_read, end_offset - curpos);	4442	bytes_to_read = min (bytes_to_read, end_offset - curpos);
4443	ptrdiff_t nread = (bytes_to_read <= 0	4443	ptrdiff_t nread = (bytes_to_read <= 0
4444	? 0	4444	? 0
@@ -4448,6 +4448,8 @@ by calling `format-decode', which see. */)
4448		4448
4449	if (0 < nread)	4449	if (0 < nread)
4450	{	4450	{
		4451	curpos += nread;
		4452
4451	if (CODING_REQUIRE_DETECTION (&coding))	4453	if (CODING_REQUIRE_DETECTION (&coding))
4452	{	4454	{
4453	coding_system	4455	coding_system
@@ -4507,7 +4509,7 @@ by calling `format-decode', which see. */)
4507	ptrdiff_t n = emacs_full_read (fd, read_buf, sizeof read_buf);	4509	ptrdiff_t n = emacs_full_read (fd, read_buf, sizeof read_buf);
4508	if (n < 0)	4510	if (n < 0)
4509	report_file_error ("Read error", orig_filename);	4511	report_file_error ("Read error", orig_filename);
4510	endpos += n;	4512	curpos = endpos += n;
4511		4513
4512	/* Give up if the file grew more than even the test read. */	4514	/* Give up if the file grew more than even the test read. */
4513	giveup_match_end = n == sizeof read_buf;	4515	giveup_match_end = n == sizeof read_buf;
@@ -4532,10 +4534,6 @@ by calling `format-decode', which see. */)
4532	while (!giveup_match_end)	4534	while (!giveup_match_end)
4533	{	4535	{
4534	ptrdiff_t nread, bufpos, trial;	4536	ptrdiff_t nread, bufpos, trial;
4535	off_t curpos;
4536
4537	/* At what file position are we now scanning? */
4538	curpos = endpos - (ZV_BYTE - same_at_end);
4539		4537
4540	/* How much can we scan in the next step? Compare with poslim	4538	/* How much can we scan in the next step? Compare with poslim
4541	to prevent overlap of the matching head with the matching tail.	4539	to prevent overlap of the matching head with the matching tail.
@@ -4550,10 +4548,10 @@ by calling `format-decode', which see. */)
4550	if (trial == 0)	4548	if (trial == 0)
4551	break;	4549	break;
4552		4550
4553	curpos -= trial;	4551	curpos = xlseek (fd, curpos - trial, orig_filename);
4554	xlseek (fd, curpos, orig_filename);
4555		4552
4556	nread = emacs_full_read (fd, read_buf, trial);	4553	nread = emacs_full_read (fd, read_buf, trial);
		4554	curpos += nread;
4557	if (nread < trial)	4555	if (nread < trial)
4558	{	4556	{
4559	if (nread < 0)	4557	if (nread < 0)
@@ -4670,7 +4668,8 @@ by calling `format-decode', which see. */)
4670	/* First read the whole file, performing code conversion into	4668	/* First read the whole file, performing code conversion into
4671	CONVERSION_BUFFER. */	4669	CONVERSION_BUFFER. */
4672		4670
4673	xlseek (fd, beg_offset, orig_filename);	4671	if (beg_offset != curpos)
		4672	curpos = xlseek (fd, beg_offset, orig_filename);
4674		4673
4675	inserted = 0; /* Bytes put into CONVERSION_BUFFER so far. */	4674	inserted = 0; /* Bytes put into CONVERSION_BUFFER so far. */
4676	unprocessed = 0; /* Bytes not processed in previous loop. */	4675	unprocessed = 0; /* Bytes not processed in previous loop. */
@@ -4686,6 +4685,7 @@ by calling `format-decode', which see. */)
4686	report_file_error ("Read error", orig_filename);	4685	report_file_error ("Read error", orig_filename);
4687	if (this == 0)	4686	if (this == 0)
4688	break;	4687	break;
		4688	curpos += this;
4689		4689
4690	BUF_TEMP_SET_PT (XBUFFER (conversion_buffer),	4690	BUF_TEMP_SET_PT (XBUFFER (conversion_buffer),
4691	BUF_Z (XBUFFER (conversion_buffer)));	4691	BUF_Z (XBUFFER (conversion_buffer)));
@@ -4856,9 +4856,10 @@ by calling `format-decode', which see. */)
4856	make_gap (growth);	4856	make_gap (growth);
4857	}	4857	}
4858		4858
4859	if (beg_offset != 0 || (!NILP (replace)	4859	if (beg_offset != curpos)
4860	&& !BASE_EQ (replace, Qunbound)))
4861	xlseek (fd, beg_offset, orig_filename);	4860	xlseek (fd, beg_offset, orig_filename);
		4861	/* curpos effectively goes out of scope now, as it is no longer needed,
		4862	so do not bother to update curpos from now on. */
4862		4863
4863	/* Total bytes inserted. */	4864	/* Total bytes inserted. */
4864	inserted = 0;	4865	inserted = 0;