aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorPaul Eggert2025-07-18 17:29:25 -0700
committerPaul Eggert2025-07-21 16:23:18 -0700
commit2903b0b92cfdf87fbbb764c4f202479e9a1ac941 (patch)
treea038a9545e482afa635a5ad21b6334f21941ee73 /src
parentffe6d33ef4ef4afce98a1521a03acd6934cc53d2 (diff)
downloademacs-2903b0b92cfdf87fbbb764c4f202479e9a1ac941.tar.gz
emacs-2903b0b92cfdf87fbbb764c4f202479e9a1ac941.zip
insert-file-contents 1 KiB seek fix
This improves on recent fixes to Bug#77315. When sampling the first 1 KiB and last 3 KiB, do not seek before BEG if given. Instead, sample starting at BEG, to be consistent with the non-optimized version. * src/fileio.c (xlseek): Return POS, for convenience. (Finsert_file_contents): Sample the first 1 KiB correctly when BEG is given. In a CURPOS local, keep track of the input file offset, or for nonseekable files the number of bytes read, while this value is important. This lets us avoid some unnecessary seeks. Report an error earlier if the file is not seekable and BEG is nonzero, to save work and simplify the code. When sampling, discard less data, as this is simpler and there’s little point to discarding it.
Diffstat (limited to 'src')
-rw-r--r--src/fileio.c93
1 files changed, 47 insertions, 46 deletions
diff --git a/src/fileio.c b/src/fileio.c
index 9d777c7415b..dbd9bd4ce55 100644
--- a/src/fileio.c
+++ b/src/fileio.c
@@ -4026,12 +4026,14 @@ maybe_move_gap (struct buffer *b)
4026 } 4026 }
4027} 4027}
4028 4028
4029/* In FD, position to POS. If this fails, report an error with FILENAME. */ 4029/* In FD, position to POS. Return POS if successful, otherwise signal
4030static void 4030 an error with FILENAME. */
4031static off_t
4031xlseek (emacs_fd fd, off_t pos, Lisp_Object filename) 4032xlseek (emacs_fd fd, off_t pos, Lisp_Object filename)
4032{ 4033{
4033 if (emacs_fd_lseek (fd, pos, SEEK_SET) < 0) 4034 if (emacs_fd_lseek (fd, pos, SEEK_SET) < 0)
4034 report_file_error ("Setting file position", filename); 4035 report_file_error ("Setting file position", filename);
4036 return pos;
4035} 4037}
4036 4038
4037/* A good blocksize to minimize system call overhead across most systems. 4039/* A good blocksize to minimize system call overhead across most systems.
@@ -4224,16 +4226,21 @@ by calling `format-decode', which see. */)
4224 : get_stat_mtime (&st)); 4226 : get_stat_mtime (&st));
4225 } 4227 }
4226 4228
4227 /* The initial offset can be nonzero, e.g., /dev/stdin. 4229 /* The initial input position, or -1 if the file is not seekable. */
4228 If SEEK_CUR works, later code assumes SEEK_SET also works, 4230 off_t begpos = emacs_fd_lseek (fd, beg_offset,
4229 but tests SEEK_END rather than relying on it 4231 !NILP (beg) ? SEEK_SET : SEEK_CUR);
4230 as SEEK_END can fail on Linux /proc files. */ 4232
4231 off_t initial_offset = emacs_fd_lseek (fd, 0, SEEK_CUR); 4233 /* Whether the file is seekable via SEEK_CUR and SEEK_SET.
4232 bool seekable = 0 <= initial_offset; 4234 SEEK_END is trickier as it is not reliable on /proc files,
4233 if (seekable && NILP (beg)) 4235 so it is tested separately below. */
4234 beg_offset = initial_offset; 4236 bool seekable = 0 <= begpos;
4235 if (end_offset <= beg_offset) 4237
4236 goto handled; 4238 /* The current input position if the file is seekable,
4239 otherwise the number of bytes read. */
4240 off_t curpos = seekable ? begpos : 0;
4241
4242 if (!seekable && beg_offset != 0)
4243 report_file_error ("Setting file position", orig_filename);
4237 4244
4238 /* The REPLACE code will need to be changed in order to work on 4245 /* The REPLACE code will need to be changed in order to work on
4239 named pipes, and it's probably just not worth it. So we should 4246 named pipes, and it's probably just not worth it. So we should
@@ -4263,6 +4270,9 @@ by calling `format-decode', which see. */)
4263 orig_filename); 4270 orig_filename);
4264 } 4271 }
4265 4272
4273 if (end_offset <= beg_offset)
4274 goto handled;
4275
4266 /* Check now whether the buffer will become too large, 4276 /* Check now whether the buffer will become too large,
4267 in the likely case where the file's length is not changing. 4277 in the likely case where the file's length is not changing.
4268 This saves a lot of needless work before a buffer overflow. 4278 This saves a lot of needless work before a buffer overflow.
@@ -4310,31 +4320,25 @@ by calling `format-decode', which see. */)
4310 do not use st_size or report any SEEK_END failure. */ 4320 do not use st_size or report any SEEK_END failure. */
4311 static_assert (4 * 1024 < sizeof read_buf); 4321 static_assert (4 * 1024 < sizeof read_buf);
4312 ptrdiff_t nread = emacs_full_read (fd, read_buf, 4 * 1024); 4322 ptrdiff_t nread = emacs_full_read (fd, read_buf, 4 * 1024);
4313 if (4 * 1024 <= nread) 4323 if (nread < 4 * 1024)
4324 curpos = nread;
4325 else
4314 { 4326 {
4315 off_t tailoff = emacs_fd_lseek (fd, - 3 * 1024, SEEK_END); 4327 curpos = emacs_fd_lseek (fd, - 3 * 1024, SEEK_END);
4316 if (tailoff < 0) 4328 if (curpos < 0)
4317 tailoff = nread; 4329 curpos = nread;
4318 4330
4319 /* When appending the last 3 KiB, read extra bytes 4331 /* When appending the last 3 KiB, read extra bytes
4320 without trusting tailoff, as the file may be growing. */ 4332 without trusting SEEK_END, as the file may be growing.
4333 Although this may yield more than 4 KiB of data total,
4334 and the trailing data may not be from file end if
4335 the file is growing, it is good enough. */
4321 nread = emacs_full_read (fd, read_buf + 1024, 4336 nread = emacs_full_read (fd, read_buf + 1024,
4322 sizeof read_buf - 1024); 4337 sizeof read_buf - 1024);
4323 if (nread == sizeof read_buf - 1024) 4338 if (0 <= nread)
4324 {
4325 /* Give up reading the last 3 KiB; the file is
4326 growing too rapidly. */
4327 nread = 1024;
4328 }
4329 else if (0 <= nread)
4330 { 4339 {
4340 curpos += nread;
4331 nread += 1024; 4341 nread += 1024;
4332 if (4 * 1024 < nread)
4333 {
4334 memmove (read_buf + 1024,
4335 read_buf + nread - 3 * 1024, 3 * 1024);
4336 nread = 4 * 1024;
4337 }
4338 } 4342 }
4339 } 4343 }
4340 4344
@@ -4372,9 +4376,6 @@ by calling `format-decode', which see. */)
4372 /* Discard the unwind protect for recovering the 4376 /* Discard the unwind protect for recovering the
4373 current buffer. */ 4377 current buffer. */
4374 specpdl_ptr--; 4378 specpdl_ptr--;
4375
4376 /* Rewind the file for the actual read done later. */
4377 xlseek (fd, initial_offset, orig_filename);
4378 } 4379 }
4379 } 4380 }
4380 4381
@@ -4430,15 +4431,14 @@ by calling `format-decode', which see. */)
4430 give up on handling REPLACE in the optimized way. */ 4431 give up on handling REPLACE in the optimized way. */
4431 bool giveup_match_end = false; 4432 bool giveup_match_end = false;
4432 4433
4433 if (beg_offset != initial_offset) 4434 if (beg_offset != curpos)
4434 xlseek (fd, beg_offset, orig_filename); 4435 curpos = xlseek (fd, beg_offset, orig_filename);
4435 4436
4436 /* Count how many chars at the start of the file 4437 /* Count how many chars at the start of the file
4437 match the text at the beginning of the buffer. */ 4438 match the text at the beginning of the buffer. */
4438 while (true) 4439 while (true)
4439 { 4440 {
4440 off_t bytes_to_read = sizeof read_buf; 4441 off_t bytes_to_read = sizeof read_buf;
4441 off_t curpos = beg_offset + (same_at_start - BEGV_BYTE);
4442 bytes_to_read = min (bytes_to_read, end_offset - curpos); 4442 bytes_to_read = min (bytes_to_read, end_offset - curpos);
4443 ptrdiff_t nread = (bytes_to_read <= 0 4443 ptrdiff_t nread = (bytes_to_read <= 0
4444 ? 0 4444 ? 0
@@ -4448,6 +4448,8 @@ by calling `format-decode', which see. */)
4448 4448
4449 if (0 < nread) 4449 if (0 < nread)
4450 { 4450 {
4451 curpos += nread;
4452
4451 if (CODING_REQUIRE_DETECTION (&coding)) 4453 if (CODING_REQUIRE_DETECTION (&coding))
4452 { 4454 {
4453 coding_system 4455 coding_system
@@ -4507,7 +4509,7 @@ by calling `format-decode', which see. */)
4507 ptrdiff_t n = emacs_full_read (fd, read_buf, sizeof read_buf); 4509 ptrdiff_t n = emacs_full_read (fd, read_buf, sizeof read_buf);
4508 if (n < 0) 4510 if (n < 0)
4509 report_file_error ("Read error", orig_filename); 4511 report_file_error ("Read error", orig_filename);
4510 endpos += n; 4512 curpos = endpos += n;
4511 4513
4512 /* Give up if the file grew more than even the test read. */ 4514 /* Give up if the file grew more than even the test read. */
4513 giveup_match_end = n == sizeof read_buf; 4515 giveup_match_end = n == sizeof read_buf;
@@ -4532,10 +4534,6 @@ by calling `format-decode', which see. */)
4532 while (!giveup_match_end) 4534 while (!giveup_match_end)
4533 { 4535 {
4534 ptrdiff_t nread, bufpos, trial; 4536 ptrdiff_t nread, bufpos, trial;
4535 off_t curpos;
4536
4537 /* At what file position are we now scanning? */
4538 curpos = endpos - (ZV_BYTE - same_at_end);
4539 4537
4540 /* How much can we scan in the next step? Compare with poslim 4538 /* How much can we scan in the next step? Compare with poslim
4541 to prevent overlap of the matching head with the matching tail. 4539 to prevent overlap of the matching head with the matching tail.
@@ -4550,10 +4548,10 @@ by calling `format-decode', which see. */)
4550 if (trial == 0) 4548 if (trial == 0)
4551 break; 4549 break;
4552 4550
4553 curpos -= trial; 4551 curpos = xlseek (fd, curpos - trial, orig_filename);
4554 xlseek (fd, curpos, orig_filename);
4555 4552
4556 nread = emacs_full_read (fd, read_buf, trial); 4553 nread = emacs_full_read (fd, read_buf, trial);
4554 curpos += nread;
4557 if (nread < trial) 4555 if (nread < trial)
4558 { 4556 {
4559 if (nread < 0) 4557 if (nread < 0)
@@ -4670,7 +4668,8 @@ by calling `format-decode', which see. */)
4670 /* First read the whole file, performing code conversion into 4668 /* First read the whole file, performing code conversion into
4671 CONVERSION_BUFFER. */ 4669 CONVERSION_BUFFER. */
4672 4670
4673 xlseek (fd, beg_offset, orig_filename); 4671 if (beg_offset != curpos)
4672 curpos = xlseek (fd, beg_offset, orig_filename);
4674 4673
4675 inserted = 0; /* Bytes put into CONVERSION_BUFFER so far. */ 4674 inserted = 0; /* Bytes put into CONVERSION_BUFFER so far. */
4676 unprocessed = 0; /* Bytes not processed in previous loop. */ 4675 unprocessed = 0; /* Bytes not processed in previous loop. */
@@ -4686,6 +4685,7 @@ by calling `format-decode', which see. */)
4686 report_file_error ("Read error", orig_filename); 4685 report_file_error ("Read error", orig_filename);
4687 if (this == 0) 4686 if (this == 0)
4688 break; 4687 break;
4688 curpos += this;
4689 4689
4690 BUF_TEMP_SET_PT (XBUFFER (conversion_buffer), 4690 BUF_TEMP_SET_PT (XBUFFER (conversion_buffer),
4691 BUF_Z (XBUFFER (conversion_buffer))); 4691 BUF_Z (XBUFFER (conversion_buffer)));
@@ -4856,9 +4856,10 @@ by calling `format-decode', which see. */)
4856 make_gap (growth); 4856 make_gap (growth);
4857 } 4857 }
4858 4858
4859 if (beg_offset != 0 || (!NILP (replace) 4859 if (beg_offset != curpos)
4860 && !BASE_EQ (replace, Qunbound)))
4861 xlseek (fd, beg_offset, orig_filename); 4860 xlseek (fd, beg_offset, orig_filename);
4861 /* curpos effectively goes out of scope now, as it is no longer needed,
4862 so do not bother to update curpos from now on. */
4862 4863
4863 /* Total bytes inserted. */ 4864 /* Total bytes inserted. */
4864 inserted = 0; 4865 inserted = 0;