diff options
| author | Paul Eggert | 2025-07-18 17:29:25 -0700 |
|---|---|---|
| committer | Paul Eggert | 2025-07-21 16:23:18 -0700 |
| commit | 2903b0b92cfdf87fbbb764c4f202479e9a1ac941 (patch) | |
| tree | a038a9545e482afa635a5ad21b6334f21941ee73 /src | |
| parent | ffe6d33ef4ef4afce98a1521a03acd6934cc53d2 (diff) | |
| download | emacs-2903b0b92cfdf87fbbb764c4f202479e9a1ac941.tar.gz emacs-2903b0b92cfdf87fbbb764c4f202479e9a1ac941.zip | |
insert-file-contents 1 KiB seek fix
This improves on recent fixes to Bug#77315.
When sampling the first 1 KiB and last 3 KiB, do not seek before
BEG if given. Instead, sample starting at BEG, to be consistent
with the non-optimized version.
* src/fileio.c (xlseek): Return POS, for convenience.
(Finsert_file_contents): Sample the first 1 KiB correctly when BEG is given.
In a CURPOS local, keep track of the input file offset, or for
nonseekable files the number of bytes read, while this value is
important. This lets us avoid some unnecessary seeks. Report an
error earlier if the file is not seekable and BEG is nonzero,
to save work and simplify the code. When sampling, discard less
data, as this is simpler and there’s little point to discarding it.
Diffstat (limited to 'src')
| -rw-r--r-- | src/fileio.c | 93 |
1 files changed, 47 insertions, 46 deletions
diff --git a/src/fileio.c b/src/fileio.c index 9d777c7415b..dbd9bd4ce55 100644 --- a/src/fileio.c +++ b/src/fileio.c | |||
| @@ -4026,12 +4026,14 @@ maybe_move_gap (struct buffer *b) | |||
| 4026 | } | 4026 | } |
| 4027 | } | 4027 | } |
| 4028 | 4028 | ||
| 4029 | /* In FD, position to POS. If this fails, report an error with FILENAME. */ | 4029 | /* In FD, position to POS. Return POS if successful, otherwise signal |
| 4030 | static void | 4030 | an error with FILENAME. */ |
| 4031 | static off_t | ||
| 4031 | xlseek (emacs_fd fd, off_t pos, Lisp_Object filename) | 4032 | xlseek (emacs_fd fd, off_t pos, Lisp_Object filename) |
| 4032 | { | 4033 | { |
| 4033 | if (emacs_fd_lseek (fd, pos, SEEK_SET) < 0) | 4034 | if (emacs_fd_lseek (fd, pos, SEEK_SET) < 0) |
| 4034 | report_file_error ("Setting file position", filename); | 4035 | report_file_error ("Setting file position", filename); |
| 4036 | return pos; | ||
| 4035 | } | 4037 | } |
| 4036 | 4038 | ||
| 4037 | /* A good blocksize to minimize system call overhead across most systems. | 4039 | /* A good blocksize to minimize system call overhead across most systems. |
| @@ -4224,16 +4226,21 @@ by calling `format-decode', which see. */) | |||
| 4224 | : get_stat_mtime (&st)); | 4226 | : get_stat_mtime (&st)); |
| 4225 | } | 4227 | } |
| 4226 | 4228 | ||
| 4227 | /* The initial offset can be nonzero, e.g., /dev/stdin. | 4229 | /* The initial input position, or -1 if the file is not seekable. */ |
| 4228 | If SEEK_CUR works, later code assumes SEEK_SET also works, | 4230 | off_t begpos = emacs_fd_lseek (fd, beg_offset, |
| 4229 | but tests SEEK_END rather than relying on it | 4231 | !NILP (beg) ? SEEK_SET : SEEK_CUR); |
| 4230 | as SEEK_END can fail on Linux /proc files. */ | 4232 | |
| 4231 | off_t initial_offset = emacs_fd_lseek (fd, 0, SEEK_CUR); | 4233 | /* Whether the file is seekable via SEEK_CUR and SEEK_SET. |
| 4232 | bool seekable = 0 <= initial_offset; | 4234 | SEEK_END is trickier as it is not reliable on /proc files, |
| 4233 | if (seekable && NILP (beg)) | 4235 | so it is tested separately below. */ |
| 4234 | beg_offset = initial_offset; | 4236 | bool seekable = 0 <= begpos; |
| 4235 | if (end_offset <= beg_offset) | 4237 | |
| 4236 | goto handled; | 4238 | /* The current input position if the file is seekable, |
| 4239 | otherwise the number of bytes read. */ | ||
| 4240 | off_t curpos = seekable ? begpos : 0; | ||
| 4241 | |||
| 4242 | if (!seekable && beg_offset != 0) | ||
| 4243 | report_file_error ("Setting file position", orig_filename); | ||
| 4237 | 4244 | ||
| 4238 | /* The REPLACE code will need to be changed in order to work on | 4245 | /* The REPLACE code will need to be changed in order to work on |
| 4239 | named pipes, and it's probably just not worth it. So we should | 4246 | named pipes, and it's probably just not worth it. So we should |
| @@ -4263,6 +4270,9 @@ by calling `format-decode', which see. */) | |||
| 4263 | orig_filename); | 4270 | orig_filename); |
| 4264 | } | 4271 | } |
| 4265 | 4272 | ||
| 4273 | if (end_offset <= beg_offset) | ||
| 4274 | goto handled; | ||
| 4275 | |||
| 4266 | /* Check now whether the buffer will become too large, | 4276 | /* Check now whether the buffer will become too large, |
| 4267 | in the likely case where the file's length is not changing. | 4277 | in the likely case where the file's length is not changing. |
| 4268 | This saves a lot of needless work before a buffer overflow. | 4278 | This saves a lot of needless work before a buffer overflow. |
| @@ -4310,31 +4320,25 @@ by calling `format-decode', which see. */) | |||
| 4310 | do not use st_size or report any SEEK_END failure. */ | 4320 | do not use st_size or report any SEEK_END failure. */ |
| 4311 | static_assert (4 * 1024 < sizeof read_buf); | 4321 | static_assert (4 * 1024 < sizeof read_buf); |
| 4312 | ptrdiff_t nread = emacs_full_read (fd, read_buf, 4 * 1024); | 4322 | ptrdiff_t nread = emacs_full_read (fd, read_buf, 4 * 1024); |
| 4313 | if (4 * 1024 <= nread) | 4323 | if (nread < 4 * 1024) |
| 4324 | curpos = nread; | ||
| 4325 | else | ||
| 4314 | { | 4326 | { |
| 4315 | off_t tailoff = emacs_fd_lseek (fd, - 3 * 1024, SEEK_END); | 4327 | curpos = emacs_fd_lseek (fd, - 3 * 1024, SEEK_END); |
| 4316 | if (tailoff < 0) | 4328 | if (curpos < 0) |
| 4317 | tailoff = nread; | 4329 | curpos = nread; |
| 4318 | 4330 | ||
| 4319 | /* When appending the last 3 KiB, read extra bytes | 4331 | /* When appending the last 3 KiB, read extra bytes |
| 4320 | without trusting tailoff, as the file may be growing. */ | 4332 | without trusting SEEK_END, as the file may be growing. |
| 4333 | Although this may yield more than 4 KiB of data total, | ||
| 4334 | and the trailing data may not be from file end if | ||
| 4335 | the file is growing, it is good enough. */ | ||
| 4321 | nread = emacs_full_read (fd, read_buf + 1024, | 4336 | nread = emacs_full_read (fd, read_buf + 1024, |
| 4322 | sizeof read_buf - 1024); | 4337 | sizeof read_buf - 1024); |
| 4323 | if (nread == sizeof read_buf - 1024) | 4338 | if (0 <= nread) |
| 4324 | { | ||
| 4325 | /* Give up reading the last 3 KiB; the file is | ||
| 4326 | growing too rapidly. */ | ||
| 4327 | nread = 1024; | ||
| 4328 | } | ||
| 4329 | else if (0 <= nread) | ||
| 4330 | { | 4339 | { |
| 4340 | curpos += nread; | ||
| 4331 | nread += 1024; | 4341 | nread += 1024; |
| 4332 | if (4 * 1024 < nread) | ||
| 4333 | { | ||
| 4334 | memmove (read_buf + 1024, | ||
| 4335 | read_buf + nread - 3 * 1024, 3 * 1024); | ||
| 4336 | nread = 4 * 1024; | ||
| 4337 | } | ||
| 4338 | } | 4342 | } |
| 4339 | } | 4343 | } |
| 4340 | 4344 | ||
| @@ -4372,9 +4376,6 @@ by calling `format-decode', which see. */) | |||
| 4372 | /* Discard the unwind protect for recovering the | 4376 | /* Discard the unwind protect for recovering the |
| 4373 | current buffer. */ | 4377 | current buffer. */ |
| 4374 | specpdl_ptr--; | 4378 | specpdl_ptr--; |
| 4375 | |||
| 4376 | /* Rewind the file for the actual read done later. */ | ||
| 4377 | xlseek (fd, initial_offset, orig_filename); | ||
| 4378 | } | 4379 | } |
| 4379 | } | 4380 | } |
| 4380 | 4381 | ||
| @@ -4430,15 +4431,14 @@ by calling `format-decode', which see. */) | |||
| 4430 | give up on handling REPLACE in the optimized way. */ | 4431 | give up on handling REPLACE in the optimized way. */ |
| 4431 | bool giveup_match_end = false; | 4432 | bool giveup_match_end = false; |
| 4432 | 4433 | ||
| 4433 | if (beg_offset != initial_offset) | 4434 | if (beg_offset != curpos) |
| 4434 | xlseek (fd, beg_offset, orig_filename); | 4435 | curpos = xlseek (fd, beg_offset, orig_filename); |
| 4435 | 4436 | ||
| 4436 | /* Count how many chars at the start of the file | 4437 | /* Count how many chars at the start of the file |
| 4437 | match the text at the beginning of the buffer. */ | 4438 | match the text at the beginning of the buffer. */ |
| 4438 | while (true) | 4439 | while (true) |
| 4439 | { | 4440 | { |
| 4440 | off_t bytes_to_read = sizeof read_buf; | 4441 | off_t bytes_to_read = sizeof read_buf; |
| 4441 | off_t curpos = beg_offset + (same_at_start - BEGV_BYTE); | ||
| 4442 | bytes_to_read = min (bytes_to_read, end_offset - curpos); | 4442 | bytes_to_read = min (bytes_to_read, end_offset - curpos); |
| 4443 | ptrdiff_t nread = (bytes_to_read <= 0 | 4443 | ptrdiff_t nread = (bytes_to_read <= 0 |
| 4444 | ? 0 | 4444 | ? 0 |
| @@ -4448,6 +4448,8 @@ by calling `format-decode', which see. */) | |||
| 4448 | 4448 | ||
| 4449 | if (0 < nread) | 4449 | if (0 < nread) |
| 4450 | { | 4450 | { |
| 4451 | curpos += nread; | ||
| 4452 | |||
| 4451 | if (CODING_REQUIRE_DETECTION (&coding)) | 4453 | if (CODING_REQUIRE_DETECTION (&coding)) |
| 4452 | { | 4454 | { |
| 4453 | coding_system | 4455 | coding_system |
| @@ -4507,7 +4509,7 @@ by calling `format-decode', which see. */) | |||
| 4507 | ptrdiff_t n = emacs_full_read (fd, read_buf, sizeof read_buf); | 4509 | ptrdiff_t n = emacs_full_read (fd, read_buf, sizeof read_buf); |
| 4508 | if (n < 0) | 4510 | if (n < 0) |
| 4509 | report_file_error ("Read error", orig_filename); | 4511 | report_file_error ("Read error", orig_filename); |
| 4510 | endpos += n; | 4512 | curpos = endpos += n; |
| 4511 | 4513 | ||
| 4512 | /* Give up if the file grew more than even the test read. */ | 4514 | /* Give up if the file grew more than even the test read. */ |
| 4513 | giveup_match_end = n == sizeof read_buf; | 4515 | giveup_match_end = n == sizeof read_buf; |
| @@ -4532,10 +4534,6 @@ by calling `format-decode', which see. */) | |||
| 4532 | while (!giveup_match_end) | 4534 | while (!giveup_match_end) |
| 4533 | { | 4535 | { |
| 4534 | ptrdiff_t nread, bufpos, trial; | 4536 | ptrdiff_t nread, bufpos, trial; |
| 4535 | off_t curpos; | ||
| 4536 | |||
| 4537 | /* At what file position are we now scanning? */ | ||
| 4538 | curpos = endpos - (ZV_BYTE - same_at_end); | ||
| 4539 | 4537 | ||
| 4540 | /* How much can we scan in the next step? Compare with poslim | 4538 | /* How much can we scan in the next step? Compare with poslim |
| 4541 | to prevent overlap of the matching head with the matching tail. | 4539 | to prevent overlap of the matching head with the matching tail. |
| @@ -4550,10 +4548,10 @@ by calling `format-decode', which see. */) | |||
| 4550 | if (trial == 0) | 4548 | if (trial == 0) |
| 4551 | break; | 4549 | break; |
| 4552 | 4550 | ||
| 4553 | curpos -= trial; | 4551 | curpos = xlseek (fd, curpos - trial, orig_filename); |
| 4554 | xlseek (fd, curpos, orig_filename); | ||
| 4555 | 4552 | ||
| 4556 | nread = emacs_full_read (fd, read_buf, trial); | 4553 | nread = emacs_full_read (fd, read_buf, trial); |
| 4554 | curpos += nread; | ||
| 4557 | if (nread < trial) | 4555 | if (nread < trial) |
| 4558 | { | 4556 | { |
| 4559 | if (nread < 0) | 4557 | if (nread < 0) |
| @@ -4670,7 +4668,8 @@ by calling `format-decode', which see. */) | |||
| 4670 | /* First read the whole file, performing code conversion into | 4668 | /* First read the whole file, performing code conversion into |
| 4671 | CONVERSION_BUFFER. */ | 4669 | CONVERSION_BUFFER. */ |
| 4672 | 4670 | ||
| 4673 | xlseek (fd, beg_offset, orig_filename); | 4671 | if (beg_offset != curpos) |
| 4672 | curpos = xlseek (fd, beg_offset, orig_filename); | ||
| 4674 | 4673 | ||
| 4675 | inserted = 0; /* Bytes put into CONVERSION_BUFFER so far. */ | 4674 | inserted = 0; /* Bytes put into CONVERSION_BUFFER so far. */ |
| 4676 | unprocessed = 0; /* Bytes not processed in previous loop. */ | 4675 | unprocessed = 0; /* Bytes not processed in previous loop. */ |
| @@ -4686,6 +4685,7 @@ by calling `format-decode', which see. */) | |||
| 4686 | report_file_error ("Read error", orig_filename); | 4685 | report_file_error ("Read error", orig_filename); |
| 4687 | if (this == 0) | 4686 | if (this == 0) |
| 4688 | break; | 4687 | break; |
| 4688 | curpos += this; | ||
| 4689 | 4689 | ||
| 4690 | BUF_TEMP_SET_PT (XBUFFER (conversion_buffer), | 4690 | BUF_TEMP_SET_PT (XBUFFER (conversion_buffer), |
| 4691 | BUF_Z (XBUFFER (conversion_buffer))); | 4691 | BUF_Z (XBUFFER (conversion_buffer))); |
| @@ -4856,9 +4856,10 @@ by calling `format-decode', which see. */) | |||
| 4856 | make_gap (growth); | 4856 | make_gap (growth); |
| 4857 | } | 4857 | } |
| 4858 | 4858 | ||
| 4859 | if (beg_offset != 0 || (!NILP (replace) | 4859 | if (beg_offset != curpos) |
| 4860 | && !BASE_EQ (replace, Qunbound))) | ||
| 4861 | xlseek (fd, beg_offset, orig_filename); | 4860 | xlseek (fd, beg_offset, orig_filename); |
| 4861 | /* curpos effectively goes out of scope now, as it is no longer needed, | ||
| | | 4862 | so do not bother to update curpos from now on. */ |
| 4862 | 4863 | ||
| 4863 | /* Total bytes inserted. */ | 4864 | /* Total bytes inserted. */ |
| 4864 | inserted = 0; | 4865 | inserted = 0; |