aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Paul Eggert 2025-07-13 10:55:14 -0700
committer Paul Eggert 2025-07-13 21:09:39 -0700
commit 55f41ca3aa8fe487d10730708a7396137a2c9d18 (patch)
tree addad6d26781ef8a15618154799489787cdc5cc9 /src
parent 8ac78986ff7028c249f136b3523dacf0693c22d3 (diff)
download emacs-55f41ca3aa8fe487d10730708a7396137a2c9d18.tar.gz
emacs-55f41ca3aa8fe487d10730708a7396137a2c9d18.zip
insert-file-contents file shrinkage in scanback
* src/fileio.c (emacs_full_read): New function.
(Fcopy_file, Finsert_file_contents): Use it.
(Finsert_file_contents): Check for partial reads when scanning
backwards through the file’s tail, as this indicates the file
shrank while we read it. Also, use emacs_full_read in other
situations where it’s simpler and should be a bit faster.
Diffstat (limited to 'src')
-rw-r--r-- src/fileio.c 166
1 files changed, 87 insertions, 79 deletions
diff --git a/src/fileio.c b/src/fileio.c
index 8150f4d8e12..2a53dbbdd07 100644
--- a/src/fileio.c
+++ b/src/fileio.c
@@ -2220,6 +2220,21 @@ barf_or_query_if_file_exists (Lisp_Object absname, bool known_to_exist,
2220 } 2220 }
2221} 2221}
2222 2222
2223/* Read a full buffer of bytes from FD into BUF, which is of size BUFSIZE.
2224 FD should be a regular file, as special files might need quit processing.
2225 On success, return the read count, which is less than BUFSIZE at EOF.
2226 Return -1 on failure, setting errno and possibly setting BUF. */
2227static ptrdiff_t
2228emacs_full_read (int fd, void *buf, ptrdiff_t bufsize)
2229{
2230 char *b = buf;
2231 ptrdiff_t nread = 0, r;
2232 while (0 < (r = emacs_fd_read (fd, b + nread, bufsize - nread))
2233 && (nread += r) < bufsize)
2234 continue;
2235 return r < 0 ? r : nread;
2236}
2237
2223#ifndef WINDOWSNT 2238#ifndef WINDOWSNT
2224/* Copy data to DEST from SOURCE if possible. Return true if OK. */ 2239/* Copy data to DEST from SOURCE if possible. Return true if OK. */
2225static bool 2240static bool
@@ -2419,13 +2434,15 @@ permissions. */)
2419 char buf[MAX_ALLOCA]; 2434 char buf[MAX_ALLOCA];
2420 2435
2421 for (ptrdiff_t copied; 2436 for (ptrdiff_t copied;
2422 (copied = emacs_fd_read (ifd, buf, sizeof buf)); 2437 (copied = emacs_full_read (ifd, buf, sizeof buf));
2423 newsize += copied) 2438 newsize += copied)
2424 { 2439 {
2425 if (copied < 0) 2440 if (copied < 0)
2426 report_file_error ("Read error", file); 2441 report_file_error ("Read error", file);
2427 if (emacs_write_quit (ofd, buf, copied) != copied) 2442 if (emacs_write_quit (ofd, buf, copied) != copied)
2428 report_file_error ("Write error", newname); 2443 report_file_error ("Write error", newname);
2444 if (copied < sizeof buf)
2445 break;
2429 } 2446 }
2430 } 2447 }
2431 2448
@@ -4279,8 +4296,11 @@ by calling `format-decode', which see. */)
4279 and tailing respectively are sufficient for this 4296 and tailing respectively are sufficient for this
4280 purpose. Because the file may be in /proc, 4297 purpose. Because the file may be in /proc,
4281 do not use st_size or report any SEEK_END failure. */ 4298 do not use st_size or report any SEEK_END failure. */
4282 ptrdiff_t nread = emacs_fd_read (fd, read_buf, 4 * 1024); 4299 static_assert (4 * 1024 < sizeof read_buf);
4283 if (nread == 4 * 1024) 4300 ptrdiff_t nread = emacs_full_read (fd, read_buf, 4 * 1024);
4301 if (nread < 4 * 1024)
4302 file_size_hint = nread;
4303 else
4284 { 4304 {
4285 off_t tailoff = emacs_fd_lseek (fd, - 3 * 1024, SEEK_END); 4305 off_t tailoff = emacs_fd_lseek (fd, - 3 * 1024, SEEK_END);
4286 if (tailoff < 0) 4306 if (tailoff < 0)
@@ -4289,33 +4309,26 @@ by calling `format-decode', which see. */)
4289 tailoff = nread; 4309 tailoff = nread;
4290 } 4310 }
4291 4311
4292 /* When appending the last 3 KiB, read until EOF 4312 /* When appending the last 3 KiB, read extra bytes
4293 without trusting tailoff, as the file may be in 4313 without trusting tailoff, as the file may be growing. */
4294 /proc or be mutating. */ 4314 nread = emacs_full_read (fd, read_buf + 1024,
4295 nread = 1024; 4315 sizeof read_buf - 1024);
4296 for (;;) 4316 if (nread == sizeof read_buf - 1024)
4297 { 4317 {
4298 ptrdiff_t r = emacs_fd_read (fd, read_buf + nread, 4318 /* Give up reading the last 3 KiB; the file is
4299 sizeof read_buf - nread); 4319 growing too rapidly. */
4300 if (r <= 0) 4320 nread = 1024;
4301 { 4321 }
4302 if (r < 0) 4322 else if (0 <= nread)
4303 nread = r; 4323 {
4304 else 4324 file_size_hint = tailoff + nread;
4305 file_size_hint = tailoff; 4325 nread += 1024;
4306 break;
4307 }
4308 tailoff += r;
4309 nread += r;
4310 bool eof = nread < sizeof read_buf;
4311 if (4 * 1024 < nread) 4326 if (4 * 1024 < nread)
4312 { 4327 {
4313 memmove (read_buf + 1024, 4328 memmove (read_buf + 1024,
4314 read_buf + nread - 3 * 1024, 3 * 1024); 4329 read_buf + nread - 3 * 1024, 3 * 1024);
4315 nread = 4 * 1024; 4330 nread = 4 * 1024;
4316 } 4331 }
4317 if (eof)
4318 break;
4319 } 4332 }
4320 } 4333 }
4321 4334
@@ -4427,12 +4440,41 @@ by calling `format-decode', which see. */)
4427 bytes_to_read = min (bytes_to_read, end_offset - curpos); 4440 bytes_to_read = min (bytes_to_read, end_offset - curpos);
4428 ptrdiff_t nread = (bytes_to_read <= 0 4441 ptrdiff_t nread = (bytes_to_read <= 0
4429 ? 0 4442 ? 0
4430 : emacs_fd_read (fd, read_buf, bytes_to_read)); 4443 : emacs_full_read (fd, read_buf, bytes_to_read));
4431 if (nread < 0) 4444 if (nread < 0)
4432 report_file_error ("Read error", orig_filename); 4445 report_file_error ("Read error", orig_filename);
4433 else if (nread == 0) 4446
4447 if (0 < nread)
4448 {
4449 if (CODING_REQUIRE_DETECTION (&coding))
4450 {
4451 coding_system
4452 = detect_coding_system ((unsigned char *) read_buf,
4453 nread, nread, 1, 0,
4454 coding_system);
4455 setup_coding_system (coding_system, &coding);
4456 }
4457
4458 if (CODING_REQUIRE_DECODING (&coding))
4459 /* We found that the file should be decoded somehow.
4460 Let's give up here. */
4461 {
4462 giveup_match_end = true;
4463 break;
4464 }
4465
4466 ptrdiff_t bufpos = 0;
4467 while (bufpos < nread && same_at_start < same_at_end
4468 && FETCH_BYTE (same_at_start) == read_buf[bufpos])
4469 same_at_start++, bufpos++;
4470 /* If we found a discrepancy, stop the scan. */
4471 if (bufpos != nread)
4472 break;
4473 }
4474
4475 if (nread < bytes_to_read)
4434 { 4476 {
4435 file_size_hint = curpos; 4477 file_size_hint = curpos + nread;
4436 4478
4437 /* Data inserted from the file match the buffer's leading bytes, 4479 /* Data inserted from the file match the buffer's leading bytes,
4438 so there's no need to replace anything. */ 4480 so there's no need to replace anything. */
@@ -4443,31 +4485,6 @@ by calling `format-decode', which see. */)
4443 del_range_byte (same_at_start, same_at_end); 4485 del_range_byte (same_at_start, same_at_end);
4444 goto handled; 4486 goto handled;
4445 } 4487 }
4446
4447 if (CODING_REQUIRE_DETECTION (&coding))
4448 {
4449 coding_system = detect_coding_system ((unsigned char *) read_buf,
4450 nread, nread, 1, 0,
4451 coding_system);
4452 setup_coding_system (coding_system, &coding);
4453 }
4454
4455 if (CODING_REQUIRE_DECODING (&coding))
4456 /* We found that the file should be decoded somehow.
4457 Let's give up here. */
4458 {
4459 giveup_match_end = true;
4460 break;
4461 }
4462
4463 int bufpos = 0;
4464 while (bufpos < nread && same_at_start < same_at_end
4465 && FETCH_BYTE (same_at_start) == read_buf[bufpos])
4466 same_at_start++, bufpos++;
4467 /* If we found a discrepancy, stop the scan.
4468 Otherwise loop around and scan the next bufferful. */
4469 if (bufpos != nread)
4470 break;
4471 } 4488 }
4472 4489
4473 /* Count how many chars at the end of the file 4490 /* Count how many chars at the end of the file
@@ -4488,11 +4505,11 @@ by calling `format-decode', which see. */)
4488 { 4505 {
4489 /* Check that read reports EOF soon, to catch platforms 4506 /* Check that read reports EOF soon, to catch platforms
4490 where SEEK_END can report wildly small offsets. */ 4507 where SEEK_END can report wildly small offsets. */
4491 ptrdiff_t n = emacs_fd_read (fd, read_buf, sizeof read_buf); 4508 ptrdiff_t n = emacs_full_read (fd, read_buf, sizeof read_buf);
4492 if (n < 0) 4509 if (n < 0)
4493 report_file_error ("Read error", orig_filename); 4510 report_file_error ("Read error", orig_filename);
4494 endpos += n; 4511 endpos += n;
4495 giveup_match_end = 0 < n; 4512 giveup_match_end = n == sizeof read_buf;
4496 if (!giveup_match_end) 4513 if (!giveup_match_end)
4497 file_size_hint = endpos; 4514 file_size_hint = endpos;
4498 } 4515 }
@@ -4501,7 +4518,7 @@ by calling `format-decode', which see. */)
4501 4518
4502 while (!giveup_match_end) 4519 while (!giveup_match_end)
4503 { 4520 {
4504 ptrdiff_t total_read, nread, bufpos, trial; 4521 ptrdiff_t nread, bufpos, trial;
4505 off_t curpos; 4522 off_t curpos;
4506 4523
4507 /* At what file position are we now scanning? */ 4524 /* At what file position are we now scanning? */
@@ -4511,32 +4528,24 @@ by calling `format-decode', which see. */)
4511 break; 4528 break;
4512 /* How much can we scan in the next step? */ 4529 /* How much can we scan in the next step? */
4513 trial = min (curpos, sizeof read_buf); 4530 trial = min (curpos, sizeof read_buf);
4514 if (emacs_fd_lseek (fd, curpos - trial, SEEK_SET) < 0) 4531 curpos = emacs_fd_lseek (fd, curpos - trial, SEEK_SET);
4532 if (curpos < 0)
4515 report_file_error ("Setting file position", orig_filename); 4533 report_file_error ("Setting file position", orig_filename);
4516 4534
4517 total_read = nread = 0; 4535 nread = emacs_full_read (fd, read_buf, trial);
4518 while (total_read < trial) 4536 if (nread < trial)
4519 { 4537 {
4520 nread = emacs_fd_read (fd, read_buf + total_read,
4521 trial - total_read);
4522 if (nread < 0) 4538 if (nread < 0)
4523 report_file_error ("Read error", orig_filename); 4539 report_file_error ("Read error", orig_filename);
4524 else if (nread == 0) 4540 /* The file unexpectedly shrank. */
4525 { 4541 file_size_hint = curpos + nread;
4526 /* The file unexpectedly shrank. */ 4542 giveup_match_end = true;
4527 file_size_hint = curpos - trial + total_read; 4543 break;
4528 giveup_match_end = true;
4529 break;
4530 }
4531 total_read += nread;
4532 } 4544 }
4533 4545
4534 if (giveup_match_end)
4535 break;
4536
4537 /* Scan this bufferful from the end, comparing with 4546 /* Scan this bufferful from the end, comparing with
4538 the Emacs buffer. */ 4547 the Emacs buffer. */
4539 bufpos = total_read; 4548 bufpos = nread;
4540 4549
4541 /* Compare with same_at_start to avoid counting some buffer text 4550 /* Compare with same_at_start to avoid counting some buffer text
4542 as matching both at the file's beginning and at the end. */ 4551 as matching both at the file's beginning and at the end. */
@@ -4557,9 +4566,6 @@ by calling `format-decode', which see. */)
4557 giveup_match_end = true; 4566 giveup_match_end = true;
4558 break; 4567 break;
4559 } 4568 }
4560
4561 if (nread == 0)
4562 break;
4563 } 4569 }
4564 4570
4565 if (! giveup_match_end) 4571 if (! giveup_match_end)
@@ -4658,9 +4664,11 @@ by calling `format-decode', which see. */)
4658 /* Read one buffer a time, to allow 4664 /* Read one buffer a time, to allow
4659 quitting while reading a huge file. */ 4665 quitting while reading a huge file. */
4660 4666
4661 this = emacs_fd_read (fd, read_buf + unprocessed, 4667 ptrdiff_t trial = sizeof read_buf - unprocessed;
4662 sizeof read_buf - unprocessed); 4668 this = emacs_full_read (fd, read_buf + unprocessed, trial);
4663 if (this <= 0) 4669 if (this < 0)
4670 report_file_error ("Read error", orig_filename);
4671 if (this == 0)
4664 break; 4672 break;
4665 4673
4666 file_size_hint += this; 4674 file_size_hint += this;
@@ -4671,10 +4679,10 @@ by calling `format-decode', which see. */)
4671 unprocessed = coding.carryover_bytes; 4679 unprocessed = coding.carryover_bytes;
4672 if (coding.carryover_bytes > 0) 4680 if (coding.carryover_bytes > 0)
4673 memcpy (read_buf, coding.carryover, unprocessed); 4681 memcpy (read_buf, coding.carryover, unprocessed);
4682 if (this < trial)
4683 break;
4674 } 4684 }
4675 4685
4676 if (this < 0)
4677 report_file_error ("Read error", orig_filename);
4678 emacs_fd_close (fd); 4686 emacs_fd_close (fd);
4679 clear_unwind_protect (fd_index); 4687 clear_unwind_protect (fd_index);
4680 4688