diff options
| author | Paul Eggert | 2025-07-13 10:55:14 -0700 |
|---|---|---|
| committer | Paul Eggert | 2025-07-13 21:09:39 -0700 |
| commit | 55f41ca3aa8fe487d10730708a7396137a2c9d18 (patch) | |
| tree | addad6d26781ef8a15618154799489787cdc5cc9 /src | |
| parent | 8ac78986ff7028c249f136b3523dacf0693c22d3 (diff) | |
| download | emacs-55f41ca3aa8fe487d10730708a7396137a2c9d18.tar.gz emacs-55f41ca3aa8fe487d10730708a7396137a2c9d18.zip | |
insert-file-contents file shrinkage in scanback
* src/fileio.c (emacs_full_read): New function.
(Fcopy_file, Finsert_file_contents): Use it.
(Finsert_file_contents): Check for partial reads when scanning
backwards through the file’s tail, as this indicates the file
shrank while we read it. Also, use emacs_full_read in other
situations where it’s simpler and should be a bit faster.
Diffstat (limited to 'src')
| -rw-r--r-- | src/fileio.c | 166 |
1 files changed, 87 insertions, 79 deletions
diff --git a/src/fileio.c b/src/fileio.c index 8150f4d8e12..2a53dbbdd07 100644 --- a/src/fileio.c +++ b/src/fileio.c | |||
| @@ -2220,6 +2220,21 @@ barf_or_query_if_file_exists (Lisp_Object absname, bool known_to_exist, | |||
| 2220 | } | 2220 | } |
| 2221 | } | 2221 | } |
| 2222 | 2222 | ||
| 2223 | /* Read bytes from FD into BUF, which is of size BUFSIZE, calling emacs_fd_read repeatedly after short reads until BUF is full. | ||
| 2224 | FD should be a regular file, as special files might need quit processing. | ||
| 2225 | On success, return the total read count; it is less than BUFSIZE only if emacs_fd_read returned 0 (EOF) before BUF was filled. | ||
| 2226 | Return -1 on failure, even if earlier calls already read some bytes, setting errno and possibly setting BUF. */ | ||
| 2227 | static ptrdiff_t | ||
| 2228 | emacs_full_read (int fd, void *buf, ptrdiff_t bufsize) | ||
| 2229 | { | ||
| 2230 | char *b = buf; | ||
| 2231 | ptrdiff_t nread = 0, r; | ||
| 2232 | while (0 < (r = emacs_fd_read (fd, b + nread, bufsize - nread)) | ||
| 2233 | && (nread += r) < bufsize) | ||
| 2234 | continue; /* All the work happens in the loop condition. */ | ||
| 2235 | return r < 0 ? r : nread; | ||
| 2236 | } | ||
| 2237 | |||
| 2223 | #ifndef WINDOWSNT | 2238 | #ifndef WINDOWSNT |
| 2224 | /* Copy data to DEST from SOURCE if possible. Return true if OK. */ | 2239 | /* Copy data to DEST from SOURCE if possible. Return true if OK. */ |
| 2225 | static bool | 2240 | static bool |
| @@ -2419,13 +2434,15 @@ permissions. */) | |||
| 2419 | char buf[MAX_ALLOCA]; | 2434 | char buf[MAX_ALLOCA]; |
| 2420 | 2435 | ||
| 2421 | for (ptrdiff_t copied; | 2436 | for (ptrdiff_t copied; |
| 2422 | (copied = emacs_fd_read (ifd, buf, sizeof buf)); | 2437 | (copied = emacs_full_read (ifd, buf, sizeof buf)); |
| 2423 | newsize += copied) | 2438 | newsize += copied) |
| 2424 | { | 2439 | { |
| 2425 | if (copied < 0) | 2440 | if (copied < 0) |
| 2426 | report_file_error ("Read error", file); | 2441 | report_file_error ("Read error", file); |
| 2427 | if (emacs_write_quit (ofd, buf, copied) != copied) | 2442 | if (emacs_write_quit (ofd, buf, copied) != copied) |
| 2428 | report_file_error ("Write error", newname); | 2443 | report_file_error ("Write error", newname); |
| 2444 | if (copied < sizeof buf) | ||
| 2445 | break; | ||
| 2429 | } | 2446 | } |
| 2430 | } | 2447 | } |
| 2431 | 2448 | ||
| @@ -4279,8 +4296,11 @@ by calling `format-decode', which see. */) | |||
| 4279 | and tailing respectively are sufficient for this | 4296 | and tailing respectively are sufficient for this |
| 4280 | purpose. Because the file may be in /proc, | 4297 | purpose. Because the file may be in /proc, |
| 4281 | do not use st_size or report any SEEK_END failure. */ | 4298 | do not use st_size or report any SEEK_END failure. */ |
| 4282 | ptrdiff_t nread = emacs_fd_read (fd, read_buf, 4 * 1024); | 4299 | static_assert (4 * 1024 < sizeof read_buf); |
| 4283 | if (nread == 4 * 1024) | 4300 | ptrdiff_t nread = emacs_full_read (fd, read_buf, 4 * 1024); |
| 4301 | if (nread < 4 * 1024) | ||
| 4302 | file_size_hint = nread; | ||
| 4303 | else | ||
| 4284 | { | 4304 | { |
| 4285 | off_t tailoff = emacs_fd_lseek (fd, - 3 * 1024, SEEK_END); | 4305 | off_t tailoff = emacs_fd_lseek (fd, - 3 * 1024, SEEK_END); |
| 4286 | if (tailoff < 0) | 4306 | if (tailoff < 0) |
| @@ -4289,33 +4309,26 @@ by calling `format-decode', which see. */) | |||
| 4289 | tailoff = nread; | 4309 | tailoff = nread; |
| 4290 | } | 4310 | } |
| 4291 | 4311 | ||
| 4292 | /* When appending the last 3 KiB, read until EOF | 4312 | /* When appending the last 3 KiB, read extra bytes |
| 4293 | without trusting tailoff, as the file may be in | 4313 | without trusting tailoff, as the file may be growing. */ |
| 4294 | /proc or be mutating. */ | 4314 | nread = emacs_full_read (fd, read_buf + 1024, |
| 4295 | nread = 1024; | 4315 | sizeof read_buf - 1024); |
| 4296 | for (;;) | 4316 | if (nread == sizeof read_buf - 1024) |
| 4297 | { | 4317 | { |
| 4298 | ptrdiff_t r = emacs_fd_read (fd, read_buf + nread, | 4318 | /* Give up reading the last 3 KiB; the file is |
| 4299 | sizeof read_buf - nread); | 4319 | growing too rapidly. */ |
| 4300 | if (r <= 0) | 4320 | nread = 1024; |
| 4301 | { | 4321 | } |
| 4302 | if (r < 0) | 4322 | else if (0 <= nread) |
| 4303 | nread = r; | 4323 | { |
| 4304 | else | 4324 | file_size_hint = tailoff + nread; |
| 4305 | file_size_hint = tailoff; | 4325 | nread += 1024; |
| 4306 | break; | ||
| 4307 | } | ||
| 4308 | tailoff += r; | ||
| 4309 | nread += r; | ||
| 4310 | bool eof = nread < sizeof read_buf; | ||
| 4311 | if (4 * 1024 < nread) | 4326 | if (4 * 1024 < nread) |
| 4312 | { | 4327 | { |
| 4313 | memmove (read_buf + 1024, | 4328 | memmove (read_buf + 1024, |
| 4314 | read_buf + nread - 3 * 1024, 3 * 1024); | 4329 | read_buf + nread - 3 * 1024, 3 * 1024); |
| 4315 | nread = 4 * 1024; | 4330 | nread = 4 * 1024; |
| 4316 | } | 4331 | } |
| 4317 | if (eof) | ||
| 4318 | break; | ||
| 4319 | } | 4332 | } |
| 4320 | } | 4333 | } |
| 4321 | 4334 | ||
| @@ -4427,12 +4440,41 @@ by calling `format-decode', which see. */) | |||
| 4427 | bytes_to_read = min (bytes_to_read, end_offset - curpos); | 4440 | bytes_to_read = min (bytes_to_read, end_offset - curpos); |
| 4428 | ptrdiff_t nread = (bytes_to_read <= 0 | 4441 | ptrdiff_t nread = (bytes_to_read <= 0 |
| 4429 | ? 0 | 4442 | ? 0 |
| 4430 | : emacs_fd_read (fd, read_buf, bytes_to_read)); | 4443 | : emacs_full_read (fd, read_buf, bytes_to_read)); |
| 4431 | if (nread < 0) | 4444 | if (nread < 0) |
| 4432 | report_file_error ("Read error", orig_filename); | 4445 | report_file_error ("Read error", orig_filename); |
| 4433 | else if (nread == 0) | 4446 | |
| 4447 | if (0 < nread) | ||
| 4448 | { | ||
| 4449 | if (CODING_REQUIRE_DETECTION (&coding)) | ||
| 4450 | { | ||
| 4451 | coding_system | ||
| 4452 | = detect_coding_system ((unsigned char *) read_buf, | ||
| 4453 | nread, nread, 1, 0, | ||
| 4454 | coding_system); | ||
| 4455 | setup_coding_system (coding_system, &coding); | ||
| 4456 | } | ||
| 4457 | |||
| 4458 | if (CODING_REQUIRE_DECODING (&coding)) | ||
| 4459 | /* We found that the file should be decoded somehow. | ||
| 4460 | Let's give up here. */ | ||
| 4461 | { | ||
| 4462 | giveup_match_end = true; | ||
| 4463 | break; | ||
| 4464 | } | ||
| 4465 | |||
| 4466 | ptrdiff_t bufpos = 0; | ||
| 4467 | while (bufpos < nread && same_at_start < same_at_end | ||
| 4468 | && FETCH_BYTE (same_at_start) == read_buf[bufpos]) | ||
| 4469 | same_at_start++, bufpos++; | ||
| 4470 | /* If we found a discrepancy, stop the scan. */ | ||
| 4471 | if (bufpos != nread) | ||
| 4472 | break; | ||
| 4473 | } | ||
| 4474 | |||
| 4475 | if (nread < bytes_to_read) | ||
| 4434 | { | 4476 | { |
| 4435 | file_size_hint = curpos; | 4477 | file_size_hint = curpos + nread; |
| 4436 | 4478 | ||
| 4437 | /* Data inserted from the file match the buffer's leading bytes, | 4479 | /* Data inserted from the file match the buffer's leading bytes, |
| 4438 | so there's no need to replace anything. */ | 4480 | so there's no need to replace anything. */ |
| @@ -4443,31 +4485,6 @@ by calling `format-decode', which see. */) | |||
| 4443 | del_range_byte (same_at_start, same_at_end); | 4485 | del_range_byte (same_at_start, same_at_end); |
| 4444 | goto handled; | 4486 | goto handled; |
| 4445 | } | 4487 | } |
| 4446 | |||
| 4447 | if (CODING_REQUIRE_DETECTION (&coding)) | ||
| 4448 | { | ||
| 4449 | coding_system = detect_coding_system ((unsigned char *) read_buf, | ||
| 4450 | nread, nread, 1, 0, | ||
| 4451 | coding_system); | ||
| 4452 | setup_coding_system (coding_system, &coding); | ||
| 4453 | } | ||
| 4454 | |||
| 4455 | if (CODING_REQUIRE_DECODING (&coding)) | ||
| 4456 | /* We found that the file should be decoded somehow. | ||
| 4457 | Let's give up here. */ | ||
| 4458 | { | ||
| 4459 | giveup_match_end = true; | ||
| 4460 | break; | ||
| 4461 | } | ||
| 4462 | |||
| 4463 | int bufpos = 0; | ||
| 4464 | while (bufpos < nread && same_at_start < same_at_end | ||
| 4465 | && FETCH_BYTE (same_at_start) == read_buf[bufpos]) | ||
| 4466 | same_at_start++, bufpos++; | ||
| 4467 | /* If we found a discrepancy, stop the scan. | ||
| 4468 | Otherwise loop around and scan the next bufferful. */ | ||
| 4469 | if (bufpos != nread) | ||
| 4470 | break; | ||
| 4471 | } | 4488 | } |
| 4472 | 4489 | ||
| 4473 | /* Count how many chars at the end of the file | 4490 | /* Count how many chars at the end of the file |
| @@ -4488,11 +4505,11 @@ by calling `format-decode', which see. */) | |||
| 4488 | { | 4505 | { |
| 4489 | /* Check that read reports EOF soon, to catch platforms | 4506 | /* Check that read reports EOF soon, to catch platforms |
| 4490 | where SEEK_END can report wildly small offsets. */ | 4507 | where SEEK_END can report wildly small offsets. */ |
| 4491 | ptrdiff_t n = emacs_fd_read (fd, read_buf, sizeof read_buf); | 4508 | ptrdiff_t n = emacs_full_read (fd, read_buf, sizeof read_buf); |
| 4492 | if (n < 0) | 4509 | if (n < 0) |
| 4493 | report_file_error ("Read error", orig_filename); | 4510 | report_file_error ("Read error", orig_filename); |
| 4494 | endpos += n; | 4511 | endpos += n; |
| 4495 | giveup_match_end = 0 < n; | 4512 | giveup_match_end = n == sizeof read_buf; |
| 4496 | if (!giveup_match_end) | 4513 | if (!giveup_match_end) |
| 4497 | file_size_hint = endpos; | 4514 | file_size_hint = endpos; |
| 4498 | } | 4515 | } |
| @@ -4501,7 +4518,7 @@ by calling `format-decode', which see. */) | |||
| 4501 | 4518 | ||
| 4502 | while (!giveup_match_end) | 4519 | while (!giveup_match_end) |
| 4503 | { | 4520 | { |
| 4504 | ptrdiff_t total_read, nread, bufpos, trial; | 4521 | ptrdiff_t nread, bufpos, trial; |
| 4505 | off_t curpos; | 4522 | off_t curpos; |
| 4506 | 4523 | ||
| 4507 | /* At what file position are we now scanning? */ | 4524 | /* At what file position are we now scanning? */ |
| @@ -4511,32 +4528,24 @@ by calling `format-decode', which see. */) | |||
| 4511 | break; | 4528 | break; |
| 4512 | /* How much can we scan in the next step? */ | 4529 | /* How much can we scan in the next step? */ |
| 4513 | trial = min (curpos, sizeof read_buf); | 4530 | trial = min (curpos, sizeof read_buf); |
| 4514 | if (emacs_fd_lseek (fd, curpos - trial, SEEK_SET) < 0) | 4531 | curpos = emacs_fd_lseek (fd, curpos - trial, SEEK_SET); |
| 4532 | if (curpos < 0) | ||
| 4515 | report_file_error ("Setting file position", orig_filename); | 4533 | report_file_error ("Setting file position", orig_filename); |
| 4516 | 4534 | ||
| 4517 | total_read = nread = 0; | 4535 | nread = emacs_full_read (fd, read_buf, trial); |
| 4518 | while (total_read < trial) | 4536 | if (nread < trial) |
| 4519 | { | 4537 | { |
| 4520 | nread = emacs_fd_read (fd, read_buf + total_read, | ||
| 4521 | trial - total_read); | ||
| 4522 | if (nread < 0) | 4538 | if (nread < 0) |
| 4523 | report_file_error ("Read error", orig_filename); | 4539 | report_file_error ("Read error", orig_filename); |
| 4524 | else if (nread == 0) | 4540 | /* The file unexpectedly shrank. */ |
| 4525 | { | 4541 | file_size_hint = curpos + nread; |
| 4526 | /* The file unexpectedly shrank. */ | 4542 | giveup_match_end = true; |
| 4527 | file_size_hint = curpos - trial + total_read; | 4543 | break; |
| 4528 | giveup_match_end = true; | ||
| 4529 | break; | ||
| 4530 | } | ||
| 4531 | total_read += nread; | ||
| 4532 | } | 4544 | } |
| 4533 | 4545 | ||
| 4534 | if (giveup_match_end) | ||
| 4535 | break; | ||
| 4536 | |||
| 4537 | /* Scan this bufferful from the end, comparing with | 4546 | /* Scan this bufferful from the end, comparing with |
| 4538 | the Emacs buffer. */ | 4547 | the Emacs buffer. */ |
| 4539 | bufpos = total_read; | 4548 | bufpos = nread; |
| 4540 | 4549 | ||
| 4541 | /* Compare with same_at_start to avoid counting some buffer text | 4550 | /* Compare with same_at_start to avoid counting some buffer text |
| 4542 | as matching both at the file's beginning and at the end. */ | 4551 | as matching both at the file's beginning and at the end. */ |
| @@ -4557,9 +4566,6 @@ by calling `format-decode', which see. */) | |||
| 4557 | giveup_match_end = true; | 4566 | giveup_match_end = true; |
| 4558 | break; | 4567 | break; |
| 4559 | } | 4568 | } |
| 4560 | |||
| 4561 | if (nread == 0) | ||
| 4562 | break; | ||
| 4563 | } | 4569 | } |
| 4564 | 4570 | ||
| 4565 | if (! giveup_match_end) | 4571 | if (! giveup_match_end) |
| @@ -4658,9 +4664,11 @@ by calling `format-decode', which see. */) | |||
| 4658 | /* Read one buffer at a time, to allow | 4664 | /* Read one buffer at a time, to allow |
| 4659 | quitting while reading a huge file. */ | 4665 | quitting while reading a huge file. */ |
| 4660 | 4666 | ||
| 4661 | this = emacs_fd_read (fd, read_buf + unprocessed, | 4667 | ptrdiff_t trial = sizeof read_buf - unprocessed; |
| 4662 | sizeof read_buf - unprocessed); | 4668 | this = emacs_full_read (fd, read_buf + unprocessed, trial); |
| 4663 | if (this <= 0) | 4669 | if (this < 0) |
| 4670 | report_file_error ("Read error", orig_filename); | ||
| 4671 | if (this == 0) | ||
| 4664 | break; | 4672 | break; |
| 4665 | 4673 | ||
| 4666 | file_size_hint += this; | 4674 | file_size_hint += this; |
| @@ -4671,10 +4679,10 @@ by calling `format-decode', which see. */) | |||
| 4671 | unprocessed = coding.carryover_bytes; | 4679 | unprocessed = coding.carryover_bytes; |
| 4672 | if (coding.carryover_bytes > 0) | 4680 | if (coding.carryover_bytes > 0) |
| 4673 | memcpy (read_buf, coding.carryover, unprocessed); | 4681 | memcpy (read_buf, coding.carryover, unprocessed); |
| 4682 | if (this < trial) | ||
| 4683 | break; | ||
| 4674 | } | 4684 | } |
| 4675 | 4685 | ||
| 4676 | if (this < 0) | ||
| 4677 | report_file_error ("Read error", orig_filename); | ||
| 4678 | emacs_fd_close (fd); | 4686 | emacs_fd_close (fd); |
| 4679 | clear_unwind_protect (fd_index); | 4687 | clear_unwind_protect (fd_index); |
| 4680 | 4688 | ||