aboutsummaryrefslogtreecommitdiffstats
path: root/src/fileio.c
diff options
context:
space:
mode:
author Paul Eggert 2011-09-02 22:23:17 -0700
committer Paul Eggert 2011-09-02 22:23:17 -0700
commit 728f8f0a00a22e544789952c76f9eac0c3f8b2c6 (patch)
tree 15415449894cb74cc1f09c1fe5a7bc3869a17bf2 /src/fileio.c
parent 9af32a1a25d9619e17abfb8a21b2938017cf56b3 (diff)
download emacs-728f8f0a00a22e544789952c76f9eac0c3f8b2c6.tar.gz
emacs-728f8f0a00a22e544789952c76f9eac0c3f8b2c6.zip
* fileio.c: Fix bugs with large file offsets.
The previous code assumed that file offsets (off_t values) fit in EMACS_INT variables, which is not true on typical 32-bit hosts. The code messed up by falsely reporting buffer overflow in cases such as (insert-file-contents "big" nil 1 2) into an empty buffer when "big" contains more than 2**29 bytes, even though this inserts just one byte and does not overflow the buffer. (Finsert_file_contents): Store file offsets as off_t values, not as EMACS_INT values. Check for overflow when converting between EMACS_INT and off_t. When checking for buffer overflow or for overlap, take the offsets into account. Don't use EMACS_INT for small values where int suffices. When checking for overlap, fix a typo: ZV was used where ZV_BYTE was intended. (Fwrite_region): Don't assume off_t fits into 'long'. * buffer.h (struct buffer.modtime_size): Now off_t, not EMACS_INT.
Diffstat (limited to 'src/fileio.c')
-rw-r--r-- src/fileio.c 92
1 file changed, 62 insertions, 30 deletions
diff --git a/src/fileio.c b/src/fileio.c
index 60ee35bb399..fe0fb593208 100644
--- a/src/fileio.c
+++ b/src/fileio.c
@@ -3179,6 +3179,7 @@ variable `last-coding-system-used' to the coding system actually used. */)
3179 EMACS_INT inserted = 0; 3179 EMACS_INT inserted = 0;
3180 int nochange = 0; 3180 int nochange = 0;
3181 register EMACS_INT how_much; 3181 register EMACS_INT how_much;
3182 off_t beg_offset, end_offset;
3182 register EMACS_INT unprocessed; 3183 register EMACS_INT unprocessed;
3183 int count = SPECPDL_INDEX (); 3184 int count = SPECPDL_INDEX ();
3184 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5; 3185 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
@@ -3284,15 +3285,6 @@ variable `last-coding-system-used' to the coding system actually used. */)
3284 record_unwind_protect (close_file_unwind, make_number (fd)); 3285 record_unwind_protect (close_file_unwind, make_number (fd));
3285 3286
3286 3287
3287 /* Check whether the size is too large or negative, which can happen on a
3288 platform that allows file sizes greater than the maximum off_t value. */
3289 if (! not_regular
3290 && ! (0 <= st.st_size && st.st_size <= BUF_BYTES_MAX))
3291 buffer_overflow ();
3292
3293 /* Prevent redisplay optimizations. */
3294 current_buffer->clip_changed = 1;
3295
3296 if (!NILP (visit)) 3288 if (!NILP (visit))
3297 { 3289 {
3298 if (!NILP (beg) || !NILP (end)) 3290 if (!NILP (beg) || !NILP (end))
@@ -3302,26 +3294,64 @@ variable `last-coding-system-used' to the coding system actually used. */)
3302 } 3294 }
3303 3295
3304 if (!NILP (beg)) 3296 if (!NILP (beg))
3305 CHECK_NUMBER (beg); 3297 {
3298 if (! (RANGED_INTEGERP (0, beg, TYPE_MAXIMUM (off_t))))
3299 wrong_type_argument (intern ("file-offset"), beg);
3300 beg_offset = XFASTINT (beg);
3301 }
3306 else 3302 else
3307 XSETFASTINT (beg, 0); 3303 beg_offset = 0;
3308 3304
3309 if (!NILP (end)) 3305 if (!NILP (end))
3310 CHECK_NUMBER (end); 3306 {
3307 if (! (RANGED_INTEGERP (0, end, TYPE_MAXIMUM (off_t))))
3308 wrong_type_argument (intern ("file-offset"), end);
3309 end_offset = XFASTINT (end);
3310 }
3311 else 3311 else
3312 { 3312 {
3313 if (! not_regular) 3313 if (not_regular)
3314 end_offset = TYPE_MAXIMUM (off_t);
3315 else
3314 { 3316 {
3315 XSETINT (end, st.st_size); 3317 end_offset = st.st_size;
3318
3319 /* A negative size can happen on a platform that allows file
3320 sizes greater than the maximum off_t value. */
3321 if (end_offset < 0)
3322 buffer_overflow ();
3316 3323
3317 /* The file size returned from stat may be zero, but data 3324 /* The file size returned from stat may be zero, but data
3318 may be readable nonetheless, for example when this is a 3325 may be readable nonetheless, for example when this is a
3319 file in the /proc filesystem. */ 3326 file in the /proc filesystem. */
3320 if (st.st_size == 0) 3327 if (end_offset == 0)
3321 XSETINT (end, READ_BUF_SIZE); 3328 end_offset = READ_BUF_SIZE;
3322 } 3329 }
3323 } 3330 }
3324 3331
3332 /* Check now whether the buffer will become too large,
3333 in the likely case where the file's length is not changing.
3334 This saves a lot of needless work before a buffer overflow. */
3335 if (! not_regular)
3336 {
3337 /* The likely offset where we will stop reading. We could read
3338 more (or less), if the file grows (or shrinks) as we read it. */
3339 off_t likely_end = min (end_offset, st.st_size);
3340
3341 if (beg_offset < likely_end)
3342 {
3343 ptrdiff_t buf_bytes =
3344 Z_BYTE - (!NILP (replace) ? ZV_BYTE - BEGV_BYTE : 0);
3345 ptrdiff_t buf_growth_max = BUF_BYTES_MAX - buf_bytes;
3346 off_t likely_growth = likely_end - beg_offset;
3347 if (buf_growth_max < likely_growth)
3348 buffer_overflow ();
3349 }
3350 }
3351
3352 /* Prevent redisplay optimizations. */
3353 current_buffer->clip_changed = 1;
3354
3325 if (EQ (Vcoding_system_for_read, Qauto_save_coding)) 3355 if (EQ (Vcoding_system_for_read, Qauto_save_coding))
3326 { 3356 {
3327 coding_system = coding_inherit_eol_type (Qutf_8_emacs, Qunix); 3357 coding_system = coding_inherit_eol_type (Qutf_8_emacs, Qunix);
@@ -3465,9 +3495,9 @@ variable `last-coding-system-used' to the coding system actually used. */)
3465 give up on handling REPLACE in the optimized way. */ 3495 give up on handling REPLACE in the optimized way. */
3466 int giveup_match_end = 0; 3496 int giveup_match_end = 0;
3467 3497
3468 if (XINT (beg) != 0) 3498 if (beg_offset != 0)
3469 { 3499 {
3470 if (emacs_lseek (fd, XINT (beg), SEEK_SET) < 0) 3500 if (lseek (fd, beg_offset, SEEK_SET) < 0)
3471 report_file_error ("Setting file position", 3501 report_file_error ("Setting file position",
3472 Fcons (orig_filename, Qnil)); 3502 Fcons (orig_filename, Qnil));
3473 } 3503 }
@@ -3515,7 +3545,7 @@ variable `last-coding-system-used' to the coding system actually used. */)
3515 immediate_quit = 0; 3545 immediate_quit = 0;
3516 /* If the file matches the buffer completely, 3546 /* If the file matches the buffer completely,
3517 there's no need to replace anything. */ 3547 there's no need to replace anything. */
3518 if (same_at_start - BEGV_BYTE == XINT (end)) 3548 if (same_at_start - BEGV_BYTE == end_offset)
3519 { 3549 {
3520 emacs_close (fd); 3550 emacs_close (fd);
3521 specpdl_ptr--; 3551 specpdl_ptr--;
@@ -3530,16 +3560,17 @@ variable `last-coding-system-used' to the coding system actually used. */)
3530 already found that decoding is necessary, don't waste time. */ 3560 already found that decoding is necessary, don't waste time. */
3531 while (!giveup_match_end) 3561 while (!giveup_match_end)
3532 { 3562 {
3533 EMACS_INT total_read, nread, bufpos, curpos, trial; 3563 int total_read, nread, bufpos, trial;
3564 off_t curpos;
3534 3565
3535 /* At what file position are we now scanning? */ 3566 /* At what file position are we now scanning? */
3536 curpos = XINT (end) - (ZV_BYTE - same_at_end); 3567 curpos = end_offset - (ZV_BYTE - same_at_end);
3537 /* If the entire file matches the buffer tail, stop the scan. */ 3568 /* If the entire file matches the buffer tail, stop the scan. */
3538 if (curpos == 0) 3569 if (curpos == 0)
3539 break; 3570 break;
3540 /* How much can we scan in the next step? */ 3571 /* How much can we scan in the next step? */
3541 trial = min (curpos, sizeof buffer); 3572 trial = min (curpos, sizeof buffer);
3542 if (emacs_lseek (fd, curpos - trial, SEEK_SET) < 0) 3573 if (lseek (fd, curpos - trial, SEEK_SET) < 0)
3543 report_file_error ("Setting file position", 3574 report_file_error ("Setting file position",
3544 Fcons (orig_filename, Qnil)); 3575 Fcons (orig_filename, Qnil));
3545 3576
@@ -3606,13 +3637,14 @@ variable `last-coding-system-used' to the coding system actually used. */)
3606 3637
3607 /* Don't try to reuse the same piece of text twice. */ 3638 /* Don't try to reuse the same piece of text twice. */
3608 overlap = (same_at_start - BEGV_BYTE 3639 overlap = (same_at_start - BEGV_BYTE
3609 - (same_at_end + st.st_size - ZV)); 3640 - (same_at_end
3641 + (! NILP (end) ? end_offset : st.st_size) - ZV_BYTE));
3610 if (overlap > 0) 3642 if (overlap > 0)
3611 same_at_end += overlap; 3643 same_at_end += overlap;
3612 3644
3613 /* Arrange to read only the nonmatching middle part of the file. */ 3645 /* Arrange to read only the nonmatching middle part of the file. */
3614 XSETFASTINT (beg, XINT (beg) + (same_at_start - BEGV_BYTE)); 3646 beg_offset += same_at_start - BEGV_BYTE;
3615 XSETFASTINT (end, XINT (end) - (ZV_BYTE - same_at_end)); 3647 end_offset -= ZV_BYTE - same_at_end;
3616 3648
3617 del_range_byte (same_at_start, same_at_end, 0); 3649 del_range_byte (same_at_start, same_at_end, 0);
3618 /* Insert from the file at the proper position. */ 3650 /* Insert from the file at the proper position. */
@@ -3657,7 +3689,7 @@ variable `last-coding-system-used' to the coding system actually used. */)
3657 /* First read the whole file, performing code conversion into 3689 /* First read the whole file, performing code conversion into
3658 CONVERSION_BUFFER. */ 3690 CONVERSION_BUFFER. */
3659 3691
3660 if (emacs_lseek (fd, XINT (beg), SEEK_SET) < 0) 3692 if (lseek (fd, beg_offset, SEEK_SET) < 0)
3661 report_file_error ("Setting file position", 3693 report_file_error ("Setting file position",
3662 Fcons (orig_filename, Qnil)); 3694 Fcons (orig_filename, Qnil));
3663 3695
@@ -3824,7 +3856,7 @@ variable `last-coding-system-used' to the coding system actually used. */)
3824 } 3856 }
3825 3857
3826 if (! not_regular) 3858 if (! not_regular)
3827 total = XINT (end) - XINT (beg); 3859 total = end_offset - beg_offset;
3828 else 3860 else
3829 /* For a special file, all we can do is guess. */ 3861 /* For a special file, all we can do is guess. */
3830 total = READ_BUF_SIZE; 3862 total = READ_BUF_SIZE;
@@ -3845,9 +3877,9 @@ variable `last-coding-system-used' to the coding system actually used. */)
3845 if (GAP_SIZE < total) 3877 if (GAP_SIZE < total)
3846 make_gap (total - GAP_SIZE); 3878 make_gap (total - GAP_SIZE);
3847 3879
3848 if (XINT (beg) != 0 || !NILP (replace)) 3880 if (beg_offset != 0 || !NILP (replace))
3849 { 3881 {
3850 if (emacs_lseek (fd, XINT (beg), SEEK_SET) < 0) 3882 if (lseek (fd, beg_offset, SEEK_SET) < 0)
3851 report_file_error ("Setting file position", 3883 report_file_error ("Setting file position",
3852 Fcons (orig_filename, Qnil)); 3884 Fcons (orig_filename, Qnil));
3853 } 3885 }
@@ -4576,7 +4608,7 @@ This calls `write-region-annotate-functions' at the start, and
4576 4608
4577 if (!NILP (append) && !NILP (Ffile_regular_p (filename))) 4609 if (!NILP (append) && !NILP (Ffile_regular_p (filename)))
4578 { 4610 {
4579 long ret; 4611 off_t ret;
4580 4612
4581 if (NUMBERP (append)) 4613 if (NUMBERP (append))
4582 ret = emacs_lseek (desc, XINT (append), SEEK_CUR); 4614 ret = emacs_lseek (desc, XINT (append), SEEK_CUR);