aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Paul Eggert 2011-09-02 22:23:17 -0700
committer Paul Eggert 2011-09-02 22:23:17 -0700
commit 728f8f0a00a22e544789952c76f9eac0c3f8b2c6 (patch)
tree 15415449894cb74cc1f09c1fe5a7bc3869a17bf2
parent 9af32a1a25d9619e17abfb8a21b2938017cf56b3 (diff)
download emacs-728f8f0a00a22e544789952c76f9eac0c3f8b2c6.tar.gz
emacs-728f8f0a00a22e544789952c76f9eac0c3f8b2c6.zip
* fileio.c: Fix bugs with large file offsets.
The previous code assumed that file offsets (off_t values) fit in EMACS_INT variables, which is not true on typical 32-bit hosts. The code messed up by falsely reporting buffer overflow in cases such as (insert-file-contents "big" nil 1 2) into an empty buffer when "big" contains more than 2**29 bytes, even though this inserts just one byte and does not overflow the buffer.
(Finsert_file_contents): Store file offsets as off_t values, not as EMACS_INT values. Check for overflow when converting between EMACS_INT and off_t. When checking for buffer overflow or for overlap, take the offsets into account. Don't use EMACS_INT for small values where int suffices. When checking for overlap, fix a typo: ZV was used where ZV_BYTE was intended.
(Fwrite_region): Don't assume off_t fits into 'long'.
* buffer.h (struct buffer.modtime_size): Now off_t, not EMACS_INT.
-rw-r--r-- src/ChangeLog 19
-rw-r--r-- src/buffer.h 2
-rw-r--r-- src/fileio.c 92
3 files changed, 82 insertions, 31 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index 52ec796d6cf..c80a12c1c00 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,22 @@
1 2011-09-03 Paul Eggert <eggert@cs.ucla.edu>
2
3 * fileio.c: Fix bugs with large file offsets.
4 The previous code assumed that file offsets (off_t values) fit in
5 EMACS_INT variables, which is not true on typical 32-bit hosts.
6 The code messed up by falsely reporting buffer overflow in cases
7 such as (insert-file-contents "big" nil 1 2) into an empty buffer
8 when "big" contains more than 2**29 bytes, even though this
9 inserts just one byte and does not overflow the buffer.
10 (Finsert_file_contents): Store file offsets as off_t
11 values, not as EMACS_INT values. Check for overflow when
12 converting between EMACS_INT and off_t. When checking for
13 buffer overflow or for overlap, take the offsets into account.
14 Don't use EMACS_INT for small values where int suffices.
15 When checking for overlap, fix a typo: ZV was used where
16 ZV_BYTE was intended.
17 (Fwrite_region): Don't assume off_t fits into 'long'.
18 * buffer.h (struct buffer.modtime_size): Now off_t, not EMACS_INT.
19
1 2011-08-30 Chong Yidong <cyd@stupidchicken.com> 20 2011-08-30 Chong Yidong <cyd@stupidchicken.com>
2 21
3 * syntax.c (find_defun_start): Update all cache variables if 22 * syntax.c (find_defun_start): Update all cache variables if
diff --git a/src/buffer.h b/src/buffer.h
index 06864dd5789..c50cfe56c77 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -559,7 +559,7 @@ struct buffer
559 is still the same (since it's rounded up to seconds) but we're actually 559 is still the same (since it's rounded up to seconds) but we're actually
560 not up-to-date. -1 means the size is unknown. Only meaningful if 560 not up-to-date. -1 means the size is unknown. Only meaningful if
561 modtime is actually set. */ 561 modtime is actually set. */
562 EMACS_INT modtime_size; 562 off_t modtime_size;
563 /* The value of text->modiff at the last auto-save. */ 563 /* The value of text->modiff at the last auto-save. */
564 int auto_save_modified; 564 int auto_save_modified;
565 /* The value of text->modiff at the last display error. 565 /* The value of text->modiff at the last display error.
diff --git a/src/fileio.c b/src/fileio.c
index 60ee35bb399..fe0fb593208 100644
--- a/src/fileio.c
+++ b/src/fileio.c
@@ -3179,6 +3179,7 @@ variable `last-coding-system-used' to the coding system actually used. */)
3179 EMACS_INT inserted = 0; 3179 EMACS_INT inserted = 0;
3180 int nochange = 0; 3180 int nochange = 0;
3181 register EMACS_INT how_much; 3181 register EMACS_INT how_much;
3182 off_t beg_offset, end_offset;
3182 register EMACS_INT unprocessed; 3183 register EMACS_INT unprocessed;
3183 int count = SPECPDL_INDEX (); 3184 int count = SPECPDL_INDEX ();
3184 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5; 3185 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
@@ -3284,15 +3285,6 @@ variable `last-coding-system-used' to the coding system actually used. */)
3284 record_unwind_protect (close_file_unwind, make_number (fd)); 3285 record_unwind_protect (close_file_unwind, make_number (fd));
3285 3286
3286 3287
3287 /* Check whether the size is too large or negative, which can happen on a
3288 platform that allows file sizes greater than the maximum off_t value. */
3289 if (! not_regular
3290 && ! (0 <= st.st_size && st.st_size <= BUF_BYTES_MAX))
3291 buffer_overflow ();
3292
3293 /* Prevent redisplay optimizations. */
3294 current_buffer->clip_changed = 1;
3295
3296 if (!NILP (visit)) 3288 if (!NILP (visit))
3297 { 3289 {
3298 if (!NILP (beg) || !NILP (end)) 3290 if (!NILP (beg) || !NILP (end))
@@ -3302,26 +3294,64 @@ variable `last-coding-system-used' to the coding system actually used. */)
3302 } 3294 }
3303 3295
3304 if (!NILP (beg)) 3296 if (!NILP (beg))
3305 CHECK_NUMBER (beg); 3297 {
3298 if (! (RANGED_INTEGERP (0, beg, TYPE_MAXIMUM (off_t))))
3299 wrong_type_argument (intern ("file-offset"), beg);
3300 beg_offset = XFASTINT (beg);
3301 }
3306 else 3302 else
3307 XSETFASTINT (beg, 0); 3303 beg_offset = 0;
3308 3304
3309 if (!NILP (end)) 3305 if (!NILP (end))
3310 CHECK_NUMBER (end); 3306 {
3307 if (! (RANGED_INTEGERP (0, end, TYPE_MAXIMUM (off_t))))
3308 wrong_type_argument (intern ("file-offset"), end);
3309 end_offset = XFASTINT (end);
3310 }
3311 else 3311 else
3312 { 3312 {
3313 if (! not_regular) 3313 if (not_regular)
3314 end_offset = TYPE_MAXIMUM (off_t);
3315 else
3314 { 3316 {
3315 XSETINT (end, st.st_size); 3317 end_offset = st.st_size;
3318
3319 /* A negative size can happen on a platform that allows file
3320 sizes greater than the maximum off_t value. */
3321 if (end_offset < 0)
3322 buffer_overflow ();
3316 3323
3317 /* The file size returned from stat may be zero, but data 3324 /* The file size returned from stat may be zero, but data
3318 may be readable nonetheless, for example when this is a 3325 may be readable nonetheless, for example when this is a
3319 file in the /proc filesystem. */ 3326 file in the /proc filesystem. */
3320 if (st.st_size == 0) 3327 if (end_offset == 0)
3321 XSETINT (end, READ_BUF_SIZE); 3328 end_offset = READ_BUF_SIZE;
3322 } 3329 }
3323 } 3330 }
3324 3331
3332 /* Check now whether the buffer will become too large,
3333 in the likely case where the file's length is not changing.
3334 This saves a lot of needless work before a buffer overflow. */
3335 if (! not_regular)
3336 {
3337 /* The likely offset where we will stop reading. We could read
3338 more (or less), if the file grows (or shrinks) as we read it. */
3339 off_t likely_end = min (end_offset, st.st_size);
3340
3341 if (beg_offset < likely_end)
3342 {
3343 ptrdiff_t buf_bytes =
3344 Z_BYTE - (!NILP (replace) ? ZV_BYTE - BEGV_BYTE : 0);
3345 ptrdiff_t buf_growth_max = BUF_BYTES_MAX - buf_bytes;
3346 off_t likely_growth = likely_end - beg_offset;
3347 if (buf_growth_max < likely_growth)
3348 buffer_overflow ();
3349 }
3350 }
3351
3352 /* Prevent redisplay optimizations. */
3353 current_buffer->clip_changed = 1;
3354
3325 if (EQ (Vcoding_system_for_read, Qauto_save_coding)) 3355 if (EQ (Vcoding_system_for_read, Qauto_save_coding))
3326 { 3356 {
3327 coding_system = coding_inherit_eol_type (Qutf_8_emacs, Qunix); 3357 coding_system = coding_inherit_eol_type (Qutf_8_emacs, Qunix);
@@ -3465,9 +3495,9 @@ variable `last-coding-system-used' to the coding system actually used. */)
3465 give up on handling REPLACE in the optimized way. */ 3495 give up on handling REPLACE in the optimized way. */
3466 int giveup_match_end = 0; 3496 int giveup_match_end = 0;
3467 3497
3468 if (XINT (beg) != 0) 3498 if (beg_offset != 0)
3469 { 3499 {
3470 if (emacs_lseek (fd, XINT (beg), SEEK_SET) < 0) 3500 if (lseek (fd, beg_offset, SEEK_SET) < 0)
3471 report_file_error ("Setting file position", 3501 report_file_error ("Setting file position",
3472 Fcons (orig_filename, Qnil)); 3502 Fcons (orig_filename, Qnil));
3473 } 3503 }
@@ -3515,7 +3545,7 @@ variable `last-coding-system-used' to the coding system actually used. */)
3515 immediate_quit = 0; 3545 immediate_quit = 0;
3516 /* If the file matches the buffer completely, 3546 /* If the file matches the buffer completely,
3517 there's no need to replace anything. */ 3547 there's no need to replace anything. */
3518 if (same_at_start - BEGV_BYTE == XINT (end)) 3548 if (same_at_start - BEGV_BYTE == end_offset)
3519 { 3549 {
3520 emacs_close (fd); 3550 emacs_close (fd);
3521 specpdl_ptr--; 3551 specpdl_ptr--;
@@ -3530,16 +3560,17 @@ variable `last-coding-system-used' to the coding system actually used. */)
3530 already found that decoding is necessary, don't waste time. */ 3560 already found that decoding is necessary, don't waste time. */
3531 while (!giveup_match_end) 3561 while (!giveup_match_end)
3532 { 3562 {
3533 EMACS_INT total_read, nread, bufpos, curpos, trial; 3563 int total_read, nread, bufpos, trial;
3564 off_t curpos;
3534 3565
3535 /* At what file position are we now scanning? */ 3566 /* At what file position are we now scanning? */
3536 curpos = XINT (end) - (ZV_BYTE - same_at_end); 3567 curpos = end_offset - (ZV_BYTE - same_at_end);
3537 /* If the entire file matches the buffer tail, stop the scan. */ 3568 /* If the entire file matches the buffer tail, stop the scan. */
3538 if (curpos == 0) 3569 if (curpos == 0)
3539 break; 3570 break;
3540 /* How much can we scan in the next step? */ 3571 /* How much can we scan in the next step? */
3541 trial = min (curpos, sizeof buffer); 3572 trial = min (curpos, sizeof buffer);
3542 if (emacs_lseek (fd, curpos - trial, SEEK_SET) < 0) 3573 if (lseek (fd, curpos - trial, SEEK_SET) < 0)
3543 report_file_error ("Setting file position", 3574 report_file_error ("Setting file position",
3544 Fcons (orig_filename, Qnil)); 3575 Fcons (orig_filename, Qnil));
3545 3576
@@ -3606,13 +3637,14 @@ variable `last-coding-system-used' to the coding system actually used. */)
3606 3637
3607 /* Don't try to reuse the same piece of text twice. */ 3638 /* Don't try to reuse the same piece of text twice. */
3608 overlap = (same_at_start - BEGV_BYTE 3639 overlap = (same_at_start - BEGV_BYTE
3609 - (same_at_end + st.st_size - ZV)); 3640 - (same_at_end
3641 + (! NILP (end) ? end_offset : st.st_size) - ZV_BYTE));
3610 if (overlap > 0) 3642 if (overlap > 0)
3611 same_at_end += overlap; 3643 same_at_end += overlap;
3612 3644
3613 /* Arrange to read only the nonmatching middle part of the file. */ 3645 /* Arrange to read only the nonmatching middle part of the file. */
3614 XSETFASTINT (beg, XINT (beg) + (same_at_start - BEGV_BYTE)); 3646 beg_offset += same_at_start - BEGV_BYTE;
3615 XSETFASTINT (end, XINT (end) - (ZV_BYTE - same_at_end)); 3647 end_offset -= ZV_BYTE - same_at_end;
3616 3648
3617 del_range_byte (same_at_start, same_at_end, 0); 3649 del_range_byte (same_at_start, same_at_end, 0);
3618 /* Insert from the file at the proper position. */ 3650 /* Insert from the file at the proper position. */
@@ -3657,7 +3689,7 @@ variable `last-coding-system-used' to the coding system actually used. */)
3657 /* First read the whole file, performing code conversion into 3689 /* First read the whole file, performing code conversion into
3658 CONVERSION_BUFFER. */ 3690 CONVERSION_BUFFER. */
3659 3691
3660 if (emacs_lseek (fd, XINT (beg), SEEK_SET) < 0) 3692 if (lseek (fd, beg_offset, SEEK_SET) < 0)
3661 report_file_error ("Setting file position", 3693 report_file_error ("Setting file position",
3662 Fcons (orig_filename, Qnil)); 3694 Fcons (orig_filename, Qnil));
3663 3695
@@ -3824,7 +3856,7 @@ variable `last-coding-system-used' to the coding system actually used. */)
3824 } 3856 }
3825 3857
3826 if (! not_regular) 3858 if (! not_regular)
3827 total = XINT (end) - XINT (beg); 3859 total = end_offset - beg_offset;
3828 else 3860 else
3829 /* For a special file, all we can do is guess. */ 3861 /* For a special file, all we can do is guess. */
3830 total = READ_BUF_SIZE; 3862 total = READ_BUF_SIZE;
@@ -3845,9 +3877,9 @@ variable `last-coding-system-used' to the coding system actually used. */)
3845 if (GAP_SIZE < total) 3877 if (GAP_SIZE < total)
3846 make_gap (total - GAP_SIZE); 3878 make_gap (total - GAP_SIZE);
3847 3879
3848 if (XINT (beg) != 0 || !NILP (replace)) 3880 if (beg_offset != 0 || !NILP (replace))
3849 { 3881 {
3850 if (emacs_lseek (fd, XINT (beg), SEEK_SET) < 0) 3882 if (lseek (fd, beg_offset, SEEK_SET) < 0)
3851 report_file_error ("Setting file position", 3883 report_file_error ("Setting file position",
3852 Fcons (orig_filename, Qnil)); 3884 Fcons (orig_filename, Qnil));
3853 } 3885 }
@@ -4576,7 +4608,7 @@ This calls `write-region-annotate-functions' at the start, and
4576 4608
4577 if (!NILP (append) && !NILP (Ffile_regular_p (filename))) 4609 if (!NILP (append) && !NILP (Ffile_regular_p (filename)))
4578 { 4610 {
4579 long ret; 4611 off_t ret;
4580 4612
4581 if (NUMBERP (append)) 4613 if (NUMBERP (append))
4582 ret = emacs_lseek (desc, XINT (append), SEEK_CUR); 4614 ret = emacs_lseek (desc, XINT (append), SEEK_CUR);