diff options
| author | Kenichi Handa | 1998-06-12 01:38:07 +0000 |
|---|---|---|
| committer | Kenichi Handa | 1998-06-12 01:38:07 +0000 |
| commit | b413243374cb897b48e786b4c84f4a39eb18ccef (patch) | |
| tree | 2d7ff08d68e676cf3fe8d3da04e93456600f48c7 | |
| parent | 03c6b7f66dd5a7f467c35470ab2a53a9301677fc (diff) | |
| download | emacs-b413243374cb897b48e786b4c84f4a39eb18ccef.tar.gz emacs-b413243374cb897b48e786b4c84f4a39eb18ccef.zip | |
(set_auto_coding_unwind): New function.
(Finsert_file_contents): If the current buffer is empty, decide
the coding system after the file is inserted in the buffer. If
not, insert the head and tail of a file in a temporary buffer and
call set-auto-coding-function.
(e_write): If there are carryover bytes in encoding because of
incomplete multibyte form, write them out as is.
| -rw-r--r-- | src/fileio.c | 338 |
1 files changed, 189 insertions, 149 deletions
diff --git a/src/fileio.c b/src/fileio.c index 66ac9587342..742c7ad95fe 100644 --- a/src/fileio.c +++ b/src/fileio.c | |||
| @@ -3246,6 +3246,26 @@ Lisp_Object Qfind_buffer_file_type; | |||
| 3246 | #define READ_BUF_SIZE (64 << 10) | 3246 | #define READ_BUF_SIZE (64 << 10) |
| 3247 | #endif | 3247 | #endif |
| 3248 | 3248 | ||
| 3249 | /* This function is called when a function bound to | ||
| 3250 | Vset_auto_coding_function causes some error. At that time, a text | ||
| 3251 | of a file has already been inserted in the current buffer, but, | ||
| 3252 | markers have not yet been adjusted. Thus we must adjust markers | ||
| 3253 | here. We are sure that the buffer was empty before the text of the | ||
| 3254 | file was inserted. */ | ||
| 3255 | |||
| 3256 | static Lisp_Object | ||
| 3257 | set_auto_coding_unwind (multibyte) | ||
| 3258 | Lisp_Object multibyte; | ||
| 3259 | { | ||
| 3260 | int inserted = Z_BYTE - BEG_BYTE; | ||
| 3261 | |||
| 3262 | if (!NILP (multibyte)) | ||
| 3263 | inserted = multibyte_chars_in_text (GPT_ADDR - inserted, inserted); | ||
| 3264 | adjust_after_insert (PT, PT_BYTE, Z, Z_BYTE, inserted); | ||
| 3265 | |||
| 3266 | return Qnil; | ||
| 3267 | } | ||
| 3268 | |||
| 3249 | DEFUN ("insert-file-contents", Finsert_file_contents, Sinsert_file_contents, | 3269 | DEFUN ("insert-file-contents", Finsert_file_contents, Sinsert_file_contents, |
| 3250 | 1, 5, 0, | 3270 | 1, 5, 0, |
| 3251 | "Insert contents of file FILENAME after point.\n\ | 3271 | "Insert contents of file FILENAME after point.\n\ |
| @@ -3290,6 +3310,7 @@ actually used.") | |||
| 3290 | unsigned char buffer[1 << 14]; | 3310 | unsigned char buffer[1 << 14]; |
| 3291 | int replace_handled = 0; | 3311 | int replace_handled = 0; |
| 3292 | int set_coding_system = 0; | 3312 | int set_coding_system = 0; |
| 3313 | int coding_system_decided = 0; | ||
| 3293 | 3314 | ||
| 3294 | if (current_buffer->base_buffer && ! NILP (visit)) | 3315 | if (current_buffer->base_buffer && ! NILP (visit)) |
| 3295 | error ("Cannot do file visiting in an indirect buffer"); | 3316 | error ("Cannot do file visiting in an indirect buffer"); |
| @@ -3392,161 +3413,107 @@ actually used.") | |||
| 3392 | } | 3413 | } |
| 3393 | } | 3414 | } |
| 3394 | 3415 | ||
| 3395 | /* Decide the coding-system of the file. */ | 3416 | if (BEG < Z) |
| 3396 | { | 3417 | { |
| 3397 | Lisp_Object val; | 3418 | /* Decide the coding system to use for reading the file now |
| 3398 | val = Qnil; | 3419 | because we can't use an optimized method for handling |
| 3399 | 3420 | `coding:' tag if the current buffer is not empty. */ | |
| 3400 | if (!NILP (Vcoding_system_for_read)) | 3421 | Lisp_Object val; |
| 3401 | val = Vcoding_system_for_read; | 3422 | val = Qnil; |
| 3402 | else if (! NILP (replace)) | ||
| 3403 | /* In REPLACE mode, we can use the same coding system | ||
| 3404 | that was used to visit the file. */ | ||
| 3405 | val = current_buffer->buffer_file_coding_system; | ||
| 3406 | else if (! not_regular) | ||
| 3407 | { | ||
| 3408 | /* Don't try looking inside a file for a coding system specification | ||
| 3409 | if it is not seekable. */ | ||
| 3410 | if (! NILP (Vset_auto_coding_function)) | ||
| 3411 | { | ||
| 3412 | /* Find a coding system specified in the heading two lines | ||
| 3413 | or in the tailing several lines of the file. We assume | ||
| 3414 | that the 1K-byte and 3K-byte for heading and tailing | ||
| 3415 | respectively are sufficient for this purpose. */ | ||
| 3416 | int nread; | ||
| 3417 | int beginning_of_end, end_of_beginning; | ||
| 3418 | |||
| 3419 | if (st.st_size <= (1024 * 4)) | ||
| 3420 | { | ||
| 3421 | nread = read (fd, read_buf, 1024 * 4); | ||
| 3422 | end_of_beginning = nread; | ||
| 3423 | beginning_of_end = 0; | ||
| 3424 | } | ||
| 3425 | else | ||
| 3426 | { | ||
| 3427 | nread = read (fd, read_buf, 1024); | ||
| 3428 | end_of_beginning = nread; | ||
| 3429 | beginning_of_end = nread; | ||
| 3430 | if (nread >= 0) | ||
| 3431 | { | ||
| 3432 | if (lseek (fd, st.st_size - (1024 * 3), 0) < 0) | ||
| 3433 | report_file_error ("Setting file position", | ||
| 3434 | Fcons (orig_filename, Qnil)); | ||
| 3435 | nread += read (fd, read_buf + nread, 1024 * 3); | ||
| 3436 | } | ||
| 3437 | } | ||
| 3438 | 3423 | ||
| 3439 | if (nread < 0) | 3424 | if (!NILP (Vcoding_system_for_read)) |
| 3440 | error ("IO error reading %s: %s", | 3425 | val = Vcoding_system_for_read; |
| 3441 | XSTRING (orig_filename)->data, strerror (errno)); | 3426 | else if (! NILP (replace)) |
| 3442 | else if (nread > 0) | 3427 | /* In REPLACE mode, we can use the same coding system |
| 3443 | { | 3428 | that was used to visit the file. */ |
| 3444 | int i; | 3429 | val = current_buffer->buffer_file_coding_system; |
| 3445 | int possible_spec = 0; | 3430 | else |
| 3446 | unsigned char *p, *p1; | 3431 | { |
| 3447 | Lisp_Object tem; | 3432 | /* Don't try looking inside a file for a coding system |
| 3448 | unsigned char *copy = (unsigned char *) alloca (nread + 1); | 3433 | specification if it is not seekable. */ |
| 3449 | 3434 | if (! not_regular && ! NILP (Vset_auto_coding_function)) | |
| 3450 | /* Make a copy of the contents of read_buf in COPY, | 3435 | { |
| 3451 | and convert it to lower case so we can compare | 3436 | /* Find a coding system specified in the heading two |
| 3452 | more efficiently. */ | 3437 | lines or in the tailing several lines of the file. |
| 3453 | bcopy (read_buf, copy, nread); | 3438 | We assume that the 1K-byte and 3K-byte for heading |
| 3454 | for (i = 0; i < nread; i++) | 3439 | and tailing respectively are sufficient for this |
| 3455 | copy[i] = DOWNCASE (copy[i]); | 3440 | purpose. */ |
| 3456 | /* Ensure various comparisons fail at end of data. */ | 3441 | int how_many, nread; |
| 3457 | copy[nread] = 0; | 3442 | |
| 3458 | 3443 | if (st.st_size <= (1024 * 4)) | |
| 3459 | /* Now test quickly whether the file contains a -*- line. */ | 3444 | nread = read (fd, read_buf, 1024 * 4); |
| 3460 | p = copy; | 3445 | else |
| 3461 | while (*p != '\n' && p - copy < end_of_beginning) | 3446 | { |
| 3462 | p++; | 3447 | nread = read (fd, read_buf, 1024); |
| 3463 | if (copy[0] == '#' && copy[1] == '!') | 3448 | if (nread >= 0) |
| 3464 | while (*p != '\n' && p - copy < end_of_beginning) | 3449 | { |
| 3465 | p++; | 3450 | if (lseek (fd, st.st_size - (1024 * 3), 0) < 0) |
| 3466 | p1 = copy; | 3451 | report_file_error ("Setting file position", |
| 3467 | while (p - p1 >= 3) | 3452 | Fcons (orig_filename, Qnil)); |
| 3468 | { | 3453 | nread += read (fd, read_buf + nread, 1024 * 3); |
| 3469 | if (p1[0] == '-' && p1[1] == '*' && p1[2] == '-') | 3454 | } |
| 3470 | { | 3455 | } |
| 3471 | while (p - p1 >= 7) | ||
| 3472 | { | ||
| 3473 | if (! bcmp ("coding:", p1, 7)) | ||
| 3474 | { | ||
| 3475 | possible_spec = 1; | ||
| 3476 | goto win; | ||
| 3477 | } | ||
| 3478 | p1++; | ||
| 3479 | } | ||
| 3480 | break; | ||
| 3481 | } | ||
| 3482 | p1++; | ||
| 3483 | } | ||
| 3484 | 3456 | ||
| 3485 | /* Test quickly whether the file | 3457 | if (nread < 0) |
| 3486 | contains a local variables list. */ | 3458 | error ("IO error reading %s: %s", |
| 3487 | p = ©[nread - 1]; | 3459 | XSTRING (orig_filename)->data, strerror (errno)); |
| 3488 | p1 = ©[beginning_of_end]; | 3460 | else if (nread > 0) |
| 3489 | while (p > p1) | 3461 | { |
| 3490 | { | 3462 | int count = specpdl_ptr - specpdl; |
| 3491 | if (p[0] == '\n' && p[1] == '\f') | 3463 | struct buffer *prev = current_buffer; |
| 3492 | break; | 3464 | |
| 3493 | p--; | 3465 | record_unwind_protect (Fset_buffer, Fcurrent_buffer ()); |
| 3494 | } | 3466 | temp_output_buffer_setup (" *code-converting-work*"); |
| 3495 | p1 = ©[nread]; | 3467 | set_buffer_internal (XBUFFER (Vstandard_output)); |
| 3496 | while (p1 - p >= 16) | 3468 | current_buffer->enable_multibyte_characters = Qnil; |
| 3497 | { | 3469 | insert_1_both (read_buf, nread, nread, 0, 0, 0); |
| 3498 | if (! bcmp ("local variables:", p, 16)) | 3470 | TEMP_SET_PT_BOTH (BEG, BEG_BYTE); |
| 3499 | { | 3471 | val = call1 (Vset_auto_coding_function, make_number (nread)); |
| 3500 | possible_spec = 1; | 3472 | set_buffer_internal (prev); |
| 3501 | break; | 3473 | /* Discard the unwind protect for recovering the |
| 3502 | } | 3474 | current buffer. */ |
| 3503 | p++; | 3475 | specpdl_ptr--; |
| 3504 | } | 3476 | |
| 3505 | win: | 3477 | /* Rewind the file for the actual read done later. */ |
| 3478 | if (lseek (fd, 0, 0) < 0) | ||
| 3479 | report_file_error ("Setting file position", | ||
| 3480 | Fcons (orig_filename, Qnil)); | ||
| 3481 | } | ||
| 3482 | } | ||
| 3506 | 3483 | ||
| 3507 | if (possible_spec) | 3484 | if (NILP (val)) |
| 3508 | { | 3485 | { |
| 3509 | /* Always make this a unibyte string | 3486 | /* If we have not yet decided a coding system, check |
| 3510 | because we have not yet decoded it. */ | 3487 | file-coding-system-alist. */ |
| 3511 | tem = make_unibyte_string (read_buf, nread); | 3488 | Lisp_Object args[6], coding_systems; |
| 3512 | val = call1 (Vset_auto_coding_function, tem); | 3489 | |
| 3513 | } | 3490 | args[0] = Qinsert_file_contents, args[1] = orig_filename; |
| 3491 | args[2] = visit, args[3] = beg, args[4] = end, args[5] = replace; | ||
| 3492 | coding_systems = Ffind_operation_coding_system (6, args); | ||
| 3493 | if (CONSP (coding_systems)) | ||
| 3494 | val = XCONS (coding_systems)->car; | ||
| 3495 | } | ||
| 3496 | } | ||
| 3514 | 3497 | ||
| 3515 | /* Rewind the file for the actual read done later. */ | 3498 | setup_coding_system (Fcheck_coding_system (val), &coding); |
| 3516 | if (lseek (fd, 0, 0) < 0) | ||
| 3517 | report_file_error ("Setting file position", | ||
| 3518 | Fcons (orig_filename, Qnil)); | ||
| 3519 | } | ||
| 3520 | } | ||
| 3521 | if (NILP (val)) | ||
| 3522 | { | ||
| 3523 | Lisp_Object args[6], coding_systems; | ||
| 3524 | 3499 | ||
| 3525 | args[0] = Qinsert_file_contents, args[1] = orig_filename; | 3500 | if (NILP (Vcoding_system_for_read) |
| 3526 | args[2] = visit, args[3] = beg, args[4] = end, args[5] = replace; | 3501 | && NILP (current_buffer->enable_multibyte_characters)) |
| 3527 | coding_systems = Ffind_operation_coding_system (6, args); | 3502 | { |
| 3528 | if (CONSP (coding_systems)) | 3503 | /* We must suppress all text conversion except for end-of-line |
| 3529 | val = XCONS (coding_systems)->car; | 3504 | conversion. */ |
| 3530 | } | 3505 | int eol_type; |
| 3531 | } | ||
| 3532 | 3506 | ||
| 3533 | if (NILP (Vcoding_system_for_read) | 3507 | eol_type = coding.eol_type; |
| 3534 | && NILP (current_buffer->enable_multibyte_characters)) | 3508 | setup_coding_system (Qraw_text, &coding); |
| 3535 | { | 3509 | coding.eol_type = eol_type; |
| 3536 | /* We must suppress all text conversion except for end-of-line | 3510 | } |
| 3537 | conversion. */ | ||
| 3538 | struct coding_system coding_temp; | ||
| 3539 | 3511 | ||
| 3540 | setup_coding_system (Fcheck_coding_system (val), &coding_temp); | 3512 | coding_system_decided = 1; |
| 3541 | setup_coding_system (Qraw_text, &coding); | 3513 | } |
| 3542 | coding.eol_type = coding_temp.eol_type; | ||
| 3543 | } | ||
| 3544 | else | ||
| 3545 | setup_coding_system (Fcheck_coding_system (val), &coding); | ||
| 3546 | 3514 | ||
| 3547 | /* Ensure we always set Vlast_coding_system_used. */ | 3515 | /* Ensure we always set Vlast_coding_system_used. */ |
| 3548 | set_coding_system = 1; | 3516 | set_coding_system = 1; |
| 3549 | } | ||
| 3550 | 3517 | ||
| 3551 | /* If requested, replace the accessible part of the buffer | 3518 | /* If requested, replace the accessible part of the buffer |
| 3552 | with the file contents. Avoid replacing text at the | 3519 | with the file contents. Avoid replacing text at the |
| @@ -3563,6 +3530,7 @@ actually used.") | |||
| 3563 | But if we discover the need for conversion, we give up on this method | 3530 | But if we discover the need for conversion, we give up on this method |
| 3564 | and let the following if-statement handle the replace job. */ | 3531 | and let the following if-statement handle the replace job. */ |
| 3565 | if (!NILP (replace) | 3532 | if (!NILP (replace) |
| 3533 | && BEGV < ZV | ||
| 3566 | && ! CODING_REQUIRE_DECODING (&coding) | 3534 | && ! CODING_REQUIRE_DECODING (&coding) |
| 3567 | && (coding.eol_type == CODING_EOL_UNDECIDED | 3535 | && (coding.eol_type == CODING_EOL_UNDECIDED |
| 3568 | || coding.eol_type == CODING_EOL_LF)) | 3536 | || coding.eol_type == CODING_EOL_LF)) |
| @@ -3741,7 +3709,7 @@ actually used.") | |||
| 3741 | is needed, in a simple way that needs a lot of memory. | 3709 | is needed, in a simple way that needs a lot of memory. |
| 3742 | The preceding if-statement handles the case of no conversion | 3710 | The preceding if-statement handles the case of no conversion |
| 3743 | in a more optimized way. */ | 3711 | in a more optimized way. */ |
| 3744 | if (!NILP (replace) && ! replace_handled) | 3712 | if (!NILP (replace) && ! replace_handled && BEGV < ZV) |
| 3745 | { | 3713 | { |
| 3746 | int same_at_start = BEGV_BYTE; | 3714 | int same_at_start = BEGV_BYTE; |
| 3747 | int same_at_end = ZV_BYTE; | 3715 | int same_at_end = ZV_BYTE; |
| @@ -3993,6 +3961,69 @@ actually used.") | |||
| 3993 | 3961 | ||
| 3994 | if (inserted > 0) | 3962 | if (inserted > 0) |
| 3995 | { | 3963 | { |
| 3964 | if (! coding_system_decided) | ||
| 3965 | { | ||
| 3966 | /* The coding system is not yet decided. Decide it by an | ||
| 3967 | optimized method for handling `coding:' tag. */ | ||
| 3968 | Lisp_Object val; | ||
| 3969 | val = Qnil; | ||
| 3970 | |||
| 3971 | if (!NILP (Vcoding_system_for_read)) | ||
| 3972 | val = Vcoding_system_for_read; | ||
| 3973 | else | ||
| 3974 | { | ||
| 3975 | if (! NILP (Vset_auto_coding_function)) | ||
| 3976 | { | ||
| 3977 | /* Since we are sure that the current buffer was | ||
| 3978 | empty before the insertion, we can toggle | ||
| 3979 | enable-multibyte-characters directly here without | ||
| 3980 | taking care of marker adjustment and byte | ||
| 3981 | combining problem. */ | ||
| 3982 | Lisp_Object prev_multibyte; | ||
| 3983 | int count = specpdl_ptr - specpdl; | ||
| 3984 | |||
| 3985 | prev_multibyte = current_buffer->enable_multibyte_characters; | ||
| 3986 | current_buffer->enable_multibyte_characters = Qnil; | ||
| 3987 | record_unwind_protect (set_auto_coding_unwind, | ||
| 3988 | prev_multibyte); | ||
| 3989 | val = call1 (Vset_auto_coding_function, | ||
| 3990 | make_number (inserted)); | ||
| 3991 | /* Discard the unwind protect for recovering the | ||
| 3992 | error of Vset_auto_coding_function. */ | ||
| 3993 | specpdl_ptr--; | ||
| 3994 | current_buffer->enable_multibyte_characters = prev_multibyte; | ||
| 3995 | TEMP_SET_PT_BOTH (BEG, BEG_BYTE); | ||
| 3996 | } | ||
| 3997 | |||
| 3998 | if (NILP (val)) | ||
| 3999 | { | ||
| 4000 | /* If the coding system is not yet decided, check | ||
| 4001 | file-coding-system-alist. */ | ||
| 4002 | Lisp_Object args[6], coding_systems; | ||
| 4003 | |||
| 4004 | args[0] = Qinsert_file_contents, args[1] = orig_filename; | ||
| 4005 | args[2] = visit, args[3] = beg, args[4] = end, args[5] = Qnil; | ||
| 4006 | coding_systems = Ffind_operation_coding_system (6, args); | ||
| 4007 | if (CONSP (coding_systems)) | ||
| 4008 | val = XCONS (coding_systems)->car; | ||
| 4009 | } | ||
| 4010 | } | ||
| 4011 | |||
| 4012 | setup_coding_system (Fcheck_coding_system (val), &coding); | ||
| 4013 | |||
| 4014 | if (NILP (Vcoding_system_for_read) | ||
| 4015 | && NILP (current_buffer->enable_multibyte_characters)) | ||
| 4016 | { | ||
| 4017 | /* We must suppress all text conversion except for | ||
| 4018 | end-of-line conversion. */ | ||
| 4019 | int eol_type; | ||
| 4020 | |||
| 4021 | eol_type = coding.eol_type; | ||
| 4022 | setup_coding_system (Qraw_text, &coding); | ||
| 4023 | coding.eol_type = eol_type; | ||
| 4024 | } | ||
| 4025 | } | ||
| 4026 | |||
| 3996 | if (CODING_MAY_REQUIRE_DECODING (&coding)) | 4027 | if (CODING_MAY_REQUIRE_DECODING (&coding)) |
| 3997 | { | 4028 | { |
| 3998 | /* Here, we don't have to consider byte combining (see the | 4029 | /* Here, we don't have to consider byte combining (see the |
| @@ -4763,13 +4794,22 @@ e_write (desc, addr, nbytes, coding) | |||
| 4763 | now it is handled within encode_coding. */ | 4794 | now it is handled within encode_coding. */ |
| 4764 | while (1) | 4795 | while (1) |
| 4765 | { | 4796 | { |
| 4766 | encode_coding (coding, addr, buf, nbytes, WRITE_BUF_SIZE); | 4797 | int result; |
| 4798 | |||
| 4799 | result = encode_coding (coding, addr, buf, nbytes, WRITE_BUF_SIZE); | ||
| 4767 | nbytes -= coding->consumed, addr += coding->consumed; | 4800 | nbytes -= coding->consumed, addr += coding->consumed; |
| 4768 | if (coding->produced > 0) | 4801 | if (coding->produced > 0) |
| 4769 | { | 4802 | { |
| 4770 | coding->produced -= write (desc, buf, coding->produced); | 4803 | coding->produced -= write (desc, buf, coding->produced); |
| 4771 | if (coding->produced) return -1; | 4804 | if (coding->produced) return -1; |
| 4772 | } | 4805 | } |
| 4806 | if (result == CODING_FINISH_INSUFFICIENT_SRC) | ||
| 4807 | { | ||
| 4808 | /* The source text ends with an incomplete multibyte form. | ||
| 4809 | There's no way other than to write it out as is. */ | ||
| 4810 | nbytes -= write (desc, addr, nbytes); | ||
| 4811 | if (nbytes) return -1; | ||
| 4812 | } | ||
| 4773 | if (nbytes <= 0) | 4813 | if (nbytes <= 0) |
| 4774 | break; | 4814 | break; |
| 4775 | } | 4815 | } |