diff options
| author | Kenichi Handa | 2000-06-19 05:18:09 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2000-06-19 05:18:09 +0000 |
| commit | aaaf0b1e09315b9512faa00e7fc2f01445222d78 (patch) | |
| tree | 60fc2812ba8955698ef5d7a1b9b2d6bd4df01a3a /src/coding.c | |
| parent | 5b8ca8222ee46fea4a6165748824ea2f1f04562b (diff) | |
| download | emacs-aaaf0b1e09315b9512faa00e7fc2f01445222d78.tar.gz emacs-aaaf0b1e09315b9512faa00e7fc2f01445222d78.zip | |
(setup_coding_system) <4>: Reset member `cr_carryover'.
(ccl_coding_driver): On encoding, initialize ccl->eol_type.
(decode_eol_post_ccl): New function.
(decode_coding): Don't detect EOL format here for CCL based coding
systems.
(decode_coding) <coding_type_ccl>: Handle carried-over CR. Call
decode_eol_post_ccl after running the CCL program.
(code_convert_region): Don't detect EOL format here for CCL based
coding systems.
(decode_coding_string): Likewise.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 157 |
1 files changed, 152 insertions, 5 deletions
diff --git a/src/coding.c b/src/coding.c index 0b70480b965..d02c27dc478 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -3202,6 +3202,7 @@ setup_coding_system (coding_system, coding) | |||
| 3202 | } | 3202 | } |
| 3203 | } | 3203 | } |
| 3204 | coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK; | 3204 | coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK; |
| 3205 | coding->spec.ccl.cr_carryover = 0; | ||
| 3205 | break; | 3206 | break; |
| 3206 | 3207 | ||
| 3207 | case 5: | 3208 | case 5: |
| @@ -3883,7 +3884,8 @@ ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep) | |||
| 3883 | int result; | 3884 | int result; |
| 3884 | 3885 | ||
| 3885 | ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK; | 3886 | ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK; |
| 3886 | 3887 | if (encodep) | |
| 3888 | ccl->eol_type = coding->eol_type; | ||
| 3887 | coding->produced = ccl_driver (ccl, source, destination, | 3889 | coding->produced = ccl_driver (ccl, source, destination, |
| 3888 | src_bytes, dst_bytes, &(coding->consumed)); | 3890 | src_bytes, dst_bytes, &(coding->consumed)); |
| 3889 | if (encodep) | 3891 | if (encodep) |
| @@ -3916,6 +3918,136 @@ ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep) | |||
| 3916 | return result; | 3918 | return result; |
| 3917 | } | 3919 | } |
| 3918 | 3920 | ||
| 3921 | /* Decode EOL format of the text at PTR of BYTES length destructively | ||
| 3922 | according to CODING->eol_type. This is called after the CCL | ||
| 3923 | program produced a decoded text at PTR. If we do CRLF->LF | ||
| 3924 | conversion, update CODING->produced and CODING->produced_char. */ | ||
| 3925 | |||
| 3926 | static void | ||
| 3927 | decode_eol_post_ccl (coding, ptr, bytes) | ||
| 3928 | struct coding_system *coding; | ||
| 3929 | unsigned char *ptr; | ||
| 3930 | int bytes; | ||
| 3931 | { | ||
| 3932 | Lisp_Object val, saved_coding_symbol; | ||
| 3933 | unsigned char *pend = ptr + bytes; | ||
| 3934 | int dummy; | ||
| 3935 | |||
| 3936 | /* Remember the current coding system symbol. We set it back when | ||
| 3937 | an inconsistent EOL is found so that `last-coding-system-used' is | ||
| 3938 | set to the coding system that doesn't specify EOL conversion. */ | ||
| 3939 | saved_coding_symbol = coding->symbol; | ||
| 3940 | |||
| 3941 | coding->spec.ccl.cr_carryover = 0; | ||
| 3942 | if (coding->eol_type == CODING_EOL_UNDECIDED) | ||
| 3943 | { | ||
| 3944 | /* Here, to avoid the call of setup_coding_system, we directly | ||
| 3945 | call detect_eol_type. */ | ||
| 3946 | coding->eol_type = detect_eol_type (ptr, bytes, &dummy); | ||
| 3947 | val = Fget (coding->symbol, Qeol_type); | ||
| 3948 | if (VECTORP (val) && XVECTOR (val)->size == 3) | ||
| 3949 | coding->symbol = XVECTOR (val)->contents[coding->eol_type]; | ||
| 3950 | coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; | ||
| 3951 | } | ||
| 3952 | |||
| 3953 | if (coding->eol_type == CODING_EOL_LF) | ||
| 3954 | { | ||
| 3955 | /* We have nothing to do. */ | ||
| 3956 | ptr = pend; | ||
| 3957 | } | ||
| 3958 | else if (coding->eol_type == CODING_EOL_CRLF) | ||
| 3959 | { | ||
| 3960 | unsigned char *pstart = ptr, *p = ptr; | ||
| 3961 | |||
| 3962 | if (! (coding->mode & CODING_MODE_LAST_BLOCK) | ||
| 3963 | && *(pend - 1) == '\r') | ||
| 3964 | { | ||
| 3965 | /* If the last character is CR, we can't handle it here | ||
| 3966 | because LF will be in the not-yet-decoded source text. | ||
| 3967 | Record that the CR is not yet processed. */ | ||
| 3968 | coding->spec.ccl.cr_carryover = 1; | ||
| 3969 | coding->produced--; | ||
| 3970 | coding->produced_char--; | ||
| 3971 | pend--; | ||
| 3972 | } | ||
| 3973 | while (ptr < pend) | ||
| 3974 | { | ||
| 3975 | if (*ptr == '\r') | ||
| 3976 | { | ||
| 3977 | if (ptr + 1 < pend && *(ptr + 1) == '\n') | ||
| 3978 | { | ||
| 3979 | *p++ = '\n'; | ||
| 3980 | ptr += 2; | ||
| 3981 | } | ||
| 3982 | else | ||
| 3983 | { | ||
| 3984 | if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) | ||
| 3985 | goto undo_eol_conversion; | ||
| 3986 | *p++ = *ptr++; | ||
| 3987 | } | ||
| 3988 | } | ||
| 3989 | else if (*ptr == '\n' | ||
| 3990 | && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) | ||
| 3991 | goto undo_eol_conversion; | ||
| 3992 | else | ||
| 3993 | *p++ = *ptr++; | ||
| 3994 | continue; | ||
| 3995 | |||
| 3996 | undo_eol_conversion: | ||
| 3997 | /* We have encountered an inconsistent EOL format at PTR. | ||
| 3998 | Convert all LFs before PTR back to CRLFs. */ | ||
| 3999 | for (p--, ptr--; p >= pstart; p--) | ||
| 4000 | { | ||
| 4001 | if (*p == '\n') | ||
| 4002 | *ptr-- = '\n', *ptr-- = '\r'; | ||
| 4003 | else | ||
| 4004 | *ptr-- = *p; | ||
| 4005 | } | ||
| 4006 | /* If carryover is recorded, cancel it because we don't | ||
| 4007 | convert CRLF anymore. */ | ||
| 4008 | if (coding->spec.ccl.cr_carryover) | ||
| 4009 | { | ||
| 4010 | coding->spec.ccl.cr_carryover = 0; | ||
| 4011 | coding->produced++; | ||
| 4012 | coding->produced_char++; | ||
| 4013 | pend++; | ||
| 4014 | } | ||
| 4015 | p = ptr = pend; | ||
| 4016 | coding->eol_type = CODING_EOL_LF; | ||
| 4017 | coding->symbol = saved_coding_symbol; | ||
| 4018 | } | ||
| 4019 | if (p < pend) | ||
| 4020 | { | ||
| 4021 | /* As each two-byte sequence CRLF was converted to LF, (PEND | ||
| 4022 | - P) is the number of deleted characters. */ | ||
| 4023 | coding->produced -= pend - p; | ||
| 4024 | coding->produced_char -= pend - p; | ||
| 4025 | } | ||
| 4026 | } | ||
| 4027 | else /* i.e. coding->eol_type == CODING_EOL_CR */ | ||
| 4028 | { | ||
| 4029 | unsigned char *p = ptr; | ||
| 4030 | |||
| 4031 | for (; ptr < pend; ptr++) | ||
| 4032 | { | ||
| 4033 | if (*ptr == '\r') | ||
| 4034 | *ptr = '\n'; | ||
| 4035 | else if (*ptr == '\n' | ||
| 4036 | && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) | ||
| 4037 | { | ||
| 4038 | for (; p < ptr; p++) | ||
| 4039 | { | ||
| 4040 | if (*p == '\n') | ||
| 4041 | *p = '\r'; | ||
| 4042 | } | ||
| 4043 | ptr = pend; | ||
| 4044 | coding->eol_type = CODING_EOL_LF; | ||
| 4045 | coding->symbol = saved_coding_symbol; | ||
| 4046 | } | ||
| 4047 | } | ||
| 4048 | } | ||
| 4049 | } | ||
| 4050 | |||
| 3919 | /* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before | 4051 | /* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before |
| 3920 | decoding, it may detect coding system and format of end-of-line if | 4052 | decoding, it may detect coding system and format of end-of-line if |
| 3921 | those are not yet decided. The source should be unibyte, the | 4053 | those are not yet decided. The source should be unibyte, the |
| @@ -3931,7 +4063,8 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes) | |||
| 3931 | if (coding->type == coding_type_undecided) | 4063 | if (coding->type == coding_type_undecided) |
| 3932 | detect_coding (coding, source, src_bytes); | 4064 | detect_coding (coding, source, src_bytes); |
| 3933 | 4065 | ||
| 3934 | if (coding->eol_type == CODING_EOL_UNDECIDED) | 4066 | if (coding->eol_type == CODING_EOL_UNDECIDED |
| 4067 | && coding->type != coding_type_ccl) | ||
| 3935 | detect_eol (coding, source, src_bytes); | 4068 | detect_eol (coding, source, src_bytes); |
| 3936 | 4069 | ||
| 3937 | coding->produced = coding->produced_char = 0; | 4070 | coding->produced = coding->produced_char = 0; |
| @@ -3962,8 +4095,20 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes) | |||
| 3962 | break; | 4095 | break; |
| 3963 | 4096 | ||
| 3964 | case coding_type_ccl: | 4097 | case coding_type_ccl: |
| 3965 | ccl_coding_driver (coding, source, destination, | 4098 | if (coding->spec.ccl.cr_carryover) |
| 4099 | { | ||
| 4100 | /* Put the CR left unprocessed by the previous call of | ||
| 4101 | decode_eol_post_ccl at the head of DESTINATION. */ | ||
| 4102 | *destination = '\r'; | ||
| 4103 | coding->produced++; | ||
| 4104 | coding->produced_char++; | ||
| 4105 | dst_bytes--; | ||
| 4106 | } | ||
| 4107 | ccl_coding_driver (coding, source, | ||
| 4108 | destination + coding->spec.ccl.cr_carryover, | ||
| 3966 | src_bytes, dst_bytes, 0); | 4109 | src_bytes, dst_bytes, 0); |
| 4110 | if (coding->eol_type != CODING_EOL_LF) | ||
| 4111 | decode_eol_post_ccl (coding, destination, coding->produced); | ||
| 3967 | break; | 4112 | break; |
| 3968 | 4113 | ||
| 3969 | default: | 4114 | default: |
| @@ -4580,7 +4725,8 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) | |||
| 4580 | encodings again in vain. */ | 4725 | encodings again in vain. */ |
| 4581 | coding->type = coding_type_emacs_mule; | 4726 | coding->type = coding_type_emacs_mule; |
| 4582 | } | 4727 | } |
| 4583 | if (coding->eol_type == CODING_EOL_UNDECIDED) | 4728 | if (coding->eol_type == CODING_EOL_UNDECIDED |
| 4729 | && coding->type != coding_type_ccl) | ||
| 4584 | { | 4730 | { |
| 4585 | saved_coding_symbol = coding->symbol; | 4731 | saved_coding_symbol = coding->symbol; |
| 4586 | detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte); | 4732 | detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte); |
| @@ -5038,7 +5184,8 @@ decode_coding_string (str, coding, nocopy) | |||
| 5038 | if (coding->type == coding_type_undecided) | 5184 | if (coding->type == coding_type_undecided) |
| 5039 | coding->type = coding_type_emacs_mule; | 5185 | coding->type = coding_type_emacs_mule; |
| 5040 | } | 5186 | } |
| 5041 | if (coding->eol_type == CODING_EOL_UNDECIDED) | 5187 | if (coding->eol_type == CODING_EOL_UNDECIDED |
| 5188 | && coding->type != coding_type_ccl) | ||
| 5042 | { | 5189 | { |
| 5043 | saved_coding_symbol = coding->symbol; | 5190 | saved_coding_symbol = coding->symbol; |
| 5044 | detect_eol (coding, XSTRING (str)->data, to_byte); | 5191 | detect_eol (coding, XSTRING (str)->data, to_byte); |