aboutsummaryrefslogtreecommitdiffstats
path: root/src/coding.c
diff options
context:
space:
mode:
author Kenichi Handa 2000-06-19 05:18:09 +0000
committer Kenichi Handa 2000-06-19 05:18:09 +0000
commit aaaf0b1e09315b9512faa00e7fc2f01445222d78 (patch)
tree 60fc2812ba8955698ef5d7a1b9b2d6bd4df01a3a /src/coding.c
parent 5b8ca8222ee46fea4a6165748824ea2f1f04562b (diff)
download emacs-aaaf0b1e09315b9512faa00e7fc2f01445222d78.tar.gz
emacs-aaaf0b1e09315b9512faa00e7fc2f01445222d78.zip
(setup_coding_system) <4>: Reset member `cr_carryover'.
(ccl_coding_driver): On encoding, initialize ccl->eol_type. (decode_eol_post_ccl): New function. (decode_coding): Don't detect EOL format here for CCL based coding systems. (decode_coding) <coding_type_ccl>: Handle carried-over CR. Call decode_eol_post_ccl after running the CCL program. (code_convert_region): Don't detect EOL format here for CCL based coding systems. (decode_coding_string): Likewise.
Diffstat (limited to 'src/coding.c')
-rw-r--r-- src/coding.c 157
1 files changed, 152 insertions, 5 deletions
diff --git a/src/coding.c b/src/coding.c
index 0b70480b965..d02c27dc478 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -3202,6 +3202,7 @@ setup_coding_system (coding_system, coding)
3202 } 3202 }
3203 } 3203 }
3204 coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK; 3204 coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3205 coding->spec.ccl.cr_carryover = 0;
3205 break; 3206 break;
3206 3207
3207 case 5: 3208 case 5:
@@ -3883,7 +3884,8 @@ ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3883 int result; 3884 int result;
3884 3885
3885 ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK; 3886 ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
3886 3887 if (encodep)
3888 ccl->eol_type = coding->eol_type;
3887 coding->produced = ccl_driver (ccl, source, destination, 3889 coding->produced = ccl_driver (ccl, source, destination,
3888 src_bytes, dst_bytes, &(coding->consumed)); 3890 src_bytes, dst_bytes, &(coding->consumed));
3889 if (encodep) 3891 if (encodep)
@@ -3916,6 +3918,136 @@ ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3916 return result; 3918 return result;
3917} 3919}
3918 3920
3921/* Convert the EOL format of the BYTES bytes of text at PTR in place according
3922 to CODING->eol_type, detecting the format first if it is still undecided.
3923 This is called after the CCL program has produced decoded text at PTR.
3924 CRLF->LF conversion shrinks the text, so CODING->produced and CODING->produced_char are updated to match. */
3925
3926static void
3927decode_eol_post_ccl (coding, ptr, bytes)
3928 struct coding_system *coding;
3929 unsigned char *ptr;
3930 int bytes;
3931{
3932 Lisp_Object val, saved_coding_symbol;
3933 unsigned char *pend = ptr + bytes;
3934 int dummy;
3935
3936 /* Remember the current coding system symbol. We set it back when
3937 an inconsistent EOL is found so that `last-coding-system-used' is
3938 set to the coding system that doesn't specify EOL conversion. */
3939 saved_coding_symbol = coding->symbol;
3940
3941 coding->spec.ccl.cr_carryover = 0; /* No CR is pending unless the CRLF branch below records one. */
3942 if (coding->eol_type == CODING_EOL_UNDECIDED)
3943 {
3944 /* Here, to avoid the call of setup_coding_system, we directly
3945 call detect_eol_type. */
3946 coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
3947 val = Fget (coding->symbol, Qeol_type);
3948 if (VECTORP (val) && XVECTOR (val)->size == 3)
3949 coding->symbol = XVECTOR (val)->contents[coding->eol_type];
3950 coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; /* The format was only detected, so the branches below back out of the conversion if the text proves inconsistent. */
3951 }
3952
3953 if (coding->eol_type == CODING_EOL_LF)
3954 {
3955 /* We have nothing to do. */
3956 ptr = pend;
3957 }
3958 else if (coding->eol_type == CODING_EOL_CRLF)
3959 {
3960 unsigned char *pstart = ptr, *p = ptr;
3961
3962 if (! (coding->mode & CODING_MODE_LAST_BLOCK)
3963 && *(pend - 1) == '\r')
3964 {
3965 /* If the last character is CR, we can't handle it here
3966 because LF will be in the not-yet-decoded source text.
3967 Record that the CR is not yet processed. NOTE(review): the test above reads PEND[-1] even when BYTES is 0 -- confirm callers never pass empty text. */
3968 coding->spec.ccl.cr_carryover = 1;
3969 coding->produced--;
3970 coding->produced_char--;
3971 pend--;
3972 }
3973 while (ptr < pend)
3974 {
3975 if (*ptr == '\r')
3976 {
3977 if (ptr + 1 < pend && *(ptr + 1) == '\n')
3978 {
3979 *p++ = '\n';
3980 ptr += 2;
3981 }
3982 else
3983 {
3984 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3985 goto undo_eol_conversion;
3986 *p++ = *ptr++;
3987 }
3988 }
3989 else if (*ptr == '\n'
3990 && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3991 goto undo_eol_conversion;
3992 else
3993 *p++ = *ptr++;
3994 continue;
3995
3996 undo_eol_conversion:
3997 /* We have encountered an inconsistent EOL format at PTR.
3998 Walk backward and convert all LFs before PTR back to CRLFs. */
3999 for (p--, ptr--; p >= pstart; p--)
4000 {
4001 if (*p == '\n')
4002 *ptr-- = '\n', *ptr-- = '\r';
4003 else
4004 *ptr-- = *p;
4005 }
4006 /* If carryover is recorded, cancel it because we don't
4007 convert CRLF anymore. */
4008 if (coding->spec.ccl.cr_carryover)
4009 {
4010 coding->spec.ccl.cr_carryover = 0;
4011 coding->produced++;
4012 coding->produced_char++;
4013 pend++;
4014 }
4015 p = ptr = pend;
4016 coding->eol_type = CODING_EOL_LF;
4017 coding->symbol = saved_coding_symbol;
4018 }
4019 if (p < pend)
4020 {
4021 /* As each two-byte sequence CRLF was converted to LF, (PEND
4022 - P) is the number of deleted characters. */
4023 coding->produced -= pend - p;
4024 coding->produced_char -= pend - p;
4025 }
4026 }
4027 else /* presumably coding->eol_type == CODING_EOL_CR; NOTE(review): any other value also lands here */
4028 {
4029 unsigned char *p = ptr;
4030
4031 for (; ptr < pend; ptr++)
4032 {
4033 if (*ptr == '\r')
4034 *ptr = '\n';
4035 else if (*ptr == '\n'
4036 && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4037 { /* Inconsistent EOL: turn the LFs written so far back into CRs, then end the scan. */
4038 for (; p < ptr; p++)
4039 {
4040 if (*p == '\n')
4041 *p = '\r';
4042 }
4043 ptr = pend;
4044 coding->eol_type = CODING_EOL_LF;
4045 coding->symbol = saved_coding_symbol;
4046 }
4047 }
4048 }
4049}
4050
3919/* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before 4051/* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before
3920 decoding, it may detect coding system and format of end-of-line if 4052 decoding, it may detect coding system and format of end-of-line if
3921 those are not yet decided. The source should be unibyte, the 4053 those are not yet decided. The source should be unibyte, the
@@ -3931,7 +4063,8 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes)
3931 if (coding->type == coding_type_undecided) 4063 if (coding->type == coding_type_undecided)
3932 detect_coding (coding, source, src_bytes); 4064 detect_coding (coding, source, src_bytes);
3933 4065
3934 if (coding->eol_type == CODING_EOL_UNDECIDED) 4066 if (coding->eol_type == CODING_EOL_UNDECIDED
4067 && coding->type != coding_type_ccl)
3935 detect_eol (coding, source, src_bytes); 4068 detect_eol (coding, source, src_bytes);
3936 4069
3937 coding->produced = coding->produced_char = 0; 4070 coding->produced = coding->produced_char = 0;
@@ -3962,8 +4095,20 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes)
3962 break; 4095 break;
3963 4096
3964 case coding_type_ccl: 4097 case coding_type_ccl:
3965 ccl_coding_driver (coding, source, destination, 4098 if (coding->spec.ccl.cr_carryover)
4099 {
4100 /* Put the CR that the previous call of decode_eol_post_ccl
4101 left unprocessed at the head of DESTINATION. */
4102 *destination = '\r';
4103 coding->produced++;
4104 coding->produced_char++;
4105 dst_bytes--;
4106 }
4107 ccl_coding_driver (coding, source,
4108 destination + coding->spec.ccl.cr_carryover,
3966 src_bytes, dst_bytes, 0); 4109 src_bytes, dst_bytes, 0);
4110 if (coding->eol_type != CODING_EOL_LF)
4111 decode_eol_post_ccl (coding, destination, coding->produced);
3967 break; 4112 break;
3968 4113
3969 default: 4114 default:
@@ -4580,7 +4725,8 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
4580 encodings again in vain. */ 4725 encodings again in vain. */
4581 coding->type = coding_type_emacs_mule; 4726 coding->type = coding_type_emacs_mule;
4582 } 4727 }
4583 if (coding->eol_type == CODING_EOL_UNDECIDED) 4728 if (coding->eol_type == CODING_EOL_UNDECIDED
4729 && coding->type != coding_type_ccl)
4584 { 4730 {
4585 saved_coding_symbol = coding->symbol; 4731 saved_coding_symbol = coding->symbol;
4586 detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte); 4732 detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
@@ -5038,7 +5184,8 @@ decode_coding_string (str, coding, nocopy)
5038 if (coding->type == coding_type_undecided) 5184 if (coding->type == coding_type_undecided)
5039 coding->type = coding_type_emacs_mule; 5185 coding->type = coding_type_emacs_mule;
5040 } 5186 }
5041 if (coding->eol_type == CODING_EOL_UNDECIDED) 5187 if (coding->eol_type == CODING_EOL_UNDECIDED
5188 && coding->type != coding_type_ccl)
5042 { 5189 {
5043 saved_coding_symbol = coding->symbol; 5190 saved_coding_symbol = coding->symbol;
5044 detect_eol (coding, XSTRING (str)->data, to_byte); 5191 detect_eol (coding, XSTRING (str)->data, to_byte);