(check_composing_code): If the current composing sequence doesn't end properly, return -1. (DECODE_CHARACTER_ASCII): Update coding->composed_chars. (DECODE_CHARACTER_DIMENSION1): Likewise. (decode_coding_iso2022): Check validity of a composing sequence. (code_convert_string): If the length of text to be converted is shrunk to zero, don't perform code conversion. (shrink_decoding_region): Fix previous change.
author: Kenichi Handa 1998-09-26 04:20:48 +0000
committer: Kenichi Handa 1998-09-26 04:20:48 +0000
commit: de79a6a5ed49e728d1ee62efd9b1542cb72c095d (patch)
tree: 3f8e9f4c034b93a63065244b5ab0b68d37fa10a2 /src/coding.c
parent: 450c60a5597beb1aea1a549f53baece4e7d26983 (diff)
download: emacs-de79a6a5ed49e728d1ee62efd9b1542cb72c095d.tar.gz
emacs-de79a6a5ed49e728d1ee62efd9b1542cb72c095d.zip
1 files changed, 82 insertions, 41 deletions
diff --git a/src/coding.c b/src/coding.c
index fa2bbc620a0..5c3299b6b56 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -213,15 +213,18 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 /* Decode one ASCII character C.  */
-#define DECODE_CHARACTER_ASCII(c)                               \
-  do {                                                          \
-    if (COMPOSING_P (coding->composing))                        \
-      *dst++ = 0xA0, *dst++ = (c) | 0x80;                       \
-    else                                                        \
-      {                                                         \
-        *dst++ = (c);                                           \
-        coding->produced_char++;                                \
-      }                                                         \
+#define DECODE_CHARACTER_ASCII(c)               \
+  do {                                          \
+    if (COMPOSING_P (coding->composing))        \
+      {                                         \
+        *dst++ = 0xA0, *dst++ = (c) | 0x80;     \
+        coding->composed_chars++;               \
+      }                                         \
+    else                                        \
+      {                                         \
+        *dst++ = (c);                           \
+        coding->produced_char++;                \
+      }                                         \
  } while (0)
 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
@@ -231,7 +234,10 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
  do {                                                                  \
    unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);   \
    if (COMPOSING_P (coding->composing))                                \
-      *dst++ = leading_code + 0x20;                                     \
+      {                                                                 \
+        *dst++ = leading_code + 0x20;                                   \
+        coding->composed_chars++;                                       \
+      }                                                                 \
    else                                                                \
      {                                                                 \
        *dst++ = leading_code;                                          \
@@ -997,9 +1003,7 @@ check_composing_code (coding, src, src_end)
            invalid_code_found = 1;
        }
    }
-  return (invalid_code_found
-          ? src - src_start
-          : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1));
+  return (invalid_code_found ? src - src_start : -1);
 }
 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
@@ -1030,6 +1034,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
    translation_table = Vstandard_translation_table_for_decode;
  coding->produced_char = 0;
+  coding->composed_chars = 0;
  coding->fake_multibyte = 0;
  while (src < src_end && (dst_bytes
                           ? (dst < adjusted_dst_end)
@@ -1243,7 +1248,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
                    coding->composing = (c1 == '0'
                                         ? COMPOSING_NO_RULE_HEAD
                                         : COMPOSING_WITH_RULE_HEAD);
-                    coding->produced_char++;
+                    coding->composed_chars = 0;
                  }
                else if (result1 > 0)
                  {
@@ -1253,6 +1258,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
                        src += result1;
                        dst += result1 + 2;
                        coding->produced_char += result1 + 2;
+                        coding->fake_multibyte = 1;
                      }
                    else
                      {
@@ -1266,6 +1272,28 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
              break;
            case '1':           /* end composing */
+              if (coding->composed_chars > 0)
+                {
+                  if (coding->composed_chars == 1)
+                    {
+                      unsigned char *this_char_start = dst;
+                      int this_bytes;
+                      /* Only one character is in the composing
+                         sequence.  Make it a normal character.  */
+                      while (*--this_char_start != LEADING_CODE_COMPOSITION);
+                      dst = (this_char_start
+                             + (coding->composing == COMPOSING_NO_RULE_TAIL
+                                ? 1 : 2));
+                      *dst -= 0x20;
+                      if (*dst == 0x80)
+                        *++dst &= 0x7F;
+                      this_bytes = BYTES_BY_CHAR_HEAD (*dst);
+                      while (this_bytes--) *this_char_start++ = *dst++;
+                      dst = this_char_start;
+                    }
+                  coding->produced_char++;
+                }
              coding->composing = COMPOSING_NO;
              break;
@@ -3938,30 +3966,45 @@ shrink_decoding_region (beg, end, coding, str)
        case CODING_CATEGORY_IDX_ISO_7:
        case CODING_CATEGORY_IDX_ISO_7_TIGHT:
-          /* We can skip all charactes at the tail except for ESC and
-             the following 2-byte at the tail.  */
-          if (eol_conversion)
-            while (begp < endp
-                   && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
-              endp--;
-          else
-            while (begp < endp
-                   && (c = endp[-1]) != ISO_CODE_ESC)
-              endp--;
-          /* Do not consider LF as ascii if preceded by CR, since that
-             confuses eol decoding. */
-          if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
-            endp++;
-          if (begp < endp && endp[-1] == ISO_CODE_ESC)
-            {
-              if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
-                /* This is an ASCII designation sequence.  We can
-                    surely skip the tail.  */
-                endp += 2;
-              else
-                /* Hmmm, we can't skip the tail.  */
-                endp = endp_orig;
-            }
+          {
+            /* We can skip all charactes at the tail except for 8-bit
+               codes and ESC and the following 2-byte at the tail.  */
+            unsigned char *eight_bit = NULL;
+            if (eol_conversion)
+              while (begp < endp
+                     && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
+                {
+                  if (!eight_bit && c & 0x80) eight_bit = endp;
+                  endp--;
+                }
+            else
+              while (begp < endp
+                     && (c = endp[-1]) != ISO_CODE_ESC)
+                {
+                  if (!eight_bit && c & 0x80) eight_bit = endp;
+                  endp--;
+                }
+            /* Do not consider LF as ascii if preceded by CR, since that
+               confuses eol decoding. */
+            if (begp < endp && endp < endp_orig
+                && endp[-1] == '\r' && endp[0] == '\n')
+              endp++;
+            if (begp < endp && endp[-1] == ISO_CODE_ESC)
+              {
+                if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
+                  /* This is an ASCII designation sequence.  We can
+                     surely skip the tail.  But, if we have
+                     encountered an 8-bit code, skip only the codes
+                     after that.  */
+                  endp = eight_bit ? eight_bit : endp + 2;
+                else
+                  /* Hmmm, we can't skip the tail.  */
+                  endp = endp_orig;
+              }
+            else if (eight_bit)
+              endp = eight_bit;
+          }
        }
    }
  *beg += begp - begp_orig;
@@ -4524,9 +4567,7 @@ code_convert_string (str, coding, encodep, nocopy)
      else
        shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
    }
-  if (from == to_byte
-      && ! (coding->mode & CODING_MODE_LAST_BLOCK
-            && CODING_REQUIRE_FLUSHING (coding)))
+  if (from == to_byte)
    return (nocopy ? str : Fcopy_sequence (str));
  if (encodep)
author	Kenichi Handa	1998-09-26 04:20:48 +0000
committer	Kenichi Handa	1998-09-26 04:20:48 +0000
commit	de79a6a5ed49e728d1ee62efd9b1542cb72c095d (patch)
tree	3f8e9f4c034b93a63065244b5ab0b68d37fa10a2 /src/coding.c
parent	450c60a5597beb1aea1a549f53baece4e7d26983 (diff)
download	emacs-de79a6a5ed49e728d1ee62efd9b1542cb72c095d.tar.gz emacs-de79a6a5ed49e728d1ee62efd9b1542cb72c095d.zip

diff --git a/src/coding.c b/src/coding.c
index fa2bbc620a0..5c3299b6b56 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -213,15 +213,18 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
213		213
214	/* Decode one ASCII character C. */	214	/* Decode one ASCII character C. */
215		215
216	#define DECODE_CHARACTER_ASCII(c) \	216	#define DECODE_CHARACTER_ASCII(c) \
217	do { \	217	do { \
218	if (COMPOSING_P (coding->composing)) \	218	if (COMPOSING_P (coding->composing)) \
219	*dst++ = 0xA0, *dst++ = (c) | 0x80; \	219	{ \
220	else \	220	*dst++ = 0xA0, *dst++ = (c) | 0x80; \
221	{ \	221	coding->composed_chars++; \
222	*dst++ = (c); \	222	} \
223	coding->produced_char++; \	223	else \
224	} \	224	{ \
		225	*dst++ = (c); \
		226	coding->produced_char++; \
		227	} \
225	} while (0)	228	} while (0)
226		229
227	/* Decode one DIMENSION1 character whose charset is CHARSET and whose	230	/* Decode one DIMENSION1 character whose charset is CHARSET and whose
@@ -231,7 +234,10 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
231	do { \	234	do { \
232	unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \	235	unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \
233	if (COMPOSING_P (coding->composing)) \	236	if (COMPOSING_P (coding->composing)) \
234	*dst++ = leading_code + 0x20; \	237	{ \
		238	*dst++ = leading_code + 0x20; \
		239	coding->composed_chars++; \
		240	} \
235	else \	241	else \
236	{ \	242	{ \
237	*dst++ = leading_code; \	243	*dst++ = leading_code; \
@@ -997,9 +1003,7 @@ check_composing_code (coding, src, src_end)
997	invalid_code_found = 1;	1003	invalid_code_found = 1;
998	}	1004	}
999	}	1005	}
1000	return (invalid_code_found	1006	return (invalid_code_found ? src - src_start : -1);
1001	? src - src_start
1002	: (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1));
1003	}	1007	}
1004		1008
1005	/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */	1009	/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
@@ -1030,6 +1034,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1030	translation_table = Vstandard_translation_table_for_decode;	1034	translation_table = Vstandard_translation_table_for_decode;
1031		1035
1032	coding->produced_char = 0;	1036	coding->produced_char = 0;
		1037	coding->composed_chars = 0;
1033	coding->fake_multibyte = 0;	1038	coding->fake_multibyte = 0;
1034	while (src < src_end && (dst_bytes	1039	while (src < src_end && (dst_bytes
1035	? (dst < adjusted_dst_end)	1040	? (dst < adjusted_dst_end)
@@ -1243,7 +1248,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1243	coding->composing = (c1 == '0'	1248	coding->composing = (c1 == '0'
1244	? COMPOSING_NO_RULE_HEAD	1249	? COMPOSING_NO_RULE_HEAD
1245	: COMPOSING_WITH_RULE_HEAD);	1250	: COMPOSING_WITH_RULE_HEAD);
1246	coding->produced_char++;	1251	coding->composed_chars = 0;
1247	}	1252	}
1248	else if (result1 > 0)	1253	else if (result1 > 0)
1249	{	1254	{
@@ -1253,6 +1258,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1253	src += result1;	1258	src += result1;
1254	dst += result1 + 2;	1259	dst += result1 + 2;
1255	coding->produced_char += result1 + 2;	1260	coding->produced_char += result1 + 2;
		1261	coding->fake_multibyte = 1;
1256	}	1262	}
1257	else	1263	else
1258	{	1264	{
@@ -1266,6 +1272,28 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1266	break;	1272	break;
1267		1273
1268	case '1': /* end composing */	1274	case '1': /* end composing */
		1275	if (coding->composed_chars > 0)
		1276	{
		1277	if (coding->composed_chars == 1)
		1278	{
		1279	unsigned char *this_char_start = dst;
		1280	int this_bytes;
		1281
		1282	/* Only one character is in the composing
		1283	sequence. Make it a normal character. */
		1284	while (*--this_char_start != LEADING_CODE_COMPOSITION);
		1285	dst = (this_char_start
		1286	+ (coding->composing == COMPOSING_NO_RULE_TAIL
		1287	? 1 : 2));
		1288	*dst -= 0x20;
		1289	if (*dst == 0x80)
		1290	*++dst &= 0x7F;
		1291	this_bytes = BYTES_BY_CHAR_HEAD (*dst);
		1292	while (this_bytes--) *this_char_start++ = *dst++;
		1293	dst = this_char_start;
		1294	}
		1295	coding->produced_char++;
		1296	}
1269	coding->composing = COMPOSING_NO;	1297	coding->composing = COMPOSING_NO;
1270	break;	1298	break;
1271		1299
@@ -3938,30 +3966,45 @@ shrink_decoding_region (beg, end, coding, str)
3938		3966
3939	case CODING_CATEGORY_IDX_ISO_7:	3967	case CODING_CATEGORY_IDX_ISO_7:
3940	case CODING_CATEGORY_IDX_ISO_7_TIGHT:	3968	case CODING_CATEGORY_IDX_ISO_7_TIGHT:
3941	/* We can skip all charactes at the tail except for ESC and	3969	{
3942	the following 2-byte at the tail. */	3970	/* We can skip all charactes at the tail except for 8-bit
3943	if (eol_conversion)	3971	codes and ESC and the following 2-byte at the tail. */
3944	while (begp < endp	3972	unsigned char *eight_bit = NULL;
3945	&& (c = endp[-1]) != ISO_CODE_ESC && c != '\r')	3973
3946	endp--;	3974	if (eol_conversion)
3947	else	3975	while (begp < endp
3948	while (begp < endp	3976	&& (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
3949	&& (c = endp[-1]) != ISO_CODE_ESC)	3977	{
3950	endp--;	3978	if (!eight_bit && c & 0x80) eight_bit = endp;
3951	/* Do not consider LF as ascii if preceded by CR, since that	3979	endp--;
3952	confuses eol decoding. */	3980	}
3953	if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')	3981	else
3954	endp++;	3982	while (begp < endp
3955	if (begp < endp && endp[-1] == ISO_CODE_ESC)	3983	&& (c = endp[-1]) != ISO_CODE_ESC)
3956	{	3984	{
3957	if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')	3985	if (!eight_bit && c & 0x80) eight_bit = endp;
3958	/* This is an ASCII designation sequence. We can	3986	endp--;
3959	surely skip the tail. */	3987	}
3960	endp += 2;	3988	/* Do not consider LF as ascii if preceded by CR, since that
3961	else	3989	confuses eol decoding. */
3962	/* Hmmm, we can't skip the tail. */	3990	if (begp < endp && endp < endp_orig
3963	endp = endp_orig;	3991	&& endp[-1] == '\r' && endp[0] == '\n')
3964	}	3992	endp++;
		3993	if (begp < endp && endp[-1] == ISO_CODE_ESC)
		3994	{
		3995	if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
		3996	/* This is an ASCII designation sequence. We can
		3997	surely skip the tail. But, if we have
		3998	encountered an 8-bit code, skip only the codes
		3999	after that. */
		4000	endp = eight_bit ? eight_bit : endp + 2;
		4001	else
		4002	/* Hmmm, we can't skip the tail. */
		4003	endp = endp_orig;
		4004	}
		4005	else if (eight_bit)
		4006	endp = eight_bit;
		4007	}
3965	}	4008	}
3966	}	4009	}
3967	*beg += begp - begp_orig;	4010	*beg += begp - begp_orig;
@@ -4524,9 +4567,7 @@ code_convert_string (str, coding, encodep, nocopy)
4524	else	4567	else
4525	shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);	4568	shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4526	}	4569	}
4527	if (from == to_byte	4570	if (from == to_byte)
4528	&& ! (coding->mode & CODING_MODE_LAST_BLOCK
4529	&& CODING_REQUIRE_FLUSHING (coding)))
4530	return (nocopy ? str : Fcopy_sequence (str));	4571	return (nocopy ? str : Fcopy_sequence (str));
4531		4572
4532	if (encodep)	4573	if (encodep)