(TWO_MORE_BYTES): New macro.

(detect_coding_utf_16): Use TWO_MORE_BYTES instead of ONE_MORE_BYTE.
author: Kenichi Handa 2009-01-14 12:19:44 +0000
committer: Kenichi Handa 2009-01-14 12:19:44 +0000
commit: f56a4450912fa06401b13e6631313fe17bed006f (patch)
tree: 00e36c6a98a8b740184d7535844f9eda51eb1d29 /src/coding.c
parent: 97d42150b4f0233e98f516e69f4978b2e6eebe59 (diff)
download: emacs-f56a4450912fa06401b13e6631313fe17bed006f.tar.gz
emacs-f56a4450912fa06401b13e6631313fe17bed006f.zip
1 files changed, 50 insertions, 4 deletions
diff --git a/src/coding.c b/src/coding.c
index 01878a37b5c..9a94bc6fb2a 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -743,6 +743,47 @@ static struct coding_system coding_categories[coding_category_max];
    consumed_chars++;                                   \
  } while (0)
+/* Safely get two bytes from the source text pointed by SRC which ends
+   at SRC_END, and set C1 and C2 to those bytes.  If there are not
+   enough bytes in the source for C1, it jumps to `no_more_source'.
+   If there are not enough bytes in the source for C2, set C2 to -1.
+
+   If multibytep is nonzero and a fetched byte has its high bit set:
+   - when the byte is 0xC0 or 0xC1, the following byte is consumed as
+     well and the result is ((lead & 1) << 6) | next_byte;
+   - otherwise the value is set to -1.  When that happens for C1, C2
+     is set to -1 too and nothing further is read.
+   Thus a negative C1 or C2 means that no usable byte was obtained,
+   and the caller is expected to test for that before using them.
+
+   NOTE(review): the byte following a 0xC0/0xC1 lead is read without
+   comparing SRC against SRC_END; this presumably relies on the source
+   being well-formed multibyte text -- confirm.
+
+   The caller should declare and set these variables appropriately in
+   advance:
+        src, src_end, multibytep
+   and must define the label no_more_source.
+   It is intended that this macro is used in detect_coding_utf_16.  */
+#define TWO_MORE_BYTES(c1, c2)                  \
+  do {                                          \
+    if (src == src_end)                         \
+      goto no_more_source;                      \
+    c1 = *src++;                                \
+    if (multibytep && (c1 & 0x80))              \
+      {                                         \
+        if ((c1 & 0xFE) == 0xC0)                \
+          c1 = ((c1 & 1) << 6) | *src++;        \
+        else                                    \
+          {                                     \
+            c1 = c2 = -1;                       \
+            break;                              \
+          }                                     \
+      }                                         \
+    if (src == src_end)                         \
+      c2 = -1;                                  \
+    else                                        \
+      {                                         \
+        c2 = *src++;                            \
+        if (multibytep && (c2 & 0x80))          \
+          {                                     \
+            if ((c2 & 0xFE) == 0xC0)            \
+              c2 = ((c2 & 1) << 6) | *src++;    \
+            else                                \
+              c2 = -1;                          \
+          }                                     \
+      }                                         \
+  } while (0)
 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
  do {                                                  \
@@ -1575,8 +1616,7 @@ detect_coding_utf_16 (coding, detect_info)
      return 0;
    }
-  ONE_MORE_BYTE (c1);
+  TWO_MORE_BYTES (c1, c2);
-  ONE_MORE_BYTE (c2);
  if ((c1 == 0xFF) && (c2 == 0xFE))
    {
      detect_info->found |= (CATEGORY_MASK_UTF_16_LE
@@ -1593,6 +1633,11 @@ detect_coding_utf_16 (coding, detect_info)
                                | CATEGORY_MASK_UTF_16_BE_NOSIG
                                | CATEGORY_MASK_UTF_16_LE_NOSIG);
    }
+  else if (c1 < 0 || c2 < 0)
+    {
+      detect_info->rejected |= CATEGORY_MASK_UTF_16;
+      return 0;
+    }
  else
    {
      /* We check the dispersion of Eth and Oth bytes where E is even and
@@ -1610,8 +1655,9 @@ detect_coding_utf_16 (coding, detect_info)
      while (1)
        {
-          ONE_MORE_BYTE (c1);
+          TWO_MORE_BYTES (c1, c2);
-          ONE_MORE_BYTE (c2);
+          if (c1 < 0 || c2 < 0)
+            break;
          if (! e[c1])
            {
              e[c1] = 1;
author	Kenichi Handa	2009-01-14 12:19:44 +0000
committer	Kenichi Handa	2009-01-14 12:19:44 +0000
commit	f56a4450912fa06401b13e6631313fe17bed006f (patch)
tree	00e36c6a98a8b740184d7535844f9eda51eb1d29 /src/coding.c
parent	97d42150b4f0233e98f516e69f4978b2e6eebe59 (diff)
download	emacs-f56a4450912fa06401b13e6631313fe17bed006f.tar.gz emacs-f56a4450912fa06401b13e6631313fe17bed006f.zip

diff --git a/src/coding.c b/src/coding.c index 01878a37b5c..9a94bc6fb2a 100644 --- a/src/coding.c +++ b/src/coding.c
@@ -743,6 +743,47 @@ static struct coding_system coding_categories[coding_category_max];
743	consumed_chars++; \	743	consumed_chars++; \
744	} while (0)	744	} while (0)
745		745
		746	/* Safely get two bytes from the source text pointed by SRC which ends
		747	at SRC_END, and set C1 and C2 to those bytes. If there are not
		748	enough bytes in the source for C1, it jumps to `no_more_source'.
		749	If there are not enough bytes in the source for C2, set C2 to -1.
		750	If multibytep is nonzero and a byte with the high bit set is found
		751	at SRC, decode a 0xC0/0xC1 pair, else set C1 and/or C2 to -1. The
		752	caller should declare and set these variables appropriately in
		753	advance:
		754	src, src_end, multibytep
		755	It is intended that this macro is used in detect_coding_utf_16. */
		756
		757	#define TWO_MORE_BYTES(c1, c2) \
		758	do { \
		759	if (src == src_end) \
		760	goto no_more_source; \
		761	c1 = *src++; \
		762	if (multibytep && (c1 & 0x80)) \
		763	{ \
		764	if ((c1 & 0xFE) == 0xC0) \
		765	c1 = ((c1 & 1) << 6) \| *src++; \
		766	else \
		767	{ \
		768	c1 = c2 = -1; \
		769	break; \
		770	} \
		771	} \
		772	if (src == src_end) \
		773	c2 = -1; \
		774	else \
		775	{ \
		776	c2 = *src++; \
		777	if (multibytep && (c2 & 0x80)) \
		778	{ \
		779	if ((c2 & 0xFE) == 0xC0) \
		780	c2 = ((c2 & 1) << 6) \| *src++; \
		781	else \
		782	c2 = -1; \
		783	} \
		784	} \
		785	} while (0)
		786
746		787
747	#define ONE_MORE_BYTE_NO_CHECK(c) \	788	#define ONE_MORE_BYTE_NO_CHECK(c) \
748	do { \	789	do { \
@@ -1575,8 +1616,7 @@ detect_coding_utf_16 (coding, detect_info)
1575	return 0;	1616	return 0;
1576	}	1617	}
1577		1618
1578	ONE_MORE_BYTE (c1);	1619	TWO_MORE_BYTES (c1, c2);
1579	ONE_MORE_BYTE (c2);
1580	if ((c1 == 0xFF) && (c2 == 0xFE))	1620	if ((c1 == 0xFF) && (c2 == 0xFE))
1581	{	1621	{
1582	detect_info->found \|= (CATEGORY_MASK_UTF_16_LE	1622	detect_info->found \|= (CATEGORY_MASK_UTF_16_LE
@@ -1593,6 +1633,11 @@ detect_coding_utf_16 (coding, detect_info)
1593	\| CATEGORY_MASK_UTF_16_BE_NOSIG	1633	\| CATEGORY_MASK_UTF_16_BE_NOSIG
1594	\| CATEGORY_MASK_UTF_16_LE_NOSIG);	1634	\| CATEGORY_MASK_UTF_16_LE_NOSIG);
1595	}	1635	}
		1636	else if (c1 < 0 \|\| c2 < 0)
		1637	{
		1638	detect_info->rejected \|= CATEGORY_MASK_UTF_16;
		1639	return 0;
		1640	}
1596	else	1641	else
1597	{	1642	{
1598	/* We check the dispersion of Eth and Oth bytes where E is even and	1643	/* We check the dispersion of Eth and Oth bytes where E is even and
@@ -1610,8 +1655,9 @@ detect_coding_utf_16 (coding, detect_info)
1610		1655
1611	while (1)	1656	while (1)
1612	{	1657	{
1613	ONE_MORE_BYTE (c1);	1658	TWO_MORE_BYTES (c1, c2);
1614	ONE_MORE_BYTE (c2);	1659	if (c1 < 0 \|\| c2 < 0)
		1660	break;
1615	if (! e[c1])	1661	if (! e[c1])
1616	{	1662	{
1617	e[c1] = 1;	1663	e[c1] = 1;