diff options
| author | Kenichi Handa | 2009-01-14 12:19:44 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2009-01-14 12:19:44 +0000 |
| commit | f56a4450912fa06401b13e6631313fe17bed006f (patch) | |
| tree | 00e36c6a98a8b740184d7535844f9eda51eb1d29 /src/coding.c | |
| parent | 97d42150b4f0233e98f516e69f4978b2e6eebe59 (diff) | |
| download | emacs-f56a4450912fa06401b13e6631313fe17bed006f.tar.gz emacs-f56a4450912fa06401b13e6631313fe17bed006f.zip | |
(TWO_MORE_BYTES): New macro.
(detect_coding_utf_16): Use TWO_MORE_BYTES instead of
ONE_MORE_BYTE.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 54 |
1 files changed, 50 insertions, 4 deletions
diff --git a/src/coding.c b/src/coding.c index 01878a37b5c..9a94bc6fb2a 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -743,6 +743,47 @@ static struct coding_system coding_categories[coding_category_max]; | |||
| 743 | consumed_chars++; \ | 743 | consumed_chars++; \ |
| 744 | } while (0) | 744 | } while (0) |
| 745 | 745 | ||
| 746 | /* Safely get two bytes from the source text pointed to by SRC which ends | |
| 747 | at SRC_END, and set C1 and C2 to those bytes. If there are not | ||
| 748 | enough bytes in the source for C1, it jumps to `no_more_source'. | ||
| 749 | If there are not enough bytes in the source for C2, set C2 to -1. | ||
| 750 | If multibytep is nonzero and a multibyte character is found at SRC, | ||
| 751 | set C1 and/or C2 to the negative value of the character code. The | ||
| 752 | caller should declare and set these variables appropriately in | ||
| 753 | advance: | ||
| 754 | src, src_end, multibytep | ||
| 755 | It is intended that this macro is used in detect_coding_utf_16. */ | ||
| 756 | |||
| 757 | #define TWO_MORE_BYTES(c1, c2) \ | ||
| 758 | do { \ | ||
| 759 | if (src == src_end) \ | ||
| 760 | goto no_more_source; \ | ||
| 761 | c1 = *src++; \ | ||
| 762 | if (multibytep && (c1 & 0x80)) \ | ||
| 763 | { \ | ||
| 764 | if ((c1 & 0xFE) == 0xC0) \ | ||
| 765 | c1 = ((c1 & 1) << 6) | *src++; \ | ||
| 766 | else \ | ||
| 767 | { \ | ||
| 768 | c1 = c2 = -1; \ | ||
| 769 | break; \ | ||
| 770 | } \ | ||
| 771 | } \ | ||
| 772 | if (src == src_end) \ | ||
| 773 | c2 = -1; \ | ||
| 774 | else \ | ||
| 775 | { \ | ||
| 776 | c2 = *src++; \ | ||
| 777 | if (multibytep && (c2 & 0x80)) \ | ||
| 778 | { \ | ||
| 779 | if ((c2 & 0xFE) == 0xC0) \ | ||
| 780 | c2 = ((c2 & 1) << 6) | *src++; \ | ||
| 781 | else \ | ||
| 782 | c2 = -1; \ | ||
| 783 | } \ | ||
| 784 | } \ | ||
| 785 | } while (0) | ||
| 786 | |||
| 746 | 787 | ||
| 747 | #define ONE_MORE_BYTE_NO_CHECK(c) \ | 788 | #define ONE_MORE_BYTE_NO_CHECK(c) \ |
| 748 | do { \ | 789 | do { \ |
| @@ -1575,8 +1616,7 @@ detect_coding_utf_16 (coding, detect_info) | |||
| 1575 | return 0; | 1616 | return 0; |
| 1576 | } | 1617 | } |
| 1577 | 1618 | ||
| 1578 | ONE_MORE_BYTE (c1); | 1619 | TWO_MORE_BYTES (c1, c2); |
| 1579 | ONE_MORE_BYTE (c2); | ||
| 1580 | if ((c1 == 0xFF) && (c2 == 0xFE)) | 1620 | if ((c1 == 0xFF) && (c2 == 0xFE)) |
| 1581 | { | 1621 | { |
| 1582 | detect_info->found |= (CATEGORY_MASK_UTF_16_LE | 1622 | detect_info->found |= (CATEGORY_MASK_UTF_16_LE |
| @@ -1593,6 +1633,11 @@ detect_coding_utf_16 (coding, detect_info) | |||
| 1593 | | CATEGORY_MASK_UTF_16_BE_NOSIG | 1633 | | CATEGORY_MASK_UTF_16_BE_NOSIG |
| 1594 | | CATEGORY_MASK_UTF_16_LE_NOSIG); | 1634 | | CATEGORY_MASK_UTF_16_LE_NOSIG); |
| 1595 | } | 1635 | } |
| 1636 | else if (c1 < 0 || c2 < 0) | ||
| 1637 | { | ||
| 1638 | detect_info->rejected |= CATEGORY_MASK_UTF_16; | ||
| 1639 | return 0; | ||
| 1640 | } | ||
| 1596 | else | 1641 | else |
| 1597 | { | 1642 | { |
| 1598 | /* We check the dispersion of Eth and Oth bytes where E is even and | 1643 | /* We check the dispersion of Eth and Oth bytes where E is even and |
| @@ -1610,8 +1655,9 @@ detect_coding_utf_16 (coding, detect_info) | |||
| 1610 | 1655 | ||
| 1611 | while (1) | 1656 | while (1) |
| 1612 | { | 1657 | { |
| 1613 | ONE_MORE_BYTE (c1); | 1658 | TWO_MORE_BYTES (c1, c2); |
| 1614 | ONE_MORE_BYTE (c2); | 1659 | if (c1 < 0 || c2 < 0) |
| 1660 | break; | ||
| 1615 | if (! e[c1]) | 1661 | if (! e[c1]) |
| 1616 | { | 1662 | { |
| 1617 | e[c1] = 1; | 1663 | e[c1] = 1; |