diff options
| author | Kenichi Handa | 2002-05-21 04:23:15 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2002-05-21 04:23:15 +0000 |
| commit | 781d7a48c41eb1760dfa99993a9b033aecfbee28 (patch) | |
| tree | d87fcb6e1d3f1d3c42c91594421a4f45a4814b70 /src/coding.c | |
| parent | 4f65af013ec650af933680f93313486591aa7dcb (diff) | |
| download | emacs-781d7a48c41eb1760dfa99993a9b033aecfbee28.tar.gz emacs-781d7a48c41eb1760dfa99993a9b033aecfbee28.zip | |
(ONE_MORE_BYTE_NO_CHECK): Increment consumed_chars.
(emacs_mule_char): New arg src. Delete arg `composition'. Caller
changed. Handle 2-byte and 3-byte charsets correctly.
(DECODE_EMACS_MULE_COMPOSITION_RULE_20): Renamed from
DECODE_EMACS_MULE_COMPOSITION_RULE. Caller changed.
(DECODE_EMACS_MULE_COMPOSITION_RULE_21): New macro.
(DECODE_EMACS_MULE_21_COMPOSITION): Call
DECODE_EMACS_MULE_COMPOSITION_RULE_21. Produce correct annotation
sequence.
(decode_coding_emacs_mule): Handle composition correctly. Rewind
`src' and `consumed_chars' correctly before calling
emacs_mule_char.
(DECODE_COMPOSITION_START): Correctly handle the case of altchar
and alt&rule composition.
(decode_coding_iso_2022): Handle composition correctly.
(init_coding_once): Set up emacs_mule_bytes for private charsets.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 125 |
1 file changed, 76 insertions, 49 deletions
diff --git a/src/coding.c b/src/coding.c index 11ccc61984e..7bfefef88dd 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -764,6 +764,7 @@ static int detected_mask[coding_category_raw_text] = | |||
| 764 | error ("Undecodable char found"); \ | 764 | error ("Undecodable char found"); \ |
| 765 | c = ((c & 1) << 6) | *src++; \ | 765 | c = ((c & 1) << 6) | *src++; \ |
| 766 | } \ | 766 | } \ |
| 767 | consumed_chars++; \ | ||
| 767 | } while (0) | 768 | } while (0) |
| 768 | 769 | ||
| 769 | 770 | ||
| @@ -1523,12 +1524,12 @@ char emacs_mule_bytes[256]; | |||
| 1523 | 1524 | ||
| 1524 | 1525 | ||
| 1525 | int | 1526 | int |
| 1526 | emacs_mule_char (coding, composition, nbytes, nchars) | 1527 | emacs_mule_char (coding, src, nbytes, nchars) |
| 1527 | struct coding_system *coding; | 1528 | struct coding_system *coding; |
| 1529 | unsigned char *src; | ||
| 1528 | int composition; | 1530 | int composition; |
| 1529 | int *nbytes, *nchars; | 1531 | int *nbytes, *nchars; |
| 1530 | { | 1532 | { |
| 1531 | unsigned char *src = coding->source + coding->consumed; | ||
| 1532 | unsigned char *src_end = coding->source + coding->src_bytes; | 1533 | unsigned char *src_end = coding->source + coding->src_bytes; |
| 1533 | int multibytep = coding->src_multibyte; | 1534 | int multibytep = coding->src_multibyte; |
| 1534 | unsigned char *src_base = src; | 1535 | unsigned char *src_base = src; |
| @@ -1538,20 +1539,6 @@ emacs_mule_char (coding, composition, nbytes, nchars) | |||
| 1538 | int consumed_chars = 0; | 1539 | int consumed_chars = 0; |
| 1539 | 1540 | ||
| 1540 | ONE_MORE_BYTE (c); | 1541 | ONE_MORE_BYTE (c); |
| 1541 | if (composition) | ||
| 1542 | { | ||
| 1543 | c -= 0x20; | ||
| 1544 | if (c == 0x80) | ||
| 1545 | { | ||
| 1546 | ONE_MORE_BYTE (c); | ||
| 1547 | if (c < 0xA0) | ||
| 1548 | goto invalid_code; | ||
| 1549 | *nbytes = src - src_base; | ||
| 1550 | *nchars = consumed_chars; | ||
| 1551 | return (c - 0x80); | ||
| 1552 | } | ||
| 1553 | } | ||
| 1554 | |||
| 1555 | switch (emacs_mule_bytes[c]) | 1542 | switch (emacs_mule_bytes[c]) |
| 1556 | { | 1543 | { |
| 1557 | case 2: | 1544 | case 2: |
| @@ -1576,17 +1563,18 @@ emacs_mule_char (coding, composition, nbytes, nchars) | |||
| 1576 | if (! (charset = emacs_mule_charset[c])) | 1563 | if (! (charset = emacs_mule_charset[c])) |
| 1577 | goto invalid_code; | 1564 | goto invalid_code; |
| 1578 | ONE_MORE_BYTE (c); | 1565 | ONE_MORE_BYTE (c); |
| 1579 | code = (c & 0x7F) << 7; | 1566 | code = (c & 0x7F) << 8; |
| 1580 | ONE_MORE_BYTE (c); | 1567 | ONE_MORE_BYTE (c); |
| 1581 | code |= c & 0x7F; | 1568 | code |= c & 0x7F; |
| 1582 | } | 1569 | } |
| 1583 | break; | 1570 | break; |
| 1584 | 1571 | ||
| 1585 | case 4: | 1572 | case 4: |
| 1573 | ONE_MORE_BYTE (c); | ||
| 1586 | if (! (charset = emacs_mule_charset[c])) | 1574 | if (! (charset = emacs_mule_charset[c])) |
| 1587 | goto invalid_code; | 1575 | goto invalid_code; |
| 1588 | ONE_MORE_BYTE (c); | 1576 | ONE_MORE_BYTE (c); |
| 1589 | code = (c & 0x7F) << 7; | 1577 | code = (c & 0x7F) << 8; |
| 1590 | ONE_MORE_BYTE (c); | 1578 | ONE_MORE_BYTE (c); |
| 1591 | code |= c & 0x7F; | 1579 | code |= c & 0x7F; |
| 1592 | break; | 1580 | break; |
| @@ -1709,7 +1697,7 @@ detect_coding_emacs_mule (coding, mask) | |||
| 1709 | \ | 1697 | \ |
| 1710 | if (src == src_end) \ | 1698 | if (src == src_end) \ |
| 1711 | break; \ | 1699 | break; \ |
| 1712 | c = emacs_mule_char (coding, 1, &nbytes, &nchars); \ | 1700 | c = emacs_mule_char (coding, src, &nbytes, &nchars); \ |
| 1713 | if (c < 0) \ | 1701 | if (c < 0) \ |
| 1714 | { \ | 1702 | { \ |
| 1715 | if (c == -2) \ | 1703 | if (c == -2) \ |
| @@ -1724,17 +1712,18 @@ detect_coding_emacs_mule (coding, mask) | |||
| 1724 | 1712 | ||
| 1725 | 1713 | ||
| 1726 | /* Decode a composition rule represented as a component of composition | 1714 | /* Decode a composition rule represented as a component of composition |
| 1727 | sequence of Emacs 20 style at SRC. Set C to the rule. If SRC | 1715 | sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF, |
| 1728 | points an invalid byte sequence, set C to -1. */ | 1716 | and increment BUF. If SRC points an invalid byte sequence, set C |
| 1717 | to -1. */ | ||
| 1729 | 1718 | ||
| 1730 | #define DECODE_EMACS_MULE_COMPOSITION_RULE(buf) \ | 1719 | #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \ |
| 1731 | do { \ | 1720 | do { \ |
| 1732 | int c, gref, nref; \ | 1721 | int c, gref, nref; \ |
| 1733 | \ | 1722 | \ |
| 1734 | if (src < src_end) \ | 1723 | if (src >= src_end) \ |
| 1735 | goto invalid_code; \ | 1724 | goto invalid_code; \ |
| 1736 | ONE_MORE_BYTE_NO_CHECK (c); \ | 1725 | ONE_MORE_BYTE_NO_CHECK (c); \ |
| 1737 | c -= 0xA0; \ | 1726 | c -= 0x20; \ |
| 1738 | if (c < 0 || c >= 81) \ | 1727 | if (c < 0 || c >= 81) \ |
| 1739 | goto invalid_code; \ | 1728 | goto invalid_code; \ |
| 1740 | \ | 1729 | \ |
| @@ -1743,6 +1732,28 @@ detect_coding_emacs_mule (coding, mask) | |||
| 1743 | } while (0) | 1732 | } while (0) |
| 1744 | 1733 | ||
| 1745 | 1734 | ||
| 1735 | /* Decode a composition rule represented as a component of composition | ||
| 1736 | sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF, | ||
| 1737 | and increment BUF. If SRC points an invalid byte sequence, set C | ||
| 1738 | to -1. */ | ||
| 1739 | |||
| 1740 | #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \ | ||
| 1741 | do { \ | ||
| 1742 | int gref, nref; \ | ||
| 1743 | \ | ||
| 1744 | if (src + 1>= src_end) \ | ||
| 1745 | goto invalid_code; \ | ||
| 1746 | ONE_MORE_BYTE_NO_CHECK (gref); \ | ||
| 1747 | gref -= 0x20; \ | ||
| 1748 | ONE_MORE_BYTE_NO_CHECK (nref); \ | ||
| 1749 | nref -= 0x20; \ | ||
| 1750 | if (gref < 0 || gref >= 81 \ | ||
| 1751 | || nref < 0 || nref >= 81) \ | ||
| 1752 | goto invalid_code; \ | ||
| 1753 | *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \ | ||
| 1754 | } while (0) | ||
| 1755 | |||
| 1756 | |||
| 1746 | #define ADD_COMPOSITION_DATA(buf, method, nchars) \ | 1757 | #define ADD_COMPOSITION_DATA(buf, method, nchars) \ |
| 1747 | do { \ | 1758 | do { \ |
| 1748 | *buf++ = -5; \ | 1759 | *buf++ = -5; \ |
| @@ -1756,10 +1767,11 @@ detect_coding_emacs_mule (coding, mask) | |||
| 1756 | #define DECODE_EMACS_MULE_21_COMPOSITION(c) \ | 1767 | #define DECODE_EMACS_MULE_21_COMPOSITION(c) \ |
| 1757 | do { \ | 1768 | do { \ |
| 1758 | /* Emacs 21 style format. The first three bytes at SRC are \ | 1769 | /* Emacs 21 style format. The first three bytes at SRC are \ |
| 1759 | (METHOD - 0xF0), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \ | 1770 | (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \ |
| 1760 | the byte length of this composition information, CHARS is the \ | 1771 | the byte length of this composition information, CHARS is the \ |
| 1761 | number of characters composed by this composition. */ \ | 1772 | number of characters composed by this composition. */ \ |
| 1762 | enum composition_method method = c - 0xF0; \ | 1773 | enum composition_method method = c - 0xF2; \ |
| 1774 | int *charbuf_base = charbuf; \ | ||
| 1763 | int consumed_chars_limit; \ | 1775 | int consumed_chars_limit; \ |
| 1764 | int nbytes, nchars; \ | 1776 | int nbytes, nchars; \ |
| 1765 | \ | 1777 | \ |
| @@ -1777,12 +1789,14 @@ detect_coding_emacs_mule (coding, mask) | |||
| 1777 | while (consumed_chars < consumed_chars_limit) \ | 1789 | while (consumed_chars < consumed_chars_limit) \ |
| 1778 | { \ | 1790 | { \ |
| 1779 | if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \ | 1791 | if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \ |
| 1780 | DECODE_EMACS_MULE_COMPOSITION_RULE (charbuf); \ | 1792 | DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \ |
| 1781 | else \ | 1793 | else \ |
| 1782 | DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \ | 1794 | DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \ |
| 1795 | i++; \ | ||
| 1783 | } \ | 1796 | } \ |
| 1784 | if (consumed_chars < consumed_chars_limit) \ | 1797 | if (consumed_chars < consumed_chars_limit) \ |
| 1785 | goto invalid_code; \ | 1798 | goto invalid_code; \ |
| 1799 | charbuf_base[0] -= i; \ | ||
| 1786 | } \ | 1800 | } \ |
| 1787 | } while (0) | 1801 | } while (0) |
| 1788 | 1802 | ||
| @@ -1818,7 +1832,7 @@ detect_coding_emacs_mule (coding, mask) | |||
| 1818 | DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ | 1832 | DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ |
| 1819 | for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \ | 1833 | for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \ |
| 1820 | { \ | 1834 | { \ |
| 1821 | DECODE_EMACS_MULE_COMPOSITION_RULE (buf); \ | 1835 | DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \ |
| 1822 | DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ | 1836 | DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ |
| 1823 | } \ | 1837 | } \ |
| 1824 | if (i < 1 || (buf - components) % 2 == 0) \ | 1838 | if (i < 1 || (buf - components) % 2 == 0) \ |
| @@ -1883,8 +1897,8 @@ decode_coding_emacs_mule (coding) | |||
| 1883 | if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end) | 1897 | if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end) |
| 1884 | break; | 1898 | break; |
| 1885 | ONE_MORE_BYTE (c); | 1899 | ONE_MORE_BYTE (c); |
| 1886 | if (c - 0xF0 >= COMPOSITION_RELATIVE | 1900 | if (c - 0xF2 >= COMPOSITION_RELATIVE |
| 1887 | && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS) | 1901 | && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS) |
| 1888 | DECODE_EMACS_MULE_21_COMPOSITION (c); | 1902 | DECODE_EMACS_MULE_21_COMPOSITION (c); |
| 1889 | else if (c < 0xC0) | 1903 | else if (c < 0xC0) |
| 1890 | DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c); | 1904 | DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c); |
| @@ -1892,12 +1906,14 @@ decode_coding_emacs_mule (coding) | |||
| 1892 | DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c); | 1906 | DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c); |
| 1893 | else | 1907 | else |
| 1894 | goto invalid_code; | 1908 | goto invalid_code; |
| 1909 | coding->annotated = 1; | ||
| 1895 | } | 1910 | } |
| 1896 | else if (c < 0xA0 && emacs_mule_bytes[c] > 1) | 1911 | else if (c < 0xA0 && emacs_mule_bytes[c] > 1) |
| 1897 | { | 1912 | { |
| 1898 | int nbytes, nchars; | 1913 | int nbytes, nchars; |
| 1899 | src--; | 1914 | src = src_base; |
| 1900 | c = emacs_mule_char (coding, 0, &nbytes, &nchars); | 1915 | consumed_chars = consumed_chars_base; |
| 1916 | c = emacs_mule_char (coding, src, &nbytes, &nchars); | ||
| 1901 | if (c < 0) | 1917 | if (c < 0) |
| 1902 | { | 1918 | { |
| 1903 | if (c == -2) | 1919 | if (c == -2) |
| @@ -1905,6 +1921,8 @@ decode_coding_emacs_mule (coding) | |||
| 1905 | goto invalid_code; | 1921 | goto invalid_code; |
| 1906 | } | 1922 | } |
| 1907 | *charbuf++ = c; | 1923 | *charbuf++ = c; |
| 1924 | src += nbytes; | ||
| 1925 | consumed_chars += nchars; | ||
| 1908 | char_offset++; | 1926 | char_offset++; |
| 1909 | } | 1927 | } |
| 1910 | continue; | 1928 | continue; |
| @@ -2572,7 +2590,7 @@ detect_coding_iso_2022 (coding, mask) | |||
| 2572 | #define DECODE_COMPOSITION_START(c1) \ | 2590 | #define DECODE_COMPOSITION_START(c1) \ |
| 2573 | do { \ | 2591 | do { \ |
| 2574 | if (c1 == '0' \ | 2592 | if (c1 == '0' \ |
| 2575 | && composition_state == COMPOSING_COMPONENT_CHAR) \ | 2593 | && composition_state == COMPOSING_COMPONENT_RULE) \ |
| 2576 | { \ | 2594 | { \ |
| 2577 | component_len = component_idx; \ | 2595 | component_len = component_idx; \ |
| 2578 | composition_state = COMPOSING_CHAR; \ | 2596 | composition_state = COMPOSING_CHAR; \ |
| @@ -2725,27 +2743,26 @@ decode_coding_iso_2022 (coding) | |||
| 2725 | composition_state--; | 2743 | composition_state--; |
| 2726 | continue; | 2744 | continue; |
| 2727 | } | 2745 | } |
| 2728 | else if (method == COMPOSITION_WITH_RULE) | ||
| 2729 | composition_state = COMPOSING_RULE; | ||
| 2730 | else if (method == COMPOSITION_WITH_RULE_ALTCHARS | ||
| 2731 | && composition_state == COMPOSING_COMPONENT_CHAR) | ||
| 2732 | composition_state = COMPOSING_COMPONENT_CHAR; | ||
| 2733 | } | 2746 | } |
| 2734 | if (charset_id_0 < 0 | 2747 | if (charset_id_0 < 0 |
| 2735 | || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0))) | 2748 | || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0))) |
| 2736 | { | 2749 | /* This is SPACE or DEL. */ |
| 2737 | /* This is SPACE or DEL. */ | 2750 | charset = CHARSET_FROM_ID (charset_ascii); |
| 2738 | charset = CHARSET_FROM_ID (charset_ascii); | 2751 | else |
| 2739 | break; | 2752 | charset = CHARSET_FROM_ID (charset_id_0); |
| 2740 | } | 2753 | break; |
| 2741 | /* This is a graphic character, we fall down ... */ | ||
| 2742 | 2754 | ||
| 2743 | case ISO_graphic_plane_0: | 2755 | case ISO_graphic_plane_0: |
| 2744 | if (composition_state == COMPOSING_RULE) | 2756 | if (composition_state != COMPOSING_NO) |
| 2745 | { | 2757 | { |
| 2746 | DECODE_COMPOSITION_RULE (c1); | 2758 | if (composition_state == COMPOSING_RULE |
| 2747 | components[component_idx++] = c1; | 2759 | || composition_state == COMPOSING_COMPONENT_RULE) |
| 2748 | composition_state = COMPOSING_CHAR; | 2760 | { |
| 2761 | DECODE_COMPOSITION_RULE (c1); | ||
| 2762 | components[component_idx++] = c1; | ||
| 2763 | composition_state--; | ||
| 2764 | continue; | ||
| 2765 | } | ||
| 2749 | } | 2766 | } |
| 2750 | charset = CHARSET_FROM_ID (charset_id_0); | 2767 | charset = CHARSET_FROM_ID (charset_id_0); |
| 2751 | break; | 2768 | break; |
| @@ -3009,7 +3026,13 @@ decode_coding_iso_2022 (coding) | |||
| 3009 | char_offset++; | 3026 | char_offset++; |
| 3010 | } | 3027 | } |
| 3011 | else | 3028 | else |
| 3012 | components[component_idx++] = c; | 3029 | { |
| 3030 | components[component_idx++] = c; | ||
| 3031 | if (method == COMPOSITION_WITH_RULE | ||
| 3032 | || (method == COMPOSITION_WITH_RULE_ALTCHARS | ||
| 3033 | && composition_state == COMPOSING_COMPONENT_CHAR)) | ||
| 3034 | composition_state++; | ||
| 3035 | } | ||
| 3013 | continue; | 3036 | continue; |
| 3014 | 3037 | ||
| 3015 | invalid_code: | 3038 | invalid_code: |
| @@ -7785,6 +7808,10 @@ init_coding_once () | |||
| 7785 | { | 7808 | { |
| 7786 | emacs_mule_bytes[i] = 1; | 7809 | emacs_mule_bytes[i] = 1; |
| 7787 | } | 7810 | } |
| 7811 | emacs_mule_bytes[LEADING_CODE_PRIVATE_11] = 3; | ||
| 7812 | emacs_mule_bytes[LEADING_CODE_PRIVATE_12] = 3; | ||
| 7813 | emacs_mule_bytes[LEADING_CODE_PRIVATE_21] = 4; | ||
| 7814 | emacs_mule_bytes[LEADING_CODE_PRIVATE_22] = 4; | ||
| 7788 | } | 7815 | } |
| 7789 | 7816 | ||
| 7790 | #ifdef emacs | 7817 | #ifdef emacs |