aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Kenichi Handa, 2002-05-21 04:23:15 +0000
committer: Kenichi Handa, 2002-05-21 04:23:15 +0000
commit: 781d7a48c41eb1760dfa99993a9b033aecfbee28 (patch)
tree: d87fcb6e1d3f1d3c42c91594421a4f45a4814b70 /src
parent: 4f65af013ec650af933680f93313486591aa7dcb (diff)
download: emacs-781d7a48c41eb1760dfa99993a9b033aecfbee28.tar.gz
emacs-781d7a48c41eb1760dfa99993a9b033aecfbee28.zip
(ONE_MORE_BYTE_NO_CHECK): Increment consumed_chars.
(emacs_mule_char): New arg src. Delete arg `composition'. Caller changed. Handle 2-byte and 3-byte charsets correctly.
(DECODE_EMACS_MULE_COMPOSITION_RULE_20): Renamed from DECODE_EMACS_MULE_COMPOSITION_RULE. Caller changed.
(DECODE_EMACS_MULE_COMPOSITION_RULE_21): New macro.
(DECODE_EMACS_MULE_21_COMPOSITION): Call DECODE_EMACS_MULE_COMPOSITION_RULE_21. Produce correct annotation sequence.
(decode_coding_emacs_mule): Handle composition correctly. Rewind `src' and `consumed_chars' correctly before calling emacs_mule_char.
(DECODE_COMPOSITION_START): Correctly handle the case of altchar and alt&rule composition.
(decode_coding_iso_2022): Handle composition correctly.
(init_coding_once): Setup emacs_mule_bytes for private charsets.
Diffstat (limited to 'src')
-rw-r--r-- src/ChangeLog 23
-rw-r--r-- src/coding.c 125
2 files changed, 99 insertions, 49 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index a4820577e1a..d81f9882fba 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,26 @@
12002-05-21 Kenichi Handa <handa@etl.go.jp>
2
3 * coding.c (ONE_MORE_BYTE_NO_CHECK): Increment consumed_chars.
4 (emacs_mule_char): New arg src. Delete arg `composition'. Caller
5 changed. Handle 2-byte and 3-byte charsets correctly.
6 (DECODE_EMACS_MULE_COMPOSITION_RULE_20): Renamed from
7 DECODE_EMACS_MULE_COMPOSITION_RULE. Caller changed.
8 (DECODE_EMACS_MULE_COMPOSITION_RULE_21): New macro.
9 (DECODE_EMACS_MULE_21_COMPOSITION): Call
10 DECODE_EMACS_MULE_COMPOSITION_RULE_21. Produce correct annotation
11 sequence.
12 (decode_coding_emacs_mule): Handle composition correctly. Rewind
13 `src' and `consumed_chars' correctly before calling
14 emacs_mule_char.
15 (DECODE_COMPOSITION_START): Correctly handle the case of altchar
16 and alt&rule composition.
17 (decode_coding_iso_2022): Handle composition correctly.
18 (init_coding_once): Setup emacs_mule_bytes for private charsets.
19
20 * charset.c (Fdefine_charset_internal): Fix bug for the case of
21 re-defining a charset. If the charset has :emacs-mule-id, setup
22 emacs_mule_bytes.
23
12002-05-20 Kenichi Handa <handa@etl.go.jp> 242002-05-20 Kenichi Handa <handa@etl.go.jp>
2 25
3 * coding.c (encode_coding_iso_2022): If coding requires safe 26 * coding.c (encode_coding_iso_2022): If coding requires safe
diff --git a/src/coding.c b/src/coding.c
index 11ccc61984e..7bfefef88dd 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -764,6 +764,7 @@ static int detected_mask[coding_category_raw_text] =
764 error ("Undecodable char found"); \ 764 error ("Undecodable char found"); \
765 c = ((c & 1) << 6) | *src++; \ 765 c = ((c & 1) << 6) | *src++; \
766 } \ 766 } \
767 consumed_chars++; \
767 } while (0) 768 } while (0)
768 769
769 770
@@ -1523,12 +1524,12 @@ char emacs_mule_bytes[256];
1523 1524
1524 1525
1525int 1526int
1526emacs_mule_char (coding, composition, nbytes, nchars) 1527emacs_mule_char (coding, src, nbytes, nchars)
1527 struct coding_system *coding; 1528 struct coding_system *coding;
1529 unsigned char *src;
1528 int composition; 1530 int composition;
1529 int *nbytes, *nchars; 1531 int *nbytes, *nchars;
1530{ 1532{
1531 unsigned char *src = coding->source + coding->consumed;
1532 unsigned char *src_end = coding->source + coding->src_bytes; 1533 unsigned char *src_end = coding->source + coding->src_bytes;
1533 int multibytep = coding->src_multibyte; 1534 int multibytep = coding->src_multibyte;
1534 unsigned char *src_base = src; 1535 unsigned char *src_base = src;
@@ -1538,20 +1539,6 @@ emacs_mule_char (coding, composition, nbytes, nchars)
1538 int consumed_chars = 0; 1539 int consumed_chars = 0;
1539 1540
1540 ONE_MORE_BYTE (c); 1541 ONE_MORE_BYTE (c);
1541 if (composition)
1542 {
1543 c -= 0x20;
1544 if (c == 0x80)
1545 {
1546 ONE_MORE_BYTE (c);
1547 if (c < 0xA0)
1548 goto invalid_code;
1549 *nbytes = src - src_base;
1550 *nchars = consumed_chars;
1551 return (c - 0x80);
1552 }
1553 }
1554
1555 switch (emacs_mule_bytes[c]) 1542 switch (emacs_mule_bytes[c])
1556 { 1543 {
1557 case 2: 1544 case 2:
@@ -1576,17 +1563,18 @@ emacs_mule_char (coding, composition, nbytes, nchars)
1576 if (! (charset = emacs_mule_charset[c])) 1563 if (! (charset = emacs_mule_charset[c]))
1577 goto invalid_code; 1564 goto invalid_code;
1578 ONE_MORE_BYTE (c); 1565 ONE_MORE_BYTE (c);
1579 code = (c & 0x7F) << 7; 1566 code = (c & 0x7F) << 8;
1580 ONE_MORE_BYTE (c); 1567 ONE_MORE_BYTE (c);
1581 code |= c & 0x7F; 1568 code |= c & 0x7F;
1582 } 1569 }
1583 break; 1570 break;
1584 1571
1585 case 4: 1572 case 4:
1573 ONE_MORE_BYTE (c);
1586 if (! (charset = emacs_mule_charset[c])) 1574 if (! (charset = emacs_mule_charset[c]))
1587 goto invalid_code; 1575 goto invalid_code;
1588 ONE_MORE_BYTE (c); 1576 ONE_MORE_BYTE (c);
1589 code = (c & 0x7F) << 7; 1577 code = (c & 0x7F) << 8;
1590 ONE_MORE_BYTE (c); 1578 ONE_MORE_BYTE (c);
1591 code |= c & 0x7F; 1579 code |= c & 0x7F;
1592 break; 1580 break;
@@ -1709,7 +1697,7 @@ detect_coding_emacs_mule (coding, mask)
1709 \ 1697 \
1710 if (src == src_end) \ 1698 if (src == src_end) \
1711 break; \ 1699 break; \
1712 c = emacs_mule_char (coding, 1, &nbytes, &nchars); \ 1700 c = emacs_mule_char (coding, src, &nbytes, &nchars); \
1713 if (c < 0) \ 1701 if (c < 0) \
1714 { \ 1702 { \
1715 if (c == -2) \ 1703 if (c == -2) \
@@ -1724,17 +1712,18 @@ detect_coding_emacs_mule (coding, mask)
1724 1712
1725 1713
1726/* Decode a composition rule represented as a component of composition 1714/* Decode a composition rule represented as a component of composition
1727 sequence of Emacs 20 style at SRC. Set C to the rule. If SRC 1715 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF,
1728 points an invalid byte sequence, set C to -1. */ 1716 and increment BUF. If SRC points an invalid byte sequence, set C
1717 to -1. */
1729 1718
1730#define DECODE_EMACS_MULE_COMPOSITION_RULE(buf) \ 1719#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \
1731 do { \ 1720 do { \
1732 int c, gref, nref; \ 1721 int c, gref, nref; \
1733 \ 1722 \
1734 if (src < src_end) \ 1723 if (src >= src_end) \
1735 goto invalid_code; \ 1724 goto invalid_code; \
1736 ONE_MORE_BYTE_NO_CHECK (c); \ 1725 ONE_MORE_BYTE_NO_CHECK (c); \
1737 c -= 0xA0; \ 1726 c -= 0x20; \
1738 if (c < 0 || c >= 81) \ 1727 if (c < 0 || c >= 81) \
1739 goto invalid_code; \ 1728 goto invalid_code; \
1740 \ 1729 \
@@ -1743,6 +1732,28 @@ detect_coding_emacs_mule (coding, mask)
1743 } while (0) 1732 } while (0)
1744 1733
1745 1734
1735/* Decode a composition rule represented as a component of composition
1736 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF,
1737 and increment BUF. If SRC points an invalid byte sequence, set C
1738 to -1. */
1739
1740#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
1741 do { \
1742 int gref, nref; \
1743 \
1744 if (src + 1>= src_end) \
1745 goto invalid_code; \
1746 ONE_MORE_BYTE_NO_CHECK (gref); \
1747 gref -= 0x20; \
1748 ONE_MORE_BYTE_NO_CHECK (nref); \
1749 nref -= 0x20; \
1750 if (gref < 0 || gref >= 81 \
1751 || nref < 0 || nref >= 81) \
1752 goto invalid_code; \
1753 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \
1754 } while (0)
1755
1756
1746#define ADD_COMPOSITION_DATA(buf, method, nchars) \ 1757#define ADD_COMPOSITION_DATA(buf, method, nchars) \
1747 do { \ 1758 do { \
1748 *buf++ = -5; \ 1759 *buf++ = -5; \
@@ -1756,10 +1767,11 @@ detect_coding_emacs_mule (coding, mask)
1756#define DECODE_EMACS_MULE_21_COMPOSITION(c) \ 1767#define DECODE_EMACS_MULE_21_COMPOSITION(c) \
1757 do { \ 1768 do { \
1758 /* Emacs 21 style format. The first three bytes at SRC are \ 1769 /* Emacs 21 style format. The first three bytes at SRC are \
1759 (METHOD - 0xF0), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \ 1770 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
1760 the byte length of this composition information, CHARS is the \ 1771 the byte length of this composition information, CHARS is the \
1761 number of characters composed by this composition. */ \ 1772 number of characters composed by this composition. */ \
1762 enum composition_method method = c - 0xF0; \ 1773 enum composition_method method = c - 0xF2; \
1774 int *charbuf_base = charbuf; \
1763 int consumed_chars_limit; \ 1775 int consumed_chars_limit; \
1764 int nbytes, nchars; \ 1776 int nbytes, nchars; \
1765 \ 1777 \
@@ -1777,12 +1789,14 @@ detect_coding_emacs_mule (coding, mask)
1777 while (consumed_chars < consumed_chars_limit) \ 1789 while (consumed_chars < consumed_chars_limit) \
1778 { \ 1790 { \
1779 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \ 1791 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \
1780 DECODE_EMACS_MULE_COMPOSITION_RULE (charbuf); \ 1792 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \
1781 else \ 1793 else \
1782 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \ 1794 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
1795 i++; \
1783 } \ 1796 } \
1784 if (consumed_chars < consumed_chars_limit) \ 1797 if (consumed_chars < consumed_chars_limit) \
1785 goto invalid_code; \ 1798 goto invalid_code; \
1799 charbuf_base[0] -= i; \
1786 } \ 1800 } \
1787 } while (0) 1801 } while (0)
1788 1802
@@ -1818,7 +1832,7 @@ detect_coding_emacs_mule (coding, mask)
1818 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ 1832 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1819 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \ 1833 for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \
1820 { \ 1834 { \
1821 DECODE_EMACS_MULE_COMPOSITION_RULE (buf); \ 1835 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \
1822 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ 1836 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
1823 } \ 1837 } \
1824 if (i < 1 || (buf - components) % 2 == 0) \ 1838 if (i < 1 || (buf - components) % 2 == 0) \
@@ -1883,8 +1897,8 @@ decode_coding_emacs_mule (coding)
1883 if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end) 1897 if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end)
1884 break; 1898 break;
1885 ONE_MORE_BYTE (c); 1899 ONE_MORE_BYTE (c);
1886 if (c - 0xF0 >= COMPOSITION_RELATIVE 1900 if (c - 0xF2 >= COMPOSITION_RELATIVE
1887 && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS) 1901 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
1888 DECODE_EMACS_MULE_21_COMPOSITION (c); 1902 DECODE_EMACS_MULE_21_COMPOSITION (c);
1889 else if (c < 0xC0) 1903 else if (c < 0xC0)
1890 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c); 1904 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
@@ -1892,12 +1906,14 @@ decode_coding_emacs_mule (coding)
1892 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c); 1906 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1893 else 1907 else
1894 goto invalid_code; 1908 goto invalid_code;
1909 coding->annotated = 1;
1895 } 1910 }
1896 else if (c < 0xA0 && emacs_mule_bytes[c] > 1) 1911 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1897 { 1912 {
1898 int nbytes, nchars; 1913 int nbytes, nchars;
1899 src--; 1914 src = src_base;
1900 c = emacs_mule_char (coding, 0, &nbytes, &nchars); 1915 consumed_chars = consumed_chars_base;
1916 c = emacs_mule_char (coding, src, &nbytes, &nchars);
1901 if (c < 0) 1917 if (c < 0)
1902 { 1918 {
1903 if (c == -2) 1919 if (c == -2)
@@ -1905,6 +1921,8 @@ decode_coding_emacs_mule (coding)
1905 goto invalid_code; 1921 goto invalid_code;
1906 } 1922 }
1907 *charbuf++ = c; 1923 *charbuf++ = c;
1924 src += nbytes;
1925 consumed_chars += nchars;
1908 char_offset++; 1926 char_offset++;
1909 } 1927 }
1910 continue; 1928 continue;
@@ -2572,7 +2590,7 @@ detect_coding_iso_2022 (coding, mask)
2572#define DECODE_COMPOSITION_START(c1) \ 2590#define DECODE_COMPOSITION_START(c1) \
2573 do { \ 2591 do { \
2574 if (c1 == '0' \ 2592 if (c1 == '0' \
2575 && composition_state == COMPOSING_COMPONENT_CHAR) \ 2593 && composition_state == COMPOSING_COMPONENT_RULE) \
2576 { \ 2594 { \
2577 component_len = component_idx; \ 2595 component_len = component_idx; \
2578 composition_state = COMPOSING_CHAR; \ 2596 composition_state = COMPOSING_CHAR; \
@@ -2725,27 +2743,26 @@ decode_coding_iso_2022 (coding)
2725 composition_state--; 2743 composition_state--;
2726 continue; 2744 continue;
2727 } 2745 }
2728 else if (method == COMPOSITION_WITH_RULE)
2729 composition_state = COMPOSING_RULE;
2730 else if (method == COMPOSITION_WITH_RULE_ALTCHARS
2731 && composition_state == COMPOSING_COMPONENT_CHAR)
2732 composition_state = COMPOSING_COMPONENT_CHAR;
2733 } 2746 }
2734 if (charset_id_0 < 0 2747 if (charset_id_0 < 0
2735 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0))) 2748 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2736 { 2749 /* This is SPACE or DEL. */
2737 /* This is SPACE or DEL. */ 2750 charset = CHARSET_FROM_ID (charset_ascii);
2738 charset = CHARSET_FROM_ID (charset_ascii); 2751 else
2739 break; 2752 charset = CHARSET_FROM_ID (charset_id_0);
2740 } 2753 break;
2741 /* This is a graphic character, we fall down ... */
2742 2754
2743 case ISO_graphic_plane_0: 2755 case ISO_graphic_plane_0:
2744 if (composition_state == COMPOSING_RULE) 2756 if (composition_state != COMPOSING_NO)
2745 { 2757 {
2746 DECODE_COMPOSITION_RULE (c1); 2758 if (composition_state == COMPOSING_RULE
2747 components[component_idx++] = c1; 2759 || composition_state == COMPOSING_COMPONENT_RULE)
2748 composition_state = COMPOSING_CHAR; 2760 {
2761 DECODE_COMPOSITION_RULE (c1);
2762 components[component_idx++] = c1;
2763 composition_state--;
2764 continue;
2765 }
2749 } 2766 }
2750 charset = CHARSET_FROM_ID (charset_id_0); 2767 charset = CHARSET_FROM_ID (charset_id_0);
2751 break; 2768 break;
@@ -3009,7 +3026,13 @@ decode_coding_iso_2022 (coding)
3009 char_offset++; 3026 char_offset++;
3010 } 3027 }
3011 else 3028 else
3012 components[component_idx++] = c; 3029 {
3030 components[component_idx++] = c;
3031 if (method == COMPOSITION_WITH_RULE
3032 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3033 && composition_state == COMPOSING_COMPONENT_CHAR))
3034 composition_state++;
3035 }
3013 continue; 3036 continue;
3014 3037
3015 invalid_code: 3038 invalid_code:
@@ -7785,6 +7808,10 @@ init_coding_once ()
7785 { 7808 {
7786 emacs_mule_bytes[i] = 1; 7809 emacs_mule_bytes[i] = 1;
7787 } 7810 }
7811 emacs_mule_bytes[LEADING_CODE_PRIVATE_11] = 3;
7812 emacs_mule_bytes[LEADING_CODE_PRIVATE_12] = 3;
7813 emacs_mule_bytes[LEADING_CODE_PRIVATE_21] = 4;
7814 emacs_mule_bytes[LEADING_CODE_PRIVATE_22] = 4;
7788} 7815}
7789 7816
7790#ifdef emacs 7817#ifdef emacs