aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Dave Love 2002-10-17 15:30:28 +0000
committer Dave Love 2002-10-17 15:30:28 +0000
commit 1276c77b44719b10a823d8167ec3472e8666d565 (patch)
tree ba44fe4e7146706b7c3e40371ed7b8e9e3da68fc
parent 57ef78169b6151cd069bbe170bf5e4dd04f85920 (diff)
download emacs-1276c77b44719b10a823d8167ec3472e8666d565.tar.gz
emacs-1276c77b44719b10a823d8167ec3472e8666d565.zip
(cp-make-translation-table): Use
ucs-mule-to-mule-unicode. (cp-fix-safe-chars): Fix typo. (non-iso-charset-alist): Don't define. (cp-make-coding-system): Use utf-8-translation-table-for-decode. Define translation-table-for-input. (cp866): Reinstate. (alternativnj): Don't define alias. (koi8-u): Deleted.
-rw-r--r-- lisp/international/code-pages.el 608
1 files changed, 162 insertions, 446 deletions
diff --git a/lisp/international/code-pages.el b/lisp/international/code-pages.el
index 4a6d471118d..c3ec192d248 100644
--- a/lisp/international/code-pages.el
+++ b/lisp/international/code-pages.el
@@ -1,6 +1,6 @@
1;;; code-pages.el --- coding systems for assorted codepages -*-coding: utf-8;-*- 1;;; code-pages.el --- coding systems for assorted codepages -*-coding: utf-8;-*-
2 2
3;; Copyright (C) 2001 Free Software Foundation, Inc. 3;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4 4
5;; Author: Dave Love <fx@gnu.org> 5;; Author: Dave Love <fx@gnu.org>
6;; Keywords: i18n 6;; Keywords: i18n
@@ -35,17 +35,16 @@
35;; cp855, cp857, cp860, cp861, cp862, cp863, cp864, cp865, cp866, 35;; cp855, cp857, cp860, cp861, cp862, cp863, cp864, cp865, cp866,
36;; cp869, cp874, cp1125, windows-1250, windows-1251, windows-1252, 36;; cp869, cp874, cp1125, windows-1250, windows-1251, windows-1252,
37;; windows-1253, windows-1254, windows-1255, windows-1256, 37;; windows-1253, windows-1254, windows-1255, windows-1256,
38;; windows-1257, windows-1258, next, koi8-r, koi8-u, iso-8859-6, 38;; windows-1257, windows-1258, next, koi8-u, iso-8859-6,
39;; iso-8859-10, iso-8859-12, iso-8859-16, koi8-t, georgian-ps. This 39;; iso-8859-10, iso-8859-12, iso-8859-16, koi8-t, georgian-ps. This
40;; is meant to include all the single-byte ones relevant to GNU (used 40;; is meant to include all the single-byte ones relevant to GNU (used
41;; in glibc-defined locales); we don't yet get all the multibyte ones 41;; in glibc-defined locales); we don't yet get all the multibyte ones
42;; in base Emacs. 42;; in base Emacs.
43 43
44;; Note that koi8-r and cp866 (alternativnyj) clash with the 44;; Note that various of these can clash with definitions in
45;; iso8859-5-based versions in cyrillic.el (which should be changed), 45;; codepage.el; we try to avoid damage from that. A few CPs from
46;; and others can clash with definitions in codepage.el; we try to 46;; codepage.el (770, 773, 774) aren't covered (in the absence of
47;; avoid damage from that. A few CPs from codepage.el (770, 773, 774) 47;; translation tables to Unicode).
48;; aren't covered (in the absence of translation tables to Unicode).
49 48
50;; Compile this, to avoid loading `ccl' at runtime. 49;; Compile this, to avoid loading `ccl' at runtime.
51 50
@@ -63,7 +62,18 @@ V comprises characters encodable by mule-utf-8."
63 (aset encoding-vector i i)) 62 (aset encoding-vector i i))
64 (dotimes (i 128) 63 (dotimes (i 128)
65 (aset encoding-vector (+ i 128) (aref v i))) 64 (aset encoding-vector (+ i 128) (aref v i)))
66 (make-translation-table-from-vector encoding-vector))) 65 ;; Add equivalent characters to the encoder so that we can unify
66 ;; on encoding.
67 (let* ((tab (make-translation-table-from-vector encoding-vector))
68 ;; Translation table used for encoding:
69 (encode-table (char-table-extra-slot tab 0)))
70 (map-char-table (lambda (c v)
71 (if v
72 (let ((c1 (aref encode-table v)))
73 (if c1 ; we encode that unicode
74 (aset encode-table c c1)))))
75 ucs-mule-to-mule-unicode)
76 tab)))
67 77
68(defun cp-valid-codes (v) 78(defun cp-valid-codes (v)
69 "Derive a valid-codes list for translation vector V. 79 "Derive a valid-codes list for translation vector V.
@@ -94,7 +104,7 @@ CS is a base coding system or alias."
94 (if (and v (not (eq v t))) 104 (if (and v (not (eq v t)))
95 (aset char-coding-system-table 105 (aset char-coding-system-table
96 k 106 k
97 (remq cs (aref char-coding-system-table v))))) 107 (remq cs (aref char-coding-system-table k)))))
98 chars)))) 108 chars))))
99 109
100;; Fix things that have been, or might be done by codepage.el. 110;; Fix things that have been, or might be done by codepage.el.
@@ -140,27 +150,6 @@ read/written by MS-DOS software, or for display on the MS-DOS terminal."
140 cp (cp-charset-for-codepage cp) (cp-offset-for-codepage cp)))))) 150 cp (cp-charset-for-codepage cp) (cp-offset-for-codepage cp))))))
141) ; eval-after-load 151) ; eval-after-load
142 152
143;; Somewhat amended from the version in mule-diag.el, needed below.
144(defvar non-iso-charset-alist
145 `((mac-roman
146 nil
147 mac-roman-decoder
148 ((0 255)))
149 (viscii
150 (ascii vietnamese-viscii-lower vietnamese-viscii-upper)
151 viet-viscii-nonascii-translation-table
152 ((0 255)))
153 (big5
154 (ascii chinese-big5-1 chinese-big5-2)
155 decode-big5-char
156 ((32 127)
157 ((?\xA1 ?\xFE) . (?\x40 ?\x7E ?\xA1 ?\xFE))))
158 (sjis
159 (ascii katakana-jisx0201 japanese-jisx0208)
160 decode-sjis-char
161 ((32 127 ?\xA1 ?\xDF)
162 ((?\x81 ?\x9F ?\xE0 ?\xEF) . (?\x40 ?\x7E ?\x80 ?\xFC))))))
163
164;; Macro to allow ccl compilation at byte-compile time, avoiding 153;; Macro to allow ccl compilation at byte-compile time, avoiding
165;; loading ccl. 154;; loading ccl.
166;;;###autoload 155;;;###autoload
@@ -183,6 +172,9 @@ corresponding args of `make-coding-system'. If MNEMONIC isn't given,
183 (r0 = ,(charset-id 'eight-bit-control)) 172 (r0 = ,(charset-id 'eight-bit-control))
184 (r0 = ,(charset-id 'eight-bit-graphic)))) 173 (r0 = ,(charset-id 'eight-bit-graphic))))
185 (translate-character ,decoder r0 r1) 174 (translate-character ,decoder r0 r1)
175 ;; Allow fragmentation on decoding -- relevant for
176 ;; Cyrillic, Greek and, possibly Arabic and Hebrew.
177 (translate-character utf-8-translation-table-for-decode r0 r1)
186 (write-multibyte-character r0 r1) 178 (write-multibyte-character r0 r1)
187 (repeat)))))) 179 (repeat))))))
188 (ccl-encoder 180 (ccl-encoder
@@ -204,7 +196,11 @@ corresponding args of `make-coding-system'. If MNEMONIC isn't given,
204 (cons ,ccl-decoder ,ccl-encoder) 196 (cons ,ccl-decoder ,ccl-encoder)
205 (list (cons 'safe-chars (get ',encoder 'translation-table)) 197 (list (cons 'safe-chars (get ',encoder 'translation-table))
206 (cons 'valid-codes codes) 198 (cons 'valid-codes codes)
207 (cons 'mime-charset ',name))) 199 (cons 'mime-charset ',name)
200 ;; For Quail translation. Fixme: this should really be
201 ;; a separate table that only translates the coding
202 ;; system's safe-chars.
203 (cons 'translation-table-for-input ,ucs-mule-to-mule-unicode)))
208 (push (list ',name 204 (push (list ',name
209 nil ; charset list 205 nil ; charset list
210 ',decoder 206 ',decoder
@@ -2059,145 +2055,138 @@ corresponding args of `make-coding-system'. If MNEMONIC isn't given,
2059 ?\■ 2055 ?\■
2060 ?\ ]) 2056 ?\ ])
2061 2057
2062;; ;; This should be the same as cyrillic-alternativnyj, 2058(cp-make-coding-system
2063;; ;; (<URL:http://czyborra.com/charsets/cyrillic.html>), but code point 2059 cp866
2064;; ;; 255 in the cyrillic.el alternativnyj table is `№', i.e. point 240 2060 [?\А
2065;; ;; in 8859-5, not no-break space as below; `№' should be at point 252. 2061 ?\Б
2066;; (cp-make-coding-system 2062 ?\В
2067;; cp866 2063 ?\Г
2068;; [?\А 2064 ?\Д
2069;; ?\Б 2065 ?\Е
2070;; ?\В 2066 ?\Ж
2071;; ?\Г 2067 ?\З
2072;; ?\Д 2068 ?\И
2073;; ?\Е 2069 ?\Й
2074;; ?\Ж 2070 ?\К
2075;; ?\З 2071 ?\Л
2076;; ?\И 2072 ?\М
2077;; ?\Й 2073 ?\Н
2078;; ?\К 2074 ?\О
2079;; ?\Л 2075 ?\П
2080;; ?\М 2076 ?\Р
2081;; ?\Н 2077 ?\С
2082;; ?\О 2078 ?\Т
2083;; ?\П 2079 ?\У
2084;; ?\Р 2080 ?\Ф
2085;; ?\С 2081 ?\Х
2086;; ?\Т 2082 ?\Ц
2087;; ?\У 2083 ?\Ч
2088;; ?\Ф 2084 ?\Ш
2089;; ?\Х 2085 ?\Щ
2090;; ?\Ц 2086 ?\Ъ
2091;; ?\Ч 2087 ?\Ы
2092;; ?\Ш 2088 ?\Ь
2093;; ?\Щ 2089 ?\Э
2094;; ?\Ъ 2090 ?\Ю
2095;; ?\Ы 2091 ?\Я
2096;; ?\Ь 2092 ?\а
2097;; ?\Э 2093 ?\б
2098;; ?\Ю 2094 ?\в
2099;; ?\Я 2095 ?\г
2100;; ?\а 2096 ?\д
2101;; ?\б 2097 ?\е
2102;; ?\в 2098 ?\ж
2103;; ?\г 2099 ?\з
2104;; ?\д 2100 ?\и
2105;; ?\е 2101 ?\й
2106;; ?\ж 2102 ?\к
2107;; ?\з 2103 ?\л
2108;; ?\и 2104 ?\м
2109;; ?\й 2105 ?\н
2110;; ?\к 2106 ?\о
2111;; ?\л 2107 ?\п
2112;; ?\м 2108 ?\░
2113;; ?\н 2109 ?\▒
2114;; ?\о 2110 ?\▓
2115;; ?\п 2111 ?\│
2116;; ?\░ 2112 ?\┤
2117;; ?\▒ 2113 ?\╡
2118;; ?\▓ 2114 ?\╢
2119;; ?\│ 2115 ?\╖
2120;; ?\┤ 2116 ?\╕
2121;; ?\╡ 2117 ?\╣
2122;; ?\╢ 2118 ?\║
2123;; ?\╖ 2119 ?\╗
2124;; ?\╕ 2120 ?\╝
2125;; ?\╣ 2121 ?\╜
2126;; ?\║ 2122 ?\╛
2127;; ?\╗ 2123 ?\┐
2128;; ?\╝ 2124 ?\└
2129;; ?\╜ 2125 ?\┴
2130;; ?\╛ 2126 ?\┬
2131;; ?\┐ 2127 ?\├
2132;; ?\└ 2128 ?\─
2133;; ?\┴ 2129 ?\┼
2134;; ?\┬ 2130 ?\╞
2135;; ?\├ 2131 ?\╟
2136;; ?\─ 2132 ?\╚
2137;; ?\┼ 2133 ?\╔
2138;; ?\╞ 2134 ?\╩
2139;; ?\╟ 2135 ?\╦
2140;; ?\╚ 2136 ?\╠
2141;; ?\╔ 2137 ?\═
2142;; ?\╩ 2138 ?\╬
2143;; ?\╦ 2139 ?\╧
2144;; ?\╠ 2140 ?\╨
2145;; ?\═ 2141 ?\╤
2146;; ?\╬ 2142 ?\╥
2147;; ?\╧ 2143 ?\╙
2148;; ?\╨ 2144 ?\╘
2149;; ?\╤ 2145 ?\╒
2150;; ?\╥ 2146 ?\╓
2151;; ?\╙ 2147 ?\╫
2152;; ?\╘ 2148 ?\╪
2153;; ?\╒ 2149 ?\┘
2154;; ?\╓ 2150 ?\┌
2155;; ?\╫ 2151 ?\█
2156;; ?\╪ 2152 ?\▄
2157;; ?\┘ 2153 ?\▌
2158;; ?\┌ 2154 ?\▐
2159;; ?\█ 2155 ?\▀
2160;; ?\▄ 2156 ?\р
2161;; ?\▌ 2157 ?\с
2162;; ?\▐ 2158 ?\т
2163;; ?\▀ 2159 ?\у
2164;; ?\р 2160 ?\ф
2165;; ?\с 2161 ?\х
2166;; ?\т 2162 ?\ц
2167;; ?\у 2163 ?\ч
2168;; ?\ф 2164 ?\ш
2169;; ?\х 2165 ?\щ
2170;; ?\ц 2166 ?\ъ
2171;; ?\ч 2167 ?\ы
2172;; ?\ш 2168 ?\ь
2173;; ?\щ 2169 ?\э
2174;; ?\ъ 2170 ?\ю
2175;; ?\ы 2171 ?\я
2176;; ?\ь 2172 ?\Ё
2177;; ?\э 2173 ?\ё
2178;; ?\ю 2174 ?\Є
2179;; ?\я 2175 ?\є
2180;; ?\Ё 2176 ?\Ї
2181;; ?\ё 2177 ?\ї
2182;; ?\Є 2178 ?\Ў
2183;; ?\є 2179 ?\ў
2184;; ?\Ї 2180 ?\°
2185;; ?\ї 2181 ?\∙
2186;; ?\Ў 2182 ?\·
2187;; ?\ў 2183 ?\√
2188;; ?\° 2184 ?\№
2189;; ?\∙ 2185 ?\¤
2190;; ?\· 2186 ?\■
2191;; ?\√ 2187 ?\ ]
2192;; ?\№ 2188 "CP866 (Cyrillic)."
2193;; ?\¤ 2189 ?A)
2194;; ?\■
2195;; ?\ ]
2196;; "CP866 (Cyrillic Alternativnyj) encoding using Unicode."
2197;; ?A)
2198;; (define-coding-system-alias 'alternativnyj 'cp866)
2199;; (cp-fix-safe-chars 'cyrillic-alternativnyj)
2200;; (define-coding-system-alias 'cyrillic-alternativnyj 'cp866)
2201 2190
2202(cp-make-coding-system 2191(cp-make-coding-system
2203 cp869 2192 cp869
@@ -3779,280 +3768,6 @@ corresponding args of `make-coding-system'. If MNEMONIC isn't given,
3779 "NeXTstep encoding." ?N) 3768 "NeXTstep encoding." ?N)
3780 3769
3781(cp-make-coding-system 3770(cp-make-coding-system
3782 koi8-u
3783 [?\─
3784 ?\│
3785 ?\┌
3786 ?\┐
3787 ?\└
3788 ?\┘
3789 ?\├
3790 ?\┤
3791 ?\┬
3792 ?\┴
3793 ?\┼
3794 ?\▀
3795 ?\▄
3796 ?\█
3797 ?\▌
3798 ?\▐
3799 ?\░
3800 ?\▒
3801 ?\▓
3802 ?\⌠
3803 ?\■
3804 ?\∙
3805 ?\√
3806 ?\≈
3807 ?\≤
3808 ?\≥
3809 ?\ 
3810 ?\⌡
3811 ?\°
3812 ?\²
3813 ?\·
3814 ?\÷
3815 ?\═
3816 ?\║
3817 ?\╒
3818 ?\ё
3819 ?\є
3820 ?\╔
3821 ?\і
3822 ?\ї
3823 ?\╗
3824 ?\╘
3825 ?\╙
3826 ?\╚
3827 ?\╛
3828 ?\ґ
3829 ?\╝
3830 ?\╞
3831 ?\╟
3832 ?\╠
3833 ?\╡
3834 ?\Ё
3835 ?\Є
3836 ?\╣
3837 ?\І
3838 ?\Ї
3839 ?\╦
3840 ?\╧
3841 ?\╨
3842 ?\╩
3843 ?\╪
3844 ?\Ґ
3845 ?\╬
3846 ?\©
3847 ?\ю
3848 ?\а
3849 ?\б
3850 ?\ц
3851 ?\д
3852 ?\е
3853 ?\ф
3854 ?\г
3855 ?\х
3856 ?\и
3857 ?\й
3858 ?\к
3859 ?\л
3860 ?\м
3861 ?\н
3862 ?\о
3863 ?\п
3864 ?\я
3865 ?\р
3866 ?\с
3867 ?\т
3868 ?\у
3869 ?\ж
3870 ?\в
3871 ?\ь
3872 ?\ы
3873 ?\з
3874 ?\ш
3875 ?\э
3876 ?\щ
3877 ?\ч
3878 ?\ъ
3879 ?\Ю
3880 ?\А
3881 ?\Б
3882 ?\Ц
3883 ?\Д
3884 ?\Е
3885 ?\Ф
3886 ?\Г
3887 ?\Х
3888 ?\И
3889 ?\Й
3890 ?\К
3891 ?\Л
3892 ?\М
3893 ?\Н
3894 ?\О
3895 ?\П
3896 ?\Я
3897 ?\Р
3898 ?\С
3899 ?\Т
3900 ?\У
3901 ?\Ж
3902 ?\В
3903 ?\Ь
3904 ?\Ы
3905 ?\З
3906 ?\Ш
3907 ?\Э
3908 ?\Щ
3909 ?\Ч
3910 ?\Ъ]
3911 "Cyrillic KOI8-U (Ukranian) encoding."
3912 ?U)
3913
3914;; ;; Unicode-based, not cyrillic-iso8859-5 based (and thus incomplete)
3915;; ;; like the standard version.
3916;; (cp-make-coding-system
3917;; ;; The base system uses cyrillic-koi8 as the canonical name, but
3918;; ;; that's not a MIME name.
3919;; koi8-r
3920;; [?\─
3921;; ?\│
3922;; ?\┌
3923;; ?\┐
3924;; ?\└
3925;; ?\┘
3926;; ?\├
3927;; ?\┤
3928;; ?\┬
3929;; ?\┴
3930;; ?\┼
3931;; ?\▀
3932;; ?\▄
3933;; ?\█
3934;; ?\▌
3935;; ?\▐
3936;; ?\░
3937;; ?\▒
3938;; ?\▓
3939;; ?\⌠
3940;; ?\■
3941;; ?\•
3942;; ?\√
3943;; ?\≈
3944;; ?\≤
3945;; ?\≥
3946;; ?\ 
3947;; ?\⌡
3948;; ?\°
3949;; ?\²
3950;; ?\·
3951;; ?\÷
3952;; ?\═
3953;; ?\║
3954;; ?\╒
3955;; ?\ё
3956;; ?\╓
3957;; ?\╔
3958;; ?\╕
3959;; ?\╖
3960;; ?\╗
3961;; ?\╘
3962;; ?\╙
3963;; ?\╚
3964;; ?\╛
3965;; ?\╜
3966;; ?\╝
3967;; ?\╞
3968;; ?\╟
3969;; ?\╠
3970;; ?\╡
3971;; ?\Ё
3972;; ?\╢
3973;; ?\╣
3974;; ?\╤
3975;; ?\╥
3976;; ?\╦
3977;; ?\╧
3978;; ?\╨
3979;; ?\╩
3980;; ?\╪
3981;; ?\╫
3982;; ?\╬
3983;; ?\©
3984;; ?\ю
3985;; ?\а
3986;; ?\б
3987;; ?\ц
3988;; ?\д
3989;; ?\е
3990;; ?\ф
3991;; ?\г
3992;; ?\х
3993;; ?\и
3994;; ?\й
3995;; ?\к
3996;; ?\л
3997;; ?\м
3998;; ?\н
3999;; ?\о
4000;; ?\п
4001;; ?\я
4002;; ?\р
4003;; ?\с
4004;; ?\т
4005;; ?\у
4006;; ?\ж
4007;; ?\в
4008;; ?\ь
4009;; ?\ы
4010;; ?\з
4011;; ?\ш
4012;; ?\э
4013;; ?\щ
4014;; ?\ч
4015;; ?\ъ
4016;; ?\Ю
4017;; ?\А
4018;; ?\Б
4019;; ?\Ц
4020;; ?\Д
4021;; ?\Е
4022;; ?\Ф
4023;; ?\Г
4024;; ?\Х
4025;; ?\И
4026;; ?\Й
4027;; ?\К
4028;; ?\Л
4029;; ?\М
4030;; ?\Н
4031;; ?\О
4032;; ?\П
4033;; ?\Я
4034;; ?\Р
4035;; ?\С
4036;; ?\Т
4037;; ?\У
4038;; ?\Ж
4039;; ?\В
4040;; ?\Ь
4041;; ?\Ы
4042;; ?\З
4043;; ?\Ш
4044;; ?\Э
4045;; ?\Щ
4046;; ?\Ч
4047;; ?\Ъ]
4048;; "Unicode-based KOI8 encoding for Cyrillic (MIME: KOI8-R)"
4049;; ?R)
4050;; (cp-fix-safe-chars 'cyrillic-koi8)
4051;; (define-coding-system-alias 'cyrillic-koi8 'koi8-r)
4052;; (define-coding-system-alias 'koi8 'koi8-r)
4053;; (define-coding-system-alias 'cp878 'koi8-r)
4054
4055(cp-make-coding-system
4056 koi8-t ; used by glibc for tg_TJ 3771 koi8-t ; used by glibc for tg_TJ
4057 [?\қ 3772 [?\қ
4058 ?\ғ 3773 ?\ғ
@@ -4187,7 +3902,7 @@ corresponding args of `make-coding-system'. If MNEMONIC isn't given,
4187 3902
4188;; Online final ISO draft: 3903;; Online final ISO draft:
4189 3904
4190;; http://www.egt.ie/standards/iso8859/fdis8859-16-en.pdf 3905;; http://www.evertype.com/standards/iso8859/fdis8859-16-en.pdf
4191 3906
4192;; Equivalent National Standard: 3907;; Equivalent National Standard:
4193;; Romanian Standard SR 14111:1998, Romanian Standards Institution 3908;; Romanian Standard SR 14111:1998, Romanian Standards Institution
@@ -4991,7 +4706,8 @@ corresponding args of `make-coding-system'. If MNEMONIC isn't given,
4991 ?\¤ 4706 ?\¤
4992 ?\■ 4707 ?\■
4993 ?\ ]) 4708 ?\ ])
4994;; Original ;name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua> 4709(define-coding-system-alias 'ruscii 'cp1125)
4710;; Original name for cp1125, says Serhii Hlodin <hlodin@lutsk.bank.gov.ua>
4995(define-coding-system-alias 'cp866u 'cp1125) 4711(define-coding-system-alias 'cp866u 'cp1125)
4996 4712
4997(dotimes (i 8) 4713(dotimes (i 8)