aboutsummaryrefslogtreecommitdiffstats
path: root/admin/charsets
diff options
context:
space:
mode:
authorKenichi Handa2003-09-08 11:56:09 +0000
committerKenichi Handa2003-09-08 11:56:09 +0000
commit463f5630a5e7cbe7f042bc1175d1fa1c4e98860f (patch)
tree3287d0c628fea2249abf4635b3a4f45bedd6f8c4 /admin/charsets
parent4256310de631bd57c78b88b5131caa073315b3d7 (diff)
downloademacs-463f5630a5e7cbe7f042bc1175d1fa1c4e98860f.tar.gz
emacs-463f5630a5e7cbe7f042bc1175d1fa1c4e98860f.zip
New directory
Diffstat (limited to 'admin/charsets')
-rw-r--r--admin/charsets/Makefile287
-rw-r--r--admin/charsets/big5.awk53
-rw-r--r--admin/charsets/compact.awk123
-rw-r--r--admin/charsets/gb180302.awk80
-rw-r--r--admin/charsets/gb180304.awk102
-rw-r--r--admin/charsets/kuten.awk5
-rwxr-xr-xadmin/charsets/mapconv125
-rw-r--r--admin/charsets/mule-charsets.el58
8 files changed, 833 insertions, 0 deletions
diff --git a/admin/charsets/Makefile b/admin/charsets/Makefile
new file mode 100644
index 00000000000..0628bfeba74
--- /dev/null
+++ b/admin/charsets/Makefile
@@ -0,0 +1,287 @@
1# Makefile -- Makefile to generate charset maps in etc/charsets.
2# Copyright (C) 2003
3# National Institute of Advanced Industrial Science and Technology (AIST)
4# Registration Number H13PRO009
5#
6# This file is part of GNU Emacs.
7
8# GNU Emacs is free software; you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation; either version 2, or (at your option)
11# any later version.
12
13# GNU Emacs is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17
18# You should have received a copy of the GNU General Public License
19# along with GNU Emacs; see the file COPYING. If not, write to the
20# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21# Boston, MA 02111-1307, USA.
22
23# Commentary
24
25# At first, set these environment variables:
26# GLIBC_CHARMAPS
27# Directory of glibc-VERSION/localedata/charmaps.
28# VERSION must be 2.3 or later.
29# MISC_CHARMAPS
30# Directory containing these charmap files:
31# o bulgarian-mik.txt.gz
32# provided at <http://czyborra.com/charsets/>
33# o PTCP154
34# provided at <http://www.iana.org/assignments/charset-reg/>
35# o stdenc.txt and symbol.txt
36# provided at <http://www.unicode.org/Public/MAPPINGS/>
37# o Uni2JIS
38# provided at <http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/CJK.html>
39# o 720.htm and 858.htm
40# provided at <http://www.microsoft.com/globaldev/reference/oem/>
41# OLDEMACS
42# emacs of version 21.3.50 or later
43#
44# Then, do this:
45# % make install
46
# Remove a half-written map when its recipe fails, so that a truncated
# file is never mistaken for an up-to-date target on the next run.
.DELETE_ON_ERROR:

# Every map file this Makefile can generate.  This must stay a
# recursively expanded variable: the groups it names are defined below.
CHARSETS = ${ISO8859} ${IBM} ${CODEPAGE} ${CJK} ${MISC} ${MULE}

# Note: We can not prepend "ISO-" to these map files because of file
# name limits on DOS.
ISO8859 = \
  8859-2.map 8859-3.map 8859-4.map 8859-5.map 8859-6.map 8859-7.map \
  8859-8.map 8859-9.map 8859-10.map 8859-11.map 8859-13.map 8859-14.map \
  8859-15.map 8859-16.map

IBM = \
  IBM037.map IBM038.map \
  IBM256.map IBM273.map IBM274.map IBM275.map IBM277.map IBM278.map \
  IBM280.map IBM281.map IBM284.map IBM285.map IBM290.map IBM297.map \
  IBM420.map IBM423.map IBM424.map IBM437.map IBM500.map IBM850.map \
  IBM851.map IBM852.map IBM855.map IBM856.map IBM857.map IBM860.map \
  IBM861.map IBM862.map IBM863.map IBM864.map IBM865.map IBM866.map \
  IBM868.map IBM869.map IBM870.map IBM871.map IBM874.map IBM875.map \
  IBM880.map IBM891.map IBM903.map IBM904.map IBM905.map IBM918.map \
  IBM1004.map IBM1026.map IBM1047.map

CODEPAGE = \
  CP737.map CP775.map CP1125.map \
  CP1250.map CP1251.map CP1252.map CP1253.map CP1254.map \
  CP1255.map CP1256.map CP1257.map CP1258.map \
  CP10007.map \
  CP720.map CP858.map

CJK = GB2312.map GBK.map GB180302.map GB180304.map \
  BIG5.map BIG5-HKSCS.map \
  CNS-1.map CNS-2.map CNS-3.map CNS-4.map CNS-5.map CNS-6.map CNS-7.map \
  CNS-F.map \
  JISX0201.map JISX0208.map JISX0212.map JISX2131.map JISX2132.map \
  JISC6226.map \
  KSC5601.map KSC5636.map JOHAB.map

MISC = KOI-8.map KOI8-R.map KOI8-U.map KOI8-T.map ALTERNATIVNYJ.map \
  MIK.map PTCP154.map \
  TIS-620.map VISCII.map VSCII.map VSCII-2.map \
  KA-PS.map KA-ACADEMY.map \
  HP-ROMAN8.map NEXTSTEP.map MACINTOSH.map EBCDICUK.map EBCDICUS.map \
  stdenc.map symbol.map \
  CP949-2BYTE.map \
  BIG5-1.map BIG5-2.map

# Emacs-mule charsets.
MULE = MULE-ethiopic.map MULE-ipa.map MULE-is13194.map \
  MULE-sisheng.map MULE-tibetan.map \
  MULE-lviscii.map MULE-uviscii.map

# Default goal: generate every map in the current directory.  Declared
# phony so that a stray file named "charsets" cannot mask it.
.PHONY: charsets
charsets: ${CHARSETS}

AWK = gawk
99
100# Rules for each charset
101
# The conversion script is invoked as ./mapconv: it is the local file
# named in the prerequisites, and "." is not necessarily on PATH.
# The tab-indented "# Generating" lines are recipe lines on purpose:
# make echoes them as a progress message and the shell ignores them.

VSCII.map: ${GLIBC_CHARMAPS}/TCVN5712-1 mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*[ ]\/x[0-9a-f].[ ]/' GLIBC-1 compact.awk > $@

# Same source as VSCII.map, restricted to bytes 0x20-0x7F and 0xA0-0xFF;
# the sed step then rewrites the 0x20-0x7F run so that it starts at 0x00.
VSCII-2.map: ${GLIBC_CHARMAPS}/TCVN5712-1 mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*[ ]\/x[2-7a-f].[ ]/' GLIBC-1 compact.awk \
	  | sed 's/0x20-0x7F.*/0x00-0x7F 0x0000/' > $@

# Derived from the already generated IBM866.map by overriding the
# Unicode value on the lines for codes 0xF2..0xFB.
ALTERNATIVNYJ.map: IBM866.map
	# Generating $@...
	@echo "# Modified from ibm866 according to the chart at" > $@
	@echo "# http://www.cyrillic.com/ref/cyrillic/koi-8alt.html," >> $@
	@echo "# with guesses for the Unicodes of the glyphs." >> $@
	@sed -e '/0xF2/ s/ .*/ 0x2019/' \
	     -e '/0xF3/ s/ .*/ 0x2018/' \
	     -e '/0xF4/ s/ .*/ 0x0301/' \
	     -e '/0xF5/ s/ .*/ 0x0300/' \
	     -e '/0xF6/ s/ .*/ 0x203A/' \
	     -e '/0xF7/ s/ .*/ 0x2039/' \
	     -e '/0xF8/ s/ .*/ 0x2191/' \
	     -e '/0xF9/ s/ .*/ 0x2193/' \
	     -e '/0xFA/ s/ .*/ 0x00B1/' \
	     -e '/0xFB/ s/ .*/ 0x00F7/' < $< >> $@
126
# Maps built from the third-party charmaps in ${MISC_CHARMAPS}.
# mapconv is run as ./mapconv because it is the local prerequisite and
# "." is not necessarily on PATH.

# '1,$$' reaches sed as the address range 1,$ (every line); the doubled
# dollar sign keeps make from expanding it.
MIK.map: ${MISC_CHARMAPS}/bulgarian-mik.txt.gz mapconv compact.awk
	# Generating $@...
	@./mapconv $< '1,$$' CZYBORRA compact.awk > $@

PTCP154.map: ${MISC_CHARMAPS}/PTCP154 mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^0x/' IANA compact.awk > $@

stdenc.map: ${MISC_CHARMAPS}/stdenc.txt mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^[0-9A-Fa-f]/' UNICODE compact.awk > $@

symbol.map: ${MISC_CHARMAPS}/symbol.txt mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^[0-9A-Fa-f]/' UNICODE compact.awk > $@

CP720.map: ${MISC_CHARMAPS}/720.htm mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^[0-9A-F]/' MICROSOFT compact.awk > $@

CP858.map: ${MISC_CHARMAPS}/858.htm mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^[0-9A-F]/' MICROSOFT compact.awk > $@
150
# Two-byte Korean and Chinese maps taken from the glibc charmaps.
# mapconv is run as ./mapconv because it is the local prerequisite and
# "." is not necessarily on PATH.

CP949-2BYTE.map: ${GLIBC_CHARMAPS}/CP949 mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*[ ]\/x[89a-f]/' GLIBC-2 compact.awk > $@

# GLIBC-2-7 makes mapconv drop the most significant bit of both bytes.
GB2312.map: ${GLIBC_CHARMAPS}/GB2312 mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*[ ]\/x[a-f]/' GLIBC-2-7 compact.awk > $@

GBK.map: ${GLIBC_CHARMAPS}/GBK mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*[ ]\/x[89a-f]/' GLIBC-2 compact.awk > $@

# Only the two-byte entries of GB18030 (exactly two /x.. groups).
GB180302.map: ${GLIBC_CHARMAPS}/GB18030 mapconv gb180302.awk
	# Generating $@...
	@./mapconv $< '/^<.*[ ]\/x..\/x..[ ]/' GLIBC-2 gb180302.awk > $@

# The four-byte map is computed by gb180304.awk from the two-byte map.
GB180304.map: GB180302.map gb180304.awk
	# Generating $@...
	@$(AWK) -f gb180304.awk < $< > $@
170
# Japanese and Korean maps.  mapconv is run as ./mapconv because it is
# the local prerequisite and "." is not necessarily on PATH.

# The glibc part covers the codes starting with /x0-/x9; the 0xA1-0xDF
# range is appended by hand.
JISX0201.map: ${GLIBC_CHARMAPS}/JIS_X0201 mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*[ ]\/x[0-9]/' GLIBC-1 compact.awk > $@
	@echo "# Generated by hand" >> $@
	@echo "0xA1-0xDF 0xFF61" >> $@

# No awk script is passed here, so this map is not compacted.
JISX0208.map: ${GLIBC_CHARMAPS}/EUC-JP mapconv
	# Generating $@...
	@./mapconv $< '/^<.*[ ]\/x[a-f]/' GLIBC-2-7 > $@

# Select the entries introduced by /x8f and strip that prefix.
JISX0212.map: ${GLIBC_CHARMAPS}/EUC-JP mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*[ ]\/x8f/ s,/x8f,,' GLIBC-2-7 compact.awk > $@

JISX2131.map: ${GLIBC_CHARMAPS}/EUC-JISX0213 mapconv
	# Generating $@...
	@./mapconv $< '/^<.*[ ]\/x[a-f]/' GLIBC-2-7 > $@

JISX2132.map: ${GLIBC_CHARMAPS}/EUC-JISX0213 mapconv
	# Generating $@...
	@./mapconv $< '/^<.*[ ]\/x8f/ s,/x8f,,' GLIBC-2-7 > $@

# kuten.awk turns the decimal kuten codes into two-byte hex codes.
JISC6226.map : ${MISC_CHARMAPS}/Uni2JIS mapconv kuten.awk
	# Generating $@...
	@./mapconv $< '/^[^#].*0-/' YASUOKA kuten.awk > $@

KSC5601.map: ${GLIBC_CHARMAPS}/EUC-KR mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*[ ]\/x[a-f]/' GLIBC-2-7 compact.awk > $@
200
# BIG5.map is deliberately NOT compacted: BIG5-1.map and BIG5-2.map
# below select their input with sed line ranges, which need one code per
# line.  compact.awk is therefore not a prerequisite.
# mapconv is run as ./mapconv because it is the local prerequisite and
# "." is not necessarily on PATH.
BIG5.map: ${GLIBC_CHARMAPS}/BIG5 mapconv
	# Generating $@...
	@./mapconv $< '/^<.*[ ]\/x[a-f]/' GLIBC-2 > $@

# The two halves of BIG5 (0xa140..0xc8fe and 0xc940..end), re-encoded by
# big5.awk.  Only BIG5.map and big5.awk are read, so mapconv is not a
# prerequisite.  The header carries the "# " comment prefix used by the
# headers of all the other generated maps.
BIG5-1.map: BIG5.map big5.awk
	# Generating $@...
	@echo "# Generated from $<" > $@
	@sed -n -e '/0xa140/,/0xc8fe/p' < $< | $(AWK) -f big5.awk >> $@

BIG5-2.map: BIG5.map big5.awk
	# Generating $@...
	@echo "# Generated from $<" > $@
	@sed -n -e '/0xc940/,$$ p' < $< | $(AWK) -f big5.awk >> $@

BIG5-HKSCS.map: ${GLIBC_CHARMAPS}/BIG5-HKSCS mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*[ ]\/x[89a-f].\//' GLIBC-2 compact.awk > $@

JOHAB.map: ${GLIBC_CHARMAPS}/JOHAB mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*[ ]\/x[89a-f]/' GLIBC-2 compact.awk > $@
222
# CNS planes, all extracted from the glibc EUC-TW charmap.  CNS-1 takes
# the plain two-byte entries; the other maps select the entries that
# start with /x8e/xaN and strip that prefix before conversion.
# mapconv is run as ./mapconv because it is the local prerequisite and
# "." is not necessarily on PATH.

CNS-1.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*[ ]\/x[a-f]/' GLIBC-2-7 compact.awk > $@

CNS-2.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*\/x8e\/xa2/ s,/x8e/xa2,,' GLIBC-2-7 compact.awk > $@

CNS-3.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*\/x8e\/xa3/ s,/x8e/xa3,,' GLIBC-2-7 compact.awk > $@

CNS-4.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*\/x8e\/xa4/ s,/x8e/xa4,,' GLIBC-2-7 compact.awk > $@

CNS-5.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*\/x8e\/xa5/ s,/x8e/xa5,,' GLIBC-2-7 compact.awk > $@

CNS-6.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*\/x8e\/xa6/ s,/x8e/xa6,,' GLIBC-2-7 compact.awk > $@

CNS-7.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*\/x8e\/xa7/ s,/x8e/xa7,,' GLIBC-2-7 compact.awk > $@

CNS-F.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*\/x8e\/xaf/ s,/x8e/xaf,,' GLIBC-2-7 compact.awk > $@
254
# General target to produce map files for mule charsets.  The target
# name is passed to mule-charsets.el, which picks the charset from it
# and writes the file itself.
MULE-%.map: mule-charsets.el
	# Generating $@...
	@${OLDEMACS} -batch -l ./mule-charsets.el $@

# General target to produce map files for ISO-8859, GEORGIAN, and
# EBCDIC charsets.  We can not use the original file name because of
# file name limit on DOS.  "KA" is ISO 639 language code for Georgian.
# mapconv is run as ./mapconv because it is the local prerequisite and
# "." is not necessarily on PATH.

8859-%.map: ${GLIBC_CHARMAPS}/ISO-8859-% mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*[ ]\/x/' GLIBC-1 compact.awk > $@

KA-%.map: ${GLIBC_CHARMAPS}/GEORGIAN-% mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*[ ]\/x/' GLIBC-1 compact.awk > $@

EBCDIC%.map: ${GLIBC_CHARMAPS}/EBCDIC-% mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*[ ]\/x/' GLIBC-1 compact.awk > $@

# General target to produce map files for single-byte charsets: any
# NAME.map without a more specific rule above is built from the glibc
# charmap called NAME.

%.map: ${GLIBC_CHARMAPS}/% mapconv compact.awk
	# Generating $@...
	@./mapconv $< '/^<.*[ ]\/x/' GLIBC-1 compact.awk > $@
281
# install and clean are commands, not files; declaring them phony keeps
# a stray file of the same name from disabling them.
.PHONY: install clean

# Copy every generated map into the etc/charsets directory of the tree.
install: ${CHARSETS}
	cp ${CHARSETS} ../../etc/charsets

# Clear files that are automatically generated.  "temp" is the scratch
# file that mapconv's GLIBC-2-7 branch writes through tee.
clean:
	rm -f ${CHARSETS} temp
diff --git a/admin/charsets/big5.awk b/admin/charsets/big5.awk
new file mode 100644
index 00000000000..8d5fa6429b3
--- /dev/null
+++ b/admin/charsets/big5.awk
@@ -0,0 +1,53 @@
# Re-encode the first field of each input line ("0xHHHH", a Big5 code)
# as a two-byte code whose bytes both lie in 0x21..0x7E, and print it
# followed by the unchanged second field.

BEGIN {
    # Values of the hexadecimal letters in either case.  Decimal digits
    # are converted arithmetically in decode_hex.
    tohex["A"] = tohex["a"] = 10;
    tohex["B"] = tohex["b"] = 11;
    tohex["C"] = tohex["c"] = 12;
    tohex["D"] = tohex["d"] = 13;
    tohex["E"] = tohex["e"] = 14;
    tohex["F"] = tohex["f"] = 15;
}

# Return the number spelled by the hex digits of HEXSTR.  Any other
# character counts as a zero digit, which is why a leading "0x" is
# harmless.  The names after the gap are locals.
function decode_hex(hexstr,    value, pos, digit) {
    value = 0;
    for (pos = 1; pos <= length(hexstr); pos++) {
        digit = substr(hexstr, pos, 1);
        if (digit >= "0" && digit <= "9")
            value = value * 16 + (digit - "0");
        else
            value = value * 16 + tohex[digit];
    }
    return value;
}

# Map the numeric Big5 code BIG5 to a 94x94 code.
#   157  = (0xFF - 0xA1 + 0x7F - 0x40), codes counted per lead byte
#   98   = (0xA1 - (0x7F - 0x40)), offset applied to trail bytes >= 0x7F
#   6280 = (0xC9 - 0xA1) * 157, subtracted for lead bytes >= 0xC9
function decode_big5(big5,    lead, trail, offset, idx) {
    lead = int(big5 / 256);
    trail = big5 % 256;
    offset = (trail < 127) ? 64 : 98;
    idx = (lead - 161) * 157 + (trail - offset);
    if (lead >= 201)
        idx -= 6280;
    return (int(idx / 94) + 33) * 256 + (idx % 94) + 33;
}

{
    printf "0x%04X %s\n", decode_big5(decode_hex($1)), $2;
}
52
53
diff --git a/admin/charsets/compact.awk b/admin/charsets/compact.awk
new file mode 100644
index 00000000000..281e51ebc3b
--- /dev/null
+++ b/admin/charsets/compact.awk
@@ -0,0 +1,123 @@
1# compact.awk -- Make charset map compact.
2# Copyright (C) 2003
3# National Institute of Advanced Industrial Science and Technology (AIST)
4# Registration Number H13PRO009
5#
6# This file is part of GNU Emacs.
7#
8# GNU Emacs is free software; you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation; either version 2, or (at your option)
11# any later version.
12#
13# GNU Emacs is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with GNU Emacs; see the file COPYING. If not, write to the
20# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21# Boston, MA 02111-1307, USA.
22
23# Comment:
24# Make a charset map compact by changing this kind of line sequence:
25# 0x00 0x0000
26# 0x01 0x0001
27# ...
28# 0x7F 0x007F
29# to one line of this format:
30# 0x00-0x7F 0x0000
31
BEGIN {
    # tohex maps each hex digit (either case) to its value PLUS ONE.
    # decode_hex subtracts the one again; the offset is kept so the
    # table holds the same values as before.
    hexdigits = "0123456789ABCDEF";
    for (k = 1; k <= 16; k++) {
        d = substr(hexdigits, k, 1);
        tohex[d] = k;
        tohex[tolower(d)] = k;
    }
    # The run being accumulated: codes from_code..to_code map to the
    # Unicode values starting at from_unicode.  to_code < from_code
    # means that no run has been started yet.
    from_code = 0;
    to_code = -1;
    to_unicode = 0;
    from_unicode = 0;
}

# Return the number spelled by the hex digits of STR starting at the
# 1-based position IDX and stopping at the first non-hex character.
# The names after the gap are locals.
function decode_hex(str, idx,    n, len, i, c) {
    n = 0;
    len = length(str);
    for (i = idx; i <= len; i++) {
        c = substr(str, i, 1);
        if (!(c in tohex))
            break;
        n = n * 16 + tohex[c] - 1;
    }
    return n;
}

# Print the accumulated run, if there is one, as "CODE UNICODE" or
# "FROM-TO UNICODE".  Codes are printed with two hex digits while the
# end of the run is below 256 and with four otherwise.
function flush_run(    single, range) {
    if (from_code > to_code)
        return;            # nothing accumulated (e.g. empty input)
    if (to_code < 256) {
        single = "0x%02X 0x%04X\n";
        range = "0x%02X-0x%02X 0x%04X\n";
    } else {
        single = "0x%04X 0x%04X\n";
        range = "0x%04X-0x%04X 0x%04X\n";
    }
    if (from_code == to_code)
        printf single, from_code, from_unicode;
    else
        printf range, from_code, to_code, from_unicode;
}

# Comment lines are passed through unchanged.
/^#/ {
    print;
    next;
}

{
    # Both fields look like "0xHHHH"; decoding starts after the "0x".
    code = decode_hex($1, 3);
    unicode = decode_hex($2, 3);
    # Extend the run only if one exists.  Without the first test, a
    # first record "0x00 0x0001" would be merged into the initial
    # sentinel values and be printed with Unicode 0x0000.
    if (from_code <= to_code \
        && code == to_code + 1 && unicode == to_unicode + 1) {
        to_code++;
        to_unicode++;
    } else {
        flush_run();
        from_code = to_code = code;
        from_unicode = to_unicode = unicode;
    }
}

# Print the final run.  flush_run prints nothing when there were no
# data records, where the END block used to emit a bogus range built
# from the sentinel values.
END {
    flush_run();
}
diff --git a/admin/charsets/gb180302.awk b/admin/charsets/gb180302.awk
new file mode 100644
index 00000000000..94d0a9e410a
--- /dev/null
+++ b/admin/charsets/gb180302.awk
@@ -0,0 +1,80 @@
# Compact a sorted list of two-byte "0xHHHH 0xUUUU" lines into runs, like
# compact.awk, but measuring adjacency in a linear index in which every
# lead byte owns 191 consecutive trail-byte slots starting at 0x40.

BEGIN {
    # Values of the hexadecimal letters in either case.  Decimal digits
    # are converted arithmetically in decode_hex.
    tohex["A"] = tohex["a"] = 10;
    tohex["B"] = tohex["b"] = 11;
    tohex["C"] = tohex["c"] = 12;
    tohex["D"] = tohex["d"] = 13;
    tohex["E"] = tohex["e"] = 14;
    tohex["F"] = tohex["f"] = 15;
    # The run being accumulated, as indices: from_gb..to_gb map to the
    # Unicode values starting at from_unicode.  to_gb < from_gb means
    # that no run has been started yet.
    from_gb = 0;
    to_gb = -1;
    to_unicode = 0;
    from_unicode = 0;
}

# Return the number spelled by the hex digits of STR; any other
# character counts as a zero digit.  The names after the gap are locals.
function decode_hex(str,    n, len, i, c) {
    n = 0;
    len = length(str);
    for (i = 1; i <= len; i++) {
        c = substr(str, i, 1);
        if (c >= "0" && c <= "9")
            n = n * 16 + (c - "0");
        else
            n = n * 16 + tohex[c];
    }
    return n;
}

# Convert the numeric two-byte code GB to its linear index: lead bytes
# count from 0x81 (129), trail bytes from 0x40 (64), 191 slots per lead
# byte.  No adjustment is made around trail byte 0x7F.
function gb_to_index(gb,    b0, b1) {
    b0 = int(gb / 256);
    b1 = gb % 256;
    return (b0 - 129) * 191 + b1 - 64;
}

# Inverse of gb_to_index.
function index_to_gb(idx,    b0, b1) {
    b0 = int(idx / 191) + 129;
    b1 = (idx % 191) + 64;
    return b0 * 256 + b1;
}

# Print the accumulated run, if there is one, as "CODE UNICODE" or
# "FROM-TO UNICODE".
function flush_run() {
    if (from_gb > to_gb)
        return;            # nothing accumulated (e.g. empty input)
    if (from_gb == to_gb)
        printf "0x%04X 0x%04X\n", index_to_gb(from_gb), from_unicode;
    else
        printf "0x%04X-0x%04X 0x%04X\n",
            index_to_gb(from_gb), index_to_gb(to_gb), from_unicode;
}

# Comment lines are passed through unchanged.
/^#/ {
    print;
    next;
}

{
    # Both fields look like "0xHHHH"; take the four digits after "0x".
    gb = gb_to_index(decode_hex(substr($1, 3, 4)));
    unicode = decode_hex(substr($2, 3, 4));
    # Extend the run only if one exists, so the first record can never
    # be merged into the initial sentinel values.
    if (from_gb <= to_gb && gb == to_gb + 1 && unicode == to_unicode + 1) {
        to_gb++;
        to_unicode++;
    } else {
        flush_run();
        from_gb = to_gb = gb;
        from_unicode = to_unicode = unicode;
    }
}

# Print the final run in the same form as every other run; the END block
# used to print a range even when the run held a single code.
END {
    flush_run();
}
diff --git a/admin/charsets/gb180304.awk b/admin/charsets/gb180304.awk
new file mode 100644
index 00000000000..9dac34bceff
--- /dev/null
+++ b/admin/charsets/gb180304.awk
@@ -0,0 +1,102 @@
# Read a two-byte map (lines "0xHHHH 0xUUUU" or "0xHHHH-0xHHHH 0xUUUU"),
# record which Unicode values it covers, and print a map that assigns
# consecutive four-byte codes to the values from 0x80 to 0xFFFF that are
# not covered (the range 0xD800..0xDFFF is left out).

BEGIN {
    # Values of the hexadecimal letters in either case.  Decimal digits
    # are converted arithmetically in decode_hex.
    tohex["A"] = tohex["a"] = 10;
    tohex["B"] = tohex["b"] = 11;
    tohex["C"] = tohex["c"] = 12;
    tohex["D"] = tohex["d"] = 13;
    tohex["E"] = tohex["e"] = 14;
    tohex["F"] = tohex["f"] = 15;
}

# Return the number spelled by the hex digits of HEXSTR; any other
# character counts as a zero digit.  The names after the gap are locals.
function decode_hex(hexstr,    value, pos, digit) {
    value = 0;
    for (pos = 1; pos <= length(hexstr); pos++) {
        digit = substr(hexstr, pos, 1);
        if (digit >= "0" && digit <= "9")
            value = value * 16 + (digit - "0");
        else
            value = value * 16 + tohex[digit];
    }
    return value;
}

# Linear index of the numeric two-byte code GB: lead bytes count from
# 0x81 (129), trail bytes from 0x40 (64), 191 slots per lead byte.
function gb_to_index(gb,    lead, trail) {
    lead = int(gb / 256);
    trail = gb % 256;
    return (lead - 129) * 191 + trail - 64;
}

# Return the four-byte code number IDX as eight hex digits.  From the
# last byte to the first, the bytes cycle through 10 values from 0x30,
# 126 values from 0x81, 10 values from 0x30, and count up from 0x81.
function index_to_gb(idx,    q3, q2, q1, byte0, byte1, byte2, byte3) {
    q3 = int(idx / 10);
    q2 = int(q3 / 126);
    q1 = int(q2 / 10);
    byte3 = (idx % 10) + 48;
    byte2 = (q3 % 126) + 129;
    byte1 = (q2 % 10) + 48;
    byte0 = q1 + 129;
    return sprintf("%02X%02X%02X%02X", byte0, byte1, byte2, byte3);
}

# Comment lines are passed through unchanged.
/^\#/ {
    print;
    next;
}

# A range line covers as many consecutive Unicode values as there are
# indices between its two codes.
/0x....-0x..../ {
    gb_from = gb_to_index(decode_hex(substr($1, 3, 4)));
    gb_to = gb_to_index(decode_hex(substr($1, 10, 4)));
    unicode = decode_hex(substr($2, 3, 4));
    for (; gb_from <= gb_to; gb_from++)
        table[unicode++] = 1;
    next;
}

# Any other line covers one Unicode value.
{
    unicode = decode_hex(substr($2, 3, 4));
    table[unicode] = 1;
}

END {
    from_gb = -1;           # four-byte index where the open run began; -1 = none
    to_gb = 0;              # next four-byte index to hand out
    from_i = 0;             # Unicode value at which the open run began
    table[65536] = 1;       # sentinel that closes a run reaching 0xFFFF
    for (i = 128; i <= 65536; i++) {
        if (table[i] != 0) {
            # A covered value ends the open run, if any.
            if (from_gb >= 0) {
                if (from_gb + 1 == to_gb)
                    printf "0x%s\t\t0x%04X\n",
                        index_to_gb(from_gb), from_i;
                else
                    printf "0x%s-0x%s\t0x%04X\n",
                        index_to_gb(from_gb), index_to_gb(to_gb - 1), from_i;
                from_gb = -1;
            }
            continue;
        }
        # 55296..57343 (0xD800..0xDFFF) is stepped over: it receives no
        # code, and it does not end an open run either.
        if (i >= 55296 && i < 57344)
            continue;
        if (from_gb < 0) {
            from_gb = to_gb;
            from_i = i;
        }
        to_gb++;
    }
}
diff --git a/admin/charsets/kuten.awk b/admin/charsets/kuten.awk
new file mode 100644
index 00000000000..b874c78fc49
--- /dev/null
+++ b/admin/charsets/kuten.awk
@@ -0,0 +1,5 @@
# The first field is "0x" followed by a four-digit decimal code: two
# digits, then two more.  Print each pair plus 32 as a hex byte,
# followed by the unchanged second field.
/^[0-9]/ {
    row = substr($1, 3, 2);
    cell = substr($1, 5, 2);
    printf "0x%02X%02X %s\n", row + 32, cell + 32, $2;
}
diff --git a/admin/charsets/mapconv b/admin/charsets/mapconv
new file mode 100755
index 00000000000..f686ea3799c
--- /dev/null
+++ b/admin/charsets/mapconv
@@ -0,0 +1,125 @@
1#!/bin/sh
2#
3# Copyright (C) 2003
4# National Institute of Advanced Industrial Science and Technology (AIST)
5# Registration Number H13PRO009
6#
7# This file is part of GNU Emacs.
8#
9# GNU Emacs is free software; you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation; either version 2, or (at your option)
12# any later version.
13#
14# GNU Emacs is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with GNU Emacs; see the file COPYING. If not, write to the
21# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22# Boston, MA 02111-1307, USA.
23
24# Comment:
25# Convert charset maps of various formats into this:
26# 0xXX 0xYYYY
27# where,
28# XX is a code point of the charset in hexadecimal,
29# YYYY is the corresponding Unicode character code in hexadecimal.
30# Arguments are:
31# $1: source map file
32# $2: address pattern for sed (optionally with substitution command)
33# $3: format of source map file
34# GLIBC-1 GLIBC-2 GLIBC-2-7 CZYBORRA IANA UNICODE YASUOKA MICROSOFT
35# $4: awk script
36
# $1: source map file
# $2: sed address pattern (optionally followed by a substitution command)
# $3: format of the source map file
# $4: optional awk script applied to the sorted result
#
# The converted map is written to standard output.  All diagnostics go
# to standard error, because callers redirect standard output into the
# map file being generated.

BASE=`basename "$1"`

# Build the provenance string for the header comment, and reject an
# unknown format before anything is written.
case "$3" in
    GLIBC*)
        SOURCE=`echo "$1" | sed 's/.*\(glibc.*$\)/\1/'`;;
    CZYBORRA)
        SOURCE="http://czyborra.com/charsets/${BASE}";;
    IANA)
        SOURCE="http://www.iana.org/assignments/charset-reg/${BASE}";;
    UNICODE)
        SOURCE="http://www.unicode.org/Public/MAPPINGS/.../${BASE}";;
    YASUOKA)
        SOURCE="http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/.../${BASE}";;
    MICROSOFT)
        SOURCE="http://www.microsoft.com/globaldev/reference/oem/${BASE}";;
    *)
        echo "Unknown file type: $3" >&2
        exit 1;;
esac

echo "# Generated from $SOURCE"

# The last pipeline stage is the given awk script, or cat if none.
# AWKPROG is expanded unquoted below so that it splits into words.
if [ -n "$4" ] ; then
    if [ -f "$4" ] ; then
        AWKPROG="gawk -f $4"
    else
        echo "Awk program does not exist: $4" >&2
        exit 1
    fi
else
    AWKPROG=cat
fi

# Dispatch on the format with "case"; the "==" operator of "[" is not
# available in every /bin/sh.
case "$3" in
    GLIBC-1)
        # Source format is:
        #   <UYYYY> /xXX
        sed -n -e "$2 p" < "$1" \
            | sed -e 's,<U\([^>]*\)>[ ]*/x\(..\).*,0x\2 0x\1,' \
            | sort | ${AWKPROG}
        ;;
    GLIBC-2)
        # Source format is:
        #   <UYYYY> /xXX/xZZ
        sed -n -e "$2 p" < "$1" \
            | sed -e 's,<U\([^>]*\)>[ ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \
            | sort | ${AWKPROG}
        ;;
    GLIBC-2-7)
        # Source format is:
        #   <UYYYY> /xXX/xZZ
        # We must drop MSBs of XX and ZZ.
        # (A leftover "tee temp" stage was removed from this pipeline: it
        # wrote a fixed-name scratch file on every run.)
        sed -n -e "$2 p" < "$1" \
            | sed -e 's/xa/x2/g' -e 's/xb/x3/g' -e 's/xc/x4/g' \
                  -e 's/xd/x5/g' -e 's/xe/x6/g' -e 's/xf/x7/g' \
                  -e 's,<U\([^>]*\)>[ ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \
            | sort | ${AWKPROG}
        ;;
    CZYBORRA)
        # Source format is (gzip-compressed):
        #   =XX U+YYYY
        zcat "$1" | sed -n -e "$2 p" \
            | sed -e 's/=\(..\)[^U]*U+\([0-9A-F]*\).*/0x\1 0x\2/' \
            | sort | ${AWKPROG}
        ;;
    IANA)
        # Source format is:
        #   0xXX 0xYYYY
        sed -n -e "$2 p" < "$1" \
            | sed -e 's/\(0x[0-9A-Fa-f]*\)[^0]*\(0x[0-9A-Fa-f]*\).*/\1 \2/' \
            | sort | ${AWKPROG}
        ;;
    UNICODE)
        # Source format is:
        #   YYYY XX
        sed -n -e "$2 p" < "$1" \
            | sed -e 's/\([0-9A-F]*\)[^0-9A-F]*\([0-9A-F]*\).*/0x\2 0x\1/' \
            | sort | ${AWKPROG}
        ;;
    YASUOKA)
        # Source format is:
        #   YYYY 0-XXXX (XXXX is a Kuten code)
        sed -n -e "$2 p" < "$1" \
            | sed -e 's/\([0-9A-F]*\)[^0]*0-\([0-9]*\).*/0x\2 0x\1/' \
            | sort | ${AWKPROG}
        ;;
    MICROSOFT)
        # Source format is:
        #   XX = U+YYYY
        sed -n -e "$2 p" < "$1" \
            | sed -e 's/\([0-9A-F]*\).*U+\([0-9A-F]*\).*/0x\1 0x\2/' \
            | sort | ${AWKPROG}
        ;;
    *)
        # Reached by a name that matches GLIBC* above but is not one of
        # the three known GLIBC formats.
        echo "Invalid arguments" >&2
        exit 1
        ;;
esac
diff --git a/admin/charsets/mule-charsets.el b/admin/charsets/mule-charsets.el
new file mode 100644
index 00000000000..9fc1ad83fc8
--- /dev/null
+++ b/admin/charsets/mule-charsets.el
@@ -0,0 +1,58 @@
1;; mule-charsets.el -- Generate Mule-original charset maps.
2;; Copyright (C) 2003
3;; National Institute of Advanced Industrial Science and Technology (AIST)
4;; Registration Number H13PRO009
5
6;; This file is part of GNU Emacs.
7
8;; GNU Emacs is free software; you can redistribute it and/or modify
9;; it under the terms of the GNU General Public License as published by
10;; the Free Software Foundation; either version 2, or (at your option)
11;; any later version.
12
13;; GNU Emacs is distributed in the hope that it will be useful,
14;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16;; GNU General Public License for more details.
17
18;; You should have received a copy of the GNU General Public License
19;; along with GNU Emacs; see the file COPYING. If not, write to the
20;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21;; Boston, MA 02111-1307, USA.
22
;; Refuse to run on an Emacs older than 21.3.50.  The minor-version
;; tests apply only when the major version is exactly 21; tested on
;; their own they would also reject later major versions with a small
;; minor number, such as 22.1.
(if (or (< emacs-major-version 21)
        (and (= emacs-major-version 21)
             (or (< emacs-minor-version 3)
                 (and (= emacs-minor-version 3)
                      (string< emacs-version "21.3.50")))))
    (error "Use Emacs of version 21.3.50 or later"))
28
(defun func (start end)
  "Insert one map line for each character from START through END.
A line is \"0xCODE 0xUNICODE\", where CODE is built from the one or
two code points that `split-char' returns for the character and
UNICODE is the value of `encode-char' for `ucs'.  Characters for
which that value is nil are skipped."
  (let ((char start))
    (while (<= char end)
      (let ((codes (cdr (split-char char)))
            (unicode (encode-char char 'ucs)))
        (cond ((null unicode))          ; no Unicode value: emit nothing
              ((nth 1 codes)            ; two code points
               (insert (format "0x%02X%02X 0x%04X\n"
                               (car codes) (nth 1 codes) unicode)))
              (t                        ; one code point
               (insert (format "0x%02X 0x%04X\n" (car codes) unicode)))))
      (setq char (1+ char)))))
39
(defconst charset-alist
  '(("MULE-ethiopic.map" . ethiopic)
    ("MULE-ipa.map"      . ipa)
    ("MULE-is13194.map"  . indian-is13194)
    ("MULE-sisheng.map"  . chinese-sisheng)
    ("MULE-tibetan.map"  . tibetan)
    ("MULE-lviscii.map"  . vietnamese-viscii-lower)
    ("MULE-uviscii.map"  . vietnamese-viscii-upper))
  "Alist mapping each map file name to the charset it is generated from.
The file name given on the command line is looked up here.")
48
;; Driver: the first remaining command-line argument names the map file
;; to write.  It is looked up in `charset-alist', and the characters of
;; the charset found are written to that file through `func'.
(let ((file (car command-line-args-left)))
  (or (stringp file)
      (error "Invalid file name: %s" file))
  ;; Consume the argument so that Emacs does not treat it as a further
  ;; command-line argument once this file has been loaded.
  (setq command-line-args-left (cdr command-line-args-left))
  (let ((charset (cdr (assoc file charset-alist))))
    (or charset
        (error "Invalid charset: %s" file))
    (with-temp-buffer
      (map-charset-chars 'func charset)
      (write-file file))))