diff options
| author | Kenichi Handa | 2003-09-08 11:56:09 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2003-09-08 11:56:09 +0000 |
| commit | 463f5630a5e7cbe7f042bc1175d1fa1c4e98860f (patch) | |
| tree | 3287d0c628fea2249abf4635b3a4f45bedd6f8c4 /admin/charsets | |
| parent | 4256310de631bd57c78b88b5131caa073315b3d7 (diff) | |
| download | emacs-463f5630a5e7cbe7f042bc1175d1fa1c4e98860f.tar.gz emacs-463f5630a5e7cbe7f042bc1175d1fa1c4e98860f.zip | |
New directory
Diffstat (limited to 'admin/charsets')
| -rw-r--r-- | admin/charsets/Makefile | 287 | ||||
| -rw-r--r-- | admin/charsets/big5.awk | 53 | ||||
| -rw-r--r-- | admin/charsets/compact.awk | 123 | ||||
| -rw-r--r-- | admin/charsets/gb180302.awk | 80 | ||||
| -rw-r--r-- | admin/charsets/gb180304.awk | 102 | ||||
| -rw-r--r-- | admin/charsets/kuten.awk | 5 | ||||
| -rwxr-xr-x | admin/charsets/mapconv | 125 | ||||
| -rw-r--r-- | admin/charsets/mule-charsets.el | 58 |
8 files changed, 833 insertions, 0 deletions
diff --git a/admin/charsets/Makefile b/admin/charsets/Makefile new file mode 100644 index 00000000000..0628bfeba74 --- /dev/null +++ b/admin/charsets/Makefile | |||
| @@ -0,0 +1,287 @@ | |||
| 1 | # Makefile -- Makefile to generate charset maps in etc/charsets. | ||
| 2 | # Copyright (C) 2003 | ||
| 3 | # National Institute of Advanced Industrial Science and Technology (AIST) | ||
| 4 | # Registration Number H13PRO009 | ||
| 5 | # | ||
| 6 | # This file is part of GNU Emacs. | ||
| 7 | |||
| 8 | # GNU Emacs is free software; you can redistribute it and/or modify | ||
| 9 | # it under the terms of the GNU General Public License as published by | ||
| 10 | # the Free Software Foundation; either version 2, or (at your option) | ||
| 11 | # any later version. | ||
| 12 | |||
| 13 | # GNU Emacs is distributed in the hope that it will be useful, | ||
| 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 16 | # GNU General Public License for more details. | ||
| 17 | |||
| 18 | # You should have received a copy of the GNU General Public License | ||
| 19 | # along with GNU Emacs; see the file COPYING. If not, write to the | ||
| 20 | # Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 21 | # Boston, MA 02111-1307, USA. | ||
| 22 | |||
| 23 | # Commentary | ||
| 24 | |||
| 25 | # At first, set these environment variables: | ||
| 26 | # GLIBC_CHARMAPS | ||
| 27 | # Directory of glibc-VERSION/localedata/charmaps. | ||
| 28 | # VERSION must be 2.3 or later. | ||
| 29 | # MISC_CHARMAPS | ||
| 30 | # Directory containing these charmap files: | ||
| 31 | # o bulgarian-mik.txt.gz | ||
| 32 | # provided at <http://czyborra.com/charsets/> | ||
| 33 | # o PTCP154 | ||
| 34 | # provided at <http://www.iana.org/assignments/charset-reg/> | ||
| 35 | # o stdenc.txt and symbol.txt | ||
| 36 | # provided at <http://www.unicode.org/Public/MAPPINGS/> | ||
| 37 | # o Uni2JIS | ||
| 38 | # provided at <http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/CJK.html> | ||
| 39 | # o 720.htm and 858.htm | ||
| 40 | # provided at <http://www.microsoft.com/globaldev/reference/oem/> | ||
| 41 | # OLDEMACS | ||
| 42 | # emacs of version 21.3.50 or later | ||
| 43 | # | ||
| 44 | # Then, do this: | ||
| 45 | # % make install | ||
| 46 | |||
| 47 | CHARSETS = ${ISO8859} ${IBM} ${CODEPAGE} ${CJK} ${MISC} ${MULE} | ||
| 48 | |||
| 49 | # Note: We can not prepend "ISO-" to these map files because of file | ||
| 50 | # name limits on DOS. | ||
| 51 | ISO8859 = \ | ||
| 52 | 8859-2.map 8859-3.map 8859-4.map 8859-5.map 8859-6.map 8859-7.map \ | ||
| 53 | 8859-8.map 8859-9.map 8859-10.map 8859-11.map 8859-13.map 8859-14.map \ | ||
| 54 | 8859-15.map 8859-16.map | ||
| 55 | |||
| 56 | IBM = \ | ||
| 57 | IBM037.map IBM038.map \ | ||
| 58 | IBM256.map IBM273.map IBM274.map IBM275.map IBM277.map IBM278.map \ | ||
| 59 | IBM280.map IBM281.map IBM284.map IBM285.map IBM290.map IBM297.map \ | ||
| 60 | IBM420.map IBM423.map IBM424.map IBM437.map IBM500.map IBM850.map \ | ||
| 61 | IBM851.map IBM852.map IBM855.map IBM856.map IBM857.map IBM860.map \ | ||
| 62 | IBM861.map IBM862.map IBM863.map IBM864.map IBM865.map IBM866.map \ | ||
| 63 | IBM868.map IBM869.map IBM870.map IBM871.map IBM874.map IBM875.map \ | ||
| 64 | IBM880.map IBM891.map IBM903.map IBM904.map IBM905.map IBM918.map \ | ||
| 65 | IBM1004.map IBM1026.map IBM1047.map | ||
| 66 | |||
| 67 | CODEPAGE = \ | ||
| 68 | CP737.map CP775.map CP1125.map\ | ||
| 69 | CP1250.map CP1251.map CP1252.map CP1253.map CP1254.map \ | ||
| 70 | CP1255.map CP1256.map CP1257.map CP1258.map \ | ||
| 71 | CP10007.map \ | ||
| 72 | CP720.map CP858.map | ||
| 73 | |||
| 74 | CJK = GB2312.map GBK.map GB180302.map GB180304.map \ | ||
| 75 | BIG5.map BIG5-HKSCS.map\ | ||
| 76 | CNS-1.map CNS-2.map CNS-3.map CNS-4.map CNS-5.map CNS-6.map CNS-7.map \ | ||
| 77 | CNS-F.map \ | ||
| 78 | JISX0201.map JISX0208.map JISX0212.map JISX2131.map JISX2132.map \ | ||
| 79 | JISC6226.map \ | ||
| 80 | KSC5601.map KSC5636.map JOHAB.map | ||
| 81 | |||
| 82 | MISC = KOI-8.map KOI8-R.map KOI8-U.map KOI8-T.map ALTERNATIVNYJ.map \ | ||
| 83 | MIK.map PTCP154.map \ | ||
| 84 | TIS-620.map VISCII.map VSCII.map VSCII-2.map\ | ||
| 85 | KA-PS.map KA-ACADEMY.map \ | ||
| 86 | HP-ROMAN8.map NEXTSTEP.map MACINTOSH.map EBCDICUK.map EBCDICUS.map \ | ||
| 87 | stdenc.map symbol.map \ | ||
| 88 | CP949-2BYTE.map \ | ||
| 89 | BIG5-1.map BIG5-2.map | ||
| 90 | |||
| 91 | # Emacs-mule charsets. | ||
| 92 | MULE = MULE-ethiopic.map MULE-ipa.map MULE-is13194.map \ | ||
| 93 | MULE-sisheng.map MULE-tibetan.map \ | ||
| 94 | MULE-lviscii.map MULE-uviscii.map | ||
| 95 | |||
| 96 | charsets: ${CHARSETS} | ||
| 97 | |||
| 98 | AWK = gawk | ||
| 99 | |||
| 100 | # Rules for each charset | ||
| 101 | |||
| 102 | VSCII.map: ${GLIBC_CHARMAPS}/TCVN5712-1 mapconv compact.awk | ||
| 103 | # Generating $@... | ||
| 104 | @mapconv $< '/^<.*[ ]\/x[0-9a-f].[ ]/' GLIBC-1 compact.awk > $@ | ||
| 105 | |||
| 106 | VSCII-2.map: ${GLIBC_CHARMAPS}/TCVN5712-1 mapconv compact.awk | ||
| 107 | # Generating $@... | ||
| 108 | @mapconv $< '/^<.*[ ]\/x[2-7a-f].[ ]/' GLIBC-1 compact.awk \ | ||
| 109 | | sed 's/0x20-0x7F.*/0x00-0x7F 0x0000/' > $@ | ||
| 110 | |||
| 111 | ALTERNATIVNYJ.map: IBM866.map | ||
| 112 | # Generating $@... | ||
| 113 | @echo "# Modified from ibm866 according to the chart at" > $@ | ||
| 114 | @echo "# http://www.cyrillic.com/ref/cyrillic/koi-8alt.html," >> $@ | ||
| 115 | @echo "# with guesses for the Unicodes of the glyphs." >> $@ | ||
| 116 | @sed -e '/0xF2/ s/ .*/ 0x2019/' \ | ||
| 117 | -e '/0xF3/ s/ .*/ 0x2018/' \ | ||
| 118 | -e '/0xF4/ s/ .*/ 0x0301/' \ | ||
| 119 | -e '/0xF5/ s/ .*/ 0x0300/' \ | ||
| 120 | -e '/0xF6/ s/ .*/ 0x203A/' \ | ||
| 121 | -e '/0xF7/ s/ .*/ 0x2039/' \ | ||
| 122 | -e '/0xF8/ s/ .*/ 0x2191/' \ | ||
| 123 | -e '/0xF9/ s/ .*/ 0x2193/' \ | ||
| 124 | -e '/0xFA/ s/ .*/ 0x00B1/' \ | ||
| 125 | -e '/0xFB/ s/ .*/ 0x00F7/' < $< >> $@ | ||
| 126 | |||
| 127 | MIK.map: ${MISC_CHARMAPS}/bulgarian-mik.txt.gz mapconv compact.awk | ||
| 128 | # Generating $@... | ||
| 129 | @mapconv $< '1,$$' CZYBORRA compact.awk > $@ | ||
| 130 | |||
| 131 | PTCP154.map: ${MISC_CHARMAPS}/PTCP154 mapconv compact.awk | ||
| 132 | # Generating $@... | ||
| 133 | @mapconv $< '/^0x/' IANA compact.awk > $@ | ||
| 134 | |||
| 135 | stdenc.map: ${MISC_CHARMAPS}/stdenc.txt mapconv compact.awk | ||
| 136 | # Generating $@... | ||
| 137 | @mapconv $< '/^[0-9A-Fa-f]/' UNICODE compact.awk > $@ | ||
| 138 | |||
| 139 | symbol.map: ${MISC_CHARMAPS}/symbol.txt mapconv compact.awk | ||
| 140 | # Generating $@... | ||
| 141 | @mapconv $< '/^[0-9A-Fa-f]/' UNICODE compact.awk > $@ | ||
| 142 | |||
| 143 | CP720.map: ${MISC_CHARMAPS}/720.htm mapconv compact.awk | ||
| 144 | # Generating $@... | ||
| 145 | @mapconv $< '/^[0-9A-F]/' MICROSOFT compact.awk > $@ | ||
| 146 | |||
| 147 | CP858.map: ${MISC_CHARMAPS}/858.htm mapconv compact.awk | ||
| 148 | # Generating $@... | ||
| 149 | @mapconv $< '/^[0-9A-F]/' MICROSOFT compact.awk > $@ | ||
| 150 | |||
| 151 | CP949-2BYTE.map: ${GLIBC_CHARMAPS}/CP949 mapconv compact.awk | ||
| 152 | # Generating $@... | ||
| 153 | @mapconv $< '/^<.*[ ]\/x[89a-f]/' GLIBC-2 compact.awk > $@ | ||
| 154 | |||
| 155 | GB2312.map: ${GLIBC_CHARMAPS}/GB2312 mapconv compact.awk | ||
| 156 | # Generating $@... | ||
| 157 | @mapconv $< '/^<.*[ ]\/x[a-f]/' GLIBC-2-7 compact.awk > $@ | ||
| 158 | |||
| 159 | GBK.map: ${GLIBC_CHARMAPS}/GBK mapconv compact.awk | ||
| 160 | # Generating $@... | ||
| 161 | @mapconv $< '/^<.*[ ]\/x[89a-f]/' GLIBC-2 compact.awk > $@ | ||
| 162 | |||
| 163 | GB180302.map: ${GLIBC_CHARMAPS}/GB18030 mapconv gb180302.awk | ||
| 164 | # Generating $@... | ||
| 165 | @mapconv $< '/^<.*[ ]\/x..\/x..[ ]/' GLIBC-2 gb180302.awk > $@ | ||
| 166 | |||
| 167 | GB180304.map: GB180302.map gb180304.awk | ||
| 168 | # Generating $@... | ||
| 169 | @$(AWK) -f gb180304.awk < $< > $@ | ||
| 170 | |||
| 171 | JISX0201.map: ${GLIBC_CHARMAPS}/JIS_X0201 mapconv compact.awk | ||
| 172 | # Generating $@... | ||
| 173 | @mapconv $< '/^<.*[ ]\/x[0-9]/' GLIBC-1 compact.awk > $@ | ||
| 174 | @echo "# Generated by hand" >> $@ | ||
| 175 | @echo "0xA1-0xDF 0xFF61" >> $@ | ||
| 176 | |||
| 177 | JISX0208.map: ${GLIBC_CHARMAPS}/EUC-JP mapconv | ||
| 178 | # Generating $@... | ||
| 179 | @mapconv $< '/^<.*[ ]\/x[a-f]/' GLIBC-2-7 > $@ | ||
| 180 | |||
| 181 | JISX0212.map: ${GLIBC_CHARMAPS}/EUC-JP mapconv compact.awk | ||
| 182 | # Generating $@... | ||
| 183 | @mapconv $< '/^<.*[ ]\/x8f/ s,/x8f,,' GLIBC-2-7 compact.awk > $@ | ||
| 184 | |||
| 185 | JISX2131.map: ${GLIBC_CHARMAPS}/EUC-JISX0213 mapconv | ||
| 186 | # Generating $@... | ||
| 187 | @mapconv $< '/^<.*[ ]\/x[a-f]/' GLIBC-2-7 > $@ | ||
| 188 | |||
| 189 | JISX2132.map: ${GLIBC_CHARMAPS}/EUC-JISX0213 mapconv | ||
| 190 | # Generating $@... | ||
| 191 | @mapconv $< '/^<.*[ ]\/x8f/ s,/x8f,,' GLIBC-2-7 > $@ | ||
| 192 | |||
| 193 | JISC6226.map : ${MISC_CHARMAPS}/Uni2JIS mapconv kuten.awk | ||
| 194 | # Generating $@... | ||
| 195 | @mapconv $< '/^[^#].*0-/' YASUOKA kuten.awk > $@ | ||
| 196 | |||
| 197 | KSC5601.map: ${GLIBC_CHARMAPS}/EUC-KR mapconv compact.awk | ||
| 198 | # Generating $@... | ||
| 199 | @mapconv $< '/^<.*[ ]\/x[a-f]/' GLIBC-2-7 compact.awk > $@ | ||
| 200 | |||
| 201 | BIG5.map: ${GLIBC_CHARMAPS}/BIG5 mapconv compact.awk | ||
| 202 | # Generating $@... | ||
| 203 | @mapconv $< '/^<.*[ ]\/x[a-f]/' GLIBC-2 > $@ | ||
| 204 | |||
| 205 | BIG5-1.map: BIG5.map mapconv big5.awk # rows from the 0xa140 line through the 0xc8fe line of BIG5.map, recoded by big5.awk | ||
| 206 | # Generating $@... | ||
| 207 | @echo "# Generated from $<" > $@ | ||
| 208 | @sed -n -e '/0xa140/,/0xc8fe/p' < $< | $(AWK) -f big5.awk >> $@ | ||
| 209 | |||
| 210 | BIG5-2.map: BIG5.map mapconv big5.awk # rows from the 0xc940 line to the end of BIG5.map, recoded by big5.awk | ||
| 211 | # Generating $@... | ||
| 212 | @echo "# Generated from $<" > $@ | ||
| 213 | @sed -n -e '/0xc940/,$$ p' < $< | $(AWK) -f big5.awk >> $@ | ||
| 214 | |||
| 215 | BIG5-HKSCS.map: ${GLIBC_CHARMAPS}/BIG5-HKSCS mapconv compact.awk | ||
| 216 | # Generating $@... | ||
| 217 | @mapconv $< '/^<.*[ ]\/x[89a-f].\//' GLIBC-2 compact.awk > $@ | ||
| 218 | |||
| 219 | JOHAB.map: ${GLIBC_CHARMAPS}/JOHAB mapconv compact.awk | ||
| 220 | # Generating $@... | ||
| 221 | @mapconv $< '/^<.*[ ]\/x[89a-f]/' GLIBC-2 compact.awk > $@ | ||
| 222 | |||
| 223 | CNS-1.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk | ||
| 224 | # Generating $@... | ||
| 225 | @mapconv $< '/^<.*[ ]\/x[a-f]/' GLIBC-2-7 compact.awk > $@ | ||
| 226 | |||
| 227 | CNS-2.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk | ||
| 228 | # Generating $@... | ||
| 229 | @mapconv $< '/^<.*\/x8e\/xa2/s,/x8e/xa2,,' GLIBC-2-7 compact.awk > $@ | ||
| 230 | |||
| 231 | CNS-3.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk | ||
| 232 | # Generating $@... | ||
| 233 | @mapconv $< '/^<.*\/x8e\/xa3/ s,/x8e/xa3,,' GLIBC-2-7 compact.awk > $@ | ||
| 234 | |||
| 235 | CNS-4.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk | ||
| 236 | # Generating $@... | ||
| 237 | @mapconv $< '/^<.*\/x8e\/xa4/ s,/x8e/xa4,,' GLIBC-2-7 compact.awk > $@ | ||
| 238 | |||
| 239 | CNS-5.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk | ||
| 240 | # Generating $@... | ||
| 241 | @mapconv $< '/^<.*\/x8e\/xa5/ s,/x8e/xa5,,' GLIBC-2-7 compact.awk > $@ | ||
| 242 | |||
| 243 | CNS-6.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk | ||
| 244 | # Generating $@... | ||
| 245 | @mapconv $< '/^<.*\/x8e\/xa6/ s,/x8e/xa6,,' GLIBC-2-7 compact.awk > $@ | ||
| 246 | |||
| 247 | CNS-7.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk | ||
| 248 | # Generating $@... | ||
| 249 | @mapconv $< '/^<.*\/x8e\/xa7/ s,/x8e/xa7,,' GLIBC-2-7 compact.awk > $@ | ||
| 250 | |||
| 251 | CNS-F.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk | ||
| 252 | # Generating $@... | ||
| 253 | @mapconv $< '/^<.*\/x8e\/xaf/ s,/x8e/xaf,,' GLIBC-2-7 compact.awk > $@ | ||
| 254 | |||
| 255 | # General target to produce map files for mule charsets. | ||
| 256 | MULE-%.map: mule-charsets.el | ||
| 257 | # Generating $@... | ||
| 258 | @${OLDEMACS} -batch -l ./mule-charsets.el $@ | ||
| 259 | |||
| 260 | # General target to produce map files for ISO-8859, GEORGIAN, and | ||
| 261 | # EBCDIC charsets. We can not use the original file name because of | ||
| 262 | # file name limit on DOS. "KA" is ISO 639 language code for Georgian. | ||
| 263 | |||
| 264 | 8859-%.map: ${GLIBC_CHARMAPS}/ISO-8859-% mapconv compact.awk | ||
| 265 | # Generating $@... | ||
| 266 | @mapconv $< '/^<.*[ ]\/x/' GLIBC-1 compact.awk > $@ | ||
| 267 | |||
| 268 | KA-%.map: ${GLIBC_CHARMAPS}/GEORGIAN-% mapconv compact.awk | ||
| 269 | # Generating $@... | ||
| 270 | @mapconv $< '/^<.*[ ]\/x/' GLIBC-1 compact.awk > $@ | ||
| 271 | |||
| 272 | EBCDIC%.map: ${GLIBC_CHARMAPS}/EBCDIC-% mapconv compact.awk | ||
| 273 | # Generating $@... | ||
| 274 | @mapconv $< '/^<.*[ ]\/x/' GLIBC-1 compact.awk > $@ | ||
| 275 | |||
| 276 | # General target to produce map files for single-byte charsets. | ||
| 277 | |||
| 278 | %.map: ${GLIBC_CHARMAPS}/% mapconv compact.awk | ||
| 279 | # Generating $@... | ||
| 280 | @mapconv $< '/^<.*[ ]\/x/' GLIBC-1 compact.awk > $@ | ||
| 281 | |||
| 282 | install: ${CHARSETS} | ||
| 283 | cp ${CHARSETS} ../../etc/charsets | ||
| 284 | |||
| 285 | # Clear files that are automatically generated. | ||
| 286 | clean: | ||
| 287 | rm -f ${CHARSETS} | ||
diff --git a/admin/charsets/big5.awk b/admin/charsets/big5.awk new file mode 100644 index 00000000000..8d5fa6429b3 --- /dev/null +++ b/admin/charsets/big5.awk | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | BEGIN { | ||
| 2 | tohex["A"] = 10; | ||
| 3 | tohex["B"] = 11; | ||
| 4 | tohex["C"] = 12; | ||
| 5 | tohex["D"] = 13; | ||
| 6 | tohex["E"] = 14; | ||
| 7 | tohex["F"] = 15; | ||
| 8 | tohex["a"] = 10; | ||
| 9 | tohex["b"] = 11; | ||
| 10 | tohex["c"] = 12; | ||
| 11 | tohex["d"] = 13; | ||
| 12 | tohex["e"] = 14; | ||
| 13 | tohex["f"] = 15; | ||
| 14 | } | ||
| 15 | |||
| 16 | function decode_hex(str) { | ||
| 17 | n = 0; | ||
| 18 | len = length(str); | ||
| 19 | for (i = 1; i <= len; i++) | ||
| 20 | { | ||
| 21 | c = substr (str, i, 1); | ||
| 22 | if (c >= "0" && c <= "9") | ||
| 23 | n = n * 16 + (c - "0"); | ||
| 24 | else | ||
| 25 | n = n * 16 + tohex[c]; | ||
| 26 | } | ||
| 27 | return n; | ||
| 28 | } | ||
| 29 | |||
| 30 | function decode_big5(big5) { | ||
| 31 | b0 = int(big5 / 256); | ||
| 32 | b1 = big5 % 256; | ||
| 33 | # (0xFF - 0xA1 + 0x7F - 0x40) = 157 | ||
| 34 | # (0xA1 - (0x7F - 0x40)) = 98 | ||
| 35 | # (0xC9 - 0xA1) * (0xFF - 0xA1 + 0x7F - 0x40) = 6280 | ||
| 36 | if (b1 < 127) | ||
| 37 | idx = (b0 - 161) * 157 + (b1 - 64); | ||
| 38 | else | ||
| 39 | idx = (b0 - 161) * 157 + (b1 - 98); | ||
| 40 | if (b0 >= 201) | ||
| 41 | idx -= 6280; | ||
| 42 | b0 = int(idx / 94) + 33; | ||
| 43 | b1 = (idx % 94) + 33; | ||
| 44 | return (b0 * 256 + b1) | ||
| 45 | } | ||
| 46 | |||
| 47 | { | ||
| 48 | big5 = decode_hex($1); | ||
| 49 | code = decode_big5(big5); | ||
| 50 | printf "0x%04X %s\n", code, $2; | ||
| 51 | } | ||
| 52 | |||
| 53 | |||
diff --git a/admin/charsets/compact.awk b/admin/charsets/compact.awk new file mode 100644 index 00000000000..281e51ebc3b --- /dev/null +++ b/admin/charsets/compact.awk | |||
| @@ -0,0 +1,123 @@ | |||
| 1 | # compact.awk -- Make charset map compact. | ||
| 2 | # Copyright (C) 2003 | ||
| 3 | # National Institute of Advanced Industrial Science and Technology (AIST) | ||
| 4 | # Registration Number H13PRO009 | ||
| 5 | # | ||
| 6 | # This file is part of GNU Emacs. | ||
| 7 | # | ||
| 8 | # GNU Emacs is free software; you can redistribute it and/or modify | ||
| 9 | # it under the terms of the GNU General Public License as published by | ||
| 10 | # the Free Software Foundation; either version 2, or (at your option) | ||
| 11 | # any later version. | ||
| 12 | # | ||
| 13 | # GNU Emacs is distributed in the hope that it will be useful, | ||
| 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 16 | # GNU General Public License for more details. | ||
| 17 | # | ||
| 18 | # You should have received a copy of the GNU General Public License | ||
| 19 | # along with GNU Emacs; see the file COPYING. If not, write to the | ||
| 20 | # Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 21 | # Boston, MA 02111-1307, USA. | ||
| 22 | |||
| 23 | # Comment: | ||
| 24 | # Make a charset map compact by changing this kind of line sequence: | ||
| 25 | # 0x00 0x0000 | ||
| 26 | # 0x01 0x0001 | ||
| 27 | # ... | ||
| 28 | # 0x7F 0x007F | ||
| 29 | # to one line of this format: | ||
| 30 | # 0x00-0x7F 0x0000 | ||
| 31 | |||
| 32 | BEGIN { | ||
| 33 | tohex["0"] = 1; | ||
| 34 | tohex["1"] = 2; | ||
| 35 | tohex["2"] = 3; | ||
| 36 | tohex["3"] = 4; | ||
| 37 | tohex["4"] = 5; | ||
| 38 | tohex["5"] = 6; | ||
| 39 | tohex["6"] = 7; | ||
| 40 | tohex["7"] = 8; | ||
| 41 | tohex["8"] = 9; | ||
| 42 | tohex["9"] = 10; | ||
| 43 | tohex["A"] = 11; | ||
| 44 | tohex["B"] = 12; | ||
| 45 | tohex["C"] = 13; | ||
| 46 | tohex["D"] = 14; | ||
| 47 | tohex["E"] = 15; | ||
| 48 | tohex["F"] = 16; | ||
| 49 | tohex["a"] = 11; | ||
| 50 | tohex["b"] = 12; | ||
| 51 | tohex["c"] = 13; | ||
| 52 | tohex["d"] = 14; | ||
| 53 | tohex["e"] = 15; | ||
| 54 | tohex["f"] = 16; | ||
| 55 | from_code = 0; | ||
| 56 | to_code = -1; | ||
| 57 | to_unicode = 0; | ||
| 58 | from_unicode = 0; | ||
| 59 | } | ||
| 60 | |||
| 61 | function decode_hex(str, idx) { | ||
| 62 | n = 0; | ||
| 63 | len = length(str); | ||
| 64 | for (i = idx; i <= len; i++) | ||
| 65 | { | ||
| 66 | c = tohex[substr (str, i, 1)]; | ||
| 67 | if (c == 0) | ||
| 68 | break; | ||
| 69 | n = n * 16 + c - 1; | ||
| 70 | } | ||
| 71 | return n; | ||
| 72 | } | ||
| 73 | |||
| 74 | /^\#/ { | ||
| 75 | print; | ||
| 76 | next; | ||
| 77 | } | ||
| 78 | |||
| 79 | { | ||
| 80 | code = decode_hex($1, 3); | ||
| 81 | unicode = decode_hex($2, 3); | ||
| 82 | if ((code == to_code + 1) && (unicode == to_unicode + 1)) | ||
| 83 | { | ||
| 84 | to_code++; | ||
| 85 | to_unicode++; | ||
| 86 | } | ||
| 87 | else | ||
| 88 | { | ||
| 89 | if (to_code < 256) | ||
| 90 | { | ||
| 91 | if (from_code == to_code) | ||
| 92 | printf "0x%02X 0x%04X\n", from_code, from_unicode; | ||
| 93 | else if (from_code < to_code) | ||
| 94 | printf "0x%02X-0x%02X 0x%04X\n", from_code, to_code, from_unicode; | ||
| 95 | } | ||
| 96 | else | ||
| 97 | { | ||
| 98 | if (from_code == to_code) | ||
| 99 | printf "0x%04X 0x%04X\n", from_code, from_unicode; | ||
| 100 | else if (from_code < to_code) | ||
| 101 | printf "0x%04X-0x%04X 0x%04X\n", from_code, to_code, from_unicode; | ||
| 102 | } | ||
| 103 | from_code = to_code = code; | ||
| 104 | from_unicode = to_unicode = unicode; | ||
| 105 | } | ||
| 106 | } | ||
| 107 | |||
| 108 | END { # Flush the run still pending after the last input line. | ||
| 109 | if (to_code < 256) # one-byte code points are printed with two hex digits | ||
| 110 | { | ||
| 111 | if (from_code == to_code) | ||
| 112 | printf "0x%02X 0x%04X\n", from_code, from_unicode; | ||
| 113 | else if (from_code < to_code) # from_code > to_code means no data line was read: print nothing | ||
| 114 | printf "0x%02X-0x%02X 0x%04X\n", from_code, to_code, from_unicode; | ||
| 115 | } | ||
| 116 | else # wider code points are printed with four hex digits | ||
| 117 | { | ||
| 118 | if (from_code == to_code) | ||
| 119 | printf "0x%04X 0x%04X\n", from_code, from_unicode; | ||
| 120 | else if (from_code < to_code) # same guard as the per-line rule uses | ||
| 121 | printf "0x%04X-0x%04X 0x%04X\n", from_code, to_code, from_unicode; | ||
| 122 | } | ||
| 123 | } | ||
diff --git a/admin/charsets/gb180302.awk b/admin/charsets/gb180302.awk new file mode 100644 index 00000000000..94d0a9e410a --- /dev/null +++ b/admin/charsets/gb180302.awk | |||
| @@ -0,0 +1,80 @@ | |||
| 1 | BEGIN { | ||
| 2 | tohex["A"] = 10; | ||
| 3 | tohex["B"] = 11; | ||
| 4 | tohex["C"] = 12; | ||
| 5 | tohex["D"] = 13; | ||
| 6 | tohex["E"] = 14; | ||
| 7 | tohex["F"] = 15; | ||
| 8 | tohex["a"] = 10; | ||
| 9 | tohex["b"] = 11; | ||
| 10 | tohex["c"] = 12; | ||
| 11 | tohex["d"] = 13; | ||
| 12 | tohex["e"] = 14; | ||
| 13 | tohex["f"] = 15; | ||
| 14 | from_gb = 0; | ||
| 15 | to_gb = -1; | ||
| 16 | to_unicode = 0; | ||
| 17 | from_unicode = 0; | ||
| 18 | } | ||
| 19 | |||
| 20 | function decode_hex(str) { | ||
| 21 | n = 0; | ||
| 22 | len = length(str); | ||
| 23 | for (i = 1; i <= len; i++) | ||
| 24 | { | ||
| 25 | c = substr (str, i, 1); | ||
| 26 | if (c >= "0" && c <= "9") | ||
| 27 | n = n * 16 + (c - "0"); | ||
| 28 | else | ||
| 29 | n = n * 16 + tohex[c]; | ||
| 30 | } | ||
| 31 | return n; | ||
| 32 | } | ||
| 33 | |||
| 34 | function gb_to_index(gb) { | ||
| 35 | b0 = int(gb / 256); | ||
| 36 | b1 = gb % 256; | ||
| 37 | idx = (((b0 - 129)) * 191 + b1 - 64); | ||
| 38 | # if (b1 >= 128) | ||
| 39 | # idx--; | ||
| 40 | return idx | ||
| 41 | } | ||
| 42 | |||
| 43 | function index_to_gb(idx) { | ||
| 44 | b0 = int(idx / 191) + 129; | ||
| 45 | b1 = (idx % 191) + 64; | ||
| 46 | # if (b1 >= 127) | ||
| 47 | # b1++; | ||
| 48 | return (b0 * 256 + b1); | ||
| 49 | } | ||
| 50 | |||
| 51 | /^\#/ { | ||
| 52 | print; | ||
| 53 | next; | ||
| 54 | } | ||
| 55 | |||
| 56 | { | ||
| 57 | gb = gb_to_index(decode_hex(substr($1, 3, 4))); | ||
| 58 | unicode = decode_hex(substr($2, 3, 4)); | ||
| 59 | if ((gb == to_gb + 1) && (unicode == to_unicode + 1)) | ||
| 60 | { | ||
| 61 | to_gb++; | ||
| 62 | to_unicode++; | ||
| 63 | } | ||
| 64 | else | ||
| 65 | { | ||
| 66 | if (from_gb == to_gb) | ||
| 67 | printf "0x%04X 0x%04X\n", index_to_gb(from_gb), from_unicode; | ||
| 68 | else if (from_gb < to_gb) | ||
| 69 | printf "0x%04X-0x%04X 0x%04X\n", | ||
| 70 | index_to_gb(from_gb), index_to_gb(to_gb), from_unicode; | ||
| 71 | from_gb = to_gb = gb; | ||
| 72 | from_unicode = to_unicode = unicode; | ||
| 73 | } | ||
| 74 | } | ||
| 75 | |||
| 76 | END { | ||
| 77 | if (from_gb <= to_gb) | ||
| 78 | printf "0x%04X-0x%04X 0x%04X\n", | ||
| 79 | index_to_gb(from_gb), index_to_gb(to_gb), from_unicode; | ||
| 80 | } | ||
diff --git a/admin/charsets/gb180304.awk b/admin/charsets/gb180304.awk new file mode 100644 index 00000000000..9dac34bceff --- /dev/null +++ b/admin/charsets/gb180304.awk | |||
| @@ -0,0 +1,102 @@ | |||
| 1 | BEGIN { | ||
| 2 | tohex["A"] = 10; | ||
| 3 | tohex["B"] = 11; | ||
| 4 | tohex["C"] = 12; | ||
| 5 | tohex["D"] = 13; | ||
| 6 | tohex["E"] = 14; | ||
| 7 | tohex["F"] = 15; | ||
| 8 | tohex["a"] = 10; | ||
| 9 | tohex["b"] = 11; | ||
| 10 | tohex["c"] = 12; | ||
| 11 | tohex["d"] = 13; | ||
| 12 | tohex["e"] = 14; | ||
| 13 | tohex["f"] = 15; | ||
| 14 | } | ||
| 15 | |||
| 16 | function decode_hex(str) { | ||
| 17 | n = 0; | ||
| 18 | len = length(str); | ||
| 19 | for (i = 1; i <= len; i++) | ||
| 20 | { | ||
| 21 | c = substr (str, i, 1); | ||
| 22 | if (c >= "0" && c <= "9") | ||
| 23 | n = n * 16 + (c - "0"); | ||
| 24 | else | ||
| 25 | n = n * 16 + tohex[c]; | ||
| 26 | } | ||
| 27 | return n; | ||
| 28 | } | ||
| 29 | |||
| 30 | function gb_to_index(gb) { | ||
| 31 | b0 = int(gb / 256); | ||
| 32 | b1 = gb % 256; | ||
| 33 | idx = (((b0 - 129)) * 191 + b1 - 64); | ||
| 34 | # if (b1 >= 127) | ||
| 35 | # idx--; | ||
| 36 | return idx | ||
| 37 | } | ||
| 38 | |||
| 39 | function index_to_gb(idx) { | ||
| 40 | b3 = (idx % 10) + 48; | ||
| 41 | idx = int(idx / 10); | ||
| 42 | b2 = (idx % 126) + 129; | ||
| 43 | idx = int(idx / 126); | ||
| 44 | b1 = (idx % 10) + 48; | ||
| 45 | b0 = int(idx / 10) + 129; | ||
| 46 | return sprintf("%02X%02X%02X%02X", b0, b1, b2, b3); | ||
| 47 | } | ||
| 48 | |||
| 49 | /^\#/ { | ||
| 50 | print; | ||
| 51 | next; | ||
| 52 | } | ||
| 53 | |||
| 54 | /0x....-0x..../ { | ||
| 55 | gb_from = gb_to_index(decode_hex(substr($1, 3, 4))); | ||
| 56 | gb_to = gb_to_index(decode_hex(substr($1, 10, 4))); | ||
| 57 | unicode = decode_hex(substr($2, 3, 4)); | ||
| 58 | while (gb_from <= gb_to) | ||
| 59 | { | ||
| 60 | table[unicode++] = 1; | ||
| 61 | gb_from++; | ||
| 62 | } | ||
| 63 | next; | ||
| 64 | } | ||
| 65 | |||
| 66 | { | ||
| 67 | gb = decode_hex(substr($1, 3, 4)); | ||
| 68 | unicode = decode_hex(substr($2, 3, 4)); | ||
| 69 | table[unicode] = 1; | ||
| 70 | } | ||
| 71 | |||
| 72 | END { | ||
| 73 | from_gb = -1; | ||
| 74 | to_gb = 0; | ||
| 75 | from_i = 0; | ||
| 76 | table[65536] = 1; | ||
| 77 | for (i = 128; i <= 65536; i++) | ||
| 78 | { | ||
| 79 | if (table[i] == 0) | ||
| 80 | { | ||
| 81 | if (i < 55296 || i >= 57344) | ||
| 82 | { | ||
| 83 | if (from_gb < 0) | ||
| 84 | { | ||
| 85 | from_gb = to_gb; | ||
| 86 | from_i = i; | ||
| 87 | } | ||
| 88 | to_gb++; | ||
| 89 | } | ||
| 90 | } | ||
| 91 | else if (from_gb >= 0) | ||
| 92 | { | ||
| 93 | if (from_gb + 1 == to_gb) | ||
| 94 | printf "0x%s\t\t0x%04X\n", | ||
| 95 | index_to_gb(from_gb), from_i; | ||
| 96 | else | ||
| 97 | printf "0x%s-0x%s\t0x%04X\n", | ||
| 98 | index_to_gb(from_gb), index_to_gb(to_gb - 1), from_i; | ||
| 99 | from_gb = -1; | ||
| 100 | } | ||
| 101 | } | ||
| 102 | } | ||
diff --git a/admin/charsets/kuten.awk b/admin/charsets/kuten.awk new file mode 100644 index 00000000000..b874c78fc49 --- /dev/null +++ b/admin/charsets/kuten.awk | |||
| @@ -0,0 +1,5 @@ | |||
| 1 | /^[0-9]/ { | ||
| 2 | ku=substr($1, 3, 2) + 32; | ||
| 3 | ten=substr($1, 5, 2) + 32; | ||
| 4 | printf "0x%02X%02X %s\n", ku, ten, $2; | ||
| 5 | } | ||
diff --git a/admin/charsets/mapconv b/admin/charsets/mapconv new file mode 100755 index 00000000000..f686ea3799c --- /dev/null +++ b/admin/charsets/mapconv | |||
| @@ -0,0 +1,125 @@ | |||
| 1 | #!/bin/sh | ||
| 2 | # | ||
| 3 | # Copyright (C) 2003 | ||
| 4 | # National Institute of Advanced Industrial Science and Technology (AIST) | ||
| 5 | # Registration Number H13PRO009 | ||
| 6 | # | ||
| 7 | # This file is part of GNU Emacs. | ||
| 8 | # | ||
| 9 | # GNU Emacs is free software; you can redistribute it and/or modify | ||
| 10 | # it under the terms of the GNU General Public License as published by | ||
| 11 | # the Free Software Foundation; either version 2, or (at your option) | ||
| 12 | # any later version. | ||
| 13 | # | ||
| 14 | # GNU Emacs is distributed in the hope that it will be useful, | ||
| 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 17 | # GNU General Public License for more details. | ||
| 18 | # | ||
| 19 | # You should have received a copy of the GNU General Public License | ||
| 20 | # along with GNU Emacs; see the file COPYING. If not, write to the | ||
| 21 | # Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 22 | # Boston, MA 02111-1307, USA. | ||
| 23 | |||
| 24 | # Comment: | ||
| 25 | # Convert charset maps of various formats into this: | ||
| 26 | # 0xXX 0xYYYY | ||
| 27 | # where, | ||
| 28 | # XX is a code point of the charset in hexadecimal, | ||
| 29 | # YYYY is the corresponding Unicode character code in hexadecimal. | ||
| 30 | # Arguments are: | ||
| 31 | # $1: source map file | ||
| 32 | # $2: address pattern for sed (optionally with substitution command) | ||
| 33 | # $3: format of source map file | ||
| 34 | # GLIBC-1 GLIBC-2 GLIBC-2-7 CZYBORRA IANA UNICODE YASUOKA MICROSOFT | ||
| 35 | # $4: awk script | ||
| 36 | |||
| 37 | BASE=`basename $1` | ||
| 38 | |||
| 39 | case "$3" in | ||
| 40 | GLIBC*) | ||
| 41 | SOURCE=`echo $1 | sed 's/.*\(glibc.*$\)/\1/'`;; | ||
| 42 | CZYBORRA) | ||
| 43 | SOURCE="http://czyborra.com/charsets/${BASE}";; | ||
| 44 | IANA) | ||
| 45 | SOURCE="http://www.iana.org/assignments/charset-reg/${BASE}";; | ||
| 46 | UNICODE) | ||
| 47 | SOURCE="http://www.unicode.org/Public/MAPPINGS/.../${BASE}";; | ||
| 48 | YASUOKA) | ||
| 49 | SOURCE="http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/.../${BASE}";; | ||
| 50 | MICROSOFT) | ||
| 51 | SOURCE="http://www.microsoft.com/globaldev/reference/oem/${BASE}";; | ||
| 52 | *) | ||
| 53 | echo "Unknown file type: $3"; | ||
| 54 | exit 1;; | ||
| 55 | esac | ||
| 56 | |||
| 57 | echo "# Generated from $SOURCE" | ||
| 58 | |||
| 59 | if [ -n "$4" ] ; then | ||
| 60 | if [ -f "$4" ] ; then | ||
| 61 | AWKPROG="gawk -f $4" | ||
| 62 | else | ||
| 63 | echo "Awk program does not exist: $4" | ||
| 64 | exit 1 | ||
| 65 | fi | ||
| 66 | else | ||
| 67 | AWKPROG=cat | ||
| 68 | fi | ||
| 69 | |||
| 70 | if [ "$3" = "GLIBC-1" ] ; then # POSIX test compares strings with '=', '==' is a bashism | ||
| 71 | # Source format is: | ||
| 72 | # <UYYYY> /xXX | ||
| 73 | sed -n -e "$2 p" < "$1" \ | ||
| 74 | | sed -e 's,<U\([^>]*\)>[ ]*/x\(..\).*,0x\2 0x\1,' \ | ||
| 75 | | sort | ${AWKPROG} | ||
| 76 | elif [ "$3" = "GLIBC-2" ] ; then | ||
| 77 | # Source format is: | ||
| 78 | # <UYYYY> /xXX/xZZ | ||
| 79 | sed -n -e "$2 p" < "$1" \ | ||
| 80 | | sed -e 's,<U\([^>]*\)>[ ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \ | ||
| 81 | | sort | ${AWKPROG} | ||
| 82 | elif [ "$3" = "GLIBC-2-7" ] ; then | ||
| 83 | # Source format is: | ||
| 84 | # <UYYYY> /xXX/xZZ | ||
| 85 | # We must drop MSBs of XX and ZZ. The result is piped straight to sort; no scratch file is written. | ||
| 86 | sed -n -e "$2 p" < "$1" \ | ||
| 87 | | sed -e 's/xa/x2/g' -e 's/xb/x3/g' -e 's/xc/x4/g' \ | ||
| 88 | -e 's/xd/x5/g' -e 's/xe/x6/g' -e 's/xf/x7/g' \ | ||
| 89 | -e 's,<U\([^>]*\)>[ ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \ | ||
| 90 | | sort \ | ||
| 91 | | ${AWKPROG} | ||
| 92 | elif [ "$3" = "CZYBORRA" ] ; then | ||
| 93 | # Source format is: | ||
| 94 | # =XX U+YYYY | ||
| 95 | zcat "$1" | sed -n -e "$2 p" \ | ||
| 96 | | sed -e 's/=\(..\)[^U]*U+\([0-9A-F]*\).*/0x\1 0x\2/' \ | ||
| 97 | | sort | ${AWKPROG} | ||
| 98 | elif [ "$3" = "IANA" ] ; then | ||
| 99 | # Source format is: | ||
| 100 | # 0xXX 0xYYYY | ||
| 101 | sed -n -e "$2 p" < "$1" \ | ||
| 102 | | sed -e 's/\(0x[0-9A-Fa-f]*\)[^0]*\(0x[0-9A-Fa-f]*\).*/\1 \2/' \ | ||
| 103 | | sort | ${AWKPROG} | ||
| 104 | elif [ "$3" = "UNICODE" ] ; then | ||
| 105 | # Source format is: | ||
| 106 | # YYYY XX | ||
| 107 | sed -n -e "$2 p" < "$1" \ | ||
| 108 | | sed -e 's/\([0-9A-F]*\)[^0-9A-F]*\([0-9A-F]*\).*/0x\2 0x\1/' \ | ||
| 109 | | sort | ${AWKPROG} | ||
| 110 | elif [ "$3" = "YASUOKA" ] ; then | ||
| 111 | # Source format is: | ||
| 112 | # YYYY 0-XXXX (XXXX is a Kuten code) | ||
| 113 | sed -n -e "$2 p" < "$1" \ | ||
| 114 | | sed -e 's/\([0-9A-F]*\)[^0]*0-\([0-9]*\).*/0x\2 0x\1/' \ | ||
| 115 | | sort | ${AWKPROG} | ||
| 116 | elif [ "$3" = "MICROSOFT" ] ; then | ||
| 117 | # Source format is: | ||
| 118 | # XX = U+YYYY | ||
| 119 | sed -n -e "$2 p" < "$1" \ | ||
| 120 | | sed -e 's/\([0-9A-F]*\).*U+\([0-9A-F]*\).*/0x\1 0x\2/' \ | ||
| 121 | | sort | ${AWKPROG} | ||
| 122 | else | ||
| 123 | echo "Invalid arguments" | ||
| 124 | exit 1 | ||
| 125 | fi | ||
diff --git a/admin/charsets/mule-charsets.el b/admin/charsets/mule-charsets.el new file mode 100644 index 00000000000..9fc1ad83fc8 --- /dev/null +++ b/admin/charsets/mule-charsets.el | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | ;; mule-charsets.el -- Generate Mule-original charset maps. | ||
| 2 | ;; Copyright (C) 2003 | ||
| 3 | ;; National Institute of Advanced Industrial Science and Technology (AIST) | ||
| 4 | ;; Registration Number H13PRO009 | ||
| 5 | |||
| 6 | ;; This file is part of GNU Emacs. | ||
| 7 | |||
| 8 | ;; GNU Emacs is free software; you can redistribute it and/or modify | ||
| 9 | ;; it under the terms of the GNU General Public License as published by | ||
| 10 | ;; the Free Software Foundation; either version 2, or (at your option) | ||
| 11 | ;; any later version. | ||
| 12 | |||
| 13 | ;; GNU Emacs is distributed in the hope that it will be useful, | ||
| 14 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 16 | ;; GNU General Public License for more details. | ||
| 17 | |||
| 18 | ;; You should have received a copy of the GNU General Public License | ||
| 19 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | ||
| 20 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 21 | ;; Boston, MA 02111-1307, USA. | ||
| 22 | |||
| 23 | (if (or (< emacs-major-version 21) ; refuse to run on anything older than 21.3.50 | ||
| 24 | (and (= emacs-major-version 21) ; minor version only matters within major version 21 | ||
| 25 | (or (< emacs-minor-version 3) | ||
| 26 | (and (= emacs-minor-version 3) (string< emacs-version "21.3.50"))))) | ||
| 27 | (error "Use Emacs of version 21.3.50 or later")) | ||
| 28 | |||
| 29 | (defun func (start end) | ||
| 30 | (while (<= start end) | ||
| 31 | (let ((split (split-char start)) | ||
| 32 | (unicode (encode-char start 'ucs))) | ||
| 33 | (if unicode | ||
| 34 | (if (nth 2 split) | ||
| 35 | (insert (format "0x%02X%02X 0x%04X\n" | ||
| 36 | (nth 1 split) (nth 2 split) unicode)) | ||
| 37 | (insert (format "0x%02X 0x%04X\n" (nth 1 split) unicode))))) | ||
| 38 | (setq start (1+ start)))) | ||
| 39 | |||
| 40 | (defconst charset-alist | ||
| 41 | '(("MULE-ethiopic.map" . ethiopic) | ||
| 42 | ("MULE-ipa.map" . ipa) | ||
| 43 | ("MULE-is13194.map" . indian-is13194) | ||
| 44 | ("MULE-sisheng.map" . chinese-sisheng) | ||
| 45 | ("MULE-tibetan.map" . tibetan) | ||
| 46 | ("MULE-lviscii.map" . vietnamese-viscii-lower) | ||
| 47 | ("MULE-uviscii.map" . vietnamese-viscii-upper))) | ||
| 48 | |||
| 49 | (setq file (car command-line-args-left)) | ||
| 50 | (or (stringp file) | ||
| 51 | (error "Invalid file name: %s" file)) | ||
| 52 | (setq charset (cdr (assoc file charset-alist))) | ||
| 53 | (or charset | ||
| 54 | (error "Invalid charset: %s" (car command-line-args-left))) | ||
| 55 | |||
| 56 | (with-temp-buffer | ||
| 57 | (map-charset-chars 'func charset) | ||
| 58 | (write-file file)) | ||