New directory

author: Kenichi Handa 2003-09-08 11:56:09 +0000
committer: Kenichi Handa 2003-09-08 11:56:09 +0000
commit: 463f5630a5e7cbe7f042bc1175d1fa1c4e98860f (patch)
tree: 3287d0c628fea2249abf4635b3a4f45bedd6f8c4 /admin/charsets
parent: 4256310de631bd57c78b88b5131caa073315b3d7 (diff)
download: emacs-463f5630a5e7cbe7f042bc1175d1fa1c4e98860f.tar.gz
emacs-463f5630a5e7cbe7f042bc1175d1fa1c4e98860f.zip
8 files changed, 833 insertions, 0 deletions
diff --git a/admin/charsets/Makefile b/admin/charsets/Makefile
new file mode 100644
index 00000000000..0628bfeba74
--- /dev/null
+++ b/admin/charsets/Makefile
@@ -0,0 +1,287 @@
+# Makefile -- Makefile to generate charset maps in etc/charsets.
+# Copyright (C) 2003
+#   National Institute of Advanced Industrial Science and Technology (AIST)
+#   Registration Number H13PRO009
+#
+# This file is part of GNU Emacs.
+# GNU Emacs is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+# GNU Emacs is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# You should have received a copy of the GNU General Public License
+# along with GNU Emacs; see the file COPYING.  If not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+# Commentary
+# At first, set these environment variables:
+# GLIBC_CHARMAPS
+#   Directory of glibc-VERSION/localedata/charmaps.
+#   VERSION must be 2.3 or later.
+# MISC_CHARMAPS
+#   Directory containing these charmap files:
+#   o bulgarian-mik.txt.gz
+#       provided at <http://czyborra.com/charsets/>
+#   o PTCP154
+#       provided at <http://www.iana.org/assignments/charset-reg/>
+#   o stdenc.txt and symbol.txt
+#       provided at <http://www.unicode.org/Public/MAPPINGS/>
+#   o Uni2JIS
+#       provided at <http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/CJK.html>
+#   o 720.htm and 858.htm
+#       provided at <http://www.microsoft.com/globaldev/reference/oem/>
+# OLDEMACS
+#   emacs of version 21.3.50 or later
+#
+# Then, do this:
+#   % make install
+# Every map file this Makefile generates.  The assignment has to stay
+# recursively expanded (`=') because the lists it refers to are only
+# defined further down.
+CHARSETS = $(ISO8859) $(IBM) $(CODEPAGE) $(CJK) $(MISC) $(MULE)
+# Note: We can not prepend "ISO-" to these map files because of file
+# name limits on DOS.
+# Each of these is produced by the 8859-%.map pattern rule from
+# ${GLIBC_CHARMAPS}/ISO-8859-%.
+ISO8859 = \
+        8859-2.map 8859-3.map 8859-4.map 8859-5.map 8859-6.map 8859-7.map \
+        8859-8.map 8859-9.map 8859-10.map 8859-11.map 8859-13.map 8859-14.map \
+        8859-15.map 8859-16.map
+# IBM charsets.  None has an explicit rule; each is built by the
+# generic %.map pattern rule from the file of the same name in
+# ${GLIBC_CHARMAPS}.
+IBM = \
+        IBM037.map IBM038.map \
+        IBM256.map IBM273.map IBM274.map IBM275.map IBM277.map IBM278.map \
+        IBM280.map IBM281.map IBM284.map IBM285.map IBM290.map IBM297.map \
+        IBM420.map IBM423.map IBM424.map IBM437.map IBM500.map IBM850.map \
+        IBM851.map IBM852.map IBM855.map IBM856.map IBM857.map IBM860.map \
+        IBM861.map IBM862.map IBM863.map IBM864.map IBM865.map IBM866.map \
+        IBM868.map IBM869.map IBM870.map IBM871.map IBM874.map IBM875.map \
+        IBM880.map IBM891.map IBM903.map IBM904.map IBM905.map IBM918.map \
+        IBM1004.map IBM1026.map IBM1047.map
+# Code pages.  CP720.map and CP858.map have explicit rules that read
+# files in ${MISC_CHARMAPS}; the others are built by the generic %.map
+# pattern rule from ${GLIBC_CHARMAPS}.
+CODEPAGE = \
+        CP737.map CP775.map CP1125.map\
+        CP1250.map CP1251.map CP1252.map CP1253.map CP1254.map \
+        CP1255.map CP1256.map CP1257.map CP1258.map \
+        CP10007.map \
+        CP720.map CP858.map
+# Chinese, Japanese and Korean charsets.  All but KSC5636.map have an
+# explicit rule below; KSC5636.map falls through to the generic %.map
+# pattern rule.
+CJK =   GB2312.map GBK.map GB180302.map GB180304.map \
+        BIG5.map BIG5-HKSCS.map\
+        CNS-1.map CNS-2.map CNS-3.map CNS-4.map CNS-5.map CNS-6.map CNS-7.map \
+        CNS-F.map \
+        JISX0201.map JISX0208.map JISX0212.map JISX2131.map JISX2132.map \
+        JISC6226.map \
+        KSC5601.map KSC5636.map JOHAB.map
+# Miscellaneous charsets.  ALTERNATIVNYJ.map is derived from
+# IBM866.map, and BIG5-1.map and BIG5-2.map are derived from BIG5.map,
+# rather than from an external charmap file.
+MISC =  KOI-8.map KOI8-R.map KOI8-U.map KOI8-T.map ALTERNATIVNYJ.map \
+        MIK.map PTCP154.map \
+        TIS-620.map VISCII.map VSCII.map VSCII-2.map\
+        KA-PS.map KA-ACADEMY.map \
+        HP-ROMAN8.map NEXTSTEP.map MACINTOSH.map EBCDICUK.map EBCDICUS.map \
+        stdenc.map symbol.map \
+        CP949-2BYTE.map \
+        BIG5-1.map BIG5-2.map
+# Emacs-mule charsets.
+# These are generated by the MULE-%.map pattern rule, which runs
+# mule-charsets.el under ${OLDEMACS}.
+MULE =  MULE-ethiopic.map MULE-ipa.map MULE-is13194.map \
+        MULE-sisheng.map MULE-tibetan.map \
+        MULE-lviscii.map MULE-uviscii.map
+# Default goal (the first rule in this file): generate every charset
+# map in the current directory.  It names an action, not a file, so it
+# is declared phony; otherwise a stray file called "charsets" would
+# make the goal look up to date.
+.PHONY: charsets
+charsets: ${CHARSETS}
+# awk implementation used by the recipes that run awk directly.
+AWK = gawk
+# Rules for each charset
+# VSCII.map: all single-byte entries of the glibc TCVN5712-1 charmap.
+# mapconv is run as ./mapconv so that the recipe does not depend on
+# "." being in PATH.
+VSCII.map: ${GLIBC_CHARMAPS}/TCVN5712-1 mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*[     ]\/x[0-9a-f].[  ]/' GLIBC-1 compact.awk > $@
+# VSCII-2.map: the same source restricted to codes whose first hex
+# digit is 2-7 or a-f; the compacted 0x20-0x7F line is then replaced
+# by a line mapping 0x00-0x7F to U+0000 onward.
+VSCII-2.map: ${GLIBC_CHARMAPS}/TCVN5712-1 mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*[     ]\/x[2-7a-f].[  ]/' GLIBC-1 compact.awk \
+          | sed 's/0x20-0x7F.*/0x00-0x7F 0x0000/' > $@
+# ALTERNATIVNYJ.map is IBM866.map with the Unicode column of codes
+# 0xF2..0xFB replaced; a three-line header comment is written first.
+ALTERNATIVNYJ.map: IBM866.map
+        # Generating $@...
+        @{ echo "# Modified from ibm866 according to the chart at"; \
+           echo "# http://www.cyrillic.com/ref/cyrillic/koi-8alt.html,"; \
+           echo "# with guesses for the Unicodes of the glyphs."; \
+           sed -e '/0xF2/ s/ .*/ 0x2019/' \
+               -e '/0xF3/ s/ .*/ 0x2018/' \
+               -e '/0xF4/ s/ .*/ 0x0301/' \
+               -e '/0xF5/ s/ .*/ 0x0300/' \
+               -e '/0xF6/ s/ .*/ 0x203A/' \
+               -e '/0xF7/ s/ .*/ 0x2039/' \
+               -e '/0xF8/ s/ .*/ 0x2191/' \
+               -e '/0xF9/ s/ .*/ 0x2193/' \
+               -e '/0xFA/ s/ .*/ 0x00B1/' \
+               -e '/0xFB/ s/ .*/ 0x00F7/' < $<; \
+         } > $@
+# Maps built from the files in ${MISC_CHARMAPS}.  The third mapconv
+# argument names the source file format.  mapconv is run as ./mapconv
+# so that the recipes do not depend on "." being in PATH.
+MIK.map: ${MISC_CHARMAPS}/bulgarian-mik.txt.gz mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '1,$$' CZYBORRA compact.awk > $@
+PTCP154.map: ${MISC_CHARMAPS}/PTCP154 mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^0x/' IANA compact.awk > $@
+stdenc.map: ${MISC_CHARMAPS}/stdenc.txt mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^[0-9A-Fa-f]/' UNICODE compact.awk > $@
+symbol.map: ${MISC_CHARMAPS}/symbol.txt mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^[0-9A-Fa-f]/' UNICODE compact.awk > $@
+CP720.map: ${MISC_CHARMAPS}/720.htm mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^[0-9A-F]/' MICROSOFT compact.awk > $@
+CP858.map: ${MISC_CHARMAPS}/858.htm mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^[0-9A-F]/' MICROSOFT compact.awk > $@
+# Two-byte Korean and Chinese maps from the glibc charmaps.  mapconv
+# is run as ./mapconv so that the recipes do not depend on "." being
+# in PATH.
+CP949-2BYTE.map: ${GLIBC_CHARMAPS}/CP949 mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*[     ]\/x[89a-f]/' GLIBC-2 compact.awk > $@
+GB2312.map: ${GLIBC_CHARMAPS}/GB2312 mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*[     ]\/x[a-f]/' GLIBC-2-7 compact.awk > $@
+GBK.map: ${GLIBC_CHARMAPS}/GBK mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*[     ]\/x[89a-f]/' GLIBC-2 compact.awk > $@
+# GB180302.map holds the two-byte entries of GB18030 and is compacted
+# by gb180302.awk instead of compact.awk.
+GB180302.map: ${GLIBC_CHARMAPS}/GB18030 mapconv gb180302.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*[     ]\/x..\/x..[    ]/' GLIBC-2 gb180302.awk > $@
+# GB180304.map is computed by gb180304.awk from GB180302.map.
+GB180304.map: GB180302.map gb180304.awk
+        # Generating $@...
+        @$(AWK) -f gb180304.awk < $< > $@
+# Japanese maps.  mapconv is run as ./mapconv so that the recipes do
+# not depend on "." being in PATH.
+# JISX0201.map: codes /x0. to /x9. come from the glibc charmap; the
+# 0xA1-0xDF line is appended by hand.
+JISX0201.map: ${GLIBC_CHARMAPS}/JIS_X0201 mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*[     ]\/x[0-9]/' GLIBC-1 compact.awk > $@
+        @echo "# Generated by hand" >> $@
+        @echo "0xA1-0xDF 0xFF61" >> $@
+# No awk script is passed for JISX0208, JISX2131 and JISX2132, so
+# those maps keep one line per code.
+JISX0208.map: ${GLIBC_CHARMAPS}/EUC-JP mapconv
+        # Generating $@...
+        @./mapconv $< '/^<.*[     ]\/x[a-f]/' GLIBC-2-7 > $@
+# The sed command given for JISX0212 and JISX2132 selects the entries
+# prefixed with /x8f and strips that prefix.
+JISX0212.map: ${GLIBC_CHARMAPS}/EUC-JP mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*[     ]\/x8f/ s,/x8f,,' GLIBC-2-7 compact.awk > $@
+JISX2131.map: ${GLIBC_CHARMAPS}/EUC-JISX0213 mapconv
+        # Generating $@...
+        @./mapconv $< '/^<.*[     ]\/x[a-f]/' GLIBC-2-7 > $@
+JISX2132.map: ${GLIBC_CHARMAPS}/EUC-JISX0213 mapconv
+        # Generating $@...
+        @./mapconv $< '/^<.*[     ]\/x8f/ s,/x8f,,' GLIBC-2-7 > $@
+# JISC6226.map is post-processed by kuten.awk, not compact.awk.
+JISC6226.map: ${MISC_CHARMAPS}/Uni2JIS mapconv kuten.awk
+        # Generating $@...
+        @./mapconv $< '/^[^#].*0-/' YASUOKA kuten.awk > $@
+# KSC5601.map: the two-byte entries of the glibc EUC-KR charmap.
+# mapconv is run as ./mapconv so that the recipe does not depend on
+# "." being in PATH.
+KSC5601.map: ${GLIBC_CHARMAPS}/EUC-KR mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*[     ]\/x[a-f]/' GLIBC-2-7 compact.awk > $@
+# BIG5.map is generated without an awk script, so it keeps one line
+# per code; the BIG5-1 and BIG5-2 recipes below select ranges of those
+# lines with sed.  compact.awk is therefore not a prerequisite.
+# mapconv is run as ./mapconv so that the recipe does not depend on
+# "." being in PATH.
+BIG5.map: ${GLIBC_CHARMAPS}/BIG5 mapconv
+        # Generating $@...
+        @./mapconv $< '/^<.*[     ]\/x[a-f]/' GLIBC-2 > $@
+# BIG5-1.map and BIG5-2.map re-encode the two halves of BIG5.map with
+# big5.awk.  The header line starts with "#" like the header of every
+# other generated map, so that it is a comment and not a data line.
+# ${AWK} is used instead of a hard-coded awk name.
+BIG5-1.map: BIG5.map big5.awk
+        # Generating $@...
+        @echo "# Generated from $<" > $@
+        @sed -n -e '/0xa140/,/0xc8fe/p' < $< | $(AWK) -f big5.awk >> $@
+BIG5-2.map: BIG5.map big5.awk
+        # Generating $@...
+        @echo "# Generated from $<" > $@
+        @sed -n -e '/0xc940/,$$ p' < $< | $(AWK) -f big5.awk >> $@
+# Two-byte entries of the glibc BIG5-HKSCS and JOHAB charmaps.
+# mapconv is run as ./mapconv so that the recipes do not depend on
+# "." being in PATH.
+BIG5-HKSCS.map: ${GLIBC_CHARMAPS}/BIG5-HKSCS mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*[     ]\/x[89a-f].\//' GLIBC-2 compact.awk > $@
+JOHAB.map: ${GLIBC_CHARMAPS}/JOHAB mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*[     ]\/x[89a-f]/' GLIBC-2 compact.awk > $@
+# CNS maps, all from the glibc EUC-TW charmap.  CNS-1 takes the plain
+# two-byte entries; for the others the sed command selects the
+# entries prefixed with /x8e/xaN and strips that prefix.  mapconv is
+# run as ./mapconv so that the recipes do not depend on "." being in
+# PATH.
+CNS-1.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*[     ]\/x[a-f]/' GLIBC-2-7 compact.awk > $@
+CNS-2.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*\/x8e\/xa2/s,/x8e/xa2,,' GLIBC-2-7 compact.awk > $@
+CNS-3.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*\/x8e\/xa3/ s,/x8e/xa3,,' GLIBC-2-7 compact.awk > $@
+CNS-4.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*\/x8e\/xa4/ s,/x8e/xa4,,' GLIBC-2-7 compact.awk > $@
+CNS-5.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*\/x8e\/xa5/ s,/x8e/xa5,,' GLIBC-2-7 compact.awk > $@
+CNS-6.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*\/x8e\/xa6/ s,/x8e/xa6,,' GLIBC-2-7 compact.awk > $@
+CNS-7.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*\/x8e\/xa7/ s,/x8e/xa7,,' GLIBC-2-7 compact.awk > $@
+CNS-F.map: ${GLIBC_CHARMAPS}/EUC-TW mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*\/x8e\/xaf/ s,/x8e/xaf,,' GLIBC-2-7 compact.awk > $@
+# Pattern rule for the Emacs-mule charset maps: $(OLDEMACS) loads
+# mule-charsets.el in batch mode and is handed the target file name
+# as its remaining command-line argument.
+MULE-%.map: mule-charsets.el
+        # Generating $@...
+        @$(OLDEMACS) -batch -l ./mule-charsets.el $@
+# General target to produce map files for ISO-8859, GEORGIAN, and
+# EBCDIC charsets.  We can not use the original file name because of
+# file name limit on DOS.  "KA" is ISO 639 language code for Georgian.
+# mapconv is run as ./mapconv so that the recipes do not depend on
+# "." being in PATH.
+8859-%.map: ${GLIBC_CHARMAPS}/ISO-8859-% mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*[     ]\/x/' GLIBC-1 compact.awk > $@
+KA-%.map: ${GLIBC_CHARMAPS}/GEORGIAN-% mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*[     ]\/x/' GLIBC-1 compact.awk > $@
+EBCDIC%.map: ${GLIBC_CHARMAPS}/EBCDIC-% mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*[     ]\/x/' GLIBC-1 compact.awk > $@
+# General target to produce map files for single-byte charsets.
+%.map: ${GLIBC_CHARMAPS}/% mapconv compact.awk
+        # Generating $@...
+        @./mapconv $< '/^<.*[     ]\/x/' GLIBC-1 compact.awk > $@
+# install and clean name actions, not files.  Declaring them phony
+# keeps a stray file called "install" or "clean" from suppressing the
+# recipe.
+.PHONY: install clean
+# Copy the generated maps into the Emacs etc/charsets directory.
+install: ${CHARSETS}
+        cp ${CHARSETS} ../../etc/charsets
+# Clear files that are automatically generated.
+clean:
+        rm -f ${CHARSETS}
diff --git a/admin/charsets/big5.awk b/admin/charsets/big5.awk
new file mode 100644
index 00000000000..8d5fa6429b3
--- /dev/null
+++ b/admin/charsets/big5.awk
@@ -0,0 +1,53 @@
+# Re-encode Big5 code points as two-byte codes whose bytes both lie in
+# 0x21..0x7E.  Each input line is "BIG5-CODE UNICODE"; the second
+# field is copied to the output unchanged.
+BEGIN {
+  # Values of the hexadecimal letters in both cases.  Decimal digits
+  # are handled arithmetically in decode_hex.
+  nletters = split("A B C D E F", letters, " ");
+  for (k = 1; k <= nletters; k++)
+    {
+      tohex[letters[k]] = 9 + k;
+      tohex[tolower(letters[k])] = 9 + k;
+    }
+}
+# Return the value of the hexadecimal string STR.  A character that is
+# neither a decimal digit nor a hex letter counts as the digit 0, which
+# is what makes a leading "0x" harmless.
+function decode_hex(str,    value, pos, digit) {
+  value = 0;
+  for (pos = 1; pos <= length(str); pos++)
+    {
+      digit = substr (str, pos, 1);
+      value *= 16;
+      if (digit >= "0" && digit <= "9")
+        value += (digit - "0");
+      else
+        value += tohex[digit];
+    }
+  return value;
+}
+# Convert the Big5 code BIG5 to a linear offset and split that offset
+# into two bytes of 94 values each, both starting at 33 (0x21).
+function decode_big5(big5,    lead, trail, offset) {
+  lead = int(big5 / 256);
+  trail = big5 % 256;
+# (0xFF - 0xA1 + 0x7F - 0x40) = 157 trail bytes per lead byte
+# (0xA1 - (0x7F - 0x40)) = 98 is subtracted from a trail byte >= 0x7F
+# (0xC9 - 0xA1) * 157 = 6280 rebases lead bytes from 0xC9 to offset 0
+  offset = (lead - 161) * 157;
+  if (trail < 127)
+    offset += trail - 64;
+  else
+    offset += trail - 98;
+  if (lead >= 201)
+    offset -= 6280;
+  return ((int(offset / 94) + 33) * 256 + (offset % 94) + 33)
+}
+{
+  printf "0x%04X %s\n", decode_big5(decode_hex($1)), $2;
+}
+    
diff --git a/admin/charsets/compact.awk b/admin/charsets/compact.awk
new file mode 100644
index 00000000000..281e51ebc3b
--- /dev/null
+++ b/admin/charsets/compact.awk
@@ -0,0 +1,123 @@
+# compact.awk -- Make charset map compact.
+# Copyright (C) 2003
+#   National Institute of Advanced Industrial Science and Technology (AIST)
+#   Registration Number H13PRO009
+#
+# This file is part of GNU Emacs.
+#
+# GNU Emacs is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# GNU Emacs is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Emacs; see the file COPYING.  If not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+# Comment:
+# Make a charset map compact by changing this kind of line sequence:
+#   0x00 0x0000
+#   0x01 0x0001
+#   ...
+#   0x7F 0x007F
+# to one line of this format:
+#   0x00-0x7F 0x0000
+BEGIN {
+  # tohex[] stores each hex digit's value plus one, so that the lookup
+  # of any other character (which yields 0) can be told apart from the
+  # digit "0".
+  tohex["0"] = 1;
+  tohex["1"] = 2;
+  tohex["2"] = 3;
+  tohex["3"] = 4;
+  tohex["4"] = 5;
+  tohex["5"] = 6;
+  tohex["6"] = 7;
+  tohex["7"] = 8;
+  tohex["8"] = 9;
+  tohex["9"] = 10;
+  tohex["A"] = 11;
+  tohex["B"] = 12;
+  tohex["C"] = 13;
+  tohex["D"] = 14;
+  tohex["E"] = 15;
+  tohex["F"] = 16;
+  tohex["a"] = 11;
+  tohex["b"] = 12;
+  tohex["c"] = 13;
+  tohex["d"] = 14;
+  tohex["e"] = 15;
+  tohex["f"] = 16;
+  # The pending run of consecutive mappings covers the codes
+  # from_code..to_code and starts at from_unicode.  to_code < from_code
+  # means that no run is pending yet.
+  from_code = 0;
+  to_code = -1;
+  # -2 can never be one less than a decoded value, so the very first
+  # mapping always starts a new run instead of extending the empty one.
+  to_unicode = -2;
+  from_unicode = 0;
+}
+# Return the value of the hexadecimal number in STR that starts at
+# character position IDX, stopping at the first non-hex character.
+function decode_hex(str, idx,    n, len, i, c) {
+  n = 0;
+  len = length(str);
+  for (i = idx; i <= len; i++)
+    {
+      c = tohex[substr (str, i, 1)];
+      if (c == 0)
+        break;
+      n = n * 16 + c - 1;
+    }
+  return n;
+}
+# Print the pending run, if there is one: as "CODE UNICODE" when it
+# holds a single code, as "FROM-TO UNICODE" otherwise.  Codes are
+# printed with two hex digits while to_code is below 256 and with four
+# digits otherwise.
+function flush_run(    code_fmt, line_fmt) {
+  if (from_code > to_code)
+    return;
+  code_fmt = (to_code < 256) ? "0x%02X" : "0x%04X";
+  if (from_code == to_code)
+    {
+      line_fmt = code_fmt " 0x%04X\n";
+      printf line_fmt, from_code, from_unicode;
+    }
+  else
+    {
+      line_fmt = code_fmt "-" code_fmt " 0x%04X\n";
+      printf line_fmt, from_code, to_code, from_unicode;
+    }
+}
+# Comment lines are copied through unchanged.
+/^#/ {
+  print;
+  next;
+}
+{
+  # Both fields are decoded from the third character on, i.e. after
+  # the "0x" prefix.
+  code = decode_hex($1, 3);
+  unicode = decode_hex($2, 3);
+  if ((code == to_code + 1) && (unicode == to_unicode + 1))
+    {
+      to_code++;
+      to_unicode++;
+    }
+  else
+    {
+      flush_run();
+      from_code = to_code = code;
+      from_unicode = to_unicode = unicode;
+    }
+}
+END {
+  # Nothing is printed here when the input held no mapping at all.
+  flush_run();
+}
diff --git a/admin/charsets/gb180302.awk b/admin/charsets/gb180302.awk
new file mode 100644
index 00000000000..94d0a9e410a
--- /dev/null
+++ b/admin/charsets/gb180302.awk
@@ -0,0 +1,80 @@
+# Compact a two-byte GB18030 map: runs of mappings that are adjacent
+# both in GB order and in Unicode are printed as one
+# "0xFROM-0xTO 0xUNICODE" line.
+BEGIN {
+  # Values of the hexadecimal letters; decimal digits are handled
+  # arithmetically in decode_hex.
+  tohex["A"] = 10;
+  tohex["B"] = 11;
+  tohex["C"] = 12;
+  tohex["D"] = 13;
+  tohex["E"] = 14;
+  tohex["F"] = 15;
+  tohex["a"] = 10;
+  tohex["b"] = 11;
+  tohex["c"] = 12;
+  tohex["d"] = 13;
+  tohex["e"] = 14;
+  tohex["f"] = 15;
+  # The pending run covers the linear indexes from_gb..to_gb and
+  # starts at from_unicode.  to_gb < from_gb means that no run is
+  # pending yet.
+  from_gb = 0;
+  to_gb = -1;
+  to_unicode = 0;
+  from_unicode = 0;
+}
+# Return the value of the hexadecimal string STR.
+function decode_hex(str,    n, len, i, c) {
+  n = 0;
+  len = length(str);
+  for (i = 1; i <= len; i++)
+    {
+      c = substr (str, i, 1);
+      if (c >= "0" && c <= "9")
+        n = n * 16 + (c - "0");
+      else
+        n = n * 16 + tohex[c];
+    }
+  return n;
+}
+# Convert the two-byte code GB to a linear index: lead bytes count
+# from 0x81 (129) and each has 191 trail positions counted from 0x40
+# (64).
+function gb_to_index(gb,    b0, b1) {
+  b0 = int(gb / 256);
+  b1 = gb % 256;
+  return ((b0 - 129) * 191 + b1 - 64);
+}
+# Inverse of gb_to_index.
+function index_to_gb(idx,    b0, b1) {
+  b0 = int(idx / 191) + 129;
+  b1 = (idx % 191) + 64;
+  return (b0 * 256 + b1);
+}
+# Print the pending run, if there is one: as a single code when it
+# holds one code, as a range otherwise.
+function flush_run() {
+  if (from_gb == to_gb)
+    printf "0x%04X 0x%04X\n", index_to_gb(from_gb), from_unicode;
+  else if (from_gb < to_gb)
+    printf "0x%04X-0x%04X 0x%04X\n",
+      index_to_gb(from_gb), index_to_gb(to_gb), from_unicode;
+}
+# Comment lines are copied through unchanged.
+/^#/ {
+  print;
+  next;
+}
+{
+  # Characters 3..6 of each field are the four hex digits that follow
+  # the "0x" prefix.
+  gb = gb_to_index(decode_hex(substr($1, 3, 4)));
+  unicode = decode_hex(substr($2, 3, 4));
+  if ((gb == to_gb + 1) && (unicode == to_unicode + 1))
+    {
+      to_gb++;
+      to_unicode++;
+    }
+  else
+    {
+      flush_run();
+      from_gb = to_gb = gb;
+      from_unicode = to_unicode = unicode;
+    }
+}
+END {
+  # The last run is printed in the same format as all the others.
+  flush_run();
+}
diff --git a/admin/charsets/gb180304.awk b/admin/charsets/gb180304.awk
new file mode 100644
index 00000000000..9dac34bceff
--- /dev/null
+++ b/admin/charsets/gb180304.awk
@@ -0,0 +1,102 @@
+# Generate the four-byte part of a GB18030 map from the two-byte part
+# read on standard input.  Every code point from 128 to 65535 that the
+# input does not mention, surrogates (55296..57343) excluded, is given
+# the next four-byte code in sequence; runs of consecutive code points
+# are printed as ranges.
+BEGIN {
+  # Values of the hexadecimal letters; decimal digits are handled
+  # arithmetically in decode_hex.
+  tohex["A"] = 10;
+  tohex["B"] = 11;
+  tohex["C"] = 12;
+  tohex["D"] = 13;
+  tohex["E"] = 14;
+  tohex["F"] = 15;
+  tohex["a"] = 10;
+  tohex["b"] = 11;
+  tohex["c"] = 12;
+  tohex["d"] = 13;
+  tohex["e"] = 14;
+  tohex["f"] = 15;
+}
+# Return the value of the hexadecimal string STR.
+function decode_hex(str,    n, len, i, c) {
+  n = 0;
+  len = length(str);
+  for (i = 1; i <= len; i++)
+    {
+      c = substr (str, i, 1);
+      if (c >= "0" && c <= "9")
+        n = n * 16 + (c - "0");
+      else
+        n = n * 16 + tohex[c];
+    }
+  return n;
+}
+# Convert the two-byte code GB to a linear index.  It is used here
+# only to count how many codes an input range covers.
+function gb_to_index(gb,    b0, b1) {
+  b0 = int(gb / 256);
+  b1 = gb % 256;
+  return ((b0 - 129) * 191 + b1 - 64);
+}
+# Return the four-byte code for the linear index IDX as eight hex
+# digits.  From the last byte to the first, the bytes take 10 values
+# from 0x30, 126 values from 0x81, 10 values from 0x30, and the
+# remaining quotient added to 0x81.
+function index_to_gb(idx,    b0, b1, b2, b3) {
+  b3 = (idx % 10) + 48;
+  idx = int(idx / 10);
+  b2 = (idx % 126) + 129;
+  idx = int(idx / 126);
+  b1 = (idx % 10) + 48;
+  b0 = int(idx / 10) + 129;
+  return sprintf("%02X%02X%02X%02X", b0, b1, b2, b3);
+}
+# Comment lines are copied through unchanged.
+/^#/ {
+  print;
+  next;
+}
+# Range line "0xFROM-0xTO 0xUNICODE": mark one code point per code in
+# the range, starting at UNICODE.
+/0x....-0x..../ {
+  gb_from = gb_to_index(decode_hex(substr($1, 3, 4)));
+  gb_to = gb_to_index(decode_hex(substr($1, 10, 4)));
+  unicode = decode_hex(substr($2, 3, 4));
+  while (gb_from <= gb_to)
+    {
+      table[unicode++] = 1;
+      gb_from++;
+    }
+  next;
+}
+# Single-code line: only the Unicode column matters.
+{
+  unicode = decode_hex(substr($2, 3, 4));
+  table[unicode] = 1;
+}
+END {
+  # to_gb counts the four-byte codes handed out so far.  from_gb is
+  # the index at which the pending run started, or -1 when no run is
+  # pending, and from_i is the code point that run started at.
+  from_gb = -1;
+  to_gb = 0;
+  from_i = 0;
+  # Sentinel just past the last code point, so that a run still
+  # pending at the end is printed.
+  table[65536] = 1;
+  for (i = 128; i <= 65536; i++)
+    {
+      if (table[i] == 0)
+        {
+          # Surrogates get no code, and they do not end a pending run.
+          if (i < 55296 || i >= 57344)
+            {
+              if (from_gb < 0)
+                {
+                  from_gb = to_gb;
+                  from_i = i;
+                }
+              to_gb++;
+            }
+        }
+      else if (from_gb >= 0)
+        {
+          if (from_gb + 1 == to_gb)
+            printf "0x%s\t\t0x%04X\n",
+              index_to_gb(from_gb), from_i;
+          else
+            printf "0x%s-0x%s\t0x%04X\n",
+              index_to_gb(from_gb), index_to_gb(to_gb - 1), from_i;
+          from_gb = -1;
+        }
+    }
+}
diff --git a/admin/charsets/kuten.awk b/admin/charsets/kuten.awk
new file mode 100644
index 00000000000..b874c78fc49
--- /dev/null
+++ b/admin/charsets/kuten.awk
@@ -0,0 +1,5 @@
+# For each line that starts with a digit, read characters 3-4 and 5-6
+# of the first field as two decimal numbers, add 32 to each, and print
+# them as a four-digit hex code followed by the second field.  Other
+# lines produce no output.
+/^[0-9]/ {
+  row = substr($1, 3, 2);
+  cell = substr($1, 5, 2);
+  printf "0x%02X%02X %s\n", row + 32, cell + 32, $2;
+}
diff --git a/admin/charsets/mapconv b/admin/charsets/mapconv
new file mode 100755
index 00000000000..f686ea3799c
--- /dev/null
+++ b/admin/charsets/mapconv
@@ -0,0 +1,125 @@
+#!/bin/sh
+#
+# Copyright (C) 2003
+#   National Institute of Advanced Industrial Science and Technology (AIST)
+#   Registration Number H13PRO009
+#
+# This file is part of GNU Emacs.
+#
+# GNU Emacs is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# GNU Emacs is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Emacs; see the file COPYING.  If not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+# Comment:
+# Convert charset maps of various formats into this:
+#       0xXX 0xYYYY
+# where,
+#   XX is a code point of the charset in hexadecimal,
+#   YYYY is the corresponding Unicode character code in hexadecimal.
+# Arguments are:
+#   $1: source map file
+#   $2: address pattern for sed (optionally with substitution command)
+#   $3: format of source map file
+#       GLIBC-1 GLIBC-2 GLIBC-2-7 CZYBORRA IANA UNICODE YASUOKA MICROSOFT
+#   $4: awk script
+# Diagnostics go to stderr: the caller redirects stdout into the map
+# file being generated.
+BASE=`basename "$1"`
+# Work out the origin string shown in the "# Generated from" header.
+case "$3" in
+    GLIBC*)
+        SOURCE=`echo "$1" | sed 's/.*\(glibc.*$\)/\1/'`;;
+    CZYBORRA)
+        SOURCE="http://czyborra.com/charsets/${BASE}";;
+    IANA)
+        SOURCE="http://www.iana.org/assignments/charset-reg/${BASE}";;
+    UNICODE)
+        SOURCE="http://www.unicode.org/Public/MAPPINGS/.../${BASE}";;
+    YASUOKA)
+        SOURCE="http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/.../${BASE}";;
+    MICROSOFT)
+        SOURCE="http://www.microsoft.com/globaldev/reference/oem/${BASE}";;
+    *)
+        echo "Unknown file type: $3" >&2
+        exit 1;;
+esac
+# Choose the final filter before printing anything, so that a missing
+# awk script does not leave a header-only map behind.
+if [ -n "$4" ] ; then
+    if [ -f "$4" ] ; then
+        AWKPROG="gawk -f $4"
+    else
+        echo "Awk program does not exist: $4" >&2
+        exit 1
+    fi
+else
+    AWKPROG=cat
+fi
+echo "# Generated from $SOURCE"
+# ${AWKPROG} is left unquoted on purpose: it holds a command and its
+# arguments.  A `case' replaces the former `[ ... == ... ]' tests,
+# because `==' is not a POSIX test operator and fails under some
+# /bin/sh implementations.
+case "$3" in
+    GLIBC-1)
+        # Source format is:
+        #   <UYYYY> /xXX
+        sed -n -e "$2 p" < "$1" \
+            | sed -e 's,<U\([^>]*\)>[       ]*/x\(..\).*,0x\2 0x\1,' \
+            | sort | ${AWKPROG}
+        ;;
+    GLIBC-2)
+        # Source format is:
+        #   <UYYYY> /xXX/xZZ
+        sed -n -e "$2 p" < "$1" \
+            | sed -e 's,<U\([^>]*\)>[       ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \
+            | sort | ${AWKPROG}
+        ;;
+    GLIBC-2-7)
+        # Source format is:
+        #   <UYYYY> /xXX/xZZ
+        # We must drop MSBs of XX and ZZ
+        sed -n -e "$2 p" < "$1" \
+            | sed -e 's/xa/x2/g' -e 's/xb/x3/g' -e 's/xc/x4/g' \
+                  -e 's/xd/x5/g' -e 's/xe/x6/g' -e 's/xf/x7/g' \
+                  -e 's,<U\([^>]*\)>[       ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \
+            | sort | ${AWKPROG}
+        ;;
+    CZYBORRA)
+        # Source format is:
+        #   =XX     U+YYYY
+        zcat "$1" | sed -n -e "$2 p" \
+            | sed -e 's/=\(..\)[^U]*U+\([0-9A-F]*\).*/0x\1 0x\2/' \
+            | sort | ${AWKPROG}
+        ;;
+    IANA)
+        # Source format is:
+        #   0xXX    0xYYYY
+        sed -n -e "$2 p" < "$1" \
+            | sed -e 's/\(0x[0-9A-Fa-f]*\)[^0]*\(0x[0-9A-Fa-f]*\).*/\1 \2/' \
+            | sort | ${AWKPROG}
+        ;;
+    UNICODE)
+        # Source format is:
+        #   YYYY    XX
+        sed -n -e "$2 p" < "$1" \
+            | sed -e 's/\([0-9A-F]*\)[^0-9A-F]*\([0-9A-F]*\).*/0x\2 0x\1/' \
+            | sort | ${AWKPROG}
+        ;;
+    YASUOKA)
+        # Source format is:
+        # YYYY      0-XXXX (XXXX is a Kuten code)
+        sed -n -e "$2 p" < "$1" \
+            | sed -e 's/\([0-9A-F]*\)[^0]*0-\([0-9]*\).*/0x\2 0x\1/' \
+            | sort | ${AWKPROG}
+        ;;
+    MICROSOFT)
+        # Source format is:
+        # XX = U+YYYY
+        sed -n -e "$2 p" < "$1" \
+            | sed -e 's/\([0-9A-F]*\).*U+\([0-9A-F]*\).*/0x\1 0x\2/' \
+            | sort | ${AWKPROG}
+        ;;
+    *)
+        # Reached for a GLIBC* format other than the three above.
+        echo "Invalid arguments" >&2
+        exit 1
+        ;;
+esac
diff --git a/admin/charsets/mule-charsets.el b/admin/charsets/mule-charsets.el
new file mode 100644
index 00000000000..9fc1ad83fc8
--- /dev/null
+++ b/admin/charsets/mule-charsets.el
@@ -0,0 +1,58 @@
+;; mule-charsets.el -- Generate Mule-original charset maps.
+;; Copyright (C) 2003
+;;   National Institute of Advanced Industrial Science and Technology (AIST)
+;;   Registration Number H13PRO009
+;; This file is part of GNU Emacs.
+;; GNU Emacs is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs; see the file COPYING.  If not, write to the
+;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+;; Refuse to run under an Emacs older than 21.3.50.  The minor version
+;; is looked at only within major version 21, so that later major
+;; releases whose minor version is below 3 (e.g. 22.1) are accepted.
+(if (or (< emacs-major-version 21)
+        (and (= emacs-major-version 21)
+             (or (< emacs-minor-version 3)
+                 (and (= emacs-minor-version 3)
+                      (string< emacs-version "21.3.50")))))
+    (error "Use Emacs of version 21.3.50 or later"))
+(defun func (start end)
+  "Insert one map line for each character from START to END.
+A line has the form \"0xCODE 0xUNICODE\".  CODE is made of the first
+code point returned by `split-char', followed by the second one when
+there is one.  Characters for which `encode-char' returns nil for
+`ucs' are skipped."
+  (let ((ch start))
+    (while (<= ch end)
+      (let* ((split (split-char ch))
+             (unicode (encode-char ch 'ucs))
+             (code1 (nth 1 split))
+             (code2 (nth 2 split)))
+        (cond ((null unicode))
+              (code2
+               (insert (format "0x%02X%02X 0x%04X\n" code1 code2 unicode)))
+              (t
+               (insert (format "0x%02X 0x%04X\n" code1 unicode)))))
+      (setq ch (1+ ch)))))
+;; Alist of (MAP-FILE-NAME . CHARSET).  The file name given on the
+;; command line is looked up here with `assoc' to find the charset to
+;; dump.
+(defconst charset-alist
+  '(("MULE-ethiopic.map" . ethiopic)
+    ("MULE-ipa.map" . ipa)
+    ("MULE-is13194.map" . indian-is13194)
+    ("MULE-sisheng.map" . chinese-sisheng)
+    ("MULE-tibetan.map" . tibetan)
+    ("MULE-lviscii.map" . vietnamese-viscii-lower)
+    ("MULE-uviscii.map" . vietnamese-viscii-upper)))
+;; The first remaining command-line argument is the map file to write;
+;; it must be one of the names in `charset-alist'.
+(setq file (car command-line-args-left))
+(unless (stringp file)
+  (error "Invalid file name: %s" file))
+(setq charset (cdr (assoc file charset-alist)))
+(unless charset
+  (error "Invalid charset: %s" (car command-line-args-left)))
+;; Collect the map lines in a temporary buffer and save it as FILE.
+(with-temp-buffer
+  (map-charset-chars 'func charset)
+  (write-file file))
author	Kenichi Handa	2003-09-08 11:56:09 +0000
committer	Kenichi Handa	2003-09-08 11:56:09 +0000
commit	463f5630a5e7cbe7f042bc1175d1fa1c4e98860f (patch)
tree	3287d0c628fea2249abf4635b3a4f45bedd6f8c4 /admin/charsets
parent	4256310de631bd57c78b88b5131caa073315b3d7 (diff)
download	emacs-463f5630a5e7cbe7f042bc1175d1fa1c4e98860f.tar.gz emacs-463f5630a5e7cbe7f042bc1175d1fa1c4e98860f.zip