diff options
| author | Kenichi Handa | 2003-09-08 11:56:09 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2003-09-08 11:56:09 +0000 |
| commit | 463f5630a5e7cbe7f042bc1175d1fa1c4e98860f (patch) | |
| tree | 3287d0c628fea2249abf4635b3a4f45bedd6f8c4 /admin/charsets/mapconv | |
| parent | 4256310de631bd57c78b88b5131caa073315b3d7 (diff) | |
| download | emacs-463f5630a5e7cbe7f042bc1175d1fa1c4e98860f.tar.gz emacs-463f5630a5e7cbe7f042bc1175d1fa1c4e98860f.zip | |
New directory
Diffstat (limited to 'admin/charsets/mapconv')
| -rwxr-xr-x | admin/charsets/mapconv | 125 |
1 files changed, 125 insertions, 0 deletions
diff --git a/admin/charsets/mapconv b/admin/charsets/mapconv new file mode 100755 index 00000000000..f686ea3799c --- /dev/null +++ b/admin/charsets/mapconv | |||
| @@ -0,0 +1,125 @@ | |||
| 1 | #!/bin/sh | ||
| 2 | # | ||
| 3 | # Copyright (C) 2003 | ||
| 4 | # National Institute of Advanced Industrial Science and Technology (AIST) | ||
| 5 | # Registration Number H13PRO009 | ||
| 6 | # | ||
| 7 | # This file is part of GNU Emacs. | ||
| 8 | # | ||
| 9 | # GNU Emacs is free software; you can redistribute it and/or modify | ||
| 10 | # it under the terms of the GNU General Public License as published by | ||
| 11 | # the Free Software Foundation; either version 2, or (at your option) | ||
| 12 | # any later version. | ||
| 13 | # | ||
| 14 | # GNU Emacs is distributed in the hope that it will be useful, | ||
| 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 17 | # GNU General Public License for more details. | ||
| 18 | # | ||
| 19 | # You should have received a copy of the GNU General Public License | ||
| 20 | # along with GNU Emacs; see the file COPYING. If not, write to the | ||
| 21 | # Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 22 | # Boston, MA 02111-1307, USA. | ||
| 23 | |||
| 24 | # Comment: | ||
| 25 | # Convert a charset map in one of various formats into this format: | ||
| 26 | # 0xXX 0xYYYY | ||
| 27 | # where | ||
| 28 | # XX is a code point of the charset in hexadecimal, | ||
| 29 | # YYYY is the corresponding Unicode character code in hexadecimal. | ||
| 30 | # Arguments are: | ||
| 31 | # $1: source map file | ||
| 32 | # $2: address pattern for sed (optionally with substitution command) | ||
| 33 | # $3: format of source map file | ||
| 34 | # GLIBC-1 GLIBC-2 GLIBC-2-7 CZYBORRA IANA UNICODE YASUOKA MICROSOFT | ||
| 35 | # $4: awk script | ||
| 36 | |||
| 37 | BASE=`basename "$1"` # file name only; appended to the upstream URLs below | ||
| 38 | # Derive SOURCE, the origin named in the generated header line, from format $3. | ||
| 39 | case "$3" in | ||
| 40 | GLIBC*) | ||
| 41 | SOURCE=`echo "$1" | sed 's/.*\(glibc.*$\)/\1/'`;; # keep the path from the last "glibc" onward | ||
| 42 | CZYBORRA) | ||
| 43 | SOURCE="http://czyborra.com/charsets/${BASE}";; | ||
| 44 | IANA) | ||
| 45 | SOURCE="http://www.iana.org/assignments/charset-reg/${BASE}";; | ||
| 46 | UNICODE) | ||
| 47 | SOURCE="http://www.unicode.org/Public/MAPPINGS/.../${BASE}";; | ||
| 48 | YASUOKA) | ||
| 49 | SOURCE="http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/.../${BASE}";; | ||
| 50 | MICROSOFT) | ||
| 51 | SOURCE="http://www.microsoft.com/globaldev/reference/oem/${BASE}";; | ||
| 52 | *) | ||
| 53 | echo "Unknown file type: $3" >&2; # diagnostics go to stderr; stdout carries the map | ||
| 54 | exit 1;; | ||
| 55 | esac | ||
| 56 | |||
| 57 | echo "# Generated from $SOURCE" | ||
| 58 | # Select the post-filter command: gawk with script $4 if one is given, else cat. | ||
| 59 | if [ -n "$4" ] ; then | ||
| 60 | if [ -f "$4" ] ; then | ||
| 61 | AWKPROG="gawk -f $4" | ||
| 62 | else | ||
| 63 | echo "Awk program does not exist: $4" >&2 | ||
| 64 | exit 1 | ||
| 65 | fi | ||
| 66 | else | ||
| 67 | AWKPROG=cat | ||
| 68 | fi | ||
| 69 | # Select the rows matching sed address $2, rewrite each as "0xXX 0xYYYY", sort, post-filter. | ||
| 70 | if [ "$3" = "GLIBC-1" ] ; then # '=' is the POSIX test operator; '==' is rejected by dash and other strict sh | ||
| 71 | # Source format is: | ||
| 72 | # <UYYYY> /xXX | ||
| 73 | sed -n -e "$2 p" < "$1" \ | ||
| 74 | | sed -e 's,<U\([^>]*\)>[ ]*/x\(..\).*,0x\2 0x\1,' \ | ||
| 75 | | sort | ${AWKPROG} | ||
| 76 | elif [ "$3" = "GLIBC-2" ] ; then | ||
| 77 | # Source format is: | ||
| 78 | # <UYYYY> /xXX/xZZ | ||
| 79 | sed -n -e "$2 p" < "$1" \ | ||
| 80 | | sed -e 's,<U\([^>]*\)>[ ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \ | ||
| 81 | | sort | ${AWKPROG} | ||
| 82 | elif [ "$3" = "GLIBC-2-7" ] ; then | ||
| 83 | # Source format is: | ||
| 84 | # <UYYYY> /xXX/xZZ | ||
| 85 | # We must drop MSBs of XX and ZZ: the first six sed expressions | ||
| 86 | # rewrite a hex digit a..f that directly follows an "x" as 2..7. | ||
| 87 | sed -n -e "$2 p" < "$1" \ | ||
| 88 | | sed -e 's/xa/x2/g' -e 's/xb/x3/g' -e 's/xc/x4/g' \ | ||
| 89 | -e 's/xd/x5/g' -e 's/xe/x6/g' -e 's/xf/x7/g' \ | ||
| 90 | -e 's,<U\([^>]*\)>[ ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \ | ||
| 91 | | sort | ${AWKPROG} | ||
| 92 | elif [ "$3" = "CZYBORRA" ] ; then | ||
| 93 | # Source format is: | ||
| 94 | # =XX U+YYYY | ||
| 95 | zcat "$1" | sed -n -e "$2 p" \ | ||
| 96 | | sed -e 's/=\(..\)[^U]*U+\([0-9A-F]*\).*/0x\1 0x\2/' \ | ||
| 97 | | sort | ${AWKPROG} | ||
| 98 | elif [ "$3" = "IANA" ] ; then | ||
| 99 | # Source format is: | ||
| 100 | # 0xXX 0xYYYY | ||
| 101 | sed -n -e "$2 p" < "$1" \ | ||
| 102 | | sed -e 's/\(0x[0-9A-Fa-f]*\)[^0]*\(0x[0-9A-Fa-f]*\).*/\1 \2/' \ | ||
| 103 | | sort | ${AWKPROG} | ||
| 104 | elif [ "$3" = "UNICODE" ] ; then | ||
| 105 | # Source format is: | ||
| 106 | # YYYY XX | ||
| 107 | sed -n -e "$2 p" < "$1" \ | ||
| 108 | | sed -e 's/\([0-9A-F]*\)[^0-9A-F]*\([0-9A-F]*\).*/0x\2 0x\1/' \ | ||
| 109 | | sort | ${AWKPROG} | ||
| 110 | elif [ "$3" = "YASUOKA" ] ; then | ||
| 111 | # Source format is: | ||
| 112 | # YYYY 0-XXXX (XXXX is a Kuten code) | ||
| 113 | sed -n -e "$2 p" < "$1" \ | ||
| 114 | | sed -e 's/\([0-9A-F]*\)[^0]*0-\([0-9]*\).*/0x\2 0x\1/' \ | ||
| 115 | | sort | ${AWKPROG} | ||
| 116 | elif [ "$3" = "MICROSOFT" ] ; then | ||
| 117 | # Source format is: | ||
| 118 | # XX = U+YYYY | ||
| 119 | sed -n -e "$2 p" < "$1" \ | ||
| 120 | | sed -e 's/\([0-9A-F]*\).*U+\([0-9A-F]*\).*/0x\1 0x\2/' \ | ||
| 121 | | sort | ${AWKPROG} | ||
| 122 | else | ||
| 123 | echo "Invalid arguments" >&2 | ||
| 124 | exit 1 | ||
| 125 | fi | ||