diff options
| author | Kenichi Handa | 2009-06-12 07:22:48 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2009-06-12 07:22:48 +0000 |
| commit | 64ace564ee79038f9b1d1886772c42679e164505 (patch) | |
| tree | e25ffa1ea3e7f28b86ed175998dfd9594115ea9a /admin | |
| parent | d581a3d06529b0290e3b9821fa203c9b3241d16d (diff) | |
| download | emacs-64ace564ee79038f9b1d1886772c42679e164505.tar.gz emacs-64ace564ee79038f9b1d1886772c42679e164505.zip | |
Mostly rewritten to handle gzip-compressed input files.
Diffstat (limited to 'admin')
| -rwxr-xr-x | admin/charsets/mapconv | 70 |
1 file changed, 36 insertions, 34 deletions
diff --git a/admin/charsets/mapconv b/admin/charsets/mapconv index cb647020e6a..1913449f898 100755 --- a/admin/charsets/mapconv +++ b/admin/charsets/mapconv | |||
| @@ -30,34 +30,42 @@ | |||
| 30 | # $1: source map file | 30 | # $1: source map file |
| 31 | # $2: address pattern for sed (optionally with substitution command) | 31 | # $2: address pattern for sed (optionally with substitution command) |
| 32 | # $3: format of source map file | 32 | # $3: format of source map file |
| 33 | # GLIBC-1 GLIBC-2 GLIBC-2-7 CZYBORRA IANA UNICODE YASUOKA MICROSOFT | 33 | # GLIBC-1 GLIBC-2 GLIBC-2-7 CZYBORRA IANA UNICODE YASUOKA |
| 34 | # $4: awk script | 34 | # $4: awk script |
| 35 | 35 | ||
| 36 | BASE=`basename $1` | 36 | FILE="admin/charsets/$1" |
| 37 | BASE=`basename $1 .gz` | ||
| 37 | 38 | ||
| 38 | case "$3" in | 39 | case "$3" in |
| 39 | GLIBC*) | 40 | GLIBC*) |
| 40 | SOURCE="glibc-2.3.2/localedata/charmaps/${BASE}";; | 41 | FILE="$BASE in localedate/charmaps of glibc"; |
| 42 | SOURCE="";; | ||
| 41 | CZYBORRA) | 43 | CZYBORRA) |
| 44 | BASE="$BASE.gz"; | ||
| 42 | SOURCE="http://czyborra.com/charsets/${BASE}";; | 45 | SOURCE="http://czyborra.com/charsets/${BASE}";; |
| 43 | IANA) | 46 | IANA) |
| 44 | SOURCE="http://www.iana.org/assignments/charset-reg/${BASE}";; | 47 | SOURCE="http://www.iana.org/assignments/charset-reg/${BASE}";; |
| 45 | UNICODE) | 48 | UNICODE) |
| 46 | SOURCE="http://www.unicode.org/Public/MAPPINGS/.../${BASE}";; | 49 | SOURCE="http://www.unicode.org/Public/MAPPINGS/VENDORS/ADOBE/${BASE}";; |
| 47 | UNICODE2) | 50 | UNICODE2) |
| 48 | SOURCE="http://www.unicode.org/Public/MAPPINGS/.../${BASE}";; | 51 | SOURCE="http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/${BASE}";; |
| 49 | YASUOKA) | 52 | YASUOKA) |
| 50 | SOURCE="http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/.../${BASE}";; | 53 | BASE="$BASE.Z"; |
| 51 | MICROSOFT) | 54 | SOURCE="http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/ftp/CJKtable/${BASE}";; |
| 52 | SOURCE="http://www.microsoft.com/globaldev/reference/oem/${BASE}";; | ||
| 53 | KANJI-DATABASE) | 55 | KANJI-DATABASE) |
| 54 | SOURCE="data at http://sourceforge.net/cvs/?group_id=26261";; | 56 | SOURCE="http://kanji-database.cvs.sourceforge.net/viewvc/*checkout*/kanji-database/kanji-database/data/cns2ucsdkw.txt?revision=1.4";; |
| 55 | *) | 57 | *) |
| 56 | echo "Unknown file type: $3"; | 58 | echo "Unknown file type: $3"; |
| 57 | exit 1;; | 59 | exit 1;; |
| 58 | esac | 60 | esac |
| 59 | 61 | ||
| 60 | echo "# Generated from $SOURCE" | 62 | if [ -n "$SOURCE" ] ; then |
| 63 | echo "# Generated from $FILE which is a copy of"; | ||
| 64 | echo "# $SOURCE" | ||
| 65 | else | ||
| 66 | echo "# Generated from $FILE" | ||
| 67 | fi | ||
| 68 | |||
| 61 | 69 | ||
| 62 | if [ -n "$4" ] ; then | 70 | if [ -n "$4" ] ; then |
| 63 | if [ -f "$4" ] ; then | 71 | if [ -f "$4" ] ; then |
| @@ -70,72 +78,66 @@ else | |||
| 70 | AWKPROG=cat | 78 | AWKPROG=cat |
| 71 | fi | 79 | fi |
| 72 | 80 | ||
| 73 | if [ "$3" == "GLIBC-1" ] ; then | 81 | if [ "$3" = "GLIBC-1" ] ; then |
| 74 | # Source format is: | 82 | # Source format is: |
| 75 | # <UYYYY> /xXX | 83 | # <UYYYY> /xXX |
| 76 | sed -n -e "$2 p" < $1 \ | 84 | zcat $1 | sed -n -e "$2 p" \ |
| 77 | | sed -e 's,<U\([^>]*\)>[ ]*/x\(..\).*,0x\2 0x\1,' \ | 85 | | sed -e 's,<U\([^>]*\)>[ ]*/x\(..\).*,0x\2 0x\1,' \ |
| 78 | | sort | ${AWKPROG} | 86 | | sort | ${AWKPROG} |
| 79 | elif [ "$3" == "GLIBC-2" ] ; then | 87 | elif [ "$3" = "GLIBC-2" ] ; then |
| 80 | # Source format is: | 88 | # Source format is: |
| 81 | # <UYYYY> /xXX/xZZ | 89 | # <UYYYY> /xXX/xZZ |
| 82 | sed -n -e "$2 p" < $1 \ | 90 | zcat $1 | sed -n -e "$2 p" \ |
| 83 | | sed -e 's,<U\([^>]*\)>[ ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \ | 91 | | sed -e 's,<U\([^>]*\)>[ ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \ |
| 84 | | sort | ${AWKPROG} | 92 | | sort | ${AWKPROG} |
| 85 | elif [ "$3" == "GLIBC-2-7" ] ; then | 93 | elif [ "$3" = "GLIBC-2-7" ] ; then |
| 86 | # Source format is: | 94 | # Source format is: |
| 87 | # <UYYYY> /xXX/xZZ | 95 | # <UYYYY> /xXX/xZZ |
| 88 | # We must drop MSBs of XX and ZZ | 96 | # We must drop MSBs of XX and ZZ |
| 89 | sed -n -e "$2 p" < $1 \ | 97 | zcat $1 | sed -n -e "$2 p" \ |
| 90 | | sed -e 's/xa/x2/g' -e 's/xb/x3/g' -e 's/xc/x4/g' \ | 98 | | sed -e 's/xa/x2/g' -e 's/xb/x3/g' -e 's/xc/x4/g' \ |
| 91 | -e 's/xd/x5/g' -e 's/xe/x6/g' -e 's/xf/x7/g' \ | 99 | -e 's/xd/x5/g' -e 's/xe/x6/g' -e 's/xf/x7/g' \ |
| 92 | -e 's,<U\([^>]*\)>[ ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \ | 100 | -e 's,<U\([^>]*\)>[ ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \ |
| 93 | | tee temp \ | 101 | | tee temp \ |
| 94 | | sort | ${AWKPROG} | 102 | | sort | ${AWKPROG} |
| 95 | elif [ "$3" == "CZYBORRA" ] ; then | 103 | elif [ "$3" = "CZYBORRA" ] ; then |
| 96 | # Source format is: | 104 | # Source format is: |
| 97 | # =XX U+YYYY | 105 | # =XX U+YYYY |
| 98 | zcat $1 | sed -n -e "$2 p" \ | 106 | zcat $1 | sed -n -e "$2 p" \ |
| 99 | | sed -e 's/=\(..\)[^U]*U+\([0-9A-F]*\).*/0x\1 0x\2/' \ | 107 | | sed -e 's/=\(..\)[^U]*U+\([0-9A-F]*\).*/0x\1 0x\2/' \ |
| 100 | | sort | ${AWKPROG} | 108 | | sort | ${AWKPROG} |
| 101 | elif [ "$3" == "IANA" ] ; then | 109 | elif [ "$3" = "IANA" ] ; then |
| 102 | # Source format is: | 110 | # Source format is: |
| 103 | # 0xXX 0xYYYY | 111 | # 0xXX 0xYYYY |
| 104 | sed -n -e "$2 p" < $1 \ | 112 | zcat $1 | sed -n -e "$2 p" \ |
| 105 | | sed -e 's/\(0x[0-9A-Fa-f]*\)[^0]*\(0x[0-9A-Fa-f]*\).*/\1 \2/' \ | 113 | | sed -e 's/\(0x[0-9A-Fa-f]*\)[^0]*\(0x[0-9A-Fa-f]*\).*/\1 \2/' \ |
| 106 | | sort | ${AWKPROG} | 114 | | sort | ${AWKPROG} |
| 107 | elif [ "$3" == "UNICODE" ] ; then | 115 | elif [ "$3" = "UNICODE" ] ; then |
| 108 | # Source format is: | 116 | # Source format is: |
| 109 | # YYYY XX | 117 | # YYYY XX |
| 110 | sed -n -e "$2 p" < $1 \ | 118 | zcat $1 | sed -n -e "$2 p" \ |
| 111 | | sed -e 's/\([0-9A-F]*\)[^0-9A-F]*\([0-9A-F]*\).*/0x\2 0x\1/' \ | 119 | | sed -e 's/\([0-9A-F]*\)[^0-9A-F]*\([0-9A-F]*\).*/0x\2 0x\1/' \ |
| 112 | | sort | ${AWKPROG} | 120 | | sort | ${AWKPROG} |
| 113 | elif [ "$3" == "UNICODE2" ] ; then | 121 | elif [ "$3" = "UNICODE2" ] ; then |
| 114 | # Source format is: | 122 | # Source format is: |
| 115 | # 0xXXXX 0xYYYY # ... | 123 | # 0xXXXX 0xYYYY # ... |
| 116 | sed -n -e "$2 p" < $1 \ | 124 | zcat $1 | sed -n -e "$2 p" \ |
| 117 | | sed -e 's/\([0-9A-Fx]*\)[^0]*\([0-9A-Fx]*\).*/\1 \2/' \ | 125 | | sed -e 's/\([0-9A-Fx]*\)[^0]*\([0-9A-Fx]*\).*/\1 \2/' \ |
| 118 | | ${AWKPROG} | sort -n -k 4,4 | 126 | | ${AWKPROG} | sort -n -k 4,4 |
| 119 | elif [ "$3" == "YASUOKA" ] ; then | 127 | elif [ "$3" = "YASUOKA" ] ; then |
| 120 | # Source format is: | 128 | # Source format is: |
| 121 | # YYYY 0-XXXX (XXXX is a Kuten code) | 129 | # YYYY 0-XXXX (XXXX is a Kuten code) |
| 122 | sed -n -e "$2 p" < $1 \ | 130 | zcat $1 | sed -n -e "$2 p" \ |
| 123 | | sed -e 's/\([0-9A-F]*\)[^0]*0-\([0-9]*\).*/0x\2 0x\1/' \ | 131 | | sed -e 's/\([0-9A-F]*\)[^0]*0-\([0-9]*\).*/0x\2 0x\1/' \ |
| 124 | | sort | ${AWKPROG} | 132 | | sort | ${AWKPROG} |
| 125 | elif [ "$3" == "MICROSOFT" ] ; then | 133 | elif [ "$3" = "KANJI-DATABASE" ] ; then |
| 126 | # Source format is: | ||
| 127 | # XX = U+YYYY | ||
| 128 | sed -n -e "$2 p" < $1 \ | ||
| 129 | | sed -e 's/\([0-9A-F]*\).*U+\([0-9A-F]*\).*/0x\1 0x\2/' \ | ||
| 130 | | sort | ${AWKPROG} | ||
| 131 | elif [ "$3" == "KANJI-DATABASE" ] ; then | ||
| 132 | # Source format is: | 134 | # Source format is: |
| 133 | # C?-XXXX U+YYYYY ..... | 135 | # C?-XXXX U+YYYYY ..... |
| 134 | sed -n -e "$2 p" < $1 \ | 136 | zcat $1 | sed -n -e "$2 p" \ |
| 135 | | sed -e 's/...\(....\) U+\([0-9A-F]*\).*/0x\1 0x\2/' \ | 137 | | sed -e 's/...\(....\) U+\([0-9A-F]*\).*/0x\1 0x\2/' \ |
| 136 | | sort | ${AWKPROG} | 138 | | sort | ${AWKPROG} |
| 137 | else | 139 | else |
| 138 | echo "Invalid arguments" | 140 | echo "Invalid arguments: $3" |
| 139 | exit 1 | 141 | exit 1 |
| 140 | fi | 142 | fi |
| 141 | 143 | ||