diff options
Diffstat (limited to 'admin/charsets/mapconv')
| -rwxr-xr-x | admin/charsets/mapconv | 28 |
1 files changed, 16 insertions, 12 deletions
diff --git a/admin/charsets/mapconv b/admin/charsets/mapconv
index 8433d222b8d..6fd13c61c4a 100755
--- a/admin/charsets/mapconv
+++ b/admin/charsets/mapconv | |||
| @@ -1,5 +1,7 @@ | |||
| 1 | #!/bin/sh | 1 | #!/bin/sh |
| 2 | 2 | ||
| 3 | # Copyright (C) 2015 Free Software Foundation, Inc. | ||
| 4 | |||
| 3 | # Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 | 5 | # Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
| 4 | # National Institute of Advanced Industrial Science and Technology (AIST) | 6 | # National Institute of Advanced Industrial Science and Technology (AIST) |
| 5 | # Registration Number H13PRO009 | 7 | # Registration Number H13PRO009 |
| @@ -33,8 +35,10 @@ | |||
| 33 | # GLIBC-1 GLIBC-2 GLIBC-2-7 CZYBORRA IANA UNICODE UNICODE2 YASUOKA | 35 | # GLIBC-1 GLIBC-2 GLIBC-2-7 CZYBORRA IANA UNICODE UNICODE2 YASUOKA |
| 34 | # $4: awk script | 36 | # $4: awk script |
| 35 | 37 | ||
| 36 | FILE="admin/charsets/$1" | 38 | BASE=`expr "$1" : '.*/\(.*\)' '|' "$1"` # basename |
| 37 | BASE=`basename $1 .gz` | 39 | FILE="admin/charsets/mapfiles/$BASE" |
| 40 | BASE=`expr "$BASE" : '\(.*\)\.gz$' '|' "$BASE"` # remove any .gz suffix | ||
| 41 | AWK=${AWK:-awk} | ||
| 38 | 42 | ||
| 39 | case "$3" in | 43 | case "$3" in |
| 40 | GLIBC*) | 44 | GLIBC*) |
| @@ -69,7 +73,7 @@ fi | |||
| 69 | 73 | ||
| 70 | if [ -n "$4" ] ; then | 74 | if [ -n "$4" ] ; then |
| 71 | if [ -f "$4" ] ; then | 75 | if [ -f "$4" ] ; then |
| 72 | AWKPROG="gawk -f $4" | 76 | AWKPROG="$AWK -f $4" |
| 73 | else | 77 | else |
| 74 | echo "Awk program does not exist: $4" | 78 | echo "Awk program does not exist: $4" |
| 75 | exit 1 | 79 | exit 1 |
| @@ -81,20 +85,20 @@ fi | |||
| 81 | if [ "$3" = "GLIBC-1" ] ; then | 85 | if [ "$3" = "GLIBC-1" ] ; then |
| 82 | # Source format is: | 86 | # Source format is: |
| 83 | # <UYYYY> /xXX | 87 | # <UYYYY> /xXX |
| 84 | zcat $1 | sed -n -e "$2 p" \ | 88 | gunzip -c $1 | sed -n -e "${2}p" \ |
| 85 | | sed -e 's,<U\([^>]*\)>[ ]*/x\(..\).*,0x\2 0x\1,' \ | 89 | | sed -e 's,<U\([^>]*\)>[ ]*/x\(..\).*,0x\2 0x\1,' \ |
| 86 | | sort | ${AWKPROG} | 90 | | sort | ${AWKPROG} |
| 87 | elif [ "$3" = "GLIBC-2" ] ; then | 91 | elif [ "$3" = "GLIBC-2" ] ; then |
| 88 | # Source format is: | 92 | # Source format is: |
| 89 | # <UYYYY> /xXX/xZZ | 93 | # <UYYYY> /xXX/xZZ |
| 90 | zcat $1 | sed -n -e "$2 p" \ | 94 | gunzip -c $1 | sed -n -e "${2}p" \ |
| 91 | | sed -e 's,<U\([^>]*\)>[ ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \ | 95 | | sed -e 's,<U\([^>]*\)>[ ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \ |
| 92 | | sort | ${AWKPROG} | 96 | | sort | ${AWKPROG} |
| 93 | elif [ "$3" = "GLIBC-2-7" ] ; then | 97 | elif [ "$3" = "GLIBC-2-7" ] ; then |
| 94 | # Source format is: | 98 | # Source format is: |
| 95 | # <UYYYY> /xXX/xZZ | 99 | # <UYYYY> /xXX/xZZ |
| 96 | # We must drop MSBs of XX and ZZ | 100 | # We must drop MSBs of XX and ZZ |
| 97 | zcat $1 | sed -n -e "$2 p" \ | 101 | gunzip -c $1 | sed -n -e "${2}p" \ |
| 98 | | sed -e 's/xa/x2/g' -e 's/xb/x3/g' -e 's/xc/x4/g' \ | 102 | | sed -e 's/xa/x2/g' -e 's/xb/x3/g' -e 's/xc/x4/g' \ |
| 99 | -e 's/xd/x5/g' -e 's/xe/x6/g' -e 's/xf/x7/g' \ | 103 | -e 's/xd/x5/g' -e 's/xe/x6/g' -e 's/xf/x7/g' \ |
| 100 | -e 's,<U\([^>]*\)>[ ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \ | 104 | -e 's,<U\([^>]*\)>[ ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \ |
| @@ -102,13 +106,13 @@ elif [ "$3" = "GLIBC-2-7" ] ; then | |||
| 102 | elif [ "$3" = "CZYBORRA" ] ; then | 106 | elif [ "$3" = "CZYBORRA" ] ; then |
| 103 | # Source format is: | 107 | # Source format is: |
| 104 | # =XX U+YYYY | 108 | # =XX U+YYYY |
| 105 | sed -n -e "$2 p" < $1 \ | 109 | sed -n -e "${2}p" < $1 \ |
| 106 | | sed -e 's/=\(..\)[^U]*U+\([0-9A-F]*\).*/0x\1 0x\2/' \ | 110 | | sed -e 's/=\(..\)[^U]*U+\([0-9A-F]*\).*/0x\1 0x\2/' \ |
| 107 | | sort | ${AWKPROG} | 111 | | sort | ${AWKPROG} |
| 108 | elif [ "$3" = "IANA" ] ; then | 112 | elif [ "$3" = "IANA" ] ; then |
| 109 | # Source format is: | 113 | # Source format is: |
| 110 | # 0xXX 0xYYYY | 114 | # 0xXX 0xYYYY |
| 111 | sed -n -e "$2 p" < $1 \ | 115 | sed -n -e "${2}p" < $1 \ |
| 112 | | sed -e 's/\(0x[0-9A-Fa-f]*\)[^0]*\(0x[0-9A-Fa-f]*\).*/\1 \2/' \ | 116 | | sed -e 's/\(0x[0-9A-Fa-f]*\)[^0]*\(0x[0-9A-Fa-f]*\).*/\1 \2/' \ |
| 113 | | sort | ${AWKPROG} | 117 | | sort | ${AWKPROG} |
| 114 | elif [ "$3" = "UNICODE" ] ; then | 118 | elif [ "$3" = "UNICODE" ] ; then |
| @@ -116,25 +120,25 @@ elif [ "$3" = "UNICODE" ] ; then | |||
| 116 | # YYYY XX | 120 | # YYYY XX |
| 117 | # We perform reverse sort to prefer the first one in the | 121 | # We perform reverse sort to prefer the first one in the |
| 118 | # duplicated mappings (e.g. 0x20->U+0020, 0x20->U+00A0). | 122 | # duplicated mappings (e.g. 0x20->U+0020, 0x20->U+00A0). |
| 119 | sed -n -e "$2 p" < $1 \ | 123 | sed -n -e "${2}p" < $1 \ |
| 120 | | sed -e 's/\([0-9A-F]*\)[^0-9A-F]*\([0-9A-F]*\).*/0x\2 0x\1/' \ | 124 | | sed -e 's/\([0-9A-F]*\)[^0-9A-F]*\([0-9A-F]*\).*/0x\2 0x\1/' \ |
| 121 | | sort -r | 125 | | sort -r |
| 122 | elif [ "$3" = "UNICODE2" ] ; then | 126 | elif [ "$3" = "UNICODE2" ] ; then |
| 123 | # Source format is: | 127 | # Source format is: |
| 124 | # 0xXXXX 0xYYYY # ... | 128 | # 0xXXXX 0xYYYY # ... |
| 125 | sed -n -e "$2 p" < $1 \ | 129 | sed -n -e "${2}p" < $1 \ |
| 126 | | sed -e 's/\([0-9A-Fx]*\)[^0]*\([0-9A-Fx]*\).*/\1 \2/' \ | 130 | | sed -e 's/\([0-9A-Fx]*\)[^0]*\([0-9A-Fx]*\).*/\1 \2/' \ |
| 127 | | ${AWKPROG} | sort -n -k 4,4 | 131 | | ${AWKPROG} | sort -n -k 4,4 |
| 128 | elif [ "$3" = "YASUOKA" ] ; then | 132 | elif [ "$3" = "YASUOKA" ] ; then |
| 129 | # Source format is: | 133 | # Source format is: |
| 130 | # YYYY 0-XXXX (XXXX is a Kuten code) | 134 | # YYYY 0-XXXX (XXXX is a Kuten code) |
| 131 | sed -n -e "$2 p" < $1 \ | 135 | sed -n -e "${2}p" < $1 \ |
| 132 | | sed -e 's/\([0-9A-F]*\)[^0]*0-\([0-9]*\).*/0x\2 0x\1/' \ | 136 | | sed -e 's/\([0-9A-F]*\)[^0]*0-\([0-9]*\).*/0x\2 0x\1/' \ |
| 133 | | sort | ${AWKPROG} | 137 | | sort | ${AWKPROG} |
| 134 | elif [ "$3" = "KANJI-DATABASE" ] ; then | 138 | elif [ "$3" = "KANJI-DATABASE" ] ; then |
| 135 | # Source format is: | 139 | # Source format is: |
| 136 | # C?-XXXX U+YYYYY ..... | 140 | # C?-XXXX U+YYYYY ..... |
| 137 | sed -n -e "$2 p" < $1 \ | 141 | sed -n -e "${2}p" < $1 \ |
| 138 | | sed -e 's/...\(....\) U+\([0-9A-F]*\).*/0x\1 0x\2/' \ | 142 | | sed -e 's/...\(....\) U+\([0-9A-F]*\).*/0x\1 0x\2/' \ |
| 139 | | sort | ${AWKPROG} | 143 | | sort | ${AWKPROG} |
| 140 | else | 144 | else |