about summary refs log tree commit diff stats
path: root/admin/unidata
diff options
context:
space:
mode:
Diffstat (limited to 'admin/unidata')
-rw-r--r-- admin/unidata/Makefile.in 8
-rw-r--r-- admin/unidata/bidimirror.awk 37
-rw-r--r-- admin/unidata/biditype.awk 93
-rw-r--r-- admin/unidata/makefile.w32-in 14
-rw-r--r-- admin/unidata/unidata-gen.el 105
5 files changed, 80 insertions, 177 deletions
diff --git a/admin/unidata/Makefile.in b/admin/unidata/Makefile.in
index e1fe247631f..c890dad8903 100644
--- a/admin/unidata/Makefile.in
+++ b/admin/unidata/Makefile.in
@@ -23,7 +23,7 @@ EMACS = ../../src/emacs
23DSTDIR = ../../lisp/international 23DSTDIR = ../../lisp/international
24RUNEMACS = ${EMACS} -Q -batch 24RUNEMACS = ${EMACS} -Q -batch
25 25
26all: ${DSTDIR}/charprop.el ../../src/biditype.h ../../src/bidimirror.h 26all: ${DSTDIR}/charprop.el
27 27
28.el.elc: 28.el.elc:
29 ${RUNEMACS} -batch -f batch-byte-compile $< 29 ${RUNEMACS} -batch -f batch-byte-compile $<
@@ -38,12 +38,6 @@ ${DSTDIR}/charprop.el: unidata-gen.elc unidata.txt
38 cd ${DSTDIR}; \ 38 cd ${DSTDIR}; \
39 ${RUNEMACS} -batch --load $${ELC} -f unidata-gen-files $${DATADIR} $${DATA} 39 ${RUNEMACS} -batch --load $${ELC} -f unidata-gen-files $${DATADIR} $${DATA}
40 40
41../../src/biditype.h: UnicodeData.txt
42 gawk -F";" -f biditype.awk $< > $@
43
44../../src/bidimirror.h: BidiMirroring.txt
45 gawk -F"[; ]+" -f bidimirror.awk $< > $@
46
47install: charprop.el 41install: charprop.el
48 cp charprop.el ${DSTDIR} 42 cp charprop.el ${DSTDIR}
49 cp `sed -n 's/^;; FILE: //p' < charprop.el` ${DSTDIR} 43 cp `sed -n 's/^;; FILE: //p' < charprop.el` ${DSTDIR}
diff --git a/admin/unidata/bidimirror.awk b/admin/unidata/bidimirror.awk
deleted file mode 100644
index fc3e8afaace..00000000000
--- a/admin/unidata/bidimirror.awk
+++ /dev/null
@@ -1,37 +0,0 @@
1# Generate data for bidi_mirroring_table, see src/bidi.c:bidi_initialize.
2
3# Copyright (C) 2010-2011 Free Software Foundation, Inc.
4
5# This file is part of GNU Emacs.
6
7# GNU Emacs is free software: you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation, either version 3 of the License, or
10# (at your option) any later version.
11
12# GNU Emacs is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15# GNU General Public License for more details.
16
17# You should have received a copy of the GNU General Public License
18# along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
19
20# Written by Eli Zaretskii <eliz@gnu.org>
21
22BEGIN {
23 printf " struct {\n int from, to;\n } bidi_mirror[] = {\n";
24 first = 1;
25 }
26
27$1 !~ /^#/ && NF >= 2 {
28 if (!first)
29 printf ",\n";
30 else
31 first = 0;
32 printf "\t{ 0x%s, 0x%s }", $1, $2;
33 }
34
35END {
36 printf " };\n";
37 }
diff --git a/admin/unidata/biditype.awk b/admin/unidata/biditype.awk
deleted file mode 100644
index bb1aaad1973..00000000000
--- a/admin/unidata/biditype.awk
+++ /dev/null
@@ -1,93 +0,0 @@
1# Generate data for filling bidi_type_table, see src/bidi.c:bidi_initialize.
2
3# Copyright (C) 2010-2011 Free Software Foundation, Inc.
4
5# This file is part of GNU Emacs.
6
7# GNU Emacs is free software: you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation, either version 3 of the License, or
10# (at your option) any later version.
11
12# GNU Emacs is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15# GNU General Public License for more details.
16
17# You should have received a copy of the GNU General Public License
18# along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
19
20# Written by Eli Zaretskii <eliz@gnu.org>
21
22function trtype(type)
23{
24 # Types are listed in the order of decreasing use in UnicodeData.txt:
25 if (type == "ON")
26 return "NEUTRAL_ON";
27 else if (type == "NSM")
28 return "WEAK_NSM";
29 else if (type == "AL")
30 return "STRONG_AL";
31 else if (type == "R")
32 return "STRONG_R";
33 else if (type == "BN")
34 return "WEAK_BN";
35 else if (type == "EN")
36 return "WEAK_EN";
37 else if (type == "ET")
38 return "WEAK_ET";
39 else if (type == "AN")
40 return "WEAK_AN";
41 else if (type == "WS")
42 return "NEUTRAL_WS";
43 else if (type == "CS")
44 return "WEAK_CS";
45 else if (type == "ES")
46 return "WEAK_ES";
47 else if (type == "B")
48 return "NEUTRAL_B";
49 else if (type == "S")
50 return "NEUTRAL_S";
51 else if (type == "LRE" || type == "RLE" || type == "LRO" || type == "RLO" || type == "PDF")
52 return type;
53 else if (type == "L")
54 return "STRONG_L";
55 else
56 {
57 printf "Unknown type: %s\n", type > "/dev/stderr";
58 exit 1;
59 }
60}
61
62BEGIN {
63 otype = "";
64 startcode = "";
65 endcode = "";
66 printf " struct {\n int from, to;\n bidi_type_t type;\n } bidi_type[] = {\n";
67 first = 1;
68 }
69
70 { code = $1;
71 ntype = $5;
72 if (ntype != otype)
73 {
74 # Don't output data for L, as that's the default value, see bidi.c.
75 if (otype != "L" && startcode != "")
76 {
77 if (!first)
78 printf ",\n";
79 else
80 first = 0;
81 printf "\t{ 0x%s, 0x%s, %s }", startcode, endcode, trtype(otype);
82 }
83 otype = ntype;
84 startcode = code;
85 endcode = code;
86 }
87 else
88 endcode = code;
89 }
90
91END {
92 printf " };\n";
93 }
diff --git a/admin/unidata/makefile.w32-in b/admin/unidata/makefile.w32-in
index 6a877e0c1d0..0e9b9f0d2bd 100644
--- a/admin/unidata/makefile.w32-in
+++ b/admin/unidata/makefile.w32-in
@@ -29,7 +29,7 @@ EMACSLOADPATH = $(lisp);$(lisp)/international;$(lisp)/emacs-lisp
29# Quote EMACS so it could be a file name with embedded whitespace 29# Quote EMACS so it could be a file name with embedded whitespace
30RUNEMACS = "$(EMACS)" -Q -batch 30RUNEMACS = "$(EMACS)" -Q -batch
31 31
32all: $(DSTDIR)/charprop.el ../../src/biditype.h ../../src/bidimirror.h 32all: $(DSTDIR)/charprop.el
33 33
34.el.elc: 34.el.elc:
35 $(RUNEMACS) -f batch-byte-compile $< 35 $(RUNEMACS) -f batch-byte-compile $<
@@ -51,16 +51,6 @@ charprop-CMD: unidata-gen.elc unidata.txt
51 51
52${DSTDIR}/charprop.el: charprop-$(SHELLTYPE) 52${DSTDIR}/charprop.el: charprop-$(SHELLTYPE)
53 53
54../../src/biditype.h: UnicodeData.txt
55 gawk -F";" -f biditype.awk -v BINMODE=2 $< > biditype.h
56 $(CP) biditype.h $@
57 $(DEL) biditype.h
58
59../../src/bidimirror.h: BidiMirroring.txt
60 gawk -F"[; ]+" -f bidimirror.awk -v BINMODE=2 $< > bidimirror.h
61 $(CP) bidimirror.h $@
62 $(DEL) bidimirror.h
63
64clean: 54clean:
65 - $(DEL) unidata-gen.elc unidata.txt biditype.h bidimirror.h 55 - $(DEL) unidata-gen.elc unidata.txt
66 56
diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el
index ab1dcd134ac..d9277217f0e 100644
--- a/admin/unidata/unidata-gen.el
+++ b/admin/unidata/unidata-gen.el
@@ -146,7 +146,7 @@
146 (setq unidata-list (cdr table)))) 146 (setq unidata-list (cdr table))))
147 147
148;; Alist of this form: 148;; Alist of this form:
149;; (PROP INDEX GENERATOR FILENAME DOCSTRING DESCRIBER VAL-LIST) 149;; (PROP INDEX GENERATOR FILENAME DOCSTRING DESCRIBER DEFAULT VAL-LIST)
150;; PROP: character property 150;; PROP: character property
151;; INDEX: index to each element of unidata-list for PROP. 151;; INDEX: index to each element of unidata-list for PROP.
152;; It may be a function that generates an alist of character codes 152;; It may be a function that generates an alist of character codes
@@ -155,14 +155,21 @@
155;; FILENAME: filename to store the char-table 155;; FILENAME: filename to store the char-table
156;; DOCSTRING: docstring for the property 156;; DOCSTRING: docstring for the property
157;; DESCRIBER: function to call to get a description string of property value 157;; DESCRIBER: function to call to get a description string of property value
158;; DEFAULT: the default value of the property 158;; DEFAULT: the default value of the property. It may have the form
159;; (VAL0 (FROM1 TO1 VAL1) ...) which indicates that the default
160;; value is VAL0 except for characters in the ranges specified by
161;; FROMn and TOn (inclusive). The default value of characters
162;; between FROMn and TOn is VALn.
159;; VAL-LIST: list of specially ordered property values 163;; VAL-LIST: list of specially ordered property values
160 164
161(defconst unidata-prop-alist 165(defconst unidata-prop-alist
162 '((name 166 '((name
163 1 unidata-gen-table-name "uni-name.el" 167 1 unidata-gen-table-name "uni-name.el"
164 "Unicode character name. 168 "Unicode character name.
165Property value is a string.") 169Property value is a string or nil.
170The value nil stands for the default value \"null string\"."
171 nil
172 nil)
166 (general-category 173 (general-category
167 2 unidata-gen-table-symbol "uni-category.el" 174 2 unidata-gen-table-symbol "uni-category.el"
168 "Unicode general category. 175 "Unicode general category.
@@ -170,7 +177,7 @@ Property value is one of the following symbols:
170 Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po, 177 Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po,
171 Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn" 178 Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn"
172 unidata-describe-general-category 179 unidata-describe-general-category
173 nil 180 Cn
174 ;; The order of elements must be in sync with unicode_category_t 181 ;; The order of elements must be in sync with unicode_category_t
175 ;; in src/character.h. 182 ;; in src/character.h.
176 (Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po 183 (Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po
@@ -179,7 +186,8 @@ Property value is one of the following symbols:
179 3 unidata-gen-table-integer "uni-combining.el" 186 3 unidata-gen-table-integer "uni-combining.el"
180 "Unicode canonical combining class. 187 "Unicode canonical combining class.
181Property value is an integer." 188Property value is an integer."
182 unidata-describe-canonical-combining-class) 189 unidata-describe-canonical-combining-class
190 0)
183 (bidi-class 191 (bidi-class
184 4 unidata-gen-table-symbol "uni-bidi.el" 192 4 unidata-gen-table-symbol "uni-bidi.el"
185 "Unicode bidi class. 193 "Unicode bidi class.
@@ -187,7 +195,12 @@ Property value is one of the following symbols:
187 L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET, 195 L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET,
188 AN, CS, NSM, BN, B, S, WS, ON" 196 AN, CS, NSM, BN, B, S, WS, ON"
189 unidata-describe-bidi-class 197 unidata-describe-bidi-class
190 L 198 ;; The assignment of default values to blocks of code points
199 ;; follows the file DerivedBidiClass.txt from the Unicode
200 ;; Character Database (UCD).
201 (L (#x0600 #x06FF AL) (#xFB50 #xFDFF AL) (#xFE70 #xFEFF AL)
202 (#x0590 #x05FF R) (#x07C0 #x08FF R)
203 (#xFB1D #xFB4F R) (#x10800 #x10FFF R) (#x1E800 #x1EFFF R))
191 ;; The order of elements must be in sync with bidi_type_t in 204 ;; The order of elements must be in sync with bidi_type_t in
192 ;; src/dispextern.h. 205 ;; src/dispextern.h.
193 (L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON)) 206 (L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON))
@@ -202,23 +215,29 @@ one of these symbols representing compatibility formatting tag:
202 (decimal-digit-value 215 (decimal-digit-value
203 6 unidata-gen-table-integer "uni-decimal.el" 216 6 unidata-gen-table-integer "uni-decimal.el"
204 "Unicode numeric value (decimal digit). 217 "Unicode numeric value (decimal digit).
205Property value is an integer.") 218Property value is an integer 0..9, or nil.
219The value nil stands for NaN \"Numeric_Value\".")
206 (digit-value 220 (digit-value
207 7 unidata-gen-table-integer "uni-digit.el" 221 7 unidata-gen-table-integer "uni-digit.el"
208 "Unicode numeric value (digit). 222 "Unicode numeric value (digit).
209Property value is an integer.") 223Property value is an integer 0..9, or nil.
224The value nil stands for NaN \"Numeric_Value\".")
210 (numeric-value 225 (numeric-value
211 8 unidata-gen-table-numeric "uni-numeric.el" 226 8 unidata-gen-table-numeric "uni-numeric.el"
212 "Unicode numeric value (numeric). 227 "Unicode numeric value (numeric).
213Property value is an integer or a floating point.") 228Property value is an integer, a floating point, or nil.
229The value nil stands for NaN \"Numeric_Value\".")
214 (mirrored 230 (mirrored
215 9 unidata-gen-table-symbol "uni-mirrored.el" 231 9 unidata-gen-table-symbol "uni-mirrored.el"
216 "Unicode bidi mirrored flag. 232 "Unicode bidi mirrored flag.
217Property value is a symbol `Y' or `N'. See also the property `mirroring'.") 233Property value is a symbol `Y' or `N'. See also the property `mirroring'."
234 nil
235 N)
218 (old-name 236 (old-name
219 10 unidata-gen-table-name "uni-old-name.el" 237 10 unidata-gen-table-name "uni-old-name.el"
220 "Unicode old names as published in Unicode 1.0. 238 "Unicode old names as published in Unicode 1.0.
221Property value is a string.") 239Property value is a string or nil.
240The value nil stands for the default value \"null string\".")
222 (iso-10646-comment 241 (iso-10646-comment
223 11 unidata-gen-table-name "uni-comment.el" 242 11 unidata-gen-table-name "uni-comment.el"
224 "Unicode ISO 10646 comment. 243 "Unicode ISO 10646 comment.
@@ -226,23 +245,30 @@ Property value is a string.")
226 (uppercase 245 (uppercase
227 12 unidata-gen-table-character "uni-uppercase.el" 246 12 unidata-gen-table-character "uni-uppercase.el"
228 "Unicode simple uppercase mapping. 247 "Unicode simple uppercase mapping.
229Property value is a character." 248Property value is a character or nil.
249The value nil means that the actual property value of a character
250is the character itself."
230 string) 251 string)
231 (lowercase 252 (lowercase
232 13 unidata-gen-table-character "uni-lowercase.el" 253 13 unidata-gen-table-character "uni-lowercase.el"
233 "Unicode simple lowercase mapping. 254 "Unicode simple lowercase mapping.
234Property value is a character." 255Property value is a character or nil.
256The value nil means that the actual property value of a character
257is the character itself."
235 string) 258 string)
236 (titlecase 259 (titlecase
237 14 unidata-gen-table-character "uni-titlecase.el" 260 14 unidata-gen-table-character "uni-titlecase.el"
238 "Unicode simple titlecase mapping. 261 "Unicode simple titlecase mapping.
239Property value is a character." 262Property value is a character or nil.
263The value nil means that the actual property value of a character
264is the character itself."
240 string) 265 string)
241 (mirroring 266 (mirroring
242 unidata-gen-mirroring-list unidata-gen-table-character "uni-mirrored.el" 267 unidata-gen-mirroring-list unidata-gen-table-character "uni-mirrored.el"
243 "Unicode bidi-mirroring characters. 268 "Unicode bidi-mirroring characters.
244Property value is a character that has the corresponding mirroring image, 269Property value is a character that has the corresponding mirroring image or nil.
245or nil for non-mirrored character."))) 270The value nil means that the actual property value of a character
271is the character itself.")))
246 272
247;; Functions to access the above data. 273;; Functions to access the above data.
248(defsubst unidata-prop-index (prop) (nth 1 (assq prop unidata-prop-alist))) 274(defsubst unidata-prop-index (prop) (nth 1 (assq prop unidata-prop-alist)))
@@ -393,9 +419,18 @@ or nil for non-mirrored character.")))
393 (while tail 419 (while tail
394 (setcar tail (cons (car tail) val-code)) 420 (setcar tail (cons (car tail) val-code))
395 (setq tail (cdr tail) val-code (1+ val-code))) 421 (setq tail (cdr tail) val-code (1+ val-code)))
396 (setq default-value (unidata-encode-val val-list default-value)) 422 (if (consp default-value)
397 (set-char-table-range table t default-value) 423 (setq default-value (copy-sequence default-value))
398 (set-char-table-range table nil default-value) 424 (setq default-value (list default-value)))
425 (setcar default-value
426 (unidata-encode-val val-list (car default-value)))
427 (set-char-table-range table t (car default-value))
428 (set-char-table-range table nil (car default-value))
429 (dolist (elm (cdr default-value))
430 (setcar (nthcdr 2 elm)
431 (unidata-encode-val val-list (nth 2 elm)))
432 (set-char-table-range table (cons (car elm) (nth 1 elm)) (nth 2 elm)))
433
399 (setq tail unidata-list) 434 (setq tail unidata-list)
400 (while tail 435 (while tail
401 (setq elt (car tail) tail (cdr tail)) 436 (setq elt (car tail) tail (cdr tail))
@@ -419,17 +454,27 @@ or nil for non-mirrored character.")))
419 (setq prev-range-data (cons (cons from to) val-code))))) 454 (setq prev-range-data (cons (cons from to) val-code)))))
420 (let* ((start (lsh (lsh range -7) 7)) 455 (let* ((start (lsh (lsh range -7) 7))
421 (limit (+ start 127)) 456 (limit (+ start 127))
422 str count new-val) 457 str count new-val from to vcode)
423 (fillarray vec 0) 458 (fillarray vec (car default-value))
424 ;; See the comment above. 459 (dolist (elm (cdr default-value))
425 (when (and prev-range-data 460 (setq from (car elm) to (nth 1 elm))
426 (>= (cdr (car prev-range-data)) start)) 461 (when (and (<= from limit)
427 (let ((from (car (car prev-range-data))) 462 (or (>= from start) (>= to start)))
428 (to (cdr (car prev-range-data))) 463 (setq from (max from start)
429 (vcode (cdr prev-range-data))) 464 to (min to limit)
465 vcode (nth 2 elm))
430 (while (<= from to) 466 (while (<= from to)
431 (aset vec (- from start) vcode) 467 (aset vec (- from start) vcode)
432 (setq from (1+ from))))) 468 (setq from (1+ from)))))
469 ;; See the comment above.
470 (when (and prev-range-data
471 (>= (cdr (car prev-range-data)) start))
472 (setq from (car (car prev-range-data))
473 to (cdr (car prev-range-data))
474 vcode (cdr prev-range-data))
475 (while (<= from to)
476 (aset vec (- from start) vcode)
477 (setq from (1+ from))))
433 (setq prev-range-data nil) 478 (setq prev-range-data nil)
434 (if val-code 479 (if val-code
435 (aset vec (- range start) val-code)) 480 (aset vec (- range start) val-code))
@@ -707,6 +752,9 @@ or nil for non-mirrored character.")))
707 752
708(defun unidata-get-decomposition (char val table) 753(defun unidata-get-decomposition (char val table)
709 (cond 754 (cond
755 ((not val)
756 (list char))
757
710 ((consp val) 758 ((consp val)
711 val) 759 val)
712 760
@@ -747,7 +795,8 @@ or nil for non-mirrored character.")))
747 (aset vec idx (nconc word-list tail-list))) 795 (aset vec idx (nconc word-list tail-list)))
748 (dotimes (i 128) 796 (dotimes (i 128)
749 (aset table (+ first-char i) (aref vec i))) 797 (aset table (+ first-char i) (aref vec i)))
750 (aref vec (- char first-char))))) 798 (setq val (aref vec (- char first-char)))
799 (or val (list char)))))
751 800
752 ;; Hangul syllable 801 ;; Hangul syllable
753 ((and (eq val 0) (>= char #xAC00) (<= char #xD7A3)) 802 ((and (eq val 0) (>= char #xAC00) (<= char #xD7A3))