aboutsummaryrefslogtreecommitdiffstats
path: root/admin
diff options
context:
space:
mode:
authorPaul Eggert2011-08-24 14:20:36 -0700
committerPaul Eggert2011-08-24 14:20:36 -0700
commit011ba6eaacfa50cc9871d0cfea34e8f0a7a5bc43 (patch)
treeced7a98ff1eb289559da6ebfda46a8e436640da6 /admin
parentfe4496a6e27ac892283b8568adbd12831868cc54 (diff)
parentf22f4808a08e8f985d5e6175bbd13d5260e1ab1a (diff)
downloademacs-011ba6eaacfa50cc9871d0cfea34e8f0a7a5bc43.tar.gz
emacs-011ba6eaacfa50cc9871d0cfea34e8f0a7a5bc43.zip
Merge from trunk.
Diffstat (limited to 'admin')
-rw-r--r--admin/ChangeLog20
-rw-r--r--admin/unidata/unidata-gen.el107
2 files changed, 98 insertions, 29 deletions
diff --git a/admin/ChangeLog b/admin/ChangeLog
index bc38edfc8d4..17cbcbb3bdf 100644
--- a/admin/ChangeLog
+++ b/admin/ChangeLog
@@ -1,3 +1,23 @@
12011-08-23 Eli Zaretskii <eliz@gnu.org>
2
3 * unidata/unidata-gen.el (unidata-prop-alist): Update the default
4 values of bidi-class according to DerivedBidiClass.txt from the
5 latest UCD.
6
72011-08-23 Kenichi Handa <handa@m17n.org>
8
9 * unidata/unidata-gen.el (unidata-prop-alist): Provide default
10 values for name, general-category, canonical-combining-class,
11 mirrored, and bidi-class. Describe the meaning of value nil for
12 decimal-digit-value, digit-value, numeric-value, uppercase,
13 lowercase, titlecase, and mirroring.
14 (unidata-gen-table): Handle the case that default-value is a
15 list. Set default values of characters not listed in a table.
16 (unidata-get-name): Return an empty string if a value in a
17 char-table is nil.
18 (unidata-get-decomposition): Return a list of character itself if
19 a value in a char-table is nil.
20
12011-08-15 Eli Zaretskii <eliz@gnu.org> 212011-08-15 Eli Zaretskii <eliz@gnu.org>
2 22
3 * unidata/bidimirror.awk: File removed. 23 * unidata/bidimirror.awk: File removed.
diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el
index ab1dcd134ac..1002bb003af 100644
--- a/admin/unidata/unidata-gen.el
+++ b/admin/unidata/unidata-gen.el
@@ -146,7 +146,7 @@
146 (setq unidata-list (cdr table)))) 146 (setq unidata-list (cdr table))))
147 147
148;; Alist of this form: 148;; Alist of this form:
149;; (PROP INDEX GENERATOR FILENAME DOCSTRING DESCRIBER VAL-LIST) 149;; (PROP INDEX GENERATOR FILENAME DOCSTRING DESCRIBER DEFAULT VAL-LIST)
150;; PROP: character property 150;; PROP: character property
151;; INDEX: index to each element of unidata-list for PROP. 151;; INDEX: index to each element of unidata-list for PROP.
152;; It may be a function that generates an alist of character codes 152;; It may be a function that generates an alist of character codes
@@ -155,14 +155,20 @@
155;; FILENAME: filename to store the char-table 155;; FILENAME: filename to store the char-table
156;; DOCSTRING: docstring for the property 156;; DOCSTRING: docstring for the property
157;; DESCRIBER: function to call to get a description string of property value 157;; DESCRIBER: function to call to get a description string of property value
158;; DEFAULT: the default value of the property 158;; DEFAULT: the default value of the property. It may have the form
159;; (VAL0 (FROM1 TO1 VAL1) ...) which indicates that the default
160;; value is VAL0 except for characters in the ranges specified by
161;; FROMn and TOn (inclusive). The default value of characters
162;; between FROMn and TOn is VALn.
159;; VAL-LIST: list of specially ordered property values 163;; VAL-LIST: list of specially ordered property values
160 164
161(defconst unidata-prop-alist 165(defconst unidata-prop-alist
162 '((name 166 '((name
163 1 unidata-gen-table-name "uni-name.el" 167 1 unidata-gen-table-name "uni-name.el"
164 "Unicode character name. 168 "Unicode character name.
165Property value is a string.") 169Property value is a string."
170 nil
171 "")
166 (general-category 172 (general-category
167 2 unidata-gen-table-symbol "uni-category.el" 173 2 unidata-gen-table-symbol "uni-category.el"
168 "Unicode general category. 174 "Unicode general category.
@@ -170,7 +176,7 @@ Property value is one of the following symbols:
170 Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po, 176 Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po,
171 Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn" 177 Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn"
172 unidata-describe-general-category 178 unidata-describe-general-category
173 nil 179 Cn
174 ;; The order of elements must be in sync with unicode_category_t 180 ;; The order of elements must be in sync with unicode_category_t
175 ;; in src/character.h. 181 ;; in src/character.h.
176 (Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po 182 (Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po
@@ -179,7 +185,8 @@ Property value is one of the following symbols:
179 3 unidata-gen-table-integer "uni-combining.el" 185 3 unidata-gen-table-integer "uni-combining.el"
180 "Unicode canonical combining class. 186 "Unicode canonical combining class.
181Property value is an integer." 187Property value is an integer."
182 unidata-describe-canonical-combining-class) 188 unidata-describe-canonical-combining-class
189 0)
183 (bidi-class 190 (bidi-class
184 4 unidata-gen-table-symbol "uni-bidi.el" 191 4 unidata-gen-table-symbol "uni-bidi.el"
185 "Unicode bidi class. 192 "Unicode bidi class.
@@ -187,7 +194,12 @@ Property value is one of the following symbols:
187 L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET, 194 L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET,
188 AN, CS, NSM, BN, B, S, WS, ON" 195 AN, CS, NSM, BN, B, S, WS, ON"
189 unidata-describe-bidi-class 196 unidata-describe-bidi-class
190 L 197 ;; The assignment of default values to blocks of code points
198 ;; follows the file DerivedBidiClass.txt from the Unicode
199 ;; Character Database (UCD).
200 (L (#x0600 #x06FF AL) (#xFB50 #xFDFF AL) (#xFE70 #xFEFF AL)
201 (#x0590 #x05FF R) (#x07C0 #x08FF R)
202 (#xFB1D #xFB4F R) (#x10800 #x10FFF R) (#x1E800 #x1EFFF R))
191 ;; The order of elements must be in sync with bidi_type_t in 203 ;; The order of elements must be in sync with bidi_type_t in
192 ;; src/dispextern.h. 204 ;; src/dispextern.h.
193 (L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON)) 205 (L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON))
@@ -202,19 +214,24 @@ one of these symbols representing compatibility formatting tag:
202 (decimal-digit-value 214 (decimal-digit-value
203 6 unidata-gen-table-integer "uni-decimal.el" 215 6 unidata-gen-table-integer "uni-decimal.el"
204 "Unicode numeric value (decimal digit). 216 "Unicode numeric value (decimal digit).
205Property value is an integer.") 217Property value is an integer 0..9, or nil.
218The value nil stands for NaN \"Numeric_Value\".")
206 (digit-value 219 (digit-value
207 7 unidata-gen-table-integer "uni-digit.el" 220 7 unidata-gen-table-integer "uni-digit.el"
208 "Unicode numeric value (digit). 221 "Unicode numeric value (digit).
209Property value is an integer.") 222Property value is an integer 0..9, or nil.
223The value nil stands for NaN \"Numeric_Value\".")
210 (numeric-value 224 (numeric-value
211 8 unidata-gen-table-numeric "uni-numeric.el" 225 8 unidata-gen-table-numeric "uni-numeric.el"
212 "Unicode numeric value (numeric). 226 "Unicode numeric value (numeric).
213Property value is an integer or a floating point.") 227Property value is an integer, a floating point, or nil.
228The value nil stands for NaN \"Numeric_Value\".")
214 (mirrored 229 (mirrored
215 9 unidata-gen-table-symbol "uni-mirrored.el" 230 9 unidata-gen-table-symbol "uni-mirrored.el"
216 "Unicode bidi mirrored flag. 231 "Unicode bidi mirrored flag.
217Property value is a symbol `Y' or `N'. See also the property `mirroring'.") 232Property value is a symbol `Y' or `N'. See also the property `mirroring'."
233 nil
234 N)
218 (old-name 235 (old-name
219 10 unidata-gen-table-name "uni-old-name.el" 236 10 unidata-gen-table-name "uni-old-name.el"
220 "Unicode old names as published in Unicode 1.0. 237 "Unicode old names as published in Unicode 1.0.
@@ -226,23 +243,30 @@ Property value is a string.")
226 (uppercase 243 (uppercase
227 12 unidata-gen-table-character "uni-uppercase.el" 244 12 unidata-gen-table-character "uni-uppercase.el"
228 "Unicode simple uppercase mapping. 245 "Unicode simple uppercase mapping.
229Property value is a character." 246Property value is a character or nil.
247The value nil means that the actual property value of a character
248is the character itself."
230 string) 249 string)
231 (lowercase 250 (lowercase
232 13 unidata-gen-table-character "uni-lowercase.el" 251 13 unidata-gen-table-character "uni-lowercase.el"
233 "Unicode simple lowercase mapping. 252 "Unicode simple lowercase mapping.
234Property value is a character." 253Property value is a character or nil.
254The value nil means that the actual property value of a character
255is the character itself."
235 string) 256 string)
236 (titlecase 257 (titlecase
237 14 unidata-gen-table-character "uni-titlecase.el" 258 14 unidata-gen-table-character "uni-titlecase.el"
238 "Unicode simple titlecase mapping. 259 "Unicode simple titlecase mapping.
239Property value is a character." 260Property value is a character or nil.
261The value nil means that the actual property value of a character
262is the character itself."
240 string) 263 string)
241 (mirroring 264 (mirroring
242 unidata-gen-mirroring-list unidata-gen-table-character "uni-mirrored.el" 265 unidata-gen-mirroring-list unidata-gen-table-character "uni-mirrored.el"
243 "Unicode bidi-mirroring characters. 266 "Unicode bidi-mirroring characters.
244Property value is a character that has the corresponding mirroring image, 267Property value is a character that has the corresponding mirroring image or nil.
245or nil for non-mirrored character."))) 268The value nil means that the actual property value of a character
269is the character itself.")))
246 270
247;; Functions to access the above data. 271;; Functions to access the above data.
248(defsubst unidata-prop-index (prop) (nth 1 (assq prop unidata-prop-alist))) 272(defsubst unidata-prop-index (prop) (nth 1 (assq prop unidata-prop-alist)))
@@ -393,9 +417,18 @@ or nil for non-mirrored character.")))
393 (while tail 417 (while tail
394 (setcar tail (cons (car tail) val-code)) 418 (setcar tail (cons (car tail) val-code))
395 (setq tail (cdr tail) val-code (1+ val-code))) 419 (setq tail (cdr tail) val-code (1+ val-code)))
396 (setq default-value (unidata-encode-val val-list default-value)) 420 (if (consp default-value)
397 (set-char-table-range table t default-value) 421 (setq default-value (copy-sequence default-value))
398 (set-char-table-range table nil default-value) 422 (setq default-value (list default-value)))
423 (setcar default-value
424 (unidata-encode-val val-list (car default-value)))
425 (set-char-table-range table t (car default-value))
426 (set-char-table-range table nil (car default-value))
427 (dolist (elm (cdr default-value))
428 (setcar (nthcdr 2 elm)
429 (unidata-encode-val val-list (nth 2 elm)))
430 (set-char-table-range table (cons (car elm) (nth 1 elm)) (nth 2 elm)))
431
399 (setq tail unidata-list) 432 (setq tail unidata-list)
400 (while tail 433 (while tail
401 (setq elt (car tail) tail (cdr tail)) 434 (setq elt (car tail) tail (cdr tail))
@@ -419,17 +452,27 @@ or nil for non-mirrored character.")))
419 (setq prev-range-data (cons (cons from to) val-code))))) 452 (setq prev-range-data (cons (cons from to) val-code)))))
420 (let* ((start (lsh (lsh range -7) 7)) 453 (let* ((start (lsh (lsh range -7) 7))
421 (limit (+ start 127)) 454 (limit (+ start 127))
422 str count new-val) 455 str count new-val from to vcode)
423 (fillarray vec 0) 456 (fillarray vec (car default-value))
424 ;; See the comment above. 457 (dolist (elm (cdr default-value))
425 (when (and prev-range-data 458 (setq from (car elm) to (nth 1 elm))
426 (>= (cdr (car prev-range-data)) start)) 459 (when (and (<= from limit)
427 (let ((from (car (car prev-range-data))) 460 (or (>= from start) (>= to start)))
428 (to (cdr (car prev-range-data))) 461 (setq from (max from start)
429 (vcode (cdr prev-range-data))) 462 to (min to limit)
463 vcode (nth 2 elm))
430 (while (<= from to) 464 (while (<= from to)
431 (aset vec (- from start) vcode) 465 (aset vec (- from start) vcode)
432 (setq from (1+ from))))) 466 (setq from (1+ from)))))
467 ;; See the comment above.
468 (when (and prev-range-data
469 (>= (cdr (car prev-range-data)) start))
470 (setq from (car (car prev-range-data))
471 to (cdr (car prev-range-data))
472 vcode (cdr prev-range-data))
473 (while (<= from to)
474 (aset vec (- from start) vcode)
475 (setq from (1+ from))))
433 (setq prev-range-data nil) 476 (setq prev-range-data nil)
434 (if val-code 477 (if val-code
435 (aset vec (- range start) val-code)) 478 (aset vec (- range start) val-code))
@@ -669,7 +712,7 @@ or nil for non-mirrored character.")))
669 (aset table c name) 712 (aset table c name)
670 (if (= c char) 713 (if (= c char)
671 (setq val name)))) 714 (setq val name))))
672 val))) 715 (or val ""))))
673 716
674 ((and (integerp val) (> val 0)) 717 ((and (integerp val) (> val 0))
675 (let* ((symbol-table (aref (char-table-extra-slot table 4) 1)) 718 (let* ((symbol-table (aref (char-table-extra-slot table 4) 1))
@@ -695,7 +738,9 @@ or nil for non-mirrored character.")))
695 ((eq sym 'CJK\ COMPATIBILITY\ IDEOGRAPH) 738 ((eq sym 'CJK\ COMPATIBILITY\ IDEOGRAPH)
696 (format "%s-%04X" sym char)) 739 (format "%s-%04X" sym char))
697 ((eq sym 'VARIATION\ SELECTOR) 740 ((eq sym 'VARIATION\ SELECTOR)
698 (format "%s-%d" sym (+ (- char #xe0100) 17)))))))) 741 (format "%s-%d" sym (+ (- char #xe0100) 17))))))
742
743 (t "")))
699 744
700;; Store VAL as the name of CHAR in TABLE. 745;; Store VAL as the name of CHAR in TABLE.
701 746
@@ -707,6 +752,9 @@ or nil for non-mirrored character.")))
707 752
708(defun unidata-get-decomposition (char val table) 753(defun unidata-get-decomposition (char val table)
709 (cond 754 (cond
755 ((not val)
756 (list char))
757
710 ((consp val) 758 ((consp val)
711 val) 759 val)
712 760
@@ -747,7 +795,8 @@ or nil for non-mirrored character.")))
747 (aset vec idx (nconc word-list tail-list))) 795 (aset vec idx (nconc word-list tail-list)))
748 (dotimes (i 128) 796 (dotimes (i 128)
749 (aset table (+ first-char i) (aref vec i))) 797 (aset table (+ first-char i) (aref vec i)))
750 (aref vec (- char first-char))))) 798 (setq val (aref vec (- char first-char)))
799 (or val (list char)))))
751 800
752 ;; Hangul syllable 801 ;; Hangul syllable
753 ((and (eq val 0) (>= char #xAC00) (<= char #xD7A3)) 802 ((and (eq val 0) (>= char #xAC00) (<= char #xD7A3))