aboutsummaryrefslogtreecommitdiffstats
path: root/admin
diff options
context:
space:
mode:
authorKenichi Handa2011-08-23 20:48:07 +0900
committerKenichi Handa2011-08-23 20:48:07 +0900
commit0902a04edd7a2e0ca5d73fd996e46f26d5228501 (patch)
treef525281c4570fda4a5d45a5506f79be3f639a502 /admin
parent823564e519dd1f3e81a79949e1abc033c9e7c0a5 (diff)
downloademacs-0902a04edd7a2e0ca5d73fd996e46f26d5228501.tar.gz
emacs-0902a04edd7a2e0ca5d73fd996e46f26d5228501.zip
Fix default values of character properties.
Diffstat (limited to 'admin')
-rw-r--r--admin/ChangeLog14
-rw-r--r--admin/unidata/unidata-gen.el103
2 files changed, 88 insertions, 29 deletions
diff --git a/admin/ChangeLog b/admin/ChangeLog
index bc38edfc8d4..f8eb071d96d 100644
--- a/admin/ChangeLog
+++ b/admin/ChangeLog
@@ -1,3 +1,17 @@
12011-08-23 Kenichi Handa <handa@m17n.org>
2
3 * unidata/unidata-gen.el (unidata-prop-alist): Provide default
4 values for name, general-category, canonical-combining-class,
5 mirrored, and bidi-class. Describe the meaning of value nil for
6 decimal-digit-value, digit-value, numeric-value, uppercase,
7 lowercase, titlecase, and mirroring.
8 (unidata-gen-table): Handle the case that default-value is a
9 list. Set default values of characters not listed in a table.
10 (unidata-get-name): Return an empty string if a value in a
11 char-table is nil.
12 (unidata-get-decomposition): Return a list of character itself if
13 a value in a char-table is nil.
14
12011-08-15 Eli Zaretskii <eliz@gnu.org> 152011-08-15 Eli Zaretskii <eliz@gnu.org>
2 16
3 * unidata/bidimirror.awk: File removed. 17 * unidata/bidimirror.awk: File removed.
diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el
index ab1dcd134ac..03399eae213 100644
--- a/admin/unidata/unidata-gen.el
+++ b/admin/unidata/unidata-gen.el
@@ -146,7 +146,7 @@
146 (setq unidata-list (cdr table)))) 146 (setq unidata-list (cdr table))))
147 147
148;; Alist of this form: 148;; Alist of this form:
149;; (PROP INDEX GENERATOR FILENAME DOCSTRING DESCRIBER VAL-LIST) 149;; (PROP INDEX GENERATOR FILENAME DOCSTRING DESCRIBER DEFAULT VAL-LIST)
150;; PROP: character property 150;; PROP: character property
151;; INDEX: index to each element of unidata-list for PROP. 151;; INDEX: index to each element of unidata-list for PROP.
152;; It may be a function that generates an alist of character codes 152;; It may be a function that generates an alist of character codes
@@ -155,14 +155,20 @@
155;; FILENAME: filename to store the char-table 155;; FILENAME: filename to store the char-table
156;; DOCSTRING: docstring for the property 156;; DOCSTRING: docstring for the property
157;; DESCRIBER: function to call to get a description string of property value 157;; DESCRIBER: function to call to get a description string of property value
158;; DEFAULT: the default value of the property 158;; DEFAULT: the default value of the property. It may have the form
159;; (VAL0 (FROM1 TO1 VAL1) ...) which indicates that the default
160;; value is VAL0 except for characters in the ranges specified by
161;; FROMn and TOn (inclusive). The default value of characters
162;; between FROMn and TOn is VALn.
159;; VAL-LIST: list of specially ordered property values 163;; VAL-LIST: list of specially ordered property values
160 164
161(defconst unidata-prop-alist 165(defconst unidata-prop-alist
162 '((name 166 '((name
163 1 unidata-gen-table-name "uni-name.el" 167 1 unidata-gen-table-name "uni-name.el"
164 "Unicode character name. 168 "Unicode character name.
165Property value is a string.") 169Property value is a string."
170 nil
171 "")
166 (general-category 172 (general-category
167 2 unidata-gen-table-symbol "uni-category.el" 173 2 unidata-gen-table-symbol "uni-category.el"
168 "Unicode general category. 174 "Unicode general category.
@@ -170,7 +176,7 @@ Property value is one of the following symbols:
170 Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po, 176 Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po,
171 Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn" 177 Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn"
172 unidata-describe-general-category 178 unidata-describe-general-category
173 nil 179 Cn
174 ;; The order of elements must be in sync with unicode_category_t 180 ;; The order of elements must be in sync with unicode_category_t
175 ;; in src/character.h. 181 ;; in src/character.h.
176 (Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po 182 (Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po
@@ -179,7 +185,8 @@ Property value is one of the following symbols:
179 3 unidata-gen-table-integer "uni-combining.el" 185 3 unidata-gen-table-integer "uni-combining.el"
180 "Unicode canonical combining class. 186 "Unicode canonical combining class.
181Property value is an integer." 187Property value is an integer."
182 unidata-describe-canonical-combining-class) 188 unidata-describe-canonical-combining-class
189 0)
183 (bidi-class 190 (bidi-class
184 4 unidata-gen-table-symbol "uni-bidi.el" 191 4 unidata-gen-table-symbol "uni-bidi.el"
185 "Unicode bidi class. 192 "Unicode bidi class.
@@ -187,7 +194,8 @@ Property value is one of the following symbols:
187 L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET, 194 L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET,
188 AN, CS, NSM, BN, B, S, WS, ON" 195 AN, CS, NSM, BN, B, S, WS, ON"
189 unidata-describe-bidi-class 196 unidata-describe-bidi-class
190 L 197 (L (#x0600 #x06FF AL) (#x0590 #x05FF R) (#x07C0 #x08FF R)
198 (#xFB1D #xFB4F R) (#x10800 #x10FFF R) (#x1E800 #x1EFFF R))
191 ;; The order of elements must be in sync with bidi_type_t in 199 ;; The order of elements must be in sync with bidi_type_t in
192 ;; src/dispextern.h. 200 ;; src/dispextern.h.
193 (L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON)) 201 (L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON))
@@ -202,19 +210,24 @@ one of these symbols representing compatibility formatting tag:
202 (decimal-digit-value 210 (decimal-digit-value
203 6 unidata-gen-table-integer "uni-decimal.el" 211 6 unidata-gen-table-integer "uni-decimal.el"
204 "Unicode numeric value (decimal digit). 212 "Unicode numeric value (decimal digit).
205Property value is an integer.") 213Property value is an integer 0..9, or nil.
214The value nil stands for NaN \"Numeric_Value\".")
206 (digit-value 215 (digit-value
207 7 unidata-gen-table-integer "uni-digit.el" 216 7 unidata-gen-table-integer "uni-digit.el"
208 "Unicode numeric value (digit). 217 "Unicode numeric value (digit).
209Property value is an integer.") 218Property value is an integer 0..9, or nil.
219The value nil stands for NaN \"Numeric_Value\".")
210 (numeric-value 220 (numeric-value
211 8 unidata-gen-table-numeric "uni-numeric.el" 221 8 unidata-gen-table-numeric "uni-numeric.el"
212 "Unicode numeric value (numeric). 222 "Unicode numeric value (numeric).
213Property value is an integer or a floating point.") 223Property value is an integer, a floating point, or nil.
224The value nil stands for NaN \"Numeric_Value\".")
214 (mirrored 225 (mirrored
215 9 unidata-gen-table-symbol "uni-mirrored.el" 226 9 unidata-gen-table-symbol "uni-mirrored.el"
216 "Unicode bidi mirrored flag. 227 "Unicode bidi mirrored flag.
217Property value is a symbol `Y' or `N'. See also the property `mirroring'.") 228Property value is a symbol `Y' or `N'. See also the property `mirroring'."
229 nil
230 N)
218 (old-name 231 (old-name
219 10 unidata-gen-table-name "uni-old-name.el" 232 10 unidata-gen-table-name "uni-old-name.el"
220 "Unicode old names as published in Unicode 1.0. 233 "Unicode old names as published in Unicode 1.0.
@@ -226,23 +239,30 @@ Property value is a string.")
226 (uppercase 239 (uppercase
227 12 unidata-gen-table-character "uni-uppercase.el" 240 12 unidata-gen-table-character "uni-uppercase.el"
228 "Unicode simple uppercase mapping. 241 "Unicode simple uppercase mapping.
229Property value is a character." 242Property value is a character or nil.
243The value nil means that the actual property value of a character
244is the character itself."
230 string) 245 string)
231 (lowercase 246 (lowercase
232 13 unidata-gen-table-character "uni-lowercase.el" 247 13 unidata-gen-table-character "uni-lowercase.el"
233 "Unicode simple lowercase mapping. 248 "Unicode simple lowercase mapping.
234Property value is a character." 249Property value is a character or nil.
250The value nil means that the actual property value of a character
251is the character itself."
235 string) 252 string)
236 (titlecase 253 (titlecase
237 14 unidata-gen-table-character "uni-titlecase.el" 254 14 unidata-gen-table-character "uni-titlecase.el"
238 "Unicode simple titlecase mapping. 255 "Unicode simple titlecase mapping.
239Property value is a character." 256Property value is a character or nil.
257The value nil means that the actual property value of a character
258is the character itself."
240 string) 259 string)
241 (mirroring 260 (mirroring
242 unidata-gen-mirroring-list unidata-gen-table-character "uni-mirrored.el" 261 unidata-gen-mirroring-list unidata-gen-table-character "uni-mirrored.el"
243 "Unicode bidi-mirroring characters. 262 "Unicode bidi-mirroring characters.
244Property value is a character that has the corresponding mirroring image, 263Property value is a character that has the corresponding mirroring image or nil.
245or nil for non-mirrored character."))) 264The value nil means that the actual property value of a character
265is the character itself.")))
246 266
247;; Functions to access the above data. 267;; Functions to access the above data.
248(defsubst unidata-prop-index (prop) (nth 1 (assq prop unidata-prop-alist))) 268(defsubst unidata-prop-index (prop) (nth 1 (assq prop unidata-prop-alist)))
@@ -393,9 +413,18 @@ or nil for non-mirrored character.")))
393 (while tail 413 (while tail
394 (setcar tail (cons (car tail) val-code)) 414 (setcar tail (cons (car tail) val-code))
395 (setq tail (cdr tail) val-code (1+ val-code))) 415 (setq tail (cdr tail) val-code (1+ val-code)))
396 (setq default-value (unidata-encode-val val-list default-value)) 416 (if (consp default-value)
397 (set-char-table-range table t default-value) 417 (setq default-value (copy-sequence default-value))
398 (set-char-table-range table nil default-value) 418 (setq default-value (list default-value)))
419 (setcar default-value
420 (unidata-encode-val val-list (car default-value)))
421 (set-char-table-range table t (car default-value))
422 (set-char-table-range table nil (car default-value))
423 (dolist (elm (cdr default-value))
424 (setcar (nthcdr 2 elm)
425 (unidata-encode-val val-list (nth 2 elm)))
426 (set-char-table-range table (cons (car elm) (nth 1 elm)) (nth 2 elm)))
427
399 (setq tail unidata-list) 428 (setq tail unidata-list)
400 (while tail 429 (while tail
401 (setq elt (car tail) tail (cdr tail)) 430 (setq elt (car tail) tail (cdr tail))
@@ -419,17 +448,27 @@ or nil for non-mirrored character.")))
419 (setq prev-range-data (cons (cons from to) val-code))))) 448 (setq prev-range-data (cons (cons from to) val-code)))))
420 (let* ((start (lsh (lsh range -7) 7)) 449 (let* ((start (lsh (lsh range -7) 7))
421 (limit (+ start 127)) 450 (limit (+ start 127))
422 str count new-val) 451 str count new-val from to vcode)
423 (fillarray vec 0) 452 (fillarray vec (car default-value))
424 ;; See the comment above. 453 (dolist (elm (cdr default-value))
425 (when (and prev-range-data 454 (setq from (car elm) to (nth 1 elm))
426 (>= (cdr (car prev-range-data)) start)) 455 (when (and (<= from limit)
427 (let ((from (car (car prev-range-data))) 456 (or (>= from start) (>= to start)))
428 (to (cdr (car prev-range-data))) 457 (setq from (max from start)
429 (vcode (cdr prev-range-data))) 458 to (min to limit)
459 vcode (nth 2 elm))
430 (while (<= from to) 460 (while (<= from to)
431 (aset vec (- from start) vcode) 461 (aset vec (- from start) vcode)
432 (setq from (1+ from))))) 462 (setq from (1+ from)))))
463 ;; See the comment above.
464 (when (and prev-range-data
465 (>= (cdr (car prev-range-data)) start))
466 (setq from (car (car prev-range-data))
467 to (cdr (car prev-range-data))
468 vcode (cdr prev-range-data))
469 (while (<= from to)
470 (aset vec (- from start) vcode)
471 (setq from (1+ from))))
433 (setq prev-range-data nil) 472 (setq prev-range-data nil)
434 (if val-code 473 (if val-code
435 (aset vec (- range start) val-code)) 474 (aset vec (- range start) val-code))
@@ -669,7 +708,7 @@ or nil for non-mirrored character.")))
669 (aset table c name) 708 (aset table c name)
670 (if (= c char) 709 (if (= c char)
671 (setq val name)))) 710 (setq val name))))
672 val))) 711 (or val ""))))
673 712
674 ((and (integerp val) (> val 0)) 713 ((and (integerp val) (> val 0))
675 (let* ((symbol-table (aref (char-table-extra-slot table 4) 1)) 714 (let* ((symbol-table (aref (char-table-extra-slot table 4) 1))
@@ -695,7 +734,9 @@ or nil for non-mirrored character.")))
695 ((eq sym 'CJK\ COMPATIBILITY\ IDEOGRAPH) 734 ((eq sym 'CJK\ COMPATIBILITY\ IDEOGRAPH)
696 (format "%s-%04X" sym char)) 735 (format "%s-%04X" sym char))
697 ((eq sym 'VARIATION\ SELECTOR) 736 ((eq sym 'VARIATION\ SELECTOR)
698 (format "%s-%d" sym (+ (- char #xe0100) 17)))))))) 737 (format "%s-%d" sym (+ (- char #xe0100) 17))))))
738
739 (t "")))
699 740
700;; Store VAL as the name of CHAR in TABLE. 741;; Store VAL as the name of CHAR in TABLE.
701 742
@@ -707,6 +748,9 @@ or nil for non-mirrored character.")))
707 748
708(defun unidata-get-decomposition (char val table) 749(defun unidata-get-decomposition (char val table)
709 (cond 750 (cond
751 ((not val)
752 (list char))
753
710 ((consp val) 754 ((consp val)
711 val) 755 val)
712 756
@@ -747,7 +791,8 @@ or nil for non-mirrored character.")))
747 (aset vec idx (nconc word-list tail-list))) 791 (aset vec idx (nconc word-list tail-list)))
748 (dotimes (i 128) 792 (dotimes (i 128)
749 (aset table (+ first-char i) (aref vec i))) 793 (aset table (+ first-char i) (aref vec i)))
750 (aref vec (- char first-char))))) 794 (setq val (aref vec (- char first-char)))
795 (or val (list char)))))
751 796
752 ;; Hangul syllable 797 ;; Hangul syllable
753 ((and (eq val 0) (>= char #xAC00) (<= char #xD7A3)) 798 ((and (eq val 0) (>= char #xAC00) (<= char #xD7A3))