diff options
| author | Paul Eggert | 2011-08-24 14:20:36 -0700 |
|---|---|---|
| committer | Paul Eggert | 2011-08-24 14:20:36 -0700 |
| commit | 011ba6eaacfa50cc9871d0cfea34e8f0a7a5bc43 (patch) | |
| tree | ced7a98ff1eb289559da6ebfda46a8e436640da6 /admin | |
| parent | fe4496a6e27ac892283b8568adbd12831868cc54 (diff) | |
| parent | f22f4808a08e8f985d5e6175bbd13d5260e1ab1a (diff) | |
| download | emacs-011ba6eaacfa50cc9871d0cfea34e8f0a7a5bc43.tar.gz emacs-011ba6eaacfa50cc9871d0cfea34e8f0a7a5bc43.zip | |
Merge from trunk.
Diffstat (limited to 'admin')
| -rw-r--r-- | admin/ChangeLog | 20 | ||||
| -rw-r--r-- | admin/unidata/unidata-gen.el | 107 |
2 files changed, 98 insertions, 29 deletions
diff --git a/admin/ChangeLog b/admin/ChangeLog index bc38edfc8d4..17cbcbb3bdf 100644 --- a/admin/ChangeLog +++ b/admin/ChangeLog | |||
| @@ -1,3 +1,23 @@ | |||
| 1 | 2011-08-23 Eli Zaretskii <eliz@gnu.org> | ||
| 2 | |||
| 3 | * unidata/unidata-gen.el (unidata-prop-alist): Update the default | ||
| 4 | values of bidi-class according to DerivedBidiClass.txt from the | ||
| 5 | latest UCD. | ||
| 6 | |||
| 7 | 2011-08-23 Kenichi Handa <handa@m17n.org> | ||
| 8 | |||
| 9 | * unidata/unidata-gen.el (unidata-prop-alist): Provide default | ||
| 10 | values for name, general-category, canonical-combining-class, | ||
| 11 | mirrored, and bidi-class. Describe the meaning of value nil for | ||
| 12 | decimal-digit-value, digit-value, numeric-value, uppercase, | ||
| 13 | lowercase, titlecase, and mirroring. | ||
| 14 | (unidata-gen-table): Handle the case that default-value is a | ||
| 15 | list. Set default values of characters not listed in a table. | ||
| 16 | (unidata-get-name): Return an empty string if a value in a | ||
| 17 | char-table is nil. | ||
| 18 | (unidata-get-decomposition): Return a list of character itself if | ||
| 19 | a value in a char-table is nil. | ||
| 20 | |||
| 1 | 2011-08-15 Eli Zaretskii <eliz@gnu.org> | 21 | 2011-08-15 Eli Zaretskii <eliz@gnu.org> |
| 2 | 22 | ||
| 3 | * unidata/bidimirror.awk: File removed. | 23 | * unidata/bidimirror.awk: File removed. |
diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el index ab1dcd134ac..1002bb003af 100644 --- a/admin/unidata/unidata-gen.el +++ b/admin/unidata/unidata-gen.el | |||
| @@ -146,7 +146,7 @@ | |||
| 146 | (setq unidata-list (cdr table)))) | 146 | (setq unidata-list (cdr table)))) |
| 147 | 147 | ||
| 148 | ;; Alist of this form: | 148 | ;; Alist of this form: |
| 149 | ;; (PROP INDEX GENERATOR FILENAME DOCSTRING DESCRIBER VAL-LIST) | 149 | ;; (PROP INDEX GENERATOR FILENAME DOCSTRING DESCRIBER DEFAULT VAL-LIST) |
| 150 | ;; PROP: character property | 150 | ;; PROP: character property |
| 151 | ;; INDEX: index to each element of unidata-list for PROP. | 151 | ;; INDEX: index to each element of unidata-list for PROP. |
| 152 | ;; It may be a function that generates an alist of character codes | 152 | ;; It may be a function that generates an alist of character codes |
| @@ -155,14 +155,20 @@ | |||
| 155 | ;; FILENAME: filename to store the char-table | 155 | ;; FILENAME: filename to store the char-table |
| 156 | ;; DOCSTRING: docstring for the property | 156 | ;; DOCSTRING: docstring for the property |
| 157 | ;; DESCRIBER: function to call to get a description string of property value | 157 | ;; DESCRIBER: function to call to get a description string of property value |
| 158 | ;; DEFAULT: the default value of the property | 158 | ;; DEFAULT: the default value of the property. It may have the form |
| 159 | ;; (VAL0 (FROM1 TO1 VAL1) ...) which indicates that the default | ||
| 160 | ;; value is VAL0 except for characters in the ranges specified by | ||
| 161 | ;; FROMn and TOn (inclusive). The default value of characters | ||
| 162 | ;; between FROMn and TOn is VALn. | ||
| 159 | ;; VAL-LIST: list of specially ordered property values | 163 | ;; VAL-LIST: list of specially ordered property values |
| 160 | 164 | ||
| 161 | (defconst unidata-prop-alist | 165 | (defconst unidata-prop-alist |
| 162 | '((name | 166 | '((name |
| 163 | 1 unidata-gen-table-name "uni-name.el" | 167 | 1 unidata-gen-table-name "uni-name.el" |
| 164 | "Unicode character name. | 168 | "Unicode character name. |
| 165 | Property value is a string.") | 169 | Property value is a string." |
| 170 | nil | ||
| 171 | "") | ||
| 166 | (general-category | 172 | (general-category |
| 167 | 2 unidata-gen-table-symbol "uni-category.el" | 173 | 2 unidata-gen-table-symbol "uni-category.el" |
| 168 | "Unicode general category. | 174 | "Unicode general category. |
| @@ -170,7 +176,7 @@ Property value is one of the following symbols: | |||
| 170 | Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po, | 176 | Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po, |
| 171 | Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn" | 177 | Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn" |
| 172 | unidata-describe-general-category | 178 | unidata-describe-general-category |
| 173 | nil | 179 | Cn |
| 174 | ;; The order of elements must be in sync with unicode_category_t | 180 | ;; The order of elements must be in sync with unicode_category_t |
| 175 | ;; in src/character.h. | 181 | ;; in src/character.h. |
| 176 | (Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po | 182 | (Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po |
| @@ -179,7 +185,8 @@ Property value is one of the following symbols: | |||
| 179 | 3 unidata-gen-table-integer "uni-combining.el" | 185 | 3 unidata-gen-table-integer "uni-combining.el" |
| 180 | "Unicode canonical combining class. | 186 | "Unicode canonical combining class. |
| 181 | Property value is an integer." | 187 | Property value is an integer." |
| 182 | unidata-describe-canonical-combining-class) | 188 | unidata-describe-canonical-combining-class |
| 189 | 0) | ||
| 183 | (bidi-class | 190 | (bidi-class |
| 184 | 4 unidata-gen-table-symbol "uni-bidi.el" | 191 | 4 unidata-gen-table-symbol "uni-bidi.el" |
| 185 | "Unicode bidi class. | 192 | "Unicode bidi class. |
| @@ -187,7 +194,12 @@ Property value is one of the following symbols: | |||
| 187 | L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET, | 194 | L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET, |
| 188 | AN, CS, NSM, BN, B, S, WS, ON" | 195 | AN, CS, NSM, BN, B, S, WS, ON" |
| 189 | unidata-describe-bidi-class | 196 | unidata-describe-bidi-class |
| 190 | L | 197 | ;; The assignment of default values to blocks of code points |
| 198 | ;; follows the file DerivedBidiClass.txt from the Unicode | ||
| 199 | ;; Character Database (UCD). | ||
| 200 | (L (#x0600 #x06FF AL) (#xFB50 #xFDFF AL) (#xFE70 #xFEFF AL) | ||
| 201 | (#x0590 #x05FF R) (#x07C0 #x08FF R) | ||
| 202 | (#xFB1D #xFB4F R) (#x10800 #x10FFF R) (#x1E800 #x1EFFF R)) | ||
| 191 | ;; The order of elements must be in sync with bidi_type_t in | 203 | ;; The order of elements must be in sync with bidi_type_t in |
| 192 | ;; src/dispextern.h. | 204 | ;; src/dispextern.h. |
| 193 | (L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON)) | 205 | (L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON)) |
| @@ -202,19 +214,24 @@ one of these symbols representing compatibility formatting tag: | |||
| 202 | (decimal-digit-value | 214 | (decimal-digit-value |
| 203 | 6 unidata-gen-table-integer "uni-decimal.el" | 215 | 6 unidata-gen-table-integer "uni-decimal.el" |
| 204 | "Unicode numeric value (decimal digit). | 216 | "Unicode numeric value (decimal digit). |
| 205 | Property value is an integer.") | 217 | Property value is an integer 0..9, or nil. |
| 218 | The value nil stands for NaN \"Numeric_Value\".") | ||
| 206 | (digit-value | 219 | (digit-value |
| 207 | 7 unidata-gen-table-integer "uni-digit.el" | 220 | 7 unidata-gen-table-integer "uni-digit.el" |
| 208 | "Unicode numeric value (digit). | 221 | "Unicode numeric value (digit). |
| 209 | Property value is an integer.") | 222 | Property value is an integer 0..9, or nil. |
| 223 | The value nil stands for NaN \"Numeric_Value\".") | ||
| 210 | (numeric-value | 224 | (numeric-value |
| 211 | 8 unidata-gen-table-numeric "uni-numeric.el" | 225 | 8 unidata-gen-table-numeric "uni-numeric.el" |
| 212 | "Unicode numeric value (numeric). | 226 | "Unicode numeric value (numeric). |
| 213 | Property value is an integer or a floating point.") | 227 | Property value is an integer, a floating point, or nil. |
| 228 | The value nil stands for NaN \"Numeric_Value\".") | ||
| 214 | (mirrored | 229 | (mirrored |
| 215 | 9 unidata-gen-table-symbol "uni-mirrored.el" | 230 | 9 unidata-gen-table-symbol "uni-mirrored.el" |
| 216 | "Unicode bidi mirrored flag. | 231 | "Unicode bidi mirrored flag. |
| 217 | Property value is a symbol `Y' or `N'. See also the property `mirroring'.") | 232 | Property value is a symbol `Y' or `N'. See also the property `mirroring'." |
| 233 | nil | ||
| 234 | N) | ||
| 218 | (old-name | 235 | (old-name |
| 219 | 10 unidata-gen-table-name "uni-old-name.el" | 236 | 10 unidata-gen-table-name "uni-old-name.el" |
| 220 | "Unicode old names as published in Unicode 1.0. | 237 | "Unicode old names as published in Unicode 1.0. |
| @@ -226,23 +243,30 @@ Property value is a string.") | |||
| 226 | (uppercase | 243 | (uppercase |
| 227 | 12 unidata-gen-table-character "uni-uppercase.el" | 244 | 12 unidata-gen-table-character "uni-uppercase.el" |
| 228 | "Unicode simple uppercase mapping. | 245 | "Unicode simple uppercase mapping. |
| 229 | Property value is a character." | 246 | Property value is a character or nil. |
| 247 | The value nil means that the actual property value of a character | ||
| 248 | is the character itself." | ||
| 230 | string) | 249 | string) |
| 231 | (lowercase | 250 | (lowercase |
| 232 | 13 unidata-gen-table-character "uni-lowercase.el" | 251 | 13 unidata-gen-table-character "uni-lowercase.el" |
| 233 | "Unicode simple lowercase mapping. | 252 | "Unicode simple lowercase mapping. |
| 234 | Property value is a character." | 253 | Property value is a character or nil. |
| 254 | The value nil means that the actual property value of a character | ||
| 255 | is the character itself." | ||
| 235 | string) | 256 | string) |
| 236 | (titlecase | 257 | (titlecase |
| 237 | 14 unidata-gen-table-character "uni-titlecase.el" | 258 | 14 unidata-gen-table-character "uni-titlecase.el" |
| 238 | "Unicode simple titlecase mapping. | 259 | "Unicode simple titlecase mapping. |
| 239 | Property value is a character." | 260 | Property value is a character or nil. |
| 261 | The value nil means that the actual property value of a character | ||
| 262 | is the character itself." | ||
| 240 | string) | 263 | string) |
| 241 | (mirroring | 264 | (mirroring |
| 242 | unidata-gen-mirroring-list unidata-gen-table-character "uni-mirrored.el" | 265 | unidata-gen-mirroring-list unidata-gen-table-character "uni-mirrored.el" |
| 243 | "Unicode bidi-mirroring characters. | 266 | "Unicode bidi-mirroring characters. |
| 244 | Property value is a character that has the corresponding mirroring image, | 267 | Property value is a character that has the corresponding mirroring image or nil. |
| 245 | or nil for non-mirrored character."))) | 268 | The value nil means that the actual property value of a character |
| 269 | is the character itself."))) | ||
| 246 | 270 | ||
| 247 | ;; Functions to access the above data. | 271 | ;; Functions to access the above data. |
| 248 | (defsubst unidata-prop-index (prop) (nth 1 (assq prop unidata-prop-alist))) | 272 | (defsubst unidata-prop-index (prop) (nth 1 (assq prop unidata-prop-alist))) |
| @@ -393,9 +417,18 @@ or nil for non-mirrored character."))) | |||
| 393 | (while tail | 417 | (while tail |
| 394 | (setcar tail (cons (car tail) val-code)) | 418 | (setcar tail (cons (car tail) val-code)) |
| 395 | (setq tail (cdr tail) val-code (1+ val-code))) | 419 | (setq tail (cdr tail) val-code (1+ val-code))) |
| 396 | (setq default-value (unidata-encode-val val-list default-value)) | 420 | (if (consp default-value) |
| 397 | (set-char-table-range table t default-value) | 421 | (setq default-value (copy-sequence default-value)) |
| 398 | (set-char-table-range table nil default-value) | 422 | (setq default-value (list default-value))) |
| 423 | (setcar default-value | ||
| 424 | (unidata-encode-val val-list (car default-value))) | ||
| 425 | (set-char-table-range table t (car default-value)) | ||
| 426 | (set-char-table-range table nil (car default-value)) | ||
| 427 | (dolist (elm (cdr default-value)) | ||
| 428 | (setcar (nthcdr 2 elm) | ||
| 429 | (unidata-encode-val val-list (nth 2 elm))) | ||
| 430 | (set-char-table-range table (cons (car elm) (nth 1 elm)) (nth 2 elm))) | ||
| 431 | |||
| 399 | (setq tail unidata-list) | 432 | (setq tail unidata-list) |
| 400 | (while tail | 433 | (while tail |
| 401 | (setq elt (car tail) tail (cdr tail)) | 434 | (setq elt (car tail) tail (cdr tail)) |
| @@ -419,17 +452,27 @@ or nil for non-mirrored character."))) | |||
| 419 | (setq prev-range-data (cons (cons from to) val-code))))) | 452 | (setq prev-range-data (cons (cons from to) val-code))))) |
| 420 | (let* ((start (lsh (lsh range -7) 7)) | 453 | (let* ((start (lsh (lsh range -7) 7)) |
| 421 | (limit (+ start 127)) | 454 | (limit (+ start 127)) |
| 422 | str count new-val) | 455 | str count new-val from to vcode) |
| 423 | (fillarray vec 0) | 456 | (fillarray vec (car default-value)) |
| 424 | ;; See the comment above. | 457 | (dolist (elm (cdr default-value)) |
| 425 | (when (and prev-range-data | 458 | (setq from (car elm) to (nth 1 elm)) |
| 426 | (>= (cdr (car prev-range-data)) start)) | 459 | (when (and (<= from limit) |
| 427 | (let ((from (car (car prev-range-data))) | 460 | (or (>= from start) (>= to start))) |
| 428 | (to (cdr (car prev-range-data))) | 461 | (setq from (max from start) |
| 429 | (vcode (cdr prev-range-data))) | 462 | to (min to limit) |
| 463 | vcode (nth 2 elm)) | ||
| 430 | (while (<= from to) | 464 | (while (<= from to) |
| 431 | (aset vec (- from start) vcode) | 465 | (aset vec (- from start) vcode) |
| 432 | (setq from (1+ from))))) | 466 | (setq from (1+ from))))) |
| 467 | ;; See the comment above. | ||
| 468 | (when (and prev-range-data | ||
| 469 | (>= (cdr (car prev-range-data)) start)) | ||
| 470 | (setq from (car (car prev-range-data)) | ||
| 471 | to (cdr (car prev-range-data)) | ||
| 472 | vcode (cdr prev-range-data)) | ||
| 473 | (while (<= from to) | ||
| 474 | (aset vec (- from start) vcode) | ||
| 475 | (setq from (1+ from)))) | ||
| 433 | (setq prev-range-data nil) | 476 | (setq prev-range-data nil) |
| 434 | (if val-code | 477 | (if val-code |
| 435 | (aset vec (- range start) val-code)) | 478 | (aset vec (- range start) val-code)) |
| @@ -669,7 +712,7 @@ or nil for non-mirrored character."))) | |||
| 669 | (aset table c name) | 712 | (aset table c name) |
| 670 | (if (= c char) | 713 | (if (= c char) |
| 671 | (setq val name)))) | 714 | (setq val name)))) |
| 672 | val))) | 715 | (or val "")))) |
| 673 | 716 | ||
| 674 | ((and (integerp val) (> val 0)) | 717 | ((and (integerp val) (> val 0)) |
| 675 | (let* ((symbol-table (aref (char-table-extra-slot table 4) 1)) | 718 | (let* ((symbol-table (aref (char-table-extra-slot table 4) 1)) |
| @@ -695,7 +738,9 @@ or nil for non-mirrored character."))) | |||
| 695 | ((eq sym 'CJK\ COMPATIBILITY\ IDEOGRAPH) | 738 | ((eq sym 'CJK\ COMPATIBILITY\ IDEOGRAPH) |
| 696 | (format "%s-%04X" sym char)) | 739 | (format "%s-%04X" sym char)) |
| 697 | ((eq sym 'VARIATION\ SELECTOR) | 740 | ((eq sym 'VARIATION\ SELECTOR) |
| 698 | (format "%s-%d" sym (+ (- char #xe0100) 17)))))))) | 741 | (format "%s-%d" sym (+ (- char #xe0100) 17)))))) |
| 742 | |||
| 743 | (t ""))) | ||
| 699 | 744 | ||
| 700 | ;; Store VAL as the name of CHAR in TABLE. | 745 | ;; Store VAL as the name of CHAR in TABLE. |
| 701 | 746 | ||
| @@ -707,6 +752,9 @@ or nil for non-mirrored character."))) | |||
| 707 | 752 | ||
| 708 | (defun unidata-get-decomposition (char val table) | 753 | (defun unidata-get-decomposition (char val table) |
| 709 | (cond | 754 | (cond |
| 755 | ((not val) | ||
| 756 | (list char)) | ||
| 757 | |||
| 710 | ((consp val) | 758 | ((consp val) |
| 711 | val) | 759 | val) |
| 712 | 760 | ||
| @@ -747,7 +795,8 @@ or nil for non-mirrored character."))) | |||
| 747 | (aset vec idx (nconc word-list tail-list))) | 795 | (aset vec idx (nconc word-list tail-list))) |
| 748 | (dotimes (i 128) | 796 | (dotimes (i 128) |
| 749 | (aset table (+ first-char i) (aref vec i))) | 797 | (aset table (+ first-char i) (aref vec i))) |
| 750 | (aref vec (- char first-char))))) | 798 | (setq val (aref vec (- char first-char))) |
| 799 | (or val (list char))))) | ||
| 751 | 800 | ||
| 752 | ;; Hangul syllable | 801 | ;; Hangul syllable |
| 753 | ((and (eq val 0) (>= char #xAC00) (<= char #xD7A3)) | 802 | ((and (eq val 0) (>= char #xAC00) (<= char #xD7A3)) |