diff options
| author | Kenichi Handa | 2011-07-07 07:43:48 +0900 |
|---|---|---|
| committer | Kenichi Handa | 2011-07-07 07:43:48 +0900 |
| commit | c805dec0b5fa81b5c9f2b724e2ec12a17d723aca (patch) | |
| tree | c29a8490c976fdf4dbf64ef1b13a57f7d1110cc1 /admin | |
| parent | 5c62d133468c354b47a1643092add8292e084765 (diff) | |
| download | emacs-c805dec0b5fa81b5c9f2b724e2ec12a17d723aca.tar.gz emacs-c805dec0b5fa81b5c9f2b724e2ec12a17d723aca.zip | |
Add C interface for Unicode character property table.
Diffstat (limited to 'admin')
| -rw-r--r-- | admin/ChangeLog | 35 | ||||
| -rw-r--r-- | admin/unidata/Makefile.in | 5 | ||||
| -rw-r--r-- | admin/unidata/unidata-gen.el | 437 |
3 files changed, 223 insertions, 254 deletions
diff --git a/admin/ChangeLog b/admin/ChangeLog index 7aaeb1d5ee2..3632a0992a6 100644 --- a/admin/ChangeLog +++ b/admin/ChangeLog | |||
| @@ -1,3 +1,38 @@ | |||
| 1 | 2011-07-06 Kenichi Handa <handa@m17n.org> | ||
| 2 | |||
| 3 | * unidata/unidata-gen.el (unidata-dir): New variable. | ||
| 4 | (unidata-setup-list): Expand unidata-text-file in unidata-dir. | ||
| 5 | (unidata-prop-alist): INDEX element may be a function. New | ||
| 6 | optional element VAL-LIST (for general-category and bidi-class). | ||
| 7 | New entry `mirroring'. | ||
| 8 | (unidata-prop-default, unidata-prop-val-list): New subst. | ||
| 9 | (unidata-get-character, unidata-put-character): Delete them. | ||
| 10 | (unidata-gen-table-character): New arg IGNORE. Adjusted for the | ||
| 11 | above changes. | ||
| 12 | (unidata-get-symbol, unidata-get-integer, unidata-get-numeric) | ||
| 13 | (unidata-put-symbol, unidata-put-integer, unidata-put-numeric): | ||
| 14 | Delete them. | ||
| 15 | (unidata-encode-val): Assume that the first element of VAL-LIST is | ||
| 16 | a cons (nil . 0). | ||
| 17 | (unidata-gen-table): Change argument DEFAULT-VALUE to VAL-LIST. | ||
| 18 | Always store the encoded value. | ||
| 19 | (unidata-gen-table-symbol): New args DEFAULT-VALUE and VAL-LIST. | ||
| 20 | Set the 1st and the 2nd extra slots to index numbers for C | ||
| 21 | functions. | ||
| 22 | (unidata-gen-table-integer): Likewise. | ||
| 23 | (unidata-gen-table-numeric): Likewise. | ||
| 24 | (unidata-gen-table-name): New arg IGNORE. | ||
| 25 | (unidata-gen-table-decomposition): Likewise. | ||
| 26 | (unidata-describe-general-category): Add the case nil to the | ||
| 27 | description alist. | ||
| 28 | (unidata-gen-mirroring-list): New funciton. | ||
| 29 | (unidata-gen-files): New arg DATA-DIR. Adjusted for the change of | ||
| 30 | unidata-prop-alist. Handle the case of storing multiple | ||
| 31 | char-tables in a file. | ||
| 32 | |||
| 33 | * unidata/Makefile.in (${DSTDIR}/charprop.el): New arg to | ||
| 34 | unidata-gen-files. | ||
| 35 | |||
| 1 | 2011-05-21 Glenn Morris <rgm@gnu.org> | 36 | 2011-05-21 Glenn Morris <rgm@gnu.org> |
| 2 | 37 | ||
| 3 | * bzrmerge.el (bzrmerge-resolve): Suppress prompts about file-locals. | 38 | * bzrmerge.el (bzrmerge-resolve): Suppress prompts about file-locals. |
diff --git a/admin/unidata/Makefile.in b/admin/unidata/Makefile.in index 04f2f1d4380..e1fe247631f 100644 --- a/admin/unidata/Makefile.in +++ b/admin/unidata/Makefile.in | |||
| @@ -33,9 +33,10 @@ unidata.txt: UnicodeData.txt | |||
| 33 | 33 | ||
| 34 | ${DSTDIR}/charprop.el: unidata-gen.elc unidata.txt | 34 | ${DSTDIR}/charprop.el: unidata-gen.elc unidata.txt |
| 35 | ELC=`/bin/pwd`/unidata-gen.elc; \ | 35 | ELC=`/bin/pwd`/unidata-gen.elc; \ |
| 36 | DATA=`/bin/pwd`/unidata.txt; \ | 36 | DATADIR=`/bin/pwd`; \ |
| 37 | DATA=unidata.txt; \ | ||
| 37 | cd ${DSTDIR}; \ | 38 | cd ${DSTDIR}; \ |
| 38 | ${RUNEMACS} -batch --load $${ELC} -f unidata-gen-files $${DATA} | 39 | ${RUNEMACS} -batch --load $${ELC} -f unidata-gen-files $${DATADIR} $${DATA} |
| 39 | 40 | ||
| 40 | ../../src/biditype.h: UnicodeData.txt | 41 | ../../src/biditype.h: UnicodeData.txt |
| 41 | gawk -F";" -f biditype.awk $< > $@ | 42 | gawk -F";" -f biditype.awk $< > $@ |
diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el index 9f898668526..ab1dcd134ac 100644 --- a/admin/unidata/unidata-gen.el +++ b/admin/unidata/unidata-gen.el | |||
| @@ -33,24 +33,25 @@ | |||
| 33 | ;; | 33 | ;; |
| 34 | ;; charprop.el | 34 | ;; charprop.el |
| 35 | ;; It contains a series of forms of this format: | 35 | ;; It contains a series of forms of this format: |
| 36 | ;; (char-code-property-register PROP FILE) | 36 | ;; (define-char-code-property PROP FILE) |
| 37 | ;; where PROP is a symbol representing a character property | 37 | ;; where PROP is a symbol representing a character property |
| 38 | ;; (name, generic-category, etc), and FILE is a name of one of | 38 | ;; (name, general-category, etc), and FILE is a name of one of |
| 39 | ;; the following files. | 39 | ;; the following files. |
| 40 | ;; | 40 | ;; |
| 41 | ;; uni-name.el, uni-category.el, uni-combining.el, uni-bidi.el, | 41 | ;; uni-name.el, uni-category.el, uni-combining.el, uni-bidi.el, |
| 42 | ;; uni-decomposition.el, uni-decimal.el, uni-digit.el, uni-numeric.el, | 42 | ;; uni-decomposition.el, uni-decimal.el, uni-digit.el, uni-numeric.el, |
| 43 | ;; uni-mirrored.el, uni-old-name.el, uni-comment.el, uni-uppercase.el, | 43 | ;; uni-mirrored.el, uni-old-name.el, uni-comment.el, uni-uppercase.el, |
| 44 | ;; uni-lowercase.el, uni-titlecase.el | 44 | ;; uni-lowercase.el, uni-titlecase.el |
| 45 | ;; They each contain a single form of this format: | 45 | ;; They contain one or more forms of this format: |
| 46 | ;; (char-code-property-register PROP CHAR-TABLE) | 46 | ;; (define-char-code-property PROP CHAR-TABLE) |
| 47 | ;; where PROP is the same as above, and CHAR-TABLE is a | 47 | ;; where PROP is the same as above, and CHAR-TABLE is a |
| 48 | ;; char-table containing property values in a compressed format. | 48 | ;; char-table containing property values in a compressed format. |
| 49 | ;; | 49 | ;; |
| 50 | ;; When they are installed in .../lisp/international/, the file | 50 | ;; When they are installed in .../lisp/international/, the file |
| 51 | ;; "charprop.el" is preloaded in loadup.el. The other files are | 51 | ;; "charprop.el" is preloaded in loadup.el. The other files are |
| 52 | ;; automatically loaded when the functions `get-char-code-property' | 52 | ;; automatically loaded when the Lisp functions |
| 53 | ;; and `put-char-code-property' are called. | 53 | ;; `get-char-code-property' and `put-char-code-property', and C |
| 54 | ;; function uniprop_table are called. | ||
| 54 | ;; | 55 | ;; |
| 55 | ;; FORMAT OF A CHAR TABLE | 56 | ;; FORMAT OF A CHAR TABLE |
| 56 | ;; | 57 | ;; |
| @@ -62,17 +63,22 @@ | |||
| 62 | ;; data in a char-table as below. | 63 | ;; data in a char-table as below. |
| 63 | ;; | 64 | ;; |
| 64 | ;; If succeeding 128*N characters have the same property value, we | 65 | ;; If succeeding 128*N characters have the same property value, we |
| 65 | ;; store that value for them. Otherwise, compress values for | 66 | ;; store that value (or the encoded one) for them. Otherwise, |
| 66 | ;; succeeding 128 characters into a single string and store it as a | 67 | ;; compress values (or the encoded ones) for succeeding 128 |
| 67 | ;; value for those characters. The way of compression depends on a | 68 | ;; characters into a single string and store it for those |
| 68 | ;; property. See the section "SIMPLE TABLE", "RUN-LENGTH TABLE", | 69 | ;; characters. The way of compression depends on a property. See |
| 69 | ;; and "WORD-LIST TABLE". | 70 | ;; the section "SIMPLE TABLE", "RUN-LENGTH TABLE", and "WORD-LIST |
| 70 | 71 | ;; TABLE". | |
| 71 | ;; The char table has four extra slots: | 72 | |
| 73 | ;; The char table has five extra slots: | ||
| 72 | ;; 1st: property symbol | 74 | ;; 1st: property symbol |
| 73 | ;; 2nd: function to call to get a property value | 75 | ;; 2nd: function to call to get a property value, |
| 74 | ;; 3nd: function to call to put a property value | 76 | ;; or an index number of C function to decode the value, |
| 75 | ;; 4th: function to call to get a description of a property value | 77 | ;; or nil if the value can be directly got from the table. |
| 78 | ;; 3nd: function to call to put a property value, | ||
| 79 | ;; or an index number of C function to encode the value, | ||
| 80 | ;; or nil if the value can be directly stored in the table. | ||
| 81 | ;; 4th: function to call to get a description of a property value, or nil | ||
| 76 | ;; 5th: data referred by the above functions | 82 | ;; 5th: data referred by the above functions |
| 77 | 83 | ||
| 78 | ;; List of elements of this form: | 84 | ;; List of elements of this form: |
| @@ -82,6 +88,11 @@ | |||
| 82 | 88 | ||
| 83 | (defvar unidata-list nil) | 89 | (defvar unidata-list nil) |
| 84 | 90 | ||
| 91 | ;; Name of the directory containing files of Unicode Character | ||
| 92 | ;; Database. | ||
| 93 | |||
| 94 | (defvar unidata-dir nil) | ||
| 95 | |||
| 85 | (defun unidata-setup-list (unidata-text-file) | 96 | (defun unidata-setup-list (unidata-text-file) |
| 86 | (let* ((table (list nil)) | 97 | (let* ((table (list nil)) |
| 87 | (tail table) | 98 | (tail table) |
| @@ -90,6 +101,7 @@ | |||
| 90 | ("^<.*Surrogate" . nil) | 101 | ("^<.*Surrogate" . nil) |
| 91 | ("^<.*Private Use" . PRIVATE\ USE))) | 102 | ("^<.*Private Use" . PRIVATE\ USE))) |
| 92 | val char name) | 103 | val char name) |
| 104 | (setq unidata-text-file (expand-file-name unidata-text-file unidata-dir)) | ||
| 93 | (or (file-readable-p unidata-text-file) | 105 | (or (file-readable-p unidata-text-file) |
| 94 | (error "File not readable: %s" unidata-text-file)) | 106 | (error "File not readable: %s" unidata-text-file)) |
| 95 | (with-temp-buffer | 107 | (with-temp-buffer |
| @@ -134,12 +146,17 @@ | |||
| 134 | (setq unidata-list (cdr table)))) | 146 | (setq unidata-list (cdr table)))) |
| 135 | 147 | ||
| 136 | ;; Alist of this form: | 148 | ;; Alist of this form: |
| 137 | ;; (PROP INDEX GENERATOR FILENAME) | 149 | ;; (PROP INDEX GENERATOR FILENAME DOCSTRING DESCRIBER VAL-LIST) |
| 138 | ;; PROP: character property | 150 | ;; PROP: character property |
| 139 | ;; INDEX: index to each element of unidata-list for PROP | 151 | ;; INDEX: index to each element of unidata-list for PROP. |
| 152 | ;; It may be a function that generates an alist of character codes | ||
| 153 | ;; vs. the corresponding property values. | ||
| 140 | ;; GENERATOR: function to generate a char-table | 154 | ;; GENERATOR: function to generate a char-table |
| 141 | ;; FILENAME: filename to store the char-table | 155 | ;; FILENAME: filename to store the char-table |
| 156 | ;; DOCSTRING: docstring for the property | ||
| 142 | ;; DESCRIBER: function to call to get a description string of property value | 157 | ;; DESCRIBER: function to call to get a description string of property value |
| 158 | ;; DEFAULT: the default value of the property | ||
| 159 | ;; VAL-LIST: list of specially ordered property values | ||
| 143 | 160 | ||
| 144 | (defconst unidata-prop-alist | 161 | (defconst unidata-prop-alist |
| 145 | '((name | 162 | '((name |
| @@ -152,7 +169,12 @@ Property value is a string.") | |||
| 152 | Property value is one of the following symbols: | 169 | Property value is one of the following symbols: |
| 153 | Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po, | 170 | Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po, |
| 154 | Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn" | 171 | Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn" |
| 155 | unidata-describe-general-category) | 172 | unidata-describe-general-category |
| 173 | nil | ||
| 174 | ;; The order of elements must be in sync with unicode_category_t | ||
| 175 | ;; in src/character.h. | ||
| 176 | (Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po | ||
| 177 | Sm Sc Sk So Zs Zl Zp Cc Cf Cs Co Cn)) | ||
| 156 | (canonical-combining-class | 178 | (canonical-combining-class |
| 157 | 3 unidata-gen-table-integer "uni-combining.el" | 179 | 3 unidata-gen-table-integer "uni-combining.el" |
| 158 | "Unicode canonical combining class. | 180 | "Unicode canonical combining class. |
| @@ -164,7 +186,11 @@ Property value is an integer." | |||
| 164 | Property value is one of the following symbols: | 186 | Property value is one of the following symbols: |
| 165 | L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET, | 187 | L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET, |
| 166 | AN, CS, NSM, BN, B, S, WS, ON" | 188 | AN, CS, NSM, BN, B, S, WS, ON" |
| 167 | unidata-describe-bidi-class) | 189 | unidata-describe-bidi-class |
| 190 | L | ||
| 191 | ;; The order of elements must be in sync with bidi_type_t in | ||
| 192 | ;; src/dispextern.h. | ||
| 193 | (L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON)) | ||
| 168 | (decomposition | 194 | (decomposition |
| 169 | 5 unidata-gen-table-decomposition "uni-decomposition.el" | 195 | 5 unidata-gen-table-decomposition "uni-decomposition.el" |
| 170 | "Unicode decomposition mapping. | 196 | "Unicode decomposition mapping. |
| @@ -188,7 +214,7 @@ Property value is an integer or a floating point.") | |||
| 188 | (mirrored | 214 | (mirrored |
| 189 | 9 unidata-gen-table-symbol "uni-mirrored.el" | 215 | 9 unidata-gen-table-symbol "uni-mirrored.el" |
| 190 | "Unicode bidi mirrored flag. | 216 | "Unicode bidi mirrored flag. |
| 191 | Property value is a symbol `Y' or `N'.") | 217 | Property value is a symbol `Y' or `N'. See also the property `mirroring'.") |
| 192 | (old-name | 218 | (old-name |
| 193 | 10 unidata-gen-table-name "uni-old-name.el" | 219 | 10 unidata-gen-table-name "uni-old-name.el" |
| 194 | "Unicode old names as published in Unicode 1.0. | 220 | "Unicode old names as published in Unicode 1.0. |
| @@ -211,7 +237,12 @@ Property value is a character." | |||
| 211 | 14 unidata-gen-table-character "uni-titlecase.el" | 237 | 14 unidata-gen-table-character "uni-titlecase.el" |
| 212 | "Unicode simple titlecase mapping. | 238 | "Unicode simple titlecase mapping. |
| 213 | Property value is a character." | 239 | Property value is a character." |
| 214 | string))) | 240 | string) |
| 241 | (mirroring | ||
| 242 | unidata-gen-mirroring-list unidata-gen-table-character "uni-mirrored.el" | ||
| 243 | "Unicode bidi-mirroring characters. | ||
| 244 | Property value is a character that has the corresponding mirroring image, | ||
| 245 | or nil for non-mirrored character."))) | ||
| 215 | 246 | ||
| 216 | ;; Functions to access the above data. | 247 | ;; Functions to access the above data. |
| 217 | (defsubst unidata-prop-index (prop) (nth 1 (assq prop unidata-prop-alist))) | 248 | (defsubst unidata-prop-index (prop) (nth 1 (assq prop unidata-prop-alist))) |
| @@ -219,6 +250,8 @@ Property value is a character." | |||
| 219 | (defsubst unidata-prop-file (prop) (nth 3 (assq prop unidata-prop-alist))) | 250 | (defsubst unidata-prop-file (prop) (nth 3 (assq prop unidata-prop-alist))) |
| 220 | (defsubst unidata-prop-docstring (prop) (nth 4 (assq prop unidata-prop-alist))) | 251 | (defsubst unidata-prop-docstring (prop) (nth 4 (assq prop unidata-prop-alist))) |
| 221 | (defsubst unidata-prop-describer (prop) (nth 5 (assq prop unidata-prop-alist))) | 252 | (defsubst unidata-prop-describer (prop) (nth 5 (assq prop unidata-prop-alist))) |
| 253 | (defsubst unidata-prop-default (prop) (nth 6 (assq prop unidata-prop-alist))) | ||
| 254 | (defsubst unidata-prop-val-list (prop) (nth 7 (assq prop unidata-prop-alist))) | ||
| 222 | 255 | ||
| 223 | 256 | ||
| 224 | ;; SIMPLE TABLE | 257 | ;; SIMPLE TABLE |
| @@ -227,52 +260,34 @@ Property value is a character." | |||
| 227 | ;; values of succeeding character codes are usually different, we use | 260 | ;; values of succeeding character codes are usually different, we use |
| 228 | ;; a char-table described here to store such values. | 261 | ;; a char-table described here to store such values. |
| 229 | ;; | 262 | ;; |
| 230 | ;; If succeeding 128 characters has no property, a char-table has the | 263 | ;; A char-table divides character code space (#x0..#x3FFFFF) into |
| 231 | ;; symbol t for them. Otherwise a char-table has a string of the | 264 | ;; #x8000 blocks (each block contains 128 characters). |
| 232 | ;; following format for them. | 265 | |
| 266 | ;; If all characters of a block have no property, a char-table has the | ||
| 267 | ;; symbol nil for that block. Otherwise a char-table has a string of | ||
| 268 | ;; the following format for it. | ||
| 233 | ;; | 269 | ;; |
| 234 | ;; The first character of the string is FIRST-INDEX. | 270 | ;; The first character of the string is ?\001. |
| 235 | ;; The Nth (N > 0) character of the string is a property value of the | 271 | ;; The second character of the string is FIRST-INDEX. |
| 236 | ;; character (BLOCK-HEAD + FIRST-INDEX + N - 1), where BLOCK-HEAD is | 272 | ;; The Nth (N > 1) character of the string is a property value of the |
| 237 | ;; the first of the characters in the block. | 273 | ;; character (BLOCK-HEAD + FIRST-INDEX + N - 2), where BLOCK-HEAD is |
| 274 | ;; the first character of the block. | ||
| 238 | ;; | 275 | ;; |
| 239 | ;; The 4th extra slot of a char-table is nil. | 276 | ;; This kind of char-table has these extra slots: |
| 240 | 277 | ;; 1st: the property symbol | |
| 241 | (defun unidata-get-character (char val table) | 278 | ;; 2nd: nil |
| 242 | (cond | 279 | ;; 3rd: 0 (corresponding to uniprop_encode_character in chartab.c) |
| 243 | ((characterp val) | 280 | ;; 4th to 5th: nil |
| 244 | val) | ||
| 245 | 281 | ||
| 246 | ((stringp val) | 282 | (defun unidata-gen-table-character (prop &rest ignore) |
| 247 | (let* ((len (length val)) | ||
| 248 | (block-head (lsh (lsh char -7) 7)) | ||
| 249 | (vec (make-vector 128 nil)) | ||
| 250 | (first-index (aref val 0))) | ||
| 251 | (dotimes (i (1- len)) | ||
| 252 | (let ((elt (aref val (1+ i)))) | ||
| 253 | (if (> elt 0) | ||
| 254 | (aset vec (+ first-index i) elt)))) | ||
| 255 | (dotimes (i 128) | ||
| 256 | (aset table (+ block-head i) (aref vec i))) | ||
| 257 | (aref vec (- char block-head)))))) | ||
| 258 | |||
| 259 | (defun unidata-put-character (char val table) | ||
| 260 | (or (characterp val) | ||
| 261 | (not val) | ||
| 262 | (error "Not a character nor nil: %S" val)) | ||
| 263 | (let ((current-val (aref table char))) | ||
| 264 | (unless (eq current-val val) | ||
| 265 | (if (stringp current-val) | ||
| 266 | (funcall (char-table-extra-slot table 1) char current-val table)) | ||
| 267 | (aset table char val)))) | ||
| 268 | |||
| 269 | (defun unidata-gen-table-character (prop) | ||
| 270 | (let ((table (make-char-table 'char-code-property-table)) | 283 | (let ((table (make-char-table 'char-code-property-table)) |
| 271 | (prop-idx (unidata-prop-index prop)) | 284 | (prop-idx (unidata-prop-index prop)) |
| 272 | (vec (make-vector 128 0)) | 285 | (vec (make-vector 128 0)) |
| 273 | (tail unidata-list) | 286 | (tail unidata-list) |
| 274 | elt range val idx slot) | 287 | elt range val idx slot) |
| 275 | (set-char-table-range table (cons 0 (max-char)) t) | 288 | (if (functionp prop-idx) |
| 289 | (setq tail (funcall prop-idx) | ||
| 290 | prop-idx 1)) | ||
| 276 | (while tail | 291 | (while tail |
| 277 | (setq elt (car tail) tail (cdr tail)) | 292 | (setq elt (car tail) tail (cdr tail)) |
| 278 | (setq range (car elt) | 293 | (setq range (car elt) |
| @@ -301,7 +316,7 @@ Property value is a character." | |||
| 301 | (setq first-index last-index))) | 316 | (setq first-index last-index))) |
| 302 | (setq tail (cdr tail))) | 317 | (setq tail (cdr tail))) |
| 303 | (when first-index | 318 | (when first-index |
| 304 | (let ((str (string first-index)) | 319 | (let ((str (string 1 first-index)) |
| 305 | c) | 320 | c) |
| 306 | (while (<= first-index last-index) | 321 | (while (<= first-index last-index) |
| 307 | (setq str (format "%s%c" str (or (aref vec first-index) 0)) | 322 | (setq str (format "%s%c" str (or (aref vec first-index) 0)) |
| @@ -309,184 +324,78 @@ Property value is a character." | |||
| 309 | (set-char-table-range table (cons start limit) str)))))) | 324 | (set-char-table-range table (cons start limit) str)))))) |
| 310 | 325 | ||
| 311 | (set-char-table-extra-slot table 0 prop) | 326 | (set-char-table-extra-slot table 0 prop) |
| 312 | (byte-compile 'unidata-get-character) | 327 | (set-char-table-extra-slot table 2 0) |
| 313 | (byte-compile 'unidata-put-character) | ||
| 314 | (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-character)) | ||
| 315 | (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-character)) | ||
| 316 | |||
| 317 | table)) | 328 | table)) |
| 318 | 329 | ||
| 319 | 330 | ||
| 320 | 331 | ||
| 321 | ;; RUN-LENGTH TABLE | 332 | ;; RUN-LENGTH TABLE |
| 322 | ;; | 333 | ;; |
| 323 | ;; If the type of character property value is symbol, integer, | 334 | ;; If many characters of successive character codes have the same |
| 324 | ;; boolean, or character, we use a char-table described here to store | 335 | ;; property value, we use a char-table described here to store the |
| 325 | ;; the values. | 336 | ;; values. |
| 326 | ;; | 337 | ;; |
| 327 | ;; The 4th extra slot is a vector of property values (VAL-TABLE), and | 338 | ;; At first, instead of a value itself, we store an index number to |
| 328 | ;; values for succeeding 128 characters are encoded into this | 339 | ;; the VAL-TABLE (5th extra slot) in the table. We call that index |
| 329 | ;; character sequence: | 340 | ;; number as VAL-CODE here after. |
| 341 | ;; | ||
| 342 | ;; A char-table divides character code space (#x0..#x3FFFFF) into | ||
| 343 | ;; #x8000 blocks (each block contains 128 characters). | ||
| 344 | ;; | ||
| 345 | ;; If all characters of a block have the same value, a char-table has | ||
| 346 | ;; VAL-CODE for that block. Otherwise a char-table has a string of | ||
| 347 | ;; the following format for that block. | ||
| 348 | ;; | ||
| 349 | ;; The first character of the string is ?\002. | ||
| 350 | ;; The following characters has this form: | ||
| 330 | ;; ( VAL-CODE RUN-LENGTH ? ) + | 351 | ;; ( VAL-CODE RUN-LENGTH ? ) + |
| 331 | ;; where: | 352 | ;; where: |
| 332 | ;; VAL-CODE (0..127): | 353 | ;; VAL-CODE (0..127): index into VAL-TABLE. |
| 333 | ;; (VAL-CODE - 1) is an index into VAL-TABLE. | ||
| 334 | ;; The value 0 means no-value. | ||
| 335 | ;; RUN-LENGTH (130..255): | 354 | ;; RUN-LENGTH (130..255): |
| 336 | ;; (RUN-LENGTH - 128) specifies how many characters have the same | 355 | ;; (RUN-LENGTH - 128) specifies how many characters have the same |
| 337 | ;; value. If omitted, it means 1. | 356 | ;; value. If omitted, it means 1. |
| 338 | 357 | ;; | |
| 339 | 358 | ;; This kind of char-table has these extra slots: | |
| 340 | ;; Return a symbol-type character property value of CHAR. VAL is the | 359 | ;; 1st: the property symbol |
| 341 | ;; current value of (aref TABLE CHAR). | 360 | ;; 2nd: 0 (corresponding to uniprop_decode_value in chartab.c) |
| 342 | 361 | ;; 3rd: 1..3 (corresponding to uniprop_encode_xxx in chartab.c) | |
| 343 | (defun unidata-get-symbol (char val table) | 362 | ;; 4th: function or nil |
| 344 | (let ((val-table (char-table-extra-slot table 4))) | 363 | ;; 5th: VAL-TABLE |
| 345 | (cond ((symbolp val) | ||
| 346 | val) | ||
| 347 | ((stringp val) | ||
| 348 | (let ((first-char (lsh (lsh char -7) 7)) | ||
| 349 | (str val) | ||
| 350 | (len (length val)) | ||
| 351 | (idx 0) | ||
| 352 | this-val count) | ||
| 353 | (set-char-table-range table (cons first-char (+ first-char 127)) | ||
| 354 | nil) | ||
| 355 | (while (< idx len) | ||
| 356 | (setq val (aref str idx) idx (1+ idx) | ||
| 357 | count (if (< idx len) (aref str idx) 1)) | ||
| 358 | (setq val (and (> val 0) (aref val-table (1- val))) | ||
| 359 | count (if (< count 128) | ||
| 360 | 1 | ||
| 361 | (prog1 (- count 128) (setq idx (1+ idx))))) | ||
| 362 | (dotimes (i count) | ||
| 363 | (if val | ||
| 364 | (aset table first-char val)) | ||
| 365 | (if (= first-char char) | ||
| 366 | (setq this-val val)) | ||
| 367 | (setq first-char (1+ first-char)))) | ||
| 368 | this-val)) | ||
| 369 | ((> val 0) | ||
| 370 | (aref val-table (1- val)))))) | ||
| 371 | |||
| 372 | ;; Return a integer-type character property value of CHAR. VAL is the | ||
| 373 | ;; current value of (aref TABLE CHAR). | ||
| 374 | |||
| 375 | (defun unidata-get-integer (char val table) | ||
| 376 | (let ((val-table (char-table-extra-slot table 4))) | ||
| 377 | (cond ((integerp val) | ||
| 378 | val) | ||
| 379 | ((stringp val) | ||
| 380 | (let ((first-char (lsh (lsh char -7) 7)) | ||
| 381 | (str val) | ||
| 382 | (len (length val)) | ||
| 383 | (idx 0) | ||
| 384 | this-val count) | ||
| 385 | (while (< idx len) | ||
| 386 | (setq val (aref str idx) idx (1+ idx) | ||
| 387 | count (if (< idx len) (aref str idx) 1)) | ||
| 388 | (setq val (and (> val 0) (aref val-table (1- val))) | ||
| 389 | count (if (< count 128) | ||
| 390 | 1 | ||
| 391 | (prog1 (- count 128) (setq idx (1+ idx))))) | ||
| 392 | (dotimes (i count) | ||
| 393 | (aset table first-char val) | ||
| 394 | (if (= first-char char) | ||
| 395 | (setq this-val val)) | ||
| 396 | (setq first-char (1+ first-char)))) | ||
| 397 | this-val))))) | ||
| 398 | |||
| 399 | ;; Return a numeric-type (integer or float) character property value | ||
| 400 | ;; of CHAR. VAL is the current value of (aref TABLE CHAR). | ||
| 401 | |||
| 402 | (defun unidata-get-numeric (char val table) | ||
| 403 | (cond | ||
| 404 | ((numberp val) | ||
| 405 | val) | ||
| 406 | ((stringp val) | ||
| 407 | (let ((val-table (char-table-extra-slot table 4)) | ||
| 408 | (first-char (lsh (lsh char -7) 7)) | ||
| 409 | (str val) | ||
| 410 | (len (length val)) | ||
| 411 | (idx 0) | ||
| 412 | this-val count) | ||
| 413 | (while (< idx len) | ||
| 414 | (setq val (aref str idx) idx (1+ idx) | ||
| 415 | count (if (< idx len) (aref str idx) 1)) | ||
| 416 | (setq val (and (> val 0) (aref val-table (1- val))) | ||
| 417 | count (if (< count 128) | ||
| 418 | 1 | ||
| 419 | (prog1 (- count 128) (setq idx (1+ idx))))) | ||
| 420 | (dotimes (i count) | ||
| 421 | (aset table first-char val) | ||
| 422 | (if (= first-char char) | ||
| 423 | (setq this-val val)) | ||
| 424 | (setq first-char (1+ first-char)))) | ||
| 425 | this-val)))) | ||
| 426 | |||
| 427 | ;; Store VAL (symbol) as a character property value of CHAR in TABLE. | ||
| 428 | |||
| 429 | (defun unidata-put-symbol (char val table) | ||
| 430 | (or (symbolp val) | ||
| 431 | (error "Not a symbol: %S" val)) | ||
| 432 | (let ((current-val (aref table char))) | ||
| 433 | (unless (eq current-val val) | ||
| 434 | (if (stringp current-val) | ||
| 435 | (funcall (char-table-extra-slot table 1) char current-val table)) | ||
| 436 | (aset table char val)))) | ||
| 437 | |||
| 438 | ;; Store VAL (integer) as a character property value of CHAR in TABLE. | ||
| 439 | |||
| 440 | (defun unidata-put-integer (char val table) | ||
| 441 | (or (integerp val) | ||
| 442 | (not val) | ||
| 443 | (error "Not an integer nor nil: %S" val)) | ||
| 444 | (let ((current-val (aref table char))) | ||
| 445 | (unless (eq current-val val) | ||
| 446 | (if (stringp current-val) | ||
| 447 | (funcall (char-table-extra-slot table 1) char current-val table)) | ||
| 448 | (aset table char val)))) | ||
| 449 | |||
| 450 | ;; Store VAL (integer or float) as a character property value of CHAR | ||
| 451 | ;; in TABLE. | ||
| 452 | |||
| 453 | (defun unidata-put-numeric (char val table) | ||
| 454 | (or (numberp val) | ||
| 455 | (not val) | ||
| 456 | (error "Not a number nor nil: %S" val)) | ||
| 457 | (let ((current-val (aref table char))) | ||
| 458 | (unless (equal current-val val) | ||
| 459 | (if (stringp current-val) | ||
| 460 | (funcall (char-table-extra-slot table 1) char current-val table)) | ||
| 461 | (aset table char val)))) | ||
| 462 | 364 | ||
| 463 | ;; Encode the character property value VAL into an integer value by | 365 | ;; Encode the character property value VAL into an integer value by |
| 464 | ;; VAL-LIST. By side effect, VAL-LIST is modified. | 366 | ;; VAL-LIST. By side effect, VAL-LIST is modified. |
| 465 | ;; VAL-LIST has this form: | 367 | ;; VAL-LIST has this form: |
| 466 | ;; (t (VAL1 . VAL-CODE1) (VAL2 . VAL-CODE2) ...) | 368 | ;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ...) |
| 467 | ;; If VAL is one of VALn, just return VAL-CODEn. Otherwise, | 369 | ;; If VAL is one of VALn, just return n. |
| 468 | ;; VAL-LIST is modified to this: | 370 | ;; Otherwise, VAL-LIST is modified to this: |
| 469 | ;; (t (VAL . (1+ VAL-CODE1)) (VAL1 . VAL-CODE1) (VAL2 . VAL-CODE2) ...) | 371 | ;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ... (VAL . n+1)) |
| 470 | 372 | ||
| 471 | (defun unidata-encode-val (val-list val) | 373 | (defun unidata-encode-val (val-list val) |
| 472 | (let ((slot (assoc val val-list)) | 374 | (let ((slot (assoc val val-list)) |
| 473 | val-code) | 375 | val-code) |
| 474 | (if slot | 376 | (if slot |
| 475 | (cdr slot) | 377 | (cdr slot) |
| 476 | (setq val-code (if (cdr val-list) (1+ (cdr (nth 1 val-list))) 1)) | 378 | (setq val-code (length val-list)) |
| 477 | (setcdr val-list (cons (cons val val-code) (cdr val-list))) | 379 | (nconc val-list (list (cons val val-code))) |
| 478 | val-code))) | 380 | val-code))) |
| 479 | 381 | ||
| 480 | ;; Generate a char-table for the character property PROP. | 382 | ;; Generate a char-table for the character property PROP. |
| 481 | 383 | ||
| 482 | (defun unidata-gen-table (prop val-func default-value) | 384 | (defun unidata-gen-table (prop val-func default-value val-list) |
| 483 | (let ((table (make-char-table 'char-code-property-table)) | 385 | (let ((table (make-char-table 'char-code-property-table)) |
| 484 | (prop-idx (unidata-prop-index prop)) | 386 | (prop-idx (unidata-prop-index prop)) |
| 485 | (val-list (list t)) | ||
| 486 | (vec (make-vector 128 0)) | 387 | (vec (make-vector 128 0)) |
| 487 | tail elt range val val-code idx slot | 388 | tail elt range val val-code idx slot |
| 488 | prev-range-data) | 389 | prev-range-data) |
| 489 | (set-char-table-range table (cons 0 (max-char)) default-value) | 390 | (setq val-list (cons nil (copy-sequence val-list))) |
| 391 | (setq tail val-list val-code 0) | ||
| 392 | ;; Convert (nil A B ...) to ((nil . 0) (A . 1) (B . 2) ...) | ||
| 393 | (while tail | ||
| 394 | (setcar tail (cons (car tail) val-code)) | ||
| 395 | (setq tail (cdr tail) val-code (1+ val-code))) | ||
| 396 | (setq default-value (unidata-encode-val val-list default-value)) | ||
| 397 | (set-char-table-range table t default-value) | ||
| 398 | (set-char-table-range table nil default-value) | ||
| 490 | (setq tail unidata-list) | 399 | (setq tail unidata-list) |
| 491 | (while tail | 400 | (while tail |
| 492 | (setq elt (car tail) tail (cdr tail)) | 401 | (setq elt (car tail) tail (cdr tail)) |
| @@ -495,7 +404,7 @@ Property value is a character." | |||
| 495 | (setq val-code (if val (unidata-encode-val val-list val))) | 404 | (setq val-code (if val (unidata-encode-val val-list val))) |
| 496 | (if (consp range) | 405 | (if (consp range) |
| 497 | (when val-code | 406 | (when val-code |
| 498 | (set-char-table-range table range val) | 407 | (set-char-table-range table range val-code) |
| 499 | (let ((from (car range)) (to (cdr range))) | 408 | (let ((from (car range)) (to (cdr range))) |
| 500 | ;; If RANGE doesn't end at the char-table boundary (each | 409 | ;; If RANGE doesn't end at the char-table boundary (each |
| 501 | ;; 128 characters), we may have to carry over the data | 410 | ;; 128 characters), we may have to carry over the data |
| @@ -534,7 +443,7 @@ Property value is a character." | |||
| 534 | (if val-code | 443 | (if val-code |
| 535 | (aset vec (- range start) val-code)) | 444 | (aset vec (- range start) val-code)) |
| 536 | (setq tail (cdr tail))) | 445 | (setq tail (cdr tail))) |
| 537 | (setq str "" val-code -1 count 0) | 446 | (setq str "\002" val-code -1 count 0) |
| 538 | (mapc #'(lambda (x) | 447 | (mapc #'(lambda (x) |
| 539 | (if (= val-code x) | 448 | (if (= val-code x) |
| 540 | (setq count (1+ count)) | 449 | (setq count (1+ count)) |
| @@ -549,7 +458,7 @@ Property value is a character." | |||
| 549 | vec) | 458 | vec) |
| 550 | (if (= count 128) | 459 | (if (= count 128) |
| 551 | (if val | 460 | (if val |
| 552 | (set-char-table-range table (cons start limit) val)) | 461 | (set-char-table-range table (cons start limit) val-code)) |
| 553 | (if (= val-code 0) | 462 | (if (= val-code 0) |
| 554 | (set-char-table-range table (cons start limit) str) | 463 | (set-char-table-range table (cons start limit) str) |
| 555 | (if (> count 2) | 464 | (if (> count 2) |
| @@ -559,34 +468,29 @@ Property value is a character." | |||
| 559 | (setq str (concat str (string val-code))))) | 468 | (setq str (concat str (string val-code))))) |
| 560 | (set-char-table-range table (cons start limit) str)))))) | 469 | (set-char-table-range table (cons start limit) str)))))) |
| 561 | 470 | ||
| 562 | (setq val-list (nreverse (cdr val-list))) | ||
| 563 | (set-char-table-extra-slot table 0 prop) | 471 | (set-char-table-extra-slot table 0 prop) |
| 564 | (set-char-table-extra-slot table 4 (vconcat (mapcar 'car val-list))) | 472 | (set-char-table-extra-slot table 4 (vconcat (mapcar 'car val-list))) |
| 565 | table)) | 473 | table)) |
| 566 | 474 | ||
| 567 | (defun unidata-gen-table-symbol (prop) | 475 | (defun unidata-gen-table-symbol (prop default-value val-list) |
| 568 | (let ((table (unidata-gen-table prop | 476 | (let ((table (unidata-gen-table prop |
| 569 | #'(lambda (x) (and (> (length x) 0) | 477 | #'(lambda (x) (and (> (length x) 0) |
| 570 | (intern x))) | 478 | (intern x))) |
| 571 | 0))) | 479 | default-value val-list))) |
| 572 | (byte-compile 'unidata-get-symbol) | 480 | (set-char-table-extra-slot table 1 0) |
| 573 | (byte-compile 'unidata-put-symbol) | 481 | (set-char-table-extra-slot table 2 1) |
| 574 | (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-symbol)) | ||
| 575 | (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-symbol)) | ||
| 576 | table)) | 482 | table)) |
| 577 | 483 | ||
| 578 | (defun unidata-gen-table-integer (prop) | 484 | (defun unidata-gen-table-integer (prop default-value val-list) |
| 579 | (let ((table (unidata-gen-table prop | 485 | (let ((table (unidata-gen-table prop |
| 580 | #'(lambda (x) (and (> (length x) 0) | 486 | #'(lambda (x) (and (> (length x) 0) |
| 581 | (string-to-number x))) | 487 | (string-to-number x))) |
| 582 | t))) | 488 | default-value val-list))) |
| 583 | (byte-compile 'unidata-get-integer) | 489 | (set-char-table-extra-slot table 1 0) |
| 584 | (byte-compile 'unidata-put-integer) | 490 | (set-char-table-extra-slot table 2 1) |
| 585 | (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-integer)) | ||
| 586 | (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-integer)) | ||
| 587 | table)) | 491 | table)) |
| 588 | 492 | ||
| 589 | (defun unidata-gen-table-numeric (prop) | 493 | (defun unidata-gen-table-numeric (prop default-value val-list) |
| 590 | (let ((table (unidata-gen-table prop | 494 | (let ((table (unidata-gen-table prop |
| 591 | #'(lambda (x) | 495 | #'(lambda (x) |
| 592 | (if (string-match "/" x) | 496 | (if (string-match "/" x) |
| @@ -595,11 +499,9 @@ Property value is a character." | |||
| 595 | (substring x (match-end 0)))) | 499 | (substring x (match-end 0)))) |
| 596 | (if (> (length x) 0) | 500 | (if (> (length x) 0) |
| 597 | (string-to-number x)))) | 501 | (string-to-number x)))) |
| 598 | t))) | 502 | default-value val-list))) |
| 599 | (byte-compile 'unidata-get-numeric) | 503 | (set-char-table-extra-slot table 1 0) |
| 600 | (byte-compile 'unidata-put-numeric) | 504 | (set-char-table-extra-slot table 2 2) |
| 601 | (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-numeric)) | ||
| 602 | (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-numeric)) | ||
| 603 | table)) | 505 | table)) |
| 604 | 506 | ||
| 605 | 507 | ||
| @@ -892,7 +794,6 @@ Property value is a character." | |||
| 892 | word-table | 794 | word-table |
| 893 | block-list block-word-table block-end | 795 | block-list block-word-table block-end |
| 894 | tail elt range val idx slot) | 796 | tail elt range val idx slot) |
| 895 | (set-char-table-range table (cons 0 (max-char)) 0) | ||
| 896 | (setq tail unidata-list) | 797 | (setq tail unidata-list) |
| 897 | (setq block-end -1) | 798 | (setq block-end -1) |
| 898 | (while tail | 799 | (while tail |
| @@ -1025,7 +926,7 @@ Property value is a character." | |||
| 1025 | idx (1+ i))))) | 926 | idx (1+ i))))) |
| 1026 | (nreverse (cons (intern (substring str idx)) l)))))) | 927 | (nreverse (cons (intern (substring str idx)) l)))))) |
| 1027 | 928 | ||
| 1028 | (defun unidata-gen-table-name (prop) | 929 | (defun unidata-gen-table-name (prop &rest ignore) |
| 1029 | (let* ((table (unidata-gen-table-word-list prop 'unidata-split-name)) | 930 | (let* ((table (unidata-gen-table-word-list prop 'unidata-split-name)) |
| 1030 | (word-tables (char-table-extra-slot table 4))) | 931 | (word-tables (char-table-extra-slot table 4))) |
| 1031 | (byte-compile 'unidata-get-name) | 932 | (byte-compile 'unidata-get-name) |
| @@ -1064,7 +965,7 @@ Property value is a character." | |||
| 1064 | (nreverse l))))) | 965 | (nreverse l))))) |
| 1065 | 966 | ||
| 1066 | 967 | ||
| 1067 | (defun unidata-gen-table-decomposition (prop) | 968 | (defun unidata-gen-table-decomposition (prop &rest ignore) |
| 1068 | (let* ((table (unidata-gen-table-word-list prop 'unidata-split-decomposition)) | 969 | (let* ((table (unidata-gen-table-word-list prop 'unidata-split-decomposition)) |
| 1069 | (word-tables (char-table-extra-slot table 4))) | 970 | (word-tables (char-table-extra-slot table 4))) |
| 1070 | (byte-compile 'unidata-get-decomposition) | 971 | (byte-compile 'unidata-get-decomposition) |
| @@ -1080,7 +981,8 @@ Property value is a character." | |||
| 1080 | 981 | ||
| 1081 | (defun unidata-describe-general-category (val) | 982 | (defun unidata-describe-general-category (val) |
| 1082 | (cdr (assq val | 983 | (cdr (assq val |
| 1083 | '((Lu . "Letter, Uppercase") | 984 | '((nil . "Uknown") |
| 985 | (Lu . "Letter, Uppercase") | ||
| 1084 | (Ll . "Letter, Lowercase") | 986 | (Ll . "Letter, Lowercase") |
| 1085 | (Lt . "Letter, Titlecase") | 987 | (Lt . "Letter, Titlecase") |
| 1086 | (Lm . "Letter, Modifier") | 988 | (Lm . "Letter, Modifier") |
| @@ -1171,6 +1073,19 @@ Property value is a character." | |||
| 1171 | (string ?')))) | 1073 | (string ?')))) |
| 1172 | val " ")) | 1074 | val " ")) |
| 1173 | 1075 | ||
| 1076 | (defun unidata-gen-mirroring-list () | ||
| 1077 | (let ((head (list nil)) | ||
| 1078 | tail) | ||
| 1079 | (with-temp-buffer | ||
| 1080 | (insert-file-contents (expand-file-name "BidiMirroring.txt" unidata-dir)) | ||
| 1081 | (goto-char (point-min)) | ||
| 1082 | (setq tail head) | ||
| 1083 | (while (re-search-forward "^\\([0-9A-F]+\\);\\s +\\([0-9A-F]+\\)" nil t) | ||
| 1084 | (let ((char (string-to-number (match-string 1) 16)) | ||
| 1085 | (mirror (match-string 2))) | ||
| 1086 | (setq tail (setcdr tail (list (list char mirror))))))) | ||
| 1087 | (cdr head))) | ||
| 1088 | |||
| 1174 | ;; Verify if we can retrieve correct values from the generated | 1089 | ;; Verify if we can retrieve correct values from the generated |
| 1175 | ;; char-tables. | 1090 | ;; char-tables. |
| 1176 | 1091 | ||
| @@ -1212,13 +1127,21 @@ Property value is a character." | |||
| 1212 | ;; The entry function. It generates files described in the header | 1127 | ;; The entry function. It generates files described in the header |
| 1213 | ;; comment of this file. | 1128 | ;; comment of this file. |
| 1214 | 1129 | ||
| 1215 | (defun unidata-gen-files (&optional unidata-text-file) | 1130 | (defun unidata-gen-files (&optional data-dir unidata-text-file) |
| 1216 | (or unidata-text-file | 1131 | (or data-dir |
| 1217 | (setq unidata-text-file (car command-line-args-left) | 1132 | (setq data-dir (car command-line-args-left) |
| 1133 | command-line-args-left (cdr command-line-args-left) | ||
| 1134 | unidata-text-file (car command-line-args-left) | ||
| 1218 | command-line-args-left (cdr command-line-args-left))) | 1135 | command-line-args-left (cdr command-line-args-left))) |
| 1219 | (unidata-setup-list unidata-text-file) | ||
| 1220 | (let ((coding-system-for-write 'utf-8-unix) | 1136 | (let ((coding-system-for-write 'utf-8-unix) |
| 1221 | (charprop-file "charprop.el")) | 1137 | (charprop-file "charprop.el") |
| 1138 | (unidata-dir data-dir)) | ||
| 1139 | (dolist (elt unidata-prop-alist) | ||
| 1140 | (let* ((prop (car elt)) | ||
| 1141 | (file (unidata-prop-file prop))) | ||
| 1142 | (if (file-exists-p file) | ||
| 1143 | (delete-file file)))) | ||
| 1144 | (unidata-setup-list unidata-text-file) | ||
| 1222 | (with-temp-file charprop-file | 1145 | (with-temp-file charprop-file |
| 1223 | (insert ";; Automatically generated by unidata-gen.el.\n") | 1146 | (insert ";; Automatically generated by unidata-gen.el.\n") |
| 1224 | (dolist (elt unidata-prop-alist) | 1147 | (dolist (elt unidata-prop-alist) |
| @@ -1227,31 +1150,41 @@ Property value is a character." | |||
| 1227 | (file (unidata-prop-file prop)) | 1150 | (file (unidata-prop-file prop)) |
| 1228 | (docstring (unidata-prop-docstring prop)) | 1151 | (docstring (unidata-prop-docstring prop)) |
| 1229 | (describer (unidata-prop-describer prop)) | 1152 | (describer (unidata-prop-describer prop)) |
| 1153 | (default-value (unidata-prop-default prop)) | ||
| 1154 | (val-list (unidata-prop-val-list prop)) | ||
| 1230 | table) | 1155 | table) |
| 1231 | ;; Filename in this comment line is extracted by sed in | 1156 | ;; Filename in this comment line is extracted by sed in |
| 1232 | ;; Makefile. | 1157 | ;; Makefile. |
| 1233 | (insert (format ";; FILE: %s\n" file)) | 1158 | (insert (format ";; FILE: %s\n" file)) |
| 1234 | (insert (format "(define-char-code-property '%S %S\n %S)\n" | 1159 | (insert (format "(define-char-code-property '%S %S\n %S)\n" |
| 1235 | prop file docstring)) | 1160 | prop file docstring)) |
| 1236 | (with-temp-file file | 1161 | (with-temp-buffer |
| 1237 | (message "Generating %s..." file) | 1162 | (message "Generating %s..." file) |
| 1238 | (setq table (funcall generator prop)) | 1163 | (when (file-exists-p file) |
| 1164 | (insert-file-contents file) | ||
| 1165 | (goto-char (point-max)) | ||
| 1166 | (search-backward ";; Local Variables:")) | ||
| 1167 | (setq table (funcall generator prop default-value val-list)) | ||
| 1239 | (when describer | 1168 | (when describer |
| 1240 | (unless (subrp (symbol-function describer)) | 1169 | (unless (subrp (symbol-function describer)) |
| 1241 | (byte-compile describer) | 1170 | (byte-compile describer) |
| 1242 | (setq describer (symbol-function describer))) | 1171 | (setq describer (symbol-function describer))) |
| 1243 | (set-char-table-extra-slot table 3 describer)) | 1172 | (set-char-table-extra-slot table 3 describer)) |
| 1244 | (insert ";; Copyright (C) 1991-2009 Unicode, Inc. | 1173 | (if (bobp) |
| 1245 | ;; This file was generated from the Unicode data file at | 1174 | (insert ";; Copyright (C) 1991-2009 Unicode, Inc. |
| 1246 | ;; http://www.unicode.org/Public/UNIDATA/UnicodeData.txt. | 1175 | ;; This file was generated from the Unicode data files at |
| 1247 | ;; See lisp/international/README for the copyright and permission notice.\n" | 1176 | ;; http://www.unicode.org/Public/UNIDATA/. |
| 1248 | (format "(define-char-code-property '%S %S %S)\n" | 1177 | ;; See lisp/international/README for the copyright and permission notice.\n")) |
| 1249 | prop table docstring) | 1178 | (insert (format "(define-char-code-property '%S %S %S)\n" |
| 1250 | ";; Local Variables:\n" | 1179 | prop table docstring)) |
| 1251 | ";; coding: utf-8\n" | 1180 | (if (eobp) |
| 1252 | ";; no-byte-compile: t\n" | 1181 | (insert ";; Local Variables:\n" |
| 1253 | ";; End:\n\n" | 1182 | ";; coding: utf-8\n" |
| 1254 | (format ";; %s ends here\n" file))))) | 1183 | ";; no-byte-compile: t\n" |
| 1184 | ";; End:\n\n" | ||
| 1185 | (format ";; %s ends here\n" file))) | ||
| 1186 | (write-file file) | ||
| 1187 | (message "Generating %s...done" file)))) | ||
| 1255 | (message "Writing %s..." charprop-file) | 1188 | (message "Writing %s..." charprop-file) |
| 1256 | (insert ";; Local Variables:\n" | 1189 | (insert ";; Local Variables:\n" |
| 1257 | ";; coding: utf-8\n" | 1190 | ";; coding: utf-8\n" |