diff options
28 files changed, 880 insertions, 391 deletions
diff --git a/admin/ChangeLog b/admin/ChangeLog index 7aaeb1d5ee2..3632a0992a6 100644 --- a/admin/ChangeLog +++ b/admin/ChangeLog | |||
| @@ -1,3 +1,38 @@ | |||
| 1 | 2011-07-06 Kenichi Handa <handa@m17n.org> | ||
| 2 | |||
| 3 | * unidata/unidata-gen.el (unidata-dir): New variable. | ||
| 4 | (unidata-setup-list): Expand unidata-text-file in unidata-dir. | ||
| 5 | (unidata-prop-alist): INDEX element may be a function. New | ||
| 6 | optional element VAL-LIST (for general-category and bidi-class). | ||
| 7 | New entry `mirroring'. | ||
| 8 | (unidata-prop-default, unidata-prop-val-list): New subst. | ||
| 9 | (unidata-get-character, unidata-put-character): Delete them. | ||
| 10 | (unidata-gen-table-character): New arg IGNORE. Adjusted for the | ||
| 11 | above changes. | ||
| 12 | (unidata-get-symbol, unidata-get-integer, unidata-get-numeric) | ||
| 13 | (unidata-put-symbol, unidata-put-integer, unidata-put-numeric): | ||
| 14 | Delete them. | ||
| 15 | (unidata-encode-val): Assume that the first element of VAL-LIST is | ||
| 16 | a cons (nil . 0). | ||
| 17 | (unidata-gen-table): Change argument DEFAULT-VALUE to VAL-LIST. | ||
| 18 | Always store the encoded value. | ||
| 19 | (unidata-gen-table-symbol): New args DEFAULT-VALUE and VAL-LIST. | ||
| 20 | Set the 1st and the 2nd extra slots to index numbers for C | ||
| 21 | functions. | ||
| 22 | (unidata-gen-table-integer): Likewise. | ||
| 23 | (unidata-gen-table-numeric): Likewise. | ||
| 24 | (unidata-gen-table-name): New arg IGNORE. | ||
| 25 | (unidata-gen-table-decomposition): Likewise. | ||
| 26 | (unidata-describe-general-category): Add the case nil to the | ||
| 27 | description alist. | ||
| 28 | (unidata-gen-mirroring-list): New funciton. | ||
| 29 | (unidata-gen-files): New arg DATA-DIR. Adjusted for the change of | ||
| 30 | unidata-prop-alist. Handle the case of storing multiple | ||
| 31 | char-tables in a file. | ||
| 32 | |||
| 33 | * unidata/Makefile.in (${DSTDIR}/charprop.el): New arg to | ||
| 34 | unidata-gen-files. | ||
| 35 | |||
| 1 | 2011-05-21 Glenn Morris <rgm@gnu.org> | 36 | 2011-05-21 Glenn Morris <rgm@gnu.org> |
| 2 | 37 | ||
| 3 | * bzrmerge.el (bzrmerge-resolve): Suppress prompts about file-locals. | 38 | * bzrmerge.el (bzrmerge-resolve): Suppress prompts about file-locals. |
diff --git a/admin/unidata/Makefile.in b/admin/unidata/Makefile.in index 04f2f1d4380..e1fe247631f 100644 --- a/admin/unidata/Makefile.in +++ b/admin/unidata/Makefile.in | |||
| @@ -33,9 +33,10 @@ unidata.txt: UnicodeData.txt | |||
| 33 | 33 | ||
| 34 | ${DSTDIR}/charprop.el: unidata-gen.elc unidata.txt | 34 | ${DSTDIR}/charprop.el: unidata-gen.elc unidata.txt |
| 35 | ELC=`/bin/pwd`/unidata-gen.elc; \ | 35 | ELC=`/bin/pwd`/unidata-gen.elc; \ |
| 36 | DATA=`/bin/pwd`/unidata.txt; \ | 36 | DATADIR=`/bin/pwd`; \ |
| 37 | DATA=unidata.txt; \ | ||
| 37 | cd ${DSTDIR}; \ | 38 | cd ${DSTDIR}; \ |
| 38 | ${RUNEMACS} -batch --load $${ELC} -f unidata-gen-files $${DATA} | 39 | ${RUNEMACS} -batch --load $${ELC} -f unidata-gen-files $${DATADIR} $${DATA} |
| 39 | 40 | ||
| 40 | ../../src/biditype.h: UnicodeData.txt | 41 | ../../src/biditype.h: UnicodeData.txt |
| 41 | gawk -F";" -f biditype.awk $< > $@ | 42 | gawk -F";" -f biditype.awk $< > $@ |
diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el index 9f898668526..ab1dcd134ac 100644 --- a/admin/unidata/unidata-gen.el +++ b/admin/unidata/unidata-gen.el | |||
| @@ -33,24 +33,25 @@ | |||
| 33 | ;; | 33 | ;; |
| 34 | ;; charprop.el | 34 | ;; charprop.el |
| 35 | ;; It contains a series of forms of this format: | 35 | ;; It contains a series of forms of this format: |
| 36 | ;; (char-code-property-register PROP FILE) | 36 | ;; (define-char-code-property PROP FILE) |
| 37 | ;; where PROP is a symbol representing a character property | 37 | ;; where PROP is a symbol representing a character property |
| 38 | ;; (name, generic-category, etc), and FILE is a name of one of | 38 | ;; (name, general-category, etc), and FILE is a name of one of |
| 39 | ;; the following files. | 39 | ;; the following files. |
| 40 | ;; | 40 | ;; |
| 41 | ;; uni-name.el, uni-category.el, uni-combining.el, uni-bidi.el, | 41 | ;; uni-name.el, uni-category.el, uni-combining.el, uni-bidi.el, |
| 42 | ;; uni-decomposition.el, uni-decimal.el, uni-digit.el, uni-numeric.el, | 42 | ;; uni-decomposition.el, uni-decimal.el, uni-digit.el, uni-numeric.el, |
| 43 | ;; uni-mirrored.el, uni-old-name.el, uni-comment.el, uni-uppercase.el, | 43 | ;; uni-mirrored.el, uni-old-name.el, uni-comment.el, uni-uppercase.el, |
| 44 | ;; uni-lowercase.el, uni-titlecase.el | 44 | ;; uni-lowercase.el, uni-titlecase.el |
| 45 | ;; They each contain a single form of this format: | 45 | ;; They contain one or more forms of this format: |
| 46 | ;; (char-code-property-register PROP CHAR-TABLE) | 46 | ;; (define-char-code-property PROP CHAR-TABLE) |
| 47 | ;; where PROP is the same as above, and CHAR-TABLE is a | 47 | ;; where PROP is the same as above, and CHAR-TABLE is a |
| 48 | ;; char-table containing property values in a compressed format. | 48 | ;; char-table containing property values in a compressed format. |
| 49 | ;; | 49 | ;; |
| 50 | ;; When they are installed in .../lisp/international/, the file | 50 | ;; When they are installed in .../lisp/international/, the file |
| 51 | ;; "charprop.el" is preloaded in loadup.el. The other files are | 51 | ;; "charprop.el" is preloaded in loadup.el. The other files are |
| 52 | ;; automatically loaded when the functions `get-char-code-property' | 52 | ;; automatically loaded when the Lisp functions |
| 53 | ;; and `put-char-code-property' are called. | 53 | ;; `get-char-code-property' and `put-char-code-property', and C |
| 54 | ;; function uniprop_table are called. | ||
| 54 | ;; | 55 | ;; |
| 55 | ;; FORMAT OF A CHAR TABLE | 56 | ;; FORMAT OF A CHAR TABLE |
| 56 | ;; | 57 | ;; |
| @@ -62,17 +63,22 @@ | |||
| 62 | ;; data in a char-table as below. | 63 | ;; data in a char-table as below. |
| 63 | ;; | 64 | ;; |
| 64 | ;; If succeeding 128*N characters have the same property value, we | 65 | ;; If succeeding 128*N characters have the same property value, we |
| 65 | ;; store that value for them. Otherwise, compress values for | 66 | ;; store that value (or the encoded one) for them. Otherwise, |
| 66 | ;; succeeding 128 characters into a single string and store it as a | 67 | ;; compress values (or the encoded ones) for succeeding 128 |
| 67 | ;; value for those characters. The way of compression depends on a | 68 | ;; characters into a single string and store it for those |
| 68 | ;; property. See the section "SIMPLE TABLE", "RUN-LENGTH TABLE", | 69 | ;; characters. The way of compression depends on a property. See |
| 69 | ;; and "WORD-LIST TABLE". | 70 | ;; the section "SIMPLE TABLE", "RUN-LENGTH TABLE", and "WORD-LIST |
| 70 | 71 | ;; TABLE". | |
| 71 | ;; The char table has four extra slots: | 72 | |
| 73 | ;; The char table has five extra slots: | ||
| 72 | ;; 1st: property symbol | 74 | ;; 1st: property symbol |
| 73 | ;; 2nd: function to call to get a property value | 75 | ;; 2nd: function to call to get a property value, |
| 74 | ;; 3nd: function to call to put a property value | 76 | ;; or an index number of C function to decode the value, |
| 75 | ;; 4th: function to call to get a description of a property value | 77 | ;; or nil if the value can be directly got from the table. |
| 78 | ;; 3nd: function to call to put a property value, | ||
| 79 | ;; or an index number of C function to encode the value, | ||
| 80 | ;; or nil if the value can be directly stored in the table. | ||
| 81 | ;; 4th: function to call to get a description of a property value, or nil | ||
| 76 | ;; 5th: data referred by the above functions | 82 | ;; 5th: data referred by the above functions |
| 77 | 83 | ||
| 78 | ;; List of elements of this form: | 84 | ;; List of elements of this form: |
| @@ -82,6 +88,11 @@ | |||
| 82 | 88 | ||
| 83 | (defvar unidata-list nil) | 89 | (defvar unidata-list nil) |
| 84 | 90 | ||
| 91 | ;; Name of the directory containing files of Unicode Character | ||
| 92 | ;; Database. | ||
| 93 | |||
| 94 | (defvar unidata-dir nil) | ||
| 95 | |||
| 85 | (defun unidata-setup-list (unidata-text-file) | 96 | (defun unidata-setup-list (unidata-text-file) |
| 86 | (let* ((table (list nil)) | 97 | (let* ((table (list nil)) |
| 87 | (tail table) | 98 | (tail table) |
| @@ -90,6 +101,7 @@ | |||
| 90 | ("^<.*Surrogate" . nil) | 101 | ("^<.*Surrogate" . nil) |
| 91 | ("^<.*Private Use" . PRIVATE\ USE))) | 102 | ("^<.*Private Use" . PRIVATE\ USE))) |
| 92 | val char name) | 103 | val char name) |
| 104 | (setq unidata-text-file (expand-file-name unidata-text-file unidata-dir)) | ||
| 93 | (or (file-readable-p unidata-text-file) | 105 | (or (file-readable-p unidata-text-file) |
| 94 | (error "File not readable: %s" unidata-text-file)) | 106 | (error "File not readable: %s" unidata-text-file)) |
| 95 | (with-temp-buffer | 107 | (with-temp-buffer |
| @@ -134,12 +146,17 @@ | |||
| 134 | (setq unidata-list (cdr table)))) | 146 | (setq unidata-list (cdr table)))) |
| 135 | 147 | ||
| 136 | ;; Alist of this form: | 148 | ;; Alist of this form: |
| 137 | ;; (PROP INDEX GENERATOR FILENAME) | 149 | ;; (PROP INDEX GENERATOR FILENAME DOCSTRING DESCRIBER VAL-LIST) |
| 138 | ;; PROP: character property | 150 | ;; PROP: character property |
| 139 | ;; INDEX: index to each element of unidata-list for PROP | 151 | ;; INDEX: index to each element of unidata-list for PROP. |
| 152 | ;; It may be a function that generates an alist of character codes | ||
| 153 | ;; vs. the corresponding property values. | ||
| 140 | ;; GENERATOR: function to generate a char-table | 154 | ;; GENERATOR: function to generate a char-table |
| 141 | ;; FILENAME: filename to store the char-table | 155 | ;; FILENAME: filename to store the char-table |
| 156 | ;; DOCSTRING: docstring for the property | ||
| 142 | ;; DESCRIBER: function to call to get a description string of property value | 157 | ;; DESCRIBER: function to call to get a description string of property value |
| 158 | ;; DEFAULT: the default value of the property | ||
| 159 | ;; VAL-LIST: list of specially ordered property values | ||
| 143 | 160 | ||
| 144 | (defconst unidata-prop-alist | 161 | (defconst unidata-prop-alist |
| 145 | '((name | 162 | '((name |
| @@ -152,7 +169,12 @@ Property value is a string.") | |||
| 152 | Property value is one of the following symbols: | 169 | Property value is one of the following symbols: |
| 153 | Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po, | 170 | Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po, |
| 154 | Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn" | 171 | Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn" |
| 155 | unidata-describe-general-category) | 172 | unidata-describe-general-category |
| 173 | nil | ||
| 174 | ;; The order of elements must be in sync with unicode_category_t | ||
| 175 | ;; in src/character.h. | ||
| 176 | (Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po | ||
| 177 | Sm Sc Sk So Zs Zl Zp Cc Cf Cs Co Cn)) | ||
| 156 | (canonical-combining-class | 178 | (canonical-combining-class |
| 157 | 3 unidata-gen-table-integer "uni-combining.el" | 179 | 3 unidata-gen-table-integer "uni-combining.el" |
| 158 | "Unicode canonical combining class. | 180 | "Unicode canonical combining class. |
| @@ -164,7 +186,11 @@ Property value is an integer." | |||
| 164 | Property value is one of the following symbols: | 186 | Property value is one of the following symbols: |
| 165 | L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET, | 187 | L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET, |
| 166 | AN, CS, NSM, BN, B, S, WS, ON" | 188 | AN, CS, NSM, BN, B, S, WS, ON" |
| 167 | unidata-describe-bidi-class) | 189 | unidata-describe-bidi-class |
| 190 | L | ||
| 191 | ;; The order of elements must be in sync with bidi_type_t in | ||
| 192 | ;; src/dispextern.h. | ||
| 193 | (L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON)) | ||
| 168 | (decomposition | 194 | (decomposition |
| 169 | 5 unidata-gen-table-decomposition "uni-decomposition.el" | 195 | 5 unidata-gen-table-decomposition "uni-decomposition.el" |
| 170 | "Unicode decomposition mapping. | 196 | "Unicode decomposition mapping. |
| @@ -188,7 +214,7 @@ Property value is an integer or a floating point.") | |||
| 188 | (mirrored | 214 | (mirrored |
| 189 | 9 unidata-gen-table-symbol "uni-mirrored.el" | 215 | 9 unidata-gen-table-symbol "uni-mirrored.el" |
| 190 | "Unicode bidi mirrored flag. | 216 | "Unicode bidi mirrored flag. |
| 191 | Property value is a symbol `Y' or `N'.") | 217 | Property value is a symbol `Y' or `N'. See also the property `mirroring'.") |
| 192 | (old-name | 218 | (old-name |
| 193 | 10 unidata-gen-table-name "uni-old-name.el" | 219 | 10 unidata-gen-table-name "uni-old-name.el" |
| 194 | "Unicode old names as published in Unicode 1.0. | 220 | "Unicode old names as published in Unicode 1.0. |
| @@ -211,7 +237,12 @@ Property value is a character." | |||
| 211 | 14 unidata-gen-table-character "uni-titlecase.el" | 237 | 14 unidata-gen-table-character "uni-titlecase.el" |
| 212 | "Unicode simple titlecase mapping. | 238 | "Unicode simple titlecase mapping. |
| 213 | Property value is a character." | 239 | Property value is a character." |
| 214 | string))) | 240 | string) |
| 241 | (mirroring | ||
| 242 | unidata-gen-mirroring-list unidata-gen-table-character "uni-mirrored.el" | ||
| 243 | "Unicode bidi-mirroring characters. | ||
| 244 | Property value is a character that has the corresponding mirroring image, | ||
| 245 | or nil for non-mirrored character."))) | ||
| 215 | 246 | ||
| 216 | ;; Functions to access the above data. | 247 | ;; Functions to access the above data. |
| 217 | (defsubst unidata-prop-index (prop) (nth 1 (assq prop unidata-prop-alist))) | 248 | (defsubst unidata-prop-index (prop) (nth 1 (assq prop unidata-prop-alist))) |
| @@ -219,6 +250,8 @@ Property value is a character." | |||
| 219 | (defsubst unidata-prop-file (prop) (nth 3 (assq prop unidata-prop-alist))) | 250 | (defsubst unidata-prop-file (prop) (nth 3 (assq prop unidata-prop-alist))) |
| 220 | (defsubst unidata-prop-docstring (prop) (nth 4 (assq prop unidata-prop-alist))) | 251 | (defsubst unidata-prop-docstring (prop) (nth 4 (assq prop unidata-prop-alist))) |
| 221 | (defsubst unidata-prop-describer (prop) (nth 5 (assq prop unidata-prop-alist))) | 252 | (defsubst unidata-prop-describer (prop) (nth 5 (assq prop unidata-prop-alist))) |
| 253 | (defsubst unidata-prop-default (prop) (nth 6 (assq prop unidata-prop-alist))) | ||
| 254 | (defsubst unidata-prop-val-list (prop) (nth 7 (assq prop unidata-prop-alist))) | ||
| 222 | 255 | ||
| 223 | 256 | ||
| 224 | ;; SIMPLE TABLE | 257 | ;; SIMPLE TABLE |
| @@ -227,52 +260,34 @@ Property value is a character." | |||
| 227 | ;; values of succeeding character codes are usually different, we use | 260 | ;; values of succeeding character codes are usually different, we use |
| 228 | ;; a char-table described here to store such values. | 261 | ;; a char-table described here to store such values. |
| 229 | ;; | 262 | ;; |
| 230 | ;; If succeeding 128 characters has no property, a char-table has the | 263 | ;; A char-table divides character code space (#x0..#x3FFFFF) into |
| 231 | ;; symbol t for them. Otherwise a char-table has a string of the | 264 | ;; #x8000 blocks (each block contains 128 characters). |
| 232 | ;; following format for them. | 265 | |
| 266 | ;; If all characters of a block have no property, a char-table has the | ||
| 267 | ;; symbol nil for that block. Otherwise a char-table has a string of | ||
| 268 | ;; the following format for it. | ||
| 233 | ;; | 269 | ;; |
| 234 | ;; The first character of the string is FIRST-INDEX. | 270 | ;; The first character of the string is ?\001. |
| 235 | ;; The Nth (N > 0) character of the string is a property value of the | 271 | ;; The second character of the string is FIRST-INDEX. |
| 236 | ;; character (BLOCK-HEAD + FIRST-INDEX + N - 1), where BLOCK-HEAD is | 272 | ;; The Nth (N > 1) character of the string is a property value of the |
| 237 | ;; the first of the characters in the block. | 273 | ;; character (BLOCK-HEAD + FIRST-INDEX + N - 2), where BLOCK-HEAD is |
| 274 | ;; the first character of the block. | ||
| 238 | ;; | 275 | ;; |
| 239 | ;; The 4th extra slot of a char-table is nil. | 276 | ;; This kind of char-table has these extra slots: |
| 240 | 277 | ;; 1st: the property symbol | |
| 241 | (defun unidata-get-character (char val table) | 278 | ;; 2nd: nil |
| 242 | (cond | 279 | ;; 3rd: 0 (corresponding to uniprop_encode_character in chartab.c) |
| 243 | ((characterp val) | 280 | ;; 4th to 5th: nil |
| 244 | val) | ||
| 245 | 281 | ||
| 246 | ((stringp val) | 282 | (defun unidata-gen-table-character (prop &rest ignore) |
| 247 | (let* ((len (length val)) | ||
| 248 | (block-head (lsh (lsh char -7) 7)) | ||
| 249 | (vec (make-vector 128 nil)) | ||
| 250 | (first-index (aref val 0))) | ||
| 251 | (dotimes (i (1- len)) | ||
| 252 | (let ((elt (aref val (1+ i)))) | ||
| 253 | (if (> elt 0) | ||
| 254 | (aset vec (+ first-index i) elt)))) | ||
| 255 | (dotimes (i 128) | ||
| 256 | (aset table (+ block-head i) (aref vec i))) | ||
| 257 | (aref vec (- char block-head)))))) | ||
| 258 | |||
| 259 | (defun unidata-put-character (char val table) | ||
| 260 | (or (characterp val) | ||
| 261 | (not val) | ||
| 262 | (error "Not a character nor nil: %S" val)) | ||
| 263 | (let ((current-val (aref table char))) | ||
| 264 | (unless (eq current-val val) | ||
| 265 | (if (stringp current-val) | ||
| 266 | (funcall (char-table-extra-slot table 1) char current-val table)) | ||
| 267 | (aset table char val)))) | ||
| 268 | |||
| 269 | (defun unidata-gen-table-character (prop) | ||
| 270 | (let ((table (make-char-table 'char-code-property-table)) | 283 | (let ((table (make-char-table 'char-code-property-table)) |
| 271 | (prop-idx (unidata-prop-index prop)) | 284 | (prop-idx (unidata-prop-index prop)) |
| 272 | (vec (make-vector 128 0)) | 285 | (vec (make-vector 128 0)) |
| 273 | (tail unidata-list) | 286 | (tail unidata-list) |
| 274 | elt range val idx slot) | 287 | elt range val idx slot) |
| 275 | (set-char-table-range table (cons 0 (max-char)) t) | 288 | (if (functionp prop-idx) |
| 289 | (setq tail (funcall prop-idx) | ||
| 290 | prop-idx 1)) | ||
| 276 | (while tail | 291 | (while tail |
| 277 | (setq elt (car tail) tail (cdr tail)) | 292 | (setq elt (car tail) tail (cdr tail)) |
| 278 | (setq range (car elt) | 293 | (setq range (car elt) |
| @@ -301,7 +316,7 @@ Property value is a character." | |||
| 301 | (setq first-index last-index))) | 316 | (setq first-index last-index))) |
| 302 | (setq tail (cdr tail))) | 317 | (setq tail (cdr tail))) |
| 303 | (when first-index | 318 | (when first-index |
| 304 | (let ((str (string first-index)) | 319 | (let ((str (string 1 first-index)) |
| 305 | c) | 320 | c) |
| 306 | (while (<= first-index last-index) | 321 | (while (<= first-index last-index) |
| 307 | (setq str (format "%s%c" str (or (aref vec first-index) 0)) | 322 | (setq str (format "%s%c" str (or (aref vec first-index) 0)) |
| @@ -309,184 +324,78 @@ Property value is a character." | |||
| 309 | (set-char-table-range table (cons start limit) str)))))) | 324 | (set-char-table-range table (cons start limit) str)))))) |
| 310 | 325 | ||
| 311 | (set-char-table-extra-slot table 0 prop) | 326 | (set-char-table-extra-slot table 0 prop) |
| 312 | (byte-compile 'unidata-get-character) | 327 | (set-char-table-extra-slot table 2 0) |
| 313 | (byte-compile 'unidata-put-character) | ||
| 314 | (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-character)) | ||
| 315 | (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-character)) | ||
| 316 | |||
| 317 | table)) | 328 | table)) |
| 318 | 329 | ||
| 319 | 330 | ||
| 320 | 331 | ||
| 321 | ;; RUN-LENGTH TABLE | 332 | ;; RUN-LENGTH TABLE |
| 322 | ;; | 333 | ;; |
| 323 | ;; If the type of character property value is symbol, integer, | 334 | ;; If many characters of successive character codes have the same |
| 324 | ;; boolean, or character, we use a char-table described here to store | 335 | ;; property value, we use a char-table described here to store the |
| 325 | ;; the values. | 336 | ;; values. |
| 326 | ;; | 337 | ;; |
| 327 | ;; The 4th extra slot is a vector of property values (VAL-TABLE), and | 338 | ;; At first, instead of a value itself, we store an index number to |
| 328 | ;; values for succeeding 128 characters are encoded into this | 339 | ;; the VAL-TABLE (5th extra slot) in the table. We call that index |
| 329 | ;; character sequence: | 340 | ;; number as VAL-CODE here after. |
| 341 | ;; | ||
| 342 | ;; A char-table divides character code space (#x0..#x3FFFFF) into | ||
| 343 | ;; #x8000 blocks (each block contains 128 characters). | ||
| 344 | ;; | ||
| 345 | ;; If all characters of a block have the same value, a char-table has | ||
| 346 | ;; VAL-CODE for that block. Otherwise a char-table has a string of | ||
| 347 | ;; the following format for that block. | ||
| 348 | ;; | ||
| 349 | ;; The first character of the string is ?\002. | ||
| 350 | ;; The following characters has this form: | ||
| 330 | ;; ( VAL-CODE RUN-LENGTH ? ) + | 351 | ;; ( VAL-CODE RUN-LENGTH ? ) + |
| 331 | ;; where: | 352 | ;; where: |
| 332 | ;; VAL-CODE (0..127): | 353 | ;; VAL-CODE (0..127): index into VAL-TABLE. |
| 333 | ;; (VAL-CODE - 1) is an index into VAL-TABLE. | ||
| 334 | ;; The value 0 means no-value. | ||
| 335 | ;; RUN-LENGTH (130..255): | 354 | ;; RUN-LENGTH (130..255): |
| 336 | ;; (RUN-LENGTH - 128) specifies how many characters have the same | 355 | ;; (RUN-LENGTH - 128) specifies how many characters have the same |
| 337 | ;; value. If omitted, it means 1. | 356 | ;; value. If omitted, it means 1. |
| 338 | 357 | ;; | |
| 339 | 358 | ;; This kind of char-table has these extra slots: | |
| 340 | ;; Return a symbol-type character property value of CHAR. VAL is the | 359 | ;; 1st: the property symbol |
| 341 | ;; current value of (aref TABLE CHAR). | 360 | ;; 2nd: 0 (corresponding to uniprop_decode_value in chartab.c) |
| 342 | 361 | ;; 3rd: 1..3 (corresponding to uniprop_encode_xxx in chartab.c) | |
| 343 | (defun unidata-get-symbol (char val table) | 362 | ;; 4th: function or nil |
| 344 | (let ((val-table (char-table-extra-slot table 4))) | 363 | ;; 5th: VAL-TABLE |
| 345 | (cond ((symbolp val) | ||
| 346 | val) | ||
| 347 | ((stringp val) | ||
| 348 | (let ((first-char (lsh (lsh char -7) 7)) | ||
| 349 | (str val) | ||
| 350 | (len (length val)) | ||
| 351 | (idx 0) | ||
| 352 | this-val count) | ||
| 353 | (set-char-table-range table (cons first-char (+ first-char 127)) | ||
| 354 | nil) | ||
| 355 | (while (< idx len) | ||
| 356 | (setq val (aref str idx) idx (1+ idx) | ||
| 357 | count (if (< idx len) (aref str idx) 1)) | ||
| 358 | (setq val (and (> val 0) (aref val-table (1- val))) | ||
| 359 | count (if (< count 128) | ||
| 360 | 1 | ||
| 361 | (prog1 (- count 128) (setq idx (1+ idx))))) | ||
| 362 | (dotimes (i count) | ||
| 363 | (if val | ||
| 364 | (aset table first-char val)) | ||
| 365 | (if (= first-char char) | ||
| 366 | (setq this-val val)) | ||
| 367 | (setq first-char (1+ first-char)))) | ||
| 368 | this-val)) | ||
| 369 | ((> val 0) | ||
| 370 | (aref val-table (1- val)))))) | ||
| 371 | |||
| 372 | ;; Return a integer-type character property value of CHAR. VAL is the | ||
| 373 | ;; current value of (aref TABLE CHAR). | ||
| 374 | |||
| 375 | (defun unidata-get-integer (char val table) | ||
| 376 | (let ((val-table (char-table-extra-slot table 4))) | ||
| 377 | (cond ((integerp val) | ||
| 378 | val) | ||
| 379 | ((stringp val) | ||
| 380 | (let ((first-char (lsh (lsh char -7) 7)) | ||
| 381 | (str val) | ||
| 382 | (len (length val)) | ||
| 383 | (idx 0) | ||
| 384 | this-val count) | ||
| 385 | (while (< idx len) | ||
| 386 | (setq val (aref str idx) idx (1+ idx) | ||
| 387 | count (if (< idx len) (aref str idx) 1)) | ||
| 388 | (setq val (and (> val 0) (aref val-table (1- val))) | ||
| 389 | count (if (< count 128) | ||
| 390 | 1 | ||
| 391 | (prog1 (- count 128) (setq idx (1+ idx))))) | ||
| 392 | (dotimes (i count) | ||
| 393 | (aset table first-char val) | ||
| 394 | (if (= first-char char) | ||
| 395 | (setq this-val val)) | ||
| 396 | (setq first-char (1+ first-char)))) | ||
| 397 | this-val))))) | ||
| 398 | |||
| 399 | ;; Return a numeric-type (integer or float) character property value | ||
| 400 | ;; of CHAR. VAL is the current value of (aref TABLE CHAR). | ||
| 401 | |||
| 402 | (defun unidata-get-numeric (char val table) | ||
| 403 | (cond | ||
| 404 | ((numberp val) | ||
| 405 | val) | ||
| 406 | ((stringp val) | ||
| 407 | (let ((val-table (char-table-extra-slot table 4)) | ||
| 408 | (first-char (lsh (lsh char -7) 7)) | ||
| 409 | (str val) | ||
| 410 | (len (length val)) | ||
| 411 | (idx 0) | ||
| 412 | this-val count) | ||
| 413 | (while (< idx len) | ||
| 414 | (setq val (aref str idx) idx (1+ idx) | ||
| 415 | count (if (< idx len) (aref str idx) 1)) | ||
| 416 | (setq val (and (> val 0) (aref val-table (1- val))) | ||
| 417 | count (if (< count 128) | ||
| 418 | 1 | ||
| 419 | (prog1 (- count 128) (setq idx (1+ idx))))) | ||
| 420 | (dotimes (i count) | ||
| 421 | (aset table first-char val) | ||
| 422 | (if (= first-char char) | ||
| 423 | (setq this-val val)) | ||
| 424 | (setq first-char (1+ first-char)))) | ||
| 425 | this-val)))) | ||
| 426 | |||
| 427 | ;; Store VAL (symbol) as a character property value of CHAR in TABLE. | ||
| 428 | |||
| 429 | (defun unidata-put-symbol (char val table) | ||
| 430 | (or (symbolp val) | ||
| 431 | (error "Not a symbol: %S" val)) | ||
| 432 | (let ((current-val (aref table char))) | ||
| 433 | (unless (eq current-val val) | ||
| 434 | (if (stringp current-val) | ||
| 435 | (funcall (char-table-extra-slot table 1) char current-val table)) | ||
| 436 | (aset table char val)))) | ||
| 437 | |||
| 438 | ;; Store VAL (integer) as a character property value of CHAR in TABLE. | ||
| 439 | |||
| 440 | (defun unidata-put-integer (char val table) | ||
| 441 | (or (integerp val) | ||
| 442 | (not val) | ||
| 443 | (error "Not an integer nor nil: %S" val)) | ||
| 444 | (let ((current-val (aref table char))) | ||
| 445 | (unless (eq current-val val) | ||
| 446 | (if (stringp current-val) | ||
| 447 | (funcall (char-table-extra-slot table 1) char current-val table)) | ||
| 448 | (aset table char val)))) | ||
| 449 | |||
| 450 | ;; Store VAL (integer or float) as a character property value of CHAR | ||
| 451 | ;; in TABLE. | ||
| 452 | |||
| 453 | (defun unidata-put-numeric (char val table) | ||
| 454 | (or (numberp val) | ||
| 455 | (not val) | ||
| 456 | (error "Not a number nor nil: %S" val)) | ||
| 457 | (let ((current-val (aref table char))) | ||
| 458 | (unless (equal current-val val) | ||
| 459 | (if (stringp current-val) | ||
| 460 | (funcall (char-table-extra-slot table 1) char current-val table)) | ||
| 461 | (aset table char val)))) | ||
| 462 | 364 | ||
| 463 | ;; Encode the character property value VAL into an integer value by | 365 | ;; Encode the character property value VAL into an integer value by |
| 464 | ;; VAL-LIST. By side effect, VAL-LIST is modified. | 366 | ;; VAL-LIST. By side effect, VAL-LIST is modified. |
| 465 | ;; VAL-LIST has this form: | 367 | ;; VAL-LIST has this form: |
| 466 | ;; (t (VAL1 . VAL-CODE1) (VAL2 . VAL-CODE2) ...) | 368 | ;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ...) |
| 467 | ;; If VAL is one of VALn, just return VAL-CODEn. Otherwise, | 369 | ;; If VAL is one of VALn, just return n. |
| 468 | ;; VAL-LIST is modified to this: | 370 | ;; Otherwise, VAL-LIST is modified to this: |
| 469 | ;; (t (VAL . (1+ VAL-CODE1)) (VAL1 . VAL-CODE1) (VAL2 . VAL-CODE2) ...) | 371 | ;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ... (VAL . n+1)) |
| 470 | 372 | ||
| 471 | (defun unidata-encode-val (val-list val) | 373 | (defun unidata-encode-val (val-list val) |
| 472 | (let ((slot (assoc val val-list)) | 374 | (let ((slot (assoc val val-list)) |
| 473 | val-code) | 375 | val-code) |
| 474 | (if slot | 376 | (if slot |
| 475 | (cdr slot) | 377 | (cdr slot) |
| 476 | (setq val-code (if (cdr val-list) (1+ (cdr (nth 1 val-list))) 1)) | 378 | (setq val-code (length val-list)) |
| 477 | (setcdr val-list (cons (cons val val-code) (cdr val-list))) | 379 | (nconc val-list (list (cons val val-code))) |
| 478 | val-code))) | 380 | val-code))) |
| 479 | 381 | ||
| 480 | ;; Generate a char-table for the character property PROP. | 382 | ;; Generate a char-table for the character property PROP. |
| 481 | 383 | ||
| 482 | (defun unidata-gen-table (prop val-func default-value) | 384 | (defun unidata-gen-table (prop val-func default-value val-list) |
| 483 | (let ((table (make-char-table 'char-code-property-table)) | 385 | (let ((table (make-char-table 'char-code-property-table)) |
| 484 | (prop-idx (unidata-prop-index prop)) | 386 | (prop-idx (unidata-prop-index prop)) |
| 485 | (val-list (list t)) | ||
| 486 | (vec (make-vector 128 0)) | 387 | (vec (make-vector 128 0)) |
| 487 | tail elt range val val-code idx slot | 388 | tail elt range val val-code idx slot |
| 488 | prev-range-data) | 389 | prev-range-data) |
| 489 | (set-char-table-range table (cons 0 (max-char)) default-value) | 390 | (setq val-list (cons nil (copy-sequence val-list))) |
| 391 | (setq tail val-list val-code 0) | ||
| 392 | ;; Convert (nil A B ...) to ((nil . 0) (A . 1) (B . 2) ...) | ||
| 393 | (while tail | ||
| 394 | (setcar tail (cons (car tail) val-code)) | ||
| 395 | (setq tail (cdr tail) val-code (1+ val-code))) | ||
| 396 | (setq default-value (unidata-encode-val val-list default-value)) | ||
| 397 | (set-char-table-range table t default-value) | ||
| 398 | (set-char-table-range table nil default-value) | ||
| 490 | (setq tail unidata-list) | 399 | (setq tail unidata-list) |
| 491 | (while tail | 400 | (while tail |
| 492 | (setq elt (car tail) tail (cdr tail)) | 401 | (setq elt (car tail) tail (cdr tail)) |
| @@ -495,7 +404,7 @@ Property value is a character." | |||
| 495 | (setq val-code (if val (unidata-encode-val val-list val))) | 404 | (setq val-code (if val (unidata-encode-val val-list val))) |
| 496 | (if (consp range) | 405 | (if (consp range) |
| 497 | (when val-code | 406 | (when val-code |
| 498 | (set-char-table-range table range val) | 407 | (set-char-table-range table range val-code) |
| 499 | (let ((from (car range)) (to (cdr range))) | 408 | (let ((from (car range)) (to (cdr range))) |
| 500 | ;; If RANGE doesn't end at the char-table boundary (each | 409 | ;; If RANGE doesn't end at the char-table boundary (each |
| 501 | ;; 128 characters), we may have to carry over the data | 410 | ;; 128 characters), we may have to carry over the data |
| @@ -534,7 +443,7 @@ Property value is a character." | |||
| 534 | (if val-code | 443 | (if val-code |
| 535 | (aset vec (- range start) val-code)) | 444 | (aset vec (- range start) val-code)) |
| 536 | (setq tail (cdr tail))) | 445 | (setq tail (cdr tail))) |
| 537 | (setq str "" val-code -1 count 0) | 446 | (setq str "\002" val-code -1 count 0) |
| 538 | (mapc #'(lambda (x) | 447 | (mapc #'(lambda (x) |
| 539 | (if (= val-code x) | 448 | (if (= val-code x) |
| 540 | (setq count (1+ count)) | 449 | (setq count (1+ count)) |
| @@ -549,7 +458,7 @@ Property value is a character." | |||
| 549 | vec) | 458 | vec) |
| 550 | (if (= count 128) | 459 | (if (= count 128) |
| 551 | (if val | 460 | (if val |
| 552 | (set-char-table-range table (cons start limit) val)) | 461 | (set-char-table-range table (cons start limit) val-code)) |
| 553 | (if (= val-code 0) | 462 | (if (= val-code 0) |
| 554 | (set-char-table-range table (cons start limit) str) | 463 | (set-char-table-range table (cons start limit) str) |
| 555 | (if (> count 2) | 464 | (if (> count 2) |
| @@ -559,34 +468,29 @@ Property value is a character." | |||
| 559 | (setq str (concat str (string val-code))))) | 468 | (setq str (concat str (string val-code))))) |
| 560 | (set-char-table-range table (cons start limit) str)))))) | 469 | (set-char-table-range table (cons start limit) str)))))) |
| 561 | 470 | ||
| 562 | (setq val-list (nreverse (cdr val-list))) | ||
| 563 | (set-char-table-extra-slot table 0 prop) | 471 | (set-char-table-extra-slot table 0 prop) |
| 564 | (set-char-table-extra-slot table 4 (vconcat (mapcar 'car val-list))) | 472 | (set-char-table-extra-slot table 4 (vconcat (mapcar 'car val-list))) |
| 565 | table)) | 473 | table)) |
| 566 | 474 | ||
| 567 | (defun unidata-gen-table-symbol (prop) | 475 | (defun unidata-gen-table-symbol (prop default-value val-list) |
| 568 | (let ((table (unidata-gen-table prop | 476 | (let ((table (unidata-gen-table prop |
| 569 | #'(lambda (x) (and (> (length x) 0) | 477 | #'(lambda (x) (and (> (length x) 0) |
| 570 | (intern x))) | 478 | (intern x))) |
| 571 | 0))) | 479 | default-value val-list))) |
| 572 | (byte-compile 'unidata-get-symbol) | 480 | (set-char-table-extra-slot table 1 0) |
| 573 | (byte-compile 'unidata-put-symbol) | 481 | (set-char-table-extra-slot table 2 1) |
| 574 | (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-symbol)) | ||
| 575 | (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-symbol)) | ||
| 576 | table)) | 482 | table)) |
| 577 | 483 | ||
| 578 | (defun unidata-gen-table-integer (prop) | 484 | (defun unidata-gen-table-integer (prop default-value val-list) |
| 579 | (let ((table (unidata-gen-table prop | 485 | (let ((table (unidata-gen-table prop |
| 580 | #'(lambda (x) (and (> (length x) 0) | 486 | #'(lambda (x) (and (> (length x) 0) |
| 581 | (string-to-number x))) | 487 | (string-to-number x))) |
| 582 | t))) | 488 | default-value val-list))) |
| 583 | (byte-compile 'unidata-get-integer) | 489 | (set-char-table-extra-slot table 1 0) |
| 584 | (byte-compile 'unidata-put-integer) | 490 | (set-char-table-extra-slot table 2 1) |
| 585 | (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-integer)) | ||
| 586 | (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-integer)) | ||
| 587 | table)) | 491 | table)) |
| 588 | 492 | ||
| 589 | (defun unidata-gen-table-numeric (prop) | 493 | (defun unidata-gen-table-numeric (prop default-value val-list) |
| 590 | (let ((table (unidata-gen-table prop | 494 | (let ((table (unidata-gen-table prop |
| 591 | #'(lambda (x) | 495 | #'(lambda (x) |
| 592 | (if (string-match "/" x) | 496 | (if (string-match "/" x) |
| @@ -595,11 +499,9 @@ Property value is a character." | |||
| 595 | (substring x (match-end 0)))) | 499 | (substring x (match-end 0)))) |
| 596 | (if (> (length x) 0) | 500 | (if (> (length x) 0) |
| 597 | (string-to-number x)))) | 501 | (string-to-number x)))) |
| 598 | t))) | 502 | default-value val-list))) |
| 599 | (byte-compile 'unidata-get-numeric) | 503 | (set-char-table-extra-slot table 1 0) |
| 600 | (byte-compile 'unidata-put-numeric) | 504 | (set-char-table-extra-slot table 2 2) |
| 601 | (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-numeric)) | ||
| 602 | (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-numeric)) | ||
| 603 | table)) | 505 | table)) |
| 604 | 506 | ||
| 605 | 507 | ||
| @@ -892,7 +794,6 @@ Property value is a character." | |||
| 892 | word-table | 794 | word-table |
| 893 | block-list block-word-table block-end | 795 | block-list block-word-table block-end |
| 894 | tail elt range val idx slot) | 796 | tail elt range val idx slot) |
| 895 | (set-char-table-range table (cons 0 (max-char)) 0) | ||
| 896 | (setq tail unidata-list) | 797 | (setq tail unidata-list) |
| 897 | (setq block-end -1) | 798 | (setq block-end -1) |
| 898 | (while tail | 799 | (while tail |
| @@ -1025,7 +926,7 @@ Property value is a character." | |||
| 1025 | idx (1+ i))))) | 926 | idx (1+ i))))) |
| 1026 | (nreverse (cons (intern (substring str idx)) l)))))) | 927 | (nreverse (cons (intern (substring str idx)) l)))))) |
| 1027 | 928 | ||
| 1028 | (defun unidata-gen-table-name (prop) | 929 | (defun unidata-gen-table-name (prop &rest ignore) |
| 1029 | (let* ((table (unidata-gen-table-word-list prop 'unidata-split-name)) | 930 | (let* ((table (unidata-gen-table-word-list prop 'unidata-split-name)) |
| 1030 | (word-tables (char-table-extra-slot table 4))) | 931 | (word-tables (char-table-extra-slot table 4))) |
| 1031 | (byte-compile 'unidata-get-name) | 932 | (byte-compile 'unidata-get-name) |
| @@ -1064,7 +965,7 @@ Property value is a character." | |||
| 1064 | (nreverse l))))) | 965 | (nreverse l))))) |
| 1065 | 966 | ||
| 1066 | 967 | ||
| 1067 | (defun unidata-gen-table-decomposition (prop) | 968 | (defun unidata-gen-table-decomposition (prop &rest ignore) |
| 1068 | (let* ((table (unidata-gen-table-word-list prop 'unidata-split-decomposition)) | 969 | (let* ((table (unidata-gen-table-word-list prop 'unidata-split-decomposition)) |
| 1069 | (word-tables (char-table-extra-slot table 4))) | 970 | (word-tables (char-table-extra-slot table 4))) |
| 1070 | (byte-compile 'unidata-get-decomposition) | 971 | (byte-compile 'unidata-get-decomposition) |
| @@ -1080,7 +981,8 @@ Property value is a character." | |||
| 1080 | 981 | ||
| 1081 | (defun unidata-describe-general-category (val) | 982 | (defun unidata-describe-general-category (val) |
| 1082 | (cdr (assq val | 983 | (cdr (assq val |
| 1083 | '((Lu . "Letter, Uppercase") | 984 | '((nil . "Uknown") |
| 985 | (Lu . "Letter, Uppercase") | ||
| 1084 | (Ll . "Letter, Lowercase") | 986 | (Ll . "Letter, Lowercase") |
| 1085 | (Lt . "Letter, Titlecase") | 987 | (Lt . "Letter, Titlecase") |
| 1086 | (Lm . "Letter, Modifier") | 988 | (Lm . "Letter, Modifier") |
| @@ -1171,6 +1073,19 @@ Property value is a character." | |||
| 1171 | (string ?')))) | 1073 | (string ?')))) |
| 1172 | val " ")) | 1074 | val " ")) |
| 1173 | 1075 | ||
| 1076 | (defun unidata-gen-mirroring-list () | ||
| 1077 | (let ((head (list nil)) | ||
| 1078 | tail) | ||
| 1079 | (with-temp-buffer | ||
| 1080 | (insert-file-contents (expand-file-name "BidiMirroring.txt" unidata-dir)) | ||
| 1081 | (goto-char (point-min)) | ||
| 1082 | (setq tail head) | ||
| 1083 | (while (re-search-forward "^\\([0-9A-F]+\\);\\s +\\([0-9A-F]+\\)" nil t) | ||
| 1084 | (let ((char (string-to-number (match-string 1) 16)) | ||
| 1085 | (mirror (match-string 2))) | ||
| 1086 | (setq tail (setcdr tail (list (list char mirror))))))) | ||
| 1087 | (cdr head))) | ||
| 1088 | |||
| 1174 | ;; Verify if we can retrieve correct values from the generated | 1089 | ;; Verify if we can retrieve correct values from the generated |
| 1175 | ;; char-tables. | 1090 | ;; char-tables. |
| 1176 | 1091 | ||
| @@ -1212,13 +1127,21 @@ Property value is a character." | |||
| 1212 | ;; The entry function. It generates files described in the header | 1127 | ;; The entry function. It generates files described in the header |
| 1213 | ;; comment of this file. | 1128 | ;; comment of this file. |
| 1214 | 1129 | ||
| 1215 | (defun unidata-gen-files (&optional unidata-text-file) | 1130 | (defun unidata-gen-files (&optional data-dir unidata-text-file) |
| 1216 | (or unidata-text-file | 1131 | (or data-dir |
| 1217 | (setq unidata-text-file (car command-line-args-left) | 1132 | (setq data-dir (car command-line-args-left) |
| 1133 | command-line-args-left (cdr command-line-args-left) | ||
| 1134 | unidata-text-file (car command-line-args-left) | ||
| 1218 | command-line-args-left (cdr command-line-args-left))) | 1135 | command-line-args-left (cdr command-line-args-left))) |
| 1219 | (unidata-setup-list unidata-text-file) | ||
| 1220 | (let ((coding-system-for-write 'utf-8-unix) | 1136 | (let ((coding-system-for-write 'utf-8-unix) |
| 1221 | (charprop-file "charprop.el")) | 1137 | (charprop-file "charprop.el") |
| 1138 | (unidata-dir data-dir)) | ||
| 1139 | (dolist (elt unidata-prop-alist) | ||
| 1140 | (let* ((prop (car elt)) | ||
| 1141 | (file (unidata-prop-file prop))) | ||
| 1142 | (if (file-exists-p file) | ||
| 1143 | (delete-file file)))) | ||
| 1144 | (unidata-setup-list unidata-text-file) | ||
| 1222 | (with-temp-file charprop-file | 1145 | (with-temp-file charprop-file |
| 1223 | (insert ";; Automatically generated by unidata-gen.el.\n") | 1146 | (insert ";; Automatically generated by unidata-gen.el.\n") |
| 1224 | (dolist (elt unidata-prop-alist) | 1147 | (dolist (elt unidata-prop-alist) |
| @@ -1227,31 +1150,41 @@ Property value is a character." | |||
| 1227 | (file (unidata-prop-file prop)) | 1150 | (file (unidata-prop-file prop)) |
| 1228 | (docstring (unidata-prop-docstring prop)) | 1151 | (docstring (unidata-prop-docstring prop)) |
| 1229 | (describer (unidata-prop-describer prop)) | 1152 | (describer (unidata-prop-describer prop)) |
| 1153 | (default-value (unidata-prop-default prop)) | ||
| 1154 | (val-list (unidata-prop-val-list prop)) | ||
| 1230 | table) | 1155 | table) |
| 1231 | ;; Filename in this comment line is extracted by sed in | 1156 | ;; Filename in this comment line is extracted by sed in |
| 1232 | ;; Makefile. | 1157 | ;; Makefile. |
| 1233 | (insert (format ";; FILE: %s\n" file)) | 1158 | (insert (format ";; FILE: %s\n" file)) |
| 1234 | (insert (format "(define-char-code-property '%S %S\n %S)\n" | 1159 | (insert (format "(define-char-code-property '%S %S\n %S)\n" |
| 1235 | prop file docstring)) | 1160 | prop file docstring)) |
| 1236 | (with-temp-file file | 1161 | (with-temp-buffer |
| 1237 | (message "Generating %s..." file) | 1162 | (message "Generating %s..." file) |
| 1238 | (setq table (funcall generator prop)) | 1163 | (when (file-exists-p file) |
| 1164 | (insert-file-contents file) | ||
| 1165 | (goto-char (point-max)) | ||
| 1166 | (search-backward ";; Local Variables:")) | ||
| 1167 | (setq table (funcall generator prop default-value val-list)) | ||
| 1239 | (when describer | 1168 | (when describer |
| 1240 | (unless (subrp (symbol-function describer)) | 1169 | (unless (subrp (symbol-function describer)) |
| 1241 | (byte-compile describer) | 1170 | (byte-compile describer) |
| 1242 | (setq describer (symbol-function describer))) | 1171 | (setq describer (symbol-function describer))) |
| 1243 | (set-char-table-extra-slot table 3 describer)) | 1172 | (set-char-table-extra-slot table 3 describer)) |
| 1244 | (insert ";; Copyright (C) 1991-2009 Unicode, Inc. | 1173 | (if (bobp) |
| 1245 | ;; This file was generated from the Unicode data file at | 1174 | (insert ";; Copyright (C) 1991-2009 Unicode, Inc. |
| 1246 | ;; http://www.unicode.org/Public/UNIDATA/UnicodeData.txt. | 1175 | ;; This file was generated from the Unicode data files at |
| 1247 | ;; See lisp/international/README for the copyright and permission notice.\n" | 1176 | ;; http://www.unicode.org/Public/UNIDATA/. |
| 1248 | (format "(define-char-code-property '%S %S %S)\n" | 1177 | ;; See lisp/international/README for the copyright and permission notice.\n")) |
| 1249 | prop table docstring) | 1178 | (insert (format "(define-char-code-property '%S %S %S)\n" |
| 1250 | ";; Local Variables:\n" | 1179 | prop table docstring)) |
| 1251 | ";; coding: utf-8\n" | 1180 | (if (eobp) |
| 1252 | ";; no-byte-compile: t\n" | 1181 | (insert ";; Local Variables:\n" |
| 1253 | ";; End:\n\n" | 1182 | ";; coding: utf-8\n" |
| 1254 | (format ";; %s ends here\n" file))))) | 1183 | ";; no-byte-compile: t\n" |
| 1184 | ";; End:\n\n" | ||
| 1185 | (format ";; %s ends here\n" file))) | ||
| 1186 | (write-file file) | ||
| 1187 | (message "Generating %s...done" file)))) | ||
| 1255 | (message "Writing %s..." charprop-file) | 1188 | (message "Writing %s..." charprop-file) |
| 1256 | (insert ";; Local Variables:\n" | 1189 | (insert ";; Local Variables:\n" |
| 1257 | ";; coding: utf-8\n" | 1190 | ";; coding: utf-8\n" |
diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 89b33dc7a62..b85a1680286 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog | |||
| @@ -1,3 +1,39 @@ | |||
| 1 | 2011-07-06 Kenichi Handa <handa@m17n.org> | ||
| 2 | |||
| 3 | * international/characters.el (build-unicode-category-table): | ||
| 4 | Delete it. | ||
| 5 | (unicode-category-table): Set it by | ||
| 6 | unicode-prroperty-table-internal. | ||
| 7 | |||
| 8 | * international/mule-cmds.el (char-code-property-alist): Moved to | ||
| 9 | to src/chartab.c. | ||
| 10 | (get-char-code-property): Call unicode-property-table-internal to | ||
| 11 | load a file. Call get-unicode-property-internal where necessary. | ||
| 12 | (put-char-code-property): Call unicode-property-table-internal to | ||
| 13 | load a file. Call put-unicode-property-internal where necessary. | ||
| 14 | put-unicode-property-internal where necessary. | ||
| 15 | (char-code-property-description): Call | ||
| 16 | unicode-property-table-internal to load a file. | ||
| 17 | |||
| 18 | * international/charprop.el: | ||
| 19 | * international/uni-bidi.el: | ||
| 20 | * international/uni-category.el: | ||
| 21 | * international/uni-combining.el: | ||
| 22 | * international/uni-comment.el: | ||
| 23 | * international/uni-decimal.el: | ||
| 24 | * international/uni-decomposition.el: | ||
| 25 | * international/uni-digit.el: | ||
| 26 | * international/uni-lowercase.el: | ||
| 27 | * international/uni-mirrored.el: | ||
| 28 | * international/uni-name.el: | ||
| 29 | * international/uni-numeric.el: | ||
| 30 | * international/uni-old-name.el: | ||
| 31 | * international/uni-titlecase.el: | ||
| 32 | * international/uni-uppercase.el: Regenerate. | ||
| 33 | |||
| 34 | * loadup.el: Load international/charprop.el before | ||
| 35 | international/characters. | ||
| 36 | |||
| 1 | 2011-06-22 Richard Stallman <rms@gnu.org> | 37 | 2011-06-22 Richard Stallman <rms@gnu.org> |
| 2 | 38 | ||
| 3 | * mail/sendmail.el (mail-bury): If Rmail is in use, return nicely | 39 | * mail/sendmail.el (mail-bury): If Rmail is in use, return nicely |
diff --git a/lisp/international/characters.el b/lisp/international/characters.el index 455cbe697d6..a9657c17b9f 100644 --- a/lisp/international/characters.el +++ b/lisp/international/characters.el | |||
| @@ -1206,22 +1206,8 @@ Setup char-width-table appropriate for non-CJK language environment." | |||
| 1206 | 1206 | ||
| 1207 | ;;; Setting unicode-category-table. | 1207 | ;;; Setting unicode-category-table. |
| 1208 | 1208 | ||
| 1209 | ;; This macro is to build unicode-category-table at compile time so | 1209 | (setq unicode-category-table |
| 1210 | ;; that C code can access the table efficiently. | 1210 | (unicode-property-table-internal 'general-category)) |
| 1211 | (defmacro build-unicode-category-table () | ||
| 1212 | (let ((table (make-char-table 'unicode-category-table nil))) | ||
| 1213 | (dotimes (i #x110000) | ||
| 1214 | (if (or (< i #xD800) | ||
| 1215 | (and (>= i #xF900) (< i #x30000)) | ||
| 1216 | (and (>= i #xE0000) (< i #xE0200))) | ||
| 1217 | (aset table i (get-char-code-property i 'general-category)))) | ||
| 1218 | (set-char-table-range table '(#xE000 . #xF8FF) 'Co) | ||
| 1219 | (set-char-table-range table '(#xF0000 . #xFFFFD) 'Co) | ||
| 1220 | (set-char-table-range table '(#x100000 . #x10FFFD) 'Co) | ||
| 1221 | (optimize-char-table table 'eq) | ||
| 1222 | table)) | ||
| 1223 | |||
| 1224 | (setq unicode-category-table (build-unicode-category-table)) | ||
| 1225 | (map-char-table #'(lambda (key val) | 1211 | (map-char-table #'(lambda (key val) |
| 1226 | (if (and val | 1212 | (if (and val |
| 1227 | (or (and (/= (aref (symbol-name val) 0) ?M) | 1213 | (or (and (/= (aref (symbol-name val) 0) ?M) |
diff --git a/lisp/international/charprop.el b/lisp/international/charprop.el index 5c3efcc9d07..919666010b1 100644 --- a/lisp/international/charprop.el +++ b/lisp/international/charprop.el | |||
| @@ -1,8 +1,4 @@ | |||
| 1 | ;; Copyright (C) 1991-2010 Unicode, Inc. | 1 | ;; Automatically generated by unidata-gen.el. |
| 2 | ;; This file was generated from the Unicode data file at | ||
| 3 | ;; http://www.unicode.org/Public/UNIDATA/UnicodeData.txt. | ||
| 4 | ;; See lisp/international/README for the copyright and permission notice. | ||
| 5 | |||
| 6 | ;; FILE: uni-name.el | 2 | ;; FILE: uni-name.el |
| 7 | (define-char-code-property 'name "uni-name.el" | 3 | (define-char-code-property 'name "uni-name.el" |
| 8 | "Unicode character name. | 4 | "Unicode character name. |
| @@ -45,7 +41,7 @@ Property value is an integer or a floating point.") | |||
| 45 | ;; FILE: uni-mirrored.el | 41 | ;; FILE: uni-mirrored.el |
| 46 | (define-char-code-property 'mirrored "uni-mirrored.el" | 42 | (define-char-code-property 'mirrored "uni-mirrored.el" |
| 47 | "Unicode bidi mirrored flag. | 43 | "Unicode bidi mirrored flag. |
| 48 | Property value is a symbol `Y' or `N'.") | 44 | Property value is a symbol `Y' or `N'. See also the property `mirroring'.") |
| 49 | ;; FILE: uni-old-name.el | 45 | ;; FILE: uni-old-name.el |
| 50 | (define-char-code-property 'old-name "uni-old-name.el" | 46 | (define-char-code-property 'old-name "uni-old-name.el" |
| 51 | "Unicode old names as published in Unicode 1.0. | 47 | "Unicode old names as published in Unicode 1.0. |
| @@ -66,6 +62,11 @@ Property value is a character.") | |||
| 66 | (define-char-code-property 'titlecase "uni-titlecase.el" | 62 | (define-char-code-property 'titlecase "uni-titlecase.el" |
| 67 | "Unicode simple titlecase mapping. | 63 | "Unicode simple titlecase mapping. |
| 68 | Property value is a character.") | 64 | Property value is a character.") |
| 65 | ;; FILE: uni-mirrored.el | ||
| 66 | (define-char-code-property 'mirroring "uni-mirrored.el" | ||
| 67 | "Unicode bidi-mirroring characters. | ||
| 68 | Property value is a character that has the corresponding mirroring image, | ||
| 69 | or nil for non-mirrored character.") | ||
| 69 | ;; Local Variables: | 70 | ;; Local Variables: |
| 70 | ;; coding: utf-8 | 71 | ;; coding: utf-8 |
| 71 | ;; no-byte-compile: t | 72 | ;; no-byte-compile: t |
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el index b3f17bb3fcf..e75a22d6415 100644 --- a/lisp/international/mule-cmds.el +++ b/lisp/international/mule-cmds.el | |||
| @@ -2709,16 +2709,6 @@ See also `locale-charset-language-names', `locale-language-names', | |||
| 2709 | 2709 | ||
| 2710 | ;;; Character property | 2710 | ;;; Character property |
| 2711 | 2711 | ||
| 2712 | ;; Each element has the form (PROP . TABLE). | ||
| 2713 | ;; PROP is a symbol representing a character property. | ||
| 2714 | ;; TABLE is a char-table containing the property value for each character. | ||
| 2715 | ;; TABLE may be a name of file to load to build a char-table. | ||
| 2716 | ;; Don't modify this variable directly but use `define-char-code-property'. | ||
| 2717 | |||
| 2718 | (defvar char-code-property-alist nil | ||
| 2719 | "Alist of character property name vs char-table containing property values. | ||
| 2720 | Internal use only.") | ||
| 2721 | |||
| 2722 | (put 'char-code-property-table 'char-table-extra-slots 5) | 2712 | (put 'char-code-property-table 'char-table-extra-slots 5) |
| 2723 | 2713 | ||
| 2724 | (defun define-char-code-property (name table &optional docstring) | 2714 | (defun define-char-code-property (name table &optional docstring) |
| @@ -2770,32 +2760,23 @@ See also the documentation of `get-char-code-property' and | |||
| 2770 | 2760 | ||
| 2771 | (defun get-char-code-property (char propname) | 2761 | (defun get-char-code-property (char propname) |
| 2772 | "Return the value of CHAR's PROPNAME property." | 2762 | "Return the value of CHAR's PROPNAME property." |
| 2773 | (let ((slot (assq propname char-code-property-alist))) | 2763 | (let ((table (unicode-property-table-internal propname))) |
| 2774 | (if slot | 2764 | (if table |
| 2775 | (let (table value func) | 2765 | (let ((func (char-table-extra-slot table 1))) |
| 2776 | (if (stringp (cdr slot)) | ||
| 2777 | (load (cdr slot) nil t)) | ||
| 2778 | (setq table (cdr slot) | ||
| 2779 | value (aref table char) | ||
| 2780 | func (char-table-extra-slot table 1)) | ||
| 2781 | (if (functionp func) | 2766 | (if (functionp func) |
| 2782 | (setq value (funcall func char value table))) | 2767 | (funcall func char (aref table char) table) |
| 2783 | value) | 2768 | (get-unicode-property-internal table char))) |
| 2784 | (plist-get (aref char-code-property-table char) propname)))) | 2769 | (plist-get (aref char-code-property-table char) propname)))) |
| 2785 | 2770 | ||
| 2786 | (defun put-char-code-property (char propname value) | 2771 | (defun put-char-code-property (char propname value) |
| 2787 | "Store CHAR's PROPNAME property with VALUE. | 2772 | "Store CHAR's PROPNAME property with VALUE. |
| 2788 | It can be retrieved with `(get-char-code-property CHAR PROPNAME)'." | 2773 | It can be retrieved with `(get-char-code-property CHAR PROPNAME)'." |
| 2789 | (let ((slot (assq propname char-code-property-alist))) | 2774 | (let ((table (unicode-property-table-internal propname))) |
| 2790 | (if slot | 2775 | (if table |
| 2791 | (let (table func) | 2776 | (let ((func (char-table-extra-slot table 2))) |
| 2792 | (if (stringp (cdr slot)) | ||
| 2793 | (load (cdr slot) nil t)) | ||
| 2794 | (setq table (cdr slot) | ||
| 2795 | func (char-table-extra-slot table 2)) | ||
| 2796 | (if (functionp func) | 2777 | (if (functionp func) |
| 2797 | (funcall func char value table) | 2778 | (funcall func char value table) |
| 2798 | (aset table char value))) | 2779 | (put-unicode-property-internal table char value))) |
| 2799 | (let* ((plist (aref char-code-property-table char)) | 2780 | (let* ((plist (aref char-code-property-table char)) |
| 2800 | (x (plist-put plist propname value))) | 2781 | (x (plist-put plist propname value))) |
| 2801 | (or (eq x plist) | 2782 | (or (eq x plist) |
| @@ -2805,13 +2786,9 @@ It can be retrieved with `(get-char-code-property CHAR PROPNAME)'." | |||
| 2805 | (defun char-code-property-description (prop value) | 2786 | (defun char-code-property-description (prop value) |
| 2806 | "Return a description string of character property PROP's value VALUE. | 2787 | "Return a description string of character property PROP's value VALUE. |
| 2807 | If there's no description string for VALUE, return nil." | 2788 | If there's no description string for VALUE, return nil." |
| 2808 | (let ((slot (assq prop char-code-property-alist))) | 2789 | (let ((table (unicode-property-table-internal prop))) |
| 2809 | (if slot | 2790 | (if table |
| 2810 | (let (table func) | 2791 | (let ((func (char-table-extra-slot table 3))) |
| 2811 | (if (stringp (cdr slot)) | ||
| 2812 | (load (cdr slot) nil t)) | ||
| 2813 | (setq table (cdr slot) | ||
| 2814 | func (char-table-extra-slot table 3)) | ||
| 2815 | (if (functionp func) | 2792 | (if (functionp func) |
| 2816 | (funcall func value)))))) | 2793 | (funcall func value)))))) |
| 2817 | 2794 | ||
diff --git a/lisp/international/uni-bidi.el b/lisp/international/uni-bidi.el index 9e571ef9d0d..e7682c6d8ff 100644 --- a/lisp/international/uni-bidi.el +++ b/lisp/international/uni-bidi.el | |||
| Binary files differ | |||
diff --git a/lisp/international/uni-category.el b/lisp/international/uni-category.el index 80538f7b416..a4455decc52 100644 --- a/lisp/international/uni-category.el +++ b/lisp/international/uni-category.el | |||
| Binary files differ | |||
diff --git a/lisp/international/uni-combining.el b/lisp/international/uni-combining.el index 2ee74d8b818..227b9d0af79 100644 --- a/lisp/international/uni-combining.el +++ b/lisp/international/uni-combining.el | |||
| Binary files differ | |||
diff --git a/lisp/international/uni-comment.el b/lisp/international/uni-comment.el index dcc717977c7..c9743064bd4 100644 --- a/lisp/international/uni-comment.el +++ b/lisp/international/uni-comment.el | |||
| Binary files differ | |||
diff --git a/lisp/international/uni-decimal.el b/lisp/international/uni-decimal.el index 22207a224b0..2c424ffb5de 100644 --- a/lisp/international/uni-decimal.el +++ b/lisp/international/uni-decimal.el | |||
| Binary files differ | |||
diff --git a/lisp/international/uni-decomposition.el b/lisp/international/uni-decomposition.el index f35bcebfed8..b0bf07bbe85 100644 --- a/lisp/international/uni-decomposition.el +++ b/lisp/international/uni-decomposition.el | |||
| Binary files differ | |||
diff --git a/lisp/international/uni-digit.el b/lisp/international/uni-digit.el index 692dea1edc8..fc52fd8c28c 100644 --- a/lisp/international/uni-digit.el +++ b/lisp/international/uni-digit.el | |||
| Binary files differ | |||
diff --git a/lisp/international/uni-lowercase.el b/lisp/international/uni-lowercase.el index 7cc601159f0..41890018204 100644 --- a/lisp/international/uni-lowercase.el +++ b/lisp/international/uni-lowercase.el | |||
| Binary files differ | |||
diff --git a/lisp/international/uni-mirrored.el b/lisp/international/uni-mirrored.el index 5129a93396d..006cf575591 100644 --- a/lisp/international/uni-mirrored.el +++ b/lisp/international/uni-mirrored.el | |||
| Binary files differ | |||
diff --git a/lisp/international/uni-name.el b/lisp/international/uni-name.el index 5b9e8323d21..7fac18b278d 100644 --- a/lisp/international/uni-name.el +++ b/lisp/international/uni-name.el | |||
| Binary files differ | |||
diff --git a/lisp/international/uni-numeric.el b/lisp/international/uni-numeric.el index 278ad683fe4..d16e8c00870 100644 --- a/lisp/international/uni-numeric.el +++ b/lisp/international/uni-numeric.el | |||
| Binary files differ | |||
diff --git a/lisp/international/uni-old-name.el b/lisp/international/uni-old-name.el index 2e283492408..4e704e5cdd0 100644 --- a/lisp/international/uni-old-name.el +++ b/lisp/international/uni-old-name.el | |||
| Binary files differ | |||
diff --git a/lisp/international/uni-titlecase.el b/lisp/international/uni-titlecase.el index 729a469d103..b8098c81876 100644 --- a/lisp/international/uni-titlecase.el +++ b/lisp/international/uni-titlecase.el | |||
| Binary files differ | |||
diff --git a/lisp/international/uni-uppercase.el b/lisp/international/uni-uppercase.el index 0714b14794f..899276eb725 100644 --- a/lisp/international/uni-uppercase.el +++ b/lisp/international/uni-uppercase.el | |||
| Binary files differ | |||
diff --git a/lisp/loadup.el b/lisp/loadup.el index 4c677523689..792827dd913 100644 --- a/lisp/loadup.el +++ b/lisp/loadup.el | |||
| @@ -123,11 +123,11 @@ | |||
| 123 | ;; multilingual text. | 123 | ;; multilingual text. |
| 124 | (load "international/mule-cmds") | 124 | (load "international/mule-cmds") |
| 125 | (load "case-table") | 125 | (load "case-table") |
| 126 | (load "international/characters") | ||
| 127 | (load "composite") | ||
| 128 | ;; This file doesn't exist when building a development version of Emacs | 126 | ;; This file doesn't exist when building a development version of Emacs |
| 129 | ;; from the repository. It is generated just after temacs is built. | 127 | ;; from the repository. It is generated just after temacs is built. |
| 130 | (load "international/charprop.el" t) | 128 | (load "international/charprop.el" t) |
| 129 | (load "international/characters") | ||
| 130 | (load "composite") | ||
| 131 | 131 | ||
| 132 | ;; Load language-specific files. | 132 | ;; Load language-specific files. |
| 133 | (load "language/chinese") | 133 | (load "language/chinese") |
diff --git a/src/ChangeLog b/src/ChangeLog index 78fca60ca28..1a56298ee20 100644 --- a/src/ChangeLog +++ b/src/ChangeLog | |||
| @@ -1,3 +1,45 @@ | |||
| 1 | 2011-07-06 Kenichi Handa <handa@m17n.org> | ||
| 2 | |||
| 3 | * character.h (unicode_category_t): New enum type. | ||
| 4 | |||
| 5 | * chartab.c (uniprop_decoder_t, uniprop_encoder_t): New types. | ||
| 6 | (Qchar_code_property_table): New variable. | ||
| 7 | (UNIPROP_TABLE_P, UNIPROP_GET_DECODER) | ||
| 8 | (UNIPROP_COMPRESSED_FORM_P): New macros. | ||
| 9 | (char_table_ascii): Uncompress the compressed values. | ||
| 10 | (sub_char_table_ref): New arg is_uniprop. Callers changed. | ||
| 11 | Uncompress the compressed values. | ||
| 12 | (sub_char_table_ref_and_range): Likewise. | ||
| 13 | (char_table_ref_and_range): Uncompress the compressed values. | ||
| 14 | (sub_char_table_set): New arg is_uniprop. Callers changed. | ||
| 15 | Uncompress the compressed values. | ||
| 16 | (sub_char_table_set_range): Args changed. Callers changed. | ||
| 17 | (char_table_set_range): Adjuted for the above change. | ||
| 18 | (map_sub_char_table): Delete args default_val and parent. Add arg | ||
| 19 | top. Give decoded values to a Lisp function. | ||
| 20 | (map_char_table): Adjusted for the above change. Give decoded | ||
| 21 | values to a Lisp function. Gcpro more variables. | ||
| 22 | (uniprop_table_uncompress) | ||
| 23 | (uniprop_decode_value_run_length): New functions. | ||
| 24 | (uniprop_decoder, uniprop_decoder_count): New variables. | ||
| 25 | (uniprop_get_decoder, uniprop_encode_value_character) | ||
| 26 | (uniprop_encode_value_run_length, uniprop_encode_value_numeric): | ||
| 27 | New functions. | ||
| 28 | (uniprop_encoder, uniprop_encoder_count): New variables. | ||
| 29 | (uniprop_get_encoder, uniprop_table) | ||
| 30 | (Funicode_property_table_internal, Fget_unicode_property_internal) | ||
| 31 | (Fput_unicode_property_internal): New functions. | ||
| 32 | (syms_of_chartab): DEFSYM Qchar_code_property_table, defsubr | ||
| 33 | Sunicode_property_table_internal, Sget_unicode_property_internal, | ||
| 34 | and Sput_unicode_property_internal. Defvar_lisp | ||
| 35 | char-code-property-alist. | ||
| 36 | |||
| 37 | * composite.c (CHAR_COMPOSABLE_P): Adjusted for the change of | ||
| 38 | Vunicode_category_table. | ||
| 39 | |||
| 40 | * font.c (font_range): Adjusted for the change of | ||
| 41 | Vunicode_category_table. | ||
| 42 | |||
| 1 | 2011-06-22 Paul Eggert <eggert@cs.ucla.edu> | 43 | 2011-06-22 Paul Eggert <eggert@cs.ucla.edu> |
| 2 | 44 | ||
| 3 | Fixes for GLYPH_DEBUG found by GCC 4.6.0 static checking. | 45 | Fixes for GLYPH_DEBUG found by GCC 4.6.0 static checking. |
diff --git a/src/character.h b/src/character.h index 9a45e7f0033..d8e77c50953 100644 --- a/src/character.h +++ b/src/character.h | |||
| @@ -597,6 +597,45 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ | |||
| 597 | : (c) <= 0xDFFF ? 2 \ | 597 | : (c) <= 0xDFFF ? 2 \ |
| 598 | : 0) | 598 | : 0) |
| 599 | 599 | ||
| 600 | /* Data type for Unicode general category. | ||
| 601 | |||
| 602 | The order of members must be in sync with the 8th element of the | ||
| 603 | member of unidata-prop-alist (in admin/unidata/unidata-getn.el) for | ||
| 604 | Unicode character property `general-category'. */ | ||
| 605 | |||
| 606 | typedef enum { | ||
| 607 | UNICODE_CATEGORY_UNKNOWN = 0, | ||
| 608 | UNICODE_CATEGORY_Lu, | ||
| 609 | UNICODE_CATEGORY_Ll, | ||
| 610 | UNICODE_CATEGORY_Lt, | ||
| 611 | UNICODE_CATEGORY_Lm, | ||
| 612 | UNICODE_CATEGORY_Lo, | ||
| 613 | UNICODE_CATEGORY_Mn, | ||
| 614 | UNICODE_CATEGORY_Mc, | ||
| 615 | UNICODE_CATEGORY_Me, | ||
| 616 | UNICODE_CATEGORY_Nd, | ||
| 617 | UNICODE_CATEGORY_Nl, | ||
| 618 | UNICODE_CATEGORY_No, | ||
| 619 | UNICODE_CATEGORY_Pc, | ||
| 620 | UNICODE_CATEGORY_Pd, | ||
| 621 | UNICODE_CATEGORY_Ps, | ||
| 622 | UNICODE_CATEGORY_Pe, | ||
| 623 | UNICODE_CATEGORY_Pi, | ||
| 624 | UNICODE_CATEGORY_Pf, | ||
| 625 | UNICODE_CATEGORY_Po, | ||
| 626 | UNICODE_CATEGORY_Sm, | ||
| 627 | UNICODE_CATEGORY_Sc, | ||
| 628 | UNICODE_CATEGORY_Sk, | ||
| 629 | UNICODE_CATEGORY_So, | ||
| 630 | UNICODE_CATEGORY_Zs, | ||
| 631 | UNICODE_CATEGORY_Zl, | ||
| 632 | UNICODE_CATEGORY_Zp, | ||
| 633 | UNICODE_CATEGORY_Cc, | ||
| 634 | UNICODE_CATEGORY_Cf, | ||
| 635 | UNICODE_CATEGORY_Cs, | ||
| 636 | UNICODE_CATEGORY_Co, | ||
| 637 | UNICODE_CATEGORY_Cn | ||
| 638 | } unicode_category_t; | ||
| 600 | 639 | ||
| 601 | extern int char_resolve_modifier_mask (int); | 640 | extern int char_resolve_modifier_mask (int); |
| 602 | extern int char_string (unsigned, unsigned char *); | 641 | extern int char_string (unsigned, unsigned char *); |
diff --git a/src/chartab.c b/src/chartab.c index ed5b238646e..4a9a76bdd60 100644 --- a/src/chartab.c +++ b/src/chartab.c | |||
| @@ -53,7 +53,38 @@ static const int chartab_bits[4] = | |||
| 53 | #define CHARTAB_IDX(c, depth, min_char) \ | 53 | #define CHARTAB_IDX(c, depth, min_char) \ |
| 54 | (((c) - (min_char)) >> chartab_bits[(depth)]) | 54 | (((c) - (min_char)) >> chartab_bits[(depth)]) |
| 55 | 55 | ||
| 56 | |||
| 57 | /* Preamble for uniprop (Unicode character property) tables. See the | ||
| 58 | comment of "Unicode character property tables". */ | ||
| 59 | |||
| 60 | /* Purpose of uniprop tables. */ | ||
| 61 | static Lisp_Object Qchar_code_property_table; | ||
| 62 | |||
| 63 | /* Types of decoder and encoder functions for uniprop values. */ | ||
| 64 | typedef Lisp_Object (*uniprop_decoder_t) (Lisp_Object, Lisp_Object); | ||
| 65 | typedef Lisp_Object (*uniprop_encoder_t) (Lisp_Object, Lisp_Object); | ||
| 66 | |||
| 67 | static Lisp_Object uniprop_table_uncompress (Lisp_Object, int); | ||
| 68 | static uniprop_decoder_t uniprop_get_decoder (Lisp_Object); | ||
| 69 | |||
| 70 | /* 1 iff TABLE is a uniprop table. */ | ||
| 71 | #define UNIPROP_TABLE_P(TABLE) \ | ||
| 72 | (EQ (XCHAR_TABLE (TABLE)->purpose, Qchar_code_property_table) \ | ||
| 73 | && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (TABLE)) == 5) | ||
| 74 | |||
| 75 | /* Return a decoder for values in the uniprop table TABLE. */ | ||
| 76 | #define UNIPROP_GET_DECODER(TABLE) \ | ||
| 77 | (UNIPROP_TABLE_P (TABLE) ? uniprop_get_decoder (TABLE) : NULL) | ||
| 56 | 78 | ||
| 79 | /* Nonzero iff OBJ is a string representing uniprop values of 128 | ||
| 80 | succeeding characters (the bottom level of a char-table) by a | ||
| 81 | compressed format. We are sure that no property value has a string | ||
| 82 | starting with '\001' nor '\002'. */ | ||
| 83 | #define UNIPROP_COMPRESSED_FORM_P(OBJ) \ | ||
| 84 | (STRINGP (OBJ) && SCHARS (OBJ) > 0 \ | ||
| 85 | && ((SREF (OBJ, 0) == 1 || (SREF (OBJ, 0) == 2)))) | ||
| 86 | |||
| 87 | |||
| 57 | DEFUN ("make-char-table", Fmake_char_table, Smake_char_table, 1, 2, 0, | 88 | DEFUN ("make-char-table", Fmake_char_table, Smake_char_table, 1, 2, 0, |
| 58 | doc: /* Return a newly created char-table, with purpose PURPOSE. | 89 | doc: /* Return a newly created char-table, with purpose PURPOSE. |
| 59 | Each element is initialized to INIT, which defaults to nil. | 90 | Each element is initialized to INIT, which defaults to nil. |
| @@ -107,7 +138,7 @@ make_sub_char_table (int depth, int min_char, Lisp_Object defalt) | |||
| 107 | static Lisp_Object | 138 | static Lisp_Object |
| 108 | char_table_ascii (Lisp_Object table) | 139 | char_table_ascii (Lisp_Object table) |
| 109 | { | 140 | { |
| 110 | Lisp_Object sub; | 141 | Lisp_Object sub, val; |
| 111 | 142 | ||
| 112 | sub = XCHAR_TABLE (table)->contents[0]; | 143 | sub = XCHAR_TABLE (table)->contents[0]; |
| 113 | if (! SUB_CHAR_TABLE_P (sub)) | 144 | if (! SUB_CHAR_TABLE_P (sub)) |
| @@ -115,7 +146,10 @@ char_table_ascii (Lisp_Object table) | |||
| 115 | sub = XSUB_CHAR_TABLE (sub)->contents[0]; | 146 | sub = XSUB_CHAR_TABLE (sub)->contents[0]; |
| 116 | if (! SUB_CHAR_TABLE_P (sub)) | 147 | if (! SUB_CHAR_TABLE_P (sub)) |
| 117 | return sub; | 148 | return sub; |
| 118 | return XSUB_CHAR_TABLE (sub)->contents[0]; | 149 | val = XSUB_CHAR_TABLE (sub)->contents[0]; |
| 150 | if (UNIPROP_TABLE_P (table) && UNIPROP_COMPRESSED_FORM_P (val)) | ||
| 151 | val = uniprop_table_uncompress (sub, 0); | ||
| 152 | return val; | ||
| 119 | } | 153 | } |
| 120 | 154 | ||
| 121 | static Lisp_Object | 155 | static Lisp_Object |
| @@ -169,16 +203,19 @@ copy_char_table (Lisp_Object table) | |||
| 169 | } | 203 | } |
| 170 | 204 | ||
| 171 | static Lisp_Object | 205 | static Lisp_Object |
| 172 | sub_char_table_ref (Lisp_Object table, int c) | 206 | sub_char_table_ref (Lisp_Object table, int c, int is_uniprop) |
| 173 | { | 207 | { |
| 174 | struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table); | 208 | struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table); |
| 175 | int depth = XINT (tbl->depth); | 209 | int depth = XINT (tbl->depth); |
| 176 | int min_char = XINT (tbl->min_char); | 210 | int min_char = XINT (tbl->min_char); |
| 177 | Lisp_Object val; | 211 | Lisp_Object val; |
| 212 | int idx = CHARTAB_IDX (c, depth, min_char); | ||
| 178 | 213 | ||
| 179 | val = tbl->contents[CHARTAB_IDX (c, depth, min_char)]; | 214 | val = tbl->contents[idx]; |
| 215 | if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val)) | ||
| 216 | val = uniprop_table_uncompress (table, idx); | ||
| 180 | if (SUB_CHAR_TABLE_P (val)) | 217 | if (SUB_CHAR_TABLE_P (val)) |
| 181 | val = sub_char_table_ref (val, c); | 218 | val = sub_char_table_ref (val, c, is_uniprop); |
| 182 | return val; | 219 | return val; |
| 183 | } | 220 | } |
| 184 | 221 | ||
| @@ -198,7 +235,7 @@ char_table_ref (Lisp_Object table, int c) | |||
| 198 | { | 235 | { |
| 199 | val = tbl->contents[CHARTAB_IDX (c, 0, 0)]; | 236 | val = tbl->contents[CHARTAB_IDX (c, 0, 0)]; |
| 200 | if (SUB_CHAR_TABLE_P (val)) | 237 | if (SUB_CHAR_TABLE_P (val)) |
| 201 | val = sub_char_table_ref (val, c); | 238 | val = sub_char_table_ref (val, c, UNIPROP_TABLE_P (table)); |
| 202 | } | 239 | } |
| 203 | if (NILP (val)) | 240 | if (NILP (val)) |
| 204 | { | 241 | { |
| @@ -210,7 +247,8 @@ char_table_ref (Lisp_Object table, int c) | |||
| 210 | } | 247 | } |
| 211 | 248 | ||
| 212 | static Lisp_Object | 249 | static Lisp_Object |
| 213 | sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp_Object defalt) | 250 | sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, |
| 251 | Lisp_Object defalt, int is_uniprop) | ||
| 214 | { | 252 | { |
| 215 | struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table); | 253 | struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table); |
| 216 | int depth = XINT (tbl->depth); | 254 | int depth = XINT (tbl->depth); |
| @@ -219,8 +257,10 @@ sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp | |||
| 219 | Lisp_Object val; | 257 | Lisp_Object val; |
| 220 | 258 | ||
| 221 | val = tbl->contents[chartab_idx]; | 259 | val = tbl->contents[chartab_idx]; |
| 260 | if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val)) | ||
| 261 | val = uniprop_table_uncompress (table, chartab_idx); | ||
| 222 | if (SUB_CHAR_TABLE_P (val)) | 262 | if (SUB_CHAR_TABLE_P (val)) |
| 223 | val = sub_char_table_ref_and_range (val, c, from, to, defalt); | 263 | val = sub_char_table_ref_and_range (val, c, from, to, defalt, is_uniprop); |
| 224 | else if (NILP (val)) | 264 | else if (NILP (val)) |
| 225 | val = defalt; | 265 | val = defalt; |
| 226 | 266 | ||
| @@ -232,8 +272,11 @@ sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp | |||
| 232 | c = min_char + idx * chartab_chars[depth] - 1; | 272 | c = min_char + idx * chartab_chars[depth] - 1; |
| 233 | idx--; | 273 | idx--; |
| 234 | this_val = tbl->contents[idx]; | 274 | this_val = tbl->contents[idx]; |
| 275 | if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val)) | ||
| 276 | this_val = uniprop_table_uncompress (table, idx); | ||
| 235 | if (SUB_CHAR_TABLE_P (this_val)) | 277 | if (SUB_CHAR_TABLE_P (this_val)) |
| 236 | this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt); | 278 | this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt, |
| 279 | is_uniprop); | ||
| 237 | else if (NILP (this_val)) | 280 | else if (NILP (this_val)) |
| 238 | this_val = defalt; | 281 | this_val = defalt; |
| 239 | 282 | ||
| @@ -251,8 +294,11 @@ sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp | |||
| 251 | 294 | ||
| 252 | chartab_idx++; | 295 | chartab_idx++; |
| 253 | this_val = tbl->contents[chartab_idx]; | 296 | this_val = tbl->contents[chartab_idx]; |
| 297 | if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val)) | ||
| 298 | this_val = uniprop_table_uncompress (table, chartab_idx); | ||
| 254 | if (SUB_CHAR_TABLE_P (this_val)) | 299 | if (SUB_CHAR_TABLE_P (this_val)) |
| 255 | this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt); | 300 | this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt, |
| 301 | is_uniprop); | ||
| 256 | else if (NILP (this_val)) | 302 | else if (NILP (this_val)) |
| 257 | this_val = defalt; | 303 | this_val = defalt; |
| 258 | if (! EQ (this_val, val)) | 304 | if (! EQ (this_val, val)) |
| @@ -277,17 +323,20 @@ char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to) | |||
| 277 | struct Lisp_Char_Table *tbl = XCHAR_TABLE (table); | 323 | struct Lisp_Char_Table *tbl = XCHAR_TABLE (table); |
| 278 | int chartab_idx = CHARTAB_IDX (c, 0, 0), idx; | 324 | int chartab_idx = CHARTAB_IDX (c, 0, 0), idx; |
| 279 | Lisp_Object val; | 325 | Lisp_Object val; |
| 326 | int is_uniprop = UNIPROP_TABLE_P (table); | ||
| 280 | 327 | ||
| 281 | val = tbl->contents[chartab_idx]; | 328 | val = tbl->contents[chartab_idx]; |
| 282 | if (*from < 0) | 329 | if (*from < 0) |
| 283 | *from = 0; | 330 | *from = 0; |
| 284 | if (*to < 0) | 331 | if (*to < 0) |
| 285 | *to = MAX_CHAR; | 332 | *to = MAX_CHAR; |
| 333 | if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val)) | ||
| 334 | val = uniprop_table_uncompress (table, chartab_idx); | ||
| 286 | if (SUB_CHAR_TABLE_P (val)) | 335 | if (SUB_CHAR_TABLE_P (val)) |
| 287 | val = sub_char_table_ref_and_range (val, c, from, to, tbl->defalt); | 336 | val = sub_char_table_ref_and_range (val, c, from, to, tbl->defalt, |
| 337 | is_uniprop); | ||
| 288 | else if (NILP (val)) | 338 | else if (NILP (val)) |
| 289 | val = tbl->defalt; | 339 | val = tbl->defalt; |
| 290 | |||
| 291 | idx = chartab_idx; | 340 | idx = chartab_idx; |
| 292 | while (*from < idx * chartab_chars[0]) | 341 | while (*from < idx * chartab_chars[0]) |
| 293 | { | 342 | { |
| @@ -296,9 +345,11 @@ char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to) | |||
| 296 | c = idx * chartab_chars[0] - 1; | 345 | c = idx * chartab_chars[0] - 1; |
| 297 | idx--; | 346 | idx--; |
| 298 | this_val = tbl->contents[idx]; | 347 | this_val = tbl->contents[idx]; |
| 348 | if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val)) | ||
| 349 | this_val = uniprop_table_uncompress (table, idx); | ||
| 299 | if (SUB_CHAR_TABLE_P (this_val)) | 350 | if (SUB_CHAR_TABLE_P (this_val)) |
| 300 | this_val = sub_char_table_ref_and_range (this_val, c, from, to, | 351 | this_val = sub_char_table_ref_and_range (this_val, c, from, to, |
| 301 | tbl->defalt); | 352 | tbl->defalt, is_uniprop); |
| 302 | else if (NILP (this_val)) | 353 | else if (NILP (this_val)) |
| 303 | this_val = tbl->defalt; | 354 | this_val = tbl->defalt; |
| 304 | 355 | ||
| @@ -315,9 +366,11 @@ char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to) | |||
| 315 | chartab_idx++; | 366 | chartab_idx++; |
| 316 | c = chartab_idx * chartab_chars[0]; | 367 | c = chartab_idx * chartab_chars[0]; |
| 317 | this_val = tbl->contents[chartab_idx]; | 368 | this_val = tbl->contents[chartab_idx]; |
| 369 | if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val)) | ||
| 370 | this_val = uniprop_table_uncompress (table, chartab_idx); | ||
| 318 | if (SUB_CHAR_TABLE_P (this_val)) | 371 | if (SUB_CHAR_TABLE_P (this_val)) |
| 319 | this_val = sub_char_table_ref_and_range (this_val, c, from, to, | 372 | this_val = sub_char_table_ref_and_range (this_val, c, from, to, |
| 320 | tbl->defalt); | 373 | tbl->defalt, is_uniprop); |
| 321 | else if (NILP (this_val)) | 374 | else if (NILP (this_val)) |
| 322 | this_val = tbl->defalt; | 375 | this_val = tbl->defalt; |
| 323 | if (! EQ (this_val, val)) | 376 | if (! EQ (this_val, val)) |
| @@ -332,7 +385,7 @@ char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to) | |||
| 332 | 385 | ||
| 333 | 386 | ||
| 334 | static void | 387 | static void |
| 335 | sub_char_table_set (Lisp_Object table, int c, Lisp_Object val) | 388 | sub_char_table_set (Lisp_Object table, int c, Lisp_Object val, int is_uniprop) |
| 336 | { | 389 | { |
| 337 | struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table); | 390 | struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table); |
| 338 | int depth = XINT ((tbl)->depth); | 391 | int depth = XINT ((tbl)->depth); |
| @@ -347,11 +400,17 @@ sub_char_table_set (Lisp_Object table, int c, Lisp_Object val) | |||
| 347 | sub = tbl->contents[i]; | 400 | sub = tbl->contents[i]; |
| 348 | if (! SUB_CHAR_TABLE_P (sub)) | 401 | if (! SUB_CHAR_TABLE_P (sub)) |
| 349 | { | 402 | { |
| 350 | sub = make_sub_char_table (depth + 1, | 403 | if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (sub)) |
| 351 | min_char + i * chartab_chars[depth], sub); | 404 | sub = uniprop_table_uncompress (table, i); |
| 352 | tbl->contents[i] = sub; | 405 | else |
| 406 | { | ||
| 407 | sub = make_sub_char_table (depth + 1, | ||
| 408 | min_char + i * chartab_chars[depth], | ||
| 409 | sub); | ||
| 410 | tbl->contents[i] = sub; | ||
| 411 | } | ||
| 353 | } | 412 | } |
| 354 | sub_char_table_set (sub, c, val); | 413 | sub_char_table_set (sub, c, val, is_uniprop); |
| 355 | } | 414 | } |
| 356 | } | 415 | } |
| 357 | 416 | ||
| @@ -376,7 +435,7 @@ char_table_set (Lisp_Object table, int c, Lisp_Object val) | |||
| 376 | sub = make_sub_char_table (1, i * chartab_chars[0], sub); | 435 | sub = make_sub_char_table (1, i * chartab_chars[0], sub); |
| 377 | tbl->contents[i] = sub; | 436 | tbl->contents[i] = sub; |
| 378 | } | 437 | } |
| 379 | sub_char_table_set (sub, c, val); | 438 | sub_char_table_set (sub, c, val, UNIPROP_TABLE_P (table)); |
| 380 | if (ASCII_CHAR_P (c)) | 439 | if (ASCII_CHAR_P (c)) |
| 381 | tbl->ascii = char_table_ascii (table); | 440 | tbl->ascii = char_table_ascii (table); |
| 382 | } | 441 | } |
| @@ -384,30 +443,40 @@ char_table_set (Lisp_Object table, int c, Lisp_Object val) | |||
| 384 | } | 443 | } |
| 385 | 444 | ||
| 386 | static void | 445 | static void |
| 387 | sub_char_table_set_range (Lisp_Object *table, int depth, int min_char, int from, int to, Lisp_Object val) | 446 | sub_char_table_set_range (Lisp_Object table, int from, int to, Lisp_Object val, |
| 447 | int is_uniprop) | ||
| 388 | { | 448 | { |
| 389 | int max_char = min_char + chartab_chars[depth] - 1; | 449 | struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table); |
| 390 | 450 | int depth = XINT ((tbl)->depth); | |
| 391 | if (depth == 3 || (from <= min_char && to >= max_char)) | 451 | int min_char = XINT ((tbl)->min_char); |
| 392 | *table = val; | 452 | int chars_in_block = chartab_chars[depth]; |
| 393 | else | 453 | int i, c, lim = chartab_size[depth]; |
| 454 | |||
| 455 | if (from < min_char) | ||
| 456 | from = min_char; | ||
| 457 | i = CHARTAB_IDX (from, depth, min_char); | ||
| 458 | c = min_char + chars_in_block * i; | ||
| 459 | for (; i <= lim; i++, c += chars_in_block) | ||
| 394 | { | 460 | { |
| 395 | int i; | 461 | if (c > to) |
| 396 | unsigned j; | 462 | break; |
| 397 | 463 | if (from <= c && c + chars_in_block - 1 <= to) | |
| 398 | depth++; | 464 | tbl->contents[i] = val; |
| 399 | if (! SUB_CHAR_TABLE_P (*table)) | 465 | else |
| 400 | *table = make_sub_char_table (depth, min_char, *table); | 466 | { |
| 401 | if (from < min_char) | 467 | Lisp_Object sub = tbl->contents[i]; |
| 402 | from = min_char; | 468 | if (! SUB_CHAR_TABLE_P (sub)) |
| 403 | if (to > max_char) | 469 | { |
| 404 | to = max_char; | 470 | if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (sub)) |
| 405 | i = CHARTAB_IDX (from, depth, min_char); | 471 | sub = uniprop_table_uncompress (table, i); |
| 406 | j = CHARTAB_IDX (to, depth, min_char); | 472 | else |
| 407 | min_char += chartab_chars[depth] * i; | 473 | { |
| 408 | for (j++; i < j; i++, min_char += chartab_chars[depth]) | 474 | sub = make_sub_char_table (depth + 1, c, sub); |
| 409 | sub_char_table_set_range (XSUB_CHAR_TABLE (*table)->contents + i, | 475 | tbl->contents[i] = sub; |
| 410 | depth, min_char, from, to, val); | 476 | } |
| 477 | } | ||
| 478 | sub_char_table_set_range (sub, from, to, val, is_uniprop); | ||
| 479 | } | ||
| 411 | } | 480 | } |
| 412 | } | 481 | } |
| 413 | 482 | ||
| @@ -417,16 +486,33 @@ char_table_set_range (Lisp_Object table, int from, int to, Lisp_Object val) | |||
| 417 | { | 486 | { |
| 418 | struct Lisp_Char_Table *tbl = XCHAR_TABLE (table); | 487 | struct Lisp_Char_Table *tbl = XCHAR_TABLE (table); |
| 419 | Lisp_Object *contents = tbl->contents; | 488 | Lisp_Object *contents = tbl->contents; |
| 420 | int i; | ||
| 421 | 489 | ||
| 422 | if (from == to) | 490 | if (from == to) |
| 423 | char_table_set (table, from, val); | 491 | char_table_set (table, from, val); |
| 424 | else | 492 | else |
| 425 | { | 493 | { |
| 426 | unsigned lim = to / chartab_chars[0] + 1; | 494 | int is_uniprop = UNIPROP_TABLE_P (table); |
| 427 | for (i = CHARTAB_IDX (from, 0, 0); i < lim; i++) | 495 | int lim = CHARTAB_IDX (to, 0, 0); |
| 428 | sub_char_table_set_range (contents + i, 0, i * chartab_chars[0], | 496 | int i, c; |
| 429 | from, to, val); | 497 | |
| 498 | for (i = CHARTAB_IDX (from, 0, 0), c = 0; i <= lim; | ||
| 499 | i++, c += chartab_chars[0]) | ||
| 500 | { | ||
| 501 | if (c > to) | ||
| 502 | break; | ||
| 503 | if (from <= c && c + chartab_chars[0] - 1 <= to) | ||
| 504 | tbl->contents[i] = val; | ||
| 505 | else | ||
| 506 | { | ||
| 507 | Lisp_Object sub = tbl->contents[i]; | ||
| 508 | if (! SUB_CHAR_TABLE_P (sub)) | ||
| 509 | { | ||
| 510 | sub = make_sub_char_table (1, i * chartab_chars[0], sub); | ||
| 511 | tbl->contents[i] = sub; | ||
| 512 | } | ||
| 513 | sub_char_table_set_range (sub, from, to, val, is_uniprop); | ||
| 514 | } | ||
| 515 | } | ||
| 430 | if (ASCII_CHAR_P (from)) | 516 | if (ASCII_CHAR_P (from)) |
| 431 | tbl->ascii = char_table_ascii (table); | 517 | tbl->ascii = char_table_ascii (table); |
| 432 | } | 518 | } |
| @@ -504,6 +590,8 @@ DEFUN ("set-char-table-extra-slot", Fset_char_table_extra_slot, | |||
| 504 | (Lisp_Object char_table, Lisp_Object n, Lisp_Object value) | 590 | (Lisp_Object char_table, Lisp_Object n, Lisp_Object value) |
| 505 | { | 591 | { |
| 506 | CHECK_CHAR_TABLE (char_table); | 592 | CHECK_CHAR_TABLE (char_table); |
| 593 | if (EQ (XCHAR_TABLE (char_table)->purpose, Qchar_code_property_table)) | ||
| 594 | error ("Can't change extra-slot of char-code-property-table"); | ||
| 507 | CHECK_NUMBER (n); | 595 | CHECK_NUMBER (n); |
| 508 | if (XINT (n) < 0 | 596 | if (XINT (n) < 0 |
| 509 | || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table))) | 597 | || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table))) |
| @@ -532,8 +620,9 @@ a cons of character codes (for characters in the range), or a character code. * | |||
| 532 | 620 | ||
| 533 | CHECK_CHARACTER_CAR (range); | 621 | CHECK_CHARACTER_CAR (range); |
| 534 | CHECK_CHARACTER_CDR (range); | 622 | CHECK_CHARACTER_CDR (range); |
| 535 | val = char_table_ref_and_range (char_table, XFASTINT (XCAR (range)), | 623 | from = XFASTINT (XCAR (range)); |
| 536 | &from, &to); | 624 | to = XFASTINT (XCDR (range)); |
| 625 | val = char_table_ref_and_range (char_table, from, &from, &to); | ||
| 537 | /* Not yet implemented. */ | 626 | /* Not yet implemented. */ |
| 538 | } | 627 | } |
| 539 | else | 628 | else |
| @@ -655,8 +744,7 @@ equivalent and can be merged. It defaults to `equal'. */) | |||
| 655 | /* Map C_FUNCTION or FUNCTION over TABLE (top or sub char-table), | 744 | /* Map C_FUNCTION or FUNCTION over TABLE (top or sub char-table), |
| 656 | calling it for each character or group of characters that share a | 745 | calling it for each character or group of characters that share a |
| 657 | value. RANGE is a cons (FROM . TO) specifying the range of target | 746 | value. RANGE is a cons (FROM . TO) specifying the range of target |
| 658 | characters, VAL is a value of FROM in TABLE, DEFAULT_VAL is the | 747 | characters, VAL is a value of FROM in TABLE, TOP is the top |
| 659 | default value of the char-table, PARENT is the parent of the | ||
| 660 | char-table. | 748 | char-table. |
| 661 | 749 | ||
| 662 | ARG is passed to C_FUNCTION when that is called. | 750 | ARG is passed to C_FUNCTION when that is called. |
| @@ -669,7 +757,7 @@ equivalent and can be merged. It defaults to `equal'. */) | |||
| 669 | static Lisp_Object | 757 | static Lisp_Object |
| 670 | map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), | 758 | map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), |
| 671 | Lisp_Object function, Lisp_Object table, Lisp_Object arg, Lisp_Object val, | 759 | Lisp_Object function, Lisp_Object table, Lisp_Object arg, Lisp_Object val, |
| 672 | Lisp_Object range, Lisp_Object default_val, Lisp_Object parent) | 760 | Lisp_Object range, Lisp_Object top) |
| 673 | { | 761 | { |
| 674 | /* Pointer to the elements of TABLE. */ | 762 | /* Pointer to the elements of TABLE. */ |
| 675 | Lisp_Object *contents; | 763 | Lisp_Object *contents; |
| @@ -681,6 +769,8 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), | |||
| 681 | int chars_in_block; | 769 | int chars_in_block; |
| 682 | int from = XINT (XCAR (range)), to = XINT (XCDR (range)); | 770 | int from = XINT (XCAR (range)), to = XINT (XCDR (range)); |
| 683 | int i, c; | 771 | int i, c; |
| 772 | int is_uniprop = UNIPROP_TABLE_P (top); | ||
| 773 | uniprop_decoder_t decoder = UNIPROP_GET_DECODER (top); | ||
| 684 | 774 | ||
| 685 | if (SUB_CHAR_TABLE_P (table)) | 775 | if (SUB_CHAR_TABLE_P (table)) |
| 686 | { | 776 | { |
| @@ -710,28 +800,33 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), | |||
| 710 | for (c = min_char + chars_in_block * i; c <= max_char; | 800 | for (c = min_char + chars_in_block * i; c <= max_char; |
| 711 | i++, c += chars_in_block) | 801 | i++, c += chars_in_block) |
| 712 | { | 802 | { |
| 713 | Lisp_Object this = contents[i]; | 803 | Lisp_Object this = (SUB_CHAR_TABLE_P (table) |
| 804 | ? XSUB_CHAR_TABLE (table)->contents[i] | ||
| 805 | : XCHAR_TABLE (table)->contents[i]); | ||
| 714 | int nextc = c + chars_in_block; | 806 | int nextc = c + chars_in_block; |
| 715 | 807 | ||
| 808 | if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this)) | ||
| 809 | this = uniprop_table_uncompress (table, i); | ||
| 716 | if (SUB_CHAR_TABLE_P (this)) | 810 | if (SUB_CHAR_TABLE_P (this)) |
| 717 | { | 811 | { |
| 718 | if (to >= nextc) | 812 | if (to >= nextc) |
| 719 | XSETCDR (range, make_number (nextc - 1)); | 813 | XSETCDR (range, make_number (nextc - 1)); |
| 720 | val = map_sub_char_table (c_function, function, this, arg, | 814 | val = map_sub_char_table (c_function, function, this, arg, |
| 721 | val, range, default_val, parent); | 815 | val, range, top); |
| 722 | } | 816 | } |
| 723 | else | 817 | else |
| 724 | { | 818 | { |
| 725 | if (NILP (this)) | 819 | if (NILP (this)) |
| 726 | this = default_val; | 820 | this = XCHAR_TABLE (top)->defalt; |
| 727 | if (!EQ (val, this)) | 821 | if (!EQ (val, this)) |
| 728 | { | 822 | { |
| 729 | int different_value = 1; | 823 | int different_value = 1; |
| 730 | 824 | ||
| 731 | if (NILP (val)) | 825 | if (NILP (val)) |
| 732 | { | 826 | { |
| 733 | if (! NILP (parent)) | 827 | if (! NILP (XCHAR_TABLE (top)->parent)) |
| 734 | { | 828 | { |
| 829 | Lisp_Object parent = XCHAR_TABLE (top)->parent; | ||
| 735 | Lisp_Object temp = XCHAR_TABLE (parent)->parent; | 830 | Lisp_Object temp = XCHAR_TABLE (parent)->parent; |
| 736 | 831 | ||
| 737 | /* This is to get a value of FROM in PARENT | 832 | /* This is to get a value of FROM in PARENT |
| @@ -742,8 +837,7 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), | |||
| 742 | XSETCDR (range, make_number (c - 1)); | 837 | XSETCDR (range, make_number (c - 1)); |
| 743 | val = map_sub_char_table (c_function, function, | 838 | val = map_sub_char_table (c_function, function, |
| 744 | parent, arg, val, range, | 839 | parent, arg, val, range, |
| 745 | XCHAR_TABLE (parent)->defalt, | 840 | parent); |
| 746 | XCHAR_TABLE (parent)->parent); | ||
| 747 | if (EQ (val, this)) | 841 | if (EQ (val, this)) |
| 748 | different_value = 0; | 842 | different_value = 0; |
| 749 | } | 843 | } |
| @@ -756,14 +850,22 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), | |||
| 756 | if (c_function) | 850 | if (c_function) |
| 757 | (*c_function) (arg, XCAR (range), val); | 851 | (*c_function) (arg, XCAR (range), val); |
| 758 | else | 852 | else |
| 759 | call2 (function, XCAR (range), val); | 853 | { |
| 854 | if (decoder) | ||
| 855 | val = decoder (top, val); | ||
| 856 | call2 (function, XCAR (range), val); | ||
| 857 | } | ||
| 760 | } | 858 | } |
| 761 | else | 859 | else |
| 762 | { | 860 | { |
| 763 | if (c_function) | 861 | if (c_function) |
| 764 | (*c_function) (arg, range, val); | 862 | (*c_function) (arg, range, val); |
| 765 | else | 863 | else |
| 766 | call2 (function, range, val); | 864 | { |
| 865 | if (decoder) | ||
| 866 | val = decoder (top, val); | ||
| 867 | call2 (function, range, val); | ||
| 868 | } | ||
| 767 | } | 869 | } |
| 768 | } | 870 | } |
| 769 | val = this; | 871 | val = this; |
| @@ -783,35 +885,39 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), | |||
| 783 | ARG is passed to C_FUNCTION when that is called. */ | 885 | ARG is passed to C_FUNCTION when that is called. */ |
| 784 | 886 | ||
| 785 | void | 887 | void |
| 786 | map_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), Lisp_Object function, Lisp_Object table, Lisp_Object arg) | 888 | map_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), |
| 889 | Lisp_Object function, Lisp_Object table, Lisp_Object arg) | ||
| 787 | { | 890 | { |
| 788 | Lisp_Object range, val; | 891 | Lisp_Object range, val, parent; |
| 789 | struct gcpro gcpro1, gcpro2, gcpro3; | 892 | struct gcpro gcpro1, gcpro2, gcpro3, gcpro4; |
| 893 | uniprop_decoder_t decoder = UNIPROP_GET_DECODER (table); | ||
| 790 | 894 | ||
| 791 | range = Fcons (make_number (0), make_number (MAX_CHAR)); | 895 | range = Fcons (make_number (0), make_number (MAX_CHAR)); |
| 792 | GCPRO3 (table, arg, range); | 896 | parent = XCHAR_TABLE (table)->parent; |
| 897 | |||
| 898 | GCPRO4 (table, arg, range, parent); | ||
| 793 | val = XCHAR_TABLE (table)->ascii; | 899 | val = XCHAR_TABLE (table)->ascii; |
| 794 | if (SUB_CHAR_TABLE_P (val)) | 900 | if (SUB_CHAR_TABLE_P (val)) |
| 795 | val = XSUB_CHAR_TABLE (val)->contents[0]; | 901 | val = XSUB_CHAR_TABLE (val)->contents[0]; |
| 796 | val = map_sub_char_table (c_function, function, table, arg, val, range, | 902 | val = map_sub_char_table (c_function, function, table, arg, val, range, |
| 797 | XCHAR_TABLE (table)->defalt, | 903 | table); |
| 798 | XCHAR_TABLE (table)->parent); | 904 | |
| 799 | /* If VAL is nil and TABLE has a parent, we must consult the parent | 905 | /* If VAL is nil and TABLE has a parent, we must consult the parent |
| 800 | recursively. */ | 906 | recursively. */ |
| 801 | while (NILP (val) && ! NILP (XCHAR_TABLE (table)->parent)) | 907 | while (NILP (val) && ! NILP (XCHAR_TABLE (table)->parent)) |
| 802 | { | 908 | { |
| 803 | Lisp_Object parent = XCHAR_TABLE (table)->parent; | 909 | Lisp_Object temp; |
| 804 | Lisp_Object temp = XCHAR_TABLE (parent)->parent; | ||
| 805 | int from = XINT (XCAR (range)); | 910 | int from = XINT (XCAR (range)); |
| 806 | 911 | ||
| 912 | parent = XCHAR_TABLE (table)->parent; | ||
| 913 | temp = XCHAR_TABLE (parent)->parent; | ||
| 807 | /* This is to get a value of FROM in PARENT without checking the | 914 | /* This is to get a value of FROM in PARENT without checking the |
| 808 | parent of PARENT. */ | 915 | parent of PARENT. */ |
| 809 | XCHAR_TABLE (parent)->parent = Qnil; | 916 | XCHAR_TABLE (parent)->parent = Qnil; |
| 810 | val = CHAR_TABLE_REF (parent, from); | 917 | val = CHAR_TABLE_REF (parent, from); |
| 811 | XCHAR_TABLE (parent)->parent = temp; | 918 | XCHAR_TABLE (parent)->parent = temp; |
| 812 | val = map_sub_char_table (c_function, function, parent, arg, val, range, | 919 | val = map_sub_char_table (c_function, function, parent, arg, val, range, |
| 813 | XCHAR_TABLE (parent)->defalt, | 920 | parent); |
| 814 | XCHAR_TABLE (parent)->parent); | ||
| 815 | table = parent; | 921 | table = parent; |
| 816 | } | 922 | } |
| 817 | 923 | ||
| @@ -822,14 +928,22 @@ map_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), Lisp | |||
| 822 | if (c_function) | 928 | if (c_function) |
| 823 | (*c_function) (arg, XCAR (range), val); | 929 | (*c_function) (arg, XCAR (range), val); |
| 824 | else | 930 | else |
| 825 | call2 (function, XCAR (range), val); | 931 | { |
| 932 | if (decoder) | ||
| 933 | val = decoder (table, val); | ||
| 934 | call2 (function, XCAR (range), val); | ||
| 935 | } | ||
| 826 | } | 936 | } |
| 827 | else | 937 | else |
| 828 | { | 938 | { |
| 829 | if (c_function) | 939 | if (c_function) |
| 830 | (*c_function) (arg, range, val); | 940 | (*c_function) (arg, range, val); |
| 831 | else | 941 | else |
| 832 | call2 (function, range, val); | 942 | { |
| 943 | if (decoder) | ||
| 944 | val = decoder (table, val); | ||
| 945 | call2 (function, range, val); | ||
| 946 | } | ||
| 833 | } | 947 | } |
| 834 | } | 948 | } |
| 835 | 949 | ||
| @@ -984,9 +1098,315 @@ map_char_table_for_charset (void (*c_function) (Lisp_Object, Lisp_Object), | |||
| 984 | } | 1098 | } |
| 985 | 1099 | ||
| 986 | 1100 | ||
| 1101 | /* Unicode character property tables. | ||
| 1102 | |||
| 1103 | This section provides a convenient and efficient way to get a | ||
| 1104 | Unicode character property from C code (from Lisp, you must use | ||
| 1105 | get-char-code-property). | ||
| 1106 | |||
| 1107 | The typical usage is to get a char-table for a specific property at | ||
| 1108 | a proper initialization time as this: | ||
| 1109 | |||
| 1110 | Lisp_Object bidi_class_table = uniprop_table (intern ("bidi-class")); | ||
| 1111 | |||
| 1112 | and get a property value for character CH as this: | ||
| 1113 | |||
| 1114 | Lisp_Object bidi_class = CHAR_TABLE_REF (CH, bidi_class_table); | ||
| 1115 | |||
| 1116 | In this case, what you actually get is an index number to the | ||
| 1117 | vector of property values (symbols nil, L, R, etc). | ||
| 1118 | |||
| 1119 | A table for Unicode character property has these characteristics: | ||
| 1120 | |||
| 1121 | o The purpose is `char-code-property-table', which implies that the | ||
| 1122 | table has 5 extra slots. | ||
| 1123 | |||
| 1124 | o The second extra slot is a Lisp function, an index (integer) to | ||
| 1125 | the array uniprop_decoder[], or nil. If it is a Lisp function, we | ||
| 1126 | can't use such a table from C (at the moment). If it is nil, it | ||
| 1127 | means that we don't have to decode values. | ||
| 1128 | |||
| 1129 | o The third extra slot is a Lisp function, an index (integer) to | ||
| 1130 | the array uniprop_enncoder[], or nil. If it is a Lisp function, we | ||
| 1131 | can't use such a table from C (at the moment). If it is nil, it | ||
| 1132 | means that we don't have to encode values. */ | ||
| 1133 | |||
| 1134 | |||
| 1135 | /* Uncompress the IDXth element of sub-char-table TABLE. */ | ||
| 1136 | |||
| 1137 | static Lisp_Object | ||
| 1138 | uniprop_table_uncompress (Lisp_Object table, int idx) | ||
| 1139 | { | ||
| 1140 | Lisp_Object val = XSUB_CHAR_TABLE (table)->contents[idx]; | ||
| 1141 | int min_char = (XINT (XSUB_CHAR_TABLE (table)->min_char) | ||
| 1142 | + chartab_chars[2] * idx); | ||
| 1143 | Lisp_Object sub = make_sub_char_table (3, min_char, Qnil); | ||
| 1144 | struct Lisp_Sub_Char_Table *subtbl = XSUB_CHAR_TABLE (sub); | ||
| 1145 | const unsigned char *p, *pend; | ||
| 1146 | int i; | ||
| 1147 | |||
| 1148 | XSUB_CHAR_TABLE (table)->contents[idx] = sub; | ||
| 1149 | p = SDATA (val), pend = p + SBYTES (val); | ||
| 1150 | if (*p == 1) | ||
| 1151 | { | ||
| 1152 | /* SIMPLE TABLE */ | ||
| 1153 | p++; | ||
| 1154 | idx = STRING_CHAR_ADVANCE (p); | ||
| 1155 | while (p < pend && idx < chartab_chars[2]) | ||
| 1156 | { | ||
| 1157 | int v = STRING_CHAR_ADVANCE (p); | ||
| 1158 | subtbl->contents[idx++] = v > 0 ? make_number (v) : Qnil; | ||
| 1159 | } | ||
| 1160 | } | ||
| 1161 | else if (*p == 2) | ||
| 1162 | { | ||
| 1163 | /* RUN-LENGTH TABLE */ | ||
| 1164 | p++; | ||
| 1165 | for (idx = 0; p < pend; ) | ||
| 1166 | { | ||
| 1167 | int v = STRING_CHAR_ADVANCE (p); | ||
| 1168 | int count = 1; | ||
| 1169 | int len; | ||
| 1170 | |||
| 1171 | if (p < pend) | ||
| 1172 | { | ||
| 1173 | count = STRING_CHAR_AND_LENGTH (p, len); | ||
| 1174 | if (count < 128) | ||
| 1175 | count = 1; | ||
| 1176 | else | ||
| 1177 | { | ||
| 1178 | count -= 128; | ||
| 1179 | p += len; | ||
| 1180 | } | ||
| 1181 | } | ||
| 1182 | while (count-- > 0) | ||
| 1183 | subtbl->contents[idx++] = make_number (v); | ||
| 1184 | } | ||
| 1185 | } | ||
| 1186 | /* It seems that we don't need this function because C code won't need | ||
| 1187 | to get a property that is compressed in this form. */ | ||
| 1188 | #if 0 | ||
| 1189 | else if (*p == 0) | ||
| 1190 | { | ||
| 1191 | /* WORD-LIST TABLE */ | ||
| 1192 | } | ||
| 1193 | #endif | ||
| 1194 | return sub; | ||
| 1195 | } | ||
| 1196 | |||
| 1197 | |||
| 1198 | /* Decode VALUE as an elemnet of char-table TABLE. */ | ||
| 1199 | |||
| 1200 | static Lisp_Object | ||
| 1201 | uniprop_decode_value_run_length (Lisp_Object table, Lisp_Object value) | ||
| 1202 | { | ||
| 1203 | if (VECTORP (XCHAR_TABLE (table)->extras[4])) | ||
| 1204 | { | ||
| 1205 | Lisp_Object valvec = XCHAR_TABLE (table)->extras[4]; | ||
| 1206 | |||
| 1207 | if (XINT (value) >= 0 && XINT (value) < ASIZE (valvec)) | ||
| 1208 | value = AREF (valvec, XINT (value)); | ||
| 1209 | } | ||
| 1210 | return value; | ||
| 1211 | } | ||
| 1212 | |||
| 1213 | static uniprop_decoder_t uniprop_decoder [] = | ||
| 1214 | { uniprop_decode_value_run_length }; | ||
| 1215 | |||
| 1216 | static int uniprop_decoder_count | ||
| 1217 | = (sizeof uniprop_decoder) / sizeof (uniprop_decoder[0]); | ||
| 1218 | |||
| 1219 | |||
| 1220 | /* Return the decoder of char-table TABLE or nil if none. */ | ||
| 1221 | |||
| 1222 | static uniprop_decoder_t | ||
| 1223 | uniprop_get_decoder (Lisp_Object table) | ||
| 1224 | { | ||
| 1225 | int i; | ||
| 1226 | |||
| 1227 | if (! INTEGERP (XCHAR_TABLE (table)->extras[1])) | ||
| 1228 | return NULL; | ||
| 1229 | i = XINT (XCHAR_TABLE (table)->extras[1]); | ||
| 1230 | if (i < 0 || i >= uniprop_decoder_count) | ||
| 1231 | return NULL; | ||
| 1232 | return uniprop_decoder[i]; | ||
| 1233 | } | ||
| 1234 | |||
| 1235 | |||
| 1236 | /* Encode VALUE as an element of char-table TABLE which contains | ||
| 1237 | characters as elements. */ | ||
| 1238 | |||
| 1239 | static Lisp_Object | ||
| 1240 | uniprop_encode_value_character (Lisp_Object table, Lisp_Object value) | ||
| 1241 | { | ||
| 1242 | if (! NILP (value) && ! CHARACTERP (value)) | ||
| 1243 | wrong_type_argument (Qintegerp, value); | ||
| 1244 | return value; | ||
| 1245 | } | ||
| 1246 | |||
| 1247 | |||
| 1248 | /* Encode VALUE as an element of char-table TABLE which adopts RUN-LENGTH | ||
| 1249 | compression. */ | ||
| 1250 | |||
| 1251 | static Lisp_Object | ||
| 1252 | uniprop_encode_value_run_length (Lisp_Object table, Lisp_Object value) | ||
| 1253 | { | ||
| 1254 | Lisp_Object *value_table = XVECTOR (XCHAR_TABLE (table)->extras[4])->contents; | ||
| 1255 | int i, size = ASIZE (XCHAR_TABLE (table)->extras[4]); | ||
| 1256 | |||
| 1257 | for (i = 0; i < size; i++) | ||
| 1258 | if (EQ (value, value_table[i])) | ||
| 1259 | break; | ||
| 1260 | if (i == size) | ||
| 1261 | wrong_type_argument (build_string ("Unicode property value"), value); | ||
| 1262 | return make_number (i); | ||
| 1263 | } | ||
| 1264 | |||
| 1265 | |||
| 1266 | /* Encode VALUE as an element of char-table TABLE which adopts RUN-LENGTH | ||
| 1267 | compression and contains numbers as elements . */ | ||
| 1268 | |||
| 1269 | static Lisp_Object | ||
| 1270 | uniprop_encode_value_numeric (Lisp_Object table, Lisp_Object value) | ||
| 1271 | { | ||
| 1272 | Lisp_Object *value_table = XVECTOR (XCHAR_TABLE (table)->extras[4])->contents; | ||
| 1273 | int i, size = ASIZE (XCHAR_TABLE (table)->extras[4]); | ||
| 1274 | |||
| 1275 | CHECK_NUMBER (value); | ||
| 1276 | for (i = 0; i < size; i++) | ||
| 1277 | if (EQ (value, value_table[i])) | ||
| 1278 | break; | ||
| 1279 | value = make_number (i); | ||
| 1280 | if (i == size) | ||
| 1281 | { | ||
| 1282 | Lisp_Object args[2]; | ||
| 1283 | |||
| 1284 | args[0] = XCHAR_TABLE (table)->extras[4]; | ||
| 1285 | args[1] = Fmake_vector (make_number (1), value); | ||
| 1286 | XCHAR_TABLE (table)->extras[4] = Fvconcat (2, args); | ||
| 1287 | } | ||
| 1288 | return make_number (i); | ||
| 1289 | } | ||
| 1290 | |||
| 1291 | static uniprop_encoder_t uniprop_encoder[] = | ||
| 1292 | { uniprop_encode_value_character, | ||
| 1293 | uniprop_encode_value_run_length, | ||
| 1294 | uniprop_encode_value_numeric }; | ||
| 1295 | |||
| 1296 | static int uniprop_encoder_count | ||
| 1297 | = (sizeof uniprop_encoder) / sizeof (uniprop_encoder[0]); | ||
| 1298 | |||
| 1299 | |||
| 1300 | /* Return the encoder of char-table TABLE or nil if none. */ | ||
| 1301 | |||
| 1302 | static uniprop_decoder_t | ||
| 1303 | uniprop_get_encoder (Lisp_Object table) | ||
| 1304 | { | ||
| 1305 | int i; | ||
| 1306 | |||
| 1307 | if (! INTEGERP (XCHAR_TABLE (table)->extras[2])) | ||
| 1308 | return NULL; | ||
| 1309 | i = XINT (XCHAR_TABLE (table)->extras[2]); | ||
| 1310 | if (i < 0 || i >= uniprop_encoder_count) | ||
| 1311 | return NULL; | ||
| 1312 | return uniprop_encoder[i]; | ||
| 1313 | } | ||
| 1314 | |||
| 1315 | /* Return a char-table for Unicode character property PROP. This | ||
| 1316 | function may load a Lisp file and thus may cause | ||
| 1317 | garbage-collection. */ | ||
| 1318 | |||
| 1319 | Lisp_Object | ||
| 1320 | uniprop_table (Lisp_Object prop) | ||
| 1321 | { | ||
| 1322 | Lisp_Object val, table, result; | ||
| 1323 | |||
| 1324 | val = Fassq (prop, Vchar_code_property_alist); | ||
| 1325 | if (! CONSP (val)) | ||
| 1326 | return Qnil; | ||
| 1327 | table = XCDR (val); | ||
| 1328 | if (STRINGP (table)) | ||
| 1329 | { | ||
| 1330 | struct gcpro gcpro1; | ||
| 1331 | GCPRO1 (val); | ||
| 1332 | result = Fload (concat2 (build_string ("international/"), table), | ||
| 1333 | Qt, Qt, Qt, Qt); | ||
| 1334 | UNGCPRO; | ||
| 1335 | if (NILP (result)) | ||
| 1336 | return Qnil; | ||
| 1337 | table = XCDR (val); | ||
| 1338 | } | ||
| 1339 | if (! CHAR_TABLE_P (table) | ||
| 1340 | || ! UNIPROP_TABLE_P (table)) | ||
| 1341 | return Qnil; | ||
| 1342 | val = XCHAR_TABLE (table)->extras[1]; | ||
| 1343 | if (INTEGERP (val) | ||
| 1344 | ? (XINT (val) < 0 || XINT (val) >= uniprop_decoder_count) | ||
| 1345 | : ! NILP (val)) | ||
| 1346 | return Qnil; | ||
| 1347 | /* Prepare ASCII values in advance for CHAR_TABLE_REF. */ | ||
| 1348 | XCHAR_TABLE (table)->ascii = char_table_ascii (table); | ||
| 1349 | return table; | ||
| 1350 | } | ||
| 1351 | |||
| 1352 | DEFUN ("unicode-property-table-internal", Funicode_property_table_internal, | ||
| 1353 | Sunicode_property_table_internal, 1, 1, 0, | ||
| 1354 | doc: /* Return a char-table for Unicode character property PROP. | ||
| 1355 | Use `get-unicode-property-internal' and | ||
| 1356 | `put-unicode-property-internal' instead of `aref' and `aset' to get | ||
| 1357 | and put an element value. */) | ||
| 1358 | (Lisp_Object prop) | ||
| 1359 | { | ||
| 1360 | Lisp_Object table = uniprop_table (prop); | ||
| 1361 | |||
| 1362 | if (CHAR_TABLE_P (table)) | ||
| 1363 | return table; | ||
| 1364 | return Fcdr (Fassq (prop, Vchar_code_property_alist)); | ||
| 1365 | } | ||
| 1366 | |||
| 1367 | DEFUN ("get-unicode-property-internal", Fget_unicode_property_internal, | ||
| 1368 | Sget_unicode_property_internal, 2, 2, 0, | ||
| 1369 | doc: /* Return an element of CHAR-TABLE for character CH. | ||
| 1370 | CHAR-TABLE must be what returned by `unicode-property-table-internal'. */) | ||
| 1371 | (Lisp_Object char_table, Lisp_Object ch) | ||
| 1372 | { | ||
| 1373 | Lisp_Object val; | ||
| 1374 | uniprop_decoder_t decoder; | ||
| 1375 | |||
| 1376 | CHECK_CHAR_TABLE (char_table); | ||
| 1377 | CHECK_CHARACTER (ch); | ||
| 1378 | if (! UNIPROP_TABLE_P (char_table)) | ||
| 1379 | error ("Invalid Unicode property table"); | ||
| 1380 | val = CHAR_TABLE_REF (char_table, XINT (ch)); | ||
| 1381 | decoder = uniprop_get_decoder (char_table); | ||
| 1382 | return (decoder ? decoder (char_table, val) : val); | ||
| 1383 | } | ||
| 1384 | |||
| 1385 | DEFUN ("put-unicode-property-internal", Fput_unicode_property_internal, | ||
| 1386 | Sput_unicode_property_internal, 3, 3, 0, | ||
| 1387 | doc: /* Set an element of CHAR-TABLE for character CH to VALUE. | ||
| 1388 | CHAR-TABLE must be what returned by `unicode-property-table-internal'. */) | ||
| 1389 | (Lisp_Object char_table, Lisp_Object ch, Lisp_Object value) | ||
| 1390 | { | ||
| 1391 | uniprop_encoder_t encoder; | ||
| 1392 | |||
| 1393 | CHECK_CHAR_TABLE (char_table); | ||
| 1394 | CHECK_CHARACTER (ch); | ||
| 1395 | if (! UNIPROP_TABLE_P (char_table)) | ||
| 1396 | error ("Invalid Unicode property table"); | ||
| 1397 | encoder = uniprop_get_encoder (char_table); | ||
| 1398 | if (encoder) | ||
| 1399 | value = encoder (char_table, value); | ||
| 1400 | CHAR_TABLE_SET (char_table, XINT (ch), value); | ||
| 1401 | return Qnil; | ||
| 1402 | } | ||
| 1403 | |||
| 1404 | |||
| 987 | void | 1405 | void |
| 988 | syms_of_chartab (void) | 1406 | syms_of_chartab (void) |
| 989 | { | 1407 | { |
| 1408 | DEFSYM (Qchar_code_property_table, "char-code-property-table"); | ||
| 1409 | |||
| 990 | defsubr (&Smake_char_table); | 1410 | defsubr (&Smake_char_table); |
| 991 | defsubr (&Schar_table_parent); | 1411 | defsubr (&Schar_table_parent); |
| 992 | defsubr (&Schar_table_subtype); | 1412 | defsubr (&Schar_table_subtype); |
| @@ -998,4 +1418,19 @@ syms_of_chartab (void) | |||
| 998 | defsubr (&Sset_char_table_default); | 1418 | defsubr (&Sset_char_table_default); |
| 999 | defsubr (&Soptimize_char_table); | 1419 | defsubr (&Soptimize_char_table); |
| 1000 | defsubr (&Smap_char_table); | 1420 | defsubr (&Smap_char_table); |
| 1421 | defsubr (&Sunicode_property_table_internal); | ||
| 1422 | defsubr (&Sget_unicode_property_internal); | ||
| 1423 | defsubr (&Sput_unicode_property_internal); | ||
| 1424 | |||
| 1425 | /* Each element has the form (PROP . TABLE). | ||
| 1426 | PROP is a symbol representing a character property. | ||
| 1427 | TABLE is a char-table containing the property value for each character. | ||
| 1428 | TABLE may be a name of file to load to build a char-table. | ||
| 1429 | This variable should be modified only through | ||
| 1430 | `define-char-code-property'. */ | ||
| 1431 | |||
| 1432 | DEFVAR_LISP ("char-code-property-alist", Vchar_code_property_alist, | ||
| 1433 | doc: /* Alist of character property name vs char-table containing property values. | ||
| 1434 | Internal use only. */); | ||
| 1435 | Vchar_code_property_alist = Qnil; | ||
| 1001 | } | 1436 | } |
diff --git a/src/composite.c b/src/composite.c index 796c5a58de6..7123b505e68 100644 --- a/src/composite.c +++ b/src/composite.c | |||
| @@ -976,9 +976,8 @@ static int _work_char; | |||
| 976 | ((C) > ' ' \ | 976 | ((C) > ' ' \ |
| 977 | && ((C) == 0x200C || (C) == 0x200D \ | 977 | && ((C) == 0x200C || (C) == 0x200D \ |
| 978 | || (_work_val = CHAR_TABLE_REF (Vunicode_category_table, (C)), \ | 978 | || (_work_val = CHAR_TABLE_REF (Vunicode_category_table, (C)), \ |
| 979 | (SYMBOLP (_work_val) \ | 979 | (INTEGERP (_work_val) \ |
| 980 | && (_work_char = SDATA (SYMBOL_NAME (_work_val))[0]) != 'C' \ | 980 | && (XINT (_work_val) <= UNICODE_CATEGORY_So))))) |
| 981 | && _work_char != 'Z')))) | ||
| 982 | 981 | ||
| 983 | /* Update cmp_it->stop_pos to the next position after CHARPOS (and | 982 | /* Update cmp_it->stop_pos to the next position after CHARPOS (and |
| 984 | BYTEPOS) where character composition may happen. If BYTEPOS is | 983 | BYTEPOS) where character composition may happen. If BYTEPOS is |
diff --git a/src/dispextern.h b/src/dispextern.h index 57fa09d3bfc..c0a67690a5c 100644 --- a/src/dispextern.h +++ b/src/dispextern.h | |||
| @@ -1773,7 +1773,11 @@ extern int face_change_count; | |||
| 1773 | /* Data type for describing the bidirectional character types. The | 1773 | /* Data type for describing the bidirectional character types. The |
| 1774 | first 7 must be at the beginning, because they are the only values | 1774 | first 7 must be at the beginning, because they are the only values |
| 1775 | valid in the `bidi_type' member of `struct glyph'; we only reserve | 1775 | valid in the `bidi_type' member of `struct glyph'; we only reserve |
| 1776 | 3 bits for it, so we cannot use there values larger than 7. */ | 1776 | 3 bits for it, so we cannot use there values larger than 7. |
| 1777 | |||
| 1778 | The order of members must be in sync with the 8th element of the | ||
| 1779 | member of unidata-prop-alist (in admin/unidata/unidata-getn.el) for | ||
| 1780 | Unicode character property `bidi-class'. */ | ||
| 1777 | typedef enum { | 1781 | typedef enum { |
| 1778 | UNKNOWN_BT = 0, | 1782 | UNKNOWN_BT = 0, |
| 1779 | STRONG_L, /* strong left-to-right */ | 1783 | STRONG_L, /* strong left-to-right */ |
diff --git a/src/font.c b/src/font.c index 14390335f3c..5aff20b1346 100644 --- a/src/font.c +++ b/src/font.c | |||
| @@ -3739,8 +3739,9 @@ font_range (EMACS_INT pos, EMACS_INT *limit, struct window *w, struct face *face | |||
| 3739 | else | 3739 | else |
| 3740 | FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, string, pos, pos_byte); | 3740 | FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, string, pos, pos_byte); |
| 3741 | category = CHAR_TABLE_REF (Vunicode_category_table, c); | 3741 | category = CHAR_TABLE_REF (Vunicode_category_table, c); |
| 3742 | if (EQ (category, QCf) | 3742 | if (INTEGERP (category) |
| 3743 | || CHAR_VARIATION_SELECTOR_P (c)) | 3743 | && (XINT (category) == UNICODE_CATEGORY_Cf |
| 3744 | || CHAR_VARIATION_SELECTOR_P (c))) | ||
| 3744 | continue; | 3745 | continue; |
| 3745 | if (NILP (font_object)) | 3746 | if (NILP (font_object)) |
| 3746 | { | 3747 | { |