diff options
| author | Kenichi Handa | 2009-11-06 06:31:48 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2009-11-06 06:31:48 +0000 |
| commit | 149353a4eb5231e4f2c5af61d1bea2e9e909c679 (patch) | |
| tree | 90d86a2ccc034e33da6908467525480c3c5fe04e | |
| parent | 5e2327cf928d947328b01c87baa79cdaa18c823b (diff) | |
| download | emacs-149353a4eb5231e4f2c5af61d1bea2e9e909c679.tar.gz emacs-149353a4eb5231e4f2c5af61d1bea2e9e909c679.zip | |
(unidata-gen-table): Fix for the case that the block data and the
following per-char data fall into the same char-table leaf.
| -rw-r--r-- | admin/ChangeLog | 6 | ||||
| -rw-r--r-- | admin/unidata/unidata-gen.el | 37 |
2 files changed, 38 insertions, 5 deletions
diff --git a/admin/ChangeLog b/admin/ChangeLog index d6e75e04d98..7d56b2aeb5e 100644 --- a/admin/ChangeLog +++ b/admin/ChangeLog | |||
| @@ -1,3 +1,9 @@ | |||
| 1 | 2009-11-06 Kenichi Handa <handa@m17n.org> | ||
| 2 | |||
| 3 | * unidata/unidata-gen.el (unidata-gen-table): Fix for the case | ||
| 4 | that the block data and the following per-char data fall into the | ||
| 5 | same char-table leaf. | ||
| 6 | |||
| 1 | 2009-10-01 Juanma Barranquero <lekktu@gmail.com> | 7 | 2009-10-01 Juanma Barranquero <lekktu@gmail.com> |
| 2 | 8 | ||
| 3 | * unidata/UnicodeData.txt: Update to Unicode 5.2.0. | 9 | * unidata/UnicodeData.txt: Update to Unicode 5.2.0. |
diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el index 9fee8e46c80..2b1f918cc64 100644 --- a/admin/unidata/unidata-gen.el +++ b/admin/unidata/unidata-gen.el | |||
| @@ -93,6 +93,10 @@ | |||
| 93 | (or (file-readable-p unidata-text-file) | 93 | (or (file-readable-p unidata-text-file) |
| 94 | (error "File not readable: %s" unidata-text-file)) | 94 | (error "File not readable: %s" unidata-text-file)) |
| 95 | (with-temp-buffer | 95 | (with-temp-buffer |
| 96 | ;; Insert a file of this format: | ||
| 97 | ;; (CHAR NAME CATEGORY ...) | ||
| 98 | ;; where CHAR is a character code and the following elements are strings | ||
| 99 | ;; representing character properties. | ||
| 96 | (insert-file-contents unidata-text-file) | 100 | (insert-file-contents unidata-text-file) |
| 97 | (goto-char (point-min)) | 101 | (goto-char (point-min)) |
| 98 | (condition-case nil | 102 | (condition-case nil |
| @@ -103,7 +107,7 @@ | |||
| 103 | 107 | ||
| 104 | ;; Check this kind of block. | 108 | ;; Check this kind of block. |
| 105 | ;; 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;; | 109 | ;; 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;; |
| 106 | ;; 9FA5;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;; | 110 | ;; 9FCB;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;; |
| 107 | (if (and (= (aref name 0) ?<) | 111 | (if (and (= (aref name 0) ?<) |
| 108 | (string-match ", First>$" name)) | 112 | (string-match ", First>$" name)) |
| 109 | (let ((first char) | 113 | (let ((first char) |
| @@ -224,7 +228,7 @@ Property value is a character." | |||
| 224 | ;; a char-table described here to store such values. | 228 | ;; a char-table described here to store such values. |
| 225 | ;; | 229 | ;; |
| 226 | ;; If succeeding 128 characters have no property, a char-table has the | 230 | ;; If succeeding 128 characters have no property, a char-table has the |
| 227 | ;; symbol t is for them. Otherwise a char-table has a string of the | 231 | ;; symbol t for them. Otherwise a char-table has a string of the |
| 228 | ;; following format for them. | 232 | ;; following format for them. |
| 229 | ;; | 233 | ;; |
| 230 | ;; The first character of the string is FIRST-INDEX. | 234 | ;; The first character of the string is FIRST-INDEX. |
| @@ -480,7 +484,8 @@ Property value is a character." | |||
| 480 | (prop-idx (unidata-prop-index prop)) | 484 | (prop-idx (unidata-prop-index prop)) |
| 481 | (val-list (list t)) | 485 | (val-list (list t)) |
| 482 | (vec (make-vector 128 0)) | 486 | (vec (make-vector 128 0)) |
| 483 | tail elt range val val-code idx slot) | 487 | tail elt range val val-code idx slot |
| 488 | prev-range-data) | ||
| 484 | (set-char-table-range table (cons 0 (max-char)) default-value) | 489 | (set-char-table-range table (cons 0 (max-char)) default-value) |
| 485 | (setq tail unidata-list) | 490 | (setq tail unidata-list) |
| 486 | (while tail | 491 | (while tail |
| @@ -489,12 +494,34 @@ Property value is a character." | |||
| 489 | val (funcall val-func (nth prop-idx elt))) | 494 | val (funcall val-func (nth prop-idx elt))) |
| 490 | (setq val-code (if val (unidata-encode-val val-list val))) | 495 | (setq val-code (if val (unidata-encode-val val-list val))) |
| 491 | (if (consp range) | 496 | (if (consp range) |
| 492 | (if val-code | 497 | (when val-code |
| 493 | (set-char-table-range table range val)) | 498 | (set-char-table-range table range val) |
| 499 | (let ((from (car range)) (to (cdr range))) | ||
| 500 | ;; If RANGE doesn't end at the char-table boundary (each | ||
| 501 | ;; 128 characters), we may have to carry over the data | ||
| 502 | ;; for the last several characters (at most 127 chars) | ||
| 503 | ;; to the next loop. In that case, set PREV-RANGE-DATA | ||
| 504 | ;; to ((FROM . TO) . VAL-CODE) where (FROM . TO) | ||
| 505 | ;; specifies the range of characters handled in the next | ||
| 506 | ;; loop. | ||
| 507 | (when (< (logand to #x7F) #x7F) | ||
| 508 | (if (< from (logand to #x1FFF80)) | ||
| 509 | (setq from (logand to #x1FFF80))) | ||
| 510 | (setq prev-range-data (cons (cons from to) val-code))))) | ||
| 494 | (let* ((start (lsh (lsh range -7) 7)) | 511 | (let* ((start (lsh (lsh range -7) 7)) |
| 495 | (limit (+ start 127)) | 512 | (limit (+ start 127)) |
| 496 | str count new-val) | 513 | str count new-val) |
| 497 | (fillarray vec 0) | 514 | (fillarray vec 0) |
| 515 | ;; See the comment above. | ||
| 516 | (when (and prev-range-data | ||
| 517 | (>= (cdr (car prev-range-data)) start)) | ||
| 518 | (let ((from (car (car prev-range-data))) | ||
| 519 | (to (cdr (car prev-range-data))) | ||
| 520 | (vcode (cdr prev-range-data))) | ||
| 521 | (while (<= from to) | ||
| 522 | (aset vec (- from start) vcode) | ||
| 523 | (setq from (1+ from))))) | ||
| 524 | (setq prev-range-data nil) | ||
| 498 | (if val-code | 525 | (if val-code |
| 499 | (aset vec (- range start) val-code)) | 526 | (aset vec (- range start) val-code)) |
| 500 | (while (and (setq elt (car tail) range (car elt)) | 527 | (while (and (setq elt (car tail) range (car elt)) |