diff options
| author | Kenichi Handa | 2005-05-10 02:29:41 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2005-05-10 02:29:41 +0000 |
| commit | 1275c1dc8ef258f52304c0a3b287a9e8123c8195 (patch) | |
| tree | ff2cd283616526563ff06718de8ea25eb9b3a756 /admin | |
| parent | bf903420b450e98e1b8abc54cd1ea2cb6cd7e153 (diff) | |
| download | emacs-1275c1dc8ef258f52304c0a3b287a9e8123c8195.tar.gz emacs-1275c1dc8ef258f52304c0a3b287a9e8123c8195.zip | |
Typo fixed in comments. Change
string-to-int to string-to-number.
(unidata-text-file): Defined to .../unidata.txt.
(unidata-list): Just insert unidata-text-file.
(unidata-get-decomposition): Handle Hangul decomposition.
(unidata-gen-files): Don't use \040, instead add ^L near the end of
file.
Diffstat (limited to 'admin')
| -rw-r--r-- | admin/unidata/unidata-gen.el | 64 |
1 files changed, 39 insertions, 25 deletions
diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el index 63634c18530..018db0189b4 100644 --- a/admin/unidata/unidata-gen.el +++ b/admin/unidata/unidata-gen.el | |||
| @@ -22,23 +22,29 @@ | |||
| 22 | 22 | ||
| 23 | ;;; Commentary: | 23 | ;;; Commentary: |
| 24 | 24 | ||
| 25 | ;; SPECIAL NOTICE | ||
| 26 | ;; | ||
| 27 | ;; This file must be byte-compilable/loadable by `temacs' and also | ||
| 28 | ;; the entry function `unidata-gen-files' must be runnable by | ||
| 29 | ;; `temacs'. | ||
| 30 | |||
| 25 | ;; FILES TO BE GENERATED | 31 | ;; FILES TO BE GENERATED |
| 26 | ;; | 32 | ;; |
| 27 | ;; The entry function `unidata-gen-files' generated these filese in | 33 | ;; The entry function `unidata-gen-files' generates these files in |
| 28 | ;; the current directory. | 34 | ;; the current directory. |
| 29 | ;; | 35 | ;; |
| 30 | ;; charprop.el | 36 | ;; charprop.el |
| 31 | ;; It contains a series of forms of this format: | 37 | ;; It contains a series of forms of this format: |
| 32 | ;; (char-code-property-register PROP FILE) | 38 | ;; (char-code-property-register PROP FILE) |
| 33 | ;; where PROP is a symbol representing a character property | 39 | ;; where PROP is a symbol representing a character property |
| 34 | ;; (name, geneirc-category, etc), and FILE is a name of one of | 40 | ;; (name, generic-category, etc), and FILE is a name of one of |
| 35 | ;; the following files. | 41 | ;; the following files. |
| 36 | ;; | 42 | ;; |
| 37 | ;; uni-name.el, uni-cat.el, uni-comb.el, uni-bidi.el | 43 | ;; uni-name.el, uni-cat.el, uni-comb.el, uni-bidi.el |
| 38 | ;; It contains a single form of this format: | 44 | ;; It contains a single form of this format: |
| 39 | ;; (char-code-property-register PROP CHAR-TABLE) | 45 | ;; (char-code-property-register PROP CHAR-TABLE) |
| 40 | ;; where PROP is the same as above, and CHAR-TABLE is a | 46 | ;; where PROP is the same as above, and CHAR-TABLE is a |
| 41 | ;; char-table containing property values in a comporessed format. | 47 | ;; char-table containing property values in a compressed format. |
| 42 | ;; | 48 | ;; |
| 43 | ;; When they are installed in .../lisp/international/, the file | 49 | ;; When they are installed in .../lisp/international/, the file |
| 44 | ;; "charprop.el" is preloaded in loadup.el. The other files are | 50 | ;; "charprop.el" is preloaded in loadup.el. The other files are |
| @@ -55,7 +61,7 @@ | |||
| 55 | ;; data in a char-table as below. | 61 | ;; data in a char-table as below. |
| 56 | ;; | 62 | ;; |
| 57 | ;; If succeeding 128*N characters have the same property value, we | 63 | ;; If succeeding 128*N characters have the same property value, we |
| 58 | ;; store that value for them. Otherwise, comporess values for | 64 | ;; store that value for them. Otherwise, compress values for |
| 59 | ;; succeeding 128 characters into a single string and store it as a | 65 | ;; succeeding 128 characters into a single string and store it as a |
| 60 | ;; value for those characters. The way of compression depends on a | 66 | ;; value for those characters. The way of compression depends on a |
| 61 | ;; property. See the section "SIMPLE TABLE", "RUN-LENGTH TABLE", | 67 | ;; property. See the section "SIMPLE TABLE", "RUN-LENGTH TABLE", |
| @@ -67,14 +73,10 @@ | |||
| 67 | ;; 3rd: function to call to put a property value | 73 | ;; 3rd: function to call to put a property value |
| 68 | ;; 4th: function to call to get a description of a property value | 74 | ;; 4th: function to call to get a description of a property value |
| 69 | ;; 5th: data referred by the above functions | 75 | ;; 5th: data referred by the above functions |
| 70 | ;; | ||
| 71 | ;; The actual | ||
| 72 | ;; For more detail, see the comments in the section "SIMPLE TABLE" | ||
| 73 | ;; and "NAME TABLE". | ||
| 74 | 76 | ||
| 75 | ;; The name of the file UnicodeData.txt. | 77 | ;; The name of the file UnicodeData.txt. |
| 76 | (defconst unidata-text-file | 78 | (defconst unidata-text-file |
| 77 | (expand-file-name "admin/unidata/UnicodeData.txt" source-directory)) | 79 | (expand-file-name "admin/unidata/unidata.txt" source-directory)) |
| 78 | 80 | ||
| 79 | ;; List of elements of this form: | 81 | ;; List of elements of this form: |
| 80 | ;; (CHAR-or-RANGE PROP1 PROP2 ... PROPn) | 82 | ;; (CHAR-or-RANGE PROP1 PROP2 ... PROPn) |
| @@ -92,9 +94,7 @@ | |||
| 92 | (or (file-readable-p unidata-text-file) | 94 | (or (file-readable-p unidata-text-file) |
| 93 | (error "File not readable: %s" unidata-text-file)) | 95 | (error "File not readable: %s" unidata-text-file)) |
| 94 | (with-temp-buffer | 96 | (with-temp-buffer |
| 95 | (call-process "sed" unidata-text-file t nil | 97 | (insert-file-contents unidata-text-file) |
| 96 | "-e" "s/\\([^;]*\\);\\(.*\\)/(#x\\1 \\\"\\2\\\")/" | ||
| 97 | "-e" "s/;/\\\" \\\"/g") | ||
| 98 | (goto-char (point-min)) | 98 | (goto-char (point-min)) |
| 99 | (condition-case nil | 99 | (condition-case nil |
| 100 | (while t | 100 | (while t |
| @@ -166,7 +166,7 @@ Property value is one of the following symbols: | |||
| 166 | 5 unidata-gen-table-decomposition "uni-decomposition.el" | 166 | 5 unidata-gen-table-decomposition "uni-decomposition.el" |
| 167 | "Unicode decomposition mapping. | 167 | "Unicode decomposition mapping. |
| 168 | Property value is a list of characters. The first element may be | 168 | Property value is a list of characters. The first element may be |
| 169 | one of these symbols representing compatiblity formatting tag: | 169 | one of these symbols representing compatibility formatting tag: |
| 170 | <font>, <noBreak>, <initial>, <medial>, <final>, <isolated>, <circle>, | 170 | <font>, <noBreak>, <initial>, <medial>, <final>, <isolated>, <circle>, |
| 171 | <super>, <sub>, <vertical>, <wide>, <narrow>, <small>, <square>, <fraction>, | 171 | <super>, <sub>, <vertical>, <wide>, <narrow>, <small>, <square>, <fraction>, |
| 172 | <compat>" | 172 | <compat>" |
| @@ -231,7 +231,7 @@ Property value is a character." | |||
| 231 | ;; | 231 | ;; |
| 232 | ;; The first character of the string is FIRST-INDEX. | 232 | ;; The first character of the string is FIRST-INDEX. |
| 233 | ;; The Nth (N > 0) character of the string is a property value of the | 233 | ;; The Nth (N > 0) character of the string is a property value of the |
| 234 | ;; character (BLOCk-HEAD + FIRST-INDEX + N - 1), where BLOCK-HEAD is | 234 | ;; character (BLOCK-HEAD + FIRST-INDEX + N - 1), where BLOCK-HEAD is |
| 235 | ;; the first of the characters in the block. | 235 | ;; the first of the characters in the block. |
| 236 | ;; | 236 | ;; |
| 237 | ;; The 4th extra slot of a char-table is nil. | 237 | ;; The 4th extra slot of a char-table is nil. |
| @@ -763,9 +763,23 @@ Property value is a character." | |||
| 763 | (aset vec idx (nconc word-list tail-list))) | 763 | (aset vec idx (nconc word-list tail-list))) |
| 764 | (dotimes (i 128) | 764 | (dotimes (i 128) |
| 765 | (aset table (+ first-char i) (aref vec i))) | 765 | (aset table (+ first-char i) (aref vec i))) |
| 766 | (aref vec (- char first-char))))))) | 766 | (aref vec (- char first-char))))) |
| 767 | 767 | ||
| 768 | ;; Store VAL as the name of CHAR in TABLE. | 768 | ;; Hangul syllable |
| 769 | ((and (eq val 0) (>= char #xAC00) (<= char #xD7A3)) | ||
| 770 | ;; SIndex = S (char) - SBase (#xAC00) | ||
| 771 | (setq char (- char #xAC00)) | ||
| 772 | (let (;; L = LBase + SIndex / NCount | ||
| 773 | (L (+ #x1100 (/ char 588))) | ||
| 774 | ;; V = VBase + (SIndex % NCount) / TCount | ||
| 775 | (V (+ #x1161 (/ (% char 588) 28))) | ||
| 776 | ;; T = TBase + SIndex % TCount | ||
| 777 | (T (+ #x11A7 (% char 28)))) | ||
| 778 | (list L V T))) | ||
| 779 | |||
| 780 | )) | ||
| 781 | |||
| 782 | ;; Store VAL as the decomposition information of CHAR in TABLE. | ||
| 769 | 783 | ||
| 770 | (defun unidata-put-decomposition (char val table) | 784 | (defun unidata-put-decomposition (char val table) |
| 771 | (let ((current-val (aref table char))) | 785 | (let ((current-val (aref table char))) |
| @@ -871,7 +885,7 @@ Property value is a character." | |||
| 871 | 885 | ||
| 872 | (if (and (eq prop 'decomposition) | 886 | (if (and (eq prop 'decomposition) |
| 873 | (> idx 32)) | 887 | (> idx 32)) |
| 874 | (error "Too many symobls in decomposition data")) | 888 | (error "Too many symbols in decomposition data")) |
| 875 | 889 | ||
| 876 | (dotimes (i (/ #x110000 128)) | 890 | (dotimes (i (/ #x110000 128)) |
| 877 | (let* ((idx (* i 128)) | 891 | (let* ((idx (* i 128)) |
| @@ -956,11 +970,11 @@ Property value is a character." | |||
| 956 | (if (= c 32) | 970 | (if (= c 32) |
| 957 | (setq l (if (= (aref str idx) ?<) | 971 | (setq l (if (= (aref str idx) ?<) |
| 958 | (cons (intern (substring str idx i)) l) | 972 | (cons (intern (substring str idx i)) l) |
| 959 | (cons (string-to-int (substring str idx i) 16) l)) | 973 | (cons (string-to-number (substring str idx i) 16) l)) |
| 960 | idx (1+ i)))) | 974 | idx (1+ i)))) |
| 961 | (if (= (aref str idx) ?<) | 975 | (if (= (aref str idx) ?<) |
| 962 | (setq l (cons (intern (substring str idx len)) l)) | 976 | (setq l (cons (intern (substring str idx len)) l)) |
| 963 | (setq l (cons (string-to-int (substring str idx len) 16) l))) | 977 | (setq l (cons (string-to-number (substring str idx len) 16) l))) |
| 964 | (nreverse l))))) | 978 | (nreverse l))))) |
| 965 | 979 | ||
| 966 | 980 | ||
| @@ -1091,9 +1105,9 @@ Property value is a character." | |||
| 1091 | (cond ((eq generator 'unidata-gen-table-symbol) | 1105 | (cond ((eq generator 'unidata-gen-table-symbol) |
| 1092 | (setq val1 (intern val1))) | 1106 | (setq val1 (intern val1))) |
| 1093 | ((eq generator 'unidata-gen-table-integer) | 1107 | ((eq generator 'unidata-gen-table-integer) |
| 1094 | (setq val1 (string-to-int val1))) | 1108 | (setq val1 (string-to-number val1))) |
| 1095 | ((eq generator 'unidata-gen-table-character) | 1109 | ((eq generator 'unidata-gen-table-character) |
| 1096 | (setq val1 (string-to-int val1 16))) | 1110 | (setq val1 (string-to-number val1 16))) |
| 1097 | ((eq generator 'unidata-gen-table-decomposition) | 1111 | ((eq generator 'unidata-gen-table-decomposition) |
| 1098 | (setq val1 (unidata-split-decomposition val1))))) | 1112 | (setq val1 (unidata-split-decomposition val1))))) |
| 1099 | (when (>= char check) | 1113 | (when (>= char check) |
| @@ -1136,19 +1150,19 @@ Property value is a character." | |||
| 1136 | (insert ";; Automatically generated from UnicodeData.txt.\n" | 1150 | (insert ";; Automatically generated from UnicodeData.txt.\n" |
| 1137 | (format "(define-char-code-property '%S %S %S)\n" | 1151 | (format "(define-char-code-property '%S %S %S)\n" |
| 1138 | prop table docstring) | 1152 | prop table docstring) |
| 1139 | ;; \040 below is to avoid error on reading this file. | 1153 | ";; Local Variables:\n" |
| 1140 | ";; Local\040Variables:\n" | ||
| 1141 | ";; coding: utf-8\n" | 1154 | ";; coding: utf-8\n" |
| 1142 | ";; no-byte-compile: t\n" | 1155 | ";; no-byte-compile: t\n" |
| 1143 | ";; End:\n\n" | 1156 | ";; End:\n\n" |
| 1144 | (format ";; %s ends here\n" file))))) | 1157 | (format ";; %s ends here\n" file))))) |
| 1145 | (message "Writing %s..." charprop-file) | 1158 | (message "Writing %s..." charprop-file) |
| 1146 | ;; \040 below is to avoid error on reading this file. | 1159 | (insert ";; Local Variables:\n" |
| 1147 | (insert ";; Local\040Variables:\n" | ||
| 1148 | ";; coding: utf-8\n" | 1160 | ";; coding: utf-8\n" |
| 1149 | ";; no-byte-compile: t\n" | 1161 | ";; no-byte-compile: t\n" |
| 1150 | ";; End:\n\n" | 1162 | ";; End:\n\n" |
| 1151 | (format ";; %s ends here\n" charprop-file))))) | 1163 | (format ";; %s ends here\n" charprop-file))))) |
| 1152 | 1164 | ||
| 1165 | |||
| 1166 | |||
| 1153 | ;; arch-tag: 961c862e-b821-447e-9b8a-bfbab9c2d525 | 1167 | ;; arch-tag: 961c862e-b821-447e-9b8a-bfbab9c2d525 |
| 1154 | ;;; unidata-gen.el ends here | 1168 | ;;; unidata-gen.el ends here |