aboutsummaryrefslogtreecommitdiffstats
path: root/admin
diff options
context:
space:
mode:
author Kenichi Handa 2005-05-10 02:29:41 +0000
committer Kenichi Handa 2005-05-10 02:29:41 +0000
commit 1275c1dc8ef258f52304c0a3b287a9e8123c8195 (patch)
tree ff2cd283616526563ff06718de8ea25eb9b3a756 /admin
parent bf903420b450e98e1b8abc54cd1ea2cb6cd7e153 (diff)
download emacs-1275c1dc8ef258f52304c0a3b287a9e8123c8195.tar.gz
emacs-1275c1dc8ef258f52304c0a3b287a9e8123c8195.zip
Typo fixed in comments. Change
string-to-int to string-to-number. (unidata-text-file): Defined to .../unidata.txt. (unidata-list): Just insert unidata-text-file. (unidata-get-decomposition): Handle Hangul decomposition. (unidata-gen-files): Don't use \040, instead add ^L near the end of file.
Diffstat (limited to 'admin')
-rw-r--r-- admin/unidata/unidata-gen.el 64
1 files changed, 39 insertions, 25 deletions
diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el
index 63634c18530..018db0189b4 100644
--- a/admin/unidata/unidata-gen.el
+++ b/admin/unidata/unidata-gen.el
@@ -22,23 +22,29 @@
22 22
23;;; Commentary: 23;;; Commentary:
24 24
25;; SPECIAL NOTICE
26;;
27;; This file must be byte-compilable/loadable by `temacs' and also
28;; the entry function `unidata-gen-files' must be runnable by
29;; `temacs'.
30
25;; FILES TO BE GENERATED 31;; FILES TO BE GENERATED
26;; 32;;
27;; The entry function `unidata-gen-files' generated these filese in 33;; The entry function `unidata-gen-files' generates these files in
28;; the current directory. 34;; the current directory.
29;; 35;;
30;; charprop.el 36;; charprop.el
31;; It contains a series of forms of this format: 37;; It contains a series of forms of this format:
32;; (char-code-property-register PROP FILE) 38;; (char-code-property-register PROP FILE)
33;; where PROP is a symbol representing a character property 39;; where PROP is a symbol representing a character property
34;; (name, geneirc-category, etc), and FILE is a name of one of 40;; (name, generic-category, etc), and FILE is a name of one of
35;; the following files. 41;; the following files.
36;; 42;;
37;; uni-name.el, uni-cat.el, uni-comb.el, uni-bidi.el 43;; uni-name.el, uni-cat.el, uni-comb.el, uni-bidi.el
38;; It contains a single form of this format: 44;; It contains a single form of this format:
39;; (char-code-property-register PROP CHAR-TABLE) 45;; (char-code-property-register PROP CHAR-TABLE)
40;; where PROP is the same as above, and CHAR-TABLE is a 46;; where PROP is the same as above, and CHAR-TABLE is a
41;; char-table containing property values in a comporessed format. 47;; char-table containing property values in a compressed format.
42;; 48;;
43;; When they are installed in .../lisp/international/, the file 49;; When they are installed in .../lisp/international/, the file
44;; "charprop.el" is preloaded in loadup.el. The other files are 50;; "charprop.el" is preloaded in loadup.el. The other files are
@@ -55,7 +61,7 @@
55;; data in a char-table as below. 61;; data in a char-table as below.
56;; 62;;
57;; If succeeding 128*N characters have the same property value, we 63;; If succeeding 128*N characters have the same property value, we
58;; store that value for them. Otherwise, comporess values for 64;; store that value for them. Otherwise, compress values for
59;; succeeding 128 characters into a single string and store it as a 65;; succeeding 128 characters into a single string and store it as a
60;; value for those characters. The way of compression depends on a 66;; value for those characters. The way of compression depends on a
61;; property. See the section "SIMPLE TABLE", "RUN-LENGTH TABLE", 67;; property. See the section "SIMPLE TABLE", "RUN-LENGTH TABLE",
@@ -67,14 +73,10 @@
67;; 3nd: function to call to put a property value 73;; 3nd: function to call to put a property value
68;; 4th: function to call to get a description of a property value 74;; 4th: function to call to get a description of a property value
69;; 5th: data referred by the above functions 75;; 5th: data referred by the above functions
70;;
71;; The actual
72;; For more detail, see the comments in the section "SIMPLE TABLE"
73;; and "NAME TABLE".
74 76
75;; The name of the file UnicodeData.txt. 77;; The name of the file UnicodeData.txt.
76(defconst unidata-text-file 78(defconst unidata-text-file
77 (expand-file-name "admin/unidata/UnicodeData.txt" source-directory)) 79 (expand-file-name "admin/unidata/unidata.txt" source-directory))
78 80
79;; List of elements of this form: 81;; List of elements of this form:
80;; (CHAR-or-RANGE PROP1 PROP2 ... PROPn) 82;; (CHAR-or-RANGE PROP1 PROP2 ... PROPn)
@@ -92,9 +94,7 @@
92 (or (file-readable-p unidata-text-file) 94 (or (file-readable-p unidata-text-file)
93 (error "File not readable: %s" unidata-text-file)) 95 (error "File not readable: %s" unidata-text-file))
94 (with-temp-buffer 96 (with-temp-buffer
95 (call-process "sed" unidata-text-file t nil 97 (insert-file-contents unidata-text-file)
96 "-e" "s/\\([^;]*\\);\\(.*\\)/(#x\\1 \\\"\\2\\\")/"
97 "-e" "s/;/\\\" \\\"/g")
98 (goto-char (point-min)) 98 (goto-char (point-min))
99 (condition-case nil 99 (condition-case nil
100 (while t 100 (while t
@@ -166,7 +166,7 @@ Property value is one of the following symbols:
166 5 unidata-gen-table-decomposition "uni-decomposition.el" 166 5 unidata-gen-table-decomposition "uni-decomposition.el"
167 "Unicode decomposition mapping. 167 "Unicode decomposition mapping.
168Property value is a list of characters. The first element may be 168Property value is a list of characters. The first element may be
169one of these symbols representing compatiblity formatting tag: 169one of these symbols representing compatibility formatting tag:
170 <font>, <noBreak>, <initial>, <medial>, <final>, <isolated>, <circle>, 170 <font>, <noBreak>, <initial>, <medial>, <final>, <isolated>, <circle>,
171 <super>, <sub>, <vertical>, <wide>, <narrow>, <small>, <square>, <fraction>, 171 <super>, <sub>, <vertical>, <wide>, <narrow>, <small>, <square>, <fraction>,
172 <compat>" 172 <compat>"
@@ -231,7 +231,7 @@ Property value is a character."
231;; 231;;
232;; The first character of the string is FIRST-INDEX. 232;; The first character of the string is FIRST-INDEX.
233;; The Nth (N > 0) character of the string is a property value of the 233;; The Nth (N > 0) character of the string is a property value of the
234;; character (BLOCk-HEAD + FIRST-INDEX + N - 1), where BLOCK-HEAD is 234;; character (BLOCK-HEAD + FIRST-INDEX + N - 1), where BLOCK-HEAD is
235;; the first of the characters in the block. 235;; the first of the characters in the block.
236;; 236;;
237;; The 4th extra slot of a char-table is nil. 237;; The 4th extra slot of a char-table is nil.
@@ -763,9 +763,23 @@ Property value is a character."
763 (aset vec idx (nconc word-list tail-list))) 763 (aset vec idx (nconc word-list tail-list)))
764 (dotimes (i 128) 764 (dotimes (i 128)
765 (aset table (+ first-char i) (aref vec i))) 765 (aset table (+ first-char i) (aref vec i)))
766 (aref vec (- char first-char))))))) 766 (aref vec (- char first-char)))))
767 767
768;; Store VAL as the name of CHAR in TABLE. 768 ;; Hangul syllable
769 ((and (eq val 0) (>= char #xAC00) (<= char #xD7A3))
770 ;; SIndex = S (char) - SBase (#xAC00)
771 (setq char (- char #xAC00))
772 (let (;; L = LBase + SIndex / NCount
773 (L (+ #x1100 (/ char 588)))
774 ;; V = VBase + (SIndex % NCount) * TCount
775 (V (+ #x1161 (/ (% char 588) 28)))
776 ;; T = TBase + SIndex % TCount
777 (T (+ #x11A7 (% char 28))))
778 (list L V T)))
779
780 ))
781
782;; Store VAL as the decomposition information of CHAR in TABLE.
769 783
770(defun unidata-put-decomposition (char val table) 784(defun unidata-put-decomposition (char val table)
771 (let ((current-val (aref table char))) 785 (let ((current-val (aref table char)))
@@ -871,7 +885,7 @@ Property value is a character."
871 885
872 (if (and (eq prop 'decomposition) 886 (if (and (eq prop 'decomposition)
873 (> idx 32)) 887 (> idx 32))
874 (error "Too many symobls in decomposition data")) 888 (error "Too many symbols in decomposition data"))
875 889
876 (dotimes (i (/ #x110000 128)) 890 (dotimes (i (/ #x110000 128))
877 (let* ((idx (* i 128)) 891 (let* ((idx (* i 128))
@@ -956,11 +970,11 @@ Property value is a character."
956 (if (= c 32) 970 (if (= c 32)
957 (setq l (if (= (aref str idx) ?<) 971 (setq l (if (= (aref str idx) ?<)
958 (cons (intern (substring str idx i)) l) 972 (cons (intern (substring str idx i)) l)
959 (cons (string-to-int (substring str idx i) 16) l)) 973 (cons (string-to-number (substring str idx i) 16) l))
960 idx (1+ i)))) 974 idx (1+ i))))
961 (if (= (aref str idx) ?<) 975 (if (= (aref str idx) ?<)
962 (setq l (cons (intern (substring str idx len)) l)) 976 (setq l (cons (intern (substring str idx len)) l))
963 (setq l (cons (string-to-int (substring str idx len) 16) l))) 977 (setq l (cons (string-to-number (substring str idx len) 16) l)))
964 (nreverse l))))) 978 (nreverse l)))))
965 979
966 980
@@ -1091,9 +1105,9 @@ Property value is a character."
1091 (cond ((eq generator 'unidata-gen-table-symbol) 1105 (cond ((eq generator 'unidata-gen-table-symbol)
1092 (setq val1 (intern val1))) 1106 (setq val1 (intern val1)))
1093 ((eq generator 'unidata-gen-table-integer) 1107 ((eq generator 'unidata-gen-table-integer)
1094 (setq val1 (string-to-int val1))) 1108 (setq val1 (string-to-number val1)))
1095 ((eq generator 'unidata-gen-table-character) 1109 ((eq generator 'unidata-gen-table-character)
1096 (setq val1 (string-to-int val1 16))) 1110 (setq val1 (string-to-number val1 16)))
1097 ((eq generator 'unidata-gen-table-decomposition) 1111 ((eq generator 'unidata-gen-table-decomposition)
1098 (setq val1 (unidata-split-decomposition val1))))) 1112 (setq val1 (unidata-split-decomposition val1)))))
1099 (when (>= char check) 1113 (when (>= char check)
@@ -1136,19 +1150,19 @@ Property value is a character."
1136 (insert ";; Automatically generated from UnicodeData.txt.\n" 1150 (insert ";; Automatically generated from UnicodeData.txt.\n"
1137 (format "(define-char-code-property '%S %S %S)\n" 1151 (format "(define-char-code-property '%S %S %S)\n"
1138 prop table docstring) 1152 prop table docstring)
1139 ;; \040 below is to avoid error on reading this file. 1153 ";; Local Variables:\n"
1140 ";; Local\040Variables:\n"
1141 ";; coding: utf-8\n" 1154 ";; coding: utf-8\n"
1142 ";; no-byte-compile: t\n" 1155 ";; no-byte-compile: t\n"
1143 ";; End:\n\n" 1156 ";; End:\n\n"
1144 (format ";; %s ends here\n" file))))) 1157 (format ";; %s ends here\n" file)))))
1145 (message "Writing %s..." charprop-file) 1158 (message "Writing %s..." charprop-file)
1146 ;; \040 below is to avoid error on reading this file. 1159 (insert ";; Local Variables:\n"
1147 (insert ";; Local\040Variables:\n"
1148 ";; coding: utf-8\n" 1160 ";; coding: utf-8\n"
1149 ";; no-byte-compile: t\n" 1161 ";; no-byte-compile: t\n"
1150 ";; End:\n\n" 1162 ";; End:\n\n"
1151 (format ";; %s ends here\n" charprop-file))))) 1163 (format ";; %s ends here\n" charprop-file)))))
1152 1164
1165
1166
1153;; arch-tag: 961c862e-b821-447e-9b8a-bfbab9c2d525 1167;; arch-tag: 961c862e-b821-447e-9b8a-bfbab9c2d525
1154;;; unidata-gen.el ends here 1168;;; unidata-gen.el ends here