diff options
| author | Eli Zaretskii | 2014-09-03 19:09:48 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2014-09-03 19:09:48 +0300 |
| commit | a7fed5a88274d2d3737f7fd1192587731b4d4595 (patch) | |
| tree | 07bd20e42ccbb1d2d7a8368c39446a96f1c2f48c /admin | |
| parent | 2fca7ae050c62d572d050b377c289091ff7f0547 (diff) | |
| download | emacs-a7fed5a88274d2d3737f7fd1192587731b4d4595.tar.gz emacs-a7fed5a88274d2d3737f7fd1192587731b4d4595.zip | |
Added BidiBrackets.txt and related Unicode properties to unidata-gen.el.
Diffstat (limited to 'admin')
| -rw-r--r-- | admin/unidata/BidiBrackets.txt | 176 | ||||
| -rw-r--r-- | admin/unidata/Makefile.in | 5 | ||||
| -rw-r--r-- | admin/unidata/unidata-gen.el | 100 |
3 files changed, 267 insertions, 14 deletions
diff --git a/admin/unidata/BidiBrackets.txt b/admin/unidata/BidiBrackets.txt new file mode 100644 index 00000000000..2a0cc0c7a69 --- /dev/null +++ b/admin/unidata/BidiBrackets.txt | |||
| @@ -0,0 +1,176 @@ | |||
| 1 | # BidiBrackets-7.0.0.txt | ||
| 2 | # Date: 2014-01-21, 02:30:00 GMT [AG, LI, KW] | ||
| 3 | # | ||
| 4 | # Bidi_Paired_Bracket and Bidi_Paired_Bracket_Type Properties | ||
| 5 | # | ||
| 6 | # This file is a normative contributory data file in the Unicode | ||
| 7 | # Character Database. | ||
| 8 | # | ||
| 9 | # Copyright (c) 1991-2014 Unicode, Inc. | ||
| 10 | # For terms of use, see http://www.unicode.org/terms_of_use.html | ||
| 11 | # | ||
| 12 | # Bidi_Paired_Bracket is a normative property of type Miscellaneous, | ||
| 13 | # which establishes a mapping between characters that are treated as | ||
| 14 | # bracket pairs by the Unicode Bidirectional Algorithm. | ||
| 15 | # | ||
| 16 | # Bidi_Paired_Bracket_Type is a normative property of type Enumeration, | ||
| 17 | # which classifies characters into opening and closing paired brackets | ||
| 18 | # for the purposes of the Unicode Bidirectional Algorithm. | ||
| 19 | # | ||
| 20 | # This file lists the set of code points with Bidi_Paired_Bracket_Type | ||
| 21 | # property values Open and Close. The set is derived from the character | ||
| 22 | # properties General_Category (gc), Bidi_Class (bc), Bidi_Mirrored (Bidi_M), | ||
| 23 | # and Bidi_Mirroring_Glyph (bmg), as follows: two characters, A and B, | ||
| 24 | # form a bracket pair if A has gc=Ps and B has gc=Pe, both have bc=ON and | ||
| 25 | # Bidi_M=Y, and bmg of A is B. Bidi_Paired_Bracket (bpb) maps A to B and | ||
| 26 | # vice versa, and their Bidi_Paired_Bracket_Type (bpt) property values are | ||
| 27 | # Open (o) and Close (c), respectively. | ||
| 28 | # | ||
| 29 | # For legacy reasons, the characters U+FD3E ORNATE LEFT PARENTHESIS and | ||
| 30 | # U+FD3F ORNATE RIGHT PARENTHESIS do not mirror in bidirectional display | ||
| 31 | # and therefore do not form a bracket pair. | ||
| 32 | # | ||
| 33 | # The Unicode property value stability policy guarantees that characters | ||
| 34 | # which have bpt=o or bpt=c also have bc=ON and Bidi_M=Y. As a result, an | ||
| 35 | # implementation can optimize the lookup of the Bidi_Paired_Bracket_Type | ||
| 36 | # property values Open and Close by restricting the processing to characters | ||
| 37 | # with bc=ON. | ||
| 38 | # | ||
| 39 | # The format of the file is three fields separated by a semicolon. | ||
| 40 | # Field 0: Unicode code point value, represented as a hexadecimal value | ||
| 41 | # Field 1: Bidi_Paired_Bracket property value, a code point value or <none> | ||
| 42 | # Field 2: Bidi_Paired_Bracket_Type property value, one of the following: | ||
| 43 | # o Open | ||
| 44 | # c Close | ||
| 45 | # n None | ||
| 46 | # The names of the characters in field 0 are given in comments at the end | ||
| 47 | # of each line. | ||
| 48 | # | ||
| 49 | # For information on bidirectional paired brackets, see UAX #9: Unicode | ||
| 50 | # Bidirectional Algorithm, at http://www.unicode.org/unicode/reports/tr9/ | ||
| 51 | # | ||
| 52 | # This file was originally created by Andrew Glass and Laurentiu Iancu | ||
| 53 | # for Unicode 6.3. | ||
| 54 | |||
| 55 | 0028; 0029; o # LEFT PARENTHESIS | ||
| 56 | 0029; 0028; c # RIGHT PARENTHESIS | ||
| 57 | 005B; 005D; o # LEFT SQUARE BRACKET | ||
| 58 | 005D; 005B; c # RIGHT SQUARE BRACKET | ||
| 59 | 007B; 007D; o # LEFT CURLY BRACKET | ||
| 60 | 007D; 007B; c # RIGHT CURLY BRACKET | ||
| 61 | 0F3A; 0F3B; o # TIBETAN MARK GUG RTAGS GYON | ||
| 62 | 0F3B; 0F3A; c # TIBETAN MARK GUG RTAGS GYAS | ||
| 63 | 0F3C; 0F3D; o # TIBETAN MARK ANG KHANG GYON | ||
| 64 | 0F3D; 0F3C; c # TIBETAN MARK ANG KHANG GYAS | ||
| 65 | 169B; 169C; o # OGHAM FEATHER MARK | ||
| 66 | 169C; 169B; c # OGHAM REVERSED FEATHER MARK | ||
| 67 | 2045; 2046; o # LEFT SQUARE BRACKET WITH QUILL | ||
| 68 | 2046; 2045; c # RIGHT SQUARE BRACKET WITH QUILL | ||
| 69 | 207D; 207E; o # SUPERSCRIPT LEFT PARENTHESIS | ||
| 70 | 207E; 207D; c # SUPERSCRIPT RIGHT PARENTHESIS | ||
| 71 | 208D; 208E; o # SUBSCRIPT LEFT PARENTHESIS | ||
| 72 | 208E; 208D; c # SUBSCRIPT RIGHT PARENTHESIS | ||
| 73 | 2308; 2309; o # LEFT CEILING | ||
| 74 | 2309; 2308; c # RIGHT CEILING | ||
| 75 | 230A; 230B; o # LEFT FLOOR | ||
| 76 | 230B; 230A; c # RIGHT FLOOR | ||
| 77 | 2329; 232A; o # LEFT-POINTING ANGLE BRACKET | ||
| 78 | 232A; 2329; c # RIGHT-POINTING ANGLE BRACKET | ||
| 79 | 2768; 2769; o # MEDIUM LEFT PARENTHESIS ORNAMENT | ||
| 80 | 2769; 2768; c # MEDIUM RIGHT PARENTHESIS ORNAMENT | ||
| 81 | 276A; 276B; o # MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT | ||
| 82 | 276B; 276A; c # MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT | ||
| 83 | 276C; 276D; o # MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT | ||
| 84 | 276D; 276C; c # MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT | ||
| 85 | 276E; 276F; o # HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT | ||
| 86 | 276F; 276E; c # HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT | ||
| 87 | 2770; 2771; o # HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT | ||
| 88 | 2771; 2770; c # HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT | ||
| 89 | 2772; 2773; o # LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT | ||
| 90 | 2773; 2772; c # LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT | ||
| 91 | 2774; 2775; o # MEDIUM LEFT CURLY BRACKET ORNAMENT | ||
| 92 | 2775; 2774; c # MEDIUM RIGHT CURLY BRACKET ORNAMENT | ||
| 93 | 27C5; 27C6; o # LEFT S-SHAPED BAG DELIMITER | ||
| 94 | 27C6; 27C5; c # RIGHT S-SHAPED BAG DELIMITER | ||
| 95 | 27E6; 27E7; o # MATHEMATICAL LEFT WHITE SQUARE BRACKET | ||
| 96 | 27E7; 27E6; c # MATHEMATICAL RIGHT WHITE SQUARE BRACKET | ||
| 97 | 27E8; 27E9; o # MATHEMATICAL LEFT ANGLE BRACKET | ||
| 98 | 27E9; 27E8; c # MATHEMATICAL RIGHT ANGLE BRACKET | ||
| 99 | 27EA; 27EB; o # MATHEMATICAL LEFT DOUBLE ANGLE BRACKET | ||
| 100 | 27EB; 27EA; c # MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET | ||
| 101 | 27EC; 27ED; o # MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET | ||
| 102 | 27ED; 27EC; c # MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET | ||
| 103 | 27EE; 27EF; o # MATHEMATICAL LEFT FLATTENED PARENTHESIS | ||
| 104 | 27EF; 27EE; c # MATHEMATICAL RIGHT FLATTENED PARENTHESIS | ||
| 105 | 2983; 2984; o # LEFT WHITE CURLY BRACKET | ||
| 106 | 2984; 2983; c # RIGHT WHITE CURLY BRACKET | ||
| 107 | 2985; 2986; o # LEFT WHITE PARENTHESIS | ||
| 108 | 2986; 2985; c # RIGHT WHITE PARENTHESIS | ||
| 109 | 2987; 2988; o # Z NOTATION LEFT IMAGE BRACKET | ||
| 110 | 2988; 2987; c # Z NOTATION RIGHT IMAGE BRACKET | ||
| 111 | 2989; 298A; o # Z NOTATION LEFT BINDING BRACKET | ||
| 112 | 298A; 2989; c # Z NOTATION RIGHT BINDING BRACKET | ||
| 113 | 298B; 298C; o # LEFT SQUARE BRACKET WITH UNDERBAR | ||
| 114 | 298C; 298B; c # RIGHT SQUARE BRACKET WITH UNDERBAR | ||
| 115 | 298D; 2990; o # LEFT SQUARE BRACKET WITH TICK IN TOP CORNER | ||
| 116 | 298E; 298F; c # RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER | ||
| 117 | 298F; 298E; o # LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER | ||
| 118 | 2990; 298D; c # RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER | ||
| 119 | 2991; 2992; o # LEFT ANGLE BRACKET WITH DOT | ||
| 120 | 2992; 2991; c # RIGHT ANGLE BRACKET WITH DOT | ||
| 121 | 2993; 2994; o # LEFT ARC LESS-THAN BRACKET | ||
| 122 | 2994; 2993; c # RIGHT ARC GREATER-THAN BRACKET | ||
| 123 | 2995; 2996; o # DOUBLE LEFT ARC GREATER-THAN BRACKET | ||
| 124 | 2996; 2995; c # DOUBLE RIGHT ARC LESS-THAN BRACKET | ||
| 125 | 2997; 2998; o # LEFT BLACK TORTOISE SHELL BRACKET | ||
| 126 | 2998; 2997; c # RIGHT BLACK TORTOISE SHELL BRACKET | ||
| 127 | 29D8; 29D9; o # LEFT WIGGLY FENCE | ||
| 128 | 29D9; 29D8; c # RIGHT WIGGLY FENCE | ||
| 129 | 29DA; 29DB; o # LEFT DOUBLE WIGGLY FENCE | ||
| 130 | 29DB; 29DA; c # RIGHT DOUBLE WIGGLY FENCE | ||
| 131 | 29FC; 29FD; o # LEFT-POINTING CURVED ANGLE BRACKET | ||
| 132 | 29FD; 29FC; c # RIGHT-POINTING CURVED ANGLE BRACKET | ||
| 133 | 2E22; 2E23; o # TOP LEFT HALF BRACKET | ||
| 134 | 2E23; 2E22; c # TOP RIGHT HALF BRACKET | ||
| 135 | 2E24; 2E25; o # BOTTOM LEFT HALF BRACKET | ||
| 136 | 2E25; 2E24; c # BOTTOM RIGHT HALF BRACKET | ||
| 137 | 2E26; 2E27; o # LEFT SIDEWAYS U BRACKET | ||
| 138 | 2E27; 2E26; c # RIGHT SIDEWAYS U BRACKET | ||
| 139 | 2E28; 2E29; o # LEFT DOUBLE PARENTHESIS | ||
| 140 | 2E29; 2E28; c # RIGHT DOUBLE PARENTHESIS | ||
| 141 | 3008; 3009; o # LEFT ANGLE BRACKET | ||
| 142 | 3009; 3008; c # RIGHT ANGLE BRACKET | ||
| 143 | 300A; 300B; o # LEFT DOUBLE ANGLE BRACKET | ||
| 144 | 300B; 300A; c # RIGHT DOUBLE ANGLE BRACKET | ||
| 145 | 300C; 300D; o # LEFT CORNER BRACKET | ||
| 146 | 300D; 300C; c # RIGHT CORNER BRACKET | ||
| 147 | 300E; 300F; o # LEFT WHITE CORNER BRACKET | ||
| 148 | 300F; 300E; c # RIGHT WHITE CORNER BRACKET | ||
| 149 | 3010; 3011; o # LEFT BLACK LENTICULAR BRACKET | ||
| 150 | 3011; 3010; c # RIGHT BLACK LENTICULAR BRACKET | ||
| 151 | 3014; 3015; o # LEFT TORTOISE SHELL BRACKET | ||
| 152 | 3015; 3014; c # RIGHT TORTOISE SHELL BRACKET | ||
| 153 | 3016; 3017; o # LEFT WHITE LENTICULAR BRACKET | ||
| 154 | 3017; 3016; c # RIGHT WHITE LENTICULAR BRACKET | ||
| 155 | 3018; 3019; o # LEFT WHITE TORTOISE SHELL BRACKET | ||
| 156 | 3019; 3018; c # RIGHT WHITE TORTOISE SHELL BRACKET | ||
| 157 | 301A; 301B; o # LEFT WHITE SQUARE BRACKET | ||
| 158 | 301B; 301A; c # RIGHT WHITE SQUARE BRACKET | ||
| 159 | FE59; FE5A; o # SMALL LEFT PARENTHESIS | ||
| 160 | FE5A; FE59; c # SMALL RIGHT PARENTHESIS | ||
| 161 | FE5B; FE5C; o # SMALL LEFT CURLY BRACKET | ||
| 162 | FE5C; FE5B; c # SMALL RIGHT CURLY BRACKET | ||
| 163 | FE5D; FE5E; o # SMALL LEFT TORTOISE SHELL BRACKET | ||
| 164 | FE5E; FE5D; c # SMALL RIGHT TORTOISE SHELL BRACKET | ||
| 165 | FF08; FF09; o # FULLWIDTH LEFT PARENTHESIS | ||
| 166 | FF09; FF08; c # FULLWIDTH RIGHT PARENTHESIS | ||
| 167 | FF3B; FF3D; o # FULLWIDTH LEFT SQUARE BRACKET | ||
| 168 | FF3D; FF3B; c # FULLWIDTH RIGHT SQUARE BRACKET | ||
| 169 | FF5B; FF5D; o # FULLWIDTH LEFT CURLY BRACKET | ||
| 170 | FF5D; FF5B; c # FULLWIDTH RIGHT CURLY BRACKET | ||
| 171 | FF5F; FF60; o # FULLWIDTH LEFT WHITE PARENTHESIS | ||
| 172 | FF60; FF5F; c # FULLWIDTH RIGHT WHITE PARENTHESIS | ||
| 173 | FF62; FF63; o # HALFWIDTH LEFT CORNER BRACKET | ||
| 174 | FF63; FF62; c # HALFWIDTH RIGHT CORNER BRACKET | ||
| 175 | |||
| 176 | # EOF | ||
diff --git a/admin/unidata/Makefile.in b/admin/unidata/Makefile.in index 6b253ea565b..8234de1dd70 100644 --- a/admin/unidata/Makefile.in +++ b/admin/unidata/Makefile.in | |||
| @@ -50,7 +50,8 @@ ${top_srcdir}/src/macuvs.h: ${srcdir}/uvs.el ${srcdir}/IVD_Sequences.txt | \ | |||
| 50 | unidata.txt: ${srcdir}/UnicodeData.txt | 50 | unidata.txt: ${srcdir}/UnicodeData.txt |
| 51 | sed -e 's/\([^;]*\);\(.*\)/(#x\1 "\2")/' -e 's/;/" "/g' < $< > $@ | 51 | sed -e 's/\([^;]*\);\(.*\)/(#x\1 "\2")/' -e 's/;/" "/g' < $< > $@ |
| 52 | 52 | ||
| 53 | ${DSTDIR}/charprop.el: ${srcdir}/unidata-gen.el ${srcdir}/UnicodeData.txt | \ | 53 | ${DSTDIR}/charprop.el: ${srcdir}/unidata-gen.el ${srcdir}/UnicodeData.txt \ |
| 54 | ${srcdir}/BidiMirroring.txt ${srcdir}/BidiBrackets.txt | \ | ||
| 54 | ${srcdir}/unidata-gen.elc unidata.txt | 55 | ${srcdir}/unidata-gen.elc unidata.txt |
| 55 | -if [ -f "$@" ]; then \ | 56 | -if [ -f "$@" ]; then \ |
| 56 | cd ${DSTDIR} && chmod +w charprop.el `sed -n 's/^;; FILE: //p' < charprop.el`; \ | 57 | cd ${DSTDIR} && chmod +w charprop.el `sed -n 's/^;; FILE: //p' < charprop.el`; \ |
| @@ -59,7 +60,7 @@ ${DSTDIR}/charprop.el: ${srcdir}/unidata-gen.el ${srcdir}/UnicodeData.txt | \ | |||
| 59 | ${srcdir} "${DSTDIR}" | 60 | ${srcdir} "${DSTDIR}" |
| 60 | 61 | ||
| 61 | ## Like the above, but generate in PWD rather than lisp/international. | 62 | ## Like the above, but generate in PWD rather than lisp/international. |
| 62 | charprop.el: ${srcdir}/unidata-gen.elc unidata.txt | 63 | charprop.el: ${srcdir}/unidata-gen.elc unidata.txt BidiMirroring.txt BidiBrackets.txt |
| 63 | ${emacs} -L ${srcdir} -l unidata-gen -f unidata-gen-files \ | 64 | ${emacs} -L ${srcdir} -l unidata-gen -f unidata-gen-files \ |
| 64 | ${srcdir} | 65 | ${srcdir} |
| 65 | 66 | ||
diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el index ff45b79aab7..0dc02c996ed 100644 --- a/admin/unidata/unidata-gen.el +++ b/admin/unidata/unidata-gen.el | |||
| @@ -152,7 +152,8 @@ | |||
| 152 | ;; PROP: character property | 152 | ;; PROP: character property |
| 153 | ;; INDEX: index to each element of unidata-list for PROP. | 153 | ;; INDEX: index to each element of unidata-list for PROP. |
| 154 | ;; It may be a function that generates an alist of character codes | 154 | ;; It may be a function that generates an alist of character codes |
| 155 | ;; vs. the corresponding property values. | 155 | ;; vs. the corresponding property values. Currently, only character |
| 156 | ;; codepoints or symbol values are supported in this case. | ||
| 156 | ;; GENERATOR: function to generate a char-table | 157 | ;; GENERATOR: function to generate a char-table |
| 157 | ;; FILENAME: filename to store the char-table | 158 | ;; FILENAME: filename to store the char-table |
| 158 | ;; DOCSTRING: docstring for the property | 159 | ;; DOCSTRING: docstring for the property |
| @@ -271,7 +272,23 @@ is the character itself." | |||
| 271 | "Unicode bidi-mirroring characters. | 272 | "Unicode bidi-mirroring characters. |
| 272 | Property value is a character that has the corresponding mirroring image or nil. | 273 | Property value is a character that has the corresponding mirroring image or nil. |
| 273 | The value nil means that the actual property value of a character | 274 | The value nil means that the actual property value of a character |
| 274 | is the character itself."))) | 275 | is the character itself.") |
| 276 | (paired-bracket | ||
| 277 | unidata-gen-brackets-list unidata-gen-table-character "uni-brackets.el" | ||
| 278 | "Unicode bidi paired-bracket characters. | ||
| 279 | Property value is the paired bracket character, or nil. | ||
| 280 | The value nil means that the character is neither an opening nor | ||
| 281 | a closing paired bracket." | ||
| 282 | string) | ||
| 283 | (bracket-type | ||
| 284 | unidata-gen-bracket-type-list unidata-gen-table-symbol "uni-brackets.el" | ||
| 285 | "Unicode bidi paired-bracket type. | ||
| 286 | Property value is a symbol `o' (Open), `c' (Close), or `n' (None)." | ||
| 287 | unidata-describe-bidi-bracket-type | ||
| 288 | n | ||
| 289 | ;; The order of elements must be in sync with bidi_bracket_type_t | ||
| 290 | ;; in src/dispextern.h. | ||
| 291 | (n o c)))) | ||
| 275 | 292 | ||
| 276 | ;; Functions to access the above data. | 293 | ;; Functions to access the above data. |
| 277 | (defsubst unidata-prop-index (prop) (nth 1 (assq prop unidata-prop-alist))) | 294 | (defsubst unidata-prop-index (prop) (nth 1 (assq prop unidata-prop-alist))) |
| @@ -449,7 +466,10 @@ is the character itself."))) | |||
| 449 | (unidata-encode-val val-list (nth 2 elm))) | 466 | (unidata-encode-val val-list (nth 2 elm))) |
| 450 | (set-char-table-range table (cons (car elm) (nth 1 elm)) (nth 2 elm))) | 467 | (set-char-table-range table (cons (car elm) (nth 1 elm)) (nth 2 elm))) |
| 451 | 468 | ||
| 452 | (setq tail unidata-list) | 469 | (if (functionp prop-idx) |
| 470 | (setq tail (funcall prop-idx) | ||
| 471 | prop-idx 1) | ||
| 472 | (setq tail unidata-list)) | ||
| 453 | (while tail | 473 | (while tail |
| 454 | (setq elt (car tail) tail (cdr tail)) | 474 | (setq elt (car tail) tail (cdr tail)) |
| 455 | (setq range (car elt) | 475 | (setq range (car elt) |
| @@ -854,7 +874,7 @@ is the character itself."))) | |||
| 854 | ;; The following command yields a file of about 96K bytes. | 874 | ;; The following command yields a file of about 96K bytes. |
| 855 | ;; % gawk -F ';' '{print $1,$2;}' < UnicodeData.txt | gzip > temp.gz | 875 | ;; % gawk -F ';' '{print $1,$2;}' < UnicodeData.txt | gzip > temp.gz |
| 856 | ;; With the following function, we can get a file of almost the same | 876 | ;; With the following function, we can get a file of almost the same |
| 857 | ;; the size. | 877 | ;; size. |
| 858 | 878 | ||
| 859 | ;; Generate a char-table for character names. | 879 | ;; Generate a char-table for character names. |
| 860 | 880 | ||
| @@ -1159,6 +1179,12 @@ is the character itself."))) | |||
| 1159 | (string ?')))) | 1179 | (string ?')))) |
| 1160 | val " ")) | 1180 | val " ")) |
| 1161 | 1181 | ||
| 1182 | (defun unidata-describe-bidi-bracket-type (val) | ||
| 1183 | (cdr (assq val | ||
| 1184 | '((n . "Not a paired bracket character.") | ||
| 1185 | (o . "Opening paired bracket character.") | ||
| 1186 | (c . "Closing paired bracket character."))))) | ||
| 1187 | |||
| 1162 | (defun unidata-gen-mirroring-list () | 1188 | (defun unidata-gen-mirroring-list () |
| 1163 | (let ((head (list nil)) | 1189 | (let ((head (list nil)) |
| 1164 | tail) | 1190 | tail) |
| @@ -1172,6 +1198,36 @@ is the character itself."))) | |||
| 1172 | (setq tail (setcdr tail (list (list char mirror))))))) | 1198 | (setq tail (setcdr tail (list (list char mirror))))))) |
| 1173 | (cdr head))) | 1199 | (cdr head))) |
| 1174 | 1200 | ||
| 1201 | (defun unidata-gen-brackets-list () | ||
| 1202 | (let ((head (list nil)) | ||
| 1203 | tail) | ||
| 1204 | (with-temp-buffer | ||
| 1205 | (insert-file-contents (expand-file-name "BidiBrackets.txt" unidata-dir)) | ||
| 1206 | (goto-char (point-min)) | ||
| 1207 | (setq tail head) | ||
| 1208 | (while (re-search-forward | ||
| 1209 | "^\\([0-9A-F]+\\);\\s +\\([0-9A-F]+\\);\\s +\\([oc]\\)" | ||
| 1210 | nil t) | ||
| 1211 | (let ((char (string-to-number (match-string 1) 16)) | ||
| 1212 | (paired (match-string 2))) | ||
| 1213 | (setq tail (setcdr tail (list (list char paired))))))) | ||
| 1214 | (cdr head))) | ||
| 1215 | |||
| 1216 | (defun unidata-gen-bracket-type-list () | ||
| 1217 | (let ((head (list nil)) | ||
| 1218 | tail) | ||
| 1219 | (with-temp-buffer | ||
| 1220 | (insert-file-contents (expand-file-name "BidiBrackets.txt" unidata-dir)) | ||
| 1221 | (goto-char (point-min)) | ||
| 1222 | (setq tail head) | ||
| 1223 | (while (re-search-forward | ||
| 1224 | "^\\([0-9A-F]+\\);\\s +\\([0-9A-F]+\\);\\s +\\([oc]\\)" | ||
| 1225 | nil t) | ||
| 1226 | (let ((char (string-to-number (match-string 1) 16)) | ||
| 1227 | (type (match-string 3))) | ||
| 1228 | (setq tail (setcdr tail (list (list char type))))))) | ||
| 1229 | (cdr head))) | ||
| 1230 | |||
| 1175 | ;; Verify if we can retrieve correct values from the generated | 1231 | ;; Verify if we can retrieve correct values from the generated |
| 1176 | ;; char-tables. | 1232 | ;; char-tables. |
| 1177 | 1233 | ||
| @@ -1180,19 +1236,30 @@ is the character itself."))) | |||
| 1180 | (let* ((prop (car elt)) | 1236 | (let* ((prop (car elt)) |
| 1181 | (index (unidata-prop-index prop)) | 1237 | (index (unidata-prop-index prop)) |
| 1182 | (generator (unidata-prop-generator prop)) | 1238 | (generator (unidata-prop-generator prop)) |
| 1239 | (default-value (unidata-prop-default prop)) | ||
| 1240 | (val-list (unidata-prop-val-list prop)) | ||
| 1183 | (table (progn | 1241 | (table (progn |
| 1184 | (message "Generating %S table..." prop) | 1242 | (message "Generating %S table..." prop) |
| 1185 | (funcall generator prop))) | 1243 | (funcall generator prop default-value val-list))) |
| 1186 | (decoder (char-table-extra-slot table 1)) | 1244 | (decoder (char-table-extra-slot table 1)) |
| 1245 | (alist (and (functionp index) | ||
| 1246 | (funcall index))) | ||
| 1187 | (check #x400)) | 1247 | (check #x400)) |
| 1188 | (dolist (e unidata-list) | 1248 | (dolist (e unidata-list) |
| 1189 | (let ((char (car e)) | 1249 | (let* ((char (car e)) |
| 1190 | (val1 (nth index e)) | 1250 | (val1 |
| 1191 | val2) | 1251 | (if alist (nth 1 (assoc char alist)) |
| 1252 | (nth index e))) | ||
| 1253 | val2) | ||
| 1192 | (if (and (stringp val1) (= (length val1) 0)) | 1254 | (if (and (stringp val1) (= (length val1) 0)) |
| 1193 | (setq val1 nil)) | 1255 | (setq val1 nil)) |
| 1194 | (unless (consp char) | 1256 | (unless (or (consp char) |
| 1195 | (setq val2 (funcall decoder char (aref table char) table)) | 1257 | (integerp decoder)) |
| 1258 | (setq val2 | ||
| 1259 | (cond ((functionp decoder) | ||
| 1260 | (funcall decoder char (aref table char) table)) | ||
| 1261 | (t ; must be nil | ||
| 1262 | (aref table char)))) | ||
| 1196 | (if val1 | 1263 | (if val1 |
| 1197 | (cond ((eq generator 'unidata-gen-table-symbol) | 1264 | (cond ((eq generator 'unidata-gen-table-symbol) |
| 1198 | (setq val1 (intern val1))) | 1265 | (setq val1 (intern val1))) |
| @@ -1201,11 +1268,17 @@ is the character itself."))) | |||
| 1201 | ((eq generator 'unidata-gen-table-character) | 1268 | ((eq generator 'unidata-gen-table-character) |
| 1202 | (setq val1 (string-to-number val1 16))) | 1269 | (setq val1 (string-to-number val1 16))) |
| 1203 | ((eq generator 'unidata-gen-table-decomposition) | 1270 | ((eq generator 'unidata-gen-table-decomposition) |
| 1204 | (setq val1 (unidata-split-decomposition val1))))) | 1271 | (setq val1 (unidata-split-decomposition val1)))) |
| 1272 | (cond ((eq prop 'decomposition) | ||
| 1273 | (setq val1 (list char))) | ||
| 1274 | ((eq prop 'bracket-type) | ||
| 1275 | (setq val1 'n)))) | ||
| 1205 | (when (>= char check) | 1276 | (when (>= char check) |
| 1206 | (message "%S %04X" prop check) | 1277 | (message "%S %04X" prop check) |
| 1207 | (setq check (+ check #x400))) | 1278 | (setq check (+ check #x400))) |
| 1208 | (or (equal val1 val2) | 1279 | (or (equal val1 val2) |
| 1280 | ;; <control> characters get a 'name' property of nil | ||
| 1281 | (and (eq prop 'name) (string= val1 "<control>") (null val2)) | ||
| 1209 | (insert (format "> %04X %S\n< %04X %S\n" | 1282 | (insert (format "> %04X %S\n< %04X %S\n" |
| 1210 | char val1 char val2))) | 1283 | char val1 char val2))) |
| 1211 | (sit-for 0))))))) | 1284 | (sit-for 0))))))) |
| @@ -1242,6 +1315,9 @@ is the character itself."))) | |||
| 1242 | (describer (unidata-prop-describer prop)) | 1315 | (describer (unidata-prop-describer prop)) |
| 1243 | (default-value (unidata-prop-default prop)) | 1316 | (default-value (unidata-prop-default prop)) |
| 1244 | (val-list (unidata-prop-val-list prop)) | 1317 | (val-list (unidata-prop-val-list prop)) |
| 1318 | ;; Avoid creating backup files for those uni-*.el files | ||
| 1319 | ;; that hold more than one table. | ||
| 1320 | (backup-inhibited t) | ||
| 1245 | table) | 1321 | table) |
| 1246 | ;; Filename in this comment line is extracted by sed in | 1322 | ;; Filename in this comment line is extracted by sed in |
| 1247 | ;; Makefile. | 1323 | ;; Makefile. |
| @@ -1261,7 +1337,7 @@ is the character itself."))) | |||
| 1261 | (setq describer (symbol-function describer))) | 1337 | (setq describer (symbol-function describer))) |
| 1262 | (set-char-table-extra-slot table 3 describer)) | 1338 | (set-char-table-extra-slot table 3 describer)) |
| 1263 | (if (bobp) | 1339 | (if (bobp) |
| 1264 | (insert ";; Copyright (C) 1991-2013 Unicode, Inc. | 1340 | (insert ";; Copyright (C) 1991-2014 Unicode, Inc. |
| 1265 | ;; This file was generated from the Unicode data files at | 1341 | ;; This file was generated from the Unicode data files at |
| 1266 | ;; http://www.unicode.org/Public/UNIDATA/. | 1342 | ;; http://www.unicode.org/Public/UNIDATA/. |
| 1267 | ;; See lisp/international/README for the copyright and permission notice.\n")) | 1343 | ;; See lisp/international/README for the copyright and permission notice.\n")) |