diff options
| author | Eli Zaretskii | 2013-12-04 18:58:05 +0200 |
|---|---|---|
| committer | Eli Zaretskii | 2013-12-04 18:58:05 +0200 |
| commit | ad82612405604b7d20d86fe6b3283f91bee5e60a (patch) | |
| tree | 569b5c2c0e1e57df688bcde545d6bdf77711fdb3 | |
| parent | 456760a5b4595b84f230e527ab148d296e7f8252 (diff) | |
| download | emacs-ad82612405604b7d20d86fe6b3283f91bee5e60a.tar.gz emacs-ad82612405604b7d20d86fe6b3283f91bee5e60a.zip | |
Fix bug #16043 with crashes when displaying new bidi control characters.
src/bidi.c (bidi_get_type, bidi_get_category): Handle the isolate
directional control characters. Update type and category
determination according to the UBA from Unicode v6.3.
(bidi_category_t): New category EXPLICIT_FORMATTING.
src/dispextern.h (bidi_type_t): Update to include new bidirectional
properties introduced with Unicode v6.3.
admin/unidata/unidata-gen.el (unidata-prop-alist): Update bidi-class
to include the new isolate-related classes introduced with Unicode
v6.3.
(unidata-encode-val): Accept an additional optional argument, a
warning message to emit when UnicodeData.txt defines bidi-class
values that are not in unidata-prop-alist. Add a comment
explaining what maintainers should do if/when such a warning ever
appears.
(unidata-gen-table): Call unidata-encode-val with 3rd arg non-nil
when generating uni-bidi.el.
| -rw-r--r-- | admin/ChangeLog | 13 | ||||
| -rw-r--r-- | admin/unidata/unidata-gen.el | 32 | ||||
| -rw-r--r-- | src/ChangeLog | 10 | ||||
| -rw-r--r-- | src/bidi.c | 58 | ||||
| -rw-r--r-- | src/dispextern.h | 4 |
5 files changed, 87 insertions, 30 deletions
diff --git a/admin/ChangeLog b/admin/ChangeLog index 7d23542a84e..730253e616f 100644 --- a/admin/ChangeLog +++ b/admin/ChangeLog | |||
| @@ -1,3 +1,16 @@ | |||
| 1 | 2013-12-04 Eli Zaretskii <eliz@gnu.org> | ||
| 2 | |||
| 3 | * unidata/unidata-gen.el (unidata-prop-alist): Update bidi-class | ||
| 4 | to include the new isolate-related classes introduced with Unicode | ||
| 5 | v6.3. | ||
| 6 | (unidata-encode-val): Accept an additional optional argument, a | ||
| 7 | warning message to emit when UnicodeData.txt defines bidi-class | ||
| 8 | values that are not in unidata-prop-alist. Add a comment | ||
| 9 | explaining what should maintainers do if/when such a warning ever | ||
| 10 | appears. | ||
| 11 | (unidata-gen-table): Call unidata-encode-val with 3rd arg non-nil | ||
| 12 | when generating uni-bidi.el. | ||
| 13 | |||
| 1 | 2013-12-01 Glenn Morris <rgm@gnu.org> | 14 | 2013-12-01 Glenn Morris <rgm@gnu.org> |
| 2 | 15 | ||
| 3 | * unidata/Makefile.in (${DSTDIR}/charprop.el): | 16 | * unidata/Makefile.in (${DSTDIR}/charprop.el): |
diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el index 42e1cc0bc3c..fa8f81636e3 100644 --- a/admin/unidata/unidata-gen.el +++ b/admin/unidata/unidata-gen.el | |||
| @@ -194,8 +194,8 @@ Property value is an integer." | |||
| 194 | 4 unidata-gen-table-symbol "uni-bidi.el" | 194 | 4 unidata-gen-table-symbol "uni-bidi.el" |
| 195 | "Unicode bidi class. | 195 | "Unicode bidi class. |
| 196 | Property value is one of the following symbols: | 196 | Property value is one of the following symbols: |
| 197 | L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET, | 197 | L, LRE, LRO, LRI, R, AL, RLE, RLO, RLI, FSI, PDF, PDI, |
| 198 | AN, CS, NSM, BN, B, S, WS, ON" | 198 | EN, ES, ET, AN, CS, NSM, BN, B, S, WS, ON" |
| 199 | unidata-describe-bidi-class | 199 | unidata-describe-bidi-class |
| 200 | ;; The assignment of default values to blocks of code points | 200 | ;; The assignment of default values to blocks of code points |
| 201 | ;; follows the file DerivedBidiClass.txt from the Unicode | 201 | ;; follows the file DerivedBidiClass.txt from the Unicode |
| @@ -205,7 +205,8 @@ Property value is one of the following symbols: | |||
| 205 | (#xFB1D #xFB4F R) (#x10800 #x10FFF R) (#x1E800 #x1EFFF R)) | 205 | (#xFB1D #xFB4F R) (#x10800 #x10FFF R) (#x1E800 #x1EFFF R)) |
| 206 | ;; The order of elements must be in sync with bidi_type_t in | 206 | ;; The order of elements must be in sync with bidi_type_t in |
| 207 | ;; src/dispextern.h. | 207 | ;; src/dispextern.h. |
| 208 | (L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON)) | 208 | (L R EN AN BN B AL LRE LRO RLE RLO PDF LRI RLI FSI PDI |
| 209 | ES ET CS NSM S WS ON)) | ||
| 209 | (decomposition | 210 | (decomposition |
| 210 | 5 unidata-gen-table-decomposition "uni-decomposition.el" | 211 | 5 unidata-gen-table-decomposition "uni-decomposition.el" |
| 211 | "Unicode decomposition mapping. | 212 | "Unicode decomposition mapping. |
| @@ -397,12 +398,17 @@ is the character itself."))) | |||
| 397 | ;; If VAL is one of VALn, just return n. | 398 | ;; If VAL is one of VALn, just return n. |
| 398 | ;; Otherwise, VAL-LIST is modified to this: | 399 | ;; Otherwise, VAL-LIST is modified to this: |
| 399 | ;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ... (VAL . n+1)) | 400 | ;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ... (VAL . n+1)) |
| 401 | ;; | ||
| 402 | ;; WARN is an optional warning to display when the value list is | ||
| 403 | ;; extended, for property values that need to be in sync with other | ||
| 404 | ;; parts of Emacs; currently only used for bidi-class. | ||
| 400 | 405 | ||
| 401 | (defun unidata-encode-val (val-list val) | 406 | (defun unidata-encode-val (val-list val &optional warn) |
| 402 | (let ((slot (assoc val val-list)) | 407 | (let ((slot (assoc val val-list)) |
| 403 | val-code) | 408 | val-code) |
| 404 | (if slot | 409 | (if slot |
| 405 | (cdr slot) | 410 | (cdr slot) |
| 411 | (if warn (message warn val)) | ||
| 406 | (setq val-code (length val-list)) | 412 | (setq val-code (length val-list)) |
| 407 | (nconc val-list (list (cons val val-code))) | 413 | (nconc val-list (list (cons val val-code))) |
| 408 | val-code))) | 414 | val-code))) |
| @@ -413,6 +419,16 @@ is the character itself."))) | |||
| 413 | (let ((table (make-char-table 'char-code-property-table)) | 419 | (let ((table (make-char-table 'char-code-property-table)) |
| 414 | (prop-idx (unidata-prop-index prop)) | 420 | (prop-idx (unidata-prop-index prop)) |
| 415 | (vec (make-vector 128 0)) | 421 | (vec (make-vector 128 0)) |
| 422 | ;; When this warning is printed, there's a need to make the | ||
| 423 | ;; following changes: | ||
| 424 | ;; (1) update unidata-prop-alist with the new bidi-class values; | ||
| 425 | ;; (2) extend bidi_type_t enumeration on src/dispextern.h to | ||
| 426 | ;; include the new classes; | ||
| 427 | ;; (3) possibly update the assertion in bidi.c:bidi_check_type; and | ||
| 428 | ;; (4) possibly update the switch cases in | ||
| 429 | ;; bidi.c:bidi_get_type and bidi.c:bidi_get_category. | ||
| 430 | (bidi-warning "\ | ||
| 431 | ** Found new bidi-class '%s', please update bidi.c and dispextern.h") | ||
| 416 | tail elt range val val-code idx slot | 432 | tail elt range val val-code idx slot |
| 417 | prev-range-data) | 433 | prev-range-data) |
| 418 | (setq val-list (cons nil (copy-sequence val-list))) | 434 | (setq val-list (cons nil (copy-sequence val-list))) |
| @@ -438,7 +454,9 @@ is the character itself."))) | |||
| 438 | (setq elt (car tail) tail (cdr tail)) | 454 | (setq elt (car tail) tail (cdr tail)) |
| 439 | (setq range (car elt) | 455 | (setq range (car elt) |
| 440 | val (funcall val-func (nth prop-idx elt))) | 456 | val (funcall val-func (nth prop-idx elt))) |
| 441 | (setq val-code (if val (unidata-encode-val val-list val))) | 457 | (setq val-code (if val (unidata-encode-val val-list val |
| 458 | (and (eq prop 'bidi-class) | ||
| 459 | bidi-warning)))) | ||
| 442 | (if (consp range) | 460 | (if (consp range) |
| 443 | (when val-code | 461 | (when val-code |
| 444 | (set-char-table-range table range val-code) | 462 | (set-char-table-range table range val-code) |
| @@ -486,7 +504,9 @@ is the character itself."))) | |||
| 486 | (setq new-val (funcall val-func (nth prop-idx elt))) | 504 | (setq new-val (funcall val-func (nth prop-idx elt))) |
| 487 | (if (not (eq val new-val)) | 505 | (if (not (eq val new-val)) |
| 488 | (setq val new-val | 506 | (setq val new-val |
| 489 | val-code (if val (unidata-encode-val val-list val)))) | 507 | val-code (if val (unidata-encode-val |
| 508 | val-list val (and (eq prop 'bidi-class) | ||
| 509 | bidi-warning))))) | ||
| 490 | (if val-code | 510 | (if val-code |
| 491 | (aset vec (- range start) val-code)) | 511 | (aset vec (- range start) val-code)) |
| 492 | (setq tail (cdr tail))) | 512 | (setq tail (cdr tail))) |
diff --git a/src/ChangeLog b/src/ChangeLog index a877bc885e7..4c7b3015877 100644 --- a/src/ChangeLog +++ b/src/ChangeLog | |||
| @@ -1,3 +1,13 @@ | |||
| 1 | 2013-12-04 Eli Zaretskii <eliz@gnu.org> | ||
| 2 | |||
| 3 | * bidi.c (bidi_get_type, bidi_get_category): Handle the isolate | ||
| 4 | directional control characters. Update type and category | ||
| 5 | determination according to the UBA from Unicode v6.3. | ||
| 6 | (bidi_category_t): New category EXPLICIT_FORMATTING. | ||
| 7 | |||
| 8 | * dispextern.h (bidi_type_t): Update to include new bidirectional | ||
| 9 | properties introduced with Unicode v6.3. (Bug#16043) | ||
| 10 | |||
| 1 | 2013-12-04 Martin Rudalics <rudalics@gmx.at> | 11 | 2013-12-04 Martin Rudalics <rudalics@gmx.at> |
| 2 | 12 | ||
| 3 | * xterm.c (XTflash): Fix coordinate of bottom area to flash | 13 | * xterm.c (XTflash): Fix coordinate of bottom area to flash |
diff --git a/src/bidi.c b/src/bidi.c index 98c3c17b78f..d3a617651f6 100644 --- a/src/bidi.c +++ b/src/bidi.c | |||
| @@ -76,7 +76,8 @@ typedef enum { | |||
| 76 | UNKNOWN_BC, | 76 | UNKNOWN_BC, |
| 77 | NEUTRAL, | 77 | NEUTRAL, |
| 78 | WEAK, | 78 | WEAK, |
| 79 | STRONG | 79 | STRONG, |
| 80 | EXPLICIT_FORMATTING | ||
| 80 | } bidi_category_t; | 81 | } bidi_category_t; |
| 81 | 82 | ||
| 82 | /* UAX#9 says to search only for L, AL, or R types of characters, and | 83 | /* UAX#9 says to search only for L, AL, or R types of characters, and |
| @@ -115,13 +116,9 @@ bidi_get_type (int ch, bidi_dir_t override) | |||
| 115 | if (default_type == UNKNOWN_BT) | 116 | if (default_type == UNKNOWN_BT) |
| 116 | emacs_abort (); | 117 | emacs_abort (); |
| 117 | 118 | ||
| 118 | if (override == NEUTRAL_DIR) | ||
| 119 | return default_type; | ||
| 120 | |||
| 121 | switch (default_type) | 119 | switch (default_type) |
| 122 | { | 120 | { |
| 123 | /* Although UAX#9 does not tell, it doesn't make sense to | 121 | case WEAK_BN: |
| 124 | override NEUTRAL_B and LRM/RLM characters. */ | ||
| 125 | case NEUTRAL_B: | 122 | case NEUTRAL_B: |
| 126 | case LRE: | 123 | case LRE: |
| 127 | case LRO: | 124 | case LRO: |
| @@ -129,20 +126,20 @@ bidi_get_type (int ch, bidi_dir_t override) | |||
| 129 | case RLO: | 126 | case RLO: |
| 130 | case PDF: | 127 | case PDF: |
| 131 | return default_type; | 128 | return default_type; |
| 129 | /* FIXME: The isolate controls are treated as BN until we add | ||
| 130 | support for UBA v6.3. */ | ||
| 131 | case LRI: | ||
| 132 | case RLI: | ||
| 133 | case FSI: | ||
| 134 | case PDI: | ||
| 135 | return WEAK_BN; | ||
| 132 | default: | 136 | default: |
| 133 | switch (ch) | 137 | if (override == L2R) |
| 134 | { | 138 | return STRONG_L; |
| 135 | case LRM_CHAR: | 139 | else if (override == R2L) |
| 136 | case RLM_CHAR: | 140 | return STRONG_R; |
| 137 | return default_type; | 141 | else |
| 138 | default: | 142 | return default_type; |
| 139 | if (override == L2R) /* X6 */ | ||
| 140 | return STRONG_L; | ||
| 141 | else if (override == R2L) | ||
| 142 | return STRONG_R; | ||
| 143 | else | ||
| 144 | emacs_abort (); /* can't happen: handled above */ | ||
| 145 | } | ||
| 146 | } | 143 | } |
| 147 | } | 144 | } |
| 148 | 145 | ||
| @@ -163,12 +160,7 @@ bidi_get_category (bidi_type_t type) | |||
| 163 | case STRONG_L: | 160 | case STRONG_L: |
| 164 | case STRONG_R: | 161 | case STRONG_R: |
| 165 | case STRONG_AL: | 162 | case STRONG_AL: |
| 166 | case LRE: | ||
| 167 | case LRO: | ||
| 168 | case RLE: | ||
| 169 | case RLO: | ||
| 170 | return STRONG; | 163 | return STRONG; |
| 171 | case PDF: /* ??? really?? */ | ||
| 172 | case WEAK_EN: | 164 | case WEAK_EN: |
| 173 | case WEAK_ES: | 165 | case WEAK_ES: |
| 174 | case WEAK_ET: | 166 | case WEAK_ET: |
| @@ -176,12 +168,30 @@ bidi_get_category (bidi_type_t type) | |||
| 176 | case WEAK_CS: | 168 | case WEAK_CS: |
| 177 | case WEAK_NSM: | 169 | case WEAK_NSM: |
| 178 | case WEAK_BN: | 170 | case WEAK_BN: |
| 171 | /* FIXME */ | ||
| 172 | case LRI: | ||
| 173 | case RLI: | ||
| 174 | case FSI: | ||
| 175 | case PDI: | ||
| 179 | return WEAK; | 176 | return WEAK; |
| 180 | case NEUTRAL_B: | 177 | case NEUTRAL_B: |
| 181 | case NEUTRAL_S: | 178 | case NEUTRAL_S: |
| 182 | case NEUTRAL_WS: | 179 | case NEUTRAL_WS: |
| 183 | case NEUTRAL_ON: | 180 | case NEUTRAL_ON: |
| 184 | return NEUTRAL; | 181 | return NEUTRAL; |
| 182 | case LRE: | ||
| 183 | case LRO: | ||
| 184 | case RLE: | ||
| 185 | case RLO: | ||
| 186 | case PDF: | ||
| 187 | #if 0 | ||
| 188 | /* FIXME: This awaits implementation of isolate support. */ | ||
| 189 | case LRI: | ||
| 190 | case RLI: | ||
| 191 | case FSI: | ||
| 192 | case PDI: | ||
| 193 | #endif | ||
| 194 | return EXPLICIT_FORMATTING; | ||
| 185 | default: | 195 | default: |
| 186 | emacs_abort (); | 196 | emacs_abort (); |
| 187 | } | 197 | } |
diff --git a/src/dispextern.h b/src/dispextern.h index 2ce0a8f4c99..7de4edf2196 100644 --- a/src/dispextern.h +++ b/src/dispextern.h | |||
| @@ -1895,6 +1895,10 @@ typedef enum { | |||
| 1895 | RLE, /* right-to-left embedding */ | 1895 | RLE, /* right-to-left embedding */ |
| 1896 | RLO, /* right-to-left override */ | 1896 | RLO, /* right-to-left override */ |
| 1897 | PDF, /* pop directional format */ | 1897 | PDF, /* pop directional format */ |
| 1898 | LRI, /* left-to-right isolate */ | ||
| 1899 | RLI, /* right-to-left isolate */ | ||
| 1900 | FSI, /* first strong isolate */ | ||
| 1901 | PDI, /* pop directional isolate */ | ||
| 1898 | WEAK_ES, /* european number separator */ | 1902 | WEAK_ES, /* european number separator */ |
| 1899 | WEAK_ET, /* european number terminator */ | 1903 | WEAK_ET, /* european number terminator */ |
| 1900 | WEAK_CS, /* common separator */ | 1904 | WEAK_CS, /* common separator */ |