aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Eli Zaretskii, 2013-12-04 18:58:05 +0200
committer: Eli Zaretskii, 2013-12-04 18:58:05 +0200
commit: ad82612405604b7d20d86fe6b3283f91bee5e60a (patch)
tree: 569b5c2c0e1e57df688bcde545d6bdf77711fdb3
parent: 456760a5b4595b84f230e527ab148d296e7f8252 (diff)
download: emacs-ad82612405604b7d20d86fe6b3283f91bee5e60a.tar.gz
download: emacs-ad82612405604b7d20d86fe6b3283f91bee5e60a.zip
Fix bug #16043 with crashes when displaying new bidi control characters.
src/bidi.c (bidi_get_type, bidi_get_category): Handle the isolate directional control characters. Update type and category determination according to the UBA from Unicode v6.3.
(bidi_category_t): New category EXPLICIT_FORMATTING.
src/dispextern.h (bidi_type_t): Update to include new bidirectional properties introduced with Unicode v6.3.
admin/unidata/unidata-gen.el (unidata-prop-alist): Update bidi-class to include the new isolate-related classes introduced with Unicode v6.3.
(unidata-encode-val): Accept an additional optional argument, a warning message to emit when UnicodeData.txt defines bidi-class values that are not in unidata-prop-alist. Add a comment explaining what maintainers should do if/when such a warning ever appears.
(unidata-gen-table): Call unidata-encode-val with 3rd arg non-nil when generating uni-bidi.el.
-rw-r--r-- admin/ChangeLog 13
-rw-r--r-- admin/unidata/unidata-gen.el 32
-rw-r--r-- src/ChangeLog 10
-rw-r--r-- src/bidi.c 58
-rw-r--r-- src/dispextern.h 4
5 files changed, 87 insertions, 30 deletions
diff --git a/admin/ChangeLog b/admin/ChangeLog
index 7d23542a84e..730253e616f 100644
--- a/admin/ChangeLog
+++ b/admin/ChangeLog
@@ -1,3 +1,16 @@
12013-12-04 Eli Zaretskii <eliz@gnu.org>
2
3 * unidata/unidata-gen.el (unidata-prop-alist): Update bidi-class
4 to include the new isolate-related classes introduced with Unicode
5 v6.3.
6 (unidata-encode-val): Accept an additional optional argument, a
7 warning message to emit when UnicodeData.txt defines bidi-class
8 values that are not in unidata-prop-alist. Add a comment
9 explaining what should maintainers do if/when such a warning ever
10 appears.
11 (unidata-gen-table): Call unidata-encode-val with 3rd arg non-nil
12 when generating uni-bidi.el.
13
12013-12-01 Glenn Morris <rgm@gnu.org> 142013-12-01 Glenn Morris <rgm@gnu.org>
2 15
3 * unidata/Makefile.in (${DSTDIR}/charprop.el): 16 * unidata/Makefile.in (${DSTDIR}/charprop.el):
diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el
index 42e1cc0bc3c..fa8f81636e3 100644
--- a/admin/unidata/unidata-gen.el
+++ b/admin/unidata/unidata-gen.el
@@ -194,8 +194,8 @@ Property value is an integer."
194 4 unidata-gen-table-symbol "uni-bidi.el" 194 4 unidata-gen-table-symbol "uni-bidi.el"
195 "Unicode bidi class. 195 "Unicode bidi class.
196Property value is one of the following symbols: 196Property value is one of the following symbols:
197 L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET, 197 L, LRE, LRO, LRI, R, AL, RLE, RLO, RLI, FSI, PDF, PDI,
198 AN, CS, NSM, BN, B, S, WS, ON" 198 EN, ES, ET, AN, CS, NSM, BN, B, S, WS, ON"
199 unidata-describe-bidi-class 199 unidata-describe-bidi-class
200 ;; The assignment of default values to blocks of code points 200 ;; The assignment of default values to blocks of code points
201 ;; follows the file DerivedBidiClass.txt from the Unicode 201 ;; follows the file DerivedBidiClass.txt from the Unicode
@@ -205,7 +205,8 @@ Property value is one of the following symbols:
205 (#xFB1D #xFB4F R) (#x10800 #x10FFF R) (#x1E800 #x1EFFF R)) 205 (#xFB1D #xFB4F R) (#x10800 #x10FFF R) (#x1E800 #x1EFFF R))
206 ;; The order of elements must be in sync with bidi_type_t in 206 ;; The order of elements must be in sync with bidi_type_t in
207 ;; src/dispextern.h. 207 ;; src/dispextern.h.
208 (L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON)) 208 (L R EN AN BN B AL LRE LRO RLE RLO PDF LRI RLI FSI PDI
209 ES ET CS NSM S WS ON))
209 (decomposition 210 (decomposition
210 5 unidata-gen-table-decomposition "uni-decomposition.el" 211 5 unidata-gen-table-decomposition "uni-decomposition.el"
211 "Unicode decomposition mapping. 212 "Unicode decomposition mapping.
@@ -397,12 +398,17 @@ is the character itself.")))
397;; If VAL is one of VALn, just return n. 398;; If VAL is one of VALn, just return n.
398;; Otherwise, VAL-LIST is modified to this: 399;; Otherwise, VAL-LIST is modified to this:
399;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ... (VAL . n+1)) 400;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ... (VAL . n+1))
401;;
402;; WARN is an optional warning to display when the value list is
403;; extended, for property values that need to be in sync with other
404;; parts of Emacs; currently only used for bidi-class.
400 405
401(defun unidata-encode-val (val-list val) 406(defun unidata-encode-val (val-list val &optional warn)
402 (let ((slot (assoc val val-list)) 407 (let ((slot (assoc val val-list))
403 val-code) 408 val-code)
404 (if slot 409 (if slot
405 (cdr slot) 410 (cdr slot)
411 (if warn (message warn val))
406 (setq val-code (length val-list)) 412 (setq val-code (length val-list))
407 (nconc val-list (list (cons val val-code))) 413 (nconc val-list (list (cons val val-code)))
408 val-code))) 414 val-code)))
@@ -413,6 +419,16 @@ is the character itself.")))
413 (let ((table (make-char-table 'char-code-property-table)) 419 (let ((table (make-char-table 'char-code-property-table))
414 (prop-idx (unidata-prop-index prop)) 420 (prop-idx (unidata-prop-index prop))
415 (vec (make-vector 128 0)) 421 (vec (make-vector 128 0))
422 ;; When this warning is printed, there's a need to make the
423 ;; following changes:
424 ;; (1) update unidata-prop-alist with the new bidi-class values;
425 ;; (2) extend bidi_type_t enumeration on src/dispextern.h to
426 ;; include the new classes;
427 ;; (3) possibly update the assertion in bidi.c:bidi_check_type; and
428 ;; (4) possibly update the switch cases in
429 ;; bidi.c:bidi_get_type and bidi.c:bidi_get_category.
430 (bidi-warning "\
431** Found new bidi-class '%s', please update bidi.c and dispextern.h")
416 tail elt range val val-code idx slot 432 tail elt range val val-code idx slot
417 prev-range-data) 433 prev-range-data)
418 (setq val-list (cons nil (copy-sequence val-list))) 434 (setq val-list (cons nil (copy-sequence val-list)))
@@ -438,7 +454,9 @@ is the character itself.")))
438 (setq elt (car tail) tail (cdr tail)) 454 (setq elt (car tail) tail (cdr tail))
439 (setq range (car elt) 455 (setq range (car elt)
440 val (funcall val-func (nth prop-idx elt))) 456 val (funcall val-func (nth prop-idx elt)))
441 (setq val-code (if val (unidata-encode-val val-list val))) 457 (setq val-code (if val (unidata-encode-val val-list val
458 (and (eq prop 'bidi-class)
459 bidi-warning))))
442 (if (consp range) 460 (if (consp range)
443 (when val-code 461 (when val-code
444 (set-char-table-range table range val-code) 462 (set-char-table-range table range val-code)
@@ -486,7 +504,9 @@ is the character itself.")))
486 (setq new-val (funcall val-func (nth prop-idx elt))) 504 (setq new-val (funcall val-func (nth prop-idx elt)))
487 (if (not (eq val new-val)) 505 (if (not (eq val new-val))
488 (setq val new-val 506 (setq val new-val
489 val-code (if val (unidata-encode-val val-list val)))) 507 val-code (if val (unidata-encode-val
508 val-list val (and (eq prop 'bidi-class)
509 bidi-warning)))))
490 (if val-code 510 (if val-code
491 (aset vec (- range start) val-code)) 511 (aset vec (- range start) val-code))
492 (setq tail (cdr tail))) 512 (setq tail (cdr tail)))
diff --git a/src/ChangeLog b/src/ChangeLog
index a877bc885e7..4c7b3015877 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,13 @@
12013-12-04 Eli Zaretskii <eliz@gnu.org>
2
3 * bidi.c (bidi_get_type, bidi_get_category): Handle the isolate
4 directional control characters. Update type and category
5 determination according to the UBA from Unicode v6.3.
6 (bidi_category_t): New category EXPLICIT_FORMATTING.
7
8 * dispextern.h (bidi_type_t): Update to include new bidirectional
9 properties introduced with Unicode v6.3. (Bug#16043)
10
12013-12-04 Martin Rudalics <rudalics@gmx.at> 112013-12-04 Martin Rudalics <rudalics@gmx.at>
2 12
3 * xterm.c (XTflash): Fix coordinate of bottom area to flash 13 * xterm.c (XTflash): Fix coordinate of bottom area to flash
diff --git a/src/bidi.c b/src/bidi.c
index 98c3c17b78f..d3a617651f6 100644
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -76,7 +76,8 @@ typedef enum {
76 UNKNOWN_BC, 76 UNKNOWN_BC,
77 NEUTRAL, 77 NEUTRAL,
78 WEAK, 78 WEAK,
79 STRONG 79 STRONG,
80 EXPLICIT_FORMATTING
80} bidi_category_t; 81} bidi_category_t;
81 82
82/* UAX#9 says to search only for L, AL, or R types of characters, and 83/* UAX#9 says to search only for L, AL, or R types of characters, and
@@ -115,13 +116,9 @@ bidi_get_type (int ch, bidi_dir_t override)
115 if (default_type == UNKNOWN_BT) 116 if (default_type == UNKNOWN_BT)
116 emacs_abort (); 117 emacs_abort ();
117 118
118 if (override == NEUTRAL_DIR)
119 return default_type;
120
121 switch (default_type) 119 switch (default_type)
122 { 120 {
123 /* Although UAX#9 does not tell, it doesn't make sense to 121 case WEAK_BN:
124 override NEUTRAL_B and LRM/RLM characters. */
125 case NEUTRAL_B: 122 case NEUTRAL_B:
126 case LRE: 123 case LRE:
127 case LRO: 124 case LRO:
@@ -129,20 +126,20 @@ bidi_get_type (int ch, bidi_dir_t override)
129 case RLO: 126 case RLO:
130 case PDF: 127 case PDF:
131 return default_type; 128 return default_type;
129 /* FIXME: The isolate controls are treated as BN until we add
130 support for UBA v6.3. */
131 case LRI:
132 case RLI:
133 case FSI:
134 case PDI:
135 return WEAK_BN;
132 default: 136 default:
133 switch (ch) 137 if (override == L2R)
134 { 138 return STRONG_L;
135 case LRM_CHAR: 139 else if (override == R2L)
136 case RLM_CHAR: 140 return STRONG_R;
137 return default_type; 141 else
138 default: 142 return default_type;
139 if (override == L2R) /* X6 */
140 return STRONG_L;
141 else if (override == R2L)
142 return STRONG_R;
143 else
144 emacs_abort (); /* can't happen: handled above */
145 }
146 } 143 }
147} 144}
148 145
@@ -163,12 +160,7 @@ bidi_get_category (bidi_type_t type)
163 case STRONG_L: 160 case STRONG_L:
164 case STRONG_R: 161 case STRONG_R:
165 case STRONG_AL: 162 case STRONG_AL:
166 case LRE:
167 case LRO:
168 case RLE:
169 case RLO:
170 return STRONG; 163 return STRONG;
171 case PDF: /* ??? really?? */
172 case WEAK_EN: 164 case WEAK_EN:
173 case WEAK_ES: 165 case WEAK_ES:
174 case WEAK_ET: 166 case WEAK_ET:
@@ -176,12 +168,30 @@ bidi_get_category (bidi_type_t type)
176 case WEAK_CS: 168 case WEAK_CS:
177 case WEAK_NSM: 169 case WEAK_NSM:
178 case WEAK_BN: 170 case WEAK_BN:
171 /* FIXME */
172 case LRI:
173 case RLI:
174 case FSI:
175 case PDI:
179 return WEAK; 176 return WEAK;
180 case NEUTRAL_B: 177 case NEUTRAL_B:
181 case NEUTRAL_S: 178 case NEUTRAL_S:
182 case NEUTRAL_WS: 179 case NEUTRAL_WS:
183 case NEUTRAL_ON: 180 case NEUTRAL_ON:
184 return NEUTRAL; 181 return NEUTRAL;
182 case LRE:
183 case LRO:
184 case RLE:
185 case RLO:
186 case PDF:
187#if 0
188 /* FIXME: This awaits implementation of isolate support. */
189 case LRI:
190 case RLI:
191 case FSI:
192 case PDI:
193#endif
194 return EXPLICIT_FORMATTING;
185 default: 195 default:
186 emacs_abort (); 196 emacs_abort ();
187 } 197 }
diff --git a/src/dispextern.h b/src/dispextern.h
index 2ce0a8f4c99..7de4edf2196 100644
--- a/src/dispextern.h
+++ b/src/dispextern.h
@@ -1895,6 +1895,10 @@ typedef enum {
1895 RLE, /* right-to-left embedding */ 1895 RLE, /* right-to-left embedding */
1896 RLO, /* right-to-left override */ 1896 RLO, /* right-to-left override */
1897 PDF, /* pop directional format */ 1897 PDF, /* pop directional format */
1898 LRI, /* left-to-right isolate */
1899 RLI, /* right-to-left isolate */
1900 FSI, /* first strong isolate */
1901 PDI, /* pop directional isolate */
1898 WEAK_ES, /* european number separator */ 1902 WEAK_ES, /* european number separator */
1899 WEAK_ET, /* european number terminator */ 1903 WEAK_ET, /* european number terminator */
1900 WEAK_CS, /* common separator */ 1904 WEAK_CS, /* common separator */