aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorEli Zaretskii2014-10-15 17:11:25 +0300
committerEli Zaretskii2014-10-15 17:11:25 +0300
commited7ebd933a5294e0f27cb49268e7b6376de36648 (patch)
tree90256953a29fa18f4cfb3945a6491567f8a8a6d0 /src
parent0b4d6d30be2822df7d6b086bbab32b8ff419ed5d (diff)
parentb8ec977659df3970a6c8afb7a5b79c9b61e08307 (diff)
downloademacs-ed7ebd933a5294e0f27cb49268e7b6376de36648.tar.gz
emacs-ed7ebd933a5294e0f27cb49268e7b6376de36648.zip
Update the bidirectional reordering engine for Unicode 6.3 and 7.0.
src/bidi.c (bidi_ignore_explicit_marks_for_paragraph_level): Remove variable. (bidi_get_type): Return the isolate initiators and terminator types. (bidi_isolate_fmt_char, bidi_paired_bracket_type) (bidi_fetch_char_skip_isolates, find_first_strong_char) (bidi_find_bracket_pairs, bidi_resolve_brackets): New functions. (bidi_set_sos_type): Renamed from bidi_set_sor_type and updated for the new features. (bidi_push_embedding_level, bidi_pop_embedding_level): Update to push and pop correctly for isolates. (bidi_remember_char): Modified to accept an additional argument and record the bidi type according to its value. (bidi_cache_iterator_state): Accept an additional argument to only update an existing state. Handle the new members of struct bidi_it. (bidi_cache_find): Arguments changed: no longer accepts a level, instead accepts a flag telling it whether it is okay to return unresolved neutrals. (bidi_initialize): Initiate and staticpro the bracket-type uniprop table. Initialize new isolate-related members. (bidi_paragraph_init): Some code factored out into find_first_strong_char. (bidi_resolve_explicit_1): Function deleted, its code incorporated into bidi_resolve_explicit. (bidi_resolve_explicit): Support the isolate initiators and terminator. Fix handling of embeddings and overrides according to new UBA requirements. Record information about previously seen characters here (moved from bidi_level_of_next_char). (bidi_resolve_weak): Adapt to changes in struct members. (FLAG_EMBEDDING_INSIDE, FLAG_OPPOSITE_INSIDE, MAX_BPA_STACK) (STORE_BRACKET_CHARPOS, PUSH_BPA_STACK): New macros. (bidi_resolve_neutral): Call bidi_resolve_brackets to handle the paired bracket resolution. Handle isolate initiators and terminator. (bidi_type_of_next_char): Remove unneeded code for BN limit. (bidi_level_of_next_char): Move the code that records information about previous characters to bidi_resolve_explicit. Fix logic of resolving neutrals and make sure their cache entries are updated. 
Remove now unneeded special handling of PDF level. src/dispextern.h (struct glyph): Enlarge the width of resolved_level. (BIDI_MAXDEPTH): New macro, renamed from BIDI_MAXLEVEL and enlarged per Unicode 6.3. (enum bidi_bracket_type_t): New data type. (struct bidi_saved_info): Leave only 2 type members out of 4. Remove bytepos. (struct bidi_stack): Add members necessary to support isolating sequences. (struct bidi_it): Add new members necessary to support isolating sequences and bracket pair resolution. src/xdisp.c (Fbidi_resolved_levels): New function. (syms_of_xdisp): Defsubr it. (append_glyph, append_composite_glyph, produce_image_glyph) (append_stretch_glyph, append_glyphless_glyph): Convert aborts to assertions. (syms_of_xdisp) <inhibit-bidi-mirroring>: New variable. src/term.c (append_glyph, append_composite_glyph) (append_glyphless_glyph): Convert aborts to assertions. src/.gdbinit (pgx): Display the character codepoint, resolved level, and bidi type also for glyphless glyphs. lisp/simple.el (what-cursor-position): Update to support the new bidi characters. lisp/descr-text.el (describe-char): Update to support the new bidi characters. admin/unidata/unidata-gen.el (unidata-prop-alist): New properties 'paired-bracket' and 'bracket-type', in support of the UBA 6.3. (unidata-gen-table): Support PROP-IDX being a function. (unidata-describe-bidi-bracket-type, unidata-gen-brackets-list) (unidata-gen-bracket-type-list): New functions. (unidata-check): Support checking the 'bracket-type' attribute. (unidata-gen-files): Don't create backups for uni-*.el files. admin/unidata/Makefile.in (${unidir}/charprop.el): Depend on BidiMirroring.txt and BidiBrackets.txt. admin/unidata/BidiBrackets.txt: New file, from Unicode. etc/NEWS: Mention the UBA implementation update. etc/HELLO: Remove now unneeded directional control characters. doc/lispref/nonascii.texi (Character Properties): Document the new properties 'bracket-type' and 'paired-bracket'. 
doc/lispref/display.texi (Bidirectional Display): Update the version of the UBA to which we are conforming. test/BidiCharacterTest.txt: New file, from Unicode. test/biditest.el: New file.
Diffstat (limited to 'src')
-rw-r--r--src/.gdbinit12
-rw-r--r--src/ChangeLog67
-rw-r--r--src/bidi.c1636
-rw-r--r--src/dispextern.h59
-rw-r--r--src/term.c9
-rw-r--r--src/xdisp.c133
6 files changed, 1305 insertions, 611 deletions
diff --git a/src/.gdbinit b/src/.gdbinit
index c10fe3ddded..d76c3aa8e05 100644
--- a/src/.gdbinit
+++ b/src/.gdbinit
@@ -468,18 +468,18 @@ define pgx
468 end 468 end
469 # GLYPHLESS_GLYPH 469 # GLYPHLESS_GLYPH
470 if ($g.type == 2) 470 if ($g.type == 2)
471 printf "GLYPHLESS[" 471 printf "G-LESS["
472 if ($g.u.glyphless.method == 0) 472 if ($g.u.glyphless.method == 0)
473 printf "THIN]" 473 printf "THIN;0x%x]", $g.u.glyphless.ch
474 end 474 end
475 if ($g.u.glyphless.method == 1) 475 if ($g.u.glyphless.method == 1)
476 printf "EMPTY]" 476 printf "EMPTY;0x%x]", $g.u.glyphless.ch
477 end 477 end
478 if ($g.u.glyphless.method == 2) 478 if ($g.u.glyphless.method == 2)
479 printf "ACRO]" 479 printf "ACRO;0x%x]", $g.u.glyphless.ch
480 end 480 end
481 if ($g.u.glyphless.method == 3) 481 if ($g.u.glyphless.method == 3)
482 printf "HEX]" 482 printf "HEX;0x%x]", $g.u.glyphless.ch
483 end 483 end
484 end 484 end
485 # IMAGE_GLYPH 485 # IMAGE_GLYPH
@@ -498,7 +498,7 @@ define pgx
498 printf " pos=%d", $g.charpos 498 printf " pos=%d", $g.charpos
499 end 499 end
500 # For characters, print their resolved level and bidi type 500 # For characters, print their resolved level and bidi type
501 if ($g.type == 0) 501 if ($g.type == 0 || $g.type == 2)
502 printf " blev=%d,btyp=", $g.resolved_level 502 printf " blev=%d,btyp=", $g.resolved_level
503 pbiditype $g.bidi_type 503 pbiditype $g.bidi_type
504 end 504 end
diff --git a/src/ChangeLog b/src/ChangeLog
index 6823f6d3127..22fd7891df5 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,70 @@
12014-10-15 Eli Zaretskii <eliz@gnu.org>
2
3 Update the bidirectional reordering engine for Unicode 6.3 and 7.0.
4 * bidi.c (bidi_ignore_explicit_marks_for_paragraph_level): Remove
5 variable.
6 (bidi_get_type): Return the isolate initiators and terminator
7 types.
8 (bidi_isolate_fmt_char, bidi_paired_bracket_type)
9 (bidi_fetch_char_skip_isolates, find_first_strong_char)
10 (bidi_find_bracket_pairs, bidi_resolve_brackets): New functions.
11 (bidi_set_sos_type): Renamed from bidi_set_sor_type and updated
12 for the new features.
13 (bidi_push_embedding_level, bidi_pop_embedding_level): Update to
14 push and pop correctly for isolates.
15 (bidi_remember_char): Modified to accept an additional argument
16 and record the bidi type according to its value.
17 (bidi_cache_iterator_state): Accept an additional argument to only
18 update an existing state. Handle the new members of struct bidi_it.
19 (bidi_cache_find): Arguments changed: no longer accepts a level,
20 instead accepts a flag telling it whether it is okay to return
21 unresolved neutrals.
22 (bidi_initialize): Initiate and staticpro the bracket-type uniprop
23 table. Initialize new isolate-related members.
24 (bidi_paragraph_init): Some code factored out into
25 find_first_strong_char.
26 (bidi_resolve_explicit_1): Function deleted, its code incorporated
27 into bidi_resolve_explicit.
28 (bidi_resolve_explicit): Support the isolate initiators and
29 terminator. Fix handling of embeddings and overrides according to
30 new UBA requirements. Record information about previously seen
31 characters here (moved from bidi_level_of_next_char).
32 (bidi_resolve_weak): Adapt to changes in struct members.
33 (FLAG_EMBEDDING_INSIDE, FLAG_OPPOSITE_INSIDE, MAX_BPA_STACK)
34 (STORE_BRACKET_CHARPOS, PUSH_BPA_STACK): New macros.
35 (bidi_resolve_neutral): Call bidi_resolve_brackets to handle the
36 paired bracket resolution. Handle isolate initiators and
37 terminator.
38 (bidi_type_of_next_char): Remove unneeded code for BN limit.
39 (bidi_level_of_next_char): Move the code that records information
40 about previous characters to bidi_resolve_explicit. Fix logic of
41 resolving neutrals and make sure their cache entries are updated.
42 Remove now unneeded special handling of PDF level.
43
44 * dispextern.h (struct glyph): Enlarge the width of resolved_level.
45 (BIDI_MAXDEPTH): New macro, renamed from BIDI_MAXLEVEL and
46 enlarged per Unicode 6.3.
47 (enum bidi_bracket_type_t): New data type.
48 (struct bidi_saved_info): Leave only 2 type members out of 4.
49 Remove bytepos.
50 (struct bidi_stack): Add members necessary to support isolating
51 sequences.
52 (struct bidi_it): Add new members necessary to support isolating
53 sequences and bracket pair resolution.
54
55 * xdisp.c (Fbidi_resolved_levels): New function.
56 (syms_of_xdisp): Defsubr it.
57 (append_glyph, append_composite_glyph, produce_image_glyph)
58 (append_stretch_glyph, append_glyphless_glyph): Convert aborts to
59 assertions.
60 (syms_of_xdisp) <inhibit-bidi-mirroring>: New variable.
61
62 * term.c (append_glyph, append_composite_glyph)
63 (append_glyphless_glyph): Convert aborts to assertions.
64
65 * .gdbinit (pgx): Display the character codepoint, resolved level,
66 and bidi type also for glyphless glyphs.
67
12014-10-15 Dmitry Antipov <dmantipov@yandex.ru> 682014-10-15 Dmitry Antipov <dmantipov@yandex.ru>
2 69
3 Avoid unwanted point motion in Fline_beginning_position. 70 Avoid unwanted point motion in Fline_beginning_position.
diff --git a/src/bidi.c b/src/bidi.c
index 53c2dad1b6b..464879ddf98 100644
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -76,6 +76,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
76 bidi_fetch_char -- fetch next character 76 bidi_fetch_char -- fetch next character
77 bidi_resolve_explicit -- resolve explicit levels and directions 77 bidi_resolve_explicit -- resolve explicit levels and directions
78 bidi_resolve_weak -- resolve weak types 78 bidi_resolve_weak -- resolve weak types
79 bidi_resolve_brackets -- resolve "paired brackets" neutral types
79 bidi_resolve_neutral -- resolve neutral types 80 bidi_resolve_neutral -- resolve neutral types
80 bidi_level_of_next_char -- resolve implicit levels 81 bidi_level_of_next_char -- resolve implicit levels
81 82
@@ -247,7 +248,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
247 248
248static bool bidi_initialized = 0; 249static bool bidi_initialized = 0;
249 250
250static Lisp_Object bidi_type_table, bidi_mirror_table; 251static Lisp_Object bidi_type_table, bidi_mirror_table, bidi_brackets_table;
251 252
252#define BIDI_EOB (-1) 253#define BIDI_EOB (-1)
253 254
@@ -260,14 +261,6 @@ typedef enum {
260 EXPLICIT_FORMATTING 261 EXPLICIT_FORMATTING
261} bidi_category_t; 262} bidi_category_t;
262 263
263/* UAX#9 says to search only for L, AL, or R types of characters, and
264 ignore RLE, RLO, LRE, and LRO, when determining the base paragraph
265 level. Yudit indeed ignores them. This variable is therefore set
266 by default to ignore them, but clearing it will take them into
267 account. */
268extern bool bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
269bool bidi_ignore_explicit_marks_for_paragraph_level = 1;
270
271static Lisp_Object paragraph_start_re, paragraph_separate_re; 264static Lisp_Object paragraph_start_re, paragraph_separate_re;
272static Lisp_Object Qparagraph_start, Qparagraph_separate; 265static Lisp_Object Qparagraph_start, Qparagraph_separate;
273 266
@@ -305,14 +298,11 @@ bidi_get_type (int ch, bidi_dir_t override)
305 case RLE: 298 case RLE:
306 case RLO: 299 case RLO:
307 case PDF: 300 case PDF:
308 return default_type;
309 /* FIXME: The isolate controls are treated as BN until we add
310 support for UBA v6.3. */
311 case LRI: 301 case LRI:
312 case RLI: 302 case RLI:
313 case FSI: 303 case FSI:
314 case PDI: 304 case PDI:
315 return WEAK_BN; 305 return default_type;
316 default: 306 default:
317 if (override == L2R) 307 if (override == L2R)
318 return STRONG_L; 308 return STRONG_L;
@@ -348,11 +338,6 @@ bidi_get_category (bidi_type_t type)
348 case WEAK_CS: 338 case WEAK_CS:
349 case WEAK_NSM: 339 case WEAK_NSM:
350 case WEAK_BN: 340 case WEAK_BN:
351 /* FIXME */
352 case LRI:
353 case RLI:
354 case FSI:
355 case PDI:
356 return WEAK; 341 return WEAK;
357 case NEUTRAL_B: 342 case NEUTRAL_B:
358 case NEUTRAL_S: 343 case NEUTRAL_S:
@@ -364,19 +349,22 @@ bidi_get_category (bidi_type_t type)
364 case RLE: 349 case RLE:
365 case RLO: 350 case RLO:
366 case PDF: 351 case PDF:
367#if 0
368 /* FIXME: This awaits implementation of isolate support. */
369 case LRI: 352 case LRI:
370 case RLI: 353 case RLI:
371 case FSI: 354 case FSI:
372 case PDI: 355 case PDI:
373#endif
374 return EXPLICIT_FORMATTING; 356 return EXPLICIT_FORMATTING;
375 default: 357 default:
376 emacs_abort (); 358 emacs_abort ();
377 } 359 }
378} 360}
379 361
362static bool
363bidi_isolate_fmt_char (bidi_type_t ch_type)
364{
365 return (ch_type == LRI || ch_type == RLI || ch_type == PDI || ch_type == FSI);
366}
367
380/* Return the mirrored character of C, if it has one. If C has no 368/* Return the mirrored character of C, if it has one. If C has no
381 mirrored counterpart, return C. 369 mirrored counterpart, return C.
382 Note: The conditions in UAX#9 clause L4 regarding the surrounding 370 Note: The conditions in UAX#9 clause L4 regarding the surrounding
@@ -413,75 +401,124 @@ bidi_mirror_char (int c)
413 return c; 401 return c;
414} 402}
415 403
416/* Determine the start-of-run (sor) directional type given the two 404/* Return the Bidi_Paired_Bracket_Type property of the character C. */
405static bidi_bracket_type_t
406bidi_paired_bracket_type (int c)
407{
408 if (c == BIDI_EOB)
409 return BIDI_BRACKET_NONE;
410 if (c < 0 || c > MAX_CHAR)
411 emacs_abort ();
412
413 return (bidi_bracket_type_t) XINT (CHAR_TABLE_REF (bidi_brackets_table, c));
414}
415
416/* Determine the start-of-sequence (sos) directional type given the two
417 embedding levels on either side of the run boundary. Also, update 417 embedding levels on either side of the run boundary. Also, update
418 the saved info about previously seen characters, since that info is 418 the saved info about previously seen characters, since that info is
419 generally valid for a single level run. */ 419 generally valid for a single level run. */
420static void 420static void
421bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after) 421bidi_set_sos_type (struct bidi_it *bidi_it, int level_before, int level_after)
422{ 422{
423 int higher_level = (level_before > level_after ? level_before : level_after); 423 int higher_level = (level_before > level_after ? level_before : level_after);
424 424
425 /* The prev_was_pdf gork is required for when we have several PDFs 425 /* FIXME: should the default sos direction be user selectable? */
426 in a row. In that case, we want to compute the sor type for the 426 bidi_it->sos = ((higher_level & 1) != 0 ? R2L : L2R); /* X10 */
427 next level run only once: when we see the first PDF. That's
428 because the sor type depends only on the higher of the two levels
429 that we find on the two sides of the level boundary (see UAX#9,
430 clause X10), and so we don't need to know the final embedding
431 level to which we descend after processing all the PDFs. */
432 if (!bidi_it->prev_was_pdf || level_before < level_after)
433 /* FIXME: should the default sor direction be user selectable? */
434 bidi_it->sor = ((higher_level & 1) != 0 ? R2L : L2R);
435 if (level_before > level_after)
436 bidi_it->prev_was_pdf = 1;
437 427
438 bidi_it->prev.type = UNKNOWN_BT; 428 bidi_it->prev.type = UNKNOWN_BT;
439 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 429 bidi_it->last_strong.type = bidi_it->last_strong.orig_type = UNKNOWN_BT;
440 = bidi_it->last_strong.orig_type = UNKNOWN_BT; 430 bidi_it->prev_for_neutral.type = (bidi_it->sos == R2L ? STRONG_R : STRONG_L);
441 bidi_it->prev_for_neutral.type = (bidi_it->sor == R2L ? STRONG_R : STRONG_L);
442 bidi_it->prev_for_neutral.charpos = bidi_it->charpos; 431 bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
443 bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos; 432 bidi_it->next_for_neutral.type
444 bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1
445 = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT; 433 = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
446 bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
447} 434}
448 435
449/* Push the current embedding level and override status; reset the 436/* Push the current embedding level and override status; reset the
450 current level to LEVEL and the current override status to OVERRIDE. */ 437 current level to LEVEL and the current override status to OVERRIDE. */
451static void 438static void
452bidi_push_embedding_level (struct bidi_it *bidi_it, 439bidi_push_embedding_level (struct bidi_it *bidi_it,
453 int level, bidi_dir_t override) 440 int level, bidi_dir_t override, bool isolate_status)
454{ 441{
442 struct bidi_stack *st;
443 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
444
455 bidi_it->stack_idx++; 445 bidi_it->stack_idx++;
456 eassert (bidi_it->stack_idx < BIDI_MAXLEVEL); 446 eassert (bidi_it->stack_idx < BIDI_MAXDEPTH+2+1);
457 bidi_it->level_stack[bidi_it->stack_idx].level = level; 447 st = &bidi_it->level_stack[bidi_it->stack_idx];
458 bidi_it->level_stack[bidi_it->stack_idx].override = override; 448 eassert (level <= (1 << 7));
449 st->level = level;
450 st->override = override;
451 st->isolate_status = isolate_status;
452 if (isolate_status)
453 {
454 st->last_strong = bidi_it->last_strong;
455 st->prev_for_neutral = bidi_it->prev_for_neutral;
456 st->next_for_neutral = bidi_it->next_for_neutral;
457 st->sos = bidi_it->sos;
458 }
459 /* We've got a new isolating sequence, compute the directional type
460 of sos and initialize per-sequence variables (UAX#9, clause X10). */
461 bidi_set_sos_type (bidi_it, prev_level, level);
459} 462}
460 463
461/* Pop the embedding level and directional override status from the 464/* Pop from the stack the embedding level, the directional override
462 stack, and return the new level. */ 465 status, and optionally saved information for the isolating run
466 sequence. Return the new level. */
463static int 467static int
464bidi_pop_embedding_level (struct bidi_it *bidi_it) 468bidi_pop_embedding_level (struct bidi_it *bidi_it)
465{ 469{
466 /* UAX#9 says to ignore invalid PDFs. */ 470 int level;
471
472 /* UAX#9 says to ignore invalid PDFs (X7, last bullet)
473 and PDIs (X6a, 2nd bullet). */
467 if (bidi_it->stack_idx > 0) 474 if (bidi_it->stack_idx > 0)
468 bidi_it->stack_idx--; 475 {
469 return bidi_it->level_stack[bidi_it->stack_idx].level; 476 bool isolate_status
477 = bidi_it->level_stack[bidi_it->stack_idx].isolate_status;
478 int old_level = bidi_it->level_stack[bidi_it->stack_idx].level;
479
480 struct bidi_stack st;
481
482 st = bidi_it->level_stack[bidi_it->stack_idx];
483 if (isolate_status)
484 {
485 /* PREV is used in W1 for resolving WEAK_NSM. By the time
486 we get to an NSM, we must have gotten past at least one
487 character: the PDI that ends the isolate from which we
488 are popping here. So PREV will have been filled up by
489 the time we first use it. We initialize it here to
490 UNKNOWN_BT to be able to catch any blunders in this
491 logic. */
492 bidi_it->prev.orig_type = bidi_it->prev.type = UNKNOWN_BT;
493 bidi_it->last_strong = st.last_strong;
494 bidi_it->prev_for_neutral = st.prev_for_neutral;
495 bidi_it->next_for_neutral = st.next_for_neutral;
496 bidi_it->sos = st.sos;
497 }
498 else
499 bidi_set_sos_type (bidi_it, old_level,
500 bidi_it->level_stack[bidi_it->stack_idx - 1].level);
501
502 bidi_it->stack_idx--;
503 }
504 level = bidi_it->level_stack[bidi_it->stack_idx].level;
505 eassert (0 <= level && level <= BIDI_MAXDEPTH + 1);
506 return level;
470} 507}
471 508
472/* Record in SAVED_INFO the information about the current character. */ 509/* Record in SAVED_INFO the information about the current character. */
473static void 510static void
474bidi_remember_char (struct bidi_saved_info *saved_info, 511bidi_remember_char (struct bidi_saved_info *saved_info,
475 struct bidi_it *bidi_it) 512 struct bidi_it *bidi_it, bool from_type)
476{ 513{
477 saved_info->charpos = bidi_it->charpos; 514 saved_info->charpos = bidi_it->charpos;
478 saved_info->bytepos = bidi_it->bytepos; 515 if (from_type)
479 saved_info->type = bidi_it->type; 516 saved_info->type = bidi_it->type;
480 bidi_check_type (bidi_it->type); 517 else
481 saved_info->type_after_w1 = bidi_it->type_after_w1; 518 saved_info->type = bidi_it->type_after_wn;
482 bidi_check_type (bidi_it->type_after_w1); 519 bidi_check_type (saved_info->type);
483 saved_info->orig_type = bidi_it->orig_type; 520 saved_info->orig_type = bidi_it->orig_type;
484 bidi_check_type (bidi_it->orig_type); 521 bidi_check_type (saved_info->orig_type);
485} 522}
486 523
487/* Copy the bidi iterator from FROM to TO. To save cycles, this only 524/* Copy the bidi iterator from FROM to TO. To save cycles, this only
@@ -501,6 +538,9 @@ bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
501 Caching the bidi iterator states 538 Caching the bidi iterator states
502 ***********************************************************************/ 539 ***********************************************************************/
503 540
541/* We allocate and de-allocate the cache in chunks of this size (in
542 characters). 200 was chosen as an upper limit for reasonably-long
543 lines in a text file/buffer. */
504#define BIDI_CACHE_CHUNK 200 544#define BIDI_CACHE_CHUNK 200
505static struct bidi_it *bidi_cache; 545static struct bidi_it *bidi_cache;
506static ptrdiff_t bidi_cache_size = 0; 546static ptrdiff_t bidi_cache_size = 0;
@@ -567,7 +607,7 @@ bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
567} 607}
568 608
569/* Find a cached state with a given CHARPOS and resolved embedding 609/* Find a cached state with a given CHARPOS and resolved embedding
570 level less or equal to LEVEL. if LEVEL is -1, disregard the 610 level less or equal to LEVEL. If LEVEL is -1, disregard the
571 resolved levels in cached states. DIR, if non-zero, means search 611 resolved levels in cached states. DIR, if non-zero, means search
572 in that direction from the last cache hit. */ 612 in that direction from the last cache hit. */
573static ptrdiff_t 613static ptrdiff_t
@@ -698,7 +738,8 @@ bidi_cache_ensure_space (ptrdiff_t idx)
698} 738}
699 739
700static void 740static void
701bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved) 741bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved,
742 bool update_only)
702{ 743{
703 ptrdiff_t idx; 744 ptrdiff_t idx;
704 745
@@ -707,6 +748,9 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved)
707 emacs_abort (); 748 emacs_abort ();
708 idx = bidi_cache_search (bidi_it->charpos, -1, 1); 749 idx = bidi_cache_search (bidi_it->charpos, -1, 1);
709 750
751 if (idx < 0 && update_only)
752 return;
753
710 if (idx < 0) 754 if (idx < 0)
711 { 755 {
712 idx = bidi_cache_idx; 756 idx = bidi_cache_idx;
@@ -734,19 +778,19 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved)
734 costly copying of the entire struct. */ 778 costly copying of the entire struct. */
735 bidi_cache[idx].type = bidi_it->type; 779 bidi_cache[idx].type = bidi_it->type;
736 bidi_check_type (bidi_it->type); 780 bidi_check_type (bidi_it->type);
737 bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1; 781 bidi_cache[idx].type_after_wn = bidi_it->type_after_wn;
738 bidi_check_type (bidi_it->type_after_w1); 782 bidi_check_type (bidi_it->type_after_wn);
739 if (resolved) 783 if (resolved)
740 bidi_cache[idx].resolved_level = bidi_it->resolved_level; 784 bidi_cache[idx].resolved_level = bidi_it->resolved_level;
741 else 785 else
742 bidi_cache[idx].resolved_level = -1; 786 bidi_cache[idx].resolved_level = -1;
743 bidi_cache[idx].invalid_levels = bidi_it->invalid_levels; 787 bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
744 bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
745 bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral; 788 bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
746 bidi_cache[idx].next_for_ws = bidi_it->next_for_ws; 789 bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
747 bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
748 bidi_cache[idx].disp_pos = bidi_it->disp_pos; 790 bidi_cache[idx].disp_pos = bidi_it->disp_pos;
749 bidi_cache[idx].disp_prop = bidi_it->disp_prop; 791 bidi_cache[idx].disp_prop = bidi_it->disp_prop;
792 bidi_cache[idx].bracket_pairing_pos = bidi_it->bracket_pairing_pos;
793 bidi_cache[idx].bracket_enclosed_type = bidi_it->bracket_enclosed_type;
750 } 794 }
751 795
752 bidi_cache_last_idx = idx; 796 bidi_cache_last_idx = idx;
@@ -754,12 +798,28 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved)
754 bidi_cache_idx = idx + 1; 798 bidi_cache_idx = idx + 1;
755} 799}
756 800
801/* Look for a cached iterator state that corresponds to CHARPOS. If
802 found, copy the cached state into BIDI_IT and return the type of
803 the cached entry. If not found, return UNKNOWN_BT. NEUTRALS_OK
804 non-zero means it is OK to return cached state for neutral
805 characters that have no valid next_for_neutral member, and
806 therefore cannot be resolved. This can happen if the state was
807 cached before it was resolved in bidi_resolve_neutral. */
757static bidi_type_t 808static bidi_type_t
758bidi_cache_find (ptrdiff_t charpos, int level, struct bidi_it *bidi_it) 809bidi_cache_find (ptrdiff_t charpos, bool neutrals_ok, struct bidi_it *bidi_it)
759{ 810{
760 ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir); 811 ptrdiff_t i = bidi_cache_search (charpos, -1, bidi_it->scan_dir);
761 812
762 if (i >= bidi_cache_start) 813 if (i >= bidi_cache_start
814 && (neutrals_ok
815 /* Callers that don't want to resolve neutrals (and set
816 neutrals_ok = false) need to be sure that there's enough
817 info in the cached state to resolve the neutrals and
818 isolates, and if not, they don't want the cached state. */
819 || !(bidi_cache[i].resolved_level == -1
820 && (bidi_get_category (bidi_cache[i].type) == NEUTRAL
821 || bidi_isolate_fmt_char (bidi_cache[i].type))
822 && bidi_cache[i].next_for_neutral.type == UNKNOWN_BT)))
763 { 823 {
764 bidi_dir_t current_scan_dir = bidi_it->scan_dir; 824 bidi_dir_t current_scan_dir = bidi_it->scan_dir;
765 825
@@ -956,6 +1016,11 @@ bidi_initialize (void)
956 emacs_abort (); 1016 emacs_abort ();
957 staticpro (&bidi_mirror_table); 1017 staticpro (&bidi_mirror_table);
958 1018
1019 bidi_brackets_table = uniprop_table (intern ("bracket-type"));
1020 if (NILP (bidi_brackets_table))
1021 emacs_abort ();
1022 staticpro (&bidi_brackets_table);
1023
959 Qparagraph_start = intern ("paragraph-start"); 1024 Qparagraph_start = intern ("paragraph-start");
960 staticpro (&Qparagraph_start); 1025 staticpro (&Qparagraph_start);
961 paragraph_start_re = Fsymbol_value (Qparagraph_start); 1026 paragraph_start_re = Fsymbol_value (Qparagraph_start);
@@ -981,7 +1046,7 @@ static void
981bidi_set_paragraph_end (struct bidi_it *bidi_it) 1046bidi_set_paragraph_end (struct bidi_it *bidi_it)
982{ 1047{
983 bidi_it->invalid_levels = 0; 1048 bidi_it->invalid_levels = 0;
984 bidi_it->invalid_rl_levels = -1; 1049 bidi_it->invalid_isolates = 0;
985 bidi_it->stack_idx = 0; 1050 bidi_it->stack_idx = 0;
986 bidi_it->resolved_level = bidi_it->level_stack[0].level; 1051 bidi_it->resolved_level = bidi_it->level_stack[0].level;
987} 1052}
@@ -998,28 +1063,24 @@ bidi_init_it (ptrdiff_t charpos, ptrdiff_t bytepos, bool frame_window_p,
998 if (bytepos >= 0) 1063 if (bytepos >= 0)
999 bidi_it->bytepos = bytepos; 1064 bidi_it->bytepos = bytepos;
1000 bidi_it->frame_window_p = frame_window_p; 1065 bidi_it->frame_window_p = frame_window_p;
1001 bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */ 1066 bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit */
1002 bidi_it->first_elt = 1; 1067 bidi_it->first_elt = 1;
1003 bidi_set_paragraph_end (bidi_it); 1068 bidi_set_paragraph_end (bidi_it);
1004 bidi_it->new_paragraph = 1; 1069 bidi_it->new_paragraph = 1;
1005 bidi_it->separator_limit = -1; 1070 bidi_it->separator_limit = -1;
1006 bidi_it->type = NEUTRAL_B; 1071 bidi_it->type = NEUTRAL_B;
1007 bidi_it->type_after_w1 = NEUTRAL_B; 1072 bidi_it->type_after_wn = NEUTRAL_B;
1008 bidi_it->orig_type = NEUTRAL_B; 1073 bidi_it->orig_type = NEUTRAL_B;
1009 bidi_it->prev_was_pdf = 0; 1074 /* FIXME: Review this!!! */
1010 bidi_it->prev.type = bidi_it->prev.type_after_w1 1075 bidi_it->prev.type = bidi_it->prev.orig_type = UNKNOWN_BT;
1011 = bidi_it->prev.orig_type = UNKNOWN_BT; 1076 bidi_it->last_strong.type = bidi_it->last_strong.orig_type = UNKNOWN_BT;
1012 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
1013 = bidi_it->last_strong.orig_type = UNKNOWN_BT;
1014 bidi_it->next_for_neutral.charpos = -1; 1077 bidi_it->next_for_neutral.charpos = -1;
1015 bidi_it->next_for_neutral.type 1078 bidi_it->next_for_neutral.type
1016 = bidi_it->next_for_neutral.type_after_w1
1017 = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT; 1079 = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
1018 bidi_it->prev_for_neutral.charpos = -1; 1080 bidi_it->prev_for_neutral.charpos = -1;
1019 bidi_it->prev_for_neutral.type 1081 bidi_it->prev_for_neutral.type
1020 = bidi_it->prev_for_neutral.type_after_w1
1021 = bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT; 1082 = bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
1022 bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */ 1083 bidi_it->sos = L2R; /* FIXME: should it be user-selectable? */
1023 bidi_it->disp_pos = -1; /* invalid/unknown */ 1084 bidi_it->disp_pos = -1; /* invalid/unknown */
1024 bidi_it->disp_prop = 0; 1085 bidi_it->disp_prop = 0;
1025 /* We can only shrink the cache if we are at the bottom level of its 1086 /* We can only shrink the cache if we are at the bottom level of its
@@ -1035,16 +1096,20 @@ static void
1035bidi_line_init (struct bidi_it *bidi_it) 1096bidi_line_init (struct bidi_it *bidi_it)
1036{ 1097{
1037 bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */ 1098 bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
1099 bidi_it->stack_idx = 0;
1038 bidi_it->resolved_level = bidi_it->level_stack[0].level; 1100 bidi_it->resolved_level = bidi_it->level_stack[0].level;
1039 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */ 1101 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
1102 bidi_it->level_stack[0].isolate_status = false; /* X1 */
1040 bidi_it->invalid_levels = 0; 1103 bidi_it->invalid_levels = 0;
1041 bidi_it->invalid_rl_levels = -1; 1104 bidi_it->isolate_level = 0; /* X1 */
1105 bidi_it->invalid_isolates = 0; /* X1 */
1042 /* Setting this to zero will force its recomputation the first time 1106 /* Setting this to zero will force its recomputation the first time
1043 we need it for W5. */ 1107 we need it for W5. */
1044 bidi_it->next_en_pos = 0; 1108 bidi_it->next_en_pos = 0;
1045 bidi_it->next_en_type = UNKNOWN_BT; 1109 bidi_it->next_en_type = UNKNOWN_BT;
1110 bidi_it->next_for_ws.charpos = -1;
1046 bidi_it->next_for_ws.type = UNKNOWN_BT; 1111 bidi_it->next_for_ws.type = UNKNOWN_BT;
1047 bidi_set_sor_type (bidi_it, 1112 bidi_set_sos_type (bidi_it,
1048 (bidi_it->paragraph_dir == R2L ? 1 : 0), 1113 (bidi_it->paragraph_dir == R2L ? 1 : 0),
1049 bidi_it->level_stack[0].level); /* X10 */ 1114 bidi_it->level_stack[0].level); /* X10 */
1050 1115
@@ -1244,6 +1309,50 @@ bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos,
1244 return ch; 1309 return ch;
1245} 1310}
1246 1311
1312/* Like bidi_fetch_char, but ignore any text between an isolate
1313 initiator and its matching PDI or, if it has no matching PDI, the
1314 end of the paragraph. If isolates were skipped, CH_LEN and NCHARS
1315 are set to the number of bytes and characters between BYTEPOS/CHARPOS
1316 and the character that was fetched after skipping the isolates. */
1317static int
1318bidi_fetch_char_skip_isolates (ptrdiff_t charpos, ptrdiff_t bytepos,
1319 ptrdiff_t *disp_pos, int *disp_prop,
1320 struct bidi_string_data *string,
1321 struct window *w, bool frame_window_p,
1322 ptrdiff_t *ch_len, ptrdiff_t *nchars)
1323{
1324 ptrdiff_t orig_charpos = charpos, orig_bytepos = bytepos;
1325 int ch = bidi_fetch_char (charpos, bytepos, disp_pos, disp_prop, string, w,
1326 frame_window_p, ch_len, nchars);
1327 bidi_type_t ch_type = bidi_get_type (ch, NEUTRAL_DIR);
1328 ptrdiff_t level = 0;
1329
1330 if (ch_type == LRI || ch_type == RLI || ch_type == FSI)
1331 {
1332 level++;
1333 while (level > 0 && ch_type != NEUTRAL_B)
1334 {
1335 charpos += *nchars;
1336 bytepos += *ch_len;
1337 ch = bidi_fetch_char (charpos, bytepos, disp_pos, disp_prop, string,
1338 w, frame_window_p, ch_len, nchars);
1339 ch_type = bidi_get_type (ch, NEUTRAL_DIR);
1340 /* A Note to P2 says to ignore max_depth limit. */
1341 if (ch_type == LRI || ch_type == RLI || ch_type == FSI)
1342 level++;
1343 else if (ch_type == PDI)
1344 level--;
1345 }
1346 }
1347
1348 /* Communicate to the caller how much did we skip, so it could get
1349 past the last character position we examined. */
1350 *nchars += charpos - orig_charpos;
1351 *ch_len += bytepos - orig_bytepos;
1352 return ch;
1353}
1354
1355
1247 1356
1248/*********************************************************************** 1357/***********************************************************************
1249 Determining paragraph direction 1358 Determining paragraph direction
@@ -1378,6 +1487,72 @@ bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
1378 bidi_paragraph_init to less than 10 ms even on slow machines. */ 1487 bidi_paragraph_init to less than 10 ms even on slow machines. */
1379#define MAX_STRONG_CHAR_SEARCH 100000 1488#define MAX_STRONG_CHAR_SEARCH 100000
1380 1489
1490/* Starting from POS, find the first strong (L, R, or AL) character,
1491 while skipping over any characters between an isolate initiator and
1492 its matching PDI. STOP_AT_PDI non-zero means stop at the PDI that
1493 matches the isolate initiator at POS. Return the bidi type of the
1494 character where the search stopped. Give up if after examining
1495 MAX_STRONG_CHAR_SEARCH buffer or string positions no strong
1496 character was found. */
1497static bidi_type_t
1498find_first_strong_char (ptrdiff_t pos, ptrdiff_t bytepos, ptrdiff_t end,
1499 ptrdiff_t *disp_pos, int *disp_prop,
1500 struct bidi_string_data *string, struct window *w,
1501 bool string_p, bool frame_window_p,
1502 ptrdiff_t *ch_len, ptrdiff_t *nchars, bool stop_at_pdi)
1503{
1504 ptrdiff_t pos1;
1505 bidi_type_t type;
1506 int ch;
1507
1508 if (stop_at_pdi)
1509 {
1510 /* If STOP_AT_PDI is non-zero, we must have been called with FSI
1511 at POS. Get past it. */
1512#ifdef ENABLE_CHECKING
1513 ch = bidi_fetch_char (pos, bytepos, disp_pos, disp_prop, string, w,
1514 frame_window_p, ch_len, nchars);
1515 type = bidi_get_type (ch, NEUTRAL_DIR);
1516 eassert (type == FSI /* || type == LRI || type == RLI */);
1517#endif
1518 pos += *nchars;
1519 bytepos += *ch_len;
1520 }
1521 ch = bidi_fetch_char_skip_isolates (pos, bytepos, disp_pos, disp_prop, string,
1522 w, frame_window_p, ch_len, nchars);
1523 type = bidi_get_type (ch, NEUTRAL_DIR);
1524
1525 pos1 = pos;
1526 for (pos += *nchars, bytepos += *ch_len;
1527 bidi_get_category (type) != STRONG
1528 /* If requested to stop at first PDI, stop there. */
1529 && !(stop_at_pdi && type == PDI)
1530 /* Stop when searched too far into an abnormally large
1531 paragraph full of weak or neutral characters. */
1532 && pos - pos1 < MAX_STRONG_CHAR_SEARCH;
1533 type = bidi_get_type (ch, NEUTRAL_DIR))
1534 {
1535 if (pos >= end)
1536 {
1537 /* Pretend there's a paragraph separator at end of
1538 buffer/string. */
1539 type = NEUTRAL_B;
1540 break;
1541 }
1542 if (!string_p
1543 && type == NEUTRAL_B
1544 && bidi_at_paragraph_end (pos, bytepos) >= -1)
1545 break;
1546 /* Fetch next character and advance to get past it. */
1547 ch = bidi_fetch_char_skip_isolates (pos, bytepos, disp_pos, disp_prop,
1548 string, w, frame_window_p,
1549 ch_len, nchars);
1550 pos += *nchars;
1551 bytepos += *ch_len;
1552 }
1553 return type;
1554}
1555
1381/* Determine the base direction, a.k.a. base embedding level, of the 1556/* Determine the base direction, a.k.a. base embedding level, of the
1382 paragraph we are about to iterate through. If DIR is either L2R or 1557 paragraph we are about to iterate through. If DIR is either L2R or
1383 R2L, just use that. Otherwise, determine the paragraph direction 1558 R2L, just use that. Otherwise, determine the paragraph direction
@@ -1424,7 +1599,6 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p)
1424 } 1599 }
1425 else if (dir == NEUTRAL_DIR) /* P2 */ 1600 else if (dir == NEUTRAL_DIR) /* P2 */
1426 { 1601 {
1427 int ch;
1428 ptrdiff_t ch_len, nchars; 1602 ptrdiff_t ch_len, nchars;
1429 ptrdiff_t pos, disp_pos = -1; 1603 ptrdiff_t pos, disp_pos = -1;
1430 int disp_prop = 0; 1604 int disp_prop = 0;
@@ -1473,52 +1647,16 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p)
1473 /* The following loop is run more than once only if NO_DEFAULT_P, 1647 /* The following loop is run more than once only if NO_DEFAULT_P,
1474 and only if we are iterating on a buffer. */ 1648 and only if we are iterating on a buffer. */
1475 do { 1649 do {
1476 ptrdiff_t pos1;
1477
1478 bytepos = pstartbyte; 1650 bytepos = pstartbyte;
1479 if (!string_p) 1651 if (!string_p)
1480 pos = BYTE_TO_CHAR (bytepos); 1652 pos = BYTE_TO_CHAR (bytepos);
1481 ch = bidi_fetch_char (pos, bytepos, &disp_pos, &disp_prop, 1653 type = find_first_strong_char (pos, bytepos, end, &disp_pos, &disp_prop,
1482 &bidi_it->string, bidi_it->w, 1654 &bidi_it->string, bidi_it->w,
1483 bidi_it->frame_window_p, &ch_len, &nchars); 1655 string_p, bidi_it->frame_window_p,
1484 type = bidi_get_type (ch, NEUTRAL_DIR); 1656 &ch_len, &nchars, false);
1485 1657 if (type == STRONG_R || type == STRONG_AL) /* P3 */
1486 pos1 = pos;
1487 for (pos += nchars, bytepos += ch_len;
1488 ((bidi_get_category (type) != STRONG)
1489 || (bidi_ignore_explicit_marks_for_paragraph_level
1490 && (type == RLE || type == RLO
1491 || type == LRE || type == LRO)))
1492 /* Stop when searched too far into an abnormally large
1493 paragraph full of weak or neutral characters. */
1494 && pos - pos1 < MAX_STRONG_CHAR_SEARCH;
1495 type = bidi_get_type (ch, NEUTRAL_DIR))
1496 {
1497 if (pos >= end)
1498 {
1499 /* Pretend there's a paragraph separator at end of
1500 buffer/string. */
1501 type = NEUTRAL_B;
1502 break;
1503 }
1504 if (!string_p
1505 && type == NEUTRAL_B
1506 && bidi_at_paragraph_end (pos, bytepos) >= -1)
1507 break;
1508 /* Fetch next character and advance to get past it. */
1509 ch = bidi_fetch_char (pos, bytepos, &disp_pos,
1510 &disp_prop, &bidi_it->string, bidi_it->w,
1511 bidi_it->frame_window_p, &ch_len, &nchars);
1512 pos += nchars;
1513 bytepos += ch_len;
1514 }
1515 if ((type == STRONG_R || type == STRONG_AL) /* P3 */
1516 || (!bidi_ignore_explicit_marks_for_paragraph_level
1517 && (type == RLO || type == RLE)))
1518 bidi_it->paragraph_dir = R2L; 1658 bidi_it->paragraph_dir = R2L;
1519 else if (type == STRONG_L 1659 else if (type == STRONG_L)
1520 || (!bidi_ignore_explicit_marks_for_paragraph_level
1521 && (type == LRO || type == LRE)))
1522 bidi_it->paragraph_dir = L2R; 1660 bidi_it->paragraph_dir = L2R;
1523 if (!string_p 1661 if (!string_p
1524 && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR) 1662 && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
@@ -1581,19 +1719,60 @@ bidi_explicit_dir_char (int ch)
1581 || ch_type == PDF); 1719 || ch_type == PDF);
1582} 1720}
1583 1721
1584/* A helper function for bidi_resolve_explicit. It advances to the 1722/* Given an iterator state in BIDI_IT, advance one character position
1585 next character in logical order and determines the new embedding 1723 in the buffer/string to the next character (in the logical order),
1586 level and directional override, but does not take into account 1724 resolve any explicit embeddings, directional overrides, and isolate
1587 empty embeddings. */ 1725 initiators and terminators, and return the embedding level of the
1726 character after resolving these explicit directives. */
1588static int 1727static int
1589bidi_resolve_explicit_1 (struct bidi_it *bidi_it) 1728bidi_resolve_explicit (struct bidi_it *bidi_it)
1590{ 1729{
1591 int curchar; 1730 int curchar;
1592 bidi_type_t type; 1731 bidi_type_t type, typ1, prev_type = UNKNOWN_BT;
1593 int current_level; 1732 int current_level;
1594 int new_level; 1733 int new_level;
1595 bidi_dir_t override; 1734 bidi_dir_t override;
1735 bool isolate_status;
1596 bool string_p = bidi_it->string.s || STRINGP (bidi_it->string.lstring); 1736 bool string_p = bidi_it->string.s || STRINGP (bidi_it->string.lstring);
1737 ptrdiff_t ch_len, nchars, disp_pos, end;
1738 int disp_prop;
1739
1740 /* Record the info about the previous character. */
1741 if (bidi_it->type_after_wn != WEAK_BN /* W1/Retaining */
1742 && bidi_it->type != WEAK_BN)
1743 {
1744 /* This special case is needed in support of Unicode 8.0
1745 correction to N0, as implemented in bidi_resolve_weak/W1
1746 below. */
1747 if (bidi_it->type_after_wn == NEUTRAL_ON
1748 && bidi_get_category (bidi_it->type) == STRONG
1749 && bidi_paired_bracket_type (bidi_it->ch) == BIDI_BRACKET_CLOSE)
1750 bidi_remember_char (&bidi_it->prev, bidi_it, 1);
1751 else
1752 bidi_remember_char (&bidi_it->prev, bidi_it, 0);
1753 }
1754 if (bidi_it->type_after_wn == STRONG_R
1755 || bidi_it->type_after_wn == STRONG_L
1756 || bidi_it->type_after_wn == STRONG_AL)
1757 bidi_remember_char (&bidi_it->last_strong, bidi_it, 0);
1758 if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1759 || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1760 bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it, 1);
1761
1762 /* If we overstepped the characters used for resolving neutrals
1763 and whitespace, invalidate their info in the iterator. */
1764 if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1765 bidi_it->next_for_neutral.type = UNKNOWN_BT;
1766 if (bidi_it->next_en_pos >= 0
1767 && bidi_it->charpos >= bidi_it->next_en_pos)
1768 {
1769 bidi_it->next_en_pos = 0;
1770 bidi_it->next_en_type = UNKNOWN_BT;
1771 }
1772
1773 /* Reset the bracket resolution info. */
1774 bidi_it->bracket_pairing_pos = -1;
1775 bidi_it->bracket_enclosed_type = UNKNOWN_BT;
1597 1776
1598 /* If reseat()'ed, don't advance, so as to start iteration from the 1777 /* If reseat()'ed, don't advance, so as to start iteration from the
1599 position where we were reseated. bidi_it->bytepos can be less 1778 position where we were reseated. bidi_it->bytepos can be less
@@ -1624,6 +1803,19 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1624 } 1803 }
1625 eassert (bidi_it->bytepos == CHAR_TO_BYTE (bidi_it->charpos)); 1804 eassert (bidi_it->bytepos == CHAR_TO_BYTE (bidi_it->charpos));
1626 } 1805 }
1806 /* Determine the orginal bidi type of the previous character,
1807 which is needed for handling isolate initiators and PDF. The
1808 type of the previous character will only be non-trivial if
1809 our caller moved through some previous text in
1810 get_visually_first_element, in which case bidi_it->prev holds
1811 the information we want. */
1812 if (bidi_it->first_elt && bidi_it->prev.type != UNKNOWN_BT)
1813 {
1814 eassert (bidi_it->prev.charpos == bidi_it->charpos - 1);
1815 prev_type = bidi_it->prev.orig_type;
1816 if (prev_type == FSI)
1817 prev_type = bidi_it->type_after_wn;
1818 }
1627 } 1819 }
1628 /* Don't move at end of buffer/string. */ 1820 /* Don't move at end of buffer/string. */
1629 else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV)) 1821 else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
@@ -1636,10 +1828,16 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1636 if (bidi_it->ch_len == 0) 1828 if (bidi_it->ch_len == 0)
1637 emacs_abort (); 1829 emacs_abort ();
1638 bidi_it->bytepos += bidi_it->ch_len; 1830 bidi_it->bytepos += bidi_it->ch_len;
1831 prev_type = bidi_it->orig_type;
1832 if (prev_type == FSI)
1833 prev_type = bidi_it->type_after_wn;
1639 } 1834 }
1835 else /* EOB or end of string */
1836 prev_type = NEUTRAL_B;
1640 1837
1641 current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */ 1838 current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
1642 override = bidi_it->level_stack[bidi_it->stack_idx].override; 1839 override = bidi_it->level_stack[bidi_it->stack_idx].override;
1840 isolate_status = bidi_it->level_stack[bidi_it->stack_idx].isolate_status;
1643 new_level = current_level; 1841 new_level = current_level;
1644 1842
1645 if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV)) 1843 if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
@@ -1652,6 +1850,52 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1652 } 1850 }
1653 else 1851 else
1654 { 1852 {
1853 /* LRI, RLI, and FSI increment, and PDF decrements, the
1854 embedding level of the _following_ characters, so we must
1855 first look at the type of the previous character to support
1856 that. */
1857 switch (prev_type)
1858 {
1859 case RLI: /* X5a */
1860 if (current_level < BIDI_MAXDEPTH
1861 && bidi_it->invalid_levels == 0
1862 && bidi_it->invalid_isolates == 0)
1863 {
1864 new_level = ((current_level + 1) & ~1) + 1;
1865 bidi_it->isolate_level++;
1866 bidi_push_embedding_level (bidi_it, new_level,
1867 NEUTRAL_DIR, true);
1868 }
1869 else
1870 bidi_it->invalid_isolates++;
1871 break;
1872 case LRI: /* X5b */
1873 if (current_level < BIDI_MAXDEPTH - 1
1874 && bidi_it->invalid_levels == 0
1875 && bidi_it->invalid_isolates == 0)
1876 {
1877 new_level = ((current_level + 2) & ~1);
1878 bidi_it->isolate_level++;
1879 bidi_push_embedding_level (bidi_it, new_level,
1880 NEUTRAL_DIR, true);
1881 }
1882 else
1883 bidi_it->invalid_isolates++;
1884 break;
1885 case PDF: /* X7 */
1886 if (!bidi_it->invalid_isolates)
1887 {
1888 if (bidi_it->invalid_levels)
1889 bidi_it->invalid_levels--;
1890 else if (!isolate_status && bidi_it->stack_idx >= 1)
1891 new_level = bidi_pop_embedding_level (bidi_it);
1892 }
1893 break;
1894 default:
1895 eassert (prev_type != FSI);
1896 /* Nothing. */
1897 break;
1898 }
1655 /* Fetch the character at BYTEPOS. If it is covered by a 1899 /* Fetch the character at BYTEPOS. If it is covered by a
1656 display string, treat the entire run of covered characters as 1900 display string, treat the entire run of covered characters as
1657 a single character u+FFFC. */ 1901 a single character u+FFFC. */
@@ -1662,6 +1906,7 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1662 &bidi_it->ch_len, &bidi_it->nchars); 1906 &bidi_it->ch_len, &bidi_it->nchars);
1663 } 1907 }
1664 bidi_it->ch = curchar; 1908 bidi_it->ch = curchar;
1909 bidi_it->resolved_level = new_level;
1665 1910
1666 /* Don't apply directional override here, as all the types we handle 1911 /* Don't apply directional override here, as all the types we handle
1667 below will not be affected by the override anyway, and we need 1912 below will not be affected by the override anyway, and we need
@@ -1671,206 +1916,138 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1671 bidi_it->orig_type = type; 1916 bidi_it->orig_type = type;
1672 bidi_check_type (bidi_it->orig_type); 1917 bidi_check_type (bidi_it->orig_type);
1673 1918
1674 if (type != PDF) 1919 bidi_it->type_after_wn = UNKNOWN_BT;
1675 bidi_it->prev_was_pdf = 0;
1676
1677 bidi_it->type_after_w1 = UNKNOWN_BT;
1678 1920
1679 switch (type) 1921 switch (type)
1680 { 1922 {
1681 case RLE: /* X2 */ 1923 case RLE: /* X2 */
1682 case RLO: /* X4 */ 1924 case RLO: /* X4 */
1683 bidi_it->type_after_w1 = type; 1925 bidi_it->type_after_wn = type;
1684 bidi_check_type (bidi_it->type_after_w1); 1926 bidi_check_type (bidi_it->type_after_wn);
1685 type = WEAK_BN; /* X9/Retaining */ 1927 type = WEAK_BN; /* X9/Retaining */
1686 if (bidi_it->ignore_bn_limit <= -1) 1928 if (new_level < BIDI_MAXDEPTH
1687 { 1929 && bidi_it->invalid_levels == 0
1688 if (current_level <= BIDI_MAXLEVEL - 4) 1930 && bidi_it->invalid_isolates == 0)
1689 {
1690 /* Compute the least odd embedding level greater than
1691 the current level. */
1692 new_level = ((current_level + 1) & ~1) + 1;
1693 if (bidi_it->type_after_w1 == RLE)
1694 override = NEUTRAL_DIR;
1695 else
1696 override = R2L;
1697 if (current_level == BIDI_MAXLEVEL - 4)
1698 bidi_it->invalid_rl_levels = 0;
1699 bidi_push_embedding_level (bidi_it, new_level, override);
1700 }
1701 else
1702 {
1703 bidi_it->invalid_levels++;
1704 /* See the commentary about invalid_rl_levels below. */
1705 if (bidi_it->invalid_rl_levels < 0)
1706 bidi_it->invalid_rl_levels = 0;
1707 bidi_it->invalid_rl_levels++;
1708 }
1709 }
1710 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1711 || (bidi_it->next_en_pos > bidi_it->charpos
1712 && bidi_it->next_en_type == WEAK_EN))
1713 type = WEAK_EN;
1714 break;
1715 case LRE: /* X3 */
1716 case LRO: /* X5 */
1717 bidi_it->type_after_w1 = type;
1718 bidi_check_type (bidi_it->type_after_w1);
1719 type = WEAK_BN; /* X9/Retaining */
1720 if (bidi_it->ignore_bn_limit <= -1)
1721 {
1722 if (current_level <= BIDI_MAXLEVEL - 5)
1723 {
1724 /* Compute the least even embedding level greater than
1725 the current level. */
1726 new_level = ((current_level + 2) & ~1);
1727 if (bidi_it->type_after_w1 == LRE)
1728 override = NEUTRAL_DIR;
1729 else
1730 override = L2R;
1731 bidi_push_embedding_level (bidi_it, new_level, override);
1732 }
1733 else
1734 {
1735 bidi_it->invalid_levels++;
1736 /* invalid_rl_levels counts invalid levels encountered
1737 while the embedding level was already too high for
1738 LRE/LRO, but not for RLE/RLO. That is because
1739 there may be exactly one PDF which we should not
1740 ignore even though invalid_levels is non-zero.
1741 invalid_rl_levels helps to know what PDF is
1742 that. */
1743 if (bidi_it->invalid_rl_levels >= 0)
1744 bidi_it->invalid_rl_levels++;
1745 }
1746 }
1747 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1748 || (bidi_it->next_en_pos > bidi_it->charpos
1749 && bidi_it->next_en_type == WEAK_EN))
1750 type = WEAK_EN;
1751 break;
1752 case PDF: /* X7 */
1753 bidi_it->type_after_w1 = type;
1754 bidi_check_type (bidi_it->type_after_w1);
1755 type = WEAK_BN; /* X9/Retaining */
1756 if (bidi_it->ignore_bn_limit <= -1)
1757 {
1758 if (!bidi_it->invalid_rl_levels)
1759 {
1760 new_level = bidi_pop_embedding_level (bidi_it);
1761 bidi_it->invalid_rl_levels = -1;
1762 if (bidi_it->invalid_levels)
1763 bidi_it->invalid_levels--;
1764 /* else nothing: UAX#9 says to ignore invalid PDFs */
1765 }
1766 if (!bidi_it->invalid_levels)
1767 new_level = bidi_pop_embedding_level (bidi_it);
1768 else
1769 {
1770 bidi_it->invalid_levels--;
1771 bidi_it->invalid_rl_levels--;
1772 }
1773 }
1774 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1775 || (bidi_it->next_en_pos > bidi_it->charpos
1776 && bidi_it->next_en_type == WEAK_EN))
1777 type = WEAK_EN;
1778 break;
1779 default:
1780 /* Nothing. */
1781 break;
1782 }
1783
1784 bidi_it->type = type;
1785 bidi_check_type (bidi_it->type);
1786
1787 return new_level;
1788}
1789
1790/* Given an iterator state in BIDI_IT, advance one character position
1791 in the buffer/string to the next character (in the logical order),
1792 resolve any explicit embeddings and directional overrides, and
1793 return the embedding level of the character after resolving
1794 explicit directives and ignoring empty embeddings. */
1795static int
1796bidi_resolve_explicit (struct bidi_it *bidi_it)
1797{
1798 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1799 int new_level = bidi_resolve_explicit_1 (bidi_it);
1800 ptrdiff_t eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
1801 const unsigned char *s
1802 = (STRINGP (bidi_it->string.lstring)
1803 ? SDATA (bidi_it->string.lstring)
1804 : bidi_it->string.s);
1805
1806 if (prev_level < new_level
1807 && bidi_it->type == WEAK_BN
1808 && bidi_it->ignore_bn_limit == -1 /* only if not already known */
1809 && bidi_it->charpos < eob /* not already at EOB */
1810 && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1811 + bidi_it->ch_len, s,
1812 bidi_it->string.unibyte)))
1813 {
1814 /* Avoid pushing and popping embedding levels if the level run
1815 is empty, as this breaks level runs where it shouldn't.
1816 UAX#9 removes all the explicit embedding and override codes,
1817 so empty embeddings disappear without a trace. We need to
1818 behave as if we did the same. */
1819 struct bidi_it saved_it;
1820 int level = prev_level;
1821
1822 bidi_copy_it (&saved_it, bidi_it);
1823
1824 while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1825 + bidi_it->ch_len, s,
1826 bidi_it->string.unibyte)))
1827 { 1931 {
1828 /* This advances to the next character, skipping any 1932 /* Compute the least odd embedding level greater than
1829 characters covered by display strings. */ 1933 the current level. */
1830 level = bidi_resolve_explicit_1 (bidi_it); 1934 new_level = ((new_level + 1) & ~1) + 1;
1831 /* If string.lstring was relocated inside bidi_resolve_explicit_1, 1935 if (bidi_it->type_after_wn == RLE)
1832 a pointer to its data is no longer valid. */ 1936 override = NEUTRAL_DIR;
1833 if (STRINGP (bidi_it->string.lstring)) 1937 else
1834 s = SDATA (bidi_it->string.lstring); 1938 override = R2L;
1939 bidi_push_embedding_level (bidi_it, new_level, override, false);
1940 bidi_it->resolved_level = new_level;
1835 } 1941 }
1836 1942 else
1837 if (bidi_it->nchars <= 0)
1838 emacs_abort ();
1839 if (level == prev_level) /* empty embedding */
1840 saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
1841 else /* this embedding is non-empty */
1842 saved_it.ignore_bn_limit = -2;
1843
1844 bidi_copy_it (bidi_it, &saved_it);
1845 if (bidi_it->ignore_bn_limit > -1)
1846 { 1943 {
1847 /* We pushed a level, but we shouldn't have. Undo that. */ 1944 if (bidi_it->invalid_isolates == 0)
1848 if (!bidi_it->invalid_rl_levels) 1945 bidi_it->invalid_levels++;
1849 { 1946 }
1850 new_level = bidi_pop_embedding_level (bidi_it); 1947 break;
1851 bidi_it->invalid_rl_levels = -1; 1948 case LRE: /* X3 */
1852 if (bidi_it->invalid_levels) 1949 case LRO: /* X5 */
1853 bidi_it->invalid_levels--; 1950 bidi_it->type_after_wn = type;
1854 } 1951 bidi_check_type (bidi_it->type_after_wn);
1855 if (!bidi_it->invalid_levels) 1952 type = WEAK_BN; /* X9/Retaining */
1856 new_level = bidi_pop_embedding_level (bidi_it); 1953 if (new_level < BIDI_MAXDEPTH - 1
1954 && bidi_it->invalid_levels == 0
1955 && bidi_it->invalid_isolates == 0)
1956 {
1957 /* Compute the least even embedding level greater than
1958 the current level. */
1959 new_level = ((new_level + 2) & ~1);
1960 if (bidi_it->type_after_wn == LRE)
1961 override = NEUTRAL_DIR;
1857 else 1962 else
1858 { 1963 override = L2R;
1859 bidi_it->invalid_levels--; 1964 bidi_push_embedding_level (bidi_it, new_level, override, false);
1860 bidi_it->invalid_rl_levels--; 1965 bidi_it->resolved_level = new_level;
1861 } 1966 }
1967 else
1968 {
1969 if (bidi_it->invalid_isolates == 0)
1970 bidi_it->invalid_levels++;
1862 } 1971 }
1972 break;
1973 case FSI: /* X5c */
1974 end = string_p ? bidi_it->string.schars : ZV;
1975 disp_pos = bidi_it->disp_pos;
1976 disp_prop = bidi_it->disp_prop;
1977 nchars = bidi_it->nchars;
1978 ch_len = bidi_it->ch_len;
1979 typ1 = find_first_strong_char (bidi_it->charpos,
1980 bidi_it->bytepos, end,
1981 &disp_pos, &disp_prop,
1982 &bidi_it->string, bidi_it->w,
1983 string_p, bidi_it->frame_window_p,
1984 &ch_len, &nchars, true);
1985 if (typ1 != STRONG_R && typ1 != STRONG_AL)
1986 {
1987 type = LRI;
1988 goto fsi_as_lri;
1989 }
1990 else
1991 type = RLI;
1992 /* FALLTHROUGH */
1993 case RLI: /* X5a */
1994 if (override == NEUTRAL_DIR)
1995 bidi_it->type_after_wn = type;
1996 else /* Unicode 8.0 correction. */
1997 bidi_it->type_after_wn = (override == L2R ? STRONG_L : STRONG_R);
1998 bidi_check_type (bidi_it->type_after_wn);
1999 break;
2000 case LRI: /* X5b */
2001 fsi_as_lri:
2002 if (override == NEUTRAL_DIR)
2003 bidi_it->type_after_wn = type;
2004 else /* Unicode 8.0 correction. */
2005 bidi_it->type_after_wn = (override == L2R ? STRONG_L : STRONG_R);
2006 bidi_check_type (bidi_it->type_after_wn);
2007 break;
2008 case PDI: /* X6a */
2009 if (bidi_it->invalid_isolates)
2010 bidi_it->invalid_isolates--;
2011 else if (bidi_it->isolate_level > 0)
2012 {
2013 bidi_it->invalid_levels = 0;
2014 while (!bidi_it->level_stack[bidi_it->stack_idx].isolate_status)
2015 bidi_pop_embedding_level (bidi_it);
2016 eassert (bidi_it->stack_idx > 0);
2017 new_level = bidi_pop_embedding_level (bidi_it);
2018 bidi_it->isolate_level--;
2019 }
2020 bidi_it->resolved_level = new_level;
2021 /* Unicode 8.0 correction. */
2022 if (bidi_it->level_stack[bidi_it->stack_idx].override == L2R)
2023 bidi_it->type_after_wn = STRONG_L;
2024 else if (bidi_it->level_stack[bidi_it->stack_idx].override == R2L)
2025 bidi_it->type_after_wn = STRONG_R;
2026 else
2027 bidi_it->type_after_wn = type;
2028 break;
2029 case PDF: /* X7 */
2030 bidi_it->type_after_wn = type;
2031 bidi_check_type (bidi_it->type_after_wn);
2032 type = WEAK_BN; /* X9/Retaining */
2033 break;
2034 default:
2035 /* Nothing. */
2036 break;
1863 } 2037 }
1864 2038
2039 bidi_it->type = type;
2040 bidi_check_type (bidi_it->type);
2041
1865 if (bidi_it->type == NEUTRAL_B) /* X8 */ 2042 if (bidi_it->type == NEUTRAL_B) /* X8 */
1866 { 2043 {
1867 bidi_set_paragraph_end (bidi_it); 2044 bidi_set_paragraph_end (bidi_it);
1868 /* This is needed by bidi_resolve_weak below, and in L1. */ 2045 /* This is needed by bidi_resolve_weak below, and in L1. */
1869 bidi_it->type_after_w1 = bidi_it->type; 2046 bidi_it->type_after_wn = bidi_it->type;
1870 bidi_check_type (bidi_it->type_after_w1);
1871 } 2047 }
1872 2048
1873 return new_level; 2049 eassert (bidi_it->resolved_level >= 0);
2050 return bidi_it->resolved_level;
1874} 2051}
1875 2052
1876/* Advance in the buffer/string, resolve weak types and return the 2053/* Advance in the buffer/string, resolve weak types and return the
@@ -1892,26 +2069,25 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
1892 type = bidi_it->type; 2069 type = bidi_it->type;
1893 override = bidi_it->level_stack[bidi_it->stack_idx].override; 2070 override = bidi_it->level_stack[bidi_it->stack_idx].override;
1894 2071
1895 if (type == UNKNOWN_BT 2072 eassert (!(type == UNKNOWN_BT
1896 || type == LRE 2073 || type == LRE
1897 || type == LRO 2074 || type == LRO
1898 || type == RLE 2075 || type == RLE
1899 || type == RLO 2076 || type == RLO
1900 || type == PDF) 2077 || type == PDF));
1901 emacs_abort ();
1902 2078
1903 if (new_level != prev_level 2079 eassert (prev_level >= 0);
1904 || bidi_it->type == NEUTRAL_B) 2080 if (bidi_it->type == NEUTRAL_B)
1905 { 2081 {
1906 /* We've got a new embedding level run, compute the directional 2082 /* We've got a new isolating sequence, compute the directional
1907 type of sor and initialize per-run variables (UAX#9, clause 2083 type of sos and initialize per-run variables (UAX#9, clause
1908 X10). */ 2084 X10). */
1909 bidi_set_sor_type (bidi_it, prev_level, new_level); 2085 bidi_set_sos_type (bidi_it, prev_level, new_level);
1910 } 2086 }
1911 else if (type == NEUTRAL_S || type == NEUTRAL_WS 2087 if (type == NEUTRAL_S || type == NEUTRAL_WS
1912 || type == WEAK_BN || type == STRONG_AL) 2088 || type == WEAK_BN || type == STRONG_AL)
1913 bidi_it->type_after_w1 = type; /* needed in L1 */ 2089 bidi_it->type_after_wn = type; /* needed in L1 */
1914 bidi_check_type (bidi_it->type_after_w1); 2090 bidi_check_type (bidi_it->type_after_wn);
1915 2091
1916 /* Level and directional override status are already recorded in 2092 /* Level and directional override status are already recorded in
1917 bidi_it, and do not need any change; see X6. */ 2093 bidi_it, and do not need any change; see X6. */
@@ -1928,31 +2104,49 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
1928 because then either the type of this NSM would have been 2104 because then either the type of this NSM would have been
1929 also overridden, or the previous character is outside the 2105 also overridden, or the previous character is outside the
1930 current level run, and thus not relevant to this NSM. 2106 current level run, and thus not relevant to this NSM.
1931 This is why NSM gets the type_after_w1 of the previous 2107 This is why NSM gets the type_after_wn of the previous
1932 character. */ 2108 character. */
1933 if (bidi_it->prev.type_after_w1 != UNKNOWN_BT 2109 /* bidi_set_sos_type sets type_after_wn to UNKNOWN_BT. */
1934 /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */ 2110 if (bidi_it->prev.type != UNKNOWN_BT
1935 && bidi_it->prev.type_after_w1 != NEUTRAL_B) 2111 /* If type_after_wn is NEUTRAL_B, this NSM is at sos. */
1936 type = bidi_it->prev.type_after_w1; 2112 && bidi_it->prev.type != NEUTRAL_B)
1937 else if (bidi_it->sor == R2L) 2113 {
2114 if (bidi_isolate_fmt_char (bidi_it->prev.type))
2115 {
2116 /* From W1: "Note that in an isolating run sequence,
2117 an isolate initiator followed by an NSM or any
2118 type other than PDI must be an overflow isolate
2119 initiator." */
2120 eassert (bidi_it->invalid_isolates > 0);
2121 type = NEUTRAL_ON;
2122 }
2123 else
2124 {
2125 /* This includes the Unicode 8.0 correction for N0,
2126 due to how we set prev.type in bidi_resolve_explicit,
2127 which see. */
2128 type = bidi_it->prev.type;
2129 }
2130 }
2131 else if (bidi_it->sos == R2L)
1938 type = STRONG_R; 2132 type = STRONG_R;
1939 else if (bidi_it->sor == L2R) 2133 else if (bidi_it->sos == L2R)
1940 type = STRONG_L; 2134 type = STRONG_L;
1941 else /* shouldn't happen! */ 2135 else /* shouldn't happen! */
1942 emacs_abort (); 2136 emacs_abort ();
1943 } 2137 }
1944 if (type == WEAK_EN /* W2 */ 2138 if (type == WEAK_EN /* W2 */
1945 && bidi_it->last_strong.type_after_w1 == STRONG_AL) 2139 && bidi_it->last_strong.type == STRONG_AL)
1946 type = WEAK_AN; 2140 type = WEAK_AN;
1947 else if (type == STRONG_AL) /* W3 */ 2141 else if (type == STRONG_AL) /* W3 */
1948 type = STRONG_R; 2142 type = STRONG_R;
1949 else if ((type == WEAK_ES /* W4 */ 2143 else if ((type == WEAK_ES /* W4 */
1950 && bidi_it->prev.type_after_w1 == WEAK_EN 2144 && bidi_it->prev.type == WEAK_EN
1951 && bidi_it->prev.orig_type == WEAK_EN) 2145 && bidi_it->prev.orig_type == WEAK_EN)
1952 || (type == WEAK_CS 2146 || (type == WEAK_CS
1953 && ((bidi_it->prev.type_after_w1 == WEAK_EN 2147 && ((bidi_it->prev.type == WEAK_EN
1954 && bidi_it->prev.orig_type == WEAK_EN) 2148 && bidi_it->prev.orig_type == WEAK_EN)
1955 || bidi_it->prev.type_after_w1 == WEAK_AN))) 2149 || bidi_it->prev.type == WEAK_AN)))
1956 { 2150 {
1957 const unsigned char *s 2151 const unsigned char *s
1958 = (STRINGP (bidi_it->string.lstring) 2152 = (STRINGP (bidi_it->string.lstring)
@@ -1971,8 +2165,7 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
1971 bidi_copy_it (&saved_it, bidi_it); 2165 bidi_copy_it (&saved_it, bidi_it);
1972 while (bidi_resolve_explicit (bidi_it) == new_level 2166 while (bidi_resolve_explicit (bidi_it) == new_level
1973 && bidi_it->type == WEAK_BN) 2167 && bidi_it->type == WEAK_BN)
1974 ; 2168 type_of_next = bidi_it->type;
1975 type_of_next = bidi_it->type;
1976 bidi_copy_it (bidi_it, &saved_it); 2169 bidi_copy_it (bidi_it, &saved_it);
1977 } 2170 }
1978 2171
@@ -1982,11 +2175,11 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
1982 should not be changed into EN. */ 2175 should not be changed into EN. */
1983 if (type == WEAK_ES 2176 if (type == WEAK_ES
1984 && type_of_next == WEAK_EN 2177 && type_of_next == WEAK_EN
1985 && bidi_it->last_strong.type_after_w1 != STRONG_AL) 2178 && bidi_it->last_strong.type != STRONG_AL)
1986 type = WEAK_EN; 2179 type = WEAK_EN;
1987 else if (type == WEAK_CS) 2180 else if (type == WEAK_CS)
1988 { 2181 {
1989 if (bidi_it->prev.type_after_w1 == WEAK_AN 2182 if (bidi_it->prev.type == WEAK_AN
1990 && (type_of_next == WEAK_AN 2183 && (type_of_next == WEAK_AN
1991 /* If the next character is EN, but the last 2184 /* If the next character is EN, but the last
1992 strong-type character is AL, EN will be later 2185 strong-type character is AL, EN will be later
@@ -1994,18 +2187,18 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
1994 So in that case, this ES should not be 2187 So in that case, this ES should not be
1995 changed into EN. */ 2188 changed into EN. */
1996 || (type_of_next == WEAK_EN 2189 || (type_of_next == WEAK_EN
1997 && bidi_it->last_strong.type_after_w1 == STRONG_AL))) 2190 && bidi_it->last_strong.type == STRONG_AL)))
1998 type = WEAK_AN; 2191 type = WEAK_AN;
1999 else if (bidi_it->prev.type_after_w1 == WEAK_EN 2192 else if (bidi_it->prev.type == WEAK_EN
2000 && type_of_next == WEAK_EN 2193 && type_of_next == WEAK_EN
2001 && bidi_it->last_strong.type_after_w1 != STRONG_AL) 2194 && bidi_it->last_strong.type != STRONG_AL)
2002 type = WEAK_EN; 2195 type = WEAK_EN;
2003 } 2196 }
2004 } 2197 }
2005 else if (type == WEAK_ET /* W5: ET with EN before or after it */ 2198 else if (type == WEAK_ET /* W5: ET with EN before or after it */
2006 || type == WEAK_BN) /* W5/Retaining */ 2199 || type == WEAK_BN) /* W5/Retaining */
2007 { 2200 {
2008 if (bidi_it->prev.type_after_w1 == WEAK_EN) /* ET/BN w/EN before it */ 2201 if (bidi_it->prev.type == WEAK_EN) /* ET/BN w/EN before it */
2009 type = WEAK_EN; 2202 type = WEAK_EN;
2010 else if (bidi_it->next_en_pos > bidi_it->charpos 2203 else if (bidi_it->next_en_pos > bidi_it->charpos
2011 && bidi_it->next_en_type != WEAK_BN) 2204 && bidi_it->next_en_type != WEAK_BN)
@@ -2015,6 +2208,12 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
2015 } 2208 }
2016 else if (bidi_it->next_en_pos >=0) 2209 else if (bidi_it->next_en_pos >=0)
2017 { 2210 {
2211 /* We overstepped the last known position for ET
2212 resolution but there could be other such characters
2213 in this paragraph (when we are sure there are no more
2214 such positions, we set next_en_pos to a negative
2215 value). Try to find the next position for ET
2216 resolution. */
2018 ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars; 2217 ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars;
2019 const unsigned char *s = (STRINGP (bidi_it->string.lstring) 2218 const unsigned char *s = (STRINGP (bidi_it->string.lstring)
2020 ? SDATA (bidi_it->string.lstring) 2219 ? SDATA (bidi_it->string.lstring)
@@ -2037,9 +2236,20 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
2037 while (bidi_resolve_explicit (bidi_it) == new_level 2236 while (bidi_resolve_explicit (bidi_it) == new_level
2038 && (bidi_it->type == WEAK_BN 2237 && (bidi_it->type == WEAK_BN
2039 || bidi_it->type == WEAK_ET)) 2238 || bidi_it->type == WEAK_ET))
2040 ; 2239 type_of_next = bidi_it->type;
2041 type_of_next = bidi_it->type; 2240 if (type == WEAK_BN
2042 en_pos = bidi_it->charpos; 2241 && bidi_it->charpos == saved_it.charpos + saved_it.nchars)
2242 {
2243 /* If we entered the above loop with a BN that
2244 changes the level, the type of next
2245 character, which is in a different level, is
2246 not relevant to resolving this series of ET
2247 and BN. */
2248 en_pos = saved_it.charpos;
2249 type_of_next = type;
2250 }
2251 else
2252 en_pos = bidi_it->charpos;
2043 bidi_copy_it (bidi_it, &saved_it); 2253 bidi_copy_it (bidi_it, &saved_it);
2044 } 2254 }
2045 /* Remember this position, to speed up processing of the 2255 /* Remember this position, to speed up processing of the
@@ -2049,7 +2259,7 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
2049 { 2259 {
2050 /* If the last strong character is AL, the EN we've 2260 /* If the last strong character is AL, the EN we've
2051 found will become AN when we get to it (W2). */ 2261 found will become AN when we get to it (W2). */
2052 if (bidi_it->last_strong.type_after_w1 == STRONG_AL) 2262 if (bidi_it->last_strong.type == STRONG_AL)
2053 type_of_next = WEAK_AN; 2263 type_of_next = WEAK_AN;
2054 else if (type == WEAK_BN) 2264 else if (type == WEAK_BN)
2055 type = NEUTRAL_ON; /* W6/Retaining */ 2265 type = NEUTRAL_ON; /* W6/Retaining */
@@ -2069,23 +2279,23 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
2069 2279
2070 if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */ 2280 if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
2071 || (type == WEAK_BN 2281 || (type == WEAK_BN
2072 && (bidi_it->prev.type_after_w1 == WEAK_CS /* W6/Retaining */ 2282 && (bidi_it->prev.type == WEAK_CS /* W6/Retaining */
2073 || bidi_it->prev.type_after_w1 == WEAK_ES 2283 || bidi_it->prev.type == WEAK_ES
2074 || bidi_it->prev.type_after_w1 == WEAK_ET))) 2284 || bidi_it->prev.type == WEAK_ET)))
2075 type = NEUTRAL_ON; 2285 type = NEUTRAL_ON;
2076 2286
2077 /* Store the type we've got so far, before we clobber it with strong 2287 /* Store the type we've got so far, before we clobber it with strong
2078 types in W7 and while resolving neutral types. But leave alone 2288 types in W7 and while resolving neutral types. But leave alone
2079 the original types that were recorded above, because we will need 2289 the original types that were recorded above, because we will need
2080 them for the L1 clause. */ 2290 them for the L1 clause. */
2081 if (bidi_it->type_after_w1 == UNKNOWN_BT) 2291 if (bidi_it->type_after_wn == UNKNOWN_BT)
2082 bidi_it->type_after_w1 = type; 2292 bidi_it->type_after_wn = type;
2083 bidi_check_type (bidi_it->type_after_w1); 2293 bidi_check_type (bidi_it->type_after_wn);
2084 2294
2085 if (type == WEAK_EN) /* W7 */ 2295 if (type == WEAK_EN) /* W7 */
2086 { 2296 {
2087 if ((bidi_it->last_strong.type_after_w1 == STRONG_L) 2297 if ((bidi_it->last_strong.type == STRONG_L)
2088 || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R)) 2298 || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sos == L2R))
2089 type = STRONG_L; 2299 type = STRONG_L;
2090 } 2300 }
2091 2301
@@ -2099,7 +2309,8 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
2099static bidi_type_t 2309static bidi_type_t
2100bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev) 2310bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
2101{ 2311{
2102 /* N1: European and Arabic numbers are treated as though they were R. */ 2312 /* N1: "European and Arabic numbers act as if they were R in terms
2313 of their influence on NIs." */
2103 if (next_type == WEAK_EN || next_type == WEAK_AN) 2314 if (next_type == WEAK_EN || next_type == WEAK_AN)
2104 next_type = STRONG_R; 2315 next_type = STRONG_R;
2105 if (prev_type == WEAK_EN || prev_type == WEAK_AN) 2316 if (prev_type == WEAK_EN || prev_type == WEAK_AN)
@@ -2113,28 +2324,334 @@ bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
2113 return STRONG_R; 2324 return STRONG_R;
2114} 2325}
2115 2326
2327#define FLAG_EMBEDDING_INSIDE 1
2328#define FLAG_OPPOSITE_INSIDE 2
2329
2330/* A data type used in the stack maintained by
2331 bidi_find_bracket_pairs below. */
2332typedef struct bpa_stack_entry {
2333 int close_bracket_char;
2334 int open_bracket_idx;
2335#ifdef ENABLE_CHECKING
2336 ptrdiff_t open_bracket_pos;
2337#endif
2338 unsigned flags : 2;
2339} bpa_stack_entry;
2340
2341/* With MAX_ALLOCA of 16KB, this should allow at least 1K slots in the
2342 BPA stack, which should be more than enough for actual bidi text. */
2343#define MAX_BPA_STACK (max (MAX_ALLOCA / sizeof (bpa_stack_entry), 1))
2344
2345#ifdef ENABLE_CHECKING
2346# define STORE_BRACKET_CHARPOS \
2347 bpa_stack[bpa_sp].open_bracket_pos = bidi_it->charpos
2348#else
2349# define STORE_BRACKET_CHARPOS /* nothing */
2350#endif
2351
2352#define PUSH_BPA_STACK \
2353 do { \
2354 bpa_sp++; \
2355 if (bpa_sp >= MAX_BPA_STACK) \
2356 { \
2357 bpa_sp = MAX_BPA_STACK - 1; \
2358 goto bpa_give_up; \
2359 } \
2360 bpa_stack[bpa_sp].close_bracket_char = bidi_mirror_char (bidi_it->ch); \
2361 bpa_stack[bpa_sp].open_bracket_idx = bidi_cache_last_idx; \
2362 bpa_stack[bpa_sp].flags = 0; \
2363 STORE_BRACKET_CHARPOS; \
2364 } while (0)
2365
2366
2367/* This function implements BPA, the Bidi Parenthesis Algorithm,
2368 described in BD16 and N0 of UAX#9. It finds all the bracket pairs
2369 in the current isolating sequence, and records the enclosed type
2370 and the position of the matching bracket in the cache. It returns
2371 non-zero if called with the iterator on the opening bracket which
2372 has a matching closing bracket in the current isolating sequence,
2373 zero otherwise. */
2374static bool
2375bidi_find_bracket_pairs (struct bidi_it *bidi_it)
2376{
2377 bidi_bracket_type_t btype;
2378 bidi_type_t type = bidi_it->type;
2379 bool retval = false;
2380
2381 /* When scanning backwards, we don't expect any unresolved bidi
2382 bracket characters. */
2383 if (bidi_it->scan_dir != 1)
2384 emacs_abort ();
2385
2386 btype = bidi_paired_bracket_type (bidi_it->ch);
2387 if (btype == BIDI_BRACKET_OPEN)
2388 {
2389 bpa_stack_entry bpa_stack[MAX_BPA_STACK];
2390 int bpa_sp = -1;
2391 struct bidi_it saved_it;
2392 int embedding_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2393 bidi_type_t embedding_type = (embedding_level & 1) ? STRONG_R : STRONG_L;
2394 struct bidi_it tem_it;
2395
2396 eassert (MAX_BPA_STACK >= 100);
2397 bidi_copy_it (&saved_it, bidi_it);
2398 /* bidi_cache_iterator_state refuses to cache on backward scans,
2399 and bidi_cache_fetch_state doesn't bring scan_dir from the
2400 cache, so we must initialize this explicitly. */
2401 tem_it.scan_dir = 1;
2402
2403 while (1)
2404 {
2405 int old_sidx, new_sidx;
2406 int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2407
2408 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0);
2409 if (btype == BIDI_BRACKET_OPEN)
2410 PUSH_BPA_STACK;
2411 else if (btype == BIDI_BRACKET_CLOSE)
2412 {
2413 int sp = bpa_sp;
2414 int curchar = bidi_it->ch;
2415
2416 eassert (sp >= 0);
2417 while (sp >= 0 && bpa_stack[sp].close_bracket_char != curchar)
2418 sp--;
2419 if (sp >= 0)
2420 {
2421 /* Update and cache the corresponding opening bracket. */
2422 bidi_cache_fetch_state (bpa_stack[sp].open_bracket_idx,
2423 &tem_it);
2424#ifdef ENABLE_CHECKING
2425 eassert (bpa_stack[sp].open_bracket_pos == tem_it.charpos);
2426#endif
2427 /* Determine the enclosed type for this bracket
2428 pair's type resolution according to N0. */
2429 if (bpa_stack[sp].flags & FLAG_EMBEDDING_INSIDE)
2430 tem_it.bracket_enclosed_type = embedding_type; /* N0b */
2431 else if (bpa_stack[sp].flags & FLAG_OPPOSITE_INSIDE)
2432 tem_it.bracket_enclosed_type /* N0c */
2433 = (embedding_type == STRONG_L ? STRONG_R : STRONG_L);
2434 else /* N0d */
2435 tem_it.bracket_enclosed_type = UNKNOWN_BT;
2436
2437 /* Record the position of the matching closing
2438 bracket, and update the cache. */
2439 tem_it.bracket_pairing_pos = bidi_it->charpos;
2440 bidi_cache_iterator_state (&tem_it, 0, 1);
2441
2442 /* Pop the BPA stack. */
2443 bpa_sp = sp - 1;
2444 }
2445 if (bpa_sp < 0)
2446 {
2447 retval = true;
2448 break;
2449 }
2450 }
2451 else if (bidi_get_category (bidi_it->type_after_wn) != NEUTRAL)
2452 {
2453 unsigned flag;
2454 int sp;
2455
2456 /* Whenever we see a strong type, update the flags of
2457 all the slots on the stack. */
2458 switch (bidi_it->type)
2459 {
2460 case STRONG_L:
2461 flag = ((embedding_level & 1) == 0
2462 ? FLAG_EMBEDDING_INSIDE
2463 : FLAG_OPPOSITE_INSIDE);
2464 break;
2465 case STRONG_R:
2466 case WEAK_EN:
2467 case WEAK_AN:
2468 flag = ((embedding_level & 1) == 1
2469 ? FLAG_EMBEDDING_INSIDE
2470 : FLAG_OPPOSITE_INSIDE);
2471 break;
2472 default:
2473 break;
2474 }
2475 for (sp = bpa_sp; sp >= 0; sp--)
2476 bpa_stack[sp].flags |= flag;
2477 }
2478 old_sidx = bidi_it->stack_idx;
2479 type = bidi_resolve_weak (bidi_it);
2480 /* Skip level runs excluded from this isolating run sequence. */
2481 new_sidx = bidi_it->stack_idx;
2482 if (bidi_it->level_stack[new_sidx].level > current_level
2483 && (bidi_it->level_stack[new_sidx].isolate_status
2484 || (new_sidx > old_sidx + 1
2485 && bidi_it->level_stack[new_sidx - 1].isolate_status)))
2486 {
2487 while (bidi_it->level_stack[bidi_it->stack_idx].level
2488 > current_level)
2489 {
2490 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0);
2491 type = bidi_resolve_weak (bidi_it);
2492 }
2493 }
2494 if (type == NEUTRAL_B
2495 || (bidi_it->level_stack[bidi_it->stack_idx].level
2496 != current_level))
2497 {
2498 bpa_give_up:
2499 /* We've marched all the way to the end of this
2500 isolating run sequence, and didn't find matching
2501 closing brackets for some opening brackets. Leave
2502 their type unchanged. */
2503 break;
2504 }
2505 if (bidi_it->type_after_wn == NEUTRAL_ON) /* Unicode 8.0 correction */
2506 btype = bidi_paired_bracket_type (bidi_it->ch);
2507 else
2508 btype = BIDI_BRACKET_NONE;
2509 }
2510
2511 /* Restore bidi_it from the cache, which should have the bracket
2512 resolution members set as determined by the above loop. */
2513 type = bidi_cache_find (saved_it.charpos, 1, bidi_it);
2514 eassert (type == NEUTRAL_ON);
2515 }
2516
2517 return retval;
2518}
2519
2116static bidi_type_t 2520static bidi_type_t
2117bidi_resolve_neutral (struct bidi_it *bidi_it) 2521bidi_resolve_brackets (struct bidi_it *bidi_it)
2118{ 2522{
2119 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level; 2523 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2120 bidi_type_t type = bidi_resolve_weak (bidi_it); 2524 bool resolve_bracket = false;
2121 int current_level = bidi_it->level_stack[bidi_it->stack_idx].level; 2525 bidi_type_t type = UNKNOWN_BT;
2122 2526 int ch;
2123 if (!(type == STRONG_R 2527 struct bidi_saved_info tem_info;
2124 || type == STRONG_L 2528
2125 || type == WEAK_BN 2529 bidi_remember_char (&tem_info, bidi_it, 1);
2126 || type == WEAK_EN 2530 if (!bidi_it->first_elt)
2127 || type == WEAK_AN 2531 {
2128 || type == NEUTRAL_B 2532 type = bidi_cache_find (bidi_it->charpos + bidi_it->nchars, 1, bidi_it);
2129 || type == NEUTRAL_S 2533 ch = bidi_it->ch;
2130 || type == NEUTRAL_WS 2534 }
2131 || type == NEUTRAL_ON)) 2535 if (type == UNKNOWN_BT)
2132 emacs_abort (); 2536 {
2537 type = bidi_resolve_weak (bidi_it);
2538 if (type == NEUTRAL_ON && bidi_find_bracket_pairs (bidi_it))
2539 resolve_bracket = true;
2540 }
2541 else
2542 {
2543 if (type == NEUTRAL_ON
2544 && bidi_paired_bracket_type (ch) == BIDI_BRACKET_OPEN)
2545 {
2546 if (bidi_it->level_stack[bidi_it->stack_idx].level == prev_level)
2547 {
2548 if (bidi_it->bracket_pairing_pos > 0)
2549 {
2550 /* A cached opening bracket that wasn't completely
2551 resolved yet. */
2552 resolve_bracket = true;
2553 }
2554 }
2555 else
2556 {
2557 /* Higher levels were not BPA-resolved yet, even if
2558 cached by bidi_find_bracket_pairs. Lower levels were
2559 probably processed by bidi_find_bracket_pairs, but we
2560 have no easy way of retaining the prev_for_neutral
2561 from the previous level run of the isolating
2562 sequence. Force application of BPA now. */
2563 if (bidi_find_bracket_pairs (bidi_it))
2564 resolve_bracket = true;
2565 }
2566 }
2567 /* Keep track of the prev_for_neutral type, needed for resolving
2568 brackets below and for resolving neutrals in bidi_resolve_neutral. */
2569 if (bidi_it->level_stack[bidi_it->stack_idx].level == prev_level
2570 && (tem_info.type == STRONG_L || tem_info.type == STRONG_R
2571 || tem_info.type == WEAK_AN || tem_info.type == WEAK_EN))
2572 bidi_it->prev_for_neutral = tem_info;
2573 }
2574
2575 /* If needed, resolve the bracket type according to N0. */
2576 if (resolve_bracket)
2577 {
2578 int embedding_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2579 bidi_type_t embedding_type = (embedding_level & 1) ? STRONG_R : STRONG_L;
2580
2581 eassert (bidi_it->prev_for_neutral.type != UNKNOWN_BT);
2582 eassert (bidi_it->bracket_pairing_pos > bidi_it->charpos);
2583 if (bidi_it->bracket_enclosed_type == embedding_type) /* N0b */
2584 type = embedding_type;
2585 else
2586 {
2587 switch (bidi_it->prev_for_neutral.type)
2588 {
2589 case STRONG_R:
2590 case WEAK_EN:
2591 case WEAK_AN:
2592 type =
2593 (bidi_it->bracket_enclosed_type == STRONG_R) /* N0c */
2594 ? STRONG_R /* N0c1 */
2595 : embedding_type; /* N0c2 */
2596 break;
2597 case STRONG_L:
2598 type =
2599 (bidi_it->bracket_enclosed_type == STRONG_L) /* N0c */
2600 ? STRONG_L /* N0c1 */
2601 : embedding_type; /* N0c2 */
2602 break;
2603 default:
2604 /* N0d: Do not set the type for that bracket pair. */
2605 break;
2606 }
2607 }
2608 eassert (type == STRONG_L || type == STRONG_R || type == NEUTRAL_ON);
2609
2610 /* Update the type of the paired closing bracket to the same
2611 type as for the resolved opening bracket. */
2612 if (type != NEUTRAL_ON)
2613 {
2614 ptrdiff_t idx = bidi_cache_search (bidi_it->bracket_pairing_pos,
2615 -1, 1);
2616
2617 if (idx < bidi_cache_start)
2618 emacs_abort ();
2619 bidi_cache[idx].type = type;
2620 }
2621 }
2622
2623 return type;
2624}
2625
2626static bidi_type_t
2627bidi_resolve_neutral (struct bidi_it *bidi_it)
2628{
2629 bidi_type_t type = bidi_resolve_brackets (bidi_it);
2630 int current_level;
2631 bool is_neutral;
2632
2633 eassert (type == STRONG_R
2634 || type == STRONG_L
2635 || type == WEAK_BN
2636 || type == WEAK_EN
2637 || type == WEAK_AN
2638 || type == NEUTRAL_B
2639 || type == NEUTRAL_S
2640 || type == NEUTRAL_WS
2641 || type == NEUTRAL_ON
2642 || type == LRI
2643 || type == RLI
2644 || type == PDI);
2645
2646 current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2647 eassert (current_level >= 0);
2648 is_neutral = bidi_get_category (type) == NEUTRAL;
2133 2649
2134 if ((type != NEUTRAL_B /* Don't risk entering the long loop below if 2650 if ((type != NEUTRAL_B /* Don't risk entering the long loop below if
2135 we are already at paragraph end. */ 2651 we are already at paragraph end. */
2136 && bidi_get_category (type) == NEUTRAL) 2652 && (is_neutral || bidi_isolate_fmt_char (type)))
2137 || (type == WEAK_BN && prev_level == current_level)) 2653 /* N1-N2/Retaining */
2654 || (type == WEAK_BN && bidi_explicit_dir_char (bidi_it->ch)))
2138 { 2655 {
2139 if (bidi_it->next_for_neutral.type != UNKNOWN_BT) 2656 if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
2140 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, 2657 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
@@ -2155,7 +2672,8 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
2155 entering the expensive loop in the "else" clause. */ 2672 entering the expensive loop in the "else" clause. */
2156 else if (current_level == 0 2673 else if (current_level == 0
2157 && bidi_it->prev_for_neutral.type == STRONG_L 2674 && bidi_it->prev_for_neutral.type == STRONG_L
2158 && !bidi_explicit_dir_char (bidi_it->ch)) 2675 && !bidi_explicit_dir_char (bidi_it->ch)
2676 && !bidi_isolate_fmt_char (type))
2159 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, 2677 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2160 STRONG_L, current_level); 2678 STRONG_L, current_level);
2161 else if (/* current level is 1 */ 2679 else if (/* current level is 1 */
@@ -2167,7 +2685,8 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
2167 && (bidi_it->prev_for_neutral.type == STRONG_R 2685 && (bidi_it->prev_for_neutral.type == STRONG_R
2168 || bidi_it->prev_for_neutral.type == WEAK_EN 2686 || bidi_it->prev_for_neutral.type == WEAK_EN
2169 || bidi_it->prev_for_neutral.type == WEAK_AN) 2687 || bidi_it->prev_for_neutral.type == WEAK_AN)
2170 && !bidi_explicit_dir_char (bidi_it->ch)) 2688 && !bidi_explicit_dir_char (bidi_it->ch)
2689 && !bidi_isolate_fmt_char (type))
2171 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, 2690 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2172 STRONG_R, current_level); 2691 STRONG_R, current_level);
2173 else 2692 else
@@ -2182,85 +2701,107 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
2182 implementations! */ 2701 implementations! */
2183 struct bidi_it saved_it; 2702 struct bidi_it saved_it;
2184 bidi_type_t next_type; 2703 bidi_type_t next_type;
2185 2704 bool adjacent_to_neutrals = is_neutral;
2186 if (bidi_it->scan_dir == -1)
2187 emacs_abort ();
2188 2705
2189 bidi_copy_it (&saved_it, bidi_it); 2706 bidi_copy_it (&saved_it, bidi_it);
2190 /* Scan the text forward until we find the first non-neutral 2707 /* Scan the text forward until we find the first non-neutral
2191 character, and then use that to resolve the neutral we 2708 character, and then use that to resolve the neutral we
2192 are dealing with now. We also cache the scanned iterator 2709 are dealing with now. We also cache the scanned iterator
2193 states, to salvage some of the effort later. */ 2710 states, to salvage some of the effort later. */
2194 bidi_cache_iterator_state (bidi_it, 0);
2195 do { 2711 do {
2196 /* Record the info about the previous character, so that 2712 int old_sidx, new_sidx;
2197 it will be cached below with this state. */ 2713
2198 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
2199 && bidi_it->type != WEAK_BN)
2200 bidi_remember_char (&bidi_it->prev, bidi_it);
2201 type = bidi_resolve_weak (bidi_it);
2202 /* Paragraph separators have their levels fully resolved 2714 /* Paragraph separators have their levels fully resolved
2203 at this point, so cache them as resolved. */ 2715 at this point, so cache them as resolved. */
2204 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B); 2716 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0);
2205 /* FIXME: implement L1 here, by testing for a newline and 2717 old_sidx = bidi_it->stack_idx;
2206 resetting the level for any sequence of whitespace 2718 type = bidi_resolve_brackets (bidi_it);
2207 characters adjacent to it. */ 2719 /* Skip level runs excluded from this isolating run sequence. */
2720 new_sidx = bidi_it->stack_idx;
2721 if (bidi_it->level_stack[new_sidx].level > current_level
2722 && (bidi_it->level_stack[new_sidx].isolate_status
2723 /* This is for when we have an isolate initiator
2724 immediately followed by an embedding or
2725 override initiator, in which case we get the
2726 level stack pushed twice by the single call to
2727 bidi_resolve_weak above. */
2728 || (new_sidx > old_sidx + 1
2729 && bidi_it->level_stack[new_sidx - 1].isolate_status)))
2730 {
2731 while (bidi_it->level_stack[bidi_it->stack_idx].level
2732 > current_level)
2733 {
2734 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0);
2735 type = bidi_resolve_brackets (bidi_it);
2736 }
2737 }
2738 if (!adjacent_to_neutrals
2739 && (bidi_get_category (type) == NEUTRAL
2740 || bidi_isolate_fmt_char (type)))
2741 adjacent_to_neutrals = true;
2208 } while (!(type == NEUTRAL_B 2742 } while (!(type == NEUTRAL_B
2209 || (type != WEAK_BN 2743 || (type != WEAK_BN
2210 && bidi_get_category (type) != NEUTRAL) 2744 && bidi_get_category (type) != NEUTRAL
2745 && !bidi_isolate_fmt_char (type))
2211 /* This is all per level run, so stop when we 2746 /* This is all per level run, so stop when we
2212 reach the end of this level run. */ 2747 reach the end of this level run. */
2213 || (bidi_it->level_stack[bidi_it->stack_idx].level 2748 || (bidi_it->level_stack[bidi_it->stack_idx].level
2214 != current_level))); 2749 != current_level)));
2215 2750
2216 bidi_remember_char (&saved_it.next_for_neutral, bidi_it); 2751 /* Record the character we stopped at. */
2752 bidi_remember_char (&saved_it.next_for_neutral, bidi_it, 1);
2217 2753
2218 switch (type) 2754 if ((bidi_it->level_stack[bidi_it->stack_idx].level != current_level)
2755 || type == NEUTRAL_B)
2219 { 2756 {
2220 case STRONG_L: 2757 /* Marched all the way to the end of this level run. We
2221 case STRONG_R: 2758 need to use the eos type, whose information is stored
2222 case STRONG_AL: 2759 by bidi_set_sos_type in the prev_for_neutral
2223 /* Actually, STRONG_AL cannot happen here, because 2760 member. */
2224 bidi_resolve_weak converts it to STRONG_R, per W3. */ 2761 if (adjacent_to_neutrals)
2225 eassert (type != STRONG_AL); 2762 next_type = bidi_it->prev_for_neutral.type;
2226 next_type = type; 2763 else
2227 break; 2764 {
2228 case WEAK_EN: 2765 /* This is a BN which does not adjoin neutrals.
2229 case WEAK_AN: 2766 Leave its type alone. */
2230 /* N1: ``European and Arabic numbers are treated as 2767 bidi_copy_it (bidi_it, &saved_it);
2231 though they were R.'' */ 2768 return bidi_it->type;
2232 next_type = STRONG_R; 2769 }
2233 break; 2770 }
2234 case WEAK_BN: 2771 else
2235 case NEUTRAL_ON: /* W6/Retaining */ 2772 {
2236 if (!bidi_explicit_dir_char (bidi_it->ch)) 2773 switch (type)
2237 emacs_abort (); /* can't happen: BNs are skipped */ 2774 {
2238 /* FALLTHROUGH */ 2775 case STRONG_L:
2239 case NEUTRAL_B: 2776 case STRONG_R:
2240 /* Marched all the way to the end of this level run. 2777 case STRONG_AL:
2241 We need to use the eor type, whose information is 2778 /* Actually, STRONG_AL cannot happen here, because
2242 stored by bidi_set_sor_type in the prev_for_neutral 2779 bidi_resolve_weak converts it to STRONG_R, per W3. */
2243 member. */ 2780 eassert (type != STRONG_AL);
2244 if (saved_it.type != WEAK_BN 2781 next_type = type;
2245 || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL) 2782 break;
2246 next_type = bidi_it->prev_for_neutral.type; 2783 case WEAK_EN:
2247 else 2784 case WEAK_AN:
2248 { 2785 /* N1: "European and Arabic numbers act as if they
2249 /* This is a BN which does not adjoin neutrals. 2786 were R in terms of their influence on NIs." */
2250 Leave its type alone. */ 2787 next_type = STRONG_R;
2251 bidi_copy_it (bidi_it, &saved_it); 2788 break;
2252 return bidi_it->type; 2789 default:
2253 } 2790 emacs_abort ();
2254 break; 2791 break;
2255 default: 2792 }
2256 emacs_abort ();
2257 } 2793 }
2794 /* Resolve the type of all the NIs found during the above loop. */
2258 type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type, 2795 type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
2259 next_type, current_level); 2796 next_type, current_level);
2797 /* Update next_for_neutral with the resolved type, so we
2798 could use it for all the other NIs up to the place where
2799 we exited the loop. */
2260 saved_it.next_for_neutral.type = next_type; 2800 saved_it.next_for_neutral.type = next_type;
2801 bidi_check_type (type);
2802 /* Update the character which caused us to enter the above loop. */
2261 saved_it.type = type; 2803 saved_it.type = type;
2262 bidi_check_type (next_type); 2804 bidi_check_type (next_type);
2263 bidi_check_type (type);
2264 bidi_copy_it (bidi_it, &saved_it); 2805 bidi_copy_it (bidi_it, &saved_it);
2265 } 2806 }
2266 } 2807 }
@@ -2280,14 +2821,6 @@ bidi_type_of_next_char (struct bidi_it *bidi_it)
2280 if (bidi_it->scan_dir != 1) 2821 if (bidi_it->scan_dir != 1)
2281 emacs_abort (); 2822 emacs_abort ();
2282 2823
2283 /* Reset the limit until which to ignore BNs if we step out of the
2284 area where we found only empty levels. */
2285 if ((bidi_it->ignore_bn_limit > -1
2286 && bidi_it->ignore_bn_limit <= bidi_it->charpos)
2287 || (bidi_it->ignore_bn_limit == -2
2288 && !bidi_explicit_dir_char (bidi_it->ch)))
2289 bidi_it->ignore_bn_limit = -1;
2290
2291 type = bidi_resolve_neutral (bidi_it); 2824 type = bidi_resolve_neutral (bidi_it);
2292 2825
2293 return type; 2826 return type;
@@ -2300,9 +2833,8 @@ bidi_type_of_next_char (struct bidi_it *bidi_it)
2300static int 2833static int
2301bidi_level_of_next_char (struct bidi_it *bidi_it) 2834bidi_level_of_next_char (struct bidi_it *bidi_it)
2302{ 2835{
2303 bidi_type_t type; 2836 bidi_type_t type = UNKNOWN_BT;
2304 int level, prev_level = -1; 2837 int level;
2305 struct bidi_saved_info next_for_neutral;
2306 ptrdiff_t next_char_pos = -2; 2838 ptrdiff_t next_char_pos = -2;
2307 2839
2308 if (bidi_it->scan_dir == 1) 2840 if (bidi_it->scan_dir == 1)
@@ -2311,46 +2843,14 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
2311 = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring)) 2843 = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2312 ? bidi_it->string.schars : ZV); 2844 ? bidi_it->string.schars : ZV);
2313 2845
2314 /* There's no sense in trying to advance if we hit end of text. */ 2846 /* There's no sense in trying to advance if we've already hit
2847 the end of text. */
2315 if (bidi_it->charpos >= eob) 2848 if (bidi_it->charpos >= eob)
2316 return bidi_it->resolved_level;
2317
2318 /* Record the info about the previous character. */
2319 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
2320 && bidi_it->type != WEAK_BN)
2321 bidi_remember_char (&bidi_it->prev, bidi_it);
2322 if (bidi_it->type_after_w1 == STRONG_R
2323 || bidi_it->type_after_w1 == STRONG_L
2324 || bidi_it->type_after_w1 == STRONG_AL)
2325 bidi_remember_char (&bidi_it->last_strong, bidi_it);
2326 /* FIXME: it sounds like we don't need both prev and
2327 prev_for_neutral members, but I'm leaving them both for now. */
2328 if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
2329 || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
2330 bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
2331
2332 /* If we overstepped the characters used for resolving neutrals
2333 and whitespace, invalidate their info in the iterator. */
2334 if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
2335 bidi_it->next_for_neutral.type = UNKNOWN_BT;
2336 if (bidi_it->next_en_pos >= 0
2337 && bidi_it->charpos >= bidi_it->next_en_pos)
2338 { 2849 {
2339 bidi_it->next_en_pos = 0; 2850 eassert (bidi_it->resolved_level >= 0);
2340 bidi_it->next_en_type = UNKNOWN_BT; 2851 return bidi_it->resolved_level;
2341 } 2852 }
2342 if (bidi_it->next_for_ws.type != UNKNOWN_BT
2343 && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
2344 bidi_it->next_for_ws.type = UNKNOWN_BT;
2345
2346 /* This must be taken before we fill the iterator with the info
2347 about the next char. If we scan backwards, the iterator
2348 state must be already cached, so there's no need to know the
2349 embedding level of the previous character, since we will be
2350 returning to our caller shortly. */
2351 prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2352 } 2853 }
2353 next_for_neutral = bidi_it->next_for_neutral;
2354 2854
2355 /* Perhaps the character we want is already cached. If it is, the 2855 /* Perhaps the character we want is already cached. If it is, the
2356 call to bidi_cache_find below will return a type other than 2856 call to bidi_cache_find below will return a type other than
@@ -2359,6 +2859,10 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
2359 { 2859 {
2360 int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring)) 2860 int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2361 ? 0 : 1); 2861 ? 0 : 1);
2862 bidi_type_t prev_type = bidi_it->type;
2863 bidi_type_t type_for_neutral = bidi_it->next_for_neutral.type;
2864 ptrdiff_t pos_for_neutral = bidi_it->next_for_neutral.charpos;
2865
2362 if (bidi_it->scan_dir > 0) 2866 if (bidi_it->scan_dir > 0)
2363 { 2867 {
2364 if (bidi_it->nchars <= 0) 2868 if (bidi_it->nchars <= 0)
@@ -2372,29 +2876,60 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
2372 cached at the beginning of the iteration. */ 2876 cached at the beginning of the iteration. */
2373 next_char_pos = bidi_it->charpos - 1; 2877 next_char_pos = bidi_it->charpos - 1;
2374 if (next_char_pos >= bob - 1) 2878 if (next_char_pos >= bob - 1)
2375 type = bidi_cache_find (next_char_pos, -1, bidi_it); 2879 type = bidi_cache_find (next_char_pos, 0, bidi_it);
2376 else 2880
2377 type = UNKNOWN_BT; 2881 /* For a sequence of BN and NI, copy the type from the previous
2378 } 2882 character. This is because the loop in bidi_resolve_neutral
2379 else 2883 that handles such sequences caches the characters it
2380 type = UNKNOWN_BT; 2884 traverses, but does not (and cannot) store the
2381 if (type != UNKNOWN_BT) 2885 next_for_neutral member for them, because it is only known
2382 { 2886 when the loop ends. So when we find them in the cache, their
2383 /* Don't lose the information for resolving neutrals! The 2887 type needs to be updated, but we don't have next_for_neutral
2384 cached states could have been cached before their 2888 to do that. However, whatever type is resolved as result of
2385 next_for_neutral member was computed. If we are on our way 2889 that loop, it will be the same for all the traversed
2386 forward, we can simply take the info from the previous 2890 characters, by virtue of N1 and N2. */
2387 state. */ 2891 if (type == WEAK_BN && bidi_it->scan_dir > 0
2388 if (bidi_it->scan_dir == 1 2892 && bidi_explicit_dir_char (bidi_it->ch)
2389 && bidi_it->next_for_neutral.type == UNKNOWN_BT) 2893 && type_for_neutral != UNKNOWN_BT
2390 bidi_it->next_for_neutral = next_for_neutral; 2894 && bidi_it->charpos < pos_for_neutral)
2391 2895 {
2392 /* If resolved_level is -1, it means this state was cached 2896 type = prev_type;
2393 before it was completely resolved, so we cannot return 2897 eassert (type != UNKNOWN_BT);
2394 it. */ 2898 }
2395 if (bidi_it->resolved_level != -1) 2899 if (type != UNKNOWN_BT)
2396 return bidi_it->resolved_level; 2900 {
2901 /* If resolved_level is -1, it means this state was cached
2902 before it was completely resolved, so we cannot return
2903 it. */
2904 if (bidi_it->resolved_level != -1)
2905 {
2906 eassert (bidi_it->resolved_level >= 0);
2907 return bidi_it->resolved_level;
2908 }
2909 else
2910 {
2911 level = bidi_it->level_stack[bidi_it->stack_idx].level;
2912 if (bidi_get_category (type) == NEUTRAL
2913 || bidi_isolate_fmt_char (type))
2914 {
2915 /* Make sure the data for resolving neutrals we are
2916 about to use is valid. */
2917 if (bidi_it->next_for_neutral.charpos < bidi_it->charpos
2918 /* PDI defines an eos, so it's OK for it to
2919 serve as its own next_for_neutral. */
2920 || (bidi_it->next_for_neutral.charpos == bidi_it->charpos
2921 && bidi_it->type != PDI)
2922 || bidi_it->next_for_neutral.type == UNKNOWN_BT)
2923 emacs_abort ();
2924
2925 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2926 bidi_it->next_for_neutral.type,
2927 level);
2928 }
2929 }
2930 }
2397 } 2931 }
2932
2398 if (bidi_it->scan_dir == -1) 2933 if (bidi_it->scan_dir == -1)
2399 /* If we are going backwards, the iterator state is already cached 2934 /* If we are going backwards, the iterator state is already cached
2400 from previous scans, and should be fully resolved. */ 2935 from previous scans, and should be fully resolved. */
@@ -2404,36 +2939,27 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
2404 type = bidi_type_of_next_char (bidi_it); 2939 type = bidi_type_of_next_char (bidi_it);
2405 2940
2406 if (type == NEUTRAL_B) 2941 if (type == NEUTRAL_B)
2407 return bidi_it->resolved_level;
2408
2409 level = bidi_it->level_stack[bidi_it->stack_idx].level;
2410 if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
2411 || (type == WEAK_BN && prev_level == level))
2412 { 2942 {
2413 if (bidi_it->next_for_neutral.type == UNKNOWN_BT) 2943 eassert (bidi_it->resolved_level >= 0);
2414 emacs_abort (); 2944 return bidi_it->resolved_level;
2415
2416 /* If the cached state shows a neutral character, it was not
2417 resolved by bidi_resolve_neutral, so do it now. */
2418 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2419 bidi_it->next_for_neutral.type,
2420 level);
2421 } 2945 }
2422 2946
2423 if (!(type == STRONG_R 2947 level = bidi_it->level_stack[bidi_it->stack_idx].level;
2424 || type == STRONG_L 2948
2425 || type == WEAK_BN 2949 eassert ((type == STRONG_R
2426 || type == WEAK_EN 2950 || type == STRONG_L
2427 || type == WEAK_AN)) 2951 || type == WEAK_BN
2428 emacs_abort (); 2952 || type == WEAK_EN
2953 || type == WEAK_AN));
2429 bidi_it->type = type; 2954 bidi_it->type = type;
2430 bidi_check_type (bidi_it->type); 2955 bidi_check_type (bidi_it->type);
2431 2956
2432 /* For L1 below, we need to know, for each WS character, whether 2957 /* For L1 below, we need to know, for each WS character, whether
2433 it belongs to a sequence of WS characters preceding a newline 2958 it belongs to a sequence of WS characters preceding a newline
2434 or a TAB or a paragraph separator. */ 2959 or a TAB or a paragraph separator. */
2435 if (bidi_it->orig_type == NEUTRAL_WS 2960 if ((bidi_it->orig_type == NEUTRAL_WS
2436 && bidi_it->next_for_ws.type == UNKNOWN_BT) 2961 || bidi_isolate_fmt_char (bidi_it->orig_type))
2962 && bidi_it->next_for_ws.charpos < bidi_it->charpos)
2437 { 2963 {
2438 int ch; 2964 int ch;
2439 ptrdiff_t clen = bidi_it->ch_len; 2965 ptrdiff_t clen = bidi_it->ch_len;
@@ -2451,54 +2977,20 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
2451 do { 2977 do {
2452 ch = bidi_fetch_char (cpos += nc, bpos += clen, &disp_pos, &dpp, &bs, 2978 ch = bidi_fetch_char (cpos += nc, bpos += clen, &disp_pos, &dpp, &bs,
2453 bidi_it->w, fwp, &clen, &nc); 2979 bidi_it->w, fwp, &clen, &nc);
2454 if (ch == '\n' || ch == BIDI_EOB) 2980 chtype = bidi_get_type (ch, NEUTRAL_DIR);
2455 chtype = NEUTRAL_B;
2456 else
2457 chtype = bidi_get_type (ch, NEUTRAL_DIR);
2458 } while (chtype == NEUTRAL_WS || chtype == WEAK_BN 2981 } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
2982 || bidi_isolate_fmt_char (chtype)
2459 || bidi_explicit_dir_char (ch)); /* L1/Retaining */ 2983 || bidi_explicit_dir_char (ch)); /* L1/Retaining */
2460 bidi_it->next_for_ws.type = chtype; 2984 bidi_it->next_for_ws.type = chtype;
2461 bidi_check_type (bidi_it->next_for_ws.type); 2985 bidi_check_type (bidi_it->next_for_ws.type);
2462 bidi_it->next_for_ws.charpos = cpos; 2986 bidi_it->next_for_ws.charpos = cpos;
2463 bidi_it->next_for_ws.bytepos = bpos;
2464 } 2987 }
2465 2988
2466 /* Resolve implicit levels, with a twist: PDFs get the embedding 2989 /* Update the cache, but only if this state was already cached. */
2467 level of the embedding they terminate. See below for the 2990 bidi_cache_iterator_state (bidi_it, 1, 1);
2468 reason. */
2469 if (bidi_it->orig_type == PDF
2470 /* Don't do this if this formatting code didn't change the
2471 embedding level due to invalid or empty embeddings. */
2472 && prev_level != level)
2473 {
2474 /* Don't look in UAX#9 for the reason for this: it's our own
2475 private quirk. The reason is that we want the formatting
2476 codes to be delivered so that they bracket the text of their
2477 embedding. For example, given the text
2478
2479 {RLO}teST{PDF}
2480
2481 we want it to be displayed as
2482
2483 {PDF}STet{RLO}
2484 2991
2485 not as 2992 /* Resolve implicit levels. */
2486 2993 if (bidi_it->orig_type == NEUTRAL_B /* L1 */
2487 STet{RLO}{PDF}
2488
2489 which will result because we bump up the embedding level as
2490 soon as we see the RLO and pop it as soon as we see the PDF,
2491 so RLO itself has the same embedding level as "teST", and
2492 thus would be normally delivered last, just before the PDF.
2493 The switch below fiddles with the level of PDF so that this
2494 ugly side effect does not happen.
2495
2496 (This is, of course, only important if the formatting codes
2497 are actually displayed, but Emacs does need to display them
2498 if the user wants to.) */
2499 level = prev_level;
2500 }
2501 else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
2502 || bidi_it->orig_type == NEUTRAL_S 2994 || bidi_it->orig_type == NEUTRAL_S
2503 || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB 2995 || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
2504 || (bidi_it->orig_type == NEUTRAL_WS 2996 || (bidi_it->orig_type == NEUTRAL_WS
@@ -2560,10 +3052,10 @@ bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, bool end_flag)
2560 if (end_flag) 3052 if (end_flag)
2561 emacs_abort (); 3053 emacs_abort ();
2562 3054
2563 bidi_cache_iterator_state (bidi_it, 1); 3055 bidi_cache_iterator_state (bidi_it, 1, 0);
2564 do { 3056 do {
2565 new_level = bidi_level_of_next_char (bidi_it); 3057 new_level = bidi_level_of_next_char (bidi_it);
2566 bidi_cache_iterator_state (bidi_it, 1); 3058 bidi_cache_iterator_state (bidi_it, 1, 0);
2567 } while (new_level >= level); 3059 } while (new_level >= level);
2568 } 3060 }
2569} 3061}
@@ -2607,7 +3099,7 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
2607 sentinel.ch_len = 1; 3099 sentinel.ch_len = 1;
2608 sentinel.nchars = 1; 3100 sentinel.nchars = 1;
2609 } 3101 }
2610 bidi_cache_iterator_state (&sentinel, 1); 3102 bidi_cache_iterator_state (&sentinel, 1, 0);
2611 } 3103 }
2612 3104
2613 old_level = bidi_it->resolved_level; 3105 old_level = bidi_it->resolved_level;
@@ -2655,6 +3147,11 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
2655 in the cache, which at this point should not happen. If 3147 in the cache, which at this point should not happen. If
2656 it does, we will infloop. */ 3148 it does, we will infloop. */
2657 eassert (next_level >= 0); 3149 eassert (next_level >= 0);
3150 /* If next_level is not consistent with incr, we might
3151 infloop. */
3152 eassert (incr > 0
3153 ? next_level > expected_next_level
3154 : next_level < expected_next_level);
2658 expected_next_level += incr; 3155 expected_next_level += incr;
2659 level_to_search += incr; 3156 level_to_search += incr;
2660 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending); 3157 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
@@ -2717,9 +3214,12 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
2717 compromised: it assumes cached states correspond to buffer 3214 compromised: it assumes cached states correspond to buffer
2718 positions 1:1. */ 3215 positions 1:1. */
2719 else 3216 else
2720 bidi_cache_iterator_state (bidi_it, 1); 3217 bidi_cache_iterator_state (bidi_it, 1, 0);
2721 } 3218 }
2722 3219
3220 eassert (bidi_it->resolved_level >= 0
3221 && bidi_it->resolved_level <= BIDI_MAXDEPTH + 2);
3222
2723 if (STRINGP (bidi_it->string.lstring)) 3223 if (STRINGP (bidi_it->string.lstring))
2724 UNGCPRO; 3224 UNGCPRO;
2725} 3225}
diff --git a/src/dispextern.h b/src/dispextern.h
index 228502ca06a..0dd0887c7e6 100644
--- a/src/dispextern.h
+++ b/src/dispextern.h
@@ -445,8 +445,8 @@ struct glyph
445 /* True means don't display cursor here. */ 445 /* True means don't display cursor here. */
446 bool_bf avoid_cursor_p : 1; 446 bool_bf avoid_cursor_p : 1;
447 447
448 /* Resolved bidirectional level of this character [0..63]. */ 448 /* Resolved bidirectional level of this character [0..127]. */
449 unsigned resolved_level : 5; 449 unsigned resolved_level : 7;
450 450
451 /* Resolved bidirectional type of this character, see enum 451 /* Resolved bidirectional type of this character, see enum
452 bidi_type_t below. Note that according to UAX#9, only some 452 bidi_type_t below. Note that according to UAX#9, only some
@@ -1857,7 +1857,9 @@ GLYPH_CODE_P (Lisp_Object gc)
1857extern int face_change_count; 1857extern int face_change_count;
1858 1858
1859/* For reordering of bidirectional text. */ 1859/* For reordering of bidirectional text. */
1860#define BIDI_MAXLEVEL 64 1860
1861/* UAX#9's max_depth value. */
1862#define BIDI_MAXDEPTH 125
1861 1863
1862/* Data type for describing the bidirectional character types. The 1864/* Data type for describing the bidirectional character types. The
1863 first 7 must be at the beginning, because they are the only values 1865 first 7 must be at the beginning, because they are the only values
@@ -1894,23 +1896,39 @@ typedef enum {
1894 NEUTRAL_ON /* other neutrals */ 1896 NEUTRAL_ON /* other neutrals */
1895} bidi_type_t; 1897} bidi_type_t;
1896 1898
1899/* Data type for describing the Bidi Paired Bracket Type of a character.
1900
1901 The order of members must be in sync with the 8th element of the
1902 member of unidata-prop-alist (in admin/unidata/unidata-gen.el) for
1903 Unicode character property `bracket-type'. */
1904typedef enum {
1905 BIDI_BRACKET_NONE = 1,
1906 BIDI_BRACKET_OPEN,
1907 BIDI_BRACKET_CLOSE
1908} bidi_bracket_type_t;
1909
1897/* The basic directionality data type. */ 1910/* The basic directionality data type. */
1898typedef enum { NEUTRAL_DIR, L2R, R2L } bidi_dir_t; 1911typedef enum { NEUTRAL_DIR, L2R, R2L } bidi_dir_t;
1899 1912
1900/* Data type for storing information about characters we need to 1913/* Data type for storing information about characters we need to
1901 remember. */ 1914 remember. */
1902struct bidi_saved_info { 1915struct bidi_saved_info {
1903 ptrdiff_t bytepos, charpos; /* character's buffer position */ 1916 ptrdiff_t charpos; /* character's buffer position */
1904 bidi_type_t type; /* character's resolved bidi type */ 1917 bidi_type_t type; /* character's resolved bidi type */
1905 bidi_type_t type_after_w1; /* original type of the character, after W1 */ 1918 bidi_type_t orig_type; /* bidi type as we found it in the buffer */
1906 bidi_type_t orig_type; /* type as we found it in the buffer */
1907}; 1919};
1908 1920
1909/* Data type for keeping track of saved embedding levels and override 1921/* Data type for keeping track of information about saved embedding
1910 status information. */ 1922 levels, override status, isolate status, and isolating sequence
1923 runs. */
1911struct bidi_stack { 1924struct bidi_stack {
1912 int level; 1925 struct bidi_saved_info last_strong;
1913 bidi_dir_t override; 1926 struct bidi_saved_info next_for_neutral;
1927 struct bidi_saved_info prev_for_neutral;
1928 unsigned level : 7;
1929 bool_bf isolate_status : 1;
1930 unsigned override : 2;
1931 unsigned sos : 2;
1914}; 1932};
1915 1933
1916/* Data type for storing information about a string being iterated on. */ 1934/* Data type for storing information about a string being iterated on. */
@@ -1935,22 +1953,24 @@ struct bidi_it {
1935 ptrdiff_t nchars; /* its "length", usually 1; it's > 1 for a run 1953 ptrdiff_t nchars; /* its "length", usually 1; it's > 1 for a run
1936 of characters covered by a display string */ 1954 of characters covered by a display string */
1937 ptrdiff_t ch_len; /* its length in bytes */ 1955 ptrdiff_t ch_len; /* its length in bytes */
1938 bidi_type_t type; /* bidi type of this character, after 1956 bidi_type_t type; /* final bidi type of this character, after
1939 resolving weak and neutral types */ 1957 resolving weak and neutral types */
1940 bidi_type_t type_after_w1; /* original type, after overrides and W1 */ 1958 bidi_type_t type_after_wn; /* bidi type after overrides and Wn */
1941 bidi_type_t orig_type; /* original type, as found in the buffer */ 1959 bidi_type_t orig_type; /* original bidi type, as found in the buffer */
1942 int resolved_level; /* final resolved level of this character */ 1960 char resolved_level; /* final resolved level of this character */
1943 int invalid_levels; /* how many PDFs to ignore */ 1961 char isolate_level; /* count of isolate initiators unmatched by PDI */
1944 int invalid_rl_levels; /* how many PDFs from RLE/RLO to ignore */ 1962 ptrdiff_t invalid_levels; /* how many PDFs to ignore */
1963 ptrdiff_t invalid_isolates; /* how many PDIs to ignore */
1945 struct bidi_saved_info prev; /* info about previous character */ 1964 struct bidi_saved_info prev; /* info about previous character */
1946 struct bidi_saved_info last_strong; /* last-seen strong directional char */ 1965 struct bidi_saved_info last_strong; /* last-seen strong directional char */
1947 struct bidi_saved_info next_for_neutral; /* surrounding characters for... */ 1966 struct bidi_saved_info next_for_neutral; /* surrounding characters for... */
1948 struct bidi_saved_info prev_for_neutral; /* ...resolving neutrals */ 1967 struct bidi_saved_info prev_for_neutral; /* ...resolving neutrals */
1949 struct bidi_saved_info next_for_ws; /* character after sequence of ws */ 1968 struct bidi_saved_info next_for_ws; /* character after sequence of ws */
1969 ptrdiff_t bracket_pairing_pos; /* position of pairing bracket */
1970 bidi_type_t bracket_enclosed_type; /* type for bracket resolution */
1950 ptrdiff_t next_en_pos; /* pos. of next char for determining ET type */ 1971 ptrdiff_t next_en_pos; /* pos. of next char for determining ET type */
1951 bidi_type_t next_en_type; /* type of char at next_en_pos */ 1972 bidi_type_t next_en_type; /* type of char at next_en_pos */
1952 ptrdiff_t ignore_bn_limit; /* position until which to ignore BNs */ 1973 bidi_dir_t sos; /* direction of start-of-sequence in effect */
1953 bidi_dir_t sor; /* direction of start-of-run in effect */
1954 int scan_dir; /* direction of text scan, 1: forw, -1: back */ 1974 int scan_dir; /* direction of text scan, 1: forw, -1: back */
1955 ptrdiff_t disp_pos; /* position of display string after ch */ 1975 ptrdiff_t disp_pos; /* position of display string after ch */
1956 int disp_prop; /* if non-zero, there really is a 1976 int disp_prop; /* if non-zero, there really is a
@@ -1960,12 +1980,11 @@ struct bidi_it {
1960 /* Note: Everything from here on is not copied/saved when the bidi 1980 /* Note: Everything from here on is not copied/saved when the bidi
1961 iterator state is saved, pushed, or popped. So only put here 1981 iterator state is saved, pushed, or popped. So only put here
1962 stuff that is not part of the bidi iterator's state! */ 1982 stuff that is not part of the bidi iterator's state! */
1963 struct bidi_stack level_stack[BIDI_MAXLEVEL]; /* stack of embedding levels */ 1983 struct bidi_stack level_stack[BIDI_MAXDEPTH+2+1]; /* directional status stack */
1964 struct bidi_string_data string; /* string to reorder */ 1984 struct bidi_string_data string; /* string to reorder */
1965 struct window *w; /* the window being displayed */ 1985 struct window *w; /* the window being displayed */
1966 bidi_dir_t paragraph_dir; /* current paragraph direction */ 1986 bidi_dir_t paragraph_dir; /* current paragraph direction */
1967 ptrdiff_t separator_limit; /* where paragraph separator should end */ 1987 ptrdiff_t separator_limit; /* where paragraph separator should end */
1968 bool_bf prev_was_pdf : 1; /* if true, previous char was PDF */
1969 bool_bf first_elt : 1; /* if true, examine current char first */ 1988 bool_bf first_elt : 1; /* if true, examine current char first */
1970 bool_bf new_paragraph : 1; /* if true, we expect a new paragraph */ 1989 bool_bf new_paragraph : 1; /* if true, we expect a new paragraph */
1971 bool_bf frame_window_p : 1; /* true if displaying on a GUI frame */ 1990 bool_bf frame_window_p : 1; /* true if displaying on a GUI frame */
diff --git a/src/term.c b/src/term.c
index 0c36469f655..04f6e3318a0 100644
--- a/src/term.c
+++ b/src/term.c
@@ -1513,8 +1513,7 @@ append_glyph (struct it *it)
1513 if (it->bidi_p) 1513 if (it->bidi_p)
1514 { 1514 {
1515 glyph->resolved_level = it->bidi_it.resolved_level; 1515 glyph->resolved_level = it->bidi_it.resolved_level;
1516 if ((it->bidi_it.type & 7) != it->bidi_it.type) 1516 eassert ((it->bidi_it.type & 7) == it->bidi_it.type);
1517 emacs_abort ();
1518 glyph->bidi_type = it->bidi_it.type; 1517 glyph->bidi_type = it->bidi_it.type;
1519 } 1518 }
1520 else 1519 else
@@ -1710,8 +1709,7 @@ append_composite_glyph (struct it *it)
1710 if (it->bidi_p) 1709 if (it->bidi_p)
1711 { 1710 {
1712 glyph->resolved_level = it->bidi_it.resolved_level; 1711 glyph->resolved_level = it->bidi_it.resolved_level;
1713 if ((it->bidi_it.type & 7) != it->bidi_it.type) 1712 eassert ((it->bidi_it.type & 7) == it->bidi_it.type);
1714 emacs_abort ();
1715 glyph->bidi_type = it->bidi_it.type; 1713 glyph->bidi_type = it->bidi_it.type;
1716 } 1714 }
1717 else 1715 else
@@ -1795,8 +1793,7 @@ append_glyphless_glyph (struct it *it, int face_id, const char *str)
1795 if (it->bidi_p) 1793 if (it->bidi_p)
1796 { 1794 {
1797 glyph->resolved_level = it->bidi_it.resolved_level; 1795 glyph->resolved_level = it->bidi_it.resolved_level;
1798 if ((it->bidi_it.type & 7) != it->bidi_it.type) 1796 eassert ((it->bidi_it.type & 7) == it->bidi_it.type);
1799 emacs_abort ();
1800 glyph->bidi_type = it->bidi_it.type; 1797 glyph->bidi_type = it->bidi_it.type;
1801 } 1798 }
1802 else 1799 else
diff --git a/src/xdisp.c b/src/xdisp.c
index bf73de69669..7c3aaa2042e 100644
--- a/src/xdisp.c
+++ b/src/xdisp.c
@@ -6935,7 +6935,8 @@ get_next_display_element (struct it *it)
6935 is R..." */ 6935 is R..." */
6936 /* FIXME: Do we need an exception for characters from display 6936 /* FIXME: Do we need an exception for characters from display
6937 tables? */ 6937 tables? */
6938 if (it->bidi_p && it->bidi_it.type == STRONG_R) 6938 if (it->bidi_p && it->bidi_it.type == STRONG_R
6939 && !inhibit_bidi_mirroring)
6939 it->c = bidi_mirror_char (it->c); 6940 it->c = bidi_mirror_char (it->c);
6940 /* Map via display table or translate control characters. 6941 /* Map via display table or translate control characters.
6941 IT->c, IT->len etc. have been set to the next character by 6942 IT->c, IT->len etc. have been set to the next character by
@@ -21468,6 +21469,114 @@ Value is the new character position of point. */)
21468#undef ROW_GLYPH_NEWLINE_P 21469#undef ROW_GLYPH_NEWLINE_P
21469} 21470}
21470 21471
21472DEFUN ("bidi-resolved-levels", Fbidi_resolved_levels,
21473 Sbidi_resolved_levels, 0, 1, 0,
21474 doc: /* Return the resolved bidirectional levels of characters at VPOS.
21475
21476The resolved levels are produced by the Emacs bidi reordering engine
21477that implements the UBA, the Unicode Bidirectional Algorithm. Please
21478read the Unicode Standard Annex 9 (UAX#9) for background information
21479about these levels.
21480
21481VPOS is the zero-based number of the current window's screen line
21482for which to produce the resolved levels. If VPOS is nil or omitted,
21483it defaults to the screen line of point. If the window displays a
21484header line, VPOS of zero will report on the header line, and first
21485line of text in the window will have VPOS of 1.
21486
21487Value is an array of resolved levels, indexed by glyph number.
21488Glyphs are numbered from zero starting from the beginning of the
21489screen line, i.e. the left edge of the window for left-to-right lines
21490and from the right edge for right-to-left lines. The resolved levels
21491are produced only for the window's text area; text in display margins
21492is not included.
21493
21494If the selected window's display is not up-to-date, or if the specified
21495screen line does not display text, this function returns nil. It is
21496highly recommended to bind this function to some simple key, like F8,
21497in order to avoid these problems.
21498
21499This function exists mainly for testing the correctness of the
21500Emacs UBA implementation, in particular with the test suite. */)
21501 (Lisp_Object vpos)
21502{
21503 struct window *w = XWINDOW (selected_window);
21504 struct buffer *b = XBUFFER (w->contents);
21505 int nrow;
21506 struct glyph_row *row;
21507
21508 if (NILP (vpos))
21509 {
21510 int d1, d2, d3, d4, d5;
21511
21512 pos_visible_p (w, PT, &d1, &d2, &d3, &d4, &d5, &nrow);
21513 }
21514 else
21515 {
21516 CHECK_NUMBER_COERCE_MARKER (vpos);
21517 nrow = XINT (vpos);
21518 }
21519
21520 /* We require up-to-date glyph matrix for this window. */
21521 if (w->window_end_valid
21522 && !windows_or_buffers_changed
21523 && b
21524 && !b->clip_changed
21525 && !b->prevent_redisplay_optimizations_p
21526 && !window_outdated (w)
21527 && nrow >= 0
21528 && nrow < w->current_matrix->nrows
21529 && (row = MATRIX_ROW (w->current_matrix, nrow))->enabled_p
21530 && MATRIX_ROW_DISPLAYS_TEXT_P (row))
21531 {
21532 struct glyph *g, *e, *g1;
21533 int nglyphs, i;
21534 Lisp_Object levels;
21535
21536 if (!row->reversed_p) /* Left-to-right glyph row. */
21537 {
21538 g = g1 = row->glyphs[TEXT_AREA];
21539 e = g + row->used[TEXT_AREA];
21540
21541 /* Skip over glyphs at the start of the row that was
21542 generated by redisplay for its own needs. */
21543 while (g < e
21544 && INTEGERP (g->object)
21545 && g->charpos < 0)
21546 g++;
21547 g1 = g;
21548
21549 /* Count the "interesting" glyphs in this row. */
21550 for (nglyphs = 0; g < e && !INTEGERP (g->object); g++)
21551 nglyphs++;
21552
21553 /* Create and fill the array. */
21554 levels = make_uninit_vector (nglyphs);
21555 for (i = 0; g1 < g; i++, g1++)
21556 ASET (levels, i, make_number (g1->resolved_level));
21557 }
21558 else /* Right-to-left glyph row. */
21559 {
21560 g = row->glyphs[TEXT_AREA] + row->used[TEXT_AREA] - 1;
21561 e = row->glyphs[TEXT_AREA] - 1;
21562 while (g > e
21563 && INTEGERP (g->object)
21564 && g->charpos < 0)
21565 g--;
21566 g1 = g;
21567 for (nglyphs = 0; g > e && !INTEGERP (g->object); g--)
21568 nglyphs++;
21569 levels = make_uninit_vector (nglyphs);
21570 for (i = 0; g1 > g; i++, g1--)
21571 ASET (levels, i, make_number (g1->resolved_level));
21572 }
21573 return levels;
21574 }
21575 else
21576 return Qnil;
21577}
21578
21579
21471 21580
21472/*********************************************************************** 21581/***********************************************************************
21473 Menu Bar 21582 Menu Bar
@@ -25198,8 +25307,7 @@ append_glyph (struct it *it)
25198 if (it->bidi_p) 25307 if (it->bidi_p)
25199 { 25308 {
25200 glyph->resolved_level = it->bidi_it.resolved_level; 25309 glyph->resolved_level = it->bidi_it.resolved_level;
25201 if ((it->bidi_it.type & 7) != it->bidi_it.type) 25310 eassert ((it->bidi_it.type & 7) == it->bidi_it.type);
25202 emacs_abort ();
25203 glyph->bidi_type = it->bidi_it.type; 25311 glyph->bidi_type = it->bidi_it.type;
25204 } 25312 }
25205 else 25313 else
@@ -25282,8 +25390,7 @@ append_composite_glyph (struct it *it)
25282 if (it->bidi_p) 25390 if (it->bidi_p)
25283 { 25391 {
25284 glyph->resolved_level = it->bidi_it.resolved_level; 25392 glyph->resolved_level = it->bidi_it.resolved_level;
25285 if ((it->bidi_it.type & 7) != it->bidi_it.type) 25393 eassert ((it->bidi_it.type & 7) == it->bidi_it.type);
25286 emacs_abort ();
25287 glyph->bidi_type = it->bidi_it.type; 25394 glyph->bidi_type = it->bidi_it.type;
25288 } 25395 }
25289 ++it->glyph_row->used[area]; 25396 ++it->glyph_row->used[area];
@@ -25471,8 +25578,7 @@ produce_image_glyph (struct it *it)
25471 if (it->bidi_p) 25578 if (it->bidi_p)
25472 { 25579 {
25473 glyph->resolved_level = it->bidi_it.resolved_level; 25580 glyph->resolved_level = it->bidi_it.resolved_level;
25474 if ((it->bidi_it.type & 7) != it->bidi_it.type) 25581 eassert ((it->bidi_it.type & 7) == it->bidi_it.type);
25475 emacs_abort ();
25476 glyph->bidi_type = it->bidi_it.type; 25582 glyph->bidi_type = it->bidi_it.type;
25477 } 25583 }
25478 ++it->glyph_row->used[area]; 25584 ++it->glyph_row->used[area];
@@ -25560,8 +25666,7 @@ append_stretch_glyph (struct it *it, Lisp_Object object,
25560 if (it->bidi_p) 25666 if (it->bidi_p)
25561 { 25667 {
25562 glyph->resolved_level = it->bidi_it.resolved_level; 25668 glyph->resolved_level = it->bidi_it.resolved_level;
25563 if ((it->bidi_it.type & 7) != it->bidi_it.type) 25669 eassert ((it->bidi_it.type & 7) == it->bidi_it.type);
25564 emacs_abort ();
25565 glyph->bidi_type = it->bidi_it.type; 25670 glyph->bidi_type = it->bidi_it.type;
25566 } 25671 }
25567 else 25672 else
@@ -26020,8 +26125,7 @@ append_glyphless_glyph (struct it *it, int face_id, int for_no_font, int len,
26020 if (it->bidi_p) 26125 if (it->bidi_p)
26021 { 26126 {
26022 glyph->resolved_level = it->bidi_it.resolved_level; 26127 glyph->resolved_level = it->bidi_it.resolved_level;
26023 if ((it->bidi_it.type & 7) != it->bidi_it.type) 26128 eassert ((it->bidi_it.type & 7) == it->bidi_it.type);
26024 emacs_abort ();
26025 glyph->bidi_type = it->bidi_it.type; 26129 glyph->bidi_type = it->bidi_it.type;
26026 } 26130 }
26027 ++it->glyph_row->used[area]; 26131 ++it->glyph_row->used[area];
@@ -30437,6 +30541,7 @@ syms_of_xdisp (void)
30437 30541
30438 DEFSYM (Qright_to_left, "right-to-left"); 30542 DEFSYM (Qright_to_left, "right-to-left");
30439 DEFSYM (Qleft_to_right, "left-to-right"); 30543 DEFSYM (Qleft_to_right, "left-to-right");
30544 defsubr (&Sbidi_resolved_levels);
30440 30545
30441#ifdef HAVE_WINDOW_SYSTEM 30546#ifdef HAVE_WINDOW_SYSTEM
30442 DEFVAR_BOOL ("x-stretch-cursor", x_stretch_cursor_p, 30547 DEFVAR_BOOL ("x-stretch-cursor", x_stretch_cursor_p,
@@ -30843,6 +30948,12 @@ To add a prefix to continuation lines, use `wrap-prefix'. */);
30843 doc: /* Non-nil means don't free realized faces. Internal use only. */); 30948 doc: /* Non-nil means don't free realized faces. Internal use only. */);
30844 inhibit_free_realized_faces = 0; 30949 inhibit_free_realized_faces = 0;
30845 30950
30951 DEFVAR_BOOL ("inhibit-bidi-mirroring", inhibit_bidi_mirroring,
30952 doc: /* Non-nil means don't mirror characters even when bidi context requires that.
30953Intended for use during debugging and for testing bidi display;
30954see biditest.el in the test suite. */);
30955 inhibit_bidi_mirroring = 0;
30956
30846#ifdef GLYPH_DEBUG 30957#ifdef GLYPH_DEBUG
30847 DEFVAR_BOOL ("inhibit-try-window-id", inhibit_try_window_id, 30958 DEFVAR_BOOL ("inhibit-try-window-id", inhibit_try_window_id,
30848 doc: /* Inhibit try_window_id display optimization. */); 30959 doc: /* Inhibit try_window_id display optimization. */);