diff options
| author | Eli Zaretskii | 2014-10-18 15:47:57 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2014-10-18 15:47:57 +0300 |
| commit | 6b247d287327777dfd29e20eac177c2005e99b45 (patch) | |
| tree | f7581b4e96b9ba0f2f475f15c888b718d0050a6d /src | |
| parent | 605cfb8b7a4ef8f73ddc8f2de5c086f3a7455971 (diff) | |
| parent | b5e71861a3b15de7651be4524f38337aa451bfd7 (diff) | |
| download | emacs-6b247d287327777dfd29e20eac177c2005e99b45.tar.gz emacs-6b247d287327777dfd29e20eac177c2005e99b45.zip | |
Fix bidi reordering of bracket characters in isolates.
src/bidi.c (bidi_cache_find): Rename the argument NEUTRALS_OK to
RESOLVED_ONLY; when non-zero, return from the cache only fully
resolved states. All callers changed.
(CANONICAL_EQU): New macro.
(PUSH_BPA_STACK): Use it to push onto the BPA stack the canonical
equivalent of the paired closing bracket character.
(bidi_find_bracket_pairs): Set the bracket_pairing_pos member to
the default non-negative value, to be checked later in
bidi_resolve_brackets. Use CANONICAL_EQU to test candidate
characters against those pushed onto the BPA stack.
(bidi_record_type_for_neutral): New function.
(bidi_resolve_brackets): Record next_for_neutral and
prev_for_neutral when embedding level gets pushed. Force
resolution of bracket pairs when entering a level run that was not
yet BPA-resolved.
(bidi_resolve_neutral): Add assertions before calling
bidi_resolve_neutral_1.
(bidi_level_of_next_char): Remove the code that attempted to
resolve unresolved neutrals; that is now done by
bidi_resolve_neutral.
Diffstat (limited to 'src')
| -rw-r--r-- | src/ChangeLog | 22 | ||||
| -rw-r--r-- | src/bidi.c | 262 |
2 files changed, 176 insertions, 108 deletions
diff --git a/src/ChangeLog b/src/ChangeLog index 026ae46299d..66306bd5fb6 100644 --- a/src/ChangeLog +++ b/src/ChangeLog | |||
| @@ -1,5 +1,27 @@ | |||
| 1 | 2014-10-18 Eli Zaretskii <eliz@gnu.org> | 1 | 2014-10-18 Eli Zaretskii <eliz@gnu.org> |
| 2 | 2 | ||
| 3 | Fix reordering of bracket characters in isolates. | ||
| 4 | * bidi.c (bidi_cache_find): Rename the argument NEUTRALS_OK to | ||
| 5 | RESOLVED_ONLY; when non-zero, return from the cache only fully | ||
| 6 | resolved states. All callers changed. | ||
| 7 | (CANONICAL_EQU): New macro. | ||
| 8 | (PUSH_BPA_STACK): Use it to push onto the BPA stack the canonical | ||
| 9 | equivalent of the paired closing bracket character. | ||
| 10 | (bidi_find_bracket_pairs): Set the bracket_pairing_pos member to | ||
| 11 | the default non-negative value, to be checked later in | ||
| 12 | bidi_resolve_brackets. Use CANONICAL_EQU to test candidate | ||
| 13 | characters against those pushed onto the BPA stack. | ||
| 14 | (bidi_record_type_for_neutral): New function. | ||
| 15 | (bidi_resolve_brackets): Record next_for_neutral and | ||
| 16 | prev_for_neutral when embedding level gets pushed. Force | ||
| 17 | resolution of bracket pairs when entering a level run that was not | ||
| 18 | yet BPA-resolved. | ||
| 19 | (bidi_resolve_neutral): Add assertions before calling | ||
| 20 | bidi_resolve_neutral_1. | ||
| 21 | (bidi_level_of_next_char): Remove the code that attempted to | ||
| 22 | resolve unresolved neutrals; that is now done by | ||
| 23 | bidi_resolve_neutral. | ||
| 24 | |||
| 3 | * w32select.c (owner_callback): Mark with ALIGN_STACK attribute. | 25 | * w32select.c (owner_callback): Mark with ALIGN_STACK attribute. |
| 4 | 26 | ||
| 5 | 2014-10-17 Eli Zaretskii <eliz@gnu.org> | 27 | 2014-10-17 Eli Zaretskii <eliz@gnu.org> |
diff --git a/src/bidi.c b/src/bidi.c index bbafc785e7b..59fade3f785 100644 --- a/src/bidi.c +++ b/src/bidi.c | |||
| @@ -800,26 +800,22 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved, | |||
| 800 | 800 | ||
| 801 | /* Look for a cached iterator state that corresponds to CHARPOS. If | 801 | /* Look for a cached iterator state that corresponds to CHARPOS. If |
| 802 | found, copy the cached state into BIDI_IT and return the type of | 802 | found, copy the cached state into BIDI_IT and return the type of |
| 803 | the cached entry. If not found, return UNKNOWN_BT. NEUTRALS_OK | 803 | the cached entry. If not found, return UNKNOWN_BT. RESOLVED_ONLY |
| 804 | non-zero means it is OK to return cached state for neutral | 804 | zero means it is OK to return cached states that were not fully |
| 805 | characters that have no valid next_for_neutral member, and | 805 | resolved yet. This can happen if the state was cached before it |
| 806 | therefore cannot be resolved. This can happen if the state was | 806 | was resolved in bidi_resolve_neutral. */ |
| 807 | cached before it was resolved in bidi_resolve_neutral. */ | ||
| 808 | static bidi_type_t | 807 | static bidi_type_t |
| 809 | bidi_cache_find (ptrdiff_t charpos, bool neutrals_ok, struct bidi_it *bidi_it) | 808 | bidi_cache_find (ptrdiff_t charpos, bool resolved_only, struct bidi_it *bidi_it) |
| 810 | { | 809 | { |
| 811 | ptrdiff_t i = bidi_cache_search (charpos, -1, bidi_it->scan_dir); | 810 | ptrdiff_t i = bidi_cache_search (charpos, -1, bidi_it->scan_dir); |
| 812 | 811 | ||
| 813 | if (i >= bidi_cache_start | 812 | if (i >= bidi_cache_start |
| 814 | && (neutrals_ok | 813 | && (!resolved_only |
| 815 | /* Callers that don't want to resolve neutrals (and set | 814 | /* Callers that want only fully resolved states (and set |
| 816 | neutrals_ok = false) need to be sure that there's enough | 815 | resolved_only = true) need to be sure that there's enough |
| 817 | info in the cached state to resolve the neutrals and | 816 | info in the cached state to return the state as final, |
| 818 | isolates, and if not, they don't want the cached state. */ | 817 | and if not, they don't want the cached state. */ |
| 819 | || !(bidi_cache[i].resolved_level == -1 | 818 | || bidi_cache[i].resolved_level >= 0)) |
| 820 | && (bidi_get_category (bidi_cache[i].type) == NEUTRAL | ||
| 821 | || bidi_isolate_fmt_char (bidi_cache[i].type)) | ||
| 822 | && bidi_cache[i].next_for_neutral.type == UNKNOWN_BT))) | ||
| 823 | { | 819 | { |
| 824 | bidi_dir_t current_scan_dir = bidi_it->scan_dir; | 820 | bidi_dir_t current_scan_dir = bidi_it->scan_dir; |
| 825 | 821 | ||
| @@ -2342,6 +2338,41 @@ typedef struct bpa_stack_entry { | |||
| 2342 | BPA stack, which should be more than enough for actual bidi text. */ | 2338 | BPA stack, which should be more than enough for actual bidi text. */ |
| 2343 | #define MAX_BPA_STACK (max (MAX_ALLOCA / sizeof (bpa_stack_entry), 1)) | 2339 | #define MAX_BPA_STACK (max (MAX_ALLOCA / sizeof (bpa_stack_entry), 1)) |
| 2344 | 2340 | ||
| 2341 | /* UAX#9 says to match opening brackets with the matching closing | ||
| 2342 | brackets or their canonical equivalents. As of Unicode 7.0, there | ||
| 2343 | are only 2 bracket characters that have canonical equivalence | ||
| 2344 | decompositions: U+2329 and U+232A. So instead of accessing the | ||
| 2345 | table in uni-decomposition.el, we just handle these 2 characters | ||
| 2346 | with this simple macro. Note that ASCII characters don't have | ||
| 2347 | canonical equivalents by definition. */ | ||
| 2348 | |||
| 2349 | /* To find all the characters that need to be processed by | ||
| 2350 | CANONICAL_EQU, first find all the characters which have | ||
| 2351 | decompositions in UnicodeData.txt, with this Awk script: | ||
| 2352 | |||
| 2353 | awk -F ";" " {if ($6 != \"\") print $1, $6}" UnicodeData.txt | ||
| 2354 | |||
| 2355 | Then produce a list of all the bracket characters in BidiBrackets.txt: | ||
| 2356 | |||
| 2357 | awk -F "[ ;]" " {if ($1 != \"#\" && $1 != \"\") print $1}" BidiBrackets.txt | ||
| 2358 | |||
| 2359 | And finally, cross-reference these two: | ||
| 2360 | |||
| 2361 | fgrep -w -f brackets.txt decompositions.txt | ||
| 2362 | |||
| 2363 | where "decompositions.txt" was produced by the 1st script, and | ||
| 2364 | "brackets.txt" by the 2nd script. In the output of fgrep, look | ||
| 2365 | only for decompositions that don't begin with some compatibility | ||
| 2366 | formatting tag, such as "<compat>". Only decompositions that | ||
| 2367 | consist solely of character codepoints are relevant to bidi | ||
| 2368 | brackets processing. */ | ||
| 2369 | |||
| 2370 | #define CANONICAL_EQU(c) \ | ||
| 2371 | ( ASCII_CHAR_P (c) ? c \ | ||
| 2372 | : (c) == 0x2329 ? 0x3008 \ | ||
| 2373 | : (c) == 0x232a ? 0x3009 \ | ||
| 2374 | : c ) | ||
| 2375 | |||
| 2345 | #ifdef ENABLE_CHECKING | 2376 | #ifdef ENABLE_CHECKING |
| 2346 | # define STORE_BRACKET_CHARPOS \ | 2377 | # define STORE_BRACKET_CHARPOS \ |
| 2347 | bpa_stack[bpa_sp].open_bracket_pos = bidi_it->charpos | 2378 | bpa_stack[bpa_sp].open_bracket_pos = bidi_it->charpos |
| @@ -2351,16 +2382,18 @@ typedef struct bpa_stack_entry { | |||
| 2351 | 2382 | ||
| 2352 | #define PUSH_BPA_STACK \ | 2383 | #define PUSH_BPA_STACK \ |
| 2353 | do { \ | 2384 | do { \ |
| 2354 | bpa_sp++; \ | 2385 | int ch; \ |
| 2355 | if (bpa_sp >= MAX_BPA_STACK) \ | 2386 | bpa_sp++; \ |
| 2356 | { \ | 2387 | if (bpa_sp >= MAX_BPA_STACK) \ |
| 2357 | bpa_sp = MAX_BPA_STACK - 1; \ | 2388 | { \ |
| 2358 | goto bpa_give_up; \ | 2389 | bpa_sp = MAX_BPA_STACK - 1; \ |
| 2359 | } \ | 2390 | goto bpa_give_up; \ |
| 2360 | bpa_stack[bpa_sp].close_bracket_char = bidi_mirror_char (bidi_it->ch); \ | 2391 | } \ |
| 2361 | bpa_stack[bpa_sp].open_bracket_idx = bidi_cache_last_idx; \ | 2392 | ch = CANONICAL_EQU (bidi_it->ch); \ |
| 2362 | bpa_stack[bpa_sp].flags = 0; \ | 2393 | bpa_stack[bpa_sp].close_bracket_char = bidi_mirror_char (ch); \ |
| 2363 | STORE_BRACKET_CHARPOS; \ | 2394 | bpa_stack[bpa_sp].open_bracket_idx = bidi_cache_last_idx; \ |
| 2395 | bpa_stack[bpa_sp].flags = 0; \ | ||
| 2396 | STORE_BRACKET_CHARPOS; \ | ||
| 2364 | } while (0) | 2397 | } while (0) |
| 2365 | 2398 | ||
| 2366 | 2399 | ||
| @@ -2405,13 +2438,22 @@ bidi_find_bracket_pairs (struct bidi_it *bidi_it) | |||
| 2405 | int old_sidx, new_sidx; | 2438 | int old_sidx, new_sidx; |
| 2406 | int current_level = bidi_it->level_stack[bidi_it->stack_idx].level; | 2439 | int current_level = bidi_it->level_stack[bidi_it->stack_idx].level; |
| 2407 | 2440 | ||
| 2441 | /* Mark every opening bracket character we've traversed by | ||
| 2442 | putting its own position into bracket_pairing_pos. This | ||
| 2443 | is examined in bidi_resolve_brackets to distinguish | ||
| 2444 | brackets that were already resolved to stay NEUTRAL_ON, | ||
| 2445 | and those that were not yet processed by this function | ||
| 2446 | (because they were skipped when we skip higher embedding | ||
| 2447 | levels below). */ | ||
| 2448 | if (btype == BIDI_BRACKET_OPEN && bidi_it->bracket_pairing_pos == -1) | ||
| 2449 | bidi_it->bracket_pairing_pos = bidi_it->charpos; | ||
| 2408 | bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0); | 2450 | bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B, 0); |
| 2409 | if (btype == BIDI_BRACKET_OPEN) | 2451 | if (btype == BIDI_BRACKET_OPEN) |
| 2410 | PUSH_BPA_STACK; | 2452 | PUSH_BPA_STACK; |
| 2411 | else if (btype == BIDI_BRACKET_CLOSE) | 2453 | else if (btype == BIDI_BRACKET_CLOSE) |
| 2412 | { | 2454 | { |
| 2413 | int sp = bpa_sp; | 2455 | int sp = bpa_sp; |
| 2414 | int curchar = bidi_it->ch; | 2456 | int curchar = CANONICAL_EQU (bidi_it->ch); |
| 2415 | 2457 | ||
| 2416 | eassert (sp >= 0); | 2458 | eassert (sp >= 0); |
| 2417 | while (sp >= 0 && bpa_stack[sp].close_bracket_char != curchar) | 2459 | while (sp >= 0 && bpa_stack[sp].close_bracket_char != curchar) |
| @@ -2513,13 +2555,35 @@ bidi_find_bracket_pairs (struct bidi_it *bidi_it) | |||
| 2513 | 2555 | ||
| 2514 | /* Restore bidi_it from the cache, which should have the bracket | 2556 | /* Restore bidi_it from the cache, which should have the bracket |
| 2515 | resolution members set as determined by the above loop. */ | 2557 | resolution members set as determined by the above loop. */ |
| 2516 | type = bidi_cache_find (saved_it.charpos, 1, bidi_it); | 2558 | type = bidi_cache_find (saved_it.charpos, 0, bidi_it); |
| 2517 | eassert (type == NEUTRAL_ON); | 2559 | eassert (type == NEUTRAL_ON); |
| 2518 | } | 2560 | } |
| 2519 | 2561 | ||
| 2520 | return retval; | 2562 | return retval; |
| 2521 | } | 2563 | } |
| 2522 | 2564 | ||
| 2565 | static void | ||
| 2566 | bidi_record_type_for_neutral (struct bidi_saved_info *info, int level, | ||
| 2567 | bool nextp) | ||
| 2568 | { | ||
| 2569 | int idx; | ||
| 2570 | |||
| 2571 | for (idx = bidi_cache_last_idx + 1; idx < bidi_cache_idx; idx++) | ||
| 2572 | { | ||
| 2573 | int lev = bidi_cache[idx].level_stack[bidi_cache[idx].stack_idx].level; | ||
| 2574 | |||
| 2575 | if (lev <= level) | ||
| 2576 | { | ||
| 2577 | eassert (lev == level); | ||
| 2578 | if (nextp) | ||
| 2579 | bidi_cache[idx].next_for_neutral = *info; | ||
| 2580 | else | ||
| 2581 | bidi_cache[idx].prev_for_neutral = *info; | ||
| 2582 | break; | ||
| 2583 | } | ||
| 2584 | } | ||
| 2585 | } | ||
| 2586 | |||
| 2523 | static bidi_type_t | 2587 | static bidi_type_t |
| 2524 | bidi_resolve_brackets (struct bidi_it *bidi_it) | 2588 | bidi_resolve_brackets (struct bidi_it *bidi_it) |
| 2525 | { | 2589 | { |
| @@ -2527,12 +2591,24 @@ bidi_resolve_brackets (struct bidi_it *bidi_it) | |||
| 2527 | bool resolve_bracket = false; | 2591 | bool resolve_bracket = false; |
| 2528 | bidi_type_t type = UNKNOWN_BT; | 2592 | bidi_type_t type = UNKNOWN_BT; |
| 2529 | int ch; | 2593 | int ch; |
| 2530 | struct bidi_saved_info tem_info; | 2594 | struct bidi_saved_info prev_for_neutral, next_for_neutral; |
| 2531 | 2595 | ||
| 2532 | bidi_remember_char (&tem_info, bidi_it, 1); | 2596 | /* Record the prev_for_neutral type either from the previous |
| 2597 | character, if it was a strong or AN/EN, or from the | ||
| 2598 | prev_for_neutral information recorded previously. */ | ||
| 2599 | if (bidi_it->type == STRONG_L || bidi_it->type == STRONG_R | ||
| 2600 | || bidi_it->type == WEAK_AN || bidi_it->type == WEAK_EN) | ||
| 2601 | bidi_remember_char (&prev_for_neutral, bidi_it, 1); | ||
| 2602 | else | ||
| 2603 | prev_for_neutral = bidi_it->prev_for_neutral; | ||
| 2604 | /* Record the next_for_neutral type information. */ | ||
| 2605 | if (bidi_it->next_for_neutral.charpos > bidi_it->charpos) | ||
| 2606 | next_for_neutral = bidi_it->next_for_neutral; | ||
| 2607 | else | ||
| 2608 | next_for_neutral.charpos = -1; | ||
| 2533 | if (!bidi_it->first_elt) | 2609 | if (!bidi_it->first_elt) |
| 2534 | { | 2610 | { |
| 2535 | type = bidi_cache_find (bidi_it->charpos + bidi_it->nchars, 1, bidi_it); | 2611 | type = bidi_cache_find (bidi_it->charpos + bidi_it->nchars, 0, bidi_it); |
| 2536 | ch = bidi_it->ch; | 2612 | ch = bidi_it->ch; |
| 2537 | } | 2613 | } |
| 2538 | if (type == UNKNOWN_BT) | 2614 | if (type == UNKNOWN_BT) |
| @@ -2543,36 +2619,45 @@ bidi_resolve_brackets (struct bidi_it *bidi_it) | |||
| 2543 | } | 2619 | } |
| 2544 | else | 2620 | else |
| 2545 | { | 2621 | { |
| 2622 | eassert (bidi_it->resolved_level == -1); | ||
| 2623 | /* If the cached state shows an increase of embedding level due | ||
| 2624 | to an isolate initiator, we need to update the 1st cached | ||
| 2625 | state of the next run of the current isolating sequence with | ||
| 2626 | the prev_for_neutral and next_for_neutral information, so | ||
| 2627 | that it will be picked up when we advance to that next run. */ | ||
| 2628 | if (bidi_it->level_stack[bidi_it->stack_idx].level > prev_level | ||
| 2629 | && bidi_it->level_stack[bidi_it->stack_idx].isolate_status) | ||
| 2630 | { | ||
| 2631 | bidi_record_type_for_neutral (&prev_for_neutral, prev_level, 0); | ||
| 2632 | bidi_record_type_for_neutral (&next_for_neutral, prev_level, 1); | ||
| 2633 | } | ||
| 2546 | if (type == NEUTRAL_ON | 2634 | if (type == NEUTRAL_ON |
| 2547 | && bidi_paired_bracket_type (ch) == BIDI_BRACKET_OPEN) | 2635 | && bidi_paired_bracket_type (ch) == BIDI_BRACKET_OPEN) |
| 2548 | { | 2636 | { |
| 2549 | if (bidi_it->level_stack[bidi_it->stack_idx].level == prev_level) | 2637 | if (bidi_it->bracket_pairing_pos > bidi_it->charpos) |
| 2550 | { | 2638 | { |
| 2551 | if (bidi_it->bracket_pairing_pos > 0) | 2639 | /* A cached opening bracket that wasn't completely |
| 2552 | { | 2640 | resolved yet. */ |
| 2553 | /* A cached opening bracket that wasn't completely | 2641 | resolve_bracket = true; |
| 2554 | resolved yet. */ | ||
| 2555 | resolve_bracket = true; | ||
| 2556 | } | ||
| 2557 | } | 2642 | } |
| 2558 | else | 2643 | else if (bidi_it->bracket_pairing_pos == -1) |
| 2559 | { | 2644 | { |
| 2560 | /* Higher levels were not BPA-resolved yet, even if | 2645 | /* Higher levels were not BPA-resolved yet, even if |
| 2561 | cached by bidi_find_bracket_pairs. Lower levels were | 2646 | cached by bidi_find_bracket_pairs. Force application |
| 2562 | probably processed by bidi_find_bracket_pairs, but we | 2647 | of BPA to the new level now. */ |
| 2563 | have no easy way of retaining the prev_for_neutral | ||
| 2564 | from the previous level run of the isolating | ||
| 2565 | sequence. Force application of BPA now. */ | ||
| 2566 | if (bidi_find_bracket_pairs (bidi_it)) | 2648 | if (bidi_find_bracket_pairs (bidi_it)) |
| 2567 | resolve_bracket = true; | 2649 | resolve_bracket = true; |
| 2568 | } | 2650 | } |
| 2569 | } | 2651 | } |
| 2570 | /* Keep track of the prev_for_neutral type, needed for resolving | 2652 | /* Keep track of the prev_for_neutral and next_for_neutral |
| 2571 | brackets below and for resolving neutrals in bidi_resolve_neutral. */ | 2653 | types, needed for resolving brackets below and for resolving |
| 2572 | if (bidi_it->level_stack[bidi_it->stack_idx].level == prev_level | 2654 | neutrals in bidi_resolve_neutral. */ |
| 2573 | && (tem_info.type == STRONG_L || tem_info.type == STRONG_R | 2655 | if (bidi_it->level_stack[bidi_it->stack_idx].level == prev_level) |
| 2574 | || tem_info.type == WEAK_AN || tem_info.type == WEAK_EN)) | 2656 | { |
| 2575 | bidi_it->prev_for_neutral = tem_info; | 2657 | bidi_it->prev_for_neutral = prev_for_neutral; |
| 2658 | if (next_for_neutral.charpos > 0) | ||
| 2659 | bidi_it->next_for_neutral = next_for_neutral; | ||
| 2660 | } | ||
| 2576 | } | 2661 | } |
| 2577 | 2662 | ||
| 2578 | /* If needed, resolve the bracket type according to N0. */ | 2663 | /* If needed, resolve the bracket type according to N0. */ |
| @@ -2657,9 +2742,18 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) | |||
| 2657 | || (type == WEAK_BN && bidi_explicit_dir_char (bidi_it->ch))) | 2742 | || (type == WEAK_BN && bidi_explicit_dir_char (bidi_it->ch))) |
| 2658 | { | 2743 | { |
| 2659 | if (bidi_it->next_for_neutral.type != UNKNOWN_BT) | 2744 | if (bidi_it->next_for_neutral.type != UNKNOWN_BT) |
| 2660 | type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, | 2745 | { |
| 2661 | bidi_it->next_for_neutral.type, | 2746 | /* Make sure the data for resolving neutrals we are |
| 2662 | current_level); | 2747 | about to use is valid. */ |
| 2748 | eassert (bidi_it->next_for_neutral.charpos > bidi_it->charpos | ||
| 2749 | /* PDI defines an eos, so it's OK for it to | ||
| 2750 | serve as its own next_for_neutral. */ | ||
| 2751 | || (bidi_it->next_for_neutral.charpos == bidi_it->charpos | ||
| 2752 | && bidi_it->type == PDI)); | ||
| 2753 | type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, | ||
| 2754 | bidi_it->next_for_neutral.type, | ||
| 2755 | current_level); | ||
| 2756 | } | ||
| 2663 | /* The next two "else if" clauses are shortcuts for the | 2757 | /* The next two "else if" clauses are shortcuts for the |
| 2664 | important special case when we have a long sequence of | 2758 | important special case when we have a long sequence of |
| 2665 | neutral or WEAK_BN characters, such as whitespace or nulls or | 2759 | neutral or WEAK_BN characters, such as whitespace or nulls or |
| @@ -2855,16 +2949,13 @@ bidi_level_of_next_char (struct bidi_it *bidi_it) | |||
| 2855 | } | 2949 | } |
| 2856 | } | 2950 | } |
| 2857 | 2951 | ||
| 2858 | /* Perhaps the character we want is already cached. If it is, the | 2952 | /* Perhaps the character we want is already cached as fully resolved. |
| 2859 | call to bidi_cache_find below will return a type other than | 2953 | If it is, the call to bidi_cache_find below will return a type |
| 2860 | UNKNOWN_BT. */ | 2954 | other than UNKNOWN_BT. */ |
| 2861 | if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt) | 2955 | if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt) |
| 2862 | { | 2956 | { |
| 2863 | int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring)) | 2957 | int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring)) |
| 2864 | ? 0 : 1); | 2958 | ? 0 : 1); |
| 2865 | bidi_type_t prev_type = bidi_it->type; | ||
| 2866 | bidi_type_t type_for_neutral = bidi_it->next_for_neutral.type; | ||
| 2867 | ptrdiff_t pos_for_neutral = bidi_it->next_for_neutral.charpos; | ||
| 2868 | 2959 | ||
| 2869 | if (bidi_it->scan_dir > 0) | 2960 | if (bidi_it->scan_dir > 0) |
| 2870 | { | 2961 | { |
| @@ -2879,57 +2970,12 @@ bidi_level_of_next_char (struct bidi_it *bidi_it) | |||
| 2879 | cached at the beginning of the iteration. */ | 2970 | cached at the beginning of the iteration. */ |
| 2880 | next_char_pos = bidi_it->charpos - 1; | 2971 | next_char_pos = bidi_it->charpos - 1; |
| 2881 | if (next_char_pos >= bob - 1) | 2972 | if (next_char_pos >= bob - 1) |
| 2882 | type = bidi_cache_find (next_char_pos, 0, bidi_it); | 2973 | type = bidi_cache_find (next_char_pos, 1, bidi_it); |
| 2883 | |||
| 2884 | /* For a sequence of BN and NI, copy the type from the previous | ||
| 2885 | character. This is because the loop in bidi_resolve_neutral | ||
| 2886 | that handles such sequences caches the characters it | ||
| 2887 | traverses, but does not (and cannot) store the | ||
| 2888 | next_for_neutral member for them, because it is only known | ||
| 2889 | when the loop ends. So when we find them in the cache, their | ||
| 2890 | type needs to be updated, but we don't have next_for_neutral | ||
| 2891 | to do that. However, whatever type is resolved as result of | ||
| 2892 | that loop, it will be the same for all the traversed | ||
| 2893 | characters, by virtue of N1 and N2. */ | ||
| 2894 | if (type == WEAK_BN && bidi_it->scan_dir > 0 | ||
| 2895 | && bidi_explicit_dir_char (bidi_it->ch) | ||
| 2896 | && type_for_neutral != UNKNOWN_BT | ||
| 2897 | && bidi_it->charpos < pos_for_neutral) | ||
| 2898 | { | ||
| 2899 | type = prev_type; | ||
| 2900 | eassert (type != UNKNOWN_BT); | ||
| 2901 | } | ||
| 2902 | if (type != UNKNOWN_BT) | 2974 | if (type != UNKNOWN_BT) |
| 2903 | { | 2975 | { |
| 2904 | /* If resolved_level is -1, it means this state was cached | 2976 | /* We asked the cache for fully resolved states. */ |
| 2905 | before it was completely resolved, so we cannot return | 2977 | eassert (bidi_it->resolved_level >= 0); |
| 2906 | it. */ | 2978 | return bidi_it->resolved_level; |
| 2907 | if (bidi_it->resolved_level != -1) | ||
| 2908 | { | ||
| 2909 | eassert (bidi_it->resolved_level >= 0); | ||
| 2910 | return bidi_it->resolved_level; | ||
| 2911 | } | ||
| 2912 | else | ||
| 2913 | { | ||
| 2914 | level = bidi_it->level_stack[bidi_it->stack_idx].level; | ||
| 2915 | if (bidi_get_category (type) == NEUTRAL | ||
| 2916 | || bidi_isolate_fmt_char (type)) | ||
| 2917 | { | ||
| 2918 | /* Make sure the data for resolving neutrals we are | ||
| 2919 | about to use is valid. */ | ||
| 2920 | if (bidi_it->next_for_neutral.charpos < bidi_it->charpos | ||
| 2921 | /* PDI defines an eos, so it's OK for it to | ||
| 2922 | serve as its own next_for_neutral. */ | ||
| 2923 | || (bidi_it->next_for_neutral.charpos == bidi_it->charpos | ||
| 2924 | && bidi_it->type != PDI) | ||
| 2925 | || bidi_it->next_for_neutral.type == UNKNOWN_BT) | ||
| 2926 | emacs_abort (); | ||
| 2927 | |||
| 2928 | type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, | ||
| 2929 | bidi_it->next_for_neutral.type, | ||
| 2930 | level); | ||
| 2931 | } | ||
| 2932 | } | ||
| 2933 | } | 2979 | } |
| 2934 | } | 2980 | } |
| 2935 | 2981 | ||