aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorEli Zaretskii2011-05-10 19:12:16 +0300
committerEli Zaretskii2011-05-10 19:12:16 +0300
commit182ce2d254ed316239b8deab8adac05c3dbe0149 (patch)
tree8df1f0ece905ab62db502e6fe30cd45acfc6837e /src
parentffb54e99ad5c0c3529e22eec4d112d64977f81a3 (diff)
downloademacs-182ce2d254ed316239b8deab8adac05c3dbe0149.tar.gz
emacs-182ce2d254ed316239b8deab8adac05c3dbe0149.zip
Started work on reordering display strings. Refactor FETCH_CHAR.
Only compiled, not tested. src/xdisp.c (compute_display_string_pos): New function. (reseat_1): Initialize bidi_it.disp_pos. src/bidi.c (bidi_copy_it): Use offsetof. (bidi_fetch_char, bidi_fetch_char_advance): New functions. (bidi_cache_search, bidi_cache_iterator_state) (bidi_paragraph_init, bidi_resolve_explicit, bidi_resolve_weak) (bidi_level_of_next_char, bidi_move_to_visually_next): Support character positions inside a run of characters covered by a display string. (bidi_paragraph_init, bidi_resolve_explicit_1) (bidi_level_of_next_char): Call bidi_fetch_char and bidi_fetch_char_advance instead of FETCH_CHAR and FETCH_CHAR_ADVANCE. (bidi_init_it): Initialize new members. (LRE_CHAR, RLE_CHAR, PDF_CHAR, LRO_CHAR, RLO_CHAR): Remove macro definitions. (bidi_explicit_dir_char): Lookup character type in bidi_type_table, instead of using explicit *_CHAR codes. (bidi_resolve_explicit, bidi_resolve_weak): Use FETCH_MULTIBYTE_CHAR instead of FETCH_CHAR, as reordering of bidirectional text is supported only in multibyte buffers. src/dispextern.h (struct bidi_it): New members nchars and disp_pos. ch_len is now EMACS_INT. (compute_display_string_pos): Declare prototype.
Diffstat (limited to 'src')
-rw-r--r--src/ChangeLog29
-rw-r--r--src/bidi.c246
-rw-r--r--src/dispextern.h12
-rw-r--r--src/xdisp.c15
4 files changed, 232 insertions, 70 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index 6f70908caae..03fe0029e70 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,32 @@
12011-05-10 Eli Zaretskii <eliz@gnu.org>
2
3 * xdisp.c (compute_display_string_pos): New function.
4 (reseat_1): Initialize bidi_it.disp_pos.
5
6 * bidi.c (bidi_copy_it): Use offsetof.
7 (bidi_fetch_char, bidi_fetch_char_advance): New functions.
8 (bidi_cache_search, bidi_cache_iterator_state)
9 (bidi_paragraph_init, bidi_resolve_explicit, bidi_resolve_weak)
10 (bidi_level_of_next_char, bidi_move_to_visually_next): Support
11 character positions inside a run of characters covered by a
12 display string.
13 (bidi_paragraph_init, bidi_resolve_explicit_1)
14 (bidi_level_of_next_char): Call bidi_fetch_char and
15 bidi_fetch_char_advance instead of FETCH_CHAR and
16 FETCH_CHAR_ADVANCE.
17 (bidi_init_it): Initialize new members.
18 (LRE_CHAR, RLE_CHAR, PDF_CHAR, LRO_CHAR, RLO_CHAR): Remove macro
19 definitions.
20 (bidi_explicit_dir_char): Lookup character type in bidi_type_table,
21 instead of using explicit *_CHAR codes.
22 (bidi_resolve_explicit, bidi_resolve_weak): Use
23 FETCH_MULTIBYTE_CHAR instead of FETCH_CHAR, as reordering of
24 bidirectional text is supported only in multibyte buffers.
25
26 * dispextern.h (struct bidi_it): New members nchars and disp_pos.
27 ch_len is now EMACS_INT.
28 (compute_display_string_pos): Declare prototype.
29
12011-05-09 Andreas Schwab <schwab@linux-m68k.org> 302011-05-09 Andreas Schwab <schwab@linux-m68k.org>
2 31
3 * w32menu.c (set_frame_menubar): Fix submenu allocation. 32 * w32menu.c (set_frame_menubar): Fix submenu allocation.
diff --git a/src/bidi.c b/src/bidi.c
index 88c45e24a14..0a7c92cfea6 100644
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -62,15 +62,8 @@ static int bidi_initialized = 0;
62 62
63static Lisp_Object bidi_type_table, bidi_mirror_table; 63static Lisp_Object bidi_type_table, bidi_mirror_table;
64 64
65/* FIXME: Remove these when bidi_explicit_dir_char uses a lookup table. */
66#define LRM_CHAR 0x200E 65#define LRM_CHAR 0x200E
67#define RLM_CHAR 0x200F 66#define RLM_CHAR 0x200F
68#define LRE_CHAR 0x202A
69#define RLE_CHAR 0x202B
70#define PDF_CHAR 0x202C
71#define LRO_CHAR 0x202D
72#define RLO_CHAR 0x202E
73
74#define BIDI_EOB -1 67#define BIDI_EOB -1
75 68
76/* Local data structures. (Look in dispextern.h for the rest.) */ 69/* Local data structures. (Look in dispextern.h for the rest.) */
@@ -258,7 +251,7 @@ bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
258 int i; 251 int i;
259 252
260 /* Copy everything except the level stack and beyond. */ 253 /* Copy everything except the level stack and beyond. */
261 memcpy (to, from, ((size_t)&((struct bidi_it *)0)->level_stack[0])); 254 memcpy (to, from, offsetof (struct bidi_it, level_stack[0]));
262 255
263 /* Copy the active part of the level stack. */ 256 /* Copy the active part of the level stack. */
264 to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */ 257 to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
@@ -319,10 +312,17 @@ bidi_cache_search (EMACS_INT charpos, int level, int dir)
319 if (bidi_cache_idx) 312 if (bidi_cache_idx)
320 { 313 {
321 if (charpos < bidi_cache[bidi_cache_last_idx].charpos) 314 if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
322 dir = -1; 315 {
323 else if (charpos > bidi_cache[bidi_cache_last_idx].charpos) 316 dir = -1;
324 dir = 1; 317 i_start = bidi_cache_last_idx - 1;
325 if (dir) 318 }
319 else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
320 + bidi_cache[bidi_cache_last_idx].nchars - 1))
321 {
322 dir = 1;
323 i_start = bidi_cache_last_idx + 1;
324 }
325 else if (dir)
326 i_start = bidi_cache_last_idx; 326 i_start = bidi_cache_last_idx;
327 else 327 else
328 { 328 {
@@ -334,14 +334,16 @@ bidi_cache_search (EMACS_INT charpos, int level, int dir)
334 { 334 {
335 /* Linear search for now; FIXME! */ 335 /* Linear search for now; FIXME! */
336 for (i = i_start; i >= 0; i--) 336 for (i = i_start; i >= 0; i--)
337 if (bidi_cache[i].charpos == charpos 337 if (bidi_cache[i].charpos <= charpos
338 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
338 && (level == -1 || bidi_cache[i].resolved_level <= level)) 339 && (level == -1 || bidi_cache[i].resolved_level <= level))
339 return i; 340 return i;
340 } 341 }
341 else 342 else
342 { 343 {
343 for (i = i_start; i < bidi_cache_idx; i++) 344 for (i = i_start; i < bidi_cache_idx; i++)
344 if (bidi_cache[i].charpos == charpos 345 if (bidi_cache[i].charpos <= charpos
346 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
345 && (level == -1 || bidi_cache[i].resolved_level <= level)) 347 && (level == -1 || bidi_cache[i].resolved_level <= level))
346 return i; 348 return i;
347 } 349 }
@@ -426,7 +428,8 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
426 If we are outside the range of cached positions, the cache is 428 If we are outside the range of cached positions, the cache is
427 useless and must be reset. */ 429 useless and must be reset. */
428 if (idx > 0 && 430 if (idx > 0 &&
429 (bidi_it->charpos > bidi_cache[idx - 1].charpos + 1 431 (bidi_it->charpos > (bidi_cache[idx - 1].charpos
432 + bidi_cache[idx - 1].nchars)
430 || bidi_it->charpos < bidi_cache[0].charpos)) 433 || bidi_it->charpos < bidi_cache[0].charpos))
431 { 434 {
432 bidi_cache_reset (); 435 bidi_cache_reset ();
@@ -548,6 +551,7 @@ bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
548 bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */ 551 bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */
549} 552}
550 553
554/* Perform initializations for reordering a new line of bidi text. */
551static void 555static void
552bidi_line_init (struct bidi_it *bidi_it) 556bidi_line_init (struct bidi_it *bidi_it)
553{ 557{
@@ -565,6 +569,77 @@ bidi_line_init (struct bidi_it *bidi_it)
565 bidi_cache_reset (); 569 bidi_cache_reset ();
566} 570}
567 571
572/* Fetch and return the character at BYTEPOS. If that character is
573 covered by a display string, treat the entire run of covered
574 characters as a single character u+FFFC, and return their combined
575 length in CH_LEN and NCHARS. DISP_POS specifies the byte position
576 of the next display string, or -1 if not yet computed. When the
577 next character is at or beyond that position, the function updates
578 DISP_POS with the position of the next display string. */
579static INLINE int
580bidi_fetch_char (EMACS_INT bytepos, EMACS_INT *disp_pos,
581 EMACS_INT *ch_len, EMACS_INT *nchars)
582{
583 int ch;
584
585 /* FIXME: Support strings in addition to buffers. */
586 /* If we got past the last known position of display string, compute
587 the position of the next one. That position could be at BYTEPOS. */
588 if (bytepos < ZV_BYTE && bytepos > *disp_pos)
589 *disp_pos = compute_display_string_pos (bytepos);
590 if (bytepos >= ZV_BYTE)
591 {
592 ch = BIDI_EOB;
593 *ch_len = 1;
594 *nchars = 1;
595 }
596#if 0
597 else if (bytepos >= *disp_pos)
598 {
599 /* support characters covered by a display string */
600 ch = 0xFFFC; /* Unicode Object Replacement Character */
601 }
602#endif
603 else
604 {
605 ch = FETCH_MULTIBYTE_CHAR (bytepos);
606 *ch_len = CHAR_BYTES (ch);
607 *nchars = 1;
608 }
609
610 /* If we just entered a run of characters covered by a display
611 string, compute the position of the next display string. */
612 if (bytepos + *ch_len <= ZV_BYTE && bytepos + *ch_len > *disp_pos)
613 *disp_pos = compute_display_string_pos (bytepos + *ch_len);
614
615 return ch;
616}
617
/* Looks like we won't need this one.  */
#if 0
/* Fetch the character at *CHARPOS / *BYTEPOS.  Return the character,
   and advance *CHARPOS and *BYTEPOS to the next character in logical
   order.  */
static INLINE int
bidi_fetch_char_advance (EMACS_INT *charpos, EMACS_INT *bytepos)
{
  int ch;

  /* FIXME: Support strings in addition to buffers.  */
  /* The macro is expected to update its position arguments in place
     (as FETCH_CHAR_ADVANCE does with plain integer variables), so
     hand it the pointed-to values rather than the pointers.  The
     parentheses keep any operator applied inside the macro from
     binding to the pointer instead of the value.  */
  FETCH_CHAR_ADVANCE_NO_CHECK (ch, (*charpos), (*bytepos));

#if 0
  if (...)
    {
      /* FIXME: Support characters covered by display strings.  */
      ch = 0xFFFC;
    }
#endif

  return ch;
}
#endif
642
568/* Find the beginning of this paragraph by looking back in the buffer. 643/* Find the beginning of this paragraph by looking back in the buffer.
569 Value is the byte position of the paragraph's beginning. */ 644 Value is the byte position of the paragraph's beginning. */
570static EMACS_INT 645static EMACS_INT
@@ -576,6 +651,10 @@ bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
576 while (pos_byte > BEGV_BYTE 651 while (pos_byte > BEGV_BYTE
577 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0) 652 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
578 { 653 {
654 /* FIXME: What if the paragraph beginning is covered by a
655 display string? And what if a display string covering some
656 of the text over which we scan back includes
657 paragraph_start_re? */
579 pos = find_next_newline_no_quit (pos - 1, -1); 658 pos = find_next_newline_no_quit (pos - 1, -1);
580 pos_byte = CHAR_TO_BYTE (pos); 659 pos_byte = CHAR_TO_BYTE (pos);
581 } 660 }
@@ -587,7 +666,7 @@ bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
587 R2L, just use that. Otherwise, determine the paragraph direction 666 R2L, just use that. Otherwise, determine the paragraph direction
588 from the first strong directional character of the paragraph. 667 from the first strong directional character of the paragraph.
589 668
590 NO_DEFAULT_P non-nil means don't default to L2R if the paragraph 669 NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
591 has no strong directional characters and both DIR and 670 has no strong directional characters and both DIR and
592 bidi_it->paragraph_dir are NEUTRAL_DIR. In that case, search back 671 bidi_it->paragraph_dir are NEUTRAL_DIR. In that case, search back
593 in the buffer until a paragraph is found with a strong character, 672 in the buffer until a paragraph is found with a strong character,
@@ -622,8 +701,9 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
622 } 701 }
623 else if (dir == NEUTRAL_DIR) /* P2 */ 702 else if (dir == NEUTRAL_DIR) /* P2 */
624 { 703 {
625 int ch, ch_len; 704 int ch;
626 EMACS_INT pos; 705 EMACS_INT ch_len, nchars;
706 EMACS_INT pos, disp_pos = -1;
627 bidi_type_t type; 707 bidi_type_t type;
628 708
629 if (!bidi_initialized) 709 if (!bidi_initialized)
@@ -658,12 +738,11 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
658 is non-zero. */ 738 is non-zero. */
659 do { 739 do {
660 bytepos = pstartbyte; 740 bytepos = pstartbyte;
661 ch = FETCH_CHAR (bytepos); 741 ch = bidi_fetch_char (bytepos, &ch_len, &nchars, &disp_pos);
662 ch_len = CHAR_BYTES (ch);
663 pos = BYTE_TO_CHAR (bytepos); 742 pos = BYTE_TO_CHAR (bytepos);
664 type = bidi_get_type (ch, NEUTRAL_DIR); 743 type = bidi_get_type (ch, NEUTRAL_DIR);
665 744
666 for (pos++, bytepos += ch_len; 745 for (pos += nchars, bytepos += ch_len;
667 /* NOTE: UAX#9 says to search only for L, AL, or R types 746 /* NOTE: UAX#9 says to search only for L, AL, or R types
668 of characters, and ignore RLE, RLO, LRE, and LRO. 747 of characters, and ignore RLE, RLO, LRE, and LRO.
669 However, I'm not sure it makes sense to omit those 4; 748 However, I'm not sure it makes sense to omit those 4;
@@ -683,7 +762,9 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
683 type = NEUTRAL_B; 762 type = NEUTRAL_B;
684 break; 763 break;
685 } 764 }
686 FETCH_CHAR_ADVANCE (ch, pos, bytepos); 765 ch = bidi_fetch_char (bytepos, &ch_len, &nchars, &disp_pos);
766 pos += nchars;
767 bytepos += ch_len;
687 } 768 }
688 if (type == STRONG_R || type == STRONG_AL) /* P3 */ 769 if (type == STRONG_R || type == STRONG_AL) /* P3 */
689 bidi_it->paragraph_dir = R2L; 770 bidi_it->paragraph_dir = R2L;
@@ -702,6 +783,9 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
702 /* Find the beginning of the previous paragraph, if any. */ 783 /* Find the beginning of the previous paragraph, if any. */
703 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte) 784 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
704 { 785 {
786 /* FIXME: What if p is covered by a display
787 string? See also a FIXME inside
788 bidi_find_paragraph_start. */
705 p--; 789 p--;
706 pbyte = CHAR_TO_BYTE (p); 790 pbyte = CHAR_TO_BYTE (p);
707 prevpbyte = bidi_find_paragraph_start (p, pbyte); 791 prevpbyte = bidi_find_paragraph_start (p, pbyte);
@@ -738,7 +822,7 @@ bidi_set_paragraph_end (struct bidi_it *bidi_it)
738 bidi_it->resolved_level = bidi_it->level_stack[0].level; 822 bidi_it->resolved_level = bidi_it->level_stack[0].level;
739} 823}
740 824
741/* Initialize the bidi iterator from buffer position CHARPOS. */ 825/* Initialize the bidi iterator from buffer/string position CHARPOS. */
742void 826void
743bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, struct bidi_it *bidi_it) 827bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, struct bidi_it *bidi_it)
744{ 828{
@@ -746,6 +830,7 @@ bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, struct bidi_it *bidi_it)
746 bidi_initialize (); 830 bidi_initialize ();
747 bidi_it->charpos = charpos; 831 bidi_it->charpos = charpos;
748 bidi_it->bytepos = bytepos; 832 bidi_it->bytepos = bytepos;
833 bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
749 bidi_it->first_elt = 1; 834 bidi_it->first_elt = 1;
750 bidi_set_paragraph_end (bidi_it); 835 bidi_set_paragraph_end (bidi_it);
751 bidi_it->new_paragraph = 1; 836 bidi_it->new_paragraph = 1;
@@ -767,6 +852,7 @@ bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, struct bidi_it *bidi_it)
767 bidi_it->prev_for_neutral.type_after_w1 = 852 bidi_it->prev_for_neutral.type_after_w1 =
768 bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT; 853 bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
769 bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */ 854 bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */
855 bidi_it->disp_pos = -1; /* invalid/unknown */
770 bidi_cache_shrink (); 856 bidi_cache_shrink ();
771} 857}
772 858
@@ -829,12 +915,16 @@ bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
829} 915}
830 916
831static INLINE int 917static INLINE int
832bidi_explicit_dir_char (int c) 918bidi_explicit_dir_char (int ch)
833{ 919{
834 /* FIXME: this should be replaced with a lookup table with suitable 920 bidi_type_t ch_type;
835 bits set, like standard C ctype macros do. */ 921
836 return (c == LRE_CHAR || c == LRO_CHAR 922 if (!bidi_initialized)
837 || c == RLE_CHAR || c == RLO_CHAR || c == PDF_CHAR); 923 abort ();
924 ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
925 return (ch_type == LRE || ch_type == LRO
926 || ch_type == RLE || ch_type == RLO
927 || ch_type == PDF);
838} 928}
839 929
840/* A helper function for bidi_resolve_explicit. It advances to the 930/* A helper function for bidi_resolve_explicit. It advances to the
@@ -850,7 +940,10 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
850 int new_level; 940 int new_level;
851 bidi_dir_t override; 941 bidi_dir_t override;
852 942
853 if (bidi_it->bytepos < BEGV_BYTE /* after reseat to BEGV? */ 943 /* If reseat()'ed, don't advance, so as to start iteration from the
944 position where we were reseated. bidi_it->bytepos can be less
945 than BEGV_BYTE after reseat to BEGV. */
946 if (bidi_it->bytepos < BEGV_BYTE
854 || bidi_it->first_elt) 947 || bidi_it->first_elt)
855 { 948 {
856 bidi_it->first_elt = 0; 949 bidi_it->first_elt = 0;
@@ -860,7 +953,9 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
860 } 953 }
861 else if (bidi_it->bytepos < ZV_BYTE) /* don't move at ZV */ 954 else if (bidi_it->bytepos < ZV_BYTE) /* don't move at ZV */
862 { 955 {
863 bidi_it->charpos++; 956 /* Advance to the next character, skipping characters covered by
957 display strings (nchars > 1). */
958 bidi_it->charpos += bidi_it->nchars;
864 if (bidi_it->ch_len == 0) 959 if (bidi_it->ch_len == 0)
865 abort (); 960 abort ();
866 bidi_it->bytepos += bidi_it->ch_len; 961 bidi_it->bytepos += bidi_it->ch_len;
@@ -870,17 +965,20 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
870 override = bidi_it->level_stack[bidi_it->stack_idx].override; 965 override = bidi_it->level_stack[bidi_it->stack_idx].override;
871 new_level = current_level; 966 new_level = current_level;
872 967
873 /* in case it is a unibyte character (not yet implemented) */
874 /* _fetch_multibyte_char_len = 1; */
875 if (bidi_it->bytepos >= ZV_BYTE) 968 if (bidi_it->bytepos >= ZV_BYTE)
876 { 969 {
877 curchar = BIDI_EOB; 970 curchar = BIDI_EOB;
878 bidi_it->ch_len = 1; 971 bidi_it->ch_len = 1;
972 bidi_it->nchars = 1;
973 bidi_it->disp_pos = ZV_BYTE;
879 } 974 }
880 else 975 else
881 { 976 {
882 curchar = FETCH_CHAR (bidi_it->bytepos); 977 /* Fetch the character at BYTEPOS. If it is covered by a
883 bidi_it->ch_len = CHAR_BYTES (curchar); 978 display string, treat the entire run of covered characters as
979 a single character u+FFFC. */
980 curchar = bidi_fetch_char (bidi_it->bytepos, &bidi_it->ch_len,
981 &bidi_it->nchars, &bidi_it->disp_pos);
884 } 982 }
885 bidi_it->ch = curchar; 983 bidi_it->ch = curchar;
886 984
@@ -1006,10 +1104,10 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1006} 1104}
1007 1105
1008/* Given an iterator state in BIDI_IT, advance one character position 1106/* Given an iterator state in BIDI_IT, advance one character position
1009 in the buffer to the next character (in the logical order), resolve 1107 in the buffer/string to the next character (in the logical order),
1010 any explicit embeddings and directional overrides, and return the 1108 resolve any explicit embeddings and directional overrides, and
1011 embedding level of the character after resolving explicit 1109 return the embedding level of the character after resolving
1012 directives and ignoring empty embeddings. */ 1110 explicit directives and ignoring empty embeddings. */
1013static int 1111static int
1014bidi_resolve_explicit (struct bidi_it *bidi_it) 1112bidi_resolve_explicit (struct bidi_it *bidi_it)
1015{ 1113{
@@ -1020,8 +1118,8 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
1020 && bidi_it->type == WEAK_BN 1118 && bidi_it->type == WEAK_BN
1021 && bidi_it->ignore_bn_limit == 0 /* only if not already known */ 1119 && bidi_it->ignore_bn_limit == 0 /* only if not already known */
1022 && bidi_it->bytepos < ZV_BYTE /* not already at EOB */ 1120 && bidi_it->bytepos < ZV_BYTE /* not already at EOB */
1023 && bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos 1121 && bidi_explicit_dir_char (FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
1024 + bidi_it->ch_len))) 1122 + bidi_it->ch_len)))
1025 { 1123 {
1026 /* Avoid pushing and popping embedding levels if the level run 1124 /* Avoid pushing and popping embedding levels if the level run
1027 is empty, as this breaks level runs where it shouldn't. 1125 is empty, as this breaks level runs where it shouldn't.
@@ -1033,14 +1131,16 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
1033 1131
1034 bidi_copy_it (&saved_it, bidi_it); 1132 bidi_copy_it (&saved_it, bidi_it);
1035 1133
1036 while (bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos 1134 while (bidi_explicit_dir_char (FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
1037 + bidi_it->ch_len))) 1135 + bidi_it->ch_len)))
1038 { 1136 {
1137 /* This advances to the next character, skipping any
1138 characters covered by display strings. */
1039 level = bidi_resolve_explicit_1 (bidi_it); 1139 level = bidi_resolve_explicit_1 (bidi_it);
1040 } 1140 }
1041 1141
1042 if (level == prev_level) /* empty embedding */ 1142 if (level == prev_level) /* empty embedding */
1043 saved_it.ignore_bn_limit = bidi_it->charpos + 1; 1143 saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
1044 else /* this embedding is non-empty */ 1144 else /* this embedding is non-empty */
1045 saved_it.ignore_bn_limit = -1; 1145 saved_it.ignore_bn_limit = -1;
1046 1146
@@ -1076,8 +1176,8 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
1076 return new_level; 1176 return new_level;
1077} 1177}
1078 1178
1079/* Advance in the buffer, resolve weak types and return the type of 1179/* Advance in the buffer/string, resolve weak types and return the
1080 the next character after weak type resolution. */ 1180 type of the next character after weak type resolution. */
1081static bidi_type_t 1181static bidi_type_t
1082bidi_resolve_weak (struct bidi_it *bidi_it) 1182bidi_resolve_weak (struct bidi_it *bidi_it)
1083{ 1183{
@@ -1156,7 +1256,8 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
1156 { 1256 {
1157 next_char = 1257 next_char =
1158 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE 1258 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
1159 ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len); 1259 ? BIDI_EOB : FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
1260 + bidi_it->ch_len);
1160 type_of_next = bidi_get_type (next_char, override); 1261 type_of_next = bidi_get_type (next_char, override);
1161 1262
1162 if (type_of_next == WEAK_BN 1263 if (type_of_next == WEAK_BN
@@ -1204,11 +1305,12 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
1204 type = WEAK_EN; 1305 type = WEAK_EN;
1205 else /* W5: ET/BN with EN after it. */ 1306 else /* W5: ET/BN with EN after it. */
1206 { 1307 {
1207 EMACS_INT en_pos = bidi_it->charpos + 1; 1308 EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars;
1208 1309
1209 next_char = 1310 next_char =
1210 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE 1311 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
1211 ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len); 1312 ? BIDI_EOB : FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
1313 + bidi_it->ch_len);
1212 type_of_next = bidi_get_type (next_char, override); 1314 type_of_next = bidi_get_type (next_char, override);
1213 1315
1214 if (type_of_next == WEAK_ET 1316 if (type_of_next == WEAK_ET
@@ -1299,8 +1401,8 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
1299 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in 1401 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
1300 the assumption of batch-style processing; see clauses W4, 1402 the assumption of batch-style processing; see clauses W4,
1301 W5, and especially N1, which require to look far forward 1403 W5, and especially N1, which require to look far forward
1302 (as well as back) in the buffer. May the fleas of a 1404 (as well as back) in the buffer/string. May the fleas of
1303 thousand camels infest the armpits of those who design 1405 a thousand camels infest the armpits of those who design
1304 supposedly general-purpose algorithms by looking at their 1406 supposedly general-purpose algorithms by looking at their
1305 own implementations, and fail to consider other possible 1407 own implementations, and fail to consider other possible
1306 implementations! */ 1408 implementations! */
@@ -1391,8 +1493,9 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
1391} 1493}
1392 1494
1393/* Given an iterator state in BIDI_IT, advance one character position 1495/* Given an iterator state in BIDI_IT, advance one character position
1394 in the buffer to the next character (in the logical order), resolve 1496 in the buffer/string to the next character (in the logical order),
1395 the bidi type of that next character, and return that type. */ 1497 resolve the bidi type of that next character, and return that
1498 type. */
1396static bidi_type_t 1499static bidi_type_t
1397bidi_type_of_next_char (struct bidi_it *bidi_it) 1500bidi_type_of_next_char (struct bidi_it *bidi_it)
1398{ 1501{
@@ -1416,15 +1519,16 @@ bidi_type_of_next_char (struct bidi_it *bidi_it)
1416} 1519}
1417 1520
1418/* Given an iterator state BIDI_IT, advance one character position in 1521/* Given an iterator state BIDI_IT, advance one character position in
1419 the buffer to the next character (in the logical order), resolve 1522 the buffer/string to the next character (in the current scan
1420 the embedding and implicit levels of that next character, and 1523 direction), resolve the embedding and implicit levels of that next
1421 return the resulting level. */ 1524 character, and return the resulting level. */
1422static int 1525static int
1423bidi_level_of_next_char (struct bidi_it *bidi_it) 1526bidi_level_of_next_char (struct bidi_it *bidi_it)
1424{ 1527{
1425 bidi_type_t type; 1528 bidi_type_t type;
1426 int level, prev_level = -1; 1529 int level, prev_level = -1;
1427 struct bidi_saved_info next_for_neutral; 1530 struct bidi_saved_info next_for_neutral;
1531 EMACS_INT next_char_pos;
1428 1532
1429 if (bidi_it->scan_dir == 1) 1533 if (bidi_it->scan_dir == 1)
1430 { 1534 {
@@ -1466,8 +1570,14 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
1466 } 1570 }
1467 next_for_neutral = bidi_it->next_for_neutral; 1571 next_for_neutral = bidi_it->next_for_neutral;
1468 1572
1469 /* Perhaps it is already cached. */ 1573 /* Perhaps the character we want is already cached. If it is, the
1470 type = bidi_cache_find (bidi_it->charpos + bidi_it->scan_dir, -1, bidi_it); 1574 call to bidi_cache_find below will return a type other than
1575 UNKNOWN_BT. */
1576 if (bidi_it->scan_dir > 0)
1577 next_char_pos = bidi_it->charpos + bidi_it->nchars;
1578 else
1579 next_char_pos = bidi_it->charpos - 1;
1580 type = bidi_cache_find (next_char_pos, -1, bidi_it);
1471 if (type != UNKNOWN_BT) 1581 if (type != UNKNOWN_BT)
1472 { 1582 {
1473 /* Don't lose the information for resolving neutrals! The 1583 /* Don't lose the information for resolving neutrals! The
@@ -1529,14 +1639,13 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
1529 int clen = bidi_it->ch_len; 1639 int clen = bidi_it->ch_len;
1530 EMACS_INT bpos = bidi_it->bytepos; 1640 EMACS_INT bpos = bidi_it->bytepos;
1531 EMACS_INT cpos = bidi_it->charpos; 1641 EMACS_INT cpos = bidi_it->charpos;
1642 EMACS_INT disp_pos = bidi_it->disp_pos;
1643 EMACS_INT nc;
1532 bidi_type_t chtype; 1644 bidi_type_t chtype;
1533 1645
1534 do { 1646 do {
1535 /*_fetch_multibyte_char_len = 1;*/ 1647 ch = bidi_fetch_char (bpos += clen, &clen, &nc, &disp_pos);
1536 ch = bpos + clen >= ZV_BYTE ? BIDI_EOB : FETCH_CHAR (bpos + clen); 1648 cpos += nc;
1537 bpos += clen;
1538 cpos++;
1539 clen = (ch == BIDI_EOB ? 1 : CHAR_BYTES (ch));
1540 if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */) 1649 if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
1541 chtype = NEUTRAL_B; 1650 chtype = NEUTRAL_B;
1542 else 1651 else
@@ -1615,8 +1724,8 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
1615 1724
1616 If this level's other edge is cached, we simply jump to it, filling 1725 If this level's other edge is cached, we simply jump to it, filling
1617 the iterator structure with the iterator state on the other edge. 1726 the iterator structure with the iterator state on the other edge.
1618 Otherwise, we walk the buffer until we come back to the same level 1727 Otherwise, we walk the buffer or string until we come back to the
1619 as LEVEL. 1728 same level as LEVEL.
1620 1729
1621 Note: we are not talking here about a ``level run'' in the UAX#9 1730 Note: we are not talking here about a ``level run'' in the UAX#9
1622 sense of the term, but rather about a ``level'' which includes 1731 sense of the term, but rather about a ``level'' which includes
@@ -1680,6 +1789,7 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
1680 sentinel.bytepos--; 1789 sentinel.bytepos--;
1681 sentinel.ch = '\n'; /* doesn't matter, but why not? */ 1790 sentinel.ch = '\n'; /* doesn't matter, but why not? */
1682 sentinel.ch_len = 1; 1791 sentinel.ch_len = 1;
1792 sentinel.nchars = 1;
1683 } 1793 }
1684 bidi_cache_iterator_state (&sentinel, 1); 1794 bidi_cache_iterator_state (&sentinel, 1);
1685 } 1795 }
@@ -1750,14 +1860,15 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
1750 && bidi_it->bytepos < ZV_BYTE) 1860 && bidi_it->bytepos < ZV_BYTE)
1751 { 1861 {
1752 EMACS_INT sep_len = 1862 EMACS_INT sep_len =
1753 bidi_at_paragraph_end (bidi_it->charpos + 1, 1863 bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
1754 bidi_it->bytepos + bidi_it->ch_len); 1864 bidi_it->bytepos + bidi_it->ch_len);
1755 if (sep_len >= 0) 1865 if (sep_len >= 0)
1756 { 1866 {
1757 bidi_it->new_paragraph = 1; 1867 bidi_it->new_paragraph = 1;
1758 /* Record the buffer position of the last character of the 1868 /* Record the buffer position of the last character of the
1759 paragraph separator. */ 1869 paragraph separator. */
1760 bidi_it->separator_limit = bidi_it->charpos + 1 + sep_len; 1870 bidi_it->separator_limit =
1871 bidi_it->charpos + bidi_it->nchars + sep_len;
1761 } 1872 }
1762 } 1873 }
1763 1874
@@ -1767,7 +1878,8 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
1767 last cached position, the cache's job is done and we can 1878 last cached position, the cache's job is done and we can
1768 discard it. */ 1879 discard it. */
1769 if (bidi_it->resolved_level == bidi_it->level_stack[0].level 1880 if (bidi_it->resolved_level == bidi_it->level_stack[0].level
1770 && bidi_it->charpos > bidi_cache[bidi_cache_idx - 1].charpos) 1881 && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
1882 + bidi_cache[bidi_cache_idx - 1].nchars - 1))
1771 bidi_cache_reset (); 1883 bidi_cache_reset ();
1772 /* But as long as we are caching during forward scan, we must 1884 /* But as long as we are caching during forward scan, we must
1773 cache each state, or else the cache integrity will be 1885 cache each state, or else the cache integrity will be
diff --git a/src/dispextern.h b/src/dispextern.h
index 72e23e6642a..f94723099f9 100644
--- a/src/dispextern.h
+++ b/src/dispextern.h
@@ -1812,12 +1812,16 @@ struct bidi_stack {
1812 bidi_dir_t override; 1812 bidi_dir_t override;
1813}; 1813};
1814 1814
1815/* Data type for iterating over bidi text. */ 1815/* Data type for reordering bidirectional text. */
1816struct bidi_it { 1816struct bidi_it {
1817 EMACS_INT bytepos; /* iterator's position in buffer */ 1817 EMACS_INT bytepos; /* iterator's position in buffer */
1818 EMACS_INT charpos; 1818 EMACS_INT charpos;
1819 int ch; /* character itself */ 1819 int ch; /* character at that position, or u+FFFC
1820 int ch_len; /* length of its multibyte sequence */ 1820 ("object replacement character") for a run
1821 of characters covered by a display string */
1822 EMACS_INT nchars; /* its "length", usually 1; it's > 1 for a run
1823 of characters covered by a display string */
1824 EMACS_INT ch_len; /* its length in bytes */
1821 bidi_type_t type; /* bidi type of this character, after 1825 bidi_type_t type; /* bidi type of this character, after
1822 resolving weak and neutral types */ 1826 resolving weak and neutral types */
1823 bidi_type_t type_after_w1; /* original type, after overrides and W1 */ 1827 bidi_type_t type_after_w1; /* original type, after overrides and W1 */
@@ -1844,6 +1848,7 @@ struct bidi_it {
1844 bidi_dir_t paragraph_dir; /* current paragraph direction */ 1848 bidi_dir_t paragraph_dir; /* current paragraph direction */
1845 int new_paragraph; /* if non-zero, we expect a new paragraph */ 1849 int new_paragraph; /* if non-zero, we expect a new paragraph */
1846 EMACS_INT separator_limit; /* where paragraph separator should end */ 1850 EMACS_INT separator_limit; /* where paragraph separator should end */
1851 EMACS_INT disp_pos; /* byte position of display string after ch */
1847}; 1852};
1848 1853
1849/* Value is non-zero when the bidi iterator is at base paragraph 1854/* Value is non-zero when the bidi iterator is at base paragraph
@@ -3001,6 +3006,7 @@ extern void reseat_at_previous_visible_line_start (struct it *);
3001extern Lisp_Object lookup_glyphless_char_display (int, struct it *); 3006extern Lisp_Object lookup_glyphless_char_display (int, struct it *);
3002extern int calc_pixel_width_or_height (double *, struct it *, Lisp_Object, 3007extern int calc_pixel_width_or_height (double *, struct it *, Lisp_Object,
3003 struct font *, int, int *); 3008 struct font *, int, int *);
3009extern EMACS_INT compute_display_string_pos (EMACS_INT);
3004 3010
3005#ifdef HAVE_WINDOW_SYSTEM 3011#ifdef HAVE_WINDOW_SYSTEM
3006 3012
diff --git a/src/xdisp.c b/src/xdisp.c
index 88353e37925..10f69b4cd38 100644
--- a/src/xdisp.c
+++ b/src/xdisp.c
@@ -3085,6 +3085,20 @@ next_overlay_change (EMACS_INT pos)
3085 return endpos; 3085 return endpos;
3086} 3086}
3087 3087
3088/* Return the byte position of a display string at or after BYTEPOS.
3089 If no display string exist at or after BYTEPOS, return ZV_BYTE. A
3090 display string is either an overlay with `display' property whose
3091 value is a string or a `display' text property whose value is a
3092 string. */
3093EMACS_INT
3094compute_display_string_pos (EMACS_INT bytepos)
3095{
3096 if (bytepos >= ZV_BYTE)
3097 return ZV_BYTE;
3098 /* FIXME! */
3099 return ZV_BYTE;
3100}
3101
3088 3102
3089 3103
3090/*********************************************************************** 3104/***********************************************************************
@@ -5382,6 +5396,7 @@ reseat_1 (struct it *it, struct text_pos pos, int set_stop_p)
5382 { 5396 {
5383 it->bidi_it.first_elt = 1; 5397 it->bidi_it.first_elt = 1;
5384 it->bidi_it.paragraph_dir = NEUTRAL_DIR; 5398 it->bidi_it.paragraph_dir = NEUTRAL_DIR;
5399 it->bidi_it.disp_pos = -1;
5385 } 5400 }
5386 5401
5387 if (set_stop_p) 5402 if (set_stop_p)