aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/bidi.c419
1 files changed, 224 insertions, 195 deletions
diff --git a/src/bidi.c b/src/bidi.c
index 61de1fc7b5f..0f5d43147e2 100644
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -1,4 +1,4 @@
1/* Low-level bidirectional buffer-scanning functions for GNU Emacs. 1/* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
2 Copyright (C) 2000-2001, 2004-2005, 2009-2011 2 Copyright (C) 2000-2001, 2004-2005, 2009-2011
3 Free Software Foundation, Inc. 3 Free Software Foundation, Inc.
4 4
@@ -20,7 +20,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20/* Written by Eli Zaretskii <eliz@gnu.org>. 20/* Written by Eli Zaretskii <eliz@gnu.org>.
21 21
22 A sequential implementation of the Unicode Bidirectional algorithm, 22 A sequential implementation of the Unicode Bidirectional algorithm,
23 as per UAX#9, a part of the Unicode Standard. 23 (UBA) as per UAX#9, a part of the Unicode Standard.
24 24
25 Unlike the reference and most other implementations, this one is 25 Unlike the reference and most other implementations, this one is
26 designed to be called once for every character in the buffer or 26 designed to be called once for every character in the buffer or
@@ -80,43 +80,10 @@ int bidi_ignore_explicit_marks_for_paragraph_level = 1;
80static Lisp_Object paragraph_start_re, paragraph_separate_re; 80static Lisp_Object paragraph_start_re, paragraph_separate_re;
81static Lisp_Object Qparagraph_start, Qparagraph_separate; 81static Lisp_Object Qparagraph_start, Qparagraph_separate;
82 82
83static void 83
84bidi_initialize (void) 84/***********************************************************************
85{ 85 Utilities
86 86 ***********************************************************************/
87#include "biditype.h"
88#include "bidimirror.h"
89
90 int i;
91
92 bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
93 staticpro (&bidi_type_table);
94
95 for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
96 char_table_set_range (bidi_type_table, bidi_type[i].from, bidi_type[i].to,
97 make_number (bidi_type[i].type));
98
99 bidi_mirror_table = Fmake_char_table (Qnil, Qnil);
100 staticpro (&bidi_mirror_table);
101
102 for (i = 0; i < sizeof bidi_mirror / sizeof bidi_mirror[0]; i++)
103 char_table_set (bidi_mirror_table, bidi_mirror[i].from,
104 make_number (bidi_mirror[i].to));
105
106 Qparagraph_start = intern ("paragraph-start");
107 staticpro (&Qparagraph_start);
108 paragraph_start_re = Fsymbol_value (Qparagraph_start);
109 if (!STRINGP (paragraph_start_re))
110 paragraph_start_re = build_string ("\f\\|[ \t]*$");
111 staticpro (&paragraph_start_re);
112 Qparagraph_separate = intern ("paragraph-separate");
113 staticpro (&Qparagraph_separate);
114 paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
115 if (!STRINGP (paragraph_separate_re))
116 paragraph_separate_re = build_string ("[ \t\f]*$");
117 staticpro (&paragraph_separate_re);
118 bidi_initialized = 1;
119}
120 87
121/* Return the bidi type of a character CH, subject to the current 88/* Return the bidi type of a character CH, subject to the current
122 directional OVERRIDE. */ 89 directional OVERRIDE. */
@@ -233,6 +200,78 @@ bidi_mirror_char (int c)
233 return c; 200 return c;
234} 201}
235 202
203/* Determine the start-of-run (sor) directional type given the two
204 embedding levels on either side of the run boundary. Also, update
205 the saved info about previously seen characters, since that info is
206 generally valid for a single level run. */
207static INLINE void
208bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
209{
210 int higher_level = level_before > level_after ? level_before : level_after;
211
212 /* The prev_was_pdf gork is required for when we have several PDFs
213 in a row. In that case, we want to compute the sor type for the
214 next level run only once: when we see the first PDF. That's
215 because the sor type depends only on the higher of the two levels
216 that we find on the two sides of the level boundary (see UAX#9,
217 clause X10), and so we don't need to know the final embedding
218 level to which we descend after processing all the PDFs. */
219 if (!bidi_it->prev_was_pdf || level_before < level_after)
220 /* FIXME: should the default sor direction be user selectable? */
221 bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
222 if (level_before > level_after)
223 bidi_it->prev_was_pdf = 1;
224
225 bidi_it->prev.type = UNKNOWN_BT;
226 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
227 bidi_it->last_strong.orig_type = UNKNOWN_BT;
228 bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
229 bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
230 bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
231 bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
232 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
233 bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
234}
235
236/* Push the current embedding level and override status; reset the
237 current level to LEVEL and the current override status to OVERRIDE. */
238static INLINE void
239bidi_push_embedding_level (struct bidi_it *bidi_it,
240 int level, bidi_dir_t override)
241{
242 bidi_it->stack_idx++;
243 if (bidi_it->stack_idx >= BIDI_MAXLEVEL)
244 abort ();
245 bidi_it->level_stack[bidi_it->stack_idx].level = level;
246 bidi_it->level_stack[bidi_it->stack_idx].override = override;
247}
248
249/* Pop the embedding level and directional override status from the
250 stack, and return the new level. */
251static INLINE int
252bidi_pop_embedding_level (struct bidi_it *bidi_it)
253{
254 /* UAX#9 says to ignore invalid PDFs. */
255 if (bidi_it->stack_idx > 0)
256 bidi_it->stack_idx--;
257 return bidi_it->level_stack[bidi_it->stack_idx].level;
258}
259
260/* Record in SAVED_INFO the information about the current character. */
261static INLINE void
262bidi_remember_char (struct bidi_saved_info *saved_info,
263 struct bidi_it *bidi_it)
264{
265 saved_info->charpos = bidi_it->charpos;
266 saved_info->bytepos = bidi_it->bytepos;
267 saved_info->type = bidi_it->type;
268 bidi_check_type (bidi_it->type);
269 saved_info->type_after_w1 = bidi_it->type_after_w1;
270 bidi_check_type (bidi_it->type_after_w1);
271 saved_info->orig_type = bidi_it->orig_type;
272 bidi_check_type (bidi_it->orig_type);
273}
274
236/* Copy the bidi iterator from FROM to TO. To save cycles, this only 275/* Copy the bidi iterator from FROM to TO. To save cycles, this only
237 copies the part of the level stack that is actually in use. */ 276 copies the part of the level stack that is actually in use. */
238static INLINE void 277static INLINE void
@@ -249,7 +288,10 @@ bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
249 to->level_stack[i] = from->level_stack[i]; 288 to->level_stack[i] = from->level_stack[i];
250} 289}
251 290
252/* Caching the bidi iterator states. */ 291
292/***********************************************************************
293 Caching the bidi iterator states
294 ***********************************************************************/
253 295
254#define BIDI_CACHE_CHUNK 200 296#define BIDI_CACHE_CHUNK 200
255static struct bidi_it *bidi_cache; 297static struct bidi_it *bidi_cache;
@@ -496,64 +538,98 @@ bidi_peek_at_next_level (struct bidi_it *bidi_it)
496 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level; 538 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
497} 539}
498 540
499/* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph. 541
500 Value is the non-negative length of the paragraph separator 542/***********************************************************************
501 following the buffer position, -1 if position is at the beginning 543 Initialization
502 of a new paragraph, or -2 if position is neither at beginning nor 544 ***********************************************************************/
503 at end of a paragraph. */ 545static void
504static EMACS_INT 546bidi_initialize (void)
505bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
506{ 547{
507 Lisp_Object sep_re;
508 Lisp_Object start_re;
509 EMACS_INT val;
510 548
511 sep_re = paragraph_separate_re; 549#include "biditype.h"
512 start_re = paragraph_start_re; 550#include "bidimirror.h"
513 551
514 val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil); 552 int i;
515 if (val < 0)
516 {
517 if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
518 val = -1;
519 else
520 val = -2;
521 }
522 553
523 return val; 554 bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
555 staticpro (&bidi_type_table);
556
557 for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
558 char_table_set_range (bidi_type_table, bidi_type[i].from, bidi_type[i].to,
559 make_number (bidi_type[i].type));
560
561 bidi_mirror_table = Fmake_char_table (Qnil, Qnil);
562 staticpro (&bidi_mirror_table);
563
564 for (i = 0; i < sizeof bidi_mirror / sizeof bidi_mirror[0]; i++)
565 char_table_set (bidi_mirror_table, bidi_mirror[i].from,
566 make_number (bidi_mirror[i].to));
567
568 Qparagraph_start = intern ("paragraph-start");
569 staticpro (&Qparagraph_start);
570 paragraph_start_re = Fsymbol_value (Qparagraph_start);
571 if (!STRINGP (paragraph_start_re))
572 paragraph_start_re = build_string ("\f\\|[ \t]*$");
573 staticpro (&paragraph_start_re);
574 Qparagraph_separate = intern ("paragraph-separate");
575 staticpro (&Qparagraph_separate);
576 paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
577 if (!STRINGP (paragraph_separate_re))
578 paragraph_separate_re = build_string ("[ \t\f]*$");
579 staticpro (&paragraph_separate_re);
580 bidi_initialized = 1;
524} 581}
525 582
526/* Determine the start-of-run (sor) directional type given the two 583/* Do whatever UAX#9 clause X8 says should be done at paragraph's
527 embedding levels on either side of the run boundary. Also, update 584 end. */
528 the saved info about previously seen characters, since that info is
529 generally valid for a single level run. */
530static INLINE void 585static INLINE void
531bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after) 586bidi_set_paragraph_end (struct bidi_it *bidi_it)
532{ 587{
533 int higher_level = level_before > level_after ? level_before : level_after; 588 bidi_it->invalid_levels = 0;
534 589 bidi_it->invalid_rl_levels = -1;
535 /* The prev_was_pdf gork is required for when we have several PDFs 590 bidi_it->stack_idx = 0;
536 in a row. In that case, we want to compute the sor type for the 591 bidi_it->resolved_level = bidi_it->level_stack[0].level;
537 next level run only once: when we see the first PDF. That's 592}
538 because the sor type depends only on the higher of the two levels
539 that we find on the two sides of the level boundary (see UAX#9,
540 clause X10), and so we don't need to know the final embedding
541 level to which we descend after processing all the PDFs. */
542 if (!bidi_it->prev_was_pdf || level_before < level_after)
543 /* FIXME: should the default sor direction be user selectable? */
544 bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
545 if (level_before > level_after)
546 bidi_it->prev_was_pdf = 1;
547 593
548 bidi_it->prev.type = UNKNOWN_BT; 594/* Initialize the bidi iterator from buffer/string position CHARPOS. */
595void
596bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
597 struct bidi_it *bidi_it)
598{
599 if (! bidi_initialized)
600 bidi_initialize ();
601 if (charpos >= 0)
602 bidi_it->charpos = charpos;
603 if (bytepos >= 0)
604 bidi_it->bytepos = bytepos;
605 bidi_it->frame_window_p = frame_window_p;
606 bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
607 bidi_it->first_elt = 1;
608 bidi_set_paragraph_end (bidi_it);
609 bidi_it->new_paragraph = 1;
610 bidi_it->separator_limit = -1;
611 bidi_it->type = NEUTRAL_B;
612 bidi_it->type_after_w1 = NEUTRAL_B;
613 bidi_it->orig_type = NEUTRAL_B;
614 bidi_it->prev_was_pdf = 0;
615 bidi_it->prev.type = bidi_it->prev.type_after_w1 =
616 bidi_it->prev.orig_type = UNKNOWN_BT;
549 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 = 617 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
550 bidi_it->last_strong.orig_type = UNKNOWN_BT; 618 bidi_it->last_strong.orig_type = UNKNOWN_BT;
551 bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L; 619 bidi_it->next_for_neutral.charpos = -1;
552 bidi_it->prev_for_neutral.charpos = bidi_it->charpos; 620 bidi_it->next_for_neutral.type =
553 bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos; 621 bidi_it->next_for_neutral.type_after_w1 =
554 bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
555 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT; 622 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
556 bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */ 623 bidi_it->prev_for_neutral.charpos = -1;
624 bidi_it->prev_for_neutral.type =
625 bidi_it->prev_for_neutral.type_after_w1 =
626 bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
627 bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */
628 bidi_it->disp_pos = -1; /* invalid/unknown */
629 /* We can only shrink the cache if we are at the bottom level of its
630 "stack". */
631 if (bidi_cache_start == 0)
632 bidi_cache_shrink ();
557} 633}
558 634
559/* Perform initializations for reordering a new line of bidi text. */ 635/* Perform initializations for reordering a new line of bidi text. */
@@ -574,6 +650,11 @@ bidi_line_init (struct bidi_it *bidi_it)
574 bidi_cache_reset (); 650 bidi_cache_reset ();
575} 651}
576 652
653
654/***********************************************************************
655 Fetching characters
656 ***********************************************************************/
657
577/* Count bytes in multibyte string S between BEG/BEGBYTE and END. BEG 658/* Count bytes in multibyte string S between BEG/BEGBYTE and END. BEG
578 and END are zero-based character positions in S, BEGBYTE is byte 659 and END are zero-based character positions in S, BEGBYTE is byte
579 position corresponding to BEG. */ 660 position corresponding to BEG. */
@@ -701,6 +782,38 @@ bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
701 return ch; 782 return ch;
702} 783}
703 784
785
786/***********************************************************************
787 Determining paragraph direction
788 ***********************************************************************/
789
790/* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
791 Value is the non-negative length of the paragraph separator
792 following the buffer position, -1 if position is at the beginning
793 of a new paragraph, or -2 if position is neither at beginning nor
794 at end of a paragraph. */
795static EMACS_INT
796bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
797{
798 Lisp_Object sep_re;
799 Lisp_Object start_re;
800 EMACS_INT val;
801
802 sep_re = paragraph_separate_re;
803 start_re = paragraph_start_re;
804
805 val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
806 if (val < 0)
807 {
808 if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
809 val = -1;
810 else
811 val = -2;
812 }
813
814 return val;
815}
816
704/* Find the beginning of this paragraph by looking back in the buffer. 817/* Find the beginning of this paragraph by looking back in the buffer.
705 Value is the byte position of the paragraph's beginning. */ 818 Value is the byte position of the paragraph's beginning. */
706static EMACS_INT 819static EMACS_INT
@@ -896,115 +1009,12 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
896 bidi_line_init (bidi_it); 1009 bidi_line_init (bidi_it);
897} 1010}
898 1011
899/* Do whatever UAX#9 clause X8 says should be done at paragraph's 1012
900 end. */ 1013/***********************************************************************
901static INLINE void 1014 Resolving explicit and implicit levels.
902bidi_set_paragraph_end (struct bidi_it *bidi_it) 1015 The rest of the file constitutes the core
903{ 1016 of the UBA implementation.
904 bidi_it->invalid_levels = 0; 1017 ***********************************************************************/
905 bidi_it->invalid_rl_levels = -1;
906 bidi_it->stack_idx = 0;
907 bidi_it->resolved_level = bidi_it->level_stack[0].level;
908}
909
910/* Initialize the bidi iterator from buffer/string position CHARPOS. */
911void
912bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
913 struct bidi_it *bidi_it)
914{
915 if (! bidi_initialized)
916 bidi_initialize ();
917 if (charpos >= 0)
918 bidi_it->charpos = charpos;
919 if (bytepos >= 0)
920 bidi_it->bytepos = bytepos;
921 bidi_it->frame_window_p = frame_window_p;
922 bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
923 bidi_it->first_elt = 1;
924 bidi_set_paragraph_end (bidi_it);
925 bidi_it->new_paragraph = 1;
926 bidi_it->separator_limit = -1;
927 bidi_it->type = NEUTRAL_B;
928 bidi_it->type_after_w1 = NEUTRAL_B;
929 bidi_it->orig_type = NEUTRAL_B;
930 bidi_it->prev_was_pdf = 0;
931 bidi_it->prev.type = bidi_it->prev.type_after_w1 =
932 bidi_it->prev.orig_type = UNKNOWN_BT;
933 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
934 bidi_it->last_strong.orig_type = UNKNOWN_BT;
935 bidi_it->next_for_neutral.charpos = -1;
936 bidi_it->next_for_neutral.type =
937 bidi_it->next_for_neutral.type_after_w1 =
938 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
939 bidi_it->prev_for_neutral.charpos = -1;
940 bidi_it->prev_for_neutral.type =
941 bidi_it->prev_for_neutral.type_after_w1 =
942 bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
943 bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */
944 bidi_it->disp_pos = -1; /* invalid/unknown */
945 /* We can only shrink the cache if we are at the bottom level of its
946 "stack". */
947 if (bidi_cache_start == 0)
948 bidi_cache_shrink ();
949}
950
951/* Push the current embedding level and override status; reset the
952 current level to LEVEL and the current override status to OVERRIDE. */
953static INLINE void
954bidi_push_embedding_level (struct bidi_it *bidi_it,
955 int level, bidi_dir_t override)
956{
957 bidi_it->stack_idx++;
958 if (bidi_it->stack_idx >= BIDI_MAXLEVEL)
959 abort ();
960 bidi_it->level_stack[bidi_it->stack_idx].level = level;
961 bidi_it->level_stack[bidi_it->stack_idx].override = override;
962}
963
964/* Pop the embedding level and directional override status from the
965 stack, and return the new level. */
966static INLINE int
967bidi_pop_embedding_level (struct bidi_it *bidi_it)
968{
969 /* UAX#9 says to ignore invalid PDFs. */
970 if (bidi_it->stack_idx > 0)
971 bidi_it->stack_idx--;
972 return bidi_it->level_stack[bidi_it->stack_idx].level;
973}
974
975/* Record in SAVED_INFO the information about the current character. */
976static INLINE void
977bidi_remember_char (struct bidi_saved_info *saved_info,
978 struct bidi_it *bidi_it)
979{
980 saved_info->charpos = bidi_it->charpos;
981 saved_info->bytepos = bidi_it->bytepos;
982 saved_info->type = bidi_it->type;
983 bidi_check_type (bidi_it->type);
984 saved_info->type_after_w1 = bidi_it->type_after_w1;
985 bidi_check_type (bidi_it->type_after_w1);
986 saved_info->orig_type = bidi_it->orig_type;
987 bidi_check_type (bidi_it->orig_type);
988}
989
990/* Resolve the type of a neutral character according to the type of
991 surrounding strong text and the current embedding level. */
992static INLINE bidi_type_t
993bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
994{
995 /* N1: European and Arabic numbers are treated as though they were R. */
996 if (next_type == WEAK_EN || next_type == WEAK_AN)
997 next_type = STRONG_R;
998 if (prev_type == WEAK_EN || prev_type == WEAK_AN)
999 prev_type = STRONG_R;
1000
1001 if (next_type == prev_type) /* N1 */
1002 return next_type;
1003 else if ((lev & 1) == 0) /* N2 */
1004 return STRONG_L;
1005 else
1006 return STRONG_R;
1007}
1008 1018
1009static INLINE int 1019static INLINE int
1010bidi_explicit_dir_char (int ch) 1020bidi_explicit_dir_char (int ch)
@@ -1503,6 +1513,25 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
1503 return type; 1513 return type;
1504} 1514}
1505 1515
1516/* Resolve the type of a neutral character according to the type of
1517 surrounding strong text and the current embedding level. */
1518static INLINE bidi_type_t
1519bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
1520{
1521 /* N1: European and Arabic numbers are treated as though they were R. */
1522 if (next_type == WEAK_EN || next_type == WEAK_AN)
1523 next_type = STRONG_R;
1524 if (prev_type == WEAK_EN || prev_type == WEAK_AN)
1525 prev_type = STRONG_R;
1526
1527 if (next_type == prev_type) /* N1 */
1528 return next_type;
1529 else if ((lev & 1) == 0) /* N2 */
1530 return STRONG_L;
1531 else
1532 return STRONG_R;
1533}
1534
1506static bidi_type_t 1535static bidi_type_t
1507bidi_resolve_neutral (struct bidi_it *bidi_it) 1536bidi_resolve_neutral (struct bidi_it *bidi_it)
1508{ 1537{