aboutsummaryrefslogtreecommitdiffstats
path: root/src/bidi.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/bidi.c')
-rw-r--r--src/bidi.c1278
1 files changed, 909 insertions, 369 deletions
diff --git a/src/bidi.c b/src/bidi.c
index 469afdb3819..b3479b17b16 100644
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -1,5 +1,5 @@
1/* Low-level bidirectional buffer-scanning functions for GNU Emacs. 1/* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
2 Copyright (C) 2000-2001, 2004-2005, 2009-2011 2 Copyright (C) 2000-2001, 2004-2005, 2009-2012
3 Free Software Foundation, Inc. 3 Free Software Foundation, Inc.
4 4
5This file is part of GNU Emacs. 5This file is part of GNU Emacs.
@@ -20,7 +20,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20/* Written by Eli Zaretskii <eliz@gnu.org>. 20/* Written by Eli Zaretskii <eliz@gnu.org>.
21 21
22 A sequential implementation of the Unicode Bidirectional algorithm, 22 A sequential implementation of the Unicode Bidirectional algorithm,
23 as per UAX#9, a part of the Unicode Standard. 23 (UBA) as per UAX#9, a part of the Unicode Standard.
24 24
25 Unlike the reference and most other implementations, this one is 25 Unlike the reference and most other implementations, this one is
26 designed to be called once for every character in the buffer or 26 designed to be called once for every character in the buffer or
@@ -35,18 +35,23 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
35 details about its algorithm that finds the next visual-order 35 details about its algorithm that finds the next visual-order
36 character by resolving their levels on the fly. 36 character by resolving their levels on the fly.
37 37
38 The two other entry points are bidi_paragraph_init and 38 Two other entry points are bidi_paragraph_init and
39 bidi_mirror_char. The first determines the base direction of a 39 bidi_mirror_char. The first determines the base direction of a
40 paragraph, while the second returns the mirrored version of its 40 paragraph, while the second returns the mirrored version of its
41 argument character. 41 argument character.
42 42
43 A few auxiliary entry points are used to initialize the bidi
44 iterator for iterating an object (buffer or string), push and pop
45 the bidi iterator state, and save and restore the state of the bidi
46 cache.
47
43 If you want to understand the code, you will have to read it 48 If you want to understand the code, you will have to read it
44 together with the relevant portions of UAX#9. The comments include 49 together with the relevant portions of UAX#9. The comments include
45 references to UAX#9 rules, for that very reason. 50 references to UAX#9 rules, for that very reason.
46 51
47 A note about references to UAX#9 rules: if the reference says 52 A note about references to UAX#9 rules: if the reference says
48 something like "X9/Retaining", it means that you need to refer to 53 something like "X9/Retaining", it means that you need to refer to
49 rule X9 and to its modifications decribed in the "Implementation 54 rule X9 and to its modifications described in the "Implementation
50 Notes" section of UAX#9, under "Retaining Format Codes". */ 55 Notes" section of UAX#9, under "Retaining Format Codes". */
51 56
52#include <config.h> 57#include <config.h>
@@ -66,16 +71,6 @@ static Lisp_Object bidi_type_table, bidi_mirror_table;
66#define RLM_CHAR 0x200F 71#define RLM_CHAR 0x200F
67#define BIDI_EOB -1 72#define BIDI_EOB -1
68 73
69/* Local data structures. (Look in dispextern.h for the rest.) */
70
71/* What we need to know about the current paragraph. */
72struct bidi_paragraph_info {
73 EMACS_INT start_bytepos; /* byte position where it begins */
74 EMACS_INT end_bytepos; /* byte position where it ends */
75 int embedding_level; /* its basic embedding level */
76 bidi_dir_t base_dir; /* its base direction */
77};
78
79/* Data type for describing the bidirectional character categories. */ 74/* Data type for describing the bidirectional character categories. */
80typedef enum { 75typedef enum {
81 UNKNOWN_BC, 76 UNKNOWN_BC,
@@ -84,49 +79,21 @@ typedef enum {
84 STRONG 79 STRONG
85} bidi_category_t; 80} bidi_category_t;
86 81
82/* UAX#9 says to search only for L, AL, or R types of characters, and
83 ignore RLE, RLO, LRE, and LRO, when determining the base paragraph
84 level. Yudit indeed ignores them. This variable is therefore set
85 by default to ignore them, but setting it to zero will take them
86 into account. */
87extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE; 87extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
88int bidi_ignore_explicit_marks_for_paragraph_level = 1; 88int bidi_ignore_explicit_marks_for_paragraph_level = 1;
89 89
90static Lisp_Object paragraph_start_re, paragraph_separate_re; 90static Lisp_Object paragraph_start_re, paragraph_separate_re;
91static Lisp_Object Qparagraph_start, Qparagraph_separate; 91static Lisp_Object Qparagraph_start, Qparagraph_separate;
92 92
93static void 93
94bidi_initialize (void) 94/***********************************************************************
95{ 95 Utilities
96 96 ***********************************************************************/
97#include "biditype.h"
98#include "bidimirror.h"
99
100 int i;
101
102 bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
103 staticpro (&bidi_type_table);
104
105 for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
106 char_table_set_range (bidi_type_table, bidi_type[i].from, bidi_type[i].to,
107 make_number (bidi_type[i].type));
108
109 bidi_mirror_table = Fmake_char_table (Qnil, Qnil);
110 staticpro (&bidi_mirror_table);
111
112 for (i = 0; i < sizeof bidi_mirror / sizeof bidi_mirror[0]; i++)
113 char_table_set (bidi_mirror_table, bidi_mirror[i].from,
114 make_number (bidi_mirror[i].to));
115
116 Qparagraph_start = intern ("paragraph-start");
117 staticpro (&Qparagraph_start);
118 paragraph_start_re = Fsymbol_value (Qparagraph_start);
119 if (!STRINGP (paragraph_start_re))
120 paragraph_start_re = build_string ("\f\\|[ \t]*$");
121 staticpro (&paragraph_start_re);
122 Qparagraph_separate = intern ("paragraph-separate");
123 staticpro (&Qparagraph_separate);
124 paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
125 if (!STRINGP (paragraph_separate_re))
126 paragraph_separate_re = build_string ("[ \t\f]*$");
127 staticpro (&paragraph_separate_re);
128 bidi_initialized = 1;
129}
130 97
131/* Return the bidi type of a character CH, subject to the current 98/* Return the bidi type of a character CH, subject to the current
132 directional OVERRIDE. */ 99 directional OVERRIDE. */
@@ -141,6 +108,12 @@ bidi_get_type (int ch, bidi_dir_t override)
141 abort (); 108 abort ();
142 109
143 default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch)); 110 default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
111 /* Every valid character code, even those that are unassigned by the
112 UCD, has some bidi-class property, according to
113 DerivedBidiClass.txt file. Therefore, if we ever get UNKNOWN_BT
114 (= zero) code from CHAR_TABLE_REF, that's a bug. */
115 if (default_type == UNKNOWN_BT)
116 abort ();
144 117
145 if (override == NEUTRAL_DIR) 118 if (override == NEUTRAL_DIR)
146 return default_type; 119 return default_type;
@@ -173,11 +146,10 @@ bidi_get_type (int ch, bidi_dir_t override)
173 } 146 }
174} 147}
175 148
176static void 149static inline void
177bidi_check_type (bidi_type_t type) 150bidi_check_type (bidi_type_t type)
178{ 151{
179 if (type < UNKNOWN_BT || type > NEUTRAL_ON) 152 xassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
180 abort ();
181} 153}
182 154
183/* Given a bidi TYPE of a character, return its category. */ 155/* Given a bidi TYPE of a character, return its category. */
@@ -243,6 +215,77 @@ bidi_mirror_char (int c)
243 return c; 215 return c;
244} 216}
245 217
218/* Determine the start-of-run (sor) directional type given the two
219 embedding levels on either side of the run boundary. Also, update
220 the saved info about previously seen characters, since that info is
221 generally valid for a single level run. */
222static inline void
223bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
224{
225 int higher_level = (level_before > level_after ? level_before : level_after);
226
227 /* The prev_was_pdf gork is required for when we have several PDFs
228 in a row. In that case, we want to compute the sor type for the
229 next level run only once: when we see the first PDF. That's
230 because the sor type depends only on the higher of the two levels
231 that we find on the two sides of the level boundary (see UAX#9,
232 clause X10), and so we don't need to know the final embedding
233 level to which we descend after processing all the PDFs. */
234 if (!bidi_it->prev_was_pdf || level_before < level_after)
235 /* FIXME: should the default sor direction be user selectable? */
236 bidi_it->sor = ((higher_level & 1) != 0 ? R2L : L2R);
237 if (level_before > level_after)
238 bidi_it->prev_was_pdf = 1;
239
240 bidi_it->prev.type = UNKNOWN_BT;
241 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
242 = bidi_it->last_strong.orig_type = UNKNOWN_BT;
243 bidi_it->prev_for_neutral.type = (bidi_it->sor == R2L ? STRONG_R : STRONG_L);
244 bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
245 bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
246 bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1
247 = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
248 bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
249}
250
251/* Push the current embedding level and override status; reset the
252 current level to LEVEL and the current override status to OVERRIDE. */
253static inline void
254bidi_push_embedding_level (struct bidi_it *bidi_it,
255 int level, bidi_dir_t override)
256{
257 bidi_it->stack_idx++;
258 xassert (bidi_it->stack_idx < BIDI_MAXLEVEL);
259 bidi_it->level_stack[bidi_it->stack_idx].level = level;
260 bidi_it->level_stack[bidi_it->stack_idx].override = override;
261}
262
263/* Pop the embedding level and directional override status from the
264 stack, and return the new level. */
265static inline int
266bidi_pop_embedding_level (struct bidi_it *bidi_it)
267{
268 /* UAX#9 says to ignore invalid PDFs. */
269 if (bidi_it->stack_idx > 0)
270 bidi_it->stack_idx--;
271 return bidi_it->level_stack[bidi_it->stack_idx].level;
272}
273
274/* Record in SAVED_INFO the information about the current character. */
275static inline void
276bidi_remember_char (struct bidi_saved_info *saved_info,
277 struct bidi_it *bidi_it)
278{
279 saved_info->charpos = bidi_it->charpos;
280 saved_info->bytepos = bidi_it->bytepos;
281 saved_info->type = bidi_it->type;
282 bidi_check_type (bidi_it->type);
283 saved_info->type_after_w1 = bidi_it->type_after_w1;
284 bidi_check_type (bidi_it->type_after_w1);
285 saved_info->orig_type = bidi_it->orig_type;
286 bidi_check_type (bidi_it->orig_type);
287}
288
246/* Copy the bidi iterator from FROM to TO. To save cycles, this only 289/* Copy the bidi iterator from FROM to TO. To save cycles, this only
247 copies the part of the level stack that is actually in use. */ 290 copies the part of the level stack that is actually in use. */
248static inline void 291static inline void
@@ -259,40 +302,70 @@ bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
259 to->level_stack[i] = from->level_stack[i]; 302 to->level_stack[i] = from->level_stack[i];
260} 303}
261 304
262/* Caching the bidi iterator states. */ 305
306/***********************************************************************
307 Caching the bidi iterator states
308 ***********************************************************************/
263 309
264#define BIDI_CACHE_CHUNK 200 310#define BIDI_CACHE_CHUNK 200
265static struct bidi_it *bidi_cache; 311static struct bidi_it *bidi_cache;
266static size_t bidi_cache_size = 0; 312static ptrdiff_t bidi_cache_size = 0;
267static size_t elsz = sizeof (struct bidi_it); 313enum { elsz = sizeof (struct bidi_it) };
268static int bidi_cache_idx; /* next unused cache slot */ 314static ptrdiff_t bidi_cache_idx; /* next unused cache slot */
269static int bidi_cache_last_idx; /* slot of last cache hit */ 315static ptrdiff_t bidi_cache_last_idx; /* slot of last cache hit */
270 316static ptrdiff_t bidi_cache_start = 0; /* start of cache for this
317 "stack" level */
318
319/* 5-slot stack for saving the start of the previous level of the
320 cache. xdisp.c maintains a 5-slot stack for its iterator state,
321 and we need the same size of our stack. */
322static ptrdiff_t bidi_cache_start_stack[IT_STACK_SIZE];
323static int bidi_cache_sp;
324
325/* Size of header used by bidi_shelve_cache. */
326enum
327 {
328 bidi_shelve_header_size
329 = (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack)
330 + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
331 + sizeof (bidi_cache_last_idx))
332 };
333
334/* Reset the cache state to the empty state. We only reset the part
335 of the cache relevant to iteration of the current object. Previous
336 objects, which are pushed on the display iterator's stack, are left
337 intact. This is called when the cached information is no longer
338 useful for the current iteration, e.g. when we were reseated to a
339 new position on the same object. */
271static inline void 340static inline void
272bidi_cache_reset (void) 341bidi_cache_reset (void)
273{ 342{
274 bidi_cache_idx = 0; 343 bidi_cache_idx = bidi_cache_start;
275 bidi_cache_last_idx = -1; 344 bidi_cache_last_idx = -1;
276} 345}
277 346
347/* Shrink the cache to its minimal size. Called when we init the bidi
348 iterator for reordering a buffer or a string that does not come
349 from display properties, because that means all the previously
350 cached info is of no further use. */
278static inline void 351static inline void
279bidi_cache_shrink (void) 352bidi_cache_shrink (void)
280{ 353{
281 if (bidi_cache_size > BIDI_CACHE_CHUNK) 354 if (bidi_cache_size > BIDI_CACHE_CHUNK)
282 { 355 {
356 bidi_cache
357 = (struct bidi_it *) xrealloc (bidi_cache, BIDI_CACHE_CHUNK * elsz);
283 bidi_cache_size = BIDI_CACHE_CHUNK; 358 bidi_cache_size = BIDI_CACHE_CHUNK;
284 bidi_cache =
285 (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
286 } 359 }
287 bidi_cache_reset (); 360 bidi_cache_reset ();
288} 361}
289 362
290static inline void 363static inline void
291bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it) 364bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
292{ 365{
293 int current_scan_dir = bidi_it->scan_dir; 366 int current_scan_dir = bidi_it->scan_dir;
294 367
295 if (idx < 0 || idx >= bidi_cache_idx) 368 if (idx < bidi_cache_start || idx >= bidi_cache_idx)
296 abort (); 369 abort ();
297 370
298 bidi_copy_it (bidi_it, &bidi_cache[idx]); 371 bidi_copy_it (bidi_it, &bidi_cache[idx]);
@@ -304,13 +377,15 @@ bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
304 level less or equal to LEVEL. if LEVEL is -1, disregard the 377 level less or equal to LEVEL. if LEVEL is -1, disregard the
305 resolved levels in cached states. DIR, if non-zero, means search 378 resolved levels in cached states. DIR, if non-zero, means search
306 in that direction from the last cache hit. */ 379 in that direction from the last cache hit. */
307static inline int 380static inline ptrdiff_t
308bidi_cache_search (EMACS_INT charpos, int level, int dir) 381bidi_cache_search (EMACS_INT charpos, int level, int dir)
309{ 382{
310 int i, i_start; 383 ptrdiff_t i, i_start;
311 384
312 if (bidi_cache_idx) 385 if (bidi_cache_idx > bidi_cache_start)
313 { 386 {
387 if (bidi_cache_last_idx == -1)
388 bidi_cache_last_idx = bidi_cache_idx - 1;
314 if (charpos < bidi_cache[bidi_cache_last_idx].charpos) 389 if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
315 { 390 {
316 dir = -1; 391 dir = -1;
@@ -333,7 +408,7 @@ bidi_cache_search (EMACS_INT charpos, int level, int dir)
333 if (dir < 0) 408 if (dir < 0)
334 { 409 {
335 /* Linear search for now; FIXME! */ 410 /* Linear search for now; FIXME! */
336 for (i = i_start; i >= 0; i--) 411 for (i = i_start; i >= bidi_cache_start; i--)
337 if (bidi_cache[i].charpos <= charpos 412 if (bidi_cache[i].charpos <= charpos
338 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars 413 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
339 && (level == -1 || bidi_cache[i].resolved_level <= level)) 414 && (level == -1 || bidi_cache[i].resolved_level <= level))
@@ -355,8 +430,9 @@ bidi_cache_search (EMACS_INT charpos, int level, int dir)
355/* Find a cached state where the resolved level changes to a value 430/* Find a cached state where the resolved level changes to a value
356 that is lower than LEVEL, and return its cache slot index. DIR is 431 that is lower than LEVEL, and return its cache slot index. DIR is
357 the direction to search, starting with the last used cache slot. 432 the direction to search, starting with the last used cache slot.
358 BEFORE, if non-zero, means return the index of the slot that is 433 If DIR is zero, we search backwards from the last occupied cache
359 ``before'' the level change in the search direction. That is, 434 slot. BEFORE, if non-zero, means return the index of the slot that
435 is ``before'' the level change in the search direction. That is,
360 given the cached levels like this: 436 given the cached levels like this:
361 437
362 1122333442211 438 1122333442211
@@ -366,14 +442,16 @@ bidi_cache_search (EMACS_INT charpos, int level, int dir)
366 C, searching backwards (DIR = -1) for LEVEL = 2 will return the 442 C, searching backwards (DIR = -1) for LEVEL = 2 will return the
367 index of slot B or A, depending whether BEFORE is, respectively, 443 index of slot B or A, depending whether BEFORE is, respectively,
368 non-zero or zero. */ 444 non-zero or zero. */
369static int 445static ptrdiff_t
370bidi_cache_find_level_change (int level, int dir, int before) 446bidi_cache_find_level_change (int level, int dir, int before)
371{ 447{
372 if (bidi_cache_idx) 448 if (bidi_cache_idx)
373 { 449 {
374 int i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1; 450 ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
375 int incr = before ? 1 : 0; 451 int incr = before ? 1 : 0;
376 452
453 xassert (!dir || bidi_cache_last_idx >= 0);
454
377 if (!dir) 455 if (!dir)
378 dir = -1; 456 dir = -1;
379 else if (!incr) 457 else if (!incr)
@@ -381,7 +459,7 @@ bidi_cache_find_level_change (int level, int dir, int before)
381 459
382 if (dir < 0) 460 if (dir < 0)
383 { 461 {
384 while (i >= incr) 462 while (i >= bidi_cache_start + incr)
385 { 463 {
386 if (bidi_cache[i - incr].resolved_level >= 0 464 if (bidi_cache[i - incr].resolved_level >= 0
387 && bidi_cache[i - incr].resolved_level < level) 465 && bidi_cache[i - incr].resolved_level < level)
@@ -405,9 +483,31 @@ bidi_cache_find_level_change (int level, int dir, int before)
405} 483}
406 484
407static inline void 485static inline void
486bidi_cache_ensure_space (ptrdiff_t idx)
487{
488 /* Enlarge the cache as needed. */
489 if (idx >= bidi_cache_size)
490 {
491 /* The bidi cache cannot be larger than the largest Lisp string
492 or buffer. */
493 ptrdiff_t string_or_buffer_bound
494 = max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
495
496 /* Also, it cannot be larger than what C can represent. */
497 ptrdiff_t c_bound
498 = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz;
499
500 bidi_cache
501 = xpalloc (bidi_cache, &bidi_cache_size,
502 max (BIDI_CACHE_CHUNK, idx - bidi_cache_size + 1),
503 min (string_or_buffer_bound, c_bound), elsz);
504 }
505}
506
507static inline void
408bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved) 508bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
409{ 509{
410 int idx; 510 ptrdiff_t idx;
411 511
412 /* We should never cache on backward scans. */ 512 /* We should never cache on backward scans. */
413 if (bidi_it->scan_dir == -1) 513 if (bidi_it->scan_dir == -1)
@@ -417,23 +517,17 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
417 if (idx < 0) 517 if (idx < 0)
418 { 518 {
419 idx = bidi_cache_idx; 519 idx = bidi_cache_idx;
420 /* Enlarge the cache as needed. */ 520 bidi_cache_ensure_space (idx);
421 if (idx >= bidi_cache_size)
422 {
423 bidi_cache_size += BIDI_CACHE_CHUNK;
424 bidi_cache =
425 (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
426 }
427 /* Character positions should correspond to cache positions 1:1. 521 /* Character positions should correspond to cache positions 1:1.
428 If we are outside the range of cached positions, the cache is 522 If we are outside the range of cached positions, the cache is
429 useless and must be reset. */ 523 useless and must be reset. */
430 if (idx > 0 && 524 if (idx > bidi_cache_start &&
431 (bidi_it->charpos > (bidi_cache[idx - 1].charpos 525 (bidi_it->charpos > (bidi_cache[idx - 1].charpos
432 + bidi_cache[idx - 1].nchars) 526 + bidi_cache[idx - 1].nchars)
433 || bidi_it->charpos < bidi_cache[0].charpos)) 527 || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
434 { 528 {
435 bidi_cache_reset (); 529 bidi_cache_reset ();
436 idx = 0; 530 idx = bidi_cache_start;
437 } 531 }
438 if (bidi_it->nchars <= 0) 532 if (bidi_it->nchars <= 0)
439 abort (); 533 abort ();
@@ -458,6 +552,8 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
458 bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral; 552 bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
459 bidi_cache[idx].next_for_ws = bidi_it->next_for_ws; 553 bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
460 bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit; 554 bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
555 bidi_cache[idx].disp_pos = bidi_it->disp_pos;
556 bidi_cache[idx].disp_prop = bidi_it->disp_prop;
461 } 557 }
462 558
463 bidi_cache_last_idx = idx; 559 bidi_cache_last_idx = idx;
@@ -468,15 +564,15 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
468static inline bidi_type_t 564static inline bidi_type_t
469bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it) 565bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
470{ 566{
471 int i = bidi_cache_search (charpos, level, bidi_it->scan_dir); 567 ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
472 568
473 if (i >= 0) 569 if (i >= bidi_cache_start)
474 { 570 {
475 bidi_dir_t current_scan_dir = bidi_it->scan_dir; 571 bidi_dir_t current_scan_dir = bidi_it->scan_dir;
476 572
477 bidi_copy_it (bidi_it, &bidi_cache[i]); 573 bidi_copy_it (bidi_it, &bidi_cache[i]);
478 bidi_cache_last_idx = i; 574 bidi_cache_last_idx = i;
479 /* Don't let scan direction from from the cached state override 575 /* Don't let scan direction from the cached state override
480 the current scan direction. */ 576 the current scan direction. */
481 bidi_it->scan_dir = current_scan_dir; 577 bidi_it->scan_dir = current_scan_dir;
482 return bidi_it->type; 578 return bidi_it->type;
@@ -488,69 +584,257 @@ bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
488static inline int 584static inline int
489bidi_peek_at_next_level (struct bidi_it *bidi_it) 585bidi_peek_at_next_level (struct bidi_it *bidi_it)
490{ 586{
491 if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1) 587 if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
492 abort (); 588 abort ();
493 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level; 589 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
494} 590}
495 591
496/* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph. 592
497 Value is the non-negative length of the paragraph separator 593/***********************************************************************
498 following the buffer position, -1 if position is at the beginning 594 Pushing and popping the bidi iterator state
499 of a new paragraph, or -2 if position is neither at beginning nor 595 ***********************************************************************/
500 at end of a paragraph. */ 596
501static EMACS_INT 597/* Push the bidi iterator state in preparation for reordering a
502bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos) 598 different object, e.g. display string found at certain buffer
599 position. Pushing the bidi iterator boils down to saving its
600 entire state on the cache and starting a new cache "stacked" on top
601 of the current cache. */
602void
603bidi_push_it (struct bidi_it *bidi_it)
503{ 604{
504 Lisp_Object sep_re; 605 /* Save the current iterator state in its entirety after the last
505 Lisp_Object start_re; 606 used cache slot. */
506 EMACS_INT val; 607 bidi_cache_ensure_space (bidi_cache_idx);
608 memcpy (&bidi_cache[bidi_cache_idx++], bidi_it, sizeof (struct bidi_it));
507 609
508 sep_re = paragraph_separate_re; 610 /* Push the current cache start onto the stack. */
509 start_re = paragraph_start_re; 611 xassert (bidi_cache_sp < IT_STACK_SIZE);
612 bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
510 613
511 val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil); 614 /* Start a new level of cache, and make it empty. */
512 if (val < 0) 615 bidi_cache_start = bidi_cache_idx;
616 bidi_cache_last_idx = -1;
617}
618
619/* Restore the iterator state saved by bidi_push_it and return the
620 cache to the corresponding state. */
621void
622bidi_pop_it (struct bidi_it *bidi_it)
623{
624 if (bidi_cache_start <= 0)
625 abort ();
626
627 /* Reset the next free cache slot index to what it was before the
628 call to bidi_push_it. */
629 bidi_cache_idx = bidi_cache_start - 1;
630
631 /* Restore the bidi iterator state saved in the cache. */
632 memcpy (bidi_it, &bidi_cache[bidi_cache_idx], sizeof (struct bidi_it));
633
634 /* Pop the previous cache start from the stack. */
635 if (bidi_cache_sp <= 0)
636 abort ();
637 bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
638
639 /* Invalidate the last-used cache slot data. */
640 bidi_cache_last_idx = -1;
641}
642
643static ptrdiff_t bidi_cache_total_alloc;
644
645/* Stash away a copy of the cache and its control variables. */
646void *
647bidi_shelve_cache (void)
648{
649 unsigned char *databuf;
650 ptrdiff_t alloc;
651
652 /* Empty cache. */
653 if (bidi_cache_idx == 0)
654 return NULL;
655
656 alloc = (bidi_shelve_header_size
657 + bidi_cache_idx * sizeof (struct bidi_it));
658 databuf = xmalloc (alloc);
659 bidi_cache_total_alloc += alloc;
660
661 memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
662 memcpy (databuf + sizeof (bidi_cache_idx),
663 bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
664 memcpy (databuf + sizeof (bidi_cache_idx)
665 + bidi_cache_idx * sizeof (struct bidi_it),
666 bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
667 memcpy (databuf + sizeof (bidi_cache_idx)
668 + bidi_cache_idx * sizeof (struct bidi_it)
669 + sizeof (bidi_cache_start_stack),
670 &bidi_cache_sp, sizeof (bidi_cache_sp));
671 memcpy (databuf + sizeof (bidi_cache_idx)
672 + bidi_cache_idx * sizeof (struct bidi_it)
673 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
674 &bidi_cache_start, sizeof (bidi_cache_start));
675 memcpy (databuf + sizeof (bidi_cache_idx)
676 + bidi_cache_idx * sizeof (struct bidi_it)
677 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
678 + sizeof (bidi_cache_start),
679 &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
680
681 return databuf;
682}
683
684/* Restore the cache state from a copy stashed away by
685 bidi_shelve_cache, and free the buffer used to stash that copy.
686 JUST_FREE non-zero means free the buffer, but don't restore the
687 cache; used when the corresponding iterator is discarded instead of
688 being restored. */
689void
690bidi_unshelve_cache (void *databuf, int just_free)
691{
692 unsigned char *p = databuf;
693
694 if (!p)
513 { 695 {
514 if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0) 696 if (!just_free)
515 val = -1; 697 {
698 /* A NULL pointer means an empty cache. */
699 bidi_cache_start = 0;
700 bidi_cache_sp = 0;
701 bidi_cache_reset ();
702 }
703 }
704 else
705 {
706 if (just_free)
707 {
708 ptrdiff_t idx;
709
710 memcpy (&idx, p, sizeof (bidi_cache_idx));
711 bidi_cache_total_alloc
712 -= bidi_shelve_header_size + idx * sizeof (struct bidi_it);
713 }
516 else 714 else
517 val = -2; 715 {
716 memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
717 bidi_cache_ensure_space (bidi_cache_idx);
718 memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
719 bidi_cache_idx * sizeof (struct bidi_it));
720 memcpy (bidi_cache_start_stack,
721 p + sizeof (bidi_cache_idx)
722 + bidi_cache_idx * sizeof (struct bidi_it),
723 sizeof (bidi_cache_start_stack));
724 memcpy (&bidi_cache_sp,
725 p + sizeof (bidi_cache_idx)
726 + bidi_cache_idx * sizeof (struct bidi_it)
727 + sizeof (bidi_cache_start_stack),
728 sizeof (bidi_cache_sp));
729 memcpy (&bidi_cache_start,
730 p + sizeof (bidi_cache_idx)
731 + bidi_cache_idx * sizeof (struct bidi_it)
732 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
733 sizeof (bidi_cache_start));
734 memcpy (&bidi_cache_last_idx,
735 p + sizeof (bidi_cache_idx)
736 + bidi_cache_idx * sizeof (struct bidi_it)
737 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
738 + sizeof (bidi_cache_start),
739 sizeof (bidi_cache_last_idx));
740 bidi_cache_total_alloc
741 -= (bidi_shelve_header_size
742 + bidi_cache_idx * sizeof (struct bidi_it));
743 }
744
745 xfree (p);
518 } 746 }
747}
519 748
520 return val; 749
750/***********************************************************************
751 Initialization
752 ***********************************************************************/
753static void
754bidi_initialize (void)
755{
756 bidi_type_table = uniprop_table (intern ("bidi-class"));
757 if (NILP (bidi_type_table))
758 abort ();
759 staticpro (&bidi_type_table);
760
761 bidi_mirror_table = uniprop_table (intern ("mirroring"));
762 if (NILP (bidi_mirror_table))
763 abort ();
764 staticpro (&bidi_mirror_table);
765
766 Qparagraph_start = intern ("paragraph-start");
767 staticpro (&Qparagraph_start);
768 paragraph_start_re = Fsymbol_value (Qparagraph_start);
769 if (!STRINGP (paragraph_start_re))
770 paragraph_start_re = build_string ("\f\\|[ \t]*$");
771 staticpro (&paragraph_start_re);
772 Qparagraph_separate = intern ("paragraph-separate");
773 staticpro (&Qparagraph_separate);
774 paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
775 if (!STRINGP (paragraph_separate_re))
776 paragraph_separate_re = build_string ("[ \t\f]*$");
777 staticpro (&paragraph_separate_re);
778
779 bidi_cache_sp = 0;
780 bidi_cache_total_alloc = 0;
781
782 bidi_initialized = 1;
521} 783}
522 784
523/* Determine the start-of-run (sor) directional type given the two 785/* Do whatever UAX#9 clause X8 says should be done at paragraph's
524 embedding levels on either side of the run boundary. Also, update 786 end. */
525 the saved info about previously seen characters, since that info is
526 generally valid for a single level run. */
527static inline void 787static inline void
528bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after) 788bidi_set_paragraph_end (struct bidi_it *bidi_it)
529{ 789{
530 int higher_level = level_before > level_after ? level_before : level_after; 790 bidi_it->invalid_levels = 0;
531 791 bidi_it->invalid_rl_levels = -1;
532 /* The prev_was_pdf gork is required for when we have several PDFs 792 bidi_it->stack_idx = 0;
533 in a row. In that case, we want to compute the sor type for the 793 bidi_it->resolved_level = bidi_it->level_stack[0].level;
534 next level run only once: when we see the first PDF. That's 794}
535 because the sor type depends only on the higher of the two levels
536 that we find on the two sides of the level boundary (see UAX#9,
537 clause X10), and so we don't need to know the final embedding
538 level to which we descend after processing all the PDFs. */
539 if (!bidi_it->prev_was_pdf || level_before < level_after)
540 /* FIXME: should the default sor direction be user selectable? */
541 bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
542 if (level_before > level_after)
543 bidi_it->prev_was_pdf = 1;
544 795
545 bidi_it->prev.type = UNKNOWN_BT; 796/* Initialize the bidi iterator from buffer/string position CHARPOS. */
546 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 = 797void
547 bidi_it->last_strong.orig_type = UNKNOWN_BT; 798bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
548 bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L; 799 struct bidi_it *bidi_it)
549 bidi_it->prev_for_neutral.charpos = bidi_it->charpos; 800{
550 bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos; 801 if (! bidi_initialized)
551 bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 = 802 bidi_initialize ();
552 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT; 803 if (charpos >= 0)
553 bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */ 804 bidi_it->charpos = charpos;
805 if (bytepos >= 0)
806 bidi_it->bytepos = bytepos;
807 bidi_it->frame_window_p = frame_window_p;
808 bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
809 bidi_it->first_elt = 1;
810 bidi_set_paragraph_end (bidi_it);
811 bidi_it->new_paragraph = 1;
812 bidi_it->separator_limit = -1;
813 bidi_it->type = NEUTRAL_B;
814 bidi_it->type_after_w1 = NEUTRAL_B;
815 bidi_it->orig_type = NEUTRAL_B;
816 bidi_it->prev_was_pdf = 0;
817 bidi_it->prev.type = bidi_it->prev.type_after_w1
818 = bidi_it->prev.orig_type = UNKNOWN_BT;
819 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
820 = bidi_it->last_strong.orig_type = UNKNOWN_BT;
821 bidi_it->next_for_neutral.charpos = -1;
822 bidi_it->next_for_neutral.type
823 = bidi_it->next_for_neutral.type_after_w1
824 = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
825 bidi_it->prev_for_neutral.charpos = -1;
826 bidi_it->prev_for_neutral.type
827 = bidi_it->prev_for_neutral.type_after_w1
828 = bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
829 bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */
830 bidi_it->disp_pos = -1; /* invalid/unknown */
831 bidi_it->disp_prop = 0;
832 /* We can only shrink the cache if we are at the bottom level of its
833 "stack". */
834 if (bidi_cache_start == 0)
835 bidi_cache_shrink ();
836 else
837 bidi_cache_reset ();
554} 838}
555 839
556/* Perform initializations for reordering a new line of bidi text. */ 840/* Perform initializations for reordering a new line of bidi text. */
@@ -562,44 +846,113 @@ bidi_line_init (struct bidi_it *bidi_it)
562 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */ 846 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
563 bidi_it->invalid_levels = 0; 847 bidi_it->invalid_levels = 0;
564 bidi_it->invalid_rl_levels = -1; 848 bidi_it->invalid_rl_levels = -1;
565 bidi_it->next_en_pos = -1; 849 /* Setting this to zero will force its recomputation the first time
850 we need it for W5. */
851 bidi_it->next_en_pos = 0;
852 bidi_it->next_en_type = UNKNOWN_BT;
566 bidi_it->next_for_ws.type = UNKNOWN_BT; 853 bidi_it->next_for_ws.type = UNKNOWN_BT;
567 bidi_set_sor_type (bidi_it, 854 bidi_set_sor_type (bidi_it,
568 bidi_it->paragraph_dir == R2L ? 1 : 0, 855 (bidi_it->paragraph_dir == R2L ? 1 : 0),
569 bidi_it->level_stack[0].level); /* X10 */ 856 bidi_it->level_stack[0].level); /* X10 */
570 857
571 bidi_cache_reset (); 858 bidi_cache_reset ();
572} 859}
573 860
861
862/***********************************************************************
863 Fetching characters
864 ***********************************************************************/
865
866/* Count bytes in string S between BEG/BEGBYTE and END. BEG and END
867 are zero-based character positions in S, BEGBYTE is byte position
868 corresponding to BEG. UNIBYTE, if non-zero, means S is a unibyte
869 string. */
870static inline EMACS_INT
871bidi_count_bytes (const unsigned char *s, const EMACS_INT beg,
872 const EMACS_INT begbyte, const EMACS_INT end, int unibyte)
873{
874 EMACS_INT pos = beg;
875 const unsigned char *p = s + begbyte, *start = p;
876
877 if (unibyte)
878 p = s + end;
879 else
880 {
881 if (!CHAR_HEAD_P (*p))
882 abort ();
883
884 while (pos < end)
885 {
886 p += BYTES_BY_CHAR_HEAD (*p);
887 pos++;
888 }
889 }
890
891 return p - start;
892}
893
894/* Fetch and return the character at byte position BYTEPOS. If S is
895 non-NULL, fetch the character from string S; otherwise fetch the
896 character from the current buffer. UNIBYTE non-zero means S is a
897 unibyte string. */
898static inline int
899bidi_char_at_pos (EMACS_INT bytepos, const unsigned char *s, int unibyte)
900{
901 if (s)
902 {
903 if (unibyte)
904 return s[bytepos];
905 else
906 return STRING_CHAR (s + bytepos);
907 }
908 else
909 return FETCH_MULTIBYTE_CHAR (bytepos);
910}
911
574/* Fetch and return the character at BYTEPOS/CHARPOS. If that 912/* Fetch and return the character at BYTEPOS/CHARPOS. If that
575 character is covered by a display string, treat the entire run of 913 character is covered by a display string, treat the entire run of
576 covered characters as a single character u+FFFC, and return their 914 covered characters as a single character, either u+2029 or u+FFFC,
577 combined length in CH_LEN and NCHARS. DISP_POS specifies the 915 and return their combined length in CH_LEN and NCHARS. DISP_POS
578 character position of the next display string, or -1 if not yet 916 specifies the character position of the next display string, or -1
579 computed. When the next character is at or beyond that position, 917 if not yet computed. When the next character is at or beyond that
580 the function updates DISP_POS with the position of the next display 918 position, the function updates DISP_POS with the position of the
581 string. */ 919 next display string. DISP_PROP non-zero means that there's really
920 a display string at DISP_POS, as opposed to when we searched till
921 DISP_POS without finding one. If DISP_PROP is 2, it means the
922 display spec is of the form `(space ...)', which is replaced with
923 u+2029 to handle it as a paragraph separator. STRING->s is the C
924 string to iterate, or NULL if iterating over a buffer or a Lisp
925 string; in the latter case, STRING->lstring is the Lisp string. */
582static inline int 926static inline int
583bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos, 927bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
928 int *disp_prop, struct bidi_string_data *string,
584 int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars) 929 int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars)
585{ 930{
586 int ch; 931 int ch;
932 EMACS_INT endpos
933 = (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
934 struct text_pos pos;
935 int len;
587 936
588 /* FIXME: Support strings in addition to buffers. */
589 /* If we got past the last known position of display string, compute 937 /* If we got past the last known position of display string, compute
590 the position of the next one. That position could be at BYTEPOS. */ 938 the position of the next one. That position could be at CHARPOS. */
591 if (charpos < ZV && charpos > *disp_pos) 939 if (charpos < endpos && charpos > *disp_pos)
592 *disp_pos = compute_display_string_pos (charpos, frame_window_p); 940 {
941 SET_TEXT_POS (pos, charpos, bytepos);
942 *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
943 disp_prop);
944 }
593 945
594 /* Fetch the character at BYTEPOS. */ 946 /* Fetch the character at BYTEPOS. */
595 if (bytepos >= ZV_BYTE) 947 if (charpos >= endpos)
596 { 948 {
597 ch = BIDI_EOB; 949 ch = BIDI_EOB;
598 *ch_len = 1; 950 *ch_len = 1;
599 *nchars = 1; 951 *nchars = 1;
600 *disp_pos = ZV; 952 *disp_pos = endpos;
953 *disp_prop = 0;
601 } 954 }
602 else if (charpos >= *disp_pos) 955 else if (charpos >= *disp_pos && *disp_prop)
603 { 956 {
604 EMACS_INT disp_end_pos; 957 EMACS_INT disp_end_pos;
605 958
@@ -607,38 +960,148 @@ bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
607 property. Hopefully, it will never be needed. */ 960 property. Hopefully, it will never be needed. */
608 if (charpos > *disp_pos) 961 if (charpos > *disp_pos)
609 abort (); 962 abort ();
610 /* Return the Unicode Object Replacement Character to represent 963 /* Text covered by `display' properties and overlays with
611 the entire run of characters covered by the display 964 display properties or display strings is handled as a single
612 string. */ 965 character that represents the entire run of characters
613 ch = 0xFFFC; 966 covered by the display property. */
614 disp_end_pos = compute_display_string_end (*disp_pos); 967 if (*disp_prop == 2)
968 {
969 /* `(space ...)' display specs are handled as paragraph
970 separators for the purposes of the reordering; see UAX#9
971 section 3 and clause HL1 in section 4.3 there. */
972 ch = 0x2029;
973 }
974 else
975 {
976 /* All other display specs are handled as the Unicode Object
977 Replacement Character. */
978 ch = 0xFFFC;
979 }
980 disp_end_pos = compute_display_string_end (*disp_pos, string);
981 if (disp_end_pos < 0)
982 {
983 /* Somebody removed the display string from the buffer
984 behind our back. Recover by processing this buffer
985 position as if no display property were present there to
986 begin with. */
987 *disp_prop = 0;
988 goto normal_char;
989 }
615 *nchars = disp_end_pos - *disp_pos; 990 *nchars = disp_end_pos - *disp_pos;
616 *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos; 991 if (*nchars <= 0)
992 abort ();
993 if (string->s)
994 *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
995 disp_end_pos, string->unibyte);
996 else if (STRINGP (string->lstring))
997 *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
998 bytepos, disp_end_pos, string->unibyte);
999 else
1000 *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
617 } 1001 }
618 else 1002 else
619 { 1003 {
620 ch = FETCH_MULTIBYTE_CHAR (bytepos); 1004 normal_char:
1005 if (string->s)
1006 {
1007
1008 if (!string->unibyte)
1009 {
1010 ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
1011 *ch_len = len;
1012 }
1013 else
1014 {
1015 ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
1016 *ch_len = 1;
1017 }
1018 }
1019 else if (STRINGP (string->lstring))
1020 {
1021 if (!string->unibyte)
1022 {
1023 ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
1024 len);
1025 *ch_len = len;
1026 }
1027 else
1028 {
1029 ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
1030 *ch_len = 1;
1031 }
1032 }
1033 else
1034 {
1035 ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (bytepos), len);
1036 *ch_len = len;
1037 }
621 *nchars = 1; 1038 *nchars = 1;
622 *ch_len = CHAR_BYTES (ch);
623 } 1039 }
624 1040
625 /* If we just entered a run of characters covered by a display 1041 /* If we just entered a run of characters covered by a display
626 string, compute the position of the next display string. */ 1042 string, compute the position of the next display string. */
627 if (charpos + *nchars <= ZV && charpos + *nchars > *disp_pos) 1043 if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
628 *disp_pos = compute_display_string_pos (charpos + *nchars, frame_window_p); 1044 && *disp_prop)
1045 {
1046 SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
1047 *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
1048 disp_prop);
1049 }
629 1050
630 return ch; 1051 return ch;
631} 1052}
632 1053
1054
1055/***********************************************************************
1056 Determining paragraph direction
1057 ***********************************************************************/
1058
1059/* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
1060 Value is the non-negative length of the paragraph separator
1061 following the buffer position, -1 if position is at the beginning
1062 of a new paragraph, or -2 if position is neither at beginning nor
1063 at end of a paragraph. */
1064static EMACS_INT
1065bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
1066{
1067 Lisp_Object sep_re;
1068 Lisp_Object start_re;
1069 EMACS_INT val;
1070
1071 sep_re = paragraph_separate_re;
1072 start_re = paragraph_start_re;
1073
1074 val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
1075 if (val < 0)
1076 {
1077 if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
1078 val = -1;
1079 else
1080 val = -2;
1081 }
1082
1083 return val;
1084}
1085
1086/* On my 2005-vintage machine, searching back for paragraph start
1087 takes ~1 ms per line. And bidi_paragraph_init is called 4 times
1088 when user types C-p. The number below limits each call to
1089 bidi_paragraph_init to about 10 ms. */
1090#define MAX_PARAGRAPH_SEARCH 7500
1091
633/* Find the beginning of this paragraph by looking back in the buffer. 1092/* Find the beginning of this paragraph by looking back in the buffer.
634 Value is the byte position of the paragraph's beginning. */ 1093 Value is the byte position of the paragraph's beginning, or
1094 BEGV_BYTE if paragraph_start_re is still not found after looking
1095 back MAX_PARAGRAPH_SEARCH lines in the buffer. */
635static EMACS_INT 1096static EMACS_INT
636bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte) 1097bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
637{ 1098{
638 Lisp_Object re = paragraph_start_re; 1099 Lisp_Object re = paragraph_start_re;
639 EMACS_INT limit = ZV, limit_byte = ZV_BYTE; 1100 EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
1101 EMACS_INT n = 0;
640 1102
641 while (pos_byte > BEGV_BYTE 1103 while (pos_byte > BEGV_BYTE
1104 && n++ < MAX_PARAGRAPH_SEARCH
642 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0) 1105 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
643 { 1106 {
644 /* FIXME: What if the paragraph beginning is covered by a 1107 /* FIXME: What if the paragraph beginning is covered by a
@@ -648,6 +1111,8 @@ bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
648 pos = find_next_newline_no_quit (pos - 1, -1); 1111 pos = find_next_newline_no_quit (pos - 1, -1);
649 pos_byte = CHAR_TO_BYTE (pos); 1112 pos_byte = CHAR_TO_BYTE (pos);
650 } 1113 }
1114 if (n >= MAX_PARAGRAPH_SEARCH)
1115 pos_byte = BEGV_BYTE;
651 return pos_byte; 1116 return pos_byte;
652} 1117}
653 1118
@@ -665,18 +1130,24 @@ bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
665 1130
666 Note that this function gives the paragraph separator the same 1131 Note that this function gives the paragraph separator the same
667 direction as the preceding paragraph, even though Emacs generally 1132 direction as the preceding paragraph, even though Emacs generally
668 views the separartor as not belonging to any paragraph. */ 1133 views the separator as not belonging to any paragraph. */
669void 1134void
670bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p) 1135bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
671{ 1136{
672 EMACS_INT bytepos = bidi_it->bytepos; 1137 EMACS_INT bytepos = bidi_it->bytepos;
1138 int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
673 EMACS_INT pstartbyte; 1139 EMACS_INT pstartbyte;
1140 /* Note that begbyte is a byte position, while end is a character
1141 position. Yes, this is ugly, but we are trying to avoid costly
1142 calls to BYTE_TO_CHAR and its ilk. */
1143 EMACS_INT begbyte = string_p ? 0 : BEGV_BYTE;
1144 EMACS_INT end = string_p ? bidi_it->string.schars : ZV;
674 1145
675 /* Special case for an empty buffer. */ 1146 /* Special case for an empty buffer. */
676 if (bytepos == BEGV_BYTE && bytepos == ZV_BYTE) 1147 if (bytepos == begbyte && bidi_it->charpos == end)
677 dir = L2R; 1148 dir = L2R;
678 /* We should never be called at EOB or before BEGV. */ 1149 /* We should never be called at EOB or before BEGV. */
679 else if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE) 1150 else if (bidi_it->charpos >= end || bytepos < begbyte)
680 abort (); 1151 abort ();
681 1152
682 if (dir == L2R) 1153 if (dir == L2R)
@@ -694,7 +1165,9 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
694 int ch; 1165 int ch;
695 EMACS_INT ch_len, nchars; 1166 EMACS_INT ch_len, nchars;
696 EMACS_INT pos, disp_pos = -1; 1167 EMACS_INT pos, disp_pos = -1;
1168 int disp_prop = 0;
697 bidi_type_t type; 1169 bidi_type_t type;
1170 const unsigned char *s;
698 1171
699 if (!bidi_initialized) 1172 if (!bidi_initialized)
700 bidi_initialize (); 1173 bidi_initialize ();
@@ -712,7 +1185,11 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
712 we are potentially in a new paragraph that doesn't yet 1185 we are potentially in a new paragraph that doesn't yet
713 exist. */ 1186 exist. */
714 pos = bidi_it->charpos; 1187 pos = bidi_it->charpos;
715 if (bytepos > BEGV_BYTE && FETCH_CHAR (bytepos) == '\n') 1188 s = (STRINGP (bidi_it->string.lstring)
1189 ? SDATA (bidi_it->string.lstring)
1190 : bidi_it->string.s);
1191 if (bytepos > begbyte
1192 && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
716 { 1193 {
717 bytepos++; 1194 bytepos++;
718 pos++; 1195 pos++;
@@ -720,50 +1197,63 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
720 1197
721 /* We are either at the beginning of a paragraph or in the 1198 /* We are either at the beginning of a paragraph or in the
722 middle of it. Find where this paragraph starts. */ 1199 middle of it. Find where this paragraph starts. */
723 pstartbyte = bidi_find_paragraph_start (pos, bytepos); 1200 if (string_p)
1201 {
1202 /* We don't support changes of paragraph direction inside a
1203 string. It is treated as a single paragraph. */
1204 pstartbyte = 0;
1205 }
1206 else
1207 pstartbyte = bidi_find_paragraph_start (pos, bytepos);
724 bidi_it->separator_limit = -1; 1208 bidi_it->separator_limit = -1;
725 bidi_it->new_paragraph = 0; 1209 bidi_it->new_paragraph = 0;
726 1210
727 /* The following loop is run more than once only if NO_DEFAULT_P 1211 /* The following loop is run more than once only if NO_DEFAULT_P
728 is non-zero. */ 1212 is non-zero, and only if we are iterating on a buffer. */
729 do { 1213 do {
730 bytepos = pstartbyte; 1214 bytepos = pstartbyte;
731 pos = BYTE_TO_CHAR (bytepos); 1215 if (!string_p)
732 ch = bidi_fetch_char (bytepos, pos, &disp_pos, bidi_it->frame_window_p, 1216 pos = BYTE_TO_CHAR (bytepos);
733 &ch_len, &nchars); 1217 ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop,
1218 &bidi_it->string,
1219 bidi_it->frame_window_p, &ch_len, &nchars);
734 type = bidi_get_type (ch, NEUTRAL_DIR); 1220 type = bidi_get_type (ch, NEUTRAL_DIR);
735 1221
736 for (pos += nchars, bytepos += ch_len; 1222 for (pos += nchars, bytepos += ch_len;
737 /* NOTE: UAX#9 says to search only for L, AL, or R types
738 of characters, and ignore RLE, RLO, LRE, and LRO.
739 However, I'm not sure it makes sense to omit those 4;
740 should try with and without that to see the effect. */
741 (bidi_get_category (type) != STRONG) 1223 (bidi_get_category (type) != STRONG)
742 || (bidi_ignore_explicit_marks_for_paragraph_level 1224 || (bidi_ignore_explicit_marks_for_paragraph_level
743 && (type == RLE || type == RLO 1225 && (type == RLE || type == RLO
744 || type == LRE || type == LRO)); 1226 || type == LRE || type == LRO));
745 type = bidi_get_type (ch, NEUTRAL_DIR)) 1227 type = bidi_get_type (ch, NEUTRAL_DIR))
746 { 1228 {
747 if (bytepos >= ZV_BYTE) 1229 if (pos >= end)
748 { 1230 {
749 /* Pretend there's a paragraph separator at end of 1231 /* Pretend there's a paragraph separator at end of
750 buffer. */ 1232 buffer/string. */
751 type = NEUTRAL_B; 1233 type = NEUTRAL_B;
752 break; 1234 break;
753 } 1235 }
754 if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1) 1236 if (!string_p
1237 && type == NEUTRAL_B
1238 && bidi_at_paragraph_end (pos, bytepos) >= -1)
755 break; 1239 break;
756 /* Fetch next character and advance to get past it. */ 1240 /* Fetch next character and advance to get past it. */
757 ch = bidi_fetch_char (bytepos, pos, &disp_pos, 1241 ch = bidi_fetch_char (bytepos, pos, &disp_pos,
1242 &disp_prop, &bidi_it->string,
758 bidi_it->frame_window_p, &ch_len, &nchars); 1243 bidi_it->frame_window_p, &ch_len, &nchars);
759 pos += nchars; 1244 pos += nchars;
760 bytepos += ch_len; 1245 bytepos += ch_len;
761 } 1246 }
762 if (type == STRONG_R || type == STRONG_AL) /* P3 */ 1247 if ((type == STRONG_R || type == STRONG_AL) /* P3 */
1248 || (!bidi_ignore_explicit_marks_for_paragraph_level
1249 && (type == RLO || type == RLE)))
763 bidi_it->paragraph_dir = R2L; 1250 bidi_it->paragraph_dir = R2L;
764 else if (type == STRONG_L) 1251 else if (type == STRONG_L
1252 || (!bidi_ignore_explicit_marks_for_paragraph_level
1253 && (type == LRO || type == LRE)))
765 bidi_it->paragraph_dir = L2R; 1254 bidi_it->paragraph_dir = L2R;
766 if (no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR) 1255 if (!string_p
1256 && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
767 { 1257 {
768 /* If this paragraph is at BEGV, default to L2R. */ 1258 /* If this paragraph is at BEGV, default to L2R. */
769 if (pstartbyte == BEGV_BYTE) 1259 if (pstartbyte == BEGV_BYTE)
@@ -786,7 +1276,8 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
786 pstartbyte = prevpbyte; 1276 pstartbyte = prevpbyte;
787 } 1277 }
788 } 1278 }
789 } while (no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR); 1279 } while (!string_p
1280 && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
790 } 1281 }
791 else 1282 else
792 abort (); 1283 abort ();
@@ -804,110 +1295,11 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
804 bidi_line_init (bidi_it); 1295 bidi_line_init (bidi_it);
805} 1296}
806 1297
807/* Do whatever UAX#9 clause X8 says should be done at paragraph's 1298
808 end. */ 1299/***********************************************************************
809static inline void 1300 Resolving explicit and implicit levels.
810bidi_set_paragraph_end (struct bidi_it *bidi_it) 1301 The rest of this file constitutes the core of the UBA implementation.
811{ 1302 ***********************************************************************/
812 bidi_it->invalid_levels = 0;
813 bidi_it->invalid_rl_levels = -1;
814 bidi_it->stack_idx = 0;
815 bidi_it->resolved_level = bidi_it->level_stack[0].level;
816}
817
818/* Initialize the bidi iterator from buffer/string position CHARPOS. */
819void
820bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
821 struct bidi_it *bidi_it)
822{
823 if (! bidi_initialized)
824 bidi_initialize ();
825 bidi_it->charpos = charpos;
826 bidi_it->bytepos = bytepos;
827 bidi_it->frame_window_p = frame_window_p;
828 bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
829 bidi_it->first_elt = 1;
830 bidi_set_paragraph_end (bidi_it);
831 bidi_it->new_paragraph = 1;
832 bidi_it->separator_limit = -1;
833 bidi_it->type = NEUTRAL_B;
834 bidi_it->type_after_w1 = NEUTRAL_B;
835 bidi_it->orig_type = NEUTRAL_B;
836 bidi_it->prev_was_pdf = 0;
837 bidi_it->prev.type = bidi_it->prev.type_after_w1 =
838 bidi_it->prev.orig_type = UNKNOWN_BT;
839 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
840 bidi_it->last_strong.orig_type = UNKNOWN_BT;
841 bidi_it->next_for_neutral.charpos = -1;
842 bidi_it->next_for_neutral.type =
843 bidi_it->next_for_neutral.type_after_w1 =
844 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
845 bidi_it->prev_for_neutral.charpos = -1;
846 bidi_it->prev_for_neutral.type =
847 bidi_it->prev_for_neutral.type_after_w1 =
848 bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
849 bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */
850 bidi_it->disp_pos = -1; /* invalid/unknown */
851 bidi_cache_shrink ();
852}
853
854/* Push the current embedding level and override status; reset the
855 current level to LEVEL and the current override status to OVERRIDE. */
856static inline void
857bidi_push_embedding_level (struct bidi_it *bidi_it,
858 int level, bidi_dir_t override)
859{
860 bidi_it->stack_idx++;
861 if (bidi_it->stack_idx >= BIDI_MAXLEVEL)
862 abort ();
863 bidi_it->level_stack[bidi_it->stack_idx].level = level;
864 bidi_it->level_stack[bidi_it->stack_idx].override = override;
865}
866
867/* Pop the embedding level and directional override status from the
868 stack, and return the new level. */
869static inline int
870bidi_pop_embedding_level (struct bidi_it *bidi_it)
871{
872 /* UAX#9 says to ignore invalid PDFs. */
873 if (bidi_it->stack_idx > 0)
874 bidi_it->stack_idx--;
875 return bidi_it->level_stack[bidi_it->stack_idx].level;
876}
877
878/* Record in SAVED_INFO the information about the current character. */
879static inline void
880bidi_remember_char (struct bidi_saved_info *saved_info,
881 struct bidi_it *bidi_it)
882{
883 saved_info->charpos = bidi_it->charpos;
884 saved_info->bytepos = bidi_it->bytepos;
885 saved_info->type = bidi_it->type;
886 bidi_check_type (bidi_it->type);
887 saved_info->type_after_w1 = bidi_it->type_after_w1;
888 bidi_check_type (bidi_it->type_after_w1);
889 saved_info->orig_type = bidi_it->orig_type;
890 bidi_check_type (bidi_it->orig_type);
891}
892
893/* Resolve the type of a neutral character according to the type of
894 surrounding strong text and the current embedding level. */
895static inline bidi_type_t
896bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
897{
898 /* N1: European and Arabic numbers are treated as though they were R. */
899 if (next_type == WEAK_EN || next_type == WEAK_AN)
900 next_type = STRONG_R;
901 if (prev_type == WEAK_EN || prev_type == WEAK_AN)
902 prev_type = STRONG_R;
903
904 if (next_type == prev_type) /* N1 */
905 return next_type;
906 else if ((lev & 1) == 0) /* N2 */
907 return STRONG_L;
908 else
909 return STRONG_R;
910}
911 1303
912static inline int 1304static inline int
913bidi_explicit_dir_char (int ch) 1305bidi_explicit_dir_char (int ch)
@@ -934,19 +1326,36 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
934 int current_level; 1326 int current_level;
935 int new_level; 1327 int new_level;
936 bidi_dir_t override; 1328 bidi_dir_t override;
1329 int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
937 1330
938 /* If reseat()'ed, don't advance, so as to start iteration from the 1331 /* If reseat()'ed, don't advance, so as to start iteration from the
939 position where we were reseated. bidi_it->bytepos can be less 1332 position where we were reseated. bidi_it->bytepos can be less
940 than BEGV_BYTE after reseat to BEGV. */ 1333 than BEGV_BYTE after reseat to BEGV. */
941 if (bidi_it->bytepos < BEGV_BYTE 1334 if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
942 || bidi_it->first_elt) 1335 || bidi_it->first_elt)
943 { 1336 {
944 bidi_it->first_elt = 0; 1337 bidi_it->first_elt = 0;
945 if (bidi_it->charpos < BEGV) 1338 if (string_p)
946 bidi_it->charpos = BEGV; 1339 {
947 bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos); 1340 const unsigned char *p
1341 = (STRINGP (bidi_it->string.lstring)
1342 ? SDATA (bidi_it->string.lstring)
1343 : bidi_it->string.s);
1344
1345 if (bidi_it->charpos < 0)
1346 bidi_it->charpos = 0;
1347 bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos,
1348 bidi_it->string.unibyte);
1349 }
1350 else
1351 {
1352 if (bidi_it->charpos < BEGV)
1353 bidi_it->charpos = BEGV;
1354 bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
1355 }
948 } 1356 }
949 else if (bidi_it->bytepos < ZV_BYTE) /* don't move at ZV */ 1357 /* Don't move at end of buffer/string. */
1358 else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
950 { 1359 {
951 /* Advance to the next character, skipping characters covered by 1360 /* Advance to the next character, skipping characters covered by
952 display strings (nchars > 1). */ 1361 display strings (nchars > 1). */
@@ -962,12 +1371,13 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
962 override = bidi_it->level_stack[bidi_it->stack_idx].override; 1371 override = bidi_it->level_stack[bidi_it->stack_idx].override;
963 new_level = current_level; 1372 new_level = current_level;
964 1373
965 if (bidi_it->bytepos >= ZV_BYTE) 1374 if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
966 { 1375 {
967 curchar = BIDI_EOB; 1376 curchar = BIDI_EOB;
968 bidi_it->ch_len = 1; 1377 bidi_it->ch_len = 1;
969 bidi_it->nchars = 1; 1378 bidi_it->nchars = 1;
970 bidi_it->disp_pos = ZV; 1379 bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
1380 bidi_it->disp_prop = 0;
971 } 1381 }
972 else 1382 else
973 { 1383 {
@@ -975,7 +1385,8 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
975 display string, treat the entire run of covered characters as 1385 display string, treat the entire run of covered characters as
976 a single character u+FFFC. */ 1386 a single character u+FFFC. */
977 curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos, 1387 curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
978 &bidi_it->disp_pos, bidi_it->frame_window_p, 1388 &bidi_it->disp_pos, &bidi_it->disp_prop,
1389 &bidi_it->string, bidi_it->frame_window_p,
979 &bidi_it->ch_len, &bidi_it->nchars); 1390 &bidi_it->ch_len, &bidi_it->nchars);
980 } 1391 }
981 bidi_it->ch = curchar; 1392 bidi_it->ch = curchar;
@@ -1000,7 +1411,7 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1000 bidi_it->type_after_w1 = type; 1411 bidi_it->type_after_w1 = type;
1001 bidi_check_type (bidi_it->type_after_w1); 1412 bidi_check_type (bidi_it->type_after_w1);
1002 type = WEAK_BN; /* X9/Retaining */ 1413 type = WEAK_BN; /* X9/Retaining */
1003 if (bidi_it->ignore_bn_limit <= 0) 1414 if (bidi_it->ignore_bn_limit <= -1)
1004 { 1415 {
1005 if (current_level <= BIDI_MAXLEVEL - 4) 1416 if (current_level <= BIDI_MAXLEVEL - 4)
1006 { 1417 {
@@ -1025,7 +1436,8 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1025 } 1436 }
1026 } 1437 }
1027 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ 1438 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1028 || bidi_it->next_en_pos > bidi_it->charpos) 1439 || (bidi_it->next_en_pos > bidi_it->charpos
1440 && bidi_it->next_en_type == WEAK_EN))
1029 type = WEAK_EN; 1441 type = WEAK_EN;
1030 break; 1442 break;
1031 case LRE: /* X3 */ 1443 case LRE: /* X3 */
@@ -1033,7 +1445,7 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1033 bidi_it->type_after_w1 = type; 1445 bidi_it->type_after_w1 = type;
1034 bidi_check_type (bidi_it->type_after_w1); 1446 bidi_check_type (bidi_it->type_after_w1);
1035 type = WEAK_BN; /* X9/Retaining */ 1447 type = WEAK_BN; /* X9/Retaining */
1036 if (bidi_it->ignore_bn_limit <= 0) 1448 if (bidi_it->ignore_bn_limit <= -1)
1037 { 1449 {
1038 if (current_level <= BIDI_MAXLEVEL - 5) 1450 if (current_level <= BIDI_MAXLEVEL - 5)
1039 { 1451 {
@@ -1061,14 +1473,15 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1061 } 1473 }
1062 } 1474 }
1063 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ 1475 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1064 || bidi_it->next_en_pos > bidi_it->charpos) 1476 || (bidi_it->next_en_pos > bidi_it->charpos
1477 && bidi_it->next_en_type == WEAK_EN))
1065 type = WEAK_EN; 1478 type = WEAK_EN;
1066 break; 1479 break;
1067 case PDF: /* X7 */ 1480 case PDF: /* X7 */
1068 bidi_it->type_after_w1 = type; 1481 bidi_it->type_after_w1 = type;
1069 bidi_check_type (bidi_it->type_after_w1); 1482 bidi_check_type (bidi_it->type_after_w1);
1070 type = WEAK_BN; /* X9/Retaining */ 1483 type = WEAK_BN; /* X9/Retaining */
1071 if (bidi_it->ignore_bn_limit <= 0) 1484 if (bidi_it->ignore_bn_limit <= -1)
1072 { 1485 {
1073 if (!bidi_it->invalid_rl_levels) 1486 if (!bidi_it->invalid_rl_levels)
1074 { 1487 {
@@ -1087,7 +1500,8 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1087 } 1500 }
1088 } 1501 }
1089 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ 1502 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1090 || bidi_it->next_en_pos > bidi_it->charpos) 1503 || (bidi_it->next_en_pos > bidi_it->charpos
1504 && bidi_it->next_en_type == WEAK_EN))
1091 type = WEAK_EN; 1505 type = WEAK_EN;
1092 break; 1506 break;
1093 default: 1507 default:
@@ -1111,13 +1525,19 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
1111{ 1525{
1112 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level; 1526 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1113 int new_level = bidi_resolve_explicit_1 (bidi_it); 1527 int new_level = bidi_resolve_explicit_1 (bidi_it);
1528 EMACS_INT eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
1529 const unsigned char *s
1530 = (STRINGP (bidi_it->string.lstring)
1531 ? SDATA (bidi_it->string.lstring)
1532 : bidi_it->string.s);
1114 1533
1115 if (prev_level < new_level 1534 if (prev_level < new_level
1116 && bidi_it->type == WEAK_BN 1535 && bidi_it->type == WEAK_BN
1117 && bidi_it->ignore_bn_limit == 0 /* only if not already known */ 1536 && bidi_it->ignore_bn_limit == -1 /* only if not already known */
1118 && bidi_it->bytepos < ZV_BYTE /* not already at EOB */ 1537 && bidi_it->charpos < eob /* not already at EOB */
1119 && bidi_explicit_dir_char (FETCH_MULTIBYTE_CHAR (bidi_it->bytepos 1538 && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1120 + bidi_it->ch_len))) 1539 + bidi_it->ch_len, s,
1540 bidi_it->string.unibyte)))
1121 { 1541 {
1122 /* Avoid pushing and popping embedding levels if the level run 1542 /* Avoid pushing and popping embedding levels if the level run
1123 is empty, as this breaks level runs where it shouldn't. 1543 is empty, as this breaks level runs where it shouldn't.
@@ -1129,12 +1549,17 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
1129 1549
1130 bidi_copy_it (&saved_it, bidi_it); 1550 bidi_copy_it (&saved_it, bidi_it);
1131 1551
1132 while (bidi_explicit_dir_char (FETCH_MULTIBYTE_CHAR (bidi_it->bytepos 1552 while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1133 + bidi_it->ch_len))) 1553 + bidi_it->ch_len, s,
1554 bidi_it->string.unibyte)))
1134 { 1555 {
1135 /* This advances to the next character, skipping any 1556 /* This advances to the next character, skipping any
1136 characters covered by display strings. */ 1557 characters covered by display strings. */
1137 level = bidi_resolve_explicit_1 (bidi_it); 1558 level = bidi_resolve_explicit_1 (bidi_it);
1559 /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1560 a pointer to its data is no longer valid. */
1561 if (STRINGP (bidi_it->string.lstring))
1562 s = SDATA (bidi_it->string.lstring);
1138 } 1563 }
1139 1564
1140 if (bidi_it->nchars <= 0) 1565 if (bidi_it->nchars <= 0)
@@ -1142,10 +1567,10 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
1142 if (level == prev_level) /* empty embedding */ 1567 if (level == prev_level) /* empty embedding */
1143 saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars; 1568 saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
1144 else /* this embedding is non-empty */ 1569 else /* this embedding is non-empty */
1145 saved_it.ignore_bn_limit = -1; 1570 saved_it.ignore_bn_limit = -2;
1146 1571
1147 bidi_copy_it (bidi_it, &saved_it); 1572 bidi_copy_it (bidi_it, &saved_it);
1148 if (bidi_it->ignore_bn_limit > 0) 1573 if (bidi_it->ignore_bn_limit > -1)
1149 { 1574 {
1150 /* We pushed a level, but we shouldn't have. Undo that. */ 1575 /* We pushed a level, but we shouldn't have. Undo that. */
1151 if (!bidi_it->invalid_rl_levels) 1576 if (!bidi_it->invalid_rl_levels)
@@ -1188,6 +1613,9 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
1188 int next_char; 1613 int next_char;
1189 bidi_type_t type_of_next; 1614 bidi_type_t type_of_next;
1190 struct bidi_it saved_it; 1615 struct bidi_it saved_it;
1616 EMACS_INT eob
1617 = ((STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
1618 ? bidi_it->string.schars : ZV);
1191 1619
1192 type = bidi_it->type; 1620 type = bidi_it->type;
1193 override = bidi_it->level_stack[bidi_it->stack_idx].override; 1621 override = bidi_it->level_stack[bidi_it->stack_idx].override;
@@ -1254,10 +1682,15 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
1254 && bidi_it->prev.orig_type == WEAK_EN) 1682 && bidi_it->prev.orig_type == WEAK_EN)
1255 || bidi_it->prev.type_after_w1 == WEAK_AN))) 1683 || bidi_it->prev.type_after_w1 == WEAK_AN)))
1256 { 1684 {
1257 next_char = 1685 const unsigned char *s
1258 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE 1686 = (STRINGP (bidi_it->string.lstring)
1259 ? BIDI_EOB : FETCH_MULTIBYTE_CHAR (bidi_it->bytepos 1687 ? SDATA (bidi_it->string.lstring)
1260 + bidi_it->ch_len); 1688 : bidi_it->string.s);
1689
1690 next_char = (bidi_it->charpos + bidi_it->nchars >= eob
1691 ? BIDI_EOB
1692 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len,
1693 s, bidi_it->string.unibyte));
1261 type_of_next = bidi_get_type (next_char, override); 1694 type_of_next = bidi_get_type (next_char, override);
1262 1695
1263 if (type_of_next == WEAK_BN 1696 if (type_of_next == WEAK_BN
@@ -1300,19 +1733,28 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
1300 else if (type == WEAK_ET /* W5: ET with EN before or after it */ 1733 else if (type == WEAK_ET /* W5: ET with EN before or after it */
1301 || type == WEAK_BN) /* W5/Retaining */ 1734 || type == WEAK_BN) /* W5/Retaining */
1302 { 1735 {
1303 if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */ 1736 if (bidi_it->prev.type_after_w1 == WEAK_EN) /* ET/BN w/EN before it */
1304 || bidi_it->next_en_pos > bidi_it->charpos)
1305 type = WEAK_EN; 1737 type = WEAK_EN;
1306 else /* W5: ET/BN with EN after it. */ 1738 else if (bidi_it->next_en_pos > bidi_it->charpos
1739 && bidi_it->next_en_type != WEAK_BN)
1740 {
1741 if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */
1742 type = WEAK_EN;
1743 }
1744 else if (bidi_it->next_en_pos >=0)
1307 { 1745 {
1308 EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars; 1746 EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars;
1747 const unsigned char *s = (STRINGP (bidi_it->string.lstring)
1748 ? SDATA (bidi_it->string.lstring)
1749 : bidi_it->string.s);
1309 1750
1310 if (bidi_it->nchars <= 0) 1751 if (bidi_it->nchars <= 0)
1311 abort (); 1752 abort ();
1312 next_char = 1753 next_char
1313 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE 1754 = (bidi_it->charpos + bidi_it->nchars >= eob
1314 ? BIDI_EOB : FETCH_MULTIBYTE_CHAR (bidi_it->bytepos 1755 ? BIDI_EOB
1315 + bidi_it->ch_len); 1756 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
1757 bidi_it->string.unibyte));
1316 type_of_next = bidi_get_type (next_char, override); 1758 type_of_next = bidi_get_type (next_char, override);
1317 1759
1318 if (type_of_next == WEAK_ET 1760 if (type_of_next == WEAK_ET
@@ -1328,20 +1770,27 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
1328 en_pos = bidi_it->charpos; 1770 en_pos = bidi_it->charpos;
1329 bidi_copy_it (bidi_it, &saved_it); 1771 bidi_copy_it (bidi_it, &saved_it);
1330 } 1772 }
1773 /* Remember this position, to speed up processing of the
1774 next ETs. */
1775 bidi_it->next_en_pos = en_pos;
1331 if (type_of_next == WEAK_EN) 1776 if (type_of_next == WEAK_EN)
1332 { 1777 {
1333 /* If the last strong character is AL, the EN we've 1778 /* If the last strong character is AL, the EN we've
1334 found will become AN when we get to it (W2). */ 1779 found will become AN when we get to it (W2). */
1335 if (bidi_it->last_strong.type_after_w1 != STRONG_AL) 1780 if (bidi_it->last_strong.type_after_w1 == STRONG_AL)
1336 { 1781 type_of_next = WEAK_AN;
1337 type = WEAK_EN;
1338 /* Remember this EN position, to speed up processing
1339 of the next ETs. */
1340 bidi_it->next_en_pos = en_pos;
1341 }
1342 else if (type == WEAK_BN) 1782 else if (type == WEAK_BN)
1343 type = NEUTRAL_ON; /* W6/Retaining */ 1783 type = NEUTRAL_ON; /* W6/Retaining */
1784 else
1785 type = WEAK_EN;
1344 } 1786 }
1787 else if (type_of_next == NEUTRAL_B)
1788 /* Record the fact that there are no more ENs from
1789 here to the end of paragraph, to avoid entering the
1790 loop above ever again in this paragraph. */
1791 bidi_it->next_en_pos = -1;
1792 /* Record the type of the character where we ended our search. */
1793 bidi_it->next_en_type = type_of_next;
1345 } 1794 }
1346 } 1795 }
1347 } 1796 }
@@ -1373,6 +1822,25 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
1373 return type; 1822 return type;
1374} 1823}
1375 1824
1825/* Resolve the type of a neutral character according to the type of
1826 surrounding strong text and the current embedding level. */
1827static inline bidi_type_t
1828bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
1829{
1830 /* N1: European and Arabic numbers are treated as though they were R. */
1831 if (next_type == WEAK_EN || next_type == WEAK_AN)
1832 next_type = STRONG_R;
1833 if (prev_type == WEAK_EN || prev_type == WEAK_AN)
1834 prev_type = STRONG_R;
1835
1836 if (next_type == prev_type) /* N1 */
1837 return next_type;
1838 else if ((lev & 1) == 0) /* N2 */
1839 return STRONG_L;
1840 else
1841 return STRONG_R;
1842}
1843
1376static bidi_type_t 1844static bidi_type_t
1377bidi_resolve_neutral (struct bidi_it *bidi_it) 1845bidi_resolve_neutral (struct bidi_it *bidi_it)
1378{ 1846{
@@ -1391,13 +1859,45 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
1391 || type == NEUTRAL_ON)) 1859 || type == NEUTRAL_ON))
1392 abort (); 1860 abort ();
1393 1861
1394 if (bidi_get_category (type) == NEUTRAL 1862 if ((type != NEUTRAL_B /* Don't risk entering the long loop below if
1863 we are already at paragraph end. */
1864 && bidi_get_category (type) == NEUTRAL)
1395 || (type == WEAK_BN && prev_level == current_level)) 1865 || (type == WEAK_BN && prev_level == current_level))
1396 { 1866 {
1397 if (bidi_it->next_for_neutral.type != UNKNOWN_BT) 1867 if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1398 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, 1868 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1399 bidi_it->next_for_neutral.type, 1869 bidi_it->next_for_neutral.type,
1400 current_level); 1870 current_level);
1871 /* The next two "else if" clauses are shortcuts for the
1872 important special case when we have a long sequence of
1873 neutral or WEAK_BN characters, such as whitespace or nulls or
1874 other control characters, on the base embedding level of the
1875 paragraph, and that sequence goes all the way to the end of
1876 the paragraph and follows a character whose resolved
1877 directionality is identical to the base embedding level.
1878 (This is what happens in a buffer with plain L2R text that
1879 happens to include long sequences of control characters.) By
1880 virtue of N1, the result of examining this long sequence will
1881 always be either STRONG_L or STRONG_R, depending on the base
1882 embedding level. So we use this fact directly instead of
1883 entering the expensive loop in the "else" clause. */
1884 else if (current_level == 0
1885 && bidi_it->prev_for_neutral.type == STRONG_L
1886 && !bidi_explicit_dir_char (bidi_it->ch))
1887 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1888 STRONG_L, current_level);
1889 else if (/* current level is 1 */
1890 current_level == 1
1891 /* base embedding level is also 1 */
1892 && bidi_it->level_stack[0].level == 1
1893 /* previous character is one of those considered R for
1894 the purposes of W5 */
1895 && (bidi_it->prev_for_neutral.type == STRONG_R
1896 || bidi_it->prev_for_neutral.type == WEAK_EN
1897 || bidi_it->prev_for_neutral.type == WEAK_AN)
1898 && !bidi_explicit_dir_char (bidi_it->ch))
1899 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1900 STRONG_R, current_level);
1401 else 1901 else
1402 { 1902 {
1403 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in 1903 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
@@ -1438,8 +1938,8 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
1438 && bidi_get_category (type) != NEUTRAL) 1938 && bidi_get_category (type) != NEUTRAL)
1439 /* This is all per level run, so stop when we 1939 /* This is all per level run, so stop when we
1440 reach the end of this level run. */ 1940 reach the end of this level run. */
1441 || bidi_it->level_stack[bidi_it->stack_idx].level != 1941 || (bidi_it->level_stack[bidi_it->stack_idx].level
1442 current_level)); 1942 != current_level)));
1443 1943
1444 bidi_remember_char (&saved_it.next_for_neutral, bidi_it); 1944 bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1445 1945
@@ -1448,6 +1948,9 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
1448 case STRONG_L: 1948 case STRONG_L:
1449 case STRONG_R: 1949 case STRONG_R:
1450 case STRONG_AL: 1950 case STRONG_AL:
1951 /* Actually, STRONG_AL cannot happen here, because
1952 bidi_resolve_weak converts it to STRONG_R, per W3. */
1953 xassert (type != STRONG_AL);
1451 next_type = type; 1954 next_type = type;
1452 break; 1955 break;
1453 case WEAK_EN: 1956 case WEAK_EN:
@@ -1455,7 +1958,6 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
1455 /* N1: ``European and Arabic numbers are treated as 1958 /* N1: ``European and Arabic numbers are treated as
1456 though they were R.'' */ 1959 though they were R.'' */
1457 next_type = STRONG_R; 1960 next_type = STRONG_R;
1458 saved_it.next_for_neutral.type = STRONG_R;
1459 break; 1961 break;
1460 case WEAK_BN: 1962 case WEAK_BN:
1461 if (!bidi_explicit_dir_char (bidi_it->ch)) 1963 if (!bidi_explicit_dir_char (bidi_it->ch))
@@ -1468,11 +1970,7 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
1468 member. */ 1970 member. */
1469 if (saved_it.type != WEAK_BN 1971 if (saved_it.type != WEAK_BN
1470 || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL) 1972 || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1471 { 1973 next_type = bidi_it->prev_for_neutral.type;
1472 next_type = bidi_it->prev_for_neutral.type;
1473 saved_it.next_for_neutral.type = next_type;
1474 bidi_check_type (next_type);
1475 }
1476 else 1974 else
1477 { 1975 {
1478 /* This is a BN which does not adjoin neutrals. 1976 /* This is a BN which does not adjoin neutrals.
@@ -1486,7 +1984,9 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
1486 } 1984 }
1487 type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type, 1985 type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1488 next_type, current_level); 1986 next_type, current_level);
1987 saved_it.next_for_neutral.type = next_type;
1489 saved_it.type = type; 1988 saved_it.type = type;
1989 bidi_check_type (next_type);
1490 bidi_check_type (type); 1990 bidi_check_type (type);
1491 bidi_copy_it (bidi_it, &saved_it); 1991 bidi_copy_it (bidi_it, &saved_it);
1492 } 1992 }
@@ -1509,11 +2009,11 @@ bidi_type_of_next_char (struct bidi_it *bidi_it)
1509 2009
1510 /* Reset the limit until which to ignore BNs if we step out of the 2010 /* Reset the limit until which to ignore BNs if we step out of the
1511 area where we found only empty levels. */ 2011 area where we found only empty levels. */
1512 if ((bidi_it->ignore_bn_limit > 0 2012 if ((bidi_it->ignore_bn_limit > -1
1513 && bidi_it->ignore_bn_limit <= bidi_it->charpos) 2013 && bidi_it->ignore_bn_limit <= bidi_it->charpos)
1514 || (bidi_it->ignore_bn_limit == -1 2014 || (bidi_it->ignore_bn_limit == -2
1515 && !bidi_explicit_dir_char (bidi_it->ch))) 2015 && !bidi_explicit_dir_char (bidi_it->ch)))
1516 bidi_it->ignore_bn_limit = 0; 2016 bidi_it->ignore_bn_limit = -1;
1517 2017
1518 type = bidi_resolve_neutral (bidi_it); 2018 type = bidi_resolve_neutral (bidi_it);
1519 2019
@@ -1530,12 +2030,16 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
1530 bidi_type_t type; 2030 bidi_type_t type;
1531 int level, prev_level = -1; 2031 int level, prev_level = -1;
1532 struct bidi_saved_info next_for_neutral; 2032 struct bidi_saved_info next_for_neutral;
1533 EMACS_INT next_char_pos; 2033 EMACS_INT next_char_pos = -2;
1534 2034
1535 if (bidi_it->scan_dir == 1) 2035 if (bidi_it->scan_dir == 1)
1536 { 2036 {
2037 EMACS_INT eob
2038 = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2039 ? bidi_it->string.schars : ZV);
2040
1537 /* There's no sense in trying to advance if we hit end of text. */ 2041 /* There's no sense in trying to advance if we hit end of text. */
1538 if (bidi_it->bytepos >= ZV_BYTE) 2042 if (bidi_it->charpos >= eob)
1539 return bidi_it->resolved_level; 2043 return bidi_it->resolved_level;
1540 2044
1541 /* Record the info about the previous character. */ 2045 /* Record the info about the previous character. */
@@ -1558,7 +2062,10 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
1558 bidi_it->next_for_neutral.type = UNKNOWN_BT; 2062 bidi_it->next_for_neutral.type = UNKNOWN_BT;
1559 if (bidi_it->next_en_pos >= 0 2063 if (bidi_it->next_en_pos >= 0
1560 && bidi_it->charpos >= bidi_it->next_en_pos) 2064 && bidi_it->charpos >= bidi_it->next_en_pos)
1561 bidi_it->next_en_pos = -1; 2065 {
2066 bidi_it->next_en_pos = 0;
2067 bidi_it->next_en_type = UNKNOWN_BT;
2068 }
1562 if (bidi_it->next_for_ws.type != UNKNOWN_BT 2069 if (bidi_it->next_for_ws.type != UNKNOWN_BT
1563 && bidi_it->charpos >= bidi_it->next_for_ws.charpos) 2070 && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
1564 bidi_it->next_for_ws.type = UNKNOWN_BT; 2071 bidi_it->next_for_ws.type = UNKNOWN_BT;
@@ -1575,17 +2082,26 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
1575 /* Perhaps the character we want is already cached. If it is, the 2082 /* Perhaps the character we want is already cached. If it is, the
1576 call to bidi_cache_find below will return a type other than 2083 call to bidi_cache_find below will return a type other than
1577 UNKNOWN_BT. */ 2084 UNKNOWN_BT. */
1578 if (bidi_cache_idx && !bidi_it->first_elt) 2085 if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
1579 { 2086 {
2087 int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2088 ? 0 : 1);
1580 if (bidi_it->scan_dir > 0) 2089 if (bidi_it->scan_dir > 0)
1581 { 2090 {
1582 if (bidi_it->nchars <= 0) 2091 if (bidi_it->nchars <= 0)
1583 abort (); 2092 abort ();
1584 next_char_pos = bidi_it->charpos + bidi_it->nchars; 2093 next_char_pos = bidi_it->charpos + bidi_it->nchars;
1585 } 2094 }
1586 else 2095 else if (bidi_it->charpos >= bob)
2096 /* Implementation note: we allow next_char_pos to be as low as
2097 0 for buffers or -1 for strings, and that is okay because
2098 that's the "position" of the sentinel iterator state we
2099 cached at the beginning of the iteration. */
1587 next_char_pos = bidi_it->charpos - 1; 2100 next_char_pos = bidi_it->charpos - 1;
1588 type = bidi_cache_find (next_char_pos, -1, bidi_it); 2101 if (next_char_pos >= bob - 1)
2102 type = bidi_cache_find (next_char_pos, -1, bidi_it);
2103 else
2104 type = UNKNOWN_BT;
1589 } 2105 }
1590 else 2106 else
1591 type = UNKNOWN_BT; 2107 type = UNKNOWN_BT;
@@ -1652,15 +2168,17 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
1652 EMACS_INT cpos = bidi_it->charpos; 2168 EMACS_INT cpos = bidi_it->charpos;
1653 EMACS_INT disp_pos = bidi_it->disp_pos; 2169 EMACS_INT disp_pos = bidi_it->disp_pos;
1654 EMACS_INT nc = bidi_it->nchars; 2170 EMACS_INT nc = bidi_it->nchars;
2171 struct bidi_string_data bs = bidi_it->string;
1655 bidi_type_t chtype; 2172 bidi_type_t chtype;
1656 int fwp = bidi_it->frame_window_p; 2173 int fwp = bidi_it->frame_window_p;
2174 int dpp = bidi_it->disp_prop;
1657 2175
1658 if (bidi_it->nchars <= 0) 2176 if (bidi_it->nchars <= 0)
1659 abort (); 2177 abort ();
1660 do { 2178 do {
1661 ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, fwp, 2179 ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &dpp, &bs,
1662 &clen, &nc); 2180 fwp, &clen, &nc);
1663 if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */) 2181 if (ch == '\n' || ch == BIDI_EOB)
1664 chtype = NEUTRAL_B; 2182 chtype = NEUTRAL_B;
1665 else 2183 else
1666 chtype = bidi_get_type (ch, NEUTRAL_DIR); 2184 chtype = bidi_get_type (ch, NEUTRAL_DIR);
@@ -1673,7 +2191,7 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
1673 } 2191 }
1674 2192
1675 /* Resolve implicit levels, with a twist: PDFs get the embedding 2193 /* Resolve implicit levels, with a twist: PDFs get the embedding
1676 level of the enbedding they terminate. See below for the 2194 level of the embedding they terminate. See below for the
1677 reason. */ 2195 reason. */
1678 if (bidi_it->orig_type == PDF 2196 if (bidi_it->orig_type == PDF
1679 /* Don't do this if this formatting code didn't change the 2197 /* Don't do this if this formatting code didn't change the
@@ -1710,7 +2228,6 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
1710 else if (bidi_it->orig_type == NEUTRAL_B /* L1 */ 2228 else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
1711 || bidi_it->orig_type == NEUTRAL_S 2229 || bidi_it->orig_type == NEUTRAL_S
1712 || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB 2230 || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
1713 /* || bidi_it->ch == LINESEP_CHAR */
1714 || (bidi_it->orig_type == NEUTRAL_WS 2231 || (bidi_it->orig_type == NEUTRAL_WS
1715 && (bidi_it->next_for_ws.type == NEUTRAL_B 2232 && (bidi_it->next_for_ws.type == NEUTRAL_B
1716 || bidi_it->next_for_ws.type == NEUTRAL_S))) 2233 || bidi_it->next_for_ws.type == NEUTRAL_S)))
@@ -1756,10 +2273,11 @@ static void
1756bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag) 2273bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
1757{ 2274{
1758 int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir; 2275 int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
1759 int idx; 2276 ptrdiff_t idx;
1760 2277
1761 /* Try the cache first. */ 2278 /* Try the cache first. */
1762 if ((idx = bidi_cache_find_level_change (level, dir, end_flag)) >= 0) 2279 if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
2280 >= bidi_cache_start)
1763 bidi_cache_fetch_state (idx, bidi_it); 2281 bidi_cache_fetch_state (idx, bidi_it);
1764 else 2282 else
1765 { 2283 {
@@ -1781,20 +2299,30 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
1781{ 2299{
1782 int old_level, new_level, next_level; 2300 int old_level, new_level, next_level;
1783 struct bidi_it sentinel; 2301 struct bidi_it sentinel;
2302 struct gcpro gcpro1;
2303
2304 if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
2305 abort ();
1784 2306
1785 if (bidi_it->scan_dir == 0) 2307 if (bidi_it->scan_dir == 0)
1786 { 2308 {
1787 bidi_it->scan_dir = 1; /* default to logical order */ 2309 bidi_it->scan_dir = 1; /* default to logical order */
1788 } 2310 }
1789 2311
2312 /* The code below can call eval, and thus cause GC. If we are
2313 iterating a Lisp string, make sure it won't be GCed. */
2314 if (STRINGP (bidi_it->string.lstring))
2315 GCPRO1 (bidi_it->string.lstring);
2316
1790 /* If we just passed a newline, initialize for the next line. */ 2317 /* If we just passed a newline, initialize for the next line. */
1791 if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B) 2318 if (!bidi_it->first_elt
2319 && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
1792 bidi_line_init (bidi_it); 2320 bidi_line_init (bidi_it);
1793 2321
1794 /* Prepare the sentinel iterator state, and cache it. When we bump 2322 /* Prepare the sentinel iterator state, and cache it. When we bump
1795 into it, scanning backwards, we'll know that the last non-base 2323 into it, scanning backwards, we'll know that the last non-base
1796 level is exhausted. */ 2324 level is exhausted. */
1797 if (bidi_cache_idx == 0) 2325 if (bidi_cache_idx == bidi_cache_start)
1798 { 2326 {
1799 bidi_copy_it (&sentinel, bidi_it); 2327 bidi_copy_it (&sentinel, bidi_it);
1800 if (bidi_it->first_elt) 2328 if (bidi_it->first_elt)
@@ -1870,25 +2398,34 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
1870 _before_ we process the paragraph's text, since the base 2398 _before_ we process the paragraph's text, since the base
1871 direction affects the reordering. */ 2399 direction affects the reordering. */
1872 if (bidi_it->scan_dir == 1 2400 if (bidi_it->scan_dir == 1
1873 && bidi_it->orig_type == NEUTRAL_B 2401 && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
1874 && bidi_it->bytepos < ZV_BYTE)
1875 { 2402 {
1876 EMACS_INT sep_len = 2403 /* The paragraph direction of the entire string, once
1877 bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars, 2404 determined, is in effect for the entire string. Setting the
1878 bidi_it->bytepos + bidi_it->ch_len); 2405 separator limit to the end of the string prevents
1879 if (bidi_it->nchars <= 0) 2406 bidi_paragraph_init from being called automatically on this
1880 abort (); 2407 string. */
1881 if (sep_len >= 0) 2408 if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2409 bidi_it->separator_limit = bidi_it->string.schars;
2410 else if (bidi_it->bytepos < ZV_BYTE)
1882 { 2411 {
1883 bidi_it->new_paragraph = 1; 2412 EMACS_INT sep_len
1884 /* Record the buffer position of the last character of the 2413 = bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
1885 paragraph separator. */ 2414 bidi_it->bytepos + bidi_it->ch_len);
1886 bidi_it->separator_limit = 2415 if (bidi_it->nchars <= 0)
1887 bidi_it->charpos + bidi_it->nchars + sep_len; 2416 abort ();
2417 if (sep_len >= 0)
2418 {
2419 bidi_it->new_paragraph = 1;
2420 /* Record the buffer position of the last character of the
2421 paragraph separator. */
2422 bidi_it->separator_limit
2423 = bidi_it->charpos + bidi_it->nchars + sep_len;
2424 }
1888 } 2425 }
1889 } 2426 }
1890 2427
1891 if (bidi_it->scan_dir == 1 && bidi_cache_idx) 2428 if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
1892 { 2429 {
1893 /* If we are at paragraph's base embedding level and beyond the 2430 /* If we are at paragraph's base embedding level and beyond the
1894 last cached position, the cache's job is done and we can 2431 last cached position, the cache's job is done and we can
@@ -1904,6 +2441,9 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
1904 else 2441 else
1905 bidi_cache_iterator_state (bidi_it, 1); 2442 bidi_cache_iterator_state (bidi_it, 1);
1906 } 2443 }
2444
2445 if (STRINGP (bidi_it->string.lstring))
2446 UNGCPRO;
1907} 2447}
1908 2448
1909/* This is meant to be called from within the debugger, whenever you 2449/* This is meant to be called from within the debugger, whenever you
@@ -1912,7 +2452,7 @@ void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
1912void 2452void
1913bidi_dump_cached_states (void) 2453bidi_dump_cached_states (void)
1914{ 2454{
1915 int i; 2455 ptrdiff_t i;
1916 int ndigits = 1; 2456 int ndigits = 1;
1917 2457
1918 if (bidi_cache_idx == 0) 2458 if (bidi_cache_idx == 0)
@@ -1920,7 +2460,7 @@ bidi_dump_cached_states (void)
1920 fprintf (stderr, "The cache is empty.\n"); 2460 fprintf (stderr, "The cache is empty.\n");
1921 return; 2461 return;
1922 } 2462 }
1923 fprintf (stderr, "Total of %d state%s in cache:\n", 2463 fprintf (stderr, "Total of %"pD"d state%s in cache:\n",
1924 bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s"); 2464 bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
1925 2465
1926 for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10) 2466 for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)