diff options
Diffstat (limited to 'src/bidi.c')
| -rw-r--r-- | src/bidi.c | 1278 |
1 files changed, 909 insertions, 369 deletions
diff --git a/src/bidi.c b/src/bidi.c index 469afdb3819..b3479b17b16 100644 --- a/src/bidi.c +++ b/src/bidi.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* Low-level bidirectional buffer-scanning functions for GNU Emacs. | 1 | /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs. |
| 2 | Copyright (C) 2000-2001, 2004-2005, 2009-2011 | 2 | Copyright (C) 2000-2001, 2004-2005, 2009-2012 |
| 3 | Free Software Foundation, Inc. | 3 | Free Software Foundation, Inc. |
| 4 | 4 | ||
| 5 | This file is part of GNU Emacs. | 5 | This file is part of GNU Emacs. |
| @@ -20,7 +20,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ | |||
| 20 | /* Written by Eli Zaretskii <eliz@gnu.org>. | 20 | /* Written by Eli Zaretskii <eliz@gnu.org>. |
| 21 | 21 | ||
| 22 | A sequential implementation of the Unicode Bidirectional algorithm, | 22 | A sequential implementation of the Unicode Bidirectional algorithm, |
| 23 | as per UAX#9, a part of the Unicode Standard. | 23 | (UBA) as per UAX#9, a part of the Unicode Standard. |
| 24 | 24 | ||
| 25 | Unlike the reference and most other implementations, this one is | 25 | Unlike the reference and most other implementations, this one is |
| 26 | designed to be called once for every character in the buffer or | 26 | designed to be called once for every character in the buffer or |
| @@ -35,18 +35,23 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ | |||
| 35 | details about its algorithm that finds the next visual-order | 35 | details about its algorithm that finds the next visual-order |
| 36 | character by resolving their levels on the fly. | 36 | character by resolving their levels on the fly. |
| 37 | 37 | ||
| 38 | The two other entry points are bidi_paragraph_init and | 38 | Two other entry points are bidi_paragraph_init and |
| 39 | bidi_mirror_char. The first determines the base direction of a | 39 | bidi_mirror_char. The first determines the base direction of a |
| 40 | paragraph, while the second returns the mirrored version of its | 40 | paragraph, while the second returns the mirrored version of its |
| 41 | argument character. | 41 | argument character. |
| 42 | 42 | ||
| 43 | A few auxiliary entry points are used to initialize the bidi | ||
| 44 | iterator for iterating an object (buffer or string), push and pop | ||
| 45 | the bidi iterator state, and save and restore the state of the bidi | ||
| 46 | cache. | ||
| 47 | |||
| 43 | If you want to understand the code, you will have to read it | 48 | If you want to understand the code, you will have to read it |
| 44 | together with the relevant portions of UAX#9. The comments include | 49 | together with the relevant portions of UAX#9. The comments include |
| 45 | references to UAX#9 rules, for that very reason. | 50 | references to UAX#9 rules, for that very reason. |
| 46 | 51 | ||
| 47 | A note about references to UAX#9 rules: if the reference says | 52 | A note about references to UAX#9 rules: if the reference says |
| 48 | something like "X9/Retaining", it means that you need to refer to | 53 | something like "X9/Retaining", it means that you need to refer to |
| 49 | rule X9 and to its modifications decribed in the "Implementation | 54 | rule X9 and to its modifications described in the "Implementation |
| 50 | Notes" section of UAX#9, under "Retaining Format Codes". */ | 55 | Notes" section of UAX#9, under "Retaining Format Codes". */ |
| 51 | 56 | ||
| 52 | #include <config.h> | 57 | #include <config.h> |
| @@ -66,16 +71,6 @@ static Lisp_Object bidi_type_table, bidi_mirror_table; | |||
| 66 | #define RLM_CHAR 0x200F | 71 | #define RLM_CHAR 0x200F |
| 67 | #define BIDI_EOB -1 | 72 | #define BIDI_EOB -1 |
| 68 | 73 | ||
| 69 | /* Local data structures. (Look in dispextern.h for the rest.) */ | ||
| 70 | |||
| 71 | /* What we need to know about the current paragraph. */ | ||
| 72 | struct bidi_paragraph_info { | ||
| 73 | EMACS_INT start_bytepos; /* byte position where it begins */ | ||
| 74 | EMACS_INT end_bytepos; /* byte position where it ends */ | ||
| 75 | int embedding_level; /* its basic embedding level */ | ||
| 76 | bidi_dir_t base_dir; /* its base direction */ | ||
| 77 | }; | ||
| 78 | |||
| 79 | /* Data type for describing the bidirectional character categories. */ | 74 | /* Data type for describing the bidirectional character categories. */ |
| 80 | typedef enum { | 75 | typedef enum { |
| 81 | UNKNOWN_BC, | 76 | UNKNOWN_BC, |
| @@ -84,49 +79,21 @@ typedef enum { | |||
| 84 | STRONG | 79 | STRONG |
| 85 | } bidi_category_t; | 80 | } bidi_category_t; |
| 86 | 81 | ||
| 82 | /* UAX#9 says to search only for L, AL, or R types of characters, and | ||
| 83 | ignore RLE, RLO, LRE, and LRO, when determining the base paragraph | ||
| 84 | level. Yudit indeed ignores them. This variable is therefore set | ||
| 85 | by default to ignore them, but setting it to zero will take them | ||
| 86 | into account. */ | ||
| 87 | extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE; | 87 | extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE; |
| 88 | int bidi_ignore_explicit_marks_for_paragraph_level = 1; | 88 | int bidi_ignore_explicit_marks_for_paragraph_level = 1; |
| 89 | 89 | ||
| 90 | static Lisp_Object paragraph_start_re, paragraph_separate_re; | 90 | static Lisp_Object paragraph_start_re, paragraph_separate_re; |
| 91 | static Lisp_Object Qparagraph_start, Qparagraph_separate; | 91 | static Lisp_Object Qparagraph_start, Qparagraph_separate; |
| 92 | 92 | ||
| 93 | static void | 93 | |
| 94 | bidi_initialize (void) | 94 | /*********************************************************************** |
| 95 | { | 95 | Utilities |
| 96 | 96 | ***********************************************************************/ | |
| 97 | #include "biditype.h" | ||
| 98 | #include "bidimirror.h" | ||
| 99 | |||
| 100 | int i; | ||
| 101 | |||
| 102 | bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L)); | ||
| 103 | staticpro (&bidi_type_table); | ||
| 104 | |||
| 105 | for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++) | ||
| 106 | char_table_set_range (bidi_type_table, bidi_type[i].from, bidi_type[i].to, | ||
| 107 | make_number (bidi_type[i].type)); | ||
| 108 | |||
| 109 | bidi_mirror_table = Fmake_char_table (Qnil, Qnil); | ||
| 110 | staticpro (&bidi_mirror_table); | ||
| 111 | |||
| 112 | for (i = 0; i < sizeof bidi_mirror / sizeof bidi_mirror[0]; i++) | ||
| 113 | char_table_set (bidi_mirror_table, bidi_mirror[i].from, | ||
| 114 | make_number (bidi_mirror[i].to)); | ||
| 115 | |||
| 116 | Qparagraph_start = intern ("paragraph-start"); | ||
| 117 | staticpro (&Qparagraph_start); | ||
| 118 | paragraph_start_re = Fsymbol_value (Qparagraph_start); | ||
| 119 | if (!STRINGP (paragraph_start_re)) | ||
| 120 | paragraph_start_re = build_string ("\f\\|[ \t]*$"); | ||
| 121 | staticpro (&paragraph_start_re); | ||
| 122 | Qparagraph_separate = intern ("paragraph-separate"); | ||
| 123 | staticpro (&Qparagraph_separate); | ||
| 124 | paragraph_separate_re = Fsymbol_value (Qparagraph_separate); | ||
| 125 | if (!STRINGP (paragraph_separate_re)) | ||
| 126 | paragraph_separate_re = build_string ("[ \t\f]*$"); | ||
| 127 | staticpro (&paragraph_separate_re); | ||
| 128 | bidi_initialized = 1; | ||
| 129 | } | ||
| 130 | 97 | ||
| 131 | /* Return the bidi type of a character CH, subject to the current | 98 | /* Return the bidi type of a character CH, subject to the current |
| 132 | directional OVERRIDE. */ | 99 | directional OVERRIDE. */ |
| @@ -141,6 +108,12 @@ bidi_get_type (int ch, bidi_dir_t override) | |||
| 141 | abort (); | 108 | abort (); |
| 142 | 109 | ||
| 143 | default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch)); | 110 | default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch)); |
| 111 | /* Every valid character code, even those that are unassigned by the | ||
| 112 | UCD, have some bidi-class property, according to | ||
| 113 | DerivedBidiClass.txt file. Therefore, if we ever get UNKNOWN_BT | ||
| 114 | (= zero) code from CHAR_TABLE_REF, that's a bug. */ | ||
| 115 | if (default_type == UNKNOWN_BT) | ||
| 116 | abort (); | ||
| 144 | 117 | ||
| 145 | if (override == NEUTRAL_DIR) | 118 | if (override == NEUTRAL_DIR) |
| 146 | return default_type; | 119 | return default_type; |
| @@ -173,11 +146,10 @@ bidi_get_type (int ch, bidi_dir_t override) | |||
| 173 | } | 146 | } |
| 174 | } | 147 | } |
| 175 | 148 | ||
| 176 | static void | 149 | static inline void |
| 177 | bidi_check_type (bidi_type_t type) | 150 | bidi_check_type (bidi_type_t type) |
| 178 | { | 151 | { |
| 179 | if (type < UNKNOWN_BT || type > NEUTRAL_ON) | 152 | xassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON); |
| 180 | abort (); | ||
| 181 | } | 153 | } |
| 182 | 154 | ||
| 183 | /* Given a bidi TYPE of a character, return its category. */ | 155 | /* Given a bidi TYPE of a character, return its category. */ |
| @@ -243,6 +215,77 @@ bidi_mirror_char (int c) | |||
| 243 | return c; | 215 | return c; |
| 244 | } | 216 | } |
| 245 | 217 | ||
| 218 | /* Determine the start-of-run (sor) directional type given the two | ||
| 219 | embedding levels on either side of the run boundary. Also, update | ||
| 220 | the saved info about previously seen characters, since that info is | ||
| 221 | generally valid for a single level run. */ | ||
| 222 | static inline void | ||
| 223 | bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after) | ||
| 224 | { | ||
| 225 | int higher_level = (level_before > level_after ? level_before : level_after); | ||
| 226 | |||
| 227 | /* The prev_was_pdf gork is required for when we have several PDFs | ||
| 228 | in a row. In that case, we want to compute the sor type for the | ||
| 229 | next level run only once: when we see the first PDF. That's | ||
| 230 | because the sor type depends only on the higher of the two levels | ||
| 231 | that we find on the two sides of the level boundary (see UAX#9, | ||
| 232 | clause X10), and so we don't need to know the final embedding | ||
| 233 | level to which we descend after processing all the PDFs. */ | ||
| 234 | if (!bidi_it->prev_was_pdf || level_before < level_after) | ||
| 235 | /* FIXME: should the default sor direction be user selectable? */ | ||
| 236 | bidi_it->sor = ((higher_level & 1) != 0 ? R2L : L2R); | ||
| 237 | if (level_before > level_after) | ||
| 238 | bidi_it->prev_was_pdf = 1; | ||
| 239 | |||
| 240 | bidi_it->prev.type = UNKNOWN_BT; | ||
| 241 | bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 | ||
| 242 | = bidi_it->last_strong.orig_type = UNKNOWN_BT; | ||
| 243 | bidi_it->prev_for_neutral.type = (bidi_it->sor == R2L ? STRONG_R : STRONG_L); | ||
| 244 | bidi_it->prev_for_neutral.charpos = bidi_it->charpos; | ||
| 245 | bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos; | ||
| 246 | bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 | ||
| 247 | = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT; | ||
| 248 | bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */ | ||
| 249 | } | ||
| 250 | |||
| 251 | /* Push the current embedding level and override status; reset the | ||
| 252 | current level to LEVEL and the current override status to OVERRIDE. */ | ||
| 253 | static inline void | ||
| 254 | bidi_push_embedding_level (struct bidi_it *bidi_it, | ||
| 255 | int level, bidi_dir_t override) | ||
| 256 | { | ||
| 257 | bidi_it->stack_idx++; | ||
| 258 | xassert (bidi_it->stack_idx < BIDI_MAXLEVEL); | ||
| 259 | bidi_it->level_stack[bidi_it->stack_idx].level = level; | ||
| 260 | bidi_it->level_stack[bidi_it->stack_idx].override = override; | ||
| 261 | } | ||
| 262 | |||
| 263 | /* Pop the embedding level and directional override status from the | ||
| 264 | stack, and return the new level. */ | ||
| 265 | static inline int | ||
| 266 | bidi_pop_embedding_level (struct bidi_it *bidi_it) | ||
| 267 | { | ||
| 268 | /* UAX#9 says to ignore invalid PDFs. */ | ||
| 269 | if (bidi_it->stack_idx > 0) | ||
| 270 | bidi_it->stack_idx--; | ||
| 271 | return bidi_it->level_stack[bidi_it->stack_idx].level; | ||
| 272 | } | ||
| 273 | |||
| 274 | /* Record in SAVED_INFO the information about the current character. */ | ||
| 275 | static inline void | ||
| 276 | bidi_remember_char (struct bidi_saved_info *saved_info, | ||
| 277 | struct bidi_it *bidi_it) | ||
| 278 | { | ||
| 279 | saved_info->charpos = bidi_it->charpos; | ||
| 280 | saved_info->bytepos = bidi_it->bytepos; | ||
| 281 | saved_info->type = bidi_it->type; | ||
| 282 | bidi_check_type (bidi_it->type); | ||
| 283 | saved_info->type_after_w1 = bidi_it->type_after_w1; | ||
| 284 | bidi_check_type (bidi_it->type_after_w1); | ||
| 285 | saved_info->orig_type = bidi_it->orig_type; | ||
| 286 | bidi_check_type (bidi_it->orig_type); | ||
| 287 | } | ||
| 288 | |||
| 246 | /* Copy the bidi iterator from FROM to TO. To save cycles, this only | 289 | /* Copy the bidi iterator from FROM to TO. To save cycles, this only |
| 247 | copies the part of the level stack that is actually in use. */ | 290 | copies the part of the level stack that is actually in use. */ |
| 248 | static inline void | 291 | static inline void |
| @@ -259,40 +302,70 @@ bidi_copy_it (struct bidi_it *to, struct bidi_it *from) | |||
| 259 | to->level_stack[i] = from->level_stack[i]; | 302 | to->level_stack[i] = from->level_stack[i]; |
| 260 | } | 303 | } |
| 261 | 304 | ||
| 262 | /* Caching the bidi iterator states. */ | 305 | |
| 306 | /*********************************************************************** | ||
| 307 | Caching the bidi iterator states | ||
| 308 | ***********************************************************************/ | ||
| 263 | 309 | ||
| 264 | #define BIDI_CACHE_CHUNK 200 | 310 | #define BIDI_CACHE_CHUNK 200 |
| 265 | static struct bidi_it *bidi_cache; | 311 | static struct bidi_it *bidi_cache; |
| 266 | static size_t bidi_cache_size = 0; | 312 | static ptrdiff_t bidi_cache_size = 0; |
| 267 | static size_t elsz = sizeof (struct bidi_it); | 313 | enum { elsz = sizeof (struct bidi_it) }; |
| 268 | static int bidi_cache_idx; /* next unused cache slot */ | 314 | static ptrdiff_t bidi_cache_idx; /* next unused cache slot */ |
| 269 | static int bidi_cache_last_idx; /* slot of last cache hit */ | 315 | static ptrdiff_t bidi_cache_last_idx; /* slot of last cache hit */ |
| 270 | 316 | static ptrdiff_t bidi_cache_start = 0; /* start of cache for this | |
| 317 | "stack" level */ | ||
| 318 | |||
| 319 | /* 5-slot stack for saving the start of the previous level of the | ||
| 320 | cache. xdisp.c maintains a 5-slot stack for its iterator state, | ||
| 321 | and we need the same size of our stack. */ | ||
| 322 | static ptrdiff_t bidi_cache_start_stack[IT_STACK_SIZE]; | ||
| 323 | static int bidi_cache_sp; | ||
| 324 | |||
| 325 | /* Size of header used by bidi_shelve_cache. */ | ||
| 326 | enum | ||
| 327 | { | ||
| 328 | bidi_shelve_header_size | ||
| 329 | = (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack) | ||
| 330 | + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start) | ||
| 331 | + sizeof (bidi_cache_last_idx)) | ||
| 332 | }; | ||
| 333 | |||
| 334 | /* Reset the cache state to the empty state. We only reset the part | ||
| 335 | of the cache relevant to iteration of the current object. Previous | ||
| 336 | objects, which are pushed on the display iterator's stack, are left | ||
| 337 | intact. This is called when the cached information is no more | ||
| 338 | useful for the current iteration, e.g. when we were reseated to a | ||
| 339 | new position on the same object. */ | ||
| 271 | static inline void | 340 | static inline void |
| 272 | bidi_cache_reset (void) | 341 | bidi_cache_reset (void) |
| 273 | { | 342 | { |
| 274 | bidi_cache_idx = 0; | 343 | bidi_cache_idx = bidi_cache_start; |
| 275 | bidi_cache_last_idx = -1; | 344 | bidi_cache_last_idx = -1; |
| 276 | } | 345 | } |
| 277 | 346 | ||
| 347 | /* Shrink the cache to its minimal size. Called when we init the bidi | ||
| 348 | iterator for reordering a buffer or a string that does not come | ||
| 349 | from display properties, because that means all the previously | ||
| 350 | cached info is of no further use. */ | ||
| 278 | static inline void | 351 | static inline void |
| 279 | bidi_cache_shrink (void) | 352 | bidi_cache_shrink (void) |
| 280 | { | 353 | { |
| 281 | if (bidi_cache_size > BIDI_CACHE_CHUNK) | 354 | if (bidi_cache_size > BIDI_CACHE_CHUNK) |
| 282 | { | 355 | { |
| 356 | bidi_cache | ||
| 357 | = (struct bidi_it *) xrealloc (bidi_cache, BIDI_CACHE_CHUNK * elsz); | ||
| 283 | bidi_cache_size = BIDI_CACHE_CHUNK; | 358 | bidi_cache_size = BIDI_CACHE_CHUNK; |
| 284 | bidi_cache = | ||
| 285 | (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz); | ||
| 286 | } | 359 | } |
| 287 | bidi_cache_reset (); | 360 | bidi_cache_reset (); |
| 288 | } | 361 | } |
| 289 | 362 | ||
| 290 | static inline void | 363 | static inline void |
| 291 | bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it) | 364 | bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it) |
| 292 | { | 365 | { |
| 293 | int current_scan_dir = bidi_it->scan_dir; | 366 | int current_scan_dir = bidi_it->scan_dir; |
| 294 | 367 | ||
| 295 | if (idx < 0 || idx >= bidi_cache_idx) | 368 | if (idx < bidi_cache_start || idx >= bidi_cache_idx) |
| 296 | abort (); | 369 | abort (); |
| 297 | 370 | ||
| 298 | bidi_copy_it (bidi_it, &bidi_cache[idx]); | 371 | bidi_copy_it (bidi_it, &bidi_cache[idx]); |
| @@ -304,13 +377,15 @@ bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it) | |||
| 304 | level less or equal to LEVEL. if LEVEL is -1, disregard the | 377 | level less or equal to LEVEL. if LEVEL is -1, disregard the |
| 305 | resolved levels in cached states. DIR, if non-zero, means search | 378 | resolved levels in cached states. DIR, if non-zero, means search |
| 306 | in that direction from the last cache hit. */ | 379 | in that direction from the last cache hit. */ |
| 307 | static inline int | 380 | static inline ptrdiff_t |
| 308 | bidi_cache_search (EMACS_INT charpos, int level, int dir) | 381 | bidi_cache_search (EMACS_INT charpos, int level, int dir) |
| 309 | { | 382 | { |
| 310 | int i, i_start; | 383 | ptrdiff_t i, i_start; |
| 311 | 384 | ||
| 312 | if (bidi_cache_idx) | 385 | if (bidi_cache_idx > bidi_cache_start) |
| 313 | { | 386 | { |
| 387 | if (bidi_cache_last_idx == -1) | ||
| 388 | bidi_cache_last_idx = bidi_cache_idx - 1; | ||
| 314 | if (charpos < bidi_cache[bidi_cache_last_idx].charpos) | 389 | if (charpos < bidi_cache[bidi_cache_last_idx].charpos) |
| 315 | { | 390 | { |
| 316 | dir = -1; | 391 | dir = -1; |
| @@ -333,7 +408,7 @@ bidi_cache_search (EMACS_INT charpos, int level, int dir) | |||
| 333 | if (dir < 0) | 408 | if (dir < 0) |
| 334 | { | 409 | { |
| 335 | /* Linear search for now; FIXME! */ | 410 | /* Linear search for now; FIXME! */ |
| 336 | for (i = i_start; i >= 0; i--) | 411 | for (i = i_start; i >= bidi_cache_start; i--) |
| 337 | if (bidi_cache[i].charpos <= charpos | 412 | if (bidi_cache[i].charpos <= charpos |
| 338 | && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars | 413 | && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars |
| 339 | && (level == -1 || bidi_cache[i].resolved_level <= level)) | 414 | && (level == -1 || bidi_cache[i].resolved_level <= level)) |
| @@ -355,8 +430,9 @@ bidi_cache_search (EMACS_INT charpos, int level, int dir) | |||
| 355 | /* Find a cached state where the resolved level changes to a value | 430 | /* Find a cached state where the resolved level changes to a value |
| 356 | that is lower than LEVEL, and return its cache slot index. DIR is | 431 | that is lower than LEVEL, and return its cache slot index. DIR is |
| 357 | the direction to search, starting with the last used cache slot. | 432 | the direction to search, starting with the last used cache slot. |
| 358 | BEFORE, if non-zero, means return the index of the slot that is | 433 | If DIR is zero, we search backwards from the last occupied cache |
| 359 | ``before'' the level change in the search direction. That is, | 434 | slot. BEFORE, if non-zero, means return the index of the slot that |
| 435 | is ``before'' the level change in the search direction. That is, | ||
| 360 | given the cached levels like this: | 436 | given the cached levels like this: |
| 361 | 437 | ||
| 362 | 1122333442211 | 438 | 1122333442211 |
| @@ -366,14 +442,16 @@ bidi_cache_search (EMACS_INT charpos, int level, int dir) | |||
| 366 | C, searching backwards (DIR = -1) for LEVEL = 2 will return the | 442 | C, searching backwards (DIR = -1) for LEVEL = 2 will return the |
| 367 | index of slot B or A, depending whether BEFORE is, respectively, | 443 | index of slot B or A, depending whether BEFORE is, respectively, |
| 368 | non-zero or zero. */ | 444 | non-zero or zero. */ |
| 369 | static int | 445 | static ptrdiff_t |
| 370 | bidi_cache_find_level_change (int level, int dir, int before) | 446 | bidi_cache_find_level_change (int level, int dir, int before) |
| 371 | { | 447 | { |
| 372 | if (bidi_cache_idx) | 448 | if (bidi_cache_idx) |
| 373 | { | 449 | { |
| 374 | int i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1; | 450 | ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1; |
| 375 | int incr = before ? 1 : 0; | 451 | int incr = before ? 1 : 0; |
| 376 | 452 | ||
| 453 | xassert (!dir || bidi_cache_last_idx >= 0); | ||
| 454 | |||
| 377 | if (!dir) | 455 | if (!dir) |
| 378 | dir = -1; | 456 | dir = -1; |
| 379 | else if (!incr) | 457 | else if (!incr) |
| @@ -381,7 +459,7 @@ bidi_cache_find_level_change (int level, int dir, int before) | |||
| 381 | 459 | ||
| 382 | if (dir < 0) | 460 | if (dir < 0) |
| 383 | { | 461 | { |
| 384 | while (i >= incr) | 462 | while (i >= bidi_cache_start + incr) |
| 385 | { | 463 | { |
| 386 | if (bidi_cache[i - incr].resolved_level >= 0 | 464 | if (bidi_cache[i - incr].resolved_level >= 0 |
| 387 | && bidi_cache[i - incr].resolved_level < level) | 465 | && bidi_cache[i - incr].resolved_level < level) |
| @@ -405,9 +483,31 @@ bidi_cache_find_level_change (int level, int dir, int before) | |||
| 405 | } | 483 | } |
| 406 | 484 | ||
| 407 | static inline void | 485 | static inline void |
| 486 | bidi_cache_ensure_space (ptrdiff_t idx) | ||
| 487 | { | ||
| 488 | /* Enlarge the cache as needed. */ | ||
| 489 | if (idx >= bidi_cache_size) | ||
| 490 | { | ||
| 491 | /* The bidi cache cannot be larger than the largest Lisp string | ||
| 492 | or buffer. */ | ||
| 493 | ptrdiff_t string_or_buffer_bound | ||
| 494 | = max (BUF_BYTES_MAX, STRING_BYTES_BOUND); | ||
| 495 | |||
| 496 | /* Also, it cannot be larger than what C can represent. */ | ||
| 497 | ptrdiff_t c_bound | ||
| 498 | = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz; | ||
| 499 | |||
| 500 | bidi_cache | ||
| 501 | = xpalloc (bidi_cache, &bidi_cache_size, | ||
| 502 | max (BIDI_CACHE_CHUNK, idx - bidi_cache_size + 1), | ||
| 503 | min (string_or_buffer_bound, c_bound), elsz); | ||
| 504 | } | ||
| 505 | } | ||
| 506 | |||
| 507 | static inline void | ||
| 408 | bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved) | 508 | bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved) |
| 409 | { | 509 | { |
| 410 | int idx; | 510 | ptrdiff_t idx; |
| 411 | 511 | ||
| 412 | /* We should never cache on backward scans. */ | 512 | /* We should never cache on backward scans. */ |
| 413 | if (bidi_it->scan_dir == -1) | 513 | if (bidi_it->scan_dir == -1) |
| @@ -417,23 +517,17 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved) | |||
| 417 | if (idx < 0) | 517 | if (idx < 0) |
| 418 | { | 518 | { |
| 419 | idx = bidi_cache_idx; | 519 | idx = bidi_cache_idx; |
| 420 | /* Enlarge the cache as needed. */ | 520 | bidi_cache_ensure_space (idx); |
| 421 | if (idx >= bidi_cache_size) | ||
| 422 | { | ||
| 423 | bidi_cache_size += BIDI_CACHE_CHUNK; | ||
| 424 | bidi_cache = | ||
| 425 | (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz); | ||
| 426 | } | ||
| 427 | /* Character positions should correspond to cache positions 1:1. | 521 | /* Character positions should correspond to cache positions 1:1. |
| 428 | If we are outside the range of cached positions, the cache is | 522 | If we are outside the range of cached positions, the cache is |
| 429 | useless and must be reset. */ | 523 | useless and must be reset. */ |
| 430 | if (idx > 0 && | 524 | if (idx > bidi_cache_start && |
| 431 | (bidi_it->charpos > (bidi_cache[idx - 1].charpos | 525 | (bidi_it->charpos > (bidi_cache[idx - 1].charpos |
| 432 | + bidi_cache[idx - 1].nchars) | 526 | + bidi_cache[idx - 1].nchars) |
| 433 | || bidi_it->charpos < bidi_cache[0].charpos)) | 527 | || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos)) |
| 434 | { | 528 | { |
| 435 | bidi_cache_reset (); | 529 | bidi_cache_reset (); |
| 436 | idx = 0; | 530 | idx = bidi_cache_start; |
| 437 | } | 531 | } |
| 438 | if (bidi_it->nchars <= 0) | 532 | if (bidi_it->nchars <= 0) |
| 439 | abort (); | 533 | abort (); |
| @@ -458,6 +552,8 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved) | |||
| 458 | bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral; | 552 | bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral; |
| 459 | bidi_cache[idx].next_for_ws = bidi_it->next_for_ws; | 553 | bidi_cache[idx].next_for_ws = bidi_it->next_for_ws; |
| 460 | bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit; | 554 | bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit; |
| 555 | bidi_cache[idx].disp_pos = bidi_it->disp_pos; | ||
| 556 | bidi_cache[idx].disp_prop = bidi_it->disp_prop; | ||
| 461 | } | 557 | } |
| 462 | 558 | ||
| 463 | bidi_cache_last_idx = idx; | 559 | bidi_cache_last_idx = idx; |
| @@ -468,15 +564,15 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved) | |||
| 468 | static inline bidi_type_t | 564 | static inline bidi_type_t |
| 469 | bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it) | 565 | bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it) |
| 470 | { | 566 | { |
| 471 | int i = bidi_cache_search (charpos, level, bidi_it->scan_dir); | 567 | ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir); |
| 472 | 568 | ||
| 473 | if (i >= 0) | 569 | if (i >= bidi_cache_start) |
| 474 | { | 570 | { |
| 475 | bidi_dir_t current_scan_dir = bidi_it->scan_dir; | 571 | bidi_dir_t current_scan_dir = bidi_it->scan_dir; |
| 476 | 572 | ||
| 477 | bidi_copy_it (bidi_it, &bidi_cache[i]); | 573 | bidi_copy_it (bidi_it, &bidi_cache[i]); |
| 478 | bidi_cache_last_idx = i; | 574 | bidi_cache_last_idx = i; |
| 479 | /* Don't let scan direction from from the cached state override | 575 | /* Don't let scan direction from the cached state override |
| 480 | the current scan direction. */ | 576 | the current scan direction. */ |
| 481 | bidi_it->scan_dir = current_scan_dir; | 577 | bidi_it->scan_dir = current_scan_dir; |
| 482 | return bidi_it->type; | 578 | return bidi_it->type; |
| @@ -488,69 +584,257 @@ bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it) | |||
| 488 | static inline int | 584 | static inline int |
| 489 | bidi_peek_at_next_level (struct bidi_it *bidi_it) | 585 | bidi_peek_at_next_level (struct bidi_it *bidi_it) |
| 490 | { | 586 | { |
| 491 | if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1) | 587 | if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1) |
| 492 | abort (); | 588 | abort (); |
| 493 | return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level; | 589 | return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level; |
| 494 | } | 590 | } |
| 495 | 591 | ||
| 496 | /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph. | 592 | |
| 497 | Value is the non-negative length of the paragraph separator | 593 | /*********************************************************************** |
| 498 | following the buffer position, -1 if position is at the beginning | 594 | Pushing and popping the bidi iterator state |
| 499 | of a new paragraph, or -2 if position is neither at beginning nor | 595 | ***********************************************************************/ |
| 500 | at end of a paragraph. */ | 596 | |
| 501 | static EMACS_INT | 597 | /* Push the bidi iterator state in preparation for reordering a |
| 502 | bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos) | 598 | different object, e.g. display string found at certain buffer |
| 599 | position. Pushing the bidi iterator boils down to saving its | ||
| 600 | entire state on the cache and starting a new cache "stacked" on top | ||
| 601 | of the current cache. */ | ||
| 602 | void | ||
| 603 | bidi_push_it (struct bidi_it *bidi_it) | ||
| 503 | { | 604 | { |
| 504 | Lisp_Object sep_re; | 605 | /* Save the current iterator state in its entirety after the last |
| 505 | Lisp_Object start_re; | 606 | used cache slot. */ |
| 506 | EMACS_INT val; | 607 | bidi_cache_ensure_space (bidi_cache_idx); |
| 608 | memcpy (&bidi_cache[bidi_cache_idx++], bidi_it, sizeof (struct bidi_it)); | ||
| 507 | 609 | ||
| 508 | sep_re = paragraph_separate_re; | 610 | /* Push the current cache start onto the stack. */ |
| 509 | start_re = paragraph_start_re; | 611 | xassert (bidi_cache_sp < IT_STACK_SIZE); |
| 612 | bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start; | ||
| 510 | 613 | ||
| 511 | val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil); | 614 | /* Start a new level of cache, and make it empty. */ |
| 512 | if (val < 0) | 615 | bidi_cache_start = bidi_cache_idx; |
| 616 | bidi_cache_last_idx = -1; | ||
| 617 | } | ||
| 618 | |||
| 619 | /* Restore the iterator state saved by bidi_push_it and return the | ||
| 620 | cache to the corresponding state. */ | ||
| 621 | void | ||
| 622 | bidi_pop_it (struct bidi_it *bidi_it) | ||
| 623 | { | ||
| 624 | if (bidi_cache_start <= 0) | ||
| 625 | abort (); | ||
| 626 | |||
| 627 | /* Reset the next free cache slot index to what it was before the | ||
| 628 | call to bidi_push_it. */ | ||
| 629 | bidi_cache_idx = bidi_cache_start - 1; | ||
| 630 | |||
| 631 | /* Restore the bidi iterator state saved in the cache. */ | ||
| 632 | memcpy (bidi_it, &bidi_cache[bidi_cache_idx], sizeof (struct bidi_it)); | ||
| 633 | |||
| 634 | /* Pop the previous cache start from the stack. */ | ||
| 635 | if (bidi_cache_sp <= 0) | ||
| 636 | abort (); | ||
| 637 | bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp]; | ||
| 638 | |||
| 639 | /* Invalidate the last-used cache slot data. */ | ||
| 640 | bidi_cache_last_idx = -1; | ||
| 641 | } | ||
| 642 | |||
| 643 | static ptrdiff_t bidi_cache_total_alloc; | ||
| 644 | |||
| 645 | /* Stash away a copy of the cache and its control variables. */ | ||
| 646 | void * | ||
| 647 | bidi_shelve_cache (void) | ||
| 648 | { | ||
| 649 | unsigned char *databuf; | ||
| 650 | ptrdiff_t alloc; | ||
| 651 | |||
| 652 | /* Empty cache. */ | ||
| 653 | if (bidi_cache_idx == 0) | ||
| 654 | return NULL; | ||
| 655 | |||
| 656 | alloc = (bidi_shelve_header_size | ||
| 657 | + bidi_cache_idx * sizeof (struct bidi_it)); | ||
| 658 | databuf = xmalloc (alloc); | ||
| 659 | bidi_cache_total_alloc += alloc; | ||
| 660 | |||
| 661 | memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx)); | ||
| 662 | memcpy (databuf + sizeof (bidi_cache_idx), | ||
| 663 | bidi_cache, bidi_cache_idx * sizeof (struct bidi_it)); | ||
| 664 | memcpy (databuf + sizeof (bidi_cache_idx) | ||
| 665 | + bidi_cache_idx * sizeof (struct bidi_it), | ||
| 666 | bidi_cache_start_stack, sizeof (bidi_cache_start_stack)); | ||
| 667 | memcpy (databuf + sizeof (bidi_cache_idx) | ||
| 668 | + bidi_cache_idx * sizeof (struct bidi_it) | ||
| 669 | + sizeof (bidi_cache_start_stack), | ||
| 670 | &bidi_cache_sp, sizeof (bidi_cache_sp)); | ||
| 671 | memcpy (databuf + sizeof (bidi_cache_idx) | ||
| 672 | + bidi_cache_idx * sizeof (struct bidi_it) | ||
| 673 | + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp), | ||
| 674 | &bidi_cache_start, sizeof (bidi_cache_start)); | ||
| 675 | memcpy (databuf + sizeof (bidi_cache_idx) | ||
| 676 | + bidi_cache_idx * sizeof (struct bidi_it) | ||
| 677 | + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp) | ||
| 678 | + sizeof (bidi_cache_start), | ||
| 679 | &bidi_cache_last_idx, sizeof (bidi_cache_last_idx)); | ||
| 680 | |||
| 681 | return databuf; | ||
| 682 | } | ||
| 683 | |||
| 684 | /* Restore the cache state from a copy stashed away by | ||
| 685 | bidi_shelve_cache, and free the buffer used to stash that copy. | ||
| 686 | JUST_FREE non-zero means free the buffer, but don't restore the | ||
| 687 | cache; used when the corresponding iterator is discarded instead of | ||
| 688 | being restored. */ | ||
| 689 | void | ||
| 690 | bidi_unshelve_cache (void *databuf, int just_free) | ||
| 691 | { | ||
| 692 | unsigned char *p = databuf; | ||
| 693 | |||
| 694 | if (!p) | ||
| 513 | { | 695 | { |
| 514 | if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0) | 696 | if (!just_free) |
| 515 | val = -1; | 697 | { |
| 698 | /* A NULL pointer means an empty cache. */ | ||
| 699 | bidi_cache_start = 0; | ||
| 700 | bidi_cache_sp = 0; | ||
| 701 | bidi_cache_reset (); | ||
| 702 | } | ||
| 703 | } | ||
| 704 | else | ||
| 705 | { | ||
| 706 | if (just_free) | ||
| 707 | { | ||
| 708 | ptrdiff_t idx; | ||
| 709 | |||
| 710 | memcpy (&idx, p, sizeof (bidi_cache_idx)); | ||
| 711 | bidi_cache_total_alloc | ||
| 712 | -= bidi_shelve_header_size + idx * sizeof (struct bidi_it); | ||
| 713 | } | ||
| 516 | else | 714 | else |
| 517 | val = -2; | 715 | { |
| 716 | memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx)); | ||
| 717 | bidi_cache_ensure_space (bidi_cache_idx); | ||
| 718 | memcpy (bidi_cache, p + sizeof (bidi_cache_idx), | ||
| 719 | bidi_cache_idx * sizeof (struct bidi_it)); | ||
| 720 | memcpy (bidi_cache_start_stack, | ||
| 721 | p + sizeof (bidi_cache_idx) | ||
| 722 | + bidi_cache_idx * sizeof (struct bidi_it), | ||
| 723 | sizeof (bidi_cache_start_stack)); | ||
| 724 | memcpy (&bidi_cache_sp, | ||
| 725 | p + sizeof (bidi_cache_idx) | ||
| 726 | + bidi_cache_idx * sizeof (struct bidi_it) | ||
| 727 | + sizeof (bidi_cache_start_stack), | ||
| 728 | sizeof (bidi_cache_sp)); | ||
| 729 | memcpy (&bidi_cache_start, | ||
| 730 | p + sizeof (bidi_cache_idx) | ||
| 731 | + bidi_cache_idx * sizeof (struct bidi_it) | ||
| 732 | + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp), | ||
| 733 | sizeof (bidi_cache_start)); | ||
| 734 | memcpy (&bidi_cache_last_idx, | ||
| 735 | p + sizeof (bidi_cache_idx) | ||
| 736 | + bidi_cache_idx * sizeof (struct bidi_it) | ||
| 737 | + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp) | ||
| 738 | + sizeof (bidi_cache_start), | ||
| 739 | sizeof (bidi_cache_last_idx)); | ||
| 740 | bidi_cache_total_alloc | ||
| 741 | -= (bidi_shelve_header_size | ||
| 742 | + bidi_cache_idx * sizeof (struct bidi_it)); | ||
| 743 | } | ||
| 744 | |||
| 745 | xfree (p); | ||
| 518 | } | 746 | } |
| 747 | } | ||
| 519 | 748 | ||
| 520 | return val; | 749 | |
| 750 | /*********************************************************************** | ||
| 751 | Initialization | ||
| 752 | ***********************************************************************/ | ||
| 753 | static void | ||
| 754 | bidi_initialize (void) | ||
| 755 | { | ||
| 756 | bidi_type_table = uniprop_table (intern ("bidi-class")); | ||
| 757 | if (NILP (bidi_type_table)) | ||
| 758 | abort (); | ||
| 759 | staticpro (&bidi_type_table); | ||
| 760 | |||
| 761 | bidi_mirror_table = uniprop_table (intern ("mirroring")); | ||
| 762 | if (NILP (bidi_mirror_table)) | ||
| 763 | abort (); | ||
| 764 | staticpro (&bidi_mirror_table); | ||
| 765 | |||
| 766 | Qparagraph_start = intern ("paragraph-start"); | ||
| 767 | staticpro (&Qparagraph_start); | ||
| 768 | paragraph_start_re = Fsymbol_value (Qparagraph_start); | ||
| 769 | if (!STRINGP (paragraph_start_re)) | ||
| 770 | paragraph_start_re = build_string ("\f\\|[ \t]*$"); | ||
| 771 | staticpro (&paragraph_start_re); | ||
| 772 | Qparagraph_separate = intern ("paragraph-separate"); | ||
| 773 | staticpro (&Qparagraph_separate); | ||
| 774 | paragraph_separate_re = Fsymbol_value (Qparagraph_separate); | ||
| 775 | if (!STRINGP (paragraph_separate_re)) | ||
| 776 | paragraph_separate_re = build_string ("[ \t\f]*$"); | ||
| 777 | staticpro (&paragraph_separate_re); | ||
| 778 | |||
| 779 | bidi_cache_sp = 0; | ||
| 780 | bidi_cache_total_alloc = 0; | ||
| 781 | |||
| 782 | bidi_initialized = 1; | ||
| 521 | } | 783 | } |
| 522 | 784 | ||
| 523 | /* Determine the start-of-run (sor) directional type given the two | 785 | /* Do whatever UAX#9 clause X8 says should be done at paragraph's |
| 524 | embedding levels on either side of the run boundary. Also, update | 786 | end. */ |
| 525 | the saved info about previously seen characters, since that info is | ||
| 526 | generally valid for a single level run. */ | ||
| 527 | static inline void | 787 | static inline void |
| 528 | bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after) | 788 | bidi_set_paragraph_end (struct bidi_it *bidi_it) |
| 529 | { | 789 | { |
| 530 | int higher_level = level_before > level_after ? level_before : level_after; | 790 | bidi_it->invalid_levels = 0; |
| 531 | 791 | bidi_it->invalid_rl_levels = -1; | |
| 532 | /* The prev_was_pdf gork is required for when we have several PDFs | 792 | bidi_it->stack_idx = 0; |
| 533 | in a row. In that case, we want to compute the sor type for the | 793 | bidi_it->resolved_level = bidi_it->level_stack[0].level; |
| 534 | next level run only once: when we see the first PDF. That's | 794 | } |
| 535 | because the sor type depends only on the higher of the two levels | ||
| 536 | that we find on the two sides of the level boundary (see UAX#9, | ||
| 537 | clause X10), and so we don't need to know the final embedding | ||
| 538 | level to which we descend after processing all the PDFs. */ | ||
| 539 | if (!bidi_it->prev_was_pdf || level_before < level_after) | ||
| 540 | /* FIXME: should the default sor direction be user selectable? */ | ||
| 541 | bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R; | ||
| 542 | if (level_before > level_after) | ||
| 543 | bidi_it->prev_was_pdf = 1; | ||
| 544 | 795 | ||
| 545 | bidi_it->prev.type = UNKNOWN_BT; | 796 | /* Initialize the bidi iterator from buffer/string position CHARPOS. */ |
| 546 | bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 = | 797 | void |
| 547 | bidi_it->last_strong.orig_type = UNKNOWN_BT; | 798 | bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p, |
| 548 | bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L; | 799 | struct bidi_it *bidi_it) |
| 549 | bidi_it->prev_for_neutral.charpos = bidi_it->charpos; | 800 | { |
| 550 | bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos; | 801 | if (! bidi_initialized) |
| 551 | bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 = | 802 | bidi_initialize (); |
| 552 | bidi_it->next_for_neutral.orig_type = UNKNOWN_BT; | 803 | if (charpos >= 0) |
| 553 | bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */ | 804 | bidi_it->charpos = charpos; |
| 805 | if (bytepos >= 0) | ||
| 806 | bidi_it->bytepos = bytepos; | ||
| 807 | bidi_it->frame_window_p = frame_window_p; | ||
| 808 | bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */ | ||
| 809 | bidi_it->first_elt = 1; | ||
| 810 | bidi_set_paragraph_end (bidi_it); | ||
| 811 | bidi_it->new_paragraph = 1; | ||
| 812 | bidi_it->separator_limit = -1; | ||
| 813 | bidi_it->type = NEUTRAL_B; | ||
| 814 | bidi_it->type_after_w1 = NEUTRAL_B; | ||
| 815 | bidi_it->orig_type = NEUTRAL_B; | ||
| 816 | bidi_it->prev_was_pdf = 0; | ||
| 817 | bidi_it->prev.type = bidi_it->prev.type_after_w1 | ||
| 818 | = bidi_it->prev.orig_type = UNKNOWN_BT; | ||
| 819 | bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 | ||
| 820 | = bidi_it->last_strong.orig_type = UNKNOWN_BT; | ||
| 821 | bidi_it->next_for_neutral.charpos = -1; | ||
| 822 | bidi_it->next_for_neutral.type | ||
| 823 | = bidi_it->next_for_neutral.type_after_w1 | ||
| 824 | = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT; | ||
| 825 | bidi_it->prev_for_neutral.charpos = -1; | ||
| 826 | bidi_it->prev_for_neutral.type | ||
| 827 | = bidi_it->prev_for_neutral.type_after_w1 | ||
| 828 | = bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT; | ||
| 829 | bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */ | ||
| 830 | bidi_it->disp_pos = -1; /* invalid/unknown */ | ||
| 831 | bidi_it->disp_prop = 0; | ||
| 832 | /* We can only shrink the cache if we are at the bottom level of its | ||
| 833 | "stack". */ | ||
| 834 | if (bidi_cache_start == 0) | ||
| 835 | bidi_cache_shrink (); | ||
| 836 | else | ||
| 837 | bidi_cache_reset (); | ||
| 554 | } | 838 | } |
| 555 | 839 | ||
| 556 | /* Perform initializations for reordering a new line of bidi text. */ | 840 | /* Perform initializations for reordering a new line of bidi text. */ |
| @@ -562,44 +846,113 @@ bidi_line_init (struct bidi_it *bidi_it) | |||
| 562 | bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */ | 846 | bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */ |
| 563 | bidi_it->invalid_levels = 0; | 847 | bidi_it->invalid_levels = 0; |
| 564 | bidi_it->invalid_rl_levels = -1; | 848 | bidi_it->invalid_rl_levels = -1; |
| 565 | bidi_it->next_en_pos = -1; | 849 | /* Setting this to zero will force its recomputation the first time |
| 850 | we need it for W5. */ | ||
| 851 | bidi_it->next_en_pos = 0; | ||
| 852 | bidi_it->next_en_type = UNKNOWN_BT; | ||
| 566 | bidi_it->next_for_ws.type = UNKNOWN_BT; | 853 | bidi_it->next_for_ws.type = UNKNOWN_BT; |
| 567 | bidi_set_sor_type (bidi_it, | 854 | bidi_set_sor_type (bidi_it, |
| 568 | bidi_it->paragraph_dir == R2L ? 1 : 0, | 855 | (bidi_it->paragraph_dir == R2L ? 1 : 0), |
| 569 | bidi_it->level_stack[0].level); /* X10 */ | 856 | bidi_it->level_stack[0].level); /* X10 */ |
| 570 | 857 | ||
| 571 | bidi_cache_reset (); | 858 | bidi_cache_reset (); |
| 572 | } | 859 | } |
| 573 | 860 | ||
| 861 | |||
| 862 | /*********************************************************************** | ||
| 863 | Fetching characters | ||
| 864 | ***********************************************************************/ | ||
| 865 | |||
| 866 | /* Count bytes in string S between BEG/BEGBYTE and END. BEG and END | ||
| 867 | are zero-based character positions in S, BEGBYTE is byte position | ||
| 868 | corresponding to BEG. UNIBYTE, if non-zero, means S is a unibyte | ||
| 869 | string. */ | ||
| 870 | static inline EMACS_INT | ||
| 871 | bidi_count_bytes (const unsigned char *s, const EMACS_INT beg, | ||
| 872 | const EMACS_INT begbyte, const EMACS_INT end, int unibyte) | ||
| 873 | { | ||
| 874 | EMACS_INT pos = beg; | ||
| 875 | const unsigned char *p = s + begbyte, *start = p; | ||
| 876 | |||
| 877 | if (unibyte) | ||
| 878 | p = s + end; | ||
| 879 | else | ||
| 880 | { | ||
| 881 | if (!CHAR_HEAD_P (*p)) | ||
| 882 | abort (); | ||
| 883 | |||
| 884 | while (pos < end) | ||
| 885 | { | ||
| 886 | p += BYTES_BY_CHAR_HEAD (*p); | ||
| 887 | pos++; | ||
| 888 | } | ||
| 889 | } | ||
| 890 | |||
| 891 | return p - start; | ||
| 892 | } | ||
| 893 | |||
| 894 | /* Fetch and return the character at byte position BYTEPOS. If S is | ||
| 895 | non-NULL, fetch the character from string S; otherwise fetch the | ||
| 896 | character from the current buffer. UNIBYTE non-zero means S is a | ||
| 897 | unibyte string. */ | ||
| 898 | static inline int | ||
| 899 | bidi_char_at_pos (EMACS_INT bytepos, const unsigned char *s, int unibyte) | ||
| 900 | { | ||
| 901 | if (s) | ||
| 902 | { | ||
| 903 | if (unibyte) | ||
| 904 | return s[bytepos]; | ||
| 905 | else | ||
| 906 | return STRING_CHAR (s + bytepos); | ||
| 907 | } | ||
| 908 | else | ||
| 909 | return FETCH_MULTIBYTE_CHAR (bytepos); | ||
| 910 | } | ||
| 911 | |||
| 574 | /* Fetch and return the character at BYTEPOS/CHARPOS. If that | 912 | /* Fetch and return the character at BYTEPOS/CHARPOS. If that |
| 575 | character is covered by a display string, treat the entire run of | 913 | character is covered by a display string, treat the entire run of |
| 576 | covered characters as a single character u+FFFC, and return their | 914 | covered characters as a single character, either u+2029 or u+FFFC, |
| 577 | combined length in CH_LEN and NCHARS. DISP_POS specifies the | 915 | and return their combined length in CH_LEN and NCHARS. DISP_POS |
| 578 | character position of the next display string, or -1 if not yet | 916 | specifies the character position of the next display string, or -1 |
| 579 | computed. When the next character is at or beyond that position, | 917 | if not yet computed. When the next character is at or beyond that |
| 580 | the function updates DISP_POS with the position of the next display | 918 | position, the function updates DISP_POS with the position of the |
| 581 | string. */ | 919 | next display string. DISP_PROP non-zero means that there's really |
| 920 | a display string at DISP_POS, as opposed to when we searched till | ||
| 921 | DISP_POS without finding one. If DISP_PROP is 2, it means the | ||
| 922 | display spec is of the form `(space ...)', which is replaced with | ||
| 923 | u+2029 to handle it as a paragraph separator. STRING->s is the C | ||
| 924 | string to iterate, or NULL if iterating over a buffer or a Lisp | ||
| 925 | string; in the latter case, STRING->lstring is the Lisp string. */ | ||
| 582 | static inline int | 926 | static inline int |
| 583 | bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos, | 927 | bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos, |
| 928 | int *disp_prop, struct bidi_string_data *string, | ||
| 584 | int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars) | 929 | int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars) |
| 585 | { | 930 | { |
| 586 | int ch; | 931 | int ch; |
| 932 | EMACS_INT endpos | ||
| 933 | = (string->s || STRINGP (string->lstring)) ? string->schars : ZV; | ||
| 934 | struct text_pos pos; | ||
| 935 | int len; | ||
| 587 | 936 | ||
| 588 | /* FIXME: Support strings in addition to buffers. */ | ||
| 589 | /* If we got past the last known position of display string, compute | 937 | /* If we got past the last known position of display string, compute |
| 590 | the position of the next one. That position could be at BYTEPOS. */ | 938 | the position of the next one. That position could be at CHARPOS. */ |
| 591 | if (charpos < ZV && charpos > *disp_pos) | 939 | if (charpos < endpos && charpos > *disp_pos) |
| 592 | *disp_pos = compute_display_string_pos (charpos, frame_window_p); | 940 | { |
| 941 | SET_TEXT_POS (pos, charpos, bytepos); | ||
| 942 | *disp_pos = compute_display_string_pos (&pos, string, frame_window_p, | ||
| 943 | disp_prop); | ||
| 944 | } | ||
| 593 | 945 | ||
| 594 | /* Fetch the character at BYTEPOS. */ | 946 | /* Fetch the character at BYTEPOS. */ |
| 595 | if (bytepos >= ZV_BYTE) | 947 | if (charpos >= endpos) |
| 596 | { | 948 | { |
| 597 | ch = BIDI_EOB; | 949 | ch = BIDI_EOB; |
| 598 | *ch_len = 1; | 950 | *ch_len = 1; |
| 599 | *nchars = 1; | 951 | *nchars = 1; |
| 600 | *disp_pos = ZV; | 952 | *disp_pos = endpos; |
| 953 | *disp_prop = 0; | ||
| 601 | } | 954 | } |
| 602 | else if (charpos >= *disp_pos) | 955 | else if (charpos >= *disp_pos && *disp_prop) |
| 603 | { | 956 | { |
| 604 | EMACS_INT disp_end_pos; | 957 | EMACS_INT disp_end_pos; |
| 605 | 958 | ||
| @@ -607,38 +960,148 @@ bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos, | |||
| 607 | property. Hopefully, it will never be needed. */ | 960 | property. Hopefully, it will never be needed. */ |
| 608 | if (charpos > *disp_pos) | 961 | if (charpos > *disp_pos) |
| 609 | abort (); | 962 | abort (); |
| 610 | /* Return the Unicode Object Replacement Character to represent | 963 | /* Text covered by `display' properties and overlays with |
| 611 | the entire run of characters covered by the display | 964 | display properties or display strings is handled as a single |
| 612 | string. */ | 965 | character that represents the entire run of characters |
| 613 | ch = 0xFFFC; | 966 | covered by the display property. */ |
| 614 | disp_end_pos = compute_display_string_end (*disp_pos); | 967 | if (*disp_prop == 2) |
| 968 | { | ||
| 969 | /* `(space ...)' display specs are handled as paragraph | ||
| 970 | separators for the purposes of the reordering; see UAX#9 | ||
| 971 | section 3 and clause HL1 in section 4.3 there. */ | ||
| 972 | ch = 0x2029; | ||
| 973 | } | ||
| 974 | else | ||
| 975 | { | ||
| 976 | /* All other display specs are handled as the Unicode Object | ||
| 977 | Replacement Character. */ | ||
| 978 | ch = 0xFFFC; | ||
| 979 | } | ||
| 980 | disp_end_pos = compute_display_string_end (*disp_pos, string); | ||
| 981 | if (disp_end_pos < 0) | ||
| 982 | { | ||
| 983 | /* Somebody removed the display string from the buffer | ||
| 984 | behind our back. Recover by processing this buffer | ||
| 985 | position as if no display property were present there to | ||
| 986 | begin with. */ | ||
| 987 | *disp_prop = 0; | ||
| 988 | goto normal_char; | ||
| 989 | } | ||
| 615 | *nchars = disp_end_pos - *disp_pos; | 990 | *nchars = disp_end_pos - *disp_pos; |
| 616 | *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos; | 991 | if (*nchars <= 0) |
| 992 | abort (); | ||
| 993 | if (string->s) | ||
| 994 | *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos, | ||
| 995 | disp_end_pos, string->unibyte); | ||
| 996 | else if (STRINGP (string->lstring)) | ||
| 997 | *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos, | ||
| 998 | bytepos, disp_end_pos, string->unibyte); | ||
| 999 | else | ||
| 1000 | *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos; | ||
| 617 | } | 1001 | } |
| 618 | else | 1002 | else |
| 619 | { | 1003 | { |
| 620 | ch = FETCH_MULTIBYTE_CHAR (bytepos); | 1004 | normal_char: |
| 1005 | if (string->s) | ||
| 1006 | { | ||
| 1007 | |||
| 1008 | if (!string->unibyte) | ||
| 1009 | { | ||
| 1010 | ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len); | ||
| 1011 | *ch_len = len; | ||
| 1012 | } | ||
| 1013 | else | ||
| 1014 | { | ||
| 1015 | ch = UNIBYTE_TO_CHAR (string->s[bytepos]); | ||
| 1016 | *ch_len = 1; | ||
| 1017 | } | ||
| 1018 | } | ||
| 1019 | else if (STRINGP (string->lstring)) | ||
| 1020 | { | ||
| 1021 | if (!string->unibyte) | ||
| 1022 | { | ||
| 1023 | ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos, | ||
| 1024 | len); | ||
| 1025 | *ch_len = len; | ||
| 1026 | } | ||
| 1027 | else | ||
| 1028 | { | ||
| 1029 | ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos)); | ||
| 1030 | *ch_len = 1; | ||
| 1031 | } | ||
| 1032 | } | ||
| 1033 | else | ||
| 1034 | { | ||
| 1035 | ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (bytepos), len); | ||
| 1036 | *ch_len = len; | ||
| 1037 | } | ||
| 621 | *nchars = 1; | 1038 | *nchars = 1; |
| 622 | *ch_len = CHAR_BYTES (ch); | ||
| 623 | } | 1039 | } |
| 624 | 1040 | ||
| 625 | /* If we just entered a run of characters covered by a display | 1041 | /* If we just entered a run of characters covered by a display |
| 626 | string, compute the position of the next display string. */ | 1042 | string, compute the position of the next display string. */ |
| 627 | if (charpos + *nchars <= ZV && charpos + *nchars > *disp_pos) | 1043 | if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos |
| 628 | *disp_pos = compute_display_string_pos (charpos + *nchars, frame_window_p); | 1044 | && *disp_prop) |
| 1045 | { | ||
| 1046 | SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len); | ||
| 1047 | *disp_pos = compute_display_string_pos (&pos, string, frame_window_p, | ||
| 1048 | disp_prop); | ||
| 1049 | } | ||
| 629 | 1050 | ||
| 630 | return ch; | 1051 | return ch; |
| 631 | } | 1052 | } |
| 632 | 1053 | ||
| 1054 | |||
| 1055 | /*********************************************************************** | ||
| 1056 | Determining paragraph direction | ||
| 1057 | ***********************************************************************/ | ||
| 1058 | |||
| 1059 | /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph. | ||
| 1060 | Value is the non-negative length of the paragraph separator | ||
| 1061 | following the buffer position, -1 if position is at the beginning | ||
| 1062 | of a new paragraph, or -2 if position is neither at beginning nor | ||
| 1063 | at end of a paragraph. */ | ||
| 1064 | static EMACS_INT | ||
| 1065 | bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos) | ||
| 1066 | { | ||
| 1067 | Lisp_Object sep_re; | ||
| 1068 | Lisp_Object start_re; | ||
| 1069 | EMACS_INT val; | ||
| 1070 | |||
| 1071 | sep_re = paragraph_separate_re; | ||
| 1072 | start_re = paragraph_start_re; | ||
| 1073 | |||
| 1074 | val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil); | ||
| 1075 | if (val < 0) | ||
| 1076 | { | ||
| 1077 | if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0) | ||
| 1078 | val = -1; | ||
| 1079 | else | ||
| 1080 | val = -2; | ||
| 1081 | } | ||
| 1082 | |||
| 1083 | return val; | ||
| 1084 | } | ||
| 1085 | |||
| 1086 | /* On my 2005-vintage machine, searching back for paragraph start | ||
| 1087 | takes ~1 ms per line. And bidi_paragraph_init is called 4 times | ||
| 1088 | when user types C-p. The number below limits each call to | ||
| 1089 | bidi_paragraph_init to about 10 ms. */ | ||
| 1090 | #define MAX_PARAGRAPH_SEARCH 7500 | ||
| 1091 | |||
| 633 | /* Find the beginning of this paragraph by looking back in the buffer. | 1092 | /* Find the beginning of this paragraph by looking back in the buffer. |
| 634 | Value is the byte position of the paragraph's beginning. */ | 1093 | Value is the byte position of the paragraph's beginning, or |
| 1094 | BEGV_BYTE if paragraph_start_re is still not found after looking | ||
| 1095 | back MAX_PARAGRAPH_SEARCH lines in the buffer. */ | ||
| 635 | static EMACS_INT | 1096 | static EMACS_INT |
| 636 | bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte) | 1097 | bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte) |
| 637 | { | 1098 | { |
| 638 | Lisp_Object re = paragraph_start_re; | 1099 | Lisp_Object re = paragraph_start_re; |
| 639 | EMACS_INT limit = ZV, limit_byte = ZV_BYTE; | 1100 | EMACS_INT limit = ZV, limit_byte = ZV_BYTE; |
| 1101 | EMACS_INT n = 0; | ||
| 640 | 1102 | ||
| 641 | while (pos_byte > BEGV_BYTE | 1103 | while (pos_byte > BEGV_BYTE |
| 1104 | && n++ < MAX_PARAGRAPH_SEARCH | ||
| 642 | && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0) | 1105 | && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0) |
| 643 | { | 1106 | { |
| 644 | /* FIXME: What if the paragraph beginning is covered by a | 1107 | /* FIXME: What if the paragraph beginning is covered by a |
| @@ -648,6 +1111,8 @@ bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte) | |||
| 648 | pos = find_next_newline_no_quit (pos - 1, -1); | 1111 | pos = find_next_newline_no_quit (pos - 1, -1); |
| 649 | pos_byte = CHAR_TO_BYTE (pos); | 1112 | pos_byte = CHAR_TO_BYTE (pos); |
| 650 | } | 1113 | } |
| 1114 | if (n >= MAX_PARAGRAPH_SEARCH) | ||
| 1115 | pos_byte = BEGV_BYTE; | ||
| 651 | return pos_byte; | 1116 | return pos_byte; |
| 652 | } | 1117 | } |
| 653 | 1118 | ||
| @@ -665,18 +1130,24 @@ bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte) | |||
| 665 | 1130 | ||
| 666 | Note that this function gives the paragraph separator the same | 1131 | Note that this function gives the paragraph separator the same |
| 667 | direction as the preceding paragraph, even though Emacs generally | 1132 | direction as the preceding paragraph, even though Emacs generally |
| 668 | views the separartor as not belonging to any paragraph. */ | 1133 | views the separator as not belonging to any paragraph. */ |
| 669 | void | 1134 | void |
| 670 | bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p) | 1135 | bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p) |
| 671 | { | 1136 | { |
| 672 | EMACS_INT bytepos = bidi_it->bytepos; | 1137 | EMACS_INT bytepos = bidi_it->bytepos; |
| 1138 | int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring); | ||
| 673 | EMACS_INT pstartbyte; | 1139 | EMACS_INT pstartbyte; |
| 1140 | /* Note that begbyte is a byte position, while end is a character | ||
| 1141 | position. Yes, this is ugly, but we are trying to avoid costly | ||
| 1142 | calls to BYTE_TO_CHAR and its ilk. */ | ||
| 1143 | EMACS_INT begbyte = string_p ? 0 : BEGV_BYTE; | ||
| 1144 | EMACS_INT end = string_p ? bidi_it->string.schars : ZV; | ||
| 674 | 1145 | ||
| 675 | /* Special case for an empty buffer. */ | 1146 | /* Special case for an empty buffer. */ |
| 676 | if (bytepos == BEGV_BYTE && bytepos == ZV_BYTE) | 1147 | if (bytepos == begbyte && bidi_it->charpos == end) |
| 677 | dir = L2R; | 1148 | dir = L2R; |
| 678 | /* We should never be called at EOB or before BEGV. */ | 1149 | /* We should never be called at EOB or before BEGV. */ |
| 679 | else if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE) | 1150 | else if (bidi_it->charpos >= end || bytepos < begbyte) |
| 680 | abort (); | 1151 | abort (); |
| 681 | 1152 | ||
| 682 | if (dir == L2R) | 1153 | if (dir == L2R) |
| @@ -694,7 +1165,9 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p) | |||
| 694 | int ch; | 1165 | int ch; |
| 695 | EMACS_INT ch_len, nchars; | 1166 | EMACS_INT ch_len, nchars; |
| 696 | EMACS_INT pos, disp_pos = -1; | 1167 | EMACS_INT pos, disp_pos = -1; |
| 1168 | int disp_prop = 0; | ||
| 697 | bidi_type_t type; | 1169 | bidi_type_t type; |
| 1170 | const unsigned char *s; | ||
| 698 | 1171 | ||
| 699 | if (!bidi_initialized) | 1172 | if (!bidi_initialized) |
| 700 | bidi_initialize (); | 1173 | bidi_initialize (); |
| @@ -712,7 +1185,11 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p) | |||
| 712 | we are potentially in a new paragraph that doesn't yet | 1185 | we are potentially in a new paragraph that doesn't yet |
| 713 | exist. */ | 1186 | exist. */ |
| 714 | pos = bidi_it->charpos; | 1187 | pos = bidi_it->charpos; |
| 715 | if (bytepos > BEGV_BYTE && FETCH_CHAR (bytepos) == '\n') | 1188 | s = (STRINGP (bidi_it->string.lstring) |
| 1189 | ? SDATA (bidi_it->string.lstring) | ||
| 1190 | : bidi_it->string.s); | ||
| 1191 | if (bytepos > begbyte | ||
| 1192 | && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n') | ||
| 716 | { | 1193 | { |
| 717 | bytepos++; | 1194 | bytepos++; |
| 718 | pos++; | 1195 | pos++; |
| @@ -720,50 +1197,63 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p) | |||
| 720 | 1197 | ||
| 721 | /* We are either at the beginning of a paragraph or in the | 1198 | /* We are either at the beginning of a paragraph or in the |
| 722 | middle of it. Find where this paragraph starts. */ | 1199 | middle of it. Find where this paragraph starts. */ |
| 723 | pstartbyte = bidi_find_paragraph_start (pos, bytepos); | 1200 | if (string_p) |
| 1201 | { | ||
| 1202 | /* We don't support changes of paragraph direction inside a | ||
| 1203 | string. It is treated as a single paragraph. */ | ||
| 1204 | pstartbyte = 0; | ||
| 1205 | } | ||
| 1206 | else | ||
| 1207 | pstartbyte = bidi_find_paragraph_start (pos, bytepos); | ||
| 724 | bidi_it->separator_limit = -1; | 1208 | bidi_it->separator_limit = -1; |
| 725 | bidi_it->new_paragraph = 0; | 1209 | bidi_it->new_paragraph = 0; |
| 726 | 1210 | ||
| 727 | /* The following loop is run more than once only if NO_DEFAULT_P | 1211 | /* The following loop is run more than once only if NO_DEFAULT_P |
| 728 | is non-zero. */ | 1212 | is non-zero, and only if we are iterating on a buffer. */ |
| 729 | do { | 1213 | do { |
| 730 | bytepos = pstartbyte; | 1214 | bytepos = pstartbyte; |
| 731 | pos = BYTE_TO_CHAR (bytepos); | 1215 | if (!string_p) |
| 732 | ch = bidi_fetch_char (bytepos, pos, &disp_pos, bidi_it->frame_window_p, | 1216 | pos = BYTE_TO_CHAR (bytepos); |
| 733 | &ch_len, &nchars); | 1217 | ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop, |
| 1218 | &bidi_it->string, | ||
| 1219 | bidi_it->frame_window_p, &ch_len, &nchars); | ||
| 734 | type = bidi_get_type (ch, NEUTRAL_DIR); | 1220 | type = bidi_get_type (ch, NEUTRAL_DIR); |
| 735 | 1221 | ||
| 736 | for (pos += nchars, bytepos += ch_len; | 1222 | for (pos += nchars, bytepos += ch_len; |
| 737 | /* NOTE: UAX#9 says to search only for L, AL, or R types | ||
| 738 | of characters, and ignore RLE, RLO, LRE, and LRO. | ||
| 739 | However, I'm not sure it makes sense to omit those 4; | ||
| 740 | should try with and without that to see the effect. */ | ||
| 741 | (bidi_get_category (type) != STRONG) | 1223 | (bidi_get_category (type) != STRONG) |
| 742 | || (bidi_ignore_explicit_marks_for_paragraph_level | 1224 | || (bidi_ignore_explicit_marks_for_paragraph_level |
| 743 | && (type == RLE || type == RLO | 1225 | && (type == RLE || type == RLO |
| 744 | || type == LRE || type == LRO)); | 1226 | || type == LRE || type == LRO)); |
| 745 | type = bidi_get_type (ch, NEUTRAL_DIR)) | 1227 | type = bidi_get_type (ch, NEUTRAL_DIR)) |
| 746 | { | 1228 | { |
| 747 | if (bytepos >= ZV_BYTE) | 1229 | if (pos >= end) |
| 748 | { | 1230 | { |
| 749 | /* Pretend there's a paragraph separator at end of | 1231 | /* Pretend there's a paragraph separator at end of |
| 750 | buffer. */ | 1232 | buffer/string. */ |
| 751 | type = NEUTRAL_B; | 1233 | type = NEUTRAL_B; |
| 752 | break; | 1234 | break; |
| 753 | } | 1235 | } |
| 754 | if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1) | 1236 | if (!string_p |
| 1237 | && type == NEUTRAL_B | ||
| 1238 | && bidi_at_paragraph_end (pos, bytepos) >= -1) | ||
| 755 | break; | 1239 | break; |
| 756 | /* Fetch next character and advance to get past it. */ | 1240 | /* Fetch next character and advance to get past it. */ |
| 757 | ch = bidi_fetch_char (bytepos, pos, &disp_pos, | 1241 | ch = bidi_fetch_char (bytepos, pos, &disp_pos, |
| 1242 | &disp_prop, &bidi_it->string, | ||
| 758 | bidi_it->frame_window_p, &ch_len, &nchars); | 1243 | bidi_it->frame_window_p, &ch_len, &nchars); |
| 759 | pos += nchars; | 1244 | pos += nchars; |
| 760 | bytepos += ch_len; | 1245 | bytepos += ch_len; |
| 761 | } | 1246 | } |
| 762 | if (type == STRONG_R || type == STRONG_AL) /* P3 */ | 1247 | if ((type == STRONG_R || type == STRONG_AL) /* P3 */ |
| 1248 | || (!bidi_ignore_explicit_marks_for_paragraph_level | ||
| 1249 | && (type == RLO || type == RLE))) | ||
| 763 | bidi_it->paragraph_dir = R2L; | 1250 | bidi_it->paragraph_dir = R2L; |
| 764 | else if (type == STRONG_L) | 1251 | else if (type == STRONG_L |
| 1252 | || (!bidi_ignore_explicit_marks_for_paragraph_level | ||
| 1253 | && (type == LRO || type == LRE))) | ||
| 765 | bidi_it->paragraph_dir = L2R; | 1254 | bidi_it->paragraph_dir = L2R; |
| 766 | if (no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR) | 1255 | if (!string_p |
| 1256 | && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR) | ||
| 767 | { | 1257 | { |
| 768 | /* If this paragraph is at BEGV, default to L2R. */ | 1258 | /* If this paragraph is at BEGV, default to L2R. */ |
| 769 | if (pstartbyte == BEGV_BYTE) | 1259 | if (pstartbyte == BEGV_BYTE) |
| @@ -786,7 +1276,8 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p) | |||
| 786 | pstartbyte = prevpbyte; | 1276 | pstartbyte = prevpbyte; |
| 787 | } | 1277 | } |
| 788 | } | 1278 | } |
| 789 | } while (no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR); | 1279 | } while (!string_p |
| 1280 | && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR); | ||
| 790 | } | 1281 | } |
| 791 | else | 1282 | else |
| 792 | abort (); | 1283 | abort (); |
| @@ -804,110 +1295,11 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p) | |||
| 804 | bidi_line_init (bidi_it); | 1295 | bidi_line_init (bidi_it); |
| 805 | } | 1296 | } |
| 806 | 1297 | ||
| 807 | /* Do whatever UAX#9 clause X8 says should be done at paragraph's | 1298 | |
| 808 | end. */ | 1299 | /*********************************************************************** |
| 809 | static inline void | 1300 | Resolving explicit and implicit levels. |
| 810 | bidi_set_paragraph_end (struct bidi_it *bidi_it) | 1301 | The rest of this file constitutes the core of the UBA implementation. |
| 811 | { | 1302 | ***********************************************************************/ |
| 812 | bidi_it->invalid_levels = 0; | ||
| 813 | bidi_it->invalid_rl_levels = -1; | ||
| 814 | bidi_it->stack_idx = 0; | ||
| 815 | bidi_it->resolved_level = bidi_it->level_stack[0].level; | ||
| 816 | } | ||
| 817 | |||
| 818 | /* Initialize the bidi iterator from buffer/string position CHARPOS. */ | ||
| 819 | void | ||
| 820 | bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p, | ||
| 821 | struct bidi_it *bidi_it) | ||
| 822 | { | ||
| 823 | if (! bidi_initialized) | ||
| 824 | bidi_initialize (); | ||
| 825 | bidi_it->charpos = charpos; | ||
| 826 | bidi_it->bytepos = bytepos; | ||
| 827 | bidi_it->frame_window_p = frame_window_p; | ||
| 828 | bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */ | ||
| 829 | bidi_it->first_elt = 1; | ||
| 830 | bidi_set_paragraph_end (bidi_it); | ||
| 831 | bidi_it->new_paragraph = 1; | ||
| 832 | bidi_it->separator_limit = -1; | ||
| 833 | bidi_it->type = NEUTRAL_B; | ||
| 834 | bidi_it->type_after_w1 = NEUTRAL_B; | ||
| 835 | bidi_it->orig_type = NEUTRAL_B; | ||
| 836 | bidi_it->prev_was_pdf = 0; | ||
| 837 | bidi_it->prev.type = bidi_it->prev.type_after_w1 = | ||
| 838 | bidi_it->prev.orig_type = UNKNOWN_BT; | ||
| 839 | bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 = | ||
| 840 | bidi_it->last_strong.orig_type = UNKNOWN_BT; | ||
| 841 | bidi_it->next_for_neutral.charpos = -1; | ||
| 842 | bidi_it->next_for_neutral.type = | ||
| 843 | bidi_it->next_for_neutral.type_after_w1 = | ||
| 844 | bidi_it->next_for_neutral.orig_type = UNKNOWN_BT; | ||
| 845 | bidi_it->prev_for_neutral.charpos = -1; | ||
| 846 | bidi_it->prev_for_neutral.type = | ||
| 847 | bidi_it->prev_for_neutral.type_after_w1 = | ||
| 848 | bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT; | ||
| 849 | bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */ | ||
| 850 | bidi_it->disp_pos = -1; /* invalid/unknown */ | ||
| 851 | bidi_cache_shrink (); | ||
| 852 | } | ||
| 853 | |||
| 854 | /* Push the current embedding level and override status; reset the | ||
| 855 | current level to LEVEL and the current override status to OVERRIDE. */ | ||
| 856 | static inline void | ||
| 857 | bidi_push_embedding_level (struct bidi_it *bidi_it, | ||
| 858 | int level, bidi_dir_t override) | ||
| 859 | { | ||
| 860 | bidi_it->stack_idx++; | ||
| 861 | if (bidi_it->stack_idx >= BIDI_MAXLEVEL) | ||
| 862 | abort (); | ||
| 863 | bidi_it->level_stack[bidi_it->stack_idx].level = level; | ||
| 864 | bidi_it->level_stack[bidi_it->stack_idx].override = override; | ||
| 865 | } | ||
| 866 | |||
| 867 | /* Pop the embedding level and directional override status from the | ||
| 868 | stack, and return the new level. */ | ||
| 869 | static inline int | ||
| 870 | bidi_pop_embedding_level (struct bidi_it *bidi_it) | ||
| 871 | { | ||
| 872 | /* UAX#9 says to ignore invalid PDFs. */ | ||
| 873 | if (bidi_it->stack_idx > 0) | ||
| 874 | bidi_it->stack_idx--; | ||
| 875 | return bidi_it->level_stack[bidi_it->stack_idx].level; | ||
| 876 | } | ||
| 877 | |||
| 878 | /* Record in SAVED_INFO the information about the current character. */ | ||
| 879 | static inline void | ||
| 880 | bidi_remember_char (struct bidi_saved_info *saved_info, | ||
| 881 | struct bidi_it *bidi_it) | ||
| 882 | { | ||
| 883 | saved_info->charpos = bidi_it->charpos; | ||
| 884 | saved_info->bytepos = bidi_it->bytepos; | ||
| 885 | saved_info->type = bidi_it->type; | ||
| 886 | bidi_check_type (bidi_it->type); | ||
| 887 | saved_info->type_after_w1 = bidi_it->type_after_w1; | ||
| 888 | bidi_check_type (bidi_it->type_after_w1); | ||
| 889 | saved_info->orig_type = bidi_it->orig_type; | ||
| 890 | bidi_check_type (bidi_it->orig_type); | ||
| 891 | } | ||
| 892 | |||
| 893 | /* Resolve the type of a neutral character according to the type of | ||
| 894 | surrounding strong text and the current embedding level. */ | ||
| 895 | static inline bidi_type_t | ||
| 896 | bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev) | ||
| 897 | { | ||
| 898 | /* N1: European and Arabic numbers are treated as though they were R. */ | ||
| 899 | if (next_type == WEAK_EN || next_type == WEAK_AN) | ||
| 900 | next_type = STRONG_R; | ||
| 901 | if (prev_type == WEAK_EN || prev_type == WEAK_AN) | ||
| 902 | prev_type = STRONG_R; | ||
| 903 | |||
| 904 | if (next_type == prev_type) /* N1 */ | ||
| 905 | return next_type; | ||
| 906 | else if ((lev & 1) == 0) /* N2 */ | ||
| 907 | return STRONG_L; | ||
| 908 | else | ||
| 909 | return STRONG_R; | ||
| 910 | } | ||
| 911 | 1303 | ||
| 912 | static inline int | 1304 | static inline int |
| 913 | bidi_explicit_dir_char (int ch) | 1305 | bidi_explicit_dir_char (int ch) |
| @@ -934,19 +1326,36 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | |||
| 934 | int current_level; | 1326 | int current_level; |
| 935 | int new_level; | 1327 | int new_level; |
| 936 | bidi_dir_t override; | 1328 | bidi_dir_t override; |
| 1329 | int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring); | ||
| 937 | 1330 | ||
| 938 | /* If reseat()'ed, don't advance, so as to start iteration from the | 1331 | /* If reseat()'ed, don't advance, so as to start iteration from the |
| 939 | position where we were reseated. bidi_it->bytepos can be less | 1332 | position where we were reseated. bidi_it->bytepos can be less |
| 940 | than BEGV_BYTE after reseat to BEGV. */ | 1333 | than BEGV_BYTE after reseat to BEGV. */ |
| 941 | if (bidi_it->bytepos < BEGV_BYTE | 1334 | if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE) |
| 942 | || bidi_it->first_elt) | 1335 | || bidi_it->first_elt) |
| 943 | { | 1336 | { |
| 944 | bidi_it->first_elt = 0; | 1337 | bidi_it->first_elt = 0; |
| 945 | if (bidi_it->charpos < BEGV) | 1338 | if (string_p) |
| 946 | bidi_it->charpos = BEGV; | 1339 | { |
| 947 | bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos); | 1340 | const unsigned char *p |
| 1341 | = (STRINGP (bidi_it->string.lstring) | ||
| 1342 | ? SDATA (bidi_it->string.lstring) | ||
| 1343 | : bidi_it->string.s); | ||
| 1344 | |||
| 1345 | if (bidi_it->charpos < 0) | ||
| 1346 | bidi_it->charpos = 0; | ||
| 1347 | bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos, | ||
| 1348 | bidi_it->string.unibyte); | ||
| 1349 | } | ||
| 1350 | else | ||
| 1351 | { | ||
| 1352 | if (bidi_it->charpos < BEGV) | ||
| 1353 | bidi_it->charpos = BEGV; | ||
| 1354 | bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos); | ||
| 1355 | } | ||
| 948 | } | 1356 | } |
| 949 | else if (bidi_it->bytepos < ZV_BYTE) /* don't move at ZV */ | 1357 | /* Don't move at end of buffer/string. */ |
| 1358 | else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV)) | ||
| 950 | { | 1359 | { |
| 951 | /* Advance to the next character, skipping characters covered by | 1360 | /* Advance to the next character, skipping characters covered by |
| 952 | display strings (nchars > 1). */ | 1361 | display strings (nchars > 1). */ |
| @@ -962,12 +1371,13 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | |||
| 962 | override = bidi_it->level_stack[bidi_it->stack_idx].override; | 1371 | override = bidi_it->level_stack[bidi_it->stack_idx].override; |
| 963 | new_level = current_level; | 1372 | new_level = current_level; |
| 964 | 1373 | ||
| 965 | if (bidi_it->bytepos >= ZV_BYTE) | 1374 | if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV)) |
| 966 | { | 1375 | { |
| 967 | curchar = BIDI_EOB; | 1376 | curchar = BIDI_EOB; |
| 968 | bidi_it->ch_len = 1; | 1377 | bidi_it->ch_len = 1; |
| 969 | bidi_it->nchars = 1; | 1378 | bidi_it->nchars = 1; |
| 970 | bidi_it->disp_pos = ZV; | 1379 | bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV); |
| 1380 | bidi_it->disp_prop = 0; | ||
| 971 | } | 1381 | } |
| 972 | else | 1382 | else |
| 973 | { | 1383 | { |
| @@ -975,7 +1385,8 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | |||
| 975 | display string, treat the entire run of covered characters as | 1385 | display string, treat the entire run of covered characters as |
| 976 | a single character u+FFFC. */ | 1386 | a single character u+FFFC. */ |
| 977 | curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos, | 1387 | curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos, |
| 978 | &bidi_it->disp_pos, bidi_it->frame_window_p, | 1388 | &bidi_it->disp_pos, &bidi_it->disp_prop, |
| 1389 | &bidi_it->string, bidi_it->frame_window_p, | ||
| 979 | &bidi_it->ch_len, &bidi_it->nchars); | 1390 | &bidi_it->ch_len, &bidi_it->nchars); |
| 980 | } | 1391 | } |
| 981 | bidi_it->ch = curchar; | 1392 | bidi_it->ch = curchar; |
| @@ -1000,7 +1411,7 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | |||
| 1000 | bidi_it->type_after_w1 = type; | 1411 | bidi_it->type_after_w1 = type; |
| 1001 | bidi_check_type (bidi_it->type_after_w1); | 1412 | bidi_check_type (bidi_it->type_after_w1); |
| 1002 | type = WEAK_BN; /* X9/Retaining */ | 1413 | type = WEAK_BN; /* X9/Retaining */ |
| 1003 | if (bidi_it->ignore_bn_limit <= 0) | 1414 | if (bidi_it->ignore_bn_limit <= -1) |
| 1004 | { | 1415 | { |
| 1005 | if (current_level <= BIDI_MAXLEVEL - 4) | 1416 | if (current_level <= BIDI_MAXLEVEL - 4) |
| 1006 | { | 1417 | { |
| @@ -1025,7 +1436,8 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | |||
| 1025 | } | 1436 | } |
| 1026 | } | 1437 | } |
| 1027 | else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ | 1438 | else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ |
| 1028 | || bidi_it->next_en_pos > bidi_it->charpos) | 1439 | || (bidi_it->next_en_pos > bidi_it->charpos |
| 1440 | && bidi_it->next_en_type == WEAK_EN)) | ||
| 1029 | type = WEAK_EN; | 1441 | type = WEAK_EN; |
| 1030 | break; | 1442 | break; |
| 1031 | case LRE: /* X3 */ | 1443 | case LRE: /* X3 */ |
| @@ -1033,7 +1445,7 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | |||
| 1033 | bidi_it->type_after_w1 = type; | 1445 | bidi_it->type_after_w1 = type; |
| 1034 | bidi_check_type (bidi_it->type_after_w1); | 1446 | bidi_check_type (bidi_it->type_after_w1); |
| 1035 | type = WEAK_BN; /* X9/Retaining */ | 1447 | type = WEAK_BN; /* X9/Retaining */ |
| 1036 | if (bidi_it->ignore_bn_limit <= 0) | 1448 | if (bidi_it->ignore_bn_limit <= -1) |
| 1037 | { | 1449 | { |
| 1038 | if (current_level <= BIDI_MAXLEVEL - 5) | 1450 | if (current_level <= BIDI_MAXLEVEL - 5) |
| 1039 | { | 1451 | { |
| @@ -1061,14 +1473,15 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | |||
| 1061 | } | 1473 | } |
| 1062 | } | 1474 | } |
| 1063 | else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ | 1475 | else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ |
| 1064 | || bidi_it->next_en_pos > bidi_it->charpos) | 1476 | || (bidi_it->next_en_pos > bidi_it->charpos |
| 1477 | && bidi_it->next_en_type == WEAK_EN)) | ||
| 1065 | type = WEAK_EN; | 1478 | type = WEAK_EN; |
| 1066 | break; | 1479 | break; |
| 1067 | case PDF: /* X7 */ | 1480 | case PDF: /* X7 */ |
| 1068 | bidi_it->type_after_w1 = type; | 1481 | bidi_it->type_after_w1 = type; |
| 1069 | bidi_check_type (bidi_it->type_after_w1); | 1482 | bidi_check_type (bidi_it->type_after_w1); |
| 1070 | type = WEAK_BN; /* X9/Retaining */ | 1483 | type = WEAK_BN; /* X9/Retaining */ |
| 1071 | if (bidi_it->ignore_bn_limit <= 0) | 1484 | if (bidi_it->ignore_bn_limit <= -1) |
| 1072 | { | 1485 | { |
| 1073 | if (!bidi_it->invalid_rl_levels) | 1486 | if (!bidi_it->invalid_rl_levels) |
| 1074 | { | 1487 | { |
| @@ -1087,7 +1500,8 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | |||
| 1087 | } | 1500 | } |
| 1088 | } | 1501 | } |
| 1089 | else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ | 1502 | else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ |
| 1090 | || bidi_it->next_en_pos > bidi_it->charpos) | 1503 | || (bidi_it->next_en_pos > bidi_it->charpos |
| 1504 | && bidi_it->next_en_type == WEAK_EN)) | ||
| 1091 | type = WEAK_EN; | 1505 | type = WEAK_EN; |
| 1092 | break; | 1506 | break; |
| 1093 | default: | 1507 | default: |
| @@ -1111,13 +1525,19 @@ bidi_resolve_explicit (struct bidi_it *bidi_it) | |||
| 1111 | { | 1525 | { |
| 1112 | int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level; | 1526 | int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level; |
| 1113 | int new_level = bidi_resolve_explicit_1 (bidi_it); | 1527 | int new_level = bidi_resolve_explicit_1 (bidi_it); |
| 1528 | EMACS_INT eob = bidi_it->string.s ? bidi_it->string.schars : ZV; | ||
| 1529 | const unsigned char *s | ||
| 1530 | = (STRINGP (bidi_it->string.lstring) | ||
| 1531 | ? SDATA (bidi_it->string.lstring) | ||
| 1532 | : bidi_it->string.s); | ||
| 1114 | 1533 | ||
| 1115 | if (prev_level < new_level | 1534 | if (prev_level < new_level |
| 1116 | && bidi_it->type == WEAK_BN | 1535 | && bidi_it->type == WEAK_BN |
| 1117 | && bidi_it->ignore_bn_limit == 0 /* only if not already known */ | 1536 | && bidi_it->ignore_bn_limit == -1 /* only if not already known */ |
| 1118 | && bidi_it->bytepos < ZV_BYTE /* not already at EOB */ | 1537 | && bidi_it->charpos < eob /* not already at EOB */ |
| 1119 | && bidi_explicit_dir_char (FETCH_MULTIBYTE_CHAR (bidi_it->bytepos | 1538 | && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos |
| 1120 | + bidi_it->ch_len))) | 1539 | + bidi_it->ch_len, s, |
| 1540 | bidi_it->string.unibyte))) | ||
| 1121 | { | 1541 | { |
| 1122 | /* Avoid pushing and popping embedding levels if the level run | 1542 | /* Avoid pushing and popping embedding levels if the level run |
| 1123 | is empty, as this breaks level runs where it shouldn't. | 1543 | is empty, as this breaks level runs where it shouldn't. |
| @@ -1129,12 +1549,17 @@ bidi_resolve_explicit (struct bidi_it *bidi_it) | |||
| 1129 | 1549 | ||
| 1130 | bidi_copy_it (&saved_it, bidi_it); | 1550 | bidi_copy_it (&saved_it, bidi_it); |
| 1131 | 1551 | ||
| 1132 | while (bidi_explicit_dir_char (FETCH_MULTIBYTE_CHAR (bidi_it->bytepos | 1552 | while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos |
| 1133 | + bidi_it->ch_len))) | 1553 | + bidi_it->ch_len, s, |
| 1554 | bidi_it->string.unibyte))) | ||
| 1134 | { | 1555 | { |
| 1135 | /* This advances to the next character, skipping any | 1556 | /* This advances to the next character, skipping any |
| 1136 | characters covered by display strings. */ | 1557 | characters covered by display strings. */ |
| 1137 | level = bidi_resolve_explicit_1 (bidi_it); | 1558 | level = bidi_resolve_explicit_1 (bidi_it); |
| 1559 | /* If string.lstring was relocated inside bidi_resolve_explicit_1, | ||
| 1560 | a pointer to its data is no longer valid. */ | ||
| 1561 | if (STRINGP (bidi_it->string.lstring)) | ||
| 1562 | s = SDATA (bidi_it->string.lstring); | ||
| 1138 | } | 1563 | } |
| 1139 | 1564 | ||
| 1140 | if (bidi_it->nchars <= 0) | 1565 | if (bidi_it->nchars <= 0) |
| @@ -1142,10 +1567,10 @@ bidi_resolve_explicit (struct bidi_it *bidi_it) | |||
| 1142 | if (level == prev_level) /* empty embedding */ | 1567 | if (level == prev_level) /* empty embedding */ |
| 1143 | saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars; | 1568 | saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars; |
| 1144 | else /* this embedding is non-empty */ | 1569 | else /* this embedding is non-empty */ |
| 1145 | saved_it.ignore_bn_limit = -1; | 1570 | saved_it.ignore_bn_limit = -2; |
| 1146 | 1571 | ||
| 1147 | bidi_copy_it (bidi_it, &saved_it); | 1572 | bidi_copy_it (bidi_it, &saved_it); |
| 1148 | if (bidi_it->ignore_bn_limit > 0) | 1573 | if (bidi_it->ignore_bn_limit > -1) |
| 1149 | { | 1574 | { |
| 1150 | /* We pushed a level, but we shouldn't have. Undo that. */ | 1575 | /* We pushed a level, but we shouldn't have. Undo that. */ |
| 1151 | if (!bidi_it->invalid_rl_levels) | 1576 | if (!bidi_it->invalid_rl_levels) |
| @@ -1188,6 +1613,9 @@ bidi_resolve_weak (struct bidi_it *bidi_it) | |||
| 1188 | int next_char; | 1613 | int next_char; |
| 1189 | bidi_type_t type_of_next; | 1614 | bidi_type_t type_of_next; |
| 1190 | struct bidi_it saved_it; | 1615 | struct bidi_it saved_it; |
| 1616 | EMACS_INT eob | ||
| 1617 | = ((STRINGP (bidi_it->string.lstring) || bidi_it->string.s) | ||
| 1618 | ? bidi_it->string.schars : ZV); | ||
| 1191 | 1619 | ||
| 1192 | type = bidi_it->type; | 1620 | type = bidi_it->type; |
| 1193 | override = bidi_it->level_stack[bidi_it->stack_idx].override; | 1621 | override = bidi_it->level_stack[bidi_it->stack_idx].override; |
| @@ -1254,10 +1682,15 @@ bidi_resolve_weak (struct bidi_it *bidi_it) | |||
| 1254 | && bidi_it->prev.orig_type == WEAK_EN) | 1682 | && bidi_it->prev.orig_type == WEAK_EN) |
| 1255 | || bidi_it->prev.type_after_w1 == WEAK_AN))) | 1683 | || bidi_it->prev.type_after_w1 == WEAK_AN))) |
| 1256 | { | 1684 | { |
| 1257 | next_char = | 1685 | const unsigned char *s |
| 1258 | bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE | 1686 | = (STRINGP (bidi_it->string.lstring) |
| 1259 | ? BIDI_EOB : FETCH_MULTIBYTE_CHAR (bidi_it->bytepos | 1687 | ? SDATA (bidi_it->string.lstring) |
| 1260 | + bidi_it->ch_len); | 1688 | : bidi_it->string.s); |
| 1689 | |||
| 1690 | next_char = (bidi_it->charpos + bidi_it->nchars >= eob | ||
| 1691 | ? BIDI_EOB | ||
| 1692 | : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, | ||
| 1693 | s, bidi_it->string.unibyte)); | ||
| 1261 | type_of_next = bidi_get_type (next_char, override); | 1694 | type_of_next = bidi_get_type (next_char, override); |
| 1262 | 1695 | ||
| 1263 | if (type_of_next == WEAK_BN | 1696 | if (type_of_next == WEAK_BN |
| @@ -1300,19 +1733,28 @@ bidi_resolve_weak (struct bidi_it *bidi_it) | |||
| 1300 | else if (type == WEAK_ET /* W5: ET with EN before or after it */ | 1733 | else if (type == WEAK_ET /* W5: ET with EN before or after it */ |
| 1301 | || type == WEAK_BN) /* W5/Retaining */ | 1734 | || type == WEAK_BN) /* W5/Retaining */ |
| 1302 | { | 1735 | { |
| 1303 | if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */ | 1736 | if (bidi_it->prev.type_after_w1 == WEAK_EN) /* ET/BN w/EN before it */ |
| 1304 | || bidi_it->next_en_pos > bidi_it->charpos) | ||
| 1305 | type = WEAK_EN; | 1737 | type = WEAK_EN; |
| 1306 | else /* W5: ET/BN with EN after it. */ | 1738 | else if (bidi_it->next_en_pos > bidi_it->charpos |
| 1739 | && bidi_it->next_en_type != WEAK_BN) | ||
| 1740 | { | ||
| 1741 | if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */ | ||
| 1742 | type = WEAK_EN; | ||
| 1743 | } | ||
| 1744 | else if (bidi_it->next_en_pos >=0) | ||
| 1307 | { | 1745 | { |
| 1308 | EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars; | 1746 | EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars; |
| 1747 | const unsigned char *s = (STRINGP (bidi_it->string.lstring) | ||
| 1748 | ? SDATA (bidi_it->string.lstring) | ||
| 1749 | : bidi_it->string.s); | ||
| 1309 | 1750 | ||
| 1310 | if (bidi_it->nchars <= 0) | 1751 | if (bidi_it->nchars <= 0) |
| 1311 | abort (); | 1752 | abort (); |
| 1312 | next_char = | 1753 | next_char |
| 1313 | bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE | 1754 | = (bidi_it->charpos + bidi_it->nchars >= eob |
| 1314 | ? BIDI_EOB : FETCH_MULTIBYTE_CHAR (bidi_it->bytepos | 1755 | ? BIDI_EOB |
| 1315 | + bidi_it->ch_len); | 1756 | : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s, |
| 1757 | bidi_it->string.unibyte)); | ||
| 1316 | type_of_next = bidi_get_type (next_char, override); | 1758 | type_of_next = bidi_get_type (next_char, override); |
| 1317 | 1759 | ||
| 1318 | if (type_of_next == WEAK_ET | 1760 | if (type_of_next == WEAK_ET |
| @@ -1328,20 +1770,27 @@ bidi_resolve_weak (struct bidi_it *bidi_it) | |||
| 1328 | en_pos = bidi_it->charpos; | 1770 | en_pos = bidi_it->charpos; |
| 1329 | bidi_copy_it (bidi_it, &saved_it); | 1771 | bidi_copy_it (bidi_it, &saved_it); |
| 1330 | } | 1772 | } |
| 1773 | /* Remember this position, to speed up processing of the | ||
| 1774 | next ETs. */ | ||
| 1775 | bidi_it->next_en_pos = en_pos; | ||
| 1331 | if (type_of_next == WEAK_EN) | 1776 | if (type_of_next == WEAK_EN) |
| 1332 | { | 1777 | { |
| 1333 | /* If the last strong character is AL, the EN we've | 1778 | /* If the last strong character is AL, the EN we've |
| 1334 | found will become AN when we get to it (W2). */ | 1779 | found will become AN when we get to it (W2). */ |
| 1335 | if (bidi_it->last_strong.type_after_w1 != STRONG_AL) | 1780 | if (bidi_it->last_strong.type_after_w1 == STRONG_AL) |
| 1336 | { | 1781 | type_of_next = WEAK_AN; |
| 1337 | type = WEAK_EN; | ||
| 1338 | /* Remember this EN position, to speed up processing | ||
| 1339 | of the next ETs. */ | ||
| 1340 | bidi_it->next_en_pos = en_pos; | ||
| 1341 | } | ||
| 1342 | else if (type == WEAK_BN) | 1782 | else if (type == WEAK_BN) |
| 1343 | type = NEUTRAL_ON; /* W6/Retaining */ | 1783 | type = NEUTRAL_ON; /* W6/Retaining */ |
| 1784 | else | ||
| 1785 | type = WEAK_EN; | ||
| 1344 | } | 1786 | } |
| 1787 | else if (type_of_next == NEUTRAL_B) | ||
| 1788 | /* Record the fact that there are no more ENs from | ||
| 1789 | here to the end of paragraph, to avoid entering the | ||
| 1790 | loop above ever again in this paragraph. */ | ||
| 1791 | bidi_it->next_en_pos = -1; | ||
| 1792 | /* Record the type of the character where we ended our search. */ | ||
| 1793 | bidi_it->next_en_type = type_of_next; | ||
| 1345 | } | 1794 | } |
| 1346 | } | 1795 | } |
| 1347 | } | 1796 | } |
| @@ -1373,6 +1822,25 @@ bidi_resolve_weak (struct bidi_it *bidi_it) | |||
| 1373 | return type; | 1822 | return type; |
| 1374 | } | 1823 | } |
| 1375 | 1824 | ||
| 1825 | /* Resolve the type of a neutral character according to the type of | ||
| 1826 | surrounding strong text and the current embedding level. */ | ||
| 1827 | static inline bidi_type_t | ||
| 1828 | bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev) | ||
| 1829 | { | ||
| 1830 | /* N1: European and Arabic numbers are treated as though they were R. */ | ||
| 1831 | if (next_type == WEAK_EN || next_type == WEAK_AN) | ||
| 1832 | next_type = STRONG_R; | ||
| 1833 | if (prev_type == WEAK_EN || prev_type == WEAK_AN) | ||
| 1834 | prev_type = STRONG_R; | ||
| 1835 | |||
| 1836 | if (next_type == prev_type) /* N1 */ | ||
| 1837 | return next_type; | ||
| 1838 | else if ((lev & 1) == 0) /* N2 */ | ||
| 1839 | return STRONG_L; | ||
| 1840 | else | ||
| 1841 | return STRONG_R; | ||
| 1842 | } | ||
| 1843 | |||
| 1376 | static bidi_type_t | 1844 | static bidi_type_t |
| 1377 | bidi_resolve_neutral (struct bidi_it *bidi_it) | 1845 | bidi_resolve_neutral (struct bidi_it *bidi_it) |
| 1378 | { | 1846 | { |
| @@ -1391,13 +1859,45 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) | |||
| 1391 | || type == NEUTRAL_ON)) | 1859 | || type == NEUTRAL_ON)) |
| 1392 | abort (); | 1860 | abort (); |
| 1393 | 1861 | ||
| 1394 | if (bidi_get_category (type) == NEUTRAL | 1862 | if ((type != NEUTRAL_B /* Don't risk entering the long loop below if |
| 1863 | we are already at paragraph end. */ | ||
| 1864 | && bidi_get_category (type) == NEUTRAL) | ||
| 1395 | || (type == WEAK_BN && prev_level == current_level)) | 1865 | || (type == WEAK_BN && prev_level == current_level)) |
| 1396 | { | 1866 | { |
| 1397 | if (bidi_it->next_for_neutral.type != UNKNOWN_BT) | 1867 | if (bidi_it->next_for_neutral.type != UNKNOWN_BT) |
| 1398 | type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, | 1868 | type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, |
| 1399 | bidi_it->next_for_neutral.type, | 1869 | bidi_it->next_for_neutral.type, |
| 1400 | current_level); | 1870 | current_level); |
| 1871 | /* The next two "else if" clauses are shortcuts for the | ||
| 1872 | important special case when we have a long sequence of | ||
| 1873 | neutral or WEAK_BN characters, such as whitespace or nulls or | ||
| 1874 | other control characters, on the base embedding level of the | ||
| 1875 | paragraph, and that sequence goes all the way to the end of | ||
| 1876 | the paragraph and follows a character whose resolved | ||
| 1877 | directionality is identical to the base embedding level. | ||
| 1878 | (This is what happens in a buffer with plain L2R text that | ||
| 1879 | happens to include long sequences of control characters.) By | ||
| 1880 | virtue of N1, the result of examining this long sequence will | ||
| 1881 | always be either STRONG_L or STRONG_R, depending on the base | ||
| 1882 | embedding level. So we use this fact directly instead of | ||
| 1883 | entering the expensive loop in the "else" clause. */ | ||
| 1884 | else if (current_level == 0 | ||
| 1885 | && bidi_it->prev_for_neutral.type == STRONG_L | ||
| 1886 | && !bidi_explicit_dir_char (bidi_it->ch)) | ||
| 1887 | type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, | ||
| 1888 | STRONG_L, current_level); | ||
| 1889 | else if (/* current level is 1 */ | ||
| 1890 | current_level == 1 | ||
| 1891 | /* base embedding level is also 1 */ | ||
| 1892 | && bidi_it->level_stack[0].level == 1 | ||
| 1893 | /* previous character is one of those considered R for | ||
| 1894 | the purposes of W5 */ | ||
| 1895 | && (bidi_it->prev_for_neutral.type == STRONG_R | ||
| 1896 | || bidi_it->prev_for_neutral.type == WEAK_EN | ||
| 1897 | || bidi_it->prev_for_neutral.type == WEAK_AN) | ||
| 1898 | && !bidi_explicit_dir_char (bidi_it->ch)) | ||
| 1899 | type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, | ||
| 1900 | STRONG_R, current_level); | ||
| 1401 | else | 1901 | else |
| 1402 | { | 1902 | { |
| 1403 | /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in | 1903 | /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in |
| @@ -1438,8 +1938,8 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) | |||
| 1438 | && bidi_get_category (type) != NEUTRAL) | 1938 | && bidi_get_category (type) != NEUTRAL) |
| 1439 | /* This is all per level run, so stop when we | 1939 | /* This is all per level run, so stop when we |
| 1440 | reach the end of this level run. */ | 1940 | reach the end of this level run. */ |
| 1441 | || bidi_it->level_stack[bidi_it->stack_idx].level != | 1941 | || (bidi_it->level_stack[bidi_it->stack_idx].level |
| 1442 | current_level)); | 1942 | != current_level))); |
| 1443 | 1943 | ||
| 1444 | bidi_remember_char (&saved_it.next_for_neutral, bidi_it); | 1944 | bidi_remember_char (&saved_it.next_for_neutral, bidi_it); |
| 1445 | 1945 | ||
| @@ -1448,6 +1948,9 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) | |||
| 1448 | case STRONG_L: | 1948 | case STRONG_L: |
| 1449 | case STRONG_R: | 1949 | case STRONG_R: |
| 1450 | case STRONG_AL: | 1950 | case STRONG_AL: |
| 1951 | /* Actually, STRONG_AL cannot happen here, because | ||
| 1952 | bidi_resolve_weak converts it to STRONG_R, per W3. */ | ||
| 1953 | xassert (type != STRONG_AL); | ||
| 1451 | next_type = type; | 1954 | next_type = type; |
| 1452 | break; | 1955 | break; |
| 1453 | case WEAK_EN: | 1956 | case WEAK_EN: |
| @@ -1455,7 +1958,6 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) | |||
| 1455 | /* N1: ``European and Arabic numbers are treated as | 1958 | /* N1: ``European and Arabic numbers are treated as |
| 1456 | though they were R.'' */ | 1959 | though they were R.'' */ |
| 1457 | next_type = STRONG_R; | 1960 | next_type = STRONG_R; |
| 1458 | saved_it.next_for_neutral.type = STRONG_R; | ||
| 1459 | break; | 1961 | break; |
| 1460 | case WEAK_BN: | 1962 | case WEAK_BN: |
| 1461 | if (!bidi_explicit_dir_char (bidi_it->ch)) | 1963 | if (!bidi_explicit_dir_char (bidi_it->ch)) |
| @@ -1468,11 +1970,7 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) | |||
| 1468 | member. */ | 1970 | member. */ |
| 1469 | if (saved_it.type != WEAK_BN | 1971 | if (saved_it.type != WEAK_BN |
| 1470 | || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL) | 1972 | || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL) |
| 1471 | { | 1973 | next_type = bidi_it->prev_for_neutral.type; |
| 1472 | next_type = bidi_it->prev_for_neutral.type; | ||
| 1473 | saved_it.next_for_neutral.type = next_type; | ||
| 1474 | bidi_check_type (next_type); | ||
| 1475 | } | ||
| 1476 | else | 1974 | else |
| 1477 | { | 1975 | { |
| 1478 | /* This is a BN which does not adjoin neutrals. | 1976 | /* This is a BN which does not adjoin neutrals. |
| @@ -1486,7 +1984,9 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) | |||
| 1486 | } | 1984 | } |
| 1487 | type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type, | 1985 | type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type, |
| 1488 | next_type, current_level); | 1986 | next_type, current_level); |
| 1987 | saved_it.next_for_neutral.type = next_type; | ||
| 1489 | saved_it.type = type; | 1988 | saved_it.type = type; |
| 1989 | bidi_check_type (next_type); | ||
| 1490 | bidi_check_type (type); | 1990 | bidi_check_type (type); |
| 1491 | bidi_copy_it (bidi_it, &saved_it); | 1991 | bidi_copy_it (bidi_it, &saved_it); |
| 1492 | } | 1992 | } |
| @@ -1509,11 +2009,11 @@ bidi_type_of_next_char (struct bidi_it *bidi_it) | |||
| 1509 | 2009 | ||
| 1510 | /* Reset the limit until which to ignore BNs if we step out of the | 2010 | /* Reset the limit until which to ignore BNs if we step out of the |
| 1511 | area where we found only empty levels. */ | 2011 | area where we found only empty levels. */ |
| 1512 | if ((bidi_it->ignore_bn_limit > 0 | 2012 | if ((bidi_it->ignore_bn_limit > -1 |
| 1513 | && bidi_it->ignore_bn_limit <= bidi_it->charpos) | 2013 | && bidi_it->ignore_bn_limit <= bidi_it->charpos) |
| 1514 | || (bidi_it->ignore_bn_limit == -1 | 2014 | || (bidi_it->ignore_bn_limit == -2 |
| 1515 | && !bidi_explicit_dir_char (bidi_it->ch))) | 2015 | && !bidi_explicit_dir_char (bidi_it->ch))) |
| 1516 | bidi_it->ignore_bn_limit = 0; | 2016 | bidi_it->ignore_bn_limit = -1; |
| 1517 | 2017 | ||
| 1518 | type = bidi_resolve_neutral (bidi_it); | 2018 | type = bidi_resolve_neutral (bidi_it); |
| 1519 | 2019 | ||
| @@ -1530,12 +2030,16 @@ bidi_level_of_next_char (struct bidi_it *bidi_it) | |||
| 1530 | bidi_type_t type; | 2030 | bidi_type_t type; |
| 1531 | int level, prev_level = -1; | 2031 | int level, prev_level = -1; |
| 1532 | struct bidi_saved_info next_for_neutral; | 2032 | struct bidi_saved_info next_for_neutral; |
| 1533 | EMACS_INT next_char_pos; | 2033 | EMACS_INT next_char_pos = -2; |
| 1534 | 2034 | ||
| 1535 | if (bidi_it->scan_dir == 1) | 2035 | if (bidi_it->scan_dir == 1) |
| 1536 | { | 2036 | { |
| 2037 | EMACS_INT eob | ||
| 2038 | = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring)) | ||
| 2039 | ? bidi_it->string.schars : ZV); | ||
| 2040 | |||
| 1537 | /* There's no sense in trying to advance if we hit end of text. */ | 2041 | /* There's no sense in trying to advance if we hit end of text. */ |
| 1538 | if (bidi_it->bytepos >= ZV_BYTE) | 2042 | if (bidi_it->charpos >= eob) |
| 1539 | return bidi_it->resolved_level; | 2043 | return bidi_it->resolved_level; |
| 1540 | 2044 | ||
| 1541 | /* Record the info about the previous character. */ | 2045 | /* Record the info about the previous character. */ |
| @@ -1558,7 +2062,10 @@ bidi_level_of_next_char (struct bidi_it *bidi_it) | |||
| 1558 | bidi_it->next_for_neutral.type = UNKNOWN_BT; | 2062 | bidi_it->next_for_neutral.type = UNKNOWN_BT; |
| 1559 | if (bidi_it->next_en_pos >= 0 | 2063 | if (bidi_it->next_en_pos >= 0 |
| 1560 | && bidi_it->charpos >= bidi_it->next_en_pos) | 2064 | && bidi_it->charpos >= bidi_it->next_en_pos) |
| 1561 | bidi_it->next_en_pos = -1; | 2065 | { |
| 2066 | bidi_it->next_en_pos = 0; | ||
| 2067 | bidi_it->next_en_type = UNKNOWN_BT; | ||
| 2068 | } | ||
| 1562 | if (bidi_it->next_for_ws.type != UNKNOWN_BT | 2069 | if (bidi_it->next_for_ws.type != UNKNOWN_BT |
| 1563 | && bidi_it->charpos >= bidi_it->next_for_ws.charpos) | 2070 | && bidi_it->charpos >= bidi_it->next_for_ws.charpos) |
| 1564 | bidi_it->next_for_ws.type = UNKNOWN_BT; | 2071 | bidi_it->next_for_ws.type = UNKNOWN_BT; |
| @@ -1575,17 +2082,26 @@ bidi_level_of_next_char (struct bidi_it *bidi_it) | |||
| 1575 | /* Perhaps the character we want is already cached. If it is, the | 2082 | /* Perhaps the character we want is already cached. If it is, the |
| 1576 | call to bidi_cache_find below will return a type other than | 2083 | call to bidi_cache_find below will return a type other than |
| 1577 | UNKNOWN_BT. */ | 2084 | UNKNOWN_BT. */ |
| 1578 | if (bidi_cache_idx && !bidi_it->first_elt) | 2085 | if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt) |
| 1579 | { | 2086 | { |
| 2087 | int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring)) | ||
| 2088 | ? 0 : 1); | ||
| 1580 | if (bidi_it->scan_dir > 0) | 2089 | if (bidi_it->scan_dir > 0) |
| 1581 | { | 2090 | { |
| 1582 | if (bidi_it->nchars <= 0) | 2091 | if (bidi_it->nchars <= 0) |
| 1583 | abort (); | 2092 | abort (); |
| 1584 | next_char_pos = bidi_it->charpos + bidi_it->nchars; | 2093 | next_char_pos = bidi_it->charpos + bidi_it->nchars; |
| 1585 | } | 2094 | } |
| 1586 | else | 2095 | else if (bidi_it->charpos >= bob) |
| 2096 | /* Implementation note: we allow next_char_pos to be as low as | ||
| 2097 | 0 for buffers or -1 for strings, and that is okay because | ||
| 2098 | that's the "position" of the sentinel iterator state we | ||
| 2099 | cached at the beginning of the iteration. */ | ||
| 1587 | next_char_pos = bidi_it->charpos - 1; | 2100 | next_char_pos = bidi_it->charpos - 1; |
| 1588 | type = bidi_cache_find (next_char_pos, -1, bidi_it); | 2101 | if (next_char_pos >= bob - 1) |
| 2102 | type = bidi_cache_find (next_char_pos, -1, bidi_it); | ||
| 2103 | else | ||
| 2104 | type = UNKNOWN_BT; | ||
| 1589 | } | 2105 | } |
| 1590 | else | 2106 | else |
| 1591 | type = UNKNOWN_BT; | 2107 | type = UNKNOWN_BT; |
| @@ -1652,15 +2168,17 @@ bidi_level_of_next_char (struct bidi_it *bidi_it) | |||
| 1652 | EMACS_INT cpos = bidi_it->charpos; | 2168 | EMACS_INT cpos = bidi_it->charpos; |
| 1653 | EMACS_INT disp_pos = bidi_it->disp_pos; | 2169 | EMACS_INT disp_pos = bidi_it->disp_pos; |
| 1654 | EMACS_INT nc = bidi_it->nchars; | 2170 | EMACS_INT nc = bidi_it->nchars; |
| 2171 | struct bidi_string_data bs = bidi_it->string; | ||
| 1655 | bidi_type_t chtype; | 2172 | bidi_type_t chtype; |
| 1656 | int fwp = bidi_it->frame_window_p; | 2173 | int fwp = bidi_it->frame_window_p; |
| 2174 | int dpp = bidi_it->disp_prop; | ||
| 1657 | 2175 | ||
| 1658 | if (bidi_it->nchars <= 0) | 2176 | if (bidi_it->nchars <= 0) |
| 1659 | abort (); | 2177 | abort (); |
| 1660 | do { | 2178 | do { |
| 1661 | ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, fwp, | 2179 | ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &dpp, &bs, |
| 1662 | &clen, &nc); | 2180 | fwp, &clen, &nc); |
| 1663 | if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */) | 2181 | if (ch == '\n' || ch == BIDI_EOB) |
| 1664 | chtype = NEUTRAL_B; | 2182 | chtype = NEUTRAL_B; |
| 1665 | else | 2183 | else |
| 1666 | chtype = bidi_get_type (ch, NEUTRAL_DIR); | 2184 | chtype = bidi_get_type (ch, NEUTRAL_DIR); |
| @@ -1673,7 +2191,7 @@ bidi_level_of_next_char (struct bidi_it *bidi_it) | |||
| 1673 | } | 2191 | } |
| 1674 | 2192 | ||
| 1675 | /* Resolve implicit levels, with a twist: PDFs get the embedding | 2193 | /* Resolve implicit levels, with a twist: PDFs get the embedding |
| 1676 | level of the enbedding they terminate. See below for the | 2194 | level of the embedding they terminate. See below for the |
| 1677 | reason. */ | 2195 | reason. */ |
| 1678 | if (bidi_it->orig_type == PDF | 2196 | if (bidi_it->orig_type == PDF |
| 1679 | /* Don't do this if this formatting code didn't change the | 2197 | /* Don't do this if this formatting code didn't change the |
| @@ -1710,7 +2228,6 @@ bidi_level_of_next_char (struct bidi_it *bidi_it) | |||
| 1710 | else if (bidi_it->orig_type == NEUTRAL_B /* L1 */ | 2228 | else if (bidi_it->orig_type == NEUTRAL_B /* L1 */ |
| 1711 | || bidi_it->orig_type == NEUTRAL_S | 2229 | || bidi_it->orig_type == NEUTRAL_S |
| 1712 | || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB | 2230 | || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB |
| 1713 | /* || bidi_it->ch == LINESEP_CHAR */ | ||
| 1714 | || (bidi_it->orig_type == NEUTRAL_WS | 2231 | || (bidi_it->orig_type == NEUTRAL_WS |
| 1715 | && (bidi_it->next_for_ws.type == NEUTRAL_B | 2232 | && (bidi_it->next_for_ws.type == NEUTRAL_B |
| 1716 | || bidi_it->next_for_ws.type == NEUTRAL_S))) | 2233 | || bidi_it->next_for_ws.type == NEUTRAL_S))) |
| @@ -1756,10 +2273,11 @@ static void | |||
| 1756 | bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag) | 2273 | bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag) |
| 1757 | { | 2274 | { |
| 1758 | int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir; | 2275 | int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir; |
| 1759 | int idx; | 2276 | ptrdiff_t idx; |
| 1760 | 2277 | ||
| 1761 | /* Try the cache first. */ | 2278 | /* Try the cache first. */ |
| 1762 | if ((idx = bidi_cache_find_level_change (level, dir, end_flag)) >= 0) | 2279 | if ((idx = bidi_cache_find_level_change (level, dir, end_flag)) |
| 2280 | >= bidi_cache_start) | ||
| 1763 | bidi_cache_fetch_state (idx, bidi_it); | 2281 | bidi_cache_fetch_state (idx, bidi_it); |
| 1764 | else | 2282 | else |
| 1765 | { | 2283 | { |
| @@ -1781,20 +2299,30 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it) | |||
| 1781 | { | 2299 | { |
| 1782 | int old_level, new_level, next_level; | 2300 | int old_level, new_level, next_level; |
| 1783 | struct bidi_it sentinel; | 2301 | struct bidi_it sentinel; |
| 2302 | struct gcpro gcpro1; | ||
| 2303 | |||
| 2304 | if (bidi_it->charpos < 0 || bidi_it->bytepos < 0) | ||
| 2305 | abort (); | ||
| 1784 | 2306 | ||
| 1785 | if (bidi_it->scan_dir == 0) | 2307 | if (bidi_it->scan_dir == 0) |
| 1786 | { | 2308 | { |
| 1787 | bidi_it->scan_dir = 1; /* default to logical order */ | 2309 | bidi_it->scan_dir = 1; /* default to logical order */ |
| 1788 | } | 2310 | } |
| 1789 | 2311 | ||
| 2312 | /* The code below can call eval, and thus cause GC. If we are | ||
| 2313 | iterating a Lisp string, make sure it won't be GCed. */ | ||
| 2314 | if (STRINGP (bidi_it->string.lstring)) | ||
| 2315 | GCPRO1 (bidi_it->string.lstring); | ||
| 2316 | |||
| 1790 | /* If we just passed a newline, initialize for the next line. */ | 2317 | /* If we just passed a newline, initialize for the next line. */ |
| 1791 | if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B) | 2318 | if (!bidi_it->first_elt |
| 2319 | && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB)) | ||
| 1792 | bidi_line_init (bidi_it); | 2320 | bidi_line_init (bidi_it); |
| 1793 | 2321 | ||
| 1794 | /* Prepare the sentinel iterator state, and cache it. When we bump | 2322 | /* Prepare the sentinel iterator state, and cache it. When we bump |
| 1795 | into it, scanning backwards, we'll know that the last non-base | 2323 | into it, scanning backwards, we'll know that the last non-base |
| 1796 | level is exhausted. */ | 2324 | level is exhausted. */ |
| 1797 | if (bidi_cache_idx == 0) | 2325 | if (bidi_cache_idx == bidi_cache_start) |
| 1798 | { | 2326 | { |
| 1799 | bidi_copy_it (&sentinel, bidi_it); | 2327 | bidi_copy_it (&sentinel, bidi_it); |
| 1800 | if (bidi_it->first_elt) | 2328 | if (bidi_it->first_elt) |
| @@ -1870,25 +2398,34 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it) | |||
| 1870 | _before_ we process the paragraph's text, since the base | 2398 | _before_ we process the paragraph's text, since the base |
| 1871 | direction affects the reordering. */ | 2399 | direction affects the reordering. */ |
| 1872 | if (bidi_it->scan_dir == 1 | 2400 | if (bidi_it->scan_dir == 1 |
| 1873 | && bidi_it->orig_type == NEUTRAL_B | 2401 | && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB)) |
| 1874 | && bidi_it->bytepos < ZV_BYTE) | ||
| 1875 | { | 2402 | { |
| 1876 | EMACS_INT sep_len = | 2403 | /* The paragraph direction of the entire string, once |
| 1877 | bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars, | 2404 | determined, is in effect for the entire string. Setting the |
| 1878 | bidi_it->bytepos + bidi_it->ch_len); | 2405 | separator limit to the end of the string prevents |
| 1879 | if (bidi_it->nchars <= 0) | 2406 | bidi_paragraph_init from being called automatically on this |
| 1880 | abort (); | 2407 | string. */ |
| 1881 | if (sep_len >= 0) | 2408 | if (bidi_it->string.s || STRINGP (bidi_it->string.lstring)) |
| 2409 | bidi_it->separator_limit = bidi_it->string.schars; | ||
| 2410 | else if (bidi_it->bytepos < ZV_BYTE) | ||
| 1882 | { | 2411 | { |
| 1883 | bidi_it->new_paragraph = 1; | 2412 | EMACS_INT sep_len |
| 1884 | /* Record the buffer position of the last character of the | 2413 | = bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars, |
| 1885 | paragraph separator. */ | 2414 | bidi_it->bytepos + bidi_it->ch_len); |
| 1886 | bidi_it->separator_limit = | 2415 | if (bidi_it->nchars <= 0) |
| 1887 | bidi_it->charpos + bidi_it->nchars + sep_len; | 2416 | abort (); |
| 2417 | if (sep_len >= 0) | ||
| 2418 | { | ||
| 2419 | bidi_it->new_paragraph = 1; | ||
| 2420 | /* Record the buffer position of the last character of the | ||
| 2421 | paragraph separator. */ | ||
| 2422 | bidi_it->separator_limit | ||
| 2423 | = bidi_it->charpos + bidi_it->nchars + sep_len; | ||
| 2424 | } | ||
| 1888 | } | 2425 | } |
| 1889 | } | 2426 | } |
| 1890 | 2427 | ||
| 1891 | if (bidi_it->scan_dir == 1 && bidi_cache_idx) | 2428 | if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start) |
| 1892 | { | 2429 | { |
| 1893 | /* If we are at paragraph's base embedding level and beyond the | 2430 | /* If we are at paragraph's base embedding level and beyond the |
| 1894 | last cached position, the cache's job is done and we can | 2431 | last cached position, the cache's job is done and we can |
| @@ -1904,6 +2441,9 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it) | |||
| 1904 | else | 2441 | else |
| 1905 | bidi_cache_iterator_state (bidi_it, 1); | 2442 | bidi_cache_iterator_state (bidi_it, 1); |
| 1906 | } | 2443 | } |
| 2444 | |||
| 2445 | if (STRINGP (bidi_it->string.lstring)) | ||
| 2446 | UNGCPRO; | ||
| 1907 | } | 2447 | } |
| 1908 | 2448 | ||
| 1909 | /* This is meant to be called from within the debugger, whenever you | 2449 | /* This is meant to be called from within the debugger, whenever you |
| @@ -1912,7 +2452,7 @@ void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE; | |||
| 1912 | void | 2452 | void |
| 1913 | bidi_dump_cached_states (void) | 2453 | bidi_dump_cached_states (void) |
| 1914 | { | 2454 | { |
| 1915 | int i; | 2455 | ptrdiff_t i; |
| 1916 | int ndigits = 1; | 2456 | int ndigits = 1; |
| 1917 | 2457 | ||
| 1918 | if (bidi_cache_idx == 0) | 2458 | if (bidi_cache_idx == 0) |
| @@ -1920,7 +2460,7 @@ bidi_dump_cached_states (void) | |||
| 1920 | fprintf (stderr, "The cache is empty.\n"); | 2460 | fprintf (stderr, "The cache is empty.\n"); |
| 1921 | return; | 2461 | return; |
| 1922 | } | 2462 | } |
| 1923 | fprintf (stderr, "Total of %d state%s in cache:\n", | 2463 | fprintf (stderr, "Total of %"pD"d state%s in cache:\n", |
| 1924 | bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s"); | 2464 | bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s"); |
| 1925 | 2465 | ||
| 1926 | for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10) | 2466 | for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10) |