aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Eli Zaretskii, 2014-10-09 20:54:42 +0300
committer: Eli Zaretskii, 2014-10-09 20:54:42 +0300
commit: 18e91e6296b5430279a441d39d259b92ed12d96e (patch)
tree: 651bef209eb19f3688f9a98de6e16089a5444e67 /src
parent: 58242892925c82a15592bccd900ee977d830d265 (diff)
download: emacs-18e91e6296b5430279a441d39d259b92ed12d96e.tar.gz
emacs-18e91e6296b5430279a441d39d259b92ed12d96e.zip
Added BPA. Emacs aborts at startup.
Diffstat (limited to 'src')
-rw-r--r-- src/bidi.c 254
-rw-r--r-- src/dispextern.h 13
2 files changed, 264 insertions, 3 deletions
diff --git a/src/bidi.c b/src/bidi.c
index c7a66198ff9..989bcf5989b 100644
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -247,7 +247,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
247 247
248static bool bidi_initialized = 0; 248static bool bidi_initialized = 0;
249 249
250static Lisp_Object bidi_type_table, bidi_mirror_table; 250static Lisp_Object bidi_type_table, bidi_mirror_table, bidi_brackets_table;
251 251
252#define BIDI_EOB (-1) 252#define BIDI_EOB (-1)
253 253
@@ -503,6 +503,7 @@ bidi_remember_char (struct bidi_saved_info *saved_info,
503 bidi_check_type (bidi_it->type_after_w1); 503 bidi_check_type (bidi_it->type_after_w1);
504 saved_info->orig_type = bidi_it->orig_type; 504 saved_info->orig_type = bidi_it->orig_type;
505 bidi_check_type (bidi_it->orig_type); 505 bidi_check_type (bidi_it->orig_type);
506 saved_info->bracket_resolved = bidi_it->bracket_resolved;
506} 507}
507 508
508/* Copy the bidi iterator from FROM to TO. To save cycles, this only 509/* Copy the bidi iterator from FROM to TO. To save cycles, this only
@@ -769,6 +770,7 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved)
769 bidi_cache[idx].next_for_ws = bidi_it->next_for_ws; 770 bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
770 bidi_cache[idx].disp_pos = bidi_it->disp_pos; 771 bidi_cache[idx].disp_pos = bidi_it->disp_pos;
771 bidi_cache[idx].disp_prop = bidi_it->disp_prop; 772 bidi_cache[idx].disp_prop = bidi_it->disp_prop;
773 bidi_cache[idx].bracket_resolved = bidi_it->bracket_resolved;
772 } 774 }
773 775
774 bidi_cache_last_idx = idx; 776 bidi_cache_last_idx = idx;
@@ -978,6 +980,11 @@ bidi_initialize (void)
978 emacs_abort (); 980 emacs_abort ();
979 staticpro (&bidi_mirror_table); 981 staticpro (&bidi_mirror_table);
980 982
983 bidi_brackets_table = uniprop_table (intern ("bracket-type"));
984 if (NILP (bidi_brackets_table))
985 emacs_abort ();
986 staticpro (&bidi_brackets_table);
987
981 Qparagraph_start = intern ("paragraph-start"); 988 Qparagraph_start = intern ("paragraph-start");
982 staticpro (&Qparagraph_start); 989 staticpro (&Qparagraph_start);
983 paragraph_start_re = Fsymbol_value (Qparagraph_start); 990 paragraph_start_re = Fsymbol_value (Qparagraph_start);
@@ -2050,7 +2057,15 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
2050 type = NEUTRAL_ON; 2057 type = NEUTRAL_ON;
2051 } 2058 }
2052 else 2059 else
2053 type = bidi_it->prev.type_after_w1; 2060 {
2061 type = bidi_it->prev.type_after_w1;
2062 /* Unicode 8.0 correction for N0. */
2063 if (type == NEUTRAL_ON
2064 && bidi_it->prev.bracket_resolved
2065 && (bidi_it->prev.type == STRONG_L
2066 || bidi_it->prev.type == STRONG_R))
2067 type = bidi_it->prev.type;
2068 }
2054 } 2069 }
2055 else if (bidi_it->sos == R2L) 2070 else if (bidi_it->sos == R2L)
2056 type = STRONG_R; 2071 type = STRONG_R;
@@ -2248,13 +2263,234 @@ bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
2248 return STRONG_R; 2263 return STRONG_R;
2249} 2264}
2250 2265
2266static bidi_bracket_type_t
2267bidi_paired_bracket_type (int c)
2268{
2269 if (c == BIDI_EOB)
2270 return BIDI_BRACKET_NONE;
2271 if (c < 0 || c > MAX_CHAR)
2272 emacs_abort ();
2273
2274 return (bidi_bracket_type_t) XINT (CHAR_TABLE_REF (bidi_brackets_table, c));
2275}
2276
/* Bits kept in the `flags' member of a BPA stack entry.  The
   "embedding"/"opposite" part says whether a strong type agrees with
   the embedding direction or is opposite to it; the
   "inside"/"outside" part says whether it was seen after the opening
   bracket was pushed or before it.  */
#define FLAG_EMBEDDING_INSIDE   (1 << 0)
#define FLAG_OPPOSITE_INSIDE    (1 << 1)
#define FLAG_EMBEDDING_OUTSIDE  (1 << 2)
#define FLAG_OPPOSITE_OUTSIDE   (1 << 3)
2281
/* One slot of the stack that bidi_resolve_bracket_pairs maintains
   while it looks for the closing brackets of the opening brackets it
   has seen.  Slots are filled by PUSH_BPA_STACK.  */
struct bpa_stack_entry
{
  /* The character that closes this slot's opening bracket.  */
  int close_bracket_char;

  /* Index of the bidi cache slot that holds the opening bracket.  */
  int open_bracket_idx;

#ifdef ENABLE_CHECKING
  /* Character position of the opening bracket; only used for
     consistency assertions.  */
  ptrdiff_t open_bracket_pos;
#endif

  /* Bitwise OR of the FLAG_* values.  */
  unsigned flags : 4;
};

typedef struct bpa_stack_entry bpa_stack_entry;
2292
/* Number of slots in the BPA stack: as many bpa_stack_entry objects
   as fit in MAX_ALLOCA bytes, but never fewer than one.  With
   MAX_ALLOCA of 16KB this is on the order of a thousand slots (the
   exact count depends on sizeof (bpa_stack_entry)), which should be
   more than enough for actual bidi text.  */
#define MAX_BPA_STACK \
  (max (MAX_ALLOCA / sizeof (bpa_stack_entry), 1))
2296
/* Helper for PUSH_BPA_STACK: in builds with ENABLE_CHECKING, record
   the character position of the bracket being pushed in the current
   stack slot; in other builds it expands to nothing.  */
#ifndef ENABLE_CHECKING
# define STORE_BRACKET_CHARPOS  /* nothing */
#else
# define STORE_BRACKET_CHARPOS \
   bpa_stack[bpa_sp].open_bracket_pos = bidi_it->charpos
#endif
2303
/* Push the opening bracket at bidi_it onto the BPA stack.

   This macro is meant to be expanded inside
   bidi_resolve_bracket_pairs: it uses that function's locals
   bpa_stack, bpa_sp and bidi_it, and the label bpa_give_up.

   The new slot records the mirrored (closing) character of the
   bracket, the index of the bidi cache slot last written, and an
   "outside" flag computed from EMBEDDING_LEVEL and LAST_STRONG: the
   flag is FLAG_EMBEDDING_OUTSIDE when LAST_STRONG agrees with the
   direction implied by the parity of EMBEDDING_LEVEL (even means
   left-to-right, so STRONG_L agrees), FLAG_OPPOSITE_OUTSIDE
   otherwise.

   If the stack is already full, jump to bpa_give_up WITHOUT changing
   bpa_sp, so that bpa_sp keeps indexing the last valid slot and the
   unwinding code at bpa_give_up never reads past the end of
   bpa_stack.  bpa_sp is never less than -1, so bpa_sp + 1 is
   non-negative and compares correctly even when MAX_BPA_STACK has an
   unsigned type.  */
#define PUSH_BPA_STACK(EMBEDDING_LEVEL, LAST_STRONG)                     \
  do {                                                                   \
    if (bpa_sp + 1 >= MAX_BPA_STACK)                                     \
      goto bpa_give_up;                                                  \
    bpa_sp++;                                                            \
    bpa_stack[bpa_sp].close_bracket_char = bidi_mirror_char (bidi_it->ch); \
    bpa_stack[bpa_sp].open_bracket_idx = bidi_cache_last_idx;            \
    STORE_BRACKET_CHARPOS;                                               \
    if (((EMBEDDING_LEVEL) & 1) == 0)                                    \
      bpa_stack[bpa_sp].flags = ((LAST_STRONG) == STRONG_L               \
                                 ? FLAG_EMBEDDING_OUTSIDE                \
                                 : FLAG_OPPOSITE_OUTSIDE);               \
    else                                                                 \
      bpa_stack[bpa_sp].flags = ((LAST_STRONG) == STRONG_L               \
                                 ? FLAG_OPPOSITE_OUTSIDE                 \
                                 : FLAG_EMBEDDING_OUTSIDE);              \
  } while (0)
2321
2322
2323/* This function implements BPA, the Bidi Parenthesis Algorithm,
2324 described in BD16 and N0 of UAX#9. */
2325static bidi_type_t
2326bidi_resolve_bracket_pairs (struct bidi_it *bidi_it)
2327{
2328 bidi_bracket_type_t btype;
2329 bidi_type_t type = bidi_it->type;
2330
2331 /* When scanning backwards, we don't expect any unresolved bidi
2332 bracket characters. */
2333 if (bidi_it->scan_dir != 1)
2334 emacs_abort ();
2335
2336 btype = bidi_paired_bracket_type (bidi_it->ch);
2337 if (btype == BIDI_BRACKET_OPEN)
2338 {
2339 bpa_stack_entry bpa_stack[MAX_BPA_STACK];
2340 int bpa_sp = -1;
2341 struct bidi_it saved_it;
2342 bidi_type_t last_strong;
2343 int embedding_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2344
2345 eassert (MAX_BPA_STACK >= 100);
2346 bidi_copy_it (&saved_it, bidi_it);
2347 last_strong = bidi_it->prev_for_neutral.type;
2348
2349 while (1)
2350 {
2351 int old_sidx, new_sidx;
2352 int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2353
2354 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
2355 if (btype == BIDI_BRACKET_OPEN)
2356 PUSH_BPA_STACK (embedding_level, last_strong);
2357 else if (btype == BIDI_BRACKET_CLOSE)
2358 {
2359 int sp = bpa_sp;
2360 int curchar = bidi_it->ch;
2361
2362 eassert (sp >= 0);
2363 while (sp >= 0 && bpa_stack[sp].close_bracket_char != curchar)
2364 sp--;
2365 if (sp >= 0)
2366 {
2367 /* Resolve the bracket type according to N0. */
2368 if (bpa_stack[sp].flags & FLAG_EMBEDDING_INSIDE) /* N0b */
2369 type = ((embedding_level & 1) ? STRONG_R : STRONG_L);
2370 else if ((bpa_stack[sp].flags /* N0c1 */
2371 & (FLAG_OPPOSITE_INSIDE | FLAG_OPPOSITE_OUTSIDE))
2372 == (FLAG_OPPOSITE_INSIDE | FLAG_OPPOSITE_OUTSIDE))
2373 type = ((embedding_level & 1) ? STRONG_L : STRONG_R);
2374 else if (bpa_stack[sp].flags & FLAG_OPPOSITE_INSIDE) /* N0c2 */
2375 type = ((embedding_level & 1) ? STRONG_R : STRONG_L);
2376
2377 /* Update and cache the closing bracket. */
2378 bidi_it->type = type;
2379 bidi_it->bracket_resolved = 1;
2380 bidi_cache_iterator_state (bidi_it, 0);
2381 /* Update and cache the corresponding opening bracket. */
2382 bidi_cache_fetch_state (bpa_stack[sp].open_bracket_idx,
2383 bidi_it);
2384#ifdef ENABLE_CHECKING
2385 eassert (bpa_stack[sp].open_bracket_pos == bidi_it->charpos);
2386#endif
2387 bidi_it->type = type;
2388 bidi_it->bracket_resolved = 1;
2389 bidi_cache_iterator_state (bidi_it, 0);
2390 bpa_sp = sp - 1;
2391 if (bpa_sp < 0)
2392 break;
2393 }
2394 else
2395 bidi_it->bracket_resolved = 1;
2396 }
2397 else if (bidi_get_category (bidi_it->type_after_w1) != NEUTRAL)
2398 {
2399 unsigned flag;
2400 int sp;
2401
2402 /* Update the "inside" flags of all the slots on the stack. */
2403 switch (bidi_it->type)
2404 {
2405 case STRONG_L:
2406 flag = ((embedding_level & 1) == 0
2407 ? FLAG_EMBEDDING_INSIDE
2408 : FLAG_OPPOSITE_INSIDE);
2409 last_strong = STRONG_L;
2410 break;
2411 case STRONG_R:
2412 case WEAK_EN:
2413 case WEAK_AN:
2414 flag = ((embedding_level & 1) == 1
2415 ? FLAG_EMBEDDING_INSIDE
2416 : FLAG_OPPOSITE_INSIDE);
2417 last_strong = STRONG_R;
2418 break;
2419 default:
2420 break;
2421 }
2422 for (sp = bpa_sp; sp >= 0; sp--)
2423 bpa_stack[sp].flags |= flag;
2424 }
2425 /* Record the info about the previous character, so that it
2426 will be cached with this state. */
2427 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
2428 && bidi_it->type != WEAK_BN)
2429 bidi_remember_char (&bidi_it->prev, bidi_it);
2430 old_sidx = bidi_it->stack_idx;
2431 type = bidi_resolve_weak (bidi_it);
2432 /* Skip level runs excluded from this isolating run sequence. */
2433 new_sidx = bidi_it->stack_idx;
2434 if (bidi_it->level_stack[new_sidx].level > current_level
2435 && (bidi_it->level_stack[new_sidx].isolate_status
2436 || (new_sidx > old_sidx + 1
2437 && bidi_it->level_stack[new_sidx - 1].isolate_status)))
2438 {
2439 while (bidi_it->level_stack[bidi_it->stack_idx].level
2440 > current_level)
2441 {
2442 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
2443 type = bidi_resolve_weak (bidi_it);
2444 }
2445 }
2446 if (type == NEUTRAL_B
2447 || (bidi_it->level_stack[bidi_it->stack_idx].level
2448 != current_level))
2449 {
2450 bpa_give_up:
2451 /* We've marched all the way to the end of this
2452 isolating run sequence, and didn't find matching
2453 closing brackets for some opening brackets. Unwind
2454 whatever is left on the BPA stack, and mark each
2455 bracket there as BPA-resolved. */
2456 while (bpa_sp >= 0)
2457 {
2458 bidi_cache_fetch_state (bpa_stack[bpa_sp].open_bracket_idx,
2459 bidi_it);
2460#ifdef ENABLE_CHECKING
2461 eassert (bpa_stack[bpa_sp].open_bracket_pos
2462 == bidi_it->charpos);
2463#endif
2464 bidi_it->bracket_resolved = 1;
2465 bidi_cache_iterator_state (bidi_it, 0);
2466 bpa_sp--;
2467 }
2468 type = saved_it.type;
2469 break;
2470 }
2471 if (bidi_it->type_after_w1 == NEUTRAL_ON) /* Unicode 8.0 correction */
2472 btype = bidi_paired_bracket_type (bidi_it->ch);
2473 else
2474 btype = BIDI_BRACKET_NONE;
2475 }
2476 bidi_check_type (type);
2477
2478 bidi_copy_it (bidi_it, &saved_it);
2479 bidi_it->type = type;
2480 bidi_it->bracket_resolved = 1;
2481 }
2482
2483 return type;
2484}
2485
2251static bidi_type_t 2486static bidi_type_t
2252bidi_resolve_neutral (struct bidi_it *bidi_it) 2487bidi_resolve_neutral (struct bidi_it *bidi_it)
2253{ 2488{
2254 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level; 2489 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2255 bidi_type_t type = bidi_resolve_weak (bidi_it); 2490 bidi_type_t type = bidi_resolve_weak (bidi_it);
2256 int current_level = bidi_it->level_stack[bidi_it->stack_idx].level; 2491 int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2257 bool is_neutral = bidi_get_category (type) == NEUTRAL; 2492 bool is_neutral;
2493 int ch = bidi_it->ch;
2258 2494
2259 eassert (type == STRONG_R 2495 eassert (type == STRONG_R
2260 || type == STRONG_L 2496 || type == STRONG_L
@@ -2273,6 +2509,18 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
2273 eassert (current_level >= 0); 2509 eassert (current_level >= 0);
2274 2510
2275 /* FIXME: Insert the code for N0 here. */ 2511 /* FIXME: Insert the code for N0 here. */
2512 if (type == NEUTRAL_ON
2513 && bidi_paired_bracket_type (ch) != BIDI_BRACKET_NONE)
2514 {
2515 if (bidi_cache_idx > bidi_cache_start
2516 && bidi_cache_find (bidi_it->charpos, -1, bidi_it) != UNKNOWN_BT
2517 && bidi_it->bracket_resolved)
2518 type = bidi_it->type;
2519 else
2520 type = bidi_resolve_bracket_pairs (bidi_it);
2521 }
2522
2523 is_neutral = bidi_get_category (type) == NEUTRAL;
2276 2524
2277 if ((type != NEUTRAL_B /* Don't risk entering the long loop below if 2525 if ((type != NEUTRAL_B /* Don't risk entering the long loop below if
2278 we are already at paragraph end. */ 2526 we are already at paragraph end. */
diff --git a/src/dispextern.h b/src/dispextern.h
index d61b2a388d5..b7a4225221a 100644
--- a/src/dispextern.h
+++ b/src/dispextern.h
@@ -1896,6 +1896,17 @@ typedef enum {
1896 NEUTRAL_ON /* other neutrals */ 1896 NEUTRAL_ON /* other neutrals */
1897} bidi_type_t; 1897} bidi_type_t;
1898 1898
/* The Bidi Paired Bracket Type of a character.

   The numeric values of the members must stay in sync with the 8th
   element of the member of unidata-prop-alist (in
   admin/unidata/unidata-gen.el) for the Unicode character property
   `bracket-type'.  */
typedef enum {
  BIDI_BRACKET_NONE = 0,        /* not a paired bracket */
  BIDI_BRACKET_OPEN = 1,        /* opening paired bracket */
  BIDI_BRACKET_CLOSE = 2        /* closing paired bracket */
} bidi_bracket_type_t;
1909
1899/* The basic directionality data type. */ 1910/* The basic directionality data type. */
1900typedef enum { NEUTRAL_DIR, L2R, R2L } bidi_dir_t; 1911typedef enum { NEUTRAL_DIR, L2R, R2L } bidi_dir_t;
1901 1912
@@ -1906,6 +1917,7 @@ struct bidi_saved_info {
1906 bidi_type_t type; /* character's resolved bidi type */ 1917 bidi_type_t type; /* character's resolved bidi type */
1907 bidi_type_t type_after_w1; /* original type of the character, after W1 */ 1918 bidi_type_t type_after_w1; /* original type of the character, after W1 */
1908 bidi_type_t orig_type; /* type as we found it in the buffer */ 1919 bidi_type_t orig_type; /* type as we found it in the buffer */
1920 bool_bf bracket_resolved : 1; /* 1 if type was BPA-resolved */
1909}; 1921};
1910 1922
1911/* Data type for keeping track of information about saved embedding 1923/* Data type for keeping track of information about saved embedding
@@ -1964,6 +1976,7 @@ struct bidi_it {
1964 int disp_prop; /* if non-zero, there really is a 1976 int disp_prop; /* if non-zero, there really is a
1965 `display' property/string at disp_pos; 1977 `display' property/string at disp_pos;
1966 if 2, the property is a `space' spec */ 1978 if 2, the property is a `space' spec */
1979 bool_bf bracket_resolved : 1; /* if 1, this bracket's type is BPA-resolved */
1967 int stack_idx; /* index of current data on the stack */ 1980 int stack_idx; /* index of current data on the stack */
1968 /* Note: Everything from here on is not copied/saved when the bidi 1981 /* Note: Everything from here on is not copied/saved when the bidi
1969 iterator state is saved, pushed, or popped. So only put here 1982 iterator state is saved, pushed, or popped. So only put here