diff options
| author | Eli Zaretskii | 2014-10-09 20:54:42 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2014-10-09 20:54:42 +0300 |
| commit | 18e91e6296b5430279a441d39d259b92ed12d96e (patch) | |
| tree | 651bef209eb19f3688f9a98de6e16089a5444e67 /src | |
| parent | 58242892925c82a15592bccd900ee977d830d265 (diff) | |
| download | emacs-18e91e6296b5430279a441d39d259b92ed12d96e.tar.gz emacs-18e91e6296b5430279a441d39d259b92ed12d96e.zip | |
Added BPA. Emacs aborts at startup.
Diffstat (limited to 'src')
| -rw-r--r-- | src/bidi.c | 254 | ||||
| -rw-r--r-- | src/dispextern.h | 13 |
2 files changed, 264 insertions, 3 deletions
diff --git a/src/bidi.c b/src/bidi.c
index c7a66198ff9..989bcf5989b 100644
--- a/src/bidi.c
+++ b/src/bidi.c
| @@ -247,7 +247,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ | |||
| 247 | 247 | ||
| 248 | static bool bidi_initialized = 0; | 248 | static bool bidi_initialized = 0; |
| 249 | 249 | ||
| 250 | static Lisp_Object bidi_type_table, bidi_mirror_table; | 250 | static Lisp_Object bidi_type_table, bidi_mirror_table, bidi_brackets_table; |
| 251 | 251 | ||
| 252 | #define BIDI_EOB (-1) | 252 | #define BIDI_EOB (-1) |
| 253 | 253 | ||
| @@ -503,6 +503,7 @@ bidi_remember_char (struct bidi_saved_info *saved_info, | |||
| 503 | bidi_check_type (bidi_it->type_after_w1); | 503 | bidi_check_type (bidi_it->type_after_w1); |
| 504 | saved_info->orig_type = bidi_it->orig_type; | 504 | saved_info->orig_type = bidi_it->orig_type; |
| 505 | bidi_check_type (bidi_it->orig_type); | 505 | bidi_check_type (bidi_it->orig_type); |
| 506 | saved_info->bracket_resolved = bidi_it->bracket_resolved; | ||
| 506 | } | 507 | } |
| 507 | 508 | ||
| 508 | /* Copy the bidi iterator from FROM to TO. To save cycles, this only | 509 | /* Copy the bidi iterator from FROM to TO. To save cycles, this only |
| @@ -769,6 +770,7 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved) | |||
| 769 | bidi_cache[idx].next_for_ws = bidi_it->next_for_ws; | 770 | bidi_cache[idx].next_for_ws = bidi_it->next_for_ws; |
| 770 | bidi_cache[idx].disp_pos = bidi_it->disp_pos; | 771 | bidi_cache[idx].disp_pos = bidi_it->disp_pos; |
| 771 | bidi_cache[idx].disp_prop = bidi_it->disp_prop; | 772 | bidi_cache[idx].disp_prop = bidi_it->disp_prop; |
| 773 | bidi_cache[idx].bracket_resolved = bidi_it->bracket_resolved; | ||
| 772 | } | 774 | } |
| 773 | 775 | ||
| 774 | bidi_cache_last_idx = idx; | 776 | bidi_cache_last_idx = idx; |
| @@ -978,6 +980,11 @@ bidi_initialize (void) | |||
| 978 | emacs_abort (); | 980 | emacs_abort (); |
| 979 | staticpro (&bidi_mirror_table); | 981 | staticpro (&bidi_mirror_table); |
| 980 | 982 | ||
| 983 | bidi_brackets_table = uniprop_table (intern ("bracket-type")); | ||
| 984 | if (NILP (bidi_brackets_table)) | ||
| 985 | emacs_abort (); | ||
| 986 | staticpro (&bidi_brackets_table); | ||
| 987 | |||
| 981 | Qparagraph_start = intern ("paragraph-start"); | 988 | Qparagraph_start = intern ("paragraph-start"); |
| 982 | staticpro (&Qparagraph_start); | 989 | staticpro (&Qparagraph_start); |
| 983 | paragraph_start_re = Fsymbol_value (Qparagraph_start); | 990 | paragraph_start_re = Fsymbol_value (Qparagraph_start); |
| @@ -2050,7 +2057,15 @@ bidi_resolve_weak (struct bidi_it *bidi_it) | |||
| 2050 | type = NEUTRAL_ON; | 2057 | type = NEUTRAL_ON; |
| 2051 | } | 2058 | } |
| 2052 | else | 2059 | else |
| 2053 | type = bidi_it->prev.type_after_w1; | 2060 | { |
| 2061 | type = bidi_it->prev.type_after_w1; | ||
| 2062 | /* Unicode 8.0 correction for N0. */ | ||
| 2063 | if (type == NEUTRAL_ON | ||
| 2064 | && bidi_it->prev.bracket_resolved | ||
| 2065 | && (bidi_it->prev.type == STRONG_L | ||
| 2066 | || bidi_it->prev.type == STRONG_R)) | ||
| 2067 | type = bidi_it->prev.type; | ||
| 2068 | } | ||
| 2054 | } | 2069 | } |
| 2055 | else if (bidi_it->sos == R2L) | 2070 | else if (bidi_it->sos == R2L) |
| 2056 | type = STRONG_R; | 2071 | type = STRONG_R; |
| @@ -2248,13 +2263,234 @@ bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev) | |||
| 2248 | return STRONG_R; | 2263 | return STRONG_R; |
| 2249 | } | 2264 | } |
| 2250 | 2265 | ||
| 2266 | static bidi_bracket_type_t | ||
| 2267 | bidi_paired_bracket_type (int c) | ||
| 2268 | { | ||
| 2269 | if (c == BIDI_EOB) | ||
| 2270 | return BIDI_BRACKET_NONE; | ||
| 2271 | if (c < 0 || c > MAX_CHAR) | ||
| 2272 | emacs_abort (); | ||
| 2273 | |||
| 2274 | return (bidi_bracket_type_t) XINT (CHAR_TABLE_REF (bidi_brackets_table, c)); | ||
| 2275 | } | ||
| 2276 | |||
| 2277 | #define FLAG_EMBEDDING_INSIDE 1 | ||
| 2278 | #define FLAG_OPPOSITE_INSIDE 2 | ||
| 2279 | #define FLAG_EMBEDDING_OUTSIDE 4 | ||
| 2280 | #define FLAG_OPPOSITE_OUTSIDE 8 | ||
| 2281 | |||
| 2282 | /* A data type used in the stack maintained by | ||
| 2283 | bidi_resolve_bracket_pairs below. */ | ||
| 2284 | typedef struct bpa_stack_entry { | ||
| 2285 | int close_bracket_char; | ||
| 2286 | int open_bracket_idx; | ||
| 2287 | #ifdef ENABLE_CHECKING | ||
| 2288 | ptrdiff_t open_bracket_pos; | ||
| 2289 | #endif | ||
| 2290 | unsigned flags : 4; | ||
| 2291 | } bpa_stack_entry; | ||
| 2292 | |||
| 2293 | /* With MAX_ALLOCA of 16KB, this should allow at least 1K slots in the | ||
| 2294 | BPA stack, which should be more than enough for actual bidi text. */ | ||
| 2295 | #define MAX_BPA_STACK (max (MAX_ALLOCA / sizeof (bpa_stack_entry), 1)) | ||
| 2296 | |||
| 2297 | #ifdef ENABLE_CHECKING | ||
| 2298 | # define STORE_BRACKET_CHARPOS \ | ||
| 2299 | bpa_stack[bpa_sp].open_bracket_pos = bidi_it->charpos | ||
| 2300 | #else | ||
| 2301 | # define STORE_BRACKET_CHARPOS /* nothing */ | ||
| 2302 | #endif | ||
| 2303 | |||
| 2304 | #define PUSH_BPA_STACK(EMBEDDING_LEVEL, LAST_STRONG) \ | ||
| 2305 | do { \ | ||
| 2306 | bpa_sp++; \ | ||
| 2307 | if (bpa_sp >= MAX_BPA_STACK) \ | ||
| 2308 | goto bpa_give_up; \ | ||
| 2309 | bpa_stack[bpa_sp].close_bracket_char = bidi_mirror_char (bidi_it->ch); \ | ||
| 2310 | bpa_stack[bpa_sp].open_bracket_idx = bidi_cache_last_idx; \ | ||
| 2311 | STORE_BRACKET_CHARPOS; \ | ||
| 2312 | if (((EMBEDDING_LEVEL) & 1) == 0) \ | ||
| 2313 | bpa_stack[bpa_sp].flags = ((LAST_STRONG) == STRONG_L \ | ||
| 2314 | ? FLAG_EMBEDDING_OUTSIDE \ | ||
| 2315 | : FLAG_OPPOSITE_OUTSIDE); \ | ||
| 2316 | else \ | ||
| 2317 | bpa_stack[bpa_sp].flags = ((LAST_STRONG) == STRONG_L \ | ||
| 2318 | ? FLAG_OPPOSITE_OUTSIDE \ | ||
| 2319 | : FLAG_EMBEDDING_OUTSIDE); \ | ||
| 2320 | } while (0) | ||
| 2321 | |||
| 2322 | |||
| 2323 | /* This function implements BPA, the Bidi Parenthesis Algorithm, | ||
| 2324 | described in BD16 and N0 of UAX#9. */ | ||
| 2325 | static bidi_type_t | ||
| 2326 | bidi_resolve_bracket_pairs (struct bidi_it *bidi_it) | ||
| 2327 | { | ||
| 2328 | bidi_bracket_type_t btype; | ||
| 2329 | bidi_type_t type = bidi_it->type; | ||
| 2330 | |||
| 2331 | /* When scanning backwards, we don't expect any unresolved bidi | ||
| 2332 | bracket characters. */ | ||
| 2333 | if (bidi_it->scan_dir != 1) | ||
| 2334 | emacs_abort (); | ||
| 2335 | |||
| 2336 | btype = bidi_paired_bracket_type (bidi_it->ch); | ||
| 2337 | if (btype == BIDI_BRACKET_OPEN) | ||
| 2338 | { | ||
| 2339 | bpa_stack_entry bpa_stack[MAX_BPA_STACK]; | ||
| 2340 | int bpa_sp = -1; | ||
| 2341 | struct bidi_it saved_it; | ||
| 2342 | bidi_type_t last_strong; | ||
| 2343 | int embedding_level = bidi_it->level_stack[bidi_it->stack_idx].level; | ||
| 2344 | |||
| 2345 | eassert (MAX_BPA_STACK >= 100); | ||
| 2346 | bidi_copy_it (&saved_it, bidi_it); | ||
| 2347 | last_strong = bidi_it->prev_for_neutral.type; | ||
| 2348 | |||
| 2349 | while (1) | ||
| 2350 | { | ||
| 2351 | int old_sidx, new_sidx; | ||
| 2352 | int current_level = bidi_it->level_stack[bidi_it->stack_idx].level; | ||
| 2353 | |||
| 2354 | bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B); | ||
| 2355 | if (btype == BIDI_BRACKET_OPEN) | ||
| 2356 | PUSH_BPA_STACK (embedding_level, last_strong); | ||
| 2357 | else if (btype == BIDI_BRACKET_CLOSE) | ||
| 2358 | { | ||
| 2359 | int sp = bpa_sp; | ||
| 2360 | int curchar = bidi_it->ch; | ||
| 2361 | |||
| 2362 | eassert (sp >= 0); | ||
| 2363 | while (sp >= 0 && bpa_stack[sp].close_bracket_char != curchar) | ||
| 2364 | sp--; | ||
| 2365 | if (sp >= 0) | ||
| 2366 | { | ||
| 2367 | /* Resolve the bracket type according to N0. */ | ||
| 2368 | if (bpa_stack[sp].flags & FLAG_EMBEDDING_INSIDE) /* N0b */ | ||
| 2369 | type = ((embedding_level & 1) ? STRONG_R : STRONG_L); | ||
| 2370 | else if ((bpa_stack[sp].flags /* N0c1 */ | ||
| 2371 | & (FLAG_OPPOSITE_INSIDE | FLAG_OPPOSITE_OUTSIDE)) | ||
| 2372 | == (FLAG_OPPOSITE_INSIDE | FLAG_OPPOSITE_OUTSIDE)) | ||
| 2373 | type = ((embedding_level & 1) ? STRONG_L : STRONG_R); | ||
| 2374 | else if (bpa_stack[sp].flags & FLAG_OPPOSITE_INSIDE) /* N0c2 */ | ||
| 2375 | type = ((embedding_level & 1) ? STRONG_R : STRONG_L); | ||
| 2376 | |||
| 2377 | /* Update and cache the closing bracket. */ | ||
| 2378 | bidi_it->type = type; | ||
| 2379 | bidi_it->bracket_resolved = 1; | ||
| 2380 | bidi_cache_iterator_state (bidi_it, 0); | ||
| 2381 | /* Update and cache the corresponding opening bracket. */ | ||
| 2382 | bidi_cache_fetch_state (bpa_stack[sp].open_bracket_idx, | ||
| 2383 | bidi_it); | ||
| 2384 | #ifdef ENABLE_CHECKING | ||
| 2385 | eassert (bpa_stack[sp].open_bracket_pos == bidi_it->charpos); | ||
| 2386 | #endif | ||
| 2387 | bidi_it->type = type; | ||
| 2388 | bidi_it->bracket_resolved = 1; | ||
| 2389 | bidi_cache_iterator_state (bidi_it, 0); | ||
| 2390 | bpa_sp = sp - 1; | ||
| 2391 | if (bpa_sp < 0) | ||
| 2392 | break; | ||
| 2393 | } | ||
| 2394 | else | ||
| 2395 | bidi_it->bracket_resolved = 1; | ||
| 2396 | } | ||
| 2397 | else if (bidi_get_category (bidi_it->type_after_w1) != NEUTRAL) | ||
| 2398 | { | ||
| 2399 | unsigned flag; | ||
| 2400 | int sp; | ||
| 2401 | |||
| 2402 | /* Update the "inside" flags of all the slots on the stack. */ | ||
| 2403 | switch (bidi_it->type) | ||
| 2404 | { | ||
| 2405 | case STRONG_L: | ||
| 2406 | flag = ((embedding_level & 1) == 0 | ||
| 2407 | ? FLAG_EMBEDDING_INSIDE | ||
| 2408 | : FLAG_OPPOSITE_INSIDE); | ||
| 2409 | last_strong = STRONG_L; | ||
| 2410 | break; | ||
| 2411 | case STRONG_R: | ||
| 2412 | case WEAK_EN: | ||
| 2413 | case WEAK_AN: | ||
| 2414 | flag = ((embedding_level & 1) == 1 | ||
| 2415 | ? FLAG_EMBEDDING_INSIDE | ||
| 2416 | : FLAG_OPPOSITE_INSIDE); | ||
| 2417 | last_strong = STRONG_R; | ||
| 2418 | break; | ||
| 2419 | default: | ||
| 2420 | break; | ||
| 2421 | } | ||
| 2422 | for (sp = bpa_sp; sp >= 0; sp--) | ||
| 2423 | bpa_stack[sp].flags |= flag; | ||
| 2424 | } | ||
| 2425 | /* Record the info about the previous character, so that it | ||
| 2426 | will be cached with this state. */ | ||
| 2427 | if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */ | ||
| 2428 | && bidi_it->type != WEAK_BN) | ||
| 2429 | bidi_remember_char (&bidi_it->prev, bidi_it); | ||
| 2430 | old_sidx = bidi_it->stack_idx; | ||
| 2431 | type = bidi_resolve_weak (bidi_it); | ||
| 2432 | /* Skip level runs excluded from this isolating run sequence. */ | ||
| 2433 | new_sidx = bidi_it->stack_idx; | ||
| 2434 | if (bidi_it->level_stack[new_sidx].level > current_level | ||
| 2435 | && (bidi_it->level_stack[new_sidx].isolate_status | ||
| 2436 | || (new_sidx > old_sidx + 1 | ||
| 2437 | && bidi_it->level_stack[new_sidx - 1].isolate_status))) | ||
| 2438 | { | ||
| 2439 | while (bidi_it->level_stack[bidi_it->stack_idx].level | ||
| 2440 | > current_level) | ||
| 2441 | { | ||
| 2442 | bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B); | ||
| 2443 | type = bidi_resolve_weak (bidi_it); | ||
| 2444 | } | ||
| 2445 | } | ||
| 2446 | if (type == NEUTRAL_B | ||
| 2447 | || (bidi_it->level_stack[bidi_it->stack_idx].level | ||
| 2448 | != current_level)) | ||
| 2449 | { | ||
| 2450 | bpa_give_up: | ||
| 2451 | /* We've marched all the way to the end of this | ||
| 2452 | isolating run sequence, and didn't find matching | ||
| 2453 | closing brackets for some opening brackets. Unwind | ||
| 2454 | whatever is left on the BPA stack, and mark each | ||
| 2455 | bracket there as BPA-resolved. */ | ||
| 2456 | while (bpa_sp >= 0) | ||
| 2457 | { | ||
| 2458 | bidi_cache_fetch_state (bpa_stack[bpa_sp].open_bracket_idx, | ||
| 2459 | bidi_it); | ||
| 2460 | #ifdef ENABLE_CHECKING | ||
| 2461 | eassert (bpa_stack[bpa_sp].open_bracket_pos | ||
| 2462 | == bidi_it->charpos); | ||
| 2463 | #endif | ||
| 2464 | bidi_it->bracket_resolved = 1; | ||
| 2465 | bidi_cache_iterator_state (bidi_it, 0); | ||
| 2466 | bpa_sp--; | ||
| 2467 | } | ||
| 2468 | type = saved_it.type; | ||
| 2469 | break; | ||
| 2470 | } | ||
| 2471 | if (bidi_it->type_after_w1 == NEUTRAL_ON) /* Unicode 8.0 correction */ | ||
| 2472 | btype = bidi_paired_bracket_type (bidi_it->ch); | ||
| 2473 | else | ||
| 2474 | btype = BIDI_BRACKET_NONE; | ||
| 2475 | } | ||
| 2476 | bidi_check_type (type); | ||
| 2477 | |||
| 2478 | bidi_copy_it (bidi_it, &saved_it); | ||
| 2479 | bidi_it->type = type; | ||
| 2480 | bidi_it->bracket_resolved = 1; | ||
| 2481 | } | ||
| 2482 | |||
| 2483 | return type; | ||
| 2484 | } | ||
| 2485 | |||
| 2251 | static bidi_type_t | 2486 | static bidi_type_t |
| 2252 | bidi_resolve_neutral (struct bidi_it *bidi_it) | 2487 | bidi_resolve_neutral (struct bidi_it *bidi_it) |
| 2253 | { | 2488 | { |
| 2254 | int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level; | 2489 | int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level; |
| 2255 | bidi_type_t type = bidi_resolve_weak (bidi_it); | 2490 | bidi_type_t type = bidi_resolve_weak (bidi_it); |
| 2256 | int current_level = bidi_it->level_stack[bidi_it->stack_idx].level; | 2491 | int current_level = bidi_it->level_stack[bidi_it->stack_idx].level; |
| 2257 | bool is_neutral = bidi_get_category (type) == NEUTRAL; | 2492 | bool is_neutral; |
| 2493 | int ch = bidi_it->ch; | ||
| 2258 | 2494 | ||
| 2259 | eassert (type == STRONG_R | 2495 | eassert (type == STRONG_R |
| 2260 | || type == STRONG_L | 2496 | || type == STRONG_L |
| @@ -2273,6 +2509,18 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) | |||
| 2273 | eassert (current_level >= 0); | 2509 | eassert (current_level >= 0); |
| 2274 | 2510 | ||
| 2275 | /* FIXME: Insert the code for N0 here. */ | 2511 | /* FIXME: Insert the code for N0 here. */ |
| 2512 | if (type == NEUTRAL_ON | ||
| 2513 | && bidi_paired_bracket_type (ch) != BIDI_BRACKET_NONE) | ||
| 2514 | { | ||
| 2515 | if (bidi_cache_idx > bidi_cache_start | ||
| 2516 | && bidi_cache_find (bidi_it->charpos, -1, bidi_it) != UNKNOWN_BT | ||
| 2517 | && bidi_it->bracket_resolved) | ||
| 2518 | type = bidi_it->type; | ||
| 2519 | else | ||
| 2520 | type = bidi_resolve_bracket_pairs (bidi_it); | ||
| 2521 | } | ||
| 2522 | |||
| 2523 | is_neutral = bidi_get_category (type) == NEUTRAL; | ||
| 2276 | 2524 | ||
| 2277 | if ((type != NEUTRAL_B /* Don't risk entering the long loop below if | 2525 | if ((type != NEUTRAL_B /* Don't risk entering the long loop below if |
| 2278 | we are already at paragraph end. */ | 2526 | we are already at paragraph end. */ |
diff --git a/src/dispextern.h b/src/dispextern.h
index d61b2a388d5..b7a4225221a 100644
--- a/src/dispextern.h
+++ b/src/dispextern.h
| @@ -1896,6 +1896,17 @@ typedef enum { | |||
| 1896 | NEUTRAL_ON /* other neutrals */ | 1896 | NEUTRAL_ON /* other neutrals */ |
| 1897 | } bidi_type_t; | 1897 | } bidi_type_t; |
| 1898 | 1898 | ||
| 1899 | /* Data type for describing the Bidi Paired Bracket Type of a character. | ||
| 1900 | |||
| 1901 | The order of members must be in sync with the 8th element of the | ||
| 1902 | member of unidata-prop-alist (in admin/unidata/unidata-gen.el) for | ||
| 1903 | Unicode character property `bracket-type'. */ | ||
| 1904 | typedef enum { | ||
| 1905 | BIDI_BRACKET_NONE = 0, | ||
| 1906 | BIDI_BRACKET_OPEN, | ||
| 1907 | BIDI_BRACKET_CLOSE | ||
| 1908 | } bidi_bracket_type_t; | ||
| 1909 | |||
| 1899 | /* The basic directionality data type. */ | 1910 | /* The basic directionality data type. */ |
| 1900 | typedef enum { NEUTRAL_DIR, L2R, R2L } bidi_dir_t; | 1911 | typedef enum { NEUTRAL_DIR, L2R, R2L } bidi_dir_t; |
| 1901 | 1912 | ||
| @@ -1906,6 +1917,7 @@ struct bidi_saved_info { | |||
| 1906 | bidi_type_t type; /* character's resolved bidi type */ | 1917 | bidi_type_t type; /* character's resolved bidi type */ |
| 1907 | bidi_type_t type_after_w1; /* original type of the character, after W1 */ | 1918 | bidi_type_t type_after_w1; /* original type of the character, after W1 */ |
| 1908 | bidi_type_t orig_type; /* type as we found it in the buffer */ | 1919 | bidi_type_t orig_type; /* type as we found it in the buffer */ |
| 1920 | bool_bf bracket_resolved : 1; /* 1 if type was BPA-resolved */ | ||
| 1909 | }; | 1921 | }; |
| 1910 | 1922 | ||
| 1911 | /* Data type for keeping track of information about saved embedding | 1923 | /* Data type for keeping track of information about saved embedding |
| @@ -1964,6 +1976,7 @@ struct bidi_it { | |||
| 1964 | int disp_prop; /* if non-zero, there really is a | 1976 | int disp_prop; /* if non-zero, there really is a |
| 1965 | `display' property/string at disp_pos; | 1977 | `display' property/string at disp_pos; |
| 1966 | if 2, the property is a `space' spec */ | 1978 | if 2, the property is a `space' spec */ |
| 1979 | bool_bf bracket_resolved : 1; /* if 1, this bracket's type is BPA-resolved */ | ||
| 1967 | int stack_idx; /* index of current data on the stack */ | 1980 | int stack_idx; /* index of current data on the stack */ |
| 1968 | /* Note: Everything from here on is not copied/saved when the bidi | 1981 | /* Note: Everything from here on is not copied/saved when the bidi |
| 1969 | iterator state is saved, pushed, or popped. So only put here | 1982 | iterator state is saved, pushed, or popped. So only put here |