diff options
| author | Eli Zaretskii | 2014-08-23 13:58:14 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2014-08-23 13:58:14 +0300 |
| commit | 4f6bc3c7d0aa0a540a20d3a46c147419ebb49156 (patch) | |
| tree | 878094f5e11d493ded5e585a162b418abb464fb0 /src | |
| parent | 30f1ca3b9ac0f0e7a997e98829fd7c423f1334d0 (diff) | |
| download | emacs-4f6bc3c7d0aa0a540a20d3a46c147419ebb49156.tar.gz emacs-4f6bc3c7d0aa0a540a20d3a46c147419ebb49156.zip | |
Started working on implementing UBA v6.3.
Modified struct bidi_it as required by UBA changes.
Introduced BIDI_MAXDEPTH instead of BIDI_MAXLEVEL; users changed.
Added bidi_fetch_char_skip_isolates.
Changed bidi_paragraph_init to use bidi_fetch_char_skip_isolates.
Removed uses of bidi_ignore_explicit_marks_for_paragraph_level.
Reverted temporary kludges that treat isolate initiators as weak characters.
Diffstat (limited to 'src')
| -rw-r--r-- | src/bidi.c | 98 | ||||
| -rw-r--r-- | src/dispextern.h | 21 |
2 files changed, 75 insertions, 44 deletions
```
diff --git a/src/bidi.c b/src/bidi.c
index 53c2dad1b6b..30ec1f0a744 100644
--- a/src/bidi.c
+++ b/src/bidi.c
```
| @@ -305,14 +305,11 @@ bidi_get_type (int ch, bidi_dir_t override) | |||
| 305 | case RLE: | 305 | case RLE: |
| 306 | case RLO: | 306 | case RLO: |
| 307 | case PDF: | 307 | case PDF: |
| 308 | return default_type; | ||
| 309 | /* FIXME: The isolate controls are treated as BN until we add | ||
| 310 | support for UBA v6.3. */ | ||
| 311 | case LRI: | 308 | case LRI: |
| 312 | case RLI: | 309 | case RLI: |
| 313 | case FSI: | 310 | case FSI: |
| 314 | case PDI: | 311 | case PDI: |
| 315 | return WEAK_BN; | 312 | return default_type; |
| 316 | default: | 313 | default: |
| 317 | if (override == L2R) | 314 | if (override == L2R) |
| 318 | return STRONG_L; | 315 | return STRONG_L; |
| @@ -348,11 +345,6 @@ bidi_get_category (bidi_type_t type) | |||
| 348 | case WEAK_CS: | 345 | case WEAK_CS: |
| 349 | case WEAK_NSM: | 346 | case WEAK_NSM: |
| 350 | case WEAK_BN: | 347 | case WEAK_BN: |
| 351 | /* FIXME */ | ||
| 352 | case LRI: | ||
| 353 | case RLI: | ||
| 354 | case FSI: | ||
| 355 | case PDI: | ||
| 356 | return WEAK; | 348 | return WEAK; |
| 357 | case NEUTRAL_B: | 349 | case NEUTRAL_B: |
| 358 | case NEUTRAL_S: | 350 | case NEUTRAL_S: |
| @@ -364,13 +356,10 @@ bidi_get_category (bidi_type_t type) | |||
| 364 | case RLE: | 356 | case RLE: |
| 365 | case RLO: | 357 | case RLO: |
| 366 | case PDF: | 358 | case PDF: |
| 367 | #if 0 | ||
| 368 | /* FIXME: This awaits implementation of isolate support. */ | ||
| 369 | case LRI: | 359 | case LRI: |
| 370 | case RLI: | 360 | case RLI: |
| 371 | case FSI: | 361 | case FSI: |
| 372 | case PDI: | 362 | case PDI: |
| 373 | #endif | ||
| 374 | return EXPLICIT_FORMATTING; | 363 | return EXPLICIT_FORMATTING; |
| 375 | default: | 364 | default: |
| 376 | emacs_abort (); | 365 | emacs_abort (); |
| @@ -453,7 +442,7 @@ bidi_push_embedding_level (struct bidi_it *bidi_it, | |||
| 453 | int level, bidi_dir_t override) | 442 | int level, bidi_dir_t override) |
| 454 | { | 443 | { |
| 455 | bidi_it->stack_idx++; | 444 | bidi_it->stack_idx++; |
| 456 | eassert (bidi_it->stack_idx < BIDI_MAXLEVEL); | 445 | eassert (bidi_it->stack_idx < BIDI_MAXDEPTH+2+1); |
| 457 | bidi_it->level_stack[bidi_it->stack_idx].level = level; | 446 | bidi_it->level_stack[bidi_it->stack_idx].level = level; |
| 458 | bidi_it->level_stack[bidi_it->stack_idx].override = override; | 447 | bidi_it->level_stack[bidi_it->stack_idx].override = override; |
| 459 | } | 448 | } |
| @@ -1244,6 +1233,50 @@ bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos, | |||
| 1244 | return ch; | 1233 | return ch; |
| 1245 | } | 1234 | } |
| 1246 | 1235 | ||
| 1236 | /* Like bidi_fetch_char, but ignore any text between an isolate | ||
| 1237 | initiator and its matching PDI or, if it has no matching PDI, the | ||
| 1238 | end of the paragraph. If isolates were skipped, CH_LEN and NCHARS | ||
| 1239 | are set to the number of bytes and characters between BYTEPOS/CHARPOS | ||
| 1240 | and the character that was fetched after skipping the isolates. */ | ||
| 1241 | static int | ||
| 1242 | bidi_fetch_char_skip_isolates (ptrdiff_t charpos, ptrdiff_t bytepos, | ||
| 1243 | ptrdiff_t *disp_pos, int *disp_prop, | ||
| 1244 | struct bidi_string_data *string, | ||
| 1245 | struct window *w, bool frame_window_p, | ||
| 1246 | ptrdiff_t *ch_len, ptrdiff_t *nchars) | ||
| 1247 | { | ||
| 1248 | ptrdiff_t orig_charpos = charpos, orig_bytepos = bytepos; | ||
| 1249 | int ch = bidi_fetch_char (charpos, bytepos, disp_pos, disp_prop, string, w, | ||
| 1250 | frame_window_p, ch_len, nchars); | ||
| 1251 | bidi_type_t ch_type = bidi_get_type (ch, NEUTRAL_DIR); | ||
| 1252 | ptrdiff_t level = 0; | ||
| 1253 | |||
| 1254 | if (ch_type == LRI || ch_type == RLI || ch_type == FSI) | ||
| 1255 | { | ||
| 1256 | level++; | ||
| 1257 | while (level > 0 && ch_type != NEUTRAL_B) | ||
| 1258 | { | ||
| 1259 | charpos += *nchars; | ||
| 1260 | bytepos += *ch_len; | ||
| 1261 | ch = bidi_fetch_char (charpos, bytepos, disp_pos, disp_prop, string, | ||
| 1262 | w, frame_window_p, ch_len, nchars); | ||
| 1263 | ch_type = bidi_get_type (ch, NEUTRAL_DIR); | ||
| 1264 | /* A Note to P2 says to ignore max_depth limit. */ | ||
| 1265 | if (ch_type == LRI || ch_type == RLI || ch_type == FSI) | ||
| 1266 | level++; | ||
| 1267 | else if (ch_type == PDI) | ||
| 1268 | level--; | ||
| 1269 | } | ||
| 1270 | } | ||
| 1271 | |||
| 1272 | /* Communicate to the caller how much did we skip, so it could get | ||
| 1273 | past the last character position we examined. */ | ||
| 1274 | *nchars += charpos - orig_charpos; | ||
| 1275 | *ch_len += bytepos - orig_bytepos; | ||
| 1276 | return ch; | ||
| 1277 | } | ||
| 1278 | |||
| 1279 | |||
| 1247 | 1280 | ||
| 1248 | /*********************************************************************** | 1281 | /*********************************************************************** |
| 1249 | Determining paragraph direction | 1282 | Determining paragraph direction |
| @@ -1478,17 +1511,15 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p) | |||
| 1478 | bytepos = pstartbyte; | 1511 | bytepos = pstartbyte; |
| 1479 | if (!string_p) | 1512 | if (!string_p) |
| 1480 | pos = BYTE_TO_CHAR (bytepos); | 1513 | pos = BYTE_TO_CHAR (bytepos); |
| 1481 | ch = bidi_fetch_char (pos, bytepos, &disp_pos, &disp_prop, | 1514 | ch = bidi_fetch_char_skip_isolates (pos, bytepos, &disp_pos, &disp_prop, |
| 1482 | &bidi_it->string, bidi_it->w, | 1515 | &bidi_it->string, bidi_it->w, |
| 1483 | bidi_it->frame_window_p, &ch_len, &nchars); | 1516 | bidi_it->frame_window_p, &ch_len, |
| 1517 | &nchars); | ||
| 1484 | type = bidi_get_type (ch, NEUTRAL_DIR); | 1518 | type = bidi_get_type (ch, NEUTRAL_DIR); |
| 1485 | 1519 | ||
| 1486 | pos1 = pos; | 1520 | pos1 = pos; |
| 1487 | for (pos += nchars, bytepos += ch_len; | 1521 | for (pos += nchars, bytepos += ch_len; |
| 1488 | ((bidi_get_category (type) != STRONG) | 1522 | bidi_get_category (type) != STRONG |
| 1489 | || (bidi_ignore_explicit_marks_for_paragraph_level | ||
| 1490 | && (type == RLE || type == RLO | ||
| 1491 | || type == LRE || type == LRO))) | ||
| 1492 | /* Stop when searched too far into an abnormally large | 1523 | /* Stop when searched too far into an abnormally large |
| 1493 | paragraph full of weak or neutral characters. */ | 1524 | paragraph full of weak or neutral characters. */ |
| 1494 | && pos - pos1 < MAX_STRONG_CHAR_SEARCH; | 1525 | && pos - pos1 < MAX_STRONG_CHAR_SEARCH; |
| @@ -1506,19 +1537,17 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p) | |||
| 1506 | && bidi_at_paragraph_end (pos, bytepos) >= -1) | 1537 | && bidi_at_paragraph_end (pos, bytepos) >= -1) |
| 1507 | break; | 1538 | break; |
| 1508 | /* Fetch next character and advance to get past it. */ | 1539 | /* Fetch next character and advance to get past it. */ |
| 1509 | ch = bidi_fetch_char (pos, bytepos, &disp_pos, | 1540 | ch = bidi_fetch_char_skip_isolates (pos, bytepos, &disp_pos, |
| 1510 | &disp_prop, &bidi_it->string, bidi_it->w, | 1541 | &disp_prop, &bidi_it->string, |
| 1511 | bidi_it->frame_window_p, &ch_len, &nchars); | 1542 | bidi_it->w, |
| 1543 | bidi_it->frame_window_p, | ||
| 1544 | &ch_len, &nchars); | ||
| 1512 | pos += nchars; | 1545 | pos += nchars; |
| 1513 | bytepos += ch_len; | 1546 | bytepos += ch_len; |
| 1514 | } | 1547 | } |
| 1515 | if ((type == STRONG_R || type == STRONG_AL) /* P3 */ | 1548 | if (type == STRONG_R || type == STRONG_AL) /* P3 */ |
| 1516 | || (!bidi_ignore_explicit_marks_for_paragraph_level | ||
| 1517 | && (type == RLO || type == RLE))) | ||
| 1518 | bidi_it->paragraph_dir = R2L; | 1549 | bidi_it->paragraph_dir = R2L; |
| 1519 | else if (type == STRONG_L | 1550 | else if (type == STRONG_L) |
| 1520 | || (!bidi_ignore_explicit_marks_for_paragraph_level | ||
| 1521 | && (type == LRO || type == LRE))) | ||
| 1522 | bidi_it->paragraph_dir = L2R; | 1551 | bidi_it->paragraph_dir = L2R; |
| 1523 | if (!string_p | 1552 | if (!string_p |
| 1524 | && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR) | 1553 | && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR) |
| @@ -1685,7 +1714,7 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | |||
| 1685 | type = WEAK_BN; /* X9/Retaining */ | 1714 | type = WEAK_BN; /* X9/Retaining */ |
| 1686 | if (bidi_it->ignore_bn_limit <= -1) | 1715 | if (bidi_it->ignore_bn_limit <= -1) |
| 1687 | { | 1716 | { |
| 1688 | if (current_level <= BIDI_MAXLEVEL - 4) | 1717 | if (current_level < BIDI_MAXDEPTH) |
| 1689 | { | 1718 | { |
| 1690 | /* Compute the least odd embedding level greater than | 1719 | /* Compute the least odd embedding level greater than |
| 1691 | the current level. */ | 1720 | the current level. */ |
| @@ -1694,7 +1723,7 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | |||
| 1694 | override = NEUTRAL_DIR; | 1723 | override = NEUTRAL_DIR; |
| 1695 | else | 1724 | else |
| 1696 | override = R2L; | 1725 | override = R2L; |
| 1697 | if (current_level == BIDI_MAXLEVEL - 4) | 1726 | if (current_level == BIDI_MAXDEPTH - 1) |
| 1698 | bidi_it->invalid_rl_levels = 0; | 1727 | bidi_it->invalid_rl_levels = 0; |
| 1699 | bidi_push_embedding_level (bidi_it, new_level, override); | 1728 | bidi_push_embedding_level (bidi_it, new_level, override); |
| 1700 | } | 1729 | } |
| @@ -1719,7 +1748,7 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | |||
| 1719 | type = WEAK_BN; /* X9/Retaining */ | 1748 | type = WEAK_BN; /* X9/Retaining */ |
| 1720 | if (bidi_it->ignore_bn_limit <= -1) | 1749 | if (bidi_it->ignore_bn_limit <= -1) |
| 1721 | { | 1750 | { |
| 1722 | if (current_level <= BIDI_MAXLEVEL - 5) | 1751 | if (current_level < BIDI_MAXDEPTH - 1) |
| 1723 | { | 1752 | { |
| 1724 | /* Compute the least even embedding level greater than | 1753 | /* Compute the least even embedding level greater than |
| 1725 | the current level. */ | 1754 | the current level. */ |
| @@ -2451,10 +2480,7 @@ bidi_level_of_next_char (struct bidi_it *bidi_it) | |||
| 2451 | do { | 2480 | do { |
| 2452 | ch = bidi_fetch_char (cpos += nc, bpos += clen, &disp_pos, &dpp, &bs, | 2481 | ch = bidi_fetch_char (cpos += nc, bpos += clen, &disp_pos, &dpp, &bs, |
| 2453 | bidi_it->w, fwp, &clen, &nc); | 2482 | bidi_it->w, fwp, &clen, &nc); |
| 2454 | if (ch == '\n' || ch == BIDI_EOB) | 2483 | chtype = bidi_get_type (ch, NEUTRAL_DIR); |
| 2455 | chtype = NEUTRAL_B; | ||
| 2456 | else | ||
| 2457 | chtype = bidi_get_type (ch, NEUTRAL_DIR); | ||
| 2458 | } while (chtype == NEUTRAL_WS || chtype == WEAK_BN | 2484 | } while (chtype == NEUTRAL_WS || chtype == WEAK_BN |
| 2459 | || bidi_explicit_dir_char (ch)); /* L1/Retaining */ | 2485 | || bidi_explicit_dir_char (ch)); /* L1/Retaining */ |
| 2460 | bidi_it->next_for_ws.type = chtype; | 2486 | bidi_it->next_for_ws.type = chtype; |
```
diff --git a/src/dispextern.h b/src/dispextern.h
index ebd4260d408..9e6d27db1c1 100644
--- a/src/dispextern.h
+++ b/src/dispextern.h
```
| @@ -1857,7 +1857,9 @@ GLYPH_CODE_P (Lisp_Object gc) | |||
| 1857 | extern int face_change_count; | 1857 | extern int face_change_count; |
| 1858 | 1858 | ||
| 1859 | /* For reordering of bidirectional text. */ | 1859 | /* For reordering of bidirectional text. */ |
| 1860 | #define BIDI_MAXLEVEL 64 | 1860 | |
| 1861 | /* UAX#9's max_depth value. */ | ||
| 1862 | #define BIDI_MAXDEPTH 125 | ||
| 1861 | 1863 | ||
| 1862 | /* Data type for describing the bidirectional character types. The | 1864 | /* Data type for describing the bidirectional character types. The |
| 1863 | first 7 must be at the beginning, because they are the only values | 1865 | first 7 must be at the beginning, because they are the only values |
| @@ -1906,10 +1908,11 @@ struct bidi_saved_info { | |||
| 1906 | bidi_type_t orig_type; /* type as we found it in the buffer */ | 1908 | bidi_type_t orig_type; /* type as we found it in the buffer */ |
| 1907 | }; | 1909 | }; |
| 1908 | 1910 | ||
| 1909 | /* Data type for keeping track of saved embedding levels and override | 1911 | /* Data type for keeping track of saved embedding levels, override |
| 1910 | status information. */ | 1912 | status, and isolate status information. */ |
| 1911 | struct bidi_stack { | 1913 | struct bidi_stack { |
| 1912 | int level; | 1914 | char level; |
| 1915 | bool isolate_status; | ||
| 1913 | bidi_dir_t override; | 1916 | bidi_dir_t override; |
| 1914 | }; | 1917 | }; |
| 1915 | 1918 | ||
| @@ -1939,9 +1942,11 @@ struct bidi_it { | |||
| 1939 | resolving weak and neutral types */ | 1942 | resolving weak and neutral types */ |
| 1940 | bidi_type_t type_after_w1; /* original type, after overrides and W1 */ | 1943 | bidi_type_t type_after_w1; /* original type, after overrides and W1 */ |
| 1941 | bidi_type_t orig_type; /* original type, as found in the buffer */ | 1944 | bidi_type_t orig_type; /* original type, as found in the buffer */ |
| 1942 | int resolved_level; /* final resolved level of this character */ | 1945 | char resolved_level; /* final resolved level of this character */ |
| 1943 | int invalid_levels; /* how many PDFs to ignore */ | 1946 | char isolate_level; /* count of isolate initiators unmatched by PDI */ |
| 1944 | int invalid_rl_levels; /* how many PDFs from RLE/RLO to ignore */ | 1947 | ptrdiff_t invalid_levels; /* how many PDFs to ignore */ |
| 1948 | ptrdiff_t invalid_rl_levels; /* how many PDFs from RLE/RLO to ignore */ | ||
| 1949 | ptrdiff_t invalid_isolates; /* how many PDIs to ignore */ | ||
| 1945 | struct bidi_saved_info prev; /* info about previous character */ | 1950 | struct bidi_saved_info prev; /* info about previous character */ |
| 1946 | struct bidi_saved_info last_strong; /* last-seen strong directional char */ | 1951 | struct bidi_saved_info last_strong; /* last-seen strong directional char */ |
| 1947 | struct bidi_saved_info next_for_neutral; /* surrounding characters for... */ | 1952 | struct bidi_saved_info next_for_neutral; /* surrounding characters for... */ |
| @@ -1960,7 +1965,7 @@ struct bidi_it { | |||
| 1960 | /* Note: Everything from here on is not copied/saved when the bidi | 1965 | /* Note: Everything from here on is not copied/saved when the bidi |
| 1961 | iterator state is saved, pushed, or popped. So only put here | 1966 | iterator state is saved, pushed, or popped. So only put here |
| 1962 | stuff that is not part of the bidi iterator's state! */ | 1967 | stuff that is not part of the bidi iterator's state! */ |
| 1963 | struct bidi_stack level_stack[BIDI_MAXLEVEL]; /* stack of embedding levels */ | 1968 | struct bidi_stack level_stack[BIDI_MAXDEPTH+2+1]; /* directional status stack */ |
| 1964 | struct bidi_string_data string; /* string to reorder */ | 1969 | struct bidi_string_data string; /* string to reorder */ |
| 1965 | struct window *w; /* the window being displayed */ | 1970 | struct window *w; /* the window being displayed */ |
| 1966 | bidi_dir_t paragraph_dir; /* current paragraph direction */ | 1971 | bidi_dir_t paragraph_dir; /* current paragraph direction */ |