diff options
| author | Eli Zaretskii | 2014-08-29 17:36:05 +0300 |
|---|---|---|
| committer | Eli Zaretskii | 2014-08-29 17:36:05 +0300 |
| commit | 5d84f5d6985b2f9d146519d3c8a2147fa3d9aafd (patch) | |
| tree | ab40627c5c6559be2f1faea163deec6a746109e1 | |
| parent | d9abb2814d772b1efe89a9e665157d322d690d18 (diff) | |
| download | emacs-5d84f5d6985b2f9d146519d3c8a2147fa3d9aafd.tar.gz emacs-5d84f5d6985b2f9d146519d3c8a2147fa3d9aafd.zip | |
Fix find_first_strong_char and implement Unicode 8.0 isolate corrections.
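find_first_strong_char now returns the bidi type of the character where
the search stopped (the return statement was missing), and accepts one
additional argument, STOP_AT_PDI, which tells it to stop at the PDI that
matches the FSI where the search started; callers changed.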
Implemented suggested Unicode 8.0 corrections for type of RLI/LRI/PDI.
| -rw-r--r-- | src/bidi.c | 69 |
1 file changed, 57 insertions, 12 deletions
diff --git a/src/bidi.c b/src/bidi.c
index ca5b950d6d3..e01b77d0e96 100644
--- a/src/bidi.c
+++ b/src/bidi.c
| @@ -1398,18 +1398,38 @@ bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte) | |||
| 1398 | bidi_paragraph_init to less than 10 ms even on slow machines. */ | 1398 | bidi_paragraph_init to less than 10 ms even on slow machines. */ |
| 1399 | #define MAX_STRONG_CHAR_SEARCH 100000 | 1399 | #define MAX_STRONG_CHAR_SEARCH 100000 |
| 1400 | 1400 | ||
| 1401 | /* Starting from POS, find the first strong (L, R, or AL) character, | ||
| 1402 | while skipping over any characters between an isolate initiator and | ||
| 1403 | its matching PDI. STOP_AT_PDI non-zero means stop at the PDI that | ||
| 1404 | matches the isolate initiator at POS. Return the bidi type of the | ||
| 1405 | character where the search stopped. Give up if after examining | ||
| 1406 | MAX_STRONG_CHAR_SEARCH buffer or string positions no strong | ||
| 1407 | character was found. */ | ||
| 1401 | static bidi_type_t | 1408 | static bidi_type_t |
| 1402 | find_first_strong_char (ptrdiff_t pos, ptrdiff_t bytepos, ptrdiff_t end, | 1409 | find_first_strong_char (ptrdiff_t pos, ptrdiff_t bytepos, ptrdiff_t end, |
| 1403 | ptrdiff_t *disp_pos, int *disp_prop, | 1410 | ptrdiff_t *disp_pos, int *disp_prop, |
| 1404 | struct bidi_string_data *string, struct window *w, | 1411 | struct bidi_string_data *string, struct window *w, |
| 1405 | bool string_p, bool frame_window_p, | 1412 | bool string_p, bool frame_window_p, |
| 1406 | ptrdiff_t *ch_len, ptrdiff_t *nchars) | 1413 | ptrdiff_t *ch_len, ptrdiff_t *nchars, bool stop_at_pdi) |
| 1407 | { | 1414 | { |
| 1408 | ptrdiff_t pos1; | 1415 | ptrdiff_t pos1; |
| 1409 | bidi_type_t type; | 1416 | bidi_type_t type; |
| 1410 | const unsigned char *s; | 1417 | const unsigned char *s; |
| 1411 | int ch; | 1418 | int ch; |
| 1412 | 1419 | ||
| 1420 | if (stop_at_pdi) | ||
| 1421 | { | ||
| 1422 | /* If STOP_AT_PDI is non-zero, we must have been called with FSI | ||
| 1423 | at POS. Get past it. */ | ||
| 1424 | #ifdef ENABLE_CHECKING | ||
| 1425 | ch = bidi_fetch_char (pos, bytepos, disp_pos, disp_prop, string, w, | ||
| 1426 | frame_window_p, ch_len, nchars); | ||
| 1427 | type = bidi_get_type (ch, NEUTRAL_DIR); | ||
| 1428 | eassert (type == FSI /* || type == LRI || type == RLI */); | ||
| 1429 | #endif | ||
| 1430 | pos += *nchars; | ||
| 1431 | bytepos += *ch_len; | ||
| 1432 | } | ||
| 1413 | ch = bidi_fetch_char_skip_isolates (pos, bytepos, disp_pos, disp_prop, string, | 1433 | ch = bidi_fetch_char_skip_isolates (pos, bytepos, disp_pos, disp_prop, string, |
| 1414 | w, frame_window_p, ch_len, nchars); | 1434 | w, frame_window_p, ch_len, nchars); |
| 1415 | type = bidi_get_type (ch, NEUTRAL_DIR); | 1435 | type = bidi_get_type (ch, NEUTRAL_DIR); |
| @@ -1417,6 +1437,8 @@ find_first_strong_char (ptrdiff_t pos, ptrdiff_t bytepos, ptrdiff_t end, | |||
| 1417 | pos1 = pos; | 1437 | pos1 = pos; |
| 1418 | for (pos += *nchars, bytepos += *ch_len; | 1438 | for (pos += *nchars, bytepos += *ch_len; |
| 1419 | bidi_get_category (type) != STRONG | 1439 | bidi_get_category (type) != STRONG |
| 1440 | /* If requested to stop at first PDI, stop there. */ | ||
| 1441 | && !(stop_at_pdi && type == PDI) | ||
| 1420 | /* Stop when searched too far into an abnormally large | 1442 | /* Stop when searched too far into an abnormally large |
| 1421 | paragraph full of weak or neutral characters. */ | 1443 | paragraph full of weak or neutral characters. */ |
| 1422 | && pos - pos1 < MAX_STRONG_CHAR_SEARCH; | 1444 | && pos - pos1 < MAX_STRONG_CHAR_SEARCH; |
| @@ -1440,6 +1462,7 @@ find_first_strong_char (ptrdiff_t pos, ptrdiff_t bytepos, ptrdiff_t end, | |||
| 1440 | pos += *nchars; | 1462 | pos += *nchars; |
| 1441 | bytepos += *ch_len; | 1463 | bytepos += *ch_len; |
| 1442 | } | 1464 | } |
| 1465 | return type; | ||
| 1443 | } | 1466 | } |
| 1444 | 1467 | ||
| 1445 | /* Determine the base direction, a.k.a. base embedding level, of the | 1468 | /* Determine the base direction, a.k.a. base embedding level, of the |
| @@ -1545,7 +1568,7 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p) | |||
| 1545 | type = find_first_strong_char (pos, bytepos, end, &disp_pos, &disp_prop, | 1568 | type = find_first_strong_char (pos, bytepos, end, &disp_pos, &disp_prop, |
| 1546 | &bidi_it->string, bidi_it->w, | 1569 | &bidi_it->string, bidi_it->w, |
| 1547 | string_p, bidi_it->frame_window_p, | 1570 | string_p, bidi_it->frame_window_p, |
| 1548 | &ch_len, &nchars); | 1571 | &ch_len, &nchars, false); |
| 1549 | if (type == STRONG_R || type == STRONG_AL) /* P3 */ | 1572 | if (type == STRONG_R || type == STRONG_AL) /* P3 */ |
| 1550 | bidi_it->paragraph_dir = R2L; | 1573 | bidi_it->paragraph_dir = R2L; |
| 1551 | else if (type == STRONG_L) | 1574 | else if (type == STRONG_L) |
| @@ -1619,7 +1642,7 @@ static int | |||
| 1619 | bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | 1642 | bidi_resolve_explicit_1 (struct bidi_it *bidi_it) |
| 1620 | { | 1643 | { |
| 1621 | int curchar; | 1644 | int curchar; |
| 1622 | bidi_type_t type; | 1645 | bidi_type_t type, typ1; |
| 1623 | int current_level; | 1646 | int current_level; |
| 1624 | int new_level; | 1647 | int new_level; |
| 1625 | bidi_dir_t override; | 1648 | bidi_dir_t override; |
| @@ -1775,19 +1798,28 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | |||
| 1775 | type = WEAK_EN; | 1798 | type = WEAK_EN; |
| 1776 | break; | 1799 | break; |
| 1777 | case FSI: /* X5c */ | 1800 | case FSI: /* X5c */ |
| 1778 | bidi_it->type_after_w1 = type; | ||
| 1779 | end = string_p ? bidi_it->string.schars : ZV; | 1801 | end = string_p ? bidi_it->string.schars : ZV; |
| 1780 | disp_pos = bidi_it->disp_pos; | 1802 | disp_pos = bidi_it->disp_pos; |
| 1781 | disp_prop = bidi_it->disp_prop; | 1803 | disp_prop = bidi_it->disp_prop; |
| 1782 | type = find_first_strong_char (bidi_it->charpos, bidi_it->bytepos, end, | 1804 | nchars = bidi_it->nchars; |
| 1805 | ch_len = bidi_it->ch_len; | ||
| 1806 | typ1 = find_first_strong_char (bidi_it->charpos, bidi_it->bytepos, end, | ||
| 1783 | &disp_pos, &disp_prop, &bidi_it->string, | 1807 | &disp_pos, &disp_prop, &bidi_it->string, |
| 1784 | bidi_it->w, string_p, bidi_it->frame_window_p, | 1808 | bidi_it->w, string_p, bidi_it->frame_window_p, |
| 1785 | &ch_len, &nchars); | 1809 | &ch_len, &nchars, true); |
| 1786 | if (type != STRONG_R && type != STRONG_AL) | 1810 | if (typ1 != STRONG_R && typ1 != STRONG_AL) |
| 1787 | goto fsi_as_lri; | 1811 | { |
| 1812 | type = LRI; | ||
| 1813 | goto fsi_as_lri; | ||
| 1814 | } | ||
| 1815 | else | ||
| 1816 | type == RLI; | ||
| 1788 | /* FALLTHROUGH */ | 1817 | /* FALLTHROUGH */ |
| 1789 | case RLI: /* X5a */ | 1818 | case RLI: /* X5a */ |
| 1790 | bidi_it->type_after_w1 = type; | 1819 | if (override == NEUTRAL_DIR) |
| 1820 | bidi_it->type_after_w1 = type; | ||
| 1821 | else /* Unicode 8.0 correction. */ | ||
| 1822 | bidi_it->type_after_w1 = (override == L2R ? STRONG_L : STRONG_R); | ||
| 1791 | bidi_check_type (bidi_it->type_after_w1); | 1823 | bidi_check_type (bidi_it->type_after_w1); |
| 1792 | if (current_level < BIDI_MAXDEPTH | 1824 | if (current_level < BIDI_MAXDEPTH |
| 1793 | && bidi_it->invalid_levels == 0 | 1825 | && bidi_it->invalid_levels == 0 |
| @@ -1802,7 +1834,10 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | |||
| 1802 | break; | 1834 | break; |
| 1803 | case LRI: /* X5b */ | 1835 | case LRI: /* X5b */ |
| 1804 | fsi_as_lri: | 1836 | fsi_as_lri: |
| 1805 | bidi_it->type_after_w1 = type; | 1837 | if (override == NEUTRAL_DIR) |
| 1838 | bidi_it->type_after_w1 = type; | ||
| 1839 | else /* Unicode 8.0 correction. */ | ||
| 1840 | bidi_it->type_after_w1 = (override == L2R ? STRONG_L : STRONG_R); | ||
| 1806 | bidi_check_type (bidi_it->type_after_w1); | 1841 | bidi_check_type (bidi_it->type_after_w1); |
| 1807 | if (current_level < BIDI_MAXDEPTH - 1 | 1842 | if (current_level < BIDI_MAXDEPTH - 1 |
| 1808 | && bidi_it->invalid_levels == 0 | 1843 | && bidi_it->invalid_levels == 0 |
| @@ -1817,7 +1852,10 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | |||
| 1817 | break; | 1852 | break; |
| 1818 | case PDI: /* X6a */ | 1853 | case PDI: /* X6a */ |
| 1819 | if (bidi_it->invalid_isolates) | 1854 | if (bidi_it->invalid_isolates) |
| 1820 | bidi_it->invalid_isolates--; | 1855 | { |
| 1856 | bidi_it->invalid_isolates--; | ||
| 1857 | new_level = current_level; | ||
| 1858 | } | ||
| 1821 | else if (bidi_it->isolate_level > 0) | 1859 | else if (bidi_it->isolate_level > 0) |
| 1822 | { | 1860 | { |
| 1823 | bidi_it->invalid_levels = 0; | 1861 | bidi_it->invalid_levels = 0; |
| @@ -1825,9 +1863,16 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | |||
| 1825 | bidi_pop_embedding_level (bidi_it); | 1863 | bidi_pop_embedding_level (bidi_it); |
| 1826 | eassert (bidi_it->stack_idx > 0); | 1864 | eassert (bidi_it->stack_idx > 0); |
| 1827 | new_level = bidi_pop_embedding_level (bidi_it); | 1865 | new_level = bidi_pop_embedding_level (bidi_it); |
| 1828 | bidi_it->resolved_level = new_level; | ||
| 1829 | bidi_it->isolate_level--; | 1866 | bidi_it->isolate_level--; |
| 1830 | } | 1867 | } |
| 1868 | bidi_it->resolved_level = new_level; | ||
| 1869 | /* Unicode 8.0 correction. */ | ||
| 1870 | if (bidi_it->level_stack[bidi_it->stack_idx].override == L2R) | ||
| 1871 | bidi_it->type_after_w1 = STRONG_L; | ||
| 1872 | else if (bidi_it->level_stack[bidi_it->stack_idx].override == R2L) | ||
| 1873 | bidi_it->type_after_w1 = STRONG_R; | ||
| 1874 | else | ||
| 1875 | bidi_it->type_after_w1 = type; | ||
| 1831 | break; | 1876 | break; |
| 1832 | case PDF: /* X7 */ | 1877 | case PDF: /* X7 */ |
| 1833 | bidi_it->type_after_w1 = type; | 1878 | bidi_it->type_after_w1 = type; |