diff options
| author | Paul Eggert | 2011-10-22 23:38:24 -0700 |
|---|---|---|
| committer | Paul Eggert | 2011-10-22 23:38:24 -0700 |
| commit | cfc09582247ffef6a46b6249e2fba9136a62d21e (patch) | |
| tree | 50e102f64a2b88c692d9110990abd416c78c32f0 /src/bidi.c | |
| parent | 92c938895c639463681ae1c58a944cae62b70b87 (diff) | |
| parent | 86c606818495d9411fd5d6b1477f9a097eb18020 (diff) | |
| download | emacs-cfc09582247ffef6a46b6249e2fba9136a62d21e.tar.gz emacs-cfc09582247ffef6a46b6249e2fba9136a62d21e.zip | |
Merge from trunk.
Diffstat (limited to 'src/bidi.c')
| -rw-r--r-- | src/bidi.c | 99 |
1 file changed, 76 insertions, 23 deletions
diff --git a/src/bidi.c b/src/bidi.c index c6d7db96576..e8f2df89a9e 100644 --- a/src/bidi.c +++ b/src/bidi.c | |||
| @@ -846,7 +846,10 @@ bidi_line_init (struct bidi_it *bidi_it) | |||
| 846 | bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */ | 846 | bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */ |
| 847 | bidi_it->invalid_levels = 0; | 847 | bidi_it->invalid_levels = 0; |
| 848 | bidi_it->invalid_rl_levels = -1; | 848 | bidi_it->invalid_rl_levels = -1; |
| 849 | bidi_it->next_en_pos = -1; | 849 | /* Setting this to zero will force its recomputation the first time |
| 850 | we need it for W5. */ | ||
| 851 | bidi_it->next_en_pos = 0; | ||
| 852 | bidi_it->next_en_type = UNKNOWN_BT; | ||
| 850 | bidi_it->next_for_ws.type = UNKNOWN_BT; | 853 | bidi_it->next_for_ws.type = UNKNOWN_BT; |
| 851 | bidi_set_sor_type (bidi_it, | 854 | bidi_set_sor_type (bidi_it, |
| 852 | (bidi_it->paragraph_dir == R2L ? 1 : 0), | 855 | (bidi_it->paragraph_dir == R2L ? 1 : 0), |
| @@ -1435,7 +1438,8 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | |||
| 1435 | } | 1438 | } |
| 1436 | } | 1439 | } |
| 1437 | else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ | 1440 | else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ |
| 1438 | || bidi_it->next_en_pos > bidi_it->charpos) | 1441 | || (bidi_it->next_en_pos > bidi_it->charpos |
| 1442 | && bidi_it->next_en_type == WEAK_EN)) | ||
| 1439 | type = WEAK_EN; | 1443 | type = WEAK_EN; |
| 1440 | break; | 1444 | break; |
| 1441 | case LRE: /* X3 */ | 1445 | case LRE: /* X3 */ |
| @@ -1471,7 +1475,8 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | |||
| 1471 | } | 1475 | } |
| 1472 | } | 1476 | } |
| 1473 | else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ | 1477 | else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ |
| 1474 | || bidi_it->next_en_pos > bidi_it->charpos) | 1478 | || (bidi_it->next_en_pos > bidi_it->charpos |
| 1479 | && bidi_it->next_en_type == WEAK_EN)) | ||
| 1475 | type = WEAK_EN; | 1480 | type = WEAK_EN; |
| 1476 | break; | 1481 | break; |
| 1477 | case PDF: /* X7 */ | 1482 | case PDF: /* X7 */ |
| @@ -1497,7 +1502,8 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | |||
| 1497 | } | 1502 | } |
| 1498 | } | 1503 | } |
| 1499 | else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ | 1504 | else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ |
| 1500 | || bidi_it->next_en_pos > bidi_it->charpos) | 1505 | || (bidi_it->next_en_pos > bidi_it->charpos |
| 1506 | && bidi_it->next_en_type == WEAK_EN)) | ||
| 1501 | type = WEAK_EN; | 1507 | type = WEAK_EN; |
| 1502 | break; | 1508 | break; |
| 1503 | default: | 1509 | default: |
| @@ -1729,10 +1735,15 @@ bidi_resolve_weak (struct bidi_it *bidi_it) | |||
| 1729 | else if (type == WEAK_ET /* W5: ET with EN before or after it */ | 1735 | else if (type == WEAK_ET /* W5: ET with EN before or after it */ |
| 1730 | || type == WEAK_BN) /* W5/Retaining */ | 1736 | || type == WEAK_BN) /* W5/Retaining */ |
| 1731 | { | 1737 | { |
| 1732 | if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */ | 1738 | if (bidi_it->prev.type_after_w1 == WEAK_EN) /* ET/BN w/EN before it */ |
| 1733 | || bidi_it->next_en_pos > bidi_it->charpos) | ||
| 1734 | type = WEAK_EN; | 1739 | type = WEAK_EN; |
| 1735 | else /* W5: ET/BN with EN after it. */ | 1740 | else if (bidi_it->next_en_pos > bidi_it->charpos |
| 1741 | && bidi_it->next_en_type != WEAK_BN) | ||
| 1742 | { | ||
| 1743 | if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */ | ||
| 1744 | type = WEAK_EN; | ||
| 1745 | } | ||
| 1746 | else if (bidi_it->next_en_pos >=0) | ||
| 1736 | { | 1747 | { |
| 1737 | ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars; | 1748 | ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars; |
| 1738 | const unsigned char *s = (STRINGP (bidi_it->string.lstring) | 1749 | const unsigned char *s = (STRINGP (bidi_it->string.lstring) |
| @@ -1761,20 +1772,27 @@ bidi_resolve_weak (struct bidi_it *bidi_it) | |||
| 1761 | en_pos = bidi_it->charpos; | 1772 | en_pos = bidi_it->charpos; |
| 1762 | bidi_copy_it (bidi_it, &saved_it); | 1773 | bidi_copy_it (bidi_it, &saved_it); |
| 1763 | } | 1774 | } |
| 1775 | /* Remember this position, to speed up processing of the | ||
| 1776 | next ETs. */ | ||
| 1777 | bidi_it->next_en_pos = en_pos; | ||
| 1764 | if (type_of_next == WEAK_EN) | 1778 | if (type_of_next == WEAK_EN) |
| 1765 | { | 1779 | { |
| 1766 | /* If the last strong character is AL, the EN we've | 1780 | /* If the last strong character is AL, the EN we've |
| 1767 | found will become AN when we get to it (W2). */ | 1781 | found will become AN when we get to it (W2). */ |
| 1768 | if (bidi_it->last_strong.type_after_w1 != STRONG_AL) | 1782 | if (bidi_it->last_strong.type_after_w1 == STRONG_AL) |
| 1769 | { | 1783 | type_of_next = WEAK_AN; |
| 1770 | type = WEAK_EN; | ||
| 1771 | /* Remember this EN position, to speed up processing | ||
| 1772 | of the next ETs. */ | ||
| 1773 | bidi_it->next_en_pos = en_pos; | ||
| 1774 | } | ||
| 1775 | else if (type == WEAK_BN) | 1784 | else if (type == WEAK_BN) |
| 1776 | type = NEUTRAL_ON; /* W6/Retaining */ | 1785 | type = NEUTRAL_ON; /* W6/Retaining */ |
| 1786 | else | ||
| 1787 | type = WEAK_EN; | ||
| 1777 | } | 1788 | } |
| 1789 | else if (type_of_next == NEUTRAL_B) | ||
| 1790 | /* Record the fact that there are no more ENs from | ||
| 1791 | here to the end of paragraph, to avoid entering the | ||
| 1792 | loop above ever again in this paragraph. */ | ||
| 1793 | bidi_it->next_en_pos = -1; | ||
| 1794 | /* Record the type of the character where we ended our search. */ | ||
| 1795 | bidi_it->next_en_type = type_of_next; | ||
| 1778 | } | 1796 | } |
| 1779 | } | 1797 | } |
| 1780 | } | 1798 | } |
| @@ -1843,13 +1861,45 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) | |||
| 1843 | || type == NEUTRAL_ON)) | 1861 | || type == NEUTRAL_ON)) |
| 1844 | abort (); | 1862 | abort (); |
| 1845 | 1863 | ||
| 1846 | if (bidi_get_category (type) == NEUTRAL | 1864 | if ((type != NEUTRAL_B /* Don't risk entering the long loop below if |
| 1865 | we are already at paragraph end. */ | ||
| 1866 | && bidi_get_category (type) == NEUTRAL) | ||
| 1847 | || (type == WEAK_BN && prev_level == current_level)) | 1867 | || (type == WEAK_BN && prev_level == current_level)) |
| 1848 | { | 1868 | { |
| 1849 | if (bidi_it->next_for_neutral.type != UNKNOWN_BT) | 1869 | if (bidi_it->next_for_neutral.type != UNKNOWN_BT) |
| 1850 | type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, | 1870 | type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, |
| 1851 | bidi_it->next_for_neutral.type, | 1871 | bidi_it->next_for_neutral.type, |
| 1852 | current_level); | 1872 | current_level); |
| 1873 | /* The next two "else if" clauses are shortcuts for the | ||
| 1874 | important special case when we have a long sequence of | ||
| 1875 | neutral or WEAK_BN characters, such as whitespace or nulls or | ||
| 1876 | other control characters, on the base embedding level of the | ||
| 1877 | paragraph, and that sequence goes all the way to the end of | ||
| 1878 | the paragraph and follows a character whose resolved | ||
| 1879 | directionality is identical to the base embedding level. | ||
| 1880 | (This is what happens in a buffer with plain L2R text that | ||
| 1881 | happens to include long sequences of control characters.) By | ||
| 1882 | virtue of N1, the result of examining this long sequence will | ||
| 1883 | always be either STRONG_L or STRONG_R, depending on the base | ||
| 1884 | embedding level. So we use this fact directly instead of | ||
| 1885 | entering the expensive loop in the "else" clause. */ | ||
| 1886 | else if (current_level == 0 | ||
| 1887 | && bidi_it->prev_for_neutral.type == STRONG_L | ||
| 1888 | && !bidi_explicit_dir_char (bidi_it->ch)) | ||
| 1889 | type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, | ||
| 1890 | STRONG_L, current_level); | ||
| 1891 | else if (/* current level is 1 */ | ||
| 1892 | current_level == 1 | ||
| 1893 | /* base embedding level is also 1 */ | ||
| 1894 | && bidi_it->level_stack[0].level == 1 | ||
| 1895 | /* previous character is one of those considered R for | ||
| 1896 | the purposes of W5 */ | ||
| 1897 | && (bidi_it->prev_for_neutral.type == STRONG_R | ||
| 1898 | || bidi_it->prev_for_neutral.type == WEAK_EN | ||
| 1899 | || bidi_it->prev_for_neutral.type == WEAK_AN) | ||
| 1900 | && !bidi_explicit_dir_char (bidi_it->ch)) | ||
| 1901 | type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, | ||
| 1902 | STRONG_R, current_level); | ||
| 1853 | else | 1903 | else |
| 1854 | { | 1904 | { |
| 1855 | /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in | 1905 | /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in |
| @@ -1900,6 +1950,9 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) | |||
| 1900 | case STRONG_L: | 1950 | case STRONG_L: |
| 1901 | case STRONG_R: | 1951 | case STRONG_R: |
| 1902 | case STRONG_AL: | 1952 | case STRONG_AL: |
| 1953 | /* Actually, STRONG_AL cannot happen here, because | ||
| 1954 | bidi_resolve_weak converts it to STRONG_R, per W3. */ | ||
| 1955 | xassert (type != STRONG_AL); | ||
| 1903 | next_type = type; | 1956 | next_type = type; |
| 1904 | break; | 1957 | break; |
| 1905 | case WEAK_EN: | 1958 | case WEAK_EN: |
| @@ -1907,7 +1960,6 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) | |||
| 1907 | /* N1: ``European and Arabic numbers are treated as | 1960 | /* N1: ``European and Arabic numbers are treated as |
| 1908 | though they were R.'' */ | 1961 | though they were R.'' */ |
| 1909 | next_type = STRONG_R; | 1962 | next_type = STRONG_R; |
| 1910 | saved_it.next_for_neutral.type = STRONG_R; | ||
| 1911 | break; | 1963 | break; |
| 1912 | case WEAK_BN: | 1964 | case WEAK_BN: |
| 1913 | if (!bidi_explicit_dir_char (bidi_it->ch)) | 1965 | if (!bidi_explicit_dir_char (bidi_it->ch)) |
| @@ -1920,11 +1972,7 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) | |||
| 1920 | member. */ | 1972 | member. */ |
| 1921 | if (saved_it.type != WEAK_BN | 1973 | if (saved_it.type != WEAK_BN |
| 1922 | || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL) | 1974 | || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL) |
| 1923 | { | 1975 | next_type = bidi_it->prev_for_neutral.type; |
| 1924 | next_type = bidi_it->prev_for_neutral.type; | ||
| 1925 | saved_it.next_for_neutral.type = next_type; | ||
| 1926 | bidi_check_type (next_type); | ||
| 1927 | } | ||
| 1928 | else | 1976 | else |
| 1929 | { | 1977 | { |
| 1930 | /* This is a BN which does not adjoin neutrals. | 1978 | /* This is a BN which does not adjoin neutrals. |
| @@ -1938,7 +1986,9 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) | |||
| 1938 | } | 1986 | } |
| 1939 | type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type, | 1987 | type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type, |
| 1940 | next_type, current_level); | 1988 | next_type, current_level); |
| 1989 | saved_it.next_for_neutral.type = next_type; | ||
| 1941 | saved_it.type = type; | 1990 | saved_it.type = type; |
| 1991 | bidi_check_type (next_type); | ||
| 1942 | bidi_check_type (type); | 1992 | bidi_check_type (type); |
| 1943 | bidi_copy_it (bidi_it, &saved_it); | 1993 | bidi_copy_it (bidi_it, &saved_it); |
| 1944 | } | 1994 | } |
| @@ -2014,7 +2064,10 @@ bidi_level_of_next_char (struct bidi_it *bidi_it) | |||
| 2014 | bidi_it->next_for_neutral.type = UNKNOWN_BT; | 2064 | bidi_it->next_for_neutral.type = UNKNOWN_BT; |
| 2015 | if (bidi_it->next_en_pos >= 0 | 2065 | if (bidi_it->next_en_pos >= 0 |
| 2016 | && bidi_it->charpos >= bidi_it->next_en_pos) | 2066 | && bidi_it->charpos >= bidi_it->next_en_pos) |
| 2017 | bidi_it->next_en_pos = -1; | 2067 | { |
| 2068 | bidi_it->next_en_pos = 0; | ||
| 2069 | bidi_it->next_en_type = UNKNOWN_BT; | ||
| 2070 | } | ||
| 2018 | if (bidi_it->next_for_ws.type != UNKNOWN_BT | 2071 | if (bidi_it->next_for_ws.type != UNKNOWN_BT |
| 2019 | && bidi_it->charpos >= bidi_it->next_for_ws.charpos) | 2072 | && bidi_it->charpos >= bidi_it->next_for_ws.charpos) |
| 2020 | bidi_it->next_for_ws.type = UNKNOWN_BT; | 2073 | bidi_it->next_for_ws.type = UNKNOWN_BT; |
| @@ -2140,7 +2193,7 @@ bidi_level_of_next_char (struct bidi_it *bidi_it) | |||
| 2140 | } | 2193 | } |
| 2141 | 2194 | ||
| 2142 | /* Resolve implicit levels, with a twist: PDFs get the embedding | 2195 | /* Resolve implicit levels, with a twist: PDFs get the embedding |
| 2143 | level of the enbedding they terminate. See below for the | 2196 | level of the embedding they terminate. See below for the |
| 2144 | reason. */ | 2197 | reason. */ |
| 2145 | if (bidi_it->orig_type == PDF | 2198 | if (bidi_it->orig_type == PDF |
| 2146 | /* Don't do this if this formatting code didn't change the | 2199 | /* Don't do this if this formatting code didn't change the |