diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/ChangeLog | 19 | ||||
| -rw-r--r-- | src/bidi.c | 59 |
2 files changed, 68 insertions, 10 deletions
diff --git a/src/ChangeLog b/src/ChangeLog index 07b0418b399..d21e6383764 100644 --- a/src/ChangeLog +++ b/src/ChangeLog | |||
| @@ -1,3 +1,22 @@ | |||
| 1 | 2011-10-18 Eli Zaretskii <eliz@gnu.org> | ||
| 2 | |||
| 3 | Fix part 3 of bug#9771. | ||
| 4 | * bidi.c (bidi_line_init): Initialize next_en_pos to zero, not -1. | ||
| 5 | (bidi_resolve_neutral): Don't enter the expensive loop looking for | ||
| 6 | non-neutral characters if the current character is a paragraph | ||
| 7 | separator (a.k.a. Newline). This avoids running the same | ||
| 8 | expensive loop twice, once when we consume the preceding newline | ||
| 9 | and the other time when the line actually needs to be displayed. | ||
| 10 | Avoid the loop when we see neutrals on the base embedding level | ||
| 11 | following a character whose directionality is the same as the | ||
| 12 | paragraph's. This avoids running the expensive loop when a line | ||
| 13 | ends in a long sequence of neutrals, like control characters. | ||
| 14 | Add assertion against STRONG_AL type. Slightly rearrange code | ||
| 15 | that determines the type of a neutral given the first non-neutral | ||
| 16 | that follows it. | ||
| 17 | (bidi_level_of_next_char): Set next_en_pos to zero when | ||
| 18 | invalidating its info. | ||
| 19 | |||
| 1 | 2011-10-17 Eli Zaretskii <eliz@gnu.org> | 20 | 2011-10-17 Eli Zaretskii <eliz@gnu.org> |
| 2 | 21 | ||
| 3 | * xdisp.c (push_display_prop): Determine whether to record string | 22 | * xdisp.c (push_display_prop): Determine whether to record string |
diff --git a/src/bidi.c b/src/bidi.c index f06c146ca84..29e3c817318 100644 --- a/src/bidi.c +++ b/src/bidi.c | |||
| @@ -846,7 +846,9 @@ bidi_line_init (struct bidi_it *bidi_it) | |||
| 846 | bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */ | 846 | bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */ |
| 847 | bidi_it->invalid_levels = 0; | 847 | bidi_it->invalid_levels = 0; |
| 848 | bidi_it->invalid_rl_levels = -1; | 848 | bidi_it->invalid_rl_levels = -1; |
| 849 | bidi_it->next_en_pos = -1; | 849 | /* Setting this to zero will force its recomputation the first time |
| 850 | we need it for W5. */ | ||
| 851 | bidi_it->next_en_pos = 0; | ||
| 850 | bidi_it->next_for_ws.type = UNKNOWN_BT; | 852 | bidi_it->next_for_ws.type = UNKNOWN_BT; |
| 851 | bidi_set_sor_type (bidi_it, | 853 | bidi_set_sor_type (bidi_it, |
| 852 | (bidi_it->paragraph_dir == R2L ? 1 : 0), | 854 | (bidi_it->paragraph_dir == R2L ? 1 : 0), |
| @@ -1732,7 +1734,7 @@ bidi_resolve_weak (struct bidi_it *bidi_it) | |||
| 1732 | if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */ | 1734 | if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */ |
| 1733 | || bidi_it->next_en_pos > bidi_it->charpos) | 1735 | || bidi_it->next_en_pos > bidi_it->charpos) |
| 1734 | type = WEAK_EN; | 1736 | type = WEAK_EN; |
| 1735 | else /* W5: ET/BN with EN after it. */ | 1737 | else if (bidi_it->next_en_pos >=0) /* W5: ET/BN with EN after it. */ |
| 1736 | { | 1738 | { |
| 1737 | EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars; | 1739 | EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars; |
| 1738 | const unsigned char *s = (STRINGP (bidi_it->string.lstring) | 1740 | const unsigned char *s = (STRINGP (bidi_it->string.lstring) |
| @@ -1775,6 +1777,11 @@ bidi_resolve_weak (struct bidi_it *bidi_it) | |||
| 1775 | else if (type == WEAK_BN) | 1777 | else if (type == WEAK_BN) |
| 1776 | type = NEUTRAL_ON; /* W6/Retaining */ | 1778 | type = NEUTRAL_ON; /* W6/Retaining */ |
| 1777 | } | 1779 | } |
| 1780 | else if (type_of_next == NEUTRAL_B) | ||
| 1781 | /* Record the fact that there are no more ENs from | ||
| 1782 | here to the end of paragraph, to avoid entering the | ||
| 1783 | loop above ever again in this paragraph. */ | ||
| 1784 | bidi_it->next_en_pos = -1; | ||
| 1778 | } | 1785 | } |
| 1779 | } | 1786 | } |
| 1780 | } | 1787 | } |
| @@ -1843,13 +1850,45 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) | |||
| 1843 | || type == NEUTRAL_ON)) | 1850 | || type == NEUTRAL_ON)) |
| 1844 | abort (); | 1851 | abort (); |
| 1845 | 1852 | ||
| 1846 | if (bidi_get_category (type) == NEUTRAL | 1853 | if ((type != NEUTRAL_B /* Don't risk entering the long loop below if |
| 1854 | we are already at paragraph end. */ | ||
| 1855 | && bidi_get_category (type) == NEUTRAL) | ||
| 1847 | || (type == WEAK_BN && prev_level == current_level)) | 1856 | || (type == WEAK_BN && prev_level == current_level)) |
| 1848 | { | 1857 | { |
| 1849 | if (bidi_it->next_for_neutral.type != UNKNOWN_BT) | 1858 | if (bidi_it->next_for_neutral.type != UNKNOWN_BT) |
| 1850 | type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, | 1859 | type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, |
| 1851 | bidi_it->next_for_neutral.type, | 1860 | bidi_it->next_for_neutral.type, |
| 1852 | current_level); | 1861 | current_level); |
| 1862 | /* The next two "else if" clauses are shortcuts for the | ||
| 1863 | important special case when we have a long sequence of | ||
| 1864 | neutral or WEAK_BN characters, such as whitespace or nulls or | ||
| 1865 | other control characters, on the base embedding level of the | ||
| 1866 | paragraph, and that sequence goes all the way to the end of | ||
| 1867 | the paragraph and follows a character whose resolved | ||
| 1868 | directionality is identical to the base embedding level. | ||
| 1869 | (This is what happens in a buffer with plain L2R text that | ||
| 1870 | happens to include long sequences of control characters.) By | ||
| 1871 | virtue of N1, the result of examining this long sequence will | ||
| 1872 | always be either STRONG_L or STRONG_R, depending on the base | ||
| 1873 | embedding level. So we use this fact directly instead of | ||
| 1874 | entering the expensive loop in the "else" clause. */ | ||
| 1875 | else if (current_level == 0 | ||
| 1876 | && bidi_it->prev_for_neutral.type == STRONG_L | ||
| 1877 | && !bidi_explicit_dir_char (bidi_it->ch)) | ||
| 1878 | type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, | ||
| 1879 | STRONG_L, current_level); | ||
| 1880 | else if (/* current level is 1 */ | ||
| 1881 | current_level == 1 | ||
| 1882 | /* base embedding level is also 1 */ | ||
| 1883 | && bidi_it->level_stack[0].level == 1 | ||
| 1884 | /* previous character is one of those considered R for | ||
| 1885 | the purposes of N1 */ | ||
| 1886 | && (bidi_it->prev_for_neutral.type == STRONG_R | ||
| 1887 | || bidi_it->prev_for_neutral.type == WEAK_EN | ||
| 1888 | || bidi_it->prev_for_neutral.type == WEAK_AN) | ||
| 1889 | && !bidi_explicit_dir_char (bidi_it->ch)) | ||
| 1890 | type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, | ||
| 1891 | STRONG_R, current_level); | ||
| 1853 | else | 1892 | else |
| 1854 | { | 1893 | { |
| 1855 | /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in | 1894 | /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in |
| @@ -1900,6 +1939,9 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) | |||
| 1900 | case STRONG_L: | 1939 | case STRONG_L: |
| 1901 | case STRONG_R: | 1940 | case STRONG_R: |
| 1902 | case STRONG_AL: | 1941 | case STRONG_AL: |
| 1942 | /* Actually, STRONG_AL cannot happen here, because | ||
| 1943 | bidi_resolve_weak converts it to STRONG_R, per W3. */ | ||
| 1944 | xassert (type != STRONG_AL); | ||
| 1903 | next_type = type; | 1945 | next_type = type; |
| 1904 | break; | 1946 | break; |
| 1905 | case WEAK_EN: | 1947 | case WEAK_EN: |
| @@ -1907,7 +1949,6 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) | |||
| 1907 | /* N1: ``European and Arabic numbers are treated as | 1949 | /* N1: ``European and Arabic numbers are treated as |
| 1908 | though they were R.'' */ | 1950 | though they were R.'' */ |
| 1909 | next_type = STRONG_R; | 1951 | next_type = STRONG_R; |
| 1910 | saved_it.next_for_neutral.type = STRONG_R; | ||
| 1911 | break; | 1952 | break; |
| 1912 | case WEAK_BN: | 1953 | case WEAK_BN: |
| 1913 | if (!bidi_explicit_dir_char (bidi_it->ch)) | 1954 | if (!bidi_explicit_dir_char (bidi_it->ch)) |
| @@ -1920,11 +1961,7 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) | |||
| 1920 | member. */ | 1961 | member. */ |
| 1921 | if (saved_it.type != WEAK_BN | 1962 | if (saved_it.type != WEAK_BN |
| 1922 | || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL) | 1963 | || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL) |
| 1923 | { | 1964 | next_type = bidi_it->prev_for_neutral.type; |
| 1924 | next_type = bidi_it->prev_for_neutral.type; | ||
| 1925 | saved_it.next_for_neutral.type = next_type; | ||
| 1926 | bidi_check_type (next_type); | ||
| 1927 | } | ||
| 1928 | else | 1965 | else |
| 1929 | { | 1966 | { |
| 1930 | /* This is a BN which does not adjoin neutrals. | 1967 | /* This is a BN which does not adjoin neutrals. |
| @@ -1938,7 +1975,9 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) | |||
| 1938 | } | 1975 | } |
| 1939 | type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type, | 1976 | type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type, |
| 1940 | next_type, current_level); | 1977 | next_type, current_level); |
| 1978 | saved_it.next_for_neutral.type = next_type; | ||
| 1941 | saved_it.type = type; | 1979 | saved_it.type = type; |
| 1980 | bidi_check_type (next_type); | ||
| 1942 | bidi_check_type (type); | 1981 | bidi_check_type (type); |
| 1943 | bidi_copy_it (bidi_it, &saved_it); | 1982 | bidi_copy_it (bidi_it, &saved_it); |
| 1944 | } | 1983 | } |
| @@ -2014,7 +2053,7 @@ bidi_level_of_next_char (struct bidi_it *bidi_it) | |||
| 2014 | bidi_it->next_for_neutral.type = UNKNOWN_BT; | 2053 | bidi_it->next_for_neutral.type = UNKNOWN_BT; |
| 2015 | if (bidi_it->next_en_pos >= 0 | 2054 | if (bidi_it->next_en_pos >= 0 |
| 2016 | && bidi_it->charpos >= bidi_it->next_en_pos) | 2055 | && bidi_it->charpos >= bidi_it->next_en_pos) |
| 2017 | bidi_it->next_en_pos = -1; | 2056 | bidi_it->next_en_pos = 0; |
| 2018 | if (bidi_it->next_for_ws.type != UNKNOWN_BT | 2057 | if (bidi_it->next_for_ws.type != UNKNOWN_BT |
| 2019 | && bidi_it->charpos >= bidi_it->next_for_ws.charpos) | 2058 | && bidi_it->charpos >= bidi_it->next_for_ws.charpos) |
| 2020 | bidi_it->next_for_ws.type = UNKNOWN_BT; | 2059 | bidi_it->next_for_ws.type = UNKNOWN_BT; |