about | summary | refs | log | tree | commit | diff | stats
path: root/src/bidi.c
diff options
context:
space:
mode:
author: Paul Eggert, 2011-10-22 23:38:24 -0700
committer: Paul Eggert, 2011-10-22 23:38:24 -0700
commit: cfc09582247ffef6a46b6249e2fba9136a62d21e (patch)
tree: 50e102f64a2b88c692d9110990abd416c78c32f0 /src/bidi.c
parent: 92c938895c639463681ae1c58a944cae62b70b87 (diff)
parent: 86c606818495d9411fd5d6b1477f9a097eb18020 (diff)
download: emacs-cfc09582247ffef6a46b6249e2fba9136a62d21e.tar.gz
download: emacs-cfc09582247ffef6a46b6249e2fba9136a62d21e.zip
Merge from trunk.
Diffstat (limited to 'src/bidi.c')
-rw-r--r-- src/bidi.c 99
1 file changed, 76 insertions, 23 deletions
diff --git a/src/bidi.c b/src/bidi.c
index c6d7db96576..e8f2df89a9e 100644
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -846,7 +846,10 @@ bidi_line_init (struct bidi_it *bidi_it)
846 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */ 846 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
847 bidi_it->invalid_levels = 0; 847 bidi_it->invalid_levels = 0;
848 bidi_it->invalid_rl_levels = -1; 848 bidi_it->invalid_rl_levels = -1;
849 bidi_it->next_en_pos = -1; 849 /* Setting this to zero will force its recomputation the first time
850 we need it for W5. */
851 bidi_it->next_en_pos = 0;
852 bidi_it->next_en_type = UNKNOWN_BT;
850 bidi_it->next_for_ws.type = UNKNOWN_BT; 853 bidi_it->next_for_ws.type = UNKNOWN_BT;
851 bidi_set_sor_type (bidi_it, 854 bidi_set_sor_type (bidi_it,
852 (bidi_it->paragraph_dir == R2L ? 1 : 0), 855 (bidi_it->paragraph_dir == R2L ? 1 : 0),
@@ -1435,7 +1438,8 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1435 } 1438 }
1436 } 1439 }
1437 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ 1440 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1438 || bidi_it->next_en_pos > bidi_it->charpos) 1441 || (bidi_it->next_en_pos > bidi_it->charpos
1442 && bidi_it->next_en_type == WEAK_EN))
1439 type = WEAK_EN; 1443 type = WEAK_EN;
1440 break; 1444 break;
1441 case LRE: /* X3 */ 1445 case LRE: /* X3 */
@@ -1471,7 +1475,8 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1471 } 1475 }
1472 } 1476 }
1473 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ 1477 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1474 || bidi_it->next_en_pos > bidi_it->charpos) 1478 || (bidi_it->next_en_pos > bidi_it->charpos
1479 && bidi_it->next_en_type == WEAK_EN))
1475 type = WEAK_EN; 1480 type = WEAK_EN;
1476 break; 1481 break;
1477 case PDF: /* X7 */ 1482 case PDF: /* X7 */
@@ -1497,7 +1502,8 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1497 } 1502 }
1498 } 1503 }
1499 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ 1504 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1500 || bidi_it->next_en_pos > bidi_it->charpos) 1505 || (bidi_it->next_en_pos > bidi_it->charpos
1506 && bidi_it->next_en_type == WEAK_EN))
1501 type = WEAK_EN; 1507 type = WEAK_EN;
1502 break; 1508 break;
1503 default: 1509 default:
@@ -1729,10 +1735,15 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
1729 else if (type == WEAK_ET /* W5: ET with EN before or after it */ 1735 else if (type == WEAK_ET /* W5: ET with EN before or after it */
1730 || type == WEAK_BN) /* W5/Retaining */ 1736 || type == WEAK_BN) /* W5/Retaining */
1731 { 1737 {
1732 if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */ 1738 if (bidi_it->prev.type_after_w1 == WEAK_EN) /* ET/BN w/EN before it */
1733 || bidi_it->next_en_pos > bidi_it->charpos)
1734 type = WEAK_EN; 1739 type = WEAK_EN;
1735 else /* W5: ET/BN with EN after it. */ 1740 else if (bidi_it->next_en_pos > bidi_it->charpos
1741 && bidi_it->next_en_type != WEAK_BN)
1742 {
1743 if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */
1744 type = WEAK_EN;
1745 }
1746 else if (bidi_it->next_en_pos >=0)
1736 { 1747 {
1737 ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars; 1748 ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars;
1738 const unsigned char *s = (STRINGP (bidi_it->string.lstring) 1749 const unsigned char *s = (STRINGP (bidi_it->string.lstring)
@@ -1761,20 +1772,27 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
1761 en_pos = bidi_it->charpos; 1772 en_pos = bidi_it->charpos;
1762 bidi_copy_it (bidi_it, &saved_it); 1773 bidi_copy_it (bidi_it, &saved_it);
1763 } 1774 }
1775 /* Remember this position, to speed up processing of the
1776 next ETs. */
1777 bidi_it->next_en_pos = en_pos;
1764 if (type_of_next == WEAK_EN) 1778 if (type_of_next == WEAK_EN)
1765 { 1779 {
1766 /* If the last strong character is AL, the EN we've 1780 /* If the last strong character is AL, the EN we've
1767 found will become AN when we get to it (W2). */ 1781 found will become AN when we get to it (W2). */
1768 if (bidi_it->last_strong.type_after_w1 != STRONG_AL) 1782 if (bidi_it->last_strong.type_after_w1 == STRONG_AL)
1769 { 1783 type_of_next = WEAK_AN;
1770 type = WEAK_EN;
1771 /* Remember this EN position, to speed up processing
1772 of the next ETs. */
1773 bidi_it->next_en_pos = en_pos;
1774 }
1775 else if (type == WEAK_BN) 1784 else if (type == WEAK_BN)
1776 type = NEUTRAL_ON; /* W6/Retaining */ 1785 type = NEUTRAL_ON; /* W6/Retaining */
1786 else
1787 type = WEAK_EN;
1777 } 1788 }
1789 else if (type_of_next == NEUTRAL_B)
1790 /* Record the fact that there are no more ENs from
1791 here to the end of paragraph, to avoid entering the
1792 loop above ever again in this paragraph. */
1793 bidi_it->next_en_pos = -1;
1794 /* Record the type of the character where we ended our search. */
1795 bidi_it->next_en_type = type_of_next;
1778 } 1796 }
1779 } 1797 }
1780 } 1798 }
@@ -1843,13 +1861,45 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
1843 || type == NEUTRAL_ON)) 1861 || type == NEUTRAL_ON))
1844 abort (); 1862 abort ();
1845 1863
1846 if (bidi_get_category (type) == NEUTRAL 1864 if ((type != NEUTRAL_B /* Don't risk entering the long loop below if
1865 we are already at paragraph end. */
1866 && bidi_get_category (type) == NEUTRAL)
1847 || (type == WEAK_BN && prev_level == current_level)) 1867 || (type == WEAK_BN && prev_level == current_level))
1848 { 1868 {
1849 if (bidi_it->next_for_neutral.type != UNKNOWN_BT) 1869 if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1850 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, 1870 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1851 bidi_it->next_for_neutral.type, 1871 bidi_it->next_for_neutral.type,
1852 current_level); 1872 current_level);
1873 /* The next two "else if" clauses are shortcuts for the
1874 important special case when we have a long sequence of
1875 neutral or WEAK_BN characters, such as whitespace or nulls or
1876 other control characters, on the base embedding level of the
1877 paragraph, and that sequence goes all the way to the end of
1878 the paragraph and follows a character whose resolved
1879 directionality is identical to the base embedding level.
1880 (This is what happens in a buffer with plain L2R text that
1881 happens to include long sequences of control characters.) By
1882 virtue of N1, the result of examining this long sequence will
1883 always be either STRONG_L or STRONG_R, depending on the base
1884 embedding level. So we use this fact directly instead of
1885 entering the expensive loop in the "else" clause. */
1886 else if (current_level == 0
1887 && bidi_it->prev_for_neutral.type == STRONG_L
1888 && !bidi_explicit_dir_char (bidi_it->ch))
1889 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1890 STRONG_L, current_level);
1891 else if (/* current level is 1 */
1892 current_level == 1
1893 /* base embedding level is also 1 */
1894 && bidi_it->level_stack[0].level == 1
1895 /* previous character is one of those considered R for
1896 the purposes of W5 */
1897 && (bidi_it->prev_for_neutral.type == STRONG_R
1898 || bidi_it->prev_for_neutral.type == WEAK_EN
1899 || bidi_it->prev_for_neutral.type == WEAK_AN)
1900 && !bidi_explicit_dir_char (bidi_it->ch))
1901 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1902 STRONG_R, current_level);
1853 else 1903 else
1854 { 1904 {
1855 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in 1905 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
@@ -1900,6 +1950,9 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
1900 case STRONG_L: 1950 case STRONG_L:
1901 case STRONG_R: 1951 case STRONG_R:
1902 case STRONG_AL: 1952 case STRONG_AL:
1953 /* Actually, STRONG_AL cannot happen here, because
1954 bidi_resolve_weak converts it to STRONG_R, per W3. */
1955 xassert (type != STRONG_AL);
1903 next_type = type; 1956 next_type = type;
1904 break; 1957 break;
1905 case WEAK_EN: 1958 case WEAK_EN:
@@ -1907,7 +1960,6 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
1907 /* N1: ``European and Arabic numbers are treated as 1960 /* N1: ``European and Arabic numbers are treated as
1908 though they were R.'' */ 1961 though they were R.'' */
1909 next_type = STRONG_R; 1962 next_type = STRONG_R;
1910 saved_it.next_for_neutral.type = STRONG_R;
1911 break; 1963 break;
1912 case WEAK_BN: 1964 case WEAK_BN:
1913 if (!bidi_explicit_dir_char (bidi_it->ch)) 1965 if (!bidi_explicit_dir_char (bidi_it->ch))
@@ -1920,11 +1972,7 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
1920 member. */ 1972 member. */
1921 if (saved_it.type != WEAK_BN 1973 if (saved_it.type != WEAK_BN
1922 || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL) 1974 || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1923 { 1975 next_type = bidi_it->prev_for_neutral.type;
1924 next_type = bidi_it->prev_for_neutral.type;
1925 saved_it.next_for_neutral.type = next_type;
1926 bidi_check_type (next_type);
1927 }
1928 else 1976 else
1929 { 1977 {
1930 /* This is a BN which does not adjoin neutrals. 1978 /* This is a BN which does not adjoin neutrals.
@@ -1938,7 +1986,9 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
1938 } 1986 }
1939 type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type, 1987 type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1940 next_type, current_level); 1988 next_type, current_level);
1989 saved_it.next_for_neutral.type = next_type;
1941 saved_it.type = type; 1990 saved_it.type = type;
1991 bidi_check_type (next_type);
1942 bidi_check_type (type); 1992 bidi_check_type (type);
1943 bidi_copy_it (bidi_it, &saved_it); 1993 bidi_copy_it (bidi_it, &saved_it);
1944 } 1994 }
@@ -2014,7 +2064,10 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
2014 bidi_it->next_for_neutral.type = UNKNOWN_BT; 2064 bidi_it->next_for_neutral.type = UNKNOWN_BT;
2015 if (bidi_it->next_en_pos >= 0 2065 if (bidi_it->next_en_pos >= 0
2016 && bidi_it->charpos >= bidi_it->next_en_pos) 2066 && bidi_it->charpos >= bidi_it->next_en_pos)
2017 bidi_it->next_en_pos = -1; 2067 {
2068 bidi_it->next_en_pos = 0;
2069 bidi_it->next_en_type = UNKNOWN_BT;
2070 }
2018 if (bidi_it->next_for_ws.type != UNKNOWN_BT 2071 if (bidi_it->next_for_ws.type != UNKNOWN_BT
2019 && bidi_it->charpos >= bidi_it->next_for_ws.charpos) 2072 && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
2020 bidi_it->next_for_ws.type = UNKNOWN_BT; 2073 bidi_it->next_for_ws.type = UNKNOWN_BT;
@@ -2140,7 +2193,7 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
2140 } 2193 }
2141 2194
2142 /* Resolve implicit levels, with a twist: PDFs get the embedding 2195 /* Resolve implicit levels, with a twist: PDFs get the embedding
2143 level of the enbedding they terminate. See below for the 2196 level of the embedding they terminate. See below for the
2144 reason. */ 2197 reason. */
2145 if (bidi_it->orig_type == PDF 2198 if (bidi_it->orig_type == PDF
2146 /* Don't do this if this formatting code didn't change the 2199 /* Don't do this if this formatting code didn't change the