diff options
| author | Eli Zaretskii | 2009-12-31 16:31:54 -0500 |
|---|---|---|
| committer | Eli Zaretskii | 2009-12-31 16:31:54 -0500 |
| commit | e7402cb24ccbf5776dbc5b086bc42bd40d5fb05e (patch) | |
| tree | 04b18fdf7b56aaed30b43da7623652c25e345430 /src | |
| parent | 89d3374a8cb2a24adabe88765da58a85855ff6f8 (diff) | |
| download | emacs-e7402cb24ccbf5776dbc5b086bc42bd40d5fb05e.tar.gz emacs-e7402cb24ccbf5776dbc5b086bc42bd40d5fb05e.zip | |
Retrospective commit from 2009-09-20.
Handle positions at BEGV and ZV. More initialization cleanups.
xdisp.c (reseat_1): Handle position < BEGV.
bidi.c (bidi_paragraph_init): Set bidi_it->ch_len. Handle ZV.
(bidi_init_it): Don't initialize bidi_it->ch_len.
(bidi_resolve_explicit_1): Abort if bidi_it->ch_len was not
initialized.
(bidi_at_paragraph_end, bidi_resolve_explicit_1)
(bidi_resolve_weak, bidi_level_of_next_char): Handle bytepos at
ZV_BYTE.
(bidi_resolve_explicit_1): Handle position < BEGV.
Diffstat (limited to 'src')
| -rw-r--r-- | src/ChangeLog.bidi | 13 | ||||
| -rw-r--r-- | src/bidi.c | 85 | ||||
| -rw-r--r-- | src/xdisp.c | 6 |
3 files changed, 69 insertions, 35 deletions
diff --git a/src/ChangeLog.bidi b/src/ChangeLog.bidi index 1cd44eff453..bcdb1ad309a 100644 --- a/src/ChangeLog.bidi +++ b/src/ChangeLog.bidi | |||
| @@ -1,3 +1,16 @@ | |||
| 1 | 2009-09-20 Eli Zaretskii <eliz@gnu.org> | ||
| 2 | |||
| 3 | * xdisp.c (reseat_1): Handle position < BEGV. | ||
| 4 | |||
| 5 | * bidi.c (bidi_paragraph_init): Set bidi_it->ch_len. Handle ZV. | ||
| 6 | (bidi_init_it): Don't initialize bidi_it->ch_len. | ||
| 7 | (bidi_resolve_explicit_1): Abort if bidi_it->ch_len was not | ||
| 8 | initialized. | ||
| 9 | (bidi_at_paragraph_end, bidi_resolve_explicit_1) | ||
| 10 | (bidi_resolve_weak, bidi_level_of_next_char): Handle bytepos at | ||
| 11 | ZV_BYTE. | ||
| 12 | (bidi_resolve_explicit_1): Handle position < BEGV. | ||
| 13 | |||
| 1 | 2009-09-19 Eli Zaretskii <eliz@gnu.org> | 14 | 2009-09-19 Eli Zaretskii <eliz@gnu.org> |
| 2 | 15 | ||
| 3 | * xdisp.c (init_iterator): Call bidi_init_it. Set | 16 | * xdisp.c (init_iterator): Call bidi_init_it. Set |
diff --git a/src/bidi.c b/src/bidi.c index a7c905239c7..b146d562c37 100644 --- a/src/bidi.c +++ b/src/bidi.c | |||
| @@ -64,6 +64,7 @@ static int bidi_initialized = 0; | |||
| 64 | 64 | ||
| 65 | static Lisp_Object bidi_type_table; | 65 | static Lisp_Object bidi_type_table; |
| 66 | 66 | ||
| 67 | /* FIXME: Remove these when bidi_explicit_dir_char uses a lookup table. */ | ||
| 67 | #define LRM_CHAR 0x200E | 68 | #define LRM_CHAR 0x200E |
| 68 | #define RLM_CHAR 0x200F | 69 | #define RLM_CHAR 0x200F |
| 69 | #define LRE_CHAR 0x202A | 70 | #define LRE_CHAR 0x202A |
| @@ -72,15 +73,8 @@ static Lisp_Object bidi_type_table; | |||
| 72 | #define LRO_CHAR 0x202D | 73 | #define LRO_CHAR 0x202D |
| 73 | #define RLO_CHAR 0x202E | 74 | #define RLO_CHAR 0x202E |
| 74 | 75 | ||
| 75 | #define CHARSET_HEBREW 0x88 | ||
| 76 | #define CHARSET_ARABIC 0x87 | ||
| 77 | #define CHARSET_SYRIAC -1 /* these are undefined yet, -1 is invalid */ | ||
| 78 | #define CHARSET_THAANA -1 | ||
| 79 | |||
| 80 | /* FIXME: need to define wrappers for FETCH_CHAR etc. that return | ||
| 81 | BIDI_EOB when they hit ZV. */ | ||
| 82 | #define BIDI_EOB -1 | 76 | #define BIDI_EOB -1 |
| 83 | #define BIDI_BOB -2 | 77 | #define BIDI_BOB -2 /* FIXME: Is this needed? */ |
| 84 | 78 | ||
| 85 | /* Local data structures. (Look in dispextern.h for the rest.) */ | 79 | /* Local data structures. (Look in dispextern.h for the rest.) */ |
| 86 | 80 | ||
| @@ -416,6 +410,8 @@ bidi_is_arabic_number (int ch) | |||
| 416 | bidi_type_t | 410 | bidi_type_t |
| 417 | bidi_get_type (int ch) | 411 | bidi_get_type (int ch) |
| 418 | { | 412 | { |
| 413 | if (ch == BIDI_EOB) | ||
| 414 | return NEUTRAL_B; | ||
| 419 | return (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch)); | 415 | return (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch)); |
| 420 | } | 416 | } |
| 421 | 417 | ||
| @@ -695,11 +691,15 @@ bidi_peek_at_next_level (struct bidi_it *bidi_it) | |||
| 695 | int | 691 | int |
| 696 | bidi_at_paragraph_end (int this_ch, int pos) | 692 | bidi_at_paragraph_end (int this_ch, int pos) |
| 697 | { | 693 | { |
| 698 | int next_ch = FETCH_CHAR (pos); | 694 | int next_ch; |
| 695 | |||
| 696 | if (pos >= ZV_BYTE) | ||
| 697 | return 1; | ||
| 699 | 698 | ||
| 699 | next_ch = FETCH_CHAR (pos); | ||
| 700 | /* FIXME: This should support all Unicode characters that can end a | 700 | /* FIXME: This should support all Unicode characters that can end a |
| 701 | paragraph. */ | 701 | paragraph. */ |
| 702 | return (this_ch == '\n' && next_ch == '\n') || this_ch == BIDI_EOB; | 702 | return (this_ch == '\n' && next_ch == '\n'); |
| 703 | } | 703 | } |
| 704 | 704 | ||
| 705 | /* Determine the start-of-run (sor) directional type given the two | 705 | /* Determine the start-of-run (sor) directional type given the two |
| @@ -738,24 +738,27 @@ bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after) | |||
| 738 | void | 738 | void |
| 739 | bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it) | 739 | bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it) |
| 740 | { | 740 | { |
| 741 | bidi_it->level_stack[0].level = 0; | 741 | int pos = bidi_it->charpos, bytepos = bidi_it->bytepos; |
| 742 | int ch; | ||
| 743 | |||
| 744 | /* We should never be called at EOB. */ | ||
| 745 | if (bytepos >= ZV_BYTE) | ||
| 746 | abort (); | ||
| 747 | |||
| 748 | ch = FETCH_CHAR (bytepos); | ||
| 749 | bidi_it->ch_len = CHAR_BYTES (ch); | ||
| 750 | bidi_it->level_stack[0].level = 0; /* default for L2R */ | ||
| 742 | if (dir == R2L) | 751 | if (dir == R2L) |
| 743 | bidi_it->level_stack[0].level = 1; | 752 | bidi_it->level_stack[0].level = 1; |
| 744 | else if (dir == NEUTRAL_DIR) /* P2 */ | 753 | else if (dir == NEUTRAL_DIR) /* P2 */ |
| 745 | { | 754 | { |
| 746 | bidi_type_t type; | 755 | bidi_type_t type; |
| 747 | int pos = bidi_it->charpos, bytepos = bidi_it->bytepos; | ||
| 748 | int ch; | ||
| 749 | |||
| 750 | ch = FETCH_CHAR (bytepos); | ||
| 751 | pos++; | ||
| 752 | bytepos += CHAR_BYTES (ch); | ||
| 753 | 756 | ||
| 754 | /* FIXME: should actually go to where the paragraph begins and | 757 | /* FIXME: should actually go to where the paragraph begins and |
| 755 | start the loop below from there, since UAX#9 says to find the | 758 | start the loop below from there, since UAX#9 says to find the |
| 756 | first strong directional character in the paragraph. */ | 759 | first strong directional character in the paragraph. */ |
| 757 | 760 | ||
| 758 | for (type = bidi_get_type (ch); | 761 | for (type = bidi_get_type (ch), pos++, bytepos += bidi_it->ch_len; |
| 759 | /* NOTE: UAX#9 says to search only for L, AL, or R types of | 762 | /* NOTE: UAX#9 says to search only for L, AL, or R types of |
| 760 | characters, and ignore RLE, RLO, LRE, and LRO. However, | 763 | characters, and ignore RLE, RLO, LRE, and LRO. However, |
| 761 | I'm not sure it makes sense to omit those 4; should try | 764 | I'm not sure it makes sense to omit those 4; should try |
| @@ -807,7 +810,6 @@ bidi_init_it (int charpos, int bytepos, struct bidi_it *bidi_it) | |||
| 807 | bidi_set_paragraph_end (bidi_it); | 810 | bidi_set_paragraph_end (bidi_it); |
| 808 | bidi_it->charpos = charpos; | 811 | bidi_it->charpos = charpos; |
| 809 | bidi_it->bytepos = bytepos; | 812 | bidi_it->bytepos = bytepos; |
| 810 | bidi_it->ch_len = 1; | ||
| 811 | bidi_it->type = NEUTRAL_B; | 813 | bidi_it->type = NEUTRAL_B; |
| 812 | bidi_it->type_after_w1 = UNKNOWN_BT; | 814 | bidi_it->type_after_w1 = UNKNOWN_BT; |
| 813 | bidi_it->orig_type = UNKNOWN_BT; | 815 | bidi_it->orig_type = UNKNOWN_BT; |
| @@ -906,11 +908,16 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | |||
| 906 | int new_level; | 908 | int new_level; |
| 907 | bidi_dir_t override; | 909 | bidi_dir_t override; |
| 908 | 910 | ||
| 909 | if (bidi_it->charpos < 0) | 911 | if (bidi_it->bytepos < BEGV_BYTE) /* after reseat to BEGV */ |
| 910 | bidi_it->charpos = bidi_it->bytepos = 0; | 912 | { |
| 911 | else | 913 | bidi_it->charpos = BEGV; |
| 914 | bidi_it->bytepos = BEGV_BYTE; | ||
| 915 | } | ||
| 916 | else if (bidi_it->bytepos < ZV_BYTE) /* don't move at ZV */ | ||
| 912 | { | 917 | { |
| 913 | bidi_it->charpos++; | 918 | bidi_it->charpos++; |
| 919 | if (bidi_it->ch_len == 0) | ||
| 920 | abort (); | ||
| 914 | bidi_it->bytepos += bidi_it->ch_len; | 921 | bidi_it->bytepos += bidi_it->ch_len; |
| 915 | } | 922 | } |
| 916 | 923 | ||
| @@ -920,9 +927,17 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) | |||
| 920 | 927 | ||
| 921 | /* in case it is a unibyte character (not yet implemented) */ | 928 | /* in case it is a unibyte character (not yet implemented) */ |
| 922 | /* _fetch_multibyte_char_len = 1; */ | 929 | /* _fetch_multibyte_char_len = 1; */ |
| 923 | curchar = FETCH_CHAR (bidi_it->bytepos); | 930 | if (bidi_it->bytepos >= ZV_BYTE) |
| 931 | { | ||
| 932 | curchar = BIDI_EOB; | ||
| 933 | bidi_it->ch_len = 1; | ||
| 934 | } | ||
| 935 | else | ||
| 936 | { | ||
| 937 | curchar = FETCH_CHAR (bidi_it->bytepos); | ||
| 938 | bidi_it->ch_len = CHAR_BYTES (curchar); | ||
| 939 | } | ||
| 924 | bidi_it->ch = curchar; | 940 | bidi_it->ch = curchar; |
| 925 | bidi_it->ch_len = CHAR_BYTES (curchar); | ||
| 926 | 941 | ||
| 927 | type = bidi_get_type (curchar); | 942 | type = bidi_get_type (curchar); |
| 928 | bidi_it->orig_type = type; | 943 | bidi_it->orig_type = type; |
| @@ -1055,6 +1070,7 @@ bidi_resolve_explicit (struct bidi_it *bidi_it) | |||
| 1055 | if (prev_level < new_level | 1070 | if (prev_level < new_level |
| 1056 | && bidi_it->type == WEAK_BN | 1071 | && bidi_it->type == WEAK_BN |
| 1057 | && bidi_it->ignore_bn_limit == 0 /* only if not already known */ | 1072 | && bidi_it->ignore_bn_limit == 0 /* only if not already known */ |
| 1073 | && bidi_it->ch != BIDI_EOB /* not already at EOB */ | ||
| 1058 | && bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos | 1074 | && bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos |
| 1059 | + bidi_it->ch_len))) | 1075 | + bidi_it->ch_len))) |
| 1060 | { | 1076 | { |
| @@ -1194,7 +1210,9 @@ bidi_resolve_weak (struct bidi_it *bidi_it) | |||
| 1194 | || bidi_it->prev.orig_type == WEAK_NSM)) /* a/W1 */ | 1210 | || bidi_it->prev.orig_type == WEAK_NSM)) /* a/W1 */ |
| 1195 | || bidi_it->prev.type_after_w1 == WEAK_AN))) /* W4 */ | 1211 | || bidi_it->prev.type_after_w1 == WEAK_AN))) /* W4 */ |
| 1196 | { | 1212 | { |
| 1197 | next_char = FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len); | 1213 | next_char = |
| 1214 | bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE | ||
| 1215 | ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len); | ||
| 1198 | type_of_next = bidi_get_type (next_char); | 1216 | type_of_next = bidi_get_type (next_char); |
| 1199 | 1217 | ||
| 1200 | if (type_of_next == WEAK_BN | 1218 | if (type_of_next == WEAK_BN |
| @@ -1245,7 +1263,9 @@ bidi_resolve_weak (struct bidi_it *bidi_it) | |||
| 1245 | { | 1263 | { |
| 1246 | int en_pos = bidi_it->charpos + 1; | 1264 | int en_pos = bidi_it->charpos + 1; |
| 1247 | 1265 | ||
| 1248 | next_char = FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len); | 1266 | next_char = |
| 1267 | bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE | ||
| 1268 | ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len); | ||
| 1249 | type_of_next = bidi_get_type (next_char); | 1269 | type_of_next = bidi_get_type (next_char); |
| 1250 | 1270 | ||
| 1251 | if (type_of_next == WEAK_ET | 1271 | if (type_of_next == WEAK_ET |
| @@ -1568,11 +1588,11 @@ bidi_level_of_next_char (struct bidi_it *bidi_it) | |||
| 1568 | 1588 | ||
| 1569 | do { | 1589 | do { |
| 1570 | /*_fetch_multibyte_char_len = 1;*/ | 1590 | /*_fetch_multibyte_char_len = 1;*/ |
| 1571 | ch = FETCH_CHAR (bpos + clen); | 1591 | ch = bpos + clen >= ZV_BYTE ? BIDI_EOB : FETCH_CHAR (bpos + clen); |
| 1572 | bpos += clen; | 1592 | bpos += clen; |
| 1573 | cpos++; | 1593 | cpos++; |
| 1574 | clen = CHAR_BYTES (ch); | 1594 | clen = (ch == BIDI_EOB ? 1 : CHAR_BYTES (ch)); |
| 1575 | if (ch == '\n' /* || ch == LINESEP_CHAR */) | 1595 | if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */) |
| 1576 | chtype = NEUTRAL_B; | 1596 | chtype = NEUTRAL_B; |
| 1577 | else | 1597 | else |
| 1578 | chtype = bidi_get_type (ch); | 1598 | chtype = bidi_get_type (ch); |
| @@ -1615,13 +1635,14 @@ bidi_level_of_next_char (struct bidi_it *bidi_it) | |||
| 1615 | ugly side effect does not happen. | 1635 | ugly side effect does not happen. |
| 1616 | 1636 | ||
| 1617 | (This is, of course, only important if the formatting codes | 1637 | (This is, of course, only important if the formatting codes |
| 1618 | are actually displayed, but Emacs does display them if the | 1638 | are actually displayed, but Emacs does need to display them |
| 1619 | user wants to.) */ | 1639 | if the user wants to.) */ |
| 1620 | level = prev_level; | 1640 | level = prev_level; |
| 1621 | } | 1641 | } |
| 1622 | else if (bidi_it->orig_type == NEUTRAL_B /* L1 */ | 1642 | else if (bidi_it->orig_type == NEUTRAL_B /* L1 */ |
| 1623 | || bidi_it->orig_type == NEUTRAL_S | 1643 | || bidi_it->orig_type == NEUTRAL_S |
| 1624 | || bidi_it->ch == '\n' /* || bidi_it->ch == LINESEP_CHAR */ | 1644 | || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB |
| 1645 | /* || bidi_it->ch == LINESEP_CHAR */ | ||
| 1625 | || (bidi_it->orig_type == NEUTRAL_WS | 1646 | || (bidi_it->orig_type == NEUTRAL_WS |
| 1626 | && (bidi_it->next_for_ws.type == NEUTRAL_B | 1647 | && (bidi_it->next_for_ws.type == NEUTRAL_B |
| 1627 | || bidi_it->next_for_ws.type == NEUTRAL_S))) | 1648 | || bidi_it->next_for_ws.type == NEUTRAL_S))) |
diff --git a/src/xdisp.c b/src/xdisp.c index 2744a5565da..05cc61e9759 100644 --- a/src/xdisp.c +++ b/src/xdisp.c | |||
| @@ -5541,13 +5541,13 @@ reseat_1 (it, pos, set_stop_p) | |||
| 5541 | not be the character at POS. We need to find the next | 5541 | not be the character at POS. We need to find the next |
| 5542 | character in visual order starting from the preceding | 5542 | character in visual order starting from the preceding |
| 5543 | character. */ | 5543 | character. */ |
| 5544 | if ((it->bidi_it.charpos = CHARPOS (pos) - 1) > 1) | 5544 | if ((it->bidi_it.charpos = CHARPOS (pos) - 1) >= BEGV) |
| 5545 | { | 5545 | { |
| 5546 | it->bidi_it.bytepos = CHAR_TO_BYTE (CHARPOS (pos) - 1); | 5546 | it->bidi_it.bytepos = CHAR_TO_BYTE (CHARPOS (pos) - 1); |
| 5547 | it->bidi_it.ch_len = CHAR_BYTES (CHARPOS (pos) - 1); | 5547 | it->bidi_it.ch_len = CHAR_BYTES (FETCH_CHAR (it->bidi_it.bytepos)); |
| 5548 | } | 5548 | } |
| 5549 | else | 5549 | else |
| 5550 | it->bidi_it.bytepos = 0; | 5550 | it->bidi_it.bytepos = 0; /* signal bidi.c not to move */ |
| 5551 | bidi_get_next_char_visually (&it->bidi_it); | 5551 | bidi_get_next_char_visually (&it->bidi_it); |
| 5552 | SET_TEXT_POS (pos, it->bidi_it.charpos, it->bidi_it.bytepos); | 5552 | SET_TEXT_POS (pos, it->bidi_it.charpos, it->bidi_it.bytepos); |
| 5553 | it->current.pos = it->position = pos; | 5553 | it->current.pos = it->position = pos; |