diff options
| author | Eli Zaretskii | 2010-01-01 06:01:34 -0500 |
|---|---|---|
| committer | Eli Zaretskii | 2010-01-01 06:01:34 -0500 |
| commit | e342a24d6251c0cef99572722eb816d860352390 (patch) | |
| tree | 2d5ba0ab3146ce8f18f2f47127bd61933746c8fb /src | |
| parent | 9c82e14597e63ce0d4b036deedc18a8a4732d19b (diff) | |
| download | emacs-e342a24d6251c0cef99572722eb816d860352390.tar.gz emacs-e342a24d6251c0cef99572722eb816d860352390.zip | |
Retrospective commit from 2009-09-27.
Support character mirroring.
Support iteration that starts in the middle of a line.
Misc cleanups.
xdisp.c (next_element_from_buffer): If called not at line
beginning, start bidi iteration from line beginning.
bidi.c (bidi_paragraph_init): Use
bidi_overriding_paragraph_direction instead of a literal zero.
(bidi_initialize): Fix some character types, per Unicode 5.x.
(bidi_get_type): Abort if called with invalid character code.
dispextern.h: Add prototype of bidi_mirror_char.
xdisp.c (get_next_display_element): Mirror characters whose
resolved type is STRONG_R.
Diffstat (limited to 'src')
| -rw-r--r-- | src/ChangeLog.bidi | 15 | ||||
| -rw-r--r-- | src/bidi.c | 55 | ||||
| -rw-r--r-- | src/dispextern.h | 1 | ||||
| -rw-r--r-- | src/xdisp.c | 54 |
4 files changed, 84 insertions, 41 deletions
diff --git a/src/ChangeLog.bidi b/src/ChangeLog.bidi index bc7a473af49..97f300fc8b3 100644 --- a/src/ChangeLog.bidi +++ b/src/ChangeLog.bidi | |||
| @@ -1,3 +1,18 @@ | |||
| 1 | 2009-09-27 Eli Zaretskii <eliz@gnu.org> | ||
| 2 | |||
| 3 | * xdisp.c (next_element_from_buffer): If called not at line | ||
| 4 | beginning, start bidi iteration from line beginning. | ||
| 5 | |||
| 6 | * bidi.c (bidi_paragraph_init): Use | ||
| 7 | bidi_overriding_paragraph_direction instead of a literal zero. | ||
| 8 | (bidi_initialize): Fix some character types, per Unicode 5.x. | ||
| 9 | (bidi_get_type): Abort if called with invalid character code. | ||
| 10 | |||
| 11 | * dispextern.h: Add prototype of bidi_mirror_char. | ||
| 12 | |||
| 13 | * xdisp.c (get_next_display_element): Mirror characters whose | ||
| 14 | resolved type is STRONG_R. | ||
| 15 | |||
| 1 | 2009-09-26 Eli Zaretskii <eliz@gnu.org> | 16 | 2009-09-26 Eli Zaretskii <eliz@gnu.org> |
| 2 | 17 | ||
| 3 | * bidi.c (bidi_paragraph_init): Don't set bidi_it->ch_len. Abort | 18 | * bidi.c (bidi_paragraph_init): Don't set bidi_it->ch_len. Abort |
diff --git a/src/bidi.c b/src/bidi.c index bcbbb485e1a..3ec0d2c1035 100644 --- a/src/bidi.c +++ b/src/bidi.c | |||
| @@ -99,14 +99,6 @@ int bidi_ignore_explicit_marks_for_paragraph_level = 1; | |||
| 99 | /* FIXME: Should be user-definable. */ | 99 | /* FIXME: Should be user-definable. */ |
| 100 | bidi_dir_t bidi_overriding_paragraph_direction = L2R; | 100 | bidi_dir_t bidi_overriding_paragraph_direction = L2R; |
| 101 | 101 | ||
| 102 | /* FIXME: Unused? */ | ||
| 103 | #define ASCII_BIDI_TYPE_SET(STR, TYPE) \ | ||
| 104 | do { \ | ||
| 105 | unsigned char *p; \ | ||
| 106 | for (p = (STR); *p; p++) \ | ||
| 107 | CHAR_TABLE_SET (bidi_type_table, *p, (TYPE)); \ | ||
| 108 | } while (0) | ||
| 109 | |||
| 110 | static void | 102 | static void |
| 111 | bidi_initialize () | 103 | bidi_initialize () |
| 112 | { | 104 | { |
| @@ -128,11 +120,10 @@ bidi_initialize () | |||
| 128 | { 0x0021, 0x0022, NEUTRAL_ON }, | 120 | { 0x0021, 0x0022, NEUTRAL_ON }, |
| 129 | { 0x0023, 0x0025, WEAK_ET }, | 121 | { 0x0023, 0x0025, WEAK_ET }, |
| 130 | { 0x0026, 0x002A, NEUTRAL_ON }, | 122 | { 0x0026, 0x002A, NEUTRAL_ON }, |
| 131 | { 0x002B, 0x0000, WEAK_ET }, | 123 | { 0x002B, 0x0000, WEAK_ES }, |
| 132 | { 0x002C, 0x0000, WEAK_CS }, | 124 | { 0x002C, 0x0000, WEAK_CS }, |
| 133 | { 0x002D, 0x0000, WEAK_ET }, | 125 | { 0x002D, 0x0000, WEAK_ES }, |
| 134 | { 0x002E, 0x0000, WEAK_CS }, | 126 | { 0x002E, 0x002F, WEAK_CS }, |
| 135 | { 0x002F, 0x0000, WEAK_ES }, | ||
| 136 | { 0x0030, 0x0039, WEAK_EN }, | 127 | { 0x0030, 0x0039, WEAK_EN }, |
| 137 | { 0x003A, 0x0000, WEAK_CS }, | 128 | { 0x003A, 0x0000, WEAK_CS }, |
| 138 | { 0x003B, 0x0040, NEUTRAL_ON }, | 129 | { 0x003B, 0x0040, NEUTRAL_ON }, |
| @@ -145,7 +136,9 @@ bidi_initialize () | |||
| 145 | { 0x00A1, 0x0000, NEUTRAL_ON }, | 136 | { 0x00A1, 0x0000, NEUTRAL_ON }, |
| 146 | { 0x00A2, 0x00A5, WEAK_ET }, | 137 | { 0x00A2, 0x00A5, WEAK_ET }, |
| 147 | { 0x00A6, 0x00A9, NEUTRAL_ON }, | 138 | { 0x00A6, 0x00A9, NEUTRAL_ON }, |
| 148 | { 0x00AB, 0x00AF, NEUTRAL_ON }, | 139 | { 0x00AB, 0x00AC, NEUTRAL_ON }, |
| 140 | { 0x00AD, 0x0000, WEAK_BN }, | ||
| 141 | { 0x00AE, 0x00Af, NEUTRAL_ON }, | ||
| 149 | { 0x00B0, 0x00B1, WEAK_ET }, | 142 | { 0x00B0, 0x00B1, WEAK_ET }, |
| 150 | { 0x00B2, 0x00B3, WEAK_EN }, | 143 | { 0x00B2, 0x00B3, WEAK_EN }, |
| 151 | { 0x00B4, 0x0000, NEUTRAL_ON }, | 144 | { 0x00B4, 0x0000, NEUTRAL_ON }, |
| @@ -171,7 +164,9 @@ bidi_initialize () | |||
| 171 | { 0x05C0, 0x0000, STRONG_R }, | 164 | { 0x05C0, 0x0000, STRONG_R }, |
| 172 | { 0x05C1, 0x05C2, WEAK_NSM }, | 165 | { 0x05C1, 0x05C2, WEAK_NSM }, |
| 173 | { 0x05C3, 0x0000, STRONG_R }, | 166 | { 0x05C3, 0x0000, STRONG_R }, |
| 174 | { 0x05C4, 0x0000, WEAK_NSM }, | 167 | { 0x05C4, 0x05C5, WEAK_NSM }, |
| 168 | { 0x05C6, 0x0000, STRONG_R }, | ||
| 169 | { 0x05C7, 0x0000, WEAK_NSM }, | ||
| 175 | { 0x05D0, 0x05F4, STRONG_R }, | 170 | { 0x05D0, 0x05F4, STRONG_R }, |
| 176 | { 0x060C, 0x0000, WEAK_CS }, | 171 | { 0x060C, 0x0000, WEAK_CS }, |
| 177 | { 0x061B, 0x064A, STRONG_AL }, | 172 | { 0x061B, 0x064A, STRONG_AL }, |
| @@ -400,18 +395,14 @@ bidi_initialize () | |||
| 400 | bidi_initialized = 1; | 395 | bidi_initialized = 1; |
| 401 | } | 396 | } |
| 402 | 397 | ||
| 403 | static int | ||
| 404 | bidi_is_arabic_number (int ch) | ||
| 405 | { | ||
| 406 | return 0; /* FIXME! */ | ||
| 407 | } | ||
| 408 | |||
| 409 | /* Return the bidi type of a character CH. */ | 398 | /* Return the bidi type of a character CH. */ |
| 410 | bidi_type_t | 399 | bidi_type_t |
| 411 | bidi_get_type (int ch) | 400 | bidi_get_type (int ch) |
| 412 | { | 401 | { |
| 413 | if (ch == BIDI_EOB) | 402 | if (ch == BIDI_EOB) |
| 414 | return NEUTRAL_B; | 403 | return NEUTRAL_B; |
| 404 | if (ch < 0 || ch > MAX_CHAR) | ||
| 405 | abort (); | ||
| 415 | return (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch)); | 406 | return (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch)); |
| 416 | } | 407 | } |
| 417 | 408 | ||
| @@ -457,6 +448,10 @@ bidi_get_category (bidi_type_t type) | |||
| 457 | } | 448 | } |
| 458 | } | 449 | } |
| 459 | 450 | ||
| 451 | /* Return the mirrored character of C, if any. | ||
| 452 | |||
| 453 | Note: The conditions in UAX#9 clause L4 must be tested by the | ||
| 454 | caller. */ | ||
| 460 | /* FIXME: exceedingly temporary! Should consult the Unicode database | 455 | /* FIXME: exceedingly temporary! Should consult the Unicode database |
| 461 | of character properties. */ | 456 | of character properties. */ |
| 462 | int | 457 | int |
| @@ -722,7 +717,7 @@ bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after) | |||
| 722 | that we find on the two sides of the level boundary (see UAX#9, | 717 | that we find on the two sides of the level boundary (see UAX#9, |
| 723 | clause X10), and so we don't need to know the final embedding | 718 | clause X10), and so we don't need to know the final embedding |
| 724 | level to which we descend after processing all the PDFs. */ | 719 | level to which we descend after processing all the PDFs. */ |
| 725 | if (level_before < level_after || !bidi_it->prev_was_pdf) | 720 | if (!bidi_it->prev_was_pdf || level_before < level_after) |
| 726 | /* FIXME: should the default sor direction be user selectable? */ | 721 | /* FIXME: should the default sor direction be user selectable? */ |
| 727 | bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R; | 722 | bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R; |
| 728 | if (level_before > level_after) | 723 | if (level_before > level_after) |
| @@ -742,8 +737,7 @@ bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after) | |||
| 742 | void | 737 | void |
| 743 | bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it) | 738 | bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it) |
| 744 | { | 739 | { |
| 745 | int pos = bidi_it->charpos, bytepos = bidi_it->bytepos; | 740 | int bytepos = bidi_it->bytepos; |
| 746 | int ch, ch_len; | ||
| 747 | 741 | ||
| 748 | /* We should never be called at EOB or before BEGV. */ | 742 | /* We should never be called at EOB or before BEGV. */ |
| 749 | if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE) | 743 | if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE) |
| @@ -756,20 +750,16 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it) | |||
| 756 | || FETCH_CHAR (bytepos - 1) == '\n')) | 750 | || FETCH_CHAR (bytepos - 1) == '\n')) |
| 757 | abort (); | 751 | abort (); |
| 758 | 752 | ||
| 759 | ch = FETCH_CHAR (bytepos); | ||
| 760 | ch_len = CHAR_BYTES (ch); | ||
| 761 | bidi_it->level_stack[0].level = 0; /* default for L2R */ | 753 | bidi_it->level_stack[0].level = 0; /* default for L2R */ |
| 762 | if (dir == R2L) | 754 | if (dir == R2L) |
| 763 | bidi_it->level_stack[0].level = 1; | 755 | bidi_it->level_stack[0].level = 1; |
| 764 | else if (dir == NEUTRAL_DIR) /* P2 */ | 756 | else if (dir == NEUTRAL_DIR) /* P2 */ |
| 765 | { | 757 | { |
| 766 | bidi_type_t type; | 758 | int ch = FETCH_CHAR (bytepos), ch_len = CHAR_BYTES (ch); |
| 767 | 759 | int pos = bidi_it->charpos; | |
| 768 | /* FIXME: should actually go to where the paragraph begins and | 760 | bidi_type_t type = bidi_get_type (ch); |
| 769 | start the loop below from there, since UAX#9 says to find the | ||
| 770 | first strong directional character in the paragraph. */ | ||
| 771 | 761 | ||
| 772 | for (type = bidi_get_type (ch), pos++, bytepos += ch_len; | 762 | for (pos++, bytepos += ch_len; |
| 773 | /* NOTE: UAX#9 says to search only for L, AL, or R types of | 763 | /* NOTE: UAX#9 says to search only for L, AL, or R types of |
| 774 | characters, and ignore RLE, RLO, LRE, and LRO. However, | 764 | characters, and ignore RLE, RLO, LRE, and LRO. However, |
| 775 | I'm not sure it makes sense to omit those 4; should try | 765 | I'm not sure it makes sense to omit those 4; should try |
| @@ -795,7 +785,8 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it) | |||
| 795 | bidi_it->new_paragraph = 0; | 785 | bidi_it->new_paragraph = 0; |
| 796 | bidi_it->next_en_pos = -1; | 786 | bidi_it->next_en_pos = -1; |
| 797 | bidi_it->next_for_ws.type = UNKNOWN_BT; | 787 | bidi_it->next_for_ws.type = UNKNOWN_BT; |
| 798 | bidi_set_sor_type (bidi_it, bidi_it->level_stack[0].level, 0); /* X10 */ | 788 | bidi_set_sor_type (bidi_it, bidi_overriding_paragraph_direction, |
| 789 | bidi_it->level_stack[0].level); /* X10 */ | ||
| 799 | 790 | ||
| 800 | bidi_cache_reset (); | 791 | bidi_cache_reset (); |
| 801 | } | 792 | } |
diff --git a/src/dispextern.h b/src/dispextern.h index 389d0acc23c..0e765aa667c 100644 --- a/src/dispextern.h +++ b/src/dispextern.h | |||
| @@ -2801,6 +2801,7 @@ extern EMACS_INT tool_bar_button_relief; | |||
| 2801 | extern void bidi_init_it P_ ((int, int, struct bidi_it *)); | 2801 | extern void bidi_init_it P_ ((int, int, struct bidi_it *)); |
| 2802 | extern void bidi_get_next_char_visually P_ ((struct bidi_it *)); | 2802 | extern void bidi_get_next_char_visually P_ ((struct bidi_it *)); |
| 2803 | extern void bidi_paragraph_init P_ ((bidi_dir_t, struct bidi_it *)); | 2803 | extern void bidi_paragraph_init P_ ((bidi_dir_t, struct bidi_it *)); |
| 2804 | extern int bidi_mirror_char P_ ((int)); | ||
| 2804 | 2805 | ||
| 2805 | /* Defined in xdisp.c */ | 2806 | /* Defined in xdisp.c */ |
| 2806 | 2807 | ||
diff --git a/src/xdisp.c b/src/xdisp.c index ae4a0305034..e77a197006d 100644 --- a/src/xdisp.c +++ b/src/xdisp.c | |||
| @@ -5682,6 +5682,13 @@ get_next_display_element (it) | |||
| 5682 | 5682 | ||
| 5683 | if (it->what == IT_CHARACTER) | 5683 | if (it->what == IT_CHARACTER) |
| 5684 | { | 5684 | { |
| 5685 | /* UAX#9, L4: "A character is depicted by a mirrored glyph if | ||
| 5686 | and only if (a) the resolved directionality of that character | ||
| 5687 | is R..." */ | ||
| 5688 | /* FIXME: Do we need an exception for characters from display | ||
| 5689 | tables? */ | ||
| 5690 | if (it->bidi_p && it->bidi_it.type == STRONG_R) | ||
| 5691 | it->c = bidi_mirror_char (it->c); | ||
| 5685 | /* Map via display table or translate control characters. | 5692 | /* Map via display table or translate control characters. |
| 5686 | IT->c, IT->len etc. have been set to the next character by | 5693 | IT->c, IT->len etc. have been set to the next character by |
| 5687 | the function call above. If we have a display table, and it | 5694 | the function call above. If we have a display table, and it |
| @@ -6505,17 +6512,46 @@ next_element_from_buffer (it) | |||
| 6505 | the character at IT_CHARPOS. */ | 6512 | the character at IT_CHARPOS. */ |
| 6506 | if (it->bidi_p && it->bidi_it.first_elt) | 6513 | if (it->bidi_p && it->bidi_it.first_elt) |
| 6507 | { | 6514 | { |
| 6508 | /* FIXME: L2R below is just for easyness of testing, as we | ||
| 6509 | currently support only left-to-right paragraphs. The value | ||
| 6510 | should be user-definable and/or come from some ``higher | ||
| 6511 | protocol''. In the absence of any other guidance, the default | ||
| 6512 | for this initialization should be NEUTRAL_DIR. */ | ||
| 6513 | it->bidi_it.charpos = IT_CHARPOS (*it); | 6515 | it->bidi_it.charpos = IT_CHARPOS (*it); |
| 6514 | it->bidi_it.bytepos = IT_BYTEPOS (*it); | 6516 | it->bidi_it.bytepos = IT_BYTEPOS (*it); |
| 6515 | bidi_paragraph_init (L2R, &it->bidi_it); | 6517 | /* If we are at the beginning of a line, we can produce the next |
| 6516 | bidi_get_next_char_visually (&it->bidi_it); | 6518 | element right away. */ |
| 6517 | it->bidi_it.first_elt = 0; | 6519 | if (it->bidi_it.bytepos == BEGV_BYTE |
| 6518 | /* Adjust IT's position information to where we moved. */ | 6520 | /* FIXME: Should support all Unicode line separators. */ |
| 6521 | || FETCH_CHAR (it->bidi_it.bytepos - 1) == '\n' | ||
| 6522 | || FETCH_CHAR (it->bidi_it.bytepos) == '\n') | ||
| 6523 | { | ||
| 6524 | /* FIXME: L2R below is just for easyness of testing, as we | ||
| 6525 | currently support only left-to-right paragraphs. The | ||
| 6526 | value should be user-definable and/or come from some | ||
| 6527 | ``higher protocol''. In the absence of any other | ||
| 6528 | guidance, the default for this initialization should be | ||
| 6529 | NEUTRAL_DIR. */ | ||
| 6530 | bidi_paragraph_init (L2R, &it->bidi_it); | ||
| 6531 | bidi_get_next_char_visually (&it->bidi_it); | ||
| 6532 | } | ||
| 6533 | else | ||
| 6534 | { | ||
| 6535 | int orig_bytepos = IT_BYTEPOS (*it); | ||
| 6536 | |||
| 6537 | /* We need to prime the bidi iterator starting at the line's | ||
| 6538 | beginning, before we will be able to produce the next | ||
| 6539 | element. */ | ||
| 6540 | IT_CHARPOS (*it) = find_next_newline_no_quit (IT_CHARPOS (*it), -1); | ||
| 6541 | IT_BYTEPOS (*it) = CHAR_TO_BYTE (IT_CHARPOS (*it)); | ||
| 6542 | it->bidi_it.charpos = IT_CHARPOS (*it); | ||
| 6543 | it->bidi_it.bytepos = IT_BYTEPOS (*it); | ||
| 6544 | bidi_paragraph_init (L2R, &it->bidi_it); | ||
| 6545 | do { | ||
| 6546 | /* Now return to buffer position where we were asked to | ||
| 6547 | get the next display element, and produce that. */ | ||
| 6548 | bidi_get_next_char_visually (&it->bidi_it); | ||
| 6549 | } while (it->bidi_it.bytepos != orig_bytepos | ||
| 6550 | && it->bidi_it.bytepos < ZV_BYTE); | ||
| 6551 | } | ||
| 6552 | |||
| 6553 | it->bidi_it.first_elt = 0; /* paranoia: bidi.c does this */ | ||
| 6554 | /* Adjust IT's position information to where we ended up. */ | ||
| 6519 | IT_CHARPOS (*it) = it->bidi_it.charpos; | 6555 | IT_CHARPOS (*it) = it->bidi_it.charpos; |
| 6520 | IT_BYTEPOS (*it) = it->bidi_it.bytepos; | 6556 | IT_BYTEPOS (*it) = it->bidi_it.bytepos; |
| 6521 | SET_TEXT_POS (it->position, IT_CHARPOS (*it), IT_BYTEPOS (*it)); | 6557 | SET_TEXT_POS (it->position, IT_CHARPOS (*it), IT_BYTEPOS (*it)); |