diff options
| -rw-r--r-- | src/text-index.c | 159 |
1 files changed, 74 insertions, 85 deletions
diff --git a/src/text-index.c b/src/text-index.c index 54cf9ea3b3b..ec83bfb1bd9 100644 --- a/src/text-index.c +++ b/src/text-index.c | |||
| @@ -91,7 +91,9 @@ struct text_index | |||
| 91 | 91 | ||
| 92 | enum | 92 | enum |
| 93 | { | 93 | { |
| 94 | /* Number of bytes in an interval. */ | 94 | /* Number of bytes in an interval. |
| 95 | Tradeoff between cost of the text-index array and cost of scanning | ||
| 96 | bytes between the positions recorded in the array. */ | ||
| 95 | TEXT_INDEX_INTERVAL = 4096, | 97 | TEXT_INDEX_INTERVAL = 4096, |
| 96 | 98 | ||
| 97 | /* Default capacity in number of intervals for text indices. */ | 99 | /* Default capacity in number of intervals for text indices. */ |
| @@ -116,7 +118,7 @@ static struct text_pos | |||
| 116 | beg_pos (const struct buffer *b) | 118 | beg_pos (const struct buffer *b) |
| 117 | { | 119 | { |
| 118 | return (struct text_pos) | 120 | return (struct text_pos) |
| 119 | {.charpos = BEG, .bytepos =BEG_BYTE}; | 121 | {.charpos = BEG, .bytepos = BEG_BYTE}; |
| 120 | } | 122 | } |
| 121 | 123 | ||
| 122 | static struct text_pos | 124 | static struct text_pos |
| @@ -138,8 +140,7 @@ pt_pos (const struct buffer *b) | |||
| 138 | necessary. */ | 140 | necessary. */ |
| 139 | 141 | ||
| 140 | static bool | 142 | static bool |
| 141 | is_close_enough_charpos (const struct text_index *ti, | 143 | is_close_enough_charpos (ptrdiff_t charpos, |
| 142 | ptrdiff_t charpos, | ||
| 143 | const struct text_pos pos) | 144 | const struct text_pos pos) |
| 144 | { | 145 | { |
| 145 | return eabs (charpos - pos.charpos) < TEXT_INDEX_INTERVAL / 4; | 146 | return eabs (charpos - pos.charpos) < TEXT_INDEX_INTERVAL / 4; |
| @@ -503,103 +504,77 @@ next_known_text_pos (struct buffer *b, ptrdiff_t entry) | |||
| 503 | } | 504 | } |
| 504 | 505 | ||
| 505 | /* Improve the known bytepos bounds *PREV and *NEXT if KNOWN is closer | 506 | /* Improve the known bytepos bounds *PREV and *NEXT if KNOWN is closer |
| 506 | to BYTEPOS. If KNOWN is an exact match for BYTEPOS return true. */ | 507 | to BYTEPOS. */ |
| 507 | 508 | ||
| 508 | static bool | 509 | static void |
| 509 | narrow_bytepos_bounds_1 (const struct text_pos known, struct text_pos *prev, | 510 | narrow_bytepos_bounds_1 (const struct text_pos known, struct text_pos *prev, |
| 510 | struct text_pos *next, const ptrdiff_t bytepos) | 511 | struct text_pos *next, const ptrdiff_t bytepos) |
| 511 | { | 512 | { |
| 512 | eassert (bytepos >= prev->bytepos && bytepos <= next->bytepos); | 513 | eassert (bytepos >= prev->bytepos && bytepos <= next->bytepos); |
| 513 | eassert (known.bytepos != TEXT_INDEX_INVALID_POSITION); | 514 | eassert (known.bytepos != TEXT_INDEX_INVALID_POSITION); |
| 514 | if (known.bytepos == bytepos) | ||
| 515 | return true; | ||
| 516 | 515 | ||
| 517 | /* If KNOWN is in (PREV, BYTEPOS] it is a better PREV. */ | 516 | /* If KNOWN is in (PREV, BYTEPOS] it is a better PREV. */ |
| 518 | if (known.bytepos < bytepos | 517 | if (known.bytepos <= bytepos |
| 519 | && known.bytepos > prev->bytepos) | 518 | && known.bytepos > prev->bytepos) |
| 520 | *prev = known; | 519 | *prev = known; |
| 521 | 520 | ||
| 522 | /* If KNOWN is in [BYTEPOS NEXT) it is a better NEXT. */ | 521 | /* If KNOWN is in [BYTEPOS NEXT) it is a better NEXT. */ |
| 523 | if (known.bytepos > bytepos | 522 | if (known.bytepos >= bytepos |
| 524 | && known.bytepos < next->bytepos) | 523 | && known.bytepos < next->bytepos) |
| 525 | *next = known; | 524 | *next = known; |
| 526 | |||
| 527 | return false; | ||
| 528 | } | 525 | } |
| 529 | 526 | ||
| 530 | /* Improve the known bytepos bounds *PREV and *NEXT of buffer B using | 527 | /* Improve the known bytepos bounds *PREV and *NEXT of buffer B using |
| 531 | known positions in B. BYTEPOS is a byte position to convert to a | 528 | known positions in B. BYTEPOS is a byte position to convert to a |
| 532 | character position. If an exact match for BYTEPOS is found, return | 529 | character position. */ |
| 533 | its charpos, otherwise return TEXT_INDEX_INVALID_POSITION. */ | ||
| 534 | 530 | ||
| 535 | static ptrdiff_t | 531 | static void |
| 536 | narrow_bytepos_bounds (struct buffer *b, struct text_pos *prev, | 532 | narrow_bytepos_bounds (struct buffer *b, struct text_pos *prev, |
| 537 | struct text_pos *next, const ptrdiff_t bytepos) | 533 | struct text_pos *next, const ptrdiff_t bytepos) |
| 538 | { | 534 | { |
| 539 | const struct text_pos pt = pt_pos (b); | 535 | narrow_bytepos_bounds_1 (pt_pos (b), prev, next, bytepos); |
| 540 | if (narrow_bytepos_bounds_1 (pt, prev, next, bytepos)) | 536 | narrow_bytepos_bounds_1 (gpt_pos (b), prev, next, bytepos); |
| 541 | return pt.charpos; | ||
| 542 | |||
| 543 | const struct text_pos gpt = gpt_pos (b); | ||
| 544 | if (narrow_bytepos_bounds_1 (gpt, prev, next, bytepos)) | ||
| 545 | return gpt.charpos; | ||
| 546 | 537 | ||
| 547 | struct text_index *ti = b->text->index; | 538 | struct text_index *ti = b->text->index; |
| 548 | if (is_cache_valid (ti) | 539 | if (is_cache_valid (ti)) |
| 549 | && narrow_bytepos_bounds_1 (ti->cache, prev, next, bytepos)) | 540 | narrow_bytepos_bounds_1 (ti->cache, prev, next, bytepos); |
| 550 | return ti->cache.charpos; | ||
| 551 | |||
| 552 | return TEXT_INDEX_INVALID_POSITION; | ||
| 553 | } | 541 | } |
| 554 | 542 | ||
| 555 | /* Improve the known bytepos bounds *PREV and *NEXT if KNOWN is closer | 543 | /* Improve the known bytepos bounds *PREV and *NEXT if KNOWN is closer |
| 556 | to BYTEPOS. If KNOWN is an exact match for BYTEPOS return true. */ | 544 | to BYTEPOS. */ |
| 557 | 545 | ||
| 558 | static bool | 546 | static void |
| 559 | narrow_charpos_bounds_1 (const struct text_pos known, struct text_pos *prev, | 547 | narrow_charpos_bounds_1 (const struct text_pos known, struct text_pos *prev, |
| 560 | struct text_pos *next, const ptrdiff_t charpos) | 548 | struct text_pos *next, const ptrdiff_t charpos) |
| 561 | { | 549 | { |
| 562 | eassert (charpos >= prev->charpos && charpos <= next->charpos); | 550 | eassert (charpos >= prev->charpos && charpos <= next->charpos); |
| 563 | eassert (known.charpos != TEXT_INDEX_INVALID_POSITION); | 551 | eassert (known.charpos != TEXT_INDEX_INVALID_POSITION); |
| 564 | if (known.charpos == charpos) | ||
| 565 | return true; | ||
| 566 | 552 | ||
| 567 | /* If KNOWN is in (PREV, BYTEPOS] it is a better PREV. */ | 553 | /* If KNOWN is in (PREV, BYTEPOS] it is a better PREV. */ |
| 568 | if (known.charpos < charpos | 554 | if (known.charpos <= charpos |
| 569 | && known.charpos > prev->charpos) | 555 | && known.charpos > prev->charpos) |
| 570 | *prev = known; | 556 | *prev = known; |
| 571 | 557 | ||
| 572 | /* If KNOWN is in [BYTEPOS NEXT) it is a better NEXT. */ | 558 | /* If KNOWN is in [BYTEPOS NEXT) it is a better NEXT. */ |
| 573 | if (known.charpos > charpos | 559 | if (known.charpos >= charpos |
| 574 | && known.charpos < next->charpos) | 560 | && known.charpos < next->charpos) |
| 575 | *next = known; | 561 | *next = known; |
| 576 | |||
| 577 | return false; | ||
| 578 | } | 562 | } |
| 579 | 563 | ||
| 580 | /* Improve the known bytepos bounds *PREV and *NEXT of buffer B using | 564 | /* Improve the known bytepos bounds *PREV and *NEXT of buffer B using |
| 581 | known positions in B. BYTEPOS is a byte position to convert to a | 565 | known positions in B. BYTEPOS is a byte position to convert to a |
| 582 | character position. If an exact match for BYTEPOS is found, return | 566 | character position. */ |
| 583 | its charpos, otherwise return TEXT_INDEX_INVALID_POSITION. */ | ||
| 584 | 567 | ||
| 585 | static ptrdiff_t | 568 | static void |
| 586 | narrow_charpos_bounds (struct buffer *b, struct text_pos *prev, | 569 | narrow_charpos_bounds (struct buffer *b, struct text_pos *prev, |
| 587 | struct text_pos *next, const ptrdiff_t charpos) | 570 | struct text_pos *next, const ptrdiff_t charpos) |
| 588 | { | 571 | { |
| 589 | const struct text_pos pt = pt_pos (b); | 572 | narrow_charpos_bounds_1 (pt_pos (b), prev, next, charpos); |
| 590 | if (narrow_charpos_bounds_1 (pt, prev, next, charpos)) | 573 | narrow_charpos_bounds_1 (gpt_pos (b), prev, next, charpos); |
| 591 | return pt.bytepos; | ||
| 592 | |||
| 593 | const struct text_pos gpt = gpt_pos (b); | ||
| 594 | if (narrow_charpos_bounds_1 (gpt, prev, next, charpos)) | ||
| 595 | return gpt.bytepos; | ||
| 596 | 574 | ||
| 597 | struct text_index *ti = b->text->index; | 575 | struct text_index *ti = b->text->index; |
| 598 | if (is_cache_valid (ti) | 576 | if (is_cache_valid (ti)) |
| 599 | && narrow_charpos_bounds_1 (ti->cache, prev, next, charpos)) | 577 | narrow_charpos_bounds_1 (ti->cache, prev, next, charpos); |
| 600 | return ti->cache.bytepos; | ||
| 601 | |||
| 602 | return TEXT_INDEX_INVALID_POSITION; | ||
| 603 | } | 578 | } |
| 604 | 579 | ||
| 605 | /* Return the character position in buffer B corresponding to | 580 | /* Return the character position in buffer B corresponding to |
| @@ -608,6 +583,7 @@ narrow_charpos_bounds (struct buffer *b, struct text_pos *prev, | |||
| 608 | ptrdiff_t | 583 | ptrdiff_t |
| 609 | buf_bytepos_to_charpos (struct buffer *b, const ptrdiff_t bytepos) | 584 | buf_bytepos_to_charpos (struct buffer *b, const ptrdiff_t bytepos) |
| 610 | { | 585 | { |
| 586 | /* FIXME: Can BYTEPOS ever be outside of BEGV_BYTE..ZV_BYTE? */ | ||
| 611 | /* If this buffer has as many characters as bytes, each character must | 587 | /* If this buffer has as many characters as bytes, each character must |
| 612 | be one byte. This takes care of the case where | 588 | be one byte. This takes care of the case where |
| 613 | enable-multibyte-characters is nil. */ | 589 | enable-multibyte-characters is nil. */ |
| @@ -615,30 +591,39 @@ buf_bytepos_to_charpos (struct buffer *b, const ptrdiff_t bytepos) | |||
| 615 | if (z.charpos == z.bytepos) | 591 | if (z.charpos == z.bytepos) |
| 616 | return bytepos; | 592 | return bytepos; |
| 617 | 593 | ||
| 618 | /* BYTEPOS == Z_BYTE, and BYTEPOS is an interval boundary, | 594 | /* Begin with the interval (BEG, Z), and improve on that by taking known |
| 619 | then BYTEPOS does not have an index entry because we don't want | 595 | positions into account like PT, GPT and the cache. This might |
| 620 | extra entries for (Z, Z_BYTE). Changing that would be possible | 596 | already find the answer. */ |
| 621 | but leads to more code than this if-statement, so it's probably | 597 | struct text_index *ti = ensure_has_index (b); |
| 622 | not worth it. */ | 598 | struct text_pos prev = beg_pos (b); |
| 623 | if (bytepos == z.bytepos) | 599 | struct text_pos next = z; |
| 624 | return z.charpos; | ||
| 625 | ensure_bytepos_indexed (b, bytepos); | ||
| 626 | 600 | ||
| 627 | struct text_index *ti = b->text->index; | 601 | narrow_bytepos_bounds (b, &prev, &next, bytepos); |
| 602 | |||
| 603 | /* Z_BYTE does not have an index entry because we don't want | ||
| 604 | extra entries for (Z, Z_BYTE), so short-circuit *before* looking | ||
| 605 | up the index. Changing that would be possible but leads to more | ||
| 606 | code than this if-statement, so it's probably not worth it. */ | ||
| 607 | if (next.bytepos == bytepos) | ||
| 608 | return next.charpos; | ||
| 609 | |||
| 610 | ensure_bytepos_indexed (b, bytepos); | ||
| 628 | const ptrdiff_t entry = index_bytepos_entry (ti, bytepos); | 611 | const ptrdiff_t entry = index_bytepos_entry (ti, bytepos); |
| 629 | struct text_pos prev = index_text_pos (ti, entry); | 612 | narrow_bytepos_bounds_1 (index_text_pos (ti, entry), &prev, &next, bytepos); |
| 630 | struct text_pos next = next_known_text_pos (b, entry); | 613 | narrow_bytepos_bounds_1 (next_known_text_pos (b, entry), |
| 614 | &prev, &next, bytepos); | ||
| 631 | 615 | ||
| 632 | ptrdiff_t charpos = narrow_bytepos_bounds (b, &prev, &next, bytepos); | 616 | if (next.charpos - prev.charpos == next.bytepos - prev.bytepos |
| 633 | if (charpos != TEXT_INDEX_INVALID_POSITION) | 617 | /* Beware: NEXT and PREV can be in the middle of multibyte chars! */ |
| 634 | return charpos; | 618 | && CHAR_HEAD_P (BUF_FETCH_BYTE (b, prev.bytepos))) |
| 619 | return prev.charpos + (bytepos - prev.bytepos); /* ASCII-only! */ | ||
| 635 | 620 | ||
| 636 | /* Scan forward if the distance to the previous known position is | 621 | /* Scan forward if the distance to the previous known position is |
| 637 | smaller than the distance to the next known position. */ | 622 | smaller than the distance to the next known position. */ |
| 638 | if (bytepos - prev.bytepos < next.bytepos - bytepos) | 623 | ptrdiff_t charpos |
| 639 | charpos = charpos_forward_to_bytepos (b, prev, bytepos); | 624 | = (bytepos - prev.bytepos < next.bytepos - bytepos) |
| 640 | else | 625 | ? charpos_forward_to_bytepos (b, prev, bytepos) |
| 641 | charpos = charpos_backward_to_bytepos (b, next, bytepos); | 626 | : charpos_backward_to_bytepos (b, next, bytepos); |
| 642 | 627 | ||
| 643 | cache (ti, charpos, bytepos); | 628 | cache (ti, charpos, bytepos); |
| 644 | return charpos; | 629 | return charpos; |
| @@ -650,6 +635,7 @@ buf_bytepos_to_charpos (struct buffer *b, const ptrdiff_t bytepos) | |||
| 650 | ptrdiff_t | 635 | ptrdiff_t |
| 651 | buf_charpos_to_bytepos (struct buffer *b, const ptrdiff_t charpos) | 636 | buf_charpos_to_bytepos (struct buffer *b, const ptrdiff_t charpos) |
| 652 | { | 637 | { |
| 638 | /* FIXME: Can CHARPOS ever be outside of BEGV..ZV? */ | ||
| 653 | /* If this buffer has as many characters as bytes, each character must | 639 | /* If this buffer has as many characters as bytes, each character must |
| 654 | be one byte. This takes care of the case where | 640 | be one byte. This takes care of the case where |
| 655 | enable-multibyte-characters is nil. */ | 641 | enable-multibyte-characters is nil. */ |
| @@ -657,25 +643,24 @@ buf_charpos_to_bytepos (struct buffer *b, const ptrdiff_t charpos) | |||
| 657 | if (z.charpos == z.bytepos) | 643 | if (z.charpos == z.bytepos) |
| 658 | return charpos; | 644 | return charpos; |
| 659 | 645 | ||
| 660 | if (charpos == z.charpos) | ||
| 661 | return z.bytepos; | ||
| 662 | ensure_charpos_indexed (b, charpos); | ||
| 663 | |||
| 664 | /* Begin with the interval (BEG, Z), and improve on that by taking known | 646 | /* Begin with the interval (BEG, Z), and improve on that by taking known |
| 665 | positions into account like PT, GPT and the cache. This might | 647 | positions into account like PT, GPT and the cache. This might |
| 666 | already find the bytepos. */ | 648 | already find the answer. */ |
| 667 | struct text_index *ti = ensure_has_index (b); | 649 | struct text_index *ti = ensure_has_index (b); |
| 668 | struct text_pos prev = beg_pos (b); | 650 | struct text_pos prev = beg_pos (b); |
| 669 | struct text_pos next = z; | 651 | struct text_pos next = z; |
| 670 | 652 | ||
| 671 | ptrdiff_t bytepos = narrow_charpos_bounds (b, &prev, &next, charpos); | 653 | narrow_charpos_bounds (b, &prev, &next, charpos); |
| 672 | if (bytepos != TEXT_INDEX_INVALID_POSITION) | 654 | |
| 673 | return bytepos; | 655 | if (next.charpos - prev.charpos == next.bytepos - prev.bytepos) |
| 656 | return prev.bytepos + (charpos - prev.charpos); /* ASCII-only! */ | ||
| 657 | else if (next.charpos == charpos) | ||
| 658 | return next.bytepos; | ||
| 674 | 659 | ||
| 675 | /* If one of the bounds is already good enough, avoid consulting | 660 | /* If one of the bounds is already good enough, avoid consulting |
| 676 | the index since that involves some overhead. */ | 661 | the index since that involves some overhead. */ |
| 677 | if (!is_close_enough_charpos (ti, charpos, prev) | 662 | if (!is_close_enough_charpos (charpos, prev) |
| 678 | && !is_close_enough_charpos (ti, charpos, next)) | 663 | && !is_close_enough_charpos (charpos, next)) |
| 679 | { | 664 | { |
| 680 | ensure_charpos_indexed (b, charpos); | 665 | ensure_charpos_indexed (b, charpos); |
| 681 | const ptrdiff_t entry = index_charpos_entry (ti, charpos); | 666 | const ptrdiff_t entry = index_charpos_entry (ti, charpos); |
| @@ -683,19 +668,23 @@ buf_charpos_to_bytepos (struct buffer *b, const ptrdiff_t charpos) | |||
| 683 | narrow_charpos_bounds_1 (index_prev, &prev, &next, charpos); | 668 | narrow_charpos_bounds_1 (index_prev, &prev, &next, charpos); |
| 684 | const struct text_pos index_next = next_known_text_pos (b, entry); | 669 | const struct text_pos index_next = next_known_text_pos (b, entry); |
| 685 | narrow_charpos_bounds_1 (index_next, &prev, &next, charpos); | 670 | narrow_charpos_bounds_1 (index_next, &prev, &next, charpos); |
| 686 | } | ||
| 687 | |||
| 688 | 671 | ||
| 672 | if (next.charpos - prev.charpos == next.bytepos - prev.bytepos | ||
| 673 | /* Beware: NEXT and PREV can be in the middle of multibyte chars! */ | ||
| 674 | && CHAR_HEAD_P (BUF_FETCH_BYTE (b, prev.bytepos)) | ||
| 675 | && CHAR_HEAD_P (BUF_FETCH_BYTE (b, next.bytepos - 1))) | ||
| 676 | return prev.bytepos + (charpos - prev.charpos); /* ASCII-only! */ | ||
| 677 | } | ||
| 689 | 678 | ||
| 690 | /* Don't scan forward if CHARPOS is exactly on the previous know | 679 | /* Don't scan forward if CHARPOS is exactly on the previous know |
| 691 | position because the index bytepos can be in the middle of a | 680 | position because the index bytepos can be in the middle of a |
| 692 | character, which is found by scanning backwards. Otherwise, scan | 681 | character, which is found by scanning backwards. Otherwise, scan |
| 693 | forward if we believe that's less expensive. */ | 682 | forward if we believe that's less expensive. */ |
| 694 | if (charpos > prev.charpos | 683 | ptrdiff_t bytepos |
| 695 | && charpos - prev.charpos < next.charpos - charpos) | 684 | = (charpos > prev.charpos |
| 696 | bytepos = bytepos_forward_to_charpos (b, prev, charpos); | 685 | && charpos - prev.charpos < next.charpos - charpos) |
| 697 | else | 686 | ? bytepos_forward_to_charpos (b, prev, charpos) |
| 698 | bytepos = bytepos_backward_to_charpos (b, next, charpos); | 687 | : bytepos_backward_to_charpos (b, next, charpos); |
| 699 | 688 | ||
| 700 | cache (ti, charpos, bytepos); | 689 | cache (ti, charpos, bytepos); |
| 701 | return bytepos; | 690 | return bytepos; |