aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/text-index.c159
1 files changed, 74 insertions, 85 deletions
diff --git a/src/text-index.c b/src/text-index.c
index 54cf9ea3b3b..ec83bfb1bd9 100644
--- a/src/text-index.c
+++ b/src/text-index.c
@@ -91,7 +91,9 @@ struct text_index
91 91
92enum 92enum
93{ 93{
94 /* Number of bytes in an interval. */ 94 /* Number of bytes in an interval.
95 Tradeoff between cost of the text-index array and cost of scanning
96 bytes between the positions recorded in the array. */
95 TEXT_INDEX_INTERVAL = 4096, 97 TEXT_INDEX_INTERVAL = 4096,
96 98
97 /* Default capacity in number of intervals for text indices. */ 99 /* Default capacity in number of intervals for text indices. */
@@ -116,7 +118,7 @@ static struct text_pos
116beg_pos (const struct buffer *b) 118beg_pos (const struct buffer *b)
117{ 119{
118 return (struct text_pos) 120 return (struct text_pos)
119 {.charpos = BEG, .bytepos =BEG_BYTE}; 121 {.charpos = BEG, .bytepos = BEG_BYTE};
120} 122}
121 123
122static struct text_pos 124static struct text_pos
@@ -138,8 +140,7 @@ pt_pos (const struct buffer *b)
138 necessary. */ 140 necessary. */
139 141
140static bool 142static bool
141is_close_enough_charpos (const struct text_index *ti, 143is_close_enough_charpos (ptrdiff_t charpos,
142 ptrdiff_t charpos,
143 const struct text_pos pos) 144 const struct text_pos pos)
144{ 145{
145 return eabs (charpos - pos.charpos) < TEXT_INDEX_INTERVAL / 4; 146 return eabs (charpos - pos.charpos) < TEXT_INDEX_INTERVAL / 4;
@@ -503,103 +504,77 @@ next_known_text_pos (struct buffer *b, ptrdiff_t entry)
503} 504}
504 505
505/* Improve the known bytepos bounds *PREV and *NEXT if KNOWN is closer 506/* Improve the known bytepos bounds *PREV and *NEXT if KNOWN is closer
506 to BYTEPOS. If KNOWN is an exact match for BYTEPOS return true. */ 507 to BYTEPOS. */
507 508
508static bool 509static void
509narrow_bytepos_bounds_1 (const struct text_pos known, struct text_pos *prev, 510narrow_bytepos_bounds_1 (const struct text_pos known, struct text_pos *prev,
510 struct text_pos *next, const ptrdiff_t bytepos) 511 struct text_pos *next, const ptrdiff_t bytepos)
511{ 512{
512 eassert (bytepos >= prev->bytepos && bytepos <= next->bytepos); 513 eassert (bytepos >= prev->bytepos && bytepos <= next->bytepos);
513 eassert (known.bytepos != TEXT_INDEX_INVALID_POSITION); 514 eassert (known.bytepos != TEXT_INDEX_INVALID_POSITION);
514 if (known.bytepos == bytepos)
515 return true;
516 515
517 /* If KNOWN is in (PREV, BYTEPOS] it is a better PREV. */ 516 /* If KNOWN is in (PREV, BYTEPOS] it is a better PREV. */
518 if (known.bytepos < bytepos 517 if (known.bytepos <= bytepos
519 && known.bytepos > prev->bytepos) 518 && known.bytepos > prev->bytepos)
520 *prev = known; 519 *prev = known;
521 520
522 /* If KNOWN is in [BYTEPOS NEXT) it is a better NEXT. */ 521 /* If KNOWN is in [BYTEPOS NEXT) it is a better NEXT. */
523 if (known.bytepos > bytepos 522 if (known.bytepos >= bytepos
524 && known.bytepos < next->bytepos) 523 && known.bytepos < next->bytepos)
525 *next = known; 524 *next = known;
526
527 return false;
528} 525}
529 526
530/* Improve the known bytepos bounds *PREV and *NEXT of buffer B using 527/* Improve the known bytepos bounds *PREV and *NEXT of buffer B using
531 known positions in B. BYTEPOS is a byte position to convert to a 528 known positions in B. BYTEPOS is a byte position to convert to a
532 character position. If an exact match for BYTEPOS is found, return 529 character position. */
533 its charpos, otherwise return TEXT_INDEX_INVALID_POSITION. */
534 530
535static ptrdiff_t 531static void
536narrow_bytepos_bounds (struct buffer *b, struct text_pos *prev, 532narrow_bytepos_bounds (struct buffer *b, struct text_pos *prev,
537 struct text_pos *next, const ptrdiff_t bytepos) 533 struct text_pos *next, const ptrdiff_t bytepos)
538{ 534{
539 const struct text_pos pt = pt_pos (b); 535 narrow_bytepos_bounds_1 (pt_pos (b), prev, next, bytepos);
540 if (narrow_bytepos_bounds_1 (pt, prev, next, bytepos)) 536 narrow_bytepos_bounds_1 (gpt_pos (b), prev, next, bytepos);
541 return pt.charpos;
542
543 const struct text_pos gpt = gpt_pos (b);
544 if (narrow_bytepos_bounds_1 (gpt, prev, next, bytepos))
545 return gpt.charpos;
546 537
547 struct text_index *ti = b->text->index; 538 struct text_index *ti = b->text->index;
548 if (is_cache_valid (ti) 539 if (is_cache_valid (ti))
549 && narrow_bytepos_bounds_1 (ti->cache, prev, next, bytepos)) 540 narrow_bytepos_bounds_1 (ti->cache, prev, next, bytepos);
550 return ti->cache.charpos;
551
552 return TEXT_INDEX_INVALID_POSITION;
553} 541}
554 542
555/* Improve the known bytepos bounds *PREV and *NEXT if KNOWN is closer 543/* Improve the known bytepos bounds *PREV and *NEXT if KNOWN is closer
556 to BYTEPOS. If KNOWN is an exact match for BYTEPOS return true. */ 544 to BYTEPOS. */
557 545
558static bool 546static void
559narrow_charpos_bounds_1 (const struct text_pos known, struct text_pos *prev, 547narrow_charpos_bounds_1 (const struct text_pos known, struct text_pos *prev,
560 struct text_pos *next, const ptrdiff_t charpos) 548 struct text_pos *next, const ptrdiff_t charpos)
561{ 549{
562 eassert (charpos >= prev->charpos && charpos <= next->charpos); 550 eassert (charpos >= prev->charpos && charpos <= next->charpos);
563 eassert (known.charpos != TEXT_INDEX_INVALID_POSITION); 551 eassert (known.charpos != TEXT_INDEX_INVALID_POSITION);
564 if (known.charpos == charpos)
565 return true;
566 552
567 /* If KNOWN is in (PREV, BYTEPOS] it is a better PREV. */ 553 /* If KNOWN is in (PREV, BYTEPOS] it is a better PREV. */
568 if (known.charpos < charpos 554 if (known.charpos <= charpos
569 && known.charpos > prev->charpos) 555 && known.charpos > prev->charpos)
570 *prev = known; 556 *prev = known;
571 557
572 /* If KNOWN is in [BYTEPOS NEXT) it is a better NEXT. */ 558 /* If KNOWN is in [BYTEPOS NEXT) it is a better NEXT. */
573 if (known.charpos > charpos 559 if (known.charpos >= charpos
574 && known.charpos < next->charpos) 560 && known.charpos < next->charpos)
575 *next = known; 561 *next = known;
576
577 return false;
578} 562}
579 563
580/* Improve the known bytepos bounds *PREV and *NEXT of buffer B using 564/* Improve the known bytepos bounds *PREV and *NEXT of buffer B using
581 known positions in B. BYTEPOS is a byte position to convert to a 565 known positions in B. BYTEPOS is a byte position to convert to a
582 character position. If an exact match for BYTEPOS is found, return 566 character position. */
583 its charpos, otherwise return TEXT_INDEX_INVALID_POSITION. */
584 567
585static ptrdiff_t 568static void
586narrow_charpos_bounds (struct buffer *b, struct text_pos *prev, 569narrow_charpos_bounds (struct buffer *b, struct text_pos *prev,
587 struct text_pos *next, const ptrdiff_t charpos) 570 struct text_pos *next, const ptrdiff_t charpos)
588{ 571{
589 const struct text_pos pt = pt_pos (b); 572 narrow_charpos_bounds_1 (pt_pos (b), prev, next, charpos);
590 if (narrow_charpos_bounds_1 (pt, prev, next, charpos)) 573 narrow_charpos_bounds_1 (gpt_pos (b), prev, next, charpos);
591 return pt.bytepos;
592
593 const struct text_pos gpt = gpt_pos (b);
594 if (narrow_charpos_bounds_1 (gpt, prev, next, charpos))
595 return gpt.bytepos;
596 574
597 struct text_index *ti = b->text->index; 575 struct text_index *ti = b->text->index;
598 if (is_cache_valid (ti) 576 if (is_cache_valid (ti))
599 && narrow_charpos_bounds_1 (ti->cache, prev, next, charpos)) 577 narrow_charpos_bounds_1 (ti->cache, prev, next, charpos);
600 return ti->cache.bytepos;
601
602 return TEXT_INDEX_INVALID_POSITION;
603} 578}
604 579
605/* Return the character position in buffer B corresponding to 580/* Return the character position in buffer B corresponding to
@@ -608,6 +583,7 @@ narrow_charpos_bounds (struct buffer *b, struct text_pos *prev,
608ptrdiff_t 583ptrdiff_t
609buf_bytepos_to_charpos (struct buffer *b, const ptrdiff_t bytepos) 584buf_bytepos_to_charpos (struct buffer *b, const ptrdiff_t bytepos)
610{ 585{
586 /* FIXME: Can BYTEPOS ever be outside of BEGV_BYTE..ZV_BYTE? */
611 /* If this buffer has as many characters as bytes, each character must 587 /* If this buffer has as many characters as bytes, each character must
612 be one byte. This takes care of the case where 588 be one byte. This takes care of the case where
613 enable-multibyte-characters is nil. */ 589 enable-multibyte-characters is nil. */
@@ -615,30 +591,39 @@ buf_bytepos_to_charpos (struct buffer *b, const ptrdiff_t bytepos)
615 if (z.charpos == z.bytepos) 591 if (z.charpos == z.bytepos)
616 return bytepos; 592 return bytepos;
617 593
618 /* BYTEPOS == Z_BYTE, and BYTEPOS is an interval boundary, 594 /* Begin with the interval (BEG, Z), and improve on that by taking known
619 then BYTEPOS does not have an index entry because we don't want 595 positions into account like PT, GPT and the cache. This might
620 extra entries for (Z, Z_BYTE). Changing that would be possible 596 already find the answer. */
621 but leads to more code than this if-statement, so it's probably 597 struct text_index *ti = ensure_has_index (b);
622 not worth it. */ 598 struct text_pos prev = beg_pos (b);
623 if (bytepos == z.bytepos) 599 struct text_pos next = z;
624 return z.charpos;
625 ensure_bytepos_indexed (b, bytepos);
626 600
627 struct text_index *ti = b->text->index; 601 narrow_bytepos_bounds (b, &prev, &next, bytepos);
602
603 /* Z_BYTE does not have an index entry because we don't want
604 extra entries for (Z, Z_BYTE), so short-circuit *before* looking
605 up the index. Changing that would be possible but leads to more
606 code than this if-statement, so it's probably not worth it. */
607 if (next.bytepos == bytepos)
608 return next.charpos;
609
610 ensure_bytepos_indexed (b, bytepos);
628 const ptrdiff_t entry = index_bytepos_entry (ti, bytepos); 611 const ptrdiff_t entry = index_bytepos_entry (ti, bytepos);
629 struct text_pos prev = index_text_pos (ti, entry); 612 narrow_bytepos_bounds_1 (index_text_pos (ti, entry), &prev, &next, bytepos);
630 struct text_pos next = next_known_text_pos (b, entry); 613 narrow_bytepos_bounds_1 (next_known_text_pos (b, entry),
614 &prev, &next, bytepos);
631 615
632 ptrdiff_t charpos = narrow_bytepos_bounds (b, &prev, &next, bytepos); 616 if (next.charpos - prev.charpos == next.bytepos - prev.bytepos
633 if (charpos != TEXT_INDEX_INVALID_POSITION) 617 /* Beware: NEXT and PREV can be in the middle of multibyte chars! */
634 return charpos; 618 && CHAR_HEAD_P (BUF_FETCH_BYTE (b, prev.bytepos)))
619 return prev.charpos + (bytepos - prev.bytepos); /* ASCII-only! */
635 620
636 /* Scan forward if the distance to the previous known position is 621 /* Scan forward if the distance to the previous known position is
637 smaller than the distance to the next known position. */ 622 smaller than the distance to the next known position. */
638 if (bytepos - prev.bytepos < next.bytepos - bytepos) 623 ptrdiff_t charpos
639 charpos = charpos_forward_to_bytepos (b, prev, bytepos); 624 = (bytepos - prev.bytepos < next.bytepos - bytepos)
640 else 625 ? charpos_forward_to_bytepos (b, prev, bytepos)
641 charpos = charpos_backward_to_bytepos (b, next, bytepos); 626 : charpos_backward_to_bytepos (b, next, bytepos);
642 627
643 cache (ti, charpos, bytepos); 628 cache (ti, charpos, bytepos);
644 return charpos; 629 return charpos;
@@ -650,6 +635,7 @@ buf_bytepos_to_charpos (struct buffer *b, const ptrdiff_t bytepos)
650ptrdiff_t 635ptrdiff_t
651buf_charpos_to_bytepos (struct buffer *b, const ptrdiff_t charpos) 636buf_charpos_to_bytepos (struct buffer *b, const ptrdiff_t charpos)
652{ 637{
638 /* FIXME: Can CHARPOS ever be outside of BEGV..ZV? */
653 /* If this buffer has as many characters as bytes, each character must 639 /* If this buffer has as many characters as bytes, each character must
654 be one byte. This takes care of the case where 640 be one byte. This takes care of the case where
655 enable-multibyte-characters is nil. */ 641 enable-multibyte-characters is nil. */
@@ -657,25 +643,24 @@ buf_charpos_to_bytepos (struct buffer *b, const ptrdiff_t charpos)
657 if (z.charpos == z.bytepos) 643 if (z.charpos == z.bytepos)
658 return charpos; 644 return charpos;
659 645
660 if (charpos == z.charpos)
661 return z.bytepos;
662 ensure_charpos_indexed (b, charpos);
663
664 /* Begin with the interval (BEG, Z), and improve on that by taking known 646 /* Begin with the interval (BEG, Z), and improve on that by taking known
665 positions into account like PT, GPT and the cache. This might 647 positions into account like PT, GPT and the cache. This might
666 already find the bytepos. */ 648 already find the answer. */
667 struct text_index *ti = ensure_has_index (b); 649 struct text_index *ti = ensure_has_index (b);
668 struct text_pos prev = beg_pos (b); 650 struct text_pos prev = beg_pos (b);
669 struct text_pos next = z; 651 struct text_pos next = z;
670 652
671 ptrdiff_t bytepos = narrow_charpos_bounds (b, &prev, &next, charpos); 653 narrow_charpos_bounds (b, &prev, &next, charpos);
672 if (bytepos != TEXT_INDEX_INVALID_POSITION) 654
673 return bytepos; 655 if (next.charpos - prev.charpos == next.bytepos - prev.bytepos)
656 return prev.bytepos + (charpos - prev.charpos); /* ASCII-only! */
657 else if (next.charpos == charpos)
658 return next.bytepos;
674 659
675 /* If one of the bounds is already good enough, avoid consulting 660 /* If one of the bounds is already good enough, avoid consulting
676 the index since that involves some overhead. */ 661 the index since that involves some overhead. */
677 if (!is_close_enough_charpos (ti, charpos, prev) 662 if (!is_close_enough_charpos (charpos, prev)
678 && !is_close_enough_charpos (ti, charpos, next)) 663 && !is_close_enough_charpos (charpos, next))
679 { 664 {
680 ensure_charpos_indexed (b, charpos); 665 ensure_charpos_indexed (b, charpos);
681 const ptrdiff_t entry = index_charpos_entry (ti, charpos); 666 const ptrdiff_t entry = index_charpos_entry (ti, charpos);
@@ -683,19 +668,23 @@ buf_charpos_to_bytepos (struct buffer *b, const ptrdiff_t charpos)
683 narrow_charpos_bounds_1 (index_prev, &prev, &next, charpos); 668 narrow_charpos_bounds_1 (index_prev, &prev, &next, charpos);
684 const struct text_pos index_next = next_known_text_pos (b, entry); 669 const struct text_pos index_next = next_known_text_pos (b, entry);
685 narrow_charpos_bounds_1 (index_next, &prev, &next, charpos); 670 narrow_charpos_bounds_1 (index_next, &prev, &next, charpos);
686 }
687
688 671
672 if (next.charpos - prev.charpos == next.bytepos - prev.bytepos
673 /* Beware: NEXT and PREV can be in the middle of multibyte chars! */
674 && CHAR_HEAD_P (BUF_FETCH_BYTE (b, prev.bytepos))
675 && CHAR_HEAD_P (BUF_FETCH_BYTE (b, next.bytepos - 1)))
676 return prev.bytepos + (charpos - prev.charpos); /* ASCII-only! */
677 }
689 678
690 /* Don't scan forward if CHARPOS is exactly on the previous know 679 /* Don't scan forward if CHARPOS is exactly on the previous know
691 position because the index bytepos can be in the middle of a 680 position because the index bytepos can be in the middle of a
692 character, which is found by scanning backwards. Otherwise, scan 681 character, which is found by scanning backwards. Otherwise, scan
693 forward if we believe that's less expensive. */ 682 forward if we believe that's less expensive. */
694 if (charpos > prev.charpos 683 ptrdiff_t bytepos
695 && charpos - prev.charpos < next.charpos - charpos) 684 = (charpos > prev.charpos
696 bytepos = bytepos_forward_to_charpos (b, prev, charpos); 685 && charpos - prev.charpos < next.charpos - charpos)
697 else 686 ? bytepos_forward_to_charpos (b, prev, charpos)
698 bytepos = bytepos_backward_to_charpos (b, next, charpos); 687 : bytepos_backward_to_charpos (b, next, charpos);
699 688
700 cache (ti, charpos, bytepos); 689 cache (ti, charpos, bytepos);
701 return bytepos; 690 return bytepos;