aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorYuan Fu2025-03-18 17:26:26 -0700
committerYuan Fu2025-05-03 22:14:03 -0700
commit1897da0b599cc3ea1e4aa626e47ac8943a7b6833 (patch)
treefef9034fb01a0883e5874923ad756b554b49cffd /src
parent159e3a981ed5482393182b036e38818d42405c90 (diff)
downloademacs-1897da0b599cc3ea1e4aa626e47ac8943a7b6833.tar.gz
emacs-1897da0b599cc3ea1e4aa626e47ac8943a7b6833.zip
Add line-column tracking for tree-sitter
Add line-column tracking for tree-sitter parsers. Copied from comments in treesit.c: Technically we had to send tree-sitter the line and column position of each edit. But in practice we just send it dummy values, because tree-sitter doesn't use it for parsing and mostly just carries the line and column positions around and return it when e.g. reporting node positions[1]. This has been working fine until we encountered grammars that actually utilizes the line and column information for parsing (Haskell)[2]. [1] https://github.com/tree-sitter/tree-sitter/issues/445 [2] https://github.com/tree-sitter/tree-sitter/issues/4001 So now we have to keep track of line and column positions and pass valid values to tree-sitter. (It adds quite some complexity, but only linearly; one can ignore all the linecol stuff when trying to understand treesit code and then come back to it later.) Eli convinced me to disable tracking by default, and only enable it for languages that needs it. So the buffer starts out not tracking linecol. And when a parser is created, if the language is in treesit-languages-require-line-column-tracking, we enable tracking in the buffer, and enable tracking for the parser. To simplify things, once a buffer starts tracking linecol, it never disables tracking, even if parsers that need tracking are all deleted; and for parsers, tracking is determined at creation time, if it starts out tracking/non-tracking, it stays that way, regardless of later changes to treesit-languages-require-line-column-tracking. To make calculating line/column positons fast, we store linecol caches for begv, point, and zv in the buffer (buf->ts_linecol_cache_xxx); and in the parser object, we store linecol cache for visible beg/end of that parser. In buffer editing functions, we need the linecol for start/old_end/new_end, those can be calculated by scanning newlines (treesit_linecol_of_pos) from the buffer point cache, which should be always near the point. And we usually set the calculated linecol of new_end back to the buffer point cache. We also need to calculate linecol for the visible_beg/end for each parser, and linecol for the buffer's begv/zv, these positions are usually far from point, so we have caches for all of them (in either the parser object or the buffer). These positions are far from point, so it's inefficient to scan newlines from point to there to get up-to-date linecol for them; but in the same time, because they're far and outside the changed region, we can calculate their change in line and column number by simply counting how much newlines are added/removed in the changed region (compute_new_linecol_by_change). * doc/lispref/parsing.texi (Using Parser): Mention line-column tracking in manual. * etc/NEWS: Add news. * lisp/treesit.el: (treesit-languages-need-line-column-tracking): New variable. * src/buffer.c: Include treesit.h (for TREESIT_EMPTY_LINECOL). (Fget_buffer_create): (Fmake_indirect_buffer): Initialize new buffer fields. (Fbuffer_swap_text): Add new buffer fields. * src/buffer.h (ts_linecol): New struct. (buffer): New buffer fields. (BUF_TS_LINECOL_BEGV): (BUF_TS_LINECOL_POINT): (BUF_TS_LINECOL_ZV): (SET_BUF_TS_LINECOL_BEGV): (SET_BUF_TS_LINECOL_POINT): (SET_BUF_TS_LINECOL_ZV): New inline functions. * src/casefiddle.c (casify_region): Record linecol info. * src/editfns.c (Fsubst_char_in_region): (Ftranslate_region_internal): (Ftranspose_regions): Record linecol info. * src/insdel.c (insert_1_both): (insert_from_string_1): (insert_from_gap_1): (insert_from_buffer): (replace_range): (del_range_2): Record linecol info. * src/treesit.c (TREESIT_BOB_LINECOL): (TREESIT_EMPTY_LINECOL): (TREESIT_TS_POINT_1_0): New constants. (treesit_debug_print_linecol): (treesit_buf_tracks_linecol_p): (restore_restriction_and_selective_display): (treesit_count_lines): (treesit_debug_validate_linecol): (treesit_linecol_of_pos): (treesit_make_ts_point): (Ftreesit_tracking_line_column_p): (Ftreesit_parser_tracking_line_column_p): New functions. (treesit_tree_edit_1): Accept real TSPoint and pass to tree-sitter. (compute_new_linecol_by_change): New function. (treesit_record_change_1): Rename from treesit_record_change, handle linecol if tracking is enabled. (treesit_linecol_maybe): New function. (treesit_record_change): New wrapper around treesit_record_change_1 that handles some boilerplate and sets buffer state. (treesit_sync_visible_region): Handle linecol if tracking is enabled. (make_treesit_parser): Setup parser's linecol cache if tracking is enabled. (Ftreesit_parser_create): Enable tracking if the parser's language requires it. (Ftreesit__linecol_at): (Ftreesit__linecol_cache_set): (Ftreesit__linecol_cache): New functions for debugging and testing. (syms_of_treesit): New variable Vtreesit_languages_require_line_column_tracking. * src/treesit.h (Lisp_TS_Parser): New fields. (TREESIT_BOB_LINECOL): (TREESIT_EMPTY_LINECOL): New constants. * test/src/treesit-tests.el (treesit-linecol-basic): (treesit-linecol-search-back-across-newline): (treesit-linecol-col-same-line): (treesit-linecol-enable-disable): New tests. * src/lisp.h: Declare display_count_lines. * src/xdisp.c (display_count_lines): Remove static keyword.
Diffstat (limited to 'src')
-rw-r--r--src/buffer.c25
-rw-r--r--src/buffer.h72
-rw-r--r--src/casefiddle.c13
-rw-r--r--src/editfns.c43
-rw-r--r--src/insdel.c63
-rw-r--r--src/lisp.h4
-rw-r--r--src/treesit.c784
-rw-r--r--src/treesit.h25
-rw-r--r--src/xdisp.c4
9 files changed, 965 insertions, 68 deletions
diff --git a/src/buffer.c b/src/buffer.c
index a408b799ff4..53aa3163fe0 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -48,6 +48,10 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
48#include "w32heap.h" /* for mmap_* */ 48#include "w32heap.h" /* for mmap_* */
49#endif 49#endif
50 50
51#ifdef HAVE_TREE_SITTER
52#include "treesit.h"
53#endif
54
51/* Work around GCC bug 109847 55/* Work around GCC bug 109847
52 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109847 56 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109847
53 which causes GCC to mistakenly complain about 57 which causes GCC to mistakenly complain about
@@ -641,6 +645,13 @@ even if it is dead. The return value is never nil. */)
641 bset_width_table (b, Qnil); 645 bset_width_table (b, Qnil);
642 b->prevent_redisplay_optimizations_p = 1; 646 b->prevent_redisplay_optimizations_p = 1;
643 647
648#ifdef HAVE_TREE_SITTER
649 /* By default, use empty linecol, which means disable tracking. */
650 SET_BUF_TS_LINECOL_BEGV (b, TREESIT_EMPTY_LINECOL);
651 SET_BUF_TS_LINECOL_POINT (b, TREESIT_EMPTY_LINECOL);
652 SET_BUF_TS_LINECOL_ZV (b, TREESIT_EMPTY_LINECOL);
653#endif
654
644 /* An ordinary buffer normally doesn't need markers 655 /* An ordinary buffer normally doesn't need markers
645 to handle BEGV and ZV. */ 656 to handle BEGV and ZV. */
646 bset_pt_marker (b, Qnil); 657 bset_pt_marker (b, Qnil);
@@ -867,6 +878,13 @@ Interactively, CLONE and INHIBIT-BUFFER-HOOKS are nil. */)
867 b->bidi_paragraph_cache = 0; 878 b->bidi_paragraph_cache = 0;
868 bset_width_table (b, Qnil); 879 bset_width_table (b, Qnil);
869 880
881#ifdef HAVE_TREE_SITTER
882 /* By default, use empty linecol, which means disable tracking. */
883 SET_BUF_TS_LINECOL_BEGV (b, TREESIT_EMPTY_LINECOL);
884 SET_BUF_TS_LINECOL_POINT (b, TREESIT_EMPTY_LINECOL);
885 SET_BUF_TS_LINECOL_ZV (b, TREESIT_EMPTY_LINECOL);
886#endif
887
870 name = Fcopy_sequence (name); 888 name = Fcopy_sequence (name);
871 set_string_intervals (name, NULL); 889 set_string_intervals (name, NULL);
872 bset_name (b, name); 890 bset_name (b, name);
@@ -2618,6 +2636,13 @@ results, see Info node `(elisp)Swapping Text'. */)
2618 bset_point_before_scroll (current_buffer, Qnil); 2636 bset_point_before_scroll (current_buffer, Qnil);
2619 bset_point_before_scroll (other_buffer, Qnil); 2637 bset_point_before_scroll (other_buffer, Qnil);
2620 2638
2639#ifdef HAVE_TREE_SITTER
2640 swapfield_ (ts_parser_list, Lisp_Object);
2641 swapfield (ts_linecol_begv, struct ts_linecol);
2642 swapfield (ts_linecol_point, struct ts_linecol);
2643 swapfield (ts_linecol_zv, struct ts_linecol);
2644#endif
2645
2621 modiff_incr (&current_buffer->text->modiff, 1); 2646 modiff_incr (&current_buffer->text->modiff, 1);
2622 modiff_incr (&other_buffer->text->modiff, 1); 2647 modiff_incr (&other_buffer->text->modiff, 1);
2623 modiff_incr (&current_buffer->text->chars_modiff, 1); 2648 modiff_incr (&current_buffer->text->chars_modiff, 1);
diff --git a/src/buffer.h b/src/buffer.h
index d19ff22babd..26a334ea810 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -220,6 +220,20 @@ extern ptrdiff_t advance_to_char_boundary (ptrdiff_t byte_pos);
220 220
221/* Define the actual buffer data structures. */ 221/* Define the actual buffer data structures. */
222 222
223/* This data structure stores the cache of a position and its line and
224 column number. The column number is counted in bytes. The line
225 number and column number don't respect narrowing. */
226struct ts_linecol
227{
228 /* The byte position. */
229 ptrdiff_t bytepos;
230 /* The line number of this position. */
231 ptrdiff_t line;
232 /* The column number (in bytes) of this position (0-based). Basically
233 the byte offset from BOL (or BOB). */
234 ptrdiff_t col;
235};
236
223/* This data structure describes the actual text contents of a buffer. 237/* This data structure describes the actual text contents of a buffer.
224 It is shared between indirect buffers and their base buffer. */ 238 It is shared between indirect buffers and their base buffer. */
225 239
@@ -700,6 +714,25 @@ struct buffer
700 /* The interval tree containing this buffer's overlays. */ 714 /* The interval tree containing this buffer's overlays. */
701 struct itree_tree *overlays; 715 struct itree_tree *overlays;
702 716
717 /* Right now only tree-sitter makes use of this, so I don't want
718 non-tree-sitter build to pay for it. If something else can make
719 use of this, we can remove the gate. */
720#ifdef HAVE_TREE_SITTER
721 /* Cache of line and column number of a position. Tree-sitter uses
722 this cache to calculate line and column of the beginning and end of
723 buffer edits. Stores three caches for BEGV, point, ZV,
724 respectively. All three are refreshed in buffer edit functions, so
725 they're always up-to-date (in the sense that the bytepos and
726 line/column number are in sync, not in the sense that the bytepos
727 is at the actual position of point/BEGV/ZV, indeed, most of the
728 time the bytepos is only near the actual position). All caches are
729 initialized to empty, meaning no linecol tracking for this
730 buffer. */
731 struct ts_linecol ts_linecol_begv;
732 struct ts_linecol ts_linecol_point;
733 struct ts_linecol ts_linecol_zv;
734#endif
735
703 /* Changes in the buffer are recorded here for undo, and t means 736 /* Changes in the buffer are recorded here for undo, and t means
704 don't record anything. This information belongs to the base 737 don't record anything. This information belongs to the base
705 buffer of an indirect buffer. But we can't store it in the 738 buffer of an indirect buffer. But we can't store it in the
@@ -1134,6 +1167,45 @@ BUFFER_CHECK_INDIRECTION (struct buffer *b)
1134 } 1167 }
1135} 1168}
1136 1169
1170#ifdef HAVE_TREE_SITTER
1171
1172INLINE struct ts_linecol
1173BUF_TS_LINECOL_BEGV (struct buffer *buf)
1174{
1175 return buf->ts_linecol_begv;
1176}
1177INLINE struct ts_linecol
1178BUF_TS_LINECOL_POINT (struct buffer *buf)
1179{
1180 return buf->ts_linecol_point;
1181}
1182
1183INLINE struct ts_linecol
1184BUF_TS_LINECOL_ZV (struct buffer *buf)
1185{
1186 return buf->ts_linecol_zv;
1187}
1188
1189INLINE void
1190SET_BUF_TS_LINECOL_BEGV (struct buffer *buf, struct ts_linecol linecol)
1191{
1192 buf->ts_linecol_begv = linecol;
1193}
1194
1195INLINE void
1196SET_BUF_TS_LINECOL_POINT (struct buffer *buf, struct ts_linecol linecol)
1197{
1198 buf->ts_linecol_point = linecol;
1199}
1200
1201INLINE void
1202SET_BUF_TS_LINECOL_ZV (struct buffer *buf, struct ts_linecol linecol)
1203{
1204 buf->ts_linecol_zv = linecol;
1205}
1206
1207#endif
1208
1137/* This structure holds the default values of the buffer-local variables 1209/* This structure holds the default values of the buffer-local variables
1138 that have special slots in each buffer. 1210 that have special slots in each buffer.
1139 The default value occupies the same slot in this structure 1211 The default value occupies the same slot in this structure
diff --git a/src/casefiddle.c b/src/casefiddle.c
index faeb16fb8f2..93a66b15923 100644
--- a/src/casefiddle.c
+++ b/src/casefiddle.c
@@ -543,6 +543,12 @@ casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
543#ifdef HAVE_TREE_SITTER 543#ifdef HAVE_TREE_SITTER
544 ptrdiff_t start_byte = CHAR_TO_BYTE (start); 544 ptrdiff_t start_byte = CHAR_TO_BYTE (start);
545 ptrdiff_t old_end_byte = CHAR_TO_BYTE (end); 545 ptrdiff_t old_end_byte = CHAR_TO_BYTE (end);
546 struct ts_linecol start_linecol
547 = treesit_linecol_maybe (start, start_byte,
548 BUF_TS_LINECOL_POINT (current_buffer));
549 struct ts_linecol old_end_linecol
550 = treesit_linecol_maybe (end, old_end_byte,
551 BUF_TS_LINECOL_POINT (current_buffer));
546#endif 552#endif
547 553
548 ptrdiff_t orig_end = end; 554 ptrdiff_t orig_end = end;
@@ -565,8 +571,11 @@ casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
565 update_compositions (start, end, CHECK_ALL); 571 update_compositions (start, end, CHECK_ALL);
566 } 572 }
567#ifdef HAVE_TREE_SITTER 573#ifdef HAVE_TREE_SITTER
568 treesit_record_change (start_byte, old_end_byte, 574 ptrdiff_t new_end = orig_end + added;
569 CHAR_TO_BYTE (orig_end + added)); 575 ptrdiff_t new_end_byte = CHAR_TO_BYTE (new_end);
576
577 treesit_record_change (start_byte, old_end_byte, new_end_byte,
578 start_linecol, old_end_linecol, new_end);
570#endif 579#endif
571 580
572 return orig_end + added; 581 return orig_end + added;
diff --git a/src/editfns.c b/src/editfns.c
index e6d8e278c12..714ed422b38 100644
--- a/src/editfns.c
+++ b/src/editfns.c
@@ -2305,6 +2305,19 @@ Both characters must have the same length of multi-byte form. */)
2305 = !NILP (BVAR (current_buffer, enable_multibyte_characters)); 2305 = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2306 int fromc, toc; 2306 int fromc, toc;
2307 2307
2308#ifdef HAVE_TREE_SITTER
2309 ptrdiff_t start_char = fix_position (start);
2310 ptrdiff_t old_end_char = fix_position (end);
2311 ptrdiff_t start_byte = CHAR_TO_BYTE (start_char);
2312 ptrdiff_t old_end_byte = CHAR_TO_BYTE (old_end_char);
2313 struct ts_linecol start_linecol
2314 = treesit_linecol_maybe (start_char, start_byte,
2315 BUF_TS_LINECOL_POINT (current_buffer));
2316 struct ts_linecol old_end_linecol
2317 = treesit_linecol_maybe (old_end_char, old_end_byte,
2318 BUF_TS_LINECOL_POINT (current_buffer));
2319#endif
2320
2308 restart: 2321 restart:
2309 2322
2310 validate_region (&start, &end); 2323 validate_region (&start, &end);
@@ -2405,7 +2418,8 @@ Both characters must have the same length of multi-byte form. */)
2405 if (changed > 0) 2418 if (changed > 0)
2406 { 2419 {
2407#ifdef HAVE_TREE_SITTER 2420#ifdef HAVE_TREE_SITTER
2408 treesit_record_change (changed, last_changed, last_changed); 2421 treesit_record_change (start_byte, old_end_byte, old_end_byte,
2422 start_linecol, old_end_linecol, old_end_char);
2409#endif 2423#endif
2410 signal_after_change (changed, 2424 signal_after_change (changed,
2411 last_changed - changed, last_changed - changed); 2425 last_changed - changed, last_changed - changed);
@@ -2592,6 +2606,15 @@ It returns the number of characters changed. */)
2592 } 2606 }
2593 else 2607 else
2594 { 2608 {
2609#ifdef HAVE_TREE_SITTER
2610 struct ts_linecol linecol_cache
2611 = BUF_TS_LINECOL_POINT (current_buffer);
2612 struct ts_linecol start_linecol
2613 = treesit_linecol_maybe (pos, pos_byte, linecol_cache);
2614 struct ts_linecol old_end_linecol
2615 = treesit_linecol_maybe (pos + 1, pos_byte + len,
2616 start_linecol);
2617#endif
2595 record_change (pos, 1); 2618 record_change (pos, 1);
2596 while (str_len-- > 0) 2619 while (str_len-- > 0)
2597 *p++ = *str++; 2620 *p++ = *str++;
@@ -2604,7 +2627,8 @@ It returns the number of characters changed. */)
2604 modified buffer content manually, so we need to 2627 modified buffer content manually, so we need to
2605 notify tree-sitter manually. */ 2628 notify tree-sitter manually. */
2606 treesit_record_change (pos_byte, pos_byte + len, 2629 treesit_record_change (pos_byte, pos_byte + len,
2607 pos_byte + len); 2630 pos_byte + len, start_linecol,
2631 old_end_linecol, pos + 1);
2608#endif 2632#endif
2609 } 2633 }
2610 characters_changed++; 2634 characters_changed++;
@@ -4555,6 +4579,15 @@ ring. */)
4555 start1_byte = CHAR_TO_BYTE (start1); 4579 start1_byte = CHAR_TO_BYTE (start1);
4556 end2_byte = CHAR_TO_BYTE (end2); 4580 end2_byte = CHAR_TO_BYTE (end2);
4557 4581
4582#ifdef HAVE_TREE_SITTER
4583 struct ts_linecol start_linecol
4584 = treesit_linecol_maybe (start1, start1_byte,
4585 BUF_TS_LINECOL_POINT (current_buffer));
4586 struct ts_linecol old_end_linecol
4587 = treesit_linecol_maybe (end2, end2_byte,
4588 BUF_TS_LINECOL_POINT (current_buffer));
4589#endif
4590
4558 /* Make sure the gap won't interfere, by moving it out of the text 4591 /* Make sure the gap won't interfere, by moving it out of the text
4559 we will operate on. */ 4592 we will operate on. */
4560 if (start1 < gap && gap < end2) 4593 if (start1 < gap && gap < end2)
@@ -4694,10 +4727,8 @@ ring. */)
4694 } 4727 }
4695 4728
4696#ifdef HAVE_TREE_SITTER 4729#ifdef HAVE_TREE_SITTER
4697 /* I don't think it's common to transpose two far-apart regions, so 4730 treesit_record_change (start1_byte, end2_byte, end2_byte,
4698 amalgamating the edit into one should be fine. This is what the 4731 start_linecol, old_end_linecol, end2);
4699 signal_after_change below does, too. */
4700 treesit_record_change (start1_byte, end2_byte, end2_byte);
4701#endif 4732#endif
4702 4733
4703 signal_after_change (start1, end2 - start1, end2 - start1); 4734 signal_after_change (start1, end2 - start1, end2 - start1);
diff --git a/src/insdel.c b/src/insdel.c
index 7e361fbb2ce..24bacdbcaf2 100644
--- a/src/insdel.c
+++ b/src/insdel.c
@@ -898,6 +898,12 @@ insert_1_both (const char *string,
898 if (NILP (BVAR (current_buffer, enable_multibyte_characters))) 898 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
899 nchars = nbytes; 899 nchars = nbytes;
900 900
901#ifdef HAVE_TREE_SITTER
902 struct ts_linecol start_linecol
903 = treesit_linecol_maybe (PT, PT_BYTE,
904 BUF_TS_LINECOL_POINT (current_buffer));
905#endif
906
901 if (prepare) 907 if (prepare)
902 /* Do this before moving and increasing the gap, 908 /* Do this before moving and increasing the gap,
903 because the before-change hooks might move the gap 909 because the before-change hooks might move the gap
@@ -952,7 +958,9 @@ insert_1_both (const char *string,
952#ifdef HAVE_TREE_SITTER 958#ifdef HAVE_TREE_SITTER
953 eassert (nbytes >= 0); 959 eassert (nbytes >= 0);
954 eassert (PT_BYTE >= 0); 960 eassert (PT_BYTE >= 0);
955 treesit_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes); 961
962 treesit_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes,
963 start_linecol, start_linecol, PT + nchars);
956#endif 964#endif
957 965
958 adjust_point (nchars, nbytes); 966 adjust_point (nchars, nbytes);
@@ -1024,6 +1032,12 @@ insert_from_string_1 (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1024 = count_size_as_multibyte (SDATA (string) + pos_byte, 1032 = count_size_as_multibyte (SDATA (string) + pos_byte,
1025 nbytes); 1033 nbytes);
1026 1034
1035#ifdef HAVE_TREE_SITTER
1036 struct ts_linecol start_linecol
1037 = treesit_linecol_maybe (PT, PT_BYTE,
1038 BUF_TS_LINECOL_POINT (current_buffer));
1039#endif
1040
1027 /* Do this before moving and increasing the gap, 1041 /* Do this before moving and increasing the gap,
1028 because the before-change hooks might move the gap 1042 because the before-change hooks might move the gap
1029 or make it smaller. */ 1043 or make it smaller. */
@@ -1088,7 +1102,9 @@ insert_from_string_1 (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1088#ifdef HAVE_TREE_SITTER 1102#ifdef HAVE_TREE_SITTER
1089 eassert (nbytes >= 0); 1103 eassert (nbytes >= 0);
1090 eassert (PT_BYTE >= 0); 1104 eassert (PT_BYTE >= 0);
1091 treesit_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes); 1105
1106 treesit_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes,
1107 start_linecol, start_linecol, PT + nchars);
1092#endif 1108#endif
1093 1109
1094 adjust_point (nchars, outgoing_nbytes); 1110 adjust_point (nchars, outgoing_nbytes);
@@ -1101,7 +1117,8 @@ insert_from_string_1 (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1101 GPT_ADDR (if not text_at_gap_tail). 1117 GPT_ADDR (if not text_at_gap_tail).
1102 Contrary to insert_from_gap, this does not invalidate any cache, 1118 Contrary to insert_from_gap, this does not invalidate any cache,
1103 nor update any markers, nor record any buffer modification information 1119 nor update any markers, nor record any buffer modification information
1104 of any sort, with the single exception of notifying tree-sitter. */ 1120 of any sort, with the single exception of notifying tree-sitter and
1121 updating tree-sitter linecol cache. */
1105void 1122void
1106insert_from_gap_1 (ptrdiff_t nchars, ptrdiff_t nbytes, bool text_at_gap_tail) 1123insert_from_gap_1 (ptrdiff_t nchars, ptrdiff_t nbytes, bool text_at_gap_tail)
1107{ 1124{
@@ -1110,6 +1127,9 @@ insert_from_gap_1 (ptrdiff_t nchars, ptrdiff_t nbytes, bool text_at_gap_tail)
1110 1127
1111#ifdef HAVE_TREE_SITTER 1128#ifdef HAVE_TREE_SITTER
1112 ptrdiff_t ins_bytepos = GPT_BYTE; 1129 ptrdiff_t ins_bytepos = GPT_BYTE;
1130 struct ts_linecol start_linecol
1131 = treesit_linecol_maybe (GPT, GPT_BYTE,
1132 BUF_TS_LINECOL_POINT (current_buffer));
1113#endif 1133#endif
1114 1134
1115 GAP_SIZE -= nbytes; 1135 GAP_SIZE -= nbytes;
@@ -1130,7 +1150,9 @@ insert_from_gap_1 (ptrdiff_t nchars, ptrdiff_t nbytes, bool text_at_gap_tail)
1130#ifdef HAVE_TREE_SITTER 1150#ifdef HAVE_TREE_SITTER
1131 eassert (nbytes >= 0); 1151 eassert (nbytes >= 0);
1132 eassert (ins_bytepos >= 0); 1152 eassert (ins_bytepos >= 0);
1133 treesit_record_change (ins_bytepos, ins_bytepos, ins_bytepos + nbytes); 1153
1154 treesit_record_change (ins_bytepos, ins_bytepos, ins_bytepos + nbytes,
1155 start_linecol, start_linecol, ins_bytepos + nbytes);
1134#endif 1156#endif
1135} 1157}
1136 1158
@@ -1193,6 +1215,9 @@ insert_from_buffer (struct buffer *buf,
1193 1215
1194#ifdef HAVE_TREE_SITTER 1216#ifdef HAVE_TREE_SITTER
1195 ptrdiff_t obyte = PT_BYTE; 1217 ptrdiff_t obyte = PT_BYTE;
1218 struct ts_linecol start_linecol
1219 = treesit_linecol_maybe (opoint, obyte,
1220 BUF_TS_LINECOL_POINT (current_buffer));
1196#endif 1221#endif
1197 1222
1198 insert_from_buffer_1 (buf, charpos, nchars, inherit); 1223 insert_from_buffer_1 (buf, charpos, nchars, inherit);
@@ -1203,7 +1228,9 @@ insert_from_buffer (struct buffer *buf,
1203 eassert (PT_BYTE >= BEG_BYTE); 1228 eassert (PT_BYTE >= BEG_BYTE);
1204 eassert (obyte >= BEG_BYTE); 1229 eassert (obyte >= BEG_BYTE);
1205 eassert (PT_BYTE >= obyte); 1230 eassert (PT_BYTE >= obyte);
1206 treesit_record_change (obyte, obyte, PT_BYTE); 1231
1232 treesit_record_change (obyte, obyte, PT_BYTE,
1233 start_linecol, start_linecol, PT);
1207#endif 1234#endif
1208} 1235}
1209 1236
@@ -1494,6 +1521,16 @@ replace_range (ptrdiff_t from, ptrdiff_t to, Lisp_Object new,
1494 if (nbytes_del <= 0 && inschars == 0) 1521 if (nbytes_del <= 0 && inschars == 0)
1495 return; 1522 return;
1496 1523
1524#ifdef HAVE_TREE_SITTER
1525 struct ts_linecol start_linecol
1526 = treesit_linecol_maybe (from, from_byte,
1527 BUF_TS_LINECOL_POINT (current_buffer));
1528 struct ts_linecol old_end_linecol
1529 = treesit_linecol_maybe (to, to_byte,
1530 BUF_TS_LINECOL_POINT (current_buffer));
1531#endif
1532
1533
1497 ptrdiff_t insbeg_bytes, insend_bytes; 1534 ptrdiff_t insbeg_bytes, insend_bytes;
1498 ptrdiff_t insbytes; 1535 ptrdiff_t insbytes;
1499 unsigned char *insbeg_ptr; 1536 unsigned char *insbeg_ptr;
@@ -1633,7 +1670,9 @@ replace_range (ptrdiff_t from, ptrdiff_t to, Lisp_Object new,
1633 eassert (to_byte >= from_byte); 1670 eassert (to_byte >= from_byte);
1634 eassert (outgoing_insbytes >= 0); 1671 eassert (outgoing_insbytes >= 0);
1635 eassert (from_byte >= 0); 1672 eassert (from_byte >= 0);
1636 treesit_record_change (from_byte, to_byte, from_byte + outgoing_insbytes); 1673
1674 treesit_record_change (from_byte, to_byte, from_byte + outgoing_insbytes,
1675 start_linecol, old_end_linecol, from + inschars);
1637#endif 1676#endif
1638 1677
1639 /* Relocate point as if it were a marker. */ 1678 /* Relocate point as if it were a marker. */
@@ -1960,6 +1999,15 @@ del_range_2 (ptrdiff_t from, ptrdiff_t from_byte,
1960 nchars_del = to - from; 1999 nchars_del = to - from;
1961 nbytes_del = to_byte - from_byte; 2000 nbytes_del = to_byte - from_byte;
1962 2001
2002#ifdef HAVE_TREE_SITTER
2003 struct ts_linecol start_linecol
2004 = treesit_linecol_maybe (from, from_byte,
2005 BUF_TS_LINECOL_POINT (current_buffer));
2006 struct ts_linecol old_end_linecol
2007 = treesit_linecol_maybe (to, to_byte,
2008 BUF_TS_LINECOL_POINT (current_buffer));
2009#endif
2010
1963 /* Make sure the gap is somewhere in or next to what we are deleting. */ 2011 /* Make sure the gap is somewhere in or next to what we are deleting. */
1964 if (from > GPT) 2012 if (from > GPT)
1965 gap_right (from, from_byte); 2013 gap_right (from, from_byte);
@@ -2019,7 +2067,8 @@ del_range_2 (ptrdiff_t from, ptrdiff_t from_byte,
2019#ifdef HAVE_TREE_SITTER 2067#ifdef HAVE_TREE_SITTER
2020 eassert (from_byte <= to_byte); 2068 eassert (from_byte <= to_byte);
2021 eassert (from_byte >= 0); 2069 eassert (from_byte >= 0);
2022 treesit_record_change (from_byte, to_byte, from_byte); 2070 treesit_record_change (from_byte, to_byte, from_byte,
2071 start_linecol, old_end_linecol, from);
2023#endif 2072#endif
2024 2073
2025 return deletion; 2074 return deletion;
diff --git a/src/lisp.h b/src/lisp.h
index c2450440ab9..ff68b5d2736 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -4415,6 +4415,10 @@ extern void update_echo_area (void);
4415extern void truncate_echo_area (ptrdiff_t); 4415extern void truncate_echo_area (ptrdiff_t);
4416extern void redisplay (void); 4416extern void redisplay (void);
4417extern ptrdiff_t count_lines (ptrdiff_t start_byte, ptrdiff_t end_byte); 4417extern ptrdiff_t count_lines (ptrdiff_t start_byte, ptrdiff_t end_byte);
4418extern ptrdiff_t display_count_lines (ptrdiff_t start_byte,
4419 ptrdiff_t limit_byte,
4420 ptrdiff_t count,
4421 ptrdiff_t *byte_pos_ptr);
4418 4422
4419void set_frame_cursor_types (struct frame *, Lisp_Object); 4423void set_frame_cursor_types (struct frame *, Lisp_Object);
4420extern void syms_of_xdisp (void); 4424extern void syms_of_xdisp (void);
diff --git a/src/treesit.c b/src/treesit.c
index b0979397d35..3a19e0cb282 100644
--- a/src/treesit.c
+++ b/src/treesit.c
@@ -307,18 +307,13 @@ init_treesit_functions (void)
307 in Emacs's use cases. 307 in Emacs's use cases.
308 308
309 - Many tree-sitter functions take a TSPoint, which is basically a 309 - Many tree-sitter functions take a TSPoint, which is basically a
310 row and column. Emacs uses a gap buffer and does not keep 310 line and column. Emacs uses a gap buffer and does not keep
311 information about the row and column position of a buffer. 311 information about the line and column positions in a buffer, so
312 According to the author of tree-sitter, those functions only take 312 it's hard for us to pass it to tree-sitter. Instead we just give
313 a TSPoint so that it can be moved alongside the byte position and 313 it dummy values. But there are certain languages that does need
314 returned to the caller afterwards, and the position actually used 314 the line and column positions to work right, like Haskell. So we
315 is the specified byte position. He also said that he _thinks_ 315 added optional line and column tracking. See the linecol section
316 that just passing a byte position will also work. As a result, a 316 below.
317 dummy value is used in place of each TSPoint. Judging by the
318 nature of parsing algorithms, I think it is safe to use only the
319 byte position, and I don't think this will change in the future.
320
321 See: https://github.com/tree-sitter/tree-sitter/issues/445
322 317
323 treesit.h has some commentary on the two main data structure for 318 treesit.h has some commentary on the two main data structure for
324 the parser and node. treesit_sync_visible_region has some 319 the parser and node. treesit_sync_visible_region has some
@@ -350,8 +345,8 @@ init_treesit_functions (void)
350 345
351 Tree-sitter-related code in other files: 346 Tree-sitter-related code in other files:
352 - src/alloc.c for gc for parser and node 347 - src/alloc.c for gc for parser and node
353 - src/casefiddle.c & src/insdel.c for notifying tree-sitter 348 - src/casefiddle.c, src/insdel.c, src/editfns.c for notifying
354 parser of buffer changes. 349 tree-sitter parser of buffer changes.
355 - lisp/emacs-lisp/cl-preloaded.el & data.c & lisp.h for parser and 350 - lisp/emacs-lisp/cl-preloaded.el & data.c & lisp.h for parser and
356 node type. 351 node type.
357 - print.c for printing tree-sitter objects (node, parser, query). 352 - print.c for printing tree-sitter objects (node, parser, query).
@@ -406,7 +401,66 @@ init_treesit_functions (void)
406 from the user's POV, each buffer, regardless of indirect or not, 401 from the user's POV, each buffer, regardless of indirect or not,
407 appears to have their own parser list. A discussion can be found in 402 appears to have their own parser list. A discussion can be found in
408 bug#59693. Note that that discussion led to an earlier design, which 403 bug#59693. Note that that discussion led to an earlier design, which
409 is different from the current one. */ 404 is different from the current one.
405
406 Line and column reporting to tree-sitter: technically we had to send
407 tree-sitter the line and column position of each edit. But in
408 practice we just send it dummy values, because tree-sitter doesn't
409 use it for parsing and mostly just carries the line and column
410 positions around and return it when e.g. reporting node positions[1].
411 This has been working fine until we encountered grammars that
412 actually utilizes the line and column information for parsing
413 (Haskell)[2].
414
415 [1] https://github.com/tree-sitter/tree-sitter/issues/445
416 [2] https://github.com/tree-sitter/tree-sitter/issues/4001
417
418 So now we have to keep track of line and column positions and pass
419 valid values to tree-sitter. (It adds quite some complexity, but
420 only linearly; one can ignore all the linecol stuff when trying to
421 understand treesit code and then come back to it later.) Eli
422 convinced me to disable tracking by default, and only enable it for
423 languages that needs it. So the buffer starts out not tracking
424 linecol. And when a parser is created, if the language is in
425 treesit-languages-require-line-column-tracking, we enable tracking in
426 the buffer, and enable tracking for the parser. To simplify things,
427 once a buffer starts tracking linecol, it never disables tracking,
428 even if parsers that need tracking are all deleted; and for parsers,
429 tracking is determined at creation time, if it starts out
430 tracking/non-tracking, it stays that way, regardless of later changes
431 to treesit-languages-require-line-column-tracking.
432
433 To make calculating line/column positons fast, we store linecol
434 caches for begv, point, and zv in the buffer
435 (buf->ts_linecol_cache_xxx); and in the parser object, we store
436 linecol cache for visible beg/end of that parser.
437
438 In buffer editing functions, we need the linecol for
439 start/old_end/new_end, those can be calculated by scanning newlines
440 (treesit_linecol_of_pos) from the buffer point cache, which should be
441 always near the point. And we usually set the calculated linecol of
442 new_end back to the buffer point cache.
443
444 We also need to calculate linecol for the visible_beg/end for each
445 parser, and linecol for the buffer's begv/zv, these positions are
446 usually far from point, so we have caches for all of them (in either
447 the parser object or the buffer). These positions are far from
448 point, so it's inefficient to scan newlines from point to there to
449 get up-to-date linecol for them; but in the same time, because
450 they're far and outside the changed region, we can calculate their
451 change in line and column number by simply counting how much newlines
452 are added/removed in the changed region
453 (compute_new_linecol_by_change). */
454
455
456/*** Constants */
457
458/* A linecol_cache that points to BOB, this is always valid. */
459const struct ts_linecol TREESIT_BOB_LINECOL = { 1, 1, 0 };
460/* An uninitialized linecol. */
461const struct ts_linecol TREESIT_EMPTY_LINECOL = { 0, 0, 0 };
462const TSPoint TREESIT_TS_POINT_1_0 = { 1, 0 };
463
410 464
411 465
412/*** Initialization */ 466/*** Initialization */
@@ -864,6 +918,241 @@ loaded or the file name couldn't be determined, return nil. */)
864} 918}
865 919
866 920
921/*** Linecol functions */
922
923#define TREESIT_DEBUG_LINECOL false
924
925void treesit_debug_print_linecol (struct ts_linecol);
926
927void
928treesit_debug_print_linecol (struct ts_linecol linecol)
929{
930 printf ("{ line=%ld col=%ld bytepos=%ld }\n", linecol.line, linecol.col, linecol.bytepos);
931}
932
933/* Returns true if BUF tracks linecol. */
934bool treesit_buf_tracks_linecol_p (struct buffer *buf)
935{
936 return BUF_TS_LINECOL_BEGV (buf).bytepos != 0;
937}
938
939static void
940restore_restriction_and_selective_display (Lisp_Object record)
941{
942 save_restriction_restore (Fcar (record));
943 BVAR (current_buffer, selective_display) = Fcdr (record);
944 return;
945}
946
947/* Similar to display_count_lines, but behaves differently when
948 searching backwards: when found a newline, stop at the newline,
949 return count as normal (display_count_lines stops after the newline
950 and subtracts one from count). When searching forward, stop at the
951 position after the newline. Another difference is this function
952 disregards narrowing, so it works on bytepos outside of the visible
953 range. */
954static ptrdiff_t
955treesit_count_lines (ptrdiff_t start_byte,
956 ptrdiff_t limit_byte, ptrdiff_t count,
957 ptrdiff_t *byte_pos_ptr)
958{
959 /* I don't think display_count_lines signals, so the unwind-protect
960 technically isn't necessary. Also treesit_count_lines aren't
961 suppose to signal either since it's used in functions that aren't
962 supposed to signal (treesit_record_change and friends). */
963 Lisp_Object record = Fcons (save_restriction_save (),
964 BVAR (current_buffer, selective_display));
965
966
967 specpdl_ref pdl_count = SPECPDL_INDEX ();
968 record_unwind_protect (restore_restriction_and_selective_display, record);
969
970 BVAR (current_buffer, selective_display) = Qnil;
971 labeled_restrictions_remove_in_current_buffer ();
972 Fwiden ();
973 ptrdiff_t counted = display_count_lines (start_byte, limit_byte,
974 count, byte_pos_ptr);
975
976 unbind_to (pdl_count, Qnil);
977
978 /* If searching backwards and we found COUNT newlines, countermand the
979 different logic in display_count_lines. */
980 if (count < 0 && limit_byte != *byte_pos_ptr)
981 {
982 counted += 1;
983 *byte_pos_ptr -= 1;
984 }
985
986 return counted;
987}
988
989static void
990treesit_debug_validate_linecol (struct ts_linecol linecol)
991{
992 eassert (linecol.bytepos <= Z_BYTE);
993
994 /* We can't use count_lines as ground truth because it respects
995 narrowing, and calling it with a bytepos outside of the visible
996 portion results in infloop. */
997 ptrdiff_t _unused;
998 ptrdiff_t true_line_count = treesit_count_lines (BEG_BYTE, linecol.bytepos,
999 Z_BYTE, &_unused) + 1;
1000 eassert (true_line_count == linecol.line);
1001}
1002
1003/* Calculate and return the line and column number of BYTE_POS by
1004 scanning newlines from CACHE. CACHE must be valid. */
1005static struct ts_linecol
1006treesit_linecol_of_pos (ptrdiff_t target_bytepos,
1007 struct ts_linecol cache)
1008{
1009 if (TREESIT_DEBUG_LINECOL)
1010 {
1011 treesit_debug_validate_linecol (cache);
1012 }
1013
1014 /* When we finished searching for newlines between CACHE and
1015 TARGET_POS, BYTE_POS_2 is at TARGET_POS, and BYTE_POS_1 is at the
1016 previous newline. If TARGET_POS happends to be on a newline,
1017 BYTE_POS_1 will be on that position. BYTE_POS_1 is used for
1018 calculating the column. (If CACHE and TARGET_POS are in the same
1019 line, BYTE_POS_1 is unset and we don't use it.) */
1020 ptrdiff_t byte_pos_1 = 0;
1021 ptrdiff_t byte_pos_2 = 0;
1022 /* Number of lines between CACHE and TARGET_POS. */
1023 ptrdiff_t line_delta = 0;
1024
1025 if (target_bytepos == cache.bytepos)
1026 return cache;
1027
1028 /* Search forward. */
1029 if (cache.bytepos < target_bytepos)
1030 {
1031 byte_pos_2 = cache.bytepos;
1032 while (byte_pos_2 < target_bytepos)
1033 {
1034 ptrdiff_t counted = treesit_count_lines (byte_pos_2, target_bytepos,
1035 1, &byte_pos_2);
1036
1037 if (counted > 0)
1038 {
1039 byte_pos_1 = byte_pos_2;
1040 }
1041 line_delta += counted;
1042 }
1043 eassert (byte_pos_2 == target_bytepos);
1044 /* At this point, byte_pos_2 is at target_pos, and byte_pos_1 is
1045 at the previous newline if we went across any. */
1046
1047 struct ts_linecol target_linecol;
1048 target_linecol.bytepos = target_bytepos;
1049 target_linecol.line = cache.line + line_delta;
1050 /* If we moved across any newline, use the previous newline to
1051 calculate the column; if we stayed at the same line, use the
1052 cached column to calculate the new column. */
1053 target_linecol.col = line_delta > 0
1054 ? target_bytepos - byte_pos_1
1055 : target_bytepos - cache.bytepos + cache.col;
1056
1057 if (TREESIT_DEBUG_LINECOL)
1058 {
1059 treesit_debug_validate_linecol (target_linecol);
1060 }
1061
1062 return target_linecol;
1063 }
1064
1065 /* Search backward. */
1066 byte_pos_2 = cache.bytepos;
1067 while (byte_pos_2 > target_bytepos)
1068 {
1069 ptrdiff_t counted = treesit_count_lines (byte_pos_2, target_bytepos,
1070 -1, &byte_pos_2);
1071 line_delta -= counted;
1072 }
1073 eassert (byte_pos_2 == target_bytepos);
1074 /* At this point, pos_2 is at target_pos. */
1075
1076 struct ts_linecol target_linecol;
1077 target_linecol.bytepos = target_bytepos;
1078 target_linecol.line = cache.line + line_delta;
1079 eassert (cache.line + line_delta > 0);
1080
1081 /* Calculate the column. */
1082 if (line_delta == 0)
1083 {
1084 target_linecol.col = cache.col - (cache.bytepos - target_bytepos);
1085 }
1086 else
1087 {
1088 /* We need to find the previous newline in order to calculate the
1089 column. */
1090 ptrdiff_t counted = treesit_count_lines (byte_pos_2, BEG_BYTE, -1, &byte_pos_2);
1091 target_linecol.col
1092 = target_bytepos - (byte_pos_2 + counted == 1 ? 1 : 0);
1093 }
1094
1095 if (TREESIT_DEBUG_LINECOL)
1096 {
1097 treesit_debug_validate_linecol (target_linecol);
1098 }
1099
1100 return target_linecol;
1101}
1102
1103/* Return a TSPoint given POS and VISIBLE_BEG. VISIBLE_BEG must be
1104 before POS. */
1105static TSPoint
1106treesit_make_ts_point (struct ts_linecol visible_beg,
1107 struct ts_linecol pos)
1108{
1109 TSPoint point;
1110 if (visible_beg.line == pos.line)
1111 {
1112 point.row = 0;
1113 point.column = pos.col - visible_beg.col;
1114 eassert (point.column >= 0);
1115 }
1116 else
1117 {
1118 point.row = pos.line - visible_beg.line;
1119 eassert (point.row > 0);
1120 point.column = pos.col;
1121 }
1122 return point;
1123}
1124
1125DEFUN ("treesit-tracking-line-column-p",
1126 Ftreesit_tracking_line_column_p,
1127 Streesit_tracking_line_column_p, 0, 1, 0,
1128 doc : /* Return non-nil if BUFFER is tracking line and column.
1129
1130Return nil otherwise. BUFFER defaults to the current buffer. */)
1131 (Lisp_Object buffer)
1132{
1133 struct buffer *buf = current_buffer;
1134 if (!NILP (buffer))
1135 {
1136 CHECK_BUFFER (buffer);
1137 buf = XBUFFER (buffer);
1138 }
1139
1140 return treesit_buf_tracks_linecol_p (buf) ? Qt : Qnil;
1141}
1142
1143DEFUN ("treesit-parser-tracking-line-column-p",
1144 Ftreesit_parser_tracking_line_column_p,
1145 Streesit_parser_tracking_line_column_p, 1, 1, 0,
1146 doc : /* Return non-nil if PARSER is tracking line and column.
1147
1148Return nil otherwise.*/)
1149 (Lisp_Object parser)
1150{
1151 CHECK_TS_PARSER (parser);
1152 return XTS_PARSER (parser)->visi_beg_linecol.bytepos == 0 ? Qnil : Qt;
1153}
1154
1155
867/*** Parsing functions */ 1156/*** Parsing functions */
868 1157
869static void 1158static void
@@ -879,34 +1168,147 @@ treesit_check_parser (Lisp_Object obj)
879 larger than UINT32_MAX. */ 1168 larger than UINT32_MAX. */
880static inline void 1169static inline void
881treesit_tree_edit_1 (TSTree *tree, ptrdiff_t start_byte, 1170treesit_tree_edit_1 (TSTree *tree, ptrdiff_t start_byte,
882 ptrdiff_t old_end_byte, ptrdiff_t new_end_byte) 1171 ptrdiff_t old_end_byte, ptrdiff_t new_end_byte,
1172 TSPoint start_point, TSPoint old_end_point,
1173 TSPoint new_end_point)
883{ 1174{
884 eassert (start_byte >= 0); 1175 eassert (start_byte >= 0);
885 eassert (start_byte <= old_end_byte); 1176 eassert (start_byte <= old_end_byte);
886 eassert (start_byte <= new_end_byte); 1177 eassert (start_byte <= new_end_byte);
887 TSPoint dummy_point = {0, 0};
888 eassert (start_byte <= UINT32_MAX); 1178 eassert (start_byte <= UINT32_MAX);
889 eassert (old_end_byte <= UINT32_MAX); 1179 eassert (old_end_byte <= UINT32_MAX);
890 eassert (new_end_byte <= UINT32_MAX); 1180 eassert (new_end_byte <= UINT32_MAX);
891 TSInputEdit edit = {(uint32_t) start_byte, 1181 TSInputEdit edit = {(uint32_t) start_byte,
892 (uint32_t) old_end_byte, 1182 (uint32_t) old_end_byte,
893 (uint32_t) new_end_byte, 1183 (uint32_t) new_end_byte,
894 dummy_point, dummy_point, dummy_point}; 1184 start_point, old_end_point, new_end_point};
895 ts_tree_edit (tree, &edit); 1185 ts_tree_edit (tree, &edit);
896} 1186}
897 1187
898/* Update each parser's tree after the user made an edit. This 1188/* Given a position at POS_LINECOL, and the linecol of a buffer change
899 function does not parse the buffer and only updates the tree, so it 1189 (START_LINECOL, OLD_END_LINECOL, and NEW_END_LINCOL), compute the new
900 should be very fast. */ 1190 linecol for that position, then scan from this now valid linecol to
901void 1191 TARGET_BYTEPOS and return the linecol at TARGET_BYTEPOS.
902treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte, 1192
903 ptrdiff_t new_end_byte) 1193 When POS_LINECOL is outside of the range between START_LINECOL and
1194 OLD_END_LINECOL, we can calculate the change in line and column
1195 number of POS_LINECOL by simply counting how many newlines are
1196 removed/added in the change. Once we have the up-to-date line and
1197 column number at POS_LINECOL.bytepos, we can just scan to
1198 TARGET_BYTEPOS to get a linecol for it. The assumption is that
1199 TARGET_BYTEPOS is far from START_LINECOL, etc, but close to
1200 POS_LINECOL. So we avoids scanning longs distance from
1201 START_LINECOL, etc.
1202
1203 However, this optimization only works when POS_LINECOL is outside the
1204 range between START_LINECOL and OLD_END_LINECOL. If not, we've have
1205 to scan from START_LINECOL or NEW_END_LINECOL to TARGET_BYTEPOS. */
1206static struct ts_linecol
1207compute_new_linecol_by_change (struct ts_linecol pos_linecol,
1208 struct ts_linecol start_linecol,
1209 struct ts_linecol old_end_linecol,
1210 struct ts_linecol new_end_linecol,
1211 ptrdiff_t target_bytepos)
1212{
1213 struct ts_linecol new_linecol = { 0, 0, 0 };
1214
1215 /* 1. Even start is behind pos, pos isn't affected. */
1216 if (start_linecol.bytepos >= pos_linecol.bytepos)
1217 {
1218 new_linecol = pos_linecol;
1219 }
1220 /* 2. When old_end (oe) is before pos, the differnce between pos and
1221 pos' is the difference between old_end and new_end (ne).
1222
1223 | | | | | |
1224 s oe pos s oe pos
1225 OR
1226 | | | | |
1227 s ne pos' s ne pos'
1228
1229 */
1230 else if (old_end_linecol.bytepos <= pos_linecol.bytepos)
1231 {
1232 ptrdiff_t line_delta = new_end_linecol.line - old_end_linecol.line;
1233 new_linecol.line = pos_linecol.line + line_delta;
1234 new_linecol.bytepos
1235 = pos_linecol.bytepos + new_end_linecol.bytepos - old_end_linecol.bytepos;
1236
1237 /* Suppose # is text, | is cursor:
1238
1239 ################
1240 ########|########|
1241 oe pos
1242
1243 Now, if we insert something:
1244
1245 ################
1246 ########|OOOOO
1247 OOOOOOOOOO|########|
1248 ne pos'
1249
1250 Clearly, col for pos' is just the col of new_end plus the
1251 distance between old_end and pos. The same goes for deletion.
1252 */
1253 if (old_end_linecol.line == pos_linecol.line)
1254 {
1255 eassert (old_end_linecol.col <= pos_linecol.col);
1256 ptrdiff_t old_end_to_pos = pos_linecol.col - old_end_linecol.col;
1257 new_linecol.col = new_end_linecol.col + old_end_to_pos;
1258 }
1259 else
1260 {
1261 new_linecol.col = pos_linecol.col;
1262 }
1263 }
1264 /* 3. At this point, start < pos < old_end. We're kinda cooked, there
1265 aren't much we can do other than scan the buffer from new_end or
1266 start. */
1267 else if (target_bytepos - start_linecol.bytepos
1268 < eabs (target_bytepos - new_end_linecol.bytepos))
1269 {
1270 new_linecol = treesit_linecol_of_pos (target_bytepos, start_linecol);
1271 }
1272 else
1273 {
1274 new_linecol = treesit_linecol_of_pos (target_bytepos, new_end_linecol);
1275 }
1276
1277 /* Now new_linecol is a valid linecol, scan from it to target_bytepos. */
1278 if (new_linecol.bytepos != target_bytepos)
1279 {
1280 new_linecol = treesit_linecol_of_pos (target_bytepos, new_linecol);
1281 }
1282
1283 if (TREESIT_DEBUG_LINECOL)
1284 treesit_debug_validate_linecol (new_linecol);
1285
1286 return new_linecol;
1287}
1288
1289/* Update each parser's tree after the user made an edit. This function
1290 does not parse the buffer and only updates the tree, so it should be
1291 very fast. If the caller knows there's no parser in the current
1292 buffer, they can pass empty linecol for
1293 START/OLD_END/NEW_END_linecol.
1294
1295 If the current buffer doesn't track linecol, start_linecol,
1296 old_end_linecol, and new_end_linecol will be empty. In that case,
1297 don't process linecols. */
1298static void
1299treesit_record_change_1 (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
1300 ptrdiff_t new_end_byte,
1301 struct ts_linecol start_linecol,
1302 struct ts_linecol old_end_linecol,
1303 struct ts_linecol new_end_linecol)
904{ 1304{
905 struct buffer *base_buffer = current_buffer; 1305 struct buffer *base_buffer = current_buffer;
906 if (current_buffer->base_buffer) 1306 if (current_buffer->base_buffer)
907 base_buffer = current_buffer->base_buffer; 1307 base_buffer = current_buffer->base_buffer;
908 Lisp_Object parser_list = BVAR (base_buffer, ts_parser_list); 1308 Lisp_Object parser_list = BVAR (base_buffer, ts_parser_list);
909 1309
1310 bool buf_tracks_linecol = start_linecol.bytepos != 0;
1311
910 FOR_EACH_TAIL_SAFE (parser_list) 1312 FOR_EACH_TAIL_SAFE (parser_list)
911 { 1313 {
912 CHECK_CONS (parser_list); 1314 CHECK_CONS (parser_list);
@@ -916,16 +1318,22 @@ treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
916 /* See comment (ref:visible-beg-null) if you wonder why we don't 1318 /* See comment (ref:visible-beg-null) if you wonder why we don't
917 update visible_beg/end when tree is NULL. */ 1319 update visible_beg/end when tree is NULL. */
918 1320
1321 bool parser_tracks_linecol
1322 = XTS_PARSER (lisp_parser)->visi_beg_linecol.bytepos != 0;
1323
919 if (tree != NULL) 1324 if (tree != NULL)
920 { 1325 {
921 eassert (start_byte <= old_end_byte); 1326 eassert (start_byte <= old_end_byte);
922 eassert (start_byte <= new_end_byte); 1327 eassert (start_byte <= new_end_byte);
923 /* Think the recorded change as a delete followed by an 1328 /* Before sending the edit to tree-sitter, we need to first
924 insert, and think of them as moving unchanged text back 1329 clip the beg/end to visible_beg and visible_end of the
925 and forth. After all, the whole point of updating the 1330 parser. A tip for understanding the code below: think the
926 tree is to update the position of unchanged text. */ 1331 recorded change as a delete followed by an insert, and
927 ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg; 1332 think of them as moving unchanged text back and forth.
928 ptrdiff_t visible_end = XTS_PARSER (lisp_parser)->visible_end; 1333 After all, the whole point of updating the tree is to
1334 update the position of unchanged text. */
1335 const ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg;
1336 const ptrdiff_t visible_end = XTS_PARSER (lisp_parser)->visible_end;
929 eassert (visible_beg >= 0); 1337 eassert (visible_beg >= 0);
930 eassert (visible_beg <= visible_end); 1338 eassert (visible_beg <= visible_end);
931 1339
@@ -949,10 +1357,6 @@ treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
949 eassert (start_offset <= old_end_offset); 1357 eassert (start_offset <= old_end_offset);
950 eassert (start_offset <= new_end_offset); 1358 eassert (start_offset <= new_end_offset);
951 1359
952 treesit_tree_edit_1 (tree, start_offset, old_end_offset,
953 new_end_offset);
954 XTS_PARSER (lisp_parser)->need_reparse = true;
955
956 /* VISIBLE_BEG/END records tree-sitter's range of view in 1360 /* VISIBLE_BEG/END records tree-sitter's range of view in
957 the buffer. We need to adjust them when tree-sitter's 1361 the buffer. We need to adjust them when tree-sitter's
958 view changes. */ 1362 view changes. */
@@ -966,19 +1370,133 @@ treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
966 visi_beg_delta = (old_end_byte < visible_beg 1370 visi_beg_delta = (old_end_byte < visible_beg
967 ? new_end_byte - old_end_byte : 0); 1371 ? new_end_byte - old_end_byte : 0);
968 1372
969 XTS_PARSER (lisp_parser)->visible_beg = visible_beg + visi_beg_delta; 1373 const ptrdiff_t new_visible_beg = visible_beg + visi_beg_delta;
970 XTS_PARSER (lisp_parser)->visible_end = (visible_end 1374 const ptrdiff_t new_visible_end
971 + visi_beg_delta 1375 = (visible_end + visi_beg_delta
972 + (new_end_offset 1376 + (new_end_offset - old_end_offset));
973 - old_end_offset)); 1377
1378 XTS_PARSER (lisp_parser)->visible_beg = new_visible_beg;
1379 XTS_PARSER (lisp_parser)->visible_end = new_visible_end;
1380
1381 eassert (BEG_BYTE <= new_visible_beg);
1382 eassert (new_visible_beg <= new_visible_end);
1383 eassert (new_visible_end <= Z_BYTE);
1384
1385 /* (Optionally) calculate the point for start/old_end/new_end
1386 to be sent to tree-sitter. Also update parser cache for
1387 linecol. */
1388 TSPoint start_point = TREESIT_TS_POINT_1_0;
1389 TSPoint old_end_point = TREESIT_TS_POINT_1_0;
1390 TSPoint new_end_point = TREESIT_TS_POINT_1_0;
1391 if (parser_tracks_linecol)
1392 {
1393 eassert (buf_tracks_linecol);
1394 struct ts_linecol old_visi_beg_linecol
1395 = XTS_PARSER (lisp_parser)->visi_beg_linecol;
1396 struct ts_linecol old_visi_end_linecol
1397 = XTS_PARSER (lisp_parser)->visi_end_linecol;
1398
1399 const struct ts_linecol new_visi_beg_linecol
1400 = compute_new_linecol_by_change (old_visi_beg_linecol,
1401 start_linecol,
1402 old_end_linecol,
1403 new_end_linecol,
1404 new_visible_beg);
1405 const struct ts_linecol new_visi_end_linecol
1406 = compute_new_linecol_by_change (old_visi_end_linecol,
1407 start_linecol,
1408 old_end_linecol,
1409 new_end_linecol,
1410 new_visible_end);
1411 XTS_PARSER (lisp_parser)->visi_beg_linecol
1412 = new_visi_beg_linecol;
1413 XTS_PARSER (lisp_parser)->visi_end_linecol
1414 = new_visi_end_linecol;
1415
1416 /* Now, calculate TSPoints and finally update the tree. */
1417 struct ts_linecol new_begv_linecol
1418 = XTS_PARSER (lisp_parser)->visi_beg_linecol;
1419 old_end_point = treesit_make_ts_point (old_visi_beg_linecol,
1420 old_end_linecol);
1421 start_point = treesit_make_ts_point (new_begv_linecol,
1422 start_linecol);
1423 new_end_point = treesit_make_ts_point (new_begv_linecol,
1424 new_end_linecol);
1425 }
974 1426
975 eassert (XTS_PARSER (lisp_parser)->visible_beg >= 0); 1427 treesit_tree_edit_1 (tree, start_offset, old_end_offset,
976 eassert (XTS_PARSER (lisp_parser)->visible_beg 1428 new_end_offset, start_point, old_end_point,
977 <= XTS_PARSER (lisp_parser)->visible_end); 1429 new_end_point);
1430 XTS_PARSER (lisp_parser)->need_reparse = true;
978 } 1431 }
979 } 1432 }
980} 1433}
981 1434
1435/* Return the linecol of POS, calculated from CACHE. But if there's no
1436 parser in the current buffer, or line-column tracking is disabled,
1437 skip calculation and return an empty linecol instead. */
1438struct ts_linecol
1439treesit_linecol_maybe (ptrdiff_t pos, ptrdiff_t pos_byte,
1440 struct ts_linecol cache)
1441{
1442 if (NILP (BVAR (current_buffer, ts_parser_list))
1443 || !treesit_buf_tracks_linecol_p (current_buffer))
1444 return TREESIT_EMPTY_LINECOL;
1445
1446 return treesit_linecol_of_pos (pos_byte, cache);
1447}
1448
1449/* Update each parser's tree after the user made an edit. This function
1450 does not parse the buffer and only updates the tree, so it should be
1451 very fast.
1452
1453 This is a wrapper over treesit_record_change that does a bit more
1454 boilerplate work: it (optionally) calculates linecol for new_end,
1455 pass all the positions into treesit_record_change_1 which does the
1456 real work, and finally (optionally) sets buffer's linecol cache to
1457 new_end's linecol.
1458
1459 If NEW_END is next to NEW_END_BYTE in the arglist, caller might
1460 accidentally swap them, so I placed NEW_END at the end of the
1461 arglist.
1462
1463 If the current buffer doesn't track linecol, start_linecol and
1464 old_end_linecol will be empty. In that case, don't process
1465 linecols. */
1466void
1467treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
1468 ptrdiff_t new_end_byte,
1469 struct ts_linecol start_linecol,
1470 struct ts_linecol old_end_linecol,
1471 ptrdiff_t new_end)
1472{
1473 struct ts_linecol new_end_linecol
1474 = treesit_linecol_maybe (new_end, new_end_byte, start_linecol);
1475
1476 treesit_record_change_1 (start_byte, old_end_byte, new_end_byte,
1477 start_linecol, old_end_linecol, new_end_linecol);
1478
1479 if (new_end_linecol.bytepos != 0)
1480 {
1481 const struct ts_linecol new_begv_linecol
1482 = compute_new_linecol_by_change (BUF_TS_LINECOL_BEGV (current_buffer),
1483 start_linecol,
1484 old_end_linecol,
1485 new_end_linecol,
1486 BEGV_BYTE);
1487 const struct ts_linecol new_zv_linecol
1488 = compute_new_linecol_by_change (BUF_TS_LINECOL_ZV (current_buffer),
1489 start_linecol,
1490 old_end_linecol,
1491 new_end_linecol,
1492 ZV_BYTE);
1493
1494 SET_BUF_TS_LINECOL_BEGV (current_buffer, new_begv_linecol);
1495 SET_BUF_TS_LINECOL_POINT (current_buffer, new_end_linecol);
1496 SET_BUF_TS_LINECOL_ZV (current_buffer, new_zv_linecol);
1497 }
1498}
1499
982static TSRange *treesit_make_ts_ranges (Lisp_Object, Lisp_Object, 1500static TSRange *treesit_make_ts_ranges (Lisp_Object, Lisp_Object,
983 uint32_t *); 1501 uint32_t *);
984 1502
@@ -1034,6 +1552,7 @@ treesit_sync_visible_region (Lisp_Object parser)
1034{ 1552{
1035 TSTree *tree = XTS_PARSER (parser)->tree; 1553 TSTree *tree = XTS_PARSER (parser)->tree;
1036 struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); 1554 struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
1555 const bool track_linecol = treesit_buf_tracks_linecol_p (buffer);
1037 1556
1038 /* If we are setting visible_beg/end for the first time, we can skip 1557 /* If we are setting visible_beg/end for the first time, we can skip
1039 the offset acrobatics and updating the tree below. */ 1558 the offset acrobatics and updating the tree below. */
@@ -1046,6 +1565,7 @@ treesit_sync_visible_region (Lisp_Object parser)
1046 1565
1047 ptrdiff_t visible_beg = XTS_PARSER (parser)->visible_beg; 1566 ptrdiff_t visible_beg = XTS_PARSER (parser)->visible_beg;
1048 ptrdiff_t visible_end = XTS_PARSER (parser)->visible_end; 1567 ptrdiff_t visible_end = XTS_PARSER (parser)->visible_end;
1568
1049 eassert (0 <= visible_beg); 1569 eassert (0 <= visible_beg);
1050 eassert (visible_beg <= visible_end); 1570 eassert (visible_beg <= visible_end);
1051 1571
@@ -1066,39 +1586,81 @@ treesit_sync_visible_region (Lisp_Object parser)
1066 from ________|xxxx|__ 1586 from ________|xxxx|__
1067 to |xxxx|__________ */ 1587 to |xxxx|__________ */
1068 1588
1589 struct ts_linecol visi_beg_linecol = track_linecol
1590 ? XTS_PARSER (parser)->visi_beg_linecol : TREESIT_EMPTY_LINECOL;
1591 struct ts_linecol visi_end_linecol = track_linecol
1592 ? XTS_PARSER (parser)->visi_end_linecol : TREESIT_EMPTY_LINECOL;
1593
1594 struct ts_linecol buffer_begv_linecol = track_linecol
1595 ? treesit_linecol_of_pos (BUF_BEGV_BYTE (buffer), BUF_TS_LINECOL_BEGV (buffer))
1596 : TREESIT_EMPTY_LINECOL;
1597 struct ts_linecol buffer_zv_linecol = track_linecol
1598 ? treesit_linecol_of_pos (BUF_ZV_BYTE (buffer), BUF_TS_LINECOL_ZV (buffer))
1599 : TREESIT_EMPTY_LINECOL;
1600
1601 if (track_linecol) eassert (visi_beg_linecol.bytepos == visible_beg);
1602
1069 /* 1. Make sure visible_beg <= BUF_BEGV_BYTE. */ 1603 /* 1. Make sure visible_beg <= BUF_BEGV_BYTE. */
1070 if (visible_beg > BUF_BEGV_BYTE (buffer)) 1604 if (visible_beg > BUF_BEGV_BYTE (buffer))
1071 { 1605 {
1606 TSPoint point_new_end = track_linecol
1607 ? treesit_make_ts_point (buffer_begv_linecol, visi_beg_linecol)
1608 : TREESIT_TS_POINT_1_0;
1072 /* Tree-sitter sees: insert at the beginning. */ 1609 /* Tree-sitter sees: insert at the beginning. */
1073 treesit_tree_edit_1 (tree, 0, 0, visible_beg - BUF_BEGV_BYTE (buffer)); 1610 treesit_tree_edit_1 (tree, 0, 0, visible_beg - BUF_BEGV_BYTE (buffer),
1611 TREESIT_TS_POINT_1_0, TREESIT_TS_POINT_1_0,
1612 point_new_end);
1074 visible_beg = BUF_BEGV_BYTE (buffer); 1613 visible_beg = BUF_BEGV_BYTE (buffer);
1614 visi_beg_linecol = buffer_begv_linecol;
1075 eassert (visible_beg <= visible_end); 1615 eassert (visible_beg <= visible_end);
1076 } 1616 }
1077 /* 2. Make sure visible_end = BUF_ZV_BYTE. */ 1617 /* 2. Make sure visible_end = BUF_ZV_BYTE. */
1078 if (visible_end < BUF_ZV_BYTE (buffer)) 1618 if (visible_end < BUF_ZV_BYTE (buffer))
1079 { 1619 {
1620 TSPoint point_start = track_linecol
1621 ? treesit_make_ts_point (visi_beg_linecol, visi_end_linecol)
1622 : TREESIT_TS_POINT_1_0;
1623 TSPoint point_new_end = track_linecol
1624 ? treesit_make_ts_point (visi_beg_linecol, buffer_zv_linecol)
1625 : TREESIT_TS_POINT_1_0;
1080 /* Tree-sitter sees: insert at the end. */ 1626 /* Tree-sitter sees: insert at the end. */
1081 treesit_tree_edit_1 (tree, visible_end - visible_beg, 1627 treesit_tree_edit_1 (tree, visible_end - visible_beg,
1082 visible_end - visible_beg, 1628 visible_end - visible_beg,
1083 BUF_ZV_BYTE (buffer) - visible_beg); 1629 BUF_ZV_BYTE (buffer) - visible_beg,
1630 point_start, point_start, point_new_end);
1084 visible_end = BUF_ZV_BYTE (buffer); 1631 visible_end = BUF_ZV_BYTE (buffer);
1632 visi_end_linecol = buffer_zv_linecol;
1085 eassert (visible_beg <= visible_end); 1633 eassert (visible_beg <= visible_end);
1086 } 1634 }
1087 else if (visible_end > BUF_ZV_BYTE (buffer)) 1635 else if (visible_end > BUF_ZV_BYTE (buffer))
1088 { 1636 {
1637 TSPoint point_start = track_linecol
1638 ? treesit_make_ts_point (visi_beg_linecol, buffer_zv_linecol)
1639 : TREESIT_TS_POINT_1_0;
1640 TSPoint point_old_end = track_linecol
1641 ? treesit_make_ts_point (visi_beg_linecol, visi_end_linecol)
1642 : TREESIT_TS_POINT_1_0;
1089 /* Tree-sitter sees: delete at the end. */ 1643 /* Tree-sitter sees: delete at the end. */
1090 treesit_tree_edit_1 (tree, BUF_ZV_BYTE (buffer) - visible_beg, 1644 treesit_tree_edit_1 (tree, BUF_ZV_BYTE (buffer) - visible_beg,
1091 visible_end - visible_beg, 1645 visible_end - visible_beg,
1092 BUF_ZV_BYTE (buffer) - visible_beg); 1646 BUF_ZV_BYTE (buffer) - visible_beg,
1647 point_start, point_old_end, point_start);
1093 visible_end = BUF_ZV_BYTE (buffer); 1648 visible_end = BUF_ZV_BYTE (buffer);
1649 visi_end_linecol = buffer_zv_linecol;
1094 eassert (visible_beg <= visible_end); 1650 eassert (visible_beg <= visible_end);
1095 } 1651 }
1096 /* 3. Make sure visible_beg = BUF_BEGV_BYTE. */ 1652 /* 3. Make sure visible_beg = BUF_BEGV_BYTE. */
1097 if (visible_beg < BUF_BEGV_BYTE (buffer)) 1653 if (visible_beg < BUF_BEGV_BYTE (buffer))
1098 { 1654 {
1655 TSPoint point_old_end = track_linecol
1656 ? treesit_make_ts_point (visi_beg_linecol, buffer_begv_linecol)
1657 : TREESIT_TS_POINT_1_0;
1099 /* Tree-sitter sees: delete at the beginning. */ 1658 /* Tree-sitter sees: delete at the beginning. */
1100 treesit_tree_edit_1 (tree, 0, BUF_BEGV_BYTE (buffer) - visible_beg, 0); 1659 treesit_tree_edit_1 (tree, 0, BUF_BEGV_BYTE (buffer) - visible_beg, 0,
1660 TREESIT_TS_POINT_1_0, point_old_end,
1661 TREESIT_TS_POINT_1_0);
1101 visible_beg = BUF_BEGV_BYTE (buffer); 1662 visible_beg = BUF_BEGV_BYTE (buffer);
1663 visi_beg_linecol = buffer_begv_linecol;
1102 eassert (visible_beg <= visible_end); 1664 eassert (visible_beg <= visible_end);
1103 } 1665 }
1104 eassert (0 <= visible_beg); 1666 eassert (0 <= visible_beg);
@@ -1108,6 +1670,14 @@ treesit_sync_visible_region (Lisp_Object parser)
1108 1670
1109 XTS_PARSER (parser)->visible_beg = visible_beg; 1671 XTS_PARSER (parser)->visible_beg = visible_beg;
1110 XTS_PARSER (parser)->visible_end = visible_end; 1672 XTS_PARSER (parser)->visible_end = visible_end;
1673 XTS_PARSER (parser)->visi_beg_linecol = visi_beg_linecol;
1674 XTS_PARSER (parser)->visi_end_linecol = visi_end_linecol;
1675
1676 if (track_linecol)
1677 {
1678 eassert (visi_beg_linecol.bytepos == visible_beg);
1679 eassert (visi_end_linecol.bytepos == visible_end);
1680 }
1111 1681
1112 /* Fix ranges so that the ranges stays with in visible_end. Here we 1682 /* Fix ranges so that the ranges stays with in visible_end. Here we
1113 try to do minimal work so that the ranges is minimally correct and 1683 try to do minimal work so that the ranges is minimally correct and
@@ -1356,7 +1926,7 @@ treesit_read_buffer (void *parser, uint32_t byte_index,
1356Lisp_Object 1926Lisp_Object
1357make_treesit_parser (Lisp_Object buffer, TSParser *parser, 1927make_treesit_parser (Lisp_Object buffer, TSParser *parser,
1358 TSTree *tree, Lisp_Object language_symbol, 1928 TSTree *tree, Lisp_Object language_symbol,
1359 Lisp_Object tag) 1929 Lisp_Object tag, bool tracks_linecol)
1360{ 1930{
1361 struct Lisp_TS_Parser *lisp_parser; 1931 struct Lisp_TS_Parser *lisp_parser;
1362 1932
@@ -1381,6 +1951,27 @@ make_treesit_parser (Lisp_Object buffer, TSParser *parser,
1381 lisp_parser->need_to_gc_buffer = false; 1951 lisp_parser->need_to_gc_buffer = false;
1382 lisp_parser->within_reparse = false; 1952 lisp_parser->within_reparse = false;
1383 eassert (lisp_parser->visible_beg <= lisp_parser->visible_end); 1953 eassert (lisp_parser->visible_beg <= lisp_parser->visible_end);
1954
1955 if (tracks_linecol)
1956 {
1957 struct buffer *old_buf = current_buffer;
1958 set_buffer_internal (XBUFFER (buffer));
1959
1960 /* treesit_linecol_of_pos doesn't signal, so no need to
1961 unwind-protect. */
1962 lisp_parser->visi_beg_linecol
1963 = treesit_linecol_of_pos (BEGV_BYTE, TREESIT_BOB_LINECOL);
1964 lisp_parser->visi_end_linecol
1965 = treesit_linecol_of_pos (ZV_BYTE, lisp_parser->visi_beg_linecol);
1966
1967 set_buffer_internal (old_buf);
1968 }
1969 else
1970 {
1971 lisp_parser->visi_beg_linecol = TREESIT_EMPTY_LINECOL;
1972 lisp_parser->visi_end_linecol = TREESIT_EMPTY_LINECOL;
1973 }
1974
1384 return make_lisp_ptr (lisp_parser, Lisp_Vectorlike); 1975 return make_lisp_ptr (lisp_parser, Lisp_Vectorlike);
1385} 1976}
1386 1977
@@ -1698,13 +2289,28 @@ an indirect buffer. */)
1698 always succeed. */ 2289 always succeed. */
1699 ts_parser_set_language (parser, lang); 2290 ts_parser_set_language (parser, lang);
1700 2291
2292 const bool lang_need_linecol_tracking
2293 = !NILP (Fmemq (remapped_lang,
2294 Vtreesit_languages_require_line_column_tracking));
2295
1701 /* Create parser. Use the unmapped LANGUAGE symbol, so the nodes 2296 /* Create parser. Use the unmapped LANGUAGE symbol, so the nodes
1702 created by this parser (and the parser itself) identify themselves 2297 created by this parser (and the parser itself) identify themselves
1703 as the unmapped language. This makes the grammar mapping 2298 as the unmapped language. This makes the grammar mapping
1704 completely transparent. */ 2299 completely transparent. */
1705 Lisp_Object lisp_parser = make_treesit_parser (buf_orig, 2300 Lisp_Object lisp_parser = make_treesit_parser (buf_orig,
1706 parser, NULL, 2301 parser, NULL,
1707 language, tag); 2302 language, tag,
2303 lang_need_linecol_tracking);
2304
2305 /* Enable line-column tracking if this language requires it. */
2306 if (lang_need_linecol_tracking && !treesit_buf_tracks_linecol_p (buf))
2307 {
2308 /* We can use TREESIT_BOB_LINECOL for begv and zv since these
2309 cache doesn't need to be always in sync with BEGV and ZV. */
2310 SET_BUF_TS_LINECOL_BEGV (buf, TREESIT_BOB_LINECOL);
2311 SET_BUF_TS_LINECOL_POINT (buf, TREESIT_BOB_LINECOL);
2312 SET_BUF_TS_LINECOL_ZV (buf, TREESIT_BOB_LINECOL);
2313 }
1708 2314
1709 /* Update parser-list. */ 2315 /* Update parser-list. */
1710 BVAR (buf, ts_parser_list) = Fcons (lisp_parser, BVAR (buf, ts_parser_list)); 2316 BVAR (buf, ts_parser_list) = Fcons (lisp_parser, BVAR (buf, ts_parser_list));
@@ -4376,6 +4982,65 @@ nodes in the subtree, including NODE. */)
4376 } 4982 }
4377} 4983}
4378 4984
4985DEFUN ("treesit--linecol-at", Ftreesit__linecol_at,
4986 Streesit__linecol_at, 1, 1, 0,
4987 doc: /* Test buffer-local linecol cache.
4988
4989Calculate the line and column at POS using the buffer-local cache,
4990return the line and column in the form of
4991
4992 (LINE . COL)
4993
4994This is used for internal testing and debugging ONLY. */)
4995 (Lisp_Object pos)
4996{
4997 CHECK_NUMBER (pos);
4998 struct ts_linecol pos_linecol
4999 = treesit_linecol_of_pos (CHAR_TO_BYTE (XFIXNUM (pos)),
5000 BUF_TS_LINECOL_POINT (current_buffer));
5001 return Fcons (make_fixnum (pos_linecol.line), make_fixnum (pos_linecol.col));
5002}
5003
5004DEFUN ("treesit--linecol-cache-set", Ftreesit__linecol_cache_set,
5005 Streesit__linecol_cache_set, 3, 3, 0,
5006 doc: /* Set the linecol cache for the current buffer.
5007
5008This is used for internal testing and debugging ONLY. */)
5009 (Lisp_Object line, Lisp_Object col, Lisp_Object bytepos)
5010{
5011 CHECK_FIXNUM (line);
5012 CHECK_FIXNUM (col);
5013 CHECK_FIXNUM (bytepos);
5014
5015 struct ts_linecol linecol;
5016 linecol.line = XFIXNUM (line);
5017 linecol.col = XFIXNUM (col);
5018 linecol.bytepos = XFIXNUM (bytepos);
5019
5020 SET_BUF_TS_LINECOL_POINT (current_buffer, linecol);
5021
5022 return Qnil;
5023}
5024
5025DEFUN ("treesit--linecol-cache", Ftreesit__linecol_cache,
5026 Streesit__linecol_cache, 0, 0, 0,
5027 doc: /* Return the buffer-local linecol cache for debugging.
5028
5029Return a plist (:line LINE :col COL :pos POS :bytepos BYTEPOS). This is
5030used for internal testing and debugging ONLY. */)
5031 (void)
5032{
5033 struct ts_linecol cache = BUF_TS_LINECOL_POINT (current_buffer);
5034
5035 Lisp_Object plist = (list4 (QCcol, make_fixnum (cache.col),
5036 QCbytepos, make_fixnum (cache.bytepos)));
5037 plist = Fcons (make_fixnum (cache.line), plist);
5038 plist = Fcons (QCline, plist);
5039
5040 return plist;
5041}
5042
5043
4379#endif /* HAVE_TREE_SITTER */ 5044#endif /* HAVE_TREE_SITTER */
4380 5045
4381DEFUN ("treesit-available-p", Ftreesit_available_p, 5046DEFUN ("treesit-available-p", Ftreesit_available_p,
@@ -4418,6 +5083,11 @@ syms_of_treesit (void)
4418 DEFSYM (QCequal, ":equal"); 5083 DEFSYM (QCequal, ":equal");
4419 DEFSYM (QCmatch, ":match"); 5084 DEFSYM (QCmatch, ":match");
4420 DEFSYM (QCpred, ":pred"); 5085 DEFSYM (QCpred, ":pred");
5086 DEFSYM (QCline, ":line");
5087 DEFSYM (QCcol, ":col");
5088 DEFSYM (QCpos, ":pos");
5089 DEFSYM (QCbytepos, ":bytepos");
5090
4421 5091
4422 DEFSYM (Qnot_found, "not-found"); 5092 DEFSYM (Qnot_found, "not-found");
4423 DEFSYM (Qsymbol_error, "symbol-error"); 5093 DEFSYM (Qsymbol_error, "symbol-error");
@@ -4552,6 +5222,17 @@ applies to LANGUAGE-A will be redirected to LANGUAGE-B instead. */);
4552 DEFSYM (Qtreesit_language_remap_alist, "treesit-language-remap-alist"); 5222 DEFSYM (Qtreesit_language_remap_alist, "treesit-language-remap-alist");
4553 Fmake_variable_buffer_local (Qtreesit_language_remap_alist); 5223 Fmake_variable_buffer_local (Qtreesit_language_remap_alist);
4554 5224
5225 DEFVAR_LISP ("treesit-languages-require-line-column-tracking",
5226 Vtreesit_languages_require_line_column_tracking,
5227 doc:
5228 /* A list of languages that need line-column tracking.
5229
5230Most tree-sitter language grammars don't require line and column
5231tracking to work, but some languages do. When creating a parser, if the
5232language is in this list, Emacs enables line-column tracking for the
5233buffer. */);
5234 Vtreesit_languages_require_line_column_tracking = Qnil;
5235
4555 staticpro (&Vtreesit_str_libtree_sitter); 5236 staticpro (&Vtreesit_str_libtree_sitter);
4556 Vtreesit_str_libtree_sitter = build_string ("libtree-sitter-"); 5237 Vtreesit_str_libtree_sitter = build_string ("libtree-sitter-");
4557 staticpro (&Vtreesit_str_tree_sitter); 5238 staticpro (&Vtreesit_str_tree_sitter);
@@ -4596,6 +5277,9 @@ applies to LANGUAGE-A will be redirected to LANGUAGE-B instead. */);
4596 defsubr (&Streesit_language_abi_version); 5277 defsubr (&Streesit_language_abi_version);
4597 defsubr (&Streesit_grammar_location); 5278 defsubr (&Streesit_grammar_location);
4598 5279
5280 defsubr (&Streesit_parser_tracking_line_column_p);
5281 defsubr (&Streesit_tracking_line_column_p);
5282
4599 defsubr (&Streesit_parser_p); 5283 defsubr (&Streesit_parser_p);
4600 defsubr (&Streesit_node_p); 5284 defsubr (&Streesit_node_p);
4601 defsubr (&Streesit_compiled_query_p); 5285 defsubr (&Streesit_compiled_query_p);
@@ -4649,6 +5333,10 @@ applies to LANGUAGE-A will be redirected to LANGUAGE-B instead. */);
4649 defsubr (&Streesit_induce_sparse_tree); 5333 defsubr (&Streesit_induce_sparse_tree);
4650 defsubr (&Streesit_node_match_p); 5334 defsubr (&Streesit_node_match_p);
4651 defsubr (&Streesit_subtree_stat); 5335 defsubr (&Streesit_subtree_stat);
5336
5337 defsubr (&Streesit__linecol_at);
5338 defsubr (&Streesit__linecol_cache);
5339 defsubr (&Streesit__linecol_cache_set);
4652#endif /* HAVE_TREE_SITTER */ 5340#endif /* HAVE_TREE_SITTER */
4653 defsubr (&Streesit_available_p); 5341 defsubr (&Streesit_available_p);
4654#ifdef WINDOWSNT 5342#ifdef WINDOWSNT
diff --git a/src/treesit.h b/src/treesit.h
index 0d4635f4253..5937dba8653 100644
--- a/src/treesit.h
+++ b/src/treesit.h
@@ -26,6 +26,7 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
26 26
27#include <tree_sitter/api.h> 27#include <tree_sitter/api.h>
28#include "lisp.h" 28#include "lisp.h"
29#include "buffer.h"
29 30
30INLINE_HEADER_BEGIN 31INLINE_HEADER_BEGIN
31 32
@@ -97,6 +98,14 @@ struct Lisp_TS_Parser
97 (ref:visible-beg-null) in treesit.c for more explanation. */ 98 (ref:visible-beg-null) in treesit.c for more explanation. */
98 ptrdiff_t visible_beg; 99 ptrdiff_t visible_beg;
99 ptrdiff_t visible_end; 100 ptrdiff_t visible_end;
101 /* Caches the line and column number of VISIBLE_BEG. It's always
102 valid and matches VISIBLE_BEG (because it's updated at each buffer
103 edit). (It has to be, because in treesit_record_change, we need to
104 calculate the line/col offset of old_end_linecol, the exact reason
105 why is left as an exercise to the reader.) */
106 struct ts_linecol visi_beg_linecol;
107 /* Similar to VISI_BEG_LINECOL but caches VISIBLE_END. */
108 struct ts_linecol visi_end_linecol;
100 /* This counter is incremented every time a change is made to the 109 /* This counter is incremented every time a change is made to the
101 buffer in treesit_record_change. The node retrieved from this parser 110 buffer in treesit_record_change. The node retrieved from this parser
102 inherits this timestamp. This way we can make sure the node is 111 inherits this timestamp. This way we can make sure the node is
@@ -222,9 +231,21 @@ CHECK_TS_COMPILED_QUERY (Lisp_Object query)
222 231
223INLINE_HEADER_END 232INLINE_HEADER_END
224 233
225extern void treesit_record_change (ptrdiff_t, ptrdiff_t, ptrdiff_t); 234extern const struct ts_linecol TREESIT_BOB_LINECOL;
235/* An uninitialized linecol. */
236extern const struct ts_linecol TREESIT_EMPTY_LINECOL;
237extern const TSPoint TREESIT_TS_POINT_1_0;
238
239extern bool treesit_buf_tracks_linecol_p (struct buffer *);
240extern struct ts_linecol linecol_offset (struct ts_linecol,
241 struct ts_linecol);
242extern struct ts_linecol treesit_linecol_maybe (ptrdiff_t, ptrdiff_t,
243 struct ts_linecol);
244extern void treesit_record_change (ptrdiff_t, ptrdiff_t, ptrdiff_t,
245 struct ts_linecol, struct ts_linecol,
246 ptrdiff_t);
226extern Lisp_Object make_treesit_parser (Lisp_Object, TSParser *, TSTree *, 247extern Lisp_Object make_treesit_parser (Lisp_Object, TSParser *, TSTree *,
227 Lisp_Object, Lisp_Object); 248 Lisp_Object, Lisp_Object, bool);
228extern Lisp_Object make_treesit_node (Lisp_Object, TSNode); 249extern Lisp_Object make_treesit_node (Lisp_Object, TSNode);
229 250
230extern bool treesit_node_uptodate_p (Lisp_Object); 251extern bool treesit_node_uptodate_p (Lisp_Object);
diff --git a/src/xdisp.c b/src/xdisp.c
index a26626dd61e..9f3a684f25e 100644
--- a/src/xdisp.c
+++ b/src/xdisp.c
@@ -1163,8 +1163,6 @@ static const char *decode_mode_spec (struct window *, int, int, Lisp_Object *);
1163static void display_menu_bar (struct window *); 1163static void display_menu_bar (struct window *);
1164static void display_tab_bar (struct window *); 1164static void display_tab_bar (struct window *);
1165static void update_tab_bar (struct frame *, bool); 1165static void update_tab_bar (struct frame *, bool);
1166static ptrdiff_t display_count_lines (ptrdiff_t, ptrdiff_t, ptrdiff_t,
1167 ptrdiff_t *);
1168static void pint2str (register char *, register int, register ptrdiff_t); 1166static void pint2str (register char *, register int, register ptrdiff_t);
1169 1167
1170static int display_string (const char *, Lisp_Object, Lisp_Object, 1168static int display_string (const char *, Lisp_Object, Lisp_Object,
@@ -29399,7 +29397,7 @@ count_lines (ptrdiff_t start_byte, ptrdiff_t end_byte)
29399 found COUNT lines, or LIMIT_BYTE if we hit the limit before finding 29397 found COUNT lines, or LIMIT_BYTE if we hit the limit before finding
29400 COUNT lines. */ 29398 COUNT lines. */
29401 29399
29402static ptrdiff_t 29400ptrdiff_t
29403display_count_lines (ptrdiff_t start_byte, 29401display_count_lines (ptrdiff_t start_byte,
29404 ptrdiff_t limit_byte, ptrdiff_t count, 29402 ptrdiff_t limit_byte, ptrdiff_t count,
29405 ptrdiff_t *byte_pos_ptr) 29403 ptrdiff_t *byte_pos_ptr)