aboutsummaryrefslogtreecommitdiffstats
path: root/src/buffer.c
diff options
context:
space:
mode:
author: Yuan Fu, 2025-03-18 17:26:26 -0700
committer: Yuan Fu, 2025-05-03 22:14:03 -0700
commit: 1897da0b599cc3ea1e4aa626e47ac8943a7b6833 (patch)
tree: fef9034fb01a0883e5874923ad756b554b49cffd /src/buffer.c
parent: 159e3a981ed5482393182b036e38818d42405c90 (diff)
downloademacs-1897da0b599cc3ea1e4aa626e47ac8943a7b6833.tar.gz
emacs-1897da0b599cc3ea1e4aa626e47ac8943a7b6833.zip
Add line-column tracking for tree-sitter
Add line-column tracking for tree-sitter parsers. Copied from comments in treesit.c: Technically we had to send tree-sitter the line and column position of each edit. But in practice we just send it dummy values, because tree-sitter doesn't use them for parsing and mostly just carries the line and column positions around and returns them when e.g. reporting node positions[1]. This has been working fine until we encountered grammars that actually utilize the line and column information for parsing (Haskell)[2]. [1] https://github.com/tree-sitter/tree-sitter/issues/445 [2] https://github.com/tree-sitter/tree-sitter/issues/4001 So now we have to keep track of line and column positions and pass valid values to tree-sitter. (It adds quite some complexity, but only linearly; one can ignore all the linecol stuff when trying to understand treesit code and then come back to it later.) Eli convinced me to disable tracking by default, and only enable it for languages that need it. So the buffer starts out not tracking linecol. And when a parser is created, if the language is in treesit-languages-require-line-column-tracking, we enable tracking in the buffer, and enable tracking for the parser. To simplify things, once a buffer starts tracking linecol, it never disables tracking, even if parsers that need tracking are all deleted; and for parsers, tracking is determined at creation time: if it starts out tracking/non-tracking, it stays that way, regardless of later changes to treesit-languages-require-line-column-tracking. To make calculating line/column positions fast, we store linecol caches for begv, point, and zv in the buffer (buf->ts_linecol_xxx); and in the parser object, we store linecol cache for visible beg/end of that parser. In buffer editing functions, we need the linecol for start/old_end/new_end; those can be calculated by scanning newlines (treesit_linecol_of_pos) from the buffer point cache, which should always be near the point.
And we usually set the calculated linecol of new_end back to the buffer point cache. We also need to calculate linecol for the visible_beg/end for each parser, and linecol for the buffer's begv/zv; these positions are usually far from point, so we have caches for all of them (in either the parser object or the buffer). These positions are far from point, so it's inefficient to scan newlines from point to there to get up-to-date linecol for them; but at the same time, because they're far and outside the changed region, we can calculate their change in line and column number by simply counting how many newlines are added/removed in the changed region (compute_new_linecol_by_change). * doc/lispref/parsing.texi (Using Parser): Mention line-column tracking in manual. * etc/NEWS: Add news. * lisp/treesit.el: (treesit-languages-require-line-column-tracking): New variable. * src/buffer.c: Include treesit.h (for TREESIT_EMPTY_LINECOL). (Fget_buffer_create): (Fmake_indirect_buffer): Initialize new buffer fields. (Fbuffer_swap_text): Add new buffer fields. * src/buffer.h (ts_linecol): New struct. (buffer): New buffer fields. (BUF_TS_LINECOL_BEGV): (BUF_TS_LINECOL_POINT): (BUF_TS_LINECOL_ZV): (SET_BUF_TS_LINECOL_BEGV): (SET_BUF_TS_LINECOL_POINT): (SET_BUF_TS_LINECOL_ZV): New inline functions. * src/casefiddle.c (casify_region): Record linecol info. * src/editfns.c (Fsubst_char_in_region): (Ftranslate_region_internal): (Ftranspose_regions): Record linecol info. * src/insdel.c (insert_1_both): (insert_from_string_1): (insert_from_gap_1): (insert_from_buffer): (replace_range): (del_range_2): Record linecol info. * src/treesit.c (TREESIT_BOB_LINECOL): (TREESIT_EMPTY_LINECOL): (TREESIT_TS_POINT_1_0): New constants.
(treesit_debug_print_linecol): (treesit_buf_tracks_linecol_p): (restore_restriction_and_selective_display): (treesit_count_lines): (treesit_debug_validate_linecol): (treesit_linecol_of_pos): (treesit_make_ts_point): (Ftreesit_tracking_line_column_p): (Ftreesit_parser_tracking_line_column_p): New functions. (treesit_tree_edit_1): Accept real TSPoint and pass to tree-sitter. (compute_new_linecol_by_change): New function. (treesit_record_change_1): Rename from treesit_record_change, handle linecol if tracking is enabled. (treesit_linecol_maybe): New function. (treesit_record_change): New wrapper around treesit_record_change_1 that handles some boilerplate and sets buffer state. (treesit_sync_visible_region): Handle linecol if tracking is enabled. (make_treesit_parser): Set up the parser's linecol cache if tracking is enabled. (Ftreesit_parser_create): Enable tracking if the parser's language requires it. (Ftreesit__linecol_at): (Ftreesit__linecol_cache_set): (Ftreesit__linecol_cache): New functions for debugging and testing. (syms_of_treesit): New variable Vtreesit_languages_require_line_column_tracking. * src/treesit.h (Lisp_TS_Parser): New fields. (TREESIT_BOB_LINECOL): (TREESIT_EMPTY_LINECOL): New constants. * test/src/treesit-tests.el (treesit-linecol-basic): (treesit-linecol-search-back-across-newline): (treesit-linecol-col-same-line): (treesit-linecol-enable-disable): New tests. * src/lisp.h: Declare display_count_lines. * src/xdisp.c (display_count_lines): Remove static keyword.
Diffstat (limited to 'src/buffer.c')
-rw-r--r-- src/buffer.c 25
1 files changed, 25 insertions, 0 deletions
diff --git a/src/buffer.c b/src/buffer.c
index a408b799ff4..53aa3163fe0 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -48,6 +48,10 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
48#include "w32heap.h" /* for mmap_* */ 48#include "w32heap.h" /* for mmap_* */
49#endif 49#endif
50 50
51#ifdef HAVE_TREE_SITTER
52#include "treesit.h"
53#endif
54
51/* Work around GCC bug 109847 55/* Work around GCC bug 109847
52 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109847 56 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109847
53 which causes GCC to mistakenly complain about 57 which causes GCC to mistakenly complain about
@@ -641,6 +645,13 @@ even if it is dead. The return value is never nil. */)
641 bset_width_table (b, Qnil); 645 bset_width_table (b, Qnil);
642 b->prevent_redisplay_optimizations_p = 1; 646 b->prevent_redisplay_optimizations_p = 1;
643 647
648#ifdef HAVE_TREE_SITTER
649 /* By default, use empty linecol, which means disable tracking. */
650 SET_BUF_TS_LINECOL_BEGV (b, TREESIT_EMPTY_LINECOL);
651 SET_BUF_TS_LINECOL_POINT (b, TREESIT_EMPTY_LINECOL);
652 SET_BUF_TS_LINECOL_ZV (b, TREESIT_EMPTY_LINECOL);
653#endif
654
644 /* An ordinary buffer normally doesn't need markers 655 /* An ordinary buffer normally doesn't need markers
645 to handle BEGV and ZV. */ 656 to handle BEGV and ZV. */
646 bset_pt_marker (b, Qnil); 657 bset_pt_marker (b, Qnil);
@@ -867,6 +878,13 @@ Interactively, CLONE and INHIBIT-BUFFER-HOOKS are nil. */)
867 b->bidi_paragraph_cache = 0; 878 b->bidi_paragraph_cache = 0;
868 bset_width_table (b, Qnil); 879 bset_width_table (b, Qnil);
869 880
881#ifdef HAVE_TREE_SITTER
882 /* By default, use empty linecol, which means disable tracking. */
883 SET_BUF_TS_LINECOL_BEGV (b, TREESIT_EMPTY_LINECOL);
884 SET_BUF_TS_LINECOL_POINT (b, TREESIT_EMPTY_LINECOL);
885 SET_BUF_TS_LINECOL_ZV (b, TREESIT_EMPTY_LINECOL);
886#endif
887
870 name = Fcopy_sequence (name); 888 name = Fcopy_sequence (name);
871 set_string_intervals (name, NULL); 889 set_string_intervals (name, NULL);
872 bset_name (b, name); 890 bset_name (b, name);
@@ -2618,6 +2636,13 @@ results, see Info node `(elisp)Swapping Text'. */)
2618 bset_point_before_scroll (current_buffer, Qnil); 2636 bset_point_before_scroll (current_buffer, Qnil);
2619 bset_point_before_scroll (other_buffer, Qnil); 2637 bset_point_before_scroll (other_buffer, Qnil);
2620 2638
2639#ifdef HAVE_TREE_SITTER
2640 swapfield_ (ts_parser_list, Lisp_Object);
2641 swapfield (ts_linecol_begv, struct ts_linecol);
2642 swapfield (ts_linecol_point, struct ts_linecol);
2643 swapfield (ts_linecol_zv, struct ts_linecol);
2644#endif
2645
2621 modiff_incr (&current_buffer->text->modiff, 1); 2646 modiff_incr (&current_buffer->text->modiff, 1);
2622 modiff_incr (&other_buffer->text->modiff, 1); 2647 modiff_incr (&other_buffer->text->modiff, 1);
2623 modiff_incr (&current_buffer->text->chars_modiff, 1); 2648 modiff_incr (&current_buffer->text->chars_modiff, 1);