aboutsummaryrefslogtreecommitdiffstats
path: root/src/treesit.c
diff options
context:
space:
mode:
authorYuan Fu2025-03-18 17:26:26 -0700
committerYuan Fu2025-05-03 22:14:03 -0700
commit1897da0b599cc3ea1e4aa626e47ac8943a7b6833 (patch)
treefef9034fb01a0883e5874923ad756b554b49cffd /src/treesit.c
parent159e3a981ed5482393182b036e38818d42405c90 (diff)
downloademacs-1897da0b599cc3ea1e4aa626e47ac8943a7b6833.tar.gz
emacs-1897da0b599cc3ea1e4aa626e47ac8943a7b6833.zip
Add line-column tracking for tree-sitter
Add line-column tracking for tree-sitter parsers. Copied from comments in treesit.c: Technically we had to send tree-sitter the line and column position of each edit. But in practice we just send it dummy values, because tree-sitter doesn't use it for parsing and mostly just carries the line and column positions around and return it when e.g. reporting node positions[1]. This has been working fine until we encountered grammars that actually utilizes the line and column information for parsing (Haskell)[2]. [1] https://github.com/tree-sitter/tree-sitter/issues/445 [2] https://github.com/tree-sitter/tree-sitter/issues/4001 So now we have to keep track of line and column positions and pass valid values to tree-sitter. (It adds quite some complexity, but only linearly; one can ignore all the linecol stuff when trying to understand treesit code and then come back to it later.) Eli convinced me to disable tracking by default, and only enable it for languages that needs it. So the buffer starts out not tracking linecol. And when a parser is created, if the language is in treesit-languages-require-line-column-tracking, we enable tracking in the buffer, and enable tracking for the parser. To simplify things, once a buffer starts tracking linecol, it never disables tracking, even if parsers that need tracking are all deleted; and for parsers, tracking is determined at creation time, if it starts out tracking/non-tracking, it stays that way, regardless of later changes to treesit-languages-require-line-column-tracking. To make calculating line/column positons fast, we store linecol caches for begv, point, and zv in the buffer (buf->ts_linecol_cache_xxx); and in the parser object, we store linecol cache for visible beg/end of that parser. In buffer editing functions, we need the linecol for start/old_end/new_end, those can be calculated by scanning newlines (treesit_linecol_of_pos) from the buffer point cache, which should be always near the point. And we usually set the calculated linecol of new_end back to the buffer point cache. We also need to calculate linecol for the visible_beg/end for each parser, and linecol for the buffer's begv/zv, these positions are usually far from point, so we have caches for all of them (in either the parser object or the buffer). These positions are far from point, so it's inefficient to scan newlines from point to there to get up-to-date linecol for them; but in the same time, because they're far and outside the changed region, we can calculate their change in line and column number by simply counting how much newlines are added/removed in the changed region (compute_new_linecol_by_change). * doc/lispref/parsing.texi (Using Parser): Mention line-column tracking in manual. * etc/NEWS: Add news. * lisp/treesit.el: (treesit-languages-need-line-column-tracking): New variable. * src/buffer.c: Include treesit.h (for TREESIT_EMPTY_LINECOL). (Fget_buffer_create): (Fmake_indirect_buffer): Initialize new buffer fields. (Fbuffer_swap_text): Add new buffer fields. * src/buffer.h (ts_linecol): New struct. (buffer): New buffer fields. (BUF_TS_LINECOL_BEGV): (BUF_TS_LINECOL_POINT): (BUF_TS_LINECOL_ZV): (SET_BUF_TS_LINECOL_BEGV): (SET_BUF_TS_LINECOL_POINT): (SET_BUF_TS_LINECOL_ZV): New inline functions. * src/casefiddle.c (casify_region): Record linecol info. * src/editfns.c (Fsubst_char_in_region): (Ftranslate_region_internal): (Ftranspose_regions): Record linecol info. * src/insdel.c (insert_1_both): (insert_from_string_1): (insert_from_gap_1): (insert_from_buffer): (replace_range): (del_range_2): Record linecol info. * src/treesit.c (TREESIT_BOB_LINECOL): (TREESIT_EMPTY_LINECOL): (TREESIT_TS_POINT_1_0): New constants. (treesit_debug_print_linecol): (treesit_buf_tracks_linecol_p): (restore_restriction_and_selective_display): (treesit_count_lines): (treesit_debug_validate_linecol): (treesit_linecol_of_pos): (treesit_make_ts_point): (Ftreesit_tracking_line_column_p): (Ftreesit_parser_tracking_line_column_p): New functions. (treesit_tree_edit_1): Accept real TSPoint and pass to tree-sitter. (compute_new_linecol_by_change): New function. (treesit_record_change_1): Rename from treesit_record_change, handle linecol if tracking is enabled. (treesit_linecol_maybe): New function. (treesit_record_change): New wrapper around treesit_record_change_1 that handles some boilerplate and sets buffer state. (treesit_sync_visible_region): Handle linecol if tracking is enabled. (make_treesit_parser): Setup parser's linecol cache if tracking is enabled. (Ftreesit_parser_create): Enable tracking if the parser's language requires it. (Ftreesit__linecol_at): (Ftreesit__linecol_cache_set): (Ftreesit__linecol_cache): New functions for debugging and testing. (syms_of_treesit): New variable Vtreesit_languages_require_line_column_tracking. * src/treesit.h (Lisp_TS_Parser): New fields. (TREESIT_BOB_LINECOL): (TREESIT_EMPTY_LINECOL): New constants. * test/src/treesit-tests.el (treesit-linecol-basic): (treesit-linecol-search-back-across-newline): (treesit-linecol-col-same-line): (treesit-linecol-enable-disable): New tests. * src/lisp.h: Declare display_count_lines. * src/xdisp.c (display_count_lines): Remove static keyword.
Diffstat (limited to 'src/treesit.c')
-rw-r--r--src/treesit.c784
1 files changed, 736 insertions, 48 deletions
diff --git a/src/treesit.c b/src/treesit.c
index b0979397d35..3a19e0cb282 100644
--- a/src/treesit.c
+++ b/src/treesit.c
@@ -307,18 +307,13 @@ init_treesit_functions (void)
307 in Emacs's use cases. 307 in Emacs's use cases.
308 308
309 - Many tree-sitter functions take a TSPoint, which is basically a 309 - Many tree-sitter functions take a TSPoint, which is basically a
310 row and column. Emacs uses a gap buffer and does not keep 310 line and column. Emacs uses a gap buffer and does not keep
311 information about the row and column position of a buffer. 311 information about the line and column positions in a buffer, so
312 According to the author of tree-sitter, those functions only take 312 it's hard for us to pass it to tree-sitter. Instead we just give
313 a TSPoint so that it can be moved alongside the byte position and 313 it dummy values. But there are certain languages that does need
314 returned to the caller afterwards, and the position actually used 314 the line and column positions to work right, like Haskell. So we
315 is the specified byte position. He also said that he _thinks_ 315 added optional line and column tracking. See the linecol section
316 that just passing a byte position will also work. As a result, a 316 below.
317 dummy value is used in place of each TSPoint. Judging by the
318 nature of parsing algorithms, I think it is safe to use only the
319 byte position, and I don't think this will change in the future.
320
321 See: https://github.com/tree-sitter/tree-sitter/issues/445
322 317
323 treesit.h has some commentary on the two main data structure for 318 treesit.h has some commentary on the two main data structure for
324 the parser and node. treesit_sync_visible_region has some 319 the parser and node. treesit_sync_visible_region has some
@@ -350,8 +345,8 @@ init_treesit_functions (void)
350 345
351 Tree-sitter-related code in other files: 346 Tree-sitter-related code in other files:
352 - src/alloc.c for gc for parser and node 347 - src/alloc.c for gc for parser and node
353 - src/casefiddle.c & src/insdel.c for notifying tree-sitter 348 - src/casefiddle.c, src/insdel.c, src/editfns.c for notifying
354 parser of buffer changes. 349 tree-sitter parser of buffer changes.
355 - lisp/emacs-lisp/cl-preloaded.el & data.c & lisp.h for parser and 350 - lisp/emacs-lisp/cl-preloaded.el & data.c & lisp.h for parser and
356 node type. 351 node type.
357 - print.c for printing tree-sitter objects (node, parser, query). 352 - print.c for printing tree-sitter objects (node, parser, query).
@@ -406,7 +401,66 @@ init_treesit_functions (void)
406 from the user's POV, each buffer, regardless of indirect or not, 401 from the user's POV, each buffer, regardless of indirect or not,
407 appears to have their own parser list. A discussion can be found in 402 appears to have their own parser list. A discussion can be found in
408 bug#59693. Note that that discussion led to an earlier design, which 403 bug#59693. Note that that discussion led to an earlier design, which
409 is different from the current one. */ 404 is different from the current one.
405
406 Line and column reporting to tree-sitter: technically we had to send
407 tree-sitter the line and column position of each edit. But in
408 practice we just send it dummy values, because tree-sitter doesn't
409 use it for parsing and mostly just carries the line and column
410 positions around and return it when e.g. reporting node positions[1].
411 This has been working fine until we encountered grammars that
412 actually utilizes the line and column information for parsing
413 (Haskell)[2].
414
415 [1] https://github.com/tree-sitter/tree-sitter/issues/445
416 [2] https://github.com/tree-sitter/tree-sitter/issues/4001
417
418 So now we have to keep track of line and column positions and pass
419 valid values to tree-sitter. (It adds quite some complexity, but
420 only linearly; one can ignore all the linecol stuff when trying to
421 understand treesit code and then come back to it later.) Eli
422 convinced me to disable tracking by default, and only enable it for
423 languages that needs it. So the buffer starts out not tracking
424 linecol. And when a parser is created, if the language is in
425 treesit-languages-require-line-column-tracking, we enable tracking in
426 the buffer, and enable tracking for the parser. To simplify things,
427 once a buffer starts tracking linecol, it never disables tracking,
428 even if parsers that need tracking are all deleted; and for parsers,
429 tracking is determined at creation time, if it starts out
430 tracking/non-tracking, it stays that way, regardless of later changes
431 to treesit-languages-require-line-column-tracking.
432
433 To make calculating line/column positons fast, we store linecol
434 caches for begv, point, and zv in the buffer
435 (buf->ts_linecol_cache_xxx); and in the parser object, we store
436 linecol cache for visible beg/end of that parser.
437
438 In buffer editing functions, we need the linecol for
439 start/old_end/new_end, those can be calculated by scanning newlines
440 (treesit_linecol_of_pos) from the buffer point cache, which should be
441 always near the point. And we usually set the calculated linecol of
442 new_end back to the buffer point cache.
443
444 We also need to calculate linecol for the visible_beg/end for each
445 parser, and linecol for the buffer's begv/zv, these positions are
446 usually far from point, so we have caches for all of them (in either
447 the parser object or the buffer). These positions are far from
448 point, so it's inefficient to scan newlines from point to there to
449 get up-to-date linecol for them; but in the same time, because
450 they're far and outside the changed region, we can calculate their
451 change in line and column number by simply counting how much newlines
452 are added/removed in the changed region
453 (compute_new_linecol_by_change). */
454
455
456/*** Constants */
457
458/* A linecol_cache that points to BOB, this is always valid. */
459const struct ts_linecol TREESIT_BOB_LINECOL = { 1, 1, 0 };
460/* An uninitialized linecol. */
461const struct ts_linecol TREESIT_EMPTY_LINECOL = { 0, 0, 0 };
462const TSPoint TREESIT_TS_POINT_1_0 = { 1, 0 };
463
410 464
411 465
412/*** Initialization */ 466/*** Initialization */
@@ -864,6 +918,241 @@ loaded or the file name couldn't be determined, return nil. */)
864} 918}
865 919
866 920
921/*** Linecol functions */
922
923#define TREESIT_DEBUG_LINECOL false
924
925void treesit_debug_print_linecol (struct ts_linecol);
926
927void
928treesit_debug_print_linecol (struct ts_linecol linecol)
929{
930 printf ("{ line=%ld col=%ld bytepos=%ld }\n", linecol.line, linecol.col, linecol.bytepos);
931}
932
933/* Returns true if BUF tracks linecol. */
934bool treesit_buf_tracks_linecol_p (struct buffer *buf)
935{
936 return BUF_TS_LINECOL_BEGV (buf).bytepos != 0;
937}
938
939static void
940restore_restriction_and_selective_display (Lisp_Object record)
941{
942 save_restriction_restore (Fcar (record));
943 BVAR (current_buffer, selective_display) = Fcdr (record);
944 return;
945}
946
947/* Similar to display_count_lines, but behaves differently when
948 searching backwards: when found a newline, stop at the newline,
949 return count as normal (display_count_lines stops after the newline
950 and subtracts one from count). When searching forward, stop at the
951 position after the newline. Another difference is this function
952 disregards narrowing, so it works on bytepos outside of the visible
953 range. */
954static ptrdiff_t
955treesit_count_lines (ptrdiff_t start_byte,
956 ptrdiff_t limit_byte, ptrdiff_t count,
957 ptrdiff_t *byte_pos_ptr)
958{
959 /* I don't think display_count_lines signals, so the unwind-protect
960 technically isn't necessary. Also treesit_count_lines aren't
961 suppose to signal either since it's used in functions that aren't
962 supposed to signal (treesit_record_change and friends). */
963 Lisp_Object record = Fcons (save_restriction_save (),
964 BVAR (current_buffer, selective_display));
965
966
967 specpdl_ref pdl_count = SPECPDL_INDEX ();
968 record_unwind_protect (restore_restriction_and_selective_display, record);
969
970 BVAR (current_buffer, selective_display) = Qnil;
971 labeled_restrictions_remove_in_current_buffer ();
972 Fwiden ();
973 ptrdiff_t counted = display_count_lines (start_byte, limit_byte,
974 count, byte_pos_ptr);
975
976 unbind_to (pdl_count, Qnil);
977
978 /* If searching backwards and we found COUNT newlines, countermand the
979 different logic in display_count_lines. */
980 if (count < 0 && limit_byte != *byte_pos_ptr)
981 {
982 counted += 1;
983 *byte_pos_ptr -= 1;
984 }
985
986 return counted;
987}
988
989static void
990treesit_debug_validate_linecol (struct ts_linecol linecol)
991{
992 eassert (linecol.bytepos <= Z_BYTE);
993
994 /* We can't use count_lines as ground truth because it respects
995 narrowing, and calling it with a bytepos outside of the visible
996 portion results in infloop. */
997 ptrdiff_t _unused;
998 ptrdiff_t true_line_count = treesit_count_lines (BEG_BYTE, linecol.bytepos,
999 Z_BYTE, &_unused) + 1;
1000 eassert (true_line_count == linecol.line);
1001}
1002
1003/* Calculate and return the line and column number of BYTE_POS by
1004 scanning newlines from CACHE. CACHE must be valid. */
1005static struct ts_linecol
1006treesit_linecol_of_pos (ptrdiff_t target_bytepos,
1007 struct ts_linecol cache)
1008{
1009 if (TREESIT_DEBUG_LINECOL)
1010 {
1011 treesit_debug_validate_linecol (cache);
1012 }
1013
1014 /* When we finished searching for newlines between CACHE and
1015 TARGET_POS, BYTE_POS_2 is at TARGET_POS, and BYTE_POS_1 is at the
1016 previous newline. If TARGET_POS happends to be on a newline,
1017 BYTE_POS_1 will be on that position. BYTE_POS_1 is used for
1018 calculating the column. (If CACHE and TARGET_POS are in the same
1019 line, BYTE_POS_1 is unset and we don't use it.) */
1020 ptrdiff_t byte_pos_1 = 0;
1021 ptrdiff_t byte_pos_2 = 0;
1022 /* Number of lines between CACHE and TARGET_POS. */
1023 ptrdiff_t line_delta = 0;
1024
1025 if (target_bytepos == cache.bytepos)
1026 return cache;
1027
1028 /* Search forward. */
1029 if (cache.bytepos < target_bytepos)
1030 {
1031 byte_pos_2 = cache.bytepos;
1032 while (byte_pos_2 < target_bytepos)
1033 {
1034 ptrdiff_t counted = treesit_count_lines (byte_pos_2, target_bytepos,
1035 1, &byte_pos_2);
1036
1037 if (counted > 0)
1038 {
1039 byte_pos_1 = byte_pos_2;
1040 }
1041 line_delta += counted;
1042 }
1043 eassert (byte_pos_2 == target_bytepos);
1044 /* At this point, byte_pos_2 is at target_pos, and byte_pos_1 is
1045 at the previous newline if we went across any. */
1046
1047 struct ts_linecol target_linecol;
1048 target_linecol.bytepos = target_bytepos;
1049 target_linecol.line = cache.line + line_delta;
1050 /* If we moved across any newline, use the previous newline to
1051 calculate the column; if we stayed at the same line, use the
1052 cached column to calculate the new column. */
1053 target_linecol.col = line_delta > 0
1054 ? target_bytepos - byte_pos_1
1055 : target_bytepos - cache.bytepos + cache.col;
1056
1057 if (TREESIT_DEBUG_LINECOL)
1058 {
1059 treesit_debug_validate_linecol (target_linecol);
1060 }
1061
1062 return target_linecol;
1063 }
1064
1065 /* Search backward. */
1066 byte_pos_2 = cache.bytepos;
1067 while (byte_pos_2 > target_bytepos)
1068 {
1069 ptrdiff_t counted = treesit_count_lines (byte_pos_2, target_bytepos,
1070 -1, &byte_pos_2);
1071 line_delta -= counted;
1072 }
1073 eassert (byte_pos_2 == target_bytepos);
1074 /* At this point, pos_2 is at target_pos. */
1075
1076 struct ts_linecol target_linecol;
1077 target_linecol.bytepos = target_bytepos;
1078 target_linecol.line = cache.line + line_delta;
1079 eassert (cache.line + line_delta > 0);
1080
1081 /* Calculate the column. */
1082 if (line_delta == 0)
1083 {
1084 target_linecol.col = cache.col - (cache.bytepos - target_bytepos);
1085 }
1086 else
1087 {
1088 /* We need to find the previous newline in order to calculate the
1089 column. */
1090 ptrdiff_t counted = treesit_count_lines (byte_pos_2, BEG_BYTE, -1, &byte_pos_2);
1091 target_linecol.col
1092 = target_bytepos - (byte_pos_2 + counted == 1 ? 1 : 0);
1093 }
1094
1095 if (TREESIT_DEBUG_LINECOL)
1096 {
1097 treesit_debug_validate_linecol (target_linecol);
1098 }
1099
1100 return target_linecol;
1101}
1102
1103/* Return a TSPoint given POS and VISIBLE_BEG. VISIBLE_BEG must be
1104 before POS. */
1105static TSPoint
1106treesit_make_ts_point (struct ts_linecol visible_beg,
1107 struct ts_linecol pos)
1108{
1109 TSPoint point;
1110 if (visible_beg.line == pos.line)
1111 {
1112 point.row = 0;
1113 point.column = pos.col - visible_beg.col;
1114 eassert (point.column >= 0);
1115 }
1116 else
1117 {
1118 point.row = pos.line - visible_beg.line;
1119 eassert (point.row > 0);
1120 point.column = pos.col;
1121 }
1122 return point;
1123}
1124
1125DEFUN ("treesit-tracking-line-column-p",
1126 Ftreesit_tracking_line_column_p,
1127 Streesit_tracking_line_column_p, 0, 1, 0,
1128 doc : /* Return non-nil if BUFFER is tracking line and column.
1129
1130Return nil otherwise. BUFFER defaults to the current buffer. */)
1131 (Lisp_Object buffer)
1132{
1133 struct buffer *buf = current_buffer;
1134 if (!NILP (buffer))
1135 {
1136 CHECK_BUFFER (buffer);
1137 buf = XBUFFER (buffer);
1138 }
1139
1140 return treesit_buf_tracks_linecol_p (buf) ? Qt : Qnil;
1141}
1142
1143DEFUN ("treesit-parser-tracking-line-column-p",
1144 Ftreesit_parser_tracking_line_column_p,
1145 Streesit_parser_tracking_line_column_p, 1, 1, 0,
1146 doc : /* Return non-nil if PARSER is tracking line and column.
1147
1148Return nil otherwise.*/)
1149 (Lisp_Object parser)
1150{
1151 CHECK_TS_PARSER (parser);
1152 return XTS_PARSER (parser)->visi_beg_linecol.bytepos == 0 ? Qnil : Qt;
1153}
1154
1155
867/*** Parsing functions */ 1156/*** Parsing functions */
868 1157
869static void 1158static void
@@ -879,34 +1168,147 @@ treesit_check_parser (Lisp_Object obj)
879 larger than UINT32_MAX. */ 1168 larger than UINT32_MAX. */
880static inline void 1169static inline void
881treesit_tree_edit_1 (TSTree *tree, ptrdiff_t start_byte, 1170treesit_tree_edit_1 (TSTree *tree, ptrdiff_t start_byte,
882 ptrdiff_t old_end_byte, ptrdiff_t new_end_byte) 1171 ptrdiff_t old_end_byte, ptrdiff_t new_end_byte,
1172 TSPoint start_point, TSPoint old_end_point,
1173 TSPoint new_end_point)
883{ 1174{
884 eassert (start_byte >= 0); 1175 eassert (start_byte >= 0);
885 eassert (start_byte <= old_end_byte); 1176 eassert (start_byte <= old_end_byte);
886 eassert (start_byte <= new_end_byte); 1177 eassert (start_byte <= new_end_byte);
887 TSPoint dummy_point = {0, 0};
888 eassert (start_byte <= UINT32_MAX); 1178 eassert (start_byte <= UINT32_MAX);
889 eassert (old_end_byte <= UINT32_MAX); 1179 eassert (old_end_byte <= UINT32_MAX);
890 eassert (new_end_byte <= UINT32_MAX); 1180 eassert (new_end_byte <= UINT32_MAX);
891 TSInputEdit edit = {(uint32_t) start_byte, 1181 TSInputEdit edit = {(uint32_t) start_byte,
892 (uint32_t) old_end_byte, 1182 (uint32_t) old_end_byte,
893 (uint32_t) new_end_byte, 1183 (uint32_t) new_end_byte,
894 dummy_point, dummy_point, dummy_point}; 1184 start_point, old_end_point, new_end_point};
895 ts_tree_edit (tree, &edit); 1185 ts_tree_edit (tree, &edit);
896} 1186}
897 1187
898/* Update each parser's tree after the user made an edit. This 1188/* Given a position at POS_LINECOL, and the linecol of a buffer change
899 function does not parse the buffer and only updates the tree, so it 1189 (START_LINECOL, OLD_END_LINECOL, and NEW_END_LINCOL), compute the new
900 should be very fast. */ 1190 linecol for that position, then scan from this now valid linecol to
901void 1191 TARGET_BYTEPOS and return the linecol at TARGET_BYTEPOS.
902treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte, 1192
903 ptrdiff_t new_end_byte) 1193 When POS_LINECOL is outside of the range between START_LINECOL and
1194 OLD_END_LINECOL, we can calculate the change in line and column
1195 number of POS_LINECOL by simply counting how many newlines are
1196 removed/added in the change. Once we have the up-to-date line and
1197 column number at POS_LINECOL.bytepos, we can just scan to
1198 TARGET_BYTEPOS to get a linecol for it. The assumption is that
1199 TARGET_BYTEPOS is far from START_LINECOL, etc, but close to
1200 POS_LINECOL. So we avoids scanning longs distance from
1201 START_LINECOL, etc.
1202
1203 However, this optimization only works when POS_LINECOL is outside the
1204 range between START_LINECOL and OLD_END_LINECOL. If not, we've have
1205 to scan from START_LINECOL or NEW_END_LINECOL to TARGET_BYTEPOS. */
1206static struct ts_linecol
1207compute_new_linecol_by_change (struct ts_linecol pos_linecol,
1208 struct ts_linecol start_linecol,
1209 struct ts_linecol old_end_linecol,
1210 struct ts_linecol new_end_linecol,
1211 ptrdiff_t target_bytepos)
1212{
1213 struct ts_linecol new_linecol = { 0, 0, 0 };
1214
1215 /* 1. Even start is behind pos, pos isn't affected. */
1216 if (start_linecol.bytepos >= pos_linecol.bytepos)
1217 {
1218 new_linecol = pos_linecol;
1219 }
1220 /* 2. When old_end (oe) is before pos, the differnce between pos and
1221 pos' is the difference between old_end and new_end (ne).
1222
1223 | | | | | |
1224 s oe pos s oe pos
1225 OR
1226 | | | | |
1227 s ne pos' s ne pos'
1228
1229 */
1230 else if (old_end_linecol.bytepos <= pos_linecol.bytepos)
1231 {
1232 ptrdiff_t line_delta = new_end_linecol.line - old_end_linecol.line;
1233 new_linecol.line = pos_linecol.line + line_delta;
1234 new_linecol.bytepos
1235 = pos_linecol.bytepos + new_end_linecol.bytepos - old_end_linecol.bytepos;
1236
1237 /* Suppose # is text, | is cursor:
1238
1239 ################
1240 ########|########|
1241 oe pos
1242
1243 Now, if we insert something:
1244
1245 ################
1246 ########|OOOOO
1247 OOOOOOOOOO|########|
1248 ne pos'
1249
1250 Clearly, col for pos' is just the col of new_end plus the
1251 distance between old_end and pos. The same goes for deletion.
1252 */
1253 if (old_end_linecol.line == pos_linecol.line)
1254 {
1255 eassert (old_end_linecol.col <= pos_linecol.col);
1256 ptrdiff_t old_end_to_pos = pos_linecol.col - old_end_linecol.col;
1257 new_linecol.col = new_end_linecol.col + old_end_to_pos;
1258 }
1259 else
1260 {
1261 new_linecol.col = pos_linecol.col;
1262 }
1263 }
1264 /* 3. At this point, start < pos < old_end. We're kinda cooked, there
1265 aren't much we can do other than scan the buffer from new_end or
1266 start. */
1267 else if (target_bytepos - start_linecol.bytepos
1268 < eabs (target_bytepos - new_end_linecol.bytepos))
1269 {
1270 new_linecol = treesit_linecol_of_pos (target_bytepos, start_linecol);
1271 }
1272 else
1273 {
1274 new_linecol = treesit_linecol_of_pos (target_bytepos, new_end_linecol);
1275 }
1276
1277 /* Now new_linecol is a valid linecol, scan from it to target_bytepos. */
1278 if (new_linecol.bytepos != target_bytepos)
1279 {
1280 new_linecol = treesit_linecol_of_pos (target_bytepos, new_linecol);
1281 }
1282
1283 if (TREESIT_DEBUG_LINECOL)
1284 treesit_debug_validate_linecol (new_linecol);
1285
1286 return new_linecol;
1287}
1288
1289/* Update each parser's tree after the user made an edit. This function
1290 does not parse the buffer and only updates the tree, so it should be
1291 very fast. If the caller knows there's no parser in the current
1292 buffer, they can pass empty linecol for
1293 START/OLD_END/NEW_END_linecol.
1294
1295 If the current buffer doesn't track linecol, start_linecol,
1296 old_end_linecol, and new_end_linecol will be empty. In that case,
1297 don't process linecols. */
1298static void
1299treesit_record_change_1 (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
1300 ptrdiff_t new_end_byte,
1301 struct ts_linecol start_linecol,
1302 struct ts_linecol old_end_linecol,
1303 struct ts_linecol new_end_linecol)
904{ 1304{
905 struct buffer *base_buffer = current_buffer; 1305 struct buffer *base_buffer = current_buffer;
906 if (current_buffer->base_buffer) 1306 if (current_buffer->base_buffer)
907 base_buffer = current_buffer->base_buffer; 1307 base_buffer = current_buffer->base_buffer;
908 Lisp_Object parser_list = BVAR (base_buffer, ts_parser_list); 1308 Lisp_Object parser_list = BVAR (base_buffer, ts_parser_list);
909 1309
1310 bool buf_tracks_linecol = start_linecol.bytepos != 0;
1311
910 FOR_EACH_TAIL_SAFE (parser_list) 1312 FOR_EACH_TAIL_SAFE (parser_list)
911 { 1313 {
912 CHECK_CONS (parser_list); 1314 CHECK_CONS (parser_list);
@@ -916,16 +1318,22 @@ treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
916 /* See comment (ref:visible-beg-null) if you wonder why we don't 1318 /* See comment (ref:visible-beg-null) if you wonder why we don't
917 update visible_beg/end when tree is NULL. */ 1319 update visible_beg/end when tree is NULL. */
918 1320
1321 bool parser_tracks_linecol
1322 = XTS_PARSER (lisp_parser)->visi_beg_linecol.bytepos != 0;
1323
919 if (tree != NULL) 1324 if (tree != NULL)
920 { 1325 {
921 eassert (start_byte <= old_end_byte); 1326 eassert (start_byte <= old_end_byte);
922 eassert (start_byte <= new_end_byte); 1327 eassert (start_byte <= new_end_byte);
923 /* Think the recorded change as a delete followed by an 1328 /* Before sending the edit to tree-sitter, we need to first
924 insert, and think of them as moving unchanged text back 1329 clip the beg/end to visible_beg and visible_end of the
925 and forth. After all, the whole point of updating the 1330 parser. A tip for understanding the code below: think the
926 tree is to update the position of unchanged text. */ 1331 recorded change as a delete followed by an insert, and
927 ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg; 1332 think of them as moving unchanged text back and forth.
928 ptrdiff_t visible_end = XTS_PARSER (lisp_parser)->visible_end; 1333 After all, the whole point of updating the tree is to
1334 update the position of unchanged text. */
1335 const ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg;
1336 const ptrdiff_t visible_end = XTS_PARSER (lisp_parser)->visible_end;
929 eassert (visible_beg >= 0); 1337 eassert (visible_beg >= 0);
930 eassert (visible_beg <= visible_end); 1338 eassert (visible_beg <= visible_end);
931 1339
@@ -949,10 +1357,6 @@ treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
949 eassert (start_offset <= old_end_offset); 1357 eassert (start_offset <= old_end_offset);
950 eassert (start_offset <= new_end_offset); 1358 eassert (start_offset <= new_end_offset);
951 1359
952 treesit_tree_edit_1 (tree, start_offset, old_end_offset,
953 new_end_offset);
954 XTS_PARSER (lisp_parser)->need_reparse = true;
955
956 /* VISIBLE_BEG/END records tree-sitter's range of view in 1360 /* VISIBLE_BEG/END records tree-sitter's range of view in
957 the buffer. We need to adjust them when tree-sitter's 1361 the buffer. We need to adjust them when tree-sitter's
958 view changes. */ 1362 view changes. */
@@ -966,19 +1370,133 @@ treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
966 visi_beg_delta = (old_end_byte < visible_beg 1370 visi_beg_delta = (old_end_byte < visible_beg
967 ? new_end_byte - old_end_byte : 0); 1371 ? new_end_byte - old_end_byte : 0);
968 1372
969 XTS_PARSER (lisp_parser)->visible_beg = visible_beg + visi_beg_delta; 1373 const ptrdiff_t new_visible_beg = visible_beg + visi_beg_delta;
970 XTS_PARSER (lisp_parser)->visible_end = (visible_end 1374 const ptrdiff_t new_visible_end
971 + visi_beg_delta 1375 = (visible_end + visi_beg_delta
972 + (new_end_offset 1376 + (new_end_offset - old_end_offset));
973 - old_end_offset)); 1377
1378 XTS_PARSER (lisp_parser)->visible_beg = new_visible_beg;
1379 XTS_PARSER (lisp_parser)->visible_end = new_visible_end;
1380
1381 eassert (BEG_BYTE <= new_visible_beg);
1382 eassert (new_visible_beg <= new_visible_end);
1383 eassert (new_visible_end <= Z_BYTE);
1384
1385 /* (Optionally) calculate the point for start/old_end/new_end
1386 to be sent to tree-sitter. Also update parser cache for
1387 linecol. */
1388 TSPoint start_point = TREESIT_TS_POINT_1_0;
1389 TSPoint old_end_point = TREESIT_TS_POINT_1_0;
1390 TSPoint new_end_point = TREESIT_TS_POINT_1_0;
1391 if (parser_tracks_linecol)
1392 {
1393 eassert (buf_tracks_linecol);
1394 struct ts_linecol old_visi_beg_linecol
1395 = XTS_PARSER (lisp_parser)->visi_beg_linecol;
1396 struct ts_linecol old_visi_end_linecol
1397 = XTS_PARSER (lisp_parser)->visi_end_linecol;
1398
1399 const struct ts_linecol new_visi_beg_linecol
1400 = compute_new_linecol_by_change (old_visi_beg_linecol,
1401 start_linecol,
1402 old_end_linecol,
1403 new_end_linecol,
1404 new_visible_beg);
1405 const struct ts_linecol new_visi_end_linecol
1406 = compute_new_linecol_by_change (old_visi_end_linecol,
1407 start_linecol,
1408 old_end_linecol,
1409 new_end_linecol,
1410 new_visible_end);
1411 XTS_PARSER (lisp_parser)->visi_beg_linecol
1412 = new_visi_beg_linecol;
1413 XTS_PARSER (lisp_parser)->visi_end_linecol
1414 = new_visi_end_linecol;
1415
1416 /* Now, calculate TSPoints and finally update the tree. */
1417 struct ts_linecol new_begv_linecol
1418 = XTS_PARSER (lisp_parser)->visi_beg_linecol;
1419 old_end_point = treesit_make_ts_point (old_visi_beg_linecol,
1420 old_end_linecol);
1421 start_point = treesit_make_ts_point (new_begv_linecol,
1422 start_linecol);
1423 new_end_point = treesit_make_ts_point (new_begv_linecol,
1424 new_end_linecol);
1425 }
974 1426
975 eassert (XTS_PARSER (lisp_parser)->visible_beg >= 0); 1427 treesit_tree_edit_1 (tree, start_offset, old_end_offset,
976 eassert (XTS_PARSER (lisp_parser)->visible_beg 1428 new_end_offset, start_point, old_end_point,
977 <= XTS_PARSER (lisp_parser)->visible_end); 1429 new_end_point);
1430 XTS_PARSER (lisp_parser)->need_reparse = true;
978 } 1431 }
979 } 1432 }
980} 1433}
981 1434
1435/* Return the linecol of POS, calculated from CACHE. But if there's no
1436 parser in the current buffer, or line-column tracking is disabled,
1437 skip calculation and return an empty linecol instead. */
1438struct ts_linecol
1439treesit_linecol_maybe (ptrdiff_t pos, ptrdiff_t pos_byte,
1440 struct ts_linecol cache)
1441{
1442 if (NILP (BVAR (current_buffer, ts_parser_list))
1443 || !treesit_buf_tracks_linecol_p (current_buffer))
1444 return TREESIT_EMPTY_LINECOL;
1445
1446 return treesit_linecol_of_pos (pos_byte, cache);
1447}
1448
1449/* Update each parser's tree after the user made an edit. This function
1450 does not parse the buffer and only updates the tree, so it should be
1451 very fast.
1452
1453 This is a wrapper over treesit_record_change that does a bit more
1454 boilerplate work: it (optionally) calculates linecol for new_end,
1455 pass all the positions into treesit_record_change_1 which does the
1456 real work, and finally (optionally) sets buffer's linecol cache to
1457 new_end's linecol.
1458
1459 If NEW_END is next to NEW_END_BYTE in the arglist, caller might
1460 accidentally swap them, so I placed NEW_END at the end of the
1461 arglist.
1462
1463 If the current buffer doesn't track linecol, start_linecol and
1464 old_end_linecol will be empty. In that case, don't process
1465 linecols. */
1466void
1467treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
1468 ptrdiff_t new_end_byte,
1469 struct ts_linecol start_linecol,
1470 struct ts_linecol old_end_linecol,
1471 ptrdiff_t new_end)
1472{
1473 struct ts_linecol new_end_linecol
1474 = treesit_linecol_maybe (new_end, new_end_byte, start_linecol);
1475
1476 treesit_record_change_1 (start_byte, old_end_byte, new_end_byte,
1477 start_linecol, old_end_linecol, new_end_linecol);
1478
1479 if (new_end_linecol.bytepos != 0)
1480 {
1481 const struct ts_linecol new_begv_linecol
1482 = compute_new_linecol_by_change (BUF_TS_LINECOL_BEGV (current_buffer),
1483 start_linecol,
1484 old_end_linecol,
1485 new_end_linecol,
1486 BEGV_BYTE);
1487 const struct ts_linecol new_zv_linecol
1488 = compute_new_linecol_by_change (BUF_TS_LINECOL_ZV (current_buffer),
1489 start_linecol,
1490 old_end_linecol,
1491 new_end_linecol,
1492 ZV_BYTE);
1493
1494 SET_BUF_TS_LINECOL_BEGV (current_buffer, new_begv_linecol);
1495 SET_BUF_TS_LINECOL_POINT (current_buffer, new_end_linecol);
1496 SET_BUF_TS_LINECOL_ZV (current_buffer, new_zv_linecol);
1497 }
1498}
1499
982static TSRange *treesit_make_ts_ranges (Lisp_Object, Lisp_Object, 1500static TSRange *treesit_make_ts_ranges (Lisp_Object, Lisp_Object,
983 uint32_t *); 1501 uint32_t *);
984 1502
@@ -1034,6 +1552,7 @@ treesit_sync_visible_region (Lisp_Object parser)
1034{ 1552{
1035 TSTree *tree = XTS_PARSER (parser)->tree; 1553 TSTree *tree = XTS_PARSER (parser)->tree;
1036 struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); 1554 struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
1555 const bool track_linecol = treesit_buf_tracks_linecol_p (buffer);
1037 1556
1038 /* If we are setting visible_beg/end for the first time, we can skip 1557 /* If we are setting visible_beg/end for the first time, we can skip
1039 the offset acrobatics and updating the tree below. */ 1558 the offset acrobatics and updating the tree below. */
@@ -1046,6 +1565,7 @@ treesit_sync_visible_region (Lisp_Object parser)
1046 1565
1047 ptrdiff_t visible_beg = XTS_PARSER (parser)->visible_beg; 1566 ptrdiff_t visible_beg = XTS_PARSER (parser)->visible_beg;
1048 ptrdiff_t visible_end = XTS_PARSER (parser)->visible_end; 1567 ptrdiff_t visible_end = XTS_PARSER (parser)->visible_end;
1568
1049 eassert (0 <= visible_beg); 1569 eassert (0 <= visible_beg);
1050 eassert (visible_beg <= visible_end); 1570 eassert (visible_beg <= visible_end);
1051 1571
@@ -1066,39 +1586,81 @@ treesit_sync_visible_region (Lisp_Object parser)
1066 from ________|xxxx|__ 1586 from ________|xxxx|__
1067 to |xxxx|__________ */ 1587 to |xxxx|__________ */
1068 1588
1589 struct ts_linecol visi_beg_linecol = track_linecol
1590 ? XTS_PARSER (parser)->visi_beg_linecol : TREESIT_EMPTY_LINECOL;
1591 struct ts_linecol visi_end_linecol = track_linecol
1592 ? XTS_PARSER (parser)->visi_end_linecol : TREESIT_EMPTY_LINECOL;
1593
1594 struct ts_linecol buffer_begv_linecol = track_linecol
1595 ? treesit_linecol_of_pos (BUF_BEGV_BYTE (buffer), BUF_TS_LINECOL_BEGV (buffer))
1596 : TREESIT_EMPTY_LINECOL;
1597 struct ts_linecol buffer_zv_linecol = track_linecol
1598 ? treesit_linecol_of_pos (BUF_ZV_BYTE (buffer), BUF_TS_LINECOL_ZV (buffer))
1599 : TREESIT_EMPTY_LINECOL;
1600
1601 if (track_linecol) eassert (visi_beg_linecol.bytepos == visible_beg);
1602
1069 /* 1. Make sure visible_beg <= BUF_BEGV_BYTE. */ 1603 /* 1. Make sure visible_beg <= BUF_BEGV_BYTE. */
1070 if (visible_beg > BUF_BEGV_BYTE (buffer)) 1604 if (visible_beg > BUF_BEGV_BYTE (buffer))
1071 { 1605 {
1606 TSPoint point_new_end = track_linecol
1607 ? treesit_make_ts_point (buffer_begv_linecol, visi_beg_linecol)
1608 : TREESIT_TS_POINT_1_0;
1072 /* Tree-sitter sees: insert at the beginning. */ 1609 /* Tree-sitter sees: insert at the beginning. */
1073 treesit_tree_edit_1 (tree, 0, 0, visible_beg - BUF_BEGV_BYTE (buffer)); 1610 treesit_tree_edit_1 (tree, 0, 0, visible_beg - BUF_BEGV_BYTE (buffer),
1611 TREESIT_TS_POINT_1_0, TREESIT_TS_POINT_1_0,
1612 point_new_end);
1074 visible_beg = BUF_BEGV_BYTE (buffer); 1613 visible_beg = BUF_BEGV_BYTE (buffer);
1614 visi_beg_linecol = buffer_begv_linecol;
1075 eassert (visible_beg <= visible_end); 1615 eassert (visible_beg <= visible_end);
1076 } 1616 }
1077 /* 2. Make sure visible_end = BUF_ZV_BYTE. */ 1617 /* 2. Make sure visible_end = BUF_ZV_BYTE. */
1078 if (visible_end < BUF_ZV_BYTE (buffer)) 1618 if (visible_end < BUF_ZV_BYTE (buffer))
1079 { 1619 {
1620 TSPoint point_start = track_linecol
1621 ? treesit_make_ts_point (visi_beg_linecol, visi_end_linecol)
1622 : TREESIT_TS_POINT_1_0;
1623 TSPoint point_new_end = track_linecol
1624 ? treesit_make_ts_point (visi_beg_linecol, buffer_zv_linecol)
1625 : TREESIT_TS_POINT_1_0;
1080 /* Tree-sitter sees: insert at the end. */ 1626 /* Tree-sitter sees: insert at the end. */
1081 treesit_tree_edit_1 (tree, visible_end - visible_beg, 1627 treesit_tree_edit_1 (tree, visible_end - visible_beg,
1082 visible_end - visible_beg, 1628 visible_end - visible_beg,
1083 BUF_ZV_BYTE (buffer) - visible_beg); 1629 BUF_ZV_BYTE (buffer) - visible_beg,
1630 point_start, point_start, point_new_end);
1084 visible_end = BUF_ZV_BYTE (buffer); 1631 visible_end = BUF_ZV_BYTE (buffer);
1632 visi_end_linecol = buffer_zv_linecol;
1085 eassert (visible_beg <= visible_end); 1633 eassert (visible_beg <= visible_end);
1086 } 1634 }
1087 else if (visible_end > BUF_ZV_BYTE (buffer)) 1635 else if (visible_end > BUF_ZV_BYTE (buffer))
1088 { 1636 {
1637 TSPoint point_start = track_linecol
1638 ? treesit_make_ts_point (visi_beg_linecol, buffer_zv_linecol)
1639 : TREESIT_TS_POINT_1_0;
1640 TSPoint point_old_end = track_linecol
1641 ? treesit_make_ts_point (visi_beg_linecol, visi_end_linecol)
1642 : TREESIT_TS_POINT_1_0;
1089 /* Tree-sitter sees: delete at the end. */ 1643 /* Tree-sitter sees: delete at the end. */
1090 treesit_tree_edit_1 (tree, BUF_ZV_BYTE (buffer) - visible_beg, 1644 treesit_tree_edit_1 (tree, BUF_ZV_BYTE (buffer) - visible_beg,
1091 visible_end - visible_beg, 1645 visible_end - visible_beg,
1092 BUF_ZV_BYTE (buffer) - visible_beg); 1646 BUF_ZV_BYTE (buffer) - visible_beg,
1647 point_start, point_old_end, point_start);
1093 visible_end = BUF_ZV_BYTE (buffer); 1648 visible_end = BUF_ZV_BYTE (buffer);
1649 visi_end_linecol = buffer_zv_linecol;
1094 eassert (visible_beg <= visible_end); 1650 eassert (visible_beg <= visible_end);
1095 } 1651 }
1096 /* 3. Make sure visible_beg = BUF_BEGV_BYTE. */ 1652 /* 3. Make sure visible_beg = BUF_BEGV_BYTE. */
1097 if (visible_beg < BUF_BEGV_BYTE (buffer)) 1653 if (visible_beg < BUF_BEGV_BYTE (buffer))
1098 { 1654 {
1655 TSPoint point_old_end = track_linecol
1656 ? treesit_make_ts_point (visi_beg_linecol, buffer_begv_linecol)
1657 : TREESIT_TS_POINT_1_0;
1099 /* Tree-sitter sees: delete at the beginning. */ 1658 /* Tree-sitter sees: delete at the beginning. */
1100 treesit_tree_edit_1 (tree, 0, BUF_BEGV_BYTE (buffer) - visible_beg, 0); 1659 treesit_tree_edit_1 (tree, 0, BUF_BEGV_BYTE (buffer) - visible_beg, 0,
1660 TREESIT_TS_POINT_1_0, point_old_end,
1661 TREESIT_TS_POINT_1_0);
1101 visible_beg = BUF_BEGV_BYTE (buffer); 1662 visible_beg = BUF_BEGV_BYTE (buffer);
1663 visi_beg_linecol = buffer_begv_linecol;
1102 eassert (visible_beg <= visible_end); 1664 eassert (visible_beg <= visible_end);
1103 } 1665 }
1104 eassert (0 <= visible_beg); 1666 eassert (0 <= visible_beg);
@@ -1108,6 +1670,14 @@ treesit_sync_visible_region (Lisp_Object parser)
1108 1670
1109 XTS_PARSER (parser)->visible_beg = visible_beg; 1671 XTS_PARSER (parser)->visible_beg = visible_beg;
1110 XTS_PARSER (parser)->visible_end = visible_end; 1672 XTS_PARSER (parser)->visible_end = visible_end;
1673 XTS_PARSER (parser)->visi_beg_linecol = visi_beg_linecol;
1674 XTS_PARSER (parser)->visi_end_linecol = visi_end_linecol;
1675
1676 if (track_linecol)
1677 {
1678 eassert (visi_beg_linecol.bytepos == visible_beg);
1679 eassert (visi_end_linecol.bytepos == visible_end);
1680 }
1111 1681
1112 /* Fix ranges so that the ranges stays with in visible_end. Here we 1682 /* Fix ranges so that the ranges stays with in visible_end. Here we
1113 try to do minimal work so that the ranges is minimally correct and 1683 try to do minimal work so that the ranges is minimally correct and
@@ -1356,7 +1926,7 @@ treesit_read_buffer (void *parser, uint32_t byte_index,
1356Lisp_Object 1926Lisp_Object
1357make_treesit_parser (Lisp_Object buffer, TSParser *parser, 1927make_treesit_parser (Lisp_Object buffer, TSParser *parser,
1358 TSTree *tree, Lisp_Object language_symbol, 1928 TSTree *tree, Lisp_Object language_symbol,
1359 Lisp_Object tag) 1929 Lisp_Object tag, bool tracks_linecol)
1360{ 1930{
1361 struct Lisp_TS_Parser *lisp_parser; 1931 struct Lisp_TS_Parser *lisp_parser;
1362 1932
@@ -1381,6 +1951,27 @@ make_treesit_parser (Lisp_Object buffer, TSParser *parser,
1381 lisp_parser->need_to_gc_buffer = false; 1951 lisp_parser->need_to_gc_buffer = false;
1382 lisp_parser->within_reparse = false; 1952 lisp_parser->within_reparse = false;
1383 eassert (lisp_parser->visible_beg <= lisp_parser->visible_end); 1953 eassert (lisp_parser->visible_beg <= lisp_parser->visible_end);
1954
1955 if (tracks_linecol)
1956 {
1957 struct buffer *old_buf = current_buffer;
1958 set_buffer_internal (XBUFFER (buffer));
1959
1960 /* treesit_linecol_of_pos doesn't signal, so no need to
1961 unwind-protect. */
1962 lisp_parser->visi_beg_linecol
1963 = treesit_linecol_of_pos (BEGV_BYTE, TREESIT_BOB_LINECOL);
1964 lisp_parser->visi_end_linecol
1965 = treesit_linecol_of_pos (ZV_BYTE, lisp_parser->visi_beg_linecol);
1966
1967 set_buffer_internal (old_buf);
1968 }
1969 else
1970 {
1971 lisp_parser->visi_beg_linecol = TREESIT_EMPTY_LINECOL;
1972 lisp_parser->visi_end_linecol = TREESIT_EMPTY_LINECOL;
1973 }
1974
1384 return make_lisp_ptr (lisp_parser, Lisp_Vectorlike); 1975 return make_lisp_ptr (lisp_parser, Lisp_Vectorlike);
1385} 1976}
1386 1977
@@ -1698,13 +2289,28 @@ an indirect buffer. */)
1698 always succeed. */ 2289 always succeed. */
1699 ts_parser_set_language (parser, lang); 2290 ts_parser_set_language (parser, lang);
1700 2291
2292 const bool lang_need_linecol_tracking
2293 = !NILP (Fmemq (remapped_lang,
2294 Vtreesit_languages_require_line_column_tracking));
2295
1701 /* Create parser. Use the unmapped LANGUAGE symbol, so the nodes 2296 /* Create parser. Use the unmapped LANGUAGE symbol, so the nodes
1702 created by this parser (and the parser itself) identify themselves 2297 created by this parser (and the parser itself) identify themselves
1703 as the unmapped language. This makes the grammar mapping 2298 as the unmapped language. This makes the grammar mapping
1704 completely transparent. */ 2299 completely transparent. */
1705 Lisp_Object lisp_parser = make_treesit_parser (buf_orig, 2300 Lisp_Object lisp_parser = make_treesit_parser (buf_orig,
1706 parser, NULL, 2301 parser, NULL,
1707 language, tag); 2302 language, tag,
2303 lang_need_linecol_tracking);
2304
2305 /* Enable line-column tracking if this language requires it. */
2306 if (lang_need_linecol_tracking && !treesit_buf_tracks_linecol_p (buf))
2307 {
2308 /* We can use TREESIT_BOB_LINECOL for begv and zv since these
2309 cache doesn't need to be always in sync with BEGV and ZV. */
2310 SET_BUF_TS_LINECOL_BEGV (buf, TREESIT_BOB_LINECOL);
2311 SET_BUF_TS_LINECOL_POINT (buf, TREESIT_BOB_LINECOL);
2312 SET_BUF_TS_LINECOL_ZV (buf, TREESIT_BOB_LINECOL);
2313 }
1708 2314
1709 /* Update parser-list. */ 2315 /* Update parser-list. */
1710 BVAR (buf, ts_parser_list) = Fcons (lisp_parser, BVAR (buf, ts_parser_list)); 2316 BVAR (buf, ts_parser_list) = Fcons (lisp_parser, BVAR (buf, ts_parser_list));
@@ -4376,6 +4982,65 @@ nodes in the subtree, including NODE. */)
4376 } 4982 }
4377} 4983}
4378 4984
4985DEFUN ("treesit--linecol-at", Ftreesit__linecol_at,
4986 Streesit__linecol_at, 1, 1, 0,
4987 doc: /* Test buffer-local linecol cache.
4988
4989Calculate the line and column at POS using the buffer-local cache,
4990return the line and column in the form of
4991
4992 (LINE . COL)
4993
4994This is used for internal testing and debugging ONLY. */)
4995 (Lisp_Object pos)
4996{
4997 CHECK_NUMBER (pos);
4998 struct ts_linecol pos_linecol
4999 = treesit_linecol_of_pos (CHAR_TO_BYTE (XFIXNUM (pos)),
5000 BUF_TS_LINECOL_POINT (current_buffer));
5001 return Fcons (make_fixnum (pos_linecol.line), make_fixnum (pos_linecol.col));
5002}
5003
5004DEFUN ("treesit--linecol-cache-set", Ftreesit__linecol_cache_set,
5005 Streesit__linecol_cache_set, 3, 3, 0,
5006 doc: /* Set the linecol cache for the current buffer.
5007
5008This is used for internal testing and debugging ONLY. */)
5009 (Lisp_Object line, Lisp_Object col, Lisp_Object bytepos)
5010{
5011 CHECK_FIXNUM (line);
5012 CHECK_FIXNUM (col);
5013 CHECK_FIXNUM (bytepos);
5014
5015 struct ts_linecol linecol;
5016 linecol.line = XFIXNUM (line);
5017 linecol.col = XFIXNUM (col);
5018 linecol.bytepos = XFIXNUM (bytepos);
5019
5020 SET_BUF_TS_LINECOL_POINT (current_buffer, linecol);
5021
5022 return Qnil;
5023}
5024
5025DEFUN ("treesit--linecol-cache", Ftreesit__linecol_cache,
5026 Streesit__linecol_cache, 0, 0, 0,
5027 doc: /* Return the buffer-local linecol cache for debugging.
5028
5029Return a plist (:line LINE :col COL :pos POS :bytepos BYTEPOS). This is
5030used for internal testing and debugging ONLY. */)
5031 (void)
5032{
5033 struct ts_linecol cache = BUF_TS_LINECOL_POINT (current_buffer);
5034
5035 Lisp_Object plist = (list4 (QCcol, make_fixnum (cache.col),
5036 QCbytepos, make_fixnum (cache.bytepos)));
5037 plist = Fcons (make_fixnum (cache.line), plist);
5038 plist = Fcons (QCline, plist);
5039
5040 return plist;
5041}
5042
5043
4379#endif /* HAVE_TREE_SITTER */ 5044#endif /* HAVE_TREE_SITTER */
4380 5045
4381DEFUN ("treesit-available-p", Ftreesit_available_p, 5046DEFUN ("treesit-available-p", Ftreesit_available_p,
@@ -4418,6 +5083,11 @@ syms_of_treesit (void)
4418 DEFSYM (QCequal, ":equal"); 5083 DEFSYM (QCequal, ":equal");
4419 DEFSYM (QCmatch, ":match"); 5084 DEFSYM (QCmatch, ":match");
4420 DEFSYM (QCpred, ":pred"); 5085 DEFSYM (QCpred, ":pred");
5086 DEFSYM (QCline, ":line");
5087 DEFSYM (QCcol, ":col");
5088 DEFSYM (QCpos, ":pos");
5089 DEFSYM (QCbytepos, ":bytepos");
5090
4421 5091
4422 DEFSYM (Qnot_found, "not-found"); 5092 DEFSYM (Qnot_found, "not-found");
4423 DEFSYM (Qsymbol_error, "symbol-error"); 5093 DEFSYM (Qsymbol_error, "symbol-error");
@@ -4552,6 +5222,17 @@ applies to LANGUAGE-A will be redirected to LANGUAGE-B instead. */);
4552 DEFSYM (Qtreesit_language_remap_alist, "treesit-language-remap-alist"); 5222 DEFSYM (Qtreesit_language_remap_alist, "treesit-language-remap-alist");
4553 Fmake_variable_buffer_local (Qtreesit_language_remap_alist); 5223 Fmake_variable_buffer_local (Qtreesit_language_remap_alist);
4554 5224
5225 DEFVAR_LISP ("treesit-languages-require-line-column-tracking",
5226 Vtreesit_languages_require_line_column_tracking,
5227 doc:
5228 /* A list of languages that need line-column tracking.
5229
5230Most tree-sitter language grammars don't require line and column
5231tracking to work, but some languages do. When creating a parser, if the
5232language is in this list, Emacs enables line-column tracking for the
5233buffer. */);
5234 Vtreesit_languages_require_line_column_tracking = Qnil;
5235
4555 staticpro (&Vtreesit_str_libtree_sitter); 5236 staticpro (&Vtreesit_str_libtree_sitter);
4556 Vtreesit_str_libtree_sitter = build_string ("libtree-sitter-"); 5237 Vtreesit_str_libtree_sitter = build_string ("libtree-sitter-");
4557 staticpro (&Vtreesit_str_tree_sitter); 5238 staticpro (&Vtreesit_str_tree_sitter);
@@ -4596,6 +5277,9 @@ applies to LANGUAGE-A will be redirected to LANGUAGE-B instead. */);
4596 defsubr (&Streesit_language_abi_version); 5277 defsubr (&Streesit_language_abi_version);
4597 defsubr (&Streesit_grammar_location); 5278 defsubr (&Streesit_grammar_location);
4598 5279
5280 defsubr (&Streesit_parser_tracking_line_column_p);
5281 defsubr (&Streesit_tracking_line_column_p);
5282
4599 defsubr (&Streesit_parser_p); 5283 defsubr (&Streesit_parser_p);
4600 defsubr (&Streesit_node_p); 5284 defsubr (&Streesit_node_p);
4601 defsubr (&Streesit_compiled_query_p); 5285 defsubr (&Streesit_compiled_query_p);
@@ -4649,6 +5333,10 @@ applies to LANGUAGE-A will be redirected to LANGUAGE-B instead. */);
4649 defsubr (&Streesit_induce_sparse_tree); 5333 defsubr (&Streesit_induce_sparse_tree);
4650 defsubr (&Streesit_node_match_p); 5334 defsubr (&Streesit_node_match_p);
4651 defsubr (&Streesit_subtree_stat); 5335 defsubr (&Streesit_subtree_stat);
5336
5337 defsubr (&Streesit__linecol_at);
5338 defsubr (&Streesit__linecol_cache);
5339 defsubr (&Streesit__linecol_cache_set);
4652#endif /* HAVE_TREE_SITTER */ 5340#endif /* HAVE_TREE_SITTER */
4653 defsubr (&Streesit_available_p); 5341 defsubr (&Streesit_available_p);
4654#ifdef WINDOWSNT 5342#ifdef WINDOWSNT