aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorYuan Fu2024-09-08 17:50:52 -0700
committerYuan Fu2024-09-08 20:52:20 -0700
commit3435464452b4947098b8ccbe93e5c0afdd2ed06e (patch)
tree63bb296af999b1cdee76d2ba60f2b383464b7f3e /src
parent3fcec09f754af9822339eff3ea15d43eb7d19014 (diff)
downloademacs-3435464452b4947098b8ccbe93e5c0afdd2ed06e.tar.gz
emacs-3435464452b4947098b8ccbe93e5c0afdd2ed06e.zip
Fix the range handling in treesit.c
1. In treesit_sync_visible_region, reduce the ranges for a parser so it doesn't go beyond the visible range. 2. To avoid possible infinite recursion, add a within_reparse field to parsers. Previously we were using the need_reparse field to avoid infinite recursion, but lisp programs in a parser's after change hook might make some buffer edit which turns need_reparse to true. To avoid that, we now use an explicit field. If a parser's after change function makes a buffer edit, lisp program ends up with a desynced parse tree, but that's better than possible infinite recursion. Also after change function shouldn't edit the buffer. 3. In treesit_make_ranges, use parser's visible_beg instead of buffer's BEGV. I mean technically whenever we make ranges, buffer's BEGV should be equal to parser's visible_beg, but better not take that uncertainty, also makes the code more readable. 4. In Ftreesit_parser_included_ranges, move visible region sync code before the body of the function. * src/treesit.c (treesit_sync_visible_region): Minimally fix ranges so it doesn't exceed parser's visible range. (treesit_call_after_change_functions): Update calling sigature to treesit_make_ranges. (treesit_ensure_parsed, make_treesit_parser): Use the new field within_reparse. (treesit_make_ranges): Use parser's visible_beg instead of buffer's BEGV. (Ftreesit_parser_included_ranges): Move visible region check before function body. * src/treesit.h (Lisp_TS_Parser): Add new field within_reparse.
Diffstat (limited to 'src')
-rw-r--r--src/treesit.c92
-rw-r--r--src/treesit.h13
2 files changed, 77 insertions, 28 deletions
diff --git a/src/treesit.c b/src/treesit.c
index 351bd65819a..970754f3c1b 100644
--- a/src/treesit.c
+++ b/src/treesit.c
@@ -1045,6 +1045,48 @@ treesit_sync_visible_region (Lisp_Object parser)
1045 1045
1046 XTS_PARSER (parser)->visible_beg = visible_beg; 1046 XTS_PARSER (parser)->visible_beg = visible_beg;
1047 XTS_PARSER (parser)->visible_end = visible_end; 1047 XTS_PARSER (parser)->visible_end = visible_end;
1048
1049 /* Fix ranges so that the ranges stays with in visible_end. Here we
1050 try to do minimal work so that the ranges is minimally correct such
1051 that there's no OOB error. Usually treesit-update-ranges should
1052 update the parser with actually correct ranges. */
1053 if (NILP (XTS_PARSER (parser)->last_set_ranges)) return;
1054 uint32_t len;
1055 const TSRange *ranges
1056 = ts_parser_included_ranges (XTS_PARSER (parser)->parser, &len);
1057 /* We might need to discard some ranges that exceeds visible_end, in
1058 that case, new_len is the length of the new ranges array (which
1059 will be shorter than len). */
1060 uint32_t new_len = 0;
1061 uint32_t new_end = 0;
1062 for (int idx = 0; idx < len; idx++)
1063 {
1064 TSRange range = ranges[idx];
1065 /* If this range starts after visible_end, we don't include this
1066 range and the ranges after it in the new ranges. */
1067 if (range.start_byte + visible_beg >= visible_end)
1068 break;
1069 /* If this range's end is after visible_end, we don't include any
1070 ranges after it, and changes the end of this range to
1071 visible_end. */
1072 if (range.end_byte + visible_beg > visible_end)
1073 {
1074 new_end = visible_end - visible_beg;
1075 new_len++;
1076 break;
1077 }
1078 new_len++;
1079 }
1080 if (new_len != len || new_end != 0)
1081 {
1082 TSRange *new_ranges = xmalloc (sizeof (TSRange) * new_len);
1083 memcpy (new_ranges, ranges, sizeof (TSRange) * new_len);
1084 new_ranges[new_len - 1].end_byte = new_end;
1085 /* TODO: What should we do if this fails? */
1086 ts_parser_set_included_ranges (XTS_PARSER (parser)->parser,
1087 new_ranges, new_len);
1088 xfree (new_ranges);
1089 }
1048} 1090}
1049 1091
1050static void 1092static void
@@ -1057,7 +1099,8 @@ treesit_check_buffer_size (struct buffer *buffer)
1057 make_fixnum (buffer_size_bytes)); 1099 make_fixnum (buffer_size_bytes));
1058} 1100}
1059 1101
1060static Lisp_Object treesit_make_ranges (const TSRange *, uint32_t, struct buffer *); 1102static Lisp_Object treesit_make_ranges (const TSRange *, uint32_t,
1103 Lisp_Object, struct buffer *);
1061 1104
1062static void 1105static void
1063treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree, 1106treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree,
@@ -1071,7 +1114,7 @@ treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree,
1071 { 1114 {
1072 uint32_t len; 1115 uint32_t len;
1073 TSRange *ranges = ts_tree_get_changed_ranges (old_tree, new_tree, &len); 1116 TSRange *ranges = ts_tree_get_changed_ranges (old_tree, new_tree, &len);
1074 lisp_ranges = treesit_make_ranges (ranges, len, buf); 1117 lisp_ranges = treesit_make_ranges (ranges, len, parser, buf);
1075 xfree (ranges); 1118 xfree (ranges);
1076 } 1119 }
1077 else 1120 else
@@ -1098,6 +1141,9 @@ treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree,
1098static void 1141static void
1099treesit_ensure_parsed (Lisp_Object parser) 1142treesit_ensure_parsed (Lisp_Object parser)
1100{ 1143{
1144 if (XTS_PARSER (parser)->within_reparse) return;
1145 XTS_PARSER (parser)->within_reparse = true;
1146
1101 struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); 1147 struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
1102 1148
1103 /* Before we parse, catch up with the narrowing situation. */ 1149 /* Before we parse, catch up with the narrowing situation. */
@@ -1106,10 +1152,11 @@ treesit_ensure_parsed (Lisp_Object parser)
1106 because it might set the flag to true. */ 1152 because it might set the flag to true. */
1107 treesit_sync_visible_region (parser); 1153 treesit_sync_visible_region (parser);
1108 1154
1109 /* Make sure this comes before everything else, see comment
1110 (ref:notifier-inside-ensure-parsed) for more detail. */
1111 if (!XTS_PARSER (parser)->need_reparse) 1155 if (!XTS_PARSER (parser)->need_reparse)
1112 return; 1156 {
1157 XTS_PARSER (parser)->within_reparse = false;
1158 return;
1159 }
1113 1160
1114 TSParser *treesit_parser = XTS_PARSER (parser)->parser; 1161 TSParser *treesit_parser = XTS_PARSER (parser)->parser;
1115 TSTree *tree = XTS_PARSER (parser)->tree; 1162 TSTree *tree = XTS_PARSER (parser)->tree;
@@ -1134,14 +1181,10 @@ treesit_ensure_parsed (Lisp_Object parser)
1134 XTS_PARSER (parser)->need_reparse = false; 1181 XTS_PARSER (parser)->need_reparse = false;
1135 XTS_PARSER (parser)->timestamp++; 1182 XTS_PARSER (parser)->timestamp++;
1136 1183
1137 /* After-change functions should run at the very end, most crucially
1138 after need_reparse is set to false, this way if the function
1139 calls some tree-sitter function which invokes
1140 treesit_ensure_parsed again, it returns early and do not
1141 recursively call the after change functions again.
1142 (ref:notifier-inside-ensure-parsed) */
1143 treesit_call_after_change_functions (tree, new_tree, parser); 1184 treesit_call_after_change_functions (tree, new_tree, parser);
1144 ts_tree_delete (tree); 1185 ts_tree_delete (tree);
1186
1187 XTS_PARSER (parser)->within_reparse = false;
1145} 1188}
1146 1189
1147/* This is the read function provided to tree-sitter to read from a 1190/* This is the read function provided to tree-sitter to read from a
@@ -1224,6 +1267,7 @@ make_treesit_parser (Lisp_Object buffer, TSParser *parser,
1224 lisp_parser->visible_end = BUF_ZV_BYTE (XBUFFER (buffer)); 1267 lisp_parser->visible_end = BUF_ZV_BYTE (XBUFFER (buffer));
1225 lisp_parser->timestamp = 0; 1268 lisp_parser->timestamp = 0;
1226 lisp_parser->deleted = false; 1269 lisp_parser->deleted = false;
1270 lisp_parser->within_reparse = false;
1227 eassert (lisp_parser->visible_beg <= lisp_parser->visible_end); 1271 eassert (lisp_parser->visible_beg <= lisp_parser->visible_end);
1228 return make_lisp_ptr (lisp_parser, Lisp_Vectorlike); 1272 return make_lisp_ptr (lisp_parser, Lisp_Vectorlike);
1229} 1273}
@@ -1672,14 +1716,14 @@ treesit_check_range_argument (Lisp_Object ranges)
1672 convert between tree-sitter buffer offset and buffer position. */ 1716 convert between tree-sitter buffer offset and buffer position. */
1673static Lisp_Object 1717static Lisp_Object
1674treesit_make_ranges (const TSRange *ranges, uint32_t len, 1718treesit_make_ranges (const TSRange *ranges, uint32_t len,
1675 struct buffer *buffer) 1719 Lisp_Object parser, struct buffer *buffer)
1676{ 1720{
1677 Lisp_Object list = Qnil; 1721 Lisp_Object list = Qnil;
1678 for (int idx = 0; idx < len; idx++) 1722 for (int idx = 0; idx < len; idx++)
1679 { 1723 {
1680 TSRange range = ranges[idx]; 1724 TSRange range = ranges[idx];
1681 uint32_t beg_byte = range.start_byte + BUF_BEGV_BYTE (buffer); 1725 uint32_t beg_byte = range.start_byte + XTS_PARSER (parser)->visible_beg;
1682 uint32_t end_byte = range.end_byte + BUF_BEGV_BYTE (buffer); 1726 uint32_t end_byte = range.end_byte + XTS_PARSER (parser)->visible_beg;
1683 eassert (BUF_BEGV_BYTE (buffer) <= beg_byte); 1727 eassert (BUF_BEGV_BYTE (buffer) <= beg_byte);
1684 eassert (beg_byte <= end_byte); 1728 eassert (beg_byte <= end_byte);
1685 eassert (end_byte <= BUF_ZV_BYTE (buffer)); 1729 eassert (end_byte <= BUF_ZV_BYTE (buffer));
@@ -1725,11 +1769,9 @@ buffer. */)
1725 if (NILP (ranges)) 1769 if (NILP (ranges))
1726 { 1770 {
1727 /* If RANGES is nil, make parser to parse the whole document. 1771 /* If RANGES is nil, make parser to parse the whole document.
1728 To do that we give tree-sitter a 0 length, the range is a 1772 To do that we give tree-sitter a 0 length. */
1729 dummy. */
1730 TSRange treesit_range = {{0, 0}, {0, 0}, 0, 0};
1731 success = ts_parser_set_included_ranges (XTS_PARSER (parser)->parser, 1773 success = ts_parser_set_included_ranges (XTS_PARSER (parser)->parser,
1732 &treesit_range , 0); 1774 NULL , 0);
1733 } 1775 }
1734 else 1776 else
1735 { 1777 {
@@ -1742,7 +1784,6 @@ buffer. */)
1742 1784
1743 /* We can use XFIXNUM, XCAR, XCDR freely because we have checked 1785 /* We can use XFIXNUM, XCAR, XCDR freely because we have checked
1744 the input by treesit_check_range_argument. */ 1786 the input by treesit_check_range_argument. */
1745
1746 for (int idx = 0; !NILP (ranges); idx++, ranges = XCDR (ranges)) 1787 for (int idx = 0; !NILP (ranges); idx++, ranges = XCDR (ranges))
1747 { 1788 {
1748 Lisp_Object range = XCAR (ranges); 1789 Lisp_Object range = XCAR (ranges);
@@ -1787,6 +1828,10 @@ See also `treesit-parser-set-included-ranges'. */)
1787 treesit_check_parser (parser); 1828 treesit_check_parser (parser);
1788 treesit_initialize (); 1829 treesit_initialize ();
1789 1830
1831 /* Our return value depends on the buffer state (BUF_BEGV_BYTE,
1832 etc), so we need to sync up. */
1833 treesit_check_buffer_size (XBUFFER (XTS_PARSER (parser)->buffer));
1834 treesit_sync_visible_region (parser);
1790 /* When the parser doesn't have a range set and we call 1835 /* When the parser doesn't have a range set and we call
1791 ts_parser_included_ranges on it, it doesn't return an empty list, 1836 ts_parser_included_ranges on it, it doesn't return an empty list,
1792 but rather return DEFAULT_RANGE. (A single range where start_byte 1837 but rather return DEFAULT_RANGE. (A single range where start_byte
@@ -1799,13 +1844,10 @@ See also `treesit-parser-set-included-ranges'. */)
1799 const TSRange *ranges 1844 const TSRange *ranges
1800 = ts_parser_included_ranges (XTS_PARSER (parser)->parser, &len); 1845 = ts_parser_included_ranges (XTS_PARSER (parser)->parser, &len);
1801 1846
1802 /* Our return value depends on the buffer state (BUF_BEGV_BYTE,
1803 etc), so we need to sync up. */
1804 treesit_check_buffer_size (XBUFFER (XTS_PARSER (parser)->buffer));
1805 treesit_sync_visible_region (parser);
1806
1807 struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); 1847 struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
1808 return treesit_make_ranges (ranges, len, buffer); 1848
1849
1850 return treesit_make_ranges (ranges, len, parser, buffer);
1809} 1851}
1810 1852
1811DEFUN ("treesit-parser-notifiers", Ftreesit_parser_notifiers, 1853DEFUN ("treesit-parser-notifiers", Ftreesit_parser_notifiers,
diff --git a/src/treesit.h b/src/treesit.h
index d3c6aa4c250..f8227a64b0a 100644
--- a/src/treesit.h
+++ b/src/treesit.h
@@ -45,9 +45,12 @@ struct Lisp_TS_Parser
45 same tag. A tag is primarily used to differentiate between 45 same tag. A tag is primarily used to differentiate between
46 parsers for the same language. */ 46 parsers for the same language. */
47 Lisp_Object tag; 47 Lisp_Object tag;
48 /* The Lisp ranges last set. This is use to compare to the new 48 /* The Lisp ranges last set. This is use to compare to the new ranges
49 ranges the users wants to set, and avoid reparse if the new 49 the users wants to set, and avoid reparse if the new ranges is the
50 ranges is the same as the last set one. */ 50 same as the last set one. This might go out of sync with the
51 ranges we return from Ftreesit_parser_included_ranges, if we did a
52 ranges fix in treesit_sync_visible_region, but I don't think
53 that'll cause any harm. */
51 Lisp_Object last_set_ranges; 54 Lisp_Object last_set_ranges;
52 /* The buffer associated with this parser. */ 55 /* The buffer associated with this parser. */
53 Lisp_Object buffer; 56 Lisp_Object buffer;
@@ -82,6 +85,10 @@ struct Lisp_TS_Parser
82 /* If this field is true, parser functions raises 85 /* If this field is true, parser functions raises
83 treesit-parser-deleted signal. */ 86 treesit-parser-deleted signal. */
84 bool deleted; 87 bool deleted;
88 /* This field is set to true when treesit_ensure_parsed runs, to
89 prevent infinite recursion due to calling after change
90 functions. */
91 bool within_reparse;
85}; 92};
86 93
87/* A wrapper around a tree-sitter node. */ 94/* A wrapper around a tree-sitter node. */