diff options
| author | Yuan Fu | 2024-09-08 17:50:52 -0700 |
|---|---|---|
| committer | Yuan Fu | 2024-09-08 20:52:20 -0700 |
| commit | 3435464452b4947098b8ccbe93e5c0afdd2ed06e (patch) | |
| tree | 63bb296af999b1cdee76d2ba60f2b383464b7f3e /src | |
| parent | 3fcec09f754af9822339eff3ea15d43eb7d19014 (diff) | |
| download | emacs-3435464452b4947098b8ccbe93e5c0afdd2ed06e.tar.gz emacs-3435464452b4947098b8ccbe93e5c0afdd2ed06e.zip | |
Fix the range handling in treesit.c
1. In treesit_sync_visible_region, reduce the ranges for a parser so it
doesn't go beyond the visible range.
2. To avoid possible infinite recursion, add a within_reparse field to
parsers. Previously we were using the need_reparse field to avoid
infinite recursion, but lisp programs in a parser's after change hook
might make some buffer edit which turns need_reparse to true. To avoid
that, we now use an explicit field. If a parser's after change function
makes a buffer edit, lisp program ends up with a desynced parse tree,
but that's better than possible infinite recursion. Also after change
function shouldn't edit the buffer.
3. In treesit_make_ranges, use parser's visible_beg instead of buffer's
BEGV. I mean technically whenever we make ranges, buffer's BEGV should
be equal to parser's visible_beg, but better not take that uncertainty,
also makes the code more readable.
4. In Ftreesit_parser_included_ranges, move visible region sync code
before the body of the function.
* src/treesit.c (treesit_sync_visible_region): Minimally fix ranges so
it doesn't exceed parser's visible range.
(treesit_call_after_change_functions): Update calling sigature to
treesit_make_ranges.
(treesit_ensure_parsed, make_treesit_parser): Use the new field
within_reparse.
(treesit_make_ranges): Use parser's visible_beg instead of buffer's
BEGV.
(Ftreesit_parser_included_ranges): Move visible region check before
function body.
* src/treesit.h (Lisp_TS_Parser): Add new field within_reparse.
Diffstat (limited to 'src')
| -rw-r--r-- | src/treesit.c | 92 | ||||
| -rw-r--r-- | src/treesit.h | 13 |
2 files changed, 77 insertions, 28 deletions
diff --git a/src/treesit.c b/src/treesit.c index 351bd65819a..970754f3c1b 100644 --- a/src/treesit.c +++ b/src/treesit.c | |||
| @@ -1045,6 +1045,48 @@ treesit_sync_visible_region (Lisp_Object parser) | |||
| 1045 | 1045 | ||
| 1046 | XTS_PARSER (parser)->visible_beg = visible_beg; | 1046 | XTS_PARSER (parser)->visible_beg = visible_beg; |
| 1047 | XTS_PARSER (parser)->visible_end = visible_end; | 1047 | XTS_PARSER (parser)->visible_end = visible_end; |
| 1048 | |||
| 1049 | /* Fix ranges so that the ranges stays with in visible_end. Here we | ||
| 1050 | try to do minimal work so that the ranges is minimally correct such | ||
| 1051 | that there's no OOB error. Usually treesit-update-ranges should | ||
| 1052 | update the parser with actually correct ranges. */ | ||
| 1053 | if (NILP (XTS_PARSER (parser)->last_set_ranges)) return; | ||
| 1054 | uint32_t len; | ||
| 1055 | const TSRange *ranges | ||
| 1056 | = ts_parser_included_ranges (XTS_PARSER (parser)->parser, &len); | ||
| 1057 | /* We might need to discard some ranges that exceeds visible_end, in | ||
| 1058 | that case, new_len is the length of the new ranges array (which | ||
| 1059 | will be shorter than len). */ | ||
| 1060 | uint32_t new_len = 0; | ||
| 1061 | uint32_t new_end = 0; | ||
| 1062 | for (int idx = 0; idx < len; idx++) | ||
| 1063 | { | ||
| 1064 | TSRange range = ranges[idx]; | ||
| 1065 | /* If this range starts after visible_end, we don't include this | ||
| 1066 | range and the ranges after it in the new ranges. */ | ||
| 1067 | if (range.start_byte + visible_beg >= visible_end) | ||
| 1068 | break; | ||
| 1069 | /* If this range's end is after visible_end, we don't include any | ||
| 1070 | ranges after it, and changes the end of this range to | ||
| 1071 | visible_end. */ | ||
| 1072 | if (range.end_byte + visible_beg > visible_end) | ||
| 1073 | { | ||
| 1074 | new_end = visible_end - visible_beg; | ||
| 1075 | new_len++; | ||
| 1076 | break; | ||
| 1077 | } | ||
| 1078 | new_len++; | ||
| 1079 | } | ||
| 1080 | if (new_len != len || new_end != 0) | ||
| 1081 | { | ||
| 1082 | TSRange *new_ranges = xmalloc (sizeof (TSRange) * new_len); | ||
| 1083 | memcpy (new_ranges, ranges, sizeof (TSRange) * new_len); | ||
| 1084 | new_ranges[new_len - 1].end_byte = new_end; | ||
| 1085 | /* TODO: What should we do if this fails? */ | ||
| 1086 | ts_parser_set_included_ranges (XTS_PARSER (parser)->parser, | ||
| 1087 | new_ranges, new_len); | ||
| 1088 | xfree (new_ranges); | ||
| 1089 | } | ||
| 1048 | } | 1090 | } |
| 1049 | 1091 | ||
| 1050 | static void | 1092 | static void |
| @@ -1057,7 +1099,8 @@ treesit_check_buffer_size (struct buffer *buffer) | |||
| 1057 | make_fixnum (buffer_size_bytes)); | 1099 | make_fixnum (buffer_size_bytes)); |
| 1058 | } | 1100 | } |
| 1059 | 1101 | ||
| 1060 | static Lisp_Object treesit_make_ranges (const TSRange *, uint32_t, struct buffer *); | 1102 | static Lisp_Object treesit_make_ranges (const TSRange *, uint32_t, |
| 1103 | Lisp_Object, struct buffer *); | ||
| 1061 | 1104 | ||
| 1062 | static void | 1105 | static void |
| 1063 | treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree, | 1106 | treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree, |
| @@ -1071,7 +1114,7 @@ treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree, | |||
| 1071 | { | 1114 | { |
| 1072 | uint32_t len; | 1115 | uint32_t len; |
| 1073 | TSRange *ranges = ts_tree_get_changed_ranges (old_tree, new_tree, &len); | 1116 | TSRange *ranges = ts_tree_get_changed_ranges (old_tree, new_tree, &len); |
| 1074 | lisp_ranges = treesit_make_ranges (ranges, len, buf); | 1117 | lisp_ranges = treesit_make_ranges (ranges, len, parser, buf); |
| 1075 | xfree (ranges); | 1118 | xfree (ranges); |
| 1076 | } | 1119 | } |
| 1077 | else | 1120 | else |
| @@ -1098,6 +1141,9 @@ treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree, | |||
| 1098 | static void | 1141 | static void |
| 1099 | treesit_ensure_parsed (Lisp_Object parser) | 1142 | treesit_ensure_parsed (Lisp_Object parser) |
| 1100 | { | 1143 | { |
| 1144 | if (XTS_PARSER (parser)->within_reparse) return; | ||
| 1145 | XTS_PARSER (parser)->within_reparse = true; | ||
| 1146 | |||
| 1101 | struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); | 1147 | struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); |
| 1102 | 1148 | ||
| 1103 | /* Before we parse, catch up with the narrowing situation. */ | 1149 | /* Before we parse, catch up with the narrowing situation. */ |
| @@ -1106,10 +1152,11 @@ treesit_ensure_parsed (Lisp_Object parser) | |||
| 1106 | because it might set the flag to true. */ | 1152 | because it might set the flag to true. */ |
| 1107 | treesit_sync_visible_region (parser); | 1153 | treesit_sync_visible_region (parser); |
| 1108 | 1154 | ||
| 1109 | /* Make sure this comes before everything else, see comment | ||
| 1110 | (ref:notifier-inside-ensure-parsed) for more detail. */ | ||
| 1111 | if (!XTS_PARSER (parser)->need_reparse) | 1155 | if (!XTS_PARSER (parser)->need_reparse) |
| 1112 | return; | 1156 | { |
| 1157 | XTS_PARSER (parser)->within_reparse = false; | ||
| 1158 | return; | ||
| 1159 | } | ||
| 1113 | 1160 | ||
| 1114 | TSParser *treesit_parser = XTS_PARSER (parser)->parser; | 1161 | TSParser *treesit_parser = XTS_PARSER (parser)->parser; |
| 1115 | TSTree *tree = XTS_PARSER (parser)->tree; | 1162 | TSTree *tree = XTS_PARSER (parser)->tree; |
| @@ -1134,14 +1181,10 @@ treesit_ensure_parsed (Lisp_Object parser) | |||
| 1134 | XTS_PARSER (parser)->need_reparse = false; | 1181 | XTS_PARSER (parser)->need_reparse = false; |
| 1135 | XTS_PARSER (parser)->timestamp++; | 1182 | XTS_PARSER (parser)->timestamp++; |
| 1136 | 1183 | ||
| 1137 | /* After-change functions should run at the very end, most crucially | ||
| 1138 | after need_reparse is set to false, this way if the function | ||
| 1139 | calls some tree-sitter function which invokes | ||
| 1140 | treesit_ensure_parsed again, it returns early and do not | ||
| 1141 | recursively call the after change functions again. | ||
| 1142 | (ref:notifier-inside-ensure-parsed) */ | ||
| 1143 | treesit_call_after_change_functions (tree, new_tree, parser); | 1184 | treesit_call_after_change_functions (tree, new_tree, parser); |
| 1144 | ts_tree_delete (tree); | 1185 | ts_tree_delete (tree); |
| 1186 | |||
| 1187 | XTS_PARSER (parser)->within_reparse = false; | ||
| 1145 | } | 1188 | } |
| 1146 | 1189 | ||
| 1147 | /* This is the read function provided to tree-sitter to read from a | 1190 | /* This is the read function provided to tree-sitter to read from a |
| @@ -1224,6 +1267,7 @@ make_treesit_parser (Lisp_Object buffer, TSParser *parser, | |||
| 1224 | lisp_parser->visible_end = BUF_ZV_BYTE (XBUFFER (buffer)); | 1267 | lisp_parser->visible_end = BUF_ZV_BYTE (XBUFFER (buffer)); |
| 1225 | lisp_parser->timestamp = 0; | 1268 | lisp_parser->timestamp = 0; |
| 1226 | lisp_parser->deleted = false; | 1269 | lisp_parser->deleted = false; |
| 1270 | lisp_parser->within_reparse = false; | ||
| 1227 | eassert (lisp_parser->visible_beg <= lisp_parser->visible_end); | 1271 | eassert (lisp_parser->visible_beg <= lisp_parser->visible_end); |
| 1228 | return make_lisp_ptr (lisp_parser, Lisp_Vectorlike); | 1272 | return make_lisp_ptr (lisp_parser, Lisp_Vectorlike); |
| 1229 | } | 1273 | } |
| @@ -1672,14 +1716,14 @@ treesit_check_range_argument (Lisp_Object ranges) | |||
| 1672 | convert between tree-sitter buffer offset and buffer position. */ | 1716 | convert between tree-sitter buffer offset and buffer position. */ |
| 1673 | static Lisp_Object | 1717 | static Lisp_Object |
| 1674 | treesit_make_ranges (const TSRange *ranges, uint32_t len, | 1718 | treesit_make_ranges (const TSRange *ranges, uint32_t len, |
| 1675 | struct buffer *buffer) | 1719 | Lisp_Object parser, struct buffer *buffer) |
| 1676 | { | 1720 | { |
| 1677 | Lisp_Object list = Qnil; | 1721 | Lisp_Object list = Qnil; |
| 1678 | for (int idx = 0; idx < len; idx++) | 1722 | for (int idx = 0; idx < len; idx++) |
| 1679 | { | 1723 | { |
| 1680 | TSRange range = ranges[idx]; | 1724 | TSRange range = ranges[idx]; |
| 1681 | uint32_t beg_byte = range.start_byte + BUF_BEGV_BYTE (buffer); | 1725 | uint32_t beg_byte = range.start_byte + XTS_PARSER (parser)->visible_beg; |
| 1682 | uint32_t end_byte = range.end_byte + BUF_BEGV_BYTE (buffer); | 1726 | uint32_t end_byte = range.end_byte + XTS_PARSER (parser)->visible_beg; |
| 1683 | eassert (BUF_BEGV_BYTE (buffer) <= beg_byte); | 1727 | eassert (BUF_BEGV_BYTE (buffer) <= beg_byte); |
| 1684 | eassert (beg_byte <= end_byte); | 1728 | eassert (beg_byte <= end_byte); |
| 1685 | eassert (end_byte <= BUF_ZV_BYTE (buffer)); | 1729 | eassert (end_byte <= BUF_ZV_BYTE (buffer)); |
| @@ -1725,11 +1769,9 @@ buffer. */) | |||
| 1725 | if (NILP (ranges)) | 1769 | if (NILP (ranges)) |
| 1726 | { | 1770 | { |
| 1727 | /* If RANGES is nil, make parser to parse the whole document. | 1771 | /* If RANGES is nil, make parser to parse the whole document. |
| 1728 | To do that we give tree-sitter a 0 length, the range is a | 1772 | To do that we give tree-sitter a 0 length. */ |
| 1729 | dummy. */ | ||
| 1730 | TSRange treesit_range = {{0, 0}, {0, 0}, 0, 0}; | ||
| 1731 | success = ts_parser_set_included_ranges (XTS_PARSER (parser)->parser, | 1773 | success = ts_parser_set_included_ranges (XTS_PARSER (parser)->parser, |
| 1732 | &treesit_range , 0); | 1774 | NULL , 0); |
| 1733 | } | 1775 | } |
| 1734 | else | 1776 | else |
| 1735 | { | 1777 | { |
| @@ -1742,7 +1784,6 @@ buffer. */) | |||
| 1742 | 1784 | ||
| 1743 | /* We can use XFIXNUM, XCAR, XCDR freely because we have checked | 1785 | /* We can use XFIXNUM, XCAR, XCDR freely because we have checked |
| 1744 | the input by treesit_check_range_argument. */ | 1786 | the input by treesit_check_range_argument. */ |
| 1745 | |||
| 1746 | for (int idx = 0; !NILP (ranges); idx++, ranges = XCDR (ranges)) | 1787 | for (int idx = 0; !NILP (ranges); idx++, ranges = XCDR (ranges)) |
| 1747 | { | 1788 | { |
| 1748 | Lisp_Object range = XCAR (ranges); | 1789 | Lisp_Object range = XCAR (ranges); |
| @@ -1787,6 +1828,10 @@ See also `treesit-parser-set-included-ranges'. */) | |||
| 1787 | treesit_check_parser (parser); | 1828 | treesit_check_parser (parser); |
| 1788 | treesit_initialize (); | 1829 | treesit_initialize (); |
| 1789 | 1830 | ||
| 1831 | /* Our return value depends on the buffer state (BUF_BEGV_BYTE, | ||
| 1832 | etc), so we need to sync up. */ | ||
| 1833 | treesit_check_buffer_size (XBUFFER (XTS_PARSER (parser)->buffer)); | ||
| 1834 | treesit_sync_visible_region (parser); | ||
| 1790 | /* When the parser doesn't have a range set and we call | 1835 | /* When the parser doesn't have a range set and we call |
| 1791 | ts_parser_included_ranges on it, it doesn't return an empty list, | 1836 | ts_parser_included_ranges on it, it doesn't return an empty list, |
| 1792 | but rather return DEFAULT_RANGE. (A single range where start_byte | 1837 | but rather return DEFAULT_RANGE. (A single range where start_byte |
| @@ -1799,13 +1844,10 @@ See also `treesit-parser-set-included-ranges'. */) | |||
| 1799 | const TSRange *ranges | 1844 | const TSRange *ranges |
| 1800 | = ts_parser_included_ranges (XTS_PARSER (parser)->parser, &len); | 1845 | = ts_parser_included_ranges (XTS_PARSER (parser)->parser, &len); |
| 1801 | 1846 | ||
| 1802 | /* Our return value depends on the buffer state (BUF_BEGV_BYTE, | ||
| 1803 | etc), so we need to sync up. */ | ||
| 1804 | treesit_check_buffer_size (XBUFFER (XTS_PARSER (parser)->buffer)); | ||
| 1805 | treesit_sync_visible_region (parser); | ||
| 1806 | |||
| 1807 | struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); | 1847 | struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer); |
| 1808 | return treesit_make_ranges (ranges, len, buffer); | 1848 | |
| 1849 | |||
| 1850 | return treesit_make_ranges (ranges, len, parser, buffer); | ||
| 1809 | } | 1851 | } |
| 1810 | 1852 | ||
| 1811 | DEFUN ("treesit-parser-notifiers", Ftreesit_parser_notifiers, | 1853 | DEFUN ("treesit-parser-notifiers", Ftreesit_parser_notifiers, |
diff --git a/src/treesit.h b/src/treesit.h index d3c6aa4c250..f8227a64b0a 100644 --- a/src/treesit.h +++ b/src/treesit.h | |||
| @@ -45,9 +45,12 @@ struct Lisp_TS_Parser | |||
| 45 | same tag. A tag is primarily used to differentiate between | 45 | same tag. A tag is primarily used to differentiate between |
| 46 | parsers for the same language. */ | 46 | parsers for the same language. */ |
| 47 | Lisp_Object tag; | 47 | Lisp_Object tag; |
| 48 | /* The Lisp ranges last set. This is use to compare to the new | 48 | /* The Lisp ranges last set. This is use to compare to the new ranges |
| 49 | ranges the users wants to set, and avoid reparse if the new | 49 | the users wants to set, and avoid reparse if the new ranges is the |
| 50 | ranges is the same as the last set one. */ | 50 | same as the last set one. This might go out of sync with the |
| 51 | ranges we return from Ftreesit_parser_included_ranges, if we did a | ||
| 52 | ranges fix in treesit_sync_visible_region, but I don't think | ||
| 53 | that'll cause any harm. */ | ||
| 51 | Lisp_Object last_set_ranges; | 54 | Lisp_Object last_set_ranges; |
| 52 | /* The buffer associated with this parser. */ | 55 | /* The buffer associated with this parser. */ |
| 53 | Lisp_Object buffer; | 56 | Lisp_Object buffer; |
| @@ -82,6 +85,10 @@ struct Lisp_TS_Parser | |||
| 82 | /* If this field is true, parser functions raises | 85 | /* If this field is true, parser functions raises |
| 83 | treesit-parser-deleted signal. */ | 86 | treesit-parser-deleted signal. */ |
| 84 | bool deleted; | 87 | bool deleted; |
| 88 | /* This field is set to true when treesit_ensure_parsed runs, to | ||
| 89 | prevent infinite recursion due to calling after change | ||
| 90 | functions. */ | ||
| 91 | bool within_reparse; | ||
| 85 | }; | 92 | }; |
| 86 | 93 | ||
| 87 | /* A wrapper around a tree-sitter node. */ | 94 | /* A wrapper around a tree-sitter node. */ |