aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorYuan Fu2023-04-13 15:03:05 -0700
committerYuan Fu2023-04-13 15:08:51 -0700
commit361c5fc2d8e52d70aa58956c57eaef9495881197 (patch)
tree31c1573058144e6a70515b0198f1c2e7cf34e8bb /src
parenta5eb9f6ad4e6f5a2819b540a477f1e889f6ef355 (diff)
downloademacs-361c5fc2d8e52d70aa58956c57eaef9495881197.tar.gz
emacs-361c5fc2d8e52d70aa58956c57eaef9495881197.zip
Support more predicates in tree-sitter search functions
Right now we support regexp strings and predicate functions for the PRED argument. This change adds support for (not ...) (or ...) and (regexp . pred) predicates. I still need to find a place to document the supported shapes of a predicate. * src/treesit.c (treesit_traverse_validate_predicate): New function. (treesit_traverse_match_predicate): Support more predicate shapes. (treesit_search_dfs): (treesit_search_forward) (treesit_build_sparse_tree): Fix docstring (unrelated to this change). (Ftreesit_search_subtree) (Ftreesit_search_forward) (Ftreesit_induce_sparse_tree): Use the new function to validate predicate shape. (syms_of_treesit): New error Qtreesit_invalid_predicate. * test/src/treesit-tests.el: (treesit--ert-search-setup): Add edebug declaration. (treesit-search-forward-predicate) (treesit-search-forward-predicate-invalid-predicate): New tests.
Diffstat (limited to 'src')
-rw-r--r--src/treesit.c168
1 files changed, 148 insertions, 20 deletions
diff --git a/src/treesit.c b/src/treesit.c
index 76d1dc8ccf4..09d998b56c8 100644
--- a/src/treesit.c
+++ b/src/treesit.c
@@ -3139,10 +3139,84 @@ treesit_traverse_child_helper (TSTreeCursor *cursor,
3139 } 3139 }
3140} 3140}
3141 3141
3142/* Return true if the node at CURSOR matches PRED. PRED can be a 3142/* Validate the PRED passed to treesit_traverse_match_predicate. If
3143 string or a function. This function assumes PRED is either a 3143 there's an error, set SIGNAL_DATA to something signal accepts, and
3144 string or a function. If NAMED is true, also check that the node 3144 return false, otherwise return true. */
3145 is named. */ 3145static bool
3146treesit_traverse_validate_predicate (Lisp_Object pred,
3147 Lisp_Object *signal_data)
3148{
3149 if (STRINGP (pred))
3150 return true;
3151 /* We want to allow cl-labels-defined functions, so we allow
3152 symbols. */
3153 else if (FUNCTIONP (pred) || SYMBOLP (pred))
3154 return true;
3155 else if (CONSP (pred))
3156 {
3157 Lisp_Object car = XCAR (pred);
3158 Lisp_Object cdr = XCDR (pred);
3159 if (EQ (car, Qnot))
3160 {
3161 if (!CONSP (cdr))
3162 {
3163 *signal_data = list2 (build_string ("Invalide `not' "
3164 "predicate"),
3165 pred);
3166 return false;
3167 }
3168 /* At this point CDR must be a cons. */
3169 if (XFIXNUM (Flength (cdr)) != 1)
3170 {
3171 *signal_data = list2 (build_string ("`not' can only "
3172 "have one argument"),
3173 pred);
3174 return false;
3175 }
3176 return treesit_traverse_validate_predicate (XCAR (cdr),
3177 signal_data);
3178 }
3179 else if (EQ (car, Qor))
3180 {
3181 if (!CONSP (cdr) || NILP (cdr))
3182 {
3183 *signal_data = list2 (build_string ("`or' must have a list "
3184 "of patterns as "
3185 "arguments "),
3186 pred);
3187 return false;
3188 }
3189 FOR_EACH_TAIL (cdr)
3190 {
3191 if (!treesit_traverse_validate_predicate (XCAR (cdr),
3192 signal_data))
3193 return false;
3194 }
3195 return true;
3196 }
3197 /* We allow the function to be a symbol to support cl-label. */
3198 else if (STRINGP (car) && (FUNCTIONP (cdr) || SYMBOLP (cdr)))
3199 return true;
3200 }
3201 *signal_data = list2 (build_string ("Invalid predicate, see TODO for "
3202 "valid forms of predicate"),
3203 pred);
3204 return false;
3205}
3206
3207/* Return true if the node at CURSOR matches PRED. PRED can be a lot
3208 of things:
3209
3210 PRED := string | function | (string . function)
3211 | (or PRED...) | (not PRED)
3212
3213 See docstring of treesit-search-forward and friends for the meaning
3214 of each shape.
3215
3216 This function assumes PRED is in one of its valid forms. If NAMED
3217 is true, also check that the node is named.
3218
3219 This function may signal if the predicate function signals. */
3146static bool 3220static bool
3147treesit_traverse_match_predicate (TSTreeCursor *cursor, Lisp_Object pred, 3221treesit_traverse_match_predicate (TSTreeCursor *cursor, Lisp_Object pred,
3148 Lisp_Object parser, bool named) 3222 Lisp_Object parser, bool named)
@@ -3156,24 +3230,63 @@ treesit_traverse_match_predicate (TSTreeCursor *cursor, Lisp_Object pred,
3156 const char *type = ts_node_type (node); 3230 const char *type = ts_node_type (node);
3157 return fast_c_string_match (pred, type, strlen (type)) >= 0; 3231 return fast_c_string_match (pred, type, strlen (type)) >= 0;
3158 } 3232 }
3159 else 3233 /* We want to allow cl-labels-defined functions, so we allow
3234 symbols. */
3235 else if (FUNCTIONP (pred) || SYMBOLP (pred))
3160 { 3236 {
3161 Lisp_Object lisp_node = make_treesit_node (parser, node); 3237 Lisp_Object lisp_node = make_treesit_node (parser, node);
3162 return !NILP (CALLN (Ffuncall, pred, lisp_node)); 3238 return !NILP (CALLN (Ffuncall, pred, lisp_node));
3163 } 3239 }
3240 else if (CONSP (pred))
3241 {
3242 Lisp_Object car = XCAR (pred);
3243 Lisp_Object cdr = XCDR (pred);
3244
3245 if (EQ (car, Qnot))
3246 {
3247 return !treesit_traverse_match_predicate (cursor, XCAR (cdr),
3248 parser, named);
3249 }
3250 else if (EQ (car, Qor))
3251 {
3252 FOR_EACH_TAIL (cdr)
3253 {
3254 if (treesit_traverse_match_predicate (cursor, XCAR (cdr),
3255 parser, named))
3256 return true;
3257 }
3258 return false;
3259 }
3260 /* We want to allow cl-labels-defined functions, so we allow
3261 symbols. */
3262 else if (STRINGP (car) && (FUNCTIONP (cdr) || SYMBOLP (cdr)))
3263 {
3264 /* A bit of code duplication here, but should be fine. */
3265 const char *type = ts_node_type (node);
3266 if (!(fast_c_string_match (pred, type, strlen (type)) >= 0))
3267 return false;
3268
3269 Lisp_Object lisp_node = make_treesit_node (parser, node);
3270 if (NILP (CALLN (Ffuncall, pred, lisp_node)))
3271 return false;
3272
3273 return true;
3274 }
3275 }
3276 /* Returning false is better than UB. */
3277 return false;
3164} 3278}
3165 3279
3166/* Traverse the parse tree starting from CURSOR. PRED can be a 3280/* Traverse the parse tree starting from CURSOR. See TODO for the
3167 function (takes a node and returns nil/non-nil), or a string 3281 shapes PRED can have. If the node satisfies PRED, leave CURSOR on
3168 (treated as regexp matching the node's type, must be all single 3282 that node and return true. If no node satisfies PRED, move CURSOR
3169 byte characters). If the node satisfies PRED, leave CURSOR on that 3283 back to starting position and return false.
3170 node and return true. If no node satisfies PRED, move CURSOR back
3171 to starting position and return false.
3172 3284
3173 LIMIT is the number of levels we descend in the tree. FORWARD 3285 LIMIT is the number of levels we descend in the tree. FORWARD
3174 controls the direction in which we traverse the tree, true means 3286 controls the direction in which we traverse the tree, true means
3175 forward, false backward. If SKIP_ROOT is true, don't match ROOT. 3287 forward, false backward. If SKIP_ROOT is true, don't match ROOT.
3176 */ 3288
3289 This function may signal if the predicate function signals. */
3177static bool 3290static bool
3178treesit_search_dfs (TSTreeCursor *cursor, 3291treesit_search_dfs (TSTreeCursor *cursor,
3179 Lisp_Object pred, Lisp_Object parser, 3292 Lisp_Object pred, Lisp_Object parser,
@@ -3209,7 +3322,9 @@ treesit_search_dfs (TSTreeCursor *cursor,
3209 START. PRED, PARSER, NAMED, FORWARD are the same as in 3322 START. PRED, PARSER, NAMED, FORWARD are the same as in
3210 ts_search_subtree. If a match is found, leave CURSOR at that node, 3323 ts_search_subtree. If a match is found, leave CURSOR at that node,
3211 and return true, if no match is found, return false, and CURSOR's 3324 and return true, if no match is found, return false, and CURSOR's
3212 position is undefined. */ 3325 position is undefined.
3326
3327 This function may signal if the predicate function signals. */
3213static bool 3328static bool
3214treesit_search_forward (TSTreeCursor *cursor, 3329treesit_search_forward (TSTreeCursor *cursor,
3215 Lisp_Object pred, Lisp_Object parser, 3330 Lisp_Object pred, Lisp_Object parser,
@@ -3272,11 +3387,13 @@ Return the first matched node, or nil if none matches. */)
3272 Lisp_Object all, Lisp_Object depth) 3387 Lisp_Object all, Lisp_Object depth)
3273{ 3388{
3274 CHECK_TS_NODE (node); 3389 CHECK_TS_NODE (node);
3275 CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
3276 list3 (Qor, Qstringp, Qfunctionp), predicate);
3277 CHECK_SYMBOL (all); 3390 CHECK_SYMBOL (all);
3278 CHECK_SYMBOL (backward); 3391 CHECK_SYMBOL (backward);
3279 3392
3393 Lisp_Object signal_data = Qnil;
3394 if (!treesit_traverse_validate_predicate (predicate, &signal_data))
3395 xsignal1 (Qtreesit_invalid_predicate, signal_data);
3396
3280 /* We use a default limit of 1000. See bug#59426 for the 3397 /* We use a default limit of 1000. See bug#59426 for the
3281 discussion. */ 3398 discussion. */
3282 ptrdiff_t the_limit = treesit_recursion_limit; 3399 ptrdiff_t the_limit = treesit_recursion_limit;
@@ -3344,11 +3461,13 @@ always traverse leaf nodes first, then upwards. */)
3344 Lisp_Object all) 3461 Lisp_Object all)
3345{ 3462{
3346 CHECK_TS_NODE (start); 3463 CHECK_TS_NODE (start);
3347 CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
3348 list3 (Qor, Qstringp, Qfunctionp), predicate);
3349 CHECK_SYMBOL (all); 3464 CHECK_SYMBOL (all);
3350 CHECK_SYMBOL (backward); 3465 CHECK_SYMBOL (backward);
3351 3466
3467 Lisp_Object signal_data = Qnil;
3468 if (!treesit_traverse_validate_predicate (predicate, &signal_data))
3469 xsignal1 (Qtreesit_invalid_predicate, signal_data);
3470
3352 treesit_initialize (); 3471 treesit_initialize ();
3353 3472
3354 Lisp_Object parser = XTS_NODE (start)->parser; 3473 Lisp_Object parser = XTS_NODE (start)->parser;
@@ -3376,7 +3495,9 @@ always traverse leaf nodes first, then upwards. */)
3376/* Recursively traverse the tree under CURSOR, and append the result 3495/* Recursively traverse the tree under CURSOR, and append the result
3377 subtree to PARENT's cdr. See more in Ftreesit_induce_sparse_tree. 3496 subtree to PARENT's cdr. See more in Ftreesit_induce_sparse_tree.
3378 Note that the top-level children list is reversed, because 3497 Note that the top-level children list is reversed, because
3379 reasons. */ 3498 reasons.
3499
3500 This function may signal if the predicate function signals. */
3380static void 3501static void
3381treesit_build_sparse_tree (TSTreeCursor *cursor, Lisp_Object parent, 3502treesit_build_sparse_tree (TSTreeCursor *cursor, Lisp_Object parent,
3382 Lisp_Object pred, Lisp_Object process_fn, 3503 Lisp_Object pred, Lisp_Object process_fn,
@@ -3462,8 +3583,10 @@ a regexp. */)
3462 Lisp_Object depth) 3583 Lisp_Object depth)
3463{ 3584{
3464 CHECK_TS_NODE (root); 3585 CHECK_TS_NODE (root);
3465 CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate), 3586
3466 list3 (Qor, Qstringp, Qfunctionp), predicate); 3587 Lisp_Object signal_data = Qnil;
3588 if (!treesit_traverse_validate_predicate (predicate, &signal_data))
3589 xsignal1 (Qtreesit_invalid_predicate, signal_data);
3467 3590
3468 if (!NILP (process_fn)) 3591 if (!NILP (process_fn))
3469 CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn); 3592 CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn);
@@ -3595,6 +3718,7 @@ syms_of_treesit (void)
3595 DEFSYM (Qoutdated, "outdated"); 3718 DEFSYM (Qoutdated, "outdated");
3596 DEFSYM (Qhas_error, "has-error"); 3719 DEFSYM (Qhas_error, "has-error");
3597 DEFSYM (Qlive, "live"); 3720 DEFSYM (Qlive, "live");
3721 DEFSYM (Qnot, "not");
3598 3722
3599 DEFSYM (QCanchor, ":anchor"); 3723 DEFSYM (QCanchor, ":anchor");
3600 DEFSYM (QCequal, ":equal"); 3724 DEFSYM (QCequal, ":equal");
@@ -3619,6 +3743,7 @@ syms_of_treesit (void)
3619 "user-emacs-directory"); 3743 "user-emacs-directory");
3620 DEFSYM (Qtreesit_parser_deleted, "treesit-parser-deleted"); 3744 DEFSYM (Qtreesit_parser_deleted, "treesit-parser-deleted");
3621 DEFSYM (Qtreesit_pattern_expand, "treesit-pattern-expand"); 3745 DEFSYM (Qtreesit_pattern_expand, "treesit-pattern-expand");
3746 DEFSYM (Qtreesit_invalid_predicate, "treesit-invalid-predicate");
3622 3747
3623 DEFSYM (Qor, "or"); 3748 DEFSYM (Qor, "or");
3624 3749
@@ -3646,6 +3771,9 @@ syms_of_treesit (void)
3646 define_error (Qtreesit_parser_deleted, 3771 define_error (Qtreesit_parser_deleted,
3647 "This parser is deleted and cannot be used", 3772 "This parser is deleted and cannot be used",
3648 Qtreesit_error); 3773 Qtreesit_error);
3774 define_error (Qtreesit_invalid_predicate,
3775 "Invalid predicate, see TODO for valid forms for a predicate",
3776 Qtreesit_error);
3649 3777
3650 DEFVAR_LISP ("treesit-load-name-override-list", 3778 DEFVAR_LISP ("treesit-load-name-override-list",
3651 Vtreesit_load_name_override_list, 3779 Vtreesit_load_name_override_list,