diff options
| author | Yuan Fu | 2023-04-13 15:03:05 -0700 |
|---|---|---|
| committer | Yuan Fu | 2023-04-13 15:08:51 -0700 |
| commit | 361c5fc2d8e52d70aa58956c57eaef9495881197 (patch) | |
| tree | 31c1573058144e6a70515b0198f1c2e7cf34e8bb /src | |
| parent | a5eb9f6ad4e6f5a2819b540a477f1e889f6ef355 (diff) | |
| download | emacs-361c5fc2d8e52d70aa58956c57eaef9495881197.tar.gz emacs-361c5fc2d8e52d70aa58956c57eaef9495881197.zip | |
Support more predicates in tree-sitter search functions
Right now we support regexp strings and predicate functions for the
PRED argument. This change adds support for (not ...) (or ...)
and (regexp . pred) predicates.
I still need to find a place to document the supported shapes of a
predicate.
* src/treesit.c (treesit_traverse_validate_predicate): New function.
(treesit_traverse_match_predicate): Support more predicate shapes.
(treesit_search_dfs):
(treesit_search_forward)
(treesit_build_sparse_tree): Fix docstring (unrelated to this change).
(Ftreesit_search_subtree)
(Ftreesit_search_forward)
(Ftreesit_induce_sparse_tree): Use the new function to validate
predicate shape.
(syms_of_treesit): New error Qtreesit_invalid_predicate.
* test/src/treesit-tests.el:
(treesit--ert-search-setup): Add edebug declaration.
(treesit-search-forward-predicate)
(treesit-search-forward-predicate-invalid-predicate): New tests.
Diffstat (limited to 'src')
| -rw-r--r-- | src/treesit.c | 168 |
1 files changed, 148 insertions, 20 deletions
diff --git a/src/treesit.c b/src/treesit.c index 76d1dc8ccf4..09d998b56c8 100644 --- a/src/treesit.c +++ b/src/treesit.c | |||
| @@ -3139,10 +3139,84 @@ treesit_traverse_child_helper (TSTreeCursor *cursor, | |||
| 3139 | } | 3139 | } |
| 3140 | } | 3140 | } |
| 3141 | 3141 | ||
| 3142 | /* Return true if the node at CURSOR matches PRED. PRED can be a | 3142 | /* Validate the PRED passed to treesit_traverse_match_predicate. If |
| 3143 | string or a function. This function assumes PRED is either a | 3143 | there's an error, set SIGNAL_DATA to something signal accepts, and |
| 3144 | string or a function. If NAMED is true, also check that the node | 3144 | return false, otherwise return true. */ |
| 3145 | is named. */ | 3145 | static bool |
| 3146 | treesit_traverse_validate_predicate (Lisp_Object pred, | ||
| 3147 | Lisp_Object *signal_data) | ||
| 3148 | { | ||
| 3149 | if (STRINGP (pred)) | ||
| 3150 | return true; | ||
| 3151 | /* We want to allow cl-labels-defined functions, so we allow | ||
| 3152 | symbols. */ | ||
| 3153 | else if (FUNCTIONP (pred) || SYMBOLP (pred)) | ||
| 3154 | return true; | ||
| 3155 | else if (CONSP (pred)) | ||
| 3156 | { | ||
| 3157 | Lisp_Object car = XCAR (pred); | ||
| 3158 | Lisp_Object cdr = XCDR (pred); | ||
| 3159 | if (EQ (car, Qnot)) | ||
| 3160 | { | ||
| 3161 | if (!CONSP (cdr)) | ||
| 3162 | { | ||
| 3163 | *signal_data = list2 (build_string ("Invalide `not' " | ||
| 3164 | "predicate"), | ||
| 3165 | pred); | ||
| 3166 | return false; | ||
| 3167 | } | ||
| 3168 | /* At this point CDR must be a cons. */ | ||
| 3169 | if (XFIXNUM (Flength (cdr)) != 1) | ||
| 3170 | { | ||
| 3171 | *signal_data = list2 (build_string ("`not' can only " | ||
| 3172 | "have one argument"), | ||
| 3173 | pred); | ||
| 3174 | return false; | ||
| 3175 | } | ||
| 3176 | return treesit_traverse_validate_predicate (XCAR (cdr), | ||
| 3177 | signal_data); | ||
| 3178 | } | ||
| 3179 | else if (EQ (car, Qor)) | ||
| 3180 | { | ||
| 3181 | if (!CONSP (cdr) || NILP (cdr)) | ||
| 3182 | { | ||
| 3183 | *signal_data = list2 (build_string ("`or' must have a list " | ||
| 3184 | "of patterns as " | ||
| 3185 | "arguments "), | ||
| 3186 | pred); | ||
| 3187 | return false; | ||
| 3188 | } | ||
| 3189 | FOR_EACH_TAIL (cdr) | ||
| 3190 | { | ||
| 3191 | if (!treesit_traverse_validate_predicate (XCAR (cdr), | ||
| 3192 | signal_data)) | ||
| 3193 | return false; | ||
| 3194 | } | ||
| 3195 | return true; | ||
| 3196 | } | ||
| 3197 | /* We allow the function to be a symbol to support cl-label. */ | ||
| 3198 | else if (STRINGP (car) && (FUNCTIONP (cdr) || SYMBOLP (cdr))) | ||
| 3199 | return true; | ||
| 3200 | } | ||
| 3201 | *signal_data = list2 (build_string ("Invalid predicate, see TODO for " | ||
| 3202 | "valid forms of predicate"), | ||
| 3203 | pred); | ||
| 3204 | return false; | ||
| 3205 | } | ||
| 3206 | |||
| 3207 | /* Return true if the node at CURSOR matches PRED. PRED can be a lot | ||
| 3208 | of things: | ||
| 3209 | |||
| 3210 | PRED := string | function | (string . function) | ||
| 3211 | | (or PRED...) | (not PRED) | ||
| 3212 | |||
| 3213 | See docstring of treesit-search-forward and friends for the meaning | ||
| 3214 | of each shape. | ||
| 3215 | |||
| 3216 | This function assumes PRED is in one of its valid forms. If NAMED | ||
| 3217 | is true, also check that the node is named. | ||
| 3218 | |||
| 3219 | This function may signal if the predicate function signals. */ | ||
| 3146 | static bool | 3220 | static bool |
| 3147 | treesit_traverse_match_predicate (TSTreeCursor *cursor, Lisp_Object pred, | 3221 | treesit_traverse_match_predicate (TSTreeCursor *cursor, Lisp_Object pred, |
| 3148 | Lisp_Object parser, bool named) | 3222 | Lisp_Object parser, bool named) |
| @@ -3156,24 +3230,63 @@ treesit_traverse_match_predicate (TSTreeCursor *cursor, Lisp_Object pred, | |||
| 3156 | const char *type = ts_node_type (node); | 3230 | const char *type = ts_node_type (node); |
| 3157 | return fast_c_string_match (pred, type, strlen (type)) >= 0; | 3231 | return fast_c_string_match (pred, type, strlen (type)) >= 0; |
| 3158 | } | 3232 | } |
| 3159 | else | 3233 | /* We want to allow cl-labels-defined functions, so we allow |
| 3234 | symbols. */ | ||
| 3235 | else if (FUNCTIONP (pred) || SYMBOLP (pred)) | ||
| 3160 | { | 3236 | { |
| 3161 | Lisp_Object lisp_node = make_treesit_node (parser, node); | 3237 | Lisp_Object lisp_node = make_treesit_node (parser, node); |
| 3162 | return !NILP (CALLN (Ffuncall, pred, lisp_node)); | 3238 | return !NILP (CALLN (Ffuncall, pred, lisp_node)); |
| 3163 | } | 3239 | } |
| 3240 | else if (CONSP (pred)) | ||
| 3241 | { | ||
| 3242 | Lisp_Object car = XCAR (pred); | ||
| 3243 | Lisp_Object cdr = XCDR (pred); | ||
| 3244 | |||
| 3245 | if (EQ (car, Qnot)) | ||
| 3246 | { | ||
| 3247 | return !treesit_traverse_match_predicate (cursor, XCAR (cdr), | ||
| 3248 | parser, named); | ||
| 3249 | } | ||
| 3250 | else if (EQ (car, Qor)) | ||
| 3251 | { | ||
| 3252 | FOR_EACH_TAIL (cdr) | ||
| 3253 | { | ||
| 3254 | if (treesit_traverse_match_predicate (cursor, XCAR (cdr), | ||
| 3255 | parser, named)) | ||
| 3256 | return true; | ||
| 3257 | } | ||
| 3258 | return false; | ||
| 3259 | } | ||
| 3260 | /* We want to allow cl-labels-defined functions, so we allow | ||
| 3261 | symbols. */ | ||
| 3262 | else if (STRINGP (car) && (FUNCTIONP (cdr) || SYMBOLP (cdr))) | ||
| 3263 | { | ||
| 3264 | /* A bit of code duplication here, but should be fine. */ | ||
| 3265 | const char *type = ts_node_type (node); | ||
| 3266 | if (!(fast_c_string_match (pred, type, strlen (type)) >= 0)) | ||
| 3267 | return false; | ||
| 3268 | |||
| 3269 | Lisp_Object lisp_node = make_treesit_node (parser, node); | ||
| 3270 | if (NILP (CALLN (Ffuncall, pred, lisp_node))) | ||
| 3271 | return false; | ||
| 3272 | |||
| 3273 | return true; | ||
| 3274 | } | ||
| 3275 | } | ||
| 3276 | /* Returning false is better than UB. */ | ||
| 3277 | return false; | ||
| 3164 | } | 3278 | } |
| 3165 | 3279 | ||
| 3166 | /* Traverse the parse tree starting from CURSOR. PRED can be a | 3280 | /* Traverse the parse tree starting from CURSOR. See TODO for the |
| 3167 | function (takes a node and returns nil/non-nil), or a string | 3281 | shapes PRED can have. If the node satisfies PRED, leave CURSOR on |
| 3168 | (treated as regexp matching the node's type, must be all single | 3282 | that node and return true. If no node satisfies PRED, move CURSOR |
| 3169 | byte characters). If the node satisfies PRED, leave CURSOR on that | 3283 | back to starting position and return false. |
| 3170 | node and return true. If no node satisfies PRED, move CURSOR back | ||
| 3171 | to starting position and return false. | ||
| 3172 | 3284 | ||
| 3173 | LIMIT is the number of levels we descend in the tree. FORWARD | 3285 | LIMIT is the number of levels we descend in the tree. FORWARD |
| 3174 | controls the direction in which we traverse the tree, true means | 3286 | controls the direction in which we traverse the tree, true means |
| 3175 | forward, false backward. If SKIP_ROOT is true, don't match ROOT. | 3287 | forward, false backward. If SKIP_ROOT is true, don't match ROOT. |
| 3176 | */ | 3288 | |
| 3289 | This function may signal if the predicate function signals. */ | ||
| 3177 | static bool | 3290 | static bool |
| 3178 | treesit_search_dfs (TSTreeCursor *cursor, | 3291 | treesit_search_dfs (TSTreeCursor *cursor, |
| 3179 | Lisp_Object pred, Lisp_Object parser, | 3292 | Lisp_Object pred, Lisp_Object parser, |
| @@ -3209,7 +3322,9 @@ treesit_search_dfs (TSTreeCursor *cursor, | |||
| 3209 | START. PRED, PARSER, NAMED, FORWARD are the same as in | 3322 | START. PRED, PARSER, NAMED, FORWARD are the same as in |
| 3210 | ts_search_subtree. If a match is found, leave CURSOR at that node, | 3323 | ts_search_subtree. If a match is found, leave CURSOR at that node, |
| 3211 | and return true, if no match is found, return false, and CURSOR's | 3324 | and return true, if no match is found, return false, and CURSOR's |
| 3212 | position is undefined. */ | 3325 | position is undefined. |
| 3326 | |||
| 3327 | This function may signal if the predicate function signals. */ | ||
| 3213 | static bool | 3328 | static bool |
| 3214 | treesit_search_forward (TSTreeCursor *cursor, | 3329 | treesit_search_forward (TSTreeCursor *cursor, |
| 3215 | Lisp_Object pred, Lisp_Object parser, | 3330 | Lisp_Object pred, Lisp_Object parser, |
| @@ -3272,11 +3387,13 @@ Return the first matched node, or nil if none matches. */) | |||
| 3272 | Lisp_Object all, Lisp_Object depth) | 3387 | Lisp_Object all, Lisp_Object depth) |
| 3273 | { | 3388 | { |
| 3274 | CHECK_TS_NODE (node); | 3389 | CHECK_TS_NODE (node); |
| 3275 | CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate), | ||
| 3276 | list3 (Qor, Qstringp, Qfunctionp), predicate); | ||
| 3277 | CHECK_SYMBOL (all); | 3390 | CHECK_SYMBOL (all); |
| 3278 | CHECK_SYMBOL (backward); | 3391 | CHECK_SYMBOL (backward); |
| 3279 | 3392 | ||
| 3393 | Lisp_Object signal_data = Qnil; | ||
| 3394 | if (!treesit_traverse_validate_predicate (predicate, &signal_data)) | ||
| 3395 | xsignal1 (Qtreesit_invalid_predicate, signal_data); | ||
| 3396 | |||
| 3280 | /* We use a default limit of 1000. See bug#59426 for the | 3397 | /* We use a default limit of 1000. See bug#59426 for the |
| 3281 | discussion. */ | 3398 | discussion. */ |
| 3282 | ptrdiff_t the_limit = treesit_recursion_limit; | 3399 | ptrdiff_t the_limit = treesit_recursion_limit; |
| @@ -3344,11 +3461,13 @@ always traverse leaf nodes first, then upwards. */) | |||
| 3344 | Lisp_Object all) | 3461 | Lisp_Object all) |
| 3345 | { | 3462 | { |
| 3346 | CHECK_TS_NODE (start); | 3463 | CHECK_TS_NODE (start); |
| 3347 | CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate), | ||
| 3348 | list3 (Qor, Qstringp, Qfunctionp), predicate); | ||
| 3349 | CHECK_SYMBOL (all); | 3464 | CHECK_SYMBOL (all); |
| 3350 | CHECK_SYMBOL (backward); | 3465 | CHECK_SYMBOL (backward); |
| 3351 | 3466 | ||
| 3467 | Lisp_Object signal_data = Qnil; | ||
| 3468 | if (!treesit_traverse_validate_predicate (predicate, &signal_data)) | ||
| 3469 | xsignal1 (Qtreesit_invalid_predicate, signal_data); | ||
| 3470 | |||
| 3352 | treesit_initialize (); | 3471 | treesit_initialize (); |
| 3353 | 3472 | ||
| 3354 | Lisp_Object parser = XTS_NODE (start)->parser; | 3473 | Lisp_Object parser = XTS_NODE (start)->parser; |
| @@ -3376,7 +3495,9 @@ always traverse leaf nodes first, then upwards. */) | |||
| 3376 | /* Recursively traverse the tree under CURSOR, and append the result | 3495 | /* Recursively traverse the tree under CURSOR, and append the result |
| 3377 | subtree to PARENT's cdr. See more in Ftreesit_induce_sparse_tree. | 3496 | subtree to PARENT's cdr. See more in Ftreesit_induce_sparse_tree. |
| 3378 | Note that the top-level children list is reversed, because | 3497 | Note that the top-level children list is reversed, because |
| 3379 | reasons. */ | 3498 | reasons. |
| 3499 | |||
| 3500 | This function may signal if the predicate function signals. */ | ||
| 3380 | static void | 3501 | static void |
| 3381 | treesit_build_sparse_tree (TSTreeCursor *cursor, Lisp_Object parent, | 3502 | treesit_build_sparse_tree (TSTreeCursor *cursor, Lisp_Object parent, |
| 3382 | Lisp_Object pred, Lisp_Object process_fn, | 3503 | Lisp_Object pred, Lisp_Object process_fn, |
| @@ -3462,8 +3583,10 @@ a regexp. */) | |||
| 3462 | Lisp_Object depth) | 3583 | Lisp_Object depth) |
| 3463 | { | 3584 | { |
| 3464 | CHECK_TS_NODE (root); | 3585 | CHECK_TS_NODE (root); |
| 3465 | CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate), | 3586 | |
| 3466 | list3 (Qor, Qstringp, Qfunctionp), predicate); | 3587 | Lisp_Object signal_data = Qnil; |
| 3588 | if (!treesit_traverse_validate_predicate (predicate, &signal_data)) | ||
| 3589 | xsignal1 (Qtreesit_invalid_predicate, signal_data); | ||
| 3467 | 3590 | ||
| 3468 | if (!NILP (process_fn)) | 3591 | if (!NILP (process_fn)) |
| 3469 | CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn); | 3592 | CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn); |
| @@ -3595,6 +3718,7 @@ syms_of_treesit (void) | |||
| 3595 | DEFSYM (Qoutdated, "outdated"); | 3718 | DEFSYM (Qoutdated, "outdated"); |
| 3596 | DEFSYM (Qhas_error, "has-error"); | 3719 | DEFSYM (Qhas_error, "has-error"); |
| 3597 | DEFSYM (Qlive, "live"); | 3720 | DEFSYM (Qlive, "live"); |
| 3721 | DEFSYM (Qnot, "not"); | ||
| 3598 | 3722 | ||
| 3599 | DEFSYM (QCanchor, ":anchor"); | 3723 | DEFSYM (QCanchor, ":anchor"); |
| 3600 | DEFSYM (QCequal, ":equal"); | 3724 | DEFSYM (QCequal, ":equal"); |
| @@ -3619,6 +3743,7 @@ syms_of_treesit (void) | |||
| 3619 | "user-emacs-directory"); | 3743 | "user-emacs-directory"); |
| 3620 | DEFSYM (Qtreesit_parser_deleted, "treesit-parser-deleted"); | 3744 | DEFSYM (Qtreesit_parser_deleted, "treesit-parser-deleted"); |
| 3621 | DEFSYM (Qtreesit_pattern_expand, "treesit-pattern-expand"); | 3745 | DEFSYM (Qtreesit_pattern_expand, "treesit-pattern-expand"); |
| 3746 | DEFSYM (Qtreesit_invalid_predicate, "treesit-invalid-predicate"); | ||
| 3622 | 3747 | ||
| 3623 | DEFSYM (Qor, "or"); | 3748 | DEFSYM (Qor, "or"); |
| 3624 | 3749 | ||
| @@ -3646,6 +3771,9 @@ syms_of_treesit (void) | |||
| 3646 | define_error (Qtreesit_parser_deleted, | 3771 | define_error (Qtreesit_parser_deleted, |
| 3647 | "This parser is deleted and cannot be used", | 3772 | "This parser is deleted and cannot be used", |
| 3648 | Qtreesit_error); | 3773 | Qtreesit_error); |
| 3774 | define_error (Qtreesit_invalid_predicate, | ||
| 3775 | "Invalid predicate, see TODO for valid forms for a predicate", | ||
| 3776 | Qtreesit_error); | ||
| 3649 | 3777 | ||
| 3650 | DEFVAR_LISP ("treesit-load-name-override-list", | 3778 | DEFVAR_LISP ("treesit-load-name-override-list", |
| 3651 | Vtreesit_load_name_override_list, | 3779 | Vtreesit_load_name_override_list, |