diff options
| author | Yuan Fu | 2025-02-27 03:07:34 -0800 |
|---|---|---|
| committer | Yuan Fu | 2025-02-27 17:22:03 -0800 |
| commit | 625b2b02a3c9bad6d7abf57ea7f95ece29855906 (patch) | |
| tree | 2a651ebd19f45a5666ad726a802c4e7965782e5f /src | |
| parent | 30e1508ef2d40e221736cea2c50c64941d7d2f0d (diff) | |
| download | emacs-625b2b02a3c9bad6d7abf57ea7f95ece29855906.tar.gz emacs-625b2b02a3c9bad6d7abf57ea7f95ece29855906.zip | |
Enable treesit-query-capture to return grouped captures
This is needed for creating embedded parsers for embedded code
blocks whose language cannot be known ahead of time, e.g., code
blocks in Markdown and Org mode.
* src/treesit.c (Ftreesit_query_capture): Add parameter GROUPED.
Diffstat (limited to 'src')
| -rw-r--r-- | src/treesit.c | 52 |
1 files changed, 43 insertions, 9 deletions
diff --git a/src/treesit.c b/src/treesit.c index 16308193bf5..c8af17a5b8b 100644 --- a/src/treesit.c +++ b/src/treesit.c | |||
| @@ -3272,7 +3272,7 @@ treesit_initialize_query (Lisp_Object query, const TSLanguage *lang, | |||
| 3272 | 3272 | ||
| 3273 | DEFUN ("treesit-query-capture", | 3273 | DEFUN ("treesit-query-capture", |
| 3274 | Ftreesit_query_capture, | 3274 | Ftreesit_query_capture, |
| 3275 | Streesit_query_capture, 2, 5, 0, | 3275 | Streesit_query_capture, 2, 6, 0, |
| 3276 | doc: /* Query NODE with patterns in QUERY. | 3276 | doc: /* Query NODE with patterns in QUERY. |
| 3277 | 3277 | ||
| 3278 | Return a list of (CAPTURE_NAME . NODE). CAPTURE_NAME is the name | 3278 | Return a list of (CAPTURE_NAME . NODE). CAPTURE_NAME is the name |
| @@ -3289,7 +3289,11 @@ in which the query is executed. Any matching node whose span overlaps | |||
| 3289 | with the region between BEG and END is captured; it doesn't have to | 3289 | with the region between BEG and END is captured; it doesn't have to |
| 3290 | be completely in the region. | 3290 | be completely in the region. |
| 3291 | 3291 | ||
| 3292 | If NODE-ONLY is non-nil, return a list of nodes. | 3292 | If GROUPED is non-nil, group captures into matches and return a list of |
| 3293 | MATCH, where each MATCH is a list of (CAPTURE_NAME . NODE). | ||
| 3294 | |||
| 3295 | If NODE-ONLY is non-nil, return nodes only, and don't include | ||
| 3296 | CAPTURE_NAME. | ||
| 3293 | 3297 | ||
| 3294 | Besides a node, NODE can be a parser, in which case the root node of | 3298 | Besides a node, NODE can be a parser, in which case the root node of |
| 3295 | that parser is used. NODE can also be a language symbol, in which case | 3299 | that parser is used. NODE can also be a language symbol, in which case |
| @@ -3300,7 +3304,8 @@ Signal `treesit-query-error' if QUERY is malformed or something else | |||
| 3300 | goes wrong. You can use `treesit-query-validate' to validate and debug | 3304 | goes wrong. You can use `treesit-query-validate' to validate and debug |
| 3301 | the query. */) | 3305 | the query. */) |
| 3302 | (Lisp_Object node, Lisp_Object query, | 3306 | (Lisp_Object node, Lisp_Object query, |
| 3303 | Lisp_Object beg, Lisp_Object end, Lisp_Object node_only) | 3307 | Lisp_Object beg, Lisp_Object end, Lisp_Object node_only, |
| 3308 | Lisp_Object grouped) | ||
| 3304 | { | 3309 | { |
| 3305 | if (!(TS_COMPILED_QUERY_P (query) | 3310 | if (!(TS_COMPILED_QUERY_P (query) |
| 3306 | || CONSP (query) || STRINGP (query))) | 3311 | || CONSP (query) || STRINGP (query))) |
| @@ -3385,8 +3390,22 @@ the query. */) | |||
| 3385 | 3390 | ||
| 3386 | while (ts_query_cursor_next_match (cursor, &match)) | 3391 | while (ts_query_cursor_next_match (cursor, &match)) |
| 3387 | { | 3392 | { |
| 3388 | /* Record the checkpoint that we may roll back to. */ | 3393 | /* Depending on the value of GROUPED, we have two modes of |
| 3394 | operation. | ||
| 3395 | |||
| 3396 | If GROUPED is nil (mode 1), we return a list of captures; in | ||
| 3397 | this case, we append the captures first, and revert back if the | ||
| 3398 | captures don't match. | ||
| 3399 | |||
| 3400 | If GROUPED is non-nil (mode 2), we return a list of match | ||
| 3401 | groups; in this case, we collect captures into a list first, | ||
| 3402 | and append to the results after verifying that the group | ||
| 3403 | matches. */ | ||
| 3404 | |||
| 3405 | /* Mode 1: Record the checkpoint that we may roll back to. */ | ||
| 3389 | prev_result = result; | 3406 | prev_result = result; |
| 3407 | /* Mode 2: Create a list storing captures of this match group. */ | ||
| 3408 | Lisp_Object match_group = Qnil; | ||
| 3390 | /* 1. Get captured nodes. */ | 3409 | /* 1. Get captured nodes. */ |
| 3391 | const TSQueryCapture *captures = match.captures; | 3410 | const TSQueryCapture *captures = match.captures; |
| 3392 | for (int idx = 0; idx < match.capture_count; idx++) | 3411 | for (int idx = 0; idx < match.capture_count; idx++) |
| @@ -3408,7 +3427,10 @@ the query. */) | |||
| 3408 | else | 3427 | else |
| 3409 | cap = captured_node; | 3428 | cap = captured_node; |
| 3410 | 3429 | ||
| 3411 | result = Fcons (cap, result); | 3430 | if (NILP (grouped)) |
| 3431 | result = Fcons (cap, result); /* Mode 1. */ | ||
| 3432 | else | ||
| 3433 | match_group = Fcons (cap, match_group); /* Mode 2. */ | ||
| 3412 | } | 3434 | } |
| 3413 | /* 2. Get predicates and check whether this match can be | 3435 | /* 2. Get predicates and check whether this match can be |
| 3414 | included in the result list. */ | 3436 | included in the result list. */ |
| @@ -3421,15 +3443,27 @@ the query. */) | |||
| 3421 | } | 3443 | } |
| 3422 | 3444 | ||
| 3423 | /* captures_lisp = Fnreverse (captures_lisp); */ | 3445 | /* captures_lisp = Fnreverse (captures_lisp); */ |
| 3446 | /* Mode 1. */ | ||
| 3424 | struct capture_range captures_range = { result, prev_result }; | 3447 | struct capture_range captures_range = { result, prev_result }; |
| 3425 | bool match = treesit_eval_predicates (captures_range, predicates, | 3448 | /* Mode 2. */ |
| 3426 | &predicate_signal_data); | 3449 | if (!NILP (grouped)) |
| 3450 | { | ||
| 3451 | captures_range.start = match_group; | ||
| 3452 | captures_range.end = Qnil; | ||
| 3453 | } | ||
| 3454 | bool match | ||
| 3455 | = treesit_eval_predicates (captures_range, predicates, | ||
| 3456 | &predicate_signal_data); | ||
| 3457 | |||
| 3427 | if (!NILP (predicate_signal_data)) | 3458 | if (!NILP (predicate_signal_data)) |
| 3428 | break; | 3459 | break; |
| 3429 | 3460 | ||
| 3430 | /* Predicates didn't pass, roll back. */ | 3461 | /* Mode 1: Predicates didn't pass, roll back. */ |
| 3431 | if (!match) | 3462 | if (!match && NILP (grouped)) |
| 3432 | result = prev_result; | 3463 | result = prev_result; |
| 3464 | /* Mode 2: Predicates pass, add this match group. */ | ||
| 3465 | if (match && !NILP (grouped)) | ||
| 3466 | result = Fcons (Fnreverse (match_group), result); | ||
| 3433 | } | 3467 | } |
| 3434 | 3468 | ||
| 3435 | /* Final clean up. */ | 3469 | /* Final clean up. */ |