aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYuan Fu2025-02-27 17:18:28 -0800
committerYuan Fu2025-02-27 17:22:04 -0800
commit8a3e19f4b39be68c22e056d56adb86397e25a673 (patch)
tree39bebe021eee23d4bdcf14ff4b4fbce17737b020
parent8a45c2da226e188420956fd6269f72db3f437e38 (diff)
downloademacs-8a3e19f4b39be68c22e056d56adb86397e25a673.tar.gz
emacs-8a3e19f4b39be68c22e056d56adb86397e25a673.zip
Support alternative range function for tree-sitter range settings
Some embedded parsers need to exclude child nodes from their range, for example markdown-inline. This change adds a keyword that lets users customize how the ranges for an embedded parser are computed. It is also potentially useful for Markdown comments in Rust, for example, where the comment starters (//) must be excluded from the range given to the embedded Markdown parser. * lisp/treesit.el (treesit-query-range): (treesit-query-range-by-language): Add new parameter RANGE-FN. (treesit-range-settings): Add new field RANGE-FN. (treesit-range-rules): Add new keyword RANGE-FN. (treesit-range-fn-exclude-children): New function. (treesit--update-ranges-non-local): (treesit--update-ranges-local): (treesit--update-range-1): Support the RANGE-FN field.
-rw-r--r--lisp/treesit.el130
1 files changed, 97 insertions, 33 deletions
diff --git a/lisp/treesit.el b/lisp/treesit.el
index 8374ce936de..e05e3526c15 100644
--- a/lisp/treesit.el
+++ b/lisp/treesit.el
@@ -548,7 +548,7 @@ See `treesit-query-capture' for QUERY."
548 (treesit-parser-root-node parser) 548 (treesit-parser-root-node parser)
549 query)))) 549 query))))
550 550
551(defun treesit-query-range (node query &optional beg end offset) 551(defun treesit-query-range (node query &optional beg end offset range-fn)
552 "Query the current buffer and return ranges of captured nodes. 552 "Query the current buffer and return ranges of captured nodes.
553 553
554QUERY, NODE, BEG, END are the same as in `treesit-query-capture'. 554QUERY, NODE, BEG, END are the same as in `treesit-query-capture'.
@@ -557,7 +557,10 @@ END specifics the range of each captured node. OFFSET is an
557optional pair of numbers (START-OFFSET . END-OFFSET). The 557optional pair of numbers (START-OFFSET . END-OFFSET). The
558respective offset values are added to each (START . END) range 558respective offset values are added to each (START . END) range
559being returned. Capture names generally don't matter, but names 559being returned. Capture names generally don't matter, but names
560that starts with an underscore are ignored." 560that starts with an underscore are ignored.
561
562RANGE-FN, if non-nil, is a function that takes a node and OFFSET, and
563returns the ranges to use for that node."
561 (let ((offset-left (or (car offset) 0)) 564 (let ((offset-left (or (car offset) 0))
562 (offset-right (or (cdr offset) 0))) 565 (offset-right (or (cdr offset) 0)))
563 (cl-loop for capture 566 (cl-loop for capture
@@ -565,11 +568,14 @@ that starts with an underscore are ignored."
565 for name = (car capture) 568 for name = (car capture)
566 for node = (cdr capture) 569 for node = (cdr capture)
567 if (not (string-prefix-p "_" (symbol-name name))) 570 if (not (string-prefix-p "_" (symbol-name name)))
568 collect (cons (+ (treesit-node-start node) offset-left) 571 append
569 (+ (treesit-node-end node) offset-right))))) 572 (if range-fn
573 (funcall range-fn node offset)
574 (list (cons (+ (treesit-node-start node) offset-left)
575 (+ (treesit-node-end node) offset-right)))))))
570 576
571(defun treesit-query-range-by-language 577(defun treesit-query-range-by-language
572 (node query language-fn &optional beg end offset) 578 (node query language-fn &optional beg end offset range-fn)
573 "Like `treesit-query-range', but return multiple ranges by language. 579 "Like `treesit-query-range', but return multiple ranges by language.
574 580
575Return an alist of the form ((LANGUAGE . RANGES) ...), containing 581Return an alist of the form ((LANGUAGE . RANGES) ...), containing
@@ -579,6 +585,9 @@ Query NODE with QUERY, the captured nodes generates ranges. Nodes
579captured by the `@language' capture name are converted to language 585captured by the `@language' capture name are converted to language
580symbols with LANGUAGE-FN. 586symbols with LANGUAGE-FN.
581 587
588RANGE-FN, if non-nil, is a function that takes a node and OFFSET, and
589returns the ranges to use for that node.
590
582BEG, END, OFFSET are the same as in `treesit-query-range'." 591BEG, END, OFFSET are the same as in `treesit-query-range'."
583 (let ((offset-left (or (car offset) 0)) 592 (let ((offset-left (or (car offset) 0))
584 (offset-right (or (cdr offset) 0)) 593 (offset-right (or (cdr offset) 0))
@@ -591,12 +600,14 @@ BEG, END, OFFSET are the same as in `treesit-query-range'."
591 (node (cdr capture))) 600 (node (cdr capture)))
592 (when (and (not (equal (symbol-name name) "language")) 601 (when (and (not (equal (symbol-name name) "language"))
593 (not (string-prefix-p "_" (symbol-name name)))) 602 (not (string-prefix-p "_" (symbol-name name))))
594 (push (cons (+ (treesit-node-start node) offset-left) 603 (push (if range-fn
595 (+ (treesit-node-end node) offset-right)) 604 (funcall range-fn node offset)
605 (list (cons (+ (treesit-node-start node) offset-left)
606 (+ (treesit-node-end node) offset-right))))
596 (alist-get lang ranges-by-language))))))) 607 (alist-get lang ranges-by-language)))))))
597 (mapcar (lambda (entry) 608 (mapcar (lambda (entry)
598 (cons (car entry) 609 (cons (car entry)
599 (nreverse (cdr entry)))) 610 (apply #'append (nreverse (cdr entry)))))
600 ranges-by-language))) 611 ranges-by-language)))
601 612
602(defun treesit-query-valid-p (language query) 613(defun treesit-query-valid-p (language query)
@@ -621,15 +632,23 @@ If none are valid, return nil."
621(defvar-local treesit-range-settings nil 632(defvar-local treesit-range-settings nil
622 "A list of range settings. 633 "A list of range settings.
623 634
624Each element of the list is of the form (QUERY LANGUAGE LOCAL-P 635Each element of the list is of the form
625OFFSET). When updating the range of each parser in the buffer, 636
626`treesit-update-ranges' queries each QUERY, and sets LANGUAGE's 637 (QUERY LANGUAGE LOCAL-P OFFSET RANGE-FN)
627range to the range spanned by captured nodes. QUERY must be a 638
628compiled query. If LOCAL-P is t, give each range a separate 639When updating the range of each parser in the buffer,
629local parser rather than using a single parser for all the 640`treesit-update-ranges' queries each QUERY, and sets LANGUAGE's range to
630ranges. If OFFSET is non-nil, it should be a cons of 641the range spanned by captured nodes. QUERY must be a compiled query.
631numbers (START-OFFSET . END-OFFSET), where the start and end 642If LOCAL-P is t, give each range a separate local parser rather than
632offset are added to each queried range to get the result ranges. 643using a single parser for all the ranges. If OFFSET is non-nil, it
644should be a cons of numbers (START-OFFSET . END-OFFSET), where the start
645and end offset are added to each queried range to get the result ranges.
646
647If RANGE-FN is non-nil, it should be a function, Emacs uses this
648function to compute the ranges to use for the embedded parser. The
649function is passed the captured node and OFFSET, and should return a
650list of ranges, where each range is a cons of the start and end
651position.
633 652
634Capture names generally don't matter, but names that starts with 653Capture names generally don't matter, but names that starts with
635an underscore are ignored. 654an underscore are ignored.
@@ -688,13 +707,19 @@ be (3 . 7). This can be used to exclude things like surrounding
688delimiters from being included in the range covered by an 707delimiters from being included in the range covered by an
689embedded parser. 708embedded parser.
690 709
710If there's a `:range-fn' keyword with a function, Emacs uses that
711function to compute the ranges to use for the embedded parser. The
712function is passed the captured node and the offset given by the
713`:offset' keyword, and should return a list of ranges, where each range
714is a cons of the start and end position.
715
691QUERY can also be a function that takes two arguments, START and 716QUERY can also be a function that takes two arguments, START and
692END. If QUERY is a function, it doesn't need the :KEYWORD VALUE 717END. If QUERY is a function, it doesn't need the :KEYWORD VALUE
693pair preceding it. This function should set the ranges for 718pair preceding it. This function should set the ranges for
694parsers in the current buffer in the region between START and 719parsers in the current buffer in the region between START and
695END. It is OK for this function to set ranges in a larger region 720END. It is OK for this function to set ranges in a larger region
696that encompasses the region between START and END." 721that encompasses the region between START and END."
697 (let (host embed offset result local) 722 (let (host embed offset result local range-fn)
698 (while query-specs 723 (while query-specs
699 (pcase (pop query-specs) 724 (pcase (pop query-specs)
700 (:local (when (eq t (pop query-specs)) 725 (:local (when (eq t (pop query-specs))
@@ -713,6 +738,10 @@ that encompasses the region between START and END."
713 (numberp (cdr range-offset))) 738 (numberp (cdr range-offset)))
714 (signal 'treesit-error (list "Value of :offset option should be a pair of numbers" range-offset))) 739 (signal 'treesit-error (list "Value of :offset option should be a pair of numbers" range-offset)))
715 (setq offset range-offset))) 740 (setq offset range-offset)))
741 (:range-fn (let ((range-fn (pop query-specs)))
742 (unless (functionp range-fn)
743 (signal 'treesit-error (list "Value of :range-fn option should be a function" range-fn)))
744 (setq range-fn range-fn)))
716 (query (if (functionp query) 745 (query (if (functionp query)
717 (push (list query nil nil) result) 746 (push (list query nil nil) result)
718 (when (null embed) 747 (when (null embed)
@@ -720,11 +749,29 @@ that encompasses the region between START and END."
720 (when (null host) 749 (when (null host)
721 (signal 'treesit-error (list "Value of :host option cannot be omitted"))) 750 (signal 'treesit-error (list "Value of :host option cannot be omitted")))
722 (push (list (treesit-query-compile host query) 751 (push (list (treesit-query-compile host query)
723 embed local offset) 752 embed local offset range-fn)
724 result)) 753 result))
725 (setq host nil embed nil offset nil local nil)))) 754 (setq host nil embed nil offset nil local nil range-fn nil))))
726 (nreverse result))) 755 (nreverse result)))
727 756
(defun treesit-range-fn-exclude-children (node offset)
  "Return ranges spanned by NODE but excluding its children.

OFFSET is nil or a cons (START-OFFSET . END-OFFSET); the offsets are
added to the start and end of NODE's overall range, respectively.
They are not applied to the boundaries of the children.

Return a list of (BEG . END) conses in buffer order.  Only non-empty
gaps are returned: if a child begins exactly where NODE (or the
previous child) ends, or if OFFSET moves the overall start or end
past a child, no empty or inverted range is produced for that gap.
The result is nil when the children cover the whole adjusted span.

This can be used as a `:range-fn' in `treesit-range-rules'."
  (let* ((start (+ (treesit-node-start node) (or (car offset) 0)))
         (end (+ (treesit-node-end node) (or (cdr offset) 0)))
         ;; End of the region already excluded (or skipped) so far.
         (prev-end start)
         (ranges nil))
    (dolist (child (treesit-node-children node))
      ;; Clamp to END so that a negative END-OFFSET can't produce a
      ;; range that extends past the adjusted end of NODE.
      (let ((child-start (min (treesit-node-start child) end))
            (child-end (treesit-node-end child)))
        ;; Keep only the non-empty gap before this child.
        (when (< prev-end child-start)
          (push (cons prev-end child-start) ranges))
        ;; Never move backwards: a positive START-OFFSET may place
        ;; START after the end of the first children.
        (setq prev-end (max prev-end child-end))))
    ;; Trailing gap between the last child and the adjusted end.
    (when (< prev-end end)
      (push (cons prev-end end) ranges))
    (nreverse ranges)))
774
728(defun treesit--merge-ranges (old-ranges new-ranges start end) 775(defun treesit--merge-ranges (old-ranges new-ranges start end)
729 "Merge OLD-RANGES and NEW-RANGES, discarding ranges between START and END. 776 "Merge OLD-RANGES and NEW-RANGES, discarding ranges between START and END.
730OLD-RANGES and NEW-RANGES are lists of cons of the form (BEG . END). 777OLD-RANGES and NEW-RANGES are lists of cons of the form (BEG . END).
@@ -861,7 +908,7 @@ level."
861 908
862(defun treesit--update-ranges-non-local 909(defun treesit--update-ranges-non-local
863 ( host-parser query embed-lang embed-level 910 ( host-parser query embed-lang embed-level
864 &optional beg end offset) 911 &optional beg end offset range-fn)
865 "Update range for non-local parsers between BEG and END under HOST-PARSER. 912 "Update range for non-local parsers between BEG and END under HOST-PARSER.
866 913
867OFFSET is a cons (OFFSET-START . OFFSET-END), the start and end will be 914OFFSET is a cons (OFFSET-START . OFFSET-END), the start and end will be
@@ -878,22 +925,31 @@ updated. When looking for existing local parsers, only look for parsers
878of this level; when creating new local parsers, set their level to this 925of this level; when creating new local parsers, set their level to this
879level. 926level.
880 927
881Return updated parsers." 928RANGE-FN, if non-nil, is a function that takes a node and OFFSET, and
929returns the ranges to use for that node.
930
931Return updated parsers in a list."
882 (let ((ranges-by-lang 932 (let ((ranges-by-lang
883 (if (functionp embed-lang) 933 (if (functionp embed-lang)
884 (treesit-query-range-by-language 934 (treesit-query-range-by-language
885 host-parser query embed-lang beg end offset) 935 host-parser query embed-lang beg end offset range-fn)
886 (list (cons embed-lang 936 (list (cons embed-lang
887 (treesit-query-range 937 (treesit-query-range
888 host-parser query beg end offset))))) 938 host-parser query beg end offset range-fn)))))
889 (touched-parsers nil)) 939 (touched-parsers nil))
890 (dolist (lang-and-ranges ranges-by-lang) 940 (dolist (lang-and-ranges ranges-by-lang)
891 (let* ((resolved-embed-lang (car lang-and-ranges)) 941 (let* ((resolved-embed-lang (car lang-and-ranges))
892 (new-ranges (cdr lang-and-ranges)) 942 (new-ranges (cdr lang-and-ranges))
893 (embed-parser 943 (embed-parser
894 (car (treesit--parser-at-level 944 ;; Prefer embed parser with the right level, but if none
895 (treesit-parser-list nil resolved-embed-lang) 945 ;; exists, ones that doesn't have a embed level are ok
896 embed-level 'include-null)))) 946 ;; too.
947 (or (car (treesit--parser-at-level
948 (treesit-parser-list nil resolved-embed-lang)
949 embed-level))
950 (car (treesit--parser-at-level
951 (treesit-parser-list nil resolved-embed-lang)
952 embed-level 'include-null)))))
897 (when embed-parser 953 (when embed-parser
898 (let* ((old-ranges (treesit-parser-included-ranges 954 (let* ((old-ranges (treesit-parser-included-ranges
899 embed-parser)) 955 embed-parser))
@@ -917,7 +973,7 @@ Return updated parsers."
917 973
918(defun treesit--update-ranges-local 974(defun treesit--update-ranges-local
919 ( host-parser query embedded-lang modified-tick embed-level 975 ( host-parser query embedded-lang modified-tick embed-level
920 &optional beg end) 976 &optional beg end range-fn)
921 "Update range for local parsers between BEG and END under HOST-PARSER. 977 "Update range for local parsers between BEG and END under HOST-PARSER.
922Use QUERY to get the ranges, and make sure each range has a local 978Use QUERY to get the ranges, and make sure each range has a local
923parser for EMBEDDED-LANG. HOST-PARSER and QUERY must match. 979parser for EMBEDDED-LANG. HOST-PARSER and QUERY must match.
@@ -936,14 +992,21 @@ updated. When looking for existing local parsers, only look for parsers
936of this level; when creating new local parsers, set their level to this 992of this level; when creating new local parsers, set their level to this
937level. 993level.
938 994
995OFFSET is a cons of start and end offsets that are applied to the range
996for the local parser.
997
998RANGE-FN, if non-nil, is a function that takes a node and OFFSET, and
999returns the ranges to use for that node.
1000
939Return the created local parsers in a list." 1001Return the created local parsers in a list."
940 ;; Update range. 1002 ;; Update range.
941 (let ((ranges-by-lang 1003 (let ((ranges-by-lang
942 (if (functionp embedded-lang) 1004 (if (functionp embedded-lang)
943 (treesit-query-range-by-language 1005 (treesit-query-range-by-language
944 host-parser query embedded-lang beg end) 1006 host-parser query embedded-lang beg end range-fn)
945 (list (cons embedded-lang 1007 (list (cons embedded-lang
946 (treesit-query-range host-parser query beg end))))) 1008 (treesit-query-range
1009 host-parser query beg end range-fn)))))
947 (touched-parsers nil)) 1010 (touched-parsers nil))
948 (dolist (lang-and-range ranges-by-lang) 1011 (dolist (lang-and-range ranges-by-lang)
949 (let ((embedded-lang (car lang-and-range)) 1012 (let ((embedded-lang (car lang-and-range))
@@ -1001,7 +1064,8 @@ this level."
1001 (query-lang (treesit-query-language query)) 1064 (query-lang (treesit-query-language query))
1002 (embed-lang (nth 1 setting)) 1065 (embed-lang (nth 1 setting))
1003 (local (nth 2 setting)) 1066 (local (nth 2 setting))
1004 (offset (nth 3 setting))) 1067 (offset (nth 3 setting))
1068 (range-fn (nth 4 setting)))
1005 (when (eq query-lang (treesit-parser-language host-parser)) 1069 (when (eq query-lang (treesit-parser-language host-parser))
1006 (cond 1070 (cond
1007 ((functionp query) (funcall query beg end)) 1071 ((functionp query) (funcall query beg end))
@@ -1010,7 +1074,7 @@ this level."
1010 (append touched-parsers 1074 (append touched-parsers
1011 (treesit--update-ranges-local 1075 (treesit--update-ranges-local
1012 host-parser query embed-lang modified-tick 1076 host-parser query embed-lang modified-tick
1013 embed-level beg end)))) 1077 embed-level beg end range-fn))))
1014 ;; When updating ranges, we want to avoid querying the whole 1078 ;; When updating ranges, we want to avoid querying the whole
1015 ;; buffer which could be slow in very large buffers. 1079 ;; buffer which could be slow in very large buffers.
1016 ;; Instead, we only query for nodes that intersect with the 1080 ;; Instead, we only query for nodes that intersect with the
@@ -1021,7 +1085,7 @@ this level."
1021 (append touched-parsers 1085 (append touched-parsers
1022 (treesit--update-ranges-non-local 1086 (treesit--update-ranges-non-local
1023 host-parser query embed-lang embed-level 1087 host-parser query embed-lang embed-level
1024 beg end offset)))))))) 1088 beg end offset range-fn))))))))
1025 touched-parsers)) 1089 touched-parsers))
1026 1090
1027(defun treesit-update-ranges (&optional beg end) 1091(defun treesit-update-ranges (&optional beg end)