diff options
| author | João Távora | 2026-02-03 12:14:03 +0000 |
|---|---|---|
| committer | João Távora | 2026-02-05 15:17:42 +0000 |
| commit | afb422bb9840a4b24ed803fdd3546bc4ef2bcb4f (patch) | |
| tree | b90dd79c8c85aca68ab3eaa301f6b0757d9d3813 | |
| parent | 3ea1010a6b0a63e90896133deaba189f13d47436 (diff) | |
| download | emacs-feature/newflex.tar.gz emacs-feature/newflex.zip | |
Rewrite flex completion scoring with Gotoh algorithmfeature/newflex
The greedy regexp matching, broken scoring and broken highlight were
sources of frequent complaints about the 'flex' matching style. This
commit fixes that.
It was inspired by the 'hotfuzz' style available at
https://github.com/axelf4/hotfuzz which is a modified version of Gotoh's
1982 dynamic programming algorithm (see: GOTOH, Osamu. An improved
algorithm for matching biological sequences. Journal of molecular
biology, 1982, 162.3: 705-708.). That style is slightly more
sophisticated than 'flex' (has special rules for matching things at word
boundaries, a C module with multithreading support). It's almost (but not
entirely) void of hacks so it'd make a good candidate to replace 'flex'
entirely, but no progress has been made in getting it into Emacs's core
in over 2 years, so I thought I'd try my hand at it.
The new 'flex' implementation also uses Gotoh algorithm (apparently
a common choice for these kinds of task) and happens mostly in a new C
function. It is strictly more correct than the "old" flex. For
example, when matching the pattern 'goto' to, say, 'eglot--goto' and
'eglot--bol', no longer is the latter returned first, which was a
substantial annoyance. And of course the highlighting is also correctly
placed on the 'goto' not scattered across the candidate.
Regarding performance, it is faster than the naive 'flex', but that's
mainly because this commit also includes changes to the Elisp code which
make faster regexp's for the filtering step. It is slower than
'hotfuzz' when that style's C-module extension is leveraged. 'hotfuzz'
does the filtering and sorting steps together in C code and has
multithreaded workers there. The matching and scoring algorithm itself
is not the bottleneck.
Test code were refactored and more tests were added.
* src/minibuf.c (completion--flex-score-gotoh): New function.
* lisp/minibuffer.el (completion--flex-score): Rewrite.
(completion--flex-propertize): New function.
(completion-flex--pattern-str): New variable.
(flex-score-match-tightness): Make obsolete.
(completion-pcm--all-completions): Add optional override-re parameter.
(completion-pcm--hilit-commonality): No more re-based highlighting.
(completion-substring--all-completions): Add optional simple-re parameters.
(completion--flex-adjust-metadata): Tweak to new scoring API.
(completion-flex-try-completion, completion-flex-all-completions):
Pass simple-re parameter to completion-substring--all-completions.
(completion--hilit-from-re, completion--flex-score-1)
(completion--flex-score-last-md, completion-pcm--regexp): Delete.
* test/lisp/minibuffer-tests.el (completion--sorted-flex-completions):
New helper function.
(completion-flex-test-non-ascii): New test.
(completion--pcm-score): Delete.
(completion-pcm-test-3, completion-pcm-test-4)
(completion-substring-test-1, completion-substring-test-2)
(completion-flex-test-2, completion-flex-test-3): Remove old scoring
expectations.
| -rw-r--r-- | lisp/minibuffer.el | 260 | ||||
| -rw-r--r-- | src/minibuf.c | 197 | ||||
| -rw-r--r-- | test/lisp/minibuffer-tests.el | 95 |
3 files changed, 347 insertions, 205 deletions
diff --git a/lisp/minibuffer.el b/lisp/minibuffer.el index fc193fe54f0..5b5408a595c 100644 --- a/lisp/minibuffer.el +++ b/lisp/minibuffer.el | |||
| @@ -4421,9 +4421,12 @@ This is used in combination with `completion-pcm--segments->regex'." | |||
| 4421 | (setq idx i))) | 4421 | (setq idx i))) |
| 4422 | idx)) | 4422 | idx)) |
| 4423 | 4423 | ||
| 4424 | (defun completion-pcm--all-completions (prefix pattern table pred) | 4424 | (defun completion-pcm--all-completions (prefix pattern table pred |
| 4425 | &optional override-re) | ||
| 4425 | "Find all completions for PATTERN in TABLE obeying PRED. | 4426 | "Find all completions for PATTERN in TABLE obeying PRED. |
| 4426 | PATTERN is as returned by `completion-pcm--string->pattern'." | 4427 | PATTERN is as returned by `completion-pcm--string->pattern'. |
| 4428 | OVERRIDE-RE means to use this regular expression instead of grabbing one | ||
| 4429 | from PATTERN." | ||
| 4427 | ;; (cl-assert (= (car (completion-boundaries prefix table pred "")) | 4430 | ;; (cl-assert (= (car (completion-boundaries prefix table pred "")) |
| 4428 | ;; (length prefix))) | 4431 | ;; (length prefix))) |
| 4429 | ;; Find an initial list of possible completions. | 4432 | ;; Find an initial list of possible completions. |
| @@ -4435,7 +4438,7 @@ PATTERN is as returned by `completion-pcm--string->pattern'." | |||
| 4435 | ;; Use all-completions to do an initial cull. This is a big win, | 4438 | ;; Use all-completions to do an initial cull. This is a big win, |
| 4436 | ;; since all-completions is written in C! | 4439 | ;; since all-completions is written in C! |
| 4437 | (let* (;; Convert search pattern to a standard regular expression. | 4440 | (let* (;; Convert search pattern to a standard regular expression. |
| 4438 | (regex (completion-pcm--pattern->regex pattern)) | 4441 | (regex (or override-re (completion-pcm--pattern->regex pattern))) |
| 4439 | (case-fold-search completion-ignore-case) | 4442 | (case-fold-search completion-ignore-case) |
| 4440 | (completion-regexp-list (cons regex completion-regexp-list)) | 4443 | (completion-regexp-list (cons regex completion-regexp-list)) |
| 4441 | (compl (all-completions | 4444 | (compl (all-completions |
| @@ -4450,18 +4453,12 @@ PATTERN is as returned by `completion-pcm--string->pattern'." | |||
| 4450 | (when (string-match-p regex c) (push c poss))) | 4453 | (when (string-match-p regex c) (push c poss))) |
| 4451 | (nreverse poss)))))) | 4454 | (nreverse poss)))))) |
| 4452 | 4455 | ||
| 4453 | (defvar flex-score-match-tightness 3 | 4456 | (defvar flex-score-match-tightness nil) |
| 4454 | "Controls how the `flex' completion style scores its matches. | ||
| 4455 | 4457 | ||
| 4456 | Value is a positive number. A number smaller than 1 makes the | 4458 | (make-obsolete-variable |
| 4457 | scoring formula reward matches scattered along the string, while | 4459 | 'flex-score-match-tightness |
| 4458 | a number greater than one make the formula reward matches that | 4460 | "It never did anything very useful anyway." |
| 4459 | are clumped together. I.e \"foo\" matches both strings | 4461 | "31.0") |
| 4460 | \"fbarbazoo\" and \"fabrobazo\", which are of equal length, but | ||
| 4461 | only a value greater than one will score the former (which has | ||
| 4462 | one large \"hole\" and a clumped-together \"oo\" match) higher | ||
| 4463 | than the latter (which has two \"holes\" and three | ||
| 4464 | one-letter-long matches).") | ||
| 4465 | 4462 | ||
| 4466 | (defvar completion-lazy-hilit nil | 4463 | (defvar completion-lazy-hilit nil |
| 4467 | "If non-nil, request lazy highlighting of completion candidates. | 4464 | "If non-nil, request lazy highlighting of completion candidates. |
| @@ -4502,133 +4499,49 @@ details." | |||
| 4502 | (funcall completion-lazy-hilit-fn (copy-sequence str)) | 4499 | (funcall completion-lazy-hilit-fn (copy-sequence str)) |
| 4503 | str)) | 4500 | str)) |
| 4504 | 4501 | ||
| 4505 | (defun completion--hilit-from-re (string regexp &optional point-idx) | 4502 | (cl-defun completion--flex-score (pat str &optional dont-error) |
| 4506 | "Fontify STRING using REGEXP POINT-IDX. | 4503 | "Compute flex score of STR matching PAT using Gotoh algorithm. |
| 4507 | Uses `completions-common-part' and `completions-first-difference' | 4504 | If DONT-ERROR, return nil if PAT cannot match STR. |
| 4508 | faces to fontify STRING. | 4505 | Return (NORMALIZED-COST . MATCHES) where NORMALIZED-COST is a |
| 4509 | POINT-IDX is the position of point in the presumed \"PCM\" pattern | 4506 | number (lower = better) and MATCHES is a list of match positions in STR." |
| 4510 | from which REGEXP was generated." | 4507 | (pcase-let ((`(,cost . ,matches) |
| 4511 | (let* ((md (and regexp (string-match regexp string) (cddr (match-data t)))) | 4508 | (completion--flex-score-gotoh pat str))) |
| 4512 | (pos (if point-idx (match-beginning point-idx) (match-end 0))) | 4509 | (unless cost |
| 4513 | (me (and md (match-end 0))) | 4510 | (if dont-error (cl-return-from completion--flex-score nil) |
| 4514 | (from 0)) | 4511 | (error "Pattern %s does not match %s" pat str))) |
| 4515 | (while md | 4512 | (cons (* (1+ cost) (- (length str) (length pat))) matches))) |
| 4516 | (add-face-text-property from (pop md) | 4513 | |
| 4517 | 'completions-common-part nil string) | 4514 | (defun completion--flex-propertize (str matches point-idx segments) |
| 4518 | (setq from (pop md))) | 4515 | "Add completion faces to STR based on MATCHES and POINT-IDX. |
| 4519 | (if (and (numberp pos) (> (length string) pos)) | 4516 | MATCHES is a list of match positions. POINT-IDX is a match group index |
| 4520 | (add-face-text-property | 4517 | from the PCM pattern. SEGMENTS are extracted from the full PCM pattern. |
| 4521 | pos (1+ pos) | 4518 | Adds `completions-common-part' for matched positions and |
| 4522 | 'completions-first-difference | 4519 | `completions-first-difference' for the position corresponding to point." |
| 4523 | nil string)) | 4520 | (when point-idx |
| 4524 | (unless (or (not me) (= from me)) | 4521 | ;; Compute character position from segments |
| 4525 | (add-face-text-property from me 'completions-common-part nil string)) | 4522 | (let* ((pos (cl-loop for seg in segments |
| 4526 | string)) | 4523 | for i from 1 |
| 4527 | 4524 | while (<= i point-idx) | |
| 4528 | (defun completion--flex-score-1 (md-groups match-end len) | 4525 | sum (length (car seg))))) |
| 4529 | "Compute matching score of completion. | 4526 | ;; Add first-difference after pos-th match, if in range |
| 4530 | The score lies in the range between 0 and 1, where 1 corresponds to | 4527 | (let ((point-match (and (> pos 0) |
| 4531 | the full match. | 4528 | (<= pos (length matches)) |
| 4532 | MD-GROUPS is the \"group\" part of the match data. | 4529 | (nth (1- pos) matches)))) |
| 4533 | MATCH-END is the end of the match. | 4530 | (when (and point-match (< (1+ point-match) (length str))) |
| 4534 | LEN is the length of the completion string." | 4531 | (add-face-text-property |
| 4535 | (let* ((from 0) | 4532 | (1+ point-match) (+ 2 point-match) |
| 4536 | ;; To understand how this works, consider these simple | 4533 | 'completions-first-difference nil str))))) |
| 4537 | ;; ascii diagrams showing how the pattern "foo" | 4534 | ;; Highlight matched positions |
| 4538 | ;; flex-matches "fabrobazo", "fbarbazoo" and | 4535 | (dolist (pos matches) |
| 4539 | ;; "barfoobaz": | 4536 | (add-face-text-property pos (1+ pos) |
| 4540 | 4537 | 'completions-common-part | |
| 4541 | ;; f abr o baz o | 4538 | nil str)) |
| 4542 | ;; + --- + --- + | 4539 | str) |
| 4543 | 4540 | ||
| 4544 | ;; f barbaz oo | 4541 | (defvar completion-flex--pattern-str nil |
| 4545 | ;; + ------ ++ | 4542 | "Pattern string for flex completion scoring. |
| 4546 | 4543 | This is the concatenated string parts from the PCM pattern, | |
| 4547 | ;; bar foo baz | 4544 | used by `completion--flex-score' for Gotoh algorithm matching.") |
| 4548 | ;; +++ | ||
| 4549 | |||
| 4550 | ;; "+" indicates parts where the pattern matched. A | ||
| 4551 | ;; "hole" in the middle of the string is indicated by | ||
| 4552 | ;; "-". Note that there are no "holes" near the edges | ||
| 4553 | ;; of the string. The completion score is a number | ||
| 4554 | ;; bound by (0..1] (i.e., larger than (but not equal | ||
| 4555 | ;; to) zero, and smaller or equal to one): the higher | ||
| 4556 | ;; the better and only a perfect match (pattern equals | ||
| 4557 | ;; string) will have score 1. The formula takes the | ||
| 4558 | ;; form of a quotient. For the numerator, we use the | ||
| 4559 | ;; number of +, i.e. the length of the pattern. For | ||
| 4560 | ;; the denominator, it first computes | ||
| 4561 | ;; | ||
| 4562 | ;; hole_i_contrib = 1 + (Li-1)^(1/tightness) | ||
| 4563 | ;; | ||
| 4564 | ;; , for each hole "i" of length "Li", where tightness | ||
| 4565 | ;; is given by `flex-score-match-tightness'. The | ||
| 4566 | ;; final value for the denominator is then given by: | ||
| 4567 | ;; | ||
| 4568 | ;; (SUM_across_i(hole_i_contrib) + 1) * len | ||
| 4569 | ;; | ||
| 4570 | ;; , where "len" is the string's length. | ||
| 4571 | (score-numerator 0) | ||
| 4572 | (score-denominator 0) | ||
| 4573 | (last-b 0)) | ||
| 4574 | (while (and md-groups (car md-groups)) | ||
| 4575 | (let ((a from) | ||
| 4576 | (b (pop md-groups))) | ||
| 4577 | (setq | ||
| 4578 | score-numerator (+ score-numerator (- b a))) | ||
| 4579 | (unless (or (= a last-b) | ||
| 4580 | (zerop last-b) | ||
| 4581 | (= a len)) | ||
| 4582 | (setq | ||
| 4583 | score-denominator (+ score-denominator | ||
| 4584 | 1 | ||
| 4585 | (expt (- a last-b 1) | ||
| 4586 | (/ 1.0 | ||
| 4587 | flex-score-match-tightness))))) | ||
| 4588 | (setq | ||
| 4589 | last-b b)) | ||
| 4590 | (setq from (pop md-groups))) | ||
| 4591 | ;; If `pattern' doesn't have an explicit trailing any, the | ||
| 4592 | ;; regex `re' won't produce match data representing the | ||
| 4593 | ;; region after the match. We need to account to account | ||
| 4594 | ;; for that extra bit of match (bug#42149). | ||
| 4595 | (unless (= from match-end) | ||
| 4596 | (let ((a from) | ||
| 4597 | (b match-end)) | ||
| 4598 | (setq | ||
| 4599 | score-numerator (+ score-numerator (- b a))) | ||
| 4600 | (unless (or (= a last-b) | ||
| 4601 | (zerop last-b) | ||
| 4602 | (= a len)) | ||
| 4603 | (setq | ||
| 4604 | score-denominator (+ score-denominator | ||
| 4605 | 1 | ||
| 4606 | (expt (- a last-b 1) | ||
| 4607 | (/ 1.0 | ||
| 4608 | flex-score-match-tightness))))) | ||
| 4609 | (setq | ||
| 4610 | last-b b))) | ||
| 4611 | (/ score-numerator (* len (1+ score-denominator)) 1.0))) | ||
| 4612 | |||
| 4613 | (defvar completion--flex-score-last-md nil | ||
| 4614 | "Helper variable for `completion--flex-score'.") | ||
| 4615 | |||
| 4616 | (defun completion--flex-score (str re &optional dont-error) | ||
| 4617 | "Compute flex score of completion STR based on RE. | ||
| 4618 | If DONT-ERROR, just return nil if RE doesn't match STR." | ||
| 4619 | (let ((case-fold-search completion-ignore-case)) | ||
| 4620 | (cond ((string-match re str) | ||
| 4621 | (let* ((match-end (match-end 0)) | ||
| 4622 | (md (cddr | ||
| 4623 | (setq | ||
| 4624 | completion--flex-score-last-md | ||
| 4625 | (match-data t completion--flex-score-last-md))))) | ||
| 4626 | (completion--flex-score-1 md match-end (length str)))) | ||
| 4627 | ((not dont-error) | ||
| 4628 | (error "Internal error: %s does not match %s" re str))))) | ||
| 4629 | |||
| 4630 | (defvar completion-pcm--regexp nil | ||
| 4631 | "Regexp from PCM pattern in `completion-pcm--hilit-commonality'.") | ||
| 4632 | 4545 | ||
| 4633 | (defun completion-pcm--hilit-commonality (pattern completions) | 4546 | (defun completion-pcm--hilit-commonality (pattern completions) |
| 4634 | "Show where and how well PATTERN matches COMPLETIONS. | 4547 | "Show where and how well PATTERN matches COMPLETIONS. |
| @@ -4645,22 +4558,37 @@ relevant segments. | |||
| 4645 | Else, if `completion-lazy-hilit' is t, return COMPLETIONS | 4558 | Else, if `completion-lazy-hilit' is t, return COMPLETIONS |
| 4646 | unchanged, but setup a suitable `completion-lazy-hilit-fn' (which | 4559 | unchanged, but setup a suitable `completion-lazy-hilit-fn' (which |
| 4647 | see) for later lazy highlighting." | 4560 | see) for later lazy highlighting." |
| 4648 | (setq completion-pcm--regexp nil | 4561 | (setq completion-lazy-hilit-fn nil |
| 4649 | completion-lazy-hilit-fn nil) | 4562 | completion-flex--pattern-str nil) |
| 4650 | (cond | 4563 | (cond |
| 4651 | ((and completions (cl-loop for e in pattern thereis (stringp e))) | 4564 | ((and completions (cl-loop for e in pattern thereis (stringp e))) |
| 4652 | (let* ((segments (completion-pcm--pattern->segments pattern)) | 4565 | (let* ((segments (completion-pcm--pattern->segments pattern)) |
| 4653 | (re (completion-pcm--segments->regex segments 'group)) | 4566 | (point-idx (completion-pcm--segments-point-idx segments)) |
| 4654 | (point-idx (completion-pcm--segments-point-idx segments))) | 4567 | ;; Extract pattern string (concatenate string elements) |
| 4655 | (setq completion-pcm--regexp re) | 4568 | (pat (mapconcat #'identity |
| 4569 | (delq nil (mapcar (lambda (x) | ||
| 4570 | (if (stringp x) x nil)) | ||
| 4571 | pattern)) | ||
| 4572 | ""))) | ||
| 4573 | (setq completion-flex--pattern-str pat) | ||
| 4656 | (cond (completion-lazy-hilit | 4574 | (cond (completion-lazy-hilit |
| 4657 | (setq completion-lazy-hilit-fn | 4575 | (setq completion-lazy-hilit-fn |
| 4658 | (lambda (str) (completion--hilit-from-re str re point-idx))) | 4576 | (lambda (str) |
| 4577 | (let ((result (completion--flex-score pat str t))) | ||
| 4578 | (when result | ||
| 4579 | (completion--flex-propertize | ||
| 4580 | str (cdr result) point-idx segments))) | ||
| 4581 | str)) | ||
| 4659 | completions) | 4582 | completions) |
| 4660 | (t | 4583 | (t |
| 4661 | (mapcar | 4584 | (mapcar |
| 4662 | (lambda (str) | 4585 | (lambda (str) |
| 4663 | (completion--hilit-from-re (copy-sequence str) re point-idx)) | 4586 | (setq str (copy-sequence str)) |
| 4587 | (let ((result (completion--flex-score pat str t))) | ||
| 4588 | (when result | ||
| 4589 | (completion--flex-propertize | ||
| 4590 | str (cdr result) point-idx segments))) | ||
| 4591 | str) | ||
| 4664 | completions))))) | 4592 | completions))))) |
| 4665 | (t completions))) | 4593 | (t completions))) |
| 4666 | 4594 | ||
| @@ -4959,11 +4887,13 @@ the same set of elements." | |||
| 4959 | ;; Mostly derived from the code of `basic' completion. | 4887 | ;; Mostly derived from the code of `basic' completion. |
| 4960 | 4888 | ||
| 4961 | (defun completion-substring--all-completions | 4889 | (defun completion-substring--all-completions |
| 4962 | (string table pred point &optional transform-pattern-fn) | 4890 | (string table pred point &optional |
| 4891 | transform-pattern-fn simple-re) | ||
| 4963 | "Match the presumed substring STRING to the entries in TABLE. | 4892 | "Match the presumed substring STRING to the entries in TABLE. |
| 4964 | Respect PRED and POINT. The pattern used is a PCM-style | 4893 | Respect PRED and POINT. The pattern used is a PCM-style substring |
| 4965 | substring pattern, but it be massaged by TRANSFORM-PATTERN-FN, if | 4894 | pattern, but it be massaged by TRANSFORM-PATTERN-FN, if that is non-nil. |
| 4966 | that is non-nil." | 4895 | SIMPLE-RE is means to pass a simpler faster regular expression to |
| 4896 | `completion-pcm--all-completions'" | ||
| 4967 | (let* ((beforepoint (substring string 0 point)) | 4897 | (let* ((beforepoint (substring string 0 point)) |
| 4968 | (afterpoint (substring string point)) | 4898 | (afterpoint (substring string point)) |
| 4969 | (bounds (completion-boundaries beforepoint table pred afterpoint)) | 4899 | (bounds (completion-boundaries beforepoint table pred afterpoint)) |
| @@ -4978,7 +4908,13 @@ that is non-nil." | |||
| 4978 | (if transform-pattern-fn | 4908 | (if transform-pattern-fn |
| 4979 | (funcall transform-pattern-fn pattern) | 4909 | (funcall transform-pattern-fn pattern) |
| 4980 | pattern))) | 4910 | pattern))) |
| 4981 | (all (completion-pcm--all-completions prefix pattern table pred))) | 4911 | (override-re (and simple-re |
| 4912 | (mapconcat #'identity | ||
| 4913 | (split-string | ||
| 4914 | (substring string (car bounds) | ||
| 4915 | (+ point (cdr bounds))) "" t) | ||
| 4916 | ".*"))) | ||
| 4917 | (all (completion-pcm--all-completions prefix pattern table pred override-re))) | ||
| 4982 | (list all pattern prefix suffix (car bounds)))) | 4918 | (list all pattern prefix suffix (car bounds)))) |
| 4983 | 4919 | ||
| 4984 | (defun completion-substring-try-completion (string table pred point) | 4920 | (defun completion-substring-try-completion (string table pred point) |
| @@ -5009,7 +4945,7 @@ that is non-nil." | |||
| 5009 | 4945 | ||
| 5010 | (defun completion--flex-adjust-metadata (metadata) | 4946 | (defun completion--flex-adjust-metadata (metadata) |
| 5011 | "If `flex' is actually doing filtering, adjust sorting." | 4947 | "If `flex' is actually doing filtering, adjust sorting." |
| 5012 | (let ((flex-is-filtering-p completion-pcm--regexp) | 4948 | (let ((flex-is-filtering-p completion-flex--pattern-str) |
| 5013 | (existing-dsf | 4949 | (existing-dsf |
| 5014 | (completion-metadata-get metadata 'display-sort-function)) | 4950 | (completion-metadata-get metadata 'display-sort-function)) |
| 5015 | (existing-csf | 4951 | (existing-csf |
| @@ -5021,11 +4957,11 @@ that is non-nil." | |||
| 5021 | (mapcar | 4957 | (mapcar |
| 5022 | (lambda (str) | 4958 | (lambda (str) |
| 5023 | (cons | 4959 | (cons |
| 5024 | (- (completion--flex-score | 4960 | (car (completion--flex-score |
| 5025 | (or (get-text-property | 4961 | completion-flex--pattern-str |
| 5026 | 0 'completion--unquoted str) | 4962 | (or (get-text-property |
| 5027 | str) | 4963 | 0 'completion--unquoted str) |
| 5028 | completion-pcm--regexp)) | 4964 | str))) |
| 5029 | str)) | 4965 | str)) |
| 5030 | (if existing-sort-fn | 4966 | (if existing-sort-fn |
| 5031 | (funcall existing-sort-fn completions) | 4967 | (funcall existing-sort-fn completions) |
| @@ -5067,7 +5003,8 @@ which is at the core of flex logic. The extra | |||
| 5067 | (pcase-let ((`(,all ,pattern ,prefix ,suffix ,_carbounds) | 5003 | (pcase-let ((`(,all ,pattern ,prefix ,suffix ,_carbounds) |
| 5068 | (completion-substring--all-completions | 5004 | (completion-substring--all-completions |
| 5069 | string table pred point | 5005 | string table pred point |
| 5070 | #'completion-flex--make-flex-pattern))) | 5006 | #'completion-flex--make-flex-pattern |
| 5007 | t))) | ||
| 5071 | (if minibuffer-completing-file-name | 5008 | (if minibuffer-completing-file-name |
| 5072 | (setq all (completion-pcm--filename-try-filter all))) | 5009 | (setq all (completion-pcm--filename-try-filter all))) |
| 5073 | ;; Try some "merging", meaning add as much as possible to the | 5010 | ;; Try some "merging", meaning add as much as possible to the |
| @@ -5084,7 +5021,8 @@ which is at the core of flex logic. The extra | |||
| 5084 | (pcase-let ((`(,all ,pattern ,prefix ,_suffix ,_carbounds) | 5021 | (pcase-let ((`(,all ,pattern ,prefix ,_suffix ,_carbounds) |
| 5085 | (completion-substring--all-completions | 5022 | (completion-substring--all-completions |
| 5086 | string table pred point | 5023 | string table pred point |
| 5087 | #'completion-flex--make-flex-pattern))) | 5024 | #'completion-flex--make-flex-pattern |
| 5025 | t))) | ||
| 5088 | (when all | 5026 | (when all |
| 5089 | (nconc (completion-pcm--hilit-commonality pattern all) | 5027 | (nconc (completion-pcm--hilit-commonality pattern all) |
| 5090 | (length prefix)))))) | 5028 | (length prefix)))))) |
diff --git a/src/minibuf.c b/src/minibuf.c index 5dc2b230883..f7dffc24b94 100644 --- a/src/minibuf.c +++ b/src/minibuf.c | |||
| @@ -20,6 +20,7 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ | |||
| 20 | 20 | ||
| 21 | #include <config.h> | 21 | #include <config.h> |
| 22 | #include <errno.h> | 22 | #include <errno.h> |
| 23 | #include <math.h> | ||
| 23 | 24 | ||
| 24 | #include <binary-io.h> | 25 | #include <binary-io.h> |
| 25 | 26 | ||
| @@ -2279,6 +2280,201 @@ init_minibuf_once_for_pdumper (void) | |||
| 2279 | last_minibuf_string = Qnil; | 2280 | last_minibuf_string = Qnil; |
| 2280 | } | 2281 | } |
| 2281 | 2282 | ||
| 2283 | /* FLEX/GOTOH algorithm for the 'flex' completion-style. Adapted from | ||
| 2284 | GOTOH, Osamu. An improved algorithm for matching biological | ||
| 2285 | sequences. Journal of molecular biology, 1982, 162.3: 705-708. | ||
| 2286 | |||
| 2287 | This algorithm matches patterns to candidate strings, or needles to | ||
| 2288 | haystacks. It works with cost matrices: imagine rows of these | ||
| 2289 | matrices as pattern characters, and columns as the candidate string | ||
| 2290 | characters. There is a -1 row, and a -1 column. The values there | ||
| 2291 | hold real costs used for situations "before the first ever" match of | ||
| 2292 | a pattern character to a string character. | ||
| 2293 | |||
| 2294 | M and D are cost matrices. At the end of the algorithm, M will have | ||
| 2295 | non-infinite values only for the spots where a pattern character | ||
| 2296 | matches a string character. So a non-infinite M[i,j] means the i-th | ||
| 2297 | character of the pattern matches the j-th character of the string. | ||
| 2298 | The value stored is the lowest possible cost the algorithm had to | ||
| 2299 | "pay" to be able to make that match there, given everything that may | ||
| 2300 | have happened before/to the left. An infinite value simply means no | ||
| 2301 | match at this pattern/string position. Note that both row and column | ||
| 2302 | of M may have more than one match at multiple indices. But this | ||
| 2303 | particular implementation of the algorithm assumes they have at least | ||
| 2304 | one match. | ||
| 2305 | |||
| 2306 | D (originally stands for 'Deletion' in the Gotoh paper) has "running | ||
| 2307 | costs". Each value D[i,j] represents what the algorithm has to pay | ||
| 2308 | to make or extend a gap when a match is found at i+1, j+1. By that | ||
| 2309 | time, that cost may or may not be lower than continuing from a match | ||
| 2310 | that had also been found at i,j. We always pick the lowest cost, and | ||
| 2311 | by the time we reach the final column, we know we have picked the | ||
| 2312 | cheapest possible path choosing when to gap, and when to follow up. | ||
| 2313 | |||
| 2314 | Along the way, we construct P, a matrix used just for backtracking, | ||
| 2315 | to reconstruct that path. Maybe P isn't needed, and all the | ||
| 2316 | information can be cleverly derived from the final state of M and D. | ||
| 2317 | But I couldn't make it work. */ | ||
| 2318 | DEFUN ("completion--flex-score-gotoh", Fcompletion__flex_score_gotoh, | ||
| 2319 | Scompletion__flex_score_gotoh, 2, 2, 0, | ||
| 2320 | doc: /* Compute flex score of STR matching PAT using Gotoh | ||
| 2321 | algorithm. Return nil if no match found, else return (COST . MATCHES) | ||
| 2322 | where COST is a fixnum (lower is better) and MATCHES is a list of match | ||
| 2323 | positions in STR. */) | ||
| 2324 | (Lisp_Object pat, Lisp_Object str) | ||
| 2325 | { | ||
| 2326 | /* Pre-allocated matrices for flex completion scoring. */ | ||
| 2327 | #define FLEX_MAX_STR_SIZE 512 | ||
| 2328 | #define FLEX_MAX_PAT_SIZE 128 | ||
| 2329 | #define FLEX_MAX_MATRIX_SIZE FLEX_MAX_PAT_SIZE * FLEX_MAX_STR_SIZE | ||
| 2330 | /* Macro for 2D indexing into "flat" arrays. */ | ||
| 2331 | #define MAT(matrix, i, j) ((matrix)[((i) + 1) * width + ((j) + 1)]) | ||
| 2332 | |||
| 2333 | CHECK_STRING (pat); | ||
| 2334 | CHECK_STRING (str); | ||
| 2335 | |||
| 2336 | size_t patlen = SCHARS (pat); | ||
| 2337 | size_t strlen = SCHARS (str); | ||
| 2338 | size_t width = strlen + 1; | ||
| 2339 | size_t size = (patlen + 1) * width; | ||
| 2340 | |||
| 2341 | /* Bail if strings are empty or matrix too large. */ | ||
| 2342 | if (patlen == 0 || strlen == 0) | ||
| 2343 | return Qnil; | ||
| 2344 | |||
| 2345 | if (size > FLEX_MAX_MATRIX_SIZE) | ||
| 2346 | return Qnil; | ||
| 2347 | |||
| 2348 | /* Cost constants (lower is better). Maybe these could be | ||
| 2349 | defcustom's?*/ | ||
| 2350 | const int gap_open_cost = 10; | ||
| 2351 | const int gap_extend_cost = 1; | ||
| 2352 | const int pos_inf = INT_MAX / 2; | ||
| 2353 | |||
| 2354 | static int M[FLEX_MAX_MATRIX_SIZE]; | ||
| 2355 | static int D[FLEX_MAX_MATRIX_SIZE]; | ||
| 2356 | static size_t P[FLEX_MAX_MATRIX_SIZE]; | ||
| 2357 | |||
| 2358 | /* Initialize costs. Fill both matrices with positive infinity. */ | ||
| 2359 | for (int j = 0; j < size; j++) M[j] = pos_inf; | ||
| 2360 | for (int j = 0; j < size; j++) D[j] = pos_inf; | ||
| 2361 | /* Except for D[0,0], which is 0, for prioritizing matches at the | ||
| 2362 | beginning. Remaining elements on the first row are gap_open_cost/2 | ||
| 2363 | to represent cheaper leading gaps. */ | ||
| 2364 | for (int j = 0; j < width; j++) D[j] = gap_open_cost/2; | ||
| 2365 | D[0] = 0; | ||
| 2366 | |||
| 2367 | /* Index of last match before gap started, as computed in the previous | ||
| 2368 | row. Used to build P. */ | ||
| 2369 | int prev_gap_origin = -1; | ||
| 2370 | |||
| 2371 | /* Poor man's iterator type. */ | ||
| 2372 | typedef struct iter { int x; ptrdiff_t c; ptrdiff_t b; } iter_t; | ||
| 2373 | |||
| 2374 | /* Info about first match computed in the previous row. */ | ||
| 2375 | iter_t prev_match = {0,0,0}; | ||
| 2376 | /* Forward pass. */ | ||
| 2377 | for (iter_t i = {0,0,0}; i.x < patlen; i.x++) | ||
| 2378 | { | ||
| 2379 | int pat_char = fetch_string_char_advance(pat, &i.c, &i.b); | ||
| 2380 | int gap_origin = -1; | ||
| 2381 | bool match_seen = false; | ||
| 2382 | |||
| 2383 | for (iter_t j = prev_match; j.x < strlen; j.x++) | ||
| 2384 | { | ||
| 2385 | iter_t jcopy = j; /* else advance function destroys it... */ | ||
| 2386 | int str_char | ||
| 2387 | = fetch_string_char_advance (str, &j.c, &j.b); | ||
| 2388 | |||
| 2389 | /* Check if characters match (case-insensitive if needed). */ | ||
| 2390 | bool cmatch; | ||
| 2391 | if (completion_ignore_case) | ||
| 2392 | cmatch = (downcase (pat_char) == downcase (str_char)); | ||
| 2393 | else | ||
| 2394 | cmatch = (pat_char == str_char); | ||
| 2395 | |||
| 2396 | /* Compute match cost M[i][j], i.e. replace its infinite | ||
| 2397 | value with something finite. */ | ||
| 2398 | if (cmatch) | ||
| 2399 | { | ||
| 2400 | if (!match_seen) | ||
| 2401 | { | ||
| 2402 | match_seen = true; | ||
| 2403 | prev_match = jcopy; | ||
| 2404 | } | ||
| 2405 | int pmatch_cost = MAT (M, i.x - 1, j.x - 1); | ||
| 2406 | int pgap_cost = MAT (D, i.x - 1, j.x - 1); | ||
| 2407 | |||
| 2408 | if (pmatch_cost <= pgap_cost) | ||
| 2409 | { | ||
| 2410 | /* Not only did the previous char also match (else | ||
| 2411 | pmatch_cost would have been infinite) but following | ||
| 2412 | it up with this match is best overall. */ | ||
| 2413 | MAT (M, i.x, j.x) = pmatch_cost; | ||
| 2414 | MAT (P, i.x, j.x) = j.x - 1; | ||
| 2415 | } | ||
| 2416 | else | ||
| 2417 | { | ||
| 2418 | /* Gapping is best, regardless of whether the previous | ||
| 2419 | char also matched. That is, it's better to arrive at | ||
| 2420 | this match from a gap. */ | ||
| 2421 | MAT (M, i.x, j.x) = pgap_cost; | ||
| 2422 | MAT (P, i.x, j.x) = prev_gap_origin; | ||
| 2423 | } | ||
| 2424 | } | ||
| 2425 | |||
| 2426 | /* Regardless of a match here, compute D[i,j], the best | ||
| 2427 | accumulated gapping cost at this point, considering whether | ||
| 2428 | it's more advantageous to open from a previous match on | ||
| 2429 | this row (a cost which may well be infinite if no such | ||
| 2430 | match ever existed) or extend a gap started sometime | ||
| 2431 | before. The next iteration will take this into account, | ||
| 2432 | and so will the next row when analyzing a possible match | ||
| 2433 | for the j+1-th string character. */ | ||
| 2434 | int open_cost = MAT (M, i.x, j.x - 1) + gap_open_cost; | ||
| 2435 | int extend_cost = MAT (D, i.x, j.x - 1) + gap_extend_cost; | ||
| 2436 | |||
| 2437 | if (open_cost < extend_cost) | ||
| 2438 | { | ||
| 2439 | MAT (D, i.x, j.x) = open_cost; | ||
| 2440 | gap_origin = j.x - 1; /* New gap. */ | ||
| 2441 | } | ||
| 2442 | else | ||
| 2443 | MAT (D, i.x, j.x) = extend_cost; /* Extend gap. */ | ||
| 2444 | } | ||
| 2445 | prev_gap_origin = gap_origin; | ||
| 2446 | } | ||
| 2447 | |||
| 2448 | /* Find best (lowest) cost in last row. */ | ||
| 2449 | int best_cost = pos_inf; | ||
| 2450 | int lastcol = -1; | ||
| 2451 | |||
| 2452 | for (int j = 0; j < strlen; j++) | ||
| 2453 | { | ||
| 2454 | int cost = MAT (M, patlen - 1, j); | ||
| 2455 | if (cost < best_cost) | ||
| 2456 | { | ||
| 2457 | best_cost = cost; | ||
| 2458 | lastcol = j; | ||
| 2459 | } | ||
| 2460 | } | ||
| 2461 | |||
| 2462 | if (lastcol < 0 || best_cost >= pos_inf) | ||
| 2463 | return Qnil; | ||
| 2464 | |||
| 2465 | /* Build match positions list by tracing back through P matrix. */ | ||
| 2466 | Lisp_Object matches = Qnil; | ||
| 2467 | for (int i = patlen - 1, l = lastcol; i >= 0 && l >= 0; i--) | ||
| 2468 | { | ||
| 2469 | matches = Fcons (make_fixnum (l), matches); | ||
| 2470 | l = MAT (P, i, l); | ||
| 2471 | } | ||
| 2472 | |||
| 2473 | return Fcons (make_fixnum (best_cost), matches); | ||
| 2474 | #undef MAT | ||
| 2475 | |||
| 2476 | } | ||
| 2477 | |||
| 2282 | void | 2478 | void |
| 2283 | syms_of_minibuf (void) | 2479 | syms_of_minibuf (void) |
| 2284 | { | 2480 | { |
| @@ -2541,6 +2737,7 @@ showing the *Completions* buffer, if any. */); | |||
| 2541 | defsubr (&Stest_completion); | 2737 | defsubr (&Stest_completion); |
| 2542 | defsubr (&Sassoc_string); | 2738 | defsubr (&Sassoc_string); |
| 2543 | defsubr (&Scompleting_read); | 2739 | defsubr (&Scompleting_read); |
| 2740 | defsubr (&Scompletion__flex_score_gotoh); | ||
| 2544 | DEFSYM (Qminibuffer_quit_recursive_edit, "minibuffer-quit-recursive-edit"); | 2741 | DEFSYM (Qminibuffer_quit_recursive_edit, "minibuffer-quit-recursive-edit"); |
| 2545 | DEFSYM (Qinternal_complete_buffer, "internal-complete-buffer"); | 2742 | DEFSYM (Qinternal_complete_buffer, "internal-complete-buffer"); |
| 2546 | DEFSYM (Qcompleting_read_function, "completing-read-function"); | 2743 | DEFSYM (Qcompleting_read_function, "completing-read-function"); |
diff --git a/test/lisp/minibuffer-tests.el b/test/lisp/minibuffer-tests.el index 79ffb1d3fc7..02df7661c75 100644 --- a/test/lisp/minibuffer-tests.el +++ b/test/lisp/minibuffer-tests.el | |||
| @@ -205,11 +205,6 @@ | |||
| 205 | '("some/alpha" "base/epsilon" "base/delta")) | 205 | '("some/alpha" "base/epsilon" "base/delta")) |
| 206 | `("epsilon" "delta" "beta" "alpha" "gamma" . 5)))) | 206 | `("epsilon" "delta" "beta" "alpha" "gamma" . 5)))) |
| 207 | 207 | ||
| 208 | (defun completion--pcm-score (comp) | ||
| 209 | "Get `completion-score' from COMP." | ||
| 210 | ;; FIXME, uses minibuffer.el implementation details | ||
| 211 | (completion--flex-score comp completion-pcm--regexp)) | ||
| 212 | |||
| 213 | (defun completion--pcm-first-difference-pos (comp) | 208 | (defun completion--pcm-first-difference-pos (comp) |
| 214 | "Get `completions-first-difference' from COMP." | 209 | "Get `completions-first-difference' from COMP." |
| 215 | (cl-loop for pos = (next-single-property-change 0 'face comp) | 210 | (cl-loop for pos = (next-single-property-change 0 'face comp) |
| @@ -244,20 +239,11 @@ | |||
| 244 | "barfoobar"))) | 239 | "barfoobar"))) |
| 245 | 240 | ||
| 246 | (ert-deftest completion-pcm-test-3 () | 241 | (ert-deftest completion-pcm-test-3 () |
| 247 | ;; Full match! | 242 | (should (car (completion-pcm-all-completions |
| 248 | (should (eql | 243 | "R" '("R" "hello") nil 1)))) |
| 249 | (completion--pcm-score | ||
| 250 | (car (completion-pcm-all-completions | ||
| 251 | "R" '("R" "hello") nil 1))) | ||
| 252 | 1.0))) | ||
| 253 | 244 | ||
| 254 | (ert-deftest completion-pcm-test-4 () | 245 | (ert-deftest completion-pcm-test-4 () |
| 255 | ;; One fourth of a match and no match due to point being at the end | 246 | ;; No match due to point being at the end |
| 256 | (should (eql | ||
| 257 | (completion--pcm-score | ||
| 258 | (car (completion-pcm-all-completions | ||
| 259 | "RO" '("RaOb") nil 1))) | ||
| 260 | (/ 1.0 4.0))) | ||
| 261 | (should (null | 247 | (should (null |
| 262 | (completion-pcm-all-completions | 248 | (completion-pcm-all-completions |
| 263 | "RO" '("RaOb") nil 2)))) | 249 | "RO" '("RaOb") nil 2)))) |
| @@ -420,24 +406,14 @@ | |||
| 420 | "a"))) | 406 | "a"))) |
| 421 | 407 | ||
| 422 | (ert-deftest completion-substring-test-1 () | 408 | (ert-deftest completion-substring-test-1 () |
| 423 | ;; One third of a match! | ||
| 424 | (should (equal | 409 | (should (equal |
| 425 | (car (completion-substring-all-completions | 410 | (car (completion-substring-all-completions |
| 426 | "foo" '("hello" "world" "barfoobar") nil 3)) | 411 | "foo" '("hello" "world" "barfoobar") nil 3)) |
| 427 | "barfoobar")) | 412 | "barfoobar"))) |
| 428 | (should (eql | ||
| 429 | (completion--pcm-score | ||
| 430 | (car (completion-substring-all-completions | ||
| 431 | "foo" '("hello" "world" "barfoobar") nil 3))) | ||
| 432 | (/ 1.0 3.0)))) | ||
| 433 | 413 | ||
| 434 | (ert-deftest completion-substring-test-2 () | 414 | (ert-deftest completion-substring-test-2 () |
| 435 | ;; Full match! | 415 | (should (car (completion-substring-all-completions |
| 436 | (should (eql | 416 | "R" '("R" "hello") nil 1)))) |
| 437 | (completion--pcm-score | ||
| 438 | (car (completion-substring-all-completions | ||
| 439 | "R" '("R" "hello") nil 1))) | ||
| 440 | 1.0))) | ||
| 441 | 417 | ||
| 442 | (ert-deftest completion-substring-test-3 () | 418 | (ert-deftest completion-substring-test-3 () |
| 443 | ;; Substring match | 419 | ;; Substring match |
| @@ -495,39 +471,70 @@ | |||
| 495 | (completion-substring-try-completion "b" '("ab" "ab") nil 0) | 471 | (completion-substring-try-completion "b" '("ab" "ab") nil 0) |
| 496 | '("ab" . 2)))) | 472 | '("ab" . 2)))) |
| 497 | 473 | ||
| 474 | (defun completion--sorted-flex-completions (pat list &optional point) | ||
| 475 | "Flex test helper" | ||
| 476 | (let ((all (completion-flex-all-completions pat list nil point))) | ||
| 477 | (setcdr (last all) nil) | ||
| 478 | (sort all | ||
| 479 | (lambda (a b) | ||
| 480 | (< (car (completion--flex-score pat a)) | ||
| 481 | (car (completion--flex-score pat b))))))) | ||
| 482 | |||
| 498 | (ert-deftest completion-flex-test-1 () | 483 | (ert-deftest completion-flex-test-1 () |
| 499 | ;; Fuzzy match | ||
| 500 | (should (equal | 484 | (should (equal |
| 501 | (car (completion-flex-all-completions | 485 | (car (completion-flex-all-completions |
| 502 | "foo" '("hello" "world" "fabrobazo") nil 3)) | 486 | "foo" '("hello" "world" "fabrobazo") nil 3)) |
| 503 | "fabrobazo"))) | 487 | "fabrobazo"))) |
| 504 | 488 | ||
| 505 | (ert-deftest completion-flex-test-2 () | 489 | (ert-deftest completion-flex-test-2 () |
| 506 | ;; Full match! | 490 | (should (car (completion--sorted-flex-completions |
| 507 | (should (eql | 491 | "R" '("R" "hello") 1)))) |
| 508 | (completion--pcm-score | ||
| 509 | (car (completion-flex-all-completions | ||
| 510 | "R" '("R" "hello") nil 1))) | ||
| 511 | 1.0))) | ||
| 512 | 492 | ||
| 513 | (ert-deftest completion-flex-test-3 () | 493 | (ert-deftest completion-flex-test-3 () |
| 514 | ;; Another fuzzy match, but more of a "substring" one | ||
| 515 | (should (equal | 494 | (should (equal |
| 516 | (car (completion-flex-all-completions | 495 | (car (completion--sorted-flex-completions |
| 517 | "custgroup" '("customize-group-other-window") nil 4)) | 496 | "custgroup" '("customize-group-other-window") 4)) |
| 518 | "customize-group-other-window")) | 497 | "customize-group-other-window")) |
| 519 | ;; `completions-first-difference' should be at the right place | 498 | ;; `completions-first-difference' should be at the right place |
| 520 | (should (equal | 499 | (should (equal |
| 521 | (completion--pcm-first-difference-pos | 500 | (completion--pcm-first-difference-pos |
| 522 | (car (completion-flex-all-completions | 501 | (car (completion--sorted-flex-completions |
| 523 | "custgroup" '("customize-group-other-window") nil 4))) | 502 | "custgroup" '("customize-group-other-window") 4))) |
| 524 | 4)) | 503 | 4)) |
| 525 | (should (equal | 504 | (should (equal |
| 526 | (completion--pcm-first-difference-pos | 505 | (completion--pcm-first-difference-pos |
| 527 | (car (completion-flex-all-completions | 506 | (car (completion--sorted-flex-completions |
| 528 | "custgroup" '("customize-group-other-window") nil 9))) | 507 | "custgroup" '("customize-group-other-window") 9))) |
| 529 | 15))) | 508 | 15))) |
| 530 | 509 | ||
| 510 | (ert-deftest completion-flex-test-non-ascii () | ||
| 511 | "Test flex completion with variable-width UTF-8 characters." | ||
| 512 | ;; Uses Japanese Kanji to test multi-byte character handling. | ||
| 513 | |||
| 514 | ;; 日本 = "nihon" (Japan), 東京 = "tōkyō" (Tokyo) | ||
| 515 | (should (equal | ||
| 516 | (car (completion--sorted-flex-completions | ||
| 517 | "日本" '("日本語" "日本" "中国") 2)) | ||
| 518 | "日本")) | ||
| 519 | |||
| 520 | ;; 図書館 = "toshokan" (library) | ||
| 521 | (should (equal | ||
| 522 | (car (completion--sorted-flex-completions | ||
| 523 | "tsk" '("図書館-toshokan" "task" "desk") 3)) | ||
| 524 | "task")) | ||
| 525 | |||
| 526 | ;; Mixed pattern (Kanji + ASCII) matching mixed string | ||
| 527 | ;; 学校 = "gakkō" (school) | ||
| 528 | (should (equal | ||
| 529 | (car (completion--sorted-flex-completions | ||
| 530 | "学s" '("学校-school" "school" "学生") 2)) | ||
| 531 | "学校-school")) | ||
| 532 | |||
| 533 | ;; Pattern "東" should match "東京" better than "関東" | ||
| 534 | (let ((results (completion--sorted-flex-completions | ||
| 535 | "東" '("東京" "関東") 1))) | ||
| 536 | (should (equal (car results) "東京")))) | ||
| 537 | |||
| 531 | 538 | ||
| 532 | (defmacro with-minibuffer-setup (completing-read &rest body) | 539 | (defmacro with-minibuffer-setup (completing-read &rest body) |
| 533 | (declare (indent 1) (debug t)) | 540 | (declare (indent 1) (debug t)) |