aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoão Távora2026-02-03 12:14:03 +0000
committerJoão Távora2026-02-05 15:17:42 +0000
commitafb422bb9840a4b24ed803fdd3546bc4ef2bcb4f (patch)
treeb90dd79c8c85aca68ab3eaa301f6b0757d9d3813
parent3ea1010a6b0a63e90896133deaba189f13d47436 (diff)
downloademacs-feature/newflex.tar.gz
emacs-feature/newflex.zip
Rewrite flex completion scoring with Gotoh algorithmfeature/newflex
The greedy regexp matching, broken scoring and broken highlight were sources of frequent complaints about the 'flex' matching style. This commit fixes that. It was inspired by the 'hotfuzz' style available at https://github.com/axelf4/hotfuzz which is a modified version of Gotoh's 1982 dynamic programming algorithm (see: GOTOH, Osamu. An improved algorithm for matching biological sequences. Journal of molecular biology, 1982, 162.3: 705-708.). That style is slightly more sophisticated than 'flex' (has special rules for matching things at word boundaries, a C module with multithreading support). It's almost (but not entirely) void of hacks so it'd make a good candidate to replace 'flex' entirely, but no progress has been made in getting it into Emacs's core in over 2 years, so I thought I'd try my hand at it. The new 'flex' implementation also uses Gotoh algorithm (apparently a common choice for these kinds of task) and happens mostly in a new C function. It is strictly more correct than the "old" flex. For example, when matching the pattern 'goto' to, say, 'eglot--goto' and 'eglot--bol', no longer is the latter returned first, which was a substantial annoyance. And of course the highlighting is also correctly placed on the 'goto' not scattered across the candidate. Regarding performance, it is faster than the naive 'flex', but that's mainly because this commit also includes changes to the Elisp code which make faster regexp's for the filtering step. It is slower than 'hotfuzz' when that style's C-module extension is leveraged. 'hotfuzz' does the filtering and sorting steps together in C code and has multithreaded workers there. The matching and scoring algorithm itself is not the bottleneck. Test code were refactored and more tests were added. * src/minibuf.c (completion--flex-score-gotoh): New function. * lisp/minibuffer.el (completion--flex-score): Rewrite. (completion--flex-propertize): New function. (completion-flex--pattern-str): New variable. (flex-score-match-tightness): Make obsolete. (completion-pcm--all-completions): Add optional override-re parameter. (completion-pcm--hilit-commonality): No more re-based highlighting. (completion-substring--all-completions): Add optional simple-re parameters. (completion--flex-adjust-metadata): Tweak to new scoring API. (completion-flex-try-completion, completion-flex-all-completions): Pass simple-re parameter to completion-substring--all-completions. (completion--hilit-from-re, completion--flex-score-1) (completion--flex-score-last-md, completion-pcm--regexp): Delete. * test/lisp/minibuffer-tests.el (completion--sorted-flex-completions): New helper function. (completion-flex-test-non-ascii): New test. (completion--pcm-score): Delete. (completion-pcm-test-3, completion-pcm-test-4) (completion-substring-test-1, completion-substring-test-2) (completion-flex-test-2, completion-flex-test-3): Remove old scoring expectations.
-rw-r--r--lisp/minibuffer.el260
-rw-r--r--src/minibuf.c197
-rw-r--r--test/lisp/minibuffer-tests.el95
3 files changed, 347 insertions, 205 deletions
diff --git a/lisp/minibuffer.el b/lisp/minibuffer.el
index fc193fe54f0..5b5408a595c 100644
--- a/lisp/minibuffer.el
+++ b/lisp/minibuffer.el
@@ -4421,9 +4421,12 @@ This is used in combination with `completion-pcm--segments->regex'."
4421 (setq idx i))) 4421 (setq idx i)))
4422 idx)) 4422 idx))
4423 4423
4424(defun completion-pcm--all-completions (prefix pattern table pred) 4424(defun completion-pcm--all-completions (prefix pattern table pred
4425 &optional override-re)
4425 "Find all completions for PATTERN in TABLE obeying PRED. 4426 "Find all completions for PATTERN in TABLE obeying PRED.
4426PATTERN is as returned by `completion-pcm--string->pattern'." 4427PATTERN is as returned by `completion-pcm--string->pattern'.
4428OVERRIDE-RE means to use this regular expression instead of grabbing one
4429from PATTERN."
4427 ;; (cl-assert (= (car (completion-boundaries prefix table pred "")) 4430 ;; (cl-assert (= (car (completion-boundaries prefix table pred ""))
4428 ;; (length prefix))) 4431 ;; (length prefix)))
4429 ;; Find an initial list of possible completions. 4432 ;; Find an initial list of possible completions.
@@ -4435,7 +4438,7 @@ PATTERN is as returned by `completion-pcm--string->pattern'."
4435 ;; Use all-completions to do an initial cull. This is a big win, 4438 ;; Use all-completions to do an initial cull. This is a big win,
4436 ;; since all-completions is written in C! 4439 ;; since all-completions is written in C!
4437 (let* (;; Convert search pattern to a standard regular expression. 4440 (let* (;; Convert search pattern to a standard regular expression.
4438 (regex (completion-pcm--pattern->regex pattern)) 4441 (regex (or override-re (completion-pcm--pattern->regex pattern)))
4439 (case-fold-search completion-ignore-case) 4442 (case-fold-search completion-ignore-case)
4440 (completion-regexp-list (cons regex completion-regexp-list)) 4443 (completion-regexp-list (cons regex completion-regexp-list))
4441 (compl (all-completions 4444 (compl (all-completions
@@ -4450,18 +4453,12 @@ PATTERN is as returned by `completion-pcm--string->pattern'."
4450 (when (string-match-p regex c) (push c poss))) 4453 (when (string-match-p regex c) (push c poss)))
4451 (nreverse poss)))))) 4454 (nreverse poss))))))
4452 4455
4453(defvar flex-score-match-tightness 3 4456(defvar flex-score-match-tightness nil)
4454 "Controls how the `flex' completion style scores its matches.
4455 4457
4456Value is a positive number. A number smaller than 1 makes the 4458(make-obsolete-variable
4457scoring formula reward matches scattered along the string, while 4459 'flex-score-match-tightness
4458a number greater than one make the formula reward matches that 4460 "It never did anything very useful anyway."
4459are clumped together. I.e \"foo\" matches both strings 4461 "31.0")
4460\"fbarbazoo\" and \"fabrobazo\", which are of equal length, but
4461only a value greater than one will score the former (which has
4462one large \"hole\" and a clumped-together \"oo\" match) higher
4463than the latter (which has two \"holes\" and three
4464one-letter-long matches).")
4465 4462
4466(defvar completion-lazy-hilit nil 4463(defvar completion-lazy-hilit nil
4467 "If non-nil, request lazy highlighting of completion candidates. 4464 "If non-nil, request lazy highlighting of completion candidates.
@@ -4502,133 +4499,49 @@ details."
4502 (funcall completion-lazy-hilit-fn (copy-sequence str)) 4499 (funcall completion-lazy-hilit-fn (copy-sequence str))
4503 str)) 4500 str))
4504 4501
4505(defun completion--hilit-from-re (string regexp &optional point-idx) 4502(cl-defun completion--flex-score (pat str &optional dont-error)
4506 "Fontify STRING using REGEXP POINT-IDX. 4503 "Compute flex score of STR matching PAT using Gotoh algorithm.
4507Uses `completions-common-part' and `completions-first-difference' 4504If DONT-ERROR, return nil if PAT cannot match STR.
4508faces to fontify STRING. 4505Return (NORMALIZED-COST . MATCHES) where NORMALIZED-COST is a
4509POINT-IDX is the position of point in the presumed \"PCM\" pattern 4506number (lower = better) and MATCHES is a list of match positions in STR."
4510from which REGEXP was generated." 4507 (pcase-let ((`(,cost . ,matches)
4511 (let* ((md (and regexp (string-match regexp string) (cddr (match-data t)))) 4508 (completion--flex-score-gotoh pat str)))
4512 (pos (if point-idx (match-beginning point-idx) (match-end 0))) 4509 (unless cost
4513 (me (and md (match-end 0))) 4510 (if dont-error (cl-return-from completion--flex-score nil)
4514 (from 0)) 4511 (error "Pattern %s does not match %s" pat str)))
4515 (while md 4512 (cons (* (1+ cost) (- (length str) (length pat))) matches)))
4516 (add-face-text-property from (pop md) 4513
4517 'completions-common-part nil string) 4514(defun completion--flex-propertize (str matches point-idx segments)
4518 (setq from (pop md))) 4515 "Add completion faces to STR based on MATCHES and POINT-IDX.
4519 (if (and (numberp pos) (> (length string) pos)) 4516MATCHES is a list of match positions. POINT-IDX is a match group index
4520 (add-face-text-property 4517from the PCM pattern. SEGMENTS are extracted from the full PCM pattern.
4521 pos (1+ pos) 4518Adds `completions-common-part' for matched positions and
4522 'completions-first-difference 4519`completions-first-difference' for the position corresponding to point."
4523 nil string)) 4520 (when point-idx
4524 (unless (or (not me) (= from me)) 4521 ;; Compute character position from segments
4525 (add-face-text-property from me 'completions-common-part nil string)) 4522 (let* ((pos (cl-loop for seg in segments
4526 string)) 4523 for i from 1
4527 4524 while (<= i point-idx)
4528(defun completion--flex-score-1 (md-groups match-end len) 4525 sum (length (car seg)))))
4529 "Compute matching score of completion. 4526 ;; Add first-difference after pos-th match, if in range
4530The score lies in the range between 0 and 1, where 1 corresponds to 4527 (let ((point-match (and (> pos 0)
4531the full match. 4528 (<= pos (length matches))
4532MD-GROUPS is the \"group\" part of the match data. 4529 (nth (1- pos) matches))))
4533MATCH-END is the end of the match. 4530 (when (and point-match (< (1+ point-match) (length str)))
4534LEN is the length of the completion string." 4531 (add-face-text-property
4535 (let* ((from 0) 4532 (1+ point-match) (+ 2 point-match)
4536 ;; To understand how this works, consider these simple 4533 'completions-first-difference nil str)))))
4537 ;; ascii diagrams showing how the pattern "foo" 4534 ;; Highlight matched positions
4538 ;; flex-matches "fabrobazo", "fbarbazoo" and 4535 (dolist (pos matches)
4539 ;; "barfoobaz": 4536 (add-face-text-property pos (1+ pos)
4540 4537 'completions-common-part
4541 ;; f abr o baz o 4538 nil str))
4542 ;; + --- + --- + 4539 str)
4543 4540
4544 ;; f barbaz oo 4541(defvar completion-flex--pattern-str nil
4545 ;; + ------ ++ 4542 "Pattern string for flex completion scoring.
4546 4543This is the concatenated string parts from the PCM pattern,
4547 ;; bar foo baz 4544used by `completion--flex-score' for Gotoh algorithm matching.")
4548 ;; +++
4549
4550 ;; "+" indicates parts where the pattern matched. A
4551 ;; "hole" in the middle of the string is indicated by
4552 ;; "-". Note that there are no "holes" near the edges
4553 ;; of the string. The completion score is a number
4554 ;; bound by (0..1] (i.e., larger than (but not equal
4555 ;; to) zero, and smaller or equal to one): the higher
4556 ;; the better and only a perfect match (pattern equals
4557 ;; string) will have score 1. The formula takes the
4558 ;; form of a quotient. For the numerator, we use the
4559 ;; number of +, i.e. the length of the pattern. For
4560 ;; the denominator, it first computes
4561 ;;
4562 ;; hole_i_contrib = 1 + (Li-1)^(1/tightness)
4563 ;;
4564 ;; , for each hole "i" of length "Li", where tightness
4565 ;; is given by `flex-score-match-tightness'. The
4566 ;; final value for the denominator is then given by:
4567 ;;
4568 ;; (SUM_across_i(hole_i_contrib) + 1) * len
4569 ;;
4570 ;; , where "len" is the string's length.
4571 (score-numerator 0)
4572 (score-denominator 0)
4573 (last-b 0))
4574 (while (and md-groups (car md-groups))
4575 (let ((a from)
4576 (b (pop md-groups)))
4577 (setq
4578 score-numerator (+ score-numerator (- b a)))
4579 (unless (or (= a last-b)
4580 (zerop last-b)
4581 (= a len))
4582 (setq
4583 score-denominator (+ score-denominator
4584 1
4585 (expt (- a last-b 1)
4586 (/ 1.0
4587 flex-score-match-tightness)))))
4588 (setq
4589 last-b b))
4590 (setq from (pop md-groups)))
4591 ;; If `pattern' doesn't have an explicit trailing any, the
4592 ;; regex `re' won't produce match data representing the
4593 ;; region after the match. We need to account to account
4594 ;; for that extra bit of match (bug#42149).
4595 (unless (= from match-end)
4596 (let ((a from)
4597 (b match-end))
4598 (setq
4599 score-numerator (+ score-numerator (- b a)))
4600 (unless (or (= a last-b)
4601 (zerop last-b)
4602 (= a len))
4603 (setq
4604 score-denominator (+ score-denominator
4605 1
4606 (expt (- a last-b 1)
4607 (/ 1.0
4608 flex-score-match-tightness)))))
4609 (setq
4610 last-b b)))
4611 (/ score-numerator (* len (1+ score-denominator)) 1.0)))
4612
4613(defvar completion--flex-score-last-md nil
4614 "Helper variable for `completion--flex-score'.")
4615
4616(defun completion--flex-score (str re &optional dont-error)
4617 "Compute flex score of completion STR based on RE.
4618If DONT-ERROR, just return nil if RE doesn't match STR."
4619 (let ((case-fold-search completion-ignore-case))
4620 (cond ((string-match re str)
4621 (let* ((match-end (match-end 0))
4622 (md (cddr
4623 (setq
4624 completion--flex-score-last-md
4625 (match-data t completion--flex-score-last-md)))))
4626 (completion--flex-score-1 md match-end (length str))))
4627 ((not dont-error)
4628 (error "Internal error: %s does not match %s" re str)))))
4629
4630(defvar completion-pcm--regexp nil
4631 "Regexp from PCM pattern in `completion-pcm--hilit-commonality'.")
4632 4545
4633(defun completion-pcm--hilit-commonality (pattern completions) 4546(defun completion-pcm--hilit-commonality (pattern completions)
4634 "Show where and how well PATTERN matches COMPLETIONS. 4547 "Show where and how well PATTERN matches COMPLETIONS.
@@ -4645,22 +4558,37 @@ relevant segments.
4645Else, if `completion-lazy-hilit' is t, return COMPLETIONS 4558Else, if `completion-lazy-hilit' is t, return COMPLETIONS
4646unchanged, but setup a suitable `completion-lazy-hilit-fn' (which 4559unchanged, but setup a suitable `completion-lazy-hilit-fn' (which
4647see) for later lazy highlighting." 4560see) for later lazy highlighting."
4648 (setq completion-pcm--regexp nil 4561 (setq completion-lazy-hilit-fn nil
4649 completion-lazy-hilit-fn nil) 4562 completion-flex--pattern-str nil)
4650 (cond 4563 (cond
4651 ((and completions (cl-loop for e in pattern thereis (stringp e))) 4564 ((and completions (cl-loop for e in pattern thereis (stringp e)))
4652 (let* ((segments (completion-pcm--pattern->segments pattern)) 4565 (let* ((segments (completion-pcm--pattern->segments pattern))
4653 (re (completion-pcm--segments->regex segments 'group)) 4566 (point-idx (completion-pcm--segments-point-idx segments))
4654 (point-idx (completion-pcm--segments-point-idx segments))) 4567 ;; Extract pattern string (concatenate string elements)
4655 (setq completion-pcm--regexp re) 4568 (pat (mapconcat #'identity
4569 (delq nil (mapcar (lambda (x)
4570 (if (stringp x) x nil))
4571 pattern))
4572 "")))
4573 (setq completion-flex--pattern-str pat)
4656 (cond (completion-lazy-hilit 4574 (cond (completion-lazy-hilit
4657 (setq completion-lazy-hilit-fn 4575 (setq completion-lazy-hilit-fn
4658 (lambda (str) (completion--hilit-from-re str re point-idx))) 4576 (lambda (str)
4577 (let ((result (completion--flex-score pat str t)))
4578 (when result
4579 (completion--flex-propertize
4580 str (cdr result) point-idx segments)))
4581 str))
4659 completions) 4582 completions)
4660 (t 4583 (t
4661 (mapcar 4584 (mapcar
4662 (lambda (str) 4585 (lambda (str)
4663 (completion--hilit-from-re (copy-sequence str) re point-idx)) 4586 (setq str (copy-sequence str))
4587 (let ((result (completion--flex-score pat str t)))
4588 (when result
4589 (completion--flex-propertize
4590 str (cdr result) point-idx segments)))
4591 str)
4664 completions))))) 4592 completions)))))
4665 (t completions))) 4593 (t completions)))
4666 4594
@@ -4959,11 +4887,13 @@ the same set of elements."
4959;; Mostly derived from the code of `basic' completion. 4887;; Mostly derived from the code of `basic' completion.
4960 4888
4961(defun completion-substring--all-completions 4889(defun completion-substring--all-completions
4962 (string table pred point &optional transform-pattern-fn) 4890 (string table pred point &optional
4891 transform-pattern-fn simple-re)
4963 "Match the presumed substring STRING to the entries in TABLE. 4892 "Match the presumed substring STRING to the entries in TABLE.
4964Respect PRED and POINT. The pattern used is a PCM-style 4893Respect PRED and POINT. The pattern used is a PCM-style substring
4965substring pattern, but it be massaged by TRANSFORM-PATTERN-FN, if 4894pattern, but it be massaged by TRANSFORM-PATTERN-FN, if that is non-nil.
4966that is non-nil." 4895SIMPLE-RE is means to pass a simpler faster regular expression to
4896`completion-pcm--all-completions'"
4967 (let* ((beforepoint (substring string 0 point)) 4897 (let* ((beforepoint (substring string 0 point))
4968 (afterpoint (substring string point)) 4898 (afterpoint (substring string point))
4969 (bounds (completion-boundaries beforepoint table pred afterpoint)) 4899 (bounds (completion-boundaries beforepoint table pred afterpoint))
@@ -4978,7 +4908,13 @@ that is non-nil."
4978 (if transform-pattern-fn 4908 (if transform-pattern-fn
4979 (funcall transform-pattern-fn pattern) 4909 (funcall transform-pattern-fn pattern)
4980 pattern))) 4910 pattern)))
4981 (all (completion-pcm--all-completions prefix pattern table pred))) 4911 (override-re (and simple-re
4912 (mapconcat #'identity
4913 (split-string
4914 (substring string (car bounds)
4915 (+ point (cdr bounds))) "" t)
4916 ".*")))
4917 (all (completion-pcm--all-completions prefix pattern table pred override-re)))
4982 (list all pattern prefix suffix (car bounds)))) 4918 (list all pattern prefix suffix (car bounds))))
4983 4919
4984(defun completion-substring-try-completion (string table pred point) 4920(defun completion-substring-try-completion (string table pred point)
@@ -5009,7 +4945,7 @@ that is non-nil."
5009 4945
5010(defun completion--flex-adjust-metadata (metadata) 4946(defun completion--flex-adjust-metadata (metadata)
5011 "If `flex' is actually doing filtering, adjust sorting." 4947 "If `flex' is actually doing filtering, adjust sorting."
5012 (let ((flex-is-filtering-p completion-pcm--regexp) 4948 (let ((flex-is-filtering-p completion-flex--pattern-str)
5013 (existing-dsf 4949 (existing-dsf
5014 (completion-metadata-get metadata 'display-sort-function)) 4950 (completion-metadata-get metadata 'display-sort-function))
5015 (existing-csf 4951 (existing-csf
@@ -5021,11 +4957,11 @@ that is non-nil."
5021 (mapcar 4957 (mapcar
5022 (lambda (str) 4958 (lambda (str)
5023 (cons 4959 (cons
5024 (- (completion--flex-score 4960 (car (completion--flex-score
5025 (or (get-text-property 4961 completion-flex--pattern-str
5026 0 'completion--unquoted str) 4962 (or (get-text-property
5027 str) 4963 0 'completion--unquoted str)
5028 completion-pcm--regexp)) 4964 str)))
5029 str)) 4965 str))
5030 (if existing-sort-fn 4966 (if existing-sort-fn
5031 (funcall existing-sort-fn completions) 4967 (funcall existing-sort-fn completions)
@@ -5067,7 +5003,8 @@ which is at the core of flex logic. The extra
5067 (pcase-let ((`(,all ,pattern ,prefix ,suffix ,_carbounds) 5003 (pcase-let ((`(,all ,pattern ,prefix ,suffix ,_carbounds)
5068 (completion-substring--all-completions 5004 (completion-substring--all-completions
5069 string table pred point 5005 string table pred point
5070 #'completion-flex--make-flex-pattern))) 5006 #'completion-flex--make-flex-pattern
5007 t)))
5071 (if minibuffer-completing-file-name 5008 (if minibuffer-completing-file-name
5072 (setq all (completion-pcm--filename-try-filter all))) 5009 (setq all (completion-pcm--filename-try-filter all)))
5073 ;; Try some "merging", meaning add as much as possible to the 5010 ;; Try some "merging", meaning add as much as possible to the
@@ -5084,7 +5021,8 @@ which is at the core of flex logic. The extra
5084 (pcase-let ((`(,all ,pattern ,prefix ,_suffix ,_carbounds) 5021 (pcase-let ((`(,all ,pattern ,prefix ,_suffix ,_carbounds)
5085 (completion-substring--all-completions 5022 (completion-substring--all-completions
5086 string table pred point 5023 string table pred point
5087 #'completion-flex--make-flex-pattern))) 5024 #'completion-flex--make-flex-pattern
5025 t)))
5088 (when all 5026 (when all
5089 (nconc (completion-pcm--hilit-commonality pattern all) 5027 (nconc (completion-pcm--hilit-commonality pattern all)
5090 (length prefix)))))) 5028 (length prefix))))))
diff --git a/src/minibuf.c b/src/minibuf.c
index 5dc2b230883..f7dffc24b94 100644
--- a/src/minibuf.c
+++ b/src/minibuf.c
@@ -20,6 +20,7 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
20 20
21#include <config.h> 21#include <config.h>
22#include <errno.h> 22#include <errno.h>
23#include <math.h>
23 24
24#include <binary-io.h> 25#include <binary-io.h>
25 26
@@ -2279,6 +2280,201 @@ init_minibuf_once_for_pdumper (void)
2279 last_minibuf_string = Qnil; 2280 last_minibuf_string = Qnil;
2280} 2281}
2281 2282
2283/* FLEX/GOTOH algorithm for the 'flex' completion-style. Adapted from
2284 GOTOH, Osamu. An improved algorithm for matching biological
2285 sequences. Journal of molecular biology, 1982, 162.3: 705-708.
2286
2287 This algorithm matches patterns to candidate strings, or needles to
2288 haystacks. It works with cost matrices: imagine rows of these
2289 matrices as pattern characters, and columns as the candidate string
2290 characters. There is a -1 row, and a -1 column. The values there
2291 hold real costs used for situations "before the first ever" match of
2292 a pattern character to a string character.
2293
2294 M and D are cost matrices. At the end of the algorithm, M will have
2295 non-infinite values only for the spots where a pattern character
2296 matches a string character. So a non-infinite M[i,j] means the i-th
2297 character of the pattern matches the j-th character of the string.
2298 The value stored is the lowest possible cost the algorithm had to
2299 "pay" to be able to make that match there, given everything that may
2300 have happened before/to the left. An infinite value simply means no
2301 match at this pattern/string position. Note that both row and column
2302 of M may have more than one match at multiple indices. But this
2303 particular implementation of the algorithm assumes they have at least
2304 one match.
2305
2306 D (originally stands for 'Deletion' in the Gotoh paper) has "running
2307 costs". Each value D[i,j] represents what the algorithm has to pay
2308 to make or extend a gap when a match is found at i+1, j+1. By that
2309 time, that cost may or may not be lower than continuing from a match
2310 that had also been found at i,j. We always pick the lowest cost, and
2311 by the time we reach the final column, we know we have picked the
2312 cheapest possible path choosing when to gap, and when to follow up.
2313
2314 Along the way, we construct P, a matrix used just for backtracking,
2315 to reconstruct that path. Maybe P isn't needed, and all the
2316 information can be cleverly derived from the final state of M and D.
2317 But I couldn't make it work. */
2318DEFUN ("completion--flex-score-gotoh", Fcompletion__flex_score_gotoh,
2319 Scompletion__flex_score_gotoh, 2, 2, 0,
2320 doc: /* Compute flex score of STR matching PAT using Gotoh
2321algorithm. Return nil if no match found, else return (COST . MATCHES)
2322where COST is a fixnum (lower is better) and MATCHES is a list of match
2323positions in STR. */)
2324 (Lisp_Object pat, Lisp_Object str)
2325{
2326 /* Pre-allocated matrices for flex completion scoring. */
2327#define FLEX_MAX_STR_SIZE 512
2328#define FLEX_MAX_PAT_SIZE 128
2329#define FLEX_MAX_MATRIX_SIZE FLEX_MAX_PAT_SIZE * FLEX_MAX_STR_SIZE
2330 /* Macro for 2D indexing into "flat" arrays. */
2331#define MAT(matrix, i, j) ((matrix)[((i) + 1) * width + ((j) + 1)])
2332
2333 CHECK_STRING (pat);
2334 CHECK_STRING (str);
2335
2336 size_t patlen = SCHARS (pat);
2337 size_t strlen = SCHARS (str);
2338 size_t width = strlen + 1;
2339 size_t size = (patlen + 1) * width;
2340
2341 /* Bail if strings are empty or matrix too large. */
2342 if (patlen == 0 || strlen == 0)
2343 return Qnil;
2344
2345 if (size > FLEX_MAX_MATRIX_SIZE)
2346 return Qnil;
2347
2348 /* Cost constants (lower is better). Maybe these could be
2349 defcustom's?*/
2350 const int gap_open_cost = 10;
2351 const int gap_extend_cost = 1;
2352 const int pos_inf = INT_MAX / 2;
2353
2354 static int M[FLEX_MAX_MATRIX_SIZE];
2355 static int D[FLEX_MAX_MATRIX_SIZE];
2356 static size_t P[FLEX_MAX_MATRIX_SIZE];
2357
2358 /* Initialize costs. Fill both matrices with positive infinity. */
2359 for (int j = 0; j < size; j++) M[j] = pos_inf;
2360 for (int j = 0; j < size; j++) D[j] = pos_inf;
2361 /* Except for D[0,0], which is 0, for prioritizing matches at the
2362 beginning. Remaining elements on the first row are gap_open_cost/2
2363 to represent cheaper leading gaps. */
2364 for (int j = 0; j < width; j++) D[j] = gap_open_cost/2;
2365 D[0] = 0;
2366
2367 /* Index of last match before gap started, as computed in the previous
2368 row. Used to build P. */
2369 int prev_gap_origin = -1;
2370
2371 /* Poor man's iterator type. */
2372 typedef struct iter { int x; ptrdiff_t c; ptrdiff_t b; } iter_t;
2373
2374 /* Info about first match computed in the previous row. */
2375 iter_t prev_match = {0,0,0};
2376 /* Forward pass. */
2377 for (iter_t i = {0,0,0}; i.x < patlen; i.x++)
2378 {
2379 int pat_char = fetch_string_char_advance(pat, &i.c, &i.b);
2380 int gap_origin = -1;
2381 bool match_seen = false;
2382
2383 for (iter_t j = prev_match; j.x < strlen; j.x++)
2384 {
2385 iter_t jcopy = j; /* else advance function destroys it... */
2386 int str_char
2387 = fetch_string_char_advance (str, &j.c, &j.b);
2388
2389 /* Check if characters match (case-insensitive if needed). */
2390 bool cmatch;
2391 if (completion_ignore_case)
2392 cmatch = (downcase (pat_char) == downcase (str_char));
2393 else
2394 cmatch = (pat_char == str_char);
2395
2396 /* Compute match cost M[i][j], i.e. replace its infinite
2397 value with something finite. */
2398 if (cmatch)
2399 {
2400 if (!match_seen)
2401 {
2402 match_seen = true;
2403 prev_match = jcopy;
2404 }
2405 int pmatch_cost = MAT (M, i.x - 1, j.x - 1);
2406 int pgap_cost = MAT (D, i.x - 1, j.x - 1);
2407
2408 if (pmatch_cost <= pgap_cost)
2409 {
2410 /* Not only did the previous char also match (else
2411 pmatch_cost would have been infinite) but following
2412 it up with this match is best overall. */
2413 MAT (M, i.x, j.x) = pmatch_cost;
2414 MAT (P, i.x, j.x) = j.x - 1;
2415 }
2416 else
2417 {
2418 /* Gapping is best, regardless of whether the previous
2419 char also matched. That is, it's better to arrive at
2420 this match from a gap. */
2421 MAT (M, i.x, j.x) = pgap_cost;
2422 MAT (P, i.x, j.x) = prev_gap_origin;
2423 }
2424 }
2425
2426 /* Regardless of a match here, compute D[i,j], the best
2427 accumulated gapping cost at this point, considering whether
2428 it's more advantageous to open from a previous match on
2429 this row (a cost which may well be infinite if no such
2430 match ever existed) or extend a gap started sometime
2431 before. The next iteration will take this into account,
2432 and so will the next row when analyzing a possible match
2433 for the j+1-th string character. */
2434 int open_cost = MAT (M, i.x, j.x - 1) + gap_open_cost;
2435 int extend_cost = MAT (D, i.x, j.x - 1) + gap_extend_cost;
2436
2437 if (open_cost < extend_cost)
2438 {
2439 MAT (D, i.x, j.x) = open_cost;
2440 gap_origin = j.x - 1; /* New gap. */
2441 }
2442 else
2443 MAT (D, i.x, j.x) = extend_cost; /* Extend gap. */
2444 }
2445 prev_gap_origin = gap_origin;
2446 }
2447
2448 /* Find best (lowest) cost in last row. */
2449 int best_cost = pos_inf;
2450 int lastcol = -1;
2451
2452 for (int j = 0; j < strlen; j++)
2453 {
2454 int cost = MAT (M, patlen - 1, j);
2455 if (cost < best_cost)
2456 {
2457 best_cost = cost;
2458 lastcol = j;
2459 }
2460 }
2461
2462 if (lastcol < 0 || best_cost >= pos_inf)
2463 return Qnil;
2464
2465 /* Build match positions list by tracing back through P matrix. */
2466 Lisp_Object matches = Qnil;
2467 for (int i = patlen - 1, l = lastcol; i >= 0 && l >= 0; i--)
2468 {
2469 matches = Fcons (make_fixnum (l), matches);
2470 l = MAT (P, i, l);
2471 }
2472
2473 return Fcons (make_fixnum (best_cost), matches);
2474#undef MAT
2475
2476}
2477
2282void 2478void
2283syms_of_minibuf (void) 2479syms_of_minibuf (void)
2284{ 2480{
@@ -2541,6 +2737,7 @@ showing the *Completions* buffer, if any. */);
2541 defsubr (&Stest_completion); 2737 defsubr (&Stest_completion);
2542 defsubr (&Sassoc_string); 2738 defsubr (&Sassoc_string);
2543 defsubr (&Scompleting_read); 2739 defsubr (&Scompleting_read);
2740 defsubr (&Scompletion__flex_score_gotoh);
2544 DEFSYM (Qminibuffer_quit_recursive_edit, "minibuffer-quit-recursive-edit"); 2741 DEFSYM (Qminibuffer_quit_recursive_edit, "minibuffer-quit-recursive-edit");
2545 DEFSYM (Qinternal_complete_buffer, "internal-complete-buffer"); 2742 DEFSYM (Qinternal_complete_buffer, "internal-complete-buffer");
2546 DEFSYM (Qcompleting_read_function, "completing-read-function"); 2743 DEFSYM (Qcompleting_read_function, "completing-read-function");
diff --git a/test/lisp/minibuffer-tests.el b/test/lisp/minibuffer-tests.el
index 79ffb1d3fc7..02df7661c75 100644
--- a/test/lisp/minibuffer-tests.el
+++ b/test/lisp/minibuffer-tests.el
@@ -205,11 +205,6 @@
205 '("some/alpha" "base/epsilon" "base/delta")) 205 '("some/alpha" "base/epsilon" "base/delta"))
206 `("epsilon" "delta" "beta" "alpha" "gamma" . 5)))) 206 `("epsilon" "delta" "beta" "alpha" "gamma" . 5))))
207 207
208(defun completion--pcm-score (comp)
209 "Get `completion-score' from COMP."
210 ;; FIXME, uses minibuffer.el implementation details
211 (completion--flex-score comp completion-pcm--regexp))
212
213(defun completion--pcm-first-difference-pos (comp) 208(defun completion--pcm-first-difference-pos (comp)
214 "Get `completions-first-difference' from COMP." 209 "Get `completions-first-difference' from COMP."
215 (cl-loop for pos = (next-single-property-change 0 'face comp) 210 (cl-loop for pos = (next-single-property-change 0 'face comp)
@@ -244,20 +239,11 @@
244 "barfoobar"))) 239 "barfoobar")))
245 240
246(ert-deftest completion-pcm-test-3 () 241(ert-deftest completion-pcm-test-3 ()
247 ;; Full match! 242 (should (car (completion-pcm-all-completions
248 (should (eql 243 "R" '("R" "hello") nil 1))))
249 (completion--pcm-score
250 (car (completion-pcm-all-completions
251 "R" '("R" "hello") nil 1)))
252 1.0)))
253 244
254(ert-deftest completion-pcm-test-4 () 245(ert-deftest completion-pcm-test-4 ()
255 ;; One fourth of a match and no match due to point being at the end 246 ;; No match due to point being at the end
256 (should (eql
257 (completion--pcm-score
258 (car (completion-pcm-all-completions
259 "RO" '("RaOb") nil 1)))
260 (/ 1.0 4.0)))
261 (should (null 247 (should (null
262 (completion-pcm-all-completions 248 (completion-pcm-all-completions
263 "RO" '("RaOb") nil 2)))) 249 "RO" '("RaOb") nil 2))))
@@ -420,24 +406,14 @@
420 "a"))) 406 "a")))
421 407
422(ert-deftest completion-substring-test-1 () 408(ert-deftest completion-substring-test-1 ()
423 ;; One third of a match!
424 (should (equal 409 (should (equal
425 (car (completion-substring-all-completions 410 (car (completion-substring-all-completions
426 "foo" '("hello" "world" "barfoobar") nil 3)) 411 "foo" '("hello" "world" "barfoobar") nil 3))
427 "barfoobar")) 412 "barfoobar")))
428 (should (eql
429 (completion--pcm-score
430 (car (completion-substring-all-completions
431 "foo" '("hello" "world" "barfoobar") nil 3)))
432 (/ 1.0 3.0))))
433 413
434(ert-deftest completion-substring-test-2 () 414(ert-deftest completion-substring-test-2 ()
435 ;; Full match! 415 (should (car (completion-substring-all-completions
436 (should (eql 416 "R" '("R" "hello") nil 1))))
437 (completion--pcm-score
438 (car (completion-substring-all-completions
439 "R" '("R" "hello") nil 1)))
440 1.0)))
441 417
442(ert-deftest completion-substring-test-3 () 418(ert-deftest completion-substring-test-3 ()
443 ;; Substring match 419 ;; Substring match
@@ -495,39 +471,70 @@
495 (completion-substring-try-completion "b" '("ab" "ab") nil 0) 471 (completion-substring-try-completion "b" '("ab" "ab") nil 0)
496 '("ab" . 2)))) 472 '("ab" . 2))))
497 473
474(defun completion--sorted-flex-completions (pat list &optional point)
475 "Flex test helper"
476 (let ((all (completion-flex-all-completions pat list nil point)))
477 (setcdr (last all) nil)
478 (sort all
479 (lambda (a b)
480 (< (car (completion--flex-score pat a))
481 (car (completion--flex-score pat b)))))))
482
498(ert-deftest completion-flex-test-1 () 483(ert-deftest completion-flex-test-1 ()
499 ;; Fuzzy match
500 (should (equal 484 (should (equal
501 (car (completion-flex-all-completions 485 (car (completion-flex-all-completions
502 "foo" '("hello" "world" "fabrobazo") nil 3)) 486 "foo" '("hello" "world" "fabrobazo") nil 3))
503 "fabrobazo"))) 487 "fabrobazo")))
504 488
505(ert-deftest completion-flex-test-2 () 489(ert-deftest completion-flex-test-2 ()
506 ;; Full match! 490 (should (car (completion--sorted-flex-completions
507 (should (eql 491 "R" '("R" "hello") 1))))
508 (completion--pcm-score
509 (car (completion-flex-all-completions
510 "R" '("R" "hello") nil 1)))
511 1.0)))
512 492
513(ert-deftest completion-flex-test-3 () 493(ert-deftest completion-flex-test-3 ()
514 ;; Another fuzzy match, but more of a "substring" one
515 (should (equal 494 (should (equal
516 (car (completion-flex-all-completions 495 (car (completion--sorted-flex-completions
517 "custgroup" '("customize-group-other-window") nil 4)) 496 "custgroup" '("customize-group-other-window") 4))
518 "customize-group-other-window")) 497 "customize-group-other-window"))
519 ;; `completions-first-difference' should be at the right place 498 ;; `completions-first-difference' should be at the right place
520 (should (equal 499 (should (equal
521 (completion--pcm-first-difference-pos 500 (completion--pcm-first-difference-pos
522 (car (completion-flex-all-completions 501 (car (completion--sorted-flex-completions
523 "custgroup" '("customize-group-other-window") nil 4))) 502 "custgroup" '("customize-group-other-window") 4)))
524 4)) 503 4))
525 (should (equal 504 (should (equal
526 (completion--pcm-first-difference-pos 505 (completion--pcm-first-difference-pos
527 (car (completion-flex-all-completions 506 (car (completion--sorted-flex-completions
528 "custgroup" '("customize-group-other-window") nil 9))) 507 "custgroup" '("customize-group-other-window") 9)))
529 15))) 508 15)))
530 509
510(ert-deftest completion-flex-test-non-ascii ()
511 "Test flex completion with variable-width UTF-8 characters."
512 ;; Uses Japanese Kanji to test multi-byte character handling.
513
514 ;; 日本 = "nihon" (Japan), 東京 = "tōkyō" (Tokyo)
515 (should (equal
516 (car (completion--sorted-flex-completions
517 "日本" '("日本語" "日本" "中国") 2))
518 "日本"))
519
520 ;; 図書館 = "toshokan" (library)
521 (should (equal
522 (car (completion--sorted-flex-completions
523 "tsk" '("図書館-toshokan" "task" "desk") 3))
524 "task"))
525
526 ;; Mixed pattern (Kanji + ASCII) matching mixed string
527 ;; 学校 = "gakkō" (school)
528 (should (equal
529 (car (completion--sorted-flex-completions
530 "学s" '("学校-school" "school" "学生") 2))
531 "学校-school"))
532
533 ;; Pattern "東" should match "東京" better than "関東"
534 (let ((results (completion--sorted-flex-completions
535 "東" '("東京" "関東") 1)))
536 (should (equal (car results) "東京"))))
537
531 538
532(defmacro with-minibuffer-setup (completing-read &rest body) 539(defmacro with-minibuffer-setup (completing-read &rest body)
533 (declare (indent 1) (debug t)) 540 (declare (indent 1) (debug t))