diff options
| -rw-r--r-- | lisp/minibuffer.el | 260 | ||||
| -rw-r--r-- | src/minibuf.c | 197 | ||||
| -rw-r--r-- | test/lisp/minibuffer-tests.el | 95 |
3 files changed, 347 insertions, 205 deletions
diff --git a/lisp/minibuffer.el b/lisp/minibuffer.el index fc193fe54f0..5b5408a595c 100644 --- a/lisp/minibuffer.el +++ b/lisp/minibuffer.el | |||
| @@ -4421,9 +4421,12 @@ This is used in combination with `completion-pcm--segments->regex'." | |||
| 4421 | (setq idx i))) | 4421 | (setq idx i))) |
| 4422 | idx)) | 4422 | idx)) |
| 4423 | 4423 | ||
| 4424 | (defun completion-pcm--all-completions (prefix pattern table pred) | 4424 | (defun completion-pcm--all-completions (prefix pattern table pred |
| 4425 | &optional override-re) | ||
| 4425 | "Find all completions for PATTERN in TABLE obeying PRED. | 4426 | "Find all completions for PATTERN in TABLE obeying PRED. |
| 4426 | PATTERN is as returned by `completion-pcm--string->pattern'." | 4427 | PATTERN is as returned by `completion-pcm--string->pattern'. |
| 4428 | OVERRIDE-RE means to use this regular expression instead of grabbing one | ||
| 4429 | from PATTERN." | ||
| 4427 | ;; (cl-assert (= (car (completion-boundaries prefix table pred "")) | 4430 | ;; (cl-assert (= (car (completion-boundaries prefix table pred "")) |
| 4428 | ;; (length prefix))) | 4431 | ;; (length prefix))) |
| 4429 | ;; Find an initial list of possible completions. | 4432 | ;; Find an initial list of possible completions. |
| @@ -4435,7 +4438,7 @@ PATTERN is as returned by `completion-pcm--string->pattern'." | |||
| 4435 | ;; Use all-completions to do an initial cull. This is a big win, | 4438 | ;; Use all-completions to do an initial cull. This is a big win, |
| 4436 | ;; since all-completions is written in C! | 4439 | ;; since all-completions is written in C! |
| 4437 | (let* (;; Convert search pattern to a standard regular expression. | 4440 | (let* (;; Convert search pattern to a standard regular expression. |
| 4438 | (regex (completion-pcm--pattern->regex pattern)) | 4441 | (regex (or override-re (completion-pcm--pattern->regex pattern))) |
| 4439 | (case-fold-search completion-ignore-case) | 4442 | (case-fold-search completion-ignore-case) |
| 4440 | (completion-regexp-list (cons regex completion-regexp-list)) | 4443 | (completion-regexp-list (cons regex completion-regexp-list)) |
| 4441 | (compl (all-completions | 4444 | (compl (all-completions |
| @@ -4450,18 +4453,12 @@ PATTERN is as returned by `completion-pcm--string->pattern'." | |||
| 4450 | (when (string-match-p regex c) (push c poss))) | 4453 | (when (string-match-p regex c) (push c poss))) |
| 4451 | (nreverse poss)))))) | 4454 | (nreverse poss)))))) |
| 4452 | 4455 | ||
| 4453 | (defvar flex-score-match-tightness 3 | 4456 | (defvar flex-score-match-tightness nil) |
| 4454 | "Controls how the `flex' completion style scores its matches. | ||
| 4455 | 4457 | ||
| 4456 | Value is a positive number. A number smaller than 1 makes the | 4458 | (make-obsolete-variable |
| 4457 | scoring formula reward matches scattered along the string, while | 4459 | 'flex-score-match-tightness |
| 4458 | a number greater than one make the formula reward matches that | 4460 | "It never did anything very useful anyway." |
| 4459 | are clumped together. I.e \"foo\" matches both strings | 4461 | "31.0") |
| 4460 | \"fbarbazoo\" and \"fabrobazo\", which are of equal length, but | ||
| 4461 | only a value greater than one will score the former (which has | ||
| 4462 | one large \"hole\" and a clumped-together \"oo\" match) higher | ||
| 4463 | than the latter (which has two \"holes\" and three | ||
| 4464 | one-letter-long matches).") | ||
| 4465 | 4462 | ||
| 4466 | (defvar completion-lazy-hilit nil | 4463 | (defvar completion-lazy-hilit nil |
| 4467 | "If non-nil, request lazy highlighting of completion candidates. | 4464 | "If non-nil, request lazy highlighting of completion candidates. |
| @@ -4502,133 +4499,49 @@ details." | |||
| 4502 | (funcall completion-lazy-hilit-fn (copy-sequence str)) | 4499 | (funcall completion-lazy-hilit-fn (copy-sequence str)) |
| 4503 | str)) | 4500 | str)) |
| 4504 | 4501 | ||
| 4505 | (defun completion--hilit-from-re (string regexp &optional point-idx) | 4502 | (cl-defun completion--flex-score (pat str &optional dont-error) |
| 4506 | "Fontify STRING using REGEXP POINT-IDX. | 4503 | "Compute flex score of STR matching PAT using Gotoh algorithm. |
| 4507 | Uses `completions-common-part' and `completions-first-difference' | 4504 | If DONT-ERROR, return nil if PAT cannot match STR. |
| 4508 | faces to fontify STRING. | 4505 | Return (NORMALIZED-COST . MATCHES) where NORMALIZED-COST is a |
| 4509 | POINT-IDX is the position of point in the presumed \"PCM\" pattern | 4506 | number (lower = better) and MATCHES is a list of match positions in STR." |
| 4510 | from which REGEXP was generated." | 4507 | (pcase-let ((`(,cost . ,matches) |
| 4511 | (let* ((md (and regexp (string-match regexp string) (cddr (match-data t)))) | 4508 | (completion--flex-score-gotoh pat str))) |
| 4512 | (pos (if point-idx (match-beginning point-idx) (match-end 0))) | 4509 | (unless cost |
| 4513 | (me (and md (match-end 0))) | 4510 | (if dont-error (cl-return-from completion--flex-score nil) |
| 4514 | (from 0)) | 4511 | (error "Pattern %s does not match %s" pat str))) |
| 4515 | (while md | 4512 | (cons (* (1+ cost) (- (length str) (length pat))) matches))) |
| 4516 | (add-face-text-property from (pop md) | 4513 | |
| 4517 | 'completions-common-part nil string) | 4514 | (defun completion--flex-propertize (str matches point-idx segments) |
| 4518 | (setq from (pop md))) | 4515 | "Add completion faces to STR based on MATCHES and POINT-IDX. |
| 4519 | (if (and (numberp pos) (> (length string) pos)) | 4516 | MATCHES is a list of match positions. POINT-IDX is a match group index |
| 4520 | (add-face-text-property | 4517 | from the PCM pattern. SEGMENTS are extracted from the full PCM pattern. |
| 4521 | pos (1+ pos) | 4518 | Adds `completions-common-part' for matched positions and |
| 4522 | 'completions-first-difference | 4519 | `completions-first-difference' for the position corresponding to point." |
| 4523 | nil string)) | 4520 | (when point-idx |
| 4524 | (unless (or (not me) (= from me)) | 4521 | ;; Compute character position from segments |
| 4525 | (add-face-text-property from me 'completions-common-part nil string)) | 4522 | (let* ((pos (cl-loop for seg in segments |
| 4526 | string)) | 4523 | for i from 1 |
| 4527 | 4524 | while (<= i point-idx) | |
| 4528 | (defun completion--flex-score-1 (md-groups match-end len) | 4525 | sum (length (car seg))))) |
| 4529 | "Compute matching score of completion. | 4526 | ;; Add first-difference after pos-th match, if in range |
| 4530 | The score lies in the range between 0 and 1, where 1 corresponds to | 4527 | (let ((point-match (and (> pos 0) |
| 4531 | the full match. | 4528 | (<= pos (length matches)) |
| 4532 | MD-GROUPS is the \"group\" part of the match data. | 4529 | (nth (1- pos) matches)))) |
| 4533 | MATCH-END is the end of the match. | 4530 | (when (and point-match (< (1+ point-match) (length str))) |
| 4534 | LEN is the length of the completion string." | 4531 | (add-face-text-property |
| 4535 | (let* ((from 0) | 4532 | (1+ point-match) (+ 2 point-match) |
| 4536 | ;; To understand how this works, consider these simple | 4533 | 'completions-first-difference nil str))))) |
| 4537 | ;; ascii diagrams showing how the pattern "foo" | 4534 | ;; Highlight matched positions |
| 4538 | ;; flex-matches "fabrobazo", "fbarbazoo" and | 4535 | (dolist (pos matches) |
| 4539 | ;; "barfoobaz": | 4536 | (add-face-text-property pos (1+ pos) |
| 4540 | 4537 | 'completions-common-part | |
| 4541 | ;; f abr o baz o | 4538 | nil str)) |
| 4542 | ;; + --- + --- + | 4539 | str) |
| 4543 | 4540 | ||
| 4544 | ;; f barbaz oo | 4541 | (defvar completion-flex--pattern-str nil |
| 4545 | ;; + ------ ++ | 4542 | "Pattern string for flex completion scoring. |
| 4546 | 4543 | This is the concatenated string parts from the PCM pattern, | |
| 4547 | ;; bar foo baz | 4544 | used by `completion--flex-score' for Gotoh algorithm matching.") |
| 4548 | ;; +++ | ||
| 4549 | |||
| 4550 | ;; "+" indicates parts where the pattern matched. A | ||
| 4551 | ;; "hole" in the middle of the string is indicated by | ||
| 4552 | ;; "-". Note that there are no "holes" near the edges | ||
| 4553 | ;; of the string. The completion score is a number | ||
| 4554 | ;; bound by (0..1] (i.e., larger than (but not equal | ||
| 4555 | ;; to) zero, and smaller or equal to one): the higher | ||
| 4556 | ;; the better and only a perfect match (pattern equals | ||
| 4557 | ;; string) will have score 1. The formula takes the | ||
| 4558 | ;; form of a quotient. For the numerator, we use the | ||
| 4559 | ;; number of +, i.e. the length of the pattern. For | ||
| 4560 | ;; the denominator, it first computes | ||
| 4561 | ;; | ||
| 4562 | ;; hole_i_contrib = 1 + (Li-1)^(1/tightness) | ||
| 4563 | ;; | ||
| 4564 | ;; , for each hole "i" of length "Li", where tightness | ||
| 4565 | ;; is given by `flex-score-match-tightness'. The | ||
| 4566 | ;; final value for the denominator is then given by: | ||
| 4567 | ;; | ||
| 4568 | ;; (SUM_across_i(hole_i_contrib) + 1) * len | ||
| 4569 | ;; | ||
| 4570 | ;; , where "len" is the string's length. | ||
| 4571 | (score-numerator 0) | ||
| 4572 | (score-denominator 0) | ||
| 4573 | (last-b 0)) | ||
| 4574 | (while (and md-groups (car md-groups)) | ||
| 4575 | (let ((a from) | ||
| 4576 | (b (pop md-groups))) | ||
| 4577 | (setq | ||
| 4578 | score-numerator (+ score-numerator (- b a))) | ||
| 4579 | (unless (or (= a last-b) | ||
| 4580 | (zerop last-b) | ||
| 4581 | (= a len)) | ||
| 4582 | (setq | ||
| 4583 | score-denominator (+ score-denominator | ||
| 4584 | 1 | ||
| 4585 | (expt (- a last-b 1) | ||
| 4586 | (/ 1.0 | ||
| 4587 | flex-score-match-tightness))))) | ||
| 4588 | (setq | ||
| 4589 | last-b b)) | ||
| 4590 | (setq from (pop md-groups))) | ||
| 4591 | ;; If `pattern' doesn't have an explicit trailing any, the | ||
| 4592 | ;; regex `re' won't produce match data representing the | ||
| 4593 | ;; region after the match. We need to account to account | ||
| 4594 | ;; for that extra bit of match (bug#42149). | ||
| 4595 | (unless (= from match-end) | ||
| 4596 | (let ((a from) | ||
| 4597 | (b match-end)) | ||
| 4598 | (setq | ||
| 4599 | score-numerator (+ score-numerator (- b a))) | ||
| 4600 | (unless (or (= a last-b) | ||
| 4601 | (zerop last-b) | ||
| 4602 | (= a len)) | ||
| 4603 | (setq | ||
| 4604 | score-denominator (+ score-denominator | ||
| 4605 | 1 | ||
| 4606 | (expt (- a last-b 1) | ||
| 4607 | (/ 1.0 | ||
| 4608 | flex-score-match-tightness))))) | ||
| 4609 | (setq | ||
| 4610 | last-b b))) | ||
| 4611 | (/ score-numerator (* len (1+ score-denominator)) 1.0))) | ||
| 4612 | |||
| 4613 | (defvar completion--flex-score-last-md nil | ||
| 4614 | "Helper variable for `completion--flex-score'.") | ||
| 4615 | |||
| 4616 | (defun completion--flex-score (str re &optional dont-error) | ||
| 4617 | "Compute flex score of completion STR based on RE. | ||
| 4618 | If DONT-ERROR, just return nil if RE doesn't match STR." | ||
| 4619 | (let ((case-fold-search completion-ignore-case)) | ||
| 4620 | (cond ((string-match re str) | ||
| 4621 | (let* ((match-end (match-end 0)) | ||
| 4622 | (md (cddr | ||
| 4623 | (setq | ||
| 4624 | completion--flex-score-last-md | ||
| 4625 | (match-data t completion--flex-score-last-md))))) | ||
| 4626 | (completion--flex-score-1 md match-end (length str)))) | ||
| 4627 | ((not dont-error) | ||
| 4628 | (error "Internal error: %s does not match %s" re str))))) | ||
| 4629 | |||
| 4630 | (defvar completion-pcm--regexp nil | ||
| 4631 | "Regexp from PCM pattern in `completion-pcm--hilit-commonality'.") | ||
| 4632 | 4545 | ||
| 4633 | (defun completion-pcm--hilit-commonality (pattern completions) | 4546 | (defun completion-pcm--hilit-commonality (pattern completions) |
| 4634 | "Show where and how well PATTERN matches COMPLETIONS. | 4547 | "Show where and how well PATTERN matches COMPLETIONS. |
| @@ -4645,22 +4558,37 @@ relevant segments. | |||
| 4645 | Else, if `completion-lazy-hilit' is t, return COMPLETIONS | 4558 | Else, if `completion-lazy-hilit' is t, return COMPLETIONS |
| 4646 | unchanged, but setup a suitable `completion-lazy-hilit-fn' (which | 4559 | unchanged, but setup a suitable `completion-lazy-hilit-fn' (which |
| 4647 | see) for later lazy highlighting." | 4560 | see) for later lazy highlighting." |
| 4648 | (setq completion-pcm--regexp nil | 4561 | (setq completion-lazy-hilit-fn nil |
| 4649 | completion-lazy-hilit-fn nil) | 4562 | completion-flex--pattern-str nil) |
| 4650 | (cond | 4563 | (cond |
| 4651 | ((and completions (cl-loop for e in pattern thereis (stringp e))) | 4564 | ((and completions (cl-loop for e in pattern thereis (stringp e))) |
| 4652 | (let* ((segments (completion-pcm--pattern->segments pattern)) | 4565 | (let* ((segments (completion-pcm--pattern->segments pattern)) |
| 4653 | (re (completion-pcm--segments->regex segments 'group)) | 4566 | (point-idx (completion-pcm--segments-point-idx segments)) |
| 4654 | (point-idx (completion-pcm--segments-point-idx segments))) | 4567 | ;; Extract pattern string (concatenate string elements) |
| 4655 | (setq completion-pcm--regexp re) | 4568 | (pat (mapconcat #'identity |
| 4569 | (delq nil (mapcar (lambda (x) | ||
| 4570 | (if (stringp x) x nil)) | ||
| 4571 | pattern)) | ||
| 4572 | ""))) | ||
| 4573 | (setq completion-flex--pattern-str pat) | ||
| 4656 | (cond (completion-lazy-hilit | 4574 | (cond (completion-lazy-hilit |
| 4657 | (setq completion-lazy-hilit-fn | 4575 | (setq completion-lazy-hilit-fn |
| 4658 | (lambda (str) (completion--hilit-from-re str re point-idx))) | 4576 | (lambda (str) |
| 4577 | (let ((result (completion--flex-score pat str t))) | ||
| 4578 | (when result | ||
| 4579 | (completion--flex-propertize | ||
| 4580 | str (cdr result) point-idx segments))) | ||
| 4581 | str)) | ||
| 4659 | completions) | 4582 | completions) |
| 4660 | (t | 4583 | (t |
| 4661 | (mapcar | 4584 | (mapcar |
| 4662 | (lambda (str) | 4585 | (lambda (str) |
| 4663 | (completion--hilit-from-re (copy-sequence str) re point-idx)) | 4586 | (setq str (copy-sequence str)) |
| 4587 | (let ((result (completion--flex-score pat str t))) | ||
| 4588 | (when result | ||
| 4589 | (completion--flex-propertize | ||
| 4590 | str (cdr result) point-idx segments))) | ||
| 4591 | str) | ||
| 4664 | completions))))) | 4592 | completions))))) |
| 4665 | (t completions))) | 4593 | (t completions))) |
| 4666 | 4594 | ||
| @@ -4959,11 +4887,13 @@ the same set of elements." | |||
| 4959 | ;; Mostly derived from the code of `basic' completion. | 4887 | ;; Mostly derived from the code of `basic' completion. |
| 4960 | 4888 | ||
| 4961 | (defun completion-substring--all-completions | 4889 | (defun completion-substring--all-completions |
| 4962 | (string table pred point &optional transform-pattern-fn) | 4890 | (string table pred point &optional |
| 4891 | transform-pattern-fn simple-re) | ||
| 4963 | "Match the presumed substring STRING to the entries in TABLE. | 4892 | "Match the presumed substring STRING to the entries in TABLE. |
| 4964 | Respect PRED and POINT. The pattern used is a PCM-style | 4893 | Respect PRED and POINT. The pattern used is a PCM-style substring |
| 4965 | substring pattern, but it be massaged by TRANSFORM-PATTERN-FN, if | 4894 | pattern, but it be massaged by TRANSFORM-PATTERN-FN, if that is non-nil. |
| 4966 | that is non-nil." | 4895 | SIMPLE-RE is means to pass a simpler faster regular expression to |
| 4896 | `completion-pcm--all-completions'" | ||
| 4967 | (let* ((beforepoint (substring string 0 point)) | 4897 | (let* ((beforepoint (substring string 0 point)) |
| 4968 | (afterpoint (substring string point)) | 4898 | (afterpoint (substring string point)) |
| 4969 | (bounds (completion-boundaries beforepoint table pred afterpoint)) | 4899 | (bounds (completion-boundaries beforepoint table pred afterpoint)) |
| @@ -4978,7 +4908,13 @@ that is non-nil." | |||
| 4978 | (if transform-pattern-fn | 4908 | (if transform-pattern-fn |
| 4979 | (funcall transform-pattern-fn pattern) | 4909 | (funcall transform-pattern-fn pattern) |
| 4980 | pattern))) | 4910 | pattern))) |
| 4981 | (all (completion-pcm--all-completions prefix pattern table pred))) | 4911 | (override-re (and simple-re |
| 4912 | (mapconcat #'identity | ||
| 4913 | (split-string | ||
| 4914 | (substring string (car bounds) | ||
| 4915 | (+ point (cdr bounds))) "" t) | ||
| 4916 | ".*"))) | ||
| 4917 | (all (completion-pcm--all-completions prefix pattern table pred override-re))) | ||
| 4982 | (list all pattern prefix suffix (car bounds)))) | 4918 | (list all pattern prefix suffix (car bounds)))) |
| 4983 | 4919 | ||
| 4984 | (defun completion-substring-try-completion (string table pred point) | 4920 | (defun completion-substring-try-completion (string table pred point) |
| @@ -5009,7 +4945,7 @@ that is non-nil." | |||
| 5009 | 4945 | ||
| 5010 | (defun completion--flex-adjust-metadata (metadata) | 4946 | (defun completion--flex-adjust-metadata (metadata) |
| 5011 | "If `flex' is actually doing filtering, adjust sorting." | 4947 | "If `flex' is actually doing filtering, adjust sorting." |
| 5012 | (let ((flex-is-filtering-p completion-pcm--regexp) | 4948 | (let ((flex-is-filtering-p completion-flex--pattern-str) |
| 5013 | (existing-dsf | 4949 | (existing-dsf |
| 5014 | (completion-metadata-get metadata 'display-sort-function)) | 4950 | (completion-metadata-get metadata 'display-sort-function)) |
| 5015 | (existing-csf | 4951 | (existing-csf |
| @@ -5021,11 +4957,11 @@ that is non-nil." | |||
| 5021 | (mapcar | 4957 | (mapcar |
| 5022 | (lambda (str) | 4958 | (lambda (str) |
| 5023 | (cons | 4959 | (cons |
| 5024 | (- (completion--flex-score | 4960 | (car (completion--flex-score |
| 5025 | (or (get-text-property | 4961 | completion-flex--pattern-str |
| 5026 | 0 'completion--unquoted str) | 4962 | (or (get-text-property |
| 5027 | str) | 4963 | 0 'completion--unquoted str) |
| 5028 | completion-pcm--regexp)) | 4964 | str))) |
| 5029 | str)) | 4965 | str)) |
| 5030 | (if existing-sort-fn | 4966 | (if existing-sort-fn |
| 5031 | (funcall existing-sort-fn completions) | 4967 | (funcall existing-sort-fn completions) |
| @@ -5067,7 +5003,8 @@ which is at the core of flex logic. The extra | |||
| 5067 | (pcase-let ((`(,all ,pattern ,prefix ,suffix ,_carbounds) | 5003 | (pcase-let ((`(,all ,pattern ,prefix ,suffix ,_carbounds) |
| 5068 | (completion-substring--all-completions | 5004 | (completion-substring--all-completions |
| 5069 | string table pred point | 5005 | string table pred point |
| 5070 | #'completion-flex--make-flex-pattern))) | 5006 | #'completion-flex--make-flex-pattern |
| 5007 | t))) | ||
| 5071 | (if minibuffer-completing-file-name | 5008 | (if minibuffer-completing-file-name |
| 5072 | (setq all (completion-pcm--filename-try-filter all))) | 5009 | (setq all (completion-pcm--filename-try-filter all))) |
| 5073 | ;; Try some "merging", meaning add as much as possible to the | 5010 | ;; Try some "merging", meaning add as much as possible to the |
| @@ -5084,7 +5021,8 @@ which is at the core of flex logic. The extra | |||
| 5084 | (pcase-let ((`(,all ,pattern ,prefix ,_suffix ,_carbounds) | 5021 | (pcase-let ((`(,all ,pattern ,prefix ,_suffix ,_carbounds) |
| 5085 | (completion-substring--all-completions | 5022 | (completion-substring--all-completions |
| 5086 | string table pred point | 5023 | string table pred point |
| 5087 | #'completion-flex--make-flex-pattern))) | 5024 | #'completion-flex--make-flex-pattern |
| 5025 | t))) | ||
| 5088 | (when all | 5026 | (when all |
| 5089 | (nconc (completion-pcm--hilit-commonality pattern all) | 5027 | (nconc (completion-pcm--hilit-commonality pattern all) |
| 5090 | (length prefix)))))) | 5028 | (length prefix)))))) |
diff --git a/src/minibuf.c b/src/minibuf.c index 5dc2b230883..f7dffc24b94 100644 --- a/src/minibuf.c +++ b/src/minibuf.c | |||
| @@ -20,6 +20,7 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ | |||
| 20 | 20 | ||
| 21 | #include <config.h> | 21 | #include <config.h> |
| 22 | #include <errno.h> | 22 | #include <errno.h> |
| 23 | #include <math.h> | ||
| 23 | 24 | ||
| 24 | #include <binary-io.h> | 25 | #include <binary-io.h> |
| 25 | 26 | ||
| @@ -2279,6 +2280,201 @@ init_minibuf_once_for_pdumper (void) | |||
| 2279 | last_minibuf_string = Qnil; | 2280 | last_minibuf_string = Qnil; |
| 2280 | } | 2281 | } |
| 2281 | 2282 | ||
| 2283 | /* FLEX/GOTOH algorithm for the 'flex' completion-style. Adapted from | ||
| 2284 | GOTOH, Osamu. An improved algorithm for matching biological | ||
| 2285 | sequences. Journal of molecular biology, 1982, 162.3: 705-708. | ||
| 2286 | |||
| 2287 | This algorithm matches patterns to candidate strings, or needles to | ||
| 2288 | haystacks. It works with cost matrices: imagine rows of these | ||
| 2289 | matrices as pattern characters, and columns as the candidate string | ||
| 2290 | characters. There is a -1 row, and a -1 column. The values there | ||
| 2291 | hold real costs used for situations "before the first ever" match of | ||
| 2292 | a pattern character to a string character. | ||
| 2293 | |||
| 2294 | M and D are cost matrices. At the end of the algorithm, M will have | ||
| 2295 | non-infinite values only for the spots where a pattern character | ||
| 2296 | matches a string character. So a non-infinite M[i,j] means the i-th | ||
| 2297 | character of the pattern matches the j-th character of the string. | ||
| 2298 | The value stored is the lowest possible cost the algorithm had to | ||
| 2299 | "pay" to be able to make that match there, given everything that may | ||
| 2300 | have happened before/to the left. An infinite value simply means no | ||
| 2301 | match at this pattern/string position. Note that both row and column | ||
| 2302 | of M may have more than one match at multiple indices. But this | ||
| 2303 | particular implementation of the algorithm assumes they have at least | ||
| 2304 | one match. | ||
| 2305 | |||
| 2306 | D (originally stands for 'Deletion' in the Gotoh paper) has "running | ||
| 2307 | costs". Each value D[i,j] represents what the algorithm has to pay | ||
| 2308 | to make or extend a gap when a match is found at i+1, j+1. By that | ||
| 2309 | time, that cost may or may not be lower than continuing from a match | ||
| 2310 | that had also been found at i,j. We always pick the lowest cost, and | ||
| 2311 | by the time we reach the final column, we know we have picked the | ||
| 2312 | cheapest possible path choosing when to gap, and when to follow up. | ||
| 2313 | |||
| 2314 | Along the way, we construct P, a matrix used just for backtracking, | ||
| 2315 | to reconstruct that path. Maybe P isn't needed, and all the | ||
| 2316 | information can be cleverly derived from the final state of M and D. | ||
| 2317 | But I couldn't make it work. */ | ||
| 2318 | DEFUN ("completion--flex-score-gotoh", Fcompletion__flex_score_gotoh, | ||
| 2319 | Scompletion__flex_score_gotoh, 2, 2, 0, | ||
| 2320 | doc: /* Compute flex score of STR matching PAT using Gotoh | ||
| 2321 | algorithm. Return nil if no match found, else return (COST . MATCHES) | ||
| 2322 | where COST is a fixnum (lower is better) and MATCHES is a list of match | ||
| 2323 | positions in STR. */) | ||
| 2324 | (Lisp_Object pat, Lisp_Object str) | ||
| 2325 | { | ||
| 2326 | /* Pre-allocated matrices for flex completion scoring. */ | ||
| 2327 | #define FLEX_MAX_STR_SIZE 512 | ||
| 2328 | #define FLEX_MAX_PAT_SIZE 128 | ||
| 2329 | #define FLEX_MAX_MATRIX_SIZE FLEX_MAX_PAT_SIZE * FLEX_MAX_STR_SIZE | ||
| 2330 | /* Macro for 2D indexing into "flat" arrays. */ | ||
| 2331 | #define MAT(matrix, i, j) ((matrix)[((i) + 1) * width + ((j) + 1)]) | ||
| 2332 | |||
| 2333 | CHECK_STRING (pat); | ||
| 2334 | CHECK_STRING (str); | ||
| 2335 | |||
| 2336 | size_t patlen = SCHARS (pat); | ||
| 2337 | size_t strlen = SCHARS (str); | ||
| 2338 | size_t width = strlen + 1; | ||
| 2339 | size_t size = (patlen + 1) * width; | ||
| 2340 | |||
| 2341 | /* Bail if strings are empty or matrix too large. */ | ||
| 2342 | if (patlen == 0 || strlen == 0) | ||
| 2343 | return Qnil; | ||
| 2344 | |||
| 2345 | if (size > FLEX_MAX_MATRIX_SIZE) | ||
| 2346 | return Qnil; | ||
| 2347 | |||
| 2348 | /* Cost constants (lower is better). Maybe these could be | ||
| 2349 | defcustom's?*/ | ||
| 2350 | const int gap_open_cost = 10; | ||
| 2351 | const int gap_extend_cost = 1; | ||
| 2352 | const int pos_inf = INT_MAX / 2; | ||
| 2353 | |||
| 2354 | static int M[FLEX_MAX_MATRIX_SIZE]; | ||
| 2355 | static int D[FLEX_MAX_MATRIX_SIZE]; | ||
| 2356 | static size_t P[FLEX_MAX_MATRIX_SIZE]; | ||
| 2357 | |||
| 2358 | /* Initialize costs. Fill both matrices with positive infinity. */ | ||
| 2359 | for (int j = 0; j < size; j++) M[j] = pos_inf; | ||
| 2360 | for (int j = 0; j < size; j++) D[j] = pos_inf; | ||
| 2361 | /* Except for D[0,0], which is 0, for prioritizing matches at the | ||
| 2362 | beginning. Remaining elements on the first row are gap_open_cost/2 | ||
| 2363 | to represent cheaper leading gaps. */ | ||
| 2364 | for (int j = 0; j < width; j++) D[j] = gap_open_cost/2; | ||
| 2365 | D[0] = 0; | ||
| 2366 | |||
| 2367 | /* Index of last match before gap started, as computed in the previous | ||
| 2368 | row. Used to build P. */ | ||
| 2369 | int prev_gap_origin = -1; | ||
| 2370 | |||
| 2371 | /* Poor man's iterator type. */ | ||
| 2372 | typedef struct iter { int x; ptrdiff_t c; ptrdiff_t b; } iter_t; | ||
| 2373 | |||
| 2374 | /* Info about first match computed in the previous row. */ | ||
| 2375 | iter_t prev_match = {0,0,0}; | ||
| 2376 | /* Forward pass. */ | ||
| 2377 | for (iter_t i = {0,0,0}; i.x < patlen; i.x++) | ||
| 2378 | { | ||
| 2379 | int pat_char = fetch_string_char_advance(pat, &i.c, &i.b); | ||
| 2380 | int gap_origin = -1; | ||
| 2381 | bool match_seen = false; | ||
| 2382 | |||
| 2383 | for (iter_t j = prev_match; j.x < strlen; j.x++) | ||
| 2384 | { | ||
| 2385 | iter_t jcopy = j; /* else advance function destroys it... */ | ||
| 2386 | int str_char | ||
| 2387 | = fetch_string_char_advance (str, &j.c, &j.b); | ||
| 2388 | |||
| 2389 | /* Check if characters match (case-insensitive if needed). */ | ||
| 2390 | bool cmatch; | ||
| 2391 | if (completion_ignore_case) | ||
| 2392 | cmatch = (downcase (pat_char) == downcase (str_char)); | ||
| 2393 | else | ||
| 2394 | cmatch = (pat_char == str_char); | ||
| 2395 | |||
| 2396 | /* Compute match cost M[i][j], i.e. replace its infinite | ||
| 2397 | value with something finite. */ | ||
| 2398 | if (cmatch) | ||
| 2399 | { | ||
| 2400 | if (!match_seen) | ||
| 2401 | { | ||
| 2402 | match_seen = true; | ||
| 2403 | prev_match = jcopy; | ||
| 2404 | } | ||
| 2405 | int pmatch_cost = MAT (M, i.x - 1, j.x - 1); | ||
| 2406 | int pgap_cost = MAT (D, i.x - 1, j.x - 1); | ||
| 2407 | |||
| 2408 | if (pmatch_cost <= pgap_cost) | ||
| 2409 | { | ||
| 2410 | /* Not only did the previous char also match (else | ||
| 2411 | pmatch_cost would have been infinite) but following | ||
| 2412 | it up with this match is best overall. */ | ||
| 2413 | MAT (M, i.x, j.x) = pmatch_cost; | ||
| 2414 | MAT (P, i.x, j.x) = j.x - 1; | ||
| 2415 | } | ||
| 2416 | else | ||
| 2417 | { | ||
| 2418 | /* Gapping is best, regardless of whether the previous | ||
| 2419 | char also matched. That is, it's better to arrive at | ||
| 2420 | this match from a gap. */ | ||
| 2421 | MAT (M, i.x, j.x) = pgap_cost; | ||
| 2422 | MAT (P, i.x, j.x) = prev_gap_origin; | ||
| 2423 | } | ||
| 2424 | } | ||
| 2425 | |||
| 2426 | /* Regardless of a match here, compute D[i,j], the best | ||
| 2427 | accumulated gapping cost at this point, considering whether | ||
| 2428 | it's more advantageous to open from a previous match on | ||
| 2429 | this row (a cost which may well be infinite if no such | ||
| 2430 | match ever existed) or extend a gap started sometime | ||
| 2431 | before. The next iteration will take this into account, | ||
| 2432 | and so will the next row when analyzing a possible match | ||
| 2433 | for the j+1-th string character. */ | ||
| 2434 | int open_cost = MAT (M, i.x, j.x - 1) + gap_open_cost; | ||
| 2435 | int extend_cost = MAT (D, i.x, j.x - 1) + gap_extend_cost; | ||
| 2436 | |||
| 2437 | if (open_cost < extend_cost) | ||
| 2438 | { | ||
| 2439 | MAT (D, i.x, j.x) = open_cost; | ||
| 2440 | gap_origin = j.x - 1; /* New gap. */ | ||
| 2441 | } | ||
| 2442 | else | ||
| 2443 | MAT (D, i.x, j.x) = extend_cost; /* Extend gap. */ | ||
| 2444 | } | ||
| 2445 | prev_gap_origin = gap_origin; | ||
| 2446 | } | ||
| 2447 | |||
| 2448 | /* Find best (lowest) cost in last row. */ | ||
| 2449 | int best_cost = pos_inf; | ||
| 2450 | int lastcol = -1; | ||
| 2451 | |||
| 2452 | for (int j = 0; j < strlen; j++) | ||
| 2453 | { | ||
| 2454 | int cost = MAT (M, patlen - 1, j); | ||
| 2455 | if (cost < best_cost) | ||
| 2456 | { | ||
| 2457 | best_cost = cost; | ||
| 2458 | lastcol = j; | ||
| 2459 | } | ||
| 2460 | } | ||
| 2461 | |||
| 2462 | if (lastcol < 0 || best_cost >= pos_inf) | ||
| 2463 | return Qnil; | ||
| 2464 | |||
| 2465 | /* Build match positions list by tracing back through P matrix. */ | ||
| 2466 | Lisp_Object matches = Qnil; | ||
| 2467 | for (int i = patlen - 1, l = lastcol; i >= 0 && l >= 0; i--) | ||
| 2468 | { | ||
| 2469 | matches = Fcons (make_fixnum (l), matches); | ||
| 2470 | l = MAT (P, i, l); | ||
| 2471 | } | ||
| 2472 | |||
| 2473 | return Fcons (make_fixnum (best_cost), matches); | ||
| 2474 | #undef MAT | ||
| 2475 | |||
| 2476 | } | ||
| 2477 | |||
| 2282 | void | 2478 | void |
| 2283 | syms_of_minibuf (void) | 2479 | syms_of_minibuf (void) |
| 2284 | { | 2480 | { |
| @@ -2541,6 +2737,7 @@ showing the *Completions* buffer, if any. */); | |||
| 2541 | defsubr (&Stest_completion); | 2737 | defsubr (&Stest_completion); |
| 2542 | defsubr (&Sassoc_string); | 2738 | defsubr (&Sassoc_string); |
| 2543 | defsubr (&Scompleting_read); | 2739 | defsubr (&Scompleting_read); |
| 2740 | defsubr (&Scompletion__flex_score_gotoh); | ||
| 2544 | DEFSYM (Qminibuffer_quit_recursive_edit, "minibuffer-quit-recursive-edit"); | 2741 | DEFSYM (Qminibuffer_quit_recursive_edit, "minibuffer-quit-recursive-edit"); |
| 2545 | DEFSYM (Qinternal_complete_buffer, "internal-complete-buffer"); | 2742 | DEFSYM (Qinternal_complete_buffer, "internal-complete-buffer"); |
| 2546 | DEFSYM (Qcompleting_read_function, "completing-read-function"); | 2743 | DEFSYM (Qcompleting_read_function, "completing-read-function"); |
diff --git a/test/lisp/minibuffer-tests.el b/test/lisp/minibuffer-tests.el index 79ffb1d3fc7..02df7661c75 100644 --- a/test/lisp/minibuffer-tests.el +++ b/test/lisp/minibuffer-tests.el | |||
| @@ -205,11 +205,6 @@ | |||
| 205 | '("some/alpha" "base/epsilon" "base/delta")) | 205 | '("some/alpha" "base/epsilon" "base/delta")) |
| 206 | `("epsilon" "delta" "beta" "alpha" "gamma" . 5)))) | 206 | `("epsilon" "delta" "beta" "alpha" "gamma" . 5)))) |
| 207 | 207 | ||
| 208 | (defun completion--pcm-score (comp) | ||
| 209 | "Get `completion-score' from COMP." | ||
| 210 | ;; FIXME, uses minibuffer.el implementation details | ||
| 211 | (completion--flex-score comp completion-pcm--regexp)) | ||
| 212 | |||
| 213 | (defun completion--pcm-first-difference-pos (comp) | 208 | (defun completion--pcm-first-difference-pos (comp) |
| 214 | "Get `completions-first-difference' from COMP." | 209 | "Get `completions-first-difference' from COMP." |
| 215 | (cl-loop for pos = (next-single-property-change 0 'face comp) | 210 | (cl-loop for pos = (next-single-property-change 0 'face comp) |
| @@ -244,20 +239,11 @@ | |||
| 244 | "barfoobar"))) | 239 | "barfoobar"))) |
| 245 | 240 | ||
| 246 | (ert-deftest completion-pcm-test-3 () | 241 | (ert-deftest completion-pcm-test-3 () |
| 247 | ;; Full match! | 242 | (should (car (completion-pcm-all-completions |
| 248 | (should (eql | 243 | "R" '("R" "hello") nil 1)))) |
| 249 | (completion--pcm-score | ||
| 250 | (car (completion-pcm-all-completions | ||
| 251 | "R" '("R" "hello") nil 1))) | ||
| 252 | 1.0))) | ||
| 253 | 244 | ||
| 254 | (ert-deftest completion-pcm-test-4 () | 245 | (ert-deftest completion-pcm-test-4 () |
| 255 | ;; One fourth of a match and no match due to point being at the end | 246 | ;; No match due to point being at the end |
| 256 | (should (eql | ||
| 257 | (completion--pcm-score | ||
| 258 | (car (completion-pcm-all-completions | ||
| 259 | "RO" '("RaOb") nil 1))) | ||
| 260 | (/ 1.0 4.0))) | ||
| 261 | (should (null | 247 | (should (null |
| 262 | (completion-pcm-all-completions | 248 | (completion-pcm-all-completions |
| 263 | "RO" '("RaOb") nil 2)))) | 249 | "RO" '("RaOb") nil 2)))) |
| @@ -420,24 +406,14 @@ | |||
| 420 | "a"))) | 406 | "a"))) |
| 421 | 407 | ||
| 422 | (ert-deftest completion-substring-test-1 () | 408 | (ert-deftest completion-substring-test-1 () |
| 423 | ;; One third of a match! | ||
| 424 | (should (equal | 409 | (should (equal |
| 425 | (car (completion-substring-all-completions | 410 | (car (completion-substring-all-completions |
| 426 | "foo" '("hello" "world" "barfoobar") nil 3)) | 411 | "foo" '("hello" "world" "barfoobar") nil 3)) |
| 427 | "barfoobar")) | 412 | "barfoobar"))) |
| 428 | (should (eql | ||
| 429 | (completion--pcm-score | ||
| 430 | (car (completion-substring-all-completions | ||
| 431 | "foo" '("hello" "world" "barfoobar") nil 3))) | ||
| 432 | (/ 1.0 3.0)))) | ||
| 433 | 413 | ||
| 434 | (ert-deftest completion-substring-test-2 () | 414 | (ert-deftest completion-substring-test-2 () |
| 435 | ;; Full match! | 415 | (should (car (completion-substring-all-completions |
| 436 | (should (eql | 416 | "R" '("R" "hello") nil 1)))) |
| 437 | (completion--pcm-score | ||
| 438 | (car (completion-substring-all-completions | ||
| 439 | "R" '("R" "hello") nil 1))) | ||
| 440 | 1.0))) | ||
| 441 | 417 | ||
| 442 | (ert-deftest completion-substring-test-3 () | 418 | (ert-deftest completion-substring-test-3 () |
| 443 | ;; Substring match | 419 | ;; Substring match |
| @@ -495,39 +471,70 @@ | |||
| 495 | (completion-substring-try-completion "b" '("ab" "ab") nil 0) | 471 | (completion-substring-try-completion "b" '("ab" "ab") nil 0) |
| 496 | '("ab" . 2)))) | 472 | '("ab" . 2)))) |
| 497 | 473 | ||
| 474 | (defun completion--sorted-flex-completions (pat list &optional point) | ||
| 475 | "Flex test helper" | ||
| 476 | (let ((all (completion-flex-all-completions pat list nil point))) | ||
| 477 | (setcdr (last all) nil) | ||
| 478 | (sort all | ||
| 479 | (lambda (a b) | ||
| 480 | (< (car (completion--flex-score pat a)) | ||
| 481 | (car (completion--flex-score pat b))))))) | ||
| 482 | |||
| 498 | (ert-deftest completion-flex-test-1 () | 483 | (ert-deftest completion-flex-test-1 () |
| 499 | ;; Fuzzy match | ||
| 500 | (should (equal | 484 | (should (equal |
| 501 | (car (completion-flex-all-completions | 485 | (car (completion-flex-all-completions |
| 502 | "foo" '("hello" "world" "fabrobazo") nil 3)) | 486 | "foo" '("hello" "world" "fabrobazo") nil 3)) |
| 503 | "fabrobazo"))) | 487 | "fabrobazo"))) |
| 504 | 488 | ||
| 505 | (ert-deftest completion-flex-test-2 () | 489 | (ert-deftest completion-flex-test-2 () |
| 506 | ;; Full match! | 490 | (should (car (completion--sorted-flex-completions |
| 507 | (should (eql | 491 | "R" '("R" "hello") 1)))) |
| 508 | (completion--pcm-score | ||
| 509 | (car (completion-flex-all-completions | ||
| 510 | "R" '("R" "hello") nil 1))) | ||
| 511 | 1.0))) | ||
| 512 | 492 | ||
| 513 | (ert-deftest completion-flex-test-3 () | 493 | (ert-deftest completion-flex-test-3 () |
| 514 | ;; Another fuzzy match, but more of a "substring" one | ||
| 515 | (should (equal | 494 | (should (equal |
| 516 | (car (completion-flex-all-completions | 495 | (car (completion--sorted-flex-completions |
| 517 | "custgroup" '("customize-group-other-window") nil 4)) | 496 | "custgroup" '("customize-group-other-window") 4)) |
| 518 | "customize-group-other-window")) | 497 | "customize-group-other-window")) |
| 519 | ;; `completions-first-difference' should be at the right place | 498 | ;; `completions-first-difference' should be at the right place |
| 520 | (should (equal | 499 | (should (equal |
| 521 | (completion--pcm-first-difference-pos | 500 | (completion--pcm-first-difference-pos |
| 522 | (car (completion-flex-all-completions | 501 | (car (completion--sorted-flex-completions |
| 523 | "custgroup" '("customize-group-other-window") nil 4))) | 502 | "custgroup" '("customize-group-other-window") 4))) |
| 524 | 4)) | 503 | 4)) |
| 525 | (should (equal | 504 | (should (equal |
| 526 | (completion--pcm-first-difference-pos | 505 | (completion--pcm-first-difference-pos |
| 527 | (car (completion-flex-all-completions | 506 | (car (completion--sorted-flex-completions |
| 528 | "custgroup" '("customize-group-other-window") nil 9))) | 507 | "custgroup" '("customize-group-other-window") 9))) |
| 529 | 15))) | 508 | 15))) |
| 530 | 509 | ||
| 510 | (ert-deftest completion-flex-test-non-ascii () | ||
| 511 | "Test flex completion with variable-width UTF-8 characters." | ||
| 512 | ;; Uses Japanese Kanji to test multi-byte character handling. | ||
| 513 | |||
| 514 | ;; 日本 = "nihon" (Japan), 東京 = "tōkyō" (Tokyo) | ||
| 515 | (should (equal | ||
| 516 | (car (completion--sorted-flex-completions | ||
| 517 | "日本" '("日本語" "日本" "中国") 2)) | ||
| 518 | "日本")) | ||
| 519 | |||
| 520 | ;; 図書館 = "toshokan" (library) | ||
| 521 | (should (equal | ||
| 522 | (car (completion--sorted-flex-completions | ||
| 523 | "tsk" '("図書館-toshokan" "task" "desk") 3)) | ||
| 524 | "task")) | ||
| 525 | |||
| 526 | ;; Mixed pattern (Kanji + ASCII) matching mixed string | ||
| 527 | ;; 学校 = "gakkō" (school) | ||
| 528 | (should (equal | ||
| 529 | (car (completion--sorted-flex-completions | ||
| 530 | "学s" '("学校-school" "school" "学生") 2)) | ||
| 531 | "学校-school")) | ||
| 532 | |||
| 533 | ;; Pattern "東" should match "東京" better than "関東" | ||
| 534 | (let ((results (completion--sorted-flex-completions | ||
| 535 | "東" '("東京" "関東") 1))) | ||
| 536 | (should (equal (car results) "東京")))) | ||
| 537 | |||
| 531 | 538 | ||
| 532 | (defmacro with-minibuffer-setup (completing-read &rest body) | 539 | (defmacro with-minibuffer-setup (completing-read &rest body) |
| 533 | (declare (indent 1) (debug t)) | 540 | (declare (indent 1) (debug t)) |