diff options
| author | João Távora | 2019-10-27 01:33:54 +0100 |
|---|---|---|
| committer | João Távora | 2019-10-27 01:46:56 +0100 |
| commit | 63fd71cd092de8daded15e32c268215b62c488b9 (patch) | |
| tree | 24c4105207ebceddd71570b241e0425306a2ce8d | |
| parent | f4ee7c83823b7a9369e79dafa70e4fd98633704a (diff) | |
| download | emacs-63fd71cd092de8daded15e32c268215b62c488b9.tar.gz emacs-63fd71cd092de8daded15e32c268215b62c488b9.zip | |
Improve scoring algorithm for flex-style completions
The previous algorithm had two problems: it considered non-matches in
the beginning and end of the string as matching "holes" and failed to
penalize larger holes, making flex-score-match-tightness only
effective in some corner cases.
The new formula, which is described in code and in pseudo-code in the
comments, fixes these problems.
As a result, by default, C-h f flex now correctly bubbles up
"company-search-flex-regexp" to the top, in front of "file-exists-p".
With a flex-score-match-tightness smaller than 1.0, the situation is
reversed.
* lisp/minibuffer.el (flex-score-match-tightness): Adjust default
value. Improve docstring example.
(completion-pcm--hilit-commonality): Improve example. Remove unused
variable. Improve algorithm.
| -rw-r--r-- | lisp/minibuffer.el | 60 |
1 files changed, 38 insertions, 22 deletions
diff --git a/lisp/minibuffer.el b/lisp/minibuffer.el index 542e672400a..c92a91e76ce 100644 --- a/lisp/minibuffer.el +++ b/lisp/minibuffer.el | |||
| @@ -3060,16 +3060,18 @@ PATTERN is as returned by `completion-pcm--string->pattern'." | |||
| 3060 | (when (string-match-p regex c) (push c poss))) | 3060 | (when (string-match-p regex c) (push c poss))) |
| 3061 | (nreverse poss)))))) | 3061 | (nreverse poss)))))) |
| 3062 | 3062 | ||
| 3063 | (defvar flex-score-match-tightness 100 | 3063 | (defvar flex-score-match-tightness 3 |
| 3064 | "Controls how the `flex' completion style scores its matches. | 3064 | "Controls how the `flex' completion style scores its matches. |
| 3065 | 3065 | ||
| 3066 | Value is a positive number. Values smaller than one make the | 3066 | Value is a positive number. A number smaller than 1 makes the |
| 3067 | scoring formula value matches scattered along the string, while | 3067 | scoring formula reward matches scattered along the string, while |
| 3068 | values greater than one make the formula value tighter matches. | 3068 | a number greater than one make the formula reward matches that |
| 3069 | I.e \"foo\" matches both strings \"barbazfoo\" and \"fabrobazo\", | 3069 | are clumped together. I.e \"foo\" matches both strings |
| 3070 | which are of equal length, but only a value greater than one will | 3070 | \"fbarbazoo\" and \"fabrobazo\", which are of equal length, but |
| 3071 | score the former (which has one \"hole\") higher than the | 3071 | only a value greater than one will score the former (which has |
| 3072 | latter (which has two).") | 3072 | one large \"hole\" and a clumped-together \"oo\" match) higher |
| 3073 | than the latter (which has two \"holes\" and three | ||
| 3074 | one-letter-long matches).") | ||
| 3073 | 3075 | ||
| 3074 | (defun completion-pcm--hilit-commonality (pattern completions) | 3076 | (defun completion-pcm--hilit-commonality (pattern completions) |
| 3075 | (when completions | 3077 | (when completions |
| @@ -3086,27 +3088,39 @@ latter (which has two).") | |||
| 3086 | (end (pop md)) | 3088 | (end (pop md)) |
| 3087 | (len (length str)) | 3089 | (len (length str)) |
| 3088 | ;; To understand how this works, consider these bad | 3090 | ;; To understand how this works, consider these bad |
| 3089 | ;; ascii(tm) diagrams showing how the pattern \"foo\" | 3091 | ;; ascii(tm) diagrams showing how the pattern "foo" |
| 3090 | ;; flex-matches \"fabrobazo" and | 3092 | ;; flex-matches "fabrobazo", "fbarbazoo" and |
| 3091 | ;; \"barfoobaz\": | 3093 | ;; "barfoobaz": |
| 3092 | 3094 | ||
| 3093 | ;; f abr o baz o | 3095 | ;; f abr o baz o |
| 3094 | ;; + --- + --- + | 3096 | ;; + --- + --- + |
| 3095 | 3097 | ||
| 3098 | ;; f barbaz oo | ||
| 3099 | ;; + ------ ++ | ||
| 3100 | |||
| 3096 | ;; bar foo baz | 3101 | ;; bar foo baz |
| 3097 | ;; --- +++ --- | 3102 | ;; +++ |
| 3098 | 3103 | ||
| 3099 | ;; Where + indicates parts where the pattern matched, | 3104 | ;; "+" indicates parts where the pattern matched. A |
| 3100 | ;; - where it didn't match. The score is a number | 3105 | ;; "hole" in the middle of the string is indicated by |
| 3106 | ;; "-". Note that there are no "holes" near the edges | ||
| 3107 | ;; of the string. The completion score is a number | ||
| 3101 | ;; bound by ]0..1]: the higher the better and only a | 3108 | ;; bound by ]0..1]: the higher the better and only a |
| 3102 | ;; perfect match (pattern equals string) will have | 3109 | ;; perfect match (pattern equals string) will have |
| 3103 | ;; score 1. The formula takes the form of a quotient. | 3110 | ;; score 1. The formula takes the form of a quotient. |
| 3104 | ;; For the numerator, we use the number of +, i.e. the | 3111 | ;; For the numerator, we use the number of +, i.e. the |
| 3105 | ;; length of the pattern. For the denominator, it | 3112 | ;; length of the pattern. For the denominator, it |
| 3106 | ;; sums (1+ (/ (grouplen - 1) | 3113 | ;; first computes |
| 3107 | ;; flex-score-match-tightness)) across all groups of | 3114 | ;; |
| 3108 | ;; -, sums one to that total, and then multiples by | 3115 | ;; hole_i_contrib = 1 + (Li-1)^(1/tightness) |
| 3109 | ;; the length of the string. | 3116 | ;; |
| 3117 | ;; , for each hole "i" of length "Li", where tightness | ||
| 3118 | ;; is given by `flex-score-match-tightness'. The | ||
| 3119 | ;; final value for the denominator is then given by: | ||
| 3120 | ;; | ||
| 3121 | ;; (SUM_across_i(hole_i_contrib) + 1) * len | ||
| 3122 | ;; | ||
| 3123 | ;; , where "len" is the string's length. | ||
| 3110 | (score-numerator 0) | 3124 | (score-numerator 0) |
| 3111 | (score-denominator 0) | 3125 | (score-denominator 0) |
| 3112 | (last-b 0) | 3126 | (last-b 0) |
| @@ -3115,13 +3129,15 @@ latter (which has two).") | |||
| 3115 | "Update score variables given match range (A B)." | 3129 | "Update score variables given match range (A B)." |
| 3116 | (setq | 3130 | (setq |
| 3117 | score-numerator (+ score-numerator (- b a))) | 3131 | score-numerator (+ score-numerator (- b a))) |
| 3118 | (unless (= a last-b) | 3132 | (unless (or (= a last-b) |
| 3133 | (zerop last-b) | ||
| 3134 | (= a (length str))) | ||
| 3119 | (setq | 3135 | (setq |
| 3120 | score-denominator (+ score-denominator | 3136 | score-denominator (+ score-denominator |
| 3121 | 1 | 3137 | 1 |
| 3122 | (/ (- a last-b 1) | 3138 | (expt (- a last-b 1) |
| 3123 | flex-score-match-tightness | 3139 | (/ 1.0 |
| 3124 | 1.0)))) | 3140 | flex-score-match-tightness))))) |
| 3125 | (setq | 3141 | (setq |
| 3126 | last-b b)))) | 3142 | last-b b)))) |
| 3127 | (funcall update-score start start) | 3143 | (funcall update-score start start) |