aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoão Távora2019-10-27 01:33:54 +0100
committerJoão Távora2019-10-27 01:46:56 +0100
commit63fd71cd092de8daded15e32c268215b62c488b9 (patch)
tree24c4105207ebceddd71570b241e0425306a2ce8d
parentf4ee7c83823b7a9369e79dafa70e4fd98633704a (diff)
downloademacs-63fd71cd092de8daded15e32c268215b62c488b9.tar.gz
emacs-63fd71cd092de8daded15e32c268215b62c488b9.zip
Improve scoring algorithm for flex-style completions
The previous algorithm had two problems: it considered non-matches in the beginning and end of the string as matching "holes" and failed to penalize larger holes, making flex-score-match-tightness only effective in some corner cases. The new formula, which is described in code and in pseudo-code in the comments, fixes these problems. As a result, by default, C-h f flex now correctly bubbles up "company-search-flex-regexp" to the top, in front of "file-exists-p". With a flex-score-match-tightness smaller than 1.0, the situation is reversed. * lisp/minibuffer.el (flex-score-match-tightness): Adjust default value. Improve docstring example. (completion-pcm--hilit-commonality): Improve example. Remove unused variable. Improve algorithm.
-rw-r--r--lisp/minibuffer.el60
1 files changed, 38 insertions, 22 deletions
diff --git a/lisp/minibuffer.el b/lisp/minibuffer.el
index 542e672400a..c92a91e76ce 100644
--- a/lisp/minibuffer.el
+++ b/lisp/minibuffer.el
@@ -3060,16 +3060,18 @@ PATTERN is as returned by `completion-pcm--string->pattern'."
3060 (when (string-match-p regex c) (push c poss))) 3060 (when (string-match-p regex c) (push c poss)))
3061 (nreverse poss)))))) 3061 (nreverse poss))))))
3062 3062
3063(defvar flex-score-match-tightness 100 3063(defvar flex-score-match-tightness 3
3064 "Controls how the `flex' completion style scores its matches. 3064 "Controls how the `flex' completion style scores its matches.
3065 3065
3066Value is a positive number. Values smaller than one make the 3066Value is a positive number. A number smaller than 1 makes the
3067scoring formula value matches scattered along the string, while 3067scoring formula reward matches scattered along the string, while
3068values greater than one make the formula value tighter matches. 3068a number greater than one make the formula reward matches that
3069I.e \"foo\" matches both strings \"barbazfoo\" and \"fabrobazo\", 3069are clumped together. I.e \"foo\" matches both strings
3070which are of equal length, but only a value greater than one will 3070\"fbarbazoo\" and \"fabrobazo\", which are of equal length, but
3071score the former (which has one \"hole\") higher than the 3071only a value greater than one will score the former (which has
3072latter (which has two).") 3072one large \"hole\" and a clumped-together \"oo\" match) higher
3073than the latter (which has two \"holes\" and three
3074one-letter-long matches).")
3073 3075
3074(defun completion-pcm--hilit-commonality (pattern completions) 3076(defun completion-pcm--hilit-commonality (pattern completions)
3075 (when completions 3077 (when completions
@@ -3086,27 +3088,39 @@ latter (which has two).")
3086 (end (pop md)) 3088 (end (pop md))
3087 (len (length str)) 3089 (len (length str))
3088 ;; To understand how this works, consider these bad 3090 ;; To understand how this works, consider these bad
3089 ;; ascii(tm) diagrams showing how the pattern \"foo\" 3091 ;; ascii(tm) diagrams showing how the pattern "foo"
3090 ;; flex-matches \"fabrobazo" and 3092 ;; flex-matches "fabrobazo", "fbarbazoo" and
3091 ;; \"barfoobaz\": 3093 ;; "barfoobaz":
3092 3094
3093 ;; f abr o baz o 3095 ;; f abr o baz o
3094 ;; + --- + --- + 3096 ;; + --- + --- +
3095 3097
3098 ;; f barbaz oo
3099 ;; + ------ ++
3100
3096 ;; bar foo baz 3101 ;; bar foo baz
3097 ;; --- +++ --- 3102 ;; +++
3098 3103
3099 ;; Where + indicates parts where the pattern matched, 3104 ;; "+" indicates parts where the pattern matched. A
3100 ;; - where it didn't match. The score is a number 3105 ;; "hole" in the middle of the string is indicated by
3106 ;; "-". Note that there are no "holes" near the edges
3107 ;; of the string. The completion score is a number
3101 ;; bound by ]0..1]: the higher the better and only a 3108 ;; bound by ]0..1]: the higher the better and only a
3102 ;; perfect match (pattern equals string) will have 3109 ;; perfect match (pattern equals string) will have
3103 ;; score 1. The formula takes the form of a quotient. 3110 ;; score 1. The formula takes the form of a quotient.
3104 ;; For the numerator, we use the number of +, i.e. the 3111 ;; For the numerator, we use the number of +, i.e. the
3105 ;; length of the pattern. For the denominator, it 3112 ;; length of the pattern. For the denominator, it
3106 ;; sums (1+ (/ (grouplen - 1) 3113 ;; first computes
3107 ;; flex-score-match-tightness)) across all groups of 3114 ;;
3108 ;; -, sums one to that total, and then multiples by 3115 ;; hole_i_contrib = 1 + (Li-1)^(1/tightness)
3109 ;; the length of the string. 3116 ;;
3117 ;; , for each hole "i" of length "Li", where tightness
3118 ;; is given by `flex-score-match-tightness'. The
3119 ;; final value for the denominator is then given by:
3120 ;;
3121 ;; (SUM_across_i(hole_i_contrib) + 1) * len
3122 ;;
3123 ;; , where "len" is the string's length.
3110 (score-numerator 0) 3124 (score-numerator 0)
3111 (score-denominator 0) 3125 (score-denominator 0)
3112 (last-b 0) 3126 (last-b 0)
@@ -3115,13 +3129,15 @@ latter (which has two).")
3115 "Update score variables given match range (A B)." 3129 "Update score variables given match range (A B)."
3116 (setq 3130 (setq
3117 score-numerator (+ score-numerator (- b a))) 3131 score-numerator (+ score-numerator (- b a)))
3118 (unless (= a last-b) 3132 (unless (or (= a last-b)
3133 (zerop last-b)
3134 (= a (length str)))
3119 (setq 3135 (setq
3120 score-denominator (+ score-denominator 3136 score-denominator (+ score-denominator
3121 1 3137 1
3122 (/ (- a last-b 1) 3138 (expt (- a last-b 1)
3123 flex-score-match-tightness 3139 (/ 1.0
3124 1.0)))) 3140 flex-score-match-tightness)))))
3125 (setq 3141 (setq
3126 last-b b)))) 3142 last-b b))))
3127 (funcall update-score start start) 3143 (funcall update-score start start)