diff options
| author | Noam Postavsky | 2017-07-01 22:39:16 -0400 |
|---|---|---|
| committer | Noam Postavsky | 2017-08-21 20:52:25 -0400 |
| commit | 9d7973530f912c6001445ba9b83b7893b466aee8 (patch) | |
| tree | 4581c401602c8def0331858ff9ef16122b00229d /lisp | |
| parent | ba0bb332dd841274208f71e0739e0c5e5d231d7a (diff) | |
| download | emacs-9d7973530f912c6001445ba9b83b7893b466aee8.tar.gz emacs-9d7973530f912c6001445ba9b83b7893b466aee8.zip | |
Optimize skkdic conversion (Bug#28043)
The primary speedup comes from the optimizing lookup-nested-alist and
set-nested-alist for the case where the key is a string. This brings
the time down to less than half the original.
* lisp/international/mule-util.el (lookup-nested-alist)
(set-nested-alist): Use `assq' instead of `assoc' when KEYSEQ is a
string.
* lisp/international/ja-dic-cnv.el (skkdic-collect-okuri-nasi)
(skkdic-convert-okuri-nasi): Use progress-reporter functions instead
of calculating ratio of work done inline.
(skkdic-reduced-candidates): Call `char-category-set' on the first
character of the string directly, instead of using a regexp for the
character category.
(skkdic--japanese-category-set): New constant.
(skkdic-collect-okuri-nasi): Just set
`skkdic-okuri-nasi-entries-count' at once at the end rather than
updating it throughout the loop.
(skkdic-convert-postfix skkdic-convert-prefix)
skkdic-get-candidate-list, skkdic-collect-okuri-nasi)
(skkdic-extract-conversion-data): Use `match-string-no-properties'
instead of `match-string'.
Diffstat (limited to 'lisp')
| -rw-r--r-- | lisp/international/ja-dic-cnv.el | 61 | ||||
| -rw-r--r-- | lisp/international/mule-util.el | 77 |
2 files changed, 83 insertions, 55 deletions
diff --git a/lisp/international/ja-dic-cnv.el b/lisp/international/ja-dic-cnv.el index e80b1b28810..63eede093d5 100644 --- a/lisp/international/ja-dic-cnv.el +++ b/lisp/international/ja-dic-cnv.el | |||
| @@ -125,10 +125,10 @@ | |||
| 125 | 125 | ||
| 126 | ;; Search postfix entries. | 126 | ;; Search postfix entries. |
| 127 | (while (re-search-forward "^[#<>?]\\(\\(\\cH\\|$B!<(B\\)+\\) " nil t) | 127 | (while (re-search-forward "^[#<>?]\\(\\(\\cH\\|$B!<(B\\)+\\) " nil t) |
| 128 | (let ((kana (match-string 1)) | 128 | (let ((kana (match-string-no-properties 1)) |
| 129 | str candidates) | 129 | str candidates) |
| 130 | (while (looking-at "/[#0-9 ]*\\([^/\n]*\\)/") | 130 | (while (looking-at "/[#0-9 ]*\\([^/\n]*\\)/") |
| 131 | (setq str (match-string 1)) | 131 | (setq str (match-string-no-properties 1)) |
| 132 | (if (not (member str candidates)) | 132 | (if (not (member str candidates)) |
| 133 | (setq candidates (cons str candidates))) | 133 | (setq candidates (cons str candidates))) |
| 134 | (goto-char (match-end 1))) | 134 | (goto-char (match-end 1))) |
| @@ -158,10 +158,10 @@ | |||
| 158 | "(skkdic-set-prefix\n")) | 158 | "(skkdic-set-prefix\n")) |
| 159 | (save-excursion | 159 | (save-excursion |
| 160 | (while (re-search-forward "^\\(\\(\\cH\\|$B!<(B\\)+\\)[<>?] " nil t) | 160 | (while (re-search-forward "^\\(\\(\\cH\\|$B!<(B\\)+\\)[<>?] " nil t) |
| 161 | (let ((kana (match-string 1)) | 161 | (let ((kana (match-string-no-properties 1)) |
| 162 | str candidates) | 162 | str candidates) |
| 163 | (while (looking-at "/\\([^/\n]+\\)/") | 163 | (while (looking-at "/\\([^/\n]+\\)/") |
| 164 | (setq str (match-string 1)) | 164 | (setq str (match-string-no-properties 1)) |
| 165 | (if (not (member str candidates)) | 165 | (if (not (member str candidates)) |
| 166 | (setq candidates (cons str candidates))) | 166 | (setq candidates (cons str candidates))) |
| 167 | (goto-char (match-end 1))) | 167 | (goto-char (match-end 1))) |
| @@ -180,8 +180,8 @@ | |||
| 180 | (let (candidates) | 180 | (let (candidates) |
| 181 | (goto-char from) | 181 | (goto-char from) |
| 182 | (while (re-search-forward "/[^/ \n]+" to t) | 182 | (while (re-search-forward "/[^/ \n]+" to t) |
| 183 | (setq candidates (cons (buffer-substring (1+ (match-beginning 0)) | 183 | (setq candidates (cons (buffer-substring-no-properties |
| 184 | (match-end 0)) | 184 | (1+ (match-beginning 0)) (match-end 0)) |
| 185 | candidates))) | 185 | candidates))) |
| 186 | candidates)) | 186 | candidates)) |
| 187 | 187 | ||
| @@ -251,12 +251,16 @@ | |||
| 251 | ;; Return list of candidates which excludes some from CANDIDATES. | 251 | ;; Return list of candidates which excludes some from CANDIDATES. |
| 252 | ;; Excluded candidates can be derived from another entry. | 252 | ;; Excluded candidates can be derived from another entry. |
| 253 | 253 | ||
| 254 | (defconst skkdic--japanese-category-set (make-category-set "j")) | ||
| 255 | |||
| 254 | (defun skkdic-reduced-candidates (skkbuf kana candidates) | 256 | (defun skkdic-reduced-candidates (skkbuf kana candidates) |
| 255 | (let (elt l) | 257 | (let (elt l) |
| 256 | (while candidates | 258 | (while candidates |
| 257 | (setq elt (car candidates)) | 259 | (setq elt (car candidates)) |
| 258 | (if (or (= (length elt) 1) | 260 | (if (or (= (length elt) 1) |
| 259 | (and (string-match "^\\cj" elt) | 261 | (and (bool-vector-subsetp |
| 262 | skkdic--japanese-category-set | ||
| 263 | (char-category-set (aref elt 0))) | ||
| 260 | (not (skkdic-breakup-string skkbuf kana elt 0 (length elt) | 264 | (not (skkdic-breakup-string skkbuf kana elt 0 (length elt) |
| 261 | 'first)))) | 265 | 'first)))) |
| 262 | (setq l (cons elt l))) | 266 | (setq l (cons elt l))) |
| @@ -267,24 +271,18 @@ | |||
| 267 | (defvar skkdic-okuri-nasi-entries-count 0) | 271 | (defvar skkdic-okuri-nasi-entries-count 0) |
| 268 | 272 | ||
| 269 | (defun skkdic-collect-okuri-nasi () | 273 | (defun skkdic-collect-okuri-nasi () |
| 270 | (message "Collecting OKURI-NASI entries ...") | ||
| 271 | (save-excursion | 274 | (save-excursion |
| 272 | (let ((prev-ratio 0) | 275 | (let ((progress (make-progress-reporter "Collecting OKURI-NASI entries" |
| 273 | ratio) | 276 | (point) (point-max) |
| 277 | nil 10))) | ||
| 274 | (while (re-search-forward "^\\(\\(\\cH\\|$B!<(B\\)+\\) \\(/\\cj.*\\)/$" | 278 | (while (re-search-forward "^\\(\\(\\cH\\|$B!<(B\\)+\\) \\(/\\cj.*\\)/$" |
| 275 | nil t) | 279 | nil t) |
| 276 | (let ((kana (match-string 1)) | 280 | (let ((kana (match-string-no-properties 1)) |
| 277 | (candidates (skkdic-get-candidate-list (match-beginning 3) | 281 | (candidates (skkdic-get-candidate-list (match-beginning 3) |
| 278 | (match-end 3)))) | 282 | (match-end 3)))) |
| 279 | (setq skkdic-okuri-nasi-entries | 283 | (setq skkdic-okuri-nasi-entries |
| 280 | (cons (cons kana candidates) skkdic-okuri-nasi-entries) | 284 | (cons (cons kana candidates) skkdic-okuri-nasi-entries)) |
| 281 | skkdic-okuri-nasi-entries-count | 285 | (progress-reporter-update progress (point)) |
| 282 | (1+ skkdic-okuri-nasi-entries-count)) | ||
| 283 | (setq ratio (floor (* (point) 100.0) (point-max))) | ||
| 284 | (if (/= (/ prev-ratio 10) (/ ratio 10)) | ||
| 285 | (progn | ||
| 286 | (message "collected %2d%% ..." ratio) | ||
| 287 | (setq prev-ratio ratio))) | ||
| 288 | (while candidates | 286 | (while candidates |
| 289 | (let ((entry (lookup-nested-alist (car candidates) | 287 | (let ((entry (lookup-nested-alist (car candidates) |
| 290 | skkdic-word-list nil nil t))) | 288 | skkdic-word-list nil nil t))) |
| @@ -292,26 +290,24 @@ | |||
| 292 | (setcar entry (cons kana (car entry))) | 290 | (setcar entry (cons kana (car entry))) |
| 293 | (set-nested-alist (car candidates) (list kana) | 291 | (set-nested-alist (car candidates) (list kana) |
| 294 | skkdic-word-list))) | 292 | skkdic-word-list))) |
| 295 | (setq candidates (cdr candidates)))))))) | 293 | (setq candidates (cdr candidates))))) |
| 294 | (setq skkdic-okuri-nasi-entries-count (length skkdic-okuri-nasi-entries)) | ||
| 295 | (progress-reporter-done progress)))) | ||
| 296 | 296 | ||
| 297 | (defun skkdic-convert-okuri-nasi (skkbuf buf) | 297 | (defun skkdic-convert-okuri-nasi (skkbuf buf) |
| 298 | (message "Processing OKURI-NASI entries ...") | ||
| 299 | (with-current-buffer buf | 298 | (with-current-buffer buf |
| 300 | (insert ";; Setting okuri-nasi entries.\n" | 299 | (insert ";; Setting okuri-nasi entries.\n" |
| 301 | "(skkdic-set-okuri-nasi\n") | 300 | "(skkdic-set-okuri-nasi\n") |
| 302 | (let ((l (nreverse skkdic-okuri-nasi-entries)) | 301 | (let ((l (nreverse skkdic-okuri-nasi-entries)) |
| 303 | (count 0) | 302 | (progress (make-progress-reporter "Processing OKURI-NASI entries" |
| 304 | (prev-ratio 0) | 303 | 0 skkdic-okuri-nasi-entries-count |
| 305 | ratio) | 304 | nil 10)) |
| 305 | (count 0)) | ||
| 306 | (while l | 306 | (while l |
| 307 | (let ((kana (car (car l))) | 307 | (let ((kana (car (car l))) |
| 308 | (candidates (cdr (car l)))) | 308 | (candidates (cdr (car l)))) |
| 309 | (setq ratio (floor (* count 100.0) skkdic-okuri-nasi-entries-count) | 309 | (setq count (1+ count)) |
| 310 | count (1+ count)) | 310 | (progress-reporter-update progress count) |
| 311 | (if (/= (/ prev-ratio 10) (/ ratio 10)) | ||
| 312 | (progn | ||
| 313 | (message "processed %2d%% ..." ratio) | ||
| 314 | (setq prev-ratio ratio))) | ||
| 315 | (if (setq candidates | 311 | (if (setq candidates |
| 316 | (skkdic-reduced-candidates skkbuf kana candidates)) | 312 | (skkdic-reduced-candidates skkbuf kana candidates)) |
| 317 | (progn | 313 | (progn |
| @@ -320,7 +316,8 @@ | |||
| 320 | (insert " " (car candidates)) | 316 | (insert " " (car candidates)) |
| 321 | (setq candidates (cdr candidates))) | 317 | (setq candidates (cdr candidates))) |
| 322 | (insert "\"\n")))) | 318 | (insert "\"\n")))) |
| 323 | (setq l (cdr l)))) | 319 | (setq l (cdr l))) |
| 320 | (progress-reporter-done progress)) | ||
| 324 | (insert ")\n\n"))) | 321 | (insert ")\n\n"))) |
| 325 | 322 | ||
| 326 | (defun skkdic-convert (filename &optional dirname) | 323 | (defun skkdic-convert (filename &optional dirname) |
| @@ -467,7 +464,7 @@ To get complete usage, invoke: | |||
| 467 | (i (match-end 0)) | 464 | (i (match-end 0)) |
| 468 | candidates) | 465 | candidates) |
| 469 | (while (string-match "[^ ]+" entry i) | 466 | (while (string-match "[^ ]+" entry i) |
| 470 | (setq candidates (cons (match-string 0 entry) candidates)) | 467 | (setq candidates (cons (match-string-no-properties 0 entry) candidates)) |
| 471 | (setq i (match-end 0))) | 468 | (setq i (match-end 0))) |
| 472 | (cons (skkdic-get-kana-compact-codes kana) candidates))) | 469 | (cons (skkdic-get-kana-compact-codes kana) candidates))) |
| 473 | 470 | ||
diff --git a/lisp/international/mule-util.el b/lisp/international/mule-util.el index e34b01c3064..257f8854c38 100644 --- a/lisp/international/mule-util.el +++ b/lisp/international/mule-util.el | |||
| @@ -143,20 +143,43 @@ longer than KEYSEQ. | |||
| 143 | See the documentation of `nested-alist-p' for more detail." | 143 | See the documentation of `nested-alist-p' for more detail." |
| 144 | (or (nested-alist-p alist) | 144 | (or (nested-alist-p alist) |
| 145 | (error "Invalid argument %s" alist)) | 145 | (error "Invalid argument %s" alist)) |
| 146 | (let ((islist (listp keyseq)) | 146 | (let ((len (or len (length keyseq))) |
| 147 | (len (or len (length keyseq))) | 147 | (i 0)) |
| 148 | (i 0) | 148 | (cond |
| 149 | key-elt slot) | 149 | ((stringp keyseq) ; We can use `assq' for characters. |
| 150 | (while (< i len) | 150 | (while (< i len) |
| 151 | (if (null (nested-alist-p alist)) | 151 | (if (null (nested-alist-p alist)) |
| 152 | (error "Keyseq %s is too long for this nested alist" keyseq)) | 152 | (error "Keyseq %s is too long for this nested alist" keyseq)) |
| 153 | (setq key-elt (if islist (nth i keyseq) (aref keyseq i))) | 153 | (let* ((key-elt (aref keyseq i)) |
| 154 | (setq slot (assoc key-elt (cdr alist))) | 154 | (slot (assq key-elt (cdr alist)))) |
| 155 | (unless slot | 155 | (unless slot |
| 156 | (setq slot (cons key-elt (list t))) | 156 | (setq slot (list key-elt t)) |
| 157 | (setcdr alist (cons slot (cdr alist)))) | 157 | (push slot (cdr alist))) |
| 158 | (setq alist (cdr slot)) | 158 | (setq alist (cdr slot))) |
| 159 | (setq i (1+ i))) | 159 | (setq i (1+ i)))) |
| 160 | ((arrayp keyseq) | ||
| 161 | (while (< i len) | ||
| 162 | (if (null (nested-alist-p alist)) | ||
| 163 | (error "Keyseq %s is too long for this nested alist" keyseq)) | ||
| 164 | (let* ((key-elt (aref keyseq i)) | ||
| 165 | (slot (assoc key-elt (cdr alist)))) | ||
| 166 | (unless slot | ||
| 167 | (setq slot (list key-elt t)) | ||
| 168 | (push slot (cdr alist))) | ||
| 169 | (setq alist (cdr slot))) | ||
| 170 | (setq i (1+ i)))) | ||
| 171 | ((listp keyseq) | ||
| 172 | (while (< i len) | ||
| 173 | (if (null (nested-alist-p alist)) | ||
| 174 | (error "Keyseq %s is too long for this nested alist" keyseq)) | ||
| 175 | (let* ((key-elt (pop keyseq)) | ||
| 176 | (slot (assoc key-elt (cdr alist)))) | ||
| 177 | (unless slot | ||
| 178 | (setq slot (list key-elt t)) | ||
| 179 | (push slot (cdr alist))) | ||
| 180 | (setq alist (cdr slot))) | ||
| 181 | (setq i (1+ i)))) | ||
| 182 | (t (signal 'wrong-type-argument (list keyseq)))) | ||
| 160 | (setcar alist entry) | 183 | (setcar alist entry) |
| 161 | (if branches | 184 | (if branches |
| 162 | (setcdr (last alist) branches)))) | 185 | (setcdr (last alist) branches)))) |
| @@ -179,15 +202,23 @@ Optional 5th argument NIL-FOR-TOO-LONG non-nil means return nil | |||
| 179 | (setq len (length keyseq))) | 202 | (setq len (length keyseq))) |
| 180 | (let ((i (or start 0))) | 203 | (let ((i (or start 0))) |
| 181 | (if (catch 'lookup-nested-alist-tag | 204 | (if (catch 'lookup-nested-alist-tag |
| 182 | (if (listp keyseq) | 205 | (cond ((stringp keyseq) ; We can use `assq' for characters. |
| 183 | (while (< i len) | 206 | (while (< i len) |
| 184 | (if (setq alist (cdr (assoc (nth i keyseq) (cdr alist)))) | 207 | (if (setq alist (cdr (assq (aref keyseq i) (cdr alist)))) |
| 185 | (setq i (1+ i)) | 208 | (setq i (1+ i)) |
| 186 | (throw 'lookup-nested-alist-tag t)))) | 209 | (throw 'lookup-nested-alist-tag t)))) |
| 187 | (while (< i len) | 210 | ((arrayp keyseq) |
| 188 | (if (setq alist (cdr (assoc (aref keyseq i) (cdr alist)))) | 211 | (while (< i len) |
| 189 | (setq i (1+ i)) | 212 | (if (setq alist (cdr (assoc (aref keyseq i) (cdr alist)))) |
| 190 | (throw 'lookup-nested-alist-tag t)))) | 213 | (setq i (1+ i)) |
| 214 | (throw 'lookup-nested-alist-tag t)))) | ||
| 215 | ((listp keyseq) | ||
| 216 | (setq keyseq (nthcdr i keyseq)) | ||
| 217 | (while (< i len) | ||
| 218 | (if (setq alist (cdr (assoc (pop keyseq) (cdr alist)))) | ||
| 219 | (setq i (1+ i)) | ||
| 220 | (throw 'lookup-nested-alist-tag t)))) | ||
| 221 | (t (signal 'wrong-type-argument (list keyseq))))) | ||
| 191 | ;; KEYSEQ is too long. | 222 | ;; KEYSEQ is too long. |
| 192 | (if nil-for-too-long nil i) | 223 | (if nil-for-too-long nil i) |
| 193 | alist))) | 224 | alist))) |