aboutsummaryrefslogtreecommitdiffstats
path: root/lisp
diff options
context:
space:
mode:
authorNoam Postavsky2017-07-01 22:39:16 -0400
committerNoam Postavsky2017-08-21 20:52:25 -0400
commit9d7973530f912c6001445ba9b83b7893b466aee8 (patch)
tree4581c401602c8def0331858ff9ef16122b00229d /lisp
parentba0bb332dd841274208f71e0739e0c5e5d231d7a (diff)
downloademacs-9d7973530f912c6001445ba9b83b7893b466aee8.tar.gz
emacs-9d7973530f912c6001445ba9b83b7893b466aee8.zip
Optimize skkdic conversion (Bug#28043)
The primary speedup comes from the optimizing lookup-nested-alist and set-nested-alist for the case where the key is a string. This brings the time down to less than half the original. * lisp/international/mule-util.el (lookup-nested-alist) (set-nested-alist): Use `assq' instead of `assoc' when KEYSEQ is a string. * lisp/international/ja-dic-cnv.el (skkdic-collect-okuri-nasi) (skkdic-convert-okuri-nasi): Use progress-reporter functions instead of calculating ratio of work done inline. (skkdic-reduced-candidates): Call `char-category-set' on the first character of the string directly, instead of using a regexp for the character category. (skkdic--japanese-category-set): New constant. (skkdic-collect-okuri-nasi): Just set `skkdic-okuri-nasi-entries-count' at once at the end rather than updating it throughout the loop. (skkdic-convert-postfix skkdic-convert-prefix) skkdic-get-candidate-list, skkdic-collect-okuri-nasi) (skkdic-extract-conversion-data): Use `match-string-no-properties' instead of `match-string'.
Diffstat (limited to 'lisp')
-rw-r--r--lisp/international/ja-dic-cnv.el61
-rw-r--r--lisp/international/mule-util.el77
2 files changed, 83 insertions, 55 deletions
diff --git a/lisp/international/ja-dic-cnv.el b/lisp/international/ja-dic-cnv.el
index e80b1b28810..63eede093d5 100644
--- a/lisp/international/ja-dic-cnv.el
+++ b/lisp/international/ja-dic-cnv.el
@@ -125,10 +125,10 @@
125 125
126 ;; Search postfix entries. 126 ;; Search postfix entries.
127 (while (re-search-forward "^[#<>?]\\(\\(\\cH\\|$B!<(B\\)+\\) " nil t) 127 (while (re-search-forward "^[#<>?]\\(\\(\\cH\\|$B!<(B\\)+\\) " nil t)
128 (let ((kana (match-string 1)) 128 (let ((kana (match-string-no-properties 1))
129 str candidates) 129 str candidates)
130 (while (looking-at "/[#0-9 ]*\\([^/\n]*\\)/") 130 (while (looking-at "/[#0-9 ]*\\([^/\n]*\\)/")
131 (setq str (match-string 1)) 131 (setq str (match-string-no-properties 1))
132 (if (not (member str candidates)) 132 (if (not (member str candidates))
133 (setq candidates (cons str candidates))) 133 (setq candidates (cons str candidates)))
134 (goto-char (match-end 1))) 134 (goto-char (match-end 1)))
@@ -158,10 +158,10 @@
158 "(skkdic-set-prefix\n")) 158 "(skkdic-set-prefix\n"))
159 (save-excursion 159 (save-excursion
160 (while (re-search-forward "^\\(\\(\\cH\\|$B!<(B\\)+\\)[<>?] " nil t) 160 (while (re-search-forward "^\\(\\(\\cH\\|$B!<(B\\)+\\)[<>?] " nil t)
161 (let ((kana (match-string 1)) 161 (let ((kana (match-string-no-properties 1))
162 str candidates) 162 str candidates)
163 (while (looking-at "/\\([^/\n]+\\)/") 163 (while (looking-at "/\\([^/\n]+\\)/")
164 (setq str (match-string 1)) 164 (setq str (match-string-no-properties 1))
165 (if (not (member str candidates)) 165 (if (not (member str candidates))
166 (setq candidates (cons str candidates))) 166 (setq candidates (cons str candidates)))
167 (goto-char (match-end 1))) 167 (goto-char (match-end 1)))
@@ -180,8 +180,8 @@
180 (let (candidates) 180 (let (candidates)
181 (goto-char from) 181 (goto-char from)
182 (while (re-search-forward "/[^/ \n]+" to t) 182 (while (re-search-forward "/[^/ \n]+" to t)
183 (setq candidates (cons (buffer-substring (1+ (match-beginning 0)) 183 (setq candidates (cons (buffer-substring-no-properties
184 (match-end 0)) 184 (1+ (match-beginning 0)) (match-end 0))
185 candidates))) 185 candidates)))
186 candidates)) 186 candidates))
187 187
@@ -251,12 +251,16 @@
251;; Return list of candidates which excludes some from CANDIDATES. 251;; Return list of candidates which excludes some from CANDIDATES.
252;; Excluded candidates can be derived from another entry. 252;; Excluded candidates can be derived from another entry.
253 253
254(defconst skkdic--japanese-category-set (make-category-set "j"))
255
254(defun skkdic-reduced-candidates (skkbuf kana candidates) 256(defun skkdic-reduced-candidates (skkbuf kana candidates)
255 (let (elt l) 257 (let (elt l)
256 (while candidates 258 (while candidates
257 (setq elt (car candidates)) 259 (setq elt (car candidates))
258 (if (or (= (length elt) 1) 260 (if (or (= (length elt) 1)
259 (and (string-match "^\\cj" elt) 261 (and (bool-vector-subsetp
262 skkdic--japanese-category-set
263 (char-category-set (aref elt 0)))
260 (not (skkdic-breakup-string skkbuf kana elt 0 (length elt) 264 (not (skkdic-breakup-string skkbuf kana elt 0 (length elt)
261 'first)))) 265 'first))))
262 (setq l (cons elt l))) 266 (setq l (cons elt l)))
@@ -267,24 +271,18 @@
267(defvar skkdic-okuri-nasi-entries-count 0) 271(defvar skkdic-okuri-nasi-entries-count 0)
268 272
269(defun skkdic-collect-okuri-nasi () 273(defun skkdic-collect-okuri-nasi ()
270 (message "Collecting OKURI-NASI entries ...")
271 (save-excursion 274 (save-excursion
272 (let ((prev-ratio 0) 275 (let ((progress (make-progress-reporter "Collecting OKURI-NASI entries"
273 ratio) 276 (point) (point-max)
277 nil 10)))
274 (while (re-search-forward "^\\(\\(\\cH\\|$B!<(B\\)+\\) \\(/\\cj.*\\)/$" 278 (while (re-search-forward "^\\(\\(\\cH\\|$B!<(B\\)+\\) \\(/\\cj.*\\)/$"
275 nil t) 279 nil t)
276 (let ((kana (match-string 1)) 280 (let ((kana (match-string-no-properties 1))
277 (candidates (skkdic-get-candidate-list (match-beginning 3) 281 (candidates (skkdic-get-candidate-list (match-beginning 3)
278 (match-end 3)))) 282 (match-end 3))))
279 (setq skkdic-okuri-nasi-entries 283 (setq skkdic-okuri-nasi-entries
280 (cons (cons kana candidates) skkdic-okuri-nasi-entries) 284 (cons (cons kana candidates) skkdic-okuri-nasi-entries))
281 skkdic-okuri-nasi-entries-count 285 (progress-reporter-update progress (point))
282 (1+ skkdic-okuri-nasi-entries-count))
283 (setq ratio (floor (* (point) 100.0) (point-max)))
284 (if (/= (/ prev-ratio 10) (/ ratio 10))
285 (progn
286 (message "collected %2d%% ..." ratio)
287 (setq prev-ratio ratio)))
288 (while candidates 286 (while candidates
289 (let ((entry (lookup-nested-alist (car candidates) 287 (let ((entry (lookup-nested-alist (car candidates)
290 skkdic-word-list nil nil t))) 288 skkdic-word-list nil nil t)))
@@ -292,26 +290,24 @@
292 (setcar entry (cons kana (car entry))) 290 (setcar entry (cons kana (car entry)))
293 (set-nested-alist (car candidates) (list kana) 291 (set-nested-alist (car candidates) (list kana)
294 skkdic-word-list))) 292 skkdic-word-list)))
295 (setq candidates (cdr candidates)))))))) 293 (setq candidates (cdr candidates)))))
294 (setq skkdic-okuri-nasi-entries-count (length skkdic-okuri-nasi-entries))
295 (progress-reporter-done progress))))
296 296
297(defun skkdic-convert-okuri-nasi (skkbuf buf) 297(defun skkdic-convert-okuri-nasi (skkbuf buf)
298 (message "Processing OKURI-NASI entries ...")
299 (with-current-buffer buf 298 (with-current-buffer buf
300 (insert ";; Setting okuri-nasi entries.\n" 299 (insert ";; Setting okuri-nasi entries.\n"
301 "(skkdic-set-okuri-nasi\n") 300 "(skkdic-set-okuri-nasi\n")
302 (let ((l (nreverse skkdic-okuri-nasi-entries)) 301 (let ((l (nreverse skkdic-okuri-nasi-entries))
303 (count 0) 302 (progress (make-progress-reporter "Processing OKURI-NASI entries"
304 (prev-ratio 0) 303 0 skkdic-okuri-nasi-entries-count
305 ratio) 304 nil 10))
305 (count 0))
306 (while l 306 (while l
307 (let ((kana (car (car l))) 307 (let ((kana (car (car l)))
308 (candidates (cdr (car l)))) 308 (candidates (cdr (car l))))
309 (setq ratio (floor (* count 100.0) skkdic-okuri-nasi-entries-count) 309 (setq count (1+ count))
310 count (1+ count)) 310 (progress-reporter-update progress count)
311 (if (/= (/ prev-ratio 10) (/ ratio 10))
312 (progn
313 (message "processed %2d%% ..." ratio)
314 (setq prev-ratio ratio)))
315 (if (setq candidates 311 (if (setq candidates
316 (skkdic-reduced-candidates skkbuf kana candidates)) 312 (skkdic-reduced-candidates skkbuf kana candidates))
317 (progn 313 (progn
@@ -320,7 +316,8 @@
320 (insert " " (car candidates)) 316 (insert " " (car candidates))
321 (setq candidates (cdr candidates))) 317 (setq candidates (cdr candidates)))
322 (insert "\"\n")))) 318 (insert "\"\n"))))
323 (setq l (cdr l)))) 319 (setq l (cdr l)))
320 (progress-reporter-done progress))
324 (insert ")\n\n"))) 321 (insert ")\n\n")))
325 322
326(defun skkdic-convert (filename &optional dirname) 323(defun skkdic-convert (filename &optional dirname)
@@ -467,7 +464,7 @@ To get complete usage, invoke:
467 (i (match-end 0)) 464 (i (match-end 0))
468 candidates) 465 candidates)
469 (while (string-match "[^ ]+" entry i) 466 (while (string-match "[^ ]+" entry i)
470 (setq candidates (cons (match-string 0 entry) candidates)) 467 (setq candidates (cons (match-string-no-properties 0 entry) candidates))
471 (setq i (match-end 0))) 468 (setq i (match-end 0)))
472 (cons (skkdic-get-kana-compact-codes kana) candidates))) 469 (cons (skkdic-get-kana-compact-codes kana) candidates)))
473 470
diff --git a/lisp/international/mule-util.el b/lisp/international/mule-util.el
index e34b01c3064..257f8854c38 100644
--- a/lisp/international/mule-util.el
+++ b/lisp/international/mule-util.el
@@ -143,20 +143,43 @@ longer than KEYSEQ.
143See the documentation of `nested-alist-p' for more detail." 143See the documentation of `nested-alist-p' for more detail."
144 (or (nested-alist-p alist) 144 (or (nested-alist-p alist)
145 (error "Invalid argument %s" alist)) 145 (error "Invalid argument %s" alist))
146 (let ((islist (listp keyseq)) 146 (let ((len (or len (length keyseq)))
147 (len (or len (length keyseq))) 147 (i 0))
148 (i 0) 148 (cond
149 key-elt slot) 149 ((stringp keyseq) ; We can use `assq' for characters.
150 (while (< i len) 150 (while (< i len)
151 (if (null (nested-alist-p alist)) 151 (if (null (nested-alist-p alist))
152 (error "Keyseq %s is too long for this nested alist" keyseq)) 152 (error "Keyseq %s is too long for this nested alist" keyseq))
153 (setq key-elt (if islist (nth i keyseq) (aref keyseq i))) 153 (let* ((key-elt (aref keyseq i))
154 (setq slot (assoc key-elt (cdr alist))) 154 (slot (assq key-elt (cdr alist))))
155 (unless slot 155 (unless slot
156 (setq slot (cons key-elt (list t))) 156 (setq slot (list key-elt t))
157 (setcdr alist (cons slot (cdr alist)))) 157 (push slot (cdr alist)))
158 (setq alist (cdr slot)) 158 (setq alist (cdr slot)))
159 (setq i (1+ i))) 159 (setq i (1+ i))))
160 ((arrayp keyseq)
161 (while (< i len)
162 (if (null (nested-alist-p alist))
163 (error "Keyseq %s is too long for this nested alist" keyseq))
164 (let* ((key-elt (aref keyseq i))
165 (slot (assoc key-elt (cdr alist))))
166 (unless slot
167 (setq slot (list key-elt t))
168 (push slot (cdr alist)))
169 (setq alist (cdr slot)))
170 (setq i (1+ i))))
171 ((listp keyseq)
172 (while (< i len)
173 (if (null (nested-alist-p alist))
174 (error "Keyseq %s is too long for this nested alist" keyseq))
175 (let* ((key-elt (pop keyseq))
176 (slot (assoc key-elt (cdr alist))))
177 (unless slot
178 (setq slot (list key-elt t))
179 (push slot (cdr alist)))
180 (setq alist (cdr slot)))
181 (setq i (1+ i))))
182 (t (signal 'wrong-type-argument (list keyseq))))
160 (setcar alist entry) 183 (setcar alist entry)
161 (if branches 184 (if branches
162 (setcdr (last alist) branches)))) 185 (setcdr (last alist) branches))))
@@ -179,15 +202,23 @@ Optional 5th argument NIL-FOR-TOO-LONG non-nil means return nil
179 (setq len (length keyseq))) 202 (setq len (length keyseq)))
180 (let ((i (or start 0))) 203 (let ((i (or start 0)))
181 (if (catch 'lookup-nested-alist-tag 204 (if (catch 'lookup-nested-alist-tag
182 (if (listp keyseq) 205 (cond ((stringp keyseq) ; We can use `assq' for characters.
183 (while (< i len) 206 (while (< i len)
184 (if (setq alist (cdr (assoc (nth i keyseq) (cdr alist)))) 207 (if (setq alist (cdr (assq (aref keyseq i) (cdr alist))))
185 (setq i (1+ i)) 208 (setq i (1+ i))
186 (throw 'lookup-nested-alist-tag t)))) 209 (throw 'lookup-nested-alist-tag t))))
187 (while (< i len) 210 ((arrayp keyseq)
188 (if (setq alist (cdr (assoc (aref keyseq i) (cdr alist)))) 211 (while (< i len)
189 (setq i (1+ i)) 212 (if (setq alist (cdr (assoc (aref keyseq i) (cdr alist))))
190 (throw 'lookup-nested-alist-tag t)))) 213 (setq i (1+ i))
214 (throw 'lookup-nested-alist-tag t))))
215 ((listp keyseq)
216 (setq keyseq (nthcdr i keyseq))
217 (while (< i len)
218 (if (setq alist (cdr (assoc (pop keyseq) (cdr alist))))
219 (setq i (1+ i))
220 (throw 'lookup-nested-alist-tag t))))
221 (t (signal 'wrong-type-argument (list keyseq)))))
191 ;; KEYSEQ is too long. 222 ;; KEYSEQ is too long.
192 (if nil-for-too-long nil i) 223 (if nil-for-too-long nil i)
193 alist))) 224 alist)))