aboutsummaryrefslogtreecommitdiffstats
path: root/lisp/international
diff options
context:
space:
mode:
authorMark Oteiza2017-08-31 17:22:39 -0400
committerMark Oteiza2017-08-31 17:22:39 -0400
commit96c2c098aeed5c85733577ebbdaf33af6fbb59e9 (patch)
treed80538fd4ed9047f29b07aeff40feffa6d464005 /lisp/international
parente6a2b4c2df96ed8780ff407481a18e3f4299c8ad (diff)
downloademacs-96c2c098aeed5c85733577ebbdaf33af6fbb59e9.tar.gz
emacs-96c2c098aeed5c85733577ebbdaf33af6fbb59e9.zip
Make ucs-names a hash table (Bug#28302)
* etc/NEWS: Mention the type change. * lisp/descr-text.el (describe-char): Use gethash to access ucs-names. Hardcode BEL's name into the function instead of needlessly mapping over the hash table in the spirit of rassoc. * lisp/international/mule-cmds.el (ucs-names): Fix variable and function docstrings. Initialize a hash table for ucs-names--the number of entries is 42845 here. Switch to hash-table getters/setters. (mule--ucs-names-annotation): Use hash-table getter. (char-from-name): Upcase the string if ignore-case is truthy. * lisp/leim/quail/latin-ltx.el: Use maphash instead of dolist.
Diffstat (limited to 'lisp/international')
-rw-r--r--lisp/international/mule-cmds.el43
1 files changed, 22 insertions, 21 deletions
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el
index 338ca6a6e3c..a596411eb78 100644
--- a/lisp/international/mule-cmds.el
+++ b/lisp/international/mule-cmds.el
@@ -2923,10 +2923,10 @@ on encoding."
2923(make-obsolete-variable 'nonascii-translation-table "do not use it." "23.1") 2923(make-obsolete-variable 'nonascii-translation-table "do not use it." "23.1")
2924 2924
2925(defvar ucs-names nil 2925(defvar ucs-names nil
2926 "Alist of cached (CHAR-NAME . CHAR-CODE) pairs.") 2926 "Hash table of cached CHAR-NAME keys to CHAR-CODE values.")
2927 2927
2928(defun ucs-names () 2928(defun ucs-names ()
2929 "Return alist of (CHAR-NAME . CHAR-CODE) pairs cached in `ucs-names'." 2929 "Return table of CHAR-NAME keys and CHAR-CODE values cached in `ucs-names'."
2930 (or ucs-names 2930 (or ucs-names
2931 (let ((ranges 2931 (let ((ranges
2932 '((#x0000 . #x33FF) 2932 '((#x0000 . #x33FF)
@@ -2954,38 +2954,39 @@ on encoding."
2954 ;; (#x20000 . #xDFFFF) CJK Ideograph Extension A, B, etc, unused 2954 ;; (#x20000 . #xDFFFF) CJK Ideograph Extension A, B, etc, unused
2955 (#xE0000 . #xE01FF))) 2955 (#xE0000 . #xE01FF)))
2956 (gc-cons-threshold 10000000) 2956 (gc-cons-threshold 10000000)
2957 names) 2957 (names (make-hash-table :size 42943 :test #'equal)))
2958 (dolist (range ranges) 2958 (dolist (range ranges)
2959 (let ((c (car range)) 2959 (let ((c (car range))
2960 (end (cdr range))) 2960 (end (cdr range)))
2961 (while (<= c end) 2961 (while (<= c end)
2962 (let ((new-name (get-char-code-property c 'name)) 2962 (let ((new-name (get-char-code-property c 'name))
2963 (old-name (get-char-code-property c 'old-name))) 2963 (old-name (get-char-code-property c 'old-name)))
2964 ;; In theory this code could end up pushing an "old-name" that 2964 ;; In theory this code could end up pushing an "old-name" that
2965 ;; shadows a "new-name" but in practice every time an 2965 ;; shadows a "new-name" but in practice every time an
2966 ;; `old-name' conflicts with a `new-name', the newer one has a 2966 ;; `old-name' conflicts with a `new-name', the newer one has a
2967 ;; higher code, so it gets pushed later! 2967 ;; higher code, so it gets pushed later!
2968 (if new-name (push (cons new-name c) names)) 2968 (if new-name (puthash new-name c names))
2969 (if old-name (push (cons old-name c) names)) 2969 (if old-name (puthash old-name c names))
2970 (setq c (1+ c)))))) 2970 (setq c (1+ c))))))
2971 ;; Special case for "BELL" which is apparently the only char which 2971 ;; Special case for "BELL" which is apparently the only char which
2972 ;; doesn't have a new name and whose old-name is shadowed by a newer 2972 ;; doesn't have a new name and whose old-name is shadowed by a newer
2973 ;; char with that name. 2973 ;; char with that name.
2974 (setq ucs-names `(("BELL (BEL)" . 7) ,@names))))) 2974 (puthash "BELL (BEL)" ?\a names)
2975 (setq ucs-names names))))
2975 2976
2976(defun mule--ucs-names-annotation (name) 2977(defun mule--ucs-names-annotation (name)
2977 ;; FIXME: It would be much better to add this annotation before rather than 2978 ;; FIXME: It would be much better to add this annotation before rather than
2978 ;; after the char name, so the annotations are aligned. 2979 ;; after the char name, so the annotations are aligned.
2979 ;; FIXME: The default behavior of displaying annotations in italics 2980 ;; FIXME: The default behavior of displaying annotations in italics
2980 ;; doesn't work well here. 2981 ;; doesn't work well here.
2981 (let ((char (assoc name ucs-names))) 2982 (let ((char (gethash name ucs-names)))
2982 (when char (format " (%c)" (cdr char))))) 2983 (when char (format " (%c)" char))))
2983 2984
2984(defun char-from-name (string &optional ignore-case) 2985(defun char-from-name (string &optional ignore-case)
2985 "Return a character as a number from its Unicode name STRING. 2986 "Return a character as a number from its Unicode name STRING.
2986If optional IGNORE-CASE is non-nil, ignore case in STRING. 2987If optional IGNORE-CASE is non-nil, ignore case in STRING.
2987Return nil if STRING does not name a character." 2988Return nil if STRING does not name a character."
2988 (or (cdr (assoc-string string (ucs-names) ignore-case)) 2989 (or (gethash (if ignore-case (upcase string) string) (ucs-names))
2989 (let ((minus (string-match-p "-[0-9A-F]+\\'" string))) 2990 (let ((minus (string-match-p "-[0-9A-F]+\\'" string)))
2990 (when minus 2991 (when minus
2991 ;; Parse names like "VARIATION SELECTOR-17" and "CJK 2992 ;; Parse names like "VARIATION SELECTOR-17" and "CJK