aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark Oteiza2017-08-31 17:22:39 -0400
committerMark Oteiza2017-08-31 17:22:39 -0400
commit96c2c098aeed5c85733577ebbdaf33af6fbb59e9 (patch)
treed80538fd4ed9047f29b07aeff40feffa6d464005
parente6a2b4c2df96ed8780ff407481a18e3f4299c8ad (diff)
downloademacs-96c2c098aeed5c85733577ebbdaf33af6fbb59e9.tar.gz
emacs-96c2c098aeed5c85733577ebbdaf33af6fbb59e9.zip
Make ucs-names a hash table (Bug#28302)
* etc/NEWS: Mention the type change. * lisp/descr-text.el (describe-char): Use gethash to access ucs-names. Hardcode BEL's name into the function instead of needlessly mapping over the hash table in the spirit of rassoc. * lisp/international/mule-cmds.el (ucs-names): Fix variable and function docstrings. Initialize a hash table for ucs-names--the number of entries is 42845 here. Switch to hash-table getters/setters. (mule--ucs-names-annotation): Use hash-table getter. (char-from-name): Upcase the string if ignore-case is truthy. * lisp/leim/quail/latin-ltx.el: Use maphash instead of dolist.
-rw-r--r--etc/NEWS3
-rw-r--r--lisp/descr-text.el6
-rw-r--r--lisp/international/mule-cmds.el43
-rw-r--r--lisp/leim/quail/latin-ltx.el30
4 files changed, 43 insertions, 39 deletions
diff --git a/etc/NEWS b/etc/NEWS
index 0889303f82e..d32b0e5bc89 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -1155,6 +1155,9 @@ isn't compatible with previous Emacs versions. This functionality can
1155be disabled by setting 'byte-compile-cond-use-jump-table' to nil. 1155be disabled by setting 'byte-compile-cond-use-jump-table' to nil.
1156 1156
1157--- 1157---
1158** The alist 'ucs-names' is now a hash table.
1159
1160---
1158** 'C-up', 'C-down', 'C-left' and 'C-right' are now defined in term 1161** 'C-up', 'C-down', 'C-left' and 'C-right' are now defined in term
1159mode to send the same escape sequences that xterm does. This makes 1162mode to send the same escape sequences that xterm does. This makes
1160things like forward-word in readline work. 1163things like forward-word in readline work.
diff --git a/lisp/descr-text.el b/lisp/descr-text.el
index 6f36bbed680..b3c96988dd6 100644
--- a/lisp/descr-text.el
+++ b/lisp/descr-text.el
@@ -617,16 +617,16 @@ relevant to POS."
617 (list 617 (list
618 (let* ((names (ucs-names)) 618 (let* ((names (ucs-names))
619 (name 619 (name
620 (or (when (= char 7) 620 (or (when (= char ?\a)
621 ;; Special case for "BELL" which is 621 ;; Special case for "BELL" which is
622 ;; apparently the only char which 622 ;; apparently the only char which
623 ;; doesn't have a new name and whose 623 ;; doesn't have a new name and whose
624 ;; old-name is shadowed by a newer char 624 ;; old-name is shadowed by a newer char
625 ;; with that name (bug#25641). 625 ;; with that name (bug#25641).
626 (car (rassoc char names))) 626 "BELL (BEL)")
627 (get-char-code-property char 'name) 627 (get-char-code-property char 'name)
628 (get-char-code-property char 'old-name)))) 628 (get-char-code-property char 'old-name))))
629 (if (and name (assoc-string name names)) 629 (if (and name (gethash name names))
630 (format 630 (format
631 "type \"C-x 8 RET %x\" or \"C-x 8 RET %s\"" 631 "type \"C-x 8 RET %x\" or \"C-x 8 RET %s\""
632 char name) 632 char name)
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el
index 338ca6a6e3c..a596411eb78 100644
--- a/lisp/international/mule-cmds.el
+++ b/lisp/international/mule-cmds.el
@@ -2923,10 +2923,10 @@ on encoding."
2923(make-obsolete-variable 'nonascii-translation-table "do not use it." "23.1") 2923(make-obsolete-variable 'nonascii-translation-table "do not use it." "23.1")
2924 2924
2925(defvar ucs-names nil 2925(defvar ucs-names nil
2926 "Alist of cached (CHAR-NAME . CHAR-CODE) pairs.") 2926 "Hash table of cached CHAR-NAME keys to CHAR-CODE values.")
2927 2927
2928(defun ucs-names () 2928(defun ucs-names ()
2929 "Return alist of (CHAR-NAME . CHAR-CODE) pairs cached in `ucs-names'." 2929 "Return table of CHAR-NAME keys and CHAR-CODE values cached in `ucs-names'."
2930 (or ucs-names 2930 (or ucs-names
2931 (let ((ranges 2931 (let ((ranges
2932 '((#x0000 . #x33FF) 2932 '((#x0000 . #x33FF)
@@ -2954,38 +2954,39 @@ on encoding."
2954 ;; (#x20000 . #xDFFFF) CJK Ideograph Extension A, B, etc, unused 2954 ;; (#x20000 . #xDFFFF) CJK Ideograph Extension A, B, etc, unused
2955 (#xE0000 . #xE01FF))) 2955 (#xE0000 . #xE01FF)))
2956 (gc-cons-threshold 10000000) 2956 (gc-cons-threshold 10000000)
2957 names) 2957 (names (make-hash-table :size 42943 :test #'equal)))
2958 (dolist (range ranges) 2958 (dolist (range ranges)
2959 (let ((c (car range)) 2959 (let ((c (car range))
2960 (end (cdr range))) 2960 (end (cdr range)))
2961 (while (<= c end) 2961 (while (<= c end)
2962 (let ((new-name (get-char-code-property c 'name)) 2962 (let ((new-name (get-char-code-property c 'name))
2963 (old-name (get-char-code-property c 'old-name))) 2963 (old-name (get-char-code-property c 'old-name)))
2964 ;; In theory this code could end up pushing an "old-name" that 2964 ;; In theory this code could end up pushing an "old-name" that
2965 ;; shadows a "new-name" but in practice every time an 2965 ;; shadows a "new-name" but in practice every time an
2966 ;; `old-name' conflicts with a `new-name', the newer one has a 2966 ;; `old-name' conflicts with a `new-name', the newer one has a
2967 ;; higher code, so it gets pushed later! 2967 ;; higher code, so it gets pushed later!
2968 (if new-name (push (cons new-name c) names)) 2968 (if new-name (puthash new-name c names))
2969 (if old-name (push (cons old-name c) names)) 2969 (if old-name (puthash old-name c names))
2970 (setq c (1+ c)))))) 2970 (setq c (1+ c))))))
2971 ;; Special case for "BELL" which is apparently the only char which 2971 ;; Special case for "BELL" which is apparently the only char which
2972 ;; doesn't have a new name and whose old-name is shadowed by a newer 2972 ;; doesn't have a new name and whose old-name is shadowed by a newer
2973 ;; char with that name. 2973 ;; char with that name.
2974 (setq ucs-names `(("BELL (BEL)" . 7) ,@names))))) 2974 (puthash "BELL (BEL)" ?\a names)
2975 (setq ucs-names names))))
2975 2976
2976(defun mule--ucs-names-annotation (name) 2977(defun mule--ucs-names-annotation (name)
2977 ;; FIXME: It would be much better to add this annotation before rather than 2978 ;; FIXME: It would be much better to add this annotation before rather than
2978 ;; after the char name, so the annotations are aligned. 2979 ;; after the char name, so the annotations are aligned.
2979 ;; FIXME: The default behavior of displaying annotations in italics 2980 ;; FIXME: The default behavior of displaying annotations in italics
2980 ;; doesn't work well here. 2981 ;; doesn't work well here.
2981 (let ((char (assoc name ucs-names))) 2982 (let ((char (gethash name ucs-names)))
2982 (when char (format " (%c)" (cdr char))))) 2983 (when char (format " (%c)" char))))
2983 2984
2984(defun char-from-name (string &optional ignore-case) 2985(defun char-from-name (string &optional ignore-case)
2985 "Return a character as a number from its Unicode name STRING. 2986 "Return a character as a number from its Unicode name STRING.
2986If optional IGNORE-CASE is non-nil, ignore case in STRING. 2987If optional IGNORE-CASE is non-nil, ignore case in STRING.
2987Return nil if STRING does not name a character." 2988Return nil if STRING does not name a character."
2988 (or (cdr (assoc-string string (ucs-names) ignore-case)) 2989 (or (gethash (if ignore-case (upcase string) string) (ucs-names))
2989 (let ((minus (string-match-p "-[0-9A-F]+\\'" string))) 2990 (let ((minus (string-match-p "-[0-9A-F]+\\'" string)))
2990 (when minus 2991 (when minus
2991 ;; Parse names like "VARIATION SELECTOR-17" and "CJK 2992 ;; Parse names like "VARIATION SELECTOR-17" and "CJK
diff --git a/lisp/leim/quail/latin-ltx.el b/lisp/leim/quail/latin-ltx.el
index 6c5afcd4f93..d8ea90ec3ec 100644
--- a/lisp/leim/quail/latin-ltx.el
+++ b/lisp/leim/quail/latin-ltx.el
@@ -75,20 +75,20 @@ system, including many technical ones. Examples:
75 (`(,seq ,re) 75 (`(,seq ,re)
76 (let ((count 0) 76 (let ((count 0)
77 (re (eval re t))) 77 (re (eval re t)))
78 (dolist (pair (ucs-names)) 78 (maphash
79 (let ((name (car pair)) 79 (lambda (name char)
80 (char (cdr pair))) 80 (when (and (characterp char) ;; Ignore char-ranges.
81 (when (and (characterp char) ;; Ignore char-ranges. 81 (string-match re name))
82 (string-match re name)) 82 (let ((keys (if (stringp seq)
83 (let ((keys (if (stringp seq) 83 (replace-match seq nil nil name)
84 (replace-match seq nil nil name) 84 (funcall seq name char))))
85 (funcall seq name char)))) 85 (if (listp keys)
86 (if (listp keys) 86 (dolist (x keys)
87 (dolist (x keys) 87 (setq count (1+ count))
88 (setq count (1+ count)) 88 (push (list x char) newrules))
89 (push (list x char) newrules)) 89 (setq count (1+ count))
90 (setq count (1+ count)) 90 (push (list keys char) newrules)))))
91 (push (list keys char) newrules)))))) 91 (ucs-names))
92 ;; (message "latin-ltx: %d mappings for %S" count re) 92 ;; (message "latin-ltx: %d mappings for %S" count re)
93 )))) 93 ))))
94 (setq newrules (delete-dups newrules)) 94 (setq newrules (delete-dups newrules))
@@ -206,7 +206,7 @@ system, including many technical ones. Examples:
206 206
207 ((lambda (name char) 207 ((lambda (name char)
208 (let* ((base (concat (match-string 1 name) (match-string 3 name))) 208 (let* ((base (concat (match-string 1 name) (match-string 3 name)))
209 (basechar (cdr (assoc base (ucs-names))))) 209 (basechar (gethash base (ucs-names))))
210 (when (latin-ltx--ascii-p basechar) 210 (when (latin-ltx--ascii-p basechar)
211 (string (if (match-end 2) ?^ ?_) basechar)))) 211 (string (if (match-end 2) ?^ ?_) basechar))))
212 "\\(.*\\)SU\\(?:B\\|\\(PER\\)\\)SCRIPT \\(.*\\)") 212 "\\(.*\\)SU\\(?:B\\|\\(PER\\)\\)SCRIPT \\(.*\\)")