diff options
| author | Mark Oteiza | 2017-08-31 17:22:39 -0400 |
|---|---|---|
| committer | Mark Oteiza | 2017-08-31 17:22:39 -0400 |
| commit | 96c2c098aeed5c85733577ebbdaf33af6fbb59e9 (patch) | |
| tree | d80538fd4ed9047f29b07aeff40feffa6d464005 /lisp/international | |
| parent | e6a2b4c2df96ed8780ff407481a18e3f4299c8ad (diff) | |
| download | emacs-96c2c098aeed5c85733577ebbdaf33af6fbb59e9.tar.gz emacs-96c2c098aeed5c85733577ebbdaf33af6fbb59e9.zip | |
Make ucs-names a hash table (Bug#28302)
* etc/NEWS: Mention the type change.
* lisp/descr-text.el (describe-char): Use gethash to access ucs-names.
Hardcode BEL's name into the function instead of needlessly mapping
over the hash table in the spirit of rassoc.
* lisp/international/mule-cmds.el (ucs-names): Fix variable and
function docstrings. Initialize a hash table for ucs-names--the
number of entries is 42845 here. Switch to hash-table
getters/setters.
(mule--ucs-names-annotation): Use hash-table getter.
(char-from-name): Upcase the string if ignore-case is truthy.
* lisp/leim/quail/latin-ltx.el: Use maphash instead of dolist.
Diffstat (limited to 'lisp/international')
| -rw-r--r-- | lisp/international/mule-cmds.el | 43 |
1 files changed, 22 insertions, 21 deletions
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el index 338ca6a6e3c..a596411eb78 100644 --- a/lisp/international/mule-cmds.el +++ b/lisp/international/mule-cmds.el | |||
| @@ -2923,10 +2923,10 @@ on encoding." | |||
| 2923 | (make-obsolete-variable 'nonascii-translation-table "do not use it." "23.1") | 2923 | (make-obsolete-variable 'nonascii-translation-table "do not use it." "23.1") |
| 2924 | 2924 | ||
| 2925 | (defvar ucs-names nil | 2925 | (defvar ucs-names nil |
| 2926 | "Alist of cached (CHAR-NAME . CHAR-CODE) pairs.") | 2926 | "Hash table of cached CHAR-NAME keys to CHAR-CODE values.") |
| 2927 | 2927 | ||
| 2928 | (defun ucs-names () | 2928 | (defun ucs-names () |
| 2929 | "Return alist of (CHAR-NAME . CHAR-CODE) pairs cached in `ucs-names'." | 2929 | "Return table of CHAR-NAME keys and CHAR-CODE values cached in `ucs-names'." |
| 2930 | (or ucs-names | 2930 | (or ucs-names |
| 2931 | (let ((ranges | 2931 | (let ((ranges |
| 2932 | '((#x0000 . #x33FF) | 2932 | '((#x0000 . #x33FF) |
| @@ -2954,38 +2954,39 @@ on encoding." | |||
| 2954 | ;; (#x20000 . #xDFFFF) CJK Ideograph Extension A, B, etc, unused | 2954 | ;; (#x20000 . #xDFFFF) CJK Ideograph Extension A, B, etc, unused |
| 2955 | (#xE0000 . #xE01FF))) | 2955 | (#xE0000 . #xE01FF))) |
| 2956 | (gc-cons-threshold 10000000) | 2956 | (gc-cons-threshold 10000000) |
| 2957 | names) | 2957 | (names (make-hash-table :size 42943 :test #'equal))) |
| 2958 | (dolist (range ranges) | 2958 | (dolist (range ranges) |
| 2959 | (let ((c (car range)) | 2959 | (let ((c (car range)) |
| 2960 | (end (cdr range))) | 2960 | (end (cdr range))) |
| 2961 | (while (<= c end) | 2961 | (while (<= c end) |
| 2962 | (let ((new-name (get-char-code-property c 'name)) | 2962 | (let ((new-name (get-char-code-property c 'name)) |
| 2963 | (old-name (get-char-code-property c 'old-name))) | 2963 | (old-name (get-char-code-property c 'old-name))) |
| 2964 | ;; In theory this code could end up pushing an "old-name" that | 2964 | ;; In theory this code could end up pushing an "old-name" that |
| 2965 | ;; shadows a "new-name" but in practice every time an | 2965 | ;; shadows a "new-name" but in practice every time an |
| 2966 | ;; `old-name' conflicts with a `new-name', the newer one has a | 2966 | ;; `old-name' conflicts with a `new-name', the newer one has a |
| 2967 | ;; higher code, so it gets pushed later! | 2967 | ;; higher code, so it gets pushed later! |
| 2968 | (if new-name (push (cons new-name c) names)) | 2968 | (if new-name (puthash new-name c names)) |
| 2969 | (if old-name (push (cons old-name c) names)) | 2969 | (if old-name (puthash old-name c names)) |
| 2970 | (setq c (1+ c)))))) | 2970 | (setq c (1+ c)))))) |
| 2971 | ;; Special case for "BELL" which is apparently the only char which | 2971 | ;; Special case for "BELL" which is apparently the only char which |
| 2972 | ;; doesn't have a new name and whose old-name is shadowed by a newer | 2972 | ;; doesn't have a new name and whose old-name is shadowed by a newer |
| 2973 | ;; char with that name. | 2973 | ;; char with that name. |
| 2974 | (setq ucs-names `(("BELL (BEL)" . 7) ,@names))))) | 2974 | (puthash "BELL (BEL)" ?\a names) |
| 2975 | (setq ucs-names names)))) | ||
| 2975 | 2976 | ||
| 2976 | (defun mule--ucs-names-annotation (name) | 2977 | (defun mule--ucs-names-annotation (name) |
| 2977 | ;; FIXME: It would be much better to add this annotation before rather than | 2978 | ;; FIXME: It would be much better to add this annotation before rather than |
| 2978 | ;; after the char name, so the annotations are aligned. | 2979 | ;; after the char name, so the annotations are aligned. |
| 2979 | ;; FIXME: The default behavior of displaying annotations in italics | 2980 | ;; FIXME: The default behavior of displaying annotations in italics |
| 2980 | ;; doesn't work well here. | 2981 | ;; doesn't work well here. |
| 2981 | (let ((char (assoc name ucs-names))) | 2982 | (let ((char (gethash name ucs-names))) |
| 2982 | (when char (format " (%c)" (cdr char))))) | 2983 | (when char (format " (%c)" char)))) |
| 2983 | 2984 | ||
| 2984 | (defun char-from-name (string &optional ignore-case) | 2985 | (defun char-from-name (string &optional ignore-case) |
| 2985 | "Return a character as a number from its Unicode name STRING. | 2986 | "Return a character as a number from its Unicode name STRING. |
| 2986 | If optional IGNORE-CASE is non-nil, ignore case in STRING. | 2987 | If optional IGNORE-CASE is non-nil, ignore case in STRING. |
| 2987 | Return nil if STRING does not name a character." | 2988 | Return nil if STRING does not name a character." |
| 2988 | (or (cdr (assoc-string string (ucs-names) ignore-case)) | 2989 | (or (gethash (if ignore-case (upcase string) string) (ucs-names)) |
| 2989 | (let ((minus (string-match-p "-[0-9A-F]+\\'" string))) | 2990 | (let ((minus (string-match-p "-[0-9A-F]+\\'" string))) |
| 2990 | (when minus | 2991 | (when minus |
| 2991 | ;; Parse names like "VARIATION SELECTOR-17" and "CJK | 2992 | ;; Parse names like "VARIATION SELECTOR-17" and "CJK |