diff options
| author | Mark Oteiza | 2017-08-31 17:22:39 -0400 |
|---|---|---|
| committer | Mark Oteiza | 2017-08-31 17:22:39 -0400 |
| commit | 96c2c098aeed5c85733577ebbdaf33af6fbb59e9 (patch) | |
| tree | d80538fd4ed9047f29b07aeff40feffa6d464005 | |
| parent | e6a2b4c2df96ed8780ff407481a18e3f4299c8ad (diff) | |
| download | emacs-96c2c098aeed5c85733577ebbdaf33af6fbb59e9.tar.gz emacs-96c2c098aeed5c85733577ebbdaf33af6fbb59e9.zip | |
Make ucs-names a hash table (Bug#28302)
* etc/NEWS: Mention the type change.
* lisp/descr-text.el (describe-char): Use gethash to access ucs-names.
Hardcode BEL's name into the function instead of needlessly mapping
over the hash table in the spirit of rassoc.
* lisp/international/mule-cmds.el (ucs-names): Fix variable and
function docstrings. Initialize a hash table for ucs-names--the
number of entries is 42845 here. Switch to hash-table
getters/setters.
(mule--ucs-names-annotation): Use hash-table getter.
(char-from-name): Upcase the string if ignore-case is truthy.
* lisp/leim/quail/latin-ltx.el: Use maphash instead of dolist.
| -rw-r--r-- | etc/NEWS | 3 | ||||
| -rw-r--r-- | lisp/descr-text.el | 6 | ||||
| -rw-r--r-- | lisp/international/mule-cmds.el | 43 | ||||
| -rw-r--r-- | lisp/leim/quail/latin-ltx.el | 30 |
4 files changed, 43 insertions, 39 deletions
| @@ -1155,6 +1155,9 @@ isn't compatible with previous Emacs versions. This functionality can | |||
| 1155 | be disabled by setting 'byte-compile-cond-use-jump-table' to nil. | 1155 | be disabled by setting 'byte-compile-cond-use-jump-table' to nil. |
| 1156 | 1156 | ||
| 1157 | --- | 1157 | --- |
| 1158 | ** The alist 'ucs-names' is now a hash table. | ||
| 1159 | |||
| 1160 | --- | ||
| 1158 | ** 'C-up', 'C-down', 'C-left' and 'C-right' are now defined in term | 1161 | ** 'C-up', 'C-down', 'C-left' and 'C-right' are now defined in term |
| 1159 | mode to send the same escape sequences that xterm does. This makes | 1162 | mode to send the same escape sequences that xterm does. This makes |
| 1160 | things like forward-word in readline work. | 1163 | things like forward-word in readline work. |
diff --git a/lisp/descr-text.el b/lisp/descr-text.el index 6f36bbed680..b3c96988dd6 100644 --- a/lisp/descr-text.el +++ b/lisp/descr-text.el | |||
| @@ -617,16 +617,16 @@ relevant to POS." | |||
| 617 | (list | 617 | (list |
| 618 | (let* ((names (ucs-names)) | 618 | (let* ((names (ucs-names)) |
| 619 | (name | 619 | (name |
| 620 | (or (when (= char 7) | 620 | (or (when (= char ?\a) |
| 621 | ;; Special case for "BELL" which is | 621 | ;; Special case for "BELL" which is |
| 622 | ;; apparently the only char which | 622 | ;; apparently the only char which |
| 623 | ;; doesn't have a new name and whose | 623 | ;; doesn't have a new name and whose |
| 624 | ;; old-name is shadowed by a newer char | 624 | ;; old-name is shadowed by a newer char |
| 625 | ;; with that name (bug#25641). | 625 | ;; with that name (bug#25641). |
| 626 | (car (rassoc char names))) | 626 | "BELL (BEL)") |
| 627 | (get-char-code-property char 'name) | 627 | (get-char-code-property char 'name) |
| 628 | (get-char-code-property char 'old-name)))) | 628 | (get-char-code-property char 'old-name)))) |
| 629 | (if (and name (assoc-string name names)) | 629 | (if (and name (gethash name names)) |
| 630 | (format | 630 | (format |
| 631 | "type \"C-x 8 RET %x\" or \"C-x 8 RET %s\"" | 631 | "type \"C-x 8 RET %x\" or \"C-x 8 RET %s\"" |
| 632 | char name) | 632 | char name) |
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el index 338ca6a6e3c..a596411eb78 100644 --- a/lisp/international/mule-cmds.el +++ b/lisp/international/mule-cmds.el | |||
| @@ -2923,10 +2923,10 @@ on encoding." | |||
| 2923 | (make-obsolete-variable 'nonascii-translation-table "do not use it." "23.1") | 2923 | (make-obsolete-variable 'nonascii-translation-table "do not use it." "23.1") |
| 2924 | 2924 | ||
| 2925 | (defvar ucs-names nil | 2925 | (defvar ucs-names nil |
| 2926 | "Alist of cached (CHAR-NAME . CHAR-CODE) pairs.") | 2926 | "Hash table of cached CHAR-NAME keys to CHAR-CODE values.") |
| 2927 | 2927 | ||
| 2928 | (defun ucs-names () | 2928 | (defun ucs-names () |
| 2929 | "Return alist of (CHAR-NAME . CHAR-CODE) pairs cached in `ucs-names'." | 2929 | "Return table of CHAR-NAME keys and CHAR-CODE values cached in `ucs-names'." |
| 2930 | (or ucs-names | 2930 | (or ucs-names |
| 2931 | (let ((ranges | 2931 | (let ((ranges |
| 2932 | '((#x0000 . #x33FF) | 2932 | '((#x0000 . #x33FF) |
| @@ -2954,38 +2954,39 @@ on encoding." | |||
| 2954 | ;; (#x20000 . #xDFFFF) CJK Ideograph Extension A, B, etc, unused | 2954 | ;; (#x20000 . #xDFFFF) CJK Ideograph Extension A, B, etc, unused |
| 2955 | (#xE0000 . #xE01FF))) | 2955 | (#xE0000 . #xE01FF))) |
| 2956 | (gc-cons-threshold 10000000) | 2956 | (gc-cons-threshold 10000000) |
| 2957 | names) | 2957 | (names (make-hash-table :size 42943 :test #'equal))) |
| 2958 | (dolist (range ranges) | 2958 | (dolist (range ranges) |
| 2959 | (let ((c (car range)) | 2959 | (let ((c (car range)) |
| 2960 | (end (cdr range))) | 2960 | (end (cdr range))) |
| 2961 | (while (<= c end) | 2961 | (while (<= c end) |
| 2962 | (let ((new-name (get-char-code-property c 'name)) | 2962 | (let ((new-name (get-char-code-property c 'name)) |
| 2963 | (old-name (get-char-code-property c 'old-name))) | 2963 | (old-name (get-char-code-property c 'old-name))) |
| 2964 | ;; In theory this code could end up pushing an "old-name" that | 2964 | ;; In theory this code could end up pushing an "old-name" that |
| 2965 | ;; shadows a "new-name" but in practice every time an | 2965 | ;; shadows a "new-name" but in practice every time an |
| 2966 | ;; `old-name' conflicts with a `new-name', the newer one has a | 2966 | ;; `old-name' conflicts with a `new-name', the newer one has a |
| 2967 | ;; higher code, so it gets pushed later! | 2967 | ;; higher code, so it gets pushed later! |
| 2968 | (if new-name (push (cons new-name c) names)) | 2968 | (if new-name (puthash new-name c names)) |
| 2969 | (if old-name (push (cons old-name c) names)) | 2969 | (if old-name (puthash old-name c names)) |
| 2970 | (setq c (1+ c)))))) | 2970 | (setq c (1+ c)))))) |
| 2971 | ;; Special case for "BELL" which is apparently the only char which | 2971 | ;; Special case for "BELL" which is apparently the only char which |
| 2972 | ;; doesn't have a new name and whose old-name is shadowed by a newer | 2972 | ;; doesn't have a new name and whose old-name is shadowed by a newer |
| 2973 | ;; char with that name. | 2973 | ;; char with that name. |
| 2974 | (setq ucs-names `(("BELL (BEL)" . 7) ,@names))))) | 2974 | (puthash "BELL (BEL)" ?\a names) |
| 2975 | (setq ucs-names names)))) | ||
| 2975 | 2976 | ||
| 2976 | (defun mule--ucs-names-annotation (name) | 2977 | (defun mule--ucs-names-annotation (name) |
| 2977 | ;; FIXME: It would be much better to add this annotation before rather than | 2978 | ;; FIXME: It would be much better to add this annotation before rather than |
| 2978 | ;; after the char name, so the annotations are aligned. | 2979 | ;; after the char name, so the annotations are aligned. |
| 2979 | ;; FIXME: The default behavior of displaying annotations in italics | 2980 | ;; FIXME: The default behavior of displaying annotations in italics |
| 2980 | ;; doesn't work well here. | 2981 | ;; doesn't work well here. |
| 2981 | (let ((char (assoc name ucs-names))) | 2982 | (let ((char (gethash name ucs-names))) |
| 2982 | (when char (format " (%c)" (cdr char))))) | 2983 | (when char (format " (%c)" char)))) |
| 2983 | 2984 | ||
| 2984 | (defun char-from-name (string &optional ignore-case) | 2985 | (defun char-from-name (string &optional ignore-case) |
| 2985 | "Return a character as a number from its Unicode name STRING. | 2986 | "Return a character as a number from its Unicode name STRING. |
| 2986 | If optional IGNORE-CASE is non-nil, ignore case in STRING. | 2987 | If optional IGNORE-CASE is non-nil, ignore case in STRING. |
| 2987 | Return nil if STRING does not name a character." | 2988 | Return nil if STRING does not name a character." |
| 2988 | (or (cdr (assoc-string string (ucs-names) ignore-case)) | 2989 | (or (gethash (if ignore-case (upcase string) string) (ucs-names)) |
| 2989 | (let ((minus (string-match-p "-[0-9A-F]+\\'" string))) | 2990 | (let ((minus (string-match-p "-[0-9A-F]+\\'" string))) |
| 2990 | (when minus | 2991 | (when minus |
| 2991 | ;; Parse names like "VARIATION SELECTOR-17" and "CJK | 2992 | ;; Parse names like "VARIATION SELECTOR-17" and "CJK |
diff --git a/lisp/leim/quail/latin-ltx.el b/lisp/leim/quail/latin-ltx.el index 6c5afcd4f93..d8ea90ec3ec 100644 --- a/lisp/leim/quail/latin-ltx.el +++ b/lisp/leim/quail/latin-ltx.el | |||
| @@ -75,20 +75,20 @@ system, including many technical ones. Examples: | |||
| 75 | (`(,seq ,re) | 75 | (`(,seq ,re) |
| 76 | (let ((count 0) | 76 | (let ((count 0) |
| 77 | (re (eval re t))) | 77 | (re (eval re t))) |
| 78 | (dolist (pair (ucs-names)) | 78 | (maphash |
| 79 | (let ((name (car pair)) | 79 | (lambda (name char) |
| 80 | (char (cdr pair))) | 80 | (when (and (characterp char) ;; Ignore char-ranges. |
| 81 | (when (and (characterp char) ;; Ignore char-ranges. | 81 | (string-match re name)) |
| 82 | (string-match re name)) | 82 | (let ((keys (if (stringp seq) |
| 83 | (let ((keys (if (stringp seq) | 83 | (replace-match seq nil nil name) |
| 84 | (replace-match seq nil nil name) | 84 | (funcall seq name char)))) |
| 85 | (funcall seq name char)))) | 85 | (if (listp keys) |
| 86 | (if (listp keys) | 86 | (dolist (x keys) |
| 87 | (dolist (x keys) | 87 | (setq count (1+ count)) |
| 88 | (setq count (1+ count)) | 88 | (push (list x char) newrules)) |
| 89 | (push (list x char) newrules)) | 89 | (setq count (1+ count)) |
| 90 | (setq count (1+ count)) | 90 | (push (list keys char) newrules))))) |
| 91 | (push (list keys char) newrules)))))) | 91 | (ucs-names)) |
| 92 | ;; (message "latin-ltx: %d mappings for %S" count re) | 92 | ;; (message "latin-ltx: %d mappings for %S" count re) |
| 93 | )))) | 93 | )))) |
| 94 | (setq newrules (delete-dups newrules)) | 94 | (setq newrules (delete-dups newrules)) |
| @@ -206,7 +206,7 @@ system, including many technical ones. Examples: | |||
| 206 | 206 | ||
| 207 | ((lambda (name char) | 207 | ((lambda (name char) |
| 208 | (let* ((base (concat (match-string 1 name) (match-string 3 name))) | 208 | (let* ((base (concat (match-string 1 name) (match-string 3 name))) |
| 209 | (basechar (cdr (assoc base (ucs-names))))) | 209 | (basechar (gethash base (ucs-names)))) |
| 210 | (when (latin-ltx--ascii-p basechar) | 210 | (when (latin-ltx--ascii-p basechar) |
| 211 | (string (if (match-end 2) ?^ ?_) basechar)))) | 211 | (string (if (match-end 2) ?^ ?_) basechar)))) |
| 212 | "\\(.*\\)SU\\(?:B\\|\\(PER\\)\\)SCRIPT \\(.*\\)") | 212 | "\\(.*\\)SU\\(?:B\\|\\(PER\\)\\)SCRIPT \\(.*\\)") |