aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKenichi Handa2000-02-29 11:32:52 +0000
committerKenichi Handa2000-02-29 11:32:52 +0000
commitefdd2d7964060e1fe5041998f9bdb74741118f38 (patch)
tree155a4080f78da72ec27dedd34e9b711844713124
parent1ac1c8367a8cd7d12764eaed3d802ed3ed1fb702 (diff)
downloademacs-efdd2d7964060e1fe5041998f9bdb74741118f38.tar.gz
emacs-efdd2d7964060e1fe5041998f9bdb74741118f38.zip
(list-character-sets): Completely
rewritten. (sort-listed-character-sets): New function. (list-character-sets-1): Completely rewritten. (list-character-sets-2): New function. (non-iso-charset-alist): New variable. (decode-codepage-char): New function. (charset-history): New variable. (read-charset) (list-block-of-chars) (list-iso-charset-chars) (list-non-iso-charset-chars) (list-charset-chars): New functions. (mule-diag): Call list-character-sets-2, not list-character-sets-2. (dump-charsets): Likewise.
-rw-r--r--lisp/international/mule-diag.el423
1 files changed, 379 insertions, 44 deletions
diff --git a/lisp/international/mule-diag.el b/lisp/international/mule-diag.el
index d0fed4eb3ea..b501bfe2190 100644
--- a/lisp/international/mule-diag.el
+++ b/lisp/international/mule-diag.el
@@ -43,43 +43,153 @@
43;;; CHARSET 43;;; CHARSET
44 44
45;;;###autoload 45;;;###autoload
46(defun list-character-sets (&optional arg) 46(defun list-character-sets (arg)
47 "Display a list of all character sets. 47 "Display a list of all character sets.
48 48
49The ID column contains a charset identification number for internal Emacs use. 49The ID-NUM column contains a charset identification number
50The B column contains a number of bytes occupied in a buffer 50 for internal Emacs use.
51 by any character in this character set. 51
52The W column contains a number of columns occupied on the screen 52The MULTIBYTE-FORM column contains a format of multibyte sequence
53 by any character in this character set. 53 of characters in the charset for buffer and string
54 by one to four hexadecimal digits.
55 `xx' stands for any byte in the range 0..127.
56 `XX' stands for any byte in the range 160..255.
57
58The D column contains a dimension of this character set.
59The CH column contains a number of characters in a block of this character set.
60The FINAL-CHAR column contains an ISO-2022's <final-char> to use for
61 designating this character set in ISO-2022-based coding systems.
54 62
55With prefix arg, the output format gets more cryptic, 63With prefix arg, the output format gets more cryptic,
56but still shows the full information." 64but still shows the full information."
57 (interactive "P") 65 (interactive "P")
58 (sort-charset-list)
59 (with-output-to-temp-buffer "*Help*" 66 (with-output-to-temp-buffer "*Help*"
60 (save-excursion 67 (with-current-buffer standard-output
61 (set-buffer standard-output) 68 (if arg
62 (list-character-sets-1 arg) 69 (list-character-sets-2)
63 (help-mode) 70 ;; Insert header.
64 (setq truncate-lines t)))) 71 (insert
72 (substitute-command-keys
73 (concat
74 "Use "
75 (if (display-mouse-p) "\\[help-follow-mouse] or ")
76 "\\[help-follow] on a title of column\nto sort by that title.")))
77 (indent-to 56)
78 (insert "+----DIMENSION\n")
79 (indent-to 56)
80 (insert "| +--CHARS\n")
81 (let ((columns '(("ID-NUM" . id) "\t"
82 ("CHARSET-NAME" . name) "\t\t\t"
83 ("MULTIBYTE-FORM" . id) "\t"
84 ("D CH FINAL-CHAR" . iso-spec)))
85 (help-highlight-face 'region)
86 pos)
87 (while columns
88 (if (stringp (car columns))
89 (insert (car columns))
90 (insert (car (car columns)))
91 (search-backward (car (car columns)))
92 (help-xref-button 0 'sort-listed-character-sets
93 (cdr (car columns)))
94 (goto-char (point-max)))
95 (setq columns (cdr columns)))
96 (insert "\n"))
97 (insert "------\t------------\t\t\t--------------\t- -- ----------\n")
65 98
66(defun list-character-sets-1 (arg) 99 ;; Insert body sorted by charset IDs.
67 (let ((l charset-list) 100 (list-character-sets-1 'id)))))
68 charset) 101
69 (if (null arg) 102
70 (progn 103;; Sort character set list by SORT-KEY.
71 (insert "ID Name B W Description\n") 104
72 (insert "-- ---- - - -----------\n") 105(defun sort-listed-character-sets (sort-key)
73 (while l 106 (if sort-key
74 (setq charset (car l) l (cdr l)) 107 (save-excursion
75 (insert (format "%03d %s" (charset-id charset) charset)) 108 (let ((buffer-read-only nil))
76 (indent-to 28) 109 (goto-char (point-min))
77 (insert (format "%d %d %s\n" 110 (re-search-forward "[0-9][0-9][0-9]")
78 (charset-bytes charset) 111 (beginning-of-line)
79 (charset-width charset) 112 (delete-region (point) (point-max))
80 (charset-description charset))))) 113 (list-character-sets-1 sort-key)))))
81 (insert "\ 114
82######################### 115
116;; Insert a list of character sets sorted by SORT-KEY. SORT-KEY
117;; should be one of `id', `name', and `iso-spec'. If SORT-KEY is nil,
118;; it defaults to `id'.
119
120(defun list-character-sets-1 (sort-key)
121 (or sort-key
122 (setq sort-key 'id))
123 (let ((tail (charset-list))
124 charset-info-list elt charset info sort-func)
125 (while tail
126 (setq charset (car tail) tail (cdr tail)
127 info (charset-info charset))
128
129 ;; Generate a list that contains all information to display.
130 (setq charset-info-list
131 (cons (list (charset-id charset) ; ID-NUM
132 charset ; CHARSET-NAME
133 (if (eq charset 'ascii) ; MULTIBYTE-FORM
134 "xx"
135 (let ((str (format "%2X" (aref info 6))))
136 (if (> (aref info 7) 0)
137 (setq str (format "%s %2X" str (aref info 7))))
138 (setq str (concat str " XX"))
139 (if (> (aref info 2) 1)
140 (setq str (concat str " XX")))
141 str))
142 (aref info 2) ; DIMENSION
143 (aref info 3) ; CHARS
144 (aref info 8) ; FINAL-CHAR
145 )
146 charset-info-list)))
147
148 ;; Determine a predicate for `sort' by SORT-KEY.
149 (setq sort-func
150 (cond ((eq sort-key 'id)
151 (function (lambda (x y) (< (car x) (car y)))))
152
153 ((eq sort-key 'name)
154 (function (lambda (x y) (string< (nth 1 x) (nth 1 y)))))
155
156 ((eq sort-key 'iso-spec)
157 ;; Sort by DIMENSION CHARS FINAL-CHAR
158 (function
159 (lambda (x y)
160 (or (< (nth 3 x) (nth 3 y))
161 (and (= (nth 3 x) (nth 3 y))
162 (or (< (nth 4 x) (nth 4 y))
163 (and (= (nth 4 x) (nth 4 y))
164 (< (nth 5 x) (nth 5 y)))))))))
165 (t
166 (error "Invalid charset sort key: %s" sort-key))))
167
168 (setq charset-info-list (sort charset-info-list sort-func))
169
170 ;; Insert information of character sets.
171 (while charset-info-list
172 (setq elt (car charset-info-list)
173 charset-info-list (cdr charset-info-list))
174 (insert (format "%03d(%02X)" (car elt) (car elt))) ; ID-NUM
175 (indent-to 8)
176 (insert (symbol-name (nth 1 elt))) ; CHARSET-NAME
177 (search-backward (symbol-name (nth 1 elt)))
178 (help-xref-button 0 'list-charset-chars (nth 1 elt))
179 (goto-char (point-max))
180 (insert "\t")
181 (indent-to 40)
182 (insert (nth 2 elt)) ; MULTIBYTE-FORM
183 (indent-to 56)
184 (insert (format "%d %2d %c" ; ISO specs
185 (nth 3 elt) (nth 4 elt) (nth 5 elt)))
186 (insert "\n"))))
187
188
189;; List all character sets in a form that a program can easily parse.
190
191(defun list-character-sets-2 ()
192 (insert "#########################
83## LIST OF CHARSETS 193## LIST OF CHARSETS
84## Each line corresponds to one charset. 194## Each line corresponds to one charset.
85## The following attributes are listed in this order 195## The following attributes are listed in this order
@@ -95,19 +205,244 @@ but still shows the full information."
95## ISO-GRAPHIC-PLANE (ISO-2022's graphic plane, 0:GL, 1:GR) 205## ISO-GRAPHIC-PLANE (ISO-2022's graphic plane, 0:GL, 1:GR)
96## DESCRIPTION (describing string of the charset) 206## DESCRIPTION (describing string of the charset)
97") 207")
98 (while l 208 (let ((l charset-list)
99 (setq charset (car l) l (cdr l)) 209 charset)
100 (princ (format "%03d:%s:%d:%d:%d:%d:%d:%d:%d:%s\n" 210 (while l
101 (charset-id charset) 211 (setq charset (car l) l (cdr l))
102 charset 212 (princ (format "%03d:%s:%d:%d:%d:%d:%d:%d:%d:%s\n"
103 (charset-dimension charset) 213 (charset-id charset)
104 (charset-chars charset) 214 charset
105 (charset-bytes charset) 215 (charset-dimension charset)
106 (charset-width charset) 216 (charset-chars charset)
107 (charset-direction charset) 217 (charset-bytes charset)
108 (charset-iso-final-char charset) 218 (charset-width charset)
109 (charset-iso-graphic-plane charset) 219 (charset-direction charset)
110 (charset-description charset))))))) 220 (charset-iso-final-char charset)
221 (charset-iso-graphic-plane charset)
222 (charset-description charset))))))
223
224(defvar non-iso-charset-alist
225 `((viscii
226 (ascii vietnamese-viscii-lower vietnamese-viscii-upper)
227 ,viet-viscii-nonascii-translation-table
228 ((0 255)))
229 (koi8-r
230 (ascii cyrillic-iso8859-5)
231 ,cyrillic-koi8-r-nonascii-translation-table
232 ((32 255)))
233 (alternativnyj
234 (ascii cyrillic-iso8859-5)
235 ,cyrillic-alternativnyj-nonascii-translation-table
236 ((32 255)))
237 (big5
238 (ascii chinese-big5-1 chinese-big5-2)
239 decode-big5-char
240 ((32 127)
241 ((?\xA1 ?\xFE) . (?\x40 ?\x7E ?\xA1 ?\xFE))))
242 (sjis
243 (ascii katakana-jisx0201 japanese-jisx0208)
244 decode-sjis-char
245 ((32 127 ?\xA1 ?\xDF)
246 ((?\x81 ?\x9F ?\xE0 ?\xEF) . (?\x40 ?\x7E ?\x80 ?\xFC)))))
247 "Alist of non-ISO charset names vs the corresponding information.
248
249Non-ISO charsets are what Emacs can read (or write) by mapping to (or
250from) some Emacs' charsets that correspond to ISO charsets.
251
252Each element has the following format:
253 (NON-ISO-CHARSET CHARSET-LIST TRANSLATION-METHOD [ CODE-RANGE ])
254
255NON-ISO-CHARSET is a name (symbol) of the non-ISO charset.
256
257CHARSET-LIST is a list of Emacs' charsets into which characters of
258NON-ISO-CHARSET are mapped.
259
260TRANSLATION-METHOD is a char-table to translate a character code of
261NON-ISO-CHARSET to the corresponding Emacs character code. It can
262also be a function to call with one argument, a character code in
263NON-ISO-CHARSET.
264
265CODE-RANGE specifies the valid code ranges of NON-ISO-CHARSET.
266It is a list of RANGEs, where each RANGE is of the form:
267 (FROM1 TO1 FROM2 TO2 ...)
268or
269 ((FROM1-1 TO1-1 FROM1-2 TO1-2 ...) . (FROM2-1 TO2-1 FROM2-2 TO2-2 ...))
270In the first form, valid codes are between FROM1 and TO1, or FROM2 and
271TO2, or...
272The second form is used for 2-byte codes. The car part is the ranges
273of the first byte, and the cdr part is the ranges of the second byte.")
274
275
276;; Decode a character that has code CODE in CODEPAGE. Value is a
277;; string of decoded character.
278
279(defun decode-codepage-char (codepage code)
280 ;; Each CODEPAGE corresponds to a coding system cpCODEPAGE.
281 (let ((coding-system (intern (format "cp%d" codepage))))
282 (or (coding-system-p coding-system)
283 (codepage-setup codepage))
284 (string-to-char
285 (decode-coding-string (char-to-string code) coding-system))))
286
287
288;; Add DOS codepages to `non-iso-charset-alist'.
289
290(let ((tail (cp-supported-codepages))
291 elt)
292 (while tail
293 (setq elt (car tail) tail (cdr tail))
294 ;; Now ELT is (CODEPAGE . CHARSET), where CODEPAGE is a string
295 ;; (e.g. "850"), CHARSET is a charset that characters in CODEPAGE
296 ;; are mapped to.
297 (setq non-iso-charset-alist
298 (cons (list (intern (concat "cp" (car elt)))
299 (list 'ascii (cdr elt))
300 `(lambda (code)
301 (decode-codepage-char ,(string-to-int (car elt))
302 code))
303 (list (list 0 255)))
304 non-iso-charset-alist))))
305
306
307;; A variable to hold charset input history.
308(defvar charset-history nil)
309
310
311;;;###autoload
312(defun read-charset (prompt &optional default-value initial-input)
313 "Read a character set from the minibuffer, prompting with string PROMPT.
314It reads an Emacs' character set listed in the variable `charset-list'
315or a non-ISO character set listed in the variable
316`non-iso-charset-alist'.
317
318Optional arguments are DEFAULT-VALUE and INITIAL-INPUT.
319DEFAULT-VALUE, if non-nil, is the default value.
320INITIAL-INPUT, if non-nil, is a string inserted in the minibuffer initially.
321See the documentation of the function `completing-read' for the
322detailed meanings of these arguments."
323 (let* ((table (append (mapcar (function (lambda (x) (list (symbol-name x))))
324 charset-list)
325 (mapcar (function (lambda (x)
326 (list (symbol-name (car x)))))
327 non-iso-charset-alist)))
328 (charset (completing-read prompt table
329 nil t initial-input 'charset-history
330 default-value)))
331 (if (> (length charset) 0)
332 (intern charset))))
333
334
335;; List characters of the range MIN and MAX of CHARSET. If dimension
336;; of CHARSET is two (i.e. 2-byte charset), ROW is the first byte
337;; (block index) of the characters, and MIN and MAX are the second
338;; bytes of the characters. If the dimension is one, ROW should be 0.
339;; For a non-ISO charset, CHARSET is a char-table or a function to get
340;; Emacs' character codes that corresponds to the characters to list.
341
342(defun list-block-of-chars (charset row min max)
343 (let (i ch)
344 (insert-char ?- (+ 4 (* 3 16)))
345 (insert "\n ")
346 (setq i 0)
347 (while (< i 16)
348 (insert (format "%3X" i))
349 (setq i (1+ i)))
350 (setq i (* (/ min 16) 16))
351 (while (<= i max)
352 (if (= (% i 16) 0)
353 (insert (format "\n%3Xx" (/ (+ (* row 256) i) 16))))
354 (setq ch (cond ((< i min)
355 32)
356 ((charsetp charset)
357 (if (= row 0)
358 (make-char charset i)
359 (make-char charset row i)))
360 ((char-table-p charset)
361 (aref charset i))
362 (t (funcall charset (+ (* row 256) i)))))
363 (if (or (< ch 32) (and (>= ch 127) (<= ch 255)))
364 ;; Don't insert a control code.
365 (setq ch 32))
366 (insert (format "%3c" ch))
367 (setq i (1+ i))))
368 (insert "\n"))
369
370
371;; List all characters in ISO charset CHARSET.
372
373(defun list-iso-charset-chars (charset)
374 (let ((dim (charset-dimension charset))
375 (chars (charset-chars charset))
376 (plane (charset-iso-graphic-plane charset))
377 min max)
378 (insert (format "Characters in the charset %s.\n" charset))
379
380 (if (= chars 94)
381 (setq min 33 max 126)
382 (setq min 32 max 127))
383 (or (= plane 0)
384 (setq min (+ min 128) max (+ max 128)))
385
386 (if (= dim 1)
387 (list-block-of-chars charset 0 min max)
388 (let ((i min))
389 (while (< i max)
390 (list-block-of-chars charset i min max)
391 (setq i (1+ i)))))))
392
393
394;; List all characters in non-ISO charset CHARSET.
395
396(defun list-non-iso-charset-chars (charset)
397 (let* ((slot (assq charset non-iso-charset-alist))
398 (charsets (nth 1 slot))
399 (translate-method (nth 2 slot))
400 (ranges (nth 3 slot))
401 range)
402 (or slot
403 (error "Unknown external charset: %s" charset))
404 (insert (format "Characters in non-ISO charset %s.\n" charset))
405 (insert "They are mapped to: "
406 (mapconcat (lambda (x) (symbol-name x)) charsets ", ")
407 "\n")
408 (while ranges
409 (setq range (car ranges) ranges (cdr ranges))
410 (if (integerp (car range))
411 ;; The form of RANGES is (FROM1 TO1 FROM2 TO2 ...).
412 (while range
413 (list-block-of-chars translate-method
414 0 (car range) (nth 1 range))
415 (setq range (nthcdr 2 range)))
416 ;; The form of RANGES is ((FROM1-1 TO1-1 ...) . (FROM2-1 TO2-1 ...)).
417 (let ((row-range (car range))
418 row row-max
419 col-range col col-max)
420 (while row-range
421 (setq row (car row-range) row-max (nth 1 row-range)
422 row-range (nthcdr 2 row-range))
423 (while (< row row-max)
424 (setq col-range (cdr range))
425 (while col-range
426 (setq col (car col-range) col-max (nth 1 col-range)
427 col-range (nthcdr 2 col-range))
428 (list-block-of-chars translate-method row col col-max))
429 (setq row (1+ row)))))))))
430
431
432;;;###autoload
433(defun list-charset-chars (charset)
434 "Display a list of characters in the specified character set."
435 (interactive (list (read-charset "Character set: ")))
436 (with-output-to-temp-buffer "*Help*"
437 (with-current-buffer standard-output
438 (set-buffer-multibyte t)
439 (cond ((charsetp charset)
440 (list-iso-charset-chars charset))
441 ((assq charset non-iso-charset-alist)
442 (list-non-iso-charset-chars charset))
443 (t
444 (error "Invalid charset %s" charset))))))
445
111 446
112;;; CODING-SYSTEM 447;;; CODING-SYSTEM
113 448
@@ -801,7 +1136,7 @@ system which uses fontsets)."
801 (insert "\n") 1136 (insert "\n")
802 1137
803 (insert-section 5 "Character sets") 1138 (insert-section 5 "Character sets")
804 (list-character-sets-1 t) 1139 (list-character-sets-2)
805 (insert "\n") 1140 (insert "\n")
806 1141
807 (when (and window-system (boundp 'global-fontset-alist)) 1142 (when (and window-system (boundp 'global-fontset-alist))
@@ -832,7 +1167,7 @@ The file is saved in the directory `data-directory'."
832 (set-buffer buf) 1167 (set-buffer buf)
833 (setq buffer-read-only nil) 1168 (setq buffer-read-only nil)
834 (erase-buffer) 1169 (erase-buffer)
835 (list-character-sets t) 1170 (list-character-sets-2)
836 (insert-buffer-substring "*Help*") 1171 (insert-buffer-substring "*Help*")
837 (let (make-backup-files 1172 (let (make-backup-files
838 coding-system-for-write) 1173 coding-system-for-write)