aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRichard M. Stallman2003-05-28 11:14:07 +0000
committerRichard M. Stallman2003-05-28 11:14:07 +0000
commitd6c135fb4fb52fc8741e00c587638dd01a0cec3d (patch)
tree03c4ecb3e290e676ddd26c0e733a1819f6e30a4c
parenta4992f73f89c563c3b52e03498e8827b98eeec71 (diff)
downloademacs-d6c135fb4fb52fc8741e00c587638dd01a0cec3d.tar.gz
emacs-d6c135fb4fb52fc8741e00c587638dd01a0cec3d.zip
(describe-char-unicode-data): New dummy definition.
Real definition commented out since we can't use UnicodeData.txt as is. (describe-char-unicodedata-file): Variable commented out.
-rw-r--r--lisp/descr-text.el429
1 files changed, 218 insertions, 211 deletions
diff --git a/lisp/descr-text.el b/lisp/descr-text.el
index 221f09f6f62..b5a8b0d7a96 100644
--- a/lisp/descr-text.el
+++ b/lisp/descr-text.el
@@ -218,216 +218,223 @@ otherwise."
218 (newline) 218 (newline)
219 (widget-insert "There are text properties here:\n") 219 (widget-insert "There are text properties here:\n")
220 (describe-property-list properties))))) 220 (describe-property-list properties)))))
221 221
222(defcustom unicodedata-file nil 222;;; We cannot use the UnicodeData.txt file as such; it is not free.
223 "Location of Unicode data file. 223;;; We can turn that info a different format and release the result
224This is the UnicodeData.txt file from the Unicode consortium, used for 224;;; as free data. When that is done, we could reinstate the code below.
225diagnostics. If it is non-nil `describe-char-after' will print data 225;;; For the mean time, here is a dummy placeholder.
226looked up from it. This facility is mostly of use to people doing 226;;; -- rms
227multilingual development. 227(defun describe-char-unicode-data (char) nil)
228 228
229This is a fairly large file, not typically present on GNU systems. At 229;;; (defcustom describe-char-unicodedata-file nil
230the time of writing it is at 230;;; "Location of Unicode data file.
231<URL:ftp://www.unicode.org/Public/UNIDATA/UnicodeData.txt>." 231;;; This is the UnicodeData.txt file from the Unicode consortium, used for
232 :group 'mule 232;;; diagnostics. If it is non-nil `describe-char-after' will print data
233 :version "21.5" 233;;; looked up from it. This facility is mostly of use to people doing
234 :type '(choice (const :tag "None" nil) 234;;; multilingual development.
235 file)) 235
236 236;;; This is a fairly large file, not typically present on GNU systems. At
237;; We could convert the unidata file into a Lispy form once-for-all 237;;; the time of writing it is at
238;; and distribute it for loading on demand. It might be made more 238;;; <URL:ftp://www.unicode.org/Public/UNIDATA/UnicodeData.txt>."
239;; space-efficient by splitting strings word-wise and replacing them 239;;; :group 'mule
240;; with lists of symbols interned in a private obarray, e.g. 240;;; :version "21.5"
241;; "LATIN SMALL LETTER A" => '(LATIN SMALL LETTER A). 241;;; :type '(choice (const :tag "None" nil)
242 242;;; file))
243;; Fixme: Check whether this needs updating for Unicode 4. 243
244(defun unicode-data (char) 244;;; ;; We could convert the unidata file into a Lispy form once-for-all
245 "Return a list of Unicode data for unicode CHAR. 245;;; ;; and distribute it for loading on demand. It might be made more
246Each element is a list of a property description and the property value. 246;;; ;; space-efficient by splitting strings word-wise and replacing them
247The list is null if CHAR isn't found in `unicodedata-file'." 247;;; ;; with lists of symbols interned in a private obarray, e.g.
248 (when unicodedata-file 248;;; ;; "LATIN SMALL LETTER A" => '(LATIN SMALL LETTER A).
249 (unless (file-exists-p unicodedata-file) 249
250 (error "`unicodedata-file' %s not found" unicodedata-file)) 250;;; ;; Fixme: Check whether this needs updating for Unicode 4.
251 (save-excursion 251;;; (defun describe-char-unicode-data (char)
252 ;; Find file in fundamental mode to avoid, e.g. flyspell turned 252;;; "Return a list of Unicode data for unicode CHAR.
253 ;; on for .txt. Don't use RAWFILE arg in case of DOS line endings. 253;;; Each element is a list of a property description and the property value.
254 (set-buffer (let ((auto-mode-alist)) 254;;; The list is null if CHAR isn't found in `describe-char-unicodedata-file'."
255 (find-file-noselect unicodedata-file))) 255;;; (when describe-char-unicodedata-file
256 (goto-char (point-min)) 256;;; (unless (file-exists-p describe-char-unicodedata-file)
257 (let ((hex (format "%04X" char)) 257;;; (error "`unicodedata-file' %s not found" describe-char-unicodedata-file))
258 found first last) 258;;; (save-excursion
259 (if (re-search-forward (concat "^" hex) nil t) 259;;; ;; Find file in fundamental mode to avoid, e.g. flyspell turned
260 (setq found t) 260;;; ;; on for .txt. Don't use RAWFILE arg in case of DOS line endings.
261 ;; It's not listed explicitly. Look for ranges, e.g. CJK 261;;; (set-buffer (let ((auto-mode-alist))
262 ;; ideographs, and check whether it's in one of them. 262;;; (find-file-noselect describe-char-unicodedata-file)))
263 (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t) 263;;; (goto-char (point-min))
264 (>= char (setq first 264;;; (let ((hex (format "%04X" char))
265 (string-to-number (match-string 1) 16))) 265;;; found first last)
266 (progn 266;;; (if (re-search-forward (concat "^" hex) nil t)
267 (forward-line 1) 267;;; (setq found t)
268 (looking-at "^\\([^;]+\\);[^;]+Last>;") 268;;; ;; It's not listed explicitly. Look for ranges, e.g. CJK
269 (> char 269;;; ;; ideographs, and check whether it's in one of them.
270 (setq last 270;;; (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t)
271 (string-to-number (match-string 1) 16)))))) 271;;; (>= char (setq first
272 (if (and (>= char first) 272;;; (string-to-number (match-string 1) 16)))
273 (<= char last)) 273;;; (progn
274 (setq found t))) 274;;; (forward-line 1)
275 (if found 275;;; (looking-at "^\\([^;]+\\);[^;]+Last>;")
276 (let ((fields (mapcar (lambda (elt) 276;;; (> char
277 (if (> (length elt) 0) 277;;; (setq last
278 elt)) 278;;; (string-to-number (match-string 1) 16))))))
279 (cdr (split-string 279;;; (if (and (>= char first)
280 (buffer-substring 280;;; (<= char last))
281 (line-beginning-position) 281;;; (setq found t)))
282 (line-end-position)) 282;;; (if found
283 ";"))))) 283;;; (let ((fields (mapcar (lambda (elt)
284 ;; The length depends on whether the last field was empty. 284;;; (if (> (length elt) 0)
285 (unless (or (= 13 (length fields)) 285;;; elt))
286 (= 14 (length fields))) 286;;; (cdr (split-string
287 (error "Invalid contents in %s" unicodedata-file)) 287;;; (buffer-substring
288 ;; The field names and values lists are slightly 288;;; (line-beginning-position)
289 ;; modified from Mule-UCS unidata.el. 289;;; (line-end-position))
290 (list 290;;; ";")))))
291 (list "Name" (let ((name (nth 0 fields))) 291;;; ;; The length depends on whether the last field was empty.
292 ;; Check for <..., First>, <..., Last> 292;;; (unless (or (= 13 (length fields))
293 (if (string-match "\\`\\(<[^,]+\\)," name) 293;;; (= 14 (length fields)))
294 (concat (match-string 1 name) ">") 294;;; (error "Invalid contents in %s" describe-char-unicodedata-file))
295 name))) 295;;; ;; The field names and values lists are slightly
296 (list "Category" 296;;; ;; modified from Mule-UCS unidata.el.
297 (cdr (assoc 297;;; (list
298 (nth 1 fields) 298;;; (list "Name" (let ((name (nth 0 fields)))
299 '(("Lu" . "uppercase letter") 299;;; ;; Check for <..., First>, <..., Last>
300 ("Ll" . "lowercase letter") 300;;; (if (string-match "\\`\\(<[^,]+\\)," name)
301 ("Lt" . "titlecase letter") 301;;; (concat (match-string 1 name) ">")
302 ("Mn" . "non-spacing mark") 302;;; name)))
303 ("Mc" . "spacing-combining mark") 303;;; (list "Category"
304 ("Me" . "enclosing mark") 304;;; (cdr (assoc
305 ("Nd" . "decimal digit") 305;;; (nth 1 fields)
306 ("Nl" . "letter number") 306;;; '(("Lu" . "uppercase letter")
307 ("No" . "other number") 307;;; ("Ll" . "lowercase letter")
308 ("Zs" . "space separator") 308;;; ("Lt" . "titlecase letter")
309 ("Zl" . "line separator") 309;;; ("Mn" . "non-spacing mark")
310 ("Zp" . "paragraph separator") 310;;; ("Mc" . "spacing-combining mark")
311 ("Cc" . "other control") 311;;; ("Me" . "enclosing mark")
312 ("Cf" . "other format") 312;;; ("Nd" . "decimal digit")
313 ("Cs" . "surrogate") 313;;; ("Nl" . "letter number")
314 ("Co" . "private use") 314;;; ("No" . "other number")
315 ("Cn" . "not assigned") 315;;; ("Zs" . "space separator")
316 ("Lm" . "modifier letter") 316;;; ("Zl" . "line separator")
317 ("Lo" . "other letter") 317;;; ("Zp" . "paragraph separator")
318 ("Pc" . "connector punctuation") 318;;; ("Cc" . "other control")
319 ("Pd" . "dash punctuation") 319;;; ("Cf" . "other format")
320 ("Ps" . "open punctuation") 320;;; ("Cs" . "surrogate")
321 ("Pe" . "close punctuation") 321;;; ("Co" . "private use")
322 ("Pi" . "initial-quotation punctuation") 322;;; ("Cn" . "not assigned")
323 ("Pf" . "final-quotation punctuation") 323;;; ("Lm" . "modifier letter")
324 ("Po" . "other punctuation") 324;;; ("Lo" . "other letter")
325 ("Sm" . "math symbol") 325;;; ("Pc" . "connector punctuation")
326 ("Sc" . "currency symbol") 326;;; ("Pd" . "dash punctuation")
327 ("Sk" . "modifier symbol") 327;;; ("Ps" . "open punctuation")
328 ("So" . "other symbol"))))) 328;;; ("Pe" . "close punctuation")
329 (list "Combining class" 329;;; ("Pi" . "initial-quotation punctuation")
330 (cdr (assoc 330;;; ("Pf" . "final-quotation punctuation")
331 (string-to-number (nth 2 fields)) 331;;; ("Po" . "other punctuation")
332 '((0 . "Spacing") 332;;; ("Sm" . "math symbol")
333 (1 . "Overlays and interior") 333;;; ("Sc" . "currency symbol")
334 (7 . "Nuktas") 334;;; ("Sk" . "modifier symbol")
335 (8 . "Hiragana/Katakana voicing marks") 335;;; ("So" . "other symbol")))))
336 (9 . "Viramas") 336;;; (list "Combining class"
337 (10 . "Start of fixed position classes") 337;;; (cdr (assoc
338 (199 . "End of fixed position classes") 338;;; (string-to-number (nth 2 fields))
339 (200 . "Below left attached") 339;;; '((0 . "Spacing")
340 (202 . "Below attached") 340;;; (1 . "Overlays and interior")
341 (204 . "Below right attached") 341;;; (7 . "Nuktas")
342 (208 . "Left attached (reordrant around \ 342;;; (8 . "Hiragana/Katakana voicing marks")
343single base character)") 343;;; (9 . "Viramas")
344 (210 . "Right attached") 344;;; (10 . "Start of fixed position classes")
345 (212 . "Above left attached") 345;;; (199 . "End of fixed position classes")
346 (214 . "Above attached") 346;;; (200 . "Below left attached")
347 (216 . "Above right attached") 347;;; (202 . "Below attached")
348 (218 . "Below left") 348;;; (204 . "Below right attached")
349 (220 . "Below") 349;;; (208 . "Left attached (reordrant around \
350 (222 . "Below right") 350;;; single base character)")
351 (224 . "Left (reordrant around single base \ 351;;; (210 . "Right attached")
352character)") 352;;; (212 . "Above left attached")
353 (226 . "Right") 353;;; (214 . "Above attached")
354 (228 . "Above left") 354;;; (216 . "Above right attached")
355 (230 . "Above") 355;;; (218 . "Below left")
356 (232 . "Above right") 356;;; (220 . "Below")
357 (233 . "Double below") 357;;; (222 . "Below right")
358 (234 . "Double above") 358;;; (224 . "Left (reordrant around single base \
359 (240 . "Below (iota subscript)"))))) 359;;; character)")
360 (list "Bidi category" 360;;; (226 . "Right")
361 (cdr (assoc 361;;; (228 . "Above left")
362 (nth 3 fields) 362;;; (230 . "Above")
363 '(("L" . "Left-to-Right") 363;;; (232 . "Above right")
364 ("LRE" . "Left-to-Right Embedding") 364;;; (233 . "Double below")
365 ("LRO" . "Left-to-Right Override") 365;;; (234 . "Double above")
366 ("R" . "Right-to-Left") 366;;; (240 . "Below (iota subscript)")))))
367 ("AL" . "Right-to-Left Arabic") 367;;; (list "Bidi category"
368 ("RLE" . "Right-to-Left Embedding") 368;;; (cdr (assoc
369 ("RLO" . "Right-to-Left Override") 369;;; (nth 3 fields)
370 ("PDF" . "Pop Directional Format") 370;;; '(("L" . "Left-to-Right")
371 ("EN" . "European Number") 371;;; ("LRE" . "Left-to-Right Embedding")
372 ("ES" . "European Number Separator") 372;;; ("LRO" . "Left-to-Right Override")
373 ("ET" . "European Number Terminator") 373;;; ("R" . "Right-to-Left")
374 ("AN" . "Arabic Number") 374;;; ("AL" . "Right-to-Left Arabic")
375 ("CS" . "Common Number Separator") 375;;; ("RLE" . "Right-to-Left Embedding")
376 ("NSM" . "Non-Spacing Mark") 376;;; ("RLO" . "Right-to-Left Override")
377 ("BN" . "Boundary Neutral") 377;;; ("PDF" . "Pop Directional Format")
378 ("B" . "Paragraph Separator") 378;;; ("EN" . "European Number")
379 ("S" . "Segment Separator") 379;;; ("ES" . "European Number Separator")
380 ("WS" . "Whitespace") 380;;; ("ET" . "European Number Terminator")
381 ("ON" . "Other Neutrals"))))) 381;;; ("AN" . "Arabic Number")
382 (list 382;;; ("CS" . "Common Number Separator")
383 "Decomposition" 383;;; ("NSM" . "Non-Spacing Mark")
384 (if (nth 4 fields) 384;;; ("BN" . "Boundary Neutral")
385 (let* ((parts (split-string (nth 4 fields))) 385;;; ("B" . "Paragraph Separator")
386 (info (car parts))) 386;;; ("S" . "Segment Separator")
387 (if (string-match "\\`<\\(.+\\)>\\'" info) 387;;; ("WS" . "Whitespace")
388 (setq info (match-string 1 info)) 388;;; ("ON" . "Other Neutrals")))))
389 (setq info nil)) 389;;; (list
390 (if info (setq parts (cdr parts))) 390;;; "Decomposition"
391 ;; Maybe printing ? for unrepresentable unicodes 391;;; (if (nth 4 fields)
392 ;; here and below should be changed? 392;;; (let* ((parts (split-string (nth 4 fields)))
393 (setq parts (mapconcat 393;;; (info (car parts)))
394 (lambda (arg) 394;;; (if (string-match "\\`<\\(.+\\)>\\'" info)
395 (string (or (decode-char 395;;; (setq info (match-string 1 info))
396 'ucs 396;;; (setq info nil))
397 (string-to-number arg 16)) 397;;; (if info (setq parts (cdr parts)))
398 ??))) 398;;; ;; Maybe printing ? for unrepresentable unicodes
399 parts " ")) 399;;; ;; here and below should be changed?
400 (concat info parts)))) 400;;; (setq parts (mapconcat
401 (list "Decimal digit value" 401;;; (lambda (arg)
402 (nth 5 fields)) 402;;; (string (or (decode-char
403 (list "Digit value" 403;;; 'ucs
404 (nth 6 fields)) 404;;; (string-to-number arg 16))
405 (list "Numeric value" 405;;; ??)))
406 (nth 7 fields)) 406;;; parts " "))
407 (list "Mirrored" 407;;; (concat info parts))))
408 (if (equal "Y" (nth 8 fields)) 408;;; (list "Decimal digit value"
409 "yes")) 409;;; (nth 5 fields))
410 (list "Old name" (nth 9 fields)) 410;;; (list "Digit value"
411 (list "ISO 10646 comment" (nth 10 fields)) 411;;; (nth 6 fields))
412 (list "Uppercase" (and (nth 11 fields) 412;;; (list "Numeric value"
413 (string (or (decode-char 413;;; (nth 7 fields))
414 'ucs 414;;; (list "Mirrored"
415 (string-to-number 415;;; (if (equal "Y" (nth 8 fields))
416 (nth 11 fields) 16)) 416;;; "yes"))
417 ??)))) 417;;; (list "Old name" (nth 9 fields))
418 (list "Lowercase" (and (nth 12 fields) 418;;; (list "ISO 10646 comment" (nth 10 fields))
419 (string (or (decode-char 419;;; (list "Uppercase" (and (nth 11 fields)
420 'ucs 420;;; (string (or (decode-char
421 (string-to-number 421;;; 'ucs
422 (nth 12 fields) 16)) 422;;; (string-to-number
423 ??)))) 423;;; (nth 11 fields) 16))
424 (list "Titlecase" (and (nth 13 fields) 424;;; ??))))
425 (string (or (decode-char 425;;; (list "Lowercase" (and (nth 12 fields)
426 'ucs 426;;; (string (or (decode-char
427 (string-to-number 427;;; 'ucs
428 (nth 13 fields) 16)) 428;;; (string-to-number
429 ??))))))))))) 429;;; (nth 12 fields) 16))
430 430;;; ??))))
431;;; (list "Titlecase" (and (nth 13 fields)
432;;; (string (or (decode-char
433;;; 'ucs
434;;; (string-to-number
435;;; (nth 13 fields) 16))
436;;; ??)))))))))))
437
431;;;###autoload 438;;;###autoload
432(defun describe-char (pos) 439(defun describe-char (pos)
433 "Describe the character after POS (interactively, the character after point). 440 "Describe the character after POS (interactively, the character after point).
@@ -517,7 +524,7 @@ as well as widgets, buttons, overlays, and text properties."
517 (encoded-string-description encoded coding) 524 (encoded-string-description encoded coding)
518 "not encodable")))) 525 "not encodable"))))
519 ,@(let ((unicodedata (and unicode 526 ,@(let ((unicodedata (and unicode
520 (unicode-data unicode)))) 527 (describe-char-unicode-data unicode))))
521 (if unicodedata 528 (if unicodedata
522 (cons (list "Unicode data" " ") unicodedata)))))) 529 (cons (list "Unicode data" " ") unicodedata))))))
523 (setq max-width (apply #'max (mapcar #'(lambda (x) (length (car x))) 530 (setq max-width (apply #'max (mapcar #'(lambda (x) (length (car x)))