diff options
| author | Richard M. Stallman | 2003-05-28 11:14:07 +0000 |
|---|---|---|
| committer | Richard M. Stallman | 2003-05-28 11:14:07 +0000 |
| commit | d6c135fb4fb52fc8741e00c587638dd01a0cec3d (patch) | |
| tree | 03c4ecb3e290e676ddd26c0e733a1819f6e30a4c | |
| parent | a4992f73f89c563c3b52e03498e8827b98eeec71 (diff) | |
| download | emacs-d6c135fb4fb52fc8741e00c587638dd01a0cec3d.tar.gz emacs-d6c135fb4fb52fc8741e00c587638dd01a0cec3d.zip | |
(describe-char-unicode-data): New dummy definition.
Real definition commented out since we can't use UnicodeData.txt as is.
(describe-char-unicodedata-file): Variable commented out.
| -rw-r--r-- | lisp/descr-text.el | 429 |
1 files changed, 218 insertions, 211 deletions
diff --git a/lisp/descr-text.el b/lisp/descr-text.el index 221f09f6f62..b5a8b0d7a96 100644 --- a/lisp/descr-text.el +++ b/lisp/descr-text.el | |||
| @@ -218,216 +218,223 @@ otherwise." | |||
| 218 | (newline) | 218 | (newline) |
| 219 | (widget-insert "There are text properties here:\n") | 219 | (widget-insert "There are text properties here:\n") |
| 220 | (describe-property-list properties))))) | 220 | (describe-property-list properties))))) |
| 221 | 221 | ||
| 222 | (defcustom unicodedata-file nil | 222 | ;;; We cannot use the UnicodeData.txt file as such; it is not free. |
| 223 | "Location of Unicode data file. | 223 | ;;; We can turn that info a different format and release the result |
| 224 | This is the UnicodeData.txt file from the Unicode consortium, used for | 224 | ;;; as free data. When that is done, we could reinstate the code below. |
| 225 | diagnostics. If it is non-nil `describe-char-after' will print data | 225 | ;;; For the mean time, here is a dummy placeholder. |
| 226 | looked up from it. This facility is mostly of use to people doing | 226 | ;;; -- rms |
| 227 | multilingual development. | 227 | (defun describe-char-unicode-data (char) nil) |
| 228 | 228 | ||
| 229 | This is a fairly large file, not typically present on GNU systems. At | 229 | ;;; (defcustom describe-char-unicodedata-file nil |
| 230 | the time of writing it is at | 230 | ;;; "Location of Unicode data file. |
| 231 | <URL:ftp://www.unicode.org/Public/UNIDATA/UnicodeData.txt>." | 231 | ;;; This is the UnicodeData.txt file from the Unicode consortium, used for |
| 232 | :group 'mule | 232 | ;;; diagnostics. If it is non-nil `describe-char-after' will print data |
| 233 | :version "21.5" | 233 | ;;; looked up from it. This facility is mostly of use to people doing |
| 234 | :type '(choice (const :tag "None" nil) | 234 | ;;; multilingual development. |
| 235 | file)) | 235 | |
| 236 | 236 | ;;; This is a fairly large file, not typically present on GNU systems. At | |
| 237 | ;; We could convert the unidata file into a Lispy form once-for-all | 237 | ;;; the time of writing it is at |
| 238 | ;; and distribute it for loading on demand. It might be made more | 238 | ;;; <URL:ftp://www.unicode.org/Public/UNIDATA/UnicodeData.txt>." |
| 239 | ;; space-efficient by splitting strings word-wise and replacing them | 239 | ;;; :group 'mule |
| 240 | ;; with lists of symbols interned in a private obarray, e.g. | 240 | ;;; :version "21.5" |
| 241 | ;; "LATIN SMALL LETTER A" => '(LATIN SMALL LETTER A). | 241 | ;;; :type '(choice (const :tag "None" nil) |
| 242 | 242 | ;;; file)) | |
| 243 | ;; Fixme: Check whether this needs updating for Unicode 4. | 243 | |
| 244 | (defun unicode-data (char) | 244 | ;;; ;; We could convert the unidata file into a Lispy form once-for-all |
| 245 | "Return a list of Unicode data for unicode CHAR. | 245 | ;;; ;; and distribute it for loading on demand. It might be made more |
| 246 | Each element is a list of a property description and the property value. | 246 | ;;; ;; space-efficient by splitting strings word-wise and replacing them |
| 247 | The list is null if CHAR isn't found in `unicodedata-file'." | 247 | ;;; ;; with lists of symbols interned in a private obarray, e.g. |
| 248 | (when unicodedata-file | 248 | ;;; ;; "LATIN SMALL LETTER A" => '(LATIN SMALL LETTER A). |
| 249 | (unless (file-exists-p unicodedata-file) | 249 | |
| 250 | (error "`unicodedata-file' %s not found" unicodedata-file)) | 250 | ;;; ;; Fixme: Check whether this needs updating for Unicode 4. |
| 251 | (save-excursion | 251 | ;;; (defun describe-char-unicode-data (char) |
| 252 | ;; Find file in fundamental mode to avoid, e.g. flyspell turned | 252 | ;;; "Return a list of Unicode data for unicode CHAR. |
| 253 | ;; on for .txt. Don't use RAWFILE arg in case of DOS line endings. | 253 | ;;; Each element is a list of a property description and the property value. |
| 254 | (set-buffer (let ((auto-mode-alist)) | 254 | ;;; The list is null if CHAR isn't found in `describe-char-unicodedata-file'." |
| 255 | (find-file-noselect unicodedata-file))) | 255 | ;;; (when describe-char-unicodedata-file |
| 256 | (goto-char (point-min)) | 256 | ;;; (unless (file-exists-p describe-char-unicodedata-file) |
| 257 | (let ((hex (format "%04X" char)) | 257 | ;;; (error "`unicodedata-file' %s not found" describe-char-unicodedata-file)) |
| 258 | found first last) | 258 | ;;; (save-excursion |
| 259 | (if (re-search-forward (concat "^" hex) nil t) | 259 | ;;; ;; Find file in fundamental mode to avoid, e.g. flyspell turned |
| 260 | (setq found t) | 260 | ;;; ;; on for .txt. Don't use RAWFILE arg in case of DOS line endings. |
| 261 | ;; It's not listed explicitly. Look for ranges, e.g. CJK | 261 | ;;; (set-buffer (let ((auto-mode-alist)) |
| 262 | ;; ideographs, and check whether it's in one of them. | 262 | ;;; (find-file-noselect describe-char-unicodedata-file))) |
| 263 | (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t) | 263 | ;;; (goto-char (point-min)) |
| 264 | (>= char (setq first | 264 | ;;; (let ((hex (format "%04X" char)) |
| 265 | (string-to-number (match-string 1) 16))) | 265 | ;;; found first last) |
| 266 | (progn | 266 | ;;; (if (re-search-forward (concat "^" hex) nil t) |
| 267 | (forward-line 1) | 267 | ;;; (setq found t) |
| 268 | (looking-at "^\\([^;]+\\);[^;]+Last>;") | 268 | ;;; ;; It's not listed explicitly. Look for ranges, e.g. CJK |
| 269 | (> char | 269 | ;;; ;; ideographs, and check whether it's in one of them. |
| 270 | (setq last | 270 | ;;; (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t) |
| 271 | (string-to-number (match-string 1) 16)))))) | 271 | ;;; (>= char (setq first |
| 272 | (if (and (>= char first) | 272 | ;;; (string-to-number (match-string 1) 16))) |
| 273 | (<= char last)) | 273 | ;;; (progn |
| 274 | (setq found t))) | 274 | ;;; (forward-line 1) |
| 275 | (if found | 275 | ;;; (looking-at "^\\([^;]+\\);[^;]+Last>;") |
| 276 | (let ((fields (mapcar (lambda (elt) | 276 | ;;; (> char |
| 277 | (if (> (length elt) 0) | 277 | ;;; (setq last |
| 278 | elt)) | 278 | ;;; (string-to-number (match-string 1) 16)))))) |
| 279 | (cdr (split-string | 279 | ;;; (if (and (>= char first) |
| 280 | (buffer-substring | 280 | ;;; (<= char last)) |
| 281 | (line-beginning-position) | 281 | ;;; (setq found t))) |
| 282 | (line-end-position)) | 282 | ;;; (if found |
| 283 | ";"))))) | 283 | ;;; (let ((fields (mapcar (lambda (elt) |
| 284 | ;; The length depends on whether the last field was empty. | 284 | ;;; (if (> (length elt) 0) |
| 285 | (unless (or (= 13 (length fields)) | 285 | ;;; elt)) |
| 286 | (= 14 (length fields))) | 286 | ;;; (cdr (split-string |
| 287 | (error "Invalid contents in %s" unicodedata-file)) | 287 | ;;; (buffer-substring |
| 288 | ;; The field names and values lists are slightly | 288 | ;;; (line-beginning-position) |
| 289 | ;; modified from Mule-UCS unidata.el. | 289 | ;;; (line-end-position)) |
| 290 | (list | 290 | ;;; ";"))))) |
| 291 | (list "Name" (let ((name (nth 0 fields))) | 291 | ;;; ;; The length depends on whether the last field was empty. |
| 292 | ;; Check for <..., First>, <..., Last> | 292 | ;;; (unless (or (= 13 (length fields)) |
| 293 | (if (string-match "\\`\\(<[^,]+\\)," name) | 293 | ;;; (= 14 (length fields))) |
| 294 | (concat (match-string 1 name) ">") | 294 | ;;; (error "Invalid contents in %s" describe-char-unicodedata-file)) |
| 295 | name))) | 295 | ;;; ;; The field names and values lists are slightly |
| 296 | (list "Category" | 296 | ;;; ;; modified from Mule-UCS unidata.el. |
| 297 | (cdr (assoc | 297 | ;;; (list |
| 298 | (nth 1 fields) | 298 | ;;; (list "Name" (let ((name (nth 0 fields))) |
| 299 | '(("Lu" . "uppercase letter") | 299 | ;;; ;; Check for <..., First>, <..., Last> |
| 300 | ("Ll" . "lowercase letter") | 300 | ;;; (if (string-match "\\`\\(<[^,]+\\)," name) |
| 301 | ("Lt" . "titlecase letter") | 301 | ;;; (concat (match-string 1 name) ">") |
| 302 | ("Mn" . "non-spacing mark") | 302 | ;;; name))) |
| 303 | ("Mc" . "spacing-combining mark") | 303 | ;;; (list "Category" |
| 304 | ("Me" . "enclosing mark") | 304 | ;;; (cdr (assoc |
| 305 | ("Nd" . "decimal digit") | 305 | ;;; (nth 1 fields) |
| 306 | ("Nl" . "letter number") | 306 | ;;; '(("Lu" . "uppercase letter") |
| 307 | ("No" . "other number") | 307 | ;;; ("Ll" . "lowercase letter") |
| 308 | ("Zs" . "space separator") | 308 | ;;; ("Lt" . "titlecase letter") |
| 309 | ("Zl" . "line separator") | 309 | ;;; ("Mn" . "non-spacing mark") |
| 310 | ("Zp" . "paragraph separator") | 310 | ;;; ("Mc" . "spacing-combining mark") |
| 311 | ("Cc" . "other control") | 311 | ;;; ("Me" . "enclosing mark") |
| 312 | ("Cf" . "other format") | 312 | ;;; ("Nd" . "decimal digit") |
| 313 | ("Cs" . "surrogate") | 313 | ;;; ("Nl" . "letter number") |
| 314 | ("Co" . "private use") | 314 | ;;; ("No" . "other number") |
| 315 | ("Cn" . "not assigned") | 315 | ;;; ("Zs" . "space separator") |
| 316 | ("Lm" . "modifier letter") | 316 | ;;; ("Zl" . "line separator") |
| 317 | ("Lo" . "other letter") | 317 | ;;; ("Zp" . "paragraph separator") |
| 318 | ("Pc" . "connector punctuation") | 318 | ;;; ("Cc" . "other control") |
| 319 | ("Pd" . "dash punctuation") | 319 | ;;; ("Cf" . "other format") |
| 320 | ("Ps" . "open punctuation") | 320 | ;;; ("Cs" . "surrogate") |
| 321 | ("Pe" . "close punctuation") | 321 | ;;; ("Co" . "private use") |
| 322 | ("Pi" . "initial-quotation punctuation") | 322 | ;;; ("Cn" . "not assigned") |
| 323 | ("Pf" . "final-quotation punctuation") | 323 | ;;; ("Lm" . "modifier letter") |
| 324 | ("Po" . "other punctuation") | 324 | ;;; ("Lo" . "other letter") |
| 325 | ("Sm" . "math symbol") | 325 | ;;; ("Pc" . "connector punctuation") |
| 326 | ("Sc" . "currency symbol") | 326 | ;;; ("Pd" . "dash punctuation") |
| 327 | ("Sk" . "modifier symbol") | 327 | ;;; ("Ps" . "open punctuation") |
| 328 | ("So" . "other symbol"))))) | 328 | ;;; ("Pe" . "close punctuation") |
| 329 | (list "Combining class" | 329 | ;;; ("Pi" . "initial-quotation punctuation") |
| 330 | (cdr (assoc | 330 | ;;; ("Pf" . "final-quotation punctuation") |
| 331 | (string-to-number (nth 2 fields)) | 331 | ;;; ("Po" . "other punctuation") |
| 332 | '((0 . "Spacing") | 332 | ;;; ("Sm" . "math symbol") |
| 333 | (1 . "Overlays and interior") | 333 | ;;; ("Sc" . "currency symbol") |
| 334 | (7 . "Nuktas") | 334 | ;;; ("Sk" . "modifier symbol") |
| 335 | (8 . "Hiragana/Katakana voicing marks") | 335 | ;;; ("So" . "other symbol"))))) |
| 336 | (9 . "Viramas") | 336 | ;;; (list "Combining class" |
| 337 | (10 . "Start of fixed position classes") | 337 | ;;; (cdr (assoc |
| 338 | (199 . "End of fixed position classes") | 338 | ;;; (string-to-number (nth 2 fields)) |
| 339 | (200 . "Below left attached") | 339 | ;;; '((0 . "Spacing") |
| 340 | (202 . "Below attached") | 340 | ;;; (1 . "Overlays and interior") |
| 341 | (204 . "Below right attached") | 341 | ;;; (7 . "Nuktas") |
| 342 | (208 . "Left attached (reordrant around \ | 342 | ;;; (8 . "Hiragana/Katakana voicing marks") |
| 343 | single base character)") | 343 | ;;; (9 . "Viramas") |
| 344 | (210 . "Right attached") | 344 | ;;; (10 . "Start of fixed position classes") |
| 345 | (212 . "Above left attached") | 345 | ;;; (199 . "End of fixed position classes") |
| 346 | (214 . "Above attached") | 346 | ;;; (200 . "Below left attached") |
| 347 | (216 . "Above right attached") | 347 | ;;; (202 . "Below attached") |
| 348 | (218 . "Below left") | 348 | ;;; (204 . "Below right attached") |
| 349 | (220 . "Below") | 349 | ;;; (208 . "Left attached (reordrant around \ |
| 350 | (222 . "Below right") | 350 | ;;; single base character)") |
| 351 | (224 . "Left (reordrant around single base \ | 351 | ;;; (210 . "Right attached") |
| 352 | character)") | 352 | ;;; (212 . "Above left attached") |
| 353 | (226 . "Right") | 353 | ;;; (214 . "Above attached") |
| 354 | (228 . "Above left") | 354 | ;;; (216 . "Above right attached") |
| 355 | (230 . "Above") | 355 | ;;; (218 . "Below left") |
| 356 | (232 . "Above right") | 356 | ;;; (220 . "Below") |
| 357 | (233 . "Double below") | 357 | ;;; (222 . "Below right") |
| 358 | (234 . "Double above") | 358 | ;;; (224 . "Left (reordrant around single base \ |
| 359 | (240 . "Below (iota subscript)"))))) | 359 | ;;; character)") |
| 360 | (list "Bidi category" | 360 | ;;; (226 . "Right") |
| 361 | (cdr (assoc | 361 | ;;; (228 . "Above left") |
| 362 | (nth 3 fields) | 362 | ;;; (230 . "Above") |
| 363 | '(("L" . "Left-to-Right") | 363 | ;;; (232 . "Above right") |
| 364 | ("LRE" . "Left-to-Right Embedding") | 364 | ;;; (233 . "Double below") |
| 365 | ("LRO" . "Left-to-Right Override") | 365 | ;;; (234 . "Double above") |
| 366 | ("R" . "Right-to-Left") | 366 | ;;; (240 . "Below (iota subscript)"))))) |
| 367 | ("AL" . "Right-to-Left Arabic") | 367 | ;;; (list "Bidi category" |
| 368 | ("RLE" . "Right-to-Left Embedding") | 368 | ;;; (cdr (assoc |
| 369 | ("RLO" . "Right-to-Left Override") | 369 | ;;; (nth 3 fields) |
| 370 | ("PDF" . "Pop Directional Format") | 370 | ;;; '(("L" . "Left-to-Right") |
| 371 | ("EN" . "European Number") | 371 | ;;; ("LRE" . "Left-to-Right Embedding") |
| 372 | ("ES" . "European Number Separator") | 372 | ;;; ("LRO" . "Left-to-Right Override") |
| 373 | ("ET" . "European Number Terminator") | 373 | ;;; ("R" . "Right-to-Left") |
| 374 | ("AN" . "Arabic Number") | 374 | ;;; ("AL" . "Right-to-Left Arabic") |
| 375 | ("CS" . "Common Number Separator") | 375 | ;;; ("RLE" . "Right-to-Left Embedding") |
| 376 | ("NSM" . "Non-Spacing Mark") | 376 | ;;; ("RLO" . "Right-to-Left Override") |
| 377 | ("BN" . "Boundary Neutral") | 377 | ;;; ("PDF" . "Pop Directional Format") |
| 378 | ("B" . "Paragraph Separator") | 378 | ;;; ("EN" . "European Number") |
| 379 | ("S" . "Segment Separator") | 379 | ;;; ("ES" . "European Number Separator") |
| 380 | ("WS" . "Whitespace") | 380 | ;;; ("ET" . "European Number Terminator") |
| 381 | ("ON" . "Other Neutrals"))))) | 381 | ;;; ("AN" . "Arabic Number") |
| 382 | (list | 382 | ;;; ("CS" . "Common Number Separator") |
| 383 | "Decomposition" | 383 | ;;; ("NSM" . "Non-Spacing Mark") |
| 384 | (if (nth 4 fields) | 384 | ;;; ("BN" . "Boundary Neutral") |
| 385 | (let* ((parts (split-string (nth 4 fields))) | 385 | ;;; ("B" . "Paragraph Separator") |
| 386 | (info (car parts))) | 386 | ;;; ("S" . "Segment Separator") |
| 387 | (if (string-match "\\`<\\(.+\\)>\\'" info) | 387 | ;;; ("WS" . "Whitespace") |
| 388 | (setq info (match-string 1 info)) | 388 | ;;; ("ON" . "Other Neutrals"))))) |
| 389 | (setq info nil)) | 389 | ;;; (list |
| 390 | (if info (setq parts (cdr parts))) | 390 | ;;; "Decomposition" |
| 391 | ;; Maybe printing ? for unrepresentable unicodes | 391 | ;;; (if (nth 4 fields) |
| 392 | ;; here and below should be changed? | 392 | ;;; (let* ((parts (split-string (nth 4 fields))) |
| 393 | (setq parts (mapconcat | 393 | ;;; (info (car parts))) |
| 394 | (lambda (arg) | 394 | ;;; (if (string-match "\\`<\\(.+\\)>\\'" info) |
| 395 | (string (or (decode-char | 395 | ;;; (setq info (match-string 1 info)) |
| 396 | 'ucs | 396 | ;;; (setq info nil)) |
| 397 | (string-to-number arg 16)) | 397 | ;;; (if info (setq parts (cdr parts))) |
| 398 | ??))) | 398 | ;;; ;; Maybe printing ? for unrepresentable unicodes |
| 399 | parts " ")) | 399 | ;;; ;; here and below should be changed? |
| 400 | (concat info parts)))) | 400 | ;;; (setq parts (mapconcat |
| 401 | (list "Decimal digit value" | 401 | ;;; (lambda (arg) |
| 402 | (nth 5 fields)) | 402 | ;;; (string (or (decode-char |
| 403 | (list "Digit value" | 403 | ;;; 'ucs |
| 404 | (nth 6 fields)) | 404 | ;;; (string-to-number arg 16)) |
| 405 | (list "Numeric value" | 405 | ;;; ??))) |
| 406 | (nth 7 fields)) | 406 | ;;; parts " ")) |
| 407 | (list "Mirrored" | 407 | ;;; (concat info parts)))) |
| 408 | (if (equal "Y" (nth 8 fields)) | 408 | ;;; (list "Decimal digit value" |
| 409 | "yes")) | 409 | ;;; (nth 5 fields)) |
| 410 | (list "Old name" (nth 9 fields)) | 410 | ;;; (list "Digit value" |
| 411 | (list "ISO 10646 comment" (nth 10 fields)) | 411 | ;;; (nth 6 fields)) |
| 412 | (list "Uppercase" (and (nth 11 fields) | 412 | ;;; (list "Numeric value" |
| 413 | (string (or (decode-char | 413 | ;;; (nth 7 fields)) |
| 414 | 'ucs | 414 | ;;; (list "Mirrored" |
| 415 | (string-to-number | 415 | ;;; (if (equal "Y" (nth 8 fields)) |
| 416 | (nth 11 fields) 16)) | 416 | ;;; "yes")) |
| 417 | ??)))) | 417 | ;;; (list "Old name" (nth 9 fields)) |
| 418 | (list "Lowercase" (and (nth 12 fields) | 418 | ;;; (list "ISO 10646 comment" (nth 10 fields)) |
| 419 | (string (or (decode-char | 419 | ;;; (list "Uppercase" (and (nth 11 fields) |
| 420 | 'ucs | 420 | ;;; (string (or (decode-char |
| 421 | (string-to-number | 421 | ;;; 'ucs |
| 422 | (nth 12 fields) 16)) | 422 | ;;; (string-to-number |
| 423 | ??)))) | 423 | ;;; (nth 11 fields) 16)) |
| 424 | (list "Titlecase" (and (nth 13 fields) | 424 | ;;; ??)))) |
| 425 | (string (or (decode-char | 425 | ;;; (list "Lowercase" (and (nth 12 fields) |
| 426 | 'ucs | 426 | ;;; (string (or (decode-char |
| 427 | (string-to-number | 427 | ;;; 'ucs |
| 428 | (nth 13 fields) 16)) | 428 | ;;; (string-to-number |
| 429 | ??))))))))))) | 429 | ;;; (nth 12 fields) 16)) |
| 430 | 430 | ;;; ??)))) | |
| 431 | ;;; (list "Titlecase" (and (nth 13 fields) | ||
| 432 | ;;; (string (or (decode-char | ||
| 433 | ;;; 'ucs | ||
| 434 | ;;; (string-to-number | ||
| 435 | ;;; (nth 13 fields) 16)) | ||
| 436 | ;;; ??))))))))))) | ||
| 437 | |||
| 431 | ;;;###autoload | 438 | ;;;###autoload |
| 432 | (defun describe-char (pos) | 439 | (defun describe-char (pos) |
| 433 | "Describe the character after POS (interactively, the character after point). | 440 | "Describe the character after POS (interactively, the character after point). |
| @@ -517,7 +524,7 @@ as well as widgets, buttons, overlays, and text properties." | |||
| 517 | (encoded-string-description encoded coding) | 524 | (encoded-string-description encoded coding) |
| 518 | "not encodable")))) | 525 | "not encodable")))) |
| 519 | ,@(let ((unicodedata (and unicode | 526 | ,@(let ((unicodedata (and unicode |
| 520 | (unicode-data unicode)))) | 527 | (describe-char-unicode-data unicode)))) |
| 521 | (if unicodedata | 528 | (if unicodedata |
| 522 | (cons (list "Unicode data" " ") unicodedata)))))) | 529 | (cons (list "Unicode data" " ") unicodedata)))))) |
| 523 | (setq max-width (apply #'max (mapcar #'(lambda (x) (length (car x))) | 530 | (setq max-width (apply #'max (mapcar #'(lambda (x) (length (car x))) |