diff options
| author | Dave Love | 2002-05-23 18:16:52 +0000 |
|---|---|---|
| committer | Dave Love | 2002-05-23 18:16:52 +0000 |
| commit | abdaa411369afe215bcabe202b249acd119b8e29 (patch) | |
| tree | 87fa14f35605241b3c2c23262fac09f081d2c5a2 | |
| parent | 354a6a959d93268a58cb44b294cba01200be38a1 (diff) | |
| download | emacs-abdaa411369afe215bcabe202b249acd119b8e29.tar.gz emacs-abdaa411369afe215bcabe202b249acd119b8e29.zip | |
Various simplifications and
additions.
| -rw-r--r-- | lisp/international/characters.el | 219 |
1 files changed, 71 insertions, 148 deletions
diff --git a/lisp/international/characters.el b/lisp/international/characters.el index 409170a78a3..01665440f1c 100644 --- a/lisp/international/characters.el +++ b/lisp/international/characters.el | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | 2 | ||
| 3 | ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. | 3 | ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. |
| 4 | ;; Licensed to the Free Software Foundation. | 4 | ;; Licensed to the Free Software Foundation. |
| 5 | ;; Copyright (C) 2001 Free Software Foundation, Inc. | 5 | ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc. |
| 6 | ;; Copyright (C) 2001, 2002 | 6 | ;; Copyright (C) 2001, 2002 |
| 7 | ;; National Institute of Advanced Industrial Science and Technology (AIST) | 7 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
| 8 | ;; Registration Number H13PRO009 | 8 | ;; Registration Number H13PRO009 |
| @@ -113,9 +113,7 @@ | |||
| 113 | arabic-2-column))) | 113 | arabic-2-column))) |
| 114 | (while charsets | 114 | (while charsets |
| 115 | ;; (modify-syntax-entry (make-char (car charsets)) "w") | 115 | ;; (modify-syntax-entry (make-char (car charsets)) "w") |
| 116 | (map-charset-chars | 116 | (map-charset-chars #'modify-category-entry (car charsets) ?b) |
| 117 | #'(lambda (char ignore) (modify-category-entry char ?b)) | ||
| 118 | (car charsets)) | ||
| 119 | (setq charsets (cdr charsets)))) | 117 | (setq charsets (cdr charsets)))) |
| 120 | (modify-category-entry '(#x600 . #x6ff) ?b) | 118 | (modify-category-entry '(#x600 . #x6ff) ?b) |
| 121 | (modify-category-entry '(#xfb50 . #xfdff) ?b) | 119 | (modify-category-entry '(#xfb50 . #xfdff) ?b) |
| @@ -142,6 +140,8 @@ | |||
| 142 | (modify-syntax-entry ?\〗 ")〖") | 140 | (modify-syntax-entry ?\〗 ")〖") |
| 143 | (modify-syntax-entry ?\】 ")【") | 141 | (modify-syntax-entry ?\】 ")【") |
| 144 | 142 | ||
| 143 | ;; Fixme: should any Chinese stuff be re-instated? | ||
| 144 | |||
| 145 | ;; (modify-category-entry (make-char 'chinese-gb2312) ?c) | 145 | ;; (modify-category-entry (make-char 'chinese-gb2312) ?c) |
| 146 | ;; (modify-category-entry (make-char 'chinese-gb2312) ?\|) | 146 | ;; (modify-category-entry (make-char 'chinese-gb2312) ?\|) |
| 147 | ;; (modify-category-entry (make-char 'chinese-gb2312 35) ?A) | 147 | ;; (modify-category-entry (make-char 'chinese-gb2312 35) ?A) |
| @@ -191,10 +191,7 @@ | |||
| 191 | 191 | ||
| 192 | ;; Cyrillic character set (ISO-8859-5) | 192 | ;; Cyrillic character set (ISO-8859-5) |
| 193 | 193 | ||
| 194 | (modify-syntax-entry (decode-char 'iso-8859-5 160) " ") | ||
| 195 | (modify-syntax-entry ? ".") | ||
| 196 | (modify-syntax-entry ?№ ".") | 194 | (modify-syntax-entry ?№ ".") |
| 197 | (modify-syntax-entry ?§ ".") | ||
| 198 | (let ((tbl (standard-case-table))) | 195 | (let ((tbl (standard-case-table))) |
| 199 | (set-case-syntax-pair ?Ё ?ё tbl) | 196 | (set-case-syntax-pair ?Ё ?ё tbl) |
| 200 | (set-case-syntax-pair ?Ђ ?ђ tbl) | 197 | (set-case-syntax-pair ?Ђ ?ђ tbl) |
| @@ -285,23 +282,16 @@ | |||
| 285 | 282 | ||
| 286 | ;; Ethiopic character set | 283 | ;; Ethiopic character set |
| 287 | 284 | ||
| 288 | ;; (modify-category-entry (make-char 'ethiopic) ?e) | ||
| 289 | ;; (modify-syntax-entry (make-char 'ethiopic) "w") | ||
| 290 | (modify-category-entry '(#x1200 . #x137b) ?e) | 285 | (modify-category-entry '(#x1200 . #x137b) ?e) |
| 291 | (let ((chars '(? ? ? ? ? ? ? ? ? ? ? ? ? ? | 286 | (let ((chars '(? ? ? ? ? ? ? ? ? ? ? ? ? ?))) |
| 292 | ;; Unicode equivalents of the above: | ||
| 293 | ?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨ ?ﷰ ?ﷻ ?﷼ ?﷽ ?﷾ ?﷿))) | ||
| 294 | (while chars | 287 | (while chars |
| 295 | (modify-syntax-entry (car chars) ".") | 288 | (modify-syntax-entry (car chars) ".") |
| 296 | (setq chars (cdr chars)))) | 289 | (setq chars (cdr chars)))) |
| 290 | (map-charset-chars #'modify-category-entry 'ethiopic ?e) | ||
| 297 | 291 | ||
| 298 | ;; Greek character set (ISO-8859-7) | 292 | ;; Greek character set (ISO-8859-7) |
| 299 | 293 | ||
| 300 | ;; (modify-category-entry (make-char 'greek-iso8859-7) ?g) | 294 | (modify-category-entry '(#x370 . #x3ff) ?g) |
| 301 | (let ((c #x370)) | ||
| 302 | (while (<= c #x3ff) | ||
| 303 | (modify-category-entry (decode-char 'ucs c) ?g) | ||
| 304 | (setq c (1+ c)))) | ||
| 305 | 295 | ||
| 306 | ;; (let ((c 182)) | 296 | ;; (let ((c 182)) |
| 307 | ;; (while (< c 255) | 297 | ;; (while (< c 255) |
| @@ -364,19 +354,15 @@ | |||
| 364 | 354 | ||
| 365 | ;; Hebrew character set (ISO-8859-8) | 355 | ;; Hebrew character set (ISO-8859-8) |
| 366 | 356 | ||
| 367 | ;; (modify-category-entry (make-char 'hebrew-iso8859-8) ?w) | 357 | (modify-category-entry '(#x590 . #x5f4) ?w) |
| 368 | (let ((c #x591)) | ||
| 369 | (while (<= c #x5f4) | ||
| 370 | (modify-category-entry (decode-char 'ucs c) ?w) | ||
| 371 | (setq c (1+ c)))) | ||
| 372 | 358 | ||
| 373 | ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 208) ".") ; PASEQ | 359 | ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 208) ".") ; PASEQ |
| 374 | ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 211) ".") ; SOF PASUQ | 360 | ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 211) ".") ; SOF PASUQ |
| 375 | (modify-syntax-entry (decode-char 'ucs #x5be) ".") ; MAQAF | 361 | (modify-syntax-entry #x5be ".") ; MAQAF |
| 376 | (modify-syntax-entry (decode-char 'ucs #x5c0) ".") ; PASEQ | 362 | (modify-syntax-entry #x5c0 ".") ; PASEQ |
| 377 | (modify-syntax-entry (decode-char 'ucs #x5c3) ".") ; SOF PASUQ | 363 | (modify-syntax-entry #x5c3 ".") ; SOF PASUQ |
| 378 | (modify-syntax-entry (decode-char 'ucs #x5f3) ".") ; GERESH | 364 | (modify-syntax-entry #x5f3 ".") ; GERESH |
| 379 | (modify-syntax-entry (decode-char 'ucs #x5f4) ".") ; GERSHAYIM | 365 | (modify-syntax-entry #x5f4 ".") ; GERSHAYIM |
| 380 | 366 | ||
| 381 | ;; (let ((c 224)) | 367 | ;; (let ((c 224)) |
| 382 | ;; (while (< c 251) | 368 | ;; (while (< c 251) |
| @@ -390,10 +376,9 @@ | |||
| 390 | ;; (modify-category-entry (make-char 'indian-2-column) ?I) | 376 | ;; (modify-category-entry (make-char 'indian-2-column) ?I) |
| 391 | ;; (modify-category-entry (make-char 'indian-glyph) ?I) | 377 | ;; (modify-category-entry (make-char 'indian-glyph) ?I) |
| 392 | ;; Unicode Devanagari block | 378 | ;; Unicode Devanagari block |
| 393 | (let ((c #x901)) | 379 | (modify-category-entry '(#x901 . #x970) ?i) |
| 394 | (while (<= c #x970) | 380 | (map-charset-chars #'modify-category-entry 'indian-is13194 ?i) |
| 395 | (modify-category-entry (decode-char 'ucs c) ?i) | 381 | (map-charset-chars #'modify-category-entry 'indian-2-column ?i) |
| 396 | (setq c (1+ c)))) | ||
| 397 | 382 | ||
| 398 | ;;; Commented out since the categories appear not to be used anywhere | 383 | ;;; Commented out since the categories appear not to be used anywhere |
| 399 | ;;; and word syntax is the default. | 384 | ;;; and word syntax is the default. |
| @@ -468,27 +453,27 @@ | |||
| 468 | ;; Unicode equivalents of JISX0201-kana | 453 | ;; Unicode equivalents of JISX0201-kana |
| 469 | (let ((c #xff61)) | 454 | (let ((c #xff61)) |
| 470 | (while (<= c #xff9f) | 455 | (while (<= c #xff9f) |
| 471 | (modify-category-entry (decode-char 'ucs c) ?k) | 456 | (modify-category-entry c ?k) |
| 472 | (modify-category-entry (decode-char 'ucs c) ?j) | 457 | (modify-category-entry c ?j) |
| 473 | (modify-category-entry (decode-char 'ucs c) ?\|) | 458 | (modify-category-entry c ?\|) |
| 474 | (setq c (1+ c)))) | 459 | (setq c (1+ c)))) |
| 475 | 460 | ||
| 476 | ;; Katakana block | 461 | ;; Katakana block |
| 477 | (let ((c #x30a0)) | 462 | (let ((c #x30a0)) |
| 478 | (while (<= c #x30ff) | 463 | (while (<= c #x30ff) |
| 479 | ;; ?K is double width, ?k isn't specified | 464 | ;; ?K is double width, ?k isn't specified |
| 480 | (modify-category-entry (decode-char 'ucs c) ?K) | 465 | (modify-category-entry c ?K) |
| 481 | ;;(modify-category-entry (decode-char 'ucs c) ?j) | 466 | ;;(modify-category-entry (decode-char 'ucs c) ?j) |
| 482 | (modify-category-entry (decode-char 'ucs c) ?\|) | 467 | (modify-category-entry c ?\|) |
| 483 | (setq c (1+ c)))) | 468 | (setq c (1+ c)))) |
| 484 | 469 | ||
| 485 | ;; Hiragana block | 470 | ;; Hiragana block |
| 486 | (let ((c #x3040)) | 471 | (let ((c #x3040)) |
| 487 | (while (<= c #x309f) | 472 | (while (<= c #x309f) |
| 488 | ;; ?H is actually defined to be double width | 473 | ;; ?H is actually defined to be double width |
| 489 | (modify-category-entry (decode-char 'ucs c) ?H) | 474 | (modify-category-entry c ?H) |
| 490 | ;;(modify-category-entry (decode-char 'ucs c) ?j) | 475 | ;;(modify-category-entry (decode-char 'ucs c) ?j) |
| 491 | (modify-category-entry (decode-char 'ucs c) ?\|) | 476 | (modify-category-entry c ?\|) |
| 492 | (setq c (1+ c)))) | 477 | (setq c (1+ c)))) |
| 493 | 478 | ||
| 494 | ;; JISX0208 | 479 | ;; JISX0208 |
| @@ -498,8 +483,7 @@ | |||
| 498 | (decode-char 'japanese-jisx0208 #x287E)) "_") | 483 | (decode-char 'japanese-jisx0208 #x287E)) "_") |
| 499 | (let ((chars '(? ? ? ? ? ? ? ? ? ? ? ?))) | 484 | (let ((chars '(? ? ? ? ? ? ? ? ? ? ? ?))) |
| 500 | (dolist (elt chars) | 485 | (dolist (elt chars) |
| 501 | (modify-syntax-entry (car chars) "w") | 486 | (modify-syntax-entry (car chars) "w"))) |
| 502 | (setq chars (cdr chars)))) | ||
| 503 | (modify-syntax-entry ?\ "(") | 487 | (modify-syntax-entry ?\ "(") |
| 504 | (modify-syntax-entry ?\ "(") | 488 | (modify-syntax-entry ?\ "(") |
| 505 | (modify-syntax-entry ?\ "(") | 489 | (modify-syntax-entry ?\ "(") |
| @@ -546,9 +530,7 @@ | |||
| 546 | 530 | ||
| 547 | ;; JISX0201-Kana | 531 | ;; JISX0201-Kana |
| 548 | ;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w") | 532 | ;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w") |
| 549 | (let ((chars '(?。 ?、 ?・ | 533 | (let ((chars '(?。 ?、 ?・))) |
| 550 | ;; Unicode: | ||
| 551 | ?。 ?、 ?・))) | ||
| 552 | (while chars | 534 | (while chars |
| 553 | (modify-syntax-entry (car chars) ".") | 535 | (modify-syntax-entry (car chars) ".") |
| 554 | (setq chars (cdr chars)))) | 536 | (setq chars (cdr chars)))) |
| @@ -558,6 +540,8 @@ | |||
| 558 | 540 | ||
| 559 | ;; Korean character set (KSC5601) | 541 | ;; Korean character set (KSC5601) |
| 560 | 542 | ||
| 543 | ;; Fixme: re-instate these | ||
| 544 | |||
| 561 | ;; (modify-syntax-entry (make-char 'korean-ksc5601) "w") | 545 | ;; (modify-syntax-entry (make-char 'korean-ksc5601) "w") |
| 562 | ;; (modify-syntax-entry (make-char 'korean-ksc5601 33) "_") | 546 | ;; (modify-syntax-entry (make-char 'korean-ksc5601 33) "_") |
| 563 | ;; (modify-syntax-entry (make-char 'korean-ksc5601 34) "_") | 547 | ;; (modify-syntax-entry (make-char 'korean-ksc5601 34) "_") |
| @@ -573,41 +557,17 @@ | |||
| 573 | ;; (modify-category-entry (make-char 'korean-ksc5601 43) ?K) | 557 | ;; (modify-category-entry (make-char 'korean-ksc5601 43) ?K) |
| 574 | ;; (modify-category-entry (make-char 'korean-ksc5601 44) ?Y) | 558 | ;; (modify-category-entry (make-char 'korean-ksc5601 44) ?Y) |
| 575 | 559 | ||
| 576 | ;; Latin character set (latin-1,2,3,4,5,8,9) | 560 | ;; Latin |
| 577 | 561 | ||
| 578 | ;; (modify-category-entry (make-char 'latin-iso8859-1) ?l) | 562 | (modify-category-entry '(#x80 . #x024F) ?l) |
| 579 | ;; (modify-category-entry (make-char 'latin-iso8859-2) ?l) | ||
| 580 | ;; (modify-category-entry (make-char 'latin-iso8859-3) ?l) | ||
| 581 | ;; (modify-category-entry (make-char 'latin-iso8859-4) ?l) | ||
| 582 | ;; (modify-category-entry (make-char 'latin-iso8859-9) ?l) | ||
| 583 | ;; (modify-category-entry (make-char 'latin-iso8859-14) ?l) | ||
| 584 | ;; (modify-category-entry (make-char 'latin-iso8859-15) ?l) | ||
| 585 | |||
| 586 | ;; (modify-category-entry (make-char 'latin-iso8859-1 160) ?\ ) | ||
| 587 | ;; (modify-category-entry (make-char 'latin-iso8859-2 160) ?\ ) | ||
| 588 | ;; (modify-category-entry (make-char 'latin-iso8859-3 160) ?\ ) | ||
| 589 | ;; (modify-category-entry (make-char 'latin-iso8859-4 160) ?\ ) | ||
| 590 | ;; (modify-category-entry (make-char 'latin-iso8859-9 160) ?\ ) | ||
| 591 | ;; (modify-category-entry (make-char 'latin-iso8859-14 160) ?\ ) | ||
| 592 | ;; (modify-category-entry (make-char 'latin-iso8859-15 160) ?\ ) | ||
| 593 | 563 | ||
| 594 | ;; Lao character set | 564 | ;; Lao character set |
| 595 | 565 | ||
| 596 | ;; (modify-category-entry (make-char 'lao) ?o) | 566 | (modify-category-entry '(#xe80 . #xeff) ?o) |
| 597 | (dotimes (i (1+ (- #xeff #xe80))) | 567 | (map-charset-chars #'modify-category-entry 'lao ?o) |
| 598 | (modify-category-entry (decode-char 'ucs (+ i #xe80)) ?o)) | ||
| 599 | 568 | ||
| 600 | (let ((deflist '(;; chars syntax category | 569 | ;; Fixme: check this. Lao characters in HELLO seem to have all the categories |
| 601 | ("ກ-ຮ" "w" ?0) ; consonant | 570 | (let ((deflist '(("ກ-ຮ" "w" ?0) ; consonant |
| 602 | ("ະາຳຽເ-ໄ" "w" ?1) ; vowel base | ||
| 603 | ("ັິ-ືົໍ" "w" ?2) ; vowel upper | ||
| 604 | ("ຸູ" "w" ?3) ; vowel lower | ||
| 605 | ("່-໌" "w" ?4) ; tone mark | ||
| 606 | ("ຼ" "w" ?9) ; semivowel lower | ||
| 607 | ("໐-໙" "w" ?6) ; digit | ||
| 608 | ("ຯໆ" "_" ?5) ; symbol | ||
| 609 | ;; Unicode equivalents | ||
| 610 | ("ກ-ຮ" "w" ?0) ; consonant | ||
| 611 | ("ະາຳຽເ-ໄ" "w" ?1) ; vowel base | 571 | ("ະາຳຽເ-ໄ" "w" ?1) ; vowel base |
| 612 | ("ັິ-ືົໍ" "w" ?2) ; vowel upper | 572 | ("ັິ-ືົໍ" "w" ?2) ; vowel upper |
| 613 | ("ຸູ" "w" ?3) ; vowel lower | 573 | ("ຸູ" "w" ?3) ; vowel lower |
| @@ -640,9 +600,8 @@ | |||
| 640 | 600 | ||
| 641 | ;; Thai character set (TIS620) | 601 | ;; Thai character set (TIS620) |
| 642 | 602 | ||
| 643 | ;; (modify-category-entry (make-char 'thai-tis620) ?t) | 603 | (modify-category-entry '(#xe00 . #xe7f) ?t) |
| 644 | (dotimes (i (1+ (- #xe7f #xe00))) | 604 | (map-charset-chars #'modify-category-entry 'thai-tis620 ?t) |
| 645 | (modify-category-entry (decode-char 'ucs (+ i #xe00)) ?t)) | ||
| 646 | 605 | ||
| 647 | (let ((deflist '(;; chars syntax category | 606 | (let ((deflist '(;; chars syntax category |
| 648 | ("ก-รลว-ฮ" "w" ?0) ; consonant | 607 | ("ก-รลว-ฮ" "w" ?0) ; consonant |
| @@ -677,10 +636,9 @@ | |||
| 677 | 636 | ||
| 678 | ;; Tibetan character set | 637 | ;; Tibetan character set |
| 679 | 638 | ||
| 680 | ;; (modify-category-entry (make-char 'tibetan) ?q) | 639 | (modify-category-entry '(#xf00 . #xfff) ?q) |
| 681 | ;; (modify-category-entry (make-char 'tibetan-1-column) ?q) | 640 | (map-charset-chars #'modify-category-entry 'tibetan ?q) |
| 682 | (dotimes (i (1+ (- #xfff #xf00))) | 641 | (map-charset-chars #'modify-category-entry 'tibetan-1-column ?q) |
| 683 | (modify-category-entry (decode-char 'ucs (+ i #xf00)) ?q)) | ||
| 684 | 642 | ||
| 685 | (let ((deflist '(;; chars syntax category | 643 | (let ((deflist '(;; chars syntax category |
| 686 | ("-" "w" ?0) ; consonant | 644 | ("-" "w" ?0) ; consonant |
| @@ -697,18 +655,6 @@ | |||
| 697 | ("" "." ?>) ; | 655 | ("" "." ?>) ; |
| 698 | ("-" "." ?<) ; prohibition | 656 | ("-" "." ?<) ; prohibition |
| 699 | ("----" "." ?q) ; others | 657 | ("----" "." ?q) ; others |
| 700 | |||
| 701 | ;; Unicode version (not complete) | ||
| 702 | ("ཀ-ཀྵཪ" "w" ?0) ; consonant | ||
| 703 | ("ྐ-ྐྵྺྻྼ" "w" ?0) ; | ||
| 704 | ("ིེཻོཽྀ" "w" ?2) ; upper vowel | ||
| 705 | ("ཾྂྃ྆྇ྈྉྊྋ" "w" ?2) ; upper modifier | ||
| 706 | ("྄ཱུ༙༵༷" "w" ?3) ; lowel vowel/modifier | ||
| 707 | ("༠-༩༪-༳" "w" ?6) ; digit | ||
| 708 | ("་།-༒༔ཿ" "." ?|) ; line-break char | ||
| 709 | ("༈་།-༒༔ཿ༽༴" "." ?>) ; prohibition | ||
| 710 | ("ༀ-༊༼࿁࿂྅" "." ?<) ; prohibition | ||
| 711 | ("༓༕-༘༚-༟༶༸-༻༾༿྾྿-࿏" "." ?q) ; others | ||
| 712 | )) | 658 | )) |
| 713 | elm chars len syntax category to ch i) | 659 | elm chars len syntax category to ch i) |
| 714 | (while deflist | 660 | (while deflist |
| @@ -734,15 +680,13 @@ | |||
| 734 | 680 | ||
| 735 | ;; Vietnamese character set | 681 | ;; Vietnamese character set |
| 736 | 682 | ||
| 737 | ;; (let ((lower (make-char 'vietnamese-viscii-lower)) | 683 | ;; To make a word with Latin characters |
| 738 | ;; (upper (make-char 'vietnamese-viscii-upper))) | 684 | (map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?l) |
| 739 | ;; (modify-syntax-entry lower "w") | 685 | (map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?v) |
| 740 | ;; (modify-syntax-entry upper "w") | 686 | |
| 741 | ;; (modify-category-entry lower ?v) | 687 | (map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?l) |
| 742 | ;; (modify-category-entry upper ?v) | 688 | (map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?v) |
| 743 | ;; (modify-category-entry lower ?l) ; To make a word with | 689 | ;; Fixme Unicode versions of Vietnamese categeory. |
| 744 | ;; (modify-category-entry upper ?l) ; latin characters. | ||
| 745 | ;; ) | ||
| 746 | 690 | ||
| 747 | (let ((tbl (standard-case-table)) | 691 | (let ((tbl (standard-case-table)) |
| 748 | (i 32)) | 692 | (i 32)) |
| @@ -766,17 +710,14 @@ | |||
| 766 | ;; Latin Extended-A, Latin Extended-B | 710 | ;; Latin Extended-A, Latin Extended-B |
| 767 | (setq c #x0100) | 711 | (setq c #x0100) |
| 768 | (while (<= c #x0233) | 712 | (while (<= c #x0233) |
| 769 | (modify-category-entry (decode-char 'ucs c) ?l) | ||
| 770 | (and (or (<= c #x012e) | 713 | (and (or (<= c #x012e) |
| 771 | (and (>= c #x014a) (<= c #x0177))) | 714 | (and (>= c #x014a) (<= c #x0177))) |
| 772 | (zerop (% c 2)) | 715 | (zerop (% c 2)) |
| 773 | (set-case-syntax-pair | 716 | (set-case-syntax-pair c (1+ c) tbl)) |
| 774 | (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)) | ||
| 775 | (and (>= c #x013a) | 717 | (and (>= c #x013a) |
| 776 | (<= c #x0148) | 718 | (<= c #x0148) |
| 777 | (zerop (% c 2)) | 719 | (zerop (% c 2)) |
| 778 | (set-case-syntax-pair | 720 | (set-case-syntax-pair (1- c) c tbl)) |
| 779 | (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl)) | ||
| 780 | (setq c (1+ c))) | 721 | (setq c (1+ c))) |
| 781 | (set-case-syntax-pair ?IJ ?ij tbl) | 722 | (set-case-syntax-pair ?IJ ?ij tbl) |
| 782 | (set-case-syntax-pair ?Ĵ ?ĵ tbl) | 723 | (set-case-syntax-pair ?Ĵ ?ĵ tbl) |
| @@ -883,28 +824,25 @@ | |||
| 883 | (set-case-syntax-pair ?Ȳ ?ȳ tbl) | 824 | (set-case-syntax-pair ?Ȳ ?ȳ tbl) |
| 884 | 825 | ||
| 885 | ;; Latin Extended Additional | 826 | ;; Latin Extended Additional |
| 827 | (modify-category-entry '(#x1e00 . #x1ef9) ?l) | ||
| 886 | (setq c #x1e00) | 828 | (setq c #x1e00) |
| 887 | (while (<= c #x1ef9) | 829 | (while (<= c #x1ef9) |
| 888 | (modify-category-entry (decode-char 'ucs c) ?l) | ||
| 889 | (and (zerop (% c 2)) | 830 | (and (zerop (% c 2)) |
| 890 | (or (<= c #x1e94) (>= c #x1ea0)) | 831 | (or (<= c #x1e94) (>= c #x1ea0)) |
| 891 | (set-case-syntax-pair | 832 | (set-case-syntax-pair c (1+ c) tbl)) |
| 892 | (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)) | ||
| 893 | (setq c (1+ c))) | 833 | (setq c (1+ c))) |
| 894 | 834 | ||
| 895 | ;; Greek | 835 | ;; Greek |
| 836 | (modify-category-entry '(#x0370 . #x03ff) ?g) | ||
| 896 | (setq c #x0370) | 837 | (setq c #x0370) |
| 897 | (while (<= c #x03ff) | 838 | (while (<= c #x03ff) |
| 898 | (modify-category-entry (decode-char 'ucs c) ?g) | ||
| 899 | (if (or (and (>= c #x0391) (<= c #x03a1)) | 839 | (if (or (and (>= c #x0391) (<= c #x03a1)) |
| 900 | (and (>= c #x03a3) (<= c #x03ab))) | 840 | (and (>= c #x03a3) (<= c #x03ab))) |
| 901 | (set-case-syntax-pair | 841 | (set-case-syntax-pair c (+ c 32) tbl)) |
| 902 | (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl)) | ||
| 903 | (and (>= c #x03da) | 842 | (and (>= c #x03da) |
| 904 | (<= c #x03ee) | 843 | (<= c #x03ee) |
| 905 | (zerop (% c 2)) | 844 | (zerop (% c 2)) |
| 906 | (set-case-syntax-pair | 845 | (set-case-syntax-pair c (1+ c) tbl)) |
| 907 | (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)) | ||
| 908 | (setq c (1+ c))) | 846 | (setq c (1+ c))) |
| 909 | (set-case-syntax-pair ?Ά ?ά tbl) | 847 | (set-case-syntax-pair ?Ά ?ά tbl) |
| 910 | (set-case-syntax-pair ?Έ ?έ tbl) | 848 | (set-case-syntax-pair ?Έ ?έ tbl) |
| @@ -917,20 +855,18 @@ | |||
| 917 | ;; Armenian | 855 | ;; Armenian |
| 918 | (setq c #x531) | 856 | (setq c #x531) |
| 919 | (while (<= c #x556) | 857 | (while (<= c #x556) |
| 920 | (set-case-syntax-pair (decode-char 'ucs c) | 858 | (set-case-syntax-pair c (+ c #x30) tbl) |
| 921 | (decode-char 'ucs (+ c #x30)) tbl) | ||
| 922 | (setq c (1+ c))) | 859 | (setq c (1+ c))) |
| 923 | 860 | ||
| 924 | ;; Greek Extended | 861 | ;; Greek Extended |
| 862 | (modify-category-entry '(#x1f00 . #x1fff) ?g) | ||
| 925 | (setq c #x1f00) | 863 | (setq c #x1f00) |
| 926 | (while (<= c #x1fff) | 864 | (while (<= c #x1fff) |
| 927 | (modify-category-entry (decode-char 'ucs c) ?g) | ||
| 928 | (and (<= (logand c #x000f) 7) | 865 | (and (<= (logand c #x000f) 7) |
| 929 | (<= c #x1fa7) | 866 | (<= c #x1fa7) |
| 930 | (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56))) | 867 | (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56))) |
| 931 | (/= (logand c #x00f0) 7) | 868 | (/= (logand c #x00f0) 7) |
| 932 | (set-case-syntax-pair | 869 | (set-case-syntax-pair (+ c 8) c tbl)) |
| 933 | (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl)) | ||
| 934 | (setq c (1+ c))) | 870 | (setq c (1+ c))) |
| 935 | (set-case-syntax-pair ?Ᾰ ?ᾰ tbl) | 871 | (set-case-syntax-pair ?Ᾰ ?ᾰ tbl) |
| 936 | (set-case-syntax-pair ?Ᾱ ?ᾱ tbl) | 872 | (set-case-syntax-pair ?Ᾱ ?ᾱ tbl) |
| @@ -958,23 +894,20 @@ | |||
| 958 | (set-case-syntax-pair ?ῼ ?ῳ tbl) | 894 | (set-case-syntax-pair ?ῼ ?ῳ tbl) |
| 959 | 895 | ||
| 960 | ;; cyrillic | 896 | ;; cyrillic |
| 897 | (modify-category-entry '(#x0400 . #x04FF) ?y) | ||
| 961 | (setq c #x0400) | 898 | (setq c #x0400) |
| 962 | (while (<= c #x04ff) | 899 | (while (<= c #x04ff) |
| 963 | (modify-category-entry (decode-char 'ucs c) ?y) | ||
| 964 | (and (>= c #x0400) | 900 | (and (>= c #x0400) |
| 965 | (<= c #x040f) | 901 | (<= c #x040f) |
| 966 | (set-case-syntax-pair | 902 | (set-case-syntax-pair c (+ c 80) tbl)) |
| 967 | (decode-char 'ucs c) (decode-char 'ucs (+ c 80)) tbl)) | ||
| 968 | (and (>= c #x0410) | 903 | (and (>= c #x0410) |
| 969 | (<= c #x042f) | 904 | (<= c #x042f) |
| 970 | (set-case-syntax-pair | 905 | (set-case-syntax-pair c (+ c 32) tbl)) |
| 971 | (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl)) | ||
| 972 | (and (zerop (% c 2)) | 906 | (and (zerop (% c 2)) |
| 973 | (or (and (>= c #x0460) (<= c #x0480)) | 907 | (or (and (>= c #x0460) (<= c #x0480)) |
| 974 | (and (>= c #x048c) (<= c #x04be)) | 908 | (and (>= c #x048c) (<= c #x04be)) |
| 975 | (and (>= c #x04d0) (<= c #x04f4))) | 909 | (and (>= c #x04d0) (<= c #x04f4))) |
| 976 | (set-case-syntax-pair | 910 | (set-case-syntax-pair c (1+ c) tbl)) |
| 977 | (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)) | ||
| 978 | (setq c (1+ c))) | 911 | (setq c (1+ c))) |
| 979 | (set-case-syntax-pair ?Ӂ ?ӂ tbl) | 912 | (set-case-syntax-pair ?Ӂ ?ӂ tbl) |
| 980 | (set-case-syntax-pair ?Ӄ ?ӄ tbl) | 913 | (set-case-syntax-pair ?Ӄ ?ӄ tbl) |
| @@ -995,45 +928,35 @@ | |||
| 995 | ;; Roman numerals | 928 | ;; Roman numerals |
| 996 | (setq c #x2160) | 929 | (setq c #x2160) |
| 997 | (while (<= c #x216f) | 930 | (while (<= c #x216f) |
| 998 | (set-case-syntax-pair (decode-char 'ucs c) | 931 | (set-case-syntax-pair c (+ c #x10) tbl) |
| 999 | (decode-char 'ucs (+ c #x10)) tbl) | ||
| 1000 | (setq c (1+ c))) | 932 | (setq c (1+ c))) |
| 1001 | 933 | ||
| 1002 | ;; Circled Latin | 934 | ;; Circled Latin |
| 1003 | (setq c #x24b6) | 935 | (setq c #x24b6) |
| 1004 | (while (<= c #x24cf) | 936 | (while (<= c #x24cf) |
| 1005 | (set-case-syntax-pair (decode-char 'ucs c) | 937 | (set-case-syntax-pair c (+ c 26) tbl) |
| 1006 | (decode-char 'ucs (+ c 26)) tbl) | 938 | (modify-category-entry c ?l) |
| 1007 | (modify-category-entry (decode-char 'ucs c) ?l) | 939 | (modify-category-entry (+ c 26) ?l) |
| 1008 | (modify-category-entry (decode-char 'ucs (+ c 26)) ?l) | ||
| 1009 | (setq c (1+ c))) | 940 | (setq c (1+ c))) |
| 1010 | 941 | ||
| 1011 | ;; Fullwidth Latin | 942 | ;; Fullwidth Latin |
| 1012 | (setq c #xff21) | 943 | (setq c #xff21) |
| 1013 | (while (<= c #xff3a) | 944 | (while (<= c #xff3a) |
| 1014 | (set-case-syntax-pair (decode-char 'ucs c) | 945 | (set-case-syntax-pair c (+ c #x20) tbl) |
| 1015 | (decode-char 'ucs (+ c #x20)) tbl) | 946 | (modify-category-entry c ?l) |
| 1016 | (modify-category-entry (decode-char 'ucs c) ?l) | 947 | (modify-category-entry (+ c #x20) ?l) |
| 1017 | (modify-category-entry (decode-char 'ucs (+ c #x20)) ?l) | ||
| 1018 | (setq c (1+ c))) | 948 | (setq c (1+ c))) |
| 1019 | 949 | ||
| 1020 | ;; Ohm, Kelvin, Angstrom | 950 | ;; Ohm, Kelvin, Angstrom |
| 1021 | (set-case-syntax-pair ?Ω ?ω tbl) | 951 | ;;; (set-case-syntax-pair ?Ω ?ω tbl) |
| 1022 | ;;; These mess up the case conversion of k and å. | 952 | ;;; These mess up the case conversion of k and å. |
| 1023 | ;;; (set-case-syntax-pair ?K ?k tbl) | 953 | ;;; (set-case-syntax-pair ?K ?k tbl) |
| 1024 | ;;; (set-case-syntax-pair ?Å ?å tbl) | 954 | ;;; (set-case-syntax-pair ?Å ?å tbl) |
| 1025 | 955 | ||
| 1026 | ;; Combining diacritics | 956 | ;; Combining diacritics |
| 1027 | (setq c #x300) | 957 | (modify-category-entry '(#x300 . #x362) ?^) |
| 1028 | (while (<= c #x362) | ||
| 1029 | (modify-category-entry (decode-char 'ucs c) ?^) | ||
| 1030 | (setq c (1+ c))) | ||
| 1031 | |||
| 1032 | ;; Combining marks | 958 | ;; Combining marks |
| 1033 | (setq c #x20d0) | 959 | (modify-category-entry '(#x20d0 . #x20e3) ?^) |
| 1034 | (while (<= c #x20e3) | ||
| 1035 | (modify-category-entry (decode-char 'ucs c) ?^) | ||
| 1036 | (setq c (1+ c))) | ||
| 1037 | 960 | ||
| 1038 | ;; Fixme: syntax for symbols &c | 961 | ;; Fixme: syntax for symbols &c |
| 1039 | ) | 962 | ) |
| @@ -1059,6 +982,7 @@ | |||
| 1059 | ;; For each character set, put the information of the most proper | 982 | ;; For each character set, put the information of the most proper |
| 1060 | ;; coding system to encode it by `preferred-coding-system' property. | 983 | ;; coding system to encode it by `preferred-coding-system' property. |
| 1061 | 984 | ||
| 985 | ;; Fixme: should this be junked? | ||
| 1062 | (let ((l '((latin-iso8859-1 . iso-latin-1) | 986 | (let ((l '((latin-iso8859-1 . iso-latin-1) |
| 1063 | (latin-iso8859-2 . iso-latin-2) | 987 | (latin-iso8859-2 . iso-latin-2) |
| 1064 | (latin-iso8859-3 . iso-latin-3) | 988 | (latin-iso8859-3 . iso-latin-3) |
| @@ -1131,8 +1055,7 @@ | |||
| 1131 | (#xFFE0 . #xFFEF)))) | 1055 | (#xFFE0 . #xFFEF)))) |
| 1132 | (dolist (elt l) | 1056 | (dolist (elt l) |
| 1133 | (set-char-table-range char-width-table | 1057 | (set-char-table-range char-width-table |
| 1134 | (cons (decode-char 'ucs (car elt)) | 1058 | (cons (car elt) (cdr elt)) |
| 1135 | (decode-char 'ucs (cdr elt))) | ||
| 1136 | 2))) | 1059 | 2))) |
| 1137 | (map-charset-chars | 1060 | (map-charset-chars |
| 1138 | #'(lambda (range ignore) (set-char-table-range char-width-table range 2)) | 1061 | #'(lambda (range ignore) (set-char-table-range char-width-table range 2)) |