aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Love2002-05-23 18:16:52 +0000
committerDave Love2002-05-23 18:16:52 +0000
commitabdaa411369afe215bcabe202b249acd119b8e29 (patch)
tree87fa14f35605241b3c2c23262fac09f081d2c5a2
parent354a6a959d93268a58cb44b294cba01200be38a1 (diff)
downloademacs-abdaa411369afe215bcabe202b249acd119b8e29.tar.gz
emacs-abdaa411369afe215bcabe202b249acd119b8e29.zip
Various simplifications and
additions.
-rw-r--r--lisp/international/characters.el219
1 files changed, 71 insertions, 148 deletions
diff --git a/lisp/international/characters.el b/lisp/international/characters.el
index 409170a78a3..01665440f1c 100644
--- a/lisp/international/characters.el
+++ b/lisp/international/characters.el
@@ -2,7 +2,7 @@
2 2
3;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. 3;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
4;; Licensed to the Free Software Foundation. 4;; Licensed to the Free Software Foundation.
5;; Copyright (C) 2001 Free Software Foundation, Inc. 5;; Copyright (C) 2001, 2002 Free Software Foundation, Inc.
6;; Copyright (C) 2001, 2002 6;; Copyright (C) 2001, 2002
7;; National Institute of Advanced Industrial Science and Technology (AIST) 7;; National Institute of Advanced Industrial Science and Technology (AIST)
8;; Registration Number H13PRO009 8;; Registration Number H13PRO009
@@ -113,9 +113,7 @@
113 arabic-2-column))) 113 arabic-2-column)))
114 (while charsets 114 (while charsets
115;; (modify-syntax-entry (make-char (car charsets)) "w") 115;; (modify-syntax-entry (make-char (car charsets)) "w")
116 (map-charset-chars 116 (map-charset-chars #'modify-category-entry (car charsets) ?b)
117 #'(lambda (char ignore) (modify-category-entry char ?b))
118 (car charsets))
119 (setq charsets (cdr charsets)))) 117 (setq charsets (cdr charsets))))
120(modify-category-entry '(#x600 . #x6ff) ?b) 118(modify-category-entry '(#x600 . #x6ff) ?b)
121(modify-category-entry '(#xfb50 . #xfdff) ?b) 119(modify-category-entry '(#xfb50 . #xfdff) ?b)
@@ -142,6 +140,8 @@
142(modify-syntax-entry ?\〗 ")〖") 140(modify-syntax-entry ?\〗 ")〖")
143(modify-syntax-entry ?\】 ")【") 141(modify-syntax-entry ?\】 ")【")
144 142
143;; Fixme: should any Chinese stuff be re-instated?
144
145;; (modify-category-entry (make-char 'chinese-gb2312) ?c) 145;; (modify-category-entry (make-char 'chinese-gb2312) ?c)
146;; (modify-category-entry (make-char 'chinese-gb2312) ?\|) 146;; (modify-category-entry (make-char 'chinese-gb2312) ?\|)
147;; (modify-category-entry (make-char 'chinese-gb2312 35) ?A) 147;; (modify-category-entry (make-char 'chinese-gb2312 35) ?A)
@@ -191,10 +191,7 @@
191 191
192;; Cyrillic character set (ISO-8859-5) 192;; Cyrillic character set (ISO-8859-5)
193 193
194(modify-syntax-entry (decode-char 'iso-8859-5 160) " ")
195(modify-syntax-entry ?­ ".")
196(modify-syntax-entry ?№ ".") 194(modify-syntax-entry ?№ ".")
197(modify-syntax-entry ?§ ".")
198(let ((tbl (standard-case-table))) 195(let ((tbl (standard-case-table)))
199 (set-case-syntax-pair ?Ё ?ё tbl) 196 (set-case-syntax-pair ?Ё ?ё tbl)
200 (set-case-syntax-pair ?Ђ ?ђ tbl) 197 (set-case-syntax-pair ?Ђ ?ђ tbl)
@@ -285,23 +282,16 @@
285 282
286;; Ethiopic character set 283;; Ethiopic character set
287 284
288;; (modify-category-entry (make-char 'ethiopic) ?e)
289;; (modify-syntax-entry (make-char 'ethiopic) "w")
290(modify-category-entry '(#x1200 . #x137b) ?e) 285(modify-category-entry '(#x1200 . #x137b) ?e)
291(let ((chars '(? ? ? ? ? ? ? ? ? ? ? ? ? ? 286(let ((chars '(? ? ? ? ? ? ? ? ? ? ? ? ? ?)))
292 ;; Unicode equivalents of the above:
293 ?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨ ?ﷰ ?ﷻ ?﷼ ?﷽ ?﷾ ?﷿)))
294 (while chars 287 (while chars
295 (modify-syntax-entry (car chars) ".") 288 (modify-syntax-entry (car chars) ".")
296 (setq chars (cdr chars)))) 289 (setq chars (cdr chars))))
290(map-charset-chars #'modify-category-entry 'ethiopic ?e)
297 291
298;; Greek character set (ISO-8859-7) 292;; Greek character set (ISO-8859-7)
299 293
300;; (modify-category-entry (make-char 'greek-iso8859-7) ?g) 294(modify-category-entry '(#x370 . #x3ff) ?g)
301(let ((c #x370))
302 (while (<= c #x3ff)
303 (modify-category-entry (decode-char 'ucs c) ?g)
304 (setq c (1+ c))))
305 295
306;; (let ((c 182)) 296;; (let ((c 182))
307;; (while (< c 255) 297;; (while (< c 255)
@@ -364,19 +354,15 @@
364 354
365;; Hebrew character set (ISO-8859-8) 355;; Hebrew character set (ISO-8859-8)
366 356
367;; (modify-category-entry (make-char 'hebrew-iso8859-8) ?w) 357(modify-category-entry '(#x590 . #x5f4) ?w)
368(let ((c #x591))
369 (while (<= c #x5f4)
370 (modify-category-entry (decode-char 'ucs c) ?w)
371 (setq c (1+ c))))
372 358
373;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 208) ".") ; PASEQ 359;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 208) ".") ; PASEQ
374;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 211) ".") ; SOF PASUQ 360;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 211) ".") ; SOF PASUQ
375(modify-syntax-entry (decode-char 'ucs #x5be) ".") ; MAQAF 361(modify-syntax-entry #x5be ".") ; MAQAF
376(modify-syntax-entry (decode-char 'ucs #x5c0) ".") ; PASEQ 362(modify-syntax-entry #x5c0 ".") ; PASEQ
377(modify-syntax-entry (decode-char 'ucs #x5c3) ".") ; SOF PASUQ 363(modify-syntax-entry #x5c3 ".") ; SOF PASUQ
378(modify-syntax-entry (decode-char 'ucs #x5f3) ".") ; GERESH 364(modify-syntax-entry #x5f3 ".") ; GERESH
379(modify-syntax-entry (decode-char 'ucs #x5f4) ".") ; GERSHAYIM 365(modify-syntax-entry #x5f4 ".") ; GERSHAYIM
380 366
381;; (let ((c 224)) 367;; (let ((c 224))
382;; (while (< c 251) 368;; (while (< c 251)
@@ -390,10 +376,9 @@
390;; (modify-category-entry (make-char 'indian-2-column) ?I) 376;; (modify-category-entry (make-char 'indian-2-column) ?I)
391;; (modify-category-entry (make-char 'indian-glyph) ?I) 377;; (modify-category-entry (make-char 'indian-glyph) ?I)
392;; Unicode Devanagari block 378;; Unicode Devanagari block
393(let ((c #x901)) 379(modify-category-entry '(#x901 . #x970) ?i)
394 (while (<= c #x970) 380(map-charset-chars #'modify-category-entry 'indian-is13194 ?i)
395 (modify-category-entry (decode-char 'ucs c) ?i) 381(map-charset-chars #'modify-category-entry 'indian-2-column ?i)
396 (setq c (1+ c))))
397 382
398;;; Commented out since the categories appear not to be used anywhere 383;;; Commented out since the categories appear not to be used anywhere
399;;; and word syntax is the default. 384;;; and word syntax is the default.
@@ -468,27 +453,27 @@
468;; Unicode equivalents of JISX0201-kana 453;; Unicode equivalents of JISX0201-kana
469(let ((c #xff61)) 454(let ((c #xff61))
470 (while (<= c #xff9f) 455 (while (<= c #xff9f)
471 (modify-category-entry (decode-char 'ucs c) ?k) 456 (modify-category-entry c ?k)
472 (modify-category-entry (decode-char 'ucs c) ?j) 457 (modify-category-entry c ?j)
473 (modify-category-entry (decode-char 'ucs c) ?\|) 458 (modify-category-entry c ?\|)
474 (setq c (1+ c)))) 459 (setq c (1+ c))))
475 460
476;; Katakana block 461;; Katakana block
477(let ((c #x30a0)) 462(let ((c #x30a0))
478 (while (<= c #x30ff) 463 (while (<= c #x30ff)
479 ;; ?K is double width, ?k isn't specified 464 ;; ?K is double width, ?k isn't specified
480 (modify-category-entry (decode-char 'ucs c) ?K) 465 (modify-category-entry c ?K)
481 ;;(modify-category-entry (decode-char 'ucs c) ?j) 466 ;;(modify-category-entry (decode-char 'ucs c) ?j)
482 (modify-category-entry (decode-char 'ucs c) ?\|) 467 (modify-category-entry c ?\|)
483 (setq c (1+ c)))) 468 (setq c (1+ c))))
484 469
485;; Hiragana block 470;; Hiragana block
486(let ((c #x3040)) 471(let ((c #x3040))
487 (while (<= c #x309f) 472 (while (<= c #x309f)
488 ;; ?H is actually defined to be double width 473 ;; ?H is actually defined to be double width
489 (modify-category-entry (decode-char 'ucs c) ?H) 474 (modify-category-entry c ?H)
490 ;;(modify-category-entry (decode-char 'ucs c) ?j) 475 ;;(modify-category-entry (decode-char 'ucs c) ?j)
491 (modify-category-entry (decode-char 'ucs c) ?\|) 476 (modify-category-entry c ?\|)
492 (setq c (1+ c)))) 477 (setq c (1+ c))))
493 478
494;; JISX0208 479;; JISX0208
@@ -498,8 +483,7 @@
498 (decode-char 'japanese-jisx0208 #x287E)) "_") 483 (decode-char 'japanese-jisx0208 #x287E)) "_")
499(let ((chars '(? ? ? ? ? ? ? ? ? ? ? ?))) 484(let ((chars '(? ? ? ? ? ? ? ? ? ? ? ?)))
500 (dolist (elt chars) 485 (dolist (elt chars)
501 (modify-syntax-entry (car chars) "w") 486 (modify-syntax-entry (car chars) "w")))
502 (setq chars (cdr chars))))
503(modify-syntax-entry ?\ "(") 487(modify-syntax-entry ?\ "(")
504(modify-syntax-entry ?\ "(") 488(modify-syntax-entry ?\ "(")
505(modify-syntax-entry ?\ "(") 489(modify-syntax-entry ?\ "(")
@@ -546,9 +530,7 @@
546 530
547;; JISX0201-Kana 531;; JISX0201-Kana
548;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w") 532;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w")
549(let ((chars '(?。 ?、 ?・ 533(let ((chars '(?。 ?、 ?・)))
550 ;; Unicode:
551 ?。 ?、 ?・)))
552 (while chars 534 (while chars
553 (modify-syntax-entry (car chars) ".") 535 (modify-syntax-entry (car chars) ".")
554 (setq chars (cdr chars)))) 536 (setq chars (cdr chars))))
@@ -558,6 +540,8 @@
558 540
559;; Korean character set (KSC5601) 541;; Korean character set (KSC5601)
560 542
543;; Fixme: re-instate these
544
561;; (modify-syntax-entry (make-char 'korean-ksc5601) "w") 545;; (modify-syntax-entry (make-char 'korean-ksc5601) "w")
562;; (modify-syntax-entry (make-char 'korean-ksc5601 33) "_") 546;; (modify-syntax-entry (make-char 'korean-ksc5601 33) "_")
563;; (modify-syntax-entry (make-char 'korean-ksc5601 34) "_") 547;; (modify-syntax-entry (make-char 'korean-ksc5601 34) "_")
@@ -573,41 +557,17 @@
573;; (modify-category-entry (make-char 'korean-ksc5601 43) ?K) 557;; (modify-category-entry (make-char 'korean-ksc5601 43) ?K)
574;; (modify-category-entry (make-char 'korean-ksc5601 44) ?Y) 558;; (modify-category-entry (make-char 'korean-ksc5601 44) ?Y)
575 559
576;; Latin character set (latin-1,2,3,4,5,8,9) 560;; Latin
577 561
578;; (modify-category-entry (make-char 'latin-iso8859-1) ?l) 562(modify-category-entry '(#x80 . #x024F) ?l)
579;; (modify-category-entry (make-char 'latin-iso8859-2) ?l)
580;; (modify-category-entry (make-char 'latin-iso8859-3) ?l)
581;; (modify-category-entry (make-char 'latin-iso8859-4) ?l)
582;; (modify-category-entry (make-char 'latin-iso8859-9) ?l)
583;; (modify-category-entry (make-char 'latin-iso8859-14) ?l)
584;; (modify-category-entry (make-char 'latin-iso8859-15) ?l)
585
586;; (modify-category-entry (make-char 'latin-iso8859-1 160) ?\ )
587;; (modify-category-entry (make-char 'latin-iso8859-2 160) ?\ )
588;; (modify-category-entry (make-char 'latin-iso8859-3 160) ?\ )
589;; (modify-category-entry (make-char 'latin-iso8859-4 160) ?\ )
590;; (modify-category-entry (make-char 'latin-iso8859-9 160) ?\ )
591;; (modify-category-entry (make-char 'latin-iso8859-14 160) ?\ )
592;; (modify-category-entry (make-char 'latin-iso8859-15 160) ?\ )
593 563
594;; Lao character set 564;; Lao character set
595 565
596;; (modify-category-entry (make-char 'lao) ?o) 566(modify-category-entry '(#xe80 . #xeff) ?o)
597(dotimes (i (1+ (- #xeff #xe80))) 567(map-charset-chars #'modify-category-entry 'lao ?o)
598 (modify-category-entry (decode-char 'ucs (+ i #xe80)) ?o))
599 568
600(let ((deflist '(;; chars syntax category 569;; Fixme: check this. Lao characters in HELLO seem to have all the categories
601 ("ກ-ຮ" "w" ?0) ; consonant 570(let ((deflist '(("ກ-ຮ" "w" ?0) ; consonant
602 ("ະາຳຽເ-ໄ" "w" ?1) ; vowel base
603 ("ັິ-ືົໍ" "w" ?2) ; vowel upper
604 ("ຸູ" "w" ?3) ; vowel lower
605 ("່-໌" "w" ?4) ; tone mark
606 ("ຼ" "w" ?9) ; semivowel lower
607 ("໐-໙" "w" ?6) ; digit
608 ("ຯໆ" "_" ?5) ; symbol
609 ;; Unicode equivalents
610 ("ກ-ຮ" "w" ?0) ; consonant
611 ("ະາຳຽເ-ໄ" "w" ?1) ; vowel base 571 ("ະາຳຽເ-ໄ" "w" ?1) ; vowel base
612 ("ັິ-ືົໍ" "w" ?2) ; vowel upper 572 ("ັິ-ືົໍ" "w" ?2) ; vowel upper
613 ("ຸູ" "w" ?3) ; vowel lower 573 ("ຸູ" "w" ?3) ; vowel lower
@@ -640,9 +600,8 @@
640 600
641;; Thai character set (TIS620) 601;; Thai character set (TIS620)
642 602
643;; (modify-category-entry (make-char 'thai-tis620) ?t) 603(modify-category-entry '(#xe00 . #xe7f) ?t)
644(dotimes (i (1+ (- #xe7f #xe00))) 604(map-charset-chars #'modify-category-entry 'thai-tis620 ?t)
645 (modify-category-entry (decode-char 'ucs (+ i #xe00)) ?t))
646 605
647(let ((deflist '(;; chars syntax category 606(let ((deflist '(;; chars syntax category
648 ("ก-รลว-ฮ" "w" ?0) ; consonant 607 ("ก-รลว-ฮ" "w" ?0) ; consonant
@@ -677,10 +636,9 @@
677 636
678;; Tibetan character set 637;; Tibetan character set
679 638
680;; (modify-category-entry (make-char 'tibetan) ?q) 639(modify-category-entry '(#xf00 . #xfff) ?q)
681;; (modify-category-entry (make-char 'tibetan-1-column) ?q) 640(map-charset-chars #'modify-category-entry 'tibetan ?q)
682(dotimes (i (1+ (- #xfff #xf00))) 641(map-charset-chars #'modify-category-entry 'tibetan-1-column ?q)
683 (modify-category-entry (decode-char 'ucs (+ i #xf00)) ?q))
684 642
685(let ((deflist '(;; chars syntax category 643(let ((deflist '(;; chars syntax category
686 ("-" "w" ?0) ; consonant 644 ("-" "w" ?0) ; consonant
@@ -697,18 +655,6 @@
697 ("" "." ?>) ; 655 ("" "." ?>) ;
698 ("-" "." ?<) ; prohibition 656 ("-" "." ?<) ; prohibition
699 ("----" "." ?q) ; others 657 ("----" "." ?q) ; others
700
701 ;; Unicode version (not complete)
702 ("ཀ-ཀྵཪ" "w" ?0) ; consonant
703 ("ྐ-ྐྵྺྻྼ" "w" ?0) ;
704 ("ིེཻོཽྀ" "w" ?2) ; upper vowel
705 ("ཾྂྃ྆྇ྈྉྊྋ" "w" ?2) ; upper modifier
706 ("༙཰྄ཱུ༵༷" "w" ?3) ; lowel vowel/modifier
707 ("༠-༩༪-༳" "w" ?6) ; digit
708 ("་།-༒༔ཿ" "." ?|) ; line-break char
709 ("༈་།-༒༔ཿ༽༴" "." ?>) ; prohibition
710 ("ༀ-༊༼࿁࿂྅" "." ?<) ; prohibition
711 ("༓༕-༘༚-༟༶༸-༻༾༿྾྿-࿏" "." ?q) ; others
712 )) 658 ))
713 elm chars len syntax category to ch i) 659 elm chars len syntax category to ch i)
714 (while deflist 660 (while deflist
@@ -734,15 +680,13 @@
734 680
735;; Vietnamese character set 681;; Vietnamese character set
736 682
737;; (let ((lower (make-char 'vietnamese-viscii-lower)) 683;; To make a word with Latin characters
738;; (upper (make-char 'vietnamese-viscii-upper))) 684(map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?l)
739;; (modify-syntax-entry lower "w") 685(map-charset-chars #'modify-category-entry 'vietnamese-viscii-lower ?v)
740;; (modify-syntax-entry upper "w") 686
741;; (modify-category-entry lower ?v) 687(map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?l)
742;; (modify-category-entry upper ?v) 688(map-charset-chars #'modify-category-entry 'vietnamese-viscii-upper ?v)
743;; (modify-category-entry lower ?l) ; To make a word with 689;; Fixme Unicode versions of Vietnamese categeory.
744;; (modify-category-entry upper ?l) ; latin characters.
745;; )
746 690
747(let ((tbl (standard-case-table)) 691(let ((tbl (standard-case-table))
748 (i 32)) 692 (i 32))
@@ -766,17 +710,14 @@
766 ;; Latin Extended-A, Latin Extended-B 710 ;; Latin Extended-A, Latin Extended-B
767 (setq c #x0100) 711 (setq c #x0100)
768 (while (<= c #x0233) 712 (while (<= c #x0233)
769 (modify-category-entry (decode-char 'ucs c) ?l)
770 (and (or (<= c #x012e) 713 (and (or (<= c #x012e)
771 (and (>= c #x014a) (<= c #x0177))) 714 (and (>= c #x014a) (<= c #x0177)))
772 (zerop (% c 2)) 715 (zerop (% c 2))
773 (set-case-syntax-pair 716 (set-case-syntax-pair c (1+ c) tbl))
774 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
775 (and (>= c #x013a) 717 (and (>= c #x013a)
776 (<= c #x0148) 718 (<= c #x0148)
777 (zerop (% c 2)) 719 (zerop (% c 2))
778 (set-case-syntax-pair 720 (set-case-syntax-pair (1- c) c tbl))
779 (decode-char 'ucs (1- c)) (decode-char 'ucs c) tbl))
780 (setq c (1+ c))) 721 (setq c (1+ c)))
781 (set-case-syntax-pair ?IJ ?ij tbl) 722 (set-case-syntax-pair ?IJ ?ij tbl)
782 (set-case-syntax-pair ?Ĵ ?ĵ tbl) 723 (set-case-syntax-pair ?Ĵ ?ĵ tbl)
@@ -883,28 +824,25 @@
883 (set-case-syntax-pair ?Ȳ ?ȳ tbl) 824 (set-case-syntax-pair ?Ȳ ?ȳ tbl)
884 825
885 ;; Latin Extended Additional 826 ;; Latin Extended Additional
827 (modify-category-entry '(#x1e00 . #x1ef9) ?l)
886 (setq c #x1e00) 828 (setq c #x1e00)
887 (while (<= c #x1ef9) 829 (while (<= c #x1ef9)
888 (modify-category-entry (decode-char 'ucs c) ?l)
889 (and (zerop (% c 2)) 830 (and (zerop (% c 2))
890 (or (<= c #x1e94) (>= c #x1ea0)) 831 (or (<= c #x1e94) (>= c #x1ea0))
891 (set-case-syntax-pair 832 (set-case-syntax-pair c (1+ c) tbl))
892 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
893 (setq c (1+ c))) 833 (setq c (1+ c)))
894 834
895 ;; Greek 835 ;; Greek
836 (modify-category-entry '(#x0370 . #x03ff) ?g)
896 (setq c #x0370) 837 (setq c #x0370)
897 (while (<= c #x03ff) 838 (while (<= c #x03ff)
898 (modify-category-entry (decode-char 'ucs c) ?g)
899 (if (or (and (>= c #x0391) (<= c #x03a1)) 839 (if (or (and (>= c #x0391) (<= c #x03a1))
900 (and (>= c #x03a3) (<= c #x03ab))) 840 (and (>= c #x03a3) (<= c #x03ab)))
901 (set-case-syntax-pair 841 (set-case-syntax-pair c (+ c 32) tbl))
902 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
903 (and (>= c #x03da) 842 (and (>= c #x03da)
904 (<= c #x03ee) 843 (<= c #x03ee)
905 (zerop (% c 2)) 844 (zerop (% c 2))
906 (set-case-syntax-pair 845 (set-case-syntax-pair c (1+ c) tbl))
907 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
908 (setq c (1+ c))) 846 (setq c (1+ c)))
909 (set-case-syntax-pair ?Ά ?ά tbl) 847 (set-case-syntax-pair ?Ά ?ά tbl)
910 (set-case-syntax-pair ?Έ ?έ tbl) 848 (set-case-syntax-pair ?Έ ?έ tbl)
@@ -917,20 +855,18 @@
917 ;; Armenian 855 ;; Armenian
918 (setq c #x531) 856 (setq c #x531)
919 (while (<= c #x556) 857 (while (<= c #x556)
920 (set-case-syntax-pair (decode-char 'ucs c) 858 (set-case-syntax-pair c (+ c #x30) tbl)
921 (decode-char 'ucs (+ c #x30)) tbl)
922 (setq c (1+ c))) 859 (setq c (1+ c)))
923 860
924 ;; Greek Extended 861 ;; Greek Extended
862 (modify-category-entry '(#x1f00 . #x1fff) ?g)
925 (setq c #x1f00) 863 (setq c #x1f00)
926 (while (<= c #x1fff) 864 (while (<= c #x1fff)
927 (modify-category-entry (decode-char 'ucs c) ?g)
928 (and (<= (logand c #x000f) 7) 865 (and (<= (logand c #x000f) 7)
929 (<= c #x1fa7) 866 (<= c #x1fa7)
930 (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56))) 867 (not (memq c '(#x1f50 #x1f52 #x1f54 #x1f56)))
931 (/= (logand c #x00f0) 7) 868 (/= (logand c #x00f0) 7)
932 (set-case-syntax-pair 869 (set-case-syntax-pair (+ c 8) c tbl))
933 (decode-char 'ucs (+ c 8)) (decode-char 'ucs c) tbl))
934 (setq c (1+ c))) 870 (setq c (1+ c)))
935 (set-case-syntax-pair ?Ᾰ ?ᾰ tbl) 871 (set-case-syntax-pair ?Ᾰ ?ᾰ tbl)
936 (set-case-syntax-pair ?Ᾱ ?ᾱ tbl) 872 (set-case-syntax-pair ?Ᾱ ?ᾱ tbl)
@@ -958,23 +894,20 @@
958 (set-case-syntax-pair ?ῼ ?ῳ tbl) 894 (set-case-syntax-pair ?ῼ ?ῳ tbl)
959 895
960 ;; cyrillic 896 ;; cyrillic
897 (modify-category-entry '(#x0400 . #x04FF) ?y)
961 (setq c #x0400) 898 (setq c #x0400)
962 (while (<= c #x04ff) 899 (while (<= c #x04ff)
963 (modify-category-entry (decode-char 'ucs c) ?y)
964 (and (>= c #x0400) 900 (and (>= c #x0400)
965 (<= c #x040f) 901 (<= c #x040f)
966 (set-case-syntax-pair 902 (set-case-syntax-pair c (+ c 80) tbl))
967 (decode-char 'ucs c) (decode-char 'ucs (+ c 80)) tbl))
968 (and (>= c #x0410) 903 (and (>= c #x0410)
969 (<= c #x042f) 904 (<= c #x042f)
970 (set-case-syntax-pair 905 (set-case-syntax-pair c (+ c 32) tbl))
971 (decode-char 'ucs c) (decode-char 'ucs (+ c 32)) tbl))
972 (and (zerop (% c 2)) 906 (and (zerop (% c 2))
973 (or (and (>= c #x0460) (<= c #x0480)) 907 (or (and (>= c #x0460) (<= c #x0480))
974 (and (>= c #x048c) (<= c #x04be)) 908 (and (>= c #x048c) (<= c #x04be))
975 (and (>= c #x04d0) (<= c #x04f4))) 909 (and (>= c #x04d0) (<= c #x04f4)))
976 (set-case-syntax-pair 910 (set-case-syntax-pair c (1+ c) tbl))
977 (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl))
978 (setq c (1+ c))) 911 (setq c (1+ c)))
979 (set-case-syntax-pair ?Ӂ ?ӂ tbl) 912 (set-case-syntax-pair ?Ӂ ?ӂ tbl)
980 (set-case-syntax-pair ?Ӄ ?ӄ tbl) 913 (set-case-syntax-pair ?Ӄ ?ӄ tbl)
@@ -995,45 +928,35 @@
995 ;; Roman numerals 928 ;; Roman numerals
996 (setq c #x2160) 929 (setq c #x2160)
997 (while (<= c #x216f) 930 (while (<= c #x216f)
998 (set-case-syntax-pair (decode-char 'ucs c) 931 (set-case-syntax-pair c (+ c #x10) tbl)
999 (decode-char 'ucs (+ c #x10)) tbl)
1000 (setq c (1+ c))) 932 (setq c (1+ c)))
1001 933
1002 ;; Circled Latin 934 ;; Circled Latin
1003 (setq c #x24b6) 935 (setq c #x24b6)
1004 (while (<= c #x24cf) 936 (while (<= c #x24cf)
1005 (set-case-syntax-pair (decode-char 'ucs c) 937 (set-case-syntax-pair c (+ c 26) tbl)
1006 (decode-char 'ucs (+ c 26)) tbl) 938 (modify-category-entry c ?l)
1007 (modify-category-entry (decode-char 'ucs c) ?l) 939 (modify-category-entry (+ c 26) ?l)
1008 (modify-category-entry (decode-char 'ucs (+ c 26)) ?l)
1009 (setq c (1+ c))) 940 (setq c (1+ c)))
1010 941
1011 ;; Fullwidth Latin 942 ;; Fullwidth Latin
1012 (setq c #xff21) 943 (setq c #xff21)
1013 (while (<= c #xff3a) 944 (while (<= c #xff3a)
1014 (set-case-syntax-pair (decode-char 'ucs c) 945 (set-case-syntax-pair c (+ c #x20) tbl)
1015 (decode-char 'ucs (+ c #x20)) tbl) 946 (modify-category-entry c ?l)
1016 (modify-category-entry (decode-char 'ucs c) ?l) 947 (modify-category-entry (+ c #x20) ?l)
1017 (modify-category-entry (decode-char 'ucs (+ c #x20)) ?l)
1018 (setq c (1+ c))) 948 (setq c (1+ c)))
1019 949
1020 ;; Ohm, Kelvin, Angstrom 950 ;; Ohm, Kelvin, Angstrom
1021 (set-case-syntax-pair ?Ω ?ω tbl) 951;;; (set-case-syntax-pair ?Ω ?ω tbl)
1022;;; These mess up the case conversion of k and å. 952;;; These mess up the case conversion of k and å.
1023;;; (set-case-syntax-pair ?K ?k tbl) 953;;; (set-case-syntax-pair ?K ?k tbl)
1024;;; (set-case-syntax-pair ?Å ?å tbl) 954;;; (set-case-syntax-pair ?Å ?å tbl)
1025 955
1026 ;; Combining diacritics 956 ;; Combining diacritics
1027 (setq c #x300) 957 (modify-category-entry '(#x300 . #x362) ?^)
1028 (while (<= c #x362)
1029 (modify-category-entry (decode-char 'ucs c) ?^)
1030 (setq c (1+ c)))
1031
1032 ;; Combining marks 958 ;; Combining marks
1033 (setq c #x20d0) 959 (modify-category-entry '(#x20d0 . #x20e3) ?^)
1034 (while (<= c #x20e3)
1035 (modify-category-entry (decode-char 'ucs c) ?^)
1036 (setq c (1+ c)))
1037 960
1038 ;; Fixme: syntax for symbols &c 961 ;; Fixme: syntax for symbols &c
1039 ) 962 )
@@ -1059,6 +982,7 @@
1059;; For each character set, put the information of the most proper 982;; For each character set, put the information of the most proper
1060;; coding system to encode it by `preferred-coding-system' property. 983;; coding system to encode it by `preferred-coding-system' property.
1061 984
985;; Fixme: should this be junked?
1062(let ((l '((latin-iso8859-1 . iso-latin-1) 986(let ((l '((latin-iso8859-1 . iso-latin-1)
1063 (latin-iso8859-2 . iso-latin-2) 987 (latin-iso8859-2 . iso-latin-2)
1064 (latin-iso8859-3 . iso-latin-3) 988 (latin-iso8859-3 . iso-latin-3)
@@ -1131,8 +1055,7 @@
1131 (#xFFE0 . #xFFEF)))) 1055 (#xFFE0 . #xFFEF))))
1132 (dolist (elt l) 1056 (dolist (elt l)
1133 (set-char-table-range char-width-table 1057 (set-char-table-range char-width-table
1134 (cons (decode-char 'ucs (car elt)) 1058 (cons (car elt) (cdr elt))
1135 (decode-char 'ucs (cdr elt)))
1136 2))) 1059 2)))
1137(map-charset-chars 1060(map-charset-chars
1138 #'(lambda (range ignore) (set-char-table-range char-width-table range 2)) 1061 #'(lambda (range ignore) (set-char-table-range char-width-table range 2))