about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Dave Love 2002-06-06 11:34:24 +0000
committer Dave Love 2002-06-06 11:34:24 +0000
commit 87a39edb6e3372b726326e255270a334fa33376d (patch)
tree b0c3c4d288c7a3a002f3e41777e7d9d2a9c678bd
parent 67813f55547a5a74e59bd46f6db62ee1e7837150 (diff)
download emacs-87a39edb6e3372b726326e255270a334fa33376d.tar.gz
emacs-87a39edb6e3372b726326e255270a334fa33376d.zip
Reinstate various CJK syntax and
category setup. Remove obsolete syntax setting in Greek section. Optimize the char tables.
-rw-r--r-- lisp/international/characters.el 284
1 files changed, 96 insertions, 188 deletions
diff --git a/lisp/international/characters.el b/lisp/international/characters.el
index 17c48ba542f..7cc0d0ce29a 100644
--- a/lisp/international/characters.el
+++ b/lisp/international/characters.el
@@ -112,7 +112,6 @@
112 arabic-1-column 112 arabic-1-column
113 arabic-2-column))) 113 arabic-2-column)))
114 (while charsets 114 (while charsets
115;; (modify-syntax-entry (make-char (car charsets)) "w")
116 (map-charset-chars #'modify-category-entry (car charsets) ?b) 115 (map-charset-chars #'modify-category-entry (car charsets) ?b)
117 (setq charsets (cdr charsets)))) 116 (setq charsets (cdr charsets))))
118(modify-category-entry '(#x600 . #x6ff) ?b) 117(modify-category-entry '(#x600 . #x6ff) ?b)
@@ -121,10 +120,30 @@
121 120
122;; Chinese character set (GB2312) 121;; Chinese character set (GB2312)
123 122
124;; (modify-syntax-entry (make-char 'chinese-gb2312) "w") 123(modify-syntax-entry (cons (make-char 'chinese-gb2312 33 33)
125;; (modify-syntax-entry (make-char 'chinese-gb2312 33) "_") 124 (make-char 'chinese-gb2312 33 126))
126;; (modify-syntax-entry (make-char 'chinese-gb2312 34) "_") 125 "_")
127;; (modify-syntax-entry (make-char 'chinese-gb2312 41) "_") 126(modify-syntax-entry (cons (make-char 'chinese-gb2312 34 33)
127 (make-char 'chinese-gb2312 34 126))
128 "_")
129(modify-syntax-entry (cons (make-char 'chinese-gb2312 41 33)
130 (make-char 'chinese-gb2312 41 126))
131 "_")
132(modify-category-entry (cons (make-char 'chinese-gb2312 35 33)
133 (make-char 'chinese-gb2312 35 126))
134 ?A)
135(modify-category-entry (cons (make-char 'chinese-gb2312 36 33)
136 (make-char 'chinese-gb2312 36 126))
137 ?H)
138(modify-category-entry (cons (make-char 'chinese-gb2312 37 33)
139 (make-char 'chinese-gb2312 37 126))
140 ?K)
141(modify-category-entry (cons (make-char 'chinese-gb2312 38 33)
142 (make-char 'chinese-gb2312 38 126))
143 ?G)
144(modify-category-entry (cons (make-char 'chinese-gb2312 39 33)
145 (make-char 'chinese-gb2312 39 126))
146 ?Y)
128(modify-syntax-entry ?\〔 "(〕") 147(modify-syntax-entry ?\〔 "(〕")
129(modify-syntax-entry ?\〈 "(〉") 148(modify-syntax-entry ?\〈 "(〉")
130(modify-syntax-entry ?\《 "(》") 149(modify-syntax-entry ?\《 "(》")
@@ -140,54 +159,32 @@
140(modify-syntax-entry ?\〗 ")〖") 159(modify-syntax-entry ?\〗 ")〖")
141(modify-syntax-entry ?\】 ")【") 160(modify-syntax-entry ?\】 ")【")
142 161
143;; Fixme: should any Chinese stuff be re-instated? 162(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?c)
144 163(map-charset-chars #'modify-category-entry 'chinese-gb2312 ?|)
145;; (modify-category-entry (make-char 'chinese-gb2312) ?c) 164(let ((row 48))
146;; (modify-category-entry (make-char 'chinese-gb2312) ?\|) 165 (while (< row 127)
147;; (modify-category-entry (make-char 'chinese-gb2312 35) ?A) 166 (modify-category-entry (cons (make-char 'chinese-gb2312 row 33)
148;; (modify-category-entry (make-char 'chinese-gb2312 36) ?H) 167 (make-char 'chinese-gb2312 row 126))
149;; (modify-category-entry (make-char 'chinese-gb2312 37) ?K) 168 ?C)
150;; (modify-category-entry (make-char 'chinese-gb2312 38) ?G) 169 (setq row (1+ row))))
151;; (modify-category-entry (make-char 'chinese-gb2312 39) ?Y)
152;; (let ((row 48))
153;; (while (< row 127)
154;; (modify-category-entry (make-char 'chinese-gb2312 row) ?C)
155;; (setq row (1+ row))))
156 170
157;; Chinese character set (BIG5) 171;; Chinese character set (BIG5)
158 172
159;; (let ((generic-big5-1-char (make-char 'chinese-big5-1)) 173(map-charset-chars #'modify-category-entry 'chinese-big5-1 ?c)
160;; (generic-big5-2-char (make-char 'chinese-big5-2))) 174(map-charset-chars #'modify-category-entry 'chinese-big5-2 ?c)
161;; (modify-syntax-entry generic-big5-1-char "w") 175(map-charset-chars #'modify-category-entry 'chinese-big5-1 ?C)
162;; (modify-syntax-entry generic-big5-2-char "w") 176(map-charset-chars #'modify-category-entry 'chinese-big5-2 ?C)
163 177(map-charset-chars #'modify-category-entry 'chinese-big5-1 ?|)
164;; (modify-category-entry generic-big5-1-char ?c) 178(map-charset-chars #'modify-category-entry 'chinese-big5-2 ?|)
165;; (modify-category-entry generic-big5-2-char ?c)
166
167;; (modify-category-entry generic-big5-1-char ?C)
168;; (modify-category-entry generic-big5-2-char ?C)
169
170;; (modify-category-entry generic-big5-1-char ?\|)
171;; (modify-category-entry generic-big5-2-char ?\|))
172
173 179
174;; Chinese character set (CNS11643) 180;; Chinese character set (CNS11643)
175 181
176;; (let ((cns-list '(chinese-cns11643-1 182(dolist (c '(chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3
177;; chinese-cns11643-2 183 chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6
178;; chinese-cns11643-3 184 chinese-cns11643-7))
179;; chinese-cns11643-4 185 (map-charset-chars #'modify-category-entry c ?c)
180;; chinese-cns11643-5 186 (map-charset-chars #'modify-category-entry c ?C)
181;; chinese-cns11643-6 187 (map-charset-chars #'modify-category-entry c ?|))
182;; chinese-cns11643-7))
183;; generic-char)
184;; (while cns-list
185;; (setq generic-char (make-char (car cns-list)))
186;; (modify-syntax-entry generic-char "w")
187;; (modify-category-entry generic-char ?c)
188;; (modify-category-entry generic-char ?C)
189;; (modify-category-entry generic-char ?|)
190;; (setq cns-list (cdr cns-list))))
191 188
192;; Cyrillic character set (ISO-8859-5) 189;; Cyrillic character set (ISO-8859-5)
193 190
@@ -240,46 +237,6 @@
240 (set-case-syntax-pair ?Ю ?ю tbl) 237 (set-case-syntax-pair ?Ю ?ю tbl)
241 (set-case-syntax-pair ?Я ?я tbl)) 238 (set-case-syntax-pair ?Я ?я tbl))
242 239
243;; Devanagari character set
244
245;;; Commented out since the categories appear not to be used anywhere
246;;; and word syntax is the default.
247;; (let ((deflist '(;; chars syntax category
248;; ("" "w" ?7) ; vowel-modifying diacritical mark
249;; ; chandrabindu, anuswar, visarga
250;; ("-" "w" ?1) ; independent vowel
251;; ("-" "w" ?0) ; consonant
252;; ("-" "w" ?8) ; matra
253;; ("-" "w" ?6) ; digit
254;; ;; Unicode equivalents
255;; ("ँंः" "w" ?7) ; vowel-modifying diacritical mark
256;; ; chandrabindu, anuswar, visarga
257;; ("अ-ऍ" "w" ?1) ; independent vowel
258;; ("क-ह" "w" ?0) ; consonant
259;; ("ा-ॉ" "w" ?8) ; matra
260;; ("०-९" "w" ?6) ; digit
261;; ))
262;; elm chars len syntax category to ch i)
263;; (while deflist
264;; (setq elm (car deflist))
265;; (setq chars (car elm)
266;; len (length chars)
267;; syntax (nth 1 elm)
268;; category (nth 2 elm)
269;; i 0)
270;; (while (< i len)
271;; (if (= (aref chars i) ?-)
272;; (setq i (1+ i)
273;; to (aref chars i))
274;; (setq ch (aref chars i)
275;; to ch))
276;; (while (<= ch to)
277;; (modify-syntax-entry ch syntax)
278;; (modify-category-entry ch category)
279;; (setq ch (1+ ch)))
280;; (setq i (1+ i)))
281;; (setq deflist (cdr deflist))))
282
283;; Ethiopic character set 240;; Ethiopic character set
284 241
285(modify-category-entry '(#x1200 . #x137b) ?e) 242(modify-category-entry '(#x1200 . #x137b) ?e)
@@ -293,31 +250,7 @@
293 250
294(modify-category-entry '(#x370 . #x3ff) ?g) 251(modify-category-entry '(#x370 . #x3ff) ?g)
295 252
296;; (let ((c 182))
297;; (while (< c 255)
298;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w")
299;; (setq c (1+ c))))
300;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP
301;; (modify-syntax-entry ?· ".")
302;; (modify-syntax-entry ?» ".")
303;; (modify-syntax-entry ?½ ".")
304(let ((tbl (standard-case-table))) 253(let ((tbl (standard-case-table)))
305 ;; Fixme: non-letter syntax copied from latin-1, but that's dubious
306 ;; in several cases.
307 (set-case-syntax ?‘ "." tbl)
308 (set-case-syntax ?’ "." tbl)
309 (set-case-syntax ?¦ "." tbl)
310 (set-case-syntax ?¦ "_" tbl)
311 (set-case-syntax ?§ "." tbl)
312 (set-case-syntax ?© "_" tbl)
313 (set-case-syntax ?\« "." tbl)
314 (set-case-syntax ?¬ "_" tbl)
315 (set-case-syntax ?­ "_" tbl)
316 (set-case-syntax ?― "." tbl)
317 (set-case-syntax ?° "_" tbl)
318 (set-case-syntax ?± "_" tbl)
319;; (set-case-syntax ?· "_" tbl)
320;; (set-case-syntax ?½ "_" tbl)
321 (set-case-syntax-pair ?Α ?α tbl) 254 (set-case-syntax-pair ?Α ?α tbl)
322 (set-case-syntax-pair ?Β ?β tbl) 255 (set-case-syntax-pair ?Β ?β tbl)
323 (set-case-syntax-pair ?Γ ?γ tbl) 256 (set-case-syntax-pair ?Γ ?γ tbl)
@@ -354,64 +287,18 @@
354 287
355;; Hebrew character set (ISO-8859-8) 288;; Hebrew character set (ISO-8859-8)
356 289
357(modify-category-entry '(#x590 . #x5f4) ?w)
358
359;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 208) ".") ; PASEQ
360;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 211) ".") ; SOF PASUQ
361(modify-syntax-entry #x5be ".") ; MAQAF 290(modify-syntax-entry #x5be ".") ; MAQAF
362(modify-syntax-entry #x5c0 ".") ; PASEQ 291(modify-syntax-entry #x5c0 ".") ; PASEQ
363(modify-syntax-entry #x5c3 ".") ; SOF PASUQ 292(modify-syntax-entry #x5c3 ".") ; SOF PASUQ
364(modify-syntax-entry #x5f3 ".") ; GERESH 293(modify-syntax-entry #x5f3 ".") ; GERESH
365(modify-syntax-entry #x5f4 ".") ; GERSHAYIM 294(modify-syntax-entry #x5f4 ".") ; GERSHAYIM
366 295
367;; (let ((c 224))
368;; (while (< c 251)
369;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w")
370;; (setq c (1+ c))))
371;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP
372
373;; Indian character set (IS 13194 and other Emacs original Indian charsets) 296;; Indian character set (IS 13194 and other Emacs original Indian charsets)
374 297
375;; (modify-category-entry (make-char 'indian-is13194) ?i)
376;; (modify-category-entry (make-char 'indian-2-column) ?I)
377;; (modify-category-entry (make-char 'indian-glyph) ?I)
378;; Unicode Devanagari block
379(modify-category-entry '(#x901 . #x970) ?i) 298(modify-category-entry '(#x901 . #x970) ?i)
380(map-charset-chars #'modify-category-entry 'indian-is13194 ?i) 299(map-charset-chars #'modify-category-entry 'indian-is13194 ?i)
381(map-charset-chars #'modify-category-entry 'indian-2-column ?i) 300(map-charset-chars #'modify-category-entry 'indian-2-column ?i)
382 301
383;;; Commented out since the categories appear not to be used anywhere
384;;; and word syntax is the default.
385;; (let ((deflist ;
386;; '(;; chars syntax category
387;; ("ँंः" "w" ?7) ; vowel-modifying diacritical mark
388;; ; chandrabindu, anuswar, visarga
389;; ("अ-ऍ" "w" ?1) ; base (independent) vowel
390;; ("क-ह" "w" ?0) ; consonant
391;; ("ा-ॉ" "w" ?8) ; matra
392;; ("०-९" "w" ?6) ; digit
393;; ))
394;; elm chars len syntax category to ch i)
395;; (while deflist
396;; (setq elm (car deflist))
397;; (setq chars (car elm)
398;; len (length chars)
399;; syntax (nth 1 elm)
400;; category (nth 2 elm)
401;; i 0)
402;; (while (< i len)
403;; (if (= (aref chars i) ?-)
404;; (setq i (1+ i)
405;; to (aref chars i))
406;; (setq ch (aref chars i)
407;; to ch))
408;; (while (<= ch to)
409;; (modify-syntax-entry ch syntax)
410;; (modify-category-entry ch category)
411;; (setq ch (1+ ch)))
412;; (setq i (1+ i)))
413;; (setq deflist (cdr deflist))))
414
415 302
416;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212) 303;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212)
417 304
@@ -463,7 +350,6 @@
463 (while (<= c #x30ff) 350 (while (<= c #x30ff)
464 ;; ?K is double width, ?k isn't specified 351 ;; ?K is double width, ?k isn't specified
465 (modify-category-entry c ?K) 352 (modify-category-entry c ?K)
466 ;;(modify-category-entry (decode-char 'ucs c) ?j)
467 (modify-category-entry c ?\|) 353 (modify-category-entry c ?\|)
468 (setq c (1+ c)))) 354 (setq c (1+ c))))
469 355
@@ -472,7 +358,6 @@
472 (while (<= c #x309f) 358 (while (<= c #x309f)
473 ;; ?H is actually defined to be double width 359 ;; ?H is actually defined to be double width
474 (modify-category-entry c ?H) 360 (modify-category-entry c ?H)
475 ;;(modify-category-entry (decode-char 'ucs c) ?j)
476 (modify-category-entry c ?\|) 361 (modify-category-entry c ?\|)
477 (setq c (1+ c)))) 362 (setq c (1+ c))))
478 363
@@ -519,17 +404,23 @@
519 (setq chars (cdr chars)))) 404 (setq chars (cdr chars))))
520 405
521;; JISX0212 406;; JISX0212
522;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w") 407
523;; (modify-syntax-entry (make-char 'japanese-jisx0212 33) "_") 408(modify-syntax-entry (cons (make-char 'japanese-jisx0212 33 33)
524;; (modify-syntax-entry (make-char 'japanese-jisx0212 34) "_") 409 (make-char 'japanese-jisx0212 33 126))
525;; (modify-syntax-entry (make-char 'japanese-jisx0212 35) "_") 410 "_")
411(modify-syntax-entry (cons (make-char 'japanese-jisx0212 34 33)
412 (make-char 'japanese-jisx0212 34 126))
413 "_")
414(modify-syntax-entry (cons (make-char 'japanese-jisx0212 35 33)
415 (make-char 'japanese-jisx0212 35 126))
416 "_")
526 417
527(modify-syntax-entry (cons (decode-char 'japanese-jisx0212 #x2121) 418(modify-syntax-entry (cons (decode-char 'japanese-jisx0212 #x2121)
528 (decode-char 'japanese-jisx0212 #x237E)) 419 (decode-char 'japanese-jisx0212 #x237E))
529 "_") 420 "_")
530 421
531;; JISX0201-Kana 422;; JISX0201-Kana
532;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w") 423
533(let ((chars '(?。 ?、 ?・))) 424(let ((chars '(?。 ?、 ?・)))
534 (while chars 425 (while chars
535 (modify-syntax-entry (car chars) ".") 426 (modify-syntax-entry (car chars) ".")
@@ -540,22 +431,42 @@
540 431
541;; Korean character set (KSC5601) 432;; Korean character set (KSC5601)
542 433
543;; Fixme: re-instate these 434(map-charset-chars #'modify-category-entry 'korean-ksc5601 ?h)
544 435(modify-syntax-entry (cons (make-char 'korean-ksc5601 33 33)
545;; (modify-syntax-entry (make-char 'korean-ksc5601) "w") 436 (make-char 'korean-ksc5601 33 126))
546;; (modify-syntax-entry (make-char 'korean-ksc5601 33) "_") 437 "_")
547;; (modify-syntax-entry (make-char 'korean-ksc5601 34) "_") 438;; Fixme: Giving `invalid code' because the charset has holes --
548;; (modify-syntax-entry (make-char 'korean-ksc5601 38) "_") 439;; presumably map should be used just for unification.
549;; (modify-syntax-entry (make-char 'korean-ksc5601 39) "_") 440;; (modify-syntax-entry (cons (make-char 'korean-ksc5601 34 33)
550;; (modify-syntax-entry (make-char 'korean-ksc5601 40) "_") 441;; (make-char 'korean-ksc5601 34 126))
551;; (modify-syntax-entry (make-char 'korean-ksc5601 41) "_") 442;; "_")
552 443;; (modify-syntax-entry (cons (make-char 'korean-ksc5601 38 33)
553;; (modify-category-entry (make-char 'korean-ksc5601) ?h) 444;; (make-char 'korean-ksc5601 38 126))
554;; (modify-category-entry (make-char 'korean-ksc5601 35) ?A) 445;; "_")
555;; (modify-category-entry (make-char 'korean-ksc5601 37) ?G) 446;; (modify-syntax-entry (cons (make-char 'korean-ksc5601 39 33)
556;; (modify-category-entry (make-char 'korean-ksc5601 42) ?H) 447;; (make-char 'korean-ksc5601 39 126))
557;; (modify-category-entry (make-char 'korean-ksc5601 43) ?K) 448;; "_")
558;; (modify-category-entry (make-char 'korean-ksc5601 44) ?Y) 449(modify-syntax-entry (cons (make-char 'korean-ksc5601 40 33)
450 (make-char 'korean-ksc5601 40 126))
451 "_")
452(modify-syntax-entry (cons (make-char 'korean-ksc5601 41 33)
453 (make-char 'korean-ksc5601 41 126))
454 "_")
455(modify-category-entry (cons (make-char 'korean-ksc5601 35 33)
456 (make-char 'korean-ksc5601 35 126))
457 ?A)
458;; (modify-category-entry (cons (make-char 'korean-ksc5601 37 33)
459;; (make-char 'korean-ksc5601 37 126))
460;; ?G)
461;; (modify-category-entry (cons (make-char 'korean-ksc5601 42 33)
462;; (make-char 'korean-ksc5601 42 126))
463;; ?H)
464;; (modify-category-entry (cons (make-char 'korean-ksc5601 43 33)
465;; (make-char 'korean-ksc5601 43 126))
466;; ?K)
467;; (modify-category-entry (cons (make-char 'korean-ksc5601 44 33)
468;; (make-char 'korean-ksc5601 44 126))
469;; ?Y)
559 470
560;; Latin 471;; Latin
561 472
@@ -700,8 +611,6 @@
700 (if lc (modify-category-entry lc ?v))) 611 (if lc (modify-category-entry lc ?v)))
701 (setq i (1+ i)))) 612 (setq i (1+ i))))
702 613
703;; Unicode (mule-unicode-0100-24ff)
704
705(let ((tbl (standard-case-table)) c) 614(let ((tbl (standard-case-table)) c)
706 615
707;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN 616;; In some languages, U+0049 LATIN CAPITAL LETTER I and U+0131 LATIN
@@ -950,12 +859,6 @@
950 (modify-category-entry (+ c #x20) ?l) 859 (modify-category-entry (+ c #x20) ?l)
951 (setq c (1+ c))) 860 (setq c (1+ c)))
952 861
953 ;; Ohm, Kelvin, Angstrom
954;;; (set-case-syntax-pair ?Ω ?ω tbl)
955;;; These mess up the case conversion of k and å.
956;;; (set-case-syntax-pair ?K ?k tbl)
957;;; (set-case-syntax-pair ?Å ?å tbl)
958
959 ;; Combining diacritics 862 ;; Combining diacritics
960 (modify-category-entry '(#x300 . #x362) ?^) 863 (modify-category-entry '(#x300 . #x362) ?^)
961 ;; Combining marks 864 ;; Combining marks
@@ -1087,6 +990,11 @@
1087 (lambda (range ignore) (set-char-table-range char-width-table range 2)) 990 (lambda (range ignore) (set-char-table-range char-width-table range 2))
1088 'arabic-2-column) 991 'arabic-2-column)
1089 992
993(optimize-char-table (standard-case-table))
994(optimize-char-table char-width-table)
995(optimize-char-table (standard-category-table))
996(optimize-char-table (standard-syntax-table))
997
1090;;; Local Variables: 998;;; Local Variables:
1091;;; coding: utf-8-emacs 999;;; coding: utf-8-emacs
1092;;; End: 1000;;; End: