diff options
| field | value | date |
|---|---|---|
| author | Dave Love | 2001-12-07 14:47:37 +0000 |
| committer | Dave Love | 2001-12-07 14:47:37 +0000 |
| commit | 269a5dd0adc17737ef2589084dc6ad1bb98de36a (patch) | |
| tree | 17f95f1ec901030d1eafd708cef056c65d589605 | |
| parent | aa2e3f49f337b3a14e208d1f2ddb34912730c535 (diff) | |
| download | emacs-269a5dd0adc17737ef2589084dc6ad1bb98de36a.tar.gz emacs-269a5dd0adc17737ef2589084dc6ad1bb98de36a.zip | |
Don't set word syntax (the default)
explicitly. Add a diacritic category. Add info for Unicode
equivalents of characters in various Mule charsets and for extra
Unicode characters. Don't define specific categories for
Indian/Devanagari, since they aren't used.
| -rw-r--r-- | lisp/international/characters.el | 554 |
1 file changed, 457 insertions, 97 deletions
diff --git a/lisp/international/characters.el b/lisp/international/characters.el index f578d7ff264..cf3c9f92041 100644 --- a/lisp/international/characters.el +++ b/lisp/international/characters.el | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | 2 | ||
| 3 | ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. | 3 | ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. |
| 4 | ;; Licensed to the Free Software Foundation. | 4 | ;; Licensed to the Free Software Foundation. |
| 5 | ;; Copyright (C) 2001 Free Software Foundation, Inc. | ||
| 5 | 6 | ||
| 6 | ;; Keywords: multibyte character, character set, syntax, category | 7 | ;; Keywords: multibyte character, character set, syntax, category |
| 7 | 8 | ||
| @@ -90,6 +91,8 @@ | |||
| 90 | (define-category ?> "A character which can't be placed at beginning of line.") | 91 | (define-category ?> "A character which can't be placed at beginning of line.") |
| 91 | (define-category ?< "A character which can't be placed at end of line.") | 92 | (define-category ?< "A character which can't be placed at end of line.") |
| 92 | 93 | ||
| 94 | ;; Combining | ||
| 95 | (define-category ?^ "Combining diacritic or mark") | ||
| 93 | 96 | ||
| 94 | ;;; Setting syntax and category. | 97 | ;;; Setting syntax and category. |
| 95 | 98 | ||
| @@ -108,13 +111,25 @@ | |||
| 108 | arabic-1-column | 111 | arabic-1-column |
| 109 | arabic-2-column))) | 112 | arabic-2-column))) |
| 110 | (while charsets | 113 | (while charsets |
| 111 | (modify-syntax-entry (make-char (car charsets)) "w") | 114 | ;; (modify-syntax-entry (make-char (car charsets)) "w") |
| 112 | (modify-category-entry (make-char (car charsets)) ?b) | 115 | (modify-category-entry (make-char (car charsets)) ?b) |
| 113 | (setq charsets (cdr charsets)))) | 116 | (setq charsets (cdr charsets)))) |
| 117 | (let ((ch #x600)) | ||
| 118 | (while (<= ch #x6ff) | ||
| 119 | (modify-category-entry (decode-char 'ucs ch) ?b) | ||
| 120 | (setq ch (1+ ch))) | ||
| 121 | (setq ch #xfb50) | ||
| 122 | (while (<= ch #xfdff) | ||
| 123 | (modify-category-entry (decode-char 'ucs ch) ?b) | ||
| 124 | (setq ch (1+ ch))) | ||
| 125 | (setq ch #xfe70) | ||
| 126 | (while (<= ch #xfefe) | ||
| 127 | (modify-category-entry (decode-char 'ucs ch) ?b) | ||
| 128 | (setq ch (1+ ch)))) | ||
| 114 | 129 | ||
| 115 | ;; Chinese character set (GB2312) | 130 | ;; Chinese character set (GB2312) |
| 116 | 131 | ||
| 117 | (modify-syntax-entry (make-char 'chinese-gb2312) "w") | 132 | ;; (modify-syntax-entry (make-char 'chinese-gb2312) "w") |
| 118 | (modify-syntax-entry (make-char 'chinese-gb2312 33) "_") | 133 | (modify-syntax-entry (make-char 'chinese-gb2312 33) "_") |
| 119 | (modify-syntax-entry (make-char 'chinese-gb2312 34) "_") | 134 | (modify-syntax-entry (make-char 'chinese-gb2312 34) "_") |
| 120 | (modify-syntax-entry (make-char 'chinese-gb2312 41) "_") | 135 | (modify-syntax-entry (make-char 'chinese-gb2312 41) "_") |
| @@ -132,6 +147,21 @@ | |||
| 132 | (modify-syntax-entry ?\$A!;(B ")$A!:(B") | 147 | (modify-syntax-entry ?\$A!;(B ")$A!:(B") |
| 133 | (modify-syntax-entry ?\$A!=(B ")$A!<(B") | 148 | (modify-syntax-entry ?\$A!=(B ")$A!<(B") |
| 134 | (modify-syntax-entry ?\$A!?(B ")$A!>(B") | 149 | (modify-syntax-entry ?\$A!?(B ")$A!>(B") |
| 150 | ;; Unicode equivalents of above | ||
| 151 | (modify-syntax-entry ?\$,2=T(B "($,2=U(B") | ||
| 152 | (modify-syntax-entry ?\$,2=H(B "($,2=I(B") | ||
| 153 | (modify-syntax-entry ?\$,2=J(B "($,2=K(B") | ||
| 154 | (modify-syntax-entry ?\$,2=L(B "($,2=M(B") | ||
| 155 | (modify-syntax-entry ?\$,2=N(B "($,2=O(B") | ||
| 156 | (modify-syntax-entry ?\$,2=V(B "($,2=W(B") | ||
| 157 | (modify-syntax-entry ?\$,2=P(B "($,2=Q(B") | ||
| 158 | (modify-syntax-entry ?\$,2=U(B ")$,2=T(B") | ||
| 159 | (modify-syntax-entry ?\$,2=I(B ")$,2=H(B") | ||
| 160 | (modify-syntax-entry ?\$,2=K(B ")$,2=J(B") | ||
| 161 | (modify-syntax-entry ?\$,2=M(B ")$,2=L(B") | ||
| 162 | (modify-syntax-entry ?\$,2=O(B ")$,2=N(B") | ||
| 163 | (modify-syntax-entry ?\$,2=W(B ")$,2=V(B") | ||
| 164 | (modify-syntax-entry ?\$,2=Q(B ")$,2=P(B") | ||
| 135 | 165 | ||
| 136 | (modify-category-entry (make-char 'chinese-gb2312) ?c) | 166 | (modify-category-entry (make-char 'chinese-gb2312) ?c) |
| 137 | (modify-category-entry (make-char 'chinese-gb2312) ?\|) | 167 | (modify-category-entry (make-char 'chinese-gb2312) ?\|) |
| @@ -149,8 +179,8 @@ | |||
| 149 | 179 | ||
| 150 | (let ((generic-big5-1-char (make-char 'chinese-big5-1)) | 180 | (let ((generic-big5-1-char (make-char 'chinese-big5-1)) |
| 151 | (generic-big5-2-char (make-char 'chinese-big5-2))) | 181 | (generic-big5-2-char (make-char 'chinese-big5-2))) |
| 152 | (modify-syntax-entry generic-big5-1-char "w") | 182 | ;; (modify-syntax-entry generic-big5-1-char "w") |
| 153 | (modify-syntax-entry generic-big5-2-char "w") | 183 | ;; (modify-syntax-entry generic-big5-2-char "w") |
| 154 | 184 | ||
| 155 | (modify-category-entry generic-big5-1-char ?c) | 185 | (modify-category-entry generic-big5-1-char ?c) |
| 156 | (modify-category-entry generic-big5-2-char ?c) | 186 | (modify-category-entry generic-big5-2-char ?c) |
| @@ -174,7 +204,7 @@ | |||
| 174 | generic-char) | 204 | generic-char) |
| 175 | (while cns-list | 205 | (while cns-list |
| 176 | (setq generic-char (make-char (car cns-list))) | 206 | (setq generic-char (make-char (car cns-list))) |
| 177 | (modify-syntax-entry generic-char "w") | 207 | ;; (modify-syntax-entry generic-char "w") |
| 178 | (modify-category-entry generic-char ?c) | 208 | (modify-category-entry generic-char ?c) |
| 179 | (modify-category-entry generic-char ?C) | 209 | (modify-category-entry generic-char ?C) |
| 180 | (modify-category-entry generic-char ?|) | 210 | (modify-category-entry generic-char ?|) |
| @@ -234,44 +264,103 @@ | |||
| 234 | (set-case-syntax-pair ?,LL(B ?,Ll(B tbl) | 264 | (set-case-syntax-pair ?,LL(B ?,Ll(B tbl) |
| 235 | (set-case-syntax-pair ?,LM(B ?,Lm(B tbl) | 265 | (set-case-syntax-pair ?,LM(B ?,Lm(B tbl) |
| 236 | (set-case-syntax-pair ?,LN(B ?,Ln(B tbl) | 266 | (set-case-syntax-pair ?,LN(B ?,Ln(B tbl) |
| 237 | (set-case-syntax-pair ?,LO(B ?,Lo(B tbl)) | 267 | (set-case-syntax-pair ?,LO(B ?,Lo(B tbl) |
| 268 | (set-case-syntax-pair ?$,1(!(B ?$,1(q(B tbl) | ||
| 269 | (set-case-syntax-pair ?$,1("(B ?$,1(r(B tbl) | ||
| 270 | (set-case-syntax-pair ?$,1(#(B ?$,1(s(B tbl) | ||
| 271 | (set-case-syntax-pair ?$,1($(B ?$,1(t(B tbl) | ||
| 272 | (set-case-syntax-pair ?$,1(%(B ?$,1(u(B tbl) | ||
| 273 | (set-case-syntax-pair ?$,1(&(B ?$,1(v(B tbl) | ||
| 274 | (set-case-syntax-pair ?$,1('(B ?$,1(w(B tbl) | ||
| 275 | (set-case-syntax-pair ?$,1(((B ?$,1(x(B tbl) | ||
| 276 | (set-case-syntax-pair ?$,1()(B ?$,1(y(B tbl) | ||
| 277 | (set-case-syntax-pair ?$,1(*(B ?$,1(z(B tbl) | ||
| 278 | (set-case-syntax-pair ?$,1(+(B ?$,1({(B tbl) | ||
| 279 | (set-case-syntax-pair ?$,1(,(B ?$,1(|(B tbl) | ||
| 280 | (set-case-syntax-pair ?$,1(.(B ?$,1(~(B tbl) | ||
| 281 | (set-case-syntax-pair ?$,1(/(B ?$,1((B tbl) | ||
| 282 | (set-case-syntax-pair ?$,1(0(B ?$,1(P(B tbl) | ||
| 283 | (set-case-syntax-pair ?$,1(1(B ?$,1(Q(B tbl) | ||
| 284 | (set-case-syntax-pair ?$,1(2(B ?$,1(R(B tbl) | ||
| 285 | (set-case-syntax-pair ?$,1(3(B ?$,1(S(B tbl) | ||
| 286 | (set-case-syntax-pair ?$,1(4(B ?$,1(T(B tbl) | ||
| 287 | (set-case-syntax-pair ?$,1(5(B ?$,1(U(B tbl) | ||
| 288 | (set-case-syntax-pair ?$,1(6(B ?$,1(V(B tbl) | ||
| 289 | (set-case-syntax-pair ?$,1(7(B ?$,1(W(B tbl) | ||
| 290 | (set-case-syntax-pair ?$,1(8(B ?$,1(X(B tbl) | ||
| 291 | (set-case-syntax-pair ?$,1(9(B ?$,1(Y(B tbl) | ||
| 292 | (set-case-syntax-pair ?$,1(:(B ?$,1(Z(B tbl) | ||
| 293 | (set-case-syntax-pair ?$,1(;(B ?$,1([(B tbl) | ||
| 294 | (set-case-syntax-pair ?$,1(<(B ?$,1(\(B tbl) | ||
| 295 | (set-case-syntax-pair ?$,1(=(B ?$,1(](B tbl) | ||
| 296 | (set-case-syntax-pair ?$,1(>(B ?$,1(^(B tbl) | ||
| 297 | (set-case-syntax-pair ?$,1(?(B ?$,1(_(B tbl) | ||
| 298 | (set-case-syntax-pair ?$,1(@(B ?$,1(`(B tbl) | ||
| 299 | (set-case-syntax-pair ?$,1(A(B ?$,1(a(B tbl) | ||
| 300 | (set-case-syntax-pair ?$,1(B(B ?$,1(b(B tbl) | ||
| 301 | (set-case-syntax-pair ?$,1(C(B ?$,1(c(B tbl) | ||
| 302 | (set-case-syntax-pair ?$,1(D(B ?$,1(d(B tbl) | ||
| 303 | (set-case-syntax-pair ?$,1(E(B ?$,1(e(B tbl) | ||
| 304 | (set-case-syntax-pair ?$,1(F(B ?$,1(f(B tbl) | ||
| 305 | (set-case-syntax-pair ?$,1(G(B ?$,1(g(B tbl) | ||
| 306 | (set-case-syntax-pair ?$,1(H(B ?$,1(h(B tbl) | ||
| 307 | (set-case-syntax-pair ?$,1(I(B ?$,1(i(B tbl) | ||
| 308 | (set-case-syntax-pair ?$,1(J(B ?$,1(j(B tbl) | ||
| 309 | (set-case-syntax-pair ?$,1(K(B ?$,1(k(B tbl) | ||
| 310 | (set-case-syntax-pair ?$,1(L(B ?$,1(l(B tbl) | ||
| 311 | (set-case-syntax-pair ?$,1(M(B ?$,1(m(B tbl) | ||
| 312 | (set-case-syntax-pair ?$,1(N(B ?$,1(n(B tbl) | ||
| 313 | (set-case-syntax-pair ?$,1(O(B ?$,1(o(B tbl)) | ||
| 238 | 314 | ||
| 239 | ;; Devanagari character set | 315 | ;; Devanagari character set |
| 240 | 316 | ||
| 241 | (let ((deflist '(;; chars syntax category | 317 | ;;; Commented out since the categories appear not to be used anywhere |
| 242 | ("$(5!!!"!#(B" "w" ?7) ; vowel-modifying diacritical mark | 318 | ;;; and word syntax is the default. |
| 243 | ; chandrabindu, anuswar, visarga | 319 | ;; (let ((deflist '(;; chars syntax category |
| 244 | ("$(5!$(B-$(5!2(B" "w" ?1) ; independent vowel | 320 | ;; ("$(5!!!"!#(B" "w" ?7) ; vowel-modifying diacritical mark |
| 245 | ("$(5!3(B-$(5!X(B" "w" ?0) ; consonant | 321 | ;; ; chandrabindu, anuswar, visarga |
| 246 | ("$(5!Z(B-$(5!g(B" "w" ?8) ; matra | 322 | ;; ("$(5!$(B-$(5!2(B" "w" ?1) ; independent vowel |
| 247 | ("$(5!q(B-$(5!z(B" "w" ?6) ; digit | 323 | ;; ("$(5!3(B-$(5!X(B" "w" ?0) ; consonant |
| 248 | )) | 324 | ;; ("$(5!Z(B-$(5!g(B" "w" ?8) ; matra |
| 249 | elm chars len syntax category to ch i) | 325 | ;; ("$(5!q(B-$(5!z(B" "w" ?6) ; digit |
| 250 | (while deflist | 326 | ;; ;; Unicode equivalents |
| 251 | (setq elm (car deflist)) | 327 | ;; ("$,15A5B5C(B" "w" ?7) ; vowel-modifying diacritical mark |
| 252 | (setq chars (car elm) | 328 | ;; ; chandrabindu, anuswar, visarga |
| 253 | len (length chars) | 329 | ;; ("$,15E(B-$,15M(B" "w" ?1) ; independent vowel |
| 254 | syntax (nth 1 elm) | 330 | ;; ("$,15U(B-$,15y(B" "w" ?0) ; consonant |
| 255 | category (nth 2 elm) | 331 | ;; ("$,15~(B-$,16)(B" "w" ?8) ; matra |
| 256 | i 0) | 332 | ;; ("$,16F(B-$,16O(B" "w" ?6) ; digit |
| 257 | (while (< i len) | 333 | ;; )) |
| 258 | (if (= (aref chars i) ?-) | 334 | ;; elm chars len syntax category to ch i) |
| 259 | (setq i (1+ i) | 335 | ;; (while deflist |
| 260 | to (aref chars i)) | 336 | ;; (setq elm (car deflist)) |
| 261 | (setq ch (aref chars i) | 337 | ;; (setq chars (car elm) |
| 262 | to ch)) | 338 | ;; len (length chars) |
| 263 | (while (<= ch to) | 339 | ;; syntax (nth 1 elm) |
| 264 | (modify-syntax-entry ch syntax) | 340 | ;; category (nth 2 elm) |
| 265 | (modify-category-entry ch category) | 341 | ;; i 0) |
| 266 | (setq ch (1+ ch))) | 342 | ;; (while (< i len) |
| 267 | (setq i (1+ i))) | 343 | ;; (if (= (aref chars i) ?-) |
| 268 | (setq deflist (cdr deflist)))) | 344 | ;; (setq i (1+ i) |
| 345 | ;; to (aref chars i)) | ||
| 346 | ;; (setq ch (aref chars i) | ||
| 347 | ;; to ch)) | ||
| 348 | ;; (while (<= ch to) | ||
| 349 | ;; (modify-syntax-entry ch syntax) | ||
| 350 | ;; (modify-category-entry ch category) | ||
| 351 | ;; (setq ch (1+ ch))) | ||
| 352 | ;; (setq i (1+ i))) | ||
| 353 | ;; (setq deflist (cdr deflist)))) | ||
| 269 | 354 | ||
| 270 | ;; Ethiopic character set | 355 | ;; Ethiopic character set |
| 271 | 356 | ||
| 272 | (modify-category-entry (make-char 'ethiopic) ?e) | 357 | (modify-category-entry (make-char 'ethiopic) ?e) |
| 273 | (modify-syntax-entry (make-char 'ethiopic) "w") | 358 | ;; (modify-syntax-entry (make-char 'ethiopic) "w") |
| 274 | (let ((chars '(?$(3$h(B ?$(3$i(B ?$(3$j(B ?$(3$k(B ?$(3$l(B ?$(3$m(B ?$(3$n(B ?$(3$o(B ?$(3%i(B ?$(3%t(B ?$(3%u(B ?$(3%v(B ?$(3%w(B ?$(3%x(B))) | 359 | (dotimes (i (1+ (- #x137c #x1200))) |
| 360 | (modify-category-entry (decode-char 'ucs (+ #x1200 i)) ?e)) | ||
| 361 | (let ((chars '(?$(3$h(B ?$(3$i(B ?$(3$j(B ?$(3$k(B ?$(3$l(B ?$(3$m(B ?$(3$n(B ?$(3$o(B ?$(3%i(B ?$(3%t(B ?$(3%u(B ?$(3%v(B ?$(3%w(B ?$(3%x(B | ||
| 362 | ;; Unicode equivalents of the above: | ||
| 363 | ?$,1Q!(B ?$,1Q"(B ?$,1Q#(B ?$,1Q$(B ?$,1Q%(B ?$,1Q&(B ?$,1Q'(B ?$,1Q((B ?$,3op(B ?$,3o{(B ?$,3o|(B ?$,3o}(B ?$,3o~(B ?$,3o(B))) | ||
| 275 | (while chars | 364 | (while chars |
| 276 | (modify-syntax-entry (car chars) ".") | 365 | (modify-syntax-entry (car chars) ".") |
| 277 | (setq chars (cdr chars)))) | 366 | (setq chars (cdr chars)))) |
| @@ -279,16 +368,36 @@ | |||
| 279 | ;; Greek character set (ISO-8859-7) | 368 | ;; Greek character set (ISO-8859-7) |
| 280 | 369 | ||
| 281 | (modify-category-entry (make-char 'greek-iso8859-7) ?g) | 370 | (modify-category-entry (make-char 'greek-iso8859-7) ?g) |
| 282 | 371 | (let ((c #x370)) | |
| 283 | (let ((c 182)) | 372 | (while (<= c #x3ff) |
| 284 | (while (< c 255) | 373 | (modify-category-entry (decode-char 'ucs c) ?g) |
| 285 | (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w") | ||
| 286 | (setq c (1+ c)))) | 374 | (setq c (1+ c)))) |
| 287 | (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP | 375 | |
| 376 | ;; (let ((c 182)) | ||
| 377 | ;; (while (< c 255) | ||
| 378 | ;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w") | ||
| 379 | ;; (setq c (1+ c)))) | ||
| 380 | ;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP | ||
| 288 | (modify-syntax-entry ?,F7(B ".") | 381 | (modify-syntax-entry ?,F7(B ".") |
| 289 | (modify-syntax-entry ?,F;(B ".") | 382 | (modify-syntax-entry ?,F;(B ".") |
| 290 | (modify-syntax-entry ?,F=(B ".") | 383 | (modify-syntax-entry ?,F=(B ".") |
| 291 | (let ((tbl (standard-case-table))) | 384 | (let ((tbl (standard-case-table))) |
| 385 | ;; Fixme: non-letter syntax copied from latin-1, but that's dubious | ||
| 386 | ;; in several cases. | ||
| 387 | (set-case-syntax ?,F!(B "." tbl) | ||
| 388 | (set-case-syntax ?,F"(B "." tbl) | ||
| 389 | (set-case-syntax ?,F&(B "." tbl) | ||
| 390 | (set-case-syntax ?,F&(B "_" tbl) | ||
| 391 | (set-case-syntax ?,F'(B "." tbl) | ||
| 392 | (set-case-syntax ?,F)(B "_" tbl) | ||
| 393 | (set-case-syntax ?,F+(B "." tbl) | ||
| 394 | (set-case-syntax ?,F,(B "_" tbl) | ||
| 395 | (set-case-syntax ?,F-(B "_" tbl) | ||
| 396 | (set-case-syntax ?,F/(B "." tbl) | ||
| 397 | (set-case-syntax ?,F0(B "_" tbl) | ||
| 398 | (set-case-syntax ?,F1(B "_" tbl) | ||
| 399 | ;; (set-case-syntax ?,F7(B "_" tbl) | ||
| 400 | ;; (set-case-syntax ?,F=(B "_" tbl) | ||
| 292 | (set-case-syntax-pair ?,FA(B ?,Fa(B tbl) | 401 | (set-case-syntax-pair ?,FA(B ?,Fa(B tbl) |
| 293 | (set-case-syntax-pair ?,FB(B ?,Fb(B tbl) | 402 | (set-case-syntax-pair ?,FB(B ?,Fb(B tbl) |
| 294 | (set-case-syntax-pair ?,FC(B ?,Fc(B tbl) | 403 | (set-case-syntax-pair ?,FC(B ?,Fc(B tbl) |
| @@ -321,53 +430,94 @@ | |||
| 321 | (set-case-syntax-pair ?,F6(B ?,F\(B tbl) | 430 | (set-case-syntax-pair ?,F6(B ?,F\(B tbl) |
| 322 | (set-case-syntax-pair ?,F8(B ?,F](B tbl) | 431 | (set-case-syntax-pair ?,F8(B ?,F](B tbl) |
| 323 | (set-case-syntax-pair ?,F9(B ?,F^(B tbl) | 432 | (set-case-syntax-pair ?,F9(B ?,F^(B tbl) |
| 324 | (set-case-syntax-pair ?,F:(B ?,F_(B tbl)) | 433 | (set-case-syntax-pair ?,F:(B ?,F_(B tbl) |
| 434 | ;; Unicode equivalents | ||
| 435 | (set-case-syntax-pair ?$,1&q(B ?$,1'1(B tbl) | ||
| 436 | (set-case-syntax-pair ?$,1&r(B ?$,1'2(B tbl) | ||
| 437 | (set-case-syntax-pair ?$,1&s(B ?$,1'3(B tbl) | ||
| 438 | (set-case-syntax-pair ?$,1&t(B ?$,1'4(B tbl) | ||
| 439 | (set-case-syntax-pair ?$,1&u(B ?$,1'5(B tbl) | ||
| 440 | (set-case-syntax-pair ?$,1&v(B ?$,1'6(B tbl) | ||
| 441 | (set-case-syntax-pair ?$,1&w(B ?$,1'7(B tbl) | ||
| 442 | (set-case-syntax-pair ?$,1&x(B ?$,1'8(B tbl) | ||
| 443 | (set-case-syntax-pair ?$,1&y(B ?$,1'9(B tbl) | ||
| 444 | (set-case-syntax-pair ?$,1&z(B ?$,1':(B tbl) | ||
| 445 | (set-case-syntax-pair ?$,1&{(B ?$,1';(B tbl) | ||
| 446 | (set-case-syntax-pair ?$,1&|(B ?$,1'<(B tbl) | ||
| 447 | (set-case-syntax-pair ?$,1&}(B ?$,1'=(B tbl) | ||
| 448 | (set-case-syntax-pair ?$,1&~(B ?$,1'>(B tbl) | ||
| 449 | (set-case-syntax-pair ?$,1&(B ?$,1'?(B tbl) | ||
| 450 | (set-case-syntax-pair ?$,1' (B ?$,1'@(B tbl) | ||
| 451 | (set-case-syntax-pair ?$,1'!(B ?$,1'A(B tbl) | ||
| 452 | (set-case-syntax-pair ?$,1'#(B ?$,1'C(B tbl) | ||
| 453 | (set-case-syntax-pair ?$,1'$(B ?$,1'D(B tbl) | ||
| 454 | (set-case-syntax-pair ?$,1'%(B ?$,1'E(B tbl) | ||
| 455 | (set-case-syntax-pair ?$,1'&(B ?$,1'F(B tbl) | ||
| 456 | (set-case-syntax-pair ?$,1''(B ?$,1'G(B tbl) | ||
| 457 | (set-case-syntax-pair ?$,1'((B ?$,1'H(B tbl) | ||
| 458 | (set-case-syntax-pair ?$,1')(B ?$,1'I(B tbl) | ||
| 459 | (set-case-syntax-pair ?$,1'*(B ?$,1'J(B tbl) | ||
| 460 | (set-case-syntax-pair ?$,1'+(B ?$,1'K(B tbl) | ||
| 461 | (set-case-syntax-pair ?$,1&o(B ?$,1'N(B tbl) | ||
| 462 | (set-case-syntax-pair ?$,1&n(B ?$,1'M(B tbl) | ||
| 463 | (set-case-syntax-pair ?$,1&l(B ?$,1'L(B tbl) | ||
| 464 | (set-case-syntax-pair ?$,1&f(B ?$,1',(B tbl) | ||
| 465 | (set-case-syntax-pair ?$,1&h(B ?$,1'-(B tbl) | ||
| 466 | (set-case-syntax-pair ?$,1&i(B ?$,1'.(B tbl) | ||
| 467 | (set-case-syntax-pair ?$,1&j(B ?$,1'/(B tbl)) | ||
| 325 | 468 | ||
| 326 | ;; Hebrew character set (ISO-8859-8) | 469 | ;; Hebrew character set (ISO-8859-8) |
| 327 | 470 | ||
| 328 | (modify-category-entry (make-char 'hebrew-iso8859-8) ?w) | 471 | ;; (modify-category-entry (make-char 'hebrew-iso8859-8) ?w) |
| 329 | 472 | ||
| 330 | (let ((c 224)) | 473 | ;; (let ((c 224)) |
| 331 | (while (< c 251) | 474 | ;; (while (< c 251) |
| 332 | (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w") | 475 | ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w") |
| 333 | (setq c (1+ c)))) | 476 | ;; (setq c (1+ c)))) |
| 334 | (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP | 477 | ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP |
| 335 | 478 | ||
| 336 | ;; Indian character set (IS 13194 and other Emacs original Indian charsets) | 479 | ;; Indian character set (IS 13194 and other Emacs original Indian charsets) |
| 337 | 480 | ||
| 338 | (modify-category-entry (make-char 'indian-is13194) ?i) | 481 | (modify-category-entry (make-char 'indian-is13194) ?i) |
| 339 | (modify-category-entry (make-char 'indian-2-column) ?I) | 482 | (modify-category-entry (make-char 'indian-2-column) ?I) |
| 340 | (modify-category-entry (make-char 'indian-glyph) ?I) | 483 | (modify-category-entry (make-char 'indian-glyph) ?I) |
| 484 | ;; Unicode Devanagari block | ||
| 485 | (let ((c #x901)) | ||
| 486 | (while (<= c #x970) | ||
| 487 | (modify-category-entry (decode-char 'ucs c) ?i) | ||
| 488 | (setq c (1+ c)))) | ||
| 341 | 489 | ||
| 342 | (let ((deflist | 490 | ;;; Commented out since the categories appear not to be used anywhere |
| 343 | '(;; chars syntax category | 491 | ;;; and word syntax is the default. |
| 344 | ("(5!"#(B" "w" ?7) ; vowel-modifying diacritical mark | 492 | ;; (let ((deflist ; |
| 345 | ; chandrabindu, anuswar, visarga | 493 | ;; '(;; chars syntax category |
| 346 | ("(5$(B-(52(B" "w" ?1) ; base (independent) vowel | 494 | ;; ("(5!"#(B" "w" ?7) ; vowel-modifying diacritical mark |
| 347 | ("(53(B-(5X(B" "w" ?0) ; consonant | 495 | ;; ; chandrabindu, anuswar, visarga |
| 348 | ("(5Z(B-(5g(B" "w" ?8) ; matra | 496 | ;; ("(5$(B-(52(B" "w" ?1) ; base (independent) vowel |
| 349 | ("(5q(B-(5z(B" "w" ?6) ; digit | 497 | ;; ("(53(B-(5X(B" "w" ?0) ; consonant |
| 350 | )) | 498 | ;; ("(5Z(B-(5g(B" "w" ?8) ; matra |
| 351 | elm chars len syntax category to ch i) | 499 | ;; ("(5q(B-(5z(B" "w" ?6) ; digit |
| 352 | (while deflist | 500 | ;; )) |
| 353 | (setq elm (car deflist)) | 501 | ;; elm chars len syntax category to ch i) |
| 354 | (setq chars (car elm) | 502 | ;; (while deflist |
| 355 | len (length chars) | 503 | ;; (setq elm (car deflist)) |
| 356 | syntax (nth 1 elm) | 504 | ;; (setq chars (car elm) |
| 357 | category (nth 2 elm) | 505 | ;; len (length chars) |
| 358 | i 0) | 506 | ;; syntax (nth 1 elm) |
| 359 | (while (< i len) | 507 | ;; category (nth 2 elm) |
| 360 | (if (= (aref chars i) ?-) | 508 | ;; i 0) |
| 361 | (setq i (1+ i) | 509 | ;; (while (< i len) |
| 362 | to (aref chars i)) | 510 | ;; (if (= (aref chars i) ?-) |
| 363 | (setq ch (aref chars i) | 511 | ;; (setq i (1+ i) |
| 364 | to ch)) | 512 | ;; to (aref chars i)) |
| 365 | (while (<= ch to) | 513 | ;; (setq ch (aref chars i) |
| 366 | (modify-syntax-entry ch syntax) | 514 | ;; to ch)) |
| 367 | (modify-category-entry ch category) | 515 | ;; (while (<= ch to) |
| 368 | (setq ch (1+ ch))) | 516 | ;; (modify-syntax-entry ch syntax) |
| 369 | (setq i (1+ i))) | 517 | ;; (modify-category-entry ch category) |
| 370 | (setq deflist (cdr deflist)))) | 518 | ;; (setq ch (1+ ch))) |
| 519 | ;; (setq i (1+ i))) | ||
| 520 | ;; (setq deflist (cdr deflist)))) | ||
| 371 | 521 | ||
| 372 | 522 | ||
| 373 | ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212) | 523 | ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212) |
| @@ -381,15 +531,41 @@ | |||
| 381 | (modify-category-entry (make-char 'japanese-jisx0208) ?\|) | 531 | (modify-category-entry (make-char 'japanese-jisx0208) ?\|) |
| 382 | (modify-category-entry (make-char 'japanese-jisx0212) ?\|) | 532 | (modify-category-entry (make-char 'japanese-jisx0212) ?\|) |
| 383 | 533 | ||
| 534 | ;; Unicode equivalents of JISX0201-kana | ||
| 535 | (let ((c #xff61)) | ||
| 536 | (while (<= c #xff9f) | ||
| 537 | (modify-category-entry (decode-char 'ucs c) ?k) | ||
| 538 | (modify-category-entry (decode-char 'ucs c) ?j) | ||
| 539 | (modify-category-entry (decode-char 'ucs c) ?\|) | ||
| 540 | (setq c (1+ c)))) | ||
| 541 | |||
| 542 | ;; Katakana block | ||
| 543 | (let ((c #x30a0)) | ||
| 544 | (while (<= c #x30ff) | ||
| 545 | ;; ?K is double width, ?k isn't specified | ||
| 546 | (modify-category-entry (decode-char 'ucs c) ?k) | ||
| 547 | (modify-category-entry (decode-char 'ucs c) ?j) | ||
| 548 | (modify-category-entry (decode-char 'ucs c) ?\|) | ||
| 549 | (setq c (1+ c)))) | ||
| 550 | |||
| 551 | ;; Hiragana block | ||
| 552 | (let ((c #x3040)) | ||
| 553 | (while (<= c #x309f) | ||
| 554 | ;; ?H is actually defined to be double width | ||
| 555 | (modify-category-entry (decode-char 'ucs c) ?H) | ||
| 556 | ;;(modify-category-entry (decode-char 'ucs c) ?j) | ||
| 557 | (modify-category-entry (decode-char 'ucs c) ?\|) | ||
| 558 | (setq c (1+ c)))) | ||
| 559 | |||
| 384 | ;; JISX0208 | 560 | ;; JISX0208 |
| 385 | (modify-syntax-entry (make-char 'japanese-jisx0208) "w") | 561 | ;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w") |
| 386 | (modify-syntax-entry (make-char 'japanese-jisx0208 33) "_") | 562 | (modify-syntax-entry (make-char 'japanese-jisx0208 33) "_") |
| 387 | (modify-syntax-entry (make-char 'japanese-jisx0208 34) "_") | 563 | (modify-syntax-entry (make-char 'japanese-jisx0208 34) "_") |
| 388 | (modify-syntax-entry (make-char 'japanese-jisx0208 40) "_") | 564 | (modify-syntax-entry (make-char 'japanese-jisx0208 40) "_") |
| 389 | (let ((chars '(?$B!<(B ?$B!+(B ?$B!,(B ?$B!3(B ?$B!4(B ?$B!5(B ?$B!6(B ?$B!7(B ?$B!8(B ?$B!9(B ?$B!:(B ?$B!;(B))) | 565 | ;; (let ((chars '(?$B!<(B ?$B!+(B ?$B!,(B ?$B!3(B ?$B!4(B ?$B!5(B ?$B!6(B ?$B!7(B ?$B!8(B ?$B!9(B ?$B!:(B ?$B!;(B))) |
| 390 | (while chars | 566 | ;; (while chars |
| 391 | (modify-syntax-entry (car chars) "w") | 567 | ;; (modify-syntax-entry (car chars) "w") |
| 392 | (setq chars (cdr chars)))) | 568 | ;; (setq chars (cdr chars)))) |
| 393 | (modify-syntax-entry ?\$B!J(B "($B!K(B") | 569 | (modify-syntax-entry ?\$B!J(B "($B!K(B") |
| 394 | (modify-syntax-entry ?\$B!N(B "($B!O(B") | 570 | (modify-syntax-entry ?\$B!N(B "($B!O(B") |
| 395 | (modify-syntax-entry ?\$B!P(B "($B!Q(B") | 571 | (modify-syntax-entry ?\$B!P(B "($B!Q(B") |
| @@ -422,7 +598,7 @@ | |||
| 422 | (setq chars (cdr chars)))) | 598 | (setq chars (cdr chars)))) |
| 423 | 599 | ||
| 424 | ;; JISX0212 | 600 | ;; JISX0212 |
| 425 | (modify-syntax-entry (make-char 'japanese-jisx0212) "w") | 601 | ;; (modify-syntax-entry (make-char 'japanese-jisx0212) "w") |
| 426 | (modify-syntax-entry (make-char 'japanese-jisx0212 33) "_") | 602 | (modify-syntax-entry (make-char 'japanese-jisx0212 33) "_") |
| 427 | (modify-syntax-entry (make-char 'japanese-jisx0212 34) "_") | 603 | (modify-syntax-entry (make-char 'japanese-jisx0212 34) "_") |
| 428 | (modify-syntax-entry (make-char 'japanese-jisx0212 35) "_") | 604 | (modify-syntax-entry (make-char 'japanese-jisx0212 35) "_") |
| @@ -430,8 +606,10 @@ | |||
| 430 | (modify-category-entry (make-char 'japanese-jisx0212 ) ?C) | 606 | (modify-category-entry (make-char 'japanese-jisx0212 ) ?C) |
| 431 | 607 | ||
| 432 | ;; JISX0201-Kana | 608 | ;; JISX0201-Kana |
| 433 | (modify-syntax-entry (make-char 'katakana-jisx0201) "w") | 609 | ;; (modify-syntax-entry (make-char 'katakana-jisx0201) "w") |
| 434 | (let ((chars '(?(I!(B ?(I$(B ?(I%(B))) | 610 | (let ((chars '(?(I!(B ?(I$(B ?(I%(B |
| 611 | ;; Unicode: | ||
| 612 | ?$,3sa(B ?$,3sd(B ?$,3se(B))) | ||
| 435 | (while chars | 613 | (while chars |
| 436 | (modify-syntax-entry (car chars) ".") | 614 | (modify-syntax-entry (car chars) ".") |
| 437 | (setq chars (cdr chars)))) | 615 | (setq chars (cdr chars)))) |
| @@ -441,7 +619,7 @@ | |||
| 441 | 619 | ||
| 442 | ;; Korean character set (KSC5601) | 620 | ;; Korean character set (KSC5601) |
| 443 | 621 | ||
| 444 | (modify-syntax-entry (make-char 'korean-ksc5601) "w") | 622 | ;; (modify-syntax-entry (make-char 'korean-ksc5601) "w") |
| 445 | (modify-syntax-entry (make-char 'korean-ksc5601 33) "_") | 623 | (modify-syntax-entry (make-char 'korean-ksc5601 33) "_") |
| 446 | (modify-syntax-entry (make-char 'korean-ksc5601 34) "_") | 624 | (modify-syntax-entry (make-char 'korean-ksc5601 34) "_") |
| 447 | (modify-syntax-entry (make-char 'korean-ksc5601 38) "_") | 625 | (modify-syntax-entry (make-char 'korean-ksc5601 38) "_") |
| @@ -477,6 +655,8 @@ | |||
| 477 | ;; Lao character set | 655 | ;; Lao character set |
| 478 | 656 | ||
| 479 | (modify-category-entry (make-char 'lao) ?o) | 657 | (modify-category-entry (make-char 'lao) ?o) |
| 658 | (dotimes (i (1+ (- #xeff #xe80))) | ||
| 659 | (modify-category-entry (decode-char 'ucs (+ i #xe80)) ?o)) | ||
| 480 | 660 | ||
| 481 | (let ((deflist '(;; chars syntax category | 661 | (let ((deflist '(;; chars syntax category |
| 482 | ("(1!(B-(1N(B" "w" ?0) ; consonant | 662 | ("(1!(B-(1N(B" "w" ?0) ; consonant |
| @@ -487,6 +667,15 @@ | |||
| 487 | ("(1\(B" "w" ?9) ; semivowel lower | 667 | ("(1\(B" "w" ?9) ; semivowel lower |
| 488 | ("(1p(B-(1y(B" "w" ?6) ; digit | 668 | ("(1p(B-(1y(B" "w" ?6) ; digit |
| 489 | ("(1Of(B" "_" ?5) ; symbol | 669 | ("(1Of(B" "_" ?5) ; symbol |
| 670 | ;; Unicode equivalents | ||
| 671 | ("$,1D!(B-$,1DN(B" "w" ?0) ; consonant | ||
| 672 | ("$,1DPDRDSD]D`(B-$,1Dd(B" "w" ?1) ; vowel base | ||
| 673 | ("$,1DQDT(B-$,1DWD[Dm(B" "w" ?2) ; vowel upper | ||
| 674 | ("$,1DXDY(B" "w" ?3) ; vowel lower | ||
| 675 | ("$,1Dh(B-$,1Dk(B" "w" ?4) ; tone mark | ||
| 676 | ("$,1D\D](B" "w" ?9) ; semivowel lower | ||
| 677 | ("$,1Dp(B-$,1Dy(B" "w" ?6) ; digit | ||
| 678 | ("$,1DODf(B" "_" ?5) ; symbol | ||
| 490 | )) | 679 | )) |
| 491 | elm chars len syntax category to ch i) | 680 | elm chars len syntax category to ch i) |
| 492 | (while deflist | 681 | (while deflist |
| @@ -503,7 +692,8 @@ | |||
| 503 | (setq ch (aref chars i) | 692 | (setq ch (aref chars i) |
| 504 | to ch)) | 693 | to ch)) |
| 505 | (while (<= ch to) | 694 | (while (<= ch to) |
| 506 | (modify-syntax-entry ch syntax) | 695 | (unless (string-equal syntax "w") |
| 696 | (modify-syntax-entry ch syntax)) | ||
| 507 | (modify-category-entry ch category) | 697 | (modify-category-entry ch category) |
| 508 | (setq ch (1+ ch))) | 698 | (setq ch (1+ ch))) |
| 509 | (setq i (1+ i))) | 699 | (setq i (1+ i))) |
| @@ -512,6 +702,8 @@ | |||
| 512 | ;; Thai character set (TIS620) | 702 | ;; Thai character set (TIS620) |
| 513 | 703 | ||
| 514 | (modify-category-entry (make-char 'thai-tis620) ?t) | 704 | (modify-category-entry (make-char 'thai-tis620) ?t) |
| 705 | (dotimes (i (1+ (- #xe7f #xe00))) | ||
| 706 | (modify-category-entry (decode-char 'ucs (+ i #xe00)) ?t)) | ||
| 515 | 707 | ||
| 516 | (let ((deflist '(;; chars syntax category | 708 | (let ((deflist '(;; chars syntax category |
| 517 | (",T!(B-,TCEG(B-,TN(B" "w" ?0) ; consonant | 709 | (",T!(B-,TCEG(B-,TN(B" "w" ?0) ; consonant |
| @@ -521,6 +713,14 @@ | |||
| 521 | (",Th(B-,Tm(B" "w" ?4) ; tone mark | 713 | (",Th(B-,Tm(B" "w" ?4) ; tone mark |
| 522 | (",Tp(B-,Ty(B" "w" ?6) ; digit | 714 | (",Tp(B-,Ty(B" "w" ?6) ; digit |
| 523 | (",TOf_oz{(B" "_" ?5) ; symbol | 715 | (",TOf_oz{(B" "_" ?5) ; symbol |
| 716 | ;; Unicode equivalents | ||
| 717 | ("$,1Ba(B-$,1C#C%C'(B-$,1C.(B" "w" ?0) ; consonant | ||
| 718 | ("$,1C$C&C0C2C3C@(B-$,1CE(B" "w" ?1) ; vowel base | ||
| 719 | ("$,1C1C4(B-$,1C7CGCN(B" "w" ?2) ; vowel upper | ||
| 720 | ("$,1C8(B-$,1C:(B" "w" ?3) ; vowel lower | ||
| 721 | ("$,1CH(B-$,1CM(B" "w" ?4) ; tone mark | ||
| 722 | ("$,1CP(B-$,1CY(B" "w" ?6) ; digit | ||
| 723 | ("$,1C/CFC?COCZC[(B" "_" ?5) ; symbol | ||
| 524 | )) | 724 | )) |
| 525 | elm chars len syntax category to ch i) | 725 | elm chars len syntax category to ch i) |
| 526 | (while deflist | 726 | (while deflist |
| @@ -537,7 +737,8 @@ | |||
| 537 | (setq ch (aref chars i) | 737 | (setq ch (aref chars i) |
| 538 | to ch)) | 738 | to ch)) |
| 539 | (while (<= ch to) | 739 | (while (<= ch to) |
| 540 | (modify-syntax-entry ch syntax) | 740 | (unless (string-equal syntax "w") |
| 741 | (modify-syntax-entry ch syntax)) | ||
| 541 | (modify-category-entry ch category) | 742 | (modify-category-entry ch category) |
| 542 | (setq ch (1+ ch))) | 743 | (setq ch (1+ ch))) |
| 543 | (setq i (1+ i))) | 744 | (setq i (1+ i))) |
| @@ -547,9 +748,11 @@ | |||
| 547 | 748 | ||
| 548 | (modify-category-entry (make-char 'tibetan) ?q) | 749 | (modify-category-entry (make-char 'tibetan) ?q) |
| 549 | (modify-category-entry (make-char 'tibetan-1-column) ?q) | 750 | (modify-category-entry (make-char 'tibetan-1-column) ?q) |
| 751 | (dotimes (i (1+ (- #xfff #xf00))) | ||
| 752 | (modify-category-entry (decode-char 'ucs (+ i #xf00)) ?q)) | ||
| 550 | 753 | ||
| 551 | (let ((deflist '(;; chars syntax category | 754 | (let ((deflist '(;; chars syntax category |
| 552 | ("$(7"!(B-$(7"J"K(B" "w" ?0) ; consonant | 755 | ("4$(7"!0"!1(B-4$(7"J0"J14"K0"K1(B" "w" ?0) ; consonant |
| 553 | ("$(7#!(B-$(7#J#K#L#M!"!#(B" "w" ?0) ; | 756 | ("$(7#!(B-$(7#J#K#L#M!"!#(B" "w" ?0) ; |
| 554 | ("$(7$!(B-$(7$e(B" "w" ?0) ; | 757 | ("$(7$!(B-$(7$e(B" "w" ?0) ; |
| 555 | ("$(7%!(B-$(7%u(B" "w" ?0) ; | 758 | ("$(7%!(B-$(7%u(B" "w" ?0) ; |
| @@ -563,6 +766,18 @@ | |||
| 563 | ("$(8!;!=!?!@!A!D"`(B" "." ?>) ; | 766 | ("$(8!;!=!?!@!A!D"`(B" "." ?>) ; |
| 564 | ("$(7!0(B-$(7!:!l#R#S"f(B" "." ?<) ; prohibition | 767 | ("$(7!0(B-$(7!:!l#R#S"f(B" "." ?<) ; prohibition |
| 565 | ("$(7!C!E(B-$(7!H!J(B-$(7!O!f!h(B-$(7!k!n!o#O#P(B-$(7#`(B" "." ?q) ; others | 768 | ("$(7!C!E(B-$(7!H!J(B-$(7!O!f!h(B-$(7!k!n!o#O#P(B-$(7#`(B" "." ?q) ; others |
| 769 | |||
| 770 | ;; Unicode version (not complete) | ||
| 771 | ("$,1F (B-$,1FIFJ(B" "w" ?0) ; consonant | ||
| 772 | ("$,1Fp(B-$,1G9G:G;G<(B" "w" ?0) ; | ||
| 773 | ("$,1FRFZF[F\F]F`(B" "w" ?2) ; upper vowel | ||
| 774 | ("$,1F^FbFcFfFgFhFiFjFk(B" "w" ?2) ; upper modifier | ||
| 775 | ("$,1EYFPFQFTFdEuEw(B" "w" ?3) ; lowel vowel/modifier | ||
| 776 | ("$,1E`(B-$,1EiEj(B-$,1Es(B" "w" ?6) ; digit | ||
| 777 | ("$,1EKEM(B-$,1ERETF_(B" "." ?|) ; line-break char | ||
| 778 | ("$,1EHEKEM(B-$,1ERETF_E}Et(B" "." ?>) ; prohibition | ||
| 779 | ("$,1E@(B-$,1EJE|GAGBFe(B" "." ?<) ; prohibition | ||
| 780 | ("$,1ESEU(B-$,1EXEZ(B-$,1E_EvEx(B-$,1E{E~EG>G?(B-$,1GO(B" "." ?q) ; others | ||
| 566 | )) | 781 | )) |
| 567 | elm chars len syntax category to ch i) | 782 | elm chars len syntax category to ch i) |
| 568 | (while deflist | 783 | (while deflist |
| @@ -579,7 +794,8 @@ | |||
| 579 | (setq ch (aref chars i) | 794 | (setq ch (aref chars i) |
| 580 | to ch)) | 795 | to ch)) |
| 581 | (while (<= ch to) | 796 | (while (<= ch to) |
| 582 | (modify-syntax-entry ch syntax) | 797 | (unless (string-equal syntax "w") |
| 798 | (modify-syntax-entry ch syntax)) | ||
| 583 | (modify-category-entry ch category) | 799 | (modify-category-entry ch category) |
| 584 | (setq ch (1+ ch))) | 800 | (setq ch (1+ ch))) |
| 585 | (setq i (1+ i))) | 801 | (setq i (1+ i))) |
| @@ -589,8 +805,8 @@ | |||
| 589 | 805 | ||
| 590 | (let ((lower (make-char 'vietnamese-viscii-lower)) | 806 | (let ((lower (make-char 'vietnamese-viscii-lower)) |
| 591 | (upper (make-char 'vietnamese-viscii-upper))) | 807 | (upper (make-char 'vietnamese-viscii-upper))) |
| 592 | (modify-syntax-entry lower "w") | 808 | ;; (modify-syntax-entry lower "w") |
| 593 | (modify-syntax-entry upper "w") | 809 | ;; (modify-syntax-entry upper "w") |
| 594 | (modify-category-entry lower ?v) | 810 | (modify-category-entry lower ?v) |
| 595 | (modify-category-entry upper ?v) | 811 | (modify-category-entry upper ?v) |
| 596 | (modify-category-entry lower ?l) ; To make a word with | 812 | (modify-category-entry lower ?l) ; To make a word with |
| @@ -615,8 +831,6 @@ | |||
| 615 | ;; Thus we have to check language-environment to handle casing | 831 | ;; Thus we have to check language-environment to handle casing |
| 616 | ;; correctly. Currently only I<->i is available. | 832 | ;; correctly. Currently only I<->i is available. |
| 617 | 833 | ||
| 618 | ;; case-syntax-pair's are not yet given for Latin Extendet-B | ||
| 619 | |||
| 620 | ;; Latin Extended-A, Latin Extended-B | 834 | ;; Latin Extended-A, Latin Extended-B |
| 621 | (setq c #x0100) | 835 | (setq c #x0100) |
| 622 | (while (<= c #x0233) | 836 | (while (<= c #x0233) |
| @@ -640,6 +854,102 @@ | |||
| 640 | (set-case-syntax-pair ?$,1!;(B ?$,1!<(B tbl) | 854 | (set-case-syntax-pair ?$,1!;(B ?$,1!<(B tbl) |
| 641 | (set-case-syntax-pair ?$,1!=(B ?$,1!>(B tbl) | 855 | (set-case-syntax-pair ?$,1!=(B ?$,1!>(B tbl) |
| 642 | 856 | ||
| 857 | ;; Latin Extended-B | ||
| 858 | (set-case-syntax-pair ?$,1!A(B ?$,1#S(B tbl) | ||
| 859 | (set-case-syntax-pair ?$,1!B(B ?$,1!C(B tbl) | ||
| 860 | (set-case-syntax-pair ?$,1!D(B ?$,1!E(B tbl) | ||
| 861 | (set-case-syntax-pair ?$,1!F(B ?$,1#T(B tbl) | ||
| 862 | (set-case-syntax-pair ?$,1!G(B ?$,1!H(B tbl) | ||
| 863 | (set-case-syntax-pair ?$,1!I(B ?$,1#V(B tbl) | ||
| 864 | (set-case-syntax-pair ?$,1!J(B ?$,1#W(B tbl) | ||
| 865 | (set-case-syntax-pair ?$,1!K(B ?$,1!L(B tbl) | ||
| 866 | (set-case-syntax-pair ?$,1!N(B ?$,1"=(B tbl) | ||
| 867 | (set-case-syntax-pair ?$,1!O(B ?$,1#Y(B tbl) | ||
| 868 | (set-case-syntax-pair ?$,1!P(B ?$,1#[(B tbl) | ||
| 869 | (set-case-syntax-pair ?$,1!Q(B ?$,1!R(B tbl) | ||
| 870 | (set-case-syntax-pair ?$,1!S(B ?$,1#`(B tbl) | ||
| 871 | (set-case-syntax-pair ?$,1!T(B ?$,1#c(B tbl) | ||
| 872 | (set-case-syntax-pair ?$,1!V(B ?$,1#i(B tbl) | ||
| 873 | (set-case-syntax-pair ?$,1!W(B ?$,1#h(B tbl) | ||
| 874 | (set-case-syntax-pair ?$,1!X(B ?$,1!Y(B tbl) | ||
| 875 | (set-case-syntax-pair ?$,1!\(B ?$,1#o(B tbl) | ||
| 876 | (set-case-syntax-pair ?$,1!](B ?$,1#r(B tbl) | ||
| 877 | (set-case-syntax-pair ?$,1!_(B ?$,1#u(B tbl) | ||
| 878 | (set-case-syntax-pair ?$,1!`(B ?$,1!a(B tbl) | ||
| 879 | (set-case-syntax-pair ?$,1!b(B ?$,1!c(B tbl) | ||
| 880 | (set-case-syntax-pair ?$,1!d(B ?$,1!e(B tbl) | ||
| 881 | (set-case-syntax-pair ?$,1!f(B ?$,1$ (B tbl) | ||
| 882 | (set-case-syntax-pair ?$,1!g(B ?$,1!h(B tbl) | ||
| 883 | (set-case-syntax-pair ?$,1!i(B ?$,1$#(B tbl) | ||
| 884 | (set-case-syntax-pair ?$,1!l(B ?$,1!m(B tbl) | ||
| 885 | (set-case-syntax-pair ?$,1!n(B ?$,1$((B tbl) | ||
| 886 | (set-case-syntax-pair ?$,1!o(B ?$,1!p(B tbl) | ||
| 887 | (set-case-syntax-pair ?$,1!q(B ?$,1$*(B tbl) | ||
| 888 | (set-case-syntax-pair ?$,1!r(B ?$,1$+(B tbl) | ||
| 889 | (set-case-syntax-pair ?$,1!s(B ?$,1!t(B tbl) | ||
| 890 | (set-case-syntax-pair ?$,1!u(B ?$,1!v(B tbl) | ||
| 891 | (set-case-syntax-pair ?$,1!w(B ?$,1$2(B tbl) | ||
| 892 | (set-case-syntax-pair ?$,1!x(B ?$,1!y(B tbl) | ||
| 893 | (set-case-syntax-pair ?$,1!|(B ?$,1!}(B tbl) | ||
| 894 | (set-case-syntax-pair ?$,1"$(B ?$,1"&(B tbl) | ||
| 895 | (set-case-syntax-pair ?$,1"%(B ?$,1"&(B tbl) | ||
| 896 | (set-case-syntax-pair ?$,1"'(B ?$,1")(B tbl) | ||
| 897 | (set-case-syntax-pair ?$,1"((B ?$,1")(B tbl) | ||
| 898 | (set-case-syntax-pair ?$,1"*(B ?$,1",(B tbl) | ||
| 899 | (set-case-syntax-pair ?$,1"+(B ?$,1",(B tbl) | ||
| 900 | (set-case-syntax-pair ?$,1"-(B ?$,1".(B tbl) | ||
| 901 | (set-case-syntax-pair ?$,1"/(B ?$,1"0(B tbl) | ||
| 902 | (set-case-syntax-pair ?$,1"1(B ?$,1"2(B tbl) | ||
| 903 | (set-case-syntax-pair ?$,1"3(B ?$,1"4(B tbl) | ||
| 904 | (set-case-syntax-pair ?$,1"5(B ?$,1"6(B tbl) | ||
| 905 | (set-case-syntax-pair ?$,1"7(B ?$,1"8(B tbl) | ||
| 906 | (set-case-syntax-pair ?$,1"9(B ?$,1":(B tbl) | ||
| 907 | (set-case-syntax-pair ?$,1";(B ?$,1"<(B tbl) | ||
| 908 | (set-case-syntax-pair ?$,1">(B ?$,1"?(B tbl) | ||
| 909 | (set-case-syntax-pair ?$,1"@(B ?$,1"A(B tbl) | ||
| 910 | (set-case-syntax-pair ?$,1"B(B ?$,1"C(B tbl) | ||
| 911 | (set-case-syntax-pair ?$,1"D(B ?$,1"E(B tbl) | ||
| 912 | (set-case-syntax-pair ?$,1"F(B ?$,1"G(B tbl) | ||
| 913 | (set-case-syntax-pair ?$,1"H(B ?$,1"I(B tbl) | ||
| 914 | (set-case-syntax-pair ?$,1"J(B ?$,1"K(B tbl) | ||
| 915 | (set-case-syntax-pair ?$,1"L(B ?$,1"M(B tbl) | ||
| 916 | (set-case-syntax-pair ?$,1"N(B ?$,1"O(B tbl) | ||
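| 917 | ;; U+01F0 LATIN SMALL LETTER J WITH CARON is skipped here: it has no single-character uppercase (CaseFolding.txt lists only a full folding: 01F0; F; 006A 030C). | ||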
| 918 | (set-case-syntax-pair ?$,1"Q(B ?$,1"S(B tbl) | ||
| 919 | (set-case-syntax-pair ?$,1"R(B ?$,1"S(B tbl) | ||
| 920 | (set-case-syntax-pair ?$,1"T(B ?$,1"U(B tbl) | ||
| 921 | (set-case-syntax-pair ?$,1"V(B ?$,1!U(B tbl) | ||
| 922 | (set-case-syntax-pair ?$,1"W(B ?$,1!(B tbl) | ||
| 923 | (set-case-syntax-pair ?$,1"X(B ?$,1"Y(B tbl) | ||
| 924 | (set-case-syntax-pair ?$,1"Z(B ?$,1"[(B tbl) | ||
| 925 | (set-case-syntax-pair ?$,1"\(B ?$,1"](B tbl) | ||
| 926 | (set-case-syntax-pair ?$,1"^(B ?$,1"_(B tbl) | ||
| 927 | (set-case-syntax-pair ?$,1"`(B ?$,1"a(B tbl) | ||
| 928 | (set-case-syntax-pair ?$,1"b(B ?$,1"c(B tbl) | ||
| 929 | (set-case-syntax-pair ?$,1"d(B ?$,1"e(B tbl) | ||
| 930 | (set-case-syntax-pair ?$,1"f(B ?$,1"g(B tbl) | ||
| 931 | (set-case-syntax-pair ?$,1"h(B ?$,1"i(B tbl) | ||
| 932 | (set-case-syntax-pair ?$,1"j(B ?$,1"k(B tbl) | ||
| 933 | (set-case-syntax-pair ?$,1"l(B ?$,1"m(B tbl) | ||
| 934 | (set-case-syntax-pair ?$,1"n(B ?$,1"o(B tbl) | ||
| 935 | (set-case-syntax-pair ?$,1"p(B ?$,1"q(B tbl) | ||
| 936 | (set-case-syntax-pair ?$,1"r(B ?$,1"s(B tbl) | ||
| 937 | (set-case-syntax-pair ?$,1"t(B ?$,1"u(B tbl) | ||
| 938 | (set-case-syntax-pair ?$,1"v(B ?$,1"w(B tbl) | ||
| 939 | (set-case-syntax-pair ?$,1"x(B ?$,1"y(B tbl) | ||
| 940 | (set-case-syntax-pair ?$,1"z(B ?$,1"{(B tbl) | ||
| 941 | (set-case-syntax-pair ?$,1"|(B ?$,1"}(B tbl) | ||
| 942 | (set-case-syntax-pair ?$,1"~(B ?$,1"(B tbl) | ||
| 943 | (set-case-syntax-pair ?$,1#"(B ?$,1##(B tbl) | ||
| 944 | (set-case-syntax-pair ?$,1#$(B ?$,1#%(B tbl) | ||
| 945 | (set-case-syntax-pair ?$,1#&(B ?$,1#'(B tbl) | ||
| 946 | (set-case-syntax-pair ?$,1#((B ?$,1#)(B tbl) | ||
| 947 | (set-case-syntax-pair ?$,1#*(B ?$,1#+(B tbl) | ||
| 948 | (set-case-syntax-pair ?$,1#,(B ?$,1#-(B tbl) | ||
| 949 | (set-case-syntax-pair ?$,1#.(B ?$,1#/(B tbl) | ||
| 950 | (set-case-syntax-pair ?$,1#0(B ?$,1#1(B tbl) | ||
| 951 | (set-case-syntax-pair ?$,1#2(B ?$,1#3(B tbl) | ||
| 952 | |||
| 643 | ;; Latin Extended Additional | 953 | ;; Latin Extended Additional |
| 644 | (setq c #x1e00) | 954 | (setq c #x1e00) |
| 645 | (while (<= c #x1ef9) | 955 | (while (<= c #x1ef9) |
| @@ -672,6 +982,13 @@ | |||
| 672 | (set-case-syntax-pair ?$,1&n(B ?$,1'M(B tbl) | 982 | (set-case-syntax-pair ?$,1&n(B ?$,1'M(B tbl) |
| 673 | (set-case-syntax-pair ?$,1&o(B ?$,1'N(B tbl) | 983 | (set-case-syntax-pair ?$,1&o(B ?$,1'N(B tbl) |
| 674 | 984 | ||
| 985 | ;; Armenian | ||
| 986 | (setq c #x531) | ||
| 987 | (while (<= c #x556) | ||
| 988 | (set-case-syntax-pair (decode-char 'ucs c) | ||
| 989 | (decode-char 'ucs (+ c #x30)) tbl) | ||
| 990 | (setq c (1+ c))) | ||
| 991 | |||
| 675 | ;; Greek Extended | 992 | ;; Greek Extended |
| 676 | (setq c #x1f00) | 993 | (setq c #x1f00) |
| 677 | (while (<= c #x1fff) | 994 | (while (<= c #x1fff) |
| @@ -742,8 +1059,51 @@ | |||
| 742 | (while (<= c #x2027) | 1059 | (while (<= c #x2027) |
| 743 | (set-case-syntax c "_" tbl) | 1060 | (set-case-syntax c "_" tbl) |
| 744 | (setq c (1+ c))) | 1061 | (setq c (1+ c))) |
| 745 | ) | ||
| 746 | 1062 | ||
| 1063 | ;; Roman numerals | ||
| 1064 | (setq c #x2160) | ||
| 1065 | (while (<= c #x216f) | ||
| 1066 | (set-case-syntax-pair (decode-char 'ucs c) | ||
| 1067 | (decode-char 'ucs (+ c #x10)) tbl) | ||
| 1068 | (setq c (1+ c))) | ||
| 1069 | |||
| 1070 | ;; Circled Latin | ||
| 1071 | (setq c #x24b6) | ||
| 1072 | (while (<= c #x24cf) | ||
| 1073 | (set-case-syntax-pair (decode-char 'ucs c) | ||
| 1074 | (decode-char 'ucs (+ c 26)) tbl) | ||
| 1075 | (modify-category-entry (decode-char 'ucs c) ?l) | ||
| 1076 | (modify-category-entry (decode-char 'ucs (+ c 26)) ?l) | ||
| 1077 | (setq c (1+ c))) | ||
| 1078 | |||
| 1079 | ;; Fullwidth Latin | ||
| 1080 | (setq c #xff21) | ||
| 1081 | (while (<= c #xff3a) | ||
| 1082 | (set-case-syntax-pair (decode-char 'ucs c) | ||
| 1083 | (decode-char 'ucs (+ c #x20)) tbl) | ||
| 1084 | (modify-category-entry (decode-char 'ucs c) ?l) | ||
| 1085 | (modify-category-entry (decode-char 'ucs (+ c #x20)) ?l) | ||
| 1086 | (setq c (1+ c))) | ||
| 1087 | |||
| 1088 | ;; Ohm, Kelvin, Angstrom | ||
| 1089 | (set-case-syntax-pair ?$,1uf(B ?$,1'I(B tbl) | ||
| 1090 | (set-case-syntax-pair ?$,1uj(B ?k tbl) | ||
| 1091 | (set-case-syntax-pair ?$,1uk(B ?,Ae(B tbl) | ||
| 1092 | |||
| 1093 | ;; Combining diacritics | ||
| 1094 | (setq c #x300) | ||
| 1095 | (while (<= c #x362) | ||
| 1096 | (modify-category-entry (decode-char 'ucs c) ?^) | ||
| 1097 | (setq c (1+ c))) | ||
| 1098 | |||
| 1099 | ;; Combining marks | ||
| 1100 | (setq c #x20d0) | ||
| 1101 | (while (<= c #x20e3) | ||
| 1102 | (modify-category-entry (decode-char 'ucs c) ?^) | ||
| 1103 | (setq c (1+ c))) | ||
| 1104 | |||
| 1105 | ;; FIXME: syntax for symbols etc. is not set here yet. | ||
| 1106 | ) | ||
| 747 | 1107 | ||
| 748 | ;;; Setting word boundary. | 1108 | ;;; Setting word boundary. |
| 749 | 1109 | ||
| @@ -815,8 +1175,8 @@ | |||
| 815 | 1175 | ||
| 816 | 1176 | ||
| 817 | ;; Setup auto-fill-chars for charsets that should invoke auto-filling. | 1177 | ;; Setup auto-fill-chars for charsets that should invoke auto-filling. |
| 818 | ;; SPACE and NEWLIE are already set. Also put `nospace-between-words' | 1178 | ;; SPACE and NEWLINE are already set. Also put `nospace-between-words' |
| 819 | ;; property to the charsets. | 1179 | ;; property on the charsets. |
| 820 | (let ((l '(katakana-jisx0201 | 1180 | (let ((l '(katakana-jisx0201 |
| 821 | japanese-jisx0208 japanese-jisx0212 | 1181 | japanese-jisx0208 japanese-jisx0212 |
| 822 | chinese-gb2312 chinese-big5-1 chinese-big5-2))) | 1182 | chinese-gb2312 chinese-big5-1 chinese-big5-2))) |