diff options
| author | Kenichi Handa | 2008-10-22 05:23:47 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2008-10-22 05:23:47 +0000 |
| commit | 714b2198bf98f68e3f721675c4df8cefb7d0b268 (patch) | |
| tree | 38419404dfe6bf1bf13b5a2e8991b9b7e49c49e8 /src | |
| parent | 67a9bee7b897452b1515e491a91bd75261dabbe0 (diff) | |
| download | emacs-714b2198bf98f68e3f721675c4df8cefb7d0b268.tar.gz emacs-714b2198bf98f68e3f721675c4df8cefb7d0b268.zip | |
(word_boundary_p): Check scripts instead of charset.
Handle nil value in word-separating-categories and
word-combining-categories.
(syms_of_category): Fix docstrings of word-separating-categories
and word-combining-categories.
Diffstat (limited to 'src')
| -rw-r--r-- | src/category.c | 35 |
1 files changed, 19 insertions, 16 deletions
diff --git a/src/category.c b/src/category.c
index fca39ecb4e6..d5776fa4556 100644
--- a/src/category.c
+++ b/src/category.c
| @@ -397,7 +397,8 @@ word_boundary_p (c1, c2) | |||
| 397 | Lisp_Object tail; | 397 | Lisp_Object tail; |
| 398 | int default_result; | 398 | int default_result; |
| 399 | 399 | ||
| 400 | if (CHAR_CHARSET (c1) == CHAR_CHARSET (c2)) | 400 | if (EQ (CHAR_TABLE_REF (Vchar_script_table, c1), |
| 401 | CHAR_TABLE_REF (Vchar_script_table, c2))) | ||
| 401 | { | 402 | { |
| 402 | tail = Vword_separating_categories; | 403 | tail = Vword_separating_categories; |
| 403 | default_result = 0; | 404 | default_result = 0; |
| @@ -420,10 +421,12 @@ word_boundary_p (c1, c2) | |||
| 420 | Lisp_Object elt = XCAR (tail); | 421 | Lisp_Object elt = XCAR (tail); |
| 421 | 422 | ||
| 422 | if (CONSP (elt) | 423 | if (CONSP (elt) |
| 423 | && CATEGORYP (XCAR (elt)) | 424 | && (NILP (XCAR (elt)) |
| 424 | && CATEGORYP (XCDR (elt)) | 425 | || (CATEGORYP (XCAR (elt)) |
| 425 | && CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set1) | 426 | && CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set1))) |
| 426 | && CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set2)) | 427 | && (NILP (XCDR (elt)) |
| 428 | || (CATEGORYP (XCDR (elt)) | ||
| 429 | && CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set2)))) | ||
| 427 | return !default_result; | 430 | return !default_result; |
| 428 | } | 431 | } |
| 429 | return default_result; | 432 | return default_result; |
| @@ -468,35 +471,35 @@ syms_of_category () | |||
| 468 | 471 | ||
| 469 | Emacs treats a sequence of word constituent characters as a single | 472 | Emacs treats a sequence of word constituent characters as a single |
| 470 | word (i.e. finds no word boundary between them) only if they belong to | 473 | word (i.e. finds no word boundary between them) only if they belong to |
| 471 | the same charset. But, exceptions are allowed in the following cases. | 474 | the same script. But, exceptions are allowed in the following cases. |
| 472 | 475 | ||
| 473 | \(1) The case that characters are in different charsets is controlled | 476 | \(1) The case that characters are in different scripts is controlled |
| 474 | by the variable `word-combining-categories'. | 477 | by the variable `word-combining-categories'. |
| 475 | 478 | ||
| 476 | Emacs finds no word boundary between characters of different charsets | 479 | Emacs finds no word boundary between characters of different scripts |
| 477 | if they have categories matching some element of this list. | 480 | if they have categories matching some element of this list. |
| 478 | 481 | ||
| 479 | More precisely, if an element of this list is a cons of category CAT1 | 482 | More precisely, if an element of this list is a cons of category CAT1 |
| 480 | and CAT2, and a multibyte character C1 which has CAT1 is followed by | 483 | and CAT2, and a multibyte character C1 which has CAT1 is followed by |
| 481 | C2 which has CAT2, there's no word boundary between C1 and C2. | 484 | C2 which has CAT2, there's no word boundary between C1 and C2. |
| 482 | 485 | ||
| 483 | For instance, to tell that ASCII characters and Latin-1 characters can | 486 | For instance, to tell that Han characters followed by Hiragana |
| 484 | form a single word, the element `(?l . ?l)' should be in this list | 487 | characters can form a single word, the element `(?C . ?H)' should be |
| 485 | because both characters have the category `l' (Latin characters). | 488 | in this list. |
| 486 | 489 | ||
| 487 | \(2) The case that characters are in the same charset is controlled by | 490 | \(2) The case that characters are in the same script is controlled by |
| 488 | the variable `word-separating-categories'. | 491 | the variable `word-separating-categories'. |
| 489 | 492 | ||
| 490 | Emacs finds a word boundary between characters of the same charset | 493 | Emacs finds a word boundary between characters of the same script |
| 491 | if they have categories matching some element of this list. | 494 | if they have categories matching some element of this list. |
| 492 | 495 | ||
| 493 | More precisely, if an element of this list is a cons of category CAT1 | 496 | More precisely, if an element of this list is a cons of category CAT1 |
| 494 | and CAT2, and a multibyte character C1 which has CAT1 is followed by | 497 | and CAT2, and a multibyte character C1 which has CAT1 is followed by |
| 495 | C2 which has CAT2, there's a word boundary between C1 and C2. | 498 | C2 which has CAT2, there's a word boundary between C1 and C2. |
| 496 | 499 | ||
| 497 | For instance, to tell that there's a word boundary between Japanese | 500 | For instance, to tell that there's a word boundary between Hiragana |
| 498 | Hiragana and Japanese Kanji (both are in the same charset), the | 501 | and Katakana (both are in the same script `kana'), |
| 499 | element `(?H . ?C)' should be in this list. */); | 502 | the element `(?H . ?K)' should be in this list. */); |
| 500 | 503 | ||
| 501 | Vword_combining_categories = Qnil; | 504 | Vword_combining_categories = Qnil; |
| 502 | 505 | ||