diff options
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 63 |
1 files changed, 41 insertions, 22 deletions
diff --git a/src/coding.c b/src/coding.c index d3093a58960..116a54e444f 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -308,7 +308,8 @@ char *coding_category_name[CODING_CATEGORY_IDX_MAX] = { | |||
| 308 | "coding-category-iso-7", | 308 | "coding-category-iso-7", |
| 309 | "coding-category-iso-8-1", | 309 | "coding-category-iso-8-1", |
| 310 | "coding-category-iso-8-2", | 310 | "coding-category-iso-8-2", |
| 311 | "coding-category-iso-else", | 311 | "coding-category-iso-7-else", |
| 312 | "coding-category-iso-8-else", | ||
| 312 | "coding-category-big5", | 313 | "coding-category-big5", |
| 313 | "coding-category-binary" | 314 | "coding-category-binary" |
| 314 | }; | 315 | }; |
| @@ -595,7 +596,8 @@ enum iso_code_class_type iso_code_class[256]; | |||
| 595 | CODING_CATEGORY_MASK_ISO_7 | 596 | CODING_CATEGORY_MASK_ISO_7 |
| 596 | CODING_CATEGORY_MASK_ISO_8_1 | 597 | CODING_CATEGORY_MASK_ISO_8_1 |
| 597 | CODING_CATEGORY_MASK_ISO_8_2 | 598 | CODING_CATEGORY_MASK_ISO_8_2 |
| 598 | CODING_CATEGORY_MASK_ISO_ELSE | 599 | CODING_CATEGORY_MASK_ISO_7_ELSE |
| 600 | CODING_CATEGORY_MASK_ISO_8_ELSE | ||
| 599 | are set. If a code which should never appear in ISO2022 is found, | 601 | are set. If a code which should never appear in ISO2022 is found, |
| 600 | returns 0. */ | 602 | returns 0. */ |
| 601 | 603 | ||
| @@ -606,7 +608,9 @@ detect_coding_iso2022 (src, src_end) | |||
| 606 | int mask = (CODING_CATEGORY_MASK_ISO_7 | 608 | int mask = (CODING_CATEGORY_MASK_ISO_7 |
| 607 | | CODING_CATEGORY_MASK_ISO_8_1 | 609 | | CODING_CATEGORY_MASK_ISO_8_1 |
| 608 | | CODING_CATEGORY_MASK_ISO_8_2 | 610 | | CODING_CATEGORY_MASK_ISO_8_2 |
| 609 | | CODING_CATEGORY_MASK_ISO_ELSE); | 611 | | CODING_CATEGORY_MASK_ISO_7_ELSE |
| 612 | | CODING_CATEGORY_MASK_ISO_8_ELSE | ||
| 613 | ); | ||
| 610 | int g1 = 0; /* 1 iff designating to G1. */ | 614 | int g1 = 0; /* 1 iff designating to G1. */ |
| 611 | int c, i; | 615 | int c, i; |
| 612 | 616 | ||
| @@ -628,24 +632,28 @@ detect_coding_iso2022 (src, src_end) | |||
| 628 | if (c == ')' || (c == '$' && *src == ')')) | 632 | if (c == ')' || (c == '$' && *src == ')')) |
| 629 | { | 633 | { |
| 630 | g1 = 1; | 634 | g1 = 1; |
| 631 | mask &= ~CODING_CATEGORY_MASK_ISO_7; | 635 | mask &= ~(CODING_CATEGORY_MASK_ISO_7 |
| 636 | | CODING_CATEGORY_MASK_ISO_7_ELSE); | ||
| 632 | } | 637 | } |
| 633 | src++; | 638 | src++; |
| 634 | break; | 639 | break; |
| 635 | } | 640 | } |
| 636 | else if (c == 'N' || c == 'O' || c == 'n' || c == 'o') | 641 | else if (c == 'N' || c == 'O' || c == 'n' || c == 'o') |
| 637 | return CODING_CATEGORY_MASK_ISO_ELSE; | 642 | mask &= (CODING_CATEGORY_MASK_ISO_7_ELSE |
| 643 | | CODING_CATEGORY_MASK_ISO_8_ELSE); | ||
| 638 | break; | 644 | break; |
| 639 | 645 | ||
| 640 | case ISO_CODE_SO: | 646 | case ISO_CODE_SO: |
| 641 | if (g1) | 647 | if (g1) |
| 642 | return CODING_CATEGORY_MASK_ISO_ELSE; | 648 | mask &= (CODING_CATEGORY_MASK_ISO_7_ELSE |
| 649 | | CODING_CATEGORY_MASK_ISO_8_ELSE); | ||
| 643 | break; | 650 | break; |
| 644 | 651 | ||
| 645 | case ISO_CODE_CSI: | 652 | case ISO_CODE_CSI: |
| 646 | case ISO_CODE_SS2: | 653 | case ISO_CODE_SS2: |
| 647 | case ISO_CODE_SS3: | 654 | case ISO_CODE_SS3: |
| 648 | mask &= ~CODING_CATEGORY_MASK_ISO_7; | 655 | mask &= ~(CODING_CATEGORY_MASK_ISO_7 |
| 656 | | CODING_CATEGORY_MASK_ISO_7_ELSE); | ||
| 649 | break; | 657 | break; |
| 650 | 658 | ||
| 651 | default: | 659 | default: |
| @@ -655,12 +663,13 @@ detect_coding_iso2022 (src, src_end) | |||
| 655 | return 0; | 663 | return 0; |
| 656 | else | 664 | else |
| 657 | { | 665 | { |
| 658 | int count = 1; | 666 | unsigned char *src_begin = src; |
| 659 | 667 | ||
| 660 | mask &= ~CODING_CATEGORY_MASK_ISO_7; | 668 | mask &= ~(CODING_CATEGORY_MASK_ISO_7 |
| 669 | | CODING_CATEGORY_MASK_ISO_7_ELSE); | ||
| 661 | while (src < src_end && *src >= 0xA0) | 670 | while (src < src_end && *src >= 0xA0) |
| 662 | count++, src++; | 671 | src++; |
| 663 | if (count & 1 && src < src_end) | 672 | if ((src - src_begin - 1) & 1 && src < src_end) |
| 664 | mask &= ~CODING_CATEGORY_MASK_ISO_8_2; | 673 | mask &= ~CODING_CATEGORY_MASK_ISO_8_2; |
| 665 | } | 674 | } |
| 666 | break; | 675 | break; |
| @@ -2443,34 +2452,44 @@ setup_coding_system (coding_system, coding) | |||
| 2443 | 2452 | ||
| 2444 | The category for a coding system which has the same code range | 2453 | The category for a coding system which has the same code range |
| 2445 | as SJIS. Assigned the coding-system (Lisp | 2454 | as SJIS. Assigned the coding-system (Lisp |
| 2446 | symbol) `shift-jis' by default. | 2455 | symbol) `japanese-shift-jis' by default. |
| 2447 | 2456 | ||
| 2448 | o coding-category-iso-7 | 2457 | o coding-category-iso-7 |
| 2449 | 2458 | ||
| 2450 | The category for a coding system which has the same code range | 2459 | The category for a coding system which has the same code range |
| 2451 | as ISO2022 of 7-bit environment. Assigned the coding-system | 2460 | as ISO2022 of 7-bit environment. This doesn't use any locking |
| 2452 | (Lisp symbol) `iso-2022-7' by default. | 2461 | shift or single shift functions. Assigned the coding-system |
| 2462 | (Lisp symbol) `iso-2022-7bit' by default. | ||
| 2453 | 2463 | ||
| 2454 | o coding-category-iso-8-1 | 2464 | o coding-category-iso-8-1 |
| 2455 | 2465 | ||
| 2456 | The category for a coding system which has the same code range | 2466 | The category for a coding system which has the same code range |
| 2457 | as ISO2022 of 8-bit environment and graphic plane 1 used only | 2467 | as ISO2022 of 8-bit environment and graphic plane 1 used only |
| 2458 | for DIMENSION1 charset. Assigned the coding-system (Lisp | 2468 | for DIMENSION1 charset. This doesn't use any locking shift |
| 2459 | symbol) `iso-8859-1' by default. | 2469 | or single shift functions. Assigned the coding-system (Lisp |
| 2470 | symbol) `iso-latin-1' by default. | ||
| 2460 | 2471 | ||
| 2461 | o coding-category-iso-8-2 | 2472 | o coding-category-iso-8-2 |
| 2462 | 2473 | ||
| 2463 | The category for a coding system which has the same code range | 2474 | The category for a coding system which has the same code range |
| 2464 | as ISO2022 of 8-bit environment and graphic plane 1 used only | 2475 | as ISO2022 of 8-bit environment and graphic plane 1 used only |
| 2465 | for DIMENSION2 charset. Assigned the coding-system (Lisp | 2476 | for DIMENSION2 charset. This doesn't use any locking shift |
| 2466 | symbol) `euc-japan' by default. | 2477 | or single shift functions. Assigned the coding-system (Lisp |
| 2478 | symbol) `japanese-iso-8bit' by default. | ||
| 2467 | 2479 | ||
| 2468 | o coding-category-iso-else | 2480 | o coding-category-iso-7-else |
| 2469 | 2481 | ||
| 2470 | The category for a coding system which has the same code range | 2482 | The category for a coding system which has the same code range |
| 2471 | as ISO2022 but not belongs to any of the above three | 2483 | as ISO2022 of 7-bit environment but uses locking shift or |
| 2472 | categories. Assigned the coding-system (Lisp symbol) | 2484 | single shift functions. Assigned the coding-system (Lisp |
| 2473 | `iso-2022-ss2-7' by default. | 2485 | symbol) `iso-2022-7bit-lock' by default. |
| 2486 | |||
| 2487 | o coding-category-iso-8-else | ||
| 2488 | |||
| 2489 | The category for a coding system which has the same code range | ||
| 2490 | as ISO2022 of 8-bit environment but uses locking shift or | ||
| 2491 | single shift functions. Assigned the coding-system (Lisp | ||
| 2492 | symbol) `iso-2022-8bit-ss2' by default. | ||
| 2474 | 2493 | ||
| 2475 | o coding-category-big5 | 2494 | o coding-category-big5 |
| 2476 | 2495 | ||