diff options
| author | Kenichi Handa | 2000-12-28 01:05:02 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2000-12-28 01:05:02 +0000 |
| commit | aa72b389be17f2b1c703ac236cc839a2f6b428a5 (patch) | |
| tree | f2f7e76a4cd135934f7f11fc963564741fef727d /src/coding.c | |
| parent | 0c80628aa43fa965ff6abbed74c8d5f2d94054c6 (diff) | |
| download | emacs-aa72b389be17f2b1c703ac236cc839a2f6b428a5.tar.gz emacs-aa72b389be17f2b1c703ac236cc839a2f6b428a5.zip | |
(SAFE_ONE_MORE_BYTE): New macro.
(DECODE_EMACS_MULE_COMPOSITION_CHAR): New macro.
(DECODE_EMACS_MULE_COMPOSITION_RULE): New macro.
(decode_composition_emacs_mule): New function.
(decode_coding_emacs_mule): Decode composition sequence by calling
decode_composition_emacs_mule.
(ENCODE_COMPOSITION_EMACS_MULE): New macro.
(encode_coding_emacs_mule): Changed from macro to function. If
a text contains compositions, encode them correctly.
(setup_coding_system): Set coding->common_flags for emacs-mule so
that decoding and encoding are required.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 475 |
1 files changed, 439 insertions, 36 deletions
diff --git a/src/coding.c b/src/coding.c index 10be961c3aa..ade20141c57 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -513,9 +513,9 @@ coding_safe_chars (coding) | |||
| 513 | 513 | ||
| 514 | /*** 2. Emacs internal format (emacs-mule) handlers ***/ | 514 | /*** 2. Emacs internal format (emacs-mule) handlers ***/ |
| 515 | 515 | ||
| 516 | /* Emacs' internal format for encoding multiple character sets is a | 516 | /* Emacs' internal format for representation of multiple character |
| 517 | kind of multi-byte encoding, i.e. characters are encoded by | 517 | sets is a kind of multi-byte encoding, i.e. characters are |
| 518 | variable-length sequences of one-byte codes. | 518 | represented by variable-length sequences of one-byte codes. |
| 519 | 519 | ||
| 520 | ASCII characters and control characters (e.g. `tab', `newline') are | 520 | ASCII characters and control characters (e.g. `tab', `newline') are |
| 521 | represented by one-byte sequences which are their ASCII codes, in | 521 | represented by one-byte sequences which are their ASCII codes, in |
| @@ -531,7 +531,7 @@ coding_safe_chars (coding) | |||
| 531 | The other characters are represented by a sequence of `base | 531 | The other characters are represented by a sequence of `base |
| 532 | leading-code', optional `extended leading-code', and one or two | 532 | leading-code', optional `extended leading-code', and one or two |
| 533 | `position-code's. The length of the sequence is determined by the | 533 | `position-code's. The length of the sequence is determined by the |
| 534 | base leading-code. Leading-code takes the range 0x80 through 0x9F, | 534 | base leading-code. Leading-code takes the range 0x81 through 0x9D, |
| 535 | whereas extended leading-code and position-code take the range 0xA0 | 535 | whereas extended leading-code and position-code take the range 0xA0 |
| 536 | through 0xFF. See `charset.h' for more details about leading-code | 536 | through 0xFF. See `charset.h' for more details about leading-code |
| 537 | and position-code. | 537 | and position-code. |
| @@ -542,9 +542,46 @@ coding_safe_chars (coding) | |||
| 542 | ascii 0x00..0x7F | 542 | ascii 0x00..0x7F |
| 543 | eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF | 543 | eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF |
| 544 | eight-bit-graphic 0xA0..0xBF | 544 | eight-bit-graphic 0xA0..0xBF |
| 545 | ELSE 0x81..0x9F + [0xA0..0xFF]+ | 545 | ELSE 0x81..0x9D + [0xA0..0xFF]+ |
| 546 | --------------------------------------------- | 546 | --------------------------------------------- |
| 547 | 547 | ||
| 548 | As this is the internal character representation, the format is | ||
| 549 | usually not used externally (i.e. in a file or in data sent to a | ||
| 550 | process). However, it is possible to have text externally in this | ||
| 551 | format (e.g. by encoding it with the coding system `emacs-mule'). | ||
| 552 | |||
| 553 | In that case, a sequence of one-byte codes has a slightly different | ||
| 554 | form. | ||
| 555 | |||
| 556 | At first, all characters in eight-bit-control are represented by | ||
| 557 | one-byte sequences which are their 8-bit code. | ||
| 558 | |||
| 559 | Next, character composition data are represented by the byte | ||
| 560 | sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ..., | ||
| 561 | where, | ||
| 562 | METHOD is 0xF0 plus one of composition method (enum | ||
| 563 | composition_method), | ||
| 564 | |||
| 565 | BYTES is 0xA0 plus the byte length of this composition data, | ||
| 566 | |||
| 567 | CHARS is 0xA0 plus the number of characters composed by this | ||
| 568 | data, | ||
| 569 | |||
| 570 | COMPONENTs are characters in multibyte form or composition | ||
| 571 | rules encoded as two bytes of ASCII codes. | ||
| 572 | |||
| 573 | In addition, for backward compatibility, the following formats are | ||
| 574 | also recognized as composition data on decoding. | ||
| 575 | |||
| 576 | 0x80 MSEQ ... | ||
| 577 | 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ | ||
| 578 | |||
| 579 | Here, | ||
| 580 | MSEQ is a multibyte form but in this special format: | ||
| 581 | ASCII: 0xA0 ASCII_CODE+0x80, | ||
| 582 | other: LEADING_CODE+0x20 FOLLOWING-BYTE ..., | ||
| 583 | RULE is a one byte code of the range 0xA0..0xF0 that | ||
| 584 | represents a composition rule. | ||
| 548 | */ | 585 | */ |
| 549 | 586 | ||
| 550 | enum emacs_code_class_type emacs_code_class[256]; | 587 | enum emacs_code_class_type emacs_code_class[256]; |
| @@ -608,6 +645,261 @@ detect_coding_emacs_mule (src, src_end, multibytep) | |||
| 608 | } | 645 | } |
| 609 | 646 | ||
| 610 | 647 | ||
/* Composition records are stored in CODING->cmp_data->data as runs
   of ints:

     data[0]   length of the record in ints; -1 while the record is
               still open (set by CODING_ADD_COMPOSITION_END),
     data[1]   char_offset + START,
     data[2]   char_offset + END (set by CODING_ADD_COMPOSITION_END),
     data[3]   METHOD,
     data[4..] components (set by CODING_ADD_COMPOSITION_COMPONENT).

   All macro arguments are parenthesized in the expansions so that
   arbitrary expressions (e.g. `&cs' or `a ? b : c') may be passed.
   The statement macros declare locals named `cmp_data' and `data';
   an argument expression must not refer to variables of those
   names.  */

/* Record the starting position START and METHOD of one composition.
   Remember the start of the record in CODING->cmp_data_start and
   reserve the four header slots.  */

#define CODING_ADD_COMPOSITION_START(coding, start, method)             \
  do {                                                                  \
    struct composition_data *cmp_data = (coding)->cmp_data;             \
    int *data = cmp_data->data + cmp_data->used;                        \
    (coding)->cmp_data_start = cmp_data->used;                          \
    data[0] = -1;                                                       \
    data[1] = cmp_data->char_offset + (start);                          \
    data[3] = (int) (method);                                           \
    cmp_data->used += 4;                                                \
  } while (0)

/* Record the ending position END of the current composition and
   close the record by storing its length in data[0].  */

#define CODING_ADD_COMPOSITION_END(coding, end)                         \
  do {                                                                  \
    struct composition_data *cmp_data = (coding)->cmp_data;             \
    int *data = cmp_data->data + (coding)->cmp_data_start;              \
    data[0] = cmp_data->used - (coding)->cmp_data_start;                \
    data[2] = cmp_data->char_offset + (end);                            \
  } while (0)

/* Record one COMPONENT (alternate character or composition rule).
   CODING is evaluated twice, so it must not have side effects.  No
   bounds check is done here; the caller must ensure there is room.  */

#define CODING_ADD_COMPOSITION_COMPONENT(coding, component)             \
  ((coding)->cmp_data->data[(coding)->cmp_data->used++] = (component))
| 675 | |||
| 676 | |||
/* Get one byte from the data pointed to by SRC and increment SRC.
   If SRC is not less than SRC_END, yield -1 without incrementing
   SRC.

   SRC and SRC_END are not arguments: they are variables that must be
   in scope wherever this macro is expanded.  */

#define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++)
| 681 | |||
| 682 | |||
/* Decode a character represented as a component of a composition
   sequence of Emacs 20 style at SRC.  Set C to that character, store
   its multibyte form at P, and advance P to the end of that form.
   If no valid character is found, set C to -1.

   In this style a component is either
     ASCII:  0xA0 ASCII_CODE+0x80, or
     other:  LEADING_CODE+0x20 FOLLOWING-BYTE ...

   C and P must be lvalues.  The macro reads bytes through
   SAFE_ONE_MORE_BYTE, so SRC and SRC_END must be in scope at the
   expansion site.  Note that bytes of a multibyte candidate are
   written at P before the candidate is validated.  */

#define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p)                        \
  do {                                                                  \
    int bytes;                                                          \
                                                                        \
    (c) = SAFE_ONE_MORE_BYTE ();                                        \
    if ((c) < 0)                                                        \
      break;            /* Source exhausted; C is already -1.  */       \
    if (CHAR_HEAD_P (c))                                                \
      (c) = -1;                                                         \
    else if ((c) == 0xA0)                                               \
      {                                                                 \
        /* ASCII: the next byte is ASCII_CODE + 0x80.  */               \
        (c) = SAFE_ONE_MORE_BYTE ();                                    \
        if ((c) < 0xA0)                                                 \
          (c) = -1;                                                     \
        else                                                            \
          {                                                             \
            (c) -= 0x80;                                                \
            *(p)++ = (c);                                               \
          }                                                             \
      }                                                                 \
    else if (BASE_LEADING_CODE_P ((c) - 0x20))                          \
      {                                                                 \
        unsigned char *p0 = (p);                                        \
                                                                        \
        /* Restore the real leading code, then copy the bytes that      \
           follow it as they are.  */                                   \
        (c) -= 0x20;                                                    \
        *(p)++ = (c);                                                   \
        bytes = BYTES_BY_CHAR_HEAD (c);                                 \
        while (--bytes)                                                 \
          {                                                             \
            (c) = SAFE_ONE_MORE_BYTE ();                                \
            if ((c) < 0)                                                \
              break;                                                    \
            *(p)++ = (c);                                               \
          }                                                             \
        if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, (p) - p0, bytes))           \
          (c) = STRING_CHAR (p0, bytes);                                \
        else                                                            \
          (c) = -1;                                                     \
      }                                                                 \
    else                                                                \
      (c) = -1;                                                         \
  } while (0)
| 730 | |||
| 731 | |||
/* Decode a composition rule represented as a component of a
   composition sequence of Emacs 20 style at SRC.  Set C to the rule.
   If no valid rule is found, set C to -1.

   A rule is one byte in the range 0xA0 .. 0xA0+80; the value above
   0xA0 is split into GREF (quotient by 9) and NREF (remainder by 9).

   C must be an lvalue.  Besides SRC and SRC_END (used through
   SAFE_ONE_MORE_BYTE), the int variables GREF and NREF must be in
   scope at the expansion site; they are overwritten on success.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE(c)           \
  do {                                                  \
    (c) = SAFE_ONE_MORE_BYTE ();                        \
    (c) -= 0xA0;                                        \
    if ((c) < 0 || (c) >= 81)                           \
      (c) = -1;                                         \
    else                                                \
      {                                                 \
        gref = (c) / 9, nref = (c) % 9;                 \
        (c) = COMPOSITION_ENCODE_RULE (gref, nref);     \
      }                                                 \
  } while (0)
| 748 | |||
| 749 | |||
| 750 | /* Decode composition sequence encoded by `emacs-mule' at the source | ||
| 751 | pointed by SRC. SRC_END is the end of source. Store information | ||
| 752 | of the composition in CODING->cmp_data. | ||
| 753 | |||
| 754 | For backward compatibility, decode also a composition sequence of | ||
| 755 | Emacs 20 style. In that case, the composition sequence contains | ||
| 756 | characters that should be extracted into a buffer or string. Store | ||
| 757 | those characters at *DESTINATION in multibyte form. | ||
| 758 | |||
| 759 | If we encounter an invalid byte sequence, return 0. | ||
| 760 | If we encounter an insufficient source or destination, or | ||
| 761 | insufficient space in CODING->cmp_data, return 1. | ||
| 762 | Otherwise, return consumed bytes in the source. | ||
| 763 | |||
| 764 | */ | ||
| 765 | static INLINE int | ||
| 766 | decode_composition_emacs_mule (coding, src, src_end, | ||
| 767 | destination, dst_end, dst_bytes) | ||
| 768 | struct coding_system *coding; | ||
| 769 | unsigned char *src, *src_end, **destination, *dst_end; | ||
| 770 | int dst_bytes; | ||
| 771 | { | ||
| 772 | unsigned char *dst = *destination; | ||
| 773 | int method, data_len, nchars; | ||
| 774 | unsigned char *src_base = src++; | ||
| 775 | /* Store compoments of composition. */ | ||
| 776 | int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH]; | ||
| 777 | int ncomponent; | ||
| 778 | /* Store multibyte form of characters to be composed. This is for | ||
| 779 | Emacs 20 style composition sequence. */ | ||
| 780 | unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH]; | ||
| 781 | unsigned char *bufp = buf; | ||
| 782 | int c, i, gref, nref; | ||
| 783 | |||
| 784 | if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH | ||
| 785 | >= COMPOSITION_DATA_SIZE) | ||
| 786 | { | ||
| 787 | coding->result = CODING_FINISH_INSUFFICIENT_CMP; | ||
| 788 | return -1; | ||
| 789 | } | ||
| 790 | |||
| 791 | ONE_MORE_BYTE (c); | ||
| 792 | if (c - 0xF0 >= COMPOSITION_RELATIVE | ||
| 793 | && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS) | ||
| 794 | { | ||
| 795 | int with_rule; | ||
| 796 | |||
| 797 | method = c - 0xF0; | ||
| 798 | with_rule = (method == COMPOSITION_WITH_RULE | ||
| 799 | || method == COMPOSITION_WITH_RULE_ALTCHARS); | ||
| 800 | ONE_MORE_BYTE (c); | ||
| 801 | data_len = c - 0xA0; | ||
| 802 | if (data_len < 4 | ||
| 803 | || src_base + data_len > src_end) | ||
| 804 | return 0; | ||
| 805 | ONE_MORE_BYTE (c); | ||
| 806 | nchars = c - 0xA0; | ||
| 807 | if (c < 1) | ||
| 808 | return 0; | ||
| 809 | for (ncomponent = 0; src < src_base + data_len; ncomponent++) | ||
| 810 | { | ||
| 811 | if (ncomponent % 2 && with_rule) | ||
| 812 | { | ||
| 813 | ONE_MORE_BYTE (gref); | ||
| 814 | gref -= 32; | ||
| 815 | ONE_MORE_BYTE (nref); | ||
| 816 | nref -= 32; | ||
| 817 | c = COMPOSITION_ENCODE_RULE (gref, nref); | ||
| 818 | } | ||
| 819 | else | ||
| 820 | { | ||
| 821 | int bytes; | ||
| 822 | if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)) | ||
| 823 | c = STRING_CHAR (src, bytes); | ||
| 824 | else | ||
| 825 | c = *src, bytes = 1; | ||
| 826 | src += bytes; | ||
| 827 | } | ||
| 828 | component[ncomponent] = c; | ||
| 829 | } | ||
| 830 | } | ||
| 831 | else | ||
| 832 | { | ||
| 833 | /* This may be an old Emacs 20 style format. See the comment at | ||
| 834 | the section 2 of this file. */ | ||
| 835 | while (src < src_end && !CHAR_HEAD_P (*src)) src++; | ||
| 836 | if (src == src_end | ||
| 837 | && !(coding->mode & CODING_MODE_LAST_BLOCK)) | ||
| 838 | goto label_end_of_loop; | ||
| 839 | |||
| 840 | src_end = src; | ||
| 841 | src = src_base + 1; | ||
| 842 | if (c < 0xC0) | ||
| 843 | { | ||
| 844 | method = COMPOSITION_RELATIVE; | ||
| 845 | for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;) | ||
| 846 | { | ||
| 847 | DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp); | ||
| 848 | if (c < 0) | ||
| 849 | break; | ||
| 850 | component[ncomponent++] = c; | ||
| 851 | } | ||
| 852 | if (ncomponent < 2) | ||
| 853 | return 0; | ||
| 854 | nchars = ncomponent; | ||
| 855 | } | ||
| 856 | else if (c == 0xFF) | ||
| 857 | { | ||
| 858 | method = COMPOSITION_WITH_RULE; | ||
| 859 | src++; | ||
| 860 | DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp); | ||
| 861 | if (c < 0) | ||
| 862 | return 0; | ||
| 863 | component[0] = c; | ||
| 864 | for (ncomponent = 1; | ||
| 865 | ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;) | ||
| 866 | { | ||
| 867 | DECODE_EMACS_MULE_COMPOSITION_RULE (c); | ||
| 868 | if (c < 0) | ||
| 869 | break; | ||
| 870 | component[ncomponent++] = c; | ||
| 871 | DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp); | ||
| 872 | if (c < 0) | ||
| 873 | break; | ||
| 874 | component[ncomponent++] = c; | ||
| 875 | } | ||
| 876 | if (ncomponent < 3) | ||
| 877 | return 0; | ||
| 878 | nchars = (ncomponent + 1) / 2; | ||
| 879 | } | ||
| 880 | else | ||
| 881 | return 0; | ||
| 882 | } | ||
| 883 | |||
| 884 | if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src)) | ||
| 885 | { | ||
| 886 | CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method); | ||
| 887 | for (i = 0; i < ncomponent; i++) | ||
| 888 | CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]); | ||
| 889 | CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars); | ||
| 890 | if (buf < bufp) | ||
| 891 | { | ||
| 892 | unsigned char *p = buf; | ||
| 893 | EMIT_BYTES (p, bufp); | ||
| 894 | *destination += bufp - buf; | ||
| 895 | coding->produced_char += nchars; | ||
| 896 | } | ||
| 897 | return (src - src_base); | ||
| 898 | } | ||
| 899 | label_end_of_loop: | ||
| 900 | return -1; | ||
| 901 | } | ||
| 902 | |||
| 611 | /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ | 903 | /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ |
| 612 | 904 | ||
| 613 | static void | 905 | static void |
| @@ -669,6 +961,23 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) | |||
| 669 | coding->produced_char++; | 961 | coding->produced_char++; |
| 670 | continue; | 962 | continue; |
| 671 | } | 963 | } |
| 964 | else if (*src == 0x80) | ||
| 965 | { | ||
| 966 | /* Start of composition data. */ | ||
| 967 | int consumed = decode_composition_emacs_mule (coding, src, src_end, | ||
| 968 | &dst, dst_end, | ||
| 969 | dst_bytes); | ||
| 970 | if (consumed < 0) | ||
| 971 | goto label_end_of_loop; | ||
| 972 | else if (consumed > 0) | ||
| 973 | { | ||
| 974 | src += consumed; | ||
| 975 | continue; | ||
| 976 | } | ||
| 977 | bytes = CHAR_STRING (*src, tmp); | ||
| 978 | p = tmp; | ||
| 979 | src++; | ||
| 980 | } | ||
| 672 | else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)) | 981 | else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)) |
| 673 | { | 982 | { |
| 674 | p = src; | 983 | p = src; |
| @@ -693,9 +1002,123 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) | |||
| 693 | coding->produced = dst - destination; | 1002 | coding->produced = dst - destination; |
| 694 | } | 1003 | } |
| 695 | 1004 | ||
| 696 | #define encode_coding_emacs_mule(coding, source, destination, src_bytes, dst_bytes) \ | ||
| 697 | encode_eol (coding, source, destination, src_bytes, dst_bytes) | ||
| 698 | 1005 | ||
/* Encode the composition record stored at DATA into a special byte
   sequence starting with 0x80:

     0x80 METHOD BYTES CHARS COMPONENT ...

   where METHOD is 0xF0 + DATA[3], BYTES is 0xA0 plus the byte length
   of the whole sequence, CHARS is 0xA0 + (DATA[2] - DATA[1]), and a
   composition rule is written as the two bytes 0x20 + GREF and
   0x20 + NREF.

   Then update CODING->cmp_data_start and maybe CODING->cmp_data for
   the next call.

   The following must be in scope at the expansion site: DST,
   DST_END, DST_BYTES, SRC, and the label `label_end_of_loop', to
   which control jumps (after setting CODING->result to
   CODING_FINISH_INSUFFICIENT_DST) when the destination is too small.
   CODING and DATA are evaluated more than once and must not have
   side effects.

   NOTE(review): BYTES is stored in a single byte and the length is
   not checked against the range that byte can express -- confirm
   that the record size limits guarantee it fits.  */

#define ENCODE_COMPOSITION_EMACS_MULE(coding, data)                     \
  do {                                                                  \
    unsigned char buf[1024], *p0 = buf, *p;                             \
    int len = (data)[0];                                                \
    int i;                                                              \
                                                                        \
    buf[0] = 0x80;                                                      \
    buf[1] = 0xF0 + (data)[3];                  /* METHOD */            \
    buf[3] = 0xA0 + ((data)[2] - (data)[1]);    /* CHARS */             \
    p = buf + 4;                                                        \
    if ((data)[3] == COMPOSITION_WITH_RULE                              \
        || (data)[3] == COMPOSITION_WITH_RULE_ALTCHARS)                 \
      {                                                                 \
        /* Components alternate: CHAR RULE CHAR RULE ... CHAR.  */      \
        p += CHAR_STRING ((data)[4], p);                                \
        for (i = 5; i < len; i += 2)                                    \
          {                                                             \
            int gref, nref;                                             \
            COMPOSITION_DECODE_RULE ((data)[i], gref, nref);            \
            *p++ = 0x20 + gref;                                         \
            *p++ = 0x20 + nref;                                         \
            p += CHAR_STRING ((data)[i + 1], p);                        \
          }                                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        for (i = 4; i < len; i++)                                       \
          p += CHAR_STRING ((data)[i], p);                              \
      }                                                                 \
    buf[2] = 0xA0 + (p - buf);                  /* BYTES */             \
                                                                        \
    if (dst + (p - buf) + 4 > (dst_bytes ? dst_end : src))              \
      {                                                                 \
        (coding)->result = CODING_FINISH_INSUFFICIENT_DST;              \
        goto label_end_of_loop;                                         \
      }                                                                 \
    while (p0 < p)                                                      \
      *dst++ = *p0++;                                                   \
    /* Step to the next record, moving on to the next chunk of          \
       composition data when this one is used up.  */                   \
    (coding)->cmp_data_start += (data)[0];                              \
    if ((coding)->cmp_data_start == (coding)->cmp_data->used            \
        && (coding)->cmp_data->next)                                    \
      {                                                                 \
        (coding)->cmp_data = (coding)->cmp_data->next;                  \
        (coding)->cmp_data_start = 0;                                   \
      }                                                                 \
  } while (0)
| 1055 | |||
| 1056 | |||
| 1057 | static void encode_eol P_ ((struct coding_system *, unsigned char *, | ||
| 1058 | unsigned char *, int, int)); | ||
| 1059 | |||
| 1060 | static void | ||
| 1061 | encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) | ||
| 1062 | struct coding_system *coding; | ||
| 1063 | unsigned char *source, *destination; | ||
| 1064 | int src_bytes, dst_bytes; | ||
| 1065 | { | ||
| 1066 | unsigned char *src = source; | ||
| 1067 | unsigned char *src_end = source + src_bytes; | ||
| 1068 | unsigned char *dst = destination; | ||
| 1069 | unsigned char *dst_end = destination + dst_bytes; | ||
| 1070 | unsigned char *src_base; | ||
| 1071 | int c; | ||
| 1072 | int char_offset; | ||
| 1073 | int *data; | ||
| 1074 | |||
| 1075 | Lisp_Object translation_table; | ||
| 1076 | |||
| 1077 | translation_table = Qnil; | ||
| 1078 | |||
| 1079 | /* Optimization for the case that there's no composition. */ | ||
| 1080 | if (!coding->cmp_data || coding->cmp_data->used == 0) | ||
| 1081 | { | ||
| 1082 | encode_eol (coding, source, destination, src_bytes, dst_bytes); | ||
| 1083 | return; | ||
| 1084 | } | ||
| 1085 | |||
| 1086 | char_offset = coding->cmp_data->char_offset; | ||
| 1087 | data = coding->cmp_data->data + coding->cmp_data_start; | ||
| 1088 | while (1) | ||
| 1089 | { | ||
| 1090 | src_base = src; | ||
| 1091 | |||
| 1092 | /* If SRC starts a composition, encode the information about the | ||
| 1093 | composition in advance. */ | ||
| 1094 | if (coding->cmp_data_start < coding->cmp_data->used | ||
| 1095 | && char_offset + coding->consumed_char == data[1]) | ||
| 1096 | { | ||
| 1097 | ENCODE_COMPOSITION_EMACS_MULE (coding, data); | ||
| 1098 | char_offset = coding->cmp_data->char_offset; | ||
| 1099 | data = coding->cmp_data->data + coding->cmp_data_start; | ||
| 1100 | } | ||
| 1101 | |||
| 1102 | ONE_MORE_CHAR (c); | ||
| 1103 | if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF | ||
| 1104 | || coding->eol_type == CODING_EOL_CR)) | ||
| 1105 | { | ||
| 1106 | if (coding->eol_type == CODING_EOL_CRLF) | ||
| 1107 | EMIT_TWO_BYTES ('\r', c); | ||
| 1108 | else | ||
| 1109 | EMIT_ONE_BYTE ('\r'); | ||
| 1110 | } | ||
| 1111 | else if (SINGLE_BYTE_CHAR_P (c)) | ||
| 1112 | EMIT_ONE_BYTE (c); | ||
| 1113 | else | ||
| 1114 | EMIT_BYTES (src_base, src); | ||
| 1115 | coding->consumed_char++; | ||
| 1116 | } | ||
| 1117 | label_end_of_loop: | ||
| 1118 | coding->consumed = src_base - source; | ||
| 1119 | coding->produced = coding->produced_char = dst - destination; | ||
| 1120 | return; | ||
| 1121 | } | ||
| 699 | 1122 | ||
| 700 | 1123 | ||
| 701 | /*** 3. ISO2022 handlers ***/ | 1124 | /*** 3. ISO2022 handlers ***/ |
| @@ -1180,35 +1603,12 @@ coding_allocate_composition_data (coding, char_offset) | |||
| 1180 | coding->cmp_data_start = 0; | 1603 | coding->cmp_data_start = 0; |
| 1181 | } | 1604 | } |
| 1182 | 1605 | ||
| 1183 | /* Record the starting position START and METHOD of one composition. */ | 1606 | /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4. |
| 1184 | 1607 | ESC 0 : relative composition : ESC 0 CHAR ... ESC 1 | |
| 1185 | #define CODING_ADD_COMPOSITION_START(coding, start, method) \ | 1608 | ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1 |
| 1186 | do { \ | 1609 | ESC 3 : altchar composition : ESC 3 ALT ... ESC 0 CHAR ... ESC 1 |
| 1187 | struct composition_data *cmp_data = coding->cmp_data; \ | 1610 | ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1 |
| 1188 | int *data = cmp_data->data + cmp_data->used; \ | 1611 | */ |
| 1189 | coding->cmp_data_start = cmp_data->used; \ | ||
| 1190 | data[0] = -1; \ | ||
| 1191 | data[1] = cmp_data->char_offset + start; \ | ||
| 1192 | data[3] = (int) method; \ | ||
| 1193 | cmp_data->used += 4; \ | ||
| 1194 | } while (0) | ||
| 1195 | |||
| 1196 | /* Record the ending position END of the current composition. */ | ||
| 1197 | |||
| 1198 | #define CODING_ADD_COMPOSITION_END(coding, end) \ | ||
| 1199 | do { \ | ||
| 1200 | struct composition_data *cmp_data = coding->cmp_data; \ | ||
| 1201 | int *data = cmp_data->data + coding->cmp_data_start; \ | ||
| 1202 | data[0] = cmp_data->used - coding->cmp_data_start; \ | ||
| 1203 | data[2] = cmp_data->char_offset + end; \ | ||
| 1204 | } while (0) | ||
| 1205 | |||
| 1206 | /* Record one COMPONENT (alternate character or composition rule). */ | ||
| 1207 | |||
| 1208 | #define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \ | ||
| 1209 | (coding->cmp_data->data[coding->cmp_data->used++] = component) | ||
| 1210 | |||
| 1211 | /* Handle compositoin start sequence ESC 0, ESC 2, ESC 3, or ESC 4. */ | ||
| 1212 | 1612 | ||
| 1213 | #define DECODE_COMPOSITION_START(c1) \ | 1613 | #define DECODE_COMPOSITION_START(c1) \ |
| 1214 | do { \ | 1614 | do { \ |
| @@ -3088,6 +3488,9 @@ setup_coding_system (coding_system, coding) | |||
| 3088 | { | 3488 | { |
| 3089 | case 0: | 3489 | case 0: |
| 3090 | coding->type = coding_type_emacs_mule; | 3490 | coding->type = coding_type_emacs_mule; |
| 3491 | coding->common_flags | ||
| 3492 | |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK; | ||
| 3493 | coding->composing = COMPOSITION_NO; | ||
| 3091 | if (!NILP (coding->post_read_conversion)) | 3494 | if (!NILP (coding->post_read_conversion)) |
| 3092 | coding->common_flags |= CODING_REQUIRE_DECODING_MASK; | 3495 | coding->common_flags |= CODING_REQUIRE_DECODING_MASK; |
| 3093 | if (!NILP (coding->pre_write_conversion)) | 3496 | if (!NILP (coding->pre_write_conversion)) |