diff options
| author | Kenichi Handa | 1997-08-28 10:51:12 +0000 |
|---|---|---|
| committer | Kenichi Handa | 1997-08-28 10:51:12 +0000 |
| commit | 27901516b62b78a4b52c7787c7c115c8d00368d8 (patch) | |
| tree | c0da83fc214ed75b914c84eaa20826f3d4ca7297 /src/coding.c | |
| parent | e80de6b18fbc4102b28a86d0169c6f06ce12e9da (diff) | |
| download | emacs-27901516b62b78a4b52c7787c7c115c8d00368d8.tar.gz emacs-27901516b62b78a4b52c7787c7c115c8d00368d8.zip | |
(Qno_conversion, Qundecided): New variables.
(syms_of_coding): Initialize and staticpro them.
(coding_category_name): Include "coding-category-raw-text".
(setup_coding_system): Handle coding_type_raw_text.
(detect_coding_mask): Include CODING_CATEGORY_MASK_RAW_TEXT in the
return value instead of CODING_CATEGORY_MASK_BINARY.
(detect_coding): Do not check the case that `mask' is 0, which
never happens now.
(detect_eol_type): If EOL format is inconsistent, return
CODING_EOL_INCONSISTENT.
(detect_eol): If EOL format of raw-text file is inconsistent,
detect it as no-conversion.
(decode_coding): Handle coding_type_raw_text.
(encode_coding): Likewise.
(Fdetect_coding_region): Adjusted for the above changes.
(shrink_conversion_area): Handle coding_type_raw_text.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 129 |
1 files changed, 87 insertions, 42 deletions
diff --git a/src/coding.c b/src/coding.c index 1e0e992ea35..66fbe517215 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -67,7 +67,13 @@ Boston, MA 02111-1307, USA. */ | |||
| 67 | (all uppercase), we mean the coding system, and when we write | 67 | (all uppercase), we mean the coding system, and when we write |
| 68 | "Big5" (capitalized), we mean the character set. | 68 | "Big5" (capitalized), we mean the character set. |
| 69 | 69 | ||
| 70 | 4. Other | 70 | 4. Raw text |
| 71 | |||
| 72 | A coding system for a text containing random 8-bit code. Emacs | ||
| 73 | does no code conversion on such a text except for end-of-line | ||
| 74 | format. | ||
| 75 | |||
| 76 | 5. Other | ||
| 71 | 77 | ||
| 72 | If a user wants to read/write a text encoded in a coding system not | 78 | If a user wants to read/write a text encoded in a coding system not |
| 73 | listed above, he can supply a decoder and an encoder for it in CCL | 79 | listed above, he can supply a decoder and an encoder for it in CCL |
| @@ -246,6 +252,7 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed) | |||
| 246 | Lisp_Object Qcoding_system, Qeol_type; | 252 | Lisp_Object Qcoding_system, Qeol_type; |
| 247 | Lisp_Object Qbuffer_file_coding_system; | 253 | Lisp_Object Qbuffer_file_coding_system; |
| 248 | Lisp_Object Qpost_read_conversion, Qpre_write_conversion; | 254 | Lisp_Object Qpost_read_conversion, Qpre_write_conversion; |
| 255 | Lisp_Object Qno_conversion, Qundecided; | ||
| 249 | 256 | ||
| 250 | extern Lisp_Object Qinsert_file_contents, Qwrite_region; | 257 | extern Lisp_Object Qinsert_file_contents, Qwrite_region; |
| 251 | Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; | 258 | Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; |
| @@ -319,6 +326,7 @@ char *coding_category_name[CODING_CATEGORY_IDX_MAX] = { | |||
| 319 | "coding-category-iso-7-else", | 326 | "coding-category-iso-7-else", |
| 320 | "coding-category-iso-8-else", | 327 | "coding-category-iso-8-else", |
| 321 | "coding-category-big5", | 328 | "coding-category-big5", |
| 329 | "coding-category-raw-text", | ||
| 322 | "coding-category-binary" | 330 | "coding-category-binary" |
| 323 | }; | 331 | }; |
| 324 | 332 | ||
| @@ -2546,6 +2554,10 @@ setup_coding_system (coding_system, coding) | |||
| 2546 | coding->require_flushing = 1; | 2554 | coding->require_flushing = 1; |
| 2547 | break; | 2555 | break; |
| 2548 | 2556 | ||
| 2557 | case 5: | ||
| 2558 | coding->type = coding_type_raw_text; | ||
| 2559 | break; | ||
| 2560 | |||
| 2549 | default: | 2561 | default: |
| 2550 | if (EQ (type, Qt)) | 2562 | if (EQ (type, Qt)) |
| 2551 | coding->type = coding_type_undecided; | 2563 | coding->type = coding_type_undecided; |
| @@ -2687,7 +2699,7 @@ detect_coding_mask (src, src_bytes) | |||
| 2687 | /* If C is a special latin extra code, | 2699 | /* If C is a special latin extra code, |
| 2688 | or is an ISO2022 specific control code of C1 (SS2 or SS3), | 2700 | or is an ISO2022 specific control code of C1 (SS2 or SS3), |
| 2689 | or is an ISO2022 control-sequence-introducer (CSI), | 2701 | or is an ISO2022 control-sequence-introducer (CSI), |
| 2690 | we should also consider the possibility of someof ISO2022 codings. */ | 2702 | we should also consider the possibility of ISO2022 codings. */ |
| 2691 | if ((VECTORP (Vlatin_extra_code_table) | 2703 | if ((VECTORP (Vlatin_extra_code_table) |
| 2692 | && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) | 2704 | && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) |
| 2693 | || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3) | 2705 | || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3) |
| @@ -2700,14 +2712,14 @@ detect_coding_mask (src, src_bytes) | |||
| 2700 | mask = (detect_coding_iso2022 (src, src_end) | 2712 | mask = (detect_coding_iso2022 (src, src_end) |
| 2701 | | detect_coding_sjis (src, src_end) | 2713 | | detect_coding_sjis (src, src_end) |
| 2702 | | detect_coding_emacs_mule (src, src_end) | 2714 | | detect_coding_emacs_mule (src, src_end) |
| 2703 | | CODING_CATEGORY_MASK_BINARY); | 2715 | | CODING_CATEGORY_MASK_RAW_TEXT); |
| 2704 | 2716 | ||
| 2705 | else | 2717 | else |
| 2706 | /* C is the first byte of SJIS character code, or a | 2718 | /* C is the first byte of SJIS character code, |
| 2707 | leading-code of Emacs. */ | 2719 | or a leading-code of Emacs' internal format (emacs-mule). */ |
| 2708 | mask = (detect_coding_sjis (src, src_end) | 2720 | mask = (detect_coding_sjis (src, src_end) |
| 2709 | | detect_coding_emacs_mule (src, src_end) | 2721 | | detect_coding_emacs_mule (src, src_end) |
| 2710 | | CODING_CATEGORY_MASK_BINARY); | 2722 | | CODING_CATEGORY_MASK_RAW_TEXT); |
| 2711 | } | 2723 | } |
| 2712 | else | 2724 | else |
| 2713 | /* C is a character of ISO2022 in graphic plane right, | 2725 | /* C is a character of ISO2022 in graphic plane right, |
| @@ -2716,7 +2728,7 @@ detect_coding_mask (src, src_bytes) | |||
| 2716 | mask = (detect_coding_iso2022 (src, src_end) | 2728 | mask = (detect_coding_iso2022 (src, src_end) |
| 2717 | | detect_coding_sjis (src, src_end) | 2729 | | detect_coding_sjis (src, src_end) |
| 2718 | | detect_coding_big5 (src, src_end) | 2730 | | detect_coding_big5 (src, src_end) |
| 2719 | | CODING_CATEGORY_MASK_BINARY); | 2731 | | CODING_CATEGORY_MASK_RAW_TEXT); |
| 2720 | 2732 | ||
| 2721 | return mask; | 2733 | return mask; |
| 2722 | } | 2734 | } |
| @@ -2732,42 +2744,33 @@ detect_coding (coding, src, src_bytes) | |||
| 2732 | { | 2744 | { |
| 2733 | int mask = detect_coding_mask (src, src_bytes); | 2745 | int mask = detect_coding_mask (src, src_bytes); |
| 2734 | int idx; | 2746 | int idx; |
| 2747 | Lisp_Object val = Vcoding_category_list; | ||
| 2735 | 2748 | ||
| 2736 | if (mask == CODING_CATEGORY_MASK_ANY) | 2749 | if (mask == CODING_CATEGORY_MASK_ANY) |
| 2737 | /* We found nothing other than ASCII. There's nothing to do. */ | 2750 | /* We found nothing other than ASCII. There's nothing to do. */ |
| 2738 | return; | 2751 | return; |
| 2739 | 2752 | ||
| 2740 | if (!mask) | 2753 | /* We found some plausible coding systems. Let's use a coding |
| 2741 | /* The source text seems to be encoded in unknown coding system. | 2754 | system of the highest priority. */ |
| 2742 | Emacs regards the category of such a kind of coding system as | ||
| 2743 | `coding-category-binary'. We assume that a user has assigned | ||
| 2744 | an appropriate coding system for a `coding-category-binary'. */ | ||
| 2745 | idx = CODING_CATEGORY_IDX_BINARY; | ||
| 2746 | else | ||
| 2747 | { | ||
| 2748 | /* We found some plausible coding systems. Let's use a coding | ||
| 2749 | system of the highest priority. */ | ||
| 2750 | Lisp_Object val = Vcoding_category_list; | ||
| 2751 | 2755 | ||
| 2752 | if (CONSP (val)) | 2756 | if (CONSP (val)) |
| 2753 | while (!NILP (val)) | 2757 | while (!NILP (val)) |
| 2754 | { | 2758 | { |
| 2755 | idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index)); | 2759 | idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index)); |
| 2756 | if ((idx < CODING_CATEGORY_IDX_MAX) && (mask & (1 << idx))) | 2760 | if ((idx < CODING_CATEGORY_IDX_MAX) && (mask & (1 << idx))) |
| 2757 | break; | 2761 | break; |
| 2758 | val = XCONS (val)->cdr; | 2762 | val = XCONS (val)->cdr; |
| 2759 | } | 2763 | } |
| 2760 | else | 2764 | else |
| 2761 | val = Qnil; | 2765 | val = Qnil; |
| 2762 | 2766 | ||
| 2763 | if (NILP (val)) | 2767 | if (NILP (val)) |
| 2764 | { | 2768 | { |
| 2765 | /* For unknown reason, `Vcoding_category_list' contains none | 2769 | /* For unknown reason, `Vcoding_category_list' contains none of |
| 2766 | of found categories. Let's use any of them. */ | 2770 | found categories. Let's use any of them. */ |
| 2767 | for (idx = 0; idx < CODING_CATEGORY_IDX_MAX; idx++) | 2771 | for (idx = 0; idx < CODING_CATEGORY_IDX_MAX; idx++) |
| 2768 | if (mask & (1 << idx)) | 2772 | if (mask & (1 << idx)) |
| 2769 | break; | 2773 | break; |
| 2770 | } | ||
| 2771 | } | 2774 | } |
| 2772 | setup_coding_system (XSYMBOL (coding_category_table[idx])->value, coding); | 2775 | setup_coding_system (XSYMBOL (coding_category_table[idx])->value, coding); |
| 2773 | } | 2776 | } |
| @@ -2807,8 +2810,8 @@ detect_eol_type (src, src_bytes) | |||
| 2807 | eol_type = this_eol_type; | 2810 | eol_type = this_eol_type; |
| 2808 | else if (eol_type != this_eol_type) | 2811 | else if (eol_type != this_eol_type) |
| 2809 | /* The found type is different from what found before. | 2812 | /* The found type is different from what found before. |
| 2810 | We had better not decode end-of-line. */ | 2813 | Let's notify the caller about this inconsistency. */ |
| 2811 | return CODING_EOL_LF; | 2814 | return CODING_EOL_INCONSISTENT; |
| 2812 | } | 2815 | } |
| 2813 | } | 2816 | } |
| 2814 | 2817 | ||
| @@ -2832,6 +2835,24 @@ detect_eol (coding, src, src_bytes) | |||
| 2832 | /* We found no end-of-line in the source text. */ | 2835 | /* We found no end-of-line in the source text. */ |
| 2833 | return; | 2836 | return; |
| 2834 | 2837 | ||
| 2838 | if (eol_type == CODING_EOL_INCONSISTENT) | ||
| 2839 | { | ||
| 2840 | #if 0 | ||
| 2841 | /* This code is suppressed until we find a better way to | ||
| 2842 | distinguish raw-text and binary. */ | ||
| 2843 | |||
| 2844 | /* If we have already detected that the coding is raw-text, the | ||
| 2845 | coding should actually be no-conversion. */ | ||
| 2846 | if (coding->type == coding_type_raw_text) | ||
| 2847 | { | ||
| 2848 | setup_coding_system (Qno_conversion, coding); | ||
| 2849 | return; | ||
| 2850 | } | ||
| 2851 | /* Else, let's decode only text code anyway. */ | ||
| 2852 | #endif /* 0 */ | ||
| 2853 | eol_type == CODING_EOL_LF; | ||
| 2854 | } | ||
| 2855 | |||
| 2835 | coding_system = coding->symbol; | 2856 | coding_system = coding->symbol; |
| 2836 | while (!NILP (coding_system) | 2857 | while (!NILP (coding_system) |
| 2837 | && NILP (val = Fget (coding_system, Qeol_type))) | 2858 | && NILP (val = Fget (coding_system, Qeol_type))) |
| @@ -2877,6 +2898,7 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes, consumed) | |||
| 2877 | 2898 | ||
| 2878 | case coding_type_emacs_mule: | 2899 | case coding_type_emacs_mule: |
| 2879 | case coding_type_undecided: | 2900 | case coding_type_undecided: |
| 2901 | case coding_type_raw_text: | ||
| 2880 | if (coding->eol_type == CODING_EOL_LF | 2902 | if (coding->eol_type == CODING_EOL_LF |
| 2881 | || coding->eol_type == CODING_EOL_UNDECIDED) | 2903 | || coding->eol_type == CODING_EOL_UNDECIDED) |
| 2882 | goto label_no_conversion; | 2904 | goto label_no_conversion; |
| @@ -2941,6 +2963,7 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes, consumed) | |||
| 2941 | 2963 | ||
| 2942 | case coding_type_emacs_mule: | 2964 | case coding_type_emacs_mule: |
| 2943 | case coding_type_undecided: | 2965 | case coding_type_undecided: |
| 2966 | case coding_type_raw_text: | ||
| 2944 | if (coding->eol_type == CODING_EOL_LF | 2967 | if (coding->eol_type == CODING_EOL_LF |
| 2945 | || coding->eol_type == CODING_EOL_UNDECIDED) | 2968 | || coding->eol_type == CODING_EOL_UNDECIDED) |
| 2946 | goto label_no_conversion; | 2969 | goto label_no_conversion; |
| @@ -3133,10 +3156,11 @@ If only ASCII characters are found, it returns `undecided'\n\ | |||
| 3133 | 3156 | ||
| 3134 | if (coding_mask == CODING_CATEGORY_MASK_ANY) | 3157 | if (coding_mask == CODING_CATEGORY_MASK_ANY) |
| 3135 | { | 3158 | { |
| 3136 | val = intern ("undecided"); | 3159 | val = Qundecided; |
| 3137 | if (eol_type != CODING_EOL_UNDECIDED) | 3160 | if (eol_type != CODING_EOL_UNDECIDED |
| 3161 | && eol_type != CODING_EOL_INCONSISTENT) | ||
| 3138 | { | 3162 | { |
| 3139 | Lisp_Object val2 = Fget (val, Qeol_type); | 3163 | Lisp_Object val2 = Fget (Qundecided, Qeol_type); |
| 3140 | if (VECTORP (val2)) | 3164 | if (VECTORP (val2)) |
| 3141 | val = XVECTOR (val2)->contents[eol_type]; | 3165 | val = XVECTOR (val2)->contents[eol_type]; |
| 3142 | } | 3166 | } |
| @@ -3155,13 +3179,26 @@ If only ASCII characters are found, it returns `undecided'\n\ | |||
| 3155 | int idx | 3179 | int idx |
| 3156 | = XFASTINT (Fget (XCONS (val2)->car, Qcoding_category_index)); | 3180 | = XFASTINT (Fget (XCONS (val2)->car, Qcoding_category_index)); |
| 3157 | if (coding_mask & (1 << idx)) | 3181 | if (coding_mask & (1 << idx)) |
| 3158 | val = Fcons (Fsymbol_value (XCONS (val2)->car), val); | 3182 | { |
| 3183 | #if 0 | ||
| 3184 | /* This code is suppressed until we find a better way to | ||
| 3185 | distinguish raw-text and binary. */ | ||
| 3186 | |||
| 3187 | if (idx == CODING_CATEGORY_IDX_RAW_TEXT | ||
| 3188 | && eol_type == CODING_EOL_INCONSISTENT) | ||
| 3189 | val = Fcons (Qno_conversion, val); | ||
| 3190 | else | ||
| 3191 | #endif /* 0 */ | ||
| 3192 | val = Fcons (Fsymbol_value (XCONS (val2)->car), val); | ||
| 3193 | } | ||
| 3159 | } | 3194 | } |
| 3160 | 3195 | ||
| 3161 | /* Then, change the order of the list, while getting subsidiary | 3196 | /* Then, change the order of the list, while getting subsidiary |
| 3162 | coding-systems. */ | 3197 | coding-systems. */ |
| 3163 | val2 = val; | 3198 | val2 = val; |
| 3164 | val = Qnil; | 3199 | val = Qnil; |
| 3200 | if (eol_type == CODING_EOL_INCONSISTENT) | ||
| 3201 | eol_type == CODING_EOL_UNDECIDED; | ||
| 3165 | for (; !NILP (val2); val2 = XCONS (val2)->cdr) | 3202 | for (; !NILP (val2); val2 = XCONS (val2)->cdr) |
| 3166 | { | 3203 | { |
| 3167 | if (eol_type == CODING_EOL_UNDECIDED) | 3204 | if (eol_type == CODING_EOL_UNDECIDED) |
| @@ -3206,6 +3243,7 @@ shrink_conversion_area (begp, endp, coding, encodep) | |||
| 3206 | case coding_type_no_conversion: | 3243 | case coding_type_no_conversion: |
| 3207 | case coding_type_emacs_mule: | 3244 | case coding_type_emacs_mule: |
| 3208 | case coding_type_undecided: | 3245 | case coding_type_undecided: |
| 3246 | case coding_type_raw_text: | ||
| 3209 | /* We need no conversion. */ | 3247 | /* We need no conversion. */ |
| 3210 | *begp = *endp; | 3248 | *begp = *endp; |
| 3211 | return; | 3249 | return; |
| @@ -3243,6 +3281,7 @@ shrink_conversion_area (begp, endp, coding, encodep) | |||
| 3243 | *begp = *endp; | 3281 | *begp = *endp; |
| 3244 | return; | 3282 | return; |
| 3245 | case coding_type_emacs_mule: | 3283 | case coding_type_emacs_mule: |
| 3284 | case coding_type_raw_text: | ||
| 3246 | if (coding->eol_type == CODING_EOL_LF) | 3285 | if (coding->eol_type == CODING_EOL_LF) |
| 3247 | { | 3286 | { |
| 3248 | /* We need no conversion. */ | 3287 | /* We need no conversion. */ |
| @@ -3857,6 +3896,12 @@ syms_of_coding () | |||
| 3857 | Qpre_write_conversion = intern ("pre-write-conversion"); | 3896 | Qpre_write_conversion = intern ("pre-write-conversion"); |
| 3858 | staticpro (&Qpre_write_conversion); | 3897 | staticpro (&Qpre_write_conversion); |
| 3859 | 3898 | ||
| 3899 | Qno_conversion = intern ("no-conversion"); | ||
| 3900 | staticpro (&Qno_conversion); | ||
| 3901 | |||
| 3902 | Qundecided = intern ("undecided"); | ||
| 3903 | staticpro (&Qundecided); | ||
| 3904 | |||
| 3860 | Qcoding_system_spec = intern ("coding-system-spec"); | 3905 | Qcoding_system_spec = intern ("coding-system-spec"); |
| 3861 | staticpro (&Qcoding_system_spec); | 3906 | staticpro (&Qcoding_system_spec); |
| 3862 | 3907 | ||