aboutsummaryrefslogtreecommitdiffstats
path: root/src/coding.c
diff options
context:
space:
mode:
authorKenichi Handa1997-08-28 10:51:12 +0000
committerKenichi Handa1997-08-28 10:51:12 +0000
commit27901516b62b78a4b52c7787c7c115c8d00368d8 (patch)
treec0da83fc214ed75b914c84eaa20826f3d4ca7297 /src/coding.c
parente80de6b18fbc4102b28a86d0169c6f06ce12e9da (diff)
downloademacs-27901516b62b78a4b52c7787c7c115c8d00368d8.tar.gz
emacs-27901516b62b78a4b52c7787c7c115c8d00368d8.zip
(Qno_conversion, Qundecided): New variables.
(syms_of_coding): Initialize and staticpro them. (coding_category_name): Include "coding-category-raw-text". (setup_coding_system): Handle coding_type_raw_text. (detect_coding_mask): Include CODING_CATEGORY_MASK_RAW_TEXT in the return value instead of CODING_CATEGORY_MASK_BINARY. (detect_coding): Do not check the case that `mask' is 0, which never happens now. (detect_eol_type): If EOL format is inconsistent, return CODING_EOL_INCONSISTENT. (detect_eol): If EOL format of raw-text file is inconsistent, detect it as no-conversion. (decode_coding): Handle coding_type_raw_text. (encode_coding): Likewise. (Fdetect_coding_region): Adjusted for the above changes. (shrink_conversion_area): Handle coding_type_raw_text.
Diffstat (limited to 'src/coding.c')
-rw-r--r--src/coding.c129
1 files changed, 87 insertions, 42 deletions
diff --git a/src/coding.c b/src/coding.c
index 1e0e992ea35..66fbe517215 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -67,7 +67,13 @@ Boston, MA 02111-1307, USA. */
67 (all uppercase), we mean the coding system, and when we write 67 (all uppercase), we mean the coding system, and when we write
68 "Big5" (capitalized), we mean the character set. 68 "Big5" (capitalized), we mean the character set.
69 69
70 4. Other 70 4. Raw text
71
72 A coding system for a text containing random 8-bit code. Emacs
73 does no code conversion on such a text except for end-of-line
74 format.
75
76 5. Other
71 77
72 If a user wants to read/write a text encoded in a coding system not 78 If a user wants to read/write a text encoded in a coding system not
73 listed above, he can supply a decoder and an encoder for it in CCL 79 listed above, he can supply a decoder and an encoder for it in CCL
@@ -246,6 +252,7 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed)
246Lisp_Object Qcoding_system, Qeol_type; 252Lisp_Object Qcoding_system, Qeol_type;
247Lisp_Object Qbuffer_file_coding_system; 253Lisp_Object Qbuffer_file_coding_system;
248Lisp_Object Qpost_read_conversion, Qpre_write_conversion; 254Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
255Lisp_Object Qno_conversion, Qundecided;
249 256
250extern Lisp_Object Qinsert_file_contents, Qwrite_region; 257extern Lisp_Object Qinsert_file_contents, Qwrite_region;
251Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; 258Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
@@ -319,6 +326,7 @@ char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
319 "coding-category-iso-7-else", 326 "coding-category-iso-7-else",
320 "coding-category-iso-8-else", 327 "coding-category-iso-8-else",
321 "coding-category-big5", 328 "coding-category-big5",
329 "coding-category-raw-text",
322 "coding-category-binary" 330 "coding-category-binary"
323}; 331};
324 332
@@ -2546,6 +2554,10 @@ setup_coding_system (coding_system, coding)
2546 coding->require_flushing = 1; 2554 coding->require_flushing = 1;
2547 break; 2555 break;
2548 2556
2557 case 5:
2558 coding->type = coding_type_raw_text;
2559 break;
2560
2549 default: 2561 default:
2550 if (EQ (type, Qt)) 2562 if (EQ (type, Qt))
2551 coding->type = coding_type_undecided; 2563 coding->type = coding_type_undecided;
@@ -2687,7 +2699,7 @@ detect_coding_mask (src, src_bytes)
2687 /* If C is a special latin extra code, 2699 /* If C is a special latin extra code,
2688 or is an ISO2022 specific control code of C1 (SS2 or SS3), 2700 or is an ISO2022 specific control code of C1 (SS2 or SS3),
2689 or is an ISO2022 control-sequence-introducer (CSI), 2701 or is an ISO2022 control-sequence-introducer (CSI),
2690 we should also consider the possibility of someof ISO2022 codings. */ 2702 we should also consider the possibility of ISO2022 codings. */
2691 if ((VECTORP (Vlatin_extra_code_table) 2703 if ((VECTORP (Vlatin_extra_code_table)
2692 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) 2704 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2693 || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3) 2705 || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
@@ -2700,14 +2712,14 @@ detect_coding_mask (src, src_bytes)
2700 mask = (detect_coding_iso2022 (src, src_end) 2712 mask = (detect_coding_iso2022 (src, src_end)
2701 | detect_coding_sjis (src, src_end) 2713 | detect_coding_sjis (src, src_end)
2702 | detect_coding_emacs_mule (src, src_end) 2714 | detect_coding_emacs_mule (src, src_end)
2703 | CODING_CATEGORY_MASK_BINARY); 2715 | CODING_CATEGORY_MASK_RAW_TEXT);
2704 2716
2705 else 2717 else
2706 /* C is the first byte of SJIS character code, or a 2718 /* C is the first byte of SJIS character code,
2707 leading-code of Emacs. */ 2719 or a leading-code of Emacs' internal format (emacs-mule). */
2708 mask = (detect_coding_sjis (src, src_end) 2720 mask = (detect_coding_sjis (src, src_end)
2709 | detect_coding_emacs_mule (src, src_end) 2721 | detect_coding_emacs_mule (src, src_end)
2710 | CODING_CATEGORY_MASK_BINARY); 2722 | CODING_CATEGORY_MASK_RAW_TEXT);
2711 } 2723 }
2712 else 2724 else
2713 /* C is a character of ISO2022 in graphic plane right, 2725 /* C is a character of ISO2022 in graphic plane right,
@@ -2716,7 +2728,7 @@ detect_coding_mask (src, src_bytes)
2716 mask = (detect_coding_iso2022 (src, src_end) 2728 mask = (detect_coding_iso2022 (src, src_end)
2717 | detect_coding_sjis (src, src_end) 2729 | detect_coding_sjis (src, src_end)
2718 | detect_coding_big5 (src, src_end) 2730 | detect_coding_big5 (src, src_end)
2719 | CODING_CATEGORY_MASK_BINARY); 2731 | CODING_CATEGORY_MASK_RAW_TEXT);
2720 2732
2721 return mask; 2733 return mask;
2722} 2734}
@@ -2732,42 +2744,33 @@ detect_coding (coding, src, src_bytes)
2732{ 2744{
2733 int mask = detect_coding_mask (src, src_bytes); 2745 int mask = detect_coding_mask (src, src_bytes);
2734 int idx; 2746 int idx;
2747 Lisp_Object val = Vcoding_category_list;
2735 2748
2736 if (mask == CODING_CATEGORY_MASK_ANY) 2749 if (mask == CODING_CATEGORY_MASK_ANY)
2737 /* We found nothing other than ASCII. There's nothing to do. */ 2750 /* We found nothing other than ASCII. There's nothing to do. */
2738 return; 2751 return;
2739 2752
2740 if (!mask) 2753 /* We found some plausible coding systems. Let's use a coding
2741 /* The source text seems to be encoded in unknown coding system. 2754 system of the highest priority. */
2742 Emacs regards the category of such a kind of coding system as
2743 `coding-category-binary'. We assume that a user has assigned
2744 an appropriate coding system for a `coding-category-binary'. */
2745 idx = CODING_CATEGORY_IDX_BINARY;
2746 else
2747 {
2748 /* We found some plausible coding systems. Let's use a coding
2749 system of the highest priority. */
2750 Lisp_Object val = Vcoding_category_list;
2751 2755
2752 if (CONSP (val)) 2756 if (CONSP (val))
2753 while (!NILP (val)) 2757 while (!NILP (val))
2754 { 2758 {
2755 idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index)); 2759 idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
2756 if ((idx < CODING_CATEGORY_IDX_MAX) && (mask & (1 << idx))) 2760 if ((idx < CODING_CATEGORY_IDX_MAX) && (mask & (1 << idx)))
2757 break; 2761 break;
2758 val = XCONS (val)->cdr; 2762 val = XCONS (val)->cdr;
2759 } 2763 }
2760 else 2764 else
2761 val = Qnil; 2765 val = Qnil;
2762 2766
2763 if (NILP (val)) 2767 if (NILP (val))
2764 { 2768 {
2765 /* For unknown reason, `Vcoding_category_list' contains none 2769 /* For unknown reason, `Vcoding_category_list' contains none of
2766 of found categories. Let's use any of them. */ 2770 found categories. Let's use any of them. */
2767 for (idx = 0; idx < CODING_CATEGORY_IDX_MAX; idx++) 2771 for (idx = 0; idx < CODING_CATEGORY_IDX_MAX; idx++)
2768 if (mask & (1 << idx)) 2772 if (mask & (1 << idx))
2769 break; 2773 break;
2770 }
2771 } 2774 }
2772 setup_coding_system (XSYMBOL (coding_category_table[idx])->value, coding); 2775 setup_coding_system (XSYMBOL (coding_category_table[idx])->value, coding);
2773} 2776}
@@ -2807,8 +2810,8 @@ detect_eol_type (src, src_bytes)
2807 eol_type = this_eol_type; 2810 eol_type = this_eol_type;
2808 else if (eol_type != this_eol_type) 2811 else if (eol_type != this_eol_type)
2809 /* The found type is different from what found before. 2812 /* The found type is different from what found before.
2810 We had better not decode end-of-line. */ 2813 Let's notify the caller about this inconsistency. */
2811 return CODING_EOL_LF; 2814 return CODING_EOL_INCONSISTENT;
2812 } 2815 }
2813 } 2816 }
2814 2817
@@ -2832,6 +2835,24 @@ detect_eol (coding, src, src_bytes)
2832 /* We found no end-of-line in the source text. */ 2835 /* We found no end-of-line in the source text. */
2833 return; 2836 return;
2834 2837
2838 if (eol_type == CODING_EOL_INCONSISTENT)
2839 {
2840#if 0
2841 /* This code is suppressed until we find a better way to
2842 distinguish raw-text and binary. */
2843
2844 /* If we have already detected that the coding is raw-text, the
2845 coding should actually be no-conversion. */
2846 if (coding->type == coding_type_raw_text)
2847 {
2848 setup_coding_system (Qno_conversion, coding);
2849 return;
2850 }
2851 /* Else, let's decode only text code anyway. */
2852#endif /* 0 */
2853 eol_type == CODING_EOL_LF;
2854 }
2855
2835 coding_system = coding->symbol; 2856 coding_system = coding->symbol;
2836 while (!NILP (coding_system) 2857 while (!NILP (coding_system)
2837 && NILP (val = Fget (coding_system, Qeol_type))) 2858 && NILP (val = Fget (coding_system, Qeol_type)))
@@ -2877,6 +2898,7 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes, consumed)
2877 2898
2878 case coding_type_emacs_mule: 2899 case coding_type_emacs_mule:
2879 case coding_type_undecided: 2900 case coding_type_undecided:
2901 case coding_type_raw_text:
2880 if (coding->eol_type == CODING_EOL_LF 2902 if (coding->eol_type == CODING_EOL_LF
2881 || coding->eol_type == CODING_EOL_UNDECIDED) 2903 || coding->eol_type == CODING_EOL_UNDECIDED)
2882 goto label_no_conversion; 2904 goto label_no_conversion;
@@ -2941,6 +2963,7 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes, consumed)
2941 2963
2942 case coding_type_emacs_mule: 2964 case coding_type_emacs_mule:
2943 case coding_type_undecided: 2965 case coding_type_undecided:
2966 case coding_type_raw_text:
2944 if (coding->eol_type == CODING_EOL_LF 2967 if (coding->eol_type == CODING_EOL_LF
2945 || coding->eol_type == CODING_EOL_UNDECIDED) 2968 || coding->eol_type == CODING_EOL_UNDECIDED)
2946 goto label_no_conversion; 2969 goto label_no_conversion;
@@ -3133,10 +3156,11 @@ If only ASCII characters are found, it returns `undecided'\n\
3133 3156
3134 if (coding_mask == CODING_CATEGORY_MASK_ANY) 3157 if (coding_mask == CODING_CATEGORY_MASK_ANY)
3135 { 3158 {
3136 val = intern ("undecided"); 3159 val = Qundecided;
3137 if (eol_type != CODING_EOL_UNDECIDED) 3160 if (eol_type != CODING_EOL_UNDECIDED
3161 && eol_type != CODING_EOL_INCONSISTENT)
3138 { 3162 {
3139 Lisp_Object val2 = Fget (val, Qeol_type); 3163 Lisp_Object val2 = Fget (Qundecided, Qeol_type);
3140 if (VECTORP (val2)) 3164 if (VECTORP (val2))
3141 val = XVECTOR (val2)->contents[eol_type]; 3165 val = XVECTOR (val2)->contents[eol_type];
3142 } 3166 }
@@ -3155,13 +3179,26 @@ If only ASCII characters are found, it returns `undecided'\n\
3155 int idx 3179 int idx
3156 = XFASTINT (Fget (XCONS (val2)->car, Qcoding_category_index)); 3180 = XFASTINT (Fget (XCONS (val2)->car, Qcoding_category_index));
3157 if (coding_mask & (1 << idx)) 3181 if (coding_mask & (1 << idx))
3158 val = Fcons (Fsymbol_value (XCONS (val2)->car), val); 3182 {
3183#if 0
3184 /* This code is suppressed until we find a better way to
3185 distinguish raw-text and binary. */
3186
3187 if (idx == CODING_CATEGORY_IDX_RAW_TEXT
3188 && eol_type == CODING_EOL_INCONSISTENT)
3189 val = Fcons (Qno_conversion, val);
3190 else
3191#endif /* 0 */
3192 val = Fcons (Fsymbol_value (XCONS (val2)->car), val);
3193 }
3159 } 3194 }
3160 3195
3161 /* Then, change the order of the list, while getting subsidiary 3196 /* Then, change the order of the list, while getting subsidiary
3162 coding-systems. */ 3197 coding-systems. */
3163 val2 = val; 3198 val2 = val;
3164 val = Qnil; 3199 val = Qnil;
3200 if (eol_type == CODING_EOL_INCONSISTENT)
3201 eol_type == CODING_EOL_UNDECIDED;
3165 for (; !NILP (val2); val2 = XCONS (val2)->cdr) 3202 for (; !NILP (val2); val2 = XCONS (val2)->cdr)
3166 { 3203 {
3167 if (eol_type == CODING_EOL_UNDECIDED) 3204 if (eol_type == CODING_EOL_UNDECIDED)
@@ -3206,6 +3243,7 @@ shrink_conversion_area (begp, endp, coding, encodep)
3206 case coding_type_no_conversion: 3243 case coding_type_no_conversion:
3207 case coding_type_emacs_mule: 3244 case coding_type_emacs_mule:
3208 case coding_type_undecided: 3245 case coding_type_undecided:
3246 case coding_type_raw_text:
3209 /* We need no conversion. */ 3247 /* We need no conversion. */
3210 *begp = *endp; 3248 *begp = *endp;
3211 return; 3249 return;
@@ -3243,6 +3281,7 @@ shrink_conversion_area (begp, endp, coding, encodep)
3243 *begp = *endp; 3281 *begp = *endp;
3244 return; 3282 return;
3245 case coding_type_emacs_mule: 3283 case coding_type_emacs_mule:
3284 case coding_type_raw_text:
3246 if (coding->eol_type == CODING_EOL_LF) 3285 if (coding->eol_type == CODING_EOL_LF)
3247 { 3286 {
3248 /* We need no conversion. */ 3287 /* We need no conversion. */
@@ -3857,6 +3896,12 @@ syms_of_coding ()
3857 Qpre_write_conversion = intern ("pre-write-conversion"); 3896 Qpre_write_conversion = intern ("pre-write-conversion");
3858 staticpro (&Qpre_write_conversion); 3897 staticpro (&Qpre_write_conversion);
3859 3898
3899 Qno_conversion = intern ("no-conversion");
3900 staticpro (&Qno_conversion);
3901
3902 Qundecided = intern ("undecided");
3903 staticpro (&Qundecided);
3904
3860 Qcoding_system_spec = intern ("coding-system-spec"); 3905 Qcoding_system_spec = intern ("coding-system-spec");
3861 staticpro (&Qcoding_system_spec); 3906 staticpro (&Qcoding_system_spec);
3862 3907