diff options
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 132 |
1 files changed, 108 insertions, 24 deletions
diff --git a/src/coding.c b/src/coding.c index 635b28aa713..1a293fb6a3b 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -25,10 +25,11 @@ Boston, MA 02111-1307, USA. */ | |||
| 25 | 2. Emacs' internal format (emacs-mule) handlers | 25 | 2. Emacs' internal format (emacs-mule) handlers |
| 26 | 3. ISO2022 handlers | 26 | 3. ISO2022 handlers |
| 27 | 4. Shift-JIS and BIG5 handlers | 27 | 4. Shift-JIS and BIG5 handlers |
| 28 | 5. End-of-line handlers | 28 | 5. CCL handlers |
| 29 | 6. C library functions | 29 | 6. End-of-line handlers |
| 30 | 7. Emacs Lisp library functions | 30 | 7. C library functions |
| 31 | 8. Post-amble | 31 | 8. Emacs Lisp library functions |
| 32 | 9. Post-amble | ||
| 32 | 33 | ||
| 33 | */ | 34 | */ |
| 34 | 35 | ||
| @@ -277,6 +278,7 @@ Lisp_Object Qpost_read_conversion, Qpre_write_conversion; | |||
| 277 | Lisp_Object Qno_conversion, Qundecided; | 278 | Lisp_Object Qno_conversion, Qundecided; |
| 278 | Lisp_Object Qcoding_system_history; | 279 | Lisp_Object Qcoding_system_history; |
| 279 | Lisp_Object Qsafe_charsets; | 280 | Lisp_Object Qsafe_charsets; |
| 281 | Lisp_Object Qvalid_codes; | ||
| 280 | 282 | ||
| 281 | extern Lisp_Object Qinsert_file_contents, Qwrite_region; | 283 | extern Lisp_Object Qinsert_file_contents, Qwrite_region; |
| 282 | Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; | 284 | Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; |
| @@ -360,7 +362,8 @@ char *coding_category_name[CODING_CATEGORY_IDX_MAX] = { | |||
| 360 | "coding-category-iso-8-else", | 362 | "coding-category-iso-8-else", |
| 361 | "coding-category-big5", | 363 | "coding-category-big5", |
| 362 | "coding-category-raw-text", | 364 | "coding-category-raw-text", |
| 363 | "coding-category-binary" | 365 | "coding-category-binary", |
| 366 | "coding-category-ccl" | ||
| 364 | }; | 367 | }; |
| 365 | 368 | ||
| 366 | /* Table of pointers to coding systems corresponding to each coding | 369 | /* Table of pointers to coding systems corresponding to each coding |
| @@ -2451,7 +2454,34 @@ encode_coding_sjis_big5 (coding, source, destination, | |||
| 2451 | } | 2454 | } |
| 2452 | 2455 | ||
| 2453 | 2456 | ||
| 2454 | /*** 5. End-of-line handlers ***/ | 2457 | /*** 5. CCL handlers ***/ |
| 2458 | |||
| 2459 | /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | ||
| 2460 | Check if a text is encoded in a coding system of which | ||
| 2461 | encoder/decoder are written in CCL program. If it is, return | ||
| 2462 | CODING_CATEGORY_MASK_CCL, else return 0. */ | ||
| 2463 | |||
| 2464 | int | ||
| 2465 | detect_coding_ccl (src, src_end) | ||
| 2466 | unsigned char *src, *src_end; | ||
| 2467 | { | ||
| 2468 | unsigned char *valid; | ||
| 2469 | |||
| 2470 | /* No coding system is assigned to coding-category-ccl. */ | ||
| 2471 | if (!coding_system_table[CODING_CATEGORY_IDX_CCL]) | ||
| 2472 | return 0; | ||
| 2473 | |||
| 2474 | valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes; | ||
| 2475 | while (src < src_end) | ||
| 2476 | { | ||
| 2477 | if (! valid[*src]) return 0; | ||
| 2478 | src++; | ||
| 2479 | } | ||
| 2480 | return CODING_CATEGORY_MASK_CCL; | ||
| 2481 | } | ||
| 2482 | |||
| 2483 | |||
| 2484 | /*** 6. End-of-line handlers ***/ | ||
| 2455 | 2485 | ||
| 2456 | /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". | 2486 | /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". |
| 2457 | This function is called only when `coding->eol_type' is | 2487 | This function is called only when `coding->eol_type' is |
| @@ -2671,7 +2701,7 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes) | |||
| 2671 | } | 2701 | } |
| 2672 | 2702 | ||
| 2673 | 2703 | ||
| 2674 | /*** 6. C library functions ***/ | 2704 | /*** 7. C library functions ***/ |
| 2675 | 2705 | ||
| 2676 | /* In Emacs Lisp, coding system is represented by a Lisp symbol which | 2706 | /* In Emacs Lisp, coding system is represented by a Lisp symbol which |
| 2677 | has a property `coding-system'. The value of this property is a | 2707 | has a property `coding-system'. The value of this property is a |
| @@ -3043,6 +3073,31 @@ setup_coding_system (coding_system, coding) | |||
| 3043 | } | 3073 | } |
| 3044 | else | 3074 | else |
| 3045 | goto label_invalid_coding_system; | 3075 | goto label_invalid_coding_system; |
| 3076 | |||
| 3077 | bzero (coding->spec.ccl.valid_codes, 256); | ||
| 3078 | val = Fplist_get (plist, Qvalid_codes); | ||
| 3079 | if (CONSP (val)) | ||
| 3080 | { | ||
| 3081 | Lisp_Object this; | ||
| 3082 | |||
| 3083 | for (this = XCONS (val)->car; CONSP (val); val = XCONS (val)->cdr) | ||
| 3084 | { | ||
| 3085 | if (INTEGERP (this) | ||
| 3086 | && XINT (this) >= 0 && XINT (this) < 256) | ||
| 3087 | coding->spec.ccl.valid_codes[XINT (this)] = 1; | ||
| 3088 | else if (CONSP (this) | ||
| 3089 | && INTEGERP (XCONS (this)->car) | ||
| 3090 | && INTEGERP (XCONS (this)->cdr)) | ||
| 3091 | { | ||
| 3092 | int start = XINT (XCONS (this)->car); | ||
| 3093 | int end = XINT (XCONS (this)->cdr); | ||
| 3094 | |||
| 3095 | if (start >= 0 && start <= end && end < 256) | ||
| 3096 | while (start < end) | ||
| 3097 | coding->spec.ccl.valid_codes[start++] = 1; | ||
| 3098 | } | ||
| 3099 | } | ||
| 3100 | } | ||
| 3046 | } | 3101 | } |
| 3047 | coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK; | 3102 | coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK; |
| 3048 | break; | 3103 | break; |
| @@ -3158,6 +3213,12 @@ setup_raw_text_coding_system (coding) | |||
| 3158 | as BIG5. Assigned the coding-system (Lisp symbol) | 3213 | as BIG5. Assigned the coding-system (Lisp symbol) |
| 3159 | `cn-big5' by default. | 3214 | `cn-big5' by default. |
| 3160 | 3215 | ||
| 3216 | o coding-category-ccl | ||
| 3217 | |||
| 3218 | The category for a coding system of which encoder/decoder is | ||
| 3219 | written in CCL programs. The default value is nil, i.e., no | ||
| 3220 | coding system is assigned. | ||
| 3221 | |||
| 3161 | o coding-category-binary | 3222 | o coding-category-binary |
| 3162 | 3223 | ||
| 3163 | The category for a coding system not categorized in any of the | 3224 | The category for a coding system not categorized in any of the |
| @@ -3264,6 +3325,12 @@ detect_coding_mask (source, src_bytes, priorities, skip) | |||
| 3264 | | CODING_CATEGORY_MASK_SJIS | 3325 | | CODING_CATEGORY_MASK_SJIS |
| 3265 | | CODING_CATEGORY_MASK_BIG5); | 3326 | | CODING_CATEGORY_MASK_BIG5); |
| 3266 | 3327 | ||
| 3328 | /* Or, we may have to consider the possibility of CCL. */ | ||
| 3329 | if (coding_system_table[CODING_CATEGORY_IDX_CCL] | ||
| 3330 | && (coding_system_table[CODING_CATEGORY_IDX_CCL] | ||
| 3331 | ->spec.ccl.valid_codes)[c]) | ||
| 3332 | try |= CODING_CATEGORY_MASK_CCL; | ||
| 3333 | |||
| 3267 | mask = 0; | 3334 | mask = 0; |
| 3268 | if (priorities) | 3335 | if (priorities) |
| 3269 | { | 3336 | { |
| @@ -3277,6 +3344,8 @@ detect_coding_mask (source, src_bytes, priorities, skip) | |||
| 3277 | mask = detect_coding_big5 (src, src_end); | 3344 | mask = detect_coding_big5 (src, src_end); |
| 3278 | else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE) | 3345 | else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE) |
| 3279 | mask = detect_coding_emacs_mule (src, src_end); | 3346 | mask = detect_coding_emacs_mule (src, src_end); |
| 3347 | else if (priorities[i] & CODING_CATEGORY_MASK_CCL) | ||
| 3348 | mask = detect_coding_ccl (src, src_end); | ||
| 3280 | else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT) | 3349 | else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT) |
| 3281 | mask = CODING_CATEGORY_MASK_RAW_TEXT; | 3350 | mask = CODING_CATEGORY_MASK_RAW_TEXT; |
| 3282 | else if (priorities[i] & CODING_CATEGORY_MASK_BINARY) | 3351 | else if (priorities[i] & CODING_CATEGORY_MASK_BINARY) |
| @@ -3293,7 +3362,9 @@ detect_coding_mask (source, src_bytes, priorities, skip) | |||
| 3293 | if (try & CODING_CATEGORY_MASK_BIG5) | 3362 | if (try & CODING_CATEGORY_MASK_BIG5) |
| 3294 | mask |= detect_coding_big5 (src, src_end); | 3363 | mask |= detect_coding_big5 (src, src_end); |
| 3295 | if (try & CODING_CATEGORY_MASK_EMACS_MULE) | 3364 | if (try & CODING_CATEGORY_MASK_EMACS_MULE) |
| 3296 | mask |= detect_coding_emacs_mule (src, src_end); | 3365 | mask |= detect_coding_emacs_mule (src, src_end); |
| 3366 | if (try & CODING_CATEGORY_MASK_CCL) | ||
| 3367 | mask |= detect_coding_ccl (src, src_end); | ||
| 3297 | } | 3368 | } |
| 3298 | return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY); | 3369 | return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY); |
| 3299 | 3370 | ||
| @@ -4445,7 +4516,7 @@ code_convert_string (str, coding, encodep, nocopy) | |||
| 4445 | 4516 | ||
| 4446 | 4517 | ||
| 4447 | #ifdef emacs | 4518 | #ifdef emacs |
| 4448 | /*** 7. Emacs Lisp library functions ***/ | 4519 | /*** 8. Emacs Lisp library functions ***/ |
| 4449 | 4520 | ||
| 4450 | DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0, | 4521 | DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0, |
| 4451 | "Return t if OBJECT is nil or a coding-system.\n\ | 4522 | "Return t if OBJECT is nil or a coding-system.\n\ |
| @@ -4979,28 +5050,38 @@ which is a list of all the arguments given to this function.") | |||
| 4979 | return Qnil; | 5050 | return Qnil; |
| 4980 | } | 5051 | } |
| 4981 | 5052 | ||
| 4982 | DEFUN ("update-iso-coding-systems", Fupdate_iso_coding_systems, | 5053 | DEFUN ("update-coding-systems-internal", Fupdate_coding_systems_internal, |
| 4983 | Supdate_iso_coding_systems, 0, 0, 0, | 5054 | Supdate_coding_systems_internal, 0, 0, 0, |
| 4984 | "Update internal database for ISO2022 based coding systems.\n\ | 5055 | "Update internal database for ISO2022 and CCL based coding systems.\n\ |
| 4985 | When values of the following coding categories are changed, you must\n\ | 5056 | When values of the following coding categories are changed, you must\n\ |
| 4986 | call this function:\n\ | 5057 | call this function:\n\ |
| 4987 | coding-category-iso-7, coding-category-iso-7-tight,\n\ | 5058 | coding-category-iso-7, coding-category-iso-7-tight,\n\ |
| 4988 | coding-category-iso-8-1, coding-category-iso-8-2,\n\ | 5059 | coding-category-iso-8-1, coding-category-iso-8-2,\n\ |
| 4989 | coding-category-iso-7-else, coding-category-iso-8-else") | 5060 | coding-category-iso-7-else, coding-category-iso-8-else,\n\ |
| 5061 | coding-category-ccl") | ||
| 4990 | () | 5062 | () |
| 4991 | { | 5063 | { |
| 4992 | int i; | 5064 | int i; |
| 4993 | 5065 | ||
| 4994 | for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_ISO_8_ELSE; | 5066 | for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_CCL; i++) |
| 4995 | i++) | ||
| 4996 | { | 5067 | { |
| 4997 | if (! coding_system_table[i]) | 5068 | Lisp_Object val; |
| 4998 | coding_system_table[i] | 5069 | |
| 4999 | = (struct coding_system *) xmalloc (sizeof (struct coding_system)); | 5070 | val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value; |
| 5000 | setup_coding_system | 5071 | if (!NILP (val)) |
| 5001 | (XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value, | 5072 | { |
| 5002 | coding_system_table[i]); | 5073 | if (! coding_system_table[i]) |
| 5074 | coding_system_table[i] = ((struct coding_system *) | ||
| 5075 | xmalloc (sizeof (struct coding_system))); | ||
| 5076 | setup_coding_system (val, coding_system_table[i]); | ||
| 5077 | } | ||
| 5078 | else if (coding_system_table[i]) | ||
| 5079 | { | ||
| 5080 | xfree (coding_system_table[i]); | ||
| 5081 | coding_system_table[i] = NULL; | ||
| 5082 | } | ||
| 5003 | } | 5083 | } |
| 5084 | |||
| 5004 | return Qnil; | 5085 | return Qnil; |
| 5005 | } | 5086 | } |
| 5006 | 5087 | ||
| @@ -5035,7 +5116,7 @@ This function is internal use only.") | |||
| 5035 | #endif /* emacs */ | 5116 | #endif /* emacs */ |
| 5036 | 5117 | ||
| 5037 | 5118 | ||
| 5038 | /*** 8. Post-amble ***/ | 5119 | /*** 9. Post-amble ***/ |
| 5039 | 5120 | ||
| 5040 | void | 5121 | void |
| 5041 | init_coding () | 5122 | init_coding () |
| @@ -5193,7 +5274,7 @@ syms_of_coding () | |||
| 5193 | 5274 | ||
| 5194 | Qtranslation_table = intern ("translation-table"); | 5275 | Qtranslation_table = intern ("translation-table"); |
| 5195 | staticpro (&Qtranslation_table); | 5276 | staticpro (&Qtranslation_table); |
| 5196 | Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (0)); | 5277 | Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1)); |
| 5197 | 5278 | ||
| 5198 | Qtranslation_table_id = intern ("translation-table-id"); | 5279 | Qtranslation_table_id = intern ("translation-table-id"); |
| 5199 | staticpro (&Qtranslation_table_id); | 5280 | staticpro (&Qtranslation_table_id); |
| @@ -5207,6 +5288,9 @@ syms_of_coding () | |||
| 5207 | Qsafe_charsets = intern ("safe-charsets"); | 5288 | Qsafe_charsets = intern ("safe-charsets"); |
| 5208 | staticpro (&Qsafe_charsets); | 5289 | staticpro (&Qsafe_charsets); |
| 5209 | 5290 | ||
| 5291 | Qvalid_codes = intern ("valid-codes"); | ||
| 5292 | staticpro (&Qvalid_codes); | ||
| 5293 | |||
| 5210 | Qemacs_mule = intern ("emacs-mule"); | 5294 | Qemacs_mule = intern ("emacs-mule"); |
| 5211 | staticpro (&Qemacs_mule); | 5295 | staticpro (&Qemacs_mule); |
| 5212 | 5296 | ||
| @@ -5233,7 +5317,7 @@ syms_of_coding () | |||
| 5233 | defsubr (&Sset_keyboard_coding_system_internal); | 5317 | defsubr (&Sset_keyboard_coding_system_internal); |
| 5234 | defsubr (&Skeyboard_coding_system); | 5318 | defsubr (&Skeyboard_coding_system); |
| 5235 | defsubr (&Sfind_operation_coding_system); | 5319 | defsubr (&Sfind_operation_coding_system); |
| 5236 | defsubr (&Supdate_iso_coding_systems); | 5320 | defsubr (&Supdate_coding_systems_internal); |
| 5237 | defsubr (&Sset_coding_priority_internal); | 5321 | defsubr (&Sset_coding_priority_internal); |
| 5238 | 5322 | ||
| 5239 | DEFVAR_LISP ("coding-system-list", &Vcoding_system_list, | 5323 | DEFVAR_LISP ("coding-system-list", &Vcoding_system_list, |