diff options
| author | Kenichi Handa | 1997-10-23 12:01:50 +0000 |
|---|---|---|
| committer | Kenichi Handa | 1997-10-23 12:01:50 +0000 |
| commit | 70c222451ed8c2557306ad46d862b7fbef0828d2 (patch) | |
| tree | f93ed79611b4fb5fe5f747cf5cd943124a465ec0 /src/coding.c | |
| parent | 1db9ba06365d6d77f8ac34ff694d001e70af84a8 (diff) | |
| download | emacs-70c222451ed8c2557306ad46d862b7fbef0828d2.tar.gz emacs-70c222451ed8c2557306ad46d862b7fbef0828d2.zip | |
(encode_designation_at_bol): Fix bug in finding the graphic
registers that should be designated at bol.
(Qsafe_charsets): New variable.
(syms_of_coding): Initialize and staticpro it.
(detect_coding_iso2022): Handle SS2 and SS3 correctly.
(DECODE_ISO_CHARACTER): Recover from incorrect encoding in a less
dangerous way.
(ENCODE_DESIGNATION): Get charset revision number by
CODING_SPEC_ISO_REVISION_NUMBER.
(setup_coding_system): Initialize the member safe_charsets from
the coding system's safe-charsets property. Initialize the
member charset_revision_number of struct iso2022_spec.
(ENCODE_ISO_CHARACTER_DIMENSION1): Adjusted for the change of
`safe_charsets' member.
(ENCODE_ISO_CHARACTER_DIMENSION2): Likewise.
(code_convert_region): Restore the current point after calling a
function in coding->post_read_conversion.
Diffstat (limited to 'src/coding.c')
| -rw-r--r-- | src/coding.c | 87 |
1 files changed, 67 insertions, 20 deletions
diff --git a/src/coding.c b/src/coding.c index 12891aec6e9..ceb579a7239 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -253,6 +253,7 @@ Lisp_Object Qbuffer_file_coding_system; | |||
| 253 | Lisp_Object Qpost_read_conversion, Qpre_write_conversion; | 253 | Lisp_Object Qpost_read_conversion, Qpre_write_conversion; |
| 254 | Lisp_Object Qno_conversion, Qundecided; | 254 | Lisp_Object Qno_conversion, Qundecided; |
| 255 | Lisp_Object Qcoding_system_history; | 255 | Lisp_Object Qcoding_system_history; |
| 256 | Lisp_Object Qsafe_charsets; | ||
| 256 | 257 | ||
| 257 | extern Lisp_Object Qinsert_file_contents, Qwrite_region; | 258 | extern Lisp_Object Qinsert_file_contents, Qwrite_region; |
| 258 | Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; | 259 | Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; |
| @@ -705,6 +706,13 @@ detect_coding_iso2022 (src, src_end) | |||
| 705 | { | 706 | { |
| 706 | int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE; | 707 | int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE; |
| 707 | 708 | ||
| 709 | if (c != ISO_CODE_CSI) | ||
| 710 | { | ||
| 711 | if (coding_iso_8_1.flags & CODING_FLAG_ISO_SINGLE_SHIFT) | ||
| 712 | newmask |= CODING_CATEGORY_MASK_ISO_8_1; | ||
| 713 | if (coding_iso_8_2.flags & CODING_FLAG_ISO_SINGLE_SHIFT) | ||
| 714 | newmask |= CODING_CATEGORY_MASK_ISO_8_2; | ||
| 715 | } | ||
| 708 | if (VECTORP (Vlatin_extra_code_table) | 716 | if (VECTORP (Vlatin_extra_code_table) |
| 709 | && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) | 717 | && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) |
| 710 | { | 718 | { |
| @@ -774,7 +782,15 @@ detect_coding_iso2022 (src, src_end) | |||
| 774 | if ((charset) >= 0) \ | 782 | if ((charset) >= 0) \ |
| 775 | { \ | 783 | { \ |
| 776 | if (CHARSET_DIMENSION (charset) == 2) \ | 784 | if (CHARSET_DIMENSION (charset) == 2) \ |
| 777 | ONE_MORE_BYTE (c2); \ | 785 | { \ |
| 786 | ONE_MORE_BYTE (c2); \ | ||
| 787 | if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F \ | ||
| 788 | && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0) \ | ||
| 789 | { \ | ||
| 790 | src--; \ | ||
| 791 | c2 = ' '; \ | ||
| 792 | } \ | ||
| 793 | } \ | ||
| 778 | if (!NILP (unification_table) \ | 794 | if (!NILP (unification_table) \ |
| 779 | && ((c_alt = unify_char (unification_table, \ | 795 | && ((c_alt = unify_char (unification_table, \ |
| 780 | -1, (charset), c1, c2)) >= 0)) \ | 796 | -1, (charset), c1, c2)) >= 0)) \ |
| @@ -1131,13 +1147,12 @@ decode_coding_iso2022 (coding, source, destination, | |||
| 1131 | unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset); \ | 1147 | unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset); \ |
| 1132 | char *intermediate_char_94 = "()*+"; \ | 1148 | char *intermediate_char_94 = "()*+"; \ |
| 1133 | char *intermediate_char_96 = ",-./"; \ | 1149 | char *intermediate_char_96 = ",-./"; \ |
| 1134 | Lisp_Object temp \ | 1150 | int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset); \ |
| 1135 | = Fassq (make_number (charset), Vcharset_revision_alist); \ | 1151 | if (revision < 255) \ |
| 1136 | if (! NILP (temp)) \ | 1152 | { \ |
| 1137 | { \ | ||
| 1138 | *dst++ = ISO_CODE_ESC; \ | 1153 | *dst++ = ISO_CODE_ESC; \ |
| 1139 | *dst++ = '&'; \ | 1154 | *dst++ = '&'; \ |
| 1140 | *dst++ = XINT (XCONS (temp)->cdr) + '@'; \ | 1155 | *dst++ = '@' + revision; \ |
| 1141 | } \ | 1156 | } \ |
| 1142 | *dst++ = ISO_CODE_ESC; \ | 1157 | *dst++ = ISO_CODE_ESC; \ |
| 1143 | if (CHARSET_DIMENSION (charset) == 1) \ | 1158 | if (CHARSET_DIMENSION (charset) == 1) \ |
| @@ -1241,7 +1256,7 @@ decode_coding_iso2022 (coding, source, destination, | |||
| 1241 | break; \ | 1256 | break; \ |
| 1242 | } \ | 1257 | } \ |
| 1243 | else if (coding->flags & CODING_FLAG_ISO_SAFE \ | 1258 | else if (coding->flags & CODING_FLAG_ISO_SAFE \ |
| 1244 | && !CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset]) \ | 1259 | && !coding->safe_charsets[charset]) \ |
| 1245 | { \ | 1260 | { \ |
| 1246 | /* We should not encode this character, instead produce one or \ | 1261 | /* We should not encode this character, instead produce one or \ |
| 1247 | two `?'s. */ \ | 1262 | two `?'s. */ \ |
| @@ -1284,7 +1299,7 @@ decode_coding_iso2022 (coding, source, destination, | |||
| 1284 | break; \ | 1299 | break; \ |
| 1285 | } \ | 1300 | } \ |
| 1286 | else if (coding->flags & CODING_FLAG_ISO_SAFE \ | 1301 | else if (coding->flags & CODING_FLAG_ISO_SAFE \ |
| 1287 | && !CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset]) \ | 1302 | && !coding->safe_charsets[charset]) \ |
| 1288 | { \ | 1303 | { \ |
| 1289 | /* We should not encode this character, instead produce one or \ | 1304 | /* We should not encode this character, instead produce one or \ |
| 1290 | two `?'s. */ \ | 1305 | two `?'s. */ \ |
| @@ -1450,7 +1465,7 @@ encode_designation_at_bol (coding, table, src, src_end, dstp) | |||
| 1450 | } | 1465 | } |
| 1451 | 1466 | ||
| 1452 | reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset); | 1467 | reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset); |
| 1453 | if (r[reg] == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION) | 1468 | if (r[reg] < 0) |
| 1454 | { | 1469 | { |
| 1455 | found++; | 1470 | found++; |
| 1456 | r[reg] = charset; | 1471 | r[reg] = charset; |
| @@ -2302,6 +2317,7 @@ setup_coding_system (coding_system, coding) | |||
| 2302 | { | 2317 | { |
| 2303 | Lisp_Object coding_spec, plist, type, eol_type; | 2318 | Lisp_Object coding_spec, plist, type, eol_type; |
| 2304 | Lisp_Object val; | 2319 | Lisp_Object val; |
| 2320 | int i; | ||
| 2305 | 2321 | ||
| 2306 | /* At first, set several fields to default values. */ | 2322 | /* At first, set several fields to default values. */ |
| 2307 | coding->require_flushing = 0; | 2323 | coding->require_flushing = 0; |
| @@ -2344,6 +2360,23 @@ setup_coding_system (coding_system, coding) | |||
| 2344 | coding->character_unification_table_for_encode | 2360 | coding->character_unification_table_for_encode |
| 2345 | = CHAR_TABLE_P (val) ? val : Qnil; | 2361 | = CHAR_TABLE_P (val) ? val : Qnil; |
| 2346 | 2362 | ||
| 2363 | val = Fplist_get (plist, Qsafe_charsets); | ||
| 2364 | if (EQ (val, Qt)) | ||
| 2365 | { | ||
| 2366 | for (i = 0; i <= MAX_CHARSET; i++) | ||
| 2367 | coding->safe_charsets[i] = 1; | ||
| 2368 | } | ||
| 2369 | else | ||
| 2370 | { | ||
| 2371 | bzero (coding->safe_charsets, MAX_CHARSET + 1); | ||
| 2372 | while (CONSP (val)) | ||
| 2373 | { | ||
| 2374 | if ((i = get_charset_id (XCONS (val)->car)) >= 0) | ||
| 2375 | coding->safe_charsets[i] = 1; | ||
| 2376 | val = XCONS (val)->cdr; | ||
| 2377 | } | ||
| 2378 | } | ||
| 2379 | |||
| 2347 | if (VECTORP (eol_type)) | 2380 | if (VECTORP (eol_type)) |
| 2348 | coding->eol_type = CODING_EOL_UNDECIDED; | 2381 | coding->eol_type = CODING_EOL_UNDECIDED; |
| 2349 | else if (XFASTINT (eol_type) == 1) | 2382 | else if (XFASTINT (eol_type) == 1) |
| @@ -2367,7 +2400,7 @@ setup_coding_system (coding_system, coding) | |||
| 2367 | case 2: | 2400 | case 2: |
| 2368 | coding->type = coding_type_iso2022; | 2401 | coding->type = coding_type_iso2022; |
| 2369 | { | 2402 | { |
| 2370 | Lisp_Object val; | 2403 | Lisp_Object val, temp; |
| 2371 | Lisp_Object *flags; | 2404 | Lisp_Object *flags; |
| 2372 | int i, charset, default_reg_bits = 0; | 2405 | int i, charset, default_reg_bits = 0; |
| 2373 | 2406 | ||
| @@ -2403,6 +2436,19 @@ setup_coding_system (coding_system, coding) | |||
| 2403 | /* Beginning of buffer should also be regarded as bol. */ | 2436 | /* Beginning of buffer should also be regarded as bol. */ |
| 2404 | CODING_SPEC_ISO_BOL (coding) = 1; | 2437 | CODING_SPEC_ISO_BOL (coding) = 1; |
| 2405 | 2438 | ||
| 2439 | for (charset = 0; charset <= MAX_CHARSET; charset++) | ||
| 2440 | CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255; | ||
| 2441 | val = Vcharset_revision_alist; | ||
| 2442 | while (CONSP (val)) | ||
| 2443 | { | ||
| 2444 | charset = get_charset_id (Fcar_safe (XCONS (val)->car)); | ||
| 2445 | if (charset >= 0 | ||
| 2446 | && (temp = Fcdr_safe (XCONS (val)->car), INTEGERP (temp)) | ||
| 2447 | && (i = XINT (temp), (i >= 0 && (i + '@') < 128))) | ||
| 2448 | CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i; | ||
| 2449 | val = XCONS (val)->cdr; | ||
| 2450 | } | ||
| 2451 | |||
| 2406 | /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations. | 2452 | /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations. |
| 2407 | FLAGS[REG] can be one of below: | 2453 | FLAGS[REG] can be one of below: |
| 2408 | integer CHARSET: CHARSET occupies register I, | 2454 | integer CHARSET: CHARSET occupies register I, |
| @@ -2416,7 +2462,6 @@ setup_coding_system (coding_system, coding) | |||
| 2416 | for (charset = 0; charset <= MAX_CHARSET; charset++) | 2462 | for (charset = 0; charset <= MAX_CHARSET; charset++) |
| 2417 | CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) | 2463 | CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) |
| 2418 | = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION; | 2464 | = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION; |
| 2419 | bzero (CODING_SPEC_ISO_EXPECTED_CHARSETS (coding), MAX_CHARSET + 1); | ||
| 2420 | for (i = 0; i < 4; i++) | 2465 | for (i = 0; i < 4; i++) |
| 2421 | { | 2466 | { |
| 2422 | if (INTEGERP (flags[i]) | 2467 | if (INTEGERP (flags[i]) |
| @@ -2425,7 +2470,6 @@ setup_coding_system (coding_system, coding) | |||
| 2425 | { | 2470 | { |
| 2426 | CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; | 2471 | CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; |
| 2427 | CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i; | 2472 | CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i; |
| 2428 | CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset] = 1; | ||
| 2429 | } | 2473 | } |
| 2430 | else if (EQ (flags[i], Qt)) | 2474 | else if (EQ (flags[i], Qt)) |
| 2431 | { | 2475 | { |
| @@ -2443,7 +2487,6 @@ setup_coding_system (coding_system, coding) | |||
| 2443 | { | 2487 | { |
| 2444 | CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; | 2488 | CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; |
| 2445 | CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i; | 2489 | CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i; |
| 2446 | CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset] = 1; | ||
| 2447 | } | 2490 | } |
| 2448 | else | 2491 | else |
| 2449 | CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1; | 2492 | CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1; |
| @@ -2454,12 +2497,8 @@ setup_coding_system (coding_system, coding) | |||
| 2454 | && (charset = XINT (XCONS (tail)->car), | 2497 | && (charset = XINT (XCONS (tail)->car), |
| 2455 | CHARSET_VALID_P (charset)) | 2498 | CHARSET_VALID_P (charset)) |
| 2456 | || (charset = get_charset_id (XCONS (tail)->car)) >= 0) | 2499 | || (charset = get_charset_id (XCONS (tail)->car)) >= 0) |
| 2457 | { | 2500 | CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) |
| 2458 | CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) | 2501 | = i; |
| 2459 | = i; | ||
| 2460 | CODING_SPEC_ISO_EXPECTED_CHARSETS (coding)[charset] | ||
| 2461 | = 1; | ||
| 2462 | } | ||
| 2463 | else if (EQ (XCONS (tail)->car, Qt)) | 2502 | else if (EQ (XCONS (tail)->car, Qt)) |
| 2464 | default_reg_bits |= 1 << i; | 2503 | default_reg_bits |= 1 << i; |
| 2465 | tail = XCONS (tail)->cdr; | 2504 | tail = XCONS (tail)->cdr; |
| @@ -3394,6 +3433,11 @@ code_convert_region (b, e, coding, encodep) | |||
| 3394 | TEMP_SET_PT (beg); | 3433 | TEMP_SET_PT (beg); |
| 3395 | insval = call1 (coding->post_read_conversion, make_number (len)); | 3434 | insval = call1 (coding->post_read_conversion, make_number (len)); |
| 3396 | CHECK_NUMBER (insval, 0); | 3435 | CHECK_NUMBER (insval, 0); |
| 3436 | if (pos >= beg + len) | ||
| 3437 | pos = beg + XINT (insval); | ||
| 3438 | else if (pos > beg) | ||
| 3439 | pos = beg; | ||
| 3440 | TEMP_SET_PT (pos); | ||
| 3397 | len = XINT (insval); | 3441 | len = XINT (insval); |
| 3398 | } | 3442 | } |
| 3399 | 3443 | ||
| @@ -3643,7 +3687,7 @@ DEFUN ("set-terminal-coding-system-internal", | |||
| 3643 | { | 3687 | { |
| 3644 | CHECK_SYMBOL (coding_system, 0); | 3688 | CHECK_SYMBOL (coding_system, 0); |
| 3645 | setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding); | 3689 | setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding); |
| 3646 | /* We had better not send unexpected characters to terminal. */ | 3690 | /* We had better not send unsafe characters to terminal. */ |
| 3647 | terminal_coding.flags |= CODING_FLAG_ISO_SAFE; | 3691 | terminal_coding.flags |= CODING_FLAG_ISO_SAFE; |
| 3648 | 3692 | ||
| 3649 | return Qnil; | 3693 | return Qnil; |
| @@ -3937,6 +3981,9 @@ syms_of_coding () | |||
| 3937 | = intern ("character-unification-table-for-encode"); | 3981 | = intern ("character-unification-table-for-encode"); |
| 3938 | staticpro (&Qcharacter_unification_table_for_encode); | 3982 | staticpro (&Qcharacter_unification_table_for_encode); |
| 3939 | 3983 | ||
| 3984 | Qsafe_charsets = intern ("safe-charsets"); | ||
| 3985 | staticpro (&Qsafe_charsets); | ||
| 3986 | |||
| 3940 | Qemacs_mule = intern ("emacs-mule"); | 3987 | Qemacs_mule = intern ("emacs-mule"); |
| 3941 | staticpro (&Qemacs_mule); | 3988 | staticpro (&Qemacs_mule); |
| 3942 | 3989 | ||