diff options
| author | Kenichi Handa | 2003-05-06 12:28:11 +0000 |
|---|---|---|
| committer | Kenichi Handa | 2003-05-06 12:28:11 +0000 |
| commit | b49a1807a984553f0bb9c1e2d3d16d306c2d13d0 (patch) | |
| tree | dfd0ae0fe1882a55158522fce3e9a22961a68e5f /src | |
| parent | bf470ae20fae6855a6408f3ffad3a45cde0763c7 (diff) | |
| download | emacs-b49a1807a984553f0bb9c1e2d3d16d306c2d13d0.tar.gz emacs-b49a1807a984553f0bb9c1e2d3d16d306c2d13d0.zip | |
(Qsignature, Qendian): Delete these variables.
(syms_of_coding): Don't initialize them.
(CATEGORY_MASK_UTF_16_AUTO): New macro.
(detect_coding_utf_16): Add CATEGORY_MASK_UTF_16_AUTO in
detect_info->found.
(decode_coding_utf_16): Don't detect BOM here.
(encode_coding_utf_16): Produce BOM if CODING_UTF_16_BOM (coding)
is NOT utf_16_without_bom.
(setup_coding_system): For a coding system of type utf-16, check
whether the attribute :endian is Qbig (instead of checking whether
it is nil), and set CODING_REQUIRE_DETECTION_MASK if BOM detection
is required.
(detect_coding): If coding type is utf-16 and BOM detection is
required, detect it.
(Fdefine_coding_system_internal): For a coding system of type
utf-16, check whether the attribute :endian is Qbig (instead of
checking whether it is nil).
Diffstat (limited to 'src')
| -rw-r--r-- | src/coding.c | 86 |
1 files changed, 51 insertions, 35 deletions
diff --git a/src/coding.c b/src/coding.c index 0099c68a812..19d9ebfc68c 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -308,7 +308,7 @@ Lisp_Object Qpost_read_conversion, Qpre_write_conversion; | |||
| 308 | Lisp_Object Qdefault_char; | 308 | Lisp_Object Qdefault_char; |
| 309 | Lisp_Object Qno_conversion, Qundecided; | 309 | Lisp_Object Qno_conversion, Qundecided; |
| 310 | Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5; | 310 | Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5; |
| 311 | Lisp_Object Qsignature, Qendian, Qbig, Qlittle; | 311 | Lisp_Object Qbig, Qlittle; |
| 312 | Lisp_Object Qcoding_system_history; | 312 | Lisp_Object Qcoding_system_history; |
| 313 | Lisp_Object Qvalid_codes; | 313 | Lisp_Object Qvalid_codes; |
| 314 | 314 | ||
| @@ -626,6 +626,7 @@ enum coding_category | |||
| 626 | #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else) | 626 | #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else) |
| 627 | #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else) | 627 | #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else) |
| 628 | #define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8) | 628 | #define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8) |
| 629 | #define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto) | ||
| 629 | #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be) | 630 | #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be) |
| 630 | #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le) | 631 | #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le) |
| 631 | #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig) | 632 | #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig) |
| @@ -1357,12 +1358,14 @@ detect_coding_utf_16 (coding, detect_info) | |||
| 1357 | 1358 | ||
| 1358 | if ((c1 == 0xFF) && (c2 == 0xFE)) | 1359 | if ((c1 == 0xFF) && (c2 == 0xFE)) |
| 1359 | { | 1360 | { |
| 1360 | detect_info->found |= CATEGORY_MASK_UTF_16_LE; | 1361 | detect_info->found |= (CATEGORY_MASK_UTF_16_LE |
| 1362 | | CATEGORY_MASK_UTF_16_AUTO); | ||
| 1361 | detect_info->rejected |= CATEGORY_MASK_UTF_16_BE; | 1363 | detect_info->rejected |= CATEGORY_MASK_UTF_16_BE; |
| 1362 | } | 1364 | } |
| 1363 | else if ((c1 == 0xFE) && (c2 == 0xFF)) | 1365 | else if ((c1 == 0xFE) && (c2 == 0xFF)) |
| 1364 | { | 1366 | { |
| 1365 | detect_info->found |= CATEGORY_MASK_UTF_16_BE; | 1367 | detect_info->found |= (CATEGORY_MASK_UTF_16_BE |
| 1368 | | CATEGORY_MASK_UTF_16_AUTO); | ||
| 1366 | detect_info->rejected |= CATEGORY_MASK_UTF_16_LE; | 1369 | detect_info->rejected |= CATEGORY_MASK_UTF_16_LE; |
| 1367 | } | 1370 | } |
| 1368 | no_more_source: | 1371 | no_more_source: |
| @@ -1387,7 +1390,7 @@ decode_coding_utf_16 (coding) | |||
| 1387 | 1390 | ||
| 1388 | CODING_GET_INFO (coding, attr, eol_type, charset_list); | 1391 | CODING_GET_INFO (coding, attr, eol_type, charset_list); |
| 1389 | 1392 | ||
| 1390 | if (bom != utf_16_without_bom) | 1393 | if (bom == utf_16_with_bom) |
| 1391 | { | 1394 | { |
| 1392 | int c, c1, c2; | 1395 | int c, c1, c2; |
| 1393 | 1396 | ||
| @@ -1395,33 +1398,22 @@ decode_coding_utf_16 (coding) | |||
| 1395 | ONE_MORE_BYTE (c1); | 1398 | ONE_MORE_BYTE (c1); |
| 1396 | ONE_MORE_BYTE (c2); | 1399 | ONE_MORE_BYTE (c2); |
| 1397 | c = (c1 << 8) | c2; | 1400 | c = (c1 << 8) | c2; |
| 1398 | if (bom == utf_16_with_bom) | 1401 | |
| 1399 | { | 1402 | if (endian == utf_16_big_endian |
| 1400 | if (endian == utf_16_big_endian | 1403 | ? c != 0xFEFF : c != 0xFFFE) |
| 1401 | ? c != 0xFEFF : c != 0xFFFE) | ||
| 1402 | { | ||
| 1403 | /* We are sure that there's enouph room at CHARBUF. */ | ||
| 1404 | *charbuf++ = c1; | ||
| 1405 | *charbuf++ = c2; | ||
| 1406 | coding->errors++; | ||
| 1407 | } | ||
| 1408 | } | ||
| 1409 | else | ||
| 1410 | { | 1404 | { |
| 1411 | if (c == 0xFEFF) | 1405 | /* The first two bytes are not BOM. Treat them as bytes |
| 1412 | CODING_UTF_16_ENDIAN (coding) | 1406 | for a normal character. */ |
| 1413 | = endian = utf_16_big_endian; | 1407 | src = src_base; |
| 1414 | else if (c == 0xFFFE) | 1408 | coding->errors++; |
| 1415 | CODING_UTF_16_ENDIAN (coding) | ||
| 1416 | = endian = utf_16_little_endian; | ||
| 1417 | else | ||
| 1418 | { | ||
| 1419 | CODING_UTF_16_ENDIAN (coding) | ||
| 1420 | = endian = utf_16_big_endian; | ||
| 1421 | src = src_base; | ||
| 1422 | } | ||
| 1423 | } | 1409 | } |
| 1424 | CODING_UTF_16_BOM (coding) = utf_16_with_bom; | 1410 | CODING_UTF_16_BOM (coding) = utf_16_without_bom; |
| 1411 | } | ||
| 1412 | else if (bom == utf_16_detect_bom) | ||
| 1413 | { | ||
| 1414 | /* We have already tried to detect BOM and failed in | ||
| 1415 | detect_coding. */ | ||
| 1416 | CODING_UTF_16_BOM (coding) = utf_16_without_bom; | ||
| 1425 | } | 1417 | } |
| 1426 | 1418 | ||
| 1427 | while (1) | 1419 | while (1) |
| @@ -1494,7 +1486,7 @@ encode_coding_utf_16 (coding) | |||
| 1494 | 1486 | ||
| 1495 | CODING_GET_INFO (coding, attrs, eol_type, charset_list); | 1487 | CODING_GET_INFO (coding, attrs, eol_type, charset_list); |
| 1496 | 1488 | ||
| 1497 | if (bom == utf_16_with_bom) | 1489 | if (bom != utf_16_without_bom) |
| 1498 | { | 1490 | { |
| 1499 | ASSURE_DESTINATION (safe_room); | 1491 | ASSURE_DESTINATION (safe_room); |
| 1500 | if (big_endian) | 1492 | if (big_endian) |
| @@ -4859,7 +4851,7 @@ setup_coding_system (coding_system, coding) | |||
| 4859 | : EQ (val, Qt) ? utf_16_with_bom | 4851 | : EQ (val, Qt) ? utf_16_with_bom |
| 4860 | : utf_16_without_bom); | 4852 | : utf_16_without_bom); |
| 4861 | val = AREF (attrs, coding_attr_utf_16_endian); | 4853 | val = AREF (attrs, coding_attr_utf_16_endian); |
| 4862 | CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian | 4854 | CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian |
| 4863 | : utf_16_little_endian); | 4855 | : utf_16_little_endian); |
| 4864 | CODING_UTF_16_SURROGATE (coding) = 0; | 4856 | CODING_UTF_16_SURROGATE (coding) = 0; |
| 4865 | coding->detector = detect_coding_utf_16; | 4857 | coding->detector = detect_coding_utf_16; |
| @@ -4867,6 +4859,8 @@ setup_coding_system (coding_system, coding) | |||
| 4867 | coding->encoder = encode_coding_utf_16; | 4859 | coding->encoder = encode_coding_utf_16; |
| 4868 | coding->common_flags | 4860 | coding->common_flags |
| 4869 | |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK); | 4861 | |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK); |
| 4862 | if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom) | ||
| 4863 | coding->common_flags |= CODING_REQUIRE_DETECTION_MASK; | ||
| 4870 | } | 4864 | } |
| 4871 | else if (EQ (coding_type, Qccl)) | 4865 | else if (EQ (coding_type, Qccl)) |
| 4872 | { | 4866 | { |
| @@ -5285,6 +5279,25 @@ detect_coding (coding) | |||
| 5285 | } | 5279 | } |
| 5286 | } | 5280 | } |
| 5287 | } | 5281 | } |
| 5282 | else if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qutf_16)) | ||
| 5283 | { | ||
| 5284 | Lisp_Object coding_systems; | ||
| 5285 | struct coding_detection_info detect_info; | ||
| 5286 | |||
| 5287 | coding_systems | ||
| 5288 | = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom); | ||
| 5289 | detect_info.found = detect_info.rejected = 0; | ||
| 5290 | if (CONSP (coding_systems) | ||
| 5291 | && detect_coding_utf_16 (coding, &detect_info) | ||
| 5292 | && (detect_info.found & (CATEGORY_MASK_UTF_16_LE | ||
| 5293 | | CATEGORY_MASK_UTF_16_BE))) | ||
| 5294 | { | ||
| 5295 | if (detect_info.found & CATEGORY_MASK_UTF_16_LE) | ||
| 5296 | setup_coding_system (XCAR (coding_systems), coding); | ||
| 5297 | else | ||
| 5298 | setup_coding_system (XCDR (coding_systems), coding); | ||
| 5299 | } | ||
| 5300 | } | ||
| 5288 | 5301 | ||
| 5289 | attrs = CODING_ID_ATTRS (coding->id); | 5302 | attrs = CODING_ID_ATTRS (coding->id); |
| 5290 | coding_type = CODING_ATTR_TYPE (attrs); | 5303 | coding_type = CODING_ATTR_TYPE (attrs); |
| @@ -7957,15 +7970,20 @@ usage: (define-coding-system-internal ...) */) | |||
| 7957 | ASET (attrs, coding_attr_utf_16_bom, bom); | 7970 | ASET (attrs, coding_attr_utf_16_bom, bom); |
| 7958 | 7971 | ||
| 7959 | endian = args[coding_arg_utf16_endian]; | 7972 | endian = args[coding_arg_utf16_endian]; |
| 7973 | CHECK_SYMBOL (endian); | ||
| 7974 | if (NILP (endian)) | ||
| 7975 | endian = Qbig; | ||
| 7976 | else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle)) | ||
| 7977 | error ("Invalid endian: %s", XSYMBOL (endian)->name->data); | ||
| 7960 | ASET (attrs, coding_attr_utf_16_endian, endian); | 7978 | ASET (attrs, coding_attr_utf_16_endian, endian); |
| 7961 | 7979 | ||
| 7962 | category = (CONSP (bom) | 7980 | category = (CONSP (bom) |
| 7963 | ? coding_category_utf_16_auto | 7981 | ? coding_category_utf_16_auto |
| 7964 | : NILP (bom) | 7982 | : NILP (bom) |
| 7965 | ? (NILP (endian) | 7983 | ? (EQ (endian, Qbig) |
| 7966 | ? coding_category_utf_16_be_nosig | 7984 | ? coding_category_utf_16_be_nosig |
| 7967 | : coding_category_utf_16_le_nosig) | 7985 | : coding_category_utf_16_le_nosig) |
| 7968 | : (NILP (endian) | 7986 | : (EQ (endian, Qbig) |
| 7969 | ? coding_category_utf_16_be | 7987 | ? coding_category_utf_16_be |
| 7970 | : coding_category_utf_16_le)); | 7988 | : coding_category_utf_16_le)); |
| 7971 | } | 7989 | } |
| @@ -8407,8 +8425,6 @@ syms_of_coding () | |||
| 8407 | DEFSYM (Qutf_8, "utf-8"); | 8425 | DEFSYM (Qutf_8, "utf-8"); |
| 8408 | 8426 | ||
| 8409 | DEFSYM (Qutf_16, "utf-16"); | 8427 | DEFSYM (Qutf_16, "utf-16"); |
| 8410 | DEFSYM (Qsignature, "signature"); | ||
| 8411 | DEFSYM (Qendian, "endian"); | ||
| 8412 | DEFSYM (Qbig, "big"); | 8428 | DEFSYM (Qbig, "big"); |
| 8413 | DEFSYM (Qlittle, "little"); | 8429 | DEFSYM (Qlittle, "little"); |
| 8414 | 8430 | ||