aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorKenichi Handa2003-05-06 12:28:11 +0000
committerKenichi Handa2003-05-06 12:28:11 +0000
commitb49a1807a984553f0bb9c1e2d3d16d306c2d13d0 (patch)
treedfd0ae0fe1882a55158522fce3e9a22961a68e5f /src
parentbf470ae20fae6855a6408f3ffad3a45cde0763c7 (diff)
downloademacs-b49a1807a984553f0bb9c1e2d3d16d306c2d13d0.tar.gz
emacs-b49a1807a984553f0bb9c1e2d3d16d306c2d13d0.zip
(Qsignature, Qendian): Delete these variables.
(syms_of_coding): Don't initialize them. (CATEGORY_MASK_UTF_16_AUTO): New macro. (detect_coding_utf_16): Add CATEGORY_MASK_UTF_16_AUTO to detect_info->found. (decode_coding_utf_16): Don't detect BOM here. (encode_coding_utf_16): Produce BOM unless CODING_UTF_16_BOM (coding) is utf_16_without_bom. (setup_coding_system): For a coding system of type utf-16, check whether the attribute :endian is Qbig (instead of whether it is nil), and set CODING_REQUIRE_DETECTION_MASK if BOM detection is required. (detect_coding): If the coding type is utf-16 and BOM detection is required, detect it. (Fdefine_coding_system_internal): For a coding system of type utf-16, check whether the attribute :endian is Qbig (instead of whether it is nil).
Diffstat (limited to 'src')
-rw-r--r--src/coding.c86
1 files changed, 51 insertions, 35 deletions
diff --git a/src/coding.c b/src/coding.c
index 0099c68a812..19d9ebfc68c 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -308,7 +308,7 @@ Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
308Lisp_Object Qdefault_char; 308Lisp_Object Qdefault_char;
309Lisp_Object Qno_conversion, Qundecided; 309Lisp_Object Qno_conversion, Qundecided;
310Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5; 310Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
311Lisp_Object Qsignature, Qendian, Qbig, Qlittle; 311Lisp_Object Qbig, Qlittle;
312Lisp_Object Qcoding_system_history; 312Lisp_Object Qcoding_system_history;
313Lisp_Object Qvalid_codes; 313Lisp_Object Qvalid_codes;
314 314
@@ -626,6 +626,7 @@ enum coding_category
626#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else) 626#define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else)
627#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else) 627#define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else)
628#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8) 628#define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8)
629#define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto)
629#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be) 630#define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be)
630#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le) 631#define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le)
631#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig) 632#define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig)
@@ -1357,12 +1358,14 @@ detect_coding_utf_16 (coding, detect_info)
1357 1358
1358 if ((c1 == 0xFF) && (c2 == 0xFE)) 1359 if ((c1 == 0xFF) && (c2 == 0xFE))
1359 { 1360 {
1360 detect_info->found |= CATEGORY_MASK_UTF_16_LE; 1361 detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1362 | CATEGORY_MASK_UTF_16_AUTO);
1361 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE; 1363 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE;
1362 } 1364 }
1363 else if ((c1 == 0xFE) && (c2 == 0xFF)) 1365 else if ((c1 == 0xFE) && (c2 == 0xFF))
1364 { 1366 {
1365 detect_info->found |= CATEGORY_MASK_UTF_16_BE; 1367 detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1368 | CATEGORY_MASK_UTF_16_AUTO);
1366 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE; 1369 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE;
1367 } 1370 }
1368 no_more_source: 1371 no_more_source:
@@ -1387,7 +1390,7 @@ decode_coding_utf_16 (coding)
1387 1390
1388 CODING_GET_INFO (coding, attr, eol_type, charset_list); 1391 CODING_GET_INFO (coding, attr, eol_type, charset_list);
1389 1392
1390 if (bom != utf_16_without_bom) 1393 if (bom == utf_16_with_bom)
1391 { 1394 {
1392 int c, c1, c2; 1395 int c, c1, c2;
1393 1396
@@ -1395,33 +1398,22 @@ decode_coding_utf_16 (coding)
1395 ONE_MORE_BYTE (c1); 1398 ONE_MORE_BYTE (c1);
1396 ONE_MORE_BYTE (c2); 1399 ONE_MORE_BYTE (c2);
1397 c = (c1 << 8) | c2; 1400 c = (c1 << 8) | c2;
1398 if (bom == utf_16_with_bom) 1401
1399 { 1402 if (endian == utf_16_big_endian
1400 if (endian == utf_16_big_endian 1403 ? c != 0xFEFF : c != 0xFFFE)
1401 ? c != 0xFEFF : c != 0xFFFE)
1402 {
1403 /* We are sure that there's enouph room at CHARBUF. */
1404 *charbuf++ = c1;
1405 *charbuf++ = c2;
1406 coding->errors++;
1407 }
1408 }
1409 else
1410 { 1404 {
1411 if (c == 0xFEFF) 1405 /* The first two bytes are not BOM. Treat them as bytes
1412 CODING_UTF_16_ENDIAN (coding) 1406 for a normal character. */
1413 = endian = utf_16_big_endian; 1407 src = src_base;
1414 else if (c == 0xFFFE) 1408 coding->errors++;
1415 CODING_UTF_16_ENDIAN (coding)
1416 = endian = utf_16_little_endian;
1417 else
1418 {
1419 CODING_UTF_16_ENDIAN (coding)
1420 = endian = utf_16_big_endian;
1421 src = src_base;
1422 }
1423 } 1409 }
1424 CODING_UTF_16_BOM (coding) = utf_16_with_bom; 1410 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1411 }
1412 else if (bom == utf_16_detect_bom)
1413 {
1414 /* We have already tried to detect BOM and failed in
1415 detect_coding. */
1416 CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1425 } 1417 }
1426 1418
1427 while (1) 1419 while (1)
@@ -1494,7 +1486,7 @@ encode_coding_utf_16 (coding)
1494 1486
1495 CODING_GET_INFO (coding, attrs, eol_type, charset_list); 1487 CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1496 1488
1497 if (bom == utf_16_with_bom) 1489 if (bom != utf_16_without_bom)
1498 { 1490 {
1499 ASSURE_DESTINATION (safe_room); 1491 ASSURE_DESTINATION (safe_room);
1500 if (big_endian) 1492 if (big_endian)
@@ -4859,7 +4851,7 @@ setup_coding_system (coding_system, coding)
4859 : EQ (val, Qt) ? utf_16_with_bom 4851 : EQ (val, Qt) ? utf_16_with_bom
4860 : utf_16_without_bom); 4852 : utf_16_without_bom);
4861 val = AREF (attrs, coding_attr_utf_16_endian); 4853 val = AREF (attrs, coding_attr_utf_16_endian);
4862 CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian 4854 CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
4863 : utf_16_little_endian); 4855 : utf_16_little_endian);
4864 CODING_UTF_16_SURROGATE (coding) = 0; 4856 CODING_UTF_16_SURROGATE (coding) = 0;
4865 coding->detector = detect_coding_utf_16; 4857 coding->detector = detect_coding_utf_16;
@@ -4867,6 +4859,8 @@ setup_coding_system (coding_system, coding)
4867 coding->encoder = encode_coding_utf_16; 4859 coding->encoder = encode_coding_utf_16;
4868 coding->common_flags 4860 coding->common_flags
4869 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK); 4861 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4862 if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
4863 coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4870 } 4864 }
4871 else if (EQ (coding_type, Qccl)) 4865 else if (EQ (coding_type, Qccl))
4872 { 4866 {
@@ -5285,6 +5279,25 @@ detect_coding (coding)
5285 } 5279 }
5286 } 5280 }
5287 } 5281 }
5282 else if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qutf_16))
5283 {
5284 Lisp_Object coding_systems;
5285 struct coding_detection_info detect_info;
5286
5287 coding_systems
5288 = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5289 detect_info.found = detect_info.rejected = 0;
5290 if (CONSP (coding_systems)
5291 && detect_coding_utf_16 (coding, &detect_info)
5292 && (detect_info.found & (CATEGORY_MASK_UTF_16_LE
5293 | CATEGORY_MASK_UTF_16_BE)))
5294 {
5295 if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5296 setup_coding_system (XCAR (coding_systems), coding);
5297 else
5298 setup_coding_system (XCDR (coding_systems), coding);
5299 }
5300 }
5288 5301
5289 attrs = CODING_ID_ATTRS (coding->id); 5302 attrs = CODING_ID_ATTRS (coding->id);
5290 coding_type = CODING_ATTR_TYPE (attrs); 5303 coding_type = CODING_ATTR_TYPE (attrs);
@@ -7957,15 +7970,20 @@ usage: (define-coding-system-internal ...) */)
7957 ASET (attrs, coding_attr_utf_16_bom, bom); 7970 ASET (attrs, coding_attr_utf_16_bom, bom);
7958 7971
7959 endian = args[coding_arg_utf16_endian]; 7972 endian = args[coding_arg_utf16_endian];
7973 CHECK_SYMBOL (endian);
7974 if (NILP (endian))
7975 endian = Qbig;
7976 else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
7977 error ("Invalid endian: %s", XSYMBOL (endian)->name->data);
7960 ASET (attrs, coding_attr_utf_16_endian, endian); 7978 ASET (attrs, coding_attr_utf_16_endian, endian);
7961 7979
7962 category = (CONSP (bom) 7980 category = (CONSP (bom)
7963 ? coding_category_utf_16_auto 7981 ? coding_category_utf_16_auto
7964 : NILP (bom) 7982 : NILP (bom)
7965 ? (NILP (endian) 7983 ? (EQ (endian, Qbig)
7966 ? coding_category_utf_16_be_nosig 7984 ? coding_category_utf_16_be_nosig
7967 : coding_category_utf_16_le_nosig) 7985 : coding_category_utf_16_le_nosig)
7968 : (NILP (endian) 7986 : (EQ (endian, Qbig)
7969 ? coding_category_utf_16_be 7987 ? coding_category_utf_16_be
7970 : coding_category_utf_16_le)); 7988 : coding_category_utf_16_le));
7971 } 7989 }
@@ -8407,8 +8425,6 @@ syms_of_coding ()
8407 DEFSYM (Qutf_8, "utf-8"); 8425 DEFSYM (Qutf_8, "utf-8");
8408 8426
8409 DEFSYM (Qutf_16, "utf-16"); 8427 DEFSYM (Qutf_16, "utf-16");
8410 DEFSYM (Qsignature, "signature");
8411 DEFSYM (Qendian, "endian");
8412 DEFSYM (Qbig, "big"); 8428 DEFSYM (Qbig, "big");
8413 DEFSYM (Qlittle, "little"); 8429 DEFSYM (Qlittle, "little");
8414 8430