aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorPhilipp Stephani2019-04-28 12:28:27 +0200
committerPhilipp Stephani2019-04-28 12:28:27 +0200
commit75ee20364c5ed4c175b13debaa53a2ba14168999 (patch)
tree3bdff98355f169ddf8fe20cc7c60aa13ba682139 /src
parentdbe81e16583adcae664871206694573209540286 (diff)
downloademacs-75ee20364c5ed4c175b13debaa53a2ba14168999.tar.gz
emacs-75ee20364c5ed4c175b13debaa53a2ba14168999.zip
Refactoring: move UTF-8 decoding functions into coding.h.
json_make_string and json_build_string are generally useful and not JSON-specific. Move them to coding.[ch]. * src/coding.h (build_utf8_string): Move from json.c. * src/coding.c (make_utf8_string): Move from json.c. * src/json.c (json_make_string, json_build_string): Move to coding.[ch]. Split out JSON-specific comment. (json_parse_error, Fjson_serialize, json_to_lisp): Fix callers. * src/emacs-module.c (module_make_function, module_make_string): Use new functions. (module_decode, module_decode_copy): Remove.
Diffstat (limited to 'src')
-rw-r--r--src/coding.c19
-rw-r--r--src/coding.h12
-rw-r--r--src/emacs-module.c23
-rw-r--r--src/json.c52
4 files changed, 42 insertions, 64 deletions
diff --git a/src/coding.c b/src/coding.c
index 2c6b2c4d051..71f687a14e3 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -6353,6 +6353,25 @@ utf8_string_p (Lisp_Object string)
6353 return check_utf_8 (&coding) != -1; 6353 return check_utf_8 (&coding) != -1;
6354} 6354}
6355 6355
6356Lisp_Object
6357make_utf8_string (const char *data, ptrdiff_t size)
6358{
6359 ptrdiff_t chars, bytes;
6360 parse_str_as_multibyte ((const unsigned char *) data, size, &chars, &bytes);
6361 /* If DATA is a valid UTF-8 string, we can convert it to a Lisp
6362 string directly. Otherwise, we need to decode it. */
6363 if (chars == size || bytes == size)
6364 return make_specified_string (data, chars, size, true);
6365 else
6366 {
6367 struct coding_system coding;
6368 setup_coding_system (Qutf_8_unix, &coding);
6369 coding.mode |= CODING_MODE_LAST_BLOCK;
6370 coding.source = (const unsigned char *) data;
6371 decode_coding_object (&coding, Qnil, 0, 0, size, size, Qt);
6372 return coding.dst_object;
6373 }
6374}
6356 6375
6357/* Detect how end-of-line of a text of length SRC_BYTES pointed by 6376/* Detect how end-of-line of a text of length SRC_BYTES pointed by
6358 SOURCE is encoded. If CATEGORY is one of 6377 SOURCE is encoded. If CATEGORY is one of
diff --git a/src/coding.h b/src/coding.h
index 0c03d1a44ed..773df9abb90 100644
--- a/src/coding.h
+++ b/src/coding.h
@@ -695,6 +695,7 @@ extern Lisp_Object raw_text_coding_system (Lisp_Object);
695extern bool raw_text_coding_system_p (struct coding_system *); 695extern bool raw_text_coding_system_p (struct coding_system *);
696extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object); 696extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
697extern Lisp_Object complement_process_encoding_system (Lisp_Object); 697extern Lisp_Object complement_process_encoding_system (Lisp_Object);
698extern Lisp_Object make_utf8_string (const char *, ptrdiff_t);
698 699
699extern void decode_coding_gap (struct coding_system *, 700extern void decode_coding_gap (struct coding_system *,
700 ptrdiff_t, ptrdiff_t); 701 ptrdiff_t, ptrdiff_t);
@@ -762,6 +763,17 @@ surrogates_to_codepoint (int low, int high)
762 return 0x10000 + (low - 0xDC00) + ((high - 0xD800) * 0x400); 763 return 0x10000 + (low - 0xDC00) + ((high - 0xD800) * 0x400);
763} 764}
764 765
766/* Create a multibyte Lisp string from the NUL-terminated UTF-8 string
767 beginning at DATA. If the string is not a valid UTF-8 string, an
768 unspecified string is returned. */
769
770INLINE Lisp_Object
771build_utf8_string (const char *data)
772{
773 return make_utf8_string (data, strlen (data));
774}
775
776
765extern Lisp_Object preferred_coding_system (void); 777extern Lisp_Object preferred_coding_system (void);
766 778
767/* Coding system to be used to encode text for terminal display when 779/* Coding system to be used to encode text for terminal display when
diff --git a/src/emacs-module.c b/src/emacs-module.c
index 80a04bafc2d..b9050942559 100644
--- a/src/emacs-module.c
+++ b/src/emacs-module.c
@@ -223,8 +223,6 @@ static void module_reset_handlerlist (struct handler **);
223static bool value_storage_contains_p (const struct emacs_value_storage *, 223static bool value_storage_contains_p (const struct emacs_value_storage *,
224 emacs_value, ptrdiff_t *); 224 emacs_value, ptrdiff_t *);
225static Lisp_Object module_encode (Lisp_Object); 225static Lisp_Object module_encode (Lisp_Object);
226static Lisp_Object module_decode (Lisp_Object);
227static Lisp_Object module_decode_copy (Lisp_Object);
228 226
229static bool module_assertions = false; 227static bool module_assertions = false;
230 228
@@ -532,10 +530,7 @@ module_make_function (emacs_env *env, ptrdiff_t min_arity, ptrdiff_t max_arity,
532 function->data = data; 530 function->data = data;
533 531
534 if (documentation) 532 if (documentation)
535 { 533 function->documentation = build_utf8_string (documentation);
536 AUTO_STRING (unibyte_doc, documentation);
537 function->documentation = module_decode_copy (unibyte_doc);
538 }
539 534
540 Lisp_Object result; 535 Lisp_Object result;
541 XSET_MODULE_FUNCTION (result, function); 536 XSET_MODULE_FUNCTION (result, function);
@@ -668,8 +663,8 @@ module_make_string (emacs_env *env, const char *str, ptrdiff_t length)
668 MODULE_FUNCTION_BEGIN (NULL); 663 MODULE_FUNCTION_BEGIN (NULL);
669 if (! (0 <= length && length <= STRING_BYTES_BOUND)) 664 if (! (0 <= length && length <= STRING_BYTES_BOUND))
670 overflow_error (); 665 overflow_error ();
671 Lisp_Object lstr = make_unibyte_string (str, length); 666 Lisp_Object lstr = make_utf8_string (str, length);
672 return lisp_to_value (env, module_decode (lstr)); 667 return lisp_to_value (env, lstr);
673} 668}
674 669
675static emacs_value 670static emacs_value
@@ -1030,18 +1025,6 @@ module_encode (Lisp_Object string)
1030 return code_convert_string (string, Qutf_8_unix, Qt, true, true, true); 1025 return code_convert_string (string, Qutf_8_unix, Qt, true, true, true);
1031} 1026}
1032 1027
1033static Lisp_Object
1034module_decode (Lisp_Object string)
1035{
1036 return code_convert_string (string, Qutf_8_unix, Qt, false, true, true);
1037}
1038
1039static Lisp_Object
1040module_decode_copy (Lisp_Object string)
1041{
1042 return code_convert_string (string, Qutf_8_unix, Qt, false, false, true);
1043}
1044
1045 1028
1046/* Value conversion. */ 1029/* Value conversion. */
1047 1030
diff --git a/src/json.c b/src/json.c
index 03468e9f338..cc98914423b 100644
--- a/src/json.c
+++ b/src/json.c
@@ -215,47 +215,11 @@ json_has_suffix (const char *string, const char *suffix)
215 215
216#endif 216#endif
217 217
218/* Create a multibyte Lisp string from the UTF-8 string in 218/* Note that all callers of make_utf8_string and build_utf8_string
219 [DATA, DATA + SIZE). If the range [DATA, DATA + SIZE) does not 219 below either pass only valid UTF-8 strings or use the functions for
220 contain a valid UTF-8 string, the returned string will include raw
221 bytes.
222 Note that all callers below either pass only value UTF-8 strings or
223 use this function for formatting error messages; in the latter case
224 correctness isn't critical. */
225
226static Lisp_Object
227json_make_string (const char *data, ptrdiff_t size)
228{
229 ptrdiff_t chars, bytes;
230 parse_str_as_multibyte ((const unsigned char *) data, size, &chars, &bytes);
231 /* If DATA is a valid UTF-8 string, we can convert it to a Lisp
232 string directly. Otherwise, we need to decode it. */
233 if (chars == size || bytes == size)
234 return make_specified_string (data, chars, size, true);
235 else
236 {
237 struct coding_system coding;
238 setup_coding_system (Qutf_8_unix, &coding);
239 coding.mode |= CODING_MODE_LAST_BLOCK;
240 coding.source = (const unsigned char *) data;
241 decode_coding_object (&coding, Qnil, 0, 0, size, size, Qt);
242 return coding.dst_object;
243 }
244}
245
246/* Create a multibyte Lisp string from the NUL-terminated UTF-8
247 string beginning at DATA. If the string is not a valid UTF-8
248 string, an unspecified string is returned. Note that all callers
249 below either pass only value UTF-8 strings or use this function for
250 formatting error messages; in the latter case correctness isn't 220 formatting error messages; in the latter case correctness isn't
251 critical. */ 221 critical. */
252 222
253static Lisp_Object
254json_build_string (const char *data)
255{
256 return json_make_string (data, strlen (data));
257}
258
259/* Return a unibyte string containing the sequence of UTF-8 encoding 223/* Return a unibyte string containing the sequence of UTF-8 encoding
260 units of the UTF-8 representation of STRING. If STRING does not 224 units of the UTF-8 representation of STRING. If STRING does not
261 represent a sequence of Unicode scalar values, return a string with 225 represent a sequence of Unicode scalar values, return a string with
@@ -303,8 +267,8 @@ json_parse_error (const json_error_t *error)
303 symbol = Qjson_parse_error; 267 symbol = Qjson_parse_error;
304#endif 268#endif
305 xsignal (symbol, 269 xsignal (symbol,
306 list5 (json_build_string (error->text), 270 list5 (build_utf8_string (error->text),
307 json_build_string (error->source), INT_TO_INTEGER (error->line), 271 build_utf8_string (error->source), INT_TO_INTEGER (error->line),
308 INT_TO_INTEGER (error->column), INT_TO_INTEGER (error->position))); 272 INT_TO_INTEGER (error->column), INT_TO_INTEGER (error->position)));
309} 273}
310 274
@@ -648,7 +612,7 @@ usage: (json-serialize OBJECT &rest ARGS) */)
648 json_out_of_memory (); 612 json_out_of_memory ();
649 record_unwind_protect_ptr (json_free, string); 613 record_unwind_protect_ptr (json_free, string);
650 614
651 return unbind_to (count, json_build_string (string)); 615 return unbind_to (count, build_utf8_string (string));
652} 616}
653 617
654struct json_buffer_and_size 618struct json_buffer_and_size
@@ -855,7 +819,7 @@ json_to_lisp (json_t *json, struct json_configuration *conf)
855 case JSON_REAL: 819 case JSON_REAL:
856 return make_float (json_real_value (json)); 820 return make_float (json_real_value (json));
857 case JSON_STRING: 821 case JSON_STRING:
858 return json_make_string (json_string_value (json), 822 return make_utf8_string (json_string_value (json),
859 json_string_length (json)); 823 json_string_length (json));
860 case JSON_ARRAY: 824 case JSON_ARRAY:
861 { 825 {
@@ -915,7 +879,7 @@ json_to_lisp (json_t *json, struct json_configuration *conf)
915 json_t *value; 879 json_t *value;
916 json_object_foreach (json, key_str, value) 880 json_object_foreach (json, key_str, value)
917 { 881 {
918 Lisp_Object key = json_build_string (key_str); 882 Lisp_Object key = build_utf8_string (key_str);
919 EMACS_UINT hash; 883 EMACS_UINT hash;
920 ptrdiff_t i = hash_lookup (h, key, &hash); 884 ptrdiff_t i = hash_lookup (h, key, &hash);
921 /* Keys in JSON objects are unique, so the key can't 885 /* Keys in JSON objects are unique, so the key can't
@@ -932,7 +896,7 @@ json_to_lisp (json_t *json, struct json_configuration *conf)
932 json_t *value; 896 json_t *value;
933 json_object_foreach (json, key_str, value) 897 json_object_foreach (json, key_str, value)
934 { 898 {
935 Lisp_Object key = Fintern (json_build_string (key_str), Qnil); 899 Lisp_Object key = Fintern (build_utf8_string (key_str), Qnil);
936 result 900 result
937 = Fcons (Fcons (key, json_to_lisp (value, conf)), 901 = Fcons (Fcons (key, json_to_lisp (value, conf)),
938 result); 902 result);