diff options
| author | Philipp Stephani | 2019-04-28 12:28:27 +0200 |
|---|---|---|
| committer | Philipp Stephani | 2019-04-28 12:28:27 +0200 |
| commit | 75ee20364c5ed4c175b13debaa53a2ba14168999 (patch) | |
| tree | 3bdff98355f169ddf8fe20cc7c60aa13ba682139 /src | |
| parent | dbe81e16583adcae664871206694573209540286 (diff) | |
| download | emacs-75ee20364c5ed4c175b13debaa53a2ba14168999.tar.gz emacs-75ee20364c5ed4c175b13debaa53a2ba14168999.zip | |
Refactoring: move UTF-8 decoding functions into coding.h.
json_make_string and json_build_string are generally useful and not
JSON-specific. Move them to coding.[ch].
* src/coding.h (build_utf8_string): Move from json.c.
* src/coding.c (make_utf8_string): Move from json.c.
* src/json.c (json_make_string, json_build_string): Move to
coding.[ch]. Split out JSON-specific comment.
(json_parse_error, Fjson_serialize, json_to_lisp): Fix callers.
* src/emacs-module.c (module_make_function, module_make_string): Use
new functions.
(module_decode, module_decode_copy): Remove.
Diffstat (limited to 'src')
| -rw-r--r-- | src/coding.c | 19 | ||||
| -rw-r--r-- | src/coding.h | 12 | ||||
| -rw-r--r-- | src/emacs-module.c | 23 | ||||
| -rw-r--r-- | src/json.c | 52 |
4 files changed, 42 insertions, 64 deletions
diff --git a/src/coding.c b/src/coding.c index 2c6b2c4d051..71f687a14e3 100644 --- a/src/coding.c +++ b/src/coding.c | |||
| @@ -6353,6 +6353,25 @@ utf8_string_p (Lisp_Object string) | |||
| 6353 | return check_utf_8 (&coding) != -1; | 6353 | return check_utf_8 (&coding) != -1; |
| 6354 | } | 6354 | } |
| 6355 | 6355 | ||
| 6356 | Lisp_Object | ||
| 6357 | make_utf8_string (const char *data, ptrdiff_t size) | ||
| 6358 | { | ||
| 6359 | ptrdiff_t chars, bytes; | ||
| 6360 | parse_str_as_multibyte ((const unsigned char *) data, size, &chars, &bytes); | ||
| 6361 | /* If DATA is a valid UTF-8 string, we can convert it to a Lisp | ||
| 6362 | string directly. Otherwise, we need to decode it. */ | ||
| 6363 | if (chars == size || bytes == size) | ||
| 6364 | return make_specified_string (data, chars, size, true); | ||
| 6365 | else | ||
| 6366 | { | ||
| 6367 | struct coding_system coding; | ||
| 6368 | setup_coding_system (Qutf_8_unix, &coding); | ||
| 6369 | coding.mode |= CODING_MODE_LAST_BLOCK; | ||
| 6370 | coding.source = (const unsigned char *) data; | ||
| 6371 | decode_coding_object (&coding, Qnil, 0, 0, size, size, Qt); | ||
| 6372 | return coding.dst_object; | ||
| 6373 | } | ||
| 6374 | } | ||
| 6356 | 6375 | ||
| 6357 | /* Detect how end-of-line of a text of length SRC_BYTES pointed by | 6376 | /* Detect how end-of-line of a text of length SRC_BYTES pointed by |
| 6358 | SOURCE is encoded. If CATEGORY is one of | 6377 | SOURCE is encoded. If CATEGORY is one of |
diff --git a/src/coding.h b/src/coding.h index 0c03d1a44ed..773df9abb90 100644 --- a/src/coding.h +++ b/src/coding.h | |||
| @@ -695,6 +695,7 @@ extern Lisp_Object raw_text_coding_system (Lisp_Object); | |||
| 695 | extern bool raw_text_coding_system_p (struct coding_system *); | 695 | extern bool raw_text_coding_system_p (struct coding_system *); |
| 696 | extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object); | 696 | extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object); |
| 697 | extern Lisp_Object complement_process_encoding_system (Lisp_Object); | 697 | extern Lisp_Object complement_process_encoding_system (Lisp_Object); |
| 698 | extern Lisp_Object make_utf8_string (const char *, ptrdiff_t); | ||
| 698 | 699 | ||
| 699 | extern void decode_coding_gap (struct coding_system *, | 700 | extern void decode_coding_gap (struct coding_system *, |
| 700 | ptrdiff_t, ptrdiff_t); | 701 | ptrdiff_t, ptrdiff_t); |
| @@ -762,6 +763,17 @@ surrogates_to_codepoint (int low, int high) | |||
| 762 | return 0x10000 + (low - 0xDC00) + ((high - 0xD800) * 0x400); | 763 | return 0x10000 + (low - 0xDC00) + ((high - 0xD800) * 0x400); |
| 763 | } | 764 | } |
| 764 | 765 | ||
| 766 | /* Create a multibyte Lisp string from the NUL-terminated UTF-8 string | ||
| 767 | beginning at DATA. If the string is not a valid UTF-8 string, an | ||
| 768 | unspecified string is returned. */ | ||
| 769 | |||
| 770 | INLINE Lisp_Object | ||
| 771 | build_utf8_string (const char *data) | ||
| 772 | { | ||
| 773 | return make_utf8_string (data, strlen (data)); | ||
| 774 | } | ||
| 775 | |||
| 776 | |||
| 765 | extern Lisp_Object preferred_coding_system (void); | 777 | extern Lisp_Object preferred_coding_system (void); |
| 766 | 778 | ||
| 767 | /* Coding system to be used to encode text for terminal display when | 779 | /* Coding system to be used to encode text for terminal display when |
diff --git a/src/emacs-module.c b/src/emacs-module.c index 80a04bafc2d..b9050942559 100644 --- a/src/emacs-module.c +++ b/src/emacs-module.c | |||
| @@ -223,8 +223,6 @@ static void module_reset_handlerlist (struct handler **); | |||
| 223 | static bool value_storage_contains_p (const struct emacs_value_storage *, | 223 | static bool value_storage_contains_p (const struct emacs_value_storage *, |
| 224 | emacs_value, ptrdiff_t *); | 224 | emacs_value, ptrdiff_t *); |
| 225 | static Lisp_Object module_encode (Lisp_Object); | 225 | static Lisp_Object module_encode (Lisp_Object); |
| 226 | static Lisp_Object module_decode (Lisp_Object); | ||
| 227 | static Lisp_Object module_decode_copy (Lisp_Object); | ||
| 228 | 226 | ||
| 229 | static bool module_assertions = false; | 227 | static bool module_assertions = false; |
| 230 | 228 | ||
| @@ -532,10 +530,7 @@ module_make_function (emacs_env *env, ptrdiff_t min_arity, ptrdiff_t max_arity, | |||
| 532 | function->data = data; | 530 | function->data = data; |
| 533 | 531 | ||
| 534 | if (documentation) | 532 | if (documentation) |
| 535 | { | 533 | function->documentation = build_utf8_string (documentation); |
| 536 | AUTO_STRING (unibyte_doc, documentation); | ||
| 537 | function->documentation = module_decode_copy (unibyte_doc); | ||
| 538 | } | ||
| 539 | 534 | ||
| 540 | Lisp_Object result; | 535 | Lisp_Object result; |
| 541 | XSET_MODULE_FUNCTION (result, function); | 536 | XSET_MODULE_FUNCTION (result, function); |
| @@ -668,8 +663,8 @@ module_make_string (emacs_env *env, const char *str, ptrdiff_t length) | |||
| 668 | MODULE_FUNCTION_BEGIN (NULL); | 663 | MODULE_FUNCTION_BEGIN (NULL); |
| 669 | if (! (0 <= length && length <= STRING_BYTES_BOUND)) | 664 | if (! (0 <= length && length <= STRING_BYTES_BOUND)) |
| 670 | overflow_error (); | 665 | overflow_error (); |
| 671 | Lisp_Object lstr = make_unibyte_string (str, length); | 666 | Lisp_Object lstr = make_utf8_string (str, length); |
| 672 | return lisp_to_value (env, module_decode (lstr)); | 667 | return lisp_to_value (env, lstr); |
| 673 | } | 668 | } |
| 674 | 669 | ||
| 675 | static emacs_value | 670 | static emacs_value |
| @@ -1030,18 +1025,6 @@ module_encode (Lisp_Object string) | |||
| 1030 | return code_convert_string (string, Qutf_8_unix, Qt, true, true, true); | 1025 | return code_convert_string (string, Qutf_8_unix, Qt, true, true, true); |
| 1031 | } | 1026 | } |
| 1032 | 1027 | ||
| 1033 | static Lisp_Object | ||
| 1034 | module_decode (Lisp_Object string) | ||
| 1035 | { | ||
| 1036 | return code_convert_string (string, Qutf_8_unix, Qt, false, true, true); | ||
| 1037 | } | ||
| 1038 | |||
| 1039 | static Lisp_Object | ||
| 1040 | module_decode_copy (Lisp_Object string) | ||
| 1041 | { | ||
| 1042 | return code_convert_string (string, Qutf_8_unix, Qt, false, false, true); | ||
| 1043 | } | ||
| 1044 | |||
| 1045 | 1028 | ||
| 1046 | /* Value conversion. */ | 1029 | /* Value conversion. */ |
| 1047 | 1030 | ||
diff --git a/src/json.c b/src/json.c index 03468e9f338..cc98914423b 100644 --- a/src/json.c +++ b/src/json.c | |||
| @@ -215,47 +215,11 @@ json_has_suffix (const char *string, const char *suffix) | |||
| 215 | 215 | ||
| 216 | #endif | 216 | #endif |
| 217 | 217 | ||
| 218 | /* Create a multibyte Lisp string from the UTF-8 string in | 218 | /* Note that all callers of make_utf8_string and build_utf8_string |
| 219 | [DATA, DATA + SIZE). If the range [DATA, DATA + SIZE) does not | 219 | below either pass only valid UTF-8 strings or use the functions for |
| 220 | contain a valid UTF-8 string, the returned string will include raw | ||
| 221 | bytes. | ||
| 222 | Note that all callers below either pass only value UTF-8 strings or | ||
| 223 | use this function for formatting error messages; in the latter case | ||
| 224 | correctness isn't critical. */ | ||
| 225 | |||
| 226 | static Lisp_Object | ||
| 227 | json_make_string (const char *data, ptrdiff_t size) | ||
| 228 | { | ||
| 229 | ptrdiff_t chars, bytes; | ||
| 230 | parse_str_as_multibyte ((const unsigned char *) data, size, &chars, &bytes); | ||
| 231 | /* If DATA is a valid UTF-8 string, we can convert it to a Lisp | ||
| 232 | string directly. Otherwise, we need to decode it. */ | ||
| 233 | if (chars == size || bytes == size) | ||
| 234 | return make_specified_string (data, chars, size, true); | ||
| 235 | else | ||
| 236 | { | ||
| 237 | struct coding_system coding; | ||
| 238 | setup_coding_system (Qutf_8_unix, &coding); | ||
| 239 | coding.mode |= CODING_MODE_LAST_BLOCK; | ||
| 240 | coding.source = (const unsigned char *) data; | ||
| 241 | decode_coding_object (&coding, Qnil, 0, 0, size, size, Qt); | ||
| 242 | return coding.dst_object; | ||
| 243 | } | ||
| 244 | } | ||
| 245 | |||
| 246 | /* Create a multibyte Lisp string from the NUL-terminated UTF-8 | ||
| 247 | string beginning at DATA. If the string is not a valid UTF-8 | ||
| 248 | string, an unspecified string is returned. Note that all callers | ||
| 249 | below either pass only value UTF-8 strings or use this function for | ||
| 250 | formatting error messages; in the latter case correctness isn't | 220 | formatting error messages; in the latter case correctness isn't |
| 251 | critical. */ | 221 | critical. */ |
| 252 | 222 | ||
| 253 | static Lisp_Object | ||
| 254 | json_build_string (const char *data) | ||
| 255 | { | ||
| 256 | return json_make_string (data, strlen (data)); | ||
| 257 | } | ||
| 258 | |||
| 259 | /* Return a unibyte string containing the sequence of UTF-8 encoding | 223 | /* Return a unibyte string containing the sequence of UTF-8 encoding |
| 260 | units of the UTF-8 representation of STRING. If STRING does not | 224 | units of the UTF-8 representation of STRING. If STRING does not |
| 261 | represent a sequence of Unicode scalar values, return a string with | 225 | represent a sequence of Unicode scalar values, return a string with |
| @@ -303,8 +267,8 @@ json_parse_error (const json_error_t *error) | |||
| 303 | symbol = Qjson_parse_error; | 267 | symbol = Qjson_parse_error; |
| 304 | #endif | 268 | #endif |
| 305 | xsignal (symbol, | 269 | xsignal (symbol, |
| 306 | list5 (json_build_string (error->text), | 270 | list5 (build_utf8_string (error->text), |
| 307 | json_build_string (error->source), INT_TO_INTEGER (error->line), | 271 | build_utf8_string (error->source), INT_TO_INTEGER (error->line), |
| 308 | INT_TO_INTEGER (error->column), INT_TO_INTEGER (error->position))); | 272 | INT_TO_INTEGER (error->column), INT_TO_INTEGER (error->position))); |
| 309 | } | 273 | } |
| 310 | 274 | ||
| @@ -648,7 +612,7 @@ usage: (json-serialize OBJECT &rest ARGS) */) | |||
| 648 | json_out_of_memory (); | 612 | json_out_of_memory (); |
| 649 | record_unwind_protect_ptr (json_free, string); | 613 | record_unwind_protect_ptr (json_free, string); |
| 650 | 614 | ||
| 651 | return unbind_to (count, json_build_string (string)); | 615 | return unbind_to (count, build_utf8_string (string)); |
| 652 | } | 616 | } |
| 653 | 617 | ||
| 654 | struct json_buffer_and_size | 618 | struct json_buffer_and_size |
| @@ -855,7 +819,7 @@ json_to_lisp (json_t *json, struct json_configuration *conf) | |||
| 855 | case JSON_REAL: | 819 | case JSON_REAL: |
| 856 | return make_float (json_real_value (json)); | 820 | return make_float (json_real_value (json)); |
| 857 | case JSON_STRING: | 821 | case JSON_STRING: |
| 858 | return json_make_string (json_string_value (json), | 822 | return make_utf8_string (json_string_value (json), |
| 859 | json_string_length (json)); | 823 | json_string_length (json)); |
| 860 | case JSON_ARRAY: | 824 | case JSON_ARRAY: |
| 861 | { | 825 | { |
| @@ -915,7 +879,7 @@ json_to_lisp (json_t *json, struct json_configuration *conf) | |||
| 915 | json_t *value; | 879 | json_t *value; |
| 916 | json_object_foreach (json, key_str, value) | 880 | json_object_foreach (json, key_str, value) |
| 917 | { | 881 | { |
| 918 | Lisp_Object key = json_build_string (key_str); | 882 | Lisp_Object key = build_utf8_string (key_str); |
| 919 | EMACS_UINT hash; | 883 | EMACS_UINT hash; |
| 920 | ptrdiff_t i = hash_lookup (h, key, &hash); | 884 | ptrdiff_t i = hash_lookup (h, key, &hash); |
| 921 | /* Keys in JSON objects are unique, so the key can't | 885 | /* Keys in JSON objects are unique, so the key can't |
| @@ -932,7 +896,7 @@ json_to_lisp (json_t *json, struct json_configuration *conf) | |||
| 932 | json_t *value; | 896 | json_t *value; |
| 933 | json_object_foreach (json, key_str, value) | 897 | json_object_foreach (json, key_str, value) |
| 934 | { | 898 | { |
| 935 | Lisp_Object key = Fintern (json_build_string (key_str), Qnil); | 899 | Lisp_Object key = Fintern (build_utf8_string (key_str), Qnil); |
| 936 | result | 900 | result |
| 937 | = Fcons (Fcons (key, json_to_lisp (value, conf)), | 901 | = Fcons (Fcons (key, json_to_lisp (value, conf)), |
| 938 | result); | 902 | result); |