diff options
| author | Ken Raeburn | 2016-10-30 09:05:46 -0400 |
|---|---|---|
| committer | Ken Raeburn | 2017-06-21 22:34:33 -0400 |
| commit | b91455633b03add918af3eb166ac797fd6c95722 (patch) | |
| tree | 8291ac7adfcf7f60d3fe7560db0d90175122c8a4 /src | |
| parent | efe200c10da02db68c6eeadc3cd82a8cc3108c96 (diff) | |
| download | emacs-b91455633b03add918af3eb166ac797fd6c95722.tar.gz emacs-b91455633b03add918af3eb166ac797fd6c95722.zip | |
Replace read_objects assoc list with two hash tables.
For larger input files with lots of shared data structures, an
association list is too slow.
* src/lread.c (read_objects_map, read_objects_completed): New
variables, replacing read_objects.
(readevalloop): Initialize them with hash tables before starting a
top-level read, if they're not already empty hash tables, and reset
them to Qnil afterwards if something was added to the hash tables.
(read_internal_start): Likewise.
(read1): Store first the placeholder and later the newly read object
into read_objects_map under the specified object number. If the new
object can contain a reference to itself, store it in
read_objects_completed.
(substitute_objects_recurse): Check read_objects_completed instead of
read_objects for the known possibly-recursive objects.
(syms_of_lread): Update initializations.
Diffstat (limited to 'src')
| -rw-r--r-- | src/lread.c | 125 |
1 files changed, 107 insertions, 18 deletions
diff --git a/src/lread.c b/src/lread.c index b4ee3015e5d..d6a7e55b98a 100644 --- a/src/lread.c +++ b/src/lread.c | |||
| @@ -76,11 +76,36 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ | |||
| 76 | #define getc_unlocked getc | 76 | #define getc_unlocked getc |
| 77 | #endif | 77 | #endif |
| 78 | 78 | ||
| 79 | /* The association list of objects read with the #n=object form. | 79 | /* The objects or placeholders read with the #n=object form. |
| 80 | Each member of the list has the form (n . object), and is used to | 80 | |
| 81 | look up the object for the corresponding #n# construct. | 81 | A hash table maps a number to either a placeholder (while the |
| 82 | It must be set to nil before all top-level calls to read0. */ | 82 | object is still being parsed, in case it's referenced within its |
| 83 | static Lisp_Object read_objects; | 83 | own definition) or to the completed object. With small integers |
| 84 | for keys, it's effectively little more than a vector, but it'll | ||
| 85 | manage any needed resizing for us. | ||
| 86 | |||
| 87 | The variable must be reset to an empty hash table before all | ||
| 88 | top-level calls to read0. In between calls, it may be an empty | ||
| 89 | hash table left unused from the previous call (to reduce | ||
| 90 | allocations), or nil. */ | ||
| 91 | static Lisp_Object read_objects_map; | ||
| 92 | |||
| 93 | /* The recursive objects read with the #n=object form. | ||
| 94 | |||
| 95 | Objects that might have circular references are stored here, so | ||
| 96 | that recursive substitution knows not to keep processing them | ||
| 97 | multiple times. | ||
| 98 | |||
| 99 | Only objects that are completely processed, including substituting | ||
| 100 | references to themselves (but not necessarily replacing | ||
| 101 | placeholders for other objects still being read), are stored. | ||
| 102 | |||
| 103 | A hash table is used for efficient lookups of keys. We don't care | ||
| 104 | what the value slots hold. The variable must be set to an empty | ||
| 105 | hash table before all top-level calls to read0. In between calls, | ||
| 106 | it may be an empty hash table left unused from the previous call | ||
| 107 | (to reduce allocations), or nil. */ | ||
| 108 | static Lisp_Object read_objects_completed; | ||
| 84 | 109 | ||
| 85 | /* File for get_file_char to read from. Use by load. */ | 110 | /* File for get_file_char to read from. Use by load. */ |
| 86 | static FILE *instream; | 111 | static FILE *instream; |
| @@ -1912,6 +1937,18 @@ readevalloop (Lisp_Object readcharfun, | |||
| 1912 | || c == NO_BREAK_SPACE) | 1937 | || c == NO_BREAK_SPACE) |
| 1913 | goto read_next; | 1938 | goto read_next; |
| 1914 | 1939 | ||
| 1940 | if (! HASH_TABLE_P (read_objects_map) | ||
| 1941 | || XHASH_TABLE (read_objects_map)->count) | ||
| 1942 | read_objects_map | ||
| 1943 | = make_hash_table (hashtest_eq, DEFAULT_HASH_SIZE, | ||
| 1944 | DEFAULT_REHASH_SIZE, DEFAULT_REHASH_THRESHOLD, | ||
| 1945 | Qnil, Qnil); | ||
| 1946 | if (! HASH_TABLE_P (read_objects_completed) | ||
| 1947 | || XHASH_TABLE (read_objects_completed)->count) | ||
| 1948 | read_objects_completed | ||
| 1949 | = make_hash_table (hashtest_eq, DEFAULT_HASH_SIZE, | ||
| 1950 | DEFAULT_REHASH_SIZE, DEFAULT_REHASH_THRESHOLD, | ||
| 1951 | Qnil, Qnil); | ||
| 1915 | if (!NILP (Vpurify_flag) && c == '(') | 1952 | if (!NILP (Vpurify_flag) && c == '(') |
| 1916 | { | 1953 | { |
| 1917 | val = read_list (0, readcharfun); | 1954 | val = read_list (0, readcharfun); |
| @@ -1919,7 +1956,6 @@ readevalloop (Lisp_Object readcharfun, | |||
| 1919 | else | 1956 | else |
| 1920 | { | 1957 | { |
| 1921 | UNREAD (c); | 1958 | UNREAD (c); |
| 1922 | read_objects = Qnil; | ||
| 1923 | if (!NILP (readfun)) | 1959 | if (!NILP (readfun)) |
| 1924 | { | 1960 | { |
| 1925 | val = call1 (readfun, readcharfun); | 1961 | val = call1 (readfun, readcharfun); |
| @@ -1939,6 +1975,13 @@ readevalloop (Lisp_Object readcharfun, | |||
| 1939 | else | 1975 | else |
| 1940 | val = read_internal_start (readcharfun, Qnil, Qnil); | 1976 | val = read_internal_start (readcharfun, Qnil, Qnil); |
| 1941 | } | 1977 | } |
| 1978 | /* Empty hashes can be reused; otherwise, reset on next call. */ | ||
| 1979 | if (HASH_TABLE_P (read_objects_map) | ||
| 1980 | && XHASH_TABLE (read_objects_map)->count > 0) | ||
| 1981 | read_objects_map = Qnil; | ||
| 1982 | if (HASH_TABLE_P (read_objects_completed) | ||
| 1983 | && XHASH_TABLE (read_objects_completed)->count > 0) | ||
| 1984 | read_objects_completed = Qnil; | ||
| 1942 | 1985 | ||
| 1943 | if (!NILP (start) && continue_reading_p) | 1986 | if (!NILP (start) && continue_reading_p) |
| 1944 | start = Fpoint_marker (); | 1987 | start = Fpoint_marker (); |
| @@ -2110,7 +2153,18 @@ read_internal_start (Lisp_Object stream, Lisp_Object start, Lisp_Object end) | |||
| 2110 | 2153 | ||
| 2111 | readchar_count = 0; | 2154 | readchar_count = 0; |
| 2112 | new_backquote_flag = 0; | 2155 | new_backquote_flag = 0; |
| 2113 | read_objects = Qnil; | 2156 | /* We can get called from readevalloop which may have set these |
| 2157 | already. */ | ||
| 2158 | if (! HASH_TABLE_P (read_objects_map) | ||
| 2159 | || XHASH_TABLE (read_objects_map)->count) | ||
| 2160 | read_objects_map | ||
| 2161 | = make_hash_table (hashtest_eq, DEFAULT_HASH_SIZE, DEFAULT_REHASH_SIZE, | ||
| 2162 | DEFAULT_REHASH_THRESHOLD, Qnil, Qnil); | ||
| 2163 | if (! HASH_TABLE_P (read_objects_completed) | ||
| 2164 | || XHASH_TABLE (read_objects_completed)->count) | ||
| 2165 | read_objects_completed | ||
| 2166 | = make_hash_table (hashtest_eq, DEFAULT_HASH_SIZE, DEFAULT_REHASH_SIZE, | ||
| 2167 | DEFAULT_REHASH_THRESHOLD, Qnil, Qnil); | ||
| 2114 | if (EQ (Vread_with_symbol_positions, Qt) | 2168 | if (EQ (Vread_with_symbol_positions, Qt) |
| 2115 | || EQ (Vread_with_symbol_positions, stream)) | 2169 | || EQ (Vread_with_symbol_positions, stream)) |
| 2116 | Vread_symbol_positions_list = Qnil; | 2170 | Vread_symbol_positions_list = Qnil; |
| @@ -2138,6 +2192,13 @@ read_internal_start (Lisp_Object stream, Lisp_Object start, Lisp_Object end) | |||
| 2138 | if (EQ (Vread_with_symbol_positions, Qt) | 2192 | if (EQ (Vread_with_symbol_positions, Qt) |
| 2139 | || EQ (Vread_with_symbol_positions, stream)) | 2193 | || EQ (Vread_with_symbol_positions, stream)) |
| 2140 | Vread_symbol_positions_list = Fnreverse (Vread_symbol_positions_list); | 2194 | Vread_symbol_positions_list = Fnreverse (Vread_symbol_positions_list); |
| 2195 | /* Empty hashes can be reused; otherwise, reset on next call. */ | ||
| 2196 | if (HASH_TABLE_P (read_objects_map) | ||
| 2197 | && XHASH_TABLE (read_objects_map)->count > 0) | ||
| 2198 | read_objects_map = Qnil; | ||
| 2199 | if (HASH_TABLE_P (read_objects_completed) | ||
| 2200 | && XHASH_TABLE (read_objects_completed)->count > 0) | ||
| 2201 | read_objects_completed = Qnil; | ||
| 2141 | return retval; | 2202 | return retval; |
| 2142 | } | 2203 | } |
| 2143 | 2204 | ||
| @@ -2978,7 +3039,7 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list) | |||
| 2978 | /* Note: We used to use AUTO_CONS to allocate | 3039 | /* Note: We used to use AUTO_CONS to allocate |
| 2979 | placeholder, but that is a bad idea, since it | 3040 | placeholder, but that is a bad idea, since it |
| 2980 | will place a stack-allocated cons cell into | 3041 | will place a stack-allocated cons cell into |
| 2981 | the list in read_objects, which is a | 3042 | the list in read_objects_map, which is a |
| 2982 | staticpro'd global variable, and thus each of | 3043 | staticpro'd global variable, and thus each of |
| 2983 | its elements is marked during each GC. A | 3044 | its elements is marked during each GC. A |
| 2984 | stack-allocated object will become garbled | 3045 | stack-allocated object will become garbled |
| @@ -2987,12 +3048,34 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list) | |||
| 2987 | different purposes, which will cause crashes | 3048 | different purposes, which will cause crashes |
| 2988 | in GC. */ | 3049 | in GC. */ |
| 2989 | Lisp_Object placeholder = Fcons (Qnil, Qnil); | 3050 | Lisp_Object placeholder = Fcons (Qnil, Qnil); |
| 2990 | Lisp_Object cell = Fcons (make_number (n), placeholder); | 3051 | struct Lisp_Hash_Table *h |
| 2991 | read_objects = Fcons (cell, read_objects); | 3052 | = XHASH_TABLE (read_objects_map); |
| 3053 | EMACS_UINT hash; | ||
| 3054 | Lisp_Object number = make_number (n); | ||
| 3055 | |||
| 3056 | ptrdiff_t i = hash_lookup (h, number, &hash); | ||
| 3057 | if (i >= 0) | ||
| 3058 | /* Not normal, but input could be malformed. */ | ||
| 3059 | set_hash_value_slot (h, i, placeholder); | ||
| 3060 | else | ||
| 3061 | hash_put (h, number, placeholder, hash); | ||
| 2992 | 3062 | ||
| 2993 | /* Read the object itself. */ | 3063 | /* Read the object itself. */ |
| 2994 | tem = read0 (readcharfun); | 3064 | tem = read0 (readcharfun); |
| 2995 | 3065 | ||
| 3066 | /* If it can be recursive, remember it for | ||
| 3067 | future substitutions. */ | ||
| 3068 | if (! SYMBOLP (tem) | ||
| 3069 | && ! NUMBERP (tem) | ||
| 3070 | && ! (STRINGP (tem) && !string_intervals (tem))) | ||
| 3071 | { | ||
| 3072 | struct Lisp_Hash_Table *h2 | ||
| 3073 | = XHASH_TABLE (read_objects_completed); | ||
| 3074 | i = hash_lookup (h2, tem, &hash); | ||
| 3075 | eassert (i < 0); | ||
| 3076 | hash_put (h2, tem, Qnil, hash); | ||
| 3077 | } | ||
| 3078 | |||
| 2996 | /* Now put it everywhere the placeholder was... */ | 3079 | /* Now put it everywhere the placeholder was... */ |
| 2997 | if (CONSP (tem)) | 3080 | if (CONSP (tem)) |
| 2998 | { | 3081 | { |
| @@ -3005,7 +3088,9 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list) | |||
| 3005 | Fsubstitute_object_in_subtree (tem, placeholder); | 3088 | Fsubstitute_object_in_subtree (tem, placeholder); |
| 3006 | 3089 | ||
| 3007 | /* ...and #n# will use the real value from now on. */ | 3090 | /* ...and #n# will use the real value from now on. */ |
| 3008 | Fsetcdr (cell, tem); | 3091 | i = hash_lookup (h, number, &hash); |
| 3092 | eassert (i >= 0); | ||
| 3093 | set_hash_value_slot (h, i, tem); | ||
| 3009 | 3094 | ||
| 3010 | return tem; | 3095 | return tem; |
| 3011 | } | 3096 | } |
| @@ -3014,9 +3099,11 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list) | |||
| 3014 | /* #n# returns a previously read object. */ | 3099 | /* #n# returns a previously read object. */ |
| 3015 | if (c == '#') | 3100 | if (c == '#') |
| 3016 | { | 3101 | { |
| 3017 | tem = Fassq (make_number (n), read_objects); | 3102 | struct Lisp_Hash_Table *h |
| 3018 | if (CONSP (tem)) | 3103 | = XHASH_TABLE (read_objects_map); |
| 3019 | return XCDR (tem); | 3104 | ptrdiff_t i = hash_lookup (h, make_number (n), NULL); |
| 3105 | if (i >= 0) | ||
| 3106 | return HASH_VALUE (h, i); | ||
| 3020 | } | 3107 | } |
| 3021 | } | 3108 | } |
| 3022 | } | 3109 | } |
| @@ -3441,8 +3528,8 @@ substitute_object_recurse (Lisp_Object object, Lisp_Object placeholder, Lisp_Obj | |||
| 3441 | /* If this node can be the entry point to a cycle, remember that | 3528 | /* If this node can be the entry point to a cycle, remember that |
| 3442 | we've seen it. It can only be such an entry point if it was made | 3529 | we've seen it. It can only be such an entry point if it was made |
| 3443 | by #n=, which means that we can find it as a value in | 3530 | by #n=, which means that we can find it as a value in |
| 3444 | read_objects. */ | 3531 | read_objects_completed. */ |
| 3445 | if (!EQ (Qnil, Frassq (subtree, read_objects))) | 3532 | if (hash_lookup (XHASH_TABLE (read_objects_completed), subtree, NULL) >= 0) |
| 3446 | seen_list = Fcons (subtree, seen_list); | 3533 | seen_list = Fcons (subtree, seen_list); |
| 3447 | 3534 | ||
| 3448 | /* Recurse according to subtree's type. | 3535 | /* Recurse according to subtree's type. |
| @@ -4917,8 +5004,10 @@ that are loaded before your customizations are read! */); | |||
| 4917 | DEFSYM (Qdir_ok, "dir-ok"); | 5004 | DEFSYM (Qdir_ok, "dir-ok"); |
| 4918 | DEFSYM (Qdo_after_load_evaluation, "do-after-load-evaluation"); | 5005 | DEFSYM (Qdo_after_load_evaluation, "do-after-load-evaluation"); |
| 4919 | 5006 | ||
| 4920 | staticpro (&read_objects); | 5007 | staticpro (&read_objects_map); |
| 4921 | read_objects = Qnil; | 5008 | read_objects_map = Qnil; |
| 5009 | staticpro (&read_objects_completed); | ||
| 5010 | read_objects_completed = Qnil; | ||
| 4922 | staticpro (&seen_list); | 5011 | staticpro (&seen_list); |
| 4923 | seen_list = Qnil; | 5012 | seen_list = Qnil; |
| 4924 | 5013 | ||