diff options
| author | Mattias EngdegÄrd | 2023-11-04 18:21:06 +0100 |
|---|---|---|
| committer | Mattias EngdegÄrd | 2024-01-13 20:50:38 +0100 |
| commit | ed06de52a53135ee42e528496fdddbf3d74b0479 (patch) | |
| tree | ee3528ede552b36163055c0fb75da21ac539dff9 | |
| parent | 47502c55b0ce2e4cd3f43fefb77d9c2c11ed7c0a (diff) | |
| download | emacs-ed06de52a53135ee42e528496fdddbf3d74b0479.tar.gz emacs-ed06de52a53135ee42e528496fdddbf3d74b0479.zip | |
Faster hash table growth, starting at zero size
The algorithms no longer use the rehash_threshold and rehash_size
float constants, but vary depending on size. In particular, the table
now grows faster, especially from smaller sizes.
The default size is now 0, starting empty, which effectively postpones
allocation until the first insertion (unless make-hash-table was
called with a positive :size); this is a clear gain as long as the
table remains empty. The first inserted item will use an initial size
of 8 because most tables are small.
* src/fns.c (std_rehash_size, std_rehash_threshold): Remove.
(hash_index_size): Integer-only computation.
(maybe_resize_hash_table): Grow more aggressively.
(Fhash_table_rehash_size, Fhash_table_rehash_threshold):
Use the constants directly.
* src/lisp.h (DEFAULT_HASH_SIZE): New value.
| -rw-r--r-- | src/fns.c | 60 | ||||
| -rw-r--r-- | src/lisp.h | 2 |
2 files changed, 25 insertions, 37 deletions
| @@ -4508,31 +4508,18 @@ allocate_hash_table (void) | |||
| 4508 | return ALLOCATE_PLAIN_PSEUDOVECTOR (struct Lisp_Hash_Table, PVEC_HASH_TABLE); | 4508 | return ALLOCATE_PLAIN_PSEUDOVECTOR (struct Lisp_Hash_Table, PVEC_HASH_TABLE); |
| 4509 | } | 4509 | } |
| 4510 | 4510 | ||
| 4511 | /* An upper bound on the size of a hash table index. It must fit in | 4511 | /* Compute the size of the index from the table capacity. */ |
| 4512 | ptrdiff_t and be a valid Emacs fixnum. This is an upper bound on | ||
| 4513 | VECTOR_ELTS_MAX (see alloc.c) and gets as close as we can without | ||
| 4514 | violating modularity. */ | ||
| 4515 | #define INDEX_SIZE_BOUND \ | ||
| 4516 | ((ptrdiff_t) min (MOST_POSITIVE_FIXNUM, \ | ||
| 4517 | ((min (PTRDIFF_MAX, SIZE_MAX) \ | ||
| 4518 | - header_size - GCALIGNMENT) \ | ||
| 4519 | / word_size))) | ||
| 4520 | |||
| 4521 | /* Default factor by which to increase the size of a hash table. */ | ||
| 4522 | static const double std_rehash_size = 1.5; | ||
| 4523 | |||
| 4524 | /* Resize hash table when number of entries / table size is >= this | ||
| 4525 | ratio. */ | ||
| 4526 | static const double std_rehash_threshold = 0.8125; | ||
| 4527 | |||
| 4528 | static ptrdiff_t | 4512 | static ptrdiff_t |
| 4529 | hash_index_size (ptrdiff_t size) | 4513 | hash_index_size (ptrdiff_t size) |
| 4530 | { | 4514 | { |
| 4531 | double index_float = size * (1.0 / std_rehash_threshold); | 4515 | /* An upper bound on the size of a hash table index. It must fit in |
| 4532 | ptrdiff_t index_size = (index_float < INDEX_SIZE_BOUND + 1 | 4516 | ptrdiff_t and be a valid Emacs fixnum. */ |
| 4533 | ? next_almost_prime (index_float) | 4517 | ptrdiff_t upper_bound = min (MOST_POSITIVE_FIXNUM, |
| 4534 | : INDEX_SIZE_BOUND + 1); | 4518 | PTRDIFF_MAX / sizeof (ptrdiff_t)); |
| 4535 | if (INDEX_SIZE_BOUND < index_size) | 4519 | ptrdiff_t index_size = size + (size >> 2); /* 1.25x larger */ |
| 4520 | if (index_size < upper_bound) | ||
| 4521 | index_size = next_almost_prime (index_size); | ||
| 4522 | if (index_size > upper_bound) | ||
| 4536 | error ("Hash table too large"); | 4523 | error ("Hash table too large"); |
| 4537 | return index_size; | 4524 | return index_size; |
| 4538 | } | 4525 | } |
| @@ -4671,16 +4658,12 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h) | |||
| 4671 | if (h->next_free < 0) | 4658 | if (h->next_free < 0) |
| 4672 | { | 4659 | { |
| 4673 | ptrdiff_t old_size = HASH_TABLE_SIZE (h); | 4660 | ptrdiff_t old_size = HASH_TABLE_SIZE (h); |
| 4674 | /* FIXME: better growth management, ditch std_rehash_size */ | 4661 | ptrdiff_t base_size = min (max (old_size, 8), PTRDIFF_MAX / 2); |
| 4675 | EMACS_INT new_size = old_size * std_rehash_size; | 4662 | /* Grow aggressively at small sizes, then just double. */ |
| 4676 | if (new_size < EMACS_INT_MAX) | 4663 | ptrdiff_t new_size = |
| 4677 | new_size = max (new_size, 32); /* avoid slow initial growth */ | 4664 | old_size == 0 |
| 4678 | else | 4665 | ? 8 |
| 4679 | new_size = EMACS_INT_MAX; | 4666 | : (base_size <= 64 ? base_size * 4 : base_size * 2); |
| 4680 | if (PTRDIFF_MAX < new_size) | ||
| 4681 | new_size = PTRDIFF_MAX; | ||
| 4682 | if (new_size <= old_size) | ||
| 4683 | new_size = old_size + 1; | ||
| 4684 | 4667 | ||
| 4685 | /* Allocate all the new vectors before updating *H, to | 4668 | /* Allocate all the new vectors before updating *H, to |
| 4686 | avoid problems if memory is exhausted. */ | 4669 | avoid problems if memory is exhausted. */ |
| @@ -4738,7 +4721,7 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h) | |||
| 4738 | 4721 | ||
| 4739 | #ifdef ENABLE_CHECKING | 4722 | #ifdef ENABLE_CHECKING |
| 4740 | if (HASH_TABLE_P (Vpurify_flag) && XHASH_TABLE (Vpurify_flag) == h) | 4723 | if (HASH_TABLE_P (Vpurify_flag) && XHASH_TABLE (Vpurify_flag) == h) |
| 4741 | message ("Growing hash table to: %"pI"d", new_size); | 4724 | message ("Growing hash table to: %"pD"d", new_size); |
| 4742 | #endif | 4725 | #endif |
| 4743 | } | 4726 | } |
| 4744 | } | 4727 | } |
| @@ -5403,7 +5386,8 @@ keys. Default is `eql'. Predefined are the tests `eq', `eql', and | |||
| 5403 | `define-hash-table-test'. | 5386 | `define-hash-table-test'. |
| 5404 | 5387 | ||
| 5405 | :size SIZE -- A hint as to how many elements will be put in the table. | 5388 | :size SIZE -- A hint as to how many elements will be put in the table. |
| 5406 | Default is 65. | 5389 | The table will always grow as needed; this argument may help performance |
| 5390 | slightly if the size is known in advance but is never required. | ||
| 5407 | 5391 | ||
| 5408 | :weakness WEAK -- WEAK must be one of nil, t, `key', `value', | 5392 | :weakness WEAK -- WEAK must be one of nil, t, `key', `value', |
| 5409 | `key-or-value', or `key-and-value'. If WEAK is not nil, the table | 5393 | `key-or-value', or `key-and-value'. If WEAK is not nil, the table |
| @@ -5516,7 +5500,9 @@ DEFUN ("hash-table-rehash-size", Fhash_table_rehash_size, | |||
| 5516 | (Lisp_Object table) | 5500 | (Lisp_Object table) |
| 5517 | { | 5501 | { |
| 5518 | CHECK_HASH_TABLE (table); | 5502 | CHECK_HASH_TABLE (table); |
| 5519 | return make_float (std_rehash_size); | 5503 | /* Nominal factor by which to increase the size of a hash table. |
| 5504 | No longer used; this is for compatibility. */ | ||
| 5505 | return make_float (1.5); | ||
| 5520 | } | 5506 | } |
| 5521 | 5507 | ||
| 5522 | 5508 | ||
| @@ -5526,7 +5512,9 @@ DEFUN ("hash-table-rehash-threshold", Fhash_table_rehash_threshold, | |||
| 5526 | (Lisp_Object table) | 5512 | (Lisp_Object table) |
| 5527 | { | 5513 | { |
| 5528 | CHECK_HASH_TABLE (table); | 5514 | CHECK_HASH_TABLE (table); |
| 5529 | return make_float (std_rehash_threshold); | 5515 | /* Nominal threshold for when to resize a hash table. |
| 5516 | No longer used; this is for compatibility. */ | ||
| 5517 | return make_float (0.8125); | ||
| 5530 | } | 5518 | } |
| 5531 | 5519 | ||
| 5532 | 5520 | ||
diff --git a/src/lisp.h b/src/lisp.h index 658bcd8b780..5b70e96d6a1 100644 --- a/src/lisp.h +++ b/src/lisp.h | |||
| @@ -2591,7 +2591,7 @@ void hash_table_thaw (Lisp_Object hash_table); | |||
| 2591 | 2591 | ||
| 2592 | /* Default size for hash tables if not specified. */ | 2592 | /* Default size for hash tables if not specified. */ |
| 2593 | 2593 | ||
| 2594 | enum DEFAULT_HASH_SIZE { DEFAULT_HASH_SIZE = 65 }; | 2594 | enum DEFAULT_HASH_SIZE { DEFAULT_HASH_SIZE = 0 }; |
| 2595 | 2595 | ||
| 2596 | /* Combine two integers X and Y for hashing. The result might exceed | 2596 | /* Combine two integers X and Y for hashing. The result might exceed |
| 2597 | INTMASK. */ | 2597 | INTMASK. */ |