summaryrefslogtreecommitdiff
path: root/lib/util/hash_table.h
diff options
context:
space:
mode:
authorMatt Turner <mattst88@gmail.com>2020-04-19 16:01:22 -0700
committerDavid Oberhollenzer <goliath@infraroot.at>2020-04-22 14:48:46 +0200
commit20756f4354f333005bc59a2d07593d5d1429d287 (patch)
treedf1e0475927fa7b395c20ed9e777011f9f592b63 /lib/util/hash_table.h
parent566f67ce915f9175ed9075bb1d6c553249c9a426 (diff)
Import and use Mesa's hash table
With `perf record`/`perf report` I saw that 30% of the time was spent in `sqfs_frag_table_find_tail_end` with tar2sqfs for a tarball containing the Gentoo ebuild repository (many thousands of small files). The reason was the bucketing hash table in frag_table.c: too many elements in too few buckets meant lots of walking over the linked lists. This patch replaces that hash table with the hash table implementation from Mesa. Its implementation is more complex (is is an open-addressing, linear-reprobing) hash table, but it is much better suited for the task. On my 4c/8t Skylake, the time to run tar2sqfs drops from 7.5s to less than 3s. CPU usage increases from ~207% to ~356%, presumably indicating an increase in available parallelism due to the removal of the hash table as a bottleneck. The `perf report` profile with this patch shows that the time spent in `sqfs_frag_table_find_tail_end` has dropped from ~30% to 0.01%. Output from ministat: x before + after N Min Max Median Avg Stddev x 20 7.476 7.685 7.5725 7.5615 0.051254268 + 20 2.79 2.901 2.846 2.84475 0.03543842 Difference at 95.0% confidence -4.71675 +/- 0.0282015 -62.3785% +/- 0.241477% (Student's t, pooled s = 0.0440618) I imported only the bits of the hash table implementation that were needed for frag_table.c. Among the changes I made after importing are - removed usage of ralloc, Mesa's recursive memory allocator - Replaced ralloc -> malloc ralloc_free -> free rzalloc_array -> calloc - Removed mem_ctx parameters - Added free()s to the appropriate places (valgrind confirms there are no leaks) - removed _mesa_-prefix from function names Fixes: #40 Signed-off-by: Matt Turner <mattst88@gmail.com>
Diffstat (limited to 'lib/util/hash_table.h')
-rw-r--r--lib/util/hash_table.h86
1 files changed, 86 insertions, 0 deletions
diff --git a/lib/util/hash_table.h b/lib/util/hash_table.h
new file mode 100644
index 0000000..ccbd9c0
--- /dev/null
+++ b/lib/util/hash_table.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright © 2009,2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#ifndef _HASH_TABLE_H
+#define _HASH_TABLE_H
+
+#include <stdlib.h>
+#include <inttypes.h>
+#include <stdbool.h>
+
+struct hash_entry {
+ uint32_t hash;
+ const void *key;
+ void *data;
+};
+
+struct hash_table {
+ struct hash_entry *table;
+ uint32_t (*key_hash_function)(const void *key);
+ bool (*key_equals_function)(const void *a, const void *b);
+ const void *deleted_key;
+ uint32_t size;
+ uint32_t rehash;
+ uint64_t size_magic;
+ uint64_t rehash_magic;
+ uint32_t max_entries;
+ uint32_t size_index;
+ uint32_t entries;
+ uint32_t deleted_entries;
+};
+
+struct hash_table *
+hash_table_create(uint32_t (*key_hash_function)(const void *key),
+ bool (*key_equals_function)(const void *a,
+ const void *b));
+
+struct hash_table *
+hash_table_clone(struct hash_table *src);
+void hash_table_destroy(struct hash_table *ht,
+ void (*delete_function)(struct hash_entry *entry));
+
+struct hash_entry *
+hash_table_insert_pre_hashed(struct hash_table *ht, uint32_t hash,
+ const void *key, void *data);
+struct hash_entry *
+hash_table_search_pre_hashed(struct hash_table *ht, uint32_t hash,
+ const void *key);
+
+struct hash_entry *hash_table_next_entry(struct hash_table *ht,
+ struct hash_entry *entry);
+
+/**
+ * This foreach function is safe against deletion (which just replaces
+ * an entry's data with the deleted marker), but not against insertion
+ * (which may rehash the table, making entry a dangling pointer).
+ */
+#define hash_table_foreach(ht, entry) \
+ for (struct hash_entry *entry = hash_table_next_entry(ht, NULL); \
+ entry != NULL; \
+ entry = hash_table_next_entry(ht, entry))
+
+#endif /* _HASH_TABLE_H */