From 257a5737d708b706be29526050f61ded9793d0ab Mon Sep 17 00:00:00 2001 From: David Oberhollenzer Date: Sun, 7 Mar 2021 01:14:13 +0100 Subject: Rewrite the str_table to internally use the more opimized hash_table Signed-off-by: David Oberhollenzer --- include/str_table.h | 27 +++-- lib/sqfs/xattr/xattr_writer.c | 4 +- lib/sqfs/xattr/xattr_writer.h | 2 - lib/sqfs/xattr/xattr_writer_flush.c | 4 +- lib/util/str_table.c | 198 +++++++++++++----------------------- tests/libutil/str_table.c | 2 +- 6 files changed, 95 insertions(+), 142 deletions(-) diff --git a/include/str_table.h b/include/str_table.h index 7ae050e..206fa3a 100644 --- a/include/str_table.h +++ b/include/str_table.h @@ -8,28 +8,37 @@ #define STR_TABLE_H #include "sqfs/predef.h" +#include "array.h" +#include "hash_table.h" -typedef struct str_bucket_t { - struct str_bucket_t *next; - char *str; +typedef struct { size_t index; size_t refcount; + char string[]; } str_bucket_t; /* Stores strings in a hash table and assigns an incremental, unique ID to each string. Subsequent additions return the existing ID. The ID can be used for (hopefully) constant time lookup of the original string. */ typedef struct { - str_bucket_t **buckets; - size_t num_buckets; + /* an array that resolves index to bucket pointer */ + array_t bucket_ptrs; + + /* hash table with the string buckets attached */ + struct hash_table *ht; - char **strings; - size_t num_strings; - size_t max_strings; + /* the next ID we are going to allocate */ + size_t next_index; } str_table_t; +/* the number of strings currently stored in the table */ +static SQFS_INLINE size_t str_table_count(const str_table_t *table) +{ + return table->next_index; +} + /* `size` is the number of hash table buckets to use internally. */ -SQFS_INTERNAL int str_table_init(str_table_t *table, size_t size); +SQFS_INTERNAL int str_table_init(str_table_t *table); SQFS_INTERNAL void str_table_cleanup(str_table_t *table); diff --git a/lib/sqfs/xattr/xattr_writer.c b/lib/sqfs/xattr/xattr_writer.c index e38c2be..c49aaf1 100644 --- a/lib/sqfs/xattr/xattr_writer.c +++ b/lib/sqfs/xattr/xattr_writer.c @@ -95,10 +95,10 @@ sqfs_xattr_writer_t *sqfs_xattr_writer_create(sqfs_u32 flags) if (xwr == NULL) return NULL; - if (str_table_init(&xwr->keys, XATTR_KEY_BUCKETS)) + if (str_table_init(&xwr->keys)) goto fail_keys; - if (str_table_init(&xwr->values, XATTR_VALUE_BUCKETS)) + if (str_table_init(&xwr->values)) goto fail_values; if (array_init(&xwr->kv_pairs, sizeof(sqfs_u64), diff --git a/lib/sqfs/xattr/xattr_writer.h b/lib/sqfs/xattr/xattr_writer.h index ff8479a..6f414b8 100644 --- a/lib/sqfs/xattr/xattr_writer.h +++ b/lib/sqfs/xattr/xattr_writer.h @@ -28,8 +28,6 @@ #include -#define XATTR_KEY_BUCKETS 31 -#define XATTR_VALUE_BUCKETS 511 #define XATTR_INITIAL_PAIR_CAP 128 #define MK_PAIR(key, value) (((sqfs_u64)(key) << 32UL) | (sqfs_u64)(value)) diff --git a/lib/sqfs/xattr/xattr_writer_flush.c b/lib/sqfs/xattr/xattr_writer_flush.c index 85ec724..98d2619 100644 --- a/lib/sqfs/xattr/xattr_writer_flush.c +++ b/lib/sqfs/xattr/xattr_writer_flush.c @@ -200,11 +200,11 @@ static int write_kv_pairs(const sqfs_xattr_writer_t *xwr, size_t i; ool_locations = alloc_array(sizeof(ool_locations[0]), - xwr->values.num_strings); + str_table_count(&xwr->values)); if (ool_locations == NULL) return SQFS_ERROR_ALLOC; - for (i = 0; i < xwr->values.num_strings; ++i) + for (i = 0; i < str_table_count(&xwr->values); ++i) ool_locations[i] = 0xFFFFFFFFFFFFFFFFUL; for (blk = xwr->kv_block_first; blk != NULL; blk = blk->next) { diff --git a/lib/util/str_table.c b/lib/util/str_table.c index 1463546..2816ff8 100644 --- a/lib/util/str_table.c +++ b/lib/util/str_table.c @@ -30,187 +30,133 @@ static sqfs_u32 strhash(const char *s) return a; } -static int strings_grow(str_table_t *table) +static bool key_equals_function(void *user, const void *a, const void *b) { - size_t newsz; - void *new; - - if (table->num_strings < table->max_strings) - return 0; - - newsz = table->max_strings ? (table->max_strings * 2) : 16; - new = realloc(table->strings, sizeof(table->strings[0]) * newsz); - - if (new == NULL) - return SQFS_ERROR_ALLOC; - - table->strings = new; - table->max_strings = newsz; - return 0; + (void)user; + return strcmp(a, b) == 0; } -int str_table_init(str_table_t *table, size_t size) +int str_table_init(str_table_t *table) { memset(table, 0, sizeof(*table)); - table->buckets = alloc_array(size, sizeof(table->buckets[0])); - table->num_buckets = size; + if (array_init(&table->bucket_ptrs, sizeof(str_bucket_t *), 0)) + goto fail_arr; - if (table->buckets == NULL) - return SQFS_ERROR_ALLOC; + table->ht = hash_table_create(NULL, key_equals_function); + if (table->ht == NULL) + goto fail_ht; return 0; +fail_ht: + array_cleanup(&table->bucket_ptrs); +fail_arr: + memset(table, 0, sizeof(*table)); + return SQFS_ERROR_ALLOC; } int str_table_copy(str_table_t *dst, const str_table_t *src) { - str_bucket_t *bucket, *it, **next; - size_t i; + str_bucket_t *bucket, **array; + int ret; - dst->num_strings = src->num_strings; - dst->max_strings = src->max_strings; - dst->strings = calloc(sizeof(dst->strings[0]), src->num_strings); + ret = array_init_copy(&dst->bucket_ptrs, &src->bucket_ptrs); + if (ret != 0) + return ret; - if (dst->strings == NULL) - return -1; - - for (i = 0; i < src->num_strings; ++i) { - dst->strings[i] = strdup(src->strings[i]); - - if (dst->strings[i] == NULL) - goto fail_str; + dst->ht = hash_table_clone(src->ht); + if (dst->ht == NULL) { + array_cleanup(&dst->bucket_ptrs); + return SQFS_ERROR_ALLOC; } - dst->num_buckets = src->num_buckets; - dst->buckets = calloc(sizeof(dst->buckets[0]), src->num_buckets); + array = (str_bucket_t **)dst->bucket_ptrs.data; - if (dst->buckets == NULL) - goto fail_str; - - for (i = 0; i < src->num_buckets; ++i) { - next = &(dst->buckets[i]); + hash_table_foreach(dst->ht, ent) { + bucket = alloc_flex(sizeof(*bucket), 1, strlen(ent->key) + 1); + if (bucket == NULL) { + str_table_cleanup(dst); + return SQFS_ERROR_ALLOC; + } - for (it = src->buckets[i]; it != NULL; it = it->next) { - bucket = malloc(sizeof(*bucket)); - if (bucket == NULL) - goto fail_buck; + memcpy(bucket, ent->data, + sizeof(*bucket) + strlen(ent->key) + 1); - bucket->next = NULL; - bucket->index = it->index; - bucket->refcount = it->refcount; - bucket->str = dst->strings[bucket->index]; + ent->data = bucket; + ent->key = bucket->string; - *next = bucket; - next = &(bucket->next); - } + array[bucket->index] = bucket; } return 0; -fail_buck: - for (i = 0; i < dst->num_buckets; ++i) { - while (dst->buckets[i] != NULL) { - bucket = dst->buckets[i]; - dst->buckets[i] = bucket->next; - - free(bucket); - } - } - free(dst->buckets); -fail_str: - for (i = 0; i < dst->num_strings; ++i) - free(dst->strings[i]); - - free(dst->strings); - dst->strings = NULL; - return -1; } void str_table_cleanup(str_table_t *table) { - str_bucket_t *bucket; - size_t i; - - for (i = 0; i < table->num_buckets; ++i) { - while (table->buckets[i] != NULL) { - bucket = table->buckets[i]; - table->buckets[i] = bucket->next; - - free(bucket->str); - free(bucket); - } + hash_table_foreach(table->ht, ent) { + free(ent->data); + ent->data = NULL; + ent->key = NULL; } - free(table->buckets); - free(table->strings); + hash_table_destroy(table->ht, NULL); + array_cleanup(&table->bucket_ptrs); memset(table, 0, sizeof(*table)); } int str_table_get_index(str_table_t *table, const char *str, size_t *idx) { - str_bucket_t *bucket; + struct hash_entry *ent; + str_bucket_t *new; sqfs_u32 hash; - size_t index; - int err; hash = strhash(str); - index = hash % table->num_buckets; - bucket = table->buckets[index]; - - while (bucket != NULL) { - if (strcmp(bucket->str, str) == 0) { - *idx = bucket->index; - return 0; - } + ent = hash_table_search_pre_hashed(table->ht, hash, str); - bucket = bucket->next; + if (ent != NULL) { + *idx = ((str_bucket_t *)ent->data)->index; + return 0; } - err = strings_grow(table); - if (err) - return err; + new = alloc_flex(sizeof(*new), 1, strlen(str) + 1); + if (new == NULL) + return SQFS_ERROR_ALLOC; - bucket = calloc(1, sizeof(*bucket)); - if (bucket == NULL) - goto fail_oom; + new->index = table->next_index; + strcpy(new->string, str); - bucket->str = strdup(str); - if (bucket->str == NULL) - goto fail_oom; + ent = hash_table_insert_pre_hashed(table->ht, hash, str, new); + if (ent == NULL) { + free(new); + return SQFS_ERROR_ALLOC; + } + + ent->key = new->string; - bucket->index = table->num_strings; - table->strings[table->num_strings++] = bucket->str; - *idx = bucket->index; + if (array_append(&table->bucket_ptrs, &new) != 0) { + free(new); + ent->key = NULL; + ent->data = NULL; + return SQFS_ERROR_ALLOC; + } - bucket->next = table->buckets[index]; - table->buckets[index] = bucket; + *idx = table->next_index++; return 0; -fail_oom: - free(bucket); - return SQFS_ERROR_ALLOC; } -const char *str_table_get_string(const str_table_t *table, size_t index) +static str_bucket_t *bucket_by_index(const str_table_t *table, size_t index) { - if (index >= table->num_strings) + if (index >= table->bucket_ptrs.used) return NULL; - return table->strings[index]; + return ((str_bucket_t **)table->bucket_ptrs.data)[index]; } -static str_bucket_t *bucket_by_index(const str_table_t *table, size_t index) +const char *str_table_get_string(const str_table_t *table, size_t index) { - str_bucket_t *bucket = NULL; - sqfs_u32 hash; - - if (index < table->num_strings) { - hash = strhash(table->strings[index]); - bucket = table->buckets[hash % table->num_buckets]; - - while (bucket != NULL && bucket->index != index) - bucket = bucket->next; - } + str_bucket_t *bucket = bucket_by_index(table, index); - return bucket; + return bucket == NULL ? NULL : bucket->string; } void str_table_add_ref(str_table_t *table, size_t index) diff --git a/tests/libutil/str_table.c b/tests/libutil/str_table.c index 83526af..9baba27 100644 --- a/tests/libutil/str_table.c +++ b/tests/libutil/str_table.c @@ -45,7 +45,7 @@ int main(void) if (read_strings()) return EXIT_FAILURE; - TEST_ASSERT(str_table_init(&table, 64) == 0); + TEST_ASSERT(str_table_init(&table) == 0); for (i = 0; i < 1000; ++i) { TEST_ASSERT(str_table_get_index(&table, strings[i], &idx) == 0); -- cgit v1.2.3