From fe5102f23da752870c9514d5ffe79b7f067f70c4 Mon Sep 17 00:00:00 2001 From: David Oberhollenzer Date: Wed, 8 May 2019 20:43:38 +0200 Subject: Add string table implementation Signed-off-by: David Oberhollenzer --- include/str_table.h | 76 ++++++++++++++++++++++++++++++ lib/Makemodule.am | 1 + lib/util/str_table.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 205 insertions(+) create mode 100644 include/str_table.h create mode 100644 lib/util/str_table.c diff --git a/include/str_table.h b/include/str_table.h new file mode 100644 index 0000000..b0d83f1 --- /dev/null +++ b/include/str_table.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ +#ifndef STR_TABLE_H +#define STR_TABLE_H + +typedef struct str_bucket_t { + struct str_bucket_t *next; + char *str; + size_t index; +} str_bucket_t; + +/** + * @struct str_table_t + * + * @brief A data structure that manages incremental, unique IDs for strings + * + * A string table allocates IDs for strings, and provides fast lookup for ID by + * string and string by ID. + */ +typedef struct { + str_bucket_t **buckets; + size_t num_buckets; + + char **strings; + size_t num_strings; + size_t max_strings; +} str_table_t; + +/** + * @brief Initialize a string table + * + * @memberof str_table_t + * + * @param size The number of hash table buckets to use internally + * + * @return Zero on success, -1 on failure (reports error to stderr) + */ +int str_table_init(str_table_t *table, size_t size); + +/** + * @brief Free all memory used by a string table + * + * @memberof str_table_t + * + * @param table A pointer to a string table object + */ +void str_table_cleanup(str_table_t *table); + +/** + * @brief Resolve a string to an incremental, unique ID + * + * @memberof str_table_t + * + * If the string is passed to this function for the first time, a new ID + * is allocated for the string. + * + * @param table A pointer to a string table object + * @param str A pointer to a string to resolve + * @param idx Returns the unique ID of the string + * + * @return Zero on success, -1 on failure (reports error to stderr) + */ +int str_table_get_index(str_table_t *table, const char *str, size_t *idx); + +/** + * @brief Resolve a unique ID to the string it represents + * + * @memberof str_table_t + * + * @param table A pointer to a string table object + * @param index The ID to resolve + * + * @return A pointer to the string or NULL if it hasn't been added yet + */ +const char *str_table_get_string(str_table_t *table, size_t index); + +#endif /* STR_TABLE_H */ diff --git a/lib/Makemodule.am b/lib/Makemodule.am index 17004bc..cd24eae 100644 --- a/lib/Makemodule.am +++ b/lib/Makemodule.am @@ -20,6 +20,7 @@ libsquashfs_a_SOURCES += include/frag_reader.h libutil_a_SOURCES = lib/util/canonicalize_name.c lib/util/write_retry.c libutil_a_SOURCES += lib/util/read_retry.c include/util.h libutil_a_SOURCES += lib/util/print_version.c lib/util/mkdir_p.c +libutil_a_SOURCES += lib/util/str_table.c lib/util/str_table.c if WITH_GZIP libcompress_a_SOURCES += lib/comp/gzip.c diff --git a/lib/util/str_table.c b/lib/util/str_table.c new file mode 100644 index 0000000..6363c81 --- /dev/null +++ b/lib/util/str_table.c @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ +#include +#include +#include +#include + +#include "str_table.h" + +/* R5 hash function (borrowed from reiserfs) */ +static uint32_t strhash(const char *s) +{ + const signed char *str = (const signed char *)s; + uint32_t a = 0; + + while (*str != '\0') { + a += *str << 4; + a += *str >> 4; + a *= 11; + str++; + } + + return a; +} + +static int strings_grow(str_table_t *table) +{ + size_t newsz; + void *new; + + if (table->num_strings < table->max_strings) + return 0; + + newsz = table->max_strings ? (table->max_strings * 2) : 16; + new = realloc(table->strings, sizeof(table->strings[0]) * newsz); + + if (new == NULL) { + perror("growing string table"); + return -1; + } + + table->strings = new; + table->max_strings = newsz; + return 0; +} + +int str_table_init(str_table_t *table, size_t size) +{ + table->buckets = calloc(size, sizeof(table->buckets[0])); + table->num_buckets = size; + + if (table->buckets == NULL) { + perror("initializing string table"); + return -1; + } + + return 0; +} + +void str_table_cleanup(str_table_t *table) +{ + str_bucket_t *bucket; + size_t i; + + for (i = 0; i < table->num_buckets; ++i) { + while (table->buckets[i] != NULL) { + bucket = table->buckets[i]; + table->buckets[i] = bucket->next; + + free(bucket->str); + free(bucket); + } + } + + free(table->buckets); + free(table->strings); + memset(table, 0, sizeof(*table)); +} + +int str_table_get_index(str_table_t *table, const char *str, size_t *idx) +{ + str_bucket_t *bucket; + uint32_t hash; + size_t index; + + hash = strhash(str); + index = hash % table->num_buckets; + bucket = table->buckets[index]; + + while (bucket != NULL) { + if (strcmp(bucket->str, str) == 0) { + *idx = bucket->index; + return 0; + } + + bucket = bucket->next; + } + + if (strings_grow(table)) + return -1; + + bucket = calloc(1, sizeof(*bucket)); + if (bucket == NULL) + goto fail_oom; + + bucket->str = strdup(str); + if (bucket->str == NULL) + goto fail_oom; + + bucket->index = table->num_strings; + table->strings[table->num_strings++] = bucket->str; + *idx = bucket->index; + + bucket->next = table->buckets[index]; + table->buckets[index] = bucket; + return 0; +fail_oom: + free(bucket); + perror("allocating hash table bucket"); + return -1; +} + +const char *str_table_get_string(str_table_t *table, size_t index) +{ + if (index >= table->num_strings) + return NULL; + + return table->strings[index]; +} -- cgit v1.2.3