aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Oberhollenzer <david.oberhollenzer@sigma-star.at>2019-05-08 20:43:38 +0200
committerDavid Oberhollenzer <david.oberhollenzer@sigma-star.at>2019-05-19 14:27:34 +0200
commitfe5102f23da752870c9514d5ffe79b7f067f70c4 (patch)
tree7dd19dd16dfa79eebe5f78a847289c914454790d
parent087bdaa6f4c597626cc547b59b7764405f16c4a2 (diff)
Add string table implementation
Signed-off-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
-rw-r--r--include/str_table.h76
-rw-r--r--lib/Makemodule.am1
-rw-r--r--lib/util/str_table.c128
3 files changed, 205 insertions, 0 deletions
diff --git a/include/str_table.h b/include/str_table.h
new file mode 100644
index 0000000..b0d83f1
--- /dev/null
+++ b/include/str_table.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+#ifndef STR_TABLE_H
+#define STR_TABLE_H
+
+typedef struct str_bucket_t {
+ struct str_bucket_t *next;
+ char *str;
+ size_t index;
+} str_bucket_t;
+
+/**
+ * @struct str_table_t
+ *
+ * @brief A data structure that manages incremental, unique IDs for strings
+ *
+ * A string table allocates IDs for strings, and provides fast lookup for ID by
+ * string and string by ID.
+ */
+typedef struct {
+ str_bucket_t **buckets;
+ size_t num_buckets;
+
+ char **strings;
+ size_t num_strings;
+ size_t max_strings;
+} str_table_t;
+
+/**
+ * @brief Initialize a string table
+ *
+ * @memberof str_table_t
+ *
+ * @param size The number of hash table buckets to use internally
+ *
+ * @return Zero on success, -1 on failure (reports error to stderr)
+ */
+int str_table_init(str_table_t *table, size_t size);
+
+/**
+ * @brief Free all memory used by a string table
+ *
+ * @memberof str_table_t
+ *
+ * @param table A pointer to a string table object
+ */
+void str_table_cleanup(str_table_t *table);
+
+/**
+ * @brief Resolve a string to an incremental, unique ID
+ *
+ * @memberof str_table_t
+ *
+ * If the string is passed to this function for the first time, a new ID
+ * is allocated for the string.
+ *
+ * @param table A pointer to a string table object
+ * @param str A pointer to a string to resolve
+ * @param idx Returns the unique ID of the string
+ *
+ * @return Zero on success, -1 on failure (reports error to stderr)
+ */
+int str_table_get_index(str_table_t *table, const char *str, size_t *idx);
+
+/**
+ * @brief Resolve a unique ID to the string it represents
+ *
+ * @memberof str_table_t
+ *
+ * @param table A pointer to a string table object
+ * @param index The ID to resolve
+ *
+ * @return A pointer to the string or NULL if it hasn't been added yet
+ */
+const char *str_table_get_string(str_table_t *table, size_t index);
+
+#endif /* STR_TABLE_H */
diff --git a/lib/Makemodule.am b/lib/Makemodule.am
index 17004bc..cd24eae 100644
--- a/lib/Makemodule.am
+++ b/lib/Makemodule.am
@@ -20,6 +20,7 @@ libsquashfs_a_SOURCES += include/frag_reader.h
libutil_a_SOURCES = lib/util/canonicalize_name.c lib/util/write_retry.c
libutil_a_SOURCES += lib/util/read_retry.c include/util.h
libutil_a_SOURCES += lib/util/print_version.c lib/util/mkdir_p.c
+libutil_a_SOURCES += lib/util/str_table.c lib/util/str_table.c
if WITH_GZIP
libcompress_a_SOURCES += lib/comp/gzip.c
diff --git a/lib/util/str_table.c b/lib/util/str_table.c
new file mode 100644
index 0000000..6363c81
--- /dev/null
+++ b/lib/util/str_table.c
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "str_table.h"
+
+/* R5 hash function (borrowed from reiserfs) */
+static uint32_t strhash(const char *s)
+{
+ const signed char *str = (const signed char *)s;
+ uint32_t a = 0;
+
+ while (*str != '\0') {
+ a += *str << 4;
+ a += *str >> 4;
+ a *= 11;
+ str++;
+ }
+
+ return a;
+}
+
+static int strings_grow(str_table_t *table)
+{
+ size_t newsz;
+ void *new;
+
+ if (table->num_strings < table->max_strings)
+ return 0;
+
+ newsz = table->max_strings ? (table->max_strings * 2) : 16;
+ new = realloc(table->strings, sizeof(table->strings[0]) * newsz);
+
+ if (new == NULL) {
+ perror("growing string table");
+ return -1;
+ }
+
+ table->strings = new;
+ table->max_strings = newsz;
+ return 0;
+}
+
+int str_table_init(str_table_t *table, size_t size)
+{
+ table->buckets = calloc(size, sizeof(table->buckets[0]));
+ table->num_buckets = size;
+
+ if (table->buckets == NULL) {
+ perror("initializing string table");
+ return -1;
+ }
+
+ return 0;
+}
+
+void str_table_cleanup(str_table_t *table)
+{
+ str_bucket_t *bucket;
+ size_t i;
+
+ for (i = 0; i < table->num_buckets; ++i) {
+ while (table->buckets[i] != NULL) {
+ bucket = table->buckets[i];
+ table->buckets[i] = bucket->next;
+
+ free(bucket->str);
+ free(bucket);
+ }
+ }
+
+ free(table->buckets);
+ free(table->strings);
+ memset(table, 0, sizeof(*table));
+}
+
+int str_table_get_index(str_table_t *table, const char *str, size_t *idx)
+{
+ str_bucket_t *bucket;
+ uint32_t hash;
+ size_t index;
+
+ hash = strhash(str);
+ index = hash % table->num_buckets;
+ bucket = table->buckets[index];
+
+ while (bucket != NULL) {
+ if (strcmp(bucket->str, str) == 0) {
+ *idx = bucket->index;
+ return 0;
+ }
+
+ bucket = bucket->next;
+ }
+
+ if (strings_grow(table))
+ return -1;
+
+ bucket = calloc(1, sizeof(*bucket));
+ if (bucket == NULL)
+ goto fail_oom;
+
+ bucket->str = strdup(str);
+ if (bucket->str == NULL)
+ goto fail_oom;
+
+ bucket->index = table->num_strings;
+ table->strings[table->num_strings++] = bucket->str;
+ *idx = bucket->index;
+
+ bucket->next = table->buckets[index];
+ table->buckets[index] = bucket;
+ return 0;
+fail_oom:
+ free(bucket);
+ perror("allocating hash table bucket");
+ return -1;
+}
+
+const char *str_table_get_string(str_table_t *table, size_t index)
+{
+ if (index >= table->num_strings)
+ return NULL;
+
+ return table->strings[index];
+}