summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorDavid Oberhollenzer <david.oberhollenzer@sigma-star.at>2020-02-12 18:58:45 +0100
committerDavid Oberhollenzer <david.oberhollenzer@sigma-star.at>2020-02-12 18:58:45 +0100
commit6546c3f4a1d140024cb2ab2c90fbf192749a1bf7 (patch)
tree453faf64d09638313820414116a8012d54ede18b /lib
parent84c9566aaf2dd456992b9b37a6324c09af055afb (diff)
Use a hash table for fragment lookup instead of linear search
Profiling on a sample filesystem determined that fragment deduplication lookups rank third place directly after crc32 and the actual compression. By using a hash table instead of linear search, this time can be reduced drastically. Signed-off-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
Diffstat (limited to 'lib')
-rw-r--r--lib/sqfs/frag_table.c74
1 files changed, 45 insertions, 29 deletions
diff --git a/lib/sqfs/frag_table.c b/lib/sqfs/frag_table.c
index 58851c5..bf1c109 100644
--- a/lib/sqfs/frag_table.c
+++ b/lib/sqfs/frag_table.c
@@ -18,7 +18,11 @@
#include <string.h>
-typedef struct {
+#define NUM_BUCKETS (128)
+
+
+typedef struct chunk_info_t {
+ struct chunk_info_t *next;
sqfs_u32 index;
sqfs_u32 offset;
sqfs_u32 size;
@@ -33,17 +37,23 @@ struct sqfs_frag_table_t {
size_t used;
sqfs_fragment_t *table;
- /* TODO: more efficient data structure */
- size_t chunk_num;
- size_t chunk_max;
- chunk_info_t *chunk_list;
+ chunk_info_t chunks[NUM_BUCKETS];
};
static void frag_table_destroy(sqfs_object_t *obj)
{
sqfs_frag_table_t *tbl = (sqfs_frag_table_t *)obj;
+ chunk_info_t *info;
+ size_t i;
+
+ for (i = 0; i < NUM_BUCKETS; ++i) {
+ while (tbl->chunks[i].next != NULL) {
+ info = tbl->chunks[i].next;
+ tbl->chunks[i].next = info->next;
+ free(info);
+ }
+ }
- free(tbl->chunk_list);
free(tbl->table);
free(tbl);
}
@@ -218,26 +228,31 @@ int sqfs_frag_table_add_tail_end(sqfs_frag_table_t *tbl,
sqfs_u32 index, sqfs_u32 offset,
sqfs_u32 size, sqfs_u32 hash)
{
- size_t new_sz;
- void *new;
-
- if (tbl->chunk_num == tbl->chunk_max) {
- new_sz = tbl->chunk_max ? tbl->chunk_max * 2 : 128;
- new = realloc(tbl->chunk_list,
- sizeof(tbl->chunk_list[0]) * new_sz);
-
+ size_t idx = hash % NUM_BUCKETS;
+ chunk_info_t *new, *it;
+
+ if (tbl->chunks[idx].size == 0 && tbl->chunks[idx].hash == 0) {
+ tbl->chunks[idx].index = index;
+ tbl->chunks[idx].offset = offset;
+ tbl->chunks[idx].size = size;
+ tbl->chunks[idx].hash = hash;
+ } else {
+ new = calloc(1, sizeof(*new));
if (new == NULL)
return SQFS_ERROR_ALLOC;
- tbl->chunk_list = new;
- tbl->chunk_max = new_sz;
+ new->index = index;
+ new->offset = offset;
+ new->size = size;
+ new->hash = hash;
+
+ it = &tbl->chunks[idx];
+ while (it->next != NULL)
+ it = it->next;
+
+ it->next = new;
}
- tbl->chunk_list[tbl->chunk_num].index = index;
- tbl->chunk_list[tbl->chunk_num].offset = offset;
- tbl->chunk_list[tbl->chunk_num].size = size;
- tbl->chunk_list[tbl->chunk_num].hash = hash;
- tbl->chunk_num += 1;
return 0;
}
@@ -245,15 +260,16 @@ int sqfs_frag_table_find_tail_end(sqfs_frag_table_t *tbl,
sqfs_u32 hash, sqfs_u32 size,
sqfs_u32 *index, sqfs_u32 *offset)
{
- size_t i;
+ size_t idx = hash % NUM_BUCKETS;
+ chunk_info_t *it;
+
+ if (tbl->chunks[idx].size == 0 && tbl->chunks[idx].hash == 0)
+ return SQFS_ERROR_NO_ENTRY;
- for (i = 0; i < tbl->chunk_num; ++i) {
- if (tbl->chunk_list[i].hash == hash &&
- tbl->chunk_list[i].size == size) {
- if (index != NULL)
- *index = tbl->chunk_list[i].index;
- if (offset != NULL)
- *offset = tbl->chunk_list[i].offset;
+ for (it = &tbl->chunks[idx]; it != NULL; it = it->next) {
+ if (it->hash == hash && it->size == size) {
+ *index = it->index;
+ *offset = it->offset;
return 0;
}
}