From daa1409632c380f88150be29889714044a843dcc Mon Sep 17 00:00:00 2001 From: David Oberhollenzer Date: Sun, 26 Jan 2020 15:00:03 +0100 Subject: Cleanup: Move fragment deduplication code over to fragment table This removes further clutter from the data writer. Any future efforts on making fragment by hash lookup faster can focus on that area only and don't clutter the block processor. Signed-off-by: David Oberhollenzer --- lib/sqfs/data_writer/common.c | 7 ---- lib/sqfs/data_writer/fragment.c | 84 ++++++++++++----------------------------- lib/sqfs/data_writer/internal.h | 8 ---- lib/sqfs/frag_table.c | 62 ++++++++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 75 deletions(-) (limited to 'lib') diff --git a/lib/sqfs/data_writer/common.c b/lib/sqfs/data_writer/common.c index bc6bac4..b207291 100644 --- a/lib/sqfs/data_writer/common.c +++ b/lib/sqfs/data_writer/common.c @@ -29,7 +29,6 @@ int data_writer_init(sqfs_data_writer_t *proc, size_t max_block_size, proc->cmp = cmp; proc->file = file; proc->max_blocks = INIT_BLOCK_COUNT; - proc->frag_list_max = INIT_BLOCK_COUNT; proc->frag_tbl = sqfs_frag_table_create(0); if (proc->frag_tbl == NULL) @@ -39,11 +38,6 @@ int data_writer_init(sqfs_data_writer_t *proc, size_t max_block_size, if (proc->blocks == NULL) return -1; - proc->frag_list = alloc_array(sizeof(proc->frag_list[0]), - proc->frag_list_max); - if (proc->frag_list == NULL) - return -1; - return 0; } @@ -55,7 +49,6 @@ void data_writer_cleanup(sqfs_data_writer_t *proc) free_blk_list(proc->done); free(proc->blk_current); free(proc->frag_block); - free(proc->frag_list); free(proc->blocks); free(proc); } diff --git a/lib/sqfs/data_writer/fragment.c b/lib/sqfs/data_writer/fragment.c index 70679c9..9862c89 100644 --- a/lib/sqfs/data_writer/fragment.c +++ b/lib/sqfs/data_writer/fragment.c @@ -7,68 +7,18 @@ #define SQFS_BUILDING_DLL #include "internal.h" -static int grow_deduplication_list(sqfs_data_writer_t *proc) -{ - size_t new_sz; - void *new; - - if (proc->frag_list_num == proc->frag_list_max) { - new_sz = proc->frag_list_max * 2; - new = realloc(proc->frag_list, - sizeof(proc->frag_list[0]) * new_sz); - - if (new == NULL) - return SQFS_ERROR_ALLOC; - - proc->frag_list = new; - proc->frag_list_max = new_sz; - } - - return 0; -} - -static int store_fragment(sqfs_data_writer_t *proc, sqfs_block_t *frag, - sqfs_u64 hash) -{ - int err = grow_deduplication_list(proc); - - if (err) - return err; - - proc->frag_list[proc->frag_list_num].index = proc->frag_block->index; - proc->frag_list[proc->frag_list_num].offset = proc->frag_block->size; - proc->frag_list[proc->frag_list_num].hash = hash; - proc->frag_list_num += 1; - - sqfs_inode_set_frag_location(frag->inode, proc->frag_block->index, - proc->frag_block->size); - - if (proc->hooks != NULL && proc->hooks->pre_fragment_store != NULL) { - proc->hooks->pre_fragment_store(proc->user_ptr, frag); - } - - memcpy(proc->frag_block->data + proc->frag_block->size, - frag->data, frag->size); - - proc->frag_block->flags |= (frag->flags & SQFS_BLK_DONT_COMPRESS); - proc->frag_block->size += frag->size; - return 0; -} - int process_completed_fragment(sqfs_data_writer_t *proc, sqfs_block_t *frag, sqfs_block_t **blk_out) { - sqfs_u64 hash; - sqfs_u32 index; - size_t i, size; + sqfs_u32 index, offset; + size_t size; int err; - hash = MK_BLK_HASH(frag->checksum, frag->size); - - for (i = 0; i < proc->frag_list_num; ++i) { - if (proc->frag_list[i].hash == hash) - goto out_duplicate; - } + err = sqfs_frag_table_find_tail_end(proc->frag_tbl, + frag->checksum, frag->size, + &index, &offset); + if (err == 0) + goto out_duplicate; if (proc->frag_block != NULL) { size = proc->frag_block->size + frag->size; @@ -96,18 +46,32 @@ int process_completed_fragment(sqfs_data_writer_t *proc, sqfs_block_t *frag, proc->frag_block->flags = SQFS_BLK_FRAGMENT_BLOCK; } - err = store_fragment(proc, frag, hash); + err = sqfs_frag_table_add_tail_end(proc->frag_tbl, + proc->frag_block->index, + proc->frag_block->size, + frag->size, frag->checksum); if (err) goto fail; + sqfs_inode_set_frag_location(frag->inode, proc->frag_block->index, + proc->frag_block->size); + + if (proc->hooks != NULL && proc->hooks->pre_fragment_store != NULL) { + proc->hooks->pre_fragment_store(proc->user_ptr, frag); + } + + memcpy(proc->frag_block->data + proc->frag_block->size, + frag->data, frag->size); + + proc->frag_block->flags |= (frag->flags & SQFS_BLK_DONT_COMPRESS); + proc->frag_block->size += frag->size; return 0; fail: free(*blk_out); *blk_out = NULL; return err; out_duplicate: - sqfs_inode_set_frag_location(frag->inode, proc->frag_list[i].index, - proc->frag_list[i].offset); + sqfs_inode_set_frag_location(frag->inode, index, offset); if (proc->hooks != NULL && proc->hooks->notify_fragment_discard != NULL) { diff --git a/lib/sqfs/data_writer/internal.h b/lib/sqfs/data_writer/internal.h index 5042f0a..8f59fb7 100644 --- a/lib/sqfs/data_writer/internal.h +++ b/lib/sqfs/data_writer/internal.h @@ -43,11 +43,6 @@ typedef struct { sqfs_u64 hash; } blk_info_t; -typedef struct { - sqfs_u32 index; - sqfs_u32 offset; - sqfs_u64 hash; -} frag_info_t; typedef struct compress_worker_t compress_worker_t; @@ -92,9 +87,6 @@ struct sqfs_data_writer_t { sqfs_compressor_t *cmp; sqfs_block_t *frag_block; - frag_info_t *frag_list; - size_t frag_list_num; - size_t frag_list_max; const sqfs_block_hooks_t *hooks; void *user_ptr; diff --git a/lib/sqfs/frag_table.c b/lib/sqfs/frag_table.c index 3373a3c..200f3d5 100644 --- a/lib/sqfs/frag_table.c +++ b/lib/sqfs/frag_table.c @@ -17,10 +17,24 @@ #include #include + +typedef struct { + sqfs_u32 index; + sqfs_u32 offset; + sqfs_u32 size; + sqfs_u32 hash; +} chunk_info_t; + + struct sqfs_frag_table_t { size_t capacity; size_t used; sqfs_fragment_t *table; + + /* TODO: more efficient data structure */ + size_t chunk_num; + size_t chunk_max; + chunk_info_t *chunk_list; }; sqfs_frag_table_t *sqfs_frag_table_create(sqfs_u32 flags) @@ -39,6 +53,7 @@ sqfs_frag_table_t *sqfs_frag_table_create(sqfs_u32 flags) void sqfs_frag_table_destroy(sqfs_frag_table_t *tbl) { + free(tbl->chunk_list); free(tbl->table); free(tbl); } @@ -193,3 +208,50 @@ size_t sqfs_frag_table_get_size(sqfs_frag_table_t *tbl) { return tbl->used; } + +int sqfs_frag_table_add_tail_end(sqfs_frag_table_t *tbl, + sqfs_u32 index, sqfs_u32 offset, + sqfs_u32 size, sqfs_u32 hash) +{ + size_t new_sz; + void *new; + + if (tbl->chunk_num == tbl->chunk_max) { + new_sz = tbl->chunk_max ? tbl->chunk_max * 2 : 128; + new = realloc(tbl->chunk_list, + sizeof(tbl->chunk_list[0]) * new_sz); + + if (new == NULL) + return SQFS_ERROR_ALLOC; + + tbl->chunk_list = new; + tbl->chunk_max = new_sz; + } + + tbl->chunk_list[tbl->chunk_num].index = index; + tbl->chunk_list[tbl->chunk_num].offset = offset; + tbl->chunk_list[tbl->chunk_num].size = size; + tbl->chunk_list[tbl->chunk_num].hash = hash; + tbl->chunk_num += 1; + return 0; +} + +int sqfs_frag_table_find_tail_end(sqfs_frag_table_t *tbl, + sqfs_u32 hash, sqfs_u32 size, + sqfs_u32 *index, sqfs_u32 *offset) +{ + size_t i; + + for (i = 0; i < tbl->chunk_num; ++i) { + if (tbl->chunk_list[i].hash == hash && + tbl->chunk_list[i].size == size) { + if (index != NULL) + *index = tbl->chunk_list[i].index; + if (offset != NULL) + *offset = tbl->chunk_list[i].offset; + return 0; + } + } + + return SQFS_ERROR_NO_ENTRY; +} -- cgit v1.2.3