From daa1409632c380f88150be29889714044a843dcc Mon Sep 17 00:00:00 2001 From: David Oberhollenzer Date: Sun, 26 Jan 2020 15:00:03 +0100 Subject: Cleanup: Move fragment deduplication code over to fragment table This removes further clutter from the data writer. Any future efforts on making fragment by hash lookup faster can focus on that area only and don't clutter the block processor. Signed-off-by: David Oberhollenzer --- include/sqfs/frag_table.h | 42 +++++++++++++++++++++ lib/sqfs/data_writer/common.c | 7 ---- lib/sqfs/data_writer/fragment.c | 84 ++++++++++++----------------------------- lib/sqfs/data_writer/internal.h | 8 ---- lib/sqfs/frag_table.c | 62 ++++++++++++++++++++++++++++++ 5 files changed, 128 insertions(+), 75 deletions(-) diff --git a/include/sqfs/frag_table.h b/include/sqfs/frag_table.h index 12ddd40..45ba645 100644 --- a/include/sqfs/frag_table.h +++ b/include/sqfs/frag_table.h @@ -151,6 +151,48 @@ SQFS_API int sqfs_frag_table_set(sqfs_frag_table_t *tbl, sqfs_u32 index, */ SQFS_API size_t sqfs_frag_table_get_size(sqfs_frag_table_t *tbl); +/** + * @brief Remember a specific tail end chunk within a fragment block. + * + * @memberof sqfs_frag_table_t + * + * This is a convenience function that memorizes a specific tail end packed + * into a fragment block, primarily for looking it up later by hash and size + * using @ref sqfs_frag_table_find_tail_end. + * + * @param tbl A pointer to the fragmen table object. + * @param index An index into the fragment table that identifies the + * fragment block. + * @param offset A byte offset into the actual fragment block itself. + * @param size The size of the tail en inside the fragment block. + * @param hash An arbitrary 32 bit data hash to memorize. + * + * @return Zero on success, an @ref E_SQFS_ERROR on faiure. + */ +SQFS_API int sqfs_frag_table_add_tail_end(sqfs_frag_table_t *tbl, + sqfs_u32 index, sqfs_u32 offset, + sqfs_u32 size, sqfs_u32 hash); + +/** + * @brief RFetch a fragment block index and offset by hash and size + * + * @memberof sqfs_frag_table_t + * + * This is a convenience function that can find data chunks previously + * memorized using @ref sqfs_frag_table_add_tail_end. + * + * @param tbl A pointer to the fragmen table object. + * @param hash An arbitrary 32 bit data hash that describes the chunk. + * @param size The size of the chunk to look for. + * @param index Returns an index into the fragment table on success. + * @param offset Returns a byte offset into the fragment block on success. + * + * @return Zero on success, non-zero if the chunk could not be found. + */ +SQFS_API int sqfs_frag_table_find_tail_end(sqfs_frag_table_t *tbl, + sqfs_u32 hash, sqfs_u32 size, + sqfs_u32 *index, sqfs_u32 *offset); + #ifdef __cplusplus } #endif diff --git a/lib/sqfs/data_writer/common.c b/lib/sqfs/data_writer/common.c index bc6bac4..b207291 100644 --- a/lib/sqfs/data_writer/common.c +++ b/lib/sqfs/data_writer/common.c @@ -29,7 +29,6 @@ int data_writer_init(sqfs_data_writer_t *proc, size_t max_block_size, proc->cmp = cmp; proc->file = file; proc->max_blocks = INIT_BLOCK_COUNT; - proc->frag_list_max = INIT_BLOCK_COUNT; proc->frag_tbl = sqfs_frag_table_create(0); if (proc->frag_tbl == NULL) @@ -39,11 +38,6 @@ int data_writer_init(sqfs_data_writer_t *proc, size_t max_block_size, if (proc->blocks == NULL) return -1; - proc->frag_list = alloc_array(sizeof(proc->frag_list[0]), - proc->frag_list_max); - if (proc->frag_list == NULL) - return -1; - return 0; } @@ -55,7 +49,6 @@ void data_writer_cleanup(sqfs_data_writer_t *proc) free_blk_list(proc->done); free(proc->blk_current); free(proc->frag_block); - free(proc->frag_list); free(proc->blocks); free(proc); } diff --git a/lib/sqfs/data_writer/fragment.c b/lib/sqfs/data_writer/fragment.c index 70679c9..9862c89 100644 --- a/lib/sqfs/data_writer/fragment.c +++ b/lib/sqfs/data_writer/fragment.c @@ -7,68 +7,18 @@ #define SQFS_BUILDING_DLL #include "internal.h" -static int grow_deduplication_list(sqfs_data_writer_t *proc) -{ - size_t new_sz; - void *new; - - if (proc->frag_list_num == proc->frag_list_max) { - new_sz = proc->frag_list_max * 2; - new = realloc(proc->frag_list, - sizeof(proc->frag_list[0]) * new_sz); - - if (new == NULL) - return SQFS_ERROR_ALLOC; - - proc->frag_list = new; - proc->frag_list_max = new_sz; - } - - return 0; -} - -static int store_fragment(sqfs_data_writer_t *proc, sqfs_block_t *frag, - sqfs_u64 hash) -{ - int err = grow_deduplication_list(proc); - - if (err) - return err; - - proc->frag_list[proc->frag_list_num].index = proc->frag_block->index; - proc->frag_list[proc->frag_list_num].offset = proc->frag_block->size; - proc->frag_list[proc->frag_list_num].hash = hash; - proc->frag_list_num += 1; - - sqfs_inode_set_frag_location(frag->inode, proc->frag_block->index, - proc->frag_block->size); - - if (proc->hooks != NULL && proc->hooks->pre_fragment_store != NULL) { - proc->hooks->pre_fragment_store(proc->user_ptr, frag); - } - - memcpy(proc->frag_block->data + proc->frag_block->size, - frag->data, frag->size); - - proc->frag_block->flags |= (frag->flags & SQFS_BLK_DONT_COMPRESS); - proc->frag_block->size += frag->size; - return 0; -} - int process_completed_fragment(sqfs_data_writer_t *proc, sqfs_block_t *frag, sqfs_block_t **blk_out) { - sqfs_u64 hash; - sqfs_u32 index; - size_t i, size; + sqfs_u32 index, offset; + size_t size; int err; - hash = MK_BLK_HASH(frag->checksum, frag->size); - - for (i = 0; i < proc->frag_list_num; ++i) { - if (proc->frag_list[i].hash == hash) - goto out_duplicate; - } + err = sqfs_frag_table_find_tail_end(proc->frag_tbl, + frag->checksum, frag->size, + &index, &offset); + if (err == 0) + goto out_duplicate; if (proc->frag_block != NULL) { size = proc->frag_block->size + frag->size; @@ -96,18 +46,32 @@ int process_completed_fragment(sqfs_data_writer_t *proc, sqfs_block_t *frag, proc->frag_block->flags = SQFS_BLK_FRAGMENT_BLOCK; } - err = store_fragment(proc, frag, hash); + err = sqfs_frag_table_add_tail_end(proc->frag_tbl, + proc->frag_block->index, + proc->frag_block->size, + frag->size, frag->checksum); if (err) goto fail; + sqfs_inode_set_frag_location(frag->inode, proc->frag_block->index, + proc->frag_block->size); + + if (proc->hooks != NULL && proc->hooks->pre_fragment_store != NULL) { + proc->hooks->pre_fragment_store(proc->user_ptr, frag); + } + + memcpy(proc->frag_block->data + proc->frag_block->size, + frag->data, frag->size); + + proc->frag_block->flags |= (frag->flags & SQFS_BLK_DONT_COMPRESS); + proc->frag_block->size += frag->size; return 0; fail: free(*blk_out); *blk_out = NULL; return err; out_duplicate: - sqfs_inode_set_frag_location(frag->inode, proc->frag_list[i].index, - proc->frag_list[i].offset); + sqfs_inode_set_frag_location(frag->inode, index, offset); if (proc->hooks != NULL && proc->hooks->notify_fragment_discard != NULL) { diff --git a/lib/sqfs/data_writer/internal.h b/lib/sqfs/data_writer/internal.h index 5042f0a..8f59fb7 100644 --- a/lib/sqfs/data_writer/internal.h +++ b/lib/sqfs/data_writer/internal.h @@ -43,11 +43,6 @@ typedef struct { sqfs_u64 hash; } blk_info_t; -typedef struct { - sqfs_u32 index; - sqfs_u32 offset; - sqfs_u64 hash; -} frag_info_t; typedef struct compress_worker_t compress_worker_t; @@ -92,9 +87,6 @@ struct sqfs_data_writer_t { sqfs_compressor_t *cmp; sqfs_block_t *frag_block; - frag_info_t *frag_list; - size_t frag_list_num; - size_t frag_list_max; const sqfs_block_hooks_t *hooks; void *user_ptr; diff --git a/lib/sqfs/frag_table.c b/lib/sqfs/frag_table.c index 3373a3c..200f3d5 100644 --- a/lib/sqfs/frag_table.c +++ b/lib/sqfs/frag_table.c @@ -17,10 +17,24 @@ #include #include + +typedef struct { + sqfs_u32 index; + sqfs_u32 offset; + sqfs_u32 size; + sqfs_u32 hash; +} chunk_info_t; + + struct sqfs_frag_table_t { size_t capacity; size_t used; sqfs_fragment_t *table; + + /* TODO: more efficient data structure */ + size_t chunk_num; + size_t chunk_max; + chunk_info_t *chunk_list; }; sqfs_frag_table_t *sqfs_frag_table_create(sqfs_u32 flags) @@ -39,6 +53,7 @@ sqfs_frag_table_t *sqfs_frag_table_create(sqfs_u32 flags) void sqfs_frag_table_destroy(sqfs_frag_table_t *tbl) { + free(tbl->chunk_list); free(tbl->table); free(tbl); } @@ -193,3 +208,50 @@ size_t sqfs_frag_table_get_size(sqfs_frag_table_t *tbl) { return tbl->used; } + +int sqfs_frag_table_add_tail_end(sqfs_frag_table_t *tbl, + sqfs_u32 index, sqfs_u32 offset, + sqfs_u32 size, sqfs_u32 hash) +{ + size_t new_sz; + void *new; + + if (tbl->chunk_num == tbl->chunk_max) { + new_sz = tbl->chunk_max ? tbl->chunk_max * 2 : 128; + new = realloc(tbl->chunk_list, + sizeof(tbl->chunk_list[0]) * new_sz); + + if (new == NULL) + return SQFS_ERROR_ALLOC; + + tbl->chunk_list = new; + tbl->chunk_max = new_sz; + } + + tbl->chunk_list[tbl->chunk_num].index = index; + tbl->chunk_list[tbl->chunk_num].offset = offset; + tbl->chunk_list[tbl->chunk_num].size = size; + tbl->chunk_list[tbl->chunk_num].hash = hash; + tbl->chunk_num += 1; + return 0; +} + +int sqfs_frag_table_find_tail_end(sqfs_frag_table_t *tbl, + sqfs_u32 hash, sqfs_u32 size, + sqfs_u32 *index, sqfs_u32 *offset) +{ + size_t i; + + for (i = 0; i < tbl->chunk_num; ++i) { + if (tbl->chunk_list[i].hash == hash && + tbl->chunk_list[i].size == size) { + if (index != NULL) + *index = tbl->chunk_list[i].index; + if (offset != NULL) + *offset = tbl->chunk_list[i].offset; + return 0; + } + } + + return SQFS_ERROR_NO_ENTRY; +} -- cgit v1.2.3