From 4249e123d321650050259fb602f06497519077d0 Mon Sep 17 00:00:00 2001 From: David Oberhollenzer Date: Thu, 14 Jan 2021 04:38:33 +0100 Subject: libsqfs: block processor: backport exact fragment matching This commit is an amalgamation of the commits on master that implement exact matching of fragment blocks during deduplication. Signed-off-by: David Oberhollenzer --- include/sqfs/block_processor.h | 98 +++++++++++++++++++++++++++++++++++++++++- include/sqfs/predef.h | 1 + 2 files changed, 97 insertions(+), 2 deletions(-) (limited to 'include/sqfs') diff --git a/include/sqfs/block_processor.h b/include/sqfs/block_processor.h index 1846069..a5eed33 100644 --- a/include/sqfs/block_processor.h +++ b/include/sqfs/block_processor.h @@ -110,12 +110,90 @@ struct sqfs_block_processor_stats_t { sqfs_u64 actual_frag_count; }; +/** + * @struct sqfs_block_processor_desc_t + * + * @brief Encapsulates a description for an @ref sqfs_block_processor_t + * + * An instance of this struct is used by @ref sqfs_block_processor_create_ex to + * instantiate block processor objects. + */ +struct sqfs_block_processor_desc_t { + /** + * @brief Holds the size of the structure. + * + * If a later version of libsquashfs expands this structure, the value + * of this field can be used to check at runtime whether the newer + * fields are available or not. + * + * If @ref sqfs_block_processor_create_ex is given a struct whose size + * it does not recognize, it returns @ref SQFS_ERROR_ARG_INVALID. + */ + sqfs_u32 size; + + /** + * @brief The maximum size of a data block. + */ + sqfs_u32 max_block_size; + + /** + * @brief The number of worker threads to create. + */ + sqfs_u32 num_workers; + + /** + * @brief The maximum number of blocks currently in flight. + * + * When trying to add more, the enqueue operation blocks (i.e. waits) until the + * in-flight block count drops below the threshold. + */ + sqfs_u32 max_backlog; + + /** + * @brief A pointer to a compressor. 
+ * + * If multiple worker threads are used, the deep copy function of the + * compressor is used to create several instances that don't interfere + * with each other. This means the compressor implementation must be + * able to create copies of itself that can be used independently and + * concurrently. + */ + sqfs_compressor_t *cmp; + + /** + * @brief A block writer to send finished blocks to. + */ + sqfs_block_writer_t *wr; + + /** + * @brief A fragment table to use for storing block locations. + */ + sqfs_frag_table_t *tbl; + + /** + * @brief Pointer to a file to read back fragment blocks from. + * + * If file and uncmp are not NULL, the file is used to read back + * fragment blocks during fragment deduplication and verify possible + * matches. If either of them is NULL, the deduplication relies on + * fragment size and hash alone. + */ + sqfs_file_t *file; + + /** + * @brief A pointer to a compressor that decompresses data. + * + * @copydoc file + */ + sqfs_compressor_t *uncmp; +}; + #ifdef __cplusplus extern "C" { #endif /** - * @brief Create a data block writer. + * @brief Create a data block processor. * * @memberof sqfs_block_processor_t * @@ -132,7 +210,7 @@ extern "C" { * @param tbl A fragment table to use for storing fragment and fragment block * locations. * - * @return A pointer to a data writer object on success, NULL on allocation + * @return A pointer to a block processor object on success, NULL on allocation * failure or on failure to create and initialize the worker threads. */ SQFS_API @@ -143,6 +221,22 @@ sqfs_block_processor_t *sqfs_block_processor_create(size_t max_block_size, sqfs_block_writer_t *wr, sqfs_frag_table_t *tbl); +/** + * @brief Create a data block processor from a description structure. + * + * @memberof sqfs_block_processor_t + * + * @param desc A pointer to an extensible structure that holds the description + * of the block processor. + * @param out On success, returns the pointer to the newly created block + * processor object. 
+ * + * @return Zero on success, an @ref SQFS_ERROR value on failure. + */ +SQFS_API +int sqfs_block_processor_create_ex(const sqfs_block_processor_desc_t *desc, + sqfs_block_processor_t **out); + /** * @brief Start writing a file. * diff --git a/include/sqfs/predef.h b/include/sqfs/predef.h index e0afac4..55ccc86 100644 --- a/include/sqfs/predef.h +++ b/include/sqfs/predef.h @@ -91,6 +91,7 @@ typedef struct sqfs_frag_table_t sqfs_frag_table_t; typedef struct sqfs_block_writer_t sqfs_block_writer_t; typedef struct sqfs_block_writer_stats_t sqfs_block_writer_stats_t; typedef struct sqfs_block_processor_stats_t sqfs_block_processor_stats_t; +typedef struct sqfs_block_processor_desc_t sqfs_block_processor_desc_t; typedef struct sqfs_fragment_t sqfs_fragment_t; typedef struct sqfs_dir_header_t sqfs_dir_header_t; -- cgit v1.2.3