From 56edfefb9718f72bd45093e0efd76bd88645fa89 Mon Sep 17 00:00:00 2001 From: David Oberhollenzer Date: Sun, 18 Sep 2022 16:04:24 +0200 Subject: block writer: move block comparison to utility function Slightly modify the byte-for-byte comparison function to compare an arbitrary range in a file and move it to libutil. Instead of calling it for each block in the block writer, simply let it check an entire range in the block writer and compute the range position/size of the reference ahead, before looking for potential matches. Signed-off-by: David Oberhollenzer --- include/util/util.h | 8 +++++++ lib/sqfs/Makemodule.am | 2 +- lib/sqfs/block_writer.c | 59 ++++++++++++------------------------------------- lib/util/Makemodule.am | 1 + lib/util/file_cmp.c | 41 ++++++++++++++++++++++++++++++++++ 5 files changed, 65 insertions(+), 46 deletions(-) create mode 100644 lib/util/file_cmp.c diff --git a/include/util/util.h b/include/util/util.h index 6e34a82..0161c2b 100644 --- a/include/util/util.h +++ b/include/util/util.h @@ -69,4 +69,12 @@ SQFS_INTERNAL bool is_filename_sane(const char *name, bool check_os_specific); */ SQFS_INTERNAL sqfs_u32 get_source_date_epoch(void); +/* + Check if two regions in a file are equal using a scratch buffer to load the + data into for comparison. Returns 0 if equal, > 0 if not, < 0 if error. 
+ */ +SQFS_INTERNAL int check_file_range_equal(sqfs_file_t *file, void *scratch, + size_t scratch_size, sqfs_u64 loc_a, + sqfs_u64 loc_b, sqfs_u64 size); + #endif /* SQFS_UTIL_H */ diff --git a/lib/sqfs/Makemodule.am b/lib/sqfs/Makemodule.am index ad3d42d..b1c9d94 100644 --- a/lib/sqfs/Makemodule.am +++ b/lib/sqfs/Makemodule.am @@ -44,7 +44,7 @@ libsquashfs_la_LIBADD += $(ZSTD_LIBS) $(PTHREAD_LIBS) # directly "import" stuff from libutil libsquashfs_la_SOURCES += lib/util/str_table.c lib/util/alloc.c -libsquashfs_la_SOURCES += lib/util/xxhash.c +libsquashfs_la_SOURCES += lib/util/xxhash.c lib/util/file_cmp.c libsquashfs_la_SOURCES += lib/util/hash_table.c include/util/hash_table.h libsquashfs_la_SOURCES += lib/util/rbtree.c include/util/rbtree.h libsquashfs_la_SOURCES += lib/util/array.c include/util/array.h diff --git a/lib/sqfs/block_writer.c b/lib/sqfs/block_writer.c index 10f5ada..7709fb3 100644 --- a/lib/sqfs/block_writer.c +++ b/lib/sqfs/block_writer.c @@ -54,44 +54,20 @@ static int store_block_location(block_writer_default_t *wr, sqfs_u64 offset, return array_append(&(wr->blocks), &info); } -static int compare_blocks(block_writer_default_t *wr, sqfs_u64 loc_a, - sqfs_u64 loc_b, size_t size) -{ - sqfs_u8 *ptr_a = wr->scratch, *ptr_b = ptr_a + SCRATCH_SIZE / 2; - size_t diff; - int ret; - - while (size > 0) { - diff = SCRATCH_SIZE / 2; - diff = diff > size ? 
size : diff; - - ret = wr->file->read_at(wr->file, loc_a, ptr_a, diff); - if (ret != 0) - return ret; - - ret = wr->file->read_at(wr->file, loc_b, ptr_b, diff); - if (ret != 0) - return ret; - - if (memcmp(ptr_a, ptr_b, diff) != 0) - return 1; - - size -= diff; - loc_a += diff; - loc_b += diff; - } - - return 0; -} - static int deduplicate_blocks(block_writer_default_t *wr, size_t count, size_t *out) { const blk_info_t *blocks = wr->blocks.data; - sqfs_u64 loc_a, loc_b; - size_t i, j, sz; + sqfs_u64 loc_a, loc_b, sz; + size_t i, j; int ret; + sz = 0; + loc_a = blocks[wr->file_start].offset; + + for (i = 0; i < count; ++i) + sz += SIZE_FROM_HASH(blocks[wr->file_start + i].hash); + for (i = 0; i < wr->file_start; ++i) { for (j = 0; j < count; ++j) { if (blocks[i + j].hash == 0) @@ -108,21 +84,14 @@ static int deduplicate_blocks(block_writer_default_t *wr, size_t count, if (wr->flags & SQFS_BLOCK_WRITER_HASH_COMPARE_ONLY) break; - for (j = 0; j < count; ++j) { - sz = SIZE_FROM_HASH(blocks[i + j].hash); + loc_b = blocks[i].offset; - loc_a = blocks[i + j].offset; - loc_b = blocks[wr->file_start + j].offset; - - ret = compare_blocks(wr, loc_a, loc_b, sz); - if (ret < 0) - return ret; - if (ret > 0) - break; - } - - if (j == count) + ret = check_file_range_equal(wr->file, wr->scratch, + SCRATCH_SIZE, loc_a, loc_b, sz); + if (ret == 0) break; + if (ret < 0) + return ret; } *out = i; diff --git a/lib/util/Makemodule.am b/lib/util/Makemodule.am index b65ebda..830a338 100644 --- a/lib/util/Makemodule.am +++ b/lib/util/Makemodule.am @@ -13,6 +13,7 @@ libutil_a_SOURCES += lib/util/mkdir_p.c libutil_a_SOURCES += lib/util/canonicalize_name.c libutil_a_SOURCES += lib/util/filename_sane.c libutil_a_SOURCES += lib/util/source_date_epoch.c +libutil_a_SOURCES += lib/util/file_cmp.c libutil_a_CFLAGS = $(AM_CFLAGS) libutil_a_CPPFLAGS = $(AM_CPPFLAGS) diff --git a/lib/util/file_cmp.c b/lib/util/file_cmp.c new file mode 100644 index 0000000..2aa0cc2 --- /dev/null +++ b/lib/util/file_cmp.c 
@@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: LGPL-3.0-or-later */ +/* + * file_cmp.c + * + * Copyright (C) 2019 David Oberhollenzer + */ +#include "config.h" + +#include "util/util.h" +#include "sqfs/io.h" + +#include <string.h> + +int check_file_range_equal(sqfs_file_t *file, void *scratch, size_t scratch_sz, + sqfs_u64 loc_a, sqfs_u64 loc_b, sqfs_u64 size) +{ + sqfs_u8 *ptr_a = scratch, *ptr_b = ptr_a + scratch_sz / 2; + int ret; + + while (size > 0) { + size_t diff = scratch_sz / 2; + diff = (sqfs_u64)diff > size ? size : diff; + + ret = file->read_at(file, loc_a, ptr_a, diff); + if (ret != 0) + return ret; + + ret = file->read_at(file, loc_b, ptr_b, diff); + if (ret != 0) + return ret; + + if (memcmp(ptr_a, ptr_b, diff) != 0) + return 1; + + size -= diff; + loc_a += diff; + loc_b += diff; + } + + return 0; +} -- cgit v1.2.3