diff options
author | David Oberhollenzer <david.oberhollenzer@sigma-star.at> | 2022-09-18 16:04:24 +0200 |
---|---|---|
committer | David Oberhollenzer <david.oberhollenzer@sigma-star.at> | 2022-09-20 08:47:02 +0200 |
commit | 56edfefb9718f72bd45093e0efd76bd88645fa89 (patch) | |
tree | 44868fd03fdfe1977cfb6275c60703b972130726 /lib | |
parent | 310d0f23da22435be13864c93364359f0cb7f443 (diff) |
block writer: move block comparison to utility function
Slightly modify the byte-for-byte comparison function so that it compares
an arbitrary range in a file, and move it to libutil. Instead of calling
it once for each block, the block writer now computes the position and
size of the reference range up front, before looking for potential
matches, and checks the entire range with a single call.
Signed-off-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/sqfs/Makemodule.am | 2 | ||||
-rw-r--r-- | lib/sqfs/block_writer.c | 59 | ||||
-rw-r--r-- | lib/util/Makemodule.am | 1 | ||||
-rw-r--r-- | lib/util/file_cmp.c | 41 |
4 files changed, 57 insertions, 46 deletions
diff --git a/lib/sqfs/Makemodule.am b/lib/sqfs/Makemodule.am index ad3d42d..b1c9d94 100644 --- a/lib/sqfs/Makemodule.am +++ b/lib/sqfs/Makemodule.am @@ -44,7 +44,7 @@ libsquashfs_la_LIBADD += $(ZSTD_LIBS) $(PTHREAD_LIBS) # directly "import" stuff from libutil libsquashfs_la_SOURCES += lib/util/str_table.c lib/util/alloc.c -libsquashfs_la_SOURCES += lib/util/xxhash.c +libsquashfs_la_SOURCES += lib/util/xxhash.c lib/util/file_cmp.c libsquashfs_la_SOURCES += lib/util/hash_table.c include/util/hash_table.h libsquashfs_la_SOURCES += lib/util/rbtree.c include/util/rbtree.h libsquashfs_la_SOURCES += lib/util/array.c include/util/array.h diff --git a/lib/sqfs/block_writer.c b/lib/sqfs/block_writer.c index 10f5ada..7709fb3 100644 --- a/lib/sqfs/block_writer.c +++ b/lib/sqfs/block_writer.c @@ -54,44 +54,20 @@ static int store_block_location(block_writer_default_t *wr, sqfs_u64 offset, return array_append(&(wr->blocks), &info); } -static int compare_blocks(block_writer_default_t *wr, sqfs_u64 loc_a, - sqfs_u64 loc_b, size_t size) -{ - sqfs_u8 *ptr_a = wr->scratch, *ptr_b = ptr_a + SCRATCH_SIZE / 2; - size_t diff; - int ret; - - while (size > 0) { - diff = SCRATCH_SIZE / 2; - diff = diff > size ? 
size : diff; - - ret = wr->file->read_at(wr->file, loc_a, ptr_a, diff); - if (ret != 0) - return ret; - - ret = wr->file->read_at(wr->file, loc_b, ptr_b, diff); - if (ret != 0) - return ret; - - if (memcmp(ptr_a, ptr_b, diff) != 0) - return 1; - - size -= diff; - loc_a += diff; - loc_b += diff; - } - - return 0; -} - static int deduplicate_blocks(block_writer_default_t *wr, size_t count, size_t *out) { const blk_info_t *blocks = wr->blocks.data; - sqfs_u64 loc_a, loc_b; - size_t i, j, sz; + sqfs_u64 loc_a, loc_b, sz; + size_t i, j; int ret; + sz = 0; + loc_a = blocks[wr->file_start].offset; + + for (i = 0; i < count; ++i) + sz += SIZE_FROM_HASH(blocks[wr->file_start + i].hash); + for (i = 0; i < wr->file_start; ++i) { for (j = 0; j < count; ++j) { if (blocks[i + j].hash == 0) @@ -108,21 +84,14 @@ static int deduplicate_blocks(block_writer_default_t *wr, size_t count, if (wr->flags & SQFS_BLOCK_WRITER_HASH_COMPARE_ONLY) break; - for (j = 0; j < count; ++j) { - sz = SIZE_FROM_HASH(blocks[i + j].hash); + loc_b = blocks[i].offset; - loc_a = blocks[i + j].offset; - loc_b = blocks[wr->file_start + j].offset; - - ret = compare_blocks(wr, loc_a, loc_b, sz); - if (ret < 0) - return ret; - if (ret > 0) - break; - } - - if (j == count) + ret = check_file_range_equal(wr->file, wr->scratch, + SCRATCH_SIZE, loc_a, loc_b, sz); + if (ret == 0) break; + if (ret < 0) + return ret; } *out = i; diff --git a/lib/util/Makemodule.am b/lib/util/Makemodule.am index b65ebda..830a338 100644 --- a/lib/util/Makemodule.am +++ b/lib/util/Makemodule.am @@ -13,6 +13,7 @@ libutil_a_SOURCES += lib/util/mkdir_p.c libutil_a_SOURCES += lib/util/canonicalize_name.c libutil_a_SOURCES += lib/util/filename_sane.c libutil_a_SOURCES += lib/util/source_date_epoch.c +libutil_a_SOURCES += lib/util/file_cmp.c libutil_a_CFLAGS = $(AM_CFLAGS) libutil_a_CPPFLAGS = $(AM_CPPFLAGS) diff --git a/lib/util/file_cmp.c b/lib/util/file_cmp.c new file mode 100644 index 0000000..2aa0cc2 --- /dev/null +++ b/lib/util/file_cmp.c 
@@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: LGPL-3.0-or-later */ +/* + * file_cmp.c + * + * Copyright (C) 2019 David Oberhollenzer <goliath@infraroot.at> + */ +#include "config.h" + +#include "util/util.h" +#include "sqfs/io.h" + +#include <string.h> + +int check_file_range_equal(sqfs_file_t *file, void *scratch, size_t scratch_sz, + sqfs_u64 loc_a, sqfs_u64 loc_b, sqfs_u64 size) +{ + sqfs_u8 *ptr_a = scratch, *ptr_b = ptr_a + scratch_sz / 2; + int ret; + + while (size > 0) { + size_t diff = scratch_sz / 2; + diff = (sqfs_u64)diff > size ? size : diff; + + ret = file->read_at(file, loc_a, ptr_a, diff); + if (ret != 0) + return ret; + + ret = file->read_at(file, loc_b, ptr_b, diff); + if (ret != 0) + return ret; + + if (memcmp(ptr_a, ptr_b, diff) != 0) + return 1; + + size -= diff; + loc_a += diff; + loc_b += diff; + } + + return 0; +} |