summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Oberhollenzer <david.oberhollenzer@sigma-star.at>2022-09-18 16:04:24 +0200
committerDavid Oberhollenzer <david.oberhollenzer@sigma-star.at>2022-09-20 08:47:02 +0200
commit56edfefb9718f72bd45093e0efd76bd88645fa89 (patch)
tree44868fd03fdfe1977cfb6275c60703b972130726
parent310d0f23da22435be13864c93364359f0cb7f443 (diff)
block writer: move block comaprison to utility function
Slightly modify the byte-for-byte comparison function to compare an arbitrary range in a file and move it to libutil. Instead of calling it for each block in the block writer, simply let it check an entire range in the block writer and compute the range position/size of the reference ahead, before looking for potential matches. Signed-off-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
-rw-r--r--include/util/util.h8
-rw-r--r--lib/sqfs/Makemodule.am2
-rw-r--r--lib/sqfs/block_writer.c59
-rw-r--r--lib/util/Makemodule.am1
-rw-r--r--lib/util/file_cmp.c41
5 files changed, 65 insertions, 46 deletions
diff --git a/include/util/util.h b/include/util/util.h
index 6e34a82..0161c2b 100644
--- a/include/util/util.h
+++ b/include/util/util.h
@@ -69,4 +69,12 @@ SQFS_INTERNAL bool is_filename_sane(const char *name, bool check_os_specific);
*/
SQFS_INTERNAL sqfs_u32 get_source_date_epoch(void);
+/*
+ Check if two regions in a file are equal using a scratch buffer to load the
+ data into for comparision. Returns 0 if equal, > 0 if not, < 0 if error.
+ */
+SQFS_INTERNAL int check_file_range_equal(sqfs_file_t *file, void *scratch,
+ size_t scratch_size, sqfs_u64 loc_a,
+ sqfs_u64 loc_b, sqfs_u64 size);
+
#endif /* SQFS_UTIL_H */
diff --git a/lib/sqfs/Makemodule.am b/lib/sqfs/Makemodule.am
index ad3d42d..b1c9d94 100644
--- a/lib/sqfs/Makemodule.am
+++ b/lib/sqfs/Makemodule.am
@@ -44,7 +44,7 @@ libsquashfs_la_LIBADD += $(ZSTD_LIBS) $(PTHREAD_LIBS)
# directly "import" stuff from libutil
libsquashfs_la_SOURCES += lib/util/str_table.c lib/util/alloc.c
-libsquashfs_la_SOURCES += lib/util/xxhash.c
+libsquashfs_la_SOURCES += lib/util/xxhash.c lib/util/file_cmp.c
libsquashfs_la_SOURCES += lib/util/hash_table.c include/util/hash_table.h
libsquashfs_la_SOURCES += lib/util/rbtree.c include/util/rbtree.h
libsquashfs_la_SOURCES += lib/util/array.c include/util/array.h
diff --git a/lib/sqfs/block_writer.c b/lib/sqfs/block_writer.c
index 10f5ada..7709fb3 100644
--- a/lib/sqfs/block_writer.c
+++ b/lib/sqfs/block_writer.c
@@ -54,44 +54,20 @@ static int store_block_location(block_writer_default_t *wr, sqfs_u64 offset,
return array_append(&(wr->blocks), &info);
}
-static int compare_blocks(block_writer_default_t *wr, sqfs_u64 loc_a,
- sqfs_u64 loc_b, size_t size)
-{
- sqfs_u8 *ptr_a = wr->scratch, *ptr_b = ptr_a + SCRATCH_SIZE / 2;
- size_t diff;
- int ret;
-
- while (size > 0) {
- diff = SCRATCH_SIZE / 2;
- diff = diff > size ? size : diff;
-
- ret = wr->file->read_at(wr->file, loc_a, ptr_a, diff);
- if (ret != 0)
- return ret;
-
- ret = wr->file->read_at(wr->file, loc_b, ptr_b, diff);
- if (ret != 0)
- return ret;
-
- if (memcmp(ptr_a, ptr_b, diff) != 0)
- return 1;
-
- size -= diff;
- loc_a += diff;
- loc_b += diff;
- }
-
- return 0;
-}
-
static int deduplicate_blocks(block_writer_default_t *wr, size_t count,
size_t *out)
{
const blk_info_t *blocks = wr->blocks.data;
- sqfs_u64 loc_a, loc_b;
- size_t i, j, sz;
+ sqfs_u64 loc_a, loc_b, sz;
+ size_t i, j;
int ret;
+ sz = 0;
+ loc_a = blocks[wr->file_start].offset;
+
+ for (i = 0; i < count; ++i)
+ sz += SIZE_FROM_HASH(blocks[wr->file_start + i].hash);
+
for (i = 0; i < wr->file_start; ++i) {
for (j = 0; j < count; ++j) {
if (blocks[i + j].hash == 0)
@@ -108,21 +84,14 @@ static int deduplicate_blocks(block_writer_default_t *wr, size_t count,
if (wr->flags & SQFS_BLOCK_WRITER_HASH_COMPARE_ONLY)
break;
- for (j = 0; j < count; ++j) {
- sz = SIZE_FROM_HASH(blocks[i + j].hash);
+ loc_b = blocks[i].offset;
- loc_a = blocks[i + j].offset;
- loc_b = blocks[wr->file_start + j].offset;
-
- ret = compare_blocks(wr, loc_a, loc_b, sz);
- if (ret < 0)
- return ret;
- if (ret > 0)
- break;
- }
-
- if (j == count)
+ ret = check_file_range_equal(wr->file, wr->scratch,
+ SCRATCH_SIZE, loc_a, loc_b, sz);
+ if (ret == 0)
break;
+ if (ret < 0)
+ return ret;
}
*out = i;
diff --git a/lib/util/Makemodule.am b/lib/util/Makemodule.am
index b65ebda..830a338 100644
--- a/lib/util/Makemodule.am
+++ b/lib/util/Makemodule.am
@@ -13,6 +13,7 @@ libutil_a_SOURCES += lib/util/mkdir_p.c
libutil_a_SOURCES += lib/util/canonicalize_name.c
libutil_a_SOURCES += lib/util/filename_sane.c
libutil_a_SOURCES += lib/util/source_date_epoch.c
+libutil_a_SOURCES += lib/util/file_cmp.c
libutil_a_CFLAGS = $(AM_CFLAGS)
libutil_a_CPPFLAGS = $(AM_CPPFLAGS)
diff --git a/lib/util/file_cmp.c b/lib/util/file_cmp.c
new file mode 100644
index 0000000..2aa0cc2
--- /dev/null
+++ b/lib/util/file_cmp.c
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-3.0-or-later */
+/*
+ * file_cmp.c
+ *
+ * Copyright (C) 2019 David Oberhollenzer <goliath@infraroot.at>
+ */
+#include "config.h"
+
+#include "util/util.h"
+#include "sqfs/io.h"
+
+#include <string.h>
+
+int check_file_range_equal(sqfs_file_t *file, void *scratch, size_t scratch_sz,
+ sqfs_u64 loc_a, sqfs_u64 loc_b, sqfs_u64 size)
+{
+ sqfs_u8 *ptr_a = scratch, *ptr_b = ptr_a + scratch_sz / 2;
+ int ret;
+
+ while (size > 0) {
+ size_t diff = scratch_sz / 2;
+ diff = (sqfs_u64)diff > size ? size : diff;
+
+ ret = file->read_at(file, loc_a, ptr_a, diff);
+ if (ret != 0)
+ return ret;
+
+ ret = file->read_at(file, loc_b, ptr_b, diff);
+ if (ret != 0)
+ return ret;
+
+ if (memcmp(ptr_a, ptr_b, diff) != 0)
+ return 1;
+
+ size -= diff;
+ loc_a += diff;
+ loc_b += diff;
+ }
+
+ return 0;
+}