summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Oberhollenzer <david.oberhollenzer@sigma-star.at>2020-12-06 00:35:01 +0100
committerDavid Oberhollenzer <david.oberhollenzer@sigma-star.at>2021-01-19 10:45:59 +0100
commit854119c62621e017c13be5192a9494c0eea2fe2f (patch)
treeb76393602d2334aa17d01818e845e87913f32d1a
parent8db9823b63d82196b204cb969752eff6adb15756 (diff)
libsqfs: implement exact matching in the default block writer.
Instead of comparing (compresed, disk-size, checksum) tuples to find block matches, do an exact, byte-for-byte comparison of the data stored on disk to avoid the possibility of a spurious colision. Since this is the desired behaviour, make it the default, optionally overrideable through a flag. Signed-off-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
-rw-r--r--include/sqfs/block_writer.h29
-rw-r--r--lib/sqfs/block_writer.c82
2 files changed, 104 insertions, 7 deletions
diff --git a/include/sqfs/block_writer.h b/include/sqfs/block_writer.h
index 6352f4d..6376d2e 100644
--- a/include/sqfs/block_writer.h
+++ b/include/sqfs/block_writer.h
@@ -90,6 +90,32 @@ struct sqfs_block_writer_t {
sqfs_u64 (*get_block_count)(const sqfs_block_writer_t *wr);
};
+/**
+ * @enum SQFS_BLOCK_WRITER_FLAGS
+ *
+ * @brief Flags that can be passed to @ref sqfs_block_writer_create
+ */
+typedef enum {
+ /**
+ * @brief If set, only compare checksums when deduplicating blocks.
+ *
+ * Since squashfs-tools-ng version 1.1, the default for the block
+ * writer is to compare checksum & size for blocks during deduplication
+ * and then read the potential match back from disk and do a byte for
+ * byte comparison to make absolutely sure they match.
+ *
+ * If this flag is set, the hash & size check is treated as being
+ * sufficient for block deduplication, which does increase performance,
+ * but risks data loss or corruption if a hash collision occours.
+ */
+ SQFS_BLOCK_WRITER_HASH_COMPARE_ONLY = 0x01,
+
+ /**
+ * @brief A combination of all valid flags.
+ */
+ SQFS_BLOCK_WRITER_ALL_FLAGS = 0x01
+} SQFS_BLOCK_WRITER_FLAGS;
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -102,7 +128,8 @@ extern "C" {
* @param file A pointer to a file object that data should be appended to.
* @param devblksz The underlying device block size if output data
* should be aligned.
- * @param flags Currently must be set to 0 or creation will fail.
+ * @param flags A combination of @ref SQFS_BLOCK_WRITER_FLAGS values. If an
+ * unknown flag is set, creation will fail.
*
* @return A pointer to a new block writer on success, NULL on failure.
*/
diff --git a/lib/sqfs/block_writer.c b/lib/sqfs/block_writer.c
index 07780d7..17d045f 100644
--- a/lib/sqfs/block_writer.c
+++ b/lib/sqfs/block_writer.c
@@ -14,11 +14,15 @@
#include "util.h"
#include <stdlib.h>
+#include <string.h>
#define MK_BLK_HASH(chksum, size) \
(((sqfs_u64)(size) << 32) | (sqfs_u64)(chksum))
+#define SIZE_FROM_HASH(hash) ((hash >> 32) & ((1 << 24) - 1))
+
#define INIT_BLOCK_COUNT (128)
+#define SCRATCH_SIZE (8192)
typedef struct {
sqfs_u64 offset;
@@ -39,6 +43,10 @@ typedef struct {
sqfs_u64 start;
size_t file_start;
+
+ sqfs_u32 flags;
+
+ sqfs_u8 scratch[];
} block_writer_default_t;
static int store_block_location(block_writer_default_t *wr, sqfs_u64 offset,
@@ -64,9 +72,42 @@ static int store_block_location(block_writer_default_t *wr, sqfs_u64 offset,
return 0;
}
-static size_t deduplicate_blocks(block_writer_default_t *wr, size_t count)
+static int compare_blocks(block_writer_default_t *wr, sqfs_u64 loc_a,
+ sqfs_u64 loc_b, size_t size)
+{
+ sqfs_u8 *ptr_a = wr->scratch, *ptr_b = ptr_a + SCRATCH_SIZE / 2;
+ size_t diff;
+ int ret;
+
+ while (size > 0) {
+ diff = SCRATCH_SIZE / 2;
+ diff = diff > size ? size : diff;
+
+ ret = wr->file->read_at(wr->file, loc_a, ptr_a, diff);
+ if (ret != 0)
+ return ret;
+
+ ret = wr->file->read_at(wr->file, loc_b, ptr_b, diff);
+ if (ret != 0)
+ return ret;
+
+ if (memcmp(ptr_a, ptr_b, diff) != 0)
+ return 1;
+
+ size -= diff;
+ loc_a += diff;
+ loc_b += diff;
+ }
+
+ return 0;
+}
+
+static int deduplicate_blocks(block_writer_default_t *wr, size_t count,
+ size_t *out)
{
- size_t i, j;
+ sqfs_u64 loc_a, loc_b;
+ size_t i, j, sz;
+ int ret;
for (i = 0; i < wr->file_start; ++i) {
for (j = 0; j < count; ++j) {
@@ -78,11 +119,31 @@ static size_t deduplicate_blocks(block_writer_default_t *wr, size_t count)
break;
}
+ if (j != count)
+ continue;
+
+ if (wr->flags & SQFS_BLOCK_WRITER_HASH_COMPARE_ONLY)
+ break;
+
+ for (j = 0; j < count; ++j) {
+ sz = SIZE_FROM_HASH(wr->blocks[i + j].hash);
+
+ loc_a = wr->blocks[i + j].offset;
+ loc_b = wr->blocks[wr->file_start + j].offset;
+
+ ret = compare_blocks(wr, loc_a, loc_b, sz);
+ if (ret < 0)
+ return ret;
+ if (ret > 0)
+ break;
+ }
+
if (j == count)
break;
}
- return i;
+ *out = i;
+ return 0;
}
static int align_file(block_writer_default_t *wr)
@@ -168,7 +229,10 @@ static int write_data_block(sqfs_block_writer_t *base, void *user,
if (count == 0) {
*location = 0;
} else if (!(flags & SQFS_BLK_DONT_DEDUPLICATE)) {
- start = deduplicate_blocks(wr, count);
+ err = deduplicate_blocks(wr, count, &start);
+ if (err)
+ return err;
+
offset = wr->blocks[start].offset;
*location = offset;
@@ -204,16 +268,22 @@ sqfs_block_writer_t *sqfs_block_writer_create(sqfs_file_t *file,
{
block_writer_default_t *wr;
- if (flags != 0)
+ if (flags & ~SQFS_BLOCK_WRITER_ALL_FLAGS)
return NULL;
- wr = calloc(1, sizeof(*wr));
+ if (flags & SQFS_BLOCK_WRITER_HASH_COMPARE_ONLY) {
+ wr = calloc(1, sizeof(*wr));
+ } else {
+ wr = alloc_flex(sizeof(*wr), 1, SCRATCH_SIZE);
+ }
+
if (wr == NULL)
return NULL;
((sqfs_block_writer_t *)wr)->write_data_block = write_data_block;
((sqfs_block_writer_t *)wr)->get_block_count = get_block_count;
((sqfs_object_t *)wr)->destroy = block_writer_destroy;
+ wr->flags = flags;
wr->file = file;
wr->devblksz = devblksz;
wr->max_blocks = INIT_BLOCK_COUNT;