aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/fstree.h29
-rw-r--r--lib/fstree/Makemodule.am2
-rw-r--r--lib/fstree/deduplicate.c133
-rw-r--r--lib/fstree/mknode.c6
-rw-r--r--lib/sqfshelper/data_reader.c14
-rw-r--r--lib/sqfshelper/data_writer.c203
-rw-r--r--lib/sqfshelper/statistics.c2
-rw-r--r--lib/sqfshelper/tree_node_from_inode.c6
-rw-r--r--lib/sqfshelper/tree_node_to_inode.c2
-rw-r--r--mkfs/mkfs.c5
-rw-r--r--tests/mknode_reg.c2
11 files changed, 178 insertions, 226 deletions
diff --git a/include/fstree.h b/include/fstree.h
index 7ee105b..8d0b952 100644
--- a/include/fstree.h
+++ b/include/fstree.h
@@ -99,18 +99,12 @@ struct file_info_t {
/* Byte offset into the fragment block. */
uint32_t fragment_offset;
- uint32_t fragment_chksum;
-
/* combination of FILE_FLAG_* flags */
uint32_t flags;
/* Stores data about each full data block. */
- struct {
- uint32_t chksum;
-
- /* Bit (1 << 24) is set if the block is stored uncompressed. */
- uint32_t size;
- } blocks[];
+ /* Bit (1 << 24) is set if the block is stored uncompressed. */
+ uint32_t block_size[];
};
/* Additional meta data stored in a tree_node_t for directories */
@@ -312,25 +306,6 @@ void tree_node_sort_recursive(tree_node_t *root);
tree_node_t *fstree_node_from_path(fstree_t *fs, const char *path);
/*
- Walk through 'list' to find a file with a fragment that has
- the same size ('frag_size') and checksum ('chksum') as 'fi'.
-
- Returns NULL if no such fragment could be found.
-*/
-file_info_t *fragment_by_chksum(file_info_t *fi, uint32_t chksum,
- size_t frag_size, file_info_t *list,
- size_t block_size);
-
-/*
- Walk through 'list' to find a file that contains the same sequence of blocks
- as 'file', comparing size and checksum.
-
- Returns NULL if no such fragment could be found.
- */
-uint64_t find_equal_blocks(file_info_t *file, file_info_t *list,
- size_t block_size);
-
-/*
Optimize the order of the fstree file list for unpacking as to avoid
unpacking fragment blocks more than once and to improve locality when
fetching data from disk. The resulting list is returned in 'out'.
diff --git a/lib/fstree/Makemodule.am b/lib/fstree/Makemodule.am
index 3c74f6d..c3c56fb 100644
--- a/lib/fstree/Makemodule.am
+++ b/lib/fstree/Makemodule.am
@@ -4,7 +4,7 @@ libfstree_a_SOURCES += lib/fstree/gen_inode_table.c lib/fstree/get_path.c
libfstree_a_SOURCES += lib/fstree/node_stat.c lib/fstree/mknode.c
libfstree_a_SOURCES += lib/fstree/add_by_path.c lib/fstree/xattr.c
libfstree_a_SOURCES += lib/fstree/node_from_path.c include/fstree.h
-libfstree_a_SOURCES += lib/fstree/gen_file_list.c lib/fstree/deduplicate.c
+libfstree_a_SOURCES += lib/fstree/gen_file_list.c
libfstree_a_SOURCES += lib/fstree/optimize_unpack_order.c
libfstree_a_SOURCES += lib/fstree/canonicalize_name.c
libfstree_a_SOURCES += lib/fstree/source_date_epoch.c
diff --git a/lib/fstree/deduplicate.c b/lib/fstree/deduplicate.c
deleted file mode 100644
index 00815a6..0000000
--- a/lib/fstree/deduplicate.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/* SPDX-License-Identifier: GPL-3.0-or-later */
-/*
- * deduplicate.c
- *
- * Copyright (C) 2019 David Oberhollenzer <goliath@infraroot.at>
- */
-#include "config.h"
-#include "fstree.h"
-
-#include <string.h>
-
-file_info_t *fragment_by_chksum(file_info_t *fi, uint32_t chksum,
- size_t frag_size, file_info_t *list,
- size_t block_size)
-{
- file_info_t *it;
-
- for (it = list; it != NULL; it = it->next) {
- if (it == fi)
- continue;
-
- if (!(it->flags & FILE_FLAG_HAS_FRAGMENT))
- continue;
-
- if (it->flags & FILE_FLAG_FRAGMENT_IS_DUPLICATE)
- continue;
-
- if ((it->size % block_size) != frag_size)
- continue;
-
- if (it->fragment_chksum == chksum)
- break;
- }
-
- return it;
-}
-
-static size_t get_block_count(file_info_t *fi, size_t block_size)
-{
- size_t count = fi->size / block_size;
-
- if ((fi->size % block_size) && !(fi->flags & FILE_FLAG_HAS_FRAGMENT))
- ++count;
-
- while (count > 0 && fi->blocks[count - 1].size == 0)
- --count;
-
- return count;
-}
-
-static size_t find_first_match(file_info_t *file, file_info_t *cmp,
- size_t idx, size_t cmp_blk_count)
-{
- size_t i;
-
- for (i = 0; i < cmp_blk_count; ++i) {
- if (memcmp(file->blocks + idx, cmp->blocks + i,
- sizeof(file->blocks[idx])) == 0) {
- break;
- }
- }
-
- return i;
-}
-
-uint64_t find_equal_blocks(file_info_t *file, file_info_t *list,
- size_t block_size)
-{
- size_t start, first_match, i, j, block_count, cmp_blk_count;
- uint64_t location;
- file_info_t *it;
-
- block_count = get_block_count(file, block_size);
- if (block_count == 0)
- return 0;
-
- for (start = 0; start < block_count; ++start) {
- if (file->blocks[start].size != 0)
- break;
- }
-
- for (it = list; it != NULL; it = it->next) {
- if (it == file)
- continue;
-
- if (it->flags & FILE_FLAG_BLOCKS_ARE_DUPLICATE)
- continue;
-
- cmp_blk_count = get_block_count(it, block_size);
- if (cmp_blk_count == 0)
- continue;
-
- first_match = find_first_match(file, it, start, cmp_blk_count);
- if (first_match == cmp_blk_count)
- continue;
-
- i = start;
- j = first_match;
-
- while (i < block_count && j < cmp_blk_count) {
- if (file->blocks[i].size == 0) {
- ++i;
- continue;
- }
-
- if (it->blocks[j].size == 0) {
- ++j;
- continue;
- }
-
- if (memcmp(it->blocks + j, file->blocks + i,
- sizeof(file->blocks[i])) != 0) {
- break;
- }
-
- ++i;
- ++j;
- }
-
- if (i == block_count)
- break;
- }
-
- if (it == NULL)
- return 0;
-
- location = it->startblock;
-
- for (i = 0; i < first_match; ++i)
- location += it->blocks[i].size & ((1 << 24) - 1);
-
- return location;
-}
diff --git a/lib/fstree/mknode.c b/lib/fstree/mknode.c
index 1c3b3a0..ace99f3 100644
--- a/lib/fstree/mknode.c
+++ b/lib/fstree/mknode.c
@@ -41,7 +41,7 @@ tree_node_t *fstree_mknode(fstree_t *fs, tree_node_t *parent, const char *name,
if ((sb->st_size % fs->block_size) != 0)
++block_count;
- if (SZ_MUL_OV(block_count, sizeof(n->data.file->blocks[0]),
+ if (SZ_MUL_OV(block_count, sizeof(n->data.file->block_size[0]),
&total)) {
goto fail_ov;
}
@@ -92,8 +92,8 @@ tree_node_t *fstree_mknode(fstree_t *fs, tree_node_t *parent, const char *name,
if (extra == NULL)
break;
- ptr = (char *)n->data.file->blocks;
- ptr += block_count * sizeof(n->data.file->blocks[0]);
+ ptr = (char *)n->data.file->block_size;
+ ptr += block_count * sizeof(n->data.file->block_size[0]);
n->data.file->input_file = ptr;
strcpy(n->data.file->input_file, extra);
break;
diff --git a/lib/sqfshelper/data_reader.c b/lib/sqfshelper/data_reader.c
index d1a9cce..6906933 100644
--- a/lib/sqfshelper/data_reader.c
+++ b/lib/sqfshelper/data_reader.c
@@ -193,7 +193,7 @@ int data_reader_dump_file(data_reader_t *data, file_info_t *fi, int outfd,
diff = filesz > data->block_size ? data->block_size : filesz;
filesz -= diff;
- if (SQFS_IS_SPARSE_BLOCK(fi->blocks[i].size)) {
+ if (SQFS_IS_SPARSE_BLOCK(fi->block_size[i])) {
if (allow_sparse) {
if (lseek(outfd, diff, SEEK_CUR) == (off_t)-1)
goto fail_sparse;
@@ -201,9 +201,9 @@ int data_reader_dump_file(data_reader_t *data, file_info_t *fi, int outfd,
}
memset(data->block, 0, diff);
} else {
- if (precache_data_block(data, off, fi->blocks[i].size))
+ if (precache_data_block(data, off, fi->block_size[i]))
return -1;
- off += SQFS_ON_DISK_BLOCK_SIZE(fi->blocks[i].size);
+ off += SQFS_ON_DISK_BLOCK_SIZE(fi->block_size[i]);
}
if (write_data("writing uncompressed block",
@@ -259,7 +259,7 @@ ssize_t data_reader_read(data_reader_t *data, file_info_t *fi,
i = 0;
while (offset > data->block_size && i < count) {
- off += SQFS_ON_DISK_BLOCK_SIZE(fi->blocks[i++].size);
+ off += SQFS_ON_DISK_BLOCK_SIZE(fi->block_size[i++]);
offset -= data->block_size;
}
@@ -269,14 +269,14 @@ ssize_t data_reader_read(data_reader_t *data, file_info_t *fi,
if (size < diff)
size = diff;
- if (SQFS_IS_SPARSE_BLOCK(fi->blocks[i].size)) {
+ if (SQFS_IS_SPARSE_BLOCK(fi->block_size[i])) {
memset(buffer, 0, diff);
} else {
- if (precache_data_block(data, off, fi->blocks[i].size))
+ if (precache_data_block(data, off, fi->block_size[i]))
return -1;
memcpy(buffer, (char *)data->block + offset, diff);
- off += SQFS_ON_DISK_BLOCK_SIZE(fi->blocks[i].size);
+ off += SQFS_ON_DISK_BLOCK_SIZE(fi->block_size[i]);
}
++i;
diff --git a/lib/sqfshelper/data_writer.c b/lib/sqfshelper/data_writer.c
index d6a935b..7d84762 100644
--- a/lib/sqfshelper/data_writer.c
+++ b/lib/sqfshelper/data_writer.c
@@ -18,6 +18,24 @@
#include <errno.h>
#include <zlib.h>
+#define MK_BLK_SIG(chksum, size) \
+ (((uint64_t)(size) << 32) | (uint64_t)(chksum))
+
+#define BLK_SIZE(sig) ((sig) >> 32)
+
+#define INIT_BLOCK_COUNT (128)
+
+typedef struct {
+ uint64_t offset;
+ uint64_t signature;
+} blk_info_t;
+
+typedef struct {
+ uint32_t index;
+ uint32_t offset;
+ uint64_t signature;
+} frag_info_t;
+
struct data_writer_t {
sqfs_block_t *frag_block;
sqfs_fragment_t *fragments;
@@ -29,9 +47,17 @@ struct data_writer_t {
sqfs_block_processor_t *proc;
sqfs_compressor_t *cmp;
- file_info_t *list;
sqfs_super_t *super;
sqfs_file_t *file;
+
+ size_t file_start;
+ size_t num_blocks;
+ size_t max_blocks;
+ blk_info_t *blocks;
+
+ size_t frag_list_num;
+ size_t frag_list_max;
+ frag_info_t *frag_list;
};
enum {
@@ -47,20 +73,63 @@ static int allign_file(data_writer_t *data)
data->devblksz);
}
+static int store_block_location(data_writer_t *data, uint64_t offset,
+ uint32_t size, uint32_t chksum)
+{
+ size_t new_sz;
+ void *new;
+
+ if (data->num_blocks == data->max_blocks) {
+ new_sz = data->max_blocks * 2;
+ new = realloc(data->blocks, sizeof(data->blocks[0]) * new_sz);
+
+ if (new == NULL) {
+ perror("growing data block checksum table");
+ return -1;
+ }
+
+ data->blocks = new;
+ data->max_blocks = new_sz;
+ }
+
+ data->blocks[data->num_blocks].offset = offset;
+ data->blocks[data->num_blocks].signature = MK_BLK_SIG(chksum, size);
+ data->num_blocks += 1;
+ return 0;
+}
+
+static size_t deduplicate_blocks(data_writer_t *data, size_t count)
+{
+ size_t i, j;
+
+ for (i = 0; i < data->file_start; ++i) {
+ for (j = 0; j < count; ++j) {
+ if (data->blocks[i + j].signature !=
+ data->blocks[data->file_start + j].signature)
+ break;
+ }
+
+ if (j == count)
+ break;
+ }
+
+ return i;
+}
+
static int block_callback(void *user, sqfs_block_t *blk)
{
file_info_t *fi = blk->user;
data_writer_t *data = user;
- uint64_t ref, offset;
+ size_t start, count;
+ uint64_t offset;
uint32_t out;
if (blk->flags & BLK_FIRST_BLOCK) {
data->start = data->file->get_size(data->file);
+ data->file_start = data->num_blocks;
if ((blk->flags & BLK_ALLIGN) && allign_file(data) != 0)
return -1;
-
- fi->startblock = data->file->get_size(data->file);
}
if (blk->size != 0) {
@@ -68,20 +137,22 @@ static int block_callback(void *user, sqfs_block_t *blk)
if (!(blk->flags & SQFS_BLK_IS_COMPRESSED))
out |= 1 << 24;
+ offset = data->file->get_size(data->file);
+
if (blk->flags & BLK_FRAGMENT_BLOCK) {
- offset = htole64(data->file->get_size(data->file));
- data->fragments[blk->index].start_offset = offset;
+ data->fragments[blk->index].start_offset = htole64(offset);
data->fragments[blk->index].pad0 = 0;
data->fragments[blk->index].size = htole32(out);
data->super->flags &= ~SQFS_FLAG_NO_FRAGMENTS;
data->super->flags |= SQFS_FLAG_ALWAYS_FRAGMENTS;
} else {
- fi->blocks[blk->index].chksum = blk->checksum;
- fi->blocks[blk->index].size = htole32(out);
- }
+ fi->block_size[blk->index] = htole32(out);
- offset = data->file->get_size(data->file);
+ if (store_block_location(data, offset, out,
+ blk->checksum))
+ return -1;
+ }
if (data->file->write_at(data->file, offset,
blk->data, blk->size)) {
@@ -93,11 +164,14 @@ static int block_callback(void *user, sqfs_block_t *blk)
if ((blk->flags & BLK_ALLIGN) && allign_file(data) != 0)
return -1;
- ref = find_equal_blocks(fi, data->list,
- data->super->block_size);
- if (ref > 0) {
- fi->startblock = ref;
+ count = data->num_blocks - data->file_start;
+ start = deduplicate_blocks(data, count);
+
+ fi->startblock = data->blocks[start].offset;
+
+ if (start + count < data->file_start) {
fi->flags |= FILE_FLAG_BLOCKS_ARE_DUPLICATE;
+ data->num_blocks = data->file_start;
if (data->file->truncate(data->file, data->start)) {
perror("truncating squashfs image after "
@@ -139,10 +213,25 @@ static int flush_fragment_block(data_writer_t *data)
return ret;
}
-static int store_fragment(data_writer_t *data, sqfs_block_t *frag)
+static int handle_fragment(data_writer_t *data, sqfs_block_t *frag)
{
file_info_t *fi = frag->user;
- size_t size;
+ size_t i, size, new_sz;
+ uint64_t signature;
+ void *new;
+
+ frag->checksum = crc32(0, frag->data, frag->size);
+ signature = MK_BLK_SIG(frag->checksum, frag->size);
+
+ for (i = 0; i < data->frag_list_num; ++i) {
+ if (data->frag_list[i].signature == signature) {
+ fi->fragment_offset = data->frag_list[i].offset;
+ fi->fragment = data->frag_list[i].index;
+ fi->flags |= FILE_FLAG_FRAGMENT_IS_DUPLICATE;
+ free(frag);
+ return 0;
+ }
+ }
if (data->frag_block != NULL) {
size = data->frag_block->size + frag->size;
@@ -162,10 +251,28 @@ static int store_fragment(data_writer_t *data, sqfs_block_t *frag)
goto fail;
}
- data->frag_block->flags = SQFS_BLK_DONT_CHECKSUM;
- data->frag_block->flags |= BLK_FRAGMENT_BLOCK;
+ data->frag_block->flags = BLK_FRAGMENT_BLOCK;
+ }
+
+ if (data->frag_list_num == data->frag_list_max) {
+ new_sz = data->frag_list_max * 2;
+ new = realloc(data->frag_list,
+ sizeof(data->frag_list[0]) * new_sz);
+
+ if (new == NULL) {
+ perror("growing fragment checksum table");
+ return -1;
+ }
+
+ data->frag_list = new;
+ data->frag_list_max = new_sz;
}
+ data->frag_list[data->frag_list_num].index = data->num_fragments;
+ data->frag_list[data->frag_list_num].offset = data->frag_block->size;
+ data->frag_list[data->frag_list_num].signature = signature;
+ data->frag_list_num += 1;
+
fi->fragment_offset = data->frag_block->size;
fi->fragment = data->num_fragments;
@@ -186,26 +293,6 @@ static bool is_zero_block(unsigned char *ptr, size_t size)
return ptr[0] == 0 && memcmp(ptr, ptr + 1, size - 1) == 0;
}
-static int handle_fragment(data_writer_t *data, sqfs_block_t *blk)
-{
- file_info_t *fi = blk->user, *ref;
-
- fi->fragment_chksum = crc32(0, blk->data, blk->size);
-
- ref = fragment_by_chksum(fi, fi->fragment_chksum, blk->size,
- data->list, data->super->block_size);
-
- if (ref != NULL) {
- fi->fragment_offset = ref->fragment_offset;
- fi->fragment = ref->fragment;
- fi->flags |= FILE_FLAG_FRAGMENT_IS_DUPLICATE;
- free(blk);
- return 0;
- }
-
- return store_fragment(data, blk);
-}
-
static int add_sentinel_block(data_writer_t *data, file_info_t *fi,
uint32_t flags)
{
@@ -259,9 +346,6 @@ int write_data_from_fd(data_writer_t *data, file_info_t *fi,
if (flags & DW_ALLIGN_DEVBLK)
blk_flags |= BLK_ALLIGN;
- fi->next = data->list;
- data->list = fi;
-
for (; file_size > 0; file_size -= diff) {
if (file_size > data->super->block_size) {
diff = data->super->block_size;
@@ -276,8 +360,7 @@ int write_data_from_fd(data_writer_t *data, file_info_t *fi,
blk->index = i++;
if (is_zero_block(blk->data, blk->size)) {
- fi->blocks[blk->index].chksum = 0;
- fi->blocks[blk->index].size = 0;
+ fi->block_size[blk->index] = 0;
free(blk);
continue;
}
@@ -411,8 +494,7 @@ int write_data_from_fd_condensed(data_writer_t *data, file_info_t *fi,
}
if (is_zero_block(blk->data, blk->size)) {
- fi->blocks[blk->index].chksum = 0;
- fi->blocks[blk->index].size = 0;
+ fi->block_size[blk->index] = 0;
free(blk);
continue;
}
@@ -461,9 +543,39 @@ data_writer_t *data_writer_create(sqfs_super_t *super, sqfs_compressor_t *cmp,
return NULL;
}
+ data->max_blocks = INIT_BLOCK_COUNT;
+ data->frag_list_max = INIT_BLOCK_COUNT;
+
+ data->blocks = alloc_array(sizeof(data->blocks[0]),
+ data->max_blocks);
+
+ if (data->blocks == NULL) {
+ perror("creating data writer");
+ free(data);
+ return NULL;
+ }
+
+ data->frag_list = alloc_array(sizeof(data->frag_list[0]),
+ data->frag_list_max);
+
+ if (data->frag_list == NULL) {
+ perror("creating data writer");
+ free(data->blocks);
+ free(data);
+ return NULL;
+ }
+
data->proc = sqfs_block_processor_create(super->block_size, cmp,
num_jobs, max_backlog, data,
block_callback);
+ if (data->proc == NULL) {
+ perror("creating data block processor");
+ free(data->frag_list);
+ free(data->blocks);
+ free(data);
+ return NULL;
+ }
+
data->cmp = cmp;
data->super = super;
data->file = file;
@@ -475,6 +587,7 @@ void data_writer_destroy(data_writer_t *data)
{
sqfs_block_processor_destroy(data->proc);
free(data->fragments);
+ free(data->blocks);
free(data);
}
diff --git a/lib/sqfshelper/statistics.c b/lib/sqfshelper/statistics.c
index 33ff7cb..0fe325b 100644
--- a/lib/sqfshelper/statistics.c
+++ b/lib/sqfshelper/statistics.c
@@ -30,7 +30,7 @@ void sqfs_print_statistics(fstree_t *fs, sqfs_super_t *super)
}
for (sparse = 0, i = 0; i < num_blocks; ++i) {
- if (fi->blocks[i].size == 0)
+ if (fi->block_size[i] == 0)
sparse += 1;
}
diff --git a/lib/sqfshelper/tree_node_from_inode.c b/lib/sqfshelper/tree_node_from_inode.c
index 68365d0..fee191b 100644
--- a/lib/sqfshelper/tree_node_from_inode.c
+++ b/lib/sqfshelper/tree_node_from_inode.c
@@ -25,7 +25,7 @@ static size_t compute_size(sqfs_inode_generic_t *inode, const char *name)
case SQFS_INODE_EXT_FILE:
size += sizeof(file_info_t);
size += inode->num_file_blocks *
- sizeof(((file_info_t *)0)->blocks[0]);
+ sizeof(((file_info_t *)0)->block_size[0]);
break;
case SQFS_INODE_SLINK:
case SQFS_INODE_EXT_SLINK:
@@ -51,10 +51,10 @@ static void copy_block_sizes(sqfs_inode_generic_t *inode, tree_node_t *out,
}
out->name += inode->num_file_blocks *
- sizeof(out->data.file->blocks[0]);
+ sizeof(out->data.file->block_size[0]);
for (i = 0; i < inode->num_file_blocks; ++i)
- out->data.file->blocks[i].size = inode->block_sizes[i];
+ out->data.file->block_size[i] = inode->block_sizes[i];
}
tree_node_t *tree_node_from_inode(sqfs_inode_generic_t *inode,
diff --git a/lib/sqfshelper/tree_node_to_inode.c b/lib/sqfshelper/tree_node_to_inode.c
index 7de8abd..cc76a8d 100644
--- a/lib/sqfshelper/tree_node_to_inode.c
+++ b/lib/sqfshelper/tree_node_to_inode.c
@@ -120,7 +120,7 @@ sqfs_inode_generic_t *tree_node_to_inode(fstree_t *fs, sqfs_id_table_t *idtbl,
for (i = 0; i < block_count; ++i) {
inode->block_sizes[i] =
- node->data.file->blocks[i].size;
+ node->data.file->block_size[i];
}
}
diff --git a/mkfs/mkfs.c b/mkfs/mkfs.c
index fda9831..0c4e9a5 100644
--- a/mkfs/mkfs.c
+++ b/mkfs/mkfs.c
@@ -36,10 +36,7 @@ static int pack_files(data_writer_t *data, fstree_t *fs, options_t *opt)
if (set_working_dir(opt))
return -1;
- while (fs->files != NULL) {
- fi = fs->files;
- fs->files = fi->next;
-
+ for (fi = fs->files; fi != NULL; fi = fi->next) {
if (!opt->quiet)
printf("packing %s\n", fi->input_file);
diff --git a/tests/mknode_reg.c b/tests/mknode_reg.c
index 7d8e934..65de0b6 100644
--- a/tests/mknode_reg.c
+++ b/tests/mknode_reg.c
@@ -36,7 +36,7 @@ int main(void)
assert((char *)node->name >= (char *)node->payload);
assert((char *)node->data.file >= (char *)node->payload);
assert(node->data.file->input_file > (char *)(node->data.file + 1) +
- sizeof(node->data.file->blocks[0]) * 4);
+ sizeof(node->data.file->block_size[0]) * 4);
assert(node->name >= node->data.file->input_file + 6);
assert(strcmp(node->name, "filename") == 0);
assert(strcmp(node->data.file->input_file, "input") == 0);