From f57814332a69bebc40e25e6537a3c08fc9e18f97 Mon Sep 17 00:00:00 2001 From: David Oberhollenzer Date: Sat, 14 Sep 2019 01:41:38 +0200 Subject: Move data deduplication from fstree code to data writer Signed-off-by: David Oberhollenzer --- lib/fstree/Makemodule.am | 2 +- lib/fstree/deduplicate.c | 133 ----------------------------------------------- lib/fstree/mknode.c | 6 +-- 3 files changed, 4 insertions(+), 137 deletions(-) delete mode 100644 lib/fstree/deduplicate.c (limited to 'lib/fstree') diff --git a/lib/fstree/Makemodule.am b/lib/fstree/Makemodule.am index 3c74f6d..c3c56fb 100644 --- a/lib/fstree/Makemodule.am +++ b/lib/fstree/Makemodule.am @@ -4,7 +4,7 @@ libfstree_a_SOURCES += lib/fstree/gen_inode_table.c lib/fstree/get_path.c libfstree_a_SOURCES += lib/fstree/node_stat.c lib/fstree/mknode.c libfstree_a_SOURCES += lib/fstree/add_by_path.c lib/fstree/xattr.c libfstree_a_SOURCES += lib/fstree/node_from_path.c include/fstree.h -libfstree_a_SOURCES += lib/fstree/gen_file_list.c lib/fstree/deduplicate.c +libfstree_a_SOURCES += lib/fstree/gen_file_list.c libfstree_a_SOURCES += lib/fstree/optimize_unpack_order.c libfstree_a_SOURCES += lib/fstree/canonicalize_name.c libfstree_a_SOURCES += lib/fstree/source_date_epoch.c diff --git a/lib/fstree/deduplicate.c b/lib/fstree/deduplicate.c deleted file mode 100644 index 00815a6..0000000 --- a/lib/fstree/deduplicate.c +++ /dev/null @@ -1,133 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ -/* - * deduplicate.c - * - * Copyright (C) 2019 David Oberhollenzer - */ -#include "config.h" -#include "fstree.h" - -#include - -file_info_t *fragment_by_chksum(file_info_t *fi, uint32_t chksum, - size_t frag_size, file_info_t *list, - size_t block_size) -{ - file_info_t *it; - - for (it = list; it != NULL; it = it->next) { - if (it == fi) - continue; - - if (!(it->flags & FILE_FLAG_HAS_FRAGMENT)) - continue; - - if (it->flags & FILE_FLAG_FRAGMENT_IS_DUPLICATE) - continue; - - if ((it->size % block_size) != frag_size) - continue; - - if (it->fragment_chksum == chksum) - break; - } - - return it; -} - -static size_t get_block_count(file_info_t *fi, size_t block_size) -{ - size_t count = fi->size / block_size; - - if ((fi->size % block_size) && !(fi->flags & FILE_FLAG_HAS_FRAGMENT)) - ++count; - - while (count > 0 && fi->blocks[count - 1].size == 0) - --count; - - return count; -} - -static size_t find_first_match(file_info_t *file, file_info_t *cmp, - size_t idx, size_t cmp_blk_count) -{ - size_t i; - - for (i = 0; i < cmp_blk_count; ++i) { - if (memcmp(file->blocks + idx, cmp->blocks + i, - sizeof(file->blocks[idx])) == 0) { - break; - } - } - - return i; -} - -uint64_t find_equal_blocks(file_info_t *file, file_info_t *list, - size_t block_size) -{ - size_t start, first_match, i, j, block_count, cmp_blk_count; - uint64_t location; - file_info_t *it; - - block_count = get_block_count(file, block_size); - if (block_count == 0) - return 0; - - for (start = 0; start < block_count; ++start) { - if (file->blocks[start].size != 0) - break; - } - - for (it = list; it != NULL; it = it->next) { - if (it == file) - continue; - - if (it->flags & FILE_FLAG_BLOCKS_ARE_DUPLICATE) - continue; - - cmp_blk_count = get_block_count(it, block_size); - if (cmp_blk_count == 0) - continue; - - first_match = find_first_match(file, it, start, cmp_blk_count); - if (first_match == cmp_blk_count) - continue; - - i = start; - j = first_match; - - while (i < block_count && j < cmp_blk_count) { - if (file->blocks[i].size == 0) { - ++i; - continue; - } - - if (it->blocks[j].size == 0) { - ++j; - continue; - } - - if (memcmp(it->blocks + j, file->blocks + i, - sizeof(file->blocks[i])) != 0) { - break; - } - - ++i; - ++j; - } - - if (i == block_count) - break; - } - - if (it == NULL) - return 0; - - location = it->startblock; - - for (i = 0; i < first_match; ++i) - location += it->blocks[i].size & ((1 << 24) - 1); - - return location; -} diff --git a/lib/fstree/mknode.c b/lib/fstree/mknode.c index 1c3b3a0..ace99f3 100644 --- a/lib/fstree/mknode.c +++ b/lib/fstree/mknode.c @@ -41,7 +41,7 @@ tree_node_t *fstree_mknode(fstree_t *fs, tree_node_t *parent, const char *name, if ((sb->st_size % fs->block_size) != 0) ++block_count; - if (SZ_MUL_OV(block_count, sizeof(n->data.file->blocks[0]), + if (SZ_MUL_OV(block_count, sizeof(n->data.file->block_size[0]), &total)) { goto fail_ov; } @@ -92,8 +92,8 @@ tree_node_t *fstree_mknode(fstree_t *fs, tree_node_t *parent, const char *name, if (extra == NULL) break; - ptr = (char *)n->data.file->blocks; - ptr += block_count * sizeof(n->data.file->blocks[0]); + ptr = (char *)n->data.file->block_size; + ptr += block_count * sizeof(n->data.file->block_size[0]); n->data.file->input_file = ptr; strcpy(n->data.file->input_file, extra); break; -- cgit v1.2.3