summary | refs | log | tree | commit | diff
path: root/lib/fstree
diff options
context:
space:
mode:
author: David Oberhollenzer <david.oberhollenzer@sigma-star.at>, 2019-09-14 01:41:38 +0200
committer: David Oberhollenzer <david.oberhollenzer@sigma-star.at>, 2019-09-14 04:10:45 +0200
commit: f57814332a69bebc40e25e6537a3c08fc9e18f97 (patch)
tree: 7ff880b8eb53f4852c6f0be9436f220643219795 /lib/fstree
parent: d455ff92da0249e731cff7613f42b0f7359775da (diff)
Move data deduplication from fstree code to data writer
Signed-off-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
Diffstat (limited to 'lib/fstree')
-rw-r--r-- lib/fstree/Makemodule.am | 2
-rw-r--r-- lib/fstree/deduplicate.c | 133
-rw-r--r-- lib/fstree/mknode.c | 6
3 files changed, 4 insertions, 137 deletions
diff --git a/lib/fstree/Makemodule.am b/lib/fstree/Makemodule.am
index 3c74f6d..c3c56fb 100644
--- a/lib/fstree/Makemodule.am
+++ b/lib/fstree/Makemodule.am
@@ -4,7 +4,7 @@ libfstree_a_SOURCES += lib/fstree/gen_inode_table.c lib/fstree/get_path.c
libfstree_a_SOURCES += lib/fstree/node_stat.c lib/fstree/mknode.c
libfstree_a_SOURCES += lib/fstree/add_by_path.c lib/fstree/xattr.c
libfstree_a_SOURCES += lib/fstree/node_from_path.c include/fstree.h
-libfstree_a_SOURCES += lib/fstree/gen_file_list.c lib/fstree/deduplicate.c
+libfstree_a_SOURCES += lib/fstree/gen_file_list.c
libfstree_a_SOURCES += lib/fstree/optimize_unpack_order.c
libfstree_a_SOURCES += lib/fstree/canonicalize_name.c
libfstree_a_SOURCES += lib/fstree/source_date_epoch.c
diff --git a/lib/fstree/deduplicate.c b/lib/fstree/deduplicate.c
deleted file mode 100644
index 00815a6..0000000
--- a/lib/fstree/deduplicate.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/* SPDX-License-Identifier: GPL-3.0-or-later */
-/*
- * deduplicate.c
- *
- * Copyright (C) 2019 David Oberhollenzer <goliath@infraroot.at>
- */
-#include "config.h"
-#include "fstree.h"
-
-#include <string.h>
-
-file_info_t *fragment_by_chksum(file_info_t *fi, uint32_t chksum,
- size_t frag_size, file_info_t *list,
- size_t block_size)
-{
- file_info_t *it;
-
- for (it = list; it != NULL; it = it->next) {
- if (it == fi)
- continue;
-
- if (!(it->flags & FILE_FLAG_HAS_FRAGMENT))
- continue;
-
- if (it->flags & FILE_FLAG_FRAGMENT_IS_DUPLICATE)
- continue;
-
- if ((it->size % block_size) != frag_size)
- continue;
-
- if (it->fragment_chksum == chksum)
- break;
- }
-
- return it;
-}
-
-static size_t get_block_count(file_info_t *fi, size_t block_size)
-{
- size_t count = fi->size / block_size;
-
- if ((fi->size % block_size) && !(fi->flags & FILE_FLAG_HAS_FRAGMENT))
- ++count;
-
- while (count > 0 && fi->blocks[count - 1].size == 0)
- --count;
-
- return count;
-}
-
-static size_t find_first_match(file_info_t *file, file_info_t *cmp,
- size_t idx, size_t cmp_blk_count)
-{
- size_t i;
-
- for (i = 0; i < cmp_blk_count; ++i) {
- if (memcmp(file->blocks + idx, cmp->blocks + i,
- sizeof(file->blocks[idx])) == 0) {
- break;
- }
- }
-
- return i;
-}
-
-uint64_t find_equal_blocks(file_info_t *file, file_info_t *list,
- size_t block_size)
-{
- size_t start, first_match, i, j, block_count, cmp_blk_count;
- uint64_t location;
- file_info_t *it;
-
- block_count = get_block_count(file, block_size);
- if (block_count == 0)
- return 0;
-
- for (start = 0; start < block_count; ++start) {
- if (file->blocks[start].size != 0)
- break;
- }
-
- for (it = list; it != NULL; it = it->next) {
- if (it == file)
- continue;
-
- if (it->flags & FILE_FLAG_BLOCKS_ARE_DUPLICATE)
- continue;
-
- cmp_blk_count = get_block_count(it, block_size);
- if (cmp_blk_count == 0)
- continue;
-
- first_match = find_first_match(file, it, start, cmp_blk_count);
- if (first_match == cmp_blk_count)
- continue;
-
- i = start;
- j = first_match;
-
- while (i < block_count && j < cmp_blk_count) {
- if (file->blocks[i].size == 0) {
- ++i;
- continue;
- }
-
- if (it->blocks[j].size == 0) {
- ++j;
- continue;
- }
-
- if (memcmp(it->blocks + j, file->blocks + i,
- sizeof(file->blocks[i])) != 0) {
- break;
- }
-
- ++i;
- ++j;
- }
-
- if (i == block_count)
- break;
- }
-
- if (it == NULL)
- return 0;
-
- location = it->startblock;
-
- for (i = 0; i < first_match; ++i)
- location += it->blocks[i].size & ((1 << 24) - 1);
-
- return location;
-}
diff --git a/lib/fstree/mknode.c b/lib/fstree/mknode.c
index 1c3b3a0..ace99f3 100644
--- a/lib/fstree/mknode.c
+++ b/lib/fstree/mknode.c
@@ -41,7 +41,7 @@ tree_node_t *fstree_mknode(fstree_t *fs, tree_node_t *parent, const char *name,
if ((sb->st_size % fs->block_size) != 0)
++block_count;
- if (SZ_MUL_OV(block_count, sizeof(n->data.file->blocks[0]),
+ if (SZ_MUL_OV(block_count, sizeof(n->data.file->block_size[0]),
&total)) {
goto fail_ov;
}
@@ -92,8 +92,8 @@ tree_node_t *fstree_mknode(fstree_t *fs, tree_node_t *parent, const char *name,
if (extra == NULL)
break;
- ptr = (char *)n->data.file->blocks;
- ptr += block_count * sizeof(n->data.file->blocks[0]);
+ ptr = (char *)n->data.file->block_size;
+ ptr += block_count * sizeof(n->data.file->block_size[0]);
n->data.file->input_file = ptr;
strcpy(n->data.file->input_file, extra);
break;