From f57814332a69bebc40e25e6537a3c08fc9e18f97 Mon Sep 17 00:00:00 2001 From: David Oberhollenzer Date: Sat, 14 Sep 2019 01:41:38 +0200 Subject: Move data deduplication from fstree code to data writer Signed-off-by: David Oberhollenzer --- lib/fstree/deduplicate.c | 133 ----------------------------------------------- 1 file changed, 133 deletions(-) delete mode 100644 lib/fstree/deduplicate.c (limited to 'lib/fstree/deduplicate.c') diff --git a/lib/fstree/deduplicate.c b/lib/fstree/deduplicate.c deleted file mode 100644 index 00815a6..0000000 --- a/lib/fstree/deduplicate.c +++ /dev/null @@ -1,133 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ -/* - * deduplicate.c - * - * Copyright (C) 2019 David Oberhollenzer - */ -#include "config.h" -#include "fstree.h" - -#include - -file_info_t *fragment_by_chksum(file_info_t *fi, uint32_t chksum, - size_t frag_size, file_info_t *list, - size_t block_size) -{ - file_info_t *it; - - for (it = list; it != NULL; it = it->next) { - if (it == fi) - continue; - - if (!(it->flags & FILE_FLAG_HAS_FRAGMENT)) - continue; - - if (it->flags & FILE_FLAG_FRAGMENT_IS_DUPLICATE) - continue; - - if ((it->size % block_size) != frag_size) - continue; - - if (it->fragment_chksum == chksum) - break; - } - - return it; -} - -static size_t get_block_count(file_info_t *fi, size_t block_size) -{ - size_t count = fi->size / block_size; - - if ((fi->size % block_size) && !(fi->flags & FILE_FLAG_HAS_FRAGMENT)) - ++count; - - while (count > 0 && fi->blocks[count - 1].size == 0) - --count; - - return count; -} - -static size_t find_first_match(file_info_t *file, file_info_t *cmp, - size_t idx, size_t cmp_blk_count) -{ - size_t i; - - for (i = 0; i < cmp_blk_count; ++i) { - if (memcmp(file->blocks + idx, cmp->blocks + i, - sizeof(file->blocks[idx])) == 0) { - break; - } - } - - return i; -} - -uint64_t find_equal_blocks(file_info_t *file, file_info_t *list, - size_t block_size) -{ - size_t start, first_match, i, j, block_count, cmp_blk_count; - uint64_t location; - file_info_t *it; - - block_count = get_block_count(file, block_size); - if (block_count == 0) - return 0; - - for (start = 0; start < block_count; ++start) { - if (file->blocks[start].size != 0) - break; - } - - for (it = list; it != NULL; it = it->next) { - if (it == file) - continue; - - if (it->flags & FILE_FLAG_BLOCKS_ARE_DUPLICATE) - continue; - - cmp_blk_count = get_block_count(it, block_size); - if (cmp_blk_count == 0) - continue; - - first_match = find_first_match(file, it, start, cmp_blk_count); - if (first_match == cmp_blk_count) - continue; - - i = start; - j = first_match; - - while (i < block_count && j < cmp_blk_count) { - if (file->blocks[i].size == 0) { - ++i; - continue; - } - - if (it->blocks[j].size == 0) { - ++j; - continue; - } - - if (memcmp(it->blocks + j, file->blocks + i, - sizeof(file->blocks[i])) != 0) { - break; - } - - ++i; - ++j; - } - - if (i == block_count) - break; - } - - if (it == NULL) - return 0; - - location = it->startblock; - - for (i = 0; i < first_match; ++i) - location += it->blocks[i].size & ((1 << 24) - 1); - - return location; -} -- cgit v1.2.3