summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorDavid Oberhollenzer <david.oberhollenzer@sigma-star.at>2019-07-29 10:57:40 +0200
committerDavid Oberhollenzer <david.oberhollenzer@sigma-star.at>2019-07-29 10:58:42 +0200
commit864302059679c15bc223d37ed8cef87c5b4a97aa (patch)
tree419cdc8670878602c8b615e98937db5da723ca7a /lib
parentdad72be359d2670dd4993101591e1b14638dedec (diff)
Cleanup: move deduplication code from data writer to fstree
Since it is actually completely independend of libsqfs and only works on file_info_t lists, it can be safely moved over to libfstree and the data writer becomes less cluttered as a result. Signed-off-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
Diffstat (limited to 'lib')
-rw-r--r--lib/Makemodule.am2
-rw-r--r--lib/fstree/deduplicate.c132
-rw-r--r--lib/sqfs/data_writer.c127
3 files changed, 133 insertions, 128 deletions
diff --git a/lib/Makemodule.am b/lib/Makemodule.am
index 8258136..4288ef1 100644
--- a/lib/Makemodule.am
+++ b/lib/Makemodule.am
@@ -4,7 +4,7 @@ libfstree_a_SOURCES += lib/fstree/gen_inode_table.c lib/fstree/get_path.c
libfstree_a_SOURCES += lib/fstree/node_stat.c lib/fstree/mknode.c
libfstree_a_SOURCES += lib/fstree/add_by_path.c lib/fstree/xattr.c
libfstree_a_SOURCES += lib/fstree/node_from_path.c include/fstree.h
-libfstree_a_SOURCES += lib/fstree/gen_file_list.c
+libfstree_a_SOURCES += lib/fstree/gen_file_list.c lib/fstree/deduplicate.c
libfstree_a_CFLAGS = $(AM_CFLAGS)
libfstree_a_CPPFLAGS = $(AM_CPPFLAGS)
diff --git a/lib/fstree/deduplicate.c b/lib/fstree/deduplicate.c
new file mode 100644
index 0000000..9ae4440
--- /dev/null
+++ b/lib/fstree/deduplicate.c
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+#include "config.h"
+#include "fstree.h"
+
+#include <string.h>
+
+file_info_t *fragment_by_chksum(file_info_t *fi, uint32_t chksum,
+ size_t frag_size, file_info_t *list,
+ size_t block_size)
+{
+ file_info_t *it;
+
+ for (it = list; it != NULL; it = it->next) {
+ if (it == fi) {
+ it = NULL;
+ break;
+ }
+
+ if (!(it->flags & FILE_FLAG_HAS_FRAGMENT))
+ continue;
+
+ if (it->flags & FILE_FLAG_FRAGMENT_IS_DUPLICATE)
+ continue;
+
+ if ((it->size % block_size) != frag_size)
+ continue;
+
+ if (it->fragment_chksum == chksum)
+ break;
+ }
+
+ return it;
+}
+
+static size_t get_block_count(file_info_t *fi, size_t block_size)
+{
+ size_t count = fi->size / block_size;
+
+ if ((fi->size % block_size) && !(fi->flags & FILE_FLAG_HAS_FRAGMENT))
+ ++count;
+
+ while (count > 0 && fi->blocks[count - 1].size == 0)
+ --count;
+
+ return count;
+}
+
+static size_t find_first_match(file_info_t *file, file_info_t *cmp,
+ size_t idx, size_t cmp_blk_count)
+{
+ size_t i;
+
+ for (i = 0; i < cmp_blk_count; ++i) {
+ if (memcmp(file->blocks + idx, cmp->blocks + i,
+ sizeof(file->blocks[idx])) == 0) {
+ break;
+ }
+ }
+
+ return i;
+}
+
+uint64_t find_equal_blocks(file_info_t *file, file_info_t *list,
+ size_t block_size)
+{
+ size_t start, first_match, i, j, block_count, cmp_blk_count;
+ uint64_t location;
+ file_info_t *it;
+
+ block_count = get_block_count(file, block_size);
+ if (block_count == 0)
+ return 0;
+
+ for (start = 0; start < block_count; ++start) {
+ if (file->blocks[start].size != 0)
+ break;
+ }
+
+ for (it = list; it != NULL; it = it->next) {
+ if (it == file) {
+ it = NULL;
+ break;
+ }
+
+ if (it->flags & FILE_FLAG_BLOCKS_ARE_DUPLICATE)
+ continue;
+
+ cmp_blk_count = get_block_count(it, block_size);
+ if (cmp_blk_count == 0)
+ continue;
+
+ first_match = find_first_match(file, it, start, cmp_blk_count);
+ if (first_match == cmp_blk_count)
+ continue;
+
+ i = start;
+ j = first_match;
+
+ while (i < block_count && j < cmp_blk_count) {
+ if (file->blocks[i].size == 0) {
+ ++i;
+ continue;
+ }
+
+ if (it->blocks[j].size == 0) {
+ ++j;
+ continue;
+ }
+
+ if (memcmp(it->blocks + j, file->blocks + i,
+ sizeof(file->blocks[i])) != 0) {
+ break;
+ }
+
+ ++i;
+ ++j;
+ }
+
+ if (i == block_count)
+ break;
+ }
+
+ if (it == NULL)
+ return 0;
+
+ location = it->startblock;
+
+ for (i = 0; i < first_match; ++i)
+ location += it->blocks[i].size & ((1 << 24) - 1);
+
+ return location;
+}
diff --git a/lib/sqfs/data_writer.c b/lib/sqfs/data_writer.c
index fddb9d3..4324f06 100644
--- a/lib/sqfs/data_writer.c
+++ b/lib/sqfs/data_writer.c
@@ -126,133 +126,6 @@ int data_writer_flush_fragments(data_writer_t *data)
return 0;
}
-static file_info_t *fragment_by_chksum(file_info_t *fi, uint32_t chksum,
- size_t frag_size, file_info_t *list,
- size_t block_size)
-{
- file_info_t *it;
-
- for (it = list; it != NULL; it = it->next) {
- if (it == fi) {
- it = NULL;
- break;
- }
-
- if (!(it->flags & FILE_FLAG_HAS_FRAGMENT))
- continue;
-
- if (it->flags & FILE_FLAG_FRAGMENT_IS_DUPLICATE)
- continue;
-
- if ((it->size % block_size) != frag_size)
- continue;
-
- if (it->fragment_chksum == chksum)
- break;
- }
-
- return it;
-}
-
-static size_t get_block_count(file_info_t *fi, size_t block_size)
-{
- size_t count = fi->size / block_size;
-
- if ((fi->size % block_size) && !(fi->flags & FILE_FLAG_HAS_FRAGMENT))
- ++count;
-
- while (count > 0 && fi->blocks[count - 1].size == 0)
- --count;
-
- return count;
-}
-
-static size_t find_first_match(file_info_t *file, file_info_t *cmp,
- size_t idx, size_t cmp_blk_count)
-{
- size_t i;
-
- for (i = 0; i < cmp_blk_count; ++i) {
- if (memcmp(file->blocks + idx, cmp->blocks + i,
- sizeof(file->blocks[idx])) == 0) {
- break;
- }
- }
-
- return i;
-}
-
-static uint64_t find_equal_blocks(file_info_t *file, file_info_t *list,
- size_t block_size)
-{
- size_t start, first_match, i, j, block_count, cmp_blk_count;
- uint64_t location;
- file_info_t *it;
-
- block_count = get_block_count(file, block_size);
- if (block_count == 0)
- return 0;
-
- for (start = 0; start < block_count; ++start) {
- if (file->blocks[start].size != 0)
- break;
- }
-
- for (it = list; it != NULL; it = it->next) {
- if (it == file) {
- it = NULL;
- break;
- }
-
- if (it->flags & FILE_FLAG_BLOCKS_ARE_DUPLICATE)
- continue;
-
- cmp_blk_count = get_block_count(it, block_size);
- if (cmp_blk_count == 0)
- continue;
-
- first_match = find_first_match(file, it, start, cmp_blk_count);
- if (first_match == cmp_blk_count)
- continue;
-
- i = start;
- j = first_match;
-
- while (i < block_count && j < cmp_blk_count) {
- if (file->blocks[i].size == 0) {
- ++i;
- continue;
- }
-
- if (it->blocks[j].size == 0) {
- ++j;
- continue;
- }
-
- if (memcmp(it->blocks + j, file->blocks + i,
- sizeof(file->blocks[i])) != 0) {
- break;
- }
-
- ++i;
- ++j;
- }
-
- if (i == block_count)
- break;
- }
-
- if (it == NULL)
- return 0;
-
- location = it->startblock;
-
- for (i = 0; i < first_match; ++i)
- location += it->blocks[i].size & ((1 << 24) - 1);
-
- return location;
-}
-
static int flush_data_block(data_writer_t *data, size_t size,
file_info_t *fi, int flags, file_info_t *list)
{