aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/fstree.h9
-rw-r--r--lib/Makemodule.am1
-rw-r--r--lib/fstree/optimize_unpack_order.c161
-rw-r--r--unpack/fill_files.c78
4 files changed, 192 insertions, 57 deletions
diff --git a/include/fstree.h b/include/fstree.h
index 608ff97..dbc76db 100644
--- a/include/fstree.h
+++ b/include/fstree.h
@@ -323,4 +323,13 @@ file_info_t *fragment_by_chksum(file_info_t *fi, uint32_t chksum,
uint64_t find_equal_blocks(file_info_t *file, file_info_t *list,
size_t block_size);
+/*
+ Optimize the order of the fstree file list for unpacking as to avoid
+ unpacking fragment blocks more than once and to improve locality when
+ fetching data from disk. The resulting list is returned in 'out'.
+ If num_jobs is > 1, the list is split up for parallel processing.
+ */
+void optimize_unpack_order(fstree_t *fs, size_t num_jobs,
+ file_info_t *out[num_jobs]);
+
#endif /* FSTREE_H */
diff --git a/lib/Makemodule.am b/lib/Makemodule.am
index f49ff91..c3451f4 100644
--- a/lib/Makemodule.am
+++ b/lib/Makemodule.am
@@ -5,6 +5,7 @@ libfstree_a_SOURCES += lib/fstree/node_stat.c lib/fstree/mknode.c
libfstree_a_SOURCES += lib/fstree/add_by_path.c lib/fstree/xattr.c
libfstree_a_SOURCES += lib/fstree/node_from_path.c include/fstree.h
libfstree_a_SOURCES += lib/fstree/gen_file_list.c lib/fstree/deduplicate.c
+libfstree_a_SOURCES += lib/fstree/optimize_unpack_order.c
libfstree_a_CFLAGS = $(AM_CFLAGS)
libfstree_a_CPPFLAGS = $(AM_CPPFLAGS)
diff --git a/lib/fstree/optimize_unpack_order.c b/lib/fstree/optimize_unpack_order.c
new file mode 100644
index 0000000..ad036e8
--- /dev/null
+++ b/lib/fstree/optimize_unpack_order.c
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+/*
+ * optimize_unpack_order.c
+ *
+ * Copyright (C) 2019 David Oberhollenzer <goliath@infraroot.at>
+ */
+#include "config.h"
+#include "fstree.h"
+
+static int compare_files(const fstree_t *fs, const file_info_t *lhs,
+ const file_info_t *rhs)
+{
+ /* NOOP < everything else */
+ if (lhs->input_file == NULL)
+ return rhs->input_file == NULL ? 0 : -1;
+
+ if (rhs->input_file == NULL)
+ return 1;
+
+ /* Files with fragments come first, ordered by ID.
+ In case of tie, files without data blocks come first,
+ and the others are ordered by start block. */
+ if (lhs->flags & FILE_FLAG_HAS_FRAGMENT) {
+ if (!(rhs->flags & FILE_FLAG_HAS_FRAGMENT))
+ return -1;
+
+ if (lhs->fragment < rhs->fragment)
+ return -1;
+ if (lhs->fragment > rhs->fragment)
+ return 1;
+
+ if (lhs->size < fs->block_size)
+ return (rhs->size < fs->block_size) ? 0 : -1;
+ if (rhs->size < fs->block_size)
+ return 1;
+ goto order_by_start;
+ }
+
+ if (rhs->flags & FILE_FLAG_HAS_FRAGMENT)
+ return 1;
+
+ /* order the rest by start block */
+order_by_start:
+ return lhs->startblock < rhs->startblock ? -1 :
+ lhs->startblock > rhs->startblock ? 1 : 0;
+}
+
+/* TODO: unify ad-hoc merge sort with the one in fstree_sort */
+static file_info_t *merge(const fstree_t *fs, file_info_t *lhs,
+ file_info_t *rhs)
+{
+ file_info_t *it;
+ file_info_t *head = NULL;
+ file_info_t **next_ptr = &head;
+
+ while (lhs != NULL && rhs != NULL) {
+ if (compare_files(fs, lhs, rhs) <= 0) {
+ it = lhs;
+ lhs = lhs->next;
+ } else {
+ it = rhs;
+ rhs = rhs->next;
+ }
+
+ *next_ptr = it;
+ next_ptr = &it->next;
+ }
+
+ it = (lhs != NULL ? lhs : rhs);
+ *next_ptr = it;
+ return head;
+}
+
+static file_info_t *list_sort(const fstree_t *fs, file_info_t *head)
+{
+ file_info_t *it, *half, *prev;
+
+ it = half = prev = head;
+ while (it != NULL) {
+ prev = half;
+ half = half->next;
+ it = it->next;
+ if (it != NULL) {
+ it = it->next;
+ }
+ }
+
+ // half refers to the (count/2)'th element ROUNDED UP.
+ // It will be null therefore only in the empty and the
+ // single element list
+ if (half == NULL) {
+ return head;
+ }
+
+ prev->next = NULL;
+
+ return merge(fs, list_sort(fs, head), list_sort(fs, half));
+}
+
+static file_info_t *split_list(file_info_t *list, uint64_t threashold)
+{
+ file_info_t *it, *new = NULL;
+ uint64_t size = 0;
+
+ for (it = list; it != NULL; it = it->next) {
+ size += it->size - it->sparse;
+
+ if (size >= threashold) {
+ new = it->next;
+ it->next = NULL;
+ break;
+ }
+ }
+
+ return new;
+}
+
+static uint64_t total_size(file_info_t *list)
+{
+ uint64_t size = 0;
+ file_info_t *it;
+
+ for (it = list; it != NULL; it = it->next)
+ size += it->size - it->sparse;
+
+ return size;
+}
+
+void optimize_unpack_order(fstree_t *fs, size_t num_jobs,
+ file_info_t *out[num_jobs])
+{
+ file_info_t *file_list;
+ uint64_t threshold;
+ size_t i;
+
+ if (num_jobs < 1)
+ return;
+
+ for (i = 0; i < num_jobs; ++i)
+ out[i] = NULL;
+
+ file_list = list_sort(fs, fs->files);
+ while (file_list != NULL && file_list->input_file == NULL)
+ file_list = file_list->next;
+
+ fs->files = NULL;
+
+ if (num_jobs < 2) {
+ out[0] = file_list;
+ return;
+ }
+
+ threshold = total_size(file_list) / num_jobs;
+
+ for (i = 0; i < (num_jobs - 1); ++i) {
+ out[i] = file_list;
+ file_list = split_list(file_list, threshold);
+ }
+
+ out[i] = file_list;
+}
diff --git a/unpack/fill_files.c b/unpack/fill_files.c
index e104ae7..c0fc26c 100644
--- a/unpack/fill_files.c
+++ b/unpack/fill_files.c
@@ -38,64 +38,23 @@ static int fill_files(data_reader_t *data, file_info_t *list, int flags)
return 0;
}
-static file_info_t *split_list(file_info_t *list, uint64_t threashold)
-{
- file_info_t *it, *new = NULL;
- uint64_t size = 0;
-
- for (it = list; it != NULL; it = it->next) {
- if (it->input_file == NULL)
- continue;
-
- size += it->size - it->sparse;
-
- if (size >= threashold) {
- new = it->next;
- it->next = NULL;
- break;
- }
- }
-
- return new;
-}
-
-static uint64_t total_size(file_info_t *list)
-{
- uint64_t size = 0;
- file_info_t *it;
-
- for (it = list; it != NULL; it = it->next) {
- if (it->input_file != NULL)
- size += it->size - it->sparse;
- }
-
- return size;
-}
-
int fill_unpacked_files(fstree_t *fs, data_reader_t *data, int flags,
unsigned int num_jobs)
{
- file_info_t *sublists[num_jobs], *it;
+ file_info_t **sublists, *it;
int exitstatus, status = 0;
- uint64_t threshold;
unsigned int i;
pid_t pid;
- if (num_jobs <= 1) {
- status = fill_files(data, fs->files, flags);
+ if (num_jobs < 1)
+ num_jobs = 1;
- for (it = fs->files; it != NULL; it = it->next)
- free(it->input_file);
-
- return status;
- }
-
- threshold = total_size(fs->files) / num_jobs;
+ sublists = alloca(sizeof(sublists[0]) * num_jobs);
+ optimize_unpack_order(fs, num_jobs, sublists);
- for (i = 0; i < num_jobs; ++i) {
- sublists[i] = fs->files;
-
- fs->files = split_list(fs->files, threshold);
+ if (num_jobs < 2) {
+ status = fill_files(data, sublists[0], flags);
+ goto out;
}
for (i = 0; i < num_jobs; ++i) {
@@ -113,26 +72,31 @@ int fill_unpacked_files(fstree_t *fs, data_reader_t *data, int flags,
if (pid < 0) {
perror("fork");
status = -1;
- num_jobs = i;
break;
}
}
- for (i = 0; i < num_jobs; ++i) {
- do {
- pid = waitpid(-1, &exitstatus, 0);
+ for (;;) {
+ errno = 0;
+ pid = waitpid(-1, &exitstatus, 0);
+
+ if (pid < 0) {
+ if (errno == EINTR)
+ continue;
if (errno == ECHILD)
- goto out;
- } while (pid < 0);
+ break;
+ }
if (!WIFEXITED(exitstatus) ||
WEXITSTATUS(exitstatus) != EXIT_SUCCESS) {
status = -1;
}
-
+ }
+out:
+ for (i = 0; i < num_jobs; ++i) {
for (it = sublists[i]; it != NULL; it = it->next)
free(it->input_file);
}
-out:
+
return status;
}