diff options
-rw-r--r-- | include/fstree.h | 9 | ||||
-rw-r--r-- | lib/Makemodule.am | 1 | ||||
-rw-r--r-- | lib/fstree/optimize_unpack_order.c | 161 | ||||
-rw-r--r-- | unpack/fill_files.c | 78 |
4 files changed, 192 insertions, 57 deletions
diff --git a/include/fstree.h b/include/fstree.h index 608ff97..dbc76db 100644 --- a/include/fstree.h +++ b/include/fstree.h @@ -323,4 +323,13 @@ file_info_t *fragment_by_chksum(file_info_t *fi, uint32_t chksum, uint64_t find_equal_blocks(file_info_t *file, file_info_t *list, size_t block_size); +/* + Optimize the order of the fstree file list for unpacking as to avoid + unpacking fragment blocks more than once and to improve locality when + fetching data from disk. The resulting list is returned in 'out'. + If num_jobs is > 1, the list is split up for parallel processing. + */ +void optimize_unpack_order(fstree_t *fs, size_t num_jobs, + file_info_t *out[num_jobs]); + #endif /* FSTREE_H */ diff --git a/lib/Makemodule.am b/lib/Makemodule.am index f49ff91..c3451f4 100644 --- a/lib/Makemodule.am +++ b/lib/Makemodule.am @@ -5,6 +5,7 @@ libfstree_a_SOURCES += lib/fstree/node_stat.c lib/fstree/mknode.c libfstree_a_SOURCES += lib/fstree/add_by_path.c lib/fstree/xattr.c libfstree_a_SOURCES += lib/fstree/node_from_path.c include/fstree.h libfstree_a_SOURCES += lib/fstree/gen_file_list.c lib/fstree/deduplicate.c +libfstree_a_SOURCES += lib/fstree/optimize_unpack_order.c libfstree_a_CFLAGS = $(AM_CFLAGS) libfstree_a_CPPFLAGS = $(AM_CPPFLAGS) diff --git a/lib/fstree/optimize_unpack_order.c b/lib/fstree/optimize_unpack_order.c new file mode 100644 index 0000000..ad036e8 --- /dev/null +++ b/lib/fstree/optimize_unpack_order.c @@ -0,0 +1,161 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ +/* + * optimize_unpack_order.c + * + * Copyright (C) 2019 David Oberhollenzer <goliath@infraroot.at> + */ +#include "config.h" +#include "fstree.h" + +static int compare_files(const fstree_t *fs, const file_info_t *lhs, + const file_info_t *rhs) +{ + /* NOOP < everything else */ + if (lhs->input_file == NULL) + return rhs->input_file == NULL ? 0 : -1; + + if (rhs->input_file == NULL) + return 1; + + /* Files with fragments come first, ordered by ID. + In case of tie, files without data blocks come first, + and the others are ordered by start block. */ + if (lhs->flags & FILE_FLAG_HAS_FRAGMENT) { + if (!(rhs->flags & FILE_FLAG_HAS_FRAGMENT)) + return -1; + + if (lhs->fragment < rhs->fragment) + return -1; + if (lhs->fragment > rhs->fragment) + return 1; + + if (lhs->size < fs->block_size) + return (rhs->size < fs->block_size) ? 0 : -1; + if (rhs->size < fs->block_size) + return 1; + goto order_by_start; + } + + if (rhs->flags & FILE_FLAG_HAS_FRAGMENT) + return 1; + + /* order the rest by start block */ +order_by_start: + return lhs->startblock < rhs->startblock ? -1 : + lhs->startblock > rhs->startblock ? 1 : 0; +} + +/* TODO: unify ad-hoc merge sort with the one in fstree_sort */ +static file_info_t *merge(const fstree_t *fs, file_info_t *lhs, + file_info_t *rhs) +{ + file_info_t *it; + file_info_t *head = NULL; + file_info_t **next_ptr = &head; + + while (lhs != NULL && rhs != NULL) { + if (compare_files(fs, lhs, rhs) <= 0) { + it = lhs; + lhs = lhs->next; + } else { + it = rhs; + rhs = rhs->next; + } + + *next_ptr = it; + next_ptr = &it->next; + } + + it = (lhs != NULL ? lhs : rhs); + *next_ptr = it; + return head; +} + +static file_info_t *list_sort(const fstree_t *fs, file_info_t *head) +{ + file_info_t *it, *half, *prev; + + it = half = prev = head; + while (it != NULL) { + prev = half; + half = half->next; + it = it->next; + if (it != NULL) { + it = it->next; + } + } + + // half refers to the (count/2)'th element ROUNDED UP. + // It will be null therefore only in the empty and the + // single element list + if (half == NULL) { + return head; + } + + prev->next = NULL; + + return merge(fs, list_sort(fs, head), list_sort(fs, half)); +} + +static file_info_t *split_list(file_info_t *list, uint64_t threashold) +{ + file_info_t *it, *new = NULL; + uint64_t size = 0; + + for (it = list; it != NULL; it = it->next) { + size += it->size - it->sparse; + + if (size >= threashold) { + new = it->next; + it->next = NULL; + break; + } + } + + return new; +} + +static uint64_t total_size(file_info_t *list) +{ + uint64_t size = 0; + file_info_t *it; + + for (it = list; it != NULL; it = it->next) + size += it->size - it->sparse; + + return size; +} + +void optimize_unpack_order(fstree_t *fs, size_t num_jobs, + file_info_t *out[num_jobs]) +{ + file_info_t *file_list; + uint64_t threshold; + size_t i; + + if (num_jobs < 1) + return; + + for (i = 0; i < num_jobs; ++i) + out[i] = NULL; + + file_list = list_sort(fs, fs->files); + while (file_list != NULL && file_list->input_file == NULL) + file_list = file_list->next; + + fs->files = NULL; + + if (num_jobs < 2) { + out[0] = file_list; + return; + } + + threshold = total_size(file_list) / num_jobs; + + for (i = 0; i < (num_jobs - 1); ++i) { + out[i] = file_list; + file_list = split_list(file_list, threshold); + } + + out[i] = file_list; +} diff --git a/unpack/fill_files.c b/unpack/fill_files.c index e104ae7..c0fc26c 100644 --- a/unpack/fill_files.c +++ b/unpack/fill_files.c @@ -38,64 +38,23 @@ static int fill_files(data_reader_t *data, file_info_t *list, int flags) return 0; } -static file_info_t *split_list(file_info_t *list, uint64_t threashold) -{ - file_info_t *it, *new = NULL; - uint64_t size = 0; - - for (it = list; it != NULL; it = it->next) { - if (it->input_file == NULL) - continue; - - size += it->size - it->sparse; - - if (size >= threashold) { - new = it->next; - it->next = NULL; - break; - } - } - - return new; -} - -static uint64_t total_size(file_info_t *list) -{ - uint64_t size = 0; - file_info_t *it; - - for (it = list; it != NULL; it = it->next) { - if (it->input_file != NULL) - size += it->size - it->sparse; - } - - return size; -} - int fill_unpacked_files(fstree_t *fs, data_reader_t *data, int flags, unsigned int num_jobs) { - file_info_t *sublists[num_jobs], *it; + file_info_t **sublists, *it; int exitstatus, status = 0; - uint64_t threshold; unsigned int i; pid_t pid; - if (num_jobs <= 1) { - status = fill_files(data, fs->files, flags); + if (num_jobs < 1) + num_jobs = 1; - for (it = fs->files; it != NULL; it = it->next) - free(it->input_file); - - return status; - } - - threshold = total_size(fs->files) / num_jobs; + sublists = alloca(sizeof(sublists[0]) * num_jobs); + optimize_unpack_order(fs, num_jobs, sublists); - for (i = 0; i < num_jobs; ++i) { - sublists[i] = fs->files; - - fs->files = split_list(fs->files, threshold); + if (num_jobs < 2) { + status = fill_files(data, sublists[0], flags); + goto out; } for (i = 0; i < num_jobs; ++i) { @@ -113,26 +72,31 @@ int fill_unpacked_files(fstree_t *fs, data_reader_t *data, int flags, if (pid < 0) { perror("fork"); status = -1; - num_jobs = i; break; } } - for (i = 0; i < num_jobs; ++i) { - do { - pid = waitpid(-1, &exitstatus, 0); + for (;;) { + errno = 0; + pid = waitpid(-1, &exitstatus, 0); + + if (pid < 0) { + if (errno == EINTR) + continue; if (errno == ECHILD) - goto out; - } while (pid < 0); + break; + } if (!WIFEXITED(exitstatus) || WEXITSTATUS(exitstatus) != EXIT_SUCCESS) { status = -1; } - + } +out: + for (i = 0; i < num_jobs; ++i) { for (it = sublists[i]; it != NULL; it = it->next) free(it->input_file); } -out: + return status; } |