From a41a57834fc70541ada5579e2d74114f7113e9bc Mon Sep 17 00:00:00 2001 From: David Oberhollenzer Date: Mon, 1 Nov 2021 11:51:49 +0100 Subject: Add sort-file implementation A `flags` field and `priority` are added to all file information structs. A news fstree function is introduced for parsing a "sort-file". Each line in the file is space separated, and has the following format: priority [flags] filename Priority is a 64 bit number, flags are optional and filename can be put in quotes if it is supposed to start or end with spaces. Single line comments can be used. The flags can be used to set block-processor flags (e.g. don't fragment, or don't compress), as well as instructing the parser to use file globbing to match the filename. After parsing the file, the list of file info structure is sorted according to the priority (default is 0) using a stable sort algorithm. Signed-off-by: David Oberhollenzer --- lib/fstree/Makemodule.am | 1 + lib/fstree/sort_by_file.c | 366 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 367 insertions(+) create mode 100644 lib/fstree/sort_by_file.c (limited to 'lib') diff --git a/lib/fstree/Makemodule.am b/lib/fstree/Makemodule.am index 4bdea27..c2981ce 100644 --- a/lib/fstree/Makemodule.am +++ b/lib/fstree/Makemodule.am @@ -7,6 +7,7 @@ libfstree_a_SOURCES += include/fstree.h lib/fstree/internal.h libfstree_a_SOURCES += lib/fstree/source_date_epoch.c libfstree_a_SOURCES += lib/fstree/canonicalize_name.c libfstree_a_SOURCES += lib/fstree/filename_sane.c +libfstree_a_SOURCES += lib/fstree/sort_by_file.c libfstree_a_CFLAGS = $(AM_CFLAGS) libfstree_a_CPPFLAGS = $(AM_CPPFLAGS) diff --git a/lib/fstree/sort_by_file.c b/lib/fstree/sort_by_file.c new file mode 100644 index 0000000..7d3f5cc --- /dev/null +++ b/lib/fstree/sort_by_file.c @@ -0,0 +1,366 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ +/* + * sort_by_file.c + * + * Copyright (C) 2021 David Oberhollenzer + */ +#include "config.h" + +#include "fstree.h" +#include "compat.h" + +#include "sqfs/block.h" + +#include +#include +#include + +static int decode_priority(const char *filename, size_t line_no, + char *line, sqfs_s64 *priority) +{ + bool negative = false; + size_t i = 0; + + if (line[0] == '-') { + negative = true; + i = 1; + } + + if (!isdigit(line[i])) + goto fail_number; + + *priority = 0; + + for (; isdigit(line[i]); ++i) { + sqfs_s64 x = line[i] - '0'; + + if ((*priority) >= ((0x7FFFFFFFFFFFFFFFL - x) / 10L)) + goto fail_ov; + + (*priority) = (*priority) * 10 + x; + } + + if (!isspace(line[i])) + goto fail_filename; + + while (isspace(line[i])) + ++i; + + if (line[i] == '\0') + goto fail_filename; + + if (negative) + (*priority) = -(*priority); + + memmove(line, line + i, strlen(line + i) + 1); + return 0; +fail_number: + fprintf(stderr, "%s: " PRI_SZ ": Line must start with " + "numeric sort priority.\n", + filename, line_no); + return -1; +fail_ov: + fprintf(stderr, "%s: " PRI_SZ ": Numeric overflow in sort priority.\n", + filename, line_no); + return -1; +fail_filename: + fprintf(stderr, "%s: " PRI_SZ ": Expacted ` ` " + "after sort priority.\n", + filename, line_no); + return -1; +} + +static int decode_filename(const char *filename, size_t line_no, char *buffer) +{ + char *src, *dst; + + if (buffer[0] == '"') { + src = buffer + 1; + dst = buffer; + + for (;;) { + if (src[0] == '\0') + goto fail_match; + + if (src[0] == '"') { + ++src; + break; + } + + if (src[0] == '\\') { + switch (src[1]) { + case '\\': + *(dst++) = '\\'; + src += 2; + break; + case '"': + *(dst++) = '"'; + src += 2; + break; + default: + goto fail_escape; + } + } else { + *(dst++) = *(src++); + } + } + + if (*src != '\0') + return -1; + } + + if (canonicalize_name(buffer)) + goto fail_canon; + return 0; +fail_canon: + fprintf(stderr, "%s: " PRI_SZ ": Malformed filename.\n", + filename, line_no); + return -1; +fail_escape: + fprintf(stderr, "%s: " PRI_SZ ": Unknown escape sequence `\\%c` " + "in filename.\n", filename, line_no, src[1]); + return -1; +fail_match: + fprintf(stderr, "%s: " PRI_SZ ": Unmatched '\"' in filename.\n", + filename, line_no); + return -1; +} + +static int decode_flags(const char *filename, size_t line_no, bool *do_glob, + bool *path_glob, int *flags, char *line) +{ + char *start = line; + + *do_glob = false; + *path_glob = false; + *flags = 0; + + if (*(line++) != '[') + return 0; + + for (;;) { + while (isspace(*line)) + ++line; + + if (*line == ']') { + ++line; + break; + } + + if (strncmp(line, "glob_no_path", 12) == 0) { + line += 12; + *do_glob = true; + *path_glob = false; + } else if (strncmp(line, "glob", 4) == 0) { + line += 4; + *do_glob = true; + *path_glob = true; + } else if (strncmp(line, "dont_fragment", 13) == 0) { + line += 13; + (*flags) |= SQFS_BLK_DONT_FRAGMENT; + } else if (strncmp(line, "align", 5) == 0) { + line += 5; + (*flags) |= SQFS_BLK_ALIGN; + } else if (strncmp(line, "dont_compress", 13) == 0) { + line += 13; + (*flags) |= SQFS_BLK_DONT_COMPRESS; + } else if (strncmp(line, "dont_deduplicate", 16) == 0) { + line += 16; + (*flags) |= SQFS_BLK_DONT_DEDUPLICATE; + } else if (strncmp(line, "nosparse", 8) == 0) { + line += 8; + (*flags) |= SQFS_BLK_IGNORE_SPARSE; + } else { + goto fail_flag; + } + + while (isspace(*line)) + ++line; + + if (*line == ']') { + ++line; + break; + } + + if (*(line++) != ',') + goto fail_sep; + } + + if (!isspace(*line)) + goto fail_fname; + + while (isspace(*line)) + ++line; + + memmove(start, line, strlen(line) + 1); + return 0; +fail_fname: + fprintf(stderr, "%s: " PRI_SZ ": Expected ` ` " + "after flag list.\n", filename, line_no); + return -1; +fail_sep: + fprintf(stderr, "%s: " PRI_SZ ": Unexpected '%c' after flag.\n", + filename, line_no, *line); + return -1; +fail_flag: + fprintf(stderr, "%s: " PRI_SZ ": Unknown flag `%.3s...`.\n", + filename, line_no, line); + return -1; +} + +static void sort_file_list(fstree_t *fs) +{ + file_info_t *out = NULL, *out_last = NULL; + + while (fs->files != NULL) { + sqfs_s64 lowest = fs->files->priority; + file_info_t *it, *prev; + + for (it = fs->files; it != NULL; it = it->next) { + if (it->priority < lowest) + lowest = it->priority; + } + + it = fs->files; + prev = NULL; + + while (it != NULL) { + if (it->priority != lowest) { + prev = it; + it = it->next; + continue; + } + + if (prev == NULL) { + fs->files = it->next; + } else { + prev->next = it->next; + } + + if (out == NULL) { + out = it; + } else { + out_last->next = it; + } + + out_last = it; + it = it->next; + out_last->next = NULL; + } + } + + fs->files = out; +} + +int fstree_sort_files(fstree_t *fs, istream_t *sortfile) +{ + const char *filename; + size_t line_num = 1; + file_info_t *it; + + for (it = fs->files; it != NULL; it = it->next) { + it->priority = 0; + it->flags = 0; + it->already_matched = false; + } + + filename = istream_get_filename(sortfile); + + for (;;) { + bool do_glob, path_glob, have_match; + char *line = NULL; + sqfs_s64 priority; + int ret, flags; + + ret = istream_get_line(sortfile, &line, &line_num, + ISTREAM_LINE_LTRIM | + ISTREAM_LINE_RTRIM | + ISTREAM_LINE_SKIP_EMPTY); + if (ret != 0) { + free(line); + if (ret < 0) + return -1; + break; + } + + if (line[0] == '#') { + free(line); + continue; + } + + if (decode_priority(filename, line_num, line, &priority)) { + free(line); + return -1; + } + + if (decode_flags(filename, line_num, &do_glob, &path_glob, + &flags, line)) { + free(line); + return -1; + } + + if (decode_filename(filename, line_num, line)) { + free(line); + return -1; + } + + have_match = false; + + for (it = fs->files; it != NULL; it = it->next) { + tree_node_t *node; + char *path; + + if (it->already_matched) + continue; + + node = container_of(it, tree_node_t, data.file); + path = fstree_get_path(node); + if (path == NULL) { + fprintf(stderr, "%s: " PRI_SZ ": out-of-memory\n", + filename, line_num); + free(line); + return -1; + } + + if (canonicalize_name(path)) { + fprintf(stderr, + "%s: " PRI_SZ ": [BUG] error " + "reconstructing node path\n", + filename, line_num); + free(line); + return -1; + } + + if (do_glob) { + ret = fnmatch(line, path, + path_glob ? FNM_PATHNAME : 0); + + } else { + ret = strcmp(path, line); + } + + free(path); + + if (ret == 0) { + have_match = true; + it->flags = flags; + it->priority = priority; + it->already_matched = true; + + if (!do_glob) + break; + } + } + + if (!have_match) { + fprintf(stderr, "WARNING: %s: " PRI_SZ ": no match " + "for '%s'.\n", + filename, line_num, line); + } + + free(line); + } + + sort_file_list(fs); + return 0; +} -- cgit v1.2.3