diff options
author | David Oberhollenzer <david.oberhollenzer@sigma-star.at> | 2020-02-22 14:18:13 +0100 |
---|---|---|
committer | David Oberhollenzer <david.oberhollenzer@sigma-star.at> | 2020-02-22 14:18:13 +0100 |
commit | a807a45ffd64db60dc69012bef2538eaac3dac0b (patch) | |
tree | 818b73062c8684f9b96fca49d7d6d477c450fc22 /lib/common | |
parent | 0b1fce762dca88659b3103783e8852cb7265823e (diff) |
Make hard link detection scale better
Instead of doing a linear search, which scales quadratically, use a
red-black tree with inode numbers as key for finding hard links.
To reiterate: we can't just use a flat table, because the SquashFS
file is potentially untrusted and the inode numbers can be anything,
no matter what the super block says.
Signed-off-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
Diffstat (limited to 'lib/common')
-rw-r--r-- | lib/common/hardlink.c | 156 | ||||
-rw-r--r-- | lib/common/perror.c | 8 |
2 files changed, 88 insertions, 76 deletions
diff --git a/lib/common/hardlink.c b/lib/common/hardlink.c index ac01938..63621a4 100644 --- a/lib/common/hardlink.c +++ b/lib/common/hardlink.c @@ -5,101 +5,107 @@ * Copyright (C) 2019 David Oberhollenzer <goliath@infraroot.at> */ #include "common.h" +#include "rbtree.h" #include <stdlib.h> #include <assert.h> #include <stdio.h> -static size_t count_nodes(const sqfs_tree_node_t *n) +static int map_nodes(rbtree_t *inumtree, sqfs_hard_link_t **out, + const sqfs_tree_node_t *n) { - size_t count = n->children == NULL ? 1 : 0; + const sqfs_tree_node_t *target; + sqfs_hard_link_t *lnk; + rbtree_node_t *tn; + sqfs_u32 idx; + int ret; + + /* XXX: refuse to generate hard links to directories */ + if (n->children != NULL) { + for (n = n->children; n != NULL; n = n->next) { + if (map_nodes(inumtree, out, n)) + return -1; + } + return 0; + } + + if (!is_filename_sane((const char *)n->name, false)) + return 0; + + idx = n->inode->base.inode_number; + tn = rbtree_lookup(inumtree, &idx); + + if (tn == NULL) { + ret = rbtree_insert(inumtree, &idx, &n); + if (ret != 0) + goto fail_insert; + return 0; + } - for (n = n->children; n != NULL; n = n->next) - count += count_nodes(n); + target = *((const sqfs_tree_node_t **)rbtree_node_value(tn)); - return count; + lnk = calloc(1, sizeof(*lnk)); + if (lnk == NULL) + goto fail_oom; + + lnk->inode_number = idx; + lnk->target = sqfs_tree_node_get_path(target); + if (lnk->target == NULL) + goto fail_oom; + + if (canonicalize_name(lnk->target) == 0) { + lnk->next = (*out); + (*out) = lnk; + } else { + free(lnk->target); + free(lnk); + } + return 0; +fail_oom: + fputs("detecting hard links in file system tree: out of memory\n", + stderr); + free(lnk); + return -1; +fail_insert: + sqfs_perror(NULL, "detecting hard links in file system tree", ret); + return -1; } -static size_t map_nodes(const sqfs_tree_node_t **list, - const sqfs_tree_node_t *n, - size_t idx) +static int compare_inum(const void *lhs, const void *rhs) { - if (n->children == NULL) - list[idx++] = n; + sqfs_u32 l = *((sqfs_u32 *)lhs), r = *((sqfs_u32 *)rhs); - for (n = n->children; n != NULL; n = n->next) - idx = map_nodes(list, n, idx); - - return idx; + return l < r ? -1 : (l > r ? 1 : 0); } int sqfs_tree_find_hard_links(const sqfs_tree_node_t *root, sqfs_hard_link_t **out) { - sqfs_hard_link_t *hardlinks = NULL, *lnk = NULL; - const sqfs_tree_node_t **list = NULL; - size_t i, j, count; - const char *name; - bool is_dup; - - count = count_nodes(root); - list = malloc(sizeof(list[0]) * count); - if (list == NULL) - goto fail; - - map_nodes(list, root, 0); - - for (i = 0; i < count; ++i) { - name = (const char *)list[i]->name; - if (!is_filename_sane(name, false)) - continue; - - is_dup = false; - - for (j = 0; j < i; ++j) { - if (list[j]->inode->base.inode_number == - list[i]->inode->base.inode_number) { - name = (const char *)list[j]->name; - if (!is_filename_sane(name, false)) - continue; - - is_dup = true; - break; - } - } + sqfs_hard_link_t *lnk = NULL; + rbtree_t inumtree; + int ret; + + ret = rbtree_init(&inumtree, sizeof(sqfs_u32), + sizeof(sqfs_tree_node_t *), + compare_inum); + if (ret != 0) { + sqfs_perror(NULL, + "detecting hard links in file system tree", ret); + return -1; + } - if (is_dup) { - lnk = calloc(1, sizeof(*lnk)); - if (lnk == NULL) - goto fail; - - lnk->inode_number = list[j]->inode->base.inode_number; - lnk->target = sqfs_tree_node_get_path(list[j]); - if (lnk->target == NULL) - goto fail; - - if (canonicalize_name(lnk->target) == 0) { - lnk->next = hardlinks; - hardlinks = lnk; - } else { - free(lnk->target); - free(lnk); - } + ret = map_nodes(&inumtree, out, root); + rbtree_cleanup(&inumtree); + + if (ret != 0) { + while ((*out) != NULL) { + lnk = (*out); + (*out) = lnk->next; + free(lnk->target); + free(lnk); } + return -1; } - *out = hardlinks; - free(list); return 0; -fail: - perror("detecting hard links in file system tree"); - free(lnk); - while (hardlinks != NULL) { - lnk = hardlinks; - hardlinks = hardlinks->next; - free(lnk->target); - free(lnk); - } - free(list); - return -1; } diff --git a/lib/common/perror.c b/lib/common/perror.c index c7dab08..53a8c16 100644 --- a/lib/common/perror.c +++ b/lib/common/perror.c @@ -69,5 +69,11 @@ void sqfs_perror(const char *file, const char *action, int error_code) break; } - fprintf(stderr, "%s: %s: %s.\n", file, action, errstr); + if (file != NULL) + fprintf(stderr, "%s: ", file); + + if (action != NULL) + fprintf(stderr, "%s: ", action); + + fprintf(stderr, "%s.\n", errstr); } |