summaryrefslogtreecommitdiff
path: root/lib/common/hardlink.c
diff options
context:
space:
mode:
authorDavid Oberhollenzer <david.oberhollenzer@sigma-star.at>2020-02-22 14:18:13 +0100
committerDavid Oberhollenzer <david.oberhollenzer@sigma-star.at>2020-02-22 14:18:13 +0100
commita807a45ffd64db60dc69012bef2538eaac3dac0b (patch)
tree818b73062c8684f9b96fca49d7d6d477c450fc22 /lib/common/hardlink.c
parent0b1fce762dca88659b3103783e8852cb7265823e (diff)
Make hard link detection scale better
Instead of doing a linear search, which scales quadratically, use a red-black tree with inode numbers as key for finding hard links. To reiterate: we can't just use a flat table, because the SquashFS file is potentially untrusted and the inode numbers can be anything, no matter what the super block says. Signed-off-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
Diffstat (limited to 'lib/common/hardlink.c')
-rw-r--r--lib/common/hardlink.c156
1 files changed, 81 insertions, 75 deletions
diff --git a/lib/common/hardlink.c b/lib/common/hardlink.c
index ac01938..63621a4 100644
--- a/lib/common/hardlink.c
+++ b/lib/common/hardlink.c
@@ -5,101 +5,107 @@
* Copyright (C) 2019 David Oberhollenzer <goliath@infraroot.at>
*/
#include "common.h"
+#include "rbtree.h"
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
-static size_t count_nodes(const sqfs_tree_node_t *n)
+static int map_nodes(rbtree_t *inumtree, sqfs_hard_link_t **out,
+ const sqfs_tree_node_t *n)
{
- size_t count = n->children == NULL ? 1 : 0;
+ const sqfs_tree_node_t *target;
+ sqfs_hard_link_t *lnk;
+ rbtree_node_t *tn;
+ sqfs_u32 idx;
+ int ret;
+
+ /* XXX: refuse to generate hard links to directories */
+ if (n->children != NULL) {
+ for (n = n->children; n != NULL; n = n->next) {
+ if (map_nodes(inumtree, out, n))
+ return -1;
+ }
+ return 0;
+ }
+
+ if (!is_filename_sane((const char *)n->name, false))
+ return 0;
+
+ idx = n->inode->base.inode_number;
+ tn = rbtree_lookup(inumtree, &idx);
+
+ if (tn == NULL) {
+ ret = rbtree_insert(inumtree, &idx, &n);
+ if (ret != 0)
+ goto fail_insert;
+ return 0;
+ }
- for (n = n->children; n != NULL; n = n->next)
- count += count_nodes(n);
+ target = *((const sqfs_tree_node_t **)rbtree_node_value(tn));
- return count;
+ lnk = calloc(1, sizeof(*lnk));
+ if (lnk == NULL)
+ goto fail_oom;
+
+ lnk->inode_number = idx;
+ lnk->target = sqfs_tree_node_get_path(target);
+ if (lnk->target == NULL)
+ goto fail_oom;
+
+ if (canonicalize_name(lnk->target) == 0) {
+ lnk->next = (*out);
+ (*out) = lnk;
+ } else {
+ free(lnk->target);
+ free(lnk);
+ }
+ return 0;
+fail_oom:
+ fputs("detecting hard links in file system tree: out of memory\n",
+ stderr);
+ free(lnk);
+ return -1;
+fail_insert:
+ sqfs_perror(NULL, "detecting hard links in file system tree", ret);
+ return -1;
}
-static size_t map_nodes(const sqfs_tree_node_t **list,
- const sqfs_tree_node_t *n,
- size_t idx)
+static int compare_inum(const void *lhs, const void *rhs)
{
- if (n->children == NULL)
- list[idx++] = n;
+ sqfs_u32 l = *((sqfs_u32 *)lhs), r = *((sqfs_u32 *)rhs);
- for (n = n->children; n != NULL; n = n->next)
- idx = map_nodes(list, n, idx);
-
- return idx;
+ return l < r ? -1 : (l > r ? 1 : 0);
}
int sqfs_tree_find_hard_links(const sqfs_tree_node_t *root,
sqfs_hard_link_t **out)
{
- sqfs_hard_link_t *hardlinks = NULL, *lnk = NULL;
- const sqfs_tree_node_t **list = NULL;
- size_t i, j, count;
- const char *name;
- bool is_dup;
-
- count = count_nodes(root);
- list = malloc(sizeof(list[0]) * count);
- if (list == NULL)
- goto fail;
-
- map_nodes(list, root, 0);
-
- for (i = 0; i < count; ++i) {
- name = (const char *)list[i]->name;
- if (!is_filename_sane(name, false))
- continue;
-
- is_dup = false;
-
- for (j = 0; j < i; ++j) {
- if (list[j]->inode->base.inode_number ==
- list[i]->inode->base.inode_number) {
- name = (const char *)list[j]->name;
- if (!is_filename_sane(name, false))
- continue;
-
- is_dup = true;
- break;
- }
- }
+ sqfs_hard_link_t *lnk = NULL;
+ rbtree_t inumtree;
+ int ret;
+
+ ret = rbtree_init(&inumtree, sizeof(sqfs_u32),
+ sizeof(sqfs_tree_node_t *),
+ compare_inum);
+ if (ret != 0) {
+ sqfs_perror(NULL,
+ "detecting hard links in file system tree", ret);
+ return -1;
+ }
- if (is_dup) {
- lnk = calloc(1, sizeof(*lnk));
- if (lnk == NULL)
- goto fail;
-
- lnk->inode_number = list[j]->inode->base.inode_number;
- lnk->target = sqfs_tree_node_get_path(list[j]);
- if (lnk->target == NULL)
- goto fail;
-
- if (canonicalize_name(lnk->target) == 0) {
- lnk->next = hardlinks;
- hardlinks = lnk;
- } else {
- free(lnk->target);
- free(lnk);
- }
+ ret = map_nodes(&inumtree, out, root);
+ rbtree_cleanup(&inumtree);
+
+ if (ret != 0) {
+ while ((*out) != NULL) {
+ lnk = (*out);
+ (*out) = lnk->next;
+ free(lnk->target);
+ free(lnk);
}
+ return -1;
}
- *out = hardlinks;
- free(list);
return 0;
-fail:
- perror("detecting hard links in file system tree");
- free(lnk);
- while (hardlinks != NULL) {
- lnk = hardlinks;
- hardlinks = hardlinks->next;
- free(lnk->target);
- free(lnk);
- }
- free(list);
- return -1;
}