From f5e46e0444197deee2eca93d36a8ebeb1ffd7a17 Mon Sep 17 00:00:00 2001
From: David Oberhollenzer
Date: Sat, 22 Jul 2023 14:06:47 +0200
Subject: Add a hard link detecting/filtering directory iterator

The reason this is implemented separately, instead of rolling it into
the recursive iterator, is so that we can do additional filtering in
between. For instance, we can rewrite the path and the hard link path
will match up, or if we remove nodes from the hierarchy, we won't end
up with a hard link pointing outside.

Signed-off-by: David Oberhollenzer
---
 include/sqfs/io.h        |  23 ++++
 lib/sqfs/Makemodule.am   |   8 +-
 lib/sqfs/src/io/dir_hl.c | 274 +++++++++++++++++++++++++++++++++++++++++++++++
 lib/sqfs/test/hl_dir.c   | 217 +++++++++++++++++++++++++++++++++++++
 4 files changed, 520 insertions(+), 2 deletions(-)
 create mode 100644 lib/sqfs/src/io/dir_hl.c
 create mode 100644 lib/sqfs/test/hl_dir.c

diff --git a/include/sqfs/io.h b/include/sqfs/io.h
index bf104f9..91343d6 100644
--- a/include/sqfs/io.h
+++ b/include/sqfs/io.h
@@ -653,6 +653,29 @@ SQFS_API int sqfs_dir_iterator_create_native(sqfs_dir_iterator_t **out,
 SQFS_API int sqfs_dir_iterator_create_recursive(sqfs_dir_iterator_t **out,
 						sqfs_dir_iterator_t *base);
 
+/**
+ * @brief Construct a directory iterator that detects hard links
+ *
+ * @memberof sqfs_dir_iterator_t
+ *
+ * This creates a directory iterator implementation that returns entries from
+ * a wrapped iterator, but detects and filters hard links using the device and
+ * inode numbers from the entries. If a non-directory entry is observed with
+ * the same values as a previous entry, the entry is changed into a link with
+ * the @ref SQFS_DIR_ENTRY_FLAG_HARD_LINK flag set and asking for the link
+ * target returns the previously seen entry name.
+ *
+ * The filter holds its own reference to the wrapped iterator and drops it
+ * when the filter itself is destroyed.
+ *
+ * @param out Returns a pointer to the hard link filter iterator
+ * @param base The directory iterator to wrap internally
+ *
+ * @return Zero on success, an @ref SQFS_ERROR code on failure.
+ */
+SQFS_API int sqfs_hard_link_filter_create(sqfs_dir_iterator_t **out,
+					  sqfs_dir_iterator_t *base);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/sqfs/Makemodule.am b/lib/sqfs/Makemodule.am
index 053dab8..3d61cd5 100644
--- a/lib/sqfs/Makemodule.am
+++ b/lib/sqfs/Makemodule.am
@@ -35,7 +35,7 @@ libsquashfs_la_SOURCES = $(LIBSQFS_HEARDS) lib/sqfs/src/id_table.c \
 	lib/sqfs/src/misc.c lib/sqfs/src/io/istream.c \
 	lib/sqfs/src/io/ostream.c lib/sqfs/src/io/file.c \
 	lib/sqfs/src/io/stream_api.c lib/sqfs/src/dir_entry.c \
-	lib/sqfs/src/io/dir_rec.c
+	lib/sqfs/src/io/dir_rec.c lib/sqfs/src/io/dir_hl.c
 libsquashfs_la_CPPFLAGS = $(AM_CPPFLAGS)
 libsquashfs_la_LDFLAGS = $(AM_LDFLAGS) -version-info $(LIBSQUASHFS_SO_VERSION)
 libsquashfs_la_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) $(ZLIB_CFLAGS)
@@ -140,9 +140,13 @@ test_stream_splice_LDADD = libsquashfs.la libio.a libutil.a libcompat.a
 test_rec_dir_SOURCES = lib/sqfs/test/rec_dir.c
 test_rec_dir_LDADD = libsquashfs.la libio.a libutil.a libcompat.a
 
+test_hl_dir_SOURCES = lib/sqfs/test/hl_dir.c
+test_hl_dir_LDADD = libsquashfs.la libio.a libutil.a libcompat.a
+
 LIBSQFS_TESTS = \
 	test_abi test_xattr test_table test_xattr_writer test_get_node_path \
-	test_istream_read test_istream_skip test_stream_splice test_rec_dir
+	test_istream_read test_istream_skip test_stream_splice test_rec_dir \
+	test_hl_dir
 
 noinst_PROGRAMS += xattr_benchmark
 check_PROGRAMS += $(LIBSQFS_TESTS)
diff --git a/lib/sqfs/src/io/dir_hl.c b/lib/sqfs/src/io/dir_hl.c
new file mode 100644
index 0000000..08efaaf
--- /dev/null
+++ b/lib/sqfs/src/io/dir_hl.c
@@ -0,0 +1,274 @@
+/* SPDX-License-Identifier: LGPL-3.0-or-later */
+/*
+ * dir_hl.c
+ *
+ * Copyright (C) 2023 David Oberhollenzer
+ */
+#define SQFS_BUILDING_DLL
+#include "config.h"
+
+#include "util/util.h"
+#include "util/rbtree.h"
+#include "compat.h"
+
+#include "sqfs/dir_entry.h"
+#include "sqfs/error.h"
+#include "sqfs/inode.h"
+#include "sqfs/io.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/* Search key of the lookup tree: device and inode number of an entry. */
+typedef struct {
+	sqfs_u64 dev;
+	sqfs_u64 inum;
+} inumtree_key_t;
+
+/*
+ * Iterator state. "state" latches the first non-zero result so that it is
+ * returned from then on; "link_target" is the name stored in "inumtree" that
+ * the current entry links to, or NULL if the current entry is no hard link.
+ */
+typedef struct {
+	sqfs_dir_iterator_t base;
+
+	int state;
+	const char *link_target;
+	sqfs_dir_iterator_t *src;
+	rbtree_t inumtree;
+} hl_iterator_t;
+
+/* Orders keys by device number first and by inode number second. */
+static int compare_inum(const void *ctx, const void *lhs, const void *rhs)
+{
+	const inumtree_key_t *l = (const inumtree_key_t *)lhs;
+	const inumtree_key_t *r = (const inumtree_key_t *)rhs;
+	(void)ctx;
+
+	if (l->dev != r->dev)
+		return l->dev < r->dev ? -1 : 1;
+
+	return l->inum < r->inum ? -1 : (l->inum > r->inum ? 1 : 0);
+}
+
+/*
+ * Returns the name stored for the (dev, inode) pair of the given entry, or
+ * NULL if the entry is a directory or the pair has not been stored yet.
+ */
+static const char *detect_hard_link(const hl_iterator_t *it,
+				    const sqfs_dir_entry_t *ent)
+{
+	inumtree_key_t key = { ent->dev, ent->inode };
+	rbtree_node_t *tn;
+
+	if (S_ISDIR(ent->mode))
+		return NULL;
+
+	tn = rbtree_lookup(&(it->inumtree), &key);
+	if (tn == NULL)
+		return NULL;
+
+	return *((const char **)rbtree_node_value(tn));
+}
+
+/*
+ * Remembers a copy of the entry name under its (dev, inode) pair. Directories
+ * and entries already flagged as hard links are skipped. Returns zero on
+ * success or an error code if the copy or the tree insertion fails.
+ */
+static int store_hard_link(hl_iterator_t *it,
+			   const sqfs_dir_entry_t *ent)
+{
+	inumtree_key_t key = { ent->dev, ent->inode };
+	char *target;
+	int ret;
+
+	if (S_ISDIR(ent->mode) || (ent->flags & SQFS_DIR_ENTRY_FLAG_HARD_LINK))
+		return 0;
+
+	target = strdup(ent->name);
+	if (target == NULL)
+		return SQFS_ERROR_ALLOC;
+
+	ret = rbtree_insert(&(it->inumtree), &key, &target);
+	if (ret != 0)
+		free(target);
+
+	return ret;
+}
+
+/* Frees the name strings owned by the tree nodes, not the nodes themselves. */
+static void remove_links(rbtree_node_t *n)
+{
+	if (n != NULL) {
+		char **lnk = rbtree_node_value(n);
+		free(*lnk);
+		*lnk = NULL;
+
+		remove_links(n->left);
+		remove_links(n->right);
+	}
+}
+
+/*****************************************************************************/
+
+/* Releases the stored names, the tree, the wrapped iterator and itself. */
+static void destroy(sqfs_object_t *obj)
+{
+	hl_iterator_t *it = (hl_iterator_t *)obj;
+
+	remove_links(it->inumtree.root);
+	rbtree_cleanup(&it->inumtree);
+	sqfs_drop(it->src);
+	free(it);
+}
+
+/*
+ * Fetches the next entry from the wrapped iterator. An entry whose
+ * (dev, inode) pair was seen before is turned into a symlink-typed entry
+ * carrying the hard link flag; otherwise its name is recorded for later.
+ */
+static int next(sqfs_dir_iterator_t *base, sqfs_dir_entry_t **out)
+{
+	hl_iterator_t *it = (hl_iterator_t *)base;
+	int ret;
+
+	if (it->state != 0) {
+		*out = NULL;
+		return it->state;
+	}
+
+	ret = it->src->next(it->src, out);
+	if (ret != 0) {
+		*out = NULL;
+		it->link_target = NULL;
+		it->state = ret;
+		return ret;
+	}
+
+	it->link_target = detect_hard_link(it, *out);
+
+	if (it->link_target == NULL) {
+		ret = store_hard_link(it, *out);
+		if (ret != 0) {
+			it->state = ret;
+			sqfs_free(*out);
+			*out = NULL;
+		}
+	} else {
+		(*out)->mode = SQFS_INODE_MODE_LNK | 0777;
+		(*out)->flags |= SQFS_DIR_ENTRY_FLAG_HARD_LINK;
+	}
+
+	return ret;
+}
+
+/* Hard links yield a copy of the recorded name, the rest is forwarded. */
+static int read_link(sqfs_dir_iterator_t *base, char **out)
+{
+	hl_iterator_t *it = (hl_iterator_t *)base;
+
+	if (it->link_target != NULL) {
+		*out = strdup(it->link_target);
+		if (*out == NULL)
+			return SQFS_ERROR_ALLOC;
+		return 0;
+	}
+
+	if (it->state != 0)
+		return SQFS_ERROR_NO_ENTRY;
+
+	return it->src->read_link(it->src, out);
+}
+
+/* A detected hard link is rejected as not being a directory. */
+static int open_subdir(sqfs_dir_iterator_t *base, sqfs_dir_iterator_t **out)
+{
+	hl_iterator_t *it = (hl_iterator_t *)base;
+
+	if (it->link_target != NULL) {
+		*out = NULL;
+		return SQFS_ERROR_NOT_DIR;
+	}
+
+	if (it->state != 0)
+		return SQFS_ERROR_NO_ENTRY;
+
+	return it->src->open_subdir(it->src, out);
+}
+
+/* Only forwarded if the current entry is valid and not a hard link. */
+static void ignore_subdir(sqfs_dir_iterator_t *base)
+{
+	hl_iterator_t *it = (hl_iterator_t *)base;
+
+	if (it->link_target == NULL && it->state == 0)
+		it->src->ignore_subdir(it->src);
+}
+
+/* A detected hard link is rejected as not being a file. */
+static int open_file_ro(sqfs_dir_iterator_t *base, sqfs_istream_t **out)
+{
+	hl_iterator_t *it = (hl_iterator_t *)base;
+
+	if (it->link_target != NULL) {
+		*out = NULL;
+		return SQFS_ERROR_NOT_FILE;
+	}
+
+	if (it->state != 0)
+		return SQFS_ERROR_NO_ENTRY;
+
+	return it->src->open_file_ro(it->src, out);
+}
+
+/* A detected hard link reports success with a NULL attribute list. */
+static int read_xattr(sqfs_dir_iterator_t *base, sqfs_xattr_t **out)
+{
+	hl_iterator_t *it = (hl_iterator_t *)base;
+
+	if (it->link_target != NULL) {
+		*out = NULL;
+		return 0;
+	}
+
+	if (it->state != 0)
+		return SQFS_ERROR_NO_ENTRY;
+
+	return it->src->read_xattr(it->src, out);
+}
+
+int sqfs_hard_link_filter_create(sqfs_dir_iterator_t **out,
+				 sqfs_dir_iterator_t *base)
+{
+	hl_iterator_t *it;
+	int ret;
+
+	*out = NULL;
+
+	it = calloc(1, sizeof(*it));
+	if (it == NULL)
+		return SQFS_ERROR_ALLOC;
+
+	ret = rbtree_init(&it->inumtree, sizeof(inumtree_key_t),
+			  sizeof(char *), compare_inum);
+	if (ret != 0) {
+		free(it);
+		return ret;
+	}
+
+	sqfs_object_init(it, destroy, NULL);
+	((sqfs_dir_iterator_t *)it)->next = next;
+	((sqfs_dir_iterator_t *)it)->read_link = read_link;
+	((sqfs_dir_iterator_t *)it)->open_subdir = open_subdir;
+	((sqfs_dir_iterator_t *)it)->ignore_subdir = ignore_subdir;
+	((sqfs_dir_iterator_t *)it)->open_file_ro = open_file_ro;
+	((sqfs_dir_iterator_t *)it)->read_xattr = read_xattr;
+
+	/* take our own reference, it is dropped again in destroy() */
+	it->src = sqfs_grab(base);
+
+	*out = (sqfs_dir_iterator_t *)it;
+	return 0;
+}
diff --git a/lib/sqfs/test/hl_dir.c b/lib/sqfs/test/hl_dir.c
new file mode 100644
index 0000000..4ff00e8
--- /dev/null
+++ b/lib/sqfs/test/hl_dir.c
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+/*
+ * hl_dir.c
+ *
+ * Copyright (C) 2023 David Oberhollenzer
+ */
+#include "config.h"
+
+#include "util/test.h"
+#include "sqfs/dir_entry.h"
+#include "sqfs/inode.h"
+#include "sqfs/io.h"
+#include "compat.h"
+
+typedef struct {
+	sqfs_dir_iterator_t obj;
+	size_t idx;
+} dummy_it_t;
+
+/* "blub" repeats the (dev, inum) pair of "bar", "c" repeats that of "a". */
+static const struct {
+	const char *name;
+	int dev;
+	int inum;
+} entries[] = {
+	{ "foo", 1, 1 },
+	{ "bar", 1, 2 },
+	{ "baz", 1, 3 },
+	{ "blub", 1, 2 },
+	{ "a", 2, 2 },
+	{ "b", 2, 1 },
+	{ "c", 2, 2 },
+};
+
+static int dummy_read_link(sqfs_dir_iterator_t *it, char **out)
+{
+	(void)it;
+	*out = NULL;
+	return 0;
+}
+
+/* The next four callbacks are not expected to run, hence TEST_ASSERT(0). */
+static void dummy_ignore_subdir(sqfs_dir_iterator_t *it)
+{
+	(void)it;
+	TEST_ASSERT(0);
+}
+
+static int dummy_open_file_ro(sqfs_dir_iterator_t *it, sqfs_istream_t **out)
+{
+	(void)it; (void)out;
+	TEST_ASSERT(0);
+	return 0;
+}
+
+static int dummy_read_xattr(sqfs_dir_iterator_t *it, sqfs_xattr_t **out)
+{
+	(void)it; (void)out;
+	TEST_ASSERT(0);
+	return 0;
+}
+
+static int dummy_open_subdir(sqfs_dir_iterator_t *base,
+			     sqfs_dir_iterator_t **out)
+{
+	(void)base; (void)out;
+	TEST_ASSERT(0);
+	return 0;
+}
+
+static int dummy_next(sqfs_dir_iterator_t *base, sqfs_dir_entry_t **out)
+{
+	dummy_it_t *it = (dummy_it_t *)base;
+	const char *name;
+	int inum, dev;
+
+	if (it->idx >= (sizeof(entries) / sizeof(entries[0])))
+		return 1;
+
+	name = entries[it->idx].name;
+	inum = entries[it->idx].inum;
+	dev = entries[it->idx].dev;
+	it->idx += 1;
+
+	*out = sqfs_dir_entry_create(name, SQFS_INODE_MODE_REG | 0644, 0);
+	TEST_NOT_NULL(*out);
+	(*out)->inode = inum;
+	(*out)->dev = dev;
+	return 0;
+}
+
+static void dummy_destroy(sqfs_object_t *obj)
+{
+	free(obj);
+}
+
+static sqfs_dir_iterator_t *mkdummyit(void)
+{
+	dummy_it_t *it = calloc(1, sizeof(*it));
+	TEST_NOT_NULL(it);
+
+	sqfs_object_init(it, dummy_destroy, NULL);
+	((sqfs_dir_iterator_t *)it)->read_link = dummy_read_link;
+	((sqfs_dir_iterator_t *)it)->ignore_subdir = dummy_ignore_subdir;
+	((sqfs_dir_iterator_t *)it)->open_file_ro = dummy_open_file_ro;
+	((sqfs_dir_iterator_t *)it)->read_xattr = dummy_read_xattr;
+	((sqfs_dir_iterator_t *)it)->next = dummy_next;
+	((sqfs_dir_iterator_t *)it)->open_subdir = dummy_open_subdir;
+	return (sqfs_dir_iterator_t *)it;
+}
+
+int main(int argc, char **argv)
+{
+	sqfs_dir_iterator_t *base, *it;
+	sqfs_dir_entry_t *ent;
+	char *target;
+	int ret;
+	(void)argc; (void)argv;
+
+	base = mkdummyit();
+	TEST_NOT_NULL(base);
+
+	ret = sqfs_hard_link_filter_create(&it, base);
+	TEST_EQUAL_I(ret, 0);
+	TEST_NOT_NULL(it);
+	TEST_EQUAL_UI(((sqfs_object_t *)base)->refcount, 2);
+
+	ret = it->next(it, &ent);
+	TEST_EQUAL_I(ret, 0);
+	TEST_NOT_NULL(ent);
+	TEST_STR_EQUAL(ent->name, "foo");
+	TEST_ASSERT(S_ISREG(ent->mode));
+	TEST_EQUAL_UI(ent->flags, 0);
+	ret = it->read_link(it, &target);
+	TEST_EQUAL_I(ret, 0);
+	TEST_NULL(target);
+	free(ent);
+
+	ret = it->next(it, &ent);
+	TEST_EQUAL_I(ret, 0);
+	TEST_NOT_NULL(ent);
+	TEST_STR_EQUAL(ent->name, "bar");
+	TEST_ASSERT(S_ISREG(ent->mode));
+	TEST_EQUAL_UI(ent->flags, 0);
+	ret = it->read_link(it, &target);
+	TEST_EQUAL_I(ret, 0);
+	TEST_NULL(target);
+	free(ent);
+
+	ret = it->next(it, &ent);
+	TEST_EQUAL_I(ret, 0);
+	TEST_NOT_NULL(ent);
+	TEST_STR_EQUAL(ent->name, "baz");
+	TEST_ASSERT(S_ISREG(ent->mode));
+	TEST_EQUAL_UI(ent->flags, 0);
+	ret = it->read_link(it, &target);
+	TEST_EQUAL_I(ret, 0);
+	TEST_NULL(target);
+	free(ent);
+
+	ret = it->next(it, &ent);
+	TEST_EQUAL_I(ret, 0);
+	TEST_NOT_NULL(ent);
+	TEST_STR_EQUAL(ent->name, "blub");
+	TEST_EQUAL_UI(ent->flags, SQFS_DIR_ENTRY_FLAG_HARD_LINK);
+	TEST_ASSERT(S_ISLNK(ent->mode));
+	ret = it->read_link(it, &target);
+	TEST_EQUAL_I(ret, 0);
+	TEST_NOT_NULL(target);
+	TEST_STR_EQUAL(target, "bar");
+	free(target);
+	free(ent);
+
+	ret = it->next(it, &ent);
+	TEST_EQUAL_I(ret, 0);
+	TEST_NOT_NULL(ent);
+	TEST_STR_EQUAL(ent->name, "a");
+	TEST_ASSERT(S_ISREG(ent->mode));
+	TEST_EQUAL_UI(ent->flags, 0);
+	ret = it->read_link(it, &target);
+	TEST_EQUAL_I(ret, 0);
+	TEST_NULL(target);
+	free(ent);
+
+	ret = it->next(it, &ent);
+	TEST_EQUAL_I(ret, 0);
+	TEST_NOT_NULL(ent);
+	TEST_STR_EQUAL(ent->name, "b");
+	TEST_ASSERT(S_ISREG(ent->mode));
+	TEST_EQUAL_UI(ent->flags, 0);
+	ret = it->read_link(it, &target);
+	TEST_EQUAL_I(ret, 0);
+	TEST_NULL(target);
+	free(ent);
+
+	ret = it->next(it, &ent);
+	TEST_EQUAL_I(ret, 0);
+	TEST_NOT_NULL(ent);
+	TEST_STR_EQUAL(ent->name, "c");
+	TEST_EQUAL_UI(ent->flags, SQFS_DIR_ENTRY_FLAG_HARD_LINK);
+	TEST_ASSERT(S_ISLNK(ent->mode));
+	ret = it->read_link(it, &target);
+	TEST_EQUAL_I(ret, 0);
+	TEST_NOT_NULL(target);
+	TEST_STR_EQUAL(target, "a");
+	free(target);
+	free(ent);
+
+	ret = it->next(it, &ent);
+	TEST_EQUAL_I(ret, 1);
+	TEST_NULL(ent);
+	it = sqfs_drop(it);
+
+	TEST_EQUAL_UI(((sqfs_object_t *)base)->refcount, 1);
+	base = sqfs_drop(base);
+	return EXIT_SUCCESS;
+}
-- 
cgit v1.2.3