From 1b832b6dfb8d9da2b94f07ccc95c03614b378786 Mon Sep 17 00:00:00 2001 From: David Oberhollenzer Date: Fri, 21 Apr 2023 23:02:32 +0200 Subject: libutil: Add a stacked, recursive directory tree iterator The concept is simple: Use the existing, platform dependent iterator to walk a directory. If a directory entry is encountered, recurse into it using the open_subdir handler, reconstruct the full path for any entries discovered using the directory stack. An additional function is added to skip a sub-hierarchy. Signed-off-by: David Oberhollenzer --- lib/util/Makemodule.am | 13 +- lib/util/src/dir_tree_iterator.c | 215 ++++++++++++++++++++++++++++++++ lib/util/test/dir_iterator.c | 36 +++--- lib/util/test/dir_tree_iterator.c | 197 +++++++++++++++++++++++++++++ lib/util/test/testdir/dirb/dirx/file_x0 | 0 lib/util/test/testdir/dirb/dirx/file_x1 | 0 lib/util/test/testdir/dirb/dirx/file_x2 | 0 7 files changed, 444 insertions(+), 17 deletions(-) create mode 100644 lib/util/src/dir_tree_iterator.c create mode 100644 lib/util/test/dir_tree_iterator.c create mode 100644 lib/util/test/testdir/dirb/dirx/file_x0 create mode 100644 lib/util/test/testdir/dirb/dirx/file_x1 create mode 100644 lib/util/test/testdir/dirb/dirx/file_x2 (limited to 'lib/util') diff --git a/lib/util/Makemodule.am b/lib/util/Makemodule.am index 04c5989..26056ea 100644 --- a/lib/util/Makemodule.am +++ b/lib/util/Makemodule.am @@ -2,14 +2,15 @@ libutil_a_SOURCES = include/util/util.h include/util/str_table.h \ include/util/hash_table.h include/util/test.h include/util/rbtree.h \ include/util/array.h include/util/threadpool.h \ include/util/w32threadwrap.h include/util/mempool.h \ - include/util/dir_iterator.h \ + include/util/dir_iterator.h include/util/dir_tree_iterator.h \ lib/util/src/str_table.c lib/util/src/alloc.c lib/util/src/rbtree.c \ lib/util/src/array.c lib/util/src/xxhash.c lib/util/src/hash_table.c \ lib/util/src/fast_urem_by_const.h lib/util/src/threadpool_serial.c \ lib/util/src/is_memory_zero.c 
lib/util/src/mkdir_p.c \
 	lib/util/src/canonicalize_name.c lib/util/src/filename_sane.c \
 	lib/util/src/source_date_epoch.c lib/util/src/file_cmp.c \
-	lib/util/src/hex_decode.c lib/util/src/base64_decode.c
+	lib/util/src/hex_decode.c lib/util/src/base64_decode.c \
+	lib/util/src/dir_tree_iterator.c
 libutil_a_CFLAGS = $(AM_CFLAGS)
 libutil_a_CPPFLAGS = $(AM_CPPFLAGS)
 
@@ -85,10 +86,16 @@ test_dir_iterator_LDADD = libutil.a libcompat.a
 test_dir_iterator_CPPFLAGS = $(AM_CPPFLAGS)
 test_dir_iterator_CPPFLAGS += -DTESTPATH=$(top_srcdir)/lib/util/test/testdir
 
+test_dir_tree_iterator_SOURCES = lib/util/test/dir_tree_iterator.c
+test_dir_tree_iterator_LDADD = libutil.a libcompat.a
+test_dir_tree_iterator_CPPFLAGS = $(AM_CPPFLAGS)
+test_dir_tree_iterator_CPPFLAGS += -DTESTPATH=$(top_srcdir)/lib/util/test/testdir
+
 LIBUTIL_TESTS = \
 	test_str_table test_rbtree test_xxhash test_threadpool test_ismemzero \
 	test_canonicalize_name test_filename_sane test_filename_sane_w32 \
-	test_sdate_epoch test_hex_decode test_base64_decode test_dir_iterator
+	test_sdate_epoch test_hex_decode test_base64_decode test_dir_iterator \
+	test_dir_tree_iterator
 
 check_PROGRAMS += $(LIBUTIL_TESTS)
 TESTS += $(LIBUTIL_TESTS)
diff --git a/lib/util/src/dir_tree_iterator.c b/lib/util/src/dir_tree_iterator.c
new file mode 100644
index 0000000..88b6840
--- /dev/null
+++ b/lib/util/src/dir_tree_iterator.c
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: LGPL-3.0-or-later */
+/*
+ * dir_tree_iterator.c
+ *
+ * Copyright (C) 2023 David Oberhollenzer
+ */
+#include "config.h"
+#include "util/dir_tree_iterator.h"
+#include "util/util.h"
+#include "sqfs/error.h"
+/* NOTE(review): the two #include lines below carry no header name (the angle-bracket text was apparently lost when this page was extracted); presumably <stdlib.h> and <string.h> - confirm against the repository. */
+#include
+#include
+/* One level of the directory stack: the iterator of an open directory and the name it was entered by (the root is pushed with an empty name). */
+typedef struct dir_stack_t {
+	struct dir_stack_t *next;
+	dir_iterator_t *dir;
+	char name[];
+} dir_stack_t;
+/* The recursive iterator: state holds the last non-zero result of next(), top is the innermost directory currently being read. */
+typedef struct {
+	dir_iterator_t base;
+
+	int state;
+	dir_stack_t *top;
+} dir_tree_iterator_t;
+/* Unlink the top stack entry, drop its iterator via sqfs_drop and free it; does nothing when the stack is empty. */
+static void pop(dir_tree_iterator_t *it)
+{
+	if (it->top != NULL) {
+		dir_stack_t *ent = it->top;
+		it->top = it->top->next;
+
+		sqfs_drop(ent->dir);
+		free(ent);
+	}
+}
+/* Push dir onto the stack together with a copy of name, holding it via sqfs_grab (callers drop their own reference afterwards). Returns 0 or SQFS_ERROR_ALLOC. */
+static int push(dir_tree_iterator_t *it, const char *name, dir_iterator_t *dir)
+{
+	dir_stack_t *ent = alloc_flex(sizeof(*ent), 1, strlen(name) + 1);
+
+	if (ent == NULL)
+		return SQFS_ERROR_ALLOC;
+
+	strcpy(ent->name, name);
+	ent->dir = sqfs_grab(dir);
+	ent->next = it->top;
+	it->top = ent;
+	return 0;
+}
+
+/*****************************************************************************/
+/* Destructor installed through sqfs_object_init: pops every remaining stack level, then frees the iterator itself. */
+static void destroy(sqfs_object_t *obj)
+{
+	dir_tree_iterator_t *it = (dir_tree_iterator_t *)obj;
+
+	while (it->top != NULL)
+		pop(it);
+
+	free(it);
+}
+/* Fetch the next entry of the tree. On success returns 0 and stores a heap allocated entry in *out whose name is prefixed with the stacked directory names. Otherwise *out is NULL and the result is 1 once the stack is empty, or the non-zero code of the call that failed. */
+static int next(dir_iterator_t *base, dir_entry_t **out)
+{
+	dir_tree_iterator_t *it = (dir_tree_iterator_t *)base;
+	dir_iterator_t *sub = NULL;
+	dir_entry_t *ent = NULL;
+	dir_stack_t *sit = NULL;
+	size_t plen = 0;
+	int ret;
+
+	*out = NULL;
+	/* A previous end-of-tree or error result is sticky. */
+	if (it->state != 0)
+		return it->state;
+	/* Read from the innermost directory; when it is exhausted, pop it and continue with its parent. */
+	for (;;) {
+		if (it->top == NULL) {
+			ret = 1; /* stack empty: the whole tree has been visited */
+			goto fail;
+		}
+
+		ret = it->top->dir->next(it->top->dir, &ent);
+		if (ret < 0)
+			goto fail;
+
+		if (ret > 0) {
+			pop(it);
+			continue;
+		}
+		/* The "." and ".." entries are never reported. */
+		if (!strcmp(ent->name, ".") || !strcmp(ent->name, "..")) {
+			free(ent);
+			ent = NULL;
+			continue;
+		}
+
+		break;
+	}
+	/* Length of the path prefix: every non-empty stack name plus one byte for its '/' separator. */
+	for (sit = it->top; sit != NULL; sit = sit->next) {
+		size_t len = strlen(sit->name);
+		if (len > 0)
+			plen += len + 1;
+	}
+	/* Grow the entry by plen bytes, move the name (terminator included) back by plen and write the prefix backwards in front of it, starting with the innermost directory. */
+	if (plen > 0) {
+		size_t slen = strlen(ent->name) + 1;
+		void *new = realloc(ent, sizeof(*ent) + plen + slen);
+		char *dst;
+
+		if (new == NULL) {
+			ret = SQFS_ERROR_ALLOC;
+			goto fail;
+		}
+
+		ent = new;
+		memmove(ent->name + plen, ent->name, slen);
+		dst = ent->name + plen;
+
+		for (sit = it->top; sit != NULL; sit = sit->next) {
+			size_t len = strlen(sit->name);
+			if (len > 0) {
+				*(--dst) = '/';
+				dst -= len;
+				memcpy(dst, sit->name, len);
+			}
+		}
+	}
+	/* A directory is opened and pushed right away, so the following call reads from inside it. */
+	if (S_ISDIR(ent->mode)) {
+		ret = it->top->dir->open_subdir(it->top->dir, &sub);
+		if (ret != 0)
+			goto fail;
+
+		ret = push(it, ent->name + plen, sub); /* bare name, without the prefix */
+		sqfs_drop(sub);
+		if (ret != 0)
+			goto fail;
+	}
+
+	*out = ent;
+	return it->state;
+fail:
+	free(ent);
+	it->state = ret; /* remembered so that later calls return it immediately */
+	return it->state;
+}
+/* Forward read_link to the directory iterator on top of the stack; yields SQFS_ERROR_NO_ENTRY and a NULL *out when the stack is empty. */
+static int read_link(dir_iterator_t *base, char **out)
+{
+	dir_tree_iterator_t *it = (dir_tree_iterator_t *)base;
+
+	if (it->top == NULL) {
+		*out = NULL;
+		return SQFS_ERROR_NO_ENTRY;
+	}
+
+	return it->top->dir->read_link(it->top->dir, out);
+}
+/* Forward open_subdir to the directory iterator on top of the stack; yields SQFS_ERROR_NO_ENTRY and a NULL *out when the stack is empty. */
+static int open_subdir(dir_iterator_t *base, dir_iterator_t **out)
+{
+	dir_tree_iterator_t *it = (dir_tree_iterator_t *)base;
+
+	if (it->top == NULL) {
+		*out = NULL;
+		return SQFS_ERROR_NO_ENTRY;
+	}
+
+	return it->top->dir->open_subdir(it->top->dir, out);
+}
+/* Create a recursive iterator rooted at path. Returns NULL on failure; the two allocation failures handled here are reported on stderr. */
+dir_iterator_t *dir_tree_iterator_create(const char *path)
+{
+	dir_tree_iterator_t *it = calloc(1, sizeof(*it));
+	dir_iterator_t *dir;
+	int ret;
+
+	if (it == NULL) {
+		perror(path);
+		return NULL;
+	}
+	/* Open the root directory and make it the bottom stack entry, with an empty name. */
+	dir = dir_iterator_create(path);
+	if (dir == NULL)
+		goto fail;
+
+	ret = push(it, "", dir);
+	dir = sqfs_drop(dir);
+	if (ret != 0) {
+		fprintf(stderr, "%s: out of memory\n", path);
+		goto fail;
+	}
+	/* Register the destructor, copy the dev field of the root iterator and install the callbacks. */
+	sqfs_object_init(it, destroy, NULL);
+	((dir_iterator_t *)it)->dev = it->top->dir->dev;
+	((dir_iterator_t *)it)->next = next;
+	((dir_iterator_t *)it)->read_link = read_link;
+	((dir_iterator_t *)it)->open_subdir = open_subdir;
+
+	return (dir_iterator_t *)it;
+fail:
+	free(it);
+	return NULL;
+}
+/* Pop the top of the stack so that next() resumes in the parent; called right after next() returned a directory, this skips everything inside that directory. */
+void dir_tree_iterator_skip(dir_iterator_t *base)
+{
+	dir_tree_iterator_t *it = (dir_tree_iterator_t *)base;
+
+	pop(it);
+}
diff --git a/lib/util/test/dir_iterator.c b/lib/util/test/dir_iterator.c
index d209afd..5ebdaf7 100644
--- a/lib/util/test/dir_iterator.c
+++ b/lib/util/test/dir_iterator.c
@@ -144,25 +144,31 @@ int main(int argc, char **argv)
 	TEST_EQUAL_I(ret, 0);
 
 	ret = dir->next(dir, &ent[5]);
-	TEST_NULL(ent[5]);
+	TEST_NOT_NULL(ent[5]);
+	TEST_EQUAL_I(ret, 0);
+
+	ret = dir->next(dir, &dent);
+	TEST_NULL(dent);
 	TEST_ASSERT(ret > 0);
 
 	dir = sqfs_drop(dir);
 
-	qsort(ent, 5, sizeof(ent[0]), compare_entries);
+	qsort(ent, 6,
sizeof(ent[0]), compare_entries); TEST_STR_EQUAL(ent[0]->name, "."); TEST_ASSERT(S_ISDIR(ent[0]->mode)); TEST_STR_EQUAL(ent[1]->name, ".."); TEST_ASSERT(S_ISDIR(ent[1]->mode)); - TEST_STR_EQUAL(ent[2]->name, "file_b0"); - TEST_ASSERT(S_ISREG(ent[2]->mode)); - TEST_STR_EQUAL(ent[3]->name, "file_b1"); + TEST_STR_EQUAL(ent[2]->name, "dirx"); + TEST_ASSERT(S_ISDIR(ent[2]->mode)); + TEST_STR_EQUAL(ent[3]->name, "file_b0"); TEST_ASSERT(S_ISREG(ent[3]->mode)); - TEST_STR_EQUAL(ent[4]->name, "file_b2"); + TEST_STR_EQUAL(ent[4]->name, "file_b1"); TEST_ASSERT(S_ISREG(ent[4]->mode)); + TEST_STR_EQUAL(ent[5]->name, "file_b2"); + TEST_ASSERT(S_ISREG(ent[5]->mode)); - for (i = 0; i < 5; ++i) + for (i = 0; i < 6; ++i) free(ent[i]); /* scan first sub hierarchy */ @@ -288,7 +294,7 @@ int main(int argc, char **argv) free(ent[i]); /* sub iterator b */ - for (i = 0; i < 5; ++i) { + for (i = 0; i < 6; ++i) { ret = subb->next(subb, &ent[i]); TEST_NOT_NULL(ent[0]); TEST_EQUAL_I(ret, 0); @@ -305,20 +311,22 @@ int main(int argc, char **argv) TEST_ASSERT(ret > 0); subb = sqfs_drop(subb); - qsort(ent, 5, sizeof(ent[0]), compare_entries); + qsort(ent, 6, sizeof(ent[0]), compare_entries); TEST_STR_EQUAL(ent[0]->name, "."); TEST_ASSERT(S_ISDIR(ent[0]->mode)); TEST_STR_EQUAL(ent[1]->name, ".."); TEST_ASSERT(S_ISDIR(ent[1]->mode)); - TEST_STR_EQUAL(ent[2]->name, "file_b0"); - TEST_ASSERT(S_ISREG(ent[2]->mode)); - TEST_STR_EQUAL(ent[3]->name, "file_b1"); + TEST_STR_EQUAL(ent[2]->name, "dirx"); + TEST_ASSERT(S_ISDIR(ent[2]->mode)); + TEST_STR_EQUAL(ent[3]->name, "file_b0"); TEST_ASSERT(S_ISREG(ent[3]->mode)); - TEST_STR_EQUAL(ent[4]->name, "file_b2"); + TEST_STR_EQUAL(ent[4]->name, "file_b1"); TEST_ASSERT(S_ISREG(ent[4]->mode)); + TEST_STR_EQUAL(ent[5]->name, "file_b2"); + TEST_ASSERT(S_ISREG(ent[5]->mode)); - for (i = 0; i < 5; ++i) + for (i = 0; i < 6; ++i) free(ent[i]); /* sub iterator c */ diff --git a/lib/util/test/dir_tree_iterator.c b/lib/util/test/dir_tree_iterator.c new file mode 100644 
index 0000000..e47efed
--- /dev/null
+++ b/lib/util/test/dir_tree_iterator.c
@@ -0,0 +1,197 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+/*
+ * dir_tree_iterator.c
+ *
+ * Copyright (C) 2019 David Oberhollenzer
+ */
+#include "config.h"
+
+#include "util/dir_tree_iterator.h"
+#include "sqfs/error.h"
+#include "util/test.h"
+#include "compat.h"
+/* qsort comparator: a and b point at dir_entry_t pointers, which are ordered by entry name using strcmp. */
+static int compare_entries(const void *a, const void *b)
+{
+	const dir_entry_t *const *lhs = a;
+	const dir_entry_t *const *rhs = b;
+
+	return strcmp((*lhs)->name, (*rhs)->name);
+}
+/* Walks TEST_PATH three times (everything, skipping dirb/dirx, skipping dirb) and checks the sorted entry names and file types of each pass. */
+int main(int argc, char **argv)
+{
+	dir_entry_t *ent[17];
+	dir_iterator_t *dir;
+	size_t i;
+	int ret;
+	(void)argc; (void)argv;
+	/* first pass: full traversal, 16 entries followed by an end-of-tree result */
+	dir = dir_tree_iterator_create(TEST_PATH);
+	TEST_NOT_NULL(dir);
+
+	for (i = 0; i < 16; ++i) {
+		ret = dir->next(dir, &ent[i]);
+		TEST_NOT_NULL(ent[i]);
+		TEST_EQUAL_I(ret, 0);
+		printf("READ %s\n", ent[i]->name);
+	}
+
+	ret = dir->next(dir, &ent[16]);
+	TEST_NULL(ent[16]);
+	TEST_ASSERT(ret > 0);
+
+	dir = sqfs_drop(dir);
+	/* sort by name so the checks below do not depend on the order the entries were read in */
+	qsort(ent, 16, sizeof(ent[0]), compare_entries);
+
+	printf("After sort:\n");
+	for (i = 0; i < 16; ++i)
+		printf("%s\n", ent[i]->name);
+
+	TEST_STR_EQUAL(ent[0]->name, "dira");
+	TEST_ASSERT(S_ISDIR(ent[0]->mode));
+	TEST_STR_EQUAL(ent[1]->name, "dira/file_a0");
+	TEST_ASSERT(S_ISREG(ent[1]->mode));
+	TEST_STR_EQUAL(ent[2]->name, "dira/file_a1");
+	TEST_ASSERT(S_ISREG(ent[2]->mode));
+	TEST_STR_EQUAL(ent[3]->name, "dira/file_a2");
+	TEST_ASSERT(S_ISREG(ent[3]->mode));
+	TEST_STR_EQUAL(ent[4]->name, "dirb");
+	TEST_ASSERT(S_ISDIR(ent[4]->mode));
+	TEST_STR_EQUAL(ent[5]->name, "dirb/dirx");
+	TEST_ASSERT(S_ISDIR(ent[5]->mode));
+	TEST_STR_EQUAL(ent[6]->name, "dirb/dirx/file_x0");
+	TEST_ASSERT(S_ISREG(ent[6]->mode));
+	TEST_STR_EQUAL(ent[7]->name, "dirb/dirx/file_x1");
+	TEST_ASSERT(S_ISREG(ent[7]->mode));
+	TEST_STR_EQUAL(ent[8]->name, "dirb/dirx/file_x2");
+	TEST_ASSERT(S_ISREG(ent[8]->mode));
+	TEST_STR_EQUAL(ent[9]->name, "dirb/file_b0");
+	TEST_ASSERT(S_ISREG(ent[9]->mode));
+	TEST_STR_EQUAL(ent[10]->name, "dirb/file_b1");
+	TEST_ASSERT(S_ISREG(ent[10]->mode));
+	TEST_STR_EQUAL(ent[11]->name, "dirb/file_b2");
+	TEST_ASSERT(S_ISREG(ent[11]->mode));
+	TEST_STR_EQUAL(ent[12]->name, "dirc");
+	TEST_ASSERT(S_ISDIR(ent[12]->mode));
+	TEST_STR_EQUAL(ent[13]->name, "dirc/file_c0");
+	TEST_ASSERT(S_ISREG(ent[13]->mode));
+	TEST_STR_EQUAL(ent[14]->name, "dirc/file_c1");
+	TEST_ASSERT(S_ISREG(ent[14]->mode));
+	TEST_STR_EQUAL(ent[15]->name, "dirc/file_c2");
+	TEST_ASSERT(S_ISREG(ent[15]->mode));
+
+	for (i = 0; i < 16; ++i)
+		free(ent[i]);
+
+	/* second pass: skip the contents of dirb/dirx, 13 entries remain */
+	printf("**********\n");
+
+	dir = dir_tree_iterator_create(TEST_PATH);
+	TEST_NOT_NULL(dir);
+
+	for (i = 0; i < 13; ++i) {
+		ret = dir->next(dir, &ent[i]);
+		TEST_NOT_NULL(ent[i]);
+		TEST_EQUAL_I(ret, 0);
+		printf("READ %s\n", ent[i]->name);
+		/* leave dirb/dirx again before any of its entries is read */
+		if (!strcmp(ent[i]->name, "dirb/dirx"))
+			dir_tree_iterator_skip(dir);
+	}
+
+	ret = dir->next(dir, &ent[13]);
+	TEST_NULL(ent[13]);
+	TEST_ASSERT(ret > 0);
+
+	dir = sqfs_drop(dir);
+
+	qsort(ent, 13, sizeof(ent[0]), compare_entries);
+
+	printf("After sort:\n");
+	for (i = 0; i < 13; ++i)
+		printf("%s\n", ent[i]->name);
+
+	TEST_STR_EQUAL(ent[0]->name, "dira");
+	TEST_ASSERT(S_ISDIR(ent[0]->mode));
+	TEST_STR_EQUAL(ent[1]->name, "dira/file_a0");
+	TEST_ASSERT(S_ISREG(ent[1]->mode));
+	TEST_STR_EQUAL(ent[2]->name, "dira/file_a1");
+	TEST_ASSERT(S_ISREG(ent[2]->mode));
+	TEST_STR_EQUAL(ent[3]->name, "dira/file_a2");
+	TEST_ASSERT(S_ISREG(ent[3]->mode));
+	TEST_STR_EQUAL(ent[4]->name, "dirb");
+	TEST_ASSERT(S_ISDIR(ent[4]->mode));
+	TEST_STR_EQUAL(ent[5]->name, "dirb/dirx");
+	TEST_ASSERT(S_ISDIR(ent[5]->mode));
+	TEST_STR_EQUAL(ent[6]->name, "dirb/file_b0");
+	TEST_ASSERT(S_ISREG(ent[6]->mode));
+	TEST_STR_EQUAL(ent[7]->name, "dirb/file_b1");
+	TEST_ASSERT(S_ISREG(ent[7]->mode));
+	TEST_STR_EQUAL(ent[8]->name, "dirb/file_b2");
+	TEST_ASSERT(S_ISREG(ent[8]->mode));
+	TEST_STR_EQUAL(ent[9]->name, "dirc");
+	TEST_ASSERT(S_ISDIR(ent[9]->mode));
+	TEST_STR_EQUAL(ent[10]->name, "dirc/file_c0");
+	TEST_ASSERT(S_ISREG(ent[10]->mode));
+	TEST_STR_EQUAL(ent[11]->name, "dirc/file_c1");
+	TEST_ASSERT(S_ISREG(ent[11]->mode));
+	TEST_STR_EQUAL(ent[12]->name, "dirc/file_c2");
+	TEST_ASSERT(S_ISREG(ent[12]->mode));
+
+	for (i = 0; i < 13; ++i)
+		free(ent[i]);
+
+	/* third pass: skip the contents of dirb, 9 entries remain */
+	printf("**********\n");
+
+	dir = dir_tree_iterator_create(TEST_PATH);
+	TEST_NOT_NULL(dir);
+
+	for (i = 0; i < 9; ++i) {
+		ret = dir->next(dir, &ent[i]);
+		TEST_NOT_NULL(ent[i]);
+		TEST_EQUAL_I(ret, 0);
+		printf("READ %s\n", ent[i]->name);
+		/* leave dirb again before any of its entries is read */
+		if (!strcmp(ent[i]->name, "dirb"))
+			dir_tree_iterator_skip(dir);
+	}
+
+	ret = dir->next(dir, &ent[9]);
+	TEST_NULL(ent[9]);
+	TEST_ASSERT(ret > 0);
+
+	dir = sqfs_drop(dir);
+
+	qsort(ent, 9, sizeof(ent[0]), compare_entries);
+
+	printf("After sort:\n");
+	for (i = 0; i < 9; ++i)
+		printf("%s\n", ent[i]->name);
+
+	TEST_STR_EQUAL(ent[0]->name, "dira");
+	TEST_ASSERT(S_ISDIR(ent[0]->mode));
+	TEST_STR_EQUAL(ent[1]->name, "dira/file_a0");
+	TEST_ASSERT(S_ISREG(ent[1]->mode));
+	TEST_STR_EQUAL(ent[2]->name, "dira/file_a1");
+	TEST_ASSERT(S_ISREG(ent[2]->mode));
+	TEST_STR_EQUAL(ent[3]->name, "dira/file_a2");
+	TEST_ASSERT(S_ISREG(ent[3]->mode));
+	TEST_STR_EQUAL(ent[4]->name, "dirb");
+	TEST_ASSERT(S_ISDIR(ent[4]->mode));
+	TEST_STR_EQUAL(ent[5]->name, "dirc");
+	TEST_ASSERT(S_ISDIR(ent[5]->mode));
+	TEST_STR_EQUAL(ent[6]->name, "dirc/file_c0");
+	TEST_ASSERT(S_ISREG(ent[6]->mode));
+	TEST_STR_EQUAL(ent[7]->name, "dirc/file_c1");
+	TEST_ASSERT(S_ISREG(ent[7]->mode));
+	TEST_STR_EQUAL(ent[8]->name, "dirc/file_c2");
+	TEST_ASSERT(S_ISREG(ent[8]->mode));
+
+	for (i = 0; i < 9; ++i)
+		free(ent[i]);
+
+	return EXIT_SUCCESS;
+}
diff --git a/lib/util/test/testdir/dirb/dirx/file_x0 b/lib/util/test/testdir/dirb/dirx/file_x0
new file mode 100644
index 0000000..e69de29
diff --git a/lib/util/test/testdir/dirb/dirx/file_x1
b/lib/util/test/testdir/dirb/dirx/file_x1 new file mode 100644 index 0000000..e69de29 diff --git a/lib/util/test/testdir/dirb/dirx/file_x2 b/lib/util/test/testdir/dirb/dirx/file_x2 new file mode 100644 index 0000000..e69de29 -- cgit v1.2.3