From 6351872732fce77186f401050eee92c7c3aa3461 Mon Sep 17 00:00:00 2001 From: David Oberhollenzer Date: Sat, 20 May 2023 17:04:15 +0200 Subject: libtar: add a dir_iterator_t implementation for tar files The existing istream_t wrapper is merged into this one as well, we can open the files via the iterator's open_file_ro function. Unit tests and tar2sqfs are modified accordingly. Signed-off-by: David Oberhollenzer --- lib/tar/src/istream.c | 240 -------------------------------- lib/tar/src/iterator.c | 339 ++++++++++++++++++++++++++++++++++++++++++++++ lib/tar/src/read_header.c | 11 +- 3 files changed, 342 insertions(+), 248 deletions(-) delete mode 100644 lib/tar/src/istream.c create mode 100644 lib/tar/src/iterator.c (limited to 'lib/tar/src') diff --git a/lib/tar/src/istream.c b/lib/tar/src/istream.c deleted file mode 100644 index 80519b1..0000000 --- a/lib/tar/src/istream.c +++ /dev/null @@ -1,240 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ -/* - * istream.c - * - * Copyright (C) 2019 David Oberhollenzer - */ -#include "internal.h" - -#include -#include - -typedef struct { - sqfs_u64 offset; - sqfs_u64 count; -} sparse_ent_t; - -typedef struct { - istream_t base; - - istream_t *parent; - - char *filename; - - sparse_ent_t *sparse; - size_t num_sparse; - - sqfs_u64 record_size; - sqfs_u64 file_size; - sqfs_u64 offset; - - size_t padding; - size_t last_chunk; - bool last_sparse; - - sqfs_u8 buffer[4096]; -} tar_istream_t; - -static bool is_sparse_region(tar_istream_t *tar, sqfs_u64 *count) -{ - size_t i; - - *count = tar->file_size - tar->offset; - if (tar->num_sparse == 0) - return false; - - for (i = 0; i < tar->num_sparse; ++i) { - if (tar->offset >= tar->sparse[i].offset) { - sqfs_u64 diff = tar->offset - tar->sparse[i].offset; - - if (diff < tar->sparse[i].count) { - *count = tar->sparse[i].count - diff; - return false; - } - } - } - - for (i = 0; i < tar->num_sparse; ++i) { - if (tar->offset < tar->sparse[i].offset) { - sqfs_u64 diff = 
tar->sparse[i].offset - tar->offset; - - if (diff < *count) - *count = diff; - } - } - - return true; -} - -static int precache(istream_t *strm) -{ - tar_istream_t *tar = (tar_istream_t *)strm; - sqfs_u64 diff, avail; - - tar->offset += tar->last_chunk; - - if (!tar->last_sparse) { - tar->parent->buffer_offset += tar->last_chunk; - tar->record_size -= tar->last_chunk; - } - - if (tar->offset >= tar->file_size) { - strm->eof = true; - strm->buffer_used = 0; - strm->buffer = tar->buffer; - if (tar->record_size > 0) - goto fail_rec_sz; - if (istream_skip(tar->parent, tar->padding)) - goto fail; - tar->padding = 0; - return 0; - } - - if (is_sparse_region(tar, &diff)) { - if (diff > sizeof(tar->buffer)) - diff = sizeof(tar->buffer); - - strm->buffer = tar->buffer; - strm->buffer_used = diff; - tar->last_chunk = diff; - tar->last_sparse = true; - - memset(tar->buffer, 0, diff); - } else { - if (diff > tar->record_size) - goto fail_rec_sz; - - avail = tar->parent->buffer_used - tar->parent->buffer_offset; - - if ((diff > avail) && - ((tar->parent->buffer_offset > 0) || avail == 0)) { - if (istream_precache(tar->parent)) - goto fail; - - if (tar->parent->buffer_used == 0 && tar->parent->eof) - goto fail_eof; - - avail = tar->parent->buffer_used; - } - - if (diff > avail) - diff = avail; - - strm->buffer = tar->parent->buffer + tar->parent->buffer_offset; - strm->buffer_used = diff; - tar->last_chunk = diff; - tar->last_sparse = false; - } - - return 0; -fail_rec_sz: - fprintf(stderr, - "%s: missmatch in tar record size vs file size for `%s`.\n", - istream_get_filename(tar->parent), istream_get_filename(strm)); - goto fail; -fail_eof: - fprintf(stderr, "%s: unexpected end-of-file while reading `%s`\n", - istream_get_filename(tar->parent), istream_get_filename(strm)); - goto fail; -fail: - tar->record_size = 0; - tar->padding = 0; - return -1; -} - -static const char *get_filename(istream_t *strm) -{ - return ((tar_istream_t *)strm)->filename; -} - -static void 
tar_istream_destroy(sqfs_object_t *obj) -{ - tar_istream_t *strm = (tar_istream_t *)obj; - - if (strm->record_size > 0) - istream_skip(strm->parent, strm->record_size); - - if (strm->padding > 0) - istream_skip(strm->parent, strm->padding); - - sqfs_drop(strm->parent); - free(strm->sparse); - free(strm->filename); - free(strm); -} - -istream_t *tar_record_istream_create(istream_t *parent, - const tar_header_decoded_t *hdr) -{ - tar_istream_t *strm; - sparse_map_t *it; - sqfs_u64 diff; - size_t idx; - - strm = calloc(1, sizeof(*strm)); - if (strm == NULL) - goto fail_oom; - - sqfs_object_init(strm, tar_istream_destroy, NULL); - - strm->filename = strdup(hdr->name); - if (strm->filename == NULL) - goto fail_oom; - - strm->num_sparse = 0; - for (it = hdr->sparse; it != NULL; it = it->next) - strm->num_sparse += 1; - - if (strm->num_sparse > 0) { - strm->sparse = alloc_array(sizeof(strm->sparse[0]), - strm->num_sparse); - if (strm->sparse == NULL) - goto fail_oom; - - idx = 0; - it = hdr->sparse; - while (it != NULL && idx < strm->num_sparse) { - strm->sparse[idx].offset = it->offset; - strm->sparse[idx].count = it->count; - ++idx; - it = it->next; - } - } - - for (idx = 1; idx < strm->num_sparse; ++idx) { - if (strm->sparse[idx].offset <= strm->sparse[idx - 1].offset) - goto fail_sparse; - - diff = strm->sparse[idx].offset - strm->sparse[idx - 1].offset; - - if (diff < strm->sparse[idx - 1].count) - goto fail_sparse; - } - - strm->padding = hdr->record_size % 512; - if (strm->padding > 0) - strm->padding = 512 - strm->padding; - - strm->record_size = hdr->record_size; - strm->file_size = hdr->actual_size; - strm->parent = sqfs_grab(parent); - - ((istream_t *)strm)->precache = precache; - ((istream_t *)strm)->get_filename = get_filename; - ((istream_t *)strm)->buffer = strm->buffer; - ((istream_t *)strm)->eof = false; - return (istream_t *)strm; -fail_sparse: - fprintf(stderr, "%s: sparse map is not ordered or overlapping!\n", - hdr->name); - goto fail; -fail_oom: - 
fputs("tar istream create: out-of-memory\n", stderr); - goto fail; -fail: - if (strm != NULL) { - free(strm->filename); - free(strm); - } - return NULL; -} diff --git a/lib/tar/src/iterator.c b/lib/tar/src/iterator.c new file mode 100644 index 0000000..9a6b9f7 --- /dev/null +++ b/lib/tar/src/iterator.c @@ -0,0 +1,339 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ +/* + * iterator.c + * + * Copyright (C) 2023 David Oberhollenzer + */ +#include "tar/tar.h" +#include "sqfs/error.h" +#include "util/util.h" + +#include +#include +#include + +typedef struct { + dir_iterator_t base; + tar_header_decoded_t current; + istream_t *stream; + int state; + + /* File I/O wrapper related */ + bool locked; + + sqfs_u64 record_size; + sqfs_u64 file_size; + sqfs_u64 offset; + + size_t padding; + size_t last_chunk; + bool last_sparse; +} tar_iterator_t; + +typedef struct { + istream_t base; + + tar_iterator_t *parent; + + sqfs_u8 buffer[4096]; +} tar_istream_t; + +static bool is_sparse_region(const tar_iterator_t *tar, sqfs_u64 *count) +{ + const sparse_map_t *it; + + *count = tar->file_size - tar->offset; + if (tar->current.sparse == NULL) + return false; + + for (it = tar->current.sparse; it != NULL; it = it->next) { + if (tar->offset >= it->offset) { + sqfs_u64 diff = tar->offset - it->offset; + + if (diff < it->count) { + *count = it->count - diff; + return false; + } + } + } + + for (it = tar->current.sparse; it != NULL; it = it->next) { + if (tar->offset < it->offset) { + sqfs_u64 diff = it->offset - tar->offset; + + if (diff < *count) + *count = diff; + } + } + + return true; +} + +static int data_available(tar_iterator_t *tar, sqfs_u64 want, sqfs_u64 *out) +{ + sqfs_u64 avail = tar->stream->buffer_used - tar->stream->buffer_offset; + + if ((want > avail) && + ((tar->stream->buffer_offset > 0) || avail == 0)) { + if (istream_precache(tar->stream)) { + tar->state = SQFS_ERROR_IO; + return -1; + } + + if (tar->stream->buffer_used == 0 && tar->stream->eof) { + tar->state = 
SQFS_ERROR_CORRUPTED; + return -1; + } + + avail = tar->stream->buffer_used; + } + + *out = avail <= want ? avail : want; + return 0; +} + +/*****************************************************************************/ + +static const char *strm_get_filename(istream_t *strm) +{ + return ((tar_istream_t *)strm)->parent->current.name; +} + +static int strm_precache(istream_t *strm) +{ + tar_istream_t *tar = (tar_istream_t *)strm; + sqfs_u64 diff; + + tar->parent->offset += tar->parent->last_chunk; + + if (!tar->parent->last_sparse) { + tar->parent->stream->buffer_offset += tar->parent->last_chunk; + tar->parent->record_size -= tar->parent->last_chunk; + } + + if (tar->parent->offset >= tar->parent->file_size) + goto out_eof; + + if (is_sparse_region(tar->parent, &diff)) { + if (diff > sizeof(tar->buffer)) + diff = sizeof(tar->buffer); + + strm->buffer = tar->buffer; + strm->buffer_used = diff; + tar->parent->last_chunk = diff; + tar->parent->last_sparse = true; + + memset(tar->buffer, 0, diff); + } else { + if (data_available(tar->parent, diff, &diff)) + goto out_eof; + + strm->buffer = tar->parent->stream->buffer + + tar->parent->stream->buffer_offset; + strm->buffer_used = diff; + tar->parent->last_chunk = diff; + tar->parent->last_sparse = false; + } + + return 0; +out_eof: + strm->eof = true; + strm->buffer_used = 0; + strm->buffer = tar->buffer; + tar->parent->locked = false; + return tar->parent->state < 0 ? 
-1 : 0; +} + +static void strm_destroy(sqfs_object_t *obj) +{ + tar_istream_t *tar = (tar_istream_t *)obj; + + tar->parent->locked = false; + sqfs_drop(tar->parent); + free(tar); +} + +/*****************************************************************************/ + +static int it_next(dir_iterator_t *it, dir_entry_t **out) +{ + tar_iterator_t *tar = (tar_iterator_t *)it; + dir_entry_t *ent; + int ret; + + *out = NULL; + if (tar->locked) + return SQFS_ERROR_SEQUENCE; + + if (tar->state != 0) + return tar->state; +retry: + if (tar->record_size > 0) { + ret = istream_skip(tar->stream, tar->record_size); + if (ret) + goto fail; + } + + if (tar->padding > 0) { + ret = istream_skip(tar->stream, tar->padding); + if (ret) + goto fail; + } + + clear_header(&(tar->current)); + ret = read_header(tar->stream, &(tar->current)); + if (ret != 0) + goto fail; + + tar->offset = 0; + tar->last_chunk = 0; + tar->last_sparse = false; + tar->record_size = tar->current.record_size; + tar->file_size = tar->current.actual_size; + tar->padding = tar->current.record_size % 512; + if (tar->padding > 0) + tar->padding = 512 - tar->padding; + + if (tar->current.unknown_record) + goto retry; + + if (canonicalize_name(tar->current.name) != 0) { + tar->state = SQFS_ERROR_CORRUPTED; + return tar->state; + } + + ent = calloc(1, sizeof(*ent) + strlen(tar->current.name) + 1); + if (ent == NULL) { + tar->state = SQFS_ERROR_ALLOC; + return tar->state; + } + + ent->mtime = tar->current.mtime; + ent->rdev = tar->current.devno; + ent->uid = tar->current.uid; + ent->gid = tar->current.gid; + ent->mode = tar->current.mode; + strcpy(ent->name, tar->current.name); + + if (tar->current.is_hard_link) { + ent->mode = (S_IFLNK | 0777); + ent->flags |= DIR_ENTRY_FLAG_HARD_LINK; + } + + if (S_ISREG(ent->mode)) + ent->size = tar->current.actual_size; + + *out = ent; + return 0; +fail: + tar->state = ret < 0 ? 
SQFS_ERROR_IO : 1; + return tar->state; +} + +static int it_read_link(dir_iterator_t *it, char **out) +{ + tar_iterator_t *tar = (tar_iterator_t *)it; + + *out = NULL; + if (tar->locked) + return SQFS_ERROR_SEQUENCE; + + if (tar->state != 0 || tar->current.link_target == NULL) + return tar->state < 0 ? tar->state : SQFS_ERROR_NO_ENTRY; + + *out = strdup(tar->current.link_target); + return (*out == NULL) ? SQFS_ERROR_ALLOC : 0; +} + +static int it_open_subdir(dir_iterator_t *it, dir_iterator_t **out) +{ + (void)it; + *out = NULL; + return SQFS_ERROR_UNSUPPORTED; +} + +static void it_ignore_subdir(dir_iterator_t *it) +{ + (void)it; + /* TODO: skip list */ +} + +static int it_open_file_ro(dir_iterator_t *it, istream_t **out) +{ + tar_iterator_t *tar = (tar_iterator_t *)it; + tar_istream_t *strm; + + *out = NULL; + if (tar->locked) + return SQFS_ERROR_SEQUENCE; + + if (tar->state != 0) + return tar->state < 0 ? tar->state : SQFS_ERROR_NO_ENTRY; + + if (!S_ISREG(tar->current.mode)) + return SQFS_ERROR_NOT_FILE; + + strm = calloc(1, sizeof(*strm)); + if (strm == NULL) + return SQFS_ERROR_ALLOC; + + sqfs_object_init(strm, strm_destroy, NULL); + strm->parent = sqfs_grab(tar); + + ((istream_t *)strm)->precache = strm_precache; + ((istream_t *)strm)->get_filename = strm_get_filename; + ((istream_t *)strm)->buffer = strm->buffer; + + tar->locked = true; + *out = (istream_t *)strm; + return 0; +} + +static int it_read_xattr(dir_iterator_t *it, dir_entry_xattr_t **out) +{ + tar_iterator_t *tar = (tar_iterator_t *)it; + + *out = NULL; + if (tar->locked) + return SQFS_ERROR_SEQUENCE; + + if (tar->state != 0) + return tar->state < 0 ? 
tar->state : SQFS_ERROR_NO_ENTRY; + + if (tar->current.xattr != NULL) { + *out = dir_entry_xattr_list_copy(tar->current.xattr); + if (*out == NULL) + return SQFS_ERROR_ALLOC; + } + + return 0; +} + +static void it_destroy(sqfs_object_t *obj) +{ + tar_iterator_t *tar = (tar_iterator_t *)obj; + + clear_header(&(tar->current)); + sqfs_drop(tar->stream); + free(tar); +} + +dir_iterator_t *tar_open_stream(istream_t *stream) +{ + tar_iterator_t *tar = calloc(1, sizeof(*tar)); + dir_iterator_t *it = (dir_iterator_t *)tar; + + if (tar == NULL) + return NULL; + + sqfs_object_init(it, it_destroy, NULL); + tar->stream = sqfs_grab(stream); + it->next = it_next; + it->read_link = it_read_link; + it->open_subdir = it_open_subdir; + it->ignore_subdir = it_ignore_subdir; + it->open_file_ro = it_open_file_ro; + it->read_xattr = it_read_xattr; + + return it; +} diff --git a/lib/tar/src/read_header.c b/lib/tar/src/read_header.c index 8d3145b..751c5dc 100644 --- a/lib/tar/src/read_header.c +++ b/lib/tar/src/read_header.c @@ -224,7 +224,9 @@ int read_header(istream_t *fp, tar_header_decoded_t *out) case TAR_TYPE_PAX_GLOBAL: if (read_number(hdr.size, sizeof(hdr.size), &pax_size)) goto fail; - skip_entry(fp, pax_size); + if (pax_size % 512) + pax_size += 512 - (pax_size % 512); + istream_skip(fp, pax_size); continue; case TAR_TYPE_PAX: clear_header(out); @@ -291,10 +293,3 @@ fail: clear_header(out); return -1; } - -int skip_entry(istream_t *fp, sqfs_u64 size) -{ - size_t tail = size % 512; - - return istream_skip(fp, tail ? (size + 512 - tail) : size); -} -- cgit v1.2.3