aboutsummaryrefslogtreecommitdiff
path: root/lib/tar/src
diff options
context:
space:
mode:
authorDavid Oberhollenzer <david.oberhollenzer@sigma-star.at>2023-05-20 17:04:15 +0200
committerDavid Oberhollenzer <david.oberhollenzer@sigma-star.at>2023-05-22 16:15:45 +0200
commit6351872732fce77186f401050eee92c7c3aa3461 (patch)
treec938233378d13dbc94e08f24e34bb84406b08d21 /lib/tar/src
parent9a97a9a4fe224bcf53ad23af31bca67bbb71a824 (diff)
libtar: add a dir_iterator_t implementation for tar files
The existing istream_t wrapper is merged into this one as well; we can open the files via the iterator's open_file_ro function. Unit tests and tar2sqfs are modified accordingly. Signed-off-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
Diffstat (limited to 'lib/tar/src')
-rw-r--r--lib/tar/src/istream.c240
-rw-r--r--lib/tar/src/iterator.c339
-rw-r--r--lib/tar/src/read_header.c11
3 files changed, 342 insertions, 248 deletions
diff --git a/lib/tar/src/istream.c b/lib/tar/src/istream.c
deleted file mode 100644
index 80519b1..0000000
--- a/lib/tar/src/istream.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/* SPDX-License-Identifier: GPL-3.0-or-later */
-/*
- * istream.c
- *
- * Copyright (C) 2019 David Oberhollenzer <goliath@infraroot.at>
- */
-#include "internal.h"
-
-#include <string.h>
-#include <stdlib.h>
-
-typedef struct {
- sqfs_u64 offset;
- sqfs_u64 count;
-} sparse_ent_t;
-
-typedef struct {
- istream_t base;
-
- istream_t *parent;
-
- char *filename;
-
- sparse_ent_t *sparse;
- size_t num_sparse;
-
- sqfs_u64 record_size;
- sqfs_u64 file_size;
- sqfs_u64 offset;
-
- size_t padding;
- size_t last_chunk;
- bool last_sparse;
-
- sqfs_u8 buffer[4096];
-} tar_istream_t;
-
-static bool is_sparse_region(tar_istream_t *tar, sqfs_u64 *count)
-{
- size_t i;
-
- *count = tar->file_size - tar->offset;
- if (tar->num_sparse == 0)
- return false;
-
- for (i = 0; i < tar->num_sparse; ++i) {
- if (tar->offset >= tar->sparse[i].offset) {
- sqfs_u64 diff = tar->offset - tar->sparse[i].offset;
-
- if (diff < tar->sparse[i].count) {
- *count = tar->sparse[i].count - diff;
- return false;
- }
- }
- }
-
- for (i = 0; i < tar->num_sparse; ++i) {
- if (tar->offset < tar->sparse[i].offset) {
- sqfs_u64 diff = tar->sparse[i].offset - tar->offset;
-
- if (diff < *count)
- *count = diff;
- }
- }
-
- return true;
-}
-
-static int precache(istream_t *strm)
-{
- tar_istream_t *tar = (tar_istream_t *)strm;
- sqfs_u64 diff, avail;
-
- tar->offset += tar->last_chunk;
-
- if (!tar->last_sparse) {
- tar->parent->buffer_offset += tar->last_chunk;
- tar->record_size -= tar->last_chunk;
- }
-
- if (tar->offset >= tar->file_size) {
- strm->eof = true;
- strm->buffer_used = 0;
- strm->buffer = tar->buffer;
- if (tar->record_size > 0)
- goto fail_rec_sz;
- if (istream_skip(tar->parent, tar->padding))
- goto fail;
- tar->padding = 0;
- return 0;
- }
-
- if (is_sparse_region(tar, &diff)) {
- if (diff > sizeof(tar->buffer))
- diff = sizeof(tar->buffer);
-
- strm->buffer = tar->buffer;
- strm->buffer_used = diff;
- tar->last_chunk = diff;
- tar->last_sparse = true;
-
- memset(tar->buffer, 0, diff);
- } else {
- if (diff > tar->record_size)
- goto fail_rec_sz;
-
- avail = tar->parent->buffer_used - tar->parent->buffer_offset;
-
- if ((diff > avail) &&
- ((tar->parent->buffer_offset > 0) || avail == 0)) {
- if (istream_precache(tar->parent))
- goto fail;
-
- if (tar->parent->buffer_used == 0 && tar->parent->eof)
- goto fail_eof;
-
- avail = tar->parent->buffer_used;
- }
-
- if (diff > avail)
- diff = avail;
-
- strm->buffer = tar->parent->buffer + tar->parent->buffer_offset;
- strm->buffer_used = diff;
- tar->last_chunk = diff;
- tar->last_sparse = false;
- }
-
- return 0;
-fail_rec_sz:
- fprintf(stderr,
- "%s: missmatch in tar record size vs file size for `%s`.\n",
- istream_get_filename(tar->parent), istream_get_filename(strm));
- goto fail;
-fail_eof:
- fprintf(stderr, "%s: unexpected end-of-file while reading `%s`\n",
- istream_get_filename(tar->parent), istream_get_filename(strm));
- goto fail;
-fail:
- tar->record_size = 0;
- tar->padding = 0;
- return -1;
-}
-
-static const char *get_filename(istream_t *strm)
-{
- return ((tar_istream_t *)strm)->filename;
-}
-
-static void tar_istream_destroy(sqfs_object_t *obj)
-{
- tar_istream_t *strm = (tar_istream_t *)obj;
-
- if (strm->record_size > 0)
- istream_skip(strm->parent, strm->record_size);
-
- if (strm->padding > 0)
- istream_skip(strm->parent, strm->padding);
-
- sqfs_drop(strm->parent);
- free(strm->sparse);
- free(strm->filename);
- free(strm);
-}
-
-istream_t *tar_record_istream_create(istream_t *parent,
- const tar_header_decoded_t *hdr)
-{
- tar_istream_t *strm;
- sparse_map_t *it;
- sqfs_u64 diff;
- size_t idx;
-
- strm = calloc(1, sizeof(*strm));
- if (strm == NULL)
- goto fail_oom;
-
- sqfs_object_init(strm, tar_istream_destroy, NULL);
-
- strm->filename = strdup(hdr->name);
- if (strm->filename == NULL)
- goto fail_oom;
-
- strm->num_sparse = 0;
- for (it = hdr->sparse; it != NULL; it = it->next)
- strm->num_sparse += 1;
-
- if (strm->num_sparse > 0) {
- strm->sparse = alloc_array(sizeof(strm->sparse[0]),
- strm->num_sparse);
- if (strm->sparse == NULL)
- goto fail_oom;
-
- idx = 0;
- it = hdr->sparse;
- while (it != NULL && idx < strm->num_sparse) {
- strm->sparse[idx].offset = it->offset;
- strm->sparse[idx].count = it->count;
- ++idx;
- it = it->next;
- }
- }
-
- for (idx = 1; idx < strm->num_sparse; ++idx) {
- if (strm->sparse[idx].offset <= strm->sparse[idx - 1].offset)
- goto fail_sparse;
-
- diff = strm->sparse[idx].offset - strm->sparse[idx - 1].offset;
-
- if (diff < strm->sparse[idx - 1].count)
- goto fail_sparse;
- }
-
- strm->padding = hdr->record_size % 512;
- if (strm->padding > 0)
- strm->padding = 512 - strm->padding;
-
- strm->record_size = hdr->record_size;
- strm->file_size = hdr->actual_size;
- strm->parent = sqfs_grab(parent);
-
- ((istream_t *)strm)->precache = precache;
- ((istream_t *)strm)->get_filename = get_filename;
- ((istream_t *)strm)->buffer = strm->buffer;
- ((istream_t *)strm)->eof = false;
- return (istream_t *)strm;
-fail_sparse:
- fprintf(stderr, "%s: sparse map is not ordered or overlapping!\n",
- hdr->name);
- goto fail;
-fail_oom:
- fputs("tar istream create: out-of-memory\n", stderr);
- goto fail;
-fail:
- if (strm != NULL) {
- free(strm->filename);
- free(strm);
- }
- return NULL;
-}
diff --git a/lib/tar/src/iterator.c b/lib/tar/src/iterator.c
new file mode 100644
index 0000000..9a6b9f7
--- /dev/null
+++ b/lib/tar/src/iterator.c
@@ -0,0 +1,339 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+/*
+ * iterator.c
+ *
+ * Copyright (C) 2023 David Oberhollenzer <goliath@infraroot.at>
+ */
+#include "tar/tar.h"
+#include "sqfs/error.h"
+#include "util/util.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+
+/*
+ * Directory iterator over the entries of a tar archive that is read
+ * sequentially from an istream_t.
+ *
+ * The read position of the per-file stream created by open_file_ro is
+ * kept here as well, because that stream reads from the same underlying
+ * tar stream as the iterator itself.
+ */
+typedef struct {
+ dir_iterator_t base;
+ tar_header_decoded_t current; /* header most recently read by next */
+ istream_t *stream; /* underlying tar stream, a reference is held */
+ int state; /* 0 while usable, otherwise latched and returned by next */
+
+ /* File I/O wrapper related */
+ bool locked; /* set by open_file_ro, cleared on stream EOF/destroy */
+
+ sqfs_u64 record_size; /* record bytes not yet accounted as consumed */
+ sqfs_u64 file_size; /* copy of current.actual_size */
+ sqfs_u64 offset; /* read position within file_size */
+
+ size_t padding; /* bytes that round the record up to 512 */
+ size_t last_chunk; /* size of the chunk handed out by the last precache */
+ bool last_sparse; /* true if that chunk was zero fill, not record data */
+} tar_iterator_t;
+
+/*
+ * Read-only stream over the data of the entry the parent iterator is
+ * currently positioned at. All position bookkeeping lives in the parent.
+ */
+typedef struct {
+ istream_t base;
+
+ tar_iterator_t *parent; /* iterator that created us, a reference is held */
+
+ sqfs_u8 buffer[4096]; /* zero filled to serve sparse regions */
+} tar_istream_t;
+
+/*
+ * Classify the current read offset of a file against its sparse map.
+ *
+ * Returns false if the offset lies inside a mapped data region (or if the
+ * file has no sparse map at all); *count is then the number of bytes left
+ * in that region (or in the file). Returns true if the offset lies in a
+ * hole; *count is then the distance to the next mapped region, capped by
+ * the number of bytes left in the file.
+ */
+static bool is_sparse_region(const tar_iterator_t *tar, sqfs_u64 *count)
+{
+	sqfs_u64 hole = tar->file_size - tar->offset;
+	const sparse_map_t *ent;
+
+	for (ent = tar->current.sparse; ent != NULL; ent = ent->next) {
+		if (tar->offset < ent->offset) {
+			/* region starts further ahead, it may bound the hole */
+			sqfs_u64 gap = ent->offset - tar->offset;
+
+			if (gap < hole)
+				hole = gap;
+			continue;
+		}
+
+		if (tar->offset - ent->offset < ent->count) {
+			/* the first region containing the offset wins */
+			*count = ent->count - (tar->offset - ent->offset);
+			return false;
+		}
+	}
+
+	*count = hole;
+	return tar->current.sparse != NULL;
+}
+
+/*
+ * Determine how many of the `want` bytes can be served from the buffer of
+ * the underlying stream, refilling that buffer if it is empty or has been
+ * partially consumed.
+ *
+ * Stores min(want, buffered) in *out and returns 0 on success. On failure
+ * the iterator state is set (SQFS_ERROR_IO if the refill failed,
+ * SQFS_ERROR_CORRUPTED on a premature end of the stream) and -1 is
+ * returned.
+ */
+static int data_available(tar_iterator_t *tar, sqfs_u64 want, sqfs_u64 *out)
+{
+	istream_t *src = tar->stream;
+	sqfs_u64 have = src->buffer_used - src->buffer_offset;
+	bool refill = (want > have) &&
+		      (have == 0 || src->buffer_offset > 0);
+
+	if (refill) {
+		if (istream_precache(src) != 0) {
+			tar->state = SQFS_ERROR_IO;
+			return -1;
+		}
+
+		if (src->eof && src->buffer_used == 0) {
+			tar->state = SQFS_ERROR_CORRUPTED;
+			return -1;
+		}
+
+		have = src->buffer_used;
+	}
+
+	*out = (want < have) ? want : have;
+	return 0;
+}
+
+/*****************************************************************************/
+
+/* Report the name of the tar entry the parent iterator currently holds. */
+static const char *strm_get_filename(istream_t *strm)
+{
+	const tar_istream_t *self = (const tar_istream_t *)strm;
+
+	return self->parent->current.name;
+}
+
+/*
+ * precache callback of the per-file stream.
+ *
+ * Accounts the chunk handed out by the previous call as consumed, then
+ * exposes the next chunk of the file: either zero fill from the local
+ * buffer (inside a sparse hole) or a window into the buffer of the
+ * underlying tar stream (inside record data).
+ *
+ * At the end of the file, or on error, the stream is flagged EOF and the
+ * iterator is unlocked. Returns -1 if the iterator is in an error state,
+ * 0 otherwise.
+ */
+static int strm_precache(istream_t *strm)
+{
+	tar_istream_t *tar = (tar_istream_t *)strm;
+	tar_iterator_t *it = tar->parent;
+	sqfs_u64 diff;
+
+	it->offset += it->last_chunk;
+
+	if (!it->last_sparse) {
+		it->stream->buffer_offset += it->last_chunk;
+		it->record_size -= it->last_chunk;
+	}
+
+	/*
+	 * The previous chunk is accounted for now. Clear it, so that being
+	 * called again after EOF or an error does not advance the underlying
+	 * stream or shrink the record a second time.
+	 */
+	it->last_chunk = 0;
+	it->last_sparse = false;
+
+	if (it->offset >= it->file_size)
+		goto out_eof;
+
+	if (is_sparse_region(it, &diff)) {
+		if (diff > sizeof(tar->buffer))
+			diff = sizeof(tar->buffer);
+
+		strm->buffer = tar->buffer;
+		strm->buffer_used = diff;
+		it->last_chunk = diff;
+		it->last_sparse = true;
+
+		memset(tar->buffer, 0, diff);
+	} else {
+		/*
+		 * The file claims more data than is left in the record.
+		 * Without this check, record_size would wrap around below
+		 * and bytes following the record would be returned as file
+		 * contents.
+		 */
+		if (diff > it->record_size) {
+			it->state = SQFS_ERROR_CORRUPTED;
+			goto out_eof;
+		}
+
+		if (data_available(it, diff, &diff))
+			goto out_eof;
+
+		strm->buffer = it->stream->buffer + it->stream->buffer_offset;
+		strm->buffer_used = diff;
+		it->last_chunk = diff;
+		it->last_sparse = false;
+	}
+
+	return 0;
+out_eof:
+	strm->eof = true;
+	strm->buffer_used = 0;
+	strm->buffer = tar->buffer;
+	it->locked = false;
+	return it->state < 0 ? -1 : 0;
+}
+
+/*
+ * Destructor of the per-file stream: unlocks the iterator that created
+ * it, gives up the reference held on that iterator and releases the
+ * stream object itself.
+ */
+static void strm_destroy(sqfs_object_t *obj)
+{
+	tar_istream_t *self = (tar_istream_t *)obj;
+	tar_iterator_t *owner = self->parent;
+
+	owner->locked = false;
+	sqfs_drop(owner);
+	free(self);
+}
+
+/*****************************************************************************/
+
+/*
+ * Advance to the next entry of the archive and return it in *out.
+ *
+ * Whatever is left of the previous record, plus its padding, is skipped
+ * first. Records flagged as unknown by the header decoder are skipped
+ * transparently.
+ *
+ * Returns 0 and a freshly allocated entry on success. Returns
+ * SQFS_ERROR_SEQUENCE while a file stream obtained from open_file_ro is
+ * still active. Any other non-zero result is latched in tar->state and
+ * returned again by every subsequent call.
+ */
+static int it_next(dir_iterator_t *it, dir_entry_t **out)
+{
+ tar_iterator_t *tar = (tar_iterator_t *)it;
+ dir_entry_t *ent;
+ int ret;
+
+ *out = NULL;
+ if (tar->locked)
+ return SQFS_ERROR_SEQUENCE;
+
+ if (tar->state != 0)
+ return tar->state;
+retry:
+ /* skip data of the previous record that nobody has consumed */
+ if (tar->record_size > 0) {
+ ret = istream_skip(tar->stream, tar->record_size);
+ if (ret)
+ goto fail;
+ }
+
+ if (tar->padding > 0) {
+ ret = istream_skip(tar->stream, tar->padding);
+ if (ret)
+ goto fail;
+ }
+
+ clear_header(&(tar->current));
+ ret = read_header(tar->stream, &(tar->current));
+ if (ret != 0)
+ goto fail;
+
+ /* reset the file read position for the new record */
+ tar->offset = 0;
+ tar->last_chunk = 0;
+ tar->last_sparse = false;
+ tar->record_size = tar->current.record_size;
+ tar->file_size = tar->current.actual_size;
+ tar->padding = tar->current.record_size % 512;
+ if (tar->padding > 0)
+ tar->padding = 512 - tar->padding;
+
+ /* the sizes set above make the retry skip over the unknown record */
+ if (tar->current.unknown_record)
+ goto retry;
+
+ if (canonicalize_name(tar->current.name) != 0) {
+ tar->state = SQFS_ERROR_CORRUPTED;
+ return tar->state;
+ }
+
+ /* the name is stored in the same allocation, right after the entry */
+ ent = calloc(1, sizeof(*ent) + strlen(tar->current.name) + 1);
+ if (ent == NULL) {
+ tar->state = SQFS_ERROR_ALLOC;
+ return tar->state;
+ }
+
+ ent->mtime = tar->current.mtime;
+ ent->rdev = tar->current.devno;
+ ent->uid = tar->current.uid;
+ ent->gid = tar->current.gid;
+ ent->mode = tar->current.mode;
+ strcpy(ent->name, tar->current.name);
+
+ /* hard links are reported with a symlink mode plus the hard link flag */
+ if (tar->current.is_hard_link) {
+ ent->mode = (S_IFLNK | 0777);
+ ent->flags |= DIR_ENTRY_FLAG_HARD_LINK;
+ }
+
+ if (S_ISREG(ent->mode))
+ ent->size = tar->current.actual_size;
+
+ *out = ent;
+ return 0;
+fail:
+ /*
+ * Negative results are reported as I/O error, everything else as 1.
+ * NOTE(review): presumably a positive result from read_header means
+ * the end of the archive was reached -- confirm against read_header.
+ */
+ tar->state = ret < 0 ? SQFS_ERROR_IO : 1;
+ return tar->state;
+}
+
+/*
+ * Hand out a heap allocated copy of the link target of the current entry.
+ *
+ * Returns 0 on success, SQFS_ERROR_SEQUENCE while a file stream is
+ * active, the latched error if the iterator has failed,
+ * SQFS_ERROR_NO_ENTRY if the iterator is in a positive non-zero state or
+ * the current entry has no link target, or SQFS_ERROR_ALLOC if the copy
+ * could not be allocated. *out is NULL unless 0 is returned.
+ */
+static int it_read_link(dir_iterator_t *it, char **out)
+{
+	tar_iterator_t *tar = (tar_iterator_t *)it;
+	char *copy;
+
+	*out = NULL;
+
+	if (tar->locked)
+		return SQFS_ERROR_SEQUENCE;
+
+	if (tar->state < 0)
+		return tar->state;
+
+	if (tar->state > 0 || tar->current.link_target == NULL)
+		return SQFS_ERROR_NO_ENTRY;
+
+	copy = strdup(tar->current.link_target);
+	if (copy == NULL)
+		return SQFS_ERROR_ALLOC;
+
+	*out = copy;
+	return 0;
+}
+
+/*
+ * Sub-directory iterators are not provided by the tar iterator; always
+ * clears *out and reports SQFS_ERROR_UNSUPPORTED.
+ */
+static int it_open_subdir(dir_iterator_t *it, dir_iterator_t **out)
+{
+	*out = NULL;
+
+	(void)it;
+	return SQFS_ERROR_UNSUPPORTED;
+}
+
+/* Currently a no-op; see the TODO below. */
+static void it_ignore_subdir(dir_iterator_t *it)
+{
+	/* TODO: skip list */
+	(void)it;
+}
+
+/*
+ * Create a read-only stream over the data of the current entry.
+ *
+ * On success the iterator is locked until the stream reaches its end or
+ * is destroyed, and the stream holds a reference to the iterator.
+ *
+ * Returns 0 on success, SQFS_ERROR_SEQUENCE if the iterator is already
+ * locked, the latched error if the iterator has failed,
+ * SQFS_ERROR_NO_ENTRY if it is in a positive non-zero state,
+ * SQFS_ERROR_NOT_FILE if the current entry is not a regular file, or
+ * SQFS_ERROR_ALLOC if the stream could not be allocated.
+ */
+static int it_open_file_ro(dir_iterator_t *it, istream_t **out)
+{
+	tar_iterator_t *tar = (tar_iterator_t *)it;
+	tar_istream_t *file;
+	istream_t *base;
+
+	*out = NULL;
+
+	if (tar->locked)
+		return SQFS_ERROR_SEQUENCE;
+
+	if (tar->state < 0)
+		return tar->state;
+
+	if (tar->state > 0)
+		return SQFS_ERROR_NO_ENTRY;
+
+	if (!S_ISREG(tar->current.mode))
+		return SQFS_ERROR_NOT_FILE;
+
+	file = calloc(1, sizeof(*file));
+	if (file == NULL)
+		return SQFS_ERROR_ALLOC;
+
+	sqfs_object_init(file, strm_destroy, NULL);
+	file->parent = sqfs_grab(tar);
+
+	base = (istream_t *)file;
+	base->precache = strm_precache;
+	base->get_filename = strm_get_filename;
+	base->buffer = file->buffer;
+
+	tar->locked = true;
+	*out = base;
+	return 0;
+}
+
+/*
+ * Hand out a copy of the extended attribute list of the current entry.
+ *
+ * Returns 0 on success; *out stays NULL if the entry has no extended
+ * attributes. Returns SQFS_ERROR_SEQUENCE while a file stream is active,
+ * the latched error if the iterator has failed, SQFS_ERROR_NO_ENTRY if it
+ * is in a positive non-zero state, or SQFS_ERROR_ALLOC if the list could
+ * not be copied.
+ */
+static int it_read_xattr(dir_iterator_t *it, dir_entry_xattr_t **out)
+{
+	tar_iterator_t *tar = (tar_iterator_t *)it;
+
+	*out = NULL;
+
+	if (tar->locked)
+		return SQFS_ERROR_SEQUENCE;
+
+	if (tar->state < 0)
+		return tar->state;
+
+	if (tar->state > 0)
+		return SQFS_ERROR_NO_ENTRY;
+
+	if (tar->current.xattr == NULL)
+		return 0;
+
+	*out = dir_entry_xattr_list_copy(tar->current.xattr);
+
+	return (*out != NULL) ? 0 : SQFS_ERROR_ALLOC;
+}
+
+/*
+ * Destructor of the tar iterator: releases the decoded header, gives up
+ * the reference on the underlying stream and frees the iterator.
+ */
+static void it_destroy(sqfs_object_t *obj)
+{
+	tar_iterator_t *self = (tar_iterator_t *)obj;
+
+	clear_header(&self->current);
+	sqfs_drop(self->stream);
+	free(self);
+}
+
+/*
+ * Create a directory iterator that reads tar entries from `stream`.
+ *
+ * The iterator takes its own reference on the stream. Returns NULL if
+ * the iterator could not be allocated.
+ */
+dir_iterator_t *tar_open_stream(istream_t *stream)
+{
+	tar_iterator_t *tar;
+	dir_iterator_t *base;
+
+	tar = calloc(1, sizeof(*tar));
+	if (tar == NULL)
+		return NULL;
+
+	base = (dir_iterator_t *)tar;
+	sqfs_object_init(base, it_destroy, NULL);
+
+	base->next = it_next;
+	base->read_link = it_read_link;
+	base->open_subdir = it_open_subdir;
+	base->ignore_subdir = it_ignore_subdir;
+	base->open_file_ro = it_open_file_ro;
+	base->read_xattr = it_read_xattr;
+
+	tar->stream = sqfs_grab(stream);
+	return base;
+}
diff --git a/lib/tar/src/read_header.c b/lib/tar/src/read_header.c
index 8d3145b..751c5dc 100644
--- a/lib/tar/src/read_header.c
+++ b/lib/tar/src/read_header.c
@@ -224,7 +224,9 @@ int read_header(istream_t *fp, tar_header_decoded_t *out)
case TAR_TYPE_PAX_GLOBAL:
if (read_number(hdr.size, sizeof(hdr.size), &pax_size))
goto fail;
- skip_entry(fp, pax_size);
+ if (pax_size % 512)
+ pax_size += 512 - (pax_size % 512);
+ istream_skip(fp, pax_size);
continue;
case TAR_TYPE_PAX:
clear_header(out);
@@ -291,10 +293,3 @@ fail:
clear_header(out);
return -1;
}
-
-int skip_entry(istream_t *fp, sqfs_u64 size)
-{
- size_t tail = size % 512;
-
- return istream_skip(fp, tail ? (size + 512 - tail) : size);
-}