aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Oberhollenzer <david.oberhollenzer@sigma-star.at>2019-06-30 16:41:21 +0200
committerDavid Oberhollenzer <david.oberhollenzer@sigma-star.at>2019-06-30 16:41:21 +0200
commitf439b20706ade4b5630c3d6c57e6a36ce0dc287a (patch)
tree210abf39ac29d7cf7513f667f63495ef1fd43700
parent3e920038ecd8cc123b0c8dd957f94a8e1a616c0c (diff)
Add support for repacking condensed sparse files
This commit broadly does the following things:
 - Rename and move the sparse mapping structure to libutil.
 - Add a function to the data writer for writing condensed versions of sparse files, given the mapping.
 - This shares code with the already existing function for regular files. The shared code is moved to a common helper function.
 - Add support to tar2sqfs for repacking sparse files.

Signed-off-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
-rw-r--r--include/data_writer.h14
-rw-r--r--include/tar.h10
-rw-r--r--include/util.h7
-rw-r--r--lib/sqfs/data_writer.c150
-rw-r--r--lib/tar/read_header.c8
-rw-r--r--tar/tar2sqfs.c62
-rw-r--r--tests/tar_sparse_gnu.c2
7 files changed, 200 insertions, 53 deletions
diff --git a/include/data_writer.h b/include/data_writer.h
index cafe61e..2ace899 100644
--- a/include/data_writer.h
+++ b/include/data_writer.h
@@ -5,6 +5,7 @@
#include "squashfs.h"
#include "compress.h"
#include "fstree.h"
+#include "util.h"
typedef struct data_writer_t data_writer_t;
@@ -42,8 +43,21 @@ int data_writer_flush_fragments(data_writer_t *data);
The file_info_t object is updated accordingly and used to determine the
number of bytes to write and the input file name to report errors.
+ Blocks or fragments that are all zero bytes are automatically detected and
+ not written out, and the sparse file accounting is updated accordingly.
+
Returns 0 on success, prints errors to stderr.
*/
int write_data_from_fd(data_writer_t *data, file_info_t *fi, int infd);
+/*
+ Does the same as write_data_from_fd but the input file is the condensed
+ representation of a sparse file. The layout must be in order and
+ non-overlapping.
+
+ Returns 0 on success, prints errors to stderr.
+ */
+int write_data_from_fd_condensed(data_writer_t *data, file_info_t *fi,
+ int infd, sparse_map_t *map);
+
#endif /* DATA_WRITER_H */
diff --git a/include/tar.h b/include/tar.h
index 2819740..1f8ca7e 100644
--- a/include/tar.h
+++ b/include/tar.h
@@ -6,6 +6,8 @@
#include <stdbool.h>
#include <stdint.h>
+#include "util.h"
+
typedef enum {
ETV_UNKNOWN = 0,
ETV_V7_UNIX,
@@ -61,17 +63,11 @@ typedef struct {
char padding[7];
} gnu_sparse_t;
-typedef struct tar_sparse_data_t {
- struct tar_sparse_data_t *next;
- uint64_t offset;
- uint64_t count;
-} tar_sparse_data_t;
-
typedef struct {
struct stat sb;
char *name;
char *link_target;
- tar_sparse_data_t *sparse;
+ sparse_map_t *sparse;
uint64_t sparse_size;
bool unknown_record;
} tar_header_decoded_t;
diff --git a/include/util.h b/include/util.h
index d5b20dd..997cbf5 100644
--- a/include/util.h
+++ b/include/util.h
@@ -5,6 +5,13 @@
#include <sys/types.h>
#include <stdint.h>
+/* layout structure for sparse files, indicating where the actual data is */
+typedef struct sparse_map_t {
+ struct sparse_map_t *next;
+ uint64_t offset;
+ uint64_t count;
+} sparse_map_t;
+
/*
Convert back to forward slashed, remove all preceding and trailing slashes,
collapse all sequences of slashes, remove all path components that are '.'
diff --git a/lib/sqfs/data_writer.c b/lib/sqfs/data_writer.c
index 70fe884..8492c98 100644
--- a/lib/sqfs/data_writer.c
+++ b/lib/sqfs/data_writer.c
@@ -18,6 +18,8 @@ struct data_writer_t {
size_t max_fragments;
size_t frag_offset;
+ int block_idx;
+
sqfs_super_t *super;
compressor_t *cmp;
int outfd;
@@ -111,18 +113,55 @@ int data_writer_flush_fragments(data_writer_t *data)
return 0;
}
-int write_data_from_fd(data_writer_t *data, file_info_t *fi, int infd)
+static int flush_data_block(data_writer_t *data, size_t size, file_info_t *fi)
{
- uint64_t count = fi->size;
- int blk_idx = 0;
uint32_t out;
+
+ if (is_zero_block(data->block, size)) {
+ if (size < data->super->block_size) {
+ fi->fragment_offset = 0xFFFFFFFF;
+ fi->fragment = 0xFFFFFFFF;
+ } else {
+ fi->blocksizes[data->block_idx++] = 0;
+ }
+
+ fi->sparse += size;
+ return 0;
+ }
+
+ if (size < data->super->block_size) {
+ if (data->frag_offset + size > data->super->block_size) {
+ if (data_writer_flush_fragments(data))
+ return -1;
+ }
+
+ fi->fragment_offset = data->frag_offset;
+ fi->fragment = data->num_fragments;
+
+ memcpy((char *)data->fragment + data->frag_offset,
+ data->block, size);
+ data->frag_offset += size;
+ } else {
+ if (write_compressed(data, data->block, size, &out))
+ return -1;
+
+ fi->blocksizes[data->block_idx++] = out;
+ }
+
+ return 0;
+}
+
+int write_data_from_fd(data_writer_t *data, file_info_t *fi, int infd)
+{
+ uint64_t count;
ssize_t ret;
size_t diff;
fi->startblock = data->super->bytes_used;
fi->sparse = 0;
+ data->block_idx = 0;
- while (count != 0) {
+ for (count = fi->size; count != 0; count -= diff) {
diff = count > (uint64_t)data->super->block_size ?
data->super->block_size : count;
@@ -132,45 +171,92 @@ int write_data_from_fd(data_writer_t *data, file_info_t *fi, int infd)
if ((size_t)ret < diff)
goto fail_trunc;
- if (is_zero_block(data->block, diff)) {
- if (diff < data->super->block_size) {
- fi->fragment_offset = 0xFFFFFFFF;
- fi->fragment = 0xFFFFFFFF;
- } else {
- fi->blocksizes[blk_idx++] = 0;
- }
- fi->sparse += diff;
- count -= diff;
- continue;
- }
+ if (flush_data_block(data, diff, fi))
+ return -1;
+ }
- if (diff < data->super->block_size) {
- if (data->frag_offset + diff > data->super->block_size) {
- if (data_writer_flush_fragments(data))
- return -1;
- }
+ return 0;
+fail_read:
+ perror(fi->input_file);
+ return -1;
+fail_trunc:
+ fprintf(stderr, "%s: truncated read\n", fi->input_file);
+ return -1;
+}
+
+int write_data_from_fd_condensed(data_writer_t *data, file_info_t *fi,
+ int infd, sparse_map_t *map)
+{
+ size_t start, count, diff;
+ sparse_map_t *m;
+ uint64_t offset;
+ ssize_t ret;
- fi->fragment_offset = data->frag_offset;
- fi->fragment = data->num_fragments;
+ fi->startblock = data->super->bytes_used;
+ fi->sparse = 0;
+ data->block_idx = 0;
+
+ if (map != NULL) {
+ offset = map->offset;
+
+ for (m = map; m != NULL; m = m->next) {
+ if (m->offset < offset)
+ goto fail_map;
+ offset = m->offset + m->count;
+ }
- memcpy((char *)data->fragment + data->frag_offset,
- data->block, diff);
- data->frag_offset += diff;
+ if (offset > fi->size)
+ goto fail_map_size;
+ }
+
+ for (offset = 0; offset < fi->size; offset += diff) {
+ if (fi->size - offset >= (uint64_t)data->super->block_size) {
+ diff = data->super->block_size;
} else {
- if (write_compressed(data, data->block,
- data->super->block_size, &out)) {
- return -1;
- }
+ diff = fi->size - offset;
+ }
+
+ memset(data->block, 0, diff);
+
+ while (map != NULL && map->offset < offset + diff) {
+ start = 0;
+ count = map->count;
+
+ if (map->offset < offset)
+ count -= offset - map->offset;
- fi->blocksizes[blk_idx++] = out;
+ if (map->offset > offset)
+ start = map->offset - offset;
+
+ if (start + count > diff)
+ count = diff - start;
+
+ ret = read_retry(infd, (char *)data->block + start,
+ count);
+ if (ret < 0)
+ goto fail_read;
+ if ((size_t)ret < count)
+ goto fail_trunc;
+
+ map = map->next;
}
- count -= diff;
+ if (flush_data_block(data, diff, fi))
+ return -1;
}
return 0;
+fail_map_size:
+ fprintf(stderr, "%s: sparse file map spans beyond file size\n",
+ fi->input_file);
+ return -1;
+fail_map:
+ fprintf(stderr,
+ "%s: sparse file map is unordered or self overlapping\n",
+ fi->input_file);
+ return -1;
fail_read:
- fprintf(stderr, "read from %s: %s\n", fi->input_file, strerror(errno));
+ perror(fi->input_file);
return -1;
fail_trunc:
fprintf(stderr, "%s: truncated read\n", fi->input_file);
diff --git a/lib/tar/read_header.c b/lib/tar/read_header.c
index 5d2a808..74666b3 100644
--- a/lib/tar/read_header.c
+++ b/lib/tar/read_header.c
@@ -451,9 +451,9 @@ fail:
return NULL;
}
-static void free_sparse_list(tar_sparse_data_t *sparse)
+static void free_sparse_list(sparse_map_t *sparse)
{
- tar_sparse_data_t *old;
+ sparse_map_t *old;
while (sparse != NULL) {
old = sparse;
@@ -462,9 +462,9 @@ static void free_sparse_list(tar_sparse_data_t *sparse)
}
}
-static tar_sparse_data_t *read_gnu_old_sparse(int fd, tar_header_t *hdr)
+static sparse_map_t *read_gnu_old_sparse(int fd, tar_header_t *hdr)
{
- tar_sparse_data_t *list = NULL, *end = NULL, *node;
+ sparse_map_t *list = NULL, *end = NULL, *node;
gnu_sparse_t sph;
uint64_t off, sz;
ssize_t ret;
diff --git a/tar/tar2sqfs.c b/tar/tar2sqfs.c
index 48e7fda..bce6a4d 100644
--- a/tar/tar2sqfs.c
+++ b/tar/tar2sqfs.c
@@ -165,6 +165,26 @@ fail_arg:
exit(EXIT_FAILURE);
}
+static int write_file(tar_header_decoded_t *hdr, file_info_t *fi,
+ data_writer_t *data)
+{
+ int ret;
+
+ if (hdr->sparse != NULL) {
+ ret = write_data_from_fd_condensed(data, fi, STDIN_FILENO,
+ hdr->sparse);
+ if (ret)
+ return -1;
+
+ return skip_padding(STDIN_FILENO, hdr->sparse_size);
+ }
+
+ if (write_data_from_fd(data, fi, STDIN_FILENO))
+ return -1;
+
+ return skip_padding(STDIN_FILENO, fi->size);
+}
+
static int create_node_and_repack_data(tar_header_decoded_t *hdr, fstree_t *fs,
data_writer_t *data)
{
@@ -177,15 +197,8 @@ static int create_node_and_repack_data(tar_header_decoded_t *hdr, fstree_t *fs,
if (!quiet)
printf("Packing %s\n", hdr->name);
- if (S_ISREG(hdr->sb.st_mode)) {
- if (write_data_from_fd(data, node->data.file,
- STDIN_FILENO)) {
- return -1;
- }
-
- if (skip_padding(STDIN_FILENO, node->data.file->size))
- return -1;
- }
+ if (S_ISREG(hdr->sb.st_mode))
+ return write_file(hdr, node->data.file, data);
return 0;
fail_errno:
@@ -196,6 +209,9 @@ fail_errno:
static int process_tar_ball(fstree_t *fs, data_writer_t *data)
{
tar_header_decoded_t hdr;
+ uint64_t offset, count;
+ sparse_map_t *m;
+ bool skip;
int ret;
for (;;) {
@@ -205,9 +221,37 @@ static int process_tar_ball(fstree_t *fs, data_writer_t *data)
if (ret < 0)
return -1;
+ skip = false;
+
if (hdr.unknown_record) {
fprintf(stderr, "skipping '%s' (unknown entry type)\n",
hdr.name);
+ skip = true;
+ }
+
+ if (!skip && hdr.sparse != NULL) {
+ offset = hdr.sparse->offset;
+ count = 0;
+
+ for (m = hdr.sparse; m != NULL; m = m->next) {
+ if (m->offset < offset) {
+ skip = true;
+ break;
+ }
+ offset = m->offset + m->count;
+ count += m->count;
+ }
+
+ if (count != hdr.sparse_size)
+ skip = true;
+
+ if (skip) {
+ fprintf(stderr, "skipping '%s' (broken sparse "
+ "file layout)\n", hdr.name);
+ }
+ }
+
+ if (skip) {
if (skip_entry(STDIN_FILENO, hdr.sb.st_size))
goto fail;
continue;
diff --git a/tests/tar_sparse_gnu.c b/tests/tar_sparse_gnu.c
index 844f7e3..d4d65d3 100644
--- a/tests/tar_sparse_gnu.c
+++ b/tests/tar_sparse_gnu.c
@@ -28,8 +28,8 @@ static int open_read(const char *path)
int main(void)
{
- tar_sparse_data_t *sparse;
tar_header_decoded_t hdr;
+ sparse_map_t *sparse;
int fd;
assert(chdir(TEST_PATH) == 0);