From f439b20706ade4b5630c3d6c57e6a36ce0dc287a Mon Sep 17 00:00:00 2001 From: David Oberhollenzer Date: Sun, 30 Jun 2019 16:41:21 +0200 Subject: Add support for repacking condensed sparse files This commit broadly does the following things: - Rename and move the sparse mapping structure to libutil - Add a function to the data writer for writing condensed versions of sparse files, given the mapping. - This shares code with the already existing function for regular files. The shared code is moved to a common helper function. - Add support to tar2sqfs for repacking sparse files. Signed-off-by: David Oberhollenzer --- include/data_writer.h | 14 +++++ include/tar.h | 10 +--- include/util.h | 7 +++ lib/sqfs/data_writer.c | 150 ++++++++++++++++++++++++++++++++++++++----------- lib/tar/read_header.c | 8 +-- tar/tar2sqfs.c | 62 +++++++++++++++++--- tests/tar_sparse_gnu.c | 2 +- 7 files changed, 200 insertions(+), 53 deletions(-) diff --git a/include/data_writer.h b/include/data_writer.h index cafe61e..2ace899 100644 --- a/include/data_writer.h +++ b/include/data_writer.h @@ -5,6 +5,7 @@ #include "squashfs.h" #include "compress.h" #include "fstree.h" +#include "util.h" typedef struct data_writer_t data_writer_t; @@ -42,8 +43,21 @@ int data_writer_flush_fragments(data_writer_t *data); The file_info_t object is updated accordingly and used to determine the number of bytes to write and the input file name to report errors. + Blocks or fragments that are all zero bytes are automatically detected, + not written out, and the sparse file accounting is updated accordingly. + Returns 0 on success, prints errors to stderr. */ int write_data_from_fd(data_writer_t *data, file_info_t *fi, int infd); +/* + Does the same as write_data_from_fd but the input file is the condensed + representation of a sparse file. The layout must be in order and + non-overlapping. + + Returns 0 on success, prints errors to stderr. 
+ */ +int write_data_from_fd_condensed(data_writer_t *data, file_info_t *fi, + int infd, sparse_map_t *map); + #endif /* DATA_WRITER_H */ diff --git a/include/tar.h b/include/tar.h index 2819740..1f8ca7e 100644 --- a/include/tar.h +++ b/include/tar.h @@ -6,6 +6,8 @@ #include #include +#include "util.h" + typedef enum { ETV_UNKNOWN = 0, ETV_V7_UNIX, @@ -61,17 +63,11 @@ typedef struct { char padding[7]; } gnu_sparse_t; -typedef struct tar_sparse_data_t { - struct tar_sparse_data_t *next; - uint64_t offset; - uint64_t count; -} tar_sparse_data_t; - typedef struct { struct stat sb; char *name; char *link_target; - tar_sparse_data_t *sparse; + sparse_map_t *sparse; uint64_t sparse_size; bool unknown_record; } tar_header_decoded_t; diff --git a/include/util.h b/include/util.h index d5b20dd..997cbf5 100644 --- a/include/util.h +++ b/include/util.h @@ -5,6 +5,13 @@ #include #include +/* layout structure for sparse files, indicating where the actual data is */ +typedef struct sparse_map_t { + struct sparse_map_t *next; + uint64_t offset; + uint64_t count; +} sparse_map_t; + /* Convert back to forward slashed, remove all preceeding and trailing slashes, collapse all sequences of slashes, remove all path components that are '.' 
diff --git a/lib/sqfs/data_writer.c b/lib/sqfs/data_writer.c index 70fe884..8492c98 100644 --- a/lib/sqfs/data_writer.c +++ b/lib/sqfs/data_writer.c @@ -18,6 +18,8 @@ struct data_writer_t { size_t max_fragments; size_t frag_offset; + int block_idx; + sqfs_super_t *super; compressor_t *cmp; int outfd; @@ -111,18 +113,55 @@ int data_writer_flush_fragments(data_writer_t *data) return 0; } -int write_data_from_fd(data_writer_t *data, file_info_t *fi, int infd) +static int flush_data_block(data_writer_t *data, size_t size, file_info_t *fi) { - uint64_t count = fi->size; - int blk_idx = 0; uint32_t out; + + if (is_zero_block(data->block, size)) { + if (size < data->super->block_size) { + fi->fragment_offset = 0xFFFFFFFF; + fi->fragment = 0xFFFFFFFF; + } else { + fi->blocksizes[data->block_idx++] = 0; + } + + fi->sparse += size; + return 0; + } + + if (size < data->super->block_size) { + if (data->frag_offset + size > data->super->block_size) { + if (data_writer_flush_fragments(data)) + return -1; + } + + fi->fragment_offset = data->frag_offset; + fi->fragment = data->num_fragments; + + memcpy((char *)data->fragment + data->frag_offset, + data->block, size); + data->frag_offset += size; + } else { + if (write_compressed(data, data->block, size, &out)) + return -1; + + fi->blocksizes[data->block_idx++] = out; + } + + return 0; +} + +int write_data_from_fd(data_writer_t *data, file_info_t *fi, int infd) +{ + uint64_t count; ssize_t ret; size_t diff; fi->startblock = data->super->bytes_used; fi->sparse = 0; + data->block_idx = 0; - while (count != 0) { + for (count = fi->size; count != 0; count -= diff) { diff = count > (uint64_t)data->super->block_size ? 
data->super->block_size : count; @@ -132,45 +171,92 @@ int write_data_from_fd(data_writer_t *data, file_info_t *fi, int infd) if ((size_t)ret < diff) goto fail_trunc; - if (is_zero_block(data->block, diff)) { - if (diff < data->super->block_size) { - fi->fragment_offset = 0xFFFFFFFF; - fi->fragment = 0xFFFFFFFF; - } else { - fi->blocksizes[blk_idx++] = 0; - } - fi->sparse += diff; - count -= diff; - continue; - } + if (flush_data_block(data, diff, fi)) + return -1; + } - if (diff < data->super->block_size) { - if (data->frag_offset + diff > data->super->block_size) { - if (data_writer_flush_fragments(data)) - return -1; - } + return 0; +fail_read: + perror(fi->input_file); + return -1; +fail_trunc: + fprintf(stderr, "%s: truncated read\n", fi->input_file); + return -1; +} + +int write_data_from_fd_condensed(data_writer_t *data, file_info_t *fi, + int infd, sparse_map_t *map) +{ + size_t start, count, diff; + sparse_map_t *m; + uint64_t offset; + ssize_t ret; - fi->fragment_offset = data->frag_offset; - fi->fragment = data->num_fragments; + fi->startblock = data->super->bytes_used; + fi->sparse = 0; + data->block_idx = 0; + + if (map != NULL) { + offset = map->offset; + + for (m = map; m != NULL; m = m->next) { + if (m->offset < offset) + goto fail_map; + offset = m->offset + m->count; + } - memcpy((char *)data->fragment + data->frag_offset, - data->block, diff); - data->frag_offset += diff; + if (offset > fi->size) + goto fail_map_size; + } + + for (offset = 0; offset < fi->size; offset += diff) { + if (fi->size - offset >= (uint64_t)data->super->block_size) { + diff = data->super->block_size; } else { - if (write_compressed(data, data->block, - data->super->block_size, &out)) { - return -1; - } + diff = fi->size - offset; + } + + memset(data->block, 0, diff); + + while (map != NULL && map->offset < offset + diff) { + start = 0; + count = map->count; + + if (map->offset < offset) + count -= offset - map->offset; - fi->blocksizes[blk_idx++] = out; + if (map->offset 
> offset) + start = map->offset - offset; + + if (start + count > diff) + count = diff - start; + + ret = read_retry(infd, (char *)data->block + start, + count); + if (ret < 0) + goto fail_read; + if ((size_t)ret < count) + goto fail_trunc; + + map = map->next; } - count -= diff; + if (flush_data_block(data, diff, fi)) + return -1; } return 0; +fail_map_size: + fprintf(stderr, "%s: sparse file map spans beyond file size\n", + fi->input_file); + return -1; +fail_map: + fprintf(stderr, + "%s: sparse file map is unordered or self overlapping\n", + fi->input_file); + return -1; fail_read: - fprintf(stderr, "read from %s: %s\n", fi->input_file, strerror(errno)); + perror(fi->input_file); return -1; fail_trunc: fprintf(stderr, "%s: truncated read\n", fi->input_file); diff --git a/lib/tar/read_header.c b/lib/tar/read_header.c index 5d2a808..74666b3 100644 --- a/lib/tar/read_header.c +++ b/lib/tar/read_header.c @@ -451,9 +451,9 @@ fail: return NULL; } -static void free_sparse_list(tar_sparse_data_t *sparse) +static void free_sparse_list(sparse_map_t *sparse) { - tar_sparse_data_t *old; + sparse_map_t *old; while (sparse != NULL) { old = sparse; @@ -462,9 +462,9 @@ static void free_sparse_list(tar_sparse_data_t *sparse) } } -static tar_sparse_data_t *read_gnu_old_sparse(int fd, tar_header_t *hdr) +static sparse_map_t *read_gnu_old_sparse(int fd, tar_header_t *hdr) { - tar_sparse_data_t *list = NULL, *end = NULL, *node; + sparse_map_t *list = NULL, *end = NULL, *node; gnu_sparse_t sph; uint64_t off, sz; ssize_t ret; diff --git a/tar/tar2sqfs.c b/tar/tar2sqfs.c index 48e7fda..bce6a4d 100644 --- a/tar/tar2sqfs.c +++ b/tar/tar2sqfs.c @@ -165,6 +165,26 @@ fail_arg: exit(EXIT_FAILURE); } +static int write_file(tar_header_decoded_t *hdr, file_info_t *fi, + data_writer_t *data) +{ + int ret; + + if (hdr->sparse != NULL) { + ret = write_data_from_fd_condensed(data, fi, STDIN_FILENO, + hdr->sparse); + if (ret) + return -1; + + return skip_padding(STDIN_FILENO, hdr->sparse_size); + } 
+ + if (write_data_from_fd(data, fi, STDIN_FILENO)) + return -1; + + return skip_padding(STDIN_FILENO, fi->size); +} + static int create_node_and_repack_data(tar_header_decoded_t *hdr, fstree_t *fs, data_writer_t *data) { @@ -177,15 +197,8 @@ static int create_node_and_repack_data(tar_header_decoded_t *hdr, fstree_t *fs, if (!quiet) printf("Packing %s\n", hdr->name); - if (S_ISREG(hdr->sb.st_mode)) { - if (write_data_from_fd(data, node->data.file, - STDIN_FILENO)) { - return -1; - } - - if (skip_padding(STDIN_FILENO, node->data.file->size)) - return -1; - } + if (S_ISREG(hdr->sb.st_mode)) + return write_file(hdr, node->data.file, data); return 0; fail_errno: @@ -196,6 +209,9 @@ fail_errno: static int process_tar_ball(fstree_t *fs, data_writer_t *data) { tar_header_decoded_t hdr; + uint64_t offset, count; + sparse_map_t *m; + bool skip; int ret; for (;;) { @@ -205,9 +221,37 @@ static int process_tar_ball(fstree_t *fs, data_writer_t *data) if (ret < 0) return -1; + skip = false; + if (hdr.unknown_record) { fprintf(stderr, "skipping '%s' (unknown entry type)\n", hdr.name); + skip = true; + } + + if (!skip && hdr.sparse != NULL) { + offset = hdr.sparse->offset; + count = 0; + + for (m = hdr.sparse; m != NULL; m = m->next) { + if (m->offset < offset) { + skip = true; + break; + } + offset = m->offset + m->count; + count += m->count; + } + + if (count != hdr.sparse_size) + skip = true; + + if (skip) { + fprintf(stderr, "skipping '%s' (broken sparse " + "file layout)\n", hdr.name); + } + } + + if (skip) { if (skip_entry(STDIN_FILENO, hdr.sb.st_size)) goto fail; continue; diff --git a/tests/tar_sparse_gnu.c b/tests/tar_sparse_gnu.c index 844f7e3..d4d65d3 100644 --- a/tests/tar_sparse_gnu.c +++ b/tests/tar_sparse_gnu.c @@ -28,8 +28,8 @@ static int open_read(const char *path) int main(void) { - tar_sparse_data_t *sparse; tar_header_decoded_t hdr; + sparse_map_t *sparse; int fd; assert(chdir(TEST_PATH) == 0); -- cgit v1.2.3