From e1a47443ad2ee424b2be9c2d4f2761dfe85ded1a Mon Sep 17 00:00:00 2001
From: David Oberhollenzer
Date: Thu, 13 Jun 2019 17:16:06 +0200
Subject: Add utility to turn a POSIX/PAX tar archive into a squashfs image

Signed-off-by: David Oberhollenzer
---
 .gitignore        |   1 +
 tar/Makemodule.am |   6 +-
 tar/read_header.c | 452 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tar/skip.c        |  51 ++++++
 tar/tar.h         |  22 +++
 tar/tar2sqfs.c    | 284 ++++++++++++++++++++++++++++++++++
 6 files changed, 815 insertions(+), 1 deletion(-)
 create mode 100644 tar/read_header.c
 create mode 100644 tar/skip.c
 create mode 100644 tar/tar2sqfs.c

diff --git a/.gitignore b/.gitignore
index 532b328..a0fc326 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,4 @@ config.h
 gensquashfs
 rdsquashfs
 sqfs2tar
+tar2sqfs
diff --git a/tar/Makemodule.am b/tar/Makemodule.am
index d192380..6c9edc7 100644
--- a/tar/Makemodule.am
+++ b/tar/Makemodule.am
@@ -2,4 +2,8 @@ sqfs2tar_SOURCES = tar/sqfs2tar.c tar/tar.h tar/write_header.c
 sqfs2tar_LDADD = libsquashfs.a libfstree.a libcompress.a libutil.a
 sqfs2tar_LDADD += $(XZ_LIBS) $(ZLIB_LIBS) $(LZO_LIBS) $(LZ4_LIBS) $(ZSTD_LIBS)
 
-bin_PROGRAMS += sqfs2tar
+tar2sqfs_SOURCES = tar/tar2sqfs.c tar/skip.c tar/read_header.c tar/tar.h
+tar2sqfs_LDADD = libsquashfs.a libfstree.a libcompress.a libutil.a
+tar2sqfs_LDADD += $(XZ_LIBS) $(ZLIB_LIBS) $(LZO_LIBS) $(LZ4_LIBS) $(ZSTD_LIBS)
+
+bin_PROGRAMS += sqfs2tar tar2sqfs
diff --git a/tar/read_header.c b/tar/read_header.c
new file mode 100644
index 0000000..099a9ce
--- /dev/null
+++ b/tar/read_header.c
@@ -0,0 +1,452 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+#include "util.h"
+#include "tar.h"
+
+#include <sys/stat.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <ctype.h>
+#include <stdio.h>
+
+/* Flags recording which fields a PAX extended header has already set. */
+enum {
+	PAX_SIZE = 0x01,
+	PAX_UID = 0x02,
+	PAX_GID = 0x04,
+	PAX_DEV_MAJ = 0x08,
+	PAX_DEV_MIN = 0x10,
+	PAX_NAME = 0x20,
+	PAX_SLINK_TARGET = 0x40,
+};
+
+/*
+ * Parse up to `digits` octal digits, stopping early at the first character
+ * that is not an octal digit. Returns 0 on success, -1 on overflow.
+ */
+static int read_octal(const char *str, int digits, uint64_t *out)
+{
+	uint64_t result = 0;
+
+	while (digits > 0 && *str >= '0' && *str <= '7') {
+		if (result > 0x1FFFFFFFFFFFFFFFUL) {
+			fputs("numeric overflow parsing tar header\n", stderr);
+			return -1;
+		}
+
+		result = (result << 3) | (*(str++) - '0');
+		--digits;
+	}
+
+	*out = result;
+	return 0;
+}
+
+/*
+ * Parse a `digits` byte big-endian base-256 field. Bit 7 of the first byte
+ * only marks the encoding (read_number tests it) and is not part of the
+ * value. Returns 0 on success, -1 on overflow.
+ */
+static int read_binary(const char *str, int digits, uint64_t *out)
+{
+	uint64_t x, result = 0;
+	bool first = true;
+
+	while (digits > 0) {
+		if (result > 0x00FFFFFFFFFFFFFFUL) {
+			fputs("numeric overflow parsing tar header\n", stderr);
+			return -1;
+		}
+
+		x = *((const unsigned char *)str++);
+
+		/*
+		 * Drop the marker bit. Left in place it ends up in the top
+		 * byte of the accumulator and trips the overflow check for
+		 * every field longer than 8 bytes.
+		 */
+		if (first) {
+			x &= 0x7F;
+			first = false;
+		}
+
+		result = (result << 8) | x;
+		--digits;
+	}
+
+	*out = result;
+	return 0;
+}
+
+/*
+ * Parse a numeric header field: base-256 if the top bit of the first byte
+ * is set, octal text otherwise.
+ */
+static int read_number(const char *str, int digits, uint64_t *out)
+{
+	if (*((unsigned char *)str) & 0x80) {
+		if (read_binary(str, digits, out))
+			return -1;
+		*out &= 0x7FFFFFFFFFFFFFFF;
+		return 0;
+	}
+
+	return read_octal(str, digits, out);
+}
+
+/* Check whether a header block consists of zero bytes only. */
+static bool is_zero_block(const tar_header_t *hdr)
+{
+	const unsigned char *ptr = (const unsigned char *)hdr;
+
+	return ptr[0] == '\0' && memcmp(ptr, ptr + 1, sizeof(*hdr) - 1) == 0;
+}
+
+/*
+ * Check the header checksum: the sum of all header bytes, with the checksum
+ * field itself counted as if it were filled with spaces.
+ */
+static bool is_checksum_valid(const tar_header_t *hdr)
+{
+	unsigned int chksum = 0;
+	tar_header_t copy;
+	uint64_t ref;
+	size_t i;
+
+	if (read_octal(hdr->chksum, sizeof(hdr->chksum), &ref))
+		return false;
+
+	memcpy(&copy, hdr, sizeof(*hdr));
+	memset(copy.chksum, ' ', sizeof(copy.chksum));
+
+	for (i = 0; i < sizeof(copy); ++i)
+		chksum += ((unsigned char *)&copy)[i];
+
+	return chksum == ref;
+}
+
+/* Check the magic and version fields of a header. */
+static bool is_magic_valid(const tar_header_t *hdr)
+{
+	if (memcmp(hdr->magic, TAR_MAGIC, sizeof(hdr->magic)) != 0)
+		return false;
+
+	if (memcmp(hdr->version, TAR_VERSION, sizeof(hdr->version)) != 0)
+		return false;
+
+	return true;
+}
+
+/*
+ * Parse the leading decimal digits of a string. Returns 0 on success,
+ * -1 if the value does not fit into 64 bits.
+ */
+static int pax_read_decimal(const char *str, uint64_t *out)
+{
+	uint64_t digit, result = 0;
+
+	while (*str >= '0' && *str <= '9') {
+		digit = *(str++) - '0';
+
+		/* guards result * 10 + digit, not only result * 10 */
+		if (result > (0xFFFFFFFFFFFFFFFFUL - digit) / 10) {
+			fputs("numeric overflow parsing pax header\n", stderr);
+			return -1;
+		}
+
+		result = (result * 10) + digit;
+	}
+
+	*out = result;
+	return 0;
+}
+
+/*
+ * Read a PAX extended header of `entsize` bytes (plus padding) from `fd`.
+ * The uid, gid, path, size and linkpath records are stored in `out` and
+ * flagged in `set_by_pax`; all other records are ignored.
+ * Returns 0 on success, -1 on failure.
+ */
+static int read_pax_header(int fd, uint64_t entsize, unsigned int *set_by_pax,
+			   tar_header_decoded_t *out)
+{
+	char *buffer = NULL, *line;
+	ssize_t ret;
+	uint64_t i;
+
+	/* the size is read from the archive; entsize + 1 must fit a size_t */
+	if (entsize >= SIZE_MAX) {
+		fputs("reading pax header: header too large\n", stderr);
+		goto fail;
+	}
+
+	buffer = malloc(entsize + 1);
+	if (buffer == NULL)
+		goto fail_errno;
+
+	ret = read_retry(fd, buffer, entsize);
+	if (ret < 0)
+		goto fail_errno;
+	if ((size_t)ret < entsize)
+		goto fail_eof;
+
+	if (skip_padding(fd, entsize))
+		goto fail;
+
+	buffer[entsize] = '\0';
+
+	for (i = 0; i < entsize; ++i) {
+		/* step over the "<length> " prefix of the record */
+		while (i < entsize && isspace((unsigned char)buffer[i]))
+			++i;
+		while (i < entsize && isdigit((unsigned char)buffer[i]))
+			++i;
+		while (i < entsize && isspace((unsigned char)buffer[i]))
+			++i;
+		if (i >= entsize)
+			break;
+
+		line = buffer + i;
+
+		while (i < entsize && buffer[i] != '\n')
+			++i;
+
+		buffer[i] = '\0';
+
+		if (!strncmp(line, "uid=", 4)) {
+			if (pax_read_decimal(line + 4, &out->uid))
+				goto fail;
+			*set_by_pax |= PAX_UID;
+		} else if (!strncmp(line, "gid=", 4)) {
+			if (pax_read_decimal(line + 4, &out->gid))
+				goto fail;
+			*set_by_pax |= PAX_GID;
+		} else if (!strncmp(line, "path=", 5)) {
+			free(out->name);
+			out->name = strdup(line + 5);
+			if (out->name == NULL)
+				goto fail_errno;
+			*set_by_pax |= PAX_NAME;
+		} else if (!strncmp(line, "size=", 5)) {
+			if (pax_read_decimal(line + 5, &out->size))
+				goto fail;
+			*set_by_pax |= PAX_SIZE;
+		} else if (!strncmp(line, "linkpath=", 9)) {
+			free(out->link_target);
+			out->link_target = strdup(line + 9);
+			if (out->link_target == NULL)
+				goto fail_errno;
+			*set_by_pax |= PAX_SLINK_TARGET;
+		}
+	}
+
+	free(buffer);
+	return 0;
+fail_errno:
+	perror("reading pax header");
+	goto fail;
+fail_eof:
+	fputs("reading pax header: unexpected end of file\n", stderr);
+	goto fail;
+fail:
+	free(buffer);
+	return -1;
+}
+
+/*
+ * Fill in every field of `out` that was not already set by a PAX header
+ * (see `set_by_pax`) from the raw ustar header, and derive the file type
+ * bits of the mode from the type flag. Returns 0 on success, -1 on failure.
+ */
+static int decode_header(const tar_header_t *hdr, unsigned int set_by_pax,
+			 tar_header_decoded_t *out)
+{
+	size_t prefix_len, name_len;
+
+	if (!(set_by_pax & PAX_NAME)) {
+		/* the fixed size fields are not terminated if fully used */
+		prefix_len = strnlen(hdr->prefix, sizeof(hdr->prefix));
+		name_len = strnlen(hdr->name, sizeof(hdr->name));
+
+		if (prefix_len > 0) {
+			out->name = malloc(prefix_len + name_len + 2);
+
+			if (out->name != NULL) {
+				sprintf(out->name, "%.*s/%.*s",
+					(int)prefix_len, hdr->prefix,
+					(int)name_len, hdr->name);
+			}
+		} else {
+			out->name = strndup(hdr->name, name_len);
+		}
+
+		if (out->name == NULL) {
+			perror("decoding filename");
+			return -1;
+		}
+	}
+
+	if (!(set_by_pax & PAX_SIZE)) {
+		if (read_number(hdr->size, sizeof(hdr->size), &out->size))
+			return -1;
+	}
+
+	if (!(set_by_pax & PAX_UID)) {
+		if (read_number(hdr->uid, sizeof(hdr->uid), &out->uid))
+			return -1;
+	}
+
+	if (!(set_by_pax & PAX_GID)) {
+		if (read_number(hdr->gid, sizeof(hdr->gid), &out->gid))
+			return -1;
+	}
+
+	if (!(set_by_pax & PAX_DEV_MAJ)) {
+		if (read_number(hdr->devmajor, sizeof(hdr->devmajor),
+				&out->dev_maj)) {
+			return -1;
+		}
+	}
+
+	if (!(set_by_pax & PAX_DEV_MIN)) {
+		if (read_number(hdr->devminor, sizeof(hdr->devminor),
+				&out->dev_min)) {
+			return -1;
+		}
+	}
+
+	if (read_octal(hdr->mode, sizeof(hdr->mode), &out->mode))
+		return -1;
+
+	out->mode &= 07777;
+
+	if (hdr->typeflag == TAR_TYPE_LINK ||
+	    hdr->typeflag == TAR_TYPE_SLINK) {
+		if (!(set_by_pax & PAX_SLINK_TARGET)) {
+			out->link_target = strndup(hdr->linkname,
+						   sizeof(hdr->linkname));
+			if (out->link_target == NULL) {
+				perror("decoding symlink target");
+				return -1;
+			}
+		}
+	}
+
+	out->unknown_record = false;
+
+	switch (hdr->typeflag) {
+	case '\0':
+	case TAR_TYPE_FILE:
+		out->mode |= S_IFREG;
+		break;
+	case TAR_TYPE_LINK:
+		/* XXX: hard links are not supported yet */
+		out->mode = S_IFLNK | 0777;
+		break;
+	case TAR_TYPE_SLINK:
+		out->mode = S_IFLNK | 0777;
+		break;
+	case TAR_TYPE_CHARDEV:
+		out->mode |= S_IFCHR;
+		break;
+	case TAR_TYPE_BLOCKDEV:
+		out->mode |= S_IFBLK;
+		break;
+	case TAR_TYPE_DIR:
+		out->mode |= S_IFDIR;
+		break;
+	case TAR_TYPE_FIFO:
+		out->mode |= S_IFIFO;
+		break;
+	default:
+		out->unknown_record = true;
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Read the next entry header from `fd` into `out`. A PAX extended header
+ * preceding the entry is folded into the result. Returns 0 on success,
+ * 1 on end of archive and -1 on failure. On success the caller releases
+ * `out` with clear_header().
+ */
+int read_header(int fd, tar_header_decoded_t *out)
+{
+	unsigned int set_by_pax = 0;
+	bool prev_was_zero = false;
+	uint64_t pax_size;
+	tar_header_t hdr;
+	int ret;
+
+	memset(out, 0, sizeof(*out));
+
+	for (;;) {
+		ret = read_retry(fd, &hdr, sizeof(hdr));
+		if (ret == 0)
+			goto out_eof;
+		if (ret < 0)
+			goto fail_errno;
+		if (ret < (int)sizeof(hdr))
+			goto fail_eof;
+
+		if (is_zero_block(&hdr)) {
+			if (prev_was_zero)
+				goto out_eof;
+			prev_was_zero = true;
+			continue;
+		}
+
+		prev_was_zero = false;
+
+		if (!is_magic_valid(&hdr))
+			goto fail_magic;
+
+		if (!is_checksum_valid(&hdr))
+			goto fail_chksum;
+
+		if (hdr.typeflag == TAR_TYPE_PAX) {
+			clear_header(out);
+			if (read_number(hdr.size, sizeof(hdr.size), &pax_size))
+				goto fail;
+			set_by_pax = 0;
+			/* may fail with strings already stored in `out` */
+			if (read_pax_header(fd, pax_size, &set_by_pax, out))
+				goto fail;
+			continue;
+		}
+		break;
+	}
+
+	if (decode_header(&hdr, set_by_pax, out))
+		goto fail;
+
+	return 0;
+out_eof:
+	clear_header(out);
+	return 1;
+fail_errno:
+	perror("reading tar header");
+	goto fail;
+fail_eof:
+	fputs("reading tar header: unexpected end of file\n", stderr);
+	goto fail;
+fail_magic:
+	fputs("input is not a ustar tar archive!\n", stderr);
+	goto fail;
+fail_chksum:
+	fputs("invalid tar header checksum!\n", stderr);
+	goto fail;
+fail:
+	clear_header(out);
+	return -1;
+}
+
+/* Free the strings owned by a decoded header and reset it to all zero. */
+void clear_header(tar_header_decoded_t *hdr)
+{
+	free(hdr->name);
+	free(hdr->link_target);
+	memset(hdr, 0, sizeof(*hdr));
+}
diff --git a/tar/skip.c b/tar/skip.c
new file mode 100644
index 0000000..69208b2
--- /dev/null
+++ b/tar/skip.c
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+#include "util.h"
+#include "tar.h"
+
+#include <stdio.h>
+
+/* Read and discard exactly `size` bytes from `fd`; a short read is an error. */
+static int skip_bytes(int fd, uint64_t size)
+{
+	unsigned char buffer[1024];
+	ssize_t ret;
+	size_t diff;
+
+	while (size != 0) {
+		diff = sizeof(buffer);
+		if (diff > size)
+			diff = size;
+
+		ret = read_retry(fd, buffer, diff);
+
+		if (ret < 0) {
+			perror("reading tar record data");
+			return -1;
+		}
+
+		if ((size_t)ret < diff) {
+			fputs("unexpected end of file\n", stderr);
+			return -1;
+		}
+
+		size -= diff;
+	}
+
+	return 0;
+}
+
+/* Skip the bytes that pad `size` up to the next multiple of 512, if any. */
+int skip_padding(int fd, uint64_t size)
+{
+	size_t tail = size % 512;
+
+	return tail ? skip_bytes(fd, 512 - tail) : 0;
+}
+
+/* Skip `size` bytes rounded up to the next multiple of 512. */
+int skip_entry(int fd, uint64_t size)
+{
+	size_t tail = size % 512;
+
+	return skip_bytes(fd, tail ? (size + 512 - tail) : size);
+}
diff --git a/tar/tar.h b/tar/tar.h
index 62c612a..11af6ee 100644
--- a/tar/tar.h
+++ b/tar/tar.h
@@ -24,6 +24,18 @@ typedef struct {
 	char padding[12];
 } tar_header_t;
 
+typedef struct {
+	uint64_t size;
+	uint64_t mode;
+	uint64_t uid;
+	uint64_t gid;
+	uint64_t dev_maj;
+	uint64_t dev_min;
+	char *name;
+	char *link_target;
+	bool unknown_record;
+} tar_header_decoded_t;
+
 #define TAR_TYPE_FILE '0'
 #define TAR_TYPE_LINK '1'
 #define TAR_TYPE_SLINK '2'
@@ -44,4 +56,14 @@ typedef struct {
 int write_tar_header(int fd, const fstree_t *fs, const tree_node_t *n,
 		     const char *name);
 
+/* calculate and skip the zero padding */
+int skip_padding(int fd, uint64_t size);
+
+/* round up to block size and skip the entire entry */
+int skip_entry(int fd, uint64_t size);
+
+int read_header(int fd, tar_header_decoded_t *out);
+
+void clear_header(tar_header_decoded_t *hdr);
+
 #endif /* TAR_H */
diff --git a/tar/tar2sqfs.c b/tar/tar2sqfs.c
new file mode 100644
index 0000000..f00f281
--- /dev/null
+++ b/tar/tar2sqfs.c
@@ -0,0 +1,284 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+#include "data_writer.h"
+#include "highlevel.h"
+#include "squashfs.h"
+#include "compress.h"
+#include "id_table.h"
+#include "fstree.h"
+#include "util.h"
+#include "tar.h"
+
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <getopt.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+static struct option long_opts[] = {
+	{ "help", no_argument, NULL, 'h' },
+	{ "version", no_argument, NULL, 'V' },
+	{ NULL, 0, NULL, 0 },
+};
+
+static const char *short_opts = "hV";
+
+static const char *usagestr =
+"Usage: tar2sqfs [OPTIONS...] <sqfsfile>\n"
+"\n"
+"Read an uncompressed tar archive from stdin and turn it into a squashfs\n"
+"filesystem image.\n"
+"\n"
+"Possible options:\n"
+"\n"
+"  --help, -h                  Print help text and exit.\n"
+"  --version, -V               Print version information and exit.\n"
+"\n"
+"Examples:\n"
+"\n"
+"\ttar2sqfs rootfs.sqfs < rootfs.tar\n"
+"\tzcat rootfs.tar.gz | tar2sqfs rootfs.sqfs\n"
+"\txzcat rootfs.tar.xz | tar2sqfs rootfs.sqfs\n"
+"\n";
+
+static const char *filename;
+static int block_size = SQFS_DEFAULT_BLOCK_SIZE;
+static uint32_t def_mtime = 0;
+static uint16_t def_mode = 0755;
+static uint32_t def_uid = 0;
+static uint32_t def_gid = 0;
+static size_t devblksize = SQFS_DEVBLK_SIZE;
+
+/*
+ * Parse the command line and store the output path in `filename`. Prints a
+ * message and exits on --help, --version or invalid usage.
+ */
+static void process_args(int argc, char **argv)
+{
+	int i;
+
+	for (;;) {
+		i = getopt_long(argc, argv, short_opts, long_opts, NULL);
+		if (i == -1)
+			break;
+
+		switch (i) {
+		case 'h':
+			fputs(usagestr, stdout);
+			exit(EXIT_SUCCESS);
+		case 'V':
+			print_version();
+			exit(EXIT_SUCCESS);
+		default:
+			goto fail_arg;
+		}
+	}
+
+	if (optind >= argc) {
+		fputs("Missing argument: squashfs image\n", stderr);
+		goto fail_arg;
+	}
+
+	filename = argv[optind++];
+
+	if (optind < argc) {
+		fputs("Unknown extra arguments\n", stderr);
+		goto fail_arg;
+	}
+	return;
+fail_arg:
+	fputs("Try `tar2sqfs --help' for more information.\n", stderr);
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Add the entry described by `hdr` to the tree. For regular files the
+ * contents are handed to write_data_from_fd() and the block padding is
+ * consumed from stdin. Returns 0 on success, -1 on failure.
+ */
+static int create_node_and_repack_data(tar_header_decoded_t *hdr, fstree_t *fs,
+				       data_writer_t *data)
+{
+	tree_node_t *node;
+	size_t extra = 0;
+
+	if (S_ISLNK(hdr->mode))
+		extra = strlen(hdr->link_target) + 1;
+
+	if (S_ISREG(hdr->mode)) {
+		node = fstree_add_file(fs, hdr->name, hdr->mode,
+				       hdr->uid, hdr->gid, hdr->size, NULL);
+		if (node == NULL)
+			goto fail_errno;
+
+		if (write_data_from_fd(data, node->data.file,
+				       STDIN_FILENO)) {
+			return -1;
+		}
+
+		if (skip_padding(STDIN_FILENO, node->data.file->size))
+			return -1;
+	} else {
+		node = fstree_add(fs, hdr->name, hdr->mode,
+				  hdr->uid, hdr->gid, extra);
+		if (node == NULL)
+			goto fail_errno;
+
+		if (S_ISLNK(hdr->mode)) {
+			strcpy(node->data.slink_target,
+			       hdr->link_target);
+		} else if (S_ISBLK(hdr->mode) || S_ISCHR(hdr->mode)) {
+			node->data.devno = makedev(hdr->dev_maj, hdr->dev_min);
+		}
+	}
+
+	return 0;
+fail_errno:
+	perror(hdr->name);
+	return -1;
+}
+
+/*
+ * Read entries from stdin until the end of the archive and add them to
+ * `fs`. Entries with an unknown type or an invalid name are skipped with
+ * a warning. Returns 0 on success, -1 on failure.
+ */
+static int process_tar_ball(fstree_t *fs, data_writer_t *data)
+{
+	tar_header_decoded_t hdr;
+	int ret;
+
+	for (;;) {
+		ret = read_header(STDIN_FILENO, &hdr);
+		if (ret > 0)
+			break;
+		if (ret < 0)
+			return -1;
+
+		if (hdr.unknown_record) {
+			fprintf(stderr, "skipping '%s' (unknown entry type)\n",
+				hdr.name);
+			if (skip_entry(STDIN_FILENO, hdr.size))
+				goto fail;
+			clear_header(&hdr);
+			continue;
+		}
+
+		if (canonicalize_name(hdr.name)) {
+			fprintf(stderr, "skipping '%s' (invalid name)\n",
+				hdr.name);
+			if (skip_entry(STDIN_FILENO, hdr.size))
+				goto fail;
+			clear_header(&hdr);
+			continue;
+		}
+
+		if (create_node_and_repack_data(&hdr, fs, data))
+			goto fail;
+
+		clear_header(&hdr);
+	}
+
+	return 0;
+fail:
+	clear_header(&hdr);
+	return -1;
+}
+
+int main(int argc, char **argv)
+{
+	int outfd, status = EXIT_FAILURE;
+	E_SQFS_COMPRESSOR comp_id;
+	data_writer_t *data;
+	sqfs_super_t super;
+	compressor_t *cmp;
+	id_table_t idtbl;
+	fstree_t fs;
+	int ret;
+
+	process_args(argc, argv);
+
+	outfd = open(filename, O_CREAT | O_EXCL | O_RDWR, 0644);
+	if (outfd < 0) {
+		perror(filename);
+		return EXIT_FAILURE;
+	}
+
+	if (fstree_init(&fs, block_size, def_mtime, def_mode,
+			def_uid, def_gid)) {
+		goto out_fd;
+	}
+
+	comp_id = compressor_get_default();
+
+	cmp = compressor_create(comp_id, true, block_size, NULL);
+	if (cmp == NULL) {
+		fputs("Error creating compressor\n", stderr);
+		goto out_fs;
+	}
+
+	if (sqfs_super_init(&super, block_size, def_mtime, comp_id))
+		goto out_cmp;
+
+	if (sqfs_super_write(&super, outfd))
+		goto out_cmp;
+
+	ret = cmp->write_options(cmp, outfd);
+	if (ret < 0)
+		goto out_cmp;
+
+	if (ret > 0) {
+		super.flags |= SQFS_FLAG_COMPRESSOR_OPTIONS;
+		super.bytes_used += ret;
+	}
+
+	data = data_writer_create(&super, cmp, outfd);
+	if (data == NULL)
+		goto out_cmp;
+
+	if (id_table_init(&idtbl))
+		goto out_data;
+
+	if (process_tar_ball(&fs, data))
+		goto out;
+
+	if (data_writer_flush_fragments(data))
+		goto out;
+
+	fstree_sort(&fs);
+	if (fstree_gen_inode_table(&fs))
+		goto out;
+
+	super.inode_count = fs.inode_tbl_size - 2;
+
+	if (sqfs_serialize_fstree(outfd, &super, &fs, cmp, &idtbl))
+		goto out;
+
+	if (data_writer_write_fragment_table(data))
+		goto out;
+
+	if (id_table_write(&idtbl, outfd, &super, cmp))
+		goto out;
+
+	if (sqfs_super_write(&super, outfd))
+		goto out;
+
+	if (padd_file(outfd, super.bytes_used, devblksize))
+		goto out;
+
+	status = EXIT_SUCCESS;
+out:
+	id_table_cleanup(&idtbl);
+out_data:
+	data_writer_destroy(data);
+out_cmp:
+	cmp->destroy(cmp);
+out_fs:
+	fstree_cleanup(&fs);
+out_fd:
+	close(outfd);
+	return status;
+}
-- 
cgit v1.2.3