From 8c449c503f4c37f72089a0dc50e00631a22ed12a Mon Sep 17 00:00:00 2001 From: David Oberhollenzer Date: Sat, 29 Jun 2019 02:26:11 +0200 Subject: Add support for reading old style GNU sparse tar file format Signed-off-by: David Oberhollenzer --- include/tar.h | 17 +++++++ lib/tar/read_header.c | 116 ++++++++++++++++++++++++++++++++++++++++++++ tests/Makemodule.am | 8 +++- tests/tar_sparse_gnu.c | 127 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 266 insertions(+), 2 deletions(-) create mode 100644 tests/tar_sparse_gnu.c diff --git a/include/tar.h b/include/tar.h index 4da04f5..6fdb22d 100644 --- a/include/tar.h +++ b/include/tar.h @@ -52,10 +52,26 @@ typedef struct { } tail; } tar_header_t; +typedef struct { + struct { + char offset[12]; + char numbytes[12]; + } sparse[21]; + char isextended; + char padding[7]; +} gnu_sparse_t; + +typedef struct tar_sparse_data_t { + struct tar_sparse_data_t *next; + uint64_t offset; + uint64_t count; +} tar_sparse_data_t; + typedef struct { struct stat sb; char *name; char *link_target; + tar_sparse_data_t *sparse; bool unknown_record; } tar_header_decoded_t; @@ -69,6 +85,7 @@ typedef struct { #define TAR_TYPE_GNU_SLINK 'K' #define TAR_TYPE_GNU_PATH 'L' +#define TAR_TYPE_GNU_SPARSE 'S' #define TAR_TYPE_PAX 'x' diff --git a/lib/tar/read_header.c b/lib/tar/read_header.c index 76634e1..44a38f9 100644 --- a/lib/tar/read_header.c +++ b/lib/tar/read_header.c @@ -390,6 +390,7 @@ static int decode_header(const tar_header_t *hdr, unsigned int set_by_pax, switch (hdr->typeflag) { case '\0': case TAR_TYPE_FILE: + case TAR_TYPE_GNU_SPARSE: out->sb.st_mode |= S_IFREG; break; case TAR_TYPE_LINK: @@ -450,6 +451,104 @@ fail: return NULL; } +static void free_sparse_list(tar_sparse_data_t *sparse) +{ + tar_sparse_data_t *old; + + while (sparse != NULL) { + old = sparse; + sparse = sparse->next; + free(old); + } +} + +static tar_sparse_data_t *read_gnu_old_sparse(int fd, tar_header_t *hdr) +{ + tar_sparse_data_t *list = NULL, *end 
= NULL, *node; + gnu_sparse_t sph; + uint64_t off, sz; + ssize_t ret; + int i; + + for (i = 0; i < 4; ++i) { + if (!isdigit(hdr->tail.gnu.sparse[i].offset[0])) + break; + if (!isdigit(hdr->tail.gnu.sparse[i].numbytes[0])) + break; + + if (read_octal(hdr->tail.gnu.sparse[i].offset, + sizeof(hdr->tail.gnu.sparse[i].offset), &off)) + goto fail; + if (read_octal(hdr->tail.gnu.sparse[i].numbytes, + sizeof(hdr->tail.gnu.sparse[i].numbytes), &sz)) + goto fail; + + node = calloc(1, sizeof(*node)); + if (node == NULL) + goto fail_errno; + + node->offset = off; + node->count = sz; + + if (list == NULL) { + list = end = node; + } else { + end->next = node; + end = node; + } + } + + if (hdr->tail.gnu.isextended == 0) + return list; + + do { + ret = read_retry(fd, &sph, sizeof(sph)); + if (ret < 0) + goto fail_errno; + if ((size_t)ret < sizeof(sph)) + goto fail_eof; + + for (i = 0; i < 21; ++i) { + if (!isdigit(sph.sparse[i].offset[0])) + break; + if (!isdigit(sph.sparse[i].numbytes[0])) + break; + + if (read_octal(sph.sparse[i].offset, + sizeof(sph.sparse[i].offset), &off)) + goto fail; + if (read_octal(sph.sparse[i].numbytes, + sizeof(sph.sparse[i].numbytes), &sz)) + goto fail; + + node = calloc(1, sizeof(*node)); + if (node == NULL) + goto fail_errno; + + node->offset = off; + node->count = sz; + + if (list == NULL) { + list = end = node; + } else { + end->next = node; + end = node; + } + } + } while (sph.isextended != 0); + + return list; +fail_eof: + fputs("parsing GNU sparse header: unexpected end of file", stderr); + goto fail; +fail_errno: + perror("parsing GNU sparse header"); + goto fail; +fail: + free_sparse_list(list); + return NULL; +} + int read_header(int fd, tar_header_decoded_t *out) { unsigned int set_by_pax = 0; @@ -513,6 +612,22 @@ int read_header(int fd, tar_header_decoded_t *out) if (read_pax_header(fd, pax_size, &set_by_pax, out)) goto fail; continue; + case TAR_TYPE_GNU_SPARSE: + if (!(set_by_pax & PAX_SIZE)) { + if (read_number(hdr.tail.gnu.realsize, + 
sizeof(hdr.tail.gnu.realsize), + &pax_size)) + goto fail; + + out->sb.st_size = pax_size; + set_by_pax |= PAX_SIZE; + } + + free_sparse_list(out->sparse); + out->sparse = read_gnu_old_sparse(fd, &hdr); + if (out->sparse == NULL) + goto fail; + break; } break; } @@ -543,6 +658,7 @@ fail: void clear_header(tar_header_decoded_t *hdr) { + free_sparse_list(hdr->sparse); free(hdr->name); free(hdr->link_target); memset(hdr, 0, sizeof(*hdr)); diff --git a/tests/Makemodule.am b/tests/Makemodule.am index e9cb8c4..004f83c 100644 --- a/tests/Makemodule.am +++ b/tests/Makemodule.am @@ -46,16 +46,20 @@ test_tar_ustar_SOURCES = tests/tar_ustar.c test_tar_ustar_LDADD = libtar.a libutil.a test_tar_ustar_CPPFLAGS = $(AM_CPPFLAGS) -DTESTPATH=$(top_srcdir)/tests/tar +test_tar_sparse_gnu_SOURCES = tests/tar_sparse_gnu.c +test_tar_sparse_gnu_LDADD = libtar.a libutil.a +test_tar_sparse_gnu_CPPFLAGS = $(AM_CPPFLAGS) -DTESTPATH=$(top_srcdir)/tests/tar + check_PROGRAMS += test_canonicalize_name test_mknode_simple test_mknode_slink check_PROGRAMS += test_mknode_reg test_mknode_dir test_gen_inode_table check_PROGRAMS += test_add_by_path test_get_path test_fstree_sort check_PROGRAMS += test_fstree_from_file test_fstree_init test_fstree_xattr -check_PROGRAMS += test_tar_ustar test_tar_pax test_tar_gnu +check_PROGRAMS += test_tar_ustar test_tar_pax test_tar_gnu test_tar_sparse_gnu TESTS += test_canonicalize_name test_mknode_simple test_mknode_slink TESTS += test_mknode_reg test_mknode_dir test_gen_inode_table TESTS += test_add_by_path test_get_path test_fstree_sort test_fstree_from_file TESTS += test_fstree_init test_fstree_xattr test_tar_ustar test_tar_pax -TESTS += test_tar_gnu +TESTS += test_tar_gnu test_tar_sparse_gnu EXTRA_DIST += $(top_srcdir)/tests/tar diff --git a/tests/tar_sparse_gnu.c b/tests/tar_sparse_gnu.c new file mode 100644 index 0000000..2f2e1c5 --- /dev/null +++ b/tests/tar_sparse_gnu.c @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ +#include "util.h" 
+#include "tar.h" + +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <stdio.h> +#include <fcntl.h> + +#define STR(x) #x +#define STRVALUE(x) STR(x) + +#define TEST_PATH STRVALUE(TESTPATH) + +static int open_read(const char *path) +{ + int fd = open(path, O_RDONLY); + + if (fd < 0) { + perror(path); + exit(EXIT_FAILURE); + } + + return fd; +} + +int main(void) +{ + tar_sparse_data_t *sparse; + tar_header_decoded_t hdr; + int fd; + + assert(chdir(TEST_PATH) == 0); + + fd = open_read("sparse-files/gnu-small.tar"); + assert(read_header(fd, &hdr) == 0); + assert(hdr.sb.st_mode == (S_IFREG | 0644)); + assert(hdr.sb.st_uid == 01750); + assert(hdr.sb.st_gid == 01750); + assert(hdr.sb.st_size == 524288); + assert(strcmp(hdr.name, "input.bin") == 0); + assert(!hdr.unknown_record); + + sparse = hdr.sparse; + assert(sparse != NULL); + assert(sparse->offset == 0); + assert(sparse->count == 4096); + + sparse = sparse->next; + assert(sparse != NULL); + assert(sparse->offset == 262144); + assert(sparse->count == 4096); + + sparse = sparse->next; + assert(sparse != NULL); + assert(sparse->offset == 524288); + assert(sparse->count == 0); + + assert(sparse->next == NULL); + + clear_header(&hdr); + close(fd); + + fd = open_read("sparse-files/gnu.tar"); + assert(read_header(fd, &hdr) == 0); + assert(hdr.sb.st_mode == (S_IFREG | 0644)); + assert(hdr.sb.st_uid == 01750); + assert(hdr.sb.st_gid == 01750); + assert(hdr.sb.st_size == 2097152); + assert(strcmp(hdr.name, "input.bin") == 0); + assert(!hdr.unknown_record); + + sparse = hdr.sparse; + assert(sparse != NULL); + assert(sparse->offset == 0); + assert(sparse->count == 4096); + + sparse = sparse->next; + assert(sparse != NULL); + assert(sparse->offset == 262144); + assert(sparse->count == 4096); + + sparse = sparse->next; + assert(sparse != NULL); + assert(sparse->offset == 524288); + assert(sparse->count == 4096); + + sparse = sparse->next; + assert(sparse != NULL); + assert(sparse->offset == 786432); + assert(sparse->count == 4096); + + sparse = 
sparse->next; + assert(sparse != NULL); + assert(sparse->offset == 1048576); + assert(sparse->count == 4096); + + sparse = sparse->next; + assert(sparse != NULL); + assert(sparse->offset == 1310720); + assert(sparse->count == 4096); + + sparse = sparse->next; + assert(sparse != NULL); + assert(sparse->offset == 1572864); + assert(sparse->count == 4096); + + sparse = sparse->next; + assert(sparse != NULL); + assert(sparse->offset == 1835008); + assert(sparse->count == 4096); + + sparse = sparse->next; + assert(sparse != NULL); + assert(sparse->offset == 2097152); + assert(sparse->count == 0); + + sparse = sparse->next; + assert(sparse == NULL); + + clear_header(&hdr); + close(fd); + + return EXIT_SUCCESS; +} -- cgit v1.2.3