From 1582ca88cb056d84a93cd40731d362083dad8cd4 Mon Sep 17 00:00:00 2001 From: David Oberhollenzer Date: Fri, 28 Jun 2019 11:17:37 +0200 Subject: Add basic support for the GNU tar format Signed-off-by: David Oberhollenzer --- include/tar.h | 26 ++++++++- lib/tar/read_header.c | 145 +++++++++++++++++++++++++++++++++++++++++-------- lib/tar/write_header.c | 4 +- tests/tar_formats.c | 34 ++++++++++++ tests/tar_large_uid.c | 17 ++++++ tests/tar_long_paths.c | 17 ++++++ tests/tar_mtime.c | 34 ++++++++++++ 7 files changed, 249 insertions(+), 28 deletions(-) diff --git a/include/tar.h b/include/tar.h index 3b7c8eb..4da04f5 100644 --- a/include/tar.h +++ b/include/tar.h @@ -29,8 +29,27 @@ typedef struct { char gname[32]; char devmajor[8]; char devminor[8]; - char prefix[155]; - char padding[12]; + union { + struct { + char prefix[155]; + char padding[12]; + } posix; + + struct { + char atime[12]; + char ctime[12]; + char offset[12]; + char deprecated[4]; + char unused; + struct { + char offset[12]; + char numbytes[12]; + } sparse[4]; + char isextended; + char realsize[12]; + char padding[17]; + } gnu; + } tail; } tar_header_t; typedef struct { @@ -48,6 +67,9 @@ typedef struct { #define TAR_TYPE_DIR '5' #define TAR_TYPE_FIFO '6' +#define TAR_TYPE_GNU_SLINK 'K' +#define TAR_TYPE_GNU_PATH 'L' + #define TAR_TYPE_PAX 'x' #define TAR_MAGIC "ustar" diff --git a/lib/tar/read_header.c b/lib/tar/read_header.c index 01e4543..9ef20b6 100644 --- a/lib/tar/read_header.c +++ b/lib/tar/read_header.c @@ -46,36 +46,44 @@ static int read_octal(const char *str, int digits, uint64_t *out) static int read_binary(const char *str, int digits, uint64_t *out) { - uint64_t x, result = 0; + uint64_t x, ov, result; + bool first = true; - while (digits > 0 && isspace(*str)) { - ++str; + while (digits > 0) { + x = *((const unsigned char *)str++); --digits; - } - while (digits > 0) { - if (result > 0x00FFFFFFFFFFFFFFUL) { - fputs("numeric overflow parsing tar header\n", stderr); - return -1; + if (first) { + first = false; + if (x == 0xFF) { + result = 0xFFFFFFFFFFFFFFFFUL; + } else { + x &= 0x7F; + result = 0; + if (digits > 7 && x != 0) + goto fail_ov; + } } - x = *((const unsigned char *)str++); + ov = (result >> 56) & 0xFF; + + if (ov != 0 && ov != 0xFF) + goto fail_ov; + result = (result << 8) | x; - --digits; } *out = result; return 0; +fail_ov: + fputs("numeric overflow parsing tar header\n", stderr); + return -1; } static int read_number(const char *str, int digits, uint64_t *out) { - if (*((unsigned char *)str) & 0x80) { - if (read_binary(str, digits, out)) - return -1; - *out &= 0x7FFFFFFFFFFFFFFF; - return 0; - } + if (*((unsigned char *)str) & 0x80) + return read_binary(str, digits, out); return read_octal(str, digits, out); } @@ -260,13 +268,13 @@ static int decode_header(const tar_header_t *hdr, unsigned int set_by_pax, if (hdr->tail.posix.prefix[0] != '\0' && version == ETV_POSIX) { count = strlen(hdr->name) + 1; - count += strlen(hdr->prefix) + 1; + count += strlen(hdr->tail.posix.prefix) + 1; out->name = malloc(count); if (out->name != NULL) { sprintf(out->name, "%s/%s", - hdr->prefix, hdr->name); + hdr->tail.posix.prefix, hdr->name); } } else { out->name = strdup(hdr->name); @@ -313,14 +321,53 @@ static int decode_header(const tar_header_t *hdr, unsigned int set_by_pax, if (!(set_by_pax & PAX_MTIME)) { if (read_number(hdr->mtime, sizeof(hdr->mtime), &field)) return -1; - out->sb.st_mtime = field; + if (field & 0x8000000000000000UL) { + field = ~field + 1; + out->sb.st_mtime = -((int64_t)field); + } else { + out->sb.st_mtime = field; + } + } + + if (!(set_by_pax & PAX_ATIME)) { + field = out->sb.st_mtime; + + if (version == ETV_PRE_POSIX && + ((uint8_t)hdr->tail.gnu.atime[0] == 0x80 || + (uint8_t)hdr->tail.gnu.atime[0] == 0xFF || + isdigit(hdr->tail.gnu.atime[0]))) { + if (read_number(hdr->tail.gnu.atime, + sizeof(hdr->tail.gnu.atime), &field)) + return -1; + } + + if (field & 0x8000000000000000UL) { + field = ~field + 1; + out->sb.st_atime = -((int64_t)field); + } else { + out->sb.st_atime = field; + } } - if (!(set_by_pax & PAX_ATIME)) - out->sb.st_atime = out->sb.st_mtime; + if (!(set_by_pax & PAX_CTIME)) { + field = out->sb.st_mtime; + + if (version == ETV_PRE_POSIX && + ((uint8_t)hdr->tail.gnu.ctime[0] == 0x80 || + (uint8_t)hdr->tail.gnu.ctime[0] == 0xFF || + isdigit(hdr->tail.gnu.ctime[0]))) { + if (read_number(hdr->tail.gnu.ctime, + sizeof(hdr->tail.gnu.atime), &field)) + return -1; + } - if (!(set_by_pax & PAX_CTIME)) - out->sb.st_ctime = out->sb.st_mtime; + if (field & 0x8000000000000000UL) { + field = ~field + 1; + out->sb.st_ctime = -((int64_t)field); + } else { + out->sb.st_ctime = field; + } + } if (read_octal(hdr->mode, sizeof(hdr->mode), &field)) return -1; @@ -372,6 +419,37 @@ static int decode_header(const tar_header_t *hdr, unsigned int set_by_pax, return 0; } +static char *record_to_memory(int fd, uint64_t size) +{ + char *buffer = malloc(size + 1); + ssize_t ret; + + if (buffer == NULL) + goto fail_errno; + + ret = read_retry(fd, buffer, size); + if (ret == 0) + goto fail_eof; + if (ret < 0) + goto fail_errno; + if ((uint64_t)ret < size) + goto fail_eof; + + if (skip_padding(fd, size)) + goto fail; + + return buffer; +fail_errno: + perror("reading tar record"); + goto fail; +fail_eof: + fputs("reading tar record: unexpected end of file\n", stderr); + goto fail; +fail: + free(buffer); + return NULL; +} + int read_header(int fd, tar_header_decoded_t *out) { unsigned int set_by_pax = 0; @@ -408,7 +486,26 @@ int read_header(int fd, tar_header_decoded_t *out) if (!is_checksum_valid(&hdr)) goto fail_chksum; - if (hdr.typeflag == TAR_TYPE_PAX) { + switch (hdr.typeflag) { + case TAR_TYPE_GNU_SLINK: + if (read_number(hdr.size, sizeof(hdr.size), &pax_size)) + return -1; + free(out->link_target); + out->link_target = record_to_memory(fd, pax_size); + if (out->link_target == NULL) + goto fail; + set_by_pax |= PAX_SLINK_TARGET; + continue; + case TAR_TYPE_GNU_PATH: + if (read_number(hdr.size, sizeof(hdr.size), &pax_size)) + return -1; + free(out->name); + out->name = record_to_memory(fd, pax_size); + if (out->name == NULL) + goto fail; + set_by_pax |= PAX_NAME; + continue; + case TAR_TYPE_PAX: clear_header(out); if (read_number(hdr.size, sizeof(hdr.size), &pax_size)) return -1; diff --git a/lib/tar/write_header.c b/lib/tar/write_header.c index 80db327..d9db24a 100644 --- a/lib/tar/write_header.c +++ b/lib/tar/write_header.c @@ -33,14 +33,14 @@ static int name_to_tar_header(tar_header_t *hdr, const char *path) return -1; len = ptr - path; - if (len >= sizeof(hdr->prefix)) + if (len >= sizeof(hdr->tail.posix.prefix)) continue; if (strlen(ptr + 1) >= sizeof(hdr->name)) continue; break; } - memcpy(hdr->prefix, path, ptr - path); + memcpy(hdr->tail.posix.prefix, path, ptr - path); memcpy(hdr->name, ptr + 1, strlen(ptr + 1)); return 0; } diff --git a/tests/tar_formats.c b/tests/tar_formats.c index 58cd223..a9dcbc3 100644 --- a/tests/tar_formats.c +++ b/tests/tar_formats.c @@ -102,5 +102,39 @@ int main(void) clear_header(&hdr); close(fd); + fd = open_read("format-acceptance/gnu.tar"); + assert(read_header(fd, &hdr) == 0); + assert(hdr.sb.st_mode == (S_IFREG | 0644)); + assert(hdr.sb.st_uid == 01750); + assert(hdr.sb.st_gid == 01750); + assert(hdr.sb.st_size == 5); + assert(hdr.sb.st_mtime == 1542905892); + assert(hdr.sb.st_atime == 1542905892); + assert(hdr.sb.st_ctime == 1542905892); + assert(strcmp(hdr.name, "input.txt") == 0); + assert(!hdr.unknown_record); + assert(read_retry(fd, buffer, 5) == 5); + buffer[5] = '\0'; + assert(strcmp(buffer, "test\n") == 0); + clear_header(&hdr); + close(fd); + + fd = open_read("format-acceptance/gnu-g.tar"); + assert(read_header(fd, &hdr) == 0); + assert(hdr.sb.st_mode == (S_IFREG | 0644)); + assert(hdr.sb.st_uid == 01750); + assert(hdr.sb.st_gid == 01750); + assert(hdr.sb.st_size == 5); + assert(hdr.sb.st_mtime == 013375560044); + assert(hdr.sb.st_atime == 013375561762); + assert(hdr.sb.st_ctime == 013375561750); + assert(strcmp(hdr.name, "input.txt") == 0); + assert(!hdr.unknown_record); + assert(read_retry(fd, buffer, 5) == 5); + buffer[5] = '\0'; + assert(strcmp(buffer, "test\n") == 0); + clear_header(&hdr); + close(fd); + return EXIT_SUCCESS; } diff --git a/tests/tar_large_uid.c b/tests/tar_large_uid.c index 8a86c03..36ef222 100644 --- a/tests/tar_large_uid.c +++ b/tests/tar_large_uid.c @@ -68,5 +68,22 @@ int main(void) clear_header(&hdr); close(fd); + fd = open_read("user-group-largenum/gnu.tar"); + assert(read_header(fd, &hdr) == 0); + assert(hdr.sb.st_mode == (S_IFREG | 0644)); + assert(hdr.sb.st_uid == 0x80000000); + assert(hdr.sb.st_gid == 0x80000000); + assert(hdr.sb.st_size == 5); + assert(hdr.sb.st_mtime == 013376036700); + assert(hdr.sb.st_atime == 013376036700); + assert(hdr.sb.st_ctime == 013376036700); + assert(strcmp(hdr.name, "input.txt") == 0); + assert(!hdr.unknown_record); + assert(read_retry(fd, buffer, 5) == 5); + buffer[5] = '\0'; + assert(strcmp(buffer, "test\n") == 0); + clear_header(&hdr); + close(fd); + return EXIT_SUCCESS; } diff --git a/tests/tar_long_paths.c b/tests/tar_long_paths.c index 2615a61..a19a235 100644 --- a/tests/tar_long_paths.c +++ b/tests/tar_long_paths.c @@ -73,5 +73,22 @@ int main(void) clear_header(&hdr); close(fd); + fd = open_read("long-paths/gnu.tar"); + assert(read_header(fd, &hdr) == 0); + assert(hdr.sb.st_mode == (S_IFREG | 0644)); + assert(hdr.sb.st_uid == 01750); + assert(hdr.sb.st_gid == 01750); + assert(hdr.sb.st_size == 5); + assert(hdr.sb.st_mtime == 1542909670); + assert(hdr.sb.st_atime == 1542909670); + assert(hdr.sb.st_ctime == 1542909670); + assert(strcmp(hdr.name, filename) == 0); + assert(!hdr.unknown_record); + assert(read_retry(fd, buffer, 5) == 5); + buffer[5] = '\0'; + assert(strcmp(buffer, "test\n") == 0); + clear_header(&hdr); + close(fd); + return EXIT_SUCCESS; } diff --git a/tests/tar_mtime.c b/tests/tar_mtime.c index 30d4bc4..b255656 100644 --- a/tests/tar_mtime.c +++ b/tests/tar_mtime.c @@ -51,6 +51,23 @@ int main(void) clear_header(&hdr); close(fd); + fd = open_read("large-mtime/gnu.tar"); + assert(read_header(fd, &hdr) == 0); + assert(hdr.sb.st_mode == (S_IFREG | 0644)); + assert(hdr.sb.st_uid == 01750); + assert(hdr.sb.st_gid == 01750); + assert(hdr.sb.st_size == 5); + assert(hdr.sb.st_mtime == 8589934592); + assert(hdr.sb.st_atime == 8589934592); + assert(hdr.sb.st_ctime == 8589934592); + assert(strcmp(hdr.name, "input.txt") == 0); + assert(!hdr.unknown_record); + assert(read_retry(fd, buffer, 5) == 5); + buffer[5] = '\0'; + assert(strcmp(buffer, "test\n") == 0); + clear_header(&hdr); + close(fd); + fd = open_read("large-mtime/pax.tar"); assert(read_header(fd, &hdr) == 0); assert(hdr.sb.st_mode == (S_IFREG | 0644)); @@ -85,5 +102,22 @@ int main(void) clear_header(&hdr); close(fd); + fd = open_read("negative-mtime/gnu.tar"); + assert(read_header(fd, &hdr) == 0); + assert(hdr.sb.st_mode == (S_IFREG | 0644)); + assert(hdr.sb.st_uid == 01750); + assert(hdr.sb.st_gid == 01750); + assert(hdr.sb.st_size == 5); + assert(hdr.sb.st_mtime == -315622800); + assert(hdr.sb.st_atime == -315622800); + assert(hdr.sb.st_ctime == -315622800); + assert(strcmp(hdr.name, "input.txt") == 0); + assert(!hdr.unknown_record); + assert(read_retry(fd, buffer, 5) == 5); + buffer[5] = '\0'; + assert(strcmp(buffer, "test\n") == 0); + clear_header(&hdr); + close(fd); + return EXIT_SUCCESS; } -- cgit v1.2.3