From 2f172ede7115d0a2730a3b689131042ba559e272 Mon Sep 17 00:00:00 2001 From: David Oberhollenzer Date: Wed, 19 Jun 2019 15:04:17 +0200 Subject: Split generic tar code off to static library Signed-off-by: David Oberhollenzer --- lib/tar/read_header.c | 415 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/tar/skip.c | 48 ++++++ lib/tar/write_header.c | 248 +++++++++++++++++++++++++++++ 3 files changed, 711 insertions(+) create mode 100644 lib/tar/read_header.c create mode 100644 lib/tar/skip.c create mode 100644 lib/tar/write_header.c (limited to 'lib/tar') diff --git a/lib/tar/read_header.c b/lib/tar/read_header.c new file mode 100644 index 0000000..bdb2d20 --- /dev/null +++ b/lib/tar/read_header.c @@ -0,0 +1,415 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ +#include "util.h" +#include "tar.h" + +#include <sys/sysmacros.h> +#include <string.h> +#include <stdlib.h> +#include <ctype.h> +#include <stdio.h> + +enum { + PAX_SIZE = 0x001, + PAX_UID = 0x002, + PAX_GID = 0x004, + PAX_DEV_MAJ = 0x008, + PAX_DEV_MIN = 0x010, + PAX_NAME = 0x020, + PAX_SLINK_TARGET = 0x040, + PAX_ATIME = 0x080, + PAX_MTIME = 0x100, + PAX_CTIME = 0x200, +}; + +static int read_octal(const char *str, int digits, uint64_t *out) +{ + uint64_t result = 0; + + while (digits > 0 && *str >= '0' && *str <= '7') { + if (result > 0x1FFFFFFFFFFFFFFFUL) { + fputs("numeric overflow parsing tar header\n", stderr); + return -1; + } + + result = (result << 3) | (*(str++) - '0'); + --digits; + } + + *out = result; + return 0; +} + +static int read_binary(const char *str, int digits, uint64_t *out) +{ + uint64_t x, result = 0; + + while (digits > 0) { + if (result > 0x00FFFFFFFFFFFFFFUL) { + fputs("numeric overflow parsing tar header\n", stderr); + return -1; + } + + x = *((const unsigned char *)str++); + result = (result << 8) | x; + --digits; + } + + *out = result; + return 0; +} + +static int read_number(const char *str, int digits, uint64_t *out) +{ + if (*((unsigned char *)str) & 0x80) { + if (read_binary(str, digits, out)) + return -1; + *out &= 
0x7FFFFFFFFFFFFFFF; + return 0; + } + + return read_octal(str, digits, out); +} + +static bool is_zero_block(const tar_header_t *hdr) +{ + const unsigned char *ptr = (const unsigned char *)hdr; + + return ptr[0] == '\0' && memcmp(ptr, ptr + 1, sizeof(*hdr) - 1) == 0; +} + +static bool is_checksum_valid(const tar_header_t *hdr) +{ + unsigned int chksum = 0; + tar_header_t copy; + uint64_t ref; + size_t i; + + if (read_octal(hdr->chksum, sizeof(hdr->chksum), &ref)) + return false; + + memcpy(&copy, hdr, sizeof(*hdr)); + memset(copy.chksum, ' ', sizeof(copy.chksum)); + + for (i = 0; i < sizeof(copy); ++i) + chksum += ((unsigned char *)&copy)[i]; + + return chksum == ref; +} + +static bool is_magic_valid(const tar_header_t *hdr) +{ + if (memcmp(hdr->magic, TAR_MAGIC, sizeof(hdr->magic)) != 0) + return false; + + if (memcmp(hdr->version, TAR_VERSION, sizeof(hdr->version)) != 0) + return false; + + return true; +} + +static int pax_read_decimal(const char *str, uint64_t *out) +{ + uint64_t result = 0; + + while (*str >= '0' && *str <= '9') { + if (result > 0xFFFFFFFFFFFFFFFFUL / 10) { + fputs("numeric overflow parsing pax header\n", stderr); + return -1; + } + + result = (result * 10) + (*(str++) - '0'); + } + + *out = result; + return 0; +} + +static int read_pax_header(int fd, uint64_t entsize, unsigned int *set_by_pax, + tar_header_decoded_t *out) +{ + char *buffer, *line; + uint64_t field; + ssize_t ret; + uint64_t i; + + buffer = malloc(entsize + 1); + if (buffer == NULL) + goto fail_errno; + + ret = read_retry(fd, buffer, entsize); + if (ret < 0) + goto fail_errno; + if ((size_t)ret < entsize) + goto fail_eof; + + if (skip_padding(fd, entsize)) + goto fail; + + buffer[entsize] = '\0'; + + for (i = 0; i < entsize; ++i) { + while (i < entsize && isspace(buffer[i])) + ++i; + while (i < entsize && isdigit(buffer[i])) + ++i; + while (i < entsize && isspace(buffer[i])) + ++i; + if (i >= entsize) + break; + + line = buffer + i; + + while (i < entsize && buffer[i] != '\n') + ++i; 
+ + buffer[i] = '\0'; + + if (!strncmp(line, "uid=", 4)) { + pax_read_decimal(line + 4, &field); + out->sb.st_uid = field; + *set_by_pax |= PAX_UID; + } else if (!strncmp(line, "gid=", 4)) { + pax_read_decimal(line + 4, &field); + out->sb.st_gid = field; + *set_by_pax |= PAX_GID; + } else if (!strncmp(line, "path=", 5)) { + free(out->name); + out->name = strdup(line + 5); + if (out->name == NULL) + goto fail_errno; + *set_by_pax |= PAX_NAME; + } else if (!strncmp(line, "size=", 5)) { + pax_read_decimal(line + 5, &field); + out->sb.st_size = field; + *set_by_pax |= PAX_SIZE; + } else if (!strncmp(line, "linkpath=", 9)) { + free(out->link_target); + out->link_target = strdup(line + 9); + if (out->link_target == NULL) + goto fail_errno; + *set_by_pax |= PAX_SLINK_TARGET; + } else if (!strncmp(line, "atime=", 6)) { + pax_read_decimal(line + 6, &field); + out->sb.st_atime = field; + *set_by_pax |= PAX_ATIME; + } else if (!strncmp(line, "mtime=", 6)) { + pax_read_decimal(line + 6, &field); + out->sb.st_mtime = field; + *set_by_pax |= PAX_MTIME; + } else if (!strncmp(line, "ctime=", 6)) { + pax_read_decimal(line + 6, &field); + out->sb.st_ctime = field; + *set_by_pax |= PAX_CTIME; + } + } + + free(buffer); + return 0; +fail_errno: + perror("reading pax header"); + goto fail; +fail_eof: + fputs("reading pax header: unexpected end of file\n", stderr); + goto fail; +fail: + free(buffer); + return -1; +} + +static int decode_header(const tar_header_t *hdr, unsigned int set_by_pax, + tar_header_decoded_t *out) +{ + uint64_t field; + size_t count; + + if (!(set_by_pax & PAX_NAME)) { + if (hdr->prefix[0] == '\0') { + count = strlen(hdr->name) + 1; + count += strlen(hdr->prefix) + 1; + + out->name = malloc(count); + + if (out->name != NULL) { + sprintf(out->name, "%s/%s", + hdr->prefix, hdr->name); + } + } else { + out->name = strdup(hdr->name); + } + + if (out->name == NULL) { + perror("decoding filename"); + return -1; + } + } + + if (!(set_by_pax & PAX_SIZE)) { + if 
(read_number(hdr->size, sizeof(hdr->size), &field)) + return -1; + out->sb.st_size = field; + } + + if (!(set_by_pax & PAX_UID)) { + if (read_number(hdr->uid, sizeof(hdr->uid), &field)) + return -1; + out->sb.st_uid = field; + } + + if (!(set_by_pax & PAX_GID)) { + if (read_number(hdr->gid, sizeof(hdr->gid), &field)) + return -1; + out->sb.st_gid = field; + } + + if (!(set_by_pax & PAX_DEV_MAJ)) { + if (read_number(hdr->devmajor, sizeof(hdr->devmajor), &field)) + return -1; + + out->sb.st_rdev = makedev(field, minor(out->sb.st_rdev)); + } + + if (!(set_by_pax & PAX_DEV_MIN)) { + if (read_number(hdr->devminor, sizeof(hdr->devminor), &field)) + return -1; + + out->sb.st_rdev = makedev(major(out->sb.st_rdev), field); + } + + if (!(set_by_pax & PAX_MTIME)) { + if (read_number(hdr->mtime, sizeof(hdr->mtime), &field)) + return -1; + out->sb.st_mtime = field; + } + + if (!(set_by_pax & PAX_ATIME)) + out->sb.st_atime = out->sb.st_mtime; + + if (!(set_by_pax & PAX_CTIME)) + out->sb.st_ctime = out->sb.st_mtime; + + if (read_octal(hdr->mode, sizeof(hdr->mode), &field)) + return -1; + + out->sb.st_mode = field & 07777; + + if (hdr->typeflag == TAR_TYPE_LINK || + hdr->typeflag == TAR_TYPE_SLINK) { + if (!(set_by_pax & PAX_SLINK_TARGET)) { + out->link_target = strdup(hdr->linkname); + if (out->link_target == NULL) { + perror("decoding symlink target"); + return -1; + } + } + } + + out->unknown_record = false; + + switch (hdr->typeflag) { + case '\0': + case TAR_TYPE_FILE: + out->sb.st_mode |= S_IFREG; + break; + case TAR_TYPE_LINK: + /* XXX: hard links are not support yet */ + out->sb.st_mode = S_IFLNK | 0777; + break; + case TAR_TYPE_SLINK: + out->sb.st_mode = S_IFLNK | 0777; + break; + case TAR_TYPE_CHARDEV: + out->sb.st_mode |= S_IFCHR; + break; + case TAR_TYPE_BLOCKDEV: + out->sb.st_mode |= S_IFBLK; + break; + case TAR_TYPE_DIR: + out->sb.st_mode |= S_IFDIR; + break; + case TAR_TYPE_FIFO: + out->sb.st_mode |= S_IFIFO; + break; + default: + out->unknown_record = true; + 
+ break; + } + + return 0; +} + +int read_header(int fd, tar_header_decoded_t *out) +{ + unsigned int set_by_pax = 0; + bool prev_was_zero = false; + uint64_t pax_size; + tar_header_t hdr; + int ret; + + memset(out, 0, sizeof(*out)); + + for (;;) { + ret = read_retry(fd, &hdr, sizeof(hdr)); + if (ret == 0) + goto out_eof; + if (ret < 0) + goto fail_errno; + if (ret < (int)sizeof(hdr)) + goto fail_eof; + + if (is_zero_block(&hdr)) { + if (prev_was_zero) + goto out_eof; + prev_was_zero = true; + continue; + } + + prev_was_zero = false; + + if (!is_magic_valid(&hdr)) + goto fail_magic; + + if (!is_checksum_valid(&hdr)) + goto fail_chksum; + + if (hdr.typeflag == TAR_TYPE_PAX) { + clear_header(out); + if (read_number(hdr.size, sizeof(hdr.size), &pax_size)) + return -1; + set_by_pax = 0; + if (read_pax_header(fd, pax_size, &set_by_pax, out)) + return -1; + continue; + } + break; + } + + if (decode_header(&hdr, set_by_pax, out)) + goto fail; + + return 0; +out_eof: + clear_header(out); + return 1; +fail_errno: + perror("reading tar header"); + goto fail; +fail_eof: + fputs("reading tar header: unexpected end of file\n", stderr); + goto fail; +fail_magic: + fputs("input is not a ustar tar archive!\n", stderr); + goto fail; +fail_chksum: + fputs("invalid tar header checksum!\n", stderr); + goto fail; +fail: + clear_header(out); + return -1; +} + +void clear_header(tar_header_decoded_t *hdr) +{ + free(hdr->name); + free(hdr->link_target); + memset(hdr, 0, sizeof(*hdr)); +} diff --git a/lib/tar/skip.c b/lib/tar/skip.c new file mode 100644 index 0000000..69208b2 --- /dev/null +++ b/lib/tar/skip.c @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ +#include "util.h" +#include "tar.h" + +#include <stdio.h> + +static int skip_bytes(int fd, uint64_t size) +{ + unsigned char buffer[1024]; + ssize_t ret; + size_t diff; + + while (size != 0) { + diff = sizeof(buffer); + if (diff > size) + diff = size; + + ret = read_retry(fd, buffer, diff); + + if (ret < 0) { + perror("reading 
tar record data"); + return -1; + } + + if ((size_t)ret < diff) { + fputs("unexpected end of file\n", stderr); + return -1; + } + + size -= diff; + } + + return 0; +} + +int skip_padding(int fd, uint64_t size) +{ + size_t tail = size % 512; + + return tail ? skip_bytes(fd, 512 - tail) : 0; +} + +int skip_entry(int fd, uint64_t size) +{ + size_t tail = size % 512; + + return skip_bytes(fd, tail ? (size + 512 - tail) : size); +} diff --git a/lib/tar/write_header.c b/lib/tar/write_header.c new file mode 100644 index 0000000..80db327 --- /dev/null +++ b/lib/tar/write_header.c @@ -0,0 +1,248 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ +#include "util.h" +#include "tar.h" + +#include <sys/sysmacros.h> +#include <string.h> +#include <stdio.h> + +static unsigned long pax_hdr_counter = 0; +static char buffer[4096]; + +static void write_octal(char *dst, unsigned int value, int digits) +{ + char temp[64]; + + sprintf(temp, "%0*o ", digits, value); + memcpy(dst, temp, strlen(temp)); +} + +static int name_to_tar_header(tar_header_t *hdr, const char *path) +{ + size_t len = strlen(path); + const char *ptr; + + if ((len + 1) <= sizeof(hdr->name)) { + memcpy(hdr->name, path, len); + return 0; + } + + for (ptr = path; ; ++ptr) { + ptr = strchr(ptr, '/'); + if (ptr == NULL) + return -1; + + len = ptr - path; + if (len >= sizeof(hdr->prefix)) + continue; + if (strlen(ptr + 1) >= sizeof(hdr->name)) + continue; + break; + } + + memcpy(hdr->prefix, path, ptr - path); + memcpy(hdr->name, ptr + 1, strlen(ptr + 1)); + return 0; +} + +static void init_header(tar_header_t *hdr, const struct stat *sb, + const char *name, const char *slink_target) +{ + memset(hdr, 0, sizeof(*hdr)); + + name_to_tar_header(hdr, name); + memcpy(hdr->magic, TAR_MAGIC, sizeof(hdr->magic)); + memcpy(hdr->version, TAR_VERSION, sizeof(hdr->version)); + write_octal(hdr->mode, sb->st_mode & ~S_IFMT, 6); + write_octal(hdr->uid, sb->st_uid, 6); + write_octal(hdr->gid, sb->st_gid, 6); + write_octal(hdr->mtime, sb->st_mtime, 11); + write_octal(hdr->size, 
0, 11); + write_octal(hdr->devmajor, 0, 6); + write_octal(hdr->devminor, 0, 6); + + switch (sb->st_mode & S_IFMT) { + case S_IFREG: + write_octal(hdr->size, sb->st_size & 077777777777L, 11); + break; + case S_IFLNK: + if (sb->st_size < (off_t)sizeof(hdr->linkname)) + strcpy(hdr->linkname, slink_target); + break; + case S_IFCHR: + case S_IFBLK: + write_octal(hdr->devmajor, major(sb->st_rdev), 6); + write_octal(hdr->devminor, minor(sb->st_rdev), 6); + break; + } + + sprintf(hdr->uname, "%u", sb->st_uid); + sprintf(hdr->gname, "%u", sb->st_gid); +} + +static void update_checksum(tar_header_t *hdr) +{ + unsigned int chksum = 0; + size_t i; + + memset(hdr->chksum, ' ', sizeof(hdr->chksum)); + + for (i = 0; i < sizeof(*hdr); ++i) + chksum += ((unsigned char *)hdr)[i]; + + write_octal(hdr->chksum, chksum, 6); + hdr->chksum[6] = '\0'; + hdr->chksum[7] = ' '; +} + +static bool need_pax_header(const struct stat *sb, const char *name) +{ + tar_header_t tmp; + + if (sb->st_uid > 0777777 || sb->st_gid > 0777777) + return true; + + if (S_ISREG(sb->st_mode) && sb->st_size > 077777777777L) + return true; + + if (S_ISLNK(sb->st_mode) && sb->st_size >= (off_t)sizeof(tmp.linkname)) + return true; + + if (name_to_tar_header(&tmp, name)) + return true; + + return false; +} + +static char *write_pax_entry(char *dst, const char *key, const char *value) +{ + size_t i, len, prefix = 0, oldprefix; + + do { + len = prefix + 1 + strlen(key) + 1 + strlen(value) + 1; + + oldprefix = prefix; + prefix = 1; + + for (i = len; i >= 10; i /= 10) + ++prefix; + } while (oldprefix != prefix); + + sprintf(dst, "%zu %s=%s\n", len, key, value); + + return dst + len; +} + +static int write_pax_header(int fd, const struct stat *sb, const char *name, + const char *slink_target) +{ + char temp[64], *ptr; + struct stat fakesb; + tar_header_t hdr; + ssize_t ret; + size_t len; + + memset(buffer, 0, sizeof(buffer)); + memset(&fakesb, 0, sizeof(fakesb)); + fakesb.st_mode = S_IFREG | 0644; + + sprintf(temp, 
"pax%lu", pax_hdr_counter); + init_header(&hdr, &fakesb, temp, NULL); + hdr.typeflag = TAR_TYPE_PAX; + + sprintf(temp, "%u", sb->st_uid); + ptr = buffer; + ptr = write_pax_entry(ptr, "uid", temp); + ptr = write_pax_entry(ptr, "uname", temp); + + sprintf(temp, "%lu", sb->st_mtime); + ptr = write_pax_entry(ptr, "mtime", temp); + + sprintf(temp, "%u", sb->st_gid); + ptr = write_pax_entry(ptr, "gid", temp); + ptr = write_pax_entry(ptr, "gname", temp); + + ptr = write_pax_entry(ptr, "path", name); + + if (S_ISLNK(sb->st_mode)) { + ptr = write_pax_entry(ptr, "linkpath", slink_target); + } else if (S_ISREG(sb->st_mode)) { + sprintf(temp, "%lu", sb->st_size); + ptr = write_pax_entry(ptr, "size", temp); + } + + len = strlen(buffer); + write_octal(hdr.size, len, 11); + update_checksum(&hdr); + + ret = write_retry(fd, &hdr, sizeof(hdr)); + if (ret < 0) + goto fail_wr; + if ((size_t)ret < sizeof(hdr)) + goto fail_trunc; + + ret = write_retry(fd, buffer, len); + if (ret < 0) + goto fail_wr; + if ((size_t)ret < len) + goto fail_trunc; + + return padd_file(fd, len, 512); +fail_wr: + perror("writing pax header"); + return -1; +fail_trunc: + fputs("writing pax header: truncated write\n", stderr); + return -1; +} + +int write_tar_header(int fd, const struct stat *sb, const char *name, + const char *slink_target) +{ + const char *reason; + tar_header_t hdr; + ssize_t ret; + + if (need_pax_header(sb, name)) { + if (write_pax_header(fd, sb, name, slink_target)) + return -1; + + sprintf(buffer, "pax%lu_data", pax_hdr_counter++); + name = buffer; + } + + init_header(&hdr, sb, name, slink_target); + + switch (sb->st_mode & S_IFMT) { + case S_IFCHR: hdr.typeflag = TAR_TYPE_CHARDEV; break; + case S_IFBLK: hdr.typeflag = TAR_TYPE_BLOCKDEV; break; + case S_IFLNK: hdr.typeflag = TAR_TYPE_SLINK; break; + case S_IFREG: hdr.typeflag = TAR_TYPE_FILE; break; + case S_IFDIR: hdr.typeflag = TAR_TYPE_DIR; break; + case S_IFIFO: hdr.typeflag = TAR_TYPE_FIFO; break; + case S_IFSOCK: + reason = "cannot 
pack socket"; + goto out_skip; + default: + reason = "unknown type"; + goto out_skip; + } + + update_checksum(&hdr); + + ret = write_retry(fd, &hdr, sizeof(hdr)); + + if (ret < 0) { + perror("writing header record"); + } else if ((size_t)ret < sizeof(hdr)) { + fputs("writing header record: truncated write\n", stderr); + ret = -1; + } else { + ret = 0; + } + + return ret; +out_skip: + fprintf(stderr, "WARNING: skipping '%s' (%s)\n", name, reason); + return 1; +} -- cgit v1.2.3