From e165fb864fe86eda398a14713cf38924cc537adc Mon Sep 17 00:00:00 2001 From: David Oberhollenzer Date: Sat, 14 Sep 2019 17:57:58 +0200 Subject: Remove fstree file flags As a side effect, this requires the data writer to keep track of statistics. Signed-off-by: David Oberhollenzer --- include/data_writer.h | 13 +++++++ include/fstree.h | 11 ------ include/highlevel.h | 6 +-- lib/fstree/optimize_unpack_order.c | 15 ++++++-- lib/sqfshelper/data_reader.c | 6 ++- lib/sqfshelper/data_writer.c | 31 ++++++++++++--- lib/sqfshelper/deserialize_fstree.c | 5 +-- lib/sqfshelper/statistics.c | 72 ++++++----------------------------- lib/sqfshelper/tree_node_from_inode.c | 17 ++------- lib/sqfshelper/tree_node_to_inode.c | 19 +++++---- mkfs/mkfs.c | 2 +- tar/tar2sqfs.c | 2 +- 12 files changed, 89 insertions(+), 110 deletions(-) diff --git a/include/data_writer.h b/include/data_writer.h index 9e82d11..5987cd6 100644 --- a/include/data_writer.h +++ b/include/data_writer.h @@ -16,6 +16,17 @@ typedef struct data_writer_t data_writer_t; +typedef struct { + size_t file_count; + size_t blocks_written; + size_t frag_blocks_written; + size_t duplicate_blocks; + size_t sparse_blocks; + size_t frag_count; + size_t frag_dup; + uint64_t bytes_read; +} data_writer_stats_t; + enum { /* Don't generate fragments, always write the last block to disk as a block, even if it is incomplete. 
*/ @@ -89,4 +100,6 @@ int write_data_from_fd(data_writer_t *data, file_info_t *fi, int infd, int write_data_from_fd_condensed(data_writer_t *data, file_info_t *fi, int infd, sparse_map_t *map, int flags); +data_writer_stats_t *data_writer_get_stats(data_writer_t *data); + #endif /* DATA_WRITER_H */ diff --git a/include/fstree.h b/include/fstree.h index 8d0b952..4c02ce5 100644 --- a/include/fstree.h +++ b/include/fstree.h @@ -27,14 +27,6 @@ typedef struct dir_info_t dir_info_t; typedef struct fstree_t fstree_t; typedef struct tree_xattr_t tree_xattr_t; -enum { - FILE_FLAG_HAS_FRAGMENT = 0x01, - - FILE_FLAG_FRAGMENT_IS_DUPLICATE = 0x02, - - FILE_FLAG_BLOCKS_ARE_DUPLICATE = 0x04, -}; - enum { DIR_SCAN_KEEP_TIME = 0x01, @@ -99,9 +91,6 @@ struct file_info_t { /* Byte offset into the fragment block. */ uint32_t fragment_offset; - /* combination of FILE_FLAG_* flags */ - uint32_t flags; - /* Stores data about each full data block. */ /* Bit (1 << 24) is set if the block is stored uncompressed. */ uint32_t block_size[]; diff --git a/include/highlevel.h b/include/highlevel.h index 4c560f7..8173d6f 100644 --- a/include/highlevel.h +++ b/include/highlevel.h @@ -19,6 +19,7 @@ #include "sqfs/dir.h" #include "sqfs/io.h" #include "data_reader.h" +#include "data_writer.h" #include "fstree.h" #include @@ -63,8 +64,7 @@ int sqfs_serialize_fstree(sqfs_file_t *file, sqfs_super_t *super, fstree_t *fs, */ tree_node_t *tree_node_from_inode(sqfs_inode_generic_t *inode, const sqfs_id_table_t *idtbl, - const char *name, - size_t block_size); + const char *name); /* Restore a file system tree from a squashfs image. 
The given flags are a @@ -92,7 +92,7 @@ int write_export_table(sqfs_file_t *file, fstree_t *fs, sqfs_super_t *super, sqfs_compressor_t *cmp); /* Print out fancy statistics for squashfs packing tools */ -void sqfs_print_statistics(fstree_t *fs, sqfs_super_t *super); +void sqfs_print_statistics(sqfs_super_t *super, data_writer_stats_t *stats); /* Open a squashfs file, extract all the information we may need and construct datastructures we need to access its contents. diff --git a/lib/fstree/optimize_unpack_order.c b/lib/fstree/optimize_unpack_order.c index ad036e8..51576bd 100644 --- a/lib/fstree/optimize_unpack_order.c +++ b/lib/fstree/optimize_unpack_order.c @@ -7,6 +7,15 @@ #include "config.h" #include "fstree.h" +static bool has_fragment(const fstree_t *fs, const file_info_t *file) +{ + if (file->size % fs->block_size == 0) + return false; + + return file->fragment_offset < fs->block_size && + (file->fragment != 0xFFFFFFFF); +} + static int compare_files(const fstree_t *fs, const file_info_t *lhs, const file_info_t *rhs) { @@ -20,8 +29,8 @@ static int compare_files(const fstree_t *fs, const file_info_t *lhs, /* Files with fragments come first, ordered by ID. In case of tie, files without data blocks come first, and the others are ordered by start block. 
*/ - if (lhs->flags & FILE_FLAG_HAS_FRAGMENT) { - if (!(rhs->flags & FILE_FLAG_HAS_FRAGMENT)) + if (has_fragment(fs, lhs)) { + if (!(has_fragment(fs, rhs))) return -1; if (lhs->fragment < rhs->fragment) @@ -36,7 +45,7 @@ static int compare_files(const fstree_t *fs, const file_info_t *lhs, goto order_by_start; } - if (rhs->flags & FILE_FLAG_HAS_FRAGMENT) + if (has_fragment(fs, rhs)) return 1; /* order the rest by start block */ diff --git a/lib/sqfshelper/data_reader.c b/lib/sqfshelper/data_reader.c index 6906933..42a351f 100644 --- a/lib/sqfshelper/data_reader.c +++ b/lib/sqfshelper/data_reader.c @@ -181,7 +181,8 @@ int data_reader_dump_file(data_reader_t *data, file_info_t *fi, int outfd, off_t off = fi->startblock; size_t i, diff; - if (fragsz != 0 && !(fi->flags & FILE_FLAG_HAS_FRAGMENT)) { + if (fragsz != 0 && (fi->fragment_offset >= data->block_size || + fi->fragment == 0xFFFFFFFF)) { fragsz = 0; ++count; } @@ -249,7 +250,8 @@ ssize_t data_reader_read(data_reader_t *data, file_info_t *fi, fragsz = fi->size % data->block_size; count = fi->size / data->block_size; - if (fragsz != 0 && !(fi->flags & FILE_FLAG_HAS_FRAGMENT)) { + if (fragsz != 0 && (fi->fragment_offset >= data->block_size || + fi->fragment == 0xFFFFFFFF)) { fragsz = 0; ++count; } diff --git a/lib/sqfshelper/data_writer.c b/lib/sqfshelper/data_writer.c index 7d84762..43d5851 100644 --- a/lib/sqfshelper/data_writer.c +++ b/lib/sqfshelper/data_writer.c @@ -58,6 +58,8 @@ struct data_writer_t { size_t frag_list_num; size_t frag_list_max; frag_info_t *frag_list; + + data_writer_stats_t stats; }; enum { @@ -146,12 +148,16 @@ static int block_callback(void *user, sqfs_block_t *blk) data->super->flags &= ~SQFS_FLAG_NO_FRAGMENTS; data->super->flags |= SQFS_FLAG_ALWAYS_FRAGMENTS; + + data->stats.frag_blocks_written += 1; } else { fi->block_size[blk->index] = htole32(out); if (store_block_location(data, offset, out, blk->checksum)) return -1; + + data->stats.blocks_written += 1; } if 
(data->file->write_at(data->file, offset, @@ -170,9 +176,10 @@ static int block_callback(void *user, sqfs_block_t *blk) fi->startblock = data->blocks[start].offset; if (start + count < data->file_start) { - fi->flags |= FILE_FLAG_BLOCKS_ARE_DUPLICATE; data->num_blocks = data->file_start; + data->stats.duplicate_blocks += count; + if (data->file->truncate(data->file, data->start)) { perror("truncating squashfs image after " "file deduplication"); @@ -227,8 +234,9 @@ static int handle_fragment(data_writer_t *data, sqfs_block_t *frag) if (data->frag_list[i].signature == signature) { fi->fragment_offset = data->frag_list[i].offset; fi->fragment = data->frag_list[i].index; - fi->flags |= FILE_FLAG_FRAGMENT_IS_DUPLICATE; free(frag); + + data->stats.frag_dup += 1; return 0; } } @@ -282,6 +290,8 @@ static int handle_fragment(data_writer_t *data, sqfs_block_t *frag) data->frag_block->size += frag->size; free(frag); + + data->stats.frag_count += 1; return 0; fail: free(frag); @@ -360,6 +370,8 @@ int write_data_from_fd(data_writer_t *data, file_info_t *fi, blk->index = i++; if (is_zero_block(blk->data, blk->size)) { + data->stats.sparse_blocks += 1; + fi->block_size[blk->index] = 0; free(blk); continue; @@ -367,8 +379,6 @@ int write_data_from_fd(data_writer_t *data, file_info_t *fi, if (diff < data->super->block_size && !(flags & DW_DONT_FRAGMENT)) { - fi->flags |= FILE_FLAG_HAS_FRAGMENT; - if (!(blk_flags & (BLK_FIRST_BLOCK | BLK_LAST_BLOCK))) { blk_flags |= BLK_LAST_BLOCK; @@ -395,6 +405,8 @@ int write_data_from_fd(data_writer_t *data, file_info_t *fi, return -1; } + data->stats.bytes_read += fi->size; + data->stats.file_count += 1; return 0; } @@ -494,6 +506,8 @@ int write_data_from_fd_condensed(data_writer_t *data, file_info_t *fi, } if (is_zero_block(blk->data, blk->size)) { + data->stats.sparse_blocks += 1; + fi->block_size[blk->index] = 0; free(blk); continue; @@ -501,8 +515,6 @@ int write_data_from_fd_condensed(data_writer_t *data, file_info_t *fi, if (diff < 
data->super->block_size && !(flags & DW_DONT_FRAGMENT)) { - fi->flags |= FILE_FLAG_HAS_FRAGMENT; - if (!(blk_flags & (BLK_FIRST_BLOCK | BLK_LAST_BLOCK))) { blk_flags |= BLK_LAST_BLOCK; @@ -529,6 +541,8 @@ int write_data_from_fd_condensed(data_writer_t *data, file_info_t *fi, return -1; } + data->stats.bytes_read += fi->size; + data->stats.file_count += 1; return 0; } @@ -623,3 +637,8 @@ int data_writer_sync(data_writer_t *data) return sqfs_block_processor_finish(data->proc); } + +data_writer_stats_t *data_writer_get_stats(data_writer_t *data) +{ + return &data->stats; +} diff --git a/lib/sqfshelper/deserialize_fstree.c b/lib/sqfshelper/deserialize_fstree.c index 6c536c3..ef000d4 100644 --- a/lib/sqfshelper/deserialize_fstree.c +++ b/lib/sqfshelper/deserialize_fstree.c @@ -155,8 +155,7 @@ static int fill_dir(sqfs_meta_reader_t *ir, sqfs_meta_reader_t *dr, } n = tree_node_from_inode(inode, idtbl, - (char *)ent->name, - fs->block_size); + (char *)ent->name); if (n == NULL) { free(ent); @@ -283,7 +282,7 @@ int deserialize_fstree(fstree_t *out, sqfs_super_t *super, out->defaults.st_mode = 0755; out->defaults.st_mtime = super->modification_time; - out->root = tree_node_from_inode(root, idtbl, "", out->block_size); + out->root = tree_node_from_inode(root, idtbl, ""); if (out->root == NULL) { free(root); diff --git a/lib/sqfshelper/statistics.c b/lib/sqfshelper/statistics.c index 0fe325b..fda4c3f 100644 --- a/lib/sqfshelper/statistics.c +++ b/lib/sqfshelper/statistics.c @@ -9,73 +9,25 @@ #include -void sqfs_print_statistics(fstree_t *fs, sqfs_super_t *super) +void sqfs_print_statistics(sqfs_super_t *super, data_writer_stats_t *stats) { - size_t blocks_written = 0, duplicate_blocks = 0, sparse_blocks = 0; - size_t ratio, file_count = 0, file_dup_count = 0; - size_t frag_count = 0, frag_dup = 0; - size_t i, num_blocks, sparse; - uint64_t output_bytes = 0; - uint64_t input_bytes = 0; - file_info_t *fi; - bool is_dupe; + uint64_t bytes_written = super->inode_table_start - 
sizeof(*super); + size_t ratio; - for (fi = fs->files; fi != NULL; fi = fi->next) { - num_blocks = fi->size / fs->block_size; - is_dupe = true; - - if ((fi->size % fs->block_size) && - !(fi->flags & FILE_FLAG_HAS_FRAGMENT)) { - ++num_blocks; - } - - for (sparse = 0, i = 0; i < num_blocks; ++i) { - if (fi->block_size[i] == 0) - sparse += 1; - } - - if (num_blocks > sparse) { - if (fi->flags & FILE_FLAG_BLOCKS_ARE_DUPLICATE) { - duplicate_blocks += num_blocks - sparse; - } else { - blocks_written += num_blocks - sparse; - is_dupe = false; - } - } - - if (fi->flags & FILE_FLAG_HAS_FRAGMENT) { - if (fi->flags & FILE_FLAG_FRAGMENT_IS_DUPLICATE) { - frag_dup += 1; - } else { - frag_count += 1; - is_dupe = false; - } - } - - if (is_dupe) - file_dup_count += 1; - - sparse_blocks += sparse; - file_count += 1; - input_bytes += fi->size; - } - - if (input_bytes > 0) { - output_bytes = super->inode_table_start - sizeof(*super); - ratio = (100 * output_bytes) / input_bytes; + if (stats->bytes_read > 0) { + ratio = (100 * bytes_written) / stats->bytes_read; } else { ratio = 100; } fputs("---------------------------------------------------\n", stdout); - printf("Input files processed: %zu\n", file_count); - printf("Files that were complete duplicates: %zu\n", file_dup_count); - printf("Data blocks actually written: %zu\n", blocks_written); - printf("Fragment blocks written: %u\n", super->fragment_entry_count); - printf("Duplicate data blocks omitted: %zu\n", duplicate_blocks); - printf("Sparse blocks omitted: %zu\n", sparse_blocks); - printf("Fragments actually written: %zu\n", frag_count); - printf("Duplicated fragments omitted: %zu\n", frag_dup); + printf("Input files processed: %zu\n", stats->file_count); + printf("Data blocks actually written: %zu\n", stats->blocks_written); + printf("Fragment blocks written: %zu\n", stats->frag_blocks_written); + printf("Duplicate data blocks omitted: %zu\n", stats->duplicate_blocks); + printf("Sparse blocks omitted: %zu\n",
stats->sparse_blocks); + printf("Fragments actually written: %zu\n", stats->frag_count); + printf("Duplicated fragments omitted: %zu\n", stats->frag_dup); printf("Total number of inodes: %u\n", super->inode_count); printf("Number of unique group/user IDs: %u\n", super->id_count); printf("Data compression ratio: %zu%%\n", ratio); diff --git a/lib/sqfshelper/tree_node_from_inode.c b/lib/sqfshelper/tree_node_from_inode.c index fee191b..f5b643c 100644 --- a/lib/sqfshelper/tree_node_from_inode.c +++ b/lib/sqfshelper/tree_node_from_inode.c @@ -38,18 +38,10 @@ static size_t compute_size(sqfs_inode_generic_t *inode, const char *name) return size; } -static void copy_block_sizes(sqfs_inode_generic_t *inode, tree_node_t *out, - size_t block_size) +static void copy_block_sizes(sqfs_inode_generic_t *inode, tree_node_t *out) { size_t i; - if ((out->data.file->size % block_size) != 0) { - if (out->data.file->fragment != 0xFFFFFFFF && - out->data.file->fragment_offset != 0xFFFFFFFF) { - out->data.file->flags |= FILE_FLAG_HAS_FRAGMENT; - } - } - out->name += inode->num_file_blocks * sizeof(out->data.file->block_size[0]); @@ -59,8 +51,7 @@ static void copy_block_sizes(sqfs_inode_generic_t *inode, tree_node_t *out, tree_node_t *tree_node_from_inode(sqfs_inode_generic_t *inode, const sqfs_id_table_t *idtbl, - const char *name, - size_t block_size) + const char *name) { tree_node_t *out; @@ -112,7 +103,7 @@ tree_node_t *tree_node_from_inode(sqfs_inode_generic_t *inode, out->data.file->fragment_offset = inode->data.file.fragment_offset; - copy_block_sizes(inode, out, block_size); + copy_block_sizes(inode, out); break; case SQFS_INODE_EXT_FILE: out->data.file = (file_info_t *)out->payload; @@ -125,7 +116,7 @@ tree_node_t *tree_node_from_inode(sqfs_inode_generic_t *inode, out->data.file->fragment_offset = inode->data.file_ext.fragment_offset; - copy_block_sizes(inode, out, block_size); + copy_block_sizes(inode, out); break; case SQFS_INODE_SLINK: case SQFS_INODE_EXT_SLINK: diff --git 
a/lib/sqfshelper/tree_node_to_inode.c b/lib/sqfshelper/tree_node_to_inode.c index cc76a8d..2fdc21b 100644 --- a/lib/sqfshelper/tree_node_to_inode.c +++ b/lib/sqfshelper/tree_node_to_inode.c @@ -81,6 +81,15 @@ static int get_type(tree_node_t *node) assert(0); } +static bool has_fragment(const fstree_t *fs, const file_info_t *file) +{ + if (file->size % fs->block_size == 0) + return false; + + return file->fragment_offset < fs->block_size && + (file->fragment != 0xFFFFFFFF); +} + sqfs_inode_generic_t *tree_node_to_inode(fstree_t *fs, sqfs_id_table_t *idtbl, tree_node_t *node) { @@ -97,10 +106,8 @@ sqfs_inode_generic_t *tree_node_to_inode(fstree_t *fs, sqfs_id_table_t *idtbl, block_count = fi->size / fs->block_size; - if ((fi->size % fs->block_size) != 0 && - !(fi->flags & FILE_FLAG_HAS_FRAGMENT)) { + if ((fi->size % fs->block_size) != 0 && !has_fragment(fs, fi)) ++block_count; - } extra = block_count * sizeof(uint32_t); } @@ -176,8 +183,7 @@ sqfs_inode_generic_t *tree_node_to_inode(fstree_t *fs, sqfs_id_table_t *idtbl, inode->data.file.fragment_offset = 0xFFFFFFFF; inode->data.file.file_size = fi->size; - if ((fi->size % fs->block_size) != 0 && - (fi->flags & FILE_FLAG_HAS_FRAGMENT)) { + if (has_fragment(fs, fi)) { inode->data.file.fragment_index = fi->fragment; inode->data.file.fragment_offset = fi->fragment_offset; } @@ -191,8 +197,7 @@ sqfs_inode_generic_t *tree_node_to_inode(fstree_t *fs, sqfs_id_table_t *idtbl, inode->data.file_ext.fragment_offset = 0xFFFFFFFF; inode->data.file_ext.xattr_idx = xattr; - if ((fi->size % fs->block_size) != 0 && - (fi->flags & FILE_FLAG_HAS_FRAGMENT)) { + if (has_fragment(fs, fi)) { inode->data.file_ext.fragment_idx = fi->fragment; inode->data.file_ext.fragment_offset = fi->fragment_offset; diff --git a/mkfs/mkfs.c b/mkfs/mkfs.c index 0c4e9a5..a046cbd 100644 --- a/mkfs/mkfs.c +++ b/mkfs/mkfs.c @@ -196,7 +196,7 @@ int main(int argc, char **argv) if (!opt.quiet) { fstree_gen_file_list(&fs); - sqfs_print_statistics(&fs, &super); + 
sqfs_print_statistics(&super, data_writer_get_stats(data)); } status = EXIT_SUCCESS; diff --git a/tar/tar2sqfs.c b/tar/tar2sqfs.c index 5f53547..13276b8 100644 --- a/tar/tar2sqfs.c +++ b/tar/tar2sqfs.c @@ -466,7 +466,7 @@ int main(int argc, char **argv) if (!quiet) { fstree_gen_file_list(&fs); - sqfs_print_statistics(&fs, &super); + sqfs_print_statistics(&super, data_writer_get_stats(data)); } status = EXIT_SUCCESS; -- cgit v1.2.3