From b45850388ea5505f4fd50cd659abe2f02aeca0b5 Mon Sep 17 00:00:00 2001 From: David Oberhollenzer Date: Fri, 16 Aug 2019 19:52:42 +0200 Subject: cleanup: internalize deduplication list in data_writer This change removes the need for passing a list of files around for deduplication. Also the deduplication code no longer needs to worry about order, since the file being deduplicated is only added after deduplication is done. Signed-off-by: David Oberhollenzer --- include/data_writer.h | 19 ++++++------------- include/fstree.h | 4 +--- lib/fstree/deduplicate.c | 12 ++++-------- lib/sqfs/data_writer.c | 34 ++++++++++++++++++---------------- mkfs/mkfs.c | 39 +++++++++++++++++---------------------- tar/tar2sqfs.c | 11 ++++------- 6 files changed, 50 insertions(+), 69 deletions(-) diff --git a/include/data_writer.h b/include/data_writer.h index dc63592..f384ce7 100644 --- a/include/data_writer.h +++ b/include/data_writer.h @@ -69,32 +69,25 @@ int data_writer_flush_fragments(data_writer_t *data); Blocks or fragments that are all zero bytes automatically detected, not written out and the sparse file accounting updated accordingly. - The flags argument is a combination of DW_* flags. - - If 'list' is not NULL, it is used for fragment and data block deduplication. - It is assumed that the list is processed in order and scanning stops as soon - as the current file info 'fi' is encountered in the list. + The flags argument is a combination of DW_* flags. After completion the + data writer collects the 'fi' in an internal list it uses for deduplication. Returns 0 on success, prints errors to stderr. */ int write_data_from_fd(data_writer_t *data, file_info_t *fi, int infd, - int flags, file_info_t *list); + int flags); /* Does the same as write_data_from_fd but the input file is the condensed representation of a sparse file. The layout must be in order and non-overlapping. - The flags argument is a combination of DW_* flags. 
- - If 'list' is not NULL, it is used for fragment and data block deduplication. - It is assumed that the list is processed in order and scanning stops as soon - as the current file info 'fi' is encountered in the list. + The flags argument is a combination of DW_* flags. After completion the + data writer collects the 'fi' in an internal list it uses for deduplication. Returns 0 on success, prints errors to stderr. */ int write_data_from_fd_condensed(data_writer_t *data, file_info_t *fi, - int infd, sparse_map_t *map, int flags, - file_info_t *list); + int infd, sparse_map_t *map, int flags); #endif /* DATA_WRITER_H */ diff --git a/include/fstree.h b/include/fstree.h index cbd3c9c..a5d930e 100644 --- a/include/fstree.h +++ b/include/fstree.h @@ -313,7 +313,6 @@ tree_node_t *fstree_node_from_path(fstree_t *fs, const char *path); /* Walk through 'list' to find a file with a fragment that has the same size ('frag_size') and checksum ('chksum') as 'fi'. - Processing stopps if 'fi' itself is found in the list. Returns NULL if no such fragment could be found. */ @@ -323,8 +322,7 @@ file_info_t *fragment_by_chksum(file_info_t *fi, uint32_t chksum, /* Walk through 'list' to find a file that contains the same sequence of blocks - as 'file', comparing size and checksum. Processing stops if 'file' is found - in the list. + as 'file', comparing size and checksum. Returns NULL if no such fragment could be found. 
*/ diff --git a/lib/fstree/deduplicate.c b/lib/fstree/deduplicate.c index 7390a2c..00815a6 100644 --- a/lib/fstree/deduplicate.c +++ b/lib/fstree/deduplicate.c @@ -16,10 +16,8 @@ file_info_t *fragment_by_chksum(file_info_t *fi, uint32_t chksum, file_info_t *it; for (it = list; it != NULL; it = it->next) { - if (it == fi) { - it = NULL; - break; - } + if (it == fi) + continue; if (!(it->flags & FILE_FLAG_HAS_FRAGMENT)) continue; @@ -82,10 +80,8 @@ uint64_t find_equal_blocks(file_info_t *file, file_info_t *list, } for (it = list; it != NULL; it = it->next) { - if (it == file) { - it = NULL; - break; - } + if (it == file) + continue; if (it->flags & FILE_FLAG_BLOCKS_ARE_DUPLICATE) continue; diff --git a/lib/sqfs/data_writer.c b/lib/sqfs/data_writer.c index 0eeecc3..ea650bd 100644 --- a/lib/sqfs/data_writer.c +++ b/lib/sqfs/data_writer.c @@ -31,6 +31,7 @@ struct data_writer_t { int block_idx; + file_info_t *list; sqfs_super_t *super; compressor_t *cmp; int outfd; @@ -132,10 +133,10 @@ int data_writer_flush_fragments(data_writer_t *data) return 0; } -static int deduplicate_data(data_writer_t *data, file_info_t *fi, - file_info_t *list) +static int deduplicate_data(data_writer_t *data, file_info_t *fi) { - uint64_t ref = find_equal_blocks(fi, list, data->super->block_size); + uint64_t ref = find_equal_blocks(fi, data->list, + data->super->block_size); if (ref > 0) { data->super->bytes_used = fi->startblock; @@ -159,7 +160,7 @@ fail_truncate: } static int flush_data_block(data_writer_t *data, size_t size, bool is_last, - file_info_t *fi, int flags, file_info_t *list) + file_info_t *fi, int flags) { uint32_t out, chksum; file_info_t *ref; @@ -169,7 +170,7 @@ static int flush_data_block(data_writer_t *data, size_t size, bool is_last, fi->blocks[data->block_idx].chksum = 0; fi->sparse += size; data->block_idx++; - return is_last ? deduplicate_data(data, fi, list) : 0; + return is_last ? 
deduplicate_data(data, fi) : 0; } chksum = update_crc32(0, data->block, size); @@ -177,10 +178,10 @@ static int flush_data_block(data_writer_t *data, size_t size, bool is_last, if (size < data->super->block_size && !(flags & DW_DONT_FRAGMENT)) { fi->flags |= FILE_FLAG_HAS_FRAGMENT; - if (deduplicate_data(data, fi, list)) + if (deduplicate_data(data, fi)) return -1; - ref = fragment_by_chksum(fi, chksum, size, list, + ref = fragment_by_chksum(fi, chksum, size, data->list, data->super->block_size); if (ref != NULL) { @@ -211,7 +212,7 @@ static int flush_data_block(data_writer_t *data, size_t size, bool is_last, fi->blocks[data->block_idx].size = out; data->block_idx++; - if (is_last && deduplicate_data(data, fi, list) != 0) + if (is_last && deduplicate_data(data, fi) != 0) return -1; } @@ -236,16 +237,18 @@ fail_seek: return -1; } -static int end_file(data_writer_t *data, int flags) +static int end_file(data_writer_t *data, file_info_t *fi, int flags) { if ((flags & DW_ALLIGN_DEVBLK) && allign_file(data) != 0) return -1; + + fi->next = data->list; + data->list = fi; return 0; } int write_data_from_fd(data_writer_t *data, file_info_t *fi, - int infd, int flags, file_info_t *list) + int infd, int flags) { uint64_t count; bool is_last; @@ -266,16 +269,15 @@ int write_data_from_fd(data_writer_t *data, file_info_t *fi, if (read_data(fi->input_file, infd, data->block, diff)) return -1; - if (flush_data_block(data, diff, is_last, fi, flags, list)) + if (flush_data_block(data, diff, is_last, fi, flags)) return -1; } - return end_file(data, flags); + return end_file(data, fi, flags); } int write_data_from_fd_condensed(data_writer_t *data, file_info_t *fi, - int infd, sparse_map_t *map, int flags, - file_info_t *list) + int infd, sparse_map_t *map, int flags) { size_t start, count, diff; sparse_map_t *m; @@ -330,11 +332,11 @@ int write_data_from_fd_condensed(data_writer_t *data, file_info_t *fi, map = map->next; } - if (flush_data_block(data, diff, is_last, fi, flags, list)) + if
(flush_data_block(data, diff, is_last, fi, flags)) return -1; } - return end_file(data, flags); + return end_file(data, fi, flags); fail_map_size: fprintf(stderr, "%s: sparse file map spans beyond file size\n", fi->input_file); diff --git a/mkfs/mkfs.c b/mkfs/mkfs.c index 8ee2268..3dc9efb 100644 --- a/mkfs/mkfs.c +++ b/mkfs/mkfs.c @@ -6,26 +6,6 @@ */ #include "mkfs.h" -static int process_file(data_writer_t *data, file_info_t *fi, bool quiet, - file_info_t *list) -{ - int ret, infd; - - if (!quiet) - printf("packing %s\n", fi->input_file); - - infd = open(fi->input_file, O_RDONLY); - if (infd < 0) { - perror(fi->input_file); - return -1; - } - - ret = write_data_from_fd(data, fi, infd, 0, list); - - close(infd); - return ret; -} - static int set_working_dir(options_t *opt) { const char *ptr; @@ -51,12 +31,27 @@ static int restore_working_dir(options_t *opt) static int pack_files(data_writer_t *data, fstree_t *fs, options_t *opt) { file_info_t *fi; + int ret, infd; if (set_working_dir(opt)) return -1; - for (fi = fs->files; fi != NULL; fi = fi->next) { - if (process_file(data, fi, opt->quiet, fs->files)) + while (fs->files != NULL) { + fi = fs->files; + fs->files = fi->next; + + if (!opt->quiet) + printf("packing %s\n", fi->input_file); + + infd = open(fi->input_file, O_RDONLY); + if (infd < 0) { + perror(fi->input_file); + return -1; + } + + ret = write_data_from_fd(data, fi, infd, 0); + close(infd); + if (ret) return -1; } diff --git a/tar/tar2sqfs.c b/tar/tar2sqfs.c index f774c9d..36f8cd6 100644 --- a/tar/tar2sqfs.c +++ b/tar/tar2sqfs.c @@ -199,20 +199,20 @@ fail_arg: } static int write_file(tar_header_decoded_t *hdr, file_info_t *fi, - data_writer_t *data, file_info_t *list) + data_writer_t *data) { int ret; if (hdr->sparse != NULL) { ret = write_data_from_fd_condensed(data, fi, STDIN_FILENO, - hdr->sparse, 0, list); + hdr->sparse, 0); if (ret) return -1; return skip_padding(STDIN_FILENO, hdr->record_size); } - if (write_data_from_fd(data, fi, STDIN_FILENO, 0, 
list)) + if (write_data_from_fd(data, fi, STDIN_FILENO, 0)) return -1; return skip_padding(STDIN_FILENO, fi->size); @@ -265,11 +265,8 @@ static int create_node_and_repack_data(tar_header_decoded_t *hdr, fstree_t *fs, } if (S_ISREG(hdr->sb.st_mode)) { - if (write_file(hdr, node->data.file, data, fs->files)) + if (write_file(hdr, node->data.file, data)) return -1; - - node->data.file->next = fs->files; - fs->files = node->data.file; } return 0; -- cgit v1.2.3