diff options
| author | David Oberhollenzer <david.oberhollenzer@sigma-star.at> | 2019-08-16 19:52:42 +0200 | 
|---|---|---|
| committer | David Oberhollenzer <david.oberhollenzer@sigma-star.at> | 2019-08-18 22:44:39 +0200 | 
| commit | b45850388ea5505f4fd50cd659abe2f02aeca0b5 (patch) | |
| tree | 994f878caf990d1569831762f0b0f4d787ab9839 | |
| parent | 1a95478b8d340c8b6b9dbff4f38f9293388fd1a3 (diff) | |
cleanup: internalize deduplication list in data_writer
This change removes the need for passing a list of files around for
deduplication. Also, the deduplication code no longer needs to worry
about list order, since the file being deduplicated is only added to the
internal list after deduplication is done.
Signed-off-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
| -rw-r--r-- | include/data_writer.h | 19 | ||||
| -rw-r--r-- | include/fstree.h | 4 | ||||
| -rw-r--r-- | lib/fstree/deduplicate.c | 12 | ||||
| -rw-r--r-- | lib/sqfs/data_writer.c | 34 | ||||
| -rw-r--r-- | mkfs/mkfs.c | 39 | ||||
| -rw-r--r-- | tar/tar2sqfs.c | 11 | 
6 files changed, 50 insertions, 69 deletions
diff --git a/include/data_writer.h b/include/data_writer.h index dc63592..f384ce7 100644 --- a/include/data_writer.h +++ b/include/data_writer.h @@ -69,32 +69,25 @@ int data_writer_flush_fragments(data_writer_t *data);    Blocks or fragments that are all zero bytes automatically detected,    not written out and the sparse file accounting updated accordingly. -  The flags argument is a combination of DW_* flags. - -  If 'list' is not NULL, it is used for fragment and data block deduplication. -  It is assumed that the list is processed in order and scanning stops as soon -  as the current file info 'fi' is encountered in the list. +  The flags argument is a combination of DW_* flags. After completion the +  data writer collects the 'fi' in an internal list it uses for deduplication.    Returns 0 on success, prints errors to stderr.  */  int write_data_from_fd(data_writer_t *data, file_info_t *fi, int infd, -		       int flags, file_info_t *list); +		       int flags);  /*    Does the same as write_data_from_fd but the input file is the condensed    representation of a sparse file. The layout must be in order and    non-overlapping. -  The flags argument is a combination of DW_* flags. - -  If 'list' is not NULL, it is used for fragment and data block deduplication. -  It is assumed that the list is processed in order and scanning stops as soon -  as the current file info 'fi' is encountered in the list. +  The flags argument is a combination of DW_* flags. After completion the +  data writer collects the 'fi' in an internal list it uses for deduplication.    Returns 0 on success, prints errors to stderr.   
*/  int write_data_from_fd_condensed(data_writer_t *data, file_info_t *fi, -				 int infd, sparse_map_t *map, int flags, -				 file_info_t *list); +				 int infd, sparse_map_t *map, int flags);  #endif /* DATA_WRITER_H */ diff --git a/include/fstree.h b/include/fstree.h index cbd3c9c..a5d930e 100644 --- a/include/fstree.h +++ b/include/fstree.h @@ -313,7 +313,6 @@ tree_node_t *fstree_node_from_path(fstree_t *fs, const char *path);  /*    Walk through 'list' to find a file with a fragment that has    the same size ('frag_size') and checksum ('chksum') as 'fi'. -  Processing stopps if 'fi' itself is found in the list.    Returns NULL if no such fragment could be found.  */ @@ -323,8 +322,7 @@ file_info_t *fragment_by_chksum(file_info_t *fi, uint32_t chksum,  /*    Walk through 'list' to find a file that contains the same sequence of blocks -  as 'file', comparing size and checksum. Processing stops if 'file' is found -  in the list. +  as 'file', comparing size and checksum.    Returns NULL if no such fragment could be found.   
*/ diff --git a/lib/fstree/deduplicate.c b/lib/fstree/deduplicate.c index 7390a2c..00815a6 100644 --- a/lib/fstree/deduplicate.c +++ b/lib/fstree/deduplicate.c @@ -16,10 +16,8 @@ file_info_t *fragment_by_chksum(file_info_t *fi, uint32_t chksum,  	file_info_t *it;  	for (it = list; it != NULL; it = it->next) { -		if (it == fi) { -			it = NULL; -			break; -		} +		if (it == fi) +			continue;  		if (!(it->flags & FILE_FLAG_HAS_FRAGMENT))  			continue; @@ -82,10 +80,8 @@ uint64_t find_equal_blocks(file_info_t *file, file_info_t *list,  	}  	for (it = list; it != NULL; it = it->next) { -		if (it == file) { -			it = NULL; -			break; -		} +		if (it == file) +			continue;  		if (it->flags & FILE_FLAG_BLOCKS_ARE_DUPLICATE)  			continue; diff --git a/lib/sqfs/data_writer.c b/lib/sqfs/data_writer.c index 0eeecc3..ea650bd 100644 --- a/lib/sqfs/data_writer.c +++ b/lib/sqfs/data_writer.c @@ -31,6 +31,7 @@ struct data_writer_t {  	int block_idx; +	file_info_t *list;  	sqfs_super_t *super;  	compressor_t *cmp;  	int outfd; @@ -132,10 +133,10 @@ int data_writer_flush_fragments(data_writer_t *data)  	return 0;  } -static int deduplicate_data(data_writer_t *data, file_info_t *fi, -			    file_info_t *list) +static int deduplicate_data(data_writer_t *data, file_info_t *fi)  { -	uint64_t ref = find_equal_blocks(fi, list, data->super->block_size); +	uint64_t ref = find_equal_blocks(fi, data->list, +					 data->super->block_size);  	if (ref > 0) {  		data->super->bytes_used = fi->startblock; @@ -159,7 +160,7 @@ fail_truncate:  }  static int flush_data_block(data_writer_t *data, size_t size, bool is_last, -			    file_info_t *fi, int flags, file_info_t *list) +			    file_info_t *fi, int flags)  {  	uint32_t out, chksum;  	file_info_t *ref; @@ -169,7 +170,7 @@ static int flush_data_block(data_writer_t *data, size_t size, bool is_last,  		fi->blocks[data->block_idx].chksum = 0;  		fi->sparse += size;  		data->block_idx++; -		return is_last ? 
deduplicate_data(data, fi, list) : 0; +		return is_last ? deduplicate_data(data, fi) : 0;  	}  	chksum = update_crc32(0, data->block, size); @@ -177,10 +178,10 @@ static int flush_data_block(data_writer_t *data, size_t size, bool is_last,  	if (size < data->super->block_size && !(flags & DW_DONT_FRAGMENT)) {  		fi->flags |= FILE_FLAG_HAS_FRAGMENT; -		if (deduplicate_data(data, fi, list)) +		if (deduplicate_data(data, fi))  			return -1; -		ref = fragment_by_chksum(fi, chksum, size, list, +		ref = fragment_by_chksum(chksum, size, data->list,  					 data->super->block_size);  		if (ref != NULL) { @@ -211,7 +212,7 @@ static int flush_data_block(data_writer_t *data, size_t size, bool is_last,  		fi->blocks[data->block_idx].size = out;  		data->block_idx++; -		if (is_last && deduplicate_data(data, fi, list) != 0) +		if (is_last && deduplicate_data(data, fi) != 0)  			return -1;  	} @@ -236,16 +237,18 @@ fail_seek:  	return -1;  } -static int end_file(data_writer_t *data, int flags) +static int end_file(data_writer_t *data, file_info_t *fi, int flags)  {  	if ((flags & DW_ALLIGN_DEVBLK) && allign_file(data) != 0)  		return -1; +	fi->next = data->list; +	data->list = fi;  	return 0;  }  int write_data_from_fd(data_writer_t *data, file_info_t *fi, -		       int infd, int flags, file_info_t *list) +		       int infd, int flags)  {  	uint64_t count;  	bool is_last; @@ -266,16 +269,15 @@ int write_data_from_fd(data_writer_t *data, file_info_t *fi,  		if (read_data(fi->input_file, infd, data->block, diff))  			return -1; -		if (flush_data_block(data, diff, is_last, fi, flags, list)) +		if (flush_data_block(data, diff, is_last, fi, flags))  			return -1;  	} -	return end_file(data, flags); +	return end_file(data, fi, flags);  }  int write_data_from_fd_condensed(data_writer_t *data, file_info_t *fi, -				 int infd, sparse_map_t *map, int flags, -				 file_info_t *list) +				 int infd, sparse_map_t *map, int flags)  {  	size_t start, count, diff;  	sparse_map_t *m; @@ -330,11 
+332,11 @@ int write_data_from_fd_condensed(data_writer_t *data, file_info_t *fi,  			map = map->next;  		} -		if (flush_data_block(data, diff, is_last, fi, flags, list)) +		if (flush_data_block(data, diff, is_last, fi, flags))  			return -1;  	} -	return end_file(data, flags); +	return end_file(data, fi, flags);  fail_map_size:  	fprintf(stderr, "%s: sparse file map spans beyond file size\n",  		fi->input_file); diff --git a/mkfs/mkfs.c b/mkfs/mkfs.c index 8ee2268..3dc9efb 100644 --- a/mkfs/mkfs.c +++ b/mkfs/mkfs.c @@ -6,26 +6,6 @@   */  #include "mkfs.h" -static int process_file(data_writer_t *data, file_info_t *fi, bool quiet, -			file_info_t *list) -{ -	int ret, infd; - -	if (!quiet) -		printf("packing %s\n", fi->input_file); - -	infd = open(fi->input_file, O_RDONLY); -	if (infd < 0) { -		perror(fi->input_file); -		return -1; -	} - -	ret = write_data_from_fd(data, fi, infd, 0, list); - -	close(infd); -	return ret; -} -  static int set_working_dir(options_t *opt)  {  	const char *ptr; @@ -51,12 +31,27 @@ static int restore_working_dir(options_t *opt)  static int pack_files(data_writer_t *data, fstree_t *fs, options_t *opt)  {  	file_info_t *fi; +	int ret, infd;  	if (set_working_dir(opt))  		return -1; -	for (fi = fs->files; fi != NULL; fi = fi->next) { -		if (process_file(data, fi, opt->quiet, fs->files)) +	while (fs->files != NULL) { +		fi = fs->files; +		fs->files = fi->next; + +		if (!opt->quiet) +			printf("packing %s\n", fi->input_file); + +		infd = open(fi->input_file, O_RDONLY); +		if (infd < 0) { +			perror(fi->input_file); +			return -1; +		} + +		ret = write_data_from_fd(data, fi, infd, 0); +		close(infd); +		if (ret)  			return -1;  	} diff --git a/tar/tar2sqfs.c b/tar/tar2sqfs.c index f774c9d..36f8cd6 100644 --- a/tar/tar2sqfs.c +++ b/tar/tar2sqfs.c @@ -199,20 +199,20 @@ fail_arg:  }  static int write_file(tar_header_decoded_t *hdr, file_info_t *fi, -		      data_writer_t *data, file_info_t *list) +		      data_writer_t *data)  {  	int ret;  	if 
(hdr->sparse != NULL) {  		ret = write_data_from_fd_condensed(data, fi, STDIN_FILENO, -						   hdr->sparse, 0, list); +						   hdr->sparse, 0);  		if (ret)  			return -1;  		return skip_padding(STDIN_FILENO, hdr->record_size);  	} -	if (write_data_from_fd(data, fi, STDIN_FILENO, 0, list)) +	if (write_data_from_fd(data, fi, STDIN_FILENO, 0))  		return -1;  	return skip_padding(STDIN_FILENO, fi->size); @@ -265,11 +265,8 @@ static int create_node_and_repack_data(tar_header_decoded_t *hdr, fstree_t *fs,  	}  	if (S_ISREG(hdr->sb.st_mode)) { -		if (write_file(hdr, node->data.file, data, fs->files)) +		if (write_file(hdr, node->data.file, data))  			return -1; - -		node->data.file->next = fs->files; -		fs->files = node->data.file;  	}  	return 0;  | 
