summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: David Oberhollenzer <david.oberhollenzer@sigma-star.at> 2019-08-16 19:52:42 +0200
committer: David Oberhollenzer <david.oberhollenzer@sigma-star.at> 2019-08-18 22:44:39 +0200
commit: b45850388ea5505f4fd50cd659abe2f02aeca0b5 (patch)
tree: 994f878caf990d1569831762f0b0f4d787ab9839
parent: 1a95478b8d340c8b6b9dbff4f38f9293388fd1a3 (diff)
cleanup: internalize deduplication list in data_writer
This change removes the need for passing a list of files around for
deduplication. Also the deduplication code no longer needs to worry about
order, since the file being deduplicated is only added after deduplication
is done.

Signed-off-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
-rw-r--r-- include/data_writer.h | 19
-rw-r--r-- include/fstree.h | 4
-rw-r--r-- lib/fstree/deduplicate.c | 12
-rw-r--r-- lib/sqfs/data_writer.c | 34
-rw-r--r-- mkfs/mkfs.c | 39
-rw-r--r-- tar/tar2sqfs.c | 11
6 files changed, 50 insertions, 69 deletions
diff --git a/include/data_writer.h b/include/data_writer.h
index dc63592..f384ce7 100644
--- a/include/data_writer.h
+++ b/include/data_writer.h
@@ -69,32 +69,25 @@ int data_writer_flush_fragments(data_writer_t *data);
Blocks or fragments that are all zero bytes automatically detected,
not written out and the sparse file accounting updated accordingly.
- The flags argument is a combination of DW_* flags.
-
- If 'list' is not NULL, it is used for fragment and data block deduplication.
- It is assumed that the list is processed in order and scanning stops as soon
- as the current file info 'fi' is encountered in the list.
+ The flags argument is a combination of DW_* flags. After completion the
+ data writer collects the 'fi' in an internal list it uses for deduplication.
Returns 0 on success, prints errors to stderr.
*/
int write_data_from_fd(data_writer_t *data, file_info_t *fi, int infd,
- int flags, file_info_t *list);
+ int flags);
/*
Does the same as write_data_from_fd but the input file is the condensed
representation of a sparse file. The layout must be in order and
non-overlapping.
- The flags argument is a combination of DW_* flags.
-
- If 'list' is not NULL, it is used for fragment and data block deduplication.
- It is assumed that the list is processed in order and scanning stops as soon
- as the current file info 'fi' is encountered in the list.
+ The flags argument is a combination of DW_* flags. After completion the
+ data writer collects the 'fi' in an internal list it uses for deduplication.
Returns 0 on success, prints errors to stderr.
*/
int write_data_from_fd_condensed(data_writer_t *data, file_info_t *fi,
- int infd, sparse_map_t *map, int flags,
- file_info_t *list);
+ int infd, sparse_map_t *map, int flags);
#endif /* DATA_WRITER_H */
diff --git a/include/fstree.h b/include/fstree.h
index cbd3c9c..a5d930e 100644
--- a/include/fstree.h
+++ b/include/fstree.h
@@ -313,7 +313,6 @@ tree_node_t *fstree_node_from_path(fstree_t *fs, const char *path);
/*
Walk through 'list' to find a file with a fragment that has
the same size ('frag_size') and checksum ('chksum') as 'fi'.
- Processing stopps if 'fi' itself is found in the list.
Returns NULL if no such fragment could be found.
*/
@@ -323,8 +322,7 @@ file_info_t *fragment_by_chksum(file_info_t *fi, uint32_t chksum,
/*
Walk through 'list' to find a file that contains the same sequence of blocks
- as 'file', comparing size and checksum. Processing stops if 'file' is found
- in the list.
+ as 'file', comparing size and checksum.
Returns NULL if no such fragment could be found.
*/
diff --git a/lib/fstree/deduplicate.c b/lib/fstree/deduplicate.c
index 7390a2c..00815a6 100644
--- a/lib/fstree/deduplicate.c
+++ b/lib/fstree/deduplicate.c
@@ -16,10 +16,8 @@ file_info_t *fragment_by_chksum(file_info_t *fi, uint32_t chksum,
file_info_t *it;
for (it = list; it != NULL; it = it->next) {
- if (it == fi) {
- it = NULL;
- break;
- }
+ if (it == fi)
+ continue;
if (!(it->flags & FILE_FLAG_HAS_FRAGMENT))
continue;
@@ -82,10 +80,8 @@ uint64_t find_equal_blocks(file_info_t *file, file_info_t *list,
}
for (it = list; it != NULL; it = it->next) {
- if (it == file) {
- it = NULL;
- break;
- }
+ if (it == file)
+ continue;
if (it->flags & FILE_FLAG_BLOCKS_ARE_DUPLICATE)
continue;
diff --git a/lib/sqfs/data_writer.c b/lib/sqfs/data_writer.c
index 0eeecc3..ea650bd 100644
--- a/lib/sqfs/data_writer.c
+++ b/lib/sqfs/data_writer.c
@@ -31,6 +31,7 @@ struct data_writer_t {
int block_idx;
+ file_info_t *list;
sqfs_super_t *super;
compressor_t *cmp;
int outfd;
@@ -132,10 +133,10 @@ int data_writer_flush_fragments(data_writer_t *data)
return 0;
}
-static int deduplicate_data(data_writer_t *data, file_info_t *fi,
- file_info_t *list)
+static int deduplicate_data(data_writer_t *data, file_info_t *fi)
{
- uint64_t ref = find_equal_blocks(fi, list, data->super->block_size);
+ uint64_t ref = find_equal_blocks(fi, data->list,
+ data->super->block_size);
if (ref > 0) {
data->super->bytes_used = fi->startblock;
@@ -159,7 +160,7 @@ fail_truncate:
}
static int flush_data_block(data_writer_t *data, size_t size, bool is_last,
- file_info_t *fi, int flags, file_info_t *list)
+ file_info_t *fi, int flags)
{
uint32_t out, chksum;
file_info_t *ref;
@@ -169,7 +170,7 @@ static int flush_data_block(data_writer_t *data, size_t size, bool is_last,
fi->blocks[data->block_idx].chksum = 0;
fi->sparse += size;
data->block_idx++;
- return is_last ? deduplicate_data(data, fi, list) : 0;
+ return is_last ? deduplicate_data(data, fi) : 0;
}
chksum = update_crc32(0, data->block, size);
@@ -177,10 +178,10 @@ static int flush_data_block(data_writer_t *data, size_t size, bool is_last,
if (size < data->super->block_size && !(flags & DW_DONT_FRAGMENT)) {
fi->flags |= FILE_FLAG_HAS_FRAGMENT;
- if (deduplicate_data(data, fi, list))
+ if (deduplicate_data(data, fi))
return -1;
- ref = fragment_by_chksum(fi, chksum, size, list,
+ ref = fragment_by_chksum(fi, chksum, size, data->list,
data->super->block_size);
if (ref != NULL) {
@@ -211,7 +212,7 @@ static int flush_data_block(data_writer_t *data, size_t size, bool is_last,
fi->blocks[data->block_idx].size = out;
data->block_idx++;
- if (is_last && deduplicate_data(data, fi, list) != 0)
+ if (is_last && deduplicate_data(data, fi) != 0)
return -1;
}
@@ -236,16 +237,18 @@ fail_seek:
return -1;
}
-static int end_file(data_writer_t *data, int flags)
+static int end_file(data_writer_t *data, file_info_t *fi, int flags)
{
if ((flags & DW_ALLIGN_DEVBLK) && allign_file(data) != 0)
return -1;
+ fi->next = data->list;
+ data->list = fi;
return 0;
}
int write_data_from_fd(data_writer_t *data, file_info_t *fi,
- int infd, int flags, file_info_t *list)
+ int infd, int flags)
{
uint64_t count;
bool is_last;
@@ -266,16 +269,15 @@ int write_data_from_fd(data_writer_t *data, file_info_t *fi,
if (read_data(fi->input_file, infd, data->block, diff))
return -1;
- if (flush_data_block(data, diff, is_last, fi, flags, list))
+ if (flush_data_block(data, diff, is_last, fi, flags))
return -1;
}
- return end_file(data, flags);
+ return end_file(data, fi, flags);
}
int write_data_from_fd_condensed(data_writer_t *data, file_info_t *fi,
- int infd, sparse_map_t *map, int flags,
- file_info_t *list)
+ int infd, sparse_map_t *map, int flags)
{
size_t start, count, diff;
sparse_map_t *m;
@@ -330,11 +332,11 @@ int write_data_from_fd_condensed(data_writer_t *data, file_info_t *fi,
map = map->next;
}
- if (flush_data_block(data, diff, is_last, fi, flags, list))
+ if (flush_data_block(data, diff, is_last, fi, flags))
return -1;
}
- return end_file(data, flags);
+ return end_file(data, fi, flags);
fail_map_size:
fprintf(stderr, "%s: sparse file map spans beyond file size\n",
fi->input_file);
diff --git a/mkfs/mkfs.c b/mkfs/mkfs.c
index 8ee2268..3dc9efb 100644
--- a/mkfs/mkfs.c
+++ b/mkfs/mkfs.c
@@ -6,26 +6,6 @@
*/
#include "mkfs.h"
-static int process_file(data_writer_t *data, file_info_t *fi, bool quiet,
- file_info_t *list)
-{
- int ret, infd;
-
- if (!quiet)
- printf("packing %s\n", fi->input_file);
-
- infd = open(fi->input_file, O_RDONLY);
- if (infd < 0) {
- perror(fi->input_file);
- return -1;
- }
-
- ret = write_data_from_fd(data, fi, infd, 0, list);
-
- close(infd);
- return ret;
-}
-
static int set_working_dir(options_t *opt)
{
const char *ptr;
@@ -51,12 +31,27 @@ static int restore_working_dir(options_t *opt)
static int pack_files(data_writer_t *data, fstree_t *fs, options_t *opt)
{
file_info_t *fi;
+ int ret, infd;
if (set_working_dir(opt))
return -1;
- for (fi = fs->files; fi != NULL; fi = fi->next) {
- if (process_file(data, fi, opt->quiet, fs->files))
+ while (fs->files != NULL) {
+ fi = fs->files;
+ fs->files = fi->next;
+
+ if (!opt->quiet)
+ printf("packing %s\n", fi->input_file);
+
+ infd = open(fi->input_file, O_RDONLY);
+ if (infd < 0) {
+ perror(fi->input_file);
+ return -1;
+ }
+
+ ret = write_data_from_fd(data, fi, infd, 0);
+ close(infd);
+ if (ret)
return -1;
}
diff --git a/tar/tar2sqfs.c b/tar/tar2sqfs.c
index f774c9d..36f8cd6 100644
--- a/tar/tar2sqfs.c
+++ b/tar/tar2sqfs.c
@@ -199,20 +199,20 @@ fail_arg:
}
static int write_file(tar_header_decoded_t *hdr, file_info_t *fi,
- data_writer_t *data, file_info_t *list)
+ data_writer_t *data)
{
int ret;
if (hdr->sparse != NULL) {
ret = write_data_from_fd_condensed(data, fi, STDIN_FILENO,
- hdr->sparse, 0, list);
+ hdr->sparse, 0);
if (ret)
return -1;
return skip_padding(STDIN_FILENO, hdr->record_size);
}
- if (write_data_from_fd(data, fi, STDIN_FILENO, 0, list))
+ if (write_data_from_fd(data, fi, STDIN_FILENO, 0))
return -1;
return skip_padding(STDIN_FILENO, fi->size);
@@ -265,11 +265,8 @@ static int create_node_and_repack_data(tar_header_decoded_t *hdr, fstree_t *fs,
}
if (S_ISREG(hdr->sb.st_mode)) {
- if (write_file(hdr, node->data.file, data, fs->files))
+ if (write_file(hdr, node->data.file, data))
return -1;
-
- node->data.file->next = fs->files;
- fs->files = node->data.file;
}
return 0;