aboutsummaryrefslogtreecommitdiff
path: root/include/sqfs/block_processor.h
blob: a5eed33a46a3e5ef51c9215c71454fa134363d2e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
/* SPDX-License-Identifier: LGPL-3.0-or-later */
/*
 * block_processor.h - This file is part of libsquashfs
 *
 * Copyright (C) 2019 David Oberhollenzer <goliath@infraroot.at>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
#ifndef SFQS_BLOCK_PROCESSOR_H
#define SFQS_BLOCK_PROCESSOR_H

#include "sqfs/predef.h"

/**
 * @file block_processor.h
 *
 * @brief Contains declarations for the data block processor.
 */

/**
 * @struct sqfs_block_processor_t
 *
 * @brief Abstracts generating of file data and fragment blocks.
 *
 * @implements sqfs_object_t
 *
 * This data structure provides a simple begin/append/end interface
 * to generate file data blocks (see @ref sqfs_block_processor_begin_file,
 * @ref sqfs_block_processor_append and @ref sqfs_block_processor_end
 * respectively).
 *
 * Internally it takes care of partitioning data in the correct block sizes,
 * adding tail-ens to fragment blocks, compressing the data, deduplicating data
 * and finally writing it to disk.
 *
 * This object is not copyable, i.e. @ref sqfs_copy will always return NULL.
 */

/**
 * @struct sqfs_block_processor_stats_t
 *
 * @brief Used to store runtime statistics about
 *        the @ref sqfs_block_processor_t.
 */
struct sqfs_block_processor_stats_t {
	/**
	 * @brief Holds the size of the structure.
	 *
	 * If a later version of libsquashfs expands this structure, the value
	 * of this field can be used to check at runtime whether the newer
	 * fields are avaialable or not.
	 */
	size_t size;

	/**
	 * @brief Total number of bytes fed into the front end API.
	 */
	sqfs_u64 input_bytes_read;

	/**
	 * @brief Total number of bytes sent down to the block processor.
	 *
	 * This is the sum of generated, compressed blocks, including blocks
	 * that were possibly deduplicated by the block writer and not
	 * counting padding that the block writer may have added.
	 */
	sqfs_u64 output_bytes_generated;

	/**
	 * @brief Total number of data blocks produced.
	 */
	sqfs_u64 data_block_count;

	/**
	 * @brief Total number of fragment blocks produced.
	 */
	sqfs_u64 frag_block_count;

	/**
	 * @brief Total number of sparse blocks encountered.
	 */
	sqfs_u64 sparse_block_count;

	/**
	 * @brief Total number of tail-end fragments produced.
	 *
	 * This number also includes the fragments that have later been
	 * eliminated by deduplication.
	 */
	sqfs_u64 total_frag_count;

	/**
	 * @brief Total number of tail-end fragments actually stored in
	 *        fragment blocks.
	 *
	 * This number does not include the fragments that have been
	 * eliminated by deduplication.
	 */
	sqfs_u64 actual_frag_count;
};

/**
 * @struct sqfs_block_processor_desc_t
 *
 * @brief Encapsulates a description for an @ref sqfs_block_processor_t
 *
 * An instance of this struct is used by @ref sqfs_block_processor_create_ex to
 * instantiate block processor objects.
 */
struct sqfs_block_processor_desc_t {
	/**
	 * @brief Holds the size of the structure.
	 *
	 * If a later version of libsquashfs expands this structure, the value
	 * of this field can be used to check at runtime whether the newer
	 * fields are avaialable or not.
	 *
	 * If @ref sqfs_block_processor_create_ex is given a struct whose size
	 * it does not recognize, it returns @ref SQFS_ERROR_ARG_INVALID.
	 */
	sqfs_u32 size;

	/**
	 * @brief The maximum size of a data block.
	 */
	sqfs_u32 max_block_size;

	/**
	 * @brief The number of worker threads to create.
	 */
	sqfs_u32 num_workers;

	/**
	 * @brief The maximum number of blocks currently in flight.
	 *
	 * When trying to add more, enqueueing blocks until the
	 * in-flight block count drops below the threshold.
	 */
	sqfs_u32 max_backlog;

	/**
	 * @brief A pointer to a compressor.
	 *
	 * If multiple worker threads are used, the deep copy function of the
	 * compressor is used to create several instances that don't interfere
	 * with each other. This means, the compressor implementation must be
	 * able to create copies of itself that can be used independendly and
	 * concurrently.
	 */
	sqfs_compressor_t *cmp;

	/**
	 * @brief A block writer to send to finished blocks to.
	 */
	sqfs_block_writer_t *wr;

	/**
	 * @brief A fragment table to use for storing block locations.
	 */
	sqfs_frag_table_t *tbl;

	/**
	 * @brief Pointer to a file to read back fragment blocks from.
	 *
	 * If file and uncmp are not NULL, the file is used to read back
	 * fragment blocks during fragment deduplication and verify possible
	 * matches. If either of them are NULL, the deduplication relies on
	 * fragment size and hash alone.
	 */
	sqfs_file_t *file;

	/**
	 * @brief A pointer to a compressor the decompresses data.
	 *
	 * @copydoc file
	 */
	sqfs_compressor_t *uncmp;
};

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @brief Create a data block processor.
 *
 * @memberof sqfs_block_processor_t
 *
 * @param max_block_size The maximum size of a data block. Required for the
 *                       internal scratch buffer used for compressing data.
 * @param cmp A pointer to a compressor. If multiple worker threads are used,
 *            the deep copy function of the compressor is used to create
 *            several instances that don't interfere with each other.
 * @param num_workers The number of worker threads to create.
 * @param max_backlog The maximum number of blocks currently in flight. When
 *                    trying to add more, enqueueing blocks until the in-flight
 *                    block count drops below the threshold.
 * @param wr A block writer to send to finished blocks to.
 * @param tbl A fragment table to use for storing fragment and fragment block
 *            locations.
 *
 * @return A pointer to a block processor object on success, NULL on allocation
 *         failure or on failure to create and initialize the worker threads.
 */
SQFS_API
sqfs_block_processor_t *sqfs_block_processor_create(size_t max_block_size,
						    sqfs_compressor_t *cmp,
						    unsigned int num_workers,
						    size_t max_backlog,
						    sqfs_block_writer_t *wr,
						    sqfs_frag_table_t *tbl);

/**
 * @brief Create a data block processor.
 *
 * @memberof sqfs_block_processor_t
 *
 * @param desc A pointer to an extensible structure that holds the description
 *             of the block processor.
 * @param out On success, returns the pointer to the newly created block
 *            processor object.
 *
 * @return Zero on success, an @ref SQFS_ERROR value on failure.
 */
SQFS_API
int sqfs_block_processor_create_ex(const sqfs_block_processor_desc_t *desc,
				   sqfs_block_processor_t **out);

/**
 * @brief Start writing a file.
 *
 * @memberof sqfs_block_processor_t
 *
 * After calling this function, call @ref sqfs_block_processor_append
 * repeatedly to add data to the file. Finally
 * call @ref sqfs_block_processor_end_file when you
 * are done. After writing all files, use @ref sqfs_block_processor_finish to
 * wait until all blocks that are still in flight are done and written to disk.
 *
 * The specified pointer to an inode pointer is kept internally and updated with
 * the compressed block sizes and final destinations of the file and possible
 * fragment. Since the inode may have to be grown to larger size, the actual
 * inode pointer can change over time. Furthermore, since there can still be
 * blocks in-flight even after calling @ref sqfs_block_processor_end_file, the
 * data in the inode and the value of the pointer may still change. The only
 * point at which the data writer is guaranteed to not touch them anymore is
 * after @ref sqfs_block_processor_sync or @ref sqfs_block_processor_finish has
 * returned.
 *
 * @param proc A pointer to a data writer object.
 * @param inode An optional pointer to a pointer to an inode. If not NULL, the
 *              block processor creates a file inode and stores a pointer to
 *              it here and keeps updating the inode as the file grows.
 * @param user An optional user data pointer that is passed to the
 *             the @ref sqfs_block_writer_t for each file data block.
 * @param flags A combination of @ref SQFS_BLK_FLAGS that can be used to
 *              micro manage how the data is processed.
 *
 * @return Zero on success, an @ref SQFS_ERROR value on failure.
 */
SQFS_API int sqfs_block_processor_begin_file(sqfs_block_processor_t *proc,
					     sqfs_inode_generic_t **inode,
					     void *user, sqfs_u32 flags);

/**
 * @brief Append data to the current file.
 *
 * @memberof sqfs_block_processor_t
 *
 * Call this after @ref sqfs_block_processor_begin_file to add data to a file.
 *
 * @param proc A pointer to a data writer object.
 * @param data A pointer to a buffer to read data from.
 * @param size How many bytes should be copied out of the given
 *             buffer and written to disk.
 *
 * @return Zero on success, an @ref SQFS_ERROR value on failure.
 */
SQFS_API int sqfs_block_processor_append(sqfs_block_processor_t *proc,
					 const void *data, size_t size);

/**
 * @brief Stop writing the current file and flush everything that is
 *        buffered internally.
 *
 * @memberof sqfs_block_processor_t
 *
 * The counter part to @ref sqfs_block_processor_begin_file.
 *
 * Even after calling this, there might still be data blocks in-flight.
 * Use @ref sqfs_block_processor_finish when you are done writing files to force
 * the remaining blocks to be processed and written to disk.
 *
 * @param proc A pointer to a data writer object.
 *
 * @return Zero on success, an @ref SQFS_ERROR value on failure.
 */
SQFS_API int sqfs_block_processor_end_file(sqfs_block_processor_t *proc);

/**
 * @brief Submit a raw block for processing.
 *
 * @memberof sqfs_block_processor_t
 *
 * This function provides an alternative to the simple file front end to submit
 * raw data blocks to the processor. It will throw an error if called in between
 * @ref sqfs_block_processor_begin_file and @ref sqfs_block_processor_end_file.
 *
 * The flags aren't sanity checked (besides being a subset
 * of @ref SQFS_BLK_FLAGS_ALL), so in contrast to the simple file API, you can
 * shoot yourself in the foot as hard as you want.
 *
 * If not specified otherwise through flags, the fragment block/deduplication
 * and fragment consolidation are still in effect and sparse blocks are
 * discarded.
 *
 * @param proc A pointer to a block processor object.
 * @param user An optional user data pointer stored with the block and passed on
 *             to the underlying @ref sqfs_block_writer_t.
 * @param flags A combination of @ref SQFS_BLK_FLAGS that can be used to
 *              micro manage how the data is processed.
 * @param data The data to write to the block.
 *
 * @return Zero on success, an @ref SQFS_ERROR value on failure.
 */
SQFS_API int sqfs_block_processor_submit_block(sqfs_block_processor_t *proc,
					       void *user, sqfs_u32 flags,
					       const void *data, size_t size);

/**
 * @brief Wait for the in-flight data blocks to finish.
 *
 * @memberof sqfs_block_processor_t
 *
 * @param proc A pointer to a block processor object.
 *
 * @return Zero on success, an @ref SQFS_ERROR value on failure. The failure
 *         return value can either be an error encountered during enqueueing,
 *         processing or writing to disk.
 */
SQFS_API int sqfs_block_processor_sync(sqfs_block_processor_t *proc);

/**
 * @brief Wait for the in-flight data blocks to finish and finally flush the
 *        current fragment block.
 *
 * @memberof sqfs_block_processor_t
 *
 * This does essentially the same as @ref sqfs_block_processor_sync, but after
 * syncing, it also flushes the current fragment block, even if it isn't full
 * yet and waits for it to be completed as well.
 *
 * @param proc A pointer to a block processor object.
 *
 * @return Zero on success, an @ref SQFS_ERROR value on failure. The failure
 *         return value can either be an error encountered during enqueueing,
 *         processing or writing to disk.
 */
SQFS_API int sqfs_block_processor_finish(sqfs_block_processor_t *proc);

/**
 * @brief Get accumulated runtime statistics from a block processor
 *
 * @memberof sqfs_block_processor_t
 *
 * @param proc A pointer to a data writer object.
 *
 * @return A pointer to a @ref sqfs_block_processor_stats_t structure.
 */
SQFS_API const sqfs_block_processor_stats_t
*sqfs_block_processor_get_stats(const sqfs_block_processor_t *proc);

#ifdef __cplusplus
}
#endif

#endif /* SFQS_BLOCK_PROCESSOR_H */