1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
|
// SPDX-License-Identifier: GPL-2.0-only
/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation.
*
* Authors: Artem Bityutskiy (Битюцкий Артём)
* Adrian Hunter
*/
/*
* This file implements UBIFS journal.
*
* The journal consists of 2 parts - the log and bud LEBs. The log has fixed
* length and position, while a bud logical eraseblock is any LEB in the main
* area. Buds contain file system data - data nodes, inode nodes, etc. The log
* contains only references to buds and some other stuff like commit
* start node. The idea is that when we commit the journal, we do
* not copy the data; the buds just become indexed. Since after the commit the
* nodes in bud eraseblocks become leaf nodes of the file system index tree, we
* use the term "bud". The analogy is obvious: bud eraseblocks contain nodes
* which will become leaves in the future.
*
* The journal is multi-headed because we want to write data to the journal as
* optimally as possible. It is nice to have nodes belonging to the same inode
* in one LEB, so we may write data owned by different inodes to different
* journal heads, although at present only one data head is used.
*
* For recovery reasons, the base head contains all inode nodes, all directory
* entry nodes and all truncate nodes. This means that the other heads contain
* only data nodes.
*
* Bud LEBs may be half-indexed. For example, if the bud was not full at the
* time of commit, the bud is retained to continue to be used in the journal,
* even though the "front" of the LEB is now indexed. In that case, the log
* reference contains the offset where the bud starts for the purposes of the
* journal.
*
* The journal size has to be limited, because the larger the journal is, the
* longer it takes to mount UBIFS (scanning the journal) and the more memory it
* takes (indexing in the TNC).
*
* All the journal write operations like 'ubifs_jnl_update()' here, which write
* multiple UBIFS nodes to the journal at one go, are atomic with respect to
* unclean reboots. Should the unclean reboot happen, the recovery code drops
* all the nodes.
*/
#include "bitops.h"
#include "ubifs.h"
#include "defs.h"
#include "debug.h"
#include "misc.h"
/**
 * zero_ino_node_unused - zero out unused fields of an on-flash inode node.
 * @ino: the inode node whose padding is to be cleared
 *
 * Clears the @padding1 and @padding2 members of @ino. The number of bytes to
 * clear is derived from the members themselves instead of being hard-coded,
 * so this helper does not have to be touched when the padding sizes change.
 *
 * NOTE(review): this relies on both padding members being fixed-size arrays
 * embedded in 'struct ubifs_ino_node' (not pointers) - confirm against the
 * on-flash format definition.
 */
static inline void zero_ino_node_unused(struct ubifs_ino_node *ino)
{
	memset(ino->padding1, 0, sizeof(ino->padding1));
	memset(ino->padding2, 0, sizeof(ino->padding2));
}
/**
 * zero_dent_node_unused - clear the unused part of an on-flash directory
 * entry node.
 * @dent: directory entry node whose padding is to be cleared
 *
 * Only the @padding1 member of @dent is written; every byte of it is set to
 * zero.
 */
static inline void zero_dent_node_unused(struct ubifs_dent_node *dent)
{
	memset(&dent->padding1, 0, sizeof(dent->padding1));
}
/**
 * ubifs_add_auth_dirt - account for an authentication node in an LEB.
 * @c: UBIFS file-system description object
 * @lnum: LEB number the accounting applies to
 *
 * Does nothing when the file system is not authenticated. Otherwise the size
 * of one authentication node is passed to 'ubifs_add_dirt()' for LEB @lnum.
 */
static void ubifs_add_auth_dirt(struct ubifs_info *c, int lnum)
{
	if (!ubifs_authenticated(c))
		return;

	ubifs_add_dirt(c, lnum, ubifs_auth_node_sz(c));
}
/**
* reserve_space - reserve space in the journal.
* @c: UBIFS file-system description object
* @jhead: journal head number
* @len: node length
*
* This function reserves space in journal head @jhead. If the reservation
* succeeded, the journal head stays locked and later has to be unlocked using
* 'release_head()'. On every failure path the journal head is unlocked before
* returning. Returns zero in case of success, %-EAGAIN if commit has to
* be done, %-EROFS if @c->ro_error is set, and other negative error codes in
* case of other failures.
*/
static int reserve_space(struct ubifs_info *c, int jhead, int len)
{
int err = 0, err1, retries = 0, avail, lnum, offs, squeeze;
struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
/*
* Typically, the base head has smaller nodes written to it, so it is
* better to try to allocate space at the ends of eraseblocks. This is
* what the squeeze parameter does.
*/
ubifs_assert(c, !c->ro_media && !c->ro_mount);
squeeze = (jhead == BASEHD);
again:
mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
if (c->ro_error) {
err = -EROFS;
goto out_unlock;
}
/* Bytes left in the current bud after what is already written/buffered */
avail = c->leb_size - wbuf->offs - wbuf->used;
if (wbuf->lnum != -1 && avail >= len)
/* The current bud has enough room; return with @wbuf->io_mutex held */
return 0;
/*
* Write buffer wasn't seek'ed or there is not enough space - look for an
* LEB with some empty space.
*/
lnum = ubifs_find_free_space(c, len, &offs, squeeze);
if (lnum >= 0)
goto out;
/* Only %-ENOSPC is handled below; any other error is returned as is */
err = lnum;
if (err != -ENOSPC)
goto out_unlock;
/*
* No free space, we have to run garbage collector to make
* some. But the write-buffer mutex has to be unlocked because
* GC also takes it.
*/
dbg_jnl("no free space in jhead %s, run GC", dbg_jhead(jhead));
mutex_unlock(&wbuf->io_mutex);
lnum = ubifs_garbage_collect(c, 0);
if (lnum < 0) {
err = lnum;
if (err != -ENOSPC)
/* @wbuf->io_mutex is not held here, so return directly */
return err;
/*
* GC could not make a free LEB. But someone else may
* have allocated new bud for this journal head,
* because we dropped @wbuf->io_mutex, so try once
* again.
*/
dbg_jnl("GC couldn't make a free LEB for jhead %s",
dbg_jhead(jhead));
/* At most two re-tries; the third %-ENOSPC in a row is returned */
if (retries++ < 2) {
dbg_jnl("retry (%d)", retries);
goto again;
}
dbg_jnl("return -ENOSPC");
return err;
}
/* GC produced LEB @lnum; re-take the mutex and re-check the head state */
mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
dbg_jnl("got LEB %d for jhead %s", lnum, dbg_jhead(jhead));
avail = c->leb_size - wbuf->offs - wbuf->used;
if (wbuf->lnum != -1 && avail >= len) {
/*
* Someone else has switched the journal head and we have
* enough space now. This happens when more than one process is
* trying to write to the same journal head at the same time.
*/
dbg_jnl("return LEB %d back, already have LEB %d:%d",
lnum, wbuf->lnum, wbuf->offs + wbuf->used);
err = ubifs_return_leb(c, lnum);
if (err)
goto out_unlock;
return 0;
}
/* The LEB obtained from GC is used starting at offset zero */
offs = 0;
out:
/*
* Make sure we synchronize the write-buffer before we add the new bud
* to the log. Otherwise we may have a power cut after the log
* reference node for the last bud (@lnum) is written but before the
* write-buffer data are written to the next-to-last bud
* (@wbuf->lnum). And the effect would be that the recovery would see
* that there is corruption in the next-to-last bud.
*/
err = ubifs_wbuf_sync_nolock(wbuf);
if (err)
goto out_return;
err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
if (err)
goto out_return;
/*
* NOTE(review): unlike the two failures above, a seek failure does not
* hand @lnum back via 'ubifs_return_leb()' - presumably because the bud
* has already been added to the log at this point; confirm.
*/
err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs);
if (err)
goto out_unlock;
/* Success: @wbuf->io_mutex stays locked for the caller */
return 0;
out_unlock:
mutex_unlock(&wbuf->io_mutex);
return err;
out_return:
/* An error occurred and the LEB has to be returned to lprops */
ubifs_assert(c, err < 0);
err1 = ubifs_return_leb(c, lnum);
if (err1 && err == -EAGAIN)
/*
* Return original error code only if it is not %-EAGAIN,
* which is not really an error. Otherwise, return the error
* code of 'ubifs_return_leb()'.
*/
err = err1;
mutex_unlock(&wbuf->io_mutex);
return err;
}
/**
 * ubifs_hash_nodes - hash a buffer of nodes and prepare the trailing
 * authentication node.
 * @c: UBIFS file-system description object
 * @node: start of the buffer holding the nodes
 * @len: length of the buffer in bytes
 * @hash: hash state the nodes are fed into
 *
 * Walks the nodes in @node one by one. For each node, the number of bytes
 * given by its common header is fed to @hash, and the walk advances by that
 * length rounded up to a multiple of 8. The walk stops once exactly one
 * authentication node worth of bytes is left; that position is then handed to
 * 'ubifs_prepare_auth_node()'.
 *
 * Returns the result of 'ubifs_prepare_auth_node()', or the error code of
 * 'ubifs_shash_update()' if hashing a node failed.
 */
static int ubifs_hash_nodes(struct ubifs_info *c, void *node,
			    int len, struct shash_desc *hash)
{
	int auth_node_size = ubifs_auth_node_sz(c);

	for (;;) {
		const struct ubifs_ch *ch = node;
		int nodelen = le32_to_cpu(ch->len);
		int aligned, ret;

		ubifs_assert(c, len >= auth_node_size);

		/* Only the space for the authentication node is left */
		if (len == auth_node_size)
			return ubifs_prepare_auth_node(c, node, hash);

		ubifs_assert(c, len > nodelen);
		ubifs_assert(c, ch->magic == cpu_to_le32(UBIFS_NODE_MAGIC));

		ret = ubifs_shash_update(c, hash, (void *)node, nodelen);
		if (ret)
			return ret;

		/* Step over the node including its alignment padding */
		aligned = ALIGN(nodelen, 8);
		node += aligned;
		len -= aligned;
	}
}
/**
* write_head - write data to a journal head.
* @c: UBIFS file-system description object
* @jhead: journal head
* @buf: buffer to write
* @len: length to write
* @lnum: LEB number written is returned here
* @offs: offset written is returned here
* @sync: non-zero if the write-buffer has to by synchronized
*
* This function writes data to the reserved space of journal head @jhead.
* Returns zero in case of success and a negative error code in case of
* failure.
*/
static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
int *lnum, int *offs, int sync)
{
int err;
struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
ubifs_assert(c, jhead != GCHD);
*lnum = c->jheads[jhead].wbuf.lnum;
*offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
dbg_jnl("jhead %s, LEB %d:%d, len %d",
dbg_jhead(jhead), *lnum, *offs, len);
if (ubifs_authenticated(c)) {
err = ubifs_hash_nodes(c, buf, len, c->jheads[jhead].log_hash);
if (err)
return err;
}
err = ubifs_wbuf_write_nolock(wbuf, buf, len);
if (err)
return err;
if (sync)
err = ubifs_wbuf_sync_nolock(wbuf);
return err;
}
/**
 * make_reservation - reserve journal space.
 * @c: UBIFS file-system description object
 * @jhead: journal head
 * @len: how many bytes to reserve
 *
 * This function reserves @len bytes in journal head @jhead, running a commit
 * and re-trying when 'reserve_space()' asks for it. On success the commit
 * semaphore is held for reading and the journal head is locked; the caller
 * has to unlock the head with 'release_head()' and finish the reservation
 * with 'finish_reservation()'. On failure neither is held.
 *
 * Returns zero in case of success and a negative error code in case of
 * failure. An error returned by 'ubifs_run_commit()' is passed on without
 * being logged here.
 *
 * Note, the journal head may be unlocked as soon as the data is written, while
 * the commit lock has to be released after the data has been added to the
 * TNC.
 */
static int make_reservation(struct ubifs_info *c, int jhead, int len)
{
	int err, cmt_retries = 0, nospc_retries = 0;

	for (;;) {
		down_read(&c->commit_sem);
		err = reserve_space(c, jhead, len);
		if (!err)
			/* c->commit_sem will get released via finish_reservation(). */
			return 0;
		up_read(&c->commit_sem);

		/*
		 * %-ENOSPC means GC could not make any progress. A commit may
		 * produce some dirty space that lets GC progress, so the
		 * first two times turn the error into %-EAGAIN and let the
		 * code below commit and re-try.
		 *
		 * If %-ENOSPC persists, the budgeting is incorrect. We always
		 * have to be able to write to the media, because all
		 * operations are budgeted. Deletions are not budgeted,
		 * though, but we reserve an extra LEB for them.
		 */
		if (err == -ENOSPC && nospc_retries++ < 2) {
			dbg_jnl("no space, retry");
			err = -EAGAIN;
		}

		if (err != -EAGAIN)
			break;

		/*
		 * -EAGAIN means that the journal is full or too large, or the
		 * above code wants to do one commit. Do this and re-try.
		 */
		if (cmt_retries > 128) {
			/*
			 * This should not happen unless the journal size
			 * limitations are too tough.
			 */
			ubifs_err(c, "stuck in space allocation");
			err = -ENOSPC;
			break;
		}
		if (cmt_retries > 32)
			ubifs_warn(c, "too many space allocation re-tries (%d)",
				   cmt_retries);

		dbg_jnl("-EAGAIN, commit and retry (retried %d times)",
			cmt_retries);
		cmt_retries++;

		err = ubifs_run_commit(c);
		if (err)
			return err;
	}

	ubifs_err(c, "cannot reserve %d bytes in jhead %d, error %d",
		  len, jhead, err);
	if (err == -ENOSPC) {
		/* These are budgeting problems, print useful information */
		down_write(&c->commit_sem);
		dump_stack();
		ubifs_dump_budg(c, &c->bi);
		ubifs_dump_lprops(c);
		/* The result is not needed; the counter is reused as a sink */
		cmt_retries = dbg_check_lprops(c);
		up_write(&c->commit_sem);
	}
	return err;
}
/**
* release_head - release a journal head.
* @c: UBIFS file-system description object
* @jhead: journal head
*
* This function releases journal head @jhead which was locked by
* the 'make_reservation()' function. It has to be called after each successful
* 'make_reservation()' invocation.
*/
static inline void release_head(struct ubifs_info *c, int jhead)
{
mutex_unlock(&c->jheads[jhead].wbuf.io_mutex);
}
/**
* finish_reservation - finish a reservation.
* @c: UBIFS file-system description object
*
* This function finishes journal space reservation by releasing the read side
* of @c->commit_sem. It must be called after a successful
* 'make_reservation()'.
*/
static void finish_reservation(struct ubifs_info *c)
{
/* Pairs with the down_read() left held by a successful make_reservation() */
up_read(&c->commit_sem);
}
/**
 * get_dent_type - translate VFS inode mode to UBIFS directory entry type.
 * @mode: inode mode
 *
 * Only the file type bits of @mode (those selected by %S_IFMT) are looked at.
 * Returns the matching %UBIFS_ITYPE_* constant. Any other file type triggers
 * 'BUG()'; the final return of zero is reached only if 'BUG()' returns.
 */
static int get_dent_type(int mode)
{
	const int fmt = mode & S_IFMT;

	if (fmt == S_IFREG)
		return UBIFS_ITYPE_REG;
	if (fmt == S_IFDIR)
		return UBIFS_ITYPE_DIR;
	if (fmt == S_IFLNK)
		return UBIFS_ITYPE_LNK;
	if (fmt == S_IFBLK)
		return UBIFS_ITYPE_BLK;
	if (fmt == S_IFCHR)
		return UBIFS_ITYPE_CHR;
	if (fmt == S_IFIFO)
		return UBIFS_ITYPE_FIFO;
	if (fmt == S_IFSOCK)
		return UBIFS_ITYPE_SOCK;

	/* None of the known file types matched */
	BUG();
	return 0;
}
/**
 * set_dent_cookie - fill in the cookie of a directory entry node.
 * @c: UBIFS file-system description object
 * @dent: directory entry node to update
 *
 * The cookie is set to a fresh 32-bit random value when @c->double_hash is
 * set, and to zero otherwise.
 */
static void set_dent_cookie(struct ubifs_info *c, struct ubifs_dent_node *dent)
{
	if (!c->double_hash) {
		dent->cookie = 0;
		return;
	}

	dent->cookie = (__force __le32) get_random_u32();
}
|