/*
 * fs/logfs/journal.c - journal handling code
 *
 * As should be obvious for Linux kernel code, license is GPLv2
 *
 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
 */
#include "logfs.h"
#include <linux/slab.h>

static void logfs_calc_free(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	u64 reserve, no_segs = super->s_no_segs;
	s64 free;
	int i;

	/* superblock segments */
	no_segs -= 2;
	super->s_no_journal_segs = 0;
	/* journal */
	journal_for_each(i)
		if (super->s_journal_seg[i]) {
			no_segs--;
			super->s_no_journal_segs++;
		}

	/* open segments plus one extra per level for GC */
	no_segs -= 2 * super->s_total_levels;

	free = no_segs * (super->s_segsize - LOGFS_SEGMENT_RESERVE);
	free -= super->s_used_bytes;
	/* just a bit extra */
	free -= super->s_total_levels * 4096;

	/* Bad blocks are 'paid' for with speed reserve - the filesystem
	 * simply gets slower as bad blocks accumulate.  Until the bad blocks
	 * exceed the speed reserve - then the filesystem gets smaller.
	 */
	reserve = super->s_bad_segments + super->s_bad_seg_reserve;
	reserve *= super->s_segsize - LOGFS_SEGMENT_RESERVE;
	reserve = max(reserve, super->s_speed_reserve);
	free -= reserve;
	if (free < 0)
		free = 0;

	super->s_free_bytes = free;
}
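/*
 * Illustration of the arithmetic above (numbers invented, not from the
 * original source): on a device with 1024 segments, two superblock
 * segments, four journal segments and six levels,
 *
 *	no_segs = 1024 - 2 - 4 - 2*6 = 1006
 *	free    = 1006 * (s_segsize - LOGFS_SEGMENT_RESERVE)
 *	          - s_used_bytes - 6*4096 - reserve
 *
 * where reserve is whichever is larger: the speed reserve, or the space
 * consumed by bad segments plus the bad-segment reserve.
 */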
static void reserve_sb_and_journal(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	struct btree_head32 *head = &super->s_reserved_segments;
	int i, err;

	err = btree_insert32(head, seg_no(sb, super->s_sb_ofs[0]), (void *)1,
			GFP_KERNEL);
	BUG_ON(err);

	err = btree_insert32(head, seg_no(sb, super->s_sb_ofs[1]), (void *)1,
			GFP_KERNEL);
	BUG_ON(err);

	journal_for_each(i) {
		if (!super->s_journal_seg[i])
			continue;
		err = btree_insert32(head, super->s_journal_seg[i], (void *)1,
				GFP_KERNEL);
		BUG_ON(err);
	}
}

static void read_dynsb(struct super_block *sb,
		struct logfs_je_dynsb *dynsb)
{
	struct logfs_super *super = logfs_super(sb);

	super->s_gec = be64_to_cpu(dynsb->ds_gec);
	super->s_sweeper = be64_to_cpu(dynsb->ds_sweeper);
	super->s_victim_ino = be64_to_cpu(dynsb->ds_victim_ino);
	super->s_rename_dir = be64_to_cpu(dynsb->ds_rename_dir);
	super->s_rename_pos = be64_to_cpu(dynsb->ds_rename_pos);
	super->s_used_bytes = be64_to_cpu(dynsb->ds_used_bytes);
	super->s_generation = be32_to_cpu(dynsb->ds_generation);
}

static void read_anchor(struct super_block *sb,
		struct logfs_je_anchor *da)
{
	struct logfs_super *super = logfs_super(sb);
	struct inode *inode = super->s_master_inode;
	struct logfs_inode *li = logfs_inode(inode);
	int i;

	super->s_last_ino = be64_to_cpu(da->da_last_ino);
	li->li_flags = 0;
	li->li_height = da->da_height;
	i_size_write(inode, be64_to_cpu(da->da_size));
	li->li_used_bytes = be64_to_cpu(da->da_used_bytes);

	for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++)
		li->li_data[i] = be64_to_cpu(da->da_data[i]);
}

static void read_erasecount(struct super_block *sb,
		struct logfs_je_journal_ec *ec)
{
	struct logfs_super *super = logfs_super(sb);
	int i;

	journal_for_each(i)
		super->s_journal_ec[i] = be32_to_cpu(ec->ec[i]);
}

static int read_area(struct super_block *sb, struct logfs_je_area *a)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_area *area;
	u64 ofs;
	u32 writemask = ~(super->s_writesize - 1);

	/* Validate gc_level before using it as an index into s_area[] */
	if (a->gc_level >= LOGFS_NO_AREAS)
		return -EIO;
	if (a->vim != VIM_DEFAULT)
		return -EIO; /* TODO: close area and continue */

	area = super->s_area[a->gc_level];
	area->a_used_bytes = be32_to_cpu(a->used_bytes);
	area->a_written_bytes = area->a_used_bytes & writemask;
	area->a_segno = be32_to_cpu(a->segno);
	if (area->a_segno)
		area->a_is_open = 1;

	ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
	if (super->s_writesize > 1)
		return logfs_buf_recover(area, ofs, a + 1, super->s_writesize);
	else
		return logfs_buf_recover(area, ofs, NULL, 0);
}

static void *unpack(void *from, void *to)
{
	struct logfs_journal_header *jh = from;
	void *data = from + sizeof(struct logfs_journal_header);
	int err;
	size_t inlen, outlen;

	inlen = be16_to_cpu(jh->h_len);
	outlen = be16_to_cpu(jh->h_datalen);

	if (jh->h_compr == COMPR_NONE)
		memcpy(to, data, inlen);
	else {
		err = logfs_uncompress(data, to, inlen, outlen);
		BUG_ON(err);
	}
	return to;
}
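/*
 * On-media sketch of a journal entry, as implied by the readers below
 * and the writers further down (the authoritative struct layout lives
 * in logfs.h):
 *
 *	+------------------------------+----------------------------+
 *	| struct logfs_journal_header  | payload, h_len bytes       |
 *	| h_crc h_len h_datalen h_type | (h_datalen bytes once      |
 *	| h_compr h_pad[]              | uncompressed)              |
 *	+------------------------------+----------------------------+
 *
 * Assuming logfs_crc32(buf, len, skip) checksums bytes skip..len-1,
 * h_crc covers everything past the first four bytes of the header up
 * to the end of the payload - hence the len + sizeof(*jh) passed in
 * __read_je_payload().
 */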
static int __read_je_header(struct super_block *sb, u64 ofs,
		struct logfs_journal_header *jh)
{
	struct logfs_super *super = logfs_super(sb);
	size_t bufsize = max_t(size_t, sb->s_blocksize, super->s_writesize)
		+ MAX_JOURNAL_HEADER;
	u16 type, len, datalen;
	int err;

	/* read header only */
	err = wbuf_read(sb, ofs, sizeof(*jh), jh);
	if (err)
		return err;
	type = be16_to_cpu(jh->h_type);
	len = be16_to_cpu(jh->h_len);
	datalen = be16_to_cpu(jh->h_datalen);
	if (len > sb->s_blocksize)
		return -EIO;
	if ((type < JE_FIRST) || (type > JE_LAST))
		return -EIO;
	if (datalen > bufsize)
		return -EIO;
	return 0;
}

static int __read_je_payload(struct super_block *sb, u64 ofs,
		struct logfs_journal_header *jh)
{
	u16 len;
	int err;

	len = be16_to_cpu(jh->h_len);
	err = wbuf_read(sb, ofs + sizeof(*jh), len, jh + 1);
	if (err)
		return err;
	if (jh->h_crc != logfs_crc32(jh, len + sizeof(*jh), 4)) {
		/* Old code was confused.  It forgot about the header length
		 * and stopped calculating the crc 16 bytes before the end
		 * of data - ick!
		 * FIXME: Remove this hack once the old code is fixed.
		 */
		if (jh->h_crc == logfs_crc32(jh, len, 4))
			WARN_ON_ONCE(1);
		else
			return -EIO;
	}
	return 0;
}

/*
 * jh needs to be large enough to hold the complete entry, not just the header
 */
static int __read_je(struct super_block *sb, u64 ofs,
		struct logfs_journal_header *jh)
{
	int err;

	err = __read_je_header(sb, ofs, jh);
	if (err)
		return err;
	return __read_je_payload(sb, ofs, jh);
}

static int read_je(struct super_block *sb, u64 ofs)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_journal_header *jh = super->s_compressed_je;
	void *scratch = super->s_je;
	u16 type, datalen;
	int err;

	err = __read_je(sb, ofs, jh);
	if (err)
		return err;
	type = be16_to_cpu(jh->h_type);
	datalen = be16_to_cpu(jh->h_datalen);

	switch (type) {
	case JE_DYNSB:
		read_dynsb(sb, unpack(jh, scratch));
		break;
	case JE_ANCHOR:
		read_anchor(sb, unpack(jh, scratch));
		break;
	case JE_ERASECOUNT:
		read_erasecount(sb, unpack(jh, scratch));
		break;
	case JE_AREA:
		err = read_area(sb, unpack(jh, scratch));
		break;
	case JE_OBJ_ALIAS:
		err = logfs_load_object_aliases(sb, unpack(jh, scratch),
				datalen);
		break;
	default:
		WARN_ON_ONCE(1);
		return -EIO;
	}
	return err;
}

static int logfs_read_segment(struct super_block *sb, u32 segno)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_journal_header *jh = super->s_compressed_je;
	u64 ofs, seg_ofs = dev_ofs(sb, segno, 0);
	u32 h_ofs, last_ofs = 0;
	u16 len, datalen, last_len = 0;
	int i, err;

	/* search for most recent commit */
	for (h_ofs = 0; h_ofs < super->s_segsize; h_ofs += sizeof(*jh)) {
		ofs = seg_ofs + h_ofs;
		err = __read_je_header(sb, ofs, jh);
		if (err)
			continue;
		if (jh->h_type != cpu_to_be16(JE_COMMIT))
			continue;
		err = __read_je_payload(sb, ofs, jh);
		if (err)
			continue;
		len = be16_to_cpu(jh->h_len);
		datalen = be16_to_cpu(jh->h_datalen);
		if ((datalen > sizeof(super->s_je_array)) ||
				(datalen % sizeof(__be64)))
			continue;
		last_ofs = h_ofs;
		last_len = datalen;
		h_ofs += ALIGN(len, sizeof(*jh)) - sizeof(*jh);
	}
	/* read commit */
	if (last_ofs == 0)
		return -ENOENT;
	ofs = seg_ofs + last_ofs;
	log_journal("Read commit from %llx\n", ofs);
	err = __read_je(sb, ofs, jh);
	BUG_ON(err); /* We should have caught it in the scan loop already */
	if (err)
		return err;
	/* uncompress */
	unpack(jh, super->s_je_array);
	super->s_no_je = last_len / sizeof(__be64);
	/* iterate over array */
	for (i = 0; i < super->s_no_je; i++) {
		err = read_je(sb, be64_to_cpu(super->s_je_array[i]));
		if (err)
			return err;
	}
	super->s_journal_area->a_segno = segno;
	return 0;
}

static u64 read_gec(struct super_block *sb, u32 segno)
{
	struct logfs_segment_header sh;
	__be32 crc;
	int err;

	if (!segno)
		return 0;
	err = wbuf_read(sb, dev_ofs(sb, segno, 0), sizeof(sh), &sh);
	if (err)
		return 0;
	crc = logfs_crc32(&sh, sizeof(sh), 4);
	if (crc != sh.crc) {
		WARN_ON(sh.gec != cpu_to_be64(0xffffffffffffffffull));
		/* Most likely it was just erased */
		return 0;
	}
	return be64_to_cpu(sh.gec);
}
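/*
 * Example (values invented): with four journal segments whose headers
 * carry gec values 17, 42, 42 and 0 (erased), logfs_read_journal() below
 * replays the first segment holding the highest global erase count,
 * i.e. the first 42.  Erased or unreadable segments report a gec of 0
 * via read_gec() and are never picked; if every segment is dead, the
 * mount fails with -EIO.
 */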
static int logfs_read_journal(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	u64 gec[LOGFS_JOURNAL_SEGS], max;
	u32 segno;
	int i, max_i;

	max = 0;
	max_i = -1;
	journal_for_each(i) {
		segno = super->s_journal_seg[i];
		gec[i] = read_gec(sb, segno);
		if (gec[i] > max) {
			max = gec[i];
			max_i = i;
		}
	}
	if (max_i == -1)
		return -EIO;
	/* FIXME: Try older segments in case of error */
	return logfs_read_segment(sb, super->s_journal_seg[max_i]);
}

/*
 * First search the current segment (outer loop), then pick the next segment
 * in the array, skipping any zero entries (inner loop).
 */
static void journal_get_free_segment(struct logfs_area *area)
{
	struct logfs_super *super = logfs_super(area->a_sb);
	int i;

	journal_for_each(i) {
		if (area->a_segno != super->s_journal_seg[i])
			continue;

		do {
			i++;
			if (i == LOGFS_JOURNAL_SEGS)
				i = 0;
		} while (!super->s_journal_seg[i]);

		area->a_segno = super->s_journal_seg[i];
		area->a_erase_count = ++(super->s_journal_ec[i]);
		log_journal("Journal now at %x (ec %x)\n", area->a_segno,
				area->a_erase_count);
		return;
	}
	BUG();
}

static void journal_get_erase_count(struct logfs_area *area)
{
	/* erase count is stored globally and incremented in
	 * journal_get_free_segment() - nothing to do here */
}
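/*
 * Illustration of the rotation above (hypothetical array, not from the
 * source): with s_journal_seg[] = { 7, 0, 9, 12 }, advancing from
 * segment 9 selects 12, and advancing from 12 wraps around to 7; the
 * zero entry is skipped entirely.  Each hop also bumps the selected
 * segment's erase count, which is what journal_get_erase_count() relies
 * on.
 */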
static int journal_erase_segment(struct logfs_area *area)
{
	struct super_block *sb = area->a_sb;
	union {
		struct logfs_segment_header sh;
		unsigned char c[ALIGN(sizeof(struct logfs_segment_header), 16)];
	} u;
	u64 ofs;
	int err;

	err = logfs_erase_segment(sb, area->a_segno, 1);
	if (err)
		return err;

	memset(&u, 0, sizeof(u));
	u.sh.pad = 0;
	u.sh.type = SEG_JOURNAL;
	u.sh.level = 0;
	u.sh.segno = cpu_to_be32(area->a_segno);
	u.sh.ec = cpu_to_be32(area->a_erase_count);
	u.sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
	u.sh.crc = logfs_crc32(&u.sh, sizeof(u.sh), 4);

	/* This causes a bug in segment.c.  Not yet. */
	//logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count, 0);

	ofs = dev_ofs(sb, area->a_segno, 0);
	area->a_used_bytes = sizeof(u);
	logfs_buf_write(area, ofs, &u, sizeof(u));
	return 0;
}

static size_t __logfs_write_header(struct logfs_super *super,
		struct logfs_journal_header *jh, size_t len, size_t datalen,
		u16 type, u8 compr)
{
	jh->h_len = cpu_to_be16(len);
	jh->h_type = cpu_to_be16(type);
	jh->h_datalen = cpu_to_be16(datalen);
	jh->h_compr = compr;
	jh->h_pad[0] = 'H';
	jh->h_pad[1] = 'E';
	jh->h_pad[2] = 'A';
	jh->h_pad[3] = 'D';
	jh->h_pad[4] = 'R';
	jh->h_crc = logfs_crc32(jh, len + sizeof(*jh), 4);
	return ALIGN(len, 16) + sizeof(*jh);
}

static size_t logfs_write_header(struct logfs_super *super,
		struct logfs_journal_header *jh, size_t datalen, u16 type)
{
	size_t len = datalen;

	return __logfs_write_header(super, jh, len, datalen, type, COMPR_NONE);
}

static inline size_t logfs_journal_erasecount_size(struct logfs_super *super)
{
	return LOGFS_JOURNAL_SEGS * sizeof(__be32);
}

static void *logfs_write_erasecount(struct super_block *sb, void *_ec,
		u16 *type, size_t *len)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_je_journal_ec *ec = _ec;
	int i;

	journal_for_each(i)
		ec->ec[i] = cpu_to_be32(super->s_journal_ec[i]);
	*type = JE_ERASECOUNT;
	*len = logfs_journal_erasecount_size(super);
	return ec;
}

static void account_shadow(void *_shadow, unsigned long _sb, u64 ignore,
		size_t ignore2)
{
	struct logfs_shadow *shadow = _shadow;
	struct super_block *sb = (void *)_sb;
	struct logfs_super *super = logfs_super(sb);

	/* consume new space */
	super->s_free_bytes -= shadow->new_len;
	super->s_used_bytes += shadow->new_len;
	super->s_dirty_used_bytes -= shadow->new_len;

	/* free up old space */
	super->s_free_bytes += shadow->old_len;
	super->s_used_bytes -= shadow->old_len;
	super->s_dirty_free_bytes -= shadow->old_len;

	logfs_set_segment_used(sb, shadow->old_ofs, -shadow->old_len);
	logfs_set_segment_used(sb, shadow->new_ofs, shadow->new_len);

	log_journal("account_shadow(%llx, %llx, %x) %llx->%llx %x->%x\n",
			shadow->ino, shadow->bix, shadow->gc_level,
			shadow->old_ofs, shadow->new_ofs,
			shadow->old_len, shadow->new_len);
	mempool_free(shadow, super->s_shadow_pool);
}
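/*
 * Worked example for account_shadow() (numbers invented): a rewrite
 * that moved a 200-byte object (old_len) into a 180-byte slot (new_len)
 * nets out as
 *
 *	s_free_bytes += 200 - 180;	(space regained)
 *	s_used_bytes += 180 - 200;	(space still in use shrinks)
 *
 * with the matching per-segment adjustments made through
 * logfs_set_segment_used(), after which the shadow is returned to the
 * mempool.
 */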
static void account_shadows(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	struct inode *inode = super->s_master_inode;
	struct logfs_inode *li = logfs_inode(inode);
	struct shadow_tree *tree = &super->s_shadow_tree;

	btree_grim_visitor64(&tree->new, (unsigned long)sb, account_shadow);
	btree_grim_visitor64(&tree->old, (unsigned long)sb, account_shadow);
	btree_grim_visitor32(&tree->segment_map, 0, NULL);
	tree->no_shadowed_segments = 0;

	if (li->li_block) {
		/*
		 * We never actually use the structure when it is attached to
		 * the master inode.  But it is easier to always free it here
		 * than to have checks in several places elsewhere when
		 * allocating it.
		 */
		li->li_block->ops->free_block(sb, li->li_block);
	}
	BUG_ON((s64)li->li_used_bytes < 0);
}

static void *__logfs_write_anchor(struct super_block *sb, void *_da,
		u16 *type, size_t *len)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_je_anchor *da = _da;
	struct inode *inode = super->s_master_inode;
	struct logfs_inode *li = logfs_inode(inode);
	int i;

	da->da_height = li->li_height;
	da->da_last_ino = cpu_to_be64(super->s_last_ino);
	da->da_size = cpu_to_be64(i_size_read(inode));
	da->da_used_bytes = cpu_to_be64(li->li_used_bytes);
	for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++)
		da->da_data[i] = cpu_to_be64(li->li_data[i]);
	*type = JE_ANCHOR;
	*len = sizeof(*da);
	return da;
}

static void *logfs_write_dynsb(struct super_block *sb, void *_dynsb,
		u16 *type, size_t *len)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_je_dynsb *dynsb = _dynsb;

	dynsb->ds_gec = cpu_to_be64(super->s_gec);
	dynsb->ds_sweeper = cpu_to_be64(super->s_sweeper);
	dynsb->ds_victim_ino = cpu_to_be64(super->s_victim_ino);
	dynsb->ds_rename_dir = cpu_to_be64(super->s_rename_dir);
	dynsb->ds_rename_pos = cpu_to_be64(super->s_rename_pos);
	dynsb->ds_used_bytes = cpu_to_be64(super->s_used_bytes);
	dynsb->ds_generation = cpu_to_be32(super->s_generation);
	*type = JE_DYNSB;
	*len = sizeof(*dynsb);
	return dynsb;
}

static void write_wbuf(struct super_block *sb, struct logfs_area *area,
		void *wbuf)
{
	struct logfs_super *super = logfs_super(sb);
	struct address_space *mapping = super->s_mapping_inode->i_mapping;
	u64 ofs;
	pgoff_t index;
	int page_ofs;
	struct page *page;

	ofs = dev_ofs(sb, area->a_segno,
			area->a_used_bytes & ~(super->s_writesize - 1));
	index = ofs >> PAGE_SHIFT;
	page_ofs = ofs & (PAGE_SIZE - 1);

	page = find_or_create_page(mapping, index, GFP_NOFS);
	BUG_ON(!page);
	memcpy(wbuf, page_address(page) + page_ofs, super->s_writesize);
	unlock_page(page);
}

static void *logfs_write_area(struct super_block *sb, void *_a,
		u16 *type, size_t *len)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_area *area = super->s_area[super->s_sum_index];
	struct logfs_je_area *a = _a;

	a->vim = VIM_DEFAULT;
	a->gc_level = super->s_sum_index;
	a->used_bytes = cpu_to_be32(area->a_used_bytes);
	a->segno = cpu_to_be32(area->a_segno);
	if (super->s_writesize > 1)
		write_wbuf(sb, area, a + 1);

	*type = JE_AREA;
	*len = sizeof(*a) + super->s_writesize;
	return a;
}

static void *logfs_write_commit(struct super_block *sb, void *h,
		u16 *type, size_t *len)
{
	struct logfs_super *super = logfs_super(sb);

	*type = JE_COMMIT;
	*len = super->s_no_je * sizeof(__be64);
	return super->s_je_array;
}
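/*
 * Note on the write path below (a summary of the code, not normative):
 * payloads are zlib-compressed when that saves space; anchors, and any
 * payload that does not compress, are stored verbatim with COMPR_NONE.
 * The on-media length is then padded to a 16-byte boundary so the next
 * header stays aligned.  E.g. a 1000-byte anchor occupies
 * sizeof(struct logfs_journal_header) + ALIGN(1000, 16) = header + 1008
 * bytes.
 */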
static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type,
		size_t len)
{
	struct logfs_super *super = logfs_super(sb);
	void *header = super->s_compressed_je;
	void *data = header + sizeof(struct logfs_journal_header);
	ssize_t compr_len, pad_len;
	u8 compr = COMPR_ZLIB;

	if (len == 0)
		return logfs_write_header(super, header, 0, type);

	compr_len = logfs_compress(buf, data, len, sb->s_blocksize);
	if (compr_len < 0 || type == JE_ANCHOR) {
		memcpy(data, buf, len);
		compr_len = len;
		compr = COMPR_NONE;
	}

	pad_len = ALIGN(compr_len, 16);
	memset(data + compr_len, 0, pad_len - compr_len);

	return __logfs_write_header(super, header, compr_len, len, type, compr);
}

static s64 logfs_get_free_bytes(struct logfs_area *area, size_t *bytes,
		int must_pad)
{
	u32 writesize = logfs_super(area->a_sb)->s_writesize;
	s32 ofs;
	int ret;

	ret = logfs_open_area(area, *bytes);
	if (ret)
		return -EAGAIN;

	ofs = area->a_used_bytes;
	area->a_used_bytes += *bytes;

	if (must_pad) {
		area->a_used_bytes = ALIGN(area->a_used_bytes, writesize);
		*bytes = area->a_used_bytes - ofs;
	}

	return dev_ofs(area->a_sb, area->a_segno, ofs);
}

static int logfs_write_je_buf(struct super_block *sb, void *buf, u16 type,
		size_t buf_len)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_area *area = super->s_journal_area;
	struct logfs_journal_header *jh = super->s_compressed_je;
	size_t len;
	int must_pad = 0;
	s64 ofs;

	len = __logfs_write_je(sb, buf, type, buf_len);
	if (jh->h_type == cpu_to_be16(JE_COMMIT))
		must_pad = 1;

	ofs = logfs_get_free_bytes(area, &len, must_pad);
	if (ofs < 0)
		return ofs;
	logfs_buf_write(area, ofs, super->s_compressed_je, len);
	BUG_ON(super->s_no_je >= MAX_JOURNAL_ENTRIES);
	super->s_je_array[super->s_no_je++] = cpu_to_be64(ofs);
	return 0;
}

static int logfs_write_je(struct super_block *sb,
		void *(*write)(struct super_block *sb, void *scratch,
			u16 *type, size_t *len))
{
	void *buf;
	size_t len;
	u16 type;

	buf = write(sb, logfs_super(sb)->s_je, &type, &len);
	return logfs_write_je_buf(sb, buf, type, len);
}

int write_alias_journal(struct super_block *sb, u64 ino, u64 bix,
		level_t level, int child_no, __be64 val)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_obj_alias *oa = super->s_je;
	int err = 0, fill = super->s_je_fill;

	log_aliases("logfs_write_obj_aliases #%x(%llx, %llx, %x, %x) %llx\n",
			fill, ino, bix, level, child_no, be64_to_cpu(val));
	oa[fill].ino = cpu_to_be64(ino);
	oa[fill].bix = cpu_to_be64(bix);
	oa[fill].val = val;
	oa[fill].level = (__force u8)level;
	oa[fill].child_no = cpu_to_be16(child_no);
	fill++;
	if (fill >= sb->s_blocksize / sizeof(*oa)) {
		err = logfs_write_je_buf(sb, oa, JE_OBJ_ALIAS, sb->s_blocksize);
		fill = 0;
	}

	super->s_je_fill = fill;
	return err;
}

static int logfs_write_obj_aliases(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	int err;

	log_journal("logfs_write_obj_aliases: %d aliases to write\n",
			super->s_no_object_aliases);
	super->s_je_fill = 0;
	err = logfs_write_obj_aliases_pagecache(sb);
	if (err)
		return err;

	if (super->s_je_fill)
		err = logfs_write_je_buf(sb, super->s_je, JE_OBJ_ALIAS,
				super->s_je_fill
				* sizeof(struct logfs_obj_alias));
	return err;
}
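/*
 * Sizing example (illustrative; check sizeof(struct logfs_obj_alias)
 * in logfs.h before relying on the figure): with a 4096-byte block and
 * a 32-byte alias record, write_alias_journal() batches up to 128
 * aliases per JE_OBJ_ALIAS entry, and logfs_write_obj_aliases() flushes
 * whatever partial batch remains at the end.
 */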
/*
 * Write all journal entries.  The goto logic ensures that all journal
 * entries are written whenever a new segment is used.  It is ugly and
 * potentially a bit wasteful, but robustness is more important.  With this
 * we can *always* erase all journal segments except the one containing
 * the most recent commit.
 */
void logfs_write_anchor(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_area *area = super->s_journal_area;
	int i, err;

	if (!(super->s_flags & LOGFS_SB_FLAG_DIRTY))
		return;
	super->s_flags &= ~LOGFS_SB_FLAG_DIRTY;

	BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
	mutex_lock(&super->s_journal_mutex);

	/* Do this first or suffer corruption */
	logfs_sync_segments(sb);
	account_shadows(sb);

again:
	super->s_no_je = 0;
	for_each_area(i) {
		if (!super->s_area[i]->a_is_open)
			continue;
		super->s_sum_index = i;
		err = logfs_write_je(sb, logfs_write_area);
		if (err)
			goto again;
	}
	err = logfs_write_obj_aliases(sb);
	if (err)
		goto again;
	err = logfs_write_je(sb, logfs_write_erasecount);
	if (err)
		goto again;
	err = logfs_write_je(sb, __logfs_write_anchor);
	if (err)
		goto again;
	err = logfs_write_je(sb, logfs_write_dynsb);
	if (err)
		goto again;
	/*
	 * Order is imperative.  First we sync all writes, including the
	 * non-committed journal writes.  Then we write the final commit and
	 * sync the current journal segment.
	 * There is a theoretical bug here.  Syncing the journal segment will
	 * write a number of journal entries and the final commit.  All these
	 * are written in a single operation.  If the device layer writes the
	 * data back-to-front, the commit will precede the other journal
	 * entries, leaving a race window.
	 * Two fixes are possible.  Preferred is to fix the device layer to
	 * ensure writes happen front-to-back.  Alternatively we can insert
	 * another logfs_sync_area() plus super->s_devops->sync() combo
	 * before writing the commit.
	 */
	/*
	 * On another subject, super->s_devops->sync is usually not necessary.
	 * Unless called from sys_sync or friends, a barrier would suffice.
	 */
	super->s_devops->sync(sb);
	err = logfs_write_je(sb, logfs_write_commit);
	if (err)
		goto again;
	log_journal("Write commit to %llx\n",
			be64_to_cpu(super->s_je_array[super->s_no_je - 1]));
	logfs_sync_area(area);
	BUG_ON(area->a_used_bytes != area->a_written_bytes);
	super->s_devops->sync(sb);

	mutex_unlock(&super->s_journal_mutex);
	return;
}
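/*
 * Outline of the wear-leveling pass below (a recap of the code, not new
 * behaviour): old journal segments are unreserved and handed back, the
 * same number of fresh segments is taken from the reserve list and
 * erased, the journal area is pointed at the first new segment, and a
 * full commit plus superblock write makes the move durable.
 */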
void do_logfs_journal_wl_pass(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_area *area = super->s_journal_area;
	struct btree_head32 *head = &super->s_reserved_segments;
	u32 segno, ec;
	int i, err;

	log_journal("Journal requires wear-leveling.\n");
	/* Drop old segments */
	journal_for_each(i)
		if (super->s_journal_seg[i]) {
			btree_remove32(head, super->s_journal_seg[i]);
			logfs_set_segment_unreserved(sb,
					super->s_journal_seg[i],
					super->s_journal_ec[i]);
			super->s_journal_seg[i] = 0;
			super->s_journal_ec[i] = 0;
		}
	/* Get new segments */
	for (i = 0; i < super->s_no_journal_segs; i++) {
		segno = get_best_cand(sb, &super->s_reserve_list, &ec);
		super->s_journal_seg[i] = segno;
		super->s_journal_ec[i] = ec;
		logfs_set_segment_reserved(sb, segno);
		err = btree_insert32(head, segno, (void *)1, GFP_NOFS);
		BUG_ON(err); /* mempool should prevent this */
		err = logfs_erase_segment(sb, segno, 1);
		BUG_ON(err); /* FIXME: remount-ro would be nicer */
	}
	/* Manually move journal_area */
	freeseg(sb, area->a_segno);
	area->a_segno = super->s_journal_seg[0];
	area->a_is_open = 0;
	area->a_used_bytes = 0;
	/* Write journal */
	logfs_write_anchor(sb);
	/* Write superblocks */
	err = logfs_write_sb(sb);
	BUG_ON(err);
}

static const struct logfs_area_ops journal_area_ops = {
	.get_free_segment = journal_get_free_segment,
	.get_erase_count = journal_get_erase_count,
	.erase_segment = journal_erase_segment,
};

int logfs_init_journal(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	size_t bufsize = max_t(size_t, sb->s_blocksize, super->s_writesize)
		+ MAX_JOURNAL_HEADER;
	int ret = -ENOMEM;

	mutex_init(&super->s_journal_mutex);
	btree_init_mempool32(&super->s_reserved_segments, super->s_btree_pool);

	super->s_je = kzalloc(bufsize, GFP_KERNEL);
	if (!super->s_je)
		return ret;

	super->s_compressed_je = kzalloc(bufsize, GFP_KERNEL);
	if (!super->s_compressed_je)
		return ret;

	super->s_master_inode = logfs_new_meta_inode(sb, LOGFS_INO_MASTER);
	if (IS_ERR(super->s_master_inode))
		return PTR_ERR(super->s_master_inode);

	ret = logfs_read_journal(sb);
	if (ret)
		return -EIO;

	reserve_sb_and_journal(sb);
	logfs_calc_free(sb);

	super->s_journal_area->a_ops = &journal_area_ops;
	return 0;
}

void logfs_cleanup_journal(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);

	btree_grim_visitor32(&super->s_reserved_segments, 0, NULL);

	kfree(super->s_compressed_je);
	kfree(super->s_je);
}