1#include "dm.h" 2#include "persistent-data/dm-transaction-manager.h" 3#include "persistent-data/dm-bitset.h" 4#include "persistent-data/dm-space-map.h" 5 6#include <linux/dm-io.h> 7#include <linux/dm-kcopyd.h> 8#include <linux/init.h> 9#include <linux/mempool.h> 10#include <linux/module.h> 11#include <linux/slab.h> 12#include <linux/vmalloc.h> 13 14#define DM_MSG_PREFIX "era" 15 16#define SUPERBLOCK_LOCATION 0 17#define SUPERBLOCK_MAGIC 2126579579 18#define SUPERBLOCK_CSUM_XOR 146538381 19#define MIN_ERA_VERSION 1 20#define MAX_ERA_VERSION 1 21#define INVALID_WRITESET_ROOT SUPERBLOCK_LOCATION 22#define MIN_BLOCK_SIZE 8 23 24/*---------------------------------------------------------------- 25 * Writeset 26 *--------------------------------------------------------------*/ 27struct writeset_metadata { 28 uint32_t nr_bits; 29 dm_block_t root; 30}; 31 32struct writeset { 33 struct writeset_metadata md; 34 35 /* 36 * An in core copy of the bits to save constantly doing look ups on 37 * disk. 38 */ 39 unsigned long *bits; 40}; 41 42/* 43 * This does not free off the on disk bitset as this will normally be done 44 * after digesting into the era array. 45 */ 46static void writeset_free(struct writeset *ws) 47{ 48 vfree(ws->bits); 49} 50 51static int setup_on_disk_bitset(struct dm_disk_bitset *info, 52 unsigned nr_bits, dm_block_t *root) 53{ 54 int r; 55 56 r = dm_bitset_empty(info, root); 57 if (r) 58 return r; 59 60 return dm_bitset_resize(info, *root, 0, nr_bits, false, root); 61} 62 63static size_t bitset_size(unsigned nr_bits) 64{ 65 return sizeof(unsigned long) * dm_div_up(nr_bits, BITS_PER_LONG); 66} 67 68/* 69 * Allocates memory for the in core bitset. 70 */ 71static int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks) 72{ 73 ws->md.nr_bits = nr_blocks; 74 ws->md.root = INVALID_WRITESET_ROOT; 75 ws->bits = vzalloc(bitset_size(nr_blocks)); 76 if (!ws->bits) { 77 DMERR("%s: couldn't allocate in memory bitset", __func__); 78 return -ENOMEM; 79 } 80 81 return 0; 82} 83 84/* 85 * Wipes the in-core bitset, and creates a new on disk bitset. 86 */ 87static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws) 88{ 89 int r; 90 91 memset(ws->bits, 0, bitset_size(ws->md.nr_bits)); 92 93 r = setup_on_disk_bitset(info, ws->md.nr_bits, &ws->md.root); 94 if (r) { 95 DMERR("%s: setup_on_disk_bitset failed", __func__); 96 return r; 97 } 98 99 return 0; 100} 101 102static bool writeset_marked(struct writeset *ws, dm_block_t block) 103{ 104 return test_bit(block, ws->bits); 105} 106 107static int writeset_marked_on_disk(struct dm_disk_bitset *info, 108 struct writeset_metadata *m, dm_block_t block, 109 bool *result) 110{ 111 dm_block_t old = m->root; 112 113 /* 114 * The bitset was flushed when it was archived, so we know there'll 115 * be no change to the root. 116 */ 117 int r = dm_bitset_test_bit(info, m->root, block, &m->root, result); 118 if (r) { 119 DMERR("%s: dm_bitset_test_bit failed", __func__); 120 return r; 121 } 122 123 BUG_ON(m->root != old); 124 125 return r; 126} 127 128/* 129 * Returns < 0 on error, 0 if the bit wasn't previously set, 1 if it was. 130 */ 131static int writeset_test_and_set(struct dm_disk_bitset *info, 132 struct writeset *ws, uint32_t block) 133{ 134 int r; 135 136 if (!test_and_set_bit(block, ws->bits)) { 137 r = dm_bitset_set_bit(info, ws->md.root, block, &ws->md.root); 138 if (r) { 139 /* FIXME: fail mode */ 140 return r; 141 } 142 143 return 0; 144 } 145 146 return 1; 147} 148 149/*---------------------------------------------------------------- 150 * On disk metadata layout 151 *--------------------------------------------------------------*/ 152#define SPACE_MAP_ROOT_SIZE 128 153#define UUID_LEN 16 154 155struct writeset_disk { 156 __le32 nr_bits; 157 __le64 root; 158} __packed; 159 160struct superblock_disk { 161 __le32 csum; 162 __le32 flags; 163 __le64 blocknr; 164 165 __u8 uuid[UUID_LEN]; 166 __le64 magic; 167 __le32 version; 168 169 __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; 170 171 __le32 data_block_size; 172 __le32 metadata_block_size; 173 __le32 nr_blocks; 174 175 __le32 current_era; 176 struct writeset_disk current_writeset; 177 178 /* 179 * Only these two fields are valid within the metadata snapshot. 180 */ 181 __le64 writeset_tree_root; 182 __le64 era_array_root; 183 184 __le64 metadata_snap; 185} __packed; 186 187/*---------------------------------------------------------------- 188 * Superblock validation 189 *--------------------------------------------------------------*/ 190static void sb_prepare_for_write(struct dm_block_validator *v, 191 struct dm_block *b, 192 size_t sb_block_size) 193{ 194 struct superblock_disk *disk = dm_block_data(b); 195 196 disk->blocknr = cpu_to_le64(dm_block_location(b)); 197 disk->csum = cpu_to_le32(dm_bm_checksum(&disk->flags, 198 sb_block_size - sizeof(__le32), 199 SUPERBLOCK_CSUM_XOR)); 200} 201 202static int check_metadata_version(struct superblock_disk *disk) 203{ 204 uint32_t metadata_version = le32_to_cpu(disk->version); 205 if (metadata_version < MIN_ERA_VERSION || metadata_version > MAX_ERA_VERSION) { 206 DMERR("Era metadata version %u found, but only versions between %u and %u supported.", 207 metadata_version, MIN_ERA_VERSION, MAX_ERA_VERSION); 208 return -EINVAL; 209 } 210 211 return 0; 212} 213 214static int sb_check(struct dm_block_validator *v, 215 struct dm_block *b, 216 size_t sb_block_size) 217{ 218 struct superblock_disk *disk = dm_block_data(b); 219 __le32 csum_le; 220 221 if (dm_block_location(b) != le64_to_cpu(disk->blocknr)) { 222 DMERR("sb_check failed: blocknr %llu: wanted %llu", 223 le64_to_cpu(disk->blocknr), 224 (unsigned long long)dm_block_location(b)); 225 return -ENOTBLK; 226 } 227 228 if (le64_to_cpu(disk->magic) != SUPERBLOCK_MAGIC) { 229 DMERR("sb_check failed: magic %llu: wanted %llu", 230 le64_to_cpu(disk->magic), 231 (unsigned long long) SUPERBLOCK_MAGIC); 232 return -EILSEQ; 233 } 234 235 csum_le = cpu_to_le32(dm_bm_checksum(&disk->flags, 236 sb_block_size - sizeof(__le32), 237 SUPERBLOCK_CSUM_XOR)); 238 if (csum_le != disk->csum) { 239 DMERR("sb_check failed: csum %u: wanted %u", 240 le32_to_cpu(csum_le), le32_to_cpu(disk->csum)); 241 return -EILSEQ; 242 } 243 244 return check_metadata_version(disk); 245} 246 247static struct dm_block_validator sb_validator = { 248 .name = "superblock", 249 .prepare_for_write = sb_prepare_for_write, 250 .check = sb_check 251}; 252 253/*---------------------------------------------------------------- 254 * Low level metadata handling 255 *--------------------------------------------------------------*/ 256#define DM_ERA_METADATA_BLOCK_SIZE 4096 257#define DM_ERA_METADATA_CACHE_SIZE 64 258#define ERA_MAX_CONCURRENT_LOCKS 5 259 260struct era_metadata { 261 struct block_device *bdev; 262 struct dm_block_manager *bm; 263 struct dm_space_map *sm; 264 struct dm_transaction_manager *tm; 265 266 dm_block_t block_size; 267 uint32_t nr_blocks; 268 269 uint32_t current_era; 270 271 /* 272 * We preallocate 2 writesets. When an era rolls over we 273 * switch between them. This means the allocation is done at 274 * preresume time, rather than on the io path. 275 */ 276 struct writeset writesets[2]; 277 struct writeset *current_writeset; 278 279 dm_block_t writeset_tree_root; 280 dm_block_t era_array_root; 281 282 struct dm_disk_bitset bitset_info; 283 struct dm_btree_info writeset_tree_info; 284 struct dm_array_info era_array_info; 285 286 dm_block_t metadata_snap; 287 288 /* 289 * A flag that is set whenever a writeset has been archived. 290 */ 291 bool archived_writesets; 292 293 /* 294 * Reading the space map root can fail, so we read it into this 295 * buffer before the superblock is locked and updated. 296 */ 297 __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; 298}; 299 300static int superblock_read_lock(struct era_metadata *md, 301 struct dm_block **sblock) 302{ 303 return dm_bm_read_lock(md->bm, SUPERBLOCK_LOCATION, 304 &sb_validator, sblock); 305} 306 307static int superblock_lock_zero(struct era_metadata *md, 308 struct dm_block **sblock) 309{ 310 return dm_bm_write_lock_zero(md->bm, SUPERBLOCK_LOCATION, 311 &sb_validator, sblock); 312} 313 314static int superblock_lock(struct era_metadata *md, 315 struct dm_block **sblock) 316{ 317 return dm_bm_write_lock(md->bm, SUPERBLOCK_LOCATION, 318 &sb_validator, sblock); 319} 320 321/* FIXME: duplication with cache and thin */ 322static int superblock_all_zeroes(struct dm_block_manager *bm, bool *result) 323{ 324 int r; 325 unsigned i; 326 struct dm_block *b; 327 __le64 *data_le, zero = cpu_to_le64(0); 328 unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64); 329 330 /* 331 * We can't use a validator here - it may be all zeroes. 332 */ 333 r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &b); 334 if (r) 335 return r; 336 337 data_le = dm_block_data(b); 338 *result = true; 339 for (i = 0; i < sb_block_size; i++) { 340 if (data_le[i] != zero) { 341 *result = false; 342 break; 343 } 344 } 345 346 return dm_bm_unlock(b); 347} 348 349/*----------------------------------------------------------------*/ 350 351static void ws_pack(const struct writeset_metadata *core, struct writeset_disk *disk) 352{ 353 disk->nr_bits = cpu_to_le32(core->nr_bits); 354 disk->root = cpu_to_le64(core->root); 355} 356 357static void ws_unpack(const struct writeset_disk *disk, struct writeset_metadata *core) 358{ 359 core->nr_bits = le32_to_cpu(disk->nr_bits); 360 core->root = le64_to_cpu(disk->root); 361} 362 363static void ws_inc(void *context, const void *value) 364{ 365 struct era_metadata *md = context; 366 struct writeset_disk ws_d; 367 dm_block_t b; 368 369 memcpy(&ws_d, value, sizeof(ws_d)); 370 b = le64_to_cpu(ws_d.root); 371 372 dm_tm_inc(md->tm, b); 373} 374 375static void ws_dec(void *context, const void *value) 376{ 377 struct era_metadata *md = context; 378 struct writeset_disk ws_d; 379 dm_block_t b; 380 381 memcpy(&ws_d, value, sizeof(ws_d)); 382 b = le64_to_cpu(ws_d.root); 383 384 dm_bitset_del(&md->bitset_info, b); 385} 386 387static int ws_eq(void *context, const void *value1, const void *value2) 388{ 389 return !memcmp(value1, value2, sizeof(struct writeset_metadata)); 390} 391 392/*----------------------------------------------------------------*/ 393 394static void setup_writeset_tree_info(struct era_metadata *md) 395{ 396 struct dm_btree_value_type *vt = &md->writeset_tree_info.value_type; 397 md->writeset_tree_info.tm = md->tm; 398 md->writeset_tree_info.levels = 1; 399 vt->context = md; 400 vt->size = sizeof(struct writeset_disk); 401 vt->inc = ws_inc; 402 vt->dec = ws_dec; 403 vt->equal = ws_eq; 404} 405 406static void setup_era_array_info(struct era_metadata *md) 407 408{ 409 struct dm_btree_value_type vt; 410 vt.context = NULL; 411 vt.size = sizeof(__le32); 412 vt.inc = NULL; 413 vt.dec = NULL; 414 vt.equal = NULL; 415 416 dm_array_info_init(&md->era_array_info, md->tm, &vt); 417} 418 419static void setup_infos(struct era_metadata *md) 420{ 421 dm_disk_bitset_init(md->tm, &md->bitset_info); 422 setup_writeset_tree_info(md); 423 setup_era_array_info(md); 424} 425 426/*----------------------------------------------------------------*/ 427 428static int create_fresh_metadata(struct era_metadata *md) 429{ 430 int r; 431 432 r = dm_tm_create_with_sm(md->bm, SUPERBLOCK_LOCATION, 433 &md->tm, &md->sm); 434 if (r < 0) { 435 DMERR("dm_tm_create_with_sm failed"); 436 return r; 437 } 438 439 setup_infos(md); 440 441 r = dm_btree_empty(&md->writeset_tree_info, &md->writeset_tree_root); 442 if (r) { 443 DMERR("couldn't create new writeset tree"); 444 goto bad; 445 } 446 447 r = dm_array_empty(&md->era_array_info, &md->era_array_root); 448 if (r) { 449 DMERR("couldn't create era array"); 450 goto bad; 451 } 452 453 return 0; 454 455bad: 456 dm_sm_destroy(md->sm); 457 dm_tm_destroy(md->tm); 458 459 return r; 460} 461 462static int save_sm_root(struct era_metadata *md) 463{ 464 int r; 465 size_t metadata_len; 466 467 r = dm_sm_root_size(md->sm, &metadata_len); 468 if (r < 0) 469 return r; 470 471 return dm_sm_copy_root(md->sm, &md->metadata_space_map_root, 472 metadata_len); 473} 474 475static void copy_sm_root(struct era_metadata *md, struct superblock_disk *disk) 476{ 477 memcpy(&disk->metadata_space_map_root, 478 &md->metadata_space_map_root, 479 sizeof(md->metadata_space_map_root)); 480} 481 482/* 483 * Writes a superblock, including the static fields that don't get updated 484 * with every commit (possible optimisation here). 'md' should be fully 485 * constructed when this is called. 486 */ 487static void prepare_superblock(struct era_metadata *md, struct superblock_disk *disk) 488{ 489 disk->magic = cpu_to_le64(SUPERBLOCK_MAGIC); 490 disk->flags = cpu_to_le32(0ul); 491 492 /* FIXME: can't keep blanking the uuid (uuid is currently unused though) */ 493 memset(disk->uuid, 0, sizeof(disk->uuid)); 494 disk->version = cpu_to_le32(MAX_ERA_VERSION); 495 496 copy_sm_root(md, disk); 497 498 disk->data_block_size = cpu_to_le32(md->block_size); 499 disk->metadata_block_size = cpu_to_le32(DM_ERA_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); 500 disk->nr_blocks = cpu_to_le32(md->nr_blocks); 501 disk->current_era = cpu_to_le32(md->current_era); 502 503 ws_pack(&md->current_writeset->md, &disk->current_writeset); 504 disk->writeset_tree_root = cpu_to_le64(md->writeset_tree_root); 505 disk->era_array_root = cpu_to_le64(md->era_array_root); 506 disk->metadata_snap = cpu_to_le64(md->metadata_snap); 507} 508 509static int write_superblock(struct era_metadata *md) 510{ 511 int r; 512 struct dm_block *sblock; 513 struct superblock_disk *disk; 514 515 r = save_sm_root(md); 516 if (r) { 517 DMERR("%s: save_sm_root failed", __func__); 518 return r; 519 } 520 521 r = superblock_lock_zero(md, &sblock); 522 if (r) 523 return r; 524 525 disk = dm_block_data(sblock); 526 prepare_superblock(md, disk); 527 528 return dm_tm_commit(md->tm, sblock); 529} 530 531/* 532 * Assumes block_size and the infos are set. 533 */ 534static int format_metadata(struct era_metadata *md) 535{ 536 int r; 537 538 r = create_fresh_metadata(md); 539 if (r) 540 return r; 541 542 r = write_superblock(md); 543 if (r) { 544 dm_sm_destroy(md->sm); 545 dm_tm_destroy(md->tm); 546 return r; 547 } 548 549 return 0; 550} 551 552static int open_metadata(struct era_metadata *md) 553{ 554 int r; 555 struct dm_block *sblock; 556 struct superblock_disk *disk; 557 558 r = superblock_read_lock(md, &sblock); 559 if (r) { 560 DMERR("couldn't read_lock superblock"); 561 return r; 562 } 563 564 disk = dm_block_data(sblock); 565 r = dm_tm_open_with_sm(md->bm, SUPERBLOCK_LOCATION, 566 disk->metadata_space_map_root, 567 sizeof(disk->metadata_space_map_root), 568 &md->tm, &md->sm); 569 if (r) { 570 DMERR("dm_tm_open_with_sm failed"); 571 goto bad; 572 } 573 574 setup_infos(md); 575 576 md->block_size = le32_to_cpu(disk->data_block_size); 577 md->nr_blocks = le32_to_cpu(disk->nr_blocks); 578 md->current_era = le32_to_cpu(disk->current_era); 579 580 md->writeset_tree_root = le64_to_cpu(disk->writeset_tree_root); 581 md->era_array_root = le64_to_cpu(disk->era_array_root); 582 md->metadata_snap = le64_to_cpu(disk->metadata_snap); 583 md->archived_writesets = true; 584 585 return dm_bm_unlock(sblock); 586 587bad: 588 dm_bm_unlock(sblock); 589 return r; 590} 591 592static int open_or_format_metadata(struct era_metadata *md, 593 bool may_format) 594{ 595 int r; 596 bool unformatted = false; 597 598 r = superblock_all_zeroes(md->bm, &unformatted); 599 if (r) 600 return r; 601 602 if (unformatted) 603 return may_format ? format_metadata(md) : -EPERM; 604 605 return open_metadata(md); 606} 607 608static int create_persistent_data_objects(struct era_metadata *md, 609 bool may_format) 610{ 611 int r; 612 613 md->bm = dm_block_manager_create(md->bdev, DM_ERA_METADATA_BLOCK_SIZE, 614 DM_ERA_METADATA_CACHE_SIZE, 615 ERA_MAX_CONCURRENT_LOCKS); 616 if (IS_ERR(md->bm)) { 617 DMERR("could not create block manager"); 618 return PTR_ERR(md->bm); 619 } 620 621 r = open_or_format_metadata(md, may_format); 622 if (r) 623 dm_block_manager_destroy(md->bm); 624 625 return r; 626} 627 628static void destroy_persistent_data_objects(struct era_metadata *md) 629{ 630 dm_sm_destroy(md->sm); 631 dm_tm_destroy(md->tm); 632 dm_block_manager_destroy(md->bm); 633} 634 635/* 636 * This waits until all era_map threads have picked up the new filter. 637 */ 638static void swap_writeset(struct era_metadata *md, struct writeset *new_writeset) 639{ 640 rcu_assign_pointer(md->current_writeset, new_writeset); 641 synchronize_rcu(); 642} 643 644/*---------------------------------------------------------------- 645 * Writesets get 'digested' into the main era array. 646 * 647 * We're using a coroutine here so the worker thread can do the digestion, 648 * thus avoiding synchronisation of the metadata. Digesting a whole 649 * writeset in one go would cause too much latency. 650 *--------------------------------------------------------------*/ 651struct digest { 652 uint32_t era; 653 unsigned nr_bits, current_bit; 654 struct writeset_metadata writeset; 655 __le32 value; 656 struct dm_disk_bitset info; 657 658 int (*step)(struct era_metadata *, struct digest *); 659}; 660 661static int metadata_digest_lookup_writeset(struct era_metadata *md, 662 struct digest *d); 663 664static int metadata_digest_remove_writeset(struct era_metadata *md, 665 struct digest *d) 666{ 667 int r; 668 uint64_t key = d->era; 669 670 r = dm_btree_remove(&md->writeset_tree_info, md->writeset_tree_root, 671 &key, &md->writeset_tree_root); 672 if (r) { 673 DMERR("%s: dm_btree_remove failed", __func__); 674 return r; 675 } 676 677 d->step = metadata_digest_lookup_writeset; 678 return 0; 679} 680 681#define INSERTS_PER_STEP 100 682 683static int metadata_digest_transcribe_writeset(struct era_metadata *md, 684 struct digest *d) 685{ 686 int r; 687 bool marked; 688 unsigned b, e = min(d->current_bit + INSERTS_PER_STEP, d->nr_bits); 689 690 for (b = d->current_bit; b < e; b++) { 691 r = writeset_marked_on_disk(&d->info, &d->writeset, b, &marked); 692 if (r) { 693 DMERR("%s: writeset_marked_on_disk failed", __func__); 694 return r; 695 } 696 697 if (!marked) 698 continue; 699 700 __dm_bless_for_disk(&d->value); 701 r = dm_array_set_value(&md->era_array_info, md->era_array_root, 702 b, &d->value, &md->era_array_root); 703 if (r) { 704 DMERR("%s: dm_array_set_value failed", __func__); 705 return r; 706 } 707 } 708 709 if (b == d->nr_bits) 710 d->step = metadata_digest_remove_writeset; 711 else 712 d->current_bit = b; 713 714 return 0; 715} 716 717static int metadata_digest_lookup_writeset(struct era_metadata *md, 718 struct digest *d) 719{ 720 int r; 721 uint64_t key; 722 struct writeset_disk disk; 723 724 r = dm_btree_find_lowest_key(&md->writeset_tree_info, 725 md->writeset_tree_root, &key); 726 if (r < 0) 727 return r; 728 729 d->era = key; 730 731 r = dm_btree_lookup(&md->writeset_tree_info, 732 md->writeset_tree_root, &key, &disk); 733 if (r) { 734 if (r == -ENODATA) { 735 d->step = NULL; 736 return 0; 737 } 738 739 DMERR("%s: dm_btree_lookup failed", __func__); 740 return r; 741 } 742 743 ws_unpack(&disk, &d->writeset); 744 d->value = cpu_to_le32(key); 745 746 d->nr_bits = min(d->writeset.nr_bits, md->nr_blocks); 747 d->current_bit = 0; 748 d->step = metadata_digest_transcribe_writeset; 749 750 return 0; 751} 752 753static int metadata_digest_start(struct era_metadata *md, struct digest *d) 754{ 755 if (d->step) 756 return 0; 757 758 memset(d, 0, sizeof(*d)); 759 760 /* 761 * We initialise another bitset info to avoid any caching side 762 * effects with the previous one. 763 */ 764 dm_disk_bitset_init(md->tm, &d->info); 765 d->step = metadata_digest_lookup_writeset; 766 767 return 0; 768} 769 770/*---------------------------------------------------------------- 771 * High level metadata interface. Target methods should use these, and not 772 * the lower level ones. 773 *--------------------------------------------------------------*/ 774static struct era_metadata *metadata_open(struct block_device *bdev, 775 sector_t block_size, 776 bool may_format) 777{ 778 int r; 779 struct era_metadata *md = kzalloc(sizeof(*md), GFP_KERNEL); 780 781 if (!md) 782 return NULL; 783 784 md->bdev = bdev; 785 md->block_size = block_size; 786 787 md->writesets[0].md.root = INVALID_WRITESET_ROOT; 788 md->writesets[1].md.root = INVALID_WRITESET_ROOT; 789 md->current_writeset = &md->writesets[0]; 790 791 r = create_persistent_data_objects(md, may_format); 792 if (r) { 793 kfree(md); 794 return ERR_PTR(r); 795 } 796 797 return md; 798} 799 800static void metadata_close(struct era_metadata *md) 801{ 802 destroy_persistent_data_objects(md); 803 kfree(md); 804} 805 806static bool valid_nr_blocks(dm_block_t n) 807{ 808 /* 809 * dm_bitset restricts us to 2^32. test_bit & co. restrict us 810 * further to 2^31 - 1 811 */ 812 return n < (1ull << 31); 813} 814 815static int metadata_resize(struct era_metadata *md, void *arg) 816{ 817 int r; 818 dm_block_t *new_size = arg; 819 __le32 value; 820 821 if (!valid_nr_blocks(*new_size)) { 822 DMERR("Invalid number of origin blocks %llu", 823 (unsigned long long) *new_size); 824 return -EINVAL; 825 } 826 827 writeset_free(&md->writesets[0]); 828 writeset_free(&md->writesets[1]); 829 830 r = writeset_alloc(&md->writesets[0], *new_size); 831 if (r) { 832 DMERR("%s: writeset_alloc failed for writeset 0", __func__); 833 return r; 834 } 835 836 r = writeset_alloc(&md->writesets[1], *new_size); 837 if (r) { 838 DMERR("%s: writeset_alloc failed for writeset 1", __func__); 839 return r; 840 } 841 842 value = cpu_to_le32(0u); 843 __dm_bless_for_disk(&value); 844 r = dm_array_resize(&md->era_array_info, md->era_array_root, 845 md->nr_blocks, *new_size, 846 &value, &md->era_array_root); 847 if (r) { 848 DMERR("%s: dm_array_resize failed", __func__); 849 return r; 850 } 851 852 md->nr_blocks = *new_size; 853 return 0; 854} 855 856static int metadata_era_archive(struct era_metadata *md) 857{ 858 int r; 859 uint64_t keys[1]; 860 struct writeset_disk value; 861 862 r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root, 863 &md->current_writeset->md.root); 864 if (r) { 865 DMERR("%s: dm_bitset_flush failed", __func__); 866 return r; 867 } 868 869 ws_pack(&md->current_writeset->md, &value); 870 md->current_writeset->md.root = INVALID_WRITESET_ROOT; 871 872 keys[0] = md->current_era; 873 __dm_bless_for_disk(&value); 874 r = dm_btree_insert(&md->writeset_tree_info, md->writeset_tree_root, 875 keys, &value, &md->writeset_tree_root); 876 if (r) { 877 DMERR("%s: couldn't insert writeset into btree", __func__); 878 /* FIXME: fail mode */ 879 return r; 880 } 881 882 md->archived_writesets = true; 883 884 return 0; 885} 886 887static struct writeset *next_writeset(struct era_metadata *md) 888{ 889 return (md->current_writeset == &md->writesets[0]) ? 890 &md->writesets[1] : &md->writesets[0]; 891} 892 893static int metadata_new_era(struct era_metadata *md) 894{ 895 int r; 896 struct writeset *new_writeset = next_writeset(md); 897 898 r = writeset_init(&md->bitset_info, new_writeset); 899 if (r) { 900 DMERR("%s: writeset_init failed", __func__); 901 return r; 902 } 903 904 swap_writeset(md, new_writeset); 905 md->current_era++; 906 907 return 0; 908} 909 910static int metadata_era_rollover(struct era_metadata *md) 911{ 912 int r; 913 914 if (md->current_writeset->md.root != INVALID_WRITESET_ROOT) { 915 r = metadata_era_archive(md); 916 if (r) { 917 DMERR("%s: metadata_archive_era failed", __func__); 918 /* FIXME: fail mode? */ 919 return r; 920 } 921 } 922 923 r = metadata_new_era(md); 924 if (r) { 925 DMERR("%s: new era failed", __func__); 926 /* FIXME: fail mode */ 927 return r; 928 } 929 930 return 0; 931} 932 933static bool metadata_current_marked(struct era_metadata *md, dm_block_t block) 934{ 935 bool r; 936 struct writeset *ws; 937 938 rcu_read_lock(); 939 ws = rcu_dereference(md->current_writeset); 940 r = writeset_marked(ws, block); 941 rcu_read_unlock(); 942 943 return r; 944} 945 946static int metadata_commit(struct era_metadata *md) 947{ 948 int r; 949 struct dm_block *sblock; 950 951 if (md->current_writeset->md.root != SUPERBLOCK_LOCATION) { 952 r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root, 953 &md->current_writeset->md.root); 954 if (r) { 955 DMERR("%s: bitset flush failed", __func__); 956 return r; 957 } 958 } 959 960 r = save_sm_root(md); 961 if (r) { 962 DMERR("%s: save_sm_root failed", __func__); 963 return r; 964 } 965 966 r = dm_tm_pre_commit(md->tm); 967 if (r) { 968 DMERR("%s: pre commit failed", __func__); 969 return r; 970 } 971 972 r = superblock_lock(md, &sblock); 973 if (r) { 974 DMERR("%s: superblock lock failed", __func__); 975 return r; 976 } 977 978 prepare_superblock(md, dm_block_data(sblock)); 979 980 return dm_tm_commit(md->tm, sblock); 981} 982 983static int metadata_checkpoint(struct era_metadata *md) 984{ 985 /* 986 * For now we just rollover, but later I want to put a check in to 987 * avoid this if the filter is still pretty fresh. 988 */ 989 return metadata_era_rollover(md); 990} 991 992/* 993 * Metadata snapshots allow userland to access era data. 994 */ 995static int metadata_take_snap(struct era_metadata *md) 996{ 997 int r, inc; 998 struct dm_block *clone; 999 1000 if (md->metadata_snap != SUPERBLOCK_LOCATION) { 1001 DMERR("%s: metadata snapshot already exists", __func__); 1002 return -EINVAL; 1003 } 1004 1005 r = metadata_era_rollover(md); 1006 if (r) { 1007 DMERR("%s: era rollover failed", __func__); 1008 return r; 1009 } 1010 1011 r = metadata_commit(md); 1012 if (r) { 1013 DMERR("%s: pre commit failed", __func__); 1014 return r; 1015 } 1016 1017 r = dm_sm_inc_block(md->sm, SUPERBLOCK_LOCATION); 1018 if (r) { 1019 DMERR("%s: couldn't increment superblock", __func__); 1020 return r; 1021 } 1022 1023 r = dm_tm_shadow_block(md->tm, SUPERBLOCK_LOCATION, 1024 &sb_validator, &clone, &inc); 1025 if (r) { 1026 DMERR("%s: couldn't shadow superblock", __func__); 1027 dm_sm_dec_block(md->sm, SUPERBLOCK_LOCATION); 1028 return r; 1029 } 1030 BUG_ON(!inc); 1031 1032 r = dm_sm_inc_block(md->sm, md->writeset_tree_root); 1033 if (r) { 1034 DMERR("%s: couldn't inc writeset tree root", __func__); 1035 dm_tm_unlock(md->tm, clone); 1036 return r; 1037 } 1038 1039 r = dm_sm_inc_block(md->sm, md->era_array_root); 1040 if (r) { 1041 DMERR("%s: couldn't inc era tree root", __func__); 1042 dm_sm_dec_block(md->sm, md->writeset_tree_root); 1043 dm_tm_unlock(md->tm, clone); 1044 return r; 1045 } 1046 1047 md->metadata_snap = dm_block_location(clone); 1048 1049 r = dm_tm_unlock(md->tm, clone); 1050 if (r) { 1051 DMERR("%s: couldn't unlock clone", __func__); 1052 md->metadata_snap = SUPERBLOCK_LOCATION; 1053 return r; 1054 } 1055 1056 return 0; 1057} 1058 1059static int metadata_drop_snap(struct era_metadata *md) 1060{ 1061 int r; 1062 dm_block_t location; 1063 struct dm_block *clone; 1064 struct superblock_disk *disk; 1065 1066 if (md->metadata_snap == SUPERBLOCK_LOCATION) { 1067 DMERR("%s: no snap to drop", __func__); 1068 return -EINVAL; 1069 } 1070 1071 r = dm_tm_read_lock(md->tm, md->metadata_snap, &sb_validator, &clone); 1072 if (r) { 1073 DMERR("%s: couldn't read lock superblock clone", __func__); 1074 return r; 1075 } 1076 1077 /* 1078 * Whatever happens now we'll commit with no record of the metadata 1079 * snap. 1080 */ 1081 md->metadata_snap = SUPERBLOCK_LOCATION; 1082 1083 disk = dm_block_data(clone); 1084 r = dm_btree_del(&md->writeset_tree_info, 1085 le64_to_cpu(disk->writeset_tree_root)); 1086 if (r) { 1087 DMERR("%s: error deleting writeset tree clone", __func__); 1088 dm_tm_unlock(md->tm, clone); 1089 return r; 1090 } 1091 1092 r = dm_array_del(&md->era_array_info, le64_to_cpu(disk->era_array_root)); 1093 if (r) { 1094 DMERR("%s: error deleting era array clone", __func__); 1095 dm_tm_unlock(md->tm, clone); 1096 return r; 1097 } 1098 1099 location = dm_block_location(clone); 1100 dm_tm_unlock(md->tm, clone); 1101 1102 return dm_sm_dec_block(md->sm, location); 1103} 1104 1105struct metadata_stats { 1106 dm_block_t used; 1107 dm_block_t total; 1108 dm_block_t snap; 1109 uint32_t era; 1110}; 1111 1112static int metadata_get_stats(struct era_metadata *md, void *ptr) 1113{ 1114 int r; 1115 struct metadata_stats *s = ptr; 1116 dm_block_t nr_free, nr_total; 1117 1118 r = dm_sm_get_nr_free(md->sm, &nr_free); 1119 if (r) { 1120 DMERR("dm_sm_get_nr_free returned %d", r); 1121 return r; 1122 } 1123 1124 r = dm_sm_get_nr_blocks(md->sm, &nr_total); 1125 if (r) { 1126 DMERR("dm_pool_get_metadata_dev_size returned %d", r); 1127 return r; 1128 } 1129 1130 s->used = nr_total - nr_free; 1131 s->total = nr_total; 1132 s->snap = md->metadata_snap; 1133 s->era = md->current_era; 1134 1135 return 0; 1136} 1137 1138/*----------------------------------------------------------------*/ 1139 1140struct era { 1141 struct dm_target *ti; 1142 struct dm_target_callbacks callbacks; 1143 1144 struct dm_dev *metadata_dev; 1145 struct dm_dev *origin_dev; 1146 1147 dm_block_t nr_blocks; 1148 uint32_t sectors_per_block; 1149 int sectors_per_block_shift; 1150 struct era_metadata *md; 1151 1152 struct workqueue_struct *wq; 1153 struct work_struct worker; 1154 1155 spinlock_t deferred_lock; 1156 struct bio_list deferred_bios; 1157 1158 spinlock_t rpc_lock; 1159 struct list_head rpc_calls; 1160 1161 struct digest digest; 1162 atomic_t suspended; 1163}; 1164 1165struct rpc { 1166 struct list_head list; 1167 1168 int (*fn0)(struct era_metadata *); 1169 int (*fn1)(struct era_metadata *, void *); 1170 void *arg; 1171 int result; 1172 1173 struct completion complete; 1174}; 1175 1176/*---------------------------------------------------------------- 1177 * Remapping. 1178 *---------------------------------------------------------------*/ 1179static bool block_size_is_power_of_two(struct era *era) 1180{ 1181 return era->sectors_per_block_shift >= 0; 1182} 1183 1184static dm_block_t get_block(struct era *era, struct bio *bio) 1185{ 1186 sector_t block_nr = bio->bi_iter.bi_sector; 1187 1188 if (!block_size_is_power_of_two(era)) 1189 (void) sector_div(block_nr, era->sectors_per_block); 1190 else 1191 block_nr >>= era->sectors_per_block_shift; 1192 1193 return block_nr; 1194} 1195 1196static void remap_to_origin(struct era *era, struct bio *bio) 1197{ 1198 bio->bi_bdev = era->origin_dev->bdev; 1199} 1200 1201/*---------------------------------------------------------------- 1202 * Worker thread 1203 *--------------------------------------------------------------*/ 1204static void wake_worker(struct era *era) 1205{ 1206 if (!atomic_read(&era->suspended)) 1207 queue_work(era->wq, &era->worker); 1208} 1209 1210static void process_old_eras(struct era *era) 1211{ 1212 int r; 1213 1214 if (!era->digest.step) 1215 return; 1216 1217 r = era->digest.step(era->md, &era->digest); 1218 if (r < 0) { 1219 DMERR("%s: digest step failed, stopping digestion", __func__); 1220 era->digest.step = NULL; 1221 1222 } else if (era->digest.step) 1223 wake_worker(era); 1224} 1225 1226static void process_deferred_bios(struct era *era) 1227{ 1228 int r; 1229 struct bio_list deferred_bios, marked_bios; 1230 struct bio *bio; 1231 bool commit_needed = false; 1232 bool failed = false; 1233 1234 bio_list_init(&deferred_bios); 1235 bio_list_init(&marked_bios); 1236 1237 spin_lock(&era->deferred_lock); 1238 bio_list_merge(&deferred_bios, &era->deferred_bios); 1239 bio_list_init(&era->deferred_bios); 1240 spin_unlock(&era->deferred_lock); 1241 1242 while ((bio = bio_list_pop(&deferred_bios))) { 1243 r = writeset_test_and_set(&era->md->bitset_info, 1244 era->md->current_writeset, 1245 get_block(era, bio)); 1246 if (r < 0) { 1247 /* 1248 * This is bad news, we need to rollback. 1249 * FIXME: finish. 1250 */ 1251 failed = true; 1252 1253 } else if (r == 0) 1254 commit_needed = true; 1255 1256 bio_list_add(&marked_bios, bio); 1257 } 1258 1259 if (commit_needed) { 1260 r = metadata_commit(era->md); 1261 if (r) 1262 failed = true; 1263 } 1264 1265 if (failed) 1266 while ((bio = bio_list_pop(&marked_bios))) 1267 bio_io_error(bio); 1268 else 1269 while ((bio = bio_list_pop(&marked_bios))) 1270 generic_make_request(bio); 1271} 1272 1273static void process_rpc_calls(struct era *era) 1274{ 1275 int r; 1276 bool need_commit = false; 1277 struct list_head calls; 1278 struct rpc *rpc, *tmp; 1279 1280 INIT_LIST_HEAD(&calls); 1281 spin_lock(&era->rpc_lock); 1282 list_splice_init(&era->rpc_calls, &calls); 1283 spin_unlock(&era->rpc_lock); 1284 1285 list_for_each_entry_safe(rpc, tmp, &calls, list) { 1286 rpc->result = rpc->fn0 ? rpc->fn0(era->md) : rpc->fn1(era->md, rpc->arg); 1287 need_commit = true; 1288 } 1289 1290 if (need_commit) { 1291 r = metadata_commit(era->md); 1292 if (r) 1293 list_for_each_entry_safe(rpc, tmp, &calls, list) 1294 rpc->result = r; 1295 } 1296 1297 list_for_each_entry_safe(rpc, tmp, &calls, list) 1298 complete(&rpc->complete); 1299} 1300 1301static void kick_off_digest(struct era *era) 1302{ 1303 if (era->md->archived_writesets) { 1304 era->md->archived_writesets = false; 1305 metadata_digest_start(era->md, &era->digest); 1306 } 1307} 1308 1309static void do_work(struct work_struct *ws) 1310{ 1311 struct era *era = container_of(ws, struct era, worker); 1312 1313 kick_off_digest(era); 1314 process_old_eras(era); 1315 process_deferred_bios(era); 1316 process_rpc_calls(era); 1317} 1318 1319static void defer_bio(struct era *era, struct bio *bio) 1320{ 1321 spin_lock(&era->deferred_lock); 1322 bio_list_add(&era->deferred_bios, bio); 1323 spin_unlock(&era->deferred_lock); 1324 1325 wake_worker(era); 1326} 1327 1328/* 1329 * Make an rpc call to the worker to change the metadata. 1330 */ 1331static int perform_rpc(struct era *era, struct rpc *rpc) 1332{ 1333 rpc->result = 0; 1334 init_completion(&rpc->complete); 1335 1336 spin_lock(&era->rpc_lock); 1337 list_add(&rpc->list, &era->rpc_calls); 1338 spin_unlock(&era->rpc_lock); 1339 1340 wake_worker(era); 1341 wait_for_completion(&rpc->complete); 1342 1343 return rpc->result; 1344} 1345 1346static int in_worker0(struct era *era, int (*fn)(struct era_metadata *)) 1347{ 1348 struct rpc rpc; 1349 rpc.fn0 = fn; 1350 rpc.fn1 = NULL; 1351 1352 return perform_rpc(era, &rpc); 1353} 1354 1355static int in_worker1(struct era *era, 1356 int (*fn)(struct era_metadata *, void *), void *arg) 1357{ 1358 struct rpc rpc; 1359 rpc.fn0 = NULL; 1360 rpc.fn1 = fn; 1361 rpc.arg = arg; 1362 1363 return perform_rpc(era, &rpc); 1364} 1365 1366static void start_worker(struct era *era) 1367{ 1368 atomic_set(&era->suspended, 0); 1369} 1370 1371static void stop_worker(struct era *era) 1372{ 1373 atomic_set(&era->suspended, 1); 1374 flush_workqueue(era->wq); 1375} 1376 1377/*---------------------------------------------------------------- 1378 * Target methods 1379 *--------------------------------------------------------------*/ 1380static int dev_is_congested(struct dm_dev *dev, int bdi_bits) 1381{ 1382 struct request_queue *q = bdev_get_queue(dev->bdev); 1383 return bdi_congested(&q->backing_dev_info, bdi_bits); 1384} 1385 1386static int era_is_congested(struct dm_target_callbacks *cb, int bdi_bits) 1387{ 1388 struct era *era = container_of(cb, struct era, callbacks); 1389 return dev_is_congested(era->origin_dev, bdi_bits); 1390} 1391 1392static void era_destroy(struct era *era) 1393{ 1394 if (era->md) 1395 metadata_close(era->md); 1396 1397 if (era->wq) 1398 destroy_workqueue(era->wq); 1399 1400 if (era->origin_dev) 1401 dm_put_device(era->ti, era->origin_dev); 1402 1403 if (era->metadata_dev) 1404 dm_put_device(era->ti, era->metadata_dev); 1405 1406 kfree(era); 1407} 1408 1409static dm_block_t calc_nr_blocks(struct era *era) 1410{ 1411 return dm_sector_div_up(era->ti->len, era->sectors_per_block); 1412} 1413 1414static bool valid_block_size(dm_block_t block_size) 1415{ 1416 bool greater_than_zero = block_size > 0; 1417 bool multiple_of_min_block_size = (block_size & (MIN_BLOCK_SIZE - 1)) == 0; 1418 1419 return greater_than_zero && multiple_of_min_block_size; 1420} 1421 1422/* 1423 * <metadata dev> <data dev> <data block size (sectors)> 1424 */ 1425static int era_ctr(struct dm_target *ti, unsigned argc, char **argv) 1426{ 1427 int r; 1428 char dummy; 1429 struct era *era; 1430 struct era_metadata *md; 1431 1432 if (argc != 3) { 1433 ti->error = "Invalid argument count"; 1434 return -EINVAL; 1435 } 1436 1437 era = kzalloc(sizeof(*era), GFP_KERNEL); 1438 if (!era) { 1439 ti->error = "Error allocating era structure"; 1440 return -ENOMEM; 1441 } 1442 1443 era->ti = ti; 1444 1445 r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &era->metadata_dev); 1446 if (r) { 1447 ti->error = "Error opening metadata device"; 1448 era_destroy(era); 1449 return -EINVAL; 1450 } 1451 1452 r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &era->origin_dev); 1453 if (r) { 1454 ti->error = "Error opening data device"; 1455 era_destroy(era); 1456 return -EINVAL; 1457 } 1458 1459 r = sscanf(argv[2], "%u%c", &era->sectors_per_block, &dummy); 1460 if (r != 1) { 1461 ti->error = "Error parsing block size"; 1462 era_destroy(era); 1463 return -EINVAL; 1464 } 1465 1466 r = dm_set_target_max_io_len(ti, era->sectors_per_block); 1467 if (r) { 1468 ti->error = "could not set max io len"; 1469 era_destroy(era); 1470 return -EINVAL; 1471 } 1472 1473 if (!valid_block_size(era->sectors_per_block)) { 1474 ti->error = "Invalid block size"; 1475 era_destroy(era); 1476 return -EINVAL; 1477 } 1478 if (era->sectors_per_block & (era->sectors_per_block - 1)) 1479 era->sectors_per_block_shift = -1; 1480 else 1481 era->sectors_per_block_shift = __ffs(era->sectors_per_block); 1482 1483 md = metadata_open(era->metadata_dev->bdev, era->sectors_per_block, true); 1484 if (IS_ERR(md)) { 1485 ti->error = "Error reading metadata"; 1486 era_destroy(era); 1487 return PTR_ERR(md); 1488 } 1489 era->md = md; 1490 1491 era->nr_blocks = calc_nr_blocks(era); 1492 1493 r = metadata_resize(era->md, &era->nr_blocks); 1494 if (r) { 1495 ti->error = "couldn't resize metadata"; 1496 era_destroy(era); 1497 return -ENOMEM; 1498 } 1499 1500 era->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM); 1501 if (!era->wq) { 1502 ti->error = "could not create workqueue for metadata object"; 1503 era_destroy(era); 1504 return -ENOMEM; 1505 } 1506 INIT_WORK(&era->worker, do_work); 1507 1508 spin_lock_init(&era->deferred_lock); 1509 bio_list_init(&era->deferred_bios); 1510 1511 spin_lock_init(&era->rpc_lock); 1512 INIT_LIST_HEAD(&era->rpc_calls); 1513 1514 ti->private = era; 1515 ti->num_flush_bios = 1; 1516 ti->flush_supported = true; 1517 1518 ti->num_discard_bios = 1; 1519 ti->discards_supported = true; 1520 era->callbacks.congested_fn = era_is_congested; 1521 dm_table_add_target_callbacks(ti->table, &era->callbacks); 1522 1523 return 0; 1524} 1525 1526static void era_dtr(struct dm_target *ti) 1527{ 1528 era_destroy(ti->private); 1529} 1530 1531static int era_map(struct dm_target *ti, struct bio *bio) 1532{ 1533 struct era *era = ti->private; 1534 dm_block_t block = get_block(era, bio); 1535 1536 /* 1537 * All bios get remapped to the origin device. We do this now, but 1538 * it may not get issued until later. Depending on whether the 1539 * block is marked in this era. 1540 */ 1541 remap_to_origin(era, bio); 1542 1543 /* 1544 * REQ_FLUSH bios carry no data, so we're not interested in them. 1545 */ 1546 if (!(bio->bi_rw & REQ_FLUSH) && 1547 (bio_data_dir(bio) == WRITE) && 1548 !metadata_current_marked(era->md, block)) { 1549 defer_bio(era, bio); 1550 return DM_MAPIO_SUBMITTED; 1551 } 1552 1553 return DM_MAPIO_REMAPPED; 1554} 1555 1556static void era_postsuspend(struct dm_target *ti) 1557{ 1558 int r; 1559 struct era *era = ti->private; 1560 1561 r = in_worker0(era, metadata_era_archive); 1562 if (r) { 1563 DMERR("%s: couldn't archive current era", __func__); 1564 /* FIXME: fail mode */ 1565 } 1566 1567 stop_worker(era); 1568} 1569 1570static int era_preresume(struct dm_target *ti) 1571{ 1572 int r; 1573 struct era *era = ti->private; 1574 dm_block_t new_size = calc_nr_blocks(era); 1575 1576 if (era->nr_blocks != new_size) { 1577 r = in_worker1(era, metadata_resize, &new_size); 1578 if (r) 1579 return r; 1580 1581 era->nr_blocks = new_size; 1582 } 1583 1584 start_worker(era); 1585 1586 r = in_worker0(era, metadata_new_era); 1587 if (r) { 1588 DMERR("%s: metadata_era_rollover failed", __func__); 1589 return r; 1590 } 1591 1592 return 0; 1593} 1594 1595/* 1596 * Status format: 1597 * 1598 * <metadata block size> <#used metadata blocks>/<#total metadata blocks> 1599 * <current era> <held metadata root | '-'> 1600 */ 1601static void era_status(struct dm_target *ti, status_type_t type, 1602 unsigned status_flags, char *result, unsigned maxlen) 1603{ 1604 int r; 1605 struct era *era = ti->private; 1606 ssize_t sz = 0; 1607 struct metadata_stats stats; 1608 char buf[BDEVNAME_SIZE]; 1609 1610 switch (type) { 1611 case STATUSTYPE_INFO: 1612 r = in_worker1(era, metadata_get_stats, &stats); 1613 if (r) 1614 goto err; 1615 1616 DMEMIT("%u %llu/%llu %u", 1617 (unsigned) (DM_ERA_METADATA_BLOCK_SIZE >> SECTOR_SHIFT), 1618 (unsigned long long) stats.used, 1619 (unsigned long long) stats.total, 1620 (unsigned) stats.era); 1621 1622 if (stats.snap != SUPERBLOCK_LOCATION) 1623 DMEMIT(" %llu", stats.snap); 1624 else 1625 DMEMIT(" -"); 1626 break; 1627 1628 case STATUSTYPE_TABLE: 1629 format_dev_t(buf, era->metadata_dev->bdev->bd_dev); 1630 DMEMIT("%s ", buf); 1631 format_dev_t(buf, era->origin_dev->bdev->bd_dev); 1632 DMEMIT("%s %u", buf, era->sectors_per_block); 1633 break; 1634 } 1635 1636 return; 1637 1638err: 1639 DMEMIT("Error"); 1640} 1641 1642static int era_message(struct dm_target *ti, unsigned argc, char **argv) 1643{ 1644 struct era *era = ti->private; 1645 1646 if (argc != 1) { 1647 DMERR("incorrect number of message arguments"); 1648 return -EINVAL; 1649 } 1650 1651 if (!strcasecmp(argv[0], "checkpoint")) 1652 return in_worker0(era, metadata_checkpoint); 1653 1654 if (!strcasecmp(argv[0], "take_metadata_snap")) 1655 return in_worker0(era, metadata_take_snap); 1656 1657 if (!strcasecmp(argv[0], "drop_metadata_snap")) 1658 return in_worker0(era, metadata_drop_snap); 1659 1660 DMERR("unsupported message '%s'", argv[0]); 1661 return -EINVAL; 1662} 1663 1664static sector_t get_dev_size(struct dm_dev *dev) 1665{ 1666 return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; 1667} 1668 1669static int era_iterate_devices(struct dm_target *ti, 1670 iterate_devices_callout_fn fn, void *data) 1671{ 1672 struct era *era = ti->private; 1673 return fn(ti, era->origin_dev, 0, get_dev_size(era->origin_dev), data); 1674} 1675 1676static int era_merge(struct dm_target *ti, struct bvec_merge_data *bvm, 1677 struct bio_vec *biovec, int max_size) 1678{ 1679 struct era *era = ti->private; 1680 struct request_queue *q = bdev_get_queue(era->origin_dev->bdev); 1681 1682 if (!q->merge_bvec_fn) 1683 return max_size; 1684 1685 bvm->bi_bdev = era->origin_dev->bdev; 1686 1687 return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); 1688} 1689 1690static void era_io_hints(struct dm_target *ti, struct queue_limits *limits) 1691{ 1692 struct era *era = ti->private; 1693 uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT; 1694 1695 /* 1696 * If the system-determined stacked limits are compatible with the 1697 * era device's blocksize (io_opt is a factor) do not override them. 1698 */ 1699 if (io_opt_sectors < era->sectors_per_block || 1700 do_div(io_opt_sectors, era->sectors_per_block)) { 1701 blk_limits_io_min(limits, 0); 1702 blk_limits_io_opt(limits, era->sectors_per_block << SECTOR_SHIFT); 1703 } 1704} 1705 1706/*----------------------------------------------------------------*/ 1707 1708static struct target_type era_target = { 1709 .name = "era", 1710 .version = {1, 0, 0}, 1711 .module = THIS_MODULE, 1712 .ctr = era_ctr, 1713 .dtr = era_dtr, 1714 .map = era_map, 1715 .postsuspend = era_postsuspend, 1716 .preresume = era_preresume, 1717 .status = era_status, 1718 .message = era_message, 1719 .iterate_devices = era_iterate_devices, 1720 .merge = era_merge, 1721 .io_hints = era_io_hints 1722}; 1723 1724static int __init dm_era_init(void) 1725{ 1726 int r; 1727 1728 r = dm_register_target(&era_target); 1729 if (r) { 1730 DMERR("era target registration failed: %d", r); 1731 return r; 1732 } 1733 1734 return 0; 1735} 1736 1737static void __exit dm_era_exit(void) 1738{ 1739 dm_unregister_target(&era_target); 1740} 1741 1742module_init(dm_era_init); 1743module_exit(dm_era_exit); 1744 1745MODULE_DESCRIPTION(DM_NAME " era target"); 1746MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>"); 1747MODULE_LICENSE("GPL"); 1748