/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm-space-map.h"
#include "dm-space-map-common.h"
#include "dm-space-map-metadata.h"

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/device-mapper.h>

#define DM_MSG_PREFIX "space map metadata"

/*----------------------------------------------------------------*/

/*
 * An edge-triggered threshold.
 */
struct threshold {
	bool threshold_set;
	bool value_set;
	dm_block_t threshold;
	dm_block_t current_value;
	dm_sm_threshold_fn fn;
	void *context;
};

static void threshold_init(struct threshold *t)
{
	t->threshold_set = false;
	t->value_set = false;
}

static void set_threshold(struct threshold *t, dm_block_t value,
			  dm_sm_threshold_fn fn, void *context)
{
	t->threshold_set = true;
	t->threshold = value;
	t->fn = fn;
	t->context = context;
}

static bool below_threshold(struct threshold *t, dm_block_t value)
{
	return t->threshold_set && value <= t->threshold;
}

static bool threshold_already_triggered(struct threshold *t)
{
	return t->value_set && below_threshold(t, t->current_value);
}

static void check_threshold(struct threshold *t, dm_block_t value)
{
	if (below_threshold(t, value) &&
	    !threshold_already_triggered(t))
		t->fn(t->context);

	t->value_set = true;
	t->current_value = value;
}

/*----------------------------------------------------------------*/

/*
 * Space map interface.
 *
 * The low-level disk format is written using the standard btree and
 * transaction manager. This means that performing disk operations may
 * cause us to recurse into the space map in order to allocate new blocks.
 * For this reason we have a pool of pre-allocated blocks large enough to
 * service any metadata_ll_disk operation.
 */

/*
 * FIXME: we should calculate this based on the size of the device.
 * Only the metadata space map needs this functionality.
 */
#define MAX_RECURSIVE_ALLOCATIONS 1024

enum block_op_type {
	BOP_INC,
	BOP_DEC
};

struct block_op {
	enum block_op_type type;
	dm_block_t block;
};

struct bop_ring_buffer {
	unsigned begin;
	unsigned end;
	struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1];
};

static void brb_init(struct bop_ring_buffer *brb)
{
	brb->begin = 0;
	brb->end = 0;
}

static bool brb_empty(struct bop_ring_buffer *brb)
{
	return brb->begin == brb->end;
}

static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
{
	unsigned r = old + 1;

	return (r >= ARRAY_SIZE(brb->bops)) ? 0 : r;
}

static int brb_push(struct bop_ring_buffer *brb,
		    enum block_op_type type, dm_block_t b)
{
	struct block_op *bop;
	unsigned next = brb_next(brb, brb->end);

	/*
	 * We don't allow the last bop to be filled; this way we can
	 * differentiate between full and empty.
	 */
	if (next == brb->begin)
		return -ENOMEM;

	bop = brb->bops + brb->end;
	bop->type = type;
	bop->block = b;

	brb->end = next;

	return 0;
}

static int brb_pop(struct bop_ring_buffer *brb, struct block_op *result)
{
	struct block_op *bop;

	if (brb_empty(brb))
		return -ENODATA;

	bop = brb->bops + brb->begin;
	result->type = bop->type;
	result->block = bop->block;

	brb->begin = brb_next(brb, brb->begin);

	return 0;
}
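
/*
 * Illustrative walk-through (comment only, not part of the format):
 * with MAX_RECURSIVE_ALLOCATIONS + 1 slots the buffer is empty when
 * begin == end, and full when advancing end would make it equal
 * begin. Pushing three ops into an empty buffer leaves begin == 0,
 * end == 3; three pops then advance begin back to 3, making the
 * buffer empty again. Sacrificing one slot keeps both tests O(1)
 * with no separate count field.
 */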

/*----------------------------------------------------------------*/

struct sm_metadata {
	struct dm_space_map sm;

	struct ll_disk ll;
	struct ll_disk old_ll;

	dm_block_t begin;

	unsigned recursion_count;
	unsigned allocated_this_transaction;
	struct bop_ring_buffer uncommitted;

	struct threshold threshold;
};

static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
{
	int r = brb_push(&smm->uncommitted, type, b);

	if (r) {
		DMERR("too many recursive allocations");
		return -ENOMEM;
	}

	return 0;
}

static int commit_bop(struct sm_metadata *smm, struct block_op *op)
{
	int r = 0;
	enum allocation_event ev;

	switch (op->type) {
	case BOP_INC:
		r = sm_ll_inc(&smm->ll, op->block, &ev);
		break;

	case BOP_DEC:
		r = sm_ll_dec(&smm->ll, op->block, &ev);
		break;
	}

	return r;
}

static void in(struct sm_metadata *smm)
{
	smm->recursion_count++;
}

static int apply_bops(struct sm_metadata *smm)
{
	int r = 0;

	while (!brb_empty(&smm->uncommitted)) {
		struct block_op bop;

		r = brb_pop(&smm->uncommitted, &bop);
		if (r) {
			DMERR("bug in bop ring buffer");
			break;
		}

		r = commit_bop(smm, &bop);
		if (r)
			break;
	}

	return r;
}

static int out(struct sm_metadata *smm)
{
	int r = 0;

	/*
	 * If we're not recursing then very bad things are happening.
	 */
	if (!smm->recursion_count) {
		DMERR("lost track of recursion depth");
		return -ENOMEM;
	}

	if (smm->recursion_count == 1)
		r = apply_bops(smm);

	smm->recursion_count--;

	return r;
}

/*
 * When using the out() function above, we often want to combine an error
 * code for the operation run in the recursive context with that from
 * out().
 */
static int combine_errors(int r1, int r2)
{
	return r1 ? r1 : r2;
}

static int recursing(struct sm_metadata *smm)
{
	return smm->recursion_count;
}
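
/*
 * Summary of the recursion discipline used below: every operation
 * that may touch the btrees brackets its work with in() and out().
 * If a nested operation arrives while recursion_count is non-zero
 * (e.g. the transaction manager needs a fresh block while we're
 * already adjusting reference counts), it is queued with add_bop()
 * instead of being executed; the outermost out() then replays the
 * queue via apply_bops() once the data structures are consistent.
 */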

static void sm_metadata_destroy(struct dm_space_map *sm)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	kfree(smm);
}

static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks;

	return 0;
}

static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated -
		 smm->allocated_this_transaction;

	return 0;
}

static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
				 uint32_t *result)
{
	int r;
	unsigned i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	unsigned adjustment = 0;

	/*
	 * We may have some uncommitted adjustments to add. This list
	 * should always be really short.
	 */
	for (i = smm->uncommitted.begin;
	     i != smm->uncommitted.end;
	     i = brb_next(&smm->uncommitted, i)) {
		struct block_op *op = smm->uncommitted.bops + i;

		if (op->block != b)
			continue;

		switch (op->type) {
		case BOP_INC:
			adjustment++;
			break;

		case BOP_DEC:
			adjustment--;
			break;
		}
	}

	r = sm_ll_lookup(&smm->ll, b, result);
	if (r)
		return r;

	*result += adjustment;

	return 0;
}

static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
					      dm_block_t b, int *result)
{
	int r, adjustment = 0;
	unsigned i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	uint32_t rc;

	/*
	 * We may have some uncommitted adjustments to add. This list
	 * should always be really short.
	 */
	for (i = smm->uncommitted.begin;
	     i != smm->uncommitted.end;
	     i = brb_next(&smm->uncommitted, i)) {
		struct block_op *op = smm->uncommitted.bops + i;

		if (op->block != b)
			continue;

		switch (op->type) {
		case BOP_INC:
			adjustment++;
			break;

		case BOP_DEC:
			adjustment--;
			break;
		}
	}

	if (adjustment > 1) {
		*result = 1;
		return 0;
	}

	r = sm_ll_lookup_bitmap(&smm->ll, b, &rc);
	if (r)
		return r;

	if (rc == 3)
		/*
		 * We err on the side of caution, and always return true.
		 */
		*result = 1;
	else
		*result = rc + adjustment > 1;

	return 0;
}
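
/*
 * Background, assuming the layout in dm-space-map-common: the bitmap
 * holds two bits per block, so 0-2 are exact counts and 3 means the
 * real count lives in the overflow btree. Treating 3 as "shared"
 * above avoids a btree lookup and is safe: pending BOP_DECs can only
 * produce a false positive, never a false negative.
 */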

static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
				 uint32_t count)
{
	int r, r2;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (smm->recursion_count) {
		DMERR("cannot recurse set_count()");
		return -EINVAL;
	}

	in(smm);
	r = sm_ll_insert(&smm->ll, b, count, &ev);
	r2 = out(smm);

	return combine_errors(r, r2);
}

static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (recursing(smm))
		r = add_bop(smm, BOP_INC, b);
	else {
		in(smm);
		r = sm_ll_inc(&smm->ll, b, &ev);
		r2 = out(smm);
	}

	return combine_errors(r, r2);
}

static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (recursing(smm))
		r = add_bop(smm, BOP_DEC, b);
	else {
		in(smm);
		r = sm_ll_dec(&smm->ll, b, &ev);
		r2 = out(smm);
	}

	return combine_errors(r, r2);
}

static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_find_free_block(&smm->old_ll, smm->begin, smm->old_ll.nr_blocks, b);
	if (r)
		return r;

	smm->begin = *b + 1;

	if (recursing(smm))
		r = add_bop(smm, BOP_INC, *b);
	else {
		in(smm);
		r = sm_ll_inc(&smm->ll, *b, &ev);
		r2 = out(smm);
	}

	if (!r)
		smm->allocated_this_transaction++;

	return combine_errors(r, r2);
}

static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
{
	dm_block_t count;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	int r = sm_metadata_new_block_(sm, b);

	if (r) {
		DMERR_LIMIT("unable to allocate new metadata block");
		return r;
	}

	r = sm_metadata_get_nr_free(sm, &count);
	if (r) {
		DMERR_LIMIT("couldn't get free block count");
		return r;
	}

	check_threshold(&smm->threshold, count);

	return r;
}

static int sm_metadata_commit(struct dm_space_map *sm)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_commit(&smm->ll);
	if (r)
		return r;

	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
	smm->begin = 0;
	smm->allocated_this_transaction = 0;

	return 0;
}

static int sm_metadata_register_threshold_callback(struct dm_space_map *sm,
						   dm_block_t threshold,
						   dm_sm_threshold_fn fn,
						   void *context)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	set_threshold(&smm->threshold, threshold, fn, context);

	return 0;
}

static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
{
	*result = sizeof(struct disk_sm_root);

	return 0;
}

static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	struct disk_sm_root root_le;

	root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks);
	root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated);
	root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root);
	root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root);

	if (max < sizeof(root_le))
		return -ENOSPC;

	memcpy(where_le, &root_le, sizeof(root_le));

	return 0;
}
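
/*
 * Note: the root copied out above is the handle a caller stashes in
 * its superblock; feeding the same bytes back through
 * dm_sm_metadata_open() (see below) reconstructs the space map after
 * a restart.
 */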

static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks);

static struct dm_space_map ops = {
	.destroy = sm_metadata_destroy,
	.extend = sm_metadata_extend,
	.get_nr_blocks = sm_metadata_get_nr_blocks,
	.get_nr_free = sm_metadata_get_nr_free,
	.get_count = sm_metadata_get_count,
	.count_is_more_than_one = sm_metadata_count_is_more_than_one,
	.set_count = sm_metadata_set_count,
	.inc_block = sm_metadata_inc_block,
	.dec_block = sm_metadata_dec_block,
	.new_block = sm_metadata_new_block,
	.commit = sm_metadata_commit,
	.root_size = sm_metadata_root_size,
	.copy_root = sm_metadata_copy_root,
	.register_threshold_callback = sm_metadata_register_threshold_callback
};

/*----------------------------------------------------------------*/

/*
 * When a new space map is created that manages its own space, we use
 * this tiny bootstrap allocator.
 */
static void sm_bootstrap_destroy(struct dm_space_map *sm)
{
}

static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
	DMERR("bootstrap doesn't support extend");

	return -EINVAL;
}

static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks;

	return 0;
}

static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks - smm->begin;

	return 0;
}

static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b,
				  uint32_t *result)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*result = (b < smm->begin) ? 1 : 0;

	return 0;
}

static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
					       dm_block_t b, int *result)
{
	*result = 0;

	return 0;
}

static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
				  uint32_t count)
{
	DMERR("bootstrap doesn't support set_count");

	return -EINVAL;
}

static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	/*
	 * We know the entire device is unused.
	 */
	if (smm->begin == smm->ll.nr_blocks)
		return -ENOSPC;

	*b = smm->begin++;

	return 0;
}

static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	return add_bop(smm, BOP_INC, b);
}

static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	return add_bop(smm, BOP_DEC, b);
}

static int sm_bootstrap_commit(struct dm_space_map *sm)
{
	return 0;
}

static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
{
	DMERR("bootstrap doesn't support root_size");

	return -EINVAL;
}

static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
				  size_t max)
{
	DMERR("bootstrap doesn't support copy_root");

	return -EINVAL;
}

static struct dm_space_map bootstrap_ops = {
	.destroy = sm_bootstrap_destroy,
	.extend = sm_bootstrap_extend,
	.get_nr_blocks = sm_bootstrap_get_nr_blocks,
	.get_nr_free = sm_bootstrap_get_nr_free,
	.get_count = sm_bootstrap_get_count,
	.count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
	.set_count = sm_bootstrap_set_count,
	.inc_block = sm_bootstrap_inc_block,
	.dec_block = sm_bootstrap_dec_block,
	.new_block = sm_bootstrap_new_block,
	.commit = sm_bootstrap_commit,
	.root_size = sm_bootstrap_root_size,
	.copy_root = sm_bootstrap_copy_root,
	.register_threshold_callback = NULL
};
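
/*
 * How the bootstrap table gets used: dm_sm_metadata_create() and
 * sm_metadata_extend() temporarily memcpy() bootstrap_ops over the
 * vtable, so that while the on-disk structures are still being built
 * allocation is a simple linear scan and every ref count change is
 * merely queued as a bop. Once the structures exist, the normal ops
 * are copied back and the queue is replayed.
 */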

/*----------------------------------------------------------------*/

static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
	int r, i;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	dm_block_t old_len = smm->ll.nr_blocks;

	/*
	 * Flick into a mode where all blocks get allocated in the new area.
	 */
	smm->begin = old_len;
	memcpy(sm, &bootstrap_ops, sizeof(*sm));

	/*
	 * Extend.
	 */
	r = sm_ll_extend(&smm->ll, extra_blocks);
	if (r)
		goto out;

	/*
	 * We repeatedly increment then commit until the commit doesn't
	 * allocate any new blocks.
	 */
	do {
		for (i = old_len; !r && i < smm->begin; i++) {
			r = sm_ll_inc(&smm->ll, i, &ev);
			if (r)
				goto out;
		}
		old_len = smm->begin;

		r = apply_bops(smm);
		if (r) {
			DMERR("%s: apply_bops failed", __func__);
			goto out;
		}

		r = sm_ll_commit(&smm->ll);
		if (r)
			goto out;

	} while (old_len != smm->begin);

out:
	/*
	 * Switch back to normal behaviour.
	 */
	memcpy(sm, &ops, sizeof(*sm));
	return r;
}
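
/*
 * Why the loop above terminates: each pass increments the ref counts
 * of blocks allocated by the previous pass's commit, and that commit
 * may itself allocate a few more metadata blocks (advancing
 * smm->begin through the bootstrap allocator). The number of newly
 * allocated blocks shrinks each round, so old_len catches up with
 * smm->begin after a few iterations in practice.
 */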

/*----------------------------------------------------------------*/

struct dm_space_map *dm_sm_metadata_init(void)
{
	struct sm_metadata *smm;

	smm = kmalloc(sizeof(*smm), GFP_KERNEL);
	if (!smm)
		return ERR_PTR(-ENOMEM);

	memcpy(&smm->sm, &ops, sizeof(smm->sm));

	return &smm->sm;
}

int dm_sm_metadata_create(struct dm_space_map *sm,
			  struct dm_transaction_manager *tm,
			  dm_block_t nr_blocks,
			  dm_block_t superblock)
{
	int r;
	dm_block_t i;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	smm->begin = superblock + 1;
	smm->recursion_count = 0;
	smm->allocated_this_transaction = 0;
	brb_init(&smm->uncommitted);
	threshold_init(&smm->threshold);

	memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));

	r = sm_ll_new_metadata(&smm->ll, tm);
	if (r)
		return r;

	if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
		nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
	r = sm_ll_extend(&smm->ll, nr_blocks);
	if (r)
		return r;

	memcpy(&smm->sm, &ops, sizeof(smm->sm));

	/*
	 * Now we need to update the newly created data structures with the
	 * allocated blocks that they were built from.
	 */
	for (i = superblock; !r && i < smm->begin; i++)
		r = sm_ll_inc(&smm->ll, i, &ev);

	if (r)
		return r;

	r = apply_bops(smm);
	if (r) {
		DMERR("%s: apply_bops failed", __func__);
		return r;
	}

	return sm_metadata_commit(sm);
}

int dm_sm_metadata_open(struct dm_space_map *sm,
			struct dm_transaction_manager *tm,
			void *root_le, size_t len)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_open_metadata(&smm->ll, tm, root_le, len);
	if (r)
		return r;

	smm->begin = 0;
	smm->recursion_count = 0;
	smm->allocated_this_transaction = 0;
	brb_init(&smm->uncommitted);
	threshold_init(&smm->threshold);

	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
	return 0;
}
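
/*
 * Typical lifecycle, for illustration only (error handling and
 * transaction manager setup elided; the dm_sm_* wrappers live in
 * dm-space-map.h):
 *
 *	struct dm_space_map *sm = dm_sm_metadata_init();
 *
 *	dm_sm_metadata_create(sm, tm, nr_blocks, superblock);
 *	...
 *	dm_sm_commit(sm);
 *	dm_sm_copy_root(sm, &root_le, len);	// stash in superblock
 *
 *	// after a restart:
 *	dm_sm_metadata_open(sm, tm, &root_le, len);
 */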