root/block/blk-zoned.c


DEFINITIONS

This source file includes the following definitions.
  1. blk_zone_start
  2. blk_req_needs_zone_write_lock
  3. __blk_req_zone_write_lock
  4. __blk_req_zone_write_unlock
  5. __blkdev_nr_zones
  6. blkdev_nr_zones
  7. blkdev_report_zone
  8. blk_report_zones
  9. blkdev_report_zones
  10. __blkdev_reset_all_zones
  11. blkdev_allow_reset_all_zones
  12. blkdev_reset_zones
  13. blkdev_report_zones_ioctl
  14. blkdev_reset_zones_ioctl
  15. blk_alloc_zone_bitmap
  16. blk_alloc_zones
  17. blk_queue_free_zone_bitmaps
  18. blk_revalidate_disk_zones

// SPDX-License-Identifier: GPL-2.0
/*
 * Zoned block device handling
 *
 * Copyright (c) 2015, Hannes Reinecke
 * Copyright (c) 2015, SUSE Linux GmbH
 *
 * Copyright (c) 2016, Damien Le Moal
 * Copyright (c) 2016, Western Digital
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>

#include "blk.h"

static inline sector_t blk_zone_start(struct request_queue *q,
                                      sector_t sector)
{
        sector_t zone_mask = blk_queue_zone_sectors(q) - 1;

        return sector & ~zone_mask;
}

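/*
 * Zone sizes are always a power-of-two number of sectors, so the mask
 * above rounds a sector down to the start of its zone. For example,
 * with 256 MiB zones (blk_queue_zone_sectors() == 0x80000 512-byte
 * sectors), sector 0x90abc maps to 0x90abc & ~0x7ffff == 0x80000.
 */
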
/*
 * Return true if a request is a write request that needs zone write locking.
 */
bool blk_req_needs_zone_write_lock(struct request *rq)
{
        if (!rq->q->seq_zones_wlock)
                return false;

        if (blk_rq_is_passthrough(rq))
                return false;

        switch (req_op(rq)) {
        case REQ_OP_WRITE_ZEROES:
        case REQ_OP_WRITE_SAME:
        case REQ_OP_WRITE:
                return blk_rq_zone_is_seq(rq);
        default:
                return false;
        }
}
EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);

void __blk_req_zone_write_lock(struct request *rq)
{
        if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq),
                                          rq->q->seq_zones_wlock)))
                return;

        WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
        rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_lock);

void __blk_req_zone_write_unlock(struct request *rq)
{
        rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED;
        if (rq->q->seq_zones_wlock)
                WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq),
                                                 rq->q->seq_zones_wlock));
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);

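/*
 * A minimal sketch (not part of this file, and simplified from what
 * mq-deadline does) of how an I/O scheduler is expected to pair these
 * helpers, typically via the blk_req_zone_write_lock/unlock() wrappers
 * declared in include/linux/blkdev.h:
 *
 *	// when dispatching a write request to the device
 *	if (blk_req_needs_zone_write_lock(rq))
 *		__blk_req_zone_write_lock(rq);
 *
 *	// in the ->finish_request() method, once the write completes
 *	if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED)
 *		__blk_req_zone_write_unlock(rq);
 *
 * This allows only one write in flight per sequential zone, keeping the
 * device write pointer in sync with the order in which writes are issued.
 */
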
static inline unsigned int __blkdev_nr_zones(struct request_queue *q,
                                             sector_t nr_sectors)
{
        sector_t zone_sectors = blk_queue_zone_sectors(q);

        return (nr_sectors + zone_sectors - 1) >> ilog2(zone_sectors);
}

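/*
 * The expression above is a round-up division by the zone size, so a
 * smaller last zone still counts: e.g. with zone_sectors == 524288
 * (2^19), nr_sectors == 1000000 gives (1000000 + 524287) >> 19 == 2.
 */
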
/**
 * blkdev_nr_zones - Get number of zones
 * @bdev:       Target block device
 *
 * Description:
 *    Return the total number of zones of a zoned block device.
 *    For a regular block device, the number of zones is always 0.
 */
unsigned int blkdev_nr_zones(struct block_device *bdev)
{
        struct request_queue *q = bdev_get_queue(bdev);

        if (!blk_queue_is_zoned(q))
                return 0;

        return __blkdev_nr_zones(q, bdev->bd_part->nr_sects);
}
EXPORT_SYMBOL_GPL(blkdev_nr_zones);

/*
 * Check that a zone report belongs to this partition; if so, adjust its
 * start sector and write pointer and return true. Return false otherwise.
 */
static bool blkdev_report_zone(struct block_device *bdev, struct blk_zone *rep)
{
        sector_t offset = get_start_sect(bdev);

        if (rep->start < offset)
                return false;

        rep->start -= offset;
        if (rep->start + rep->len > bdev->bd_part->nr_sects)
                return false;

        if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL)
                rep->wp = rep->start + rep->len;
        else
                rep->wp -= offset;
        return true;
}

static int blk_report_zones(struct gendisk *disk, sector_t sector,
                            struct blk_zone *zones, unsigned int *nr_zones)
{
        struct request_queue *q = disk->queue;
        unsigned int z = 0, n, nrz = *nr_zones;
        sector_t capacity = get_capacity(disk);
        int ret;

        while (z < nrz && sector < capacity) {
                n = nrz - z;
                ret = disk->fops->report_zones(disk, sector, &zones[z], &n);
                if (ret)
                        return ret;
                if (!n)
                        break;
                sector += blk_queue_zone_sectors(q) * n;
                z += n;
        }

        WARN_ON(z > *nr_zones);
        *nr_zones = z;

        return 0;
}

/**
 * blkdev_report_zones - Get zone information
 * @bdev:       Target block device
 * @sector:     Sector from which to report zones
 * @zones:      Array of zone structures in which to return the zone information
 * @nr_zones:   Number of zone structures in the zone array
 *
 * Description:
 *    Get zone information starting from the zone containing @sector.
 *    The number of zones reported may be less than the number requested
 *    in @nr_zones. The number of zones actually reported is returned
 *    in @nr_zones.
 *    The caller must use memalloc_noXX_save/restore() calls to control
 *    memory allocations done within this function (zone array and command
 *    buffer allocation by the device driver).
 */
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
                        struct blk_zone *zones, unsigned int *nr_zones)
{
        struct request_queue *q = bdev_get_queue(bdev);
        unsigned int i, nrz;
        int ret;

        if (!blk_queue_is_zoned(q))
                return -EOPNOTSUPP;

        /*
         * A block device that advertised itself as zoned must have a
         * report_zones method. If it does not have one defined, the device
         * driver has a bug, so warn about it.
         */
        if (WARN_ON_ONCE(!bdev->bd_disk->fops->report_zones))
                return -EOPNOTSUPP;

        if (!*nr_zones || sector >= bdev->bd_part->nr_sects) {
                *nr_zones = 0;
                return 0;
        }

        nrz = min(*nr_zones,
                  __blkdev_nr_zones(q, bdev->bd_part->nr_sects - sector));
        ret = blk_report_zones(bdev->bd_disk, get_start_sect(bdev) + sector,
                               zones, &nrz);
        if (ret)
                return ret;

        for (i = 0; i < nrz; i++) {
                if (!blkdev_report_zone(bdev, zones))
                        break;
                zones++;
        }

        *nr_zones = i;

        return 0;
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);

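/*
 * A minimal in-kernel usage sketch (not part of this file), reporting
 * the first 16 zones of a device the caller already holds a reference
 * on. As the kernel-doc above requires, the call is bracketed with
 * memalloc_noio_save/restore() so that allocations done by the driver's
 * report_zones method cannot recurse into the block layer:
 *
 *	struct blk_zone *zones;
 *	unsigned int nr_zones = 16;
 *	unsigned int noio_flag;
 *	int ret;
 *
 *	zones = kcalloc(nr_zones, sizeof(*zones), GFP_KERNEL);
 *	if (!zones)
 *		return -ENOMEM;
 *	noio_flag = memalloc_noio_save();
 *	ret = blkdev_report_zones(bdev, 0, zones, &nr_zones);
 *	memalloc_noio_restore(noio_flag);
 *	// on success, nr_zones holds the number actually reported
 *	kfree(zones);
 */
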
/*
 * Special case of the zone reset operation: reset all zones of the device
 * in a single command. This is useful for applications like mkfs.
 */
static int __blkdev_reset_all_zones(struct block_device *bdev, gfp_t gfp_mask)
{
        struct bio *bio = bio_alloc(gfp_mask, 0);
        int ret;

        /* The reset-all operation applies to the whole device, so no sector is needed */
        bio_set_dev(bio, bdev);
        bio_set_op_attrs(bio, REQ_OP_ZONE_RESET_ALL, 0);

        ret = submit_bio_wait(bio);
        bio_put(bio);

        return ret;
}

static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
                                                sector_t nr_sectors)
{
        if (!blk_queue_zone_resetall(bdev_get_queue(bdev)))
                return false;

        if (nr_sectors != part_nr_sects_read(bdev->bd_part))
                return false;
        /*
         * REQ_OP_ZONE_RESET_ALL can be executed only if the block device
         * spans the entire disk, that is, if the block device's start
         * offset is 0 and its capacity is the same as the whole disk's.
         */
        return get_start_sect(bdev) == 0 &&
               part_nr_sects_read(bdev->bd_part) == get_capacity(bdev->bd_disk);
}

/**
 * blkdev_reset_zones - Reset zones write pointer
 * @bdev:       Target block device
 * @sector:     Start sector of the first zone to reset
 * @nr_sectors: Number of sectors, at least the length of one zone
 * @gfp_mask:   Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Reset the write pointer of the zones contained in the range
 *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
 *    is valid, but the specified range should not contain conventional zones.
 */
int blkdev_reset_zones(struct block_device *bdev,
                       sector_t sector, sector_t nr_sectors,
                       gfp_t gfp_mask)
{
        struct request_queue *q = bdev_get_queue(bdev);
        sector_t zone_sectors;
        sector_t end_sector = sector + nr_sectors;
        struct bio *bio = NULL;
        struct blk_plug plug;
        int ret;

        if (!blk_queue_is_zoned(q))
                return -EOPNOTSUPP;

        if (bdev_read_only(bdev))
                return -EPERM;

        if (!nr_sectors || end_sector > bdev->bd_part->nr_sects)
                /* Out of range */
                return -EINVAL;

        if (blkdev_allow_reset_all_zones(bdev, nr_sectors))
                return __blkdev_reset_all_zones(bdev, gfp_mask);

        /* Check alignment (handle a possibly smaller last zone) */
        zone_sectors = blk_queue_zone_sectors(q);
        if (sector & (zone_sectors - 1))
                return -EINVAL;

        if ((nr_sectors & (zone_sectors - 1)) &&
            end_sector != bdev->bd_part->nr_sects)
                return -EINVAL;

        blk_start_plug(&plug);
        while (sector < end_sector) {
                bio = blk_next_bio(bio, 0, gfp_mask);
                bio->bi_iter.bi_sector = sector;
                bio_set_dev(bio, bdev);
                bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);

                sector += zone_sectors;

                /* This may take a while, so be nice to others */
                cond_resched();
        }

        ret = submit_bio_wait(bio);
        bio_put(bio);

        blk_finish_plug(&plug);

        return ret;
}
EXPORT_SYMBOL_GPL(blkdev_reset_zones);

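/*
 * A minimal in-kernel usage sketch (not part of this file): resetting
 * the single zone containing a given sector, e.g. from a filesystem
 * reclaiming a fully invalidated zone. bdev, sector and zone_sectors
 * are assumed to be known to the caller:
 *
 *	sector_t zone_start = sector & ~(zone_sectors - 1);
 *	int ret;
 *
 *	ret = blkdev_reset_zones(bdev, zone_start, zone_sectors, GFP_NOFS);
 *	if (ret)
 *		pr_err("zone reset failed %d\n", ret);
 */
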
/*
 * BLKREPORTZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
                              unsigned int cmd, unsigned long arg)
{
        void __user *argp = (void __user *)arg;
        struct request_queue *q;
        struct blk_zone_report rep;
        struct blk_zone *zones;
        int ret;

        if (!argp)
                return -EINVAL;

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        if (!blk_queue_is_zoned(q))
                return -ENOTTY;

        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;

        if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
                return -EFAULT;

        if (!rep.nr_zones)
                return -EINVAL;

        rep.nr_zones = min(blkdev_nr_zones(bdev), rep.nr_zones);

        zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone),
                               GFP_KERNEL | __GFP_ZERO);
        if (!zones)
                return -ENOMEM;

        ret = blkdev_report_zones(bdev, rep.sector, zones, &rep.nr_zones);
        if (ret)
                goto out;

        if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
                ret = -EFAULT;
                goto out;
        }

        if (rep.nr_zones) {
                if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
                                 sizeof(struct blk_zone) * rep.nr_zones))
                        ret = -EFAULT;
        }

 out:
        kvfree(zones);

        return ret;
}

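/*
 * A minimal userspace sketch (not part of this file) of driving the
 * BLKREPORTZONE ioctl above with the UAPI types from <linux/blkzoned.h>.
 * The zone array must immediately follow the report header in memory,
 * which is exactly the layout the two copy_to_user() calls above fill
 * in (fd is assumed to be an open descriptor on the zoned device):
 *
 *	struct blk_zone_report *rep;
 *	unsigned int nrz = 128;
 *
 *	rep = calloc(1, sizeof(*rep) + nrz * sizeof(struct blk_zone));
 *	rep->sector = 0;
 *	rep->nr_zones = nrz;
 *	if (!ioctl(fd, BLKREPORTZONE, rep)) {
 *		// rep->nr_zones is now the number of zones reported and
 *		// rep->zones[0 .. rep->nr_zones - 1] are valid
 *	}
 *	free(rep);
 */
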
/*
 * BLKRESETZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
                             unsigned int cmd, unsigned long arg)
{
        void __user *argp = (void __user *)arg;
        struct request_queue *q;
        struct blk_zone_range zrange;

        if (!argp)
                return -EINVAL;

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        if (!blk_queue_is_zoned(q))
                return -ENOTTY;

        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;

        if (!(mode & FMODE_WRITE))
                return -EBADF;

        if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
                return -EFAULT;

        return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
                                  GFP_KERNEL);
}

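/*
 * The matching userspace sketch for BLKRESETZONE (again an illustration,
 * not part of this file; the fd must be open for writing since the
 * handler above checks FMODE_WRITE):
 *
 *	struct blk_zone_range zrange = {
 *		.sector = zone_start,
 *		.nr_sectors = zone_sectors,
 *	};
 *
 *	if (ioctl(fd, BLKRESETZONE, &zrange))
 *		perror("BLKRESETZONE");
 */
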
static inline unsigned long *blk_alloc_zone_bitmap(int node,
                                                   unsigned int nr_zones)
{
        return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
                            GFP_NOIO, node);
}

/*
 * Allocate an array of struct blk_zone to get nr_zones zone information.
 * The allocated array may be smaller than nr_zones.
 */
static struct blk_zone *blk_alloc_zones(unsigned int *nr_zones)
{
        struct blk_zone *zones;
        size_t nrz = min(*nr_zones, BLK_ZONED_REPORT_MAX_ZONES);

        /*
         * GFP_KERNEL here is meaningless as the caller task context has
         * the PF_MEMALLOC_NOIO flag set in blk_revalidate_disk_zones()
         * with memalloc_noio_save().
         */
        zones = kvcalloc(nrz, sizeof(struct blk_zone), GFP_KERNEL);
        if (!zones) {
                *nr_zones = 0;
                return NULL;
        }

        *nr_zones = nrz;

        return zones;
}

void blk_queue_free_zone_bitmaps(struct request_queue *q)
{
        kfree(q->seq_zones_bitmap);
        q->seq_zones_bitmap = NULL;
        kfree(q->seq_zones_wlock);
        q->seq_zones_wlock = NULL;
}

/**
 * blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps
 * @disk:       Target disk
 *
 * Helper function for low-level device drivers to (re)allocate and initialize
 * a disk request queue's zone bitmaps. This function should normally be called
 * within the disk ->revalidate method. For BIO-based queues, no zone bitmaps
 * are allocated.
 */
int blk_revalidate_disk_zones(struct gendisk *disk)
{
        struct request_queue *q = disk->queue;
        unsigned int nr_zones = __blkdev_nr_zones(q, get_capacity(disk));
        unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
        unsigned int i, rep_nr_zones = 0, z = 0, nrz;
        struct blk_zone *zones = NULL;
        unsigned int noio_flag;
        sector_t sector = 0;
        int ret = 0;

        /*
         * BIO-based queues do not use a scheduler, so only q->nr_zones
         * needs to be updated so that the value exposed in sysfs is correct.
         */
        if (!queue_is_mq(q)) {
                q->nr_zones = nr_zones;
                return 0;
        }

        /*
         * Ensure that all memory allocations in this context are done as
         * if GFP_NOIO was specified.
         */
        noio_flag = memalloc_noio_save();

        if (!blk_queue_is_zoned(q) || !nr_zones) {
                nr_zones = 0;
                goto update;
        }

        /* Allocate bitmaps */
        ret = -ENOMEM;
        seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones);
        if (!seq_zones_wlock)
                goto out;
        seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones);
        if (!seq_zones_bitmap)
                goto out;

        /* Get zone information and initialize seq_zones_bitmap */
        rep_nr_zones = nr_zones;
        zones = blk_alloc_zones(&rep_nr_zones);
        if (!zones)
                goto out;

        while (z < nr_zones) {
                nrz = min(nr_zones - z, rep_nr_zones);
                ret = blk_report_zones(disk, sector, zones, &nrz);
                if (ret)
                        goto out;
                if (!nrz)
                        break;
                for (i = 0; i < nrz; i++) {
                        if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL)
                                set_bit(z, seq_zones_bitmap);
                        z++;
                }
                sector += nrz * blk_queue_zone_sectors(q);
        }

        if (WARN_ON(z != nr_zones)) {
                ret = -EIO;
                goto out;
        }

update:
        /*
         * Install the new bitmaps, making sure the queue is stopped and
         * all I/Os are completed (i.e. a scheduler is not referencing the
         * bitmaps).
         */
        blk_mq_freeze_queue(q);
        q->nr_zones = nr_zones;
        swap(q->seq_zones_wlock, seq_zones_wlock);
        swap(q->seq_zones_bitmap, seq_zones_bitmap);
        blk_mq_unfreeze_queue(q);

out:
        memalloc_noio_restore(noio_flag);

        kvfree(zones);
        kfree(seq_zones_wlock);
        kfree(seq_zones_bitmap);

        if (ret) {
                pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
                blk_mq_freeze_queue(q);
                blk_queue_free_zone_bitmaps(q);
                blk_mq_unfreeze_queue(q);
        }

        return ret;
}
EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);

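/*
 * A minimal driver-side sketch (not part of this file, and the exact
 * setup calls vary by driver): a zoned request-based driver typically
 * configures the queue's zoned model and zone size first, then calls
 * blk_revalidate_disk_zones() from its revalidation path:
 *
 *	q->limits.zoned = BLK_ZONED_HM;
 *	blk_queue_chunk_sectors(q, zone_sectors);
 *	ret = blk_revalidate_disk_zones(disk);
 *	if (ret)
 *		// the queue's zone bitmaps have already been freed
 *		return ret;
 */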
