drivers/block/xen-blkback/common.h


DEFINITIONS

This source file contains the following definitions:
  1. blkif_get_x86_32_req
  2. blkif_get_x86_64_req

/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef __XEN_BLKIF__BACKEND__COMMON_H__
#define __XEN_BLKIF__BACKEND__COMMON_H__

#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/io.h>
#include <linux/rbtree.h>
#include <asm/setup.h>
#include <asm/pgalloc.h>
#include <asm/hypervisor.h>
#include <xen/grant_table.h>
#include <xen/page.h>
#include <xen/xenbus.h>
#include <xen/interface/io/ring.h>
#include <xen/interface/io/blkif.h>
#include <xen/interface/io/protocols.h>

extern unsigned int xen_blkif_max_ring_order;
extern unsigned int xenblk_max_queues;
/*
 * Maximum number of segments allowed in an indirect request. This value is
 * also advertised to the frontend.
 */
#define MAX_INDIRECT_SEGMENTS 256

/*
 * Xen uses 4K pages. The guest may use a different page size (4K or 64K).
 * Number of Xen pages per segment:
 */
#define XEN_PAGES_PER_SEGMENT   (PAGE_SIZE / XEN_PAGE_SIZE)

#define XEN_PAGES_PER_INDIRECT_FRAME \
        (XEN_PAGE_SIZE/sizeof(struct blkif_request_segment))
#define SEGS_PER_INDIRECT_FRAME \
        (XEN_PAGES_PER_INDIRECT_FRAME / XEN_PAGES_PER_SEGMENT)

#define MAX_INDIRECT_PAGES \
        ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
#define INDIRECT_PAGES(_segs) DIV_ROUND_UP(_segs, XEN_PAGES_PER_INDIRECT_FRAME)
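
/*
 * Worked example, assuming PAGE_SIZE == XEN_PAGE_SIZE == 4096 and
 * sizeof(struct blkif_request_segment) == 8:
 *   XEN_PAGES_PER_SEGMENT        = 4096 / 4096            = 1
 *   XEN_PAGES_PER_INDIRECT_FRAME = 4096 / 8               = 512
 *   SEGS_PER_INDIRECT_FRAME      = 512 / 1                = 512
 *   MAX_INDIRECT_PAGES           = (256 + 512 - 1) / 512  = 1
 *   INDIRECT_PAGES(256)          = DIV_ROUND_UP(256, 512) = 1
 * With 64K guest pages the same arithmetic gives 16 Xen pages per segment,
 * 32 segments per indirect frame and at most 8 indirect pages.
 */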

/* Not a real protocol.  Used to generate ring structs which contain
 * the elements common to all protocols only.  This way we get a
 * compiler-checkable way to use common struct elements, so we can
 * avoid using switch(protocol) in a number of places.  */
struct blkif_common_request {
        char dummy;
};

/* i386 protocol version */

struct blkif_x86_32_request_rw {
        uint8_t        nr_segments;  /* number of segments                   */
        blkif_vdev_t   handle;       /* only for read/write requests         */
        uint64_t       id;           /* private guest value, echoed in resp  */
        blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
        struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} __attribute__((__packed__));

struct blkif_x86_32_request_discard {
        uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
        blkif_vdev_t   _pad1;        /* was "handle" for read/write requests */
        uint64_t       id;           /* private guest value, echoed in resp  */
        blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
        uint64_t       nr_sectors;
} __attribute__((__packed__));

struct blkif_x86_32_request_other {
        uint8_t        _pad1;
        blkif_vdev_t   _pad2;
        uint64_t       id;           /* private guest value, echoed in resp  */
} __attribute__((__packed__));

struct blkif_x86_32_request_indirect {
        uint8_t        indirect_op;
        uint16_t       nr_segments;
        uint64_t       id;
        blkif_sector_t sector_number;
        blkif_vdev_t   handle;
        uint16_t       _pad1;
        grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
        /*
         * The maximum number of indirect segments (and pages) that will
         * be used is determined by MAX_INDIRECT_SEGMENTS; this value is
         * also exported to the guest (via the xenstore
         * feature-max-indirect-segments entry), so the frontend knows how
         * many indirect segments the backend supports.
         */
        uint64_t       _pad2;        /* make it 64 byte aligned */
} __attribute__((__packed__));

struct blkif_x86_32_request {
        uint8_t        operation;    /* BLKIF_OP_???                         */
        union {
                struct blkif_x86_32_request_rw rw;
                struct blkif_x86_32_request_discard discard;
                struct blkif_x86_32_request_other other;
                struct blkif_x86_32_request_indirect indirect;
        } u;
} __attribute__((__packed__));

/* x86_64 protocol version */

struct blkif_x86_64_request_rw {
        uint8_t        nr_segments;  /* number of segments                   */
        blkif_vdev_t   handle;       /* only for read/write requests         */
        uint32_t       _pad1;        /* offsetof(blkif_request..,u.rw.id)==8 */
        uint64_t       id;
        blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
        struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} __attribute__((__packed__));

struct blkif_x86_64_request_discard {
        uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
        blkif_vdev_t   _pad1;        /* was "handle" for read/write requests */
        uint32_t       _pad2;        /* offsetof(blkif_..,u.discard.id)==8   */
        uint64_t       id;
        blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
        uint64_t       nr_sectors;
} __attribute__((__packed__));

struct blkif_x86_64_request_other {
        uint8_t        _pad1;
        blkif_vdev_t   _pad2;
        uint32_t       _pad3;        /* offsetof(blkif_..,u.other.id)==8     */
        uint64_t       id;           /* private guest value, echoed in resp  */
} __attribute__((__packed__));

struct blkif_x86_64_request_indirect {
        uint8_t        indirect_op;
        uint16_t       nr_segments;
        uint32_t       _pad1;        /* offsetof(blkif_..,u.indirect.id)==8   */
        uint64_t       id;
        blkif_sector_t sector_number;
        blkif_vdev_t   handle;
        uint16_t       _pad2;
        grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
        /*
         * The maximum number of indirect segments (and pages) that will
         * be used is determined by MAX_INDIRECT_SEGMENTS; this value is
         * also exported to the guest (via the xenstore
         * feature-max-indirect-segments entry), so the frontend knows how
         * many indirect segments the backend supports.
         */
        uint32_t       _pad3;        /* make it 64 byte aligned */
} __attribute__((__packed__));

struct blkif_x86_64_request {
        uint8_t        operation;    /* BLKIF_OP_???                         */
        union {
                struct blkif_x86_64_request_rw rw;
                struct blkif_x86_64_request_discard discard;
                struct blkif_x86_64_request_other other;
                struct blkif_x86_64_request_indirect indirect;
        } u;
} __attribute__((__packed__));

DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
                  struct blkif_response);
DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
                  struct blkif_response __packed);
DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
                  struct blkif_response);

union blkif_back_rings {
        struct blkif_back_ring        native;
        struct blkif_common_back_ring common;
        struct blkif_x86_32_back_ring x86_32;
        struct blkif_x86_64_back_ring x86_64;
};
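
/*
 * Illustrative use of the "common" view (a sketch; the real consumers live in
 * the blkback request-handling code): fields shared by all layouts can be
 * read through it without switching on the protocol, e.g.
 *
 *      RING_IDX rp = blk_rings->common.sring->req_prod;
 *
 * while per-request payloads still go through the protocol-specific helpers
 * defined at the bottom of this header.
 */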

enum blkif_protocol {
        BLKIF_PROTOCOL_NATIVE = 1,
        BLKIF_PROTOCOL_X86_32 = 2,
        BLKIF_PROTOCOL_X86_64 = 3,
};

/*
 * Default protocol if the frontend doesn't specify one.
 */
#ifdef CONFIG_X86
#  define BLKIF_PROTOCOL_DEFAULT BLKIF_PROTOCOL_X86_32
#else
#  define BLKIF_PROTOCOL_DEFAULT BLKIF_PROTOCOL_NATIVE
#endif

struct xen_vbd {
        /* What the domain refers to this vbd as. */
        blkif_vdev_t            handle;
        /* Non-zero -> read-only */
        unsigned char           readonly;
        /* VDISK_xxx */
        unsigned char           type;
        /* phys device that this vbd maps to. */
        u32                     pdevice;
        struct block_device     *bdev;
        /* Cached size parameter. */
        sector_t                size;
        unsigned int            flush_support:1;
        unsigned int            discard_secure:1;
        unsigned int            feature_gnt_persistent:1;
        unsigned int            overflow_max_grants:1;
};

struct backend_info;

/* Number of requests that we can fit in a ring */
#define XEN_BLKIF_REQS_PER_PAGE         32

struct persistent_gnt {
        struct page *page;
        grant_ref_t gnt;
        grant_handle_t handle;
        unsigned long last_used;
        bool active;
        struct rb_node node;
        struct list_head remove_node;
};

/* Per-ring information. */
struct xen_blkif_ring {
        /* Physical parameters of the comms window. */
        unsigned int            irq;
        union blkif_back_rings  blk_rings;
        void                    *blk_ring;
        /* Private fields. */
        spinlock_t              blk_ring_lock;

        wait_queue_head_t       wq;
        atomic_t                inflight;
        bool                    active;
        /* One thread per blkif ring. */
        struct task_struct      *xenblkd;
        unsigned int            waiting_reqs;

        /* List of all 'pending_req' available */
        struct list_head        pending_free;
        /* And its spinlock. */
        spinlock_t              pending_free_lock;
        wait_queue_head_t       pending_free_wq;

        /* Tree to store persistent grants. */
        struct rb_root          persistent_gnts;
        unsigned int            persistent_gnt_c;
        atomic_t                persistent_gnt_in_use;
        unsigned long           next_lru;

        /* Statistics. */
        unsigned long           st_print;
        unsigned long long      st_rd_req;
        unsigned long long      st_wr_req;
        unsigned long long      st_oo_req;
        unsigned long long      st_f_req;
        unsigned long long      st_ds_req;
        unsigned long long      st_rd_sect;
        unsigned long long      st_wr_sect;

        /* Used by the kworker that offloads work from the persistent purge. */
        struct list_head        persistent_purge_list;
        struct work_struct      persistent_purge_work;

        /* Buffer of free pages to map grant refs. */
        spinlock_t              free_pages_lock;
        int                     free_pages_num;
        struct list_head        free_pages;

        struct work_struct      free_work;
        /* Thread shutdown wait queue. */
        wait_queue_head_t       shutdown_wq;
        struct xen_blkif        *blkif;
};

struct xen_blkif {
        /* Unique identifier for this interface. */
        domid_t                 domid;
        unsigned int            handle;
        /* Comms information. */
        enum blkif_protocol     blk_protocol;
        /* The VBD attached to this interface. */
        struct xen_vbd          vbd;
        /* Back pointer to the backend_info. */
        struct backend_info     *be;
        atomic_t                refcnt;
        /* for barrier (drain) requests */
        struct completion       drain_complete;
        atomic_t                drain;

        struct work_struct      free_work;
        unsigned int            nr_ring_pages;
        /* All rings for this device. */
        struct xen_blkif_ring   *rings;
        unsigned int            nr_rings;
};

struct seg_buf {
        unsigned long offset;
        unsigned int nsec;
};

struct grant_page {
        struct page             *page;
        struct persistent_gnt   *persistent_gnt;
        grant_handle_t          handle;
        grant_ref_t             gref;
};

/*
 * Each outstanding request that we've passed to the lower device layers has a
 * 'pending_req' allocated to it. Each bio that completes decrements the
 * pendcnt towards zero. When it hits zero, the specified domain has a
 * response queued for it, with the saved 'id' passed back.
 */
struct pending_req {
        struct xen_blkif_ring   *ring;
        u64                     id;
        int                     nr_segs;
        atomic_t                pendcnt;
        unsigned short          operation;
        int                     status;
        struct list_head        free_list;
        struct grant_page       *segments[MAX_INDIRECT_SEGMENTS];
        /* Indirect descriptors */
        struct grant_page       *indirect_pages[MAX_INDIRECT_PAGES];
        struct seg_buf          seg[MAX_INDIRECT_SEGMENTS];
        struct bio              *biolist[MAX_INDIRECT_SEGMENTS];
        struct gnttab_unmap_grant_ref unmap[MAX_INDIRECT_SEGMENTS];
        struct page                   *unmap_pages[MAX_INDIRECT_SEGMENTS];
        struct gntab_unmap_queue_data gnttab_unmap_data;
};
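
/*
 * Note (descriptive, not enforced here): pending_req structures are meant to
 * be pooled rather than allocated per request; the backend keeps free ones on
 * ring->pending_free (protected by pending_free_lock) and recycles them via
 * the free_list member, typically on the order of XEN_BLKIF_REQS_PER_PAGE
 * entries per ring page.
 */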


#define vbd_sz(_v)      ((_v)->bdev->bd_part ? \
                         (_v)->bdev->bd_part->nr_sects : \
                          get_capacity((_v)->bdev->bd_disk))

#define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt))
#define xen_blkif_put(_b)                               \
        do {                                            \
                if (atomic_dec_and_test(&(_b)->refcnt)) \
                        schedule_work(&(_b)->free_work);\
        } while (0)
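
/*
 * Usage sketch (illustrative; not a call site in this file): code that does
 * work on behalf of a blkif holds a reference for the duration, e.g.
 *
 *      xen_blkif_get(blkif);
 *      ... submit I/O for blkif ...
 *      xen_blkif_put(blkif);
 *
 * The final xen_blkif_put() schedules blkif->free_work to do the actual
 * freeing.
 */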

struct phys_req {
        unsigned short          dev;
        blkif_sector_t          nr_sects;
        struct block_device     *bdev;
        blkif_sector_t          sector_number;
};
int xen_blkif_interface_init(void);

int xen_blkif_xenbus_init(void);

irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
int xen_blkif_schedule(void *arg);
int xen_blkif_purge_persistent(void *arg);
void xen_blkbk_free_caches(struct xen_blkif_ring *ring);

int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
                              struct backend_info *be, int state);

int xen_blkbk_barrier(struct xenbus_transaction xbt,
                      struct backend_info *be, int state);
struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
void xen_blkbk_unmap_purged_grants(struct work_struct *work);

static inline void blkif_get_x86_32_req(struct blkif_request *dst,
                                        struct blkif_x86_32_request *src)
{
        int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
        dst->operation = READ_ONCE(src->operation);
        switch (dst->operation) {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
        case BLKIF_OP_WRITE_BARRIER:
        case BLKIF_OP_FLUSH_DISKCACHE:
                dst->u.rw.nr_segments = src->u.rw.nr_segments;
                dst->u.rw.handle = src->u.rw.handle;
                dst->u.rw.id = src->u.rw.id;
                dst->u.rw.sector_number = src->u.rw.sector_number;
                barrier();
                if (n > dst->u.rw.nr_segments)
                        n = dst->u.rw.nr_segments;
                for (i = 0; i < n; i++)
                        dst->u.rw.seg[i] = src->u.rw.seg[i];
                break;
        case BLKIF_OP_DISCARD:
                dst->u.discard.flag = src->u.discard.flag;
                dst->u.discard.id = src->u.discard.id;
                dst->u.discard.sector_number = src->u.discard.sector_number;
                dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
                break;
        case BLKIF_OP_INDIRECT:
                dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
                dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
                dst->u.indirect.handle = src->u.indirect.handle;
                dst->u.indirect.id = src->u.indirect.id;
                dst->u.indirect.sector_number = src->u.indirect.sector_number;
                barrier();
                j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
                for (i = 0; i < j; i++)
                        dst->u.indirect.indirect_grefs[i] =
                                src->u.indirect.indirect_grefs[i];
                break;
        default:
                /*
                 * Don't know how to translate this op. Only get the
                 * ID so failure can be reported to the frontend.
                 */
                dst->u.other.id = src->u.other.id;
                break;
        }
}

static inline void blkif_get_x86_64_req(struct blkif_request *dst,
                                        struct blkif_x86_64_request *src)
{
        int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
        dst->operation = READ_ONCE(src->operation);
        switch (dst->operation) {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
        case BLKIF_OP_WRITE_BARRIER:
        case BLKIF_OP_FLUSH_DISKCACHE:
                dst->u.rw.nr_segments = src->u.rw.nr_segments;
                dst->u.rw.handle = src->u.rw.handle;
                dst->u.rw.id = src->u.rw.id;
                dst->u.rw.sector_number = src->u.rw.sector_number;
                barrier();
                if (n > dst->u.rw.nr_segments)
                        n = dst->u.rw.nr_segments;
                for (i = 0; i < n; i++)
                        dst->u.rw.seg[i] = src->u.rw.seg[i];
                break;
        case BLKIF_OP_DISCARD:
                dst->u.discard.flag = src->u.discard.flag;
                dst->u.discard.id = src->u.discard.id;
                dst->u.discard.sector_number = src->u.discard.sector_number;
                dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
                break;
        case BLKIF_OP_INDIRECT:
                dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
                dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
                dst->u.indirect.handle = src->u.indirect.handle;
                dst->u.indirect.id = src->u.indirect.id;
                dst->u.indirect.sector_number = src->u.indirect.sector_number;
                barrier();
                j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
                for (i = 0; i < j; i++)
                        dst->u.indirect.indirect_grefs[i] =
                                src->u.indirect.indirect_grefs[i];
                break;
        default:
                /*
                 * Don't know how to translate this op. Only get the
                 * ID so failure can be reported to the frontend.
                 */
                dst->u.other.id = src->u.other.id;
                break;
        }
}
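
/*
 * Call-site sketch (illustrative; the real request loop lives in the blkback
 * code, not in this header): after the protocol has been negotiated, each
 * request is copied off the shared ring with the matching helper, e.g.
 *
 *      switch (blkif->blk_protocol) {
 *      case BLKIF_PROTOCOL_NATIVE:
 *              memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc),
 *                     sizeof(req));
 *              break;
 *      case BLKIF_PROTOCOL_X86_32:
 *              blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
 *              break;
 *      case BLKIF_PROTOCOL_X86_64:
 *              blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
 *              break;
 *      }
 *
 * where req is a struct blkif_request, blk_rings points at the ring's
 * union blkif_back_rings, and rc is the local request-consumer index.
 */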

#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */
