/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef __XEN_BLKIF__BACKEND__COMMON_H__
#define __XEN_BLKIF__BACKEND__COMMON_H__

#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/io.h>
#include <linux/rbtree.h>
#include <asm/setup.h>
#include <asm/pgalloc.h>
#include <asm/hypervisor.h>
#include <xen/grant_table.h>
#include <xen/xenbus.h>
#include <xen/interface/io/ring.h>
#include <xen/interface/io/blkif.h>
#include <xen/interface/io/protocols.h>

/*
 * This is the maximum number of segments allowed in indirect
 * requests. This value is also advertised to the frontend.
 */
#define MAX_INDIRECT_SEGMENTS 256

#define SEGS_PER_INDIRECT_FRAME \
	(PAGE_SIZE / sizeof(struct blkif_request_segment))
#define MAX_INDIRECT_PAGES \
	((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1) / SEGS_PER_INDIRECT_FRAME)
#define INDIRECT_PAGES(_segs) \
	(((_segs) + SEGS_PER_INDIRECT_FRAME - 1) / SEGS_PER_INDIRECT_FRAME)
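
/*
 * Worked example (assuming 4 KiB pages): struct blkif_request_segment is
 * 8 bytes, so SEGS_PER_INDIRECT_FRAME = 4096 / 8 = 512 and
 * MAX_INDIRECT_PAGES = (256 + 511) / 512 = 1, i.e. a single indirect
 * grant page is enough to describe all MAX_INDIRECT_SEGMENTS segments.
 */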

/* Not a real protocol.  Used to generate ring structs which contain
 * the elements common to all protocols only.  This way we get a
 * compiler-checkable way to use common struct elements, so we can
 * avoid using switch(protocol) in a number of places.  */
struct blkif_common_request {
	char dummy;
};
struct blkif_common_response {
	char dummy;
};

struct blkif_x86_32_request_rw {
	uint8_t        nr_segments;  /* number of segments                   */
	blkif_vdev_t   handle;       /* only for read/write requests         */
	uint64_t       id;           /* private guest value, echoed in resp  */
	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} __attribute__((__packed__));

struct blkif_x86_32_request_discard {
	uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
	blkif_vdev_t   _pad1;        /* was "handle" for read/write requests */
	uint64_t       id;           /* private guest value, echoed in resp  */
	blkif_sector_t sector_number;/* start sector idx of discard range    */
	uint64_t       nr_sectors;
} __attribute__((__packed__));

struct blkif_x86_32_request_other {
	uint8_t        _pad1;
	blkif_vdev_t   _pad2;
	uint64_t       id;           /* private guest value, echoed in resp  */
} __attribute__((__packed__));

struct blkif_x86_32_request_indirect {
	uint8_t        indirect_op;
	uint16_t       nr_segments;
	uint64_t       id;
	blkif_sector_t sector_number;
	blkif_vdev_t   handle;
	uint16_t       _pad1;
	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
	/*
	 * The maximum number of indirect segments (and pages) that will
	 * be used is determined by MAX_INDIRECT_SEGMENTS; this value
	 * is also exported to the guest (via the xenstore
	 * feature-max-indirect-segments entry), so the frontend knows how
	 * many indirect segments the backend supports.
	 */
	uint64_t       _pad2;        /* make it 64 byte aligned */
} __attribute__((__packed__));

struct blkif_x86_32_request {
	uint8_t        operation;    /* BLKIF_OP_???                         */
	union {
		struct blkif_x86_32_request_rw rw;
		struct blkif_x86_32_request_discard discard;
		struct blkif_x86_32_request_other other;
		struct blkif_x86_32_request_indirect indirect;
	} u;
} __attribute__((__packed__));

/* i386 protocol version */
#pragma pack(push, 4)
struct blkif_x86_32_response {
	uint64_t        id;              /* copied from request */
	uint8_t         operation;       /* copied from request */
	int16_t         status;          /* BLKIF_RSP_???       */
};
#pragma pack(pop)

/* x86_64 protocol version */

struct blkif_x86_64_request_rw {
	uint8_t        nr_segments;  /* number of segments                   */
	blkif_vdev_t   handle;       /* only for read/write requests         */
	uint32_t       _pad1;        /* offsetof(blkif_request..,u.rw.id)==8 */
	uint64_t       id;
	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} __attribute__((__packed__));

struct blkif_x86_64_request_discard {
	uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
	blkif_vdev_t   _pad1;        /* was "handle" for read/write requests */
	uint32_t       _pad2;        /* offsetof(blkif_..,u.discard.id)==8   */
	uint64_t       id;
	blkif_sector_t sector_number;/* start sector idx of discard range    */
	uint64_t       nr_sectors;
} __attribute__((__packed__));

struct blkif_x86_64_request_other {
	uint8_t        _pad1;
	blkif_vdev_t   _pad2;
	uint32_t       _pad3;        /* offsetof(blkif_..,u.other.id)==8     */
	uint64_t       id;           /* private guest value, echoed in resp  */
} __attribute__((__packed__));

struct blkif_x86_64_request_indirect {
	uint8_t        indirect_op;
	uint16_t       nr_segments;
	uint32_t       _pad1;        /* offsetof(blkif_..,u.indirect.id)==8  */
	uint64_t       id;
	blkif_sector_t sector_number;
	blkif_vdev_t   handle;
	uint16_t       _pad2;
	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
	/*
	 * The maximum number of indirect segments (and pages) that will
	 * be used is determined by MAX_INDIRECT_SEGMENTS; this value
	 * is also exported to the guest (via the xenstore
	 * feature-max-indirect-segments entry), so the frontend knows how
	 * many indirect segments the backend supports.
	 */
	uint32_t       _pad3;        /* make it 64 byte aligned */
} __attribute__((__packed__));

struct blkif_x86_64_request {
	uint8_t        operation;    /* BLKIF_OP_???                         */
	union {
		struct blkif_x86_64_request_rw rw;
		struct blkif_x86_64_request_discard discard;
		struct blkif_x86_64_request_other other;
		struct blkif_x86_64_request_indirect indirect;
	} u;
} __attribute__((__packed__));

struct blkif_x86_64_response {
	uint64_t       __attribute__((__aligned__(8))) id;
	uint8_t         operation;       /* copied from request */
	int16_t         status;          /* BLKIF_RSP_???       */
};

DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
		  struct blkif_common_response);
DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
		  struct blkif_x86_32_response);
DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
		  struct blkif_x86_64_response);

union blkif_back_rings {
	struct blkif_back_ring        native;
	struct blkif_common_back_ring common;
	struct blkif_x86_32_back_ring x86_32;
	struct blkif_x86_64_back_ring x86_64;
};
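
/*
 * Hedged sketch of what the common ring buys us: fields shared by all
 * three layouts can be read through the common view without switching
 * on the negotiated protocol. Given a struct xen_blkif *blkif (below):
 *
 *	RING_IDX req_prod = blkif->blk_rings.common.sring->req_prod;
 *	RING_IDX rsp_prod_pvt = blkif->blk_rings.common.rsp_prod_pvt;
 */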

enum blkif_protocol {
	BLKIF_PROTOCOL_NATIVE = 1,
	BLKIF_PROTOCOL_X86_32 = 2,
	BLKIF_PROTOCOL_X86_64 = 3,
};

/*
 * Default protocol if the frontend doesn't specify one.
 */
#ifdef CONFIG_X86
#  define BLKIF_PROTOCOL_DEFAULT BLKIF_PROTOCOL_X86_32
#else
#  define BLKIF_PROTOCOL_DEFAULT BLKIF_PROTOCOL_NATIVE
#endif
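
/*
 * Hedged sketch of how a backend can honour the frontend's choice,
 * modelled on the usual xenbus negotiation ("dev" is a hypothetical
 * xenbus_device for the backend):
 *
 *	char protocol[64];
 *	int proto = BLKIF_PROTOCOL_DEFAULT;
 *
 *	if (xenbus_gather(XBT_NIL, dev->otherend, "protocol",
 *			  "%63s", protocol, NULL) == 0) {
 *		if (!strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
 *			proto = BLKIF_PROTOCOL_NATIVE;
 *		else if (!strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
 *			proto = BLKIF_PROTOCOL_X86_32;
 *		else if (!strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
 *			proto = BLKIF_PROTOCOL_X86_64;
 *	}
 */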

struct xen_vbd {
	/* What the domain refers to this vbd as. */
	blkif_vdev_t		handle;
	/* Non-zero -> read-only */
	unsigned char		readonly;
	/* VDISK_xxx */
	unsigned char		type;
	/* phys device that this vbd maps to. */
	u32			pdevice;
	struct block_device	*bdev;
	/* Cached size parameter. */
	sector_t		size;
	unsigned int		flush_support:1;
	unsigned int		discard_secure:1;
	unsigned int		feature_gnt_persistent:1;
	unsigned int		overflow_max_grants:1;
};

struct backend_info;

/* Number of available flags */
#define PERSISTENT_GNT_FLAGS_SIZE	2
/* This persistent grant is currently in use */
#define PERSISTENT_GNT_ACTIVE		0
/*
 * This persistent grant has been used recently. The flag is set when
 * PERSISTENT_GNT_ACTIVE is cleared, so the purge scan knows the grant
 * was in use since the last pass.
 */
#define PERSISTENT_GNT_WAS_ACTIVE	1

/* Number of requests that we can fit in a ring */
#define XEN_BLKIF_REQS			32

struct persistent_gnt {
	struct page *page;
	grant_ref_t gnt;
	grant_handle_t handle;
	DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE);
	struct rb_node node;
	struct list_head remove_node;
};
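
/*
 * Hedged sketch of the intended flag lifecycle ("gnt" is a
 * struct persistent_gnt *; the surrounding code is illustrative):
 *
 *	set_bit(PERSISTENT_GNT_ACTIVE, gnt->flags);       grant handed out
 *	...
 *	clear_bit(PERSISTENT_GNT_ACTIVE, gnt->flags);     grant returned
 *	set_bit(PERSISTENT_GNT_WAS_ACTIVE, gnt->flags);   mark recently used
 *
 * A purge pass can then skip (and clear) PERSISTENT_GNT_WAS_ACTIVE so
 * that only grants left idle across two consecutive scans are reclaimed.
 */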

struct xen_blkif {
	/* Unique identifier for this interface. */
	domid_t			domid;
	unsigned int		handle;
	/* Physical parameters of the comms window. */
	unsigned int		irq;
	/* Comms information. */
	enum blkif_protocol	blk_protocol;
	union blkif_back_rings	blk_rings;
	void			*blk_ring;
	/* The VBD attached to this interface. */
	struct xen_vbd		vbd;
	/* Back pointer to the backend_info. */
	struct backend_info	*be;
	/* Private fields. */
	spinlock_t		blk_ring_lock;
	atomic_t		refcnt;

	wait_queue_head_t	wq;
	/* for barrier (drain) requests */
	struct completion	drain_complete;
	atomic_t		drain;
	atomic_t		inflight;
	/* One thread per blkif. */
	struct task_struct	*xenblkd;
	unsigned int		waiting_reqs;

	/* tree to store persistent grants */
	struct rb_root		persistent_gnts;
	unsigned int		persistent_gnt_c;
	atomic_t		persistent_gnt_in_use;
	unsigned long		next_lru;

	/* used by the kworker that offloads the persistent-grant purge work */
	struct list_head	persistent_purge_list;
	struct work_struct	persistent_purge_work;

	/* buffer of free pages to map grant refs */
	spinlock_t		free_pages_lock;
	int			free_pages_num;
	struct list_head	free_pages;

	/* List of all 'pending_req' available */
	struct list_head	pending_free;
	/* And its spinlock. */
	spinlock_t		pending_free_lock;
	wait_queue_head_t	pending_free_wq;

	/* statistics */
	unsigned long		st_print;
	unsigned long long	st_rd_req;
	unsigned long long	st_wr_req;
	unsigned long long	st_oo_req;
	unsigned long long	st_f_req;
	unsigned long long	st_ds_req;
	unsigned long long	st_rd_sect;
	unsigned long long	st_wr_sect;

	struct work_struct	free_work;
	/* Thread shutdown wait queue. */
	wait_queue_head_t	shutdown_wq;
};

struct seg_buf {
	unsigned long offset;
	unsigned int nsec;
};

struct grant_page {
	struct page		*page;
	struct persistent_gnt	*persistent_gnt;
	grant_handle_t		handle;
	grant_ref_t		gref;
};

/*
 * Each outstanding request that we've passed to the lower device layers has a
 * 'pending_req' allocated to it. Each bio that completes decrements the
 * pendcnt towards zero. When it hits zero, the specified domain has a
 * response queued for it, with the saved 'id' passed back.
 */
struct pending_req {
	struct xen_blkif	*blkif;
	u64			id;
	int			nr_pages;
	atomic_t		pendcnt;
	unsigned short		operation;
	int			status;
	struct list_head	free_list;
	struct grant_page	*segments[MAX_INDIRECT_SEGMENTS];
	/* Indirect descriptors */
	struct grant_page	*indirect_pages[MAX_INDIRECT_PAGES];
	struct seg_buf		seg[MAX_INDIRECT_SEGMENTS];
	struct bio		*biolist[MAX_INDIRECT_SEGMENTS];
	struct gnttab_unmap_grant_ref unmap[MAX_INDIRECT_SEGMENTS];
	struct page		*unmap_pages[MAX_INDIRECT_SEGMENTS];
	struct gntab_unmap_queue_data gnttab_unmap_data;
};
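
/*
 * Hedged completion sketch (the helper names are illustrative, not the
 * actual implementation): the last bio to finish queues the response:
 *
 *	static void end_block_io_op_sketch(struct pending_req *req)
 *	{
 *		if (atomic_dec_and_test(&req->pendcnt))
 *			make_response_sketch(req->blkif, req->id,
 *					     req->operation, req->status);
 *	}
 */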

#define vbd_sz(_v)	((_v)->bdev->bd_part ? \
			 (_v)->bdev->bd_part->nr_sects : \
			  get_capacity((_v)->bdev->bd_disk))

#define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt))
#define xen_blkif_put(_b)				\
	do {						\
		if (atomic_dec_and_test(&(_b)->refcnt))	\
			schedule_work(&(_b)->free_work);\
	} while (0)
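
/*
 * Typical (hedged) usage: hold a reference for each in-flight request;
 * the final put schedules free_work to tear the interface down:
 *
 *	xen_blkif_get(blkif);
 *	... submit I/O ...
 *	xen_blkif_put(blkif);	// may schedule blkif->free_work
 */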

struct phys_req {
	unsigned short		dev;
	blkif_sector_t		nr_sects;
	struct block_device	*bdev;
	blkif_sector_t		sector_number;
};

int xen_blkif_interface_init(void);

int xen_blkif_xenbus_init(void);

irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
int xen_blkif_schedule(void *arg);
int xen_blkif_purge_persistent(void *arg);
void xen_blkbk_free_caches(struct xen_blkif *blkif);

int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
			      struct backend_info *be, int state);

int xen_blkbk_barrier(struct xenbus_transaction xbt,
		      struct backend_info *be, int state);
struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
void xen_blkbk_unmap_purged_grants(struct work_struct *work);

static inline void blkif_get_x86_32_req(struct blkif_request *dst,
					struct blkif_x86_32_request *src)
{
	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;

	dst->operation = src->operation;
	switch (src->operation) {
	case BLKIF_OP_READ:
	case BLKIF_OP_WRITE:
	case BLKIF_OP_WRITE_BARRIER:
	case BLKIF_OP_FLUSH_DISKCACHE:
		dst->u.rw.nr_segments = src->u.rw.nr_segments;
		dst->u.rw.handle = src->u.rw.handle;
		dst->u.rw.id = src->u.rw.id;
		dst->u.rw.sector_number = src->u.rw.sector_number;
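		/*
		 * The request lives in a ring shared with the frontend,
		 * which can rewrite it at any time: barrier() keeps the
		 * compiler from re-reading nr_segments from the shared
		 * page, so the clamp below operates on the private copy.
		 */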
		barrier();
		if (n > dst->u.rw.nr_segments)
			n = dst->u.rw.nr_segments;
		for (i = 0; i < n; i++)
			dst->u.rw.seg[i] = src->u.rw.seg[i];
		break;
	case BLKIF_OP_DISCARD:
		dst->u.discard.flag = src->u.discard.flag;
		dst->u.discard.id = src->u.discard.id;
		dst->u.discard.sector_number = src->u.discard.sector_number;
		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
		break;
	case BLKIF_OP_INDIRECT:
		dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
		dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
		dst->u.indirect.handle = src->u.indirect.handle;
		dst->u.indirect.id = src->u.indirect.id;
		dst->u.indirect.sector_number = src->u.indirect.sector_number;
		barrier();
		j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
		for (i = 0; i < j; i++)
			dst->u.indirect.indirect_grefs[i] =
				src->u.indirect.indirect_grefs[i];
		break;
	default:
		/*
		 * Don't know how to translate this op. Only get the
		 * ID so failure can be reported to the frontend.
		 */
		dst->u.other.id = src->u.other.id;
		break;
	}
}

static inline void blkif_get_x86_64_req(struct blkif_request *dst,
					struct blkif_x86_64_request *src)
{
	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;

	dst->operation = src->operation;
	switch (src->operation) {
	case BLKIF_OP_READ:
	case BLKIF_OP_WRITE:
	case BLKIF_OP_WRITE_BARRIER:
	case BLKIF_OP_FLUSH_DISKCACHE:
		dst->u.rw.nr_segments = src->u.rw.nr_segments;
		dst->u.rw.handle = src->u.rw.handle;
		dst->u.rw.id = src->u.rw.id;
		dst->u.rw.sector_number = src->u.rw.sector_number;
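		/* As above: barrier() so the clamp uses the private copy. */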
		barrier();
		if (n > dst->u.rw.nr_segments)
			n = dst->u.rw.nr_segments;
		for (i = 0; i < n; i++)
			dst->u.rw.seg[i] = src->u.rw.seg[i];
		break;
	case BLKIF_OP_DISCARD:
		dst->u.discard.flag = src->u.discard.flag;
		dst->u.discard.id = src->u.discard.id;
		dst->u.discard.sector_number = src->u.discard.sector_number;
		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
		break;
	case BLKIF_OP_INDIRECT:
		dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
		dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
		dst->u.indirect.handle = src->u.indirect.handle;
		dst->u.indirect.id = src->u.indirect.id;
		dst->u.indirect.sector_number = src->u.indirect.sector_number;
		barrier();
		j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
		for (i = 0; i < j; i++)
			dst->u.indirect.indirect_grefs[i] =
				src->u.indirect.indirect_grefs[i];
		break;
	default:
		/*
		 * Don't know how to translate this op. Only get the
		 * ID so failure can be reported to the frontend.
		 */
		dst->u.other.id = src->u.other.id;
		break;
	}
}

#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */