/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/scatterlist.h>

#include "iscsi_iser.h"

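/*
 * iser_free_bounce_sg - release the bounce scatterlist: free every bounce
 * page and the sg array itself, then restore the original sg/size into
 * the data descriptor.
 */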
static void
iser_free_bounce_sg(struct iser_data_buf *data)
{
	struct scatterlist *sg;
	int count;

	for_each_sg(data->sg, sg, data->size, count)
		__free_page(sg_page(sg));

	kfree(data->sg);

	data->sg = data->orig_sg;
	data->size = data->orig_size;
	data->orig_sg = NULL;
	data->orig_size = 0;
}

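/*
 * iser_alloc_bounce_sg - allocate a bounce scatterlist of freshly allocated
 * pages large enough to hold data->data_len bytes (GFP_ATOMIC), save the
 * original sg/size and install the bounce list in the data descriptor.
 * Returns 0 on success, or -ENOMEM after undoing any partial allocation.
 */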
static int
iser_alloc_bounce_sg(struct iser_data_buf *data)
{
	struct scatterlist *sg;
	struct page *page;
	unsigned long length = data->data_len;
	int i = 0, nents = DIV_ROUND_UP(length, PAGE_SIZE);

	sg = kcalloc(nents, sizeof(*sg), GFP_ATOMIC);
	if (!sg)
		goto err;

	sg_init_table(sg, nents);
	while (length) {
		u32 page_len = min_t(u32, length, PAGE_SIZE);

		page = alloc_page(GFP_ATOMIC);
		if (!page)
			goto err;

		sg_set_page(&sg[i], page, page_len, 0);
		length -= page_len;
		i++;
	}

	data->orig_sg = data->sg;
	data->orig_size = data->size;
	data->sg = sg;
	data->size = nents;

	return 0;

err:
	for (; i > 0; i--)
		__free_page(sg_page(&sg[i - 1]));
	kfree(sg);

	return -ENOMEM;
}

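/*
 * iser_copy_bounce - copy the payload between the original scatterlist and
 * the bounce scatterlist. When to_buffer is true the data flows from the
 * original SG into the bounce buffer (writes); otherwise it flows back from
 * the bounce buffer into the original SG (reads). Pages are mapped with
 * kmap_atomic() one element at a time.
 */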
static void
iser_copy_bounce(struct iser_data_buf *data, bool to_buffer)
{
	struct scatterlist *osg, *bsg = data->sg;
	void *oaddr, *baddr;
	unsigned int left = data->data_len;
	unsigned int bsg_off = 0;
	int i;

	for_each_sg(data->orig_sg, osg, data->orig_size, i) {
		unsigned int copy_len, osg_off = 0;

		oaddr = kmap_atomic(sg_page(osg)) + osg->offset;
		copy_len = min(left, osg->length);
		while (copy_len) {
			unsigned int len = min(copy_len, bsg->length - bsg_off);

			baddr = kmap_atomic(sg_page(bsg)) + bsg->offset;
			if (to_buffer)
				memcpy(baddr + bsg_off, oaddr + osg_off, len);
			else
				memcpy(oaddr + osg_off, baddr + bsg_off, len);

			kunmap_atomic(baddr - bsg->offset);
			osg_off += len;
			bsg_off += len;
			copy_len -= len;

			if (bsg_off >= bsg->length) {
				bsg = sg_next(bsg);
				bsg_off = 0;
			}
		}
		kunmap_atomic(oaddr - osg->offset);
		left -= osg_off;
	}
}

static inline void
iser_copy_from_bounce(struct iser_data_buf *data)
{
	iser_copy_bounce(data, false);
}

static inline void
iser_copy_to_bounce(struct iser_data_buf *data)
{
	iser_copy_bounce(data, true);
}

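/*
 * iser_reg_desc_get - pop a fast registration descriptor from the
 * connection pool under the connection lock. The pool is assumed to be
 * non-empty; iser_reg_desc_put() returns the descriptor to the pool.
 */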
struct fast_reg_descriptor *
iser_reg_desc_get(struct ib_conn *ib_conn)
{
	struct fast_reg_descriptor *desc;
	unsigned long flags;

	spin_lock_irqsave(&ib_conn->lock, flags);
	desc = list_first_entry(&ib_conn->fastreg.pool,
				struct fast_reg_descriptor, list);
	list_del(&desc->list);
	spin_unlock_irqrestore(&ib_conn->lock, flags);

	return desc;
}

void
iser_reg_desc_put(struct ib_conn *ib_conn,
		  struct fast_reg_descriptor *desc)
{
	unsigned long flags;

	spin_lock_irqsave(&ib_conn->lock, flags);
	list_add(&desc->list, &ib_conn->fastreg.pool);
	spin_unlock_irqrestore(&ib_conn->lock, flags);
}

/**
 * iser_start_rdma_unaligned_sg - switch the task data to a bounce buffer
 *
 * Allocate a bounce buffer for the task data, copy the payload into it
 * for writes, and DMA map the bounce scatterlist.
 */
static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
					struct iser_data_buf *data,
					enum iser_data_dir cmd_dir)
{
	struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
	int rc;

	rc = iser_alloc_bounce_sg(data);
	if (rc) {
		iser_err("Failed to allocate bounce for data len %lu\n",
			 data->data_len);
		return rc;
	}

	if (cmd_dir == ISER_DIR_OUT)
		iser_copy_to_bounce(data);

	data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size,
					(cmd_dir == ISER_DIR_OUT) ?
					DMA_TO_DEVICE : DMA_FROM_DEVICE);
	if (!data->dma_nents) {
		iser_err("Got dma_nents %d, something went wrong...\n",
			 data->dma_nents);
		rc = -ENOMEM;
		goto err;
	}

	return 0;
err:
	iser_free_bounce_sg(data);
	return rc;
}

/**
 * iser_finalize_rdma_unaligned_sg - tear down the bounce buffer
 *
 * DMA unmap the bounce scatterlist, copy the payload back to the original
 * scatterlist for reads, and free the bounce buffer.
 */
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
				     struct iser_data_buf *data,
				     enum iser_data_dir cmd_dir)
{
	struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;

	ib_dma_unmap_sg(dev, data->sg, data->size,
			(cmd_dir == ISER_DIR_OUT) ?
			DMA_TO_DEVICE : DMA_FROM_DEVICE);

	if (cmd_dir == ISER_DIR_IN)
		iser_copy_from_bounce(data);

	iser_free_bounce_sg(data);
}

#define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & ~MASK_4K) == 0)

/**
 * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
 * and returns the length of the resulting physical address array (may be
 * less than the original due to possible compaction).
 *
 * We build a "page vec" under the assumption that the SG meets the RDMA
 * alignment requirements. Other than the first and last SG elements, all
 * the "internal" elements can be compacted into a list whose elements are
 * DMA addresses of physical pages. The code also supports the odd case
 * where several fragments of the same page appear in the SG as consecutive
 * elements, as well as a single-entry SG.
 */
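/*
 * For example (hypothetical addresses, 4K pages): an SG with two mapped
 * elements { addr 0x10200, len 0x3e00 } and { addr 0x14000, len 0x2000 }
 * compacts into the page vector { 0x10000, 0x11000, 0x12000, 0x13000,
 * 0x14000, 0x15000 } with *offset = 0x200 and *data_size = 0x5e00.
 */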

static int iser_sg_to_page_vec(struct iser_data_buf *data,
			       struct ib_device *ibdev, u64 *pages,
			       int *offset, int *data_size)
{
	struct scatterlist *sg, *sgl = data->sg;
	u64 start_addr, end_addr, page, chunk_start = 0;
	unsigned long total_sz = 0;
	unsigned int dma_len;
	int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;

	/* compute the offset of first element */
	*offset = (u64) sgl[0].offset & ~MASK_4K;

	new_chunk = 1;
	cur_page  = 0;
	for_each_sg(sgl, sg, data->dma_nents, i) {
		start_addr = ib_sg_dma_address(ibdev, sg);
		if (new_chunk)
			chunk_start = start_addr;
		dma_len = ib_sg_dma_len(ibdev, sg);
		end_addr = start_addr + dma_len;
		total_sz += dma_len;

		/* collect page fragments until aligned or end of SG list */
		if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
			new_chunk = 0;
			continue;
		}
		new_chunk = 1;

		/* address of the first page in the contiguous chunk;
		   masking relevant for the very first SG entry,
		   which might be unaligned */
		page = chunk_start & MASK_4K;
		do {
			pages[cur_page++] = page;
			page += SIZE_4K;
		} while (page < end_addr);
	}

	*data_size = total_sz;
	iser_dbg("page_vec->data_size:%d cur_page %d\n",
		 *data_size, cur_page);
	return cur_page;
}

/**
 * iser_data_buf_aligned_len - Tries to determine the maximal sub-list of a
 * scatter-gather list of memory buffers that is correctly aligned for RDMA,
 * and returns the number of entries which are aligned correctly. Supports
 * the case where consecutive SG elements are actually fragments of the same
 * physical page.
 */
static int iser_data_buf_aligned_len(struct iser_data_buf *data,
				      struct ib_device *ibdev)
{
	struct scatterlist *sg, *sgl, *next_sg = NULL;
	u64 start_addr, end_addr;
	int i, ret_len, start_check = 0;

	if (data->dma_nents == 1)
		return 1;

	sgl = data->sg;
	start_addr  = ib_sg_dma_address(ibdev, sgl);

	for_each_sg(sgl, sg, data->dma_nents, i) {
		if (start_check && !IS_4K_ALIGNED(start_addr))
			break;

		next_sg = sg_next(sg);
		if (!next_sg)
			break;

		end_addr    = start_addr + ib_sg_dma_len(ibdev, sg);
		start_addr  = ib_sg_dma_address(ibdev, next_sg);

		if (end_addr == start_addr) {
			start_check = 0;
			continue;
		} else
			start_check = 1;

		if (!IS_4K_ALIGNED(end_addr))
			break;
	}
	ret_len = (next_sg) ? i : i+1;
	iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n",
		 ret_len, data->dma_nents, data);
	return ret_len;
}

static void iser_data_buf_dump(struct iser_data_buf *data,
			       struct ib_device *ibdev)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(data->sg, sg, data->dma_nents, i)
		iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
			 "off:0x%x sz:0x%x dma_len:0x%x\n",
			 i, (unsigned long)ib_sg_dma_address(ibdev, sg),
			 sg_page(sg), sg->offset,
			 sg->length, ib_sg_dma_len(ibdev, sg));
}

static void iser_dump_page_vec(struct iser_page_vec *page_vec)
{
	int i;

	iser_err("page vec length %d data size %d\n",
		 page_vec->length, page_vec->data_size);
	for (i = 0; i < page_vec->length; i++)
		iser_err("%d %lx\n", i, (unsigned long)page_vec->pages[i]);
}

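/*
 * iser_dma_map_task_data - DMA map the task data scatterlist for the given
 * direction and mark the task as owning a mapping for iser_dir.
 * Returns 0 on success, or -EINVAL if the mapping failed.
 */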
int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
			    struct iser_data_buf *data,
			    enum iser_data_dir iser_dir,
			    enum dma_data_direction dma_dir)
{
	struct ib_device *dev;

	iser_task->dir[iser_dir] = 1;
	dev = iser_task->iser_conn->ib_conn.device->ib_device;

	data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, dma_dir);
	if (data->dma_nents == 0) {
		iser_err("dma_map_sg failed!!!\n");
		return -EINVAL;
	}
	return 0;
}

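/*
 * iser_dma_unmap_task_data - DMA unmap the task data scatterlist.
 */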
void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
			      struct iser_data_buf *data,
			      enum dma_data_direction dir)
{
	struct ib_device *dev;

	dev = iser_task->iser_conn->ib_conn.device->ib_device;
	ib_dma_unmap_sg(dev, data->sg, data->size, dir);
}

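/*
 * iser_reg_dma - describe a single-entry SG directly with the device's
 * global DMA memory region (lkey/rkey); no registration work request is
 * needed in this case.
 */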
static int
iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
	     struct iser_mem_reg *reg)
{
	struct scatterlist *sg = mem->sg;

	reg->sge.lkey = device->mr->lkey;
	reg->rkey = device->mr->rkey;
	reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
	reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);

	iser_dbg("Single DMA entry: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
		 " length=0x%x\n", reg->sge.lkey, reg->rkey,
		 reg->sge.addr, reg->sge.length);

	return 0;
}

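/*
 * fall_to_bounce_buf - handle an RDMA alignment violation: account it,
 * optionally dump the offending SG, unmap the original mapping and switch
 * the task to a DMA mapped bounce buffer.
 */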
static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
			      struct iser_data_buf *mem,
			      enum iser_data_dir cmd_dir,
			      int aligned_len)
{
	struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
	struct iser_device *device = iser_task->iser_conn->ib_conn.device;

	iscsi_conn->fmr_unalign_cnt++;
	iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
		  aligned_len, mem->size);

	if (iser_debug_level > 0)
		iser_data_buf_dump(mem, device->ib_device);

	/* unmap the command data before accessing it */
	iser_dma_unmap_task_data(iser_task, mem,
				 (cmd_dir == ISER_DIR_OUT) ?
				 DMA_TO_DEVICE : DMA_FROM_DEVICE);

	/*
	 * Allocate a copy buffer; if we are writing, copy the unaligned
	 * scatterlist into it, then DMA map the copy.
	 */
	if (iser_start_rdma_unaligned_sg(iser_task, mem, cmd_dir) != 0)
		return -ENOMEM;

	return 0;
}

/**
 * iser_reg_page_vec - Register physical memory
 *
 * returns: 0 on success, errno code on failure
 */
static
int iser_reg_page_vec(struct iscsi_iser_task *iser_task,
		      struct iser_data_buf *mem,
		      struct iser_page_vec *page_vec,
		      struct iser_mem_reg *mem_reg)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_pool_fmr *fmr;
	int ret, plen;

	plen = iser_sg_to_page_vec(mem, device->ib_device,
				   page_vec->pages,
				   &page_vec->offset,
				   &page_vec->data_size);
	page_vec->length = plen;
	if (plen * SIZE_4K < page_vec->data_size) {
		iser_err("page vec too short to hold this SG\n");
		iser_data_buf_dump(mem, device->ib_device);
		iser_dump_page_vec(page_vec);
		return -EINVAL;
	}

	fmr  = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
				    page_vec->pages,
				    page_vec->length,
				    page_vec->pages[0]);
	if (IS_ERR(fmr)) {
		ret = PTR_ERR(fmr);
		iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
		return ret;
	}

	mem_reg->sge.lkey = fmr->fmr->lkey;
	mem_reg->rkey = fmr->fmr->rkey;
	mem_reg->sge.addr = page_vec->pages[0] + page_vec->offset;
	mem_reg->sge.length = page_vec->data_size;
	mem_reg->mem_h = fmr;

	return 0;
}

/**
 * Unregister memory that was previously registered using FMR.
 * If the memory was not registered via FMR, this does nothing.
 */
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
			enum iser_data_dir cmd_dir)
{
	struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
	int ret;

	if (!reg->mem_h)
		return;

	iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);

	ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
	if (ret)
		iser_err("ib_fmr_pool_unmap failed %d\n", ret);

	reg->mem_h = NULL;
}

void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
			    enum iser_data_dir cmd_dir)
{
	struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];

	if (!reg->mem_h)
		return;

	iser_reg_desc_put(&iser_task->iser_conn->ib_conn,
			  reg->mem_h);
	reg->mem_h = NULL;
}

/**
 * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA,
 * using FMR (if possible), obtaining the rkey and va
 *
 * returns 0 on success, errno code on failure
 */
int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
			  enum iser_data_dir cmd_dir)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device   *device = ib_conn->device;
	struct ib_device     *ibdev = device->ib_device;
	struct iser_data_buf *mem = &iser_task->data[cmd_dir];
	struct iser_mem_reg *mem_reg;
	int aligned_len;
	int err;
	int i;

	mem_reg = &iser_task->rdma_reg[cmd_dir];

	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
	if (aligned_len != mem->dma_nents) {
		err = fall_to_bounce_buf(iser_task, mem,
					 cmd_dir, aligned_len);
		if (err) {
			iser_err("failed to allocate bounce buffer\n");
			return err;
		}
	}

	/* if there is a single dma entry, FMR is not needed */
	if (mem->dma_nents == 1) {
		return iser_reg_dma(device, mem, mem_reg);
	} else { /* use FMR for multiple dma entries */
		err = iser_reg_page_vec(iser_task, mem, ib_conn->fmr.page_vec,
					mem_reg);
		if (err && err != -EAGAIN) {
			iser_data_buf_dump(mem, ibdev);
			iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
				 mem->dma_nents,
				 ntoh24(iser_task->desc.iscsi_header.dlength));
			iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
				 ib_conn->fmr.page_vec->data_size,
				 ib_conn->fmr.page_vec->length,
				 ib_conn->fmr.page_vec->offset);
			for (i = 0; i < ib_conn->fmr.page_vec->length; i++)
				iser_err("page_vec[%d] = 0x%llx\n", i,
					 (unsigned long long)ib_conn->fmr.page_vec->pages[i]);
		}
		if (err)
			return err;
	}
	return 0;
}

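/*
 * iser_set_dif_domain - fill a T10-DIF signature domain from the SCSI
 * command: protection interval, reference tag, and (currently hard coded)
 * application tag mask and escape settings.
 */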
static void
iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
		    struct ib_sig_domain *domain)
{
	domain->sig_type = IB_SIG_TYPE_T10_DIF;
	domain->sig.dif.pi_interval = scsi_prot_interval(sc);
	domain->sig.dif.ref_tag = scsi_prot_ref_tag(sc);
	/*
	 * At the moment we hard code those, but in the future
	 * we will take them from sc.
	 */
	domain->sig.dif.apptag_check_mask = 0xffff;
	domain->sig.dif.app_escape = true;
	domain->sig.dif.ref_escape = true;
	if (sc->prot_flags & SCSI_PROT_REF_INCREMENT)
		domain->sig.dif.ref_remap = true;
}

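/*
 * iser_set_sig_attrs - translate the SCSI protection operation into the
 * memory and wire signature domains of the signature attributes.
 */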
static int
iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
{
	switch (scsi_get_prot_op(sc)) {
	case SCSI_PROT_WRITE_INSERT:
	case SCSI_PROT_READ_STRIP:
		sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
		break;
	case SCSI_PROT_READ_INSERT:
	case SCSI_PROT_WRITE_STRIP:
		sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
						IB_T10DIF_CSUM : IB_T10DIF_CRC;
		break;
	case SCSI_PROT_READ_PASS:
	case SCSI_PROT_WRITE_PASS:
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
						IB_T10DIF_CSUM : IB_T10DIF_CRC;
		break;
	default:
		iser_err("Unsupported PI operation %d\n",
			 scsi_get_prot_op(sc));
		return -EINVAL;
	}

	return 0;
}

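/*
 * iser_set_prot_checks - build the reftag/guard check mask from the SCSI
 * command protection flags.
 */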
static inline void
iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
{
	*mask = 0;
	if (sc->prot_flags & SCSI_PROT_REF_CHECK)
		*mask |= ISER_CHECK_REFTAG;
	if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
		*mask |= ISER_CHECK_GUARD;
}

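/*
 * iser_inv_rkey - prepare a local invalidate work request for the MR and
 * advance the MR rkey so the next registration uses a fresh key.
 */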
static void
iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
{
	u32 rkey;

	memset(inv_wr, 0, sizeof(*inv_wr));
	inv_wr->opcode = IB_WR_LOCAL_INV;
	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
	inv_wr->ex.invalidate_rkey = mr->rkey;

	rkey = ib_inc_rkey(mr->rkey);
	ib_update_fast_reg_key(mr, rkey);
}

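/*
 * iser_reg_sig_mr - post a signature handover work request (preceded by a
 * local invalidate when the signature MR key is not marked valid) that binds
 * the data and, when present, protection registrations under the signature
 * MR, and fill sig_reg with the resulting keys and length.
 */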
static int
iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
		struct fast_reg_descriptor *desc,
		struct iser_mem_reg *data_reg,
		struct iser_mem_reg *prot_reg,
		struct iser_mem_reg *sig_reg)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_pi_context *pi_ctx = desc->pi_ctx;
	struct ib_send_wr sig_wr, inv_wr;
	struct ib_send_wr *bad_wr, *wr = NULL;
	struct ib_sig_attrs sig_attrs;
	int ret;

	memset(&sig_attrs, 0, sizeof(sig_attrs));
	ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs);
	if (ret)
		goto err;

	iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);

	if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) {
		iser_inv_rkey(&inv_wr, pi_ctx->sig_mr);
		wr = &inv_wr;
	}

	memset(&sig_wr, 0, sizeof(sig_wr));
	sig_wr.opcode = IB_WR_REG_SIG_MR;
	sig_wr.wr_id = ISER_FASTREG_LI_WRID;
	sig_wr.sg_list = &data_reg->sge;
	sig_wr.num_sge = 1;
	sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
	sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
	if (scsi_prot_sg_count(iser_task->sc))
		sig_wr.wr.sig_handover.prot = &prot_reg->sge;
	sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
					      IB_ACCESS_REMOTE_READ |
					      IB_ACCESS_REMOTE_WRITE;

	if (!wr)
		wr = &sig_wr;
	else
		wr->next = &sig_wr;

	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
	if (ret) {
		iser_err("reg_sig_mr failed, ret:%d\n", ret);
		goto err;
	}
	desc->reg_indicators &= ~ISER_SIG_KEY_VALID;

	sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
	sig_reg->rkey = pi_ctx->sig_mr->rkey;
	sig_reg->sge.addr = 0;
	sig_reg->sge.length = scsi_transfer_length(iser_task->sc);

	iser_dbg("sig_sge: lkey: 0x%x, rkey: 0x%x, addr: 0x%llx, length: %u\n",
		 sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr,
		 sig_reg->sge.length);
err:
	return ret;
}

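/*
 * iser_fast_reg_mr - register the SG with a fast registration work request:
 * build the page list with iser_sg_to_page_vec(), optionally chain a local
 * invalidate, post IB_WR_FAST_REG_MR and fill reg with the resulting
 * lkey/rkey, address and length. A single DMA entry falls back to
 * iser_reg_dma().
 */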
static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
			    struct iser_data_buf *mem,
			    struct fast_reg_descriptor *desc,
			    enum iser_reg_indicator ind,
			    struct iser_mem_reg *reg)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_mr *mr;
	struct ib_fast_reg_page_list *frpl;
	struct ib_send_wr fastreg_wr, inv_wr;
	struct ib_send_wr *bad_wr, *wr = NULL;
	int ret, offset, size, plen;

	/* if there is a single dma entry, the dma mr suffices */
	if (mem->dma_nents == 1)
		return iser_reg_dma(device, mem, reg);

	if (ind == ISER_DATA_KEY_VALID) {
		mr = desc->data_mr;
		frpl = desc->data_frpl;
	} else {
		mr = desc->pi_ctx->prot_mr;
		frpl = desc->pi_ctx->prot_frpl;
	}

	plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list,
				   &offset, &size);
	if (plen * SIZE_4K < size) {
		iser_err("fast reg page_list too short to hold this SG\n");
		return -EINVAL;
	}

	if (!(desc->reg_indicators & ind)) {
		iser_inv_rkey(&inv_wr, mr);
		wr = &inv_wr;
	}

	/* Prepare FASTREG WR */
	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	fastreg_wr.wr_id = ISER_FASTREG_LI_WRID;
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
	fastreg_wr.wr.fast_reg.iova_start = frpl->page_list[0] + offset;
	fastreg_wr.wr.fast_reg.page_list = frpl;
	fastreg_wr.wr.fast_reg.page_list_len = plen;
	fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
	fastreg_wr.wr.fast_reg.length = size;
	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
	fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE  |
					       IB_ACCESS_REMOTE_WRITE |
					       IB_ACCESS_REMOTE_READ);

	if (!wr)
		wr = &fastreg_wr;
	else
		wr->next = &fastreg_wr;

	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
	if (ret) {
		iser_err("fast registration failed, ret:%d\n", ret);
		return ret;
	}
	desc->reg_indicators &= ~ind;

	reg->sge.lkey = mr->lkey;
	reg->rkey = mr->rkey;
	reg->sge.addr = frpl->page_list[0] + offset;
	reg->sge.length = size;

	return ret;
}

/**
 * iser_reg_rdma_mem_fastreg - Registers memory intended for RDMA,
 * using a Fast Registration WR (if possible), obtaining the rkey and va
 *
 * returns 0 on success, errno code on failure
 */
int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
			      enum iser_data_dir cmd_dir)
{
	struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_device *ibdev = device->ib_device;
	struct iser_data_buf *mem = &iser_task->data[cmd_dir];
	struct iser_mem_reg *mem_reg = &iser_task->rdma_reg[cmd_dir];
	struct fast_reg_descriptor *desc = NULL;
	int err, aligned_len;

	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
	if (aligned_len != mem->dma_nents) {
		err = fall_to_bounce_buf(iser_task, mem,
					 cmd_dir, aligned_len);
		if (err) {
			iser_err("failed to allocate bounce buffer\n");
			return err;
		}
	}

	if (mem->dma_nents != 1 ||
	    scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
		desc = iser_reg_desc_get(ib_conn);
		mem_reg->mem_h = desc;
	}

	err = iser_fast_reg_mr(iser_task, mem, desc,
			       ISER_DATA_KEY_VALID, mem_reg);
	if (err)
		goto err_reg;

	if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
		struct iser_mem_reg prot_reg;

		memset(&prot_reg, 0, sizeof(prot_reg));
		if (scsi_prot_sg_count(iser_task->sc)) {
			mem = &iser_task->prot[cmd_dir];
			aligned_len = iser_data_buf_aligned_len(mem, ibdev);
			if (aligned_len != mem->dma_nents) {
				err = fall_to_bounce_buf(iser_task, mem,
							 cmd_dir, aligned_len);
				if (err) {
					iser_err("failed to allocate bounce buffer\n");
					return err;
				}
			}

			err = iser_fast_reg_mr(iser_task, mem, desc,
					       ISER_PROT_KEY_VALID, &prot_reg);
			if (err)
				goto err_reg;
		}

		err = iser_reg_sig_mr(iser_task, desc, mem_reg,
				      &prot_reg, mem_reg);
		if (err) {
			iser_err("Failed to register signature mr\n");
			return err;
		}
		desc->reg_indicators |= ISER_FASTREG_PROTECTED;
	}

	return 0;
err_reg:
	if (desc)
		iser_reg_desc_put(ib_conn, desc);

	return err;
}