/*
 * Copyright (c) 2013 Intel Corporation. All rights reserved.
 * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/kthread.h>

#include "qib_verbs.h"
#include "qib.h"

/**
 * qib_cq_enter - add a new entry to the completion queue
 * @cq: completion queue
 * @entry: work completion entry to add
 * @solicited: true if @entry is a solicited entry
 *
 * This may be called with qp->s_lock held.
 */
void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited)
{
	struct qib_cq_wc *wc;
	unsigned long flags;
	u32 head;
	u32 next;

	spin_lock_irqsave(&cq->lock, flags);

	/*
	 * Note that the head pointer might be writable by user processes.
	 * Take care to verify it is a sane value.
	 */
	wc = cq->queue;
	head = wc->head;
	if (head >= (unsigned) cq->ibcq.cqe) {
		head = cq->ibcq.cqe;
		next = 0;
	} else
		next = head + 1;
	if (unlikely(next == wc->tail)) {
		spin_unlock_irqrestore(&cq->lock, flags);
		if (cq->ibcq.event_handler) {
			struct ib_event ev;

			ev.device = cq->ibcq.device;
			ev.element.cq = &cq->ibcq;
			ev.event = IB_EVENT_CQ_ERR;
			cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
		}
		return;
	}
	if (cq->ip) {
		wc->uqueue[head].wr_id = entry->wr_id;
		wc->uqueue[head].status = entry->status;
		wc->uqueue[head].opcode = entry->opcode;
		wc->uqueue[head].vendor_err = entry->vendor_err;
		wc->uqueue[head].byte_len = entry->byte_len;
		wc->uqueue[head].ex.imm_data =
			(__u32 __force)entry->ex.imm_data;
		wc->uqueue[head].qp_num = entry->qp->qp_num;
		wc->uqueue[head].src_qp = entry->src_qp;
		wc->uqueue[head].wc_flags = entry->wc_flags;
		wc->uqueue[head].pkey_index = entry->pkey_index;
		wc->uqueue[head].slid = entry->slid;
		wc->uqueue[head].sl = entry->sl;
		wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
		wc->uqueue[head].port_num = entry->port_num;
		/* Make sure entry is written before the head index. */
		smp_wmb();
	} else
		wc->kqueue[head] = *entry;
	wc->head = next;

	if (cq->notify == IB_CQ_NEXT_COMP ||
	    (cq->notify == IB_CQ_SOLICITED &&
	     (solicited || entry->status != IB_WC_SUCCESS))) {
		struct kthread_worker *worker;
		/*
		 * This will cause send_complete() to be called in
		 * another thread.
		 */
		smp_rmb();
		worker = cq->dd->worker;
		if (likely(worker)) {
			cq->notify = IB_CQ_NONE;
			cq->triggered++;
			queue_kthread_work(worker, &cq->comptask);
		}
	}

	spin_unlock_irqrestore(&cq->lock, flags);
}

/**
 * qib_poll_cq - poll for work completion entries
 * @ibcq: the completion queue to poll
 * @num_entries: the maximum number of entries to return
 * @entry: pointer to array where work completions are placed
 *
 * Returns the number of completion entries polled.
 *
 * This may be called from interrupt context.  Also called by ib_poll_cq()
 * in the generic verbs code.
 */
int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
	struct qib_cq *cq = to_icq(ibcq);
	struct qib_cq_wc *wc;
	unsigned long flags;
	int npolled;
	u32 tail;

	/* The kernel can only poll a kernel completion queue */
	if (cq->ip) {
		npolled = -EINVAL;
		goto bail;
	}

	spin_lock_irqsave(&cq->lock, flags);

	wc = cq->queue;
	tail = wc->tail;
	if (tail > (u32) cq->ibcq.cqe)
		tail = (u32) cq->ibcq.cqe;
	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
		if (tail == wc->head)
			break;
		/* The kernel doesn't need a RMB since it has the lock. */
		*entry = wc->kqueue[tail];
		if (tail >= cq->ibcq.cqe)
			tail = 0;
		else
			tail++;
	}
	wc->tail = tail;

	spin_unlock_irqrestore(&cq->lock, flags);

bail:
	return npolled;
}

static void send_complete(struct kthread_work *work)
{
	struct qib_cq *cq = container_of(work, struct qib_cq, comptask);

	/*
	 * The completion handler will most likely rearm the notification
	 * and poll for all pending entries.  If a new completion entry
	 * is added while we are in this routine, queue_kthread_work()
	 * won't call us again until we return so we check triggered to
	 * see if we need to call the handler again.
	 */
	for (;;) {
		u8 triggered = cq->triggered;

		/*
		 * IPoIB connected mode assumes the callback is from a
		 * soft IRQ. We simulate this by blocking "bottom halves".
		 * See the implementation for ipoib_cm_handle_tx_wc(),
		 * netif_tx_lock_bh() and netif_tx_lock().
		 */
		local_bh_disable();
		cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
		local_bh_enable();

		if (cq->triggered == triggered)
			return;
	}
}

/**
 * qib_create_cq - create a completion queue
 * @ibdev: the device this completion queue is attached to
 * @entries: the minimum size of the completion queue
 * @comp_vector: unused by the QLogic_IB driver
 * @context: unused by the QLogic_IB driver
 * @udata: user data for libibverbs.so
 *
 * Returns a pointer to the completion queue or negative errno values
 * for failure.
 *
 * Called by ib_create_cq() in the generic verbs code.
 */
struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries,
			    int comp_vector, struct ib_ucontext *context,
			    struct ib_udata *udata)
{
	struct qib_ibdev *dev = to_idev(ibdev);
	struct qib_cq *cq;
	struct qib_cq_wc *wc;
	struct ib_cq *ret;
	u32 sz;

	if (entries < 1 || entries > ib_qib_max_cqes) {
		ret = ERR_PTR(-EINVAL);
		goto done;
	}

	/* Allocate the completion queue structure. */
	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq) {
		ret = ERR_PTR(-ENOMEM);
		goto done;
	}

	/*
	 * Allocate the completion queue entries and head/tail pointers.
	 * This is allocated separately so that it can be resized and
	 * also mapped into user space.
	 * We need to use vmalloc() in order to support mmap and large
	 * numbers of entries.
	 */
	sz = sizeof(*wc);
	if (udata && udata->outlen >= sizeof(__u64))
		sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
	else
		sz += sizeof(struct ib_wc) * (entries + 1);
	wc = vmalloc_user(sz);
	if (!wc) {
		ret = ERR_PTR(-ENOMEM);
		goto bail_cq;
	}

	/*
	 * Return the address of the WC as the offset to mmap.
	 * See qib_mmap() for details.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		int err;

		cq->ip = qib_create_mmap_info(dev, sz, context, wc);
		if (!cq->ip) {
			ret = ERR_PTR(-ENOMEM);
			goto bail_wc;
		}

		err = ib_copy_to_udata(udata, &cq->ip->offset,
				       sizeof(cq->ip->offset));
		if (err) {
			ret = ERR_PTR(err);
			goto bail_ip;
		}
	} else
		cq->ip = NULL;

	spin_lock(&dev->n_cqs_lock);
	if (dev->n_cqs_allocated == ib_qib_max_cqs) {
		spin_unlock(&dev->n_cqs_lock);
		ret = ERR_PTR(-ENOMEM);
		goto bail_ip;
	}

	dev->n_cqs_allocated++;
	spin_unlock(&dev->n_cqs_lock);

	if (cq->ip) {
		spin_lock_irq(&dev->pending_lock);
		list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
		spin_unlock_irq(&dev->pending_lock);
	}

	/*
	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
	 * The number of entries must be >= the number requested or we
	 * return an error.
	 */
	cq->dd = dd_from_dev(dev);
	cq->ibcq.cqe = entries;
	cq->notify = IB_CQ_NONE;
	cq->triggered = 0;
	spin_lock_init(&cq->lock);
	init_kthread_work(&cq->comptask, send_complete);
	wc->head = 0;
	wc->tail = 0;
	cq->queue = wc;

	ret = &cq->ibcq;

	goto done;

bail_ip:
	kfree(cq->ip);
bail_wc:
	vfree(wc);
bail_cq:
	kfree(cq);
done:
	return ret;
}

/**
 * qib_destroy_cq - destroy a completion queue
 * @ibcq: the completion queue to destroy.
 *
 * Returns 0 for success.
 *
 * Called by ib_destroy_cq() in the generic verbs code.
 */
int qib_destroy_cq(struct ib_cq *ibcq)
{
	struct qib_ibdev *dev = to_idev(ibcq->device);
	struct qib_cq *cq = to_icq(ibcq);

	flush_kthread_work(&cq->comptask);
	spin_lock(&dev->n_cqs_lock);
	dev->n_cqs_allocated--;
	spin_unlock(&dev->n_cqs_lock);
	if (cq->ip)
		kref_put(&cq->ip->ref, qib_release_mmap_info);
	else
		vfree(cq->queue);
	kfree(cq);

	return 0;
}

/**
 * qib_req_notify_cq - change the notification type for a completion queue
 * @ibcq: the completion queue
 * @notify_flags: the type of notification to request
 *
 * Returns 0 for success.
 *
 * This may be called from interrupt context.  Also called by
 * ib_req_notify_cq() in the generic verbs code.
 */
int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
{
	struct qib_cq *cq = to_icq(ibcq);
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&cq->lock, flags);
	/*
	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
	 */
	if (cq->notify != IB_CQ_NEXT_COMP)
		cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;

	if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
	    cq->queue->head != cq->queue->tail)
		ret = 1;

	spin_unlock_irqrestore(&cq->lock, flags);

	return ret;
}

/**
 * qib_resize_cq - change the size of the CQ
 * @ibcq: the completion queue
 * @cqe: the new number of entries for the completion queue
 * @udata: user data for libibverbs.so
 *
 * Returns 0 for success.
 */
int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	struct qib_cq *cq = to_icq(ibcq);
	struct qib_cq_wc *old_wc;
	struct qib_cq_wc *wc;
	u32 head, tail, n;
	int ret;
	u32 sz;

	if (cqe < 1 || cqe > ib_qib_max_cqes) {
		ret = -EINVAL;
		goto bail;
	}

	/*
	 * Need to use vmalloc() if we want to support large #s of entries.
	 */
	sz = sizeof(*wc);
	if (udata && udata->outlen >= sizeof(__u64))
		sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
	else
		sz += sizeof(struct ib_wc) * (cqe + 1);
	wc = vmalloc_user(sz);
	if (!wc) {
		ret = -ENOMEM;
		goto bail;
	}

	/* Check that we can write the offset to mmap. */
	if (udata && udata->outlen >= sizeof(__u64)) {
		__u64 offset = 0;

		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
		if (ret)
			goto bail_free;
	}

	spin_lock_irq(&cq->lock);
	/*
	 * Make sure head and tail are sane since they
	 * might be user writable.
	 */
	old_wc = cq->queue;
	head = old_wc->head;
	if (head > (u32) cq->ibcq.cqe)
		head = (u32) cq->ibcq.cqe;
	tail = old_wc->tail;
	if (tail > (u32) cq->ibcq.cqe)
		tail = (u32) cq->ibcq.cqe;
	if (head < tail)
		n = cq->ibcq.cqe + 1 + head - tail;
	else
		n = head - tail;
	if (unlikely((u32)cqe < n)) {
		ret = -EINVAL;
		goto bail_unlock;
	}
	for (n = 0; tail != head; n++) {
		if (cq->ip)
			wc->uqueue[n] = old_wc->uqueue[tail];
		else
			wc->kqueue[n] = old_wc->kqueue[tail];
		if (tail == (u32) cq->ibcq.cqe)
			tail = 0;
		else
			tail++;
	}
	cq->ibcq.cqe = cqe;
	wc->head = n;
	wc->tail = 0;
	cq->queue = wc;
	spin_unlock_irq(&cq->lock);

	vfree(old_wc);

	if (cq->ip) {
		struct qib_ibdev *dev = to_idev(ibcq->device);
		struct qib_mmap_info *ip = cq->ip;

		qib_update_mmap_info(dev, ip, sz, wc);

		/*
		 * Return the offset to mmap.
		 * See qib_mmap() for details.
		 */
		if (udata && udata->outlen >= sizeof(__u64)) {
			ret = ib_copy_to_udata(udata, &ip->offset,
					       sizeof(ip->offset));
			if (ret)
				goto bail;
		}

		spin_lock_irq(&dev->pending_lock);
		if (list_empty(&ip->pending_mmaps))
			list_add(&ip->pending_mmaps, &dev->pending_mmaps);
		spin_unlock_irq(&dev->pending_lock);
	}

	ret = 0;
	goto bail;

bail_unlock:
	spin_unlock_irq(&cq->lock);
bail_free:
	vfree(wc);
bail:
	return ret;
}

int qib_cq_init(struct qib_devdata *dd)
{
	int ret = 0;
	int cpu;
	struct task_struct *task;

	if (dd->worker)
		return 0;
	dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL);
	if (!dd->worker)
		return -ENOMEM;
	init_kthread_worker(dd->worker);
	task = kthread_create_on_node(
		kthread_worker_fn,
		dd->worker,
		dd->assigned_node_id,
		"qib_cq%d", dd->unit);
	if (IS_ERR(task))
		goto task_fail;
	cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id));
	kthread_bind(task, cpu);
	wake_up_process(task);
out:
	return ret;
task_fail:
	ret = PTR_ERR(task);
	kfree(dd->worker);
	dd->worker = NULL;
	goto out;
}

void qib_cq_exit(struct qib_devdata *dd)
{
	struct kthread_worker *worker;

	worker = dd->worker;
	if (!worker)
		return;
	/* blocks future queuing from send_complete() */
	dd->worker = NULL;
	smp_wmb();
	flush_kthread_worker(worker);
	kthread_stop(worker->task);
	kfree(worker);
}