/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"

enum {
	MAX_PENDING_REG_MR = 8,
};

#define MLX5_UMR_ALIGN 2048
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
static __be64 mlx5_ib_update_mtt_emergency_buffer[
		MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
	__aligned(MLX5_UMR_ALIGN);
static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
#endif

static int clean_mr(struct mlx5_ib_mr *mr);

static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	/* Wait until all page fault handlers using the mr complete. */
	synchronize_srcu(&dev->mr_srcu);
#endif

	return err;
}

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}
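/*
 * Completion handler for the asynchronous CREATE_MKEY commands posted by
 * add_keys() below.  On success the new mkey is linked into its cache entry
 * and inserted into the device mkey radix tree; on failure, cache filling is
 * throttled for about a second via dev->fill_delay and the delay timer.
 * (order2idx() above maps an MR order to a cache slot; since
 * mlx5_mr_cache_init() sets ent->order = i + 2, order 2 maps to slot 0,
 * order 5 to slot 3, and so on.)
 */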
static void reg_mr_callback(int status, void *context)
{
	struct mlx5_ib_mr *mr = context;
	struct mlx5_ib_dev *dev = mr->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int c = order2idx(dev, mr->order);
	struct mlx5_cache_ent *ent = &cache->ent[c];
	u8 key;
	unsigned long flags;
	struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
	int err;

	spin_lock_irqsave(&ent->lock, flags);
	ent->pending--;
	spin_unlock_irqrestore(&ent->lock, flags);
	if (status) {
		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	if (mr->out.hdr.status) {
		mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n",
			     mr->out.hdr.status,
			     be32_to_cpu(mr->out.hdr.syndrome));
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
	key = dev->mdev->priv.mkey_key++;
	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
	mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;

	cache->last_add = jiffies;

	spin_lock_irqsave(&ent->lock, flags);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	ent->size++;
	spin_unlock_irqrestore(&ent->lock, flags);

	write_lock_irqsave(&table->lock, flags);
	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key),
				&mr->mmr);
	if (err)
		pr_err("Error inserting to mr tree. 0x%x\n", -err);
	write_unlock_irqrestore(&table->lock, flags);
}

static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int npages = 1 << ent->order;
	int err = 0;
	int i;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	for (i = 0; i < num; i++) {
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			break;
		}

		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			break;
		}
		mr->order = ent->order;
		mr->umred = 1;
		mr->dev = dev;
		in->seg.status = MLX5_MKEY_STATUS_FREE;
		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
		in->seg.log2_page_size = 12;

		spin_lock_irq(&ent->lock);
		ent->pending++;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in,
					    sizeof(*in), reg_mr_callback,
					    mr, &mr->out);
		if (err) {
			spin_lock_irq(&ent->lock);
			ent->pending--;
			spin_unlock_irq(&ent->lock);
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			break;
		}
	}

	kfree(in);
	return err;
}

static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;
	int i;

	for (i = 0; i < num; i++) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = destroy_mkey(dev, mr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}
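/*
 * debugfs knobs for the MR cache.  Writing a count to the per-order "size"
 * file grows or shrinks that cache entry; "limit" sets the threshold the
 * background worker maintains (it refills while cur < 2 * limit and trims
 * when cur > 2 * limit).  A typical invocation, assuming debugfs is mounted
 * at /sys/kernel/debug and using a hypothetical device name:
 *
 *	echo 64 > /sys/kernel/debug/mlx5/0000:01:00.0/mr_cache/5/size
 */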
static ssize_t size_write(struct file *filp, const char __user *buf,
			  size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
		return -EFAULT;

	c = order2idx(dev, ent->order);
	lbuf[sizeof(lbuf) - 1] = 0;

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var < ent->limit)
		return -EINVAL;

	if (var > ent->size) {
		do {
			err = add_keys(dev, c, var - ent->size);
			if (err && err != -EAGAIN)
				return err;

			usleep_range(3000, 5000);
		} while (err);
	} else if (var < ent->size) {
		remove_keys(dev, c, ent->size - var);
	}

	return count;
}

static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
			 loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations size_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= size_write,
	.read	= size_read,
};

static ssize_t limit_write(struct file *filp, const char __user *buf,
			   size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
		return -EFAULT;

	c = order2idx(dev, ent->order);
	lbuf[sizeof(lbuf) - 1] = 0;

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var > ent->size)
		return -EINVAL;

	ent->limit = var;

	if (ent->cur < ent->limit) {
		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
		if (err)
			return err;
	}

	return count;
}

static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
			  loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations limit_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= limit_write,
	.read	= limit_read,
};

static int someone_adding(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur < cache->ent[i].limit)
			return 1;
	}

	return 0;
}
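/*
 * Background maintenance of a cache entry: the work item keeps between
 * limit and 2 * limit mkeys available.  Production backs off for a few
 * milliseconds on -EAGAIN (too many pending CREATE_MKEY commands) and for a
 * second on other failures; shrinking starts only once no entry is below its
 * limit and nothing has been added for 300 seconds.
 */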
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);
	int err;

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
		err = add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit) {
			if (err == -EAGAIN) {
				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
					    i + 2);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(3));
			} else if (err) {
				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
					     i + 2, err);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(1000));
			} else {
				queue_work(cache->wq, &ent->work);
			}
		}
	} else if (ent->cur > 2 * ent->limit) {
		if (!someone_adding(cache) &&
		    time_after(jiffies, cache->last_add + 300 * HZ)) {
			remove_keys(dev, i, 1);
			if (ent->cur > ent->limit)
				queue_work(cache->wq, &ent->work);
		} else {
			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
		}
	}
}

static void delayed_cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
	__cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, work);
	__cache_work_func(ent);
}

static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_ib_mr *mr = NULL;
	struct mlx5_cache_ent *ent;
	int c;
	int i;

	c = order2idx(dev, order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
		return NULL;
	}

	for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];

		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

		spin_lock_irq(&ent->lock);
		if (!list_empty(&ent->head)) {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock_irq(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			break;
		}
		spin_unlock_irq(&ent->lock);

		queue_work(cache->wq, &ent->work);

		if (mr)
			break;
	}

	if (!mr)
		cache->ent[c].miss++;

	return mr;
}

static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int shrink = 0;
	int c;

	c = order2idx(dev, mr->order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
		return;
	}
	ent = &cache->ent[c];
	spin_lock_irq(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	if (ent->cur > 2 * ent->limit)
		shrink = 1;
	spin_unlock_irq(&ent->lock);

	if (shrink)
		queue_work(cache->wq, &ent->work);
}

static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = destroy_mkey(dev, mr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}
static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int i;

	if (!mlx5_debugfs_root)
		return 0;

	cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
	if (!cache->root)
		return -ENOMEM;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		sprintf(ent->name, "%d", ent->order);
		ent->dir = debugfs_create_dir(ent->name, cache->root);
		if (!ent->dir)
			return -ENOMEM;

		ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
						 &size_fops);
		if (!ent->fsize)
			return -ENOMEM;

		ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
						  &limit_fops);
		if (!ent->flimit)
			return -ENOMEM;

		ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
					       &ent->cur);
		if (!ent->fcur)
			return -ENOMEM;

		ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
						&ent->miss);
		if (!ent->fmiss)
			return -ENOMEM;
	}

	return 0;
}

static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
	if (!mlx5_debugfs_root)
		return;

	debugfs_remove_recursive(dev->cache.root);
}

static void delay_time_func(unsigned long ctx)
{
	struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;

	dev->fill_delay = 0;
}

int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int limit;
	int err;
	int i;

	cache->wq = create_singlethread_workqueue("mkey_cache");
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;

		if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
			limit = dev->mdev->profile->mr_cache[i].limit;
		else
			limit = 0;

		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
		ent->limit = limit;
		queue_work(cache->wq, &ent->work);
	}

	err = mlx5_mr_cache_debugfs_init(dev);
	if (err)
		mlx5_ib_warn(dev, "cache debugfs failure\n");

	return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	dev->cache.stopped = 1;
	flush_workqueue(dev->cache.wq);

	mlx5_mr_cache_debugfs_cleanup(dev);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	destroy_workqueue(dev->cache.wq);
	del_timer_sync(&dev->delay_timer);

	return 0;
}

struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_mkey_seg *seg;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	seg = &in->seg;
	seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
	seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	seg->start_addr = 0;

	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
				    NULL);
	if (err)
		goto err_in;

	kfree(in);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, int page_size)
{
	u64 offset;
	int npages;

	offset = addr & (page_size - 1);
	npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
	return (npages + 1) / 2;
}

static int use_umr(int order)
{
	return order <= MLX5_MAX_UMR_SHIFT;
}
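/*
 * Worked example for the translation sizes used below (assuming 4 KiB
 * pages): a 1 MiB region that starts on a page boundary needs 256 MTT
 * entries, which get_octo_len() reports as (256 + 1) / 2 = 128 sixteen-byte
 * units (two 8-byte entries each).  use_umr() limits the UMR/cache path to
 * regions of at most 2^MLX5_MAX_UMR_SHIFT pages; larger ones fall back to a
 * blocking CREATE_MKEY in reg_create().
 */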
static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
			     struct ib_sge *sg, u64 dma, int n, u32 key,
			     int page_shift, u64 virt_addr, u64 len,
			     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_mr *mr = dev->umrc.mr;
	struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;

	sg->addr = dma;
	sg->length = ALIGN(sizeof(u64) * n, 64);
	sg->lkey = mr->lkey;

	wr->next = NULL;
	wr->send_flags = 0;
	wr->sg_list = sg;
	if (n)
		wr->num_sge = 1;
	else
		wr->num_sge = 0;

	wr->opcode = MLX5_IB_WR_UMR;

	umrwr->npages = n;
	umrwr->page_shift = page_shift;
	umrwr->mkey = key;
	umrwr->target.virt_addr = virt_addr;
	umrwr->length = len;
	umrwr->access_flags = access_flags;
	umrwr->pd = pd;
}

static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
			       struct ib_send_wr *wr, u32 key)
{
	struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;

	wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
	wr->opcode = MLX5_IB_WR_UMR;
	umrwr->mkey = key;
}

void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
	struct mlx5_ib_umr_context *context;
	struct ib_wc wc;
	int err;

	while (1) {
		err = ib_poll_cq(cq, 1, &wc);
		if (err < 0) {
			pr_warn("poll cq error %d\n", err);
			return;
		}
		if (err == 0)
			break;

		context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id;
		context->status = wc.status;
		complete(&context->done);
	}
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}
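/*
 * reg_umr() turns a free cached mkey into the requested MR: the page list is
 * written to a 2 KiB-aligned buffer, DMA-mapped and described by a UMR work
 * request on the dedicated UMR QP.  The caller then sleeps on
 * umr_context.done, which mlx5_umr_cq_handler() above completes when the
 * corresponding work completion arrives.
 */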
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
				  u64 virt_addr, u64 len, int npages,
				  int page_shift, int order, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct ib_send_wr wr, *bad;
	struct mlx5_ib_mr *mr;
	struct ib_sge sg;
	int size;
	__be64 *mr_pas;
	__be64 *pas;
	dma_addr_t dma;
	int err = 0;
	int i;

	for (i = 0; i < 1; i++) {
		mr = alloc_cached_mr(dev, order);
		if (mr)
			break;

		err = add_keys(dev, order2idx(dev, order), 1);
		if (err && err != -EAGAIN) {
			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
			break;
		}
	}

	if (!mr)
		return ERR_PTR(-EAGAIN);

	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
	 * To avoid copying garbage after the pas array, we allocate
	 * a little more. */
	size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
	mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
	if (!mr_pas) {
		err = -ENOMEM;
		goto free_mr;
	}

	pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN);
	mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
	/* Clear padding after the actual pages. */
	memset(pas + npages, 0, size - npages * sizeof(u64));

	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		err = -ENOMEM;
		goto free_pas;
	}

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = (u64)(unsigned long)&umr_context;
	prep_umr_reg_wqe(pd, &wr, &sg, dma, npages, mr->mmr.key, page_shift,
			 virt_addr, len, access_flags);

	mlx5_ib_init_umr_context(&umr_context);
	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
		goto unmap_dma;
	} else {
		wait_for_completion(&umr_context.done);
		if (umr_context.status != IB_WC_SUCCESS) {
			mlx5_ib_warn(dev, "reg umr failed\n");
			err = -EFAULT;
		}
	}

	mr->mmr.iova = virt_addr;
	mr->mmr.size = len;
	mr->mmr.pd = to_mpd(pd)->pdn;

	mr->live = 1;

unmap_dma:
	up(&umrc->sem);
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

free_pas:
	kfree(mr_pas);

free_mr:
	if (err) {
		free_cached_mr(dev, mr);
		return ERR_PTR(err);
	}

	return mr;
}
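/*
 * For ODP MRs, mlx5_ib_update_mtt() below rewrites a window of the MR's
 * translation table in place.  The window is rounded to the device copy
 * granularity (MLX5_UMR_MTT_ALIGNMENT bytes of MTT entries) and is processed
 * in chunks of at most one page of entries per UMR post (512 entries per
 * iteration with 4 KiB pages).
 */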
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
		       int zap)
{
	struct mlx5_ib_dev *dev = mr->dev;
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct ib_umem *umem = mr->umem;
	int size;
	__be64 *pas;
	dma_addr_t dma;
	struct ib_send_wr wr, *bad;
	struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr.wr.fast_reg;
	struct ib_sge sg;
	int err = 0;
	const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
	const int page_index_mask = page_index_alignment - 1;
	size_t pages_mapped = 0;
	size_t pages_to_map = 0;
	size_t pages_iter = 0;
	int use_emergency_buf = 0;

	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
	 * so we need to align the offset and length accordingly */
	if (start_page_index & page_index_mask) {
		npages += start_page_index & page_index_mask;
		start_page_index &= ~page_index_mask;
	}

	pages_to_map = ALIGN(npages, page_index_alignment);

	if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
		return -EINVAL;

	size = sizeof(u64) * pages_to_map;
	size = min_t(int, PAGE_SIZE, size);
	/* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
	 * code, when we are called from an invalidation. The pas buffer must
	 * be 2k-aligned for Connect-IB. */
	pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
	if (!pas) {
		mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
		pas = mlx5_ib_update_mtt_emergency_buffer;
		size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
		use_emergency_buf = 1;
		mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
		memset(pas, 0, size);
	}
	pages_iter = size / sizeof(u64);
	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
		err = -ENOMEM;
		goto free_pas;
	}

	for (pages_mapped = 0;
	     pages_mapped < pages_to_map && !err;
	     pages_mapped += pages_iter, start_page_index += pages_iter) {
		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);

		npages = min_t(size_t,
			       pages_iter,
			       ib_umem_num_pages(umem) - start_page_index);

		if (!zap) {
			__mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
					       start_page_index, npages, pas,
					       MLX5_IB_MTT_PRESENT);
			/* Clear padding after the pages brought from the
			 * umem. */
			memset(pas + npages, 0, size - npages * sizeof(u64));
		}

		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);

		memset(&wr, 0, sizeof(wr));
		wr.wr_id = (u64)(unsigned long)&umr_context;

		sg.addr = dma;
		sg.length = ALIGN(npages * sizeof(u64),
				  MLX5_UMR_MTT_ALIGNMENT);
		sg.lkey = dev->umrc.mr->lkey;

		wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
				MLX5_IB_SEND_UMR_UPDATE_MTT;
		wr.sg_list = &sg;
		wr.num_sge = 1;
		wr.opcode = MLX5_IB_WR_UMR;
		umrwr->npages = sg.length / sizeof(u64);
		umrwr->page_shift = PAGE_SHIFT;
		umrwr->mkey = mr->mmr.key;
		umrwr->target.offset = start_page_index;

		mlx5_ib_init_umr_context(&umr_context);
		down(&umrc->sem);
		err = ib_post_send(umrc->qp, &wr, &bad);
		if (err) {
			mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
		} else {
			wait_for_completion(&umr_context.done);
			if (umr_context.status != IB_WC_SUCCESS) {
				mlx5_ib_err(dev, "UMR completion failed, code %d\n",
					    umr_context.status);
				err = -EFAULT;
			}
		}
		up(&umrc->sem);
	}
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

free_pas:
	if (!use_emergency_buf)
		free_page((unsigned long)pas);
	else
		mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);

	return err;
}
#endif
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
				     u64 length, struct ib_umem *umem,
				     int npages, int page_shift,
				     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int inlen;
	int err;
	bool pg_cap = !!(dev->mdev->caps.gen.flags &
			 MLX5_DEV_CAP_FLAG_ON_DMND_PG);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
			     pg_cap ? MLX5_IB_MTT_PRESENT : 0);

	/* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
	 * in the page list submitted with the command. */
	in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
	in->seg.flags = convert_access(access_flags) |
		MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	in->seg.start_addr = cpu_to_be64(virt_addr);
	in->seg.len = cpu_to_be64(length);
	in->seg.bsfs_octo_size = 0;
	in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length,
							1 << page_shift));
	in->seg.log2_page_size = page_shift;
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
							 1 << page_shift));
	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL,
				    NULL, NULL);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->umem = umem;
	mr->dev = dev;
	mr->live = 1;
	kvfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);

	return mr;

err_2:
	kvfree(in);

err_1:
	kfree(mr);

	return ERR_PTR(err);
}
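/*
 * User MR registration entry point: regions small enough for use_umr() are
 * served from the mkey cache via reg_umr(); if the cache is empty or the
 * region is too large, reg_create() issues a blocking CREATE_MKEY instead.
 * ODP registrations above the UMR size limit are rejected.
 */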
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    start, virt_addr, length, access_flags);
	umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
			   0);
	if (IS_ERR(umem)) {
		mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
		return (void *)umem;
	}

	mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
	if (!npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		err = -EINVAL;
		goto error;
	}

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
		    npages, ncont, order, page_shift);

	if (use_umr(order)) {
		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
			     order, access_flags);
		if (PTR_ERR(mr) == -EAGAIN) {
			mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
			mr = NULL;
		}
	} else if (access_flags & IB_ACCESS_ON_DEMAND) {
		err = -EINVAL;
		pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
		goto error;
	}

	if (!mr)
		mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
				access_flags);

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto error;
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);

	mr->umem = umem;
	mr->npages = npages;
	atomic_add(npages, &dev->mdev->priv.reg_pages);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (umem->odp_data) {
		/*
		 * This barrier prevents the compiler from moving the
		 * setting of umem->odp_data->private to point to our
		 * MR, before reg_umr finished, to ensure that the MR
		 * initialization has finished before starting to
		 * handle invalidations.
		 */
		smp_wmb();
		mr->umem->odp_data->private = mr;
		/*
		 * Make sure we will see the new
		 * umem->odp_data->private value in the invalidation
		 * routines, before we can get page faults on the
		 * MR. Page faults can happen once we put the MR in
		 * the tree, below this line. Without the barrier,
		 * there can be a fault handling and an invalidation
		 * before umem->odp_data->private == mr is visible to
		 * the invalidation handler.
		 */
		smp_wmb();
	}
#endif

	return &mr->ibmr;

error:
	ib_umem_release(umem);
	return ERR_PTR(err);
}

static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct ib_send_wr wr, *bad;
	int err;

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = (u64)(unsigned long)&umr_context;
	prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);

	mlx5_ib_init_umr_context(&umr_context);
	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &wr, &bad);
	if (err) {
		up(&umrc->sem);
		mlx5_ib_dbg(dev, "err %d\n", err);
		goto error;
	} else {
		wait_for_completion(&umr_context.done);
		up(&umrc->sem);
	}
	if (umr_context.status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "unreg umr failed\n");
		err = -EFAULT;
		goto error;
	}
	return 0;

error:
	return err;
}

static int clean_mr(struct mlx5_ib_mr *mr)
{
	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
	int umred = mr->umred;
	int err;

	if (!umred) {
		err = destroy_mkey(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
				     mr->mmr.key, err);
			return err;
		}
	} else {
		err = unreg_umr(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed unregister\n");
			return err;
		}
		free_cached_mr(dev, mr);
	}

	if (!umred)
		kfree(mr);

	return 0;
}
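/*
 * Deregistration: cached (umred) mkeys are invalidated with a UMR and
 * returned to the pool by clean_mr() above rather than destroyed, while
 * non-cached mkeys are destroyed and freed.  For ODP MRs the umem is torn
 * down first, as described in mlx5_ib_dereg_mr() below, so no invalidation
 * can race with freeing the MR.
 */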
int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int npages = mr->npages;
	struct ib_umem *umem = mr->umem;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (umem && umem->odp_data) {
		/* Prevent new page faults from succeeding */
		mr->live = 0;
		/* Wait for all running page-fault handlers to finish. */
		synchronize_srcu(&dev->mr_srcu);
		/* Destroy all page mappings */
		mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
					 ib_umem_end(umem));
		/*
		 * We kill the umem before the MR for ODP,
		 * so that there will not be any invalidations in
		 * flight, looking at the *mr struct.
		 */
		ib_umem_release(umem);
		atomic_sub(npages, &dev->mdev->priv.reg_pages);

		/* Avoid double-freeing the umem. */
		umem = NULL;
	}
#endif

	clean_mr(mr);

	if (umem) {
		ib_umem_release(umem);
		atomic_sub(npages, &dev->mdev->priv.reg_pages);
	}

	return 0;
}

struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
				struct ib_mr_init_attr *mr_init_attr)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int access_mode, err;
	int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	in->seg.status = MLX5_MKEY_STATUS_FREE;
	in->seg.xlt_oct_size = cpu_to_be32(ndescs);
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	access_mode = MLX5_ACCESS_MODE_MTT;

	if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) {
		u32 psv_index[2];

		in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
					       MLX5_MKEY_BSF_EN);
		in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
		mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
		if (!mr->sig) {
			err = -ENOMEM;
			goto err_free_in;
		}

		/* create mem & wire PSVs */
		err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
					   2, psv_index);
		if (err)
			goto err_free_sig;

		access_mode = MLX5_ACCESS_MODE_KLM;
		mr->sig->psv_memory.psv_idx = psv_index[0];
		mr->sig->psv_wire.psv_idx = psv_index[1];

		mr->sig->sig_status_checked = true;
		mr->sig->sig_err_exists = false;
		/* Next UMR, Arm SIGERR */
		++mr->sig->sigerr_count;
	}

	in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in),
				    NULL, NULL, NULL);
	if (err)
		goto err_destroy_psv;

	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;
	kfree(in);

	return &mr->ibmr;

err_destroy_psv:
	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
	}
err_free_sig:
	kfree(mr->sig);
err_free_in:
	kfree(in);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}

int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int err;

	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
		kfree(mr->sig);
	}

	err = destroy_mkey(dev, mr);
	if (err) {
		mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
			     mr->mmr.key, err);
		return err;
	}

	kfree(mr);

	return err;
}
struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
					int max_page_list_len)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	in->seg.status = MLX5_MKEY_STATUS_FREE;
	in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	/* TBD not needed - issue 197292 */
	in->seg.log2_page_size = PAGE_SHIFT;

	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
				    NULL, NULL);
	kfree(in);
	if (err)
		goto err_free;

	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_free:
	kfree(mr);
	return ERR_PTR(err);
}

struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
							       int page_list_len)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl;
	int size = page_list_len * sizeof(u64);

	mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
	if (!mfrpl)
		return ERR_PTR(-ENOMEM);

	mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
	if (!mfrpl->ibfrpl.page_list)
		goto err_free;

	mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
						     size, &mfrpl->map,
						     GFP_KERNEL);
	if (!mfrpl->mapped_page_list)
		goto err_free;

	WARN_ON(mfrpl->map & 0x3f);

	return &mfrpl->ibfrpl;

err_free:
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
	return ERR_PTR(-ENOMEM);
}

void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
	struct mlx5_ib_dev *dev = to_mdev(page_list->device);
	int size = page_list->max_page_list_len * sizeof(u64);

	dma_free_coherent(&dev->mdev->pdev->dev, size, mfrpl->mapped_page_list,
			  mfrpl->map);
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
}

int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
			    struct ib_mr_status *mr_status)
{
	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
	int ret = 0;

	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
		pr_err("Invalid status check mask\n");
		ret = -EINVAL;
		goto done;
	}

	mr_status->fail_status = 0;
	if (check_mask & IB_MR_CHECK_SIG_STATUS) {
		if (!mmr->sig) {
			ret = -EINVAL;
			pr_err("signature status check requested on a non-signature enabled MR\n");
			goto done;
		}

		mmr->sig->sig_status_checked = true;
		if (!mmr->sig->sig_err_exists)
			goto done;

		if (ibmr->lkey == mmr->sig->err_item.key)
			memcpy(&mr_status->sig_err, &mmr->sig->err_item,
			       sizeof(mr_status->sig_err));
		else {
			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
			mr_status->sig_err.sig_err_offset = 0;
			mr_status->sig_err.key = mmr->sig->err_item.key;
		}

		mmr->sig->sig_err_exists = false;
		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
	}

done:
	return ret;
}