root/drivers/infiniband/hw/hns/hns_roce_mr.c

DEFINITIONS

This source file includes the following definitions.
  1. hw_index_to_key
  2. key_to_hw_index
  3. hns_roce_sw2hw_mpt
  4. hns_roce_hw2sw_mpt
  5. hns_roce_buddy_alloc
  6. hns_roce_buddy_free
  7. hns_roce_buddy_init
  8. hns_roce_buddy_cleanup
  9. hns_roce_alloc_mtt_range
  10. hns_roce_mtt_init
  11. hns_roce_mtt_cleanup
  12. hns_roce_loop_free
  13. pbl_1hop_alloc
  14. pbl_2hop_alloc
  15. pbl_3hop_alloc
  16. hns_roce_mhop_alloc
  17. hns_roce_mr_alloc
  18. hns_roce_mhop_free
  19. hns_roce_mr_free
  20. hns_roce_mr_enable
  21. hns_roce_write_mtt_chunk
  22. hns_roce_write_mtt
  23. hns_roce_buf_write_mtt
  24. hns_roce_init_mr_table
  25. hns_roce_cleanup_mr_table
  26. hns_roce_get_dma_mr
  27. hns_roce_ib_umem_write_mtt
  28. hns_roce_ib_umem_write_mr
  29. hns_roce_reg_user_mr
  30. rereg_mr_trans
  31. hns_roce_rereg_user_mr
  32. hns_roce_dereg_mr
  33. hns_roce_alloc_mr
  34. hns_roce_set_page
  35. hns_roce_map_mr_sg
  36. hns_roce_mw_free
  37. hns_roce_mw_enable
  38. hns_roce_alloc_mw
  39. hns_roce_dealloc_mw
  40. hns_roce_mtr_init
  41. hns_roce_mtr_cleanup
  42. hns_roce_write_mtr
  43. hns_roce_mtr_attach
  44. hns_roce_mtr_find

   1 /*
   2  * Copyright (c) 2016 Hisilicon Limited.
   3  * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
   4  *
   5  * This software is available to you under a choice of one of two
   6  * licenses.  You may choose to be licensed under the terms of the GNU
   7  * General Public License (GPL) Version 2, available from the file
   8  * COPYING in the main directory of this source tree, or the
   9  * OpenIB.org BSD license below:
  10  *
  11  *     Redistribution and use in source and binary forms, with or
  12  *     without modification, are permitted provided that the following
  13  *     conditions are met:
  14  *
  15  *      - Redistributions of source code must retain the above
  16  *        copyright notice, this list of conditions and the following
  17  *        disclaimer.
  18  *
  19  *      - Redistributions in binary form must reproduce the above
  20  *        copyright notice, this list of conditions and the following
  21  *        disclaimer in the documentation and/or other materials
  22  *        provided with the distribution.
  23  *
  24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  31  * SOFTWARE.
  32  */
  33 
  34 #include <linux/platform_device.h>
  35 #include <linux/vmalloc.h>
  36 #include <rdma/ib_umem.h>
  37 #include "hns_roce_device.h"
  38 #include "hns_roce_cmd.h"
  39 #include "hns_roce_hem.h"
  40 
  41 static u32 hw_index_to_key(unsigned long ind)
  42 {
  43         return (u32)(ind >> 24) | (ind << 8);
  44 }
  45 
  46 unsigned long key_to_hw_index(u32 key)
  47 {
  48         return (key << 24) | (key >> 8);
  49 }
  50 
  51 static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev,
  52                               struct hns_roce_cmd_mailbox *mailbox,
  53                               unsigned long mpt_index)
  54 {
  55         return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
  56                                  HNS_ROCE_CMD_SW2HW_MPT,
  57                                  HNS_ROCE_CMD_TIMEOUT_MSECS);
  58 }
  59 
  60 int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
   61                        struct hns_roce_cmd_mailbox *mailbox,
   62                        unsigned long mpt_index)
  63 {
  64         return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0,
  65                                  mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT,
  66                                  HNS_ROCE_CMD_TIMEOUT_MSECS);
  67 }
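 
/*
 * Note: hns_roce_hw2sw_mpt() may be called with a NULL mailbox (see
 * hns_roce_mr_free() below). In that case out_param is 0 and the
 * op_modifier argument (!mailbox) indicates that no MPT context needs
 * to be copied back; the entry is only invalidated.
 */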
  68 
  69 static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order,
  70                                 unsigned long *seg)
  71 {
  72         int o;
  73         u32 m;
  74 
  75         spin_lock(&buddy->lock);
  76 
  77         for (o = order; o <= buddy->max_order; ++o) {
  78                 if (buddy->num_free[o]) {
  79                         m = 1 << (buddy->max_order - o);
  80                         *seg = find_first_bit(buddy->bits[o], m);
  81                         if (*seg < m)
  82                                 goto found;
  83                 }
  84         }
  85         spin_unlock(&buddy->lock);
  86         return -1;
  87 
  88  found:
  89         clear_bit(*seg, buddy->bits[o]);
  90         --buddy->num_free[o];
  91 
  92         while (o > order) {
  93                 --o;
  94                 *seg <<= 1;
  95                 set_bit(*seg ^ 1, buddy->bits[o]);
  96                 ++buddy->num_free[o];
  97         }
  98 
  99         spin_unlock(&buddy->lock);
 100 
 101         *seg <<= order;
 102         return 0;
 103 }
 104 
 105 static void hns_roce_buddy_free(struct hns_roce_buddy *buddy, unsigned long seg,
 106                                 int order)
 107 {
 108         seg >>= order;
 109 
 110         spin_lock(&buddy->lock);
 111 
 112         while (test_bit(seg ^ 1, buddy->bits[order])) {
 113                 clear_bit(seg ^ 1, buddy->bits[order]);
 114                 --buddy->num_free[order];
 115                 seg >>= 1;
 116                 ++order;
 117         }
 118 
 119         set_bit(seg, buddy->bits[order]);
 120         ++buddy->num_free[order];
 121 
 122         spin_unlock(&buddy->lock);
 123 }
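 
/*
 * A worked example of the allocator pair above, assuming max_order == 2
 * (four order-0 segments) and a fresh bitmap where only bit 0 of
 * bits[2] is set:
 *
 *      hns_roce_buddy_alloc(buddy, 0, &seg);
 *      // the order-2 block is split twice: bit 1 is set in bits[1]
 *      // and in bits[0], seg == 0 is returned, num_free == {1, 1, 0}
 *      hns_roce_buddy_free(buddy, 0, 0);
 *      // bit 1 of bits[0] is a free buddy, so the pair coalesces up
 *      // through order 1 to order 2, restoring num_free == {0, 0, 1}
 */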
 124 
 125 static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order)
 126 {
 127         int i, s;
 128 
 129         buddy->max_order = max_order;
 130         spin_lock_init(&buddy->lock);
 131         buddy->bits = kcalloc(buddy->max_order + 1,
 132                               sizeof(*buddy->bits),
 133                               GFP_KERNEL);
 134         buddy->num_free = kcalloc(buddy->max_order + 1,
 135                                   sizeof(*buddy->num_free),
 136                                   GFP_KERNEL);
 137         if (!buddy->bits || !buddy->num_free)
 138                 goto err_out;
 139 
 140         for (i = 0; i <= buddy->max_order; ++i) {
 141                 s = BITS_TO_LONGS(1 << (buddy->max_order - i));
 142                 buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL |
 143                                          __GFP_NOWARN);
 144                 if (!buddy->bits[i]) {
 145                         buddy->bits[i] = vzalloc(array_size(s, sizeof(long)));
 146                         if (!buddy->bits[i])
 147                                 goto err_out_free;
 148                 }
 149         }
 150 
 151         set_bit(0, buddy->bits[buddy->max_order]);
 152         buddy->num_free[buddy->max_order] = 1;
 153 
 154         return 0;
 155 
 156 err_out_free:
 157         for (i = 0; i <= buddy->max_order; ++i)
 158                 kvfree(buddy->bits[i]);
 159 
 160 err_out:
 161         kfree(buddy->bits);
 162         kfree(buddy->num_free);
 163         return -ENOMEM;
 164 }
 165 
 166 static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy)
 167 {
 168         int i;
 169 
 170         for (i = 0; i <= buddy->max_order; ++i)
 171                 kvfree(buddy->bits[i]);
 172 
 173         kfree(buddy->bits);
 174         kfree(buddy->num_free);
 175 }
 176 
 177 static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order,
 178                                     unsigned long *seg, u32 mtt_type)
 179 {
 180         struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
 181         struct hns_roce_hem_table *table;
 182         struct hns_roce_buddy *buddy;
 183         int ret;
 184 
 185         switch (mtt_type) {
 186         case MTT_TYPE_WQE:
 187                 buddy = &mr_table->mtt_buddy;
 188                 table = &mr_table->mtt_table;
 189                 break;
 190         case MTT_TYPE_CQE:
 191                 buddy = &mr_table->mtt_cqe_buddy;
 192                 table = &mr_table->mtt_cqe_table;
 193                 break;
 194         case MTT_TYPE_SRQWQE:
 195                 buddy = &mr_table->mtt_srqwqe_buddy;
 196                 table = &mr_table->mtt_srqwqe_table;
 197                 break;
 198         case MTT_TYPE_IDX:
 199                 buddy = &mr_table->mtt_idx_buddy;
 200                 table = &mr_table->mtt_idx_table;
 201                 break;
 202         default:
  203                 dev_err(hr_dev->dev, "Unsupported MTT table type: %d\n",
 204                         mtt_type);
 205                 return -EINVAL;
 206         }
 207 
 208         ret = hns_roce_buddy_alloc(buddy, order, seg);
 209         if (ret == -1)
 210                 return -1;
 211 
 212         if (hns_roce_table_get_range(hr_dev, table, *seg,
 213                                      *seg + (1 << order) - 1)) {
 214                 hns_roce_buddy_free(buddy, *seg, order);
 215                 return -1;
 216         }
 217 
 218         return 0;
 219 }
 220 
 221 int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
 222                       struct hns_roce_mtt *mtt)
 223 {
 224         int ret;
 225         int i;
 226 
  227         /* Page num is zero, corresponding to DMA memory registration */
 228         if (!npages) {
 229                 mtt->order = -1;
 230                 mtt->page_shift = HNS_ROCE_HEM_PAGE_SHIFT;
 231                 return 0;
 232         }
 233 
  234         /* Note: if page_shift is zero, this is a fast memory registration */
 235         mtt->page_shift = page_shift;
 236 
  237         /* Compute the number of MTT entries necessary */
 238         for (mtt->order = 0, i = HNS_ROCE_MTT_ENTRY_PER_SEG; i < npages;
 239              i <<= 1)
 240                 ++mtt->order;
 241 
 242         /* Allocate MTT entry */
 243         ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg,
 244                                        mtt->mtt_type);
 245         if (ret == -1)
 246                 return -ENOMEM;
 247 
 248         return 0;
 249 }
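 
/*
 * Example of the order computation above, assuming
 * HNS_ROCE_MTT_ENTRY_PER_SEG == 8: for npages == 100 the loop runs
 * with i == 8, 16, 32 and 64 (each < 100), leaving mtt->order == 4,
 * i.e. 2^4 segments of 8 entries == 128 MTT entries >= 100 pages.
 */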
 250 
 251 void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt)
 252 {
 253         struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
 254 
 255         if (mtt->order < 0)
 256                 return;
 257 
 258         switch (mtt->mtt_type) {
 259         case MTT_TYPE_WQE:
 260                 hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg,
 261                                     mtt->order);
 262                 hns_roce_table_put_range(hr_dev, &mr_table->mtt_table,
 263                                         mtt->first_seg,
 264                                         mtt->first_seg + (1 << mtt->order) - 1);
 265                 break;
 266         case MTT_TYPE_CQE:
 267                 hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg,
 268                                     mtt->order);
 269                 hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table,
 270                                         mtt->first_seg,
 271                                         mtt->first_seg + (1 << mtt->order) - 1);
 272                 break;
 273         case MTT_TYPE_SRQWQE:
 274                 hns_roce_buddy_free(&mr_table->mtt_srqwqe_buddy, mtt->first_seg,
 275                                     mtt->order);
 276                 hns_roce_table_put_range(hr_dev, &mr_table->mtt_srqwqe_table,
 277                                         mtt->first_seg,
 278                                         mtt->first_seg + (1 << mtt->order) - 1);
 279                 break;
 280         case MTT_TYPE_IDX:
 281                 hns_roce_buddy_free(&mr_table->mtt_idx_buddy, mtt->first_seg,
 282                                     mtt->order);
 283                 hns_roce_table_put_range(hr_dev, &mr_table->mtt_idx_table,
 284                                         mtt->first_seg,
 285                                         mtt->first_seg + (1 << mtt->order) - 1);
 286                 break;
 287         default:
 288                 dev_err(hr_dev->dev,
  289                         "Unsupported mtt type %d, mtt cleanup failed\n",
 290                         mtt->mtt_type);
 291                 break;
 292         }
 293 }
 294 
 295 static void hns_roce_loop_free(struct hns_roce_dev *hr_dev,
 296                                struct hns_roce_mr *mr, int err_loop_index,
 297                                int loop_i, int loop_j)
 298 {
 299         struct device *dev = hr_dev->dev;
 300         u32 mhop_num;
 301         u32 pbl_bt_sz;
 302         u64 bt_idx;
 303         int i, j;
 304 
 305         pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
 306         mhop_num = hr_dev->caps.pbl_hop_num;
 307 
 308         i = loop_i;
 309         if (mhop_num == 3 && err_loop_index == 2) {
 310                 for (; i >= 0; i--) {
 311                         dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
 312                                           mr->pbl_l1_dma_addr[i]);
 313 
 314                         for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) {
 315                                 if (i == loop_i && j >= loop_j)
 316                                         break;
 317 
 318                                 bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j;
 319                                 dma_free_coherent(dev, pbl_bt_sz,
 320                                                   mr->pbl_bt_l2[bt_idx],
 321                                                   mr->pbl_l2_dma_addr[bt_idx]);
 322                         }
 323                 }
 324         } else if (mhop_num == 3 && err_loop_index == 1) {
 325                 for (i -= 1; i >= 0; i--) {
 326                         dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
 327                                           mr->pbl_l1_dma_addr[i]);
 328 
 329                         for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) {
 330                                 bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j;
 331                                 dma_free_coherent(dev, pbl_bt_sz,
 332                                                   mr->pbl_bt_l2[bt_idx],
 333                                                   mr->pbl_l2_dma_addr[bt_idx]);
 334                         }
 335                 }
 336         } else if (mhop_num == 2 && err_loop_index == 1) {
 337                 for (i -= 1; i >= 0; i--)
 338                         dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
 339                                           mr->pbl_l1_dma_addr[i]);
 340         } else {
  341                 dev_warn(dev, "unsupported: mhop_num=%d, err_loop_index=%d.",
 342                          mhop_num, err_loop_index);
 343                 return;
 344         }
 345 
 346         dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, mr->pbl_l0_dma_addr);
 347         mr->pbl_bt_l0 = NULL;
 348         mr->pbl_l0_dma_addr = 0;
  349 }

  350 static int pbl_1hop_alloc(struct hns_roce_dev *hr_dev, int npages,
 351                                struct hns_roce_mr *mr, u32 pbl_bt_sz)
 352 {
 353         struct device *dev = hr_dev->dev;
 354 
 355         if (npages > pbl_bt_sz / 8) {
  356                 dev_err(dev, "npages %d is larger than pbl_bt_sz / 8!",
  357                         npages);
 358                 return -EINVAL;
 359         }
 360         mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
 361                                          &(mr->pbl_dma_addr),
 362                                          GFP_KERNEL);
 363         if (!mr->pbl_buf)
 364                 return -ENOMEM;
 365 
 366         mr->pbl_size = npages;
 367         mr->pbl_ba = mr->pbl_dma_addr;
 368         mr->pbl_hop_num = 1;
 369         mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
 370         mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
 371         return 0;
 373 }
 374 
 376 static int pbl_2hop_alloc(struct hns_roce_dev *hr_dev, int npages,
 377                                struct hns_roce_mr *mr, u32 pbl_bt_sz)
 378 {
 379         struct device *dev = hr_dev->dev;
 380         int npages_allocated;
 381         u64 pbl_last_bt_num;
 382         u64 pbl_bt_cnt = 0;
 383         u64 size;
 384         int i;
 385 
 386         pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
 387 
 388         /* alloc L1 BT */
 389         for (i = 0; i < pbl_bt_sz / 8; i++) {
 390                 if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
 391                         size = pbl_bt_sz;
 392                 } else {
 393                         npages_allocated = i * (pbl_bt_sz / 8);
 394                         size = (npages - npages_allocated) * 8;
 395                 }
 396                 mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size,
 397                                             &(mr->pbl_l1_dma_addr[i]),
 398                                             GFP_KERNEL);
 399                 if (!mr->pbl_bt_l1[i]) {
 400                         hns_roce_loop_free(hr_dev, mr, 1, i, 0);
 401                         return -ENOMEM;
 402                 }
 403 
 404                 *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
 405 
 406                 pbl_bt_cnt++;
 407                 if (pbl_bt_cnt >= pbl_last_bt_num)
 408                         break;
 409         }
 410 
 411         mr->l0_chunk_last_num = i + 1;
 412 
 413         return 0;
 414 }
 415 
 416 static int pbl_3hop_alloc(struct hns_roce_dev *hr_dev, int npages,
 417                                struct hns_roce_mr *mr, u32 pbl_bt_sz)
 418 {
 419         struct device *dev = hr_dev->dev;
 420         int mr_alloc_done = 0;
 421         int npages_allocated;
 422         u64 pbl_last_bt_num;
 423         u64 pbl_bt_cnt = 0;
 424         u64 bt_idx;
 425         u64 size;
 426         int i;
 427         int j = 0;
 428 
 429         pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
 430 
 431         mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num,
 432                                       sizeof(*mr->pbl_l2_dma_addr),
 433                                       GFP_KERNEL);
 434         if (!mr->pbl_l2_dma_addr)
 435                 return -ENOMEM;
 436 
 437         mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num,
 438                                 sizeof(*mr->pbl_bt_l2),
 439                                 GFP_KERNEL);
 440         if (!mr->pbl_bt_l2)
 441                 goto err_kcalloc_bt_l2;
 442 
 443         /* alloc L1, L2 BT */
 444         for (i = 0; i < pbl_bt_sz / 8; i++) {
 445                 mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz,
 446                                             &(mr->pbl_l1_dma_addr[i]),
 447                                             GFP_KERNEL);
 448                 if (!mr->pbl_bt_l1[i]) {
 449                         hns_roce_loop_free(hr_dev, mr, 1, i, 0);
 450                         goto err_dma_alloc_l0;
 451                 }
 452 
 453                 *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
 454 
 455                 for (j = 0; j < pbl_bt_sz / 8; j++) {
 456                         bt_idx = i * pbl_bt_sz / 8 + j;
 457 
 458                         if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
 459                                 size = pbl_bt_sz;
 460                         } else {
 461                                 npages_allocated = bt_idx *
 462                                                    (pbl_bt_sz / 8);
 463                                 size = (npages - npages_allocated) * 8;
 464                         }
 465                         mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent(
 466                                       dev, size,
 467                                       &(mr->pbl_l2_dma_addr[bt_idx]),
 468                                       GFP_KERNEL);
 469                         if (!mr->pbl_bt_l2[bt_idx]) {
 470                                 hns_roce_loop_free(hr_dev, mr, 2, i, j);
 471                                 goto err_dma_alloc_l0;
 472                         }
 473 
 474                         *(mr->pbl_bt_l1[i] + j) =
 475                                         mr->pbl_l2_dma_addr[bt_idx];
 476 
 477                         pbl_bt_cnt++;
 478                         if (pbl_bt_cnt >= pbl_last_bt_num) {
 479                                 mr_alloc_done = 1;
 480                                 break;
 481                         }
 482                 }
 483 
 484                 if (mr_alloc_done)
 485                         break;
 486         }
 487 
 488         mr->l0_chunk_last_num = i + 1;
 489         mr->l1_chunk_last_num = j + 1;
 490 
 492         return 0;
 493 
 494 err_dma_alloc_l0:
 495         kfree(mr->pbl_bt_l2);
 496         mr->pbl_bt_l2 = NULL;
 497 
 498 err_kcalloc_bt_l2:
 499         kfree(mr->pbl_l2_dma_addr);
 500         mr->pbl_l2_dma_addr = NULL;
 501 
 502         return -ENOMEM;
 503 }
 504 
 506 /* PBL multi hop addressing */
 507 static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
 508                                struct hns_roce_mr *mr)
 509 {
 510         struct device *dev = hr_dev->dev;
 511         u32 pbl_bt_sz;
 512         u32 mhop_num;
 513 
 514         mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
 515         pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
 516 
 517         if (mhop_num == HNS_ROCE_HOP_NUM_0)
 518                 return 0;
 519 
 520         if (mhop_num == 1)
 521                 return pbl_1hop_alloc(hr_dev, npages, mr, pbl_bt_sz);
 522 
 523         mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8,
 524                                       sizeof(*mr->pbl_l1_dma_addr),
 525                                       GFP_KERNEL);
 526         if (!mr->pbl_l1_dma_addr)
 527                 return -ENOMEM;
 528 
 529         mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1),
 530                                 GFP_KERNEL);
 531         if (!mr->pbl_bt_l1)
 532                 goto err_kcalloc_bt_l1;
 533 
 534         /* alloc L0 BT */
 535         mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz,
 536                                            &(mr->pbl_l0_dma_addr),
 537                                            GFP_KERNEL);
 538         if (!mr->pbl_bt_l0)
 539                 goto err_kcalloc_l2_dma;
 540 
 541         if (mhop_num == 2) {
 542                 if (pbl_2hop_alloc(hr_dev, npages, mr, pbl_bt_sz))
 543                         goto err_kcalloc_l2_dma;
 544         }
 545 
 546         if (mhop_num == 3) {
 547                 if (pbl_3hop_alloc(hr_dev, npages, mr, pbl_bt_sz))
 548                         goto err_kcalloc_l2_dma;
 549         }
 550 
 552         mr->pbl_size = npages;
 553         mr->pbl_ba = mr->pbl_l0_dma_addr;
 554         mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
 555         mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
 556         mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
 557 
 558         return 0;
 559 
 560 err_kcalloc_l2_dma:
 561         kfree(mr->pbl_bt_l1);
 562         mr->pbl_bt_l1 = NULL;
 563 
 564 err_kcalloc_bt_l1:
 565         kfree(mr->pbl_l1_dma_addr);
 566         mr->pbl_l1_dma_addr = NULL;
 567 
 568         return -ENOMEM;
 569 }
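 
/*
 * Capacity sketch for the multi-hop PBL above, assuming a 4 KB base
 * address table (pbl_ba_pg_sz == 0 and PAGE_SHIFT == 12): each BT
 * holds pbl_bt_sz / 8 == 512 base addresses, so a 1-hop PBL covers up
 * to 512 buffer pages, a 2-hop PBL 512 * 512, and a 3-hop PBL 512^3.
 */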
 570 
 571 static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
 572                              u64 size, u32 access, int npages,
 573                              struct hns_roce_mr *mr)
 574 {
 575         struct device *dev = hr_dev->dev;
 576         unsigned long index = 0;
 577         int ret;
 578 
 579         /* Allocate a key for mr from mr_table */
 580         ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
 581         if (ret == -1)
 582                 return -ENOMEM;
 583 
 584         mr->iova = iova;                        /* MR va starting addr */
 585         mr->size = size;                        /* MR addr range */
  586         mr->pd = pd;                            /* PD num */
  587         mr->access = access;                    /* MR access permissions */
 588         mr->enabled = 0;                        /* MR active status */
 589         mr->key = hw_index_to_key(index);       /* MR key */
 590 
 591         if (size == ~0ull) {
 592                 mr->pbl_buf = NULL;
 593                 mr->pbl_dma_addr = 0;
 594                 /* PBL multi-hop addressing parameters */
 595                 mr->pbl_bt_l2 = NULL;
 596                 mr->pbl_bt_l1 = NULL;
 597                 mr->pbl_bt_l0 = NULL;
 598                 mr->pbl_l2_dma_addr = NULL;
 599                 mr->pbl_l1_dma_addr = NULL;
 600                 mr->pbl_l0_dma_addr = 0;
  601         } else {
  602                 if (!hr_dev->caps.pbl_hop_num) {
  603                         mr->pbl_buf = dma_alloc_coherent(dev,
  604                                                          npages * BA_BYTE_LEN,
  605                                                          &(mr->pbl_dma_addr),
  606                                                          GFP_KERNEL);
  607                         if (!mr->pbl_buf)
  608                                 ret = -ENOMEM;
  609                 } else {
  610                         ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
  611                 }

                /* Free the key so a failed alloc does not leak the index */
                if (ret)
                        hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
                                             index, BITMAP_NO_RR);
  612         }
 613 
 614         return ret;
 615 }
 616 
 617 static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
 618                                struct hns_roce_mr *mr)
 619 {
 620         struct device *dev = hr_dev->dev;
 621         int npages_allocated;
 622         int npages;
 623         int i, j;
 624         u32 pbl_bt_sz;
 625         u32 mhop_num;
 626         u64 bt_idx;
 627 
 628         npages = mr->pbl_size;
 629         pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
 630         mhop_num = (mr->type == MR_TYPE_FRMR) ? 1 : hr_dev->caps.pbl_hop_num;
 631 
 632         if (mhop_num == HNS_ROCE_HOP_NUM_0)
 633                 return;
 634 
 635         if (mhop_num == 1) {
 636                 dma_free_coherent(dev, (unsigned int)(npages * BA_BYTE_LEN),
 637                                   mr->pbl_buf, mr->pbl_dma_addr);
 638                 return;
 639         }
 640 
 641         dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0,
 642                           mr->pbl_l0_dma_addr);
 643 
 644         if (mhop_num == 2) {
 645                 for (i = 0; i < mr->l0_chunk_last_num; i++) {
 646                         if (i == mr->l0_chunk_last_num - 1) {
 647                                 npages_allocated =
 648                                                 i * (pbl_bt_sz / BA_BYTE_LEN);
 649 
 650                                 dma_free_coherent(dev,
 651                                       (npages - npages_allocated) * BA_BYTE_LEN,
 652                                        mr->pbl_bt_l1[i],
 653                                        mr->pbl_l1_dma_addr[i]);
 654 
 655                                 break;
 656                         }
 657 
 658                         dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
 659                                           mr->pbl_l1_dma_addr[i]);
 660                 }
 661         } else if (mhop_num == 3) {
 662                 for (i = 0; i < mr->l0_chunk_last_num; i++) {
 663                         dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
 664                                           mr->pbl_l1_dma_addr[i]);
 665 
 666                         for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) {
 667                                 bt_idx = i * (pbl_bt_sz / BA_BYTE_LEN) + j;
 668 
 669                                 if ((i == mr->l0_chunk_last_num - 1)
 670                                     && j == mr->l1_chunk_last_num - 1) {
 671                                         npages_allocated = bt_idx *
 672                                                       (pbl_bt_sz / BA_BYTE_LEN);
 673 
 674                                         dma_free_coherent(dev,
 675                                               (npages - npages_allocated) *
 676                                               BA_BYTE_LEN,
 677                                               mr->pbl_bt_l2[bt_idx],
 678                                               mr->pbl_l2_dma_addr[bt_idx]);
 679 
 680                                         break;
 681                                 }
 682 
 683                                 dma_free_coherent(dev, pbl_bt_sz,
 684                                                 mr->pbl_bt_l2[bt_idx],
 685                                                 mr->pbl_l2_dma_addr[bt_idx]);
 686                         }
 687                 }
 688         }
 689 
 690         kfree(mr->pbl_bt_l1);
 691         kfree(mr->pbl_l1_dma_addr);
 692         mr->pbl_bt_l1 = NULL;
 693         mr->pbl_l1_dma_addr = NULL;
 694         if (mhop_num == 3) {
 695                 kfree(mr->pbl_bt_l2);
 696                 kfree(mr->pbl_l2_dma_addr);
 697                 mr->pbl_bt_l2 = NULL;
 698                 mr->pbl_l2_dma_addr = NULL;
 699         }
 700 }
 701 
 702 static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
 703                              struct hns_roce_mr *mr)
 704 {
 705         struct device *dev = hr_dev->dev;
 706         int npages = 0;
 707         int ret;
 708 
 709         if (mr->enabled) {
 710                 ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key)
 711                                          & (hr_dev->caps.num_mtpts - 1));
 712                 if (ret)
 713                         dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret);
 714         }
 715 
 716         if (mr->size != ~0ULL) {
 717                 if (mr->type == MR_TYPE_MR)
 718                         npages = ib_umem_page_count(mr->umem);
 719 
 720                 if (!hr_dev->caps.pbl_hop_num)
 721                         dma_free_coherent(dev,
 722                                           (unsigned int)(npages * BA_BYTE_LEN),
 723                                           mr->pbl_buf, mr->pbl_dma_addr);
 724                 else
 725                         hns_roce_mhop_free(hr_dev, mr);
 726         }
 727 
 728         if (mr->enabled)
 729                 hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
 730                                    key_to_hw_index(mr->key));
 731 
 732         hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
 733                              key_to_hw_index(mr->key), BITMAP_NO_RR);
 734 }
 735 
 736 static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
 737                               struct hns_roce_mr *mr)
 738 {
 739         int ret;
 740         unsigned long mtpt_idx = key_to_hw_index(mr->key);
 741         struct device *dev = hr_dev->dev;
 742         struct hns_roce_cmd_mailbox *mailbox;
 743         struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
 744 
 745         /* Prepare HEM entry memory */
 746         ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
 747         if (ret)
 748                 return ret;
 749 
 750         /* Allocate mailbox memory */
 751         mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
 752         if (IS_ERR(mailbox)) {
 753                 ret = PTR_ERR(mailbox);
 754                 goto err_table;
 755         }
 756 
 757         if (mr->type != MR_TYPE_FRMR)
 758                 ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
 759         else
 760                 ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr);
 761         if (ret) {
 762                 dev_err(dev, "Write mtpt fail!\n");
 763                 goto err_page;
 764         }
 765 
 766         ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
 767                                  mtpt_idx & (hr_dev->caps.num_mtpts - 1));
 768         if (ret) {
 769                 dev_err(dev, "SW2HW_MPT failed (%d)\n", ret);
 770                 goto err_page;
 771         }
 772 
 773         mr->enabled = 1;
 774         hns_roce_free_cmd_mailbox(hr_dev, mailbox);
 775 
 776         return 0;
 777 
 778 err_page:
 779         hns_roce_free_cmd_mailbox(hr_dev, mailbox);
 780 
 781 err_table:
 782         hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
 783         return ret;
 784 }
 785 
 786 static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
 787                                     struct hns_roce_mtt *mtt, u32 start_index,
 788                                     u32 npages, u64 *page_list)
 789 {
 790         struct hns_roce_hem_table *table;
 791         dma_addr_t dma_handle;
 792         __le64 *mtts;
 793         u32 bt_page_size;
 794         u32 i;
 795 
 796         switch (mtt->mtt_type) {
 797         case MTT_TYPE_WQE:
 798                 table = &hr_dev->mr_table.mtt_table;
 799                 bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
 800                 break;
 801         case MTT_TYPE_CQE:
 802                 table = &hr_dev->mr_table.mtt_cqe_table;
 803                 bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
 804                 break;
 805         case MTT_TYPE_SRQWQE:
 806                 table = &hr_dev->mr_table.mtt_srqwqe_table;
 807                 bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
 808                 break;
 809         case MTT_TYPE_IDX:
 810                 table = &hr_dev->mr_table.mtt_idx_table;
 811                 bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
 812                 break;
 813         default:
 814                 return -EINVAL;
 815         }
 816 
 817         /* All MTTs must fit in the same page */
 818         if (start_index / (bt_page_size / sizeof(u64)) !=
 819                 (start_index + npages - 1) / (bt_page_size / sizeof(u64)))
 820                 return -EINVAL;
 821 
 822         if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1))
 823                 return -EINVAL;
 824 
 825         mtts = hns_roce_table_find(hr_dev, table,
 826                                 mtt->first_seg +
 827                                 start_index / HNS_ROCE_MTT_ENTRY_PER_SEG,
 828                                 &dma_handle);
 829         if (!mtts)
 830                 return -ENOMEM;
 831 
  832         /* Save page addrs; the low 12 bits are zero */
 833         for (i = 0; i < npages; ++i) {
 834                 if (!hr_dev->caps.mtt_hop_num)
 835                         mtts[i] = cpu_to_le64(page_list[i] >> PAGE_ADDR_SHIFT);
 836                 else
 837                         mtts[i] = cpu_to_le64(page_list[i]);
 838         }
 839 
 840         return 0;
 841 }
 842 
 843 static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev,
 844                               struct hns_roce_mtt *mtt, u32 start_index,
 845                               u32 npages, u64 *page_list)
 846 {
 847         int chunk;
 848         int ret;
 849         u32 bt_page_size;
 850 
 851         if (mtt->order < 0)
 852                 return -EINVAL;
 853 
 854         switch (mtt->mtt_type) {
 855         case MTT_TYPE_WQE:
 856                 bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
 857                 break;
 858         case MTT_TYPE_CQE:
 859                 bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
 860                 break;
 861         case MTT_TYPE_SRQWQE:
 862                 bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
 863                 break;
 864         case MTT_TYPE_IDX:
 865                 bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
 866                 break;
 867         default:
 868                 dev_err(hr_dev->dev,
  869                         "Unsupported mtt type %d, write mtt failed\n",
 870                         mtt->mtt_type);
 871                 return -EINVAL;
 872         }
 873 
 874         while (npages > 0) {
 875                 chunk = min_t(int, bt_page_size / sizeof(u64), npages);
 876 
 877                 ret = hns_roce_write_mtt_chunk(hr_dev, mtt, start_index, chunk,
 878                                                page_list);
 879                 if (ret)
 880                         return ret;
 881 
 882                 npages -= chunk;
 883                 start_index += chunk;
 884                 page_list += chunk;
 885         }
 886 
 887         return 0;
 888 }
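 
/*
 * Chunking example for hns_roce_write_mtt(), assuming a 4 KB BT page
 * (bt_page_size / sizeof(u64) == 512): writing npages == 1000 from
 * start_index 0 issues two chunks, [0, 511] and [512, 999], so each
 * hns_roce_write_mtt_chunk() call stays within a single BT page.
 */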
 889 
 890 int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev,
 891                            struct hns_roce_mtt *mtt, struct hns_roce_buf *buf)
 892 {
 893         u64 *page_list;
 894         int ret;
 895         u32 i;
 896 
 897         page_list = kmalloc_array(buf->npages, sizeof(*page_list), GFP_KERNEL);
 898         if (!page_list)
 899                 return -ENOMEM;
 900 
 901         for (i = 0; i < buf->npages; ++i) {
 902                 if (buf->nbufs == 1)
 903                         page_list[i] = buf->direct.map + (i << buf->page_shift);
 904                 else
 905                         page_list[i] = buf->page_list[i].map;
 907         }
 908         ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list);
 909 
 910         kfree(page_list);
 911 
 912         return ret;
 913 }
 914 
 915 int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
 916 {
 917         struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
 918         int ret;
 919 
 920         ret = hns_roce_bitmap_init(&mr_table->mtpt_bitmap,
 921                                    hr_dev->caps.num_mtpts,
 922                                    hr_dev->caps.num_mtpts - 1,
 923                                    hr_dev->caps.reserved_mrws, 0);
 924         if (ret)
 925                 return ret;
 926 
 927         ret = hns_roce_buddy_init(&mr_table->mtt_buddy,
 928                                   ilog2(hr_dev->caps.num_mtt_segs));
 929         if (ret)
 930                 goto err_buddy;
 931 
 932         if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) {
 933                 ret = hns_roce_buddy_init(&mr_table->mtt_cqe_buddy,
 934                                           ilog2(hr_dev->caps.num_cqe_segs));
 935                 if (ret)
 936                         goto err_buddy_cqe;
 937         }
 938 
 939         if (hr_dev->caps.num_srqwqe_segs) {
 940                 ret = hns_roce_buddy_init(&mr_table->mtt_srqwqe_buddy,
 941                                           ilog2(hr_dev->caps.num_srqwqe_segs));
 942                 if (ret)
 943                         goto err_buddy_srqwqe;
 944         }
 945 
 946         if (hr_dev->caps.num_idx_segs) {
 947                 ret = hns_roce_buddy_init(&mr_table->mtt_idx_buddy,
 948                                           ilog2(hr_dev->caps.num_idx_segs));
 949                 if (ret)
 950                         goto err_buddy_idx;
 951         }
 952 
 953         return 0;
 954 
 955 err_buddy_idx:
 956         if (hr_dev->caps.num_srqwqe_segs)
 957                 hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
 958 
 959 err_buddy_srqwqe:
 960         if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
 961                 hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
 962 
 963 err_buddy_cqe:
 964         hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
 965 
 966 err_buddy:
 967         hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
 968         return ret;
 969 }
 970 
 971 void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev)
 972 {
 973         struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
 974 
 975         if (hr_dev->caps.num_idx_segs)
 976                 hns_roce_buddy_cleanup(&mr_table->mtt_idx_buddy);
 977         if (hr_dev->caps.num_srqwqe_segs)
 978                 hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
 979         hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
 980         if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
 981                 hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
 982         hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
 983 }
 984 
 985 struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
 986 {
 987         struct hns_roce_mr *mr;
 988         int ret;
 989 
 990         mr = kmalloc(sizeof(*mr), GFP_KERNEL);
  991         if (!mr)
  992                 return ERR_PTR(-ENOMEM);
 993 
 994         mr->type = MR_TYPE_DMA;
 995 
 996         /* Allocate memory region key */
 997         ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0,
 998                                 ~0ULL, acc, 0, mr);
 999         if (ret)
1000                 goto err_free;
1001 
1002         ret = hns_roce_mr_enable(to_hr_dev(pd->device), mr);
1003         if (ret)
1004                 goto err_mr;
1005 
1006         mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
1007         mr->umem = NULL;
1008 
1009         return &mr->ibmr;
1010 
1011 err_mr:
1012         hns_roce_mr_free(to_hr_dev(pd->device), mr);
1013 
1014 err_free:
1015         kfree(mr);
1016         return ERR_PTR(ret);
1017 }
1018 
1019 int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
1020                                struct hns_roce_mtt *mtt, struct ib_umem *umem)
1021 {
1022         struct device *dev = hr_dev->dev;
1023         struct sg_dma_page_iter sg_iter;
1024         unsigned int order;
1025         int npage = 0;
1026         int ret = 0;
1027         int i;
1028         u64 page_addr;
1029         u64 *pages;
1030         u32 bt_page_size;
1031         u32 n;
1032 
1033         switch (mtt->mtt_type) {
1034         case MTT_TYPE_WQE:
1035                 order = hr_dev->caps.mtt_ba_pg_sz;
1036                 break;
1037         case MTT_TYPE_CQE:
1038                 order = hr_dev->caps.cqe_ba_pg_sz;
1039                 break;
1040         case MTT_TYPE_SRQWQE:
1041                 order = hr_dev->caps.srqwqe_ba_pg_sz;
1042                 break;
1043         case MTT_TYPE_IDX:
1044                 order = hr_dev->caps.idx_ba_pg_sz;
1045                 break;
1046         default:
 1047                 dev_err(dev, "Unsupported mtt type %d, write mtt failed\n",
1048                         mtt->mtt_type);
1049                 return -EINVAL;
1050         }
1051 
1052         bt_page_size = 1 << (order + PAGE_SHIFT);
1053 
1054         pages = (u64 *) __get_free_pages(GFP_KERNEL, order);
1055         if (!pages)
1056                 return -ENOMEM;
1057 
1058         i = n = 0;
1059 
1060         for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
1061                 page_addr = sg_page_iter_dma_address(&sg_iter);
1062                 if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) {
1063                         if (page_addr & ((1 << mtt->page_shift) - 1)) {
1064                                 dev_err(dev,
 1065                                         "page_addr is not aligned to page_shift %d!\n",
1066                                         mtt->page_shift);
1067                                 ret = -EINVAL;
1068                                 goto out;
1069                         }
1070                         pages[i++] = page_addr;
1071                 }
1072                 npage++;
1073                 if (i == bt_page_size / sizeof(u64)) {
1074                         ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages);
1075                         if (ret)
1076                                 goto out;
1077                         n += i;
1078                         i = 0;
1079                 }
1080         }
1081 
1082         if (i)
1083                 ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages);
1084 
1085 out:
1086         free_pages((unsigned long) pages, order);
1087         return ret;
1088 }
1089 
1090 static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev,
1091                                      struct hns_roce_mr *mr,
1092                                      struct ib_umem *umem)
1093 {
1094         struct sg_dma_page_iter sg_iter;
1095         int i = 0, j = 0;
1096         u64 page_addr;
1097         u32 pbl_bt_sz;
1098 
1099         if (hr_dev->caps.pbl_hop_num == HNS_ROCE_HOP_NUM_0)
1100                 return 0;
1101 
1102         pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
1103         for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
1104                 page_addr = sg_page_iter_dma_address(&sg_iter);
1105                 if (!hr_dev->caps.pbl_hop_num) {
1106                         /* for hip06, page addr is aligned to 4K */
1107                         mr->pbl_buf[i++] = page_addr >> 12;
1108                 } else if (hr_dev->caps.pbl_hop_num == 1) {
1109                         mr->pbl_buf[i++] = page_addr;
1110                 } else {
1111                         if (hr_dev->caps.pbl_hop_num == 2)
1112                                 mr->pbl_bt_l1[i][j] = page_addr;
1113                         else if (hr_dev->caps.pbl_hop_num == 3)
1114                                 mr->pbl_bt_l2[i][j] = page_addr;
1115 
1116                         j++;
1117                         if (j >= (pbl_bt_sz / BA_BYTE_LEN)) {
1118                                 i++;
1119                                 j = 0;
1120                         }
1121                 }
1122         }
1123 
 1124         /* Ensure the PBL writes are visible before the HW accesses them */
 1125         mb();
1126 
1127         return 0;
1128 }
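 
/*
 * Indexing sketch for the 2-hop case above, assuming a 4 KB BT
 * (pbl_bt_sz / BA_BYTE_LEN == 512): j wraps and i advances after every
 * 512 pages, so umem page N lands in L1 table N / 512, slot N % 512
 * (and analogously in pbl_bt_l2[][] for the 3-hop case).
 */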
1129 
1130 struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1131                                    u64 virt_addr, int access_flags,
1132                                    struct ib_udata *udata)
1133 {
1134         struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
1135         struct device *dev = hr_dev->dev;
1136         struct hns_roce_mr *mr;
1137         int bt_size;
1138         int ret;
1139         int n;
1140         int i;
1141 
1142         mr = kmalloc(sizeof(*mr), GFP_KERNEL);
1143         if (!mr)
1144                 return ERR_PTR(-ENOMEM);
1145 
1146         mr->umem = ib_umem_get(udata, start, length, access_flags, 0);
1147         if (IS_ERR(mr->umem)) {
1148                 ret = PTR_ERR(mr->umem);
1149                 goto err_free;
1150         }
1151 
1152         n = ib_umem_page_count(mr->umem);
1153 
1154         if (!hr_dev->caps.pbl_hop_num) {
1155                 if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) {
1156                         dev_err(dev,
 1157                              "MR len %lld err: MR is limited to 4G at most!\n",
1158                              length);
1159                         ret = -EINVAL;
1160                         goto err_umem;
1161                 }
1162         } else {
1163                 u64 pbl_size = 1;
1164 
1165                 bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) /
1166                           BA_BYTE_LEN;
1167                 for (i = 0; i < hr_dev->caps.pbl_hop_num; i++)
1168                         pbl_size *= bt_size;
1169                 if (n > pbl_size) {
1170                         dev_err(dev,
 1171                             "MR len %lld err: MR page num is limited to %lld!\n",
1172                             length, pbl_size);
1173                         ret = -EINVAL;
1174                         goto err_umem;
1175                 }
1176         }
1177 
1178         mr->type = MR_TYPE_MR;
1179 
1180         ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
1181                                 access_flags, n, mr);
1182         if (ret)
1183                 goto err_umem;
1184 
1185         ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
1186         if (ret)
1187                 goto err_mr;
1188 
1189         ret = hns_roce_mr_enable(hr_dev, mr);
1190         if (ret)
1191                 goto err_mr;
1192 
1193         mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
1194 
1195         return &mr->ibmr;
1196 
1197 err_mr:
1198         hns_roce_mr_free(hr_dev, mr);
1199 
1200 err_umem:
1201         ib_umem_release(mr->umem);
1202 
1203 err_free:
1204         kfree(mr);
1205         return ERR_PTR(ret);
1206 }
1207 
1208 static int rereg_mr_trans(struct ib_mr *ibmr, int flags,
1209                           u64 start, u64 length,
1210                           u64 virt_addr, int mr_access_flags,
1211                           struct hns_roce_cmd_mailbox *mailbox,
1212                           u32 pdn, struct ib_udata *udata)
1213 {
1214         struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
1215         struct hns_roce_mr *mr = to_hr_mr(ibmr);
1216         struct device *dev = hr_dev->dev;
1217         int npages;
1218         int ret;
1219 
1220         if (mr->size != ~0ULL) {
1221                 npages = ib_umem_page_count(mr->umem);
1222 
1223                 if (hr_dev->caps.pbl_hop_num)
1224                         hns_roce_mhop_free(hr_dev, mr);
1225                 else
1226                         dma_free_coherent(dev, npages * 8,
1227                                           mr->pbl_buf, mr->pbl_dma_addr);
1228         }
1229         ib_umem_release(mr->umem);
1230 
1231         mr->umem = ib_umem_get(udata, start, length, mr_access_flags, 0);
1232         if (IS_ERR(mr->umem)) {
1233                 ret = PTR_ERR(mr->umem);
1234                 mr->umem = NULL;
 1235                 return ret;
1236         }
1237         npages = ib_umem_page_count(mr->umem);
1238 
1239         if (hr_dev->caps.pbl_hop_num) {
1240                 ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
1241                 if (ret)
1242                         goto release_umem;
1243         } else {
1244                 mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
1245                                                  &(mr->pbl_dma_addr),
1246                                                  GFP_KERNEL);
1247                 if (!mr->pbl_buf) {
1248                         ret = -ENOMEM;
1249                         goto release_umem;
1250                 }
1251         }
1252 
1253         ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
1254                                            mr_access_flags, virt_addr,
1255                                            length, mailbox->buf);
1256         if (ret)
1257                 goto release_umem;
1258 
1260         ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
1261         if (ret) {
1262                 if (mr->size != ~0ULL) {
1263                         npages = ib_umem_page_count(mr->umem);
1264 
1265                         if (hr_dev->caps.pbl_hop_num)
1266                                 hns_roce_mhop_free(hr_dev, mr);
1267                         else
1268                                 dma_free_coherent(dev, npages * 8,
1269                                                   mr->pbl_buf,
1270                                                   mr->pbl_dma_addr);
1271                 }
1272 
1273                 goto release_umem;
1274         }
1275 
1276         return 0;
1277 
1278 release_umem:
1279         ib_umem_release(mr->umem);
1280         return ret;
1282 }
1283 
1285 int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
1286                            u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
1287                            struct ib_udata *udata)
1288 {
1289         struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
1290         struct hns_roce_mr *mr = to_hr_mr(ibmr);
1291         struct hns_roce_cmd_mailbox *mailbox;
1292         struct device *dev = hr_dev->dev;
1293         unsigned long mtpt_idx;
1294         u32 pdn = 0;
1295         int ret;
1296 
1297         if (!mr->enabled)
1298                 return -EINVAL;
1299 
1300         mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
1301         if (IS_ERR(mailbox))
1302                 return PTR_ERR(mailbox);
1303 
1304         mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1);
1305         ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0,
1306                                 HNS_ROCE_CMD_QUERY_MPT,
1307                                 HNS_ROCE_CMD_TIMEOUT_MSECS);
1308         if (ret)
1309                 goto free_cmd_mbox;
1310 
1311         ret = hns_roce_hw2sw_mpt(hr_dev, NULL, mtpt_idx);
1312         if (ret)
1313                 dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret);
1314 
1315         mr->enabled = 0;
1316 
1317         if (flags & IB_MR_REREG_PD)
1318                 pdn = to_hr_pd(pd)->pdn;
1319 
1320         if (flags & IB_MR_REREG_TRANS) {
1321                 ret = rereg_mr_trans(ibmr, flags,
1322                                      start, length,
1323                                      virt_addr, mr_access_flags,
1324                                      mailbox, pdn, udata);
1325                 if (ret)
1326                         goto free_cmd_mbox;
1327         } else {
1328                 ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
1329                                                    mr_access_flags, virt_addr,
1330                                                    length, mailbox->buf);
1331                 if (ret)
1332                         goto free_cmd_mbox;
1333         }
1334 
1335         ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, mtpt_idx);
1336         if (ret) {
1337                 dev_err(dev, "SW2HW_MPT failed (%d)\n", ret);
1338                 ib_umem_release(mr->umem);
1339                 goto free_cmd_mbox;
1340         }
1341 
1342         mr->enabled = 1;
1343         if (flags & IB_MR_REREG_ACCESS)
1344                 mr->access = mr_access_flags;
1345 
1346         hns_roce_free_cmd_mailbox(hr_dev, mailbox);
1347 
1348         return 0;
1349 
1350 free_cmd_mbox:
1351         hns_roce_free_cmd_mailbox(hr_dev, mailbox);
1352 
1353         return ret;
1354 }
1355 
1356 int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1357 {
1358         struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
1359         struct hns_roce_mr *mr = to_hr_mr(ibmr);
1360         int ret = 0;
1361 
1362         if (hr_dev->hw->dereg_mr) {
1363                 ret = hr_dev->hw->dereg_mr(hr_dev, mr, udata);
1364         } else {
1365                 hns_roce_mr_free(hr_dev, mr);
1366 
1367                 ib_umem_release(mr->umem);
1368                 kfree(mr);
1369         }
1370 
1371         return ret;
1372 }
1373 
1374 struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
1375                                 u32 max_num_sg, struct ib_udata *udata)
1376 {
1377         struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
1378         struct device *dev = hr_dev->dev;
1379         struct hns_roce_mr *mr;
1380         u64 length;
1381         u32 page_size;
1382         int ret;
1383 
1384         page_size = 1 << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT);
1385         length = max_num_sg * page_size;
1386 
1387         if (mr_type != IB_MR_TYPE_MEM_REG)
1388                 return ERR_PTR(-EINVAL);
1389 
1390         if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
1391                 dev_err(dev, "max_num_sg larger than %d\n",
1392                         HNS_ROCE_FRMR_MAX_PA);
1393                 return ERR_PTR(-EINVAL);
1394         }
1395 
1396         mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1397         if (!mr)
1398                 return ERR_PTR(-ENOMEM);
1399 
1400         mr->type = MR_TYPE_FRMR;
1401 
1402         /* Allocate memory region key */
1403         ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length,
1404                                 0, max_num_sg, mr);
1405         if (ret)
1406                 goto err_free;
1407 
1408         ret = hns_roce_mr_enable(hr_dev, mr);
1409         if (ret)
1410                 goto err_mr;
1411 
1412         mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
1413         mr->umem = NULL;
1414 
1415         return &mr->ibmr;
1416 
1417 err_mr:
1418         hns_roce_mr_free(to_hr_dev(pd->device), mr);
1419 
1420 err_free:
1421         kfree(mr);
1422         return ERR_PTR(ret);
1423 }
1424 
1425 static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
1426 {
1427         struct hns_roce_mr *mr = to_hr_mr(ibmr);
1428 
1429         mr->pbl_buf[mr->npages++] = addr;
1430 
1431         return 0;
1432 }
1433 
1434 int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
1435                        unsigned int *sg_offset)
1436 {
1437         struct hns_roce_mr *mr = to_hr_mr(ibmr);
1438 
1439         mr->npages = 0;
1440 
1441         return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
1442 }
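/*
 * Illustration (not in the original source): a minimal sketch of how an
 * in-kernel consumer would exercise the two FRMR entry points above through
 * the ib_core wrappers.  The qp/pd/sg parameters and the function itself are
 * hypothetical; error handling is reduced to the essentials.
 */
#if 0
static int example_frmr_register(struct ib_pd *pd, struct ib_qp *qp,
                                 struct scatterlist *sg, int sg_nents)
{
        const struct ib_send_wr *bad_wr;
        struct ib_reg_wr reg_wr = {};
        struct ib_mr *mr;
        int n;

        /* dispatches to hns_roce_alloc_mr() */
        mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, sg_nents);
        if (IS_ERR(mr))
                return PTR_ERR(mr);

        /* dispatches to hns_roce_map_mr_sg(), which records each page
         * address in mr->pbl_buf through hns_roce_set_page()
         */
        n = ib_map_mr_sg(mr, sg, sg_nents, NULL, PAGE_SIZE);
        if (n != sg_nents) {
                ib_dereg_mr(mr);
                return n < 0 ? n : -EINVAL;
        }

        /* post a registration work request so the HW loads the new PBL */
        reg_wr.wr.opcode = IB_WR_REG_MR;
        reg_wr.mr = mr;
        reg_wr.key = mr->rkey;
        reg_wr.access = IB_ACCESS_LOCAL_WRITE;

        return ib_post_send(qp, &reg_wr.wr, &bad_wr);
}
#endif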
1443 
1444 static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
1445                              struct hns_roce_mw *mw)
1446 {
1447         struct device *dev = hr_dev->dev;
1448         int ret;
1449 
1450         if (mw->enabled) {
1451                 ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mw->rkey)
1452                                          & (hr_dev->caps.num_mtpts - 1));
1453                 if (ret)
1454                         dev_warn(dev, "MW HW2SW_MPT failed (%d)\n", ret);
1455 
1456                 hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
1457                                    key_to_hw_index(mw->rkey));
1458         }
1459 
1460         hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
1461                              key_to_hw_index(mw->rkey), BITMAP_NO_RR);
1462 }
1463 
1464 static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
1465                               struct hns_roce_mw *mw)
1466 {
1467         struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
1468         struct hns_roce_cmd_mailbox *mailbox;
1469         struct device *dev = hr_dev->dev;
1470         unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
1471         int ret;
1472 
1473         /* prepare HEM entry memory */
1474         ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
1475         if (ret)
1476                 return ret;
1477 
1478         mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
1479         if (IS_ERR(mailbox)) {
1480                 ret = PTR_ERR(mailbox);
1481                 goto err_table;
1482         }
1483 
1484         ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
1485         if (ret) {
1486                 dev_err(dev, "MW write mtpt failed!\n");
1487                 goto err_page;
1488         }
1489 
1490         ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
1491                                  mtpt_idx & (hr_dev->caps.num_mtpts - 1));
1492         if (ret) {
1493                 dev_err(dev, "MW SW2HW_MPT failed (%d)\n", ret);
1494                 goto err_page;
1495         }
1496 
1497         mw->enabled = 1;
1498 
1499         hns_roce_free_cmd_mailbox(hr_dev, mailbox);
1500 
1501         return 0;
1502 
1503 err_page:
1504         hns_roce_free_cmd_mailbox(hr_dev, mailbox);
1505 
1506 err_table:
1507         hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
1508 
1509         return ret;
1510 }
1511 
1512 struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
1513                                 struct ib_udata *udata)
1514 {
1515         struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device);
1516         struct hns_roce_mw *mw;
1517         unsigned long index = 0;
1518         int ret;
1519 
1520         mw = kzalloc(sizeof(*mw), GFP_KERNEL); /* zeroed: mw_free() reads mw->enabled on error paths */
1521         if (!mw)
1522                 return ERR_PTR(-ENOMEM);
1523 
1524         /* Allocate a key for mw from bitmap */
1525         ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
1526         if (ret)
1527                 goto err_bitmap;
1528 
1529         mw->rkey = hw_index_to_key(index);
1530 
1531         mw->ibmw.rkey = mw->rkey;
1532         mw->ibmw.type = type;
1533         mw->pdn = to_hr_pd(ib_pd)->pdn;
1534         mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
1535         mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
1536         mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
1537 
1538         ret = hns_roce_mw_enable(hr_dev, mw);
1539         if (ret)
1540                 goto err_mw;
1541 
1542         return &mw->ibmw;
1543 
1544 err_mw:
1545         hns_roce_mw_free(hr_dev, mw);
1546 
1547 err_bitmap:
1548         kfree(mw);
1549 
1550         return ERR_PTR(ret);
1551 }
1552 
1553 int hns_roce_dealloc_mw(struct ib_mw *ibmw)
1554 {
1555         struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
1556         struct hns_roce_mw *mw = to_hr_mw(ibmw);
1557 
1558         hns_roce_mw_free(hr_dev, mw);
1559         kfree(mw);
1560 
1561         return 0;
1562 }
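/*
 * Usage note (sketch, not in the original source): memory windows on this
 * device are reached from userspace through uverbs, so the consumer side
 * looks roughly like the libibverbs fragment below.  Here pd is a
 * hypothetical ibv_pd handle, and binding the window to an MR is left to
 * the application.
 *
 *	struct ibv_mw *mw = ibv_alloc_mw(pd, IBV_MW_TYPE_2);
 *	if (!mw)
 *		return errno;
 *	// ... bind to an MR, hand mw->rkey to the remote peer ...
 *	ibv_dealloc_mw(mw);
 */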
1563 
1564 void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift,
1565                        int buf_pg_shift)
1566 {
1567         hns_roce_hem_list_init(&mtr->hem_list, bt_pg_shift);
1568         mtr->buf_pg_shift = buf_pg_shift;
1569 }
1570 
1571 void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev,
1572                           struct hns_roce_mtr *mtr)
1573 {
1574         hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
1575 }
1576 
1577 static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev,
1578                               struct hns_roce_mtr *mtr, dma_addr_t *bufs,
1579                               struct hns_roce_buf_region *r)
1580 {
1581         int offset;
1582         int count;
1583         int npage;
1584         u64 *mtts;
1585         int end;
1586         int i;
1587 
1588         offset = r->offset;
1589         end = offset + r->count;
1590         npage = 0;
1591         while (offset < end) {
1592                 mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
1593                                                   offset, &count, NULL);
1594                 if (!mtts)
1595                         return -ENOBUFS;
1596 
1597                 /* Save the page address; pages are aligned, so the low 12 bits are zero */
1598                 for (i = 0; i < count; i++) {
1599                         if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
1600                                 mtts[i] = bufs[npage] >> PAGE_ADDR_SHIFT;
1601                         else
1602                                 mtts[i] = bufs[npage];
1603 
1604                         npage++;
1605                 }
1606                 offset += count;
1607         }
1608 
1609         return 0;
1610 }
1611 
1612 int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
1613                         dma_addr_t **bufs, struct hns_roce_buf_region *regions,
1614                         int region_cnt)
1615 {
1616         struct hns_roce_buf_region *r;
1617         int ret;
1618         int i;
1619 
1620         ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, regions,
1621                                         region_cnt);
1622         if (ret)
1623                 return ret;
1624 
1625         for (i = 0; i < region_cnt; i++) {
1626                 r = &regions[i];
1627                 ret = hns_roce_write_mtr(hr_dev, mtr, bufs[i], r);
1628                 if (ret) {
1629                         dev_err(hr_dev->dev,
1630                                 "write mtr[%d/%d] err %d, offset=%d.\n",
1631                                 i, region_cnt, ret, r->offset);
1632                         goto err_write;
1633                 }
1634         }
1635 
1636         return 0;
1637 
1638 err_write:
1639         hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
1640 
1641         return ret;
1642 }
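/*
 * Lifecycle sketch (hypothetical caller, not in the original source): queue
 * setup code would drive the mtr API above roughly as follows.  The regions
 * and bufs arrays are assumed to be prepared by the caller, with one DMA
 * address per buffer page.
 */
#if 0
static int example_mtr_setup(struct hns_roce_dev *hr_dev,
                             struct hns_roce_mtr *mtr,
                             struct hns_roce_buf_region *regions,
                             int region_cnt, dma_addr_t **bufs)
{
        int ret;

        /* base-address table pages scaled by mtt_ba_pg_sz, 4K buffer pages */
        hns_roce_mtr_init(mtr, PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz,
                          PAGE_SHIFT);

        /* build the multi-hop table and write one MTT entry per page */
        ret = hns_roce_mtr_attach(hr_dev, mtr, bufs, regions, region_cnt);
        if (ret)
                return ret;

        /* ... use the queue; at teardown: hns_roce_mtr_cleanup(hr_dev, mtr) */
        return 0;
}
#endif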
1643 
1644 int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
1645                       int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
1646 {
1647         u64 *mtts = mtt_buf;
1648         int mtt_count;
1649         int total = 0;
1650         u64 *addr;
1651         int npage;
1652         int left;
1653 
1654         if (!mtts || mtt_max < 1)
1655                 goto done;
1656 
1657         left = mtt_max;
1658         while (left > 0) {
1659                 mtt_count = 0;
1660                 addr = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
1661                                                   offset + total,
1662                                                   &mtt_count, NULL);
1663                 if (!addr || !mtt_count)
1664                         goto done;
1665 
1666                 npage = min(mtt_count, left);
1667                 memcpy(&mtts[total], addr, BA_BYTE_LEN * npage);
1668                 left -= npage;
1669                 total += npage;
1670         }
1671 
1672 done:
1673         if (base_addr)
1674                 *base_addr = mtr->hem_list.root_ba;
1675 
1676         return total;
1677 }
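/*
 * Read-back sketch (hypothetical caller, not in the original source):
 * context-setup code can fetch the first few translation entries together
 * with the root base address like this.
 */
#if 0
static int example_mtr_query(struct hns_roce_dev *hr_dev,
                             struct hns_roce_mtr *mtr)
{
        u64 mtts[2];
        u64 root_ba;
        int found;

        /* copy up to two MTT entries starting at page offset 0 */
        found = hns_roce_mtr_find(hr_dev, mtr, 0, mtts, ARRAY_SIZE(mtts),
                                  &root_ba);
        if (found < 1)
                return -ENOBUFS;

        /* mtts[0] now holds the DMA address of the first buffer page */
        return 0;
}
#endif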
