root/drivers/infiniband/hw/efa/efa_verbs.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. get_mmap_key
  2. EFA_DEFINE_STATS
  3. to_edev
  4. to_eucontext
  5. to_epd
  6. to_emr
  7. to_eqp
  8. to_ecq
  9. to_eah
  10. efa_zalloc_mapped
  11. mmap_entries_remove_free
  12. mmap_entry_get
  13. mmap_entry_insert
  14. efa_query_device
  15. efa_query_port
  16. efa_query_qp
  17. efa_query_gid
  18. efa_query_pkey
  19. efa_pd_dealloc
  20. efa_alloc_pd
  21. efa_dealloc_pd
  22. efa_destroy_qp_handle
  23. efa_destroy_qp
  24. qp_mmap_entries_setup
  25. efa_qp_validate_cap
  26. efa_qp_validate_attr
  27. efa_create_qp
  28. efa_modify_qp_validate
  29. efa_modify_qp
  30. efa_destroy_cq_idx
  31. efa_destroy_cq
  32. cq_mmap_entries_setup
  33. efa_create_cq
  34. umem_to_page_list
  35. efa_vmalloc_buf_to_sg
  36. pbl_chunk_list_create
  37. pbl_chunk_list_destroy
  38. pbl_continuous_initialize
  39. pbl_indirect_initialize
  40. pbl_indirect_terminate
  41. pbl_create
  42. pbl_destroy
  43. efa_create_inline_pbl
  44. efa_create_pbl
  45. efa_reg_mr
  46. efa_dereg_mr
  47. efa_get_port_immutable
  48. efa_dealloc_uar
  49. efa_alloc_ucontext
  50. efa_dealloc_ucontext
  51. __efa_mmap
  52. efa_mmap
  53. efa_ah_destroy
  54. efa_create_ah
  55. efa_destroy_ah
  56. efa_alloc_hw_stats
  57. efa_get_hw_stats
  58. efa_port_link_layer

   1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2 /*
   3  * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
   4  */
   5 
   6 #include <linux/vmalloc.h>
   7 
   8 #include <rdma/ib_addr.h>
   9 #include <rdma/ib_umem.h>
  10 #include <rdma/ib_user_verbs.h>
  11 #include <rdma/ib_verbs.h>
  12 #include <rdma/uverbs_ioctl.h>
  13 
  14 #include "efa.h"
  15 
  16 #define EFA_MMAP_FLAG_SHIFT 56
  17 #define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0)
  18 #define EFA_MMAP_INVALID U64_MAX
  19 
  20 enum {
  21         EFA_MMAP_DMA_PAGE = 0,
  22         EFA_MMAP_IO_WC,
  23         EFA_MMAP_IO_NC,
  24 };
  25 
  26 #define EFA_AENQ_ENABLED_GROUPS \
  27         (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
  28          BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
  29 
  30 struct efa_mmap_entry {
  31         void  *obj;
  32         u64 address;
  33         u64 length;
  34         u32 mmap_page;
  35         u8 mmap_flag;
  36 };
  37 
  38 static inline u64 get_mmap_key(const struct efa_mmap_entry *efa)
  39 {
  40         return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) |
  41                ((u64)efa->mmap_page << PAGE_SHIFT);
  42 }
  43 
  44 #define EFA_DEFINE_STATS(op) \
  45         op(EFA_TX_BYTES, "tx_bytes") \
  46         op(EFA_TX_PKTS, "tx_pkts") \
  47         op(EFA_RX_BYTES, "rx_bytes") \
  48         op(EFA_RX_PKTS, "rx_pkts") \
  49         op(EFA_RX_DROPS, "rx_drops") \
  50         op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
  51         op(EFA_COMPLETED_CMDS, "completed_cmds") \
  52         op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \
  53         op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \
  54         op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \
  55         op(EFA_CREATE_QP_ERR, "create_qp_err") \
  56         op(EFA_REG_MR_ERR, "reg_mr_err") \
  57         op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \
  58         op(EFA_CREATE_AH_ERR, "create_ah_err")
  59 
  60 #define EFA_STATS_ENUM(ename, name) ename,
  61 #define EFA_STATS_STR(ename, name) [ename] = name,
  62 
  63 enum efa_hw_stats {
  64         EFA_DEFINE_STATS(EFA_STATS_ENUM)
  65 };
  66 
  67 static const char *const efa_stats_names[] = {
  68         EFA_DEFINE_STATS(EFA_STATS_STR)
  69 };
  70 
  71 #define EFA_CHUNK_PAYLOAD_SHIFT       12
  72 #define EFA_CHUNK_PAYLOAD_SIZE        BIT(EFA_CHUNK_PAYLOAD_SHIFT)
  73 #define EFA_CHUNK_PAYLOAD_PTR_SIZE    8
  74 
  75 #define EFA_CHUNK_SHIFT               12
  76 #define EFA_CHUNK_SIZE                BIT(EFA_CHUNK_SHIFT)
  77 #define EFA_CHUNK_PTR_SIZE            sizeof(struct efa_com_ctrl_buff_info)
  78 
  79 #define EFA_PTRS_PER_CHUNK \
  80         ((EFA_CHUNK_SIZE - EFA_CHUNK_PTR_SIZE) / EFA_CHUNK_PAYLOAD_PTR_SIZE)
  81 
  82 #define EFA_CHUNK_USED_SIZE \
  83         ((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)
  84 
  85 #define EFA_SUPPORTED_ACCESS_FLAGS IB_ACCESS_LOCAL_WRITE
  86 
  87 struct pbl_chunk {
  88         dma_addr_t dma_addr;
  89         u64 *buf;
  90         u32 length;
  91 };
  92 
  93 struct pbl_chunk_list {
  94         struct pbl_chunk *chunks;
  95         unsigned int size;
  96 };
  97 
  98 struct pbl_context {
  99         union {
 100                 struct {
 101                         dma_addr_t dma_addr;
 102                 } continuous;
 103                 struct {
 104                         u32 pbl_buf_size_in_pages;
 105                         struct scatterlist *sgl;
 106                         int sg_dma_cnt;
 107                         struct pbl_chunk_list chunk_list;
 108                 } indirect;
 109         } phys;
 110         u64 *pbl_buf;
 111         u32 pbl_buf_size_in_bytes;
 112         u8 physically_continuous;
 113 };
 114 
 115 static inline struct efa_dev *to_edev(struct ib_device *ibdev)
 116 {
 117         return container_of(ibdev, struct efa_dev, ibdev);
 118 }
 119 
 120 static inline struct efa_ucontext *to_eucontext(struct ib_ucontext *ibucontext)
 121 {
 122         return container_of(ibucontext, struct efa_ucontext, ibucontext);
 123 }
 124 
 125 static inline struct efa_pd *to_epd(struct ib_pd *ibpd)
 126 {
 127         return container_of(ibpd, struct efa_pd, ibpd);
 128 }
 129 
 130 static inline struct efa_mr *to_emr(struct ib_mr *ibmr)
 131 {
 132         return container_of(ibmr, struct efa_mr, ibmr);
 133 }
 134 
 135 static inline struct efa_qp *to_eqp(struct ib_qp *ibqp)
 136 {
 137         return container_of(ibqp, struct efa_qp, ibqp);
 138 }
 139 
 140 static inline struct efa_cq *to_ecq(struct ib_cq *ibcq)
 141 {
 142         return container_of(ibcq, struct efa_cq, ibcq);
 143 }
 144 
 145 static inline struct efa_ah *to_eah(struct ib_ah *ibah)
 146 {
 147         return container_of(ibah, struct efa_ah, ibah);
 148 }
 149 
 150 #define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \
 151                                  FIELD_SIZEOF(typeof(x), fld) <= (sz))
 152 
 153 #define is_reserved_cleared(reserved) \
 154         !memchr_inv(reserved, 0, sizeof(reserved))
 155 
 156 static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
 157                                size_t size, enum dma_data_direction dir)
 158 {
 159         void *addr;
 160 
 161         addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
 162         if (!addr)
 163                 return NULL;
 164 
 165         *dma_addr = dma_map_single(&dev->pdev->dev, addr, size, dir);
 166         if (dma_mapping_error(&dev->pdev->dev, *dma_addr)) {
 167                 ibdev_err(&dev->ibdev, "Failed to map DMA address\n");
 168                 free_pages_exact(addr, size);
 169                 return NULL;
 170         }
 171 
 172         return addr;
 173 }
 174 
 175 /*
 176  * This is only called when the ucontext is destroyed and there can be no
 177  * concurrent query via mmap or allocate on the xarray, thus we can be sure no
 178  * other thread is using the entry pointer. We also know that all the BAR
 179  * pages have either been zap'd or munmaped at this point.  Normal pages are
 180  * refcounted and will be freed at the proper time.
 181  */
 182 static void mmap_entries_remove_free(struct efa_dev *dev,
 183                                      struct efa_ucontext *ucontext)
 184 {
 185         struct efa_mmap_entry *entry;
 186         unsigned long mmap_page;
 187 
 188         xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
 189                 xa_erase(&ucontext->mmap_xa, mmap_page);
 190 
 191                 ibdev_dbg(
 192                         &dev->ibdev,
 193                         "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
 194                         entry->obj, get_mmap_key(entry), entry->address,
 195                         entry->length);
 196                 if (entry->mmap_flag == EFA_MMAP_DMA_PAGE)
 197                         /* DMA mapping is already gone, now free the pages */
 198                         free_pages_exact(phys_to_virt(entry->address),
 199                                          entry->length);
 200                 kfree(entry);
 201         }
 202 }
 203 
 204 static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev,
 205                                              struct efa_ucontext *ucontext,
 206                                              u64 key, u64 len)
 207 {
 208         struct efa_mmap_entry *entry;
 209         u64 mmap_page;
 210 
 211         mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT;
 212         if (mmap_page > U32_MAX)
 213                 return NULL;
 214 
 215         entry = xa_load(&ucontext->mmap_xa, mmap_page);
 216         if (!entry || get_mmap_key(entry) != key || entry->length != len)
 217                 return NULL;
 218 
 219         ibdev_dbg(&dev->ibdev,
 220                   "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
 221                   entry->obj, key, entry->address, entry->length);
 222 
 223         return entry;
 224 }
 225 
 226 /*
 227  * Note this locking scheme cannot support removal of entries, except during
 228  * ucontext destruction when the core code guarentees no concurrency.
 229  */
 230 static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext,
 231                              void *obj, u64 address, u64 length, u8 mmap_flag)
 232 {
 233         struct efa_mmap_entry *entry;
 234         u32 next_mmap_page;
 235         int err;
 236 
 237         entry = kmalloc(sizeof(*entry), GFP_KERNEL);
 238         if (!entry)
 239                 return EFA_MMAP_INVALID;
 240 
 241         entry->obj = obj;
 242         entry->address = address;
 243         entry->length = length;
 244         entry->mmap_flag = mmap_flag;
 245 
 246         xa_lock(&ucontext->mmap_xa);
 247         if (check_add_overflow(ucontext->mmap_xa_page,
 248                                (u32)(length >> PAGE_SHIFT),
 249                                &next_mmap_page))
 250                 goto err_unlock;
 251 
 252         entry->mmap_page = ucontext->mmap_xa_page;
 253         ucontext->mmap_xa_page = next_mmap_page;
 254         err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry,
 255                           GFP_KERNEL);
 256         if (err)
 257                 goto err_unlock;
 258 
 259         xa_unlock(&ucontext->mmap_xa);
 260 
 261         ibdev_dbg(
 262                 &dev->ibdev,
 263                 "mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n",
 264                 entry->obj, entry->address, entry->length, get_mmap_key(entry));
 265 
 266         return get_mmap_key(entry);
 267 
 268 err_unlock:
 269         xa_unlock(&ucontext->mmap_xa);
 270         kfree(entry);
 271         return EFA_MMAP_INVALID;
 272 
 273 }
 274 
 275 int efa_query_device(struct ib_device *ibdev,
 276                      struct ib_device_attr *props,
 277                      struct ib_udata *udata)
 278 {
 279         struct efa_com_get_device_attr_result *dev_attr;
 280         struct efa_ibv_ex_query_device_resp resp = {};
 281         struct efa_dev *dev = to_edev(ibdev);
 282         int err;
 283 
 284         if (udata && udata->inlen &&
 285             !ib_is_udata_cleared(udata, 0, udata->inlen)) {
 286                 ibdev_dbg(ibdev,
 287                           "Incompatible ABI params, udata not cleared\n");
 288                 return -EINVAL;
 289         }
 290 
 291         dev_attr = &dev->dev_attr;
 292 
 293         memset(props, 0, sizeof(*props));
 294         props->max_mr_size = dev_attr->max_mr_pages * PAGE_SIZE;
 295         props->page_size_cap = dev_attr->page_size_cap;
 296         props->vendor_id = dev->pdev->vendor;
 297         props->vendor_part_id = dev->pdev->device;
 298         props->hw_ver = dev->pdev->subsystem_device;
 299         props->max_qp = dev_attr->max_qp;
 300         props->max_cq = dev_attr->max_cq;
 301         props->max_pd = dev_attr->max_pd;
 302         props->max_mr = dev_attr->max_mr;
 303         props->max_ah = dev_attr->max_ah;
 304         props->max_cqe = dev_attr->max_cq_depth;
 305         props->max_qp_wr = min_t(u32, dev_attr->max_sq_depth,
 306                                  dev_attr->max_rq_depth);
 307         props->max_send_sge = dev_attr->max_sq_sge;
 308         props->max_recv_sge = dev_attr->max_rq_sge;
 309 
 310         if (udata && udata->outlen) {
 311                 resp.max_sq_sge = dev_attr->max_sq_sge;
 312                 resp.max_rq_sge = dev_attr->max_rq_sge;
 313                 resp.max_sq_wr = dev_attr->max_sq_depth;
 314                 resp.max_rq_wr = dev_attr->max_rq_depth;
 315 
 316                 err = ib_copy_to_udata(udata, &resp,
 317                                        min(sizeof(resp), udata->outlen));
 318                 if (err) {
 319                         ibdev_dbg(ibdev,
 320                                   "Failed to copy udata for query_device\n");
 321                         return err;
 322                 }
 323         }
 324 
 325         return 0;
 326 }
 327 
 328 int efa_query_port(struct ib_device *ibdev, u8 port,
 329                    struct ib_port_attr *props)
 330 {
 331         struct efa_dev *dev = to_edev(ibdev);
 332 
 333         props->lmc = 1;
 334 
 335         props->state = IB_PORT_ACTIVE;
 336         props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
 337         props->gid_tbl_len = 1;
 338         props->pkey_tbl_len = 1;
 339         props->active_speed = IB_SPEED_EDR;
 340         props->active_width = IB_WIDTH_4X;
 341         props->max_mtu = ib_mtu_int_to_enum(dev->mtu);
 342         props->active_mtu = ib_mtu_int_to_enum(dev->mtu);
 343         props->max_msg_sz = dev->mtu;
 344         props->max_vl_num = 1;
 345 
 346         return 0;
 347 }
 348 
 349 int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 350                  int qp_attr_mask,
 351                  struct ib_qp_init_attr *qp_init_attr)
 352 {
 353         struct efa_dev *dev = to_edev(ibqp->device);
 354         struct efa_com_query_qp_params params = {};
 355         struct efa_com_query_qp_result result;
 356         struct efa_qp *qp = to_eqp(ibqp);
 357         int err;
 358 
 359 #define EFA_QUERY_QP_SUPP_MASK \
 360         (IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \
 361          IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP)
 362 
 363         if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) {
 364                 ibdev_dbg(&dev->ibdev,
 365                           "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
 366                           qp_attr_mask, EFA_QUERY_QP_SUPP_MASK);
 367                 return -EOPNOTSUPP;
 368         }
 369 
 370         memset(qp_attr, 0, sizeof(*qp_attr));
 371         memset(qp_init_attr, 0, sizeof(*qp_init_attr));
 372 
 373         params.qp_handle = qp->qp_handle;
 374         err = efa_com_query_qp(&dev->edev, &params, &result);
 375         if (err)
 376                 return err;
 377 
 378         qp_attr->qp_state = result.qp_state;
 379         qp_attr->qkey = result.qkey;
 380         qp_attr->sq_psn = result.sq_psn;
 381         qp_attr->sq_draining = result.sq_draining;
 382         qp_attr->port_num = 1;
 383 
 384         qp_attr->cap.max_send_wr = qp->max_send_wr;
 385         qp_attr->cap.max_recv_wr = qp->max_recv_wr;
 386         qp_attr->cap.max_send_sge = qp->max_send_sge;
 387         qp_attr->cap.max_recv_sge = qp->max_recv_sge;
 388         qp_attr->cap.max_inline_data = qp->max_inline_data;
 389 
 390         qp_init_attr->qp_type = ibqp->qp_type;
 391         qp_init_attr->recv_cq = ibqp->recv_cq;
 392         qp_init_attr->send_cq = ibqp->send_cq;
 393         qp_init_attr->qp_context = ibqp->qp_context;
 394         qp_init_attr->cap = qp_attr->cap;
 395 
 396         return 0;
 397 }
 398 
 399 int efa_query_gid(struct ib_device *ibdev, u8 port, int index,
 400                   union ib_gid *gid)
 401 {
 402         struct efa_dev *dev = to_edev(ibdev);
 403 
 404         memcpy(gid->raw, dev->addr, sizeof(dev->addr));
 405 
 406         return 0;
 407 }
 408 
 409 int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
 410                    u16 *pkey)
 411 {
 412         if (index > 0)
 413                 return -EINVAL;
 414 
 415         *pkey = 0xffff;
 416         return 0;
 417 }
 418 
 419 static int efa_pd_dealloc(struct efa_dev *dev, u16 pdn)
 420 {
 421         struct efa_com_dealloc_pd_params params = {
 422                 .pdn = pdn,
 423         };
 424 
 425         return efa_com_dealloc_pd(&dev->edev, &params);
 426 }
 427 
 428 int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 429 {
 430         struct efa_dev *dev = to_edev(ibpd->device);
 431         struct efa_ibv_alloc_pd_resp resp = {};
 432         struct efa_com_alloc_pd_result result;
 433         struct efa_pd *pd = to_epd(ibpd);
 434         int err;
 435 
 436         if (udata->inlen &&
 437             !ib_is_udata_cleared(udata, 0, udata->inlen)) {
 438                 ibdev_dbg(&dev->ibdev,
 439                           "Incompatible ABI params, udata not cleared\n");
 440                 err = -EINVAL;
 441                 goto err_out;
 442         }
 443 
 444         err = efa_com_alloc_pd(&dev->edev, &result);
 445         if (err)
 446                 goto err_out;
 447 
 448         pd->pdn = result.pdn;
 449         resp.pdn = result.pdn;
 450 
 451         if (udata->outlen) {
 452                 err = ib_copy_to_udata(udata, &resp,
 453                                        min(sizeof(resp), udata->outlen));
 454                 if (err) {
 455                         ibdev_dbg(&dev->ibdev,
 456                                   "Failed to copy udata for alloc_pd\n");
 457                         goto err_dealloc_pd;
 458                 }
 459         }
 460 
 461         ibdev_dbg(&dev->ibdev, "Allocated pd[%d]\n", pd->pdn);
 462 
 463         return 0;
 464 
 465 err_dealloc_pd:
 466         efa_pd_dealloc(dev, result.pdn);
 467 err_out:
 468         atomic64_inc(&dev->stats.sw_stats.alloc_pd_err);
 469         return err;
 470 }
 471 
 472 void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 473 {
 474         struct efa_dev *dev = to_edev(ibpd->device);
 475         struct efa_pd *pd = to_epd(ibpd);
 476 
 477         ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn);
 478         efa_pd_dealloc(dev, pd->pdn);
 479 }
 480 
 481 static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
 482 {
 483         struct efa_com_destroy_qp_params params = { .qp_handle = qp_handle };
 484 
 485         return efa_com_destroy_qp(&dev->edev, &params);
 486 }
 487 
 488 int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 489 {
 490         struct efa_dev *dev = to_edev(ibqp->pd->device);
 491         struct efa_qp *qp = to_eqp(ibqp);
 492         int err;
 493 
 494         ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num);
 495         err = efa_destroy_qp_handle(dev, qp->qp_handle);
 496         if (err)
 497                 return err;
 498 
 499         if (qp->rq_cpu_addr) {
 500                 ibdev_dbg(&dev->ibdev,
 501                           "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n",
 502                           qp->rq_cpu_addr, qp->rq_size,
 503                           &qp->rq_dma_addr);
 504                 dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size,
 505                                  DMA_TO_DEVICE);
 506         }
 507 
 508         kfree(qp);
 509         return 0;
 510 }
 511 
 512 static int qp_mmap_entries_setup(struct efa_qp *qp,
 513                                  struct efa_dev *dev,
 514                                  struct efa_ucontext *ucontext,
 515                                  struct efa_com_create_qp_params *params,
 516                                  struct efa_ibv_create_qp_resp *resp)
 517 {
 518         /*
 519          * Once an entry is inserted it might be mmapped, hence cannot be
 520          * cleaned up until dealloc_ucontext.
 521          */
 522         resp->sq_db_mmap_key =
 523                 mmap_entry_insert(dev, ucontext, qp,
 524                                   dev->db_bar_addr + resp->sq_db_offset,
 525                                   PAGE_SIZE, EFA_MMAP_IO_NC);
 526         if (resp->sq_db_mmap_key == EFA_MMAP_INVALID)
 527                 return -ENOMEM;
 528 
 529         resp->sq_db_offset &= ~PAGE_MASK;
 530 
 531         resp->llq_desc_mmap_key =
 532                 mmap_entry_insert(dev, ucontext, qp,
 533                                   dev->mem_bar_addr + resp->llq_desc_offset,
 534                                   PAGE_ALIGN(params->sq_ring_size_in_bytes +
 535                                              (resp->llq_desc_offset & ~PAGE_MASK)),
 536                                   EFA_MMAP_IO_WC);
 537         if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID)
 538                 return -ENOMEM;
 539 
 540         resp->llq_desc_offset &= ~PAGE_MASK;
 541 
 542         if (qp->rq_size) {
 543                 resp->rq_db_mmap_key =
 544                         mmap_entry_insert(dev, ucontext, qp,
 545                                           dev->db_bar_addr + resp->rq_db_offset,
 546                                           PAGE_SIZE, EFA_MMAP_IO_NC);
 547                 if (resp->rq_db_mmap_key == EFA_MMAP_INVALID)
 548                         return -ENOMEM;
 549 
 550                 resp->rq_db_offset &= ~PAGE_MASK;
 551 
 552                 resp->rq_mmap_key =
 553                         mmap_entry_insert(dev, ucontext, qp,
 554                                           virt_to_phys(qp->rq_cpu_addr),
 555                                           qp->rq_size, EFA_MMAP_DMA_PAGE);
 556                 if (resp->rq_mmap_key == EFA_MMAP_INVALID)
 557                         return -ENOMEM;
 558 
 559                 resp->rq_mmap_size = qp->rq_size;
 560         }
 561 
 562         return 0;
 563 }
 564 
 565 static int efa_qp_validate_cap(struct efa_dev *dev,
 566                                struct ib_qp_init_attr *init_attr)
 567 {
 568         if (init_attr->cap.max_send_wr > dev->dev_attr.max_sq_depth) {
 569                 ibdev_dbg(&dev->ibdev,
 570                           "qp: requested send wr[%u] exceeds the max[%u]\n",
 571                           init_attr->cap.max_send_wr,
 572                           dev->dev_attr.max_sq_depth);
 573                 return -EINVAL;
 574         }
 575         if (init_attr->cap.max_recv_wr > dev->dev_attr.max_rq_depth) {
 576                 ibdev_dbg(&dev->ibdev,
 577                           "qp: requested receive wr[%u] exceeds the max[%u]\n",
 578                           init_attr->cap.max_recv_wr,
 579                           dev->dev_attr.max_rq_depth);
 580                 return -EINVAL;
 581         }
 582         if (init_attr->cap.max_send_sge > dev->dev_attr.max_sq_sge) {
 583                 ibdev_dbg(&dev->ibdev,
 584                           "qp: requested sge send[%u] exceeds the max[%u]\n",
 585                           init_attr->cap.max_send_sge, dev->dev_attr.max_sq_sge);
 586                 return -EINVAL;
 587         }
 588         if (init_attr->cap.max_recv_sge > dev->dev_attr.max_rq_sge) {
 589                 ibdev_dbg(&dev->ibdev,
 590                           "qp: requested sge recv[%u] exceeds the max[%u]\n",
 591                           init_attr->cap.max_recv_sge, dev->dev_attr.max_rq_sge);
 592                 return -EINVAL;
 593         }
 594         if (init_attr->cap.max_inline_data > dev->dev_attr.inline_buf_size) {
 595                 ibdev_dbg(&dev->ibdev,
 596                           "qp: requested inline data[%u] exceeds the max[%u]\n",
 597                           init_attr->cap.max_inline_data,
 598                           dev->dev_attr.inline_buf_size);
 599                 return -EINVAL;
 600         }
 601 
 602         return 0;
 603 }
 604 
 605 static int efa_qp_validate_attr(struct efa_dev *dev,
 606                                 struct ib_qp_init_attr *init_attr)
 607 {
 608         if (init_attr->qp_type != IB_QPT_DRIVER &&
 609             init_attr->qp_type != IB_QPT_UD) {
 610                 ibdev_dbg(&dev->ibdev,
 611                           "Unsupported qp type %d\n", init_attr->qp_type);
 612                 return -EOPNOTSUPP;
 613         }
 614 
 615         if (init_attr->srq) {
 616                 ibdev_dbg(&dev->ibdev, "SRQ is not supported\n");
 617                 return -EOPNOTSUPP;
 618         }
 619 
 620         if (init_attr->create_flags) {
 621                 ibdev_dbg(&dev->ibdev, "Unsupported create flags\n");
 622                 return -EOPNOTSUPP;
 623         }
 624 
 625         return 0;
 626 }
 627 
 628 struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
 629                             struct ib_qp_init_attr *init_attr,
 630                             struct ib_udata *udata)
 631 {
 632         struct efa_com_create_qp_params create_qp_params = {};
 633         struct efa_com_create_qp_result create_qp_resp;
 634         struct efa_dev *dev = to_edev(ibpd->device);
 635         struct efa_ibv_create_qp_resp resp = {};
 636         struct efa_ibv_create_qp cmd = {};
 637         bool rq_entry_inserted = false;
 638         struct efa_ucontext *ucontext;
 639         struct efa_qp *qp;
 640         int err;
 641 
 642         ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext,
 643                                              ibucontext);
 644 
 645         err = efa_qp_validate_cap(dev, init_attr);
 646         if (err)
 647                 goto err_out;
 648 
 649         err = efa_qp_validate_attr(dev, init_attr);
 650         if (err)
 651                 goto err_out;
 652 
 653         if (!field_avail(cmd, driver_qp_type, udata->inlen)) {
 654                 ibdev_dbg(&dev->ibdev,
 655                           "Incompatible ABI params, no input udata\n");
 656                 err = -EINVAL;
 657                 goto err_out;
 658         }
 659 
 660         if (udata->inlen > sizeof(cmd) &&
 661             !ib_is_udata_cleared(udata, sizeof(cmd),
 662                                  udata->inlen - sizeof(cmd))) {
 663                 ibdev_dbg(&dev->ibdev,
 664                           "Incompatible ABI params, unknown fields in udata\n");
 665                 err = -EINVAL;
 666                 goto err_out;
 667         }
 668 
 669         err = ib_copy_from_udata(&cmd, udata,
 670                                  min(sizeof(cmd), udata->inlen));
 671         if (err) {
 672                 ibdev_dbg(&dev->ibdev,
 673                           "Cannot copy udata for create_qp\n");
 674                 goto err_out;
 675         }
 676 
 677         if (cmd.comp_mask) {
 678                 ibdev_dbg(&dev->ibdev,
 679                           "Incompatible ABI params, unknown fields in udata\n");
 680                 err = -EINVAL;
 681                 goto err_out;
 682         }
 683 
 684         qp = kzalloc(sizeof(*qp), GFP_KERNEL);
 685         if (!qp) {
 686                 err = -ENOMEM;
 687                 goto err_out;
 688         }
 689 
 690         create_qp_params.uarn = ucontext->uarn;
 691         create_qp_params.pd = to_epd(ibpd)->pdn;
 692 
 693         if (init_attr->qp_type == IB_QPT_UD) {
 694                 create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD;
 695         } else if (cmd.driver_qp_type == EFA_QP_DRIVER_TYPE_SRD) {
 696                 create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_SRD;
 697         } else {
 698                 ibdev_dbg(&dev->ibdev,
 699                           "Unsupported qp type %d driver qp type %d\n",
 700                           init_attr->qp_type, cmd.driver_qp_type);
 701                 err = -EOPNOTSUPP;
 702                 goto err_free_qp;
 703         }
 704 
 705         ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n",
 706                   init_attr->qp_type, cmd.driver_qp_type);
 707         create_qp_params.send_cq_idx = to_ecq(init_attr->send_cq)->cq_idx;
 708         create_qp_params.recv_cq_idx = to_ecq(init_attr->recv_cq)->cq_idx;
 709         create_qp_params.sq_depth = init_attr->cap.max_send_wr;
 710         create_qp_params.sq_ring_size_in_bytes = cmd.sq_ring_size;
 711 
 712         create_qp_params.rq_depth = init_attr->cap.max_recv_wr;
 713         create_qp_params.rq_ring_size_in_bytes = cmd.rq_ring_size;
 714         qp->rq_size = PAGE_ALIGN(create_qp_params.rq_ring_size_in_bytes);
 715         if (qp->rq_size) {
 716                 qp->rq_cpu_addr = efa_zalloc_mapped(dev, &qp->rq_dma_addr,
 717                                                     qp->rq_size, DMA_TO_DEVICE);
 718                 if (!qp->rq_cpu_addr) {
 719                         err = -ENOMEM;
 720                         goto err_free_qp;
 721                 }
 722 
 723                 ibdev_dbg(&dev->ibdev,
 724                           "qp->cpu_addr[0x%p] allocated: size[%lu], dma[%pad]\n",
 725                           qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr);
 726                 create_qp_params.rq_base_addr = qp->rq_dma_addr;
 727         }
 728 
 729         err = efa_com_create_qp(&dev->edev, &create_qp_params,
 730                                 &create_qp_resp);
 731         if (err)
 732                 goto err_free_mapped;
 733 
 734         resp.sq_db_offset = create_qp_resp.sq_db_offset;
 735         resp.rq_db_offset = create_qp_resp.rq_db_offset;
 736         resp.llq_desc_offset = create_qp_resp.llq_descriptors_offset;
 737         resp.send_sub_cq_idx = create_qp_resp.send_sub_cq_idx;
 738         resp.recv_sub_cq_idx = create_qp_resp.recv_sub_cq_idx;
 739 
 740         err = qp_mmap_entries_setup(qp, dev, ucontext, &create_qp_params,
 741                                     &resp);
 742         if (err)
 743                 goto err_destroy_qp;
 744 
 745         rq_entry_inserted = true;
 746         qp->qp_handle = create_qp_resp.qp_handle;
 747         qp->ibqp.qp_num = create_qp_resp.qp_num;
 748         qp->ibqp.qp_type = init_attr->qp_type;
 749         qp->max_send_wr = init_attr->cap.max_send_wr;
 750         qp->max_recv_wr = init_attr->cap.max_recv_wr;
 751         qp->max_send_sge = init_attr->cap.max_send_sge;
 752         qp->max_recv_sge = init_attr->cap.max_recv_sge;
 753         qp->max_inline_data = init_attr->cap.max_inline_data;
 754 
 755         if (udata->outlen) {
 756                 err = ib_copy_to_udata(udata, &resp,
 757                                        min(sizeof(resp), udata->outlen));
 758                 if (err) {
 759                         ibdev_dbg(&dev->ibdev,
 760                                   "Failed to copy udata for qp[%u]\n",
 761                                   create_qp_resp.qp_num);
 762                         goto err_destroy_qp;
 763                 }
 764         }
 765 
 766         ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num);
 767 
 768         return &qp->ibqp;
 769 
 770 err_destroy_qp:
 771         efa_destroy_qp_handle(dev, create_qp_resp.qp_handle);
 772 err_free_mapped:
 773         if (qp->rq_size) {
 774                 dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size,
 775                                  DMA_TO_DEVICE);
 776                 if (!rq_entry_inserted)
 777                         free_pages_exact(qp->rq_cpu_addr, qp->rq_size);
 778         }
 779 err_free_qp:
 780         kfree(qp);
 781 err_out:
 782         atomic64_inc(&dev->stats.sw_stats.create_qp_err);
 783         return ERR_PTR(err);
 784 }
 785 
 786 static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
 787                                   struct ib_qp_attr *qp_attr, int qp_attr_mask,
 788                                   enum ib_qp_state cur_state,
 789                                   enum ib_qp_state new_state)
 790 {
 791 #define EFA_MODIFY_QP_SUPP_MASK \
 792         (IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \
 793          IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN)
 794 
 795         if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) {
 796                 ibdev_dbg(&dev->ibdev,
 797                           "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
 798                           qp_attr_mask, EFA_MODIFY_QP_SUPP_MASK);
 799                 return -EOPNOTSUPP;
 800         }
 801 
 802         if (!ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
 803                                 qp_attr_mask)) {
 804                 ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n");
 805                 return -EINVAL;
 806         }
 807 
 808         if ((qp_attr_mask & IB_QP_PORT) && qp_attr->port_num != 1) {
 809                 ibdev_dbg(&dev->ibdev, "Can't change port num\n");
 810                 return -EOPNOTSUPP;
 811         }
 812 
 813         if ((qp_attr_mask & IB_QP_PKEY_INDEX) && qp_attr->pkey_index) {
 814                 ibdev_dbg(&dev->ibdev, "Can't change pkey index\n");
 815                 return -EOPNOTSUPP;
 816         }
 817 
 818         return 0;
 819 }
 820 
 821 int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 822                   int qp_attr_mask, struct ib_udata *udata)
 823 {
 824         struct efa_dev *dev = to_edev(ibqp->device);
 825         struct efa_com_modify_qp_params params = {};
 826         struct efa_qp *qp = to_eqp(ibqp);
 827         enum ib_qp_state cur_state;
 828         enum ib_qp_state new_state;
 829         int err;
 830 
 831         if (udata->inlen &&
 832             !ib_is_udata_cleared(udata, 0, udata->inlen)) {
 833                 ibdev_dbg(&dev->ibdev,
 834                           "Incompatible ABI params, udata not cleared\n");
 835                 return -EINVAL;
 836         }
 837 
 838         cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state :
 839                                                      qp->state;
 840         new_state = qp_attr_mask & IB_QP_STATE ? qp_attr->qp_state : cur_state;
 841 
 842         err = efa_modify_qp_validate(dev, qp, qp_attr, qp_attr_mask, cur_state,
 843                                      new_state);
 844         if (err)
 845                 return err;
 846 
 847         params.qp_handle = qp->qp_handle;
 848 
 849         if (qp_attr_mask & IB_QP_STATE) {
 850                 params.modify_mask |= BIT(EFA_ADMIN_QP_STATE_BIT) |
 851                                       BIT(EFA_ADMIN_CUR_QP_STATE_BIT);
 852                 params.cur_qp_state = qp_attr->cur_qp_state;
 853                 params.qp_state = qp_attr->qp_state;
 854         }
 855 
 856         if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
 857                 params.modify_mask |=
 858                         BIT(EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT);
 859                 params.sq_drained_async_notify = qp_attr->en_sqd_async_notify;
 860         }
 861 
 862         if (qp_attr_mask & IB_QP_QKEY) {
 863                 params.modify_mask |= BIT(EFA_ADMIN_QKEY_BIT);
 864                 params.qkey = qp_attr->qkey;
 865         }
 866 
 867         if (qp_attr_mask & IB_QP_SQ_PSN) {
 868                 params.modify_mask |= BIT(EFA_ADMIN_SQ_PSN_BIT);
 869                 params.sq_psn = qp_attr->sq_psn;
 870         }
 871 
 872         err = efa_com_modify_qp(&dev->edev, &params);
 873         if (err)
 874                 return err;
 875 
 876         qp->state = new_state;
 877 
 878         return 0;
 879 }
 880 
 881 static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx)
 882 {
 883         struct efa_com_destroy_cq_params params = { .cq_idx = cq_idx };
 884 
 885         return efa_com_destroy_cq(&dev->edev, &params);
 886 }
 887 
 888 void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 889 {
 890         struct efa_dev *dev = to_edev(ibcq->device);
 891         struct efa_cq *cq = to_ecq(ibcq);
 892 
 893         ibdev_dbg(&dev->ibdev,
 894                   "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n",
 895                   cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr);
 896 
 897         efa_destroy_cq_idx(dev, cq->cq_idx);
 898         dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
 899                          DMA_FROM_DEVICE);
 900 }
 901 
 902 static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
 903                                  struct efa_ibv_create_cq_resp *resp)
 904 {
 905         resp->q_mmap_size = cq->size;
 906         resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq,
 907                                              virt_to_phys(cq->cpu_addr),
 908                                              cq->size, EFA_MMAP_DMA_PAGE);
 909         if (resp->q_mmap_key == EFA_MMAP_INVALID)
 910                 return -ENOMEM;
 911 
 912         return 0;
 913 }
 914 
 915 int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 916                   struct ib_udata *udata)
 917 {
 918         struct efa_ucontext *ucontext = rdma_udata_to_drv_context(
 919                 udata, struct efa_ucontext, ibucontext);
 920         struct efa_ibv_create_cq_resp resp = {};
 921         struct efa_com_create_cq_params params;
 922         struct efa_com_create_cq_result result;
 923         struct ib_device *ibdev = ibcq->device;
 924         struct efa_dev *dev = to_edev(ibdev);
 925         struct efa_ibv_create_cq cmd = {};
 926         struct efa_cq *cq = to_ecq(ibcq);
 927         bool cq_entry_inserted = false;
 928         int entries = attr->cqe;
 929         int err;
 930 
 931         ibdev_dbg(ibdev, "create_cq entries %d\n", entries);
 932 
 933         if (entries < 1 || entries > dev->dev_attr.max_cq_depth) {
 934                 ibdev_dbg(ibdev,
 935                           "cq: requested entries[%u] non-positive or greater than max[%u]\n",
 936                           entries, dev->dev_attr.max_cq_depth);
 937                 err = -EINVAL;
 938                 goto err_out;
 939         }
 940 
 941         if (!field_avail(cmd, num_sub_cqs, udata->inlen)) {
 942                 ibdev_dbg(ibdev,
 943                           "Incompatible ABI params, no input udata\n");
 944                 err = -EINVAL;
 945                 goto err_out;
 946         }
 947 
 948         if (udata->inlen > sizeof(cmd) &&
 949             !ib_is_udata_cleared(udata, sizeof(cmd),
 950                                  udata->inlen - sizeof(cmd))) {
 951                 ibdev_dbg(ibdev,
 952                           "Incompatible ABI params, unknown fields in udata\n");
 953                 err = -EINVAL;
 954                 goto err_out;
 955         }
 956 
 957         err = ib_copy_from_udata(&cmd, udata,
 958                                  min(sizeof(cmd), udata->inlen));
 959         if (err) {
 960                 ibdev_dbg(ibdev, "Cannot copy udata for create_cq\n");
 961                 goto err_out;
 962         }
 963 
 964         if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_50)) {
 965                 ibdev_dbg(ibdev,
 966                           "Incompatible ABI params, unknown fields in udata\n");
 967                 err = -EINVAL;
 968                 goto err_out;
 969         }
 970 
 971         if (!cmd.cq_entry_size) {
 972                 ibdev_dbg(ibdev,
 973                           "Invalid entry size [%u]\n", cmd.cq_entry_size);
 974                 err = -EINVAL;
 975                 goto err_out;
 976         }
 977 
 978         if (cmd.num_sub_cqs != dev->dev_attr.sub_cqs_per_cq) {
 979                 ibdev_dbg(ibdev,
 980                           "Invalid number of sub cqs[%u] expected[%u]\n",
 981                           cmd.num_sub_cqs, dev->dev_attr.sub_cqs_per_cq);
 982                 err = -EINVAL;
 983                 goto err_out;
 984         }
 985 
 986         cq->ucontext = ucontext;
 987         cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs);
 988         cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
 989                                          DMA_FROM_DEVICE);
 990         if (!cq->cpu_addr) {
 991                 err = -ENOMEM;
 992                 goto err_out;
 993         }
 994 
 995         params.uarn = cq->ucontext->uarn;
 996         params.cq_depth = entries;
 997         params.dma_addr = cq->dma_addr;
 998         params.entry_size_in_bytes = cmd.cq_entry_size;
 999         params.num_sub_cqs = cmd.num_sub_cqs;
1000         err = efa_com_create_cq(&dev->edev, &params, &result);
1001         if (err)
1002                 goto err_free_mapped;
1003 
1004         resp.cq_idx = result.cq_idx;
1005         cq->cq_idx = result.cq_idx;
1006         cq->ibcq.cqe = result.actual_depth;
1007         WARN_ON_ONCE(entries != result.actual_depth);
1008 
1009         err = cq_mmap_entries_setup(dev, cq, &resp);
1010         if (err) {
1011                 ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n",
1012                           cq->cq_idx);
1013                 goto err_destroy_cq;
1014         }
1015 
1016         cq_entry_inserted = true;
1017 
1018         if (udata->outlen) {
1019                 err = ib_copy_to_udata(udata, &resp,
1020                                        min(sizeof(resp), udata->outlen));
1021                 if (err) {
1022                         ibdev_dbg(ibdev,
1023                                   "Failed to copy udata for create_cq\n");
1024                         goto err_destroy_cq;
1025                 }
1026         }
1027 
1028         ibdev_dbg(ibdev, "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n",
1029                   cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr);
1030 
1031         return 0;
1032 
1033 err_destroy_cq:
1034         efa_destroy_cq_idx(dev, cq->cq_idx);
1035 err_free_mapped:
1036         dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
1037                          DMA_FROM_DEVICE);
1038         if (!cq_entry_inserted)
1039                 free_pages_exact(cq->cpu_addr, cq->size);
1040 err_out:
1041         atomic64_inc(&dev->stats.sw_stats.create_cq_err);
1042         return err;
1043 }
1044 
1045 static int umem_to_page_list(struct efa_dev *dev,
1046                              struct ib_umem *umem,
1047                              u64 *page_list,
1048                              u32 hp_cnt,
1049                              u8 hp_shift)
1050 {
1051         u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT);
1052         struct ib_block_iter biter;
1053         unsigned int hp_idx = 0;
1054 
1055         ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
1056                   hp_cnt, pages_in_hp);
1057 
1058         rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap,
1059                             BIT(hp_shift))
1060                 page_list[hp_idx++] = rdma_block_iter_dma_address(&biter);
1061 
1062         return 0;
1063 }
1064 
1065 static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt)
1066 {
1067         struct scatterlist *sglist;
1068         struct page *pg;
1069         int i;
1070 
1071         sglist = kcalloc(page_cnt, sizeof(*sglist), GFP_KERNEL);
1072         if (!sglist)
1073                 return NULL;
1074         sg_init_table(sglist, page_cnt);
1075         for (i = 0; i < page_cnt; i++) {
1076                 pg = vmalloc_to_page(buf);
1077                 if (!pg)
1078                         goto err;
1079                 sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
1080                 buf += PAGE_SIZE / sizeof(*buf);
1081         }
1082         return sglist;
1083 
1084 err:
1085         kfree(sglist);
1086         return NULL;
1087 }
1088 
1089 /*
1090  * create a chunk list of physical pages dma addresses from the supplied
1091  * scatter gather list
1092  */
1093 static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl)
1094 {
1095         struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
1096         int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages;
1097         struct scatterlist *pages_sgl = pbl->phys.indirect.sgl;
1098         unsigned int chunk_list_size, chunk_idx, payload_idx;
1099         int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt;
1100         struct efa_com_ctrl_buff_info *ctrl_buf;
1101         u64 *cur_chunk_buf, *prev_chunk_buf;
1102         struct ib_block_iter biter;
1103         dma_addr_t dma_addr;
1104         int i;
1105 
1106         /* allocate a chunk list that consists of 4KB chunks */
1107         chunk_list_size = DIV_ROUND_UP(page_cnt, EFA_PTRS_PER_CHUNK);
1108 
1109         chunk_list->size = chunk_list_size;
1110         chunk_list->chunks = kcalloc(chunk_list_size,
1111                                      sizeof(*chunk_list->chunks),
1112                                      GFP_KERNEL);
1113         if (!chunk_list->chunks)
1114                 return -ENOMEM;
1115 
1116         ibdev_dbg(&dev->ibdev,
1117                   "chunk_list_size[%u] - pages[%u]\n", chunk_list_size,
1118                   page_cnt);
1119 
1120         /* allocate chunk buffers: */
1121         for (i = 0; i < chunk_list_size; i++) {
1122                 chunk_list->chunks[i].buf = kzalloc(EFA_CHUNK_SIZE, GFP_KERNEL);
1123                 if (!chunk_list->chunks[i].buf)
1124                         goto chunk_list_dealloc;
1125 
1126                 chunk_list->chunks[i].length = EFA_CHUNK_USED_SIZE;
1127         }
1128         chunk_list->chunks[chunk_list_size - 1].length =
1129                 ((page_cnt % EFA_PTRS_PER_CHUNK) * EFA_CHUNK_PAYLOAD_PTR_SIZE) +
1130                         EFA_CHUNK_PTR_SIZE;
1131 
1132         /* fill the dma addresses of sg list pages to chunks: */
1133         chunk_idx = 0;
1134         payload_idx = 0;
1135         cur_chunk_buf = chunk_list->chunks[0].buf;
1136         rdma_for_each_block(pages_sgl, &biter, sg_dma_cnt,
1137                             EFA_CHUNK_PAYLOAD_SIZE) {
1138                 cur_chunk_buf[payload_idx++] =
1139                         rdma_block_iter_dma_address(&biter);
1140 
1141                 if (payload_idx == EFA_PTRS_PER_CHUNK) {
1142                         chunk_idx++;
1143                         cur_chunk_buf = chunk_list->chunks[chunk_idx].buf;
1144                         payload_idx = 0;
1145                 }
1146         }
1147 
1148         /* map chunks to dma and fill chunks next ptrs */
1149         for (i = chunk_list_size - 1; i >= 0; i--) {
1150                 dma_addr = dma_map_single(&dev->pdev->dev,
1151                                           chunk_list->chunks[i].buf,
1152                                           chunk_list->chunks[i].length,
1153                                           DMA_TO_DEVICE);
1154                 if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
1155                         ibdev_err(&dev->ibdev,
1156                                   "chunk[%u] dma_map_failed\n", i);
1157                         goto chunk_list_unmap;
1158                 }
1159 
1160                 chunk_list->chunks[i].dma_addr = dma_addr;
1161                 ibdev_dbg(&dev->ibdev,
1162                           "chunk[%u] mapped at [%pad]\n", i, &dma_addr);
1163 
1164                 if (!i)
1165                         break;
1166 
1167                 prev_chunk_buf = chunk_list->chunks[i - 1].buf;
1168 
1169                 ctrl_buf = (struct efa_com_ctrl_buff_info *)
1170                                 &prev_chunk_buf[EFA_PTRS_PER_CHUNK];
1171                 ctrl_buf->length = chunk_list->chunks[i].length;
1172 
1173                 efa_com_set_dma_addr(dma_addr,
1174                                      &ctrl_buf->address.mem_addr_high,
1175                                      &ctrl_buf->address.mem_addr_low);
1176         }
1177 
1178         return 0;
1179 
1180 chunk_list_unmap:
1181         for (; i < chunk_list_size; i++) {
1182                 dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
1183                                  chunk_list->chunks[i].length, DMA_TO_DEVICE);
1184         }
1185 chunk_list_dealloc:
1186         for (i = 0; i < chunk_list_size; i++)
1187                 kfree(chunk_list->chunks[i].buf);
1188 
1189         kfree(chunk_list->chunks);
1190         return -ENOMEM;
1191 }
1192 
1193 static void pbl_chunk_list_destroy(struct efa_dev *dev, struct pbl_context *pbl)
1194 {
1195         struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
1196         int i;
1197 
1198         for (i = 0; i < chunk_list->size; i++) {
1199                 dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
1200                                  chunk_list->chunks[i].length, DMA_TO_DEVICE);
1201                 kfree(chunk_list->chunks[i].buf);
1202         }
1203 
1204         kfree(chunk_list->chunks);
1205 }
1206 
1207 /* initialize pbl continuous mode: map pbl buffer to a dma address. */
1208 static int pbl_continuous_initialize(struct efa_dev *dev,
1209                                      struct pbl_context *pbl)
1210 {
1211         dma_addr_t dma_addr;
1212 
1213         dma_addr = dma_map_single(&dev->pdev->dev, pbl->pbl_buf,
1214                                   pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
1215         if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
1216                 ibdev_err(&dev->ibdev, "Unable to map pbl to DMA address\n");
1217                 return -ENOMEM;
1218         }
1219 
1220         pbl->phys.continuous.dma_addr = dma_addr;
1221         ibdev_dbg(&dev->ibdev,
1222                   "pbl continuous - dma_addr = %pad, size[%u]\n",
1223                   &dma_addr, pbl->pbl_buf_size_in_bytes);
1224 
1225         return 0;
1226 }
1227 
1228 /*
1229  * initialize pbl indirect mode:
1230  * create a chunk list out of the dma addresses of the physical pages of
1231  * pbl buffer.
1232  */
1233 static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl)
1234 {
1235         u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, PAGE_SIZE);
1236         struct scatterlist *sgl;
1237         int sg_dma_cnt, err;
1238 
1239         BUILD_BUG_ON(EFA_CHUNK_PAYLOAD_SIZE > PAGE_SIZE);
1240         sgl = efa_vmalloc_buf_to_sg(pbl->pbl_buf, size_in_pages);
1241         if (!sgl)
1242                 return -ENOMEM;
1243 
1244         sg_dma_cnt = dma_map_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
1245         if (!sg_dma_cnt) {
1246                 err = -EINVAL;
1247                 goto err_map;
1248         }
1249 
1250         pbl->phys.indirect.pbl_buf_size_in_pages = size_in_pages;
1251         pbl->phys.indirect.sgl = sgl;
1252         pbl->phys.indirect.sg_dma_cnt = sg_dma_cnt;
1253         err = pbl_chunk_list_create(dev, pbl);
1254         if (err) {
1255                 ibdev_dbg(&dev->ibdev,
1256                           "chunk_list creation failed[%d]\n", err);
1257                 goto err_chunk;
1258         }
1259 
1260         ibdev_dbg(&dev->ibdev,
1261                   "pbl indirect - size[%u], chunks[%u]\n",
1262                   pbl->pbl_buf_size_in_bytes,
1263                   pbl->phys.indirect.chunk_list.size);
1264 
1265         return 0;
1266 
1267 err_chunk:
1268         dma_unmap_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
1269 err_map:
1270         kfree(sgl);
1271         return err;
1272 }
1273 
1274 static void pbl_indirect_terminate(struct efa_dev *dev, struct pbl_context *pbl)
1275 {
1276         pbl_chunk_list_destroy(dev, pbl);
1277         dma_unmap_sg(&dev->pdev->dev, pbl->phys.indirect.sgl,
1278                      pbl->phys.indirect.pbl_buf_size_in_pages, DMA_TO_DEVICE);
1279         kfree(pbl->phys.indirect.sgl);
1280 }
1281 
1282 /* create a page buffer list from a mapped user memory region */
1283 static int pbl_create(struct efa_dev *dev,
1284                       struct pbl_context *pbl,
1285                       struct ib_umem *umem,
1286                       int hp_cnt,
1287                       u8 hp_shift)
1288 {
1289         int err;
1290 
1291         pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE;
1292         pbl->pbl_buf = kvzalloc(pbl->pbl_buf_size_in_bytes, GFP_KERNEL);
1293         if (!pbl->pbl_buf)
1294                 return -ENOMEM;
1295 
1296         if (is_vmalloc_addr(pbl->pbl_buf)) {
1297                 pbl->physically_continuous = 0;
1298                 err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
1299                                         hp_shift);
1300                 if (err)
1301                         goto err_free;
1302 
1303                 err = pbl_indirect_initialize(dev, pbl);
1304                 if (err)
1305                         goto err_free;
1306         } else {
1307                 pbl->physically_continuous = 1;
1308                 err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
1309                                         hp_shift);
1310                 if (err)
1311                         goto err_free;
1312 
1313                 err = pbl_continuous_initialize(dev, pbl);
1314                 if (err)
1315                         goto err_free;
1316         }
1317 
1318         ibdev_dbg(&dev->ibdev,
1319                   "user_pbl_created: user_pages[%u], continuous[%u]\n",
1320                   hp_cnt, pbl->physically_continuous);
1321 
1322         return 0;
1323 
1324 err_free:
1325         kvfree(pbl->pbl_buf);
1326         return err;
1327 }
1328 
1329 static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl)
1330 {
1331         if (pbl->physically_continuous)
1332                 dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr,
1333                                  pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
1334         else
1335                 pbl_indirect_terminate(dev, pbl);
1336 
1337         kvfree(pbl->pbl_buf);
1338 }
1339 
1340 static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr,
1341                                  struct efa_com_reg_mr_params *params)
1342 {
1343         int err;
1344 
1345         params->inline_pbl = 1;
1346         err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array,
1347                                 params->page_num, params->page_shift);
1348         if (err)
1349                 return err;
1350 
1351         ibdev_dbg(&dev->ibdev,
1352                   "inline_pbl_array - pages[%u]\n", params->page_num);
1353 
1354         return 0;
1355 }
1356 
1357 static int efa_create_pbl(struct efa_dev *dev,
1358                           struct pbl_context *pbl,
1359                           struct efa_mr *mr,
1360                           struct efa_com_reg_mr_params *params)
1361 {
1362         int err;
1363 
1364         err = pbl_create(dev, pbl, mr->umem, params->page_num,
1365                          params->page_shift);
1366         if (err) {
1367                 ibdev_dbg(&dev->ibdev, "Failed to create pbl[%d]\n", err);
1368                 return err;
1369         }
1370 
1371         params->inline_pbl = 0;
1372         params->indirect = !pbl->physically_continuous;
1373         if (pbl->physically_continuous) {
1374                 params->pbl.pbl.length = pbl->pbl_buf_size_in_bytes;
1375 
1376                 efa_com_set_dma_addr(pbl->phys.continuous.dma_addr,
1377                                      &params->pbl.pbl.address.mem_addr_high,
1378                                      &params->pbl.pbl.address.mem_addr_low);
1379         } else {
1380                 params->pbl.pbl.length =
1381                         pbl->phys.indirect.chunk_list.chunks[0].length;
1382 
1383                 efa_com_set_dma_addr(pbl->phys.indirect.chunk_list.chunks[0].dma_addr,
1384                                      &params->pbl.pbl.address.mem_addr_high,
1385                                      &params->pbl.pbl.address.mem_addr_low);
1386         }
1387 
1388         return 0;
1389 }
1390 
1391 struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
1392                          u64 virt_addr, int access_flags,
1393                          struct ib_udata *udata)
1394 {
1395         struct efa_dev *dev = to_edev(ibpd->device);
1396         struct efa_com_reg_mr_params params = {};
1397         struct efa_com_reg_mr_result result = {};
1398         struct pbl_context pbl;
1399         unsigned int pg_sz;
1400         struct efa_mr *mr;
1401         int inline_size;
1402         int err;
1403 
1404         if (udata->inlen &&
1405             !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) {
1406                 ibdev_dbg(&dev->ibdev,
1407                           "Incompatible ABI params, udata not cleared\n");
1408                 err = -EINVAL;
1409                 goto err_out;
1410         }
1411 
1412         if (access_flags & ~EFA_SUPPORTED_ACCESS_FLAGS) {
1413                 ibdev_dbg(&dev->ibdev,
1414                           "Unsupported access flags[%#x], supported[%#x]\n",
1415                           access_flags, EFA_SUPPORTED_ACCESS_FLAGS);
1416                 err = -EOPNOTSUPP;
1417                 goto err_out;
1418         }
1419 
1420         mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1421         if (!mr) {
1422                 err = -ENOMEM;
1423                 goto err_out;
1424         }
1425 
1426         mr->umem = ib_umem_get(udata, start, length, access_flags, 0);
1427         if (IS_ERR(mr->umem)) {
1428                 err = PTR_ERR(mr->umem);
1429                 ibdev_dbg(&dev->ibdev,
1430                           "Failed to pin and map user space memory[%d]\n", err);
1431                 goto err_free;
1432         }
1433 
1434         params.pd = to_epd(ibpd)->pdn;
1435         params.iova = virt_addr;
1436         params.mr_length_in_bytes = length;
1437         params.permissions = access_flags & 0x1;
1438 
1439         pg_sz = ib_umem_find_best_pgsz(mr->umem,
1440                                        dev->dev_attr.page_size_cap,
1441                                        virt_addr);
1442         if (!pg_sz) {
1443                 err = -EOPNOTSUPP;
1444                 ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n",
1445                           dev->dev_attr.page_size_cap);
1446                 goto err_unmap;
1447         }
1448 
1449         params.page_shift = __ffs(pg_sz);
1450         params.page_num = DIV_ROUND_UP(length + (start & (pg_sz - 1)),
1451                                        pg_sz);
1452 
1453         ibdev_dbg(&dev->ibdev,
1454                   "start %#llx length %#llx params.page_shift %u params.page_num %u\n",
1455                   start, length, params.page_shift, params.page_num);
1456 
1457         inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array);
1458         if (params.page_num <= inline_size) {
1459                 err = efa_create_inline_pbl(dev, mr, &params);
1460                 if (err)
1461                         goto err_unmap;
1462 
1463                 err = efa_com_register_mr(&dev->edev, &params, &result);
1464                 if (err)
1465                         goto err_unmap;
1466         } else {
1467                 err = efa_create_pbl(dev, &pbl, mr, &params);
1468                 if (err)
1469                         goto err_unmap;
1470 
1471                 err = efa_com_register_mr(&dev->edev, &params, &result);
1472                 pbl_destroy(dev, &pbl);
1473 
1474                 if (err)
1475                         goto err_unmap;
1476         }
1477 
1478         mr->ibmr.lkey = result.l_key;
1479         mr->ibmr.rkey = result.r_key;
1480         mr->ibmr.length = length;
1481         ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey);
1482 
1483         return &mr->ibmr;
1484 
1485 err_unmap:
1486         ib_umem_release(mr->umem);
1487 err_free:
1488         kfree(mr);
1489 err_out:
1490         atomic64_inc(&dev->stats.sw_stats.reg_mr_err);
1491         return ERR_PTR(err);
1492 }
1493 
1494 int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1495 {
1496         struct efa_dev *dev = to_edev(ibmr->device);
1497         struct efa_com_dereg_mr_params params;
1498         struct efa_mr *mr = to_emr(ibmr);
1499         int err;
1500 
1501         ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey);
1502 
1503         params.l_key = mr->ibmr.lkey;
1504         err = efa_com_dereg_mr(&dev->edev, &params);
1505         if (err)
1506                 return err;
1507 
1508         ib_umem_release(mr->umem);
1509         kfree(mr);
1510 
1511         return 0;
1512 }
1513 
1514 int efa_get_port_immutable(struct ib_device *ibdev, u8 port_num,
1515                            struct ib_port_immutable *immutable)
1516 {
1517         struct ib_port_attr attr;
1518         int err;
1519 
1520         err = ib_query_port(ibdev, port_num, &attr);
1521         if (err) {
1522                 ibdev_dbg(ibdev, "Couldn't query port err[%d]\n", err);
1523                 return err;
1524         }
1525 
1526         immutable->pkey_tbl_len = attr.pkey_tbl_len;
1527         immutable->gid_tbl_len = attr.gid_tbl_len;
1528 
1529         return 0;
1530 }
1531 
1532 static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn)
1533 {
1534         struct efa_com_dealloc_uar_params params = {
1535                 .uarn = uarn,
1536         };
1537 
1538         return efa_com_dealloc_uar(&dev->edev, &params);
1539 }
1540 
1541 int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
1542 {
1543         struct efa_ucontext *ucontext = to_eucontext(ibucontext);
1544         struct efa_dev *dev = to_edev(ibucontext->device);
1545         struct efa_ibv_alloc_ucontext_resp resp = {};
1546         struct efa_com_alloc_uar_result result;
1547         int err;
1548 
1549         /*
1550          * it's fine if the driver does not know all request fields,
1551          * we will ack input fields in our response.
1552          */
1553 
1554         err = efa_com_alloc_uar(&dev->edev, &result);
1555         if (err)
1556                 goto err_out;
1557 
1558         ucontext->uarn = result.uarn;
1559         xa_init(&ucontext->mmap_xa);
1560 
1561         resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
1562         resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
1563         resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq;
1564         resp.inline_buf_size = dev->dev_attr.inline_buf_size;
1565         resp.max_llq_size = dev->dev_attr.max_llq_size;
1566 
1567         if (udata && udata->outlen) {
1568                 err = ib_copy_to_udata(udata, &resp,
1569                                        min(sizeof(resp), udata->outlen));
1570                 if (err)
1571                         goto err_dealloc_uar;
1572         }
1573 
1574         return 0;
1575 
1576 err_dealloc_uar:
1577         efa_dealloc_uar(dev, result.uarn);
1578 err_out:
1579         atomic64_inc(&dev->stats.sw_stats.alloc_ucontext_err);
1580         return err;
1581 }
1582 
1583 void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
1584 {
1585         struct efa_ucontext *ucontext = to_eucontext(ibucontext);
1586         struct efa_dev *dev = to_edev(ibucontext->device);
1587 
1588         mmap_entries_remove_free(dev, ucontext);
1589         efa_dealloc_uar(dev, ucontext->uarn);
1590 }
1591 
1592 static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
1593                       struct vm_area_struct *vma, u64 key, u64 length)
1594 {
1595         struct efa_mmap_entry *entry;
1596         unsigned long va;
1597         u64 pfn;
1598         int err;
1599 
1600         entry = mmap_entry_get(dev, ucontext, key, length);
1601         if (!entry) {
1602                 ibdev_dbg(&dev->ibdev, "key[%#llx] does not have valid entry\n",
1603                           key);
1604                 return -EINVAL;
1605         }
1606 
1607         ibdev_dbg(&dev->ibdev,
1608                   "Mapping address[%#llx], length[%#llx], mmap_flag[%d]\n",
1609                   entry->address, length, entry->mmap_flag);
1610 
1611         pfn = entry->address >> PAGE_SHIFT;
1612         switch (entry->mmap_flag) {
1613         case EFA_MMAP_IO_NC:
1614                 err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
1615                                         pgprot_noncached(vma->vm_page_prot));
1616                 break;
1617         case EFA_MMAP_IO_WC:
1618                 err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
1619                                         pgprot_writecombine(vma->vm_page_prot));
1620                 break;
1621         case EFA_MMAP_DMA_PAGE:
1622                 for (va = vma->vm_start; va < vma->vm_end;
1623                      va += PAGE_SIZE, pfn++) {
1624                         err = vm_insert_page(vma, va, pfn_to_page(pfn));
1625                         if (err)
1626                                 break;
1627                 }
1628                 break;
1629         default:
1630                 err = -EINVAL;
1631         }
1632 
1633         if (err) {
1634                 ibdev_dbg(
1635                         &dev->ibdev,
1636                         "Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n",
1637                         entry->address, length, entry->mmap_flag, err);
1638                 return err;
1639         }
1640 
1641         return 0;
1642 }
1643 
1644 int efa_mmap(struct ib_ucontext *ibucontext,
1645              struct vm_area_struct *vma)
1646 {
1647         struct efa_ucontext *ucontext = to_eucontext(ibucontext);
1648         struct efa_dev *dev = to_edev(ibucontext->device);
1649         u64 length = vma->vm_end - vma->vm_start;
1650         u64 key = vma->vm_pgoff << PAGE_SHIFT;
1651 
1652         ibdev_dbg(&dev->ibdev,
1653                   "start %#lx, end %#lx, length = %#llx, key = %#llx\n",
1654                   vma->vm_start, vma->vm_end, length, key);
1655 
1656         if (length % PAGE_SIZE != 0 || !(vma->vm_flags & VM_SHARED)) {
1657                 ibdev_dbg(&dev->ibdev,
1658                           "length[%#llx] is not page size aligned[%#lx] or VM_SHARED is not set [%#lx]\n",
1659                           length, PAGE_SIZE, vma->vm_flags);
1660                 return -EINVAL;
1661         }
1662 
1663         if (vma->vm_flags & VM_EXEC) {
1664                 ibdev_dbg(&dev->ibdev, "Mapping executable pages is not permitted\n");
1665                 return -EPERM;
1666         }
1667 
1668         return __efa_mmap(dev, ucontext, vma, key, length);
1669 }
1670 
1671 static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
1672 {
1673         struct efa_com_destroy_ah_params params = {
1674                 .ah = ah->ah,
1675                 .pdn = to_epd(ah->ibah.pd)->pdn,
1676         };
1677 
1678         return efa_com_destroy_ah(&dev->edev, &params);
1679 }
1680 
1681 int efa_create_ah(struct ib_ah *ibah,
1682                   struct rdma_ah_attr *ah_attr,
1683                   u32 flags,
1684                   struct ib_udata *udata)
1685 {
1686         struct efa_dev *dev = to_edev(ibah->device);
1687         struct efa_com_create_ah_params params = {};
1688         struct efa_ibv_create_ah_resp resp = {};
1689         struct efa_com_create_ah_result result;
1690         struct efa_ah *ah = to_eah(ibah);
1691         int err;
1692 
1693         if (!(flags & RDMA_CREATE_AH_SLEEPABLE)) {
1694                 ibdev_dbg(&dev->ibdev,
1695                           "Create address handle is not supported in atomic context\n");
1696                 err = -EOPNOTSUPP;
1697                 goto err_out;
1698         }
1699 
1700         if (udata->inlen &&
1701             !ib_is_udata_cleared(udata, 0, udata->inlen)) {
1702                 ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
1703                 err = -EINVAL;
1704                 goto err_out;
1705         }
1706 
1707         memcpy(params.dest_addr, ah_attr->grh.dgid.raw,
1708                sizeof(params.dest_addr));
1709         params.pdn = to_epd(ibah->pd)->pdn;
1710         err = efa_com_create_ah(&dev->edev, &params, &result);
1711         if (err)
1712                 goto err_out;
1713 
1714         memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id));
1715         ah->ah = result.ah;
1716 
1717         resp.efa_address_handle = result.ah;
1718 
1719         if (udata->outlen) {
1720                 err = ib_copy_to_udata(udata, &resp,
1721                                        min(sizeof(resp), udata->outlen));
1722                 if (err) {
1723                         ibdev_dbg(&dev->ibdev,
1724                                   "Failed to copy udata for create_ah response\n");
1725                         goto err_destroy_ah;
1726                 }
1727         }
1728         ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah);
1729 
1730         return 0;
1731 
1732 err_destroy_ah:
1733         efa_ah_destroy(dev, ah);
1734 err_out:
1735         atomic64_inc(&dev->stats.sw_stats.create_ah_err);
1736         return err;
1737 }
1738 
1739 void efa_destroy_ah(struct ib_ah *ibah, u32 flags)
1740 {
1741         struct efa_dev *dev = to_edev(ibah->pd->device);
1742         struct efa_ah *ah = to_eah(ibah);
1743 
1744         ibdev_dbg(&dev->ibdev, "Destroy ah[%d]\n", ah->ah);
1745 
1746         if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) {
1747                 ibdev_dbg(&dev->ibdev,
1748                           "Destroy address handle is not supported in atomic context\n");
1749                 return;
1750         }
1751 
1752         efa_ah_destroy(dev, ah);
1753 }
1754 
1755 struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u8 port_num)
1756 {
1757         return rdma_alloc_hw_stats_struct(efa_stats_names,
1758                                           ARRAY_SIZE(efa_stats_names),
1759                                           RDMA_HW_STATS_DEFAULT_LIFESPAN);
1760 }
1761 
1762 int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
1763                      u8 port_num, int index)
1764 {
1765         struct efa_com_get_stats_params params = {};
1766         union efa_com_get_stats_result result;
1767         struct efa_dev *dev = to_edev(ibdev);
1768         struct efa_com_basic_stats *bs;
1769         struct efa_com_stats_admin *as;
1770         struct efa_stats *s;
1771         int err;
1772 
1773         params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC;
1774         params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL;
1775 
1776         err = efa_com_get_stats(&dev->edev, &params, &result);
1777         if (err)
1778                 return err;
1779 
1780         bs = &result.basic_stats;
1781         stats->value[EFA_TX_BYTES] = bs->tx_bytes;
1782         stats->value[EFA_TX_PKTS] = bs->tx_pkts;
1783         stats->value[EFA_RX_BYTES] = bs->rx_bytes;
1784         stats->value[EFA_RX_PKTS] = bs->rx_pkts;
1785         stats->value[EFA_RX_DROPS] = bs->rx_drops;
1786 
1787         as = &dev->edev.aq.stats;
1788         stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
1789         stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
1790         stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion);
1791 
1792         s = &dev->stats;
1793         stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
1794         stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->sw_stats.alloc_pd_err);
1795         stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->sw_stats.create_qp_err);
1796         stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->sw_stats.reg_mr_err);
1797         stats->value[EFA_ALLOC_UCONTEXT_ERR] = atomic64_read(&s->sw_stats.alloc_ucontext_err);
1798         stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->sw_stats.create_ah_err);
1799 
1800         return ARRAY_SIZE(efa_stats_names);
1801 }
1802 
1803 enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
1804                                          u8 port_num)
1805 {
1806         return IB_LINK_LAYER_UNSPECIFIED;
1807 }
1808 

/* [<][>][^][v][top][bottom][index][help] */