root/drivers/infiniband/sw/siw/siw_mem.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. siw_mem_add
  2. siw_mem_id2obj
  3. siw_free_plist
  4. siw_umem_release
  5. siw_mr_add_mem
  6. siw_mr_drop_mem
  7. siw_free_mem
  8. siw_check_mem
  9. siw_check_sge
  10. siw_wqe_put_mem
  11. siw_invalidate_stag
  12. siw_pbl_get_buffer
  13. siw_pbl_alloc
  14. siw_umem_get

   1 // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
   2 
   3 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
   4 /* Copyright (c) 2008-2019, IBM Corporation */
   5 
   6 #include <linux/gfp.h>
   7 #include <rdma/ib_verbs.h>
   8 #include <linux/dma-mapping.h>
   9 #include <linux/slab.h>
  10 #include <linux/sched/mm.h>
  11 #include <linux/resource.h>
  12 
  13 #include "siw.h"
  14 #include "siw_mem.h"
  15 
  16 /*
  17  * Stag lookup is based on its index part only (24 bits).
  18  * The code avoids special Stag of zero and tries to randomize
  19  * STag values between 1 and SIW_STAG_MAX_INDEX.
  20  */
  21 int siw_mem_add(struct siw_device *sdev, struct siw_mem *m)
  22 {
  23         struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
  24         u32 id, next;
  25 
  26         get_random_bytes(&next, 4);
  27         next &= 0x00ffffff;
  28 
  29         if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next,
  30             GFP_KERNEL) < 0)
  31                 return -ENOMEM;
  32 
  33         /* Set the STag index part */
  34         m->stag = id << 8;
  35 
  36         siw_dbg_mem(m, "new MEM object\n");
  37 
  38         return 0;
  39 }
  40 
  41 /*
  42  * siw_mem_id2obj()
  43  *
  44  * resolves memory from stag given by id. might be called from:
  45  * o process context before sending out of sgl, or
  46  * o in softirq when resolving target memory
  47  */
  48 struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
  49 {
  50         struct siw_mem *mem;
  51 
  52         rcu_read_lock();
  53         mem = xa_load(&sdev->mem_xa, stag_index);
  54         if (likely(mem && kref_get_unless_zero(&mem->ref))) {
  55                 rcu_read_unlock();
  56                 return mem;
  57         }
  58         rcu_read_unlock();
  59 
  60         return NULL;
  61 }
  62 
  63 static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
  64                            bool dirty)
  65 {
  66         put_user_pages_dirty_lock(chunk->plist, num_pages, dirty);
  67 }
  68 
  69 void siw_umem_release(struct siw_umem *umem, bool dirty)
  70 {
  71         struct mm_struct *mm_s = umem->owning_mm;
  72         int i, num_pages = umem->num_pages;
  73 
  74         for (i = 0; num_pages; i++) {
  75                 int to_free = min_t(int, PAGES_PER_CHUNK, num_pages);
  76 
  77                 siw_free_plist(&umem->page_chunk[i], to_free,
  78                                umem->writable && dirty);
  79                 kfree(umem->page_chunk[i].plist);
  80                 num_pages -= to_free;
  81         }
  82         atomic64_sub(umem->num_pages, &mm_s->pinned_vm);
  83 
  84         mmdrop(mm_s);
  85         kfree(umem->page_chunk);
  86         kfree(umem);
  87 }
  88 
  89 int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
  90                    u64 start, u64 len, int rights)
  91 {
  92         struct siw_device *sdev = to_siw_dev(pd->device);
  93         struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
  94         struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
  95         u32 id, next;
  96 
  97         if (!mem)
  98                 return -ENOMEM;
  99 
 100         mem->mem_obj = mem_obj;
 101         mem->stag_valid = 0;
 102         mem->sdev = sdev;
 103         mem->va = start;
 104         mem->len = len;
 105         mem->pd = pd;
 106         mem->perms = rights & IWARP_ACCESS_MASK;
 107         kref_init(&mem->ref);
 108 
 109         mr->mem = mem;
 110 
 111         get_random_bytes(&next, 4);
 112         next &= 0x00ffffff;
 113 
 114         if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next,
 115             GFP_KERNEL) < 0) {
 116                 kfree(mem);
 117                 return -ENOMEM;
 118         }
 119         /* Set the STag index part */
 120         mem->stag = id << 8;
 121         mr->base_mr.lkey = mr->base_mr.rkey = mem->stag;
 122 
 123         return 0;
 124 }
 125 
 126 void siw_mr_drop_mem(struct siw_mr *mr)
 127 {
 128         struct siw_mem *mem = mr->mem, *found;
 129 
 130         mem->stag_valid = 0;
 131 
 132         /* make STag invalid visible asap */
 133         smp_mb();
 134 
 135         found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8);
 136         WARN_ON(found != mem);
 137         siw_mem_put(mem);
 138 }
 139 
 140 void siw_free_mem(struct kref *ref)
 141 {
 142         struct siw_mem *mem = container_of(ref, struct siw_mem, ref);
 143 
 144         siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n");
 145 
 146         if (!mem->is_mw && mem->mem_obj) {
 147                 if (mem->is_pbl == 0)
 148                         siw_umem_release(mem->umem, true);
 149                 else
 150                         kfree(mem->pbl);
 151         }
 152         kfree(mem);
 153 }
 154 
 155 /*
 156  * siw_check_mem()
 157  *
 158  * Check protection domain, STAG state, access permissions and
 159  * address range for memory object.
 160  *
 161  * @pd:         Protection Domain memory should belong to
 162  * @mem:        memory to be checked
 163  * @addr:       starting addr of mem
 164  * @perms:      requested access permissions
 165  * @len:        len of memory interval to be checked
 166  *
 167  */
 168 int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
 169                   enum ib_access_flags perms, int len)
 170 {
 171         if (!mem->stag_valid) {
 172                 siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag);
 173                 return -E_STAG_INVALID;
 174         }
 175         if (mem->pd != pd) {
 176                 siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag);
 177                 return -E_PD_MISMATCH;
 178         }
 179         /*
 180          * check access permissions
 181          */
 182         if ((mem->perms & perms) < perms) {
 183                 siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n",
 184                            mem->perms, perms);
 185                 return -E_ACCESS_PERM;
 186         }
 187         /*
 188          * Check if access falls into valid memory interval.
 189          */
 190         if (addr < mem->va || addr + len > mem->va + mem->len) {
 191                 siw_dbg_pd(pd, "MEM interval len %d\n", len);
 192                 siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n",
 193                            (void *)(uintptr_t)addr,
 194                            (void *)(uintptr_t)(addr + len));
 195                 siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n",
 196                            (void *)(uintptr_t)mem->va,
 197                            (void *)(uintptr_t)(mem->va + mem->len),
 198                            mem->stag);
 199 
 200                 return -E_BASE_BOUNDS;
 201         }
 202         return E_ACCESS_OK;
 203 }
 204 
 205 /*
 206  * siw_check_sge()
 207  *
 208  * Check SGE for access rights in given interval
 209  *
 210  * @pd:         Protection Domain memory should belong to
 211  * @sge:        SGE to be checked
 212  * @mem:        location of memory reference within array
 213  * @perms:      requested access permissions
 214  * @off:        starting offset in SGE
 215  * @len:        len of memory interval to be checked
 216  *
 217  * NOTE: Function references SGE's memory object (mem->obj)
 218  * if not yet done. New reference is kept if check went ok and
 219  * released if check failed. If mem->obj is already valid, no new
 220  * lookup is being done and mem is not released it check fails.
 221  */
 222 int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[],
 223                   enum ib_access_flags perms, u32 off, int len)
 224 {
 225         struct siw_device *sdev = to_siw_dev(pd->device);
 226         struct siw_mem *new = NULL;
 227         int rv = E_ACCESS_OK;
 228 
 229         if (len + off > sge->length) {
 230                 rv = -E_BASE_BOUNDS;
 231                 goto fail;
 232         }
 233         if (*mem == NULL) {
 234                 new = siw_mem_id2obj(sdev, sge->lkey >> 8);
 235                 if (unlikely(!new)) {
 236                         siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey);
 237                         rv = -E_STAG_INVALID;
 238                         goto fail;
 239                 }
 240                 *mem = new;
 241         }
 242         /* Check if user re-registered with different STag key */
 243         if (unlikely((*mem)->stag != sge->lkey)) {
 244                 siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey);
 245                 rv = -E_STAG_INVALID;
 246                 goto fail;
 247         }
 248         rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len);
 249         if (unlikely(rv))
 250                 goto fail;
 251 
 252         return 0;
 253 
 254 fail:
 255         if (new) {
 256                 *mem = NULL;
 257                 siw_mem_put(new);
 258         }
 259         return rv;
 260 }
 261 
 262 void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op)
 263 {
 264         switch (op) {
 265         case SIW_OP_SEND:
 266         case SIW_OP_WRITE:
 267         case SIW_OP_SEND_WITH_IMM:
 268         case SIW_OP_SEND_REMOTE_INV:
 269         case SIW_OP_READ:
 270         case SIW_OP_READ_LOCAL_INV:
 271                 if (!(wqe->sqe.flags & SIW_WQE_INLINE))
 272                         siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge);
 273                 break;
 274 
 275         case SIW_OP_RECEIVE:
 276                 siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge);
 277                 break;
 278 
 279         case SIW_OP_READ_RESPONSE:
 280                 siw_unref_mem_sgl(wqe->mem, 1);
 281                 break;
 282 
 283         default:
 284                 /*
 285                  * SIW_OP_INVAL_STAG and SIW_OP_REG_MR
 286                  * do not hold memory references
 287                  */
 288                 break;
 289         }
 290 }
 291 
 292 int siw_invalidate_stag(struct ib_pd *pd, u32 stag)
 293 {
 294         struct siw_device *sdev = to_siw_dev(pd->device);
 295         struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8);
 296         int rv = 0;
 297 
 298         if (unlikely(!mem)) {
 299                 siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag);
 300                 return -EINVAL;
 301         }
 302         if (unlikely(mem->pd != pd)) {
 303                 siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag);
 304                 rv = -EACCES;
 305                 goto out;
 306         }
 307         /*
 308          * Per RDMA verbs definition, an STag may already be in invalid
 309          * state if invalidation is requested. So no state check here.
 310          */
 311         mem->stag_valid = 0;
 312 
 313         siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag);
 314 out:
 315         siw_mem_put(mem);
 316         return rv;
 317 }
 318 
 319 /*
 320  * Gets physical address backed by PBL element. Address is referenced
 321  * by linear byte offset into list of variably sized PB elements.
 322  * Optionally, provides remaining len within current element, and
 323  * current PBL index for later resume at same element.
 324  */
 325 dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
 326 {
 327         int i = idx ? *idx : 0;
 328 
 329         while (i < pbl->num_buf) {
 330                 struct siw_pble *pble = &pbl->pbe[i];
 331 
 332                 if (pble->pbl_off + pble->size > off) {
 333                         u64 pble_off = off - pble->pbl_off;
 334 
 335                         if (len)
 336                                 *len = pble->size - pble_off;
 337                         if (idx)
 338                                 *idx = i;
 339 
 340                         return pble->addr + pble_off;
 341                 }
 342                 i++;
 343         }
 344         if (len)
 345                 *len = 0;
 346         return 0;
 347 }
 348 
 349 struct siw_pbl *siw_pbl_alloc(u32 num_buf)
 350 {
 351         struct siw_pbl *pbl;
 352         int buf_size = sizeof(*pbl);
 353 
 354         if (num_buf == 0)
 355                 return ERR_PTR(-EINVAL);
 356 
 357         buf_size += ((num_buf - 1) * sizeof(struct siw_pble));
 358 
 359         pbl = kzalloc(buf_size, GFP_KERNEL);
 360         if (!pbl)
 361                 return ERR_PTR(-ENOMEM);
 362 
 363         pbl->max_buf = num_buf;
 364 
 365         return pbl;
 366 }
 367 
 368 struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
 369 {
 370         struct siw_umem *umem;
 371         struct mm_struct *mm_s;
 372         u64 first_page_va;
 373         unsigned long mlock_limit;
 374         unsigned int foll_flags = FOLL_WRITE;
 375         int num_pages, num_chunks, i, rv = 0;
 376 
 377         if (!can_do_mlock())
 378                 return ERR_PTR(-EPERM);
 379 
 380         if (!len)
 381                 return ERR_PTR(-EINVAL);
 382 
 383         first_page_va = start & PAGE_MASK;
 384         num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
 385         num_chunks = (num_pages >> CHUNK_SHIFT) + 1;
 386 
 387         umem = kzalloc(sizeof(*umem), GFP_KERNEL);
 388         if (!umem)
 389                 return ERR_PTR(-ENOMEM);
 390 
 391         mm_s = current->mm;
 392         umem->owning_mm = mm_s;
 393         umem->writable = writable;
 394 
 395         mmgrab(mm_s);
 396 
 397         if (!writable)
 398                 foll_flags |= FOLL_FORCE;
 399 
 400         down_read(&mm_s->mmap_sem);
 401 
 402         mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 403 
 404         if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
 405                 rv = -ENOMEM;
 406                 goto out_sem_up;
 407         }
 408         umem->fp_addr = first_page_va;
 409 
 410         umem->page_chunk =
 411                 kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
 412         if (!umem->page_chunk) {
 413                 rv = -ENOMEM;
 414                 goto out_sem_up;
 415         }
 416         for (i = 0; num_pages; i++) {
 417                 int got, nents = min_t(int, num_pages, PAGES_PER_CHUNK);
 418 
 419                 umem->page_chunk[i].plist =
 420                         kcalloc(nents, sizeof(struct page *), GFP_KERNEL);
 421                 if (!umem->page_chunk[i].plist) {
 422                         rv = -ENOMEM;
 423                         goto out_sem_up;
 424                 }
 425                 got = 0;
 426                 while (nents) {
 427                         struct page **plist = &umem->page_chunk[i].plist[got];
 428 
 429                         rv = get_user_pages(first_page_va, nents,
 430                                             foll_flags | FOLL_LONGTERM,
 431                                             plist, NULL);
 432                         if (rv < 0)
 433                                 goto out_sem_up;
 434 
 435                         umem->num_pages += rv;
 436                         atomic64_add(rv, &mm_s->pinned_vm);
 437                         first_page_va += rv * PAGE_SIZE;
 438                         nents -= rv;
 439                         got += rv;
 440                 }
 441                 num_pages -= got;
 442         }
 443 out_sem_up:
 444         up_read(&mm_s->mmap_sem);
 445 
 446         if (rv > 0)
 447                 return umem;
 448 
 449         siw_umem_release(umem, false);
 450 
 451         return ERR_PTR(rv);
 452 }

/* [<][>][^][v][top][bottom][index][help] */