root/arch/powerpc/kvm/book3s_hv_nested.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. kvmhv_save_hv_regs
  2. byteswap_pt_regs
  3. byteswap_hv_regs
  4. save_hv_return_state
  5. sanitise_hv_regs
  6. restore_hv_regs
  7. kvmhv_restore_hv_return_state
  8. kvmhv_nested_mmio_needed
  9. kvmhv_enter_nested_guest
  10. kvmhv_nested_init
  11. kvmhv_nested_exit
  12. kvmhv_flush_lpid
  13. kvmhv_set_ptbl_entry
  14. kvmhv_set_nested_ptbl
  15. kvmhv_vm_nested_init
  16. kvmhv_set_partition_table
  17. kvmhv_copy_tofrom_guest_nested
  18. kvmhv_update_ptbl_cache
  19. kvmhv_alloc_nested
  20. kvmhv_release_nested
  21. kvmhv_remove_nested
  22. kvmhv_release_all_nested
  23. kvmhv_flush_nested
  24. kvmhv_get_nested
  25. kvmhv_put_nested
  26. kvmhv_find_nested
  27. kvmhv_n_rmap_is_equal
  28. kvmhv_insert_nest_rmap
  29. kvmhv_update_nest_rmap_rc
  30. kvmhv_update_nest_rmap_rc_list
  31. kvmhv_remove_nest_rmap
  32. kvmhv_remove_nest_rmap_list
  33. kvmhv_remove_nest_rmap_range
  34. kvmhv_free_memslot_nest_rmap
  35. kvmhv_invalidate_shadow_pte
  36. get_ric
  37. get_prs
  38. get_r
  39. get_lpid
  40. get_is
  41. get_ap
  42. get_epn
  43. kvmhv_emulate_tlbie_tlb_addr
  44. kvmhv_emulate_tlbie_lpid
  45. kvmhv_emulate_tlbie_all_lpid
  46. kvmhv_emulate_priv_tlbie
  47. kvmhv_do_nested_tlbie
  48. kvmhv_translate_addr_nested
  49. kvmhv_handle_nested_set_rc
  50. kvmppc_radix_level_to_shift
  51. kvmppc_radix_shift_to_level
  52. __kvmhv_nested_page_fault
  53. kvmhv_nested_page_fault
  54. kvmhv_nested_next_lpid

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright IBM Corporation, 2018
   4  * Authors Suraj Jitindar Singh <sjitindarsingh@gmail.com>
   5  *         Paul Mackerras <paulus@ozlabs.org>
   6  *
   7  * Description: KVM functions specific to running nested KVM-HV guests
   8  * on Book3S processors (specifically POWER9 and later).
   9  */
  10 
  11 #include <linux/kernel.h>
  12 #include <linux/kvm_host.h>
  13 #include <linux/llist.h>
  14 
  15 #include <asm/kvm_ppc.h>
  16 #include <asm/kvm_book3s.h>
  17 #include <asm/mmu.h>
  18 #include <asm/pgtable.h>
  19 #include <asm/pgalloc.h>
  20 #include <asm/pte-walk.h>
  21 #include <asm/reg.h>
  22 
     /*
      * Partition table used when kvmhv_on_pseries() is true.  It is allocated
      * and registered via H_SET_PARTITION_TABLE in kvmhv_nested_init(), filled
      * in by kvmhv_set_ptbl_entry(), and freed in kvmhv_nested_exit().
      * NULL whenever no table is registered.
      */
  23 static struct patb_entry *pseries_partition_tb;
  24 
     /* Defined later in this file; declared here for kvmhv_enter_nested_guest(). */
  25 static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp);
     /* Declared here for kvmhv_release_all_nested(), which calls it per memslot. */
  26 static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free);
  27 
  28 void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
  29 {
  30         struct kvmppc_vcore *vc = vcpu->arch.vcore;
  31 
  32         hr->pcr = vc->pcr | PCR_MASK;
  33         hr->dpdes = vc->dpdes;
  34         hr->hfscr = vcpu->arch.hfscr;
  35         hr->tb_offset = vc->tb_offset;
  36         hr->dawr0 = vcpu->arch.dawr;
  37         hr->dawrx0 = vcpu->arch.dawrx;
  38         hr->ciabr = vcpu->arch.ciabr;
  39         hr->purr = vcpu->arch.purr;
  40         hr->spurr = vcpu->arch.spurr;
  41         hr->ic = vcpu->arch.ic;
  42         hr->vtb = vc->vtb;
  43         hr->srr0 = vcpu->arch.shregs.srr0;
  44         hr->srr1 = vcpu->arch.shregs.srr1;
  45         hr->sprg[0] = vcpu->arch.shregs.sprg0;
  46         hr->sprg[1] = vcpu->arch.shregs.sprg1;
  47         hr->sprg[2] = vcpu->arch.shregs.sprg2;
  48         hr->sprg[3] = vcpu->arch.shregs.sprg3;
  49         hr->pidr = vcpu->arch.pid;
  50         hr->cfar = vcpu->arch.cfar;
  51         hr->ppr = vcpu->arch.ppr;
  52 }
  53 
  54 static void byteswap_pt_regs(struct pt_regs *regs)
  55 {
  56         unsigned long *addr = (unsigned long *) regs;
  57 
  58         for (; addr < ((unsigned long *) (regs + 1)); addr++)
  59                 *addr = swab64(*addr);
  60 }
  61 
  62 static void byteswap_hv_regs(struct hv_guest_state *hr)
  63 {
  64         hr->version = swab64(hr->version);
  65         hr->lpid = swab32(hr->lpid);
  66         hr->vcpu_token = swab32(hr->vcpu_token);
  67         hr->lpcr = swab64(hr->lpcr);
  68         hr->pcr = swab64(hr->pcr) | PCR_MASK;
  69         hr->amor = swab64(hr->amor);
  70         hr->dpdes = swab64(hr->dpdes);
  71         hr->hfscr = swab64(hr->hfscr);
  72         hr->tb_offset = swab64(hr->tb_offset);
  73         hr->dawr0 = swab64(hr->dawr0);
  74         hr->dawrx0 = swab64(hr->dawrx0);
  75         hr->ciabr = swab64(hr->ciabr);
  76         hr->hdec_expiry = swab64(hr->hdec_expiry);
  77         hr->purr = swab64(hr->purr);
  78         hr->spurr = swab64(hr->spurr);
  79         hr->ic = swab64(hr->ic);
  80         hr->vtb = swab64(hr->vtb);
  81         hr->hdar = swab64(hr->hdar);
  82         hr->hdsisr = swab64(hr->hdsisr);
  83         hr->heir = swab64(hr->heir);
  84         hr->asdr = swab64(hr->asdr);
  85         hr->srr0 = swab64(hr->srr0);
  86         hr->srr1 = swab64(hr->srr1);
  87         hr->sprg[0] = swab64(hr->sprg[0]);
  88         hr->sprg[1] = swab64(hr->sprg[1]);
  89         hr->sprg[2] = swab64(hr->sprg[2]);
  90         hr->sprg[3] = swab64(hr->sprg[3]);
  91         hr->pidr = swab64(hr->pidr);
  92         hr->cfar = swab64(hr->cfar);
  93         hr->ppr = swab64(hr->ppr);
  94 }
  95 
  96 static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
  97                                  struct hv_guest_state *hr)
  98 {
  99         struct kvmppc_vcore *vc = vcpu->arch.vcore;
 100 
 101         hr->dpdes = vc->dpdes;
 102         hr->hfscr = vcpu->arch.hfscr;
 103         hr->purr = vcpu->arch.purr;
 104         hr->spurr = vcpu->arch.spurr;
 105         hr->ic = vcpu->arch.ic;
 106         hr->vtb = vc->vtb;
 107         hr->srr0 = vcpu->arch.shregs.srr0;
 108         hr->srr1 = vcpu->arch.shregs.srr1;
 109         hr->sprg[0] = vcpu->arch.shregs.sprg0;
 110         hr->sprg[1] = vcpu->arch.shregs.sprg1;
 111         hr->sprg[2] = vcpu->arch.shregs.sprg2;
 112         hr->sprg[3] = vcpu->arch.shregs.sprg3;
 113         hr->pidr = vcpu->arch.pid;
 114         hr->cfar = vcpu->arch.cfar;
 115         hr->ppr = vcpu->arch.ppr;
 116         switch (trap) {
 117         case BOOK3S_INTERRUPT_H_DATA_STORAGE:
 118                 hr->hdar = vcpu->arch.fault_dar;
 119                 hr->hdsisr = vcpu->arch.fault_dsisr;
 120                 hr->asdr = vcpu->arch.fault_gpa;
 121                 break;
 122         case BOOK3S_INTERRUPT_H_INST_STORAGE:
 123                 hr->asdr = vcpu->arch.fault_gpa;
 124                 break;
 125         case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
 126                 hr->heir = vcpu->arch.emul_inst;
 127                 break;
 128         }
 129 }
 130 
 131 static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 132 {
 133         /*
 134          * Don't let L1 enable features for L2 which we've disabled for L1,
 135          * but preserve the interrupt cause field.
 136          */
 137         hr->hfscr &= (HFSCR_INTR_CAUSE | vcpu->arch.hfscr);
 138 
 139         /* Don't let data address watchpoint match in hypervisor state */
 140         hr->dawrx0 &= ~DAWRX_HYP;
 141 
 142         /* Don't let completed instruction address breakpt match in HV state */
 143         if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
 144                 hr->ciabr &= ~CIABR_PRIV;
 145 }
 146 
 147 static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 148 {
 149         struct kvmppc_vcore *vc = vcpu->arch.vcore;
 150 
 151         vc->pcr = hr->pcr | PCR_MASK;
 152         vc->dpdes = hr->dpdes;
 153         vcpu->arch.hfscr = hr->hfscr;
 154         vcpu->arch.dawr = hr->dawr0;
 155         vcpu->arch.dawrx = hr->dawrx0;
 156         vcpu->arch.ciabr = hr->ciabr;
 157         vcpu->arch.purr = hr->purr;
 158         vcpu->arch.spurr = hr->spurr;
 159         vcpu->arch.ic = hr->ic;
 160         vc->vtb = hr->vtb;
 161         vcpu->arch.shregs.srr0 = hr->srr0;
 162         vcpu->arch.shregs.srr1 = hr->srr1;
 163         vcpu->arch.shregs.sprg0 = hr->sprg[0];
 164         vcpu->arch.shregs.sprg1 = hr->sprg[1];
 165         vcpu->arch.shregs.sprg2 = hr->sprg[2];
 166         vcpu->arch.shregs.sprg3 = hr->sprg[3];
 167         vcpu->arch.pid = hr->pidr;
 168         vcpu->arch.cfar = hr->cfar;
 169         vcpu->arch.ppr = hr->ppr;
 170 }
 171 
 172 void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
 173                                    struct hv_guest_state *hr)
 174 {
 175         struct kvmppc_vcore *vc = vcpu->arch.vcore;
 176 
 177         vc->dpdes = hr->dpdes;
 178         vcpu->arch.hfscr = hr->hfscr;
 179         vcpu->arch.purr = hr->purr;
 180         vcpu->arch.spurr = hr->spurr;
 181         vcpu->arch.ic = hr->ic;
 182         vc->vtb = hr->vtb;
 183         vcpu->arch.fault_dar = hr->hdar;
 184         vcpu->arch.fault_dsisr = hr->hdsisr;
 185         vcpu->arch.fault_gpa = hr->asdr;
 186         vcpu->arch.emul_inst = hr->heir;
 187         vcpu->arch.shregs.srr0 = hr->srr0;
 188         vcpu->arch.shregs.srr1 = hr->srr1;
 189         vcpu->arch.shregs.sprg0 = hr->sprg[0];
 190         vcpu->arch.shregs.sprg1 = hr->sprg[1];
 191         vcpu->arch.shregs.sprg2 = hr->sprg[2];
 192         vcpu->arch.shregs.sprg3 = hr->sprg[3];
 193         vcpu->arch.pid = hr->pidr;
 194         vcpu->arch.cfar = hr->cfar;
 195         vcpu->arch.ppr = hr->ppr;
 196 }
 197 
 198 static void kvmhv_nested_mmio_needed(struct kvm_vcpu *vcpu, u64 regs_ptr)
 199 {
 200         /* No need to reflect the page fault to L1, we've handled it */
 201         vcpu->arch.trap = 0;
 202 
 203         /*
 204          * Since the L2 gprs have already been written back into L1 memory when
 205          * we complete the mmio, store the L1 memory location of the L2 gpr
 206          * being loaded into by the mmio so that the loaded value can be
 207          * written there in kvmppc_complete_mmio_load()
 208          */
 209         if (((vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) == KVM_MMIO_REG_GPR)
 210             && (vcpu->mmio_is_write == 0)) {
 211                 vcpu->arch.nested_io_gpr = (gpa_t) regs_ptr +
 212                                            offsetof(struct pt_regs,
 213                                                     gpr[vcpu->arch.io_gpr]);
 214                 vcpu->arch.io_gpr = KVM_MMIO_REG_NESTED_GPR;
 215         }
 216 }
 217 
     /*
      * Run a nested (L2) guest on behalf of the L1 guest that owns @vcpu.
      * NOTE(review): presumably the handler for the H_ENTER_NESTED hcall -
      * confirm against the hcall dispatch code.
      *
      * Inputs, read from @vcpu's GPRs:
      *   r4 = L1 address of a struct hv_guest_state describing the L2 vcpu
      *   r5 = L1 address of a struct pt_regs holding the L2 registers
      *
      * Both structures are copied in, L1's own state is saved, the vcpu is
      * switched to the L2 state and run, and then L1's state is restored and
      * the updated L2 structures are copied back to the same L1 addresses.
      *
      * Return values:
      *   H_NOT_AVAILABLE - L1 has not registered a partition table
      *   H_PARAMETER     - a structure could not be read, vcpu_token is
      *                     >= NR_CPUS, or no nested guest could be found or
      *                     created for the lpid
      *   H_P2            - hv_guest_state version mismatch
      *   H_AUTHORITY     - a structure could not be written back
      *   H_INTERRUPT     - the run was interrupted (-EINTR)
      *   H_TOO_HARD      - an MMIO access needs to be completed
      *   otherwise the value left in vcpu->arch.trap.
      */
 218 long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 219 {
 220         long int err, r;
 221         struct kvm_nested_guest *l2;
 222         struct pt_regs l2_regs, saved_l1_regs;
 223         struct hv_guest_state l2_hv, saved_l1_hv;
 224         struct kvmppc_vcore *vc = vcpu->arch.vcore;
 225         u64 hv_ptr, regs_ptr;
 226         u64 hdec_exp;
 227         s64 delta_purr, delta_spurr, delta_ic, delta_vtb;
 228         u64 mask;
 229         unsigned long lpcr;
 230 
             /* l1_ptcr is only set by a successful kvmhv_set_partition_table() */
 231         if (vcpu->kvm->arch.l1_ptcr == 0)
 232                 return H_NOT_AVAILABLE;
 233 
 234         /* copy parameters in */
 235         hv_ptr = kvmppc_get_gpr(vcpu, 4);
 236         err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv,
 237                                   sizeof(struct hv_guest_state));
 238         if (err)
 239                 return H_PARAMETER;
 240         if (kvmppc_need_byteswap(vcpu))
 241                 byteswap_hv_regs(&l2_hv);
 242         if (l2_hv.version != HV_GUEST_STATE_VERSION)
 243                 return H_P2;
 244 
 245         regs_ptr = kvmppc_get_gpr(vcpu, 5);
 246         err = kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs,
 247                                   sizeof(struct pt_regs));
 248         if (err)
 249                 return H_PARAMETER;
 250         if (kvmppc_need_byteswap(vcpu))
 251                 byteswap_pt_regs(&l2_regs);
 252         if (l2_hv.vcpu_token >= NR_CPUS)
 253                 return H_PARAMETER;
 254 
 255         /* translate lpid */
             /* Takes a reference on l2; dropped by kvmhv_put_nested() below */
 256         l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
 257         if (!l2)
 258                 return H_PARAMETER;
             /* No cached partition table entry yet: load it from L1's table */
 259         if (!l2->l1_gr_to_hr) {
 260                 mutex_lock(&l2->tlb_lock);
 261                 kvmhv_update_ptbl_cache(l2);
 262                 mutex_unlock(&l2->tlb_lock);
 263         }
 264 
 265         /* save l1 values of things */
             /* Fold the live MSR into regs so the snapshot below includes it */
 266         vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
 267         saved_l1_regs = vcpu->arch.regs;
 268         kvmhv_save_hv_regs(vcpu, &saved_l1_hv);
 269 
 270         /* convert TB values/offsets to host (L0) values */
 271         hdec_exp = l2_hv.hdec_expiry - vc->tb_offset;
 272         vc->tb_offset += l2_hv.tb_offset;
 273 
 274         /* set L1 state to L2 state */
 275         vcpu->arch.nested = l2;
 276         vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
 277         vcpu->arch.regs = l2_regs;
 278         vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
             /*
              * Only the LPCR bits in mask are taken from L1's request; all
              * other bits come from the virtual core's LPCR.
              */
 279         mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
 280                 LPCR_LPES | LPCR_MER;
 281         lpcr = (vc->lpcr & ~mask) | (l2_hv.lpcr & mask);
 282         sanitise_hv_regs(vcpu, &l2_hv);
 283         restore_hv_regs(vcpu, &l2_hv);
 284 
 285         vcpu->arch.ret = RESUME_GUEST;
 286         vcpu->arch.trap = 0;
             /*
              * Keep running L2 for as long as the run result asks to resume
              * the guest.  Once the timebase reaches hdec_exp, stop and
              * report a hypervisor decrementer interrupt instead.
              */
 287         do {
 288                 if (mftb() >= hdec_exp) {
 289                         vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
 290                         r = RESUME_HOST;
 291                         break;
 292                 }
 293                 r = kvmhv_run_single_vcpu(vcpu->arch.kvm_run, vcpu, hdec_exp,
 294                                           lpcr);
 295         } while (is_kvmppc_resume_guest(r));
 296 
 297         /* save L2 state for return */
 298         l2_regs = vcpu->arch.regs;
 299         l2_regs.msr = vcpu->arch.shregs.msr;
             /*
              * How far the counters advanced while L2 ran.  This must be
              * computed before save_hv_return_state() overwrites the entry
              * values held in l2_hv.
              */
 300         delta_purr = vcpu->arch.purr - l2_hv.purr;
 301         delta_spurr = vcpu->arch.spurr - l2_hv.spurr;
 302         delta_ic = vcpu->arch.ic - l2_hv.ic;
 303         delta_vtb = vc->vtb - l2_hv.vtb;
 304         save_hv_return_state(vcpu, vcpu->arch.trap, &l2_hv);
 305 
 306         /* restore L1 state */
 307         vcpu->arch.nested = NULL;
 308         vcpu->arch.regs = saved_l1_regs;
 309         vcpu->arch.shregs.msr = saved_l1_regs.msr & ~MSR_TS_MASK;
 310         /* set L1 MSR TS field according to L2 transaction state */
 311         if (l2_regs.msr & MSR_TS_MASK)
 312                 vcpu->arch.shregs.msr |= MSR_TS_S;
 313         vc->tb_offset = saved_l1_hv.tb_offset;
 314         restore_hv_regs(vcpu, &saved_l1_hv);
             /* Add the L2 run's counter advances to L1's restored counters */
 315         vcpu->arch.purr += delta_purr;
 316         vcpu->arch.spurr += delta_spurr;
 317         vcpu->arch.ic += delta_ic;
 318         vc->vtb += delta_vtb;
 319 
 320         kvmhv_put_nested(l2);
 321 
 322         /* copy l2_hv_state and regs back to guest */
 323         if (kvmppc_need_byteswap(vcpu)) {
 324                 byteswap_hv_regs(&l2_hv);
 325                 byteswap_pt_regs(&l2_regs);
 326         }
 327         err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv,
 328                                    sizeof(struct hv_guest_state));
 329         if (err)
 330                 return H_AUTHORITY;
 331         err = kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs,
 332                                    sizeof(struct pt_regs));
 333         if (err)
 334                 return H_AUTHORITY;
 335 
 336         if (r == -EINTR)
 337                 return H_INTERRUPT;
 338 
 339         if (vcpu->mmio_needed) {
 340                 kvmhv_nested_mmio_needed(vcpu, regs_ptr);
 341                 return H_TOO_HARD;
 342         }
 343 
             /* Otherwise hand the interrupt that ended the L2 run back to L1 */
 344         return vcpu->arch.trap;
 345 }
 346 
 347 long kvmhv_nested_init(void)
 348 {
 349         long int ptb_order;
 350         unsigned long ptcr;
 351         long rc;
 352 
 353         if (!kvmhv_on_pseries())
 354                 return 0;
 355         if (!radix_enabled())
 356                 return -ENODEV;
 357 
 358         /* find log base 2 of KVMPPC_NR_LPIDS, rounding up */
 359         ptb_order = __ilog2(KVMPPC_NR_LPIDS - 1) + 1;
 360         if (ptb_order < 8)
 361                 ptb_order = 8;
 362         pseries_partition_tb = kmalloc(sizeof(struct patb_entry) << ptb_order,
 363                                        GFP_KERNEL);
 364         if (!pseries_partition_tb) {
 365                 pr_err("kvm-hv: failed to allocated nested partition table\n");
 366                 return -ENOMEM;
 367         }
 368 
 369         ptcr = __pa(pseries_partition_tb) | (ptb_order - 8);
 370         rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr);
 371         if (rc != H_SUCCESS) {
 372                 pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n",
 373                        rc);
 374                 kfree(pseries_partition_tb);
 375                 pseries_partition_tb = NULL;
 376                 return -ENODEV;
 377         }
 378 
 379         return 0;
 380 }
 381 
 382 void kvmhv_nested_exit(void)
 383 {
 384         /*
 385          * N.B. the kvmhv_on_pseries() test is there because it enables
 386          * the compiler to remove the call to plpar_hcall_norets()
 387          * when CONFIG_PPC_PSERIES=n.
 388          */
 389         if (kvmhv_on_pseries() && pseries_partition_tb) {
 390                 plpar_hcall_norets(H_SET_PARTITION_TABLE, 0);
 391                 kfree(pseries_partition_tb);
 392                 pseries_partition_tb = NULL;
 393         }
 394 }
 395 
 396 static void kvmhv_flush_lpid(unsigned int lpid)
 397 {
 398         long rc;
 399 
 400         if (!kvmhv_on_pseries()) {
 401                 radix__flush_all_lpid(lpid);
 402                 return;
 403         }
 404 
 405         rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
 406                                 lpid, TLBIEL_INVAL_SET_LPID);
 407         if (rc)
 408                 pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
 409 }
 410 
 411 void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1)
 412 {
 413         if (!kvmhv_on_pseries()) {
 414                 mmu_partition_table_set_entry(lpid, dw0, dw1, true);
 415                 return;
 416         }
 417 
 418         pseries_partition_tb[lpid].patb0 = cpu_to_be64(dw0);
 419         pseries_partition_tb[lpid].patb1 = cpu_to_be64(dw1);
 420         /* L0 will do the necessary barriers */
 421         kvmhv_flush_lpid(lpid);
 422 }
 423 
 424 static void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp)
 425 {
 426         unsigned long dw0;
 427 
 428         dw0 = PATB_HR | radix__get_tree_size() |
 429                 __pa(gp->shadow_pgtable) | RADIX_PGD_INDEX_SIZE;
 430         kvmhv_set_ptbl_entry(gp->shadow_lpid, dw0, gp->process_table);
 431 }
 432 
 433 void kvmhv_vm_nested_init(struct kvm *kvm)
 434 {
 435         kvm->arch.max_nested_lpid = -1;
 436 }
 437 
 438 /*
 439  * Handle the H_SET_PARTITION_TABLE hcall.
 440  * r4 = guest real address of partition table + log_2(size) - 12
 441  * (formatted as for the PTCR).
 442  */
 443 long kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
 444 {
 445         struct kvm *kvm = vcpu->kvm;
 446         unsigned long ptcr = kvmppc_get_gpr(vcpu, 4);
 447         int srcu_idx;
 448         long ret = H_SUCCESS;
 449 
 450         srcu_idx = srcu_read_lock(&kvm->srcu);
 451         /*
 452          * Limit the partition table to 4096 entries (because that's what
 453          * hardware supports), and check the base address.
 454          */
 455         if ((ptcr & PRTS_MASK) > 12 - 8 ||
 456             !kvm_is_visible_gfn(vcpu->kvm, (ptcr & PRTB_MASK) >> PAGE_SHIFT))
 457                 ret = H_PARAMETER;
 458         srcu_read_unlock(&kvm->srcu, srcu_idx);
 459         if (ret == H_SUCCESS)
 460                 kvm->arch.l1_ptcr = ptcr;
 461         return ret;
 462 }
 463 
 464 /*
 465  * Handle the H_COPY_TOFROM_GUEST hcall.
 466  * r4 = L1 lpid of nested guest
 467  * r5 = pid
 468  * r6 = eaddr to access
 469  * r7 = to buffer (L1 gpa)
 470  * r8 = from buffer (L1 gpa)
 471  * r9 = n bytes to copy
 472  */
 473 long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
 474 {
 475         struct kvm_nested_guest *gp;
 476         int l1_lpid = kvmppc_get_gpr(vcpu, 4);
 477         int pid = kvmppc_get_gpr(vcpu, 5);
 478         gva_t eaddr = kvmppc_get_gpr(vcpu, 6);
 479         gpa_t gp_to = (gpa_t) kvmppc_get_gpr(vcpu, 7);
 480         gpa_t gp_from = (gpa_t) kvmppc_get_gpr(vcpu, 8);
 481         void *buf;
 482         unsigned long n = kvmppc_get_gpr(vcpu, 9);
 483         bool is_load = !!gp_to;
 484         long rc;
 485 
 486         if (gp_to && gp_from) /* One must be NULL to determine the direction */
 487                 return H_PARAMETER;
 488 
 489         if (eaddr & (0xFFFUL << 52))
 490                 return H_PARAMETER;
 491 
 492         buf = kzalloc(n, GFP_KERNEL);
 493         if (!buf)
 494                 return H_NO_MEM;
 495 
 496         gp = kvmhv_get_nested(vcpu->kvm, l1_lpid, false);
 497         if (!gp) {
 498                 rc = H_PARAMETER;
 499                 goto out_free;
 500         }
 501 
 502         mutex_lock(&gp->tlb_lock);
 503 
 504         if (is_load) {
 505                 /* Load from the nested guest into our buffer */
 506                 rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid,
 507                                                      eaddr, buf, NULL, n);
 508                 if (rc)
 509                         goto not_found;
 510 
 511                 /* Write what was loaded into our buffer back to the L1 guest */
 512                 rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n);
 513                 if (rc)
 514                         goto not_found;
 515         } else {
 516                 /* Load the data to be stored from the L1 guest into our buf */
 517                 rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n);
 518                 if (rc)
 519                         goto not_found;
 520 
 521                 /* Store from our buffer into the nested guest */
 522                 rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid,
 523                                                      eaddr, NULL, buf, n);
 524                 if (rc)
 525                         goto not_found;
 526         }
 527 
 528 out_unlock:
 529         mutex_unlock(&gp->tlb_lock);
 530         kvmhv_put_nested(gp);
 531 out_free:
 532         kfree(buf);
 533         return rc;
 534 not_found:
 535         rc = H_NOT_FOUND;
 536         goto out_unlock;
 537 }
 538 
 539 /*
 540  * Reload the partition table entry for a guest.
 541  * Caller must hold gp->tlb_lock.
 542  */
 543 static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
 544 {
 545         int ret;
 546         struct patb_entry ptbl_entry;
 547         unsigned long ptbl_addr;
 548         struct kvm *kvm = gp->l1_host;
 549 
 550         ret = -EFAULT;
 551         ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
 552         if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8)))
 553                 ret = kvm_read_guest(kvm, ptbl_addr,
 554                                      &ptbl_entry, sizeof(ptbl_entry));
 555         if (ret) {
 556                 gp->l1_gr_to_hr = 0;
 557                 gp->process_table = 0;
 558         } else {
 559                 gp->l1_gr_to_hr = be64_to_cpu(ptbl_entry.patb0);
 560                 gp->process_table = be64_to_cpu(ptbl_entry.patb1);
 561         }
 562         kvmhv_set_nested_ptbl(gp);
 563 }
 564 
 565 struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
 566 {
 567         struct kvm_nested_guest *gp;
 568         long shadow_lpid;
 569 
 570         gp = kzalloc(sizeof(*gp), GFP_KERNEL);
 571         if (!gp)
 572                 return NULL;
 573         gp->l1_host = kvm;
 574         gp->l1_lpid = lpid;
 575         mutex_init(&gp->tlb_lock);
 576         gp->shadow_pgtable = pgd_alloc(kvm->mm);
 577         if (!gp->shadow_pgtable)
 578                 goto out_free;
 579         shadow_lpid = kvmppc_alloc_lpid();
 580         if (shadow_lpid < 0)
 581                 goto out_free2;
 582         gp->shadow_lpid = shadow_lpid;
 583         gp->radix = 1;
 584 
 585         memset(gp->prev_cpu, -1, sizeof(gp->prev_cpu));
 586 
 587         return gp;
 588 
 589  out_free2:
 590         pgd_free(kvm->mm, gp->shadow_pgtable);
 591  out_free:
 592         kfree(gp);
 593         return NULL;
 594 }
 595 
 596 /*
 597  * Free up any resources allocated for a nested guest.
 598  */
 599 static void kvmhv_release_nested(struct kvm_nested_guest *gp)
 600 {
 601         struct kvm *kvm = gp->l1_host;
 602 
 603         if (gp->shadow_pgtable) {
 604                 /*
 605                  * No vcpu is using this struct and no call to
 606                  * kvmhv_get_nested can find this struct,
 607                  * so we don't need to hold kvm->mmu_lock.
 608                  */
 609                 kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
 610                                           gp->shadow_lpid);
 611                 pgd_free(kvm->mm, gp->shadow_pgtable);
 612         }
 613         kvmhv_set_ptbl_entry(gp->shadow_lpid, 0, 0);
 614         kvmppc_free_lpid(gp->shadow_lpid);
 615         kfree(gp);
 616 }
 617 
 618 static void kvmhv_remove_nested(struct kvm_nested_guest *gp)
 619 {
 620         struct kvm *kvm = gp->l1_host;
 621         int lpid = gp->l1_lpid;
 622         long ref;
 623 
 624         spin_lock(&kvm->mmu_lock);
 625         if (gp == kvm->arch.nested_guests[lpid]) {
 626                 kvm->arch.nested_guests[lpid] = NULL;
 627                 if (lpid == kvm->arch.max_nested_lpid) {
 628                         while (--lpid >= 0 && !kvm->arch.nested_guests[lpid])
 629                                 ;
 630                         kvm->arch.max_nested_lpid = lpid;
 631                 }
 632                 --gp->refcnt;
 633         }
 634         ref = gp->refcnt;
 635         spin_unlock(&kvm->mmu_lock);
 636         if (ref == 0)
 637                 kvmhv_release_nested(gp);
 638 }
 639 
 640 /*
 641  * Free up all nested resources allocated for this guest.
 642  * This is called with no vcpus of the guest running, when
 643  * switching the guest to HPT mode or when destroying the
 644  * guest.
 645  */
 646 void kvmhv_release_all_nested(struct kvm *kvm)
 647 {
 648         int i;
 649         struct kvm_nested_guest *gp;
 650         struct kvm_nested_guest *freelist = NULL;
 651         struct kvm_memory_slot *memslot;
 652         int srcu_idx;
 653 
 654         spin_lock(&kvm->mmu_lock);
 655         for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
 656                 gp = kvm->arch.nested_guests[i];
 657                 if (!gp)
 658                         continue;
 659                 kvm->arch.nested_guests[i] = NULL;
 660                 if (--gp->refcnt == 0) {
 661                         gp->next = freelist;
 662                         freelist = gp;
 663                 }
 664         }
 665         kvm->arch.max_nested_lpid = -1;
 666         spin_unlock(&kvm->mmu_lock);
 667         while ((gp = freelist) != NULL) {
 668                 freelist = gp->next;
 669                 kvmhv_release_nested(gp);
 670         }
 671 
 672         srcu_idx = srcu_read_lock(&kvm->srcu);
 673         kvm_for_each_memslot(memslot, kvm_memslots(kvm))
 674                 kvmhv_free_memslot_nest_rmap(memslot);
 675         srcu_read_unlock(&kvm->srcu, srcu_idx);
 676 }
 677 
 678 /* caller must hold gp->tlb_lock */
 679 static void kvmhv_flush_nested(struct kvm_nested_guest *gp)
 680 {
 681         struct kvm *kvm = gp->l1_host;
 682 
 683         spin_lock(&kvm->mmu_lock);
 684         kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable, gp->shadow_lpid);
 685         spin_unlock(&kvm->mmu_lock);
 686         kvmhv_flush_lpid(gp->shadow_lpid);
 687         kvmhv_update_ptbl_cache(gp);
 688         if (gp->l1_gr_to_hr == 0)
 689                 kvmhv_remove_nested(gp);
 690 }
 691 
 692 struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
 693                                           bool create)
 694 {
 695         struct kvm_nested_guest *gp, *newgp;
 696 
 697         if (l1_lpid >= KVM_MAX_NESTED_GUESTS ||
 698             l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
 699                 return NULL;
 700 
 701         spin_lock(&kvm->mmu_lock);
 702         gp = kvm->arch.nested_guests[l1_lpid];
 703         if (gp)
 704                 ++gp->refcnt;
 705         spin_unlock(&kvm->mmu_lock);
 706 
 707         if (gp || !create)
 708                 return gp;
 709 
 710         newgp = kvmhv_alloc_nested(kvm, l1_lpid);
 711         if (!newgp)
 712                 return NULL;
 713         spin_lock(&kvm->mmu_lock);
 714         if (kvm->arch.nested_guests[l1_lpid]) {
 715                 /* someone else beat us to it */
 716                 gp = kvm->arch.nested_guests[l1_lpid];
 717         } else {
 718                 kvm->arch.nested_guests[l1_lpid] = newgp;
 719                 ++newgp->refcnt;
 720                 gp = newgp;
 721                 newgp = NULL;
 722                 if (l1_lpid > kvm->arch.max_nested_lpid)
 723                         kvm->arch.max_nested_lpid = l1_lpid;
 724         }
 725         ++gp->refcnt;
 726         spin_unlock(&kvm->mmu_lock);
 727 
 728         if (newgp)
 729                 kvmhv_release_nested(newgp);
 730 
 731         return gp;
 732 }
 733 
 734 void kvmhv_put_nested(struct kvm_nested_guest *gp)
 735 {
 736         struct kvm *kvm = gp->l1_host;
 737         long ref;
 738 
 739         spin_lock(&kvm->mmu_lock);
 740         ref = --gp->refcnt;
 741         spin_unlock(&kvm->mmu_lock);
 742         if (ref == 0)
 743                 kvmhv_release_nested(gp);
 744 }
 745 
 746 static struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid)
 747 {
 748         if (lpid > kvm->arch.max_nested_lpid)
 749                 return NULL;
 750         return kvm->arch.nested_guests[lpid];
 751 }
 752 
 753 static inline bool kvmhv_n_rmap_is_equal(u64 rmap_1, u64 rmap_2)
 754 {
 755         return !((rmap_1 ^ rmap_2) & (RMAP_NESTED_LPID_MASK |
 756                                        RMAP_NESTED_GPA_MASK));
 757 }
 758 
 759 void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
 760                             struct rmap_nested **n_rmap)
 761 {
 762         struct llist_node *entry = ((struct llist_head *) rmapp)->first;
 763         struct rmap_nested *cursor;
 764         u64 rmap, new_rmap = (*n_rmap)->rmap;
 765 
 766         /* Are there any existing entries? */
 767         if (!(*rmapp)) {
 768                 /* No -> use the rmap as a single entry */
 769                 *rmapp = new_rmap | RMAP_NESTED_IS_SINGLE_ENTRY;
 770                 return;
 771         }
 772 
 773         /* Do any entries match what we're trying to insert? */
 774         for_each_nest_rmap_safe(cursor, entry, &rmap) {
 775                 if (kvmhv_n_rmap_is_equal(rmap, new_rmap))
 776                         return;
 777         }
 778 
 779         /* Do we need to create a list or just add the new entry? */
 780         rmap = *rmapp;
 781         if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
 782                 *rmapp = 0UL;
 783         llist_add(&((*n_rmap)->list), (struct llist_head *) rmapp);
 784         if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
 785                 (*n_rmap)->list.next = (struct llist_node *) rmap;
 786 
 787         /* Set NULL so not freed by caller */
 788         *n_rmap = NULL;
 789 }
 790 
 791 static void kvmhv_update_nest_rmap_rc(struct kvm *kvm, u64 n_rmap,
 792                                       unsigned long clr, unsigned long set,
 793                                       unsigned long hpa, unsigned long mask)
 794 {
 795         struct kvm_nested_guest *gp;
 796         unsigned long gpa;
 797         unsigned int shift, lpid;
 798         pte_t *ptep;
 799 
 800         gpa = n_rmap & RMAP_NESTED_GPA_MASK;
 801         lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
 802         gp = kvmhv_find_nested(kvm, lpid);
 803         if (!gp)
 804                 return;
 805 
 806         /* Find the pte */
 807         ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
 808         /*
 809          * If the pte is present and the pfn is still the same, update the pte.
 810          * If the pfn has changed then this is a stale rmap entry, the nested
 811          * gpa actually points somewhere else now, and there is nothing to do.
 812          * XXX A future optimisation would be to remove the rmap entry here.
 813          */
 814         if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa)) {
 815                 __radix_pte_update(ptep, clr, set);
 816                 kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid);
 817         }
 818 }
 819 
 820 /*
 821  * For a given list of rmap entries, update the rc bits in all ptes in shadow
 822  * page tables for nested guests which are referenced by the rmap list.
 823  */
 824 void kvmhv_update_nest_rmap_rc_list(struct kvm *kvm, unsigned long *rmapp,
 825                                     unsigned long clr, unsigned long set,
 826                                     unsigned long hpa, unsigned long nbytes)
 827 {
 828         struct llist_node *entry = ((struct llist_head *) rmapp)->first;
 829         struct rmap_nested *cursor;
 830         unsigned long rmap, mask;
 831 
 832         if ((clr | set) & ~(_PAGE_DIRTY | _PAGE_ACCESSED))
 833                 return;
 834 
 835         mask = PTE_RPN_MASK & ~(nbytes - 1);
 836         hpa &= mask;
 837 
 838         for_each_nest_rmap_safe(cursor, entry, &rmap)
 839                 kvmhv_update_nest_rmap_rc(kvm, rmap, clr, set, hpa, mask);
 840 }
 841 
 842 static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap,
 843                                    unsigned long hpa, unsigned long mask)
 844 {
 845         struct kvm_nested_guest *gp;
 846         unsigned long gpa;
 847         unsigned int shift, lpid;
 848         pte_t *ptep;
 849 
 850         gpa = n_rmap & RMAP_NESTED_GPA_MASK;
 851         lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
 852         gp = kvmhv_find_nested(kvm, lpid);
 853         if (!gp)
 854                 return;
 855 
 856         /* Find and invalidate the pte */
 857         ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
 858         /* Don't spuriously invalidate ptes if the pfn has changed */
 859         if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa))
 860                 kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
 861 }
 862 
 863 static void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmapp,
 864                                         unsigned long hpa, unsigned long mask)
 865 {
 866         struct llist_node *entry = llist_del_all((struct llist_head *) rmapp);
 867         struct rmap_nested *cursor;
 868         unsigned long rmap;
 869 
 870         for_each_nest_rmap_safe(cursor, entry, &rmap) {
 871                 kvmhv_remove_nest_rmap(kvm, rmap, hpa, mask);
 872                 kfree(cursor);
 873         }
 874 }
 875 
 876 /* called with kvm->mmu_lock held */
 877 void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
 878                                   const struct kvm_memory_slot *memslot,
 879                                   unsigned long gpa, unsigned long hpa,
 880                                   unsigned long nbytes)
 881 {
 882         unsigned long gfn, end_gfn;
 883         unsigned long addr_mask;
 884 
 885         if (!memslot)
 886                 return;
 887         gfn = (gpa >> PAGE_SHIFT) - memslot->base_gfn;
 888         end_gfn = gfn + (nbytes >> PAGE_SHIFT);
 889 
 890         addr_mask = PTE_RPN_MASK & ~(nbytes - 1);
 891         hpa &= addr_mask;
 892 
 893         for (; gfn < end_gfn; gfn++) {
 894                 unsigned long *rmap = &memslot->arch.rmap[gfn];
 895                 kvmhv_remove_nest_rmap_list(kvm, rmap, hpa, addr_mask);
 896         }
 897 }
 898 
 899 static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free)
 900 {
 901         unsigned long page;
 902 
 903         for (page = 0; page < free->npages; page++) {
 904                 unsigned long rmap, *rmapp = &free->arch.rmap[page];
 905                 struct rmap_nested *cursor;
 906                 struct llist_node *entry;
 907 
 908                 entry = llist_del_all((struct llist_head *) rmapp);
 909                 for_each_nest_rmap_safe(cursor, entry, &rmap)
 910                         kfree(cursor);
 911         }
 912 }
 913 
 914 static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu,
 915                                         struct kvm_nested_guest *gp,
 916                                         long gpa, int *shift_ret)
 917 {
 918         struct kvm *kvm = vcpu->kvm;
 919         bool ret = false;
 920         pte_t *ptep;
 921         int shift;
 922 
 923         spin_lock(&kvm->mmu_lock);
 924         ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
 925         if (!shift)
 926                 shift = PAGE_SHIFT;
 927         if (ptep && pte_present(*ptep)) {
 928                 kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
 929                 ret = true;
 930         }
 931         spin_unlock(&kvm->mmu_lock);
 932 
 933         if (shift_ret)
 934                 *shift_ret = shift;
 935         return ret;
 936 }
 937 
 938 static inline int get_ric(unsigned int instr)
 939 {
 940         return (instr >> 18) & 0x3;
 941 }
 942 
 943 static inline int get_prs(unsigned int instr)
 944 {
 945         return (instr >> 17) & 0x1;
 946 }
 947 
 948 static inline int get_r(unsigned int instr)
 949 {
 950         return (instr >> 16) & 0x1;
 951 }
 952 
 953 static inline int get_lpid(unsigned long r_val)
 954 {
 955         return r_val & 0xffffffff;
 956 }
 957 
 958 static inline int get_is(unsigned long r_val)
 959 {
 960         return (r_val >> 10) & 0x3;
 961 }
 962 
 963 static inline int get_ap(unsigned long r_val)
 964 {
 965         return (r_val >> 5) & 0x7;
 966 }
 967 
 968 static inline long get_epn(unsigned long r_val)
 969 {
 970         return r_val >> 12;
 971 }
 972 
 973 static int kvmhv_emulate_tlbie_tlb_addr(struct kvm_vcpu *vcpu, int lpid,
 974                                         int ap, long epn)
 975 {
 976         struct kvm *kvm = vcpu->kvm;
 977         struct kvm_nested_guest *gp;
 978         long npages;
 979         int shift, shadow_shift;
 980         unsigned long addr;
 981 
 982         shift = ap_to_shift(ap);
 983         addr = epn << 12;
 984         if (shift < 0)
 985                 /* Invalid ap encoding */
 986                 return -EINVAL;
 987 
 988         addr &= ~((1UL << shift) - 1);
 989         npages = 1UL << (shift - PAGE_SHIFT);
 990 
 991         gp = kvmhv_get_nested(kvm, lpid, false);
 992         if (!gp) /* No such guest -> nothing to do */
 993                 return 0;
 994         mutex_lock(&gp->tlb_lock);
 995 
 996         /* There may be more than one host page backing this single guest pte */
 997         do {
 998                 kvmhv_invalidate_shadow_pte(vcpu, gp, addr, &shadow_shift);
 999 
1000                 npages -= 1UL << (shadow_shift - PAGE_SHIFT);
1001                 addr += 1UL << shadow_shift;
1002         } while (npages > 0);
1003 
1004         mutex_unlock(&gp->tlb_lock);
1005         kvmhv_put_nested(gp);
1006         return 0;
1007 }
1008 
1009 static void kvmhv_emulate_tlbie_lpid(struct kvm_vcpu *vcpu,
1010                                      struct kvm_nested_guest *gp, int ric)
1011 {
1012         struct kvm *kvm = vcpu->kvm;
1013 
1014         mutex_lock(&gp->tlb_lock);
1015         switch (ric) {
1016         case 0:
1017                 /* Invalidate TLB */
1018                 spin_lock(&kvm->mmu_lock);
1019                 kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
1020                                           gp->shadow_lpid);
1021                 kvmhv_flush_lpid(gp->shadow_lpid);
1022                 spin_unlock(&kvm->mmu_lock);
1023                 break;
1024         case 1:
1025                 /*
1026                  * Invalidate PWC
1027                  * We don't cache this -> nothing to do
1028                  */
1029                 break;
1030         case 2:
1031                 /* Invalidate TLB, PWC and caching of partition table entries */
1032                 kvmhv_flush_nested(gp);
1033                 break;
1034         default:
1035                 break;
1036         }
1037         mutex_unlock(&gp->tlb_lock);
1038 }
1039 
1040 static void kvmhv_emulate_tlbie_all_lpid(struct kvm_vcpu *vcpu, int ric)
1041 {
1042         struct kvm *kvm = vcpu->kvm;
1043         struct kvm_nested_guest *gp;
1044         int i;
1045 
1046         spin_lock(&kvm->mmu_lock);
1047         for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
1048                 gp = kvm->arch.nested_guests[i];
1049                 if (gp) {
1050                         spin_unlock(&kvm->mmu_lock);
1051                         kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
1052                         spin_lock(&kvm->mmu_lock);
1053                 }
1054         }
1055         spin_unlock(&kvm->mmu_lock);
1056 }
1057 
1058 static int kvmhv_emulate_priv_tlbie(struct kvm_vcpu *vcpu, unsigned int instr,
1059                                     unsigned long rsval, unsigned long rbval)
1060 {
1061         struct kvm *kvm = vcpu->kvm;
1062         struct kvm_nested_guest *gp;
1063         int r, ric, prs, is, ap;
1064         int lpid;
1065         long epn;
1066         int ret = 0;
1067 
1068         ric = get_ric(instr);
1069         prs = get_prs(instr);
1070         r = get_r(instr);
1071         lpid = get_lpid(rsval);
1072         is = get_is(rbval);
1073 
1074         /*
1075          * These cases are invalid and are not handled:
1076          * r   != 1 -> Only radix supported
1077          * prs == 1 -> Not HV privileged
1078          * ric == 3 -> No cluster bombs for radix
1079          * is  == 1 -> Partition scoped translations not associated with pid
1080          * (!is) && (ric == 1 || ric == 2) -> Not supported by ISA
1081          */
1082         if ((!r) || (prs) || (ric == 3) || (is == 1) ||
1083             ((!is) && (ric == 1 || ric == 2)))
1084                 return -EINVAL;
1085 
1086         switch (is) {
1087         case 0:
1088                 /*
1089                  * We know ric == 0
1090                  * Invalidate TLB for a given target address
1091                  */
1092                 epn = get_epn(rbval);
1093                 ap = get_ap(rbval);
1094                 ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap, epn);
1095                 break;
1096         case 2:
1097                 /* Invalidate matching LPID */
1098                 gp = kvmhv_get_nested(kvm, lpid, false);
1099                 if (gp) {
1100                         kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
1101                         kvmhv_put_nested(gp);
1102                 }
1103                 break;
1104         case 3:
1105                 /* Invalidate ALL LPIDs */
1106                 kvmhv_emulate_tlbie_all_lpid(vcpu, ric);
1107                 break;
1108         default:
1109                 ret = -EINVAL;
1110                 break;
1111         }
1112 
1113         return ret;
1114 }
1115 
1116 /*
1117  * This handles the H_TLB_INVALIDATE hcall.
1118  * Parameters are (r4) tlbie instruction code, (r5) rS contents,
1119  * (r6) rB contents.
1120  */
1121 long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
1122 {
1123         int ret;
1124 
1125         ret = kvmhv_emulate_priv_tlbie(vcpu, kvmppc_get_gpr(vcpu, 4),
1126                         kvmppc_get_gpr(vcpu, 5), kvmppc_get_gpr(vcpu, 6));
1127         if (ret)
1128                 return H_PARAMETER;
1129         return H_SUCCESS;
1130 }
1131 
1132 /* Used to convert a nested guest real address to a L1 guest real address */
1133 static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
1134                                        struct kvm_nested_guest *gp,
1135                                        unsigned long n_gpa, unsigned long dsisr,
1136                                        struct kvmppc_pte *gpte_p)
1137 {
1138         u64 fault_addr, flags = dsisr & DSISR_ISSTORE;
1139         int ret;
1140 
1141         ret = kvmppc_mmu_walk_radix_tree(vcpu, n_gpa, gpte_p, gp->l1_gr_to_hr,
1142                                          &fault_addr);
1143 
1144         if (ret) {
1145                 /* We didn't find a pte */
1146                 if (ret == -EINVAL) {
1147                         /* Unsupported mmu config */
1148                         flags |= DSISR_UNSUPP_MMU;
1149                 } else if (ret == -ENOENT) {
1150                         /* No translation found */
1151                         flags |= DSISR_NOHPTE;
1152                 } else if (ret == -EFAULT) {
1153                         /* Couldn't access L1 real address */
1154                         flags |= DSISR_PRTABLE_FAULT;
1155                         vcpu->arch.fault_gpa = fault_addr;
1156                 } else {
1157                         /* Unknown error */
1158                         return ret;
1159                 }
1160                 goto forward_to_l1;
1161         } else {
1162                 /* We found a pte -> check permissions */
1163                 if (dsisr & DSISR_ISSTORE) {
1164                         /* Can we write? */
1165                         if (!gpte_p->may_write) {
1166                                 flags |= DSISR_PROTFAULT;
1167                                 goto forward_to_l1;
1168                         }
1169                 } else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
1170                         /* Can we execute? */
1171                         if (!gpte_p->may_execute) {
1172                                 flags |= SRR1_ISI_N_OR_G;
1173                                 goto forward_to_l1;
1174                         }
1175                 } else {
1176                         /* Can we read? */
1177                         if (!gpte_p->may_read && !gpte_p->may_write) {
1178                                 flags |= DSISR_PROTFAULT;
1179                                 goto forward_to_l1;
1180                         }
1181                 }
1182         }
1183 
1184         return 0;
1185 
1186 forward_to_l1:
1187         vcpu->arch.fault_dsisr = flags;
1188         if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
1189                 vcpu->arch.shregs.msr &= ~0x783f0000ul;
1190                 vcpu->arch.shregs.msr |= flags;
1191         }
1192         return RESUME_HOST;
1193 }
1194 
1195 static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
1196                                        struct kvm_nested_guest *gp,
1197                                        unsigned long n_gpa,
1198                                        struct kvmppc_pte gpte,
1199                                        unsigned long dsisr)
1200 {
1201         struct kvm *kvm = vcpu->kvm;
1202         bool writing = !!(dsisr & DSISR_ISSTORE);
1203         u64 pgflags;
1204         long ret;
1205 
1206         /* Are the rc bits set in the L1 partition scoped pte? */
1207         pgflags = _PAGE_ACCESSED;
1208         if (writing)
1209                 pgflags |= _PAGE_DIRTY;
1210         if (pgflags & ~gpte.rc)
1211                 return RESUME_HOST;
1212 
1213         spin_lock(&kvm->mmu_lock);
1214         /* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */
1215         ret = kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, writing,
1216                                      gpte.raddr, kvm->arch.lpid);
1217         if (!ret) {
1218                 ret = -EINVAL;
1219                 goto out_unlock;
1220         }
1221 
1222         /* Set the rc bit in the pte of the shadow_pgtable for the nest guest */
1223         ret = kvmppc_hv_handle_set_rc(kvm, gp->shadow_pgtable, writing, n_gpa,
1224                                       gp->shadow_lpid);
1225         if (!ret)
1226                 ret = -EINVAL;
1227         else
1228                 ret = 0;
1229 
1230 out_unlock:
1231         spin_unlock(&kvm->mmu_lock);
1232         return ret;
1233 }
1234 
1235 static inline int kvmppc_radix_level_to_shift(int level)
1236 {
1237         switch (level) {
1238         case 2:
1239                 return PUD_SHIFT;
1240         case 1:
1241                 return PMD_SHIFT;
1242         default:
1243                 return PAGE_SHIFT;
1244         }
1245 }
1246 
1247 static inline int kvmppc_radix_shift_to_level(int shift)
1248 {
1249         if (shift == PUD_SHIFT)
1250                 return 2;
1251         if (shift == PMD_SHIFT)
1252                 return 1;
1253         if (shift == PAGE_SHIFT)
1254                 return 0;
1255         WARN_ON_ONCE(1);
1256         return 0;
1257 }
1258 
1259 /* called with gp->tlb_lock held */
1260 static long int __kvmhv_nested_page_fault(struct kvm_run *run,
1261                                           struct kvm_vcpu *vcpu,
1262                                           struct kvm_nested_guest *gp)
1263 {
1264         struct kvm *kvm = vcpu->kvm;
1265         struct kvm_memory_slot *memslot;
1266         struct rmap_nested *n_rmap;
1267         struct kvmppc_pte gpte;
1268         pte_t pte, *pte_p;
1269         unsigned long mmu_seq;
1270         unsigned long dsisr = vcpu->arch.fault_dsisr;
1271         unsigned long ea = vcpu->arch.fault_dar;
1272         unsigned long *rmapp;
1273         unsigned long n_gpa, gpa, gfn, perm = 0UL;
1274         unsigned int shift, l1_shift, level;
1275         bool writing = !!(dsisr & DSISR_ISSTORE);
1276         bool kvm_ro = false;
1277         long int ret;
1278 
1279         if (!gp->l1_gr_to_hr) {
1280                 kvmhv_update_ptbl_cache(gp);
1281                 if (!gp->l1_gr_to_hr)
1282                         return RESUME_HOST;
1283         }
1284 
1285         /* Convert the nested guest real address into a L1 guest real address */
1286 
1287         n_gpa = vcpu->arch.fault_gpa & ~0xF000000000000FFFULL;
1288         if (!(dsisr & DSISR_PRTABLE_FAULT))
1289                 n_gpa |= ea & 0xFFF;
1290         ret = kvmhv_translate_addr_nested(vcpu, gp, n_gpa, dsisr, &gpte);
1291 
1292         /*
1293          * If the hardware found a translation but we don't now have a usable
1294          * translation in the l1 partition-scoped tree, remove the shadow pte
1295          * and let the guest retry.
1296          */
1297         if (ret == RESUME_HOST &&
1298             (dsisr & (DSISR_PROTFAULT | DSISR_BADACCESS | DSISR_NOEXEC_OR_G |
1299                       DSISR_BAD_COPYPASTE)))
1300                 goto inval;
1301         if (ret)
1302                 return ret;
1303 
1304         /* Failed to set the reference/change bits */
1305         if (dsisr & DSISR_SET_RC) {
1306                 ret = kvmhv_handle_nested_set_rc(vcpu, gp, n_gpa, gpte, dsisr);
1307                 if (ret == RESUME_HOST)
1308                         return ret;
1309                 if (ret)
1310                         goto inval;
1311                 dsisr &= ~DSISR_SET_RC;
1312                 if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
1313                                DSISR_PROTFAULT)))
1314                         return RESUME_GUEST;
1315         }
1316 
1317         /*
1318          * We took an HISI or HDSI while we were running a nested guest which
1319          * means we have no partition scoped translation for that. This means
1320          * we need to insert a pte for the mapping into our shadow_pgtable.
1321          */
1322 
1323         l1_shift = gpte.page_shift;
1324         if (l1_shift < PAGE_SHIFT) {
1325                 /* We don't support l1 using a page size smaller than our own */
1326                 pr_err("KVM: L1 guest page shift (%d) less than our own (%d)\n",
1327                         l1_shift, PAGE_SHIFT);
1328                 return -EINVAL;
1329         }
1330         gpa = gpte.raddr;
1331         gfn = gpa >> PAGE_SHIFT;
1332 
1333         /* 1. Get the corresponding host memslot */
1334 
1335         memslot = gfn_to_memslot(kvm, gfn);
1336         if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
1337                 if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS)) {
1338                         /* unusual error -> reflect to the guest as a DSI */
1339                         kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
1340                         return RESUME_GUEST;
1341                 }
1342 
1343                 /* passthrough of emulated MMIO case */
1344                 return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing);
1345         }
1346         if (memslot->flags & KVM_MEM_READONLY) {
1347                 if (writing) {
1348                         /* Give the guest a DSI */
1349                         kvmppc_core_queue_data_storage(vcpu, ea,
1350                                         DSISR_ISSTORE | DSISR_PROTFAULT);
1351                         return RESUME_GUEST;
1352                 }
1353                 kvm_ro = true;
1354         }
1355 
1356         /* 2. Find the host pte for this L1 guest real address */
1357 
1358         /* Used to check for invalidations in progress */
1359         mmu_seq = kvm->mmu_notifier_seq;
1360         smp_rmb();
1361 
1362         /* See if can find translation in our partition scoped tables for L1 */
1363         pte = __pte(0);
1364         spin_lock(&kvm->mmu_lock);
1365         pte_p = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
1366         if (!shift)
1367                 shift = PAGE_SHIFT;
1368         if (pte_p)
1369                 pte = *pte_p;
1370         spin_unlock(&kvm->mmu_lock);
1371 
1372         if (!pte_present(pte) || (writing && !(pte_val(pte) & _PAGE_WRITE))) {
1373                 /* No suitable pte found -> try to insert a mapping */
1374                 ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot,
1375                                         writing, kvm_ro, &pte, &level);
1376                 if (ret == -EAGAIN)
1377                         return RESUME_GUEST;
1378                 else if (ret)
1379                         return ret;
1380                 shift = kvmppc_radix_level_to_shift(level);
1381         }
1382         /* Align gfn to the start of the page */
1383         gfn = (gpa & ~((1UL << shift) - 1)) >> PAGE_SHIFT;
1384 
1385         /* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */
1386 
1387         /* The permissions is the combination of the host and l1 guest ptes */
1388         perm |= gpte.may_read ? 0UL : _PAGE_READ;
1389         perm |= gpte.may_write ? 0UL : _PAGE_WRITE;
1390         perm |= gpte.may_execute ? 0UL : _PAGE_EXEC;
1391         /* Only set accessed/dirty (rc) bits if set in host and l1 guest ptes */
1392         perm |= (gpte.rc & _PAGE_ACCESSED) ? 0UL : _PAGE_ACCESSED;
1393         perm |= ((gpte.rc & _PAGE_DIRTY) && writing) ? 0UL : _PAGE_DIRTY;
1394         pte = __pte(pte_val(pte) & ~perm);
1395 
1396         /* What size pte can we insert? */
1397         if (shift > l1_shift) {
1398                 u64 mask;
1399                 unsigned int actual_shift = PAGE_SHIFT;
1400                 if (PMD_SHIFT < l1_shift)
1401                         actual_shift = PMD_SHIFT;
1402                 mask = (1UL << shift) - (1UL << actual_shift);
1403                 pte = __pte(pte_val(pte) | (gpa & mask));
1404                 shift = actual_shift;
1405         }
1406         level = kvmppc_radix_shift_to_level(shift);
1407         n_gpa &= ~((1UL << shift) - 1);
1408 
1409         /* 4. Insert the pte into our shadow_pgtable */
1410 
1411         n_rmap = kzalloc(sizeof(*n_rmap), GFP_KERNEL);
1412         if (!n_rmap)
1413                 return RESUME_GUEST; /* Let the guest try again */
1414         n_rmap->rmap = (n_gpa & RMAP_NESTED_GPA_MASK) |
1415                 (((unsigned long) gp->l1_lpid) << RMAP_NESTED_LPID_SHIFT);
1416         rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
1417         ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
1418                                 mmu_seq, gp->shadow_lpid, rmapp, &n_rmap);
1419         if (n_rmap)
1420                 kfree(n_rmap);
1421         if (ret == -EAGAIN)
1422                 ret = RESUME_GUEST;     /* Let the guest try again */
1423 
1424         return ret;
1425 
1426  inval:
1427         kvmhv_invalidate_shadow_pte(vcpu, gp, n_gpa, NULL);
1428         return RESUME_GUEST;
1429 }
1430 
1431 long int kvmhv_nested_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu)
1432 {
1433         struct kvm_nested_guest *gp = vcpu->arch.nested;
1434         long int ret;
1435 
1436         mutex_lock(&gp->tlb_lock);
1437         ret = __kvmhv_nested_page_fault(run, vcpu, gp);
1438         mutex_unlock(&gp->tlb_lock);
1439         return ret;
1440 }
1441 
1442 int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid)
1443 {
1444         int ret = -1;
1445 
1446         spin_lock(&kvm->mmu_lock);
1447         while (++lpid <= kvm->arch.max_nested_lpid) {
1448                 if (kvm->arch.nested_guests[lpid]) {
1449                         ret = lpid;
1450                         break;
1451                 }
1452         }
1453         spin_unlock(&kvm->mmu_lock);
1454         return ret;
1455 }

/* [<][>][^][v][top][bottom][index][help] */