root/arch/powerpc/mm/ptdump/hashpagetable.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. calculate_pagesize
  2. dump_flag_info
  3. dump_hpte_info
  4. native_find
  5. pseries_find
  6. decode_r
  7. base_hpte_find
  8. hpte_find
  9. walk_pte
  10. walk_pmd
  11. walk_pud
  12. walk_pagetables
  13. walk_linearmapping
  14. walk_vmemmap
  15. populate_markers
  16. ptdump_show
  17. ptdump_open
  18. ptdump_init

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Copyright 2016, Rashmica Gupta, IBM Corp.
   4  *
   5  * This traverses the kernel virtual memory and dumps the pages that are in
   6  * the hash pagetable, along with their flags to
   7  * /sys/kernel/debug/kernel_hash_pagetable.
   8  *
   9  * If radix is enabled then there is no hash page table and so no debugfs file
  10  * is generated.
  11  */
  12 #include <linux/debugfs.h>
  13 #include <linux/fs.h>
  14 #include <linux/io.h>
  15 #include <linux/mm.h>
  16 #include <linux/sched.h>
  17 #include <linux/seq_file.h>
  18 #include <asm/pgtable.h>
  19 #include <linux/const.h>
  20 #include <asm/page.h>
  21 #include <asm/pgalloc.h>
  22 #include <asm/plpar_wrappers.h>
  23 #include <linux/memblock.h>
  24 #include <asm/firmware.h>
  25 
  26 struct pg_state {
  27         struct seq_file *seq;
  28         const struct addr_marker *marker;
  29         unsigned long start_address;
  30         unsigned int level;
  31         u64 current_flags;
  32 };
  33 
  34 struct addr_marker {
  35         unsigned long start_address;
  36         const char *name;
  37 };
  38 
  39 static struct addr_marker address_markers[] = {
  40         { 0,    "Start of kernel VM" },
  41         { 0,    "vmalloc() Area" },
  42         { 0,    "vmalloc() End" },
  43         { 0,    "isa I/O start" },
  44         { 0,    "isa I/O end" },
  45         { 0,    "phb I/O start" },
  46         { 0,    "phb I/O end" },
  47         { 0,    "I/O remap start" },
  48         { 0,    "I/O remap end" },
  49         { 0,    "vmemmap start" },
  50         { -1,   NULL },
  51 };
  52 
  53 struct flag_info {
  54         u64             mask;
  55         u64             val;
  56         const char      *set;
  57         const char      *clear;
  58         bool            is_val;
  59         int             shift;
  60 };
  61 
  62 static const struct flag_info v_flag_array[] = {
  63         {
  64                 .mask   = SLB_VSID_B,
  65                 .val    = SLB_VSID_B_256M,
  66                 .set    = "ssize: 256M",
  67                 .clear  = "ssize: 1T  ",
  68         }, {
  69                 .mask   = HPTE_V_SECONDARY,
  70                 .val    = HPTE_V_SECONDARY,
  71                 .set    = "secondary",
  72                 .clear  = "primary  ",
  73         }, {
  74                 .mask   = HPTE_V_VALID,
  75                 .val    = HPTE_V_VALID,
  76                 .set    = "valid  ",
  77                 .clear  = "invalid",
  78         }, {
  79                 .mask   = HPTE_V_BOLTED,
  80                 .val    = HPTE_V_BOLTED,
  81                 .set    = "bolted",
  82                 .clear  = "",
  83         }
  84 };
  85 
  86 static const struct flag_info r_flag_array[] = {
  87         {
  88                 .mask   = HPTE_R_PP0 | HPTE_R_PP,
  89                 .val    = PP_RWXX,
  90                 .set    = "prot:RW--",
  91         }, {
  92                 .mask   = HPTE_R_PP0 | HPTE_R_PP,
  93                 .val    = PP_RWRX,
  94                 .set    = "prot:RWR-",
  95         }, {
  96                 .mask   = HPTE_R_PP0 | HPTE_R_PP,
  97                 .val    = PP_RWRW,
  98                 .set    = "prot:RWRW",
  99         }, {
 100                 .mask   = HPTE_R_PP0 | HPTE_R_PP,
 101                 .val    = PP_RXRX,
 102                 .set    = "prot:R-R-",
 103         }, {
 104                 .mask   = HPTE_R_PP0 | HPTE_R_PP,
 105                 .val    = PP_RXXX,
 106                 .set    = "prot:R---",
 107         }, {
 108                 .mask   = HPTE_R_KEY_HI | HPTE_R_KEY_LO,
 109                 .val    = HPTE_R_KEY_HI | HPTE_R_KEY_LO,
 110                 .set    = "key",
 111                 .clear  = "",
 112                 .is_val = true,
 113         }, {
 114                 .mask   = HPTE_R_R,
 115                 .val    = HPTE_R_R,
 116                 .set    = "ref",
 117                 .clear  = "   ",
 118         }, {
 119                 .mask   = HPTE_R_C,
 120                 .val    = HPTE_R_C,
 121                 .set    = "changed",
 122                 .clear  = "       ",
 123         }, {
 124                 .mask   = HPTE_R_N,
 125                 .val    = HPTE_R_N,
 126                 .set    = "no execute",
 127         }, {
 128                 .mask   = HPTE_R_WIMG,
 129                 .val    = HPTE_R_W,
 130                 .set    = "writethru",
 131         }, {
 132                 .mask   = HPTE_R_WIMG,
 133                 .val    = HPTE_R_I,
 134                 .set    = "no cache",
 135         }, {
 136                 .mask   = HPTE_R_WIMG,
 137                 .val    = HPTE_R_G,
 138                 .set    = "guarded",
 139         }
 140 };
 141 
 142 static int calculate_pagesize(struct pg_state *st, int ps, char s[])
 143 {
 144         static const char units[] = "BKMGTPE";
 145         const char *unit = units;
 146 
 147         while (ps > 9 && unit[1]) {
 148                 ps -= 10;
 149                 unit++;
 150         }
 151         seq_printf(st->seq, "  %s_ps: %i%c\t", s, 1<<ps, *unit);
 152         return ps;
 153 }
 154 
 155 static void dump_flag_info(struct pg_state *st, const struct flag_info
 156                 *flag, u64 pte, int num)
 157 {
 158         unsigned int i;
 159 
 160         for (i = 0; i < num; i++, flag++) {
 161                 const char *s = NULL;
 162                 u64 val;
 163 
 164                 /* flag not defined so don't check it */
 165                 if (flag->mask == 0)
 166                         continue;
 167                 /* Some 'flags' are actually values */
 168                 if (flag->is_val) {
 169                         val = pte & flag->val;
 170                         if (flag->shift)
 171                                 val = val >> flag->shift;
 172                         seq_printf(st->seq, "  %s:%llx", flag->set, val);
 173                 } else {
 174                         if ((pte & flag->mask) == flag->val)
 175                                 s = flag->set;
 176                         else
 177                                 s = flag->clear;
 178                         if (s)
 179                                 seq_printf(st->seq, "  %s", s);
 180                 }
 181         }
 182 }
 183 
 184 static void dump_hpte_info(struct pg_state *st, unsigned long ea, u64 v, u64 r,
 185                 unsigned long rpn, int bps, int aps, unsigned long lp)
 186 {
 187         int aps_index;
 188 
 189         while (ea >= st->marker[1].start_address) {
 190                 st->marker++;
 191                 seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
 192         }
 193         seq_printf(st->seq, "0x%lx:\t", ea);
 194         seq_printf(st->seq, "AVPN:%llx\t", HPTE_V_AVPN_VAL(v));
 195         dump_flag_info(st, v_flag_array, v, ARRAY_SIZE(v_flag_array));
 196         seq_printf(st->seq, "  rpn: %lx\t", rpn);
 197         dump_flag_info(st, r_flag_array, r, ARRAY_SIZE(r_flag_array));
 198 
 199         calculate_pagesize(st, bps, "base");
 200         aps_index = calculate_pagesize(st, aps, "actual");
 201         if (aps_index != 2)
 202                 seq_printf(st->seq, "LP enc: %lx", lp);
 203         seq_putc(st->seq, '\n');
 204 }
 205 
 206 
 207 static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64
 208                 *r)
 209 {
 210         struct hash_pte *hptep;
 211         unsigned long hash, vsid, vpn, hpte_group, want_v, hpte_v;
 212         int i, ssize = mmu_kernel_ssize;
 213         unsigned long shift = mmu_psize_defs[psize].shift;
 214 
 215         /* calculate hash */
 216         vsid = get_kernel_vsid(ea, ssize);
 217         vpn  = hpt_vpn(ea, vsid, ssize);
 218         hash = hpt_hash(vpn, shift, ssize);
 219         want_v = hpte_encode_avpn(vpn, psize, ssize);
 220 
 221         /* to check in the secondary hash table, we invert the hash */
 222         if (!primary)
 223                 hash = ~hash;
 224         hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 225         for (i = 0; i < HPTES_PER_GROUP; i++) {
 226                 hptep = htab_address + hpte_group;
 227                 hpte_v = be64_to_cpu(hptep->v);
 228 
 229                 if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
 230                         /* HPTE matches */
 231                         *v = be64_to_cpu(hptep->v);
 232                         *r = be64_to_cpu(hptep->r);
 233                         return 0;
 234                 }
 235                 ++hpte_group;
 236         }
 237         return -1;
 238 }
 239 
 240 static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r)
 241 {
 242         struct hash_pte ptes[4];
 243         unsigned long vsid, vpn, hash, hpte_group, want_v;
 244         int i, j, ssize = mmu_kernel_ssize;
 245         long lpar_rc = 0;
 246         unsigned long shift = mmu_psize_defs[psize].shift;
 247 
 248         /* calculate hash */
 249         vsid = get_kernel_vsid(ea, ssize);
 250         vpn  = hpt_vpn(ea, vsid, ssize);
 251         hash = hpt_hash(vpn, shift, ssize);
 252         want_v = hpte_encode_avpn(vpn, psize, ssize);
 253 
 254         /* to check in the secondary hash table, we invert the hash */
 255         if (!primary)
 256                 hash = ~hash;
 257         hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 258         /* see if we can find an entry in the hpte with this hash */
 259         for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) {
 260                 lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes);
 261 
 262                 if (lpar_rc != H_SUCCESS)
 263                         continue;
 264                 for (j = 0; j < 4; j++) {
 265                         if (HPTE_V_COMPARE(ptes[j].v, want_v) &&
 266                                         (ptes[j].v & HPTE_V_VALID)) {
 267                                 /* HPTE matches */
 268                                 *v = ptes[j].v;
 269                                 *r = ptes[j].r;
 270                                 return 0;
 271                         }
 272                 }
 273         }
 274         return -1;
 275 }
 276 
 277 static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps,
 278                 unsigned long *lp_bits)
 279 {
 280         struct mmu_psize_def entry;
 281         unsigned long arpn, mask, lp;
 282         int penc = -2, idx = 0, shift;
 283 
 284         /*.
 285          * The LP field has 8 bits. Depending on the actual page size, some of
 286          * these bits are concatenated with the APRN to get the RPN. The rest
 287          * of the bits in the LP field is the LP value and is an encoding for
 288          * the base page size and the actual page size.
 289          *
 290          *  -   find the mmu entry for our base page size
 291          *  -   go through all page encodings and use the associated mask to
 292          *      find an encoding that matches our encoding in the LP field.
 293          */
 294         arpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT;
 295         lp = arpn & 0xff;
 296 
 297         entry = mmu_psize_defs[bps];
 298         while (idx < MMU_PAGE_COUNT) {
 299                 penc = entry.penc[idx];
 300                 if ((penc != -1) && (mmu_psize_defs[idx].shift)) {
 301                         shift = mmu_psize_defs[idx].shift -  HPTE_R_RPN_SHIFT;
 302                         mask = (0x1 << (shift)) - 1;
 303                         if ((lp & mask) == penc) {
 304                                 *aps = mmu_psize_to_shift(idx);
 305                                 *lp_bits = lp & mask;
 306                                 *rpn = arpn >> shift;
 307                                 return;
 308                         }
 309                 }
 310                 idx++;
 311         }
 312 }
 313 
 314 static int base_hpte_find(unsigned long ea, int psize, bool primary, u64 *v,
 315                           u64 *r)
 316 {
 317         if (IS_ENABLED(CONFIG_PPC_PSERIES) && firmware_has_feature(FW_FEATURE_LPAR))
 318                 return pseries_find(ea, psize, primary, v, r);
 319 
 320         return native_find(ea, psize, primary, v, r);
 321 }
 322 
 323 static unsigned long hpte_find(struct pg_state *st, unsigned long ea, int psize)
 324 {
 325         unsigned long slot;
 326         u64 v  = 0, r = 0;
 327         unsigned long rpn, lp_bits;
 328         int base_psize = 0, actual_psize = 0;
 329 
 330         if (ea < PAGE_OFFSET)
 331                 return -1;
 332 
 333         /* Look in primary table */
 334         slot = base_hpte_find(ea, psize, true, &v, &r);
 335 
 336         /* Look in secondary table */
 337         if (slot == -1)
 338                 slot = base_hpte_find(ea, psize, false, &v, &r);
 339 
 340         /* No entry found */
 341         if (slot == -1)
 342                 return -1;
 343 
 344         /*
 345          * We found an entry in the hash page table:
 346          *  - check that this has the same base page
 347          *  - find the actual page size
 348          *  - find the RPN
 349          */
 350         base_psize = mmu_psize_to_shift(psize);
 351 
 352         if ((v & HPTE_V_LARGE) == HPTE_V_LARGE) {
 353                 decode_r(psize, r, &rpn, &actual_psize, &lp_bits);
 354         } else {
 355                 /* 4K actual page size */
 356                 actual_psize = 12;
 357                 rpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT;
 358                 /* In this case there are no LP bits */
 359                 lp_bits = -1;
 360         }
 361         /*
 362          * We didn't find a matching encoding, so the PTE we found isn't for
 363          * this address.
 364          */
 365         if (actual_psize == -1)
 366                 return -1;
 367 
 368         dump_hpte_info(st, ea, v, r, rpn, base_psize, actual_psize, lp_bits);
 369         return 0;
 370 }
 371 
 372 static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
 373 {
 374         pte_t *pte = pte_offset_kernel(pmd, 0);
 375         unsigned long addr, pteval, psize;
 376         int i, status;
 377 
 378         for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
 379                 addr = start + i * PAGE_SIZE;
 380                 pteval = pte_val(*pte);
 381 
 382                 if (addr < VMALLOC_END)
 383                         psize = mmu_vmalloc_psize;
 384                 else
 385                         psize = mmu_io_psize;
 386 
 387                 /* check for secret 4K mappings */
 388                 if (IS_ENABLED(CONFIG_PPC_64K_PAGES) &&
 389                     ((pteval & H_PAGE_COMBO) == H_PAGE_COMBO ||
 390                      (pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN))
 391                         psize = mmu_io_psize;
 392 
 393                 /* check for hashpte */
 394                 status = hpte_find(st, addr, psize);
 395 
 396                 if (((pteval & H_PAGE_HASHPTE) != H_PAGE_HASHPTE)
 397                                 && (status != -1)) {
 398                 /* found a hpte that is not in the linux page tables */
 399                         seq_printf(st->seq, "page probably bolted before linux"
 400                                 " pagetables were set: addr:%lx, pteval:%lx\n",
 401                                 addr, pteval);
 402                 }
 403         }
 404 }
 405 
 406 static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
 407 {
 408         pmd_t *pmd = pmd_offset(pud, 0);
 409         unsigned long addr;
 410         unsigned int i;
 411 
 412         for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
 413                 addr = start + i * PMD_SIZE;
 414                 if (!pmd_none(*pmd))
 415                         /* pmd exists */
 416                         walk_pte(st, pmd, addr);
 417         }
 418 }
 419 
 420 static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
 421 {
 422         pud_t *pud = pud_offset(pgd, 0);
 423         unsigned long addr;
 424         unsigned int i;
 425 
 426         for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
 427                 addr = start + i * PUD_SIZE;
 428                 if (!pud_none(*pud))
 429                         /* pud exists */
 430                         walk_pmd(st, pud, addr);
 431         }
 432 }
 433 
 434 static void walk_pagetables(struct pg_state *st)
 435 {
 436         pgd_t *pgd = pgd_offset_k(0UL);
 437         unsigned int i;
 438         unsigned long addr;
 439 
 440         /*
 441          * Traverse the linux pagetable structure and dump pages that are in
 442          * the hash pagetable.
 443          */
 444         for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
 445                 addr = KERN_VIRT_START + i * PGDIR_SIZE;
 446                 if (!pgd_none(*pgd))
 447                         /* pgd exists */
 448                         walk_pud(st, pgd, addr);
 449         }
 450 }
 451 
 452 
 453 static void walk_linearmapping(struct pg_state *st)
 454 {
 455         unsigned long addr;
 456 
 457         /*
 458          * Traverse the linear mapping section of virtual memory and dump pages
 459          * that are in the hash pagetable.
 460          */
 461         unsigned long psize = 1 << mmu_psize_defs[mmu_linear_psize].shift;
 462 
 463         for (addr = PAGE_OFFSET; addr < PAGE_OFFSET +
 464                         memblock_end_of_DRAM(); addr += psize)
 465                 hpte_find(st, addr, mmu_linear_psize);
 466 }
 467 
 468 static void walk_vmemmap(struct pg_state *st)
 469 {
 470         struct vmemmap_backing *ptr = vmemmap_list;
 471 
 472         if (!IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP))
 473                 return;
 474         /*
 475          * Traverse the vmemmaped memory and dump pages that are in the hash
 476          * pagetable.
 477          */
 478         while (ptr->list) {
 479                 hpte_find(st, ptr->virt_addr, mmu_vmemmap_psize);
 480                 ptr = ptr->list;
 481         }
 482         seq_puts(st->seq, "---[ vmemmap end ]---\n");
 483 }
 484 
 485 static void populate_markers(void)
 486 {
 487         address_markers[0].start_address = PAGE_OFFSET;
 488         address_markers[1].start_address = VMALLOC_START;
 489         address_markers[2].start_address = VMALLOC_END;
 490         address_markers[3].start_address = ISA_IO_BASE;
 491         address_markers[4].start_address = ISA_IO_END;
 492         address_markers[5].start_address = PHB_IO_BASE;
 493         address_markers[6].start_address = PHB_IO_END;
 494         address_markers[7].start_address = IOREMAP_BASE;
 495         address_markers[8].start_address = IOREMAP_END;
 496         address_markers[9].start_address =  H_VMEMMAP_START;
 497 }
 498 
 499 static int ptdump_show(struct seq_file *m, void *v)
 500 {
 501         struct pg_state st = {
 502                 .seq = m,
 503                 .start_address = PAGE_OFFSET,
 504                 .marker = address_markers,
 505         };
 506         /*
 507          * Traverse the 0xc, 0xd and 0xf areas of the kernel virtual memory and
 508          * dump pages that are in the hash pagetable.
 509          */
 510         walk_linearmapping(&st);
 511         walk_pagetables(&st);
 512         walk_vmemmap(&st);
 513         return 0;
 514 }
 515 
 516 static int ptdump_open(struct inode *inode, struct file *file)
 517 {
 518         return single_open(file, ptdump_show, NULL);
 519 }
 520 
 521 static const struct file_operations ptdump_fops = {
 522         .open           = ptdump_open,
 523         .read           = seq_read,
 524         .llseek         = seq_lseek,
 525         .release        = single_release,
 526 };
 527 
 528 static int ptdump_init(void)
 529 {
 530         struct dentry *debugfs_file;
 531 
 532         if (!radix_enabled()) {
 533                 populate_markers();
 534                 debugfs_file = debugfs_create_file("kernel_hash_pagetable",
 535                                 0400, NULL, NULL, &ptdump_fops);
 536                 return debugfs_file ? 0 : -ENOMEM;
 537         }
 538         return 0;
 539 }
 540 device_initcall(ptdump_init);

/* [<][>][^][v][top][bottom][index][help] */