root/arch/arm64/kernel/hibernate.c


DEFINITIONS

This source file includes the following definitions.
  1. arch_hdr_invariants
  2. pfn_is_nosave
  3. save_processor_state
  4. restore_processor_state
  5. arch_hibernation_header_save
  6. arch_hibernation_header_restore
  7. create_safe_exec_page
  8. swsusp_arch_suspend
  9. _copy_pte
  10. copy_pte
  11. copy_pmd
  12. copy_pud
  13. copy_page_tables
  14. swsusp_arch_resume
  15. hibernate_resume_nonboot_cpu_disable

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Hibernate support specific for ARM64
   4  *
   5  * Derived from work on ARM hibernation support by:
   6  *
   7  * Ubuntu project, hibernation support for mach-dove
   8  * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu)
   9  * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.)
  10  *  https://lkml.org/lkml/2010/6/18/4
  11  *  https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html
  12  *  https://patchwork.kernel.org/patch/96442/
  13  *
  14  * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
  15  */
  16 #define pr_fmt(x) "hibernate: " x
  17 #include <linux/cpu.h>
  18 #include <linux/kvm_host.h>
  19 #include <linux/mm.h>
  20 #include <linux/pm.h>
  21 #include <linux/sched.h>
  22 #include <linux/suspend.h>
  23 #include <linux/utsname.h>
  24 #include <linux/version.h>
  25 
  26 #include <asm/barrier.h>
  27 #include <asm/cacheflush.h>
  28 #include <asm/cputype.h>
  29 #include <asm/daifflags.h>
  30 #include <asm/irqflags.h>
  31 #include <asm/kexec.h>
  32 #include <asm/memory.h>
  33 #include <asm/mmu_context.h>
  34 #include <asm/pgalloc.h>
  35 #include <asm/pgtable.h>
  36 #include <asm/pgtable-hwdef.h>
  37 #include <asm/sections.h>
  38 #include <asm/smp.h>
  39 #include <asm/smp_plat.h>
  40 #include <asm/suspend.h>
  41 #include <asm/sysreg.h>
  42 #include <asm/virt.h>
  43 
  44 /*
  45  * Hibernate core relies on this value being 0 on resume, and marks it
  46  * __nosavedata assuming it will keep the resume kernel's '0' value. This
  47  * doesn't happen with KASLR.
  48  *
  49  * defined as "__visible int in_suspend __nosavedata" in
  50  * kernel/power/hibernate.c
  51  */
  52 extern int in_suspend;
  53 
  54 /* Do we need to reset el2? */
  55 #define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode())
  56 
  57 /* temporary el2 vectors in the __hibernate_exit_text section. */
  58 extern char hibernate_el2_vectors[];
  59 
  60 /* hyp-stub vectors, used to restore el2 during resume from hibernate. */
  61 extern char __hyp_stub_vectors[];
  62 
  63 /*
  64  * The logical cpu number we should resume on, initialised to a non-cpu
  65  * number.
  66  */
  67 static int sleep_cpu = -EINVAL;
  68 
  69 /*
  70  * Values that may not change over hibernate/resume. We put the build number
  71  * and date in here so that we guarantee not to resume with a different
  72  * kernel.
  73  */
  74 struct arch_hibernate_hdr_invariants {
  75         char            uts_version[__NEW_UTS_LEN + 1];
  76 };
  77 
  78 /* These values need to be known across a hibernate/restore. */
  79 static struct arch_hibernate_hdr {
  80         struct arch_hibernate_hdr_invariants invariants;
  81 
  82         /* These are needed to find the relocated kernel if built with kaslr */
  83         phys_addr_t     ttbr1_el1;
  84         void            (*reenter_kernel)(void);
  85 
  86         /*
  87          * We need to know where the __hyp_stub_vectors are after restore to
  88          * re-configure el2.
  89          */
  90         phys_addr_t     __hyp_stub_vectors;
  91 
  92         u64             sleep_cpu_mpidr;
  93 } resume_hdr;
  94 
  95 static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i)
  96 {
  97         memset(i, 0, sizeof(*i));
  98         memcpy(i->uts_version, init_utsname()->version, sizeof(i->uts_version));
  99 }
 100 
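      /*
       * Tell the hibernate core which pfns must not be saved: the kernel's
       * __nosave region, plus any crash-kernel memory reported by
       * crash_is_nosave().
       */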
 101 int pfn_is_nosave(unsigned long pfn)
 102 {
 103         unsigned long nosave_begin_pfn = sym_to_pfn(&__nosave_begin);
 104         unsigned long nosave_end_pfn = sym_to_pfn(&__nosave_end - 1);
 105 
 106         return ((pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn)) ||
 107                 crash_is_nosave(pfn);
 108 }
 109 
 110 void notrace save_processor_state(void)
 111 {
 112         WARN_ON(num_online_cpus() != 1);
 113 }
 114 
 115 void notrace restore_processor_state(void)
 116 {
 117 }
 118 
 119 int arch_hibernation_header_save(void *addr, unsigned int max_size)
 120 {
 121         struct arch_hibernate_hdr *hdr = addr;
 122 
 123         if (max_size < sizeof(*hdr))
 124                 return -EOVERFLOW;
 125 
 126         arch_hdr_invariants(&hdr->invariants);
 127         hdr->ttbr1_el1          = __pa_symbol(swapper_pg_dir);
 128         hdr->reenter_kernel     = _cpu_resume;
 129 
 130         /* We can't use __hyp_get_vectors() because kvm may still be loaded */
 131         if (el2_reset_needed())
 132                 hdr->__hyp_stub_vectors = __pa_symbol(__hyp_stub_vectors);
 133         else
 134                 hdr->__hyp_stub_vectors = 0;
 135 
 136         /* Save the mpidr of the cpu we called cpu_suspend() on... */
 137         if (sleep_cpu < 0) {
 138                 pr_err("Failing to hibernate on an unknown CPU.\n");
 139                 return -ENODEV;
 140         }
 141         hdr->sleep_cpu_mpidr = cpu_logical_map(sleep_cpu);
 142         pr_info("Hibernating on CPU %d [mpidr:0x%llx]\n", sleep_cpu,
 143                 hdr->sleep_cpu_mpidr);
 144 
 145         return 0;
 146 }
 147 EXPORT_SYMBOL(arch_hibernation_header_save);
 148 
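      /*
       * Check the arch header read from the hibernate image: reject images
       * generated by a different kernel build (uts_version mismatch), look up
       * the CPU we hibernated on by mpidr, and bring it online if it is not
       * already.
       */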
 149 int arch_hibernation_header_restore(void *addr)
 150 {
 151         int ret;
 152         struct arch_hibernate_hdr_invariants invariants;
 153         struct arch_hibernate_hdr *hdr = addr;
 154 
 155         arch_hdr_invariants(&invariants);
 156         if (memcmp(&hdr->invariants, &invariants, sizeof(invariants))) {
 157                 pr_crit("Hibernate image not generated by this kernel!\n");
 158                 return -EINVAL;
 159         }
 160 
 161         sleep_cpu = get_logical_index(hdr->sleep_cpu_mpidr);
 162         pr_info("Hibernated on CPU %d [mpidr:0x%llx]\n", sleep_cpu,
 163                 hdr->sleep_cpu_mpidr);
 164         if (sleep_cpu < 0) {
 165                 pr_crit("Hibernated on a CPU not known to this kernel!\n");
 166                 sleep_cpu = -EINVAL;
 167                 return -EINVAL;
 168         }
 169         if (!cpu_online(sleep_cpu)) {
 170                 pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
 171                 ret = cpu_up(sleep_cpu);
 172                 if (ret) {
 173                         pr_err("Failed to bring hibernate-CPU up!\n");
 174                         sleep_cpu = -EINVAL;
 175                         return ret;
 176                 }
 177         }
 178 
 179         resume_hdr = *hdr;
 180 
 181         return 0;
 182 }
 183 EXPORT_SYMBOL(arch_hibernation_header_restore);
 184 
 185 /*
 186  * Copies length bytes, starting at src_start, into a new page,
 187  * performs cache maintenance, then maps it at the specified low
 188  * address as executable.
 189  *
 190  * This is used by hibernate to copy the code it needs to execute when
 191  * overwriting the kernel text. This function generates a new set of page
 192  * tables, which it loads into ttbr0.
 193  *
 194  * Length is provided as we probably only want 4K of data, even on a 64K
 195  * page system.
 196  */
 197 static int create_safe_exec_page(void *src_start, size_t length,
 198                                  unsigned long dst_addr,
 199                                  phys_addr_t *phys_dst_addr,
 200                                  void *(*allocator)(gfp_t mask),
 201                                  gfp_t mask)
 202 {
 203         int rc = 0;
 204         pgd_t *trans_pgd;
 205         pgd_t *pgdp;
 206         pud_t *pudp;
 207         pmd_t *pmdp;
 208         pte_t *ptep;
 209         unsigned long dst = (unsigned long)allocator(mask);
 210 
 211         if (!dst) {
 212                 rc = -ENOMEM;
 213                 goto out;
 214         }
 215 
 216         memcpy((void *)dst, src_start, length);
 217         __flush_icache_range(dst, dst + length);
 218 
 219         trans_pgd = allocator(mask);
 220         if (!trans_pgd) {
 221                 rc = -ENOMEM;
 222                 goto out;
 223         }
 224 
 225         pgdp = pgd_offset_raw(trans_pgd, dst_addr);
 226         if (pgd_none(READ_ONCE(*pgdp))) {
 227                 pudp = allocator(mask);
 228                 if (!pudp) {
 229                         rc = -ENOMEM;
 230                         goto out;
 231                 }
 232                 pgd_populate(&init_mm, pgdp, pudp);
 233         }
 234 
 235         pudp = pud_offset(pgdp, dst_addr);
 236         if (pud_none(READ_ONCE(*pudp))) {
 237                 pmdp = allocator(mask);
 238                 if (!pmdp) {
 239                         rc = -ENOMEM;
 240                         goto out;
 241                 }
 242                 pud_populate(&init_mm, pudp, pmdp);
 243         }
 244 
 245         pmdp = pmd_offset(pudp, dst_addr);
 246         if (pmd_none(READ_ONCE(*pmdp))) {
 247                 ptep = allocator(mask);
 248                 if (!ptep) {
 249                         rc = -ENOMEM;
 250                         goto out;
 251                 }
 252                 pmd_populate_kernel(&init_mm, pmdp, ptep);
 253         }
 254 
 255         ptep = pte_offset_kernel(pmdp, dst_addr);
 256         set_pte(ptep, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
 257 
 258         /*
 259          * Load our new page tables. A strict BBM approach requires that we
 260          * ensure that TLBs are free of any entries that may overlap with the
 261          * global mappings we are about to install.
 262          *
 263          * For a real hibernate/resume cycle TTBR0 currently points to a zero
 264          * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
 265          * runtime services), while for a userspace-driven test_resume cycle it
 266          * points to userspace page tables (and we must point it at a zero page
 267          * ourselves). Elsewhere we only (un)install the idmap with preemption
 268          * disabled, so T0SZ should be as required regardless.
 269          */
 270         cpu_set_reserved_ttbr0();
 271         local_flush_tlb_all();
 272         write_sysreg(phys_to_ttbr(virt_to_phys(pgdp)), ttbr0_el1);
 273         isb();
 274 
 275         *phys_dst_addr = virt_to_phys((void *)dst);
 276 
 277 out:
 278         return rc;
 279 }
 280 
 281 #define dcache_clean_range(start, end)  __flush_dcache_area(start, (end - start))
 282 
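      /*
       * The if/else below covers both halves of a hibernate cycle:
       * __cpu_suspend_enter() returns non-zero in the kernel that is saving
       * the image, and the context it saved is re-entered (appearing to
       * return zero) via cpu_resume once the image has been restored.
       */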
 283 int swsusp_arch_suspend(void)
 284 {
 285         int ret = 0;
 286         unsigned long flags;
 287         struct sleep_stack_data state;
 288 
 289         if (cpus_are_stuck_in_kernel()) {
 290                 pr_err("Can't hibernate: no mechanism to offline secondary CPUs.\n");
 291                 return -EBUSY;
 292         }
 293 
 294         flags = local_daif_save();
 295 
 296         if (__cpu_suspend_enter(&state)) {
 297                 /* make the crash dump kernel image visible/saveable */
 298                 crash_prepare_suspend();
 299 
 300                 sleep_cpu = smp_processor_id();
 301                 ret = swsusp_save();
 302         } else {
 303                 /* Clean kernel core startup/idle code to PoC */
 304                 dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end);
 305                 dcache_clean_range(__idmap_text_start, __idmap_text_end);
 306 
 307                 /* Clean kvm setup code to PoC? */
 308                 if (el2_reset_needed()) {
 309                         dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end);
 310                         dcache_clean_range(__hyp_text_start, __hyp_text_end);
 311                 }
 312 
 313                 /* make the crash dump kernel image protected again */
 314                 crash_post_resume();
 315 
 316                 /*
 317                  * Tell the hibernation core that we've just restored
 318                  * the memory
 319                  */
 320                 in_suspend = 0;
 321 
 322                 sleep_cpu = -EINVAL;
 323                 __cpu_suspend_exit();
 324 
 325                 /*
 326                  * Just in case the boot kernel did turn the SSBD
 327                  * mitigation off behind our back, let's set the state
 328                  * to what we expect it to be.
 329                  */
 330                 switch (arm64_get_ssbd_state()) {
 331                 case ARM64_SSBD_FORCE_ENABLE:
 332                 case ARM64_SSBD_KERNEL:
 333                         arm64_set_ssbd_mitigation(true);
 334                 }
 335         }
 336 
 337         local_daif_restore(flags);
 338 
 339         return ret;
 340 }
 341 
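      /*
       * _copy_pte()/copy_pte()/copy_pmd()/copy_pud()/copy_page_tables() build
       * a copy of the linear map's page tables in pages allocated with
       * get_safe_page(), clearing the read-only attribute so the restore code
       * can write to every page it needs to overwrite.
       */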
 342 static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
 343 {
 344         pte_t pte = READ_ONCE(*src_ptep);
 345 
 346         if (pte_valid(pte)) {
 347                 /*
 348                  * Resume will overwrite areas that may be marked
 349                  * read only (code, rodata). Clear the RDONLY bit from
 350                  * the temporary mappings we use during restore.
 351                  */
 352                 set_pte(dst_ptep, pte_mkwrite(pte));
 353         } else if (debug_pagealloc_enabled() && !pte_none(pte)) {
 354                 /*
 355                  * debug_pagealloc will have removed the PTE_VALID bit if
 356                  * the page isn't in use by the resume kernel. It may have
 357                  * been in use by the original kernel, in which case we need
 358                  * to put it back in our copy to do the restore.
 359                  *
 360                  * Before marking this entry valid, check that the pfn
 361                  * should be mapped.
 362                  */
 363                 BUG_ON(!pfn_valid(pte_pfn(pte)));
 364 
 365                 set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
 366         }
 367 }
 368 
 369 static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
 370                     unsigned long end)
 371 {
 372         pte_t *src_ptep;
 373         pte_t *dst_ptep;
 374         unsigned long addr = start;
 375 
 376         dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
 377         if (!dst_ptep)
 378                 return -ENOMEM;
 379         pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
 380         dst_ptep = pte_offset_kernel(dst_pmdp, start);
 381 
 382         src_ptep = pte_offset_kernel(src_pmdp, start);
 383         do {
 384                 _copy_pte(dst_ptep, src_ptep, addr);
 385         } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
 386 
 387         return 0;
 388 }
 389 
 390 static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
 391                     unsigned long end)
 392 {
 393         pmd_t *src_pmdp;
 394         pmd_t *dst_pmdp;
 395         unsigned long next;
 396         unsigned long addr = start;
 397 
 398         if (pud_none(READ_ONCE(*dst_pudp))) {
 399                 dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
 400                 if (!dst_pmdp)
 401                         return -ENOMEM;
 402                 pud_populate(&init_mm, dst_pudp, dst_pmdp);
 403         }
 404         dst_pmdp = pmd_offset(dst_pudp, start);
 405 
 406         src_pmdp = pmd_offset(src_pudp, start);
 407         do {
 408                 pmd_t pmd = READ_ONCE(*src_pmdp);
 409 
 410                 next = pmd_addr_end(addr, end);
 411                 if (pmd_none(pmd))
 412                         continue;
 413                 if (pmd_table(pmd)) {
 414                         if (copy_pte(dst_pmdp, src_pmdp, addr, next))
 415                                 return -ENOMEM;
 416                 } else {
 417                         set_pmd(dst_pmdp,
 418                                 __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
 419                 }
 420         } while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
 421 
 422         return 0;
 423 }
 424 
 425 static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
 426                     unsigned long end)
 427 {
 428         pud_t *dst_pudp;
 429         pud_t *src_pudp;
 430         unsigned long next;
 431         unsigned long addr = start;
 432 
 433         if (pgd_none(READ_ONCE(*dst_pgdp))) {
 434                 dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
 435                 if (!dst_pudp)
 436                         return -ENOMEM;
 437                 pgd_populate(&init_mm, dst_pgdp, dst_pudp);
 438         }
 439         dst_pudp = pud_offset(dst_pgdp, start);
 440 
 441         src_pudp = pud_offset(src_pgdp, start);
 442         do {
 443                 pud_t pud = READ_ONCE(*src_pudp);
 444 
 445                 next = pud_addr_end(addr, end);
 446                 if (pud_none(pud))
 447                         continue;
 448                 if (pud_table(pud)) {
 449                         if (copy_pmd(dst_pudp, src_pudp, addr, next))
 450                                 return -ENOMEM;
 451                 } else {
 452                         set_pud(dst_pudp,
 453                                 __pud(pud_val(pud) & ~PMD_SECT_RDONLY));
 454                 }
 455         } while (dst_pudp++, src_pudp++, addr = next, addr != end);
 456 
 457         return 0;
 458 }
 459 
 460 static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
 461                             unsigned long end)
 462 {
 463         unsigned long next;
 464         unsigned long addr = start;
 465         pgd_t *src_pgdp = pgd_offset_k(start);
 466 
 467         dst_pgdp = pgd_offset_raw(dst_pgdp, start);
 468         do {
 469                 next = pgd_addr_end(addr, end);
 470                 if (pgd_none(READ_ONCE(*src_pgdp)))
 471                         continue;
 472                 if (copy_pud(dst_pgdp, src_pgdp, addr, next))
 473                         return -ENOMEM;
 474         } while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
 475 
 476         return 0;
 477 }
 478 
 479 /*
 480  * Set up, then resume from the hibernate image using swsusp_arch_suspend_exit().
 481  *
 482  * Memory allocated by get_safe_page() will be dealt with by the hibernate code;
 483  * we don't need to free it here.
 484  */
 485 int swsusp_arch_resume(void)
 486 {
 487         int rc = 0;
 488         void *zero_page;
 489         size_t exit_size;
 490         pgd_t *tmp_pg_dir;
 491         phys_addr_t phys_hibernate_exit;
 492         void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
 493                                           void *, phys_addr_t, phys_addr_t);
 494 
 495         /*
 496          * Restoring the memory image will overwrite the ttbr1 page tables.
 497          * Create a second copy of just the linear map, and use this when
 498          * restoring.
 499          */
 500         tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
 501         if (!tmp_pg_dir) {
 502                 pr_err("Failed to allocate memory for temporary page tables.\n");
 503                 rc = -ENOMEM;
 504                 goto out;
 505         }
 506         rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, PAGE_END);
 507         if (rc)
 508                 goto out;
 509 
 510         /*
 511          * We need a zero page that is zero before & after resume in order to
 512  * break before make on the ttbr1 page tables.
 513          */
 514         zero_page = (void *)get_safe_page(GFP_ATOMIC);
 515         if (!zero_page) {
 516                 pr_err("Failed to allocate zero page.\n");
 517                 rc = -ENOMEM;
 518                 goto out;
 519         }
 520 
 521         /*
 522          * Locate the exit code in the bottom-but-one page, so that *NULL
 523  * still has disastrous effects.
 524          */
 525         hibernate_exit = (void *)PAGE_SIZE;
 526         exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
 527         /*
 528          * Copy swsusp_arch_suspend_exit() to a safe page. This will generate
 529          * a new set of ttbr0 page tables and load them.
 530          */
 531         rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
 532                                    (unsigned long)hibernate_exit,
 533                                    &phys_hibernate_exit,
 534                                    (void *)get_safe_page, GFP_ATOMIC);
 535         if (rc) {
 536                 pr_err("Failed to create safe executable page for hibernate_exit code.\n");
 537                 goto out;
 538         }
 539 
 540         /*
 541  * The hibernate exit text contains a set of el2 vectors that will
 542          * be executed at el2 with the mmu off in order to reload hyp-stub.
 543          */
 544         __flush_dcache_area(hibernate_exit, exit_size);
 545 
 546         /*
 547          * KASLR will cause the el2 vectors to be in a different location in
 548          * the resumed kernel. Load hibernate's temporary copy into el2.
 549          *
 550          * We can skip this step if we booted at EL1, or are running with VHE.
 551          */
 552         if (el2_reset_needed()) {
 553                 phys_addr_t el2_vectors = phys_hibernate_exit;  /* base */
 554                 el2_vectors += hibernate_el2_vectors -
 555                                __hibernate_exit_text_start;     /* offset */
 556 
 557                 __hyp_set_vectors(el2_vectors);
 558         }
 559 
 560         hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
 561                        resume_hdr.reenter_kernel, restore_pblist,
 562                        resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));
 563 
 564 out:
 565         return rc;
 566 }
 567 
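      /*
       * Called by the hibernate core during resume: take every CPU except
       * sleep_cpu offline, so that the image is restored on the same CPU
       * that created it.
       */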
 568 int hibernate_resume_nonboot_cpu_disable(void)
 569 {
 570         if (sleep_cpu < 0) {
 571                 pr_err("Failing to resume from hibernate on an unknown CPU.\n");
 572                 return -ENODEV;
 573         }
 574 
 575         return freeze_secondary_cpus(sleep_cpu);
 576 }
