root/arch/x86/kernel/crash.c


DEFINITIONS

This source file includes the following definitions.
  1. cpu_crash_vmclear_loaded_vmcss
  2. kdump_nmi_callback
  3. kdump_nmi_shootdown_cpus
  4. crash_smp_send_stop
  5. crash_smp_send_stop
  6. native_machine_crash_shutdown
  7. get_nr_ram_ranges_callback
  8. fill_up_crash_elf_data
  9. elf_header_exclude_ranges
  10. prepare_elf64_ram_headers_callback
  11. prepare_elf_headers
  12. add_e820_entry
  13. memmap_entry_callback
  14. memmap_exclude_ranges
  15. crash_setup_memmap_entries
  16. determine_backup_region
  17. crash_load_segments

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Architecture specific (i386/x86_64) functions for kexec based crash dumps.
   4  *
   5  * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
   6  *
   7  * Copyright (C) IBM Corporation, 2004. All rights reserved.
   8  * Copyright (C) Red Hat Inc., 2014. All rights reserved.
   9  * Authors:
  10  *      Vivek Goyal <vgoyal@redhat.com>
  11  *
  12  */
  13 
  14 #define pr_fmt(fmt)     "kexec: " fmt
  15 
  16 #include <linux/types.h>
  17 #include <linux/kernel.h>
  18 #include <linux/smp.h>
  19 #include <linux/reboot.h>
  20 #include <linux/kexec.h>
  21 #include <linux/delay.h>
  22 #include <linux/elf.h>
  23 #include <linux/elfcore.h>
  24 #include <linux/export.h>
  25 #include <linux/slab.h>
  26 #include <linux/vmalloc.h>
  27 
  28 #include <asm/processor.h>
  29 #include <asm/hardirq.h>
  30 #include <asm/nmi.h>
  31 #include <asm/hw_irq.h>
  32 #include <asm/apic.h>
  33 #include <asm/e820/types.h>
  34 #include <asm/io_apic.h>
  35 #include <asm/hpet.h>
  36 #include <linux/kdebug.h>
  37 #include <asm/cpu.h>
  38 #include <asm/reboot.h>
  39 #include <asm/virtext.h>
  40 #include <asm/intel_pt.h>
  41 #include <asm/crash.h>
  42 
  43 /* Used while preparing memory map entries for second kernel */
  44 struct crash_memmap_data {
  45         struct boot_params *params;
  46         /* Type of memory */
  47         unsigned int type;
  48 };
  49 
   50 /*
   51  * This is used to VMCLEAR all VMCSs loaded on the
   52  * processor. The callback function pointer is assigned
   53  * when the kvm_intel module is loaded.
   54  *
   55  * Protected by RCU.
   56  */
  57 crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
  58 EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
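      /*
       * A minimal registration sketch, assuming a hypervisor module such as
       * kvm_intel supplies its own "clear every loaded VMCS" routine (the
       * handler name below is illustrative, not the exact vmx.c symbol):
       *
       *      static void my_vmclear_loaded_vmcss(void);
       *
       *      // on module load
       *      rcu_assign_pointer(crash_vmclear_loaded_vmcss,
       *                         my_vmclear_loaded_vmcss);
       *
       *      // on module unload, before the handler goes away
       *      RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
       *      synchronize_rcu();
       */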
  59 
  60 static inline void cpu_crash_vmclear_loaded_vmcss(void)
  61 {
  62         crash_vmclear_fn *do_vmclear_operation = NULL;
  63 
  64         rcu_read_lock();
  65         do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
  66         if (do_vmclear_operation)
  67                 do_vmclear_operation();
  68         rcu_read_unlock();
  69 }
  70 
  71 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
  72 
  73 static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
  74 {
  75         crash_save_cpu(regs, cpu);
  76 
  77         /*
  78          * VMCLEAR VMCSs loaded on all cpus if needed.
  79          */
  80         cpu_crash_vmclear_loaded_vmcss();
  81 
  82         /* Disable VMX or SVM if needed.
  83          *
  84          * We need to disable virtualization on all CPUs.
  85          * Having VMX or SVM enabled on any CPU may break rebooting
  86          * after the kdump kernel has finished its task.
  87          */
  88         cpu_emergency_vmxoff();
  89         cpu_emergency_svm_disable();
  90 
  91         /*
  92          * Disable Intel PT to stop its logging
  93          */
  94         cpu_emergency_stop_pt();
  95 
  96         disable_local_APIC();
  97 }
  98 
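      /*
       * nmi_shootdown_cpus() (arch/x86/kernel/reboot.c) sends an NMI to every
       * other online CPU and runs the callback on each of them in NMI context;
       * the crashing CPU then disables its own local APIC below.
       */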
  99 void kdump_nmi_shootdown_cpus(void)
 100 {
 101         nmi_shootdown_cpus(kdump_nmi_callback);
 102 
 103         disable_local_APIC();
 104 }
 105 
 106 /* Override the weak function in kernel/panic.c */
 107 void crash_smp_send_stop(void)
 108 {
 109         static int cpus_stopped;
 110 
 111         if (cpus_stopped)
 112                 return;
 113 
 114         if (smp_ops.crash_stop_other_cpus)
 115                 smp_ops.crash_stop_other_cpus();
 116         else
 117                 smp_send_stop();
 118 
 119         cpus_stopped = 1;
 120 }
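      /*
       * The cpus_stopped flag makes repeated calls harmless: both the panic()
       * path in kernel/panic.c and native_machine_crash_shutdown() below may
       * invoke this on the way to the crash kernel, but the other CPUs must
       * only be shot down once.
       */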
 121 
 122 #else
 123 void crash_smp_send_stop(void)
 124 {
 125         /* There are no cpus to shootdown */
 126 }
 127 #endif
 128 
 129 void native_machine_crash_shutdown(struct pt_regs *regs)
 130 {
 131         /* This function is only called after the system
 132          * has panicked or is otherwise in a critical state.
 133          * The minimum amount of code to allow a kexec'd kernel
 134          * to run successfully needs to happen here.
 135          *
 136          * In practice this means shooting down the other cpus in
 137          * an SMP system.
 138          */
 139         /* The kernel is broken so disable interrupts */
 140         local_irq_disable();
 141 
 142         crash_smp_send_stop();
 143 
 144         /*
 145          * VMCLEAR VMCSs loaded on this cpu if needed.
 146          */
 147         cpu_crash_vmclear_loaded_vmcss();
 148 
 149         /* Booting kdump kernel with VMX or SVM enabled won't work,
 150          * because (among other limitations) we can't disable paging
 151          * with the virt flags.
 152          */
 153         cpu_emergency_vmxoff();
 154         cpu_emergency_svm_disable();
 155 
 156         /*
 157          * Disable Intel PT to stop its logging
 158          */
 159         cpu_emergency_stop_pt();
 160 
 161 #ifdef CONFIG_X86_IO_APIC
 162         /* Prevent crash_kexec() from deadlocking on ioapic_lock. */
 163         ioapic_zap_locks();
 164         clear_IO_APIC();
 165 #endif
 166         lapic_shutdown();
 167         restore_boot_irq_mode();
 168 #ifdef CONFIG_HPET_TIMER
 169         hpet_disable();
 170 #endif
 171         crash_save_cpu(regs, safe_smp_processor_id());
 172 }
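      /*
       * native_machine_crash_shutdown() is reached from __crash_kexec() via
       * machine_crash_shutdown() on the crashing CPU; once it returns,
       * machine_kexec() hands control to purgatory and then to the kdump
       * kernel.
       */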
 173 
 174 #ifdef CONFIG_KEXEC_FILE
 175 
 176 static unsigned long crash_zero_bytes;
 177 
 178 static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
 179 {
 180         unsigned int *nr_ranges = arg;
 181 
 182         (*nr_ranges)++;
 183         return 0;
 184 }
 185 
 186 /* Gather all the required information to prepare elf headers for ram regions */
 187 static struct crash_mem *fill_up_crash_elf_data(void)
 188 {
 189         unsigned int nr_ranges = 0;
 190         struct crash_mem *cmem;
 191 
 192         walk_system_ram_res(0, -1, &nr_ranges,
 193                                 get_nr_ram_ranges_callback);
 194         if (!nr_ranges)
 195                 return NULL;
 196 
 197         /*
 198          * Exclusion of crash region and/or crashk_low_res may cause
  199          * another range split. So add two extra slots here.
 200          */
 201         nr_ranges += 2;
 202         cmem = vzalloc(struct_size(cmem, ranges, nr_ranges));
 203         if (!cmem)
 204                 return NULL;
 205 
 206         cmem->max_nr_ranges = nr_ranges;
 207         cmem->nr_ranges = 0;
 208 
 209         return cmem;
 210 }
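      /*
       * Why two extra slots: excluding a region from the middle of one
       * System RAM range splits it in two. For example (addresses are
       * illustrative only), excluding a crashkernel area at
       * 0x70000000-0x77ffffff from
       *
       *      0x00100000 - 0xbfffffff
       *
       * leaves
       *
       *      0x00100000 - 0x6fffffff
       *      0x78000000 - 0xbfffffff
       *
       * so each of crashk_res and crashk_low_res can add at most one range.
       */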
 211 
  212 /*
  213  * Look for any unwanted ranges in cmem->ranges[] and remove them. This
  214  * may split existing ranges; the resulting pieces stay in cmem->ranges[].
  215  */
 216 static int elf_header_exclude_ranges(struct crash_mem *cmem)
 217 {
 218         int ret = 0;
 219 
 220         /* Exclude crashkernel region */
 221         ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
 222         if (ret)
 223                 return ret;
 224 
 225         if (crashk_low_res.end) {
 226                 ret = crash_exclude_mem_range(cmem, crashk_low_res.start,
 227                                                         crashk_low_res.end);
 228         }
 229 
 230         return ret;
 231 }
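      /*
       * crash_exclude_mem_range() fails (typically with -ENOMEM) when a split
       * would exceed cmem->max_nr_ranges, which is what the two spare slots
       * reserved in fill_up_crash_elf_data() are for.
       */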
 232 
 233 static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
 234 {
 235         struct crash_mem *cmem = arg;
 236 
 237         cmem->ranges[cmem->nr_ranges].start = res->start;
 238         cmem->ranges[cmem->nr_ranges].end = res->end;
 239         cmem->nr_ranges++;
 240 
 241         return 0;
 242 }
 243 
 244 /* Prepare elf headers. Return addr and size */
 245 static int prepare_elf_headers(struct kimage *image, void **addr,
 246                                         unsigned long *sz)
 247 {
 248         struct crash_mem *cmem;
 249         Elf64_Ehdr *ehdr;
 250         Elf64_Phdr *phdr;
 251         int ret, i;
 252 
 253         cmem = fill_up_crash_elf_data();
 254         if (!cmem)
 255                 return -ENOMEM;
 256 
 257         ret = walk_system_ram_res(0, -1, cmem,
 258                                 prepare_elf64_ram_headers_callback);
 259         if (ret)
 260                 goto out;
 261 
 262         /* Exclude unwanted mem ranges */
 263         ret = elf_header_exclude_ranges(cmem);
 264         if (ret)
 265                 goto out;
 266 
 267         /* By default prepare 64bit headers */
  268         ret = crash_prepare_elf64_headers(cmem,
 269                                 IS_ENABLED(CONFIG_X86_64), addr, sz);
 270         if (ret)
 271                 goto out;
 272 
 273         /*
 274          * If a range matches backup region, adjust offset to backup
 275          * segment.
 276          */
 277         ehdr = (Elf64_Ehdr *)*addr;
 278         phdr = (Elf64_Phdr *)(ehdr + 1);
 279         for (i = 0; i < ehdr->e_phnum; phdr++, i++)
 280                 if (phdr->p_type == PT_LOAD &&
 281                                 phdr->p_paddr == image->arch.backup_src_start &&
 282                                 phdr->p_memsz == image->arch.backup_src_sz) {
 283                         phdr->p_offset = image->arch.backup_load_addr;
 284                         break;
 285                 }
 286 out:
 287         vfree(cmem);
 288         return ret;
 289 }
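      /*
       * The p_offset fixup above points the PT_LOAD entry for the first 640K
       * at the backup copy, so reads of that segment from /proc/vmcore in the
       * kdump kernel return the pre-crash contents rather than memory the
       * kdump boot may have overwritten.
       */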
 290 
 291 static int add_e820_entry(struct boot_params *params, struct e820_entry *entry)
 292 {
 293         unsigned int nr_e820_entries;
 294 
 295         nr_e820_entries = params->e820_entries;
 296         if (nr_e820_entries >= E820_MAX_ENTRIES_ZEROPAGE)
 297                 return 1;
 298 
 299         memcpy(&params->e820_table[nr_e820_entries], entry,
 300                         sizeof(struct e820_entry));
 301         params->e820_entries++;
 302         return 0;
 303 }
 304 
 305 static int memmap_entry_callback(struct resource *res, void *arg)
 306 {
 307         struct crash_memmap_data *cmd = arg;
 308         struct boot_params *params = cmd->params;
 309         struct e820_entry ei;
 310 
 311         ei.addr = res->start;
 312         ei.size = resource_size(res);
 313         ei.type = cmd->type;
 314         add_e820_entry(params, &ei);
 315 
 316         return 0;
 317 }
 318 
 319 static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem,
 320                                  unsigned long long mstart,
 321                                  unsigned long long mend)
 322 {
 323         unsigned long start, end;
 324         int ret = 0;
 325 
 326         cmem->ranges[0].start = mstart;
 327         cmem->ranges[0].end = mend;
 328         cmem->nr_ranges = 1;
 329 
 330         /* Exclude Backup region */
 331         start = image->arch.backup_load_addr;
 332         end = start + image->arch.backup_src_sz - 1;
 333         ret = crash_exclude_mem_range(cmem, start, end);
 334         if (ret)
 335                 return ret;
 336 
 337         /* Exclude elf header region */
 338         start = image->arch.elf_load_addr;
 339         end = start + image->arch.elf_headers_sz - 1;
 340         return crash_exclude_mem_range(cmem, start, end);
 341 }
 342 
 343 /* Prepare memory map for crash dump kernel */
 344 int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
 345 {
 346         int i, ret = 0;
 347         unsigned long flags;
 348         struct e820_entry ei;
 349         struct crash_memmap_data cmd;
 350         struct crash_mem *cmem;
 351 
 352         cmem = vzalloc(sizeof(struct crash_mem));
 353         if (!cmem)
 354                 return -ENOMEM;
 355 
 356         memset(&cmd, 0, sizeof(struct crash_memmap_data));
 357         cmd.params = params;
 358 
 359         /* Add first 640K segment */
 360         ei.addr = image->arch.backup_src_start;
 361         ei.size = image->arch.backup_src_sz;
 362         ei.type = E820_TYPE_RAM;
 363         add_e820_entry(params, &ei);
 364 
 365         /* Add ACPI tables */
 366         cmd.type = E820_TYPE_ACPI;
 367         flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 368         walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, &cmd,
 369                        memmap_entry_callback);
 370 
 371         /* Add ACPI Non-volatile Storage */
 372         cmd.type = E820_TYPE_NVS;
 373         walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd,
 374                         memmap_entry_callback);
 375 
 376         /* Add e820 reserved ranges */
 377         cmd.type = E820_TYPE_RESERVED;
 378         flags = IORESOURCE_MEM;
 379         walk_iomem_res_desc(IORES_DESC_RESERVED, flags, 0, -1, &cmd,
 380                            memmap_entry_callback);
 381 
 382         /* Add crashk_low_res region */
 383         if (crashk_low_res.end) {
 384                 ei.addr = crashk_low_res.start;
 385                 ei.size = crashk_low_res.end - crashk_low_res.start + 1;
 386                 ei.type = E820_TYPE_RAM;
 387                 add_e820_entry(params, &ei);
 388         }
 389 
 390         /* Exclude some ranges from crashk_res and add rest to memmap */
 391         ret = memmap_exclude_ranges(image, cmem, crashk_res.start,
 392                                                 crashk_res.end);
 393         if (ret)
 394                 goto out;
 395 
 396         for (i = 0; i < cmem->nr_ranges; i++) {
 397                 ei.size = cmem->ranges[i].end - cmem->ranges[i].start + 1;
 398 
 399                 /* If entry is less than a page, skip it */
 400                 if (ei.size < PAGE_SIZE)
 401                         continue;
 402                 ei.addr = cmem->ranges[i].start;
 403                 ei.type = E820_TYPE_RAM;
 404                 add_e820_entry(params, &ei);
 405         }
 406 
 407 out:
 408         vfree(cmem);
 409         return ret;
 410 }
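      /*
       * The e820 table built here is what the kdump kernel boots with: the
       * first-640K backup source, ACPI table and NVS areas, e820 reserved
       * ranges, the optional low crashkernel region, and whatever is left of
       * crashk_res once the backup destination and the ELF headers have been
       * carved out.
       */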
 411 
 412 static int determine_backup_region(struct resource *res, void *arg)
 413 {
 414         struct kimage *image = arg;
 415 
 416         image->arch.backup_src_start = res->start;
 417         image->arch.backup_src_sz = resource_size(res);
 418 
 419         /* Expecting only one range for backup region */
 420         return 1;
 421 }
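      /*
       * Returning a positive value makes walk_system_ram_res() stop after the
       * first matching System RAM range, which is why the caller below treats
       * zero or positive return values as success.
       */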
 422 
 423 int crash_load_segments(struct kimage *image)
 424 {
 425         int ret;
 426         struct kexec_buf kbuf = { .image = image, .buf_min = 0,
 427                                   .buf_max = ULONG_MAX, .top_down = false };
 428 
 429         /*
 430          * Determine and load a segment for backup area. First 640K RAM
 431          * region is backup source
 432          */
 433 
 434         ret = walk_system_ram_res(KEXEC_BACKUP_SRC_START, KEXEC_BACKUP_SRC_END,
 435                                 image, determine_backup_region);
 436 
  437         /* Zero or positive return values are ok */
 438         if (ret < 0)
 439                 return ret;
 440 
 441         /* Add backup segment. */
 442         if (image->arch.backup_src_sz) {
 443                 kbuf.buffer = &crash_zero_bytes;
 444                 kbuf.bufsz = sizeof(crash_zero_bytes);
 445                 kbuf.memsz = image->arch.backup_src_sz;
 446                 kbuf.buf_align = PAGE_SIZE;
 447                 /*
 448                  * Ideally there is no source for backup segment. This is
 449                  * copied in purgatory after crash. Just add a zero filled
 450                  * segment for now to make sure checksum logic works fine.
 451                  */
 452                 ret = kexec_add_buffer(&kbuf);
 453                 if (ret)
 454                         return ret;
 455                 image->arch.backup_load_addr = kbuf.mem;
 456                 pr_debug("Loaded backup region at 0x%lx backup_start=0x%lx memsz=0x%lx\n",
 457                          image->arch.backup_load_addr,
 458                          image->arch.backup_src_start, kbuf.memsz);
 459         }
 460 
 461         /* Prepare elf headers and add a segment */
 462         ret = prepare_elf_headers(image, &kbuf.buffer, &kbuf.bufsz);
 463         if (ret)
 464                 return ret;
 465 
 466         image->arch.elf_headers = kbuf.buffer;
 467         image->arch.elf_headers_sz = kbuf.bufsz;
 468 
 469         kbuf.memsz = kbuf.bufsz;
 470         kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
 471         kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
 472         ret = kexec_add_buffer(&kbuf);
 473         if (ret) {
 474                 vfree((void *)image->arch.elf_headers);
 475                 return ret;
 476         }
 477         image->arch.elf_load_addr = kbuf.mem;
 478         pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
  479                  image->arch.elf_load_addr, kbuf.bufsz, kbuf.memsz);
 480 
 481         return ret;
 482 }
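      /*
       * After this the crash image carries two extra segments: a zero-filled
       * placeholder the size of the backup (first 640K) region, whose real
       * contents purgatory copies in at crash time, and the ELF core headers
       * that the kdump kernel exposes through /proc/vmcore (their address is
       * handed over on the kernel command line, typically via elfcorehdr=).
       */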
 483 #endif /* CONFIG_KEXEC_FILE */
