root/arch/s390/kernel/nmi.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. nmi_needs_mcesa
  2. nmi_get_mcesa_size
  3. nmi_alloc_boot_cpu
  4. nmi_init
  5. nmi_alloc_per_cpu
  6. nmi_free_per_cpu
  7. s390_handle_damage
  8. s390_handle_mcck
  9. s390_check_registers
  10. s390_backup_mcck_info
  11. s390_do_machine_check
  12. machine_check_init

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  *   Machine check handler
   4  *
   5  *    Copyright IBM Corp. 2000, 2009
   6  *    Author(s): Ingo Adlung <adlung@de.ibm.com>,
   7  *               Martin Schwidefsky <schwidefsky@de.ibm.com>,
   8  *               Cornelia Huck <cornelia.huck@de.ibm.com>,
   9  *               Heiko Carstens <heiko.carstens@de.ibm.com>,
  10  */
  11 
  12 #include <linux/kernel_stat.h>
  13 #include <linux/init.h>
  14 #include <linux/errno.h>
  15 #include <linux/hardirq.h>
  16 #include <linux/log2.h>
  17 #include <linux/kprobes.h>
  18 #include <linux/kmemleak.h>
  19 #include <linux/time.h>
  20 #include <linux/module.h>
  21 #include <linux/sched/signal.h>
  22 
  23 #include <linux/export.h>
  24 #include <asm/lowcore.h>
  25 #include <asm/smp.h>
  26 #include <asm/stp.h>
  27 #include <asm/cputime.h>
  28 #include <asm/nmi.h>
  29 #include <asm/crw.h>
  30 #include <asm/switch_to.h>
  31 #include <asm/ctl_reg.h>
  32 #include <asm/asm-offsets.h>
  33 #include <linux/kvm_host.h>
  34 
/*
 * Machine check work recorded by s390_do_machine_check() and consumed
 * later by s390_handle_mcck(). One instance exists per CPU (cpu_mcck).
 */
struct mcck_struct {
        /* set when s390_check_registers() reports unrecoverable registers */
        unsigned int kill_task : 1;
        /* set when the interruption code signals a pending channel report */
        unsigned int channel_report : 1;
        /* set when the interruption code signals a warning condition */
        unsigned int warning : 1;
        /* set from the results of stp_sync_check() / stp_island_check() */
        unsigned int stp_queue : 1;
        /* interruption code saved together with kill_task, for the log */
        unsigned long mcck_code;
};
  42 
/* Per-CPU machine check state handed over to s390_handle_mcck() */
static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
/* Slab cache for the extended save areas; created by nmi_init() */
static struct kmem_cache *mcesa_cache;
/*
 * Value ORed into the save area address when it is stored in lowcore
 * mcesad. nmi_init() sets it to ilog2(size) if the save area is larger
 * than MCESA_MIN_SIZE; otherwise it stays 0.
 */
static unsigned long mcesa_origin_lc;
  46 
  47 static inline int nmi_needs_mcesa(void)
  48 {
  49         return MACHINE_HAS_VX || MACHINE_HAS_GS;
  50 }
  51 
  52 static inline unsigned long nmi_get_mcesa_size(void)
  53 {
  54         if (MACHINE_HAS_GS)
  55                 return MCESA_MAX_SIZE;
  56         return MCESA_MIN_SIZE;
  57 }
  58 
/*
 * The initial machine check extended save area for the boot CPU.
 * nmi_alloc_boot_cpu() stores its address in the lowcore; nmi_init()
 * later replaces it with an allocated structure.
 * The structure is required for machine checks happening early in
 * the boot process.
 */
static struct mcesa boot_mcesa __initdata __aligned(MCESA_MAX_SIZE);
  66 
  67 void __init nmi_alloc_boot_cpu(struct lowcore *lc)
  68 {
  69         if (!nmi_needs_mcesa())
  70                 return;
  71         lc->mcesad = (unsigned long) &boot_mcesa;
  72         if (MACHINE_HAS_GS)
  73                 lc->mcesad |= ilog2(MCESA_MAX_SIZE);
  74 }
  75 
  76 static int __init nmi_init(void)
  77 {
  78         unsigned long origin, cr0, size;
  79 
  80         if (!nmi_needs_mcesa())
  81                 return 0;
  82         size = nmi_get_mcesa_size();
  83         if (size > MCESA_MIN_SIZE)
  84                 mcesa_origin_lc = ilog2(size);
  85         /* create slab cache for the machine-check-extended-save-areas */
  86         mcesa_cache = kmem_cache_create("nmi_save_areas", size, size, 0, NULL);
  87         if (!mcesa_cache)
  88                 panic("Couldn't create nmi save area cache");
  89         origin = (unsigned long) kmem_cache_alloc(mcesa_cache, GFP_KERNEL);
  90         if (!origin)
  91                 panic("Couldn't allocate nmi save area");
  92         /* The pointer is stored with mcesa_bits ORed in */
  93         kmemleak_not_leak((void *) origin);
  94         __ctl_store(cr0, 0, 0);
  95         __ctl_clear_bit(0, 28); /* disable lowcore protection */
  96         /* Replace boot_mcesa on the boot CPU */
  97         S390_lowcore.mcesad = origin | mcesa_origin_lc;
  98         __ctl_load(cr0, 0, 0);
  99         return 0;
 100 }
 101 early_initcall(nmi_init);
 102 
 103 int nmi_alloc_per_cpu(struct lowcore *lc)
 104 {
 105         unsigned long origin;
 106 
 107         if (!nmi_needs_mcesa())
 108                 return 0;
 109         origin = (unsigned long) kmem_cache_alloc(mcesa_cache, GFP_KERNEL);
 110         if (!origin)
 111                 return -ENOMEM;
 112         /* The pointer is stored with mcesa_bits ORed in */
 113         kmemleak_not_leak((void *) origin);
 114         lc->mcesad = origin | mcesa_origin_lc;
 115         return 0;
 116 }
 117 
 118 void nmi_free_per_cpu(struct lowcore *lc)
 119 {
 120         if (!nmi_needs_mcesa())
 121                 return;
 122         kmem_cache_free(mcesa_cache, (void *)(lc->mcesad & MCESA_ORIGIN_MASK));
 123 }
 124 
 125 static notrace void s390_handle_damage(void)
 126 {
 127         smp_emergency_stop();
 128         disabled_wait();
 129         while (1);
 130 }
 131 NOKPROBE_SYMBOL(s390_handle_damage);
 132 
 133 /*
 134  * Main machine check handler function. Will be called with interrupts enabled
 135  * or disabled and machine checks enabled or disabled.
 136  */
 137 void s390_handle_mcck(void)
 138 {
 139         unsigned long flags;
 140         struct mcck_struct mcck;
 141 
 142         /*
 143          * Disable machine checks and get the current state of accumulated
 144          * machine checks. Afterwards delete the old state and enable machine
 145          * checks again.
 146          */
 147         local_irq_save(flags);
 148         local_mcck_disable();
 149         mcck = *this_cpu_ptr(&cpu_mcck);
 150         memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck));
 151         clear_cpu_flag(CIF_MCCK_PENDING);
 152         local_mcck_enable();
 153         local_irq_restore(flags);
 154 
 155         if (mcck.channel_report)
 156                 crw_handle_channel_report();
 157         /*
 158          * A warning may remain for a prolonged period on the bare iron.
 159          * (actually until the machine is powered off, or the problem is gone)
 160          * So we just stop listening for the WARNING MCH and avoid continuously
 161          * being interrupted.  One caveat is however, that we must do this per
 162          * processor and cannot use the smp version of ctl_clear_bit().
 163          * On VM we only get one interrupt per virtally presented machinecheck.
 164          * Though one suffices, we may get one interrupt per (virtual) cpu.
 165          */
 166         if (mcck.warning) {     /* WARNING pending ? */
 167                 static int mchchk_wng_posted = 0;
 168 
 169                 /* Use single cpu clear, as we cannot handle smp here. */
 170                 __ctl_clear_bit(14, 24);        /* Disable WARNING MCH */
 171                 if (xchg(&mchchk_wng_posted, 1) == 0)
 172                         kill_cad_pid(SIGPWR, 1);
 173         }
 174         if (mcck.stp_queue)
 175                 stp_queue_work();
 176         if (mcck.kill_task) {
 177                 local_irq_enable();
 178                 printk(KERN_EMERG "mcck: Terminating task because of machine "
 179                        "malfunction (code 0x%016lx).\n", mcck.mcck_code);
 180                 printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
 181                        current->comm, current->pid);
 182                 do_exit(SIGSEGV);
 183         }
 184 }
 185 EXPORT_SYMBOL_GPL(s390_handle_mcck);
 186 
 187 /*
 188  * returns 0 if all required registers are available
 189  * returns 1 otherwise
 190  */
 191 static int notrace s390_check_registers(union mci mci, int umode)
 192 {
 193         union ctlreg2 cr2;
 194         int kill_task;
 195 
 196         kill_task = 0;
 197 
 198         if (!mci.gr) {
 199                 /*
 200                  * General purpose registers couldn't be restored and have
 201                  * unknown contents. Stop system or terminate process.
 202                  */
 203                 if (!umode)
 204                         s390_handle_damage();
 205                 kill_task = 1;
 206         }
 207         /* Check control registers */
 208         if (!mci.cr) {
 209                 /*
 210                  * Control registers have unknown contents.
 211                  * Can't recover and therefore stopping machine.
 212                  */
 213                 s390_handle_damage();
 214         }
 215         if (!mci.fp) {
 216                 /*
 217                  * Floating point registers can't be restored. If the
 218                  * kernel currently uses floating point registers the
 219                  * system is stopped. If the process has its floating
 220                  * pointer registers loaded it is terminated.
 221                  */
 222                 if (S390_lowcore.fpu_flags & KERNEL_VXR_V0V7)
 223                         s390_handle_damage();
 224                 if (!test_cpu_flag(CIF_FPU))
 225                         kill_task = 1;
 226         }
 227         if (!mci.fc) {
 228                 /*
 229                  * Floating point control register can't be restored.
 230                  * If the kernel currently uses the floating pointer
 231                  * registers and needs the FPC register the system is
 232                  * stopped. If the process has its floating pointer
 233                  * registers loaded it is terminated.
 234                  */
 235                 if (S390_lowcore.fpu_flags & KERNEL_FPC)
 236                         s390_handle_damage();
 237                 if (!test_cpu_flag(CIF_FPU))
 238                         kill_task = 1;
 239         }
 240 
 241         if (MACHINE_HAS_VX) {
 242                 if (!mci.vr) {
 243                         /*
 244                          * Vector registers can't be restored. If the kernel
 245                          * currently uses vector registers the system is
 246                          * stopped. If the process has its vector registers
 247                          * loaded it is terminated.
 248                          */
 249                         if (S390_lowcore.fpu_flags & KERNEL_VXR)
 250                                 s390_handle_damage();
 251                         if (!test_cpu_flag(CIF_FPU))
 252                                 kill_task = 1;
 253                 }
 254         }
 255         /* Check if access registers are valid */
 256         if (!mci.ar) {
 257                 /*
 258                  * Access registers have unknown contents.
 259                  * Terminating task.
 260                  */
 261                 kill_task = 1;
 262         }
 263         /* Check guarded storage registers */
 264         cr2.val = S390_lowcore.cregs_save_area[2];
 265         if (cr2.gse) {
 266                 if (!mci.gs) {
 267                         /*
 268                          * Guarded storage register can't be restored and
 269                          * the current processes uses guarded storage.
 270                          * It has to be terminated.
 271                          */
 272                         kill_task = 1;
 273                 }
 274         }
 275         /* Check if old PSW is valid */
 276         if (!mci.wp) {
 277                 /*
 278                  * Can't tell if we come from user or kernel mode
 279                  * -> stopping machine.
 280                  */
 281                 s390_handle_damage();
 282         }
 283         /* Check for invalid kernel instruction address */
 284         if (!mci.ia && !umode) {
 285                 /*
 286                  * The instruction address got lost while running
 287                  * in the kernel -> stopping machine.
 288                  */
 289                 s390_handle_damage();
 290         }
 291 
 292         if (!mci.ms || !mci.pm || !mci.ia)
 293                 kill_task = 1;
 294 
 295         return kill_task;
 296 }
 297 NOKPROBE_SYMBOL(s390_check_registers);
 298 
 299 /*
 300  * Backup the guest's machine check info to its description block
 301  */
 302 static void notrace s390_backup_mcck_info(struct pt_regs *regs)
 303 {
 304         struct mcck_volatile_info *mcck_backup;
 305         struct sie_page *sie_page;
 306 
 307         /* r14 contains the sie block, which was set in sie64a */
 308         struct kvm_s390_sie_block *sie_block =
 309                         (struct kvm_s390_sie_block *) regs->gprs[14];
 310 
 311         if (sie_block == NULL)
 312                 /* Something's seriously wrong, stop system. */
 313                 s390_handle_damage();
 314 
 315         sie_page = container_of(sie_block, struct sie_page, sie_block);
 316         mcck_backup = &sie_page->mcck_info;
 317         mcck_backup->mcic = S390_lowcore.mcck_interruption_code &
 318                                 ~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE);
 319         mcck_backup->ext_damage_code = S390_lowcore.external_damage_code;
 320         mcck_backup->failing_storage_address
 321                         = S390_lowcore.failing_storage_address;
 322 }
 323 NOKPROBE_SYMBOL(s390_backup_mcck_info);
 324 
/*
 * Limits for recoverable instruction processing damage (IPD) machine
 * checks: the machine is stopped once the IPD counter reaches
 * MAX_IPD_COUNT, where the counter restarts at 1 whenever two consecutive
 * events are MAX_IPD_TIME or more apart.
 */
#define MAX_IPD_COUNT   29
#define MAX_IPD_TIME    (5 * 60 * USEC_PER_SEC) /* 5 minutes */

/* Bit numbers tested in the lowcore external damage code */
#define ED_STP_ISLAND   6       /* External damage STP island check */
#define ED_STP_SYNC     7       /* External damage STP sync check */

/* Machine check subclasses that are never reinjected into a guest */
#define MCCK_CODE_NO_GUEST      (MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE)
 332 
/*
 * machine check handler.
 *
 * Evaluates the machine check interruption code found in the lowcore.
 * Conditions that cannot be survived end in s390_handle_damage(). All
 * other work is recorded in this CPU's cpu_mcck and announced with
 * CIF_MCCK_PENDING, so that s390_handle_mcck() can process it later.
 * If CIF_MCCK_GUEST is set, the machine check info is additionally backed
 * up for the guest and, unless only host-related subclasses are present,
 * -EINTR is stored as SIE exit reason.
 *
 * regs: register state at the time of the machine check; used to tell
 *       user from kernel mode and, for the guest case, to find the SIE
 *       block (r14) and the location of the SIE reason code (relative
 *       to r15)
 */
void notrace s390_do_machine_check(struct pt_regs *regs)
{
        /* IPD rate limiting state, shared by all CPUs and guarded by ipd_lock */
        static int ipd_count;
        static DEFINE_SPINLOCK(ipd_lock);
        static unsigned long long last_ipd;
        struct mcck_struct *mcck;
        unsigned long long tmp;
        union mci mci;
        unsigned long mcck_dam_code;

        nmi_enter();
        inc_irq_stat(NMI_NMI);
        mci.val = S390_lowcore.mcck_interruption_code;
        mcck = this_cpu_ptr(&cpu_mcck);

        if (mci.sd) {
                /* System damage -> stopping machine */
                s390_handle_damage();
        }

        /*
         * Reinject the instruction processing damages' machine checks
         * including Delayed Access Exception into the guest
         * instead of damaging the host if they happen in the guest.
         * Hence the host-side handling below is skipped if
         * CIF_MCCK_GUEST is set.
         */
        if (mci.pd && !test_cpu_flag(CIF_MCCK_GUEST)) {
                if (mci.b) {
                        /* Processing backup -> verify if we can survive this */
                        u64 z_mcic, o_mcic, t_mcic;
                        /* bits that all have to be zero in the interruption code */
                        z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
                        /* bits that all have to be one in the interruption code */
                        o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
                                  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
                                  1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
                                  1ULL<<16);
                        t_mcic = mci.val;

                        if (((t_mcic & z_mcic) != 0) ||
                            ((t_mcic & o_mcic) != o_mcic)) {
                                s390_handle_damage();
                        }

                        /*
                         * Nullifying exigent condition, therefore we might
                         * retry this instruction.
                         *
                         * Count the events: the counter restarts at 1 if
                         * the previous event is MAX_IPD_TIME or more in
                         * the past, and the machine is stopped once the
                         * counter reaches MAX_IPD_COUNT.
                         * NOTE(review): the shift by 12 presumably
                         * converts TOD clock units to microseconds, the
                         * unit of MAX_IPD_TIME - confirm.
                         */
                        spin_lock(&ipd_lock);
                        tmp = get_tod_clock();
                        if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME)
                                ipd_count++;
                        else
                                ipd_count = 1;
                        last_ipd = tmp;
                        if (ipd_count == MAX_IPD_COUNT)
                                s390_handle_damage();
                        spin_unlock(&ipd_lock);
                } else {
                        /* Processing damage -> stopping machine */
                        s390_handle_damage();
                }
        }
        if (s390_check_registers(mci, user_mode(regs))) {
                /*
                 * Couldn't restore all register contents for the
                 * user space process -> mark task for termination.
                 * The interruption code is kept for the log message
                 * printed by s390_handle_mcck().
                 */
                mcck->kill_task = 1;
                mcck->mcck_code = mci.val;
                set_cpu_flag(CIF_MCCK_PENDING);
        }

        /*
         * Backup the machine check's info if it happens when the guest
         * is running.
         */
        if (test_cpu_flag(CIF_MCCK_GUEST))
                s390_backup_mcck_info(regs);

        if (mci.cd) {
                /* Timing facility damage */
                s390_handle_damage();
        }
        if (mci.ed && mci.ec) {
                /*
                 * External damage: run the STP checks selected by the
                 * external damage code and remember if STP work has to
                 * be queued.
                 */
                if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
                        mcck->stp_queue |= stp_sync_check();
                if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
                        mcck->stp_queue |= stp_island_check();
                if (mcck->stp_queue)
                        set_cpu_flag(CIF_MCCK_PENDING);
        }

        /*
         * Reinject storage related machine checks into the guest if they
         * happen when the guest is running. Without a running guest they
         * stop the machine.
         */
        if (!test_cpu_flag(CIF_MCCK_GUEST)) {
                if (mci.se)
                        /* Storage error uncorrected */
                        s390_handle_damage();
                if (mci.ke)
                        /* Storage key-error uncorrected */
                        s390_handle_damage();
                if (mci.ds && mci.fa)
                        /* Storage degradation */
                        s390_handle_damage();
        }
        if (mci.cp) {
                /* Channel report word pending */
                mcck->channel_report = 1;
                set_cpu_flag(CIF_MCCK_PENDING);
        }
        if (mci.w) {
                /* Warning pending */
                mcck->warning = 1;
                set_cpu_flag(CIF_MCCK_PENDING);
        }

        /*
         * If there are only Channel Report Pending and External Damage
         * machine checks, they will not be reinjected into the guest
         * because they refer to host conditions only.
         * The test below is true if any subclass bit outside of
         * MCCK_CODE_NO_GUEST is set.
         */
        mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK);
        if (test_cpu_flag(CIF_MCCK_GUEST) &&
        (mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) {
                /* Set exit reason code for host's later handling */
                *((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR;
        }
        /* The guest indication is consumed by this machine check */
        clear_cpu_flag(CIF_MCCK_GUEST);
        nmi_exit();
}
NOKPROBE_SYMBOL(s390_do_machine_check);
 468 
 469 static int __init machine_check_init(void)
 470 {
 471         ctl_set_bit(14, 25);    /* enable external damage MCH */
 472         ctl_set_bit(14, 27);    /* enable system recovery MCH */
 473         ctl_set_bit(14, 24);    /* enable warning MCH */
 474         return 0;
 475 }
 476 early_initcall(machine_check_init);

/* [<][>][^][v][top][bottom][index][help] */