1/* 2 * PowerNV setup code. 3 * 4 * Copyright 2011 IBM Corp. 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12#undef DEBUG 13 14#include <linux/cpu.h> 15#include <linux/errno.h> 16#include <linux/sched.h> 17#include <linux/kernel.h> 18#include <linux/tty.h> 19#include <linux/reboot.h> 20#include <linux/init.h> 21#include <linux/console.h> 22#include <linux/delay.h> 23#include <linux/irq.h> 24#include <linux/seq_file.h> 25#include <linux/of.h> 26#include <linux/of_fdt.h> 27#include <linux/interrupt.h> 28#include <linux/bug.h> 29#include <linux/pci.h> 30#include <linux/cpufreq.h> 31 32#include <asm/machdep.h> 33#include <asm/firmware.h> 34#include <asm/xics.h> 35#include <asm/opal.h> 36#include <asm/kexec.h> 37#include <asm/smp.h> 38#include <asm/cputhreads.h> 39#include <asm/cpuidle.h> 40#include <asm/code-patching.h> 41 42#include "powernv.h" 43#include "subcore.h" 44 45static void __init pnv_setup_arch(void) 46{ 47 set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); 48 49 /* Initialize SMP */ 50 pnv_smp_init(); 51 52 /* Setup PCI */ 53 pnv_pci_init(); 54 55 /* Setup RTC and NVRAM callbacks */ 56 if (firmware_has_feature(FW_FEATURE_OPAL)) 57 opal_nvram_init(); 58 59 /* Enable NAP mode */ 60 powersave_nap = 1; 61 62 /* XXX PMCS */ 63} 64 65static void __init pnv_init_early(void) 66{ 67 /* 68 * Initialize the LPC bus now so that legacy serial 69 * ports can be found on it 70 */ 71 opal_lpc_init(); 72 73#ifdef CONFIG_HVC_OPAL 74 if (firmware_has_feature(FW_FEATURE_OPAL)) 75 hvc_opal_init_early(); 76 else 77#endif 78 add_preferred_console("hvc", 0, NULL); 79} 80 81static void __init pnv_init_IRQ(void) 82{ 83 xics_init(); 84 85 WARN_ON(!ppc_md.get_irq); 86} 87 88static void pnv_show_cpuinfo(struct seq_file *m) 89{ 90 struct device_node *root; 91 const char *model = ""; 92 93 root = of_find_node_by_path("/"); 94 if (root) 95 model = of_get_property(root, "model", NULL); 96 seq_printf(m, "machine\t\t: PowerNV %s\n", model); 97 if (firmware_has_feature(FW_FEATURE_OPALv3)) 98 seq_printf(m, "firmware\t: OPAL v3\n"); 99 else if (firmware_has_feature(FW_FEATURE_OPALv2)) 100 seq_printf(m, "firmware\t: OPAL v2\n"); 101 else if (firmware_has_feature(FW_FEATURE_OPAL)) 102 seq_printf(m, "firmware\t: OPAL v1\n"); 103 else 104 seq_printf(m, "firmware\t: BML\n"); 105 of_node_put(root); 106} 107 108static void pnv_prepare_going_down(void) 109{ 110 /* 111 * Disable all notifiers from OPAL, we can't 112 * service interrupts anymore anyway 113 */ 114 opal_notifier_disable(); 115 116 /* Soft disable interrupts */ 117 local_irq_disable(); 118 119 /* 120 * Return secondary CPUs to firwmare if a flash update 121 * is pending otherwise we will get all sort of error 122 * messages about CPU being stuck etc.. This will also 123 * have the side effect of hard disabling interrupts so 124 * past this point, the kernel is effectively dead. 125 */ 126 opal_flash_term_callback(); 127} 128 129static void __noreturn pnv_restart(char *cmd) 130{ 131 long rc = OPAL_BUSY; 132 133 pnv_prepare_going_down(); 134 135 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 136 rc = opal_cec_reboot(); 137 if (rc == OPAL_BUSY_EVENT) 138 opal_poll_events(NULL); 139 else 140 mdelay(10); 141 } 142 for (;;) 143 opal_poll_events(NULL); 144} 145 146static void __noreturn pnv_power_off(void) 147{ 148 long rc = OPAL_BUSY; 149 150 pnv_prepare_going_down(); 151 152 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 153 rc = opal_cec_power_down(0); 154 if (rc == OPAL_BUSY_EVENT) 155 opal_poll_events(NULL); 156 else 157 mdelay(10); 158 } 159 for (;;) 160 opal_poll_events(NULL); 161} 162 163static void __noreturn pnv_halt(void) 164{ 165 pnv_power_off(); 166} 167 168static void pnv_progress(char *s, unsigned short hex) 169{ 170} 171 172static int pnv_dma_set_mask(struct device *dev, u64 dma_mask) 173{ 174 if (dev_is_pci(dev)) 175 return pnv_pci_dma_set_mask(to_pci_dev(dev), dma_mask); 176 return __dma_set_mask(dev, dma_mask); 177} 178 179static u64 pnv_dma_get_required_mask(struct device *dev) 180{ 181 if (dev_is_pci(dev)) 182 return pnv_pci_dma_get_required_mask(to_pci_dev(dev)); 183 184 return __dma_get_required_mask(dev); 185} 186 187static void pnv_shutdown(void) 188{ 189 /* Let the PCI code clear up IODA tables */ 190 pnv_pci_shutdown(); 191 192 /* 193 * Stop OPAL activity: Unregister all OPAL interrupts so they 194 * don't fire up while we kexec and make sure all potentially 195 * DMA'ing ops are complete (such as dump retrieval). 196 */ 197 opal_shutdown(); 198} 199 200#ifdef CONFIG_KEXEC 201static void pnv_kexec_wait_secondaries_down(void) 202{ 203 int my_cpu, i, notified = -1; 204 205 my_cpu = get_cpu(); 206 207 for_each_online_cpu(i) { 208 uint8_t status; 209 int64_t rc; 210 211 if (i == my_cpu) 212 continue; 213 214 for (;;) { 215 rc = opal_query_cpu_status(get_hard_smp_processor_id(i), 216 &status); 217 if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED) 218 break; 219 barrier(); 220 if (i != notified) { 221 printk(KERN_INFO "kexec: waiting for cpu %d " 222 "(physical %d) to enter OPAL\n", 223 i, paca[i].hw_cpu_id); 224 notified = i; 225 } 226 } 227 } 228} 229 230static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) 231{ 232 xics_kexec_teardown_cpu(secondary); 233 234 /* On OPAL v3, we return all CPUs to firmware */ 235 236 if (!firmware_has_feature(FW_FEATURE_OPALv3)) 237 return; 238 239 if (secondary) { 240 /* Return secondary CPUs to firmware on OPAL v3 */ 241 mb(); 242 get_paca()->kexec_state = KEXEC_STATE_REAL_MODE; 243 mb(); 244 245 /* Return the CPU to OPAL */ 246 opal_return_cpu(); 247 } else if (crash_shutdown) { 248 /* 249 * On crash, we don't wait for secondaries to go 250 * down as they might be unreachable or hung, so 251 * instead we just wait a bit and move on. 252 */ 253 mdelay(1); 254 } else { 255 /* Primary waits for the secondaries to have reached OPAL */ 256 pnv_kexec_wait_secondaries_down(); 257 } 258} 259#endif /* CONFIG_KEXEC */ 260 261#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 262static unsigned long pnv_memory_block_size(void) 263{ 264 return 256UL * 1024 * 1024; 265} 266#endif 267 268static void __init pnv_setup_machdep_opal(void) 269{ 270 ppc_md.get_boot_time = opal_get_boot_time; 271 ppc_md.restart = pnv_restart; 272 pm_power_off = pnv_power_off; 273 ppc_md.halt = pnv_halt; 274 ppc_md.machine_check_exception = opal_machine_check; 275 ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; 276 ppc_md.hmi_exception_early = opal_hmi_exception_early; 277 ppc_md.handle_hmi_exception = opal_handle_hmi_exception; 278} 279 280static u32 supported_cpuidle_states; 281 282int pnv_save_sprs_for_winkle(void) 283{ 284 int cpu; 285 int rc; 286 287 /* 288 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross 289 * all cpus at boot. Get these reg values of current cpu and use the 290 * same accross all cpus. 291 */ 292 uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1; 293 uint64_t hid0_val = mfspr(SPRN_HID0); 294 uint64_t hid1_val = mfspr(SPRN_HID1); 295 uint64_t hid4_val = mfspr(SPRN_HID4); 296 uint64_t hid5_val = mfspr(SPRN_HID5); 297 uint64_t hmeer_val = mfspr(SPRN_HMEER); 298 299 for_each_possible_cpu(cpu) { 300 uint64_t pir = get_hard_smp_processor_id(cpu); 301 uint64_t hsprg0_val = (uint64_t)&paca[cpu]; 302 303 /* 304 * HSPRG0 is used to store the cpu's pointer to paca. Hence last 305 * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0 306 * with 63rd bit set, so that when a thread wakes up at 0x100 we 307 * can use this bit to distinguish between fastsleep and 308 * deep winkle. 309 */ 310 hsprg0_val |= 1; 311 312 rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); 313 if (rc != 0) 314 return rc; 315 316 rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); 317 if (rc != 0) 318 return rc; 319 320 /* HIDs are per core registers */ 321 if (cpu_thread_in_core(cpu) == 0) { 322 323 rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val); 324 if (rc != 0) 325 return rc; 326 327 rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val); 328 if (rc != 0) 329 return rc; 330 331 rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); 332 if (rc != 0) 333 return rc; 334 335 rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val); 336 if (rc != 0) 337 return rc; 338 339 rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val); 340 if (rc != 0) 341 return rc; 342 } 343 } 344 345 return 0; 346} 347 348static void pnv_alloc_idle_core_states(void) 349{ 350 int i, j; 351 int nr_cores = cpu_nr_cores(); 352 u32 *core_idle_state; 353 354 /* 355 * core_idle_state - First 8 bits track the idle state of each thread 356 * of the core. The 8th bit is the lock bit. Initially all thread bits 357 * are set. They are cleared when the thread enters deep idle state 358 * like sleep and winkle. Initially the lock bit is cleared. 359 * The lock bit has 2 purposes 360 * a. While the first thread is restoring core state, it prevents 361 * other threads in the core from switching to process context. 362 * b. While the last thread in the core is saving the core state, it 363 * prevents a different thread from waking up. 364 */ 365 for (i = 0; i < nr_cores; i++) { 366 int first_cpu = i * threads_per_core; 367 int node = cpu_to_node(first_cpu); 368 369 core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node); 370 *core_idle_state = PNV_CORE_IDLE_THREAD_BITS; 371 372 for (j = 0; j < threads_per_core; j++) { 373 int cpu = first_cpu + j; 374 375 paca[cpu].core_idle_state_ptr = core_idle_state; 376 paca[cpu].thread_idle_state = PNV_THREAD_RUNNING; 377 paca[cpu].thread_mask = 1 << j; 378 } 379 } 380 381 update_subcore_sibling_mask(); 382 383 if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) 384 pnv_save_sprs_for_winkle(); 385} 386 387u32 pnv_get_supported_cpuidle_states(void) 388{ 389 return supported_cpuidle_states; 390} 391EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); 392 393static int __init pnv_init_idle_states(void) 394{ 395 struct device_node *power_mgt; 396 int dt_idle_states; 397 u32 *flags; 398 int i; 399 400 supported_cpuidle_states = 0; 401 402 if (cpuidle_disable != IDLE_NO_OVERRIDE) 403 goto out; 404 405 if (!firmware_has_feature(FW_FEATURE_OPALv3)) 406 goto out; 407 408 power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); 409 if (!power_mgt) { 410 pr_warn("opal: PowerMgmt Node not found\n"); 411 goto out; 412 } 413 dt_idle_states = of_property_count_u32_elems(power_mgt, 414 "ibm,cpu-idle-state-flags"); 415 if (dt_idle_states < 0) { 416 pr_warn("cpuidle-powernv: no idle states found in the DT\n"); 417 goto out; 418 } 419 420 flags = kzalloc(sizeof(*flags) * dt_idle_states, GFP_KERNEL); 421 if (of_property_read_u32_array(power_mgt, 422 "ibm,cpu-idle-state-flags", flags, dt_idle_states)) { 423 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); 424 goto out_free; 425 } 426 427 for (i = 0; i < dt_idle_states; i++) 428 supported_cpuidle_states |= flags[i]; 429 430 if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { 431 patch_instruction( 432 (unsigned int *)pnv_fastsleep_workaround_at_entry, 433 PPC_INST_NOP); 434 patch_instruction( 435 (unsigned int *)pnv_fastsleep_workaround_at_exit, 436 PPC_INST_NOP); 437 } 438 pnv_alloc_idle_core_states(); 439out_free: 440 kfree(flags); 441out: 442 return 0; 443} 444 445subsys_initcall(pnv_init_idle_states); 446 447static int __init pnv_probe(void) 448{ 449 unsigned long root = of_get_flat_dt_root(); 450 451 if (!of_flat_dt_is_compatible(root, "ibm,powernv")) 452 return 0; 453 454 hpte_init_native(); 455 456 if (firmware_has_feature(FW_FEATURE_OPAL)) 457 pnv_setup_machdep_opal(); 458 459 pr_debug("PowerNV detected !\n"); 460 461 return 1; 462} 463 464/* 465 * Returns the cpu frequency for 'cpu' in Hz. This is used by 466 * /proc/cpuinfo 467 */ 468static unsigned long pnv_get_proc_freq(unsigned int cpu) 469{ 470 unsigned long ret_freq; 471 472 ret_freq = cpufreq_quick_get(cpu) * 1000ul; 473 474 /* 475 * If the backend cpufreq driver does not exist, 476 * then fallback to old way of reporting the clockrate. 477 */ 478 if (!ret_freq) 479 ret_freq = ppc_proc_freq; 480 return ret_freq; 481} 482 483define_machine(powernv) { 484 .name = "PowerNV", 485 .probe = pnv_probe, 486 .init_early = pnv_init_early, 487 .setup_arch = pnv_setup_arch, 488 .init_IRQ = pnv_init_IRQ, 489 .show_cpuinfo = pnv_show_cpuinfo, 490 .get_proc_freq = pnv_get_proc_freq, 491 .progress = pnv_progress, 492 .machine_shutdown = pnv_shutdown, 493 .power_save = power7_idle, 494 .calibrate_decr = generic_calibrate_decr, 495 .dma_set_mask = pnv_dma_set_mask, 496 .dma_get_required_mask = pnv_dma_get_required_mask, 497#ifdef CONFIG_KEXEC 498 .kexec_cpu_down = pnv_kexec_cpu_down, 499#endif 500#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 501 .memory_block_size = pnv_memory_block_size, 502#endif 503}; 504