1/* 2 * Copyright IBM Corporation 2001, 2005, 2006 3 * Copyright Dave Engebretsen & Todd Inglett 2001 4 * Copyright Linas Vepstas 2005, 2006 5 * Copyright 2001-2012 IBM Corporation. 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 * 21 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com> 22 */ 23 24#include <linux/delay.h> 25#include <linux/debugfs.h> 26#include <linux/sched.h> 27#include <linux/init.h> 28#include <linux/list.h> 29#include <linux/pci.h> 30#include <linux/iommu.h> 31#include <linux/proc_fs.h> 32#include <linux/rbtree.h> 33#include <linux/reboot.h> 34#include <linux/seq_file.h> 35#include <linux/spinlock.h> 36#include <linux/export.h> 37#include <linux/of.h> 38 39#include <linux/atomic.h> 40#include <asm/debug.h> 41#include <asm/eeh.h> 42#include <asm/eeh_event.h> 43#include <asm/io.h> 44#include <asm/iommu.h> 45#include <asm/machdep.h> 46#include <asm/ppc-pci.h> 47#include <asm/rtas.h> 48 49 50/** Overview: 51 * EEH, or "Extended Error Handling" is a PCI bridge technology for 52 * dealing with PCI bus errors that can't be dealt with within the 53 * usual PCI framework, except by check-stopping the CPU. Systems 54 * that are designed for high-availability/reliability cannot afford 55 * to crash due to a "mere" PCI error, thus the need for EEH. 
56 * An EEH-capable bridge operates by converting a detected error 57 * into a "slot freeze", taking the PCI adapter off-line, making 58 * the slot behave, from the OS'es point of view, as if the slot 59 * were "empty": all reads return 0xff's and all writes are silently 60 * ignored. EEH slot isolation events can be triggered by parity 61 * errors on the address or data busses (e.g. during posted writes), 62 * which in turn might be caused by low voltage on the bus, dust, 63 * vibration, humidity, radioactivity or plain-old failed hardware. 64 * 65 * Note, however, that one of the leading causes of EEH slot 66 * freeze events are buggy device drivers, buggy device microcode, 67 * or buggy device hardware. This is because any attempt by the 68 * device to bus-master data to a memory address that is not 69 * assigned to the device will trigger a slot freeze. (The idea 70 * is to prevent devices-gone-wild from corrupting system memory). 71 * Buggy hardware/drivers will have a miserable time co-existing 72 * with EEH. 73 * 74 * Ideally, a PCI device driver, when suspecting that an isolation 75 * event has occurred (e.g. by reading 0xff's), will then ask EEH 76 * whether this is the case, and then take appropriate steps to 77 * reset the PCI slot, the PCI device, and then resume operations. 78 * However, until that day, the checking is done here, with the 79 * eeh_check_failure() routine embedded in the MMIO macros. If 80 * the slot is found to be isolated, an "EEH Event" is synthesized 81 * and sent out for processing. 82 */ 83 84/* If a device driver keeps reading an MMIO register in an interrupt 85 * handler after a slot isolation event, it might be broken. 86 * This sets the threshold for how many read attempts we allow 87 * before printing an error message. 
 */
#define EEH_MAX_FAILS	2100000

/* Time to wait for a PCI slot to report status, in milliseconds */
#define PCI_BUS_RESET_WAIT_MSEC (5*60*1000)

/*
 * EEH probe mode support, which is part of the flags,
 * is to support multiple platforms for EEH. Some platforms
 * like pSeries do PCI emulation based on device tree.
 * However, other platforms like powernv probe PCI devices
 * from hardware. The flag is used to distinguish that.
 * In addition, struct eeh_ops::probe would be invoked for
 * particular OF node or PCI device so that the corresponding
 * PE would be created there.
 */
int eeh_subsystem_flags;
EXPORT_SYMBOL(eeh_subsystem_flags);

/*
 * EEH allowed maximal frozen times. If one particular PE's
 * frozen count in last hour exceeds this limit, the PE will
 * be forced to be offline permanently.
 */
int eeh_max_freezes = 5;

/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;

/* Lock to avoid races due to multiple reports of an error */
DEFINE_RAW_SPINLOCK(confirm_error_lock);

/* Lock to protect passed flags */
static DEFINE_MUTEX(eeh_dev_mutex);

/* Buffer for reporting pci register dumps. Its here in BSS, and
 * not dynamically alloced, so that it ends up in RMO where RTAS
 * can access it.
 */
#define EEH_PCI_REGS_LOG_LEN 8192
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];

/*
 * The struct is used to maintain the EEH global statistic
 * information.
Besides, the EEH global statistics will be 133 * exported to user space through procfs 134 */ 135struct eeh_stats { 136 u64 no_device; /* PCI device not found */ 137 u64 no_dn; /* OF node not found */ 138 u64 no_cfg_addr; /* Config address not found */ 139 u64 ignored_check; /* EEH check skipped */ 140 u64 total_mmio_ffs; /* Total EEH checks */ 141 u64 false_positives; /* Unnecessary EEH checks */ 142 u64 slot_resets; /* PE reset */ 143}; 144 145static struct eeh_stats eeh_stats; 146 147#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) 148 149static int __init eeh_setup(char *str) 150{ 151 if (!strcmp(str, "off")) 152 eeh_add_flag(EEH_FORCE_DISABLED); 153 else if (!strcmp(str, "early_log")) 154 eeh_add_flag(EEH_EARLY_DUMP_LOG); 155 156 return 1; 157} 158__setup("eeh=", eeh_setup); 159 160/* 161 * This routine captures assorted PCI configuration space data 162 * for the indicated PCI device, and puts them into a buffer 163 * for RTAS error logging. 164 */ 165static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) 166{ 167 struct pci_dn *pdn = eeh_dev_to_pdn(edev); 168 u32 cfg; 169 int cap, i; 170 int n = 0, l = 0; 171 char buffer[128]; 172 173 n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n", 174 edev->phb->global_number, pdn->busno, 175 PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); 176 pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n", 177 edev->phb->global_number, pdn->busno, 178 PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); 179 180 eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg); 181 n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); 182 pr_warn("EEH: PCI device/vendor: %08x\n", cfg); 183 184 eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg); 185 n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); 186 pr_warn("EEH: PCI cmd/status register: %08x\n", cfg); 187 188 /* Gather bridge-specific registers */ 189 if (edev->mode & EEH_DEV_BRIDGE) { 190 eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg); 191 n += 
scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); 192 pr_warn("EEH: Bridge secondary status: %04x\n", cfg); 193 194 eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg); 195 n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); 196 pr_warn("EEH: Bridge control: %04x\n", cfg); 197 } 198 199 /* Dump out the PCI-X command and status regs */ 200 cap = edev->pcix_cap; 201 if (cap) { 202 eeh_ops->read_config(pdn, cap, 4, &cfg); 203 n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); 204 pr_warn("EEH: PCI-X cmd: %08x\n", cfg); 205 206 eeh_ops->read_config(pdn, cap+4, 4, &cfg); 207 n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); 208 pr_warn("EEH: PCI-X status: %08x\n", cfg); 209 } 210 211 /* If PCI-E capable, dump PCI-E cap 10 */ 212 cap = edev->pcie_cap; 213 if (cap) { 214 n += scnprintf(buf+n, len-n, "pci-e cap10:\n"); 215 pr_warn("EEH: PCI-E capabilities and status follow:\n"); 216 217 for (i=0; i<=8; i++) { 218 eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); 219 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 220 221 if ((i % 4) == 0) { 222 if (i != 0) 223 pr_warn("%s\n", buffer); 224 225 l = scnprintf(buffer, sizeof(buffer), 226 "EEH: PCI-E %02x: %08x ", 227 4*i, cfg); 228 } else { 229 l += scnprintf(buffer+l, sizeof(buffer)-l, 230 "%08x ", cfg); 231 } 232 233 } 234 235 pr_warn("%s\n", buffer); 236 } 237 238 /* If AER capable, dump it */ 239 cap = edev->aer_cap; 240 if (cap) { 241 n += scnprintf(buf+n, len-n, "pci-e AER:\n"); 242 pr_warn("EEH: PCI-E AER capability register set follows:\n"); 243 244 for (i=0; i<=13; i++) { 245 eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); 246 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 247 248 if ((i % 4) == 0) { 249 if (i != 0) 250 pr_warn("%s\n", buffer); 251 252 l = scnprintf(buffer, sizeof(buffer), 253 "EEH: PCI-E AER %02x: %08x ", 254 4*i, cfg); 255 } else { 256 l += scnprintf(buffer+l, sizeof(buffer)-l, 257 "%08x ", cfg); 258 } 259 } 260 261 pr_warn("%s\n", buffer); 262 } 263 264 return n; 265} 266 267static void 
*eeh_dump_pe_log(void *data, void *flag)
{
	struct eeh_pe *pe = data;
	struct eeh_dev *edev, *tmp;
	size_t *plen = flag;

	/* If the PE's config space is blocked, 0xFF's will be
	 * returned. It's pointless to collect the log in this
	 * case.
	 */
	if (pe->state & EEH_PE_CFG_BLOCKED)
		return NULL;

	/* Append the config-space dump of every device in this PE
	 * to the shared pci_regs_buf, advancing the caller's length.
	 */
	eeh_pe_for_each_dev(pe, edev, tmp)
		*plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
					  EEH_PCI_REGS_LOG_LEN - *plen);

	return NULL;
}

/**
 * eeh_slot_error_detail - Generate combined log including driver log and error log
 * @pe: EEH PE
 * @severity: temporary or permanent error log
 *
 * This routine should be called to generate the combined log, which
 * is comprised of driver log and error log. The driver log is figured
 * out from the config space of the corresponding PCI device, while
 * the error log is fetched through platform dependent function call.
 */
void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
{
	size_t loglen = 0;

	/*
	 * When the PHB is fenced or dead, it's pointless to collect
	 * the data from PCI config space because it should return
	 * 0xFF's. For ER, we still retrieve the data from the PCI
	 * config space.
	 *
	 * For pHyp, we have to enable IO for log retrieval. Otherwise,
	 * 0xFF's is always returned from PCI config space.
	 */
	if (!(pe->type & EEH_PE_PHB)) {
		if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG))
			eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);

		/*
		 * The config space of some PCI devices can't be accessed
		 * when their PEs are in frozen state. Otherwise, fenced
		 * PHB might be seen. Those PEs are identified with flag
		 * EEH_PE_CFG_RESTRICTED, indicating EEH_PE_CFG_BLOCKED
		 * is set automatically when the PE is put to EEH_PE_ISOLATED.
		 *
		 * Restoring BARs possibly triggers PCI config access in
		 * (OPAL) firmware and then causes fenced PHB. If the
		 * PCI config is blocked with flag EEH_PE_CFG_BLOCKED, it's
		 * pointless to restore BARs and dump config space.
		 */
		eeh_ops->configure_bridge(pe);
		if (!(pe->state & EEH_PE_CFG_BLOCKED)) {
			eeh_pe_restore_bars(pe);

			pci_regs_buf[0] = 0;
			eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen);
		}
	}

	/* Hand the collected driver log to the platform for merging
	 * with its own error log.
	 */
	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
}

/**
 * eeh_token_to_phys - Convert EEH address token to phys address
 * @token: I/O token, should be address in the form 0xA....
 *
 * This routine should be called to convert virtual I/O address
 * to physical one.
 */
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
	pte_t *ptep;
	unsigned long pa;
	int hugepage_shift;

	/*
	 * We won't find hugepages here(this is iomem). Hence we are not
	 * worried about _PAGE_SPLITTING/collapse. Also we will not hit
	 * page table free, because of init_mm.
	 */
	ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
	if (!ptep)
		return token;
	WARN_ON(hugepage_shift);
	pa = pte_pfn(*ptep) << PAGE_SHIFT;

	/* Keep the page offset from the original token */
	return pa | (token & (PAGE_SIZE-1));
}

/*
 * On PowerNV platform, we might already have fenced PHB there.
 * For that case, it's meaningless to recover frozen PE. Instead,
 * we have to handle fenced PHB firstly.
369 */ 370static int eeh_phb_check_failure(struct eeh_pe *pe) 371{ 372 struct eeh_pe *phb_pe; 373 unsigned long flags; 374 int ret; 375 376 if (!eeh_has_flag(EEH_PROBE_MODE_DEV)) 377 return -EPERM; 378 379 /* Find the PHB PE */ 380 phb_pe = eeh_phb_pe_get(pe->phb); 381 if (!phb_pe) { 382 pr_warn("%s Can't find PE for PHB#%d\n", 383 __func__, pe->phb->global_number); 384 return -EEXIST; 385 } 386 387 /* If the PHB has been in problematic state */ 388 eeh_serialize_lock(&flags); 389 if (phb_pe->state & EEH_PE_ISOLATED) { 390 ret = 0; 391 goto out; 392 } 393 394 /* Check PHB state */ 395 ret = eeh_ops->get_state(phb_pe, NULL); 396 if ((ret < 0) || 397 (ret == EEH_STATE_NOT_SUPPORT) || 398 (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == 399 (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { 400 ret = 0; 401 goto out; 402 } 403 404 /* Isolate the PHB and send event */ 405 eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); 406 eeh_serialize_unlock(flags); 407 408 pr_err("EEH: PHB#%x failure detected, location: %s\n", 409 phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe)); 410 dump_stack(); 411 eeh_send_failure_event(phb_pe); 412 413 return 1; 414out: 415 eeh_serialize_unlock(flags); 416 return ret; 417} 418 419/** 420 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze 421 * @edev: eeh device 422 * 423 * Check for an EEH failure for the given device node. Call this 424 * routine if the result of a read was all 0xff's and you want to 425 * find out if this is due to an EEH slot freeze. This routine 426 * will query firmware for the EEH status. 427 * 428 * Returns 0 if there has not been an EEH error; otherwise returns 429 * a non-zero value and queues up a slot isolation event notification. 430 * 431 * It is safe to call this routine in an interrupt context. 
 */
int eeh_dev_check_failure(struct eeh_dev *edev)
{
	int ret;
	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	unsigned long flags;
	struct pci_dn *pdn;
	struct pci_dev *dev;
	struct eeh_pe *pe, *parent_pe, *phb_pe;
	int rc = 0;
	const char *location = NULL;

	eeh_stats.total_mmio_ffs++;

	if (!eeh_enabled())
		return 0;

	if (!edev) {
		eeh_stats.no_dn++;
		return 0;
	}
	dev = eeh_dev_to_pci_dev(edev);
	pe = eeh_dev_to_pe(edev);

	/* Access to IO BARs might get this far and still not want checking. */
	if (!pe) {
		eeh_stats.ignored_check++;
		pr_debug("EEH: Ignored check for %s\n",
			eeh_pci_name(dev));
		return 0;
	}

	/* Without a config address there is nothing to query */
	if (!pe->addr && !pe->config_addr) {
		eeh_stats.no_cfg_addr++;
		return 0;
	}

	/*
	 * On PowerNV platform, we might already have fenced PHB
	 * there and we need take care of that firstly.
	 */
	ret = eeh_phb_check_failure(pe);
	if (ret > 0)
		return ret;

	/*
	 * If the PE isn't owned by us, we shouldn't check the
	 * state. Instead, let the owner handle it if the PE has
	 * been frozen.
	 */
	if (eeh_pe_passed(pe))
		return 0;

	/* If we already have a pending isolation event for this
	 * slot, we know it's bad already, we don't need to check.
	 * Do this checking under a lock; as multiple PCI devices
	 * in one slot might report errors simultaneously, and we
	 * only want one error recovery routine running.
	 */
	eeh_serialize_lock(&flags);
	rc = 1;
	if (pe->state & EEH_PE_ISOLATED) {
		/* Throttle the warning: only complain once every
		 * EEH_MAX_FAILS reads against an isolated PE.
		 */
		pe->check_count++;
		if (pe->check_count % EEH_MAX_FAILS == 0) {
			pdn = eeh_dev_to_pdn(edev);
			if (pdn->node)
				location = of_get_property(pdn->node, "ibm,loc-code", NULL);
			printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
				"location=%s driver=%s pci addr=%s\n",
				pe->check_count,
				location ? location : "unknown",
				eeh_driver_name(dev), eeh_pci_name(dev));
			printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
				eeh_driver_name(dev));
			dump_stack();
		}
		goto dn_unlock;
	}

	/*
	 * Now test for an EEH failure. This is VERY expensive.
	 * Note that the eeh_config_addr may be a parent device
	 * in the case of a device behind a bridge, or it may be
	 * function zero of a multi-function device.
	 * In any case they must share a common PHB.
	 */
	ret = eeh_ops->get_state(pe, NULL);

	/* Note that config-io to empty slots may fail;
	 * they are empty when they don't have children.
	 * We will punt with the following conditions: Failure to get
	 * PE's state, EEH not support and Permanently unavailable
	 * state, PE is in good state.
	 */
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) ||
	    ((ret & active_flags) == active_flags)) {
		eeh_stats.false_positives++;
		pe->false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/*
	 * It should be corner case that the parent PE has been
	 * put into frozen state as well. We should take care
	 * that at first.
	 */
	parent_pe = pe->parent;
	while (parent_pe) {
		/* Hit the ceiling ? */
		if (parent_pe->type & EEH_PE_PHB)
			break;

		/* Frozen parent PE ? Recover the outermost frozen
		 * ancestor instead of the leaf PE.
		 */
		ret = eeh_ops->get_state(parent_pe, NULL);
		if (ret > 0 &&
		    (ret & active_flags) != active_flags)
			pe = parent_pe;

		/* Next parent level */
		parent_pe = parent_pe->parent;
	}

	eeh_stats.slot_resets++;

	/* Avoid repeated reports of this failure, including problems
	 * with other functions on this device, and functions under
	 * bridges.
	 */
	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
	eeh_serialize_unlock(flags);

	/* Most EEH events are due to device driver bugs. Having
	 * a stack trace will help the device-driver authors figure
	 * out what happened. So print that out.
568 */ 569 phb_pe = eeh_phb_pe_get(pe->phb); 570 pr_err("EEH: Frozen PHB#%x-PE#%x detected\n", 571 pe->phb->global_number, pe->addr); 572 pr_err("EEH: PE location: %s, PHB location: %s\n", 573 eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe)); 574 dump_stack(); 575 576 eeh_send_failure_event(pe); 577 578 return 1; 579 580dn_unlock: 581 eeh_serialize_unlock(flags); 582 return rc; 583} 584 585EXPORT_SYMBOL_GPL(eeh_dev_check_failure); 586 587/** 588 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze 589 * @token: I/O address 590 * 591 * Check for an EEH failure at the given I/O address. Call this 592 * routine if the result of a read was all 0xff's and you want to 593 * find out if this is due to an EEH slot freeze event. This routine 594 * will query firmware for the EEH status. 595 * 596 * Note this routine is safe to call in an interrupt context. 597 */ 598int eeh_check_failure(const volatile void __iomem *token) 599{ 600 unsigned long addr; 601 struct eeh_dev *edev; 602 603 /* Finding the phys addr + pci device; this is pretty quick. */ 604 addr = eeh_token_to_phys((unsigned long __force) token); 605 edev = eeh_addr_cache_get_dev(addr); 606 if (!edev) { 607 eeh_stats.no_device++; 608 return 0; 609 } 610 611 return eeh_dev_check_failure(edev); 612} 613EXPORT_SYMBOL(eeh_check_failure); 614 615 616/** 617 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot 618 * @pe: EEH PE 619 * 620 * This routine should be called to reenable frozen MMIO or DMA 621 * so that it would work correctly again. It's useful while doing 622 * recovery or log collection on the indicated device. 623 */ 624int eeh_pci_enable(struct eeh_pe *pe, int function) 625{ 626 int active_flag, rc; 627 628 /* 629 * pHyp doesn't allow to enable IO or DMA on unfrozen PE. 630 * Also, it's pointless to enable them on unfrozen PE. So 631 * we have to check before enabling IO or DMA. 
632 */ 633 switch (function) { 634 case EEH_OPT_THAW_MMIO: 635 active_flag = EEH_STATE_MMIO_ACTIVE; 636 break; 637 case EEH_OPT_THAW_DMA: 638 active_flag = EEH_STATE_DMA_ACTIVE; 639 break; 640 case EEH_OPT_DISABLE: 641 case EEH_OPT_ENABLE: 642 case EEH_OPT_FREEZE_PE: 643 active_flag = 0; 644 break; 645 default: 646 pr_warn("%s: Invalid function %d\n", 647 __func__, function); 648 return -EINVAL; 649 } 650 651 /* 652 * Check if IO or DMA has been enabled before 653 * enabling them. 654 */ 655 if (active_flag) { 656 rc = eeh_ops->get_state(pe, NULL); 657 if (rc < 0) 658 return rc; 659 660 /* Needn't enable it at all */ 661 if (rc == EEH_STATE_NOT_SUPPORT) 662 return 0; 663 664 /* It's already enabled */ 665 if (rc & active_flag) 666 return 0; 667 } 668 669 670 /* Issue the request */ 671 rc = eeh_ops->set_option(pe, function); 672 if (rc) 673 pr_warn("%s: Unexpected state change %d on " 674 "PHB#%d-PE#%x, err=%d\n", 675 __func__, function, pe->phb->global_number, 676 pe->addr, rc); 677 678 /* Check if the request is finished successfully */ 679 if (active_flag) { 680 rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 681 if (rc <= 0) 682 return rc; 683 684 if (rc & active_flag) 685 return 0; 686 687 return -EIO; 688 } 689 690 return rc; 691} 692 693static void *eeh_disable_and_save_dev_state(void *data, void *userdata) 694{ 695 struct eeh_dev *edev = data; 696 struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); 697 struct pci_dev *dev = userdata; 698 699 /* 700 * The caller should have disabled and saved the 701 * state for the specified device 702 */ 703 if (!pdev || pdev == dev) 704 return NULL; 705 706 /* Ensure we have D0 power state */ 707 pci_set_power_state(pdev, PCI_D0); 708 709 /* Save device state */ 710 pci_save_state(pdev); 711 712 /* 713 * Disable device to avoid any DMA traffic and 714 * interrupt from the device 715 */ 716 pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); 717 718 return NULL; 719} 720 721static void 
*eeh_restore_dev_state(void *data, void *userdata) 722{ 723 struct eeh_dev *edev = data; 724 struct pci_dn *pdn = eeh_dev_to_pdn(edev); 725 struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); 726 struct pci_dev *dev = userdata; 727 728 if (!pdev) 729 return NULL; 730 731 /* Apply customization from firmware */ 732 if (pdn && eeh_ops->restore_config) 733 eeh_ops->restore_config(pdn); 734 735 /* The caller should restore state for the specified device */ 736 if (pdev != dev) 737 pci_save_state(pdev); 738 739 return NULL; 740} 741 742/** 743 * pcibios_set_pcie_slot_reset - Set PCI-E reset state 744 * @dev: pci device struct 745 * @state: reset state to enter 746 * 747 * Return value: 748 * 0 if success 749 */ 750int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) 751{ 752 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); 753 struct eeh_pe *pe = eeh_dev_to_pe(edev); 754 755 if (!pe) { 756 pr_err("%s: No PE found on PCI device %s\n", 757 __func__, pci_name(dev)); 758 return -EINVAL; 759 } 760 761 switch (state) { 762 case pcie_deassert_reset: 763 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); 764 eeh_unfreeze_pe(pe, false); 765 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); 766 eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev); 767 eeh_pe_state_clear(pe, EEH_PE_ISOLATED); 768 break; 769 case pcie_hot_reset: 770 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 771 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 772 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); 773 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 774 eeh_ops->reset(pe, EEH_RESET_HOT); 775 break; 776 case pcie_warm_reset: 777 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 778 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 779 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); 780 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 781 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); 782 break; 783 default: 784 eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED); 785 return -EINVAL; 786 }; 787 
788 return 0; 789} 790 791/** 792 * eeh_set_pe_freset - Check the required reset for the indicated device 793 * @data: EEH device 794 * @flag: return value 795 * 796 * Each device might have its preferred reset type: fundamental or 797 * hot reset. The routine is used to collected the information for 798 * the indicated device and its children so that the bunch of the 799 * devices could be reset properly. 800 */ 801static void *eeh_set_dev_freset(void *data, void *flag) 802{ 803 struct pci_dev *dev; 804 unsigned int *freset = (unsigned int *)flag; 805 struct eeh_dev *edev = (struct eeh_dev *)data; 806 807 dev = eeh_dev_to_pci_dev(edev); 808 if (dev) 809 *freset |= dev->needs_freset; 810 811 return NULL; 812} 813 814/** 815 * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second 816 * @pe: EEH PE 817 * 818 * Assert the PCI #RST line for 1/4 second. 819 */ 820static void eeh_reset_pe_once(struct eeh_pe *pe) 821{ 822 unsigned int freset = 0; 823 824 /* Determine type of EEH reset required for 825 * Partitionable Endpoint, a hot-reset (1) 826 * or a fundamental reset (3). 827 * A fundamental reset required by any device under 828 * Partitionable Endpoint trumps hot-reset. 829 */ 830 eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); 831 832 if (freset) 833 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); 834 else 835 eeh_ops->reset(pe, EEH_RESET_HOT); 836 837 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); 838} 839 840/** 841 * eeh_reset_pe - Reset the indicated PE 842 * @pe: EEH PE 843 * 844 * This routine should be called to reset indicated device, including 845 * PE. A PE might include multiple PCI devices and sometimes PCI bridges 846 * might be involved as well. 
847 */ 848int eeh_reset_pe(struct eeh_pe *pe) 849{ 850 int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 851 int i, state, ret; 852 853 /* Mark as reset and block config space */ 854 eeh_pe_state_mark(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); 855 856 /* Take three shots at resetting the bus */ 857 for (i = 0; i < 3; i++) { 858 eeh_reset_pe_once(pe); 859 860 /* 861 * EEH_PE_ISOLATED is expected to be removed after 862 * BAR restore. 863 */ 864 state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 865 if ((state & flags) == flags) { 866 ret = 0; 867 goto out; 868 } 869 870 if (state < 0) { 871 pr_warn("%s: Unrecoverable slot failure on PHB#%d-PE#%x", 872 __func__, pe->phb->global_number, pe->addr); 873 ret = -ENOTRECOVERABLE; 874 goto out; 875 } 876 877 /* We might run out of credits */ 878 ret = -EIO; 879 pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n", 880 __func__, state, pe->phb->global_number, pe->addr, (i + 1)); 881 } 882 883out: 884 eeh_pe_state_clear(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); 885 return ret; 886} 887 888/** 889 * eeh_save_bars - Save device bars 890 * @edev: PCI device associated EEH device 891 * 892 * Save the values of the device bars. Unlike the restore 893 * routine, this routine is *not* recursive. This is because 894 * PCI devices are added individually; but, for the restore, 895 * an entire slot is reset at a time. 896 */ 897void eeh_save_bars(struct eeh_dev *edev) 898{ 899 struct pci_dn *pdn; 900 int i; 901 902 pdn = eeh_dev_to_pdn(edev); 903 if (!pdn) 904 return; 905 906 for (i = 0; i < 16; i++) 907 eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]); 908 909 /* 910 * For PCI bridges including root port, we need enable bus 911 * master explicitly. Otherwise, it can't fetch IODA table 912 * entries correctly. So we cache the bit in advance so that 913 * we can restore it after reset, either PHB range or PE range. 
	 */
	if (edev->mode & EEH_DEV_BRIDGE)
		edev->config_space[1] |= PCI_COMMAND_MASTER;
}

/**
 * eeh_ops_register - Register platform dependent EEH operations
 * @ops: platform dependent EEH operations
 *
 * Register the platform dependent EEH operation callback
 * functions. The platform should call this function before
 * any other EEH operations.
 */
int __init eeh_ops_register(struct eeh_ops *ops)
{
	if (!ops->name) {
		pr_warn("%s: Invalid EEH ops name for %p\n",
			__func__, ops);
		return -EINVAL;
	}

	/* Only one platform's ops may be registered */
	if (eeh_ops && eeh_ops != ops) {
		pr_warn("%s: EEH ops of platform %s already existing (%s)\n",
			__func__, eeh_ops->name, ops->name);
		return -EEXIST;
	}

	eeh_ops = ops;

	return 0;
}

/**
 * eeh_ops_unregister - Unregister platform dependent EEH operations
 * @name: name of EEH platform operations
 *
 * Unregister the platform dependent EEH operation callback
 * functions.
 */
int __exit eeh_ops_unregister(const char *name)
{
	if (!name || !strlen(name)) {
		pr_warn("%s: Invalid EEH ops name\n",
			__func__);
		return -EINVAL;
	}

	if (eeh_ops && !strcmp(eeh_ops->name, name)) {
		eeh_ops = NULL;
		return 0;
	}

	return -EEXIST;
}

/* Disable EEH checking on reboot so failed config cycles during
 * shutdown don't trigger recovery.
 */
static int eeh_reboot_notifier(struct notifier_block *nb,
			       unsigned long action, void *unused)
{
	eeh_clear_flag(EEH_ENABLED);
	return NOTIFY_DONE;
}

static struct notifier_block eeh_reboot_nb = {
	.notifier_call = eeh_reboot_notifier,
};

/**
 * eeh_init - EEH initialization
 *
 * Initialize EEH by trying to enable it for all of the adapters in the system.
 * As a side effect we can determine here if eeh is supported at all.
 * Note that we leave EEH on so failed config cycles won't cause a machine
 * check. If a user turns off EEH for a particular adapter they are really
 * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
 * grant access to a slot if EEH isn't enabled, and so we always enable
 * EEH for all slots/all devices.
 *
 * The eeh-force-off option disables EEH checking globally, for all slots.
 * Even if force-off is set, the EEH hardware is still enabled, so that
 * newer systems can boot.
 */
int eeh_init(void)
{
	struct pci_controller *hose, *tmp;
	struct pci_dn *pdn;
	static int cnt = 0;
	int ret = 0;

	/*
	 * We have to delay the initialization on PowerNV after
	 * the PCI hierarchy tree has been built because the PEs
	 * are figured out based on PCI devices instead of device
	 * tree nodes
	 */
	if (machine_is(powernv) && cnt++ <= 0)
		return ret;

	/* Register reboot notifier */
	ret = register_reboot_notifier(&eeh_reboot_nb);
	if (ret) {
		pr_warn("%s: Failed to register notifier (%d)\n",
			__func__, ret);
		return ret;
	}

	/* call platform initialization function */
	if (!eeh_ops) {
		pr_warn("%s: Platform EEH operation not found\n",
			__func__);
		return -EEXIST;
	} else if ((ret = eeh_ops->init()))
		return ret;

	/* Initialize EEH event */
	ret = eeh_event_init();
	if (ret)
		return ret;

	/* Enable EEH for all adapters */
	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pdn = hose->pci_data;
		traverse_pci_dn(pdn, eeh_ops->probe, NULL);
	}

	/*
	 * Call platform post-initialization. Actually, It's good chance
	 * to inform platform that EEH is ready to supply service if the
	 * I/O cache stuff has been built up.
1042 */ 1043 if (eeh_ops->post_init) { 1044 ret = eeh_ops->post_init(); 1045 if (ret) 1046 return ret; 1047 } 1048 1049 if (eeh_enabled()) 1050 pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); 1051 else 1052 pr_warn("EEH: No capable adapters found\n"); 1053 1054 return ret; 1055} 1056 1057core_initcall_sync(eeh_init); 1058 1059/** 1060 * eeh_add_device_early - Enable EEH for the indicated device node 1061 * @pdn: PCI device node for which to set up EEH 1062 * 1063 * This routine must be used to perform EEH initialization for PCI 1064 * devices that were added after system boot (e.g. hotplug, dlpar). 1065 * This routine must be called before any i/o is performed to the 1066 * adapter (inluding any config-space i/o). 1067 * Whether this actually enables EEH or not for this device depends 1068 * on the CEC architecture, type of the device, on earlier boot 1069 * command-line arguments & etc. 1070 */ 1071void eeh_add_device_early(struct pci_dn *pdn) 1072{ 1073 struct pci_controller *phb; 1074 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 1075 1076 if (!edev) 1077 return; 1078 1079 if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) 1080 return; 1081 1082 /* USB Bus children of PCI devices will not have BUID's */ 1083 phb = edev->phb; 1084 if (NULL == phb || 1085 (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid)) 1086 return; 1087 1088 eeh_ops->probe(pdn, NULL); 1089} 1090 1091/** 1092 * eeh_add_device_tree_early - Enable EEH for the indicated device 1093 * @pdn: PCI device node 1094 * 1095 * This routine must be used to perform EEH initialization for the 1096 * indicated PCI device that was added after system boot (e.g. 1097 * hotplug, dlpar). 
 */
void eeh_add_device_tree_early(struct pci_dn *pdn)
{
	struct pci_dn *n;

	if (!pdn)
		return;

	/* Depth-first: probe all children before the node itself */
	list_for_each_entry(n, &pdn->child_list, list)
		eeh_add_device_tree_early(n);
	eeh_add_device_early(pdn);
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);

/**
 * eeh_add_device_late - Perform EEH initialization for the indicated pci device
 * @dev: pci device for which to set up EEH
 *
 * This routine must be used to complete EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 */
void eeh_add_device_late(struct pci_dev *dev)
{
	struct pci_dn *pdn;
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;

	pr_debug("EEH: Adding device %s\n", pci_name(dev));

	pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
	/* NOTE(review): edev is dereferenced without a NULL check below;
	 * presumably every pdn reaching here has an eeh_dev attached —
	 * confirm against the probe paths. */
	edev = pdn_to_eeh_dev(pdn);
	if (edev->pdev == dev) {
		pr_debug("EEH: Already referenced !\n");
		return;
	}

	/*
	 * The EEH cache might not be removed correctly because of
	 * unbalanced kref to the device during unplug time, which
	 * relies on pcibios_release_device(). So we have to remove
	 * that here explicitly.
	 */
	if (edev->pdev) {
		eeh_rmv_from_parent_pe(edev);
		eeh_addr_cache_rmv_dev(edev->pdev);
		eeh_sysfs_remove_device(edev->pdev);
		edev->mode &= ~EEH_DEV_SYSFS;

		/*
		 * We definitely should have the PCI device removed
		 * though it wasn't correctly. So we needn't call
		 * into error handler afterwards.
		 */
		edev->mode |= EEH_DEV_NO_HANDLER;

		edev->pdev = NULL;
		dev->dev.archdata.edev = NULL;
	}

	if (eeh_has_flag(EEH_PROBE_MODE_DEV))
		eeh_ops->probe(pdn, NULL);

	/* Bind the eeh_dev and the pci_dev to each other */
	edev->pdev = dev;
	dev->dev.archdata.edev = edev;

	eeh_addr_cache_insert_dev(dev);
}

/**
 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices which are attached to the indicated PCI bus. The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_device_tree_late(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_add_device_late(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			/* Recurse into the bus below the bridge, if any */
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_device_tree_late(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);

/**
 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to add EEH sysfs files for PCI
 * devices which are attached to the indicated PCI bus. The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_sysfs_files(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_sysfs_add_device(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			/* Recurse into the bus below the bridge, if any */
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_sysfs_files(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);

/**
 * eeh_remove_device - Undo EEH setup for the indicated pci device
 * @dev: pci device to be removed
 *
 * This routine should be called when a device is removed from
 * a running system (e.g.
by hotplug or dlpar). It unregisters
 * the PCI device from the EEH subsystem. I/O errors affecting
 * this device will no longer be detected after this call; thus,
 * i/o errors affecting this slot may leave this device unusable.
 */
void eeh_remove_device(struct pci_dev *dev)
{
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;
	edev = pci_dev_to_eeh_dev(dev);

	/* Unregister the device with the EEH/PCI address search system */
	pr_debug("EEH: Removing device %s\n", pci_name(dev));

	if (!edev || !edev->pdev || !edev->pe) {
		pr_debug("EEH: Not referenced !\n");
		return;
	}

	/*
	 * During the hotplug for EEH error recovery, we need the EEH
	 * device attached to the parent PE in order for BAR restore
	 * a bit later. So we keep it for BAR restore and remove it
	 * from the parent PE during the BAR restore.
	 */
	edev->pdev = NULL;
	dev->dev.archdata.edev = NULL;
	if (!(edev->pe->state & EEH_PE_KEEP))
		eeh_rmv_from_parent_pe(edev);
	else
		edev->mode |= EEH_DEV_DISCONNECTED;

	/*
	 * We're removing from the PCI subsystem, that means
	 * the PCI device driver can't support EEH or not
	 * well. So we rely on hotplug completely to do recovery
	 * for the specific PCI device.
	 */
	edev->mode |= EEH_DEV_NO_HANDLER;

	eeh_addr_cache_rmv_dev(dev);
	eeh_sysfs_remove_device(dev);
	edev->mode &= ~EEH_DEV_SYSFS;
}

/*
 * eeh_unfreeze_pe - Re-enable MMIO and DMA on the indicated PE and,
 * when @sw_state is set, also clear its software isolated state.
 * Returns 0 on success or the failing eeh_pci_enable() error code.
 */
int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state)
{
	int ret;

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
	if (ret) {
		pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
	if (ret) {
		pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	/* Clear software isolated state */
	if (sw_state && (pe->state & EEH_PE_ISOLATED))
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED);

	return ret;
}

/*
 * Devices matched here need a full PE reset on ownership change
 * instead of a plain unfreeze (see eeh_pe_change_owner()).
 */
static struct pci_device_id eeh_reset_ids[] = {
	{ PCI_DEVICE(0x19a2, 0x0710) },	/* Emulex, BE */
	{ PCI_DEVICE(0x10df, 0xe220) },	/* Emulex, Lancer */
	{ PCI_DEVICE(0x14e4, 0x1657) },	/* Broadcom BCM5719 */
	{ 0 }
};

/*
 * Prepare a PE for a change of owner (e.g. pass-through): if it is
 * frozen, either unfreeze it or — for devices in eeh_reset_ids —
 * perform a full reset-and-recover.
 */
static int eeh_pe_change_owner(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	struct pci_device_id *id;
	int flags, ret;

	/* Check PE state */
	flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	ret = eeh_ops->get_state(pe, NULL);
	if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
		return 0;

	/* Unfrozen PE, nothing to do */
	if ((ret & flags) == flags)
		return 0;

	/* Frozen PE, check if it needs PE level reset */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) {
			if (id->vendor != PCI_ANY_ID &&
			    id->vendor != pdev->vendor)
				continue;
			if (id->device != PCI_ANY_ID &&
			    id->device != pdev->device)
				continue;
			if (id->subvendor != PCI_ANY_ID
&&
			    id->subvendor != pdev->subsystem_vendor)
				continue;
			if (id->subdevice != PCI_ANY_ID &&
			    id->subdevice != pdev->subsystem_device)
				continue;

			goto reset;
		}
	}

	return eeh_unfreeze_pe(pe, true);

reset:
	return eeh_pe_reset_and_recover(pe);
}

/**
 * eeh_dev_open - Increase count of pass through devices for PE
 * @pdev: PCI device
 *
 * Increase count of passed through devices for the indicated
 * PE. In the result, the EEH errors detected on the PE won't be
 * reported. The PE owner will be responsible for detection
 * and recovery.
 */
int eeh_dev_open(struct pci_dev *pdev)
{
	struct eeh_dev *edev;
	int ret = -ENODEV;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		goto out;

	/*
	 * The PE might have been put into frozen state, but we
	 * didn't detect that yet. The passed through PCI devices
	 * in frozen PE won't work properly. Clear the frozen state
	 * in advance.
	 */
	ret = eeh_pe_change_owner(edev->pe);
	if (ret)
		goto out;

	/* Increase PE's pass through count */
	atomic_inc(&edev->pe->pass_dev_cnt);
	mutex_unlock(&eeh_dev_mutex);

	return 0;
out:
	mutex_unlock(&eeh_dev_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(eeh_dev_open);

/**
 * eeh_dev_release - Decrease count of pass through devices for PE
 * @pdev: PCI device
 *
 * Decrease count of pass through devices for the indicated PE. If
 * there is no passed through device in PE, the EEH errors detected
 * on the PE will be reported and handled as usual.
 */
void eeh_dev_release(struct pci_dev *pdev)
{
	struct eeh_dev *edev;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe || !eeh_pe_passed(edev->pe))
		goto out;

	/* Decrease PE's pass through count */
	atomic_dec(&edev->pe->pass_dev_cnt);
	WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0);
	eeh_pe_change_owner(edev->pe);
out:
	mutex_unlock(&eeh_dev_mutex);
}
EXPORT_SYMBOL(eeh_dev_release);

#ifdef CONFIG_IOMMU_API

/*
 * Callback for iommu_group_for_each_dev(): report (via @data) the
 * first device whose IOMMU table has a group attached.
 */
static int dev_has_iommu_table(struct device *dev, void *data)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_dev **ppdev = data;
	struct iommu_table *tbl;

	if (!dev)
		return 0;

	tbl = get_iommu_table_base(dev);
	if (tbl && tbl->it_group) {
		*ppdev = pdev;
		return 1;
	}

	return 0;
}

/**
 * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE
 * @group: IOMMU group
 *
 * The routine is called to convert IOMMU group to EEH PE.
 */
struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group)
{
	struct pci_dev *pdev = NULL;
	struct eeh_dev *edev;
	int ret;

	/* No IOMMU group ? */
	if (!group)
		return NULL;

	ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table);
	if (!ret || !pdev)
		return NULL;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		return NULL;

	return edev->pe;
}
EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe);

#endif /* CONFIG_IOMMU_API */

/**
 * eeh_pe_set_option - Set options for the indicated PE
 * @pe: EEH PE
 * @option: requested option
 *
 * The routine is called to enable or disable EEH functionality
 * on the indicated PE, to enable IO or DMA for the frozen PE.
 */
int eeh_pe_set_option(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ?
 */
	if (!pe)
		return -ENODEV;

	/*
	 * EEH functionality could possibly be disabled, just
	 * return error for the case. And the EEH functionality
	 * isn't expected to be disabled on one specific PE.
	 */
	switch (option) {
	case EEH_OPT_ENABLE:
		if (eeh_enabled()) {
			ret = eeh_pe_change_owner(pe);
			break;
		}
		ret = -EIO;
		break;
	case EEH_OPT_DISABLE:
		/* Per-PE disable isn't supported; silently accepted */
		break;
	case EEH_OPT_THAW_MMIO:
	case EEH_OPT_THAW_DMA:
		if (!eeh_ops || !eeh_ops->set_option) {
			ret = -ENOENT;
			break;
		}

		ret = eeh_pci_enable(pe, option);
		break;
	default:
		pr_debug("%s: Option %d out of range (%d, %d)\n",
			__func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_set_option);

/**
 * eeh_pe_get_state - Retrieve PE's state
 * @pe: EEH PE
 *
 * Retrieve the PE's state, which includes 3 aspects: enabled
 * DMA, enabled IO and asserted reset.
 */
int eeh_pe_get_state(struct eeh_pe *pe)
{
	int result, ret = 0;
	bool rst_active, dma_en, mmio_en;

	/* Existing PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->get_state)
		return -ENOENT;

	result = eeh_ops->get_state(pe, NULL);
	rst_active = !!(result & EEH_STATE_RESET_ACTIVE);
	dma_en = !!(result & EEH_STATE_DMA_ENABLED);
	mmio_en = !!(result & EEH_STATE_MMIO_ENABLED);

	/* Collapse the raw platform state into a coarse PE state */
	if (rst_active)
		ret = EEH_PE_STATE_RESET;
	else if (dma_en && mmio_en)
		ret = EEH_PE_STATE_NORMAL;
	else if (!dma_en && !mmio_en)
		ret = EEH_PE_STATE_STOPPED_IO_DMA;
	else if (!dma_en && mmio_en)
		ret = EEH_PE_STATE_STOPPED_DMA;
	else
		ret = EEH_PE_STATE_UNAVAIL;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_get_state);

/*
 * Restore config space (BARs) for all devices on the PE, re-enable
 * each PCI device, then unfreeze the PE itself.
 */
static int eeh_pe_reenable_devices(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	int ret = 0;

	/* Restore config space */
	eeh_pe_restore_bars(pe);

	/*
	 * Reenable PCI devices as the devices passed
	 * through are always enabled before the reset.
	 */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		ret = pci_reenable_device(pdev);
		if (ret) {
			pr_warn("%s: Failure %d reenabling %s\n",
				__func__, ret, pci_name(pdev));
			return ret;
		}
	}

	/* The PE is still in frozen state */
	return eeh_unfreeze_pe(pe, true);
}

/**
 * eeh_pe_reset - Issue PE reset according to specified type
 * @pe: EEH PE
 * @option: reset type
 *
 * The routine is called to reset the specified PE with the
 * indicated type, either fundamental reset or hot reset.
 * PE reset is the most important part for error recovery.
 */
int eeh_pe_reset(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ?
 */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset)
		return -ENOENT;

	switch (option) {
	case EEH_RESET_DEACTIVATE:
		/* Unblock config space even if the reset failed */
		ret = eeh_ops->reset(pe, option);
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
		if (ret)
			break;

		ret = eeh_pe_reenable_devices(pe);
		break;
	case EEH_RESET_HOT:
	case EEH_RESET_FUNDAMENTAL:
		/*
		 * Proactively freeze the PE to drop all MMIO access
		 * during reset, which should be banned as it's always
		 * cause recursive EEH error.
		 */
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);

		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		ret = eeh_ops->reset(pe, option);
		break;
	default:
		pr_debug("%s: Unsupported option %d\n",
			__func__, option);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_reset);

/**
 * eeh_pe_configure - Configure PCI bridges after PE reset
 * @pe: EEH PE
 *
 * The routine is called to restore the PCI config space for
 * those PCI devices, especially PCI bridges affected by PE
 * reset issued previously.
 */
int eeh_pe_configure(struct eeh_pe *pe)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_configure);

/* Emit EEH statistics for /proc/powerpc/eeh */
static int proc_eeh_show(struct seq_file *m, void *v)
{
	if (!eeh_enabled()) {
		seq_printf(m, "EEH Subsystem is globally disabled\n");
		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
	} else {
		seq_printf(m, "EEH Subsystem is enabled\n");
		seq_printf(m,
				"no device=%llu\n"
				"no device node=%llu\n"
				"no config address=%llu\n"
				"check not wanted=%llu\n"
				"eeh_total_mmio_ffs=%llu\n"
				"eeh_false_positives=%llu\n"
				"eeh_slot_resets=%llu\n",
				eeh_stats.no_device,
				eeh_stats.no_dn,
				eeh_stats.no_cfg_addr,
				eeh_stats.ignored_check,
				eeh_stats.total_mmio_ffs,
				eeh_stats.false_positives,
				eeh_stats.slot_resets);
	}

	return 0;
}

static int proc_eeh_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_eeh_show, NULL);
}

static const struct file_operations proc_eeh_operations = {
	.open      = proc_eeh_open,
	.read      = seq_read,
	.llseek    = seq_lseek,
	.release   = single_release,
};

#ifdef CONFIG_DEBUG_FS
/* debugfs "eeh_enable": non-zero re-enables EEH, zero force-disables it */
static int eeh_enable_dbgfs_set(void *data, u64 val)
{
	if (val)
		eeh_clear_flag(EEH_FORCE_DISABLED);
	else
		eeh_add_flag(EEH_FORCE_DISABLED);

	/* Notify the backend */
	if (eeh_ops->post_init)
		eeh_ops->post_init();

	return 0;
}

static int eeh_enable_dbgfs_get(void *data, u64 *val)
{
	if (eeh_enabled())
		*val = 0x1ul;
	else
		*val = 0x0ul;
	return 0;
}

/* debugfs "eeh_max_freezes": tunable freeze threshold */
static int eeh_freeze_dbgfs_set(void *data, u64 val)
{
	eeh_max_freezes = val;
	return 0;
}

static int eeh_freeze_dbgfs_get(void *data, u64 *val)
{
	*val = eeh_max_freezes;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
			eeh_enable_dbgfs_set, "0x%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get,
			eeh_freeze_dbgfs_set, "0x%llx\n");
#endif

/* Create the /proc and debugfs entry points for EEH */
static int __init eeh_init_proc(void)
{
	if (machine_is(pseries) || machine_is(powernv)) {
		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
#ifdef CONFIG_DEBUG_FS
		debugfs_create_file("eeh_enable", 0600,
				    powerpc_debugfs_root, NULL,
				    &eeh_enable_dbgfs_ops);
		debugfs_create_file("eeh_max_freezes", 0600,
				    powerpc_debugfs_root, NULL,
				    &eeh_freeze_dbgfs_ops);
#endif
	}

	return 0;
}
__initcall(eeh_init_proc);