/*
 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/pci.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/slab.h>
#include <linux/module.h>

#include "ipath_kernel.h"
#include "ipath_verbs.h"

static void ipath_update_pio_bufs(struct ipath_devdata *);

const char *ipath_get_unit_name(int unit)
{
	static char iname[16];
	snprintf(iname, sizeof iname, "infinipath%u", unit);
	return iname;
}

#define DRIVER_LOAD_MSG	"QLogic " IPATH_DRV_NAME " loaded: "
#define PFX IPATH_DRV_NAME ": "

/*
 * The size has to be longer than this string, so we can append
 * board/chip information to it in the init code.
64 */ 65const char ib_ipath_version[] = IPATH_IDSTR "\n"; 66 67static struct idr unit_table; 68DEFINE_SPINLOCK(ipath_devs_lock); 69LIST_HEAD(ipath_dev_list); 70 71wait_queue_head_t ipath_state_wait; 72 73unsigned ipath_debug = __IPATH_INFO; 74 75module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO); 76MODULE_PARM_DESC(debug, "mask for debug prints"); 77EXPORT_SYMBOL_GPL(ipath_debug); 78 79unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */ 80module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO); 81MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported"); 82 83static unsigned ipath_hol_timeout_ms = 13000; 84module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO); 85MODULE_PARM_DESC(hol_timeout_ms, 86 "duration of user app suspension after link failure"); 87 88unsigned ipath_linkrecovery = 1; 89module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO); 90MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue"); 91 92MODULE_LICENSE("GPL"); 93MODULE_AUTHOR("QLogic <support@qlogic.com>"); 94MODULE_DESCRIPTION("QLogic InfiniPath driver"); 95 96/* 97 * Table to translate the LINKTRAININGSTATE portion of 98 * IBCStatus to a human-readable form. 99 */ 100const char *ipath_ibcstatus_str[] = { 101 "Disabled", 102 "LinkUp", 103 "PollActive", 104 "PollQuiet", 105 "SleepDelay", 106 "SleepQuiet", 107 "LState6", /* unused */ 108 "LState7", /* unused */ 109 "CfgDebounce", 110 "CfgRcvfCfg", 111 "CfgWaitRmt", 112 "CfgIdle", 113 "RecovRetrain", 114 "CfgTxRevLane", /* unused before IBA7220 */ 115 "RecovWaitRmt", 116 "RecovIdle", 117 /* below were added for IBA7220 */ 118 "CfgEnhanced", 119 "CfgTest", 120 "CfgWaitRmtTest", 121 "CfgWaitCfgEnhanced", 122 "SendTS_T", 123 "SendTstIdles", 124 "RcvTS_T", 125 "SendTst_TS1s", 126 "LTState18", "LTState19", "LTState1A", "LTState1B", 127 "LTState1C", "LTState1D", "LTState1E", "LTState1F" 128}; 129 130static void ipath_remove_one(struct pci_dev *); 131static int ipath_init_one(struct pci_dev *, const struct pci_device_id *); 132 133/* Only needed for registration, nothing else needs this info */ 134#define PCI_VENDOR_ID_PATHSCALE 0x1fc1 135#define PCI_DEVICE_ID_INFINIPATH_HT 0xd 136 137/* Number of seconds before our card status check... 
*/ 138#define STATUS_TIMEOUT 60 139 140static const struct pci_device_id ipath_pci_tbl[] = { 141 { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) }, 142 { 0, } 143}; 144 145MODULE_DEVICE_TABLE(pci, ipath_pci_tbl); 146 147static struct pci_driver ipath_driver = { 148 .name = IPATH_DRV_NAME, 149 .probe = ipath_init_one, 150 .remove = ipath_remove_one, 151 .id_table = ipath_pci_tbl, 152 .driver = { 153 .groups = ipath_driver_attr_groups, 154 }, 155}; 156 157static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev, 158 u32 *bar0, u32 *bar1) 159{ 160 int ret; 161 162 ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0); 163 if (ret) 164 ipath_dev_err(dd, "failed to read bar0 before enable: " 165 "error %d\n", -ret); 166 167 ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1); 168 if (ret) 169 ipath_dev_err(dd, "failed to read bar1 before enable: " 170 "error %d\n", -ret); 171 172 ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1); 173} 174 175static void ipath_free_devdata(struct pci_dev *pdev, 176 struct ipath_devdata *dd) 177{ 178 unsigned long flags; 179 180 pci_set_drvdata(pdev, NULL); 181 182 if (dd->ipath_unit != -1) { 183 spin_lock_irqsave(&ipath_devs_lock, flags); 184 idr_remove(&unit_table, dd->ipath_unit); 185 list_del(&dd->ipath_list); 186 spin_unlock_irqrestore(&ipath_devs_lock, flags); 187 } 188 vfree(dd); 189} 190 191static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) 192{ 193 unsigned long flags; 194 struct ipath_devdata *dd; 195 int ret; 196 197 dd = vzalloc(sizeof(*dd)); 198 if (!dd) { 199 dd = ERR_PTR(-ENOMEM); 200 goto bail; 201 } 202 dd->ipath_unit = -1; 203 204 idr_preload(GFP_KERNEL); 205 spin_lock_irqsave(&ipath_devs_lock, flags); 206 207 ret = idr_alloc(&unit_table, dd, 0, 0, GFP_NOWAIT); 208 if (ret < 0) { 209 printk(KERN_ERR IPATH_DRV_NAME 210 ": Could not allocate unit ID: error %d\n", -ret); 211 ipath_free_devdata(pdev, dd); 212 dd = ERR_PTR(ret); 213 goto bail_unlock; 214 } 215 dd->ipath_unit = ret; 216 217 dd->pcidev = pdev; 218 pci_set_drvdata(pdev, dd); 219 220 list_add(&dd->ipath_list, &ipath_dev_list); 221 222bail_unlock: 223 spin_unlock_irqrestore(&ipath_devs_lock, flags); 224 idr_preload_end(); 225bail: 226 return dd; 227} 228 229static inline struct ipath_devdata *__ipath_lookup(int unit) 230{ 231 return idr_find(&unit_table, unit); 232} 233 234struct ipath_devdata *ipath_lookup(int unit) 235{ 236 struct ipath_devdata *dd; 237 unsigned long flags; 238 239 spin_lock_irqsave(&ipath_devs_lock, flags); 240 dd = __ipath_lookup(unit); 241 spin_unlock_irqrestore(&ipath_devs_lock, flags); 242 243 return dd; 244} 245 246int ipath_count_units(int *npresentp, int *nupp, int *maxportsp) 247{ 248 int nunits, npresent, nup; 249 struct ipath_devdata *dd; 250 unsigned long flags; 251 int maxports; 252 253 nunits = npresent = nup = maxports = 0; 254 255 spin_lock_irqsave(&ipath_devs_lock, flags); 256 257 list_for_each_entry(dd, &ipath_dev_list, ipath_list) { 258 nunits++; 259 if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase) 260 npresent++; 261 if (dd->ipath_lid && 262 !(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN 263 | IPATH_LINKUNK))) 264 nup++; 265 if (dd->ipath_cfgports > maxports) 266 maxports = dd->ipath_cfgports; 267 } 268 269 spin_unlock_irqrestore(&ipath_devs_lock, flags); 270 271 if (npresentp) 272 *npresentp = npresent; 273 if (nupp) 274 *nupp = nup; 275 if (maxportsp) 276 *maxportsp = maxports; 277 278 return nunits; 279} 280 281/* 282 * These next two routines are placeholders 
in case we don't have per-arch 283 * code for controlling write combining. If explicit control of write 284 * combining is not available, performance will probably be awful. 285 */ 286 287int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd) 288{ 289 return -EOPNOTSUPP; 290} 291 292void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd) 293{ 294} 295 296/* 297 * Perform a PIO buffer bandwidth write test, to verify proper system 298 * configuration. Even when all the setup calls work, occasionally 299 * BIOS or other issues can prevent write combining from working, or 300 * can cause other bandwidth problems to the chip. 301 * 302 * This test simply writes the same buffer over and over again, and 303 * measures close to the peak bandwidth to the chip (not testing 304 * data bandwidth to the wire). On chips that use an address-based 305 * trigger to send packets to the wire, this is easy. On chips that 306 * use a count to trigger, we want to make sure that the packet doesn't 307 * go out on the wire, or trigger flow control checks. 308 */ 309static void ipath_verify_pioperf(struct ipath_devdata *dd) 310{ 311 u32 pbnum, cnt, lcnt; 312 u32 __iomem *piobuf; 313 u32 *addr; 314 u64 msecs, emsecs; 315 316 piobuf = ipath_getpiobuf(dd, 0, &pbnum); 317 if (!piobuf) { 318 dev_info(&dd->pcidev->dev, 319 "No PIObufs for checking perf, skipping\n"); 320 return; 321 } 322 323 /* 324 * Enough to give us a reasonable test, less than piobuf size, and 325 * likely multiple of store buffer length. 326 */ 327 cnt = 1024; 328 329 addr = vmalloc(cnt); 330 if (!addr) { 331 dev_info(&dd->pcidev->dev, 332 "Couldn't get memory for checking PIO perf," 333 " skipping\n"); 334 goto done; 335 } 336 337 preempt_disable(); /* we want reasonably accurate elapsed time */ 338 msecs = 1 + jiffies_to_msecs(jiffies); 339 for (lcnt = 0; lcnt < 10000U; lcnt++) { 340 /* wait until we cross msec boundary */ 341 if (jiffies_to_msecs(jiffies) >= msecs) 342 break; 343 udelay(1); 344 } 345 346 ipath_disable_armlaunch(dd); 347 348 /* 349 * length 0, no dwords actually sent, and mark as VL15 350 * on chips where that may matter (due to IB flowcontrol) 351 */ 352 if ((dd->ipath_flags & IPATH_HAS_PBC_CNT)) 353 writeq(1UL << 63, piobuf); 354 else 355 writeq(0, piobuf); 356 ipath_flush_wc(); 357 358 /* 359 * this is only roughly accurate, since even with preempt we 360 * still take interrupts that could take a while. 
Running for 361 * >= 5 msec seems to get us "close enough" to accurate values 362 */ 363 msecs = jiffies_to_msecs(jiffies); 364 for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) { 365 __iowrite32_copy(piobuf + 64, addr, cnt >> 2); 366 emsecs = jiffies_to_msecs(jiffies) - msecs; 367 } 368 369 /* 1 GiB/sec, slightly over IB SDR line rate */ 370 if (lcnt < (emsecs * 1024U)) 371 ipath_dev_err(dd, 372 "Performance problem: bandwidth to PIO buffers is " 373 "only %u MiB/sec\n", 374 lcnt / (u32) emsecs); 375 else 376 ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n", 377 lcnt / (u32) emsecs); 378 379 preempt_enable(); 380 381 vfree(addr); 382 383done: 384 /* disarm piobuf, so it's available again */ 385 ipath_disarm_piobufs(dd, pbnum, 1); 386 ipath_enable_armlaunch(dd); 387} 388 389static void cleanup_device(struct ipath_devdata *dd); 390 391static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) 392{ 393 int ret, len, j; 394 struct ipath_devdata *dd; 395 unsigned long long addr; 396 u32 bar0 = 0, bar1 = 0; 397 398 dd = ipath_alloc_devdata(pdev); 399 if (IS_ERR(dd)) { 400 ret = PTR_ERR(dd); 401 printk(KERN_ERR IPATH_DRV_NAME 402 ": Could not allocate devdata: error %d\n", -ret); 403 goto bail; 404 } 405 406 ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit); 407 408 ret = pci_enable_device(pdev); 409 if (ret) { 410 /* This can happen iff: 411 * 412 * We did a chip reset, and then failed to reprogram the 413 * BAR, or the chip reset due to an internal error. We then 414 * unloaded the driver and reloaded it. 415 * 416 * Both reset cases set the BAR back to initial state. For 417 * the latter case, the AER sticky error bit at offset 0x718 418 * should be set, but the Linux kernel doesn't yet know 419 * about that, it appears. If the original BAR was retained 420 * in the kernel data structures, this may be OK. 421 */ 422 ipath_dev_err(dd, "enable unit %d failed: error %d\n", 423 dd->ipath_unit, -ret); 424 goto bail_devdata; 425 } 426 addr = pci_resource_start(pdev, 0); 427 len = pci_resource_len(pdev, 0); 428 ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %d, vend %x/%x " 429 "driver_data %lx\n", addr, len, pdev->irq, ent->vendor, 430 ent->device, ent->driver_data); 431 432 read_bars(dd, pdev, &bar0, &bar1); 433 434 if (!bar1 && !(bar0 & ~0xf)) { 435 if (addr) { 436 dev_info(&pdev->dev, "BAR is 0 (probable RESET), " 437 "rewriting as %llx\n", addr); 438 ret = pci_write_config_dword( 439 pdev, PCI_BASE_ADDRESS_0, addr); 440 if (ret) { 441 ipath_dev_err(dd, "rewrite of BAR0 " 442 "failed: err %d\n", -ret); 443 goto bail_disable; 444 } 445 ret = pci_write_config_dword( 446 pdev, PCI_BASE_ADDRESS_1, addr >> 32); 447 if (ret) { 448 ipath_dev_err(dd, "rewrite of BAR1 " 449 "failed: err %d\n", -ret); 450 goto bail_disable; 451 } 452 } else { 453 ipath_dev_err(dd, "BAR is 0 (probable RESET), " 454 "not usable until reboot\n"); 455 ret = -ENODEV; 456 goto bail_disable; 457 } 458 } 459 460 ret = pci_request_regions(pdev, IPATH_DRV_NAME); 461 if (ret) { 462 dev_info(&pdev->dev, "pci_request_regions unit %u fails: " 463 "err %d\n", dd->ipath_unit, -ret); 464 goto bail_disable; 465 } 466 467 ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); 468 if (ret) { 469 /* 470 * if the 64 bit setup fails, try 32 bit. Some systems 471 * do not setup 64 bit maps on systems with 2GB or less 472 * memory installed. 
473 */ 474 ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); 475 if (ret) { 476 dev_info(&pdev->dev, 477 "Unable to set DMA mask for unit %u: %d\n", 478 dd->ipath_unit, ret); 479 goto bail_regions; 480 } 481 else { 482 ipath_dbg("No 64bit DMA mask, used 32 bit mask\n"); 483 ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); 484 if (ret) 485 dev_info(&pdev->dev, 486 "Unable to set DMA consistent mask " 487 "for unit %u: %d\n", 488 dd->ipath_unit, ret); 489 490 } 491 } 492 else { 493 ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 494 if (ret) 495 dev_info(&pdev->dev, 496 "Unable to set DMA consistent mask " 497 "for unit %u: %d\n", 498 dd->ipath_unit, ret); 499 } 500 501 pci_set_master(pdev); 502 503 /* 504 * Save BARs to rewrite after device reset. Save all 64 bits of 505 * BAR, just in case. 506 */ 507 dd->ipath_pcibar0 = addr; 508 dd->ipath_pcibar1 = addr >> 32; 509 dd->ipath_deviceid = ent->device; /* save for later use */ 510 dd->ipath_vendorid = ent->vendor; 511 512 /* setup the chip-specific functions, as early as possible. */ 513 switch (ent->device) { 514 case PCI_DEVICE_ID_INFINIPATH_HT: 515 ipath_init_iba6110_funcs(dd); 516 break; 517 518 default: 519 ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, " 520 "failing\n", ent->device); 521 return -ENODEV; 522 } 523 524 for (j = 0; j < 6; j++) { 525 if (!pdev->resource[j].start) 526 continue; 527 ipath_cdbg(VERBOSE, "BAR %d %pR, len %llx\n", 528 j, &pdev->resource[j], 529 (unsigned long long)pci_resource_len(pdev, j)); 530 } 531 532 if (!addr) { 533 ipath_dev_err(dd, "No valid address in BAR 0!\n"); 534 ret = -ENODEV; 535 goto bail_regions; 536 } 537 538 dd->ipath_pcirev = pdev->revision; 539 540#if defined(__powerpc__) 541 /* There isn't a generic way to specify writethrough mappings */ 542 dd->ipath_kregbase = __ioremap(addr, len, 543 (_PAGE_NO_CACHE|_PAGE_WRITETHRU)); 544#else 545 dd->ipath_kregbase = ioremap_nocache(addr, len); 546#endif 547 548 if (!dd->ipath_kregbase) { 549 ipath_dbg("Unable to map io addr %llx to kvirt, failing\n", 550 addr); 551 ret = -ENOMEM; 552 goto bail_iounmap; 553 } 554 dd->ipath_kregend = (u64 __iomem *) 555 ((void __iomem *)dd->ipath_kregbase + len); 556 dd->ipath_physaddr = addr; /* used for io_remap, etc. */ 557 /* for user mmap */ 558 ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n", 559 addr, dd->ipath_kregbase); 560 561 if (dd->ipath_f_bus(dd, pdev)) 562 ipath_dev_err(dd, "Failed to setup config space; " 563 "continuing anyway\n"); 564 565 /* 566 * set up our interrupt handler; IRQF_SHARED probably not needed, 567 * since MSI interrupts shouldn't be shared but won't hurt for now. 568 * check 0 irq after we return from chip-specific bus setup, since 569 * that can affect this due to setup 570 */ 571 if (!dd->ipath_irq) 572 ipath_dev_err(dd, "irq is 0, BIOS error? 
Interrupts won't " 573 "work\n"); 574 else { 575 ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED, 576 IPATH_DRV_NAME, dd); 577 if (ret) { 578 ipath_dev_err(dd, "Couldn't setup irq handler, " 579 "irq=%d: %d\n", dd->ipath_irq, ret); 580 goto bail_iounmap; 581 } 582 } 583 584 ret = ipath_init_chip(dd, 0); /* do the chip-specific init */ 585 if (ret) 586 goto bail_irqsetup; 587 588 ret = ipath_enable_wc(dd); 589 590 if (ret) { 591 ipath_dev_err(dd, "Write combining not enabled " 592 "(err %d): performance may be poor\n", 593 -ret); 594 ret = 0; 595 } 596 597 ipath_verify_pioperf(dd); 598 599 ipath_device_create_group(&pdev->dev, dd); 600 ipathfs_add_device(dd); 601 ipath_user_add(dd); 602 ipath_diag_add(dd); 603 ipath_register_ib_device(dd); 604 605 goto bail; 606 607bail_irqsetup: 608 cleanup_device(dd); 609 610 if (dd->ipath_irq) 611 dd->ipath_f_free_irq(dd); 612 613 if (dd->ipath_f_cleanup) 614 dd->ipath_f_cleanup(dd); 615 616bail_iounmap: 617 iounmap((volatile void __iomem *) dd->ipath_kregbase); 618 619bail_regions: 620 pci_release_regions(pdev); 621 622bail_disable: 623 pci_disable_device(pdev); 624 625bail_devdata: 626 ipath_free_devdata(pdev, dd); 627 628bail: 629 return ret; 630} 631 632static void cleanup_device(struct ipath_devdata *dd) 633{ 634 int port; 635 struct ipath_portdata **tmp; 636 unsigned long flags; 637 638 if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { 639 /* can't do anything more with chip; needs re-init */ 640 *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT; 641 if (dd->ipath_kregbase) { 642 /* 643 * if we haven't already cleaned up before these are 644 * to ensure any register reads/writes "fail" until 645 * re-init 646 */ 647 dd->ipath_kregbase = NULL; 648 dd->ipath_uregbase = 0; 649 dd->ipath_sregbase = 0; 650 dd->ipath_cregbase = 0; 651 dd->ipath_kregsize = 0; 652 } 653 ipath_disable_wc(dd); 654 } 655 656 if (dd->ipath_spectriggerhit) 657 dev_info(&dd->pcidev->dev, "%lu special trigger hits\n", 658 dd->ipath_spectriggerhit); 659 660 if (dd->ipath_pioavailregs_dma) { 661 dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, 662 (void *) dd->ipath_pioavailregs_dma, 663 dd->ipath_pioavailregs_phys); 664 dd->ipath_pioavailregs_dma = NULL; 665 } 666 if (dd->ipath_dummy_hdrq) { 667 dma_free_coherent(&dd->pcidev->dev, 668 dd->ipath_pd[0]->port_rcvhdrq_size, 669 dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys); 670 dd->ipath_dummy_hdrq = NULL; 671 } 672 673 if (dd->ipath_pageshadow) { 674 struct page **tmpp = dd->ipath_pageshadow; 675 dma_addr_t *tmpd = dd->ipath_physshadow; 676 int i, cnt = 0; 677 678 ipath_cdbg(VERBOSE, "Unlocking any expTID pages still " 679 "locked\n"); 680 for (port = 0; port < dd->ipath_cfgports; port++) { 681 int port_tidbase = port * dd->ipath_rcvtidcnt; 682 int maxtid = port_tidbase + dd->ipath_rcvtidcnt; 683 for (i = port_tidbase; i < maxtid; i++) { 684 if (!tmpp[i]) 685 continue; 686 pci_unmap_page(dd->pcidev, tmpd[i], 687 PAGE_SIZE, PCI_DMA_FROMDEVICE); 688 ipath_release_user_pages(&tmpp[i], 1); 689 tmpp[i] = NULL; 690 cnt++; 691 } 692 } 693 if (cnt) { 694 ipath_stats.sps_pageunlocks += cnt; 695 ipath_cdbg(VERBOSE, "There were still %u expTID " 696 "entries locked\n", cnt); 697 } 698 if (ipath_stats.sps_pagelocks || 699 ipath_stats.sps_pageunlocks) 700 ipath_cdbg(VERBOSE, "%llu pages locked, %llu " 701 "unlocked via ipath_m{un}lock\n", 702 (unsigned long long) 703 ipath_stats.sps_pagelocks, 704 (unsigned long long) 705 ipath_stats.sps_pageunlocks); 706 707 ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", 708 
dd->ipath_pageshadow); 709 tmpp = dd->ipath_pageshadow; 710 dd->ipath_pageshadow = NULL; 711 vfree(tmpp); 712 713 dd->ipath_egrtidbase = NULL; 714 } 715 716 /* 717 * free any resources still in use (usually just kernel ports) 718 * at unload; we do for portcnt, because that's what we allocate. 719 * We acquire lock to be really paranoid that ipath_pd isn't being 720 * accessed from some interrupt-related code (that should not happen, 721 * but best to be sure). 722 */ 723 spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); 724 tmp = dd->ipath_pd; 725 dd->ipath_pd = NULL; 726 spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); 727 for (port = 0; port < dd->ipath_portcnt; port++) { 728 struct ipath_portdata *pd = tmp[port]; 729 tmp[port] = NULL; /* debugging paranoia */ 730 ipath_free_pddata(dd, pd); 731 } 732 kfree(tmp); 733} 734 735static void ipath_remove_one(struct pci_dev *pdev) 736{ 737 struct ipath_devdata *dd = pci_get_drvdata(pdev); 738 739 ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd); 740 741 /* 742 * disable the IB link early, to be sure no new packets arrive, which 743 * complicates the shutdown process 744 */ 745 ipath_shutdown_device(dd); 746 747 flush_workqueue(ib_wq); 748 749 if (dd->verbs_dev) 750 ipath_unregister_ib_device(dd->verbs_dev); 751 752 ipath_diag_remove(dd); 753 ipath_user_remove(dd); 754 ipathfs_remove_device(dd); 755 ipath_device_remove_group(&pdev->dev, dd); 756 757 ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, " 758 "unit %u\n", dd, (u32) dd->ipath_unit); 759 760 cleanup_device(dd); 761 762 /* 763 * turn off rcv, send, and interrupts for all ports, all drivers 764 * should also hard reset the chip here? 765 * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs 766 * for all versions of the driver, if they were allocated 767 */ 768 if (dd->ipath_irq) { 769 ipath_cdbg(VERBOSE, "unit %u free irq %d\n", 770 dd->ipath_unit, dd->ipath_irq); 771 dd->ipath_f_free_irq(dd); 772 } else 773 ipath_dbg("irq is 0, not doing free_irq " 774 "for unit %u\n", dd->ipath_unit); 775 /* 776 * we check for NULL here, because it's outside 777 * the kregbase check, and we need to call it 778 * after the free_irq. Thus it's possible that 779 * the function pointers were never initialized. 780 */ 781 if (dd->ipath_f_cleanup) 782 /* clean up chip-specific stuff */ 783 dd->ipath_f_cleanup(dd); 784 785 ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase); 786 iounmap((volatile void __iomem *) dd->ipath_kregbase); 787 pci_release_regions(pdev); 788 ipath_cdbg(VERBOSE, "calling pci_disable_device\n"); 789 pci_disable_device(pdev); 790 791 ipath_free_devdata(pdev, dd); 792} 793 794/* general driver use */ 795DEFINE_MUTEX(ipath_mutex); 796 797static DEFINE_SPINLOCK(ipath_pioavail_lock); 798 799/** 800 * ipath_disarm_piobufs - cancel a range of PIO buffers 801 * @dd: the infinipath device 802 * @first: the first PIO buffer to cancel 803 * @cnt: the number of PIO buffers to cancel 804 * 805 * cancel a range of PIO buffers, used when they might be armed, but 806 * not triggered. Used at init to ensure buffer state, and also user 807 * process close, in case it died while writing to a PIO buffer 808 * Also after errors. 
809 */ 810void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first, 811 unsigned cnt) 812{ 813 unsigned i, last = first + cnt; 814 unsigned long flags; 815 816 ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first); 817 for (i = first; i < last; i++) { 818 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); 819 /* 820 * The disarm-related bits are write-only, so it 821 * is ok to OR them in with our copy of sendctrl 822 * while we hold the lock. 823 */ 824 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 825 dd->ipath_sendctrl | INFINIPATH_S_DISARM | 826 (i << INFINIPATH_S_DISARMPIOBUF_SHIFT)); 827 /* can't disarm bufs back-to-back per iba7220 spec */ 828 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); 829 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); 830 } 831 /* on some older chips, update may not happen after cancel */ 832 ipath_force_pio_avail_update(dd); 833} 834 835/** 836 * ipath_wait_linkstate - wait for an IB link state change to occur 837 * @dd: the infinipath device 838 * @state: the state to wait for 839 * @msecs: the number of milliseconds to wait 840 * 841 * wait up to msecs milliseconds for IB link state change to occur for 842 * now, take the easy polling route. Currently used only by 843 * ipath_set_linkstate. Returns 0 if state reached, otherwise 844 * -ETIMEDOUT state can have multiple states set, for any of several 845 * transitions. 846 */ 847int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs) 848{ 849 dd->ipath_state_wanted = state; 850 wait_event_interruptible_timeout(ipath_state_wait, 851 (dd->ipath_flags & state), 852 msecs_to_jiffies(msecs)); 853 dd->ipath_state_wanted = 0; 854 855 if (!(dd->ipath_flags & state)) { 856 u64 val; 857 ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u" 858 " ms\n", 859 /* test INIT ahead of DOWN, both can be set */ 860 (state & IPATH_LINKINIT) ? "INIT" : 861 ((state & IPATH_LINKDOWN) ? "DOWN" : 862 ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")), 863 msecs); 864 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); 865 ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n", 866 (unsigned long long) ipath_read_kreg64( 867 dd, dd->ipath_kregs->kr_ibcctrl), 868 (unsigned long long) val, 869 ipath_ibcstatus_str[val & dd->ibcs_lts_mask]); 870 } 871 return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT; 872} 873 874static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err, 875 char *buf, size_t blen) 876{ 877 static const struct { 878 ipath_err_t err; 879 const char *msg; 880 } errs[] = { 881 { INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" }, 882 { INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" }, 883 { INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" }, 884 { INFINIPATH_E_SDMABASE, "SDmaBase" }, 885 { INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" }, 886 { INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" }, 887 { INFINIPATH_E_SDMADWEN, "SDmaDwEn" }, 888 { INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" }, 889 { INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" }, 890 { INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" }, 891 { INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" }, 892 { INFINIPATH_E_SDMADISABLED, "SDmaDisabled" }, 893 }; 894 int i; 895 int expected; 896 size_t bidx = 0; 897 898 for (i = 0; i < ARRAY_SIZE(errs); i++) { 899 expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 
0 :
			test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
		if ((err & errs[i].err) && !expected)
			bidx += snprintf(buf + bidx, blen - bidx,
					 "%s ", errs[i].msg);
	}
}

/*
 * Decode the error status into strings, deciding whether to always
 * print it or not depending on "normal packet errors" vs everything
 * else.  Return 1 if "real" errors, otherwise 0 if only packet
 * errors, so caller can decide what to print with the string.
 */
int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
		     ipath_err_t err)
{
	int iserr = 1;
	*buf = '\0';
	if (err & INFINIPATH_E_PKTERRS) {
		if (!(err & ~INFINIPATH_E_PKTERRS))
			iserr = 0; // if only packet errors.
		if (ipath_debug & __IPATH_ERRPKTDBG) {
			if (err & INFINIPATH_E_REBP)
				strlcat(buf, "EBP ", blen);
			if (err & INFINIPATH_E_RVCRC)
				strlcat(buf, "VCRC ", blen);
			if (err & INFINIPATH_E_RICRC) {
				strlcat(buf, "CRC ", blen);
				// clear for check below, so only once
				err &= INFINIPATH_E_RICRC;
			}
			if (err & INFINIPATH_E_RSHORTPKTLEN)
				strlcat(buf, "rshortpktlen ", blen);
			if (err & INFINIPATH_E_SDROPPEDDATAPKT)
				strlcat(buf, "sdroppeddatapkt ", blen);
			if (err & INFINIPATH_E_SPKTLEN)
				strlcat(buf, "spktlen ", blen);
		}
		if ((err & INFINIPATH_E_RICRC) &&
			!(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
			strlcat(buf, "CRC ", blen);
		if (!iserr)
			goto done;
	}
	if (err & INFINIPATH_E_RHDRLEN)
		strlcat(buf, "rhdrlen ", blen);
	if (err & INFINIPATH_E_RBADTID)
		strlcat(buf, "rbadtid ", blen);
	if (err & INFINIPATH_E_RBADVERSION)
		strlcat(buf, "rbadversion ", blen);
	if (err & INFINIPATH_E_RHDR)
		strlcat(buf, "rhdr ", blen);
	if (err & INFINIPATH_E_SENDSPECIALTRIGGER)
		strlcat(buf, "sendspecialtrigger ", blen);
	if (err & INFINIPATH_E_RLONGPKTLEN)
		strlcat(buf, "rlongpktlen ", blen);
	if (err & INFINIPATH_E_RMAXPKTLEN)
		strlcat(buf, "rmaxpktlen ", blen);
	if (err & INFINIPATH_E_RMINPKTLEN)
		strlcat(buf, "rminpktlen ", blen);
	if (err & INFINIPATH_E_SMINPKTLEN)
		strlcat(buf, "sminpktlen ", blen);
	if (err & INFINIPATH_E_RFORMATERR)
		strlcat(buf, "rformaterr ", blen);
	if (err & INFINIPATH_E_RUNSUPVL)
		strlcat(buf, "runsupvl ", blen);
	if (err & INFINIPATH_E_RUNEXPCHAR)
		strlcat(buf, "runexpchar ", blen);
	if (err & INFINIPATH_E_RIBFLOW)
		strlcat(buf, "ribflow ", blen);
	if (err & INFINIPATH_E_SUNDERRUN)
		strlcat(buf, "sunderrun ", blen);
	if (err & INFINIPATH_E_SPIOARMLAUNCH)
		strlcat(buf, "spioarmlaunch ", blen);
	if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
		strlcat(buf, "sunexperrpktnum ", blen);
	if (err & INFINIPATH_E_SDROPPEDSMPPKT)
		strlcat(buf, "sdroppedsmppkt ", blen);
	if (err & INFINIPATH_E_SMAXPKTLEN)
		strlcat(buf, "smaxpktlen ", blen);
	if (err & INFINIPATH_E_SUNSUPVL)
		strlcat(buf, "sunsupVL ", blen);
	if (err & INFINIPATH_E_INVALIDADDR)
		strlcat(buf, "invalidaddr ", blen);
	if (err & INFINIPATH_E_RRCVEGRFULL)
		strlcat(buf, "rcvegrfull ", blen);
	if (err & INFINIPATH_E_RRCVHDRFULL)
		strlcat(buf, "rcvhdrfull ", blen);
	if (err & INFINIPATH_E_IBSTATUSCHANGED)
		strlcat(buf, "ibcstatuschg ", blen);
	if (err & INFINIPATH_E_RIBLOSTLINK)
		strlcat(buf, "riblostlink ", blen);
	if (err & INFINIPATH_E_HARDWARE)
		strlcat(buf, "hardware ", blen);
	if (err & INFINIPATH_E_RESET)
		strlcat(buf, "reset ", blen);
	if (err & INFINIPATH_E_SDMAERRS)
		decode_sdma_errs(dd, err, buf, blen);
	if (err & INFINIPATH_E_INVALIDEEPCMD)
		strlcat(buf, "invalideepromcmd ", blen);
done:
	return iserr;
}

/**
 * get_rhf_errstring - decode RHF errors
 * @err: the err number
 * @msg: the output buffer
 * @len: the length of the output buffer
 *
 * only used one place now, may want more later
 */
static void get_rhf_errstring(u32 err, char *msg, size_t len)
{
	/* if no errors, and so don't need to check what's first */
	*msg = '\0';

	if (err & INFINIPATH_RHF_H_ICRCERR)
		strlcat(msg, "icrcerr ", len);
	if (err & INFINIPATH_RHF_H_VCRCERR)
		strlcat(msg, "vcrcerr ", len);
	if (err & INFINIPATH_RHF_H_PARITYERR)
		strlcat(msg, "parityerr ", len);
	if (err & INFINIPATH_RHF_H_LENERR)
		strlcat(msg, "lenerr ", len);
	if (err & INFINIPATH_RHF_H_MTUERR)
		strlcat(msg, "mtuerr ", len);
	if (err & INFINIPATH_RHF_H_IHDRERR)
		/* infinipath hdr checksum error */
		strlcat(msg, "ipathhdrerr ", len);
	if (err & INFINIPATH_RHF_H_TIDERR)
		strlcat(msg, "tiderr ", len);
	if (err & INFINIPATH_RHF_H_MKERR)
		/* bad port, offset, etc. */
		strlcat(msg, "invalid ipathhdr ", len);
	if (err & INFINIPATH_RHF_H_IBERR)
		strlcat(msg, "iberr ", len);
	if (err & INFINIPATH_RHF_L_SWA)
		strlcat(msg, "swA ", len);
	if (err & INFINIPATH_RHF_L_SWB)
		strlcat(msg, "swB ", len);
}

/**
 * ipath_get_egrbuf - get an eager buffer
 * @dd: the infinipath device
 * @bufnum: the eager buffer to get
 *
 * must only be called if ipath_pd[port] is known to be allocated
 */
static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum)
{
	return dd->ipath_port0_skbinfo ?
		(void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
}

/**
 * ipath_alloc_skb - allocate an skb and buffer with possible constraints
 * @dd: the infinipath device
 * @gfp_mask: the sk_buff GFP mask
 */
struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
				gfp_t gfp_mask)
{
	struct sk_buff *skb;
	u32 len;

	/*
	 * Only fully supported way to handle this is to allocate lots
	 * extra, align as needed, and then do skb_reserve().  That wastes
	 * a lot of memory...  I'll have to hack this into infinipath_copy
	 * also.
	 */

	/*
	 * We need 2 extra bytes for ipath_ether data sent in the
	 * key header.  In order to keep everything dword aligned,
	 * we'll reserve 4 bytes.
	 */
	len = dd->ipath_ibmaxlen + 4;

	if (dd->ipath_flags & IPATH_4BYTE_TID) {
		/* We need a 2KB multiple alignment, and there is no way
		 * to do it except to allocate extra and then skb_reserve
		 * enough to bring it up to the right alignment.
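		 * For example (illustrative numbers only): if skb->data & 2047
		 * works out to 0x700, the code further down reserves
		 * 2048 - 0x700 = 0x100 bytes, leaving the data pointer on the
		 * next 2KB boundary.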
1085 */ 1086 len += 2047; 1087 } 1088 1089 skb = __dev_alloc_skb(len, gfp_mask); 1090 if (!skb) { 1091 ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n", 1092 len); 1093 goto bail; 1094 } 1095 1096 skb_reserve(skb, 4); 1097 1098 if (dd->ipath_flags & IPATH_4BYTE_TID) { 1099 u32 una = (unsigned long)skb->data & 2047; 1100 if (una) 1101 skb_reserve(skb, 2048 - una); 1102 } 1103 1104bail: 1105 return skb; 1106} 1107 1108static void ipath_rcv_hdrerr(struct ipath_devdata *dd, 1109 u32 eflags, 1110 u32 l, 1111 u32 etail, 1112 __le32 *rhf_addr, 1113 struct ipath_message_header *hdr) 1114{ 1115 char emsg[128]; 1116 1117 get_rhf_errstring(eflags, emsg, sizeof emsg); 1118 ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u " 1119 "tlen=%x opcode=%x egridx=%x: %s\n", 1120 eflags, l, 1121 ipath_hdrget_rcv_type(rhf_addr), 1122 ipath_hdrget_length_in_bytes(rhf_addr), 1123 be32_to_cpu(hdr->bth[0]) >> 24, 1124 etail, emsg); 1125 1126 /* Count local link integrity errors. */ 1127 if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) { 1128 u8 n = (dd->ipath_ibcctrl >> 1129 INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & 1130 INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; 1131 1132 if (++dd->ipath_lli_counter > n) { 1133 dd->ipath_lli_counter = 0; 1134 dd->ipath_lli_errors++; 1135 } 1136 } 1137} 1138 1139/* 1140 * ipath_kreceive - receive a packet 1141 * @pd: the infinipath port 1142 * 1143 * called from interrupt handler for errors or receive interrupt 1144 */ 1145void ipath_kreceive(struct ipath_portdata *pd) 1146{ 1147 struct ipath_devdata *dd = pd->port_dd; 1148 __le32 *rhf_addr; 1149 void *ebuf; 1150 const u32 rsize = dd->ipath_rcvhdrentsize; /* words */ 1151 const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */ 1152 u32 etail = -1, l, hdrqtail; 1153 struct ipath_message_header *hdr; 1154 u32 eflags, i, etype, tlen, pkttot = 0, updegr = 0, reloop = 0; 1155 static u64 totcalls; /* stats, may eventually remove */ 1156 int last; 1157 1158 l = pd->port_head; 1159 rhf_addr = (__le32 *) pd->port_rcvhdrq + l + dd->ipath_rhf_offset; 1160 if (dd->ipath_flags & IPATH_NODMA_RTAIL) { 1161 u32 seq = ipath_hdrget_seq(rhf_addr); 1162 1163 if (seq != pd->port_seq_cnt) 1164 goto bail; 1165 hdrqtail = 0; 1166 } else { 1167 hdrqtail = ipath_get_rcvhdrtail(pd); 1168 if (l == hdrqtail) 1169 goto bail; 1170 smp_rmb(); 1171 } 1172 1173reloop: 1174 for (last = 0, i = 1; !last; i += !last) { 1175 hdr = dd->ipath_f_get_msgheader(dd, rhf_addr); 1176 eflags = ipath_hdrget_err_flags(rhf_addr); 1177 etype = ipath_hdrget_rcv_type(rhf_addr); 1178 /* total length */ 1179 tlen = ipath_hdrget_length_in_bytes(rhf_addr); 1180 ebuf = NULL; 1181 if ((dd->ipath_flags & IPATH_NODMA_RTAIL) ? 1182 ipath_hdrget_use_egr_buf(rhf_addr) : 1183 (etype != RCVHQ_RCV_TYPE_EXPECTED)) { 1184 /* 1185 * It turns out that the chip uses an eager buffer 1186 * for all non-expected packets, whether it "needs" 1187 * one or not. So always get the index, but don't 1188 * set ebuf (so we try to copy data) unless the 1189 * length requires it. 1190 */ 1191 etail = ipath_hdrget_index(rhf_addr); 1192 updegr = 1; 1193 if (tlen > sizeof(*hdr) || 1194 etype == RCVHQ_RCV_TYPE_NON_KD) 1195 ebuf = ipath_get_egrbuf(dd, etail); 1196 } 1197 1198 /* 1199 * both tiderr and ipathhdrerr are set for all plain IB 1200 * packets; only ipathhdrerr should be set. 
1201 */ 1202 1203 if (etype != RCVHQ_RCV_TYPE_NON_KD && 1204 etype != RCVHQ_RCV_TYPE_ERROR && 1205 ipath_hdrget_ipath_ver(hdr->iph.ver_port_tid_offset) != 1206 IPS_PROTO_VERSION) 1207 ipath_cdbg(PKT, "Bad InfiniPath protocol version " 1208 "%x\n", etype); 1209 1210 if (unlikely(eflags)) 1211 ipath_rcv_hdrerr(dd, eflags, l, etail, rhf_addr, hdr); 1212 else if (etype == RCVHQ_RCV_TYPE_NON_KD) { 1213 ipath_ib_rcv(dd->verbs_dev, (u32 *)hdr, ebuf, tlen); 1214 if (dd->ipath_lli_counter) 1215 dd->ipath_lli_counter--; 1216 } else if (etype == RCVHQ_RCV_TYPE_EAGER) { 1217 u8 opcode = be32_to_cpu(hdr->bth[0]) >> 24; 1218 u32 qp = be32_to_cpu(hdr->bth[1]) & 0xffffff; 1219 ipath_cdbg(PKT, "typ %x, opcode %x (eager, " 1220 "qp=%x), len %x; ignored\n", 1221 etype, opcode, qp, tlen); 1222 } 1223 else if (etype == RCVHQ_RCV_TYPE_EXPECTED) 1224 ipath_dbg("Bug: Expected TID, opcode %x; ignored\n", 1225 be32_to_cpu(hdr->bth[0]) >> 24); 1226 else { 1227 /* 1228 * error packet, type of error unknown. 1229 * Probably type 3, but we don't know, so don't 1230 * even try to print the opcode, etc. 1231 * Usually caused by a "bad packet", that has no 1232 * BTH, when the LRH says it should. 1233 */ 1234 ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf" 1235 " %x, len %x hdrq+%x rhf: %Lx\n", 1236 etail, tlen, l, (unsigned long long) 1237 le64_to_cpu(*(__le64 *) rhf_addr)); 1238 if (ipath_debug & __IPATH_ERRPKTDBG) { 1239 u32 j, *d, dw = rsize-2; 1240 if (rsize > (tlen>>2)) 1241 dw = tlen>>2; 1242 d = (u32 *)hdr; 1243 printk(KERN_DEBUG "EPkt rcvhdr(%x dw):\n", 1244 dw); 1245 for (j = 0; j < dw; j++) 1246 printk(KERN_DEBUG "%8x%s", d[j], 1247 (j%8) == 7 ? "\n" : " "); 1248 printk(KERN_DEBUG ".\n"); 1249 } 1250 } 1251 l += rsize; 1252 if (l >= maxcnt) 1253 l = 0; 1254 rhf_addr = (__le32 *) pd->port_rcvhdrq + 1255 l + dd->ipath_rhf_offset; 1256 if (dd->ipath_flags & IPATH_NODMA_RTAIL) { 1257 u32 seq = ipath_hdrget_seq(rhf_addr); 1258 1259 if (++pd->port_seq_cnt > 13) 1260 pd->port_seq_cnt = 1; 1261 if (seq != pd->port_seq_cnt) 1262 last = 1; 1263 } else if (l == hdrqtail) 1264 last = 1; 1265 /* 1266 * update head regs on last packet, and every 16 packets. 1267 * Reduce bus traffic, while still trying to prevent 1268 * rcvhdrq overflows, for when the queue is nearly full 1269 */ 1270 if (last || !(i & 0xf)) { 1271 u64 lval = l; 1272 1273 /* request IBA6120 and 7220 interrupt only on last */ 1274 if (last) 1275 lval |= dd->ipath_rhdrhead_intr_off; 1276 ipath_write_ureg(dd, ur_rcvhdrhead, lval, 1277 pd->port_port); 1278 if (updegr) { 1279 ipath_write_ureg(dd, ur_rcvegrindexhead, 1280 etail, pd->port_port); 1281 updegr = 0; 1282 } 1283 } 1284 } 1285 1286 if (!dd->ipath_rhdrhead_intr_off && !reloop && 1287 !(dd->ipath_flags & IPATH_NODMA_RTAIL)) { 1288 /* IBA6110 workaround; we can have a race clearing chip 1289 * interrupt with another interrupt about to be delivered, 1290 * and can clear it before it is delivered on the GPIO 1291 * workaround. By doing the extra check here for the 1292 * in-memory tail register updating while we were doing 1293 * earlier packets, we "almost" guarantee we have covered 1294 * that case. 
1295 */ 1296 u32 hqtail = ipath_get_rcvhdrtail(pd); 1297 if (hqtail != hdrqtail) { 1298 hdrqtail = hqtail; 1299 reloop = 1; /* loop 1 extra time at most */ 1300 goto reloop; 1301 } 1302 } 1303 1304 pkttot += i; 1305 1306 pd->port_head = l; 1307 1308 if (pkttot > ipath_stats.sps_maxpkts_call) 1309 ipath_stats.sps_maxpkts_call = pkttot; 1310 ipath_stats.sps_port0pkts += pkttot; 1311 ipath_stats.sps_avgpkts_call = 1312 ipath_stats.sps_port0pkts / ++totcalls; 1313 1314bail:; 1315} 1316 1317/** 1318 * ipath_update_pio_bufs - update shadow copy of the PIO availability map 1319 * @dd: the infinipath device 1320 * 1321 * called whenever our local copy indicates we have run out of send buffers 1322 * NOTE: This can be called from interrupt context by some code 1323 * and from non-interrupt context by ipath_getpiobuf(). 1324 */ 1325 1326static void ipath_update_pio_bufs(struct ipath_devdata *dd) 1327{ 1328 unsigned long flags; 1329 int i; 1330 const unsigned piobregs = (unsigned)dd->ipath_pioavregs; 1331 1332 /* If the generation (check) bits have changed, then we update the 1333 * busy bit for the corresponding PIO buffer. This algorithm will 1334 * modify positions to the value they already have in some cases 1335 * (i.e., no change), but it's faster than changing only the bits 1336 * that have changed. 1337 * 1338 * We would like to do this atomicly, to avoid spinlocks in the 1339 * critical send path, but that's not really possible, given the 1340 * type of changes, and that this routine could be called on 1341 * multiple cpu's simultaneously, so we lock in this routine only, 1342 * to avoid conflicting updates; all we change is the shadow, and 1343 * it's a single 64 bit memory location, so by definition the update 1344 * is atomic in terms of what other cpu's can see in testing the 1345 * bits. The spin_lock overhead isn't too bad, since it only 1346 * happens when all buffers are in use, so only cpu overhead, not 1347 * latency or bandwidth is affected. 
1348 */ 1349 if (!dd->ipath_pioavailregs_dma) { 1350 ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n"); 1351 return; 1352 } 1353 if (ipath_debug & __IPATH_VERBDBG) { 1354 /* only if packet debug and verbose */ 1355 volatile __le64 *dma = dd->ipath_pioavailregs_dma; 1356 unsigned long *shadow = dd->ipath_pioavailshadow; 1357 1358 ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, " 1359 "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx " 1360 "s3=%lx\n", 1361 (unsigned long long) le64_to_cpu(dma[0]), 1362 shadow[0], 1363 (unsigned long long) le64_to_cpu(dma[1]), 1364 shadow[1], 1365 (unsigned long long) le64_to_cpu(dma[2]), 1366 shadow[2], 1367 (unsigned long long) le64_to_cpu(dma[3]), 1368 shadow[3]); 1369 if (piobregs > 4) 1370 ipath_cdbg( 1371 PKT, "2nd group, dma4=%llx shad4=%lx, " 1372 "d5=%llx s5=%lx, d6=%llx s6=%lx, " 1373 "d7=%llx s7=%lx\n", 1374 (unsigned long long) le64_to_cpu(dma[4]), 1375 shadow[4], 1376 (unsigned long long) le64_to_cpu(dma[5]), 1377 shadow[5], 1378 (unsigned long long) le64_to_cpu(dma[6]), 1379 shadow[6], 1380 (unsigned long long) le64_to_cpu(dma[7]), 1381 shadow[7]); 1382 } 1383 spin_lock_irqsave(&ipath_pioavail_lock, flags); 1384 for (i = 0; i < piobregs; i++) { 1385 u64 pchbusy, pchg, piov, pnew; 1386 /* 1387 * Chip Errata: bug 6641; even and odd qwords>3 are swapped 1388 */ 1389 if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) 1390 piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]); 1391 else 1392 piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]); 1393 pchg = dd->ipath_pioavailkernel[i] & 1394 ~(dd->ipath_pioavailshadow[i] ^ piov); 1395 pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT; 1396 if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) { 1397 pnew = dd->ipath_pioavailshadow[i] & ~pchbusy; 1398 pnew |= piov & pchbusy; 1399 dd->ipath_pioavailshadow[i] = pnew; 1400 } 1401 } 1402 spin_unlock_irqrestore(&ipath_pioavail_lock, flags); 1403} 1404 1405/* 1406 * used to force update of pioavailshadow if we can't get a pio buffer. 1407 * Needed primarily due to exitting freeze mode after recovering 1408 * from errors. Done lazily, because it's safer (known to not 1409 * be writing pio buffers). 1410 */ 1411static void ipath_reset_availshadow(struct ipath_devdata *dd) 1412{ 1413 int i, im; 1414 unsigned long flags; 1415 1416 spin_lock_irqsave(&ipath_pioavail_lock, flags); 1417 for (i = 0; i < dd->ipath_pioavregs; i++) { 1418 u64 val, oldval; 1419 /* deal with 6110 chip bug on high register #s */ 1420 im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ? 1421 i ^ 1 : i; 1422 val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]); 1423 /* 1424 * busy out the buffers not in the kernel avail list, 1425 * without changing the generation bits. 
1426 */ 1427 oldval = dd->ipath_pioavailshadow[i]; 1428 dd->ipath_pioavailshadow[i] = val | 1429 ((~dd->ipath_pioavailkernel[i] << 1430 INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) & 1431 0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */ 1432 if (oldval != dd->ipath_pioavailshadow[i]) 1433 ipath_dbg("shadow[%d] was %Lx, now %lx\n", 1434 i, (unsigned long long) oldval, 1435 dd->ipath_pioavailshadow[i]); 1436 } 1437 spin_unlock_irqrestore(&ipath_pioavail_lock, flags); 1438} 1439 1440/** 1441 * ipath_setrcvhdrsize - set the receive header size 1442 * @dd: the infinipath device 1443 * @rhdrsize: the receive header size 1444 * 1445 * called from user init code, and also layered driver init 1446 */ 1447int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize) 1448{ 1449 int ret = 0; 1450 1451 if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) { 1452 if (dd->ipath_rcvhdrsize != rhdrsize) { 1453 dev_info(&dd->pcidev->dev, 1454 "Error: can't set protocol header " 1455 "size %u, already %u\n", 1456 rhdrsize, dd->ipath_rcvhdrsize); 1457 ret = -EAGAIN; 1458 } else 1459 ipath_cdbg(VERBOSE, "Reuse same protocol header " 1460 "size %u\n", dd->ipath_rcvhdrsize); 1461 } else if (rhdrsize > (dd->ipath_rcvhdrentsize - 1462 (sizeof(u64) / sizeof(u32)))) { 1463 ipath_dbg("Error: can't set protocol header size %u " 1464 "(> max %u)\n", rhdrsize, 1465 dd->ipath_rcvhdrentsize - 1466 (u32) (sizeof(u64) / sizeof(u32))); 1467 ret = -EOVERFLOW; 1468 } else { 1469 dd->ipath_flags |= IPATH_RCVHDRSZ_SET; 1470 dd->ipath_rcvhdrsize = rhdrsize; 1471 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize, 1472 dd->ipath_rcvhdrsize); 1473 ipath_cdbg(VERBOSE, "Set protocol header size to %u\n", 1474 dd->ipath_rcvhdrsize); 1475 } 1476 return ret; 1477} 1478 1479/* 1480 * debugging code and stats updates if no pio buffers available. 1481 */ 1482static noinline void no_pio_bufs(struct ipath_devdata *dd) 1483{ 1484 unsigned long *shadow = dd->ipath_pioavailshadow; 1485 __le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma; 1486 1487 dd->ipath_upd_pio_shadow = 1; 1488 1489 /* 1490 * not atomic, but if we lose a stat count in a while, that's OK 1491 */ 1492 ipath_stats.sps_nopiobufs++; 1493 if (!(++dd->ipath_consec_nopiobuf % 100000)) { 1494 ipath_force_pio_avail_update(dd); /* at start */ 1495 ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: " 1496 "%llx %llx %llx %llx\n" 1497 "ipath shadow: %lx %lx %lx %lx\n", 1498 dd->ipath_consec_nopiobuf, 1499 (unsigned long)get_cycles(), 1500 (unsigned long long) le64_to_cpu(dma[0]), 1501 (unsigned long long) le64_to_cpu(dma[1]), 1502 (unsigned long long) le64_to_cpu(dma[2]), 1503 (unsigned long long) le64_to_cpu(dma[3]), 1504 shadow[0], shadow[1], shadow[2], shadow[3]); 1505 /* 1506 * 4 buffers per byte, 4 registers above, cover rest 1507 * below 1508 */ 1509 if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 1510 (sizeof(shadow[0]) * 4 * 4)) 1511 ipath_dbg("2nd group: dmacopy: " 1512 "%llx %llx %llx %llx\n" 1513 "ipath shadow: %lx %lx %lx %lx\n", 1514 (unsigned long long)le64_to_cpu(dma[4]), 1515 (unsigned long long)le64_to_cpu(dma[5]), 1516 (unsigned long long)le64_to_cpu(dma[6]), 1517 (unsigned long long)le64_to_cpu(dma[7]), 1518 shadow[4], shadow[5], shadow[6], shadow[7]); 1519 1520 /* at end, so update likely happened */ 1521 ipath_reset_availshadow(dd); 1522 } 1523} 1524 1525/* 1526 * common code for normal driver pio buffer allocation, and reserved 1527 * allocation. 1528 * 1529 * do appropriate marking as busy, etc. 1530 * returns buffer number if one found (>=0), negative number is error. 
1531 */ 1532static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd, 1533 u32 *pbufnum, u32 first, u32 last, u32 firsti) 1534{ 1535 int i, j, updated = 0; 1536 unsigned piobcnt; 1537 unsigned long flags; 1538 unsigned long *shadow = dd->ipath_pioavailshadow; 1539 u32 __iomem *buf; 1540 1541 piobcnt = last - first; 1542 if (dd->ipath_upd_pio_shadow) { 1543 /* 1544 * Minor optimization. If we had no buffers on last call, 1545 * start out by doing the update; continue and do scan even 1546 * if no buffers were updated, to be paranoid 1547 */ 1548 ipath_update_pio_bufs(dd); 1549 updated++; 1550 i = first; 1551 } else 1552 i = firsti; 1553rescan: 1554 /* 1555 * while test_and_set_bit() is atomic, we do that and then the 1556 * change_bit(), and the pair is not. See if this is the cause 1557 * of the remaining armlaunch errors. 1558 */ 1559 spin_lock_irqsave(&ipath_pioavail_lock, flags); 1560 for (j = 0; j < piobcnt; j++, i++) { 1561 if (i >= last) 1562 i = first; 1563 if (__test_and_set_bit((2 * i) + 1, shadow)) 1564 continue; 1565 /* flip generation bit */ 1566 __change_bit(2 * i, shadow); 1567 break; 1568 } 1569 spin_unlock_irqrestore(&ipath_pioavail_lock, flags); 1570 1571 if (j == piobcnt) { 1572 if (!updated) { 1573 /* 1574 * first time through; shadow exhausted, but may be 1575 * buffers available, try an update and then rescan. 1576 */ 1577 ipath_update_pio_bufs(dd); 1578 updated++; 1579 i = first; 1580 goto rescan; 1581 } else if (updated == 1 && piobcnt <= 1582 ((dd->ipath_sendctrl 1583 >> INFINIPATH_S_UPDTHRESH_SHIFT) & 1584 INFINIPATH_S_UPDTHRESH_MASK)) { 1585 /* 1586 * for chips supporting and using the update 1587 * threshold we need to force an update of the 1588 * in-memory copy if the count is less than the 1589 * thershold, then check one more time. 1590 */ 1591 ipath_force_pio_avail_update(dd); 1592 ipath_update_pio_bufs(dd); 1593 updated++; 1594 i = first; 1595 goto rescan; 1596 } 1597 1598 no_pio_bufs(dd); 1599 buf = NULL; 1600 } else { 1601 if (i < dd->ipath_piobcnt2k) 1602 buf = (u32 __iomem *) (dd->ipath_pio2kbase + 1603 i * dd->ipath_palign); 1604 else 1605 buf = (u32 __iomem *) 1606 (dd->ipath_pio4kbase + 1607 (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign); 1608 if (pbufnum) 1609 *pbufnum = i; 1610 } 1611 1612 return buf; 1613} 1614 1615/** 1616 * ipath_getpiobuf - find an available pio buffer 1617 * @dd: the infinipath device 1618 * @plen: the size of the PIO buffer needed in 32-bit words 1619 * @pbufnum: the buffer number is placed here 1620 */ 1621u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum) 1622{ 1623 u32 __iomem *buf; 1624 u32 pnum, nbufs; 1625 u32 first, lasti; 1626 1627 if (plen + 1 >= IPATH_SMALLBUF_DWORDS) { 1628 first = dd->ipath_piobcnt2k; 1629 lasti = dd->ipath_lastpioindexl; 1630 } else { 1631 first = 0; 1632 lasti = dd->ipath_lastpioindex; 1633 } 1634 nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; 1635 buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti); 1636 1637 if (buf) { 1638 /* 1639 * Set next starting place. It's just an optimization, 1640 * it doesn't matter who wins on this, so no locking 1641 */ 1642 if (plen + 1 >= IPATH_SMALLBUF_DWORDS) 1643 dd->ipath_lastpioindexl = pnum + 1; 1644 else 1645 dd->ipath_lastpioindex = pnum + 1; 1646 if (dd->ipath_upd_pio_shadow) 1647 dd->ipath_upd_pio_shadow = 0; 1648 if (dd->ipath_consec_nopiobuf) 1649 dd->ipath_consec_nopiobuf = 0; 1650 ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n", 1651 pnum, (pnum < dd->ipath_piobcnt2k) ? 
2 : 4, buf); 1652 if (pbufnum) 1653 *pbufnum = pnum; 1654 1655 } 1656 return buf; 1657} 1658 1659/** 1660 * ipath_chg_pioavailkernel - change which send buffers are available for kernel 1661 * @dd: the infinipath device 1662 * @start: the starting send buffer number 1663 * @len: the number of send buffers 1664 * @avail: true if the buffers are available for kernel use, false otherwise 1665 */ 1666void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start, 1667 unsigned len, int avail) 1668{ 1669 unsigned long flags; 1670 unsigned end, cnt = 0; 1671 1672 /* There are two bits per send buffer (busy and generation) */ 1673 start *= 2; 1674 end = start + len * 2; 1675 1676 spin_lock_irqsave(&ipath_pioavail_lock, flags); 1677 /* Set or clear the busy bit in the shadow. */ 1678 while (start < end) { 1679 if (avail) { 1680 unsigned long dma; 1681 int i, im; 1682 /* 1683 * the BUSY bit will never be set, because we disarm 1684 * the user buffers before we hand them back to the 1685 * kernel. We do have to make sure the generation 1686 * bit is set correctly in shadow, since it could 1687 * have changed many times while allocated to user. 1688 * We can't use the bitmap functions on the full 1689 * dma array because it is always little-endian, so 1690 * we have to flip to host-order first. 1691 * BITS_PER_LONG is slightly wrong, since it's 1692 * always 64 bits per register in chip... 1693 * We only work on 64 bit kernels, so that's OK. 1694 */ 1695 /* deal with 6110 chip bug on high register #s */ 1696 i = start / BITS_PER_LONG; 1697 im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ? 1698 i ^ 1 : i; 1699 __clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT 1700 + start, dd->ipath_pioavailshadow); 1701 dma = (unsigned long) le64_to_cpu( 1702 dd->ipath_pioavailregs_dma[im]); 1703 if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 1704 + start) % BITS_PER_LONG, &dma)) 1705 __set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 1706 + start, dd->ipath_pioavailshadow); 1707 else 1708 __clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 1709 + start, dd->ipath_pioavailshadow); 1710 __set_bit(start, dd->ipath_pioavailkernel); 1711 } else { 1712 __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT, 1713 dd->ipath_pioavailshadow); 1714 __clear_bit(start, dd->ipath_pioavailkernel); 1715 } 1716 start += 2; 1717 } 1718 1719 if (dd->ipath_pioupd_thresh) { 1720 end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k); 1721 cnt = bitmap_weight(dd->ipath_pioavailkernel, end); 1722 } 1723 spin_unlock_irqrestore(&ipath_pioavail_lock, flags); 1724 1725 /* 1726 * When moving buffers from kernel to user, if number assigned to 1727 * the user is less than the pio update threshold, and threshold 1728 * is supported (cnt was computed > 0), drop the update threshold 1729 * so we update at least once per allocated number of buffers. 1730 * In any case, if the kernel buffers are less than the threshold, 1731 * drop the threshold. We don't bother increasing it, having once 1732 * decreased it, since it would typically just cycle back and forth. 1733 * If we don't decrease below buffers in use, we can wait a long 1734 * time for an update, until some other context uses PIO buffers. 
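	 * As an illustrative example: if the count works out to 24 while
	 * ipath_pioupd_thresh is currently 32, the code below drops the
	 * threshold (and the UPDTHRESH field in sendctrl) to 24.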
1735 */ 1736 if (!avail && len < cnt) 1737 cnt = len; 1738 if (cnt < dd->ipath_pioupd_thresh) { 1739 dd->ipath_pioupd_thresh = cnt; 1740 ipath_dbg("Decreased pio update threshold to %u\n", 1741 dd->ipath_pioupd_thresh); 1742 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); 1743 dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK 1744 << INFINIPATH_S_UPDTHRESH_SHIFT); 1745 dd->ipath_sendctrl |= dd->ipath_pioupd_thresh 1746 << INFINIPATH_S_UPDTHRESH_SHIFT; 1747 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 1748 dd->ipath_sendctrl); 1749 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); 1750 } 1751} 1752 1753/** 1754 * ipath_create_rcvhdrq - create a receive header queue 1755 * @dd: the infinipath device 1756 * @pd: the port data 1757 * 1758 * this must be contiguous memory (from an i/o perspective), and must be 1759 * DMA'able (which means for some systems, it will go through an IOMMU, 1760 * or be forced into a low address range). 1761 */ 1762int ipath_create_rcvhdrq(struct ipath_devdata *dd, 1763 struct ipath_portdata *pd) 1764{ 1765 int ret = 0; 1766 1767 if (!pd->port_rcvhdrq) { 1768 dma_addr_t phys_hdrqtail; 1769 gfp_t gfp_flags = GFP_USER | __GFP_COMP; 1770 int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * 1771 sizeof(u32), PAGE_SIZE); 1772 1773 pd->port_rcvhdrq = dma_alloc_coherent( 1774 &dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys, 1775 gfp_flags); 1776 1777 if (!pd->port_rcvhdrq) { 1778 ipath_dev_err(dd, "attempt to allocate %d bytes " 1779 "for port %u rcvhdrq failed\n", 1780 amt, pd->port_port); 1781 ret = -ENOMEM; 1782 goto bail; 1783 } 1784 1785 if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) { 1786 pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent( 1787 &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, 1788 GFP_KERNEL); 1789 if (!pd->port_rcvhdrtail_kvaddr) { 1790 ipath_dev_err(dd, "attempt to allocate 1 page " 1791 "for port %u rcvhdrqtailaddr " 1792 "failed\n", pd->port_port); 1793 ret = -ENOMEM; 1794 dma_free_coherent(&dd->pcidev->dev, amt, 1795 pd->port_rcvhdrq, 1796 pd->port_rcvhdrq_phys); 1797 pd->port_rcvhdrq = NULL; 1798 goto bail; 1799 } 1800 pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail; 1801 ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx " 1802 "physical\n", pd->port_port, 1803 (unsigned long long) phys_hdrqtail); 1804 } 1805 1806 pd->port_rcvhdrq_size = amt; 1807 1808 ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu " 1809 "for port %u rcvhdr Q\n", 1810 amt >> PAGE_SHIFT, pd->port_rcvhdrq, 1811 (unsigned long) pd->port_rcvhdrq_phys, 1812 (unsigned long) pd->port_rcvhdrq_size, 1813 pd->port_port); 1814 } 1815 else 1816 ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; " 1817 "hdrtailaddr@%p %llx physical\n", 1818 pd->port_port, pd->port_rcvhdrq, 1819 (unsigned long long) pd->port_rcvhdrq_phys, 1820 pd->port_rcvhdrtail_kvaddr, (unsigned long long) 1821 pd->port_rcvhdrqtailaddr_phys); 1822 1823 /* clear for security and sanity on each use */ 1824 memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size); 1825 if (pd->port_rcvhdrtail_kvaddr) 1826 memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE); 1827 1828 /* 1829 * tell chip each time we init it, even if we are re-using previous 1830 * memory (we zero the register at process close) 1831 */ 1832 ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr, 1833 pd->port_port, pd->port_rcvhdrqtailaddr_phys); 1834 ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr, 1835 pd->port_port, pd->port_rcvhdrq_phys); 1836 1837bail: 1838 return ret; 1839} 1840 1841 1842/* 1843 * Flush all sends that 
might be in the ready to send state, as well as any 1844 * that are in the process of being sent. Used whenever we need to be 1845 * sure the send side is idle. Cleans up all buffer state by canceling 1846 * all pio buffers, and issuing an abort, which cleans up anything in the 1847 * launch fifo. The cancel is superfluous on some chip versions, but 1848 * it's safer to always do it. 1849 * PIOAvail bits are updated by the chip as if normal send had happened. 1850 */ 1851void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) 1852{ 1853 unsigned long flags; 1854 1855 if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) { 1856 ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n"); 1857 goto bail; 1858 } 1859 /* 1860 * If we have SDMA, and it's not disabled, we have to kick off the 1861 * abort state machine, provided we aren't already aborting. 1862 * If we are in the process of aborting SDMA (!DISABLED, but ABORTING), 1863 * we skip the rest of this routine. It is already "in progress" 1864 */ 1865 if (dd->ipath_flags & IPATH_HAS_SEND_DMA) { 1866 int skip_cancel; 1867 unsigned long *statp = &dd->ipath_sdma_status; 1868 1869 spin_lock_irqsave(&dd->ipath_sdma_lock, flags); 1870 skip_cancel = 1871 test_and_set_bit(IPATH_SDMA_ABORTING, statp) 1872 && !test_bit(IPATH_SDMA_DISABLED, statp); 1873 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); 1874 if (skip_cancel) 1875 goto bail; 1876 } 1877 1878 ipath_dbg("Cancelling all in-progress send buffers\n"); 1879 1880 /* skip armlaunch errs for a while */ 1881 dd->ipath_lastcancel = jiffies + HZ / 2; 1882 1883 /* 1884 * The abort bit is auto-clearing. We also don't want pioavail 1885 * update happening during this, and we don't want any other 1886 * sends going out, so turn those off for the duration. We read 1887 * the scratch register to be sure that cancels and the abort 1888 * have taken effect in the chip. 
Otherwise these two operations are the same
1889 * as in ipath_force_pio_avail_update()
1890 */
1891 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
1892 dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD
1893 | INFINIPATH_S_PIOENABLE);
1894 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1895 dd->ipath_sendctrl | INFINIPATH_S_ABORT);
1896 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1897 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1898 
1899 /* disarm all send buffers */
1900 ipath_disarm_piobufs(dd, 0,
1901 dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
1902 
1903 if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
1904 set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
1905 
1906 if (restore_sendctrl) {
1907 /* else done by caller later if needed */
1908 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
1909 dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD |
1910 INFINIPATH_S_PIOENABLE;
1911 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1912 dd->ipath_sendctrl);
1913 /* and again, be sure all have hit the chip */
1914 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1915 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1916 }
1917 
1918 if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) &&
1919 !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) &&
1920 test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) {
1921 spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
1922 /* only wait so long for intr */
1923 dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
1924 dd->ipath_sdma_reset_wait = 200;
1925 if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
1926 tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
1927 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
1928 }
1929 bail:;
1930 }
1931 
1932 /*
1933 * Force an update of the in-memory copy of the pioavail registers, when
1934 * needed for any of a variety of reasons. We read the scratch register
1935 * to make it highly likely that the update will have happened by the
1936 * time we return. If already off (as in cancel_sends above), this
1937 * routine is a nop, on the assumption that the caller will "do the
1938 * right thing".
1939 */
1940 void ipath_force_pio_avail_update(struct ipath_devdata *dd)
1941 {
1942 unsigned long flags;
1943 
1944 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
1945 if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) {
1946 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1947 dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
1948 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1949 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1950 dd->ipath_sendctrl);
1951 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1952 }
1953 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1954 }
1955 
1956 static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd,
1957 int linitcmd)
1958 {
1959 u64 mod_wd;
1960 static const char *what[4] = {
1961 [0] = "NOP",
1962 [INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN",
1963 [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
1964 [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
1965 };
1966 
1967 if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) {
1968 /*
1969 * If we are told to disable, note that so link-recovery
1970 * code does not attempt to bring us back up.
1971 */ 1972 preempt_disable(); 1973 dd->ipath_flags |= IPATH_IB_LINK_DISABLED; 1974 preempt_enable(); 1975 } else if (linitcmd) { 1976 /* 1977 * Any other linkinitcmd will lead to LINKDOWN and then 1978 * to INIT (if all is well), so clear flag to let 1979 * link-recovery code attempt to bring us back up. 1980 */ 1981 preempt_disable(); 1982 dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED; 1983 preempt_enable(); 1984 } 1985 1986 mod_wd = (linkcmd << dd->ibcc_lc_shift) | 1987 (linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT); 1988 ipath_cdbg(VERBOSE, 1989 "Moving unit %u to %s (initcmd=0x%x), current ltstate is %s\n", 1990 dd->ipath_unit, what[linkcmd], linitcmd, 1991 ipath_ibcstatus_str[ipath_ib_linktrstate(dd, 1992 ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]); 1993 1994 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, 1995 dd->ipath_ibcctrl | mod_wd); 1996 /* read from chip so write is flushed */ 1997 (void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); 1998} 1999 2000int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) 2001{ 2002 u32 lstate; 2003 int ret; 2004 2005 switch (newstate) { 2006 case IPATH_IB_LINKDOWN_ONLY: 2007 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 0); 2008 /* don't wait */ 2009 ret = 0; 2010 goto bail; 2011 2012 case IPATH_IB_LINKDOWN: 2013 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 2014 INFINIPATH_IBCC_LINKINITCMD_POLL); 2015 /* don't wait */ 2016 ret = 0; 2017 goto bail; 2018 2019 case IPATH_IB_LINKDOWN_SLEEP: 2020 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 2021 INFINIPATH_IBCC_LINKINITCMD_SLEEP); 2022 /* don't wait */ 2023 ret = 0; 2024 goto bail; 2025 2026 case IPATH_IB_LINKDOWN_DISABLE: 2027 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 2028 INFINIPATH_IBCC_LINKINITCMD_DISABLE); 2029 /* don't wait */ 2030 ret = 0; 2031 goto bail; 2032 2033 case IPATH_IB_LINKARM: 2034 if (dd->ipath_flags & IPATH_LINKARMED) { 2035 ret = 0; 2036 goto bail; 2037 } 2038 if (!(dd->ipath_flags & 2039 (IPATH_LINKINIT | IPATH_LINKACTIVE))) { 2040 ret = -EINVAL; 2041 goto bail; 2042 } 2043 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED, 0); 2044 2045 /* 2046 * Since the port can transition to ACTIVE by receiving 2047 * a non VL 15 packet, wait for either state. 
2048 */ 2049 lstate = IPATH_LINKARMED | IPATH_LINKACTIVE; 2050 break; 2051 2052 case IPATH_IB_LINKACTIVE: 2053 if (dd->ipath_flags & IPATH_LINKACTIVE) { 2054 ret = 0; 2055 goto bail; 2056 } 2057 if (!(dd->ipath_flags & IPATH_LINKARMED)) { 2058 ret = -EINVAL; 2059 goto bail; 2060 } 2061 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE, 0); 2062 lstate = IPATH_LINKACTIVE; 2063 break; 2064 2065 case IPATH_IB_LINK_LOOPBACK: 2066 dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n"); 2067 dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK; 2068 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, 2069 dd->ipath_ibcctrl); 2070 2071 /* turn heartbeat off, as it causes loopback to fail */ 2072 dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, 2073 IPATH_IB_HRTBT_OFF); 2074 /* don't wait */ 2075 ret = 0; 2076 goto bail; 2077 2078 case IPATH_IB_LINK_EXTERNAL: 2079 dev_info(&dd->pcidev->dev, 2080 "Disabling IB local loopback (normal)\n"); 2081 dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, 2082 IPATH_IB_HRTBT_ON); 2083 dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK; 2084 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, 2085 dd->ipath_ibcctrl); 2086 /* don't wait */ 2087 ret = 0; 2088 goto bail; 2089 2090 /* 2091 * Heartbeat can be explicitly enabled by the user via 2092 * "hrtbt_enable" "file", and if disabled, trying to enable here 2093 * will have no effect. Implicit changes (heartbeat off when 2094 * loopback on, and vice versa) are included to ease testing. 2095 */ 2096 case IPATH_IB_LINK_HRTBT: 2097 ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, 2098 IPATH_IB_HRTBT_ON); 2099 goto bail; 2100 2101 case IPATH_IB_LINK_NO_HRTBT: 2102 ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, 2103 IPATH_IB_HRTBT_OFF); 2104 goto bail; 2105 2106 default: 2107 ipath_dbg("Invalid linkstate 0x%x requested\n", newstate); 2108 ret = -EINVAL; 2109 goto bail; 2110 } 2111 ret = ipath_wait_linkstate(dd, lstate, 2000); 2112 2113bail: 2114 return ret; 2115} 2116 2117/** 2118 * ipath_set_mtu - set the MTU 2119 * @dd: the infinipath device 2120 * @arg: the new MTU 2121 * 2122 * we can handle "any" incoming size, the issue here is whether we 2123 * need to restrict our outgoing size. For now, we don't do any 2124 * sanity checking on this, and we don't deal with what happens to 2125 * programs that are already running when the size changes. 2126 * NOTE: changing the MTU will usually cause the IBC to go back to 2127 * link INIT state... 2128 */ 2129int ipath_set_mtu(struct ipath_devdata *dd, u16 arg) 2130{ 2131 u32 piosize; 2132 int changed = 0; 2133 int ret; 2134 2135 /* 2136 * mtu is IB data payload max. It's the largest power of 2 less 2137 * than piosize (or even larger, since it only really controls the 2138 * largest we can receive; we can send the max of the mtu and 2139 * piosize). We check that it's one of the valid IB sizes. 
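 * Valid sizes are 256, 512, 1024 and 2048 bytes, plus 4096 when the
 * mtu4096 module parameter is set.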
2140 */ 2141 if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 && 2142 (arg != 4096 || !ipath_mtu4096)) { 2143 ipath_dbg("Trying to set invalid mtu %u, failing\n", arg); 2144 ret = -EINVAL; 2145 goto bail; 2146 } 2147 if (dd->ipath_ibmtu == arg) { 2148 ret = 0; /* same as current */ 2149 goto bail; 2150 } 2151 2152 piosize = dd->ipath_ibmaxlen; 2153 dd->ipath_ibmtu = arg; 2154 2155 if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) { 2156 /* Only if it's not the initial value (or reset to it) */ 2157 if (piosize != dd->ipath_init_ibmaxlen) { 2158 if (arg > piosize && arg <= dd->ipath_init_ibmaxlen) 2159 piosize = dd->ipath_init_ibmaxlen; 2160 dd->ipath_ibmaxlen = piosize; 2161 changed = 1; 2162 } 2163 } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) { 2164 piosize = arg + IPATH_PIO_MAXIBHDR; 2165 ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x " 2166 "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize, 2167 arg); 2168 dd->ipath_ibmaxlen = piosize; 2169 changed = 1; 2170 } 2171 2172 if (changed) { 2173 u64 ibc = dd->ipath_ibcctrl, ibdw; 2174 /* 2175 * update our housekeeping variables, and set IBC max 2176 * size, same as init code; max IBC is max we allow in 2177 * buffer, less the qword pbc, plus 1 for ICRC, in dwords 2178 */ 2179 dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32); 2180 ibdw = (dd->ipath_ibmaxlen >> 2) + 1; 2181 ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK << 2182 dd->ibcc_mpl_shift); 2183 ibc |= ibdw << dd->ibcc_mpl_shift; 2184 dd->ipath_ibcctrl = ibc; 2185 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, 2186 dd->ipath_ibcctrl); 2187 dd->ipath_f_tidtemplate(dd); 2188 } 2189 2190 ret = 0; 2191 2192bail: 2193 return ret; 2194} 2195 2196int ipath_set_lid(struct ipath_devdata *dd, u32 lid, u8 lmc) 2197{ 2198 dd->ipath_lid = lid; 2199 dd->ipath_lmc = lmc; 2200 2201 dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LIDLMC, lid | 2202 (~((1U << lmc) - 1)) << 16); 2203 2204 dev_info(&dd->pcidev->dev, "We got a lid: 0x%x\n", lid); 2205 2206 return 0; 2207} 2208 2209 2210/** 2211 * ipath_write_kreg_port - write a device's per-port 64-bit kernel register 2212 * @dd: the infinipath device 2213 * @regno: the register number to write 2214 * @port: the port containing the register 2215 * @value: the value to write 2216 * 2217 * Registers that vary with the chip implementation constants (port) 2218 * use this routine. 2219 */ 2220void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno, 2221 unsigned port, u64 value) 2222{ 2223 u16 where; 2224 2225 if (port < dd->ipath_portcnt && 2226 (regno == dd->ipath_kregs->kr_rcvhdraddr || 2227 regno == dd->ipath_kregs->kr_rcvhdrtailaddr)) 2228 where = regno + port; 2229 else 2230 where = -1; 2231 2232 ipath_write_kreg(dd, where, value); 2233} 2234 2235/* 2236 * Following deal with the "obviously simple" task of overriding the state 2237 * of the LEDS, which normally indicate link physical and logical status. 2238 * The complications arise in dealing with different hardware mappings 2239 * and the board-dependent routine being called from interrupts. 2240 * and then there's the requirement to _flash_ them. 
2241 */ 2242#define LED_OVER_FREQ_SHIFT 8 2243#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT) 2244/* Below is "non-zero" to force override, but both actual LEDs are off */ 2245#define LED_OVER_BOTH_OFF (8) 2246 2247static void ipath_run_led_override(unsigned long opaque) 2248{ 2249 struct ipath_devdata *dd = (struct ipath_devdata *)opaque; 2250 int timeoff; 2251 int pidx; 2252 u64 lstate, ltstate, val; 2253 2254 if (!(dd->ipath_flags & IPATH_INITTED)) 2255 return; 2256 2257 pidx = dd->ipath_led_override_phase++ & 1; 2258 dd->ipath_led_override = dd->ipath_led_override_vals[pidx]; 2259 timeoff = dd->ipath_led_override_timeoff; 2260 2261 /* 2262 * below potentially restores the LED values per current status, 2263 * should also possibly setup the traffic-blink register, 2264 * but leave that to per-chip functions. 2265 */ 2266 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); 2267 ltstate = ipath_ib_linktrstate(dd, val); 2268 lstate = ipath_ib_linkstate(dd, val); 2269 2270 dd->ipath_f_setextled(dd, lstate, ltstate); 2271 mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff); 2272} 2273 2274void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val) 2275{ 2276 int timeoff, freq; 2277 2278 if (!(dd->ipath_flags & IPATH_INITTED)) 2279 return; 2280 2281 /* First check if we are blinking. If not, use 1HZ polling */ 2282 timeoff = HZ; 2283 freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT; 2284 2285 if (freq) { 2286 /* For blink, set each phase from one nybble of val */ 2287 dd->ipath_led_override_vals[0] = val & 0xF; 2288 dd->ipath_led_override_vals[1] = (val >> 4) & 0xF; 2289 timeoff = (HZ << 4)/freq; 2290 } else { 2291 /* Non-blink set both phases the same. */ 2292 dd->ipath_led_override_vals[0] = val & 0xF; 2293 dd->ipath_led_override_vals[1] = val & 0xF; 2294 } 2295 dd->ipath_led_override_timeoff = timeoff; 2296 2297 /* 2298 * If the timer has not already been started, do so. Use a "quick" 2299 * timeout so the function will be called soon, to look at our request. 2300 */ 2301 if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) { 2302 /* Need to start timer */ 2303 init_timer(&dd->ipath_led_override_timer); 2304 dd->ipath_led_override_timer.function = 2305 ipath_run_led_override; 2306 dd->ipath_led_override_timer.data = (unsigned long) dd; 2307 dd->ipath_led_override_timer.expires = jiffies + 1; 2308 add_timer(&dd->ipath_led_override_timer); 2309 } else 2310 atomic_dec(&dd->ipath_led_override_timer_active); 2311} 2312 2313/** 2314 * ipath_shutdown_device - shut down a device 2315 * @dd: the infinipath device 2316 * 2317 * This is called to make the device quiet when we are about to 2318 * unload the driver, and also when the device is administratively 2319 * disabled. It does not free any data structures. 
2320 * Everything it does has to be set up again by ipath_init_chip(dd, 1)
2321 */
2322 void ipath_shutdown_device(struct ipath_devdata *dd)
2323 {
2324 unsigned long flags;
2325 
2326 ipath_dbg("Shutting down the device\n");
2327 
2328 ipath_hol_up(dd); /* make sure user processes aren't suspended */
2329 
2330 dd->ipath_flags |= IPATH_LINKUNK;
2331 dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
2332 IPATH_LINKINIT | IPATH_LINKARMED |
2333 IPATH_LINKACTIVE);
2334 *dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF |
2335 IPATH_STATUS_IB_READY);
2336 
2337 /* mask interrupts, but not errors */
2338 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
2339 
2340 dd->ipath_rcvctrl = 0;
2341 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
2342 dd->ipath_rcvctrl);
2343 
2344 if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
2345 teardown_sdma(dd);
2346 
2347 /*
2348 * Gracefully stop all sends, allowing any in progress to trickle
2349 * out first.
2350 */
2351 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
2352 dd->ipath_sendctrl = 0;
2353 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
2354 /* flush it */
2355 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
2356 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
2357 
2358 /*
2359 * Wait long enough for anything that's going to trickle out to have
2360 * actually done so.
2361 */
2362 udelay(5);
2363 
2364 dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */
2365 
2366 ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE);
2367 ipath_cancel_sends(dd, 0);
2368 
2369 /*
2370 * We are shutting down, so tell components that care. We don't do
2371 * this on just a link state change; much as with ethernet, a cable
2372 * unplug, etc. doesn't change driver state.
2373 */
2374 signal_ib_event(dd, IB_EVENT_PORT_ERR);
2375 
2376 /* disable IBC */
2377 dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
2378 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
2379 dd->ipath_control | INFINIPATH_C_FREEZEMODE);
2380 
2381 /*
2382 * Clear SerdesEnable and turn the LEDs off; do this here because
2383 * we are unloading and can't count on interrupts to move things along.
2384 * Turn the LEDs off explicitly for the same reason.
2385 */ 2386 dd->ipath_f_quiet_serdes(dd); 2387 2388 /* stop all the timers that might still be running */ 2389 del_timer_sync(&dd->ipath_hol_timer); 2390 if (dd->ipath_stats_timer_active) { 2391 del_timer_sync(&dd->ipath_stats_timer); 2392 dd->ipath_stats_timer_active = 0; 2393 } 2394 if (dd->ipath_intrchk_timer.data) { 2395 del_timer_sync(&dd->ipath_intrchk_timer); 2396 dd->ipath_intrchk_timer.data = 0; 2397 } 2398 if (atomic_read(&dd->ipath_led_override_timer_active)) { 2399 del_timer_sync(&dd->ipath_led_override_timer); 2400 atomic_set(&dd->ipath_led_override_timer_active, 0); 2401 } 2402 2403 /* 2404 * clear all interrupts and errors, so that the next time the driver 2405 * is loaded or device is enabled, we know that whatever is set 2406 * happened while we were unloaded 2407 */ 2408 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 2409 ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED); 2410 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); 2411 ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); 2412 2413 ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n"); 2414 ipath_update_eeprom_log(dd); 2415} 2416 2417/** 2418 * ipath_free_pddata - free a port's allocated data 2419 * @dd: the infinipath device 2420 * @pd: the portdata structure 2421 * 2422 * free up any allocated data for a port 2423 * This should not touch anything that would affect a simultaneous 2424 * re-allocation of port data, because it is called after ipath_mutex 2425 * is released (and can be called from reinit as well). 2426 * It should never change any chip state, or global driver state. 2427 * (The only exception to global state is freeing the port0 port0_skbs.) 2428 */ 2429void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd) 2430{ 2431 if (!pd) 2432 return; 2433 2434 if (pd->port_rcvhdrq) { 2435 ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p " 2436 "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq, 2437 (unsigned long) pd->port_rcvhdrq_size); 2438 dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size, 2439 pd->port_rcvhdrq, pd->port_rcvhdrq_phys); 2440 pd->port_rcvhdrq = NULL; 2441 if (pd->port_rcvhdrtail_kvaddr) { 2442 dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, 2443 pd->port_rcvhdrtail_kvaddr, 2444 pd->port_rcvhdrqtailaddr_phys); 2445 pd->port_rcvhdrtail_kvaddr = NULL; 2446 } 2447 } 2448 if (pd->port_port && pd->port_rcvegrbuf) { 2449 unsigned e; 2450 2451 for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) { 2452 void *base = pd->port_rcvegrbuf[e]; 2453 size_t size = pd->port_rcvegrbuf_size; 2454 2455 ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), " 2456 "chunk %u/%u\n", base, 2457 (unsigned long) size, 2458 e, pd->port_rcvegrbuf_chunks); 2459 dma_free_coherent(&dd->pcidev->dev, size, 2460 base, pd->port_rcvegrbuf_phys[e]); 2461 } 2462 kfree(pd->port_rcvegrbuf); 2463 pd->port_rcvegrbuf = NULL; 2464 kfree(pd->port_rcvegrbuf_phys); 2465 pd->port_rcvegrbuf_phys = NULL; 2466 pd->port_rcvegrbuf_chunks = 0; 2467 } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) { 2468 unsigned e; 2469 struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo; 2470 2471 dd->ipath_port0_skbinfo = NULL; 2472 ipath_cdbg(VERBOSE, "free closed port %d " 2473 "ipath_port0_skbinfo @ %p\n", pd->port_port, 2474 skbinfo); 2475 for (e = 0; e < dd->ipath_p0_rcvegrcnt; e++) 2476 if (skbinfo[e].skb) { 2477 pci_unmap_single(dd->pcidev, skbinfo[e].phys, 2478 dd->ipath_ibmaxlen, 2479 PCI_DMA_FROMDEVICE); 2480 dev_kfree_skb(skbinfo[e].skb); 2481 } 2482 vfree(skbinfo); 2483 } 2484 kfree(pd->port_tid_pg_list); 
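/* the subport buffers below were vmalloc'ed, so release them with vfree */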
2485 vfree(pd->subport_uregbase); 2486 vfree(pd->subport_rcvegrbuf); 2487 vfree(pd->subport_rcvhdr_base); 2488 kfree(pd); 2489} 2490 2491static int __init infinipath_init(void) 2492{ 2493 int ret; 2494 2495 if (ipath_debug & __IPATH_DBG) 2496 printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version); 2497 2498 /* 2499 * These must be called before the driver is registered with 2500 * the PCI subsystem. 2501 */ 2502 idr_init(&unit_table); 2503 2504 ret = pci_register_driver(&ipath_driver); 2505 if (ret < 0) { 2506 printk(KERN_ERR IPATH_DRV_NAME 2507 ": Unable to register driver: error %d\n", -ret); 2508 goto bail_unit; 2509 } 2510 2511 ret = ipath_init_ipathfs(); 2512 if (ret < 0) { 2513 printk(KERN_ERR IPATH_DRV_NAME ": Unable to create " 2514 "ipathfs: error %d\n", -ret); 2515 goto bail_pci; 2516 } 2517 2518 goto bail; 2519 2520bail_pci: 2521 pci_unregister_driver(&ipath_driver); 2522 2523bail_unit: 2524 idr_destroy(&unit_table); 2525 2526bail: 2527 return ret; 2528} 2529 2530static void __exit infinipath_cleanup(void) 2531{ 2532 ipath_exit_ipathfs(); 2533 2534 ipath_cdbg(VERBOSE, "Unregistering pci driver\n"); 2535 pci_unregister_driver(&ipath_driver); 2536 2537 idr_destroy(&unit_table); 2538} 2539 2540/** 2541 * ipath_reset_device - reset the chip if possible 2542 * @unit: the device to reset 2543 * 2544 * Whether or not reset is successful, we attempt to re-initialize the chip 2545 * (that is, much like a driver unload/reload). We clear the INITTED flag 2546 * so that the various entry points will fail until we reinitialize. For 2547 * now, we only allow this if no user ports are open that use chip resources 2548 */ 2549int ipath_reset_device(int unit) 2550{ 2551 int ret, i; 2552 struct ipath_devdata *dd = ipath_lookup(unit); 2553 unsigned long flags; 2554 2555 if (!dd) { 2556 ret = -ENODEV; 2557 goto bail; 2558 } 2559 2560 if (atomic_read(&dd->ipath_led_override_timer_active)) { 2561 /* Need to stop LED timer, _then_ shut off LEDs */ 2562 del_timer_sync(&dd->ipath_led_override_timer); 2563 atomic_set(&dd->ipath_led_override_timer_active, 0); 2564 } 2565 2566 /* Shut off LEDs after we are sure timer is not running */ 2567 dd->ipath_led_override = LED_OVER_BOTH_OFF; 2568 dd->ipath_f_setextled(dd, 0, 0); 2569 2570 dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit); 2571 2572 if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) { 2573 dev_info(&dd->pcidev->dev, "Invalid unit number %u or " 2574 "not initialized or not present\n", unit); 2575 ret = -ENXIO; 2576 goto bail; 2577 } 2578 2579 spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); 2580 if (dd->ipath_pd) 2581 for (i = 1; i < dd->ipath_cfgports; i++) { 2582 if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt) 2583 continue; 2584 spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); 2585 ipath_dbg("unit %u port %d is in use " 2586 "(PID %u cmd %s), can't reset\n", 2587 unit, i, 2588 pid_nr(dd->ipath_pd[i]->port_pid), 2589 dd->ipath_pd[i]->port_comm); 2590 ret = -EBUSY; 2591 goto bail; 2592 } 2593 spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); 2594 2595 if (dd->ipath_flags & IPATH_HAS_SEND_DMA) 2596 teardown_sdma(dd); 2597 2598 dd->ipath_flags &= ~IPATH_INITTED; 2599 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); 2600 ret = dd->ipath_f_reset(dd); 2601 if (ret == 1) { 2602 ipath_dbg("Reinitializing unit %u after reset attempt\n", 2603 unit); 2604 ret = ipath_init_chip(dd, 1); 2605 } else 2606 ret = -EAGAIN; 2607 if (ret) 2608 ipath_dev_err(dd, "Reinitialize unit %u after " 2609 "reset failed with 
%d\n", unit, ret); 2610 else 2611 dev_info(&dd->pcidev->dev, "Reinitialized unit %u after " 2612 "resetting\n", unit); 2613 2614bail: 2615 return ret; 2616} 2617 2618/* 2619 * send a signal to all the processes that have the driver open 2620 * through the normal interfaces (i.e., everything other than diags 2621 * interface). Returns number of signalled processes. 2622 */ 2623static int ipath_signal_procs(struct ipath_devdata *dd, int sig) 2624{ 2625 int i, sub, any = 0; 2626 struct pid *pid; 2627 unsigned long flags; 2628 2629 if (!dd->ipath_pd) 2630 return 0; 2631 2632 spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); 2633 for (i = 1; i < dd->ipath_cfgports; i++) { 2634 if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt) 2635 continue; 2636 pid = dd->ipath_pd[i]->port_pid; 2637 if (!pid) 2638 continue; 2639 2640 dev_info(&dd->pcidev->dev, "context %d in use " 2641 "(PID %u), sending signal %d\n", 2642 i, pid_nr(pid), sig); 2643 kill_pid(pid, sig, 1); 2644 any++; 2645 for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) { 2646 pid = dd->ipath_pd[i]->port_subpid[sub]; 2647 if (!pid) 2648 continue; 2649 dev_info(&dd->pcidev->dev, "sub-context " 2650 "%d:%d in use (PID %u), sending " 2651 "signal %d\n", i, sub, pid_nr(pid), sig); 2652 kill_pid(pid, sig, 1); 2653 any++; 2654 } 2655 } 2656 spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); 2657 return any; 2658} 2659 2660static void ipath_hol_signal_down(struct ipath_devdata *dd) 2661{ 2662 if (ipath_signal_procs(dd, SIGSTOP)) 2663 ipath_dbg("Stopped some processes\n"); 2664 ipath_cancel_sends(dd, 1); 2665} 2666 2667 2668static void ipath_hol_signal_up(struct ipath_devdata *dd) 2669{ 2670 if (ipath_signal_procs(dd, SIGCONT)) 2671 ipath_dbg("Continued some processes\n"); 2672} 2673 2674/* 2675 * link is down, stop any users processes, and flush pending sends 2676 * to prevent HoL blocking, then start the HoL timer that 2677 * periodically continues, then stop procs, so they can detect 2678 * link down if they want, and do something about it. 2679 * Timer may already be running, so use mod_timer, not add_timer. 2680 */ 2681void ipath_hol_down(struct ipath_devdata *dd) 2682{ 2683 dd->ipath_hol_state = IPATH_HOL_DOWN; 2684 ipath_hol_signal_down(dd); 2685 dd->ipath_hol_next = IPATH_HOL_DOWNCONT; 2686 dd->ipath_hol_timer.expires = jiffies + 2687 msecs_to_jiffies(ipath_hol_timeout_ms); 2688 mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires); 2689} 2690 2691/* 2692 * link is up, continue any user processes, and ensure timer 2693 * is a nop, if running. Let timer keep running, if set; it 2694 * will nop when it sees the link is up 2695 */ 2696void ipath_hol_up(struct ipath_devdata *dd) 2697{ 2698 ipath_hol_signal_up(dd); 2699 dd->ipath_hol_state = IPATH_HOL_UP; 2700} 2701 2702/* 2703 * toggle the running/not running state of user proceses 2704 * to prevent HoL blocking on chip resources, but still allow 2705 * user processes to do link down special case handling. 
2706 * Should only be called via the timer 2707 */ 2708void ipath_hol_event(unsigned long opaque) 2709{ 2710 struct ipath_devdata *dd = (struct ipath_devdata *)opaque; 2711 2712 if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP 2713 && dd->ipath_hol_state != IPATH_HOL_UP) { 2714 dd->ipath_hol_next = IPATH_HOL_DOWNCONT; 2715 ipath_dbg("Stopping processes\n"); 2716 ipath_hol_signal_down(dd); 2717 } else { /* may do "extra" if also in ipath_hol_up() */ 2718 dd->ipath_hol_next = IPATH_HOL_DOWNSTOP; 2719 ipath_dbg("Continuing processes\n"); 2720 ipath_hol_signal_up(dd); 2721 } 2722 if (dd->ipath_hol_state == IPATH_HOL_UP) 2723 ipath_dbg("link's up, don't resched timer\n"); 2724 else { 2725 dd->ipath_hol_timer.expires = jiffies + 2726 msecs_to_jiffies(ipath_hol_timeout_ms); 2727 mod_timer(&dd->ipath_hol_timer, 2728 dd->ipath_hol_timer.expires); 2729 } 2730} 2731 2732int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv) 2733{ 2734 u64 val; 2735 2736 if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK) 2737 return -1; 2738 if (dd->ipath_rx_pol_inv != new_pol_inv) { 2739 dd->ipath_rx_pol_inv = new_pol_inv; 2740 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); 2741 val &= ~(INFINIPATH_XGXS_RX_POL_MASK << 2742 INFINIPATH_XGXS_RX_POL_SHIFT); 2743 val |= ((u64)dd->ipath_rx_pol_inv) << 2744 INFINIPATH_XGXS_RX_POL_SHIFT; 2745 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); 2746 } 2747 return 0; 2748} 2749 2750/* 2751 * Disable and enable the armlaunch error. Used for PIO bandwidth testing on 2752 * the 7220, which is count-based, rather than trigger-based. Safe for the 2753 * driver check, since it's at init. Not completely safe when used for 2754 * user-mode checking, since some error checking can be lost, but not 2755 * particularly risky, and only has problematic side-effects in the face of 2756 * very buggy user code. There is no reference counting, but that's also 2757 * fine, given the intended use. 2758 */ 2759void ipath_enable_armlaunch(struct ipath_devdata *dd) 2760{ 2761 dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH; 2762 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 2763 INFINIPATH_E_SPIOARMLAUNCH); 2764 dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH; 2765 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 2766 dd->ipath_errormask); 2767} 2768 2769void ipath_disable_armlaunch(struct ipath_devdata *dd) 2770{ 2771 /* so don't re-enable if already set */ 2772 dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH; 2773 dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH; 2774 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 2775 dd->ipath_errormask); 2776} 2777 2778module_init(infinipath_init); 2779module_exit(infinipath_cleanup); 2780