1/* 2 * File: msi.c 3 * Purpose: PCI Message Signaled Interrupt (MSI) 4 * 5 * Copyright (C) 2003-2004 Intel 6 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) 7 */ 8 9#include <linux/err.h> 10#include <linux/mm.h> 11#include <linux/irq.h> 12#include <linux/interrupt.h> 13#include <linux/export.h> 14#include <linux/ioport.h> 15#include <linux/pci.h> 16#include <linux/proc_fs.h> 17#include <linux/msi.h> 18#include <linux/smp.h> 19#include <linux/errno.h> 20#include <linux/io.h> 21#include <linux/slab.h> 22#include <linux/irqdomain.h> 23 24#include "pci.h" 25 26static int pci_msi_enable = 1; 27int pci_msi_ignore_mask; 28 29#define msix_table_size(flags) ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) 30 31#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN 32static struct irq_domain *pci_msi_default_domain; 33static DEFINE_MUTEX(pci_msi_domain_lock); 34 35struct irq_domain * __weak arch_get_pci_msi_domain(struct pci_dev *dev) 36{ 37 return pci_msi_default_domain; 38} 39 40static struct irq_domain *pci_msi_get_domain(struct pci_dev *dev) 41{ 42 struct irq_domain *domain = NULL; 43 44 if (dev->bus->msi) 45 domain = dev->bus->msi->domain; 46 if (!domain) 47 domain = arch_get_pci_msi_domain(dev); 48 49 return domain; 50} 51 52static int pci_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 53{ 54 struct irq_domain *domain; 55 56 domain = pci_msi_get_domain(dev); 57 if (domain) 58 return pci_msi_domain_alloc_irqs(domain, dev, nvec, type); 59 60 return arch_setup_msi_irqs(dev, nvec, type); 61} 62 63static void pci_msi_teardown_msi_irqs(struct pci_dev *dev) 64{ 65 struct irq_domain *domain; 66 67 domain = pci_msi_get_domain(dev); 68 if (domain) 69 pci_msi_domain_free_irqs(domain, dev); 70 else 71 arch_teardown_msi_irqs(dev); 72} 73#else 74#define pci_msi_setup_msi_irqs arch_setup_msi_irqs 75#define pci_msi_teardown_msi_irqs arch_teardown_msi_irqs 76#endif 77 78/* Arch hooks */ 79 80struct msi_controller * __weak pcibios_msi_controller(struct pci_dev *dev) 81{ 82 return NULL; 83} 84 
85static struct msi_controller *pci_msi_controller(struct pci_dev *dev) 86{ 87 struct msi_controller *msi_ctrl = dev->bus->msi; 88 89 if (msi_ctrl) 90 return msi_ctrl; 91 92 return pcibios_msi_controller(dev); 93} 94 95int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) 96{ 97 struct msi_controller *chip = pci_msi_controller(dev); 98 int err; 99 100 if (!chip || !chip->setup_irq) 101 return -EINVAL; 102 103 err = chip->setup_irq(chip, dev, desc); 104 if (err < 0) 105 return err; 106 107 irq_set_chip_data(desc->irq, chip); 108 109 return 0; 110} 111 112void __weak arch_teardown_msi_irq(unsigned int irq) 113{ 114 struct msi_controller *chip = irq_get_chip_data(irq); 115 116 if (!chip || !chip->teardown_irq) 117 return; 118 119 chip->teardown_irq(chip, irq); 120} 121 122int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 123{ 124 struct msi_desc *entry; 125 int ret; 126 127 /* 128 * If an architecture wants to support multiple MSI, it needs to 129 * override arch_setup_msi_irqs() 130 */ 131 if (type == PCI_CAP_ID_MSI && nvec > 1) 132 return 1; 133 134 list_for_each_entry(entry, &dev->msi_list, list) { 135 ret = arch_setup_msi_irq(dev, entry); 136 if (ret < 0) 137 return ret; 138 if (ret > 0) 139 return -ENOSPC; 140 } 141 142 return 0; 143} 144 145/* 146 * We have a default implementation available as a separate non-weak 147 * function, as it is used by the Xen x86 PCI code 148 */ 149void default_teardown_msi_irqs(struct pci_dev *dev) 150{ 151 int i; 152 struct msi_desc *entry; 153 154 list_for_each_entry(entry, &dev->msi_list, list) 155 if (entry->irq) 156 for (i = 0; i < entry->nvec_used; i++) 157 arch_teardown_msi_irq(entry->irq + i); 158} 159 160void __weak arch_teardown_msi_irqs(struct pci_dev *dev) 161{ 162 return default_teardown_msi_irqs(dev); 163} 164 165static void default_restore_msi_irq(struct pci_dev *dev, int irq) 166{ 167 struct msi_desc *entry; 168 169 entry = NULL; 170 if (dev->msix_enabled) { 171 
list_for_each_entry(entry, &dev->msi_list, list) { 172 if (irq == entry->irq) 173 break; 174 } 175 } else if (dev->msi_enabled) { 176 entry = irq_get_msi_desc(irq); 177 } 178 179 if (entry) 180 __pci_write_msi_msg(entry, &entry->msg); 181} 182 183void __weak arch_restore_msi_irqs(struct pci_dev *dev) 184{ 185 return default_restore_msi_irqs(dev); 186} 187 188static void msi_set_enable(struct pci_dev *dev, int enable) 189{ 190 u16 control; 191 192 pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); 193 control &= ~PCI_MSI_FLAGS_ENABLE; 194 if (enable) 195 control |= PCI_MSI_FLAGS_ENABLE; 196 pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control); 197} 198 199static void msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set) 200{ 201 u16 ctrl; 202 203 pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl); 204 ctrl &= ~clear; 205 ctrl |= set; 206 pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl); 207} 208 209static inline __attribute_const__ u32 msi_mask(unsigned x) 210{ 211 /* Don't shift by >= width of type */ 212 if (x >= 5) 213 return 0xffffffff; 214 return (1 << (1 << x)) - 1; 215} 216 217/* 218 * PCI 2.3 does not specify mask bits for each MSI interrupt. Attempting to 219 * mask all MSI interrupts by clearing the MSI enable bit does not work 220 * reliably as devices without an INTx disable bit will then generate a 221 * level IRQ which will never be cleared. 
222 */ 223u32 __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) 224{ 225 u32 mask_bits = desc->masked; 226 227 if (pci_msi_ignore_mask || !desc->msi_attrib.maskbit) 228 return 0; 229 230 mask_bits &= ~mask; 231 mask_bits |= flag; 232 pci_write_config_dword(desc->dev, desc->mask_pos, mask_bits); 233 234 return mask_bits; 235} 236 237static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) 238{ 239 desc->masked = __pci_msi_desc_mask_irq(desc, mask, flag); 240} 241 242/* 243 * This internal function does not flush PCI writes to the device. 244 * All users must ensure that they read from the device before either 245 * assuming that the device state is up to date, or returning out of this 246 * file. This saves a few milliseconds when initialising devices with lots 247 * of MSI-X interrupts. 248 */ 249u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag) 250{ 251 u32 mask_bits = desc->masked; 252 unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + 253 PCI_MSIX_ENTRY_VECTOR_CTRL; 254 255 if (pci_msi_ignore_mask) 256 return 0; 257 258 mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; 259 if (flag) 260 mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT; 261 writel(mask_bits, desc->mask_base + offset); 262 263 return mask_bits; 264} 265 266static void msix_mask_irq(struct msi_desc *desc, u32 flag) 267{ 268 desc->masked = __pci_msix_desc_mask_irq(desc, flag); 269} 270 271static void msi_set_mask_bit(struct irq_data *data, u32 flag) 272{ 273 struct msi_desc *desc = irq_data_get_msi(data); 274 275 if (desc->msi_attrib.is_msix) { 276 msix_mask_irq(desc, flag); 277 readl(desc->mask_base); /* Flush write to device */ 278 } else { 279 unsigned offset = data->irq - desc->irq; 280 msi_mask_irq(desc, 1 << offset, flag << offset); 281 } 282} 283 284/** 285 * pci_msi_mask_irq - Generic irq chip callback to mask PCI/MSI interrupts 286 * @data: pointer to irqdata associated to that interrupt 287 */ 288void pci_msi_mask_irq(struct irq_data *data) 289{ 
290 msi_set_mask_bit(data, 1); 291} 292 293/** 294 * pci_msi_unmask_irq - Generic irq chip callback to unmask PCI/MSI interrupts 295 * @data: pointer to irqdata associated to that interrupt 296 */ 297void pci_msi_unmask_irq(struct irq_data *data) 298{ 299 msi_set_mask_bit(data, 0); 300} 301 302void default_restore_msi_irqs(struct pci_dev *dev) 303{ 304 struct msi_desc *entry; 305 306 list_for_each_entry(entry, &dev->msi_list, list) 307 default_restore_msi_irq(dev, entry->irq); 308} 309 310void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) 311{ 312 BUG_ON(entry->dev->current_state != PCI_D0); 313 314 if (entry->msi_attrib.is_msix) { 315 void __iomem *base = entry->mask_base + 316 entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; 317 318 msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR); 319 msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR); 320 msg->data = readl(base + PCI_MSIX_ENTRY_DATA); 321 } else { 322 struct pci_dev *dev = entry->dev; 323 int pos = dev->msi_cap; 324 u16 data; 325 326 pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, 327 &msg->address_lo); 328 if (entry->msi_attrib.is_64) { 329 pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, 330 &msg->address_hi); 331 pci_read_config_word(dev, pos + PCI_MSI_DATA_64, &data); 332 } else { 333 msg->address_hi = 0; 334 pci_read_config_word(dev, pos + PCI_MSI_DATA_32, &data); 335 } 336 msg->data = data; 337 } 338} 339 340void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) 341{ 342 if (entry->dev->current_state != PCI_D0) { 343 /* Don't touch the hardware now */ 344 } else if (entry->msi_attrib.is_msix) { 345 void __iomem *base; 346 base = entry->mask_base + 347 entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; 348 349 writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR); 350 writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR); 351 writel(msg->data, base + PCI_MSIX_ENTRY_DATA); 352 } else { 353 struct pci_dev *dev = entry->dev; 354 int pos = 
dev->msi_cap; 355 u16 msgctl; 356 357 pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); 358 msgctl &= ~PCI_MSI_FLAGS_QSIZE; 359 msgctl |= entry->msi_attrib.multiple << 4; 360 pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl); 361 362 pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, 363 msg->address_lo); 364 if (entry->msi_attrib.is_64) { 365 pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, 366 msg->address_hi); 367 pci_write_config_word(dev, pos + PCI_MSI_DATA_64, 368 msg->data); 369 } else { 370 pci_write_config_word(dev, pos + PCI_MSI_DATA_32, 371 msg->data); 372 } 373 } 374 entry->msg = *msg; 375} 376 377void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) 378{ 379 struct msi_desc *entry = irq_get_msi_desc(irq); 380 381 __pci_write_msi_msg(entry, msg); 382} 383EXPORT_SYMBOL_GPL(pci_write_msi_msg); 384 385static void free_msi_irqs(struct pci_dev *dev) 386{ 387 struct msi_desc *entry, *tmp; 388 struct attribute **msi_attrs; 389 struct device_attribute *dev_attr; 390 int i, count = 0; 391 392 list_for_each_entry(entry, &dev->msi_list, list) 393 if (entry->irq) 394 for (i = 0; i < entry->nvec_used; i++) 395 BUG_ON(irq_has_action(entry->irq + i)); 396 397 pci_msi_teardown_msi_irqs(dev); 398 399 list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) { 400 if (entry->msi_attrib.is_msix) { 401 if (list_is_last(&entry->list, &dev->msi_list)) 402 iounmap(entry->mask_base); 403 } 404 405 list_del(&entry->list); 406 kfree(entry); 407 } 408 409 if (dev->msi_irq_groups) { 410 sysfs_remove_groups(&dev->dev.kobj, dev->msi_irq_groups); 411 msi_attrs = dev->msi_irq_groups[0]->attrs; 412 while (msi_attrs[count]) { 413 dev_attr = container_of(msi_attrs[count], 414 struct device_attribute, attr); 415 kfree(dev_attr->attr.name); 416 kfree(dev_attr); 417 ++count; 418 } 419 kfree(msi_attrs); 420 kfree(dev->msi_irq_groups[0]); 421 kfree(dev->msi_irq_groups); 422 dev->msi_irq_groups = NULL; 423 } 424} 425 426static struct msi_desc 
*alloc_msi_entry(struct pci_dev *dev) 427{ 428 struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL); 429 if (!desc) 430 return NULL; 431 432 INIT_LIST_HEAD(&desc->list); 433 desc->dev = dev; 434 435 return desc; 436} 437 438static void pci_intx_for_msi(struct pci_dev *dev, int enable) 439{ 440 if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG)) 441 pci_intx(dev, enable); 442} 443 444static void __pci_restore_msi_state(struct pci_dev *dev) 445{ 446 u16 control; 447 struct msi_desc *entry; 448 449 if (!dev->msi_enabled) 450 return; 451 452 entry = irq_get_msi_desc(dev->irq); 453 454 pci_intx_for_msi(dev, 0); 455 msi_set_enable(dev, 0); 456 arch_restore_msi_irqs(dev); 457 458 pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); 459 msi_mask_irq(entry, msi_mask(entry->msi_attrib.multi_cap), 460 entry->masked); 461 control &= ~PCI_MSI_FLAGS_QSIZE; 462 control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE; 463 pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control); 464} 465 466static void __pci_restore_msix_state(struct pci_dev *dev) 467{ 468 struct msi_desc *entry; 469 470 if (!dev->msix_enabled) 471 return; 472 BUG_ON(list_empty(&dev->msi_list)); 473 474 /* route the table */ 475 pci_intx_for_msi(dev, 0); 476 msix_clear_and_set_ctrl(dev, 0, 477 PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL); 478 479 arch_restore_msi_irqs(dev); 480 list_for_each_entry(entry, &dev->msi_list, list) 481 msix_mask_irq(entry, entry->masked); 482 483 msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); 484} 485 486void pci_restore_msi_state(struct pci_dev *dev) 487{ 488 __pci_restore_msi_state(dev); 489 __pci_restore_msix_state(dev); 490} 491EXPORT_SYMBOL_GPL(pci_restore_msi_state); 492 493static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr, 494 char *buf) 495{ 496 struct msi_desc *entry; 497 unsigned long irq; 498 int retval; 499 500 retval = kstrtoul(attr->attr.name, 10, &irq); 501 if (retval) 502 return 
retval; 503 504 entry = irq_get_msi_desc(irq); 505 if (entry) 506 return sprintf(buf, "%s\n", 507 entry->msi_attrib.is_msix ? "msix" : "msi"); 508 509 return -ENODEV; 510} 511 512static int populate_msi_sysfs(struct pci_dev *pdev) 513{ 514 struct attribute **msi_attrs; 515 struct attribute *msi_attr; 516 struct device_attribute *msi_dev_attr; 517 struct attribute_group *msi_irq_group; 518 const struct attribute_group **msi_irq_groups; 519 struct msi_desc *entry; 520 int ret = -ENOMEM; 521 int num_msi = 0; 522 int count = 0; 523 524 /* Determine how many msi entries we have */ 525 list_for_each_entry(entry, &pdev->msi_list, list) 526 ++num_msi; 527 if (!num_msi) 528 return 0; 529 530 /* Dynamically create the MSI attributes for the PCI device */ 531 msi_attrs = kzalloc(sizeof(void *) * (num_msi + 1), GFP_KERNEL); 532 if (!msi_attrs) 533 return -ENOMEM; 534 list_for_each_entry(entry, &pdev->msi_list, list) { 535 msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL); 536 if (!msi_dev_attr) 537 goto error_attrs; 538 msi_attrs[count] = &msi_dev_attr->attr; 539 540 sysfs_attr_init(&msi_dev_attr->attr); 541 msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d", 542 entry->irq); 543 if (!msi_dev_attr->attr.name) 544 goto error_attrs; 545 msi_dev_attr->attr.mode = S_IRUGO; 546 msi_dev_attr->show = msi_mode_show; 547 ++count; 548 } 549 550 msi_irq_group = kzalloc(sizeof(*msi_irq_group), GFP_KERNEL); 551 if (!msi_irq_group) 552 goto error_attrs; 553 msi_irq_group->name = "msi_irqs"; 554 msi_irq_group->attrs = msi_attrs; 555 556 msi_irq_groups = kzalloc(sizeof(void *) * 2, GFP_KERNEL); 557 if (!msi_irq_groups) 558 goto error_irq_group; 559 msi_irq_groups[0] = msi_irq_group; 560 561 ret = sysfs_create_groups(&pdev->dev.kobj, msi_irq_groups); 562 if (ret) 563 goto error_irq_groups; 564 pdev->msi_irq_groups = msi_irq_groups; 565 566 return 0; 567 568error_irq_groups: 569 kfree(msi_irq_groups); 570error_irq_group: 571 kfree(msi_irq_group); 572error_attrs: 573 count = 0; 574 
msi_attr = msi_attrs[count]; 575 while (msi_attr) { 576 msi_dev_attr = container_of(msi_attr, struct device_attribute, attr); 577 kfree(msi_attr->name); 578 kfree(msi_dev_attr); 579 ++count; 580 msi_attr = msi_attrs[count]; 581 } 582 kfree(msi_attrs); 583 return ret; 584} 585 586static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec) 587{ 588 u16 control; 589 struct msi_desc *entry; 590 591 /* MSI Entry Initialization */ 592 entry = alloc_msi_entry(dev); 593 if (!entry) 594 return NULL; 595 596 pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); 597 598 entry->msi_attrib.is_msix = 0; 599 entry->msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT); 600 entry->msi_attrib.entry_nr = 0; 601 entry->msi_attrib.maskbit = !!(control & PCI_MSI_FLAGS_MASKBIT); 602 entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */ 603 entry->msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1; 604 entry->msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec)); 605 entry->nvec_used = nvec; 606 607 if (control & PCI_MSI_FLAGS_64BIT) 608 entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64; 609 else 610 entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_32; 611 612 /* Save the initial mask status */ 613 if (entry->msi_attrib.maskbit) 614 pci_read_config_dword(dev, entry->mask_pos, &entry->masked); 615 616 return entry; 617} 618 619static int msi_verify_entries(struct pci_dev *dev) 620{ 621 struct msi_desc *entry; 622 623 list_for_each_entry(entry, &dev->msi_list, list) { 624 if (!dev->no_64bit_msi || !entry->msg.address_hi) 625 continue; 626 dev_err(&dev->dev, "Device has broken 64-bit MSI but arch" 627 " tried to assign one above 4G\n"); 628 return -EIO; 629 } 630 return 0; 631} 632 633/** 634 * msi_capability_init - configure device's MSI capability structure 635 * @dev: pointer to the pci_dev data structure of MSI device function 636 * @nvec: number of interrupts to allocate 637 * 638 * Setup the MSI capability structure of the device with the 
requested 639 * number of interrupts. A return value of zero indicates the successful 640 * setup of an entry with the new MSI irq. A negative return value indicates 641 * an error, and a positive return value indicates the number of interrupts 642 * which could have been allocated. 643 */ 644static int msi_capability_init(struct pci_dev *dev, int nvec) 645{ 646 struct msi_desc *entry; 647 int ret; 648 unsigned mask; 649 650 msi_set_enable(dev, 0); /* Disable MSI during set up */ 651 652 entry = msi_setup_entry(dev, nvec); 653 if (!entry) 654 return -ENOMEM; 655 656 /* All MSIs are unmasked by default, Mask them all */ 657 mask = msi_mask(entry->msi_attrib.multi_cap); 658 msi_mask_irq(entry, mask, mask); 659 660 list_add_tail(&entry->list, &dev->msi_list); 661 662 /* Configure MSI capability structure */ 663 ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI); 664 if (ret) { 665 msi_mask_irq(entry, mask, ~mask); 666 free_msi_irqs(dev); 667 return ret; 668 } 669 670 ret = msi_verify_entries(dev); 671 if (ret) { 672 msi_mask_irq(entry, mask, ~mask); 673 free_msi_irqs(dev); 674 return ret; 675 } 676 677 ret = populate_msi_sysfs(dev); 678 if (ret) { 679 msi_mask_irq(entry, mask, ~mask); 680 free_msi_irqs(dev); 681 return ret; 682 } 683 684 /* Set MSI enabled bits */ 685 pci_intx_for_msi(dev, 0); 686 msi_set_enable(dev, 1); 687 dev->msi_enabled = 1; 688 689 dev->irq = entry->irq; 690 return 0; 691} 692 693static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries) 694{ 695 resource_size_t phys_addr; 696 u32 table_offset; 697 unsigned long flags; 698 u8 bir; 699 700 pci_read_config_dword(dev, dev->msix_cap + PCI_MSIX_TABLE, 701 &table_offset); 702 bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR); 703 flags = pci_resource_flags(dev, bir); 704 if (!flags || (flags & IORESOURCE_UNSET)) 705 return NULL; 706 707 table_offset &= PCI_MSIX_TABLE_OFFSET; 708 phys_addr = pci_resource_start(dev, bir) + table_offset; 709 710 return ioremap_nocache(phys_addr, 
nr_entries * PCI_MSIX_ENTRY_SIZE); 711} 712 713static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, 714 struct msix_entry *entries, int nvec) 715{ 716 struct msi_desc *entry; 717 int i; 718 719 for (i = 0; i < nvec; i++) { 720 entry = alloc_msi_entry(dev); 721 if (!entry) { 722 if (!i) 723 iounmap(base); 724 else 725 free_msi_irqs(dev); 726 /* No enough memory. Don't try again */ 727 return -ENOMEM; 728 } 729 730 entry->msi_attrib.is_msix = 1; 731 entry->msi_attrib.is_64 = 1; 732 entry->msi_attrib.entry_nr = entries[i].entry; 733 entry->msi_attrib.default_irq = dev->irq; 734 entry->mask_base = base; 735 entry->nvec_used = 1; 736 737 list_add_tail(&entry->list, &dev->msi_list); 738 } 739 740 return 0; 741} 742 743static void msix_program_entries(struct pci_dev *dev, 744 struct msix_entry *entries) 745{ 746 struct msi_desc *entry; 747 int i = 0; 748 749 list_for_each_entry(entry, &dev->msi_list, list) { 750 int offset = entries[i].entry * PCI_MSIX_ENTRY_SIZE + 751 PCI_MSIX_ENTRY_VECTOR_CTRL; 752 753 entries[i].vector = entry->irq; 754 entry->masked = readl(entry->mask_base + offset); 755 msix_mask_irq(entry, 1); 756 i++; 757 } 758} 759 760/** 761 * msix_capability_init - configure device's MSI-X capability 762 * @dev: pointer to the pci_dev data structure of MSI-X device function 763 * @entries: pointer to an array of struct msix_entry entries 764 * @nvec: number of @entries 765 * 766 * Setup the MSI-X capability structure of device function with a 767 * single MSI-X irq. A return of zero indicates the successful setup of 768 * requested MSI-X entries with allocated irqs or non-zero for otherwise. 
769 **/ 770static int msix_capability_init(struct pci_dev *dev, 771 struct msix_entry *entries, int nvec) 772{ 773 int ret; 774 u16 control; 775 void __iomem *base; 776 777 /* Ensure MSI-X is disabled while it is set up */ 778 msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); 779 780 pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control); 781 /* Request & Map MSI-X table region */ 782 base = msix_map_region(dev, msix_table_size(control)); 783 if (!base) 784 return -ENOMEM; 785 786 ret = msix_setup_entries(dev, base, entries, nvec); 787 if (ret) 788 return ret; 789 790 ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX); 791 if (ret) 792 goto out_avail; 793 794 /* Check if all MSI entries honor device restrictions */ 795 ret = msi_verify_entries(dev); 796 if (ret) 797 goto out_free; 798 799 /* 800 * Some devices require MSI-X to be enabled before we can touch the 801 * MSI-X registers. We need to mask all the vectors to prevent 802 * interrupts coming in before they're fully set up. 803 */ 804 msix_clear_and_set_ctrl(dev, 0, 805 PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE); 806 807 msix_program_entries(dev, entries); 808 809 ret = populate_msi_sysfs(dev); 810 if (ret) 811 goto out_free; 812 813 /* Set MSI-X enabled bits and unmask the function */ 814 pci_intx_for_msi(dev, 0); 815 dev->msix_enabled = 1; 816 817 msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); 818 819 return 0; 820 821out_avail: 822 if (ret < 0) { 823 /* 824 * If we had some success, report the number of irqs 825 * we succeeded in setting up. 
826 */ 827 struct msi_desc *entry; 828 int avail = 0; 829 830 list_for_each_entry(entry, &dev->msi_list, list) { 831 if (entry->irq != 0) 832 avail++; 833 } 834 if (avail != 0) 835 ret = avail; 836 } 837 838out_free: 839 free_msi_irqs(dev); 840 841 return ret; 842} 843 844/** 845 * pci_msi_supported - check whether MSI may be enabled on a device 846 * @dev: pointer to the pci_dev data structure of MSI device function 847 * @nvec: how many MSIs have been requested ? 848 * 849 * Look at global flags, the device itself, and its parent buses 850 * to determine if MSI/-X are supported for the device. If MSI/-X is 851 * supported return 1, else return 0. 852 **/ 853static int pci_msi_supported(struct pci_dev *dev, int nvec) 854{ 855 struct pci_bus *bus; 856 857 /* MSI must be globally enabled and supported by the device */ 858 if (!pci_msi_enable) 859 return 0; 860 861 if (!dev || dev->no_msi || dev->current_state != PCI_D0) 862 return 0; 863 864 /* 865 * You can't ask to have 0 or less MSIs configured. 866 * a) it's stupid .. 867 * b) the list manipulation code assumes nvec >= 1. 868 */ 869 if (nvec < 1) 870 return 0; 871 872 /* 873 * Any bridge which does NOT route MSI transactions from its 874 * secondary bus to its primary bus must set NO_MSI flag on 875 * the secondary pci_bus. 876 * We expect only arch-specific PCI host bus controller driver 877 * or quirks for specific PCI bridges to be setting NO_MSI. 878 */ 879 for (bus = dev->bus; bus; bus = bus->parent) 880 if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI) 881 return 0; 882 883 return 1; 884} 885 886/** 887 * pci_msi_vec_count - Return the number of MSI vectors a device can send 888 * @dev: device to report about 889 * 890 * This function returns the number of MSI vectors a device requested via 891 * Multiple Message Capable register. It returns a negative errno if the 892 * device is not capable sending MSI interrupts. 
Otherwise, the call succeeds 893 * and returns a power of two, up to a maximum of 2^5 (32), according to the 894 * MSI specification. 895 **/ 896int pci_msi_vec_count(struct pci_dev *dev) 897{ 898 int ret; 899 u16 msgctl; 900 901 if (!dev->msi_cap) 902 return -EINVAL; 903 904 pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl); 905 ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1); 906 907 return ret; 908} 909EXPORT_SYMBOL(pci_msi_vec_count); 910 911void pci_msi_shutdown(struct pci_dev *dev) 912{ 913 struct msi_desc *desc; 914 u32 mask; 915 916 if (!pci_msi_enable || !dev || !dev->msi_enabled) 917 return; 918 919 BUG_ON(list_empty(&dev->msi_list)); 920 desc = list_first_entry(&dev->msi_list, struct msi_desc, list); 921 922 msi_set_enable(dev, 0); 923 pci_intx_for_msi(dev, 1); 924 dev->msi_enabled = 0; 925 926 /* Return the device with MSI unmasked as initial states */ 927 mask = msi_mask(desc->msi_attrib.multi_cap); 928 /* Keep cached state to be restored */ 929 __pci_msi_desc_mask_irq(desc, mask, ~mask); 930 931 /* Restore dev->irq to its default pin-assertion irq */ 932 dev->irq = desc->msi_attrib.default_irq; 933} 934 935void pci_disable_msi(struct pci_dev *dev) 936{ 937 if (!pci_msi_enable || !dev || !dev->msi_enabled) 938 return; 939 940 pci_msi_shutdown(dev); 941 free_msi_irqs(dev); 942} 943EXPORT_SYMBOL(pci_disable_msi); 944 945/** 946 * pci_msix_vec_count - return the number of device's MSI-X table entries 947 * @dev: pointer to the pci_dev data structure of MSI-X device function 948 * This function returns the number of device's MSI-X table entries and 949 * therefore the number of MSI-X vectors device is capable of sending. 950 * It returns a negative errno if the device is not capable of sending MSI-X 951 * interrupts. 
952 **/ 953int pci_msix_vec_count(struct pci_dev *dev) 954{ 955 u16 control; 956 957 if (!dev->msix_cap) 958 return -EINVAL; 959 960 pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control); 961 return msix_table_size(control); 962} 963EXPORT_SYMBOL(pci_msix_vec_count); 964 965/** 966 * pci_enable_msix - configure device's MSI-X capability structure 967 * @dev: pointer to the pci_dev data structure of MSI-X device function 968 * @entries: pointer to an array of MSI-X entries 969 * @nvec: number of MSI-X irqs requested for allocation by device driver 970 * 971 * Setup the MSI-X capability structure of device function with the number 972 * of requested irqs upon its software driver call to request for 973 * MSI-X mode enabled on its hardware device function. A return of zero 974 * indicates the successful configuration of MSI-X capability structure 975 * with new allocated MSI-X irqs. A return of < 0 indicates a failure. 976 * Or a return of > 0 indicates that driver request is exceeding the number 977 * of irqs or MSI-X vectors available. Driver should use the returned value to 978 * re-send its request. 
979 **/ 980int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec) 981{ 982 int nr_entries; 983 int i, j; 984 985 if (!pci_msi_supported(dev, nvec)) 986 return -EINVAL; 987 988 if (!entries) 989 return -EINVAL; 990 991 nr_entries = pci_msix_vec_count(dev); 992 if (nr_entries < 0) 993 return nr_entries; 994 if (nvec > nr_entries) 995 return nr_entries; 996 997 /* Check for any invalid entries */ 998 for (i = 0; i < nvec; i++) { 999 if (entries[i].entry >= nr_entries) 1000 return -EINVAL; /* invalid entry */ 1001 for (j = i + 1; j < nvec; j++) { 1002 if (entries[i].entry == entries[j].entry) 1003 return -EINVAL; /* duplicate entry */ 1004 } 1005 } 1006 WARN_ON(!!dev->msix_enabled); 1007 1008 /* Check whether driver already requested for MSI irq */ 1009 if (dev->msi_enabled) { 1010 dev_info(&dev->dev, "can't enable MSI-X (MSI IRQ already assigned)\n"); 1011 return -EINVAL; 1012 } 1013 return msix_capability_init(dev, entries, nvec); 1014} 1015EXPORT_SYMBOL(pci_enable_msix); 1016 1017void pci_msix_shutdown(struct pci_dev *dev) 1018{ 1019 struct msi_desc *entry; 1020 1021 if (!pci_msi_enable || !dev || !dev->msix_enabled) 1022 return; 1023 1024 /* Return the device with MSI-X masked as initial states */ 1025 list_for_each_entry(entry, &dev->msi_list, list) { 1026 /* Keep cached states to be restored */ 1027 __pci_msix_desc_mask_irq(entry, 1); 1028 } 1029 1030 msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); 1031 pci_intx_for_msi(dev, 1); 1032 dev->msix_enabled = 0; 1033} 1034 1035void pci_disable_msix(struct pci_dev *dev) 1036{ 1037 if (!pci_msi_enable || !dev || !dev->msix_enabled) 1038 return; 1039 1040 pci_msix_shutdown(dev); 1041 free_msi_irqs(dev); 1042} 1043EXPORT_SYMBOL(pci_disable_msix); 1044 1045void pci_no_msi(void) 1046{ 1047 pci_msi_enable = 0; 1048} 1049 1050/** 1051 * pci_msi_enabled - is MSI enabled? 1052 * 1053 * Returns true if MSI has not been disabled by the command-line option 1054 * pci=nomsi. 
1055 **/ 1056int pci_msi_enabled(void) 1057{ 1058 return pci_msi_enable; 1059} 1060EXPORT_SYMBOL(pci_msi_enabled); 1061 1062void pci_msi_init_pci_dev(struct pci_dev *dev) 1063{ 1064 INIT_LIST_HEAD(&dev->msi_list); 1065 1066 /* Disable the msi hardware to avoid screaming interrupts 1067 * during boot. This is the power on reset default so 1068 * usually this should be a noop. 1069 */ 1070 dev->msi_cap = pci_find_capability(dev, PCI_CAP_ID_MSI); 1071 if (dev->msi_cap) 1072 msi_set_enable(dev, 0); 1073 1074 dev->msix_cap = pci_find_capability(dev, PCI_CAP_ID_MSIX); 1075 if (dev->msix_cap) 1076 msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); 1077} 1078 1079/** 1080 * pci_enable_msi_range - configure device's MSI capability structure 1081 * @dev: device to configure 1082 * @minvec: minimal number of interrupts to configure 1083 * @maxvec: maximum number of interrupts to configure 1084 * 1085 * This function tries to allocate a maximum possible number of interrupts in a 1086 * range between @minvec and @maxvec. It returns a negative errno if an error 1087 * occurs. If it succeeds, it returns the actual number of interrupts allocated 1088 * and updates the @dev's irq member to the lowest new interrupt number; 1089 * the other interrupt numbers allocated to this device are consecutive. 
1090 **/ 1091int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec) 1092{ 1093 int nvec; 1094 int rc; 1095 1096 if (!pci_msi_supported(dev, minvec)) 1097 return -EINVAL; 1098 1099 WARN_ON(!!dev->msi_enabled); 1100 1101 /* Check whether driver already requested MSI-X irqs */ 1102 if (dev->msix_enabled) { 1103 dev_info(&dev->dev, 1104 "can't enable MSI (MSI-X already enabled)\n"); 1105 return -EINVAL; 1106 } 1107 1108 if (maxvec < minvec) 1109 return -ERANGE; 1110 1111 nvec = pci_msi_vec_count(dev); 1112 if (nvec < 0) 1113 return nvec; 1114 else if (nvec < minvec) 1115 return -EINVAL; 1116 else if (nvec > maxvec) 1117 nvec = maxvec; 1118 1119 do { 1120 rc = msi_capability_init(dev, nvec); 1121 if (rc < 0) { 1122 return rc; 1123 } else if (rc > 0) { 1124 if (rc < minvec) 1125 return -ENOSPC; 1126 nvec = rc; 1127 } 1128 } while (rc); 1129 1130 return nvec; 1131} 1132EXPORT_SYMBOL(pci_enable_msi_range); 1133 1134/** 1135 * pci_enable_msix_range - configure device's MSI-X capability structure 1136 * @dev: pointer to the pci_dev data structure of MSI-X device function 1137 * @entries: pointer to an array of MSI-X entries 1138 * @minvec: minimum number of MSI-X irqs requested 1139 * @maxvec: maximum number of MSI-X irqs requested 1140 * 1141 * Setup the MSI-X capability structure of device function with a maximum 1142 * possible number of interrupts in the range between @minvec and @maxvec 1143 * upon its software driver call to request for MSI-X mode enabled on its 1144 * hardware device function. It returns a negative errno if an error occurs. 1145 * If it succeeds, it returns the actual number of interrupts allocated and 1146 * indicates the successful configuration of MSI-X capability structure 1147 * with new allocated MSI-X interrupts. 
1148 **/ 1149int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, 1150 int minvec, int maxvec) 1151{ 1152 int nvec = maxvec; 1153 int rc; 1154 1155 if (maxvec < minvec) 1156 return -ERANGE; 1157 1158 do { 1159 rc = pci_enable_msix(dev, entries, nvec); 1160 if (rc < 0) { 1161 return rc; 1162 } else if (rc > 0) { 1163 if (rc < minvec) 1164 return -ENOSPC; 1165 nvec = rc; 1166 } 1167 } while (rc); 1168 1169 return nvec; 1170} 1171EXPORT_SYMBOL(pci_enable_msix_range); 1172 1173#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN 1174/** 1175 * pci_msi_domain_write_msg - Helper to write MSI message to PCI config space 1176 * @irq_data: Pointer to interrupt data of the MSI interrupt 1177 * @msg: Pointer to the message 1178 */ 1179void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg *msg) 1180{ 1181 struct msi_desc *desc = irq_data->msi_desc; 1182 1183 /* 1184 * For MSI-X desc->irq is always equal to irq_data->irq. For 1185 * MSI only the first interrupt of MULTI MSI passes the test. 1186 */ 1187 if (desc->irq == irq_data->irq) 1188 __pci_write_msi_msg(desc, msg); 1189} 1190 1191/** 1192 * pci_msi_domain_calc_hwirq - Generate a unique ID for an MSI source 1193 * @dev: Pointer to the PCI device 1194 * @desc: Pointer to the msi descriptor 1195 * 1196 * The ID number is only used within the irqdomain. 
 */
irq_hw_number_t pci_msi_domain_calc_hwirq(struct pci_dev *dev,
					  struct msi_desc *desc)
{
	/*
	 * Pack the table entry number into the low bits, the 16-bit PCI
	 * device ID at bit 11 and the PCI domain number from bit 27 up.
	 */
	return (irq_hw_number_t)desc->msi_attrib.entry_nr |
		PCI_DEVID(dev->bus->number, dev->devfn) << 11 |
		(pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 27;
}

/* True for a plain-MSI descriptor using more than one vector. */
static inline bool pci_msi_desc_is_multi_msi(struct msi_desc *desc)
{
	return !desc->msi_attrib.is_msix && desc->nvec_used > 1;
}

/**
 * pci_msi_domain_check_cap - Verify that @domain supports the capabilities for @dev
 * @domain: The interrupt domain to check
 * @info: The domain info for verification
 * @dev: The device to check
 *
 * Returns:
 * 0 if the functionality is supported
 * 1 if Multi MSI is requested, but the domain does not support it
 * -ENOTSUPP otherwise
 */
int pci_msi_domain_check_cap(struct irq_domain *domain,
			     struct msi_domain_info *info, struct device *dev)
{
	struct msi_desc *desc = first_pci_msi_entry(to_pci_dev(dev));

	/* Special handling to support pci_enable_msi_range() */
	if (pci_msi_desc_is_multi_msi(desc) &&
	    !(info->flags & MSI_FLAG_MULTI_PCI_MSI))
		return 1;
	else if (desc->msi_attrib.is_msix && !(info->flags & MSI_FLAG_PCI_MSIX))
		return -ENOTSUPP;

	return 0;
}

/*
 * Turn -ENOSPC for a multi-vector MSI allocation into the positive
 * value 1 so the pci_enable_msi_range() retry loop can try again with
 * fewer vectors; all other errors are passed through unchanged.
 */
static int pci_msi_domain_handle_error(struct irq_domain *domain,
				       struct msi_desc *desc, int error)
{
	/* Special handling to support pci_enable_msi_range() */
	if (pci_msi_desc_is_multi_msi(desc) && error == -ENOSPC)
		return 1;

	return error;
}

#ifdef GENERIC_MSI_DOMAIN_OPS
/* Record the descriptor and its unique hwirq in the allocation info. */
static void pci_msi_domain_set_desc(msi_alloc_info_t *arg,
				    struct msi_desc *desc)
{
	arg->desc = desc;
	arg->hwirq = pci_msi_domain_calc_hwirq(msi_desc_to_pci_dev(desc),
					       desc);
}
#else
#define pci_msi_domain_set_desc		NULL
#endif

/* Defaults used by pci_msi_domain_update_dom_ops() below. */
static struct msi_domain_ops pci_msi_domain_ops_default = {
	.set_desc	=
pci_msi_domain_set_desc,
	.msi_check	= pci_msi_domain_check_cap,
	.handle_error	= pci_msi_domain_handle_error,
};

/*
 * Fill in the default PCI implementation for any msi_domain_ops hook the
 * caller left NULL; when no ops were supplied at all, use the shared
 * default structure.
 */
static void pci_msi_domain_update_dom_ops(struct msi_domain_info *info)
{
	struct msi_domain_ops *ops = info->ops;

	if (ops == NULL) {
		info->ops = &pci_msi_domain_ops_default;
	} else {
		if (ops->set_desc == NULL)
			ops->set_desc = pci_msi_domain_set_desc;
		if (ops->msi_check == NULL)
			ops->msi_check = pci_msi_domain_check_cap;
		if (ops->handle_error == NULL)
			ops->handle_error = pci_msi_domain_handle_error;
	}
}

/* Supply the default irq_write_msi_msg when the chip does not have one. */
static void pci_msi_domain_update_chip_ops(struct msi_domain_info *info)
{
	struct irq_chip *chip = info->chip;

	BUG_ON(!chip);
	if (!chip->irq_write_msi_msg)
		chip->irq_write_msi_msg = pci_msi_domain_write_msg;
}

/**
 * pci_msi_create_irq_domain - Create a MSI interrupt domain
 * @node: Optional device-tree node of the interrupt controller
 * @info: MSI domain info
 * @parent: Parent irq domain
 *
 * Updates the domain and chip ops and creates a MSI interrupt domain.
 *
 * Returns:
 * A domain pointer or NULL in case of failure.
 */
struct irq_domain *pci_msi_create_irq_domain(struct device_node *node,
					     struct msi_domain_info *info,
					     struct irq_domain *parent)
{
	/* Let the core defaults fill any hooks the caller opted into. */
	if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS)
		pci_msi_domain_update_dom_ops(info);
	if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
		pci_msi_domain_update_chip_ops(info);

	return msi_create_irq_domain(node, info, parent);
}

/**
 * pci_msi_domain_alloc_irqs - Allocate interrupts for @dev in @domain
 * @domain: The interrupt domain to allocate from
 * @dev: The device for which to allocate
 * @nvec: The number of interrupts to allocate
 * @type: Unused to allow simpler migration from the arch_XXX interfaces
 *
 * Returns:
 * A virtual interrupt number or an error code in case of failure
 */
int pci_msi_domain_alloc_irqs(struct irq_domain *domain, struct pci_dev *dev,
			      int nvec, int type)
{
	/* Thin wrapper: the generic MSI core works on struct device. */
	return msi_domain_alloc_irqs(domain, &dev->dev, nvec);
}

/**
 * pci_msi_domain_free_irqs - Free interrupts for @dev in @domain
 * @domain: The interrupt domain
 * @dev: The device for which to free interrupts
 */
void pci_msi_domain_free_irqs(struct irq_domain *domain, struct pci_dev *dev)
{
	msi_domain_free_irqs(domain, &dev->dev);
}

/**
 * pci_msi_create_default_irq_domain - Create a default MSI interrupt domain
 * @node: Optional device-tree node of the interrupt controller
 * @info: MSI domain info
 * @parent: Parent irq domain
 *
 * Returns: A domain pointer or NULL in case of failure. If successful
 * the default PCI/MSI irqdomain pointer is updated.
1347 */ 1348struct irq_domain *pci_msi_create_default_irq_domain(struct device_node *node, 1349 struct msi_domain_info *info, struct irq_domain *parent) 1350{ 1351 struct irq_domain *domain; 1352 1353 mutex_lock(&pci_msi_domain_lock); 1354 if (pci_msi_default_domain) { 1355 pr_err("PCI: default irq domain for PCI MSI has already been created.\n"); 1356 domain = NULL; 1357 } else { 1358 domain = pci_msi_create_irq_domain(node, info, parent); 1359 pci_msi_default_domain = domain; 1360 } 1361 mutex_unlock(&pci_msi_domain_lock); 1362 1363 return domain; 1364} 1365#endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */ 1366