1/*
2 * File:	msi.c
3 * Purpose:	PCI Message Signaled Interrupt (MSI)
4 *
5 * Copyright (C) 2003-2004 Intel
6 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
7 */
8
9#include <linux/err.h>
10#include <linux/mm.h>
11#include <linux/irq.h>
12#include <linux/interrupt.h>
13#include <linux/export.h>
14#include <linux/ioport.h>
15#include <linux/pci.h>
16#include <linux/proc_fs.h>
17#include <linux/msi.h>
18#include <linux/smp.h>
19#include <linux/errno.h>
20#include <linux/io.h>
21#include <linux/slab.h>
22#include <linux/irqdomain.h>
23
24#include "pci.h"
25
/*
 * Global MSI switch.  Starts out as 1, is cleared by pci_no_msi() and is
 * reported to callers by pci_msi_enabled().
 */
static int pci_msi_enable = 1;
/*
 * When non-zero, __pci_msi_desc_mask_irq() and __pci_msix_desc_mask_irq()
 * return 0 without writing any mask bits to the device.
 */
int pci_msi_ignore_mask;
28
/*
 * Number of MSI-X table entries described by the Message Control word
 * @flags (the QSIZE field plus one).  The argument is parenthesised so
 * that a compound expression such as "a | b" is masked as a whole.
 */
#define msix_table_size(flags)	(((flags) & PCI_MSIX_FLAGS_QSIZE) + 1)
30
31#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
/* Fallback MSI irq domain returned by the weak arch_get_pci_msi_domain() */
static struct irq_domain *pci_msi_default_domain;
/*
 * NOTE(review): presumably serialises creation of the default domain; no
 * user of this mutex is visible in this part of the file - confirm.
 */
static DEFINE_MUTEX(pci_msi_domain_lock);
34
35struct irq_domain * __weak arch_get_pci_msi_domain(struct pci_dev *dev)
36{
37	return pci_msi_default_domain;
38}
39
40static struct irq_domain *pci_msi_get_domain(struct pci_dev *dev)
41{
42	struct irq_domain *domain = NULL;
43
44	if (dev->bus->msi)
45		domain = dev->bus->msi->domain;
46	if (!domain)
47		domain = arch_get_pci_msi_domain(dev);
48
49	return domain;
50}
51
/*
 * Allocate @nvec interrupts of capability @type for @dev, through the irq
 * domain when one is available and through the arch hook otherwise.
 * Returns whatever the chosen backend returns.
 */
static int pci_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
	struct irq_domain *d = pci_msi_get_domain(dev);

	if (!d)
		return arch_setup_msi_irqs(dev, nvec, type);

	return pci_msi_domain_alloc_irqs(d, dev, nvec, type);
}
62
/*
 * Release @dev's MSI interrupts using the same backend selection as
 * pci_msi_setup_msi_irqs(): irq domain if present, arch hook otherwise.
 */
static void pci_msi_teardown_msi_irqs(struct pci_dev *dev)
{
	struct irq_domain *d = pci_msi_get_domain(dev);

	if (!d) {
		arch_teardown_msi_irqs(dev);
		return;
	}

	pci_msi_domain_free_irqs(d, dev);
}
73#else
/* Without CONFIG_PCI_MSI_IRQ_DOMAIN the arch hooks are called directly */
#define pci_msi_setup_msi_irqs		arch_setup_msi_irqs
#define pci_msi_teardown_msi_irqs	arch_teardown_msi_irqs
76#endif
77
78/* Arch hooks */
79
/*
 * Weak default: reports that there is no arch-provided MSI controller for
 * @dev by returning NULL.
 */
struct msi_controller * __weak pcibios_msi_controller(struct pci_dev *dev)
{
	return NULL;
}
84
85static struct msi_controller *pci_msi_controller(struct pci_dev *dev)
86{
87	struct msi_controller *msi_ctrl = dev->bus->msi;
88
89	if (msi_ctrl)
90		return msi_ctrl;
91
92	return pcibios_msi_controller(dev);
93}
94
95int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
96{
97	struct msi_controller *chip = pci_msi_controller(dev);
98	int err;
99
100	if (!chip || !chip->setup_irq)
101		return -EINVAL;
102
103	err = chip->setup_irq(chip, dev, desc);
104	if (err < 0)
105		return err;
106
107	irq_set_chip_data(desc->irq, chip);
108
109	return 0;
110}
111
112void __weak arch_teardown_msi_irq(unsigned int irq)
113{
114	struct msi_controller *chip = irq_get_chip_data(irq);
115
116	if (!chip || !chip->teardown_irq)
117		return;
118
119	chip->teardown_irq(chip, irq);
120}
121
122int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
123{
124	struct msi_desc *entry;
125	int ret;
126
127	/*
128	 * If an architecture wants to support multiple MSI, it needs to
129	 * override arch_setup_msi_irqs()
130	 */
131	if (type == PCI_CAP_ID_MSI && nvec > 1)
132		return 1;
133
134	list_for_each_entry(entry, &dev->msi_list, list) {
135		ret = arch_setup_msi_irq(dev, entry);
136		if (ret < 0)
137			return ret;
138		if (ret > 0)
139			return -ENOSPC;
140	}
141
142	return 0;
143}
144
145/*
146 * We have a default implementation available as a separate non-weak
147 * function, as it is used by the Xen x86 PCI code
148 */
149void default_teardown_msi_irqs(struct pci_dev *dev)
150{
151	int i;
152	struct msi_desc *entry;
153
154	list_for_each_entry(entry, &dev->msi_list, list)
155		if (entry->irq)
156			for (i = 0; i < entry->nvec_used; i++)
157				arch_teardown_msi_irq(entry->irq + i);
158}
159
160void __weak arch_teardown_msi_irqs(struct pci_dev *dev)
161{
162	return default_teardown_msi_irqs(dev);
163}
164
165static void default_restore_msi_irq(struct pci_dev *dev, int irq)
166{
167	struct msi_desc *entry;
168
169	entry = NULL;
170	if (dev->msix_enabled) {
171		list_for_each_entry(entry, &dev->msi_list, list) {
172			if (irq == entry->irq)
173				break;
174		}
175	} else if (dev->msi_enabled)  {
176		entry = irq_get_msi_desc(irq);
177	}
178
179	if (entry)
180		__pci_write_msi_msg(entry, &entry->msg);
181}
182
183void __weak arch_restore_msi_irqs(struct pci_dev *dev)
184{
185	return default_restore_msi_irqs(dev);
186}
187
188static void msi_set_enable(struct pci_dev *dev, int enable)
189{
190	u16 control;
191
192	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
193	control &= ~PCI_MSI_FLAGS_ENABLE;
194	if (enable)
195		control |= PCI_MSI_FLAGS_ENABLE;
196	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
197}
198
199static void msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set)
200{
201	u16 ctrl;
202
203	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
204	ctrl &= ~clear;
205	ctrl |= set;
206	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl);
207}
208
209static inline __attribute_const__ u32 msi_mask(unsigned x)
210{
211	/* Don't shift by >= width of type */
212	if (x >= 5)
213		return 0xffffffff;
214	return (1 << (1 << x)) - 1;
215}
216
217/*
218 * PCI 2.3 does not specify mask bits for each MSI interrupt.  Attempting to
219 * mask all MSI interrupts by clearing the MSI enable bit does not work
220 * reliably as devices without an INTx disable bit will then generate a
221 * level IRQ which will never be cleared.
222 */
223u32 __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
224{
225	u32 mask_bits = desc->masked;
226
227	if (pci_msi_ignore_mask || !desc->msi_attrib.maskbit)
228		return 0;
229
230	mask_bits &= ~mask;
231	mask_bits |= flag;
232	pci_write_config_dword(desc->dev, desc->mask_pos, mask_bits);
233
234	return mask_bits;
235}
236
237static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
238{
239	desc->masked = __pci_msi_desc_mask_irq(desc, mask, flag);
240}
241
242/*
243 * This internal function does not flush PCI writes to the device.
244 * All users must ensure that they read from the device before either
245 * assuming that the device state is up to date, or returning out of this
246 * file.  This saves a few milliseconds when initialising devices with lots
247 * of MSI-X interrupts.
248 */
249u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag)
250{
251	u32 mask_bits = desc->masked;
252	unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
253						PCI_MSIX_ENTRY_VECTOR_CTRL;
254
255	if (pci_msi_ignore_mask)
256		return 0;
257
258	mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
259	if (flag)
260		mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
261	writel(mask_bits, desc->mask_base + offset);
262
263	return mask_bits;
264}
265
266static void msix_mask_irq(struct msi_desc *desc, u32 flag)
267{
268	desc->masked = __pci_msix_desc_mask_irq(desc, flag);
269}
270
271static void msi_set_mask_bit(struct irq_data *data, u32 flag)
272{
273	struct msi_desc *desc = irq_data_get_msi(data);
274
275	if (desc->msi_attrib.is_msix) {
276		msix_mask_irq(desc, flag);
277		readl(desc->mask_base);		/* Flush write to device */
278	} else {
279		unsigned offset = data->irq - desc->irq;
280		msi_mask_irq(desc, 1 << offset, flag << offset);
281	}
282}
283
/**
 * pci_msi_mask_irq - Generic irq chip callback to mask PCI/MSI interrupts
 * @data:	pointer to irqdata associated to that interrupt
 *
 * Sets the mask bit of the MSI or MSI-X vector via msi_set_mask_bit().
 */
void pci_msi_mask_irq(struct irq_data *data)
{
	msi_set_mask_bit(data, 1);
}
292
/**
 * pci_msi_unmask_irq - Generic irq chip callback to unmask PCI/MSI interrupts
 * @data:	pointer to irqdata associated to that interrupt
 *
 * Clears the mask bit of the MSI or MSI-X vector via msi_set_mask_bit().
 */
void pci_msi_unmask_irq(struct irq_data *data)
{
	msi_set_mask_bit(data, 0);
}
301
302void default_restore_msi_irqs(struct pci_dev *dev)
303{
304	struct msi_desc *entry;
305
306	list_for_each_entry(entry, &dev->msi_list, list)
307		default_restore_msi_irq(dev, entry->irq);
308}
309
310void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
311{
312	BUG_ON(entry->dev->current_state != PCI_D0);
313
314	if (entry->msi_attrib.is_msix) {
315		void __iomem *base = entry->mask_base +
316			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
317
318		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
319		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
320		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
321	} else {
322		struct pci_dev *dev = entry->dev;
323		int pos = dev->msi_cap;
324		u16 data;
325
326		pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
327				      &msg->address_lo);
328		if (entry->msi_attrib.is_64) {
329			pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
330					      &msg->address_hi);
331			pci_read_config_word(dev, pos + PCI_MSI_DATA_64, &data);
332		} else {
333			msg->address_hi = 0;
334			pci_read_config_word(dev, pos + PCI_MSI_DATA_32, &data);
335		}
336		msg->data = data;
337	}
338}
339
340void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
341{
342	if (entry->dev->current_state != PCI_D0) {
343		/* Don't touch the hardware now */
344	} else if (entry->msi_attrib.is_msix) {
345		void __iomem *base;
346		base = entry->mask_base +
347			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
348
349		writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
350		writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
351		writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
352	} else {
353		struct pci_dev *dev = entry->dev;
354		int pos = dev->msi_cap;
355		u16 msgctl;
356
357		pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
358		msgctl &= ~PCI_MSI_FLAGS_QSIZE;
359		msgctl |= entry->msi_attrib.multiple << 4;
360		pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl);
361
362		pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
363				       msg->address_lo);
364		if (entry->msi_attrib.is_64) {
365			pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
366					       msg->address_hi);
367			pci_write_config_word(dev, pos + PCI_MSI_DATA_64,
368					      msg->data);
369		} else {
370			pci_write_config_word(dev, pos + PCI_MSI_DATA_32,
371					      msg->data);
372		}
373	}
374	entry->msg = *msg;
375}
376
/*
 * Write @msg for the MSI descriptor attached to @irq.  The descriptor is
 * passed on unchecked, so @irq must have one.
 */
void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	__pci_write_msi_msg(irq_get_msi_desc(irq), msg);
}
383EXPORT_SYMBOL_GPL(pci_write_msi_msg);
384
385static void free_msi_irqs(struct pci_dev *dev)
386{
387	struct msi_desc *entry, *tmp;
388	struct attribute **msi_attrs;
389	struct device_attribute *dev_attr;
390	int i, count = 0;
391
392	list_for_each_entry(entry, &dev->msi_list, list)
393		if (entry->irq)
394			for (i = 0; i < entry->nvec_used; i++)
395				BUG_ON(irq_has_action(entry->irq + i));
396
397	pci_msi_teardown_msi_irqs(dev);
398
399	list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) {
400		if (entry->msi_attrib.is_msix) {
401			if (list_is_last(&entry->list, &dev->msi_list))
402				iounmap(entry->mask_base);
403		}
404
405		list_del(&entry->list);
406		kfree(entry);
407	}
408
409	if (dev->msi_irq_groups) {
410		sysfs_remove_groups(&dev->dev.kobj, dev->msi_irq_groups);
411		msi_attrs = dev->msi_irq_groups[0]->attrs;
412		while (msi_attrs[count]) {
413			dev_attr = container_of(msi_attrs[count],
414						struct device_attribute, attr);
415			kfree(dev_attr->attr.name);
416			kfree(dev_attr);
417			++count;
418		}
419		kfree(msi_attrs);
420		kfree(dev->msi_irq_groups[0]);
421		kfree(dev->msi_irq_groups);
422		dev->msi_irq_groups = NULL;
423	}
424}
425
426static struct msi_desc *alloc_msi_entry(struct pci_dev *dev)
427{
428	struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
429	if (!desc)
430		return NULL;
431
432	INIT_LIST_HEAD(&desc->list);
433	desc->dev = dev;
434
435	return desc;
436}
437
438static void pci_intx_for_msi(struct pci_dev *dev, int enable)
439{
440	if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG))
441		pci_intx(dev, enable);
442}
443
444static void __pci_restore_msi_state(struct pci_dev *dev)
445{
446	u16 control;
447	struct msi_desc *entry;
448
449	if (!dev->msi_enabled)
450		return;
451
452	entry = irq_get_msi_desc(dev->irq);
453
454	pci_intx_for_msi(dev, 0);
455	msi_set_enable(dev, 0);
456	arch_restore_msi_irqs(dev);
457
458	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
459	msi_mask_irq(entry, msi_mask(entry->msi_attrib.multi_cap),
460		     entry->masked);
461	control &= ~PCI_MSI_FLAGS_QSIZE;
462	control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
463	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
464}
465
466static void __pci_restore_msix_state(struct pci_dev *dev)
467{
468	struct msi_desc *entry;
469
470	if (!dev->msix_enabled)
471		return;
472	BUG_ON(list_empty(&dev->msi_list));
473
474	/* route the table */
475	pci_intx_for_msi(dev, 0);
476	msix_clear_and_set_ctrl(dev, 0,
477				PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);
478
479	arch_restore_msi_irqs(dev);
480	list_for_each_entry(entry, &dev->msi_list, list)
481		msix_mask_irq(entry, entry->masked);
482
483	msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
484}
485
/*
 * Restore the MSI and the MSI-X state of @dev.  Each helper returns early
 * when its interrupt mode is not enabled on the device.
 */
void pci_restore_msi_state(struct pci_dev *dev)
{
	__pci_restore_msi_state(dev);
	__pci_restore_msix_state(dev);
}
491EXPORT_SYMBOL_GPL(pci_restore_msi_state);
492
493static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr,
494			     char *buf)
495{
496	struct msi_desc *entry;
497	unsigned long irq;
498	int retval;
499
500	retval = kstrtoul(attr->attr.name, 10, &irq);
501	if (retval)
502		return retval;
503
504	entry = irq_get_msi_desc(irq);
505	if (entry)
506		return sprintf(buf, "%s\n",
507				entry->msi_attrib.is_msix ? "msix" : "msi");
508
509	return -ENODEV;
510}
511
512static int populate_msi_sysfs(struct pci_dev *pdev)
513{
514	struct attribute **msi_attrs;
515	struct attribute *msi_attr;
516	struct device_attribute *msi_dev_attr;
517	struct attribute_group *msi_irq_group;
518	const struct attribute_group **msi_irq_groups;
519	struct msi_desc *entry;
520	int ret = -ENOMEM;
521	int num_msi = 0;
522	int count = 0;
523
524	/* Determine how many msi entries we have */
525	list_for_each_entry(entry, &pdev->msi_list, list)
526		++num_msi;
527	if (!num_msi)
528		return 0;
529
530	/* Dynamically create the MSI attributes for the PCI device */
531	msi_attrs = kzalloc(sizeof(void *) * (num_msi + 1), GFP_KERNEL);
532	if (!msi_attrs)
533		return -ENOMEM;
534	list_for_each_entry(entry, &pdev->msi_list, list) {
535		msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
536		if (!msi_dev_attr)
537			goto error_attrs;
538		msi_attrs[count] = &msi_dev_attr->attr;
539
540		sysfs_attr_init(&msi_dev_attr->attr);
541		msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d",
542						    entry->irq);
543		if (!msi_dev_attr->attr.name)
544			goto error_attrs;
545		msi_dev_attr->attr.mode = S_IRUGO;
546		msi_dev_attr->show = msi_mode_show;
547		++count;
548	}
549
550	msi_irq_group = kzalloc(sizeof(*msi_irq_group), GFP_KERNEL);
551	if (!msi_irq_group)
552		goto error_attrs;
553	msi_irq_group->name = "msi_irqs";
554	msi_irq_group->attrs = msi_attrs;
555
556	msi_irq_groups = kzalloc(sizeof(void *) * 2, GFP_KERNEL);
557	if (!msi_irq_groups)
558		goto error_irq_group;
559	msi_irq_groups[0] = msi_irq_group;
560
561	ret = sysfs_create_groups(&pdev->dev.kobj, msi_irq_groups);
562	if (ret)
563		goto error_irq_groups;
564	pdev->msi_irq_groups = msi_irq_groups;
565
566	return 0;
567
568error_irq_groups:
569	kfree(msi_irq_groups);
570error_irq_group:
571	kfree(msi_irq_group);
572error_attrs:
573	count = 0;
574	msi_attr = msi_attrs[count];
575	while (msi_attr) {
576		msi_dev_attr = container_of(msi_attr, struct device_attribute, attr);
577		kfree(msi_attr->name);
578		kfree(msi_dev_attr);
579		++count;
580		msi_attr = msi_attrs[count];
581	}
582	kfree(msi_attrs);
583	return ret;
584}
585
586static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec)
587{
588	u16 control;
589	struct msi_desc *entry;
590
591	/* MSI Entry Initialization */
592	entry = alloc_msi_entry(dev);
593	if (!entry)
594		return NULL;
595
596	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
597
598	entry->msi_attrib.is_msix	= 0;
599	entry->msi_attrib.is_64		= !!(control & PCI_MSI_FLAGS_64BIT);
600	entry->msi_attrib.entry_nr	= 0;
601	entry->msi_attrib.maskbit	= !!(control & PCI_MSI_FLAGS_MASKBIT);
602	entry->msi_attrib.default_irq	= dev->irq;	/* Save IOAPIC IRQ */
603	entry->msi_attrib.multi_cap	= (control & PCI_MSI_FLAGS_QMASK) >> 1;
604	entry->msi_attrib.multiple	= ilog2(__roundup_pow_of_two(nvec));
605	entry->nvec_used		= nvec;
606
607	if (control & PCI_MSI_FLAGS_64BIT)
608		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
609	else
610		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_32;
611
612	/* Save the initial mask status */
613	if (entry->msi_attrib.maskbit)
614		pci_read_config_dword(dev, entry->mask_pos, &entry->masked);
615
616	return entry;
617}
618
619static int msi_verify_entries(struct pci_dev *dev)
620{
621	struct msi_desc *entry;
622
623	list_for_each_entry(entry, &dev->msi_list, list) {
624		if (!dev->no_64bit_msi || !entry->msg.address_hi)
625			continue;
626		dev_err(&dev->dev, "Device has broken 64-bit MSI but arch"
627			" tried to assign one above 4G\n");
628		return -EIO;
629	}
630	return 0;
631}
632
633/**
634 * msi_capability_init - configure device's MSI capability structure
635 * @dev: pointer to the pci_dev data structure of MSI device function
636 * @nvec: number of interrupts to allocate
637 *
638 * Setup the MSI capability structure of the device with the requested
639 * number of interrupts.  A return value of zero indicates the successful
640 * setup of an entry with the new MSI irq.  A negative return value indicates
641 * an error, and a positive return value indicates the number of interrupts
642 * which could have been allocated.
643 */
644static int msi_capability_init(struct pci_dev *dev, int nvec)
645{
646	struct msi_desc *entry;
647	int ret;
648	unsigned mask;
649
650	msi_set_enable(dev, 0);	/* Disable MSI during set up */
651
652	entry = msi_setup_entry(dev, nvec);
653	if (!entry)
654		return -ENOMEM;
655
656	/* All MSIs are unmasked by default, Mask them all */
657	mask = msi_mask(entry->msi_attrib.multi_cap);
658	msi_mask_irq(entry, mask, mask);
659
660	list_add_tail(&entry->list, &dev->msi_list);
661
662	/* Configure MSI capability structure */
663	ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
664	if (ret) {
665		msi_mask_irq(entry, mask, ~mask);
666		free_msi_irqs(dev);
667		return ret;
668	}
669
670	ret = msi_verify_entries(dev);
671	if (ret) {
672		msi_mask_irq(entry, mask, ~mask);
673		free_msi_irqs(dev);
674		return ret;
675	}
676
677	ret = populate_msi_sysfs(dev);
678	if (ret) {
679		msi_mask_irq(entry, mask, ~mask);
680		free_msi_irqs(dev);
681		return ret;
682	}
683
684	/* Set MSI enabled bits	 */
685	pci_intx_for_msi(dev, 0);
686	msi_set_enable(dev, 1);
687	dev->msi_enabled = 1;
688
689	dev->irq = entry->irq;
690	return 0;
691}
692
693static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries)
694{
695	resource_size_t phys_addr;
696	u32 table_offset;
697	unsigned long flags;
698	u8 bir;
699
700	pci_read_config_dword(dev, dev->msix_cap + PCI_MSIX_TABLE,
701			      &table_offset);
702	bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR);
703	flags = pci_resource_flags(dev, bir);
704	if (!flags || (flags & IORESOURCE_UNSET))
705		return NULL;
706
707	table_offset &= PCI_MSIX_TABLE_OFFSET;
708	phys_addr = pci_resource_start(dev, bir) + table_offset;
709
710	return ioremap_nocache(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
711}
712
713static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
714			      struct msix_entry *entries, int nvec)
715{
716	struct msi_desc *entry;
717	int i;
718
719	for (i = 0; i < nvec; i++) {
720		entry = alloc_msi_entry(dev);
721		if (!entry) {
722			if (!i)
723				iounmap(base);
724			else
725				free_msi_irqs(dev);
726			/* No enough memory. Don't try again */
727			return -ENOMEM;
728		}
729
730		entry->msi_attrib.is_msix	= 1;
731		entry->msi_attrib.is_64		= 1;
732		entry->msi_attrib.entry_nr	= entries[i].entry;
733		entry->msi_attrib.default_irq	= dev->irq;
734		entry->mask_base		= base;
735		entry->nvec_used		= 1;
736
737		list_add_tail(&entry->list, &dev->msi_list);
738	}
739
740	return 0;
741}
742
743static void msix_program_entries(struct pci_dev *dev,
744				 struct msix_entry *entries)
745{
746	struct msi_desc *entry;
747	int i = 0;
748
749	list_for_each_entry(entry, &dev->msi_list, list) {
750		int offset = entries[i].entry * PCI_MSIX_ENTRY_SIZE +
751						PCI_MSIX_ENTRY_VECTOR_CTRL;
752
753		entries[i].vector = entry->irq;
754		entry->masked = readl(entry->mask_base + offset);
755		msix_mask_irq(entry, 1);
756		i++;
757	}
758}
759
760/**
761 * msix_capability_init - configure device's MSI-X capability
762 * @dev: pointer to the pci_dev data structure of MSI-X device function
763 * @entries: pointer to an array of struct msix_entry entries
764 * @nvec: number of @entries
765 *
766 * Setup the MSI-X capability structure of device function with a
767 * single MSI-X irq. A return of zero indicates the successful setup of
768 * requested MSI-X entries with allocated irqs or non-zero for otherwise.
769 **/
770static int msix_capability_init(struct pci_dev *dev,
771				struct msix_entry *entries, int nvec)
772{
773	int ret;
774	u16 control;
775	void __iomem *base;
776
777	/* Ensure MSI-X is disabled while it is set up */
778	msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
779
780	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
781	/* Request & Map MSI-X table region */
782	base = msix_map_region(dev, msix_table_size(control));
783	if (!base)
784		return -ENOMEM;
785
786	ret = msix_setup_entries(dev, base, entries, nvec);
787	if (ret)
788		return ret;
789
790	ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
791	if (ret)
792		goto out_avail;
793
794	/* Check if all MSI entries honor device restrictions */
795	ret = msi_verify_entries(dev);
796	if (ret)
797		goto out_free;
798
799	/*
800	 * Some devices require MSI-X to be enabled before we can touch the
801	 * MSI-X registers.  We need to mask all the vectors to prevent
802	 * interrupts coming in before they're fully set up.
803	 */
804	msix_clear_and_set_ctrl(dev, 0,
805				PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE);
806
807	msix_program_entries(dev, entries);
808
809	ret = populate_msi_sysfs(dev);
810	if (ret)
811		goto out_free;
812
813	/* Set MSI-X enabled bits and unmask the function */
814	pci_intx_for_msi(dev, 0);
815	dev->msix_enabled = 1;
816
817	msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
818
819	return 0;
820
821out_avail:
822	if (ret < 0) {
823		/*
824		 * If we had some success, report the number of irqs
825		 * we succeeded in setting up.
826		 */
827		struct msi_desc *entry;
828		int avail = 0;
829
830		list_for_each_entry(entry, &dev->msi_list, list) {
831			if (entry->irq != 0)
832				avail++;
833		}
834		if (avail != 0)
835			ret = avail;
836	}
837
838out_free:
839	free_msi_irqs(dev);
840
841	return ret;
842}
843
844/**
845 * pci_msi_supported - check whether MSI may be enabled on a device
846 * @dev: pointer to the pci_dev data structure of MSI device function
847 * @nvec: how many MSIs have been requested ?
848 *
849 * Look at global flags, the device itself, and its parent buses
850 * to determine if MSI/-X are supported for the device. If MSI/-X is
851 * supported return 1, else return 0.
852 **/
853static int pci_msi_supported(struct pci_dev *dev, int nvec)
854{
855	struct pci_bus *bus;
856
857	/* MSI must be globally enabled and supported by the device */
858	if (!pci_msi_enable)
859		return 0;
860
861	if (!dev || dev->no_msi || dev->current_state != PCI_D0)
862		return 0;
863
864	/*
865	 * You can't ask to have 0 or less MSIs configured.
866	 *  a) it's stupid ..
867	 *  b) the list manipulation code assumes nvec >= 1.
868	 */
869	if (nvec < 1)
870		return 0;
871
872	/*
873	 * Any bridge which does NOT route MSI transactions from its
874	 * secondary bus to its primary bus must set NO_MSI flag on
875	 * the secondary pci_bus.
876	 * We expect only arch-specific PCI host bus controller driver
877	 * or quirks for specific PCI bridges to be setting NO_MSI.
878	 */
879	for (bus = dev->bus; bus; bus = bus->parent)
880		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
881			return 0;
882
883	return 1;
884}
885
886/**
887 * pci_msi_vec_count - Return the number of MSI vectors a device can send
888 * @dev: device to report about
889 *
890 * This function returns the number of MSI vectors a device requested via
891 * Multiple Message Capable register. It returns a negative errno if the
892 * device is not capable sending MSI interrupts. Otherwise, the call succeeds
893 * and returns a power of two, up to a maximum of 2^5 (32), according to the
894 * MSI specification.
895 **/
896int pci_msi_vec_count(struct pci_dev *dev)
897{
898	int ret;
899	u16 msgctl;
900
901	if (!dev->msi_cap)
902		return -EINVAL;
903
904	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl);
905	ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
906
907	return ret;
908}
909EXPORT_SYMBOL(pci_msi_vec_count);
910
911void pci_msi_shutdown(struct pci_dev *dev)
912{
913	struct msi_desc *desc;
914	u32 mask;
915
916	if (!pci_msi_enable || !dev || !dev->msi_enabled)
917		return;
918
919	BUG_ON(list_empty(&dev->msi_list));
920	desc = list_first_entry(&dev->msi_list, struct msi_desc, list);
921
922	msi_set_enable(dev, 0);
923	pci_intx_for_msi(dev, 1);
924	dev->msi_enabled = 0;
925
926	/* Return the device with MSI unmasked as initial states */
927	mask = msi_mask(desc->msi_attrib.multi_cap);
928	/* Keep cached state to be restored */
929	__pci_msi_desc_mask_irq(desc, mask, ~mask);
930
931	/* Restore dev->irq to its default pin-assertion irq */
932	dev->irq = desc->msi_attrib.default_irq;
933}
934
935void pci_disable_msi(struct pci_dev *dev)
936{
937	if (!pci_msi_enable || !dev || !dev->msi_enabled)
938		return;
939
940	pci_msi_shutdown(dev);
941	free_msi_irqs(dev);
942}
943EXPORT_SYMBOL(pci_disable_msi);
944
945/**
946 * pci_msix_vec_count - return the number of device's MSI-X table entries
947 * @dev: pointer to the pci_dev data structure of MSI-X device function
948 * This function returns the number of device's MSI-X table entries and
949 * therefore the number of MSI-X vectors device is capable of sending.
950 * It returns a negative errno if the device is not capable of sending MSI-X
951 * interrupts.
952 **/
953int pci_msix_vec_count(struct pci_dev *dev)
954{
955	u16 control;
956
957	if (!dev->msix_cap)
958		return -EINVAL;
959
960	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
961	return msix_table_size(control);
962}
963EXPORT_SYMBOL(pci_msix_vec_count);
964
965/**
966 * pci_enable_msix - configure device's MSI-X capability structure
967 * @dev: pointer to the pci_dev data structure of MSI-X device function
968 * @entries: pointer to an array of MSI-X entries
969 * @nvec: number of MSI-X irqs requested for allocation by device driver
970 *
971 * Setup the MSI-X capability structure of device function with the number
972 * of requested irqs upon its software driver call to request for
973 * MSI-X mode enabled on its hardware device function. A return of zero
974 * indicates the successful configuration of MSI-X capability structure
975 * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
976 * Or a return of > 0 indicates that driver request is exceeding the number
977 * of irqs or MSI-X vectors available. Driver should use the returned value to
978 * re-send its request.
979 **/
980int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
981{
982	int nr_entries;
983	int i, j;
984
985	if (!pci_msi_supported(dev, nvec))
986		return -EINVAL;
987
988	if (!entries)
989		return -EINVAL;
990
991	nr_entries = pci_msix_vec_count(dev);
992	if (nr_entries < 0)
993		return nr_entries;
994	if (nvec > nr_entries)
995		return nr_entries;
996
997	/* Check for any invalid entries */
998	for (i = 0; i < nvec; i++) {
999		if (entries[i].entry >= nr_entries)
1000			return -EINVAL;		/* invalid entry */
1001		for (j = i + 1; j < nvec; j++) {
1002			if (entries[i].entry == entries[j].entry)
1003				return -EINVAL;	/* duplicate entry */
1004		}
1005	}
1006	WARN_ON(!!dev->msix_enabled);
1007
1008	/* Check whether driver already requested for MSI irq */
1009	if (dev->msi_enabled) {
1010		dev_info(&dev->dev, "can't enable MSI-X (MSI IRQ already assigned)\n");
1011		return -EINVAL;
1012	}
1013	return msix_capability_init(dev, entries, nvec);
1014}
1015EXPORT_SYMBOL(pci_enable_msix);
1016
1017void pci_msix_shutdown(struct pci_dev *dev)
1018{
1019	struct msi_desc *entry;
1020
1021	if (!pci_msi_enable || !dev || !dev->msix_enabled)
1022		return;
1023
1024	/* Return the device with MSI-X masked as initial states */
1025	list_for_each_entry(entry, &dev->msi_list, list) {
1026		/* Keep cached states to be restored */
1027		__pci_msix_desc_mask_irq(entry, 1);
1028	}
1029
1030	msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
1031	pci_intx_for_msi(dev, 1);
1032	dev->msix_enabled = 0;
1033}
1034
1035void pci_disable_msix(struct pci_dev *dev)
1036{
1037	if (!pci_msi_enable || !dev || !dev->msix_enabled)
1038		return;
1039
1040	pci_msix_shutdown(dev);
1041	free_msi_irqs(dev);
1042}
1043EXPORT_SYMBOL(pci_disable_msix);
1044
/*
 * Turn MSI off globally: pci_msi_supported() rejects every request and the
 * shutdown/disable helpers return early once the flag is cleared.
 */
void pci_no_msi(void)
{
	pci_msi_enable = 0;
}
1049
/**
 * pci_msi_enabled - is MSI enabled?
 *
 * Returns true if MSI has not been disabled by the command-line option
 * pci=nomsi.  The value is the global pci_msi_enable flag, which
 * pci_no_msi() clears.
 **/
int pci_msi_enabled(void)
{
	return pci_msi_enable;
}
1060EXPORT_SYMBOL(pci_msi_enabled);
1061
/*
 * Initialise the MSI bookkeeping of @dev: empty the descriptor list,
 * record the config-space offsets of the MSI and MSI-X capabilities and
 * clear the enable bit of each capability that is present.
 */
void pci_msi_init_pci_dev(struct pci_dev *dev)
{
	INIT_LIST_HEAD(&dev->msi_list);

	/* Disable the msi hardware to avoid screaming interrupts
	 * during boot.  This is the power on reset default so
	 * usually this should be a noop.
	 */
	dev->msi_cap = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (dev->msi_cap)
		msi_set_enable(dev, 0);

	/* Same treatment for the MSI-X capability */
	dev->msix_cap = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (dev->msix_cap)
		msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
}
1078
1079/**
1080 * pci_enable_msi_range - configure device's MSI capability structure
1081 * @dev: device to configure
1082 * @minvec: minimal number of interrupts to configure
1083 * @maxvec: maximum number of interrupts to configure
1084 *
1085 * This function tries to allocate a maximum possible number of interrupts in a
1086 * range between @minvec and @maxvec. It returns a negative errno if an error
1087 * occurs. If it succeeds, it returns the actual number of interrupts allocated
1088 * and updates the @dev's irq member to the lowest new interrupt number;
1089 * the other interrupt numbers allocated to this device are consecutive.
1090 **/
1091int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
1092{
1093	int nvec;
1094	int rc;
1095
1096	if (!pci_msi_supported(dev, minvec))
1097		return -EINVAL;
1098
1099	WARN_ON(!!dev->msi_enabled);
1100
1101	/* Check whether driver already requested MSI-X irqs */
1102	if (dev->msix_enabled) {
1103		dev_info(&dev->dev,
1104			 "can't enable MSI (MSI-X already enabled)\n");
1105		return -EINVAL;
1106	}
1107
1108	if (maxvec < minvec)
1109		return -ERANGE;
1110
1111	nvec = pci_msi_vec_count(dev);
1112	if (nvec < 0)
1113		return nvec;
1114	else if (nvec < minvec)
1115		return -EINVAL;
1116	else if (nvec > maxvec)
1117		nvec = maxvec;
1118
1119	do {
1120		rc = msi_capability_init(dev, nvec);
1121		if (rc < 0) {
1122			return rc;
1123		} else if (rc > 0) {
1124			if (rc < minvec)
1125				return -ENOSPC;
1126			nvec = rc;
1127		}
1128	} while (rc);
1129
1130	return nvec;
1131}
1132EXPORT_SYMBOL(pci_enable_msi_range);
1133
1134/**
1135 * pci_enable_msix_range - configure device's MSI-X capability structure
1136 * @dev: pointer to the pci_dev data structure of MSI-X device function
1137 * @entries: pointer to an array of MSI-X entries
1138 * @minvec: minimum number of MSI-X irqs requested
1139 * @maxvec: maximum number of MSI-X irqs requested
1140 *
1141 * Setup the MSI-X capability structure of device function with a maximum
1142 * possible number of interrupts in the range between @minvec and @maxvec
1143 * upon its software driver call to request for MSI-X mode enabled on its
1144 * hardware device function. It returns a negative errno if an error occurs.
1145 * If it succeeds, it returns the actual number of interrupts allocated and
1146 * indicates the successful configuration of MSI-X capability structure
1147 * with new allocated MSI-X interrupts.
1148 **/
1149int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
1150			       int minvec, int maxvec)
1151{
1152	int nvec = maxvec;
1153	int rc;
1154
1155	if (maxvec < minvec)
1156		return -ERANGE;
1157
1158	do {
1159		rc = pci_enable_msix(dev, entries, nvec);
1160		if (rc < 0) {
1161			return rc;
1162		} else if (rc > 0) {
1163			if (rc < minvec)
1164				return -ENOSPC;
1165			nvec = rc;
1166		}
1167	} while (rc);
1168
1169	return nvec;
1170}
1171EXPORT_SYMBOL(pci_enable_msix_range);
1172
1173#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
1174/**
1175 * pci_msi_domain_write_msg - Helper to write MSI message to PCI config space
1176 * @irq_data:	Pointer to interrupt data of the MSI interrupt
1177 * @msg:	Pointer to the message
1178 */
1179void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg *msg)
1180{
1181	struct msi_desc *desc = irq_data->msi_desc;
1182
1183	/*
1184	 * For MSI-X desc->irq is always equal to irq_data->irq. For
1185	 * MSI only the first interrupt of MULTI MSI passes the test.
1186	 */
1187	if (desc->irq == irq_data->irq)
1188		__pci_write_msi_msg(desc, msg);
1189}
1190
1191/**
1192 * pci_msi_domain_calc_hwirq - Generate a unique ID for an MSI source
1193 * @dev:	Pointer to the PCI device
1194 * @desc:	Pointer to the msi descriptor
1195 *
1196 * The ID number is only used within the irqdomain.
1197 */
1198irq_hw_number_t pci_msi_domain_calc_hwirq(struct pci_dev *dev,
1199					  struct msi_desc *desc)
1200{
1201	return (irq_hw_number_t)desc->msi_attrib.entry_nr |
1202		PCI_DEVID(dev->bus->number, dev->devfn) << 11 |
1203		(pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 27;
1204}
1205
1206static inline bool pci_msi_desc_is_multi_msi(struct msi_desc *desc)
1207{
1208	return !desc->msi_attrib.is_msix && desc->nvec_used > 1;
1209}
1210
1211/**
1212 * pci_msi_domain_check_cap - Verify that @domain supports the capabilities for @dev
1213 * @domain:	The interrupt domain to check
1214 * @info:	The domain info for verification
1215 * @dev:	The device to check
1216 *
1217 * Returns:
1218 *  0 if the functionality is supported
1219 *  1 if Multi MSI is requested, but the domain does not support it
1220 *  -ENOTSUPP otherwise
1221 */
1222int pci_msi_domain_check_cap(struct irq_domain *domain,
1223			     struct msi_domain_info *info, struct device *dev)
1224{
1225	struct msi_desc *desc = first_pci_msi_entry(to_pci_dev(dev));
1226
1227	/* Special handling to support pci_enable_msi_range() */
1228	if (pci_msi_desc_is_multi_msi(desc) &&
1229	    !(info->flags & MSI_FLAG_MULTI_PCI_MSI))
1230		return 1;
1231	else if (desc->msi_attrib.is_msix && !(info->flags & MSI_FLAG_PCI_MSIX))
1232		return -ENOTSUPP;
1233
1234	return 0;
1235}
1236
1237static int pci_msi_domain_handle_error(struct irq_domain *domain,
1238				       struct msi_desc *desc, int error)
1239{
1240	/* Special handling to support pci_enable_msi_range() */
1241	if (pci_msi_desc_is_multi_msi(desc) && error == -ENOSPC)
1242		return 1;
1243
1244	return error;
1245}
1246
#ifdef GENERIC_MSI_DOMAIN_OPS
/*
 * Record @desc in the allocation info @arg and derive the matching
 * hardware interrupt number from the descriptor's PCI device.
 */
static void pci_msi_domain_set_desc(msi_alloc_info_t *arg,
				    struct msi_desc *desc)
{
	struct pci_dev *pdev;

	arg->desc = desc;

	pdev = msi_desc_to_pci_dev(desc);
	arg->hwirq = pci_msi_domain_calc_hwirq(pdev, desc);
}
#else
/* Without the generic ops no default set_desc callback is provided. */
#define pci_msi_domain_set_desc		NULL
#endif
1258
1259static struct msi_domain_ops pci_msi_domain_ops_default = {
1260	.set_desc	= pci_msi_domain_set_desc,
1261	.msi_check	= pci_msi_domain_check_cap,
1262	.handle_error	= pci_msi_domain_handle_error,
1263};
1264
1265static void pci_msi_domain_update_dom_ops(struct msi_domain_info *info)
1266{
1267	struct msi_domain_ops *ops = info->ops;
1268
1269	if (ops == NULL) {
1270		info->ops = &pci_msi_domain_ops_default;
1271	} else {
1272		if (ops->set_desc == NULL)
1273			ops->set_desc = pci_msi_domain_set_desc;
1274		if (ops->msi_check == NULL)
1275			ops->msi_check = pci_msi_domain_check_cap;
1276		if (ops->handle_error == NULL)
1277			ops->handle_error = pci_msi_domain_handle_error;
1278	}
1279}
1280
1281static void pci_msi_domain_update_chip_ops(struct msi_domain_info *info)
1282{
1283	struct irq_chip *chip = info->chip;
1284
1285	BUG_ON(!chip);
1286	if (!chip->irq_write_msi_msg)
1287		chip->irq_write_msi_msg = pci_msi_domain_write_msg;
1288}
1289
1290/**
1291 * pci_msi_create_irq_domain - Creat a MSI interrupt domain
1292 * @node:	Optional device-tree node of the interrupt controller
1293 * @info:	MSI domain info
1294 * @parent:	Parent irq domain
1295 *
1296 * Updates the domain and chip ops and creates a MSI interrupt domain.
1297 *
1298 * Returns:
1299 * A domain pointer or NULL in case of failure.
1300 */
1301struct irq_domain *pci_msi_create_irq_domain(struct device_node *node,
1302					     struct msi_domain_info *info,
1303					     struct irq_domain *parent)
1304{
1305	if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS)
1306		pci_msi_domain_update_dom_ops(info);
1307	if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
1308		pci_msi_domain_update_chip_ops(info);
1309
1310	return msi_create_irq_domain(node, info, parent);
1311}
1312
1313/**
1314 * pci_msi_domain_alloc_irqs - Allocate interrupts for @dev in @domain
1315 * @domain:	The interrupt domain to allocate from
1316 * @dev:	The device for which to allocate
1317 * @nvec:	The number of interrupts to allocate
1318 * @type:	Unused to allow simpler migration from the arch_XXX interfaces
1319 *
1320 * Returns:
1321 * A virtual interrupt number or an error code in case of failure
1322 */
1323int pci_msi_domain_alloc_irqs(struct irq_domain *domain, struct pci_dev *dev,
1324			      int nvec, int type)
1325{
1326	return msi_domain_alloc_irqs(domain, &dev->dev, nvec);
1327}
1328
1329/**
1330 * pci_msi_domain_free_irqs - Free interrupts for @dev in @domain
1331 * @domain:	The interrupt domain
1332 * @dev:	The device for which to free interrupts
1333 */
1334void pci_msi_domain_free_irqs(struct irq_domain *domain, struct pci_dev *dev)
1335{
1336	msi_domain_free_irqs(domain, &dev->dev);
1337}
1338
1339/**
1340 * pci_msi_create_default_irq_domain - Create a default MSI interrupt domain
1341 * @node:	Optional device-tree node of the interrupt controller
1342 * @info:	MSI domain info
1343 * @parent:	Parent irq domain
1344 *
1345 * Returns: A domain pointer or NULL in case of failure. If successful
1346 * the default PCI/MSI irqdomain pointer is updated.
1347 */
1348struct irq_domain *pci_msi_create_default_irq_domain(struct device_node *node,
1349		struct msi_domain_info *info, struct irq_domain *parent)
1350{
1351	struct irq_domain *domain;
1352
1353	mutex_lock(&pci_msi_domain_lock);
1354	if (pci_msi_default_domain) {
1355		pr_err("PCI: default irq domain for PCI MSI has already been created.\n");
1356		domain = NULL;
1357	} else {
1358		domain = pci_msi_create_irq_domain(node, info, parent);
1359		pci_msi_default_domain = domain;
1360	}
1361	mutex_unlock(&pci_msi_domain_lock);
1362
1363	return domain;
1364}
1365#endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */
1366