1/*
2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <jroedel@suse.de>
4 *         Leo Duran <leo.duran@amd.com>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
18 */
19
20#include <linux/pci.h>
21#include <linux/acpi.h>
22#include <linux/list.h>
23#include <linux/slab.h>
24#include <linux/syscore_ops.h>
25#include <linux/interrupt.h>
26#include <linux/msi.h>
27#include <linux/amd-iommu.h>
28#include <linux/export.h>
29#include <linux/iommu.h>
30#include <asm/pci-direct.h>
31#include <asm/iommu.h>
32#include <asm/gart.h>
33#include <asm/x86_init.h>
34#include <asm/iommu_table.h>
35#include <asm/io_apic.h>
36#include <asm/irq_remapping.h>
37
38#include "amd_iommu_proto.h"
39#include "amd_iommu_types.h"
40#include "irq_remapping.h"
41
42/*
43 * definitions for the ACPI scanning code
44 */
45#define IVRS_HEADER_LENGTH 48	/* bytes skipped before the first sub-block is parsed */
46
/* Values of the `type` byte that starts every IVRS sub-block */
47#define ACPI_IVHD_TYPE                  0x10
48#define ACPI_IVMD_TYPE_ALL              0x20
49#define ACPI_IVMD_TYPE                  0x21
50#define ACPI_IVMD_TYPE_RANGE            0x22
51
/*
 * IVHD device entry types (struct ivhd_entry.type). The top two bits of the
 * value select the entry size, see ivhd_entry_length(): 0x0x entries are
 * 4 bytes long, 0x4x entries are 8 bytes long.
 */
52#define IVHD_DEV_ALL                    0x01
53#define IVHD_DEV_SELECT                 0x02
54#define IVHD_DEV_SELECT_RANGE_START     0x03
55#define IVHD_DEV_RANGE_END              0x04
56#define IVHD_DEV_ALIAS                  0x42
57#define IVHD_DEV_ALIAS_RANGE            0x43
58#define IVHD_DEV_EXT_SELECT             0x46
59#define IVHD_DEV_EXT_SELECT_RANGE       0x47
60#define IVHD_DEV_SPECIAL		0x48
61
/* Sub-type of an IVHD_DEV_SPECIAL entry (bits 24-31 of ivhd_entry.ext) */
62#define IVHD_SPECIAL_IOAPIC		1
63#define IVHD_SPECIAL_HPET		2
64
/* Bits of ivhd_header.flags (none are tested in this part of the file) */
65#define IVHD_FLAG_HT_TUN_EN_MASK        0x01
66#define IVHD_FLAG_PASSPW_EN_MASK        0x02
67#define IVHD_FLAG_RESPASSPW_EN_MASK     0x04
68#define IVHD_FLAG_ISOC_EN_MASK          0x08
69
/* Bits of ivmd_header.flags */
70#define IVMD_FLAG_EXCL_RANGE            0x08
71#define IVMD_FLAG_UNITY_MAP             0x01
72
/*
 * Per-device flags from an IVHD entry; set_dev_entry_from_acpi() translates
 * them into DEV_ENTRY_* bits of the device table entry.
 */
73#define ACPI_DEVFLAG_INITPASS           0x01
74#define ACPI_DEVFLAG_EXTINT             0x02
75#define ACPI_DEVFLAG_NMI                0x04
76#define ACPI_DEVFLAG_SYSMGT1            0x10
77#define ACPI_DEVFLAG_SYSMGT2            0x20
78#define ACPI_DEVFLAG_LINT0              0x40
79#define ACPI_DEVFLAG_LINT1              0x80
80#define ACPI_DEVFLAG_ATSDIS             0x10000000
81
82/*
83 * ACPI table definitions
84 *
85 * These data structures are laid over the table to parse the important values
86 * out of it.
87 */
88
89/*
90 * structure describing one IOMMU in the ACPI table. Typically followed by one
91 * or more ivhd_entrys.
92 */
93struct ivhd_header {
94	u8 type;	/* sub-block type; ACPI_IVHD_TYPE for this structure */
95	u8 flags;	/* saved in iommu->acpi_flags by init_iommu_from_acpi() */
96	u16 length;	/* size of the whole block in bytes, this header included */
97	u16 devid;	/* PCI device id of the IOMMU itself */
98	u16 cap_ptr;	/* config-space offset of the IOMMU capability block */
99	u64 mmio_phys;	/* physical base address of the MMIO register window */
100	u16 pci_seg;	/* copied to iommu->pci_seg */
101	u16 info;	/* not referenced in this part of the file */
102	u32 efr;	/* bits 13-16 and 17-22 decide the MMIO window size in init_iommu_one() */
103} __attribute__((packed));
104
105/*
106 * A device entry describing which devices a specific IOMMU translates and
107 * which requestor ids they use.
108 */
109struct ivhd_entry {
110	u8 type;	/* one of IVHD_DEV_*; top two bits encode the entry size */
111	u16 devid;	/* device id the entry refers to */
112	u8 flags;	/* ACPI_DEVFLAG_* bits for that device */
113	u32 ext;	/* read only for 8-byte (0x4x) entries: alias target, extended flags or special-device data */
114} __attribute__((packed));
115
116/*
117 * An AMD IOMMU memory definition structure. It defines things like exclusion
118 * ranges for devices and regions that should be unity mapped.
119 */
120struct ivmd_header {
121	u8 type;
122	u8 flags;	/* IVMD_FLAG_* bits; IVMD_FLAG_EXCL_RANGE marks an exclusion range */
123	u16 length;
124	u16 devid;
125	u16 aux;
126	u64 resv;
127	u64 range_start;	/* copied to iommu->exclusion_start for exclusion ranges */
128	u64 range_length;	/* copied to iommu->exclusion_length for exclusion ranges */
129} __attribute__((packed));
130
131bool amd_iommu_dump;
132bool amd_iommu_irq_remap __read_mostly;
133
134static bool amd_iommu_detected;
135static bool __initdata amd_iommu_disabled;
136
137u16 amd_iommu_last_bdf;			/* largest PCI device id we have
138					   to handle; raised by
					   update_last_devid() and used by
					   tbl_size() to size the tables */
139LIST_HEAD(amd_iommu_unity_map);		/* a list of required unity mappings
140					   we find in ACPI */
141u32 amd_iommu_unmap_flush;		/* if true, flush on every unmap */
142
143LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
144					   system; init_iommu_one() appends */
145
146/* Array to assign indices to IOMMUs: slot iommu->index is filled in init_iommu_one() */
147struct amd_iommu *amd_iommus[MAX_IOMMUS];
148int amd_iommus_present;	/* post-incremented for every IOMMU passed to init_iommu_one() */
149
150/* IOMMUs have a non-present cache? */
151bool amd_iommu_np_cache __read_mostly;
152bool amd_iommu_iotlb_sup __read_mostly = true;
153
154u32 amd_iommu_max_pasid __read_mostly = ~0;
155
156bool amd_iommu_v2_present __read_mostly;
157bool amd_iommu_pc_present __read_mostly;
158
159bool amd_iommu_force_isolation __read_mostly;
160
161/*
162 * List of protection domains - used during resume
163 */
164LIST_HEAD(amd_iommu_pd_list);
165spinlock_t amd_iommu_pd_lock;	/* presumably guards amd_iommu_pd_list; no user visible here -- TODO confirm */
166
167/*
168 * Pointer to the device table which is shared by all AMD IOMMUs
169 * it is indexed by the PCI device id or the HT unit id and contains
170 * information about the domain the device belongs to as well as the
171 * page table root pointer.
172 */
173struct dev_table_entry *amd_iommu_dev_table;	/* bits accessed via set_dev_entry_bit()/get_dev_entry_bit() */
174
175/*
176 * The alias table is a driver specific data structure which contains the
177 * mappings of the PCI device ids to the actual requestor ids on the IOMMU.
178 * More than one device can share the same requestor id.
179 */
180u16 *amd_iommu_alias_table;	/* filled from IVHD alias entries in init_iommu_from_acpi() */
181
182/*
183 * The rlookup table is used to find the IOMMU which is responsible
184 * for a specific device. It is also indexed by the PCI device id.
185 */
186struct amd_iommu **amd_iommu_rlookup_table;	/* written by set_iommu_for_device() */
187
188/*
189 * This table is used to find the irq remapping table for a given device id
190 * quickly.
191 */
192struct irq_remap_table **irq_lookup_table;
193
194/*
195 * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
196 * to know which ones are already in use.
197 */
198unsigned long *amd_iommu_pd_alloc_bitmap;
199
200static u32 dev_table_size;	/* size of the device table; (size >> 12) - 1 is programmed by iommu_set_device_table() */
201static u32 alias_table_size;	/* size of the alias table */
202static u32 rlookup_table_size;	/* size of the rlookup table */
203
/*
 * States of the driver initialization, tracked in init_state and requested
 * through iommu_go_to_state(). Presumably listed in the order in which
 * initialization advances, with the two failure states last -- TODO confirm
 * against iommu_go_to_state(), which is not visible here.
 */
204enum iommu_init_state {
205	IOMMU_START_STATE,	/* initial value of init_state */
206	IOMMU_IVRS_DETECTED,
207	IOMMU_ACPI_FINISHED,
208	IOMMU_ENABLED,
209	IOMMU_PCI_INIT,
210	IOMMU_INTERRUPTS_EN,
211	IOMMU_DMA_OPS,
212	IOMMU_INITIALIZED,
213	IOMMU_NOT_FOUND,
214	IOMMU_INIT_ERROR,
215};
216
217/* Early ioapic and hpet maps from kernel command line */
218#define EARLY_MAP_SIZE		4	/* capacity of each early map array below */
219static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
220static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
221static int __initdata early_ioapic_map_size;	/* used entries; add_early_maps() walks 0..size-1 */
222static int __initdata early_hpet_map_size;	/* used entries; add_early_maps() walks 0..size-1 */
223static bool __initdata cmdline_maps;
224
225static enum iommu_init_state init_state = IOMMU_START_STATE;
226
227static int amd_iommu_enable_interrupts(void);
228static int __init iommu_go_to_state(enum iommu_init_state state);
229
230static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
231				    u8 bank, u8 cntr, u8 fxn,
232				    u64 *value, bool is_write);
233
234static inline void update_last_devid(u16 devid)
235{
236	if (devid > amd_iommu_last_bdf)
237		amd_iommu_last_bdf = devid;
238}
239
240static inline unsigned long tbl_size(int entry_size)
241{
242	unsigned shift = PAGE_SHIFT +
243			 get_order(((int)amd_iommu_last_bdf + 1) * entry_size);
244
245	return 1UL << shift;
246}
247
248/* Access to l1 and l2 indexed register spaces */
249
250static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
251{
252	u32 val;
253
254	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
255	pci_read_config_dword(iommu->dev, 0xfc, &val);
256	return val;
257}
258
259static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
260{
261	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
262	pci_write_config_dword(iommu->dev, 0xfc, val);
263	pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
264}
265
266static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
267{
268	u32 val;
269
270	pci_write_config_dword(iommu->dev, 0xf0, address);
271	pci_read_config_dword(iommu->dev, 0xf4, &val);
272	return val;
273}
274
275static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
276{
277	pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
278	pci_write_config_dword(iommu->dev, 0xf4, val);
279}
280
281/****************************************************************************
282 *
283 * AMD IOMMU MMIO register space handling functions
284 *
285 * These functions are used to program the IOMMU device registers in
286 * MMIO space required for that driver.
287 *
288 ****************************************************************************/
289
290/*
291 * This function set the exclusion range in the IOMMU. DMA accesses to the
292 * exclusion range are passed through untranslated
293 */
294static void iommu_set_exclusion_range(struct amd_iommu *iommu)
295{
296	u64 start = iommu->exclusion_start & PAGE_MASK;
297	u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
298	u64 entry;
299
300	if (!iommu->exclusion_start)
301		return;
302
303	entry = start | MMIO_EXCL_ENABLE_MASK;
304	memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
305			&entry, sizeof(entry));
306
307	entry = limit;
308	memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
309			&entry, sizeof(entry));
310}
311
312/* Programs the physical address of the device table into the IOMMU hardware */
313static void iommu_set_device_table(struct amd_iommu *iommu)
314{
315	u64 entry;
316
317	BUG_ON(iommu->mmio_base == NULL);
318
319	entry = virt_to_phys(amd_iommu_dev_table);
320	entry |= (dev_table_size >> 12) - 1;
321	memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
322			&entry, sizeof(entry));
323}
324
325/* Generic functions to enable/disable certain features of the IOMMU. */
326static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
327{
328	u32 ctrl;
329
330	ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
331	ctrl |= (1 << bit);
332	writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
333}
334
335static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
336{
337	u32 ctrl;
338
339	ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
340	ctrl &= ~(1 << bit);
341	writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
342}
343
344static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
345{
346	u32 ctrl;
347
348	ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
349	ctrl &= ~CTRL_INV_TO_MASK;
350	ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
351	writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
352}
353
354/* Function to enable the hardware */
355static void iommu_enable(struct amd_iommu *iommu)
356{
357	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
358}
359
360static void iommu_disable(struct amd_iommu *iommu)
361{
362	/* Disable command buffer */
363	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
364
365	/* Disable event logging and event interrupts */
366	iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
367	iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
368
369	/* Disable IOMMU hardware itself */
370	iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
371}
372
373/*
374 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
375 * the system has one.
376 */
377static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
378{
379	if (!request_mem_region(address, end, "amd_iommu")) {
380		pr_err("AMD-Vi: Can not reserve memory region %llx-%llx for mmio\n",
381			address, end);
382		pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n");
383		return NULL;
384	}
385
386	return (u8 __iomem *)ioremap_nocache(address, end);
387}
388
389static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
390{
391	if (iommu->mmio_base)
392		iounmap(iommu->mmio_base);
393	release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end);
394}
395
396/****************************************************************************
397 *
398 * The functions below belong to the first pass of AMD IOMMU ACPI table
399 * parsing. In this pass we try to find out the highest device id this
400 * code has to handle. Upon this information the size of the shared data
401 * structures is determined later.
402 *
403 ****************************************************************************/
404
/*
 * Length in bytes of the IVHD device entry starting at @ivhd. The top two
 * bits of the first (type) byte select the size: 0 -> 4, 1 -> 8, 2 -> 16,
 * 3 -> 32 bytes.
 */
static inline int ivhd_entry_length(u8 *ivhd)
{
	unsigned int size_code = *ivhd >> 6;

	return 0x04 << size_code;
}
412
413/*
414 * This function reads the last device id the IOMMU has to handle from the PCI
415 * capability header for this IOMMU
416 */
417static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
418{
419	u32 cap;
420
421	cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
422	update_last_devid(PCI_DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
423
424	return 0;
425}
426
427/*
428 * After reading the highest device id from the IOMMU PCI capability header
429 * this function looks if there is a higher device id defined in the ACPI table
430 */
431static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
432{
433	u8 *p = (void *)h, *end = (void *)h;
434	struct ivhd_entry *dev;
435
436	p += sizeof(*h);
437	end += h->length;
438
439	find_last_devid_on_pci(PCI_BUS_NUM(h->devid),
440			PCI_SLOT(h->devid),
441			PCI_FUNC(h->devid),
442			h->cap_ptr);
443
444	while (p < end) {
445		dev = (struct ivhd_entry *)p;
446		switch (dev->type) {
447		case IVHD_DEV_SELECT:
448		case IVHD_DEV_RANGE_END:
449		case IVHD_DEV_ALIAS:
450		case IVHD_DEV_EXT_SELECT:
451			/* all the above subfield types refer to device ids */
452			update_last_devid(dev->devid);
453			break;
454		default:
455			break;
456		}
457		p += ivhd_entry_length(p);
458	}
459
460	WARN_ON(p != end);
461
462	return 0;
463}
464
465/*
466 * Iterate over all IVHD entries in the ACPI table and find the highest device
467 * id which we need to handle. This is the first of three functions which parse
468 * the ACPI table. So we check the checksum here.
469 */
470static int __init find_last_devid_acpi(struct acpi_table_header *table)
471{
472	int i;
473	u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table;
474	struct ivhd_header *h;
475
476	/*
477	 * Validate checksum here so we don't need to do it when
478	 * we actually parse the table
479	 */
480	for (i = 0; i < table->length; ++i)
481		checksum += p[i];
482	if (checksum != 0)
483		/* ACPI table corrupt */
484		return -ENODEV;
485
486	p += IVRS_HEADER_LENGTH;
487
488	end += table->length;
489	while (p < end) {
490		h = (struct ivhd_header *)p;
491		switch (h->type) {
492		case ACPI_IVHD_TYPE:
493			find_last_devid_from_ivhd(h);
494			break;
495		default:
496			break;
497		}
498		p += h->length;
499	}
500	WARN_ON(p != end);
501
502	return 0;
503}
504
505/****************************************************************************
506 *
507 * The following functions belong to the code path which parses the ACPI table
508 * the second time. In this ACPI parsing iteration we allocate IOMMU specific
509 * data structures, initialize the device/alias/rlookup table and also
510 * basically initialize the hardware.
511 *
512 ****************************************************************************/
513
514/*
515 * Allocates the command buffer. This buffer is per AMD IOMMU. We can
516 * write commands to that buffer later and the IOMMU will execute them
517 * asynchronously
518 */
519static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
520{
521	u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
522			get_order(CMD_BUFFER_SIZE));
523
524	if (cmd_buf == NULL)
525		return NULL;
526
527	iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED;
528
529	return cmd_buf;
530}
531
532/*
533 * This function resets the command buffer if the IOMMU stopped fetching
534 * commands from it.
535 */
536void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
537{
538	iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
539
540	writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
541	writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
542
543	iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
544}
545
546/*
547 * This function writes the command buffer address to the hardware and
548 * enables it.
549 */
550static void iommu_enable_command_buffer(struct amd_iommu *iommu)
551{
552	u64 entry;
553
554	BUG_ON(iommu->cmd_buf == NULL);
555
556	entry = (u64)virt_to_phys(iommu->cmd_buf);
557	entry |= MMIO_CMD_SIZE_512;
558
559	memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
560		    &entry, sizeof(entry));
561
562	amd_iommu_reset_cmd_buffer(iommu);
563	iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED);
564}
565
566static void __init free_command_buffer(struct amd_iommu *iommu)
567{
568	free_pages((unsigned long)iommu->cmd_buf,
569		   get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED)));
570}
571
572/* allocates the memory where the IOMMU will log its events to */
573static u8 * __init alloc_event_buffer(struct amd_iommu *iommu)
574{
575	iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
576						get_order(EVT_BUFFER_SIZE));
577
578	if (iommu->evt_buf == NULL)
579		return NULL;
580
581	iommu->evt_buf_size = EVT_BUFFER_SIZE;
582
583	return iommu->evt_buf;
584}
585
586static void iommu_enable_event_buffer(struct amd_iommu *iommu)
587{
588	u64 entry;
589
590	BUG_ON(iommu->evt_buf == NULL);
591
592	entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
593
594	memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
595		    &entry, sizeof(entry));
596
597	/* set head and tail to zero manually */
598	writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
599	writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
600
601	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
602}
603
604static void __init free_event_buffer(struct amd_iommu *iommu)
605{
606	free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
607}
608
609/* allocates the memory where the IOMMU will log its events to */
610static u8 * __init alloc_ppr_log(struct amd_iommu *iommu)
611{
612	iommu->ppr_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
613						get_order(PPR_LOG_SIZE));
614
615	if (iommu->ppr_log == NULL)
616		return NULL;
617
618	return iommu->ppr_log;
619}
620
621static void iommu_enable_ppr_log(struct amd_iommu *iommu)
622{
623	u64 entry;
624
625	if (iommu->ppr_log == NULL)
626		return;
627
628	entry = (u64)virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512;
629
630	memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET,
631		    &entry, sizeof(entry));
632
633	/* set head and tail to zero manually */
634	writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
635	writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
636
637	iommu_feature_enable(iommu, CONTROL_PPFLOG_EN);
638	iommu_feature_enable(iommu, CONTROL_PPR_EN);
639}
640
641static void __init free_ppr_log(struct amd_iommu *iommu)
642{
643	if (iommu->ppr_log == NULL)
644		return;
645
646	free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE));
647}
648
649static void iommu_enable_gt(struct amd_iommu *iommu)
650{
651	if (!iommu_feature(iommu, FEATURE_GT))
652		return;
653
654	iommu_feature_enable(iommu, CONTROL_GT_EN);
655}
656
657/* sets a specific bit in the device table entry. */
658static void set_dev_entry_bit(u16 devid, u8 bit)
659{
660	int i = (bit >> 6) & 0x03;
661	int _bit = bit & 0x3f;
662
663	amd_iommu_dev_table[devid].data[i] |= (1UL << _bit);
664}
665
666static int get_dev_entry_bit(u16 devid, u8 bit)
667{
668	int i = (bit >> 6) & 0x03;
669	int _bit = bit & 0x3f;
670
671	return (amd_iommu_dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
672}
673
674
675void amd_iommu_apply_erratum_63(u16 devid)
676{
677	int sysmgt;
678
679	sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) |
680		 (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1);
681
682	if (sysmgt == 0x01)
683		set_dev_entry_bit(devid, DEV_ENTRY_IW);
684}
685
686/* Writes the specific IOMMU for a device into the rlookup table */
687static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
688{
689	amd_iommu_rlookup_table[devid] = iommu;
690}
691
692/*
693 * This function takes the device specific flags read from the ACPI
694 * table and sets up the device table entry with that information
695 */
696static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
697					   u16 devid, u32 flags, u32 ext_flags)
698{
699	if (flags & ACPI_DEVFLAG_INITPASS)
700		set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS);
701	if (flags & ACPI_DEVFLAG_EXTINT)
702		set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS);
703	if (flags & ACPI_DEVFLAG_NMI)
704		set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS);
705	if (flags & ACPI_DEVFLAG_SYSMGT1)
706		set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1);
707	if (flags & ACPI_DEVFLAG_SYSMGT2)
708		set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2);
709	if (flags & ACPI_DEVFLAG_LINT0)
710		set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS);
711	if (flags & ACPI_DEVFLAG_LINT1)
712		set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
713
714	amd_iommu_apply_erratum_63(devid);
715
716	set_iommu_for_device(iommu, devid);
717}
718
719static int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line)
720{
721	struct devid_map *entry;
722	struct list_head *list;
723
724	if (type == IVHD_SPECIAL_IOAPIC)
725		list = &ioapic_map;
726	else if (type == IVHD_SPECIAL_HPET)
727		list = &hpet_map;
728	else
729		return -EINVAL;
730
731	list_for_each_entry(entry, list, list) {
732		if (!(entry->id == id && entry->cmd_line))
733			continue;
734
735		pr_info("AMD-Vi: Command-line override present for %s id %d - ignoring\n",
736			type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
737
738		*devid = entry->devid;
739
740		return 0;
741	}
742
743	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
744	if (!entry)
745		return -ENOMEM;
746
747	entry->id	= id;
748	entry->devid	= *devid;
749	entry->cmd_line	= cmd_line;
750
751	list_add_tail(&entry->list, list);
752
753	return 0;
754}
755
756static int __init add_early_maps(void)
757{
758	int i, ret;
759
760	for (i = 0; i < early_ioapic_map_size; ++i) {
761		ret = add_special_device(IVHD_SPECIAL_IOAPIC,
762					 early_ioapic_map[i].id,
763					 &early_ioapic_map[i].devid,
764					 early_ioapic_map[i].cmd_line);
765		if (ret)
766			return ret;
767	}
768
769	for (i = 0; i < early_hpet_map_size; ++i) {
770		ret = add_special_device(IVHD_SPECIAL_HPET,
771					 early_hpet_map[i].id,
772					 &early_hpet_map[i].devid,
773					 early_hpet_map[i].cmd_line);
774		if (ret)
775			return ret;
776	}
777
778	return 0;
779}
780
781/*
782 * Reads the device exclusion range from ACPI and initializes the IOMMU with
783 * it
784 */
785static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
786{
787	struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
788
789	if (!(m->flags & IVMD_FLAG_EXCL_RANGE))
790		return;
791
792	if (iommu) {
793		/*
794		 * We only can configure exclusion ranges per IOMMU, not
795		 * per device. But we can enable the exclusion range per
796		 * device. This is done here
797		 */
798		set_dev_entry_bit(devid, DEV_ENTRY_EX);
799		iommu->exclusion_start = m->range_start;
800		iommu->exclusion_length = m->range_length;
801	}
802}
803
804/*
805 * Takes a pointer to an AMD IOMMU entry in the ACPI table and
806 * initializes the hardware and our data structures with it.
 *
 * The device entries following the IVHD header @h are walked one by one.
 * Single-device entries are applied immediately. Range entries
 * (IVHD_DEV_SELECT_RANGE_START, IVHD_DEV_ALIAS_RANGE,
 * IVHD_DEV_EXT_SELECT_RANGE) only record their parameters in devid_start,
 * flags, ext_flags, alias and devid_to; the recorded values are applied to
 * every device id of the range when the closing IVHD_DEV_RANGE_END entry is
 * reached.
 *
 * Returns 0 on success or the error reported by add_early_maps() or
 * add_special_device().
 *
 * NOTE(review): the IVHD_DEV_ALL case relies on iommu->first_device and
 * iommu->last_device, which are not assigned anywhere in the visible part
 * of this file before this function runs -- confirm they are initialized
 * before IVHD parsing.
807 */
808static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
809					struct ivhd_header *h)
810{
811	u8 *p = (u8 *)h;
812	u8 *end = p, flags = 0;
813	u16 devid = 0, devid_start = 0, devid_to = 0;
814	u32 dev_i, ext_flags = 0;
815	bool alias = false;
816	struct ivhd_entry *e;
817	int ret;
818
819
	/*
	 * Runs on every call, i.e. once per IOMMU via init_iommu_one().
	 * Entries already on the list are detected by add_special_device().
	 */
820	ret = add_early_maps();
821	if (ret)
822		return ret;
823
824	/*
825	 * First save the recommended feature enable bits from ACPI
826	 */
827	iommu->acpi_flags = h->flags;
828
829	/*
830	 * Done. Now parse the device entries
831	 */
832	p += sizeof(struct ivhd_header);
833	end += h->length;
834
835
836	while (p < end) {
837		e = (struct ivhd_entry *)p;
838		switch (e->type) {
839		case IVHD_DEV_ALL:
840
841			DUMP_printk("  DEV_ALL\t\t\t first devid: %02x:%02x.%x"
842				    " last device %02x:%02x.%x flags: %02x\n",
843				    PCI_BUS_NUM(iommu->first_device),
844				    PCI_SLOT(iommu->first_device),
845				    PCI_FUNC(iommu->first_device),
846				    PCI_BUS_NUM(iommu->last_device),
847				    PCI_SLOT(iommu->last_device),
848				    PCI_FUNC(iommu->last_device),
849				    e->flags);
850
			/* apply the flags to the whole device range of this IOMMU */
851			for (dev_i = iommu->first_device;
852					dev_i <= iommu->last_device; ++dev_i)
853				set_dev_entry_from_acpi(iommu, dev_i,
854							e->flags, 0);
855			break;
856		case IVHD_DEV_SELECT:
857
858			DUMP_printk("  DEV_SELECT\t\t\t devid: %02x:%02x.%x "
859				    "flags: %02x\n",
860				    PCI_BUS_NUM(e->devid),
861				    PCI_SLOT(e->devid),
862				    PCI_FUNC(e->devid),
863				    e->flags);
864
865			devid = e->devid;
866			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
867			break;
868		case IVHD_DEV_SELECT_RANGE_START:
869
870			DUMP_printk("  DEV_SELECT_RANGE_START\t "
871				    "devid: %02x:%02x.%x flags: %02x\n",
872				    PCI_BUS_NUM(e->devid),
873				    PCI_SLOT(e->devid),
874				    PCI_FUNC(e->devid),
875				    e->flags);
876
			/* only remember the range; applied at IVHD_DEV_RANGE_END */
877			devid_start = e->devid;
878			flags = e->flags;
879			ext_flags = 0;
880			alias = false;
881			break;
882		case IVHD_DEV_ALIAS:
883
884			DUMP_printk("  DEV_ALIAS\t\t\t devid: %02x:%02x.%x "
885				    "flags: %02x devid_to: %02x:%02x.%x\n",
886				    PCI_BUS_NUM(e->devid),
887				    PCI_SLOT(e->devid),
888				    PCI_FUNC(e->devid),
889				    e->flags,
890				    PCI_BUS_NUM(e->ext >> 8),
891				    PCI_SLOT(e->ext >> 8),
892				    PCI_FUNC(e->ext >> 8));
893
894			devid = e->devid;
			/* alias target; the assignment to u16 keeps the low 16 bits */
895			devid_to = e->ext >> 8;
			/* both the device and its alias target get the flags */
896			set_dev_entry_from_acpi(iommu, devid   , e->flags, 0);
897			set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
898			amd_iommu_alias_table[devid] = devid_to;
899			break;
900		case IVHD_DEV_ALIAS_RANGE:
901
902			DUMP_printk("  DEV_ALIAS_RANGE\t\t "
903				    "devid: %02x:%02x.%x flags: %02x "
904				    "devid_to: %02x:%02x.%x\n",
905				    PCI_BUS_NUM(e->devid),
906				    PCI_SLOT(e->devid),
907				    PCI_FUNC(e->devid),
908				    e->flags,
909				    PCI_BUS_NUM(e->ext >> 8),
910				    PCI_SLOT(e->ext >> 8),
911				    PCI_FUNC(e->ext >> 8));
912
			/* only remember the range; applied at IVHD_DEV_RANGE_END */
913			devid_start = e->devid;
914			flags = e->flags;
915			devid_to = e->ext >> 8;
916			ext_flags = 0;
917			alias = true;
918			break;
919		case IVHD_DEV_EXT_SELECT:
920
921			DUMP_printk("  DEV_EXT_SELECT\t\t devid: %02x:%02x.%x "
922				    "flags: %02x ext: %08x\n",
923				    PCI_BUS_NUM(e->devid),
924				    PCI_SLOT(e->devid),
925				    PCI_FUNC(e->devid),
926				    e->flags, e->ext);
927
928			devid = e->devid;
			/* e->ext is passed on as ext_flags, which the callee does not evaluate */
929			set_dev_entry_from_acpi(iommu, devid, e->flags,
930						e->ext);
931			break;
932		case IVHD_DEV_EXT_SELECT_RANGE:
933
934			DUMP_printk("  DEV_EXT_SELECT_RANGE\t devid: "
935				    "%02x:%02x.%x flags: %02x ext: %08x\n",
936				    PCI_BUS_NUM(e->devid),
937				    PCI_SLOT(e->devid),
938				    PCI_FUNC(e->devid),
939				    e->flags, e->ext);
940
			/* only remember the range; applied at IVHD_DEV_RANGE_END */
941			devid_start = e->devid;
942			flags = e->flags;
943			ext_flags = e->ext;
944			alias = false;
945			break;
946		case IVHD_DEV_RANGE_END:
947
948			DUMP_printk("  DEV_RANGE_END\t\t devid: %02x:%02x.%x\n",
949				    PCI_BUS_NUM(e->devid),
950				    PCI_SLOT(e->devid),
951				    PCI_FUNC(e->devid));
952
			/*
			 * Close the range opened by the last range-start
			 * entry: apply the remembered flags to every device
			 * id from devid_start up to and including this one.
			 */
953			devid = e->devid;
954			for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
955				if (alias) {
956					amd_iommu_alias_table[dev_i] = devid_to;
957					set_dev_entry_from_acpi(iommu,
958						devid_to, flags, ext_flags);
959				}
960				set_dev_entry_from_acpi(iommu, dev_i,
961							flags, ext_flags);
962			}
963			break;
964		case IVHD_DEV_SPECIAL: {
			/* devid and ret below shadow the function-level variables */
965			u8 handle, type;
966			const char *var;
967			u16 devid;
968			int ret;
969
			/* ext layout: bits 0-7 handle, bits 8-23 devid, bits 24-31 type */
970			handle = e->ext & 0xff;
971			devid  = (e->ext >>  8) & 0xffff;
972			type   = (e->ext >> 24) & 0xff;
973
974			if (type == IVHD_SPECIAL_IOAPIC)
975				var = "IOAPIC";
976			else if (type == IVHD_SPECIAL_HPET)
977				var = "HPET";
978			else
979				var = "UNKNOWN";
980
981			DUMP_printk("  DEV_SPECIAL(%s[%d])\t\tdevid: %02x:%02x.%x\n",
982				    var, (int)handle,
983				    PCI_BUS_NUM(devid),
984				    PCI_SLOT(devid),
985				    PCI_FUNC(devid));
986
			/* an unknown type makes add_special_device() fail with -EINVAL */
987			ret = add_special_device(type, handle, &devid, false);
988			if (ret)
989				return ret;
990
991			/*
992			 * add_special_device might update the devid in case a
993			 * command-line override is present. So call
994			 * set_dev_entry_from_acpi after add_special_device.
995			 */
996			set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
997
998			break;
999		}
1000		default:
			/* unknown entry types are skipped */
1001			break;
1002		}
1003
		/* entry size is encoded in the top two bits of the type byte */
1004		p += ivhd_entry_length(p);
1005	}
1006
1007	return 0;
1008}
1009
1010/* Initializes the device->iommu mapping for the driver */
1011static int __init init_iommu_devices(struct amd_iommu *iommu)
1012{
1013	u32 i;
1014
1015	for (i = iommu->first_device; i <= iommu->last_device; ++i)
1016		set_iommu_for_device(iommu, i);
1017
1018	return 0;
1019}
1020
1021static void __init free_iommu_one(struct amd_iommu *iommu)
1022{
1023	free_command_buffer(iommu);
1024	free_event_buffer(iommu);
1025	free_ppr_log(iommu);
1026	iommu_unmap_mmio_space(iommu);
1027}
1028
1029static void __init free_iommu_all(void)
1030{
1031	struct amd_iommu *iommu, *next;
1032
1033	for_each_iommu_safe(iommu, next) {
1034		list_del(&iommu->list);
1035		free_iommu_one(iommu);
1036		kfree(iommu);
1037	}
1038}
1039
1040/*
1041 * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
1042 * Workaround:
1043 *     BIOS should disable L2B micellaneous clock gating by setting
1044 *     L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b
1045 */
1046static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
1047{
1048	u32 value;
1049
1050	if ((boot_cpu_data.x86 != 0x15) ||
1051	    (boot_cpu_data.x86_model < 0x10) ||
1052	    (boot_cpu_data.x86_model > 0x1f))
1053		return;
1054
1055	pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1056	pci_read_config_dword(iommu->dev, 0xf4, &value);
1057
1058	if (value & BIT(2))
1059		return;
1060
1061	/* Select NB indirect register 0x90 and enable writing */
1062	pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8));
1063
1064	pci_write_config_dword(iommu->dev, 0xf4, value | 0x4);
1065	pr_info("AMD-Vi: Applying erratum 746 workaround for IOMMU at %s\n",
1066		dev_name(&iommu->dev->dev));
1067
1068	/* Clear the enable writing bit */
1069	pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1070}
1071
1072/*
1073 * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission)
1074 * Workaround:
1075 *     BIOS should enable ATS write permission check by setting
1076 *     L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b
1077 */
1078static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
1079{
1080	u32 value;
1081
1082	if ((boot_cpu_data.x86 != 0x15) ||
1083	    (boot_cpu_data.x86_model < 0x30) ||
1084	    (boot_cpu_data.x86_model > 0x3f))
1085		return;
1086
1087	/* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
1088	value = iommu_read_l2(iommu, 0x47);
1089
1090	if (value & BIT(0))
1091		return;
1092
1093	/* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
1094	iommu_write_l2(iommu, 0x47, value | BIT(0));
1095
1096	pr_info("AMD-Vi: Applying ATS write check workaround for IOMMU at %s\n",
1097		dev_name(&iommu->dev->dev));
1098}
1099
1100/*
1101 * This function clues the initialization function for one IOMMU
1102 * together and also allocates the command buffer and programs the
1103 * hardware. It does NOT enable the IOMMU. This is done afterwards.
1104 */
1105static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
1106{
1107	int ret;
1108
1109	spin_lock_init(&iommu->lock);
1110
1111	/* Add IOMMU to internal data structures */
1112	list_add_tail(&iommu->list, &amd_iommu_list);
1113	iommu->index             = amd_iommus_present++;
1114
1115	if (unlikely(iommu->index >= MAX_IOMMUS)) {
1116		WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n");
1117		return -ENOSYS;
1118	}
1119
1120	/* Index is fine - add IOMMU to the array */
1121	amd_iommus[iommu->index] = iommu;
1122
1123	/*
1124	 * Copy data from ACPI table entry to the iommu struct
1125	 */
1126	iommu->devid   = h->devid;
1127	iommu->cap_ptr = h->cap_ptr;
1128	iommu->pci_seg = h->pci_seg;
1129	iommu->mmio_phys = h->mmio_phys;
1130
1131	/* Check if IVHD EFR contains proper max banks/counters */
1132	if ((h->efr != 0) &&
1133	    ((h->efr & (0xF << 13)) != 0) &&
1134	    ((h->efr & (0x3F << 17)) != 0)) {
1135		iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1136	} else {
1137		iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1138	}
1139
1140	iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys,
1141						iommu->mmio_phys_end);
1142	if (!iommu->mmio_base)
1143		return -ENOMEM;
1144
1145	iommu->cmd_buf = alloc_command_buffer(iommu);
1146	if (!iommu->cmd_buf)
1147		return -ENOMEM;
1148
1149	iommu->evt_buf = alloc_event_buffer(iommu);
1150	if (!iommu->evt_buf)
1151		return -ENOMEM;
1152
1153	iommu->int_enabled = false;
1154
1155	ret = init_iommu_from_acpi(iommu, h);
1156	if (ret)
1157		return ret;
1158
1159	/*
1160	 * Make sure IOMMU is not considered to translate itself. The IVRS
1161	 * table tells us so, but this is a lie!
1162	 */
1163	amd_iommu_rlookup_table[iommu->devid] = NULL;
1164
1165	init_iommu_devices(iommu);
1166
1167	return 0;
1168}
1169
1170/*
1171 * Iterates over all IOMMU entries in the ACPI table, allocates the
1172 * IOMMU structure and initializes it with init_iommu_one()
1173 */
1174static int __init init_iommu_all(struct acpi_table_header *table)
1175{
1176	u8 *p = (u8 *)table, *end = (u8 *)table;
1177	struct ivhd_header *h;
1178	struct amd_iommu *iommu;
1179	int ret;
1180
1181	end += table->length;
1182	p += IVRS_HEADER_LENGTH;
1183
1184	while (p < end) {
1185		h = (struct ivhd_header *)p;
1186		switch (*p) {
1187		case ACPI_IVHD_TYPE:
1188
1189			DUMP_printk("device: %02x:%02x.%01x cap: %04x "
1190				    "seg: %d flags: %01x info %04x\n",
1191				    PCI_BUS_NUM(h->devid), PCI_SLOT(h->devid),
1192				    PCI_FUNC(h->devid), h->cap_ptr,
1193				    h->pci_seg, h->flags, h->info);
1194			DUMP_printk("       mmio-addr: %016llx\n",
1195				    h->mmio_phys);
1196
1197			iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
1198			if (iommu == NULL)
1199				return -ENOMEM;
1200
1201			ret = init_iommu_one(iommu, h);
1202			if (ret)
1203				return ret;
1204			break;
1205		default:
1206			break;
1207		}
1208		p += h->length;
1209
1210	}
1211	WARN_ON(p != end);
1212
1213	return 0;
1214}
1215
1216
1217static void init_iommu_perf_ctr(struct amd_iommu *iommu)
1218{
1219	u64 val = 0xabcd, val2 = 0;
1220
1221	if (!iommu_feature(iommu, FEATURE_PC))
1222		return;
1223
1224	amd_iommu_pc_present = true;
1225
1226	/* Check if the performance counters can be written to */
1227	if ((0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val, true)) ||
1228	    (0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val2, false)) ||
1229	    (val != val2)) {
1230		pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n");
1231		amd_iommu_pc_present = false;
1232		return;
1233	}
1234
1235	pr_info("AMD-Vi: IOMMU performance counters supported\n");
1236
1237	val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
1238	iommu->max_banks = (u8) ((val >> 12) & 0x3f);
1239	iommu->max_counters = (u8) ((val >> 7) & 0xf);
1240}
1241
1242static ssize_t amd_iommu_show_cap(struct device *dev,
1243				  struct device_attribute *attr,
1244				  char *buf)
1245{
1246	struct amd_iommu *iommu = dev_get_drvdata(dev);
1247	return sprintf(buf, "%x\n", iommu->cap);
1248}
1249static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL);
1250
1251static ssize_t amd_iommu_show_features(struct device *dev,
1252				       struct device_attribute *attr,
1253				       char *buf)
1254{
1255	struct amd_iommu *iommu = dev_get_drvdata(dev);
1256	return sprintf(buf, "%llx\n", iommu->features);
1257}
1258static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
1259
/* NULL-terminated list of the per-IOMMU attributes ("cap", "features") */
static struct attribute *amd_iommu_attrs[] = {
	&dev_attr_cap.attr,
	&dev_attr_features.attr,
	NULL,
};

/* The attributes above are grouped under the name "amd-iommu" */
static struct attribute_group amd_iommu_group = {
	.name = "amd-iommu",
	.attrs = amd_iommu_attrs,
};

/* NULL-terminated group list handed to iommu_device_create() */
static const struct attribute_group *amd_iommu_groups[] = {
	&amd_iommu_group,
	NULL,
};
1275
1276static int iommu_init_pci(struct amd_iommu *iommu)
1277{
1278	int cap_ptr = iommu->cap_ptr;
1279	u32 range, misc, low, high;
1280
1281	iommu->dev = pci_get_bus_and_slot(PCI_BUS_NUM(iommu->devid),
1282					  iommu->devid & 0xff);
1283	if (!iommu->dev)
1284		return -ENODEV;
1285
1286	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
1287			      &iommu->cap);
1288	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET,
1289			      &range);
1290	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET,
1291			      &misc);
1292
1293	iommu->first_device = PCI_DEVID(MMIO_GET_BUS(range),
1294					 MMIO_GET_FD(range));
1295	iommu->last_device = PCI_DEVID(MMIO_GET_BUS(range),
1296					MMIO_GET_LD(range));
1297
1298	if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
1299		amd_iommu_iotlb_sup = false;
1300
1301	/* read extended feature bits */
1302	low  = readl(iommu->mmio_base + MMIO_EXT_FEATURES);
1303	high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4);
1304
1305	iommu->features = ((u64)high << 32) | low;
1306
1307	if (iommu_feature(iommu, FEATURE_GT)) {
1308		int glxval;
1309		u32 max_pasid;
1310		u64 pasmax;
1311
1312		pasmax = iommu->features & FEATURE_PASID_MASK;
1313		pasmax >>= FEATURE_PASID_SHIFT;
1314		max_pasid  = (1 << (pasmax + 1)) - 1;
1315
1316		amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid);
1317
1318		BUG_ON(amd_iommu_max_pasid & ~PASID_MASK);
1319
1320		glxval   = iommu->features & FEATURE_GLXVAL_MASK;
1321		glxval >>= FEATURE_GLXVAL_SHIFT;
1322
1323		if (amd_iommu_max_glx_val == -1)
1324			amd_iommu_max_glx_val = glxval;
1325		else
1326			amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
1327	}
1328
1329	if (iommu_feature(iommu, FEATURE_GT) &&
1330	    iommu_feature(iommu, FEATURE_PPR)) {
1331		iommu->is_iommu_v2   = true;
1332		amd_iommu_v2_present = true;
1333	}
1334
1335	if (iommu_feature(iommu, FEATURE_PPR)) {
1336		iommu->ppr_log = alloc_ppr_log(iommu);
1337		if (!iommu->ppr_log)
1338			return -ENOMEM;
1339	}
1340
1341	if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
1342		amd_iommu_np_cache = true;
1343
1344	init_iommu_perf_ctr(iommu);
1345
1346	if (is_rd890_iommu(iommu->dev)) {
1347		int i, j;
1348
1349		iommu->root_pdev = pci_get_bus_and_slot(iommu->dev->bus->number,
1350				PCI_DEVFN(0, 0));
1351
1352		/*
1353		 * Some rd890 systems may not be fully reconfigured by the
1354		 * BIOS, so it's necessary for us to store this information so
1355		 * it can be reprogrammed on resume
1356		 */
1357		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
1358				&iommu->stored_addr_lo);
1359		pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
1360				&iommu->stored_addr_hi);
1361
1362		/* Low bit locks writes to configuration space */
1363		iommu->stored_addr_lo &= ~1;
1364
1365		for (i = 0; i < 6; i++)
1366			for (j = 0; j < 0x12; j++)
1367				iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
1368
1369		for (i = 0; i < 0x83; i++)
1370			iommu->stored_l2[i] = iommu_read_l2(iommu, i);
1371	}
1372
1373	amd_iommu_erratum_746_workaround(iommu);
1374	amd_iommu_ats_write_check_workaround(iommu);
1375
1376	iommu->iommu_dev = iommu_device_create(&iommu->dev->dev, iommu,
1377					       amd_iommu_groups, "ivhd%d",
1378					       iommu->index);
1379
1380	return pci_enable_device(iommu->dev);
1381}
1382
1383static void print_iommu_info(void)
1384{
1385	static const char * const feat_str[] = {
1386		"PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
1387		"IA", "GA", "HE", "PC"
1388	};
1389	struct amd_iommu *iommu;
1390
1391	for_each_iommu(iommu) {
1392		int i;
1393
1394		pr_info("AMD-Vi: Found IOMMU at %s cap 0x%hx\n",
1395			dev_name(&iommu->dev->dev), iommu->cap_ptr);
1396
1397		if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
1398			pr_info("AMD-Vi:  Extended features: ");
1399			for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
1400				if (iommu_feature(iommu, (1ULL << i)))
1401					pr_cont(" %s", feat_str[i]);
1402			}
1403			pr_cont("\n");
1404		}
1405	}
1406	if (irq_remapping_enabled)
1407		pr_info("AMD-Vi: Interrupt remapping enabled\n");
1408}
1409
1410static int __init amd_iommu_init_pci(void)
1411{
1412	struct amd_iommu *iommu;
1413	int ret = 0;
1414
1415	for_each_iommu(iommu) {
1416		ret = iommu_init_pci(iommu);
1417		if (ret)
1418			break;
1419	}
1420
1421	ret = amd_iommu_init_devices();
1422
1423	print_iommu_info();
1424
1425	return ret;
1426}
1427
1428/****************************************************************************
1429 *
1430 * The following functions initialize the MSI interrupts for all IOMMUs
1431 * in the system. It's a bit challenging because there could be multiple
1432 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
1433 * pci_dev.
1434 *
1435 ****************************************************************************/
1436
1437static int iommu_setup_msi(struct amd_iommu *iommu)
1438{
1439	int r;
1440
1441	r = pci_enable_msi(iommu->dev);
1442	if (r)
1443		return r;
1444
1445	r = request_threaded_irq(iommu->dev->irq,
1446				 amd_iommu_int_handler,
1447				 amd_iommu_int_thread,
1448				 0, "AMD-Vi",
1449				 iommu);
1450
1451	if (r) {
1452		pci_disable_msi(iommu->dev);
1453		return r;
1454	}
1455
1456	iommu->int_enabled = true;
1457
1458	return 0;
1459}
1460
1461static int iommu_init_msi(struct amd_iommu *iommu)
1462{
1463	int ret;
1464
1465	if (iommu->int_enabled)
1466		goto enable_faults;
1467
1468	if (iommu->dev->msi_cap)
1469		ret = iommu_setup_msi(iommu);
1470	else
1471		ret = -ENODEV;
1472
1473	if (ret)
1474		return ret;
1475
1476enable_faults:
1477	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
1478
1479	if (iommu->ppr_log != NULL)
1480		iommu_feature_enable(iommu, CONTROL_PPFINT_EN);
1481
1482	return 0;
1483}
1484
1485/****************************************************************************
1486 *
1487 * The next functions belong to the third pass of parsing the ACPI
1488 * table. In this last pass the memory mapping requirements are
1489 * gathered (like exclusion and unity mapping ranges).
1490 *
1491 ****************************************************************************/
1492
1493static void __init free_unity_maps(void)
1494{
1495	struct unity_map_entry *entry, *next;
1496
1497	list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) {
1498		list_del(&entry->list);
1499		kfree(entry);
1500	}
1501}
1502
1503/* called when we find an exclusion range definition in ACPI */
1504static int __init init_exclusion_range(struct ivmd_header *m)
1505{
1506	int i;
1507
1508	switch (m->type) {
1509	case ACPI_IVMD_TYPE:
1510		set_device_exclusion_range(m->devid, m);
1511		break;
1512	case ACPI_IVMD_TYPE_ALL:
1513		for (i = 0; i <= amd_iommu_last_bdf; ++i)
1514			set_device_exclusion_range(i, m);
1515		break;
1516	case ACPI_IVMD_TYPE_RANGE:
1517		for (i = m->devid; i <= m->aux; ++i)
1518			set_device_exclusion_range(i, m);
1519		break;
1520	default:
1521		break;
1522	}
1523
1524	return 0;
1525}
1526
1527/* called for unity map ACPI definition */
1528static int __init init_unity_map_range(struct ivmd_header *m)
1529{
1530	struct unity_map_entry *e = NULL;
1531	char *s;
1532
1533	e = kzalloc(sizeof(*e), GFP_KERNEL);
1534	if (e == NULL)
1535		return -ENOMEM;
1536
1537	switch (m->type) {
1538	default:
1539		kfree(e);
1540		return 0;
1541	case ACPI_IVMD_TYPE:
1542		s = "IVMD_TYPEi\t\t\t";
1543		e->devid_start = e->devid_end = m->devid;
1544		break;
1545	case ACPI_IVMD_TYPE_ALL:
1546		s = "IVMD_TYPE_ALL\t\t";
1547		e->devid_start = 0;
1548		e->devid_end = amd_iommu_last_bdf;
1549		break;
1550	case ACPI_IVMD_TYPE_RANGE:
1551		s = "IVMD_TYPE_RANGE\t\t";
1552		e->devid_start = m->devid;
1553		e->devid_end = m->aux;
1554		break;
1555	}
1556	e->address_start = PAGE_ALIGN(m->range_start);
1557	e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
1558	e->prot = m->flags >> 1;
1559
1560	DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
1561		    " range_start: %016llx range_end: %016llx flags: %x\n", s,
1562		    PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),
1563		    PCI_FUNC(e->devid_start), PCI_BUS_NUM(e->devid_end),
1564		    PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
1565		    e->address_start, e->address_end, m->flags);
1566
1567	list_add_tail(&e->list, &amd_iommu_unity_map);
1568
1569	return 0;
1570}
1571
1572/* iterates over all memory definitions we find in the ACPI table */
1573static int __init init_memory_definitions(struct acpi_table_header *table)
1574{
1575	u8 *p = (u8 *)table, *end = (u8 *)table;
1576	struct ivmd_header *m;
1577
1578	end += table->length;
1579	p += IVRS_HEADER_LENGTH;
1580
1581	while (p < end) {
1582		m = (struct ivmd_header *)p;
1583		if (m->flags & IVMD_FLAG_EXCL_RANGE)
1584			init_exclusion_range(m);
1585		else if (m->flags & IVMD_FLAG_UNITY_MAP)
1586			init_unity_map_range(m);
1587
1588		p += m->length;
1589	}
1590
1591	return 0;
1592}
1593
1594/*
1595 * Init the device table to not allow DMA access for devices and
1596 * suppress all page faults
1597 */
1598static void init_device_table_dma(void)
1599{
1600	u32 devid;
1601
1602	for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
1603		set_dev_entry_bit(devid, DEV_ENTRY_VALID);
1604		set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
1605	}
1606}
1607
1608static void __init uninit_device_table_dma(void)
1609{
1610	u32 devid;
1611
1612	for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
1613		amd_iommu_dev_table[devid].data[0] = 0ULL;
1614		amd_iommu_dev_table[devid].data[1] = 0ULL;
1615	}
1616}
1617
1618static void init_device_table(void)
1619{
1620	u32 devid;
1621
1622	if (!amd_iommu_irq_remap)
1623		return;
1624
1625	for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
1626		set_dev_entry_bit(devid, DEV_ENTRY_IRQ_TBL_EN);
1627}
1628
1629static void iommu_init_flags(struct amd_iommu *iommu)
1630{
1631	iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
1632		iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
1633		iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
1634
1635	iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
1636		iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
1637		iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
1638
1639	iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
1640		iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
1641		iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
1642
1643	iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
1644		iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
1645		iommu_feature_disable(iommu, CONTROL_ISOC_EN);
1646
1647	/*
1648	 * make IOMMU memory accesses cache coherent
1649	 */
1650	iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
1651
1652	/* Set IOTLB invalidation timeout to 1s */
1653	iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
1654}
1655
1656static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
1657{
1658	int i, j;
1659	u32 ioc_feature_control;
1660	struct pci_dev *pdev = iommu->root_pdev;
1661
1662	/* RD890 BIOSes may not have completely reconfigured the iommu */
1663	if (!is_rd890_iommu(iommu->dev) || !pdev)
1664		return;
1665
1666	/*
1667	 * First, we need to ensure that the iommu is enabled. This is
1668	 * controlled by a register in the northbridge
1669	 */
1670
1671	/* Select Northbridge indirect register 0x75 and enable writing */
1672	pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
1673	pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
1674
1675	/* Enable the iommu */
1676	if (!(ioc_feature_control & 0x1))
1677		pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
1678
1679	/* Restore the iommu BAR */
1680	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
1681			       iommu->stored_addr_lo);
1682	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
1683			       iommu->stored_addr_hi);
1684
1685	/* Restore the l1 indirect regs for each of the 6 l1s */
1686	for (i = 0; i < 6; i++)
1687		for (j = 0; j < 0x12; j++)
1688			iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
1689
1690	/* Restore the l2 indirect regs */
1691	for (i = 0; i < 0x83; i++)
1692		iommu_write_l2(iommu, i, iommu->stored_l2[i]);
1693
1694	/* Lock PCI setup registers */
1695	pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
1696			       iommu->stored_addr_lo | 1);
1697}
1698
1699/*
1700 * This function finally enables all IOMMUs found in the system after
1701 * they have been initialized
1702 */
1703static void early_enable_iommus(void)
1704{
1705	struct amd_iommu *iommu;
1706
1707	for_each_iommu(iommu) {
1708		iommu_disable(iommu);
1709		iommu_init_flags(iommu);
1710		iommu_set_device_table(iommu);
1711		iommu_enable_command_buffer(iommu);
1712		iommu_enable_event_buffer(iommu);
1713		iommu_set_exclusion_range(iommu);
1714		iommu_enable(iommu);
1715		iommu_flush_all_caches(iommu);
1716	}
1717}
1718
1719static void enable_iommus_v2(void)
1720{
1721	struct amd_iommu *iommu;
1722
1723	for_each_iommu(iommu) {
1724		iommu_enable_ppr_log(iommu);
1725		iommu_enable_gt(iommu);
1726	}
1727}
1728
/* Full enable sequence: the early enable step followed by the v2 step */
static void enable_iommus(void)
{
	early_enable_iommus();

	enable_iommus_v2();
}
1735
1736static void disable_iommus(void)
1737{
1738	struct amd_iommu *iommu;
1739
1740	for_each_iommu(iommu)
1741		iommu_disable(iommu);
1742}
1743
1744/*
1745 * Suspend/Resume support
1746 * disable suspend until real resume implemented
1747 */
1748
1749static void amd_iommu_resume(void)
1750{
1751	struct amd_iommu *iommu;
1752
1753	for_each_iommu(iommu)
1754		iommu_apply_resume_quirks(iommu);
1755
1756	/* re-load the hardware */
1757	enable_iommus();
1758
1759	amd_iommu_enable_interrupts();
1760}
1761
/* syscore suspend hook: disables all IOMMUs; always returns 0 */
static int amd_iommu_suspend(void)
{
	/* disable IOMMUs to go out of the way for BIOS */
	disable_iommus();

	return 0;
}
1769
/* Suspend/resume hooks; registered with register_syscore_ops() in state_next() */
static struct syscore_ops amd_iommu_syscore_ops = {
	.suspend = amd_iommu_suspend,
	.resume = amd_iommu_resume,
};
1774
1775static void __init free_on_init_error(void)
1776{
1777	free_pages((unsigned long)irq_lookup_table,
1778		   get_order(rlookup_table_size));
1779
1780	if (amd_iommu_irq_cache) {
1781		kmem_cache_destroy(amd_iommu_irq_cache);
1782		amd_iommu_irq_cache = NULL;
1783
1784	}
1785
1786	free_pages((unsigned long)amd_iommu_rlookup_table,
1787		   get_order(rlookup_table_size));
1788
1789	free_pages((unsigned long)amd_iommu_alias_table,
1790		   get_order(alias_table_size));
1791
1792	free_pages((unsigned long)amd_iommu_dev_table,
1793		   get_order(dev_table_size));
1794
1795	free_iommu_all();
1796
1797#ifdef CONFIG_GART_IOMMU
1798	/*
1799	 * We failed to initialize the AMD IOMMU - try fallback to GART
1800	 * if possible.
1801	 */
1802	gart_iommu_init();
1803
1804#endif
1805}
1806
1807/* SB IOAPIC is always on this device in AMD systems */
1808#define IOAPIC_SB_DEVID		((0x00 << 8) | PCI_DEVFN(0x14, 0))
1809
1810static bool __init check_ioapic_information(void)
1811{
1812	const char *fw_bug = FW_BUG;
1813	bool ret, has_sb_ioapic;
1814	int idx;
1815
1816	has_sb_ioapic = false;
1817	ret           = false;
1818
1819	/*
1820	 * If we have map overrides on the kernel command line the
1821	 * messages in this function might not describe firmware bugs
1822	 * anymore - so be careful
1823	 */
1824	if (cmdline_maps)
1825		fw_bug = "";
1826
1827	for (idx = 0; idx < nr_ioapics; idx++) {
1828		int devid, id = mpc_ioapic_id(idx);
1829
1830		devid = get_ioapic_devid(id);
1831		if (devid < 0) {
1832			pr_err("%sAMD-Vi: IOAPIC[%d] not in IVRS table\n",
1833				fw_bug, id);
1834			ret = false;
1835		} else if (devid == IOAPIC_SB_DEVID) {
1836			has_sb_ioapic = true;
1837			ret           = true;
1838		}
1839	}
1840
1841	if (!has_sb_ioapic) {
1842		/*
1843		 * We expect the SB IOAPIC to be listed in the IVRS
1844		 * table. The system timer is connected to the SB IOAPIC
1845		 * and if we don't have it in the list the system will
1846		 * panic at boot time.  This situation usually happens
1847		 * when the BIOS is buggy and provides us the wrong
1848		 * device id for the IOAPIC in the system.
1849		 */
1850		pr_err("%sAMD-Vi: No southbridge IOAPIC found\n", fw_bug);
1851	}
1852
1853	if (!ret)
1854		pr_err("AMD-Vi: Disabling interrupt remapping\n");
1855
1856	return ret;
1857}
1858
/*
 * Undo the DMA related setup: uninitialize the devices, free the
 * protection domain allocation bitmap and drop all unity map entries.
 */
static void __init free_dma_resources(void)
{
	amd_iommu_uninit_devices();

	/* Same order as used when the bitmap was allocated */
	free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
		   get_order(MAX_DOMAIN_ID/8));

	free_unity_maps();
}
1868
1869/*
1870 * This is the hardware init function for AMD IOMMU in the system.
1871 * This function is called either from amd_iommu_init or from the interrupt
1872 * remapping setup code.
1873 *
1874 * This function basically parses the ACPI table for AMD IOMMU (IVRS)
1875 * three times:
1876 *
1877 *	1 pass) Find the highest PCI device id the driver has to handle.
1878 *		Upon this information the size of the data structures is
1879 *		determined that needs to be allocated.
1880 *
1881 *	2 pass) Initialize the data structures just allocated with the
1882 *		information in the ACPI table about available AMD IOMMUs
1883 *		in the system. It also maps the PCI devices in the
1884 *		system to specific IOMMUs
1885 *
1886 *	3 pass) After the basic data structures are allocated and
1887 *		initialized we update them with information about memory
1888 *		remapping requirements parsed out of the ACPI table in
1889 *		this last pass.
1890 *
1891 * After everything is set up the IOMMUs are enabled and the necessary
1892 * hotplug and suspend notifiers are registered.
1893 */
1894static int __init early_amd_iommu_init(void)
1895{
1896	struct acpi_table_header *ivrs_base;
1897	acpi_size ivrs_size;
1898	acpi_status status;
1899	int i, ret = 0;
1900
1901	if (!amd_iommu_detected)
1902		return -ENODEV;
1903
1904	status = acpi_get_table_with_size("IVRS", 0, &ivrs_base, &ivrs_size);
1905	if (status == AE_NOT_FOUND)
1906		return -ENODEV;
1907	else if (ACPI_FAILURE(status)) {
1908		const char *err = acpi_format_exception(status);
1909		pr_err("AMD-Vi: IVRS table error: %s\n", err);
1910		return -EINVAL;
1911	}
1912
1913	/*
1914	 * First parse ACPI tables to find the largest Bus/Dev/Func
1915	 * we need to handle. Upon this information the shared data
1916	 * structures for the IOMMUs in the system will be allocated
1917	 */
1918	ret = find_last_devid_acpi(ivrs_base);
1919	if (ret)
1920		goto out;
1921
1922	dev_table_size     = tbl_size(DEV_TABLE_ENTRY_SIZE);
1923	alias_table_size   = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
1924	rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
1925
1926	/* Device table - directly used by all IOMMUs */
1927	ret = -ENOMEM;
1928	amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
1929				      get_order(dev_table_size));
1930	if (amd_iommu_dev_table == NULL)
1931		goto out;
1932
1933	/*
1934	 * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the
1935	 * IOMMU see for that device
1936	 */
1937	amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
1938			get_order(alias_table_size));
1939	if (amd_iommu_alias_table == NULL)
1940		goto out;
1941
1942	/* IOMMU rlookup table - find the IOMMU for a specific device */
1943	amd_iommu_rlookup_table = (void *)__get_free_pages(
1944			GFP_KERNEL | __GFP_ZERO,
1945			get_order(rlookup_table_size));
1946	if (amd_iommu_rlookup_table == NULL)
1947		goto out;
1948
1949	amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
1950					    GFP_KERNEL | __GFP_ZERO,
1951					    get_order(MAX_DOMAIN_ID/8));
1952	if (amd_iommu_pd_alloc_bitmap == NULL)
1953		goto out;
1954
1955	/*
1956	 * let all alias entries point to itself
1957	 */
1958	for (i = 0; i <= amd_iommu_last_bdf; ++i)
1959		amd_iommu_alias_table[i] = i;
1960
1961	/*
1962	 * never allocate domain 0 because its used as the non-allocated and
1963	 * error value placeholder
1964	 */
1965	amd_iommu_pd_alloc_bitmap[0] = 1;
1966
1967	spin_lock_init(&amd_iommu_pd_lock);
1968
1969	/*
1970	 * now the data structures are allocated and basically initialized
1971	 * start the real acpi table scan
1972	 */
1973	ret = init_iommu_all(ivrs_base);
1974	if (ret)
1975		goto out;
1976
1977	if (amd_iommu_irq_remap)
1978		amd_iommu_irq_remap = check_ioapic_information();
1979
1980	if (amd_iommu_irq_remap) {
1981		/*
1982		 * Interrupt remapping enabled, create kmem_cache for the
1983		 * remapping tables.
1984		 */
1985		ret = -ENOMEM;
1986		amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
1987				MAX_IRQS_PER_TABLE * sizeof(u32),
1988				IRQ_TABLE_ALIGNMENT,
1989				0, NULL);
1990		if (!amd_iommu_irq_cache)
1991			goto out;
1992
1993		irq_lookup_table = (void *)__get_free_pages(
1994				GFP_KERNEL | __GFP_ZERO,
1995				get_order(rlookup_table_size));
1996		if (!irq_lookup_table)
1997			goto out;
1998	}
1999
2000	ret = init_memory_definitions(ivrs_base);
2001	if (ret)
2002		goto out;
2003
2004	/* init the device table */
2005	init_device_table();
2006
2007out:
2008	/* Don't leak any ACPI memory */
2009	early_acpi_os_unmap_memory((char __iomem *)ivrs_base, ivrs_size);
2010	ivrs_base = NULL;
2011
2012	return ret;
2013}
2014
2015static int amd_iommu_enable_interrupts(void)
2016{
2017	struct amd_iommu *iommu;
2018	int ret = 0;
2019
2020	for_each_iommu(iommu) {
2021		ret = iommu_init_msi(iommu);
2022		if (ret)
2023			goto out;
2024	}
2025
2026out:
2027	return ret;
2028}
2029
2030static bool detect_ivrs(void)
2031{
2032	struct acpi_table_header *ivrs_base;
2033	acpi_size ivrs_size;
2034	acpi_status status;
2035
2036	status = acpi_get_table_with_size("IVRS", 0, &ivrs_base, &ivrs_size);
2037	if (status == AE_NOT_FOUND)
2038		return false;
2039	else if (ACPI_FAILURE(status)) {
2040		const char *err = acpi_format_exception(status);
2041		pr_err("AMD-Vi: IVRS table error: %s\n", err);
2042		return false;
2043	}
2044
2045	early_acpi_os_unmap_memory((char __iomem *)ivrs_base, ivrs_size);
2046
2047	/* Make sure ACS will be enabled during PCI probe */
2048	pci_request_acs();
2049
2050	return true;
2051}
2052
2053static int amd_iommu_init_dma(void)
2054{
2055	struct amd_iommu *iommu;
2056	int ret;
2057
2058	if (iommu_pass_through)
2059		ret = amd_iommu_init_passthrough();
2060	else
2061		ret = amd_iommu_init_dma_ops();
2062
2063	if (ret)
2064		return ret;
2065
2066	init_device_table_dma();
2067
2068	for_each_iommu(iommu)
2069		iommu_flush_all_caches(iommu);
2070
2071	amd_iommu_init_api();
2072
2073	amd_iommu_init_notifier();
2074
2075	return 0;
2076}
2077
2078/****************************************************************************
2079 *
2080 * AMD IOMMU Initialization State Machine
2081 *
2082 ****************************************************************************/
2083
2084static int __init state_next(void)
2085{
2086	int ret = 0;
2087
2088	switch (init_state) {
2089	case IOMMU_START_STATE:
2090		if (!detect_ivrs()) {
2091			init_state	= IOMMU_NOT_FOUND;
2092			ret		= -ENODEV;
2093		} else {
2094			init_state	= IOMMU_IVRS_DETECTED;
2095		}
2096		break;
2097	case IOMMU_IVRS_DETECTED:
2098		ret = early_amd_iommu_init();
2099		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
2100		break;
2101	case IOMMU_ACPI_FINISHED:
2102		early_enable_iommus();
2103		register_syscore_ops(&amd_iommu_syscore_ops);
2104		x86_platform.iommu_shutdown = disable_iommus;
2105		init_state = IOMMU_ENABLED;
2106		break;
2107	case IOMMU_ENABLED:
2108		ret = amd_iommu_init_pci();
2109		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
2110		enable_iommus_v2();
2111		break;
2112	case IOMMU_PCI_INIT:
2113		ret = amd_iommu_enable_interrupts();
2114		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
2115		break;
2116	case IOMMU_INTERRUPTS_EN:
2117		ret = amd_iommu_init_dma();
2118		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_DMA_OPS;
2119		break;
2120	case IOMMU_DMA_OPS:
2121		init_state = IOMMU_INITIALIZED;
2122		break;
2123	case IOMMU_INITIALIZED:
2124		/* Nothing to do */
2125		break;
2126	case IOMMU_NOT_FOUND:
2127	case IOMMU_INIT_ERROR:
2128		/* Error states => do nothing */
2129		ret = -EINVAL;
2130		break;
2131	default:
2132		/* Unknown state */
2133		BUG();
2134	}
2135
2136	return ret;
2137}
2138
2139static int __init iommu_go_to_state(enum iommu_init_state state)
2140{
2141	int ret = 0;
2142
2143	while (init_state != state) {
2144		ret = state_next();
2145		if (init_state == IOMMU_NOT_FOUND ||
2146		    init_state == IOMMU_INIT_ERROR)
2147			break;
2148	}
2149
2150	return ret;
2151}
2152
2153#ifdef CONFIG_IRQ_REMAP
2154int __init amd_iommu_prepare(void)
2155{
2156	int ret;
2157
2158	amd_iommu_irq_remap = true;
2159
2160	ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
2161	if (ret)
2162		return ret;
2163	return amd_iommu_irq_remap ? 0 : -ENODEV;
2164}
2165
2166int __init amd_iommu_enable(void)
2167{
2168	int ret;
2169
2170	ret = iommu_go_to_state(IOMMU_ENABLED);
2171	if (ret)
2172		return ret;
2173
2174	irq_remapping_enabled = 1;
2175
2176	return 0;
2177}
2178
/* Thin wrapper: disabling is the same operation as the suspend hook */
void amd_iommu_disable(void)
{
	amd_iommu_suspend();
}
2183
/*
 * Re-enable the IOMMUs by running the resume hook. @mode is not used
 * here; always returns 0.
 */
int amd_iommu_reenable(int mode)
{
	amd_iommu_resume();

	return 0;
}
2190
/* Intentionally a no-op that reports success, see the comment below */
int __init amd_iommu_enable_faulting(void)
{
	/* We enable MSI later when PCI is initialized */
	return 0;
}
2196#endif
2197
2198/*
2199 * This is the core init function for AMD IOMMU hardware in the system.
2200 * This function is called from the generic x86 DMA layer initialization
2201 * code.
2202 */
2203static int __init amd_iommu_init(void)
2204{
2205	int ret;
2206
2207	ret = iommu_go_to_state(IOMMU_INITIALIZED);
2208	if (ret) {
2209		free_dma_resources();
2210		if (!irq_remapping_enabled) {
2211			disable_iommus();
2212			free_on_init_error();
2213		} else {
2214			struct amd_iommu *iommu;
2215
2216			uninit_device_table_dma();
2217			for_each_iommu(iommu)
2218				iommu_flush_all_caches(iommu);
2219		}
2220	}
2221
2222	return ret;
2223}
2224
2225/****************************************************************************
2226 *
2227 * Early detect code. This code runs at IOMMU detection time in the DMA
2228 * layer. It just looks if there is an IVRS ACPI table to detect AMD
2229 * IOMMUs
2230 *
2231 ****************************************************************************/
2232int __init amd_iommu_detect(void)
2233{
2234	int ret;
2235
2236	if (no_iommu || (iommu_detected && !gart_iommu_aperture))
2237		return -ENODEV;
2238
2239	if (amd_iommu_disabled)
2240		return -ENODEV;
2241
2242	ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
2243	if (ret)
2244		return ret;
2245
2246	amd_iommu_detected = true;
2247	iommu_detected = 1;
2248	x86_init.iommu.iommu_init = amd_iommu_init;
2249
2250	return 0;
2251}
2252
2253/****************************************************************************
2254 *
2255 * Parsing functions for the AMD IOMMU specific kernel command line
2256 * options.
2257 *
2258 ****************************************************************************/
2259
/*
 * Handler for the "amd_iommu_dump" kernel parameter: sets the
 * amd_iommu_dump flag. @str is not looked at; always returns 1.
 */
static int __init parse_amd_iommu_dump(char *str)
{
	amd_iommu_dump = true;

	return 1;
}
2266
2267static int __init parse_amd_iommu_options(char *str)
2268{
2269	for (; *str; ++str) {
2270		if (strncmp(str, "fullflush", 9) == 0)
2271			amd_iommu_unmap_flush = true;
2272		if (strncmp(str, "off", 3) == 0)
2273			amd_iommu_disabled = true;
2274		if (strncmp(str, "force_isolation", 15) == 0)
2275			amd_iommu_force_isolation = true;
2276	}
2277
2278	return 1;
2279}
2280
2281static int __init parse_ivrs_ioapic(char *str)
2282{
2283	unsigned int bus, dev, fn;
2284	int ret, id, i;
2285	u16 devid;
2286
2287	ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
2288
2289	if (ret != 4) {
2290		pr_err("AMD-Vi: Invalid command line: ivrs_ioapic%s\n", str);
2291		return 1;
2292	}
2293
2294	if (early_ioapic_map_size == EARLY_MAP_SIZE) {
2295		pr_err("AMD-Vi: Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
2296			str);
2297		return 1;
2298	}
2299
2300	devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
2301
2302	cmdline_maps			= true;
2303	i				= early_ioapic_map_size++;
2304	early_ioapic_map[i].id		= id;
2305	early_ioapic_map[i].devid	= devid;
2306	early_ioapic_map[i].cmd_line	= true;
2307
2308	return 1;
2309}
2310
2311static int __init parse_ivrs_hpet(char *str)
2312{
2313	unsigned int bus, dev, fn;
2314	int ret, id, i;
2315	u16 devid;
2316
2317	ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
2318
2319	if (ret != 4) {
2320		pr_err("AMD-Vi: Invalid command line: ivrs_hpet%s\n", str);
2321		return 1;
2322	}
2323
2324	if (early_hpet_map_size == EARLY_MAP_SIZE) {
2325		pr_err("AMD-Vi: Early HPET map overflow - ignoring ivrs_hpet%s\n",
2326			str);
2327		return 1;
2328	}
2329
2330	devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
2331
2332	cmdline_maps			= true;
2333	i				= early_hpet_map_size++;
2334	early_hpet_map[i].id		= id;
2335	early_hpet_map[i].devid		= devid;
2336	early_hpet_map[i].cmd_line	= true;
2337
2338	return 1;
2339}
2340
2341__setup("amd_iommu_dump",	parse_amd_iommu_dump);
2342__setup("amd_iommu=",		parse_amd_iommu_options);
2343__setup("ivrs_ioapic",		parse_ivrs_ioapic);
2344__setup("ivrs_hpet",		parse_ivrs_hpet);
2345
2346IOMMU_INIT_FINISH(amd_iommu_detect,
2347		  gart_iommu_hole_init,
2348		  NULL,
2349		  NULL);
2350
2351bool amd_iommu_v2_supported(void)
2352{
2353	return amd_iommu_v2_present;
2354}
2355EXPORT_SYMBOL(amd_iommu_v2_supported);
2356
2357/****************************************************************************
2358 *
2359 * IOMMU EFR Performance Counter support functionality. This code allows
2360 * access to the IOMMU PC functionality.
2361 *
2362 ****************************************************************************/
2363
2364u8 amd_iommu_pc_get_max_banks(u16 devid)
2365{
2366	struct amd_iommu *iommu;
2367	u8 ret = 0;
2368
2369	/* locate the iommu governing the devid */
2370	iommu = amd_iommu_rlookup_table[devid];
2371	if (iommu)
2372		ret = iommu->max_banks;
2373
2374	return ret;
2375}
2376EXPORT_SYMBOL(amd_iommu_pc_get_max_banks);
2377
2378bool amd_iommu_pc_supported(void)
2379{
2380	return amd_iommu_pc_present;
2381}
2382EXPORT_SYMBOL(amd_iommu_pc_supported);
2383
2384u8 amd_iommu_pc_get_max_counters(u16 devid)
2385{
2386	struct amd_iommu *iommu;
2387	u8 ret = 0;
2388
2389	/* locate the iommu governing the devid */
2390	iommu = amd_iommu_rlookup_table[devid];
2391	if (iommu)
2392		ret = iommu->max_counters;
2393
2394	return ret;
2395}
2396EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
2397
2398static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
2399				    u8 bank, u8 cntr, u8 fxn,
2400				    u64 *value, bool is_write)
2401{
2402	u32 offset;
2403	u32 max_offset_lim;
2404
2405	/* Check for valid iommu and pc register indexing */
2406	if (WARN_ON((fxn > 0x28) || (fxn & 7)))
2407		return -ENODEV;
2408
2409	offset = (u32)(((0x40|bank) << 12) | (cntr << 8) | fxn);
2410
2411	/* Limit the offset to the hw defined mmio region aperture */
2412	max_offset_lim = (u32)(((0x40|iommu->max_banks) << 12) |
2413				(iommu->max_counters << 8) | 0x28);
2414	if ((offset < MMIO_CNTR_REG_OFFSET) ||
2415	    (offset > max_offset_lim))
2416		return -EINVAL;
2417
2418	if (is_write) {
2419		writel((u32)*value, iommu->mmio_base + offset);
2420		writel((*value >> 32), iommu->mmio_base + offset + 4);
2421	} else {
2422		*value = readl(iommu->mmio_base + offset + 4);
2423		*value <<= 32;
2424		*value = readl(iommu->mmio_base + offset);
2425	}
2426
2427	return 0;
2428}
2429EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val);
2430
2431int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
2432				    u64 *value, bool is_write)
2433{
2434	struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
2435
2436	/* Make sure the IOMMU PC resource is available */
2437	if (!amd_iommu_pc_present || iommu == NULL)
2438		return -ENODEV;
2439
2440	return iommu_pc_get_set_reg_val(iommu, bank, cntr, fxn,
2441					value, is_write);
2442}
2443