1/* 2 * Copyright © 2006-2014 Intel Corporation. 3 * 4 * This program is free software; you can redistribute it and/or modify it 5 * under the terms and conditions of the GNU General Public License, 6 * version 2, as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope it will be useful, but WITHOUT 9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 * more details. 12 * 13 * Authors: David Woodhouse <dwmw2@infradead.org>, 14 * Ashok Raj <ashok.raj@intel.com>, 15 * Shaohua Li <shaohua.li@intel.com>, 16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>, 17 * Fenghua Yu <fenghua.yu@intel.com> 18 */ 19 20#include <linux/init.h> 21#include <linux/bitmap.h> 22#include <linux/debugfs.h> 23#include <linux/export.h> 24#include <linux/slab.h> 25#include <linux/irq.h> 26#include <linux/interrupt.h> 27#include <linux/spinlock.h> 28#include <linux/pci.h> 29#include <linux/dmar.h> 30#include <linux/dma-mapping.h> 31#include <linux/mempool.h> 32#include <linux/memory.h> 33#include <linux/timer.h> 34#include <linux/iova.h> 35#include <linux/iommu.h> 36#include <linux/intel-iommu.h> 37#include <linux/syscore_ops.h> 38#include <linux/tboot.h> 39#include <linux/dmi.h> 40#include <linux/pci-ats.h> 41#include <linux/memblock.h> 42#include <linux/dma-contiguous.h> 43#include <asm/irq_remapping.h> 44#include <asm/cacheflush.h> 45#include <asm/iommu.h> 46 47#include "irq_remapping.h" 48 49#define ROOT_SIZE VTD_PAGE_SIZE 50#define CONTEXT_SIZE VTD_PAGE_SIZE 51 52#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) 53#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB) 54#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) 55#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) 56 57#define IOAPIC_RANGE_START (0xfee00000) 58#define IOAPIC_RANGE_END (0xfeefffff) 59#define IOVA_START_ADDR (0x1000) 60 61#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 62 63#define MAX_AGAW_WIDTH 64 64#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT) 65 66#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1) 67#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1) 68 69/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR 70 to match. That way, we can use 'unsigned long' for PFNs with impunity. */ 71#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \ 72 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1)) 73#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT) 74 75/* IO virtual address start page frame number */ 76#define IOVA_START_PFN (1) 77 78#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) 79#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) 80#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64)) 81 82/* page table handling */ 83#define LEVEL_STRIDE (9) 84#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1) 85 86/* 87 * This bitmap is used to advertise the page sizes our hardware support 88 * to the IOMMU core, which will then use this information to split 89 * physically contiguous memory regions it is mapping into page sizes 90 * that we support. 91 * 92 * Traditionally the IOMMU core just handed us the mappings directly, 93 * after making sure the size is an order of a 4KiB page and that the 94 * mapping has natural alignment. 
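 *
 * (Each set bit in this bitmap advertises one page size to the IOMMU
 *  core: bit N means a 2^N byte page, so ~0xFFFUL below claims every
 *  size from 4KiB upwards. A mask advertising only 4KiB, 2MiB and 1GiB
 *  would instead be 0x40201000; that value is illustrative only, not
 *  what this driver uses.)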
95 * 96 * To retain this behavior, we currently advertise that we support 97 * all page sizes that are an order of 4KiB. 98 * 99 * If at some point we'd like to utilize the IOMMU core's new behavior, 100 * we could change this to advertise the real page sizes we support. 101 */ 102#define INTEL_IOMMU_PGSIZES (~0xFFFUL) 103 104static inline int agaw_to_level(int agaw) 105{ 106 return agaw + 2; 107} 108 109static inline int agaw_to_width(int agaw) 110{ 111 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH); 112} 113 114static inline int width_to_agaw(int width) 115{ 116 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE); 117} 118 119static inline unsigned int level_to_offset_bits(int level) 120{ 121 return (level - 1) * LEVEL_STRIDE; 122} 123 124static inline int pfn_level_offset(unsigned long pfn, int level) 125{ 126 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK; 127} 128 129static inline unsigned long level_mask(int level) 130{ 131 return -1UL << level_to_offset_bits(level); 132} 133 134static inline unsigned long level_size(int level) 135{ 136 return 1UL << level_to_offset_bits(level); 137} 138 139static inline unsigned long align_to_level(unsigned long pfn, int level) 140{ 141 return (pfn + level_size(level) - 1) & level_mask(level); 142} 143 144static inline unsigned long lvl_to_nr_pages(unsigned int lvl) 145{ 146 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH); 147} 148 149/* VT-d pages must always be _smaller_ than MM pages. Otherwise things 150 are never going to work. */ 151static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn) 152{ 153 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT); 154} 155 156static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn) 157{ 158 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT); 159} 160static inline unsigned long page_to_dma_pfn(struct page *pg) 161{ 162 return mm_to_dma_pfn(page_to_pfn(pg)); 163} 164static inline unsigned long virt_to_dma_pfn(void *p) 165{ 166 return page_to_dma_pfn(virt_to_page(p)); 167} 168 169/* global iommu list, set NULL for ignored DMAR units */ 170static struct intel_iommu **g_iommus; 171 172static void __init check_tylersburg_isoch(void); 173static int rwbf_quirk; 174 175/* 176 * set to 1 to panic kernel if can't successfully enable VT-d 177 * (used when kernel is launched w/ TXT) 178 */ 179static int force_on = 0; 180 181/* 182 * 0: Present 183 * 1-11: Reserved 184 * 12-63: Context Ptr (12 - (haw-1)) 185 * 64-127: Reserved 186 */ 187struct root_entry { 188 u64 lo; 189 u64 hi; 190}; 191#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) 192 193 194/* 195 * low 64 bits: 196 * 0: present 197 * 1: fault processing disable 198 * 2-3: translation type 199 * 12-63: address space root 200 * high 64 bits: 201 * 0-2: address width 202 * 3-6: aval 203 * 8-23: domain id 204 */ 205struct context_entry { 206 u64 lo; 207 u64 hi; 208}; 209 210static inline bool context_present(struct context_entry *context) 211{ 212 return (context->lo & 1); 213} 214static inline void context_set_present(struct context_entry *context) 215{ 216 context->lo |= 1; 217} 218 219static inline void context_set_fault_enable(struct context_entry *context) 220{ 221 context->lo &= (((u64)-1) << 2) | 1; 222} 223 224static inline void context_set_translation_type(struct context_entry *context, 225 unsigned long value) 226{ 227 context->lo &= (((u64)-1) << 4) | 3; 228 context->lo |= (value & 3) << 2; 229} 230 231static inline void context_set_address_root(struct context_entry *context, 232 unsigned long 
value) 233{ 234 context->lo &= ~VTD_PAGE_MASK; 235 context->lo |= value & VTD_PAGE_MASK; 236} 237 238static inline void context_set_address_width(struct context_entry *context, 239 unsigned long value) 240{ 241 context->hi |= value & 7; 242} 243 244static inline void context_set_domain_id(struct context_entry *context, 245 unsigned long value) 246{ 247 context->hi |= (value & ((1 << 16) - 1)) << 8; 248} 249 250static inline void context_clear_entry(struct context_entry *context) 251{ 252 context->lo = 0; 253 context->hi = 0; 254} 255 256/* 257 * 0: readable 258 * 1: writable 259 * 2-6: reserved 260 * 7: super page 261 * 8-10: available 262 * 11: snoop behavior 263 * 12-63: Host physcial address 264 */ 265struct dma_pte { 266 u64 val; 267}; 268 269static inline void dma_clear_pte(struct dma_pte *pte) 270{ 271 pte->val = 0; 272} 273 274static inline u64 dma_pte_addr(struct dma_pte *pte) 275{ 276#ifdef CONFIG_64BIT 277 return pte->val & VTD_PAGE_MASK; 278#else 279 /* Must have a full atomic 64-bit read */ 280 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK; 281#endif 282} 283 284static inline bool dma_pte_present(struct dma_pte *pte) 285{ 286 return (pte->val & 3) != 0; 287} 288 289static inline bool dma_pte_superpage(struct dma_pte *pte) 290{ 291 return (pte->val & DMA_PTE_LARGE_PAGE); 292} 293 294static inline int first_pte_in_page(struct dma_pte *pte) 295{ 296 return !((unsigned long)pte & ~VTD_PAGE_MASK); 297} 298 299/* 300 * This domain is a statically identity mapping domain. 301 * 1. This domain creats a static 1:1 mapping to all usable memory. 302 * 2. It maps to each iommu if successful. 303 * 3. Each iommu mapps to this domain if successful. 304 */ 305static struct dmar_domain *si_domain; 306static int hw_pass_through = 1; 307 308/* domain represents a virtual machine, more than one devices 309 * across iommus may be owned in one domain, e.g. kvm guest. 
/*
 * This domain is a statically identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/* domain represents a virtual machine; more than one device
 * across iommus may be owned by one domain, e.g. a kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 0)

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 1)

struct dmar_domain {
	int	id;			/* domain id */
	int	nid;			/* node id */
	DECLARE_BITMAP(iommu_bmp, DMAR_UNITS_SUPPORTED);
					/* bitmap of iommus this domain uses */

	struct list_head devices;	/* all devices' list */
	struct iova_domain iovad;	/* iova's that belong to this domain */

	struct dma_pte	*pgd;		/* virtual address */
	int		gaw;		/* max guest address width */

	/* adjusted guest address width, 0 is level 2 30-bit */
	int		agaw;

	int		flags;		/* flags to find out type of domain */

	int		iommu_coherency;/* indicate coherency of iommu access */
	int		iommu_snooping; /* indicate snooping control feature */
	int		iommu_count;	/* reference count of iommu */
	int		iommu_superpage;/* Level of superpages supported:
					   0 == 4KiB (no superpages), 1 == 2MiB,
					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
	spinlock_t	iommu_lock;	/* protect iommu set in domain */
	u64		max_addr;	/* maximum mapped address */

	struct iommu_domain domain;	/* generic domain data structure for
					   iommu core */
};

/* PCI domain-device relationship */
struct device_domain_info {
	struct list_head link;	/* link to domain siblings */
	struct list_head global; /* link to global list */
	u8 bus;			/* PCI bus number */
	u8 devfn;		/* PCI devfn number */
	struct device *dev;	/* it's NULL for PCIe-to-PCI bridge */
	struct intel_iommu *iommu; /* IOMMU used by this device */
	struct dmar_domain *domain; /* pointer to domain */
};

struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units	*/
	struct acpi_dmar_header *hdr;	/* ACPI header		*/
	u64	base_address;		/* reserved base address*/
	u64	end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int	devices_cnt;		/* target device count */
};

struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};

static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);

#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)

static void flush_unmaps_timeout(unsigned long data);

static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);

#define HIGH_WATER_MARK 250
struct deferred_flush_tables {
	int next;
	struct iova *iova[HIGH_WATER_MARK];
	struct dmar_domain *domain[HIGH_WATER_MARK];
	struct page *freelist[HIGH_WATER_MARK];
};

static struct deferred_flush_tables *deferred_flush;

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static DEFINE_SPINLOCK(async_umap_flush_lock);
static LIST_HEAD(unmaps_to_do);

static int timer_on;
static long list_size;

static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
static void domain_remove_one_dev_info(struct dmar_domain *domain,
				       struct device *dev);
static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
					   struct device *dev);
static int domain_detach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu);

#ifdef
CONFIG_INTEL_IOMMU_DEFAULT_ON 413int dmar_disabled = 0; 414#else 415int dmar_disabled = 1; 416#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/ 417 418int intel_iommu_enabled = 0; 419EXPORT_SYMBOL_GPL(intel_iommu_enabled); 420 421static int dmar_map_gfx = 1; 422static int dmar_forcedac; 423static int intel_iommu_strict; 424static int intel_iommu_superpage = 1; 425static int intel_iommu_ecs = 1; 426 427/* We only actually use ECS when PASID support (on the new bit 40) 428 * is also advertised. Some early implementations — the ones with 429 * PASID support on bit 28 — have issues even when we *only* use 430 * extended root/context tables. */ 431#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \ 432 ecap_pasid(iommu->ecap)) 433 434int intel_iommu_gfx_mapped; 435EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); 436 437#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) 438static DEFINE_SPINLOCK(device_domain_lock); 439static LIST_HEAD(device_domain_list); 440 441static const struct iommu_ops intel_iommu_ops; 442 443/* Convert generic 'struct iommu_domain to private struct dmar_domain */ 444static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom) 445{ 446 return container_of(dom, struct dmar_domain, domain); 447} 448 449static int __init intel_iommu_setup(char *str) 450{ 451 if (!str) 452 return -EINVAL; 453 while (*str) { 454 if (!strncmp(str, "on", 2)) { 455 dmar_disabled = 0; 456 printk(KERN_INFO "Intel-IOMMU: enabled\n"); 457 } else if (!strncmp(str, "off", 3)) { 458 dmar_disabled = 1; 459 printk(KERN_INFO "Intel-IOMMU: disabled\n"); 460 } else if (!strncmp(str, "igfx_off", 8)) { 461 dmar_map_gfx = 0; 462 printk(KERN_INFO 463 "Intel-IOMMU: disable GFX device mapping\n"); 464 } else if (!strncmp(str, "forcedac", 8)) { 465 printk(KERN_INFO 466 "Intel-IOMMU: Forcing DAC for PCI devices\n"); 467 dmar_forcedac = 1; 468 } else if (!strncmp(str, "strict", 6)) { 469 printk(KERN_INFO 470 "Intel-IOMMU: disable batched IOTLB flush\n"); 471 intel_iommu_strict = 1; 472 } else if (!strncmp(str, "sp_off", 6)) { 473 printk(KERN_INFO 474 "Intel-IOMMU: disable supported super page\n"); 475 intel_iommu_superpage = 0; 476 } else if (!strncmp(str, "ecs_off", 7)) { 477 printk(KERN_INFO 478 "Intel-IOMMU: disable extended context table support\n"); 479 intel_iommu_ecs = 0; 480 } 481 482 str += strcspn(str, ","); 483 while (*str == ',') 484 str++; 485 } 486 return 0; 487} 488__setup("intel_iommu=", intel_iommu_setup); 489 490static struct kmem_cache *iommu_domain_cache; 491static struct kmem_cache *iommu_devinfo_cache; 492 493static inline void *alloc_pgtable_page(int node) 494{ 495 struct page *page; 496 void *vaddr = NULL; 497 498 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0); 499 if (page) 500 vaddr = page_address(page); 501 return vaddr; 502} 503 504static inline void free_pgtable_page(void *vaddr) 505{ 506 free_page((unsigned long)vaddr); 507} 508 509static inline void *alloc_domain_mem(void) 510{ 511 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC); 512} 513 514static void free_domain_mem(void *vaddr) 515{ 516 kmem_cache_free(iommu_domain_cache, vaddr); 517} 518 519static inline void * alloc_devinfo_mem(void) 520{ 521 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC); 522} 523 524static inline void free_devinfo_mem(void *vaddr) 525{ 526 kmem_cache_free(iommu_devinfo_cache, vaddr); 527} 528 529static inline int domain_type_is_vm(struct dmar_domain *domain) 530{ 531 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE; 532} 533 534static inline int 
domain_type_is_vm_or_si(struct dmar_domain *domain) 535{ 536 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE | 537 DOMAIN_FLAG_STATIC_IDENTITY); 538} 539 540static inline int domain_pfn_supported(struct dmar_domain *domain, 541 unsigned long pfn) 542{ 543 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; 544 545 return !(addr_width < BITS_PER_LONG && pfn >> addr_width); 546} 547 548static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw) 549{ 550 unsigned long sagaw; 551 int agaw = -1; 552 553 sagaw = cap_sagaw(iommu->cap); 554 for (agaw = width_to_agaw(max_gaw); 555 agaw >= 0; agaw--) { 556 if (test_bit(agaw, &sagaw)) 557 break; 558 } 559 560 return agaw; 561} 562 563/* 564 * Calculate max SAGAW for each iommu. 565 */ 566int iommu_calculate_max_sagaw(struct intel_iommu *iommu) 567{ 568 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH); 569} 570 571/* 572 * calculate agaw for each iommu. 573 * "SAGAW" may be different across iommus, use a default agaw, and 574 * get a supported less agaw for iommus that don't support the default agaw. 575 */ 576int iommu_calculate_agaw(struct intel_iommu *iommu) 577{ 578 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH); 579} 580 581/* This functionin only returns single iommu in a domain */ 582static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) 583{ 584 int iommu_id; 585 586 /* si_domain and vm domain should not get here. */ 587 BUG_ON(domain_type_is_vm_or_si(domain)); 588 iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus); 589 if (iommu_id < 0 || iommu_id >= g_num_of_iommus) 590 return NULL; 591 592 return g_iommus[iommu_id]; 593} 594 595static void domain_update_iommu_coherency(struct dmar_domain *domain) 596{ 597 struct dmar_drhd_unit *drhd; 598 struct intel_iommu *iommu; 599 bool found = false; 600 int i; 601 602 domain->iommu_coherency = 1; 603 604 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) { 605 found = true; 606 if (!ecap_coherent(g_iommus[i]->ecap)) { 607 domain->iommu_coherency = 0; 608 break; 609 } 610 } 611 if (found) 612 return; 613 614 /* No hardware attached; use lowest common denominator */ 615 rcu_read_lock(); 616 for_each_active_iommu(iommu, drhd) { 617 if (!ecap_coherent(iommu->ecap)) { 618 domain->iommu_coherency = 0; 619 break; 620 } 621 } 622 rcu_read_unlock(); 623} 624 625static int domain_update_iommu_snooping(struct intel_iommu *skip) 626{ 627 struct dmar_drhd_unit *drhd; 628 struct intel_iommu *iommu; 629 int ret = 1; 630 631 rcu_read_lock(); 632 for_each_active_iommu(iommu, drhd) { 633 if (iommu != skip) { 634 if (!ecap_sc_support(iommu->ecap)) { 635 ret = 0; 636 break; 637 } 638 } 639 } 640 rcu_read_unlock(); 641 642 return ret; 643} 644 645static int domain_update_iommu_superpage(struct intel_iommu *skip) 646{ 647 struct dmar_drhd_unit *drhd; 648 struct intel_iommu *iommu; 649 int mask = 0xf; 650 651 if (!intel_iommu_superpage) { 652 return 0; 653 } 654 655 /* set iommu_superpage to the smallest common denominator */ 656 rcu_read_lock(); 657 for_each_active_iommu(iommu, drhd) { 658 if (iommu != skip) { 659 mask &= cap_super_page_val(iommu->cap); 660 if (!mask) 661 break; 662 } 663 } 664 rcu_read_unlock(); 665 666 return fls(mask); 667} 668 669/* Some capabilities may be different across iommus */ 670static void domain_update_iommu_cap(struct dmar_domain *domain) 671{ 672 domain_update_iommu_coherency(domain); 673 domain->iommu_snooping = domain_update_iommu_snooping(NULL); 674 domain->iommu_superpage = 
domain_update_iommu_superpage(NULL); 675} 676 677static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu, 678 u8 bus, u8 devfn, int alloc) 679{ 680 struct root_entry *root = &iommu->root_entry[bus]; 681 struct context_entry *context; 682 u64 *entry; 683 684 entry = &root->lo; 685 if (ecs_enabled(iommu)) { 686 if (devfn >= 0x80) { 687 devfn -= 0x80; 688 entry = &root->hi; 689 } 690 devfn *= 2; 691 } 692 if (*entry & 1) 693 context = phys_to_virt(*entry & VTD_PAGE_MASK); 694 else { 695 unsigned long phy_addr; 696 if (!alloc) 697 return NULL; 698 699 context = alloc_pgtable_page(iommu->node); 700 if (!context) 701 return NULL; 702 703 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE); 704 phy_addr = virt_to_phys((void *)context); 705 *entry = phy_addr | 1; 706 __iommu_flush_cache(iommu, entry, sizeof(*entry)); 707 } 708 return &context[devfn]; 709} 710 711static int iommu_dummy(struct device *dev) 712{ 713 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; 714} 715 716static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) 717{ 718 struct dmar_drhd_unit *drhd = NULL; 719 struct intel_iommu *iommu; 720 struct device *tmp; 721 struct pci_dev *ptmp, *pdev = NULL; 722 u16 segment = 0; 723 int i; 724 725 if (iommu_dummy(dev)) 726 return NULL; 727 728 if (dev_is_pci(dev)) { 729 pdev = to_pci_dev(dev); 730 segment = pci_domain_nr(pdev->bus); 731 } else if (has_acpi_companion(dev)) 732 dev = &ACPI_COMPANION(dev)->dev; 733 734 rcu_read_lock(); 735 for_each_active_iommu(iommu, drhd) { 736 if (pdev && segment != drhd->segment) 737 continue; 738 739 for_each_active_dev_scope(drhd->devices, 740 drhd->devices_cnt, i, tmp) { 741 if (tmp == dev) { 742 *bus = drhd->devices[i].bus; 743 *devfn = drhd->devices[i].devfn; 744 goto out; 745 } 746 747 if (!pdev || !dev_is_pci(tmp)) 748 continue; 749 750 ptmp = to_pci_dev(tmp); 751 if (ptmp->subordinate && 752 ptmp->subordinate->number <= pdev->bus->number && 753 ptmp->subordinate->busn_res.end >= pdev->bus->number) 754 goto got_pdev; 755 } 756 757 if (pdev && drhd->include_all) { 758 got_pdev: 759 *bus = pdev->bus->number; 760 *devfn = pdev->devfn; 761 goto out; 762 } 763 } 764 iommu = NULL; 765 out: 766 rcu_read_unlock(); 767 768 return iommu; 769} 770 771static void domain_flush_cache(struct dmar_domain *domain, 772 void *addr, int size) 773{ 774 if (!domain->iommu_coherency) 775 clflush_cache_range(addr, size); 776} 777 778static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn) 779{ 780 struct context_entry *context; 781 int ret = 0; 782 unsigned long flags; 783 784 spin_lock_irqsave(&iommu->lock, flags); 785 context = iommu_context_addr(iommu, bus, devfn, 0); 786 if (context) 787 ret = context_present(context); 788 spin_unlock_irqrestore(&iommu->lock, flags); 789 return ret; 790} 791 792static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn) 793{ 794 struct context_entry *context; 795 unsigned long flags; 796 797 spin_lock_irqsave(&iommu->lock, flags); 798 context = iommu_context_addr(iommu, bus, devfn, 0); 799 if (context) { 800 context_clear_entry(context); 801 __iommu_flush_cache(iommu, context, sizeof(*context)); 802 } 803 spin_unlock_irqrestore(&iommu->lock, flags); 804} 805 806static void free_context_table(struct intel_iommu *iommu) 807{ 808 int i; 809 unsigned long flags; 810 struct context_entry *context; 811 812 spin_lock_irqsave(&iommu->lock, flags); 813 if (!iommu->root_entry) { 814 goto out; 815 } 816 for (i = 0; i < ROOT_ENTRY_NR; i++) { 817 
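		/*
		 * The root table has one entry per bus. With extended
		 * context support (ECS) the low and high halves of a root
		 * entry point at separate context tables for devfn
		 * 0x00-0x7f and 0x80-0xff, so both halves are looked up
		 * and freed here.
		 */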
context = iommu_context_addr(iommu, i, 0, 0); 818 if (context) 819 free_pgtable_page(context); 820 821 if (!ecs_enabled(iommu)) 822 continue; 823 824 context = iommu_context_addr(iommu, i, 0x80, 0); 825 if (context) 826 free_pgtable_page(context); 827 828 } 829 free_pgtable_page(iommu->root_entry); 830 iommu->root_entry = NULL; 831out: 832 spin_unlock_irqrestore(&iommu->lock, flags); 833} 834 835static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, 836 unsigned long pfn, int *target_level) 837{ 838 struct dma_pte *parent, *pte = NULL; 839 int level = agaw_to_level(domain->agaw); 840 int offset; 841 842 BUG_ON(!domain->pgd); 843 844 if (!domain_pfn_supported(domain, pfn)) 845 /* Address beyond IOMMU's addressing capabilities. */ 846 return NULL; 847 848 parent = domain->pgd; 849 850 while (1) { 851 void *tmp_page; 852 853 offset = pfn_level_offset(pfn, level); 854 pte = &parent[offset]; 855 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte))) 856 break; 857 if (level == *target_level) 858 break; 859 860 if (!dma_pte_present(pte)) { 861 uint64_t pteval; 862 863 tmp_page = alloc_pgtable_page(domain->nid); 864 865 if (!tmp_page) 866 return NULL; 867 868 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); 869 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; 870 if (cmpxchg64(&pte->val, 0ULL, pteval)) 871 /* Someone else set it while we were thinking; use theirs. */ 872 free_pgtable_page(tmp_page); 873 else 874 domain_flush_cache(domain, pte, sizeof(*pte)); 875 } 876 if (level == 1) 877 break; 878 879 parent = phys_to_virt(dma_pte_addr(pte)); 880 level--; 881 } 882 883 if (!*target_level) 884 *target_level = level; 885 886 return pte; 887} 888 889 890/* return address's pte at specific level */ 891static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, 892 unsigned long pfn, 893 int level, int *large_page) 894{ 895 struct dma_pte *parent, *pte = NULL; 896 int total = agaw_to_level(domain->agaw); 897 int offset; 898 899 parent = domain->pgd; 900 while (level <= total) { 901 offset = pfn_level_offset(pfn, total); 902 pte = &parent[offset]; 903 if (level == total) 904 return pte; 905 906 if (!dma_pte_present(pte)) { 907 *large_page = total; 908 break; 909 } 910 911 if (dma_pte_superpage(pte)) { 912 *large_page = total; 913 return pte; 914 } 915 916 parent = phys_to_virt(dma_pte_addr(pte)); 917 total--; 918 } 919 return NULL; 920} 921 922/* clear last level pte, a tlb flush should be followed */ 923static void dma_pte_clear_range(struct dmar_domain *domain, 924 unsigned long start_pfn, 925 unsigned long last_pfn) 926{ 927 unsigned int large_page = 1; 928 struct dma_pte *first_pte, *pte; 929 930 BUG_ON(!domain_pfn_supported(domain, start_pfn)); 931 BUG_ON(!domain_pfn_supported(domain, last_pfn)); 932 BUG_ON(start_pfn > last_pfn); 933 934 /* we don't need lock here; nobody else touches the iova range */ 935 do { 936 large_page = 1; 937 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page); 938 if (!pte) { 939 start_pfn = align_to_level(start_pfn + 1, large_page + 1); 940 continue; 941 } 942 do { 943 dma_clear_pte(pte); 944 start_pfn += lvl_to_nr_pages(large_page); 945 pte++; 946 } while (start_pfn <= last_pfn && !first_pte_in_page(pte)); 947 948 domain_flush_cache(domain, first_pte, 949 (void *)pte - (void *)first_pte); 950 951 } while (start_pfn && start_pfn <= last_pfn); 952} 953 954static void dma_pte_free_level(struct dmar_domain *domain, int level, 955 struct dma_pte *pte, unsigned long 
pfn, 956 unsigned long start_pfn, unsigned long last_pfn) 957{ 958 pfn = max(start_pfn, pfn); 959 pte = &pte[pfn_level_offset(pfn, level)]; 960 961 do { 962 unsigned long level_pfn; 963 struct dma_pte *level_pte; 964 965 if (!dma_pte_present(pte) || dma_pte_superpage(pte)) 966 goto next; 967 968 level_pfn = pfn & level_mask(level - 1); 969 level_pte = phys_to_virt(dma_pte_addr(pte)); 970 971 if (level > 2) 972 dma_pte_free_level(domain, level - 1, level_pte, 973 level_pfn, start_pfn, last_pfn); 974 975 /* If range covers entire pagetable, free it */ 976 if (!(start_pfn > level_pfn || 977 last_pfn < level_pfn + level_size(level) - 1)) { 978 dma_clear_pte(pte); 979 domain_flush_cache(domain, pte, sizeof(*pte)); 980 free_pgtable_page(level_pte); 981 } 982next: 983 pfn += level_size(level); 984 } while (!first_pte_in_page(++pte) && pfn <= last_pfn); 985} 986 987/* free page table pages. last level pte should already be cleared */ 988static void dma_pte_free_pagetable(struct dmar_domain *domain, 989 unsigned long start_pfn, 990 unsigned long last_pfn) 991{ 992 BUG_ON(!domain_pfn_supported(domain, start_pfn)); 993 BUG_ON(!domain_pfn_supported(domain, last_pfn)); 994 BUG_ON(start_pfn > last_pfn); 995 996 dma_pte_clear_range(domain, start_pfn, last_pfn); 997 998 /* We don't need lock here; nobody else touches the iova range */ 999 dma_pte_free_level(domain, agaw_to_level(domain->agaw), 1000 domain->pgd, 0, start_pfn, last_pfn); 1001 1002 /* free pgd */ 1003 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { 1004 free_pgtable_page(domain->pgd); 1005 domain->pgd = NULL; 1006 } 1007} 1008 1009/* When a page at a given level is being unlinked from its parent, we don't 1010 need to *modify* it at all. All we need to do is make a list of all the 1011 pages which can be freed just as soon as we've flushed the IOTLB and we 1012 know the hardware page-walk will no longer touch them. 1013 The 'pte' argument is the *parent* PTE, pointing to the page that is to 1014 be freed. */ 1015static struct page *dma_pte_list_pagetables(struct dmar_domain *domain, 1016 int level, struct dma_pte *pte, 1017 struct page *freelist) 1018{ 1019 struct page *pg; 1020 1021 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT); 1022 pg->freelist = freelist; 1023 freelist = pg; 1024 1025 if (level == 1) 1026 return freelist; 1027 1028 pte = page_address(pg); 1029 do { 1030 if (dma_pte_present(pte) && !dma_pte_superpage(pte)) 1031 freelist = dma_pte_list_pagetables(domain, level - 1, 1032 pte, freelist); 1033 pte++; 1034 } while (!first_pte_in_page(pte)); 1035 1036 return freelist; 1037} 1038 1039static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level, 1040 struct dma_pte *pte, unsigned long pfn, 1041 unsigned long start_pfn, 1042 unsigned long last_pfn, 1043 struct page *freelist) 1044{ 1045 struct dma_pte *first_pte = NULL, *last_pte = NULL; 1046 1047 pfn = max(start_pfn, pfn); 1048 pte = &pte[pfn_level_offset(pfn, level)]; 1049 1050 do { 1051 unsigned long level_pfn; 1052 1053 if (!dma_pte_present(pte)) 1054 goto next; 1055 1056 level_pfn = pfn & level_mask(level); 1057 1058 /* If range covers entire pagetable, free it */ 1059 if (start_pfn <= level_pfn && 1060 last_pfn >= level_pfn + level_size(level) - 1) { 1061 /* These suborbinate page tables are going away entirely. Don't 1062 bother to clear them; we're just going to *free* them. 
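			   (They are chained through page->freelist by
			   dma_pte_list_pagetables() and are only returned to
			   the allocator by dma_free_pagelist(), once the
			   IOTLB flush has completed.)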
*/ 1063 if (level > 1 && !dma_pte_superpage(pte)) 1064 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist); 1065 1066 dma_clear_pte(pte); 1067 if (!first_pte) 1068 first_pte = pte; 1069 last_pte = pte; 1070 } else if (level > 1) { 1071 /* Recurse down into a level that isn't *entirely* obsolete */ 1072 freelist = dma_pte_clear_level(domain, level - 1, 1073 phys_to_virt(dma_pte_addr(pte)), 1074 level_pfn, start_pfn, last_pfn, 1075 freelist); 1076 } 1077next: 1078 pfn += level_size(level); 1079 } while (!first_pte_in_page(++pte) && pfn <= last_pfn); 1080 1081 if (first_pte) 1082 domain_flush_cache(domain, first_pte, 1083 (void *)++last_pte - (void *)first_pte); 1084 1085 return freelist; 1086} 1087 1088/* We can't just free the pages because the IOMMU may still be walking 1089 the page tables, and may have cached the intermediate levels. The 1090 pages can only be freed after the IOTLB flush has been done. */ 1091struct page *domain_unmap(struct dmar_domain *domain, 1092 unsigned long start_pfn, 1093 unsigned long last_pfn) 1094{ 1095 struct page *freelist = NULL; 1096 1097 BUG_ON(!domain_pfn_supported(domain, start_pfn)); 1098 BUG_ON(!domain_pfn_supported(domain, last_pfn)); 1099 BUG_ON(start_pfn > last_pfn); 1100 1101 /* we don't need lock here; nobody else touches the iova range */ 1102 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw), 1103 domain->pgd, 0, start_pfn, last_pfn, NULL); 1104 1105 /* free pgd */ 1106 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { 1107 struct page *pgd_page = virt_to_page(domain->pgd); 1108 pgd_page->freelist = freelist; 1109 freelist = pgd_page; 1110 1111 domain->pgd = NULL; 1112 } 1113 1114 return freelist; 1115} 1116 1117void dma_free_pagelist(struct page *freelist) 1118{ 1119 struct page *pg; 1120 1121 while ((pg = freelist)) { 1122 freelist = pg->freelist; 1123 free_pgtable_page(page_address(pg)); 1124 } 1125} 1126 1127/* iommu handling */ 1128static int iommu_alloc_root_entry(struct intel_iommu *iommu) 1129{ 1130 struct root_entry *root; 1131 unsigned long flags; 1132 1133 root = (struct root_entry *)alloc_pgtable_page(iommu->node); 1134 if (!root) { 1135 pr_err("IOMMU: allocating root entry for %s failed\n", 1136 iommu->name); 1137 return -ENOMEM; 1138 } 1139 1140 __iommu_flush_cache(iommu, root, ROOT_SIZE); 1141 1142 spin_lock_irqsave(&iommu->lock, flags); 1143 iommu->root_entry = root; 1144 spin_unlock_irqrestore(&iommu->lock, flags); 1145 1146 return 0; 1147} 1148 1149static void iommu_set_root_entry(struct intel_iommu *iommu) 1150{ 1151 u64 addr; 1152 u32 sts; 1153 unsigned long flag; 1154 1155 addr = virt_to_phys(iommu->root_entry); 1156 if (ecs_enabled(iommu)) 1157 addr |= DMA_RTADDR_RTT; 1158 1159 raw_spin_lock_irqsave(&iommu->register_lock, flag); 1160 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr); 1161 1162 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG); 1163 1164 /* Make sure hardware complete it */ 1165 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 1166 readl, (sts & DMA_GSTS_RTPS), sts); 1167 1168 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 1169} 1170 1171static void iommu_flush_write_buffer(struct intel_iommu *iommu) 1172{ 1173 u32 val; 1174 unsigned long flag; 1175 1176 if (!rwbf_quirk && !cap_rwbf(iommu->cap)) 1177 return; 1178 1179 raw_spin_lock_irqsave(&iommu->register_lock, flag); 1180 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG); 1181 1182 /* Make sure hardware complete it */ 1183 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 1184 readl, (!(val & 
DMA_GSTS_WBFS)), val); 1185 1186 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 1187} 1188 1189/* return value determine if we need a write buffer flush */ 1190static void __iommu_flush_context(struct intel_iommu *iommu, 1191 u16 did, u16 source_id, u8 function_mask, 1192 u64 type) 1193{ 1194 u64 val = 0; 1195 unsigned long flag; 1196 1197 switch (type) { 1198 case DMA_CCMD_GLOBAL_INVL: 1199 val = DMA_CCMD_GLOBAL_INVL; 1200 break; 1201 case DMA_CCMD_DOMAIN_INVL: 1202 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did); 1203 break; 1204 case DMA_CCMD_DEVICE_INVL: 1205 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did) 1206 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask); 1207 break; 1208 default: 1209 BUG(); 1210 } 1211 val |= DMA_CCMD_ICC; 1212 1213 raw_spin_lock_irqsave(&iommu->register_lock, flag); 1214 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val); 1215 1216 /* Make sure hardware complete it */ 1217 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG, 1218 dmar_readq, (!(val & DMA_CCMD_ICC)), val); 1219 1220 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 1221} 1222 1223/* return value determine if we need a write buffer flush */ 1224static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, 1225 u64 addr, unsigned int size_order, u64 type) 1226{ 1227 int tlb_offset = ecap_iotlb_offset(iommu->ecap); 1228 u64 val = 0, val_iva = 0; 1229 unsigned long flag; 1230 1231 switch (type) { 1232 case DMA_TLB_GLOBAL_FLUSH: 1233 /* global flush doesn't need set IVA_REG */ 1234 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT; 1235 break; 1236 case DMA_TLB_DSI_FLUSH: 1237 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); 1238 break; 1239 case DMA_TLB_PSI_FLUSH: 1240 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); 1241 /* IH bit is passed in as part of address */ 1242 val_iva = size_order | addr; 1243 break; 1244 default: 1245 BUG(); 1246 } 1247 /* Note: set drain read/write */ 1248#if 0 1249 /* 1250 * This is probably to be super secure.. Looks like we can 1251 * ignore it without any impact. 
1252 */ 1253 if (cap_read_drain(iommu->cap)) 1254 val |= DMA_TLB_READ_DRAIN; 1255#endif 1256 if (cap_write_drain(iommu->cap)) 1257 val |= DMA_TLB_WRITE_DRAIN; 1258 1259 raw_spin_lock_irqsave(&iommu->register_lock, flag); 1260 /* Note: Only uses first TLB reg currently */ 1261 if (val_iva) 1262 dmar_writeq(iommu->reg + tlb_offset, val_iva); 1263 dmar_writeq(iommu->reg + tlb_offset + 8, val); 1264 1265 /* Make sure hardware complete it */ 1266 IOMMU_WAIT_OP(iommu, tlb_offset + 8, 1267 dmar_readq, (!(val & DMA_TLB_IVT)), val); 1268 1269 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 1270 1271 /* check IOTLB invalidation granularity */ 1272 if (DMA_TLB_IAIG(val) == 0) 1273 printk(KERN_ERR"IOMMU: flush IOTLB failed\n"); 1274 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type)) 1275 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n", 1276 (unsigned long long)DMA_TLB_IIRG(type), 1277 (unsigned long long)DMA_TLB_IAIG(val)); 1278} 1279 1280static struct device_domain_info * 1281iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu, 1282 u8 bus, u8 devfn) 1283{ 1284 bool found = false; 1285 unsigned long flags; 1286 struct device_domain_info *info; 1287 struct pci_dev *pdev; 1288 1289 if (!ecap_dev_iotlb_support(iommu->ecap)) 1290 return NULL; 1291 1292 if (!iommu->qi) 1293 return NULL; 1294 1295 spin_lock_irqsave(&device_domain_lock, flags); 1296 list_for_each_entry(info, &domain->devices, link) 1297 if (info->iommu == iommu && info->bus == bus && 1298 info->devfn == devfn) { 1299 found = true; 1300 break; 1301 } 1302 spin_unlock_irqrestore(&device_domain_lock, flags); 1303 1304 if (!found || !info->dev || !dev_is_pci(info->dev)) 1305 return NULL; 1306 1307 pdev = to_pci_dev(info->dev); 1308 1309 if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS)) 1310 return NULL; 1311 1312 if (!dmar_find_matched_atsr_unit(pdev)) 1313 return NULL; 1314 1315 return info; 1316} 1317 1318static void iommu_enable_dev_iotlb(struct device_domain_info *info) 1319{ 1320 if (!info || !dev_is_pci(info->dev)) 1321 return; 1322 1323 pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT); 1324} 1325 1326static void iommu_disable_dev_iotlb(struct device_domain_info *info) 1327{ 1328 if (!info->dev || !dev_is_pci(info->dev) || 1329 !pci_ats_enabled(to_pci_dev(info->dev))) 1330 return; 1331 1332 pci_disable_ats(to_pci_dev(info->dev)); 1333} 1334 1335static void iommu_flush_dev_iotlb(struct dmar_domain *domain, 1336 u64 addr, unsigned mask) 1337{ 1338 u16 sid, qdep; 1339 unsigned long flags; 1340 struct device_domain_info *info; 1341 1342 spin_lock_irqsave(&device_domain_lock, flags); 1343 list_for_each_entry(info, &domain->devices, link) { 1344 struct pci_dev *pdev; 1345 if (!info->dev || !dev_is_pci(info->dev)) 1346 continue; 1347 1348 pdev = to_pci_dev(info->dev); 1349 if (!pci_ats_enabled(pdev)) 1350 continue; 1351 1352 sid = info->bus << 8 | info->devfn; 1353 qdep = pci_ats_queue_depth(pdev); 1354 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask); 1355 } 1356 spin_unlock_irqrestore(&device_domain_lock, flags); 1357} 1358 1359static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, 1360 unsigned long pfn, unsigned int pages, int ih, int map) 1361{ 1362 unsigned int mask = ilog2(__roundup_pow_of_two(pages)); 1363 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT; 1364 1365 BUG_ON(pages == 0); 1366 1367 if (ih) 1368 ih = 1 << 6; 1369 /* 1370 * Fallback to domain selective flush if no PSI support or the size is 1371 * too big. 
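	 * (Worked example: pages = 300 rounds up to 512, so mask = 9 and one
	 *  2MiB-aligned range of 512 pages is invalidated; if 9 were larger
	 *  than cap_max_amask_val() we would fall back to the DSI flush.)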
1372 * PSI requires page size to be 2 ^ x, and the base address is naturally 1373 * aligned to the size 1374 */ 1375 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap)) 1376 iommu->flush.flush_iotlb(iommu, did, 0, 0, 1377 DMA_TLB_DSI_FLUSH); 1378 else 1379 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask, 1380 DMA_TLB_PSI_FLUSH); 1381 1382 /* 1383 * In caching mode, changes of pages from non-present to present require 1384 * flush. However, device IOTLB doesn't need to be flushed in this case. 1385 */ 1386 if (!cap_caching_mode(iommu->cap) || !map) 1387 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask); 1388} 1389 1390static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) 1391{ 1392 u32 pmen; 1393 unsigned long flags; 1394 1395 raw_spin_lock_irqsave(&iommu->register_lock, flags); 1396 pmen = readl(iommu->reg + DMAR_PMEN_REG); 1397 pmen &= ~DMA_PMEN_EPM; 1398 writel(pmen, iommu->reg + DMAR_PMEN_REG); 1399 1400 /* wait for the protected region status bit to clear */ 1401 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG, 1402 readl, !(pmen & DMA_PMEN_PRS), pmen); 1403 1404 raw_spin_unlock_irqrestore(&iommu->register_lock, flags); 1405} 1406 1407static void iommu_enable_translation(struct intel_iommu *iommu) 1408{ 1409 u32 sts; 1410 unsigned long flags; 1411 1412 raw_spin_lock_irqsave(&iommu->register_lock, flags); 1413 iommu->gcmd |= DMA_GCMD_TE; 1414 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); 1415 1416 /* Make sure hardware complete it */ 1417 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 1418 readl, (sts & DMA_GSTS_TES), sts); 1419 1420 raw_spin_unlock_irqrestore(&iommu->register_lock, flags); 1421} 1422 1423static void iommu_disable_translation(struct intel_iommu *iommu) 1424{ 1425 u32 sts; 1426 unsigned long flag; 1427 1428 raw_spin_lock_irqsave(&iommu->register_lock, flag); 1429 iommu->gcmd &= ~DMA_GCMD_TE; 1430 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); 1431 1432 /* Make sure hardware complete it */ 1433 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, 1434 readl, (!(sts & DMA_GSTS_TES)), sts); 1435 1436 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 1437} 1438 1439 1440static int iommu_init_domains(struct intel_iommu *iommu) 1441{ 1442 unsigned long ndomains; 1443 unsigned long nlongs; 1444 1445 ndomains = cap_ndoms(iommu->cap); 1446 pr_debug("IOMMU%d: Number of Domains supported <%ld>\n", 1447 iommu->seq_id, ndomains); 1448 nlongs = BITS_TO_LONGS(ndomains); 1449 1450 spin_lock_init(&iommu->lock); 1451 1452 /* TBD: there might be 64K domains, 1453 * consider other allocation for future chip 1454 */ 1455 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL); 1456 if (!iommu->domain_ids) { 1457 pr_err("IOMMU%d: allocating domain id array failed\n", 1458 iommu->seq_id); 1459 return -ENOMEM; 1460 } 1461 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *), 1462 GFP_KERNEL); 1463 if (!iommu->domains) { 1464 pr_err("IOMMU%d: allocating domain array failed\n", 1465 iommu->seq_id); 1466 kfree(iommu->domain_ids); 1467 iommu->domain_ids = NULL; 1468 return -ENOMEM; 1469 } 1470 1471 /* 1472 * if Caching mode is set, then invalid translations are tagged 1473 * with domainid 0. Hence we need to pre-allocate it. 
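	 * (disable_dmar_iommu() below skips bit 0 for the same reason when
	 *  tearing the IOMMU down.)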
1474 */ 1475 if (cap_caching_mode(iommu->cap)) 1476 set_bit(0, iommu->domain_ids); 1477 return 0; 1478} 1479 1480static void disable_dmar_iommu(struct intel_iommu *iommu) 1481{ 1482 struct dmar_domain *domain; 1483 int i; 1484 1485 if ((iommu->domains) && (iommu->domain_ids)) { 1486 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) { 1487 /* 1488 * Domain id 0 is reserved for invalid translation 1489 * if hardware supports caching mode. 1490 */ 1491 if (cap_caching_mode(iommu->cap) && i == 0) 1492 continue; 1493 1494 domain = iommu->domains[i]; 1495 clear_bit(i, iommu->domain_ids); 1496 if (domain_detach_iommu(domain, iommu) == 0 && 1497 !domain_type_is_vm(domain)) 1498 domain_exit(domain); 1499 } 1500 } 1501 1502 if (iommu->gcmd & DMA_GCMD_TE) 1503 iommu_disable_translation(iommu); 1504} 1505 1506static void free_dmar_iommu(struct intel_iommu *iommu) 1507{ 1508 if ((iommu->domains) && (iommu->domain_ids)) { 1509 kfree(iommu->domains); 1510 kfree(iommu->domain_ids); 1511 iommu->domains = NULL; 1512 iommu->domain_ids = NULL; 1513 } 1514 1515 g_iommus[iommu->seq_id] = NULL; 1516 1517 /* free context mapping */ 1518 free_context_table(iommu); 1519} 1520 1521static struct dmar_domain *alloc_domain(int flags) 1522{ 1523 /* domain id for virtual machine, it won't be set in context */ 1524 static atomic_t vm_domid = ATOMIC_INIT(0); 1525 struct dmar_domain *domain; 1526 1527 domain = alloc_domain_mem(); 1528 if (!domain) 1529 return NULL; 1530 1531 memset(domain, 0, sizeof(*domain)); 1532 domain->nid = -1; 1533 domain->flags = flags; 1534 spin_lock_init(&domain->iommu_lock); 1535 INIT_LIST_HEAD(&domain->devices); 1536 if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE) 1537 domain->id = atomic_inc_return(&vm_domid); 1538 1539 return domain; 1540} 1541 1542static int __iommu_attach_domain(struct dmar_domain *domain, 1543 struct intel_iommu *iommu) 1544{ 1545 int num; 1546 unsigned long ndomains; 1547 1548 ndomains = cap_ndoms(iommu->cap); 1549 num = find_first_zero_bit(iommu->domain_ids, ndomains); 1550 if (num < ndomains) { 1551 set_bit(num, iommu->domain_ids); 1552 iommu->domains[num] = domain; 1553 } else { 1554 num = -ENOSPC; 1555 } 1556 1557 return num; 1558} 1559 1560static int iommu_attach_domain(struct dmar_domain *domain, 1561 struct intel_iommu *iommu) 1562{ 1563 int num; 1564 unsigned long flags; 1565 1566 spin_lock_irqsave(&iommu->lock, flags); 1567 num = __iommu_attach_domain(domain, iommu); 1568 spin_unlock_irqrestore(&iommu->lock, flags); 1569 if (num < 0) 1570 pr_err("IOMMU: no free domain ids\n"); 1571 1572 return num; 1573} 1574 1575static int iommu_attach_vm_domain(struct dmar_domain *domain, 1576 struct intel_iommu *iommu) 1577{ 1578 int num; 1579 unsigned long ndomains; 1580 1581 ndomains = cap_ndoms(iommu->cap); 1582 for_each_set_bit(num, iommu->domain_ids, ndomains) 1583 if (iommu->domains[num] == domain) 1584 return num; 1585 1586 return __iommu_attach_domain(domain, iommu); 1587} 1588 1589static void iommu_detach_domain(struct dmar_domain *domain, 1590 struct intel_iommu *iommu) 1591{ 1592 unsigned long flags; 1593 int num, ndomains; 1594 1595 spin_lock_irqsave(&iommu->lock, flags); 1596 if (domain_type_is_vm_or_si(domain)) { 1597 ndomains = cap_ndoms(iommu->cap); 1598 for_each_set_bit(num, iommu->domain_ids, ndomains) { 1599 if (iommu->domains[num] == domain) { 1600 clear_bit(num, iommu->domain_ids); 1601 iommu->domains[num] = NULL; 1602 break; 1603 } 1604 } 1605 } else { 1606 clear_bit(domain->id, iommu->domain_ids); 1607 iommu->domains[domain->id] = NULL; 1608 } 1609 
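	/*
	 * Note that for VM/SI domains the id allocated on this IOMMU can
	 * differ from domain->id, which is why the bitmap was searched by
	 * pointer rather than indexed directly above.
	 */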
spin_unlock_irqrestore(&iommu->lock, flags); 1610} 1611 1612static void domain_attach_iommu(struct dmar_domain *domain, 1613 struct intel_iommu *iommu) 1614{ 1615 unsigned long flags; 1616 1617 spin_lock_irqsave(&domain->iommu_lock, flags); 1618 if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) { 1619 domain->iommu_count++; 1620 if (domain->iommu_count == 1) 1621 domain->nid = iommu->node; 1622 domain_update_iommu_cap(domain); 1623 } 1624 spin_unlock_irqrestore(&domain->iommu_lock, flags); 1625} 1626 1627static int domain_detach_iommu(struct dmar_domain *domain, 1628 struct intel_iommu *iommu) 1629{ 1630 unsigned long flags; 1631 int count = INT_MAX; 1632 1633 spin_lock_irqsave(&domain->iommu_lock, flags); 1634 if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) { 1635 count = --domain->iommu_count; 1636 domain_update_iommu_cap(domain); 1637 } 1638 spin_unlock_irqrestore(&domain->iommu_lock, flags); 1639 1640 return count; 1641} 1642 1643static struct iova_domain reserved_iova_list; 1644static struct lock_class_key reserved_rbtree_key; 1645 1646static int dmar_init_reserved_ranges(void) 1647{ 1648 struct pci_dev *pdev = NULL; 1649 struct iova *iova; 1650 int i; 1651 1652 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN, 1653 DMA_32BIT_PFN); 1654 1655 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock, 1656 &reserved_rbtree_key); 1657 1658 /* IOAPIC ranges shouldn't be accessed by DMA */ 1659 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START), 1660 IOVA_PFN(IOAPIC_RANGE_END)); 1661 if (!iova) { 1662 printk(KERN_ERR "Reserve IOAPIC range failed\n"); 1663 return -ENODEV; 1664 } 1665 1666 /* Reserve all PCI MMIO to avoid peer-to-peer access */ 1667 for_each_pci_dev(pdev) { 1668 struct resource *r; 1669 1670 for (i = 0; i < PCI_NUM_RESOURCES; i++) { 1671 r = &pdev->resource[i]; 1672 if (!r->flags || !(r->flags & IORESOURCE_MEM)) 1673 continue; 1674 iova = reserve_iova(&reserved_iova_list, 1675 IOVA_PFN(r->start), 1676 IOVA_PFN(r->end)); 1677 if (!iova) { 1678 printk(KERN_ERR "Reserve iova failed\n"); 1679 return -ENODEV; 1680 } 1681 } 1682 } 1683 return 0; 1684} 1685 1686static void domain_reserve_special_ranges(struct dmar_domain *domain) 1687{ 1688 copy_reserved_iova(&reserved_iova_list, &domain->iovad); 1689} 1690 1691static inline int guestwidth_to_adjustwidth(int gaw) 1692{ 1693 int agaw; 1694 int r = (gaw - 12) % 9; 1695 1696 if (r == 0) 1697 agaw = gaw; 1698 else 1699 agaw = gaw + 9 - r; 1700 if (agaw > 64) 1701 agaw = 64; 1702 return agaw; 1703} 1704 1705static int domain_init(struct dmar_domain *domain, int guest_width) 1706{ 1707 struct intel_iommu *iommu; 1708 int adjust_width, agaw; 1709 unsigned long sagaw; 1710 1711 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN, 1712 DMA_32BIT_PFN); 1713 domain_reserve_special_ranges(domain); 1714 1715 /* calculate AGAW */ 1716 iommu = domain_get_iommu(domain); 1717 if (guest_width > cap_mgaw(iommu->cap)) 1718 guest_width = cap_mgaw(iommu->cap); 1719 domain->gaw = guest_width; 1720 adjust_width = guestwidth_to_adjustwidth(guest_width); 1721 agaw = width_to_agaw(adjust_width); 1722 sagaw = cap_sagaw(iommu->cap); 1723 if (!test_bit(agaw, &sagaw)) { 1724 /* hardware doesn't support it, choose a bigger one */ 1725 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw); 1726 agaw = find_next_bit(&sagaw, 5, agaw); 1727 if (agaw >= 5) 1728 return -ENODEV; 1729 } 1730 domain->agaw = agaw; 1731 1732 if (ecap_coherent(iommu->ecap)) 1733 domain->iommu_coherency = 1; 1734 else 1735 
domain->iommu_coherency = 0; 1736 1737 if (ecap_sc_support(iommu->ecap)) 1738 domain->iommu_snooping = 1; 1739 else 1740 domain->iommu_snooping = 0; 1741 1742 if (intel_iommu_superpage) 1743 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); 1744 else 1745 domain->iommu_superpage = 0; 1746 1747 domain->nid = iommu->node; 1748 1749 /* always allocate the top pgd */ 1750 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); 1751 if (!domain->pgd) 1752 return -ENOMEM; 1753 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE); 1754 return 0; 1755} 1756 1757static void domain_exit(struct dmar_domain *domain) 1758{ 1759 struct dmar_drhd_unit *drhd; 1760 struct intel_iommu *iommu; 1761 struct page *freelist = NULL; 1762 1763 /* Domain 0 is reserved, so dont process it */ 1764 if (!domain) 1765 return; 1766 1767 /* Flush any lazy unmaps that may reference this domain */ 1768 if (!intel_iommu_strict) 1769 flush_unmaps_timeout(0); 1770 1771 /* remove associated devices */ 1772 domain_remove_dev_info(domain); 1773 1774 /* destroy iovas */ 1775 put_iova_domain(&domain->iovad); 1776 1777 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); 1778 1779 /* clear attached or cached domains */ 1780 rcu_read_lock(); 1781 for_each_active_iommu(iommu, drhd) 1782 if (domain_type_is_vm(domain) || 1783 test_bit(iommu->seq_id, domain->iommu_bmp)) 1784 iommu_detach_domain(domain, iommu); 1785 rcu_read_unlock(); 1786 1787 dma_free_pagelist(freelist); 1788 1789 free_domain_mem(domain); 1790} 1791 1792static int domain_context_mapping_one(struct dmar_domain *domain, 1793 struct intel_iommu *iommu, 1794 u8 bus, u8 devfn, int translation) 1795{ 1796 struct context_entry *context; 1797 unsigned long flags; 1798 struct dma_pte *pgd; 1799 int id; 1800 int agaw; 1801 struct device_domain_info *info = NULL; 1802 1803 pr_debug("Set context mapping for %02x:%02x.%d\n", 1804 bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 1805 1806 BUG_ON(!domain->pgd); 1807 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH && 1808 translation != CONTEXT_TT_MULTI_LEVEL); 1809 1810 spin_lock_irqsave(&iommu->lock, flags); 1811 context = iommu_context_addr(iommu, bus, devfn, 1); 1812 spin_unlock_irqrestore(&iommu->lock, flags); 1813 if (!context) 1814 return -ENOMEM; 1815 spin_lock_irqsave(&iommu->lock, flags); 1816 if (context_present(context)) { 1817 spin_unlock_irqrestore(&iommu->lock, flags); 1818 return 0; 1819 } 1820 1821 id = domain->id; 1822 pgd = domain->pgd; 1823 1824 if (domain_type_is_vm_or_si(domain)) { 1825 if (domain_type_is_vm(domain)) { 1826 id = iommu_attach_vm_domain(domain, iommu); 1827 if (id < 0) { 1828 spin_unlock_irqrestore(&iommu->lock, flags); 1829 pr_err("IOMMU: no free domain ids\n"); 1830 return -EFAULT; 1831 } 1832 } 1833 1834 /* Skip top levels of page tables for 1835 * iommu which has less agaw than default. 1836 * Unnecessary for PT mode. 1837 */ 1838 if (translation != CONTEXT_TT_PASS_THROUGH) { 1839 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) { 1840 pgd = phys_to_virt(dma_pte_addr(pgd)); 1841 if (!dma_pte_present(pgd)) { 1842 spin_unlock_irqrestore(&iommu->lock, flags); 1843 return -ENOMEM; 1844 } 1845 } 1846 } 1847 } 1848 1849 context_set_domain_id(context, id); 1850 1851 if (translation != CONTEXT_TT_PASS_THROUGH) { 1852 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); 1853 translation = info ? 
CONTEXT_TT_DEV_IOTLB : 1854 CONTEXT_TT_MULTI_LEVEL; 1855 } 1856 /* 1857 * In pass through mode, AW must be programmed to indicate the largest 1858 * AGAW value supported by hardware. And ASR is ignored by hardware. 1859 */ 1860 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH)) 1861 context_set_address_width(context, iommu->msagaw); 1862 else { 1863 context_set_address_root(context, virt_to_phys(pgd)); 1864 context_set_address_width(context, iommu->agaw); 1865 } 1866 1867 context_set_translation_type(context, translation); 1868 context_set_fault_enable(context); 1869 context_set_present(context); 1870 domain_flush_cache(domain, context, sizeof(*context)); 1871 1872 /* 1873 * It's a non-present to present mapping. If hardware doesn't cache 1874 * non-present entry we only need to flush the write-buffer. If the 1875 * _does_ cache non-present entries, then it does so in the special 1876 * domain #0, which we have to flush: 1877 */ 1878 if (cap_caching_mode(iommu->cap)) { 1879 iommu->flush.flush_context(iommu, 0, 1880 (((u16)bus) << 8) | devfn, 1881 DMA_CCMD_MASK_NOBIT, 1882 DMA_CCMD_DEVICE_INVL); 1883 iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH); 1884 } else { 1885 iommu_flush_write_buffer(iommu); 1886 } 1887 iommu_enable_dev_iotlb(info); 1888 spin_unlock_irqrestore(&iommu->lock, flags); 1889 1890 domain_attach_iommu(domain, iommu); 1891 1892 return 0; 1893} 1894 1895struct domain_context_mapping_data { 1896 struct dmar_domain *domain; 1897 struct intel_iommu *iommu; 1898 int translation; 1899}; 1900 1901static int domain_context_mapping_cb(struct pci_dev *pdev, 1902 u16 alias, void *opaque) 1903{ 1904 struct domain_context_mapping_data *data = opaque; 1905 1906 return domain_context_mapping_one(data->domain, data->iommu, 1907 PCI_BUS_NUM(alias), alias & 0xff, 1908 data->translation); 1909} 1910 1911static int 1912domain_context_mapping(struct dmar_domain *domain, struct device *dev, 1913 int translation) 1914{ 1915 struct intel_iommu *iommu; 1916 u8 bus, devfn; 1917 struct domain_context_mapping_data data; 1918 1919 iommu = device_to_iommu(dev, &bus, &devfn); 1920 if (!iommu) 1921 return -ENODEV; 1922 1923 if (!dev_is_pci(dev)) 1924 return domain_context_mapping_one(domain, iommu, bus, devfn, 1925 translation); 1926 1927 data.domain = domain; 1928 data.iommu = iommu; 1929 data.translation = translation; 1930 1931 return pci_for_each_dma_alias(to_pci_dev(dev), 1932 &domain_context_mapping_cb, &data); 1933} 1934 1935static int domain_context_mapped_cb(struct pci_dev *pdev, 1936 u16 alias, void *opaque) 1937{ 1938 struct intel_iommu *iommu = opaque; 1939 1940 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff); 1941} 1942 1943static int domain_context_mapped(struct device *dev) 1944{ 1945 struct intel_iommu *iommu; 1946 u8 bus, devfn; 1947 1948 iommu = device_to_iommu(dev, &bus, &devfn); 1949 if (!iommu) 1950 return -ENODEV; 1951 1952 if (!dev_is_pci(dev)) 1953 return device_context_mapped(iommu, bus, devfn); 1954 1955 return !pci_for_each_dma_alias(to_pci_dev(dev), 1956 domain_context_mapped_cb, iommu); 1957} 1958 1959/* Returns a number of VTD pages, but aligned to MM page size */ 1960static inline unsigned long aligned_nrpages(unsigned long host_addr, 1961 size_t size) 1962{ 1963 host_addr &= ~PAGE_MASK; 1964 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; 1965} 1966 1967/* Return largest possible superpage level for a given mapping */ 1968static inline int hardware_largepage_caps(struct dmar_domain *domain, 1969 unsigned long iov_pfn, 1970 
unsigned long phy_pfn, 1971 unsigned long pages) 1972{ 1973 int support, level = 1; 1974 unsigned long pfnmerge; 1975 1976 support = domain->iommu_superpage; 1977 1978 /* To use a large page, the virtual *and* physical addresses 1979 must be aligned to 2MiB/1GiB/etc. Lower bits set in either 1980 of them will mean we have to use smaller pages. So just 1981 merge them and check both at once. */ 1982 pfnmerge = iov_pfn | phy_pfn; 1983 1984 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) { 1985 pages >>= VTD_STRIDE_SHIFT; 1986 if (!pages) 1987 break; 1988 pfnmerge >>= VTD_STRIDE_SHIFT; 1989 level++; 1990 support--; 1991 } 1992 return level; 1993} 1994 1995static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, 1996 struct scatterlist *sg, unsigned long phys_pfn, 1997 unsigned long nr_pages, int prot) 1998{ 1999 struct dma_pte *first_pte = NULL, *pte = NULL; 2000 phys_addr_t uninitialized_var(pteval); 2001 unsigned long sg_res = 0; 2002 unsigned int largepage_lvl = 0; 2003 unsigned long lvl_pages = 0; 2004 2005 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1)); 2006 2007 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) 2008 return -EINVAL; 2009 2010 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP; 2011 2012 if (!sg) { 2013 sg_res = nr_pages; 2014 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot; 2015 } 2016 2017 while (nr_pages > 0) { 2018 uint64_t tmp; 2019 2020 if (!sg_res) { 2021 sg_res = aligned_nrpages(sg->offset, sg->length); 2022 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; 2023 sg->dma_length = sg->length; 2024 pteval = page_to_phys(sg_page(sg)) | prot; 2025 phys_pfn = pteval >> VTD_PAGE_SHIFT; 2026 } 2027 2028 if (!pte) { 2029 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res); 2030 2031 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl); 2032 if (!pte) 2033 return -ENOMEM; 2034 /* It is large page*/ 2035 if (largepage_lvl > 1) { 2036 unsigned long nr_superpages, end_pfn; 2037 2038 pteval |= DMA_PTE_LARGE_PAGE; 2039 lvl_pages = lvl_to_nr_pages(largepage_lvl); 2040 2041 nr_superpages = sg_res / lvl_pages; 2042 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1; 2043 2044 /* 2045 * Ensure that old small page tables are 2046 * removed to make room for superpage(s). 2047 */ 2048 dma_pte_free_pagetable(domain, iov_pfn, end_pfn); 2049 } else { 2050 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; 2051 } 2052 2053 } 2054 /* We don't need lock here, nobody else 2055 * touches the iova range 2056 */ 2057 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval); 2058 if (tmp) { 2059 static int dumps = 5; 2060 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n", 2061 iov_pfn, tmp, (unsigned long long)pteval); 2062 if (dumps) { 2063 dumps--; 2064 debug_dma_dump_mappings(NULL); 2065 } 2066 WARN_ON(1); 2067 } 2068 2069 lvl_pages = lvl_to_nr_pages(largepage_lvl); 2070 2071 BUG_ON(nr_pages < lvl_pages); 2072 BUG_ON(sg_res < lvl_pages); 2073 2074 nr_pages -= lvl_pages; 2075 iov_pfn += lvl_pages; 2076 phys_pfn += lvl_pages; 2077 pteval += lvl_pages * VTD_PAGE_SIZE; 2078 sg_res -= lvl_pages; 2079 2080 /* If the next PTE would be the first in a new page, then we 2081 need to flush the cache on the entries we've just written. 2082 And then we'll need to recalculate 'pte', so clear it and 2083 let it get set again in the if (!pte) block above. 2084 2085 If we're done (!nr_pages) we need to flush the cache too. 
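		   ('Flush the cache' here means domain_flush_cache(), i.e. a
		   clflush of the PTEs we just wrote when the IOMMU is not
		   coherent; the IOTLB itself is flushed separately by our
		   callers.)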
2086 2087 Also if we've been setting superpages, we may need to 2088 recalculate 'pte' and switch back to smaller pages for the 2089 end of the mapping, if the trailing size is not enough to 2090 use another superpage (i.e. sg_res < lvl_pages). */ 2091 pte++; 2092 if (!nr_pages || first_pte_in_page(pte) || 2093 (largepage_lvl > 1 && sg_res < lvl_pages)) { 2094 domain_flush_cache(domain, first_pte, 2095 (void *)pte - (void *)first_pte); 2096 pte = NULL; 2097 } 2098 2099 if (!sg_res && nr_pages) 2100 sg = sg_next(sg); 2101 } 2102 return 0; 2103} 2104 2105static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn, 2106 struct scatterlist *sg, unsigned long nr_pages, 2107 int prot) 2108{ 2109 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot); 2110} 2111 2112static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, 2113 unsigned long phys_pfn, unsigned long nr_pages, 2114 int prot) 2115{ 2116 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot); 2117} 2118 2119static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) 2120{ 2121 if (!iommu) 2122 return; 2123 2124 clear_context_table(iommu, bus, devfn); 2125 iommu->flush.flush_context(iommu, 0, 0, 0, 2126 DMA_CCMD_GLOBAL_INVL); 2127 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); 2128} 2129 2130static inline void unlink_domain_info(struct device_domain_info *info) 2131{ 2132 assert_spin_locked(&device_domain_lock); 2133 list_del(&info->link); 2134 list_del(&info->global); 2135 if (info->dev) 2136 info->dev->archdata.iommu = NULL; 2137} 2138 2139static void domain_remove_dev_info(struct dmar_domain *domain) 2140{ 2141 struct device_domain_info *info, *tmp; 2142 unsigned long flags; 2143 2144 spin_lock_irqsave(&device_domain_lock, flags); 2145 list_for_each_entry_safe(info, tmp, &domain->devices, link) { 2146 unlink_domain_info(info); 2147 spin_unlock_irqrestore(&device_domain_lock, flags); 2148 2149 iommu_disable_dev_iotlb(info); 2150 iommu_detach_dev(info->iommu, info->bus, info->devfn); 2151 2152 if (domain_type_is_vm(domain)) { 2153 iommu_detach_dependent_devices(info->iommu, info->dev); 2154 domain_detach_iommu(domain, info->iommu); 2155 } 2156 2157 free_devinfo_mem(info); 2158 spin_lock_irqsave(&device_domain_lock, flags); 2159 } 2160 spin_unlock_irqrestore(&device_domain_lock, flags); 2161} 2162 2163/* 2164 * find_domain 2165 * Note: we use struct device->archdata.iommu stores the info 2166 */ 2167static struct dmar_domain *find_domain(struct device *dev) 2168{ 2169 struct device_domain_info *info; 2170 2171 /* No lock here, assumes no domain exit in normal case */ 2172 info = dev->archdata.iommu; 2173 if (info) 2174 return info->domain; 2175 return NULL; 2176} 2177 2178static inline struct device_domain_info * 2179dmar_search_domain_by_dev_info(int segment, int bus, int devfn) 2180{ 2181 struct device_domain_info *info; 2182 2183 list_for_each_entry(info, &device_domain_list, global) 2184 if (info->iommu->segment == segment && info->bus == bus && 2185 info->devfn == devfn) 2186 return info; 2187 2188 return NULL; 2189} 2190 2191static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu, 2192 int bus, int devfn, 2193 struct device *dev, 2194 struct dmar_domain *domain) 2195{ 2196 struct dmar_domain *found = NULL; 2197 struct device_domain_info *info; 2198 unsigned long flags; 2199 2200 info = alloc_devinfo_mem(); 2201 if (!info) 2202 return NULL; 2203 2204 info->bus = bus; 2205 info->devfn = devfn; 2206 
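	/*
	 * This info struct is linked below onto two lists: the per-domain
	 * devices list (info->link) and the global device_domain_list
	 * (info->global), which is what dmar_search_domain_by_dev_info()
	 * walks when resolving a (segment, bus, devfn) triple back to a
	 * domain.  A minimal lookup sketch, assuming device_domain_lock is
	 * held by the caller:
	 *
	 *	info = dmar_search_domain_by_dev_info(iommu->segment,
	 *					      bus, devfn);
	 *	domain = info ? info->domain : NULL;
	 */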
info->dev = dev; 2207 info->domain = domain; 2208 info->iommu = iommu; 2209 2210 spin_lock_irqsave(&device_domain_lock, flags); 2211 if (dev) 2212 found = find_domain(dev); 2213 else { 2214 struct device_domain_info *info2; 2215 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn); 2216 if (info2) 2217 found = info2->domain; 2218 } 2219 if (found) { 2220 spin_unlock_irqrestore(&device_domain_lock, flags); 2221 free_devinfo_mem(info); 2222 /* Caller must free the original domain */ 2223 return found; 2224 } 2225 2226 list_add(&info->link, &domain->devices); 2227 list_add(&info->global, &device_domain_list); 2228 if (dev) 2229 dev->archdata.iommu = info; 2230 spin_unlock_irqrestore(&device_domain_lock, flags); 2231 2232 return domain; 2233} 2234 2235static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque) 2236{ 2237 *(u16 *)opaque = alias; 2238 return 0; 2239} 2240 2241/* domain is initialized */ 2242static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw) 2243{ 2244 struct dmar_domain *domain, *tmp; 2245 struct intel_iommu *iommu; 2246 struct device_domain_info *info; 2247 u16 dma_alias; 2248 unsigned long flags; 2249 u8 bus, devfn; 2250 2251 domain = find_domain(dev); 2252 if (domain) 2253 return domain; 2254 2255 iommu = device_to_iommu(dev, &bus, &devfn); 2256 if (!iommu) 2257 return NULL; 2258 2259 if (dev_is_pci(dev)) { 2260 struct pci_dev *pdev = to_pci_dev(dev); 2261 2262 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias); 2263 2264 spin_lock_irqsave(&device_domain_lock, flags); 2265 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus), 2266 PCI_BUS_NUM(dma_alias), 2267 dma_alias & 0xff); 2268 if (info) { 2269 iommu = info->iommu; 2270 domain = info->domain; 2271 } 2272 spin_unlock_irqrestore(&device_domain_lock, flags); 2273 2274 /* DMA alias already has a domain, uses it */ 2275 if (info) 2276 goto found_domain; 2277 } 2278 2279 /* Allocate and initialize new domain for the device */ 2280 domain = alloc_domain(0); 2281 if (!domain) 2282 return NULL; 2283 domain->id = iommu_attach_domain(domain, iommu); 2284 if (domain->id < 0) { 2285 free_domain_mem(domain); 2286 return NULL; 2287 } 2288 domain_attach_iommu(domain, iommu); 2289 if (domain_init(domain, gaw)) { 2290 domain_exit(domain); 2291 return NULL; 2292 } 2293 2294 /* register PCI DMA alias device */ 2295 if (dev_is_pci(dev)) { 2296 tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias), 2297 dma_alias & 0xff, NULL, domain); 2298 2299 if (!tmp || tmp != domain) { 2300 domain_exit(domain); 2301 domain = tmp; 2302 } 2303 2304 if (!domain) 2305 return NULL; 2306 } 2307 2308found_domain: 2309 tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain); 2310 2311 if (!tmp || tmp != domain) { 2312 domain_exit(domain); 2313 domain = tmp; 2314 } 2315 2316 return domain; 2317} 2318 2319static int iommu_identity_mapping; 2320#define IDENTMAP_ALL 1 2321#define IDENTMAP_GFX 2 2322#define IDENTMAP_AZALIA 4 2323 2324static int iommu_domain_identity_map(struct dmar_domain *domain, 2325 unsigned long long start, 2326 unsigned long long end) 2327{ 2328 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT; 2329 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT; 2330 2331 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn), 2332 dma_to_mm_pfn(last_vpfn))) { 2333 printk(KERN_ERR "IOMMU: reserve iova failed\n"); 2334 return -ENOMEM; 2335 } 2336 2337 pr_debug("Mapping reserved region %llx-%llx for domain %d\n", 2338 start, end, domain->id); 2339 /* 2340 * RMRR range might have 
overlap with physical memory range, 2341 * clear it first 2342 */ 2343 dma_pte_clear_range(domain, first_vpfn, last_vpfn); 2344 2345 return domain_pfn_mapping(domain, first_vpfn, first_vpfn, 2346 last_vpfn - first_vpfn + 1, 2347 DMA_PTE_READ|DMA_PTE_WRITE); 2348} 2349 2350static int iommu_prepare_identity_map(struct device *dev, 2351 unsigned long long start, 2352 unsigned long long end) 2353{ 2354 struct dmar_domain *domain; 2355 int ret; 2356 2357 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); 2358 if (!domain) 2359 return -ENOMEM; 2360 2361 /* For _hardware_ passthrough, don't bother. But for software 2362 passthrough, we do it anyway -- it may indicate a memory 2363 range which is reserved in E820, so which didn't get set 2364 up to start with in si_domain */ 2365 if (domain == si_domain && hw_pass_through) { 2366 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n", 2367 dev_name(dev), start, end); 2368 return 0; 2369 } 2370 2371 printk(KERN_INFO 2372 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", 2373 dev_name(dev), start, end); 2374 2375 if (end < start) { 2376 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n" 2377 "BIOS vendor: %s; Ver: %s; Product Version: %s\n", 2378 dmi_get_system_info(DMI_BIOS_VENDOR), 2379 dmi_get_system_info(DMI_BIOS_VERSION), 2380 dmi_get_system_info(DMI_PRODUCT_VERSION)); 2381 ret = -EIO; 2382 goto error; 2383 } 2384 2385 if (end >> agaw_to_width(domain->agaw)) { 2386 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n" 2387 "BIOS vendor: %s; Ver: %s; Product Version: %s\n", 2388 agaw_to_width(domain->agaw), 2389 dmi_get_system_info(DMI_BIOS_VENDOR), 2390 dmi_get_system_info(DMI_BIOS_VERSION), 2391 dmi_get_system_info(DMI_PRODUCT_VERSION)); 2392 ret = -EIO; 2393 goto error; 2394 } 2395 2396 ret = iommu_domain_identity_map(domain, start, end); 2397 if (ret) 2398 goto error; 2399 2400 /* context entry init */ 2401 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL); 2402 if (ret) 2403 goto error; 2404 2405 return 0; 2406 2407 error: 2408 domain_exit(domain); 2409 return ret; 2410} 2411 2412static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, 2413 struct device *dev) 2414{ 2415 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) 2416 return 0; 2417 return iommu_prepare_identity_map(dev, rmrr->base_address, 2418 rmrr->end_address); 2419} 2420 2421#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA 2422static inline void iommu_prepare_isa(void) 2423{ 2424 struct pci_dev *pdev; 2425 int ret; 2426 2427 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); 2428 if (!pdev) 2429 return; 2430 2431 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); 2432 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1); 2433 2434 if (ret) 2435 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " 2436 "floppy might not work\n"); 2437 2438 pci_dev_put(pdev); 2439} 2440#else 2441static inline void iommu_prepare_isa(void) 2442{ 2443 return; 2444} 2445#endif /* !CONFIG_INTEL_IOMMU_FLPY_WA */ 2446 2447static int md_domain_init(struct dmar_domain *domain, int guest_width); 2448 2449static int __init si_domain_init(int hw) 2450{ 2451 struct dmar_drhd_unit *drhd; 2452 struct intel_iommu *iommu; 2453 int nid, ret = 0; 2454 bool first = true; 2455 2456 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY); 2457 if (!si_domain) 2458 return -EFAULT; 2459 2460 for_each_active_iommu(iommu, drhd) { 2461 ret = iommu_attach_domain(si_domain, 
iommu); 2462 if (ret < 0) { 2463 domain_exit(si_domain); 2464 return -EFAULT; 2465 } else if (first) { 2466 si_domain->id = ret; 2467 first = false; 2468 } else if (si_domain->id != ret) { 2469 domain_exit(si_domain); 2470 return -EFAULT; 2471 } 2472 domain_attach_iommu(si_domain, iommu); 2473 } 2474 2475 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { 2476 domain_exit(si_domain); 2477 return -EFAULT; 2478 } 2479 2480 pr_debug("IOMMU: identity mapping domain is domain %d\n", 2481 si_domain->id); 2482 2483 if (hw) 2484 return 0; 2485 2486 for_each_online_node(nid) { 2487 unsigned long start_pfn, end_pfn; 2488 int i; 2489 2490 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { 2491 ret = iommu_domain_identity_map(si_domain, 2492 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn)); 2493 if (ret) 2494 return ret; 2495 } 2496 } 2497 2498 return 0; 2499} 2500 2501static int identity_mapping(struct device *dev) 2502{ 2503 struct device_domain_info *info; 2504 2505 if (likely(!iommu_identity_mapping)) 2506 return 0; 2507 2508 info = dev->archdata.iommu; 2509 if (info && info != DUMMY_DEVICE_DOMAIN_INFO) 2510 return (info->domain == si_domain); 2511 2512 return 0; 2513} 2514 2515static int domain_add_dev_info(struct dmar_domain *domain, 2516 struct device *dev, int translation) 2517{ 2518 struct dmar_domain *ndomain; 2519 struct intel_iommu *iommu; 2520 u8 bus, devfn; 2521 int ret; 2522 2523 iommu = device_to_iommu(dev, &bus, &devfn); 2524 if (!iommu) 2525 return -ENODEV; 2526 2527 ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain); 2528 if (ndomain != domain) 2529 return -EBUSY; 2530 2531 ret = domain_context_mapping(domain, dev, translation); 2532 if (ret) { 2533 domain_remove_one_dev_info(domain, dev); 2534 return ret; 2535 } 2536 2537 return 0; 2538} 2539 2540static bool device_has_rmrr(struct device *dev) 2541{ 2542 struct dmar_rmrr_unit *rmrr; 2543 struct device *tmp; 2544 int i; 2545 2546 rcu_read_lock(); 2547 for_each_rmrr_units(rmrr) { 2548 /* 2549 * Return TRUE if this RMRR contains the device that 2550 * is passed in. 2551 */ 2552 for_each_active_dev_scope(rmrr->devices, 2553 rmrr->devices_cnt, i, tmp) 2554 if (tmp == dev) { 2555 rcu_read_unlock(); 2556 return true; 2557 } 2558 } 2559 rcu_read_unlock(); 2560 return false; 2561} 2562 2563/* 2564 * There are a couple cases where we need to restrict the functionality of 2565 * devices associated with RMRRs. The first is when evaluating a device for 2566 * identity mapping because problems exist when devices are moved in and out 2567 * of domains and their respective RMRR information is lost. This means that 2568 * a device with associated RMRRs will never be in a "passthrough" domain. 2569 * The second is use of the device through the IOMMU API. This interface 2570 * expects to have full control of the IOVA space for the device. We cannot 2571 * satisfy both the requirement that RMRR access is maintained and have an 2572 * unencumbered IOVA space. We also have no ability to quiesce the device's 2573 * use of the RMRR space or even inform the IOMMU API user of the restriction. 2574 * We therefore prevent devices associated with an RMRR from participating in 2575 * the IOMMU API, which eliminates them from device assignment. 2576 * 2577 * In both cases we assume that PCI USB devices with RMRRs have them largely 2578 * for historical reasons and that the RMRR space is not actively used post 2579 * boot. This exclusion may change if vendors begin to abuse it. 
2580 * 2581 * The same exception is made for graphics devices, with the requirement that 2582 * any use of the RMRR regions will be torn down before assigning the device 2583 * to a guest. 2584 */ 2585static bool device_is_rmrr_locked(struct device *dev) 2586{ 2587 if (!device_has_rmrr(dev)) 2588 return false; 2589 2590 if (dev_is_pci(dev)) { 2591 struct pci_dev *pdev = to_pci_dev(dev); 2592 2593 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev)) 2594 return false; 2595 } 2596 2597 return true; 2598} 2599 2600static int iommu_should_identity_map(struct device *dev, int startup) 2601{ 2602 2603 if (dev_is_pci(dev)) { 2604 struct pci_dev *pdev = to_pci_dev(dev); 2605 2606 if (device_is_rmrr_locked(dev)) 2607 return 0; 2608 2609 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) 2610 return 1; 2611 2612 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) 2613 return 1; 2614 2615 if (!(iommu_identity_mapping & IDENTMAP_ALL)) 2616 return 0; 2617 2618 /* 2619 * We want to start off with all devices in the 1:1 domain, and 2620 * take them out later if we find they can't access all of memory. 2621 * 2622 * However, we can't do this for PCI devices behind bridges, 2623 * because all PCI devices behind the same bridge will end up 2624 * with the same source-id on their transactions. 2625 * 2626 * Practically speaking, we can't change things around for these 2627 * devices at run-time, because we can't be sure there'll be no 2628 * DMA transactions in flight for any of their siblings. 2629 * 2630 * So PCI devices (unless they're on the root bus) as well as 2631 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of 2632 * the 1:1 domain, just in _case_ one of their siblings turns out 2633 * not to be able to map all of memory. 2634 */ 2635 if (!pci_is_pcie(pdev)) { 2636 if (!pci_is_root_bus(pdev->bus)) 2637 return 0; 2638 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) 2639 return 0; 2640 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE) 2641 return 0; 2642 } else { 2643 if (device_has_rmrr(dev)) 2644 return 0; 2645 } 2646 2647 /* 2648 * At boot time, we don't yet know if devices will be 64-bit capable. 2649 * Assume that they will — if they turn out not to be, then we can 2650 * take them out of the 1:1 domain later. 2651 */ 2652 if (!startup) { 2653 /* 2654 * If the device's dma_mask is less than the system's memory 2655 * size then this is not a candidate for identity mapping. 2656 */ 2657 u64 dma_mask = *dev->dma_mask; 2658 2659 if (dev->coherent_dma_mask && 2660 dev->coherent_dma_mask < dma_mask) 2661 dma_mask = dev->coherent_dma_mask; 2662 2663 return dma_mask >= dma_get_required_mask(dev); 2664 } 2665 2666 return 1; 2667} 2668 2669static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw) 2670{ 2671 int ret; 2672 2673 if (!iommu_should_identity_map(dev, 1)) 2674 return 0; 2675 2676 ret = domain_add_dev_info(si_domain, dev, 2677 hw ? CONTEXT_TT_PASS_THROUGH : 2678 CONTEXT_TT_MULTI_LEVEL); 2679 if (!ret) 2680 pr_info("IOMMU: %s identity mapping for device %s\n", 2681 hw ? 
"hardware" : "software", dev_name(dev)); 2682 else if (ret == -ENODEV) 2683 /* device not associated with an iommu */ 2684 ret = 0; 2685 2686 return ret; 2687} 2688 2689 2690static int __init iommu_prepare_static_identity_mapping(int hw) 2691{ 2692 struct pci_dev *pdev = NULL; 2693 struct dmar_drhd_unit *drhd; 2694 struct intel_iommu *iommu; 2695 struct device *dev; 2696 int i; 2697 int ret = 0; 2698 2699 ret = si_domain_init(hw); 2700 if (ret) 2701 return -EFAULT; 2702 2703 for_each_pci_dev(pdev) { 2704 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw); 2705 if (ret) 2706 return ret; 2707 } 2708 2709 for_each_active_iommu(iommu, drhd) 2710 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) { 2711 struct acpi_device_physical_node *pn; 2712 struct acpi_device *adev; 2713 2714 if (dev->bus != &acpi_bus_type) 2715 continue; 2716 2717 adev= to_acpi_device(dev); 2718 mutex_lock(&adev->physical_node_lock); 2719 list_for_each_entry(pn, &adev->physical_node_list, node) { 2720 ret = dev_prepare_static_identity_mapping(pn->dev, hw); 2721 if (ret) 2722 break; 2723 } 2724 mutex_unlock(&adev->physical_node_lock); 2725 if (ret) 2726 return ret; 2727 } 2728 2729 return 0; 2730} 2731 2732static void intel_iommu_init_qi(struct intel_iommu *iommu) 2733{ 2734 /* 2735 * Start from the sane iommu hardware state. 2736 * If the queued invalidation is already initialized by us 2737 * (for example, while enabling interrupt-remapping) then 2738 * we got the things already rolling from a sane state. 2739 */ 2740 if (!iommu->qi) { 2741 /* 2742 * Clear any previous faults. 2743 */ 2744 dmar_fault(-1, iommu); 2745 /* 2746 * Disable queued invalidation if supported and already enabled 2747 * before OS handover. 2748 */ 2749 dmar_disable_qi(iommu); 2750 } 2751 2752 if (dmar_enable_qi(iommu)) { 2753 /* 2754 * Queued Invalidate not enabled, use Register Based Invalidate 2755 */ 2756 iommu->flush.flush_context = __iommu_flush_context; 2757 iommu->flush.flush_iotlb = __iommu_flush_iotlb; 2758 pr_info("IOMMU: %s using Register based invalidation\n", 2759 iommu->name); 2760 } else { 2761 iommu->flush.flush_context = qi_flush_context; 2762 iommu->flush.flush_iotlb = qi_flush_iotlb; 2763 pr_info("IOMMU: %s using Queued invalidation\n", iommu->name); 2764 } 2765} 2766 2767static int __init init_dmars(void) 2768{ 2769 struct dmar_drhd_unit *drhd; 2770 struct dmar_rmrr_unit *rmrr; 2771 struct device *dev; 2772 struct intel_iommu *iommu; 2773 int i, ret; 2774 2775 /* 2776 * for each drhd 2777 * allocate root 2778 * initialize and program root entry to not present 2779 * endfor 2780 */ 2781 for_each_drhd_unit(drhd) { 2782 /* 2783 * lock not needed as this is only incremented in the single 2784 * threaded kernel __init code path all other access are read 2785 * only 2786 */ 2787 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) { 2788 g_num_of_iommus++; 2789 continue; 2790 } 2791 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n", 2792 DMAR_UNITS_SUPPORTED); 2793 } 2794 2795 /* Preallocate enough resources for IOMMU hot-addition */ 2796 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) 2797 g_num_of_iommus = DMAR_UNITS_SUPPORTED; 2798 2799 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *), 2800 GFP_KERNEL); 2801 if (!g_iommus) { 2802 printk(KERN_ERR "Allocating global iommu array failed\n"); 2803 ret = -ENOMEM; 2804 goto error; 2805 } 2806 2807 deferred_flush = kzalloc(g_num_of_iommus * 2808 sizeof(struct deferred_flush_tables), GFP_KERNEL); 2809 if (!deferred_flush) { 2810 ret = -ENOMEM; 2811 goto 
free_g_iommus; 2812 } 2813 2814 for_each_active_iommu(iommu, drhd) { 2815 g_iommus[iommu->seq_id] = iommu; 2816 2817 ret = iommu_init_domains(iommu); 2818 if (ret) 2819 goto free_iommu; 2820 2821 /* 2822 * TBD: 2823 * we could share the same root & context tables 2824 * among all IOMMU's. Need to Split it later. 2825 */ 2826 ret = iommu_alloc_root_entry(iommu); 2827 if (ret) 2828 goto free_iommu; 2829 if (!ecap_pass_through(iommu->ecap)) 2830 hw_pass_through = 0; 2831 } 2832 2833 for_each_active_iommu(iommu, drhd) 2834 intel_iommu_init_qi(iommu); 2835 2836 if (iommu_pass_through) 2837 iommu_identity_mapping |= IDENTMAP_ALL; 2838 2839#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA 2840 iommu_identity_mapping |= IDENTMAP_GFX; 2841#endif 2842 2843 check_tylersburg_isoch(); 2844 2845 /* 2846 * If pass through is not set or not enabled, setup context entries for 2847 * identity mappings for rmrr, gfx, and isa and may fall back to static 2848 * identity mapping if iommu_identity_mapping is set. 2849 */ 2850 if (iommu_identity_mapping) { 2851 ret = iommu_prepare_static_identity_mapping(hw_pass_through); 2852 if (ret) { 2853 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n"); 2854 goto free_iommu; 2855 } 2856 } 2857 /* 2858 * For each rmrr 2859 * for each dev attached to rmrr 2860 * do 2861 * locate drhd for dev, alloc domain for dev 2862 * allocate free domain 2863 * allocate page table entries for rmrr 2864 * if context not allocated for bus 2865 * allocate and init context 2866 * set present in root table for this bus 2867 * init context with domain, translation etc 2868 * endfor 2869 * endfor 2870 */ 2871 printk(KERN_INFO "IOMMU: Setting RMRR:\n"); 2872 for_each_rmrr_units(rmrr) { 2873 /* some BIOS lists non-exist devices in DMAR table. */ 2874 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, 2875 i, dev) { 2876 ret = iommu_prepare_rmrr_dev(rmrr, dev); 2877 if (ret) 2878 printk(KERN_ERR 2879 "IOMMU: mapping reserved region failed\n"); 2880 } 2881 } 2882 2883 iommu_prepare_isa(); 2884 2885 /* 2886 * for each drhd 2887 * enable fault log 2888 * global invalidate context cache 2889 * global invalidate iotlb 2890 * enable translation 2891 */ 2892 for_each_iommu(iommu, drhd) { 2893 if (drhd->ignored) { 2894 /* 2895 * we always have to disable PMRs or DMA may fail on 2896 * this device 2897 */ 2898 if (force_on) 2899 iommu_disable_protect_mem_regions(iommu); 2900 continue; 2901 } 2902 2903 iommu_flush_write_buffer(iommu); 2904 2905 ret = dmar_set_interrupt(iommu); 2906 if (ret) 2907 goto free_iommu; 2908 2909 iommu_set_root_entry(iommu); 2910 2911 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); 2912 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); 2913 iommu_enable_translation(iommu); 2914 iommu_disable_protect_mem_regions(iommu); 2915 } 2916 2917 return 0; 2918 2919free_iommu: 2920 for_each_active_iommu(iommu, drhd) { 2921 disable_dmar_iommu(iommu); 2922 free_dmar_iommu(iommu); 2923 } 2924 kfree(deferred_flush); 2925free_g_iommus: 2926 kfree(g_iommus); 2927error: 2928 return ret; 2929} 2930 2931/* This takes a number of _MM_ pages, not VTD pages */ 2932static struct iova *intel_alloc_iova(struct device *dev, 2933 struct dmar_domain *domain, 2934 unsigned long nrpages, uint64_t dma_mask) 2935{ 2936 struct iova *iova = NULL; 2937 2938 /* Restrict dma_mask to the width that the iommu can handle */ 2939 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask); 2940 2941 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) { 2942 /* 2943 * First try to 
allocate an io virtual address in 2944 * DMA_BIT_MASK(32) and if that fails then try allocating 2945 * from higher range 2946 */ 2947 iova = alloc_iova(&domain->iovad, nrpages, 2948 IOVA_PFN(DMA_BIT_MASK(32)), 1); 2949 if (iova) 2950 return iova; 2951 } 2952 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1); 2953 if (unlikely(!iova)) { 2954 printk(KERN_ERR "Allocating %ld-page iova for %s failed", 2955 nrpages, dev_name(dev)); 2956 return NULL; 2957 } 2958 2959 return iova; 2960} 2961 2962static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev) 2963{ 2964 struct dmar_domain *domain; 2965 int ret; 2966 2967 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); 2968 if (!domain) { 2969 printk(KERN_ERR "Allocating domain for %s failed", 2970 dev_name(dev)); 2971 return NULL; 2972 } 2973 2974 /* make sure context mapping is ok */ 2975 if (unlikely(!domain_context_mapped(dev))) { 2976 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL); 2977 if (ret) { 2978 printk(KERN_ERR "Domain context map for %s failed", 2979 dev_name(dev)); 2980 return NULL; 2981 } 2982 } 2983 2984 return domain; 2985} 2986 2987static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev) 2988{ 2989 struct device_domain_info *info; 2990 2991 /* No lock here, assumes no domain exit in normal case */ 2992 info = dev->archdata.iommu; 2993 if (likely(info)) 2994 return info->domain; 2995 2996 return __get_valid_domain_for_dev(dev); 2997} 2998 2999/* Check if the dev needs to go through non-identity map and unmap process.*/ 3000static int iommu_no_mapping(struct device *dev) 3001{ 3002 int found; 3003 3004 if (iommu_dummy(dev)) 3005 return 1; 3006 3007 if (!iommu_identity_mapping) 3008 return 0; 3009 3010 found = identity_mapping(dev); 3011 if (found) { 3012 if (iommu_should_identity_map(dev, 0)) 3013 return 1; 3014 else { 3015 /* 3016 * 32 bit DMA is removed from si_domain and fall back 3017 * to non-identity mapping. 3018 */ 3019 domain_remove_one_dev_info(si_domain, dev); 3020 printk(KERN_INFO "32bit %s uses non-identity mapping\n", 3021 dev_name(dev)); 3022 return 0; 3023 } 3024 } else { 3025 /* 3026 * In case of a detached 64 bit DMA device from vm, the device 3027 * is put into si_domain for identity mapping. 3028 */ 3029 if (iommu_should_identity_map(dev, 0)) { 3030 int ret; 3031 ret = domain_add_dev_info(si_domain, dev, 3032 hw_pass_through ? 3033 CONTEXT_TT_PASS_THROUGH : 3034 CONTEXT_TT_MULTI_LEVEL); 3035 if (!ret) { 3036 printk(KERN_INFO "64bit %s uses identity mapping\n", 3037 dev_name(dev)); 3038 return 1; 3039 } 3040 } 3041 } 3042 3043 return 0; 3044} 3045 3046static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, 3047 size_t size, int dir, u64 dma_mask) 3048{ 3049 struct dmar_domain *domain; 3050 phys_addr_t start_paddr; 3051 struct iova *iova; 3052 int prot = 0; 3053 int ret; 3054 struct intel_iommu *iommu; 3055 unsigned long paddr_pfn = paddr >> PAGE_SHIFT; 3056 3057 BUG_ON(dir == DMA_NONE); 3058 3059 if (iommu_no_mapping(dev)) 3060 return paddr; 3061 3062 domain = get_valid_domain_for_dev(dev); 3063 if (!domain) 3064 return 0; 3065 3066 iommu = domain_get_iommu(domain); 3067 size = aligned_nrpages(paddr, size); 3068 3069 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask); 3070 if (!iova) 3071 goto error; 3072 3073 /* 3074 * Check if DMAR supports zero-length reads on write only 3075 * mappings.. 
3076 */ 3077 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ 3078 !cap_zlr(iommu->cap)) 3079 prot |= DMA_PTE_READ; 3080 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 3081 prot |= DMA_PTE_WRITE; 3082 /* 3083 * paddr - (paddr + size) might be partial page, we should map the whole 3084 * page. Note: if two part of one page are separately mapped, we 3085 * might have two guest_addr mapping to the same host paddr, but this 3086 * is not a big problem 3087 */ 3088 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo), 3089 mm_to_dma_pfn(paddr_pfn), size, prot); 3090 if (ret) 3091 goto error; 3092 3093 /* it's a non-present to present mapping. Only flush if caching mode */ 3094 if (cap_caching_mode(iommu->cap)) 3095 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1); 3096 else 3097 iommu_flush_write_buffer(iommu); 3098 3099 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT; 3100 start_paddr += paddr & ~PAGE_MASK; 3101 return start_paddr; 3102 3103error: 3104 if (iova) 3105 __free_iova(&domain->iovad, iova); 3106 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n", 3107 dev_name(dev), size, (unsigned long long)paddr, dir); 3108 return 0; 3109} 3110 3111static dma_addr_t intel_map_page(struct device *dev, struct page *page, 3112 unsigned long offset, size_t size, 3113 enum dma_data_direction dir, 3114 struct dma_attrs *attrs) 3115{ 3116 return __intel_map_single(dev, page_to_phys(page) + offset, size, 3117 dir, *dev->dma_mask); 3118} 3119 3120static void flush_unmaps(void) 3121{ 3122 int i, j; 3123 3124 timer_on = 0; 3125 3126 /* just flush them all */ 3127 for (i = 0; i < g_num_of_iommus; i++) { 3128 struct intel_iommu *iommu = g_iommus[i]; 3129 if (!iommu) 3130 continue; 3131 3132 if (!deferred_flush[i].next) 3133 continue; 3134 3135 /* In caching mode, global flushes turn emulation expensive */ 3136 if (!cap_caching_mode(iommu->cap)) 3137 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 3138 DMA_TLB_GLOBAL_FLUSH); 3139 for (j = 0; j < deferred_flush[i].next; j++) { 3140 unsigned long mask; 3141 struct iova *iova = deferred_flush[i].iova[j]; 3142 struct dmar_domain *domain = deferred_flush[i].domain[j]; 3143 3144 /* On real hardware multiple invalidations are expensive */ 3145 if (cap_caching_mode(iommu->cap)) 3146 iommu_flush_iotlb_psi(iommu, domain->id, 3147 iova->pfn_lo, iova_size(iova), 3148 !deferred_flush[i].freelist[j], 0); 3149 else { 3150 mask = ilog2(mm_to_dma_pfn(iova_size(iova))); 3151 iommu_flush_dev_iotlb(deferred_flush[i].domain[j], 3152 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask); 3153 } 3154 __free_iova(&deferred_flush[i].domain[j]->iovad, iova); 3155 if (deferred_flush[i].freelist[j]) 3156 dma_free_pagelist(deferred_flush[i].freelist[j]); 3157 } 3158 deferred_flush[i].next = 0; 3159 } 3160 3161 list_size = 0; 3162} 3163 3164static void flush_unmaps_timeout(unsigned long data) 3165{ 3166 unsigned long flags; 3167 3168 spin_lock_irqsave(&async_umap_flush_lock, flags); 3169 flush_unmaps(); 3170 spin_unlock_irqrestore(&async_umap_flush_lock, flags); 3171} 3172 3173static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist) 3174{ 3175 unsigned long flags; 3176 int next, iommu_id; 3177 struct intel_iommu *iommu; 3178 3179 spin_lock_irqsave(&async_umap_flush_lock, flags); 3180 if (list_size == HIGH_WATER_MARK) 3181 flush_unmaps(); 3182 3183 iommu = domain_get_iommu(dom); 3184 iommu_id = iommu->seq_id; 3185 3186 next = deferred_flush[iommu_id].next; 3187 deferred_flush[iommu_id].domain[next] 
= dom; 3188 deferred_flush[iommu_id].iova[next] = iova; 3189 deferred_flush[iommu_id].freelist[next] = freelist; 3190 deferred_flush[iommu_id].next++; 3191 3192 if (!timer_on) { 3193 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10)); 3194 timer_on = 1; 3195 } 3196 list_size++; 3197 spin_unlock_irqrestore(&async_umap_flush_lock, flags); 3198} 3199 3200static void intel_unmap(struct device *dev, dma_addr_t dev_addr) 3201{ 3202 struct dmar_domain *domain; 3203 unsigned long start_pfn, last_pfn; 3204 struct iova *iova; 3205 struct intel_iommu *iommu; 3206 struct page *freelist; 3207 3208 if (iommu_no_mapping(dev)) 3209 return; 3210 3211 domain = find_domain(dev); 3212 BUG_ON(!domain); 3213 3214 iommu = domain_get_iommu(domain); 3215 3216 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); 3217 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n", 3218 (unsigned long long)dev_addr)) 3219 return; 3220 3221 start_pfn = mm_to_dma_pfn(iova->pfn_lo); 3222 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; 3223 3224 pr_debug("Device %s unmapping: pfn %lx-%lx\n", 3225 dev_name(dev), start_pfn, last_pfn); 3226 3227 freelist = domain_unmap(domain, start_pfn, last_pfn); 3228 3229 if (intel_iommu_strict) { 3230 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, 3231 last_pfn - start_pfn + 1, !freelist, 0); 3232 /* free iova */ 3233 __free_iova(&domain->iovad, iova); 3234 dma_free_pagelist(freelist); 3235 } else { 3236 add_unmap(domain, iova, freelist); 3237 /* 3238 * queue up the release of the unmap to save the 1/6th of the 3239 * cpu used up by the iotlb flush operation... 3240 */ 3241 } 3242} 3243 3244static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, 3245 size_t size, enum dma_data_direction dir, 3246 struct dma_attrs *attrs) 3247{ 3248 intel_unmap(dev, dev_addr); 3249} 3250 3251static void *intel_alloc_coherent(struct device *dev, size_t size, 3252 dma_addr_t *dma_handle, gfp_t flags, 3253 struct dma_attrs *attrs) 3254{ 3255 struct page *page = NULL; 3256 int order; 3257 3258 size = PAGE_ALIGN(size); 3259 order = get_order(size); 3260 3261 if (!iommu_no_mapping(dev)) 3262 flags &= ~(GFP_DMA | GFP_DMA32); 3263 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) { 3264 if (dev->coherent_dma_mask < DMA_BIT_MASK(32)) 3265 flags |= GFP_DMA; 3266 else 3267 flags |= GFP_DMA32; 3268 } 3269 3270 if (flags & __GFP_WAIT) { 3271 unsigned int count = size >> PAGE_SHIFT; 3272 3273 page = dma_alloc_from_contiguous(dev, count, order); 3274 if (page && iommu_no_mapping(dev) && 3275 page_to_phys(page) + size > dev->coherent_dma_mask) { 3276 dma_release_from_contiguous(dev, page, count); 3277 page = NULL; 3278 } 3279 } 3280 3281 if (!page) 3282 page = alloc_pages(flags, order); 3283 if (!page) 3284 return NULL; 3285 memset(page_address(page), 0, size); 3286 3287 *dma_handle = __intel_map_single(dev, page_to_phys(page), size, 3288 DMA_BIDIRECTIONAL, 3289 dev->coherent_dma_mask); 3290 if (*dma_handle) 3291 return page_address(page); 3292 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) 3293 __free_pages(page, order); 3294 3295 return NULL; 3296} 3297 3298static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, 3299 dma_addr_t dma_handle, struct dma_attrs *attrs) 3300{ 3301 int order; 3302 struct page *page = virt_to_page(vaddr); 3303 3304 size = PAGE_ALIGN(size); 3305 order = get_order(size); 3306 3307 intel_unmap(dev, dma_handle); 3308 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) 3309 __free_pages(page, order); 3310} 
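/*
 * Minimal usage sketch: roughly what a device driver does with the generic
 * DMA API.  Once dma_ops points at intel_dma_ops, these calls end up in
 * intel_alloc_coherent()/intel_free_coherent() and, from there, in
 * __intel_map_single() and intel_unmap() above.  The function name and its
 * device argument are illustrative only.
 */
static int __maybe_unused example_dma_roundtrip(struct device *dev)
{
	void *buf;
	dma_addr_t handle;

	/* Coherent allocation; the IOVA comes back in 'handle'. */
	buf = dma_alloc_coherent(dev, PAGE_SIZE, &handle, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/*
	 * ... device DMA to/from 'handle' would happen here ...
	 *
	 * Freeing unmaps the IOVA: with intel_iommu_strict the IOTLB is
	 * flushed immediately, otherwise the flush is deferred via
	 * add_unmap() and the unmap_timer.
	 */
	dma_free_coherent(dev, PAGE_SIZE, buf, handle);

	return 0;
}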
3311 3312static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist, 3313 int nelems, enum dma_data_direction dir, 3314 struct dma_attrs *attrs) 3315{ 3316 intel_unmap(dev, sglist[0].dma_address); 3317} 3318 3319static int intel_nontranslate_map_sg(struct device *hddev, 3320 struct scatterlist *sglist, int nelems, int dir) 3321{ 3322 int i; 3323 struct scatterlist *sg; 3324 3325 for_each_sg(sglist, sg, nelems, i) { 3326 BUG_ON(!sg_page(sg)); 3327 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset; 3328 sg->dma_length = sg->length; 3329 } 3330 return nelems; 3331} 3332 3333static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, 3334 enum dma_data_direction dir, struct dma_attrs *attrs) 3335{ 3336 int i; 3337 struct dmar_domain *domain; 3338 size_t size = 0; 3339 int prot = 0; 3340 struct iova *iova = NULL; 3341 int ret; 3342 struct scatterlist *sg; 3343 unsigned long start_vpfn; 3344 struct intel_iommu *iommu; 3345 3346 BUG_ON(dir == DMA_NONE); 3347 if (iommu_no_mapping(dev)) 3348 return intel_nontranslate_map_sg(dev, sglist, nelems, dir); 3349 3350 domain = get_valid_domain_for_dev(dev); 3351 if (!domain) 3352 return 0; 3353 3354 iommu = domain_get_iommu(domain); 3355 3356 for_each_sg(sglist, sg, nelems, i) 3357 size += aligned_nrpages(sg->offset, sg->length); 3358 3359 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), 3360 *dev->dma_mask); 3361 if (!iova) { 3362 sglist->dma_length = 0; 3363 return 0; 3364 } 3365 3366 /* 3367 * Check if DMAR supports zero-length reads on write only 3368 * mappings.. 3369 */ 3370 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ 3371 !cap_zlr(iommu->cap)) 3372 prot |= DMA_PTE_READ; 3373 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) 3374 prot |= DMA_PTE_WRITE; 3375 3376 start_vpfn = mm_to_dma_pfn(iova->pfn_lo); 3377 3378 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot); 3379 if (unlikely(ret)) { 3380 dma_pte_free_pagetable(domain, start_vpfn, 3381 start_vpfn + size - 1); 3382 __free_iova(&domain->iovad, iova); 3383 return 0; 3384 } 3385 3386 /* it's a non-present to present mapping. 
Only flush if caching mode */ 3387 if (cap_caching_mode(iommu->cap)) 3388 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1); 3389 else 3390 iommu_flush_write_buffer(iommu); 3391 3392 return nelems; 3393} 3394 3395static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr) 3396{ 3397 return !dma_addr; 3398} 3399 3400struct dma_map_ops intel_dma_ops = { 3401 .alloc = intel_alloc_coherent, 3402 .free = intel_free_coherent, 3403 .map_sg = intel_map_sg, 3404 .unmap_sg = intel_unmap_sg, 3405 .map_page = intel_map_page, 3406 .unmap_page = intel_unmap_page, 3407 .mapping_error = intel_mapping_error, 3408}; 3409 3410static inline int iommu_domain_cache_init(void) 3411{ 3412 int ret = 0; 3413 3414 iommu_domain_cache = kmem_cache_create("iommu_domain", 3415 sizeof(struct dmar_domain), 3416 0, 3417 SLAB_HWCACHE_ALIGN, 3418 3419 NULL); 3420 if (!iommu_domain_cache) { 3421 printk(KERN_ERR "Couldn't create iommu_domain cache\n"); 3422 ret = -ENOMEM; 3423 } 3424 3425 return ret; 3426} 3427 3428static inline int iommu_devinfo_cache_init(void) 3429{ 3430 int ret = 0; 3431 3432 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo", 3433 sizeof(struct device_domain_info), 3434 0, 3435 SLAB_HWCACHE_ALIGN, 3436 NULL); 3437 if (!iommu_devinfo_cache) { 3438 printk(KERN_ERR "Couldn't create devinfo cache\n"); 3439 ret = -ENOMEM; 3440 } 3441 3442 return ret; 3443} 3444 3445static int __init iommu_init_mempool(void) 3446{ 3447 int ret; 3448 ret = iommu_iova_cache_init(); 3449 if (ret) 3450 return ret; 3451 3452 ret = iommu_domain_cache_init(); 3453 if (ret) 3454 goto domain_error; 3455 3456 ret = iommu_devinfo_cache_init(); 3457 if (!ret) 3458 return ret; 3459 3460 kmem_cache_destroy(iommu_domain_cache); 3461domain_error: 3462 iommu_iova_cache_destroy(); 3463 3464 return -ENOMEM; 3465} 3466 3467static void __init iommu_exit_mempool(void) 3468{ 3469 kmem_cache_destroy(iommu_devinfo_cache); 3470 kmem_cache_destroy(iommu_domain_cache); 3471 iommu_iova_cache_destroy(); 3472} 3473 3474static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) 3475{ 3476 struct dmar_drhd_unit *drhd; 3477 u32 vtbar; 3478 int rc; 3479 3480 /* We know that this device on this chipset has its own IOMMU. 3481 * If we find it under a different IOMMU, then the BIOS is lying 3482 * to us. Hope that the IOMMU for this device is actually 3483 * disabled, and it needs no translation... 
3484 */ 3485 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar); 3486 if (rc) { 3487 /* "can't" happen */ 3488 dev_info(&pdev->dev, "failed to run vt-d quirk\n"); 3489 return; 3490 } 3491 vtbar &= 0xffff0000; 3492 3493 /* we know that the this iommu should be at offset 0xa000 from vtbar */ 3494 drhd = dmar_find_matched_drhd_unit(pdev); 3495 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000, 3496 TAINT_FIRMWARE_WORKAROUND, 3497 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n")) 3498 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; 3499} 3500DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu); 3501 3502static void __init init_no_remapping_devices(void) 3503{ 3504 struct dmar_drhd_unit *drhd; 3505 struct device *dev; 3506 int i; 3507 3508 for_each_drhd_unit(drhd) { 3509 if (!drhd->include_all) { 3510 for_each_active_dev_scope(drhd->devices, 3511 drhd->devices_cnt, i, dev) 3512 break; 3513 /* ignore DMAR unit if no devices exist */ 3514 if (i == drhd->devices_cnt) 3515 drhd->ignored = 1; 3516 } 3517 } 3518 3519 for_each_active_drhd_unit(drhd) { 3520 if (drhd->include_all) 3521 continue; 3522 3523 for_each_active_dev_scope(drhd->devices, 3524 drhd->devices_cnt, i, dev) 3525 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev))) 3526 break; 3527 if (i < drhd->devices_cnt) 3528 continue; 3529 3530 /* This IOMMU has *only* gfx devices. Either bypass it or 3531 set the gfx_mapped flag, as appropriate */ 3532 if (dmar_map_gfx) { 3533 intel_iommu_gfx_mapped = 1; 3534 } else { 3535 drhd->ignored = 1; 3536 for_each_active_dev_scope(drhd->devices, 3537 drhd->devices_cnt, i, dev) 3538 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; 3539 } 3540 } 3541} 3542 3543#ifdef CONFIG_SUSPEND 3544static int init_iommu_hw(void) 3545{ 3546 struct dmar_drhd_unit *drhd; 3547 struct intel_iommu *iommu = NULL; 3548 3549 for_each_active_iommu(iommu, drhd) 3550 if (iommu->qi) 3551 dmar_reenable_qi(iommu); 3552 3553 for_each_iommu(iommu, drhd) { 3554 if (drhd->ignored) { 3555 /* 3556 * we always have to disable PMRs or DMA may fail on 3557 * this device 3558 */ 3559 if (force_on) 3560 iommu_disable_protect_mem_regions(iommu); 3561 continue; 3562 } 3563 3564 iommu_flush_write_buffer(iommu); 3565 3566 iommu_set_root_entry(iommu); 3567 3568 iommu->flush.flush_context(iommu, 0, 0, 0, 3569 DMA_CCMD_GLOBAL_INVL); 3570 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); 3571 iommu_enable_translation(iommu); 3572 iommu_disable_protect_mem_regions(iommu); 3573 } 3574 3575 return 0; 3576} 3577 3578static void iommu_flush_all(void) 3579{ 3580 struct dmar_drhd_unit *drhd; 3581 struct intel_iommu *iommu; 3582 3583 for_each_active_iommu(iommu, drhd) { 3584 iommu->flush.flush_context(iommu, 0, 0, 0, 3585 DMA_CCMD_GLOBAL_INVL); 3586 iommu->flush.flush_iotlb(iommu, 0, 0, 0, 3587 DMA_TLB_GLOBAL_FLUSH); 3588 } 3589} 3590 3591static int iommu_suspend(void) 3592{ 3593 struct dmar_drhd_unit *drhd; 3594 struct intel_iommu *iommu = NULL; 3595 unsigned long flag; 3596 3597 for_each_active_iommu(iommu, drhd) { 3598 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS, 3599 GFP_ATOMIC); 3600 if (!iommu->iommu_state) 3601 goto nomem; 3602 } 3603 3604 iommu_flush_all(); 3605 3606 for_each_active_iommu(iommu, drhd) { 3607 iommu_disable_translation(iommu); 3608 3609 raw_spin_lock_irqsave(&iommu->register_lock, flag); 3610 3611 iommu->iommu_state[SR_DMAR_FECTL_REG] = 3612 readl(iommu->reg + DMAR_FECTL_REG); 3613 
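		/*
		 * The four fault-event registers (FECTL, FEDATA, FEADDR,
		 * FEUADDR) are the only per-IOMMU state captured across
		 * suspend; iommu_resume() writes the same values back under
		 * register_lock after init_iommu_hw() has re-programmed the
		 * root entry and re-enabled translation.
		 */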
iommu->iommu_state[SR_DMAR_FEDATA_REG] = 3614 readl(iommu->reg + DMAR_FEDATA_REG); 3615 iommu->iommu_state[SR_DMAR_FEADDR_REG] = 3616 readl(iommu->reg + DMAR_FEADDR_REG); 3617 iommu->iommu_state[SR_DMAR_FEUADDR_REG] = 3618 readl(iommu->reg + DMAR_FEUADDR_REG); 3619 3620 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 3621 } 3622 return 0; 3623 3624nomem: 3625 for_each_active_iommu(iommu, drhd) 3626 kfree(iommu->iommu_state); 3627 3628 return -ENOMEM; 3629} 3630 3631static void iommu_resume(void) 3632{ 3633 struct dmar_drhd_unit *drhd; 3634 struct intel_iommu *iommu = NULL; 3635 unsigned long flag; 3636 3637 if (init_iommu_hw()) { 3638 if (force_on) 3639 panic("tboot: IOMMU setup failed, DMAR can not resume!\n"); 3640 else 3641 WARN(1, "IOMMU setup failed, DMAR can not resume!\n"); 3642 return; 3643 } 3644 3645 for_each_active_iommu(iommu, drhd) { 3646 3647 raw_spin_lock_irqsave(&iommu->register_lock, flag); 3648 3649 writel(iommu->iommu_state[SR_DMAR_FECTL_REG], 3650 iommu->reg + DMAR_FECTL_REG); 3651 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG], 3652 iommu->reg + DMAR_FEDATA_REG); 3653 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG], 3654 iommu->reg + DMAR_FEADDR_REG); 3655 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG], 3656 iommu->reg + DMAR_FEUADDR_REG); 3657 3658 raw_spin_unlock_irqrestore(&iommu->register_lock, flag); 3659 } 3660 3661 for_each_active_iommu(iommu, drhd) 3662 kfree(iommu->iommu_state); 3663} 3664 3665static struct syscore_ops iommu_syscore_ops = { 3666 .resume = iommu_resume, 3667 .suspend = iommu_suspend, 3668}; 3669 3670static void __init init_iommu_pm_ops(void) 3671{ 3672 register_syscore_ops(&iommu_syscore_ops); 3673} 3674 3675#else 3676static inline void init_iommu_pm_ops(void) {} 3677#endif /* CONFIG_PM */ 3678 3679 3680int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg) 3681{ 3682 struct acpi_dmar_reserved_memory *rmrr; 3683 struct dmar_rmrr_unit *rmrru; 3684 3685 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); 3686 if (!rmrru) 3687 return -ENOMEM; 3688 3689 rmrru->hdr = header; 3690 rmrr = (struct acpi_dmar_reserved_memory *)header; 3691 rmrru->base_address = rmrr->base_address; 3692 rmrru->end_address = rmrr->end_address; 3693 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1), 3694 ((void *)rmrr) + rmrr->header.length, 3695 &rmrru->devices_cnt); 3696 if (rmrru->devices_cnt && rmrru->devices == NULL) { 3697 kfree(rmrru); 3698 return -ENOMEM; 3699 } 3700 3701 list_add(&rmrru->list, &dmar_rmrr_units); 3702 3703 return 0; 3704} 3705 3706static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr) 3707{ 3708 struct dmar_atsr_unit *atsru; 3709 struct acpi_dmar_atsr *tmp; 3710 3711 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) { 3712 tmp = (struct acpi_dmar_atsr *)atsru->hdr; 3713 if (atsr->segment != tmp->segment) 3714 continue; 3715 if (atsr->header.length != tmp->header.length) 3716 continue; 3717 if (memcmp(atsr, tmp, atsr->header.length) == 0) 3718 return atsru; 3719 } 3720 3721 return NULL; 3722} 3723 3724int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg) 3725{ 3726 struct acpi_dmar_atsr *atsr; 3727 struct dmar_atsr_unit *atsru; 3728 3729 if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled) 3730 return 0; 3731 3732 atsr = container_of(hdr, struct acpi_dmar_atsr, header); 3733 atsru = dmar_find_atsr(atsr); 3734 if (atsru) 3735 return 0; 3736 3737 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL); 3738 if (!atsru) 3739 return -ENOMEM; 3740 3741 /* 3742 * If memory is allocated 
from slab by ACPI _DSM method, we need to 3743 * copy the memory content because the memory buffer will be freed 3744 * on return. 3745 */ 3746 atsru->hdr = (void *)(atsru + 1); 3747 memcpy(atsru->hdr, hdr, hdr->length); 3748 atsru->include_all = atsr->flags & 0x1; 3749 if (!atsru->include_all) { 3750 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1), 3751 (void *)atsr + atsr->header.length, 3752 &atsru->devices_cnt); 3753 if (atsru->devices_cnt && atsru->devices == NULL) { 3754 kfree(atsru); 3755 return -ENOMEM; 3756 } 3757 } 3758 3759 list_add_rcu(&atsru->list, &dmar_atsr_units); 3760 3761 return 0; 3762} 3763 3764static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru) 3765{ 3766 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt); 3767 kfree(atsru); 3768} 3769 3770int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg) 3771{ 3772 struct acpi_dmar_atsr *atsr; 3773 struct dmar_atsr_unit *atsru; 3774 3775 atsr = container_of(hdr, struct acpi_dmar_atsr, header); 3776 atsru = dmar_find_atsr(atsr); 3777 if (atsru) { 3778 list_del_rcu(&atsru->list); 3779 synchronize_rcu(); 3780 intel_iommu_free_atsr(atsru); 3781 } 3782 3783 return 0; 3784} 3785 3786int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg) 3787{ 3788 int i; 3789 struct device *dev; 3790 struct acpi_dmar_atsr *atsr; 3791 struct dmar_atsr_unit *atsru; 3792 3793 atsr = container_of(hdr, struct acpi_dmar_atsr, header); 3794 atsru = dmar_find_atsr(atsr); 3795 if (!atsru) 3796 return 0; 3797 3798 if (!atsru->include_all && atsru->devices && atsru->devices_cnt) 3799 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt, 3800 i, dev) 3801 return -EBUSY; 3802 3803 return 0; 3804} 3805 3806static int intel_iommu_add(struct dmar_drhd_unit *dmaru) 3807{ 3808 int sp, ret = 0; 3809 struct intel_iommu *iommu = dmaru->iommu; 3810 3811 if (g_iommus[iommu->seq_id]) 3812 return 0; 3813 3814 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) { 3815 pr_warn("IOMMU: %s doesn't support hardware pass through.\n", 3816 iommu->name); 3817 return -ENXIO; 3818 } 3819 if (!ecap_sc_support(iommu->ecap) && 3820 domain_update_iommu_snooping(iommu)) { 3821 pr_warn("IOMMU: %s doesn't support snooping.\n", 3822 iommu->name); 3823 return -ENXIO; 3824 } 3825 sp = domain_update_iommu_superpage(iommu) - 1; 3826 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) { 3827 pr_warn("IOMMU: %s doesn't support large page.\n", 3828 iommu->name); 3829 return -ENXIO; 3830 } 3831 3832 /* 3833 * Disable translation if already enabled prior to OS handover. 
3834 */ 3835 if (iommu->gcmd & DMA_GCMD_TE) 3836 iommu_disable_translation(iommu); 3837 3838 g_iommus[iommu->seq_id] = iommu; 3839 ret = iommu_init_domains(iommu); 3840 if (ret == 0) 3841 ret = iommu_alloc_root_entry(iommu); 3842 if (ret) 3843 goto out; 3844 3845 if (dmaru->ignored) { 3846 /* 3847 * we always have to disable PMRs or DMA may fail on this device 3848 */ 3849 if (force_on) 3850 iommu_disable_protect_mem_regions(iommu); 3851 return 0; 3852 } 3853 3854 intel_iommu_init_qi(iommu); 3855 iommu_flush_write_buffer(iommu); 3856 ret = dmar_set_interrupt(iommu); 3857 if (ret) 3858 goto disable_iommu; 3859 3860 iommu_set_root_entry(iommu); 3861 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); 3862 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); 3863 iommu_enable_translation(iommu); 3864 3865 if (si_domain) { 3866 ret = iommu_attach_domain(si_domain, iommu); 3867 if (ret < 0 || si_domain->id != ret) 3868 goto disable_iommu; 3869 domain_attach_iommu(si_domain, iommu); 3870 } 3871 3872 iommu_disable_protect_mem_regions(iommu); 3873 return 0; 3874 3875disable_iommu: 3876 disable_dmar_iommu(iommu); 3877out: 3878 free_dmar_iommu(iommu); 3879 return ret; 3880} 3881 3882int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert) 3883{ 3884 int ret = 0; 3885 struct intel_iommu *iommu = dmaru->iommu; 3886 3887 if (!intel_iommu_enabled) 3888 return 0; 3889 if (iommu == NULL) 3890 return -EINVAL; 3891 3892 if (insert) { 3893 ret = intel_iommu_add(dmaru); 3894 } else { 3895 disable_dmar_iommu(iommu); 3896 free_dmar_iommu(iommu); 3897 } 3898 3899 return ret; 3900} 3901 3902static void intel_iommu_free_dmars(void) 3903{ 3904 struct dmar_rmrr_unit *rmrru, *rmrr_n; 3905 struct dmar_atsr_unit *atsru, *atsr_n; 3906 3907 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) { 3908 list_del(&rmrru->list); 3909 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt); 3910 kfree(rmrru); 3911 } 3912 3913 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) { 3914 list_del(&atsru->list); 3915 intel_iommu_free_atsr(atsru); 3916 } 3917} 3918 3919int dmar_find_matched_atsr_unit(struct pci_dev *dev) 3920{ 3921 int i, ret = 1; 3922 struct pci_bus *bus; 3923 struct pci_dev *bridge = NULL; 3924 struct device *tmp; 3925 struct acpi_dmar_atsr *atsr; 3926 struct dmar_atsr_unit *atsru; 3927 3928 dev = pci_physfn(dev); 3929 for (bus = dev->bus; bus; bus = bus->parent) { 3930 bridge = bus->self; 3931 /* If it's an integrated device, allow ATS */ 3932 if (!bridge) 3933 return 1; 3934 /* Connected via non-PCIe: no ATS */ 3935 if (!pci_is_pcie(bridge) || 3936 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) 3937 return 0; 3938 /* If we found the root port, look it up in the ATSR */ 3939 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) 3940 break; 3941 } 3942 3943 rcu_read_lock(); 3944 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) { 3945 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); 3946 if (atsr->segment != pci_domain_nr(dev->bus)) 3947 continue; 3948 3949 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp) 3950 if (tmp == &bridge->dev) 3951 goto out; 3952 3953 if (atsru->include_all) 3954 goto out; 3955 } 3956 ret = 0; 3957out: 3958 rcu_read_unlock(); 3959 3960 return ret; 3961} 3962 3963int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) 3964{ 3965 int ret = 0; 3966 struct dmar_rmrr_unit *rmrru; 3967 struct dmar_atsr_unit *atsru; 3968 struct acpi_dmar_atsr *atsr; 3969 struct acpi_dmar_reserved_memory *rmrr; 
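	/*
	 * Keep the cached RMRR and ATSR device-scope arrays in sync with
	 * PCI hotplug events.  The loops below treat a positive return from
	 * dmar_insert_dev_scope() as "device added to this unit's scope",
	 * zero as "device not covered by this unit", and a negative value
	 * as a hard error that is propagated to the caller.
	 */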
3970 3971 if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING) 3972 return 0; 3973 3974 list_for_each_entry(rmrru, &dmar_rmrr_units, list) { 3975 rmrr = container_of(rmrru->hdr, 3976 struct acpi_dmar_reserved_memory, header); 3977 if (info->event == BUS_NOTIFY_ADD_DEVICE) { 3978 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1), 3979 ((void *)rmrr) + rmrr->header.length, 3980 rmrr->segment, rmrru->devices, 3981 rmrru->devices_cnt); 3982 if(ret < 0) 3983 return ret; 3984 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) { 3985 dmar_remove_dev_scope(info, rmrr->segment, 3986 rmrru->devices, rmrru->devices_cnt); 3987 } 3988 } 3989 3990 list_for_each_entry(atsru, &dmar_atsr_units, list) { 3991 if (atsru->include_all) 3992 continue; 3993 3994 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); 3995 if (info->event == BUS_NOTIFY_ADD_DEVICE) { 3996 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1), 3997 (void *)atsr + atsr->header.length, 3998 atsr->segment, atsru->devices, 3999 atsru->devices_cnt); 4000 if (ret > 0) 4001 break; 4002 else if(ret < 0) 4003 return ret; 4004 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) { 4005 if (dmar_remove_dev_scope(info, atsr->segment, 4006 atsru->devices, atsru->devices_cnt)) 4007 break; 4008 } 4009 } 4010 4011 return 0; 4012} 4013 4014/* 4015 * Here we only respond to action of unbound device from driver. 4016 * 4017 * Added device is not attached to its DMAR domain here yet. That will happen 4018 * when mapping the device to iova. 4019 */ 4020static int device_notifier(struct notifier_block *nb, 4021 unsigned long action, void *data) 4022{ 4023 struct device *dev = data; 4024 struct dmar_domain *domain; 4025 4026 if (iommu_dummy(dev)) 4027 return 0; 4028 4029 if (action != BUS_NOTIFY_REMOVED_DEVICE) 4030 return 0; 4031 4032 domain = find_domain(dev); 4033 if (!domain) 4034 return 0; 4035 4036 down_read(&dmar_global_lock); 4037 domain_remove_one_dev_info(domain, dev); 4038 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices)) 4039 domain_exit(domain); 4040 up_read(&dmar_global_lock); 4041 4042 return 0; 4043} 4044 4045static struct notifier_block device_nb = { 4046 .notifier_call = device_notifier, 4047}; 4048 4049static int intel_iommu_memory_notifier(struct notifier_block *nb, 4050 unsigned long val, void *v) 4051{ 4052 struct memory_notify *mhp = v; 4053 unsigned long long start, end; 4054 unsigned long start_vpfn, last_vpfn; 4055 4056 switch (val) { 4057 case MEM_GOING_ONLINE: 4058 start = mhp->start_pfn << PAGE_SHIFT; 4059 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1; 4060 if (iommu_domain_identity_map(si_domain, start, end)) { 4061 pr_warn("dmar: failed to build identity map for [%llx-%llx]\n", 4062 start, end); 4063 return NOTIFY_BAD; 4064 } 4065 break; 4066 4067 case MEM_OFFLINE: 4068 case MEM_CANCEL_ONLINE: 4069 start_vpfn = mm_to_dma_pfn(mhp->start_pfn); 4070 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1); 4071 while (start_vpfn <= last_vpfn) { 4072 struct iova *iova; 4073 struct dmar_drhd_unit *drhd; 4074 struct intel_iommu *iommu; 4075 struct page *freelist; 4076 4077 iova = find_iova(&si_domain->iovad, start_vpfn); 4078 if (iova == NULL) { 4079 pr_debug("dmar: failed get IOVA for PFN %lx\n", 4080 start_vpfn); 4081 break; 4082 } 4083 4084 iova = split_and_remove_iova(&si_domain->iovad, iova, 4085 start_vpfn, last_vpfn); 4086 if (iova == NULL) { 4087 pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n", 4088 start_vpfn, last_vpfn); 4089 return NOTIFY_BAD; 4090 } 4091 4092 
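			/*
			 * The IOVA covering the offlined range has been
			 * split out of si_domain's tree above; unmap its
			 * identity mapping, invalidate it on every active
			 * IOMMU, and only then free the page-table pages
			 * collected on the freelist.
			 */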
freelist = domain_unmap(si_domain, iova->pfn_lo, 4093 iova->pfn_hi); 4094 4095 rcu_read_lock(); 4096 for_each_active_iommu(iommu, drhd) 4097 iommu_flush_iotlb_psi(iommu, si_domain->id, 4098 iova->pfn_lo, iova_size(iova), 4099 !freelist, 0); 4100 rcu_read_unlock(); 4101 dma_free_pagelist(freelist); 4102 4103 start_vpfn = iova->pfn_hi + 1; 4104 free_iova_mem(iova); 4105 } 4106 break; 4107 } 4108 4109 return NOTIFY_OK; 4110} 4111 4112static struct notifier_block intel_iommu_memory_nb = { 4113 .notifier_call = intel_iommu_memory_notifier, 4114 .priority = 0 4115}; 4116 4117 4118static ssize_t intel_iommu_show_version(struct device *dev, 4119 struct device_attribute *attr, 4120 char *buf) 4121{ 4122 struct intel_iommu *iommu = dev_get_drvdata(dev); 4123 u32 ver = readl(iommu->reg + DMAR_VER_REG); 4124 return sprintf(buf, "%d:%d\n", 4125 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver)); 4126} 4127static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL); 4128 4129static ssize_t intel_iommu_show_address(struct device *dev, 4130 struct device_attribute *attr, 4131 char *buf) 4132{ 4133 struct intel_iommu *iommu = dev_get_drvdata(dev); 4134 return sprintf(buf, "%llx\n", iommu->reg_phys); 4135} 4136static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL); 4137 4138static ssize_t intel_iommu_show_cap(struct device *dev, 4139 struct device_attribute *attr, 4140 char *buf) 4141{ 4142 struct intel_iommu *iommu = dev_get_drvdata(dev); 4143 return sprintf(buf, "%llx\n", iommu->cap); 4144} 4145static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL); 4146 4147static ssize_t intel_iommu_show_ecap(struct device *dev, 4148 struct device_attribute *attr, 4149 char *buf) 4150{ 4151 struct intel_iommu *iommu = dev_get_drvdata(dev); 4152 return sprintf(buf, "%llx\n", iommu->ecap); 4153} 4154static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL); 4155 4156static struct attribute *intel_iommu_attrs[] = { 4157 &dev_attr_version.attr, 4158 &dev_attr_address.attr, 4159 &dev_attr_cap.attr, 4160 &dev_attr_ecap.attr, 4161 NULL, 4162}; 4163 4164static struct attribute_group intel_iommu_group = { 4165 .name = "intel-iommu", 4166 .attrs = intel_iommu_attrs, 4167}; 4168 4169const struct attribute_group *intel_iommu_groups[] = { 4170 &intel_iommu_group, 4171 NULL, 4172}; 4173 4174int __init intel_iommu_init(void) 4175{ 4176 int ret = -ENODEV; 4177 struct dmar_drhd_unit *drhd; 4178 struct intel_iommu *iommu; 4179 4180 /* VT-d is required for a TXT/tboot launch, so enforce that */ 4181 force_on = tboot_force_iommu(); 4182 4183 if (iommu_init_mempool()) { 4184 if (force_on) 4185 panic("tboot: Failed to initialize iommu memory\n"); 4186 return -ENOMEM; 4187 } 4188 4189 down_write(&dmar_global_lock); 4190 if (dmar_table_init()) { 4191 if (force_on) 4192 panic("tboot: Failed to initialize DMAR table\n"); 4193 goto out_free_dmar; 4194 } 4195 4196 /* 4197 * Disable translation if already enabled prior to OS handover. 
int __init intel_iommu_init(void)
{
	int ret = -ENODEV;
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	/* VT-d is required for a TXT/tboot launch, so enforce that */
	force_on = tboot_force_iommu();

	if (iommu_init_mempool()) {
		if (force_on)
			panic("tboot: Failed to initialize iommu memory\n");
		return -ENOMEM;
	}

	down_write(&dmar_global_lock);
	if (dmar_table_init()) {
		if (force_on)
			panic("tboot: Failed to initialize DMAR table\n");
		goto out_free_dmar;
	}

	/*
	 * Disable translation if already enabled prior to OS handover.
	 */
	for_each_active_iommu(iommu, drhd)
		if (iommu->gcmd & DMA_GCMD_TE)
			iommu_disable_translation(iommu);

	if (dmar_dev_scope_init() < 0) {
		if (force_on)
			panic("tboot: Failed to initialize DMAR device scope\n");
		goto out_free_dmar;
	}

	if (no_iommu || dmar_disabled)
		goto out_free_dmar;

	if (list_empty(&dmar_rmrr_units))
		printk(KERN_INFO "DMAR: No RMRR found\n");

	if (list_empty(&dmar_atsr_units))
		printk(KERN_INFO "DMAR: No ATSR found\n");

	if (dmar_init_reserved_ranges()) {
		if (force_on)
			panic("tboot: Failed to reserve iommu ranges\n");
		goto out_free_reserved_range;
	}

	init_no_remapping_devices();

	ret = init_dmars();
	if (ret) {
		if (force_on)
			panic("tboot: Failed to initialize DMARs\n");
		printk(KERN_ERR "IOMMU: dmar init failed\n");
		goto out_free_reserved_range;
	}
	up_write(&dmar_global_lock);
	printk(KERN_INFO
	       "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");

	init_timer(&unmap_timer);
#ifdef CONFIG_SWIOTLB
	swiotlb = 0;
#endif
	dma_ops = &intel_dma_ops;

	init_iommu_pm_ops();

	for_each_active_iommu(iommu, drhd)
		iommu->iommu_dev = iommu_device_create(NULL, iommu,
						       intel_iommu_groups,
						       iommu->name);

	bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
	bus_register_notifier(&pci_bus_type, &device_nb);
	if (si_domain && !hw_pass_through)
		register_memory_notifier(&intel_iommu_memory_nb);

	intel_iommu_enabled = 1;

	return 0;

out_free_reserved_range:
	put_iova_domain(&reserved_iova_list);
out_free_dmar:
	intel_iommu_free_dmars();
	up_write(&dmar_global_lock);
	iommu_exit_mempool();
	return ret;
}

static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
{
	struct intel_iommu *iommu = opaque;

	iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
	return 0;
}
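/*
 * Background (informational): iommu_detach_dev_cb() is the callback handed
 * to pci_for_each_dma_alias() below.  A device's DMA requests can show up
 * with a different requester ID, for example when it sits behind a
 * PCIe-to-PCI bridge, so every alias must be detached, not just the
 * device's own bus/devfn.
 */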
/*
 * NB - intel-iommu lacks any sort of reference counting for the users of
 * dependent devices.  If multiple endpoints have intersecting dependent
 * devices, unbinding the driver from any one of them will possibly leave
 * the others unable to operate.
 */
static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
					   struct device *dev)
{
	if (!iommu || !dev || !dev_is_pci(dev))
		return;

	pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
}

static void domain_remove_one_dev_info(struct dmar_domain *domain,
				       struct device *dev)
{
	struct device_domain_info *info, *tmp;
	struct intel_iommu *iommu;
	unsigned long flags;
	bool found = false;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry_safe(info, tmp, &domain->devices, link) {
		if (info->iommu == iommu && info->bus == bus &&
		    info->devfn == devfn) {
			unlink_domain_info(info);
			spin_unlock_irqrestore(&device_domain_lock, flags);

			iommu_disable_dev_iotlb(info);
			iommu_detach_dev(iommu, info->bus, info->devfn);
			iommu_detach_dependent_devices(iommu, dev);
			free_devinfo_mem(info);

			spin_lock_irqsave(&device_domain_lock, flags);

			if (found)
				break;
			else
				continue;
		}

		/*
		 * If there are no other devices under the same iommu owned
		 * by this domain, clear this iommu from iommu_bmp and update
		 * the iommu count and coherency.
		 */
		if (info->iommu == iommu)
			found = true;
	}

	spin_unlock_irqrestore(&device_domain_lock, flags);

	if (!found) {
		domain_detach_iommu(domain, iommu);
		if (!domain_type_is_vm_or_si(domain))
			iommu_detach_domain(domain, iommu);
	}
}

static int md_domain_init(struct dmar_domain *domain, int guest_width)
{
	int adjust_width;

	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
			 DMA_32BIT_PFN);
	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	domain->agaw = width_to_agaw(adjust_width);

	domain->iommu_coherency = 0;
	domain->iommu_snooping = 0;
	domain->iommu_superpage = 0;
	domain->max_addr = 0;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
	if (!domain->pgd)
		return -ENOMEM;
	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
	return 0;
}

static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
{
	struct dmar_domain *dmar_domain;
	struct iommu_domain *domain;

	if (type != IOMMU_DOMAIN_UNMANAGED)
		return NULL;

	dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
	if (!dmar_domain) {
		printk(KERN_ERR
			"intel_iommu_domain_alloc: dmar_domain == NULL\n");
		return NULL;
	}
	if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
		printk(KERN_ERR
			"intel_iommu_domain_alloc: md_domain_init() failed\n");
		domain_exit(dmar_domain);
		return NULL;
	}
	domain_update_iommu_cap(dmar_domain);

	domain = &dmar_domain->domain;
	domain->geometry.aperture_start = 0;
	domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
	domain->geometry.force_aperture = true;

	return domain;
}

static void intel_iommu_domain_free(struct iommu_domain *domain)
{
	domain_exit(to_dmar_domain(domain));
}
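/*
 * Sizing note (informational): with DEFAULT_DOMAIN_ADDRESS_WIDTH == 48,
 * intel_iommu_domain_alloc() above advertises an aperture of [0, 2^48 - 1].
 * intel_iommu_attach_device() below may later clamp dmar_domain->gaw to
 * what the attached IOMMU actually supports (its AGAW/MGAW) and drop
 * page-table levels accordingly.
 */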
static int intel_iommu_attach_device(struct iommu_domain *domain,
				     struct device *dev)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct intel_iommu *iommu;
	int addr_width;
	u8 bus, devfn;

	if (device_is_rmrr_locked(dev)) {
		dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
		return -EPERM;
	}

	/* normally dev is not mapped */
	if (unlikely(domain_context_mapped(dev))) {
		struct dmar_domain *old_domain;

		old_domain = find_domain(dev);
		if (old_domain) {
			if (domain_type_is_vm_or_si(dmar_domain))
				domain_remove_one_dev_info(old_domain, dev);
			else
				domain_remove_dev_info(old_domain);

			if (!domain_type_is_vm_or_si(old_domain) &&
			    list_empty(&old_domain->devices))
				domain_exit(old_domain);
		}
	}

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	/* check if this iommu agaw is sufficient for max mapped address */
	addr_width = agaw_to_width(iommu->agaw);
	if (addr_width > cap_mgaw(iommu->cap))
		addr_width = cap_mgaw(iommu->cap);

	if (dmar_domain->max_addr > (1LL << addr_width)) {
		printk(KERN_ERR "%s: iommu width (%d) is not "
		       "sufficient for the mapped address (%llx)\n",
		       __func__, addr_width, dmar_domain->max_addr);
		return -EFAULT;
	}
	dmar_domain->gaw = addr_width;

	/*
	 * Knock out extra levels of page tables if necessary
	 */
	while (iommu->agaw < dmar_domain->agaw) {
		struct dma_pte *pte;

		pte = dmar_domain->pgd;
		if (dma_pte_present(pte)) {
			dmar_domain->pgd = (struct dma_pte *)
				phys_to_virt(dma_pte_addr(pte));
			free_pgtable_page(pte);
		}
		dmar_domain->agaw--;
	}

	return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
}

static void intel_iommu_detach_device(struct iommu_domain *domain,
				      struct device *dev)
{
	domain_remove_one_dev_info(to_dmar_domain(domain), dev);
}

static int intel_iommu_map(struct iommu_domain *domain,
			   unsigned long iova, phys_addr_t hpa,
			   size_t size, int iommu_prot)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	u64 max_addr;
	int prot = 0;
	int ret;

	if (iommu_prot & IOMMU_READ)
		prot |= DMA_PTE_READ;
	if (iommu_prot & IOMMU_WRITE)
		prot |= DMA_PTE_WRITE;
	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
		prot |= DMA_PTE_SNP;

	max_addr = iova + size;
	if (dmar_domain->max_addr < max_addr) {
		u64 end;

		/* check if minimum agaw is sufficient for mapped address */
		end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
		if (end < max_addr) {
			printk(KERN_ERR "%s: iommu width (%d) is not "
			       "sufficient for the mapped address (%llx)\n",
			       __func__, dmar_domain->gaw, max_addr);
			return -EFAULT;
		}
		dmar_domain->max_addr = max_addr;
	}
	/* Round up size to next multiple of PAGE_SIZE, if it and
	   the low bits of hpa would take us onto the next page */
	size = aligned_nrpages(hpa, size);
	ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
				 hpa >> VTD_PAGE_SHIFT, size, prot);
	return ret;
}
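/*
 * Worked example (informational): a call such as
 *
 *	intel_iommu_map(domain, iova, 0x12345800, 0x1000, IOMMU_READ);
 *
 * straddles a 4KiB boundary, so aligned_nrpages() above rounds it up to two
 * pages.  Conversely, intel_iommu_unmap() below may unmap more than the
 * requested size when the IOVA is covered by a large-page PTE.
 */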
static size_t intel_iommu_unmap(struct iommu_domain *domain,
				unsigned long iova, size_t size)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct page *freelist = NULL;
	struct intel_iommu *iommu;
	unsigned long start_pfn, last_pfn;
	unsigned int npages;
	int iommu_id, num, ndomains, level = 0;

	/* Cope with horrid API which requires us to unmap more than the
	   size argument if it happens to be a large-page mapping. */
	if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
		BUG();

	if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
		size = VTD_PAGE_SIZE << level_to_offset_bits(level);

	start_pfn = iova >> VTD_PAGE_SHIFT;
	last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;

	freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);

	npages = last_pfn - start_pfn + 1;

	for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
		iommu = g_iommus[iommu_id];

		/*
		 * find bit position of dmar_domain
		 */
		ndomains = cap_ndoms(iommu->cap);
		for_each_set_bit(num, iommu->domain_ids, ndomains) {
			if (iommu->domains[num] == dmar_domain)
				iommu_flush_iotlb_psi(iommu, num, start_pfn,
						      npages, !freelist, 0);
		}
	}

	dma_free_pagelist(freelist);

	if (dmar_domain->max_addr == iova + size)
		dmar_domain->max_addr = iova;

	return size;
}

static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
					    dma_addr_t iova)
{
	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
	struct dma_pte *pte;
	int level = 0;
	u64 phys = 0;

	pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
	if (pte)
		phys = dma_pte_addr(pte);

	return phys;
}

static bool intel_iommu_capable(enum iommu_cap cap)
{
	if (cap == IOMMU_CAP_CACHE_COHERENCY)
		return domain_update_iommu_snooping(NULL) == 1;
	if (cap == IOMMU_CAP_INTR_REMAP)
		return irq_remapping_enabled == 1;

	return false;
}

static int intel_iommu_add_device(struct device *dev)
{
	struct intel_iommu *iommu;
	struct iommu_group *group;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	iommu_device_link(iommu->iommu_dev, dev);

	group = iommu_group_get_for_dev(dev);

	if (IS_ERR(group))
		return PTR_ERR(group);

	iommu_group_put(group);
	return 0;
}

static void intel_iommu_remove_device(struct device *dev)
{
	struct intel_iommu *iommu;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return;

	iommu_group_remove_device(dev);

	iommu_device_unlink(iommu->iommu_dev, dev);
}

static const struct iommu_ops intel_iommu_ops = {
	.capable	= intel_iommu_capable,
	.domain_alloc	= intel_iommu_domain_alloc,
	.domain_free	= intel_iommu_domain_free,
	.attach_dev	= intel_iommu_attach_device,
	.detach_dev	= intel_iommu_detach_device,
	.map		= intel_iommu_map,
	.unmap		= intel_iommu_unmap,
	.map_sg		= default_iommu_map_sg,
	.iova_to_phys	= intel_iommu_iova_to_phys,
	.add_device	= intel_iommu_add_device,
	.remove_device	= intel_iommu_remove_device,
	.pgsize_bitmap	= INTEL_IOMMU_PGSIZES,
};
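/*
 * Illustrative sketch only (not part of this driver): a typical consumer of
 * the callbacks above goes through the generic IOMMU API, roughly:
 *
 *	struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
 *
 *	if (!dom || iommu_attach_device(dom, &pdev->dev))
 *		goto err;
 *	iommu_map(dom, iova, phys, SZ_4K, IOMMU_READ | IOMMU_WRITE);
 *	...
 *	iommu_unmap(dom, iova, SZ_4K);
 *	iommu_detach_device(dom, &pdev->dev);
 *	iommu_domain_free(dom);
 *
 * Each call dispatches to the corresponding intel_iommu_* handler through
 * intel_iommu_ops; pdev, iova and phys are placeholders, and error handling
 * is elided.
 */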
static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
{
	/* G4x/GM45 integrated gfx dmar support is totally busted. */
	printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
	dmar_map_gfx = 0;
}

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);

static void quirk_iommu_rwbf(struct pci_dev *dev)
{
	/*
	 * Mobile 4 Series Chipset neglects to set RWBF capability,
	 * but needs it. Same seems to hold for the desktop versions.
	 */
	printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
	rwbf_quirk = 1;
}

DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);

#define GGC 0x52
#define GGC_MEMORY_SIZE_MASK	(0xf << 8)
#define GGC_MEMORY_SIZE_NONE	(0x0 << 8)
#define GGC_MEMORY_SIZE_1M	(0x1 << 8)
#define GGC_MEMORY_SIZE_2M	(0x3 << 8)
#define GGC_MEMORY_VT_ENABLED	(0x8 << 8)
#define GGC_MEMORY_SIZE_2M_VT	(0x9 << 8)
#define GGC_MEMORY_SIZE_3M_VT	(0xa << 8)
#define GGC_MEMORY_SIZE_4M_VT	(0xb << 8)

static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
{
	unsigned short ggc;

	if (pci_read_config_word(dev, GGC, &ggc))
		return;

	if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
		printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
		dmar_map_gfx = 0;
	} else if (dmar_map_gfx) {
		/* we have to ensure the gfx device is idle before we flush */
		printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
		intel_iommu_strict = 1;
	}
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);

/* On Tylersburg chipsets, some BIOSes have been known to enable the
   ISOCH DMAR unit for the Azalia sound device, but not give it any
   TLB entries, which causes it to deadlock. Check for that. We do
   this in a function called from init_dmars(), instead of in a PCI
   quirk, because we don't want to print the obnoxious "BIOS broken"
   message if VT-d is actually disabled.
*/
static void __init check_tylersburg_isoch(void)
{
	struct pci_dev *pdev;
	uint32_t vtisochctrl;

	/* If there's no Azalia in the system anyway, forget it. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
	if (!pdev)
		return;
	pci_dev_put(pdev);

	/* System Management Registers. Might be hidden, in which case
	   we can't do the sanity check. But that's OK, because the
	   known-broken BIOSes _don't_ actually hide it, so far. */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
	if (!pdev)
		return;

	if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
		pci_dev_put(pdev);
		return;
	}

	pci_dev_put(pdev);

	/* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
	if (vtisochctrl & 1)
		return;

	/* Drop all bits other than the number of TLB entries */
	vtisochctrl &= 0x1c;

	/* If we have the recommended number of TLB entries (16), fine. */
	if (vtisochctrl == 0x10)
		return;

	/* Zero TLB entries? You get to ride the short bus to school. */
	if (!vtisochctrl) {
		WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		iommu_identity_mapping |= IDENTMAP_AZALIA;
		return;
	}

	printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
	       vtisochctrl);
}