/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 */

#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/swiotlb.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <asm/tlbflush.h>
#include <asm/homecache.h>

/* Generic DMA mapping functions: */

/*
 * Allocate what Linux calls "coherent" memory.  On TILEPro this is
 * uncached memory; on TILE-Gx it is hash-for-home memory.
 */
#ifdef __tilepro__
#define PAGE_HOME_DMA PAGE_HOME_UNCACHED
#else
#define PAGE_HOME_DMA PAGE_HOME_HASH
#endif

static void *tile_dma_alloc_coherent(struct device *dev, size_t size,
                                     dma_addr_t *dma_handle, gfp_t gfp,
                                     struct dma_attrs *attrs)
{
        u64 dma_mask = (dev && dev->coherent_dma_mask) ?
                dev->coherent_dma_mask : DMA_BIT_MASK(32);
        int node = dev ? dev_to_node(dev) : 0;
        int order = get_order(size);
        struct page *pg;
        dma_addr_t addr;

        gfp |= __GFP_ZERO;

        /*
         * If the mask specifies that the memory be in the first 4 GB, then
         * we force the allocation to come from the DMA zone.  We also
         * force the node to 0 since that's the only node where the DMA
         * zone isn't empty.  If the mask size is smaller than 32 bits, we
         * may still not be able to guarantee a suitable memory address, in
         * which case we will return NULL.  But such devices are uncommon.
         */
        if (dma_mask <= DMA_BIT_MASK(32)) {
                gfp |= GFP_DMA;
                node = 0;
        }

        pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
        if (pg == NULL)
                return NULL;

        addr = page_to_phys(pg);
        if (addr + size > dma_mask) {
                __homecache_free_pages(pg, order);
                return NULL;
        }

        *dma_handle = addr;

        return page_address(pg);
}

/*
 * Free memory that was allocated with tile_dma_alloc_coherent.
 */
static void tile_dma_free_coherent(struct device *dev, size_t size,
                                   void *vaddr, dma_addr_t dma_handle,
                                   struct dma_attrs *attrs)
{
        homecache_free_pages((unsigned long)vaddr, get_order(size));
}

/*
 * The map routines "map" the specified address range for DMA
 * accesses.  The memory belongs to the device after this call is
 * issued, until it is unmapped with dma_unmap_single.
 *
 * We don't need to do any mapping, we just flush the address range
 * out of the cache and return a DMA address.
 *
 * The unmap routines do whatever is necessary before the processor
 * accesses the memory again, and must be called before the driver
 * touches the memory.  We can get away with a cache invalidate if we
 * can count on nothing having been touched.
 */

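/*
 * Rough sketch (not part of this file) of the driver-side sequence that
 * these routines back, via the generic DMA API; "dev", "buf" and "len"
 * are illustrative names only:
 *
 *      dma_addr_t bus = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
 *      if (dma_mapping_error(dev, bus))
 *              return -ENOMEM;
 *      ... point the device at "bus" and let it DMA into the buffer ...
 *      dma_unmap_single(dev, bus, len, DMA_FROM_DEVICE);
 *      ... only now may the CPU safely read buf ...
 */
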
/* Set up a single page for DMA access. */
static void __dma_prep_page(struct page *page, unsigned long offset,
                            size_t size, enum dma_data_direction direction)
{
        /*
         * Flush the page from cache if necessary.
         * On tilegx, data is delivered to hash-for-home L3; on tilepro,
         * data is delivered direct to memory.
         *
         * NOTE: If we were just doing DMA_TO_DEVICE we could optimize
         * this to be a "flush" not a "finv" and keep some of the
         * state in cache across the DMA operation, but it doesn't seem
         * worth creating the necessary flush_buffer_xxx() infrastructure.
         */
        int home = page_home(page);
        switch (home) {
        case PAGE_HOME_HASH:
#ifdef __tilegx__
                return;
#endif
                break;
        case PAGE_HOME_UNCACHED:
#ifdef __tilepro__
                return;
#endif
                break;
        case PAGE_HOME_IMMUTABLE:
                /* Should be going to the device only. */
                BUG_ON(direction == DMA_FROM_DEVICE ||
                       direction == DMA_BIDIRECTIONAL);
                return;
        case PAGE_HOME_INCOHERENT:
                /* Incoherent anyway, so no need to work hard here. */
                return;
        default:
                BUG_ON(home < 0 || home >= NR_CPUS);
                break;
        }
        homecache_finv_page(page);

#ifdef DEBUG_ALIGNMENT
        /* Warn if the region isn't cacheline aligned. */
        if (offset & (L2_CACHE_BYTES - 1) || (size & (L2_CACHE_BYTES - 1)))
                pr_warn("Unaligned DMA to non-hfh memory: PA %#llx/%#lx\n",
                        PFN_PHYS(page_to_pfn(page)) + offset, size);
#endif
}

/* Make the page ready to be read by the core. */
static void __dma_complete_page(struct page *page, unsigned long offset,
                                size_t size, enum dma_data_direction direction)
{
#ifdef __tilegx__
        switch (page_home(page)) {
        case PAGE_HOME_HASH:
                /* I/O device delivered data the way the cpu wanted it. */
                break;
        case PAGE_HOME_INCOHERENT:
                /* Incoherent anyway, so no need to work hard here. */
                break;
        case PAGE_HOME_IMMUTABLE:
                /* Extra read-only copies are not a problem. */
                break;
        default:
                /* Flush the bogus hash-for-home I/O entries to memory. */
                homecache_finv_map_page(page, PAGE_HOME_HASH);
                break;
        }
#endif
}

static void __dma_prep_pa_range(dma_addr_t dma_addr, size_t size,
                                enum dma_data_direction direction)
{
        struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
        unsigned long offset = dma_addr & (PAGE_SIZE - 1);
        size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));

        while (size != 0) {
                __dma_prep_page(page, offset, bytes, direction);
                size -= bytes;
                ++page;
                offset = 0;
                bytes = min((size_t)PAGE_SIZE, size);
        }
}

static void __dma_complete_pa_range(dma_addr_t dma_addr, size_t size,
                                    enum dma_data_direction direction)
{
        struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
        unsigned long offset = dma_addr & (PAGE_SIZE - 1);
        size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));

        while (size != 0) {
                __dma_complete_page(page, offset, bytes, direction);
                size -= bytes;
                ++page;
                offset = 0;
                bytes = min((size_t)PAGE_SIZE, size);
        }
}

static int tile_dma_map_sg(struct device *dev, struct scatterlist *sglist,
                           int nents, enum dma_data_direction direction,
                           struct dma_attrs *attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));

        WARN_ON(nents == 0 || sglist->length == 0);

        for_each_sg(sglist, sg, nents, i) {
                sg->dma_address = sg_phys(sg);
                __dma_prep_pa_range(sg->dma_address, sg->length, direction);
#ifdef CONFIG_NEED_SG_DMA_LENGTH
                sg->dma_length = sg->length;
#endif
        }

        return nents;
}

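/* Undo tile_dma_map_sg(): make each segment safe for the CPU to touch again. */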
static void tile_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
                              int nents, enum dma_data_direction direction,
                              struct dma_attrs *attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));
        for_each_sg(sglist, sg, nents, i) {
                sg->dma_address = sg_phys(sg);
                __dma_complete_pa_range(sg->dma_address, sg->length,
                                        direction);
        }
}

static dma_addr_t tile_dma_map_page(struct device *dev, struct page *page,
                                    unsigned long offset, size_t size,
                                    enum dma_data_direction direction,
                                    struct dma_attrs *attrs)
{
        BUG_ON(!valid_dma_direction(direction));

        BUG_ON(offset + size > PAGE_SIZE);
        __dma_prep_page(page, offset, size, direction);

        return page_to_pa(page) + offset;
}

static void tile_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
                                size_t size, enum dma_data_direction direction,
                                struct dma_attrs *attrs)
{
        BUG_ON(!valid_dma_direction(direction));

        __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
                            dma_address & (PAGE_SIZE - 1), size, direction);
}

static void tile_dma_sync_single_for_cpu(struct device *dev,
                                         dma_addr_t dma_handle,
                                         size_t size,
                                         enum dma_data_direction direction)
{
        BUG_ON(!valid_dma_direction(direction));

        __dma_complete_pa_range(dma_handle, size, direction);
}

static void tile_dma_sync_single_for_device(struct device *dev,
                                            dma_addr_t dma_handle, size_t size,
                                            enum dma_data_direction direction)
{
        __dma_prep_pa_range(dma_handle, size, direction);
}

static void tile_dma_sync_sg_for_cpu(struct device *dev,
                                     struct scatterlist *sglist, int nelems,
                                     enum dma_data_direction direction)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));
        WARN_ON(nelems == 0 || sglist->length == 0);

        for_each_sg(sglist, sg, nelems, i) {
                dma_sync_single_for_cpu(dev, sg->dma_address,
                                        sg_dma_len(sg), direction);
        }
}

static void tile_dma_sync_sg_for_device(struct device *dev,
                                        struct scatterlist *sglist, int nelems,
                                        enum dma_data_direction direction)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));
        WARN_ON(nelems == 0 || sglist->length == 0);

        for_each_sg(sglist, sg, nelems, i) {
                dma_sync_single_for_device(dev, sg->dma_address,
                                           sg_dma_len(sg), direction);
        }
}

static inline int
tile_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
        return 0;
}

static inline int
tile_dma_supported(struct device *dev, u64 mask)
{
        return 1;
}

static struct dma_map_ops tile_default_dma_map_ops = {
        .alloc = tile_dma_alloc_coherent,
        .free = tile_dma_free_coherent,
        .map_page = tile_dma_map_page,
        .unmap_page = tile_dma_unmap_page,
        .map_sg = tile_dma_map_sg,
        .unmap_sg = tile_dma_unmap_sg,
        .sync_single_for_cpu = tile_dma_sync_single_for_cpu,
        .sync_single_for_device = tile_dma_sync_single_for_device,
        .sync_sg_for_cpu = tile_dma_sync_sg_for_cpu,
        .sync_sg_for_device = tile_dma_sync_sg_for_device,
        .mapping_error = tile_dma_mapping_error,
        .dma_supported = tile_dma_supported
};

struct dma_map_ops *tile_dma_map_ops = &tile_default_dma_map_ops;
EXPORT_SYMBOL(tile_dma_map_ops);

/* Generic PCI DMA mapping functions */
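/*
 * These mirror the generic functions above, except that the bus address
 * handed to the device is offset by get_dma_offset(dev), and that offset
 * is stripped again before the address is used as a PA on the host side.
 */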
static void *tile_pci_dma_alloc_coherent(struct device *dev, size_t size,
                                         dma_addr_t *dma_handle, gfp_t gfp,
                                         struct dma_attrs *attrs)
{
        int node = dev_to_node(dev);
        int order = get_order(size);
        struct page *pg;
        dma_addr_t addr;

        gfp |= __GFP_ZERO;

        pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
        if (pg == NULL)
                return NULL;

        addr = page_to_phys(pg);

        *dma_handle = addr + get_dma_offset(dev);

        return page_address(pg);
}

/*
 * Free memory that was allocated with tile_pci_dma_alloc_coherent.
 */
static void tile_pci_dma_free_coherent(struct device *dev, size_t size,
                                       void *vaddr, dma_addr_t dma_handle,
                                       struct dma_attrs *attrs)
{
        homecache_free_pages((unsigned long)vaddr, get_order(size));
}

static int tile_pci_dma_map_sg(struct device *dev, struct scatterlist *sglist,
                               int nents, enum dma_data_direction direction,
                               struct dma_attrs *attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));

        WARN_ON(nents == 0 || sglist->length == 0);

        for_each_sg(sglist, sg, nents, i) {
                sg->dma_address = sg_phys(sg);
                __dma_prep_pa_range(sg->dma_address, sg->length, direction);

                sg->dma_address = sg->dma_address + get_dma_offset(dev);
#ifdef CONFIG_NEED_SG_DMA_LENGTH
                sg->dma_length = sg->length;
#endif
        }

        return nents;
}

static void tile_pci_dma_unmap_sg(struct device *dev,
                                  struct scatterlist *sglist, int nents,
                                  enum dma_data_direction direction,
                                  struct dma_attrs *attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));
        for_each_sg(sglist, sg, nents, i) {
                sg->dma_address = sg_phys(sg);
                __dma_complete_pa_range(sg->dma_address, sg->length,
                                        direction);
        }
}

static dma_addr_t tile_pci_dma_map_page(struct device *dev, struct page *page,
                                        unsigned long offset, size_t size,
                                        enum dma_data_direction direction,
                                        struct dma_attrs *attrs)
{
        BUG_ON(!valid_dma_direction(direction));

        BUG_ON(offset + size > PAGE_SIZE);
        __dma_prep_page(page, offset, size, direction);

        return page_to_pa(page) + offset + get_dma_offset(dev);
}

static void tile_pci_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
                                    size_t size,
                                    enum dma_data_direction direction,
                                    struct dma_attrs *attrs)
{
        BUG_ON(!valid_dma_direction(direction));

        dma_address -= get_dma_offset(dev);

        __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
                            dma_address & (PAGE_SIZE - 1), size, direction);
}

static void tile_pci_dma_sync_single_for_cpu(struct device *dev,
                                             dma_addr_t dma_handle,
                                             size_t size,
                                             enum dma_data_direction direction)
{
        BUG_ON(!valid_dma_direction(direction));

        dma_handle -= get_dma_offset(dev);

        __dma_complete_pa_range(dma_handle, size, direction);
}

static void tile_pci_dma_sync_single_for_device(struct device *dev,
                                                dma_addr_t dma_handle,
                                                size_t size,
                                                enum dma_data_direction
                                                direction)
{
        dma_handle -= get_dma_offset(dev);

        __dma_prep_pa_range(dma_handle, size, direction);
}

static void tile_pci_dma_sync_sg_for_cpu(struct device *dev,
                                         struct scatterlist *sglist,
                                         int nelems,
                                         enum dma_data_direction direction)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));
        WARN_ON(nelems == 0 || sglist->length == 0);

        for_each_sg(sglist, sg, nelems, i) {
                dma_sync_single_for_cpu(dev, sg->dma_address,
                                        sg_dma_len(sg), direction);
        }
}

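/* Flush each segment back out so the device sees up-to-date data. */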
static void tile_pci_dma_sync_sg_for_device(struct device *dev,
                                            struct scatterlist *sglist,
                                            int nelems,
                                            enum dma_data_direction direction)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));
        WARN_ON(nelems == 0 || sglist->length == 0);

        for_each_sg(sglist, sg, nelems, i) {
                dma_sync_single_for_device(dev, sg->dma_address,
                                           sg_dma_len(sg), direction);
        }
}

static inline int
tile_pci_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
        return 0;
}

static inline int
tile_pci_dma_supported(struct device *dev, u64 mask)
{
        return 1;
}

static struct dma_map_ops tile_pci_default_dma_map_ops = {
        .alloc = tile_pci_dma_alloc_coherent,
        .free = tile_pci_dma_free_coherent,
        .map_page = tile_pci_dma_map_page,
        .unmap_page = tile_pci_dma_unmap_page,
        .map_sg = tile_pci_dma_map_sg,
        .unmap_sg = tile_pci_dma_unmap_sg,
        .sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu,
        .sync_single_for_device = tile_pci_dma_sync_single_for_device,
        .sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu,
        .sync_sg_for_device = tile_pci_dma_sync_sg_for_device,
        .mapping_error = tile_pci_dma_mapping_error,
        .dma_supported = tile_pci_dma_supported
};

struct dma_map_ops *gx_pci_dma_map_ops = &tile_pci_default_dma_map_ops;
EXPORT_SYMBOL(gx_pci_dma_map_ops);

/* PCI DMA mapping functions for legacy PCI devices */

#ifdef CONFIG_SWIOTLB
static void *tile_swiotlb_alloc_coherent(struct device *dev, size_t size,
                                         dma_addr_t *dma_handle, gfp_t gfp,
                                         struct dma_attrs *attrs)
{
        gfp |= GFP_DMA;
        return swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
}

static void tile_swiotlb_free_coherent(struct device *dev, size_t size,
                                       void *vaddr, dma_addr_t dma_addr,
                                       struct dma_attrs *attrs)
{
        swiotlb_free_coherent(dev, size, vaddr, dma_addr);
}

static struct dma_map_ops pci_swiotlb_dma_ops = {
        .alloc = tile_swiotlb_alloc_coherent,
        .free = tile_swiotlb_free_coherent,
        .map_page = swiotlb_map_page,
        .unmap_page = swiotlb_unmap_page,
        .map_sg = swiotlb_map_sg_attrs,
        .unmap_sg = swiotlb_unmap_sg_attrs,
        .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
        .sync_single_for_device = swiotlb_sync_single_for_device,
        .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
        .sync_sg_for_device = swiotlb_sync_sg_for_device,
        .dma_supported = swiotlb_dma_supported,
        .mapping_error = swiotlb_dma_mapping_error,
};

static struct dma_map_ops pci_hybrid_dma_ops = {
        .alloc = tile_swiotlb_alloc_coherent,
        .free = tile_swiotlb_free_coherent,
        .map_page = tile_pci_dma_map_page,
        .unmap_page = tile_pci_dma_unmap_page,
        .map_sg = tile_pci_dma_map_sg,
        .unmap_sg = tile_pci_dma_unmap_sg,
        .sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu,
        .sync_single_for_device = tile_pci_dma_sync_single_for_device,
        .sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu,
        .sync_sg_for_device = tile_pci_dma_sync_sg_for_device,
        .mapping_error = tile_pci_dma_mapping_error,
        .dma_supported = tile_pci_dma_supported
};

struct dma_map_ops *gx_legacy_pci_dma_map_ops = &pci_swiotlb_dma_ops;
struct dma_map_ops *gx_hybrid_pci_dma_map_ops = &pci_hybrid_dma_ops;
#else
struct dma_map_ops *gx_legacy_pci_dma_map_ops;
struct dma_map_ops *gx_hybrid_pci_dma_map_ops;
#endif
EXPORT_SYMBOL(gx_legacy_pci_dma_map_ops);
EXPORT_SYMBOL(gx_hybrid_pci_dma_map_ops);

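/*
 * Sketch of the expected caller-side use (illustrative only; "pdev" is a
 * hypothetical PCI device):
 *
 *      if (dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)))
 *              dev_warn(&pdev->dev, "no suitable DMA available\n");
 *
 * With a 64-bit mask, the hook below promotes the device to the full
 * gx_pci_dma_map_ops; with a smaller mask, the coherent range may be
 * clamped to archdata.max_direct_dma_addr.
 */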
#ifdef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
int dma_set_coherent_mask(struct device *dev, u64 mask)
{
        struct dma_map_ops *dma_ops = get_dma_ops(dev);

        /*
         * For PCI devices with 64-bit DMA addressing capability, promote
         * the dma_ops to full capability for both streaming and consistent
         * memory access.  For 32-bit capable devices, limit the consistent
         * memory DMA range to max_direct_dma_addr.
         */
        if (dma_ops == gx_pci_dma_map_ops ||
            dma_ops == gx_hybrid_pci_dma_map_ops ||
            dma_ops == gx_legacy_pci_dma_map_ops) {
                if (mask == DMA_BIT_MASK(64))
                        set_dma_ops(dev, gx_pci_dma_map_ops);
                else if (mask > dev->archdata.max_direct_dma_addr)
                        mask = dev->archdata.max_direct_dma_addr;
        }

        if (!dma_supported(dev, mask))
                return -EIO;
        dev->coherent_dma_mask = mask;
        return 0;
}
EXPORT_SYMBOL(dma_set_coherent_mask);
#endif

#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
/*
 * The generic dma_get_required_mask() uses the highest physical address
 * (max_pfn) as the hint to PCI drivers about whether to use a 32-bit or
 * 64-bit DMA configuration.  Since TILE-Gx has an I/O TLB/MMU, DMA can
 * use the full 64-bit PCI address space rather than being limited by
 * the physical memory space, so we always return a 64-bit DMA mask here
 * and let PCI devices use 64-bit DMA if they are capable of it.  The
 * device driver still has the option to use 32-bit DMA if the device
 * is not capable of 64-bit DMA.
 */
u64 dma_get_required_mask(struct device *dev)
{
        return DMA_BIT_MASK(64);
}
EXPORT_SYMBOL_GPL(dma_get_required_mask);
#endif