/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/dma_remapping.h>

#define __EXEC_OBJECT_HAS_PIN (1<<31)
#define __EXEC_OBJECT_HAS_FENCE (1<<30)
#define __EXEC_OBJECT_NEEDS_MAP (1<<29)
#define __EXEC_OBJECT_NEEDS_BIAS (1<<28)
#define __EXEC_OBJECT_PURGEABLE (1<<27)

#define BATCH_OFFSET_BIAS (256*1024)

struct eb_vmas {
	struct list_head vmas;
	int and;
	union {
		struct i915_vma *lut[0];
		struct hlist_head buckets[0];
	};
};

static struct eb_vmas *
eb_create(struct drm_i915_gem_execbuffer2 *args)
{
	struct eb_vmas *eb = NULL;

	if (args->flags & I915_EXEC_HANDLE_LUT) {
		unsigned size = args->buffer_count;
		size *= sizeof(struct i915_vma *);
		size += sizeof(struct eb_vmas);
		eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
	}

	if (eb == NULL) {
		unsigned size = args->buffer_count;
		unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
		while (count > 2*size)
			count >>= 1;
		eb = kzalloc(count*sizeof(struct hlist_head) +
			     sizeof(struct eb_vmas),
			     GFP_TEMPORARY);
		if (eb == NULL)
			return eb;

		eb->and = count - 1;
	} else
		eb->and = -args->buffer_count;

	INIT_LIST_HEAD(&eb->vmas);
	return eb;
}

static void
eb_reset(struct eb_vmas *eb)
{
	if (eb->and >= 0)
		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
}

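/*
 * Resolve every buffer handle in the exec list into a vma for the target
 * address space, taking a reference on each object.  Depending on how the
 * eb was created, the vma is recorded either directly in the flat lookup
 * table or on a hash bucket keyed by its handle.
 */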
static int
eb_lookup_vmas(struct eb_vmas *eb,
	       struct drm_i915_gem_exec_object2 *exec,
	       const struct drm_i915_gem_execbuffer2 *args,
	       struct i915_address_space *vm,
	       struct drm_file *file)
{
	struct drm_i915_gem_object *obj;
	struct list_head objects;
	int i, ret;

	INIT_LIST_HEAD(&objects);
	spin_lock(&file->table_lock);
	/* Grab a reference to the object and release the lock so we can look
	 * up or create the VMA without using GFP_ATOMIC */
	for (i = 0; i < args->buffer_count; i++) {
		obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
		if (obj == NULL) {
			spin_unlock(&file->table_lock);
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				  exec[i].handle, i);
			ret = -ENOENT;
			goto err;
		}

		if (!list_empty(&obj->obj_exec_link)) {
			spin_unlock(&file->table_lock);
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
				  obj, exec[i].handle, i);
			ret = -EINVAL;
			goto err;
		}

		drm_gem_object_reference(&obj->base);
		list_add_tail(&obj->obj_exec_link, &objects);
	}
	spin_unlock(&file->table_lock);

	i = 0;
	while (!list_empty(&objects)) {
		struct i915_vma *vma;

		obj = list_first_entry(&objects,
				       struct drm_i915_gem_object,
				       obj_exec_link);

		/*
		 * NOTE: We can leak any vmas created here when something fails
		 * later on. But that's no issue since vma_unbind can deal with
		 * vmas which are not actually bound. And since only
		 * lookup_or_create exists as an interface to get at the vma
		 * from the (obj, vm) we don't run the risk of creating
		 * duplicated vmas for the same vm.
		 */
		vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
		if (IS_ERR(vma)) {
			DRM_DEBUG("Failed to lookup VMA\n");
			ret = PTR_ERR(vma);
			goto err;
		}

		/* Transfer ownership from the objects list to the vmas list. */
		list_add_tail(&vma->exec_list, &eb->vmas);
		list_del_init(&obj->obj_exec_link);

		vma->exec_entry = &exec[i];
		if (eb->and < 0) {
			eb->lut[i] = vma;
		} else {
			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
			vma->exec_handle = handle;
			hlist_add_head(&vma->exec_node,
				       &eb->buckets[handle & eb->and]);
		}
		++i;
	}

	return 0;


err:
	while (!list_empty(&objects)) {
		obj = list_first_entry(&objects,
				       struct drm_i915_gem_object,
				       obj_exec_link);
		list_del_init(&obj->obj_exec_link);
		drm_gem_object_unreference(&obj->base);
	}
	/*
	 * Objects already transferred to the vmas list will be unreferenced by
	 * eb_destroy.
	 */

	return ret;
}

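/*
 * Map an execbuffer handle back to its vma.  A negative eb->and means the
 * handles were remapped to list indices at lookup time (I915_EXEC_HANDLE_LUT),
 * so the handle indexes the flat table directly; otherwise eb->and is the
 * mask for the hash bucket array.
 */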
static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
{
	if (eb->and < 0) {
		if (handle >= -eb->and)
			return NULL;
		return eb->lut[handle];
	} else {
		struct hlist_head *head;
		struct hlist_node *node;

		head = &eb->buckets[handle & eb->and];
		hlist_for_each(node, head) {
			struct i915_vma *vma;

			vma = hlist_entry(node, struct i915_vma, exec_node);
			if (vma->exec_handle == handle)
				return vma;
		}
		return NULL;
	}
}

static void
i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
{
	struct drm_i915_gem_exec_object2 *entry;
	struct drm_i915_gem_object *obj = vma->obj;

	if (!drm_mm_node_allocated(&vma->node))
		return;

	entry = vma->exec_entry;

	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
		i915_gem_object_unpin_fence(obj);

	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
		vma->pin_count--;

	if (entry->flags & __EXEC_OBJECT_PURGEABLE)
		obj->madv = I915_MADV_DONTNEED;

	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE |
			  __EXEC_OBJECT_HAS_PIN |
			  __EXEC_OBJECT_PURGEABLE);
}

static void eb_destroy(struct eb_vmas *eb)
{
	while (!list_empty(&eb->vmas)) {
		struct i915_vma *vma;

		vma = list_first_entry(&eb->vmas,
				       struct i915_vma,
				       exec_list);
		list_del_init(&vma->exec_list);
		i915_gem_execbuffer_unreserve_vma(vma);
		drm_gem_object_unreference(&vma->obj->base);
	}
	kfree(eb);
}

static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
	return (HAS_LLC(obj->base.dev) ||
		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
		obj->cache_level != I915_CACHE_NONE);
}

static int
relocate_entry_cpu(struct drm_i915_gem_object *obj,
		   struct drm_i915_gem_relocation_entry *reloc,
		   uint64_t target_offset)
{
	struct drm_device *dev = obj->base.dev;
	uint32_t page_offset = offset_in_page(reloc->offset);
	uint64_t delta = reloc->delta + target_offset;
	char *vaddr;
	int ret;

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret)
		return ret;

	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
				reloc->offset >> PAGE_SHIFT));
	*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);

	if (INTEL_INFO(dev)->gen >= 8) {
		page_offset = offset_in_page(page_offset + sizeof(uint32_t));

		if (page_offset == 0) {
			kunmap_atomic(vaddr);
			vaddr = kmap_atomic(i915_gem_object_get_page(obj,
			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
		}

		*(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
	}

	kunmap_atomic(vaddr);

	return 0;
}

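/*
 * Perform the relocation write through the CPU's write-combining mapping of
 * the GGTT aperture.  On gen8+ the value is 64 bits wide and the upper half
 * may fall on the following page, in which case the atomic mapping is redone
 * for that page before the second write.
 */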
static int
relocate_entry_gtt(struct drm_i915_gem_object *obj,
		   struct drm_i915_gem_relocation_entry *reloc,
		   uint64_t target_offset)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint64_t delta = reloc->delta + target_offset;
	uint64_t offset;
	void __iomem *reloc_page;
	int ret;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		return ret;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		return ret;

	/* Map the page containing the relocation we're going to perform. */
	offset = i915_gem_obj_ggtt_offset(obj);
	offset += reloc->offset;
	reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
					      offset & PAGE_MASK);
	iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));

	if (INTEL_INFO(dev)->gen >= 8) {
		offset += sizeof(uint32_t);

		if (offset_in_page(offset) == 0) {
			io_mapping_unmap_atomic(reloc_page);
			reloc_page =
				io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
							 offset);
		}

		iowrite32(upper_32_bits(delta),
			  reloc_page + offset_in_page(offset));
	}

	io_mapping_unmap_atomic(reloc_page);

	return 0;
}

static void
clflush_write32(void *addr, uint32_t value)
{
	/* This is not a fast path, so KISS. */
	drm_clflush_virt_range(addr, sizeof(uint32_t));
	*(uint32_t *)addr = value;
	drm_clflush_virt_range(addr, sizeof(uint32_t));
}

static int
relocate_entry_clflush(struct drm_i915_gem_object *obj,
		       struct drm_i915_gem_relocation_entry *reloc,
		       uint64_t target_offset)
{
	struct drm_device *dev = obj->base.dev;
	uint32_t page_offset = offset_in_page(reloc->offset);
	uint64_t delta = (int)reloc->delta + target_offset;
	char *vaddr;
	int ret;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		return ret;

	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
				reloc->offset >> PAGE_SHIFT));
	clflush_write32(vaddr + page_offset, lower_32_bits(delta));

	if (INTEL_INFO(dev)->gen >= 8) {
		page_offset = offset_in_page(page_offset + sizeof(uint32_t));

		if (page_offset == 0) {
			kunmap_atomic(vaddr);
			vaddr = kmap_atomic(i915_gem_object_get_page(obj,
			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
		}

		clflush_write32(vaddr + page_offset, upper_32_bits(delta));
	}

	kunmap_atomic(vaddr);

	return 0;
}

static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
				   struct eb_vmas *eb,
				   struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_object *target_obj;
	struct drm_i915_gem_object *target_i915_obj;
	struct i915_vma *target_vma;
	uint64_t target_offset;
	int ret;

	/* we already hold a reference to all valid objects */
	target_vma = eb_get_vma(eb, reloc->target_handle);
	if (unlikely(target_vma == NULL))
		return -ENOENT;
	target_i915_obj = target_vma->obj;
	target_obj = &target_vma->obj->base;

	target_offset = target_vma->node.start;

	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
	 * pipe_control writes because the gpu doesn't properly redirect them
	 * through the ppgtt for non-secure batchbuffers. */
	if (unlikely(IS_GEN6(dev) &&
	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
	    !(target_vma->bound & GLOBAL_BIND))) {
		ret = i915_vma_bind(target_vma, target_i915_obj->cache_level,
				    GLOBAL_BIND);
		if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
			return ret;
	}

	/* Validate that the target is in a valid r/w GPU domain */
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
		DRM_DEBUG("reloc with multiple write domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return -EINVAL;
	}
	if (unlikely((reloc->write_domain | reloc->read_domains)
		     & ~I915_GEM_GPU_DOMAINS)) {
		DRM_DEBUG("reloc with read/write non-GPU domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return -EINVAL;
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;

	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
		return 0;

	/* Check that the relocation address is valid... */
	if (unlikely(reloc->offset >
		obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
		DRM_DEBUG("Relocation beyond object bounds: "
			  "obj %p target %d offset %d size %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  (int) obj->base.size);
		return -EINVAL;
	}
	if (unlikely(reloc->offset & 3)) {
		DRM_DEBUG("Relocation not 4-byte aligned: "
			  "obj %p target %d offset %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset);
		return -EINVAL;
	}

	/* We can't wait for rendering with pagefaults disabled */
	if (obj->active && in_atomic())
		return -EFAULT;

	if (use_cpu_reloc(obj))
		ret = relocate_entry_cpu(obj, reloc, target_offset);
	else if (obj->map_and_fenceable)
		ret = relocate_entry_gtt(obj, reloc, target_offset);
	else if (cpu_has_clflush)
		ret = relocate_entry_clflush(obj, reloc, target_offset);
	else {
		WARN_ONCE(1, "Impossible case in relocation handling\n");
		ret = -ENODEV;
	}

	if (ret)
		return ret;

	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;

	return 0;
}

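/*
 * Apply all relocations for a single vma on the fast path: copy the user's
 * relocation entries in chunks onto a stack buffer using the atomic
 * (non-faulting) accessors, process each one, and write any updated
 * presumed_offset straight back to userspace.
 */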
static int
i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
				 struct eb_vmas *eb)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
	struct drm_i915_gem_relocation_entry __user *user_relocs;
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	int remain, ret;

	user_relocs = to_user_ptr(entry->relocs_ptr);

	remain = entry->relocation_count;
	while (remain) {
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
		int count = remain;
		if (count > ARRAY_SIZE(stack_reloc))
			count = ARRAY_SIZE(stack_reloc);
		remain -= count;

		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
			return -EFAULT;

		do {
			u64 offset = r->presumed_offset;

			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
			if (ret)
				return ret;

			if (r->presumed_offset != offset &&
			    __copy_to_user_inatomic(&user_relocs->presumed_offset,
						    &r->presumed_offset,
						    sizeof(r->presumed_offset))) {
				return -EFAULT;
			}

			user_relocs++;
			r++;
		} while (--count);
	}

	return 0;
#undef N_RELOC
}

static int
i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
				      struct eb_vmas *eb,
				      struct drm_i915_gem_relocation_entry *relocs)
{
	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	int i, ret;

	for (i = 0; i < entry->relocation_count; i++) {
		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int
i915_gem_execbuffer_relocate(struct eb_vmas *eb)
{
	struct i915_vma *vma;
	int ret = 0;

	/* This is the fast path and we cannot handle a pagefault whilst
	 * holding the struct mutex lest the user pass in the relocations
	 * contained within a mmaped bo. For in such a case the page
	 * fault handler would call i915_gem_fault() and we would try to
	 * acquire the struct mutex again. Obviously this is bad and so
	 * lockdep complains vehemently.
	 */
	pagefault_disable();
	list_for_each_entry(vma, &eb->vmas, exec_list) {
		ret = i915_gem_execbuffer_relocate_vma(vma, eb);
		if (ret)
			break;
	}
	pagefault_enable();

	return ret;
}

static bool only_mappable_for_reloc(unsigned int flags)
{
	return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) ==
		__EXEC_OBJECT_NEEDS_MAP;
}

static int
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
				struct intel_engine_cs *ring,
				bool *need_reloc)
{
	struct drm_i915_gem_object *obj = vma->obj;
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	uint64_t flags;
	int ret;

	flags = 0;
	if (!drm_mm_node_allocated(&vma->node)) {
		if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
			flags |= PIN_GLOBAL | PIN_MAPPABLE;
		if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
			flags |= PIN_GLOBAL;
		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
	}

	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
	if ((ret == -ENOSPC || ret == -E2BIG) &&
	    only_mappable_for_reloc(entry->flags))
		ret = i915_gem_object_pin(obj, vma->vm,
					  entry->alignment,
					  flags & ~(PIN_GLOBAL | PIN_MAPPABLE));
	if (ret)
		return ret;

	entry->flags |= __EXEC_OBJECT_HAS_PIN;

	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
		ret = i915_gem_object_get_fence(obj);
		if (ret)
			return ret;

		if (i915_gem_object_pin_fence(obj))
			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
	}

	if (entry->offset != vma->node.start) {
		entry->offset = vma->node.start;
		*need_reloc = true;
	}

	if (entry->flags & EXEC_OBJECT_WRITE) {
		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
	}

	return 0;
}

static bool
need_reloc_mappable(struct i915_vma *vma)
{
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;

	if (entry->relocation_count == 0)
		return false;

	if (!i915_is_ggtt(vma->vm))
		return false;

	/* See also use_cpu_reloc() */
	if (HAS_LLC(vma->obj->base.dev))
		return false;

	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
		return false;

	return true;
}

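/*
 * Check whether a vma's current binding violates the placement constraints
 * recorded in its exec entry (alignment, batch offset bias, mappability),
 * in which case it must be unbound and rebound during reservation.
 */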
static bool
eb_vma_misplaced(struct i915_vma *vma)
{
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	struct drm_i915_gem_object *obj = vma->obj;

	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
		!i915_is_ggtt(vma->vm));

	if (entry->alignment &&
	    vma->node.start & (entry->alignment - 1))
		return true;

	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
	    vma->node.start < BATCH_OFFSET_BIAS)
		return true;

	/* avoid costly ping-pong once a batch bo ended up non-mappable */
	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
		return !only_mappable_for_reloc(entry->flags);

	return false;
}

static int
i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
			    struct list_head *vmas,
			    bool *need_relocs)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	struct i915_address_space *vm;
	struct list_head ordered_vmas;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	int retry;

	i915_gem_retire_requests_ring(ring);

	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;

	INIT_LIST_HEAD(&ordered_vmas);
	while (!list_empty(vmas)) {
		struct drm_i915_gem_exec_object2 *entry;
		bool need_fence, need_mappable;

		vma = list_first_entry(vmas, struct i915_vma, exec_list);
		obj = vma->obj;
		entry = vma->exec_entry;

		if (!has_fenced_gpu_access)
			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
		need_fence =
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
			obj->tiling_mode != I915_TILING_NONE;
		need_mappable = need_fence || need_reloc_mappable(vma);

		if (need_mappable) {
			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
			list_move(&vma->exec_list, &ordered_vmas);
		} else
			list_move_tail(&vma->exec_list, &ordered_vmas);

		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
		obj->base.pending_write_domain = 0;
	}
	list_splice(&ordered_vmas, vmas);

	/* Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
	 * This avoids unnecessary unbinding of later objects in order to make
	 * room for the earlier objects *unless* we need to defragment.
	 */
	retry = 0;
	do {
		int ret = 0;

		/* Unbind any ill-fitting objects or pin. */
		list_for_each_entry(vma, vmas, exec_list) {
			if (!drm_mm_node_allocated(&vma->node))
				continue;

			if (eb_vma_misplaced(vma))
				ret = i915_vma_unbind(vma);
			else
				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
			if (ret)
				goto err;
		}

		/* Bind fresh objects */
		list_for_each_entry(vma, vmas, exec_list) {
			if (drm_mm_node_allocated(&vma->node))
				continue;

			ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
			if (ret)
				goto err;
		}

err:
		if (ret != -ENOSPC || retry++)
			return ret;

		/* Decrement pin count for bound objects */
		list_for_each_entry(vma, vmas, exec_list)
			i915_gem_execbuffer_unreserve_vma(vma);

		ret = i915_gem_evict_vm(vm, true);
		if (ret)
			return ret;
	} while (1);
}

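/*
 * Slow-path relocation, called when the non-faulting fast path hit -EFAULT:
 * drop struct_mutex, copy all relocation entries from userspace with page
 * faults allowed, invalidate the user's presumed offsets, then retake the
 * lock, re-reserve the buffers and apply the relocations from the copies.
 */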
static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
				  struct drm_i915_gem_execbuffer2 *args,
				  struct drm_file *file,
				  struct intel_engine_cs *ring,
				  struct eb_vmas *eb,
				  struct drm_i915_gem_exec_object2 *exec)
{
	struct drm_i915_gem_relocation_entry *reloc;
	struct i915_address_space *vm;
	struct i915_vma *vma;
	bool need_relocs;
	int *reloc_offset;
	int i, total, ret;
	unsigned count = args->buffer_count;

	vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;

	/* We may process another execbuffer during the unlock... */
	while (!list_empty(&eb->vmas)) {
		vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
		list_del_init(&vma->exec_list);
		i915_gem_execbuffer_unreserve_vma(vma);
		drm_gem_object_unreference(&vma->obj->base);
	}

	mutex_unlock(&dev->struct_mutex);

	total = 0;
	for (i = 0; i < count; i++)
		total += exec[i].relocation_count;

	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
	reloc = drm_malloc_ab(total, sizeof(*reloc));
	if (reloc == NULL || reloc_offset == NULL) {
		drm_free_large(reloc);
		drm_free_large(reloc_offset);
		mutex_lock(&dev->struct_mutex);
		return -ENOMEM;
	}

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry __user *user_relocs;
		u64 invalid_offset = (u64)-1;
		int j;

		user_relocs = to_user_ptr(exec[i].relocs_ptr);

		if (copy_from_user(reloc+total, user_relocs,
				   exec[i].relocation_count * sizeof(*reloc))) {
			ret = -EFAULT;
			mutex_lock(&dev->struct_mutex);
			goto err;
		}

		/* As we do not update the known relocation offsets after
		 * relocating (due to the complexities in lock handling),
		 * we need to mark them as invalid now so that we force the
		 * relocation processing next time. Just in case the target
		 * object is evicted and then rebound into its old
		 * presumed_offset before the next execbuffer - if that
		 * happened we would make the mistake of assuming that the
		 * relocations were valid.
		 */
		for (j = 0; j < exec[i].relocation_count; j++) {
			if (__copy_to_user(&user_relocs[j].presumed_offset,
					   &invalid_offset,
					   sizeof(invalid_offset))) {
				ret = -EFAULT;
				mutex_lock(&dev->struct_mutex);
				goto err;
			}
		}

		reloc_offset[i] = total;
		total += exec[i].relocation_count;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		mutex_lock(&dev->struct_mutex);
		goto err;
	}

	/* reacquire the objects */
	eb_reset(eb);
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
	if (ret)
		goto err;

	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
	if (ret)
		goto err;

	list_for_each_entry(vma, &eb->vmas, exec_list) {
		int offset = vma->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
							    reloc + reloc_offset[offset]);
		if (ret)
			goto err;
	}

	/* Leave the user relocations as they are: this is the painfully slow
	 * path, and we want to avoid the complication of dropping the lock
	 * whilst having buffers reserved in the aperture and so causing
	 * spurious ENOSPC for random operations.
	 */

err:
	drm_free_large(reloc);
	drm_free_large(reloc_offset);
	return ret;
}

static int
i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
				struct list_head *vmas)
{
	struct i915_vma *vma;
	uint32_t flush_domains = 0;
	bool flush_chipset = false;
	int ret;

	list_for_each_entry(vma, vmas, exec_list) {
		struct drm_i915_gem_object *obj = vma->obj;
		ret = i915_gem_object_sync(obj, ring);
		if (ret)
			return ret;

		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
			flush_chipset |= i915_gem_clflush_object(obj, false);

		flush_domains |= obj->base.write_domain;
	}

	if (flush_chipset)
		i915_gem_chipset_flush(ring->dev);

	if (flush_domains & I915_GEM_DOMAIN_GTT)
		wmb();

	/* Unconditionally invalidate gpu caches and ensure that we do flush
	 * any residual writes from the previous batch.
	 */
	return intel_ring_invalidate_all_caches(ring);
}

static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
		return false;

	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
}

static int
validate_exec_list(struct drm_device *dev,
		   struct drm_i915_gem_exec_object2 *exec,
		   int count)
{
	unsigned relocs_total = 0;
	unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
	unsigned invalid_flags;
	int i;

	invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
	if (USES_FULL_PPGTT(dev))
		invalid_flags |= EXEC_OBJECT_NEEDS_GTT;

	for (i = 0; i < count; i++) {
		char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
		int length; /* limited by fault_in_pages_readable() */

		if (exec[i].flags & invalid_flags)
			return -EINVAL;

		/* First check for malicious input causing overflow in
		 * the worst case where we need to allocate the entire
		 * relocation tree as a single array.
		 */
		if (exec[i].relocation_count > relocs_max - relocs_total)
			return -EINVAL;
		relocs_total += exec[i].relocation_count;

		length = exec[i].relocation_count *
			sizeof(struct drm_i915_gem_relocation_entry);
		/*
		 * We must check that the entire relocation array is safe
		 * to read, but since we may need to update the presumed
		 * offsets during execution, check for full write access.
		 */
		if (!access_ok(VERIFY_WRITE, ptr, length))
			return -EFAULT;

		if (likely(!i915.prefault_disable)) {
			if (fault_in_multipages_readable(ptr, length))
				return -EFAULT;
		}
	}

	return 0;
}

static struct intel_context *
i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
			  struct intel_engine_cs *ring, const u32 ctx_id)
{
	struct intel_context *ctx = NULL;
	struct i915_ctx_hang_stats *hs;

	if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
		return ERR_PTR(-EINVAL);

	ctx = i915_gem_context_get(file->driver_priv, ctx_id);
	if (IS_ERR(ctx))
		return ctx;

	hs = &ctx->hang_stats;
	if (hs->banned) {
		DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
		return ERR_PTR(-EIO);
	}

	if (i915.enable_execlists && !ctx->engine[ring->id].state) {
		int ret = intel_lr_context_deferred_create(ctx, ring);
		if (ret) {
			DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret);
			return ERR_PTR(ret);
		}
	}

	return ctx;
}

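/*
 * Mark every vma in the execbuffer as active on the ring: flip the pending
 * read/write domains computed during reservation and relocation into the
 * real ones, record the request for write and fence tracking, and bump any
 * held fence register onto the LRU.
 */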
void
i915_gem_execbuffer_move_to_active(struct list_head *vmas,
				   struct intel_engine_cs *ring)
{
	struct drm_i915_gem_request *req = intel_ring_get_request(ring);
	struct i915_vma *vma;

	list_for_each_entry(vma, vmas, exec_list) {
		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
		struct drm_i915_gem_object *obj = vma->obj;
		u32 old_read = obj->base.read_domains;
		u32 old_write = obj->base.write_domain;

		obj->dirty = 1; /* be paranoid */
		obj->base.write_domain = obj->base.pending_write_domain;
		if (obj->base.write_domain == 0)
			obj->base.pending_read_domains |= obj->base.read_domains;
		obj->base.read_domains = obj->base.pending_read_domains;

		i915_vma_move_to_active(vma, ring);
		if (obj->base.write_domain) {
			i915_gem_request_assign(&obj->last_write_req, req);

			intel_fb_obj_invalidate(obj, ring, ORIGIN_CS);

			/* update for the implicit flush after a batch */
			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
		}
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
			i915_gem_request_assign(&obj->last_fenced_req, req);
			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
				struct drm_i915_private *dev_priv = to_i915(ring->dev);
				list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
					       &dev_priv->mm.fence_list);
			}
		}

		trace_i915_gem_object_change_domain(obj, old_read, old_write);
	}
}

void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
				    struct drm_file *file,
				    struct intel_engine_cs *ring,
				    struct drm_i915_gem_object *obj)
{
	/* Unconditionally force add_request to emit a full flush. */
	ring->gpu_caches_dirty = true;

	/* Add a breadcrumb for the completion of the batch buffer */
	(void)__i915_add_request(ring, file, obj);
}

static int
i915_reset_gen7_sol_offsets(struct drm_device *dev,
			    struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret, i;

	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) {
		DRM_DEBUG("sol reset is gen7/rcs only\n");
		return -EINVAL;
	}

	ret = intel_ring_begin(ring, 4 * 3);
	if (ret)
		return ret;

	for (i = 0; i < 4; i++) {
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
		intel_ring_emit(ring, 0);
	}

	intel_ring_advance(ring);

	return 0;
}

static int
i915_emit_box(struct intel_engine_cs *ring,
	      struct drm_clip_rect *box,
	      int DR1, int DR4)
{
	int ret;

	if (box->y2 <= box->y1 || box->x2 <= box->x1 ||
	    box->y2 <= 0 || box->x2 <= 0) {
		DRM_ERROR("Bad box %d,%d..%d,%d\n",
			  box->x1, box->y1, box->x2, box->y2);
		return -EINVAL;
	}

	if (INTEL_INFO(ring->dev)->gen >= 4) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			return ret;

		intel_ring_emit(ring, GFX_OP_DRAWRECT_INFO_I965);
		intel_ring_emit(ring, (box->x1 & 0xffff) | box->y1 << 16);
		intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16);
		intel_ring_emit(ring, DR4);
	} else {
		ret = intel_ring_begin(ring, 6);
		if (ret)
			return ret;

		intel_ring_emit(ring, GFX_OP_DRAWRECT_INFO);
		intel_ring_emit(ring, DR1);
		intel_ring_emit(ring, (box->x1 & 0xffff) | box->y1 << 16);
		intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16);
		intel_ring_emit(ring, DR4);
		intel_ring_emit(ring, 0);
	}
	intel_ring_advance(ring);

	return 0;
}

static struct drm_i915_gem_object*
i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
			  struct drm_i915_gem_exec_object2 *shadow_exec_entry,
			  struct eb_vmas *eb,
			  struct drm_i915_gem_object *batch_obj,
			  u32 batch_start_offset,
			  u32 batch_len,
			  bool is_master)
{
	struct drm_i915_private *dev_priv = to_i915(batch_obj->base.dev);
	struct drm_i915_gem_object *shadow_batch_obj;
	struct i915_vma *vma;
	int ret;

	shadow_batch_obj = i915_gem_batch_pool_get(&dev_priv->mm.batch_pool,
						   PAGE_ALIGN(batch_len));
	if (IS_ERR(shadow_batch_obj))
		return shadow_batch_obj;

	ret = i915_parse_cmds(ring,
			      batch_obj,
			      shadow_batch_obj,
			      batch_start_offset,
			      batch_len,
			      is_master);
	if (ret)
		goto err;

	ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 0, 0);
	if (ret)
		goto err;

	memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));

	vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
	vma->exec_entry = shadow_exec_entry;
	vma->exec_entry->flags = __EXEC_OBJECT_PURGEABLE | __EXEC_OBJECT_HAS_PIN;
	drm_gem_object_reference(&shadow_batch_obj->base);
	list_add_tail(&vma->exec_list, &eb->vmas);

	shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;

	return shadow_batch_obj;

err:
	if (ret == -EACCES) /* unhandled chained batch */
		return batch_obj;
	else
		return ERR_PTR(ret);
}

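/*
 * Legacy (non-execlists) submission backend: flush the objects to the GPU
 * domains, switch to the requested context, program the constants mode and
 * SOL reset if asked for, then emit the batch (once per cliprect on
 * pre-gen5) and retire it via the active list.
 */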
int
i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
			       struct intel_engine_cs *ring,
			       struct intel_context *ctx,
			       struct drm_i915_gem_execbuffer2 *args,
			       struct list_head *vmas,
			       struct drm_i915_gem_object *batch_obj,
			       u64 exec_start, u32 dispatch_flags)
{
	struct drm_clip_rect *cliprects = NULL;
	struct drm_i915_private *dev_priv = dev->dev_private;
	u64 exec_len;
	int instp_mode;
	u32 instp_mask;
	int i, ret = 0;

	if (args->num_cliprects != 0) {
		if (ring != &dev_priv->ring[RCS]) {
			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
			return -EINVAL;
		}

		if (INTEL_INFO(dev)->gen >= 5) {
			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
			return -EINVAL;
		}

		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
			DRM_DEBUG("execbuf with %u cliprects\n",
				  args->num_cliprects);
			return -EINVAL;
		}

		cliprects = kcalloc(args->num_cliprects,
				    sizeof(*cliprects),
				    GFP_KERNEL);
		if (cliprects == NULL) {
			ret = -ENOMEM;
			goto error;
		}

		if (copy_from_user(cliprects,
				   to_user_ptr(args->cliprects_ptr),
				   sizeof(*cliprects)*args->num_cliprects)) {
			ret = -EFAULT;
			goto error;
		}
	} else {
		if (args->DR4 == 0xffffffff) {
			DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
			args->DR4 = 0;
		}

		if (args->DR1 || args->DR4 || args->cliprects_ptr) {
			DRM_DEBUG("0 cliprects but dirt in cliprects fields\n");
			return -EINVAL;
		}
	}

	ret = i915_gem_execbuffer_move_to_gpu(ring, vmas);
	if (ret)
		goto error;

	ret = i915_switch_context(ring, ctx);
	if (ret)
		goto error;

	if (ctx->ppgtt)
		WARN(ctx->ppgtt->pd_dirty_rings & (1<<ring->id),
		     "%s didn't clear reload\n", ring->name);
	else if (dev_priv->mm.aliasing_ppgtt)
		WARN(dev_priv->mm.aliasing_ppgtt->pd_dirty_rings &
		     (1<<ring->id), "%s didn't clear reload\n", ring->name);

	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
	instp_mask = I915_EXEC_CONSTANTS_MASK;
	switch (instp_mode) {
	case I915_EXEC_CONSTANTS_REL_GENERAL:
	case I915_EXEC_CONSTANTS_ABSOLUTE:
	case I915_EXEC_CONSTANTS_REL_SURFACE:
		if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
			ret = -EINVAL;
			goto error;
		}

		if (instp_mode != dev_priv->relative_constants_mode) {
			if (INTEL_INFO(dev)->gen < 4) {
				DRM_DEBUG("no rel constants on pre-gen4\n");
				ret = -EINVAL;
				goto error;
			}

			if (INTEL_INFO(dev)->gen > 5 &&
			    instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
				ret = -EINVAL;
				goto error;
			}

			/* The HW changed the meaning on this bit on gen6 */
			if (INTEL_INFO(dev)->gen >= 6)
				instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
		ret = -EINVAL;
		goto error;
	}

	if (ring == &dev_priv->ring[RCS] &&
	    instp_mode != dev_priv->relative_constants_mode) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			goto error;

		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, INSTPM);
		intel_ring_emit(ring, instp_mask << 16 | instp_mode);
		intel_ring_advance(ring);

		dev_priv->relative_constants_mode = instp_mode;
	}

	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
		ret = i915_reset_gen7_sol_offsets(dev, ring);
		if (ret)
			goto error;
	}

	exec_len = args->batch_len;
	if (cliprects) {
		for (i = 0; i < args->num_cliprects; i++) {
			ret = i915_emit_box(ring, &cliprects[i],
					    args->DR1, args->DR4);
			if (ret)
				goto error;

			ret = ring->dispatch_execbuffer(ring,
							exec_start, exec_len,
							dispatch_flags);
			if (ret)
				goto error;
		}
	} else {
		ret = ring->dispatch_execbuffer(ring,
						exec_start, exec_len,
						dispatch_flags);
		if (ret)
			return ret;
	}

	trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags);

	i915_gem_execbuffer_move_to_active(vmas, ring);
	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);

error:
	kfree(cliprects);
	return ret;
}

/**
 * Find one BSD ring to dispatch the corresponding BSD command.
 * The ring id is returned.
 */
static int gen8_dispatch_bsd_ring(struct drm_device *dev,
				  struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_file_private *file_priv = file->driver_priv;

	/* Check whether the file_priv is already using one ring */
	if (file_priv->bsd_ring)
		return file_priv->bsd_ring->id;
	else {
		/* If not, use the ping-pong mechanism to select one ring */
		int ring_id;

		mutex_lock(&dev->struct_mutex);
		if (dev_priv->mm.bsd_ring_dispatch_index == 0) {
			ring_id = VCS;
			dev_priv->mm.bsd_ring_dispatch_index = 1;
		} else {
			ring_id = VCS2;
			dev_priv->mm.bsd_ring_dispatch_index = 0;
		}
		file_priv->bsd_ring = &dev_priv->ring[ring_id];
		mutex_unlock(&dev->struct_mutex);
		return ring_id;
	}
}

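/*
 * The batch buffer is, by convention, the last object in the execbuffer
 * list, so grab the tail vma and flag it so that reservation keeps it above
 * the batch offset bias.
 */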
static struct drm_i915_gem_object *
eb_get_batch(struct eb_vmas *eb)
{
	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);

	/*
	 * SNA is doing fancy tricks with compressing batch buffers, which leads
	 * to negative relocation deltas. Usually that works out ok since the
	 * relocate address is still positive, except when the batch is placed
	 * very low in the GTT. Ensure this doesn't happen.
	 *
	 * Note that actual hangs have only been observed on gen7, but for
	 * paranoia do it everywhere.
	 */
	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;

	return vma->obj;
}

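/*
 * Common execbuffer path: validate the arguments, select the engine and
 * context, resolve the buffer handles, reserve and relocate the objects,
 * optionally run the batch through the command parser, and then hand the
 * batch off to the ringbuffer or execlists submission backend.
 */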
static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct eb_vmas *eb;
	struct drm_i915_gem_object *batch_obj;
	struct drm_i915_gem_exec_object2 shadow_exec_entry;
	struct intel_engine_cs *ring;
	struct intel_context *ctx;
	struct i915_address_space *vm;
	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
	u64 exec_start = args->batch_start_offset;
	u32 dispatch_flags;
	int ret;
	bool need_relocs;

	if (!i915_gem_check_execbuffer(args))
		return -EINVAL;

	ret = validate_exec_list(dev, exec, args->buffer_count);
	if (ret)
		return ret;

	dispatch_flags = 0;
	if (args->flags & I915_EXEC_SECURE) {
		if (!file->is_master || !capable(CAP_SYS_ADMIN))
			return -EPERM;

		dispatch_flags |= I915_DISPATCH_SECURE;
	}
	if (args->flags & I915_EXEC_IS_PINNED)
		dispatch_flags |= I915_DISPATCH_PINNED;

	if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) {
		DRM_DEBUG("execbuf with unknown ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
	}

	if (((args->flags & I915_EXEC_RING_MASK) != I915_EXEC_BSD) &&
	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
		DRM_DEBUG("execbuf with non bsd ring but with invalid "
			  "bsd dispatch flags: %d\n", (int)(args->flags));
		return -EINVAL;
	}

	if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
		ring = &dev_priv->ring[RCS];
	else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
		if (HAS_BSD2(dev)) {
			int ring_id;

			switch (args->flags & I915_EXEC_BSD_MASK) {
			case I915_EXEC_BSD_DEFAULT:
				ring_id = gen8_dispatch_bsd_ring(dev, file);
				ring = &dev_priv->ring[ring_id];
				break;
			case I915_EXEC_BSD_RING1:
				ring = &dev_priv->ring[VCS];
				break;
			case I915_EXEC_BSD_RING2:
				ring = &dev_priv->ring[VCS2];
				break;
			default:
				DRM_DEBUG("execbuf with unknown bsd ring: %d\n",
					  (int)(args->flags & I915_EXEC_BSD_MASK));
				return -EINVAL;
			}
		} else
			ring = &dev_priv->ring[VCS];
	} else
		ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];

	if (!intel_ring_initialized(ring)) {
		DRM_DEBUG("execbuf with invalid ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
	}

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	intel_runtime_pm_get(dev_priv);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_mutex_err;

	ctx = i915_gem_validate_context(dev, file, ring, ctx_id);
	if (IS_ERR(ctx)) {
		mutex_unlock(&dev->struct_mutex);
		ret = PTR_ERR(ctx);
		goto pre_mutex_err;
	}

	i915_gem_context_reference(ctx);

	if (ctx->ppgtt)
		vm = &ctx->ppgtt->base;
	else
		vm = &dev_priv->gtt.base;

	eb = eb_create(args);
	if (eb == NULL) {
		i915_gem_context_unreference(ctx);
		mutex_unlock(&dev->struct_mutex);
		ret = -ENOMEM;
		goto pre_mutex_err;
	}

	/* Look up object handles */
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
	if (ret)
		goto err;

	/* take note of the batch buffer before we might reorder the lists */
	batch_obj = eb_get_batch(eb);

	/* Move the objects en-masse into the GTT, evicting if necessary. */
	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
	if (need_relocs)
		ret = i915_gem_execbuffer_relocate(eb);
	if (ret) {
		if (ret == -EFAULT) {
			ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
								eb, exec);
			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	if (batch_obj->base.pending_write_domain) {
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}

	if (i915_needs_cmd_parser(ring) && args->batch_len) {
		batch_obj = i915_gem_execbuffer_parse(ring,
						      &shadow_exec_entry,
						      eb,
						      batch_obj,
						      args->batch_start_offset,
						      args->batch_len,
						      file->is_master);
		if (IS_ERR(batch_obj)) {
			ret = PTR_ERR(batch_obj);
			goto err;
		}

		/*
		 * Set the DISPATCH_SECURE bit to remove the NON_SECURE
		 * bit from MI_BATCH_BUFFER_START commands issued in the
		 * dispatch_execbuffer implementations. We specifically
		 * don't want that set when the command parser is
		 * enabled.
		 *
		 * FIXME: with aliasing ppgtt, buffers that should only
		 * be in ggtt still end up in the aliasing ppgtt. remove
		 * this check when that is fixed.
		 */
		if (USES_FULL_PPGTT(dev))
			dispatch_flags |= I915_DISPATCH_SECURE;

		exec_start = 0;
	}

	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
	 * batch" bit. Hence we need to pin secure batches into the global gtt.
	 * hsw should have this fixed, but bdw mucks it up again. */
	if (dispatch_flags & I915_DISPATCH_SECURE) {
		/*
		 * So on first glance it looks freaky that we pin the batch here
		 * outside of the reservation loop. But:
		 * - The batch is already pinned into the relevant ppgtt, so we
		 *   already have the backing storage fully allocated.
		 * - No other BO uses the global gtt (well contexts, but meh),
		 *   so we don't really have issues with multiple objects not
		 *   fitting due to fragmentation.
		 * So this is actually safe.
		 */
		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
		if (ret)
			goto err;

		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
	} else
		exec_start += i915_gem_obj_offset(batch_obj, vm);

	ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
				      &eb->vmas, batch_obj, exec_start,
				      dispatch_flags);

	/*
	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
	 * batch vma for correctness. For less ugly and less fragility this
	 * needs to be adjusted to also track the ggtt batch vma properly as
	 * active.
	 */
	if (dispatch_flags & I915_DISPATCH_SECURE)
		i915_gem_object_ggtt_unpin(batch_obj);
err:
	/* the request owns the ref now */
	i915_gem_context_unreference(ctx);
	eb_destroy(eb);

	mutex_unlock(&dev->struct_mutex);

pre_mutex_err:
	/* intel_gpu_busy should also get a ref, so it will free when the
	 * device is really idle. */
	intel_runtime_pm_put(dev_priv);
	return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret, i;

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* Copy in the exec list from userland */
	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
	if (exec_list == NULL || exec2_list == NULL) {
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -ENOMEM;
	}
	ret = copy_from_user(exec_list,
			     to_user_ptr(args->buffers_ptr),
			     sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (INTEL_INFO(dev)->gen < 4)
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;
	i915_execbuffer2_set_context_id(exec2, 0);

	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
	if (!ret) {
		struct drm_i915_gem_exec_object __user *user_exec_list =
			to_user_ptr(args->buffers_ptr);

		/* Copy the new buffer offsets back to the user's exec list. */
		for (i = 0; i < args->buffer_count; i++) {
			ret = __copy_to_user(&user_exec_list[i].offset,
					     &exec2_list[i].offset,
					     sizeof(user_exec_list[i].offset));
			if (ret) {
				ret = -EFAULT;
				DRM_DEBUG("failed to copy %d exec entries "
					  "back to user (%d)\n",
					  args->buffer_count, ret);
				break;
			}
		}
	}

	drm_free_large(exec_list);
	drm_free_large(exec2_list);
	return ret;
}

int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

	if (args->buffer_count < 1 ||
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	if (args->rsvd2 != 0) {
		DRM_DEBUG("dirty rsvd2 field\n");
		return -EINVAL;
	}

	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
			     GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
	if (exec2_list == NULL)
		exec2_list = drm_malloc_ab(sizeof(*exec2_list),
					   args->buffer_count);
	if (exec2_list == NULL) {
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		return -ENOMEM;
	}
	ret = copy_from_user(exec2_list,
			     to_user_ptr(args->buffers_ptr),
			     sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		struct drm_i915_gem_exec_object2 __user *user_exec_list =
			to_user_ptr(args->buffers_ptr);
		int i;

		for (i = 0; i < args->buffer_count; i++) {
			ret = __copy_to_user(&user_exec_list[i].offset,
					     &exec2_list[i].offset,
					     sizeof(user_exec_list[i].offset));
			if (ret) {
				ret = -EFAULT;
				DRM_DEBUG("failed to copy %d exec entries "
					  "back to user\n",
					  args->buffer_count);
				break;
			}
		}
	}

	drm_free_large(exec2_list);
	return ret;
}