From 5c0c16b554f2303e4be706d6a97239cb54abb8ae Mon Sep 17 00:00:00 2001 From: "Sergey Semyonov (Serge)" Date: Sat, 18 Feb 2012 14:32:16 +0000 Subject: [PATCH] bitmap's synchronization git-svn-id: svn://kolibrios.org@2352 a494cfbc-eb01-0410-851d-a64ba20cac60 --- drivers/video/drm/i915/bitmap.c | 34 ++ drivers/video/drm/i915/bitmap.h | 13 + drivers/video/drm/i915/execbuffer.c | 376 ++++++++++++++++++++++ drivers/video/drm/i915/i915_dma.c | 2 +- drivers/video/drm/i915/i915_drv.c | 20 -- drivers/video/drm/i915/i915_drv.h | 2 +- drivers/video/drm/i915/i915_gem.c | 265 +++++++++++++-- drivers/video/drm/i915/i915_gem_gtt.c | 16 +- drivers/video/drm/i915/i915_irq.c | 46 ++- drivers/video/drm/i915/i915_trace.h | 6 + drivers/video/drm/i915/intel_bios.c | 2 +- drivers/video/drm/i915/intel_ringbuffer.c | 24 +- drivers/video/drm/i915/intel_ringbuffer.h | 2 +- drivers/video/drm/i915/kms_display.c | 9 +- drivers/video/drm/i915/main.c | 19 +- drivers/video/drm/i915/sna/gen6_render.c | 40 +-- drivers/video/drm/i915/sna/kgem.c | 12 +- drivers/video/drm/i915/sna/kgem.h | 25 +- drivers/video/drm/i915/sna/sna.c | 12 +- 19 files changed, 786 insertions(+), 139 deletions(-) create mode 100644 drivers/video/drm/i915/execbuffer.c diff --git a/drivers/video/drm/i915/bitmap.c b/drivers/video/drm/i915/bitmap.c index ecbe2a6648..0b97728b55 100644 --- a/drivers/video/drm/i915/bitmap.c +++ b/drivers/video/drm/i915/bitmap.c @@ -6,6 +6,10 @@ #include "intel_drv.h" #include "bitmap.h" +#define memmove __builtin_memmove + +int gem_object_lock(struct drm_i915_gem_object *obj); + #define DRIVER_CAPS_0 HW_BIT_BLIT | HW_TEX_BLIT; #define DRIVER_CAPS_1 0 @@ -174,7 +178,37 @@ err1: }; +int lock_surface(struct io_call_12 *pbitmap) +{ + int ret; + drm_i915_private_t *dev_priv = main_device->dev_private; + + bitmap_t *bitmap; + + if(unlikely(pbitmap->handle == 0)) + return -1; + + bitmap = (bitmap_t*)hman_get_data(&bm_man, pbitmap->handle); + + if(unlikely(bitmap==NULL)) + return -1; + + ret = gem_object_lock(bitmap->obj); + if(ret !=0 ) + { + pbitmap->data = NULL; + pbitmap->pitch = 0; + + dbgprintf("%s fail\n", __FUNCTION__); + return ret; + }; + + pbitmap->data = bitmap->uaddr; + pbitmap->pitch = bitmap->pitch; + + return 0; +}; int init_hman(struct hman *man, u32 count) { diff --git a/drivers/video/drm/i915/bitmap.h b/drivers/video/drm/i915/bitmap.h index 606cc872a8..c32aca489e 100644 --- a/drivers/video/drm/i915/bitmap.h +++ b/drivers/video/drm/i915/bitmap.h @@ -66,6 +66,17 @@ struct io_call_10 /* SRV_CREATE_SURFACE */ u32 format; // reserved mbz }; +struct io_call_12 /* SRV_LOCK_SURFACE */ +{ + u32 handle; // ignored + void *data; // ignored + + u32 width; + u32 height; + u32 pitch; // ignored +}; + + typedef struct { uint32_t idx; @@ -86,5 +97,7 @@ typedef struct int get_driver_caps(hwcaps_t *caps); int create_surface(struct io_call_10 *pbitmap); +int lock_surface(struct io_call_12 *pbitmap); + int init_bitmaps(); diff --git a/drivers/video/drm/i915/execbuffer.c b/drivers/video/drm/i915/execbuffer.c new file mode 100644 index 0000000000..e6f8a7e7fe --- /dev/null +++ b/drivers/video/drm/i915/execbuffer.c @@ -0,0 +1,376 @@ +#include "drmP.h" +#include "drm.h" +#include "i915_drm.h" +#include "i915_drv.h" +#include "intel_drv.h" +//#include + +#undef mb +#undef rmb +#undef wmb +#define mb() asm volatile("mfence") +#define rmb() asm volatile ("lfence") +#define wmb() asm volatile ("sfence") + + +typedef struct +{ + struct drm_i915_gem_object *batch; + struct list_head objects; + u32 exec_start; + u32 exec_len; + +}batchbuffer_t; + 
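The batchbuffer_t just defined is the hand-off unit between the SNA renderer and the ring: later in this patch, _kgem_submit() in sna/kgem.c fills one in and passes it to exec_batch(), which is defined further down in this new file. A minimal sketch of that contract — submit_example() is hypothetical, everything else follows this patch (exec_batch() resolves the ring itself, so NULL is passed):

    int exec_batch(struct drm_device *dev, struct intel_ring_buffer *ring,
                   batchbuffer_t *exec);

    /* Hypothetical caller, modelled on _kgem_submit() later in this
     * patch: wrap a filled batch object in a batchbuffer_t and hand
     * it to exec_batch() for dispatch on the render ring. */
    static int submit_example(struct drm_device *dev,
                              struct drm_i915_gem_object *batch,
                              u32 offset, u32 nbytes)
    {
        batchbuffer_t exb;

        memset(&exb, 0, sizeof(exb));
        INIT_LIST_HEAD(&exb.objects);    /* objects referenced by the batch */

        exb.batch      = batch;
        exb.exec_start = batch->gtt_offset + offset; /* GTT address of dword 0 */
        exb.exec_len   = nbytes;                     /* batch length in bytes */

        return exec_batch(dev, NULL, &exb);
    }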
+struct change_domains { + uint32_t invalidate_domains; + uint32_t flush_domains; + uint32_t flush_rings; + uint32_t flips; +}; + +/* + * Set the next domain for the specified object. This + * may not actually perform the necessary flushing/invaliding though, + * as that may want to be batched with other set_domain operations + * + * This is (we hope) the only really tricky part of gem. The goal + * is fairly simple -- track which caches hold bits of the object + * and make sure they remain coherent. A few concrete examples may + * help to explain how it works. For shorthand, we use the notation + * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the + * a pair of read and write domain masks. + * + * Case 1: the batch buffer + * + * 1. Allocated + * 2. Written by CPU + * 3. Mapped to GTT + * 4. Read by GPU + * 5. Unmapped from GTT + * 6. Freed + * + * Let's take these a step at a time + * + * 1. Allocated + * Pages allocated from the kernel may still have + * cache contents, so we set them to (CPU, CPU) always. + * 2. Written by CPU (using pwrite) + * The pwrite function calls set_domain (CPU, CPU) and + * this function does nothing (as nothing changes) + * 3. Mapped by GTT + * This function asserts that the object is not + * currently in any GPU-based read or write domains + * 4. Read by GPU + * i915_gem_execbuffer calls set_domain (COMMAND, 0). + * As write_domain is zero, this function adds in the + * current read domains (CPU+COMMAND, 0). + * flush_domains is set to CPU. + * invalidate_domains is set to COMMAND + * clflush is run to get data out of the CPU caches + * then i915_dev_set_domain calls i915_gem_flush to + * emit an MI_FLUSH and drm_agp_chipset_flush + * 5. Unmapped from GTT + * i915_gem_object_unbind calls set_domain (CPU, CPU) + * flush_domains and invalidate_domains end up both zero + * so no flushing/invalidating happens + * 6. Freed + * yay, done + * + * Case 2: The shared render buffer + * + * 1. Allocated + * 2. Mapped to GTT + * 3. Read/written by GPU + * 4. set_domain to (CPU,CPU) + * 5. Read/written by CPU + * 6. Read/written by GPU + * + * 1. Allocated + * Same as last example, (CPU, CPU) + * 2. Mapped to GTT + * Nothing changes (assertions find that it is not in the GPU) + * 3. Read/written by GPU + * execbuffer calls set_domain (RENDER, RENDER) + * flush_domains gets CPU + * invalidate_domains gets GPU + * clflush (obj) + * MI_FLUSH and drm_agp_chipset_flush + * 4. set_domain (CPU, CPU) + * flush_domains gets GPU + * invalidate_domains gets CPU + * wait_rendering (obj) to make sure all drawing is complete. + * This will include an MI_FLUSH to get the data from GPU + * to memory + * clflush (obj) to invalidate the CPU cache + * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?) + * 5. Read/written by CPU + * cache lines are loaded and dirtied + * 6. Read written by GPU + * Same as last GPU access + * + * Case 3: The constant buffer + * + * 1. Allocated + * 2. Written by CPU + * 3. Read by GPU + * 4. Updated (written) by CPU again + * 5. Read by GPU + * + * 1. Allocated + * (CPU, CPU) + * 2. Written by CPU + * (CPU, CPU) + * 3. Read by GPU + * (CPU+RENDER, 0) + * flush_domains = CPU + * invalidate_domains = RENDER + * clflush (obj) + * MI_FLUSH + * drm_agp_chipset_flush + * 4. Updated (written) by CPU again + * (CPU, CPU) + * flush_domains = 0 (no previous write domain) + * invalidate_domains = 0 (no new read domains) + * 5. 
Read by GPU + * (CPU+RENDER, 0) + * flush_domains = CPU + * invalidate_domains = RENDER + * clflush (obj) + * MI_FLUSH + * drm_agp_chipset_flush + */ +static void +i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj, + struct intel_ring_buffer *ring, + struct change_domains *cd) +{ + uint32_t invalidate_domains = 0, flush_domains = 0; + + /* + * If the object isn't moving to a new write domain, + * let the object stay in multiple read domains + */ + if (obj->base.pending_write_domain == 0) + obj->base.pending_read_domains |= obj->base.read_domains; + + /* + * Flush the current write domain if + * the new read domains don't match. Invalidate + * any read domains which differ from the old + * write domain + */ + if (obj->base.write_domain && + (((obj->base.write_domain != obj->base.pending_read_domains || + obj->ring != ring)) || + (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) { + flush_domains |= obj->base.write_domain; + invalidate_domains |= + obj->base.pending_read_domains & ~obj->base.write_domain; + } + /* + * Invalidate any read caches which may have + * stale data. That is, any new read domains. + */ + invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains; + if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) + i915_gem_clflush_object(obj); + + if (obj->base.pending_write_domain) + cd->flips |= atomic_read(&obj->pending_flip); + + /* The actual obj->write_domain will be updated with + * pending_write_domain after we emit the accumulated flush for all + * of our domain changes in execbuffers (which clears objects' + * write_domains). So if we have a current write domain that we + * aren't changing, set pending_write_domain to that. + */ + if (flush_domains == 0 && obj->base.pending_write_domain == 0) + obj->base.pending_write_domain = obj->base.write_domain; + + cd->invalidate_domains |= invalidate_domains; + cd->flush_domains |= flush_domains; + if (flush_domains & I915_GEM_GPU_DOMAINS) + cd->flush_rings |= obj->ring->id; + if (invalidate_domains & I915_GEM_GPU_DOMAINS) + cd->flush_rings |= ring->id; +} + +static int +i915_gem_execbuffer_flush(struct drm_device *dev, + uint32_t invalidate_domains, + uint32_t flush_domains, + uint32_t flush_rings) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + int i, ret; + + if (flush_domains & I915_GEM_DOMAIN_CPU) + intel_gtt_chipset_flush(); + + if (flush_domains & I915_GEM_DOMAIN_GTT) + wmb(); + + if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) { + for (i = 0; i < I915_NUM_RINGS; i++) + if (flush_rings & (1 << i)) { + ret = i915_gem_flush_ring(&dev_priv->ring[i], + invalidate_domains, + flush_domains); + if (ret) + return ret; + } + } + + return 0; +} + +static int +i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, + struct list_head *objects) +{ + struct drm_i915_gem_object *obj; + struct change_domains cd; + int ret; + + memset(&cd, 0, sizeof(cd)); + list_for_each_entry(obj, objects, exec_list) + i915_gem_object_set_to_gpu_domain(obj, ring, &cd); + + if (cd.invalidate_domains | cd.flush_domains) { + ret = i915_gem_execbuffer_flush(ring->dev, + cd.invalidate_domains, + cd.flush_domains, + cd.flush_rings); + if (ret) + return ret; + } + +// if (cd.flips) { +// ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips); +// if (ret) +// return ret; +// } + +// list_for_each_entry(obj, objects, exec_list) { +// ret = i915_gem_execbuffer_sync_rings(obj, ring); +// if (ret) +// return ret; +// } + + return 0; +} + +static void 
+i915_gem_execbuffer_move_to_active(struct list_head *objects, + struct intel_ring_buffer *ring, + u32 seqno) +{ + struct drm_i915_gem_object *obj; + + list_for_each_entry(obj, objects, exec_list) { + u32 old_read = obj->base.read_domains; + u32 old_write = obj->base.write_domain; + + + obj->base.read_domains = obj->base.pending_read_domains; + obj->base.write_domain = obj->base.pending_write_domain; + obj->fenced_gpu_access = obj->pending_fenced_gpu_access; + + i915_gem_object_move_to_active(obj, ring, seqno); + if (obj->base.write_domain) { + obj->dirty = 1; + obj->pending_gpu_write = true; + list_move_tail(&obj->gpu_write_list, + &ring->gpu_write_list); +// intel_mark_busy(ring->dev, obj); + } + +// trace_i915_gem_object_change_domain(obj, old_read, old_write); + } +} + +static void +i915_gem_execbuffer_retire_commands(struct drm_device *dev, + struct intel_ring_buffer *ring) +{ + struct drm_i915_gem_request *request; + u32 invalidate; + + /* + * Ensure that the commands in the batch buffer are + * finished before the interrupt fires. + * + * The sampler always gets flushed on i965 (sigh). + */ + invalidate = I915_GEM_DOMAIN_COMMAND; + if (INTEL_INFO(dev)->gen >= 4) + invalidate |= I915_GEM_DOMAIN_SAMPLER; + if (ring->flush(ring, invalidate, 0)) { + i915_gem_next_request_seqno(ring); + return; + } + + /* Add a breadcrumb for the completion of the batch buffer */ + request = kzalloc(sizeof(*request), GFP_KERNEL); + if (request == NULL || i915_add_request(ring, NULL, request)) { + i915_gem_next_request_seqno(ring); + kfree(request); + } +} + + +int exec_batch(struct drm_device *dev, struct intel_ring_buffer *ring, + batchbuffer_t *exec) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_gem_object *obj; + + u32 seqno; + int i; + int ret; + + ring = &dev_priv->ring[RCS]; + + mutex_lock(&dev->struct_mutex); + + list_for_each_entry(obj, &exec->objects, exec_list) + { + obj->base.pending_read_domains = 0; + obj->base.pending_write_domain = 0; + }; + + exec->batch->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; + + ret = i915_gem_execbuffer_move_to_gpu(ring, &exec->objects); + if (ret) + goto err; + + seqno = i915_gem_next_request_seqno(ring); +// for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) { +// if (seqno < ring->sync_seqno[i]) { + /* The GPU can not handle its semaphore value wrapping, + * so every billion or so execbuffers, we need to stall + * the GPU in order to reset the counters. 
+ */ +// ret = i915_gpu_idle(dev); +// if (ret) +// goto err; + +// BUG_ON(ring->sync_seqno[i]); +// } +// }; + + ret = ring->dispatch_execbuffer(ring, exec->exec_start, exec->exec_len); + if (ret) + goto err; + + i915_gem_execbuffer_move_to_active(&exec->objects, ring, seqno); + i915_gem_execbuffer_retire_commands(dev, ring); + +err: + mutex_unlock(&dev->struct_mutex); + + return ret; + +}; diff --git a/drivers/video/drm/i915/i915_dma.c b/drivers/video/drm/i915/i915_dma.c index 3ffb06c8c0..6fe9c5a163 100644 --- a/drivers/video/drm/i915/i915_dma.c +++ b/drivers/video/drm/i915/i915_dma.c @@ -76,7 +76,7 @@ static int i915_init_phys_hws(struct drm_device *dev) /* Program Hardware Status Page */ dev_priv->status_page_dmah = - drm_pci_alloc(dev, PAGE_SIZE, PAGE_SIZE); + (void*)drm_pci_alloc(dev, PAGE_SIZE, PAGE_SIZE); if (!dev_priv->status_page_dmah) { DRM_ERROR("Can not allocate hardware status page\n"); diff --git a/drivers/video/drm/i915/i915_drv.c b/drivers/video/drm/i915/i915_drv.c index a09f025292..485da8ed09 100644 --- a/drivers/video/drm/i915/i915_drv.c +++ b/drivers/video/drm/i915/i915_drv.c @@ -74,26 +74,6 @@ int i915_vbt_sdvo_panel_type __read_mostly = -1; .subdevice = PCI_ANY_ID, \ .driver_data = (unsigned long) info } -static const struct intel_device_info intel_i830_info = { - .gen = 2, .is_mobile = 1, .cursor_needs_physical = 1, - .has_overlay = 1, .overlay_needs_physical = 1, -}; - -static const struct intel_device_info intel_845g_info = { - .gen = 2, - .has_overlay = 1, .overlay_needs_physical = 1, -}; - -static const struct intel_device_info intel_i85x_info = { - .gen = 2, .is_i85x = 1, .is_mobile = 1, - .cursor_needs_physical = 1, - .has_overlay = 1, .overlay_needs_physical = 1, -}; - -static const struct intel_device_info intel_i865g_info = { - .gen = 2, - .has_overlay = 1, .overlay_needs_physical = 1, -}; static const struct intel_device_info intel_i915g_info = { .gen = 3, .is_i915g = 1, .cursor_needs_physical = 1, diff --git a/drivers/video/drm/i915/i915_drv.h b/drivers/video/drm/i915/i915_drv.h index 32aedc4d8b..a606c7bb6c 100644 --- a/drivers/video/drm/i915/i915_drv.h +++ b/drivers/video/drm/i915/i915_drv.h @@ -691,7 +691,7 @@ typedef struct drm_i915_private { struct drm_crtc *plane_to_crtc_mapping[3]; struct drm_crtc *pipe_to_crtc_mapping[3]; -// wait_queue_head_t pending_flip_queue; + wait_queue_head_t pending_flip_queue; bool flip_pending_is_done; /* Reclocking support */ diff --git a/drivers/video/drm/i915/i915_gem.c b/drivers/video/drm/i915/i915_gem.c index a16ff39d2d..44644adfa8 100644 --- a/drivers/video/drm/i915/i915_gem.c +++ b/drivers/video/drm/i915/i915_gem.c @@ -127,8 +127,8 @@ static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_file *file); static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj); -static int i915_gem_inactive_shrink(struct shrinker *shrinker, - struct shrink_control *sc); +//static int i915_gem_inactive_shrink(struct shrinker *shrinker, +// struct shrink_control *sc); /* some bookkeeping */ static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, @@ -190,6 +190,7 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) WARN_ON(i915_verify_lists(dev)); return 0; } +#endif static inline bool i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) @@ -197,8 +198,6 @@ i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) return obj->gtt_space && !obj->active && obj->pin_count == 0; } -#endif - void i915_gem_do_init(struct drm_device *dev, unsigned long start, unsigned long mappable_end, 
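The request-tracking code this patch re-enables below (i915_add_request, the retire loop, i915_wait_request) pivots on i915_seqno_passed(). For reference, that helper is the usual wrap-safe comparison from upstream i915_drv.h (unchanged by this patch):

    /* Wrap-safe "has seq1 reached seq2?": the unsigned subtraction is
     * taken modulo 2^32, so the test stays correct across seqno wrap. */
    static inline bool
    i915_seqno_passed(uint32_t seq1, uint32_t seq2)
    {
            return (int32_t)(seq1 - seq2) >= 0;
    }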
@@ -780,7 +779,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj, err_pages: while (i--) - FreePage(obj->pages[i]); + FreePage((addr_t)obj->pages[i]); free(obj->pages); obj->pages = NULL; @@ -802,7 +801,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) obj->dirty = 0; for (i = 0; i < page_count; i++) { - FreePage(obj->pages[i]); + FreePage((addr_t)obj->pages[i]); } obj->dirty = 0; @@ -864,9 +863,30 @@ i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj) i915_gem_object_move_off_active(obj); } +static void +i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) +{ + struct drm_device *dev = obj->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + if (obj->pin_count != 0) + list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list); + else + list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); + BUG_ON(!list_empty(&obj->gpu_write_list)); + BUG_ON(!obj->active); + obj->ring = NULL; + i915_gem_object_move_off_active(obj); + obj->fenced_gpu_access = false; + + obj->active = 0; + obj->pending_gpu_write = false; + drm_gem_object_unreference(&obj->base); + + WARN_ON(i915_verify_lists(dev)); +} /* Immediately discard the backing storage */ static void @@ -906,10 +926,53 @@ i915_gem_process_flushing_list(struct intel_ring_buffer *ring, i915_gem_object_move_to_active(obj, ring, i915_gem_next_request_seqno(ring)); + trace_i915_gem_object_change_domain(obj, + obj->base.read_domains, + old_write_domain); } } } +int +i915_add_request(struct intel_ring_buffer *ring, + struct drm_file *file, + struct drm_i915_gem_request *request) +{ + drm_i915_private_t *dev_priv = ring->dev->dev_private; + uint32_t seqno; + int was_empty; + int ret; + + BUG_ON(request == NULL); + + ret = ring->add_request(ring, &seqno); + if (ret) + return ret; + + trace_i915_gem_request_add(ring, seqno); + + request->seqno = seqno; + request->ring = ring; + request->emitted_jiffies = jiffies; + was_empty = list_empty(&ring->request_list); + list_add_tail(&request->list, &ring->request_list); + + + ring->outstanding_lazy_request = false; + +// if (!dev_priv->mm.suspended) { +// if (i915_enable_hangcheck) { +// mod_timer(&dev_priv->hangcheck_timer, +// jiffies + +// msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)); +// } +// if (was_empty) +// queue_delayed_work(dev_priv->wq, +// &dev_priv->mm.retire_work, HZ); +// } + return 0; +} + @@ -921,6 +984,93 @@ i915_gem_process_flushing_list(struct intel_ring_buffer *ring, +/** + * This function clears the request list as sequence numbers are passed. + */ +static void +i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) +{ + uint32_t seqno; + int i; + + if (list_empty(&ring->request_list)) + return; + + WARN_ON(i915_verify_lists(ring->dev)); + + seqno = ring->get_seqno(ring); + + for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) + if (seqno >= ring->sync_seqno[i]) + ring->sync_seqno[i] = 0; + + while (!list_empty(&ring->request_list)) { + struct drm_i915_gem_request *request; + + request = list_first_entry(&ring->request_list, + struct drm_i915_gem_request, + list); + + if (!i915_seqno_passed(seqno, request->seqno)) + break; + + trace_i915_gem_request_retire(ring, request->seqno); + + list_del(&request->list); + kfree(request); + } + + /* Move any buffers on the active list that are no longer referenced + * by the ringbuffer to the flushing/inactive lists as appropriate. 
+ */ + while (!list_empty(&ring->active_list)) { + struct drm_i915_gem_object *obj; + + obj = list_first_entry(&ring->active_list, + struct drm_i915_gem_object, + ring_list); + + if (!i915_seqno_passed(seqno, obj->last_rendering_seqno)) + break; + + if (obj->base.write_domain != 0) + i915_gem_object_move_to_flushing(obj); + else + i915_gem_object_move_to_inactive(obj); + } + + if (unlikely(ring->trace_irq_seqno && + i915_seqno_passed(seqno, ring->trace_irq_seqno))) { + ring->irq_put(ring); + ring->trace_irq_seqno = 0; + } + + WARN_ON(i915_verify_lists(ring->dev)); +} + +void +i915_gem_retire_requests(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + int i; + + if (!list_empty(&dev_priv->mm.deferred_free_list)) { + struct drm_i915_gem_object *obj, *next; + + /* We must be careful that during unbind() we do not + * accidentally infinitely recurse into retire requests. + * Currently: + * retire -> free -> unbind -> wait -> retire_ring + */ + list_for_each_entry_safe(obj, next, + &dev_priv->mm.deferred_free_list, + mm_list) + i915_gem_free_object_tail(obj); + } + + for (i = 0; i < I915_NUM_RINGS; i++) + i915_gem_retire_requests_ring(&dev_priv->ring[i]); +} @@ -931,22 +1081,97 @@ i915_gem_process_flushing_list(struct intel_ring_buffer *ring, +/** + * Waits for a sequence number to be signaled, and cleans up the + * request and object lists appropriately for that event. + */ +int +i915_wait_request(struct intel_ring_buffer *ring, + uint32_t seqno) +{ + drm_i915_private_t *dev_priv = ring->dev->dev_private; + u32 ier; + int ret = 0; + BUG_ON(seqno == 0); +// if (atomic_read(&dev_priv->mm.wedged)) { +// struct completion *x = &dev_priv->error_completion; +// bool recovery_complete; +// unsigned long flags; + /* Give the error handler a chance to run. */ +// spin_lock_irqsave(&x->wait.lock, flags); +// recovery_complete = x->done > 0; +// spin_unlock_irqrestore(&x->wait.lock, flags); +// +// return recovery_complete ? -EIO : -EAGAIN; +// } + if (seqno == ring->outstanding_lazy_request) { + struct drm_i915_gem_request *request; + request = kzalloc(sizeof(*request), GFP_KERNEL); + if (request == NULL) + return -ENOMEM; + ret = i915_add_request(ring, NULL, request); + if (ret) { + kfree(request); + return ret; + } + seqno = request->seqno; + } + if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) { + if (HAS_PCH_SPLIT(ring->dev)) + ier = I915_READ(DEIER) | I915_READ(GTIER); + else + ier = I915_READ(IER); + if (!ier) { + DRM_ERROR("something (likely vbetool) disabled " + "interrupts, re-enabling\n"); +// ring->dev->driver->irq_preinstall(ring->dev); +// ring->dev->driver->irq_postinstall(ring->dev); + } + trace_i915_gem_request_wait_begin(ring, seqno); + ring->waiting_seqno = seqno; + if (ring->irq_get(ring)) { +// printf("enter wait\n"); + wait_event(ring->irq_queue, + i915_seqno_passed(ring->get_seqno(ring), seqno) + || atomic_read(&dev_priv->mm.wedged)); + ring->irq_put(ring); + } else if (wait_for_atomic(i915_seqno_passed(ring->get_seqno(ring), + seqno) || + atomic_read(&dev_priv->mm.wedged), 3000)) + ret = -EBUSY; + ring->waiting_seqno = 0; + trace_i915_gem_request_wait_end(ring, seqno); + } + if (atomic_read(&dev_priv->mm.wedged)) + ret = -EAGAIN; + if (ret && ret != -ERESTARTSYS) + DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n", + __func__, ret, seqno, ring->get_seqno(ring), + dev_priv->next_seqno); + /* Directly dispatch request retiring. 
While we have the work queue + * to handle this, the waiter on a request often wants an associated + * buffer to have made it to the inactive list, and we would need + * a separate wait queue to handle that. + */ + if (ret == 0) + i915_gem_retire_requests_ring(ring); - + return ret; +} /** * Ensures that all rendering to the object has completed and the object is @@ -966,9 +1191,9 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) * it. */ if (obj->active) { -// ret = i915_wait_request(obj->ring, obj->last_rendering_seqno); -// if (ret) -// return ret; + ret = i915_wait_request(obj->ring, obj->last_rendering_seqno); + if (ret) + return ret; } return 0; @@ -1166,10 +1391,10 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj, if (obj->last_fenced_seqno && pipelined != obj->last_fenced_ring) { if (!ring_passed_seqno(obj->last_fenced_ring, obj->last_fenced_seqno)) { -// ret = i915_wait_request(obj->last_fenced_ring, -// obj->last_fenced_seqno); -// if (ret) -// return ret; + ret = i915_wait_request(obj->last_fenced_ring, + obj->last_fenced_seqno); + if (ret) + return ret; } obj->last_fenced_seqno = 0; @@ -1601,7 +1826,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) return 0; } -#if 0 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { @@ -1661,7 +1885,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, obj->cache_level = cache_level; return 0; } -#endif /* * Prepare buffer for display plane (scanout, cursors, etc). @@ -1775,10 +1998,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) i915_gem_object_flush_gtt_write_domain(obj); - /* If we have a partially-valid cache of the object in the CPU, - * finish invalidating it and free the per-page flags. 
- */ - i915_gem_object_set_to_full_cpu_read_domain(obj); old_write_domain = obj->base.write_domain; old_read_domains = obj->base.read_domains; @@ -1837,7 +2056,10 @@ i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj) - +int gem_object_lock(struct drm_i915_gem_object *obj) +{ + return i915_gem_object_set_to_cpu_domain(obj, true); +} @@ -1867,6 +2089,7 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, int ret; BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT); + WARN_ON(i915_verify_lists(dev)); #if 0 if (obj->gtt_space != NULL) { diff --git a/drivers/video/drm/i915/i915_gem_gtt.c b/drivers/video/drm/i915/i915_gem_gtt.c index 571b05017c..9b40d8ab6e 100644 --- a/drivers/video/drm/i915/i915_gem_gtt.c +++ b/drivers/video/drm/i915/i915_gem_gtt.c @@ -122,7 +122,6 @@ int i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj) return 0; } -#if 0 void i915_gem_gtt_rebind_object(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { @@ -130,21 +129,20 @@ void i915_gem_gtt_rebind_object(struct drm_i915_gem_object *obj, struct drm_i915_private *dev_priv = dev->dev_private; unsigned int agp_type = cache_level_to_agp_type(dev, cache_level); - if (dev_priv->mm.gtt->needs_dmar) { - BUG_ON(!obj->sg_list); +// if (dev_priv->mm.gtt->needs_dmar) { +// BUG_ON(!obj->sg_list); - intel_gtt_insert_sg_entries(obj->sg_list, - obj->num_sg, - obj->gtt_space->start >> PAGE_SHIFT, - agp_type); - } else +// intel_gtt_insert_sg_entries(obj->sg_list, +// obj->num_sg, +// obj->gtt_space->start >> PAGE_SHIFT, +// agp_type); +// } else intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT, obj->base.size >> PAGE_SHIFT, obj->pages, agp_type); } -#endif void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj) { diff --git a/drivers/video/drm/i915/i915_irq.c b/drivers/video/drm/i915/i915_irq.c index fcaddfdfc1..7469bf6dde 100644 --- a/drivers/video/drm/i915/i915_irq.c +++ b/drivers/video/drm/i915/i915_irq.c @@ -35,6 +35,9 @@ #include "i915_trace.h" #include "intel_drv.h" +#define DRM_WAKEUP( queue ) wake_up( queue ) +#define DRM_INIT_WAITQUEUE( queue ) init_waitqueue_head( queue ) + #define MAX_NOPID ((u32)~0) /** @@ -84,6 +87,27 @@ ironlake_disable_display_irq(drm_i915_private_t *dev_priv, u32 mask) POSTING_READ(DEIMR); } } +static void notify_ring(struct drm_device *dev, + struct intel_ring_buffer *ring) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 seqno; + + if (ring->obj == NULL) + return; + + seqno = ring->get_seqno(ring); + trace_i915_gem_request_complete(ring, seqno); + + ring->irq_seqno = seqno; + wake_up_all(&ring->irq_queue); +// if (i915_enable_hangcheck) { +// dev_priv->hangcheck_count = 0; +// mod_timer(&dev_priv->hangcheck_timer, +// jiffies + +// msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)); +// } +} @@ -123,12 +147,12 @@ static int ironlake_irq_handler(struct drm_device *dev) ret = IRQ_HANDLED; -// if (gt_iir & (GT_USER_INTERRUPT | GT_PIPE_NOTIFY)) -// notify_ring(dev, &dev_priv->ring[RCS]); -// if (gt_iir & bsd_usr_interrupt) -// notify_ring(dev, &dev_priv->ring[VCS]); -// if (gt_iir & GT_BLT_USER_INTERRUPT) -// notify_ring(dev, &dev_priv->ring[BCS]); + if (gt_iir & (GT_USER_INTERRUPT | GT_PIPE_NOTIFY)) + notify_ring(dev, &dev_priv->ring[RCS]); + if (gt_iir & bsd_usr_interrupt) + notify_ring(dev, &dev_priv->ring[VCS]); + if (gt_iir & GT_BLT_USER_INTERRUPT) + notify_ring(dev, &dev_priv->ring[BCS]); // if (de_iir & DE_GSE) // intel_opregion_gse_intr(dev); @@ -275,11 +299,11 @@ static int ironlake_irq_postinstall(struct 
drm_device *dev) u32 render_irqs; u32 hotplug_mask; -// DRM_INIT_WAITQUEUE(&dev_priv->ring[RCS].irq_queue); -// if (HAS_BSD(dev)) -// DRM_INIT_WAITQUEUE(&dev_priv->ring[VCS].irq_queue); -// if (HAS_BLT(dev)) -// DRM_INIT_WAITQUEUE(&dev_priv->ring[BCS].irq_queue); + DRM_INIT_WAITQUEUE(&dev_priv->ring[RCS].irq_queue); + if (HAS_BSD(dev)) + DRM_INIT_WAITQUEUE(&dev_priv->ring[VCS].irq_queue); + if (HAS_BLT(dev)) + DRM_INIT_WAITQUEUE(&dev_priv->ring[BCS].irq_queue); dev_priv->vblank_pipe = DRM_I915_VBLANK_PIPE_A | DRM_I915_VBLANK_PIPE_B; dev_priv->irq_mask = ~display_mask; diff --git a/drivers/video/drm/i915/i915_trace.h b/drivers/video/drm/i915/i915_trace.h index 3fcc1c5a1e..24f7fc0827 100644 --- a/drivers/video/drm/i915/i915_trace.h +++ b/drivers/video/drm/i915/i915_trace.h @@ -5,6 +5,7 @@ #include //#include +#define WARN_ON(x) #define trace_i915_gem_object_create(x) #define trace_i915_gem_object_destroy(x) @@ -13,5 +14,10 @@ #define trace_i915_gem_ring_flush(a, b, c) #define trace_i915_gem_object_bind(a, b) #define trace_i915_ring_wait_end(x) +#define trace_i915_gem_request_add(a, b) +#define trace_i915_gem_request_retire(a, b) +#define trace_i915_gem_request_wait_begin(a, b) +#define trace_i915_gem_request_wait_end(a, b) +#define trace_i915_gem_request_complete(a, b) #endif diff --git a/drivers/video/drm/i915/intel_bios.c b/drivers/video/drm/i915/intel_bios.c index 63880e2e5c..cf577248ce 100644 --- a/drivers/video/drm/i915/intel_bios.c +++ b/drivers/video/drm/i915/intel_bios.c @@ -656,7 +656,7 @@ intel_parse_bios(struct drm_device *dev) size_t size; int i; - bios = pci_map_rom(pdev, &size); + bios = (void*)pci_map_rom(pdev, &size); if (!bios) return -1; diff --git a/drivers/video/drm/i915/intel_ringbuffer.c b/drivers/video/drm/i915/intel_ringbuffer.c index 4e417b7f6d..f3224d4946 100644 --- a/drivers/video/drm/i915/intel_ringbuffer.c +++ b/drivers/video/drm/i915/intel_ringbuffer.c @@ -348,14 +348,14 @@ init_pipe_control(struct intel_ring_buffer *ring) goto err; } -// i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); + i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); ret = i915_gem_object_pin(obj, 4096, true); if (ret) goto err_unref; pc->gtt_offset = obj->gtt_offset; - pc->cpu_page = (void*)MapIoMem(obj->pages[0], 4096, PG_SW); + pc->cpu_page = (void*)MapIoMem((addr_t)obj->pages[0], 4096, PG_SW); if (pc->cpu_page == NULL) goto err_unpin; @@ -516,7 +516,7 @@ render_ring_sync_to(struct intel_ring_buffer *waiter, struct intel_ring_buffer *signaller, u32 seqno) { -// WARN_ON(signaller->semaphore_register[RCS] == MI_SEMAPHORE_SYNC_INVALID); + WARN_ON(signaller->semaphore_register[RCS] == MI_SEMAPHORE_SYNC_INVALID); return intel_ring_sync(waiter, signaller, RCS, @@ -529,7 +529,7 @@ gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter, struct intel_ring_buffer *signaller, u32 seqno) { -// WARN_ON(signaller->semaphore_register[VCS] == MI_SEMAPHORE_SYNC_INVALID); + WARN_ON(signaller->semaphore_register[VCS] == MI_SEMAPHORE_SYNC_INVALID); return intel_ring_sync(waiter, signaller, VCS, @@ -542,7 +542,7 @@ gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter, struct intel_ring_buffer *signaller, u32 seqno) { -// WARN_ON(signaller->semaphore_register[BCS] == MI_SEMAPHORE_SYNC_INVALID); + WARN_ON(signaller->semaphore_register[BCS] == MI_SEMAPHORE_SYNC_INVALID); return intel_ring_sync(waiter, signaller, BCS, @@ -969,7 +969,7 @@ static int init_status_page(struct intel_ring_buffer *ring) goto err; } -// i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); + 
i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); ret = i915_gem_object_pin(obj, 4096, true); if (ret != 0) { @@ -977,7 +977,7 @@ static int init_status_page(struct intel_ring_buffer *ring) } ring->status_page.gfx_addr = obj->gtt_offset; - ring->status_page.page_addr = MapIoMem(obj->pages[0], 4096, PG_SW); + ring->status_page.page_addr = (void*)MapIoMem((addr_t)obj->pages[0], 4096, PG_SW); if (ring->status_page.page_addr == NULL) { memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); goto err_unpin; @@ -1010,7 +1010,7 @@ int intel_init_ring_buffer(struct drm_device *dev, INIT_LIST_HEAD(&ring->request_list); INIT_LIST_HEAD(&ring->gpu_write_list); -// init_waitqueue_head(&ring->irq_queue); + init_waitqueue_head(&ring->irq_queue); spin_lock_init(&ring->irq_lock); ring->irq_mask = ~0; @@ -1175,8 +1175,8 @@ int intel_ring_begin(struct intel_ring_buffer *ring, int n = 4*num_dwords; int ret; -// if (unlikely(atomic_read(&dev_priv->mm.wedged))) -// return -EIO; + if (unlikely(atomic_read(&dev_priv->mm.wedged))) + return -EIO; if (unlikely(ring->tail + n > ring->effective_size)) { ret = intel_wrap_ring_buffer(ring); @@ -1208,7 +1208,7 @@ static const struct intel_ring_buffer render_ring = { .init = init_render_ring, .write_tail = ring_write_tail, .flush = render_ring_flush, - .add_request = render_ring_add_request, + .add_request = render_ring_add_request, .get_seqno = ring_get_seqno, .irq_get = render_ring_get_irq, .irq_put = render_ring_put_irq, @@ -1403,7 +1403,7 @@ static int blt_ring_init(struct intel_ring_buffer *ring) return ret; } - ptr = MapIoMem(obj->pages[0], 4096, PG_SW); + ptr = (void*)MapIoMem((addr_t)obj->pages[0], 4096, PG_SW); obj->mapped = ptr; *ptr++ = MI_BATCH_BUFFER_END; diff --git a/drivers/video/drm/i915/intel_ringbuffer.h b/drivers/video/drm/i915/intel_ringbuffer.h index df2a8208b2..4814e14c05 100644 --- a/drivers/video/drm/i915/intel_ringbuffer.h +++ b/drivers/video/drm/i915/intel_ringbuffer.h @@ -113,7 +113,7 @@ struct intel_ring_buffer { */ u32 outstanding_lazy_request; -// wait_queue_head_t irq_queue; + wait_queue_head_t irq_queue; drm_local_map_t map; void *private; diff --git a/drivers/video/drm/i915/kms_display.c b/drivers/video/drm/i915/kms_display.c index f2639ca1b7..a08fcb38e4 100644 --- a/drivers/video/drm/i915/kms_display.c +++ b/drivers/video/drm/i915/kms_display.c @@ -210,7 +210,7 @@ int init_display_kms(struct drm_device *dev) obj = i915_gem_alloc_object(dev, 4096); i915_gem_object_pin(obj, 4096, true); - cmd_buffer = MapIoMem(obj->pages[0], 4096, PG_SW|PG_NOCACHE); + cmd_buffer = MapIoMem((addr_t)obj->pages[0], 4096, PG_SW|PG_NOCACHE); cmd_offset = obj->gtt_offset; }; #endif @@ -577,7 +577,7 @@ cursor_t* __stdcall select_cursor_kms(cursor_t *cursor) if (!dev_priv->info->cursor_needs_physical) intel_crtc->cursor_addr = cursor->cobj->gtt_offset; else - intel_crtc->cursor_addr = cursor->cobj; + intel_crtc->cursor_addr = (addr_t)cursor->cobj; intel_crtc->cursor_width = 32; intel_crtc->cursor_height = 32; @@ -1003,12 +1003,7 @@ int blit_textured(u32 hbitmap, int dst_x, int dst_y, dst_x+= winrc.left; dst_y+= winrc.top; - i915_gem_object_set_to_gtt_domain(src_bitmap->obj, false); - sna_blit_copy(dst_bitmap, dst_x, dst_y, w, h, src_bitmap, src_x, src_y); - src_bitmap->obj->base.read_domains = I915_GEM_DOMAIN_CPU; - src_bitmap->obj->base.write_domain = I915_GEM_DOMAIN_CPU; - }; diff --git a/drivers/video/drm/i915/main.c b/drivers/video/drm/i915/main.c index d91c66a587..8004ca6be9 100644 --- a/drivers/video/drm/i915/main.c +++ b/drivers/video/drm/i915/main.c 
@@ -88,14 +88,17 @@ u32_t drvEntry(int action, char *cmdline) #define DISPLAY_VERSION API_VERSION -#define SRV_GETVERSION 0 -#define SRV_ENUM_MODES 1 -#define SRV_SET_MODE 2 -#define SRV_GET_CAPS 3 +#define SRV_GETVERSION 0 +#define SRV_ENUM_MODES 1 +#define SRV_SET_MODE 2 +#define SRV_GET_CAPS 3 -#define SRV_CREATE_SURFACE 10 +#define SRV_CREATE_SURFACE 10 +#define SRV_DESTROY_SURFACE 11 +#define SRV_LOCK_SURFACE 12 +#define SRV_UNLOCK_SURFACE 13 -#define SRV_BLIT_VIDEO 20 +#define SRV_BLIT_VIDEO 20 #define check_input(size) \ if( unlikely((inp==NULL)||(io->inp_size != (size))) ) \ @@ -148,6 +151,9 @@ int _stdcall display_handler(ioctl_t *io) retval = create_surface((struct io_call_10*)inp); break; + case SRV_LOCK_SURFACE: + retval = lock_surface((struct io_call_12*)inp); + break; case SRV_BLIT_VIDEO: // blit_video( inp[0], inp[1], inp[2], @@ -156,6 +162,7 @@ int _stdcall display_handler(ioctl_t *io) blit_textured( inp[0], inp[1], inp[2], inp[3], inp[4], inp[5], inp[6]); + retval = 0; break; }; diff --git a/drivers/video/drm/i915/sna/gen6_render.c b/drivers/video/drm/i915/sna/gen6_render.c index a60b1dc423..1c4d2a8b08 100644 --- a/drivers/video/drm/i915/sna/gen6_render.c +++ b/drivers/video/drm/i915/sna/gen6_render.c @@ -369,29 +369,12 @@ gen6_emit_state_base_address(struct sna *sna) OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2)); OUT_BATCH(0); /* general */ -// OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */ -// sna->kgem.nbatch, -// NULL, -// I915_GEM_DOMAIN_INSTRUCTION << 16, -// BASE_ADDRESS_MODIFY)); - OUT_BATCH((sna->kgem.batch_obj->gtt_offset+ sna->kgem.batch_idx*4096)|BASE_ADDRESS_MODIFY); -// OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */ -// sna->kgem.nbatch, -// sna->render_state.gen6.general_bo, -// I915_GEM_DOMAIN_INSTRUCTION << 16, -// BASE_ADDRESS_MODIFY)); - OUT_BATCH(sna->render_state.gen6.general_bo->gaddr|BASE_ADDRESS_MODIFY); OUT_BATCH(0); /* indirect */ -// OUT_BATCH(kgem_add_reloc(&sna->kgem, -// sna->kgem.nbatch, -// sna->render_state.gen6.general_bo, -// I915_GEM_DOMAIN_INSTRUCTION << 16, -// BASE_ADDRESS_MODIFY)); OUT_BATCH(sna->render_state.gen6.general_bo->gaddr|BASE_ADDRESS_MODIFY); @@ -493,7 +476,7 @@ gen6_emit_invariant(struct sna *sna) OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2)); OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | - GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ + GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ OUT_BATCH(0); OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2)); @@ -980,23 +963,10 @@ static void gen6_vertex_close(struct sna *sna) DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, i, sna->render.vertex_reloc[i])); -// sna->kgem.batch[sna->render.vertex_reloc[i]] = -// kgem_add_reloc(&sna->kgem, -// sna->render.vertex_reloc[i], -// bo, -// I915_GEM_DOMAIN_VERTEX << 16, -// delta); sna->kgem.batch[sna->render.vertex_reloc[i]] = sna->kgem.batch_obj->gtt_offset+delta+ sna->kgem.batch_idx*4096; -// sna->kgem.batch[sna->render.vertex_reloc[i]+1] = -// kgem_add_reloc(&sna->kgem, -// sna->render.vertex_reloc[i]+1, -// bo, -// I915_GEM_DOMAIN_VERTEX << 16, -// delta + sna->render.vertex_used * 4 - 1); - sna->kgem.batch[sna->render.vertex_reloc[i]+1] = sna->kgem.batch_obj->gtt_offset+delta+ sna->kgem.batch_idx*4096+ @@ -1681,7 +1651,7 @@ gen6_render_copy_blt(struct sna *sna, int16_t dx, int16_t dy) { if (unlikely(!gen6_get_rectangles(sna, op, 1))) { - _kgem_submit(&sna->kgem); +// _kgem_submit(&sna->kgem); gen6_emit_copy_state(sna, op); gen6_get_rectangles(sna, op, 1); } @@ -1768,8 +1738,6 @@ 
gen6_render_copy(struct sna *sna, uint8_t alu, gen6_render_copy_blt(sna, &op, src_x, src_y, w, h, dst_x, dst_y); gen6_render_copy_done(sna); - _kgem_submit(&sna->kgem); - return TRUE; } @@ -1856,7 +1824,7 @@ gen6_render_clear(struct sna *sna, bitmap_t *dst, struct kgem_bo *bo) gen6_align_vertex(sna, &tmp); if (unlikely(!gen6_get_rectangles(sna, &tmp, 1))) { - _kgem_submit(&sna->kgem); +// _kgem_submit(&sna->kgem); gen6_emit_fill_state(sna, &tmp); gen6_get_rectangles(sna, &tmp, 1); } @@ -1876,7 +1844,7 @@ gen6_render_clear(struct sna *sna, bitmap_t *dst, struct kgem_bo *bo) gen6_vertex_flush(sna); // kgem_bo_destroy(&sna->kgem, tmp.src.bo); // gen6_render_composite_done(sna, &tmp); - _kgem_submit(&sna->kgem); +// _kgem_submit(&sna->kgem); return TRUE; } diff --git a/drivers/video/drm/i915/sna/kgem.c b/drivers/video/drm/i915/sna/kgem.c index df0d067e00..60ecf8b304 100644 --- a/drivers/video/drm/i915/sna/kgem.c +++ b/drivers/video/drm/i915/sna/kgem.c @@ -970,10 +970,10 @@ static int compact_batch_surface(struct kgem *kgem) return size; } -void execute_buffer (struct drm_i915_gem_object *buffer, uint32_t offset, - int size); +int exec_batch(struct drm_device *dev, struct intel_ring_buffer *ring, + batchbuffer_t *exec); -void _kgem_submit(struct kgem *kgem) +void _kgem_submit(struct kgem *kgem, batchbuffer_t *exb) { struct kgem_request *rq; uint32_t batch_end; @@ -1020,7 +1020,11 @@ void _kgem_submit(struct kgem *kgem) }; #endif - execute_buffer(kgem->batch_obj, kgem->batch_idx*4096, sizeof(uint32_t)*kgem->nbatch); + exb->batch = kgem->batch_obj; + exb->exec_start = kgem->batch_obj->gtt_offset+kgem->batch_idx*4096; + exb->exec_len = sizeof(uint32_t)*kgem->nbatch; + + exec_batch(main_device, NULL, exb); // if (kgem->wedged) // kgem_cleanup(kgem); diff --git a/drivers/video/drm/i915/sna/kgem.h b/drivers/video/drm/i915/sna/kgem.h index 06215aa161..1751048d41 100644 --- a/drivers/video/drm/i915/sna/kgem.h +++ b/drivers/video/drm/i915/sna/kgem.h @@ -178,6 +178,15 @@ struct kgem { struct drm_i915_gem_relocation_entry reloc[384]; }; +typedef struct +{ + struct drm_i915_gem_object *batch; + struct list_head objects; + u32 exec_start; + u32 exec_len; + +}batchbuffer_t; + #define KGEM_BATCH_RESERVED 1 #define KGEM_RELOC_RESERVED 4 #define KGEM_EXEC_RESERVED 1 @@ -237,12 +246,12 @@ void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset); bool kgem_retire(struct kgem *kgem); -void _kgem_submit(struct kgem *kgem); -static inline void kgem_submit(struct kgem *kgem) -{ - if (kgem->nbatch) - _kgem_submit(kgem); -} +void _kgem_submit(struct kgem *kgem, batchbuffer_t *exb); +//static inline void kgem_submit(struct kgem *kgem) +//{ +// if (kgem->nbatch) +// _kgem_submit(kgem); +//} /* static inline void kgem_bo_submit(struct kgem *kgem, struct kgem_bo *bo) @@ -328,8 +337,8 @@ static inline bool kgem_check_batch_with_surfaces(struct kgem *kgem, static inline uint32_t *kgem_get_batch(struct kgem *kgem, int num_dwords) { - if (!kgem_check_batch(kgem, num_dwords)) - _kgem_submit(kgem); +// if (!kgem_check_batch(kgem, num_dwords)) +// _kgem_submit(kgem); return kgem->batch + kgem->nbatch; } diff --git a/drivers/video/drm/i915/sna/sna.c b/drivers/video/drm/i915/sna/sna.c index bffcb1eddf..d14568b66c 100644 --- a/drivers/video/drm/i915/sna/sna.c +++ b/drivers/video/drm/i915/sna/sna.c @@ -297,16 +297,20 @@ done: return kgem_bo_reference(cache->bo[i]); } - int sna_blit_copy(bitmap_t *dst_bitmap, int dst_x, int dst_y, int w, int h, bitmap_t *src_bitmap, int src_x, int src_y) { + batchbuffer_t 
execbuffer; + struct kgem_bo src_bo, dst_bo; + memset(&execbuffer, 0, sizeof(execbuffer)); memset(&src_bo, 0, sizeof(src_bo)); memset(&dst_bo, 0, sizeof(dst_bo)); + INIT_LIST_HEAD(&execbuffer.objects); + src_bo.gaddr = src_bitmap->gaddr; src_bo.pitch = src_bitmap->pitch; src_bo.tiling = 0; @@ -318,6 +322,12 @@ int sna_blit_copy(bitmap_t *dst_bitmap, int dst_x, int dst_y, sna_device->render.copy(sna_device, 0, src_bitmap, &src_bo, dst_bitmap, &dst_bo, dst_x, dst_y, src_x, src_y, w, h); + + INIT_LIST_HEAD(&execbuffer.objects); + list_add_tail(&src_bitmap->obj->exec_list, &execbuffer.objects); + + _kgem_submit(&sna_device->kgem, &execbuffer); + };
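Taken together, this gives SNA a complete blit-and-synchronize path: sna_blit_copy() above queues the copy through _kgem_submit() -> exec_batch(), which moves the source object to the ring's active list under a fresh seqno, and a later gem_object_lock() (the in-kernel backend of SRV_LOCK_SURFACE) blocks in i915_wait_request() until notify_ring() observes that seqno. A hedged end-to-end sketch — blit_then_redraw() is hypothetical, the calls are the ones added by this patch:

    /* GPU blit, then CPU access that actually waits for the GPU. */
    static int blit_then_redraw(bitmap_t *dst, bitmap_t *src, int w, int h)
    {
        int ret;

        /* Queues a render-ring batch; src->obj is placed on
         * execbuffer.objects and so ends up active with the new seqno. */
        sna_blit_copy(dst, 0, 0, w, h, src, 0, 0);

        /* gem_object_lock() -> i915_gem_object_set_to_cpu_domain(obj, true)
         * -> i915_gem_object_wait_rendering() -> i915_wait_request():
         * sleeps on ring->irq_queue until the blit's seqno is signalled. */
        ret = gem_object_lock(src->obj);
        if (ret != 0)
            return ret;

        /* The GPU has finished reading; src->uaddr may be rewritten. */
        return 0;
    }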