From 75968b0534dddbf1050cb0e14cf4cee04ce778e8 Mon Sep 17 00:00:00 2001 From: "Sergey Semyonov (Serge)" Date: Sat, 23 Feb 2013 09:47:31 +0000 Subject: [PATCH] Intel-2D: sna_blit_copy i915: i915_gem_execbuffer2. It's full of bugs! git-svn-id: svn://kolibrios.org@3263 a494cfbc-eb01-0410-851d-a64ba20cac60 --- drivers/video/Intel-2D/gen6_render.c | 158 ++- drivers/video/Intel-2D/i915_drm.h | 4 +- drivers/video/Intel-2D/kgem-sna.c | 348 +++++- drivers/video/Intel-2D/kgem.h | 2 +- drivers/video/Intel-2D/sna.c | 83 +- drivers/video/Intel-2D/sna.h | 17 +- drivers/video/Intel-2D/sna_render.h | 37 +- drivers/video/Intel-2D/utils.c | 150 +++ drivers/video/drm/i915/i915_dma.c | 5 + drivers/video/drm/i915/i915_drv.c | 8 +- drivers/video/drm/i915/i915_gem.c | 78 +- drivers/video/drm/i915/i915_gem_execbuffer.c | 1171 ++++++++++++++++++ drivers/video/drm/i915/i915_trace.h | 2 + drivers/video/drm/i915/intel_display.c | 4 + drivers/video/drm/i915/intel_fb.c | 14 +- drivers/video/drm/i915/kms_display.c | 17 + drivers/video/drm/i915/main.c | 26 +- drivers/video/drm/i915/utils.c | 38 + 18 files changed, 2066 insertions(+), 96 deletions(-) create mode 100644 drivers/video/Intel-2D/utils.c create mode 100644 drivers/video/drm/i915/i915_gem_execbuffer.c diff --git a/drivers/video/Intel-2D/gen6_render.c b/drivers/video/Intel-2D/gen6_render.c index 82ea4d8c57..afec942b0d 100644 --- a/drivers/video/Intel-2D/gen6_render.c +++ b/drivers/video/Intel-2D/gen6_render.c @@ -1149,6 +1149,9 @@ static bool gen6_rectangle_begin(struct sna *sna, int id = 1 << GEN6_VERTEX(op->u.gen6.flags); int ndwords; + if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) + return true; + ndwords = op->need_magic_ca_pass ? 60 : 6; if ((sna->render.vb_id & id) == 0) ndwords += 5; @@ -1165,6 +1168,12 @@ static bool gen6_rectangle_begin(struct sna *sna, static int gen6_get_rectangles__flush(struct sna *sna, const struct sna_composite_op *op) { + /* Preventing discarding new vbo after lock contention */ + if (sna_vertex_wait__locked(&sna->render)) { + int rem = vertex_space(sna); + if (rem > op->floats_per_rect) + return rem; + } if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 
65 : 5)) return 0; @@ -1218,7 +1227,7 @@ flush: gen4_vertex_flush(sna); gen6_magic_ca_pass(sna, op); } -// sna_vertex_wait__locked(&sna->render); + sna_vertex_wait__locked(&sna->render); _kgem_submit(&sna->kgem); emit_state(sna, op); goto start; @@ -2014,7 +2023,17 @@ gen6_render_composite(struct sna *sna, } tmp->done = gen6_render_composite_done; - + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); + if (!kgem_check_bo(&sna->kgem, + tmp->dst.bo, tmp->src.bo, tmp->mask.bo, + NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, + tmp->dst.bo, tmp->src.bo, tmp->mask.bo, + NULL)) + goto cleanup_mask; + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } gen6_emit_composite_state(sna, tmp); gen6_align_vertex(sna, tmp); @@ -2654,6 +2673,13 @@ fallback: assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER); assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX); + kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); + if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) + goto fallback; + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } gen6_emit_copy_state(sna, &op->base); gen6_align_vertex(sna, &op->base); @@ -3149,7 +3175,13 @@ gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); - + if (!kgem_check_bo(&sna->kgem, bo, NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, bo, NULL)) { + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + return false; + } + } gen6_emit_fill_state(sna, &tmp); gen6_align_vertex(sna, &tmp); @@ -3173,6 +3205,7 @@ gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) return true; } +#endif static void gen6_render_flush(struct sna *sna) { @@ -3182,7 +3215,17 @@ static void gen6_render_flush(struct sna *sna) assert(sna->render.vertex_offset == 0); } -#endif +static void +gen6_render_context_switch(struct kgem *kgem, + int new_mode) +{ + if (kgem->nbatch) { + DBG(("%s: from %d to %d\n", __FUNCTION__, kgem->mode, new_mode)); + _kgem_submit(kgem); + } + + kgem->ring = new_mode; +} static void gen6_render_retire(struct kgem *kgem) @@ -3200,6 +3243,23 @@ gen6_render_retire(struct kgem *kgem) } } +static void +gen6_render_expire(struct kgem *kgem) +{ + struct sna *sna; + + sna = container_of(kgem, struct sna, kgem); + if (sna->render.vbo && !sna->render.vertex_used) { + DBG(("%s: discarding vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); + kgem_bo_destroy(kgem, sna->render.vbo); + assert(!sna->render.active); + sna->render.vbo = NULL; + sna->render.vertices = sna->render.vertex_data; + sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); + sna->render.vertex_used = 0; + sna->render.vertex_index = 0; + } +} static void gen6_render_reset(struct sna *sna) { @@ -3319,8 +3379,9 @@ bool gen6_render_init(struct sna *sna) if (!gen6_render_setup(sna)) return false; -// sna->kgem.context_switch = gen6_render_context_switch; + sna->kgem.context_switch = gen6_render_context_switch; sna->kgem.retire = gen6_render_retire; + sna->kgem.expire = gen6_render_expire; // sna->render.composite = gen6_render_composite; // sna->render.video = gen6_render_video; @@ -3445,3 +3506,90 @@ int gen4_vertex_finish(struct sna *sna) return sna->render.vertex_size - sna->render.vertex_used; } +void gen4_vertex_close(struct sna *sna) +{ + struct kgem_bo *bo, *free_bo = NULL; + unsigned int i, delta = 0; + + assert(sna->render.vertex_offset == 
0); + if (!sna->render.vb_id) + return; + + DBG(("%s: used=%d, vbo active? %d, vb=%x, nreloc=%d\n", + __FUNCTION__, sna->render.vertex_used, sna->render.vbo ? sna->render.vbo->handle : 0, + sna->render.vb_id, sna->render.nvertex_reloc)); + + assert(!sna->render.active); + + bo = sna->render.vbo; + if (bo) { + if (sna->render.vertex_size - sna->render.vertex_used < 64) { + DBG(("%s: discarding vbo (full), handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); + sna->render.vbo = NULL; + sna->render.vertices = sna->render.vertex_data; + sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); + free_bo = bo; + } else if (IS_CPU_MAP(bo->map) && !sna->kgem.has_llc) { + DBG(("%s: converting CPU map to GTT\n", __FUNCTION__)); + sna->render.vertices = + kgem_bo_map__gtt(&sna->kgem, sna->render.vbo); + if (sna->render.vertices == NULL) { + sna->render.vbo = NULL; + sna->render.vertices = sna->render.vertex_data; + sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); + free_bo = bo; + } + + } + } else { + if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) { + DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__, + sna->render.vertex_used, sna->kgem.nbatch)); + memcpy(sna->kgem.batch + sna->kgem.nbatch, + sna->render.vertex_data, + sna->render.vertex_used * 4); + delta = sna->kgem.nbatch * 4; + bo = NULL; + sna->kgem.nbatch += sna->render.vertex_used; + } else { + bo = kgem_create_linear(&sna->kgem, + 4*sna->render.vertex_used, + CREATE_NO_THROTTLE); + if (bo && !kgem_bo_write(&sna->kgem, bo, + sna->render.vertex_data, + 4*sna->render.vertex_used)) { + kgem_bo_destroy(&sna->kgem, bo); + bo = NULL; + } + DBG(("%s: new vbo: %d\n", __FUNCTION__, + sna->render.vertex_used)); + free_bo = bo; + } + } + + assert(sna->render.nvertex_reloc); + for (i = 0; i < sna->render.nvertex_reloc; i++) { + DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, + i, sna->render.vertex_reloc[i])); + + sna->kgem.batch[sna->render.vertex_reloc[i]] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i], bo, + I915_GEM_DOMAIN_VERTEX << 16, + delta); + } + sna->render.nvertex_reloc = 0; + sna->render.vb_id = 0; + + if (sna->render.vbo == NULL) { + assert(!sna->render.active); + sna->render.vertex_used = 0; + sna->render.vertex_index = 0; + assert(sna->render.vertices == sna->render.vertex_data); + assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data)); + } + + if (free_bo) + kgem_bo_destroy(&sna->kgem, free_bo); +} + diff --git a/drivers/video/Intel-2D/i915_drm.h b/drivers/video/Intel-2D/i915_drm.h index 701f1f22d0..a41bbeb441 100644 --- a/drivers/video/Intel-2D/i915_drm.h +++ b/drivers/video/Intel-2D/i915_drm.h @@ -218,13 +218,13 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_HWS_ADDR #define DRM_IOCTL_I915_GEM_INIT #define DRM_IOCTL_I915_GEM_EXECBUFFER -#define DRM_IOCTL_I915_GEM_EXECBUFFER2 +#define DRM_IOCTL_I915_GEM_EXECBUFFER2 SRV_I915_GEM_EXECBUFFER2 #define DRM_IOCTL_I915_GEM_PIN SRV_I915_GEM_PIN #define DRM_IOCTL_I915_GEM_UNPIN #define DRM_IOCTL_I915_GEM_BUSY SRV_I915_GEM_BUSY #define DRM_IOCTL_I915_GEM_SET_CACHEING SRV_I915_GEM_SET_CACHEING #define DRM_IOCTL_I915_GEM_GET_CACHEING -#define DRM_IOCTL_I915_GEM_THROTTLE +#define DRM_IOCTL_I915_GEM_THROTTLE SRV_I915_GEM_THROTTLE #define DRM_IOCTL_I915_GEM_ENTERVT #define DRM_IOCTL_I915_GEM_LEAVEVT #define DRM_IOCTL_I915_GEM_CREATE SRV_I915_GEM_CREATE diff --git a/drivers/video/Intel-2D/kgem-sna.c b/drivers/video/Intel-2D/kgem-sna.c index 444e12ddaa..9fd97563c4 100644 --- a/drivers/video/Intel-2D/kgem-sna.c +++ 
b/drivers/video/Intel-2D/kgem-sna.c @@ -108,6 +108,15 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define LOCAL_I915_EXEC_IS_PINNED (1<<10) #define LOCAL_I915_EXEC_NO_RELOC (1<<11) #define LOCAL_I915_EXEC_HANDLE_LUT (1<<12) +struct local_i915_gem_userptr { + uint64_t user_ptr; + uint32_t user_size; + uint32_t flags; +#define I915_USERPTR_READ_ONLY (1<<0) +#define I915_USERPTR_UNSYNCHRONIZED (1<<31) + uint32_t handle; +}; + #define UNCACHED 0 #define SNOOPED 1 @@ -118,6 +127,13 @@ struct local_i915_gem_cacheing { #define LOCAL_IOCTL_I915_GEM_SET_CACHEING SRV_I915_GEM_SET_CACHEING +struct local_fbinfo { + int width; + int height; + int pitch; + int tiling; +}; + struct kgem_buffer { struct kgem_bo base; void *mem; @@ -189,7 +205,7 @@ static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride) ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); */ - return ret == 0; + return false;//ret == 0; } static bool gem_set_cacheing(int fd, uint32_t handle, int cacheing) @@ -260,19 +276,19 @@ retry_gtt: retry_mmap: // ptr = mmap(0, bytes(bo), PROT_READ | PROT_WRITE, MAP_SHARED, // kgem->fd, mmap_arg.offset); - if (ptr == 0) { +// if (ptr == 0) { printf("%s: failed to mmap %d, %d bytes, into GTT domain: %d\n", __FUNCTION__, bo->handle, bytes(bo), 0); - if (__kgem_throttle_retire(kgem, 0)) - goto retry_mmap; +// if (__kgem_throttle_retire(kgem, 0)) +// goto retry_mmap; - if (kgem->need_expire) { - kgem_cleanup_cache(kgem); - goto retry_mmap; - } +// if (kgem->need_expire) { +// kgem_cleanup_cache(kgem); +// goto retry_mmap; +// } ptr = NULL; - } +// } return ptr; } @@ -639,10 +655,10 @@ static bool test_has_semaphores_enabled(struct kgem *kgem) static bool __kgem_throttle(struct kgem *kgem) { -// if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL) == 0) + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL) == 0) return false; -// return errno == EIO; + return errno == EIO; } static bool is_hw_supported(struct kgem *kgem, @@ -1073,7 +1089,138 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT; if (kgem->has_pinned_batches) kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED; +} +/* XXX hopefully a good approximation */ +static uint32_t kgem_get_unique_id(struct kgem *kgem) +{ + uint32_t id; + id = ++kgem->unique_id; + if (id == 0) + id = ++kgem->unique_id; + return id; +} + +inline static uint32_t kgem_pitch_alignment(struct kgem *kgem, unsigned flags) +{ + if (flags & CREATE_PRIME) + return 256; + if (flags & CREATE_SCANOUT) + return 64; + return kgem->min_alignment; +} + +static uint32_t kgem_untiled_pitch(struct kgem *kgem, + uint32_t width, uint32_t bpp, + unsigned flags) +{ + width = ALIGN(width, 2) * bpp >> 3; + return ALIGN(width, kgem_pitch_alignment(kgem, flags)); +} +static uint32_t kgem_surface_size(struct kgem *kgem, + bool relaxed_fencing, + unsigned flags, + uint32_t width, + uint32_t height, + uint32_t bpp, + uint32_t tiling, + uint32_t *pitch) +{ + uint32_t tile_width, tile_height; + uint32_t size; + + assert(width <= MAXSHORT); + assert(height <= MAXSHORT); + + if (kgem->gen <= 030) { + if (tiling) { + if (kgem->gen < 030) { + tile_width = 128; + tile_height = 32; + } else { + tile_width = 512; + tile_height = 16; + } + } else { + tile_width = 2 * bpp >> 3; + tile_width = ALIGN(tile_width, + kgem_pitch_alignment(kgem, flags)); + tile_height = 2; + } + } else switch 
(tiling) { + default: + case I915_TILING_NONE: + tile_width = 2 * bpp >> 3; + tile_width = ALIGN(tile_width, + kgem_pitch_alignment(kgem, flags)); + tile_height = 2; + break; + + /* XXX align to an even tile row */ + case I915_TILING_X: + tile_width = 512; + tile_height = 16; + break; + case I915_TILING_Y: + tile_width = 128; + tile_height = 64; + break; + } + + *pitch = ALIGN(width * bpp / 8, tile_width); + height = ALIGN(height, tile_height); + if (kgem->gen >= 040) + return PAGE_ALIGN(*pitch * height); + + /* If it is too wide for the blitter, don't even bother. */ + if (tiling != I915_TILING_NONE) { + if (*pitch > 8192) + return 0; + + for (size = tile_width; size < *pitch; size <<= 1) + ; + *pitch = size; + } else { + if (*pitch >= 32768) + return 0; + } + + size = *pitch * height; + if (relaxed_fencing || tiling == I915_TILING_NONE) + return PAGE_ALIGN(size); + + /* We need to allocate a pot fence region for a tiled buffer. */ + if (kgem->gen < 030) + tile_width = 512 * 1024; + else + tile_width = 1024 * 1024; + while (tile_width < size) + tile_width *= 2; + return tile_width; +} + +static uint32_t kgem_aligned_height(struct kgem *kgem, + uint32_t height, uint32_t tiling) +{ + uint32_t tile_height; + + if (kgem->gen <= 030) { + tile_height = tiling ? kgem->gen < 030 ? 32 : 16 : 1; + } else switch (tiling) { + /* XXX align to an even tile row */ + default: + case I915_TILING_NONE: + tile_height = 1; + break; + case I915_TILING_X: + tile_height = 16; + break; + case I915_TILING_Y: + tile_height = 64; + break; + } + + return ALIGN(height, tile_height); } static struct drm_i915_gem_exec_object2 * @@ -1763,11 +1910,27 @@ bool kgem_retire(struct kgem *kgem) return retired; } +bool __kgem_ring_is_idle(struct kgem *kgem, int ring) +{ + struct kgem_request *rq; + assert(!list_is_empty(&kgem->requests[ring])); + rq = list_last_entry(&kgem->requests[ring], + struct kgem_request, list); + if (__kgem_busy(kgem, rq->bo->handle)) { + DBG(("%s: last requests handle=%d still busy\n", + __FUNCTION__, rq->bo->handle)); + return false; + } + DBG(("%s: ring=%d idle (handle=%d)\n", + __FUNCTION__, ring, rq->bo->handle)); - + kgem_retire__requests_ring(kgem, ring); + assert(list_is_empty(&kgem->requests[ring])); + return true; +} static void kgem_commit(struct kgem *kgem) { @@ -2328,15 +2491,15 @@ void _kgem_submit(struct kgem *kgem) -// ret = drmIoctl(kgem->fd, -// DRM_IOCTL_I915_GEM_EXECBUFFER2, -// &execbuf); -// while (ret == -1 && errno == EBUSY && retry--) { -// __kgem_throttle(kgem); -// ret = drmIoctl(kgem->fd, -// DRM_IOCTL_I915_GEM_EXECBUFFER2, -// &execbuf); -// } + ret = drmIoctl(kgem->fd, + DRM_IOCTL_I915_GEM_EXECBUFFER2, + &execbuf); + while (ret == -1 && errno == EBUSY && retry--) { + __kgem_throttle(kgem); + ret = drmIoctl(kgem->fd, + DRM_IOCTL_I915_GEM_EXECBUFFER2, + &execbuf); + } if (DEBUG_SYNC && ret == 0) { struct drm_i915_gem_set_domain set_domain; @@ -2898,8 +3061,6 @@ inline int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo) return size; } -#if 0 - struct kgem_bo *kgem_create_2d(struct kgem *kgem, int width, int height, @@ -3379,6 +3540,7 @@ create: return bo; } +#if 0 struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem, int width, int height, @@ -3497,11 +3659,80 @@ void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) __kgem_bo_destroy(kgem, bo); } +void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo) +{ + assert(bo->rq); + assert(bo->exec == NULL); + assert(bo->needs_flush); + /* The kernel will emit a flush *and* update its own flushing lists. 
*/ + if (!__kgem_busy(kgem, bo->handle)) + __kgem_bo_clear_busy(bo); + DBG(("%s: handle=%d, busy?=%d\n", + __FUNCTION__, bo->handle, bo->rq != NULL)); +} +inline static bool needs_semaphore(struct kgem *kgem, struct kgem_bo *bo) +{ + return kgem->nreloc && bo->rq && RQ_RING(bo->rq) != kgem->ring; +} +bool kgem_check_bo(struct kgem *kgem, ...) +{ + va_list ap; + struct kgem_bo *bo; + int num_exec = 0; + int num_pages = 0; + bool flush = false; + va_start(ap, kgem); + while ((bo = va_arg(ap, struct kgem_bo *))) { + while (bo->proxy) + bo = bo->proxy; + if (bo->exec) + continue; + + if (needs_semaphore(kgem, bo)) + return false; + + num_pages += num_pages(bo); + num_exec++; + + flush |= bo->flush; + } + va_end(ap); + + DBG(("%s: num_pages=+%d, num_exec=+%d\n", + __FUNCTION__, num_pages, num_exec)); + + if (!num_pages) + return true; + + if (kgem_flush(kgem, flush)) + return false; + + if (kgem->aperture > kgem->aperture_low && + kgem_ring_is_idle(kgem, kgem->ring)) { + DBG(("%s: current aperture usage (%d) is greater than low water mark (%d)\n", + __FUNCTION__, kgem->aperture, kgem->aperture_low)); + return false; + } + + if (num_pages + kgem->aperture > kgem->aperture_high) { + DBG(("%s: final aperture usage (%d) is greater than high water mark (%d)\n", + __FUNCTION__, num_pages + kgem->aperture, kgem->aperture_high)); + return false; + } + + if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) { + DBG(("%s: out of exec slots (%d + %d / %d)\n", __FUNCTION__, + kgem->nexec, num_exec, KGEM_EXEC_SIZE(kgem))); + return false; + } + + return true; +} @@ -3545,6 +3776,9 @@ uint32_t kgem_add_reloc(struct kgem *kgem, assert((read_write_domain & 0x7fff) == 0 || bo != NULL); +// if( bo != NULL && bo->handle == -1) +// return 0; + index = kgem->nreloc++; assert(index < ARRAY_SIZE(kgem->reloc)); kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]); @@ -3851,6 +4085,42 @@ void kgem_clear_dirty(struct kgem *kgem) } } +struct kgem_bo *kgem_create_proxy(struct kgem *kgem, + struct kgem_bo *target, + int offset, int length) +{ + struct kgem_bo *bo; + + DBG(("%s: target handle=%d [proxy? %d], offset=%d, length=%d, io=%d\n", + __FUNCTION__, target->handle, target->proxy ? 
target->proxy->delta : -1, + offset, length, target->io)); + + bo = __kgem_bo_alloc(target->handle, length); + if (bo == NULL) + return NULL; + + bo->unique_id = kgem_get_unique_id(kgem); + bo->reusable = false; + bo->size.bytes = length; + + bo->io = target->io && target->proxy == NULL; + bo->dirty = target->dirty; + bo->tiling = target->tiling; + bo->pitch = target->pitch; + + assert(!bo->scanout); + bo->proxy = kgem_bo_reference(target); + bo->delta = offset; + + if (target->exec) { + list_move_tail(&bo->request, &kgem->next_request->buffers); + bo->exec = &_kgem_dummy_exec; + } + bo->rq = target->rq; + + return bo; +} + uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format) { struct kgem_bo_binding *b; @@ -3889,5 +4159,37 @@ void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset) } +int kgem_init_fb(struct kgem *kgem, struct sna_fb *fb) +{ + struct kgem_bo *bo; + size_t size; + int ret; + + ret = drmIoctl(kgem->fd, SRV_FBINFO, fb); + if( ret != 0 ) + return 0; + + size = fb->pitch * fb->height / PAGE_SIZE; + + bo = __kgem_bo_alloc(-2, size); + if (!bo) { + return 0; + } + + bo->domain = DOMAIN_GTT; + bo->unique_id = kgem_get_unique_id(kgem); + bo->pitch = fb->pitch; + bo->tiling = I915_TILING_NONE; + bo->scanout = 1; + fb->fb_bo = bo; + + printf("fb width %d height %d pitch %d bo %p\n", + fb->width, fb->height, fb->pitch, fb->fb_bo); + + return 1; +}; + + + diff --git a/drivers/video/Intel-2D/kgem.h b/drivers/video/Intel-2D/kgem.h index f35e7343bc..bab3b8db4b 100644 --- a/drivers/video/Intel-2D/kgem.h +++ b/drivers/video/Intel-2D/kgem.h @@ -28,7 +28,7 @@ #ifndef KGEM_H #define KGEM_H -#define HAS_DEBUG_FULL 1 +#define HAS_DEBUG_FULL 0 #include #include diff --git a/drivers/video/Intel-2D/sna.c b/drivers/video/Intel-2D/sna.c index 892933f95e..88b6030f9c 100644 --- a/drivers/video/Intel-2D/sna.c +++ b/drivers/video/Intel-2D/sna.c @@ -5,6 +5,10 @@ #include "sna.h" +#include + +static struct sna_fb sna_fb; + typedef struct __attribute__((packed)) { unsigned handle; @@ -64,7 +68,7 @@ void no_render_init(struct sna *sna) // render->clear = no_render_clear; render->reset = no_render_reset; - render->flush = no_render_flush; +// render->flush = no_render_flush; // render->fini = no_render_fini; // sna->kgem.context_switch = no_render_context_switch; @@ -129,24 +133,9 @@ bool sna_accel_init(struct sna *sna) // return false; sna_device = sna; -#if 0 - { - struct kgem_bo *screen_bo; - bitmap_t screen; - screen.pitch = 1024*4; - screen.gaddr = 0; - screen.width = 1024; - screen.height = 768; - screen.obj = (void*)-1; - screen_bo = create_bo(&screen); - - sna->render.clear(sna, &screen, screen_bo); - } -#endif - - return true; + return kgem_init_fb(&sna->kgem, &sna_fb); } int sna_init(uint32_t service) @@ -339,37 +328,69 @@ done: #endif -int sna_blit_copy(uint32_t dst_bitmap, int dst_x, int dst_y, - int w, int h, uint32_t src_bitmap, int src_x, int src_y) +int sna_blit_copy(bitmap_t *src_bitmap, int dst_x, int dst_y, + int w, int h, int src_x, int src_y) { struct sna_copy_op copy; - struct kgem_bo src_bo, dst_bo; + struct _Pixmap src, dst; + struct kgem_bo *src_bo; - memset(&src_bo, 0, sizeof(src_bo)); - memset(&dst_bo, 0, sizeof(dst_bo)); + memset(&src, 0, sizeof(src)); + memset(&dst, 0, sizeof(dst)); -// src_bo.gaddr = src_bitmap->gaddr; -// src_bo.pitch = src_bitmap->pitch; -// src_bo.tiling = 0; + src.drawable.bitsPerPixel = 32; + src.drawable.width = src_bitmap->width; + src.drawable.height = src_bitmap->height; -// dst_bo.gaddr = dst_bitmap->gaddr; -// 
dst_bo.pitch = dst_bitmap->pitch; -// dst_bo.tiling = 0; + dst.drawable.bitsPerPixel = 32; + dst.drawable.width = sna_fb.width; + dst.drawable.height = sna_fb.height; memset(©, 0, sizeof(copy)); - sna_device->render.copy(sna_device, GXcopy, NULL, &src_bo, NULL, &dst_bo, ©); + src_bo = (struct kgem_bo*)src_bitmap->handle; + + if( sna_device->render.copy(sna_device, GXcopy, + &src, src_bo, + &dst, sna_fb.fb_bo, ©) ) + { copy.blt(sna_device, ©, src_x, src_y, w, h, dst_x, dst_y); copy.done(sna_device, ©); + } + kgem_submit(&sna_device->kgem); - -// _kgem_submit(&sna_device->kgem, &execbuffer); +// __asm__ __volatile__("int3"); }; +int sna_create_bitmap(bitmap_t *bitmap) +{ + struct kgem_bo *bo; + bo = kgem_create_2d(&sna_device->kgem, bitmap->width, bitmap->height, + 32,I915_TILING_NONE, CREATE_CPU_MAP); + + if(bo == NULL) + goto err_1; + + void *map = kgem_bo_map(&sna_device->kgem, bo); + if(map == NULL) + goto err_2; + + bitmap->handle = (uint32_t)bo; + bitmap->pitch = bo->pitch; + bitmap->data = map; + + return 0; + +err_2: + kgem_bo_destroy(&sna_device->kgem, bo); + +err_1: + return -1; +}; /* int sna_blit_tex(bitmap_t *dst_bitmap, int dst_x, int dst_y, diff --git a/drivers/video/Intel-2D/sna.h b/drivers/video/Intel-2D/sna.h index 99715c0d74..2c54d2c4b2 100644 --- a/drivers/video/Intel-2D/sna.h +++ b/drivers/video/Intel-2D/sna.h @@ -44,7 +44,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include #include #include - +#include #include "intel_driver.h" #include "pciaccess.h" @@ -69,11 +69,13 @@ int drmIoctl(int fd, unsigned long request, void *arg); #define SRV_I915_GEM_GET_APERTURE 26 #define SRV_I915_GEM_PWRITE 27 #define SRV_I915_GEM_BUSY 28 - #define SRV_I915_GEM_SET_DOMAIN 29 #define SRV_I915_GEM_MMAP 30 -#define SRV_I915_GEM_MMAP_GTT 31 +#define SRV_I915_GEM_THROTTLE 32 +#define SRV_FBINFO 33 +#define SRV_I915_GEM_EXECBUFFER2 34 +#define SRV_I915_GEM_MMAP_GTT 31 #define DRM_IOCTL_GEM_CLOSE SRV_DRM_GEM_CLOSE @@ -153,6 +155,15 @@ typedef struct _Pixmap { } PixmapRec; +struct sna_fb +{ + uint32_t width; + uint32_t height; + uint32_t pitch; + uint32_t tiling; + + struct kgem_bo *fb_bo; +}; struct pixman_box16 { diff --git a/drivers/video/Intel-2D/sna_render.h b/drivers/video/Intel-2D/sna_render.h index 8c56594b8c..a8c967d0a1 100644 --- a/drivers/video/Intel-2D/sna_render.h +++ b/drivers/video/Intel-2D/sna_render.h @@ -471,7 +471,6 @@ unsigned sna_static_stream_compile_wm(struct sna *sna, struct kgem_bo *sna_static_stream_fini(struct sna *sna, struct sna_static_stream *stream); -/* struct kgem_bo * sna_render_get_solid(struct sna *sna, uint32_t color); @@ -479,9 +478,6 @@ sna_render_get_solid(struct sna *sna, void sna_render_flush_solid(struct sna *sna); -struct kgem_bo * -sna_render_get_gradient(struct sna *sna, - PictGradient *pattern); uint32_t sna_rgba_for_color(uint32_t color, int depth); uint32_t sna_rgba_to_color(uint32_t rgba, uint32_t format); @@ -493,8 +489,6 @@ bool sna_get_rgba_from_pixel(uint32_t pixel, uint32_t format); bool sna_picture_is_solid(PicturePtr picture, uint32_t *color); -*/ - void no_render_init(struct sna *sna); bool gen2_render_init(struct sna *sna); @@ -683,8 +677,37 @@ bool sna_composite_mask_is_opaque(PicturePtr mask); #endif - void sna_vertex_init(struct sna *sna); +static inline void sna_vertex_lock(struct sna_render *r) +{ +// pthread_mutex_lock(&r->lock); +} + +static inline void sna_vertex_acquire__locked(struct sna_render *r) +{ + r->active++; +} + +static inline void sna_vertex_unlock(struct sna_render *r) +{ +// 
pthread_mutex_unlock(&r->lock); +} + +static inline void sna_vertex_release__locked(struct sna_render *r) +{ + assert(r->active > 0); + --r->active; +// if (--r->active == 0) +// pthread_cond_signal(&r->wait); +} + +static inline bool sna_vertex_wait__locked(struct sna_render *r) +{ + bool was_active = r->active; +// while (r->active) +// pthread_cond_wait(&r->wait, &r->lock); + return was_active; +} #endif /* SNA_RENDER_H */ diff --git a/drivers/video/Intel-2D/utils.c b/drivers/video/Intel-2D/utils.c new file mode 100644 index 0000000000..935844e3ee --- /dev/null +++ b/drivers/video/Intel-2D/utils.c @@ -0,0 +1,150 @@ + +#include +#include + + +static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx) + : "memory"); +} + +/* Some CPUID calls want 'count' to be placed in ecx */ +static inline void cpuid_count(unsigned int op, int count, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + *eax = op; + *ecx = count; + native_cpuid(eax, ebx, ecx, edx); +} + + +enum _cache_type { + CACHE_TYPE_NULL = 0, + CACHE_TYPE_DATA = 1, + CACHE_TYPE_INST = 2, + CACHE_TYPE_UNIFIED = 3 +}; + + +union _cpuid4_leaf_eax { + struct { + enum _cache_type type:5; + unsigned int level:3; + unsigned int is_self_initializing:1; + unsigned int is_fully_associative:1; + unsigned int reserved:4; + unsigned int num_threads_sharing:12; + unsigned int num_cores_on_die:6; + } split; + uint32_t full; +}; + +union _cpuid4_leaf_ebx { + struct { + unsigned int coherency_line_size:12; + unsigned int physical_line_partition:10; + unsigned int ways_of_associativity:10; + } split; + uint32_t full; +}; + +union _cpuid4_leaf_ecx { + struct { + unsigned int number_of_sets:32; + } split; + uint32_t full; +}; + +struct _cpuid4_info_regs { + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + unsigned long size; +}; + +static int +cpuid4_cache_lookup_regs(int index, + struct _cpuid4_info_regs *this_leaf) +{ + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + unsigned edx; + + cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); + + if (eax.split.type == CACHE_TYPE_NULL) + return -1; /* better error ? 
*/ + + this_leaf->eax = eax; + this_leaf->ebx = ebx; + this_leaf->ecx = ecx; + this_leaf->size = (ecx.split.number_of_sets + 1) * + (ebx.split.coherency_line_size + 1) * + (ebx.split.physical_line_partition + 1) * + (ebx.split.ways_of_associativity + 1); + return 0; +} + +static int find_num_cache_leaves() +{ + unsigned int eax, ebx, ecx, edx, op; + union _cpuid4_leaf_eax cache_eax; + int i = -1; + + do { + ++i; + /* Do cpuid(op) loop to find out num_cache_leaves */ + cpuid_count(4, i, &eax, &ebx, &ecx, &edx); + cache_eax.full = eax; + } while (cache_eax.split.type != CACHE_TYPE_NULL); + return i; +}; + +unsigned int cpu_cache_size() +{ + unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ + unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ + unsigned int num_cache_leaves; + + num_cache_leaves = find_num_cache_leaves(); + + for (i = 0; i < num_cache_leaves; i++) + { + struct _cpuid4_info_regs this_leaf; + int retval; + + retval = cpuid4_cache_lookup_regs(i, &this_leaf); + if (retval >= 0) { + switch (this_leaf.eax.split.level) + { + case 1: + if (this_leaf.eax.split.type == CACHE_TYPE_DATA) + new_l1d = this_leaf.size; + else if (this_leaf.eax.split.type == CACHE_TYPE_INST) + new_l1i = this_leaf.size; + break; + case 2: + new_l2 = this_leaf.size; + break; + case 3: + new_l3 = this_leaf.size; + break; + default: + break; + } + } + } + printf("l2 cache %d l3 cache %d\n", new_l2, new_l3); + + return new_l3 != 0 ? new_l3 : new_l2; +}; diff --git a/drivers/video/drm/i915/i915_dma.c b/drivers/video/drm/i915/i915_dma.c index cd75948259..843c103114 100644 --- a/drivers/video/drm/i915/i915_dma.c +++ b/drivers/video/drm/i915/i915_dma.c @@ -1547,12 +1547,15 @@ int i915_driver_unload(struct drm_device *dev) return 0; } +#endif int i915_driver_open(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv; DRM_DEBUG_DRIVER("\n"); + ENTER(); + file_priv = kmalloc(sizeof(*file_priv), GFP_KERNEL); if (!file_priv) return -ENOMEM; @@ -1564,9 +1567,11 @@ int i915_driver_open(struct drm_device *dev, struct drm_file *file) idr_init(&file_priv->context_idr); + LEAVE(); return 0; } +#if 0 /** * i915_driver_lastclose - clean up after all DRM clients have exited * @dev: DRM device diff --git a/drivers/video/drm/i915/i915_drv.c b/drivers/video/drm/i915/i915_drv.c index 633851bc91..969172a7ae 100644 --- a/drivers/video/drm/i915/i915_drv.c +++ b/drivers/video/drm/i915/i915_drv.c @@ -492,7 +492,7 @@ static struct drm_driver driver = { // DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME, // .load = i915_driver_load, // .unload = i915_driver_unload, -// .open = i915_driver_open, + .open = i915_driver_open, // .lastclose = i915_driver_lastclose, // .preclose = i915_driver_preclose, // .postclose = i915_driver_postclose, @@ -577,6 +577,12 @@ int drm_get_dev(struct pci_dev *pdev, const struct pci_device_id *ent) dev->driver = &driver; + if (dev->driver->open) { + ret = dev->driver->open(dev, priv); + if (ret < 0) + goto err_g4; + } + ret = i915_driver_load(dev, ent->driver_data ); if (ret) diff --git a/drivers/video/drm/i915/i915_gem.c b/drivers/video/drm/i915/i915_gem.c index b6cba54468..9eef51aad2 100644 --- a/drivers/video/drm/i915/i915_gem.c +++ b/drivers/video/drm/i915/i915_gem.c @@ -37,6 +37,10 @@ extern int x86_clflush_size; +#define PROT_READ 0x1 /* page can be read */ +#define PROT_WRITE 0x2 /* page can be written */ +#define MAP_SHARED 0x01 /* Share changes */ + #undef mb #undef rmb #undef wmb @@ -44,6 +48,10 @@ extern int 
x86_clflush_size; #define rmb() asm volatile ("lfence") #define wmb() asm volatile ("sfence") +unsigned long vm_mmap(struct file *file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flag, unsigned long offset); + static inline void clflush(volatile void *__p) { asm volatile("clflush %0" : "+m" (*(volatile char*)__p)); @@ -1296,8 +1304,8 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, if (obj == NULL) return -ENOENT; - dbgprintf("%s offset %lld size %lld not supported\n", - args->offset, args->size); + dbgprintf("%s offset %lld size %lld\n", + __FUNCTION__, args->offset, args->size); /* prime objects have no backing filp to GEM mmap * pages from. */ @@ -1306,17 +1314,16 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, return -EINVAL; } -// addr = vm_mmap(obj->filp, 0, args->size, -// PROT_READ | PROT_WRITE, MAP_SHARED, -// args->offset); + addr = vm_mmap(obj->filp, 0, args->size, + PROT_READ | PROT_WRITE, MAP_SHARED, + args->offset); drm_gem_object_unreference_unlocked(obj); -// if (IS_ERR((void *)addr)) -// return addr; + if (IS_ERR((void *)addr)) + return addr; args->addr_ptr = (uint64_t) addr; - return -EINVAL; -// return 0; + return 0; } @@ -1444,8 +1451,8 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj) // i915_gem_object_free_mmap_offset(obj); -// if (obj->base.filp == NULL) -// return; + if (obj->base.filp == NULL) + return; /* Our goal here is to return as much of the memory as * is possible back to the system as we are called from OOM. @@ -1491,7 +1498,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) - page_cache_release(page); +// page_cache_release(page); } //DRM_DEBUG_KMS("%s release %d pages\n", __FUNCTION__, page_count); obj->dirty = 0; @@ -1784,7 +1791,17 @@ i915_add_request(struct intel_ring_buffer *ring, list_add_tail(&request->list, &ring->request_list); request->file_priv = NULL; + if (file) { + struct drm_i915_file_private *file_priv = file->driver_priv; + spin_lock(&file_priv->mm.lock); + request->file_priv = file_priv; + list_add_tail(&request->client_list, + &file_priv->mm.request_list); + spin_unlock(&file_priv->mm.lock); + } + + trace_i915_gem_request_add(ring, request->seqno); ring->outstanding_lazy_request = 0; if (!dev_priv->mm.suspended) { @@ -1805,8 +1822,21 @@ i915_add_request(struct intel_ring_buffer *ring, return 0; } +static inline void +i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) +{ + struct drm_i915_file_private *file_priv = request->file_priv; + if (!file_priv) + return; + spin_lock(&file_priv->mm.lock); + if (request->file_priv) { + list_del(&request->client_list); + request->file_priv = NULL; + } + spin_unlock(&file_priv->mm.lock); +} static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, struct intel_ring_buffer *ring) @@ -1819,7 +1849,7 @@ static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, list); list_del(&request->list); -// i915_gem_request_remove_from_client(request); + i915_gem_request_remove_from_client(request); kfree(request); } @@ -1887,6 +1917,8 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) { uint32_t seqno; + ENTER(); + if (list_empty(&ring->request_list)) return; @@ -1913,6 +1945,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) ring->last_retired_head = request->tail; list_del(&request->list); + i915_gem_request_remove_from_client(request); kfree(request); } @@ -1939,6 +1972,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) } 
WARN_ON(i915_verify_lists(ring->dev)); + LEAVE(); } void @@ -1961,6 +1995,8 @@ i915_gem_retire_work_handler(struct work_struct *work) bool idle; int i; + ENTER(); + dev_priv = container_of(work, drm_i915_private_t, mm.retire_work.work); dev = dev_priv->dev; @@ -1990,6 +2026,8 @@ i915_gem_retire_work_handler(struct work_struct *work) intel_mark_idle(dev); mutex_unlock(&dev->struct_mutex); + + LEAVE(); } /** @@ -2127,6 +2165,9 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) drm_i915_private_t *dev_priv = obj->base.dev->dev_private; int ret = 0; + if(obj == get_fb_obj()) + return 0; + if (obj->gtt_space == NULL) return 0; @@ -3105,7 +3146,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) return 0; } -#if 0 /* Throttle our rendering by waiting until the ring has completed our requests * emitted over 20 msec ago. * @@ -3121,7 +3161,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_file_private *file_priv = file->driver_priv; - unsigned long recent_enough = GetTimerTics() - msecs_to_jiffies(20); + unsigned long recent_enough = GetTimerTicks() - msecs_to_jiffies(20); struct drm_i915_gem_request *request; struct intel_ring_buffer *ring = NULL; u32 seqno = 0; @@ -3149,7 +3189,6 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) return ret; } -#endif int i915_gem_object_pin(struct drm_i915_gem_object *obj, @@ -3162,7 +3201,6 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) return -EBUSY; -#if 0 if (obj->gtt_space != NULL) { if ((alignment && obj->gtt_offset & (alignment - 1)) || (map_and_fenceable && !obj->map_and_fenceable)) { @@ -3178,7 +3216,6 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, return ret; } } -#endif if (obj->gtt_space == NULL) { struct drm_i915_private *dev_priv = obj->base.dev->dev_private; @@ -3342,7 +3379,6 @@ unlock: return ret; } -#if 0 int i915_gem_throttle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -3350,6 +3386,8 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data, return i915_gem_ring_throttle(dev, file_priv); } +#if 0 + int i915_gem_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -3545,7 +3583,7 @@ i915_gem_idle(struct drm_device *dev) mutex_unlock(&dev->struct_mutex); /* Cancel the retire work handler, which should be idle now. */ -// cancel_delayed_work_sync(&dev_priv->mm.retire_work); + cancel_delayed_work_sync(&dev_priv->mm.retire_work); return 0; } diff --git a/drivers/video/drm/i915/i915_gem_execbuffer.c b/drivers/video/drm/i915/i915_gem_execbuffer.c new file mode 100644 index 0000000000..5e3ab7bd9a --- /dev/null +++ b/drivers/video/drm/i915/i915_gem_execbuffer.c @@ -0,0 +1,1171 @@ +/* + * Copyright © 2008,2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * Chris Wilson + * + */ + +#define iowrite32(v, addr) writel((v), (addr)) + +#include +#include +#include "i915_drv.h" +#include "i915_trace.h" +#include "intel_drv.h" +//#include + +#define I915_EXEC_SECURE (1<<9) +#define I915_EXEC_IS_PINNED (1<<10) + +#define wmb() asm volatile ("sfence") + +struct drm_i915_gem_object *get_fb_obj(); + +static inline __attribute__((const)) +bool is_power_of_2(unsigned long n) +{ + return (n != 0 && ((n & (n - 1)) == 0)); +} + +static unsigned long +copy_to_user(void __user *to, const void *from, unsigned long n) +{ + memcpy(to, from, n); + return 0; +} + +static unsigned long +copy_from_user(void *to, const void __user *from, unsigned long n) +{ + memcpy(to, from, n); + return 0; +} + +struct eb_objects { + int and; + struct hlist_head buckets[0]; +}; + +static struct eb_objects * +eb_create(int size) +{ + struct eb_objects *eb; + int count = PAGE_SIZE / sizeof(struct hlist_head) / 2; + BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head))); + while (count > size) + count >>= 1; + eb = kzalloc(count*sizeof(struct hlist_head) + + sizeof(struct eb_objects), + GFP_KERNEL); + if (eb == NULL) + return eb; + + eb->and = count - 1; + return eb; +} + +static void +eb_reset(struct eb_objects *eb) +{ + memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); +} + +static void +eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj) +{ + hlist_add_head(&obj->exec_node, + &eb->buckets[obj->exec_handle & eb->and]); +} + +static struct drm_i915_gem_object * +eb_get_object(struct eb_objects *eb, unsigned long handle) +{ + struct hlist_head *head; + struct hlist_node *node; + struct drm_i915_gem_object *obj; + + head = &eb->buckets[handle & eb->and]; + hlist_for_each(node, head) { + obj = hlist_entry(node, struct drm_i915_gem_object, exec_node); + if (obj->exec_handle == handle) + return obj; + } + + return NULL; +} + +static void +eb_destroy(struct eb_objects *eb) +{ + kfree(eb); +} + +static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) +{ + return (obj->base.write_domain == I915_GEM_DOMAIN_CPU || + !obj->map_and_fenceable || + obj->cache_level != I915_CACHE_NONE); +} + +static int +i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, + struct eb_objects *eb, + struct drm_i915_gem_relocation_entry *reloc) +{ + struct drm_device *dev = obj->base.dev; + struct drm_gem_object *target_obj; + struct drm_i915_gem_object *target_i915_obj; + uint32_t target_offset; + int ret = -EINVAL; + + /* we've already hold a reference to all valid objects */ + target_obj = &eb_get_object(eb, reloc->target_handle)->base; + if (unlikely(target_obj == NULL)) + return -ENOENT; + + target_i915_obj = to_intel_bo(target_obj); + target_offset = target_i915_obj->gtt_offset; + + /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and + * pipe_control writes because the gpu doesn't properly redirect them + * through the ppgtt for non_secure batchbuffers. 
*/ + if (unlikely(IS_GEN6(dev) && + reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && + !target_i915_obj->has_global_gtt_mapping)) { + i915_gem_gtt_bind_object(target_i915_obj, + target_i915_obj->cache_level); + } + + /* Validate that the target is in a valid r/w GPU domain */ + if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) { + DRM_DEBUG("reloc with multiple write domains: " + "obj %p target %d offset %d " + "read %08x write %08x", + obj, reloc->target_handle, + (int) reloc->offset, + reloc->read_domains, + reloc->write_domain); + return ret; + } + if (unlikely((reloc->write_domain | reloc->read_domains) + & ~I915_GEM_GPU_DOMAINS)) { + DRM_DEBUG("reloc with read/write non-GPU domains: " + "obj %p target %d offset %d " + "read %08x write %08x", + obj, reloc->target_handle, + (int) reloc->offset, + reloc->read_domains, + reloc->write_domain); + return ret; + } + if (unlikely(reloc->write_domain && target_obj->pending_write_domain && + reloc->write_domain != target_obj->pending_write_domain)) { + DRM_DEBUG("Write domain conflict: " + "obj %p target %d offset %d " + "new %08x old %08x\n", + obj, reloc->target_handle, + (int) reloc->offset, + reloc->write_domain, + target_obj->pending_write_domain); + return ret; + } + + target_obj->pending_read_domains |= reloc->read_domains; + target_obj->pending_write_domain |= reloc->write_domain; + + /* If the relocation already has the right value in it, no + * more work needs to be done. + */ + if (target_offset == reloc->presumed_offset) + return 0; + + /* Check that the relocation address is valid... */ + if (unlikely(reloc->offset > obj->base.size - 4)) { + DRM_DEBUG("Relocation beyond object bounds: " + "obj %p target %d offset %d size %d.\n", + obj, reloc->target_handle, + (int) reloc->offset, + (int) obj->base.size); + return ret; + } + if (unlikely(reloc->offset & 3)) { + DRM_DEBUG("Relocation not 4-byte aligned: " + "obj %p target %d offset %d.\n", + obj, reloc->target_handle, + (int) reloc->offset); + return ret; + } + + /* We can't wait for rendering with pagefaults disabled */ +// if (obj->active && in_atomic()) +// return -EFAULT; + + reloc->delta += target_offset; + if (use_cpu_reloc(obj)) { + uint32_t page_offset = reloc->offset & ~PAGE_MASK; + char *vaddr; + + ret = i915_gem_object_set_to_cpu_domain(obj, 1); + if (ret) + return ret; + + vaddr = (char *)MapIoMem((addr_t)i915_gem_object_get_page(obj, + reloc->offset >> PAGE_SHIFT), 4096, 3); + *(uint32_t *)(vaddr + page_offset) = reloc->delta; + FreeKernelSpace(vaddr); + } else { + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t __iomem *reloc_entry; + void __iomem *reloc_page; + + ret = i915_gem_object_set_to_gtt_domain(obj, true); + if (ret) + return ret; + + ret = i915_gem_object_put_fence(obj); + if (ret) + return ret; + + /* Map the page containing the relocation we're going to perform. 
*/ + reloc->offset += obj->gtt_offset; + reloc_page = (void*)MapIoMem(reloc->offset & PAGE_MASK, 4096, 3); + reloc_entry = (uint32_t __iomem *) + (reloc_page + (reloc->offset & ~PAGE_MASK)); + iowrite32(reloc->delta, reloc_entry); + FreeKernelSpace(reloc_page); + } + + /* and update the user's relocation entry */ + reloc->presumed_offset = target_offset; + + return 0; +} + +static int +i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj, + struct eb_objects *eb) +{ +#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) + struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)]; + struct drm_i915_gem_relocation_entry __user *user_relocs; + struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; + int remain, ret; + + user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr; + + remain = entry->relocation_count; + while (remain) { + struct drm_i915_gem_relocation_entry *r = stack_reloc; + int count = remain; + if (count > ARRAY_SIZE(stack_reloc)) + count = ARRAY_SIZE(stack_reloc); + remain -= count; + + memcpy(r, user_relocs, count*sizeof(r[0])); + + do { + u64 offset = r->presumed_offset; + + ret = i915_gem_execbuffer_relocate_entry(obj, eb, r); + if (ret) + return ret; + + memcpy(&user_relocs->presumed_offset, + &r->presumed_offset, + sizeof(r->presumed_offset)); + + user_relocs++; + r++; + } while (--count); + } + + return 0; +#undef N_RELOC +} + +static int +i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj, + struct eb_objects *eb, + struct drm_i915_gem_relocation_entry *relocs) +{ + const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; + int i, ret; + + for (i = 0; i < entry->relocation_count; i++) { + ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]); + if (ret) + return ret; + } + + return 0; +} + +static int +i915_gem_execbuffer_relocate(struct drm_device *dev, + struct eb_objects *eb, + struct list_head *objects) +{ + struct drm_i915_gem_object *obj; + int ret = 0; + + /* This is the fast path and we cannot handle a pagefault whilst + * holding the struct mutex lest the user pass in the relocations + * contained within a mmaped bo. For in such a case we, the page + * fault handler would call i915_gem_fault() and we would try to + * acquire the struct mutex again. Obviously this is bad and so + * lockdep complains vehemently. 
+ */ +// pagefault_disable(); + list_for_each_entry(obj, objects, exec_list) { + ret = i915_gem_execbuffer_relocate_object(obj, eb); + if (ret) + break; + } +// pagefault_enable(); + + return ret; +} + +#define __EXEC_OBJECT_HAS_PIN (1<<31) +#define __EXEC_OBJECT_HAS_FENCE (1<<30) + +static int +need_reloc_mappable(struct drm_i915_gem_object *obj) +{ + struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; + return entry->relocation_count && !use_cpu_reloc(obj); +} + +static int +i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, + struct intel_ring_buffer *ring) +{ + struct drm_i915_private *dev_priv = obj->base.dev->dev_private; + struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; + bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; + bool need_fence, need_mappable; + int ret; + + need_fence = + has_fenced_gpu_access && + entry->flags & EXEC_OBJECT_NEEDS_FENCE && + obj->tiling_mode != I915_TILING_NONE; + need_mappable = need_fence || need_reloc_mappable(obj); + + ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false); + if (ret) + return ret; + + entry->flags |= __EXEC_OBJECT_HAS_PIN; + + if (has_fenced_gpu_access) { + if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { + ret = i915_gem_object_get_fence(obj); + if (ret) + return ret; + + if (i915_gem_object_pin_fence(obj)) + entry->flags |= __EXEC_OBJECT_HAS_FENCE; + + obj->pending_fenced_gpu_access = true; + } + } + + /* Ensure ppgtt mapping exists if needed */ + if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) { + i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, + obj, obj->cache_level); + + obj->has_aliasing_ppgtt_mapping = 1; + } + + entry->offset = obj->gtt_offset; + return 0; +} + +static void +i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj) +{ + struct drm_i915_gem_exec_object2 *entry; + + if (!obj->gtt_space) + return; + + entry = obj->exec_entry; + + if (entry->flags & __EXEC_OBJECT_HAS_FENCE) + i915_gem_object_unpin_fence(obj); + + if (entry->flags & __EXEC_OBJECT_HAS_PIN) + i915_gem_object_unpin(obj); + + entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); +} + +static int +i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, + struct drm_file *file, + struct list_head *objects) +{ + struct drm_i915_gem_object *obj; + struct list_head ordered_objects; + bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; + int retry; + + INIT_LIST_HEAD(&ordered_objects); + while (!list_empty(objects)) { + struct drm_i915_gem_exec_object2 *entry; + bool need_fence, need_mappable; + + obj = list_first_entry(objects, + struct drm_i915_gem_object, + exec_list); + entry = obj->exec_entry; + + need_fence = + has_fenced_gpu_access && + entry->flags & EXEC_OBJECT_NEEDS_FENCE && + obj->tiling_mode != I915_TILING_NONE; + need_mappable = need_fence || need_reloc_mappable(obj); + + if (need_mappable) + list_move(&obj->exec_list, &ordered_objects); + else + list_move_tail(&obj->exec_list, &ordered_objects); + + obj->base.pending_read_domains = 0; + obj->base.pending_write_domain = 0; + obj->pending_fenced_gpu_access = false; + } + list_splice(&ordered_objects, objects); + + /* Attempt to pin all of the buffers into the GTT. + * This is done in 3 phases: + * + * 1a. Unbind all objects that do not match the GTT constraints for + * the execbuffer (fenceable, mappable, alignment etc). + * 1b. Increment pin count for already bound objects. + * 2. Bind new objects. + * 3. Decrement pin count. 
+ * + * This avoid unnecessary unbinding of later objects in order to make + * room for the earlier objects *unless* we need to defragment. + */ + retry = 0; + do { + int ret = 0; + + /* Unbind any ill-fitting objects or pin. */ + list_for_each_entry(obj, objects, exec_list) { + struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; + bool need_fence, need_mappable; + + if (!obj->gtt_space) + continue; + + need_fence = + has_fenced_gpu_access && + entry->flags & EXEC_OBJECT_NEEDS_FENCE && + obj->tiling_mode != I915_TILING_NONE; + need_mappable = need_fence || need_reloc_mappable(obj); + + if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) || + (need_mappable && !obj->map_and_fenceable)) + ret = i915_gem_object_unbind(obj); + else + ret = i915_gem_execbuffer_reserve_object(obj, ring); + if (ret) + goto err; + } + + /* Bind fresh objects */ + list_for_each_entry(obj, objects, exec_list) { + if (obj->gtt_space) + continue; + + ret = i915_gem_execbuffer_reserve_object(obj, ring); + if (ret) + goto err; + } + +err: /* Decrement pin count for bound objects */ + list_for_each_entry(obj, objects, exec_list) + i915_gem_execbuffer_unreserve_object(obj); + + if (ret != -ENOSPC || retry++) + return ret; + +// ret = i915_gem_evict_everything(ring->dev); + if (ret) + return ret; + } while (1); +} + +static int +i915_gem_execbuffer_relocate_slow(struct drm_device *dev, + struct drm_file *file, + struct intel_ring_buffer *ring, + struct list_head *objects, + struct eb_objects *eb, + struct drm_i915_gem_exec_object2 *exec, + int count) +{ + struct drm_i915_gem_relocation_entry *reloc; + struct drm_i915_gem_object *obj; + int *reloc_offset; + int i, total, ret; + + /* We may process another execbuffer during the unlock... */ + while (!list_empty(objects)) { + obj = list_first_entry(objects, + struct drm_i915_gem_object, + exec_list); + list_del_init(&obj->exec_list); + drm_gem_object_unreference(&obj->base); + } + + mutex_unlock(&dev->struct_mutex); + + total = 0; + for (i = 0; i < count; i++) + total += exec[i].relocation_count; + + reloc_offset = malloc(count * sizeof(*reloc_offset)); + reloc = malloc(total * sizeof(*reloc)); + if (reloc == NULL || reloc_offset == NULL) { + free(reloc); + free(reloc_offset); + mutex_lock(&dev->struct_mutex); + return -ENOMEM; + } + + total = 0; + for (i = 0; i < count; i++) { + struct drm_i915_gem_relocation_entry __user *user_relocs; + u64 invalid_offset = (u64)-1; + int j; + + user_relocs = (void __user *)(uintptr_t)exec[i].relocs_ptr; + + if (copy_from_user(reloc+total, user_relocs, + exec[i].relocation_count * sizeof(*reloc))) { + ret = -EFAULT; + mutex_lock(&dev->struct_mutex); + goto err; + } + + /* As we do not update the known relocation offsets after + * relocating (due to the complexities in lock handling), + * we need to mark them as invalid now so that we force the + * relocation processing next time. Just in case the target + * object is evicted and then rebound into its old + * presumed_offset before the next execbuffer - if that + * happened we would make the mistake of assuming that the + * relocations were valid. 
+ */ + for (j = 0; j < exec[i].relocation_count; j++) { + if (copy_to_user(&user_relocs[j].presumed_offset, + &invalid_offset, + sizeof(invalid_offset))) { + ret = -EFAULT; + mutex_lock(&dev->struct_mutex); + goto err; + } + } + + reloc_offset[i] = total; + total += exec[i].relocation_count; + } + + ret = i915_mutex_lock_interruptible(dev); + if (ret) { + mutex_lock(&dev->struct_mutex); + goto err; + } + + /* reacquire the objects */ + eb_reset(eb); + for (i = 0; i < count; i++) { + + if(exec[i].handle == -2) + obj = get_fb_obj(); + else + obj = to_intel_bo(drm_gem_object_lookup(dev, file, + exec[i].handle)); + if (&obj->base == NULL) { + DRM_DEBUG("Invalid object handle %d at index %d\n", + exec[i].handle, i); + ret = -ENOENT; + goto err; + } + + list_add_tail(&obj->exec_list, objects); + obj->exec_handle = exec[i].handle; + obj->exec_entry = &exec[i]; + eb_add_object(eb, obj); + } + + ret = i915_gem_execbuffer_reserve(ring, file, objects); + if (ret) + goto err; + + list_for_each_entry(obj, objects, exec_list) { + int offset = obj->exec_entry - exec; + ret = i915_gem_execbuffer_relocate_object_slow(obj, eb, + reloc + reloc_offset[offset]); + if (ret) + goto err; + } + + /* Leave the user relocations as are, this is the painfully slow path, + * and we want to avoid the complication of dropping the lock whilst + * having buffers reserved in the aperture and so causing spurious + * ENOSPC for random operations. + */ + +err: + free(reloc); + free(reloc_offset); + return ret; +} + +static int +i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips) +{ + u32 plane, flip_mask; + int ret; + + /* Check for any pending flips. As we only maintain a flip queue depth + * of 1, we can simply insert a WAIT for the next display flip prior + * to executing the batch and avoid stalling the CPU. + */ + + for (plane = 0; flips >> plane; plane++) { + if (((flips >> plane) & 1) == 0) + continue; + + if (plane) + flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; + else + flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; + + ret = intel_ring_begin(ring, 2); + if (ret) + return ret; + + intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); + } + + return 0; +} + +static int +i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, + struct list_head *objects) +{ + struct drm_i915_gem_object *obj; + uint32_t flush_domains = 0; + uint32_t flips = 0; + int ret; + + list_for_each_entry(obj, objects, exec_list) { + ret = i915_gem_object_sync(obj, ring); + if (ret) + return ret; + + if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) + i915_gem_clflush_object(obj); + + if (obj->base.pending_write_domain) + flips |= atomic_read(&obj->pending_flip); + + flush_domains |= obj->base.write_domain; + } + + if (flips) { + ret = i915_gem_execbuffer_wait_for_flips(ring, flips); + if (ret) + return ret; + } + + if (flush_domains & I915_GEM_DOMAIN_CPU) + i915_gem_chipset_flush(ring->dev); + + if (flush_domains & I915_GEM_DOMAIN_GTT) + wmb(); + + /* Unconditionally invalidate gpu caches and ensure that we do flush + * any residual writes from the previous batch. 
+ */ + return intel_ring_invalidate_all_caches(ring); +} + +static bool +i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) +{ + return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0; +} + +static int +validate_exec_list(struct drm_i915_gem_exec_object2 *exec, + int count) +{ + int i; + + for (i = 0; i < count; i++) { + char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr; + int length; /* limited by fault_in_pages_readable() */ + + /* First check for malicious input causing overflow */ + if (exec[i].relocation_count > + INT_MAX / sizeof(struct drm_i915_gem_relocation_entry)) + return -EINVAL; + + length = exec[i].relocation_count * + sizeof(struct drm_i915_gem_relocation_entry); +// if (!access_ok(VERIFY_READ, ptr, length)) +// return -EFAULT; + + /* we may also need to update the presumed offsets */ +// if (!access_ok(VERIFY_WRITE, ptr, length)) +// return -EFAULT; + +// if (fault_in_multipages_readable(ptr, length)) +// return -EFAULT; + } + + return 0; +} + +static void +i915_gem_execbuffer_move_to_active(struct list_head *objects, + struct intel_ring_buffer *ring) +{ + struct drm_i915_gem_object *obj; + + list_for_each_entry(obj, objects, exec_list) { + u32 old_read = obj->base.read_domains; + u32 old_write = obj->base.write_domain; + + obj->base.read_domains = obj->base.pending_read_domains; + obj->base.write_domain = obj->base.pending_write_domain; + obj->fenced_gpu_access = obj->pending_fenced_gpu_access; + + i915_gem_object_move_to_active(obj, ring); + if (obj->base.write_domain) { + obj->dirty = 1; + obj->last_write_seqno = intel_ring_get_seqno(ring); + if (obj->pin_count) /* check for potential scanout */ + intel_mark_fb_busy(obj); + } + + trace_i915_gem_object_change_domain(obj, old_read, old_write); + } +} + +static void +i915_gem_execbuffer_retire_commands(struct drm_device *dev, + struct drm_file *file, + struct intel_ring_buffer *ring) +{ + /* Unconditionally force add_request to emit a full flush. 
*/ + ring->gpu_caches_dirty = true; + + /* Add a breadcrumb for the completion of the batch buffer */ + (void)i915_add_request(ring, file, NULL); +} + +static int +i915_reset_gen7_sol_offsets(struct drm_device *dev, + struct intel_ring_buffer *ring) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + int ret, i; + + if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) + return 0; + + ret = intel_ring_begin(ring, 4 * 3); + if (ret) + return ret; + + for (i = 0; i < 4; i++) { + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i)); + intel_ring_emit(ring, 0); + } + + intel_ring_advance(ring); + + return 0; +} + +static int +i915_gem_do_execbuffer(struct drm_device *dev, void *data, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args, + struct drm_i915_gem_exec_object2 *exec) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + struct list_head objects; + struct eb_objects *eb; + struct drm_i915_gem_object *batch_obj; + struct drm_clip_rect *cliprects = NULL; + struct intel_ring_buffer *ring; + u32 ctx_id = i915_execbuffer2_get_context_id(*args); + u32 exec_start, exec_len; + u32 mask; + u32 flags; + int ret, mode, i; + + if (!i915_gem_check_execbuffer(args)) { + DRM_DEBUG("execbuf with invalid offset/length\n"); + return -EINVAL; + } + + ret = validate_exec_list(exec, args->buffer_count); + if (ret) + return ret; + + flags = 0; + if (args->flags & I915_EXEC_SECURE) { +// if (!file->is_master || !capable(CAP_SYS_ADMIN)) +// return -EPERM; + + flags |= I915_DISPATCH_SECURE; + } + if (args->flags & I915_EXEC_IS_PINNED) + flags |= I915_DISPATCH_PINNED; + + switch (args->flags & I915_EXEC_RING_MASK) { + case I915_EXEC_DEFAULT: + case I915_EXEC_RENDER: + ring = &dev_priv->ring[RCS]; + break; + case I915_EXEC_BSD: + ring = &dev_priv->ring[VCS]; + if (ctx_id != 0) { + DRM_DEBUG("Ring %s doesn't support contexts\n", + ring->name); + return -EPERM; + } + break; + case I915_EXEC_BLT: + ring = &dev_priv->ring[BCS]; + if (ctx_id != 0) { + DRM_DEBUG("Ring %s doesn't support contexts\n", + ring->name); + return -EPERM; + } + break; + default: + DRM_DEBUG("execbuf with unknown ring: %d\n", + (int)(args->flags & I915_EXEC_RING_MASK)); + return -EINVAL; + } + if (!intel_ring_initialized(ring)) { + DRM_DEBUG("execbuf with invalid ring: %d\n", + (int)(args->flags & I915_EXEC_RING_MASK)); + return -EINVAL; + } + + mode = args->flags & I915_EXEC_CONSTANTS_MASK; + mask = I915_EXEC_CONSTANTS_MASK; + switch (mode) { + case I915_EXEC_CONSTANTS_REL_GENERAL: + case I915_EXEC_CONSTANTS_ABSOLUTE: + case I915_EXEC_CONSTANTS_REL_SURFACE: + if (ring == &dev_priv->ring[RCS] && + mode != dev_priv->relative_constants_mode) { + if (INTEL_INFO(dev)->gen < 4) + return -EINVAL; + + if (INTEL_INFO(dev)->gen > 5 && + mode == I915_EXEC_CONSTANTS_REL_SURFACE) + return -EINVAL; + + /* The HW changed the meaning on this bit on gen6 */ + if (INTEL_INFO(dev)->gen >= 6) + mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; + } + break; + default: + DRM_DEBUG("execbuf with unknown constants: %d\n", mode); + return -EINVAL; + } + + if (args->buffer_count < 1) { + DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); + return -EINVAL; + } + + if (args->num_cliprects != 0) { + if (ring != &dev_priv->ring[RCS]) { + DRM_DEBUG("clip rectangles are only valid with the render ring\n"); + return -EINVAL; + } + + if (INTEL_INFO(dev)->gen >= 5) { + DRM_DEBUG("clip rectangles are only valid on pre-gen5\n"); + return -EINVAL; + } + + if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) { + 
DRM_DEBUG("execbuf with %u cliprects\n", + args->num_cliprects); + return -EINVAL; + } + + cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects), + GFP_KERNEL); + if (cliprects == NULL) { + ret = -ENOMEM; + goto pre_mutex_err; + } + + if (copy_from_user(cliprects, + (struct drm_clip_rect __user *)(uintptr_t) + args->cliprects_ptr, + sizeof(*cliprects)*args->num_cliprects)) { + ret = -EFAULT; + goto pre_mutex_err; + } + } + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + goto pre_mutex_err; + + if (dev_priv->mm.suspended) { + mutex_unlock(&dev->struct_mutex); + ret = -EBUSY; + goto pre_mutex_err; + } + + eb = eb_create(args->buffer_count); + if (eb == NULL) { + mutex_unlock(&dev->struct_mutex); + ret = -ENOMEM; + goto pre_mutex_err; + } + + /* Look up object handles */ + INIT_LIST_HEAD(&objects); + for (i = 0; i < args->buffer_count; i++) { + struct drm_i915_gem_object *obj; + + if(exec[i].handle == -2) + obj = get_fb_obj(); + else + obj = to_intel_bo(drm_gem_object_lookup(dev, file, + exec[i].handle)); + if (&obj->base == NULL) { + DRM_DEBUG("Invalid object handle %d at index %d\n", + exec[i].handle, i); + /* prevent error path from reading uninitialized data */ + ret = -ENOENT; + goto err; + } + + if (!list_empty(&obj->exec_list)) { + DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n", + obj, exec[i].handle, i); + ret = -EINVAL; + goto err; + } + + list_add_tail(&obj->exec_list, &objects); + obj->exec_handle = exec[i].handle; + obj->exec_entry = &exec[i]; + eb_add_object(eb, obj); + } + + /* take note of the batch buffer before we might reorder the lists */ + batch_obj = list_entry(objects.prev, + struct drm_i915_gem_object, + exec_list); + + /* Move the objects en-masse into the GTT, evicting if necessary. */ + ret = i915_gem_execbuffer_reserve(ring, file, &objects); + if (ret) + goto err; + + /* The objects are in their final locations, apply the relocations. */ + ret = i915_gem_execbuffer_relocate(dev, eb, &objects); + if (ret) { + if (ret == -EFAULT) { + ret = i915_gem_execbuffer_relocate_slow(dev, file, ring, + &objects, eb, + exec, + args->buffer_count); + BUG_ON(!mutex_is_locked(&dev->struct_mutex)); + } + if (ret) + goto err; + } + + /* Set the pending read domains for the batch buffer to COMMAND */ + if (batch_obj->base.pending_write_domain) { + DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); + ret = -EINVAL; + goto err; + } + batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; + + /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure + * batch" bit. Hence we need to pin secure batches into the global gtt. + * hsw should have this fixed, but let's be paranoid and do it + * unconditionally for now. 
*/ + if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping) + i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level); + + ret = i915_gem_execbuffer_move_to_gpu(ring, &objects); + if (ret) + goto err; + + ret = i915_switch_context(ring, file, ctx_id); + if (ret) + goto err; + + if (ring == &dev_priv->ring[RCS] && + mode != dev_priv->relative_constants_mode) { + ret = intel_ring_begin(ring, 4); + if (ret) + goto err; + + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, INSTPM); + intel_ring_emit(ring, mask << 16 | mode); + intel_ring_advance(ring); + + dev_priv->relative_constants_mode = mode; + } + + if (args->flags & I915_EXEC_GEN7_SOL_RESET) { + ret = i915_reset_gen7_sol_offsets(dev, ring); + if (ret) + goto err; + } + + exec_start = batch_obj->gtt_offset + args->batch_start_offset; + exec_len = args->batch_len; + if (cliprects) { +// for (i = 0; i < args->num_cliprects; i++) { +// ret = i915_emit_box(dev, &cliprects[i], +// args->DR1, args->DR4); +// if (ret) +// goto err; + +// ret = ring->dispatch_execbuffer(ring, +// exec_start, exec_len, +// flags); +// if (ret) +// goto err; +// } + } else { + ret = ring->dispatch_execbuffer(ring, + exec_start, exec_len, + flags); + if (ret) + goto err; + } + +// i915_gem_execbuffer_move_to_active(&objects, ring); +// i915_gem_execbuffer_retire_commands(dev, file, ring); + ring->gpu_caches_dirty = true; + intel_ring_flush_all_caches(ring); + +err: + eb_destroy(eb); + while (!list_empty(&objects)) { + struct drm_i915_gem_object *obj; + + obj = list_first_entry(&objects, + struct drm_i915_gem_object, + exec_list); + list_del_init(&obj->exec_list); + drm_gem_object_unreference(&obj->base); + } + + mutex_unlock(&dev->struct_mutex); + +pre_mutex_err: + kfree(cliprects); + return ret; +} + + +int +i915_gem_execbuffer2(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_i915_gem_execbuffer2 *args = data; + struct drm_i915_gem_exec_object2 *exec2_list = NULL; + int ret; + + if (args->buffer_count < 1 || + args->buffer_count > UINT_MAX / sizeof(*exec2_list)) { + DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count); + return -EINVAL; + } + + exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count, 0); + if (exec2_list == NULL) + exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count); + if (exec2_list == NULL) { + DRM_DEBUG("Failed to allocate exec list for %d buffers\n", + args->buffer_count); + return -ENOMEM; + } + ret = copy_from_user(exec2_list, + (struct drm_i915_relocation_entry __user *) + (uintptr_t) args->buffers_ptr, + sizeof(*exec2_list) * args->buffer_count); + if (ret != 0) { + DRM_DEBUG("copy %d exec entries failed %d\n", + args->buffer_count, ret); + free(exec2_list); + return -EFAULT; + } + + ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list); + if (!ret) { + /* Copy the new buffer offsets back to the user's exec list. 
*/ + ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr, + exec2_list, + sizeof(*exec2_list) * args->buffer_count); + if (ret) { + ret = -EFAULT; + DRM_DEBUG("failed to copy %d exec entries " + "back to user (%d)\n", + args->buffer_count, ret); + } + } + + free(exec2_list); + return ret; +} diff --git a/drivers/video/drm/i915/i915_trace.h b/drivers/video/drm/i915/i915_trace.h index c94176ac21..3602da0544 100644 --- a/drivers/video/drm/i915/i915_trace.h +++ b/drivers/video/drm/i915/i915_trace.h @@ -23,5 +23,7 @@ #define trace_i915_reg_rw(a, b, c, d) #define trace_i915_ring_wait_begin(a) #define trace_i915_gem_object_pwrite(a, b, c) +#define trace_i915_gem_request_add(a, b) +#define trace_i915_gem_ring_dispatch(a, b, c); #endif diff --git a/drivers/video/drm/i915/intel_display.c b/drivers/video/drm/i915/intel_display.c index b88655f601..c433bec063 100644 --- a/drivers/video/drm/i915/intel_display.c +++ b/drivers/video/drm/i915/intel_display.c @@ -6985,6 +6985,8 @@ void intel_mark_fb_busy(struct drm_i915_gem_object *obj) struct drm_device *dev = obj->base.dev; struct drm_crtc *crtc; + ENTER(); + if (!i915_powersave) return; @@ -7002,6 +7004,8 @@ void intel_mark_fb_idle(struct drm_i915_gem_object *obj) struct drm_device *dev = obj->base.dev; struct drm_crtc *crtc; + ENTER(); + if (!i915_powersave) return; diff --git a/drivers/video/drm/i915/intel_fb.c b/drivers/video/drm/i915/intel_fb.c index 1e18b04fcd..09f9c49804 100644 --- a/drivers/video/drm/i915/intel_fb.c +++ b/drivers/video/drm/i915/intel_fb.c @@ -43,6 +43,12 @@ #include #include "i915_drv.h" +static struct drm_i915_gem_object *fb_obj; + +struct drm_i915_gem_object *get_fb_obj() +{ + return fb_obj; +}; struct fb_info *framebuffer_alloc(size_t size, struct device *dev) { @@ -144,6 +150,10 @@ static int intelfb_create(struct intel_fbdev *ifbdev, obj->gtt_space = &lfb_vm_node; obj->gtt_offset = 0; obj->pin_count = 2; + obj->cache_level = I915_CACHE_NONE; + obj->base.write_domain = 0; + obj->base.read_domains = I915_GEM_DOMAIN_GTT; + } /***********************************************************************/ @@ -182,7 +192,7 @@ static int intelfb_create(struct intel_fbdev *ifbdev, info->fix.smem_start = dev->mode_config.fb_base + obj->gtt_offset; info->fix.smem_len = size; - info->screen_base = 0xFE000000; + info->screen_base = (void*) 0xFE000000; info->screen_size = size; // memset(info->screen_base, 0, size); @@ -200,6 +210,8 @@ static int intelfb_create(struct intel_fbdev *ifbdev, mutex_unlock(&dev->struct_mutex); // vga_switcheroo_client_fb_set(dev->pdev, info); + fb_obj = obj; + return 0; out_unpin: diff --git a/drivers/video/drm/i915/kms_display.c b/drivers/video/drm/i915/kms_display.c index 300f297556..6289d4f5be 100644 --- a/drivers/video/drm/i915/kms_display.c +++ b/drivers/video/drm/i915/kms_display.c @@ -624,6 +624,23 @@ cursor_t* __stdcall select_cursor_kms(cursor_t *cursor) return old; }; +struct sna_fb +{ + uint32_t width; + uint32_t height; + uint32_t pitch; + uint32_t tiling; +}; + +int i915_fbinfo(struct sna_fb *fb) +{ + fb->width = os_display->width; + fb->height = os_display->height; + fb->pitch = os_display->pitch; + fb->tiling = 0; + + return 0; +}; #ifdef __HWA__ diff --git a/drivers/video/drm/i915/main.c b/drivers/video/drm/i915/main.c index a47857bc53..d2e6d04db2 100644 --- a/drivers/video/drm/i915/main.c +++ b/drivers/video/drm/i915/main.c @@ -70,8 +70,8 @@ u32_t drvEntry(int action, char *cmdline) if(!dbg_open(log)) { -// strcpy(log, "/tmp1/1/i915.log"); - strcpy(log, "/RD/1/DRIVERS/i915.log"); + 
strcpy(log, "/tmp1/1/i915.log"); +// strcpy(log, "/RD/1/DRIVERS/i915.log"); if(!dbg_open(log)) { @@ -132,6 +132,12 @@ u32_t drvEntry(int action, char *cmdline) #define SRV_I915_GEM_PWRITE 27 #define SRV_I915_GEM_BUSY 28 #define SRV_I915_GEM_SET_DOMAIN 29 +#define SRV_I915_GEM_MMAP 30 + +#define SRV_I915_GEM_THROTTLE 32 +#define SRV_FBINFO 33 +#define SRV_I915_GEM_EXECBUFFER2 34 + #define check_input(size) \ @@ -247,6 +253,22 @@ int _stdcall display_handler(ioctl_t *io) retval = i915_gem_set_domain_ioctl(main_device, inp, file); break; + case SRV_I915_GEM_THROTTLE: + retval = i915_gem_throttle_ioctl(main_device, inp, file); + break; + + case SRV_I915_GEM_MMAP: + retval = i915_gem_mmap_ioctl(main_device, inp, file); + break; + + case SRV_FBINFO: + retval = i915_fbinfo(inp); + break; + + case SRV_I915_GEM_EXECBUFFER2: + retval = i915_gem_execbuffer2(main_device, inp, file); + break; + }; return retval; diff --git a/drivers/video/drm/i915/utils.c b/drivers/video/drm/i915/utils.c index 2af5a352ce..77d7890779 100644 --- a/drivers/video/drm/i915/utils.c +++ b/drivers/video/drm/i915/utils.c @@ -1,3 +1,5 @@ +#include +#include #include #include #include "i915_drv.h" @@ -57,3 +59,39 @@ struct page *shmem_read_mapping_page_gfp(struct file *filep, return page; }; + +unsigned long vm_mmap(struct file *file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flag, unsigned long offset) +{ + char *mem, *ptr; + int i; + + if (unlikely(offset + PAGE_ALIGN(len) < offset)) + return -EINVAL; + if (unlikely(offset & ~PAGE_MASK)) + return -EINVAL; + + mem = UserAlloc(len); + if(unlikely(mem == NULL)) + return -ENOMEM; + + for(i = offset, ptr = mem; i < offset+len; i+= 4096, ptr+= 4096) + { + struct page *page; + + page = shmem_read_mapping_page_gfp(file, i/PAGE_SIZE,0); + + if (unlikely(IS_ERR(page))) + goto err; + + MapPage(ptr, (addr_t)page, PG_SHARED|PG_UW); + } + + return (unsigned long)mem; +err: + UserFree(mem); + return -ENOMEM; +}; + +
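
Note (not part of the patch): the structures consumed by the new SRV_I915_GEM_EXECBUFFER2 service code are the stock drm_i915_gem_execbuffer2 / drm_i915_gem_exec_object2 layouts from i915_drm.h. A minimal, hypothetical sketch of how a caller might fill them in follows, assuming only what the hunks above establish: a handle of -2 resolves to the scanout object via get_fb_obj(), the batch object must be the last entry in the buffer list, and batch_start_offset/batch_len must be 8-byte aligned (i915_gem_check_execbuffer()). The transport that carries the request to display_handler() is omitted.

/*
 * Hypothetical illustration only: build an execbuffer2 request that the
 * SRV_I915_GEM_EXECBUFFER2 case in display_handler() would hand to
 * i915_gem_execbuffer2() as 'inp'.
 */
#include <stdint.h>
#include <string.h>
#include "i915_drm.h"

static void build_execbuffer2_request(struct drm_i915_gem_execbuffer2 *execbuf,
                                      struct drm_i915_gem_exec_object2 exec[2],
                                      uint32_t batch_handle,
                                      uint32_t batch_len)
{
    memset(exec, 0, 2 * sizeof(*exec));
    exec[0].handle = (uint32_t)-2;   /* framebuffer: resolved by get_fb_obj()
                                      * instead of drm_gem_object_lookup()   */
    exec[1].handle = batch_handle;   /* batch buffer: must be the last entry,
                                      * see the list_entry(objects.prev, ...)
                                      * lookup of batch_obj above            */

    memset(execbuf, 0, sizeof(*execbuf));
    execbuf->buffers_ptr        = (uintptr_t)exec;
    execbuf->buffer_count       = 2;
    execbuf->batch_start_offset = 0;          /* both offset and length must be   */
    execbuf->batch_len          = batch_len;  /* 8-byte aligned, see the check    */
    execbuf->flags              = I915_EXEC_BLT;  /* dispatch on the BCS ring;
                                                   * rsvd1 (ctx_id) stays 0       */
    /* relocation_count is left at 0, so validate_exec_list() passes without
     * touching relocs_ptr. */
}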
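
For context on the vm_mmap() helper added to utils.c (it backs the new SRV_I915_GEM_MMAP service code), here is a sketch of its expected caller, modelled on the upstream i915_gem_mmap_ioctl(). This is an assumption for illustration, not code from this patch; the function name is deliberately suffixed _sketch. Since the KolibriOS vm_mmap() ignores addr, prot and flag (it allocates with UserAlloc() and maps each shmem page with MapPage()), those arguments are passed as 0 here, where upstream would pass PROT_READ | PROT_WRITE and MAP_SHARED.

/* Sketch only: how a mmap ioctl is expected to drive the vm_mmap() above. */
#include "i915_drv.h"

int i915_gem_mmap_ioctl_sketch(struct drm_device *dev, void *data,
                               struct drm_file *file)
{
    struct drm_i915_gem_mmap *args = data;
    struct drm_gem_object *obj;
    unsigned long addr;

    obj = drm_gem_object_lookup(dev, file, args->handle);
    if (obj == NULL)
        return -ENOENT;

    /* Map the object's shmem backing store into the caller's space;
     * only file, len and offset are honoured by the port's vm_mmap(). */
    addr = vm_mmap(obj->filp, 0, args->size, 0, 0, args->offset);

    drm_gem_object_unreference_unlocked(obj);
    if (IS_ERR((void *)addr))
        return addr;

    args->addr_ptr = (uint64_t)addr;
    return 0;
}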