From de8b1b257a3101d744f55525a6918e42b65145e2 Mon Sep 17 00:00:00 2001 From: "Sergey Semyonov (Serge)" Date: Mon, 18 Nov 2013 11:28:53 +0000 Subject: [PATCH] intel-2D: sna-2.99.902 git-svn-id: svn://kolibrios.org@4251 a494cfbc-eb01-0410-851d-a64ba20cac60 --- drivers/video/Intel-2D/brw/brw_eu.h | 2 - drivers/video/Intel-2D/compiler.h | 33 +- drivers/video/Intel-2D/gen3_render.c | 1255 ++- drivers/video/Intel-2D/gen4_render.c | 1168 ++- drivers/video/Intel-2D/gen4_vertex.c | 1570 +++- drivers/video/Intel-2D/gen4_vertex.h | 3 +- drivers/video/Intel-2D/gen5_render.c | 1195 ++- drivers/video/Intel-2D/gen6_render.c | 7093 +++++++++-------- drivers/video/Intel-2D/gen7_render.c | 969 ++- drivers/video/Intel-2D/gen7_render.h | 2 + drivers/video/Intel-2D/i915_pciids.h | 211 + drivers/video/Intel-2D/intel_driver.h | 246 +- drivers/video/Intel-2D/intel_list.h | 1 + drivers/video/Intel-2D/{kgem-sna.c => kgem.c} | 1501 +++- drivers/video/Intel-2D/kgem.h | 154 +- drivers/video/Intel-2D/kgem_debug.c | 27 - drivers/video/Intel-2D/kgem_debug_gen3.c | 1599 ++++ drivers/video/Intel-2D/kgem_debug_gen4.c | 687 ++ drivers/video/Intel-2D/kgem_debug_gen5.c | 662 ++ drivers/video/Intel-2D/kgem_debug_gen6.c | 2 - drivers/video/Intel-2D/kgem_debug_gen7.c | 715 ++ drivers/video/Intel-2D/sna.c | 536 +- drivers/video/Intel-2D/sna.h | 286 +- drivers/video/Intel-2D/sna_cpu.c | 116 + drivers/video/Intel-2D/sna_cpuid.h | 86 + drivers/video/Intel-2D/sna_render.h | 1515 ++-- drivers/video/Intel-2D/sna_render_inline.h | 15 +- drivers/video/Intel-2D/sna_transform.c | 147 + 28 files changed, 16198 insertions(+), 5598 deletions(-) create mode 100644 drivers/video/Intel-2D/i915_pciids.h rename drivers/video/Intel-2D/{kgem-sna.c => kgem.c} (72%) create mode 100644 drivers/video/Intel-2D/kgem_debug_gen3.c create mode 100644 drivers/video/Intel-2D/kgem_debug_gen4.c create mode 100644 drivers/video/Intel-2D/kgem_debug_gen5.c create mode 100644 drivers/video/Intel-2D/kgem_debug_gen7.c create mode 100644 
drivers/video/Intel-2D/sna_cpu.c create mode 100644 drivers/video/Intel-2D/sna_cpuid.h create mode 100644 drivers/video/Intel-2D/sna_transform.c diff --git a/drivers/video/Intel-2D/brw/brw_eu.h b/drivers/video/Intel-2D/brw/brw_eu.h index 0974760962..2525f15fce 100644 --- a/drivers/video/Intel-2D/brw/brw_eu.h +++ b/drivers/video/Intel-2D/brw/brw_eu.h @@ -2239,8 +2239,6 @@ void brw_CMP(struct brw_compile *p, struct brw_reg src0, struct brw_reg src1); -void brw_print_reg(struct brw_reg reg); - static inline void brw_math_invert(struct brw_compile *p, struct brw_reg dst, struct brw_reg src) diff --git a/drivers/video/Intel-2D/compiler.h b/drivers/video/Intel-2D/compiler.h index 23ec31c326..ff41217925 100644 --- a/drivers/video/Intel-2D/compiler.h +++ b/drivers/video/Intel-2D/compiler.h @@ -37,19 +37,46 @@ #define must_check __attribute__((warn_unused_result)) #define constant __attribute__((const)) #define pure __attribute__((pure)) -#define __packed__ __attribute__((__packed__)) +#define tightly_packed __attribute__((__packed__)) #define flatten __attribute__((flatten)) +#define page_aligned __attribute__((aligned(4096))) #else #define likely(expr) (expr) #define unlikely(expr) (expr) #define noinline -#define force_inline +#define force_inline inline #define fastcall #define must_check #define constant #define pure -#define __packed__ +#define tightly_packed #define flatten +#define page_aligned +#endif + +#define HAS_GCC(major, minor) defined(__GNUC__) && (__GNUC__ > (major) || __GNUC__ == (major) && __GNUC_MINOR__ >= (minor)) + +#if HAS_GCC(4, 5) +#define sse2 __attribute__((target("sse2,fpmath=sse"))) +#define sse4_2 __attribute__((target("sse4.2,sse2,fpmath=sse"))) +#endif + +#if HAS_GCC(4, 7) +#define avx2 __attribute__((target("avx2,sse4.2,sse2,fpmath=sse"))) +#endif + +#if HAS_GCC(4, 6) && defined(__OPTIMIZE__) +#define fast __attribute__((optimize("Ofast"))) +#else +#define fast +#endif + +#if HAS_GCC(4, 6) && defined(__OPTIMIZE__) +#define fast_memcpy 
__attribute__((optimize("Ofast"))) __attribute__((target("inline-all-stringops"))) +#elif HAS_GCC(4, 5) && defined(__OPTIMIZE__) +#define fast_memcpy __attribute__((target("inline-all-stringops"))) +#else +#define fast_memcpy #endif #ifdef HAVE_VALGRIND diff --git a/drivers/video/Intel-2D/gen3_render.c b/drivers/video/Intel-2D/gen3_render.c index 53717a5034..bd44f09c23 100644 --- a/drivers/video/Intel-2D/gen3_render.c +++ b/drivers/video/Intel-2D/gen3_render.c @@ -107,7 +107,15 @@ static const struct formatinfo { {PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, false}, {PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, false}, {PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, false}, - {PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false} + {PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false}, + {PICT_a2r10g10b10, PICT_x2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010, false}, + {PICT_a2b10g10r10, PICT_x2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010, false}, + {PICT_r5g6b5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, false}, + {PICT_b5g6r5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, true}, + {PICT_a1r5g5b5, PICT_x1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555, false}, + {PICT_a1b5g5r5, PICT_x1b5g5r5, MAPSURF_16BIT | MT_16BIT_ARGB1555, true}, + {PICT_a4r4g4b4, PICT_x4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444, false}, + {PICT_a4b4g4r4, PICT_x4b4g4r4, MAPSURF_16BIT | MT_16BIT_ARGB4444, true}, }; #define xFixedToDouble(f) pixman_fixed_to_double(f) @@ -187,7 +195,14 @@ static bool gen3_dst_rb_reversed(uint32_t format) switch (format) { case PICT_a8r8g8b8: case PICT_x8r8g8b8: + case PICT_r5g6b5: + case PICT_a1r5g5b5: + case PICT_x1r5g5b5: + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: case PICT_a8: + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: return false; default: return true; @@ -207,13 +222,255 @@ static uint32_t gen3_get_dst_format(uint32_t format) case PICT_a8b8g8r8: case PICT_x8b8g8r8: return BIAS | COLR_BUF_ARGB8888; + case PICT_r5g6b5: + case 
PICT_b5g6r5: + return BIAS | COLR_BUF_RGB565; + case PICT_a1r5g5b5: + case PICT_x1r5g5b5: + case PICT_a1b5g5r5: + case PICT_x1b5g5r5: + return BIAS | COLR_BUF_ARGB1555; + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + case PICT_a2b10g10r10: + case PICT_x2b10g10r10: + return BIAS | COLR_BUF_ARGB2AAA; case PICT_a8: return BIAS | COLR_BUF_8BIT; + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + case PICT_a4b4g4r4: + case PICT_x4b4g4r4: + return BIAS | COLR_BUF_ARGB4444; } #undef BIAS } +#if 0 +static bool gen3_check_repeat(PicturePtr p) +{ + if (!p->repeat) + return true; + + switch (p->repeatType) { + case RepeatNone: + case RepeatNormal: + case RepeatPad: + case RepeatReflect: + return true; + default: + return false; + } +} + +static uint32_t gen3_filter(uint32_t filter) +{ + switch (filter) { + default: + assert(0); + case PictFilterNearest: + return (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT | + FILTER_NEAREST << SS2_MIN_FILTER_SHIFT | + MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT); + case PictFilterBilinear: + return (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT | + FILTER_LINEAR << SS2_MIN_FILTER_SHIFT | + MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT); + } +} + +static bool gen3_check_filter(PicturePtr p) +{ + switch (p->filter) { + case PictFilterNearest: + case PictFilterBilinear: + return true; + default: + return false; + } +} +fastcall static void +gen3_emit_composite_primitive_identity_gradient(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + int16_t dst_x, dst_y; + int16_t src_x, src_y; + + dst_x = r->dst.x + op->dst.x; + dst_y = r->dst.y + op->dst.y; + src_x = r->src.x + op->src.offset[0]; + src_y = r->src.y + op->src.offset[1]; + + gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); + OUT_VERTEX(src_x + r->width); + OUT_VERTEX(src_y + r->height); + + gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); + OUT_VERTEX(src_x); + OUT_VERTEX(src_y + r->height); + + gen3_emit_composite_dstcoord(sna, 
dst_x, dst_y); + OUT_VERTEX(src_x); + OUT_VERTEX(src_y); +} + +fastcall static void +gen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + v[0] = box->x2; + v[1] = box->y2; + v[2] = box->x2 + op->src.offset[0]; + v[3] = box->y2 + op->src.offset[1]; + + v[4] = box->x1; + v[5] = box->y2; + v[6] = box->x1 + op->src.offset[0]; + v[7] = box->y2 + op->src.offset[1]; + + v[8] = box->x1; + v[9] = box->y1; + v[10] = box->x1 + op->src.offset[0]; + v[11] = box->y1 + op->src.offset[1]; + + v += 12; + box++; + } while (--nbox); +} +fastcall static void +gen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + const PictTransform *transform = op->src.transform; + + do { + v[0] = box->x2; + v[1] = box->y2; + _sna_get_transformed_scaled(box->x2 + op->src.offset[0], + box->y2 + op->src.offset[1], + transform, op->src.scale, + &v[2], &v[3]); + + v[4] = box->x1; + v[5] = box->y2; + _sna_get_transformed_scaled(box->x1 + op->src.offset[0], + box->y2 + op->src.offset[1], + transform, op->src.scale, + &v[6], &v[7]); + + v[8] = box->x1; + v[9] = box->y1; + _sna_get_transformed_scaled(box->x1 + op->src.offset[0], + box->y1 + op->src.offset[1], + transform, op->src.scale, + &v[10], &v[11]); + + box++; + v += 12; + } while (--nbox); +} + +fastcall static void +gen3_emit_composite_primitive_identity_source(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float w = r->width; + float h = r->height; + float *v; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; + + v[8] = v[4] = r->dst.x + op->dst.x; + v[0] = v[4] + w; + + v[9] = r->dst.y + op->dst.y; + v[5] = v[1] = v[9] + h; + + v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; + v[2] = v[6] + w * op->src.scale[0]; + + v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; + v[7] = v[3] = 
v[11] + h * op->src.scale[1]; +} + +fastcall static void +gen3_emit_composite_boxes_identity_source(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + v[0] = box->x2 + op->dst.x; + v[8] = v[4] = box->x1 + op->dst.x; + v[5] = v[1] = box->y2 + op->dst.y; + v[9] = box->y1 + op->dst.y; + + v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; + v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; + + v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; + v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; + + v += 12; + box++; + } while (--nbox); +} + +fastcall static void +gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float w = r->width; + float h = r->height; + float *v; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; + + v[8] = v[4] = r->dst.x; + v[9] = r->dst.y; + + v[0] = v[4] + w; + v[5] = v[1] = v[9] + h; + + v[10] = v[6] = r->src.x * op->src.scale[0]; + v[11] = r->src.y * op->src.scale[1]; + + v[2] = v[6] + w * op->src.scale[0]; + v[7] = v[3] = v[11] + h * op->src.scale[1]; +} +fastcall static void +gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float w = r->width; + float h = r->height; + float *v; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; + + v[8] = v[4] = r->dst.x + op->dst.x; + v[0] = v[4] + w; + + v[9] = r->dst.y + op->dst.y; + v[5] = v[1] = v[9] + h; + + v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0]; + v[2] = v[6] + w * op->mask.scale[0]; + + v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1]; + v[7] = v[3] = v[11] + h * op->mask.scale[1]; +} +#endif fastcall static void gen3_emit_composite_primitive_identity_source_mask(struct sna *sna, @@ -884,6 
+1141,7 @@ static void gen3_emit_target(struct sna *sna, state->current_dst = bo->unique_id; } + assert(bo->exec); kgem_bo_mark_dirty(bo); } @@ -938,6 +1196,7 @@ static void gen3_emit_composite_state(struct sna *sna, ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT); ss2 |= S2_TEXCOORD_FMT(tex_count, op->src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D); + assert(op->src.card_format); map[tex_count * 2 + 0] = op->src.card_format | gen3_ms_tiling(op->src.bo->tiling) | @@ -973,6 +1232,7 @@ static void gen3_emit_composite_state(struct sna *sna, ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT); ss2 |= S2_TEXCOORD_FMT(tex_count, op->mask.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D); + assert(op->mask.card_format); map[tex_count * 2 + 0] = op->mask.card_format | gen3_ms_tiling(op->mask.bo->tiling) | @@ -1357,8 +1617,8 @@ start: goto start; } - assert(op->floats_per_rect >= vertex_space(sna)); assert(rem <= vertex_space(sna)); + assert(op->floats_per_rect <= rem); if (want > 1 && want * op->floats_per_rect > rem) want = rem / op->floats_per_rect; sna->render.vertex_index += 3*want; @@ -1494,63 +1754,173 @@ static bool gen3_composite_channel_set_format(struct sna_composite_channel *chan return false; } +#if 0 +static int +gen3_composite_picture(struct sna *sna, + PicturePtr picture, + struct sna_composite_op *op, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y, + bool precise) +{ + PixmapPtr pixmap; + uint32_t color; + int16_t dx, dy; + DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", + __FUNCTION__, x, y, w, h, dst_x, dst_y)); + channel->card_format = 0; + if (picture->pDrawable == NULL) { + SourcePict *source = picture->pSourcePict; + int ret = -1; + switch (source->type) { + case SourcePictTypeSolidFill: + DBG(("%s: solid fill [%08x], format %08x\n", + __FUNCTION__, + (unsigned)source->solidFill.color, + (unsigned)picture->format)); + ret = gen3_init_solid(channel, 
source->solidFill.color); + break; + case SourcePictTypeLinear: + ret = gen3_init_linear(sna, picture, op, channel, + x - dst_x, y - dst_y); + break; + case SourcePictTypeRadial: + ret = gen3_init_radial(sna, picture, op, channel, + x - dst_x, y - dst_y); + break; + } + if (ret == -1) { + if (!precise) + ret = sna_render_picture_approximate_gradient(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + if (ret == -1) + ret = sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + } + return ret; + } + if (picture->alphaMap) { + DBG(("%s -- fallback, alphamap\n", __FUNCTION__)); + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + } + if (sna_picture_is_solid(picture, &color)) { + DBG(("%s: solid drawable [%08x]\n", __FUNCTION__, color)); + return gen3_init_solid(channel, color); + } + if (sna_picture_is_clear(picture, x, y, w, h, &color)) { + DBG(("%s: clear drawable [%08x]\n", __FUNCTION__, color)); + return gen3_init_solid(channel, color_convert(color, picture->format, PICT_a8r8g8b8)); + } + if (!gen3_check_repeat(picture)) + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + if (!gen3_check_filter(picture)) + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + channel->repeat = picture->repeat ? 
picture->repeatType : RepeatNone; + channel->filter = picture->filter; + channel->pict_format = picture->format; + pixmap = get_drawable_pixmap(picture->pDrawable); + get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); + x += dx + picture->pDrawable->x; + y += dy + picture->pDrawable->y; + if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) { + DBG(("%s: integer translation (%d, %d), removing\n", + __FUNCTION__, dx, dy)); + x += dx; + y += dy; + channel->transform = NULL; + channel->filter = PictFilterNearest; + } else { + channel->transform = picture->transform; + channel->is_affine = sna_transform_is_affine(picture->transform); + } + if (!gen3_composite_channel_set_format(channel, picture->format) && + !gen3_composite_channel_set_xformat(picture, channel, x, y, w, h)) + return sna_render_picture_convert(sna, picture, channel, pixmap, + x, y, w, h, dst_x, dst_y, + false); + assert(channel->card_format); + if (too_large(pixmap->drawable.width, pixmap->drawable.height)) { + DBG(("%s: pixmap too large (%dx%d), extracting (%d, %d)x(%d,%d)\n", + __FUNCTION__, + pixmap->drawable.width, pixmap->drawable.height, + x, y, w, h)); + return sna_render_picture_extract(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + } + return sna_render_pixmap_bo(sna, channel, pixmap, + x, y, w, h, dst_x, dst_y); +} +static inline bool +source_use_blt(struct sna *sna, PicturePtr picture) +{ + /* If it is a solid, try to use the BLT paths */ + if (!picture->pDrawable) + return picture->pSourcePict->type == SourcePictTypeSolidFill; + if (picture->pDrawable->width == 1 && + picture->pDrawable->height == 1 && + picture->repeat) + return true; + if (too_large(picture->pDrawable->width, picture->pDrawable->height)) + return true; + return !is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER); +} +static bool +try_blt(struct sna *sna, + PicturePtr dst, + PicturePtr src, + int width, int height) +{ + if (sna->kgem.mode != KGEM_RENDER) { + DBG(("%s: already performing 
BLT\n", __FUNCTION__)); + return true; + } + if (too_large(width, height)) { + DBG(("%s: operation too large for 3D pipe (%d, %d)\n", + __FUNCTION__, width, height)); + return true; + } + if (too_large(dst->pDrawable->width, dst->pDrawable->height)) { + DBG(("%s: target too large for 3D pipe (%d, %d)\n", + __FUNCTION__, + dst->pDrawable->width, dst->pDrawable->height)); + return true; + } - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + /* is the source picture only in cpu memory e.g. a shm pixmap? */ + return source_use_blt(sna, src); +} +#endif static void gen3_align_vertex(struct sna *sna, @@ -1572,144 +1942,6 @@ gen3_align_vertex(struct sna *sna, } } - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - static inline bool is_constant_ps(uint32_t type) { switch (type) { @@ -1724,11 +1956,472 @@ static inline bool is_constant_ps(uint32_t type) } } +#if 0 +static bool +gen3_composite_fallback(struct sna *sna, + uint8_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst) +{ + PixmapPtr src_pixmap; + PixmapPtr mask_pixmap; + PixmapPtr dst_pixmap; + bool src_fallback, mask_fallback; + + if (!gen3_check_dst_format(dst->format)) { + DBG(("%s: unknown destination format: %d\n", + __FUNCTION__, dst->format)); + return true; + } + + dst_pixmap = get_drawable_pixmap(dst->pDrawable); + + src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; + src_fallback = source_fallback(src, src_pixmap, + dst->polyMode == PolyModePrecise); + + if (mask) { + mask_pixmap = mask->pDrawable ? 
get_drawable_pixmap(mask->pDrawable) : NULL; + mask_fallback = source_fallback(mask, mask_pixmap, + dst->polyMode == PolyModePrecise); + } else { + mask_pixmap = NULL; + mask_fallback = false; + } + + /* If we are using the destination as a source and need to + * readback in order to upload the source, do it all + * on the cpu. + */ + if (src_pixmap == dst_pixmap && src_fallback) { + DBG(("%s: src is dst and will fallback\n",__FUNCTION__)); + return true; + } + if (mask_pixmap == dst_pixmap && mask_fallback) { + DBG(("%s: mask is dst and will fallback\n",__FUNCTION__)); + return true; + } + + if (mask && + mask->componentAlpha && PICT_FORMAT_RGB(mask->format) && + gen3_blend_op[op].src_alpha && + gen3_blend_op[op].src_blend != BLENDFACT_ZERO && + op != PictOpOver) { + DBG(("%s: component-alpha mask with op=%d, should fallback\n", + __FUNCTION__, op)); + return true; + } + + /* If anything is on the GPU, push everything out to the GPU */ + if (dst_use_gpu(dst_pixmap)) { + DBG(("%s: dst is already on the GPU, try to use GPU\n", + __FUNCTION__)); + return false; + } + + if (src_pixmap && !src_fallback) { + DBG(("%s: src is already on the GPU, try to use GPU\n", + __FUNCTION__)); + return false; + } + if (mask_pixmap && !mask_fallback) { + DBG(("%s: mask is already on the GPU, try to use GPU\n", + __FUNCTION__)); + return false; + } + + /* However if the dst is not on the GPU and we need to + * render one of the sources using the CPU, we may + * as well do the entire operation in place on the CPU. 
+ */ + if (src_fallback) { + DBG(("%s: dst is on the CPU and src will fallback\n", + __FUNCTION__)); + return true; + } + + if (mask && mask_fallback) { + DBG(("%s: dst is on the CPU and mask will fallback\n", + __FUNCTION__)); + return true; + } + + if (too_large(dst_pixmap->drawable.width, + dst_pixmap->drawable.height) && + dst_is_cpu(dst_pixmap)) { + DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); + return true; + } + + DBG(("%s: dst is not on the GPU and the operation should not fallback: use-cpu? %d\n", + __FUNCTION__, dst_use_cpu(dst_pixmap))); + return dst_use_cpu(dst_pixmap); +} + +static bool +gen3_render_composite(struct sna *sna, + uint8_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t mask_x, int16_t mask_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp) +{ + DBG(("%s()\n", __FUNCTION__)); + + if (op >= ARRAY_SIZE(gen3_blend_op)) { + DBG(("%s: fallback due to unhandled blend op: %d\n", + __FUNCTION__, op)); + return false; + } + + /* Try to use the BLT engine unless it implies a + * 3D -> 2D context switch. 
+ */ + if (mask == NULL && + try_blt(sna, dst, src, width, height) && + sna_blt_composite(sna, + op, src, dst, + src_x, src_y, + dst_x, dst_y, + width, height, + tmp, false)) + return true; + + if (gen3_composite_fallback(sna, op, src, mask, dst)) + return false; + + if (need_tiling(sna, width, height)) + return sna_tiling_composite(op, src, mask, dst, + src_x, src_y, + mask_x, mask_y, + dst_x, dst_y, + width, height, + tmp); + + if (!gen3_composite_set_target(sna, tmp, dst, + dst_x, dst_y, width, height)) { + DBG(("%s: unable to set render target\n", + __FUNCTION__)); + return false; + } + + tmp->op = op; + tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format); + if (too_large(tmp->dst.width, tmp->dst.height) || + !gen3_check_pitch_3d(tmp->dst.bo)) { + if (!sna_render_composite_redirect(sna, tmp, + dst_x, dst_y, width, height, + op > PictOpSrc || dst->pCompositeClip->data)) + return false; + } + + tmp->u.gen3.num_constants = 0; + tmp->src.u.gen3.type = SHADER_TEXTURE; + tmp->src.is_affine = true; + DBG(("%s: preparing source\n", __FUNCTION__)); + switch (gen3_composite_picture(sna, src, tmp, &tmp->src, + src_x, src_y, + width, height, + dst_x, dst_y, + dst->polyMode == PolyModePrecise)) { + case -1: + goto cleanup_dst; + case 0: + tmp->src.u.gen3.type = SHADER_ZERO; + break; + case 1: + if (mask == NULL && tmp->src.bo && + sna_blt_composite__convert(sna, + dst_x, dst_y, width, height, + tmp)) + return true; + + gen3_composite_channel_convert(&tmp->src); + break; + } + DBG(("%s: source type=%d\n", __FUNCTION__, tmp->src.u.gen3.type)); + + tmp->mask.u.gen3.type = SHADER_NONE; + tmp->mask.is_affine = true; + tmp->need_magic_ca_pass = false; + tmp->has_component_alpha = false; + if (mask && tmp->src.u.gen3.type != SHADER_ZERO) { + if (!reuse_source(sna, + src, &tmp->src, src_x, src_y, + mask, &tmp->mask, mask_x, mask_y)) { + tmp->mask.u.gen3.type = SHADER_TEXTURE; + DBG(("%s: preparing mask\n", __FUNCTION__)); + switch (gen3_composite_picture(sna, mask, tmp, 
&tmp->mask, + mask_x, mask_y, + width, height, + dst_x, dst_y, + dst->polyMode == PolyModePrecise)) { + case -1: + goto cleanup_src; + case 0: + tmp->mask.u.gen3.type = SHADER_ZERO; + break; + case 1: + gen3_composite_channel_convert(&tmp->mask); + break; + } + } + DBG(("%s: mask type=%d\n", __FUNCTION__, tmp->mask.u.gen3.type)); + if (tmp->mask.u.gen3.type == SHADER_ZERO) { + if (tmp->src.bo) { + kgem_bo_destroy(&sna->kgem, + tmp->src.bo); + tmp->src.bo = NULL; + } + tmp->src.u.gen3.type = SHADER_ZERO; + tmp->mask.u.gen3.type = SHADER_NONE; + } + + if (tmp->mask.u.gen3.type != SHADER_NONE) { + if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { + /* Check if it's component alpha that relies on a source alpha + * and on the source value. We can only get one of those + * into the single source value that we get to blend with. + */ + DBG(("%s: component-alpha mask: %d\n", + __FUNCTION__, tmp->mask.u.gen3.type)); + tmp->has_component_alpha = true; + if (tmp->mask.u.gen3.type == SHADER_WHITE) { + tmp->mask.u.gen3.type = SHADER_NONE; + tmp->has_component_alpha = false; + } else if (gen3_blend_op[op].src_alpha && + gen3_blend_op[op].src_blend != BLENDFACT_ZERO) { + if (op != PictOpOver) + goto cleanup_mask; + + tmp->need_magic_ca_pass = true; + tmp->op = PictOpOutReverse; + } + } else { + if (tmp->mask.is_opaque) { + tmp->mask.u.gen3.type = SHADER_NONE; + } else if (is_constant_ps(tmp->src.u.gen3.type) && + is_constant_ps(tmp->mask.u.gen3.type)) { + uint32_t v; + + v = multa(tmp->src.u.gen3.mode, + tmp->mask.u.gen3.mode, + 24); + v |= multa(tmp->src.u.gen3.mode, + tmp->mask.u.gen3.mode, + 16); + v |= multa(tmp->src.u.gen3.mode, + tmp->mask.u.gen3.mode, + 8); + v |= multa(tmp->src.u.gen3.mode, + tmp->mask.u.gen3.mode, + 0); + + DBG(("%s: combining constant source/mask: %x x %x -> %x\n", + __FUNCTION__, + tmp->src.u.gen3.mode, + tmp->mask.u.gen3.mode, + v)); + + tmp->src.u.gen3.type = SHADER_CONSTANT; + tmp->src.u.gen3.mode = v; + tmp->src.is_opaque = false; + + 
tmp->mask.u.gen3.type = SHADER_NONE; + } + } + } + } + DBG(("%s: final src/mask type=%d/%d, affine=%d/%d\n", __FUNCTION__, + tmp->src.u.gen3.type, tmp->mask.u.gen3.type, + tmp->src.is_affine, tmp->mask.is_affine)); + + tmp->prim_emit = gen3_emit_composite_primitive; + if (is_constant_ps(tmp->mask.u.gen3.type)) { + switch (tmp->src.u.gen3.type) { + case SHADER_NONE: + case SHADER_ZERO: + case SHADER_BLACK: + case SHADER_WHITE: + case SHADER_CONSTANT: +#if defined(sse2) && !defined(__x86_64__) + if (sna->cpu_features & SSE2) { + tmp->prim_emit = gen3_emit_composite_primitive_constant__sse2; + tmp->emit_boxes = gen3_emit_composite_boxes_constant__sse2; + } else +#endif + { + tmp->prim_emit = gen3_emit_composite_primitive_constant; + tmp->emit_boxes = gen3_emit_composite_boxes_constant; + } + + break; + case SHADER_LINEAR: + case SHADER_RADIAL: + if (tmp->src.transform == NULL) { +#if defined(sse2) && !defined(__x86_64__) + if (sna->cpu_features & SSE2) { + tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient__sse2; + tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient__sse2; + } else +#endif + { + tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient; + tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient; + } + } else if (tmp->src.is_affine) { + tmp->src.scale[1] = tmp->src.scale[0] = 1. 
/ tmp->src.transform->matrix[2][2]; +#if defined(sse2) && !defined(__x86_64__) + if (sna->cpu_features & SSE2) { + tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient__sse2; + tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient__sse2; + } else +#endif + { + tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient; + tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient; + } + } + break; + case SHADER_TEXTURE: + if (tmp->src.transform == NULL) { + if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0) { +#if defined(sse2) && !defined(__x86_64__) + if (sna->cpu_features & SSE2) { + tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset__sse2; + tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset__sse2; + } else +#endif + { + tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset; + tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset; + } + } else { +#if defined(sse2) && !defined(__x86_64__) + if (sna->cpu_features & SSE2) { + tmp->prim_emit = gen3_emit_composite_primitive_identity_source__sse2; + tmp->emit_boxes = gen3_emit_composite_boxes_identity_source__sse2; + } else +#endif + { + tmp->prim_emit = gen3_emit_composite_primitive_identity_source; + tmp->emit_boxes = gen3_emit_composite_boxes_identity_source; + } + } + } else if (tmp->src.is_affine) { + tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; + tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; +#if defined(sse2) && !defined(__x86_64__) + if (sna->cpu_features & SSE2) { + tmp->prim_emit = gen3_emit_composite_primitive_affine_source__sse2; + tmp->emit_boxes = gen3_emit_composite_boxes_affine_source__sse2; + } else +#endif + { + tmp->prim_emit = gen3_emit_composite_primitive_affine_source; + tmp->emit_boxes = gen3_emit_composite_boxes_affine_source; + } + } + break; + } + } else if (tmp->mask.u.gen3.type == SHADER_TEXTURE) { + if (tmp->mask.transform == NULL) { + if 
(is_constant_ps(tmp->src.u.gen3.type)) { + if ((tmp->mask.offset[0]|tmp->mask.offset[1]|tmp->dst.x|tmp->dst.y) == 0) { +#if defined(sse2) && !defined(__x86_64__) + if (sna->cpu_features & SSE2) { + tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2; + } else +#endif + { + tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset; + } + } else { +#if defined(sse2) && !defined(__x86_64__) + if (sna->cpu_features & SSE2) { + tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask__sse2; + } else +#endif + { + tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask; + } + } + } else if (tmp->src.transform == NULL) { +#if defined(sse2) && !defined(__x86_64__) + if (sna->cpu_features & SSE2) { + tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask__sse2; + } else +#endif + { + tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask; + } + } else if (tmp->src.is_affine) { + tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; + tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; +#if defined(sse2) && !defined(__x86_64__) + if (sna->cpu_features & SSE2) { + tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask__sse2; + } else +#endif + { + tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask; + } + } + } + } + tmp->floats_per_vertex = 2; + if (!is_constant_ps(tmp->src.u.gen3.type)) + tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4; + if (!is_constant_ps(tmp->mask.u.gen3.type)) + tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4; + DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__, + !is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0, + !is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 
2 : 4 : 0, + tmp->floats_per_vertex, + tmp->prim_emit != gen3_emit_composite_primitive)); + tmp->floats_per_rect = 3 * tmp->floats_per_vertex; + tmp->blt = gen3_render_composite_blt; + tmp->box = gen3_render_composite_box; + tmp->boxes = gen3_render_composite_boxes__blt; + if (tmp->emit_boxes) { + tmp->boxes = gen3_render_composite_boxes; + tmp->thread_boxes = gen3_render_composite_boxes__thread; + } + tmp->done = gen3_render_composite_done; + if (!kgem_check_bo(&sna->kgem, + tmp->dst.bo, tmp->src.bo, tmp->mask.bo, + NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, + tmp->dst.bo, tmp->src.bo, tmp->mask.bo, + NULL)) + goto cleanup_mask; + } + gen3_emit_composite_state(sna, tmp); + gen3_align_vertex(sna, tmp); + return true; +cleanup_mask: + if (tmp->mask.bo) + kgem_bo_destroy(&sna->kgem, tmp->mask.bo); +cleanup_src: + if (tmp->src.bo) + kgem_bo_destroy(&sna->kgem, tmp->src.bo); +cleanup_dst: + if (tmp->redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp->dst.bo); + return false; +} +#endif @@ -1832,6 +2525,192 @@ static inline bool is_constant_ps(uint32_t type) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +static void gen3_render_flush(struct sna *sna) +{ + gen3_vertex_close(sna); + + assert(sna->render.vertex_reloc[0] == 0); + assert(sna->render.vertex_offset == 0); +} + +static void +gen3_render_fini(struct sna *sna) +{ +} + +const char *gen3_render_init(struct sna *sna, const char *backend) +{ + struct sna_render *render = &sna->render; + +#if 0 +#if !NO_COMPOSITE + render->composite = gen3_render_composite; + render->prefer_gpu |= PREFER_GPU_RENDER; +#endif +#if !NO_COMPOSITE_SPANS + render->check_composite_spans = gen3_check_composite_spans; + render->composite_spans = gen3_render_composite_spans; + render->prefer_gpu |= 
PREFER_GPU_SPANS; +#endif + + render->video = gen3_render_video; + + render->copy_boxes = gen3_render_copy_boxes; + render->copy = gen3_render_copy; + + render->fill_boxes = gen3_render_fill_boxes; + render->fill = gen3_render_fill; + render->fill_one = gen3_render_fill_one; +#endif + + render->blit_tex = gen3_blit_tex; + render->caps = HW_BIT_BLIT | HW_TEX_BLIT; + + render->reset = gen3_render_reset; + render->flush = gen3_render_flush; + render->fini = gen3_render_fini; + + render->max_3d_size = MAX_3D_SIZE; + render->max_3d_pitch = MAX_3D_PITCH; + + sna->kgem.retire = gen3_render_retire; + sna->kgem.expire = gen3_render_expire; + return "Alviso (gen3)"; +} + static bool gen3_blit_tex(struct sna *sna, uint8_t op, bool scale, @@ -1933,39 +2812,3 @@ gen3_blit_tex(struct sna *sna, gen3_align_vertex(sna, tmp); return true; } - -static void gen3_render_flush(struct sna *sna) -{ - gen3_vertex_close(sna); - - assert(sna->render.vertex_reloc[0] == 0); - assert(sna->render.vertex_offset == 0); -} - -static void -gen3_render_fini(struct sna *sna) -{ -} - -bool gen3_render_init(struct sna *sna) -{ - struct sna_render *render = &sna->render; - - -// render->video = gen3_render_video; - - render->blit_tex = gen3_blit_tex; - - render->reset = gen3_render_reset; - render->flush = gen3_render_flush; - render->fini = gen3_render_fini; - - render->max_3d_size = MAX_3D_SIZE; - render->max_3d_pitch = MAX_3D_PITCH; - - render->caps = HW_BIT_BLIT | HW_TEX_BLIT; - - sna->kgem.retire = gen3_render_retire; - sna->kgem.expire = gen3_render_expire; - return true; -} diff --git a/drivers/video/Intel-2D/gen4_render.c b/drivers/video/Intel-2D/gen4_render.c index 1ccfafe238..5d61ee220e 100644 --- a/drivers/video/Intel-2D/gen4_render.c +++ b/drivers/video/Intel-2D/gen4_render.c @@ -51,6 +51,7 @@ */ #define FORCE_SPANS 0 #define FORCE_NONRECTILINEAR_SPANS -1 +#define FORCE_FLUSH 1 /* https://bugs.freedesktop.org/show_bug.cgi?id=55500 */ #define NO_COMPOSITE 0 #define NO_COMPOSITE_SPANS 0 @@ 
-61,6 +62,8 @@ #define NO_FILL_BOXES 0 #define NO_VIDEO 0 +#define MAX_FLUSH_VERTICES 6 + #define GEN4_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) /* Set up a default static partitioning of the URB, which is supposed to @@ -286,8 +289,24 @@ static uint32_t gen4_get_card_format(PictFormat format) return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM; case PICT_x8r8g8b8: return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM; + case PICT_a8b8g8r8: + return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM; + case PICT_x8b8g8r8: + return GEN4_SURFACEFORMAT_R8G8B8X8_UNORM; + case PICT_a2r10g10b10: + return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM; + case PICT_x2r10g10b10: + return GEN4_SURFACEFORMAT_B10G10R10X2_UNORM; + case PICT_r8g8b8: + return GEN4_SURFACEFORMAT_R8G8B8_UNORM; + case PICT_r5g6b5: + return GEN4_SURFACEFORMAT_B5G6R5_UNORM; + case PICT_a1r5g5b5: + return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM; case PICT_a8: return GEN4_SURFACEFORMAT_A8_UNORM; + case PICT_a4r4g4b4: + return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM; } } @@ -299,8 +318,22 @@ static uint32_t gen4_get_dest_format(PictFormat format) case PICT_a8r8g8b8: case PICT_x8r8g8b8: return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM; + case PICT_a8b8g8r8: + case PICT_x8b8g8r8: + return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM; + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM; + case PICT_r5g6b5: + return GEN4_SURFACEFORMAT_B5G6R5_UNORM; + case PICT_x1r5g5b5: + case PICT_a1r5g5b5: + return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM; case PICT_a8: return GEN4_SURFACEFORMAT_A8_UNORM; + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM; } } @@ -393,7 +426,7 @@ gen4_bind_bo(struct sna *sna, assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo)); /* After the first bind, we manage the cache domains within the batch */ - offset = kgem_bo_get_binding(bo, format); + offset = kgem_bo_get_binding(bo, format | is_dst << 31); if (offset) { if (is_dst) kgem_bo_mark_dirty(bo); @@ -408,9 +441,10 @@ gen4_bind_bo(struct sna 
*sna, GEN4_SURFACE_BLEND_ENABLED | format << GEN4_SURFACE_FORMAT_SHIFT); - if (is_dst) + if (is_dst) { + ss[0] |= GEN4_SURFACE_RC_READ_WRITE; domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER; - else + } else domains = I915_GEM_DOMAIN_SAMPLER << 16; ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0); @@ -421,7 +455,7 @@ gen4_bind_bo(struct sna *sna, ss[4] = 0; ss[5] = 0; - kgem_bo_set_binding(bo, format, offset); + kgem_bo_set_binding(bo, format | is_dst << 31, offset); DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", offset, bo->handle, ss[1], @@ -476,7 +510,7 @@ static void gen4_emit_primitive(struct sna *sna) static bool gen4_rectangle_begin(struct sna *sna, const struct sna_composite_op *op) { - int id = op->u.gen4.ve_id; + unsigned int id = 1 << op->u.gen4.ve_id; int ndwords; if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) @@ -484,13 +518,14 @@ static bool gen4_rectangle_begin(struct sna *sna, /* 7xpipelined pointers + 6xprimitive + 1xflush */ ndwords = op->need_magic_ca_pass? 20 : 6; - if ((sna->render.vb_id & (1 << id)) == 0) + if ((sna->render.vb_id & id) == 0) ndwords += 5; + ndwords += 2*FORCE_FLUSH; if (!kgem_check_batch(&sna->kgem, ndwords)) return false; - if ((sna->render.vb_id & (1 << id)) == 0) + if ((sna->render.vb_id & id) == 0) gen4_emit_vertex_buffer(sna, op); if (sna->render.vertex_offset == 0) gen4_emit_primitive(sna); @@ -508,7 +543,8 @@ static int gen4_get_rectangles__flush(struct sna *sna, return rem; } - if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 25 : 6)) + if (!kgem_check_batch(&sna->kgem, + 2*FORCE_FLUSH + (op->need_magic_ca_pass ? 
25 : 6))) return 0; if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) return 0; @@ -534,6 +570,27 @@ inline static int gen4_get_rectangles(struct sna *sna, int rem; assert(want); +#if FORCE_FLUSH + rem = sna->render.vertex_offset; + if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) + rem = sna->kgem.nbatch - 5; + if (rem) { + rem = MAX_FLUSH_VERTICES - (sna->render.vertex_index - sna->render.vertex_start) / 3; + if (rem <= 0) { + if (sna->render.vertex_offset) { + gen4_vertex_flush(sna); + if (gen4_magic_ca_pass(sna, op)) + gen4_emit_pipelined_pointers(sna, op, op->op, + op->u.gen4.wm_kernel); + } + OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); + rem = MAX_FLUSH_VERTICES; + } + } else + rem = MAX_FLUSH_VERTICES; + if (want > rem) + want = rem; +#endif start: rem = vertex_space(sna); @@ -552,8 +609,8 @@ start: goto start; } - assert(op->floats_per_rect >= vertex_space(sna)); assert(rem <= vertex_space(sna)); + assert(op->floats_per_rect <= rem); if (want > 1 && want * op->floats_per_rect > rem) want = rem / op->floats_per_rect; @@ -673,7 +730,7 @@ gen4_get_batch(struct sna *sna, const struct sna_composite_op *op) { kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); - if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) { + if (!kgem_check_batch_with_surfaces(&sna->kgem, 150 + 50*FORCE_FLUSH, 4)) { DBG(("%s: flushing batch: %d < %d+%d\n", __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch, 150, 4*8)); @@ -904,6 +961,8 @@ gen4_emit_state(struct sna *sna, { bool flush; + assert(op->dst.bo->exec); + flush = wm_binding_table & 1; if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { DBG(("%s: flushing dirty (%d, %d), forced? 
%d\n", __FUNCTION__, @@ -984,22 +1043,449 @@ gen4_render_composite_blt(struct sna *sna, op->prim_emit(sna, op, r); } +#if 0 +fastcall static void +gen4_render_composite_box(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box) +{ + struct sna_composite_rectangles r; + DBG((" %s: (%d, %d), (%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2)); + gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces); + r.dst.x = box->x1; + r.dst.y = box->y1; + r.width = box->x2 - box->x1; + r.height = box->y2 - box->y1; + r.mask = r.src = r.dst; + op->prim_emit(sna, op, &r); +} +static void +gen4_render_composite_boxes__blt(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n", + __FUNCTION__, nbox, op->dst.x, op->dst.y, + op->src.offset[0], op->src.offset[1], + op->src.width, op->src.height, + op->mask.offset[0], op->mask.offset[1], + op->mask.width, op->mask.height)); + do { + int nbox_this_time; + nbox_this_time = gen4_get_rectangles(sna, op, nbox, + gen4_bind_surfaces); + nbox -= nbox_this_time; + do { + struct sna_composite_rectangles r; + DBG((" %s: (%d, %d), (%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2)); + r.dst.x = box->x1; + r.dst.y = box->y1; + r.width = box->x2 - box->x1; + r.height = box->y2 - box->y1; + r.mask = r.src = r.dst; + op->prim_emit(sna, op, &r); + box++; + } while (--nbox_this_time); + } while (nbox); +} +static void +gen4_render_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + do { + int nbox_this_time; + float *v; + nbox_this_time = gen4_get_rectangles(sna, op, nbox, + gen4_bind_surfaces); + assert(nbox_this_time); + nbox -= nbox_this_time; + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + op->emit_boxes(op, box, 
nbox_this_time, v); + box += nbox_this_time; + } while (nbox); +} +#if !FORCE_FLUSH +static void +gen4_render_composite_boxes__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen4_get_rectangles(sna, op, nbox, + gen4_bind_surfaces); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} +#endif + +#ifndef MAX +#define MAX(a,b) ((a) > (b) ? (a) : (b)) +#endif + +static uint32_t gen4_bind_video_source(struct sna *sna, + struct kgem_bo *src_bo, + uint32_t src_offset, + int src_width, + int src_height, + int src_pitch, + uint32_t src_surf_format) +{ + struct gen4_surface_state *ss; + + sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); + + ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss)); + ss->ss0.surface_type = GEN4_SURFACE_2D; + ss->ss0.surface_format = src_surf_format; + ss->ss0.color_blend = 1; + + ss->ss1.base_addr = + kgem_add_reloc(&sna->kgem, + sna->kgem.surface + 1, + src_bo, + I915_GEM_DOMAIN_SAMPLER << 16, + src_offset); + + ss->ss2.width = src_width - 1; + ss->ss2.height = src_height - 1; + ss->ss3.pitch = src_pitch - 1; + + return sna->kgem.surface * sizeof(uint32_t); +} + +static void gen4_video_bind_surfaces(struct sna *sna, + const struct sna_composite_op *op) +{ + bool dirty = kgem_bo_is_dirty(op->dst.bo); + struct sna_video_frame *frame = op->priv; + uint32_t src_surf_format; + uint32_t src_surf_base[6]; + int 
src_width[6]; + int src_height[6]; + int src_pitch[6]; + uint32_t *binding_table; + uint16_t offset; + int n_src, n; + + src_surf_base[0] = 0; + src_surf_base[1] = 0; + src_surf_base[2] = frame->VBufOffset; + src_surf_base[3] = frame->VBufOffset; + src_surf_base[4] = frame->UBufOffset; + src_surf_base[5] = frame->UBufOffset; + + if (is_planar_fourcc(frame->id)) { + src_surf_format = GEN4_SURFACEFORMAT_R8_UNORM; + src_width[1] = src_width[0] = frame->width; + src_height[1] = src_height[0] = frame->height; + src_pitch[1] = src_pitch[0] = frame->pitch[1]; + src_width[4] = src_width[5] = src_width[2] = src_width[3] = + frame->width / 2; + src_height[4] = src_height[5] = src_height[2] = src_height[3] = + frame->height / 2; + src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = + frame->pitch[0]; + n_src = 6; + } else { + if (frame->id == FOURCC_UYVY) + src_surf_format = GEN4_SURFACEFORMAT_YCRCB_SWAPY; + else + src_surf_format = GEN4_SURFACEFORMAT_YCRCB_NORMAL; + + src_width[0] = frame->width; + src_height[0] = frame->height; + src_pitch[0] = frame->pitch[0]; + n_src = 1; + } + + gen4_get_batch(sna, op); + + binding_table = gen4_composite_get_binding_table(sna, &offset); + binding_table[0] = + gen4_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen4_get_dest_format(op->dst.format), + true); + for (n = 0; n < n_src; n++) { + binding_table[1+n] = + gen4_bind_video_source(sna, + frame->bo, + src_surf_base[n], + src_width[n], + src_height[n], + src_pitch[n], + src_surf_format); + } + + gen4_emit_state(sna, op, offset | dirty); +} + +static bool +gen4_render_video(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + RegionPtr dstRegion, + PixmapPtr pixmap) +{ + struct sna_composite_op tmp; + int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; + int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; + int src_width = frame->src.x2 - frame->src.x1; + int src_height = frame->src.y2 - frame->src.y1; + float 
src_offset_x, src_offset_y; + float src_scale_x, src_scale_y; + int nbox, pix_xoff, pix_yoff; + struct sna_pixmap *priv; + BoxPtr box; + + DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, + src_width, src_height, dst_width, dst_height)); + + priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE); + if (priv == NULL) + return false; + + memset(&tmp, 0, sizeof(tmp)); + + tmp.op = PictOpSrc; + tmp.dst.pixmap = pixmap; + tmp.dst.width = pixmap->drawable.width; + tmp.dst.height = pixmap->drawable.height; + tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth); + tmp.dst.bo = priv->gpu_bo; + + if (src_width == dst_width && src_height == dst_height) + tmp.src.filter = SAMPLER_FILTER_NEAREST; + else + tmp.src.filter = SAMPLER_FILTER_BILINEAR; + tmp.src.repeat = SAMPLER_EXTEND_PAD; + tmp.src.bo = frame->bo; + tmp.mask.bo = NULL; + tmp.u.gen4.wm_kernel = + is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED; + tmp.u.gen4.ve_id = 2; + tmp.is_affine = true; + tmp.floats_per_vertex = 3; + tmp.floats_per_rect = 9; + tmp.priv = frame; + + if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { + kgem_submit(&sna->kgem); + assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)); + } + + gen4_video_bind_surfaces(sna, &tmp); + gen4_align_vertex(sna, &tmp); + + /* Set up the offset for translating from the given region (in screen + * coordinates) to the backing pixmap. 
+ */ +#ifdef COMPOSITE + pix_xoff = -pixmap->screen_x + pixmap->drawable.x; + pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +#else + pix_xoff = 0; + pix_yoff = 0; +#endif + + src_scale_x = (float)src_width / dst_width / frame->width; + src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; + + src_scale_y = (float)src_height / dst_height / frame->height; + src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; + + box = REGION_RECTS(dstRegion); + nbox = REGION_NUM_RECTS(dstRegion); + do { + int n; + + n = gen4_get_rectangles(sna, &tmp, nbox, + gen4_video_bind_surfaces); + assert(n); + nbox -= n; + + do { + BoxRec r; + + r.x1 = box->x1 + pix_xoff; + r.x2 = box->x2 + pix_xoff; + r.y1 = box->y1 + pix_yoff; + r.y2 = box->y2 + pix_yoff; + + OUT_VERTEX(r.x2, r.y2); + OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); + + OUT_VERTEX(r.x1, r.y2); + OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); + + OUT_VERTEX(r.x1, r.y1); + OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); + + if (!DAMAGE_IS_ALL(priv->gpu_damage)) { + sna_damage_add_box(&priv->gpu_damage, &r); + sna_damage_subtract_box(&priv->cpu_damage, &r); + } + box++; + } while (--n); + } while (nbox); + gen4_vertex_flush(sna); + + return true; +} + +static int +gen4_composite_picture(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int x, int y, + int w, int h, + int dst_x, int dst_y, + bool precise) +{ + PixmapPtr pixmap; + uint32_t color; + int16_t dx, dy; + + DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", + __FUNCTION__, x, y, w, h, dst_x, dst_y)); + + channel->is_solid = false; + channel->card_format = -1; + + if (sna_picture_is_solid(picture, &color)) + return gen4_channel_init_solid(sna, channel, color); + + if (picture->pDrawable == NULL) 
{ + int ret; + + if (picture->pSourcePict->type == SourcePictTypeLinear) + return gen4_channel_init_linear(sna, picture, channel, + x, y, + w, h, + dst_x, dst_y); + + DBG(("%s -- fixup, gradient\n", __FUNCTION__)); + ret = -1; + if (!precise) + ret = sna_render_picture_approximate_gradient(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + if (ret == -1) + ret = sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + return ret; + } + + if (picture->alphaMap) { + DBG(("%s -- fallback, alphamap\n", __FUNCTION__)); + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + } + + if (!gen4_check_repeat(picture)) { + DBG(("%s: unknown repeat mode fixup\n", __FUNCTION__)); + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + } + + if (!gen4_check_filter(picture)) { + DBG(("%s: unhandled filter fixup\n", __FUNCTION__)); + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + } + + channel->repeat = picture->repeat ? 
picture->repeatType : RepeatNone; + channel->filter = picture->filter; + + pixmap = get_drawable_pixmap(picture->pDrawable); + get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); + + x += dx + picture->pDrawable->x; + y += dy + picture->pDrawable->y; + + channel->is_affine = sna_transform_is_affine(picture->transform); + if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) { + DBG(("%s: integer translation (%d, %d), removing\n", + __FUNCTION__, dx, dy)); + x += dx; + y += dy; + channel->transform = NULL; + channel->filter = PictFilterNearest; + } else + channel->transform = picture->transform; + + channel->pict_format = picture->format; + channel->card_format = gen4_get_card_format(picture->format); + if (channel->card_format == -1) + return sna_render_picture_convert(sna, picture, channel, pixmap, + x, y, w, h, dst_x, dst_y, + false); + + if (too_large(pixmap->drawable.width, pixmap->drawable.height)) + return sna_render_picture_extract(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + + return sna_render_pixmap_bo(sna, channel, pixmap, + x, y, w, h, dst_x, dst_y); +} + +static void gen4_composite_channel_convert(struct sna_composite_channel *channel) +{ + DBG(("%s: repeat %d -> %d, filter %d -> %d\n", + __FUNCTION__, + channel->repeat, gen4_repeat(channel->repeat), + channel->filter, gen4_repeat(channel->filter))); + channel->repeat = gen4_repeat(channel->repeat); + channel->filter = gen4_filter(channel->filter); + if (channel->card_format == (unsigned)-1) + channel->card_format = gen4_get_card_format(channel->pict_format); +} +#endif static void gen4_render_composite_done(struct sna *sna, @@ -1014,134 +1500,495 @@ gen4_render_composite_done(struct sna *sna, } +#if 0 +static bool +gen4_composite_set_target(struct sna *sna, + struct sna_composite_op *op, + PicturePtr dst, + int x, int y, int w, int h, + bool partial) +{ + BoxRec box; + op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); + op->dst.width = 
op->dst.pixmap->drawable.width; + op->dst.height = op->dst.pixmap->drawable.height; + op->dst.format = dst->format; + if (w && h) { + box.x1 = x; + box.y1 = y; + box.x2 = x + w; + box.y2 = y + h; + } else + sna_render_picture_extents(dst, &box); + op->dst.bo = sna_drawable_use_bo (dst->pDrawable, + PREFER_GPU | FORCE_GPU | RENDER_GPU, + &box, &op->damage); + if (op->dst.bo == NULL) + return false; + get_drawable_deltas(dst->pDrawable, op->dst.pixmap, + &op->dst.x, &op->dst.y); + DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", + __FUNCTION__, + op->dst.pixmap, (int)op->dst.format, + op->dst.width, op->dst.height, + op->dst.bo->pitch, + op->dst.x, op->dst.y, + op->damage ? *op->damage : (void *)-1)); + assert(op->dst.bo->proxy == NULL); + if (too_large(op->dst.width, op->dst.height) && + !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) + return false; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + return true; +} static bool -gen4_blit_tex(struct sna *sna, - uint8_t op, bool scale, - PixmapPtr src, struct kgem_bo *src_bo, - PixmapPtr mask,struct kgem_bo *mask_bo, - PixmapPtr dst, struct kgem_bo *dst_bo, - int32_t src_x, int32_t src_y, - int32_t msk_x, int32_t msk_y, - int32_t dst_x, int32_t dst_y, - int32_t width, int32_t height, - struct sna_composite_op *tmp) +try_blt(struct sna *sna, + PicturePtr dst, PicturePtr src, + int width, int height) { + if (sna->kgem.mode != KGEM_RENDER) { + DBG(("%s: already performing BLT\n", __FUNCTION__)); + return true; + } - DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, - width, height, sna->kgem.ring)); + if (too_large(width, height)) { + DBG(("%s: operation too large for 3D pipe (%d, %d)\n", + __FUNCTION__, width, height)); + return true; + } - tmp->op = PictOpSrc; + if (too_large(dst->pDrawable->width, dst->pDrawable->height)) + return true; - tmp->dst.pixmap = dst; - tmp->dst.bo = dst_bo; - tmp->dst.width = dst->drawable.width; - tmp->dst.height = 
dst->drawable.height; - tmp->dst.format = PICT_x8r8g8b8; + /* The blitter is much faster for solids */ + if (sna_picture_is_solid(src, NULL)) + return true; + /* is the source picture only in cpu memory e.g. a shm pixmap? */ + return picture_is_cpu(sna, src); +} - tmp->src.repeat = RepeatNone; - tmp->src.filter = PictFilterNearest; - tmp->src.is_affine = true; +static bool +check_gradient(PicturePtr picture, bool precise) +{ + switch (picture->pSourcePict->type) { + case SourcePictTypeSolidFill: + case SourcePictTypeLinear: + return false; + default: + return precise; + } +} - tmp->src.bo = src_bo; - tmp->src.pict_format = PICT_x8r8g8b8; - tmp->src.card_format = gen4_get_card_format(tmp->src.pict_format); - tmp->src.width = src->drawable.width; - tmp->src.height = src->drawable.height; +static bool +has_alphamap(PicturePtr p) +{ + return p->alphaMap != NULL; +} + +static bool +need_upload(struct sna *sna, PicturePtr p) +{ + return p->pDrawable && untransformed(p) && + !is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER); +} + +static bool +source_is_busy(PixmapPtr pixmap) +{ + struct sna_pixmap *priv = sna_pixmap(pixmap); + if (priv == NULL) + return false; + + if (priv->clear) + return false; + + if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) + return true; + + if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) + return true; + + return priv->gpu_damage && !priv->cpu_damage; +} + +static bool +source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise) +{ + if (sna_picture_is_solid(p, NULL)) + return false; + + if (p->pSourcePict) + return check_gradient(p, precise); + + if (!gen4_check_repeat(p) || !gen4_check_format(p->format)) + return true; + + /* soft errors: perfer to upload/compute rather than readback */ + if (pixmap && source_is_busy(pixmap)) + return false; + + return has_alphamap(p) || !gen4_check_filter(p) || need_upload(sna, p); +} + +static bool +gen4_composite_fallback(struct sna *sna, + PicturePtr src, + PicturePtr mask, + 
PicturePtr dst) +{ + PixmapPtr src_pixmap; + PixmapPtr mask_pixmap; + PixmapPtr dst_pixmap; + bool src_fallback, mask_fallback; + + if (!gen4_check_dst_format(dst->format)) { + DBG(("%s: unknown destination format: %d\n", + __FUNCTION__, dst->format)); + return true; + } + + dst_pixmap = get_drawable_pixmap(dst->pDrawable); + + src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; + src_fallback = source_fallback(sna, src, src_pixmap, + dst->polyMode == PolyModePrecise); + + if (mask) { + mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL; + mask_fallback = source_fallback(sna, mask, mask_pixmap, + dst->polyMode == PolyModePrecise); + } else { + mask_pixmap = NULL; + mask_fallback = false; + } + + /* If we are using the destination as a source and need to + * readback in order to upload the source, do it all + * on the cpu. + */ + if (src_pixmap == dst_pixmap && src_fallback) { + DBG(("%s: src is dst and will fallback\n",__FUNCTION__)); + return true; + } + if (mask_pixmap == dst_pixmap && mask_fallback) { + DBG(("%s: mask is dst and will fallback\n",__FUNCTION__)); + return true; + } + + /* If anything is on the GPU, push everything out to the GPU */ + if (dst_use_gpu(dst_pixmap)) { + DBG(("%s: dst is already on the GPU, try to use GPU\n", + __FUNCTION__)); + return false; + } + + if (src_pixmap && !src_fallback) { + DBG(("%s: src is already on the GPU, try to use GPU\n", + __FUNCTION__)); + return false; + } + if (mask_pixmap && !mask_fallback) { + DBG(("%s: mask is already on the GPU, try to use GPU\n", + __FUNCTION__)); + return false; + } + + /* However if the dst is not on the GPU and we need to + * render one of the sources using the CPU, we may + * as well do the entire operation in place onthe CPU. 
+ */ + if (src_fallback) { + DBG(("%s: dst is on the CPU and src will fallback\n", + __FUNCTION__)); + return true; + } + + if (mask_fallback) { + DBG(("%s: dst is on the CPU and mask will fallback\n", + __FUNCTION__)); + return true; + } + + if (too_large(dst_pixmap->drawable.width, + dst_pixmap->drawable.height) && + dst_is_cpu(dst_pixmap)) { + DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__)); + return true; + } + + DBG(("%s: dst is not on the GPU and the operation should not fallback\n", + __FUNCTION__)); + return dst_use_cpu(dst_pixmap); +} + +static int +reuse_source(struct sna *sna, + PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, + PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) +{ + uint32_t color; + + if (src_x != msk_x || src_y != msk_y) + return false; + + if (src == mask) { + DBG(("%s: mask is source\n", __FUNCTION__)); + *mc = *sc; + mc->bo = kgem_bo_reference(mc->bo); + return true; + } + + if (sna_picture_is_solid(mask, &color)) + return gen4_channel_init_solid(sna, mc, color); + + if (sc->is_solid) + return false; + + if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable) + return false; + + DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); + + if (!sna_transform_equal(src->transform, mask->transform)) + return false; + + if (!sna_picture_alphamap_equal(src, mask)) + return false; + + if (!gen4_check_repeat(mask)) + return false; + + if (!gen4_check_filter(mask)) + return false; + + if (!gen4_check_format(mask->format)) + return false; + + DBG(("%s: reusing source channel for mask with a twist\n", + __FUNCTION__)); + + *mc = *sc; + mc->repeat = gen4_repeat(mask->repeat ? 
mask->repeatType : RepeatNone); + mc->filter = gen4_filter(mask->filter); + mc->pict_format = mask->format; + mc->card_format = gen4_get_card_format(mask->format); + mc->bo = kgem_bo_reference(mc->bo); + return true; +} + +static bool +gen4_render_composite(struct sna *sna, + uint8_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t msk_x, int16_t msk_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp) +{ + DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, + width, height, sna->kgem.mode)); + + if (op >= ARRAY_SIZE(gen4_blend_op)) + return false; + + if (mask == NULL && + try_blt(sna, dst, src, width, height) && + sna_blt_composite(sna, op, + src, dst, + src_x, src_y, + dst_x, dst_y, + width, height, + tmp, false)) + return true; + + if (gen4_composite_fallback(sna, src, mask, dst)) + return false; + + if (need_tiling(sna, width, height)) + return sna_tiling_composite(op, src, mask, dst, + src_x, src_y, + msk_x, msk_y, + dst_x, dst_y, + width, height, + tmp); + + if (!gen4_composite_set_target(sna, tmp, dst, + dst_x, dst_y, width, height, + op > PictOpSrc || dst->pCompositeClip->data)) { + DBG(("%s: failed to set composite target\n", __FUNCTION__)); + return false; + } + + tmp->op = op; + switch (gen4_composite_picture(sna, src, &tmp->src, + src_x, src_y, + width, height, + dst_x, dst_y, + dst->polyMode == PolyModePrecise)) { + case -1: + DBG(("%s: failed to prepare source\n", __FUNCTION__)); + goto cleanup_dst; + case 0: + if (!gen4_channel_init_solid(sna, &tmp->src, 0)) + goto cleanup_dst; + /* fall through to fixup */ + case 1: + if (mask == NULL && + sna_blt_composite__convert(sna, + dst_x, dst_y, width, height, + tmp)) + return true; + + gen4_composite_channel_convert(&tmp->src); + break; + } tmp->is_affine = tmp->src.is_affine; tmp->has_component_alpha = false; tmp->need_magic_ca_pass = false; - tmp->mask.repeat = SAMPLER_EXTEND_NONE; - tmp->mask.filter = 
SAMPLER_FILTER_NEAREST; - tmp->mask.is_affine = true; + if (mask) { + if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { + tmp->has_component_alpha = true; - tmp->mask.bo = mask_bo; - tmp->mask.pict_format = PIXMAN_a8; - tmp->mask.card_format = gen4_get_card_format(tmp->mask.pict_format); - tmp->mask.width = mask->drawable.width; - tmp->mask.height = mask->drawable.height; + /* Check if it's component alpha that relies on a source alpha and on + * the source value. We can only get one of those into the single + * source value that we get to blend with. + */ + if (gen4_blend_op[op].src_alpha && + (gen4_blend_op[op].src_blend != GEN4_BLENDFACTOR_ZERO)) { + if (op != PictOpOver) { + DBG(("%s -- fallback: unhandled component alpha blend\n", + __FUNCTION__)); - if( scale ) - { - tmp->src.scale[0] = 1.f/width; - tmp->src.scale[1] = 1.f/height; - } - else - { - tmp->src.scale[0] = 1.f/src->drawable.width; - tmp->src.scale[1] = 1.f/src->drawable.height; - } -// tmp->src.offset[0] = -dst_x; -// tmp->src.offset[1] = -dst_y; + goto cleanup_src; + } + tmp->need_magic_ca_pass = true; + tmp->op = PictOpOutReverse; + } + } - tmp->mask.scale[0] = 1.f/mask->drawable.width; - tmp->mask.scale[1] = 1.f/mask->drawable.height; -// tmp->mask.offset[0] = -dst_x; -// tmp->mask.offset[1] = -dst_y; + if (!reuse_source(sna, + src, &tmp->src, src_x, src_y, + mask, &tmp->mask, msk_x, msk_y)) { + switch (gen4_composite_picture(sna, mask, &tmp->mask, + msk_x, msk_y, + width, height, + dst_x, dst_y, + dst->polyMode == PolyModePrecise)) { + case -1: + DBG(("%s: failed to prepare mask\n", __FUNCTION__)); + goto cleanup_src; + case 0: + if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) + goto cleanup_src; + /* fall through to fixup */ + case 1: + gen4_composite_channel_convert(&tmp->mask); + break; + } + } + + tmp->is_affine &= tmp->mask.is_affine; + } tmp->u.gen4.wm_kernel = gen4_choose_composite_kernel(tmp->op, tmp->mask.bo != NULL, tmp->has_component_alpha, tmp->is_affine); - 
tmp->u.gen4.ve_id = gen4_choose_composite_emitter(tmp); + tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp); tmp->blt = gen4_render_composite_blt; + tmp->box = gen4_render_composite_box; + tmp->boxes = gen4_render_composite_boxes__blt; + if (tmp->emit_boxes) { + tmp->boxes = gen4_render_composite_boxes; +#if !FORCE_FLUSH + tmp->thread_boxes = gen4_render_composite_boxes__thread; +#endif + } tmp->done = gen4_render_composite_done; if (!kgem_check_bo(&sna->kgem, tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) { kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, + tmp->dst.bo, tmp->src.bo, tmp->mask.bo, + NULL)) + goto cleanup_mask; } gen4_bind_surfaces(sna, tmp); gen4_align_vertex(sna, tmp); return true; + +cleanup_mask: + if (tmp->mask.bo) + kgem_bo_destroy(&sna->kgem, tmp->mask.bo); +cleanup_src: + if (tmp->src.bo) + kgem_bo_destroy(&sna->kgem, tmp->src.bo); +cleanup_dst: + if (tmp->redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp->dst.bo); + return false; } + +#endif + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + static void gen4_render_flush(struct sna *sna) @@ -1230,7 +2077,7 @@ static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream) } static uint32_t gen4_create_sf_state(struct sna_static_stream *stream, - int gen, uint32_t kernel) + uint32_t kernel) { struct gen4_sf_unit_state *sf; @@ -1384,7 +2231,7 @@ static bool gen4_render_setup(struct sna *sna) } state->vs = gen4_create_vs_unit_state(&general); - state->sf = gen4_create_sf_state(&general, sna->kgem.gen, sf); + state->sf = gen4_create_sf_state(&general, sf); wm_state = sna_static_stream_map(&general, sizeof(*wm_state) * KERNEL_COUNT * @@ -1421,19 +2268,51 @@ static bool gen4_render_setup(struct sna *sna) return state->general_bo != NULL; } - -bool gen4_render_init(struct sna *sna) +const char *gen4_render_init(struct sna *sna, const char *backend) { if (!gen4_render_setup(sna)) - return false; + return backend; sna->kgem.retire = 
gen4_render_retire; sna->kgem.expire = gen4_render_expire; +#if 0 +#if !NO_COMPOSITE + sna->render.composite = gen4_render_composite; sna->render.prefer_gpu |= PREFER_GPU_RENDER; +#endif +#if !NO_COMPOSITE_SPANS + sna->render.check_composite_spans = gen4_check_composite_spans; + sna->render.composite_spans = gen4_render_composite_spans; + if (0) + sna->render.prefer_gpu |= PREFER_GPU_SPANS; +#endif + +#if !NO_VIDEO + sna->render.video = gen4_render_video; +#endif + +#if !NO_COPY_BOXES + sna->render.copy_boxes = gen4_render_copy_boxes; +#endif +#if !NO_COPY + sna->render.copy = gen4_render_copy; +#endif + +#if !NO_FILL_BOXES + sna->render.fill_boxes = gen4_render_fill_boxes; +#endif +#if !NO_FILL + sna->render.fill = gen4_render_fill; +#endif +#if !NO_FILL_ONE + sna->render.fill_one = gen4_render_fill_one; +#endif + +#endif sna->render.blit_tex = gen4_blit_tex; - + sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT; sna->render.flush = gen4_render_flush; sna->render.reset = gen4_render_reset; @@ -1441,8 +2320,95 @@ bool gen4_render_init(struct sna *sna) sna->render.max_3d_size = GEN4_MAX_3D_SIZE; sna->render.max_3d_pitch = 1 << 18; - sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT; - + return sna->kgem.gen >= 045 ? 
"Eaglelake (gen4.5)" : "Broadwater (gen4)"; +} + +static bool +gen4_blit_tex(struct sna *sna, + uint8_t op, bool scale, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr mask,struct kgem_bo *mask_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + int32_t src_x, int32_t src_y, + int32_t msk_x, int32_t msk_y, + int32_t dst_x, int32_t dst_y, + int32_t width, int32_t height, + struct sna_composite_op *tmp) +{ + + DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, + width, height, sna->kgem.ring)); + + tmp->op = PictOpSrc; + + tmp->dst.pixmap = dst; + tmp->dst.bo = dst_bo; + tmp->dst.width = dst->drawable.width; + tmp->dst.height = dst->drawable.height; + tmp->dst.format = PICT_x8r8g8b8; + + + tmp->src.repeat = RepeatNone; + tmp->src.filter = PictFilterNearest; + tmp->src.is_affine = true; + + tmp->src.bo = src_bo; + tmp->src.pict_format = PICT_x8r8g8b8; + tmp->src.card_format = gen4_get_card_format(tmp->src.pict_format); + tmp->src.width = src->drawable.width; + tmp->src.height = src->drawable.height; + + tmp->is_affine = tmp->src.is_affine; + tmp->has_component_alpha = false; + tmp->need_magic_ca_pass = false; + + tmp->mask.repeat = SAMPLER_EXTEND_NONE; + tmp->mask.filter = SAMPLER_FILTER_NEAREST; + tmp->mask.is_affine = true; + + tmp->mask.bo = mask_bo; + tmp->mask.pict_format = PIXMAN_a8; + tmp->mask.card_format = gen4_get_card_format(tmp->mask.pict_format); + tmp->mask.width = mask->drawable.width; + tmp->mask.height = mask->drawable.height; + + if( scale ) + { + tmp->src.scale[0] = 1.f/width; + tmp->src.scale[1] = 1.f/height; + } + else + { + tmp->src.scale[0] = 1.f/src->drawable.width; + tmp->src.scale[1] = 1.f/src->drawable.height; + } +// tmp->src.offset[0] = -dst_x; +// tmp->src.offset[1] = -dst_y; + + + tmp->mask.scale[0] = 1.f/mask->drawable.width; + tmp->mask.scale[1] = 1.f/mask->drawable.height; +// tmp->mask.offset[0] = -dst_x; +// tmp->mask.offset[1] = -dst_y; + + tmp->u.gen4.wm_kernel = + gen4_choose_composite_kernel(tmp->op, + tmp->mask.bo != NULL, + 
tmp->has_component_alpha, + tmp->is_affine); + tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp); + + tmp->blt = gen4_render_composite_blt; + tmp->done = gen4_render_composite_done; + + if (!kgem_check_bo(&sna->kgem, + tmp->dst.bo, tmp->src.bo, tmp->mask.bo, + NULL)) { + kgem_submit(&sna->kgem); + } + + gen4_bind_surfaces(sna, tmp); + gen4_align_vertex(sna, tmp); return true; } diff --git a/drivers/video/Intel-2D/gen4_vertex.c b/drivers/video/Intel-2D/gen4_vertex.c index 05c839591e..91658a554a 100644 --- a/drivers/video/Intel-2D/gen4_vertex.c +++ b/drivers/video/Intel-2D/gen4_vertex.c @@ -34,6 +34,10 @@ #include "sna_render_inline.h" #include "gen4_vertex.h" +#ifndef sse2 +#define sse2 +#endif + void gen4_vertex_flush(struct sna *sna) { DBG(("%s[%x] = %d\n", __FUNCTION__, @@ -223,7 +227,601 @@ void gen4_vertex_close(struct sna *sna) kgem_bo_destroy(&sna->kgem, free_bo); } -fastcall static void +/* specialised vertex emission routines */ + +#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) /* XXX assert(!too_large(x, y)); */ +#define OUT_VERTEX_F(v) vertex_emit(sna, v) + +force_inline static float +compute_linear(const struct sna_composite_channel *channel, + int16_t x, int16_t y) +{ + return ((x+channel->offset[0]) * channel->u.linear.dx + + (y+channel->offset[1]) * channel->u.linear.dy + + channel->u.linear.offset); +} + +sse2 inline static void +emit_texcoord(struct sna *sna, + const struct sna_composite_channel *channel, + int16_t x, int16_t y) +{ + if (channel->is_solid) { + OUT_VERTEX_F(x); + return; + } + + x += channel->offset[0]; + y += channel->offset[1]; + + if (channel->is_affine) { + float s, t; + + sna_get_transformed_coordinates(x, y, + channel->transform, + &s, &t); + OUT_VERTEX_F(s * channel->scale[0]); + OUT_VERTEX_F(t * channel->scale[1]); + } else { + float s, t, w; + + sna_get_transformed_coordinates_3d(x, y, + channel->transform, + &s, &t, &w); + OUT_VERTEX_F(s * channel->scale[0]); + OUT_VERTEX_F(t * channel->scale[1]); + 
OUT_VERTEX_F(w); + } +} + +sse2 force_inline static void +emit_vertex(struct sna *sna, + const struct sna_composite_op *op, + int16_t srcX, int16_t srcY, + int16_t mskX, int16_t mskY, + int16_t dstX, int16_t dstY) +{ + OUT_VERTEX(dstX, dstY); + emit_texcoord(sna, &op->src, srcX, srcY); +} + +sse2 fastcall static void +emit_primitive(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + emit_vertex(sna, op, + r->src.x + r->width, r->src.y + r->height, + r->mask.x + r->width, r->mask.y + r->height, + r->dst.x + r->width, r->dst.y + r->height); + emit_vertex(sna, op, + r->src.x, r->src.y + r->height, + r->mask.x, r->mask.y + r->height, + r->dst.x, r->dst.y + r->height); + emit_vertex(sna, op, + r->src.x, r->src.y, + r->mask.x, r->mask.y, + r->dst.x, r->dst.y); +} + +sse2 force_inline static void +emit_vertex_mask(struct sna *sna, + const struct sna_composite_op *op, + int16_t srcX, int16_t srcY, + int16_t mskX, int16_t mskY, + int16_t dstX, int16_t dstY) +{ + OUT_VERTEX(dstX, dstY); + emit_texcoord(sna, &op->src, srcX, srcY); + emit_texcoord(sna, &op->mask, mskX, mskY); +} + +sse2 fastcall static void +emit_primitive_mask(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + emit_vertex_mask(sna, op, + r->src.x + r->width, r->src.y + r->height, + r->mask.x + r->width, r->mask.y + r->height, + r->dst.x + r->width, r->dst.y + r->height); + emit_vertex_mask(sna, op, + r->src.x, r->src.y + r->height, + r->mask.x, r->mask.y + r->height, + r->dst.x, r->dst.y + r->height); + emit_vertex_mask(sna, op, + r->src.x, r->src.y, + r->mask.x, r->mask.y, + r->dst.x, r->dst.y); +} + +sse2 fastcall static void +emit_primitive_solid(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + assert(op->floats_per_rect == 6); + assert((sna->render.vertex_used % 2) == 0); + v = 
sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + dst.p.x = r->dst.x; + v[2] = dst.f; + dst.p.y = r->dst.y; + v[4] = dst.f; + + v[5] = v[3] = v[1] = .5; +} + +sse2 fastcall static void +emit_boxes_solid(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[2] = dst.f; + dst.p.y = box->y1; + v[4] = dst.f; + + v[5] = v[3] = v[1] = .5; + box++; + v += 6; + } while (--nbox); +} + +sse2 fastcall static void +emit_primitive_linear(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + assert(op->floats_per_rect == 6); + assert((sna->render.vertex_used % 2) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + dst.p.x = r->dst.x; + v[2] = dst.f; + dst.p.y = r->dst.y; + v[4] = dst.f; + + v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); + v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height); + v[5] = compute_linear(&op->src, r->src.x, r->src.y); +} + +sse2 fastcall static void +emit_boxes_linear(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + union { + struct sna_coordinate p; + float f; + } dst; + + do { + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[2] = dst.f; + dst.p.y = box->y1; + v[4] = dst.f; + + v[1] = compute_linear(&op->src, box->x2, box->y2); + v[3] = compute_linear(&op->src, box->x1, box->y2); + v[5] = 
compute_linear(&op->src, box->x1, box->y1); + + v += 6; + box++; + } while (--nbox); +} + +sse2 fastcall static void +emit_primitive_identity_source(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + assert(op->floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + dst.p.x = r->dst.x; + v[3] = dst.f; + dst.p.y = r->dst.y; + v[6] = dst.f; + + v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; + v[1] = v[4] + r->width * op->src.scale[0]; + + v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; + v[5] = v[2] = v[8] + r->height * op->src.scale[1]; +} + +sse2 fastcall static void +emit_boxes_identity_source(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[3] = dst.f; + dst.p.y = box->y1; + v[6] = dst.f; + + v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; + v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; + + v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; + v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; + + v += 9; + box++; + } while (--nbox); +} + +sse2 fastcall static void +emit_primitive_simple_source(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + float xx = op->src.transform->matrix[0][0]; + float x0 = op->src.transform->matrix[0][2]; + float yy = op->src.transform->matrix[1][1]; + float y0 = op->src.transform->matrix[1][2]; + float sx = op->src.scale[0]; + float sy = op->src.scale[1]; + 
int16_t tx = op->src.offset[0]; + int16_t ty = op->src.offset[1]; + + assert(op->floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*3; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; + v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; + + dst.p.x = r->dst.x; + v[3] = dst.f; + v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; + + dst.p.y = r->dst.y; + v[6] = dst.f; + v[8] = ((r->src.y + ty) * yy + y0) * sy; +} + +sse2 fastcall static void +emit_boxes_simple_source(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + float xx = op->src.transform->matrix[0][0]; + float x0 = op->src.transform->matrix[0][2]; + float yy = op->src.transform->matrix[1][1]; + float y0 = op->src.transform->matrix[1][2]; + float sx = op->src.scale[0]; + float sy = op->src.scale[1]; + int16_t tx = op->src.offset[0]; + int16_t ty = op->src.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[1] = ((box->x2 + tx) * xx + x0) * sx; + v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy; + + dst.p.x = box->x1; + v[3] = dst.f; + v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx; + + dst.p.y = box->y1; + v[6] = dst.f; + v[8] = ((box->y1 + ty) * yy + y0) * sy; + + v += 9; + box++; + } while (--nbox); +} + +sse2 fastcall static void +emit_primitive_affine_source(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + assert(op->floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + 
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width, + op->src.offset[1] + r->src.y + r->height, + op->src.transform, op->src.scale, + &v[1], &v[2]); + + dst.p.x = r->dst.x; + v[3] = dst.f; + _sna_get_transformed_scaled(op->src.offset[0] + r->src.x, + op->src.offset[1] + r->src.y + r->height, + op->src.transform, op->src.scale, + &v[4], &v[5]); + + dst.p.y = r->dst.y; + v[6] = dst.f; + _sna_get_transformed_scaled(op->src.offset[0] + r->src.x, + op->src.offset[1] + r->src.y, + op->src.transform, op->src.scale, + &v[7], &v[8]); +} + +sse2 fastcall static void +emit_boxes_affine_source(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + _sna_get_transformed_scaled(op->src.offset[0] + box->x2, + op->src.offset[1] + box->y2, + op->src.transform, op->src.scale, + &v[1], &v[2]); + + dst.p.x = box->x1; + v[3] = dst.f; + _sna_get_transformed_scaled(op->src.offset[0] + box->x1, + op->src.offset[1] + box->y2, + op->src.transform, op->src.scale, + &v[4], &v[5]); + + dst.p.y = box->y1; + v[6] = dst.f; + _sna_get_transformed_scaled(op->src.offset[0] + box->x1, + op->src.offset[1] + box->y1, + op->src.transform, op->src.scale, + &v[7], &v[8]); + box++; + v += 9; + } while (--nbox); +} + +sse2 fastcall static void +emit_primitive_identity_mask(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float msk_x, msk_y; + float w, h; + float *v; + + msk_x = r->mask.x + op->mask.offset[0]; + msk_y = r->mask.y + op->mask.offset[1]; + w = r->width; + h = r->height; + + DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", + __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); + + assert(op->floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + 
sna->render.vertex_used += 12; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[2] = (msk_x + w) * op->mask.scale[0]; + v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; + + dst.p.x = r->dst.x; + v[4] = dst.f; + v[10] = v[6] = msk_x * op->mask.scale[0]; + + dst.p.y = r->dst.y; + v[8] = dst.f; + v[11] = msk_y * op->mask.scale[1]; + + v[9] = v[5] = v[1] = .5; +} + +sse2 fastcall static void +emit_boxes_identity_mask(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + float msk_x = op->mask.offset[0]; + float msk_y = op->mask.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[2] = (msk_x + box->x2) * op->mask.scale[0]; + v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; + + dst.p.x = box->x1; + v[4] = dst.f; + v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; + + dst.p.y = box->y1; + v[8] = dst.f; + v[11] = (msk_y + box->y1) * op->mask.scale[1]; + + v[9] = v[5] = v[1] = .5; + v += 12; + box++; + } while (--nbox); +} + +sse2 fastcall static void +emit_primitive_linear_identity_mask(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float msk_x, msk_y; + float w, h; + float *v; + + msk_x = r->mask.x + op->mask.offset[0]; + msk_y = r->mask.y + op->mask.offset[1]; + w = r->width; + h = r->height; + + DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", + __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); + + assert(op->floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[2] = (msk_x + w) * op->mask.scale[0]; + v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; + + dst.p.x = r->dst.x; + v[4] = dst.f; + v[10] = 
v[6] = msk_x * op->mask.scale[0]; + + dst.p.y = r->dst.y; + v[8] = dst.f; + v[11] = msk_y * op->mask.scale[1]; + + v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); + v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height); + v[9] = compute_linear(&op->src, r->src.x, r->src.y); +} + +sse2 fastcall static void +emit_boxes_linear_identity_mask(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + float msk_x = op->mask.offset[0]; + float msk_y = op->mask.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[2] = (msk_x + box->x2) * op->mask.scale[0]; + v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; + + dst.p.x = box->x1; + v[4] = dst.f; + v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; + + dst.p.y = box->y1; + v[8] = dst.f; + v[11] = (msk_y + box->y1) * op->mask.scale[1]; + + v[1] = compute_linear(&op->src, box->x2, box->y2); + v[5] = compute_linear(&op->src, box->x1, box->y2); + v[9] = compute_linear(&op->src, box->x1, box->y1); + + v += 12; + box++; + } while (--nbox); +} + +sse2 fastcall static void emit_primitive_identity_source_mask(struct sna *sna, const struct sna_composite_op *op, const struct sna_composite_rectangles *r) @@ -272,7 +870,830 @@ emit_primitive_identity_source_mask(struct sna *sna, v[14] = msk_y * op->mask.scale[1]; } -unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp) +sse2 fastcall static void +emit_primitive_simple_source_identity(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + float xx = op->src.transform->matrix[0][0]; + float x0 = op->src.transform->matrix[0][2]; + float yy = op->src.transform->matrix[1][1]; + float y0 = op->src.transform->matrix[1][2]; + float sx = op->src.scale[0]; + float sy = op->src.scale[1]; + int16_t tx = 
op->src.offset[0]; + int16_t ty = op->src.offset[1]; + float msk_x = r->mask.x + op->mask.offset[0]; + float msk_y = r->mask.y + op->mask.offset[1]; + float w = r->width, h = r->height; + + assert(op->floats_per_rect == 15); + assert((sna->render.vertex_used % 5) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*5; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; + v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; + v[3] = (msk_x + w) * op->mask.scale[0]; + v[4] = (msk_y + h) * op->mask.scale[1]; + + dst.p.x = r->dst.x; + v[5] = dst.f; + v[6] = ((r->src.x + tx) * xx + x0) * sx; + v[7] = v[2]; + v[8] = msk_x * op->mask.scale[0]; + v[9] = v[4]; + + dst.p.y = r->dst.y; + v[10] = dst.f; + v[11] = v[6]; + v[12] = ((r->src.y + ty) * yy + y0) * sy; + v[13] = v[8]; + v[14] = msk_y * op->mask.scale[1]; +} + +sse2 fastcall static void +emit_primitive_affine_source_identity(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + float msk_x = r->mask.x + op->mask.offset[0]; + float msk_y = r->mask.y + op->mask.offset[1]; + float w = r->width, h = r->height; + + assert(op->floats_per_rect == 15); + assert((sna->render.vertex_used % 5) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*5; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + _sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width, + op->src.offset[1] + r->src.y + r->height, + op->src.transform, op->src.scale, + &v[1], &v[2]); + v[3] = (msk_x + w) * op->mask.scale[0]; + v[4] = (msk_y + h) * op->mask.scale[1]; + + dst.p.x = r->dst.x; + v[5] = dst.f; + _sna_get_transformed_scaled(op->src.offset[0] + r->src.x, + op->src.offset[1] + r->src.y + r->height, + op->src.transform, 
op->src.scale, + &v[6], &v[7]); + v[8] = msk_x * op->mask.scale[0]; + v[9] = v[4]; + + dst.p.y = r->dst.y; + v[10] = dst.f; + _sna_get_transformed_scaled(op->src.offset[0] + r->src.x, + op->src.offset[1] + r->src.y, + op->src.transform, op->src.scale, + &v[11], &v[12]); + v[13] = v[8]; + v[14] = msk_y * op->mask.scale[1]; +} + +/* SSE4_2 */ +#if defined(sse4_2) + +sse4_2 fastcall static void +emit_primitive_linear__sse4_2(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + assert(op->floats_per_rect == 6); + assert((sna->render.vertex_used % 2) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + dst.p.x = r->dst.x; + v[2] = dst.f; + dst.p.y = r->dst.y; + v[4] = dst.f; + + v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); + v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height); + v[5] = compute_linear(&op->src, r->src.x, r->src.y); +} + +sse4_2 fastcall static void +emit_boxes_linear__sse4_2(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + union { + struct sna_coordinate p; + float f; + } dst; + + do { + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[2] = dst.f; + dst.p.y = box->y1; + v[4] = dst.f; + + v[1] = compute_linear(&op->src, box->x2, box->y2); + v[3] = compute_linear(&op->src, box->x1, box->y2); + v[5] = compute_linear(&op->src, box->x1, box->y1); + + v += 6; + box++; + } while (--nbox); +} + +sse4_2 fastcall static void +emit_primitive_identity_source__sse4_2(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + 
assert(op->floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + dst.p.x = r->dst.x; + v[3] = dst.f; + dst.p.y = r->dst.y; + v[6] = dst.f; + + v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; + v[1] = v[4] + r->width * op->src.scale[0]; + + v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; + v[5] = v[2] = v[8] + r->height * op->src.scale[1]; +} + +sse4_2 fastcall static void +emit_boxes_identity_source__sse4_2(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[3] = dst.f; + dst.p.y = box->y1; + v[6] = dst.f; + + v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; + v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; + + v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; + v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; + + v += 9; + box++; + } while (--nbox); +} + +sse4_2 fastcall static void +emit_primitive_simple_source__sse4_2(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + float xx = op->src.transform->matrix[0][0]; + float x0 = op->src.transform->matrix[0][2]; + float yy = op->src.transform->matrix[1][1]; + float y0 = op->src.transform->matrix[1][2]; + float sx = op->src.scale[0]; + float sy = op->src.scale[1]; + int16_t tx = op->src.offset[0]; + int16_t ty = op->src.offset[1]; + + assert(op->floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*3; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; 
+ v[0] = dst.f; + v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; + v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; + + dst.p.x = r->dst.x; + v[3] = dst.f; + v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; + + dst.p.y = r->dst.y; + v[6] = dst.f; + v[8] = ((r->src.y + ty) * yy + y0) * sy; +} + +sse4_2 fastcall static void +emit_boxes_simple_source__sse4_2(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + float xx = op->src.transform->matrix[0][0]; + float x0 = op->src.transform->matrix[0][2]; + float yy = op->src.transform->matrix[1][1]; + float y0 = op->src.transform->matrix[1][2]; + float sx = op->src.scale[0]; + float sy = op->src.scale[1]; + int16_t tx = op->src.offset[0]; + int16_t ty = op->src.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[1] = ((box->x2 + tx) * xx + x0) * sx; + v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy; + + dst.p.x = box->x1; + v[3] = dst.f; + v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx; + + dst.p.y = box->y1; + v[6] = dst.f; + v[8] = ((box->y1 + ty) * yy + y0) * sy; + + v += 9; + box++; + } while (--nbox); +} + +sse4_2 fastcall static void +emit_primitive_identity_mask__sse4_2(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float msk_x, msk_y; + float w, h; + float *v; + + msk_x = r->mask.x + op->mask.offset[0]; + msk_y = r->mask.y + op->mask.offset[1]; + w = r->width; + h = r->height; + + DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", + __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); + + assert(op->floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[2] = (msk_x + w) * 
op->mask.scale[0]; + v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; + + dst.p.x = r->dst.x; + v[4] = dst.f; + v[10] = v[6] = msk_x * op->mask.scale[0]; + + dst.p.y = r->dst.y; + v[8] = dst.f; + v[11] = msk_y * op->mask.scale[1]; + + v[9] = v[5] = v[1] = .5; +} + +sse4_2 fastcall static void +emit_boxes_identity_mask__sse4_2(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + float msk_x = op->mask.offset[0]; + float msk_y = op->mask.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[2] = (msk_x + box->x2) * op->mask.scale[0]; + v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; + + dst.p.x = box->x1; + v[4] = dst.f; + v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; + + dst.p.y = box->y1; + v[8] = dst.f; + v[11] = (msk_y + box->y1) * op->mask.scale[1]; + + v[9] = v[5] = v[1] = .5; + v += 12; + box++; + } while (--nbox); +} + +sse4_2 fastcall static void +emit_primitive_linear_identity_mask__sse4_2(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float msk_x, msk_y; + float w, h; + float *v; + + msk_x = r->mask.x + op->mask.offset[0]; + msk_y = r->mask.y + op->mask.offset[1]; + w = r->width; + h = r->height; + + DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", + __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); + + assert(op->floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[2] = (msk_x + w) * op->mask.scale[0]; + v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; + + dst.p.x = r->dst.x; + v[4] = dst.f; + v[10] = v[6] = msk_x * op->mask.scale[0]; + + dst.p.y = r->dst.y; + v[8] = dst.f; + v[11] = msk_y * op->mask.scale[1]; + + v[1] = 
compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); + v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height); + v[9] = compute_linear(&op->src, r->src.x, r->src.y); +} + +sse4_2 fastcall static void +emit_boxes_linear_identity_mask__sse4_2(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + float msk_x = op->mask.offset[0]; + float msk_y = op->mask.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[2] = (msk_x + box->x2) * op->mask.scale[0]; + v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; + + dst.p.x = box->x1; + v[4] = dst.f; + v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; + + dst.p.y = box->y1; + v[8] = dst.f; + v[11] = (msk_y + box->y1) * op->mask.scale[1]; + + v[1] = compute_linear(&op->src, box->x2, box->y2); + v[5] = compute_linear(&op->src, box->x1, box->y2); + v[9] = compute_linear(&op->src, box->x1, box->y1); + + v += 12; + box++; + } while (--nbox); +} + +#endif + +/* AVX2 */ +#if defined(avx2) + +avx2 fastcall static void +emit_primitive_linear__avx2(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + assert(op->floats_per_rect == 6); + assert((sna->render.vertex_used % 2) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + dst.p.x = r->dst.x; + v[2] = dst.f; + dst.p.y = r->dst.y; + v[4] = dst.f; + + v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); + v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height); + v[5] = compute_linear(&op->src, r->src.x, r->src.y); +} + +avx2 fastcall static void +emit_boxes_linear__avx2(const struct sna_composite_op *op, + const BoxRec 
*box, int nbox, + float *v) +{ + union { + struct sna_coordinate p; + float f; + } dst; + + do { + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[2] = dst.f; + dst.p.y = box->y1; + v[4] = dst.f; + + v[1] = compute_linear(&op->src, box->x2, box->y2); + v[3] = compute_linear(&op->src, box->x1, box->y2); + v[5] = compute_linear(&op->src, box->x1, box->y1); + + v += 6; + box++; + } while (--nbox); +} + +avx2 fastcall static void +emit_primitive_identity_source__avx2(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + assert(op->floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + dst.p.x = r->dst.x; + v[3] = dst.f; + dst.p.y = r->dst.y; + v[6] = dst.f; + + v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; + v[1] = v[4] + r->width * op->src.scale[0]; + + v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; + v[5] = v[2] = v[8] + r->height * op->src.scale[1]; +} + +avx2 fastcall static void +emit_boxes_identity_source__avx2(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[3] = dst.f; + dst.p.y = box->y1; + v[6] = dst.f; + + v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; + v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; + + v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; + v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; + + v += 9; + box++; + } while (--nbox); +} + +avx2 fastcall static void +emit_primitive_simple_source__avx2(struct sna *sna, + const struct sna_composite_op *op, + 
const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + float xx = op->src.transform->matrix[0][0]; + float x0 = op->src.transform->matrix[0][2]; + float yy = op->src.transform->matrix[1][1]; + float y0 = op->src.transform->matrix[1][2]; + float sx = op->src.scale[0]; + float sy = op->src.scale[1]; + int16_t tx = op->src.offset[0]; + int16_t ty = op->src.offset[1]; + + assert(op->floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*3; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; + v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; + + dst.p.x = r->dst.x; + v[3] = dst.f; + v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; + + dst.p.y = r->dst.y; + v[6] = dst.f; + v[8] = ((r->src.y + ty) * yy + y0) * sy; +} + +avx2 fastcall static void +emit_boxes_simple_source__avx2(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + float xx = op->src.transform->matrix[0][0]; + float x0 = op->src.transform->matrix[0][2]; + float yy = op->src.transform->matrix[1][1]; + float y0 = op->src.transform->matrix[1][2]; + float sx = op->src.scale[0]; + float sy = op->src.scale[1]; + int16_t tx = op->src.offset[0]; + int16_t ty = op->src.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[1] = ((box->x2 + tx) * xx + x0) * sx; + v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy; + + dst.p.x = box->x1; + v[3] = dst.f; + v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx; + + dst.p.y = box->y1; + v[6] = dst.f; + v[8] = ((box->y1 + ty) * yy + y0) * sy; + + v += 9; + box++; + } while (--nbox); +} + +avx2 fastcall static void +emit_primitive_identity_mask__avx2(struct sna *sna, + const struct sna_composite_op *op, + const struct 
sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float msk_x, msk_y; + float w, h; + float *v; + + msk_x = r->mask.x + op->mask.offset[0]; + msk_y = r->mask.y + op->mask.offset[1]; + w = r->width; + h = r->height; + + DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", + __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); + + assert(op->floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[2] = (msk_x + w) * op->mask.scale[0]; + v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; + + dst.p.x = r->dst.x; + v[4] = dst.f; + v[10] = v[6] = msk_x * op->mask.scale[0]; + + dst.p.y = r->dst.y; + v[8] = dst.f; + v[11] = msk_y * op->mask.scale[1]; + + v[9] = v[5] = v[1] = .5; +} + +avx2 fastcall static void +emit_boxes_identity_mask__avx2(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + float msk_x = op->mask.offset[0]; + float msk_y = op->mask.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[2] = (msk_x + box->x2) * op->mask.scale[0]; + v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; + + dst.p.x = box->x1; + v[4] = dst.f; + v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; + + dst.p.y = box->y1; + v[8] = dst.f; + v[11] = (msk_y + box->y1) * op->mask.scale[1]; + + v[9] = v[5] = v[1] = .5; + v += 12; + box++; + } while (--nbox); +} + +avx2 fastcall static void +emit_primitive_linear_identity_mask__avx2(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float msk_x, msk_y; + float w, h; + float *v; + + msk_x = r->mask.x + op->mask.offset[0]; + msk_y = r->mask.y + op->mask.offset[1]; + w = r->width; + h = 
r->height; + + DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", + __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); + + assert(op->floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[2] = (msk_x + w) * op->mask.scale[0]; + v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; + + dst.p.x = r->dst.x; + v[4] = dst.f; + v[10] = v[6] = msk_x * op->mask.scale[0]; + + dst.p.y = r->dst.y; + v[8] = dst.f; + v[11] = msk_y * op->mask.scale[1]; + + v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); + v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height); + v[9] = compute_linear(&op->src, r->src.x, r->src.y); +} + +avx2 fastcall static void +emit_boxes_linear_identity_mask__avx2(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + float msk_x = op->mask.offset[0]; + float msk_y = op->mask.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[2] = (msk_x + box->x2) * op->mask.scale[0]; + v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; + + dst.p.x = box->x1; + v[4] = dst.f; + v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; + + dst.p.y = box->y1; + v[8] = dst.f; + v[11] = (msk_y + box->y1) * op->mask.scale[1]; + + v[1] = compute_linear(&op->src, box->x2, box->y2); + v[5] = compute_linear(&op->src, box->x1, box->y2); + v[9] = compute_linear(&op->src, box->x1, box->y1); + + v += 12; + box++; + } while (--nbox); +} + +#endif + +unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op *tmp) { unsigned vb; @@ -280,25 +1701,168 @@ unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp) if (tmp->mask.transform == NULL) { if (tmp->src.is_solid) { DBG(("%s: solid, identity mask\n", __FUNCTION__)); +#if 
defined(avx2) + if (sna->cpu_features & AVX2) { + tmp->prim_emit = emit_primitive_identity_mask__avx2; + } else +#endif +#if defined(sse4_2) + if (sna->cpu_features & SSE4_2) { + tmp->prim_emit = emit_primitive_identity_mask__sse4_2; + } else +#endif + { + tmp->prim_emit = emit_primitive_identity_mask; + } + tmp->floats_per_vertex = 4; + vb = 1 | 2 << 2; } else if (tmp->src.is_linear) { DBG(("%s: linear, identity mask\n", __FUNCTION__)); +#if defined(avx2) + if (sna->cpu_features & AVX2) { + tmp->prim_emit = emit_primitive_linear_identity_mask__avx2; + } else +#endif +#if defined(sse4_2) + if (sna->cpu_features & SSE4_2) { + tmp->prim_emit = emit_primitive_linear_identity_mask__sse4_2; + } else +#endif + { + tmp->prim_emit = emit_primitive_linear_identity_mask; + } + tmp->floats_per_vertex = 4; + vb = 1 | 2 << 2; } else if (tmp->src.transform == NULL) { DBG(("%s: identity source, identity mask\n", __FUNCTION__)); tmp->prim_emit = emit_primitive_identity_source_mask; tmp->floats_per_vertex = 5; vb = 2 << 2 | 2; } else if (tmp->src.is_affine) { - DBG(("%s: simple src, identity mask\n", __FUNCTION__)); + tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; + tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; + if (!sna_affine_transform_is_rotation(tmp->src.transform)) { + DBG(("%s: simple src, identity mask\n", __FUNCTION__)); + tmp->prim_emit = emit_primitive_simple_source_identity; + } else { + DBG(("%s: affine src, identity mask\n", __FUNCTION__)); + tmp->prim_emit = emit_primitive_affine_source_identity; + } + tmp->floats_per_vertex = 5; + vb = 2 << 2 | 2; } else { DBG(("%s: projective source, identity mask\n", __FUNCTION__)); + tmp->prim_emit = emit_primitive_mask; + tmp->floats_per_vertex = 6; + vb = 2 << 2 | 3; } } else { + tmp->prim_emit = emit_primitive_mask; + tmp->floats_per_vertex = 1; + vb = 0; + if (tmp->mask.is_solid) { + tmp->floats_per_vertex += 1; + vb |= 1 << 2; + } else if (tmp->mask.is_affine) { + tmp->floats_per_vertex += 2; + vb |= 2 << 2; 
+ }else { + tmp->floats_per_vertex += 3; + vb |= 3 << 2; + } + if (tmp->src.is_solid) { + tmp->floats_per_vertex += 1; + vb |= 1; + } else if (tmp->src.is_affine) { + tmp->floats_per_vertex += 2; + vb |= 2 ; + }else { + tmp->floats_per_vertex += 3; + vb |= 3; + } DBG(("%s: general mask: floats-per-vertex=%d, vb=%x\n", __FUNCTION__,tmp->floats_per_vertex, vb)); } } else { +#if 0 + if (tmp->src.is_solid) { + DBG(("%s: solid, no mask\n", __FUNCTION__)); + tmp->prim_emit = emit_primitive_solid; + if (tmp->src.is_opaque && tmp->op == PictOpOver) + tmp->op = PictOpSrc; + tmp->floats_per_vertex = 2; + vb = 1; + } else if (tmp->src.is_linear) { + DBG(("%s: linear, no mask\n", __FUNCTION__)); +#if defined(avx2) + if (sna->cpu_features & AVX2) { + tmp->prim_emit = emit_primitive_linear__avx2; + } else +#endif +#if defined(sse4_2) + if (sna->cpu_features & SSE4_2) { + tmp->prim_emit = emit_primitive_linear__sse4_2; + } else +#endif + { + tmp->prim_emit = emit_primitive_linear; + } + tmp->floats_per_vertex = 2; + vb = 1; + } else if (tmp->src.transform == NULL) { + DBG(("%s: identity src, no mask\n", __FUNCTION__)); +#if defined(avx2) + if (sna->cpu_features & AVX2) { + tmp->prim_emit = emit_primitive_identity_source__avx2; + } else +#endif +#if defined(sse4_2) + if (sna->cpu_features & SSE4_2) { + tmp->prim_emit = emit_primitive_identity_source__sse4_2; + } else +#endif + { + tmp->prim_emit = emit_primitive_identity_source; + } + tmp->floats_per_vertex = 3; + vb = 2; + } else if (tmp->src.is_affine) { + tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; + tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; + if (!sna_affine_transform_is_rotation(tmp->src.transform)) { + DBG(("%s: simple src, no mask\n", __FUNCTION__)); +#if defined(avx2) + if (sna->cpu_features & AVX2) { + tmp->prim_emit = emit_primitive_simple_source__avx2; + } else +#endif +#if defined(sse4_2) + if (sna->cpu_features & SSE4_2) { + tmp->prim_emit = emit_primitive_simple_source__sse4_2; + } else 
+#endif + { + tmp->prim_emit = emit_primitive_simple_source; + } + } else { + DBG(("%s: affine src, no mask\n", __FUNCTION__)); + tmp->prim_emit = emit_primitive_affine_source; + } + tmp->floats_per_vertex = 3; + vb = 2; + } else { + DBG(("%s: projective src, no mask\n", __FUNCTION__)); + assert(!tmp->src.is_solid); + tmp->prim_emit = emit_primitive; + tmp->floats_per_vertex = 4; + vb = 3; + } +#endif } tmp->floats_per_rect = 3 * tmp->floats_per_vertex; return vb; } + + + diff --git a/drivers/video/Intel-2D/gen4_vertex.h b/drivers/video/Intel-2D/gen4_vertex.h index b482809b66..31c81d684d 100644 --- a/drivers/video/Intel-2D/gen4_vertex.h +++ b/drivers/video/Intel-2D/gen4_vertex.h @@ -10,7 +10,6 @@ void gen4_vertex_flush(struct sna *sna); int gen4_vertex_finish(struct sna *sna); void gen4_vertex_close(struct sna *sna); -unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp); -//unsigned gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp); +unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op *tmp); #endif /* GEN4_VERTEX_H */ diff --git a/drivers/video/Intel-2D/gen5_render.c b/drivers/video/Intel-2D/gen5_render.c index 275aef21c0..21209f66c4 100644 --- a/drivers/video/Intel-2D/gen5_render.c +++ b/drivers/video/Intel-2D/gen5_render.c @@ -279,8 +279,24 @@ static uint32_t gen5_get_card_format(PictFormat format) return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM; case PICT_x8r8g8b8: return GEN5_SURFACEFORMAT_B8G8R8X8_UNORM; + case PICT_a8b8g8r8: + return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM; + case PICT_x8b8g8r8: + return GEN5_SURFACEFORMAT_R8G8B8X8_UNORM; + case PICT_a2r10g10b10: + return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM; + case PICT_x2r10g10b10: + return GEN5_SURFACEFORMAT_B10G10R10X2_UNORM; + case PICT_r8g8b8: + return GEN5_SURFACEFORMAT_R8G8B8_UNORM; + case PICT_r5g6b5: + return GEN5_SURFACEFORMAT_B5G6R5_UNORM; + case PICT_a1r5g5b5: + return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM; case PICT_a8: return GEN5_SURFACEFORMAT_A8_UNORM; + 
case PICT_a4r4g4b4: + return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM; } } @@ -292,8 +308,22 @@ static uint32_t gen5_get_dest_format(PictFormat format) case PICT_a8r8g8b8: case PICT_x8r8g8b8: return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM; + case PICT_a8b8g8r8: + case PICT_x8b8g8r8: + return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM; + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM; + case PICT_r5g6b5: + return GEN5_SURFACEFORMAT_B5G6R5_UNORM; + case PICT_x1r5g5b5: + case PICT_a1r5g5b5: + return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM; case PICT_a8: return GEN5_SURFACEFORMAT_A8_UNORM; + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM; } } typedef struct gen5_surface_state_padded { @@ -384,7 +414,7 @@ gen5_bind_bo(struct sna *sna, /* After the first bind, we manage the cache domains within the batch */ if (!DBG_NO_SURFACE_CACHE) { - offset = kgem_bo_get_binding(bo, format); + offset = kgem_bo_get_binding(bo, format | is_dst << 31); if (offset) { if (is_dst) kgem_bo_mark_dirty(bo); @@ -400,9 +430,10 @@ gen5_bind_bo(struct sna *sna, GEN5_SURFACE_BLEND_ENABLED | format << GEN5_SURFACE_FORMAT_SHIFT); - if (is_dst) + if (is_dst) { + ss[0] |= GEN5_SURFACE_RC_READ_WRITE; domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER; - else + } else domains = I915_GEM_DOMAIN_SAMPLER << 16; ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0); @@ -413,7 +444,7 @@ gen5_bind_bo(struct sna *sna, ss[4] = 0; ss[5] = 0; - kgem_bo_set_binding(bo, format, offset); + kgem_bo_set_binding(bo, format | is_dst << 31, offset); DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", offset, bo->handle, ss[1], @@ -541,8 +572,8 @@ start: goto start; } - assert(op->floats_per_rect >= vertex_space(sna)); assert(rem <= vertex_space(sna)); + assert(op->floats_per_rect <= rem); if (want > 1 && want * op->floats_per_rect > rem) want = rem / op->floats_per_rect; @@ -649,8 +680,13 @@ 
gen5_emit_invariant(struct sna *sna) * * However, the kernel flushes the pipeline between batches, * so we should be safe.... - * OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); + * + * On the other hand, after using BLT we must use a non-pipelined + * operation... */ + if (sna->kgem.nreloc) + OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); + OUT_BATCH(GEN5_PIPELINE_SELECT | PIPELINE_SELECT_3D); gen5_emit_state_base_address(sna); @@ -732,12 +768,15 @@ gen5_emit_pipelined_pointers(struct sna *sna, kernel); bp = gen5_get_blend(blend, op->has_component_alpha, op->dst.format); - DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp)); key = sp | (uint32_t)bp << 16 | (op->mask.bo != NULL) << 31; + DBG(("%s: sp=%d, bp=%d, key=%08x (current sp=%d, bp=%d, key=%08x)\n", + __FUNCTION__, sp, bp, key, + sna->render_state.gen5.last_pipelined_pointers & 0xffff, + (sna->render_state.gen5.last_pipelined_pointers >> 16) & 0x7fff, + sna->render_state.gen5.last_pipelined_pointers)); if (key == sna->render_state.gen5.last_pipelined_pointers) return false; - OUT_BATCH(GEN5_3DSTATE_PIPELINED_POINTERS | 5); OUT_BATCH(sna->render_state.gen5.vs); OUT_BATCH(GEN5_GS_DISABLE); /* passthrough */ @@ -746,11 +785,15 @@ gen5_emit_pipelined_pointers(struct sna *sna, OUT_BATCH(sna->render_state.gen5.wm + sp); OUT_BATCH(sna->render_state.gen5.cc + bp); + bp = (sna->render_state.gen5.last_pipelined_pointers & 0x7fff0000) != ((uint32_t)bp << 16); sna->render_state.gen5.last_pipelined_pointers = key; - return true; + + gen5_emit_urb(sna); + + return bp; } -static void +static bool gen5_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op) { uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); @@ -762,7 +805,7 @@ gen5_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op) if (!DBG_NO_STATE_CACHE && sna->render_state.gen5.drawrect_limit == limit && sna->render_state.gen5.drawrect_offset == offset) - return; + return false; 
sna->render_state.gen5.drawrect_offset = offset; sna->render_state.gen5.drawrect_limit = limit; @@ -771,6 +814,7 @@ gen5_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op) OUT_BATCH(0x00000000); OUT_BATCH(limit); OUT_BATCH(offset); + return true; } static void @@ -892,11 +936,35 @@ gen5_emit_vertex_elements(struct sna *sna, } } +inline static void +gen5_emit_pipe_flush(struct sna *sna) +{ + OUT_BATCH(GEN5_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(GEN5_PIPE_CONTROL_WC_FLUSH); + OUT_BATCH(0); + OUT_BATCH(0); +} + static void gen5_emit_state(struct sna *sna, const struct sna_composite_op *op, uint16_t offset) { + bool flush = false; + + assert(op->dst.bo->exec); + + /* drawrect must be first for Ironlake BLT workaround */ + if (gen5_emit_drawing_rectangle(sna, op)) + offset &= ~1; + gen5_emit_binding_table(sna, offset & ~1); + if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel)){ + DBG(("%s: changed blend state, flush required? %d\n", + __FUNCTION__, (offset & 1) && op->op > PictOpSrc)); + flush = (offset & 1) && op->op > PictOpSrc; + } + gen5_emit_vertex_elements(sna, op); + if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { DBG(("%s: flushing dirty (%d, %d)\n", __FUNCTION__, kgem_bo_is_dirty(op->src.bo), @@ -904,19 +972,18 @@ gen5_emit_state(struct sna *sna, OUT_BATCH(MI_FLUSH); kgem_clear_dirty(&sna->kgem); kgem_bo_mark_dirty(op->dst.bo); + flush = false; + } + if (flush) { + DBG(("%s: forcing flush\n", __FUNCTION__)); + gen5_emit_pipe_flush(sna); } - - /* drawrect must be first for Ironlake BLT workaround */ - gen5_emit_drawing_rectangle(sna, op); - gen5_emit_binding_table(sna, offset); - if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel)) - gen5_emit_urb(sna); - gen5_emit_vertex_elements(sna, op); } static void gen5_bind_surfaces(struct sna *sna, const struct sna_composite_op *op) { + bool dirty = kgem_bo_is_dirty(op->dst.bo); uint32_t *binding_table; uint16_t offset; @@ -953,7 +1020,7 
@@ static void gen5_bind_surfaces(struct sna *sna, offset = sna->render_state.gen5.surface_table; } - gen5_emit_state(sna, op, offset); + gen5_emit_state(sna, op, offset | dirty); } fastcall static void @@ -972,6 +1039,333 @@ gen5_render_composite_blt(struct sna *sna, op->prim_emit(sna, op, r); } +#if 0 +fastcall static void +gen5_render_composite_box(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box) +{ + struct sna_composite_rectangles r; + + DBG((" %s: (%d, %d), (%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2)); + + gen5_get_rectangles(sna, op, 1, gen5_bind_surfaces); + + r.dst.x = box->x1; + r.dst.y = box->y1; + r.width = box->x2 - box->x1; + r.height = box->y2 - box->y1; + r.mask = r.src = r.dst; + + op->prim_emit(sna, op, &r); +} + +static void +gen5_render_composite_boxes__blt(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n", + __FUNCTION__, nbox, op->dst.x, op->dst.y, + op->src.offset[0], op->src.offset[1], + op->src.width, op->src.height, + op->mask.offset[0], op->mask.offset[1], + op->mask.width, op->mask.height)); + + do { + int nbox_this_time; + + nbox_this_time = gen5_get_rectangles(sna, op, nbox, + gen5_bind_surfaces); + nbox -= nbox_this_time; + + do { + struct sna_composite_rectangles r; + + DBG((" %s: (%d, %d), (%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2)); + + r.dst.x = box->x1; + r.dst.y = box->y1; + r.width = box->x2 - box->x1; + r.height = box->y2 - box->y1; + r.mask = r.src = r.dst; + op->prim_emit(sna, op, &r); + box++; + } while (--nbox_this_time); + } while (nbox); +} + +static void +gen5_render_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen5_get_rectangles(sna, op, nbox, + 
gen5_bind_surfaces); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + } while (nbox); +} + +static void +gen5_render_composite_boxes__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen5_get_rectangles(sna, op, nbox, + gen5_bind_surfaces); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +#ifndef MAX +#define MAX(a,b) ((a) > (b) ? 
(a) : (b)) +#endif + +static uint32_t gen5_bind_video_source(struct sna *sna, + struct kgem_bo *src_bo, + uint32_t src_offset, + int src_width, + int src_height, + int src_pitch, + uint32_t src_surf_format) +{ + struct gen5_surface_state *ss; + + sna->kgem.surface -= sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t); + + ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss)); + ss->ss0.surface_type = GEN5_SURFACE_2D; + ss->ss0.surface_format = src_surf_format; + ss->ss0.color_blend = 1; + + ss->ss1.base_addr = + kgem_add_reloc(&sna->kgem, + sna->kgem.surface + 1, + src_bo, + I915_GEM_DOMAIN_SAMPLER << 16, + src_offset); + + ss->ss2.width = src_width - 1; + ss->ss2.height = src_height - 1; + ss->ss3.pitch = src_pitch - 1; + + return sna->kgem.surface * sizeof(uint32_t); +} + +static void gen5_video_bind_surfaces(struct sna *sna, + const struct sna_composite_op *op) +{ + bool dirty = kgem_bo_is_dirty(op->dst.bo); + struct sna_video_frame *frame = op->priv; + uint32_t src_surf_format; + uint32_t src_surf_base[6]; + int src_width[6]; + int src_height[6]; + int src_pitch[6]; + uint32_t *binding_table; + uint16_t offset; + int n_src, n; + + src_surf_base[0] = 0; + src_surf_base[1] = 0; + src_surf_base[2] = frame->VBufOffset; + src_surf_base[3] = frame->VBufOffset; + src_surf_base[4] = frame->UBufOffset; + src_surf_base[5] = frame->UBufOffset; + + if (is_planar_fourcc(frame->id)) { + src_surf_format = GEN5_SURFACEFORMAT_R8_UNORM; + src_width[1] = src_width[0] = frame->width; + src_height[1] = src_height[0] = frame->height; + src_pitch[1] = src_pitch[0] = frame->pitch[1]; + src_width[4] = src_width[5] = src_width[2] = src_width[3] = + frame->width / 2; + src_height[4] = src_height[5] = src_height[2] = src_height[3] = + frame->height / 2; + src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = + frame->pitch[0]; + n_src = 6; + } else { + if (frame->id == FOURCC_UYVY) + src_surf_format = GEN5_SURFACEFORMAT_YCRCB_SWAPY; + else + src_surf_format = 
GEN5_SURFACEFORMAT_YCRCB_NORMAL; + + src_width[0] = frame->width; + src_height[0] = frame->height; + src_pitch[0] = frame->pitch[0]; + n_src = 1; + } + + gen5_get_batch(sna, op); + + binding_table = gen5_composite_get_binding_table(sna, &offset); + binding_table[0] = + gen5_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen5_get_dest_format(op->dst.format), + true); + for (n = 0; n < n_src; n++) { + binding_table[1+n] = + gen5_bind_video_source(sna, + frame->bo, + src_surf_base[n], + src_width[n], + src_height[n], + src_pitch[n], + src_surf_format); + } + + gen5_emit_state(sna, op, offset | dirty); +} + +static bool +gen5_render_video(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + RegionPtr dstRegion, + PixmapPtr pixmap) +{ + struct sna_composite_op tmp; + int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; + int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; + int src_width = frame->src.x2 - frame->src.x1; + int src_height = frame->src.y2 - frame->src.y1; + float src_offset_x, src_offset_y; + float src_scale_x, src_scale_y; + int nbox, pix_xoff, pix_yoff; + struct sna_pixmap *priv; + BoxPtr box; + + DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, + src_width, src_height, dst_width, dst_height)); + + priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE); + if (priv == NULL) + return false; + + memset(&tmp, 0, sizeof(tmp)); + + tmp.op = PictOpSrc; + tmp.dst.pixmap = pixmap; + tmp.dst.width = pixmap->drawable.width; + tmp.dst.height = pixmap->drawable.height; + tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth); + tmp.dst.bo = priv->gpu_bo; + + if (src_width == dst_width && src_height == dst_height) + tmp.src.filter = SAMPLER_FILTER_NEAREST; + else + tmp.src.filter = SAMPLER_FILTER_BILINEAR; + tmp.src.repeat = SAMPLER_EXTEND_PAD; + tmp.src.bo = frame->bo; + tmp.mask.bo = NULL; + tmp.u.gen5.wm_kernel = + is_planar_fourcc(frame->id) ? 
WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED; + tmp.u.gen5.ve_id = 2; + tmp.is_affine = true; + tmp.floats_per_vertex = 3; + tmp.floats_per_rect = 9; + tmp.priv = frame; + + if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { + kgem_submit(&sna->kgem); + assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)); + } + + gen5_video_bind_surfaces(sna, &tmp); + gen5_align_vertex(sna, &tmp); + + /* Set up the offset for translating from the given region (in screen + * coordinates) to the backing pixmap. + */ +#ifdef COMPOSITE + pix_xoff = -pixmap->screen_x + pixmap->drawable.x; + pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +#else + pix_xoff = 0; + pix_yoff = 0; +#endif + + src_scale_x = (float)src_width / dst_width / frame->width; + src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; + + src_scale_y = (float)src_height / dst_height / frame->height; + src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; + + box = REGION_RECTS(dstRegion); + nbox = REGION_NUM_RECTS(dstRegion); + while (nbox--) { + BoxRec r; + + r.x1 = box->x1 + pix_xoff; + r.x2 = box->x2 + pix_xoff; + r.y1 = box->y1 + pix_yoff; + r.y2 = box->y2 + pix_yoff; + + gen5_get_rectangles(sna, &tmp, 1, gen5_video_bind_surfaces); + + OUT_VERTEX(r.x2, r.y2); + OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); + + OUT_VERTEX(r.x1, r.y2); + OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); + + OUT_VERTEX(r.x1, r.y1); + OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); + + if (!DAMAGE_IS_ALL(priv->gpu_damage)) { + sna_damage_add_box(&priv->gpu_damage, &r); + sna_damage_subtract_box(&priv->cpu_damage, &r); + } + box++; + } + + gen4_vertex_flush(sna); + return true; +} +#endif static void gen5_render_composite_done(struct sna *sna, @@ -986,93 +1380,644 @@ 
gen5_render_composite_done(struct sna *sna, } +#if 0 +static bool +gen5_composite_set_target(struct sna *sna, + struct sna_composite_op *op, + PicturePtr dst, + int x, int y, int w, int h, + bool partial) +{ + BoxRec box; + + op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); + op->dst.width = op->dst.pixmap->drawable.width; + op->dst.height = op->dst.pixmap->drawable.height; + op->dst.format = dst->format; + if (w && h) { + box.x1 = x; + box.y1 = y; + box.x2 = x + w; + box.y2 = y + h; + } else + sna_render_picture_extents(dst, &box); + + op->dst.bo = sna_drawable_use_bo (dst->pDrawable, + PREFER_GPU | FORCE_GPU | RENDER_GPU, + &box, &op->damage); + if (op->dst.bo == NULL) + return false; + + get_drawable_deltas(dst->pDrawable, op->dst.pixmap, + &op->dst.x, &op->dst.y); + + DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", + __FUNCTION__, + op->dst.pixmap, (int)op->dst.format, + op->dst.width, op->dst.height, + op->dst.bo->pitch, + op->dst.x, op->dst.y, + op->damage ? 
*op->damage : (void *)-1)); + + assert(op->dst.bo->proxy == NULL); + + if (too_large(op->dst.width, op->dst.height) && + !sna_render_composite_redirect(sna, op, x, y, w, h, partial)) + return false; + + return true; +} static bool -gen5_blit_tex(struct sna *sna, - uint8_t op, bool scale, - PixmapPtr src, struct kgem_bo *src_bo, - PixmapPtr mask,struct kgem_bo *mask_bo, - PixmapPtr dst, struct kgem_bo *dst_bo, - int32_t src_x, int32_t src_y, - int32_t msk_x, int32_t msk_y, - int32_t dst_x, int32_t dst_y, - int32_t width, int32_t height, - struct sna_composite_op *tmp) +gen5_render_composite(struct sna *sna, + uint8_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t msk_x, int16_t msk_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp) { DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, width, height, sna->kgem.mode)); - tmp->op = PictOpSrc; + if (op >= ARRAY_SIZE(gen5_blend_op)) { + DBG(("%s: unhandled blend op %d\n", __FUNCTION__, op)); + return false; + } - tmp->dst.pixmap = dst; - tmp->dst.bo = dst_bo; - tmp->dst.width = dst->drawable.width; - tmp->dst.height = dst->drawable.height; - tmp->dst.format = PICT_x8r8g8b8; + if (mask == NULL && + try_blt(sna, dst, src, width, height) && + sna_blt_composite(sna, op, + src, dst, + src_x, src_y, + dst_x, dst_y, + width, height, + tmp, false)) + return true; + if (gen5_composite_fallback(sna, src, mask, dst)) + return false; - tmp->src.repeat = RepeatNone; - tmp->src.filter = PictFilterNearest; - tmp->src.is_affine = true; + if (need_tiling(sna, width, height)) + return sna_tiling_composite(op, src, mask, dst, + src_x, src_y, + msk_x, msk_y, + dst_x, dst_y, + width, height, + tmp); - tmp->src.bo = src_bo; - tmp->src.pict_format = PICT_x8r8g8b8; - tmp->src.card_format = gen5_get_card_format(tmp->src.pict_format); - tmp->src.width = src->drawable.width; - tmp->src.height = src->drawable.height; + if 
(!gen5_composite_set_target(sna, tmp, dst, + dst_x, dst_y, width, height, + op > PictOpSrc || dst->pCompositeClip->data)) { + DBG(("%s: failed to set composite target\n", __FUNCTION__)); + return false; + } + DBG(("%s: preparing source\n", __FUNCTION__)); + tmp->op = op; + switch (gen5_composite_picture(sna, src, &tmp->src, + src_x, src_y, + width, height, + dst_x, dst_y, + dst->polyMode == PolyModePrecise)) { + case -1: + DBG(("%s: failed to prepare source picture\n", __FUNCTION__)); + goto cleanup_dst; + case 0: + if (!gen4_channel_init_solid(sna, &tmp->src, 0)) + goto cleanup_dst; + /* fall through to fixup */ + case 1: + if (mask == NULL && + sna_blt_composite__convert(sna, + dst_x, dst_y, width, height, + tmp)) + return true; - tmp->is_affine = tmp->src.is_affine; + gen5_composite_channel_convert(&tmp->src); + break; + } + + tmp->is_affine = tmp->src.is_affine; tmp->has_component_alpha = false; tmp->need_magic_ca_pass = false; - - tmp->mask.is_affine = true; - tmp->mask.repeat = SAMPLER_EXTEND_NONE; - tmp->mask.filter = SAMPLER_FILTER_NEAREST; - tmp->mask.bo = mask_bo; - tmp->mask.pict_format = PIXMAN_a8; - tmp->mask.card_format = gen5_get_card_format(tmp->mask.pict_format); - tmp->mask.width = mask->drawable.width; - tmp->mask.height = mask->drawable.height; - if( scale ) - { - tmp->src.scale[0] = 1.f/width; - tmp->src.scale[1] = 1.f/height; - } - else - { - tmp->src.scale[0] = 1.f/src->drawable.width; - tmp->src.scale[1] = 1.f/src->drawable.height; - } + if (mask) { + if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { + tmp->has_component_alpha = true; + + /* Check if it's component alpha that relies on a source alpha and on + * the source value. We can only get one of those into the single + * source value that we get to blend with. 
+ */ + if (gen5_blend_op[op].src_alpha && + (gen5_blend_op[op].src_blend != GEN5_BLENDFACTOR_ZERO)) { + if (op != PictOpOver) { + DBG(("%s: unhandled CA blend op %d\n", __FUNCTION__, op)); + goto cleanup_src; + } + + tmp->need_magic_ca_pass = true; + tmp->op = PictOpOutReverse; + } + } + + if (!reuse_source(sna, + src, &tmp->src, src_x, src_y, + mask, &tmp->mask, msk_x, msk_y)) { + DBG(("%s: preparing mask\n", __FUNCTION__)); + switch (gen5_composite_picture(sna, mask, &tmp->mask, + msk_x, msk_y, + width, height, + dst_x, dst_y, + dst->polyMode == PolyModePrecise)) { + case -1: + DBG(("%s: failed to prepare mask picture\n", __FUNCTION__)); + goto cleanup_src; + case 0: + if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) + goto cleanup_src; + /* fall through to fixup */ + case 1: + gen5_composite_channel_convert(&tmp->mask); + break; + } + } + + tmp->is_affine &= tmp->mask.is_affine; + } - tmp->mask.scale[0] = 1.f/mask->drawable.width; - tmp->mask.scale[1] = 1.f/mask->drawable.height; - - tmp->u.gen5.wm_kernel = gen5_choose_composite_kernel(tmp->op, tmp->mask.bo != NULL, tmp->has_component_alpha, tmp->is_affine); - tmp->u.gen5.ve_id = gen4_choose_composite_emitter(tmp); + tmp->u.gen5.ve_id = gen4_choose_composite_emitter(sna, tmp); tmp->blt = gen5_render_composite_blt; -// tmp->box = gen5_render_composite_box; + tmp->box = gen5_render_composite_box; + tmp->boxes = gen5_render_composite_boxes__blt; + if (tmp->emit_boxes) { + tmp->boxes = gen5_render_composite_boxes; + tmp->thread_boxes = gen5_render_composite_boxes__thread; + } tmp->done = gen5_render_composite_done; if (!kgem_check_bo(&sna->kgem, tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) { kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, + tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) + goto cleanup_mask; } gen5_bind_surfaces(sna, tmp); gen5_align_vertex(sna, tmp); return true; +cleanup_mask: + if (tmp->mask.bo) + kgem_bo_destroy(&sna->kgem, tmp->mask.bo); +cleanup_src: + if (tmp->src.bo) + 
kgem_bo_destroy(&sna->kgem, tmp->src.bo); +cleanup_dst: + if (tmp->redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp->dst.bo); + return false; } +#if !NO_COMPOSITE_SPANS +fastcall static void +gen5_render_composite_spans_box(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, float opacity) +{ + DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", + __FUNCTION__, + op->base.src.offset[0], op->base.src.offset[1], + opacity, + op->base.dst.x, op->base.dst.y, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1)); + gen5_get_rectangles(sna, &op->base, 1, gen5_bind_surfaces); + op->prim_emit(sna, op, box, opacity); +} + +static void +gen5_render_composite_spans_boxes(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, int nbox, + float opacity) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + opacity, + op->base.dst.x, op->base.dst.y)); + + do { + int nbox_this_time; + + nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox, + gen5_bind_surfaces); + nbox -= nbox_this_time; + + do { + DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1)); + + op->prim_emit(sna, op, box++, opacity); + } while (--nbox_this_time); + } while (nbox); +} + +fastcall static void +gen5_render_composite_spans_boxes__thread(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + op->base.dst.x, op->base.dst.y)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox, + gen5_bind_surfaces); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + 
sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +fastcall static void +gen5_render_composite_spans_done(struct sna *sna, + const struct sna_composite_spans_op *op) +{ + if (sna->render.vertex_offset) + gen4_vertex_flush(sna); + + DBG(("%s()\n", __FUNCTION__)); + + kgem_bo_destroy(&sna->kgem, op->base.src.bo); + sna_render_composite_redirect_done(sna, &op->base); +} + +static bool +gen5_check_composite_spans(struct sna *sna, + uint8_t op, PicturePtr src, PicturePtr dst, + int16_t width, int16_t height, + unsigned flags) +{ + DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n", + __FUNCTION__, op, width, height, flags)); + + if (op >= ARRAY_SIZE(gen5_blend_op)) + return false; + + if (gen5_composite_fallback(sna, src, NULL, dst)) { + DBG(("%s: operation would fallback\n", __FUNCTION__)); + return false; + } + + if (need_tiling(sna, width, height) && + !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { + DBG(("%s: fallback, tiled operation not on GPU\n", + __FUNCTION__)); + return false; + } + + if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) { + struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable); + assert(priv); + + if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) + return true; + + if (flags & COMPOSITE_SPANS_INPLACE_HINT) + return false; + + if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0 && + dst->format == PICT_a8) + return false; + + return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo); + } + + return true; +} + +static bool +gen5_render_composite_spans(struct sna *sna, + uint8_t op, + PicturePtr src, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t 
height, + unsigned flags, + struct sna_composite_spans_op *tmp) +{ + DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__, + width, height, flags, sna->kgem.ring)); + + assert(gen5_check_composite_spans(sna, op, src, dst, width, height, flags)); + + if (need_tiling(sna, width, height)) { + DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", + __FUNCTION__, width, height)); + return sna_tiling_composite_spans(op, src, dst, + src_x, src_y, dst_x, dst_y, + width, height, flags, tmp); + } + + tmp->base.op = op; + if (!gen5_composite_set_target(sna, &tmp->base, dst, + dst_x, dst_y, width, height, + true)) + return false; + + switch (gen5_composite_picture(sna, src, &tmp->base.src, + src_x, src_y, + width, height, + dst_x, dst_y, + dst->polyMode == PolyModePrecise)) { + case -1: + goto cleanup_dst; + case 0: + if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) + goto cleanup_dst; + /* fall through to fixup */ + case 1: + gen5_composite_channel_convert(&tmp->base.src); + break; + } + + tmp->base.mask.bo = NULL; + + tmp->base.is_affine = tmp->base.src.is_affine; + tmp->base.has_component_alpha = false; + tmp->base.need_magic_ca_pass = false; + + tmp->base.u.gen5.ve_id = gen4_choose_spans_emitter(sna, tmp); + tmp->base.u.gen5.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine; + + tmp->box = gen5_render_composite_spans_box; + tmp->boxes = gen5_render_composite_spans_boxes; + if (tmp->emit_boxes) + tmp->thread_boxes = gen5_render_composite_spans_boxes__thread; + tmp->done = gen5_render_composite_spans_done; + + if (!kgem_check_bo(&sna->kgem, + tmp->base.dst.bo, tmp->base.src.bo, + NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, + tmp->base.dst.bo, tmp->base.src.bo, + NULL)) + goto cleanup_src; + } + + gen5_bind_surfaces(sna, &tmp->base); + gen5_align_vertex(sna, &tmp->base); + return true; + +cleanup_src: + if (tmp->base.src.bo) + kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); +cleanup_dst: + if (tmp->base.redirect.real_bo) + 
kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); + return false; +} +#endif + + + +static bool +gen5_render_copy_boxes(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n, unsigned flags) +{ + struct sna_composite_op tmp; + + DBG(("%s alu=%d, src=%ld:handle=%d, dst=%ld:handle=%d boxes=%d x [((%d, %d), (%d, %d))...], flags=%x\n", + __FUNCTION__, alu, + src->drawable.serialNumber, src_bo->handle, + dst->drawable.serialNumber, dst_bo->handle, + n, box->x1, box->y1, box->x2, box->y2, + flags)); + + if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && + sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->drawable.bitsPerPixel, + box, n)) + return true; + + if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) { +fallback_blt: + if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) + return false; + + return sna_blt_copy_boxes_fallback(sna, alu, + src, src_bo, src_dx, src_dy, + dst, dst_bo, dst_dx, dst_dy, + box, n); + } + + memset(&tmp, 0, sizeof(tmp)); + + if (dst->drawable.depth == src->drawable.depth) { + tmp.dst.format = sna_render_format_for_depth(dst->drawable.depth); + tmp.src.pict_format = tmp.dst.format; + } else { + tmp.dst.format = sna_format_for_depth(dst->drawable.depth); + tmp.src.pict_format = sna_format_for_depth(src->drawable.depth); + } + if (!gen5_check_format(tmp.src.pict_format)) { + DBG(("%s: unsupported source format, %x, use BLT\n", + __FUNCTION__, tmp.src.pict_format)); + goto fallback_blt; + } + + DBG(("%s (%d, %d)->(%d, %d) x %d\n", + __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n)); + + tmp.op = alu == GXcopy ? 
PictOpSrc : PictOpClear; + + tmp.dst.pixmap = dst; + tmp.dst.width = dst->drawable.width; + tmp.dst.height = dst->drawable.height; + tmp.dst.x = tmp.dst.y = 0; + tmp.dst.bo = dst_bo; + tmp.damage = NULL; + + sna_render_composite_redirect_init(&tmp); + if (too_large(tmp.dst.width, tmp.dst.height)) { + BoxRec extents = box[0]; + int i; + + for (i = 1; i < n; i++) { + if (box[i].x1 < extents.x1) + extents.x1 = box[i].x1; + if (box[i].y1 < extents.y1) + extents.y1 = box[i].y1; + + if (box[i].x2 > extents.x2) + extents.x2 = box[i].x2; + if (box[i].y2 > extents.y2) + extents.y2 = box[i].y2; + } + if (!sna_render_composite_redirect(sna, &tmp, + extents.x1 + dst_dx, + extents.y1 + dst_dy, + extents.x2 - extents.x1, + extents.y2 - extents.y1, + n > 1)) + goto fallback_tiled; + } + + tmp.src.filter = SAMPLER_FILTER_NEAREST; + tmp.src.repeat = SAMPLER_EXTEND_NONE; + tmp.src.card_format = gen5_get_card_format(tmp.src.pict_format); + if (too_large(src->drawable.width, src->drawable.height)) { + BoxRec extents = box[0]; + int i; + + for (i = 1; i < n; i++) { + if (box[i].x1 < extents.x1) + extents.x1 = box[i].x1; + if (box[i].y1 < extents.y1) + extents.y1 = box[i].y1; + + if (box[i].x2 > extents.x2) + extents.x2 = box[i].x2; + if (box[i].y2 > extents.y2) + extents.y2 = box[i].y2; + } + + if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, + extents.x1 + src_dx, + extents.y1 + src_dy, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) + goto fallback_tiled_dst; + } else { + tmp.src.bo = kgem_bo_reference(src_bo); + tmp.src.width = src->drawable.width; + tmp.src.height = src->drawable.height; + tmp.src.offset[0] = tmp.src.offset[1] = 0; + tmp.src.scale[0] = 1.f/src->drawable.width; + tmp.src.scale[1] = 1.f/src->drawable.height; + } + + tmp.is_affine = true; + tmp.floats_per_vertex = 3; + tmp.floats_per_rect = 9; + tmp.u.gen5.wm_kernel = WM_KERNEL; + tmp.u.gen5.ve_id = 2; + + if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { + kgem_submit(&sna->kgem); + if 
(!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { + DBG(("%s: aperture check failed\n", __FUNCTION__)); + goto fallback_tiled_src; + } + } + + dst_dx += tmp.dst.x; + dst_dy += tmp.dst.y; + tmp.dst.x = tmp.dst.y = 0; + + src_dx += tmp.src.offset[0]; + src_dy += tmp.src.offset[1]; + + gen5_copy_bind_surfaces(sna, &tmp); + gen5_align_vertex(sna, &tmp); + + do { + int n_this_time; + + n_this_time = gen5_get_rectangles(sna, &tmp, n, + gen5_copy_bind_surfaces); + n -= n_this_time; + + do { + DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", + box->x1 + src_dx, box->y1 + src_dy, + box->x1 + dst_dx, box->y1 + dst_dy, + box->x2 - box->x1, box->y2 - box->y1)); + OUT_VERTEX(box->x2 + dst_dx, box->y2 + dst_dy); + OUT_VERTEX_F((box->x2 + src_dx) * tmp.src.scale[0]); + OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]); + + OUT_VERTEX(box->x1 + dst_dx, box->y2 + dst_dy); + OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]); + OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]); + + OUT_VERTEX(box->x1 + dst_dx, box->y1 + dst_dy); + OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]); + OUT_VERTEX_F((box->y1 + src_dy) * tmp.src.scale[1]); + + box++; + } while (--n_this_time); + } while (n); + + gen4_vertex_flush(sna); + sna_render_composite_redirect_done(sna, &tmp); + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + return true; + +fallback_tiled_src: + kgem_bo_destroy(&sna->kgem, tmp.src.bo); +fallback_tiled_dst: + if (tmp.redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp.dst.bo); +fallback_tiled: + if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && + sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->drawable.bitsPerPixel, + box, n)) + return true; + + DBG(("%s: tiled fallback\n", __FUNCTION__)); + return sna_tiling_copy_boxes(sna, alu, + src, src_bo, src_dx, src_dy, + dst, dst_bo, dst_dx, dst_dy, + box, n); +} + +#endif static void gen5_render_flush(struct sna *sna) @@ -1397,16 +2342,38 @@ static bool gen5_render_setup(struct sna *sna) 
return state->general_bo != NULL; } -bool gen5_render_init(struct sna *sna) +const char *gen5_render_init(struct sna *sna, const char *backend) { if (!gen5_render_setup(sna)) - return false; + return backend; sna->kgem.context_switch = gen5_render_context_switch; sna->kgem.retire = gen5_render_retire; sna->kgem.expire = gen5_render_expire; +#if 0 +#if !NO_COMPOSITE + sna->render.composite = gen5_render_composite; + sna->render.prefer_gpu |= PREFER_GPU_RENDER; +#endif +#if !NO_COMPOSITE_SPANS + sna->render.check_composite_spans = gen5_check_composite_spans; + sna->render.composite_spans = gen5_render_composite_spans; + if (sna->PciInfo->device_id == 0x0044) + sna->render.prefer_gpu |= PREFER_GPU_SPANS; +#endif + sna->render.video = gen5_render_video; + + sna->render.copy_boxes = gen5_render_copy_boxes; + sna->render.copy = gen5_render_copy; + + sna->render.fill_boxes = gen5_render_fill_boxes; + sna->render.fill = gen5_render_fill; + sna->render.fill_one = gen5_render_fill_one; +#endif + sna->render.blit_tex = gen5_blit_tex; + sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT; sna->render.flush = gen5_render_flush; sna->render.reset = gen5_render_reset; @@ -1414,7 +2381,89 @@ bool gen5_render_init(struct sna *sna) sna->render.max_3d_size = MAX_3D_SIZE; sna->render.max_3d_pitch = 1 << 18; - sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT; - + return "Ironlake (gen5)"; +}; + +static bool +gen5_blit_tex(struct sna *sna, + uint8_t op, bool scale, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr mask,struct kgem_bo *mask_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + int32_t src_x, int32_t src_y, + int32_t msk_x, int32_t msk_y, + int32_t dst_x, int32_t dst_y, + int32_t width, int32_t height, + struct sna_composite_op *tmp) +{ + DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, + width, height, sna->kgem.mode)); + + tmp->op = PictOpSrc; + + tmp->dst.pixmap = dst; + tmp->dst.bo = dst_bo; + tmp->dst.width = dst->drawable.width; + tmp->dst.height = dst->drawable.height; + 
tmp->dst.format = PICT_x8r8g8b8; + + + tmp->src.repeat = RepeatNone; + tmp->src.filter = PictFilterNearest; + tmp->src.is_affine = true; + + tmp->src.bo = src_bo; + tmp->src.pict_format = PICT_x8r8g8b8; + tmp->src.card_format = gen5_get_card_format(tmp->src.pict_format); + tmp->src.width = src->drawable.width; + tmp->src.height = src->drawable.height; + + + tmp->is_affine = tmp->src.is_affine; + tmp->has_component_alpha = false; + tmp->need_magic_ca_pass = false; + + tmp->mask.is_affine = true; + tmp->mask.repeat = SAMPLER_EXTEND_NONE; + tmp->mask.filter = SAMPLER_FILTER_NEAREST; + tmp->mask.bo = mask_bo; + tmp->mask.pict_format = PIXMAN_a8; + tmp->mask.card_format = gen5_get_card_format(tmp->mask.pict_format); + tmp->mask.width = mask->drawable.width; + tmp->mask.height = mask->drawable.height; + + if( scale ) + { + tmp->src.scale[0] = 1.f/width; + tmp->src.scale[1] = 1.f/height; + } + else + { + tmp->src.scale[0] = 1.f/src->drawable.width; + tmp->src.scale[1] = 1.f/src->drawable.height; + } + + tmp->mask.scale[0] = 1.f/mask->drawable.width; + tmp->mask.scale[1] = 1.f/mask->drawable.height; + + + tmp->u.gen5.wm_kernel = + gen5_choose_composite_kernel(tmp->op, + tmp->mask.bo != NULL, + tmp->has_component_alpha, + tmp->is_affine); + tmp->u.gen5.ve_id = gen4_choose_composite_emitter(sna, tmp); + + tmp->blt = gen5_render_composite_blt; + tmp->done = gen5_render_composite_done; + + if (!kgem_check_bo(&sna->kgem, + tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) { + kgem_submit(&sna->kgem); + } + + gen5_bind_surfaces(sna, tmp); + gen5_align_vertex(sna, tmp); return true; + } diff --git a/drivers/video/Intel-2D/gen6_render.c b/drivers/video/Intel-2D/gen6_render.c index d42099ccf3..d57a757c85 100644 --- a/drivers/video/Intel-2D/gen6_render.c +++ b/drivers/video/Intel-2D/gen6_render.c @@ -1,3508 +1,3585 @@ -/* - * Copyright © 2006,2008,2011 Intel Corporation - * Copyright © 2007 Red Hat, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - * Authors: - * Wang Zhenyu - * Eric Anholt - * Carl Worth - * Keith Packard - * Chris Wilson - * - */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "sna.h" -#include "sna_reg.h" -#include "sna_render.h" -#include "sna_render_inline.h" -//#include "sna_video.h" - -#include "brw/brw.h" -#include "gen6_render.h" -#include "gen4_source.h" -#include "gen4_vertex.h" - -#define NO_COMPOSITE 0 -#define NO_COMPOSITE_SPANS 0 -#define NO_COPY 0 -#define NO_COPY_BOXES 0 -#define NO_FILL 0 -#define NO_FILL_BOXES 0 -#define NO_FILL_ONE 0 -#define NO_FILL_CLEAR 0 - -#define NO_RING_SWITCH 1 -#define PREFER_RENDER 0 - -#define USE_8_PIXEL_DISPATCH 1 -#define USE_16_PIXEL_DISPATCH 1 -#define USE_32_PIXEL_DISPATCH 0 - -#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH -#error "Must select at least 8, 16 or 32 pixel dispatch" -#endif - -#define GEN6_MAX_SIZE 8192 - -struct gt_info { - int max_vs_threads; - int max_gs_threads; - int max_wm_threads; - struct { - int size; - int max_vs_entries; - int max_gs_entries; - } urb; -}; - -static const struct gt_info gt1_info = { - .max_vs_threads = 24, - .max_gs_threads = 21, - .max_wm_threads = 40, - .urb = { 32, 256, 256 }, -}; - -static const struct gt_info gt2_info = { - .max_vs_threads = 60, - .max_gs_threads = 60, - .max_wm_threads = 80, - .urb = { 64, 256, 256 }, -}; - -static const uint32_t ps_kernel_packed[][4] = { -#include "exa_wm_src_affine.g6b" -#include "exa_wm_src_sample_argb.g6b" -#include "exa_wm_yuv_rgb.g6b" -#include "exa_wm_write.g6b" -}; - -static const uint32_t ps_kernel_planar[][4] = { -#include "exa_wm_src_affine.g6b" -#include "exa_wm_src_sample_planar.g6b" -#include "exa_wm_yuv_rgb.g6b" -#include "exa_wm_write.g6b" -}; - -#define NOKERNEL(kernel_enum, func, ns) \ - [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns} -#define KERNEL(kernel_enum, kernel, ns) \ - [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns} - -static const struct 
wm_kernel_info { - const char *name; - const void *data; - unsigned int size; - unsigned int num_surfaces; -} wm_kernels[] = { - NOKERNEL(NOMASK, brw_wm_kernel__affine, 2), - NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2), - - NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3), - NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3), - - NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3), - NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3), - - NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3), - NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3), - - NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2), - NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2), - - KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), - KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), -}; -#undef KERNEL - -static const struct blendinfo { - bool src_alpha; - uint32_t src_blend; - uint32_t dst_blend; -} gen6_blend_op[] = { - /* Clear */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO}, - /* Src */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO}, - /* Dst */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE}, - /* Over */ {1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, - /* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE}, - /* In */ {0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO}, - /* InReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA}, - /* Out */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ZERO}, - /* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, - /* Atop */ {1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, - /* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA}, - /* Xor */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, - /* Add */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE}, -}; - -/** - * Highest-valued BLENDFACTOR used in gen6_blend_op. 
- * - * This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR, - * GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, - * GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} - */ -#define GEN6_BLENDFACTOR_COUNT (GEN6_BLENDFACTOR_INV_DST_ALPHA + 1) - -#define GEN6_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen6_blend_state), 64) - -#define BLEND_OFFSET(s, d) \ - (((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE) - -#define NO_BLEND BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO) -#define CLEAR BLEND_OFFSET(GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO) - -#define SAMPLER_OFFSET(sf, se, mf, me) \ - (((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me) + 2) * 2 * sizeof(struct gen6_sampler_state)) - -#define VERTEX_2s2s 0 - -#define COPY_SAMPLER 0 -#define COPY_VERTEX VERTEX_2s2s -#define COPY_FLAGS(a) GEN6_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN6_WM_KERNEL_NOMASK, COPY_VERTEX) - -#define FILL_SAMPLER (2 * sizeof(struct gen6_sampler_state)) -#define FILL_VERTEX VERTEX_2s2s -#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX) -#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX) - -#define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0) -#define GEN6_BLEND(f) (((f) >> 0) & 0xfff0) -#define GEN6_KERNEL(f) (((f) >> 16) & 0xf) -#define GEN6_VERTEX(f) (((f) >> 0) & 0xf) -#define GEN6_SET_FLAGS(S, B, K, V) (((S) | (K)) << 16 | ((B) | (V))) - -#define OUT_BATCH(v) batch_emit(sna, v) -#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) -#define OUT_VERTEX_F(v) vertex_emit(sna, v) - -static inline bool too_large(int width, int height) -{ - return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE; -} - -static uint32_t gen6_get_blend(int op, - bool has_component_alpha, - uint32_t dst_format) -{ - uint32_t src, dst; - - - src = GEN6_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend; - dst = 
GEN6_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend; - -// dst = GEN6_BLENDFACTOR_ZERO; //gen6_blend_op[op].dst_blend; - -#if 0 - /* If there's no dst alpha channel, adjust the blend op so that - * we'll treat it always as 1. - */ - if (PICT_FORMAT_A(dst_format) == 0) { - if (src == GEN6_BLENDFACTOR_DST_ALPHA) - src = GEN6_BLENDFACTOR_ONE; - else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA) - src = GEN6_BLENDFACTOR_ZERO; - } - - /* If the source alpha is being used, then we should only be in a - * case where the source blend factor is 0, and the source blend - * value is the mask channels multiplied by the source picture's alpha. - */ - if (has_component_alpha && gen6_blend_op[op].src_alpha) { - if (dst == GEN6_BLENDFACTOR_SRC_ALPHA) - dst = GEN6_BLENDFACTOR_SRC_COLOR; - else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA) - dst = GEN6_BLENDFACTOR_INV_SRC_COLOR; - } - - DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n", - op, dst_format, PICT_FORMAT_A(dst_format), - src, dst, (int)BLEND_OFFSET(src, dst))); -#endif - - return BLEND_OFFSET(src, dst); -} - -static uint32_t gen6_get_card_format(PictFormat format) -{ - switch (format) { - default: - return -1; - case PICT_a8r8g8b8: - return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; - case PICT_x8r8g8b8: - return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM; - case PICT_a8: - return GEN6_SURFACEFORMAT_A8_UNORM; - }; - -/* - switch (format) { - default: - return -1; - case PICT_a8r8g8b8: - return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; - case PICT_x8r8g8b8: - return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM; - case PICT_a8b8g8r8: - return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM; - case PICT_x8b8g8r8: - return GEN6_SURFACEFORMAT_R8G8B8X8_UNORM; - case PICT_a2r10g10b10: - return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM; - case PICT_x2r10g10b10: - return GEN6_SURFACEFORMAT_B10G10R10X2_UNORM; - case PICT_r8g8b8: - return GEN6_SURFACEFORMAT_R8G8B8_UNORM; - case PICT_r5g6b5: - return GEN6_SURFACEFORMAT_B5G6R5_UNORM; - case PICT_a1r5g5b5: - return 
GEN6_SURFACEFORMAT_B5G5R5A1_UNORM; - case PICT_a8: - return GEN6_SURFACEFORMAT_A8_UNORM; - case PICT_a4r4g4b4: - return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM; - } - */ -} - -static uint32_t gen6_get_dest_format(PictFormat format) -{ - return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; - -#if 0 - - switch (format) { - default: - return -1; - case PICT_a8r8g8b8: - case PICT_x8r8g8b8: - return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; - case PICT_a8b8g8r8: - case PICT_x8b8g8r8: - return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM; - case PICT_a2r10g10b10: - case PICT_x2r10g10b10: - return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM; - case PICT_r5g6b5: - return GEN6_SURFACEFORMAT_B5G6R5_UNORM; - case PICT_x1r5g5b5: - case PICT_a1r5g5b5: - return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM; - case PICT_a8: - return GEN6_SURFACEFORMAT_A8_UNORM; - case PICT_a4r4g4b4: - case PICT_x4r4g4b4: - return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM; - } -#endif - -} - -#if 0 - -static bool gen6_check_dst_format(PictFormat format) -{ - if (gen6_get_dest_format(format) != -1) - return true; - - DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); - return false; -} - -static bool gen6_check_format(uint32_t format) -{ - if (gen6_get_card_format(format) != -1) - return true; - - DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); - return false; -} - -static uint32_t gen6_filter(uint32_t filter) -{ - switch (filter) { - default: - assert(0); - case PictFilterNearest: - return SAMPLER_FILTER_NEAREST; - case PictFilterBilinear: - return SAMPLER_FILTER_BILINEAR; - } -} - -static uint32_t gen6_check_filter(PicturePtr picture) -{ - switch (picture->filter) { - case PictFilterNearest: - case PictFilterBilinear: - return true; - default: - return false; - } -} - -static uint32_t gen6_repeat(uint32_t repeat) -{ - switch (repeat) { - default: - assert(0); - case RepeatNone: - return SAMPLER_EXTEND_NONE; - case RepeatNormal: - return SAMPLER_EXTEND_REPEAT; - case RepeatPad: - return SAMPLER_EXTEND_PAD; - case RepeatReflect: - 
return SAMPLER_EXTEND_REFLECT; - } -} - -static bool gen6_check_repeat(PicturePtr picture) -{ - if (!picture->repeat) - return true; - - switch (picture->repeatType) { - case RepeatNone: - case RepeatNormal: - case RepeatPad: - case RepeatReflect: - return true; - default: - return false; - } -} -#endif - -static int -gen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) -{ - int base; - - if (has_mask) { - - if (is_ca) { - if (gen6_blend_op[op].src_alpha) - base = GEN6_WM_KERNEL_MASKSA; - else - base = GEN6_WM_KERNEL_MASKCA; - } else - base = GEN6_WM_KERNEL_MASK; - - } else - base = GEN6_WM_KERNEL_NOMASK; - - return base + !is_affine; -} - -static void -gen6_emit_urb(struct sna *sna) -{ - OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2)); - OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) | - (sna->render_state.gen6.info->urb.max_vs_entries << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */ - OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) | - (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */ -} - -static void -gen6_emit_state_base_address(struct sna *sna) -{ - OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2)); - OUT_BATCH(0); /* general */ - OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */ - sna->kgem.nbatch, - NULL, - I915_GEM_DOMAIN_INSTRUCTION << 16, - BASE_ADDRESS_MODIFY)); - OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */ - sna->kgem.nbatch, - sna->render_state.gen6.general_bo, - I915_GEM_DOMAIN_INSTRUCTION << 16, - BASE_ADDRESS_MODIFY)); - OUT_BATCH(0); /* indirect */ - OUT_BATCH(kgem_add_reloc(&sna->kgem, - sna->kgem.nbatch, - sna->render_state.gen6.general_bo, - I915_GEM_DOMAIN_INSTRUCTION << 16, - BASE_ADDRESS_MODIFY)); - - /* upper bounds, disable */ - OUT_BATCH(0); - OUT_BATCH(BASE_ADDRESS_MODIFY); - OUT_BATCH(0); - OUT_BATCH(BASE_ADDRESS_MODIFY); -} - -static void -gen6_emit_viewports(struct sna *sna) -{ - OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS | - GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC | - (4 
- 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); -} - -static void -gen6_emit_vs(struct sna *sna) -{ - /* disable VS constant buffer */ - OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - - OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2)); - OUT_BATCH(0); /* no VS kernel */ - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); /* pass-through */ -} - -static void -gen6_emit_gs(struct sna *sna) -{ - /* disable GS constant buffer */ - OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - - OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2)); - OUT_BATCH(0); /* no GS kernel */ - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); /* pass-through */ -} - -static void -gen6_emit_clip(struct sna *sna) -{ - OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); /* pass-through */ - OUT_BATCH(0); -} - -static void -gen6_emit_wm_constants(struct sna *sna) -{ - /* disable WM constant buffer */ - OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); -} - -static void -gen6_emit_null_depth_buffer(struct sna *sna) -{ - OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2)); - OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT | - GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - - OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2)); - OUT_BATCH(0); -} - -static void -gen6_emit_invariant(struct sna *sna) -{ - OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D); - - OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2)); - OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | - GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ - OUT_BATCH(0); - - OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2)); - OUT_BATCH(1); - - gen6_emit_urb(sna); - - 
gen6_emit_state_base_address(sna); - - gen6_emit_viewports(sna); - gen6_emit_vs(sna); - gen6_emit_gs(sna); - gen6_emit_clip(sna); - gen6_emit_wm_constants(sna); - gen6_emit_null_depth_buffer(sna); - - sna->render_state.gen6.needs_invariant = false; -} - -static bool -gen6_emit_cc(struct sna *sna, int blend) -{ - struct gen6_render_state *render = &sna->render_state.gen6; - - if (render->blend == blend) - return blend != NO_BLEND; - - DBG(("%s: blend = %x\n", __FUNCTION__, blend)); - - OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2)); - OUT_BATCH((render->cc_blend + blend) | 1); - if (render->blend == (unsigned)-1) { - OUT_BATCH(1); - OUT_BATCH(1); - } else { - OUT_BATCH(0); - OUT_BATCH(0); - } - - render->blend = blend; - return blend != NO_BLEND; -} - -static void -gen6_emit_sampler(struct sna *sna, uint32_t state) -{ - if (sna->render_state.gen6.samplers == state) - return; - - sna->render_state.gen6.samplers = state; - - DBG(("%s: sampler = %x\n", __FUNCTION__, state)); - - OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS | - GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS | - (4 - 2)); - OUT_BATCH(0); /* VS */ - OUT_BATCH(0); /* GS */ - OUT_BATCH(sna->render_state.gen6.wm_state + state); -} - -static void -gen6_emit_sf(struct sna *sna, bool has_mask) -{ - int num_sf_outputs = has_mask ? 
2 : 1; - - if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs) - return; - - DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n", - __FUNCTION__, num_sf_outputs, 1, 0)); - - sna->render_state.gen6.num_sf_outputs = num_sf_outputs; - - OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2)); - OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT | - 1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT | - 1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT); - OUT_BATCH(0); - OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE); - OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */ - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); /* DW9 */ - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); /* DW14 */ - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); /* DW19 */ -} - -static void -gen6_emit_wm(struct sna *sna, unsigned int kernel, bool has_mask) -{ - const uint32_t *kernels; - - if (sna->render_state.gen6.kernel == kernel) - return; - - sna->render_state.gen6.kernel = kernel; - kernels = sna->render_state.gen6.wm_kernel[kernel]; - - DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d,32-pixel? %d)\n", - __FUNCTION__, - wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces, - kernels[0], kernels[1], kernels[2])); - - OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2)); - OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]); - OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT | - wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT); - OUT_BATCH(0); /* scratch space */ - OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT | - 8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT | - 6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT); - OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT | - (kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) | - (kernels[1] ? 
GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) | - (kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) | - GEN6_3DSTATE_WM_DISPATCH_ENABLE); - OUT_BATCH((1 + has_mask) << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT | - GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); - OUT_BATCH(kernels[2]); - OUT_BATCH(kernels[1]); -} - -static bool -gen6_emit_binding_table(struct sna *sna, uint16_t offset) -{ - if (sna->render_state.gen6.surface_table == offset) - return false; - - /* Binding table pointers */ - OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS | - GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS | - (4 - 2)); - OUT_BATCH(0); /* vs */ - OUT_BATCH(0); /* gs */ - /* Only the PS uses the binding table */ - OUT_BATCH(offset*4); - - sna->render_state.gen6.surface_table = offset; - return true; -} - -static bool -gen6_emit_drawing_rectangle(struct sna *sna, - const struct sna_composite_op *op) -{ - uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); - uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; - - assert(!too_large(op->dst.x, op->dst.y)); - assert(!too_large(op->dst.width, op->dst.height)); - - if (sna->render_state.gen6.drawrect_limit == limit && - sna->render_state.gen6.drawrect_offset == offset) - return false; - - /* [DevSNB-C+{W/A}] Before any depth stall flush (including those - * produced by non-pipelined state commands), software needs to first - * send a PIPE_CONTROL with no bits set except Post-Sync Operation != - * 0. - * - * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent - * BEFORE the pipe-control with a post-sync op and no write-cache - * flushes. 
- */ - if (!sna->render_state.gen6.first_state_packet) { - OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); - OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL | - GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD); - OUT_BATCH(0); - OUT_BATCH(0); - } - - OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); - OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME); - OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, - sna->render_state.gen6.general_bo, - I915_GEM_DOMAIN_INSTRUCTION << 16 | - I915_GEM_DOMAIN_INSTRUCTION, - 64)); - OUT_BATCH(0); - - OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(limit); - OUT_BATCH(offset); - - sna->render_state.gen6.drawrect_offset = offset; - sna->render_state.gen6.drawrect_limit = limit; - return true; -} - -static void -gen6_emit_vertex_elements(struct sna *sna, - const struct sna_composite_op *op) -{ - /* - * vertex data in vertex buffer - * position: (x, y) - * texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0) - * texture coordinate 1 if (has_mask is true): same as above - */ - struct gen6_render_state *render = &sna->render_state.gen6; - uint32_t src_format, dw; - int id = GEN6_VERTEX(op->u.gen6.flags); - bool has_mask; - - DBG(("%s: setup id=%d\n", __FUNCTION__, id)); - - if (render->ve_id == id) - return; - render->ve_id = id; - - /* The VUE layout - * dword 0-3: pad (0.0, 0.0, 0.0. 
0.0) - * dword 4-7: position (x, y, 1.0, 1.0), - * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) - * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) - * - * dword 4-15 are fetched from vertex buffer - */ - has_mask = (id >> 2) != 0; - OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS | - ((2 * (3 + has_mask)) + 1 - 2)); - - OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | - GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT | - 0 << VE0_OFFSET_SHIFT); - OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT | - GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT | - GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | - GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT); - - /* x,y */ - OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | - GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | - 0 << VE0_OFFSET_SHIFT); - OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | - GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | - GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | - GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT); - - /* u0, v0, w0 */ - DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3)); - dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; - switch (id & 3) { - default: - assert(0); - case 0: - src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED; - dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; - dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; - dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; - break; - case 1: - src_format = GEN6_SURFACEFORMAT_R32_FLOAT; - dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; - dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; - dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; - break; - case 2: - src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT; - dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; - dw |= GEN6_VFCOMPONENT_STORE_SRC << 
VE1_VFCOMPONENT_1_SHIFT; - dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; - break; - case 3: - src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT; - dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; - dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; - dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; - break; - } - OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | - src_format << VE0_FORMAT_SHIFT | - 4 << VE0_OFFSET_SHIFT); - OUT_BATCH(dw); - - /* u1, v1, w1 */ - if (has_mask) { - unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float); - DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset)); - dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; - switch (id >> 2) { - case 1: - src_format = GEN6_SURFACEFORMAT_R32_FLOAT; - dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; - dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; - dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; - break; - default: - assert(0); - case 2: - src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT; - dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; - dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; - dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; - break; - case 3: - src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT; - dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; - dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; - dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; - break; - } - OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | - src_format << VE0_FORMAT_SHIFT | - offset << VE0_OFFSET_SHIFT); - OUT_BATCH(dw); - } -} - -static void -gen6_emit_flush(struct sna *sna) -{ - OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); - OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH | - GEN6_PIPE_CONTROL_TC_FLUSH | - GEN6_PIPE_CONTROL_CS_STALL); - OUT_BATCH(0); - OUT_BATCH(0); -} - -static void -gen6_emit_state(struct sna *sna, - const 
struct sna_composite_op *op, - uint16_t wm_binding_table) -{ - bool need_stall = wm_binding_table & 1; - - if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags))) - need_stall = false; - gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags)); - gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2); - gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2); - gen6_emit_vertex_elements(sna, op); - - need_stall |= gen6_emit_binding_table(sna, wm_binding_table & ~1); - if (gen6_emit_drawing_rectangle(sna, op)) - need_stall = false; - if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { - gen6_emit_flush(sna); - kgem_clear_dirty(&sna->kgem); - if (op->dst.bo->exec) - kgem_bo_mark_dirty(op->dst.bo); - need_stall = false; - } - if (need_stall) { - OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); - OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL | - GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD); - OUT_BATCH(0); - OUT_BATCH(0); - } - sna->render_state.gen6.first_state_packet = false; -} - -static bool gen6_magic_ca_pass(struct sna *sna, - const struct sna_composite_op *op) -{ - struct gen6_render_state *state = &sna->render_state.gen6; - - if (!op->need_magic_ca_pass) - return false; - - DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__, - sna->render.vertex_start, sna->render.vertex_index)); - - gen6_emit_flush(sna); - - gen6_emit_cc(sna, gen6_get_blend(PictOpAdd, true, op->dst.format)); - gen6_emit_wm(sna, - gen6_choose_composite_kernel(PictOpAdd, - true, true, - op->is_affine), - true); - - OUT_BATCH(GEN6_3DPRIMITIVE | - GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL | - _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT | - 0 << 9 | - 4); - OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start); - OUT_BATCH(sna->render.vertex_start); - OUT_BATCH(1); /* single instance */ - OUT_BATCH(0); /* start instance location */ - OUT_BATCH(0); /* index buffer offset, ignored */ - - state->last_primitive = sna->kgem.nbatch; - return true; -} - -typedef struct 
gen6_surface_state_padded { - struct gen6_surface_state state; - char pad[32 - sizeof(struct gen6_surface_state)]; -} gen6_surface_state_padded; - -static void null_create(struct sna_static_stream *stream) -{ - /* A bunch of zeros useful for legacy border color and depth-stencil */ - sna_static_stream_map(stream, 64, 64); -} - -static void scratch_create(struct sna_static_stream *stream) -{ - /* 64 bytes of scratch space for random writes, such as - * the pipe-control w/a. - */ - sna_static_stream_map(stream, 64, 64); -} - -static void -sampler_state_init(struct gen6_sampler_state *sampler_state, - sampler_filter_t filter, - sampler_extend_t extend) -{ - sampler_state->ss0.lod_preclamp = 1; /* GL mode */ - - /* We use the legacy mode to get the semantics specified by - * the Render extension. */ - sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY; - - switch (filter) { - default: - case SAMPLER_FILTER_NEAREST: - sampler_state->ss0.min_filter = GEN6_MAPFILTER_NEAREST; - sampler_state->ss0.mag_filter = GEN6_MAPFILTER_NEAREST; - break; - case SAMPLER_FILTER_BILINEAR: - sampler_state->ss0.min_filter = GEN6_MAPFILTER_LINEAR; - sampler_state->ss0.mag_filter = GEN6_MAPFILTER_LINEAR; - break; - } - - switch (extend) { - default: - case SAMPLER_EXTEND_NONE: - sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; - sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; - sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; - break; - case SAMPLER_EXTEND_REPEAT: - sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP; - sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP; - sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP; - break; - case SAMPLER_EXTEND_PAD: - sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; - sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; - sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; - break; - case SAMPLER_EXTEND_REFLECT: - sampler_state->ss1.r_wrap_mode = 
GEN6_TEXCOORDMODE_MIRROR; - sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; - sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; - break; - } -} - -static void -sampler_copy_init(struct gen6_sampler_state *ss) -{ - sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); - ss->ss3.non_normalized_coord = 1; - - sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); -} - -static void -sampler_fill_init(struct gen6_sampler_state *ss) -{ - sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT); - ss->ss3.non_normalized_coord = 1; - - sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); -} - -static uint32_t -gen6_tiling_bits(uint32_t tiling) -{ - return 0; -/* - switch (tiling) { - default: assert(0); - case I915_TILING_NONE: return 0; - case I915_TILING_X: return GEN6_SURFACE_TILED; - case I915_TILING_Y: return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y; - } -*/ -} - -/** - * Sets up the common fields for a surface state buffer for the given - * picture in the given surface state buffer. - */ -static int -gen6_bind_bo(struct sna *sna, - struct kgem_bo *bo, - uint32_t width, - uint32_t height, - uint32_t format, - bool is_dst) -{ - uint32_t *ss; - uint32_t domains; - uint16_t offset; - uint32_t is_scanout = is_dst && bo->scanout; - - /* After the first bind, we manage the cache domains within the batch */ - offset = kgem_bo_get_binding(bo, format | is_scanout << 31); - if (offset) { - DBG(("[%x] bo(handle=%d), format=%d, reuse %s binding\n", - offset, bo->handle, format, - is_dst ? 
"render" : "sampler")); - if (is_dst) - kgem_bo_mark_dirty(bo); - return offset * sizeof(uint32_t); - } - - offset = sna->kgem.surface -= - sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); - ss = sna->kgem.batch + offset; - ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT | - GEN6_SURFACE_BLEND_ENABLED | - format << GEN6_SURFACE_FORMAT_SHIFT); - if (is_dst) - domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER; - else - domains = I915_GEM_DOMAIN_SAMPLER << 16; - ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0); - ss[2] = ((width - 1) << GEN6_SURFACE_WIDTH_SHIFT | - (height - 1) << GEN6_SURFACE_HEIGHT_SHIFT); - assert(bo->pitch <= (1 << 18)); - ss[3] = (gen6_tiling_bits(bo->tiling) | - (bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT); - ss[4] = 0; - ss[5] = is_scanout ? 0 : 3 << 16; - - kgem_bo_set_binding(bo, format | is_scanout << 31, offset); - - DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", - offset, bo->handle, ss[1], - format, width, height, bo->pitch, bo->tiling, - domains & 0xffff ? 
"render" : "sampler")); - - return offset * sizeof(uint32_t); -} - -static void gen6_emit_vertex_buffer(struct sna *sna, - const struct sna_composite_op *op) -{ - int id = GEN6_VERTEX(op->u.gen6.flags); - - OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3); - OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA | - 4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT); - sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; - OUT_BATCH(0); - OUT_BATCH(~0); /* max address: disabled */ - OUT_BATCH(0); - - sna->render.vb_id |= 1 << id; -} - -static void gen6_emit_primitive(struct sna *sna) -{ - if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) { - DBG(("%s: continuing previous primitive, start=%d, index=%d\n", - __FUNCTION__, - sna->render.vertex_start, - sna->render.vertex_index)); - sna->render.vertex_offset = sna->kgem.nbatch - 5; - return; - } - - OUT_BATCH(GEN6_3DPRIMITIVE | - GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL | - _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT | - 0 << 9 | - 4); - sna->render.vertex_offset = sna->kgem.nbatch; - OUT_BATCH(0); /* vertex count, to be filled in later */ - OUT_BATCH(sna->render.vertex_index); - OUT_BATCH(1); /* single instance */ - OUT_BATCH(0); /* start instance location */ - OUT_BATCH(0); /* index buffer offset, ignored */ - sna->render.vertex_start = sna->render.vertex_index; - DBG(("%s: started new primitive: index=%d\n", - __FUNCTION__, sna->render.vertex_start)); - - sna->render_state.gen6.last_primitive = sna->kgem.nbatch; -} - -static bool gen6_rectangle_begin(struct sna *sna, - const struct sna_composite_op *op) -{ - int id = 1 << GEN6_VERTEX(op->u.gen6.flags); - int ndwords; - - if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) - return true; - - ndwords = op->need_magic_ca_pass ? 
60 : 6; - if ((sna->render.vb_id & id) == 0) - ndwords += 5; - if (!kgem_check_batch(&sna->kgem, ndwords)) - return false; - - if ((sna->render.vb_id & id) == 0) - gen6_emit_vertex_buffer(sna, op); - - gen6_emit_primitive(sna); - return true; -} - -static int gen6_get_rectangles__flush(struct sna *sna, - const struct sna_composite_op *op) -{ - /* Preventing discarding new vbo after lock contention */ - if (sna_vertex_wait__locked(&sna->render)) { - int rem = vertex_space(sna); - if (rem > op->floats_per_rect) - return rem; - } - - if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5)) - return 0; - if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) - return 0; - - if (sna->render.vertex_offset) { - gen4_vertex_flush(sna); - if (gen6_magic_ca_pass(sna, op)) { - gen6_emit_flush(sna); - gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)); - gen6_emit_wm(sna, - GEN6_KERNEL(op->u.gen6.flags), - GEN6_VERTEX(op->u.gen6.flags) >> 2); - } - } - - return gen4_vertex_finish(sna); -} - -inline static int gen6_get_rectangles(struct sna *sna, - const struct sna_composite_op *op, - int want, - void (*emit_state)(struct sna *, const struct sna_composite_op *op)) -{ - int rem; - -start: - rem = vertex_space(sna); - if (unlikely(rem < op->floats_per_rect)) { - DBG(("flushing vbo for %s: %d < %d\n", - __FUNCTION__, rem, op->floats_per_rect)); - rem = gen6_get_rectangles__flush(sna, op); - if (unlikely(rem == 0)) - goto flush; - } - - if (unlikely(sna->render.vertex_offset == 0 && - !gen6_rectangle_begin(sna, op))) - goto flush; - - if (want > 1 && want * op->floats_per_rect > rem) - want = rem / op->floats_per_rect; - - assert(want > 0); - sna->render.vertex_index += 3*want; - return want; - -flush: - if (sna->render.vertex_offset) { - gen4_vertex_flush(sna); - gen6_magic_ca_pass(sna, op); - } - sna_vertex_wait__locked(&sna->render); - _kgem_submit(&sna->kgem); - emit_state(sna, op); - goto start; -} - -inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna, - 
uint16_t *offset) -{ - uint32_t *table; - - sna->kgem.surface -= - sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); - /* Clear all surplus entries to zero in case of prefetch */ - table = memset(sna->kgem.batch + sna->kgem.surface, - 0, sizeof(struct gen6_surface_state_padded)); - - DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface)); - - *offset = sna->kgem.surface; - return table; -} - -static bool -gen6_get_batch(struct sna *sna, const struct sna_composite_op *op) -{ - kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); - - if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) { - DBG(("%s: flushing batch: %d < %d+%d\n", - __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch, - 150, 4*8)); - kgem_submit(&sna->kgem); - _kgem_set_mode(&sna->kgem, KGEM_RENDER); - } - - if (sna->render_state.gen6.needs_invariant) - gen6_emit_invariant(sna); - - return kgem_bo_is_dirty(op->dst.bo); -} - -static void gen6_emit_composite_state(struct sna *sna, - const struct sna_composite_op *op) -{ - uint32_t *binding_table; - uint16_t offset; - bool dirty; - - dirty = gen6_get_batch(sna, op); - - binding_table = gen6_composite_get_binding_table(sna, &offset); - - binding_table[0] = - gen6_bind_bo(sna, - op->dst.bo, op->dst.width, op->dst.height, - gen6_get_dest_format(op->dst.format), - true); - binding_table[1] = - gen6_bind_bo(sna, - op->src.bo, op->src.width, op->src.height, - op->src.card_format, - false); - if (op->mask.bo) { - binding_table[2] = - gen6_bind_bo(sna, - op->mask.bo, - op->mask.width, - op->mask.height, - op->mask.card_format, - false); - } - - if (sna->kgem.surface == offset && - *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table && - (op->mask.bo == NULL || - sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) { - sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); - offset = sna->render_state.gen6.surface_table; - } - - gen6_emit_state(sna, op, 
offset | dirty); -} - -static void -gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op) -{ - assert (sna->render.vertex_offset == 0); - if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) { - if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect) - gen4_vertex_finish(sna); - - DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n", - sna->render_state.gen6.floats_per_vertex, - op->floats_per_vertex, - sna->render.vertex_index, - (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex)); - sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; - sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex; - sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex; - } - assert((sna->render.vertex_used % op->floats_per_vertex) == 0); -} - - -fastcall static void -gen6_render_composite_blt(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state); - op->prim_emit(sna, op, r); -} - -#if 0 - -fastcall static void -gen6_render_composite_box(struct sna *sna, - const struct sna_composite_op *op, - const BoxRec *box) -{ - struct sna_composite_rectangles r; - - gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state); - - DBG((" %s: (%d, %d), (%d, %d)\n", - __FUNCTION__, - box->x1, box->y1, box->x2, box->y2)); - - r.dst.x = box->x1; - r.dst.y = box->y1; - r.width = box->x2 - box->x1; - r.height = box->y2 - box->y1; - r.src = r.mask = r.dst; - - op->prim_emit(sna, op, &r); -} - -static void -gen6_render_composite_boxes__blt(struct sna *sna, - const struct sna_composite_op *op, - const BoxRec *box, int nbox) -{ - DBG(("composite_boxes(%d)\n", nbox)); - - do { - int nbox_this_time; - - nbox_this_time = gen6_get_rectangles(sna, op, nbox, - gen6_emit_composite_state); - nbox -= nbox_this_time; - - do { - 
struct sna_composite_rectangles r; - - DBG((" %s: (%d, %d), (%d, %d)\n", - __FUNCTION__, - box->x1, box->y1, box->x2, box->y2)); - - r.dst.x = box->x1; - r.dst.y = box->y1; - r.width = box->x2 - box->x1; - r.height = box->y2 - box->y1; - r.src = r.mask = r.dst; - - op->prim_emit(sna, op, &r); - box++; - } while (--nbox_this_time); - } while (nbox); -} - -static void -gen6_render_composite_boxes(struct sna *sna, - const struct sna_composite_op *op, - const BoxRec *box, int nbox) -{ - DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); - - do { - int nbox_this_time; - float *v; - - nbox_this_time = gen6_get_rectangles(sna, op, nbox, - gen6_emit_composite_state); - assert(nbox_this_time); - nbox -= nbox_this_time; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += nbox_this_time * op->floats_per_rect; - - op->emit_boxes(op, box, nbox_this_time, v); - box += nbox_this_time; - } while (nbox); -} - -static void -gen6_render_composite_boxes__thread(struct sna *sna, - const struct sna_composite_op *op, - const BoxRec *box, int nbox) -{ - DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); - - sna_vertex_lock(&sna->render); - do { - int nbox_this_time; - float *v; - - nbox_this_time = gen6_get_rectangles(sna, op, nbox, - gen6_emit_composite_state); - assert(nbox_this_time); - nbox -= nbox_this_time; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += nbox_this_time * op->floats_per_rect; - - sna_vertex_acquire__locked(&sna->render); - sna_vertex_unlock(&sna->render); - - op->emit_boxes(op, box, nbox_this_time, v); - box += nbox_this_time; - - sna_vertex_lock(&sna->render); - sna_vertex_release__locked(&sna->render); - } while (nbox); - sna_vertex_unlock(&sna->render); -} - -#endif - -#ifndef MAX -#define MAX(a,b) ((a) > (b) ? 
(a) : (b)) -#endif - -static uint32_t -gen6_composite_create_blend_state(struct sna_static_stream *stream) -{ - char *base, *ptr; - int src, dst; - - base = sna_static_stream_map(stream, - GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE, - 64); - - ptr = base; - for (src = 0; src < GEN6_BLENDFACTOR_COUNT; src++) { - for (dst= 0; dst < GEN6_BLENDFACTOR_COUNT; dst++) { - struct gen6_blend_state *blend = - (struct gen6_blend_state *)ptr; - - blend->blend0.dest_blend_factor = dst; - blend->blend0.source_blend_factor = src; - blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD; - blend->blend0.blend_enable = - !(dst == GEN6_BLENDFACTOR_ZERO && src == GEN6_BLENDFACTOR_ONE); - - blend->blend1.post_blend_clamp_enable = 1; - blend->blend1.pre_blend_clamp_enable = 1; - - ptr += GEN6_BLEND_STATE_PADDED_SIZE; - } - } - - return sna_static_stream_offsetof(stream, base); -} - -#if 0 - -static uint32_t gen6_bind_video_source(struct sna *sna, - struct kgem_bo *src_bo, - uint32_t src_offset, - int src_width, - int src_height, - int src_pitch, - uint32_t src_surf_format) -{ - struct gen6_surface_state *ss; - - sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); - - ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss)); - ss->ss0.surface_type = GEN6_SURFACE_2D; - ss->ss0.surface_format = src_surf_format; - - ss->ss1.base_addr = - kgem_add_reloc(&sna->kgem, - sna->kgem.surface + 1, - src_bo, - I915_GEM_DOMAIN_SAMPLER << 16, - src_offset); - - ss->ss2.width = src_width - 1; - ss->ss2.height = src_height - 1; - ss->ss3.pitch = src_pitch - 1; - - return sna->kgem.surface * sizeof(uint32_t); -} - -static void gen6_emit_video_state(struct sna *sna, - const struct sna_composite_op *op) -{ - struct sna_video_frame *frame = op->priv; - uint32_t src_surf_format; - uint32_t src_surf_base[6]; - int src_width[6]; - int src_height[6]; - int src_pitch[6]; - uint32_t *binding_table; - uint16_t offset; - bool dirty; - int n_src, n; 
- - dirty = gen6_get_batch(sna, op); - - src_surf_base[0] = 0; - src_surf_base[1] = 0; - src_surf_base[2] = frame->VBufOffset; - src_surf_base[3] = frame->VBufOffset; - src_surf_base[4] = frame->UBufOffset; - src_surf_base[5] = frame->UBufOffset; - - if (is_planar_fourcc(frame->id)) { - src_surf_format = GEN6_SURFACEFORMAT_R8_UNORM; - src_width[1] = src_width[0] = frame->width; - src_height[1] = src_height[0] = frame->height; - src_pitch[1] = src_pitch[0] = frame->pitch[1]; - src_width[4] = src_width[5] = src_width[2] = src_width[3] = - frame->width / 2; - src_height[4] = src_height[5] = src_height[2] = src_height[3] = - frame->height / 2; - src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = - frame->pitch[0]; - n_src = 6; - } else { - if (frame->id == FOURCC_UYVY) - src_surf_format = GEN6_SURFACEFORMAT_YCRCB_SWAPY; - else - src_surf_format = GEN6_SURFACEFORMAT_YCRCB_NORMAL; - - src_width[0] = frame->width; - src_height[0] = frame->height; - src_pitch[0] = frame->pitch[0]; - n_src = 1; - } - - binding_table = gen6_composite_get_binding_table(sna, &offset); - - binding_table[0] = - gen6_bind_bo(sna, - op->dst.bo, op->dst.width, op->dst.height, - gen6_get_dest_format(op->dst.format), - true); - for (n = 0; n < n_src; n++) { - binding_table[1+n] = - gen6_bind_video_source(sna, - frame->bo, - src_surf_base[n], - src_width[n], - src_height[n], - src_pitch[n], - src_surf_format); - } - - gen6_emit_state(sna, op, offset | dirty); -} - -static bool -gen6_render_video(struct sna *sna, - struct sna_video *video, - struct sna_video_frame *frame, - RegionPtr dstRegion, - short src_w, short src_h, - short drw_w, short drw_h, - short dx, short dy, - PixmapPtr pixmap) -{ - struct sna_composite_op tmp; - int nbox, pix_xoff, pix_yoff; - float src_scale_x, src_scale_y; - struct sna_pixmap *priv; - unsigned filter; - BoxPtr box; - - DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", - __FUNCTION__, src_w, src_h, drw_w, drw_h, - REGION_NUM_RECTS(dstRegion), 
- REGION_EXTENTS(NULL, dstRegion)->x1, - REGION_EXTENTS(NULL, dstRegion)->y1, - REGION_EXTENTS(NULL, dstRegion)->x2, - REGION_EXTENTS(NULL, dstRegion)->y2)); - - priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE); - if (priv == NULL) - return false; - - memset(&tmp, 0, sizeof(tmp)); - - tmp.dst.pixmap = pixmap; - tmp.dst.width = pixmap->drawable.width; - tmp.dst.height = pixmap->drawable.height; - tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth); - tmp.dst.bo = priv->gpu_bo; - - tmp.src.bo = frame->bo; - tmp.mask.bo = NULL; - - tmp.floats_per_vertex = 3; - tmp.floats_per_rect = 9; - - if (src_w == drw_w && src_h == drw_h) - filter = SAMPLER_FILTER_NEAREST; - else - filter = SAMPLER_FILTER_BILINEAR; - - tmp.u.gen6.flags = - GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, - SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), - NO_BLEND, - is_planar_fourcc(frame->id) ? - GEN6_WM_KERNEL_VIDEO_PLANAR : - GEN6_WM_KERNEL_VIDEO_PACKED, - 2); - tmp.priv = frame; - - kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); - if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { - kgem_submit(&sna->kgem); - assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)); - _kgem_set_mode(&sna->kgem, KGEM_RENDER); - } - - gen6_emit_video_state(sna, &tmp); - gen6_align_vertex(sna, &tmp); - - /* Set up the offset for translating from the given region (in screen - * coordinates) to the backing pixmap. 
- */ -#ifdef COMPOSITE - pix_xoff = -pixmap->screen_x + pixmap->drawable.x; - pix_yoff = -pixmap->screen_y + pixmap->drawable.y; -#else - pix_xoff = 0; - pix_yoff = 0; -#endif - - /* Use normalized texture coordinates */ - src_scale_x = ((float)src_w / frame->width) / (float)drw_w; - src_scale_y = ((float)src_h / frame->height) / (float)drw_h; - - box = REGION_RECTS(dstRegion); - nbox = REGION_NUM_RECTS(dstRegion); - while (nbox--) { - BoxRec r; - - r.x1 = box->x1 + pix_xoff; - r.x2 = box->x2 + pix_xoff; - r.y1 = box->y1 + pix_yoff; - r.y2 = box->y2 + pix_yoff; - - gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state); - - OUT_VERTEX(r.x2, r.y2); - OUT_VERTEX_F((box->x2 - dx) * src_scale_x); - OUT_VERTEX_F((box->y2 - dy) * src_scale_y); - - OUT_VERTEX(r.x1, r.y2); - OUT_VERTEX_F((box->x1 - dx) * src_scale_x); - OUT_VERTEX_F((box->y2 - dy) * src_scale_y); - - OUT_VERTEX(r.x1, r.y1); - OUT_VERTEX_F((box->x1 - dx) * src_scale_x); - OUT_VERTEX_F((box->y1 - dy) * src_scale_y); - - if (!DAMAGE_IS_ALL(priv->gpu_damage)) { - sna_damage_add_box(&priv->gpu_damage, &r); - sna_damage_subtract_box(&priv->cpu_damage, &r); - } - box++; - } - priv->clear = false; - - gen4_vertex_flush(sna); - return true; -} - -static int -gen6_composite_picture(struct sna *sna, - PicturePtr picture, - struct sna_composite_channel *channel, - int x, int y, - int w, int h, - int dst_x, int dst_y, - bool precise) -{ - PixmapPtr pixmap; - uint32_t color; - int16_t dx, dy; - - DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", - __FUNCTION__, x, y, w, h, dst_x, dst_y)); - - channel->is_solid = false; - channel->card_format = -1; - - if (sna_picture_is_solid(picture, &color)) - return gen4_channel_init_solid(sna, channel, color); - - if (picture->pDrawable == NULL) { - int ret; - - if (picture->pSourcePict->type == SourcePictTypeLinear) - return gen4_channel_init_linear(sna, picture, channel, - x, y, - w, h, - dst_x, dst_y); - - DBG(("%s -- fixup, gradient\n", __FUNCTION__)); - ret = -1; - if (!precise) 
- ret = sna_render_picture_approximate_gradient(sna, picture, channel, - x, y, w, h, dst_x, dst_y); - if (ret == -1) - ret = sna_render_picture_fixup(sna, picture, channel, - x, y, w, h, dst_x, dst_y); - return ret; - } - - if (picture->alphaMap) { - DBG(("%s -- fixup, alphamap\n", __FUNCTION__)); - return sna_render_picture_fixup(sna, picture, channel, - x, y, w, h, dst_x, dst_y); - } - - if (!gen6_check_repeat(picture)) - return sna_render_picture_fixup(sna, picture, channel, - x, y, w, h, dst_x, dst_y); - - if (!gen6_check_filter(picture)) - return sna_render_picture_fixup(sna, picture, channel, - x, y, w, h, dst_x, dst_y); - - channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; - channel->filter = picture->filter; - - pixmap = get_drawable_pixmap(picture->pDrawable); - get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); - - x += dx + picture->pDrawable->x; - y += dy + picture->pDrawable->y; - - channel->is_affine = sna_transform_is_affine(picture->transform); - if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) { - DBG(("%s: integer translation (%d, %d), removing\n", - __FUNCTION__, dx, dy)); - x += dx; - y += dy; - channel->transform = NULL; - channel->filter = PictFilterNearest; - } else - channel->transform = picture->transform; - - channel->pict_format = picture->format; - channel->card_format = gen6_get_card_format(picture->format); - if (channel->card_format == (unsigned)-1) - return sna_render_picture_convert(sna, picture, channel, pixmap, - x, y, w, h, dst_x, dst_y, - false); - - if (too_large(pixmap->drawable.width, pixmap->drawable.height)) { - DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__, - pixmap->drawable.width, pixmap->drawable.height)); - return sna_render_picture_extract(sna, picture, channel, - x, y, w, h, dst_x, dst_y); - } - - return sna_render_pixmap_bo(sna, channel, pixmap, - x, y, w, h, dst_x, dst_y); -} - -inline static void gen6_composite_channel_convert(struct 
sna_composite_channel *channel) -{ - channel->repeat = gen6_repeat(channel->repeat); - channel->filter = gen6_filter(channel->filter); - if (channel->card_format == (unsigned)-1) - channel->card_format = gen6_get_card_format(channel->pict_format); - assert(channel->card_format != (unsigned)-1); -} - -#endif - -static void gen6_render_composite_done(struct sna *sna, - const struct sna_composite_op *op) -{ - DBG(("%s\n", __FUNCTION__)); - - assert(!sna->render.active); - if (sna->render.vertex_offset) { - gen4_vertex_flush(sna); - gen6_magic_ca_pass(sna, op); - } - - -// sna_render_composite_redirect_done(sna, op); -} - -#if 0 - -static bool -gen6_composite_set_target(struct sna *sna, - struct sna_composite_op *op, - PicturePtr dst, - int x, int y, int w, int h) -{ - BoxRec box; - - op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); - op->dst.format = dst->format; - op->dst.width = op->dst.pixmap->drawable.width; - op->dst.height = op->dst.pixmap->drawable.height; - - if (w && h) { - box.x1 = x; - box.y1 = y; - box.x2 = x + w; - box.y2 = y + h; - } else - sna_render_picture_extents(dst, &box); - -// op->dst.bo = sna_drawable_use_bo (dst->pDrawable, -// PREFER_GPU | FORCE_GPU | RENDER_GPU, -// &box, &op->damage); - if (op->dst.bo == NULL) - return false; - - get_drawable_deltas(dst->pDrawable, op->dst.pixmap, - &op->dst.x, &op->dst.y); - - DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", - __FUNCTION__, - op->dst.pixmap, (int)op->dst.format, - op->dst.width, op->dst.height, - op->dst.bo->pitch, - op->dst.x, op->dst.y, - op->damage ? 
*op->damage : (void *)-1)); - - assert(op->dst.bo->proxy == NULL); - - if (too_large(op->dst.width, op->dst.height) && - !sna_render_composite_redirect(sna, op, x, y, w, h)) - return false; - - return true; -} - - -static bool -gen6_render_composite(struct sna *sna, - uint8_t op, - PicturePtr src, - PicturePtr mask, - PicturePtr dst, - int16_t src_x, int16_t src_y, - int16_t msk_x, int16_t msk_y, - int16_t dst_x, int16_t dst_y, - int16_t width, int16_t height, - struct sna_composite_op *tmp) -{ - if (op >= ARRAY_SIZE(gen6_blend_op)) - return false; - - DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, - width, height, sna->kgem.ring)); - - if (op == PictOpClear) - op = PictOpSrc; - tmp->op = op; - if (!gen6_composite_set_target(sna, tmp, dst, - dst_x, dst_y, width, height)) - return false; - - switch (gen6_composite_picture(sna, src, &tmp->src, - src_x, src_y, - width, height, - dst_x, dst_y, - dst->polyMode == PolyModePrecise)) { - case -1: - goto cleanup_dst; - case 0: - if (!gen4_channel_init_solid(sna, &tmp->src, 0)) - goto cleanup_dst; - /* fall through to fixup */ - case 1: - /* Did we just switch rings to prepare the source? */ - if (mask == NULL && - prefer_blt_composite(sna, tmp) && - sna_blt_composite__convert(sna, - dst_x, dst_y, width, height, - tmp)) - return true; - - gen6_composite_channel_convert(&tmp->src); - break; - } - - tmp->is_affine = tmp->src.is_affine; - tmp->has_component_alpha = false; - tmp->need_magic_ca_pass = false; - - tmp->mask.bo = NULL; - tmp->mask.filter = SAMPLER_FILTER_NEAREST; - tmp->mask.repeat = SAMPLER_EXTEND_NONE; - - if (mask) { - if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { - tmp->has_component_alpha = true; - - /* Check if it's component alpha that relies on a source alpha and on - * the source value. We can only get one of those into the single - * source value that we get to blend with. 
- */ - if (gen6_blend_op[op].src_alpha && - (gen6_blend_op[op].src_blend != GEN6_BLENDFACTOR_ZERO)) { - if (op != PictOpOver) - goto cleanup_src; - - tmp->need_magic_ca_pass = true; - tmp->op = PictOpOutReverse; - } - } - - if (!reuse_source(sna, - src, &tmp->src, src_x, src_y, - mask, &tmp->mask, msk_x, msk_y)) { - switch (gen6_composite_picture(sna, mask, &tmp->mask, - msk_x, msk_y, - width, height, - dst_x, dst_y, - dst->polyMode == PolyModePrecise)) { - case -1: - goto cleanup_src; - case 0: - if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) - goto cleanup_src; - /* fall through to fixup */ - case 1: - gen6_composite_channel_convert(&tmp->mask); - break; - } - } - - tmp->is_affine &= tmp->mask.is_affine; - } - - tmp->u.gen6.flags = - GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, - tmp->src.repeat, - tmp->mask.filter, - tmp->mask.repeat), - gen6_get_blend(tmp->op, - tmp->has_component_alpha, - tmp->dst.format), - gen6_choose_composite_kernel(tmp->op, - tmp->mask.bo != NULL, - tmp->has_component_alpha, - tmp->is_affine), - gen4_choose_composite_emitter(tmp)); - -// tmp->blt = gen6_render_composite_blt; -// tmp->box = gen6_render_composite_box; -// tmp->boxes = gen6_render_composite_boxes__blt; -// if (tmp->emit_boxes) { -// tmp->boxes = gen6_render_composite_boxes; -// tmp->thread_boxes = gen6_render_composite_boxes__thread; -// } - tmp->done = gen6_render_composite_done; - - kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); - if (!kgem_check_bo(&sna->kgem, - tmp->dst.bo, tmp->src.bo, tmp->mask.bo, - NULL)) { - kgem_submit(&sna->kgem); - if (!kgem_check_bo(&sna->kgem, - tmp->dst.bo, tmp->src.bo, tmp->mask.bo, - NULL)) - goto cleanup_mask; - _kgem_set_mode(&sna->kgem, KGEM_RENDER); - } - - gen6_emit_composite_state(sna, tmp); - gen6_align_vertex(sna, tmp); - return true; - -cleanup_mask: - if (tmp->mask.bo) - kgem_bo_destroy(&sna->kgem, tmp->mask.bo); -cleanup_src: - if (tmp->src.bo) - kgem_bo_destroy(&sna->kgem, tmp->src.bo); -cleanup_dst: - if 
(tmp->redirect.real_bo) - kgem_bo_destroy(&sna->kgem, tmp->dst.bo); - return false; -} - - -#if !NO_COMPOSITE_SPANS -fastcall static void -gen6_render_composite_spans_box(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, float opacity) -{ - DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", - __FUNCTION__, - op->base.src.offset[0], op->base.src.offset[1], - opacity, - op->base.dst.x, op->base.dst.y, - box->x1, box->y1, - box->x2 - box->x1, - box->y2 - box->y1)); - - gen6_get_rectangles(sna, &op->base, 1, gen6_emit_composite_state); - op->prim_emit(sna, op, box, opacity); -} - -static void -gen6_render_composite_spans_boxes(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, int nbox, - float opacity) -{ - DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", - __FUNCTION__, nbox, - op->base.src.offset[0], op->base.src.offset[1], - opacity, - op->base.dst.x, op->base.dst.y)); - - do { - int nbox_this_time; - - nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, - gen6_emit_composite_state); - nbox -= nbox_this_time; - - do { - DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, - box->x1, box->y1, - box->x2 - box->x1, - box->y2 - box->y1)); - - op->prim_emit(sna, op, box++, opacity); - } while (--nbox_this_time); - } while (nbox); -} - -fastcall static void -gen6_render_composite_spans_boxes__thread(struct sna *sna, - const struct sna_composite_spans_op *op, - const struct sna_opacity_box *box, - int nbox) -{ - DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", - __FUNCTION__, nbox, - op->base.src.offset[0], op->base.src.offset[1], - op->base.dst.x, op->base.dst.y)); - - sna_vertex_lock(&sna->render); - do { - int nbox_this_time; - float *v; - - nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, - gen6_emit_composite_state); - assert(nbox_this_time); - nbox -= nbox_this_time; - - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used 
+= nbox_this_time * op->base.floats_per_rect; - - sna_vertex_acquire__locked(&sna->render); - sna_vertex_unlock(&sna->render); - - op->emit_boxes(op, box, nbox_this_time, v); - box += nbox_this_time; - - sna_vertex_lock(&sna->render); - sna_vertex_release__locked(&sna->render); - } while (nbox); - sna_vertex_unlock(&sna->render); -} - -fastcall static void -gen6_render_composite_spans_done(struct sna *sna, - const struct sna_composite_spans_op *op) -{ - DBG(("%s()\n", __FUNCTION__)); - assert(!sna->render.active); - - if (sna->render.vertex_offset) - gen4_vertex_flush(sna); - - if (op->base.src.bo) - kgem_bo_destroy(&sna->kgem, op->base.src.bo); - - sna_render_composite_redirect_done(sna, &op->base); -} - -static bool -gen6_check_composite_spans(struct sna *sna, - uint8_t op, PicturePtr src, PicturePtr dst, - int16_t width, int16_t height, - unsigned flags) -{ - DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n", - __FUNCTION__, op, width, height, flags)); - - if (op >= ARRAY_SIZE(gen6_blend_op)) - return false; - - if (gen6_composite_fallback(sna, src, NULL, dst)) { - DBG(("%s: operation would fallback\n", __FUNCTION__)); - return false; - } - - if (need_tiling(sna, width, height) && - !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { - DBG(("%s: fallback, tiled operation not on GPU\n", - __FUNCTION__)); - return false; - } - - if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) { - struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable); - assert(priv); - - if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) - return true; - - if (flags & COMPOSITE_SPANS_INPLACE_HINT) - return false; - - return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo); - } - - return true; -} - -static bool -gen6_render_composite_spans(struct sna *sna, - uint8_t op, - PicturePtr src, - PicturePtr dst, - int16_t src_x, int16_t src_y, - int16_t dst_x, int16_t dst_y, - int16_t width, int16_t height, - unsigned flags, - struct sna_composite_spans_op *tmp) -{ - DBG(("%s: %dx%d with flags=%x, 
current mode=%d\n", __FUNCTION__, - width, height, flags, sna->kgem.ring)); - - assert(gen6_check_composite_spans(sna, op, src, dst, width, height, flags)); - - if (need_tiling(sna, width, height)) { - DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", - __FUNCTION__, width, height)); - return sna_tiling_composite_spans(op, src, dst, - src_x, src_y, dst_x, dst_y, - width, height, flags, tmp); - } - - tmp->base.op = op; - if (!gen6_composite_set_target(sna, &tmp->base, dst, - dst_x, dst_y, width, height)) - return false; - - switch (gen6_composite_picture(sna, src, &tmp->base.src, - src_x, src_y, - width, height, - dst_x, dst_y, - dst->polyMode == PolyModePrecise)) { - case -1: - goto cleanup_dst; - case 0: - if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) - goto cleanup_dst; - /* fall through to fixup */ - case 1: - gen6_composite_channel_convert(&tmp->base.src); - break; - } - tmp->base.mask.bo = NULL; - - tmp->base.is_affine = tmp->base.src.is_affine; - tmp->base.need_magic_ca_pass = false; - - tmp->base.u.gen6.flags = - GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter, - tmp->base.src.repeat, - SAMPLER_FILTER_NEAREST, - SAMPLER_EXTEND_PAD), - gen6_get_blend(tmp->base.op, false, tmp->base.dst.format), - GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine, - gen4_choose_spans_emitter(tmp)); - - tmp->box = gen6_render_composite_spans_box; - tmp->boxes = gen6_render_composite_spans_boxes; - if (tmp->emit_boxes) - tmp->thread_boxes = gen6_render_composite_spans_boxes__thread; - tmp->done = gen6_render_composite_spans_done; - - kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo); - if (!kgem_check_bo(&sna->kgem, - tmp->base.dst.bo, tmp->base.src.bo, - NULL)) { - kgem_submit(&sna->kgem); - if (!kgem_check_bo(&sna->kgem, - tmp->base.dst.bo, tmp->base.src.bo, - NULL)) - goto cleanup_src; - _kgem_set_mode(&sna->kgem, KGEM_RENDER); - } - - gen6_emit_composite_state(sna, &tmp->base); - gen6_align_vertex(sna, &tmp->base); - return true; - -cleanup_src: - if 
(tmp->base.src.bo) - kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); -cleanup_dst: - if (tmp->base.redirect.real_bo) - kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); - return false; -} -#endif - - -static void -gen6_emit_copy_state(struct sna *sna, - const struct sna_composite_op *op) -{ - uint32_t *binding_table; - uint16_t offset; - bool dirty; - - dirty = gen6_get_batch(sna, op); - - binding_table = gen6_composite_get_binding_table(sna, &offset); - - binding_table[0] = - gen6_bind_bo(sna, - op->dst.bo, op->dst.width, op->dst.height, - gen6_get_dest_format(op->dst.format), - true); - binding_table[1] = - gen6_bind_bo(sna, - op->src.bo, op->src.width, op->src.height, - op->src.card_format, - false); - - if (sna->kgem.surface == offset && - *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) { - sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); - offset = sna->render_state.gen6.surface_table; - } - - gen6_emit_state(sna, op, offset | dirty); -} - - -static inline bool prefer_blt_copy(struct sna *sna, - struct kgem_bo *src_bo, - struct kgem_bo *dst_bo, - unsigned flags) -{ - if (flags & COPY_SYNC) - return false; - - if (PREFER_RENDER) - return PREFER_RENDER > 0; - - if (sna->kgem.ring == KGEM_BLT) - return true; - - if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) - return true; - - if (untiled_tlb_miss(src_bo) || - untiled_tlb_miss(dst_bo)) - return true; - - if (!prefer_blt_ring(sna, dst_bo, flags)) - return false; - - return (prefer_blt_bo(sna, src_bo) >= 0 && - prefer_blt_bo(sna, dst_bo) > 0); -} - -inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents) -{ - *extents = box[0]; - while (--n) { - box++; - - if (box->x1 < extents->x1) - extents->x1 = box->x1; - if (box->x2 > extents->x2) - extents->x2 = box->x2; - - if (box->y1 < extents->y1) - extents->y1 = box->y1; - if (box->y2 > extents->y2) - extents->y2 = box->y2; - } -} - -static inline bool 
-overlaps(struct sna *sna, - struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, - struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, - const BoxRec *box, int n, BoxRec *extents) -{ - if (src_bo != dst_bo) - return false; - - boxes_extents(box, n, extents); - return (extents->x2 + src_dx > extents->x1 + dst_dx && - extents->x1 + src_dx < extents->x2 + dst_dx && - extents->y2 + src_dy > extents->y1 + dst_dy && - extents->y1 + src_dy < extents->y2 + dst_dy); -} - -static bool -gen6_render_copy_boxes(struct sna *sna, uint8_t alu, - PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, - PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, - const BoxRec *box, int n, unsigned flags) -{ - struct sna_composite_op tmp; - BoxRec extents; - - DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? %d\n", - __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, - src_bo == dst_bo, - overlaps(sna, - src_bo, src_dx, src_dy, - dst_bo, dst_dx, dst_dy, - box, n, &extents))); - - if (prefer_blt_copy(sna, src_bo, dst_bo, flags) && - sna_blt_compare_depth(&src->drawable, &dst->drawable) && - sna_blt_copy_boxes(sna, alu, - src_bo, src_dx, src_dy, - dst_bo, dst_dx, dst_dy, - dst->drawable.bitsPerPixel, - box, n)) - return true; - - if (!(alu == GXcopy || alu == GXclear)) { -fallback_blt: - if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) - return false; - - return sna_blt_copy_boxes_fallback(sna, alu, - src, src_bo, src_dx, src_dy, - dst, dst_bo, dst_dx, dst_dy, - box, n); - } - - if (overlaps(sna, - src_bo, src_dx, src_dy, - dst_bo, dst_dx, dst_dy, - box, n, &extents)) { - if (too_large(extents.x2-extents.x1, extents.y2-extents.y1)) - goto fallback_blt; - - if (can_switch_to_blt(sna, dst_bo, flags) && - sna_blt_compare_depth(&src->drawable, &dst->drawable) && - sna_blt_copy_boxes(sna, alu, - src_bo, src_dx, src_dy, - dst_bo, dst_dx, dst_dy, - dst->drawable.bitsPerPixel, - box, n)) - return true; - - return 
sna_render_copy_boxes__overlap(sna, alu, - src, src_bo, src_dx, src_dy, - dst, dst_bo, dst_dx, dst_dy, - box, n, &extents); - } - - if (dst->drawable.depth == src->drawable.depth) { - tmp.dst.format = sna_render_format_for_depth(dst->drawable.depth); - tmp.src.pict_format = tmp.dst.format; - } else { - tmp.dst.format = sna_format_for_depth(dst->drawable.depth); - tmp.src.pict_format = sna_format_for_depth(src->drawable.depth); - } - if (!gen6_check_format(tmp.src.pict_format)) - goto fallback_blt; - - tmp.dst.pixmap = dst; - tmp.dst.width = dst->drawable.width; - tmp.dst.height = dst->drawable.height; - tmp.dst.bo = dst_bo; - tmp.dst.x = tmp.dst.y = 0; - tmp.damage = NULL; - - sna_render_composite_redirect_init(&tmp); - if (too_large(tmp.dst.width, tmp.dst.height)) { - int i; - - extents = box[0]; - for (i = 1; i < n; i++) { - if (box[i].x1 < extents.x1) - extents.x1 = box[i].x1; - if (box[i].y1 < extents.y1) - extents.y1 = box[i].y1; - - if (box[i].x2 > extents.x2) - extents.x2 = box[i].x2; - if (box[i].y2 > extents.y2) - extents.y2 = box[i].y2; - } - - if (!sna_render_composite_redirect(sna, &tmp, - extents.x1 + dst_dx, - extents.y1 + dst_dy, - extents.x2 - extents.x1, - extents.y2 - extents.y1)) - goto fallback_tiled; - - dst_dx += tmp.dst.x; - dst_dy += tmp.dst.y; - - tmp.dst.x = tmp.dst.y = 0; - } - - tmp.src.card_format = gen6_get_card_format(tmp.src.pict_format); - if (too_large(src->drawable.width, src->drawable.height)) { - int i; - - extents = box[0]; - for (i = 1; i < n; i++) { - if (extents.x1 < box[i].x1) - extents.x1 = box[i].x1; - if (extents.y1 < box[i].y1) - extents.y1 = box[i].y1; - - if (extents.x2 > box[i].x2) - extents.x2 = box[i].x2; - if (extents.y2 > box[i].y2) - extents.y2 = box[i].y2; - } - - if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, - extents.x1 + src_dx, - extents.y1 + src_dy, - extents.x2 - extents.x1, - extents.y2 - extents.y1)) { - DBG(("%s: unable to extract partial pixmap\n", __FUNCTION__)); - goto 
fallback_tiled_dst; - } - - src_dx += tmp.src.offset[0]; - src_dy += tmp.src.offset[1]; - } else { - tmp.src.bo = src_bo; - tmp.src.width = src->drawable.width; - tmp.src.height = src->drawable.height; - } - - tmp.mask.bo = NULL; - - tmp.floats_per_vertex = 2; - tmp.floats_per_rect = 6; - tmp.need_magic_ca_pass = 0; - - tmp.u.gen6.flags = COPY_FLAGS(alu); - assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); - assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER); - assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX); - - kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); - if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { - kgem_submit(&sna->kgem); - if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { - DBG(("%s: too large for a single operation\n", - __FUNCTION__)); - goto fallback_tiled_src; - } - _kgem_set_mode(&sna->kgem, KGEM_RENDER); - } - - gen6_emit_copy_state(sna, &tmp); - gen6_align_vertex(sna, &tmp); - - do { - int16_t *v; - int n_this_time; - - n_this_time = gen6_get_rectangles(sna, &tmp, n, - gen6_emit_copy_state); - n -= n_this_time; - - v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); - sna->render.vertex_used += 6 * n_this_time; - assert(sna->render.vertex_used <= sna->render.vertex_size); - do { - - DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", - box->x1 + src_dx, box->y1 + src_dy, - box->x1 + dst_dx, box->y1 + dst_dy, - box->x2 - box->x1, box->y2 - box->y1)); - v[0] = box->x2 + dst_dx; - v[2] = box->x2 + src_dx; - v[1] = v[5] = box->y2 + dst_dy; - v[3] = v[7] = box->y2 + src_dy; - v[8] = v[4] = box->x1 + dst_dx; - v[10] = v[6] = box->x1 + src_dx; - v[9] = box->y1 + dst_dy; - v[11] = box->y1 + src_dy; - v += 12; box++; - } while (--n_this_time); - } while (n); - - gen4_vertex_flush(sna); - sna_render_composite_redirect_done(sna, &tmp); - if (tmp.src.bo != src_bo) - kgem_bo_destroy(&sna->kgem, tmp.src.bo); - return true; - -fallback_tiled_src: - if (tmp.src.bo != src_bo) - 
kgem_bo_destroy(&sna->kgem, tmp.src.bo); -fallback_tiled_dst: - if (tmp.redirect.real_bo) - kgem_bo_destroy(&sna->kgem, tmp.dst.bo); -fallback_tiled: - if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && - sna_blt_copy_boxes(sna, alu, - src_bo, src_dx, src_dy, - dst_bo, dst_dx, dst_dy, - dst->drawable.bitsPerPixel, - box, n)) - return true; - - return sna_tiling_copy_boxes(sna, alu, - src, src_bo, src_dx, src_dy, - dst, dst_bo, dst_dx, dst_dy, - box, n); -} - -static void -gen6_render_copy_blt(struct sna *sna, - const struct sna_copy_op *op, - int16_t sx, int16_t sy, - int16_t w, int16_t h, - int16_t dx, int16_t dy) -{ - int16_t *v; - - gen6_get_rectangles(sna, &op->base, 1, gen6_emit_copy_state); - - v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; - sna->render.vertex_used += 6; - assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = dx+w; v[1] = dy+h; - v[2] = sx+w; v[3] = sy+h; - v[4] = dx; v[5] = dy+h; - v[6] = sx; v[7] = sy+h; - v[8] = dx; v[9] = dy; - v[10] = sx; v[11] = sy; -} - -static void -gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op) -{ - DBG(("%s()\n", __FUNCTION__)); - - assert(!sna->render.active); - if (sna->render.vertex_offset) - gen4_vertex_flush(sna); -} - -static bool -gen6_render_copy(struct sna *sna, uint8_t alu, - PixmapPtr src, struct kgem_bo *src_bo, - PixmapPtr dst, struct kgem_bo *dst_bo, - struct sna_copy_op *op) -{ - DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n", - __FUNCTION__, alu, - src->drawable.width, src->drawable.height, - dst->drawable.width, dst->drawable.height)); - -fallback: - - op->base.dst.format = PIXMAN_a8r8g8b8; - op->base.src.pict_format = op->base.dst.format; - - op->base.dst.pixmap = dst; - op->base.dst.width = dst->drawable.width; - op->base.dst.height = dst->drawable.height; - op->base.dst.bo = dst_bo; - - op->base.src.bo = src_bo; - op->base.src.card_format = - gen6_get_card_format(op->base.src.pict_format); - op->base.src.width = 
src->drawable.width; - op->base.src.height = src->drawable.height; - - op->base.mask.bo = NULL; - - op->base.floats_per_vertex = 2; - op->base.floats_per_rect = 6; - - op->base.u.gen6.flags = COPY_FLAGS(alu); - assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); - assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER); - assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX); - - kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); - if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { - kgem_submit(&sna->kgem); - if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) - goto fallback; - _kgem_set_mode(&sna->kgem, KGEM_RENDER); - } - - gen6_emit_copy_state(sna, &op->base); - gen6_align_vertex(sna, &op->base); - - op->blt = gen6_render_copy_blt; - op->done = gen6_render_copy_done; - return true; -} -#endif - - -static bool -gen6_blit_tex(struct sna *sna, - uint8_t op, bool scale, - PixmapPtr src, struct kgem_bo *src_bo, - PixmapPtr mask,struct kgem_bo *mask_bo, - PixmapPtr dst, struct kgem_bo *dst_bo, - int32_t src_x, int32_t src_y, - int32_t msk_x, int32_t msk_y, - int32_t dst_x, int32_t dst_y, - int32_t width, int32_t height, - struct sna_composite_op *tmp) -{ - - DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, - width, height, sna->kgem.ring)); - - tmp->op = PictOpSrc; - - tmp->dst.pixmap = dst; - tmp->dst.bo = dst_bo; - tmp->dst.width = dst->drawable.width; - tmp->dst.height = dst->drawable.height; - tmp->dst.format = PICT_x8r8g8b8; - - - tmp->src.repeat = SAMPLER_EXTEND_NONE; - tmp->src.is_affine = true; - - tmp->src.bo = src_bo; - tmp->src.pict_format = PICT_x8r8g8b8; - tmp->src.card_format = gen6_get_card_format(tmp->src.pict_format); - tmp->src.width = src->drawable.width; - tmp->src.height = src->drawable.height; - - if ( (tmp->src.width == width) && - (tmp->src.height == height) ) - tmp->src.filter = SAMPLER_FILTER_NEAREST; - else - tmp->src.filter = SAMPLER_FILTER_BILINEAR; - - tmp->is_affine = tmp->src.is_affine; - 
tmp->has_component_alpha = false; - tmp->need_magic_ca_pass = false; - - tmp->mask.repeat = SAMPLER_EXTEND_NONE; - tmp->mask.filter = SAMPLER_FILTER_NEAREST; - tmp->mask.is_affine = true; - - tmp->mask.bo = mask_bo; - tmp->mask.pict_format = PIXMAN_a8; - tmp->mask.card_format = gen6_get_card_format(tmp->mask.pict_format); - tmp->mask.width = mask->drawable.width; - tmp->mask.height = mask->drawable.height; - - - if( scale ) - { - tmp->src.scale[0] = 1.f/width; - tmp->src.scale[1] = 1.f/height; - } - else - { - tmp->src.scale[0] = 1.f/src->drawable.width; - tmp->src.scale[1] = 1.f/src->drawable.height; - } -// tmp->src.offset[0] = -dst_x; -// tmp->src.offset[1] = -dst_y; - - - tmp->mask.scale[0] = 1.f/mask->drawable.width; - tmp->mask.scale[1] = 1.f/mask->drawable.height; -// tmp->mask.offset[0] = -dst_x; -// tmp->mask.offset[1] = -dst_y; - - tmp->u.gen6.flags = - GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, - tmp->src.repeat, - tmp->mask.filter, - tmp->mask.repeat), - gen6_get_blend(tmp->op, - tmp->has_component_alpha, - tmp->dst.format), -/* gen6_choose_composite_kernel(tmp->op, - tmp->mask.bo != NULL, - tmp->has_component_alpha, - tmp->is_affine), -*/ - GEN6_WM_KERNEL_MASK, - gen4_choose_composite_emitter(tmp)); - - tmp->blt = gen6_render_composite_blt; -// tmp->box = gen6_render_composite_box; - tmp->done = gen6_render_composite_done; - - kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); - if (!kgem_check_bo(&sna->kgem, - tmp->dst.bo, tmp->src.bo, tmp->mask.bo, - NULL)) { - kgem_submit(&sna->kgem); - _kgem_set_mode(&sna->kgem, KGEM_RENDER); - } - - gen6_emit_composite_state(sna, tmp); - gen6_align_vertex(sna, tmp); - return true; - -} - - - -#if 0 - -static void -gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) -{ - uint32_t *binding_table; - uint16_t offset; - bool dirty; - - dirty = gen6_get_batch(sna, op); - - binding_table = gen6_composite_get_binding_table(sna, &offset); - - binding_table[0] = - gen6_bind_bo(sna, - op->dst.bo, 
op->dst.width, op->dst.height, - gen6_get_dest_format(op->dst.format), - true); - binding_table[1] = - gen6_bind_bo(sna, - op->src.bo, 1, 1, - GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, - false); - - if (sna->kgem.surface == offset && - *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) { - sna->kgem.surface += - sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t); - offset = sna->render_state.gen6.surface_table; - } - - gen6_emit_state(sna, op, offset | dirty); -} - -static inline bool prefer_blt_fill(struct sna *sna, - struct kgem_bo *bo) -{ - if (PREFER_RENDER) - return PREFER_RENDER < 0; - - if (untiled_tlb_miss(bo)) - return true; - - return prefer_blt_ring(sna, bo, 0) || prefer_blt_bo(sna, bo) >= 0; -} - -static bool -gen6_render_fill_boxes(struct sna *sna, - CARD8 op, - PictFormat format, - const xRenderColor *color, - PixmapPtr dst, struct kgem_bo *dst_bo, - const BoxRec *box, int n) -{ - struct sna_composite_op tmp; - uint32_t pixel; - - DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n", - __FUNCTION__, op, - color->red, color->green, color->blue, color->alpha, (int)format)); - - if (op >= ARRAY_SIZE(gen6_blend_op)) { - DBG(("%s: fallback due to unhandled blend op: %d\n", - __FUNCTION__, op)); - return false; - } - - if (prefer_blt_fill(sna, dst_bo) || !gen6_check_dst_format(format)) { - uint8_t alu = GXinvalid; - - if (op <= PictOpSrc) { - pixel = 0; - if (op == PictOpClear) - alu = GXclear; - else if (sna_get_pixel_from_rgba(&pixel, - color->red, - color->green, - color->blue, - color->alpha, - format)) - alu = GXcopy; - } - - if (alu != GXinvalid && - sna_blt_fill_boxes(sna, alu, - dst_bo, dst->drawable.bitsPerPixel, - pixel, box, n)) - return true; - - if (!gen6_check_dst_format(format)) - return false; - } - - if (op == PictOpClear) { - pixel = 0; - op = PictOpSrc; - } else if (!sna_get_pixel_from_rgba(&pixel, - color->red, - color->green, - color->blue, - color->alpha, - PICT_a8r8g8b8)) - return 
false; - - DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n", - __FUNCTION__, pixel, n, - box[0].x1, box[0].y1, box[0].x2, box[0].y2)); - - tmp.dst.pixmap = dst; - tmp.dst.width = dst->drawable.width; - tmp.dst.height = dst->drawable.height; - tmp.dst.format = format; - tmp.dst.bo = dst_bo; - tmp.dst.x = tmp.dst.y = 0; - tmp.damage = NULL; - - sna_render_composite_redirect_init(&tmp); - if (too_large(dst->drawable.width, dst->drawable.height)) { - BoxRec extents; - - boxes_extents(box, n, &extents); - if (!sna_render_composite_redirect(sna, &tmp, - extents.x1, extents.y1, - extents.x2 - extents.x1, - extents.y2 - extents.y1)) - return sna_tiling_fill_boxes(sna, op, format, color, - dst, dst_bo, box, n); - } - - tmp.src.bo = sna_render_get_solid(sna, pixel); - tmp.mask.bo = NULL; - - tmp.floats_per_vertex = 2; - tmp.floats_per_rect = 6; - tmp.need_magic_ca_pass = false; - - tmp.u.gen6.flags = FILL_FLAGS(op, format); - assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); - assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); - assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); - - if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { - kgem_submit(&sna->kgem); - assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); - } - - gen6_emit_fill_state(sna, &tmp); - gen6_align_vertex(sna, &tmp); - - do { - int n_this_time; - int16_t *v; - - n_this_time = gen6_get_rectangles(sna, &tmp, n, - gen6_emit_fill_state); - n -= n_this_time; - - v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); - sna->render.vertex_used += 6 * n_this_time; - assert(sna->render.vertex_used <= sna->render.vertex_size); - do { - DBG((" (%d, %d), (%d, %d)\n", - box->x1, box->y1, box->x2, box->y2)); - - v[0] = box->x2; - v[5] = v[1] = box->y2; - v[8] = v[4] = box->x1; - v[9] = box->y1; - v[2] = v[3] = v[7] = 1; - v[6] = v[10] = v[11] = 0; - v += 12; box++; - } while (--n_this_time); - } while (n); - - gen4_vertex_flush(sna); - kgem_bo_destroy(&sna->kgem, tmp.src.bo); - 
sna_render_composite_redirect_done(sna, &tmp); - return true; -} - -static void -gen6_render_op_fill_blt(struct sna *sna, - const struct sna_fill_op *op, - int16_t x, int16_t y, int16_t w, int16_t h) -{ - int16_t *v; - - DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); - - gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state); - - v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; - sna->render.vertex_used += 6; - assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = x+w; - v[4] = v[8] = x; - v[1] = v[5] = y+h; - v[9] = y; - - v[2] = v[3] = v[7] = 1; - v[6] = v[10] = v[11] = 0; -} - -fastcall static void -gen6_render_op_fill_box(struct sna *sna, - const struct sna_fill_op *op, - const BoxRec *box) -{ - int16_t *v; - - DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__, - box->x1, box->y1, box->x2, box->y2)); - - gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state); - - v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; - sna->render.vertex_used += 6; - assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = box->x2; - v[8] = v[4] = box->x1; - v[5] = v[1] = box->y2; - v[9] = box->y1; - - v[7] = v[2] = v[3] = 1; - v[6] = v[10] = v[11] = 0; -} - -fastcall static void -gen6_render_op_fill_boxes(struct sna *sna, - const struct sna_fill_op *op, - const BoxRec *box, - int nbox) -{ - DBG(("%s: (%d, %d),(%d, %d)... 
x %d\n", __FUNCTION__, - box->x1, box->y1, box->x2, box->y2, nbox)); - - do { - int nbox_this_time; - int16_t *v; - - nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, - gen6_emit_fill_state); - nbox -= nbox_this_time; - - v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; - sna->render.vertex_used += 6 * nbox_this_time; - assert(sna->render.vertex_used <= sna->render.vertex_size); - - do { - v[0] = box->x2; - v[8] = v[4] = box->x1; - v[5] = v[1] = box->y2; - v[9] = box->y1; - v[7] = v[2] = v[3] = 1; - v[6] = v[10] = v[11] = 0; - box++; v += 12; - } while (--nbox_this_time); - } while (nbox); -} - -static void -gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op) -{ - DBG(("%s()\n", __FUNCTION__)); - - assert(!sna->render.active); - if (sna->render.vertex_offset) - gen4_vertex_flush(sna); - kgem_bo_destroy(&sna->kgem, op->base.src.bo); -} - -static bool -gen6_render_fill(struct sna *sna, uint8_t alu, - PixmapPtr dst, struct kgem_bo *dst_bo, - uint32_t color, - struct sna_fill_op *op) -{ - DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color)); - - if (prefer_blt_fill(sna, dst_bo) && - sna_blt_fill(sna, alu, - dst_bo, dst->drawable.bitsPerPixel, - color, - op)) - return true; - - if (!(alu == GXcopy || alu == GXclear) || - too_large(dst->drawable.width, dst->drawable.height)) - return sna_blt_fill(sna, alu, - dst_bo, dst->drawable.bitsPerPixel, - color, - op); - - if (alu == GXclear) - color = 0; - - op->base.dst.pixmap = dst; - op->base.dst.width = dst->drawable.width; - op->base.dst.height = dst->drawable.height; - op->base.dst.format = sna_format_for_depth(dst->drawable.depth); - op->base.dst.bo = dst_bo; - op->base.dst.x = op->base.dst.y = 0; - - op->base.src.bo = - sna_render_get_solid(sna, - sna_rgba_for_color(color, - dst->drawable.depth)); - op->base.mask.bo = NULL; - - op->base.need_magic_ca_pass = false; - op->base.floats_per_vertex = 2; - op->base.floats_per_rect = 6; - - op->base.u.gen6.flags = 
FILL_FLAGS_NOBLEND; - assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); - assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER); - assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX); - - if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { - kgem_submit(&sna->kgem); - assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); - } - - gen6_emit_fill_state(sna, &op->base); - gen6_align_vertex(sna, &op->base); - - op->blt = gen6_render_op_fill_blt; - op->box = gen6_render_op_fill_box; - op->boxes = gen6_render_op_fill_boxes; - op->done = gen6_render_op_fill_done; - return true; -} - -static bool -gen6_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, - uint32_t color, - int16_t x1, int16_t y1, int16_t x2, int16_t y2, - uint8_t alu) -{ - BoxRec box; - - box.x1 = x1; - box.y1 = y1; - box.x2 = x2; - box.y2 = y2; - - return sna_blt_fill_boxes(sna, alu, - bo, dst->drawable.bitsPerPixel, - color, &box, 1); -} - -static bool -gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, - uint32_t color, - int16_t x1, int16_t y1, - int16_t x2, int16_t y2, - uint8_t alu) -{ - struct sna_composite_op tmp; - int16_t *v; - - /* Prefer to use the BLT if already engaged */ - if (prefer_blt_fill(sna, bo) && - gen6_render_fill_one_try_blt(sna, dst, bo, color, - x1, y1, x2, y2, alu)) - return true; - - /* Must use the BLT if we can't RENDER... 
*/ - if (!(alu == GXcopy || alu == GXclear) || - too_large(dst->drawable.width, dst->drawable.height)) - return gen6_render_fill_one_try_blt(sna, dst, bo, color, - x1, y1, x2, y2, alu); - - if (alu == GXclear) - color = 0; - - tmp.dst.pixmap = dst; - tmp.dst.width = dst->drawable.width; - tmp.dst.height = dst->drawable.height; - tmp.dst.format = sna_format_for_depth(dst->drawable.depth); - tmp.dst.bo = bo; - tmp.dst.x = tmp.dst.y = 0; - - tmp.src.bo = - sna_render_get_solid(sna, - sna_rgba_for_color(color, - dst->drawable.depth)); - tmp.mask.bo = NULL; - - tmp.floats_per_vertex = 2; - tmp.floats_per_rect = 6; - tmp.need_magic_ca_pass = false; - - tmp.u.gen6.flags = FILL_FLAGS_NOBLEND; - assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); - assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); - assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); - - if (!kgem_check_bo(&sna->kgem, bo, NULL)) { - kgem_submit(&sna->kgem); - if (!kgem_check_bo(&sna->kgem, bo, NULL)) { - kgem_bo_destroy(&sna->kgem, tmp.src.bo); - return false; - } - } - - gen6_emit_fill_state(sna, &tmp); - gen6_align_vertex(sna, &tmp); - - gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state); - - DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2)); - - v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; - sna->render.vertex_used += 6; - assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = x2; - v[8] = v[4] = x1; - v[5] = v[1] = y2; - v[9] = y1; - v[7] = v[2] = v[3] = 1; - v[6] = v[10] = v[11] = 0; - - gen4_vertex_flush(sna); - kgem_bo_destroy(&sna->kgem, tmp.src.bo); - - return true; -} - -static bool -gen6_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) -{ - BoxRec box; - - box.x1 = 0; - box.y1 = 0; - box.x2 = dst->drawable.width; - box.y2 = dst->drawable.height; - - return sna_blt_fill_boxes(sna, GXclear, - bo, dst->drawable.bitsPerPixel, - 0, &box, 1); -} - -static bool -gen6_render_clear(struct sna *sna, PixmapPtr dst, struct 
kgem_bo *bo) -{ - struct sna_composite_op tmp; - int16_t *v; - - DBG(("%s: %dx%d\n", - __FUNCTION__, - dst->drawable.width, - dst->drawable.height)); - - /* Prefer to use the BLT if, and only if, already engaged */ - if (sna->kgem.ring == KGEM_BLT && - gen6_render_clear_try_blt(sna, dst, bo)) - return true; - - /* Must use the BLT if we can't RENDER... */ - if (too_large(dst->drawable.width, dst->drawable.height)) - return gen6_render_clear_try_blt(sna, dst, bo); - - tmp.dst.pixmap = dst; - tmp.dst.width = dst->drawable.width; - tmp.dst.height = dst->drawable.height; - tmp.dst.format = sna_format_for_depth(dst->drawable.depth); - tmp.dst.bo = bo; - tmp.dst.x = tmp.dst.y = 0; - - tmp.src.bo = sna_render_get_solid(sna, 0); - tmp.mask.bo = NULL; - - tmp.floats_per_vertex = 2; - tmp.floats_per_rect = 6; - tmp.need_magic_ca_pass = false; - - tmp.u.gen6.flags = FILL_FLAGS_NOBLEND; - assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); - assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); - assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); - - if (!kgem_check_bo(&sna->kgem, bo, NULL)) { - kgem_submit(&sna->kgem); - if (!kgem_check_bo(&sna->kgem, bo, NULL)) { - kgem_bo_destroy(&sna->kgem, tmp.src.bo); - return false; - } - } - - gen6_emit_fill_state(sna, &tmp); - gen6_align_vertex(sna, &tmp); - - gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state); - - v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; - sna->render.vertex_used += 6; - assert(sna->render.vertex_used <= sna->render.vertex_size); - - v[0] = dst->drawable.width; - v[5] = v[1] = dst->drawable.height; - v[8] = v[4] = 0; - v[9] = 0; - - v[7] = v[2] = v[3] = 1; - v[6] = v[10] = v[11] = 0; - - gen4_vertex_flush(sna); - kgem_bo_destroy(&sna->kgem, tmp.src.bo); - - return true; -} -#endif - -static void gen6_render_flush(struct sna *sna) -{ - gen4_vertex_close(sna); - - assert(sna->render.vb_id == 0); - assert(sna->render.vertex_offset == 0); -} - -static void 
-gen6_render_context_switch(struct kgem *kgem, - int new_mode) -{ - if (kgem->nbatch) { - DBG(("%s: from %d to %d\n", __FUNCTION__, kgem->mode, new_mode)); - _kgem_submit(kgem); - } - - kgem->ring = new_mode; -} - -static void -gen6_render_retire(struct kgem *kgem) -{ - struct sna *sna; - - if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire)) - kgem->ring = kgem->mode; - - sna = container_of(kgem, struct sna, kgem); - if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) { - DBG(("%s: resetting idle vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); - sna->render.vertex_used = 0; - sna->render.vertex_index = 0; - } -} - -static void -gen6_render_expire(struct kgem *kgem) -{ - struct sna *sna; - - sna = container_of(kgem, struct sna, kgem); - if (sna->render.vbo && !sna->render.vertex_used) { - DBG(("%s: discarding vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); - kgem_bo_destroy(kgem, sna->render.vbo); - assert(!sna->render.active); - sna->render.vbo = NULL; - sna->render.vertices = sna->render.vertex_data; - sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); - sna->render.vertex_used = 0; - sna->render.vertex_index = 0; - } -} - -static void gen6_render_reset(struct sna *sna) -{ - sna->render_state.gen6.needs_invariant = true; - sna->render_state.gen6.first_state_packet = true; - sna->render_state.gen6.ve_id = 3 << 2; - sna->render_state.gen6.last_primitive = -1; - - sna->render_state.gen6.num_sf_outputs = 0; - sna->render_state.gen6.samplers = -1; - sna->render_state.gen6.blend = -1; - sna->render_state.gen6.kernel = -1; - sna->render_state.gen6.drawrect_offset = -1; - sna->render_state.gen6.drawrect_limit = -1; - sna->render_state.gen6.surface_table = -1; - - sna->render.vertex_offset = 0; - sna->render.nvertex_reloc = 0; - sna->render.vb_id = 0; -} - -static void gen6_render_fini(struct sna *sna) -{ - kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo); -} - -static bool is_gt2(struct 
sna *sna) -{ - return DEVICE_ID(sna->PciInfo) & 0x30; -} - -static bool is_mobile(struct sna *sna) -{ - return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6; -} - -static bool gen6_render_setup(struct sna *sna) -{ - struct gen6_render_state *state = &sna->render_state.gen6; - struct sna_static_stream general; - struct gen6_sampler_state *ss; - int i, j, k, l, m; - - state->info = >1_info; - if (is_gt2(sna)) - state->info = >2_info; /* XXX requires GT_MODE WiZ disabled */ - - sna_static_stream_init(&general); - - /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer - * dumps, you know it points to zero. - */ - null_create(&general); - scratch_create(&general); - - for (m = 0; m < GEN6_KERNEL_COUNT; m++) { - if (wm_kernels[m].size) { - state->wm_kernel[m][1] = - sna_static_stream_add(&general, - wm_kernels[m].data, - wm_kernels[m].size, - 64); - } else { - if (USE_8_PIXEL_DISPATCH) { - state->wm_kernel[m][0] = - sna_static_stream_compile_wm(sna, &general, - wm_kernels[m].data, 8); - } - - if (USE_16_PIXEL_DISPATCH) { - state->wm_kernel[m][1] = - sna_static_stream_compile_wm(sna, &general, - wm_kernels[m].data, 16); - } - - if (USE_32_PIXEL_DISPATCH) { - state->wm_kernel[m][2] = - sna_static_stream_compile_wm(sna, &general, - wm_kernels[m].data, 32); - } - } - if ((state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]) == 0) { - state->wm_kernel[m][1] = - sna_static_stream_compile_wm(sna, &general, - wm_kernels[m].data, 16); - } - } - - ss = sna_static_stream_map(&general, - 2 * sizeof(*ss) * - (2 + - FILTER_COUNT * EXTEND_COUNT * - FILTER_COUNT * EXTEND_COUNT), - 32); - state->wm_state = sna_static_stream_offsetof(&general, ss); - sampler_copy_init(ss); ss += 2; - sampler_fill_init(ss); ss += 2; - for (i = 0; i < FILTER_COUNT; i++) { - for (j = 0; j < EXTEND_COUNT; j++) { - for (k = 0; k < FILTER_COUNT; k++) { - for (l = 0; l < EXTEND_COUNT; l++) { - sampler_state_init(ss++, i, j); - sampler_state_init(ss++, k, l); - } - } - } - } - - 
state->cc_blend = gen6_composite_create_blend_state(&general); - - state->general_bo = sna_static_stream_fini(sna, &general); - return state->general_bo != NULL; -} - -bool gen6_render_init(struct sna *sna) -{ - if (!gen6_render_setup(sna)) - return false; - - sna->kgem.context_switch = gen6_render_context_switch; - sna->kgem.retire = gen6_render_retire; - sna->kgem.expire = gen6_render_expire; - -// sna->render.composite = gen6_render_composite; -// sna->render.video = gen6_render_video; - -// sna->render.copy_boxes = gen6_render_copy_boxes; - - sna->render.blit_tex = gen6_blit_tex; - -// sna->render.copy = gen6_render_copy; - -// sna->render.fill_boxes = gen6_render_fill_boxes; -// sna->render.fill = gen6_render_fill; -// sna->render.fill_one = gen6_render_fill_one; -// sna->render.clear = gen6_render_clear; - - sna->render.flush = gen6_render_flush; - sna->render.reset = gen6_render_reset; - sna->render.fini = gen6_render_fini; - - sna->render.max_3d_size = GEN6_MAX_SIZE; - sna->render.max_3d_pitch = 1 << 18; - sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT; - - return true; -} - - +/* + * Copyright © 2006,2008,2011 Intel Corporation + * Copyright © 2007 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Wang Zhenyu + * Eric Anholt + * Carl Worth + * Keith Packard + * Chris Wilson + * + */ + + +#include "sna.h" +#include "sna_reg.h" +#include "sna_render.h" +#include "sna_render_inline.h" +//#include "sna_video.h" + +#include "brw/brw.h" +#include "gen6_render.h" +#include "gen4_source.h" +#include "gen4_vertex.h" + +#define NO_COMPOSITE 0 +#define NO_COMPOSITE_SPANS 0 +#define NO_COPY 0 +#define NO_COPY_BOXES 0 +#define NO_FILL 0 +#define NO_FILL_BOXES 0 +#define NO_FILL_ONE 0 +#define NO_FILL_CLEAR 0 + +#define NO_RING_SWITCH 0 +#define PREFER_RENDER 0 + +#define USE_8_PIXEL_DISPATCH 1 +#define USE_16_PIXEL_DISPATCH 1 +#define USE_32_PIXEL_DISPATCH 0 + +#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH +#error "Must select at least 8, 16 or 32 pixel dispatch" +#endif + +#define GEN6_MAX_SIZE 8192 + +struct gt_info { + const char *name; + int max_vs_threads; + int max_gs_threads; + int max_wm_threads; + struct { + int size; + int max_vs_entries; + int max_gs_entries; + } urb; +}; + +static const struct gt_info gt1_info = { + .name = "Sandybridge (gen6, gt1)", + .max_vs_threads = 24, + .max_gs_threads = 21, + .max_wm_threads = 40, + .urb = { 32, 256, 256 }, +}; + +static const struct gt_info gt2_info = { + .name = "Sandybridge (gen6, gt2)", + .max_vs_threads = 60, + .max_gs_threads = 60, + .max_wm_threads = 80, + .urb = { 64, 256, 256 }, +}; + +static const uint32_t ps_kernel_packed[][4] = { +#include "exa_wm_src_affine.g6b" 
+#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_yuv_rgb.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_planar[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_planar.g6b" +#include "exa_wm_yuv_rgb.g6b" +#include "exa_wm_write.g6b" +}; + +#define NOKERNEL(kernel_enum, func, ns) \ + [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns} +#define KERNEL(kernel_enum, kernel, ns) \ + [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns} + +static const struct wm_kernel_info { + const char *name; + const void *data; + unsigned int size; + unsigned int num_surfaces; +} wm_kernels[] = { + NOKERNEL(NOMASK, brw_wm_kernel__affine, 2), + NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2), + + NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3), + NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3), + + NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3), + NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3), + + NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3), + NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3), + + NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2), + NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2), + + KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), + KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), +}; +#undef KERNEL + +static const struct blendinfo { + bool src_alpha; + uint32_t src_blend; + uint32_t dst_blend; +} gen6_blend_op[] = { + /* Clear */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO}, + /* Src */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO}, + /* Dst */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE}, + /* Over */ {1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, + /* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE}, + /* In */ {0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO}, + /* InReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA}, + /* Out */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, 
GEN6_BLENDFACTOR_ZERO}, + /* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, + /* Atop */ {1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, + /* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA}, + /* Xor */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, + /* Add */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE}, +}; + +/** + * Highest-valued BLENDFACTOR used in gen6_blend_op. + * + * This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR, + * GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, + * GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} + */ +#define GEN6_BLENDFACTOR_COUNT (GEN6_BLENDFACTOR_INV_DST_ALPHA + 1) + +#define GEN6_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen6_blend_state), 64) + +#define BLEND_OFFSET(s, d) \ + (((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE) + +#define NO_BLEND BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO) +#define CLEAR BLEND_OFFSET(GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO) + +#define SAMPLER_OFFSET(sf, se, mf, me) \ + (((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me) + 2) * 2 * sizeof(struct gen6_sampler_state)) + +#define VERTEX_2s2s 0 + +#define COPY_SAMPLER 0 +#define COPY_VERTEX VERTEX_2s2s +#define COPY_FLAGS(a) GEN6_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? 
NO_BLEND : CLEAR, GEN6_WM_KERNEL_NOMASK, COPY_VERTEX) + +#define FILL_SAMPLER (2 * sizeof(struct gen6_sampler_state)) +#define FILL_VERTEX VERTEX_2s2s +#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX) +#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX) + +#define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0) +#define GEN6_BLEND(f) (((f) >> 0) & 0xfff0) +#define GEN6_KERNEL(f) (((f) >> 16) & 0xf) +#define GEN6_VERTEX(f) (((f) >> 0) & 0xf) +#define GEN6_SET_FLAGS(S, B, K, V) (((S) | (K)) << 16 | ((B) | (V))) + +#define OUT_BATCH(v) batch_emit(sna, v) +#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) +#define OUT_VERTEX_F(v) vertex_emit(sna, v) + +static inline bool too_large(int width, int height) +{ + return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE; +} + +static uint32_t gen6_get_blend(int op, + bool has_component_alpha, + uint32_t dst_format) +{ + uint32_t src, dst; + + + src = GEN6_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend; + dst = GEN6_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend; + +// dst = GEN6_BLENDFACTOR_ZERO; //gen6_blend_op[op].dst_blend; + +#if 0 + /* If there's no dst alpha channel, adjust the blend op so that + * we'll treat it always as 1. + */ + if (PICT_FORMAT_A(dst_format) == 0) { + if (src == GEN6_BLENDFACTOR_DST_ALPHA) + src = GEN6_BLENDFACTOR_ONE; + else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA) + src = GEN6_BLENDFACTOR_ZERO; + } + + /* If the source alpha is being used, then we should only be in a + * case where the source blend factor is 0, and the source blend + * value is the mask channels multiplied by the source picture's alpha. 
+ */ + if (has_component_alpha && gen6_blend_op[op].src_alpha) { + if (dst == GEN6_BLENDFACTOR_SRC_ALPHA) + dst = GEN6_BLENDFACTOR_SRC_COLOR; + else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA) + dst = GEN6_BLENDFACTOR_INV_SRC_COLOR; + } + + DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n", + op, dst_format, PICT_FORMAT_A(dst_format), + src, dst, (int)BLEND_OFFSET(src, dst))); +#endif + + return BLEND_OFFSET(src, dst); +} + +static uint32_t gen6_get_card_format(PictFormat format) +{ + switch (format) { + default: + return -1; + case PICT_a8r8g8b8: + return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; + case PICT_x8r8g8b8: + return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM; + case PICT_a8b8g8r8: + return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM; + case PICT_x8b8g8r8: + return GEN6_SURFACEFORMAT_R8G8B8X8_UNORM; + case PICT_a2r10g10b10: + return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM; + case PICT_x2r10g10b10: + return GEN6_SURFACEFORMAT_B10G10R10X2_UNORM; + case PICT_r8g8b8: + return GEN6_SURFACEFORMAT_R8G8B8_UNORM; + case PICT_r5g6b5: + return GEN6_SURFACEFORMAT_B5G6R5_UNORM; + case PICT_a1r5g5b5: + return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM; + case PICT_a8: + return GEN6_SURFACEFORMAT_A8_UNORM; + case PICT_a4r4g4b4: + return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM; + } +} + +static uint32_t gen6_get_dest_format(PictFormat format) +{ + return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; + +#if 0 + + switch (format) { + default: + return -1; + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; + case PICT_a8b8g8r8: + case PICT_x8b8g8r8: + return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM; + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM; + case PICT_r5g6b5: + return GEN6_SURFACEFORMAT_B5G6R5_UNORM; + case PICT_x1r5g5b5: + case PICT_a1r5g5b5: + return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM; + case PICT_a8: + return GEN6_SURFACEFORMAT_A8_UNORM; + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM; 
+ } +#endif + +} + +#if 0 + +static bool gen6_check_dst_format(PictFormat format) +{ + if (gen6_get_dest_format(format) != -1) + return true; + + DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); + return false; +} + +static bool gen6_check_format(uint32_t format) +{ + if (gen6_get_card_format(format) != -1) + return true; + + DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format)); + return false; +} + +static uint32_t gen6_filter(uint32_t filter) +{ + switch (filter) { + default: + assert(0); + case PictFilterNearest: + return SAMPLER_FILTER_NEAREST; + case PictFilterBilinear: + return SAMPLER_FILTER_BILINEAR; + } +} + +static uint32_t gen6_check_filter(PicturePtr picture) +{ + switch (picture->filter) { + case PictFilterNearest: + case PictFilterBilinear: + return true; + default: + return false; + } +} + +static uint32_t gen6_repeat(uint32_t repeat) +{ + switch (repeat) { + default: + assert(0); + case RepeatNone: + return SAMPLER_EXTEND_NONE; + case RepeatNormal: + return SAMPLER_EXTEND_REPEAT; + case RepeatPad: + return SAMPLER_EXTEND_PAD; + case RepeatReflect: + return SAMPLER_EXTEND_REFLECT; + } +} + +static bool gen6_check_repeat(PicturePtr picture) +{ + if (!picture->repeat) + return true; + + switch (picture->repeatType) { + case RepeatNone: + case RepeatNormal: + case RepeatPad: + case RepeatReflect: + return true; + default: + return false; + } +} +#endif + +static int +gen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine) +{ + int base; + + if (has_mask) { + if (is_ca) { + if (gen6_blend_op[op].src_alpha) + base = GEN6_WM_KERNEL_MASKSA; + else + base = GEN6_WM_KERNEL_MASKCA; + } else + base = GEN6_WM_KERNEL_MASK; + } else + base = GEN6_WM_KERNEL_NOMASK; + + return base + !is_affine; +} + +static void +gen6_emit_urb(struct sna *sna) +{ + OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2)); + OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) | + (sna->render_state.gen6.info->urb.max_vs_entries << 
GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */ + OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) | + (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */ +} + +static void +gen6_emit_state_base_address(struct sna *sna) +{ + OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2)); + OUT_BATCH(0); /* general */ + OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */ + sna->kgem.nbatch, + NULL, + I915_GEM_DOMAIN_INSTRUCTION << 16, + BASE_ADDRESS_MODIFY)); + OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */ + sna->kgem.nbatch, + sna->render_state.gen6.general_bo, + I915_GEM_DOMAIN_INSTRUCTION << 16, + BASE_ADDRESS_MODIFY)); + OUT_BATCH(0); /* indirect */ + OUT_BATCH(kgem_add_reloc(&sna->kgem, + sna->kgem.nbatch, + sna->render_state.gen6.general_bo, + I915_GEM_DOMAIN_INSTRUCTION << 16, + BASE_ADDRESS_MODIFY)); + + /* upper bounds, disable */ + OUT_BATCH(0); + OUT_BATCH(BASE_ADDRESS_MODIFY); + OUT_BATCH(0); + OUT_BATCH(BASE_ADDRESS_MODIFY); +} + +static void +gen6_emit_viewports(struct sna *sna) +{ + OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS | + GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC | + (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); +} + +static void +gen6_emit_vs(struct sna *sna) +{ + /* disable VS constant buffer */ + OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2)); + OUT_BATCH(0); /* no VS kernel */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* pass-through */ +} + +static void +gen6_emit_gs(struct sna *sna) +{ + /* disable GS constant buffer */ + OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2)); + OUT_BATCH(0); /* no GS kernel */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* pass-through */ +} + +static void +gen6_emit_clip(struct sna *sna) +{ + OUT_BATCH(GEN6_3DSTATE_CLIP | 
(4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); /* pass-through */ + OUT_BATCH(0); +} + +static void +gen6_emit_wm_constants(struct sna *sna) +{ + /* disable WM constant buffer */ + OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); +} + +static void +gen6_emit_null_depth_buffer(struct sna *sna) +{ + OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2)); + OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT | + GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2)); + OUT_BATCH(0); +} + +static void +gen6_emit_invariant(struct sna *sna) +{ + OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D); + + OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2)); + OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | + GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2)); + OUT_BATCH(1); + + gen6_emit_urb(sna); + + gen6_emit_state_base_address(sna); + + gen6_emit_viewports(sna); + gen6_emit_vs(sna); + gen6_emit_gs(sna); + gen6_emit_clip(sna); + gen6_emit_wm_constants(sna); + gen6_emit_null_depth_buffer(sna); + + sna->render_state.gen6.needs_invariant = false; +} + +static bool +gen6_emit_cc(struct sna *sna, int blend) +{ + struct gen6_render_state *render = &sna->render_state.gen6; + + if (render->blend == blend) + return blend != NO_BLEND; + + DBG(("%s: blend = %x\n", __FUNCTION__, blend)); + + OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2)); + OUT_BATCH((render->cc_blend + blend) | 1); + if (render->blend == (unsigned)-1) { + OUT_BATCH(1); + OUT_BATCH(1); + } else { + OUT_BATCH(0); + OUT_BATCH(0); + } + + render->blend = blend; + return blend != NO_BLEND; +} + +static void +gen6_emit_sampler(struct sna *sna, uint32_t state) +{ + if (sna->render_state.gen6.samplers == state) + return; + + 
sna->render_state.gen6.samplers = state; + + DBG(("%s: sampler = %x\n", __FUNCTION__, state)); + + OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS | + GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS | + (4 - 2)); + OUT_BATCH(0); /* VS */ + OUT_BATCH(0); /* GS */ + OUT_BATCH(sna->render_state.gen6.wm_state + state); +} + +static void +gen6_emit_sf(struct sna *sna, bool has_mask) +{ + int num_sf_outputs = has_mask ? 2 : 1; + + if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs) + return; + + DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n", + __FUNCTION__, num_sf_outputs, 1, 0)); + + sna->render_state.gen6.num_sf_outputs = num_sf_outputs; + + OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2)); + OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT | + 1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT | + 1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT); + OUT_BATCH(0); + OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE); + OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* DW9 */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* DW14 */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* DW19 */ +} + +static void +gen6_emit_wm(struct sna *sna, unsigned int kernel, bool has_mask) +{ + const uint32_t *kernels; + + if (sna->render_state.gen6.kernel == kernel) + return; + + sna->render_state.gen6.kernel = kernel; + kernels = sna->render_state.gen6.wm_kernel[kernel]; + + DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d,32-pixel? 
%d)\n", + __FUNCTION__, + wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces, + kernels[0], kernels[1], kernels[2])); + + OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2)); + OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]); + OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT | + wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT); + OUT_BATCH(0); /* scratch space */ + OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT | + 8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT | + 6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT); + OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT | + (kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) | + (kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) | + (kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) | + GEN6_3DSTATE_WM_DISPATCH_ENABLE); + OUT_BATCH((1 + has_mask) << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT | + GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); + OUT_BATCH(kernels[2]); + OUT_BATCH(kernels[1]); +} + +static bool +gen6_emit_binding_table(struct sna *sna, uint16_t offset) +{ + if (sna->render_state.gen6.surface_table == offset) + return false; + + /* Binding table pointers */ + OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS | + GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS | + (4 - 2)); + OUT_BATCH(0); /* vs */ + OUT_BATCH(0); /* gs */ + /* Only the PS uses the binding table */ + OUT_BATCH(offset*4); + + sna->render_state.gen6.surface_table = offset; + return true; +} + +static bool +gen6_emit_drawing_rectangle(struct sna *sna, + const struct sna_composite_op *op) +{ + uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); + uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; + + assert(!too_large(op->dst.x, op->dst.y)); + assert(!too_large(op->dst.width, op->dst.height)); + + if (sna->render_state.gen6.drawrect_limit == limit && + sna->render_state.gen6.drawrect_offset == offset) + 
return false; + + /* [DevSNB-C+{W/A}] Before any depth stall flush (including those + * produced by non-pipelined state commands), software needs to first + * send a PIPE_CONTROL with no bits set except Post-Sync Operation != + * 0. + * + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent + * BEFORE the pipe-control with a post-sync op and no write-cache + * flushes. + */ + if (!sna->render_state.gen6.first_state_packet) { + OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL | + GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(0); + OUT_BATCH(0); + } + + OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME); + OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, + sna->render_state.gen6.general_bo, + I915_GEM_DOMAIN_INSTRUCTION << 16 | + I915_GEM_DOMAIN_INSTRUCTION, + 64)); + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(limit); + OUT_BATCH(offset); + + sna->render_state.gen6.drawrect_offset = offset; + sna->render_state.gen6.drawrect_limit = limit; + return true; +} + +static void +gen6_emit_vertex_elements(struct sna *sna, + const struct sna_composite_op *op) +{ + /* + * vertex data in vertex buffer + * position: (x, y) + * texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0) + * texture coordinate 1 if (has_mask is true): same as above + */ + struct gen6_render_state *render = &sna->render_state.gen6; + uint32_t src_format, dw; + int id = GEN6_VERTEX(op->u.gen6.flags); + bool has_mask; + + DBG(("%s: setup id=%d\n", __FUNCTION__, id)); + + if (render->ve_id == id) + return; + render->ve_id = id; + + /* The VUE layout + * dword 0-3: pad (0.0, 0.0, 0.0. 
0.0) + * dword 4-7: position (x, y, 1.0, 1.0), + * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) + * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) + * + * dword 4-15 are fetched from vertex buffer + */ + has_mask = (id >> 2) != 0; + OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS | + ((2 * (3 + has_mask)) + 1 - 2)); + + OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | + GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT | + 0 << VE0_OFFSET_SHIFT); + OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT | + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT | + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT); + + /* x,y */ + OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | + GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | + 0 << VE0_OFFSET_SHIFT); + OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | + GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | + GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT); + + /* u0, v0, w0 */ + DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3)); + dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; + switch (id & 3) { + default: + assert(0); + case 0: + src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; + break; + case 1: + src_format = GEN6_SURFACEFORMAT_R32_FLOAT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; + break; + case 2: + src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << 
VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; + break; + case 3: + src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; + break; + } + OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | + src_format << VE0_FORMAT_SHIFT | + 4 << VE0_OFFSET_SHIFT); + OUT_BATCH(dw); + + /* u1, v1, w1 */ + if (has_mask) { + unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float); + DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset)); + dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; + switch (id >> 2) { + case 1: + src_format = GEN6_SURFACEFORMAT_R32_FLOAT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; + break; + default: + assert(0); + case 2: + src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; + break; + case 3: + src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; + break; + } + OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | + src_format << VE0_FORMAT_SHIFT | + offset << VE0_OFFSET_SHIFT); + OUT_BATCH(dw); + } +} + +static void +gen6_emit_flush(struct sna *sna) +{ + OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH | + GEN6_PIPE_CONTROL_TC_FLUSH | + GEN6_PIPE_CONTROL_CS_STALL); + OUT_BATCH(0); + OUT_BATCH(0); +} + +static void +gen6_emit_state(struct sna *sna, + const 
struct sna_composite_op *op, + uint16_t wm_binding_table) +{ + bool need_stall = wm_binding_table & 1; + + assert(op->dst.bo->exec); + + if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags))) + need_stall = false; + gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags)); + gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2); + gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2); + gen6_emit_vertex_elements(sna, op); + + need_stall |= gen6_emit_binding_table(sna, wm_binding_table & ~1); + if (gen6_emit_drawing_rectangle(sna, op)) + need_stall = false; + if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { + gen6_emit_flush(sna); + kgem_clear_dirty(&sna->kgem); + assert(op->dst.bo->exec); + kgem_bo_mark_dirty(op->dst.bo); + need_stall = false; + } + if (need_stall) { + OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL | + GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(0); + OUT_BATCH(0); + } + sna->render_state.gen6.first_state_packet = false; +} + +static bool gen6_magic_ca_pass(struct sna *sna, + const struct sna_composite_op *op) +{ + struct gen6_render_state *state = &sna->render_state.gen6; + + if (!op->need_magic_ca_pass) + return false; + + DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__, + sna->render.vertex_start, sna->render.vertex_index)); + + gen6_emit_flush(sna); + + gen6_emit_cc(sna, gen6_get_blend(PictOpAdd, true, op->dst.format)); + gen6_emit_wm(sna, + gen6_choose_composite_kernel(PictOpAdd, + true, true, + op->is_affine), + true); + + OUT_BATCH(GEN6_3DPRIMITIVE | + GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL | + _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT | + 0 << 9 | + 4); + OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start); + OUT_BATCH(sna->render.vertex_start); + OUT_BATCH(1); /* single instance */ + OUT_BATCH(0); /* start instance location */ + OUT_BATCH(0); /* index buffer offset, ignored */ + + state->last_primitive = sna->kgem.nbatch; + return true; 
+} + +typedef struct gen6_surface_state_padded { + struct gen6_surface_state state; + char pad[32 - sizeof(struct gen6_surface_state)]; +} gen6_surface_state_padded; + +static void null_create(struct sna_static_stream *stream) +{ + /* A bunch of zeros useful for legacy border color and depth-stencil */ + sna_static_stream_map(stream, 64, 64); +} + +static void scratch_create(struct sna_static_stream *stream) +{ + /* 64 bytes of scratch space for random writes, such as + * the pipe-control w/a. + */ + sna_static_stream_map(stream, 64, 64); +} + +static void +sampler_state_init(struct gen6_sampler_state *sampler_state, + sampler_filter_t filter, + sampler_extend_t extend) +{ + sampler_state->ss0.lod_preclamp = 1; /* GL mode */ + + /* We use the legacy mode to get the semantics specified by + * the Render extension. */ + sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY; + + switch (filter) { + default: + case SAMPLER_FILTER_NEAREST: + sampler_state->ss0.min_filter = GEN6_MAPFILTER_NEAREST; + sampler_state->ss0.mag_filter = GEN6_MAPFILTER_NEAREST; + break; + case SAMPLER_FILTER_BILINEAR: + sampler_state->ss0.min_filter = GEN6_MAPFILTER_LINEAR; + sampler_state->ss0.mag_filter = GEN6_MAPFILTER_LINEAR; + break; + } + + switch (extend) { + default: + case SAMPLER_EXTEND_NONE: + sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; + sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; + sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; + break; + case SAMPLER_EXTEND_REPEAT: + sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP; + sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP; + sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP; + break; + case SAMPLER_EXTEND_PAD: + sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; + sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; + sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; + break; + case SAMPLER_EXTEND_REFLECT: + 
sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; + sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; + sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; + break; + } +} + +static void +sampler_copy_init(struct gen6_sampler_state *ss) +{ + sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); + ss->ss3.non_normalized_coord = 1; + + sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); +} + +static void +sampler_fill_init(struct gen6_sampler_state *ss) +{ + sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT); + ss->ss3.non_normalized_coord = 1; + + sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE); +} + +static uint32_t +gen6_tiling_bits(uint32_t tiling) +{ + switch (tiling) { + default: assert(0); + case I915_TILING_NONE: return 0; + case I915_TILING_X: return GEN6_SURFACE_TILED; + case I915_TILING_Y: return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y; + } +} + +/** + * Sets up the common fields for a surface state buffer for the given + * picture in the given surface state buffer. + */ +static int +gen6_bind_bo(struct sna *sna, + struct kgem_bo *bo, + uint32_t width, + uint32_t height, + uint32_t format, + bool is_dst) +{ + uint32_t *ss; + uint32_t domains; + uint16_t offset; + uint32_t is_scanout = is_dst && bo->scanout; + + /* After the first bind, we manage the cache domains within the batch */ + offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31); + if (offset) { + DBG(("[%x] bo(handle=%d), format=%d, reuse %s binding\n", + offset, bo->handle, format, + is_dst ? 
"render" : "sampler")); + if (is_dst) + kgem_bo_mark_dirty(bo); + return offset * sizeof(uint32_t); + } + + offset = sna->kgem.surface -= + sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); + ss = sna->kgem.batch + offset; + ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT | + GEN6_SURFACE_BLEND_ENABLED | + format << GEN6_SURFACE_FORMAT_SHIFT); + if (is_dst) { + ss[0] |= GEN6_SURFACE_RC_READ_WRITE; + domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER; + } else + domains = I915_GEM_DOMAIN_SAMPLER << 16; + ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0); + ss[2] = ((width - 1) << GEN6_SURFACE_WIDTH_SHIFT | + (height - 1) << GEN6_SURFACE_HEIGHT_SHIFT); + assert(bo->pitch <= (1 << 18)); + ss[3] = (gen6_tiling_bits(bo->tiling) | + (bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT); + ss[4] = 0; + ss[5] = (is_scanout || bo->io) ? 0 : 3 << 16; + + kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset); + + DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", + offset, bo->handle, ss[1], + format, width, height, bo->pitch, bo->tiling, + domains & 0xffff ? 
"render" : "sampler")); + + return offset * sizeof(uint32_t); +} + +static void gen6_emit_vertex_buffer(struct sna *sna, + const struct sna_composite_op *op) +{ + int id = GEN6_VERTEX(op->u.gen6.flags); + + OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3); + OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA | + 4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT); + sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; + OUT_BATCH(0); + OUT_BATCH(~0); /* max address: disabled */ + OUT_BATCH(0); + + sna->render.vb_id |= 1 << id; +} + +static void gen6_emit_primitive(struct sna *sna) +{ + if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) { + DBG(("%s: continuing previous primitive, start=%d, index=%d\n", + __FUNCTION__, + sna->render.vertex_start, + sna->render.vertex_index)); + sna->render.vertex_offset = sna->kgem.nbatch - 5; + return; + } + + OUT_BATCH(GEN6_3DPRIMITIVE | + GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL | + _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT | + 0 << 9 | + 4); + sna->render.vertex_offset = sna->kgem.nbatch; + OUT_BATCH(0); /* vertex count, to be filled in later */ + OUT_BATCH(sna->render.vertex_index); + OUT_BATCH(1); /* single instance */ + OUT_BATCH(0); /* start instance location */ + OUT_BATCH(0); /* index buffer offset, ignored */ + sna->render.vertex_start = sna->render.vertex_index; + DBG(("%s: started new primitive: index=%d\n", + __FUNCTION__, sna->render.vertex_start)); + + sna->render_state.gen6.last_primitive = sna->kgem.nbatch; +} + +static bool gen6_rectangle_begin(struct sna *sna, + const struct sna_composite_op *op) +{ + int id = 1 << GEN6_VERTEX(op->u.gen6.flags); + int ndwords; + + if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset) + return true; + + ndwords = op->need_magic_ca_pass ? 
60 : 6; + if ((sna->render.vb_id & id) == 0) + ndwords += 5; + if (!kgem_check_batch(&sna->kgem, ndwords)) + return false; + + if ((sna->render.vb_id & id) == 0) + gen6_emit_vertex_buffer(sna, op); + + gen6_emit_primitive(sna); + return true; +} + +static int gen6_get_rectangles__flush(struct sna *sna, + const struct sna_composite_op *op) +{ + /* Preventing discarding new vbo after lock contention */ + if (sna_vertex_wait__locked(&sna->render)) { + int rem = vertex_space(sna); + if (rem > op->floats_per_rect) + return rem; + } + + if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5)) + return 0; + if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) + return 0; + + if (sna->render.vertex_offset) { + gen4_vertex_flush(sna); + if (gen6_magic_ca_pass(sna, op)) { + gen6_emit_flush(sna); + gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)); + gen6_emit_wm(sna, + GEN6_KERNEL(op->u.gen6.flags), + GEN6_VERTEX(op->u.gen6.flags) >> 2); + } + } + + return gen4_vertex_finish(sna); +} + +inline static int gen6_get_rectangles(struct sna *sna, + const struct sna_composite_op *op, + int want, + void (*emit_state)(struct sna *, const struct sna_composite_op *op)) +{ + int rem; + + assert(want); + +start: + rem = vertex_space(sna); + if (unlikely(rem < op->floats_per_rect)) { + DBG(("flushing vbo for %s: %d < %d\n", + __FUNCTION__, rem, op->floats_per_rect)); + rem = gen6_get_rectangles__flush(sna, op); + if (unlikely(rem == 0)) + goto flush; + } + + if (unlikely(sna->render.vertex_offset == 0)) { + if (!gen6_rectangle_begin(sna, op)) + goto flush; + else + goto start; + } + + assert(rem <= vertex_space(sna)); + assert(op->floats_per_rect <= rem); + if (want > 1 && want * op->floats_per_rect > rem) + want = rem / op->floats_per_rect; + + assert(want > 0); + sna->render.vertex_index += 3*want; + return want; + +flush: + if (sna->render.vertex_offset) { + gen4_vertex_flush(sna); + gen6_magic_ca_pass(sna, op); + } + sna_vertex_wait__locked(&sna->render); + 
_kgem_submit(&sna->kgem); + emit_state(sna, op); + goto start; +} + +inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna, + uint16_t *offset) +{ + uint32_t *table; + + sna->kgem.surface -= + sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); + /* Clear all surplus entries to zero in case of prefetch */ + table = memset(sna->kgem.batch + sna->kgem.surface, + 0, sizeof(struct gen6_surface_state_padded)); + + DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface)); + + *offset = sna->kgem.surface; + return table; +} + +static bool +gen6_get_batch(struct sna *sna, const struct sna_composite_op *op) +{ + kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo); + + if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) { + DBG(("%s: flushing batch: %d < %d+%d\n", + __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch, + 150, 4*8)); + kgem_submit(&sna->kgem); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + if (sna->render_state.gen6.needs_invariant) + gen6_emit_invariant(sna); + + return kgem_bo_is_dirty(op->dst.bo); +} + +static void gen6_emit_composite_state(struct sna *sna, + const struct sna_composite_op *op) +{ + uint32_t *binding_table; + uint16_t offset; + bool dirty; + + dirty = gen6_get_batch(sna, op); + + binding_table = gen6_composite_get_binding_table(sna, &offset); + + binding_table[0] = + gen6_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen6_get_dest_format(op->dst.format), + true); + binding_table[1] = + gen6_bind_bo(sna, + op->src.bo, op->src.width, op->src.height, + op->src.card_format, + false); + if (op->mask.bo) { + binding_table[2] = + gen6_bind_bo(sna, + op->mask.bo, + op->mask.width, + op->mask.height, + op->mask.card_format, + false); + } + + if (sna->kgem.surface == offset && + *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table && + (op->mask.bo == NULL || + sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) { + sna->kgem.surface += 
sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); + offset = sna->render_state.gen6.surface_table; + } + + gen6_emit_state(sna, op, offset | dirty); +} + +static void +gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op) +{ + assert (sna->render.vertex_offset == 0); + if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) { + if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect) + gen4_vertex_finish(sna); + + DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n", + sna->render_state.gen6.floats_per_vertex, + op->floats_per_vertex, + sna->render.vertex_index, + (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex)); + sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; + sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex; + sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex; + } + assert((sna->render.vertex_used % op->floats_per_vertex) == 0); +} + +fastcall static void +gen6_render_composite_blt(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state); + op->prim_emit(sna, op, r); +} + +#if 0 +fastcall static void +gen6_render_composite_box(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box) +{ + struct sna_composite_rectangles r; + + gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state); + + DBG((" %s: (%d, %d), (%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2)); + + r.dst.x = box->x1; + r.dst.y = box->y1; + r.width = box->x2 - box->x1; + r.height = box->y2 - box->y1; + r.src = r.mask = r.dst; + + op->prim_emit(sna, op, &r); +} + +static void +gen6_render_composite_boxes__blt(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("composite_boxes(%d)\n", nbox)); + + do { + int 
nbox_this_time; + + nbox_this_time = gen6_get_rectangles(sna, op, nbox, + gen6_emit_composite_state); + nbox -= nbox_this_time; + + do { + struct sna_composite_rectangles r; + + DBG((" %s: (%d, %d), (%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2)); + + r.dst.x = box->x1; + r.dst.y = box->y1; + r.width = box->x2 - box->x1; + r.height = box->y2 - box->y1; + r.src = r.mask = r.dst; + + op->prim_emit(sna, op, &r); + box++; + } while (--nbox_this_time); + } while (nbox); +} + +static void +gen6_render_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen6_get_rectangles(sna, op, nbox, + gen6_emit_composite_state); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + } while (nbox); +} + +static void +gen6_render_composite_boxes__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen6_get_rectangles(sna, op, nbox, + gen6_emit_composite_state); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} +#endif + +#ifndef MAX +#define MAX(a,b) ((a) > (b) ? 
(a) : (b)) +#endif + +static uint32_t +gen6_composite_create_blend_state(struct sna_static_stream *stream) +{ + char *base, *ptr; + int src, dst; + + base = sna_static_stream_map(stream, + GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE, + 64); + + ptr = base; + for (src = 0; src < GEN6_BLENDFACTOR_COUNT; src++) { + for (dst= 0; dst < GEN6_BLENDFACTOR_COUNT; dst++) { + struct gen6_blend_state *blend = + (struct gen6_blend_state *)ptr; + + blend->blend0.dest_blend_factor = dst; + blend->blend0.source_blend_factor = src; + blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD; + blend->blend0.blend_enable = + !(dst == GEN6_BLENDFACTOR_ZERO && src == GEN6_BLENDFACTOR_ONE); + + blend->blend1.post_blend_clamp_enable = 1; + blend->blend1.pre_blend_clamp_enable = 1; + + ptr += GEN6_BLEND_STATE_PADDED_SIZE; + } + } + + return sna_static_stream_offsetof(stream, base); +} + +#if 0 +static uint32_t gen6_bind_video_source(struct sna *sna, + struct kgem_bo *src_bo, + uint32_t src_offset, + int src_width, + int src_height, + int src_pitch, + uint32_t src_surf_format) +{ + struct gen6_surface_state *ss; + + sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); + + ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss)); + ss->ss0.surface_type = GEN6_SURFACE_2D; + ss->ss0.surface_format = src_surf_format; + + ss->ss1.base_addr = + kgem_add_reloc(&sna->kgem, + sna->kgem.surface + 1, + src_bo, + I915_GEM_DOMAIN_SAMPLER << 16, + src_offset); + + ss->ss2.width = src_width - 1; + ss->ss2.height = src_height - 1; + ss->ss3.pitch = src_pitch - 1; + + return sna->kgem.surface * sizeof(uint32_t); +} + +static void gen6_emit_video_state(struct sna *sna, + const struct sna_composite_op *op) +{ + struct sna_video_frame *frame = op->priv; + uint32_t src_surf_format; + uint32_t src_surf_base[6]; + int src_width[6]; + int src_height[6]; + int src_pitch[6]; + uint32_t *binding_table; + uint16_t offset; + bool dirty; + int n_src, n; + 
+ dirty = gen6_get_batch(sna, op); + + src_surf_base[0] = 0; + src_surf_base[1] = 0; + src_surf_base[2] = frame->VBufOffset; + src_surf_base[3] = frame->VBufOffset; + src_surf_base[4] = frame->UBufOffset; + src_surf_base[5] = frame->UBufOffset; + + if (is_planar_fourcc(frame->id)) { + src_surf_format = GEN6_SURFACEFORMAT_R8_UNORM; + src_width[1] = src_width[0] = frame->width; + src_height[1] = src_height[0] = frame->height; + src_pitch[1] = src_pitch[0] = frame->pitch[1]; + src_width[4] = src_width[5] = src_width[2] = src_width[3] = + frame->width / 2; + src_height[4] = src_height[5] = src_height[2] = src_height[3] = + frame->height / 2; + src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = + frame->pitch[0]; + n_src = 6; + } else { + if (frame->id == FOURCC_UYVY) + src_surf_format = GEN6_SURFACEFORMAT_YCRCB_SWAPY; + else + src_surf_format = GEN6_SURFACEFORMAT_YCRCB_NORMAL; + + src_width[0] = frame->width; + src_height[0] = frame->height; + src_pitch[0] = frame->pitch[0]; + n_src = 1; + } + + binding_table = gen6_composite_get_binding_table(sna, &offset); + + binding_table[0] = + gen6_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen6_get_dest_format(op->dst.format), + true); + for (n = 0; n < n_src; n++) { + binding_table[1+n] = + gen6_bind_video_source(sna, + frame->bo, + src_surf_base[n], + src_width[n], + src_height[n], + src_pitch[n], + src_surf_format); + } + + gen6_emit_state(sna, op, offset | dirty); +} + +static bool +gen6_render_video(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + RegionPtr dstRegion, + PixmapPtr pixmap) +{ + struct sna_composite_op tmp; + int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; + int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; + int src_width = frame->src.x2 - frame->src.x1; + int src_height = frame->src.y2 - frame->src.y1; + float src_offset_x, src_offset_y; + float src_scale_x, src_scale_y; + int nbox, pix_xoff, pix_yoff; + struct sna_pixmap 
*priv; + unsigned filter; + BoxPtr box; + + DBG(("%s: src=(%d, %d), dst=(%d, %d), %ldx[(%d, %d), (%d, %d)...]\n", + __FUNCTION__, + src_width, src_height, dst_width, dst_height, + (long)REGION_NUM_RECTS(dstRegion), + REGION_EXTENTS(NULL, dstRegion)->x1, + REGION_EXTENTS(NULL, dstRegion)->y1, + REGION_EXTENTS(NULL, dstRegion)->x2, + REGION_EXTENTS(NULL, dstRegion)->y2)); + + priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE); + if (priv == NULL) + return false; + + memset(&tmp, 0, sizeof(tmp)); + + tmp.dst.pixmap = pixmap; + tmp.dst.width = pixmap->drawable.width; + tmp.dst.height = pixmap->drawable.height; + tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth); + tmp.dst.bo = priv->gpu_bo; + + tmp.src.bo = frame->bo; + tmp.mask.bo = NULL; + + tmp.floats_per_vertex = 3; + tmp.floats_per_rect = 9; + + if (src_width == dst_width && src_height == dst_height) + filter = SAMPLER_FILTER_NEAREST; + else + filter = SAMPLER_FILTER_BILINEAR; + + tmp.u.gen6.flags = + GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, + SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), + NO_BLEND, + is_planar_fourcc(frame->id) ? + GEN6_WM_KERNEL_VIDEO_PLANAR : + GEN6_WM_KERNEL_VIDEO_PACKED, + 2); + tmp.priv = frame; + + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); + if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { + kgem_submit(&sna->kgem); + assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + gen6_emit_video_state(sna, &tmp); + gen6_align_vertex(sna, &tmp); + + /* Set up the offset for translating from the given region (in screen + * coordinates) to the backing pixmap. 
+ */ +#ifdef COMPOSITE + pix_xoff = -pixmap->screen_x + pixmap->drawable.x; + pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +#else + pix_xoff = 0; + pix_yoff = 0; +#endif + + src_scale_x = (float)src_width / dst_width / frame->width; + src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; + + src_scale_y = (float)src_height / dst_height / frame->height; + src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; + + box = REGION_RECTS(dstRegion); + nbox = REGION_NUM_RECTS(dstRegion); + while (nbox--) { + BoxRec r; + + r.x1 = box->x1 + pix_xoff; + r.x2 = box->x2 + pix_xoff; + r.y1 = box->y1 + pix_yoff; + r.y2 = box->y2 + pix_yoff; + + gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state); + + OUT_VERTEX(r.x2, r.y2); + OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); + + OUT_VERTEX(r.x1, r.y2); + OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); + + OUT_VERTEX(r.x1, r.y1); + OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); + + if (!DAMAGE_IS_ALL(priv->gpu_damage)) { + sna_damage_add_box(&priv->gpu_damage, &r); + sna_damage_subtract_box(&priv->cpu_damage, &r); + } + box++; + } + + gen4_vertex_flush(sna); + return true; +} + +static int +gen6_composite_picture(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int x, int y, + int w, int h, + int dst_x, int dst_y, + bool precise) +{ + PixmapPtr pixmap; + uint32_t color; + int16_t dx, dy; + + DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", + __FUNCTION__, x, y, w, h, dst_x, dst_y)); + + channel->is_solid = false; + channel->card_format = -1; + + if (sna_picture_is_solid(picture, &color)) + return gen4_channel_init_solid(sna, channel, color); + + if (picture->pDrawable == NULL) { + int ret; + + if (picture->pSourcePict->type == SourcePictTypeLinear) + 
return gen4_channel_init_linear(sna, picture, channel, + x, y, + w, h, + dst_x, dst_y); + + DBG(("%s -- fixup, gradient\n", __FUNCTION__)); + ret = -1; + if (!precise) + ret = sna_render_picture_approximate_gradient(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + if (ret == -1) + ret = sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + return ret; + } + + if (picture->alphaMap) { + DBG(("%s -- fixup, alphamap\n", __FUNCTION__)); + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + } + + if (!gen6_check_repeat(picture)) + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + + if (!gen6_check_filter(picture)) + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + + channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; + channel->filter = picture->filter; + + pixmap = get_drawable_pixmap(picture->pDrawable); + get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); + + x += dx + picture->pDrawable->x; + y += dy + picture->pDrawable->y; + + channel->is_affine = sna_transform_is_affine(picture->transform); + if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) { + DBG(("%s: integer translation (%d, %d), removing\n", + __FUNCTION__, dx, dy)); + x += dx; + y += dy; + channel->transform = NULL; + channel->filter = PictFilterNearest; + } else + channel->transform = picture->transform; + + channel->pict_format = picture->format; + channel->card_format = gen6_get_card_format(picture->format); + if (channel->card_format == (unsigned)-1) + return sna_render_picture_convert(sna, picture, channel, pixmap, + x, y, w, h, dst_x, dst_y, + false); + + if (too_large(pixmap->drawable.width, pixmap->drawable.height)) { + DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__, + pixmap->drawable.width, pixmap->drawable.height)); + return sna_render_picture_extract(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + 
} + + return sna_render_pixmap_bo(sna, channel, pixmap, + x, y, w, h, dst_x, dst_y); +} + +inline static void gen6_composite_channel_convert(struct sna_composite_channel *channel) +{ + channel->repeat = gen6_repeat(channel->repeat); + channel->filter = gen6_filter(channel->filter); + if (channel->card_format == (unsigned)-1) + channel->card_format = gen6_get_card_format(channel->pict_format); + assert(channel->card_format != (unsigned)-1); +} +#endif + +static void gen6_render_composite_done(struct sna *sna, + const struct sna_composite_op *op) +{ + DBG(("%s\n", __FUNCTION__)); + + assert(!sna->render.active); + if (sna->render.vertex_offset) { + gen4_vertex_flush(sna); + gen6_magic_ca_pass(sna, op); + } + + +} + +#if 0 +static bool +gen6_composite_set_target(struct sna *sna, + struct sna_composite_op *op, + PicturePtr dst, + int x, int y, int w, int h, + bool partial) +{ + BoxRec box; + + op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); + op->dst.format = dst->format; + op->dst.width = op->dst.pixmap->drawable.width; + op->dst.height = op->dst.pixmap->drawable.height; + + if (w && h) { + box.x1 = x; + box.y1 = y; + box.x2 = x + w; + box.y2 = y + h; + } else + sna_render_picture_extents(dst, &box); + +// op->dst.bo = sna_drawable_use_bo (dst->pDrawable, +// PREFER_GPU | FORCE_GPU | RENDER_GPU, +// &box, &op->damage); + if (op->dst.bo == NULL) + return false; + + get_drawable_deltas(dst->pDrawable, op->dst.pixmap, + &op->dst.x, &op->dst.y); + + DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n", + __FUNCTION__, + op->dst.pixmap, (int)op->dst.format, + op->dst.width, op->dst.height, + op->dst.bo->pitch, + op->dst.x, op->dst.y, + op->damage ? 
*op->damage : (void *)-1)); + + assert(op->dst.bo->proxy == NULL); + + if (too_large(op->dst.width, op->dst.height) && + !sna_render_composite_redirect(sna, op, x, y, w, h)) + return false; + + return true; +} + +static bool +prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp) +{ + if (untiled_tlb_miss(tmp->dst.bo) || + untiled_tlb_miss(tmp->src.bo)) + return true; + + if (kgem_bo_is_render(tmp->dst.bo) || + kgem_bo_is_render(tmp->src.bo)) + return false; + + if (!prefer_blt_ring(sna, tmp->dst.bo, 0)) + return false; + + return prefer_blt_bo(sna, tmp->dst.bo) || prefer_blt_bo(sna, tmp->src.bo); +} + +static bool +gen6_render_composite(struct sna *sna, + uint8_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t msk_x, int16_t msk_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp) +{ + if (op >= ARRAY_SIZE(gen6_blend_op)) + return false; + + DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, + width, height, sna->kgem.ring)); + + if (mask == NULL && + try_blt(sna, dst, src, width, height) && + sna_blt_composite(sna, op, + src, dst, + src_x, src_y, + dst_x, dst_y, + width, height, + tmp, false)) + return true; + + if (gen6_composite_fallback(sna, src, mask, dst)) + return false; + + if (need_tiling(sna, width, height)) + return sna_tiling_composite(op, src, mask, dst, + src_x, src_y, + msk_x, msk_y, + dst_x, dst_y, + width, height, + tmp); + + if (op == PictOpClear) + op = PictOpSrc; + tmp->op = op; + if (!gen6_composite_set_target(sna, tmp, dst, + dst_x, dst_y, width, height, + op > PictOpSrc || dst->pCompositeClip->data)) + return false; + + switch (gen6_composite_picture(sna, src, &tmp->src, + src_x, src_y, + width, height, + dst_x, dst_y, + dst->polyMode == PolyModePrecise)) { + case -1: + goto cleanup_dst; + case 0: + if (!gen4_channel_init_solid(sna, &tmp->src, 0)) + goto cleanup_dst; + /* fall through to fixup */ + case 1: + /* Did we just switch 
rings to prepare the source? */ + if (mask == NULL && + prefer_blt_composite(sna, tmp) && + sna_blt_composite__convert(sna, + dst_x, dst_y, width, height, + tmp)) + return true; + + gen6_composite_channel_convert(&tmp->src); + break; + } + + tmp->is_affine = tmp->src.is_affine; + tmp->has_component_alpha = false; + tmp->need_magic_ca_pass = false; + + tmp->mask.bo = NULL; + tmp->mask.filter = SAMPLER_FILTER_NEAREST; + tmp->mask.repeat = SAMPLER_EXTEND_NONE; + + if (mask) { + if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { + tmp->has_component_alpha = true; + + /* Check if it's component alpha that relies on a source alpha and on + * the source value. We can only get one of those into the single + * source value that we get to blend with. + */ + if (gen6_blend_op[op].src_alpha && + (gen6_blend_op[op].src_blend != GEN6_BLENDFACTOR_ZERO)) { + if (op != PictOpOver) + goto cleanup_src; + + tmp->need_magic_ca_pass = true; + tmp->op = PictOpOutReverse; + } + } + + if (!reuse_source(sna, + src, &tmp->src, src_x, src_y, + mask, &tmp->mask, msk_x, msk_y)) { + switch (gen6_composite_picture(sna, mask, &tmp->mask, + msk_x, msk_y, + width, height, + dst_x, dst_y, + dst->polyMode == PolyModePrecise)) { + case -1: + goto cleanup_src; + case 0: + if (!gen4_channel_init_solid(sna, &tmp->mask, 0)) + goto cleanup_src; + /* fall through to fixup */ + case 1: + gen6_composite_channel_convert(&tmp->mask); + break; + } + } + + tmp->is_affine &= tmp->mask.is_affine; + } + + tmp->u.gen6.flags = + GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, + tmp->src.repeat, + tmp->mask.filter, + tmp->mask.repeat), + gen6_get_blend(tmp->op, + tmp->has_component_alpha, + tmp->dst.format), + gen6_choose_composite_kernel(tmp->op, + tmp->mask.bo != NULL, + tmp->has_component_alpha, + tmp->is_affine), + gen4_choose_composite_emitter(sna, tmp)); + + tmp->blt = gen6_render_composite_blt; + tmp->box = gen6_render_composite_box; + tmp->boxes = gen6_render_composite_boxes__blt; + if (tmp->emit_boxes) 
{ + tmp->boxes = gen6_render_composite_boxes; + tmp->thread_boxes = gen6_render_composite_boxes__thread; + } + tmp->done = gen6_render_composite_done; + + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); + if (!kgem_check_bo(&sna->kgem, + tmp->dst.bo, tmp->src.bo, tmp->mask.bo, + NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, + tmp->dst.bo, tmp->src.bo, tmp->mask.bo, + NULL)) + goto cleanup_mask; + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + gen6_emit_composite_state(sna, tmp); + gen6_align_vertex(sna, tmp); + return true; + +cleanup_mask: + if (tmp->mask.bo) + kgem_bo_destroy(&sna->kgem, tmp->mask.bo); +cleanup_src: + if (tmp->src.bo) + kgem_bo_destroy(&sna->kgem, tmp->src.bo); +cleanup_dst: + if (tmp->redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp->dst.bo); + return false; +} + +#if !NO_COMPOSITE_SPANS +fastcall static void +gen6_render_composite_spans_box(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, float opacity) +{ + DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", + __FUNCTION__, + op->base.src.offset[0], op->base.src.offset[1], + opacity, + op->base.dst.x, op->base.dst.y, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1)); + + gen6_get_rectangles(sna, &op->base, 1, gen6_emit_composite_state); + op->prim_emit(sna, op, box, opacity); +} + +static void +gen6_render_composite_spans_boxes(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, int nbox, + float opacity) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + opacity, + op->base.dst.x, op->base.dst.y)); + + do { + int nbox_this_time; + + nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, + gen6_emit_composite_state); + nbox -= nbox_this_time; + + do { + DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1)); + 
+ op->prim_emit(sna, op, box++, opacity); + } while (--nbox_this_time); + } while (nbox); +} + +fastcall static void +gen6_render_composite_spans_boxes__thread(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + op->base.dst.x, op->base.dst.y)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, + gen6_emit_composite_state); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +fastcall static void +gen6_render_composite_spans_done(struct sna *sna, + const struct sna_composite_spans_op *op) +{ + DBG(("%s()\n", __FUNCTION__)); + assert(!sna->render.active); + + if (sna->render.vertex_offset) + gen4_vertex_flush(sna); + + if (op->base.src.bo) + kgem_bo_destroy(&sna->kgem, op->base.src.bo); + + sna_render_composite_redirect_done(sna, &op->base); +} + +static bool +gen6_check_composite_spans(struct sna *sna, + uint8_t op, PicturePtr src, PicturePtr dst, + int16_t width, int16_t height, + unsigned flags) +{ + DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n", + __FUNCTION__, op, width, height, flags)); + + if (op >= ARRAY_SIZE(gen6_blend_op)) + return false; + + if (gen6_composite_fallback(sna, src, NULL, dst)) { + DBG(("%s: operation would fallback\n", __FUNCTION__)); + return false; + } + + if (need_tiling(sna, width, height) && + !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { + 
DBG(("%s: fallback, tiled operation not on GPU\n", + __FUNCTION__)); + return false; + } + + if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) { + struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable); + assert(priv); + + if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) + return true; + + if (flags & COMPOSITE_SPANS_INPLACE_HINT) + return false; + + return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo); + } + + return true; +} + +static bool +gen6_render_composite_spans(struct sna *sna, + uint8_t op, + PicturePtr src, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + unsigned flags, + struct sna_composite_spans_op *tmp) +{ + DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__, + width, height, flags, sna->kgem.ring)); + + assert(gen6_check_composite_spans(sna, op, src, dst, width, height, flags)); + + if (need_tiling(sna, width, height)) { + DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", + __FUNCTION__, width, height)); + return sna_tiling_composite_spans(op, src, dst, + src_x, src_y, dst_x, dst_y, + width, height, flags, tmp); + } + + tmp->base.op = op; + if (!gen6_composite_set_target(sna, &tmp->base, dst, + dst_x, dst_y, width, height, true)) + return false; + + switch (gen6_composite_picture(sna, src, &tmp->base.src, + src_x, src_y, + width, height, + dst_x, dst_y, + dst->polyMode == PolyModePrecise)) { + case -1: + goto cleanup_dst; + case 0: + if (!gen4_channel_init_solid(sna, &tmp->base.src, 0)) + goto cleanup_dst; + /* fall through to fixup */ + case 1: + gen6_composite_channel_convert(&tmp->base.src); + break; + } + tmp->base.mask.bo = NULL; + + tmp->base.is_affine = tmp->base.src.is_affine; + tmp->base.need_magic_ca_pass = false; + + tmp->base.u.gen6.flags = + GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter, + tmp->base.src.repeat, + SAMPLER_FILTER_NEAREST, + SAMPLER_EXTEND_PAD), + gen6_get_blend(tmp->base.op, false, tmp->base.dst.format), + 
GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine, + gen4_choose_spans_emitter(sna, tmp)); + + tmp->box = gen6_render_composite_spans_box; + tmp->boxes = gen6_render_composite_spans_boxes; + if (tmp->emit_boxes) + tmp->thread_boxes = gen6_render_composite_spans_boxes__thread; + tmp->done = gen6_render_composite_spans_done; + + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo); + if (!kgem_check_bo(&sna->kgem, + tmp->base.dst.bo, tmp->base.src.bo, + NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, + tmp->base.dst.bo, tmp->base.src.bo, + NULL)) + goto cleanup_src; + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + gen6_emit_composite_state(sna, &tmp->base); + gen6_align_vertex(sna, &tmp->base); + return true; + +cleanup_src: + if (tmp->base.src.bo) + kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); +cleanup_dst: + if (tmp->base.redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); + return false; +} +#endif + +static void +gen6_emit_copy_state(struct sna *sna, + const struct sna_composite_op *op) +{ + uint32_t *binding_table; + uint16_t offset; + bool dirty; + + dirty = gen6_get_batch(sna, op); + + binding_table = gen6_composite_get_binding_table(sna, &offset); + + binding_table[0] = + gen6_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen6_get_dest_format(op->dst.format), + true); + binding_table[1] = + gen6_bind_bo(sna, + op->src.bo, op->src.width, op->src.height, + op->src.card_format, + false); + + if (sna->kgem.surface == offset && + *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) { + sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); + offset = sna->render_state.gen6.surface_table; + } + + gen6_emit_state(sna, op, offset | dirty); +} + +static inline bool prefer_blt_copy(struct sna *sna, + struct kgem_bo *src_bo, + struct kgem_bo *dst_bo, + unsigned flags) +{ + if (flags & COPY_SYNC) + return false; + + if (PREFER_RENDER) + return 
PREFER_RENDER > 0; + + if (sna->kgem.ring == KGEM_BLT) + return true; + + if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags)) + return true; + + if (untiled_tlb_miss(src_bo) || + untiled_tlb_miss(dst_bo)) + return true; + + if (kgem_bo_is_render(dst_bo) || + kgem_bo_is_render(src_bo)) + return false; + + if (!prefer_blt_ring(sna, dst_bo, flags)) + return false; + + return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo); +} + +inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents) +{ + *extents = box[0]; + while (--n) { + box++; + + if (box->x1 < extents->x1) + extents->x1 = box->x1; + if (box->x2 > extents->x2) + extents->x2 = box->x2; + + if (box->y1 < extents->y1) + extents->y1 = box->y1; + if (box->y2 > extents->y2) + extents->y2 = box->y2; + } +} + +static inline bool +overlaps(struct sna *sna, + struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n, BoxRec *extents) +{ + if (src_bo != dst_bo) + return false; + + boxes_extents(box, n, extents); + return (extents->x2 + src_dx > extents->x1 + dst_dx && + extents->x1 + src_dx < extents->x2 + dst_dx && + extents->y2 + src_dy > extents->y1 + dst_dy && + extents->y1 + src_dy < extents->y2 + dst_dy); +} + +static bool +gen6_render_copy_boxes(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n, unsigned flags) +{ + struct sna_composite_op tmp; + BoxRec extents; + + DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? 
%d\n", + __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, + src_bo == dst_bo, + overlaps(sna, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + box, n, &extents))); + + if (prefer_blt_copy(sna, src_bo, dst_bo, flags) && + sna_blt_compare_depth(&src->drawable, &dst->drawable) && + sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->drawable.bitsPerPixel, + box, n)) + return true; + + if (!(alu == GXcopy || alu == GXclear)) { +fallback_blt: + if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) + return false; + + return sna_blt_copy_boxes_fallback(sna, alu, + src, src_bo, src_dx, src_dy, + dst, dst_bo, dst_dx, dst_dy, + box, n); + } + + if (overlaps(sna, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + box, n, &extents)) { + if (too_large(extents.x2-extents.x1, extents.y2-extents.y1)) + goto fallback_blt; + + if (can_switch_to_blt(sna, dst_bo, flags) && + sna_blt_compare_depth(&src->drawable, &dst->drawable) && + sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->drawable.bitsPerPixel, + box, n)) + return true; + + return sna_render_copy_boxes__overlap(sna, alu, + src, src_bo, src_dx, src_dy, + dst, dst_bo, dst_dx, dst_dy, + box, n, &extents); + } + + if (dst->drawable.depth == src->drawable.depth) { + tmp.dst.format = sna_render_format_for_depth(dst->drawable.depth); + tmp.src.pict_format = tmp.dst.format; + } else { + tmp.dst.format = sna_format_for_depth(dst->drawable.depth); + tmp.src.pict_format = sna_format_for_depth(src->drawable.depth); + } + if (!gen6_check_format(tmp.src.pict_format)) + goto fallback_blt; + + tmp.dst.pixmap = dst; + tmp.dst.width = dst->drawable.width; + tmp.dst.height = dst->drawable.height; + tmp.dst.bo = dst_bo; + tmp.dst.x = tmp.dst.y = 0; + tmp.damage = NULL; + + sna_render_composite_redirect_init(&tmp); + if (too_large(tmp.dst.width, tmp.dst.height)) { + int i; + + extents = box[0]; + for (i = 1; i < n; i++) { + if (box[i].x1 < extents.x1) + 
extents.x1 = box[i].x1; + if (box[i].y1 < extents.y1) + extents.y1 = box[i].y1; + + if (box[i].x2 > extents.x2) + extents.x2 = box[i].x2; + if (box[i].y2 > extents.y2) + extents.y2 = box[i].y2; + } + + if (!sna_render_composite_redirect(sna, &tmp, + extents.x1 + dst_dx, + extents.y1 + dst_dy, + extents.x2 - extents.x1, + extents.y2 - extents.y1, + n > 1)) + goto fallback_tiled; + + dst_dx += tmp.dst.x; + dst_dy += tmp.dst.y; + + tmp.dst.x = tmp.dst.y = 0; + } + + tmp.src.card_format = gen6_get_card_format(tmp.src.pict_format); + if (too_large(src->drawable.width, src->drawable.height)) { + int i; + + extents = box[0]; + for (i = 1; i < n; i++) { + if (box[i].x1 < extents.x1) + extents.x1 = box[i].x1; + if (box[i].y1 < extents.y1) + extents.y1 = box[i].y1; + + if (box[i].x2 > extents.x2) + extents.x2 = box[i].x2; + if (box[i].y2 > extents.y2) + extents.y2 = box[i].y2; + } + + if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, + extents.x1 + src_dx, + extents.y1 + src_dy, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) { + DBG(("%s: unable to extract partial pixmap\n", __FUNCTION__)); + goto fallback_tiled_dst; + } + + src_dx += tmp.src.offset[0]; + src_dy += tmp.src.offset[1]; + } else { + tmp.src.bo = src_bo; + tmp.src.width = src->drawable.width; + tmp.src.height = src->drawable.height; + } + + tmp.mask.bo = NULL; + + tmp.floats_per_vertex = 2; + tmp.floats_per_rect = 6; + tmp.need_magic_ca_pass = 0; + + tmp.u.gen6.flags = COPY_FLAGS(alu); + assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); + assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER); + assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX); + + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); + if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { + DBG(("%s: too large for a single operation\n", + __FUNCTION__)); + goto fallback_tiled_src; + } + 
_kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + gen6_emit_copy_state(sna, &tmp); + gen6_align_vertex(sna, &tmp); + + do { + int16_t *v; + int n_this_time; + + n_this_time = gen6_get_rectangles(sna, &tmp, n, + gen6_emit_copy_state); + n -= n_this_time; + + v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); + sna->render.vertex_used += 6 * n_this_time; + assert(sna->render.vertex_used <= sna->render.vertex_size); + do { + + DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", + box->x1 + src_dx, box->y1 + src_dy, + box->x1 + dst_dx, box->y1 + dst_dy, + box->x2 - box->x1, box->y2 - box->y1)); + v[0] = box->x2 + dst_dx; + v[2] = box->x2 + src_dx; + v[1] = v[5] = box->y2 + dst_dy; + v[3] = v[7] = box->y2 + src_dy; + v[8] = v[4] = box->x1 + dst_dx; + v[10] = v[6] = box->x1 + src_dx; + v[9] = box->y1 + dst_dy; + v[11] = box->y1 + src_dy; + v += 12; box++; + } while (--n_this_time); + } while (n); + + gen4_vertex_flush(sna); + sna_render_composite_redirect_done(sna, &tmp); + if (tmp.src.bo != src_bo) + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + return true; + +fallback_tiled_src: + if (tmp.src.bo != src_bo) + kgem_bo_destroy(&sna->kgem, tmp.src.bo); +fallback_tiled_dst: + if (tmp.redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp.dst.bo); +fallback_tiled: + if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && + sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->drawable.bitsPerPixel, + box, n)) + return true; + + return sna_tiling_copy_boxes(sna, alu, + src, src_bo, src_dx, src_dy, + dst, dst_bo, dst_dx, dst_dy, + box, n); +} + +static void +gen6_render_copy_blt(struct sna *sna, + const struct sna_copy_op *op, + int16_t sx, int16_t sy, + int16_t w, int16_t h, + int16_t dx, int16_t dy) +{ + int16_t *v; + + gen6_get_rectangles(sna, &op->base, 1, gen6_emit_copy_state); + + v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= 
sna->render.vertex_size); + + v[0] = dx+w; v[1] = dy+h; + v[2] = sx+w; v[3] = sy+h; + v[4] = dx; v[5] = dy+h; + v[6] = sx; v[7] = sy+h; + v[8] = dx; v[9] = dy; + v[10] = sx; v[11] = sy; +} + +static void +gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op) +{ + DBG(("%s()\n", __FUNCTION__)); + + assert(!sna->render.active); + if (sna->render.vertex_offset) + gen4_vertex_flush(sna); +} + +static bool +gen6_render_copy(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + struct sna_copy_op *op) +{ + DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n", + __FUNCTION__, alu, + src->drawable.width, src->drawable.height, + dst->drawable.width, dst->drawable.height)); + + if (prefer_blt_copy(sna, src_bo, dst_bo, 0) && + sna_blt_compare_depth(&src->drawable, &dst->drawable) && + sna_blt_copy(sna, alu, + src_bo, dst_bo, + dst->drawable.bitsPerPixel, + op)) + return true; + + if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || + too_large(src->drawable.width, src->drawable.height) || + too_large(dst->drawable.width, dst->drawable.height)) { +fallback: + if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) + return false; + + return sna_blt_copy(sna, alu, src_bo, dst_bo, + dst->drawable.bitsPerPixel, + op); + } + + if (dst->drawable.depth == src->drawable.depth) { + op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth); + op->base.src.pict_format = op->base.dst.format; + } else { + op->base.dst.format = sna_format_for_depth(dst->drawable.depth); + op->base.src.pict_format = sna_format_for_depth(src->drawable.depth); + } + if (!gen6_check_format(op->base.src.pict_format)) + goto fallback; + + op->base.dst.pixmap = dst; + op->base.dst.width = dst->drawable.width; + op->base.dst.height = dst->drawable.height; + op->base.dst.bo = dst_bo; + + op->base.src.bo = src_bo; + op->base.src.card_format = + gen6_get_card_format(op->base.src.pict_format); + op->base.src.width = 
src->drawable.width; + op->base.src.height = src->drawable.height; + + op->base.mask.bo = NULL; + + op->base.floats_per_vertex = 2; + op->base.floats_per_rect = 6; + + op->base.u.gen6.flags = COPY_FLAGS(alu); + assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); + assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER); + assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX); + + kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); + if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) + goto fallback; + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + gen6_emit_copy_state(sna, &op->base); + gen6_align_vertex(sna, &op->base); + + op->blt = gen6_render_copy_blt; + op->done = gen6_render_copy_done; + return true; +} +#endif + +#if 0 +static void +gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) +{ + uint32_t *binding_table; + uint16_t offset; + bool dirty; + + dirty = gen6_get_batch(sna, op); + + binding_table = gen6_composite_get_binding_table(sna, &offset); + + binding_table[0] = + gen6_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen6_get_dest_format(op->dst.format), + true); + binding_table[1] = + gen6_bind_bo(sna, + op->src.bo, 1, 1, + GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, + false); + + if (sna->kgem.surface == offset && + *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) { + sna->kgem.surface += + sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t); + offset = sna->render_state.gen6.surface_table; + } + + gen6_emit_state(sna, op, offset | dirty); +} + +static inline bool prefer_blt_fill(struct sna *sna, + struct kgem_bo *bo) +{ + if (PREFER_RENDER) + return PREFER_RENDER < 0; + + if (kgem_bo_is_render(bo)) + return false; + + if (untiled_tlb_miss(bo)) + return true; + + if (!prefer_blt_ring(sna, bo, 0)) + return false; + + return prefer_blt_bo(sna, bo); +} + +static 
bool +gen6_render_fill_boxes(struct sna *sna, + CARD8 op, + PictFormat format, + const xRenderColor *color, + PixmapPtr dst, struct kgem_bo *dst_bo, + const BoxRec *box, int n) +{ + struct sna_composite_op tmp; + uint32_t pixel; + + DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n", + __FUNCTION__, op, + color->red, color->green, color->blue, color->alpha, (int)format)); + + if (op >= ARRAY_SIZE(gen6_blend_op)) { + DBG(("%s: fallback due to unhandled blend op: %d\n", + __FUNCTION__, op)); + return false; + } + + if (prefer_blt_fill(sna, dst_bo) || !gen6_check_dst_format(format)) { + uint8_t alu = GXinvalid; + + if (op <= PictOpSrc) { + pixel = 0; + if (op == PictOpClear) + alu = GXclear; + else if (sna_get_pixel_from_rgba(&pixel, + color->red, + color->green, + color->blue, + color->alpha, + format)) + alu = GXcopy; + } + + if (alu != GXinvalid && + sna_blt_fill_boxes(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + pixel, box, n)) + return true; + + if (!gen6_check_dst_format(format)) + return false; + } + + if (op == PictOpClear) { + pixel = 0; + op = PictOpSrc; + } else if (!sna_get_pixel_from_rgba(&pixel, + color->red, + color->green, + color->blue, + color->alpha, + PICT_a8r8g8b8)) + return false; + + DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n", + __FUNCTION__, pixel, n, + box[0].x1, box[0].y1, box[0].x2, box[0].y2)); + + tmp.dst.pixmap = dst; + tmp.dst.width = dst->drawable.width; + tmp.dst.height = dst->drawable.height; + tmp.dst.format = format; + tmp.dst.bo = dst_bo; + tmp.dst.x = tmp.dst.y = 0; + tmp.damage = NULL; + + sna_render_composite_redirect_init(&tmp); + if (too_large(dst->drawable.width, dst->drawable.height)) { + BoxRec extents; + + boxes_extents(box, n, &extents); + if (!sna_render_composite_redirect(sna, &tmp, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1, + n > 1)) + return sna_tiling_fill_boxes(sna, op, format, color, + dst, dst_bo, box, n); + } + + tmp.src.bo = sna_render_get_solid(sna, pixel); 
+ tmp.mask.bo = NULL; + + tmp.floats_per_vertex = 2; + tmp.floats_per_rect = 6; + tmp.need_magic_ca_pass = false; + + tmp.u.gen6.flags = FILL_FLAGS(op, format); + assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); + assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); + assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); + + if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { + kgem_submit(&sna->kgem); + assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); + } + + gen6_emit_fill_state(sna, &tmp); + gen6_align_vertex(sna, &tmp); + + do { + int n_this_time; + int16_t *v; + + n_this_time = gen6_get_rectangles(sna, &tmp, n, + gen6_emit_fill_state); + n -= n_this_time; + + v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); + sna->render.vertex_used += 6 * n_this_time; + assert(sna->render.vertex_used <= sna->render.vertex_size); + do { + DBG((" (%d, %d), (%d, %d)\n", + box->x1, box->y1, box->x2, box->y2)); + + v[0] = box->x2; + v[5] = v[1] = box->y2; + v[8] = v[4] = box->x1; + v[9] = box->y1; + v[2] = v[3] = v[7] = 1; + v[6] = v[10] = v[11] = 0; + v += 12; box++; + } while (--n_this_time); + } while (n); + + gen4_vertex_flush(sna); + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + sna_render_composite_redirect_done(sna, &tmp); + return true; +} + +static void +gen6_render_op_fill_blt(struct sna *sna, + const struct sna_fill_op *op, + int16_t x, int16_t y, int16_t w, int16_t h) +{ + int16_t *v; + + DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); + + gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state); + + v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = x+w; + v[4] = v[8] = x; + v[1] = v[5] = y+h; + v[9] = y; + + v[2] = v[3] = v[7] = 1; + v[6] = v[10] = v[11] = 0; +} + +fastcall static void +gen6_render_op_fill_box(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box) +{ + int16_t *v; + + DBG(("%s: 
(%d, %d),(%d, %d)\n", __FUNCTION__, + box->x1, box->y1, box->x2, box->y2)); + + gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state); + + v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = box->x2; + v[8] = v[4] = box->x1; + v[5] = v[1] = box->y2; + v[9] = box->y1; + + v[7] = v[2] = v[3] = 1; + v[6] = v[10] = v[11] = 0; +} + +fastcall static void +gen6_render_op_fill_boxes(struct sna *sna, + const struct sna_fill_op *op, + const BoxRec *box, + int nbox) +{ + DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__, + box->x1, box->y1, box->x2, box->y2, nbox)); + + do { + int nbox_this_time; + int16_t *v; + + nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox, + gen6_emit_fill_state); + nbox -= nbox_this_time; + + v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; + sna->render.vertex_used += 6 * nbox_this_time; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + do { + v[0] = box->x2; + v[8] = v[4] = box->x1; + v[5] = v[1] = box->y2; + v[9] = box->y1; + v[7] = v[2] = v[3] = 1; + v[6] = v[10] = v[11] = 0; + box++; v += 12; + } while (--nbox_this_time); + } while (nbox); +} + +static void +gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op) +{ + DBG(("%s()\n", __FUNCTION__)); + + assert(!sna->render.active); + if (sna->render.vertex_offset) + gen4_vertex_flush(sna); + kgem_bo_destroy(&sna->kgem, op->base.src.bo); +} + +static bool +gen6_render_fill(struct sna *sna, uint8_t alu, + PixmapPtr dst, struct kgem_bo *dst_bo, + uint32_t color, + struct sna_fill_op *op) +{ + DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color)); + + if (prefer_blt_fill(sna, dst_bo) && + sna_blt_fill(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + color, + op)) + return true; + + if (!(alu == GXcopy || alu == GXclear) || + too_large(dst->drawable.width, dst->drawable.height)) + return sna_blt_fill(sna, alu, + 
dst_bo, dst->drawable.bitsPerPixel, + color, + op); + + if (alu == GXclear) + color = 0; + + op->base.dst.pixmap = dst; + op->base.dst.width = dst->drawable.width; + op->base.dst.height = dst->drawable.height; + op->base.dst.format = sna_format_for_depth(dst->drawable.depth); + op->base.dst.bo = dst_bo; + op->base.dst.x = op->base.dst.y = 0; + + op->base.src.bo = + sna_render_get_solid(sna, + sna_rgba_for_color(color, + dst->drawable.depth)); + op->base.mask.bo = NULL; + + op->base.need_magic_ca_pass = false; + op->base.floats_per_vertex = 2; + op->base.floats_per_rect = 6; + + op->base.u.gen6.flags = FILL_FLAGS_NOBLEND; + assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); + assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER); + assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX); + + if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { + kgem_submit(&sna->kgem); + assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); + } + + gen6_emit_fill_state(sna, &op->base); + gen6_align_vertex(sna, &op->base); + + op->blt = gen6_render_op_fill_blt; + op->box = gen6_render_op_fill_box; + op->boxes = gen6_render_op_fill_boxes; + op->done = gen6_render_op_fill_done; + return true; +} + +static bool +gen6_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, + uint32_t color, + int16_t x1, int16_t y1, int16_t x2, int16_t y2, + uint8_t alu) +{ + BoxRec box; + + box.x1 = x1; + box.y1 = y1; + box.x2 = x2; + box.y2 = y2; + + return sna_blt_fill_boxes(sna, alu, + bo, dst->drawable.bitsPerPixel, + color, &box, 1); +} + +static bool +gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, + uint32_t color, + int16_t x1, int16_t y1, + int16_t x2, int16_t y2, + uint8_t alu) +{ + struct sna_composite_op tmp; + int16_t *v; + + /* Prefer to use the BLT if already engaged */ + if (prefer_blt_fill(sna, bo) && + gen6_render_fill_one_try_blt(sna, dst, bo, color, + x1, y1, x2, y2, alu)) + return true; + + /* Must use the BLT if we can't 
RENDER... */ + if (!(alu == GXcopy || alu == GXclear) || + too_large(dst->drawable.width, dst->drawable.height)) + return gen6_render_fill_one_try_blt(sna, dst, bo, color, + x1, y1, x2, y2, alu); + + if (alu == GXclear) + color = 0; + + tmp.dst.pixmap = dst; + tmp.dst.width = dst->drawable.width; + tmp.dst.height = dst->drawable.height; + tmp.dst.format = sna_format_for_depth(dst->drawable.depth); + tmp.dst.bo = bo; + tmp.dst.x = tmp.dst.y = 0; + + tmp.src.bo = + sna_render_get_solid(sna, + sna_rgba_for_color(color, + dst->drawable.depth)); + tmp.mask.bo = NULL; + + tmp.floats_per_vertex = 2; + tmp.floats_per_rect = 6; + tmp.need_magic_ca_pass = false; + + tmp.u.gen6.flags = FILL_FLAGS_NOBLEND; + assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); + assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); + assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); + + if (!kgem_check_bo(&sna->kgem, bo, NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, bo, NULL)) { + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + return false; + } + } + + gen6_emit_fill_state(sna, &tmp); + gen6_align_vertex(sna, &tmp); + + gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state); + + DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2)); + + v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = x2; + v[8] = v[4] = x1; + v[5] = v[1] = y2; + v[9] = y1; + v[7] = v[2] = v[3] = 1; + v[6] = v[10] = v[11] = 0; + + gen4_vertex_flush(sna); + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + + return true; +} + +static bool +gen6_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) +{ + BoxRec box; + + box.x1 = 0; + box.y1 = 0; + box.x2 = dst->drawable.width; + box.y2 = dst->drawable.height; + + return sna_blt_fill_boxes(sna, GXclear, + bo, dst->drawable.bitsPerPixel, + 0, &box, 1); +} + +static bool +gen6_render_clear(struct sna *sna, PixmapPtr dst, 
struct kgem_bo *bo) +{ + struct sna_composite_op tmp; + int16_t *v; + + DBG(("%s: %dx%d\n", + __FUNCTION__, + dst->drawable.width, + dst->drawable.height)); + + /* Prefer to use the BLT if, and only if, already engaged */ + if (sna->kgem.ring == KGEM_BLT && + gen6_render_clear_try_blt(sna, dst, bo)) + return true; + + /* Must use the BLT if we can't RENDER... */ + if (too_large(dst->drawable.width, dst->drawable.height)) + return gen6_render_clear_try_blt(sna, dst, bo); + + tmp.dst.pixmap = dst; + tmp.dst.width = dst->drawable.width; + tmp.dst.height = dst->drawable.height; + tmp.dst.format = sna_format_for_depth(dst->drawable.depth); + tmp.dst.bo = bo; + tmp.dst.x = tmp.dst.y = 0; + + tmp.src.bo = sna_render_get_solid(sna, 0); + tmp.mask.bo = NULL; + + tmp.floats_per_vertex = 2; + tmp.floats_per_rect = 6; + tmp.need_magic_ca_pass = false; + + tmp.u.gen6.flags = FILL_FLAGS_NOBLEND; + assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK); + assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); + assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); + + if (!kgem_check_bo(&sna->kgem, bo, NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, bo, NULL)) { + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + return false; + } + } + + gen6_emit_fill_state(sna, &tmp); + gen6_align_vertex(sna, &tmp); + + gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state); + + v = (int16_t *)&sna->render.vertices[sna->render.vertex_used]; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + v[0] = dst->drawable.width; + v[5] = v[1] = dst->drawable.height; + v[8] = v[4] = 0; + v[9] = 0; + + v[7] = v[2] = v[3] = 1; + v[6] = v[10] = v[11] = 0; + + gen4_vertex_flush(sna); + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + + return true; +} +#endif + +static void gen6_render_flush(struct sna *sna) +{ + gen4_vertex_close(sna); + + assert(sna->render.vb_id == 0); + assert(sna->render.vertex_offset == 0); +} + +static void 
+gen6_render_context_switch(struct kgem *kgem, + int new_mode) +{ + if (kgem->nbatch) { + DBG(("%s: from %d to %d\n", __FUNCTION__, kgem->mode, new_mode)); + _kgem_submit(kgem); + } + + kgem->ring = new_mode; +} + +static void +gen6_render_retire(struct kgem *kgem) +{ + struct sna *sna; + + if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire)) + kgem->ring = kgem->mode; + + sna = container_of(kgem, struct sna, kgem); + if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) { + DBG(("%s: resetting idle vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); + sna->render.vertex_used = 0; + sna->render.vertex_index = 0; + } +} + +static void +gen6_render_expire(struct kgem *kgem) +{ + struct sna *sna; + + sna = container_of(kgem, struct sna, kgem); + if (sna->render.vbo && !sna->render.vertex_used) { + DBG(("%s: discarding vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); + kgem_bo_destroy(kgem, sna->render.vbo); + assert(!sna->render.active); + sna->render.vbo = NULL; + sna->render.vertices = sna->render.vertex_data; + sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); + sna->render.vertex_used = 0; + sna->render.vertex_index = 0; + } +} + +static void gen6_render_reset(struct sna *sna) +{ + sna->render_state.gen6.needs_invariant = true; + sna->render_state.gen6.first_state_packet = true; + sna->render_state.gen6.ve_id = 3 << 2; + sna->render_state.gen6.last_primitive = -1; + + sna->render_state.gen6.num_sf_outputs = 0; + sna->render_state.gen6.samplers = -1; + sna->render_state.gen6.blend = -1; + sna->render_state.gen6.kernel = -1; + sna->render_state.gen6.drawrect_offset = -1; + sna->render_state.gen6.drawrect_limit = -1; + sna->render_state.gen6.surface_table = -1; + + sna->render.vertex_offset = 0; + sna->render.nvertex_reloc = 0; + sna->render.vb_id = 0; +} + +static void gen6_render_fini(struct sna *sna) +{ + kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo); +} + +static bool is_gt2(struct 
sna *sna) +{ + return sna->PciInfo->device_id & 0x30; +} + +static bool is_mobile(struct sna *sna) +{ + return (sna->PciInfo->device_id & 0xf) == 0x6; +} + +static bool gen6_render_setup(struct sna *sna) +{ + struct gen6_render_state *state = &sna->render_state.gen6; + struct sna_static_stream general; + struct gen6_sampler_state *ss; + int i, j, k, l, m; + + state->info = &gt1_info; + if (is_gt2(sna)) + state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */ + + sna_static_stream_init(&general); + + /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer + * dumps, you know it points to zero. + */ + null_create(&general); + scratch_create(&general); + + for (m = 0; m < GEN6_KERNEL_COUNT; m++) { + if (wm_kernels[m].size) { + state->wm_kernel[m][1] = + sna_static_stream_add(&general, + wm_kernels[m].data, + wm_kernels[m].size, + 64); + } else { + if (USE_8_PIXEL_DISPATCH) { + state->wm_kernel[m][0] = + sna_static_stream_compile_wm(sna, &general, + wm_kernels[m].data, 8); + } + + if (USE_16_PIXEL_DISPATCH) { + state->wm_kernel[m][1] = + sna_static_stream_compile_wm(sna, &general, + wm_kernels[m].data, 16); + } + + if (USE_32_PIXEL_DISPATCH) { + state->wm_kernel[m][2] = + sna_static_stream_compile_wm(sna, &general, + wm_kernels[m].data, 32); + } + } + if ((state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]) == 0) { + state->wm_kernel[m][1] = + sna_static_stream_compile_wm(sna, &general, + wm_kernels[m].data, 16); + } + } + + ss = sna_static_stream_map(&general, + 2 * sizeof(*ss) * + (2 + + FILTER_COUNT * EXTEND_COUNT * + FILTER_COUNT * EXTEND_COUNT), + 32); + state->wm_state = sna_static_stream_offsetof(&general, ss); + sampler_copy_init(ss); ss += 2; + sampler_fill_init(ss); ss += 2; + for (i = 0; i < FILTER_COUNT; i++) { + for (j = 0; j < EXTEND_COUNT; j++) { + for (k = 0; k < FILTER_COUNT; k++) { + for (l = 0; l < EXTEND_COUNT; l++) { + sampler_state_init(ss++, i, j); + sampler_state_init(ss++, k, l); + } + } + } + } + 
state->cc_blend = gen6_composite_create_blend_state(&general); + + state->general_bo = sna_static_stream_fini(sna, &general); + return state->general_bo != NULL; +} + +const char *gen6_render_init(struct sna *sna, const char *backend) +{ + if (!gen6_render_setup(sna)) + return backend; + + sna->kgem.context_switch = gen6_render_context_switch; + sna->kgem.retire = gen6_render_retire; + sna->kgem.expire = gen6_render_expire; + +#if 0 +#if !NO_COMPOSITE + sna->render.composite = gen6_render_composite; + sna->render.prefer_gpu |= PREFER_GPU_RENDER; +#endif + +#if !NO_COMPOSITE_SPANS + sna->render.check_composite_spans = gen6_check_composite_spans; + sna->render.composite_spans = gen6_render_composite_spans; + if (is_mobile(sna)) + sna->render.prefer_gpu |= PREFER_GPU_SPANS; +#endif + sna->render.video = gen6_render_video; + +#if !NO_COPY_BOXES + sna->render.copy_boxes = gen6_render_copy_boxes; +#endif +#if !NO_COPY + sna->render.copy = gen6_render_copy; +#endif + +#if !NO_FILL_BOXES + sna->render.fill_boxes = gen6_render_fill_boxes; +#endif +#if !NO_FILL + sna->render.fill = gen6_render_fill; +#endif +#if !NO_FILL_ONE + sna->render.fill_one = gen6_render_fill_one; +#endif +#if !NO_FILL_CLEAR + sna->render.clear = gen6_render_clear; +#endif +#endif + + sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT; + sna->render.blit_tex = gen6_blit_tex; + + sna->render.flush = gen6_render_flush; + sna->render.reset = gen6_render_reset; + sna->render.fini = gen6_render_fini; + + sna->render.max_3d_size = GEN6_MAX_SIZE; + sna->render.max_3d_pitch = 1 << 18; + return sna->render_state.gen6.info->name; +} + +static bool +gen6_blit_tex(struct sna *sna, + uint8_t op, bool scale, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr mask,struct kgem_bo *mask_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + int32_t src_x, int32_t src_y, + int32_t msk_x, int32_t msk_y, + int32_t dst_x, int32_t dst_y, + int32_t width, int32_t height, + struct sna_composite_op *tmp) +{ + + DBG(("%s: %dx%d, current 
mode=%d\n", __FUNCTION__, + width, height, sna->kgem.ring)); + + tmp->op = PictOpSrc; + + tmp->dst.pixmap = dst; + tmp->dst.bo = dst_bo; + tmp->dst.width = dst->drawable.width; + tmp->dst.height = dst->drawable.height; + tmp->dst.format = PICT_x8r8g8b8; + + + tmp->src.repeat = SAMPLER_EXTEND_NONE; + tmp->src.is_affine = true; + + tmp->src.bo = src_bo; + tmp->src.pict_format = PICT_x8r8g8b8; + tmp->src.card_format = gen6_get_card_format(tmp->src.pict_format); + tmp->src.width = src->drawable.width; + tmp->src.height = src->drawable.height; + + if ( (tmp->src.width == width) && + (tmp->src.height == height) ) + tmp->src.filter = SAMPLER_FILTER_NEAREST; + else + tmp->src.filter = SAMPLER_FILTER_BILINEAR; + + tmp->is_affine = tmp->src.is_affine; + tmp->has_component_alpha = false; + tmp->need_magic_ca_pass = false; + + tmp->mask.repeat = SAMPLER_EXTEND_NONE; + tmp->mask.filter = SAMPLER_FILTER_NEAREST; + tmp->mask.is_affine = true; + + tmp->mask.bo = mask_bo; + tmp->mask.pict_format = PIXMAN_a8; + tmp->mask.card_format = gen6_get_card_format(tmp->mask.pict_format); + tmp->mask.width = mask->drawable.width; + tmp->mask.height = mask->drawable.height; + + + if( scale ) + { + tmp->src.scale[0] = 1.f/width; + tmp->src.scale[1] = 1.f/height; + } + else + { + tmp->src.scale[0] = 1.f/src->drawable.width; + tmp->src.scale[1] = 1.f/src->drawable.height; + } +// tmp->src.offset[0] = -dst_x; +// tmp->src.offset[1] = -dst_y; + + + tmp->mask.scale[0] = 1.f/mask->drawable.width; + tmp->mask.scale[1] = 1.f/mask->drawable.height; +// tmp->mask.offset[0] = -dst_x; +// tmp->mask.offset[1] = -dst_y; + + tmp->u.gen6.flags = + GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, + tmp->src.repeat, + tmp->mask.filter, + tmp->mask.repeat), + gen6_get_blend(tmp->op, + tmp->has_component_alpha, + tmp->dst.format), +/* gen6_choose_composite_kernel(tmp->op, + tmp->mask.bo != NULL, + tmp->has_component_alpha, + tmp->is_affine), +*/ + GEN6_WM_KERNEL_MASK, + gen4_choose_composite_emitter(sna, tmp)); + + 
tmp->blt = gen6_render_composite_blt; +// tmp->box = gen6_render_composite_box; + tmp->done = gen6_render_composite_done; + + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); + if (!kgem_check_bo(&sna->kgem, + tmp->dst.bo, tmp->src.bo, tmp->mask.bo, + NULL)) { + kgem_submit(&sna->kgem); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + gen6_emit_composite_state(sna, tmp); + gen6_align_vertex(sna, tmp); + return true; + +} diff --git a/drivers/video/Intel-2D/gen7_render.c b/drivers/video/Intel-2D/gen7_render.c index fd8242f09c..cc483890b6 100644 --- a/drivers/video/Intel-2D/gen7_render.c +++ b/drivers/video/Intel-2D/gen7_render.c @@ -45,6 +45,8 @@ #include "gen4_source.h" #include "gen4_vertex.h" +#define ALWAYS_FLUSH 0 + #define NO_COMPOSITE 0 #define NO_COMPOSITE_SPANS 0 #define NO_COPY 0 @@ -75,6 +77,7 @@ #define is_aligned(x, y) (((x) & ((y) - 1)) == 0) struct gt_info { + const char *name; uint32_t max_vs_threads; uint32_t max_gs_threads; uint32_t max_wm_threads; @@ -82,57 +85,107 @@ struct gt_info { int size; int max_vs_entries; int max_gs_entries; + int push_ps_size; /* in 1KBs */ } urb; + int gt; }; static const struct gt_info ivb_gt_info = { + .name = "Ivybridge (gen7)", .max_vs_threads = 16, .max_gs_threads = 16, .max_wm_threads = (16-1) << IVB_PS_MAX_THREADS_SHIFT, - .urb = { 128, 64, 64 }, + .urb = { 128, 64, 64, 8 }, + .gt = 0, }; static const struct gt_info ivb_gt1_info = { + .name = "Ivybridge (gen7, gt1)", .max_vs_threads = 36, .max_gs_threads = 36, .max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT, - .urb = { 128, 512, 192 }, + .urb = { 128, 512, 192, 8 }, + .gt = 1, }; static const struct gt_info ivb_gt2_info = { + .name = "Ivybridge (gen7, gt2)", .max_vs_threads = 128, .max_gs_threads = 128, .max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT, - .urb = { 256, 704, 320 }, + .urb = { 256, 704, 320, 8 }, + .gt = 2, +}; + +static const struct gt_info byt_gt_info = { + .name = "Baytrail (gen7)", + .urb = { 128, 64, 64 }, + .max_vs_threads = 
36, + .max_gs_threads = 36, + .max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT, + .urb = { 128, 512, 192, 8 }, + .gt = 1, }; static const struct gt_info hsw_gt_info = { + .name = "Haswell (gen7.5)", .max_vs_threads = 8, .max_gs_threads = 8, .max_wm_threads = (8 - 1) << HSW_PS_MAX_THREADS_SHIFT | 1 << HSW_PS_SAMPLE_MASK_SHIFT, - .urb = { 128, 64, 64 }, + .urb = { 128, 64, 64, 8 }, + .gt = 0, }; static const struct gt_info hsw_gt1_info = { + .name = "Haswell (gen7.5, gt1)", .max_vs_threads = 70, .max_gs_threads = 70, .max_wm_threads = (102 - 1) << HSW_PS_MAX_THREADS_SHIFT | 1 << HSW_PS_SAMPLE_MASK_SHIFT, - .urb = { 128, 640, 256 }, + .urb = { 128, 640, 256, 8 }, + .gt = 1, }; static const struct gt_info hsw_gt2_info = { + .name = "Haswell (gen7.5, gt2)", + .max_vs_threads = 140, + .max_gs_threads = 140, + .max_wm_threads = + (140 - 1) << HSW_PS_MAX_THREADS_SHIFT | + 1 << HSW_PS_SAMPLE_MASK_SHIFT, + .urb = { 256, 1664, 640, 8 }, + .gt = 2, +}; + +static const struct gt_info hsw_gt3_info = { + .name = "Haswell (gen7.5, gt3)", .max_vs_threads = 280, .max_gs_threads = 280, .max_wm_threads = - (204 - 1) << HSW_PS_MAX_THREADS_SHIFT | + (280 - 1) << HSW_PS_MAX_THREADS_SHIFT | 1 << HSW_PS_SAMPLE_MASK_SHIFT, - .urb = { 256, 1664, 640 }, + .urb = { 512, 3328, 1280, 16 }, + .gt = 3, }; +inline static bool is_ivb(struct sna *sna) +{ + return sna->kgem.gen == 070; +} + +inline static bool is_byt(struct sna *sna) +{ + return sna->kgem.gen == 071; +} + +inline static bool is_hsw(struct sna *sna) +{ + return sna->kgem.gen == 075; +} + static const uint32_t ps_kernel_packed[][4] = { #include "exa_wm_src_affine.g7b" #include "exa_wm_src_sample_argb.g7b" @@ -294,8 +347,24 @@ static uint32_t gen7_get_card_format(PictFormat format) return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM; case PICT_x8r8g8b8: return GEN7_SURFACEFORMAT_B8G8R8X8_UNORM; + case PICT_a8b8g8r8: + return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM; + case PICT_x8b8g8r8: + return GEN7_SURFACEFORMAT_R8G8B8X8_UNORM; + case 
PICT_a2r10g10b10: + return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM; + case PICT_x2r10g10b10: + return GEN7_SURFACEFORMAT_B10G10R10X2_UNORM; + case PICT_r8g8b8: + return GEN7_SURFACEFORMAT_R8G8B8_UNORM; + case PICT_r5g6b5: + return GEN7_SURFACEFORMAT_B5G6R5_UNORM; + case PICT_a1r5g5b5: + return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM; case PICT_a8: return GEN7_SURFACEFORMAT_A8_UNORM; + case PICT_a4r4g4b4: + return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM; } } @@ -307,8 +376,22 @@ static uint32_t gen7_get_dest_format(PictFormat format) case PICT_a8r8g8b8: case PICT_x8r8g8b8: return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM; + case PICT_a8b8g8r8: + case PICT_x8b8g8r8: + return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM; + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM; + case PICT_r5g6b5: + return GEN7_SURFACEFORMAT_B5G6R5_UNORM; + case PICT_x1r5g5b5: + case PICT_a1r5g5b5: + return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM; case PICT_a8: return GEN7_SURFACEFORMAT_A8_UNORM; + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM; } } @@ -335,7 +418,7 @@ static void gen7_emit_urb(struct sna *sna) { OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2)); - OUT_BATCH(8); /* in 1KBs */ + OUT_BATCH(sna->render_state.gen7.info->urb.push_ps_size); /* num of VS entries must be divisible by 8 if size < 9 */ OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2)); @@ -359,6 +442,10 @@ gen7_emit_urb(struct sna *sna) static void gen7_emit_state_base_address(struct sna *sna) { + uint32_t mocs; + + mocs = is_hsw(sna) ? 
5 << 8 : 3 << 8; + OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2)); OUT_BATCH(0); /* general */ OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */ @@ -366,17 +453,17 @@ gen7_emit_state_base_address(struct sna *sna) NULL, I915_GEM_DOMAIN_INSTRUCTION << 16, BASE_ADDRESS_MODIFY)); + OUT_BATCH(kgem_add_reloc(&sna->kgem, /* dynamic */ + sna->kgem.nbatch, + sna->render_state.gen7.general_bo, + I915_GEM_DOMAIN_INSTRUCTION << 16, + mocs | BASE_ADDRESS_MODIFY)); + OUT_BATCH(0); /* indirect */ OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */ sna->kgem.nbatch, sna->render_state.gen7.general_bo, I915_GEM_DOMAIN_INSTRUCTION << 16, - BASE_ADDRESS_MODIFY)); - OUT_BATCH(0); /* indirect */ - OUT_BATCH(kgem_add_reloc(&sna->kgem, - sna->kgem.nbatch, - sna->render_state.gen7.general_bo, - I915_GEM_DOMAIN_INSTRUCTION << 16, - BASE_ADDRESS_MODIFY)); + mocs | BASE_ADDRESS_MODIFY)); /* upper bounds, disable */ OUT_BATCH(0); @@ -905,10 +992,17 @@ gen7_emit_pipe_invalidate(struct sna *sna) } inline static void -gen7_emit_pipe_flush(struct sna *sna) +gen7_emit_pipe_flush(struct sna *sna, bool need_stall) { + unsigned stall; + + stall = 0; + if (need_stall) + stall = (GEN7_PIPE_CONTROL_CS_STALL | + GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2)); - OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH); + OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH | stall); OUT_BATCH(0); OUT_BATCH(0); } @@ -930,8 +1024,7 @@ gen7_emit_state(struct sna *sna, { bool need_stall; - if (sna->render_state.gen7.emit_flush) - gen7_emit_pipe_flush(sna); + assert(op->dst.bo->exec); gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags)); gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags)); @@ -942,11 +1035,16 @@ gen7_emit_state(struct sna *sna, need_stall = gen7_emit_binding_table(sna, wm_binding_table); need_stall &= gen7_emit_drawing_rectangle(sna, op); - if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { + if (ALWAYS_FLUSH || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { 
gen7_emit_pipe_invalidate(sna); kgem_clear_dirty(&sna->kgem); - if (op->dst.bo->exec) + assert(op->dst.bo->exec); kgem_bo_mark_dirty(op->dst.bo); + sna->render_state.gen7.emit_flush = false; + need_stall = false; + } + if (sna->render_state.gen7.emit_flush) { + gen7_emit_pipe_flush(sna, need_stall); need_stall = false; } if (need_stall) @@ -1091,7 +1189,7 @@ gen7_bind_bo(struct sna *sna, COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32); /* After the first bind, we manage the cache domains within the batch */ - offset = kgem_bo_get_binding(bo, format | is_scanout << 31); + offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31); if (offset) { if (is_dst) kgem_bo_mark_dirty(bo); @@ -1104,22 +1202,25 @@ gen7_bind_bo(struct sna *sna, ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT | gen7_tiling_bits(bo->tiling) | format << GEN7_SURFACE_FORMAT_SHIFT); - if (is_dst) + if (bo->tiling == I915_TILING_Y) + ss[0] |= GEN7_SURFACE_VALIGN_4; + if (is_dst) { + ss[0] |= GEN7_SURFACE_RC_READ_WRITE; domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER; - else + } else domains = I915_GEM_DOMAIN_SAMPLER << 16; ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0); ss[2] = ((width - 1) << GEN7_SURFACE_WIDTH_SHIFT | (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT); ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT; ss[4] = 0; - ss[5] = is_scanout ? 0 : 3 << 16; + ss[5] = (is_scanout || bo->io) ? 0 : is_hsw(sna) ? 
5 << 16 : 3 << 16; ss[6] = 0; ss[7] = 0; - if (sna->kgem.gen == 075) + if (is_hsw(sna)) ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA); - kgem_bo_set_binding(bo, format | is_scanout << 31, offset); + kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset); DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", offset, bo->handle, ss[1], @@ -1242,8 +1343,8 @@ start: goto start; } - assert(op->floats_per_rect >= vertex_space(sna)); assert(rem <= vertex_space(sna)); + assert(op->floats_per_rect <= rem); if (want > 1 && want * op->floats_per_rect > rem) want = rem / op->floats_per_rect; @@ -1398,7 +1499,257 @@ gen7_composite_create_blend_state(struct sna_static_stream *stream) return sna_static_stream_offsetof(stream, base); } +#if 0 +static uint32_t gen7_bind_video_source(struct sna *sna, + struct kgem_bo *bo, + uint32_t offset, + int width, + int height, + int pitch, + uint32_t format) +{ + uint32_t *ss, bind; + bind = sna->kgem.surface -= + sizeof(struct gen7_surface_state) / sizeof(uint32_t); + + assert(bo->tiling == I915_TILING_NONE); + + ss = sna->kgem.batch + bind; + ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT | + format << GEN7_SURFACE_FORMAT_SHIFT); + ss[1] = kgem_add_reloc(&sna->kgem, bind + 1, bo, + I915_GEM_DOMAIN_SAMPLER << 16, + offset); + ss[2] = ((width - 1) << GEN7_SURFACE_WIDTH_SHIFT | + (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT); + ss[3] = (pitch - 1) << GEN7_SURFACE_PITCH_SHIFT; + ss[4] = 0; + ss[5] = 0; + ss[6] = 0; + ss[7] = 0; + if (is_hsw(sna)) + ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA); + + DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, offset=%d\n", + bind, bo->handle, ss[1], + format, width, height, pitch, offset)); + + return bind * sizeof(uint32_t); +} + +static void gen7_emit_video_state(struct sna *sna, + const struct sna_composite_op *op) +{ + struct sna_video_frame *frame = op->priv; + uint32_t src_surf_format; 
+ uint32_t src_surf_base[6]; + int src_width[6]; + int src_height[6]; + int src_pitch[6]; + uint32_t *binding_table; + uint16_t offset; + int n_src, n; + + gen7_get_batch(sna, op); + + src_surf_base[0] = 0; + src_surf_base[1] = 0; + src_surf_base[2] = frame->VBufOffset; + src_surf_base[3] = frame->VBufOffset; + src_surf_base[4] = frame->UBufOffset; + src_surf_base[5] = frame->UBufOffset; + + if (is_planar_fourcc(frame->id)) { + src_surf_format = GEN7_SURFACEFORMAT_R8_UNORM; + src_width[1] = src_width[0] = frame->width; + src_height[1] = src_height[0] = frame->height; + src_pitch[1] = src_pitch[0] = frame->pitch[1]; + src_width[4] = src_width[5] = src_width[2] = src_width[3] = + frame->width / 2; + src_height[4] = src_height[5] = src_height[2] = src_height[3] = + frame->height / 2; + src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = + frame->pitch[0]; + n_src = 6; + } else { + if (frame->id == FOURCC_UYVY) + src_surf_format = GEN7_SURFACEFORMAT_YCRCB_SWAPY; + else + src_surf_format = GEN7_SURFACEFORMAT_YCRCB_NORMAL; + + src_width[0] = frame->width; + src_height[0] = frame->height; + src_pitch[0] = frame->pitch[0]; + n_src = 1; + } + + binding_table = gen7_composite_get_binding_table(sna, &offset); + + binding_table[0] = + gen7_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen7_get_dest_format(op->dst.format), + true); + for (n = 0; n < n_src; n++) { + binding_table[1+n] = + gen7_bind_video_source(sna, + frame->bo, + src_surf_base[n], + src_width[n], + src_height[n], + src_pitch[n], + src_surf_format); + } + + gen7_emit_state(sna, op, offset); +} + +static bool +gen7_render_video(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + RegionPtr dstRegion, + PixmapPtr pixmap) +{ + struct sna_composite_op tmp; + int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; + int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; + int src_width = frame->src.x2 - frame->src.x1; + int src_height = frame->src.y2 - 
frame->src.y1; + float src_offset_x, src_offset_y; + float src_scale_x, src_scale_y; + int nbox, pix_xoff, pix_yoff; + struct sna_pixmap *priv; + unsigned filter; + BoxPtr box; + + DBG(("%s: src=(%d, %d), dst=(%d, %d), %ldx[(%d, %d), (%d, %d)...]\n", + __FUNCTION__, + src_width, src_height, dst_width, dst_height, + (long)REGION_NUM_RECTS(dstRegion), + REGION_EXTENTS(NULL, dstRegion)->x1, + REGION_EXTENTS(NULL, dstRegion)->y1, + REGION_EXTENTS(NULL, dstRegion)->x2, + REGION_EXTENTS(NULL, dstRegion)->y2)); + + priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE); + if (priv == NULL) + return false; + + memset(&tmp, 0, sizeof(tmp)); + + tmp.dst.pixmap = pixmap; + tmp.dst.width = pixmap->drawable.width; + tmp.dst.height = pixmap->drawable.height; + tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth); + tmp.dst.bo = priv->gpu_bo; + + tmp.src.bo = frame->bo; + tmp.mask.bo = NULL; + + tmp.floats_per_vertex = 3; + tmp.floats_per_rect = 9; + + if (src_width == dst_width && src_height == dst_height) + filter = SAMPLER_FILTER_NEAREST; + else + filter = SAMPLER_FILTER_BILINEAR; + + tmp.u.gen7.flags = + GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD, + SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE), + NO_BLEND, + is_planar_fourcc(frame->id) ? + GEN7_WM_KERNEL_VIDEO_PLANAR : + GEN7_WM_KERNEL_VIDEO_PACKED, + 2); + tmp.priv = frame; + + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); + if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { + kgem_submit(&sna->kgem); + assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + gen7_emit_video_state(sna, &tmp); + gen7_align_vertex(sna, &tmp); + + /* Set up the offset for translating from the given region (in screen + * coordinates) to the backing pixmap. 
+ */ +#ifdef COMPOSITE + pix_xoff = -pixmap->screen_x + pixmap->drawable.x; + pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +#else + pix_xoff = 0; + pix_yoff = 0; +#endif + + DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n", + __FUNCTION__, + frame->src.x1, frame->src.y1, + src_width, src_height, + dst_width, dst_height, + frame->width, frame->height)); + + src_scale_x = (float)src_width / dst_width / frame->width; + src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; + + src_scale_y = (float)src_height / dst_height / frame->height; + src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; + + DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n", + __FUNCTION__, + src_scale_x, src_scale_y, + src_offset_x, src_offset_y)); + + box = REGION_RECTS(dstRegion); + nbox = REGION_NUM_RECTS(dstRegion); + while (nbox--) { + BoxRec r; + + DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n", + __FUNCTION__, + box->x1, box->y1, + box->x2, box->y2, + pix_xoff, pix_yoff, + box->x1 * src_scale_x + src_offset_x, + box->y1 * src_scale_y + src_offset_y, + box->x2 * src_scale_x + src_offset_x, + box->y2 * src_scale_y + src_offset_y)); + + r.x1 = box->x1 + pix_xoff; + r.x2 = box->x2 + pix_xoff; + r.y1 = box->y1 + pix_yoff; + r.y2 = box->y2 + pix_yoff; + + gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state); + + OUT_VERTEX(r.x2, r.y2); + OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); + + OUT_VERTEX(r.x1, r.y2); + OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y); + + OUT_VERTEX(r.x1, r.y1); + OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x); + OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y); + + if (!DAMAGE_IS_ALL(priv->gpu_damage)) { + sna_damage_add_box(&priv->gpu_damage, &r); + sna_damage_subtract_box(&priv->cpu_damage, &r); + } + box++; + } + + 
gen4_vertex_flush(sna); + return true; +} +#endif static void gen7_render_composite_done(struct sna *sna, const struct sna_composite_op *op) @@ -1410,192 +1761,231 @@ static void gen7_render_composite_done(struct sna *sna, } + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#if 0 static bool -gen7_blit_tex(struct sna *sna, - uint8_t op, bool scale, - PixmapPtr src, struct kgem_bo *src_bo, - PixmapPtr mask,struct kgem_bo *mask_bo, - PixmapPtr dst, struct kgem_bo *dst_bo, - int32_t src_x, int32_t src_y, - int32_t msk_x, int32_t msk_y, - int32_t dst_x, int32_t dst_y, - int32_t width, int32_t height, - struct sna_composite_op *tmp) +gen7_render_fill_boxes(struct sna *sna, + CARD8 op, + PictFormat format, + const xRenderColor *color, + PixmapPtr dst, struct kgem_bo *dst_bo, + const BoxRec *box, int n) { + struct sna_composite_op tmp; + uint32_t pixel; + DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n", + __FUNCTION__, op, + color->red, color->green, color->blue, color->alpha, (int)format)); - tmp->op = PictOpSrc; - - tmp->dst.pixmap = dst; - tmp->dst.bo = dst_bo; - tmp->dst.width = dst->drawable.width; - tmp->dst.height = dst->drawable.height; - tmp->dst.format = PICT_x8r8g8b8; - - - tmp->src.repeat = RepeatNone; - tmp->src.filter = PictFilterNearest; - tmp->src.is_affine = true; - - tmp->src.bo = src_bo; - tmp->src.pict_format = PICT_x8r8g8b8; - tmp->src.card_format = gen7_get_card_format(tmp->src.pict_format); - tmp->src.width = src->drawable.width; - tmp->src.height = src->drawable.height; - - - tmp->is_affine = tmp->src.is_affine; - tmp->has_component_alpha = false; - tmp->need_magic_ca_pass = false; - - tmp->mask.repeat = SAMPLER_EXTEND_NONE; - tmp->mask.filter = SAMPLER_FILTER_NEAREST; - tmp->mask.is_affine = true; - - tmp->mask.bo = mask_bo; - tmp->mask.pict_format = PIXMAN_a8; - tmp->mask.card_format = 
gen7_get_card_format(tmp->mask.pict_format); - tmp->mask.width = mask->drawable.width; - tmp->mask.height = mask->drawable.height; - - if( scale ) - { - tmp->src.scale[0] = 1.f/width; - tmp->src.scale[1] = 1.f/height; - } - else - { - tmp->src.scale[0] = 1.f/src->drawable.width; - tmp->src.scale[1] = 1.f/src->drawable.height; - } - - tmp->mask.scale[0] = 1.f/mask->drawable.width; - tmp->mask.scale[1] = 1.f/mask->drawable.height; - - - - tmp->u.gen7.flags = - GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, - tmp->src.repeat, - tmp->mask.filter, - tmp->mask.repeat), - gen7_get_blend(tmp->op, - tmp->has_component_alpha, - tmp->dst.format), -/* gen7_choose_composite_kernel(tmp->op, - tmp->mask.bo != NULL, - tmp->has_component_alpha, - tmp->is_affine), */ - GEN7_WM_KERNEL_MASK, - gen4_choose_composite_emitter(tmp)); - - tmp->blt = gen7_render_composite_blt; -// tmp->box = gen7_render_composite_box; - tmp->done = gen7_render_composite_done; - - kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); - if (!kgem_check_bo(&sna->kgem, - tmp->dst.bo, tmp->src.bo, tmp->mask.bo, - NULL)) { - kgem_submit(&sna->kgem); - _kgem_set_mode(&sna->kgem, KGEM_RENDER); + if (op >= ARRAY_SIZE(gen7_blend_op)) { + DBG(("%s: fallback due to unhandled blend op: %d\n", + __FUNCTION__, op)); + return false; } - gen7_emit_composite_state(sna, tmp); - gen7_align_vertex(sna, tmp); + if (prefer_blt_fill(sna, dst_bo) || !gen7_check_dst_format(format)) { + uint8_t alu = GXinvalid; + + if (op <= PictOpSrc) { + pixel = 0; + if (op == PictOpClear) + alu = GXclear; + else if (sna_get_pixel_from_rgba(&pixel, + color->red, + color->green, + color->blue, + color->alpha, + format)) + alu = GXcopy; + } + + if (alu != GXinvalid && + sna_blt_fill_boxes(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + pixel, box, n)) + return true; + + if (!gen7_check_dst_format(format)) + return false; + } + + if (op == PictOpClear) { + pixel = 0; + op = PictOpSrc; + } else if (!sna_get_pixel_from_rgba(&pixel, + color->red, + 
color->green, + color->blue, + color->alpha, + PICT_a8r8g8b8)) + return false; + + DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n", + __FUNCTION__, pixel, n, + box[0].x1, box[0].y1, box[0].x2, box[0].y2)); + + tmp.dst.pixmap = dst; + tmp.dst.width = dst->drawable.width; + tmp.dst.height = dst->drawable.height; + tmp.dst.format = format; + tmp.dst.bo = dst_bo; + tmp.dst.x = tmp.dst.y = 0; + tmp.damage = NULL; + + sna_render_composite_redirect_init(&tmp); + if (too_large(dst->drawable.width, dst->drawable.height)) { + BoxRec extents; + + boxes_extents(box, n, &extents); + if (!sna_render_composite_redirect(sna, &tmp, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1, + n > 1)) + return sna_tiling_fill_boxes(sna, op, format, color, + dst, dst_bo, box, n); + } + + tmp.src.bo = sna_render_get_solid(sna, pixel); + tmp.mask.bo = NULL; + + tmp.floats_per_vertex = 2; + tmp.floats_per_rect = 6; + tmp.need_magic_ca_pass = false; + + tmp.u.gen7.flags = FILL_FLAGS(op, format); + + kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); + if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { + kgem_submit(&sna->kgem); + assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); + } + + gen7_emit_fill_state(sna, &tmp); + gen7_align_vertex(sna, &tmp); + + do { + int n_this_time; + int16_t *v; + + n_this_time = gen7_get_rectangles(sna, &tmp, n, + gen7_emit_fill_state); + n -= n_this_time; + + v = (int16_t *)(sna->render.vertices + sna->render.vertex_used); + sna->render.vertex_used += 6 * n_this_time; + assert(sna->render.vertex_used <= sna->render.vertex_size); + do { + DBG((" (%d, %d), (%d, %d)\n", + box->x1, box->y1, box->x2, box->y2)); + + v[0] = box->x2; + v[5] = v[1] = box->y2; + v[8] = v[4] = box->x1; + v[9] = box->y1; + v[2] = v[3] = v[7] = 1; + v[6] = v[10] = v[11] = 0; + v += 12; box++; + } while (--n_this_time); + } while (n); + + gen4_vertex_flush(sna); + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + sna_render_composite_redirect_done(sna, &tmp); return true; } - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +#endif static void gen7_render_flush(struct sna *sna) { @@ -1676,14 +2066,20 @@ static void gen7_render_fini(struct sna *sna) kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo); } +static bool is_gt3(struct sna *sna) +{ + assert(sna->kgem.gen == 075); + return sna->PciInfo->device_id & 0x20; +} + static bool is_gt2(struct sna *sna) { - return DEVICE_ID(sna->PciInfo) & 0x20; + return sna->PciInfo->device_id & (is_hsw(sna)? 0x30 : 0x20); } static bool is_mobile(struct sna *sna) { - return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6; + return (sna->PciInfo->device_id & 0xf) == 0x6; } static bool gen7_render_setup(struct sna *sna) @@ -1693,19 +2089,24 @@ static bool gen7_render_setup(struct sna *sna) struct gen7_sampler_state *ss; int i, j, k, l, m; - if (sna->kgem.gen == 070) { + if (is_ivb(sna)) { state->info = &ivb_gt_info; - if (DEVICE_ID(sna->PciInfo) & 0xf) { + if (sna->PciInfo->device_id & 0xf) { state->info = &ivb_gt1_info; if (is_gt2(sna)) state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */ } - } else if (sna->kgem.gen == 075) { + } else if (is_byt(sna)) { + state->info = &byt_gt_info; + } else if (is_hsw(sna)) { state->info = &hsw_gt_info; - if (DEVICE_ID(sna->PciInfo) & 0xf) { + if (sna->PciInfo->device_id & 0xf) { + if (is_gt3(sna)) + state->info = &hsw_gt3_info; + else if (is_gt2(sna)) + state->info = &hsw_gt2_info; + else state->info = &hsw_gt1_info; - if (is_gt2(sna)) - state->info = &hsw_gt2_info; } } else return false; @@ -1772,26 +2173,154 @@ static bool gen7_render_setup(struct sna *sna) return state->general_bo != NULL; } -bool gen7_render_init(struct sna *sna) +const char *gen7_render_init(struct sna *sna, const char *backend) { if (!gen7_render_setup(sna)) - return false; + return backend; sna->kgem.context_switch = gen7_render_context_switch; 
sna->kgem.retire = gen7_render_retire; sna->kgem.expire = gen7_render_expire; +#if 0 +#if !NO_COMPOSITE + sna->render.composite = gen7_render_composite; + sna->render.prefer_gpu |= PREFER_GPU_RENDER; +#endif +#if !NO_COMPOSITE_SPANS + sna->render.check_composite_spans = gen7_check_composite_spans; + sna->render.composite_spans = gen7_render_composite_spans; + if (is_mobile(sna) || is_gt2(sna) || is_byt(sna)) + sna->render.prefer_gpu |= PREFER_GPU_SPANS; +#endif + sna->render.video = gen7_render_video; + +#if !NO_COPY_BOXES + sna->render.copy_boxes = gen7_render_copy_boxes; +#endif +#if !NO_COPY + sna->render.copy = gen7_render_copy; +#endif + +#if !NO_FILL_BOXES + sna->render.fill_boxes = gen7_render_fill_boxes; +#endif +#if !NO_FILL + sna->render.fill = gen7_render_fill; +#endif +#if !NO_FILL_ONE + sna->render.fill_one = gen7_render_fill_one; +#endif +#if !NO_FILL_CLEAR + sna->render.clear = gen7_render_clear; +#endif +#endif + sna->render.blit_tex = gen7_blit_tex; - + sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT; + sna->render.flush = gen7_render_flush; sna->render.reset = gen7_render_reset; sna->render.fini = gen7_render_fini; sna->render.max_3d_size = GEN7_MAX_SIZE; sna->render.max_3d_pitch = 1 << 18; - sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT; - - return true; + return sna->render_state.gen7.info->name; } +static bool +gen7_blit_tex(struct sna *sna, + uint8_t op, bool scale, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr mask,struct kgem_bo *mask_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + int32_t src_x, int32_t src_y, + int32_t msk_x, int32_t msk_y, + int32_t dst_x, int32_t dst_y, + int32_t width, int32_t height, + struct sna_composite_op *tmp) +{ + + + tmp->op = PictOpSrc; + + tmp->dst.pixmap = dst; + tmp->dst.bo = dst_bo; + tmp->dst.width = dst->drawable.width; + tmp->dst.height = dst->drawable.height; + tmp->dst.format = PICT_x8r8g8b8; + + + tmp->src.repeat = RepeatNone; + tmp->src.filter = PictFilterNearest; + tmp->src.is_affine = true; + + 
tmp->src.bo = src_bo; + tmp->src.pict_format = PICT_x8r8g8b8; + tmp->src.card_format = gen7_get_card_format(tmp->src.pict_format); + tmp->src.width = src->drawable.width; + tmp->src.height = src->drawable.height; + + + tmp->is_affine = tmp->src.is_affine; + tmp->has_component_alpha = false; + tmp->need_magic_ca_pass = false; + + tmp->mask.repeat = SAMPLER_EXTEND_NONE; + tmp->mask.filter = SAMPLER_FILTER_NEAREST; + tmp->mask.is_affine = true; + + tmp->mask.bo = mask_bo; + tmp->mask.pict_format = PIXMAN_a8; + tmp->mask.card_format = gen7_get_card_format(tmp->mask.pict_format); + tmp->mask.width = mask->drawable.width; + tmp->mask.height = mask->drawable.height; + + if( scale ) + { + tmp->src.scale[0] = 1.f/width; + tmp->src.scale[1] = 1.f/height; + } + else + { + tmp->src.scale[0] = 1.f/src->drawable.width; + tmp->src.scale[1] = 1.f/src->drawable.height; + } + + tmp->mask.scale[0] = 1.f/mask->drawable.width; + tmp->mask.scale[1] = 1.f/mask->drawable.height; + + + + tmp->u.gen7.flags = + GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter, + tmp->src.repeat, + tmp->mask.filter, + tmp->mask.repeat), + gen7_get_blend(tmp->op, + tmp->has_component_alpha, + tmp->dst.format), +/* gen7_choose_composite_kernel(tmp->op, + tmp->mask.bo != NULL, + tmp->has_component_alpha, + tmp->is_affine), */ + GEN7_WM_KERNEL_MASK, + gen4_choose_composite_emitter(sna, tmp)); + + tmp->blt = gen7_render_composite_blt; +// tmp->box = gen7_render_composite_box; + tmp->done = gen7_render_composite_done; + + kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); + if (!kgem_check_bo(&sna->kgem, + tmp->dst.bo, tmp->src.bo, tmp->mask.bo, + NULL)) { + kgem_submit(&sna->kgem); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + gen7_emit_composite_state(sna, tmp); + gen7_align_vertex(sna, tmp); + return true; +} diff --git a/drivers/video/Intel-2D/gen7_render.h b/drivers/video/Intel-2D/gen7_render.h index c4473199d2..36b2cfe5eb 100644 --- a/drivers/video/Intel-2D/gen7_render.h +++ 
b/drivers/video/Intel-2D/gen7_render.h @@ -1224,6 +1224,8 @@ struct gen7_sampler_state { /* Surface state DW0 */ #define GEN7_SURFACE_RC_READ_WRITE (1 << 8) +#define GEN7_SURFACE_VALIGN_4 (1 << 16) +#define GEN7_SURFACE_HALIGN_8 (1 << 15) #define GEN7_SURFACE_TILED (1 << 14) #define GEN7_SURFACE_TILED_Y (1 << 13) #define GEN7_SURFACE_FORMAT_SHIFT 18 diff --git a/drivers/video/Intel-2D/i915_pciids.h b/drivers/video/Intel-2D/i915_pciids.h new file mode 100644 index 0000000000..8a10f5c354 --- /dev/null +++ b/drivers/video/Intel-2D/i915_pciids.h @@ -0,0 +1,211 @@ +/* + * Copyright 2013 Intel Corporation + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef _I915_PCIIDS_H +#define _I915_PCIIDS_H + +/* + * A pci_device_id struct { + * __u32 vendor, device; + * __u32 subvendor, subdevice; + * __u32 class, class_mask; + * kernel_ulong_t driver_data; + * }; + * Don't use C99 here because "class" is reserved and we want to + * give userspace flexibility. + */ +#define INTEL_VGA_DEVICE(id, info) { \ + 0x8086, id, \ + ~0, ~0, \ + 0x030000, 0xff0000, \ + (unsigned long) info } + +#define INTEL_QUANTA_VGA_DEVICE(info) { \ + 0x8086, 0x16a, \ + 0x152d, 0x8990, \ + 0x030000, 0xff0000, \ + (unsigned long) info } + +#define INTEL_I830_IDS(info) \ + INTEL_VGA_DEVICE(0x3577, info) + +#define INTEL_I845G_IDS(info) \ + INTEL_VGA_DEVICE(0x2562, info) + +#define INTEL_I85X_IDS(info) \ + INTEL_VGA_DEVICE(0x3582, info), /* I855_GM */ \ + INTEL_VGA_DEVICE(0x358e, info) + +#define INTEL_I865G_IDS(info) \ + INTEL_VGA_DEVICE(0x2572, info) /* I865_G */ + +#define INTEL_I915G_IDS(info) \ + INTEL_VGA_DEVICE(0x2582, info), /* I915_G */ \ + INTEL_VGA_DEVICE(0x258a, info) /* E7221_G */ + +#define INTEL_I915GM_IDS(info) \ + INTEL_VGA_DEVICE(0x2592, info) /* I915_GM */ + +#define INTEL_I945G_IDS(info) \ + INTEL_VGA_DEVICE(0x2772, info) /* I945_G */ + +#define INTEL_I945GM_IDS(info) \ + INTEL_VGA_DEVICE(0x27a2, info), /* I945_GM */ \ + INTEL_VGA_DEVICE(0x27ae, info) /* I945_GME */ + +#define INTEL_I965G_IDS(info) \ + INTEL_VGA_DEVICE(0x2972, info), /* I946_GZ */ \ + INTEL_VGA_DEVICE(0x2982, info), /* G35_G */ \ + INTEL_VGA_DEVICE(0x2992, info), /* I965_Q */ \ + INTEL_VGA_DEVICE(0x29a2, info) /* I965_G */ + +#define INTEL_G33_IDS(info) \ + INTEL_VGA_DEVICE(0x29b2, info), /* Q35_G */ \ + INTEL_VGA_DEVICE(0x29c2, info), /* G33_G */ \ + INTEL_VGA_DEVICE(0x29d2, info) /* Q33_G */ + +#define INTEL_I965GM_IDS(info) \ + INTEL_VGA_DEVICE(0x2a02, info), /* I965_GM */ \ + INTEL_VGA_DEVICE(0x2a12, info) /* I965_GME */ + +#define INTEL_GM45_IDS(info) \ + INTEL_VGA_DEVICE(0x2a42, info) /* GM45_G */ + +#define INTEL_G45_IDS(info) \ + 
INTEL_VGA_DEVICE(0x2e02, info), /* IGD_E_G */ \ + INTEL_VGA_DEVICE(0x2e12, info), /* Q45_G */ \ + INTEL_VGA_DEVICE(0x2e22, info), /* G45_G */ \ + INTEL_VGA_DEVICE(0x2e32, info), /* G41_G */ \ + INTEL_VGA_DEVICE(0x2e42, info), /* B43_G */ \ + INTEL_VGA_DEVICE(0x2e92, info) /* B43_G.1 */ + +#define INTEL_PINEVIEW_IDS(info) \ + INTEL_VGA_DEVICE(0xa001, info), \ + INTEL_VGA_DEVICE(0xa011, info) + +#define INTEL_IRONLAKE_D_IDS(info) \ + INTEL_VGA_DEVICE(0x0042, info) + +#define INTEL_IRONLAKE_M_IDS(info) \ + INTEL_VGA_DEVICE(0x0046, info) + +#define INTEL_SNB_D_IDS(info) \ + INTEL_VGA_DEVICE(0x0102, info), \ + INTEL_VGA_DEVICE(0x0112, info), \ + INTEL_VGA_DEVICE(0x0122, info), \ + INTEL_VGA_DEVICE(0x010A, info) + +#define INTEL_SNB_M_IDS(info) \ + INTEL_VGA_DEVICE(0x0106, info), \ + INTEL_VGA_DEVICE(0x0116, info), \ + INTEL_VGA_DEVICE(0x0126, info) + +#define INTEL_IVB_M_IDS(info) \ + INTEL_VGA_DEVICE(0x0156, info), /* GT1 mobile */ \ + INTEL_VGA_DEVICE(0x0166, info) /* GT2 mobile */ + +#define INTEL_IVB_D_IDS(info) \ + INTEL_VGA_DEVICE(0x0152, info), /* GT1 desktop */ \ + INTEL_VGA_DEVICE(0x0162, info), /* GT2 desktop */ \ + INTEL_VGA_DEVICE(0x015a, info), /* GT1 server */ \ + INTEL_VGA_DEVICE(0x016a, info) /* GT2 server */ + +#define INTEL_IVB_Q_IDS(info) \ + INTEL_QUANTA_VGA_DEVICE(info) /* Quanta transcode */ + +#define INTEL_HSW_D_IDS(info) \ + INTEL_VGA_DEVICE(0x0402, info), /* GT1 desktop */ \ + INTEL_VGA_DEVICE(0x0412, info), /* GT2 desktop */ \ + INTEL_VGA_DEVICE(0x0422, info), /* GT3 desktop */ \ + INTEL_VGA_DEVICE(0x040a, info), /* GT1 server */ \ + INTEL_VGA_DEVICE(0x041a, info), /* GT2 server */ \ + INTEL_VGA_DEVICE(0x042a, info), /* GT3 server */ \ + INTEL_VGA_DEVICE(0x040B, info), /* GT1 reserved */ \ + INTEL_VGA_DEVICE(0x041B, info), /* GT2 reserved */ \ + INTEL_VGA_DEVICE(0x042B, info), /* GT3 reserved */ \ + INTEL_VGA_DEVICE(0x040E, info), /* GT1 reserved */ \ + INTEL_VGA_DEVICE(0x041E, info), /* GT2 reserved */ \ + INTEL_VGA_DEVICE(0x042E, info), /* 
GT3 reserved */ \ + INTEL_VGA_DEVICE(0x0C02, info), /* SDV GT1 desktop */ \ + INTEL_VGA_DEVICE(0x0C12, info), /* SDV GT2 desktop */ \ + INTEL_VGA_DEVICE(0x0C22, info), /* SDV GT3 desktop */ \ + INTEL_VGA_DEVICE(0x0C0A, info), /* SDV GT1 server */ \ + INTEL_VGA_DEVICE(0x0C1A, info), /* SDV GT2 server */ \ + INTEL_VGA_DEVICE(0x0C2A, info), /* SDV GT3 server */ \ + INTEL_VGA_DEVICE(0x0C0B, info), /* SDV GT1 reserved */ \ + INTEL_VGA_DEVICE(0x0C1B, info), /* SDV GT2 reserved */ \ + INTEL_VGA_DEVICE(0x0C2B, info), /* SDV GT3 reserved */ \ + INTEL_VGA_DEVICE(0x0C0E, info), /* SDV GT1 reserved */ \ + INTEL_VGA_DEVICE(0x0C1E, info), /* SDV GT2 reserved */ \ + INTEL_VGA_DEVICE(0x0C2E, info), /* SDV GT3 reserved */ \ + INTEL_VGA_DEVICE(0x0A02, info), /* ULT GT1 desktop */ \ + INTEL_VGA_DEVICE(0x0A12, info), /* ULT GT2 desktop */ \ + INTEL_VGA_DEVICE(0x0A22, info), /* ULT GT3 desktop */ \ + INTEL_VGA_DEVICE(0x0A0A, info), /* ULT GT1 server */ \ + INTEL_VGA_DEVICE(0x0A1A, info), /* ULT GT2 server */ \ + INTEL_VGA_DEVICE(0x0A2A, info), /* ULT GT3 server */ \ + INTEL_VGA_DEVICE(0x0A0B, info), /* ULT GT1 reserved */ \ + INTEL_VGA_DEVICE(0x0A1B, info), /* ULT GT2 reserved */ \ + INTEL_VGA_DEVICE(0x0A2B, info), /* ULT GT3 reserved */ \ + INTEL_VGA_DEVICE(0x0D02, info), /* CRW GT1 desktop */ \ + INTEL_VGA_DEVICE(0x0D12, info), /* CRW GT2 desktop */ \ + INTEL_VGA_DEVICE(0x0D22, info), /* CRW GT3 desktop */ \ + INTEL_VGA_DEVICE(0x0D0A, info), /* CRW GT1 server */ \ + INTEL_VGA_DEVICE(0x0D1A, info), /* CRW GT2 server */ \ + INTEL_VGA_DEVICE(0x0D2A, info), /* CRW GT3 server */ \ + INTEL_VGA_DEVICE(0x0D0B, info), /* CRW GT1 reserved */ \ + INTEL_VGA_DEVICE(0x0D1B, info), /* CRW GT2 reserved */ \ + INTEL_VGA_DEVICE(0x0D2B, info), /* CRW GT3 reserved */ \ + INTEL_VGA_DEVICE(0x0D0E, info), /* CRW GT1 reserved */ \ + INTEL_VGA_DEVICE(0x0D1E, info), /* CRW GT2 reserved */ \ + INTEL_VGA_DEVICE(0x0D2E, info) /* CRW GT3 reserved */ \ + +#define INTEL_HSW_M_IDS(info) \ + INTEL_VGA_DEVICE(0x0406, 
info), /* GT1 mobile */ \ + INTEL_VGA_DEVICE(0x0416, info), /* GT2 mobile */ \ + INTEL_VGA_DEVICE(0x0426, info), /* GT3 mobile */ \ + INTEL_VGA_DEVICE(0x0C06, info), /* SDV GT1 mobile */ \ + INTEL_VGA_DEVICE(0x0C16, info), /* SDV GT2 mobile */ \ + INTEL_VGA_DEVICE(0x0C26, info), /* SDV GT3 mobile */ \ + INTEL_VGA_DEVICE(0x0A06, info), /* ULT GT1 mobile */ \ + INTEL_VGA_DEVICE(0x0A16, info), /* ULT GT2 mobile */ \ + INTEL_VGA_DEVICE(0x0A26, info), /* ULT GT3 mobile */ \ + INTEL_VGA_DEVICE(0x0A0E, info), /* ULT GT1 reserved */ \ + INTEL_VGA_DEVICE(0x0A1E, info), /* ULT GT2 reserved */ \ + INTEL_VGA_DEVICE(0x0A2E, info), /* ULT GT3 reserved */ \ + INTEL_VGA_DEVICE(0x0D06, info), /* CRW GT1 mobile */ \ + INTEL_VGA_DEVICE(0x0D16, info), /* CRW GT2 mobile */ \ + INTEL_VGA_DEVICE(0x0D26, info) /* CRW GT3 mobile */ + +#define INTEL_VLV_M_IDS(info) \ + INTEL_VGA_DEVICE(0x0f30, info), \ + INTEL_VGA_DEVICE(0x0f31, info), \ + INTEL_VGA_DEVICE(0x0f32, info), \ + INTEL_VGA_DEVICE(0x0f33, info), \ + INTEL_VGA_DEVICE(0x0157, info) + +#define INTEL_VLV_D_IDS(info) \ + INTEL_VGA_DEVICE(0x0155, info) + +#endif /* _I915_PCIIDS_H */ diff --git a/drivers/video/Intel-2D/intel_driver.h b/drivers/video/Intel-2D/intel_driver.h index 104214a6e0..99d820d401 100644 --- a/drivers/video/Intel-2D/intel_driver.h +++ b/drivers/video/Intel-2D/intel_driver.h @@ -9,180 +9,52 @@ #define INTEL_VERSION_MINOR PACKAGE_VERSION_MINOR #define INTEL_VERSION_PATCH PACKAGE_VERSION_PATCHLEVEL -#ifndef PCI_CHIP_I810 #define PCI_CHIP_I810 0x7121 #define PCI_CHIP_I810_DC100 0x7123 #define PCI_CHIP_I810_E 0x7125 #define PCI_CHIP_I815 0x1132 -#define PCI_CHIP_I810_BRIDGE 0x7120 -#define PCI_CHIP_I810_DC100_BRIDGE 0x7122 -#define PCI_CHIP_I810_E_BRIDGE 0x7124 -#define PCI_CHIP_I815_BRIDGE 0x1130 -#endif -#ifndef PCI_CHIP_I830_M #define PCI_CHIP_I830_M 0x3577 -#define PCI_CHIP_I830_M_BRIDGE 0x3575 -#endif - -#ifndef PCI_CHIP_845_G #define PCI_CHIP_845_G 0x2562 -#define PCI_CHIP_845_G_BRIDGE 0x2560 -#endif - -#ifndef
PCI_CHIP_I854 #define PCI_CHIP_I854 0x358E -#define PCI_CHIP_I854_BRIDGE 0x358C -#endif - -#ifndef PCI_CHIP_I855_GM #define PCI_CHIP_I855_GM 0x3582 -#define PCI_CHIP_I855_GM_BRIDGE 0x3580 -#endif - -#ifndef PCI_CHIP_I865_G #define PCI_CHIP_I865_G 0x2572 -#define PCI_CHIP_I865_G_BRIDGE 0x2570 -#endif -#ifndef PCI_CHIP_I915_G #define PCI_CHIP_I915_G 0x2582 -#define PCI_CHIP_I915_G_BRIDGE 0x2580 -#endif - -#ifndef PCI_CHIP_I915_GM #define PCI_CHIP_I915_GM 0x2592 -#define PCI_CHIP_I915_GM_BRIDGE 0x2590 -#endif - -#ifndef PCI_CHIP_E7221_G #define PCI_CHIP_E7221_G 0x258A -/* Same as I915_G_BRIDGE */ -#define PCI_CHIP_E7221_G_BRIDGE 0x2580 -#endif - -#ifndef PCI_CHIP_I945_G #define PCI_CHIP_I945_G 0x2772 -#define PCI_CHIP_I945_G_BRIDGE 0x2770 -#endif - -#ifndef PCI_CHIP_I945_GM #define PCI_CHIP_I945_GM 0x27A2 -#define PCI_CHIP_I945_GM_BRIDGE 0x27A0 -#endif - -#ifndef PCI_CHIP_I945_GME #define PCI_CHIP_I945_GME 0x27AE -#define PCI_CHIP_I945_GME_BRIDGE 0x27AC -#endif - -#ifndef PCI_CHIP_PINEVIEW_M #define PCI_CHIP_PINEVIEW_M 0xA011 -#define PCI_CHIP_PINEVIEW_M_BRIDGE 0xA010 #define PCI_CHIP_PINEVIEW_G 0xA001 -#define PCI_CHIP_PINEVIEW_G_BRIDGE 0xA000 -#endif - -#ifndef PCI_CHIP_G35_G -#define PCI_CHIP_G35_G 0x2982 -#define PCI_CHIP_G35_G_BRIDGE 0x2980 -#endif - -#ifndef PCI_CHIP_I965_Q -#define PCI_CHIP_I965_Q 0x2992 -#define PCI_CHIP_I965_Q_BRIDGE 0x2990 -#endif - -#ifndef PCI_CHIP_I965_G -#define PCI_CHIP_I965_G 0x29A2 -#define PCI_CHIP_I965_G_BRIDGE 0x29A0 -#endif - -#ifndef PCI_CHIP_I946_GZ -#define PCI_CHIP_I946_GZ 0x2972 -#define PCI_CHIP_I946_GZ_BRIDGE 0x2970 -#endif - -#ifndef PCI_CHIP_I965_GM -#define PCI_CHIP_I965_GM 0x2A02 -#define PCI_CHIP_I965_GM_BRIDGE 0x2A00 -#endif - -#ifndef PCI_CHIP_I965_GME -#define PCI_CHIP_I965_GME 0x2A12 -#define PCI_CHIP_I965_GME_BRIDGE 0x2A10 -#endif - -#ifndef PCI_CHIP_G33_G -#define PCI_CHIP_G33_G 0x29C2 -#define PCI_CHIP_G33_G_BRIDGE 0x29C0 -#endif - -#ifndef PCI_CHIP_Q35_G #define PCI_CHIP_Q35_G 0x29B2 -#define 
PCI_CHIP_Q35_G_BRIDGE 0x29B0 -#endif - -#ifndef PCI_CHIP_Q33_G +#define PCI_CHIP_G33_G 0x29C2 #define PCI_CHIP_Q33_G 0x29D2 -#define PCI_CHIP_Q33_G_BRIDGE 0x29D0 -#endif -#ifndef PCI_CHIP_GM45_GM +#define PCI_CHIP_G35_G 0x2982 +#define PCI_CHIP_I965_Q 0x2992 +#define PCI_CHIP_I965_G 0x29A2 +#define PCI_CHIP_I946_GZ 0x2972 +#define PCI_CHIP_I965_GM 0x2A02 +#define PCI_CHIP_I965_GME 0x2A12 #define PCI_CHIP_GM45_GM 0x2A42 -#define PCI_CHIP_GM45_BRIDGE 0x2A40 -#endif - -#ifndef PCI_CHIP_G45_E_G #define PCI_CHIP_G45_E_G 0x2E02 -#define PCI_CHIP_G45_E_G_BRIDGE 0x2E00 -#endif - -#ifndef PCI_CHIP_G45_G #define PCI_CHIP_G45_G 0x2E22 -#define PCI_CHIP_G45_G_BRIDGE 0x2E20 -#endif - -#ifndef PCI_CHIP_Q45_G #define PCI_CHIP_Q45_G 0x2E12 -#define PCI_CHIP_Q45_G_BRIDGE 0x2E10 -#endif - -#ifndef PCI_CHIP_G41_G #define PCI_CHIP_G41_G 0x2E32 -#define PCI_CHIP_G41_G_BRIDGE 0x2E30 -#endif - -#ifndef PCI_CHIP_B43_G #define PCI_CHIP_B43_G 0x2E42 -#define PCI_CHIP_B43_G_BRIDGE 0x2E40 -#endif - -#ifndef PCI_CHIP_B43_G1 #define PCI_CHIP_B43_G1 0x2E92 -#define PCI_CHIP_B43_G1_BRIDGE 0x2E90 -#endif -#ifndef PCI_CHIP_IRONLAKE_D_G #define PCI_CHIP_IRONLAKE_D_G 0x0042 -#define PCI_CHIP_IRONLAKE_D_G_BRIDGE 0x0040 -#endif - -#ifndef PCI_CHIP_IRONLAKE_M_G #define PCI_CHIP_IRONLAKE_M_G 0x0046 -#define PCI_CHIP_IRONLAKE_M_G_BRIDGE 0x0044 -#endif -#ifndef PCI_CHIP_SANDYBRIDGE_BRIDGE -#define PCI_CHIP_SANDYBRIDGE_BRIDGE 0x0100 /* Desktop */ #define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 #define PCI_CHIP_SANDYBRIDGE_GT2 0x0112 #define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122 -#define PCI_CHIP_SANDYBRIDGE_BRIDGE_M 0x0104 /* Mobile */ #define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 #define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116 #define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126 -#define PCI_CHIP_SANDYBRIDGE_BRIDGE_S 0x0108 /* Server */ #define PCI_CHIP_SANDYBRIDGE_S_GT 0x010A #define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 @@ -194,87 +66,51 @@ #define PCI_CHIP_HASWELL_D_GT1 0x0402 #define PCI_CHIP_HASWELL_D_GT2 0x0412 -#define 
PCI_CHIP_HASWELL_D_GT2_PLUS 0x0422 +#define PCI_CHIP_HASWELL_D_GT3 0x0422 #define PCI_CHIP_HASWELL_M_GT1 0x0406 #define PCI_CHIP_HASWELL_M_GT2 0x0416 -#define PCI_CHIP_HASWELL_M_GT2_PLUS 0x0426 +#define PCI_CHIP_HASWELL_M_GT3 0x0426 #define PCI_CHIP_HASWELL_S_GT1 0x040A #define PCI_CHIP_HASWELL_S_GT2 0x041A -#define PCI_CHIP_HASWELL_S_GT2_PLUS 0x042A -#define PCI_CHIP_HASWELL_SDV_D_GT1 0x0C02 -#define PCI_CHIP_HASWELL_SDV_D_GT2 0x0C12 -#define PCI_CHIP_HASWELL_SDV_D_GT2_PLUS 0x0C22 -#define PCI_CHIP_HASWELL_SDV_M_GT1 0x0C06 -#define PCI_CHIP_HASWELL_SDV_M_GT2 0x0C16 -#define PCI_CHIP_HASWELL_SDV_M_GT2_PLUS 0x0C26 -#define PCI_CHIP_HASWELL_SDV_S_GT1 0x0C0A -#define PCI_CHIP_HASWELL_SDV_S_GT2 0x0C1A -#define PCI_CHIP_HASWELL_SDV_S_GT2_PLUS 0x0C2A +#define PCI_CHIP_HASWELL_S_GT3 0x042A +#define PCI_CHIP_HASWELL_B_GT1 0x040B +#define PCI_CHIP_HASWELL_B_GT2 0x041B +#define PCI_CHIP_HASWELL_B_GT3 0x042B +#define PCI_CHIP_HASWELL_E_GT1 0x040E +#define PCI_CHIP_HASWELL_E_GT2 0x041E +#define PCI_CHIP_HASWELL_E_GT3 0x042E + #define PCI_CHIP_HASWELL_ULT_D_GT1 0x0A02 #define PCI_CHIP_HASWELL_ULT_D_GT2 0x0A12 -#define PCI_CHIP_HASWELL_ULT_D_GT2_PLUS 0x0A22 +#define PCI_CHIP_HASWELL_ULT_D_GT3 0x0A22 #define PCI_CHIP_HASWELL_ULT_M_GT1 0x0A06 #define PCI_CHIP_HASWELL_ULT_M_GT2 0x0A16 -#define PCI_CHIP_HASWELL_ULT_M_GT2_PLUS 0x0A26 +#define PCI_CHIP_HASWELL_ULT_M_GT3 0x0A26 #define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A #define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A -#define PCI_CHIP_HASWELL_ULT_S_GT2_PLUS 0x0A2A -#define PCI_CHIP_HASWELL_CRW_D_GT1 0x0D12 -#define PCI_CHIP_HASWELL_CRW_D_GT2 0x0D22 -#define PCI_CHIP_HASWELL_CRW_D_GT2_PLUS 0x0D32 -#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D16 -#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D26 -#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D36 -#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D1A -#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D2A -#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D3A +#define PCI_CHIP_HASWELL_ULT_S_GT3 0x0A2A +#define PCI_CHIP_HASWELL_ULT_B_GT1 0x0A0B 
+#define PCI_CHIP_HASWELL_ULT_B_GT2 0x0A1B +#define PCI_CHIP_HASWELL_ULT_B_GT3 0x0A2B +#define PCI_CHIP_HASWELL_ULT_E_GT1 0x0A0E +#define PCI_CHIP_HASWELL_ULT_E_GT2 0x0A1E +#define PCI_CHIP_HASWELL_ULT_E_GT3 0x0A2E -#define PCI_CHIP_VALLEYVIEW_PO 0x0f30 -#define PCI_CHIP_VALLEYVIEW_1 0x0f31 -#define PCI_CHIP_VALLEYVIEW_2 0x0f32 -#define PCI_CHIP_VALLEYVIEW_3 0x0f33 - -#endif - -#define I85X_CAPID 0x44 -#define I85X_VARIANT_MASK 0x7 -#define I85X_VARIANT_SHIFT 5 -#define I855_GME 0x0 -#define I855_GM 0x4 -#define I852_GME 0x2 -#define I852_GM 0x5 - -#define I810_MEMBASE(p,n) (p)->regions[(n)].base_addr -#define VENDOR_ID(p) (p)->vendor_id -#define DEVICE_ID(p) (p)->device_id -#define SUBVENDOR_ID(p) (p)->subvendor_id -#define SUBSYS_ID(p) (p)->subdevice_id -#define CHIP_REVISION(p) (p)->revision - -#define INTEL_INFO(intel) ((intel)->info) -#define IS_GENx(intel, X) (INTEL_INFO(intel)->gen >= 8*(X) && INTEL_INFO(intel)->gen < 8*((X)+1)) -#define IS_GEN1(intel) IS_GENx(intel, 1) -#define IS_GEN2(intel) IS_GENx(intel, 2) -#define IS_GEN3(intel) IS_GENx(intel, 3) -#define IS_GEN4(intel) IS_GENx(intel, 4) -#define IS_GEN5(intel) IS_GENx(intel, 5) -#define IS_GEN6(intel) IS_GENx(intel, 6) -#define IS_GEN7(intel) IS_GENx(intel, 7) -#define IS_HSW(intel) (INTEL_INFO(intel)->gen == 075) - -/* Some chips have specific errata (or limits) that we need to workaround. 
*/ -#define IS_I830(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_I830_M) -#define IS_845G(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_845_G) -#define IS_I865G(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_I865_G) - -#define IS_I915G(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I915_G || DEVICE_ID(pI810->PciInfo) == PCI_CHIP_E7221_G) -#define IS_I915GM(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I915_GM) - -#define IS_965_Q(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I965_Q) - -/* supports Y tiled surfaces (pre-965 Mesa isn't ready yet) */ -#define SUPPORTS_YTILING(pI810) (INTEL_INFO(intel)->gen >= 040) -#define HAS_BLT(pI810) (INTEL_INFO(intel)->gen >= 060) +#define PCI_CHIP_HASWELL_CRW_D_GT1 0x0D02 +#define PCI_CHIP_HASWELL_CRW_D_GT2 0x0D12 +#define PCI_CHIP_HASWELL_CRW_D_GT3 0x0D22 +#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D06 +#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D16 +#define PCI_CHIP_HASWELL_CRW_M_GT3 0x0D26 +#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D0A +#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D1A +#define PCI_CHIP_HASWELL_CRW_S_GT3 0x0D2A +#define PCI_CHIP_HASWELL_CRW_B_GT1 0x0D0B +#define PCI_CHIP_HASWELL_CRW_B_GT2 0x0D1B +#define PCI_CHIP_HASWELL_CRW_B_GT3 0x0D2B +#define PCI_CHIP_HASWELL_CRW_E_GT1 0x0D0E +#define PCI_CHIP_HASWELL_CRW_E_GT2 0x0D1E +#define PCI_CHIP_HASWELL_CRW_E_GT3 0x0D2E struct intel_device_info { int gen; diff --git a/drivers/video/Intel-2D/intel_list.h b/drivers/video/Intel-2D/intel_list.h index 5716fc6f10..38e4d52fd3 100644 --- a/drivers/video/Intel-2D/intel_list.h +++ b/drivers/video/Intel-2D/intel_list.h @@ -107,6 +107,7 @@ * There are no requirements for a list head, any struct list can be a list * head. 
*/ + struct list { struct list *next, *prev; }; diff --git a/drivers/video/Intel-2D/kgem-sna.c b/drivers/video/Intel-2D/kgem.c similarity index 72% rename from drivers/video/Intel-2D/kgem-sna.c rename to drivers/video/Intel-2D/kgem.c index 3c9098ea85..d45f31a96f 100644 --- a/drivers/video/Intel-2D/kgem-sna.c +++ b/drivers/video/Intel-2D/kgem.c @@ -32,19 +32,20 @@ #include "sna.h" #include "sna_reg.h" -static inline -int user_free(void *mem) -{ - int val; - __asm__ __volatile__( - "int $0x40" - :"=a"(val) - :"a"(68),"b"(12),"c"(mem)); - return val; -} +#include +#include +#include +#ifdef HAVE_VALGRIND +#include +#include +#endif -unsigned int cpu_cache_size(); +#ifdef HAVE_STRUCT_SYSINFO_TOTALRAM +#include +#endif + +#include "sna_cpuid.h" static struct kgem_bo * search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); @@ -53,11 +54,13 @@ static struct kgem_bo * search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define DBG_NO_HW 0 -#define DBG_NO_TILING 1 +#define DBG_NO_TILING 0 #define DBG_NO_CACHE 0 #define DBG_NO_CACHE_LEVEL 0 #define DBG_NO_CPU 0 +#define DBG_NO_CREATE2 1 #define DBG_NO_USERPTR 0 +#define DBG_NO_UNSYNCHRONIZED_USERPTR 0 #define DBG_NO_LLC 0 #define DBG_NO_SEMAPHORES 0 #define DBG_NO_MADV 1 @@ -68,14 +71,16 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define DBG_NO_SECURE_BATCHES 0 #define DBG_NO_PINNED_BATCHES 0 #define DBG_NO_FAST_RELOC 0 -#define DBG_NO_HANDLE_LUT 0 +#define DBG_NO_HANDLE_LUT 1 +#define DBG_NO_WT 0 #define DBG_DUMP 0 +#define FORCE_MMAP_SYNC 0 /* ((1 << DOMAIN_CPU) | (1 << DOMAIN_GTT)) */ + #ifndef DEBUG_SYNC #define DEBUG_SYNC 0 #endif -#define SHOW_BATCH 1 #if 0 #define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__)) @@ -99,7 +104,6 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define MAX_CPU_VMA_CACHE INT16_MAX #define MAP_PRESERVE_TIME 10 -#define MAP(ptr) 
((void*)((uintptr_t)(ptr) & ~3)) #define MAKE_CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1)) #define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 3)) #define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 2) @@ -115,13 +119,14 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24 #define LOCAL_I915_PARAM_HAS_NO_RELOC 25 #define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26 +#define LOCAL_I915_PARAM_HAS_WT 27 #define LOCAL_I915_EXEC_IS_PINNED (1<<10) #define LOCAL_I915_EXEC_NO_RELOC (1<<11) #define LOCAL_I915_EXEC_HANDLE_LUT (1<<12) struct local_i915_gem_userptr { uint64_t user_ptr; - uint32_t user_size; + uint64_t user_size; uint32_t flags; #define I915_USERPTR_READ_ONLY (1<<0) #define I915_USERPTR_UNSYNCHRONIZED (1<<31) @@ -130,13 +135,14 @@ struct local_i915_gem_userptr { #define UNCACHED 0 #define SNOOPED 1 +#define DISPLAY 2 -struct local_i915_gem_cacheing { +struct local_i915_gem_caching { uint32_t handle; - uint32_t cacheing; + uint32_t caching; }; -#define LOCAL_IOCTL_I915_GEM_SET_CACHEING SRV_I915_GEM_SET_CACHING +#define LOCAL_IOCTL_I915_GEM_SET_CACHING SRV_I915_GEM_SET_CACHING struct local_fbinfo { int width; @@ -181,6 +187,23 @@ static void debug_alloc__bo(struct kgem *kgem, struct kgem_bo *bo) #define debug_alloc__bo(k, b) #endif +#ifndef NDEBUG +static void assert_tiling(struct kgem *kgem, struct kgem_bo *bo) +{ + struct drm_i915_gem_get_tiling tiling; + + assert(bo); + + VG_CLEAR(tiling); + tiling.handle = bo->handle; + tiling.tiling_mode = -1; + (void)drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling); + assert(tiling.tiling_mode == bo->tiling); +} +#else +#define assert_tiling(kgem, bo) +#endif + static void kgem_sna_reset(struct kgem *kgem) { struct sna *sna = container_of(kgem, struct sna, kgem); @@ -206,27 +229,26 @@ static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride) if (DBG_NO_TILING) return false; -/* + VG_CLEAR(set_tiling); do { set_tiling.handle = handle; 
set_tiling.tiling_mode = tiling; set_tiling.stride = stride; - ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); - } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); -*/ - return false;//ret == 0; + ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); + } while (ret != 0); + return ret == 0; } -static bool gem_set_cacheing(int fd, uint32_t handle, int cacheing) +static bool gem_set_caching(int fd, uint32_t handle, int caching) { - struct local_i915_gem_cacheing arg; + struct local_i915_gem_caching arg; VG_CLEAR(arg); arg.handle = handle; - arg.cacheing = cacheing; - return drmIoctl(fd, LOCAL_IOCTL_I915_GEM_SET_CACHEING, &arg) == 0; + arg.caching = caching; + return drmIoctl(fd, LOCAL_IOCTL_I915_GEM_SET_CACHING, &arg) == 0; } @@ -265,13 +287,14 @@ static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo) DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__, bo->handle, bytes(bo))); assert(bo->proxy == NULL); + assert(!bo->snoop); + assert(kgem_bo_can_map(kgem, bo)); retry_gtt: VG_CLEAR(mmap_arg); mmap_arg.handle = bo->handle; if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg)) { - printf("%s: failed to retrieve GTT offset for handle=%d: %d\n", - __FUNCTION__, bo->handle, 0); + (void)__kgem_throttle_retire(kgem, 0); if (kgem_expire_cache(kgem)) goto retry_gtt; @@ -281,15 +304,17 @@ retry_gtt: goto retry_gtt; } + printf("%s: failed to retrieve GTT offset for handle=%d\n", + __FUNCTION__, bo->handle); return NULL; } retry_mmap: ptr = (void*)(int)mmap_arg.offset; if (ptr == NULL) { - printf("%s: failed to mmap %d, %d bytes, into GTT domain: %d\n", - __FUNCTION__, bo->handle, bytes(bo), 0); - + ErrorF("%s: failed to mmap handle=%d, %d bytes, into GTT domain\n", + __FUNCTION__, bo->handle, bytes(bo)); + ptr = NULL; } return ptr; @@ -387,6 +412,7 @@ bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo, kgem_bo_retire(kgem, bo); bo->domain = DOMAIN_NONE; } + bo->gtt_dirty = true; return true; } @@ -584,8 +610,6 @@ 
agp_aperture_size(struct pci_device *dev, unsigned gen) /* XXX assume that only future chipsets are unknown and follow * the post gen2 PCI layout. */ -// return dev->regions[gen < 030 ? 0 : 2].size; - return 0; } @@ -603,6 +627,49 @@ total_ram_size(void) return size != -1 ? size : 0; } +static unsigned +cpu_cache_size__cpuid4(void) +{ + /* "Deterministic Cache Parameters (Function 04h)": + * When EAX is initialized to a value of 4, the CPUID instruction + * returns deterministic cache information in the EAX, EBX, ECX + * and EDX registers. This function requires ECX be initialized + * with an index which indicates which cache to return information + * about. The OS is expected to call this function (CPUID.4) with + * ECX = 0, 1, 2, until EAX[4:0] == 0, indicating no more caches. + * The order in which the caches are returned is not specified + * and may change at Intel's discretion. + * + * Calculating the Cache Size in bytes: + * = (Ways +1) * (Partitions +1) * (Line Size +1) * (Sets +1) + */ + + unsigned int eax, ebx, ecx, edx; + unsigned int llc_size = 0; + int cnt = 0; + + if (__get_cpuid_max(BASIC_CPUID, NULL) < 4) + return 0; + + do { + unsigned associativity, line_partitions, line_size, sets; + + __cpuid_count(4, cnt++, eax, ebx, ecx, edx); + + if ((eax & 0x1f) == 0) + break; + + associativity = ((ebx >> 22) & 0x3ff) + 1; + line_partitions = ((ebx >> 12) & 0x3ff) + 1; + line_size = (ebx & 0xfff) + 1; + sets = ecx + 1; + + llc_size = associativity * line_partitions * line_size * sets; + } while (1); + + return llc_size; +} + static int gem_param(struct kgem *kgem, int name) { drm_i915_getparam_t gp; @@ -639,6 +706,14 @@ static bool test_has_handle_lut(struct kgem *kgem) return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0; } +static bool test_has_wt(struct kgem *kgem) +{ + if (DBG_NO_WT) + return false; + + return gem_param(kgem, LOCAL_I915_PARAM_HAS_WT) > 0; +} + static bool test_has_semaphores_enabled(struct kgem *kgem) { bool detected = false; @@
-719,7 +794,7 @@ static bool test_has_llc(struct kgem *kgem) return has_llc; } -static bool test_has_cacheing(struct kgem *kgem) +static bool test_has_caching(struct kgem *kgem) { uint32_t handle; bool ret; @@ -735,7 +810,7 @@ static bool test_has_cacheing(struct kgem *kgem) if (handle == 0) return false; - ret = gem_set_cacheing(kgem->fd, handle, UNCACHED); + ret = gem_set_caching(kgem->fd, handle, UNCACHED); gem_close(kgem->fd, handle); return ret; } @@ -753,7 +828,9 @@ static bool test_has_userptr(struct kgem *kgem) if (kgem->gen == 040) return false; - ptr = malloc(PAGE_SIZE); + if (posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE)) + return false; + handle = gem_userptr(kgem->fd, ptr, PAGE_SIZE, false); gem_close(kgem->fd, handle); free(ptr); @@ -764,6 +841,26 @@ static bool test_has_userptr(struct kgem *kgem) #endif } +static bool test_has_create2(struct kgem *kgem) +{ +#if defined(USE_CREATE2) + struct local_i915_gem_create2 args; + + if (DBG_NO_CREATE2) + return false; + + memset(&args, 0, sizeof(args)); + args.size = PAGE_SIZE; + args.caching = DISPLAY; + if (drmIoctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args) == 0) + gem_close(kgem->fd, args.handle); + + return args.handle != 0; +#else + return false; +#endif +} + static bool test_has_secure_batches(struct kgem *kgem) { if (DBG_NO_SECURE_BATCHES) @@ -908,19 +1005,29 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) DBG(("%s: has shared last-level-cache? %d\n", __FUNCTION__, kgem->has_llc)); - kgem->has_cacheing = test_has_cacheing(kgem); + kgem->has_wt = test_has_wt(kgem); + DBG(("%s: has write-through caching for scanouts? %d\n", __FUNCTION__, + kgem->has_wt)); + + kgem->has_caching = test_has_caching(kgem); DBG(("%s: has set-cache-level? %d\n", __FUNCTION__, - kgem->has_cacheing)); + kgem->has_caching)); kgem->has_userptr = test_has_userptr(kgem); DBG(("%s: has userptr? 
%d\n", __FUNCTION__, kgem->has_userptr)); + kgem->has_create2 = test_has_create2(kgem); + kgem->has_create2 = 0; + DBG(("%s: has create2? %d\n", __FUNCTION__, + kgem->has_create2)); + kgem->has_no_reloc = test_has_no_reloc(kgem); DBG(("%s: has no-reloc? %d\n", __FUNCTION__, kgem->has_no_reloc)); kgem->has_handle_lut = test_has_handle_lut(kgem); + kgem->has_handle_lut = 0; DBG(("%s: has handle-lut? %d\n", __FUNCTION__, kgem->has_handle_lut)); @@ -970,19 +1077,19 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) DBG(("%s: maximum batch size? %d\n", __FUNCTION__, kgem->batch_size)); - kgem->min_alignment = 16; + kgem->min_alignment = 4; if (gen < 040) kgem->min_alignment = 64; kgem->half_cpu_cache_pages = cpu_cache_size() >> 13; - DBG(("%s: half cpu cache %d pages\n", __FUNCTION__, - kgem->half_cpu_cache_pages)); + DBG(("%s: last-level cache size: %d bytes, threshold in pages: %d\n", + __FUNCTION__, cpu_cache_size(), kgem->half_cpu_cache_pages)); kgem->next_request = __kgem_request_alloc(kgem); DBG(("%s: cpu bo enabled %d: llc? %d, set-cache-level? %d, userptr? 
%d\n", __FUNCTION__, - !DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_cacheing), - kgem->has_llc, kgem->has_cacheing, kgem->has_userptr)); + !DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_caching), + kgem->has_llc, kgem->has_caching, kgem->has_userptr)); VG_CLEAR(aperture); aperture.aper_size = 0; @@ -1019,12 +1126,14 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) kgem->buffer_size *= 2; if (kgem->buffer_size >> 12 > kgem->half_cpu_cache_pages) kgem->buffer_size = kgem->half_cpu_cache_pages << 12; + kgem->buffer_size = 1 << __fls(kgem->buffer_size); DBG(("%s: buffer size=%d [%d KiB]\n", __FUNCTION__, kgem->buffer_size, kgem->buffer_size / 1024)); + assert(kgem->buffer_size); kgem->max_object_size = 3 * (kgem->aperture_high >> 12) << 10; kgem->max_gpu_size = kgem->max_object_size; - if (!kgem->has_llc) + if (!kgem->has_llc && kgem->max_gpu_size > MAX_CACHE_SIZE) kgem->max_gpu_size = MAX_CACHE_SIZE; totalram = total_ram_size(); @@ -1033,7 +1142,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) __FUNCTION__)); totalram = kgem->aperture_total; } - DBG(("%s: total ram=%u\n", __FUNCTION__, totalram)); + DBG(("%s: total ram=%ld\n", __FUNCTION__, (long)totalram)); if (kgem->max_object_size > totalram / 2) kgem->max_object_size = totalram / 2; if (kgem->max_gpu_size > totalram / 4) @@ -1052,12 +1161,24 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) kgem->max_upload_tile_size = kgem->aperture_mappable / 4; if (kgem->max_upload_tile_size > half_gpu_max) kgem->max_upload_tile_size = half_gpu_max; + if (kgem->max_upload_tile_size > kgem->aperture_high/2) + kgem->max_upload_tile_size = kgem->aperture_high/2; + if (kgem->max_upload_tile_size > kgem->aperture_low) + kgem->max_upload_tile_size = kgem->aperture_low; + if (kgem->max_upload_tile_size < 16*PAGE_SIZE) + kgem->max_upload_tile_size = 16*PAGE_SIZE; kgem->large_object_size = 
MAX_CACHE_SIZE; - if (kgem->large_object_size > kgem->max_gpu_size) - kgem->large_object_size = kgem->max_gpu_size; + if (kgem->large_object_size > half_gpu_max) + kgem->large_object_size = half_gpu_max; + if (kgem->max_copy_tile_size > kgem->aperture_high/2) + kgem->max_copy_tile_size = kgem->aperture_high/2; + if (kgem->max_copy_tile_size > kgem->aperture_low) + kgem->max_copy_tile_size = kgem->aperture_low; + if (kgem->max_copy_tile_size < 16*PAGE_SIZE) + kgem->max_copy_tile_size = 16*PAGE_SIZE; - if (kgem->has_llc | kgem->has_cacheing | kgem->has_userptr) { + if (kgem->has_llc | kgem->has_caching | kgem->has_userptr) { if (kgem->large_object_size > kgem->max_cpu_size) kgem->large_object_size = kgem->max_cpu_size; } else @@ -1093,7 +1214,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) } /* XXX hopefully a good approximation */ -static uint32_t kgem_get_unique_id(struct kgem *kgem) +uint32_t kgem_get_unique_id(struct kgem *kgem) { uint32_t id; id = ++kgem->unique_id; @@ -1111,12 +1232,43 @@ inline static uint32_t kgem_pitch_alignment(struct kgem *kgem, unsigned flags) return kgem->min_alignment; } -static uint32_t kgem_untiled_pitch(struct kgem *kgem, - uint32_t width, uint32_t bpp, - unsigned flags) +void kgem_get_tile_size(struct kgem *kgem, int tiling, + int *tile_width, int *tile_height, int *tile_size) { - width = ALIGN(width, 2) * bpp >> 3; - return ALIGN(width, kgem_pitch_alignment(kgem, flags)); + if (kgem->gen <= 030) { + if (tiling) { + if (kgem->gen < 030) { + *tile_width = 128; + *tile_height = 16; + *tile_size = 2048; + } else { + *tile_width = 512; + *tile_height = 8; + *tile_size = 4096; + } + } else { + *tile_width = 1; + *tile_height = 1; + *tile_size = 1; + } + } else switch (tiling) { + default: + case I915_TILING_NONE: + *tile_width = 1; + *tile_height = 1; + *tile_size = 1; + break; + case I915_TILING_X: + *tile_width = 512; + *tile_height = 8; + *tile_size = 4096; + break; + case I915_TILING_Y: + 
*tile_width = 128; + *tile_height = 32; + *tile_size = 4096; + break; + } } uint32_t kgem_surface_size(struct kgem *kgem, @@ -1133,6 +1285,7 @@ uint32_t kgem_surface_size(struct kgem *kgem, assert(width <= MAXSHORT); assert(height <= MAXSHORT); + assert(bpp >= 8); if (kgem->gen <= 030) { if (tiling) { @@ -1268,6 +1421,7 @@ static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo) { int n; + assert(kgem->nreloc__self <= 256); if (kgem->nreloc__self == 0) return; @@ -1329,8 +1483,8 @@ static void kgem_bo_release_map(struct kgem *kgem, struct kgem_bo *bo) static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo) { DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); - assert(bo->refcnt == 0); + assert(bo->proxy == NULL); assert(bo->exec == NULL); assert(!bo->snoop || bo->rq == NULL); @@ -1343,9 +1497,11 @@ static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo) if (IS_USER_MAP(bo->map)) { assert(bo->rq == NULL); - assert(MAP(bo->map) != bo || bo->io); - if (bo != MAP(bo->map)) { + assert(!__kgem_busy(kgem, bo->handle)); + assert(MAP(bo->map) != bo || bo->io || bo->flush); + if (!(bo->io || bo->flush)) { DBG(("%s: freeing snooped base\n", __FUNCTION__)); + assert(bo != MAP(bo->map)); free(MAP(bo->map)); } bo->map = NULL; @@ -1353,6 +1509,7 @@ static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo) if (bo->map) kgem_bo_release_map(kgem, bo); assert(list_is_empty(&bo->vma)); + assert(bo->map == NULL); _list_del(&bo->list); _list_del(&bo->request); @@ -1378,8 +1535,11 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem, assert(!bo->proxy); assert(!bo->io); assert(!bo->scanout); + assert(!bo->snoop); + assert(!bo->flush); assert(!bo->needs_flush); assert(list_is_empty(&bo->vma)); + assert_tiling(kgem, bo); ASSERT_IDLE(kgem, bo->handle); kgem->need_expire = true; @@ -1458,30 +1618,6 @@ inline static void kgem_bo_remove_from_active(struct kgem *kgem, assert(list_is_empty(&bo->vma)); } -static void kgem_bo_clear_scanout(struct 
kgem *kgem, struct kgem_bo *bo) -{ - assert(bo->scanout); - assert(!bo->refcnt); - assert(bo->exec == NULL); - assert(bo->proxy == NULL); - - DBG(("%s: handle=%d, fb=%d (reusable=%d)\n", - __FUNCTION__, bo->handle, bo->delta, bo->reusable)); - if (bo->delta) { - /* XXX will leak if we are not DRM_MASTER. *shrug* */ -// drmModeRmFB(kgem->fd, bo->delta); - bo->delta = 0; - } - - bo->scanout = false; - bo->flush = false; - bo->reusable = true; - - if (kgem->has_llc && - !gem_set_cacheing(kgem->fd, bo->handle, SNOOPED)) - bo->reusable = false; -} - static void _kgem_bo_delete_buffer(struct kgem *kgem, struct kgem_bo *bo) { struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy; @@ -1498,9 +1634,17 @@ static void kgem_bo_move_to_scanout(struct kgem *kgem, struct kgem_bo *bo) assert(bo->refcnt == 0); assert(bo->scanout); assert(bo->delta); + assert(!bo->flush); assert(!bo->snoop); assert(!bo->io); + if (bo->purged) { + DBG(("%s: discarding purged scanout - external name?\n", + __FUNCTION__)); + kgem_bo_free(kgem, bo); + return; + } + DBG(("%s: moving %d [fb %d] to scanout cache, active? 
%d\n", __FUNCTION__, bo->handle, bo->delta, bo->rq != NULL)); if (bo->rq) @@ -1511,6 +1655,9 @@ static void kgem_bo_move_to_scanout(struct kgem *kgem, struct kgem_bo *bo) static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo) { + assert(bo->reusable); + assert(!bo->flush); + assert(!bo->needs_flush); assert(bo->refcnt == 0); assert(bo->exec == NULL); @@ -1535,7 +1682,7 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags) DBG(("%s: num_pages=%d, flags=%x\n", __FUNCTION__, num_pages, flags)); - if ((kgem->has_cacheing | kgem->has_userptr) == 0) + if ((kgem->has_caching | kgem->has_userptr) == 0) return NULL; if (list_is_empty(&kgem->snoop)) { @@ -1550,6 +1697,7 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags) assert(bo->refcnt == 0); assert(bo->snoop); assert(!bo->scanout); + assert(!bo->purged); assert(bo->proxy == NULL); assert(bo->tiling == I915_TILING_NONE); assert(bo->rq == NULL); @@ -1586,14 +1734,32 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags) return NULL; } +void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo) +{ + if (kgem->nexec != 1 || bo->exec == NULL) + return; + + DBG(("%s: only handle in batch, discarding last operations for handle=%d\n", + __FUNCTION__, bo->handle)); + + assert(bo->exec == &kgem->exec[0]); + assert(kgem->exec[0].handle == bo->handle); + assert(RQ(bo->rq) == kgem->next_request); + + bo->refcnt++; + kgem_reset(kgem); + bo->refcnt--; +} + static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) { DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); assert(list_is_empty(&bo->list)); assert(bo->refcnt == 0); - assert(!bo->purged); + assert(!bo->purged || !bo->reusable); assert(bo->proxy == NULL); + assert_tiling(kgem, bo); bo->binding.offset = 0; @@ -1602,16 +1768,16 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) if (bo->snoop && !bo->flush) { DBG(("%s: handle=%d is snooped\n", 
__FUNCTION__, bo->handle)); - assert(!bo->flush); + assert(bo->reusable); assert(list_is_empty(&bo->list)); if (bo->exec == NULL && bo->rq && !__kgem_busy(kgem, bo->handle)) __kgem_bo_clear_busy(bo); - if (bo->rq == NULL) { - assert(!bo->needs_flush); + if (bo->rq == NULL) kgem_bo_move_to_snoop(kgem, bo); - } return; } + if (!IS_USER_MAP(bo->map)) + bo->flush = false; if (bo->scanout) { kgem_bo_move_to_scanout(kgem, bo); @@ -1631,20 +1797,13 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) assert(list_is_empty(&bo->vma)); assert(list_is_empty(&bo->list)); + assert(bo->flush == false); assert(bo->snoop == false); assert(bo->io == false); assert(bo->scanout == false); - if (bo->exec && kgem->nexec == 1) { - DBG(("%s: only handle in batch, discarding last operations\n", - __FUNCTION__)); - assert(bo->exec == &kgem->exec[0]); - assert(kgem->exec[0].handle == bo->handle); - assert(RQ(bo->rq) == kgem->next_request); - bo->refcnt = 1; - kgem_reset(kgem); - bo->refcnt = 0; - } + kgem_bo_undo(kgem, bo); + assert(bo->refcnt == 0); if (bo->rq && bo->exec == NULL && !__kgem_busy(kgem, bo->handle)) __kgem_bo_clear_busy(bo); @@ -1764,7 +1923,7 @@ static bool kgem_retire__flushing(struct kgem *kgem) int count = 0; list_for_each_entry(bo, &kgem->flushing, request) count++; - printf("%s: %d bo on flushing list\n", __FUNCTION__, count); + ErrorF("%s: %d bo on flushing list\n", __FUNCTION__, count); } #endif @@ -1872,7 +2031,7 @@ static bool kgem_retire__requests_ring(struct kgem *kgem, int ring) struct kgem_request, list)->bo; - printf("%s: ring=%d, %d outstanding requests, oldest=%d\n", + ErrorF("%s: ring=%d, %d outstanding requests, oldest=%d\n", __FUNCTION__, ring, count, bo ? bo->handle : 0); } #endif @@ -1945,10 +2104,9 @@ static void kgem_commit(struct kgem *kgem) DBG(("%s: release handle=%d (proxy? %d), dirty? %d flush? %d, snoop? 
%d -> offset=%x\n", __FUNCTION__, bo->handle, bo->proxy != NULL, - bo->dirty, bo->needs_flush, bo->snoop, + bo->gpu_dirty, bo->needs_flush, bo->snoop, (unsigned)bo->exec->offset)); - assert(!bo->purged); assert(bo->exec); assert(bo->proxy == NULL || bo->exec == &_kgem_dummy_exec); assert(RQ(bo->rq) == rq || (RQ(bo->proxy->rq) == rq)); @@ -1965,7 +2123,7 @@ static void kgem_commit(struct kgem *kgem) bo->binding.offset = 0; bo->domain = DOMAIN_GPU; - bo->dirty = false; + bo->gpu_dirty = false; if (bo->proxy) { /* proxies are not used for domain tracking */ @@ -1993,6 +2151,7 @@ static void kgem_commit(struct kgem *kgem) kgem_retire(kgem); assert(list_is_empty(&rq->buffers)); + assert(rq->bo->map == NULL); gem_close(kgem->fd, rq->bo->handle); kgem_cleanup_cache(kgem); } else { @@ -2022,9 +2181,9 @@ static void kgem_finish_buffers(struct kgem *kgem) struct kgem_buffer *bo, *next; list_for_each_entry_safe(bo, next, &kgem->batch_buffers, base.list) { - DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, mmapped=%d\n", + DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, mmapped=%s\n", __FUNCTION__, bo->base.handle, bo->used, bo->base.exec!=NULL, - bo->write, bo->mmapped)); + bo->write, bo->mmapped ? IS_CPU_MAP(bo->base.map) ? 
"cpu" : "gtt" : "no")); assert(next->base.list.prev == &bo->base.list); assert(bo->base.io); @@ -2116,11 +2275,11 @@ static void kgem_finish_buffers(struct kgem *kgem) list_replace(&bo->base.request, &shrink->request); list_init(&bo->base.request); - shrink->needs_flush = bo->base.dirty; + shrink->needs_flush = bo->base.gpu_dirty; bo->base.exec = NULL; bo->base.rq = NULL; - bo->base.dirty = false; + bo->base.gpu_dirty = false; bo->base.needs_flush = false; bo->used = 0; @@ -2161,11 +2320,11 @@ static void kgem_finish_buffers(struct kgem *kgem) list_replace(&bo->base.request, &shrink->request); list_init(&bo->base.request); - shrink->needs_flush = bo->base.dirty; + shrink->needs_flush = bo->base.gpu_dirty; bo->base.exec = NULL; bo->base.rq = NULL; - bo->base.dirty = false; + bo->base.gpu_dirty = false; bo->base.needs_flush = false; bo->used = 0; @@ -2211,7 +2370,7 @@ static void kgem_cleanup(struct kgem *kgem) request); bo->exec = NULL; - bo->dirty = false; + bo->gpu_dirty = false; __kgem_bo_clear_busy(bo); if (bo->refcnt == 0) kgem_bo_free(kgem, bo); @@ -2276,17 +2435,27 @@ void kgem_reset(struct kgem *kgem) bo->binding.offset = 0; bo->exec = NULL; bo->target_handle = -1; - bo->dirty = false; + bo->gpu_dirty = false; if (bo->needs_flush && __kgem_busy(kgem, bo->handle)) { + assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE); list_add(&bo->request, &kgem->flushing); bo->rq = (void *)kgem; } else __kgem_bo_clear_busy(bo); - if (!bo->refcnt && !bo->reusable) { - assert(!bo->snoop); - DBG(("%s: discarding handle=%d\n", + if (bo->refcnt || bo->rq) + continue; + + if (bo->snoop) { + kgem_bo_move_to_snoop(kgem, bo); + } else if (bo->scanout) { + kgem_bo_move_to_scanout(kgem, bo); + } else if ((bo = kgem_bo_replace_io(bo))->reusable && + kgem_bo_set_purgeable(kgem, bo)) { + kgem_bo_move_to_inactive(kgem, bo); + } else { + DBG(("%s: closing %d\n", __FUNCTION__, bo->handle)); kgem_bo_free(kgem, bo); } @@ -2429,8 +2598,9 @@ void _kgem_submit(struct kgem *kgem) 
batch_end = kgem_end_batch(kgem); kgem_sna_flush(kgem); - DBG(("batch[%d/%d]: %d %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d\n", - kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface, kgem->batch_size, + DBG(("batch[%d/%d, flags=%x]: %d %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d\n", + kgem->mode, kgem->ring, kgem->batch_flags, + batch_end, kgem->nbatch, kgem->surface, kgem->batch_size, kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture)); assert(kgem->nbatch <= kgem->batch_size); @@ -2479,19 +2649,11 @@ void _kgem_submit(struct kgem *kgem) struct drm_i915_gem_execbuffer2 execbuf; int ret, retry = 3; - VG_CLEAR(execbuf); + memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = (uintptr_t)kgem->exec; execbuf.buffer_count = kgem->nexec; - execbuf.batch_start_offset = 0; execbuf.batch_len = batch_end*sizeof(uint32_t); - execbuf.cliprects_ptr = 0; - execbuf.num_cliprects = 0; - execbuf.DR1 = 0; - execbuf.DR4 = 0; execbuf.flags = kgem->ring | kgem->batch_flags; - execbuf.rsvd1 = 0; - execbuf.rsvd2 = 0; - ret = drmIoctl(kgem->fd, @@ -2609,6 +2771,17 @@ void kgem_purge_cache(struct kgem *kgem) kgem->need_purge = false; } + +void kgem_clean_large_cache(struct kgem *kgem) +{ + while (!list_is_empty(&kgem->large_inactive)) { + kgem_bo_free(kgem, + list_first_entry(&kgem->large_inactive, + struct kgem_bo, list)); + + } +} + bool kgem_expire_cache(struct kgem *kgem) { time_t now, expire; @@ -2631,22 +2804,7 @@ bool kgem_expire_cache(struct kgem *kgem) free(rq); } - while (!list_is_empty(&kgem->large_inactive)) { - kgem_bo_free(kgem, - list_first_entry(&kgem->large_inactive, - struct kgem_bo, list)); - - } - - while (!list_is_empty(&kgem->scanout)) { - bo = list_first_entry(&kgem->scanout, struct kgem_bo, list); - if (__kgem_busy(kgem, bo->handle)) - break; - - list_del(&bo->list); - kgem_bo_clear_scanout(kgem, bo); - __kgem_bo_destroy(kgem, bo); - } + kgem_clean_large_cache(kgem); expire = 0; list_for_each_entry(bo, &kgem->snoop, list) 
{ @@ -2800,6 +2958,8 @@ void kgem_cleanup_cache(struct kgem *kgem) struct kgem_bo, list)); } + kgem_clean_large_cache(kgem); + while (!list_is_empty(&kgem->snoop)) kgem_bo_free(kgem, list_last_entry(&kgem->snoop, @@ -2822,11 +2982,63 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags) bool use_active = (flags & CREATE_INACTIVE) == 0; struct list *cache; - DBG(("%s: num_pages=%d, flags=%x, use_active? %d\n", - __FUNCTION__, num_pages, flags, use_active)); + DBG(("%s: num_pages=%d, flags=%x, use_active? %d, use_large=%d [max=%d]\n", + __FUNCTION__, num_pages, flags, use_active, + num_pages >= MAX_CACHE_SIZE / PAGE_SIZE, + MAX_CACHE_SIZE / PAGE_SIZE)); + + assert(num_pages); + + if (num_pages >= MAX_CACHE_SIZE / PAGE_SIZE) { + DBG(("%s: searching large buffers\n", __FUNCTION__)); +retry_large: + cache = use_active ? &kgem->large : &kgem->large_inactive; + list_for_each_entry_safe(bo, first, cache, list) { + assert(bo->refcnt == 0); + assert(bo->reusable); + assert(!bo->scanout); + + if (num_pages > num_pages(bo)) + goto discard; + + if (bo->tiling != I915_TILING_NONE) { + if (use_active) + goto discard; + + if (!gem_set_tiling(kgem->fd, bo->handle, + I915_TILING_NONE, 0)) + goto discard; + + bo->tiling = I915_TILING_NONE; + bo->pitch = 0; + } + + if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) + goto discard; + + list_del(&bo->list); + if (bo->rq == (void *)kgem) + list_del(&bo->request); + + bo->delta = 0; + assert_tiling(kgem, bo); + return bo; + +discard: + if (!use_active) + kgem_bo_free(kgem, bo); + } + + if (use_active) { + use_active = false; + goto retry_large; + } + + if (__kgem_throttle_retire(kgem, flags)) + goto retry_large; - if (num_pages >= MAX_CACHE_SIZE / PAGE_SIZE) return NULL; + } if (!use_active && list_is_empty(inactive(kgem, num_pages))) { DBG(("%s: inactive and cache bucket empty\n", @@ -2892,6 +3104,7 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags) __FUNCTION__, bo->handle, 
num_pages(bo))); assert(use_active || bo->domain != DOMAIN_GPU); assert(!bo->needs_flush); + assert_tiling(kgem, bo); ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active); return bo; } @@ -2980,6 +3193,7 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags) assert(list_is_empty(&bo->list)); assert(use_active || bo->domain != DOMAIN_GPU); assert(!bo->needs_flush || use_active); + assert_tiling(kgem, bo); ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active); return bo; } @@ -3014,13 +3228,14 @@ struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags) uint32_t handle; DBG(("%s(%d)\n", __FUNCTION__, size)); + assert(size); if (flags & CREATE_GTT_MAP && kgem->has_llc) { flags &= ~CREATE_GTT_MAP; flags |= CREATE_CPU_MAP; } - size = (size + PAGE_SIZE - 1) / PAGE_SIZE; + size = NUM_PAGES(size); bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags); if (bo) { assert(bo->domain != DOMAIN_GPU); @@ -3052,6 +3267,7 @@ inline int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo) unsigned int size; assert(bo->tiling); + assert_tiling(kgem, bo); assert(kgem->gen < 040); if (kgem->gen < 030) @@ -3073,16 +3289,17 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, { struct list *cache; struct kgem_bo *bo; - uint32_t pitch, untiled_pitch, tiled_height, size; + uint32_t pitch, tiled_height, size; uint32_t handle; int i, bucket, retry; + bool exact = flags & (CREATE_EXACT | CREATE_SCANOUT); if (tiling < 0) - tiling = -tiling, flags |= CREATE_EXACT; + exact = true, tiling = -tiling; + DBG(("%s(%dx%d, bpp=%d, tiling=%d, exact=%d, inactive=%d, cpu-mapping=%d, gtt-mapping=%d, scanout?=%d, prime?=%d, temp?=%d)\n", __FUNCTION__, - width, height, bpp, tiling, - !!(flags & CREATE_EXACT), + width, height, bpp, tiling, exact, !!(flags & CREATE_INACTIVE), !!(flags & CREATE_CPU_MAP), !!(flags & CREATE_GTT_MAP), @@ -3097,11 +3314,13 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, bucket = cache_bucket(size); if (flags & 
CREATE_SCANOUT) { - assert((flags & CREATE_INACTIVE) == 0); + struct kgem_bo *last = NULL; + list_for_each_entry_reverse(bo, &kgem->scanout, list) { assert(bo->scanout); assert(bo->delta); - assert(!bo->purged); + assert(!bo->flush); + assert_tiling(kgem, bo); if (size > num_pages(bo) || num_pages(bo) > 2*size) continue; @@ -3116,15 +3335,37 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, bo->pitch = pitch; } + if (flags & CREATE_INACTIVE && bo->rq) { + last = bo; + continue; + } + list_del(&bo->list); bo->unique_id = kgem_get_unique_id(kgem); DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n", bo->pitch, bo->tiling, bo->handle, bo->unique_id)); assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + assert_tiling(kgem, bo); bo->refcnt = 1; return bo; } + + if (last) { + list_del(&last->list); + + last->unique_id = kgem_get_unique_id(kgem); + DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n", + last->pitch, last->tiling, last->handle, last->unique_id)); + assert(last->pitch*kgem_aligned_height(kgem, height, last->tiling) <= kgem_bo_size(last)); + assert_tiling(kgem, last); + last->refcnt = 1; + return last; + } + + bo = NULL; //__kgem_bo_create_as_display(kgem, size, tiling, pitch); + if (bo) + return bo; } if (bucket >= NUM_CACHE_BUCKETS) { @@ -3135,14 +3376,13 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, goto large_inactive; tiled_height = kgem_aligned_height(kgem, height, tiling); - untiled_pitch = kgem_untiled_pitch(kgem, width, bpp, flags); list_for_each_entry(bo, &kgem->large, list) { assert(!bo->purged); assert(!bo->scanout); assert(bo->refcnt == 0); assert(bo->reusable); - assert(bo->flush == true); + assert_tiling(kgem, bo); if (kgem->gen < 040) { if (bo->pitch < pitch) { @@ -3175,15 +3415,19 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", bo->pitch, bo->tiling, bo->handle, bo->unique_id)); 
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + assert_tiling(kgem, bo); bo->refcnt = 1; + bo->flush = true; return bo; } large_inactive: + __kgem_throttle_retire(kgem, flags); list_for_each_entry(bo, &kgem->large_inactive, list) { assert(bo->refcnt == 0); assert(bo->reusable); assert(!bo->scanout); + assert_tiling(kgem, bo); if (size > num_pages(bo)) continue; @@ -3205,12 +3449,14 @@ large_inactive: list_del(&bo->list); + assert(bo->domain != DOMAIN_GPU); bo->unique_id = kgem_get_unique_id(kgem); bo->pitch = pitch; bo->delta = 0; DBG((" 1:from large inactive: pitch=%d, tiling=%d, handle=%d, id=%d\n", bo->pitch, bo->tiling, bo->handle, bo->unique_id)); assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + assert_tiling(kgem, bo); bo->refcnt = 1; return bo; } @@ -3236,6 +3482,7 @@ large_inactive: assert(bo->rq == NULL); assert(list_is_empty(&bo->request)); assert(bo->flush == false); + assert_tiling(kgem, bo); if (size > num_pages(bo)) { DBG(("inactive too small: %d < %d\n", @@ -3255,9 +3502,11 @@ large_inactive: break; } + assert(bo->tiling == tiling); bo->pitch = pitch; bo->delta = 0; bo->unique_id = kgem_get_unique_id(kgem); + bo->domain = DOMAIN_NONE; kgem_bo_remove_from_inactive(kgem, bo); @@ -3267,15 +3516,21 @@ large_inactive: assert(bo->domain != DOMAIN_GPU); ASSERT_IDLE(kgem, bo->handle); assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + assert_tiling(kgem, bo); bo->refcnt = 1; return bo; } } while (!list_is_empty(cache) && __kgem_throttle_retire(kgem, flags)); - if (flags & CREATE_CPU_MAP && !kgem->has_llc) + if (flags & CREATE_CPU_MAP && !kgem->has_llc) { + if (list_is_empty(&kgem->active[bucket][tiling]) && + list_is_empty(&kgem->inactive[bucket])) + flags &= ~CREATE_CACHED; + goto create; } + } if (flags & CREATE_INACTIVE) goto skip_active_search; @@ -3297,6 +3552,7 @@ search_again: assert(bo->tiling == tiling); assert(bo->flush == false); 
assert(!bo->scanout); + assert_tiling(kgem, bo); if (kgem->gen < 040) { if (bo->pitch < pitch) { @@ -3329,6 +3585,7 @@ search_again: DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", bo->pitch, bo->tiling, bo->handle, bo->unique_id)); assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + assert_tiling(kgem, bo); bo->refcnt = 1; return bo; } @@ -3341,6 +3598,7 @@ search_again: assert(!bo->scanout); assert(bo->tiling == tiling); assert(bo->flush == false); + assert_tiling(kgem, bo); if (num_pages(bo) < size) continue; @@ -3353,12 +3611,13 @@ search_again: DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", bo->pitch, bo->tiling, bo->handle, bo->unique_id)); assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + assert_tiling(kgem, bo); bo->refcnt = 1; return bo; } } - if (--retry && flags & CREATE_EXACT) { + if (--retry && exact) { if (kgem->gen >= 040) { for (i = I915_TILING_NONE; i <= I915_TILING_Y; i++) { if (i == tiling) @@ -3371,6 +3630,7 @@ search_again: assert(bo->reusable); assert(!bo->scanout); assert(bo->flush == false); + assert_tiling(kgem, bo); if (num_pages(bo) < size) continue; @@ -3389,6 +3649,7 @@ search_again: DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", bo->pitch, bo->tiling, bo->handle, bo->unique_id)); assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + assert_tiling(kgem, bo); bo->refcnt = 1; return bo; } @@ -3399,8 +3660,7 @@ search_again: goto search_again; } - if ((flags & CREATE_EXACT) == 0) { /* allow an active near-miss? */ - untiled_pitch = kgem_untiled_pitch(kgem, width, bpp, flags); + if (!exact) { /* allow an active near-miss? 
*/ i = tiling; while (--i >= 0) { tiled_height = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags, @@ -3413,6 +3673,7 @@ search_again: assert(bo->reusable); assert(!bo->scanout); assert(bo->flush == false); + assert_tiling(kgem, bo); if (bo->tiling) { if (bo->pitch < pitch) { @@ -3422,7 +3683,7 @@ search_again: continue; } } else - bo->pitch = untiled_pitch; + bo->pitch = pitch; if (bo->pitch * tiled_height > bytes(bo)) continue; @@ -3434,6 +3695,7 @@ search_again: DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", bo->pitch, bo->tiling, bo->handle, bo->unique_id)); assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + assert_tiling(kgem, bo); bo->refcnt = 1; return bo; } @@ -3454,6 +3716,7 @@ search_inactive: assert(bo->reusable); assert(!bo->scanout); assert(bo->flush == false); + assert_tiling(kgem, bo); if (size > num_pages(bo)) { DBG(("inactive too small: %d < %d\n", @@ -3491,6 +3754,7 @@ search_inactive: assert((flags & CREATE_INACTIVE) == 0 || bo->domain != DOMAIN_GPU); ASSERT_MAYBE_IDLE(kgem, bo->handle, flags & CREATE_INACTIVE); assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + assert_tiling(kgem, bo); bo->refcnt = 1; return bo; } @@ -3509,6 +3773,9 @@ search_inactive: } create: + if (flags & CREATE_CACHED) + return NULL; + if (bucket >= NUM_CACHE_BUCKETS) size = ALIGN(size, 1024); handle = gem_create(kgem->fd, size); @@ -3521,19 +3788,28 @@ create: return NULL; } - bo->domain = DOMAIN_CPU; - bo->unique_id = kgem_get_unique_id(kgem); - bo->pitch = pitch; - if (tiling != I915_TILING_NONE && - gem_set_tiling(kgem->fd, handle, tiling, pitch)) - bo->tiling = tiling; if (bucket >= NUM_CACHE_BUCKETS) { DBG(("%s: marking large bo for automatic flushing\n", __FUNCTION__)); bo->flush = true; } + bo->unique_id = kgem_get_unique_id(kgem); + if (tiling == I915_TILING_NONE || + gem_set_tiling(kgem->fd, handle, tiling, pitch)) { + bo->tiling = tiling; + bo->pitch = pitch; + } else 
{ + if (flags & CREATE_EXACT) { + if (bo->pitch != pitch || bo->tiling != tiling) { + kgem_bo_free(kgem, bo); + return NULL; + } + } + } + assert(bytes(bo) >= bo->pitch * kgem_aligned_height(kgem, height, bo->tiling)); + assert_tiling(kgem, bo); debug_alloc__bo(kgem, bo); @@ -3565,6 +3841,7 @@ struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem, return bo; assert(bo->tiling == I915_TILING_NONE); + assert_tiling(kgem, bo); if (kgem_bo_map__cpu(kgem, bo) == NULL) { kgem_bo_destroy(kgem, bo); @@ -3586,6 +3863,7 @@ struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem, bo = search_snoop_cache(kgem, NUM_PAGES(size), 0); if (bo) { assert(bo->tiling == I915_TILING_NONE); + assert_tiling(kgem, bo); assert(bo->snoop); bo->refcnt = 1; bo->pitch = stride; @@ -3593,14 +3871,15 @@ struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem, return bo; } - if (kgem->has_cacheing) { + if (kgem->has_caching) { bo = kgem_create_linear(kgem, size, flags); if (bo == NULL) return NULL; assert(bo->tiling == I915_TILING_NONE); + assert_tiling(kgem, bo); - if (!gem_set_cacheing(kgem->fd, bo->handle, SNOOPED)) { + if (!gem_set_caching(kgem->fd, bo->handle, SNOOPED)) { kgem_bo_destroy(kgem, bo); return NULL; } @@ -3630,7 +3909,6 @@ struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem, return NULL; } - bo->map = MAKE_USER_MAP(ptr); bo->pitch = stride; bo->unique_id = kgem_get_unique_id(kgem); return bo; @@ -3638,11 +3916,8 @@ struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem, return NULL; } - - #endif - void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) { DBG(("%s: handle=%d, proxy? 
%d\n", @@ -3662,7 +3937,7 @@ void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) __kgem_bo_destroy(kgem, bo); } -void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo) +static void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo) { assert(bo->rq); assert(bo->exec == NULL); @@ -3676,6 +3951,28 @@ void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo) __FUNCTION__, bo->handle, bo->rq != NULL)); } +void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo) +{ + kgem_bo_submit(kgem, bo); + if (!bo->needs_flush) + return; + + /* If the kernel fails to emit the flush, then it will be forced when + * we assume direct access. And as the usual failure is EIO, we do + * not actually care. + */ + assert(bo->exec == NULL); + if (bo->rq) + __kgem_flush(kgem, bo); + + /* Whatever actually happens, we can regard the GTT write domain + * as being flushed. + */ + bo->gtt_dirty = false; + bo->needs_flush = false; + bo->domain = DOMAIN_NONE; +} + inline static bool needs_semaphore(struct kgem *kgem, struct kgem_bo *bo) { return kgem->nreloc && bo->rq && RQ_RING(bo->rq) != kgem->ring; @@ -3784,8 +4081,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem, if (bo->exec == NULL) kgem_add_bo(kgem, bo); - if (read_write_domain & 0x7fff && !bo->dirty) { - assert(!bo->snoop || kgem->can_blt_cpu); + if (read_write_domain & 0x7fff && !bo->gpu_dirty) { __kgem_bo_mark_dirty(bo); } return 0; @@ -3796,8 +4092,6 @@ uint32_t kgem_add_reloc(struct kgem *kgem, kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]); if (bo) { assert(bo->refcnt); - assert(!bo->purged); - while (bo->proxy) { DBG(("%s: adding proxy [delta=%d] for handle=%d\n", __FUNCTION__, bo->delta, bo->handle)); @@ -3812,13 +4106,13 @@ uint32_t kgem_add_reloc(struct kgem *kgem, bo->exec = &_kgem_dummy_exec; } - if (read_write_domain & 0x7fff && !bo->dirty) + if (read_write_domain & 0x7fff && !bo->gpu_dirty) __kgem_bo_mark_dirty(bo); bo = bo->proxy; assert(bo->refcnt); - assert(!bo->purged); } + assert(bo->refcnt); if 
(bo->exec == NULL) kgem_add_bo(kgem, bo); @@ -3840,7 +4134,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem, kgem->reloc[index].target_handle = bo->target_handle; kgem->reloc[index].presumed_offset = bo->presumed_offset; - if (read_write_domain & 0x7fff && !bo->dirty) { + if (read_write_domain & 0x7fff && !bo->gpu_dirty) { assert(!bo->snoop || kgem->can_blt_cpu); __kgem_bo_mark_dirty(bo); } @@ -3920,9 +4214,10 @@ void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo) DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__, bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain)); - assert(!bo->purged); assert(bo->proxy == NULL); assert(list_is_empty(&bo->list)); + assert(!IS_USER_MAP(bo->map)); + assert_tiling(kgem, bo); if (bo->tiling == I915_TILING_NONE && !bo->scanout && kgem->has_llc) { DBG(("%s: converting request for GTT map into CPU map\n", @@ -3955,7 +4250,6 @@ void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo) return ptr; } - void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo) { void *ptr; @@ -3963,17 +4257,19 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo) DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__, bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain)); - assert(!bo->purged); assert(bo->proxy == NULL); assert(list_is_empty(&bo->list)); + assert(!IS_USER_MAP(bo->map)); assert(bo->exec == NULL); + assert_tiling(kgem, bo); if (bo->tiling == I915_TILING_NONE && !bo->scanout && (kgem->has_llc || bo->domain == DOMAIN_CPU)) { DBG(("%s: converting request for GTT map into CPU map\n", __FUNCTION__)); ptr = kgem_bo_map__cpu(kgem, bo); - kgem_bo_sync__cpu(kgem, bo); + if (ptr) + kgem_bo_sync__cpu(kgem, bo); return ptr; } @@ -4000,7 +4296,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo) DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle)); } - if (bo->domain != DOMAIN_GTT) { + if (bo->domain != DOMAIN_GTT || FORCE_MMAP_SYNC & (1 
<< DOMAIN_GTT)) { struct drm_i915_gem_set_domain set_domain; DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__, @@ -4015,6 +4311,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo) if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) { kgem_bo_retire(kgem, bo); bo->domain = DOMAIN_GTT; + bo->gtt_dirty = true; } } @@ -4028,9 +4325,10 @@ void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo) DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__, bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain)); - assert(!bo->purged); assert(bo->exec == NULL); assert(list_is_empty(&bo->list)); + assert(!IS_USER_MAP(bo->map)); + assert_tiling(kgem, bo); if (IS_CPU_MAP(bo->map)) kgem_bo_release_map(kgem, bo); @@ -4059,7 +4357,11 @@ void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo) void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo) { - return kgem_bo_map__async(kgem, bo); + if (bo->map) + return MAP(bo->map); + + kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); + return bo->map = __kgem_bo_map__gtt(kgem, bo); } void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo) @@ -4070,7 +4372,6 @@ void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo) __FUNCTION__, bo->handle, bytes(bo), (int)__MAP_TYPE(bo->map))); assert(!bo->purged); assert(list_is_empty(&bo->list)); - assert(!bo->scanout); assert(bo->proxy == NULL); if (IS_CPU_MAP(bo->map)) @@ -4087,8 +4388,7 @@ retry: mmap_arg.offset = 0; mmap_arg.size = bytes(bo); if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) { - printf("%s: failed to mmap %d, %d bytes, into CPU domain: %d\n", - __FUNCTION__, bo->handle, bytes(bo), 0); + if (__kgem_throttle_retire(kgem, 0)) goto retry; @@ -4097,6 +4397,8 @@ retry: goto retry; } + ErrorF("%s: failed to mmap handle=%d, %d bytes, into CPU domain\n", + __FUNCTION__, bo->handle, bytes(bo)); return NULL; } @@ -4107,16 +4409,73 @@ retry: return (void 
*)(uintptr_t)mmap_arg.addr_ptr; } +void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo) +{ + struct drm_i915_gem_mmap mmap_arg; + + DBG(("%s(handle=%d, size=%d, mapped? %d)\n", + __FUNCTION__, bo->handle, bytes(bo), (int)__MAP_TYPE(bo->map))); + assert(bo->refcnt); + assert(!bo->purged); + assert(list_is_empty(&bo->list)); + assert(bo->proxy == NULL); + + if (IS_CPU_MAP(bo->map)) + return MAP(bo->map); + +retry: + VG_CLEAR(mmap_arg); + mmap_arg.handle = bo->handle; + mmap_arg.offset = 0; + mmap_arg.size = bytes(bo); + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) { + int err = errno; + + assert(err != EINVAL); + + if (__kgem_throttle_retire(kgem, 0)) + goto retry; + + if (kgem->need_expire) { + kgem_cleanup_cache(kgem); + goto retry; + } + + ErrorF("%s: failed to mmap handle=%d, %d bytes, into CPU domain: %d\n", + __FUNCTION__, bo->handle, bytes(bo), err); + return NULL; + } + + VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo))); + if (bo->map && bo->domain == DOMAIN_CPU) { + DBG(("%s: discarding GTT vma for %d\n", __FUNCTION__, bo->handle)); + kgem_bo_release_map(kgem, bo); + } + if (bo->map == NULL) { + DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle)); + bo->map = MAKE_CPU_MAP(mmap_arg.addr_ptr); + } + return (void *)(uintptr_t)mmap_arg.addr_ptr; +} void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo) { - assert(bo->proxy == NULL); + DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); + assert(!bo->scanout); kgem_bo_submit(kgem, bo); - if (bo->domain != DOMAIN_CPU) { + /* SHM pixmaps use proxies for subpage offsets */ + assert(!bo->purged); + while (bo->proxy) + bo = bo->proxy; + assert(!bo->purged); + + if (bo->domain != DOMAIN_CPU || FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)) { struct drm_i915_gem_set_domain set_domain; - DBG(("%s: SYNC: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__, - bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle))); + DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? 
%d, busy? %d\n", + __FUNCTION__, bo->handle, + bo->needs_flush, bo->domain, + __kgem_busy(kgem, bo->handle))); VG_CLEAR(set_domain); set_domain.handle = bo->handle; @@ -4136,10 +4495,10 @@ void kgem_clear_dirty(struct kgem *kgem) struct kgem_bo *bo; list_for_each_entry(bo, buffers, request) { - if (!bo->dirty) + if (!bo->gpu_dirty) break; - bo->dirty = false; + bo->gpu_dirty = false; } } @@ -4162,9 +4521,11 @@ struct kgem_bo *kgem_create_proxy(struct kgem *kgem, bo->size.bytes = length; bo->io = target->io && target->proxy == NULL; - bo->dirty = target->dirty; + bo->gpu_dirty = target->gpu_dirty; bo->tiling = target->tiling; bo->pitch = target->pitch; + bo->flush = target->flush; + bo->snoop = target->snoop; assert(!bo->scanout); bo->proxy = kgem_bo_reference(target); @@ -4179,6 +4540,735 @@ struct kgem_bo *kgem_create_proxy(struct kgem *kgem, return bo; } +#if 0 +static struct kgem_buffer * +buffer_alloc(void) +{ + struct kgem_buffer *bo; + + bo = malloc(sizeof(*bo)); + if (bo == NULL) + return NULL; + + bo->mem = NULL; + bo->need_io = false; + bo->mmapped = true; + + return bo; +} + +static struct kgem_buffer * +buffer_alloc_with_data(int num_pages) +{ + struct kgem_buffer *bo; + + bo = malloc(sizeof(*bo) + 2*UPLOAD_ALIGNMENT + num_pages * PAGE_SIZE); + if (bo == NULL) + return NULL; + + bo->mem = (void *)ALIGN((uintptr_t)bo + sizeof(*bo), UPLOAD_ALIGNMENT); + bo->mmapped = false; + return bo; +} + +static inline bool +use_snoopable_buffer(struct kgem *kgem, uint32_t flags) +{ + if ((flags & KGEM_BUFFER_WRITE) == 0) + return kgem->gen >= 030; + + return true; +} + +static void +init_buffer_from_bo(struct kgem_buffer *bo, struct kgem_bo *old) +{ + DBG(("%s: reusing handle=%d for buffer\n", + __FUNCTION__, old->handle)); + + assert(old->proxy == NULL); + + memcpy(&bo->base, old, sizeof(*old)); + if (old->rq) + list_replace(&old->request, &bo->base.request); + else + list_init(&bo->base.request); + list_replace(&old->vma, &bo->base.vma); + list_init(&bo->base.list); 
+ free(old); + + assert(bo->base.tiling == I915_TILING_NONE); + + bo->base.refcnt = 1; +} + +static struct kgem_buffer * +search_snoopable_buffer(struct kgem *kgem, unsigned alloc) +{ + struct kgem_buffer *bo; + struct kgem_bo *old; + + old = search_snoop_cache(kgem, alloc, 0); + if (old) { + if (!old->io) { + bo = buffer_alloc(); + if (bo == NULL) + return NULL; + + init_buffer_from_bo(bo, old); + } else { + bo = (struct kgem_buffer *)old; + bo->base.refcnt = 1; + } + + DBG(("%s: created CPU handle=%d for buffer, size %d\n", + __FUNCTION__, bo->base.handle, num_pages(&bo->base))); + + assert(bo->base.snoop); + assert(bo->base.tiling == I915_TILING_NONE); + assert(num_pages(&bo->base) >= alloc); + assert(bo->mmapped == true); + assert(bo->need_io == false); + + bo->mem = kgem_bo_map__cpu(kgem, &bo->base); + if (bo->mem == NULL) { + bo->base.refcnt = 0; + kgem_bo_free(kgem, &bo->base); + bo = NULL; + } + + return bo; + } + + return NULL; +} + +static struct kgem_buffer * +create_snoopable_buffer(struct kgem *kgem, unsigned alloc) +{ + struct kgem_buffer *bo; + uint32_t handle; + + if (kgem->has_llc) { + struct kgem_bo *old; + + bo = buffer_alloc(); + if (bo == NULL) + return NULL; + + old = search_linear_cache(kgem, alloc, + CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT); + if (old) { + init_buffer_from_bo(bo, old); + } else { + handle = gem_create(kgem->fd, alloc); + if (handle == 0) { + free(bo); + return NULL; + } + + debug_alloc(kgem, alloc); + __kgem_bo_init(&bo->base, handle, alloc); + DBG(("%s: created CPU (LLC) handle=%d for buffer, size %d\n", + __FUNCTION__, bo->base.handle, alloc)); + } + + assert(bo->base.refcnt == 1); + assert(bo->mmapped == true); + assert(bo->need_io == false); + + bo->mem = kgem_bo_map__cpu(kgem, &bo->base); + if (bo->mem != NULL) + return bo; + + bo->base.refcnt = 0; /* for valgrind */ + kgem_bo_free(kgem, &bo->base); + } + + if (kgem->has_caching) { + struct kgem_bo *old; + + bo = buffer_alloc(); + if (bo == NULL) + return NULL; 
+ + old = search_linear_cache(kgem, alloc, + CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT); + if (old) { + init_buffer_from_bo(bo, old); + } else { + handle = gem_create(kgem->fd, alloc); + if (handle == 0) { + free(bo); + return NULL; + } + + debug_alloc(kgem, alloc); + __kgem_bo_init(&bo->base, handle, alloc); + DBG(("%s: created CPU handle=%d for buffer, size %d\n", + __FUNCTION__, bo->base.handle, alloc)); + } + + assert(bo->base.refcnt == 1); + assert(bo->mmapped == true); + assert(bo->need_io == false); + + if (!gem_set_caching(kgem->fd, bo->base.handle, SNOOPED)) + goto free_caching; + + bo->base.snoop = true; + + bo->mem = kgem_bo_map__cpu(kgem, &bo->base); + if (bo->mem == NULL) + goto free_caching; + + return bo; + +free_caching: + bo->base.refcnt = 0; /* for valgrind */ + kgem_bo_free(kgem, &bo->base); + } + + if (kgem->has_userptr) { + bo = buffer_alloc(); + if (bo == NULL) + return NULL; + + //if (posix_memalign(&ptr, 64, ALIGN(size, 64))) + if (posix_memalign(&bo->mem, PAGE_SIZE, alloc * PAGE_SIZE)) { + free(bo); + return NULL; + } + + handle = gem_userptr(kgem->fd, bo->mem, alloc * PAGE_SIZE, false); + if (handle == 0) { + free(bo->mem); + free(bo); + return NULL; + } + + debug_alloc(kgem, alloc); + __kgem_bo_init(&bo->base, handle, alloc); + DBG(("%s: created snoop handle=%d for buffer\n", + __FUNCTION__, bo->base.handle)); + + assert(bo->mmapped == true); + assert(bo->need_io == false); + + bo->base.refcnt = 1; + bo->base.snoop = true; + bo->base.map = MAKE_USER_MAP(bo->mem); + + return bo; + } + + return NULL; +} + +struct kgem_bo *kgem_create_buffer(struct kgem *kgem, + uint32_t size, uint32_t flags, + void **ret) +{ + struct kgem_buffer *bo; + unsigned offset, alloc; + struct kgem_bo *old; + + DBG(("%s: size=%d, flags=%x [write?=%d, inplace?=%d, last?=%d]\n", + __FUNCTION__, size, flags, + !!(flags & KGEM_BUFFER_WRITE), + !!(flags & KGEM_BUFFER_INPLACE), + !!(flags & KGEM_BUFFER_LAST))); + assert(size); + /* we should never be asked to create 
anything TOO large */ + assert(size <= kgem->max_object_size); + +#if !DBG_NO_UPLOAD_CACHE + list_for_each_entry(bo, &kgem->batch_buffers, base.list) { + assert(bo->base.io); + assert(bo->base.refcnt >= 1); + + /* We can reuse any write buffer which we can fit */ + if (flags == KGEM_BUFFER_LAST && + bo->write == KGEM_BUFFER_WRITE && + bo->base.refcnt == 1 && !bo->mmapped && + size <= bytes(&bo->base)) { + DBG(("%s: reusing write buffer for read of %d bytes? used=%d, total=%d\n", + __FUNCTION__, size, bo->used, bytes(&bo->base))); + gem_write(kgem->fd, bo->base.handle, + 0, bo->used, bo->mem); + kgem_buffer_release(kgem, bo); + bo->need_io = 0; + bo->write = 0; + offset = 0; + bo->used = size; + goto done; + } + + if (flags & KGEM_BUFFER_WRITE) { + if ((bo->write & KGEM_BUFFER_WRITE) == 0 || + (((bo->write & ~flags) & KGEM_BUFFER_INPLACE) && + !bo->base.snoop)) { + DBG(("%s: skip write %x buffer, need %x\n", + __FUNCTION__, bo->write, flags)); + continue; + } + assert(bo->mmapped || bo->need_io); + } else { + if (bo->write & KGEM_BUFFER_WRITE) { + DBG(("%s: skip write %x buffer, need %x\n", + __FUNCTION__, bo->write, flags)); + continue; + } + } + + if (bo->used + size <= bytes(&bo->base)) { + DBG(("%s: reusing buffer? used=%d + size=%d, total=%d\n", + __FUNCTION__, bo->used, size, bytes(&bo->base))); + offset = bo->used; + bo->used += size; + goto done; + } + } + + if (flags & KGEM_BUFFER_WRITE) { + list_for_each_entry(bo, &kgem->active_buffers, base.list) { + assert(bo->base.io); + assert(bo->base.refcnt >= 1); + assert(bo->mmapped); + assert(!IS_CPU_MAP(bo->base.map) || kgem->has_llc || bo->base.snoop); + + if (!kgem->has_llc && (bo->write & ~flags) & KGEM_BUFFER_INPLACE) { + DBG(("%s: skip write %x buffer, need %x\n", + __FUNCTION__, bo->write, flags)); + continue; + } + + if (bo->used + size <= bytes(&bo->base)) { + DBG(("%s: reusing buffer? 
used=%d + size=%d, total=%d\n", + __FUNCTION__, bo->used, size, bytes(&bo->base))); + offset = bo->used; + bo->used += size; + list_move(&bo->base.list, &kgem->batch_buffers); + goto done; + } + } + } +#endif + +#if !DBG_NO_MAP_UPLOAD + /* Be a little more generous and hope to hold fewer mmappings */ + alloc = ALIGN(2*size, kgem->buffer_size); + if (alloc > MAX_CACHE_SIZE) + alloc = ALIGN(size, kgem->buffer_size); + if (alloc > MAX_CACHE_SIZE) + alloc = PAGE_ALIGN(size); + assert(alloc); + + if (alloc > kgem->aperture_mappable / 4) + flags &= ~KGEM_BUFFER_INPLACE; + alloc /= PAGE_SIZE; + + if (kgem->has_llc && + (flags & KGEM_BUFFER_WRITE_INPLACE) != KGEM_BUFFER_WRITE_INPLACE) { + bo = buffer_alloc(); + if (bo == NULL) + goto skip_llc; + + old = NULL; + if ((flags & KGEM_BUFFER_WRITE) == 0) + old = search_linear_cache(kgem, alloc, CREATE_CPU_MAP); + if (old == NULL) + old = search_linear_cache(kgem, alloc, CREATE_INACTIVE | CREATE_CPU_MAP); + if (old == NULL) + old = search_linear_cache(kgem, NUM_PAGES(size), CREATE_INACTIVE | CREATE_CPU_MAP); + if (old) { + DBG(("%s: found LLC handle=%d for buffer\n", + __FUNCTION__, old->handle)); + + init_buffer_from_bo(bo, old); + } else { + uint32_t handle = gem_create(kgem->fd, alloc); + if (handle == 0) { + free(bo); + goto skip_llc; + } + __kgem_bo_init(&bo->base, handle, alloc); + DBG(("%s: created LLC handle=%d for buffer\n", + __FUNCTION__, bo->base.handle)); + + debug_alloc(kgem, alloc); + } + + assert(bo->mmapped); + assert(!bo->need_io); + + bo->mem = kgem_bo_map__cpu(kgem, &bo->base); + if (bo->mem) { + if (flags & KGEM_BUFFER_WRITE) + kgem_bo_sync__cpu(kgem, &bo->base); + flags &= ~KGEM_BUFFER_INPLACE; + goto init; + } else { + bo->base.refcnt = 0; /* for valgrind */ + kgem_bo_free(kgem, &bo->base); + } + } +skip_llc: + + if ((flags & KGEM_BUFFER_WRITE_INPLACE) == KGEM_BUFFER_WRITE_INPLACE) { + /* The issue with using a GTT upload buffer is that we may + * cause eviction-stalls in order to free up some GTT space. 
+ * An is-mappable? ioctl could help us detect when we are + * about to block, or some per-page magic in the kernel. + * + * XXX This is especially noticeable on memory constrained + * devices like gen2 or with relatively slow gpu like i3. + */ + DBG(("%s: searching for an inactive GTT map for upload\n", + __FUNCTION__)); + old = search_linear_cache(kgem, alloc, + CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP); +#if HAVE_I915_GEM_BUFFER_INFO + if (old) { + struct drm_i915_gem_buffer_info info; + + /* An example of such a non-blocking ioctl might work */ + + VG_CLEAR(info); + info.handle = handle; + if (drmIoctl(kgem->fd, + DRM_IOCTL_I915_GEM_BUFFER_INFO, + &fino) == 0) { + old->presumed_offset = info.addr; + if ((info.flags & I915_GEM_MAPPABLE) == 0) { + kgem_bo_move_to_inactive(kgem, old); + old = NULL; + } + } + } +#endif + if (old == NULL) + old = search_linear_cache(kgem, NUM_PAGES(size), + CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP); + if (old == NULL) { + old = search_linear_cache(kgem, alloc, CREATE_INACTIVE); + if (old && !__kgem_bo_is_mappable(kgem, old)) { + _kgem_bo_destroy(kgem, old); + old = NULL; + } + } + if (old) { + DBG(("%s: reusing handle=%d for buffer\n", + __FUNCTION__, old->handle)); + assert(__kgem_bo_is_mappable(kgem, old)); + assert(!old->snoop); + assert(old->rq == NULL); + + bo = buffer_alloc(); + if (bo == NULL) + return NULL; + + init_buffer_from_bo(bo, old); + assert(num_pages(&bo->base) >= NUM_PAGES(size)); + + assert(bo->mmapped); + assert(bo->base.refcnt == 1); + + bo->mem = kgem_bo_map(kgem, &bo->base); + if (bo->mem) { + if (IS_CPU_MAP(bo->base.map)) + flags &= ~KGEM_BUFFER_INPLACE; + goto init; + } else { + bo->base.refcnt = 0; + kgem_bo_free(kgem, &bo->base); + } + } + } +#else + flags &= ~KGEM_BUFFER_INPLACE; +#endif + /* Be more parsimonious with pwrite/pread/cacheable buffers */ + if ((flags & KGEM_BUFFER_INPLACE) == 0) + alloc = NUM_PAGES(size); + + if (use_snoopable_buffer(kgem, flags)) { + bo = 
search_snoopable_buffer(kgem, alloc); + if (bo) { + if (flags & KGEM_BUFFER_WRITE) + kgem_bo_sync__cpu(kgem, &bo->base); + flags &= ~KGEM_BUFFER_INPLACE; + goto init; + } + + if ((flags & KGEM_BUFFER_INPLACE) == 0) { + bo = create_snoopable_buffer(kgem, alloc); + if (bo) + goto init; + } + } + + flags &= ~KGEM_BUFFER_INPLACE; + + old = NULL; + if ((flags & KGEM_BUFFER_WRITE) == 0) + old = search_linear_cache(kgem, alloc, 0); + if (old == NULL) + old = search_linear_cache(kgem, alloc, CREATE_INACTIVE); + if (old) { + DBG(("%s: reusing ordinary handle %d for io\n", + __FUNCTION__, old->handle)); + bo = buffer_alloc_with_data(num_pages(old)); + if (bo == NULL) + return NULL; + + init_buffer_from_bo(bo, old); + bo->need_io = flags & KGEM_BUFFER_WRITE; + } else { + unsigned hint; + + if (use_snoopable_buffer(kgem, flags)) { + bo = create_snoopable_buffer(kgem, alloc); + if (bo) + goto init; + } + + bo = buffer_alloc(); + if (bo == NULL) + return NULL; + + hint = CREATE_INACTIVE; + if (flags & KGEM_BUFFER_WRITE) + hint |= CREATE_CPU_MAP; + old = search_linear_cache(kgem, alloc, hint); + if (old) { + DBG(("%s: reusing handle=%d for buffer\n", + __FUNCTION__, old->handle)); + + init_buffer_from_bo(bo, old); + } else { + uint32_t handle = gem_create(kgem->fd, alloc); + if (handle == 0) { + free(bo); + return NULL; + } + + DBG(("%s: created handle=%d for buffer\n", + __FUNCTION__, handle)); + + __kgem_bo_init(&bo->base, handle, alloc); + debug_alloc(kgem, alloc * PAGE_SIZE); + } + + assert(bo->mmapped); + assert(!bo->need_io); + assert(bo->base.refcnt == 1); + + if (flags & KGEM_BUFFER_WRITE) { + bo->mem = kgem_bo_map__cpu(kgem, &bo->base); + if (bo->mem != NULL) { + kgem_bo_sync__cpu(kgem, &bo->base); + goto init; + } + } + + DBG(("%s: failing back to new pwrite buffer\n", __FUNCTION__)); + old = &bo->base; + bo = buffer_alloc_with_data(num_pages(old)); + if (bo == NULL) { + old->refcnt= 0; + kgem_bo_free(kgem, old); + return NULL; + } + + init_buffer_from_bo(bo, old); + + 
assert(bo->mem); + assert(!bo->mmapped); + assert(bo->base.refcnt == 1); + + bo->need_io = flags & KGEM_BUFFER_WRITE; + } +init: + bo->base.io = true; + assert(bo->base.refcnt == 1); + assert(num_pages(&bo->base) >= NUM_PAGES(size)); + assert(!bo->need_io || !bo->base.needs_flush); + assert(!bo->need_io || bo->base.domain != DOMAIN_GPU); + assert(bo->mem); + assert(!bo->mmapped || bo->base.map != NULL); + + bo->used = size; + bo->write = flags & KGEM_BUFFER_WRITE_INPLACE; + offset = 0; + + assert(list_is_empty(&bo->base.list)); + list_add(&bo->base.list, &kgem->batch_buffers); + + DBG(("%s(pages=%d [%d]) new handle=%d, used=%d, write=%d\n", + __FUNCTION__, num_pages(&bo->base), alloc, bo->base.handle, bo->used, bo->write)); + +done: + bo->used = ALIGN(bo->used, UPLOAD_ALIGNMENT); + assert(bo->mem); + *ret = (char *)bo->mem + offset; + return kgem_create_proxy(kgem, &bo->base, offset, size); +} + +bool kgem_buffer_is_inplace(struct kgem_bo *_bo) +{ + struct kgem_buffer *bo = (struct kgem_buffer *)_bo->proxy; + return bo->write & KGEM_BUFFER_WRITE_INPLACE; +} + +struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem, + int width, int height, int bpp, + uint32_t flags, + void **ret) +{ + struct kgem_bo *bo; + int stride; + + assert(width > 0 && height > 0); + assert(ret != NULL); + stride = ALIGN(width, 2) * bpp >> 3; + stride = ALIGN(stride, 4); + + DBG(("%s: %dx%d, %d bpp, stride=%d\n", + __FUNCTION__, width, height, bpp, stride)); + + bo = kgem_create_buffer(kgem, stride * ALIGN(height, 2), flags, ret); + if (bo == NULL) { + DBG(("%s: allocation failure for upload buffer\n", + __FUNCTION__)); + return NULL; + } + assert(*ret != NULL); + assert(bo->proxy != NULL); + + if (height & 1) { + struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy; + int min; + + assert(io->used); + + /* Having padded this surface to ensure that accesses to + * the last pair of rows is valid, remove the padding so + * that it can be allocated to other pixmaps. 
+ */ + min = bo->delta + height * stride; + min = ALIGN(min, UPLOAD_ALIGNMENT); + if (io->used != min) { + DBG(("%s: trimming buffer from %d to %d\n", + __FUNCTION__, io->used, min)); + io->used = min; + } + bo->size.bytes -= stride; + } + + bo->map = MAKE_CPU_MAP(*ret); + bo->pitch = stride; + bo->unique_id = kgem_get_unique_id(kgem); + return bo; +} + +struct kgem_bo *kgem_upload_source_image(struct kgem *kgem, + const void *data, + const BoxRec *box, + int stride, int bpp) +{ + int width = box->x2 - box->x1; + int height = box->y2 - box->y1; + struct kgem_bo *bo; + void *dst; + + if (!kgem_can_create_2d(kgem, width, height, bpp)) + return NULL; + + DBG(("%s : (%d, %d), (%d, %d), stride=%d, bpp=%d\n", + __FUNCTION__, box->x1, box->y1, box->x2, box->y2, stride, bpp)); + + assert(data); + assert(width > 0); + assert(height > 0); + assert(stride); + assert(bpp); + + bo = kgem_create_buffer_2d(kgem, + width, height, bpp, + KGEM_BUFFER_WRITE_INPLACE, &dst); + if (bo) + memcpy_blt(data, dst, bpp, + stride, bo->pitch, + box->x1, box->y1, + 0, 0, + width, height); + + return bo; +} + +void kgem_proxy_bo_attach(struct kgem_bo *bo, + struct kgem_bo **ptr) +{ + DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); + assert(bo->map == NULL || IS_CPU_MAP(bo->map)); + assert(bo->proxy); + list_add(&bo->vma, &bo->proxy->vma); + bo->map = ptr; + *ptr = kgem_bo_reference(bo); +} + +void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo) +{ + struct kgem_buffer *bo; + uint32_t offset = _bo->delta, length = _bo->size.bytes; + + /* We expect the caller to have already submitted the batch */ + assert(_bo->io); + assert(_bo->exec == NULL); + assert(_bo->rq == NULL); + assert(_bo->proxy); + + _bo = _bo->proxy; + assert(_bo->proxy == NULL); + assert(_bo->exec == NULL); + + bo = (struct kgem_buffer *)_bo; + + DBG(("%s(offset=%d, length=%d, snooped=%d)\n", __FUNCTION__, + offset, length, bo->base.snoop)); + + if (bo->mmapped) { + struct drm_i915_gem_set_domain set_domain; + + 
DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", + __FUNCTION__, + bo->base.needs_flush, + bo->base.domain, + __kgem_busy(kgem, bo->base.handle))); + + assert(!IS_CPU_MAP(bo->base.map) || bo->base.snoop || kgem->has_llc); + + VG_CLEAR(set_domain); + set_domain.handle = bo->base.handle; + set_domain.write_domain = 0; + set_domain.read_domains = + IS_CPU_MAP(bo->base.map) ? I915_GEM_DOMAIN_CPU : I915_GEM_DOMAIN_GTT; + + if (drmIoctl(kgem->fd, + DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) + return; + } else { + if (gem_read(kgem->fd, + bo->base.handle, (char *)bo->mem+offset, + offset, length)) + return; + } + kgem_bo_retire(kgem, &bo->base); + bo->base.domain = DOMAIN_NONE; +} +#endif + uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format) { struct kgem_bo_binding *b; @@ -4216,7 +5306,6 @@ void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset) } } - int kgem_init_fb(struct kgem *kgem, struct sna_fb *fb) { struct kgem_bo *bo; @@ -4299,5 +5388,23 @@ void kgem_close_batches(struct kgem *kgem) } }; +struct kgem_bo *kgem_bo_from_handle(struct kgem *kgem, int handle, + int pitch, int height) +{ + struct kgem_bo *bo; + int size; + size = pitch * height / PAGE_SIZE; + bo = __kgem_bo_alloc(handle, size); + if(bo == NULL) + return NULL; + + bo->domain = DOMAIN_GTT; + bo->unique_id = kgem_get_unique_id(kgem); + bo->pitch = pitch; + bo->tiling = I915_TILING_X; + bo->scanout = 0; + + return bo; +} diff --git a/drivers/video/Intel-2D/kgem.h b/drivers/video/Intel-2D/kgem.h index 1b5dfbaab7..3f0768db3f 100644 --- a/drivers/video/Intel-2D/kgem.h +++ b/drivers/video/Intel-2D/kgem.h @@ -28,24 +28,34 @@ #ifndef KGEM_H #define KGEM_H -#define HAS_DEBUG_FULL 1 - #include +#include #include #include -#include #include #include "compiler.h" #include "intel_list.h" -#undef DBG +#include +#if !defined(MAXSHORT) || !defined(MINSHORT) || \ + !defined(MAXINT) || !defined(MININT) +/* + * Some implementations #define these through <math.h>, so preclude + * 
#include'ing it later. + */ + +#include +#undef MAXSHORT +#define MAXSHORT SHRT_MAX +#undef MINSHORT +#define MINSHORT SHRT_MIN +#undef MAXINT +#define MAXINT INT_MAX +#undef MININT +#define MININT INT_MIN -#if HAS_DEBUG_FULL -#define DBG(x) printf x -#else -#define DBG(x) #endif struct kgem_bo { @@ -64,6 +74,7 @@ struct kgem_bo { void *map; #define IS_CPU_MAP(ptr) ((uintptr_t)(ptr) & 1) #define IS_GTT_MAP(ptr) (ptr && ((uintptr_t)(ptr) & 1) == 0) +#define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3)) struct kgem_bo_binding { struct kgem_bo_binding *next; @@ -90,7 +101,8 @@ struct kgem_bo { uint32_t pitch : 18; /* max 128k */ uint32_t tiling : 2; uint32_t reusable : 1; - uint32_t dirty : 1; + uint32_t gpu_dirty : 1; + uint32_t gtt_dirty : 1; uint32_t domain : 2; uint32_t needs_flush : 1; uint32_t snoop : 1; @@ -173,6 +185,7 @@ struct kgem { uint32_t scanout_busy:1; uint32_t busy:1; + uint32_t has_create2 :1; uint32_t has_userptr :1; uint32_t has_blt :1; uint32_t has_relaxed_fencing :1; @@ -180,8 +193,9 @@ struct kgem { uint32_t has_semaphores :1; uint32_t has_secure_batches :1; uint32_t has_pinned_batches :1; - uint32_t has_cacheing :1; + uint32_t has_caching :1; uint32_t has_llc :1; + uint32_t has_wt :1; uint32_t has_no_reloc :1; uint32_t has_handle_lut :1; @@ -200,10 +214,23 @@ struct kgem { void (*retire)(struct kgem *kgem); void (*expire)(struct kgem *kgem); - uint32_t batch[64*1024-8]; - struct drm_i915_gem_exec_object2 exec[256]; - struct drm_i915_gem_relocation_entry reloc[4096]; +#if 0 + void (*memcpy_to_tiled_x)(const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height); + void (*memcpy_from_tiled_x)(const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height); +#endif + uint16_t reloc__self[256]; + uint32_t batch[64*1024-8] 
page_aligned; + struct drm_i915_gem_exec_object2 exec[384] page_aligned; + struct drm_i915_gem_relocation_entry reloc[8192] page_aligned; #ifdef DEBUG_MEMORY struct { @@ -213,9 +240,11 @@ struct kgem { #endif }; +#define KGEM_MAX_DEFERRED_VBO 16 + #define KGEM_BATCH_RESERVED 1 -#define KGEM_RELOC_RESERVED 4 -#define KGEM_EXEC_RESERVED 1 +#define KGEM_RELOC_RESERVED (KGEM_MAX_DEFERRED_VBO) +#define KGEM_EXEC_RESERVED (1+KGEM_MAX_DEFERRED_VBO) #ifndef ARRAY_SIZE #define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0])) @@ -233,12 +262,15 @@ struct kgem_bo *kgem_create_map(struct kgem *kgem, bool read_only); struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name); +struct kgem_bo *kgem_create_for_prime(struct kgem *kgem, int name, uint32_t size); +int kgem_bo_export_to_prime(struct kgem *kgem, struct kgem_bo *bo); struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags); struct kgem_bo *kgem_create_proxy(struct kgem *kgem, struct kgem_bo *target, int offset, int length); +void kgem_proxy_bo_attach(struct kgem_bo *bo, struct kgem_bo **ptr); int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int bpp); @@ -248,6 +280,8 @@ unsigned kgem_can_create_2d(struct kgem *kgem, int width, int height, int depth) #define KGEM_CAN_CREATE_LARGE 0x4 #define KGEM_CAN_CREATE_GTT 0x8 +uint32_t kgem_get_unique_id(struct kgem *kgem); + struct kgem_bo * kgem_replace_bo(struct kgem *kgem, struct kgem_bo *src, @@ -281,7 +315,6 @@ struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem, uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format); void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset); -int kgem_bo_get_swizzling(struct kgem *kgem, struct kgem_bo *bo); bool kgem_retire(struct kgem *kgem); @@ -304,6 +337,11 @@ static inline bool kgem_is_idle(struct kgem *kgem) return kgem_ring_is_idle(kgem, kgem->ring); } +static inline bool __kgem_ring_empty(struct kgem *kgem) +{ + return 
list_is_empty(&kgem->requests[kgem->ring == KGEM_BLT]); +} + void _kgem_submit(struct kgem *kgem); static inline void kgem_submit(struct kgem *kgem) { @@ -325,20 +363,7 @@ static inline void kgem_bo_submit(struct kgem *kgem, struct kgem_bo *bo) _kgem_submit(kgem); } -void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo); -static inline void kgem_bo_flush(struct kgem *kgem, struct kgem_bo *bo) -{ - kgem_bo_submit(kgem, bo); - - if (!bo->needs_flush) - return; - - /* If the kernel fails to emit the flush, then it will be forced when - * we assume direct access. And as the useual failure is EIO, we do - * not actualy care. - */ - __kgem_flush(kgem, bo); -} +void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo); static inline struct kgem_bo *kgem_bo_reference(struct kgem_bo *bo) { @@ -367,6 +392,9 @@ static inline void kgem_set_mode(struct kgem *kgem, kgem_submit(kgem); #endif + if (kgem->nreloc && bo->exec == NULL && kgem_ring_is_idle(kgem, kgem->ring)) + _kgem_submit(kgem); + if (kgem->mode == mode) return; @@ -419,6 +447,11 @@ static inline bool kgem_check_batch_with_surfaces(struct kgem *kgem, static inline uint32_t *kgem_get_batch(struct kgem *kgem) { + if (kgem->nreloc) { + unsigned mode = kgem->mode; + _kgem_submit(kgem); + _kgem_set_mode(kgem, mode); + } return kgem->batch + kgem->nbatch; } @@ -512,6 +545,12 @@ static inline bool __kgem_bo_is_mappable(struct kgem *kgem, bo->presumed_offset & (kgem_bo_fenced_size(kgem, bo) - 1)) return false; + if (kgem->gen == 021 && bo->tiling == I915_TILING_Y) + return false; + + if (kgem->has_llc && bo->tiling == I915_TILING_NONE) + return true; + if (!bo->presumed_offset) return kgem_bo_size(bo) <= kgem->aperture_mappable / 4; @@ -544,7 +583,7 @@ static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo) if (kgem_bo_mapped(kgem, bo)) return true; - if (!bo->tiling && kgem->has_llc) + if (!bo->tiling && (kgem->has_llc || bo->domain == DOMAIN_CPU)) return true; if (kgem->gen == 021 && bo->tiling == 
I915_TILING_Y) @@ -553,6 +592,22 @@ static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo) return kgem_bo_size(bo) <= kgem->aperture_mappable / 4; } +static inline bool kgem_bo_can_map__cpu(struct kgem *kgem, + struct kgem_bo *bo, + bool write) +{ + if (bo->purged || (bo->scanout && write)) + return false; + + if (kgem->has_llc) + return true; + + if (bo->domain != DOMAIN_CPU) + return false; + + return !write || bo->exec == NULL; +} + static inline bool kgem_bo_is_snoop(struct kgem_bo *bo) { assert(bo->refcnt); @@ -561,6 +616,8 @@ static inline bool kgem_bo_is_snoop(struct kgem_bo *bo) return bo->snoop; } +void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo); + bool __kgem_busy(struct kgem *kgem, int handle); static inline void kgem_bo_mark_busy(struct kgem_bo *bo, int ring) @@ -570,10 +627,12 @@ static inline void kgem_bo_mark_busy(struct kgem_bo *bo, int ring) inline static void __kgem_bo_clear_busy(struct kgem_bo *bo) { - bo->needs_flush = false; - list_del(&bo->request); bo->rq = NULL; + list_del(&bo->request); + bo->domain = DOMAIN_NONE; + bo->needs_flush = false; + bo->gtt_dirty = false; } static inline bool kgem_bo_is_busy(struct kgem_bo *bo) @@ -584,8 +643,6 @@ static inline bool kgem_bo_is_busy(struct kgem_bo *bo) return bo->rq; } -/* - static inline bool __kgem_bo_is_busy(struct kgem *kgem, struct kgem_bo *bo) { DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__, @@ -604,7 +661,23 @@ static inline bool __kgem_bo_is_busy(struct kgem *kgem, struct kgem_bo *bo) return kgem_bo_is_busy(bo); } -*/ +static inline bool kgem_bo_is_render(struct kgem_bo *bo) +{ + DBG(("%s: handle=%d, rq? 
%d [%d]\n", __FUNCTION__, + bo->handle, bo->rq != NULL, (int)RQ_RING(bo->rq))); + assert(bo->refcnt); + return bo->rq && RQ_RING(bo->rq) == I915_EXEC_RENDER; +} + +static inline void kgem_bo_mark_unreusable(struct kgem_bo *bo) +{ + while (bo->proxy) { + bo->flush = true; + bo = bo->proxy; + } + bo->flush = true; + bo->reusable = false; +} static inline bool kgem_bo_is_dirty(struct kgem_bo *bo) { @@ -612,7 +685,7 @@ static inline bool kgem_bo_is_dirty(struct kgem_bo *bo) return false; assert(bo->refcnt); - return bo->dirty; + return bo->gpu_dirty; } static inline void kgem_bo_unclean(struct kgem *kgem, struct kgem_bo *bo) @@ -632,7 +705,7 @@ static inline void __kgem_bo_mark_dirty(struct kgem_bo *bo) bo->handle, bo->proxy != NULL)); bo->exec->flags |= LOCAL_EXEC_OBJECT_WRITE; - bo->needs_flush = bo->dirty = true; + bo->needs_flush = bo->gpu_dirty = true; list_move(&bo->request, &RQ(bo->rq)->buffers); } @@ -643,7 +716,7 @@ static inline void kgem_bo_mark_dirty(struct kgem_bo *bo) assert(bo->exec); assert(bo->rq); - if (bo->dirty) + if (bo->gpu_dirty) return; __kgem_bo_mark_dirty(bo); @@ -672,6 +745,9 @@ bool kgem_expire_cache(struct kgem *kgem); void kgem_purge_cache(struct kgem *kgem); void kgem_cleanup_cache(struct kgem *kgem); +void kgem_clean_scanout_cache(struct kgem *kgem); +void kgem_clean_large_cache(struct kgem *kgem); + #if HAS_DEBUG_FULL void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch); #else diff --git a/drivers/video/Intel-2D/kgem_debug.c b/drivers/video/Intel-2D/kgem_debug.c index 91769fbd75..f9853d2511 100644 --- a/drivers/video/Intel-2D/kgem_debug.c +++ b/drivers/video/Intel-2D/kgem_debug.c @@ -37,22 +37,6 @@ #include "kgem_debug.h" -#include - -/* -void -ErrorF(const char *f, ...) 
-{ - va_list args; - - va_start(args, f); - VErrorF(f, args); - va_end(args); -} -*/ - -#define ErrorF printf - struct drm_i915_gem_relocation_entry * kgem_debug_get_reloc_entry(struct kgem *kgem, uint32_t offset) { @@ -388,8 +372,6 @@ decode_2d(struct kgem *kgem, uint32_t offset) static int (*decode_3d(int gen))(struct kgem*, uint32_t) { - return kgem_gen6_decode_3d; -/* if (gen >= 0100) { } else if (gen >= 070) { return kgem_gen7_decode_3d; @@ -401,18 +383,12 @@ static int (*decode_3d(int gen))(struct kgem*, uint32_t) return kgem_gen4_decode_3d; } else if (gen >= 030) { return kgem_gen3_decode_3d; - } else if (gen >= 020) { - return kgem_gen2_decode_3d; } assert(0); -*/ } static void (*finish_state(int gen))(struct kgem*) { - - return kgem_gen6_finish_state; -/* if (gen >= 0100) { } else if (gen >= 070) { return kgem_gen7_finish_state; @@ -424,11 +400,8 @@ static void (*finish_state(int gen))(struct kgem*) return kgem_gen4_finish_state; } else if (gen >= 030) { return kgem_gen3_finish_state; - } else if (gen >= 020) { - return kgem_gen2_finish_state; } assert(0); -*/ } void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch) diff --git a/drivers/video/Intel-2D/kgem_debug_gen3.c b/drivers/video/Intel-2D/kgem_debug_gen3.c new file mode 100644 index 0000000000..c43af0e48b --- /dev/null +++ b/drivers/video/Intel-2D/kgem_debug_gen3.c @@ -0,0 +1,1599 @@ +/* + * Copyright © 2007-2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included 
in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eric Anholt + * Chris Wilson + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include "sna.h" +#include "sna_reg.h" + +#include "gen3_render.h" + +#include "kgem_debug.h" + +enum type { + T_FLOAT32, + T_FLOAT16, +}; + +static struct state { + struct vertex_buffer { + int handle; + void *base; + const char *ptr; + int pitch; + + struct kgem_bo *current; + } vb; + struct vertex_elements { + int offset; + bool valid; + enum type type; + int size; + uint8_t swizzle[4]; + } ve[33]; + int num_ve; +} state; + +static float int_as_float(int i) +{ + union { + float f; + int i; + } x; + x.i = i; + return x.f; +} + +static void gen3_update_vertex_buffer_addr(struct kgem *kgem, + uint32_t offset) +{ + uint32_t handle; + struct kgem_bo *bo = NULL; + void *base, *ptr; + int i; + + offset *= sizeof(uint32_t); + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == offset) + break; + assert(i < kgem->nreloc); + handle = kgem->reloc[i].target_handle; + + if (handle == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == handle) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map__debug(kgem, bo); + } + ptr = (char *)base + kgem->reloc[i].delta; + + state.vb.current = bo; + state.vb.base = base; + state.vb.ptr = ptr; +} + +static void gen3_update_vertex_buffer_pitch(struct 
kgem *kgem, + uint32_t offset) +{ + state.vb.pitch = kgem->batch[offset] >> 16 & 0x3f; + state.vb.pitch *= sizeof(uint32_t); +} + +static void gen3_update_vertex_elements(struct kgem *kgem, uint32_t data) +{ + state.ve[1].valid = 1; + + switch ((data >> 6) & 7) { + case 1: + state.ve[1].type = T_FLOAT32; + state.ve[1].size = 3; + state.ve[1].swizzle[0] = 1; + state.ve[1].swizzle[1] = 1; + state.ve[1].swizzle[2] = 1; + state.ve[1].swizzle[3] = 3; + break; + case 2: + state.ve[1].type = T_FLOAT32; + state.ve[1].size = 4; + state.ve[1].swizzle[0] = 1; + state.ve[1].swizzle[1] = 1; + state.ve[1].swizzle[2] = 1; + state.ve[1].swizzle[3] = 1; + break; + case 3: + state.ve[1].type = T_FLOAT32; + state.ve[1].size = 2; + state.ve[1].swizzle[0] = 1; + state.ve[1].swizzle[1] = 1; + state.ve[1].swizzle[2] = 2; + state.ve[1].swizzle[3] = 3; + break; + case 4: + state.ve[1].type = T_FLOAT32; + state.ve[1].size = 3; + state.ve[1].swizzle[0] = 1; + state.ve[1].swizzle[1] = 1; + state.ve[1].swizzle[2] = 3; + state.ve[1].swizzle[3] = 1; + break; + } + + state.ve[2].valid = 0; + state.ve[3].valid = 0; +} + +static void gen3_update_vertex_texcoords(struct kgem *kgem, uint32_t data) +{ + int id; + for (id = 0; id < 8; id++) { + uint32_t fmt = (data >> (id*4)) & 0xf; + int width; + + state.ve[id+4].valid = fmt != 0xf; + + width = 0; + switch (fmt) { + case 0: + state.ve[id+4].type = T_FLOAT32; + width = state.ve[id+4].size = 2; + break; + case 1: + state.ve[id+4].type = T_FLOAT32; + width = state.ve[id+4].size = 3; + break; + case 2: + state.ve[id+4].type = T_FLOAT32; + width = state.ve[id+4].size = 4; + break; + case 3: + state.ve[id+4].type = T_FLOAT32; + width = state.ve[id+4].size = 1; + break; + case 4: + state.ve[id+4].type = T_FLOAT16; + width = state.ve[id+4].size = 2; + break; + case 5: + state.ve[id+4].type = T_FLOAT16; + width = state.ve[id+4].size = 4; + break; + } + + state.ve[id+4].swizzle[0] = width > 0 ? 1 : 2; + state.ve[id+4].swizzle[1] = width > 1 ? 
1 : 2;
+		state.ve[id+4].swizzle[2] = width > 2 ? 1 : 2;
+		state.ve[id+4].swizzle[3] = width > 3 ? 1 : 2;
+	}
+}
+
+static void gen3_update_vertex_elements_offsets(struct kgem *kgem)
+{
+	int i, offset;
+
+	for (i = offset = 0; i < ARRAY_SIZE(state.ve); i++) {
+		if (!state.ve[i].valid)
+			continue;
+
+		state.ve[i].offset = offset;
+		offset += 4 * state.ve[i].size;
+		state.num_ve = i;
+	}
+}
+
+static void vertices_float32_out(const struct vertex_elements *ve, const float *f, int max)
+{
+	int c;
+
+	ErrorF("(");
+	for (c = 0; c < max; c++) {
+		switch (ve->swizzle[c]) {
+		case 0: ErrorF("#"); break;
+		case 1: ErrorF("%f", f[c]); break;
+		case 2: ErrorF("0.0"); break;
+		case 3: ErrorF("1.0"); break;
+		case 4: ErrorF("0x1"); break;
+		case 5: break;
+		default: ErrorF("?");
+		}
+		if (c < max-1)
+			ErrorF(", ");
+	}
+	ErrorF(")");
+}
+
+static void ve_out(const struct vertex_elements *ve, const void *ptr)
+{
+	switch (ve->type) {
+	case T_FLOAT32:
+		vertices_float32_out(ve, ptr, ve->size);
+		break;
+	case T_FLOAT16:
+		//vertices_float16_out(ve, ptr, ve->size);
+		break;
+	}
+}
+
+/*
+ * Print vertex number v from the current vertex buffer (state.vb), one
+ * parenthesised tuple per valid vertex element, separated by ", ".
+ * Elements are walked from index 1 up to state.num_ve.
+ */
+static void indirect_vertex_out(struct kgem *kgem, uint32_t v)
+{
+	const struct vertex_buffer *vb = &state.vb;
+	int i = 1;
+
+	do {
+		const struct vertex_elements *ve = &state.ve[i];
+		const void *ptr = vb->ptr + v * vb->pitch + ve->offset;
+
+		/* No "continue" here: it would skip the advance below and spin. */
+		if (ve->valid)
+			ve_out(ve, ptr);
+
+		/* Step to the next valid element, or past num_ve. */
+		while (++i <= state.num_ve && !state.ve[i].valid)
+			;
+
+		if (ve->valid && i <= state.num_ve)
+			ErrorF(", ");
+	} while (i <= state.num_ve);
+}
+
+/*
+ * Print one vertex stored inline at base, using the same element walk as
+ * indirect_vertex_out().  Returns state.vb.pitch.
+ */
+static int inline_vertex_out(struct kgem *kgem, void *base)
+{
+	const struct vertex_buffer *vb = &state.vb;
+	int i = 1;
+
+	do {
+		const struct vertex_elements *ve = &state.ve[i];
+		const void *ptr = (char *)base + ve->offset;
+
+		/* No "continue" here: it would skip the advance below and spin. */
+		if (ve->valid)
+			ve_out(ve, ptr);
+
+		/* Step to the next valid element, or past num_ve. */
+		while (++i <= state.num_ve && !state.ve[i].valid)
+			;
+
+		if (ve->valid && i <= state.num_ve)
+			ErrorF(", ");
+	} while (i <= state.num_ve);
+
+	return vb->pitch;
+}
+
+static int
+gen3_decode_3d_1c(struct
kgem *kgem, uint32_t offset) +{ + uint32_t *data = kgem->batch + offset; + uint32_t opcode; + + opcode = (data[0] & 0x00f80000) >> 19; + + switch (opcode) { + case 0x11: + kgem_debug_print(data, offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n"); + return 1; + case 0x10: + kgem_debug_print(data, offset, 0, "3DSTATE_SCISSOR_ENABLE %s\n", + data[0]&1?"enabled":"disabled"); + return 1; + case 0x01: + kgem_debug_print(data, offset, 0, "3DSTATE_MAP_COORD_SET_I830\n"); + return 1; + case 0x0a: + kgem_debug_print(data, offset, 0, "3DSTATE_MAP_CUBE_I830\n"); + return 1; + case 0x05: + kgem_debug_print(data, offset, 0, "3DSTATE_MAP_TEX_STREAM_I830\n"); + return 1; + } + + kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d_1c opcode = 0x%x\n", + opcode); + assert(0); + return 1; +} + +/** Sets the string dstname to describe the destination of the PS instruction */ +static void +gen3_get_instruction_dst(uint32_t *data, int i, char *dstname, int do_mask) +{ + uint32_t a0 = data[i]; + int dst_nr = (a0 >> 14) & 0xf; + char dstmask[8]; + const char *sat; + + if (do_mask) { + if (((a0 >> 10) & 0xf) == 0xf) { + dstmask[0] = 0; + } else { + int dstmask_index = 0; + + dstmask[dstmask_index++] = '.'; + if (a0 & (1 << 10)) + dstmask[dstmask_index++] = 'x'; + if (a0 & (1 << 11)) + dstmask[dstmask_index++] = 'y'; + if (a0 & (1 << 12)) + dstmask[dstmask_index++] = 'z'; + if (a0 & (1 << 13)) + dstmask[dstmask_index++] = 'w'; + dstmask[dstmask_index++] = 0; + } + + if (a0 & (1 << 22)) + sat = ".sat"; + else + sat = ""; + } else { + dstmask[0] = 0; + sat = ""; + } + + switch ((a0 >> 19) & 0x7) { + case 0: + assert(dst_nr <= 15); + sprintf(dstname, "R%d%s%s", dst_nr, dstmask, sat); + break; + case 4: + assert(dst_nr == 0); + sprintf(dstname, "oC%s%s", dstmask, sat); + break; + case 5: + assert(dst_nr == 0); + sprintf(dstname, "oD%s%s", dstmask, sat); + break; + case 6: + assert(dst_nr <= 3); + sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat); + break; + default: + sprintf(dstname, 
"RESERVED"); + break; + } +} + +static const char * +gen3_get_channel_swizzle(uint32_t select) +{ + switch (select & 0x7) { + case 0: + return (select & 8) ? "-x" : "x"; + case 1: + return (select & 8) ? "-y" : "y"; + case 2: + return (select & 8) ? "-z" : "z"; + case 3: + return (select & 8) ? "-w" : "w"; + case 4: + return (select & 8) ? "-0" : "0"; + case 5: + return (select & 8) ? "-1" : "1"; + default: + return (select & 8) ? "-bad" : "bad"; + } +} + +static void +gen3_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name) +{ + switch (src_type) { + case 0: + sprintf(name, "R%d", src_nr); + assert(src_nr <= 15); + break; + case 1: + if (src_nr < 8) + sprintf(name, "T%d", src_nr); + else if (src_nr == 8) + sprintf(name, "DIFFUSE"); + else if (src_nr == 9) + sprintf(name, "SPECULAR"); + else if (src_nr == 10) + sprintf(name, "FOG"); + else { + assert(0); + sprintf(name, "RESERVED"); + } + break; + case 2: + sprintf(name, "C%d", src_nr); + assert(src_nr <= 31); + break; + case 4: + sprintf(name, "oC"); + assert(src_nr == 0); + break; + case 5: + sprintf(name, "oD"); + assert(src_nr == 0); + break; + case 6: + sprintf(name, "U%d", src_nr); + assert(src_nr <= 3); + break; + default: + sprintf(name, "RESERVED"); + assert(0); + break; + } +} + +static void +gen3_get_instruction_src0(uint32_t *data, int i, char *srcname) +{ + uint32_t a0 = data[i]; + uint32_t a1 = data[i + 1]; + int src_nr = (a0 >> 2) & 0x1f; + const char *swizzle_x = gen3_get_channel_swizzle((a1 >> 28) & 0xf); + const char *swizzle_y = gen3_get_channel_swizzle((a1 >> 24) & 0xf); + const char *swizzle_z = gen3_get_channel_swizzle((a1 >> 20) & 0xf); + const char *swizzle_w = gen3_get_channel_swizzle((a1 >> 16) & 0xf); + char swizzle[100]; + + gen3_get_instruction_src_name((a0 >> 7) & 0x7, src_nr, srcname); + sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + if (strcmp(swizzle, ".xyzw") != 0) + strcat(srcname, swizzle); +} + +static void 
+gen3_get_instruction_src1(uint32_t *data, int i, char *srcname) +{ + uint32_t a1 = data[i + 1]; + uint32_t a2 = data[i + 2]; + int src_nr = (a1 >> 8) & 0x1f; + const char *swizzle_x = gen3_get_channel_swizzle((a1 >> 4) & 0xf); + const char *swizzle_y = gen3_get_channel_swizzle((a1 >> 0) & 0xf); + const char *swizzle_z = gen3_get_channel_swizzle((a2 >> 28) & 0xf); + const char *swizzle_w = gen3_get_channel_swizzle((a2 >> 24) & 0xf); + char swizzle[100]; + + gen3_get_instruction_src_name((a1 >> 13) & 0x7, src_nr, srcname); + sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + if (strcmp(swizzle, ".xyzw") != 0) + strcat(srcname, swizzle); +} + +static void +gen3_get_instruction_src2(uint32_t *data, int i, char *srcname) +{ + uint32_t a2 = data[i + 2]; + int src_nr = (a2 >> 16) & 0x1f; + const char *swizzle_x = gen3_get_channel_swizzle((a2 >> 12) & 0xf); + const char *swizzle_y = gen3_get_channel_swizzle((a2 >> 8) & 0xf); + const char *swizzle_z = gen3_get_channel_swizzle((a2 >> 4) & 0xf); + const char *swizzle_w = gen3_get_channel_swizzle((a2 >> 0) & 0xf); + char swizzle[100]; + + gen3_get_instruction_src_name((a2 >> 21) & 0x7, src_nr, srcname); + sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + if (strcmp(swizzle, ".xyzw") != 0) + strcat(srcname, swizzle); +} + +static void +gen3_get_instruction_addr(uint32_t src_type, uint32_t src_nr, char *name) +{ + switch (src_type) { + case 0: + sprintf(name, "R%d", src_nr); + assert(src_nr <= 15); + break; + case 1: + if (src_nr < 8) + sprintf(name, "T%d", src_nr); + else if (src_nr == 8) + sprintf(name, "DIFFUSE"); + else if (src_nr == 9) + sprintf(name, "SPECULAR"); + else if (src_nr == 10) + sprintf(name, "FOG"); + else { + assert(0); + sprintf(name, "RESERVED"); + } + break; + case 4: + sprintf(name, "oC"); + assert(src_nr == 0); + break; + case 5: + sprintf(name, "oD"); + assert(src_nr == 0); + break; + default: + assert(0); + sprintf(name, "RESERVED"); + break; + } +} 
+ +static void +gen3_decode_alu1(uint32_t *data, uint32_t offset, + int i, char *instr_prefix, const char *op_name) +{ + char dst[100], src0[100]; + + gen3_get_instruction_dst(data, i, dst, 1); + gen3_get_instruction_src0(data, i, src0); + + kgem_debug_print(data, offset, i++, "%s: %s %s, %s\n", instr_prefix, + op_name, dst, src0); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); +} + +static void +gen3_decode_alu2(uint32_t *data, uint32_t offset, + int i, char *instr_prefix, const char *op_name) +{ + char dst[100], src0[100], src1[100]; + + gen3_get_instruction_dst(data, i, dst, 1); + gen3_get_instruction_src0(data, i, src0); + gen3_get_instruction_src1(data, i, src1); + + kgem_debug_print(data, offset, i++, "%s: %s %s, %s, %s\n", instr_prefix, + op_name, dst, src0, src1); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); +} + +static void +gen3_decode_alu3(uint32_t *data, uint32_t offset, + int i, char *instr_prefix, const char *op_name) +{ + char dst[100], src0[100], src1[100], src2[100]; + + gen3_get_instruction_dst(data, i, dst, 1); + gen3_get_instruction_src0(data, i, src0); + gen3_get_instruction_src1(data, i, src1); + gen3_get_instruction_src2(data, i, src2); + + kgem_debug_print(data, offset, i++, "%s: %s %s, %s, %s, %s\n", instr_prefix, + op_name, dst, src0, src1, src2); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); +} + +static void +gen3_decode_tex(uint32_t *data, uint32_t offset, int i, char *instr_prefix, + const char *tex_name) +{ + uint32_t t0 = data[i]; + uint32_t t1 = data[i + 1]; + char dst_name[100]; + char addr_name[100]; + int sampler_nr; + + gen3_get_instruction_dst(data, i, dst_name, 0); + gen3_get_instruction_addr((t1 >> 24) & 0x7, + (t1 >> 17) & 0xf, + addr_name); + sampler_nr = t0 & 0xf; + + kgem_debug_print(data, offset, 
i++, "%s: %s %s, S%d, %s\n", instr_prefix, + tex_name, dst_name, sampler_nr, addr_name); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); +} + +static void +gen3_decode_dcl(uint32_t *data, uint32_t offset, int i, char *instr_prefix) +{ + uint32_t d0 = data[i]; + const char *sampletype; + int dcl_nr = (d0 >> 14) & 0xf; + const char *dcl_x = d0 & (1 << 10) ? "x" : ""; + const char *dcl_y = d0 & (1 << 11) ? "y" : ""; + const char *dcl_z = d0 & (1 << 12) ? "z" : ""; + const char *dcl_w = d0 & (1 << 13) ? "w" : ""; + char dcl_mask[10]; + + switch ((d0 >> 19) & 0x3) { + case 1: + sprintf(dcl_mask, ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w); + assert (strcmp(dcl_mask, ".")); + + assert(dcl_nr <= 10); + if (dcl_nr < 8) { + if (strcmp(dcl_mask, ".x") != 0 && + strcmp(dcl_mask, ".xy") != 0 && + strcmp(dcl_mask, ".xz") != 0 && + strcmp(dcl_mask, ".w") != 0 && + strcmp(dcl_mask, ".xyzw") != 0) { + assert(0); + } + kgem_debug_print(data, offset, i++, "%s: DCL T%d%s\n", instr_prefix, + dcl_nr, dcl_mask); + } else { + if (strcmp(dcl_mask, ".xz") == 0) + assert(0); + else if (strcmp(dcl_mask, ".xw") == 0) + assert(0); + else if (strcmp(dcl_mask, ".xzw") == 0) + assert(0); + + if (dcl_nr == 8) { + kgem_debug_print(data, offset, i++, "%s: DCL DIFFUSE%s\n", instr_prefix, + dcl_mask); + } else if (dcl_nr == 9) { + kgem_debug_print(data, offset, i++, "%s: DCL SPECULAR%s\n", instr_prefix, + dcl_mask); + } else if (dcl_nr == 10) { + kgem_debug_print(data, offset, i++, "%s: DCL FOG%s\n", instr_prefix, + dcl_mask); + } + } + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + break; + case 3: + switch ((d0 >> 22) & 0x3) { + case 0: + sampletype = "2D"; + break; + case 1: + sampletype = "CUBE"; + break; + case 2: + sampletype = "3D"; + break; + default: + sampletype = "RESERVED"; + break; + } + assert(dcl_nr <= 15); + kgem_debug_print(data, offset, 
i++, "%s: DCL S%d %s\n", instr_prefix, + dcl_nr, sampletype); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + break; + default: + kgem_debug_print(data, offset, i++, "%s: DCL RESERVED%d\n", instr_prefix, dcl_nr); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + } +} + +static void +gen3_decode_instruction(uint32_t *data, uint32_t offset, + int i, char *instr_prefix) +{ + switch ((data[i] >> 24) & 0x1f) { + case 0x0: + kgem_debug_print(data, offset, i++, "%s: NOP\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + break; + case 0x01: + gen3_decode_alu2(data, offset, i, instr_prefix, "ADD"); + break; + case 0x02: + gen3_decode_alu1(data, offset, i, instr_prefix, "MOV"); + break; + case 0x03: + gen3_decode_alu2(data, offset, i, instr_prefix, "MUL"); + break; + case 0x04: + gen3_decode_alu3(data, offset, i, instr_prefix, "MAD"); + break; + case 0x05: + gen3_decode_alu3(data, offset, i, instr_prefix, "DP2ADD"); + break; + case 0x06: + gen3_decode_alu2(data, offset, i, instr_prefix, "DP3"); + break; + case 0x07: + gen3_decode_alu2(data, offset, i, instr_prefix, "DP4"); + break; + case 0x08: + gen3_decode_alu1(data, offset, i, instr_prefix, "FRC"); + break; + case 0x09: + gen3_decode_alu1(data, offset, i, instr_prefix, "RCP"); + break; + case 0x0a: + gen3_decode_alu1(data, offset, i, instr_prefix, "RSQ"); + break; + case 0x0b: + gen3_decode_alu1(data, offset, i, instr_prefix, "EXP"); + break; + case 0x0c: + gen3_decode_alu1(data, offset, i, instr_prefix, "LOG"); + break; + case 0x0d: + gen3_decode_alu2(data, offset, i, instr_prefix, "CMP"); + break; + case 0x0e: + gen3_decode_alu2(data, offset, i, instr_prefix, "MIN"); + break; + case 0x0f: + gen3_decode_alu2(data, offset, i, instr_prefix, "MAX"); + break; + case 0x10: + 
gen3_decode_alu1(data, offset, i, instr_prefix, "FLR"); + break; + case 0x11: + gen3_decode_alu1(data, offset, i, instr_prefix, "MOD"); + break; + case 0x12: + gen3_decode_alu1(data, offset, i, instr_prefix, "TRC"); + break; + case 0x13: + gen3_decode_alu2(data, offset, i, instr_prefix, "SGE"); + break; + case 0x14: + gen3_decode_alu2(data, offset, i, instr_prefix, "SLT"); + break; + case 0x15: + gen3_decode_tex(data, offset, i, instr_prefix, "TEXLD"); + break; + case 0x16: + gen3_decode_tex(data, offset, i, instr_prefix, "TEXLDP"); + break; + case 0x17: + gen3_decode_tex(data, offset, i, instr_prefix, "TEXLDB"); + break; + case 0x19: + gen3_decode_dcl(data, offset, i, instr_prefix); + break; + default: + kgem_debug_print(data, offset, i++, "%s: unknown\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + kgem_debug_print(data, offset, i++, "%s\n", instr_prefix); + break; + } +} + +static const char * +gen3_decode_compare_func(uint32_t op) +{ + switch (op&0x7) { + case 0: return "always"; + case 1: return "never"; + case 2: return "less"; + case 3: return "equal"; + case 4: return "lequal"; + case 5: return "greater"; + case 6: return "notequal"; + case 7: return "gequal"; + } + return ""; +} + +static const char * +gen3_decode_stencil_op(uint32_t op) +{ + switch (op&0x7) { + case 0: return "keep"; + case 1: return "zero"; + case 2: return "replace"; + case 3: return "incr_sat"; + case 4: return "decr_sat"; + case 5: return "greater"; + case 6: return "incr"; + case 7: return "decr"; + } + return ""; +} + +#if 0 +/* part of MODES_4 */ +static const char * +gen3_decode_logic_op(uint32_t op) +{ + switch (op&0xf) { + case 0: return "clear"; + case 1: return "nor"; + case 2: return "and_inv"; + case 3: return "copy_inv"; + case 4: return "and_rvrse"; + case 5: return "inv"; + case 6: return "xor"; + case 7: return "nand"; + case 8: return "and"; + case 9: return "equiv"; + case 10: return "noop"; + case 11: return "or_inv"; + case 12: 
return "copy"; + case 13: return "or_rvrse"; + case 14: return "or"; + case 15: return "set"; + } + return ""; +} +#endif + +static const char * +gen3_decode_blend_fact(uint32_t op) +{ + switch (op&0xf) { + case 1: return "zero"; + case 2: return "one"; + case 3: return "src_colr"; + case 4: return "inv_src_colr"; + case 5: return "src_alpha"; + case 6: return "inv_src_alpha"; + case 7: return "dst_alpha"; + case 8: return "inv_dst_alpha"; + case 9: return "dst_colr"; + case 10: return "inv_dst_colr"; + case 11: return "src_alpha_sat"; + case 12: return "cnst_colr"; + case 13: return "inv_cnst_colr"; + case 14: return "cnst_alpha"; + case 15: return "inv_const_alpha"; + } + return ""; +} + +static const char * +decode_tex_coord_mode(uint32_t mode) +{ + switch (mode&0x7) { + case 0: return "wrap"; + case 1: return "mirror"; + case 2: return "clamp_edge"; + case 3: return "cube"; + case 4: return "clamp_border"; + case 5: return "mirror_once"; + } + return ""; +} + +static const char * +gen3_decode_sample_filter(uint32_t mode) +{ + switch (mode&0x7) { + case 0: return "nearest"; + case 1: return "linear"; + case 2: return "anisotropic"; + case 3: return "4x4_1"; + case 4: return "4x4_2"; + case 5: return "4x4_flat"; + case 6: return "6x5_mono"; + } + return ""; +} + +static int +gen3_decode_load_state_immediate_1(struct kgem *kgem, uint32_t offset) +{ + const uint32_t *data = kgem->batch + offset; + int len, i, word; + + kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n"); + len = (data[0] & 0x0000000f) + 2; + i = 1; + for (word = 0; word <= 8; word++) { + if (data[0] & (1 << (4 + word))) { + switch (word) { + case 0: + kgem_debug_print(data, offset, i, "S0: vbo offset: 0x%08x%s\n", + data[i]&(~1),data[i]&1?", auto cache invalidate disabled":""); + gen3_update_vertex_buffer_addr(kgem, offset + i); + break; + case 1: + kgem_debug_print(data, offset, i, "S1: vertex width: %i, vertex pitch: %i\n", + (data[i]>>24)&0x3f,(data[i]>>16)&0x3f); + 
gen3_update_vertex_buffer_pitch(kgem, offset + i); + break; + case 2: + { + char buf[200]; + int len = 0; + int tex_num; + for (tex_num = 0; tex_num < 8; tex_num++) { + switch((data[i]>>tex_num*4)&0xf) { + case 0: len += sprintf(buf + len, "%i=2D ", tex_num); break; + case 1: len += sprintf(buf + len, "%i=3D ", tex_num); break; + case 2: len += sprintf(buf + len, "%i=4D ", tex_num); break; + case 3: len += sprintf(buf + len, "%i=1D ", tex_num); break; + case 4: len += sprintf(buf + len, "%i=2D_16 ", tex_num); break; + case 5: len += sprintf(buf + len, "%i=4D_16 ", tex_num); break; + case 0xf: len += sprintf(buf + len, "%i=NP ", tex_num); break; + } + } + kgem_debug_print(data, offset, i, "S2: texcoord formats: %s\n", buf); + gen3_update_vertex_texcoords(kgem, data[i]); + } + + break; + case 3: + kgem_debug_print(data, offset, i, "S3: not documented\n"); + break; + case 4: + { + const char *cullmode = ""; + const char *vfmt_xyzw = ""; + switch((data[i]>>13)&0x3) { + case 0: cullmode = "both"; break; + case 1: cullmode = "none"; break; + case 2: cullmode = "cw"; break; + case 3: cullmode = "ccw"; break; + } + switch(data[i] & (7<<6 | 1<<2)) { + case 1<<6: vfmt_xyzw = "XYZ,"; break; + case 2<<6: vfmt_xyzw = "XYZW,"; break; + case 3<<6: vfmt_xyzw = "XY,"; break; + case 4<<6: vfmt_xyzw = "XYW,"; break; + case 1<<6 | 1<<2: vfmt_xyzw = "XYZF,"; break; + case 2<<6 | 1<<2: vfmt_xyzw = "XYZWF,"; break; + case 3<<6 | 1<<2: vfmt_xyzw = "XYF,"; break; + case 4<<6 | 1<<2: vfmt_xyzw = "XYWF,"; break; + } + kgem_debug_print(data, offset, i, "S4: point_width=%i, line_width=%.1f," + "%s%s%s%s%s cullmode=%s, vfmt=%s%s%s%s%s%s%s%s " + "%s%s%s\n", + (data[i]>>23)&0x1ff, + ((data[i]>>19)&0xf) / 2.0, + data[i]&(0xf<<15)?" 
flatshade=":"", + data[i]&(1<<18)?"Alpha,":"", + data[i]&(1<<17)?"Fog,":"", + data[i]&(1<<16)?"Specular,":"", + data[i]&(1<<15)?"Color,":"", + cullmode, + data[i]&(1<<12)?"PointWidth,":"", + data[i]&(1<<11)?"SpecFog,":"", + data[i]&(1<<10)?"Color,":"", + data[i]&(1<<9)?"DepthOfs,":"", + vfmt_xyzw, + data[i]&(1<<9)?"FogParam,":"", + data[i]&(1<<5)?"force default diffuse, ":"", + data[i]&(1<<4)?"force default specular, ":"", + data[i]&(1<<3)?"local depth ofs enable, ":"", + data[i]&(1<<1)?"point sprite enable, ":"", + data[i]&(1<<0)?"line AA enable, ":""); + gen3_update_vertex_elements(kgem, data[i]); + break; + } + case 5: + { + kgem_debug_print(data, offset, i, "S5:%s%s%s%s%s" + "%s%s%s%s stencil_ref=0x%x, stencil_test=%s, " + "stencil_fail=%s, stencil_pass_z_fail=%s, " + "stencil_pass_z_pass=%s, %s%s%s%s\n", + data[i]&(0xf<<28)?" write_disable=":"", + data[i]&(1<<31)?"Alpha,":"", + data[i]&(1<<30)?"Red,":"", + data[i]&(1<<29)?"Green,":"", + data[i]&(1<<28)?"Blue,":"", + data[i]&(1<<27)?" force default point size,":"", + data[i]&(1<<26)?" last pixel enable,":"", + data[i]&(1<<25)?" global depth ofs enable,":"", + data[i]&(1<<24)?" 
fog enable,":"", + (data[i]>>16)&0xff, + gen3_decode_compare_func(data[i]>>13), + gen3_decode_stencil_op(data[i]>>10), + gen3_decode_stencil_op(data[i]>>7), + gen3_decode_stencil_op(data[i]>>4), + data[i]&(1<<3)?"stencil write enable, ":"", + data[i]&(1<<2)?"stencil test enable, ":"", + data[i]&(1<<1)?"color dither enable, ":"", + data[i]&(1<<0)?"logicop enable, ":""); + } + break; + case 6: + kgem_debug_print(data, offset, i, "S6: %salpha_test=%s, alpha_ref=0x%x, " + "depth_test=%s, %ssrc_blnd_fct=%s, dst_blnd_fct=%s, " + "%s%stristrip_provoking_vertex=%i\n", + data[i]&(1<<31)?"alpha test enable, ":"", + gen3_decode_compare_func(data[i]>>28), + data[i]&(0xff<<20), + gen3_decode_compare_func(data[i]>>16), + data[i]&(1<<15)?"cbuf blend enable, ":"", + gen3_decode_blend_fact(data[i]>>8), + gen3_decode_blend_fact(data[i]>>4), + data[i]&(1<<3)?"depth write enable, ":"", + data[i]&(1<<2)?"cbuf write enable, ":"", + data[i]&(0x3)); + break; + case 7: + kgem_debug_print(data, offset, i, "S7: depth offset constant: 0x%08x\n", data[i]); + break; + } + i++; + } + } + + assert(len == i); + return len; +} + +static int +gen3_decode_3d_1d(struct kgem *kgem, uint32_t offset) +{ + uint32_t *data = kgem->batch + offset; + unsigned int len, i, c, idx, word, map, sampler, instr; + const char *format, *zformat, *type; + uint32_t opcode; + static const struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes_3d_1d[] = { + { 0x86, 4, 4, "3DSTATE_CHROMA_KEY" }, + { 0x88, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" }, + { 0x99, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" }, + { 0x9a, 2, 2, "3DSTATE_DEFAULT_SPECULAR" }, + { 0x98, 2, 2, "3DSTATE_DEFAULT_Z" }, + { 0x97, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" }, + { 0x9d, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" }, + { 0x9e, 4, 4, "3DSTATE_MONO_FILTER" }, + { 0x89, 4, 4, "3DSTATE_FOG_MODE" }, + { 0x8f, 2, 16, "3DSTATE_MAP_PALLETE_LOAD_32" }, + { 0x83, 2, 2, "3DSTATE_SPAN_STIPPLE" }, + }, *opcode_3d_1d; + + opcode = (data[0] & 
0x00ff0000) >> 16; + + switch (opcode) { + case 0x07: + /* This instruction is unusual. A 0 length means just 1 DWORD instead of + * 2. The 0 length is specified in one place to be unsupported, but + * stated to be required in another, and 0 length LOAD_INDIRECTs appear + * to cause no harm at least. + */ + kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_INDIRECT\n"); + len = (data[0] & 0x000000ff) + 1; + i = 1; + if (data[0] & (0x01 << 8)) { + kgem_debug_print(data, offset, i++, "SIS.0\n"); + kgem_debug_print(data, offset, i++, "SIS.1\n"); + } + if (data[0] & (0x02 << 8)) { + kgem_debug_print(data, offset, i++, "DIS.0\n"); + } + if (data[0] & (0x04 << 8)) { + kgem_debug_print(data, offset, i++, "SSB.0\n"); + kgem_debug_print(data, offset, i++, "SSB.1\n"); + } + if (data[0] & (0x08 << 8)) { + kgem_debug_print(data, offset, i++, "MSB.0\n"); + kgem_debug_print(data, offset, i++, "MSB.1\n"); + } + if (data[0] & (0x10 << 8)) { + kgem_debug_print(data, offset, i++, "PSP.0\n"); + kgem_debug_print(data, offset, i++, "PSP.1\n"); + } + if (data[0] & (0x20 << 8)) { + kgem_debug_print(data, offset, i++, "PSC.0\n"); + kgem_debug_print(data, offset, i++, "PSC.1\n"); + } + assert(len == i); + return len; + case 0x04: + return gen3_decode_load_state_immediate_1(kgem, offset); + case 0x03: + kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_2\n"); + len = (data[0] & 0x0000000f) + 2; + i = 1; + for (word = 6; word <= 14; word++) { + if (data[0] & (1 << word)) { + if (word == 6) + kgem_debug_print(data, offset, i++, "TBCF\n"); + else if (word >= 7 && word <= 10) { + kgem_debug_print(data, offset, i++, "TB%dC\n", word - 7); + kgem_debug_print(data, offset, i++, "TB%dA\n", word - 7); + } else if (word >= 11 && word <= 14) { + kgem_debug_print(data, offset, i, "TM%dS0: offset=0x%08x, %s\n", + word - 11, + data[i]&0xfffffffe, + data[i]&1?"use fence":""); + i++; + kgem_debug_print(data, offset, i, "TM%dS1: height=%i, width=%i, %s\n", + word - 11, + data[i]>>21, 
(data[i]>>10)&0x3ff, + data[i]&2?(data[i]&1?"y-tiled":"x-tiled"):""); + i++; + kgem_debug_print(data, offset, i, "TM%dS2: pitch=%i, \n", + word - 11, + ((data[i]>>21) + 1)*4); + i++; + kgem_debug_print(data, offset, i++, "TM%dS3\n", word - 11); + kgem_debug_print(data, offset, i++, "TM%dS4: dflt color\n", word - 11); + } + } + } + assert(len == i); + return len; + case 0x00: + kgem_debug_print(data, offset, 0, "3DSTATE_MAP_STATE\n"); + len = (data[0] & 0x0000003f) + 2; + kgem_debug_print(data, offset, 1, "mask\n"); + + i = 2; + for (map = 0; map <= 15; map++) { + if (data[1] & (1 << map)) { + int width, height, pitch, dword; + struct drm_i915_gem_relocation_entry *reloc; + const char *tiling; + + reloc = kgem_debug_get_reloc_entry(kgem, &data[i] - kgem->batch); + assert(reloc->target_handle); + + dword = data[i]; + kgem_debug_print(data, offset, i++, "map %d MS2 %s%s%s, handle=%d\n", map, + dword&(1<<31)?"untrusted surface, ":"", + dword&(1<<1)?"vertical line stride enable, ":"", + dword&(1<<0)?"vertical ofs enable, ":"", + reloc->target_handle); + + dword = data[i]; + width = ((dword >> 10) & ((1 << 11) - 1))+1; + height = ((dword >> 21) & ((1 << 11) - 1))+1; + + tiling = "none"; + if (dword & (1 << 2)) + tiling = "fenced"; + else if (dword & (1 << 1)) + tiling = dword & (1 << 0) ? 
"Y" : "X"; + type = " BAD"; + format = " (invalid)"; + switch ((dword>>7) & 0x7) { + case 1: + type = "8"; + switch ((dword>>3) & 0xf) { + case 0: format = "I"; break; + case 1: format = "L"; break; + case 4: format = "A"; break; + case 5: format = " mono"; break; + } + break; + case 2: + type = "16"; + switch ((dword>>3) & 0xf) { + case 0: format = " rgb565"; break; + case 1: format = " argb1555"; break; + case 2: format = " argb4444"; break; + case 3: format = " ay88"; break; + case 5: format = " 88dvdu"; break; + case 6: format = " bump655"; break; + case 7: format = "I"; break; + case 8: format = "L"; break; + case 9: format = "A"; break; + } + break; + case 3: + type = "32"; + switch ((dword>>3) & 0xf) { + case 0: format = " argb8888"; break; + case 1: format = " abgr8888"; break; + case 2: format = " xrgb8888"; break; + case 3: format = " xbgr8888"; break; + case 4: format = " qwvu8888"; break; + case 5: format = " axvu8888"; break; + case 6: format = " lxvu8888"; break; + case 7: format = " xlvu8888"; break; + case 8: format = " argb2101010"; break; + case 9: format = " abgr2101010"; break; + case 10: format = " awvu2101010"; break; + case 11: format = " gr1616"; break; + case 12: format = " vu1616"; break; + case 13: format = " xI824"; break; + case 14: format = " xA824"; break; + case 15: format = " xL824"; break; + } + break; + case 5: + type = "422"; + switch ((dword>>3) & 0xf) { + case 0: format = " yuv_swapy"; break; + case 1: format = " yuv"; break; + case 2: format = " yuv_swapuv"; break; + case 3: format = " yuv_swapuvy"; break; + } + break; + case 6: + type = "compressed"; + switch ((dword>>3) & 0x7) { + case 0: format = " dxt1"; break; + case 1: format = " dxt2_3"; break; + case 2: format = " dxt4_5"; break; + case 3: format = " fxt1"; break; + case 4: format = " dxt1_rb"; break; + } + break; + case 7: + type = "4b indexed"; + switch ((dword>>3) & 0xf) { + case 7: format = " argb8888"; break; + } + break; + default: + format = "BAD"; + break; + } 
+ dword = data[i]; + kgem_debug_print(data, offset, i++, "map %d MS3 [width=%d, height=%d, format=%s%s, tiling=%s%s]\n", + map, width, height, type, format, tiling, + dword&(1<<9)?" palette select":""); + + dword = data[i]; + pitch = 4*(((dword >> 21) & ((1 << 11) - 1))+1); + kgem_debug_print(data, offset, i++, "map %d MS4 [pitch=%d, max_lod=%i, vol_depth=%i, cube_face_ena=%x, %s]\n", + map, pitch, + (dword>>9)&0x3f, dword&0xff, (dword>>15)&0x3f, + dword&(1<<8)?"miplayout legacy":"miplayout right"); + } + } + assert(len == i); + return len; + case 0x06: + kgem_debug_print(data, offset, 0, "3DSTATE_PIXEL_SHADER_CONSTANTS\n"); + len = (data[0] & 0x000000ff) + 2; + + i = 2; + for (c = 0; c <= 31; c++) { + if (data[1] & (1 << c)) { + kgem_debug_print(data, offset, i, "C%d.X = %f\n", + c, int_as_float(data[i])); + i++; + kgem_debug_print(data, offset, i, "C%d.Y = %f\n", + c, int_as_float(data[i])); + i++; + kgem_debug_print(data, offset, i, "C%d.Z = %f\n", + c, int_as_float(data[i])); + i++; + kgem_debug_print(data, offset, i, "C%d.W = %f\n", + c, int_as_float(data[i])); + i++; + } + } + assert(len == i); + return len; + case 0x05: + kgem_debug_print(data, offset, 0, "3DSTATE_PIXEL_SHADER_PROGRAM\n"); + len = (data[0] & 0x000000ff) + 2; + assert(((len-1) % 3) == 0); + assert(len <= 370); + i = 1; + for (instr = 0; instr < (len - 1) / 3; instr++) { + char instr_prefix[10]; + + sprintf(instr_prefix, "PS%03d", instr); + gen3_decode_instruction(data, offset, i, instr_prefix); + i += 3; + } + return len; + case 0x01: + kgem_debug_print(data, offset, 0, "3DSTATE_SAMPLER_STATE\n"); + kgem_debug_print(data, offset, 1, "mask\n"); + len = (data[0] & 0x0000003f) + 2; + i = 2; + for (sampler = 0; sampler <= 15; sampler++) { + if (data[1] & (1 << sampler)) { + uint32_t dword; + const char *mip_filter = ""; + dword = data[i]; + switch ((dword>>20)&0x3) { + case 0: mip_filter = "none"; break; + case 1: mip_filter = "nearest"; break; + case 3: mip_filter = "linear"; break; + } + 
kgem_debug_print(data, offset, i++, "sampler %d SS2:%s%s%s " + "base_mip_level=%i, mip_filter=%s, mag_filter=%s, min_filter=%s " + "lod_bias=%.2f,%s max_aniso=%i, shadow_func=%s\n", sampler, + dword&(1<<31)?" reverse gamma,":"", + dword&(1<<30)?" packed2planar,":"", + dword&(1<<29)?" colorspace conversion,":"", + (dword>>22)&0x1f, + mip_filter, + gen3_decode_sample_filter(dword>>17), + gen3_decode_sample_filter(dword>>14), + ((dword>>5)&0x1ff)/(0x10*1.0), + dword&(1<<4)?" shadow,":"", + dword&(1<<3)?4:2, + gen3_decode_compare_func(dword)); + dword = data[i]; + kgem_debug_print(data, offset, i++, "sampler %d SS3: min_lod=%.2f,%s " + "tcmode_x=%s, tcmode_y=%s, tcmode_z=%s,%s texmap_idx=%i,%s\n", + sampler, ((dword>>24)&0xff)/(0x10*1.0), + dword&(1<<17)?" kill pixel enable,":"", + decode_tex_coord_mode(dword>>12), + decode_tex_coord_mode(dword>>9), + decode_tex_coord_mode(dword>>6), + dword&(1<<5)?" normalized coords,":"", + (dword>>1)&0xf, + dword&(1<<0)?" deinterlacer,":""); + kgem_debug_print(data, offset, i++, "sampler %d SS4: border color\n", + sampler); + } + } + assert(len == i); + return len; + case 0x85: + len = (data[0] & 0x0000000f) + 2; + assert(len == 2); + + kgem_debug_print(data, offset, 0, + "3DSTATE_DEST_BUFFER_VARIABLES\n"); + + switch ((data[1] >> 8) & 0xf) { + case 0x0: format = "g8"; break; + case 0x1: format = "x1r5g5b5"; break; + case 0x2: format = "r5g6b5"; break; + case 0x3: format = "a8r8g8b8"; break; + case 0x4: format = "ycrcb_swapy"; break; + case 0x5: format = "ycrcb_normal"; break; + case 0x6: format = "ycrcb_swapuv"; break; + case 0x7: format = "ycrcb_swapuvy"; break; + case 0x8: format = "a4r4g4b4"; break; + case 0x9: format = "a1r5g5b5"; break; + case 0xa: format = "a2r10g10b10"; break; + default: format = "BAD"; break; + } + switch ((data[1] >> 2) & 0x3) { + case 0x0: zformat = "u16"; break; + case 0x1: zformat = "f16"; break; + case 0x2: zformat = "u24x8"; break; + default: zformat = "BAD"; break; + } + kgem_debug_print(data, 
offset, 1, "%s format, %s depth format, early Z %sabled\n", + format, zformat, + (data[1] & (1 << 31)) ? "en" : "dis"); + return len; + + case 0x8e: + { + const char *name, *tiling; + + len = (data[0] & 0x0000000f) + 2; + assert(len == 3); + + switch((data[1] >> 24) & 0x7) { + case 0x3: name = "color"; break; + case 0x7: name = "depth"; break; + default: name = "unknown"; break; + } + + tiling = "none"; + if (data[1] & (1 << 23)) + tiling = "fenced"; + else if (data[1] & (1 << 22)) + tiling = data[1] & (1 << 21) ? "Y" : "X"; + + kgem_debug_print(data, offset, 0, "3DSTATE_BUFFER_INFO\n"); + kgem_debug_print(data, offset, 1, "%s, tiling = %s, pitch=%d\n", name, tiling, data[1]&0xffff); + + kgem_debug_print(data, offset, 2, "address\n"); + return len; + } + case 0x81: + len = (data[0] & 0x0000000f) + 2; + assert(len == 3); + + kgem_debug_print(data, offset, 0, + "3DSTATE_SCISSOR_RECTANGLE\n"); + kgem_debug_print(data, offset, 1, "(%d,%d)\n", + data[1] & 0xffff, data[1] >> 16); + kgem_debug_print(data, offset, 2, "(%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + + return len; + case 0x80: + len = (data[0] & 0x0000000f) + 2; + assert(len == 5); + + kgem_debug_print(data, offset, 0, + "3DSTATE_DRAWING_RECTANGLE\n"); + kgem_debug_print(data, offset, 1, "%s\n", + data[1]&(1<<30)?"depth ofs disabled ":""); + kgem_debug_print(data, offset, 2, "(%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + kgem_debug_print(data, offset, 3, "(%d,%d)\n", + data[3] & 0xffff, data[3] >> 16); + kgem_debug_print(data, offset, 4, "(%d,%d)\n", + (int16_t)(data[4] & 0xffff), + (int16_t)(data[4] >> 16)); + + return len; + case 0x9c: + len = (data[0] & 0x0000000f) + 2; + assert(len == 7); + + kgem_debug_print(data, offset, 0, + "3DSTATE_CLEAR_PARAMETERS\n"); + kgem_debug_print(data, offset, 1, "prim_type=%s, clear=%s%s%s\n", + data[1]&(1<<16)?"CLEAR_RECT":"ZONE_INIT", + data[1]&(1<<2)?"color,":"", + data[1]&(1<<1)?"depth,":"", + data[1]&(1<<0)?"stencil,":""); + kgem_debug_print(data, offset, 2, 
"clear color\n"); + kgem_debug_print(data, offset, 3, "clear depth/stencil\n"); + kgem_debug_print(data, offset, 4, "color value (rgba8888)\n"); + kgem_debug_print(data, offset, 5, "depth value %f\n", + int_as_float(data[5])); + kgem_debug_print(data, offset, 6, "clear stencil\n"); + return len; + } + + for (idx = 0; idx < ARRAY_SIZE(opcodes_3d_1d); idx++) { + opcode_3d_1d = &opcodes_3d_1d[idx]; + if (((data[0] & 0x00ff0000) >> 16) == opcode_3d_1d->opcode) { + len = (data[0] & 0xf) + 2; + kgem_debug_print(data, offset, 0, "%s\n", opcode_3d_1d->name); + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + + return len; + } + } + + kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d_1d opcode = 0x%x\n", opcode); + assert(0); + return 1; +} + +#define VERTEX_OUT(fmt, ...) do { \ + kgem_debug_print(data, offset, i, " V%d."fmt"\n", vertex, __VA_ARGS__); \ + i++; \ +} while (0) + +static int +gen3_decode_3d_primitive(struct kgem *kgem, uint32_t offset) +{ + uint32_t *data = kgem->batch + offset; + char immediate = (data[0] & (1 << 23)) == 0; + unsigned int len, i, ret; + const char *primtype; + unsigned int vertex = 0; + + switch ((data[0] >> 18) & 0xf) { + case 0x0: primtype = "TRILIST"; break; + case 0x1: primtype = "TRISTRIP"; break; + case 0x2: primtype = "TRISTRIP_REVERSE"; break; + case 0x3: primtype = "TRIFAN"; break; + case 0x4: primtype = "POLYGON"; break; + case 0x5: primtype = "LINELIST"; break; + case 0x6: primtype = "LINESTRIP"; break; + case 0x7: primtype = "RECTLIST"; break; + case 0x8: primtype = "POINTLIST"; break; + case 0x9: primtype = "DIB"; break; + case 0xa: primtype = "CLEAR_RECT"; assert(0); break; + default: primtype = "unknown"; break; + } + + gen3_update_vertex_elements_offsets(kgem); + + /* XXX: 3DPRIM_DIB not supported */ + if (immediate) { + len = (data[0] & 0x0003ffff) + 2; + kgem_debug_print(data, offset, 0, "3DPRIMITIVE inline %s\n", primtype); + for (i = 1; i < len; ) { + ErrorF(" [%d]: ", vertex); + i += 
inline_vertex_out(kgem, data + i) / sizeof(uint32_t); + ErrorF("\n"); + vertex++; + } + + ret = len; + } else { + /* indirect vertices */ + len = data[0] & 0x0000ffff; /* index count */ + if (data[0] & (1 << 17)) { + /* random vertex access */ + kgem_debug_print(data, offset, 0, + "3DPRIMITIVE random indirect %s (%d)\n", primtype, len); + assert(0); + if (len == 0) { + /* vertex indices continue until 0xffff is found */ + } else { + /* fixed size vertex index buffer */ + } + ret = (len + 1) / 2 + 1; + goto out; + } else { + /* sequential vertex access */ + vertex = data[1] & 0xffff; + kgem_debug_print(data, offset, 0, + "3DPRIMITIVE sequential indirect %s, %d starting from " + "%d\n", primtype, len, vertex); + kgem_debug_print(data, offset, 1, " start\n"); + for (i = 0; i < len; i++) { + ErrorF(" [%d]: ", vertex); + indirect_vertex_out(kgem, vertex++); + ErrorF("\n"); + } + ret = 2; + goto out; + } + } + +out: + return ret; +} + +int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset) +{ + static const struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes[] = { + { 0x06, 1, 1, "3DSTATE_ANTI_ALIASING" }, + { 0x08, 1, 1, "3DSTATE_BACKFACE_STENCIL_OPS" }, + { 0x09, 1, 1, "3DSTATE_BACKFACE_STENCIL_MASKS" }, + { 0x16, 1, 1, "3DSTATE_COORD_SET_BINDINGS" }, + { 0x15, 1, 1, "3DSTATE_FOG_COLOR" }, + { 0x0b, 1, 1, "3DSTATE_INDEPENDENT_ALPHA_BLEND" }, + { 0x0d, 1, 1, "3DSTATE_MODES_4" }, + { 0x0c, 1, 1, "3DSTATE_MODES_5" }, + { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" }, + }; + uint32_t *data = kgem->batch + offset; + uint32_t opcode; + unsigned int idx; + + opcode = (data[0] & 0x1f000000) >> 24; + + switch (opcode) { + case 0x1f: + return gen3_decode_3d_primitive(kgem, offset); + case 0x1d: + return gen3_decode_3d_1d(kgem, offset); + case 0x1c: + return gen3_decode_3d_1c(kgem, offset); + } + + for (idx = 0; idx < ARRAY_SIZE(opcodes); idx++) { + if (opcode == opcodes[idx].opcode) { + unsigned int len = 1, i; + + kgem_debug_print(data, 
offset, 0, "%s\n", opcodes[idx].name); + if (opcodes[idx].max_len > 1) { + len = (data[0] & 0xff) + 2; + /* the length must satisfy both bounds; with || the check could never fail */ + assert(len >= opcodes[idx].min_len && + len <= opcodes[idx].max_len); + } + + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + return len; + } + } + + kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d opcode = 0x%x\n", opcode); + return 1; +} + + +void kgem_gen3_finish_state(struct kgem *kgem) +{ + memset(&state, 0, sizeof(state)); +} diff --git a/drivers/video/Intel-2D/kgem_debug_gen4.c b/drivers/video/Intel-2D/kgem_debug_gen4.c new file mode 100644 index 0000000000..aa1210380e --- /dev/null +++ b/drivers/video/Intel-2D/kgem_debug_gen4.c @@ -0,0 +1,687 @@ +/* + * Copyright © 2007-2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Eric Anholt + * Chris Wilson + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include "sna.h" +#include "sna_reg.h" + +#include "gen4_render.h" + +#include "kgem_debug.h" + +static struct state { + struct vertex_buffer { + int handle; + void *base; + const char *ptr; + int pitch; + + struct kgem_bo *current; + } vb[33]; + struct vertex_elements { + int buffer; + int offset; + bool valid; + uint32_t type; + uint8_t swizzle[4]; + } ve[33]; + int num_ve; + + struct dynamic_state { + struct kgem_bo *current; + void *base, *ptr; + } dynamic_state; +} state; + +static void gen4_update_vertex_buffer(struct kgem *kgem, const uint32_t *data) +{ + uint32_t reloc = sizeof(uint32_t) * (&data[1] - kgem->batch); + struct kgem_bo *bo = NULL; + void *base, *ptr; + int i; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == reloc) + break; + assert(i < kgem->nreloc); + reloc = kgem->reloc[i].target_handle; + + if (reloc == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == reloc) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map__debug(kgem, bo); + } + ptr = (char *)base + kgem->reloc[i].delta; + + i = data[0] >> 27; + + state.vb[i].current = bo; + state.vb[i].base = base; + state.vb[i].ptr = ptr; + state.vb[i].pitch = data[0] & 0x7ff; +} + +static uint32_t +get_ve_component(uint32_t data, int component) +{ + return (data >> (16 + (3 - component) * 4)) & 0x7; +} + +static void gen4_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data) +{ + state.ve[id].buffer = data[0] >> 27; + state.ve[id].valid = !!(data[0] & (1 << 26)); + state.ve[id].type = (data[0] >> 16) & 0x1ff; + state.ve[id].offset = data[0] & 0x7ff; + state.ve[id].swizzle[0] = get_ve_component(data[1], 0); + state.ve[id].swizzle[1] = get_ve_component(data[1], 1); + state.ve[id].swizzle[2] = get_ve_component(data[1], 2); + 
state.ve[id].swizzle[3] = get_ve_component(data[1], 3); +} + +static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max) +{ + int c; + + ErrorF("("); + for (c = 0; c < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%d", v[c]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + for (; c < 4; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("1.0"); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + ErrorF(")"); +} + +static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max) +{ + int c, o; + + ErrorF("("); + for (c = o = 0; c < 4 && o < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%f", f[o++]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + for (; c < 4; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("1.0"); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + ErrorF(")"); +} + +static void ve_out(const struct vertex_elements *ve, const void *ptr) +{ + switch (ve->type) { + case GEN4_SURFACEFORMAT_R32_FLOAT: + vertices_float_out(ve, ptr, 1); + break; + case GEN4_SURFACEFORMAT_R32G32_FLOAT: + vertices_float_out(ve, ptr, 2); + break; + case GEN4_SURFACEFORMAT_R32G32B32_FLOAT: + vertices_float_out(ve, ptr, 3); + break; + case GEN4_SURFACEFORMAT_R32G32B32A32_FLOAT: + vertices_float_out(ve, ptr, 4); + break; + case 
GEN4_SURFACEFORMAT_R16_SINT: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN4_SURFACEFORMAT_R16G16_SINT: + vertices_sint16_out(ve, ptr, 2); + break; + case GEN4_SURFACEFORMAT_R16G16B16A16_SINT: + vertices_sint16_out(ve, ptr, 4); + break; + case GEN4_SURFACEFORMAT_R16_SSCALED: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN4_SURFACEFORMAT_R16G16_SSCALED: + vertices_sint16_out(ve, ptr, 2); + break; + case GEN4_SURFACEFORMAT_R16G16B16A16_SSCALED: + vertices_sint16_out(ve, ptr, 4); + break; + } +} + +/* + * Print every valid vertex element of vertex v, separated by ", ". + * + * Walks state.ve[0..state.num_ve] and skips entries whose valid flag is + * clear. The previous do/while form executed "continue" without advancing + * i when element 0 was invalid, so the loop condition stayed true forever; + * a plain for loop advances i on every path. + */ +static void indirect_vertex_out(struct kgem *kgem, uint32_t v) +{ + bool first = true; + int i; + + for (i = 0; i <= state.num_ve; i++) { + const struct vertex_elements *ve = &state.ve[i]; + const struct vertex_buffer *vb; + + if (!ve->valid) + continue; + + /* the separator goes between elements, never after the last one */ + if (!first) + ErrorF(", "); + first = false; + + vb = &state.vb[ve->buffer]; + ve_out(ve, vb->ptr + v * vb->pitch + ve->offset); + } +} + +static void primitive_out(struct kgem *kgem, uint32_t *data) +{ + int n; + + assert((data[0] & (1<<15)) == 0); /* XXX index buffers */ + + for (n = 0; n < data[1]; n++) { + int v = data[2] + n; + ErrorF(" [%d:%d] = ", n, v); + indirect_vertex_out(kgem, v); + ErrorF("\n"); + } +} + +static void +state_base_out(uint32_t *data, uint32_t offset, unsigned int index, + const char *name) +{ + if (data[index] & 1) + kgem_debug_print(data, offset, index, + "%s state base address 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state base not updated\n", + name); +} + +static void +state_max_out(uint32_t *data, uint32_t offset, unsigned int index, + const char *name) +{ + if (data[index] == 1) + kgem_debug_print(data, offset, index, + "%s state upper bound disabled\n", name); + else if (data[index] & 1) + kgem_debug_print(data, offset, index, + "%s state upper bound 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state upper bound not updated\n", + 
name); +} + +static const char * +get_965_surfacetype(unsigned int surfacetype) +{ + switch (surfacetype) { + case 0: return "1D"; + case 1: return "2D"; + case 2: return "3D"; + case 3: return "CUBE"; + case 4: return "BUFFER"; + case 7: return "NULL"; + default: return "unknown"; + } +} + +static const char * +get_965_depthformat(unsigned int depthformat) +{ + switch (depthformat) { + case 0: return "s8_z24float"; + case 1: return "z32float"; + case 2: return "z24s8"; + case 5: return "z16"; + default: return "unknown"; + } +} + +static const char * +get_965_element_component(uint32_t data, int component) +{ + uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7; + + switch (component_control) { + case 0: + return "nostore"; + case 1: + switch (component) { + case 0: return "X"; + case 1: return "Y"; + case 2: return "Z"; + case 3: return "W"; + default: return "fail"; + } + case 2: + return "0.0"; + case 3: + return "1.0"; + case 4: + return "0x1"; + case 5: + return "VID"; + default: + return "fail"; + } +} + +static const char * +get_965_prim_type(uint32_t data) +{ + uint32_t primtype = (data >> 10) & 0x1f; + + switch (primtype) { + case 0x01: return "point list"; + case 0x02: return "line list"; + case 0x03: return "line strip"; + case 0x04: return "tri list"; + case 0x05: return "tri strip"; + case 0x06: return "tri fan"; + case 0x07: return "quad list"; + case 0x08: return "quad strip"; + case 0x09: return "line list adj"; + case 0x0a: return "line strip adj"; + case 0x0b: return "tri list adj"; + case 0x0c: return "tri strip adj"; + case 0x0d: return "tri strip reverse"; + case 0x0e: return "polygon"; + case 0x0f: return "rect list"; + case 0x10: return "line loop"; + case 0x11: return "point list bf"; + case 0x12: return "line strip cont"; + case 0x13: return "line strip bf"; + case 0x14: return "line strip cont bf"; + case 0x15: return "tri fan no stipple"; + default: return "fail"; + } +} + +#if 0 +struct reloc { + struct kgem_bo *bo; 
+ void *base; +}; + +static void * +get_reloc(struct kgem *kgem, + void *base, const uint32_t *reloc, + struct reloc *r) +{ + uint32_t delta = *reloc; + + memset(r, 0, sizeof(*r)); + + if (base == 0) { + uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch); + struct kgem_bo *bo = NULL; + int i; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == handle) + break; + assert(i < kgem->nreloc); + handle = kgem->reloc[i].target_handle; + delta = kgem->reloc[i].delta; + + if (handle == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == handle) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map__debug(kgem, bo); + r->bo = bo; + r->base = base; + } + } + + return (char *)base + delta; +} +#endif + +int kgem_gen4_decode_3d(struct kgem *kgem, uint32_t offset) +{ + static const struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes[] = { + { 0x6000, 3, 3, "URB_FENCE" }, + { 0x6001, 2, 2, "CS_URB_FENCE" }, + { 0x6002, 2, 2, "CONSTANT_BUFFER" }, + { 0x6101, 6, 6, "STATE_BASE_ADDRESS" }, + { 0x6102, 2, 2 , "STATE_SIP" }, + { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" }, + { 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" }, + { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" }, + { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" }, + { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" }, + { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" }, + { 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" }, + { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" }, + { 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" }, + { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" }, + { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" }, + { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" }, + { 0x7909, 2, 2, 
"3DSTATE_CLEAR_PARAMS" }, + { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" }, + { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" }, + { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" }, + { 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" }, + { 0x7b00, 6, 6, "3DPRIMITIVE" }, + { 0x7805, 3, 3, "3DSTATE_URB" }, + { 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" }, + { 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" }, + { 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" }, + { 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" }, + }; + uint32_t *data = kgem->batch + offset; + uint32_t op; + unsigned int len; + int i; + const char *desc1 = NULL; + + len = (data[0] & 0xff) + 2; + op = (data[0] & 0xffff0000) >> 16; + switch (op) { + case 0x6000: + assert(len == 3); + + kgem_debug_print(data, offset, 0, "URB_FENCE: %s%s%s%s%s%s\n", + (data[0] >> 13) & 1 ? "cs " : "", + (data[0] >> 12) & 1 ? "vfe " : "", + (data[0] >> 11) & 1 ? "sf " : "", + (data[0] >> 10) & 1 ? "clip " : "", + (data[0] >> 9) & 1 ? "gs " : "", + (data[0] >> 8) & 1 ? "vs " : ""); + kgem_debug_print(data, offset, 1, + "vs fence: %d, gs_fence: %d, clip_fence: %d\n", + data[1] & 0x3ff, + (data[1] >> 10) & 0x3ff, + (data[1] >> 20) & 0x3ff); + kgem_debug_print(data, offset, 2, + "sf fence: %d, vfe_fence: %d, cs_fence: %d\n", + data[2] & 0x3ff, + (data[2] >> 10) & 0x3ff, + (data[2] >> 20) & 0x7ff); + return len; + + case 0x6001: + kgem_debug_print(data, offset, 0, "CS_URB_STATE\n"); + kgem_debug_print(data, offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n", + (data[1] >> 4) & 0x1f, + (((data[1] >> 4) & 0x1f) + 1) * 64, + data[1] & 0x7); + return len; + case 0x6002: + kgem_debug_print(data, offset, 0, "CONSTANT_BUFFER: %s\n", + (data[0] >> 8) & 1 ? 
"valid" : "invalid"); + kgem_debug_print(data, offset, 1, "offset: 0x%08x, length: %d bytes\n", + data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64); + return len; + case 0x6101: + i = 0; + kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n"); + assert(len == 6); + + state_base_out(data, offset, i++, "general"); + state_base_out(data, offset, i++, "surface"); + state_base_out(data, offset, i++, "media"); + + state_max_out(data, offset, i++, "general"); + state_max_out(data, offset, i++, "media"); + + return len; + + case 0x7801: + assert(len == 6); + + kgem_debug_print(data, offset, 0, + "3DSTATE_BINDING_TABLE_POINTERS\n"); + kgem_debug_print(data, offset, 1, "VS binding table\n"); + kgem_debug_print(data, offset, 2, "GS binding table\n"); + kgem_debug_print(data, offset, 3, "CLIP binding table\n"); + kgem_debug_print(data, offset, 4, "SF binding table\n"); + kgem_debug_print(data, offset, 5, "WM binding table\n"); + + return len; + + case 0x7808: + assert((len - 1) % 4 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n"); + + for (i = 1; i < len;) { + gen4_update_vertex_buffer(kgem, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n", + data[i] >> 27, + data[i] & (1 << 20) ? "random" : "sequential", + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i++, "buffer address\n"); + kgem_debug_print(data, offset, i++, "max index\n"); + kgem_debug_print(data, offset, i++, "mbz\n"); + } + return len; + + case 0x7809: + assert((len + 1) % 2 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n"); + + memset(state.ve, 0, sizeof(state.ve)); /* XXX? */ + for (i = 1; i < len;) { + gen4_update_vertex_elements(kgem, (i - 1)/2, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, " + "src offset 0x%04x bytes\n", + data[i] >> 27, + data[i] & (1 << 26) ? 
"" : "in", + (data[i] >> 16) & 0x1ff, + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), " + "dst offset 0x%02x bytes\n", + get_965_element_component(data[i], 0), + get_965_element_component(data[i], 1), + get_965_element_component(data[i], 2), + get_965_element_component(data[i], 3), + (data[i] & 0xff) * 4); + i++; + } + state.num_ve = (len - 1) / 2; /* XXX? */ + return len; + + case 0x780a: + assert(len == 3); + kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n"); + kgem_debug_print(data, offset, 1, "beginning buffer address\n"); + kgem_debug_print(data, offset, 2, "ending buffer address\n"); + return len; + + case 0x7900: + assert(len == 4); + kgem_debug_print(data, offset, 0, + "3DSTATE_DRAWING_RECTANGLE\n"); + kgem_debug_print(data, offset, 1, "top left: %d,%d\n", + data[1] & 0xffff, + (data[1] >> 16) & 0xffff); + kgem_debug_print(data, offset, 2, "bottom right: %d,%d\n", + data[2] & 0xffff, + (data[2] >> 16) & 0xffff); + kgem_debug_print(data, offset, 3, "origin: %d,%d\n", + (int)data[3] & 0xffff, + ((int)data[3] >> 16) & 0xffff); + return len; + + case 0x7905: + assert(len == 7); + kgem_debug_print(data, offset, 0, + "3DSTATE_DEPTH_BUFFER\n"); + kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n", + get_965_surfacetype(data[1] >> 29), + get_965_depthformat((data[1] >> 18) & 0x7), + (data[1] & 0x0001ffff) + 1, + data[1] & (1 << 27) ? 
"" : "not ", + (data[1] & (1 << 22)) != 0, + (data[1] & (1 << 21)) != 0); + kgem_debug_print(data, offset, 2, "depth offset\n"); + kgem_debug_print(data, offset, 3, "%dx%d\n", + ((data[3] & 0x0007ffc0) >> 6) + 1, + ((data[3] & 0xfff80000) >> 19) + 1); + kgem_debug_print(data, offset, 4, "volume depth\n"); + kgem_debug_print(data, offset, 5, "\n"); + kgem_debug_print(data, offset, 6, "\n"); + return len; + + case 0x7a00: + assert(len == 4 || len == 5); + switch ((data[1] >> 14) & 0x3) { + case 0: desc1 = "no write"; break; + case 1: desc1 = "qword write"; break; + case 2: desc1 = "PS_DEPTH_COUNT write"; break; + case 3: desc1 = "TIMESTAMP write"; break; + } + kgem_debug_print(data, offset, 0, "PIPE_CONTROL\n"); + kgem_debug_print(data, offset, 1, + "%s, %scs stall, %stlb invalidate, " + "%ssync gfdt, %sdepth stall, %sRC write flush, " + "%sinst flush, %sTC flush\n", + desc1, + data[1] & (1 << 20) ? "" : "no ", + data[1] & (1 << 18) ? "" : "no ", + data[1] & (1 << 17) ? "" : "no ", + data[1] & (1 << 13) ? "" : "no ", + data[1] & (1 << 12) ? "" : "no ", + data[1] & (1 << 11) ? "" : "no ", + data[1] & (1 << 10) ? "" : "no "); + if (len == 5) { + kgem_debug_print(data, offset, 2, "destination address\n"); + kgem_debug_print(data, offset, 3, "immediate dword low\n"); + kgem_debug_print(data, offset, 4, "immediate dword high\n"); + } else { + for (i = 2; i < len; i++) { + kgem_debug_print(data, offset, i, "\n"); + } + } + return len; + + case 0x7b00: + assert(len == 6); + kgem_debug_print(data, offset, 0, + "3DPRIMITIVE: %s %s\n", + get_965_prim_type(data[0]), + (data[0] & (1 << 15)) ? 
"random" : "sequential"); + kgem_debug_print(data, offset, 1, "vertex count\n"); + kgem_debug_print(data, offset, 2, "start vertex\n"); + kgem_debug_print(data, offset, 3, "instance count\n"); + kgem_debug_print(data, offset, 4, "start instance\n"); + kgem_debug_print(data, offset, 5, "index bias\n"); + primitive_out(kgem, data); + return len; + } + + /* For the rest, just dump the bytes */ + for (i = 0; i < ARRAY_SIZE(opcodes); i++) + if (op == opcodes[i].opcode) + break; + + assert(i < ARRAY_SIZE(opcodes)); + + len = 1; + kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name); + if (opcodes[i].max_len > 1) { + len = (data[0] & 0xff) + 2; + assert(len >= opcodes[i].min_len && + len <= opcodes[i].max_len); + } + + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + + return len; +} + +void kgem_gen4_finish_state(struct kgem *kgem) +{ + memset(&state, 0, sizeof(state)); +} diff --git a/drivers/video/Intel-2D/kgem_debug_gen5.c b/drivers/video/Intel-2D/kgem_debug_gen5.c new file mode 100644 index 0000000000..58e92869a0 --- /dev/null +++ b/drivers/video/Intel-2D/kgem_debug_gen5.c @@ -0,0 +1,662 @@ +/* + * Copyright © 2007-2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eric Anholt + * Chris Wilson + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include "sna.h" +#include "sna_reg.h" + +#include "gen5_render.h" + +#include "kgem_debug.h" + +static struct state { + struct vertex_buffer { + int handle; + void *base; + int size; + const char *ptr; + int pitch; + + struct kgem_bo *current; + } vb[17]; + struct vertex_elements { + int buffer; + int offset; + bool valid; + uint32_t type; + uint8_t swizzle[4]; + } ve[17]; + int num_ve; + + struct dynamic_state { + struct kgem_bo *current; + void *base, *ptr; + } dynamic_state; +} state; + +static void gen5_update_vertex_buffer(struct kgem *kgem, const uint32_t *data) +{ + struct drm_i915_gem_relocation_entry *reloc; + struct kgem_bo *bo = NULL; + void *base, *ptr; + int i, size; + + reloc = kgem_debug_get_reloc_entry(kgem, &data[1] - kgem->batch); + if (reloc->target_handle == -1) { + base = kgem->batch; + size = kgem->nbatch * sizeof(uint32_t); + } else { + bo = kgem_debug_get_bo_for_reloc_entry(kgem, reloc); + base = kgem_bo_map__debug(kgem, bo); + size = kgem_bo_size(bo); + } + ptr = (char *)base + reloc->delta; + + i = data[0] >> 27; + + state.vb[i].handle = reloc->target_handle; + state.vb[i].current = bo; + state.vb[i].base = base; + state.vb[i].ptr = ptr; + state.vb[i].pitch = data[0] & 0x7ff; + state.vb[i].size = size; +} + +static uint32_t +get_ve_component(uint32_t data, int component) +{ + return (data >> (16 + (3 - component) * 4)) & 0x7; +} + 
+static void gen5_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data) +{ + state.ve[id].buffer = data[0] >> 27; + state.ve[id].valid = !!(data[0] & (1 << 26)); + state.ve[id].type = (data[0] >> 16) & 0x1ff; + state.ve[id].offset = data[0] & 0x7ff; + state.ve[id].swizzle[0] = get_ve_component(data[1], 0); + state.ve[id].swizzle[1] = get_ve_component(data[1], 1); + state.ve[id].swizzle[2] = get_ve_component(data[1], 2); + state.ve[id].swizzle[3] = get_ve_component(data[1], 3); +} + +static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max) +{ + int c, o; + + ErrorF("("); + for (c = o = 0; c < 4 && o < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%d", v[o++]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (o < max) + ErrorF(", "); + } + ErrorF(")"); +} + +static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max) +{ + int c, o; + + ErrorF("("); + for (c = o = 0; c < 4 && o < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%f", f[o++]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (o < max) + ErrorF(", "); + } + ErrorF(")"); +} + +static void ve_out(const struct vertex_elements *ve, const void *ptr) +{ + switch (ve->type) { + case GEN5_SURFACEFORMAT_R32_FLOAT: + vertices_float_out(ve, ptr, 1); + break; + case GEN5_SURFACEFORMAT_R32G32_FLOAT: + vertices_float_out(ve, ptr, 2); + break; + case GEN5_SURFACEFORMAT_R32G32B32_FLOAT: + vertices_float_out(ve, ptr, 3); + break; + case GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT: + vertices_float_out(ve, ptr, 4); + break; + case GEN5_SURFACEFORMAT_R16_SINT: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN5_SURFACEFORMAT_R16G16_SINT: + 
vertices_sint16_out(ve, ptr, 2); + break; + case GEN5_SURFACEFORMAT_R16G16B16A16_SINT: + vertices_sint16_out(ve, ptr, 4); + break; + case GEN5_SURFACEFORMAT_R16_SSCALED: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN5_SURFACEFORMAT_R16G16_SSCALED: + vertices_sint16_out(ve, ptr, 2); + break; + case GEN5_SURFACEFORMAT_R16G16B16A16_SSCALED: + vertices_sint16_out(ve, ptr, 4); + break; + } +} + +static void indirect_vertex_out(struct kgem *kgem, uint32_t v) +{ + int i = 1; + + do { + const struct vertex_elements *ve = &state.ve[i]; + const struct vertex_buffer *vb = &state.vb[ve->buffer]; + const void *ptr = vb->ptr + v * vb->pitch + ve->offset; + + if (!ve->valid) + continue; + + assert(vb->pitch); + assert(ve->offset + v*vb->pitch < vb->size); + + ve_out(ve, ptr); + + while (++i <= state.num_ve && !state.ve[i].valid) + ; + + if (i <= state.num_ve) + ErrorF(", "); + } while (i <= state.num_ve); +} + +static void primitive_out(struct kgem *kgem, uint32_t *data) +{ + int n; + + assert((data[0] & (1<<15)) == 0); /* XXX index buffers */ + + for (n = 0; n < data[1]; n++) { + int v = data[2] + n; + ErrorF(" [%d:%d] = ", n, v); + indirect_vertex_out(kgem, v); + ErrorF("\n"); + } +} + +static void +state_base_out(uint32_t *data, uint32_t offset, unsigned int index, + const char *name) +{ + if (data[index] & 1) + kgem_debug_print(data, offset, index, + "%s state base address 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state base not updated\n", + name); +} + +static void +state_max_out(uint32_t *data, uint32_t offset, unsigned int index, + const char *name) +{ + if (data[index] == 1) + kgem_debug_print(data, offset, index, + "%s state upper bound disabled\n", name); + else if (data[index] & 1) + kgem_debug_print(data, offset, index, + "%s state upper bound 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state upper bound not updated\n", + name); +} + +static const char * 
+get_965_surfacetype(unsigned int surfacetype) +{ + switch (surfacetype) { + case 0: return "1D"; + case 1: return "2D"; + case 2: return "3D"; + case 3: return "CUBE"; + case 4: return "BUFFER"; + case 7: return "NULL"; + default: return "unknown"; + } +} + +static const char * +get_965_depthformat(unsigned int depthformat) +{ + switch (depthformat) { + case 0: return "s8_z24float"; + case 1: return "z32float"; + case 2: return "z24s8"; + case 5: return "z16"; + default: return "unknown"; + } +} + +static const char * +get_965_element_component(uint32_t data, int component) +{ + uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7; + + switch (component_control) { + case 0: + return "nostore"; + case 1: + switch (component) { + case 0: return "X"; + case 1: return "Y"; + case 2: return "Z"; + case 3: return "W"; + default: return "fail"; + } + case 2: + return "0.0"; + case 3: + return "1.0"; + case 4: + return "0x1"; + case 5: + return "VID"; + default: + return "fail"; + } +} + +static const char * +get_965_prim_type(uint32_t data) +{ + uint32_t primtype = (data >> 10) & 0x1f; + + switch (primtype) { + case 0x01: return "point list"; + case 0x02: return "line list"; + case 0x03: return "line strip"; + case 0x04: return "tri list"; + case 0x05: return "tri strip"; + case 0x06: return "tri fan"; + case 0x07: return "quad list"; + case 0x08: return "quad strip"; + case 0x09: return "line list adj"; + case 0x0a: return "line strip adj"; + case 0x0b: return "tri list adj"; + case 0x0c: return "tri strip adj"; + case 0x0d: return "tri strip reverse"; + case 0x0e: return "polygon"; + case 0x0f: return "rect list"; + case 0x10: return "line loop"; + case 0x11: return "point list bf"; + case 0x12: return "line strip cont"; + case 0x13: return "line strip bf"; + case 0x14: return "line strip cont bf"; + case 0x15: return "tri fan no stipple"; + default: return "fail"; + } +} + +#if 0 +struct reloc { + struct kgem_bo *bo; + void *base; +}; + +static void 
* +get_reloc(struct kgem *kgem, + void *base, const uint32_t *reloc, + struct reloc *r) +{ + uint32_t delta = *reloc; + + memset(r, 0, sizeof(*r)); + + if (base == 0) { + uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch); + struct kgem_bo *bo = NULL; + int i; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == handle) + break; + assert(i < kgem->nreloc); + handle = kgem->reloc[i].target_handle; + delta = kgem->reloc[i].delta; + + if (handle == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == handle) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map(kgem, bo, PROT_READ); + r->bo = bo; + r->base = base; + } + } + + return (char *)base + delta; +} +#endif + +int kgem_gen5_decode_3d(struct kgem *kgem, uint32_t offset) +{ + static const struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes[] = { + { 0x6000, 3, 3, "URB_FENCE" }, + { 0x6001, 2, 2, "CS_URB_FENCE" }, + { 0x6002, 2, 2, "CONSTANT_BUFFER" }, + { 0x6101, 6, 6, "STATE_BASE_ADDRESS" }, + { 0x6102, 2, 2 , "STATE_SIP" }, + { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" }, + { 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" }, + { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" }, + { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" }, + { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" }, + { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" }, + { 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" }, + { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" }, + { 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" }, + { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" }, + { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" }, + { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" }, + { 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" }, + { 0x790a, 3, 3, 
"3DSTATE_AA_LINE_PARAMETERS" }, + { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" }, + { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" }, + { 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" }, + { 0x7b00, 6, 6, "3DPRIMITIVE" }, + { 0x7805, 3, 3, "3DSTATE_URB" }, + { 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" }, + { 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" }, + { 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" }, + { 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" }, + }; + uint32_t *data = kgem->batch + offset; + uint32_t op; + unsigned int len; + int i; + const char *desc1 = NULL; + + len = (data[0] & 0xff) + 2; + op = (data[0] & 0xffff0000) >> 16; + switch (op) { + case 0x6000: + assert(len == 3); + + kgem_debug_print(data, offset, 0, "URB_FENCE: %s%s%s%s%s%s\n", + (data[0] >> 13) & 1 ? "cs " : "", + (data[0] >> 12) & 1 ? "vfe " : "", + (data[0] >> 11) & 1 ? "sf " : "", + (data[0] >> 10) & 1 ? "clip " : "", + (data[0] >> 9) & 1 ? "gs " : "", + (data[0] >> 8) & 1 ? "vs " : ""); + kgem_debug_print(data, offset, 1, + "vs fence: %d, gs_fence: %d, clip_fence: %d\n", + data[1] & 0x3ff, + (data[1] >> 10) & 0x3ff, + (data[1] >> 20) & 0x3ff); + kgem_debug_print(data, offset, 2, + "sf fence: %d, vfe_fence: %d, cs_fence: %d\n", + data[2] & 0x3ff, + (data[2] >> 10) & 0x3ff, + (data[2] >> 20) & 0x7ff); + return len; + + case 0x6001: + kgem_debug_print(data, offset, 0, "CS_URB_STATE\n"); + kgem_debug_print(data, offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n", + (data[1] >> 4) & 0x1f, + (((data[1] >> 4) & 0x1f) + 1) * 64, + data[1] & 0x7); + return len; + case 0x6002: + kgem_debug_print(data, offset, 0, "CONSTANT_BUFFER: %s\n", + (data[0] >> 8) & 1 ? 
"valid" : "invalid"); + kgem_debug_print(data, offset, 1, "offset: 0x%08x, length: %d bytes\n", + data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64); + return len; + case 0x6101: + i = 0; + kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n"); + assert(len == 8); + + state_base_out(data, offset, i++, "general"); + state_base_out(data, offset, i++, "surface"); + state_base_out(data, offset, i++, "media"); + state_base_out(data, offset, i++, "instruction"); + + state_max_out(data, offset, i++, "general"); + state_max_out(data, offset, i++, "media"); + state_max_out(data, offset, i++, "instruction"); + + return len; + + case 0x7801: + assert(len == 6); + + kgem_debug_print(data, offset, 0, + "3DSTATE_BINDING_TABLE_POINTERS\n"); + kgem_debug_print(data, offset, 1, "VS binding table\n"); + kgem_debug_print(data, offset, 2, "GS binding table\n"); + kgem_debug_print(data, offset, 3, "CLIP binding table\n"); + kgem_debug_print(data, offset, 4, "SF binding table\n"); + kgem_debug_print(data, offset, 5, "WM binding table\n"); + + return len; + + case 0x7808: + assert((len - 1) % 4 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n"); + + for (i = 1; i < len;) { + gen5_update_vertex_buffer(kgem, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n", + data[i] >> 27, + data[i] & (1 << 20) ? "random" : "sequential", + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i++, "buffer address\n"); + kgem_debug_print(data, offset, i++, "max index\n"); + kgem_debug_print(data, offset, i++, "mbz\n"); + } + return len; + + case 0x7809: + assert((len + 1) % 2 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n"); + + memset(state.ve, 0, sizeof(state.ve)); /* XXX? */ + for (i = 1; i < len;) { + gen5_update_vertex_elements(kgem, (i - 1)/2, data + i); + + kgem_debug_print(data, offset, i, + "buffer %d: %svalid, type 0x%04x, " + "src offset 0x%04x bytes\n", + data[i] >> 27, + data[i] & (1 << 26) ? 
"" : "in", + (data[i] >> 16) & 0x1ff, + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i, "(%s, %s, %s, %s)\n", + get_965_element_component(data[i], 0), + get_965_element_component(data[i], 1), + get_965_element_component(data[i], 2), + get_965_element_component(data[i], 3)); + i++; + } + state.num_ve = (len - 1) / 2; /* XXX? */ + return len; + + case 0x780a: + assert(len == 3); + kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n"); + kgem_debug_print(data, offset, 1, "beginning buffer address\n"); + kgem_debug_print(data, offset, 2, "ending buffer address\n"); + return len; + + case 0x7900: + assert(len == 4); + kgem_debug_print(data, offset, 0, + "3DSTATE_DRAWING_RECTANGLE\n"); + kgem_debug_print(data, offset, 1, "top left: %d,%d\n", + data[1] & 0xffff, + (data[1] >> 16) & 0xffff); + kgem_debug_print(data, offset, 2, "bottom right: %d,%d\n", + data[2] & 0xffff, + (data[2] >> 16) & 0xffff); + kgem_debug_print(data, offset, 3, "origin: %d,%d\n", + (int)data[3] & 0xffff, + ((int)data[3] >> 16) & 0xffff); + return len; + + case 0x7905: + assert(len == 7); + kgem_debug_print(data, offset, 0, + "3DSTATE_DEPTH_BUFFER\n"); + kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n", + get_965_surfacetype(data[1] >> 29), + get_965_depthformat((data[1] >> 18) & 0x7), + (data[1] & 0x0001ffff) + 1, + data[1] & (1 << 27) ? 
"" : "not ", + (data[1] & (1 << 22)) != 0, + (data[1] & (1 << 21)) != 0); + kgem_debug_print(data, offset, 2, "depth offset\n"); + kgem_debug_print(data, offset, 3, "%dx%d\n", + ((data[3] & 0x0007ffc0) >> 6) + 1, + ((data[3] & 0xfff80000) >> 19) + 1); + kgem_debug_print(data, offset, 4, "volume depth\n"); + kgem_debug_print(data, offset, 5, "\n"); + kgem_debug_print(data, offset, 6, "\n"); + return len; + + case 0x7a00: + assert(len == 4 || len == 5); + switch ((data[1] >> 14) & 0x3) { + case 0: desc1 = "no write"; break; + case 1: desc1 = "qword write"; break; + case 2: desc1 = "PS_DEPTH_COUNT write"; break; + case 3: desc1 = "TIMESTAMP write"; break; + } + kgem_debug_print(data, offset, 0, "PIPE_CONTROL\n"); + kgem_debug_print(data, offset, 1, + "%s, %scs stall, %stlb invalidate, " + "%ssync gfdt, %sdepth stall, %sRC write flush, " + "%sinst flush, %sTC flush\n", + desc1, + data[1] & (1 << 20) ? "" : "no ", + data[1] & (1 << 18) ? "" : "no ", + data[1] & (1 << 17) ? "" : "no ", + data[1] & (1 << 13) ? "" : "no ", + data[1] & (1 << 12) ? "" : "no ", + data[1] & (1 << 11) ? "" : "no ", + data[1] & (1 << 10) ? "" : "no "); + if (len == 5) { + kgem_debug_print(data, offset, 2, "destination address\n"); + kgem_debug_print(data, offset, 3, "immediate dword low\n"); + kgem_debug_print(data, offset, 4, "immediate dword high\n"); + } else { + for (i = 2; i < len; i++) { + kgem_debug_print(data, offset, i, "\n"); + } + } + return len; + + case 0x7b00: + assert(len == 6); + kgem_debug_print(data, offset, 0, + "3DPRIMITIVE: %s %s\n", + get_965_prim_type(data[0]), + (data[0] & (1 << 15)) ? 
"random" : "sequential"); + kgem_debug_print(data, offset, 1, "vertex count\n"); + kgem_debug_print(data, offset, 2, "start vertex\n"); + kgem_debug_print(data, offset, 3, "instance count\n"); + kgem_debug_print(data, offset, 4, "start instance\n"); + kgem_debug_print(data, offset, 5, "index bias\n"); + primitive_out(kgem, data); + return len; + } + + /* For the rest, just dump the bytes */ + for (i = 0; i < ARRAY_SIZE(opcodes); i++) + if (op == opcodes[i].opcode) + break; + + assert(i < ARRAY_SIZE(opcodes)); + + len = 1; + kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name); + if (opcodes[i].max_len > 1) { + len = (data[0] & 0xff) + 2; + assert(len >= opcodes[i].min_len && + len <= opcodes[i].max_len); + } + + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + + return len; +} + +void kgem_gen5_finish_state(struct kgem *kgem) +{ + memset(&state, 0, sizeof(state)); +} diff --git a/drivers/video/Intel-2D/kgem_debug_gen6.c b/drivers/video/Intel-2D/kgem_debug_gen6.c index 7ab00f5db6..2524a77ad6 100644 --- a/drivers/video/Intel-2D/kgem_debug_gen6.c +++ b/drivers/video/Intel-2D/kgem_debug_gen6.c @@ -39,8 +39,6 @@ #include "kgem_debug.h" -#define ErrorF printf - static struct state { struct vertex_buffer { int handle; diff --git a/drivers/video/Intel-2D/kgem_debug_gen7.c b/drivers/video/Intel-2D/kgem_debug_gen7.c new file mode 100644 index 0000000000..b075b3032e --- /dev/null +++ b/drivers/video/Intel-2D/kgem_debug_gen7.c @@ -0,0 +1,715 @@ +/* + * Copyright © 2007-2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following 
conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eric Anholt + * Chris Wilson + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include "sna.h" +#include "sna_reg.h" +#include "gen7_render.h" + +#include "kgem_debug.h" + +static struct state { + struct vertex_buffer { + int handle; + void *base; + const char *ptr; + int pitch; + + struct kgem_bo *current; + } vb[33]; + struct vertex_elements { + int buffer; + int offset; + bool valid; + uint32_t type; + uint8_t swizzle[4]; + } ve[33]; + int num_ve; + + struct dynamic_state { + struct kgem_bo *current; + void *base, *ptr; + } dynamic_state; +} state; + +static void gen7_update_vertex_buffer(struct kgem *kgem, const uint32_t *data) +{ + uint32_t reloc = sizeof(uint32_t) * (&data[1] - kgem->batch); + struct kgem_bo *bo = NULL; + void *base, *ptr; + int i; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == reloc) + break; + assert(i < kgem->nreloc); + reloc = kgem->reloc[i].target_handle; + + if (reloc == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == reloc) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map__debug(kgem, bo); + } + ptr = (char *)base + kgem->reloc[i].delta; + + i = data[0] >> 26; + + state.vb[i].current = bo; 
+ state.vb[i].base = base; + state.vb[i].ptr = ptr; + state.vb[i].pitch = data[0] & 0x7ff; +} + +static void gen7_update_dynamic_buffer(struct kgem *kgem, const uint32_t offset) +{ + uint32_t reloc = sizeof(uint32_t) * offset; + struct kgem_bo *bo = NULL; + void *base, *ptr; + int i; + + if ((kgem->batch[offset] & 1) == 0) + return; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == reloc) + break; + if(i < kgem->nreloc) { + reloc = kgem->reloc[i].target_handle; + + if (reloc == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == reloc) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map__debug(kgem, bo); + } + ptr = (char *)base + (kgem->reloc[i].delta & ~1); + } else { + bo = NULL; + base = NULL; + ptr = NULL; + } + + state.dynamic_state.current = bo; + state.dynamic_state.base = base; + state.dynamic_state.ptr = ptr; +} + +static uint32_t +get_ve_component(uint32_t data, int component) +{ + return (data >> (16 + (3 - component) * 4)) & 0x7; +} + +static void gen7_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data) +{ + state.ve[id].buffer = data[0] >> 26; + state.ve[id].valid = !!(data[0] & (1 << 25)); + state.ve[id].type = (data[0] >> 16) & 0x1ff; + state.ve[id].offset = data[0] & 0x7ff; + state.ve[id].swizzle[0] = get_ve_component(data[1], 0); + state.ve[id].swizzle[1] = get_ve_component(data[1], 1); + state.ve[id].swizzle[2] = get_ve_component(data[1], 2); + state.ve[id].swizzle[3] = get_ve_component(data[1], 3); +} + +static void gen7_update_sf_state(struct kgem *kgem, uint32_t *data) +{ + state.num_ve = 1 + ((data[1] >> 22) & 0x3f); +} + +static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max) +{ + int c; + + ErrorF("("); + for (c = 0; c < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%d", v[c]); break; + case 2: ErrorF("0.0"); break; + 
case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + for (; c < 4; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("1.0"); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + ErrorF(")"); +} + +static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max) +{ + int c, o; + + ErrorF("("); + for (c = o = 0; c < 4 && o < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%f", f[o++]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + for (; c < 4; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("1.0"); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + ErrorF(")"); +} + +static void ve_out(const struct vertex_elements *ve, const void *ptr) +{ + switch (ve->type) { + case GEN7_SURFACEFORMAT_R32_FLOAT: + vertices_float_out(ve, ptr, 1); + break; + case GEN7_SURFACEFORMAT_R32G32_FLOAT: + vertices_float_out(ve, ptr, 2); + break; + case GEN7_SURFACEFORMAT_R32G32B32_FLOAT: + vertices_float_out(ve, ptr, 3); + break; + case GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT: + vertices_float_out(ve, ptr, 4); + break; + case GEN7_SURFACEFORMAT_R16_SINT: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN7_SURFACEFORMAT_R16G16_SINT: + vertices_sint16_out(ve, ptr, 2); + break; + case GEN7_SURFACEFORMAT_R16G16B16A16_SINT: + vertices_sint16_out(ve, ptr, 4); + break; + case GEN7_SURFACEFORMAT_R16_SSCALED: + vertices_sint16_out(ve, ptr, 1); + break; + case 
GEN7_SURFACEFORMAT_R16G16_SSCALED: + vertices_sint16_out(ve, ptr, 2); + break; + case GEN7_SURFACEFORMAT_R16G16B16A16_SSCALED: + vertices_sint16_out(ve, ptr, 4); + break; + } +} + +static void indirect_vertex_out(struct kgem *kgem, uint32_t v) +{ + int i = 1; + + do { + const struct vertex_elements *ve = &state.ve[i]; + const struct vertex_buffer *vb = &state.vb[ve->buffer]; + const void *ptr = vb->ptr + v * vb->pitch + ve->offset; + + if (!ve->valid) + continue; + + ve_out(ve, ptr); + + while (++i <= state.num_ve && !state.ve[i].valid) + ; + + if (i <= state.num_ve) + ErrorF(", "); + } while (i <= state.num_ve); +} + +static void primitive_out(struct kgem *kgem, uint32_t *data) +{ + int n; + + assert((data[0] & (1<<15)) == 0); /* XXX index buffers */ + + for (n = 0; n < data[2]; n++) { + int v = data[3] + n; + ErrorF(" [%d:%d] = ", n, v); + indirect_vertex_out(kgem, v); + ErrorF("\n"); + } +} + +static void finish_state(struct kgem *kgem) +{ + memset(&state, 0, sizeof(state)); +} + +static void +state_base_out(uint32_t *data, uint32_t offset, unsigned int index, + const char *name) +{ + if (data[index] & 1) + kgem_debug_print(data, offset, index, + "%s state base address 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state base not updated\n", + name); +} + +static void +state_max_out(uint32_t *data, uint32_t offset, unsigned int index, + const char *name) +{ + if (data[index] == 1) + kgem_debug_print(data, offset, index, + "%s state upper bound disabled\n", name); + else if (data[index] & 1) + kgem_debug_print(data, offset, index, + "%s state upper bound 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state upper bound not updated\n", + name); +} + +static const char * +get_965_surfacetype(unsigned int surfacetype) +{ + switch (surfacetype) { + case 0: return "1D"; + case 1: return "2D"; + case 2: return "3D"; + case 3: return "CUBE"; + case 4: return "BUFFER"; + case 7: return 
"NULL"; + default: return "unknown"; + } +} + +static const char * +get_965_depthformat(unsigned int depthformat) +{ + switch (depthformat) { + case 0: return "s8_z24float"; + case 1: return "z32float"; + case 2: return "z24s8"; + case 5: return "z16"; + default: return "unknown"; + } +} + +static const char * +get_element_component(uint32_t data, int component) +{ + uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7; + + switch (component_control) { + case 0: + return "nostore"; + case 1: + switch (component) { + case 0: return "X"; + case 1: return "Y"; + case 2: return "Z"; + case 3: return "W"; + default: return "fail"; + } + case 2: + return "0.0"; + case 3: + return "1.0"; + case 4: + return "0x1"; + case 5: + return "VID"; + default: + return "fail"; + } +} + +static const char * +get_prim_type(uint32_t data) +{ + uint32_t primtype = data & 0x1f; + + switch (primtype) { + case 0x01: return "point list"; + case 0x02: return "line list"; + case 0x03: return "line strip"; + case 0x04: return "tri list"; + case 0x05: return "tri strip"; + case 0x06: return "tri fan"; + case 0x07: return "quad list"; + case 0x08: return "quad strip"; + case 0x09: return "line list adj"; + case 0x0a: return "line strip adj"; + case 0x0b: return "tri list adj"; + case 0x0c: return "tri strip adj"; + case 0x0d: return "tri strip reverse"; + case 0x0e: return "polygon"; + case 0x0f: return "rect list"; + case 0x10: return "line loop"; + case 0x11: return "point list bf"; + case 0x12: return "line strip cont"; + case 0x13: return "line strip bf"; + case 0x14: return "line strip cont bf"; + case 0x15: return "tri fan no stipple"; + default: return "fail"; + } +} + +struct reloc { + struct kgem_bo *bo; + void *base; +}; + +static void * +get_reloc(struct kgem *kgem, + void *base, const uint32_t *reloc, + struct reloc *r) +{ + uint32_t delta = *reloc; + + memset(r, 0, sizeof(*r)); + + if (base == 0) { + uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch); + 
struct kgem_bo *bo = NULL; + int i; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == handle) + break; + assert(i < kgem->nreloc); + handle = kgem->reloc[i].target_handle; + delta = kgem->reloc[i].delta; + + if (handle == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == handle) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map__debug(kgem, bo); + r->bo = bo; + r->base = base; + } + } + + return (char *)base + (delta & ~3); +} + +static const char * +gen7_filter_to_string(uint32_t filter) +{ + switch (filter) { + default: + case GEN7_MAPFILTER_NEAREST: return "nearest"; + case GEN7_MAPFILTER_LINEAR: return "linear"; + } +} + +static const char * +gen7_repeat_to_string(uint32_t repeat) +{ + switch (repeat) { + default: + case GEN7_TEXCOORDMODE_CLAMP_BORDER: return "border"; + case GEN7_TEXCOORDMODE_WRAP: return "wrap"; + case GEN7_TEXCOORDMODE_CLAMP: return "clamp"; + case GEN7_TEXCOORDMODE_MIRROR: return "mirror"; + } +} + +static void +gen7_decode_sampler_state(struct kgem *kgem, const uint32_t *reloc) +{ + const struct gen7_sampler_state *ss; + struct reloc r; + const char *min, *mag; + const char *s_wrap, *t_wrap, *r_wrap; + + ss = get_reloc(kgem, state.dynamic_state.ptr, reloc, &r); + + min = gen7_filter_to_string(ss->ss0.min_filter); + mag = gen7_filter_to_string(ss->ss0.mag_filter); + + s_wrap = gen7_repeat_to_string(ss->ss3.s_wrap_mode); + t_wrap = gen7_repeat_to_string(ss->ss3.t_wrap_mode); + r_wrap = gen7_repeat_to_string(ss->ss3.r_wrap_mode); + + ErrorF(" Sampler 0:\n"); + ErrorF(" filter: min=%s, mag=%s\n", min, mag); + ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap); + + ss++; + min = gen7_filter_to_string(ss->ss0.min_filter); + mag = gen7_filter_to_string(ss->ss0.mag_filter); + + s_wrap = gen7_repeat_to_string(ss->ss3.s_wrap_mode); + t_wrap = gen7_repeat_to_string(ss->ss3.t_wrap_mode); + r_wrap = 
gen7_repeat_to_string(ss->ss3.r_wrap_mode); + + ErrorF(" Sampler 1:\n"); + ErrorF(" filter: min=%s, mag=%s\n", min, mag); + ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap); +} + +static const char * +gen7_blend_factor_to_string(uint32_t v) +{ + switch (v) { +#define C(x) case GEN7_BLENDFACTOR_##x: return #x; + C(ONE); + C(SRC_COLOR); + C(SRC_ALPHA); + C(DST_ALPHA); + C(DST_COLOR); + C(SRC_ALPHA_SATURATE); + C(CONST_COLOR); + C(CONST_ALPHA); + C(SRC1_COLOR); + C(SRC1_ALPHA); + C(ZERO); + C(INV_SRC_COLOR); + C(INV_SRC_ALPHA); + C(INV_DST_ALPHA); + C(INV_DST_COLOR); + C(INV_CONST_COLOR); + C(INV_CONST_ALPHA); + C(INV_SRC1_COLOR); + C(INV_SRC1_ALPHA); +#undef C + default: return "???"; + } +} + +static const char * +gen7_blend_function_to_string(uint32_t v) +{ + switch (v) { +#define C(x) case GEN7_BLENDFUNCTION_##x: return #x; + C(ADD); + C(SUBTRACT); + C(REVERSE_SUBTRACT); + C(MIN); + C(MAX); +#undef C + default: return "???"; + } +} + +static void +gen7_decode_blend(struct kgem *kgem, const uint32_t *reloc) +{ + const struct gen7_blend_state *blend; + struct reloc r; + const char *dst, *src; + const char *func; + + blend = get_reloc(kgem, state.dynamic_state.ptr, reloc, &r); + + dst = gen7_blend_factor_to_string(blend->blend0.dest_blend_factor); + src = gen7_blend_factor_to_string(blend->blend0.source_blend_factor); + func = gen7_blend_function_to_string(blend->blend0.blend_func); + + ErrorF(" Blend (%s): function %s, src=%s, dst=%s\n", + blend->blend0.blend_enable ? 
"enabled" : "disabled", + func, src, dst); +} + +int kgem_gen7_decode_3d(struct kgem *kgem, uint32_t offset) +{ + static const struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes[] = { + { 0x6101, 6, 6, "STATE_BASE_ADDRESS" }, + { 0x6102, 2, 2 , "STATE_SIP" }, + { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" }, + { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" }, + }; + uint32_t *data = kgem->batch + offset; + uint32_t op; + unsigned int len; + int i; + const char *name; + + len = (data[0] & 0xff) + 2; + op = (data[0] & 0xffff0000) >> 16; + switch (op) { + case 0x6101: + i = 0; + kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n"); + assert(len == 10); + + state_base_out(data, offset, i++, "general"); + state_base_out(data, offset, i++, "surface"); + state_base_out(data, offset, i++, "dynamic"); + state_base_out(data, offset, i++, "indirect"); + state_base_out(data, offset, i++, "instruction"); + + state_max_out(data, offset, i++, "general"); + state_max_out(data, offset, i++, "dynamic"); + state_max_out(data, offset, i++, "indirect"); + state_max_out(data, offset, i++, "instruction"); + + gen7_update_dynamic_buffer(kgem, offset + 3); + + return len; + + case 0x7808: + assert((len - 1) % 4 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n"); + + for (i = 1; i < len;) { + gen7_update_vertex_buffer(kgem, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n", + data[i] >> 26, + data[i] & (1 << 20) ? 
"random" : "sequential", + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i++, "buffer address\n"); + kgem_debug_print(data, offset, i++, "max index\n"); + kgem_debug_print(data, offset, i++, "mbz\n"); + } + return len; + + case 0x7809: + assert((len + 1) % 2 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n"); + + for (i = 1; i < len;) { + gen7_update_vertex_elements(kgem, (i - 1)/2, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, " + "src offset 0x%04x bytes\n", + data[i] >> 26, + data[i] & (1 << 25) ? "" : "in", + (data[i] >> 16) & 0x1ff, + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), " + "dst offset 0x%02x bytes\n", + get_element_component(data[i], 0), + get_element_component(data[i], 1), + get_element_component(data[i], 2), + get_element_component(data[i], 3), + (data[i] & 0xff) * 4); + i++; + } + return len; + + case 0x780a: + assert(len == 3); + kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n"); + kgem_debug_print(data, offset, 1, "beginning buffer address\n"); + kgem_debug_print(data, offset, 2, "ending buffer address\n"); + return len; + + case 0x7b00: + assert(len == 7); + kgem_debug_print(data, offset, 0, "3DPRIMITIVE\n"); + kgem_debug_print(data, offset, 1, "type %s, %s\n", + get_prim_type(data[1]), + (data[1] & (1 << 15)) ? 
"random" : "sequential"); + kgem_debug_print(data, offset, 2, "vertex count\n"); + kgem_debug_print(data, offset, 3, "start vertex\n"); + kgem_debug_print(data, offset, 4, "instance count\n"); + kgem_debug_print(data, offset, 5, "start instance\n"); + kgem_debug_print(data, offset, 6, "index bias\n"); + primitive_out(kgem, data); + return len; + } + + /* For the rest, just dump the bytes */ + name = NULL; + for (i = 0; i < ARRAY_SIZE(opcodes); i++) + if (op == opcodes[i].opcode) { + name = opcodes[i].name; + break; + } + + len = (data[0] & 0xff) + 2; + if (name == NULL) { + kgem_debug_print(data, offset, 0, "unknown\n"); + } else { + kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name); + if (opcodes[i].max_len > 1) { + assert(len >= opcodes[i].min_len && + len <= opcodes[i].max_len); + } + } + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + + return len; +} + +void kgem_gen7_finish_state(struct kgem *kgem) +{ + finish_state(kgem); +} diff --git a/drivers/video/Intel-2D/sna.c b/drivers/video/Intel-2D/sna.c index 273ed96aca..1628d9d438 100644 --- a/drivers/video/Intel-2D/sna.c +++ b/drivers/video/Intel-2D/sna.c @@ -1,11 +1,51 @@ +/************************************************************************** +Copyright 2001 VA Linux Systems Inc., Fremont, California. +Copyright © 2002 by David Dawes + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: Jeff Hartmann + * Abraham van der Merwe + * David Dawes + * Alan Hourihane + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif #include #include #include #include +#include "i915_pciids.h" +#include "compiler.h" #include "sna.h" +#include "intel_driver.h" #define to_surface(x) (surface_t*)((x)->handle) @@ -46,6 +86,8 @@ int kgem_update_fb(struct kgem *kgem, struct sna_fb *fb); uint32_t kgem_surface_size(struct kgem *kgem,bool relaxed_fencing, unsigned flags, uint32_t width, uint32_t height, uint32_t bpp, uint32_t tiling, uint32_t *pitch); +struct kgem_bo *kgem_bo_from_handle(struct kgem *kgem, int handle, + int pitch, int height); void kgem_close_batches(struct kgem *kgem); void sna_bo_destroy(struct kgem *kgem, struct kgem_bo *bo); @@ -53,8 +95,6 @@ void sna_bo_destroy(struct kgem *kgem, struct kgem_bo *bo); const struct intel_device_info * intel_detect_chipset(struct pci_device *pci); -//struct kgem_bo *create_bo(bitmap_t *bitmap); - static bool sna_solid_cache_init(struct sna *sna); struct sna *sna_device; @@ -66,7 +106,45 @@ static void no_render_reset(struct sna *sna) (void)sna; } -void no_render_init(struct sna *sna) +static void no_render_flush(struct sna *sna) +{ + (void)sna; +} + +static void +no_render_context_switch(struct kgem *kgem, + int new_mode) +{ + if (!kgem->nbatch) + return; + + if (kgem_ring_is_idle(kgem, kgem->ring)) { + DBG(("%s: GPU idle, flushing\n", 
__FUNCTION__)); + _kgem_submit(kgem); + } + + (void)new_mode; +} + +static void +no_render_retire(struct kgem *kgem) +{ + (void)kgem; +} + +static void +no_render_expire(struct kgem *kgem) +{ + (void)kgem; +} + +static void +no_render_fini(struct sna *sna) +{ + (void)sna; +} + +const char *no_render_init(struct sna *sna) { struct sna_render *render = &sna->render; @@ -77,28 +155,20 @@ void no_render_init(struct sna *sna) render->vertices = render->vertex_data; render->vertex_size = ARRAY_SIZE(render->vertex_data); -// render->composite = no_render_composite; - -// render->copy_boxes = no_render_copy_boxes; -// render->copy = no_render_copy; - -// render->fill_boxes = no_render_fill_boxes; -// render->fill = no_render_fill; -// render->fill_one = no_render_fill_one; -// render->clear = no_render_clear; - render->reset = no_render_reset; -// render->flush = no_render_flush; -// render->fini = no_render_fini; + render->flush = no_render_flush; + render->fini = no_render_fini; -// sna->kgem.context_switch = no_render_context_switch; -// sna->kgem.retire = no_render_retire; + sna->kgem.context_switch = no_render_context_switch; + sna->kgem.retire = no_render_retire; + sna->kgem.expire = no_render_expire; - if (sna->kgem.gen >= 60) - sna->kgem.ring = KGEM_RENDER; + sna->kgem.mode = KGEM_RENDER; + sna->kgem.ring = KGEM_RENDER; - sna_vertex_init(sna); -} + sna_vertex_init(sna); + return "generic"; + } void sna_vertex_init(struct sna *sna) { @@ -111,47 +181,27 @@ int sna_accel_init(struct sna *sna) { const char *backend; -// list_init(&sna->deferred_free); -// list_init(&sna->dirty_pixmaps); -// list_init(&sna->active_pixmaps); -// list_init(&sna->inactive_clock[0]); -// list_init(&sna->inactive_clock[1]); - -// sna_accel_install_timers(sna); - - - backend = "no"; - no_render_init(sna); - - if (sna->info->gen >= 0100) { - } else if (sna->info->gen >= 070) { - if (gen7_render_init(sna)) - backend = "IvyBridge"; - } else if (sna->info->gen >= 060) { - if 
(gen6_render_init(sna)) - backend = "SandyBridge"; - } else if (sna->info->gen >= 050) { - if (gen5_render_init(sna)) - backend = "Ironlake"; - } else if (sna->info->gen >= 040) { - if (gen4_render_init(sna)) - backend = "Broadwater/Crestline"; - } else if (sna->info->gen >= 030) { - if (gen3_render_init(sna)) - backend = "gen3"; - } + backend = no_render_init(sna); + if (sna->info->gen >= 0100) + (void)backend; + else if (sna->info->gen >= 070) + backend = gen7_render_init(sna, backend); + else if (sna->info->gen >= 060) + backend = gen6_render_init(sna, backend); + else if (sna->info->gen >= 050) + backend = gen5_render_init(sna, backend); + else if (sna->info->gen >= 040) + backend = gen4_render_init(sna, backend); + else if (sna->info->gen >= 030) + backend = gen3_render_init(sna, backend); DBG(("%s(backend=%s, prefer_gpu=%x)\n", __FUNCTION__, backend, sna->render.prefer_gpu)); - kgem_reset(&sna->kgem); - -// if (!sna_solid_cache_init(sna)) -// return false; + kgem_reset(&sna->kgem); sna_device = sna; - return kgem_init_fb(&sna->kgem, &sna_fb); } @@ -169,7 +219,7 @@ int sna_init(uint32_t service) if(sna_device) goto done; - + io.handle = service; io.io_code = SRV_GET_PCI_INFO; io.input = &device; @@ -179,40 +229,24 @@ int sna_init(uint32_t service) if (call_service(&io)!=0) goto err1; - + sna = malloc(sizeof(*sna)); if (sna == NULL) goto err1; memset(sna, 0, sizeof(*sna)); - - sna->PciInfo = &device; + sna->cpu_features = sna_cpu_detect(); + + sna->PciInfo = &device; sna->info = intel_detect_chipset(sna->PciInfo); + sna->scrn = service; kgem_init(&sna->kgem, service, sna->PciInfo, sna->info->gen); - -/* - if (!xf86ReturnOptValBool(sna->Options, - OPTION_RELAXED_FENCING, - sna->kgem.has_relaxed_fencing)) { - xf86DrvMsg(scrn->scrnIndex, - sna->kgem.has_relaxed_fencing ? 
X_CONFIG : X_PROBED, - "Disabling use of relaxed fencing\n"); - sna->kgem.has_relaxed_fencing = 0; - } - if (!xf86ReturnOptValBool(sna->Options, - OPTION_VMAP, - sna->kgem.has_vmap)) { - xf86DrvMsg(scrn->scrnIndex, - sna->kgem.has_vmap ? X_CONFIG : X_PROBED, - "Disabling use of vmap\n"); - sna->kgem.has_vmap = 0; - } -*/ + /* Disable tiling by default */ - sna->tiling = SNA_TILING_DISABLE; + sna->tiling = 0; /* Default fail-safe value of 75 Hz */ // sna->vblank_interval = 1000 * 1000 * 1000 / 75; @@ -222,16 +256,16 @@ int sna_init(uint32_t service) sna_accel_init(sna); tls_mask = tls_alloc(); - + // printf("tls mask %x\n", tls_mask); - + done: caps = sna_device->render.caps; err1: __lock_release_recursive(__sna_lock); - - return caps; + + return caps; } void sna_fini() @@ -239,17 +273,17 @@ void sna_fini() if( sna_device ) { struct kgem_bo *mask; - + __lock_acquire_recursive(__sna_lock); - + mask = tls_get(tls_mask); - + sna_device->render.fini(sna_device); if(mask) kgem_bo_destroy(&sna_device->kgem, mask); - kgem_close_batches(&sna_device->kgem); + kgem_close_batches(&sna_device->kgem); kgem_cleanup_cache(&sna_device->kgem); - + sna_device = NULL; __lock_release_recursive(__sna_lock); }; @@ -402,7 +436,7 @@ int sna_blit_copy(bitmap_t *src_bitmap, int dst_x, int dst_y, winx = *(uint32_t*)(proc_info+34); winy = *(uint32_t*)(proc_info+38); - + memset(&src, 0, sizeof(src)); memset(&dst, 0, sizeof(dst)); @@ -413,36 +447,36 @@ int sna_blit_copy(bitmap_t *src_bitmap, int dst_x, int dst_y, dst.drawable.bitsPerPixel = 32; dst.drawable.width = sna_fb.width; dst.drawable.height = sna_fb.height; - + memset(©, 0, sizeof(copy)); src_bo = (struct kgem_bo*)src_bitmap->handle; - + if( sna_device->render.copy(sna_device, GXcopy, &src, src_bo, &dst, sna_fb.fb_bo, ©) ) - { + { copy.blt(sna_device, ©, src_x, src_y, w, h, winx+dst_x, winy+dst_y); copy.done(sna_device, ©); } kgem_submit(&sna_device->kgem); - + return 0; - + // __asm__ __volatile__("int3"); - + }; -typedef struct 
+typedef struct { uint32_t width; uint32_t height; void *data; uint32_t pitch; - struct kgem_bo *bo; - uint32_t bo_size; - uint32_t flags; + struct kgem_bo *bo; + uint32_t bo_size; + uint32_t flags; }surface_t; @@ -451,23 +485,23 @@ int sna_create_bitmap(bitmap_t *bitmap) { surface_t *sf; struct kgem_bo *bo; - + sf = malloc(sizeof(*sf)); if(sf == NULL) goto err_1; - + __lock_acquire_recursive(__sna_lock); bo = kgem_create_2d(&sna_device->kgem, bitmap->width, bitmap->height, 32,I915_TILING_NONE, CREATE_CPU_MAP); - + if(bo == NULL) goto err_2; - + void *map = kgem_bo_map(&sna_device->kgem, bo); if(map == NULL) goto err_3; - + sf->width = bitmap->width; sf->height = bitmap->height; sf->data = map; @@ -475,33 +509,74 @@ int sna_create_bitmap(bitmap_t *bitmap) sf->bo = bo; sf->bo_size = PAGE_SIZE * bo->size.pages.count; sf->flags = bitmap->flags; - + bitmap->handle = (uint32_t)sf; __lock_release_recursive(__sna_lock); - + return 0; - + err_3: kgem_bo_destroy(&sna_device->kgem, bo); err_2: __lock_release_recursive(__sna_lock); - free(sf); + free(sf); err_1: - return -1; + return -1; }; +int sna_bitmap_from_handle(bitmap_t *bitmap, uint32_t handle) +{ + surface_t *sf; + struct kgem_bo *bo; + + sf = malloc(sizeof(*sf)); + if(sf == NULL) + goto err_1; + + __lock_acquire_recursive(__sna_lock); + + bo = kgem_bo_from_handle(&sna_device->kgem, handle, bitmap->pitch, bitmap->height); + + __lock_release_recursive(__sna_lock); + + sf->width = bitmap->width; + sf->height = bitmap->height; + sf->data = NULL; + sf->pitch = bo->pitch; + sf->bo = bo; + sf->bo_size = PAGE_SIZE * bo->size.pages.count; + sf->flags = bitmap->flags; + + bitmap->handle = (uint32_t)sf; + + return 0; + +err_2: + __lock_release_recursive(__sna_lock); + free(sf); +err_1: + return -1; +}; + +void sna_set_bo_handle(bitmap_t *bitmap, int handle) +{ + surface_t *sf = to_surface(bitmap); + struct kgem_bo *bo = sf->bo; + bo->handle = handle; +} + int sna_destroy_bitmap(bitmap_t *bitmap) { surface_t *sf = 
to_surface(bitmap); - + __lock_acquire_recursive(__sna_lock); - + kgem_bo_destroy(&sna_device->kgem, sf->bo); - + __lock_release_recursive(__sna_lock); free(sf); - + bitmap->handle = -1; bitmap->data = (void*)-1; bitmap->pitch = -1; @@ -511,17 +586,17 @@ int sna_destroy_bitmap(bitmap_t *bitmap) int sna_lock_bitmap(bitmap_t *bitmap) { - surface_t *sf = to_surface(bitmap); - + surface_t *sf = to_surface(bitmap); + // printf("%s\n", __FUNCTION__); __lock_acquire_recursive(__sna_lock); - + kgem_bo_sync__cpu(&sna_device->kgem, sf->bo); __lock_release_recursive(__sna_lock); - + bitmap->data = sf->data; - bitmap->pitch = sf->pitch; + bitmap->pitch = sf->pitch; return 0; }; @@ -530,8 +605,8 @@ int sna_resize_bitmap(bitmap_t *bitmap) { surface_t *sf = to_surface(bitmap); struct kgem *kgem = &sna_device->kgem; - struct kgem_bo *bo = sf->bo; - + struct kgem_bo *bo = sf->bo; + uint32_t size; uint32_t pitch; @@ -541,24 +616,24 @@ int sna_resize_bitmap(bitmap_t *bitmap) size = kgem_surface_size(kgem,kgem->has_relaxed_fencing, CREATE_CPU_MAP, bitmap->width, bitmap->height, 32, I915_TILING_NONE, &pitch); assert(size && size <= kgem->max_object_size); - + if(sf->bo_size >= size) { sf->width = bitmap->width; sf->height = bitmap->height; sf->pitch = pitch; - bo->pitch = pitch; - + bo->pitch = pitch; + return 0; } else { __lock_acquire_recursive(__sna_lock); - + sna_bo_destroy(kgem, bo); - + sf->bo = NULL; - + bo = kgem_create_2d(kgem, bitmap->width, bitmap->height, 32, I915_TILING_NONE, CREATE_CPU_MAP); @@ -567,7 +642,7 @@ int sna_resize_bitmap(bitmap_t *bitmap) __lock_release_recursive(__sna_lock); return -1; }; - + void *map = kgem_bo_map(kgem, bo); if(map == NULL) { @@ -575,9 +650,9 @@ int sna_resize_bitmap(bitmap_t *bitmap) __lock_release_recursive(__sna_lock); return -1; }; - + __lock_release_recursive(__sna_lock); - + sf->width = bitmap->width; sf->height = bitmap->height; sf->data = map; @@ -586,7 +661,7 @@ int sna_resize_bitmap(bitmap_t *bitmap) sf->bo_size = PAGE_SIZE * 
bo->size.pages.count; } - return 0; + return 0; }; @@ -596,32 +671,32 @@ int sna_create_mask() struct kgem_bo *bo; // printf("%s width %d height %d\n", __FUNCTION__, sna_fb.width, sna_fb.height); - + __lock_acquire_recursive(__sna_lock); - + bo = kgem_create_2d(&sna_device->kgem, sna_fb.width, sna_fb.height, 8,I915_TILING_NONE, CREATE_CPU_MAP); - + if(unlikely(bo == NULL)) goto err_1; - + int *map = kgem_bo_map(&sna_device->kgem, bo); if(map == NULL) goto err_2; - + __lock_release_recursive(__sna_lock); - + memset(map, 0, bo->pitch * sna_fb.height); - + tls_set(tls_mask, bo); - + return 0; - + err_2: kgem_bo_destroy(&sna_device->kgem, bo); err_1: __lock_release_recursive(__sna_lock); - return -1; + return -1; }; @@ -630,7 +705,7 @@ gen6_composite(struct sna *sna, uint8_t op, PixmapPtr src, struct kgem_bo *src_bo, PixmapPtr mask,struct kgem_bo *mask_bo, - PixmapPtr dst, struct kgem_bo *dst_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, int32_t src_x, int32_t src_y, int32_t msk_x, int32_t msk_y, int32_t dst_x, int32_t dst_y, @@ -644,10 +719,10 @@ int sna_blit_tex(bitmap_t *bitmap, bool scale, int dst_x, int dst_y, int w, int h, int src_x, int src_y) { - surface_t *sf = to_surface(bitmap); + surface_t *sf = to_surface(bitmap); struct drm_i915_mask_update update; - + struct sna_composite_op composite; struct _Pixmap src, dst, mask; struct kgem_bo *src_bo, *mask_bo; @@ -661,48 +736,48 @@ int sna_blit_tex(bitmap_t *bitmap, bool scale, int dst_x, int dst_y, winy = *(uint32_t*)(proc_info+38); // winw = *(uint32_t*)(proc_info+42)+1; // winh = *(uint32_t*)(proc_info+46)+1; - + mask_bo = tls_get(tls_mask); - + if(unlikely(mask_bo == NULL)) { sna_create_mask(); mask_bo = tls_get(tls_mask); if( mask_bo == NULL) - return -1; + return -1; }; - + if(kgem_update_fb(&sna_device->kgem, &sna_fb)) { __lock_acquire_recursive(__sna_lock); kgem_bo_destroy(&sna_device->kgem, mask_bo); __lock_release_recursive(__sna_lock); - + sna_create_mask(); mask_bo = tls_get(tls_mask); if( mask_bo == 
NULL) - return -1; + return -1; } - + VG_CLEAR(update); update.handle = mask_bo->handle; - update.bo_map = (__u32)MAP(mask_bo->map); + update.bo_map = (int)kgem_bo_map__cpu(&sna_device->kgem, mask_bo); drmIoctl(sna_device->kgem.fd, SRV_MASK_UPDATE, &update); mask_bo->pitch = update.bo_pitch; - + memset(&src, 0, sizeof(src)); memset(&dst, 0, sizeof(dst)); memset(&mask, 0, sizeof(dst)); src.drawable.bitsPerPixel = 32; - + src.drawable.width = sf->width; src.drawable.height = sf->height; dst.drawable.bitsPerPixel = 32; dst.drawable.width = sna_fb.width; dst.drawable.height = sna_fb.height; - + mask.drawable.bitsPerPixel = 8; mask.drawable.width = update.width; mask.drawable.height = update.height; @@ -710,14 +785,14 @@ int sna_blit_tex(bitmap_t *bitmap, bool scale, int dst_x, int dst_y, memset(&composite, 0, sizeof(composite)); src_bo = sf->bo; - + __lock_acquire_recursive(__sna_lock); - + if( sna_device->render.blit_tex(sna_device, PictOpSrc,scale, &src, src_bo, &mask, mask_bo, - &dst, sna_fb.fb_bo, + &dst, sna_fb.fb_bo, src_x, src_y, dst_x, dst_y, winx+dst_x, winy+dst_y, @@ -725,7 +800,7 @@ int sna_blit_tex(bitmap_t *bitmap, bool scale, int dst_x, int dst_y, &composite) ) { struct sna_composite_rectangles r; - + r.src.x = src_x; r.src.y = src_y; r.mask.x = dst_x; @@ -734,20 +809,20 @@ int sna_blit_tex(bitmap_t *bitmap, bool scale, int dst_x, int dst_y, r.dst.y = winy+dst_y; r.width = w; r.height = h; - + composite.blt(sna_device, &composite, &r); composite.done(sna_device, &composite); - + }; - + kgem_submit(&sna_device->kgem); - + __lock_release_recursive(__sna_lock); bitmap->data = (void*)-1; bitmap->pitch = -1; - - return 0; + + return 0; } @@ -756,8 +831,6 @@ int sna_blit_tex(bitmap_t *bitmap, bool scale, int dst_x, int dst_y, - - static const struct intel_device_info intel_generic_info = { .gen = -1, }; @@ -807,99 +880,36 @@ static const struct intel_device_info intel_haswell_info = { static const struct pci_id_match intel_device_match[] = { + 
INTEL_I915G_IDS(&intel_i915_info), + INTEL_I915GM_IDS(&intel_i915_info), + INTEL_I945G_IDS(&intel_i945_info), + INTEL_I945GM_IDS(&intel_i945_info), - INTEL_DEVICE_MATCH (PCI_CHIP_I915_G, &intel_i915_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_E7221_G, &intel_i915_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_I915_GM, &intel_i915_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_I945_G, &intel_i945_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_I945_GM, &intel_i945_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_I945_GME, &intel_i945_info ), + INTEL_G33_IDS(&intel_g33_info), + INTEL_PINEVIEW_IDS(&intel_g33_info), - INTEL_DEVICE_MATCH (PCI_CHIP_PINEVIEW_M, &intel_g33_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_PINEVIEW_G, &intel_g33_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_G33_G, &intel_g33_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_Q33_G, &intel_g33_info ), - /* Another marketing win: Q35 is another g33 device not a gen4 part - * like its G35 brethren. - */ - INTEL_DEVICE_MATCH (PCI_CHIP_Q35_G, &intel_g33_info ), + INTEL_I965G_IDS(&intel_i965_info), + INTEL_I965GM_IDS(&intel_i965_info), - INTEL_DEVICE_MATCH (PCI_CHIP_I965_G, &intel_i965_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_G35_G, &intel_i965_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_I965_Q, &intel_i965_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_I946_GZ, &intel_i965_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_I965_GM, &intel_i965_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_I965_GME, &intel_i965_info ), + INTEL_G45_IDS(&intel_g4x_info), + INTEL_GM45_IDS(&intel_g4x_info), - INTEL_DEVICE_MATCH (PCI_CHIP_GM45_GM, &intel_g4x_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_G45_E_G, &intel_g4x_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_G45_G, &intel_g4x_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_Q45_G, &intel_g4x_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_G41_G, &intel_g4x_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_B43_G, &intel_g4x_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_B43_G1, &intel_g4x_info ), + INTEL_IRONLAKE_D_IDS(&intel_ironlake_info), + INTEL_IRONLAKE_M_IDS(&intel_ironlake_info), - INTEL_DEVICE_MATCH 
(PCI_CHIP_IRONLAKE_D_G, &intel_ironlake_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_IRONLAKE_M_G, &intel_ironlake_info ), + INTEL_SNB_D_IDS(&intel_sandybridge_info), + INTEL_SNB_M_IDS(&intel_sandybridge_info), - INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT1, &intel_sandybridge_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT2, &intel_sandybridge_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT2_PLUS, &intel_sandybridge_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT1, &intel_sandybridge_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT2, &intel_sandybridge_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS, &intel_sandybridge_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_S_GT, &intel_sandybridge_info ), + INTEL_IVB_D_IDS(&intel_ivybridge_info), + INTEL_IVB_M_IDS(&intel_ivybridge_info), - INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_M_GT1, &intel_ivybridge_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_M_GT2, &intel_ivybridge_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_D_GT1, &intel_ivybridge_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_D_GT2, &intel_ivybridge_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_S_GT1, &intel_ivybridge_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_S_GT2, &intel_ivybridge_info ), + INTEL_HSW_D_IDS(&intel_haswell_info), + INTEL_HSW_M_IDS(&intel_haswell_info), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT1, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT2, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT2_PLUS, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT1, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT2, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT2_PLUS, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT1, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT2, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT2_PLUS, &intel_haswell_info ), - 
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT1, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT2, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT2_PLUS, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT1, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT2, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT2_PLUS, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT1, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT2, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT2_PLUS, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT1, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT2, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT2_PLUS, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT1, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT2, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT2_PLUS, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT1, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT2, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT2_PLUS, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT1, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT2, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT2_PLUS, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT1, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT2, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT2_PLUS, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT1, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT2, &intel_haswell_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT2_PLUS, 
&intel_haswell_info ), + INTEL_VLV_D_IDS(&intel_valleyview_info), + INTEL_VLV_M_IDS(&intel_valleyview_info), - INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_PO, &intel_valleyview_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_1, &intel_valleyview_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_2, &intel_valleyview_info ), - INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_3, &intel_valleyview_info ), - - INTEL_DEVICE_MATCH (PCI_MATCH_ANY, &intel_generic_info ), + INTEL_VGA_DEVICE(PCI_MATCH_ANY, &intel_generic_info), { 0, 0, 0 }, }; @@ -920,14 +930,14 @@ intel_detect_chipset(struct pci_device *pci) { const struct pci_id_match *ent = NULL; - ent = PciDevMatch(pci->device_id, intel_device_match); - + ent = PciDevMatch(pci->device_id, intel_device_match); + if(ent != NULL) return (const struct intel_device_info*)ent->match_data; - else + else return &intel_generic_info; - -#if 0 + +#if 0 for (i = 0; intel_chipsets[i].name != NULL; i++) { if (DEVICE_ID(pci) == intel_chipsets[i].token) { name = intel_chipsets[i].name; @@ -945,9 +955,23 @@ intel_detect_chipset(struct pci_device *pci) scrn->chipset = name; #endif - + } +int intel_get_device_id(int fd) +{ + struct drm_i915_getparam gp; + int devid = 0; + + memset(&gp, 0, sizeof(gp)); + gp.param = I915_PARAM_CHIPSET_ID; + gp.value = &devid; + + if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) + return 0; + + return devid; +} int drmIoctl(int fd, unsigned long request, void *arg) { diff --git a/drivers/video/Intel-2D/sna.h b/drivers/video/Intel-2D/sna.h index 2940b5b286..612b77c9ab 100644 --- a/drivers/video/Intel-2D/sna.h +++ b/drivers/video/Intel-2D/sna.h @@ -37,25 +37,82 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#ifndef _SNA_H_ #define _SNA_H_ +#include + #ifdef HAVE_CONFIG_H #include "config.h" #endif -#include +#include "compiler.h" + + #include #include #include +#include #include "intel_driver.h" #include "pciaccess.h" -#include "compiler.h" +#include +#include -//#define DBG(x) -//#define DBG(x) ErrorF x +#ifdef HAVE_DRI2_H +#include +#endif -#define assert(x) +#if HAVE_UDEV +#include +#endif +#if 0 +#include + +#include +#if XF86_CRTC_VERSION >= 5 +#define HAS_PIXMAP_SHARING 1 +#endif + +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "../compat-api.h" + +#endif + +#include + +#define ErrorF printf + +#if HAS_DEBUG_FULL +#define DBG(x) ErrorF x +#else +#define DBG(x) +#endif + +#define DEBUG_NO_BLT 0 + +#define DEBUG_FLUSH_BATCH 0 + +#define TEST_ALL 0 +#define TEST_ACCEL (TEST_ALL || 0) +#define TEST_BATCH (TEST_ALL || 0) +#define TEST_BLT (TEST_ALL || 0) +#define TEST_COMPOSITE (TEST_ALL || 0) +#define TEST_DAMAGE (TEST_ALL || 0) +#define TEST_GRADIENT (TEST_ALL || 0) +#define TEST_GLYPHS (TEST_ALL || 0) +#define TEST_IO (TEST_ALL || 0) +#define TEST_KGEM (TEST_ALL || 0) +#define TEST_RENDER (TEST_ALL || 0) int drmIoctl(int fd, unsigned long request, void *arg); @@ -94,12 +151,69 @@ typedef enum { PIXMAN_x2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,10,10,10), PIXMAN_a2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,2,10,10,10), - PIXMAN_a8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0) - +/* sRGB formats */ + PIXMAN_a8r8g8b8_sRGB = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB_SRGB,8,8,8,8), + +/* 24bpp formats */ + PIXMAN_r8g8b8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ARGB,0,8,8,8), + PIXMAN_b8g8r8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ABGR,0,8,8,8), + +/* 16bpp formats */ + PIXMAN_r5g6b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,6,5), + PIXMAN_b5g6r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,6,5), + + PIXMAN_a1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,1,5,5,5), + PIXMAN_x1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,5,5), + PIXMAN_a1b5g5r5 = 
PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,1,5,5,5), + PIXMAN_x1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,5,5), + PIXMAN_a4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,4,4,4,4), + PIXMAN_x4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,4,4,4), + PIXMAN_a4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,4,4,4,4), + PIXMAN_x4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,4,4,4), + +/* 8bpp formats */ + PIXMAN_a8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0), + PIXMAN_r3g3b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,0,3,3,2), + PIXMAN_b2g3r3 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,0,3,3,2), + PIXMAN_a2r2g2b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,2,2,2,2), + PIXMAN_a2b2g2r2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,2,2,2,2), + + PIXMAN_c8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0), + PIXMAN_g8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0), + + PIXMAN_x4a4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,4,0,0,0), + + PIXMAN_x4c4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0), + PIXMAN_x4g4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0), + +/* 4bpp formats */ + PIXMAN_a4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_A,4,0,0,0), + PIXMAN_r1g2b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,0,1,2,1), + PIXMAN_b1g2r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,0,1,2,1), + PIXMAN_a1r1g1b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,1,1,1,1), + PIXMAN_a1b1g1r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,1,1,1,1), + + PIXMAN_c4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_COLOR,0,0,0,0), + PIXMAN_g4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_GRAY,0,0,0,0), + +/* 1bpp formats */ + PIXMAN_a1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_A,1,0,0,0), + + PIXMAN_g1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_GRAY,0,0,0,0), + +/* YUV formats */ + PIXMAN_yuy2 = PIXMAN_FORMAT(16,PIXMAN_TYPE_YUY2,0,0,0,0), + PIXMAN_yv12 = PIXMAN_FORMAT(12,PIXMAN_TYPE_YV12,0,0,0,0) + } pixman_format_code_t; typedef enum _PictFormatShort { + PICT_a2r10g10b10 = PIXMAN_a2r10g10b10, + PICT_x2r10g10b10 = PIXMAN_x2r10g10b10, + PICT_a2b10g10r10 = PIXMAN_a2b10g10r10, + PICT_x2b10g10r10 = PIXMAN_x2b10g10r10, + PICT_a8r8g8b8 = PIXMAN_a8r8g8b8, PICT_x8r8g8b8 = PIXMAN_x8r8g8b8, 
PICT_a8b8g8r8 = PIXMAN_a8b8g8r8, @@ -107,10 +221,52 @@ typedef enum _PictFormatShort { PICT_b8g8r8a8 = PIXMAN_b8g8r8a8, PICT_b8g8r8x8 = PIXMAN_b8g8r8x8, +/* 24bpp formats */ + PICT_r8g8b8 = PIXMAN_r8g8b8, + PICT_b8g8r8 = PIXMAN_b8g8r8, + +/* 16bpp formats */ + PICT_r5g6b5 = PIXMAN_r5g6b5, + PICT_b5g6r5 = PIXMAN_b5g6r5, + + PICT_a1r5g5b5 = PIXMAN_a1r5g5b5, + PICT_x1r5g5b5 = PIXMAN_x1r5g5b5, + PICT_a1b5g5r5 = PIXMAN_a1b5g5r5, + PICT_x1b5g5r5 = PIXMAN_x1b5g5r5, + PICT_a4r4g4b4 = PIXMAN_a4r4g4b4, + PICT_x4r4g4b4 = PIXMAN_x4r4g4b4, + PICT_a4b4g4r4 = PIXMAN_a4b4g4r4, + PICT_x4b4g4r4 = PIXMAN_x4b4g4r4, + /* 8bpp formats */ PICT_a8 = PIXMAN_a8, + PICT_r3g3b2 = PIXMAN_r3g3b2, + PICT_b2g3r3 = PIXMAN_b2g3r3, + PICT_a2r2g2b2 = PIXMAN_a2r2g2b2, + PICT_a2b2g2r2 = PIXMAN_a2b2g2r2, + + PICT_c8 = PIXMAN_c8, + PICT_g8 = PIXMAN_g8, + + PICT_x4a4 = PIXMAN_x4a4, + + PICT_x4c4 = PIXMAN_x4c4, + PICT_x4g4 = PIXMAN_x4g4, /* 4bpp formats */ + PICT_a4 = PIXMAN_a4, + PICT_r1g2b1 = PIXMAN_r1g2b1, + PICT_b1g2r1 = PIXMAN_b1g2r1, + PICT_a1r1g1b1 = PIXMAN_a1r1g1b1, + PICT_a1b1g1r1 = PIXMAN_a1b1g1r1, + + PICT_c4 = PIXMAN_c4, + PICT_g4 = PIXMAN_g4, + +/* 1bpp formats */ + PICT_a1 = PIXMAN_a1, + + PICT_g1 = PIXMAN_g1 } PictFormatShort; #define PIXMAN_FORMAT_A(f) (((f) >> 12) & 0x0f) @@ -230,7 +386,7 @@ struct sna_fb uint32_t height; uint32_t pitch; uint32_t tiling; - + struct kgem_bo *fb_bo; }; @@ -266,29 +422,53 @@ typedef unsigned short CARD16; #define PictOpMaximum 13 +#define SNA_CURSOR_X 64 +#define SNA_CURSOR_Y SNA_CURSOR_X + +struct sna_client { + int is_compositor; /* only 4 bits used */ +}; + + +//#define assert(x) + struct sna { + struct kgem kgem; + + unsigned scrn; + unsigned flags; #define SNA_NO_WAIT 0x1 #define SNA_NO_FLIP 0x2 #define SNA_TRIPLE_BUFFER 0x4 #define SNA_TEAR_FREE 0x10 #define SNA_FORCE_SHADOW 0x20 +#define SNA_FLUSH_GTT 0x40 +#define SNA_IS_HOSTED 0x80 +#define SNA_PERFORMANCE 0x100 +#define SNA_POWERSAVE 0x200 +#define SNA_REPROBE 0x80000000 + + unsigned cpu_features; 
+#define MMX 0x1 +#define SSE 0x2 +#define SSE2 0x4 +#define SSE3 0x8 +#define SSSE3 0x10 +#define SSE4_1 0x20 +#define SSE4_2 0x40 +#define AVX 0x80 +#define AVX2 0x100 struct list flush_pixmaps; struct list active_pixmaps; -// int vblank_interval; -// struct list deferred_free; -// struct list dirty_pixmaps; -// struct list active_pixmaps; -// struct list inactive_clock[2]; unsigned int tiling; -#define SNA_TILING_DISABLE 0x0 #define SNA_TILING_FB 0x1 #define SNA_TILING_2D 0x2 #define SNA_TILING_ALL (~0) @@ -303,7 +483,6 @@ struct sna { uint32_t fill_alu; } blt_state; union { -// struct gen2_render_state gen2; struct gen3_render_state gen3; struct gen4_render_state gen4; struct gen5_render_state gen5; @@ -318,12 +497,11 @@ struct sna { /* Driver phase/state information */ // Bool suspended; - struct kgem kgem; struct sna_render render; #if DEBUG_MEMORY struct { - int shadow_pixels_allocs; + int pixmap_allocs; int cpu_bo_allocs; size_t shadow_pixels_bytes; size_t cpu_bo_bytes; @@ -352,4 +530,78 @@ to_sna_from_kgem(struct kgem *kgem) #ifndef MAX #define MAX(a,b) ((a) >= (b) ? 
(a) : (b)) #endif +static inline bool +_sna_transform_point(const PictTransform *transform, + int64_t x, int64_t y, int64_t result[3]) +{ + int j; + + for (j = 0; j < 3; j++) + result[j] = (transform->matrix[j][0] * x + + transform->matrix[j][1] * y + + transform->matrix[j][2]); + + return result[2] != 0; +} + +static inline void +_sna_get_transformed_coordinates(int x, int y, + const PictTransform *transform, + float *x_out, float *y_out) +{ + + int64_t result[3]; + + _sna_transform_point(transform, x, y, result); + *x_out = result[0] / (double)result[2]; + *y_out = result[1] / (double)result[2]; +} + +static inline void +_sna_get_transformed_scaled(int x, int y, + const PictTransform *transform, const float *sf, + float *x_out, float *y_out) +{ + *x_out = sf[0] * (transform->matrix[0][0] * x + + transform->matrix[0][1] * y + + transform->matrix[0][2]); + + *y_out = sf[1] * (transform->matrix[1][0] * x + + transform->matrix[1][1] * y + + transform->matrix[1][2]); +} + +void +sna_get_transformed_coordinates(int x, int y, + const PictTransform *transform, + float *x_out, float *y_out); + +void +sna_get_transformed_coordinates_3d(int x, int y, + const PictTransform *transform, + float *x_out, float *y_out, float *z_out); + +bool sna_transform_is_affine(const PictTransform *t); +bool sna_transform_is_integer_translation(const PictTransform *t, + int16_t *tx, int16_t *ty); +bool sna_transform_is_translation(const PictTransform *t, + pixman_fixed_t *tx, pixman_fixed_t *ty); +static inline bool +sna_affine_transform_is_rotation(const PictTransform *t) +{ + assert(sna_transform_is_affine(t)); + return t->matrix[0][1] | t->matrix[1][0]; +} + +static inline bool +sna_transform_equal(const PictTransform *a, const PictTransform *b) +{ + if (a == b) + return true; + + if (a == NULL || b == NULL) + return false; + + return memcmp(a, b, sizeof(*a)) == 0; +} #endif /* _SNA_H */ diff --git a/drivers/video/Intel-2D/sna_cpu.c b/drivers/video/Intel-2D/sna_cpu.c new file mode 100644 
index 0000000000..23e6bcc0e0 --- /dev/null +++ b/drivers/video/Intel-2D/sna_cpu.c @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Chris Wilson + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_cpuid.h" + +#define xgetbv(index,eax,edx) \ + __asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index)) + +#define has_YMM 0x1 + +unsigned sna_cpu_detect(void) +{ + unsigned max = __get_cpuid_max(BASIC_CPUID, NULL); + unsigned int eax, ebx, ecx, edx; + unsigned features = 0; + unsigned extra = 0; + + if (max >= 1) { + __cpuid(1, eax, ebx, ecx, edx); + if (ecx & bit_SSE3) + features |= SSE3; + + if (ecx & bit_SSSE3) + features |= SSSE3; + + if (ecx & bit_SSE4_1) + features |= SSE4_1; + + if (ecx & bit_SSE4_2) + features |= SSE4_2; + + if (ecx & bit_OSXSAVE) { + unsigned int bv_eax, bv_ecx; + xgetbv(0, bv_eax, bv_ecx); + if ((bv_eax & 6) == 6) + extra |= has_YMM; + } + + if ((extra & has_YMM) && (ecx & bit_AVX)) + features |= AVX; + + if (edx & bit_MMX) + features |= MMX; + + if (edx & bit_SSE) + features |= SSE; + + if (edx & bit_SSE2) + features |= SSE2; + } + + if (max >= 7) { + __cpuid_count(7, 0, eax, ebx, ecx, edx); + if ((extra & has_YMM) && (ebx & bit_AVX2)) + features |= AVX2; + } + + return features; +} + +char *sna_cpu_features_to_string(unsigned features, char *line) +{ + char *ret = line; + +#ifdef __x86_64__ + line += sprintf (line, "x86-64"); +#else + line += sprintf (line, "x86"); +#endif + + if (features & SSE2) + line += sprintf (line, ", sse2"); + if (features & SSE3) + line += sprintf (line, ", sse3"); + if (features & SSSE3) + line += sprintf (line, ", ssse3"); + if (features & SSE4_1) + line += sprintf (line, ", sse4.1"); + if (features & SSE4_2) + line += sprintf (line, ", sse4.2"); + if (features & AVX) + line += sprintf (line, ", avx"); + if (features & AVX2) + line += sprintf (line, ", avx2"); + + return ret; +} diff --git a/drivers/video/Intel-2D/sna_cpuid.h b/drivers/video/Intel-2D/sna_cpuid.h new file mode 100644 index 0000000000..5a82c89308 --- /dev/null +++ b/drivers/video/Intel-2D/sna_cpuid.h @@ -0,0 +1,86 @@ 
+/* + * Copyright (c) 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Chris Wilson + * + */ + +/* Small wrapper around compiler specific implementation details of cpuid */ + +#ifndef SNA_CPUID_H +#define SNA_CPUID_H + +#include "compiler.h" + +#if HAS_GCC(4, 4) /* for __cpuid_count() */ +#include +#else +#define __get_cpuid_max(x, y) 0 +#define __cpuid(level, a, b, c, d) +#define __cpuid_count(level, count, a, b, c, d) +#endif + +#define BASIC_CPUID 0x0 +#define EXTENDED_CPUID 0x80000000 + +#ifndef bit_MMX +#define bit_MMX (1 << 23) +#endif + +#ifndef bit_SSE +#define bit_SSE (1 << 25) +#endif + +#ifndef bit_SSE2 +#define bit_SSE2 (1 << 26) +#endif + +#ifndef bit_SSE3 +#define bit_SSE3 (1 << 0) +#endif + +#ifndef bit_SSSE3 +#define bit_SSSE3 (1 << 9) +#endif + +#ifndef bit_SSE4_1 +#define bit_SSE4_1 (1 << 19) +#endif + +#ifndef bit_SSE4_2 +#define bit_SSE4_2 (1 << 20) +#endif + +#ifndef bit_OSXSAVE +#define bit_OSXSAVE (1 << 27) +#endif + +#ifndef bit_AVX +#define bit_AVX (1 << 28) +#endif + +#ifndef bit_AVX2 +#define bit_AVX2 (1<<5) +#endif + +#endif /* SNA_CPUID_H */ diff --git a/drivers/video/Intel-2D/sna_render.h b/drivers/video/Intel-2D/sna_render.h index 7b91cca4c0..4ab8acf544 100644 --- a/drivers/video/Intel-2D/sna_render.h +++ b/drivers/video/Intel-2D/sna_render.h @@ -1,729 +1,786 @@ -#ifndef SNA_RENDER_H -#define SNA_RENDER_H - -#include "compiler.h" - -#include -#include - -#define GRADIENT_CACHE_SIZE 16 - -#define GXinvalid 0xff - -#define HW_BIT_BLIT (1<<0) /* BGRX blitter */ -#define HW_TEX_BLIT (1<<1) /* stretch blit */ -#define HW_VID_BLIT (1<<2) /* planar and packed video */ - -struct sna; -struct sna_glyph; -struct sna_video; -struct sna_video_frame; -struct brw_compile; - -struct sna_composite_rectangles { - struct sna_coordinate { - int16_t x, y; - } src, mask, dst; - int16_t width, height; -}; - -struct sna_composite_op { - fastcall void (*blt)(struct sna *sna, const struct sna_composite_op *op, - const struct sna_composite_rectangles *r); - fastcall void (*box)(struct sna *sna, - const struct 
sna_composite_op *op, - const BoxRec *box); - void (*boxes)(struct sna *sna, const struct sna_composite_op *op, - const BoxRec *box, int nbox); - void (*done)(struct sna *sna, const struct sna_composite_op *op); - - struct sna_damage **damage; - - uint32_t op; - - struct { - PixmapPtr pixmap; - CARD32 format; - struct kgem_bo *bo; - int16_t x, y; - uint16_t width, height; - } dst; - - struct sna_composite_channel { - struct kgem_bo *bo; - PictTransform *transform; - uint16_t width; - uint16_t height; - uint32_t pict_format; - uint32_t card_format; - uint32_t filter; - uint32_t repeat; - uint32_t is_affine : 1; - uint32_t is_solid : 1; - uint32_t is_linear : 1; - uint32_t is_opaque : 1; - uint32_t alpha_fixup : 1; - uint32_t rb_reversed : 1; - int16_t offset[2]; - float scale[2]; - -// pixman_transform_t embedded_transform; - - union { - struct { - float dx, dy, offset; - } linear; - struct { - uint32_t pixel; - } gen2; - struct gen3_shader_channel { - int type; - uint32_t mode; - uint32_t constants; - } gen3; - } u; - } src, mask; - uint32_t is_affine : 1; - uint32_t has_component_alpha : 1; - uint32_t need_magic_ca_pass : 1; - uint32_t rb_reversed : 1; - - int16_t floats_per_vertex; - int16_t floats_per_rect; - fastcall void (*prim_emit)(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r); - - struct sna_composite_redirect { - struct kgem_bo *real_bo; - struct sna_damage **real_damage, *damage; - BoxRec box; - } redirect; - - union { - struct sna_blt_state { - PixmapPtr src_pixmap; - int16_t sx, sy; - - uint32_t inplace :1; - uint32_t overwrites:1; - uint32_t bpp : 6; - - uint32_t cmd; - uint32_t br13; - uint32_t pitch[2]; - uint32_t pixel; - struct kgem_bo *bo[2]; - } blt; - - struct { - float constants[8]; - uint32_t num_constants; - } gen3; - - struct { - int wm_kernel; - int ve_id; - } gen4; - - struct { - int16_t wm_kernel; - int16_t ve_id; - } gen5; - - struct { - uint32_t flags; - } gen6; - - struct { - uint32_t 
flags; - } gen7; - } u; - - void *priv; -}; - -struct sna_copy_op { - struct sna_composite_op base; - - void (*blt)(struct sna *sna, const struct sna_copy_op *op, - int16_t sx, int16_t sy, - int16_t w, int16_t h, - int16_t dx, int16_t dy); - void (*done)(struct sna *sna, const struct sna_copy_op *op); -}; - -struct sna_render { - int active; - - int caps; - - int max_3d_size; - int max_3d_pitch; - - unsigned prefer_gpu; -#define PREFER_GPU_BLT 0x1 -#define PREFER_GPU_RENDER 0x2 -#define PREFER_GPU_SPANS 0x4 - - bool (*composite)(struct sna *sna, uint8_t op, - PicturePtr dst, PicturePtr src, PicturePtr mask, - int16_t src_x, int16_t src_y, - int16_t msk_x, int16_t msk_y, - int16_t dst_x, int16_t dst_y, - int16_t w, int16_t h, - struct sna_composite_op *tmp); - -#if 0 - bool (*check_composite_spans)(struct sna *sna, uint8_t op, - PicturePtr dst, PicturePtr src, - int16_t w, int16_t h, unsigned flags); - bool (*composite_spans)(struct sna *sna, uint8_t op, - PicturePtr dst, PicturePtr src, - int16_t src_x, int16_t src_y, - int16_t dst_x, int16_t dst_y, - int16_t w, int16_t h, - unsigned flags, - struct sna_composite_spans_op *tmp); -#define COMPOSITE_SPANS_RECTILINEAR 0x1 -#define COMPOSITE_SPANS_INPLACE_HINT 0x2 - - bool (*video)(struct sna *sna, - struct sna_video *video, - struct sna_video_frame *frame, - RegionPtr dstRegion, - short src_w, short src_h, - short drw_w, short drw_h, - short dx, short dy, - PixmapPtr pixmap); - - bool (*fill_boxes)(struct sna *sna, - CARD8 op, - PictFormat format, - const xRenderColor *color, - PixmapPtr dst, struct kgem_bo *dst_bo, - const BoxRec *box, int n); - bool (*fill)(struct sna *sna, uint8_t alu, - PixmapPtr dst, struct kgem_bo *dst_bo, - uint32_t color, - struct sna_fill_op *tmp); - bool (*fill_one)(struct sna *sna, PixmapPtr dst, struct kgem_bo *dst_bo, - uint32_t color, - int16_t x1, int16_t y1, int16_t x2, int16_t y2, - uint8_t alu); - bool (*clear)(struct sna *sna, PixmapPtr dst, struct kgem_bo *dst_bo); - - bool 
(*copy_boxes)(struct sna *sna, uint8_t alu, - PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, - PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, - const BoxRec *box, int n, unsigned flags); -#define COPY_LAST 0x1 -#define COPY_SYNC 0x2 - -#endif - - bool (*blit_tex)(struct sna *sna, - uint8_t op, bool scale, - PixmapPtr src, struct kgem_bo *src_bo, - PixmapPtr mask,struct kgem_bo *mask_bo, - PixmapPtr dst, struct kgem_bo *dst_bo, - int32_t src_x, int32_t src_y, - int32_t msk_x, int32_t msk_y, - int32_t dst_x, int32_t dst_y, - int32_t width, int32_t height, - struct sna_composite_op *tmp); - - bool (*copy)(struct sna *sna, uint8_t alu, - PixmapPtr src, struct kgem_bo *src_bo, - PixmapPtr dst, struct kgem_bo *dst_bo, - struct sna_copy_op *op); - - void (*flush)(struct sna *sna); - void (*reset)(struct sna *sna); - void (*fini)(struct sna *sna); - -#if 0 - - struct sna_alpha_cache { - struct kgem_bo *cache_bo; - struct kgem_bo *bo[256+7]; - } alpha_cache; - - struct sna_solid_cache { - struct kgem_bo *cache_bo; - struct kgem_bo *bo[1024]; - uint32_t color[1025]; - int last; - int size; - int dirty; - } solid_cache; - - struct { - struct sna_gradient_cache { - struct kgem_bo *bo; - int nstops; - PictGradientStop *stops; - } cache[GRADIENT_CACHE_SIZE]; - int size; - } gradient_cache; - - struct sna_glyph_cache{ - PicturePtr picture; - struct sna_glyph **glyphs; - uint16_t count; - uint16_t evict; - } glyph[2]; - pixman_image_t *white_image; - PicturePtr white_picture; -#if HAS_PIXMAN_GLYPHS - pixman_glyph_cache_t *glyph_cache; -#endif - -#endif - - uint16_t vb_id; - uint16_t vertex_offset; - uint16_t vertex_start; - uint16_t vertex_index; - uint16_t vertex_used; - uint16_t vertex_size; - uint16_t vertex_reloc[16]; - int nvertex_reloc; - - struct kgem_bo *vbo; - float *vertices; - - float vertex_data[1024]; -}; - -struct gen2_render_state { - uint32_t target; - bool need_invariant; - uint32_t logic_op_enabled; - uint32_t ls1, 
ls2, vft; - uint32_t diffuse; - uint32_t specular; -}; - -struct gen3_render_state { - uint32_t current_dst; - bool need_invariant; - uint32_t tex_count; - uint32_t last_drawrect_limit; - uint32_t last_target; - uint32_t last_blend; - uint32_t last_constants; - uint32_t last_sampler; - uint32_t last_shader; - uint32_t last_diffuse; - uint32_t last_specular; - - uint16_t last_vertex_offset; - uint16_t floats_per_vertex; - uint16_t last_floats_per_vertex; - - uint32_t tex_map[4]; - uint32_t tex_handle[2]; - uint32_t tex_delta[2]; -}; - -struct gen4_render_state { - struct kgem_bo *general_bo; - - uint32_t vs; - uint32_t sf; - uint32_t wm; - uint32_t cc; - - int ve_id; - uint32_t drawrect_offset; - uint32_t drawrect_limit; - uint32_t last_pipelined_pointers; - uint16_t last_primitive; - int16_t floats_per_vertex; - uint16_t surface_table; - - bool needs_invariant; - bool needs_urb; -}; - -struct gen5_render_state { - struct kgem_bo *general_bo; - - uint32_t vs; - uint32_t sf[2]; - uint32_t wm; - uint32_t cc; - - int ve_id; - uint32_t drawrect_offset; - uint32_t drawrect_limit; - uint16_t last_primitive; - int16_t floats_per_vertex; - uint16_t surface_table; - uint16_t last_pipelined_pointers; - - bool needs_invariant; -}; - -enum { - GEN6_WM_KERNEL_NOMASK = 0, - GEN6_WM_KERNEL_NOMASK_P, - - GEN6_WM_KERNEL_MASK, - GEN6_WM_KERNEL_MASK_P, - - GEN6_WM_KERNEL_MASKCA, - GEN6_WM_KERNEL_MASKCA_P, - - GEN6_WM_KERNEL_MASKSA, - GEN6_WM_KERNEL_MASKSA_P, - - GEN6_WM_KERNEL_OPACITY, - GEN6_WM_KERNEL_OPACITY_P, - - GEN6_WM_KERNEL_VIDEO_PLANAR, - GEN6_WM_KERNEL_VIDEO_PACKED, - GEN6_KERNEL_COUNT -}; - -struct gen6_render_state { - const struct gt_info *info; - struct kgem_bo *general_bo; - - uint32_t vs_state; - uint32_t sf_state; - uint32_t sf_mask_state; - uint32_t wm_state; - uint32_t wm_kernel[GEN6_KERNEL_COUNT][3]; - - uint32_t cc_blend; - - uint32_t drawrect_offset; - uint32_t drawrect_limit; - uint32_t blend; - uint32_t samplers; - uint32_t kernel; - - uint16_t num_sf_outputs; 
- uint16_t ve_id; - uint16_t last_primitive; - int16_t floats_per_vertex; - uint16_t surface_table; - - bool needs_invariant; - bool first_state_packet; -}; - -enum { - GEN7_WM_KERNEL_NOMASK = 0, - GEN7_WM_KERNEL_NOMASK_P, - - GEN7_WM_KERNEL_MASK, - GEN7_WM_KERNEL_MASK_P, - - GEN7_WM_KERNEL_MASKCA, - GEN7_WM_KERNEL_MASKCA_P, - - GEN7_WM_KERNEL_MASKSA, - GEN7_WM_KERNEL_MASKSA_P, - - GEN7_WM_KERNEL_OPACITY, - GEN7_WM_KERNEL_OPACITY_P, - - GEN7_WM_KERNEL_VIDEO_PLANAR, - GEN7_WM_KERNEL_VIDEO_PACKED, - GEN7_WM_KERNEL_COUNT -}; - -struct gen7_render_state { - const struct gt_info *info; - struct kgem_bo *general_bo; - - uint32_t vs_state; - uint32_t sf_state; - uint32_t sf_mask_state; - uint32_t wm_state; - uint32_t wm_kernel[GEN7_WM_KERNEL_COUNT][3]; - - uint32_t cc_blend; - - uint32_t drawrect_offset; - uint32_t drawrect_limit; - uint32_t blend; - uint32_t samplers; - uint32_t kernel; - - uint16_t num_sf_outputs; - uint16_t ve_id; - uint16_t last_primitive; - int16_t floats_per_vertex; - uint16_t surface_table; - - bool needs_invariant; - bool emit_flush; -}; - -struct sna_static_stream { - uint32_t size, used; - uint8_t *data; -}; - -int sna_static_stream_init(struct sna_static_stream *stream); -uint32_t sna_static_stream_add(struct sna_static_stream *stream, - const void *data, uint32_t len, uint32_t align); -void *sna_static_stream_map(struct sna_static_stream *stream, - uint32_t len, uint32_t align); -uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream, - void *ptr); -unsigned sna_static_stream_compile_sf(struct sna *sna, - struct sna_static_stream *stream, - bool (*compile)(struct brw_compile *)); - -unsigned sna_static_stream_compile_wm(struct sna *sna, - struct sna_static_stream *stream, - bool (*compile)(struct brw_compile *, int), - int width); -struct kgem_bo *sna_static_stream_fini(struct sna *sna, - struct sna_static_stream *stream); - -struct kgem_bo * -sna_render_get_solid(struct sna *sna, - uint32_t color); - -void 
-sna_render_flush_solid(struct sna *sna); - - -uint32_t sna_rgba_for_color(uint32_t color, int depth); -uint32_t sna_rgba_to_color(uint32_t rgba, uint32_t format); -bool sna_get_rgba_from_pixel(uint32_t pixel, - uint16_t *red, - uint16_t *green, - uint16_t *blue, - uint16_t *alpha, - uint32_t format); -bool sna_picture_is_solid(PicturePtr picture, uint32_t *color); - -void no_render_init(struct sna *sna); - -bool gen2_render_init(struct sna *sna); -bool gen3_render_init(struct sna *sna); -bool gen4_render_init(struct sna *sna); -bool gen5_render_init(struct sna *sna); -bool gen6_render_init(struct sna *sna); -bool gen7_render_init(struct sna *sna); - -#if 0 - -bool sna_tiling_composite(uint32_t op, - PicturePtr src, - PicturePtr mask, - PicturePtr dst, - int16_t src_x, int16_t src_y, - int16_t mask_x, int16_t mask_y, - int16_t dst_x, int16_t dst_y, - int16_t width, int16_t height, - struct sna_composite_op *tmp); -bool sna_tiling_fill_boxes(struct sna *sna, - CARD8 op, - PictFormat format, - const xRenderColor *color, - PixmapPtr dst, struct kgem_bo *dst_bo, - const BoxRec *box, int n); - -bool sna_tiling_copy_boxes(struct sna *sna, uint8_t alu, - PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, - PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, - const BoxRec *box, int n); - -bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu, - struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, - struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, - int bpp, const BoxRec *box, int nbox); - -bool sna_blt_composite(struct sna *sna, - uint32_t op, - PicturePtr src, - PicturePtr dst, - int16_t src_x, int16_t src_y, - int16_t dst_x, int16_t dst_y, - int16_t width, int16_t height, - struct sna_composite_op *tmp, - bool fallback); -bool sna_blt_composite__convert(struct sna *sna, - int x, int y, - int width, int height, - struct sna_composite_op *tmp); - -bool sna_blt_fill(struct sna *sna, uint8_t alu, - struct kgem_bo *bo, - 
int bpp, - uint32_t pixel, - struct sna_fill_op *fill); - -bool sna_blt_copy(struct sna *sna, uint8_t alu, - struct kgem_bo *src, - struct kgem_bo *dst, - int bpp, - struct sna_copy_op *copy); - -bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, - struct kgem_bo *bo, - int bpp, - uint32_t pixel, - const BoxRec *box, int n); - -bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, - struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, - struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, - int bpp, - const BoxRec *box, int n); -bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu, - PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, - PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, - const BoxRec *box, int nbox); - -bool _sna_get_pixel_from_rgba(uint32_t *pixel, - uint16_t red, - uint16_t green, - uint16_t blue, - uint16_t alpha, - uint32_t format); - -static inline bool -sna_get_pixel_from_rgba(uint32_t * pixel, - uint16_t red, - uint16_t green, - uint16_t blue, - uint16_t alpha, - uint32_t format) -{ - switch (format) { - case PICT_x8r8g8b8: - alpha = 0xffff; - /* fall through to re-use a8r8g8b8 expansion */ - case PICT_a8r8g8b8: - *pixel = ((alpha >> 8 << 24) | - (red >> 8 << 16) | - (green & 0xff00) | - (blue >> 8)); - return TRUE; - case PICT_a8: - *pixel = alpha >> 8; - return TRUE; - } - - return _sna_get_pixel_from_rgba(pixel, red, green, blue, alpha, format); -} - -struct kgem_bo * -__sna_render_pixmap_bo(struct sna *sna, - PixmapPtr pixmap, - const BoxRec *box, - bool blt); - -int -sna_render_pixmap_bo(struct sna *sna, - struct sna_composite_channel *channel, - PixmapPtr pixmap, - int16_t x, int16_t y, - int16_t w, int16_t h, - int16_t dst_x, int16_t dst_y); - -bool -sna_render_pixmap_partial(struct sna *sna, - PixmapPtr pixmap, - struct kgem_bo *bo, - struct sna_composite_channel *channel, - int16_t x, int16_t y, - int16_t w, int16_t h); - -int -sna_render_picture_extract(struct sna *sna, - 
PicturePtr picture, - struct sna_composite_channel *channel, - int16_t x, int16_t y, - int16_t w, int16_t h, - int16_t dst_x, int16_t dst_y); - -int -sna_render_picture_approximate_gradient(struct sna *sna, - PicturePtr picture, - struct sna_composite_channel *channel, - int16_t x, int16_t y, - int16_t w, int16_t h, - int16_t dst_x, int16_t dst_y); - -int -sna_render_picture_fixup(struct sna *sna, - PicturePtr picture, - struct sna_composite_channel *channel, - int16_t x, int16_t y, - int16_t w, int16_t h, - int16_t dst_x, int16_t dst_y); - -int -sna_render_picture_convert(struct sna *sna, - PicturePtr picture, - struct sna_composite_channel *channel, - PixmapPtr pixmap, - int16_t x, int16_t y, - int16_t w, int16_t h, - int16_t dst_x, int16_t dst_y, - bool fixup_alpha); - -inline static void sna_render_composite_redirect_init(struct sna_composite_op *op) -{ - struct sna_composite_redirect *t = &op->redirect; - t->real_bo = NULL; - t->damage = NULL; -} - -bool -sna_render_composite_redirect(struct sna *sna, - struct sna_composite_op *op, - int x, int y, int width, int height); - -void -sna_render_composite_redirect_done(struct sna *sna, - const struct sna_composite_op *op); - -bool -sna_composite_mask_is_opaque(PicturePtr mask); - -#endif -void sna_vertex_init(struct sna *sna); - -static inline void sna_vertex_lock(struct sna_render *r) -{ -// pthread_mutex_lock(&r->lock); -} - -static inline void sna_vertex_acquire__locked(struct sna_render *r) -{ - r->active++; -} - -static inline void sna_vertex_unlock(struct sna_render *r) -{ -// pthread_mutex_unlock(&r->lock); -} - -static inline void sna_vertex_release__locked(struct sna_render *r) -{ - assert(r->active > 0); - --r->active; -// if (--r->active == 0) -// pthread_cond_signal(&r->wait); -} - -static inline bool sna_vertex_wait__locked(struct sna_render *r) -{ - bool was_active = r->active; -// while (r->active) -// pthread_cond_wait(&r->wait, &r->lock); - return was_active; -} - -#endif /* SNA_RENDER_H */ 
+#ifndef SNA_RENDER_H +#define SNA_RENDER_H + +#include "compiler.h" + +#include +#include + +#define GRADIENT_CACHE_SIZE 16 + +#define GXinvalid 0xff + +#define HW_BIT_BLIT (1<<0) /* BGRX blitter */ +#define HW_TEX_BLIT (1<<1) /* stretch blit */ +#define HW_VID_BLIT (1<<2) /* planar and packed video */ + +struct sna; +struct sna_glyph; +struct sna_video; +struct sna_video_frame; +struct brw_compile; + +struct sna_composite_rectangles { + struct sna_coordinate { + int16_t x, y; + } src, mask, dst; + int16_t width, height; +}; + +struct sna_composite_op { + fastcall void (*blt)(struct sna *sna, const struct sna_composite_op *op, + const struct sna_composite_rectangles *r); + + void (*done)(struct sna *sna, const struct sna_composite_op *op); + + struct sna_damage **damage; + + uint32_t op; + + struct { + PixmapPtr pixmap; + CARD32 format; + struct kgem_bo *bo; + int16_t x, y; + uint16_t width, height; + } dst; + + struct sna_composite_channel { + struct kgem_bo *bo; + PictTransform *transform; + uint16_t width; + uint16_t height; + uint32_t pict_format; + uint32_t card_format; + uint32_t filter; + uint32_t repeat; + uint32_t is_affine : 1; + uint32_t is_solid : 1; + uint32_t is_linear : 1; + uint32_t is_opaque : 1; + uint32_t alpha_fixup : 1; + uint32_t rb_reversed : 1; + int16_t offset[2]; + float scale[2]; + +// pixman_transform_t embedded_transform; + + union { + struct { + float dx, dy, offset; + } linear; + struct { + uint32_t pixel; + } gen2; + struct gen3_shader_channel { + int type; + uint32_t mode; + uint32_t constants; + } gen3; + } u; + } src, mask; + uint32_t is_affine : 1; + uint32_t has_component_alpha : 1; + uint32_t need_magic_ca_pass : 1; + uint32_t rb_reversed : 1; + + int16_t floats_per_vertex; + int16_t floats_per_rect; + fastcall void (*prim_emit)(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r); + + struct sna_composite_redirect { + struct kgem_bo *real_bo; + struct sna_damage **real_damage, 
*damage; + BoxRec box; + } redirect; + + union { + struct sna_blt_state { + PixmapPtr src_pixmap; + int16_t sx, sy; + + uint32_t inplace :1; + uint32_t overwrites:1; + uint32_t bpp : 6; + + uint32_t cmd; + uint32_t br13; + uint32_t pitch[2]; + uint32_t pixel; + struct kgem_bo *bo[2]; + } blt; + + struct { + float constants[8]; + uint32_t num_constants; + } gen3; + + struct { + int wm_kernel; + int ve_id; + } gen4; + + struct { + int16_t wm_kernel; + int16_t ve_id; + } gen5; + + struct { + uint32_t flags; + } gen6; + + struct { + uint32_t flags; + } gen7; + } u; + + void *priv; +}; + +struct sna_copy_op { + struct sna_composite_op base; + + void (*blt)(struct sna *sna, const struct sna_copy_op *op, + int16_t sx, int16_t sy, + int16_t w, int16_t h, + int16_t dx, int16_t dy); + void (*done)(struct sna *sna, const struct sna_copy_op *op); +}; + +struct sna_render { + int active; + + int caps; + + int max_3d_size; + int max_3d_pitch; + + unsigned prefer_gpu; +#define PREFER_GPU_BLT 0x1 +#define PREFER_GPU_RENDER 0x2 +#define PREFER_GPU_SPANS 0x4 + + bool (*composite)(struct sna *sna, uint8_t op, + PicturePtr dst, PicturePtr src, PicturePtr mask, + int16_t src_x, int16_t src_y, + int16_t msk_x, int16_t msk_y, + int16_t dst_x, int16_t dst_y, + int16_t w, int16_t h, + struct sna_composite_op *tmp); + +#if 0 + bool (*check_composite_spans)(struct sna *sna, uint8_t op, + PicturePtr dst, PicturePtr src, + int16_t w, int16_t h, unsigned flags); + bool (*composite_spans)(struct sna *sna, uint8_t op, + PicturePtr dst, PicturePtr src, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + int16_t w, int16_t h, + unsigned flags, + struct sna_composite_spans_op *tmp); +#define COMPOSITE_SPANS_RECTILINEAR 0x1 +#define COMPOSITE_SPANS_INPLACE_HINT 0x2 + + bool (*video)(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + RegionPtr dstRegion, + PixmapPtr pixmap); + + bool (*fill_boxes)(struct sna *sna, + CARD8 op, + PictFormat format, + const 
xRenderColor *color, + PixmapPtr dst, struct kgem_bo *dst_bo, + const BoxRec *box, int n); + bool (*fill)(struct sna *sna, uint8_t alu, + PixmapPtr dst, struct kgem_bo *dst_bo, + uint32_t color, + struct sna_fill_op *tmp); + bool (*fill_one)(struct sna *sna, PixmapPtr dst, struct kgem_bo *dst_bo, + uint32_t color, + int16_t x1, int16_t y1, int16_t x2, int16_t y2, + uint8_t alu); + bool (*clear)(struct sna *sna, PixmapPtr dst, struct kgem_bo *dst_bo); + + bool (*copy_boxes)(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n, unsigned flags); +#define COPY_LAST 0x1 +#define COPY_SYNC 0x2 + +#endif + + bool (*blit_tex)(struct sna *sna, + uint8_t op, bool scale, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr mask,struct kgem_bo *mask_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + int32_t src_x, int32_t src_y, + int32_t msk_x, int32_t msk_y, + int32_t dst_x, int32_t dst_y, + int32_t width, int32_t height, + struct sna_composite_op *tmp); + + bool (*copy)(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + struct sna_copy_op *op); + + void (*flush)(struct sna *sna); + void (*reset)(struct sna *sna); + void (*fini)(struct sna *sna); + +#if 0 + + struct sna_alpha_cache { + struct kgem_bo *cache_bo; + struct kgem_bo *bo[256+7]; + } alpha_cache; + + struct sna_solid_cache { + struct kgem_bo *cache_bo; + struct kgem_bo *bo[1024]; + uint32_t color[1025]; + int last; + int size; + int dirty; + } solid_cache; + + struct { + struct sna_gradient_cache { + struct kgem_bo *bo; + int nstops; + PictGradientStop *stops; + } cache[GRADIENT_CACHE_SIZE]; + int size; + } gradient_cache; + + struct sna_glyph_cache{ + PicturePtr picture; + struct sna_glyph **glyphs; + uint16_t count; + uint16_t evict; + } glyph[2]; + pixman_image_t *white_image; + PicturePtr white_picture; 
+#if HAS_PIXMAN_GLYPHS + pixman_glyph_cache_t *glyph_cache; +#endif + +#endif + + uint16_t vb_id; + uint16_t vertex_offset; + uint16_t vertex_start; + uint16_t vertex_index; + uint16_t vertex_used; + uint16_t vertex_size; + uint16_t vertex_reloc[16]; + int nvertex_reloc; + + struct kgem_bo *vbo; + float *vertices; + + float vertex_data[1024]; +}; + +struct gen2_render_state { + uint32_t target; + bool need_invariant; + uint32_t logic_op_enabled; + uint32_t ls1, ls2, vft; + uint32_t diffuse; + uint32_t specular; +}; + +struct gen3_render_state { + uint32_t current_dst; + bool need_invariant; + uint32_t tex_count; + uint32_t last_drawrect_limit; + uint32_t last_target; + uint32_t last_blend; + uint32_t last_constants; + uint32_t last_sampler; + uint32_t last_shader; + uint32_t last_diffuse; + uint32_t last_specular; + + uint16_t last_vertex_offset; + uint16_t floats_per_vertex; + uint16_t last_floats_per_vertex; + + uint32_t tex_map[4]; + uint32_t tex_handle[2]; + uint32_t tex_delta[2]; +}; + +struct gen4_render_state { + struct kgem_bo *general_bo; + + uint32_t vs; + uint32_t sf; + uint32_t wm; + uint32_t cc; + + int ve_id; + uint32_t drawrect_offset; + uint32_t drawrect_limit; + uint32_t last_pipelined_pointers; + uint16_t last_primitive; + int16_t floats_per_vertex; + uint16_t surface_table; + + bool needs_invariant; + bool needs_urb; +}; + +struct gen5_render_state { + struct kgem_bo *general_bo; + + uint32_t vs; + uint32_t sf[2]; + uint32_t wm; + uint32_t cc; + + int ve_id; + uint32_t drawrect_offset; + uint32_t drawrect_limit; + uint32_t last_pipelined_pointers; + uint16_t last_primitive; + int16_t floats_per_vertex; + uint16_t surface_table; + + bool needs_invariant; +}; + +enum { + GEN6_WM_KERNEL_NOMASK = 0, + GEN6_WM_KERNEL_NOMASK_P, + + GEN6_WM_KERNEL_MASK, + GEN6_WM_KERNEL_MASK_P, + + GEN6_WM_KERNEL_MASKCA, + GEN6_WM_KERNEL_MASKCA_P, + + GEN6_WM_KERNEL_MASKSA, + GEN6_WM_KERNEL_MASKSA_P, + + GEN6_WM_KERNEL_OPACITY, + GEN6_WM_KERNEL_OPACITY_P, + + 
GEN6_WM_KERNEL_VIDEO_PLANAR, + GEN6_WM_KERNEL_VIDEO_PACKED, + GEN6_KERNEL_COUNT +}; + +struct gen6_render_state { + const struct gt_info *info; + struct kgem_bo *general_bo; + + uint32_t vs_state; + uint32_t sf_state; + uint32_t sf_mask_state; + uint32_t wm_state; + uint32_t wm_kernel[GEN6_KERNEL_COUNT][3]; + + uint32_t cc_blend; + + uint32_t drawrect_offset; + uint32_t drawrect_limit; + uint32_t blend; + uint32_t samplers; + uint32_t kernel; + + uint16_t num_sf_outputs; + uint16_t ve_id; + uint16_t last_primitive; + int16_t floats_per_vertex; + uint16_t surface_table; + + bool needs_invariant; + bool first_state_packet; +}; + +enum { + GEN7_WM_KERNEL_NOMASK = 0, + GEN7_WM_KERNEL_NOMASK_P, + + GEN7_WM_KERNEL_MASK, + GEN7_WM_KERNEL_MASK_P, + + GEN7_WM_KERNEL_MASKCA, + GEN7_WM_KERNEL_MASKCA_P, + + GEN7_WM_KERNEL_MASKSA, + GEN7_WM_KERNEL_MASKSA_P, + + GEN7_WM_KERNEL_OPACITY, + GEN7_WM_KERNEL_OPACITY_P, + + GEN7_WM_KERNEL_VIDEO_PLANAR, + GEN7_WM_KERNEL_VIDEO_PACKED, + GEN7_WM_KERNEL_COUNT +}; + +struct gen7_render_state { + const struct gt_info *info; + struct kgem_bo *general_bo; + + uint32_t vs_state; + uint32_t sf_state; + uint32_t sf_mask_state; + uint32_t wm_state; + uint32_t wm_kernel[GEN7_WM_KERNEL_COUNT][3]; + + uint32_t cc_blend; + + uint32_t drawrect_offset; + uint32_t drawrect_limit; + uint32_t blend; + uint32_t samplers; + uint32_t kernel; + + uint16_t num_sf_outputs; + uint16_t ve_id; + uint16_t last_primitive; + int16_t floats_per_vertex; + uint16_t surface_table; + + bool needs_invariant; + bool emit_flush; +}; + +struct sna_static_stream { + uint32_t size, used; + uint8_t *data; +}; + +int sna_static_stream_init(struct sna_static_stream *stream); +uint32_t sna_static_stream_add(struct sna_static_stream *stream, + const void *data, uint32_t len, uint32_t align); +void *sna_static_stream_map(struct sna_static_stream *stream, + uint32_t len, uint32_t align); +uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream, + void *ptr); +unsigned 
sna_static_stream_compile_sf(struct sna *sna, + struct sna_static_stream *stream, + bool (*compile)(struct brw_compile *)); + +unsigned sna_static_stream_compile_wm(struct sna *sna, + struct sna_static_stream *stream, + bool (*compile)(struct brw_compile *, int), + int width); +struct kgem_bo *sna_static_stream_fini(struct sna *sna, + struct sna_static_stream *stream); + +struct kgem_bo * +sna_render_get_solid(struct sna *sna, + uint32_t color); + +void +sna_render_flush_solid(struct sna *sna); + + +uint32_t sna_rgba_for_color(uint32_t color, int depth); +uint32_t sna_rgba_to_color(uint32_t rgba, uint32_t format); +bool sna_get_rgba_from_pixel(uint32_t pixel, + uint16_t *red, + uint16_t *green, + uint16_t *blue, + uint16_t *alpha, + uint32_t format); +bool sna_picture_is_solid(PicturePtr picture, uint32_t *color); + +const char *no_render_init(struct sna *sna); +const char *gen2_render_init(struct sna *sna, const char *backend); +const char *gen3_render_init(struct sna *sna, const char *backend); +const char *gen4_render_init(struct sna *sna, const char *backend); +const char *gen5_render_init(struct sna *sna, const char *backend); +const char *gen6_render_init(struct sna *sna, const char *backend); +const char *gen7_render_init(struct sna *sna, const char *backend); + +#if 0 +bool sna_tiling_composite(uint32_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t mask_x, int16_t mask_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp); +bool sna_tiling_fill_boxes(struct sna *sna, + CARD8 op, + PictFormat format, + const xRenderColor *color, + PixmapPtr dst, struct kgem_bo *dst_bo, + const BoxRec *box, int n); + +bool sna_tiling_copy_boxes(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n); + +bool 
sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu, + struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + int bpp, const BoxRec *box, int nbox); + +bool sna_blt_composite(struct sna *sna, + uint32_t op, + PicturePtr src, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp, + bool fallback); +bool sna_blt_composite__convert(struct sna *sna, + int x, int y, + int width, int height, + struct sna_composite_op *tmp); + +bool sna_blt_fill(struct sna *sna, uint8_t alu, + struct kgem_bo *bo, + int bpp, + uint32_t pixel, + struct sna_fill_op *fill); + +bool sna_blt_copy(struct sna *sna, uint8_t alu, + struct kgem_bo *src, + struct kgem_bo *dst, + int bpp, + struct sna_copy_op *copy); + +bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, + struct kgem_bo *bo, + int bpp, + uint32_t pixel, + const BoxRec *box, int n); + +bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, + struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + int bpp, + const BoxRec *box, int n); +bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int nbox); + +bool _sna_get_pixel_from_rgba(uint32_t *pixel, + uint16_t red, + uint16_t green, + uint16_t blue, + uint16_t alpha, + uint32_t format); + +static inline bool +sna_get_pixel_from_rgba(uint32_t * pixel, + uint16_t red, + uint16_t green, + uint16_t blue, + uint16_t alpha, + uint32_t format) +{ + switch (format) { + case PICT_x8r8g8b8: + alpha = 0xffff; + /* fall through to re-use a8r8g8b8 expansion */ + case PICT_a8r8g8b8: + *pixel = ((alpha >> 8 << 24) | + (red >> 8 << 16) | + (green & 0xff00) | + (blue >> 8)); + return TRUE; + case PICT_a8: + *pixel = 
alpha >> 8; + return TRUE; + } + + return _sna_get_pixel_from_rgba(pixel, red, green, blue, alpha, format); +} + +struct kgem_bo * +__sna_render_pixmap_bo(struct sna *sna, + PixmapPtr pixmap, + const BoxRec *box, + bool blt); + +int +sna_render_pixmap_bo(struct sna *sna, + struct sna_composite_channel *channel, + PixmapPtr pixmap, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +bool +sna_render_pixmap_partial(struct sna *sna, + PixmapPtr pixmap, + struct kgem_bo *bo, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h); + +int +sna_render_picture_extract(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +int +sna_render_picture_approximate_gradient(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +int +sna_render_picture_fixup(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +int +sna_render_picture_convert(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + PixmapPtr pixmap, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y, + bool fixup_alpha); + +inline static void sna_render_composite_redirect_init(struct sna_composite_op *op) +{ + struct sna_composite_redirect *t = &op->redirect; + t->real_bo = NULL; + t->damage = NULL; +} + +bool +sna_render_composite_redirect(struct sna *sna, + struct sna_composite_op *op, + int x, int y, int width, int height, + bool partial); + +void +sna_render_composite_redirect_done(struct sna *sna, + const struct sna_composite_op *op); + +bool +sna_composite_mask_is_opaque(PicturePtr mask); +#endif + +void sna_vertex_init(struct sna *sna); + +static inline void 
sna_vertex_lock(struct sna_render *r) +{ +// pthread_mutex_lock(&r->lock); +} + +static inline void sna_vertex_acquire__locked(struct sna_render *r) +{ + r->active++; +} + +static inline void sna_vertex_unlock(struct sna_render *r) +{ +// pthread_mutex_unlock(&r->lock); +} + +static inline void sna_vertex_release__locked(struct sna_render *r) +{ + assert(r->active > 0); + --r->active; +// if (--r->active == 0) +// pthread_cond_signal(&r->wait); +} + +static inline bool sna_vertex_wait__locked(struct sna_render *r) +{ + bool was_active = r->active; +// while (r->active) +// pthread_cond_wait(&r->wait, &r->lock); + return was_active; +} + +#define alphaless(format) PICT_FORMAT(PICT_FORMAT_BPP(format), \ + PICT_FORMAT_TYPE(format), \ + 0, \ + PICT_FORMAT_R(format), \ + PICT_FORMAT_G(format), \ + PICT_FORMAT_B(format)) +static bool +gen3_blit_tex(struct sna *sna, + uint8_t op, bool scale, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr mask,struct kgem_bo *mask_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + int32_t src_x, int32_t src_y, + int32_t msk_x, int32_t msk_y, + int32_t dst_x, int32_t dst_y, + int32_t width, int32_t height, + struct sna_composite_op *tmp); +static bool +gen4_blit_tex(struct sna *sna, + uint8_t op, bool scale, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr mask,struct kgem_bo *mask_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + int32_t src_x, int32_t src_y, + int32_t msk_x, int32_t msk_y, + int32_t dst_x, int32_t dst_y, + int32_t width, int32_t height, + struct sna_composite_op *tmp); + +static bool +gen5_blit_tex(struct sna *sna, + uint8_t op, bool scale, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr mask,struct kgem_bo *mask_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + int32_t src_x, int32_t src_y, + int32_t msk_x, int32_t msk_y, + int32_t dst_x, int32_t dst_y, + int32_t width, int32_t height, + struct sna_composite_op *tmp); + +static bool +gen6_blit_tex(struct sna *sna, + uint8_t op, bool scale, + PixmapPtr src, struct 
kgem_bo *src_bo, + PixmapPtr mask,struct kgem_bo *mask_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + int32_t src_x, int32_t src_y, + int32_t msk_x, int32_t msk_y, + int32_t dst_x, int32_t dst_y, + int32_t width, int32_t height, + struct sna_composite_op *tmp); + +static bool +gen7_blit_tex(struct sna *sna, + uint8_t op, bool scale, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr mask,struct kgem_bo *mask_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + int32_t src_x, int32_t src_y, + int32_t msk_x, int32_t msk_y, + int32_t dst_x, int32_t dst_y, + int32_t width, int32_t height, + struct sna_composite_op *tmp); + +#endif /* SNA_RENDER_H */ diff --git a/drivers/video/Intel-2D/sna_render_inline.h b/drivers/video/Intel-2D/sna_render_inline.h index c3f62a571a..0af2f5d09b 100644 --- a/drivers/video/Intel-2D/sna_render_inline.h +++ b/drivers/video/Intel-2D/sna_render_inline.h @@ -17,7 +17,7 @@ static inline bool need_redirect(struct sna *sna, PixmapPtr dst) dst->drawable.height > sna->render.max_3d_size); } -static inline float pack_2s(int16_t x, int16_t y) +static force_inline float pack_2s(int16_t x, int16_t y) { union { struct sna_coordinate p; @@ -28,35 +28,35 @@ static inline float pack_2s(int16_t x, int16_t y) return u.f; } -static inline int vertex_space(struct sna *sna) +static force_inline int vertex_space(struct sna *sna) { return sna->render.vertex_size - sna->render.vertex_used; } -static inline void vertex_emit(struct sna *sna, float v) +static force_inline void vertex_emit(struct sna *sna, float v) { assert(sna->render.vertex_used < sna->render.vertex_size); sna->render.vertices[sna->render.vertex_used++] = v; } -static inline void vertex_emit_2s(struct sna *sna, int16_t x, int16_t y) +static force_inline void vertex_emit_2s(struct sna *sna, int16_t x, int16_t y) { vertex_emit(sna, pack_2s(x, y)); } -static inline int batch_space(struct sna *sna) +static force_inline int batch_space(struct sna *sna) { assert(sna->kgem.nbatch <= 
KGEM_BATCH_SIZE(&sna->kgem)); assert(sna->kgem.nbatch + KGEM_BATCH_RESERVED <= sna->kgem.surface); return sna->kgem.surface - sna->kgem.nbatch - KGEM_BATCH_RESERVED; } -static inline void batch_emit(struct sna *sna, uint32_t dword) +static force_inline void batch_emit(struct sna *sna, uint32_t dword) { assert(sna->kgem.mode != KGEM_NONE); assert(sna->kgem.nbatch + KGEM_BATCH_RESERVED < sna->kgem.surface); sna->kgem.batch[sna->kgem.nbatch++] = dword; } -static inline void batch_emit_float(struct sna *sna, float f) +static force_inline void batch_emit_float(struct sna *sna, float f) { union { uint32_t dw; @@ -67,5 +67,4 @@ static inline void batch_emit_float(struct sna *sna, float f) } - #endif /* SNA_RENDER_INLINE_H */ diff --git a/drivers/video/Intel-2D/sna_transform.c b/drivers/video/Intel-2D/sna_transform.c new file mode 100644 index 0000000000..cbc44495b6 --- /dev/null +++ b/drivers/video/Intel-2D/sna_transform.c @@ -0,0 +1,147 @@ +/* + * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. All Rights Reserved. + * Copyright (c) 2005 Jesse Barnes + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Jesse Barnes
+ *    Chris Wilson
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "sna.h"
+
+/*
+ * Local fixed-point helpers. A pixman_fixed_t keeps 16 fractional bits:
+ * integers are converted by shifting 16 bits, and the fraction is the low
+ * 16 bits (pixman_fixed_1 - pixman_fixed_e == 0xffff).
+ */
+#define pixman_fixed_e			((pixman_fixed_t) 1)
+#define pixman_fixed_1			(pixman_int_to_fixed(1))
+#define pixman_fixed_1_minus_e		(pixman_fixed_1 - pixman_fixed_e)
+#define pixman_fixed_to_int(f)		((int) ((f) >> 16))
+/*
+ * Shift as unsigned: left-shifting a negative signed value is undefined
+ * behaviour in C, whereas the unsigned shift is well defined and yields the
+ * same bit pattern.
+ */
+#define pixman_int_to_fixed(i)		((pixman_fixed_t) ((uint32_t) (i) << 16))
+#define pixman_fixed_fraction(f)	((f) & pixman_fixed_1_minus_e)
+
+#define IntToxFixed(i) pixman_int_to_fixed(i)
+
+/**
+ * Returns whether the provided transform is affine.
+ *
+ * transform may be null, in which case it is reported as affine.
+ * Otherwise only the first two entries of the bottom row (matrix[2][0] and
+ * matrix[2][1]) are inspected; both must be zero.
+ */
+bool sna_transform_is_affine(const PictTransform *t)
+{
+	if (t == NULL)
+		return true;
+
+	return t->matrix[2][0] == 0 && t->matrix[2][1] == 0;
+}
+
+/**
+ * Returns whether the provided transform is a pure translation.
+ *
+ * The transform qualifies when every entry other than the translation column
+ * (matrix[0][2], matrix[1][2]) matches the identity matrix in fixed-point.
+ * On success the translation is stored, still in fixed-point, in *tx and *ty.
+ * A null transform is reported as a translation by (0, 0).
+ * When false is returned, *tx and *ty are left unmodified.
+ */
+bool
+sna_transform_is_translation(const PictTransform *t,
+			     pixman_fixed_t *tx,
+			     pixman_fixed_t *ty)
+{
+	if (t == NULL) {
+		*tx = *ty = 0;
+		return true;
+	}
+
+	if (t->matrix[0][0] != IntToxFixed(1) ||
+	    t->matrix[0][1] != 0 ||
+	    t->matrix[1][0] != 0 ||
+	    t->matrix[1][1] != IntToxFixed(1) ||
+	    t->matrix[2][0] != 0 ||
+	    t->matrix[2][1] != 0 ||
+	    t->matrix[2][2] != IntToxFixed(1))
+		return false;
+
+	*tx = t->matrix[0][2];
+	*ty = t->matrix[1][2];
+	return true;
+}
+
+/**
+ * Returns whether the provided transform is a translation by a whole number
+ * of units.
+ *
+ * Performs the same identity checks as sna_transform_is_translation() and
+ * additionally rejects translations with a non-zero fractional part.
+ * On success the integer part of the translation is stored in *tx and *ty.
+ * A null transform is reported as a translation by (0, 0).
+ * When false is returned, *tx and *ty are left unmodified.
+ */
+bool
+sna_transform_is_integer_translation(const PictTransform *t, int16_t *tx, int16_t *ty)
+{
+	if (t == NULL) {
+		*tx = *ty = 0;
+		return true;
+	}
+
+	if (t->matrix[0][0] != IntToxFixed(1) ||
+	    t->matrix[0][1] != 0 ||
+	    t->matrix[1][0] != 0 ||
+	    t->matrix[1][1] != IntToxFixed(1) ||
+	    t->matrix[2][0] != 0 ||
+	    t->matrix[2][1] != 0 ||
+	    t->matrix[2][2] != IntToxFixed(1))
+		return false;
+
+	/* Any fractional bits mean the shift is not a whole number of units. */
+	if (pixman_fixed_fraction(t->matrix[0][2]) ||
+	    pixman_fixed_fraction(t->matrix[1][2]))
+		return false;
+
+	*tx = pixman_fixed_to_int(t->matrix[0][2]);
+	*ty = pixman_fixed_to_int(t->matrix[1][2]);
+	return true;
+}
+
+/**
+ * Returns the floating-point coordinates transformed by the given transform.
+ *
+ * A null transform passes (x, y) through unchanged; otherwise the work is
+ * delegated to _sna_get_transformed_coordinates().
+ */
+void
+sna_get_transformed_coordinates(int x, int y,
+				const PictTransform *transform,
+				float *x_out, float *y_out)
+{
+	if (transform == NULL) {
+		*x_out = x;
+		*y_out = y;
+	} else
+		_sna_get_transformed_coordinates(x, y, transform, x_out, y_out);
+}
+
+/**
+ * Returns the un-normalized floating-point coordinates transformed by the given transform.
+ *
+ * A null transform yields (x, y, 1). Otherwise the three components produced
+ * by _sna_transform_point() are scaled down by 65536 (2^16) into floats.
+ * If _sna_transform_point() reports failure the outputs are set to (0, 0, 1).
+ */
+void
+sna_get_transformed_coordinates_3d(int x, int y,
+				   const PictTransform *transform,
+				   float *x_out, float *y_out, float *w_out)
+{
+	if (transform == NULL) {
+		*x_out = x;
+		*y_out = y;
+		*w_out = 1;
+	} else {
+		int64_t result[3];
+
+		if (_sna_transform_point(transform, x, y, result)) {
+			*x_out = result[0] / 65536.;
+			*y_out = result[1] / 65536.;
+			*w_out = result[2] / 65536.;
+		} else {
+			*x_out = *y_out = 0;
+			*w_out = 1.;
+		}
+	}
+}