From d605acf341d25a0e9b1a197cd17bff2c5d44e8e6 Mon Sep 17 00:00:00 2001
From: "Sergey Semyonov (Serge)"
Date: Tue, 21 Jan 2014 14:20:59 +0000
Subject: [PATCH] intel-2d: sna-2.99.906

git-svn-id: svn://kolibrios.org@4501 a494cfbc-eb01-0410-851d-a64ba20cac60
---
 contrib/sdk/sources/Intel-2D/Makefile         |   10 +-
 contrib/sdk/sources/Intel-2D/intel_driver.h   |    1 +
 contrib/sdk/sources/Intel-2D/intel_list.h     |    2 +-
 contrib/sdk/sources/Intel-2D/sna/brw/brw_wm.c |   16 +-
 .../sdk/sources/Intel-2D/sna/gen3_render.c    | 2479 +++++++++++++++--
 .../sdk/sources/Intel-2D/sna/gen4_common.c    |   64 +
 .../sdk/sources/Intel-2D/sna/gen4_common.h    |   49 +
 .../sdk/sources/Intel-2D/sna/gen4_render.c    |  101 +-
 .../sdk/sources/Intel-2D/sna/gen4_vertex.c    |   87 +-
 .../sdk/sources/Intel-2D/sna/gen4_vertex.h    |    1 +
 .../sdk/sources/Intel-2D/sna/gen5_render.c    |   97 +-
 .../sdk/sources/Intel-2D/sna/gen6_common.c    |   71 +
 .../sdk/sources/Intel-2D/sna/gen6_common.h    |  139 +
 .../sdk/sources/Intel-2D/sna/gen6_render.c    |  194 +-
 .../sdk/sources/Intel-2D/sna/gen7_render.c    |  205 +-
 contrib/sdk/sources/Intel-2D/sna/kgem.c       | 1032 ++++---
 contrib/sdk/sources/Intel-2D/sna/kgem.h       |  169 +-
 contrib/sdk/sources/Intel-2D/sna/sna.c        |   28 +-
 contrib/sdk/sources/Intel-2D/sna/sna.h        |    9 +-
 contrib/sdk/sources/Intel-2D/sna/sna_reg.h    |   32 +-
 contrib/sdk/sources/Intel-2D/sna/sna_render.h |    5 +-
 contrib/sdk/sources/Intel-2D/uxa/uxa.c        |   14 +-
 22 files changed, 3686 insertions(+), 1119 deletions(-)
 create mode 100644 contrib/sdk/sources/Intel-2D/sna/gen4_common.c
 create mode 100644 contrib/sdk/sources/Intel-2D/sna/gen4_common.h
 create mode 100644 contrib/sdk/sources/Intel-2D/sna/gen6_common.c
 create mode 100644 contrib/sdk/sources/Intel-2D/sna/gen6_common.h

diff --git a/contrib/sdk/sources/Intel-2D/Makefile b/contrib/sdk/sources/Intel-2D/Makefile
index 9ebbd8f58e..5f5839aa79 100644
--- a/contrib/sdk/sources/Intel-2D/Makefile
+++ b/contrib/sdk/sources/Intel-2D/Makefile
@@ -13,6 +13,7 @@ STRIP = $(PREFIX)strip
 LDFLAGS:= -shared -s -nostdlib -T ../newlib/dll.lds --entry _DllStartup --image-base=0
 PXFLAGS:= --version-script pixlib.ver --output-def $(LIBRARY).orig.def --out-implib $(LIBRARY).dll.a
 SNAFLAGS:= --version-script sna.ver --output-def sna.def
+UXAFLAGS:= --version-script uxa.ver --output-def uxa.def
 INCLUDES= -I. 
-I../libdrm/intel -I../libdrm/include/drm -I./render_program -I../pixman -I../newlib/include @@ -25,7 +26,10 @@ DEFINES:= -DHAS_DEBUG_FULL=0 -DSHOW_BATCH=0 -DDEBUG_DUMP=0 SRC_PIXLIB = pixlib.c -SRC_SNA = sna/gen3_render.c \ +SRC_SNA = \ + sna/gen4_common.c \ + sna/gen6_common.c \ + sna/gen3_render.c \ sna/gen4_render.c \ sna/gen4_vertex.c \ sna/gen5_render.c \ @@ -70,7 +74,7 @@ endif # targets all:$(LIBRARY).dll intel-sna.drv -uxa:$(LIBRARY).dll +uxa:$(LIBRARY).dll intel-uxa.drv ebox:$(LIBRARY).dll @@ -86,7 +90,7 @@ intel-sna.drv: $(OBJ_SNA) Makefile mv -f $@ ../../bin intel-uxa.drv: $(OBJ_UXA) Makefile - $(LD) $(LDFLAGS) $(LIBPATH) -o $@ $(OBJ_UXA) $(LIBS) + $(LD) $(LDFLAGS) $(UXAFLAGS) $(LIBPATH) -o $@ $(OBJ_UXA) $(LIBS) $(STRIP) $@ mv -f $@ ../../bin diff --git a/contrib/sdk/sources/Intel-2D/intel_driver.h b/contrib/sdk/sources/Intel-2D/intel_driver.h index af74a9a076..b43370a01d 100644 --- a/contrib/sdk/sources/Intel-2D/intel_driver.h +++ b/contrib/sdk/sources/Intel-2D/intel_driver.h @@ -118,5 +118,6 @@ struct intel_device_info { const struct intel_device_info *intel_detect_chipset(struct pci_device *pci); +#define hosted() (0) #endif /* INTEL_DRIVER_H */ diff --git a/contrib/sdk/sources/Intel-2D/intel_list.h b/contrib/sdk/sources/Intel-2D/intel_list.h index 38e4d52fd3..42653c9f0a 100644 --- a/contrib/sdk/sources/Intel-2D/intel_list.h +++ b/contrib/sdk/sources/Intel-2D/intel_list.h @@ -261,7 +261,7 @@ static inline void list_move_tail(struct list *list, struct list *head) * @return True if the list contains one or more elements or False otherwise. */ static inline bool -list_is_empty(struct list *head) +list_is_empty(const struct list *head) { return head->next == head; } diff --git a/contrib/sdk/sources/Intel-2D/sna/brw/brw_wm.c b/contrib/sdk/sources/Intel-2D/sna/brw/brw_wm.c index 8b73abee00..ccfad0cfe0 100644 --- a/contrib/sdk/sources/Intel-2D/sna/brw/brw_wm.c +++ b/contrib/sdk/sources/Intel-2D/sna/brw/brw_wm.c @@ -521,7 +521,7 @@ static void brw_wm_projective_st(struct brw_compile *p, int dw, if (p->gen >= 060) { /* First compute 1/z */ brw_PLN(p, - brw_message_reg(msg), + brw_vec8_grf(30, 0), brw_vec1_grf(uv+1, 0), brw_vec8_grf(2, 0)); @@ -532,22 +532,22 @@ static void brw_wm_projective_st(struct brw_compile *p, int dw, brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); } else brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); + brw_PLN(p, - brw_vec8_grf(28, 0), + brw_vec8_grf(26, 0), brw_vec1_grf(uv, 0), brw_vec8_grf(2, 0)); - brw_MUL(p, - brw_message_reg(msg), - brw_vec8_grf(28, 0), - brw_vec8_grf(30, 0)); - msg += dw/8; - brw_PLN(p, brw_vec8_grf(28, 0), brw_vec1_grf(uv, 0), brw_vec8_grf(4, 0)); + brw_MUL(p, brw_message_reg(msg), + brw_vec8_grf(26, 0), + brw_vec8_grf(30, 0)); + brw_MUL(p, + brw_message_reg(msg + dw/8), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0)); } else { diff --git a/contrib/sdk/sources/Intel-2D/sna/gen3_render.c b/contrib/sdk/sources/Intel-2D/sna/gen3_render.c index bd44f09c23..fe9c160310 100644 --- a/contrib/sdk/sources/Intel-2D/sna/gen3_render.c +++ b/contrib/sdk/sources/Intel-2D/sna/gen3_render.c @@ -1459,7 +1459,7 @@ static void gen3_vertex_close(struct sna *sna) sna->render.vertices = sna->render.vertex_data; sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); free_bo = bo; - } else if (IS_CPU_MAP(bo->map)) { + } else if (sna->render.vertices == MAP(bo->map__cpu)) { DBG(("%s: converting CPU map to GTT\n", __FUNCTION__)); sna->render.vertices = kgem_bo_map__gtt(&sna->kgem, bo); if (sna->render.vertices == NULL) { @@ -1657,6 
+1657,122 @@ gen3_render_composite_blt(struct sna *sna, op->prim_emit(sna, op, r); } +#if 0 +fastcall static void +gen3_render_composite_box(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box) +{ + struct sna_composite_rectangles r; + + DBG(("%s: src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, + op->src.offset[0], op->src.offset[1], + op->mask.offset[0], op->mask.offset[1], + op->dst.x, op->dst.y)); + + gen3_get_rectangles(sna, op, 1); + + r.dst.x = box->x1; + r.dst.y = box->y1; + r.width = box->x2 - box->x1; + r.height = box->y2 - box->y1; + r.src = r.mask = r.dst; + + op->prim_emit(sna, op, &r); +} + +static void +gen3_render_composite_boxes__blt(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->src.offset[0], op->src.offset[1], + op->mask.offset[0], op->mask.offset[1], + op->dst.x, op->dst.y)); + + do { + int nbox_this_time; + + nbox_this_time = gen3_get_rectangles(sna, op, nbox); + nbox -= nbox_this_time; + + do { + struct sna_composite_rectangles r; + + DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1)); + + r.dst.x = box->x1; r.dst.y = box->y1; + r.width = box->x2 - box->x1; + r.height = box->y2 - box->y1; + r.src = r.mask = r.dst; + + op->prim_emit(sna, op, &r); + box++; + } while (--nbox_this_time); + } while (nbox); +} + +static void +gen3_render_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen3_get_rectangles(sna, op, nbox); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + } while (nbox); +} + +static void +gen3_render_composite_boxes__thread(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen3_get_rectangles(sna, op, nbox); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} +#endif + static void gen3_render_composite_done(struct sna *sna, const struct sna_composite_op *op) @@ -1702,8 +1818,7 @@ gen3_render_reset(struct sna *sna) state->last_floats_per_vertex = 0; state->last_vertex_offset = 0; - if (sna->render.vbo != NULL && - !kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) { + if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) { DBG(("%s: discarding vbo as next access will stall: %d\n", __FUNCTION__, sna->render.vbo->presumed_offset)); discard_vbo(sna); @@ -1755,6 +1870,285 @@ static bool gen3_composite_channel_set_format(struct sna_composite_channel *chan } #if 0 +static bool source_is_covered(PicturePtr picture, + int x, int y, + int width, int height) +{ + int x1, y1, x2, 
y2; + + if (picture->repeat && picture->repeatType != RepeatNone) + return true; + + if (picture->pDrawable == NULL) + return false; + + if (picture->transform) { + pixman_box16_t sample; + + sample.x1 = x; + sample.y1 = y; + sample.x2 = x + width; + sample.y2 = y + height; + + pixman_transform_bounds(picture->transform, &sample); + + x1 = sample.x1; + x2 = sample.x2; + y1 = sample.y1; + y2 = sample.y2; + } else { + x1 = x; + y1 = y; + x2 = x + width; + y2 = y + height; + } + + return + x1 >= 0 && y1 >= 0 && + x2 <= picture->pDrawable->width && + y2 <= picture->pDrawable->height; +} + +static bool gen3_composite_channel_set_xformat(PicturePtr picture, + struct sna_composite_channel *channel, + int x, int y, + int width, int height) +{ + unsigned int i; + + if (PICT_FORMAT_A(picture->format) != 0) + return false; + + if (width == 0 || height == 0) + return false; + + if (!source_is_covered(picture, x, y, width, height)) + return false; + + for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) { + if (gen3_tex_formats[i].xfmt == picture->format) { + channel->card_format = gen3_tex_formats[i].card_fmt; + channel->rb_reversed = gen3_tex_formats[i].rb_reversed; + channel->alpha_fixup = true; + return true; + } + } + + return false; +} + +static int +gen3_init_solid(struct sna_composite_channel *channel, uint32_t color) +{ + channel->u.gen3.mode = color; + channel->u.gen3.type = SHADER_CONSTANT; + if (color == 0) + channel->u.gen3.type = SHADER_ZERO; + else if (color == 0xff000000) + channel->u.gen3.type = SHADER_BLACK; + else if (color == 0xffffffff) + channel->u.gen3.type = SHADER_WHITE; + + channel->bo = NULL; + channel->is_opaque = (color >> 24) == 0xff; + channel->is_affine = 1; + channel->alpha_fixup = 0; + channel->rb_reversed = 0; + + DBG(("%s: color=%08x, is_opaque=%d, type=%d\n", + __FUNCTION__, color, channel->is_opaque, channel->u.gen3.type)); + + /* for consistency */ + channel->repeat = RepeatNormal; + channel->filter = PictFilterNearest; + channel->pict_format = PICT_a8r8g8b8; + channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888; + + return 1; +} + +static void gen3_composite_channel_convert(struct sna_composite_channel *channel) +{ + if (channel->u.gen3.type == SHADER_TEXTURE) + channel->repeat = gen3_texture_repeat(channel->repeat); + else + channel->repeat = gen3_gradient_repeat(channel->repeat); + + channel->filter = gen3_filter(channel->filter); + if (channel->card_format == 0) + gen3_composite_channel_set_format(channel, channel->pict_format); + assert(channel->card_format); +} + +static bool gen3_gradient_setup(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int16_t ox, int16_t oy) +{ + int16_t dx, dy; + + if (picture->repeat == 0) { + channel->repeat = RepeatNone; + } else switch (picture->repeatType) { + case RepeatNone: + case RepeatNormal: + case RepeatPad: + case RepeatReflect: + channel->repeat = picture->repeatType; + break; + default: + return false; + } + + channel->bo = + sna_render_get_gradient(sna, + (PictGradient *)picture->pSourcePict); + if (channel->bo == NULL) + return false; + + channel->pict_format = PICT_a8r8g8b8; + channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888; + channel->filter = PictFilterNearest; + channel->is_affine = sna_transform_is_affine(picture->transform); + if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) { + DBG(("%s: integer translation (%d, %d), removing\n", + __FUNCTION__, dx, dy)); + ox += dx; + oy += dy; + channel->transform = NULL; + } else + channel->transform = 
picture->transform; + channel->width = channel->bo->pitch / 4; + channel->height = 1; + channel->offset[0] = ox; + channel->offset[1] = oy; + channel->scale[0] = channel->scale[1] = 1; + return true; +} + +static int +gen3_init_linear(struct sna *sna, + PicturePtr picture, + struct sna_composite_op *op, + struct sna_composite_channel *channel, + int ox, int oy) +{ + PictLinearGradient *linear = + (PictLinearGradient *)picture->pSourcePict; + float x0, y0, sf; + float dx, dy, offset; + int n; + + DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n", + __FUNCTION__, + xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y), + xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y))); + + if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y) + return 0; + + dx = xFixedToDouble(linear->p2.x - linear->p1.x); + dy = xFixedToDouble(linear->p2.y - linear->p1.y); + sf = dx*dx + dy*dy; + dx /= sf; + dy /= sf; + + x0 = xFixedToDouble(linear->p1.x); + y0 = xFixedToDouble(linear->p1.y); + offset = dx*x0 + dy*y0; + + n = op->u.gen3.num_constants; + channel->u.gen3.constants = FS_C0 + n / 4; + op->u.gen3.constants[n++] = dx; + op->u.gen3.constants[n++] = dy; + op->u.gen3.constants[n++] = -offset; + op->u.gen3.constants[n++] = 0; + + if (!gen3_gradient_setup(sna, picture, channel, ox, oy)) + return -1; + + channel->u.gen3.type = SHADER_LINEAR; + op->u.gen3.num_constants = n; + + DBG(("%s: dx=%f, dy=%f, offset=%f, constants=%d\n", + __FUNCTION__, dx, dy, -offset, channel->u.gen3.constants - FS_C0)); + return 1; +} + +static int +gen3_init_radial(struct sna *sna, + PicturePtr picture, + struct sna_composite_op *op, + struct sna_composite_channel *channel, + int ox, int oy) +{ + PictRadialGradient *radial = (PictRadialGradient *)picture->pSourcePict; + double dx, dy, dr, r1; + int n; + + dx = xFixedToDouble(radial->c2.x - radial->c1.x); + dy = xFixedToDouble(radial->c2.y - radial->c1.y); + dr = xFixedToDouble(radial->c2.radius - radial->c1.radius); + + r1 = xFixedToDouble(radial->c1.radius); + + n = op->u.gen3.num_constants; + channel->u.gen3.constants = FS_C0 + n / 4; + if (radial->c2.x == radial->c1.x && radial->c2.y == radial->c1.y) { + if (radial->c2.radius == radial->c1.radius) { + channel->u.gen3.type = SHADER_ZERO; + return 1; + } + + op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.x) / dr; + op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.y) / dr; + op->u.gen3.constants[n++] = 1. 
/ dr; + op->u.gen3.constants[n++] = -r1 / dr; + + channel->u.gen3.mode = RADIAL_ONE; + } else { + op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.x); + op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.y); + op->u.gen3.constants[n++] = r1; + op->u.gen3.constants[n++] = -4 * (dx*dx + dy*dy - dr*dr); + + op->u.gen3.constants[n++] = -2 * dx; + op->u.gen3.constants[n++] = -2 * dy; + op->u.gen3.constants[n++] = -2 * r1 * dr; + op->u.gen3.constants[n++] = 1 / (2 * (dx*dx + dy*dy - dr*dr)); + + channel->u.gen3.mode = RADIAL_TWO; + } + + if (!gen3_gradient_setup(sna, picture, channel, ox, oy)) + return -1; + + channel->u.gen3.type = SHADER_RADIAL; + op->u.gen3.num_constants = n; + return 1; +} + +static bool +sna_picture_is_clear(PicturePtr picture, + int x, int y, int w, int h, + uint32_t *color) +{ + struct sna_pixmap *priv; + + if (!picture->pDrawable) + return false; + + priv = sna_pixmap(get_drawable_pixmap(picture->pDrawable)); + if (priv == NULL || !priv->clear) + return false; + + if (!source_is_covered(picture, x, y, w, h)) + return false; + + *color = priv->clear_color; + return true; +} + static int gen3_composite_picture(struct sna *sna, PicturePtr picture, @@ -1874,72 +2268,35 @@ gen3_composite_picture(struct sna *sna, return sna_render_pixmap_bo(sna, channel, pixmap, x, y, w, h, dst_x, dst_y); } - -static inline bool -source_use_blt(struct sna *sna, PicturePtr picture) -{ - /* If it is a solid, try to use the BLT paths */ - if (!picture->pDrawable) - return picture->pSourcePict->type == SourcePictTypeSolidFill; - - if (picture->pDrawable->width == 1 && - picture->pDrawable->height == 1 && - picture->repeat) - return true; - - if (too_large(picture->pDrawable->width, picture->pDrawable->height)) - return true; - - return !is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER); -} - -static bool -try_blt(struct sna *sna, - PicturePtr dst, - PicturePtr src, - int width, int height) -{ - if (sna->kgem.mode != KGEM_RENDER) { - DBG(("%s: already performing BLT\n", __FUNCTION__)); - return true; - } - - if (too_large(width, height)) { - DBG(("%s: operation too large for 3D pipe (%d, %d)\n", - __FUNCTION__, width, height)); - return true; - } - - if (too_large(dst->pDrawable->width, dst->pDrawable->height)) { - DBG(("%s: target too large for 3D pipe (%d, %d)\n", - __FUNCTION__, - dst->pDrawable->width, dst->pDrawable->height)); - return true; - } - - /* is the source picture only in cpu memory e.g. a shm pixmap? 
*/ - return source_use_blt(sna, src); -} #endif static void gen3_align_vertex(struct sna *sna, const struct sna_composite_op *op) { - if (op->floats_per_vertex != sna->render_state.gen3.last_floats_per_vertex) { - if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect) - gen3_vertex_finish(sna); + int vertex_index; - DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n", - sna->render_state.gen3.last_floats_per_vertex, - op->floats_per_vertex, - sna->render.vertex_index, - (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex)); - sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; - sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex; - assert(sna->render.vertex_used < sna->render.vertex_size - op->floats_per_rect); - sna->render_state.gen3.last_floats_per_vertex = op->floats_per_vertex; + if (op->floats_per_vertex == sna->render_state.gen3.last_floats_per_vertex) + return; + + DBG(("aligning vertex: was %d, now %d floats per vertex\n", + sna->render_state.gen3.last_floats_per_vertex, + op->floats_per_vertex)); + + assert(op->floats_per_rect == 3*op->floats_per_vertex); + + vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; + if ((int)sna->render.vertex_size - vertex_index * op->floats_per_vertex < 2*op->floats_per_rect) { + DBG(("%s: flushing vertex buffer: new index=%d, max=%d\n", + __FUNCTION__, vertex_index, sna->render.vertex_size / op->floats_per_vertex)); + if (gen3_vertex_finish(sna) < op->floats_per_vertex) + kgem_submit(&sna->kgem); + + vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; } + + sna->render.vertex_index = vertex_index; + sna->render.vertex_used = vertex_index * op->floats_per_vertex; } static inline bool is_constant_ps(uint32_t type) @@ -2059,6 +2416,58 @@ gen3_composite_fallback(struct sna *sna, return dst_use_cpu(dst_pixmap); } +static int +reuse_source(struct sna *sna, + PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y, + PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y) +{ + if (src_x != msk_x || src_y != msk_y) + return false; + + if (mask == src) { + *mc = *sc; + if (mc->bo) + kgem_bo_reference(mc->bo); + return true; + } + + if ((src->pDrawable == NULL || mask->pDrawable != src->pDrawable)) + return false; + + if (sc->is_solid) + return false; + + DBG(("%s: mask reuses source drawable\n", __FUNCTION__)); + + if (!sna_transform_equal(src->transform, mask->transform)) + return false; + + if (!sna_picture_alphamap_equal(src, mask)) + return false; + + if (!gen3_check_repeat(mask)) + return false; + + if (!gen3_check_filter(mask)) + return false; + + if (!gen3_check_format(mask)) + return false; + + DBG(("%s: reusing source channel for mask with a twist\n", + __FUNCTION__)); + + *mc = *sc; + mc->repeat = gen3_texture_repeat(mask->repeat ? mask->repeatType : RepeatNone); + mc->filter = gen3_filter(mask->filter); + mc->pict_format = mask->format; + gen3_composite_channel_set_format(mc, mask->format); + assert(mc->card_format); + if (mc->bo) + kgem_bo_reference(mc->bo); + return true; +} + static bool gen3_render_composite(struct sna *sna, uint8_t op, @@ -2083,7 +2492,6 @@ gen3_render_composite(struct sna *sna, * 3D -> 2D context switch. 
*/ if (mask == NULL && - try_blt(sna, dst, src, width, height) && sna_blt_composite(sna, op, src, dst, src_x, src_y, @@ -2093,7 +2501,7 @@ gen3_render_composite(struct sna *sna, return true; if (gen3_composite_fallback(sna, op, src, mask, dst)) - return false; + goto fallback; if (need_tiling(sna, width, height)) return sna_tiling_composite(op, src, mask, dst, @@ -2117,7 +2525,7 @@ gen3_render_composite(struct sna *sna, if (!sna_render_composite_redirect(sna, tmp, dst_x, dst_y, width, height, op > PictOpSrc || dst->pCompositeClip->data)) - return false; + goto fallback; } tmp->u.gen3.num_constants = 0; @@ -2406,8 +2814,8 @@ gen3_render_composite(struct sna *sna, goto cleanup_mask; } - gen3_emit_composite_state(sna, tmp); gen3_align_vertex(sna, tmp); + gen3_emit_composite_state(sna, tmp); return true; cleanup_mask: @@ -2419,8 +2827,1719 @@ cleanup_src: cleanup_dst: if (tmp->redirect.real_bo) kgem_bo_destroy(&sna->kgem, tmp->dst.bo); +fallback: + return (mask == NULL && + sna_blt_composite(sna, + op, src, dst, + src_x, src_y, + dst_x, dst_y, + width, height, + tmp, true)); +} + +static void +gen3_emit_composite_spans_vertex(struct sna *sna, + const struct sna_composite_spans_op *op, + int16_t x, int16_t y, + float opacity) +{ + gen3_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y); + gen3_emit_composite_texcoord(sna, &op->base.src, x, y); + OUT_VERTEX(opacity); +} + +fastcall static void +gen3_emit_composite_spans_primitive_zero(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 6; + + v[0] = op->base.dst.x + box->x2; + v[1] = op->base.dst.y + box->y2; + + v[2] = op->base.dst.x + box->x1; + v[3] = v[1]; + + v[4] = v[2]; + v[5] = op->base.dst.x + box->y1; +} + +fastcall static void +gen3_emit_composite_spans_primitive_zero__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, float *v) +{ + do { + v[0] = op->base.dst.x + b->box.x2; + v[1] = op->base.dst.y + b->box.y2; + + v[2] = op->base.dst.x + b->box.x1; + v[3] = v[1]; + + v[4] = v[2]; + v[5] = op->base.dst.x + b->box.y1; + + v += 6; + b++; + } while (--nbox); +} + +fastcall static void +gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 6; + + v[0] = box->x2; + v[3] = v[1] = box->y2; + v[4] = v[2] = box->x1; + v[5] = box->y1; +} + +fastcall static void +gen3_emit_composite_spans_primitive_zero_no_offset__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, float *v) +{ + do { + v[0] = b->box.x2; + v[3] = v[1] = b->box.y2; + v[4] = v[2] = b->box.x1; + v[5] = b->box.y1; + + b++; + v += 6; + } while (--nbox); +} + +fastcall static void +gen3_emit_composite_spans_primitive_constant(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; + + v[0] = op->base.dst.x + box->x2; + v[6] = v[3] = op->base.dst.x + box->x1; + v[4] = v[1] = op->base.dst.y + box->y2; + v[7] = op->base.dst.y + box->y1; + v[8] = v[5] = v[2] = opacity; +} + +fastcall static void +gen3_emit_composite_spans_primitive_constant__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int 
nbox, + float *v) +{ + do { + v[0] = op->base.dst.x + b->box.x2; + v[6] = v[3] = op->base.dst.x + b->box.x1; + v[4] = v[1] = op->base.dst.y + b->box.y2; + v[7] = op->base.dst.y + b->box.y1; + v[8] = v[5] = v[2] = b->alpha; + + v += 9; + b++; + } while (--nbox); +} + +fastcall static void +gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; + + v[0] = box->x2; + v[6] = v[3] = box->x1; + v[4] = v[1] = box->y2; + v[7] = box->y1; + v[8] = v[5] = v[2] = opacity; +} + +fastcall static void +gen3_emit_composite_spans_primitive_constant_no_offset__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, float *v) +{ + do { + v[0] = b->box.x2; + v[6] = v[3] = b->box.x1; + v[4] = v[1] = b->box.y2; + v[7] = b->box.y1; + v[8] = v[5] = v[2] = b->alpha; + + v += 9; + b++; + } while (--nbox); +} + +fastcall static void +gen3_emit_composite_spans_primitive_identity_source(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 15; + + v[0] = op->base.dst.x + box->x2; + v[1] = op->base.dst.y + box->y2; + v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0]; + v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1]; + v[4] = opacity; + + v[5] = op->base.dst.x + box->x1; + v[6] = v[1]; + v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0]; + v[8] = v[3]; + v[9] = opacity; + + v[10] = v[5]; + v[11] = op->base.dst.y + box->y1; + v[12] = v[7]; + v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1]; + v[14] = opacity; +} + +fastcall static void +gen3_emit_composite_spans_primitive_identity_source__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, + float *v) +{ + do { + v[0] = op->base.dst.x + b->box.x2; + v[1] = op->base.dst.y + b->box.y2; + v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0]; + v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1]; + v[4] = b->alpha; + + v[5] = op->base.dst.x + b->box.x1; + v[6] = v[1]; + v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0]; + v[8] = v[3]; + v[9] = b->alpha; + + v[10] = v[5]; + v[11] = op->base.dst.y + b->box.y1; + v[12] = v[7]; + v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1]; + v[14] = b->alpha; + + v += 15; + b++; + } while (--nbox); +} + +fastcall static void +gen3_emit_composite_spans_primitive_affine_source(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + PictTransform *transform = op->base.src.transform; + float *v; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 15; + + v[0] = op->base.dst.x + box->x2; + v[6] = v[1] = op->base.dst.y + box->y2; + v[10] = v[5] = op->base.dst.x + box->x1; + v[11] = op->base.dst.y + box->y1; + v[14] = v[9] = v[4] = opacity; + + _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2, + (int)op->base.src.offset[1] + box->y2, + transform, op->base.src.scale, + &v[2], &v[3]); + + _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, + (int)op->base.src.offset[1] + box->y2, + transform, op->base.src.scale, + &v[7], &v[8]); + + _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, + 
(int)op->base.src.offset[1] + box->y1, + transform, op->base.src.scale, + &v[12], &v[13]); +} + +fastcall static void +gen3_emit_composite_spans_primitive_affine_source__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, + float *v) +{ + PictTransform *transform = op->base.src.transform; + + do { + v[0] = op->base.dst.x + b->box.x2; + v[6] = v[1] = op->base.dst.y + b->box.y2; + v[10] = v[5] = op->base.dst.x + b->box.x1; + v[11] = op->base.dst.y + b->box.y1; + v[14] = v[9] = v[4] = b->alpha; + + _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2, + (int)op->base.src.offset[1] + b->box.y2, + transform, op->base.src.scale, + &v[2], &v[3]); + + _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1, + (int)op->base.src.offset[1] + b->box.y2, + transform, op->base.src.scale, + &v[7], &v[8]); + + _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1, + (int)op->base.src.offset[1] + b->box.y1, + transform, op->base.src.scale, + &v[12], &v[13]); + v += 15; + b++; + } while (--nbox); +} + +fastcall static void +gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 15; + + v[0] = op->base.dst.x + box->x2; + v[1] = op->base.dst.y + box->y2; + v[2] = op->base.src.offset[0] + box->x2; + v[3] = op->base.src.offset[1] + box->y2; + v[4] = opacity; + + v[5] = op->base.dst.x + box->x1; + v[6] = v[1]; + v[7] = op->base.src.offset[0] + box->x1; + v[8] = v[3]; + v[9] = opacity; + + v[10] = v[5]; + v[11] = op->base.dst.y + box->y1; + v[12] = v[7]; + v[13] = op->base.src.offset[1] + box->y1; + v[14] = opacity; +} + +fastcall static void +gen3_emit_composite_spans_primitive_identity_gradient__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, + float *v) +{ + do { + v[0] = op->base.dst.x + b->box.x2; + v[1] = op->base.dst.y + b->box.y2; + v[2] = op->base.src.offset[0] + b->box.x2; + v[3] = op->base.src.offset[1] + b->box.y2; + v[4] = b->alpha; + + v[5] = op->base.dst.x + b->box.x1; + v[6] = v[1]; + v[7] = op->base.src.offset[0] + b->box.x1; + v[8] = v[3]; + v[9] = b->alpha; + + v[10] = v[5]; + v[11] = op->base.dst.y + b->box.y1; + v[12] = v[7]; + v[13] = op->base.src.offset[1] + b->box.y1; + v[14] = b->alpha; + + v += 15; + b++; + } while (--nbox); +} + +#if defined(sse2) && !defined(__x86_64__) +sse2 fastcall static void +gen3_emit_composite_spans_primitive_constant__sse2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; + + v[0] = op->base.dst.x + box->x2; + v[6] = v[3] = op->base.dst.x + box->x1; + v[4] = v[1] = op->base.dst.y + box->y2; + v[7] = op->base.dst.y + box->y1; + v[8] = v[5] = v[2] = opacity; +} + +sse2 fastcall static void +gen3_emit_composite_spans_primitive_constant__sse2__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, + float *v) +{ + do { + v[0] = op->base.dst.x + b->box.x2; + v[6] = v[3] = op->base.dst.x + b->box.x1; + v[4] = v[1] = op->base.dst.y + b->box.y2; + v[7] = op->base.dst.y + b->box.y1; + v[8] = v[5] = v[2] = b->alpha; + + v += 9; + b++; + } while (--nbox); +} + +sse2 fastcall static void +gen3_render_composite_spans_constant_box__sse2(struct sna *sna, + const struct 
sna_composite_spans_op *op, + const BoxRec *box, float opacity) +{ + float *v; + DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", + __FUNCTION__, + op->base.src.offset[0], op->base.src.offset[1], + opacity, + op->base.dst.x, op->base.dst.y, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1)); + + gen3_get_rectangles(sna, &op->base, 1); + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; + + v[0] = box->x2; + v[6] = v[3] = box->x1; + v[4] = v[1] = box->y2; + v[7] = box->y1; + v[8] = v[5] = v[2] = opacity; +} + +sse2 fastcall static void +gen3_render_composite_spans_constant_thread__sse2__boxes(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + op->base.dst.x, op->base.dst.y)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * 9; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + do { + v[0] = box->box.x2; + v[6] = v[3] = box->box.x1; + v[4] = v[1] = box->box.y2; + v[7] = box->box.y1; + v[8] = v[5] = v[2] = box->alpha; + v += 9; + box++; + } while (--nbox_this_time); + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +sse2 fastcall static void +gen3_emit_composite_spans_primitive_constant__sse2__no_offset(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; + + v[0] = box->x2; + v[6] = v[3] = box->x1; + v[4] = v[1] = box->y2; + v[7] = box->y1; + v[8] = v[5] = v[2] = opacity; +} + +sse2 fastcall static void +gen3_emit_composite_spans_primitive_constant__sse2__no_offset__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, float *v) +{ + do { + v[0] = b->box.x2; + v[6] = v[3] = b->box.x1; + v[4] = v[1] = b->box.y2; + v[7] = b->box.y1; + v[8] = v[5] = v[2] = b->alpha; + + v += 9; + b++; + } while (--nbox); +} + +sse2 fastcall static void +gen3_emit_composite_spans_primitive_identity_source__sse2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 15; + + v[0] = op->base.dst.x + box->x2; + v[1] = op->base.dst.y + box->y2; + v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0]; + v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1]; + v[4] = opacity; + + v[5] = op->base.dst.x + box->x1; + v[6] = v[1]; + v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0]; + v[8] = v[3]; + v[9] = opacity; + + v[10] = v[5]; + v[11] = op->base.dst.y + box->y1; + v[12] = v[7]; + v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1]; + v[14] = opacity; +} + +sse2 fastcall static void +gen3_emit_composite_spans_primitive_identity_source__sse2__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, + float *v) +{ + do { + v[0] = op->base.dst.x + b->box.x2; + v[1] = op->base.dst.y + b->box.y2; + 
v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0]; + v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1]; + v[4] = b->alpha; + + v[5] = op->base.dst.x + b->box.x1; + v[6] = v[1]; + v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0]; + v[8] = v[3]; + v[9] = b->alpha; + + v[10] = v[5]; + v[11] = op->base.dst.y + b->box.y1; + v[12] = v[7]; + v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1]; + v[14] = b->alpha; + + v += 15; + b++; + } while (--nbox); +} +sse2 fastcall static void +gen3_emit_composite_spans_primitive_affine_source__sse2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + PictTransform *transform = op->base.src.transform; + float *v; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 15; + + v[0] = op->base.dst.x + box->x2; + v[6] = v[1] = op->base.dst.y + box->y2; + v[10] = v[5] = op->base.dst.x + box->x1; + v[11] = op->base.dst.y + box->y1; + v[14] = v[9] = v[4] = opacity; + + _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2, + (int)op->base.src.offset[1] + box->y2, + transform, op->base.src.scale, + &v[2], &v[3]); + + _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, + (int)op->base.src.offset[1] + box->y2, + transform, op->base.src.scale, + &v[7], &v[8]); + + _sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1, + (int)op->base.src.offset[1] + box->y1, + transform, op->base.src.scale, + &v[12], &v[13]); +} + +sse2 fastcall static void +gen3_emit_composite_spans_primitive_affine_source__sse2__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, + float *v) +{ + PictTransform *transform = op->base.src.transform; + + do { + v[0] = op->base.dst.x + b->box.x2; + v[6] = v[1] = op->base.dst.y + b->box.y2; + v[10] = v[5] = op->base.dst.x + b->box.x1; + v[11] = op->base.dst.y + b->box.y1; + v[14] = v[9] = v[4] = b->alpha; + + _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2, + (int)op->base.src.offset[1] + b->box.y2, + transform, op->base.src.scale, + &v[2], &v[3]); + + _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1, + (int)op->base.src.offset[1] + b->box.y2, + transform, op->base.src.scale, + &v[7], &v[8]); + + _sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1, + (int)op->base.src.offset[1] + b->box.y1, + transform, op->base.src.scale, + &v[12], &v[13]); + v += 15; + b++; + } while (--nbox); +} + +sse2 fastcall static void +gen3_emit_composite_spans_primitive_identity_gradient__sse2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 15; + + v[0] = op->base.dst.x + box->x2; + v[1] = op->base.dst.y + box->y2; + v[2] = op->base.src.offset[0] + box->x2; + v[3] = op->base.src.offset[1] + box->y2; + v[4] = opacity; + + v[5] = op->base.dst.x + box->x1; + v[6] = v[1]; + v[7] = op->base.src.offset[0] + box->x1; + v[8] = v[3]; + v[9] = opacity; + + v[10] = v[5]; + v[11] = op->base.dst.y + box->y1; + v[12] = v[7]; + v[13] = op->base.src.offset[1] + box->y1; + v[14] = opacity; +} + +sse2 fastcall static void +gen3_emit_composite_spans_primitive_identity_gradient__sse2__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, + float *v) +{ + do { + v[0] = op->base.dst.x + b->box.x2; + v[1] = op->base.dst.y + b->box.y2; + 
v[2] = op->base.src.offset[0] + b->box.x2; + v[3] = op->base.src.offset[1] + b->box.y2; + v[4] = b->alpha; + + v[5] = op->base.dst.x + b->box.x1; + v[6] = v[1]; + v[7] = op->base.src.offset[0] + b->box.x1; + v[8] = v[3]; + v[9] = b->alpha; + + v[10] = v[5]; + v[11] = op->base.dst.y + b->box.y1; + v[12] = v[7]; + v[13] = op->base.src.offset[1] + b->box.y1; + v[14] = b->alpha; + + v += 15; + b++; + } while (--nbox); +} + +sse2 fastcall static void +gen3_emit_composite_spans_primitive_affine_gradient__sse2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + PictTransform *transform = op->base.src.transform; + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 15; + + v[0] = op->base.dst.x + box->x2; + v[1] = op->base.dst.y + box->y2; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x2, + op->base.src.offset[1] + box->y2, + transform, op->base.src.scale, + &v[2], &v[3]); + v[4] = opacity; + + v[5] = op->base.dst.x + box->x1; + v[6] = v[1]; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, + op->base.src.offset[1] + box->y2, + transform, op->base.src.scale, + &v[7], &v[8]); + v[9] = opacity; + + v[10] = v[5]; + v[11] = op->base.dst.y + box->y1; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, + op->base.src.offset[1] + box->y1, + transform, op->base.src.scale, + &v[12], &v[13]); + v[14] = opacity; +} + +sse2 fastcall static void +gen3_emit_composite_spans_primitive_affine_gradient__sse2__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, + float *v) +{ + PictTransform *transform = op->base.src.transform; + + do { + v[0] = op->base.dst.x + b->box.x2; + v[1] = op->base.dst.y + b->box.y2; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2, + op->base.src.offset[1] + b->box.y2, + transform, op->base.src.scale, + &v[2], &v[3]); + v[4] = b->alpha; + + v[5] = op->base.dst.x + b->box.x1; + v[6] = v[1]; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, + op->base.src.offset[1] + b->box.y2, + transform, op->base.src.scale, + &v[7], &v[8]); + v[9] = b->alpha; + + v[10] = v[5]; + v[11] = op->base.dst.y + b->box.y1; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, + op->base.src.offset[1] + b->box.y1, + transform, op->base.src.scale, + &v[12], &v[13]); + v[14] = b->alpha; + v += 15; + b++; + } while (--nbox); +} +#endif + +fastcall static void +gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + PictTransform *transform = op->base.src.transform; + float *v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 15; + + v[0] = op->base.dst.x + box->x2; + v[1] = op->base.dst.y + box->y2; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x2, + op->base.src.offset[1] + box->y2, + transform, op->base.src.scale, + &v[2], &v[3]); + v[4] = opacity; + + v[5] = op->base.dst.x + box->x1; + v[6] = v[1]; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, + op->base.src.offset[1] + box->y2, + transform, op->base.src.scale, + &v[7], &v[8]); + v[9] = opacity; + + v[10] = v[5]; + v[11] = op->base.dst.y + box->y1; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, + op->base.src.offset[1] + box->y1, + transform, op->base.src.scale, + &v[12], &v[13]); + v[14] = opacity; +} + +fastcall static void 
+gen3_emit_composite_spans_primitive_affine_gradient__boxes(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, + int nbox, + float *v) +{ + PictTransform *transform = op->base.src.transform; + + do { + v[0] = op->base.dst.x + b->box.x2; + v[1] = op->base.dst.y + b->box.y2; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2, + op->base.src.offset[1] + b->box.y2, + transform, op->base.src.scale, + &v[2], &v[3]); + v[4] = b->alpha; + + v[5] = op->base.dst.x + b->box.x1; + v[6] = v[1]; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, + op->base.src.offset[1] + b->box.y2, + transform, op->base.src.scale, + &v[7], &v[8]); + v[9] = b->alpha; + + v[10] = v[5]; + v[11] = op->base.dst.y + b->box.y1; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, + op->base.src.offset[1] + b->box.y1, + transform, op->base.src.scale, + &v[12], &v[13]); + v[14] = b->alpha; + v += 15; + b++; + } while (--nbox); +} + +fastcall static void +gen3_emit_composite_spans_primitive(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + gen3_emit_composite_spans_vertex(sna, op, + box->x2, box->y2, + opacity); + gen3_emit_composite_spans_vertex(sna, op, + box->x1, box->y2, + opacity); + gen3_emit_composite_spans_vertex(sna, op, + box->x1, box->y1, + opacity); +} + +fastcall static void +gen3_render_composite_spans_constant_box(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, float opacity) +{ + float *v; + DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", + __FUNCTION__, + op->base.src.offset[0], op->base.src.offset[1], + opacity, + op->base.dst.x, op->base.dst.y, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1)); + + gen3_get_rectangles(sna, &op->base, 1); + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; + + v[0] = box->x2; + v[6] = v[3] = box->x1; + v[4] = v[1] = box->y2; + v[7] = box->y1; + v[8] = v[5] = v[2] = opacity; +} + +fastcall static void +gen3_render_composite_spans_constant_thread_boxes(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + op->base.dst.x, op->base.dst.y)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * 9; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + do { + v[0] = box->box.x2; + v[6] = v[3] = box->box.x1; + v[4] = v[1] = box->box.y2; + v[7] = box->box.y1; + v[8] = v[5] = v[2] = box->alpha; + v += 9; + box++; + } while (--nbox_this_time); + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +fastcall static void +gen3_render_composite_spans_box(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, float opacity) +{ + DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n", + __FUNCTION__, + op->base.src.offset[0], op->base.src.offset[1], + opacity, + op->base.dst.x, op->base.dst.y, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1)); + + gen3_get_rectangles(sna, 
&op->base, 1); + op->prim_emit(sna, op, box, opacity); +} + +static void +gen3_render_composite_spans_boxes(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, int nbox, + float opacity) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + opacity, + op->base.dst.x, op->base.dst.y)); + + do { + int nbox_this_time; + + nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox); + nbox -= nbox_this_time; + + do { + DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1)); + + op->prim_emit(sna, op, box++, opacity); + } while (--nbox_this_time); + } while (nbox); +} + +fastcall static void +gen3_render_composite_spans_boxes__thread(struct sna *sna, + const struct sna_composite_spans_op *op, + const struct sna_opacity_box *box, + int nbox) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + op->base.dst.x, op->base.dst.y)); + + sna_vertex_lock(&sna->render); + do { + int nbox_this_time; + float *v; + + nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox); + assert(nbox_this_time); + nbox -= nbox_this_time; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect; + + sna_vertex_acquire__locked(&sna->render); + sna_vertex_unlock(&sna->render); + + op->emit_boxes(op, box, nbox_this_time, v); + box += nbox_this_time; + + sna_vertex_lock(&sna->render); + sna_vertex_release__locked(&sna->render); + } while (nbox); + sna_vertex_unlock(&sna->render); +} + +fastcall static void +gen3_render_composite_spans_done(struct sna *sna, + const struct sna_composite_spans_op *op) +{ + if (sna->render.vertex_offset) + gen3_vertex_flush(sna); + + DBG(("%s()\n", __FUNCTION__)); + + if (op->base.src.bo) + kgem_bo_destroy(&sna->kgem, op->base.src.bo); + + sna_render_composite_redirect_done(sna, &op->base); +} + +static bool +gen3_check_composite_spans(struct sna *sna, + uint8_t op, PicturePtr src, PicturePtr dst, + int16_t width, int16_t height, unsigned flags) +{ + if (op >= ARRAY_SIZE(gen3_blend_op)) + return false; + + if (gen3_composite_fallback(sna, op, src, NULL, dst)) + return false; + + if (need_tiling(sna, width, height) && + !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) { + DBG(("%s: fallback, tiled operation not on GPU\n", + __FUNCTION__)); + return false; + } + + return true; +} + +static bool +gen3_render_composite_spans(struct sna *sna, + uint8_t op, + PicturePtr src, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + unsigned flags, + struct sna_composite_spans_op *tmp) +{ + bool no_offset; + + DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__, + src_x, src_y, dst_x, dst_y, width, height)); + + assert(gen3_check_composite_spans(sna, op, src, dst, width, height, flags)); + + if (need_tiling(sna, width, height)) { + DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n", + __FUNCTION__, width, height)); + return sna_tiling_composite_spans(op, src, dst, + src_x, src_y, dst_x, dst_y, + width, height, flags, tmp); + } + + if (!gen3_composite_set_target(sna, &tmp->base, dst, + dst_x, dst_y, width, height)) { + DBG(("%s: unable to set render target\n", + __FUNCTION__)); + return false; + } + + tmp->base.op = op; + tmp->base.rb_reversed = gen3_dst_rb_reversed(tmp->base.dst.format); + if 
(too_large(tmp->base.dst.width, tmp->base.dst.height) || + !gen3_check_pitch_3d(tmp->base.dst.bo)) { + if (!sna_render_composite_redirect(sna, &tmp->base, + dst_x, dst_y, width, height, + true)) + return false; + } + + tmp->base.src.u.gen3.type = SHADER_TEXTURE; + tmp->base.src.is_affine = true; + DBG(("%s: preparing source\n", __FUNCTION__)); + switch (gen3_composite_picture(sna, src, &tmp->base, &tmp->base.src, + src_x, src_y, + width, height, + dst_x, dst_y, + dst->polyMode == PolyModePrecise)) { + case -1: + goto cleanup_dst; + case 0: + tmp->base.src.u.gen3.type = SHADER_ZERO; + break; + case 1: + gen3_composite_channel_convert(&tmp->base.src); + break; + } + DBG(("%s: source type=%d\n", __FUNCTION__, tmp->base.src.u.gen3.type)); + + if (tmp->base.src.u.gen3.type != SHADER_ZERO) + tmp->base.mask.u.gen3.type = SHADER_OPACITY; + + no_offset = tmp->base.dst.x == 0 && tmp->base.dst.y == 0; + tmp->box = gen3_render_composite_spans_box; + tmp->boxes = gen3_render_composite_spans_boxes; + tmp->thread_boxes = gen3_render_composite_spans_boxes__thread; + tmp->done = gen3_render_composite_spans_done; + tmp->prim_emit = gen3_emit_composite_spans_primitive; + switch (tmp->base.src.u.gen3.type) { + case SHADER_NONE: + assert(0); + case SHADER_ZERO: + if (no_offset) { + tmp->prim_emit = gen3_emit_composite_spans_primitive_zero_no_offset; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero_no_offset__boxes; + } else { + tmp->prim_emit = gen3_emit_composite_spans_primitive_zero; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero__boxes; + } + break; + case SHADER_BLACK: + case SHADER_WHITE: + case SHADER_CONSTANT: + if (no_offset) { +#if defined(sse2) && !defined(__x86_64__) + if (sna->cpu_features & SSE2) { + tmp->box = gen3_render_composite_spans_constant_box__sse2; + tmp->thread_boxes = gen3_render_composite_spans_constant_thread__sse2__boxes; + tmp->prim_emit = gen3_emit_composite_spans_primitive_constant__sse2__no_offset; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__sse2__no_offset__boxes; + } else +#endif + { + tmp->box = gen3_render_composite_spans_constant_box; + tmp->thread_boxes = gen3_render_composite_spans_constant_thread_boxes; + tmp->prim_emit = gen3_emit_composite_spans_primitive_constant_no_offset; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant_no_offset__boxes; + } + } else { +#if defined(sse2) && !defined(__x86_64__) + if (sna->cpu_features & SSE2) { + tmp->prim_emit = gen3_emit_composite_spans_primitive_constant__sse2; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__sse2__boxes; + } else +#endif + { + tmp->prim_emit = gen3_emit_composite_spans_primitive_constant; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__boxes; + } + } + break; + case SHADER_LINEAR: + case SHADER_RADIAL: + if (tmp->base.src.transform == NULL) { +#if defined(sse2) && !defined(__x86_64__) + if (sna->cpu_features & SSE2) { + tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient__sse2; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__sse2__boxes; + } else +#endif + { + tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__boxes; + } + } else if (tmp->base.src.is_affine) { + tmp->base.src.scale[1] = tmp->base.src.scale[0] = 1. 
/ tmp->base.src.transform->matrix[2][2]; +#if defined(sse2) && !defined(__x86_64__) + if (sna->cpu_features & SSE2) { + tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient__sse2; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__sse2__boxes; + } else +#endif + { + tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__boxes; + } + } + break; + case SHADER_TEXTURE: + if (tmp->base.src.transform == NULL) { +#if defined(sse2) && !defined(__x86_64__) + if (sna->cpu_features & SSE2) { + tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source__sse2; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__sse2__boxes; + } else +#endif + { + tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__boxes; + } + } else if (tmp->base.src.is_affine) { + tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2]; + tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2]; +#if defined(sse2) && !defined(__x86_64__) + if (sna->cpu_features & SSE2) { + tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source__sse2; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__sse2__boxes; + } else +#endif + { + tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source; + tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__boxes; + } + } + break; + } + if (tmp->emit_boxes == NULL) + tmp->thread_boxes = NULL; + + tmp->base.mask.bo = NULL; + + tmp->base.floats_per_vertex = 2; + if (!is_constant_ps(tmp->base.src.u.gen3.type)) + tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3; + tmp->base.floats_per_vertex += + tmp->base.mask.u.gen3.type == SHADER_OPACITY; + tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex; + + if (!kgem_check_bo(&sna->kgem, + tmp->base.dst.bo, tmp->base.src.bo, + NULL)) { + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, + tmp->base.dst.bo, tmp->base.src.bo, + NULL)) + goto cleanup_src; + } + + gen3_align_vertex(sna, &tmp->base); + gen3_emit_composite_state(sna, &tmp->base); + return true; + +cleanup_src: + if (tmp->base.src.bo) + kgem_bo_destroy(&sna->kgem, tmp->base.src.bo); +cleanup_dst: + if (tmp->base.redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); return false; } + +static void +gen3_emit_video_state(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + PixmapPtr pixmap, + struct kgem_bo *dst_bo, + int width, int height, + bool bilinear) +{ + struct gen3_render_state *state = &sna->render_state.gen3; + uint32_t id, ms3, rewind; + + gen3_emit_target(sna, dst_bo, width, height, + sna_format_for_depth(pixmap->drawable.depth)); + + /* XXX share with composite? Is it worth the effort? 
*/ + if ((state->last_shader & (1<<31)) == 0) { + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) | + 2); + OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT)); + OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) | + S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) | + S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) | + S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) | + S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) | + S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) | + S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) | + S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT)); + OUT_BATCH((2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) | + (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) | + S6_COLOR_WRITE_ENABLE); + + state->last_blend = 0; + state->floats_per_vertex = 4; + } + + if (!is_planar_fourcc(frame->id)) { + rewind = sna->kgem.nbatch; + OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4); + OUT_BATCH(0x0000001); /* constant 0 */ + /* constant 0: brightness/contrast */ + OUT_BATCH_F(video->brightness / 128.0); + OUT_BATCH_F(video->contrast / 255.0); + OUT_BATCH_F(0.0); + OUT_BATCH_F(0.0); + if (state->last_constants && + memcmp(&sna->kgem.batch[state->last_constants], + &sna->kgem.batch[rewind], + 6*sizeof(uint32_t)) == 0) + sna->kgem.nbatch = rewind; + else + state->last_constants = rewind; + + rewind = sna->kgem.nbatch; + OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3); + OUT_BATCH(0x00000001); + OUT_BATCH(SS2_COLORSPACE_CONVERSION | + (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | + (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); + OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) | + (0 << SS3_TEXTUREMAP_INDEX_SHIFT) | + SS3_NORMALIZED_COORDS); + OUT_BATCH(0x00000000); + if (state->last_sampler && + memcmp(&sna->kgem.batch[state->last_sampler], + &sna->kgem.batch[rewind], + 5*sizeof(uint32_t)) == 0) + sna->kgem.nbatch = rewind; + else + state->last_sampler = rewind; + + OUT_BATCH(_3DSTATE_MAP_STATE | 3); + OUT_BATCH(0x00000001); /* texture map #1 */ + OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, + frame->bo, + I915_GEM_DOMAIN_SAMPLER << 16, + 0)); + + ms3 = MAPSURF_422; + switch (frame->id) { + case FOURCC_YUY2: + ms3 |= MT_422_YCRCB_NORMAL; + break; + case FOURCC_UYVY: + ms3 |= MT_422_YCRCB_SWAPY; + break; + } + ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT; + ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT; + OUT_BATCH(ms3); + OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT); + + id = 1<<31 | 1<<1 | !!video->brightness; + if (state->last_shader != id) { + state->last_shader = id; + id = sna->kgem.nbatch++; + + gen3_fs_dcl(FS_S0); + gen3_fs_dcl(FS_T0); + gen3_fs_texld(FS_OC, FS_S0, FS_T0); + if (video->brightness != 0) { + gen3_fs_add(FS_OC, + gen3_fs_operand_reg(FS_OC), + gen3_fs_operand(FS_C0, X, X, X, ZERO)); + } + + sna->kgem.batch[id] = + _3DSTATE_PIXEL_SHADER_PROGRAM | + (sna->kgem.nbatch - id - 2); + } + } else { + /* For the planar formats, we set up three samplers -- + * one for each plane, in a Y8 format. 
Because I + * couldn't get the special PLANAR_TO_PACKED + * shader setup to work, I did the manual pixel shader: + * + * y' = y - .0625 + * u' = u - .5 + * v' = v - .5; + * + * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' + * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' + * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' + * + * register assignment: + * r0 = (y',u',v',0) + * r1 = (y,y,y,y) + * r2 = (u,u,u,u) + * r3 = (v,v,v,v) + * OC = (r,g,b,1) + */ + rewind = sna->kgem.nbatch; + OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2)); + OUT_BATCH(0x000001f); /* constants 0-4 */ + /* constant 0: normalization offsets */ + OUT_BATCH_F(-0.0625); + OUT_BATCH_F(-0.5); + OUT_BATCH_F(-0.5); + OUT_BATCH_F(0.0); + /* constant 1: r coefficients */ + OUT_BATCH_F(1.1643); + OUT_BATCH_F(0.0); + OUT_BATCH_F(1.5958); + OUT_BATCH_F(0.0); + /* constant 2: g coefficients */ + OUT_BATCH_F(1.1643); + OUT_BATCH_F(-0.39173); + OUT_BATCH_F(-0.81290); + OUT_BATCH_F(0.0); + /* constant 3: b coefficients */ + OUT_BATCH_F(1.1643); + OUT_BATCH_F(2.017); + OUT_BATCH_F(0.0); + OUT_BATCH_F(0.0); + /* constant 4: brightness/contrast */ + OUT_BATCH_F(video->brightness / 128.0); + OUT_BATCH_F(video->contrast / 255.0); + OUT_BATCH_F(0.0); + OUT_BATCH_F(0.0); + if (state->last_constants && + memcmp(&sna->kgem.batch[state->last_constants], + &sna->kgem.batch[rewind], + 22*sizeof(uint32_t)) == 0) + sna->kgem.nbatch = rewind; + else + state->last_constants = rewind; + + rewind = sna->kgem.nbatch; + OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9); + OUT_BATCH(0x00000007); + /* sampler 0 */ + OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | + (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); + OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) | + (0 << SS3_TEXTUREMAP_INDEX_SHIFT) | + SS3_NORMALIZED_COORDS); + OUT_BATCH(0x00000000); + /* sampler 1 */ + OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | + (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); + OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) | + (1 << SS3_TEXTUREMAP_INDEX_SHIFT) | + SS3_NORMALIZED_COORDS); + OUT_BATCH(0x00000000); + /* sampler 2 */ + OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | + (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); + OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) | + (2 << SS3_TEXTUREMAP_INDEX_SHIFT) | + SS3_NORMALIZED_COORDS); + OUT_BATCH(0x00000000); + if (state->last_sampler && + memcmp(&sna->kgem.batch[state->last_sampler], + &sna->kgem.batch[rewind], + 11*sizeof(uint32_t)) == 0) + sna->kgem.nbatch = rewind; + else + state->last_sampler = rewind; + + OUT_BATCH(_3DSTATE_MAP_STATE | 9); + OUT_BATCH(0x00000007); + + OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, + frame->bo, + I915_GEM_DOMAIN_SAMPLER << 16, + 0)); + + ms3 = MAPSURF_8BIT | MT_8BIT_I8; + ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT; + ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT; + OUT_BATCH(ms3); + /* check to see if Y has special pitch than normal + * double u/v pitch, e.g i915 XvMC hw requires at + * least 1K alignment, so Y pitch might + * be same as U/V's.*/ + if (frame->pitch[1]) + OUT_BATCH(((frame->pitch[1] / 4) - 1) << MS4_PITCH_SHIFT); + else + OUT_BATCH(((frame->pitch[0] * 2 / 4) - 1) << MS4_PITCH_SHIFT); + + OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, + frame->bo, + I915_GEM_DOMAIN_SAMPLER << 16, + frame->UBufOffset)); + + ms3 = MAPSURF_8BIT | MT_8BIT_I8; + ms3 |= 
(frame->height / 2 - 1) << MS3_HEIGHT_SHIFT; + ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT; + OUT_BATCH(ms3); + OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT); + + OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, + frame->bo, + I915_GEM_DOMAIN_SAMPLER << 16, + frame->VBufOffset)); + + ms3 = MAPSURF_8BIT | MT_8BIT_I8; + ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT; + ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT; + OUT_BATCH(ms3); + OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT); + + id = 1<<31 | 2<<1 | !!video->brightness; + if (state->last_shader != id) { + state->last_shader = id; + id = sna->kgem.nbatch++; + + /* Declare samplers */ + gen3_fs_dcl(FS_S0); /* Y */ + gen3_fs_dcl(FS_S1); /* U */ + gen3_fs_dcl(FS_S2); /* V */ + gen3_fs_dcl(FS_T0); /* normalized coords */ + + /* Load samplers to temporaries. */ + gen3_fs_texld(FS_R1, FS_S0, FS_T0); + gen3_fs_texld(FS_R2, FS_S1, FS_T0); + gen3_fs_texld(FS_R3, FS_S2, FS_T0); + + /* Move the sampled YUV data in R[123] to the first + * 3 channels of R0. + */ + gen3_fs_mov_masked(FS_R0, MASK_X, + gen3_fs_operand_reg(FS_R1)); + gen3_fs_mov_masked(FS_R0, MASK_Y, + gen3_fs_operand_reg(FS_R2)); + gen3_fs_mov_masked(FS_R0, MASK_Z, + gen3_fs_operand_reg(FS_R3)); + + /* Normalize the YUV data */ + gen3_fs_add(FS_R0, gen3_fs_operand_reg(FS_R0), + gen3_fs_operand_reg(FS_C0)); + /* dot-product the YUV data in R0 by the vectors of + * coefficients for calculating R, G, and B, storing + * the results in the R, G, or B channels of the output + * color. The OC results are implicitly clamped + * at the end of the program. + */ + gen3_fs_dp3(FS_OC, MASK_X, + gen3_fs_operand_reg(FS_R0), + gen3_fs_operand_reg(FS_C1)); + gen3_fs_dp3(FS_OC, MASK_Y, + gen3_fs_operand_reg(FS_R0), + gen3_fs_operand_reg(FS_C2)); + gen3_fs_dp3(FS_OC, MASK_Z, + gen3_fs_operand_reg(FS_R0), + gen3_fs_operand_reg(FS_C3)); + /* Set alpha of the output to 1.0, by wiring W to 1 + * and not actually using the source. 
+ */ + gen3_fs_mov_masked(FS_OC, MASK_W, + gen3_fs_operand_one()); + + if (video->brightness != 0) { + gen3_fs_add(FS_OC, + gen3_fs_operand_reg(FS_OC), + gen3_fs_operand(FS_C4, X, X, X, ZERO)); + } + + sna->kgem.batch[id] = + _3DSTATE_PIXEL_SHADER_PROGRAM | + (sna->kgem.nbatch - id - 2); + } + } +} + +static void +gen3_video_get_batch(struct sna *sna, struct kgem_bo *bo) +{ + kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); + + if (!kgem_check_batch(&sna->kgem, 120) || + !kgem_check_reloc(&sna->kgem, 4) || + !kgem_check_exec(&sna->kgem, 2)) { + _kgem_submit(&sna->kgem); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + if (sna->render_state.gen3.need_invariant) + gen3_emit_invariant(sna); +} + +static int +gen3_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex) +{ + int size = floats_per_vertex * 3; + int rem = batch_space(sna) - 1; + + if (size * want > rem) + want = rem / size; + + return want; +} + +static bool +gen3_render_video(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + RegionPtr dstRegion, + PixmapPtr pixmap) +{ + struct sna_pixmap *priv = sna_pixmap(pixmap); + BoxPtr pbox = REGION_RECTS(dstRegion); + int nbox = REGION_NUM_RECTS(dstRegion); + int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1; + int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1; + int src_width = frame->src.x2 - frame->src.x1; + int src_height = frame->src.y2 - frame->src.y1; + float src_offset_x, src_offset_y; + float src_scale_x, src_scale_y; + int pix_xoff, pix_yoff; + struct kgem_bo *dst_bo; + bool bilinear; + int copy = 0; + + DBG(("%s: src:%dx%d (frame:%dx%d) -> dst:%dx%d\n", __FUNCTION__, + src_width, src_height, frame->width, frame->height, dst_width, dst_height)); + + dst_bo = priv->gpu_bo; + if (dst_bo == NULL) + return false; + + bilinear = src_width != dst_width || src_height != dst_height; + + src_scale_x = (float)src_width / dst_width / frame->width; + src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x; + + src_scale_y = (float)src_height / dst_height / frame->height; + src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y; + DBG(("%s: src offset (%f, %f), scale (%f, %f)\n", + __FUNCTION__, src_offset_x, src_offset_y, src_scale_x, src_scale_y)); + + if (too_large(pixmap->drawable.width, pixmap->drawable.height) || + !gen3_check_pitch_3d(dst_bo)) { + int bpp = pixmap->drawable.bitsPerPixel; + + if (too_large(dst_width, dst_height)) + return false; + + dst_bo = kgem_create_2d(&sna->kgem, + dst_width, dst_height, bpp, + kgem_choose_tiling(&sna->kgem, + I915_TILING_X, + dst_width, dst_height, bpp), + 0); + if (!dst_bo) + return false; + + pix_xoff = -dstRegion->extents.x1; + pix_yoff = -dstRegion->extents.y1; + copy = 1; + } else { + /* Set up the offset for translating from the given region + * (in screen coordinates) to the backing pixmap. 
+ */ +#ifdef COMPOSITE + pix_xoff = -pixmap->screen_x + pixmap->drawable.x; + pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +#else + pix_xoff = 0; + pix_yoff = 0; +#endif + + dst_width = pixmap->drawable.width; + dst_height = pixmap->drawable.height; + } + + gen3_video_get_batch(sna, dst_bo); + gen3_emit_video_state(sna, video, frame, pixmap, + dst_bo, dst_width, dst_height, bilinear); + do { + int nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4); + if (nbox_this_time == 0) { + gen3_video_get_batch(sna, dst_bo); + gen3_emit_video_state(sna, video, frame, pixmap, + dst_bo, dst_width, dst_height, bilinear); + nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4); + assert(nbox_this_time); + } + nbox -= nbox_this_time; + + OUT_BATCH(PRIM3D_RECTLIST | (12 * nbox_this_time - 1)); + do { + int box_x1 = pbox->x1; + int box_y1 = pbox->y1; + int box_x2 = pbox->x2; + int box_y2 = pbox->y2; + + pbox++; + + DBG(("%s: dst (%d, %d), (%d, %d) + (%d, %d); src (%f, %f), (%f, %f)\n", + __FUNCTION__, box_x1, box_y1, box_x2, box_y2, pix_xoff, pix_yoff, + box_x1 * src_scale_x + src_offset_x, + box_y1 * src_scale_y + src_offset_y, + box_x2 * src_scale_x + src_offset_x, + box_y2 * src_scale_y + src_offset_y)); + + /* bottom right */ + OUT_BATCH_F(box_x2 + pix_xoff); + OUT_BATCH_F(box_y2 + pix_yoff); + OUT_BATCH_F(box_x2 * src_scale_x + src_offset_x); + OUT_BATCH_F(box_y2 * src_scale_y + src_offset_y); + + /* bottom left */ + OUT_BATCH_F(box_x1 + pix_xoff); + OUT_BATCH_F(box_y2 + pix_yoff); + OUT_BATCH_F(box_x1 * src_scale_x + src_offset_x); + OUT_BATCH_F(box_y2 * src_scale_y + src_offset_y); + + /* top left */ + OUT_BATCH_F(box_x1 + pix_xoff); + OUT_BATCH_F(box_y1 + pix_yoff); + OUT_BATCH_F(box_x1 * src_scale_x + src_offset_x); + OUT_BATCH_F(box_y1 * src_scale_y + src_offset_y); + } while (--nbox_this_time); + } while (nbox); + + if (copy) { +#ifdef COMPOSITE + pix_xoff = -pixmap->screen_x + pixmap->drawable.x; + pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +#else + pix_xoff = 0; + pix_yoff = 0; +#endif + sna_blt_copy_boxes(sna, GXcopy, + dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1, + priv->gpu_bo, pix_xoff, pix_yoff, + pixmap->drawable.bitsPerPixel, + REGION_RECTS(dstRegion), + REGION_NUM_RECTS(dstRegion)); + + kgem_bo_destroy(&sna->kgem, dst_bo); + } + + if (!DAMAGE_IS_ALL(priv->gpu_damage)) { + if ((pix_xoff | pix_yoff) == 0) { + sna_damage_add(&priv->gpu_damage, dstRegion); + sna_damage_subtract(&priv->cpu_damage, dstRegion); + } else { + sna_damage_add_boxes(&priv->gpu_damage, + REGION_RECTS(dstRegion), + REGION_NUM_RECTS(dstRegion), + pix_xoff, pix_yoff); + sna_damage_subtract_boxes(&priv->cpu_damage, + REGION_RECTS(dstRegion), + REGION_NUM_RECTS(dstRegion), + pix_xoff, pix_yoff); + } + } + + return true; +} + #endif @@ -2504,159 +4623,81 @@ cleanup_dst: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +#if 0 +static bool +gen3_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, + uint32_t color, + int16_t x1, int16_t y1, + int16_t x2, int16_t y2, + uint8_t alu) +{ + struct sna_composite_op tmp; + +#if NO_FILL_ONE + return gen3_render_fill_one_try_blt(sna, dst, bo, color, + x1, y1, x2, y2, alu); +#endif + + /* Prefer to use the BLT if already engaged */ + if (prefer_fill_blt(sna) && + 
gen3_render_fill_one_try_blt(sna, dst, bo, color, + x1, y1, x2, y2, alu)) + return true; + + /* Must use the BLT if we can't RENDER... */ + if (!(alu == GXcopy || alu == GXclear) || + too_large(dst->drawable.width, dst->drawable.height) || + bo->pitch > MAX_3D_PITCH) + return gen3_render_fill_one_try_blt(sna, dst, bo, color, + x1, y1, x2, y2, alu); + + if (alu == GXclear) + color = 0; + + tmp.op = color == 0 ? PictOpClear : PictOpSrc; + tmp.dst.pixmap = dst; + tmp.dst.width = dst->drawable.width; + tmp.dst.height = dst->drawable.height; + tmp.dst.format = sna_format_for_depth(dst->drawable.depth); + tmp.dst.bo = bo; + tmp.floats_per_vertex = 2; + tmp.floats_per_rect = 6; + tmp.need_magic_ca_pass = 0; + tmp.has_component_alpha = 0; + tmp.rb_reversed = 0; + + gen3_init_solid(&tmp.src, + sna_rgba_for_color(color, dst->drawable.depth)); + tmp.mask.bo = NULL; + tmp.mask.u.gen3.type = SHADER_NONE; + tmp.u.gen3.num_constants = 0; + + if (!kgem_check_bo(&sna->kgem, bo, NULL)) { + kgem_submit(&sna->kgem); + + if (gen3_render_fill_one_try_blt(sna, dst, bo, color, + x1, y1, x2, y2, alu)) + return true; + + if (!kgem_check_bo(&sna->kgem, bo, NULL)) + return false; + } + + gen3_align_vertex(sna, &tmp); + gen3_emit_composite_state(sna, &tmp); + gen3_get_rectangles(sna, &tmp, 1); + DBG((" (%d, %d), (%d, %d): %x\n", x1, y1, x2, y2, color)); + OUT_VERTEX(x2); + OUT_VERTEX(y2); + OUT_VERTEX(x1); + OUT_VERTEX(y2); + OUT_VERTEX(x1); + OUT_VERTEX(y1); + gen3_vertex_flush(sna); + + return true; +} +#endif static void gen3_render_flush(struct sna *sna) { @@ -2808,7 +4849,7 @@ gen3_blit_tex(struct sna *sna, kgem_submit(&sna->kgem); } - gen3_emit_composite_state(sna, tmp); gen3_align_vertex(sna, tmp); + gen3_emit_composite_state(sna, tmp); return true; } diff --git a/contrib/sdk/sources/Intel-2D/sna/gen4_common.c b/contrib/sdk/sources/Intel-2D/sna/gen4_common.c new file mode 100644 index 0000000000..f3d36c3bac --- /dev/null +++ b/contrib/sdk/sources/Intel-2D/sna/gen4_common.c @@ -0,0 +1,64 @@ +/* + * Copyright © 2011-2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Chris Wilson + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "gen4_common.h" +#include "gen4_vertex.h" + +void gen4_render_flush(struct sna *sna) +{ + gen4_vertex_close(sna); + + assert(sna->render.vb_id == 0); + assert(sna->render.vertex_offset == 0); +} + +void gen4_render_retire(struct kgem *kgem) +{ + struct sna *sna; + + sna = container_of(kgem, struct sna, kgem); + if (sna->render.nvertex_reloc == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) { + DBG(("%s: resetting idle vbo\n", __FUNCTION__)); + sna->render.vertex_used = 0; + sna->render.vertex_index = 0; + } +} + +void gen4_render_expire(struct kgem *kgem) +{ + struct sna *sna; + + sna = container_of(kgem, struct sna, kgem); + if (sna->render.vbo && !sna->render.vertex_used) { + DBG(("%s: discarding vbo\n", __FUNCTION__)); + discard_vbo(sna); + } +} diff --git a/contrib/sdk/sources/Intel-2D/sna/gen4_common.h b/contrib/sdk/sources/Intel-2D/sna/gen4_common.h new file mode 100644 index 0000000000..de860bb00d --- /dev/null +++ b/contrib/sdk/sources/Intel-2D/sna/gen4_common.h @@ -0,0 +1,49 @@ +/* + * Copyright © 2011-2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Chris Wilson + * + */ + +#ifndef GEN4_COMMON_H +#define GEN4_COMMON_H + +#include "sna.h" + +inline static void +discard_vbo(struct sna *sna) +{ + kgem_bo_destroy(&sna->kgem, sna->render.vbo); + sna->render.vbo = NULL; + sna->render.vertices = sna->render.vertex_data; + sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); + sna->render.vertex_used = 0; + sna->render.vertex_index = 0; +} + +void gen4_render_flush(struct sna *sna); +void gen4_render_retire(struct kgem *kgem); +void gen4_render_expire(struct kgem *kgem); + +#endif /* GEN4_COMMON_H */ + diff --git a/contrib/sdk/sources/Intel-2D/sna/gen4_render.c b/contrib/sdk/sources/Intel-2D/sna/gen4_render.c index e214c68121..5fd764eea5 100644 --- a/contrib/sdk/sources/Intel-2D/sna/gen4_render.c +++ b/contrib/sdk/sources/Intel-2D/sna/gen4_render.c @@ -41,6 +41,7 @@ //#include "sna_video.h" #include "brw/brw.h" +#include "gen4_common.h" #include "gen4_render.h" #include "gen4_source.h" #include "gen4_vertex.h" @@ -549,9 +550,6 @@ static int gen4_get_rectangles__flush(struct sna *sna, if (!kgem_check_reloc_and_exec(&sna->kgem, 2)) return 0; - if (op->need_magic_ca_pass && sna->render.vbo) - return 0; - if (sna->render.vertex_offset) { gen4_vertex_flush(sna); if (gen4_magic_ca_pass(sna, op)) @@ -747,16 +745,10 @@ gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op) { assert(op->floats_per_rect == 3*op->floats_per_vertex); if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) { - if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect) - gen4_vertex_finish(sna); - - DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n", + DBG(("aligning vertex: was %d, now %d floats per vertex\n", sna->render_state.gen4.floats_per_vertex, - op->floats_per_vertex, - sna->render.vertex_index, - (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex)); - sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; - sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex; + op->floats_per_vertex)); + gen4_vertex_align(sna, op); sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex; } } @@ -1314,11 +1306,12 @@ gen4_render_video(struct sna *sna, if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { kgem_submit(&sna->kgem); - assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)); + if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) + return false; } - gen4_video_bind_surfaces(sna, &tmp); gen4_align_vertex(sna, &tmp); + gen4_video_bind_surfaces(sna, &tmp); /* Set up the offset for translating from the given region (in screen * coordinates) to the backing pixmap. @@ -1548,33 +1541,6 @@ gen4_composite_set_target(struct sna *sna, return true; } -static bool -try_blt(struct sna *sna, - PicturePtr dst, PicturePtr src, - int width, int height) -{ - if (sna->kgem.mode != KGEM_RENDER) { - DBG(("%s: already performing BLT\n", __FUNCTION__)); - return true; - } - - if (too_large(width, height)) { - DBG(("%s: operation too large for 3D pipe (%d, %d)\n", - __FUNCTION__, width, height)); - return true; - } - - if (too_large(dst->pDrawable->width, dst->pDrawable->height)) - return true; - - /* The blitter is much faster for solids */ - if (sna_picture_is_solid(src, NULL)) - return true; - - /* is the source picture only in cpu memory e.g. a shm pixmap? 
*/ - return picture_is_cpu(sna, src); -} - static bool check_gradient(PicturePtr picture, bool precise) { @@ -1803,7 +1769,6 @@ gen4_render_composite(struct sna *sna, return false; if (mask == NULL && - try_blt(sna, dst, src, width, height) && sna_blt_composite(sna, op, src, dst, src_x, src_y, @@ -1932,8 +1897,8 @@ gen4_render_composite(struct sna *sna, goto cleanup_mask; } - gen4_bind_surfaces(sna, tmp); gen4_align_vertex(sna, tmp); + gen4_bind_surfaces(sna, tmp); return true; cleanup_mask: @@ -1989,51 +1954,6 @@ cleanup_dst: - -static void -gen4_render_flush(struct sna *sna) -{ - gen4_vertex_close(sna); - - assert(sna->render.vb_id == 0); - assert(sna->render.vertex_offset == 0); -} - -static void -discard_vbo(struct sna *sna) -{ - kgem_bo_destroy(&sna->kgem, sna->render.vbo); - sna->render.vbo = NULL; - sna->render.vertices = sna->render.vertex_data; - sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); - sna->render.vertex_used = 0; - sna->render.vertex_index = 0; -} - -static void -gen4_render_retire(struct kgem *kgem) -{ - struct sna *sna; - - sna = container_of(kgem, struct sna, kgem); - if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) { - DBG(("%s: resetting idle vbo\n", __FUNCTION__)); - sna->render.vertex_used = 0; - sna->render.vertex_index = 0; - } -} - -static void -gen4_render_expire(struct kgem *kgem) -{ - struct sna *sna; - - sna = container_of(kgem, struct sna, kgem); - if (sna->render.vbo && !sna->render.vertex_used) { - DBG(("%s: discarding vbo\n", __FUNCTION__)); - discard_vbo(sna); - } -} static void gen4_render_reset(struct sna *sna) { @@ -2047,8 +1967,7 @@ static void gen4_render_reset(struct sna *sna) sna->render_state.gen4.drawrect_limit = -1; sna->render_state.gen4.surface_table = -1; - if (sna->render.vbo && - !kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) { + if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) { DBG(("%s: discarding unmappable vbo\n", __FUNCTION__)); discard_vbo(sna); } @@ -2407,8 +2326,8 @@ gen4_blit_tex(struct sna *sna, kgem_submit(&sna->kgem); } - gen4_bind_surfaces(sna, tmp); gen4_align_vertex(sna, tmp); + gen4_bind_surfaces(sna, tmp); return true; } diff --git a/contrib/sdk/sources/Intel-2D/sna/gen4_vertex.c b/contrib/sdk/sources/Intel-2D/sna/gen4_vertex.c index 91658a554a..cd6ff65322 100644 --- a/contrib/sdk/sources/Intel-2D/sna/gen4_vertex.c +++ b/contrib/sdk/sources/Intel-2D/sna/gen4_vertex.c @@ -38,6 +38,29 @@ #define sse2 #endif +void gen4_vertex_align(struct sna *sna, const struct sna_composite_op *op) +{ + int vertex_index; + + assert(op->floats_per_rect == 3*op->floats_per_vertex); + + vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; + if ((int)sna->render.vertex_size - vertex_index * op->floats_per_vertex < 2*op->floats_per_rect) { + DBG(("%s: flushing vertex buffer: new index=%d, max=%d\n", + __FUNCTION__, vertex_index, sna->render.vertex_size / op->floats_per_vertex)); + if (gen4_vertex_finish(sna) < op->floats_per_rect) { + kgem_submit(&sna->kgem); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + } + + vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; + assert(vertex_index * op->floats_per_vertex <= sna->render.vertex_size); + } + + sna->render.vertex_index = vertex_index; + sna->render.vertex_used = vertex_index * op->floats_per_vertex; +} + void gen4_vertex_flush(struct sna *sna) { DBG(("%s[%x] = %d\n", __FUNCTION__, @@ -45,7 +68,9 @@ void gen4_vertex_flush(struct sna *sna) 
sna->render.vertex_index - sna->render.vertex_start)); assert(sna->render.vertex_offset); + assert(sna->render.vertex_offset <= sna->kgem.nbatch); assert(sna->render.vertex_index > sna->render.vertex_start); + assert(sna->render.vertex_used <= sna->render.vertex_size); sna->kgem.batch[sna->render.vertex_offset] = sna->render.vertex_index - sna->render.vertex_start; @@ -62,11 +87,14 @@ int gen4_vertex_finish(struct sna *sna) sna->render.vertex_used, sna->render.vertex_size)); assert(sna->render.vertex_offset == 0); assert(sna->render.vertex_used); + assert(sna->render.vertex_used <= sna->render.vertex_size); sna_vertex_wait__locked(&sna->render); /* Note: we only need dword alignment (currently) */ + hint = CREATE_GTT_MAP; + bo = sna->render.vbo; if (bo) { for (i = 0; i < sna->render.nvertex_reloc; i++) { @@ -88,11 +116,15 @@ int gen4_vertex_finish(struct sna *sna) sna->render.vb_id = 0; kgem_bo_destroy(&sna->kgem, bo); + hint |= CREATE_CACHED | CREATE_NO_THROTTLE; + } else { + if (kgem_is_idle(&sna->kgem)) { + sna->render.vertices = sna->render.vertex_data; + sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); + return 0; + } } - hint = CREATE_GTT_MAP; - if (bo) - hint |= CREATE_CACHED | CREATE_NO_THROTTLE; size = 256*1024; assert(!sna->render.active); @@ -163,7 +195,7 @@ void gen4_vertex_close(struct sna *sna) sna->render.vertices = sna->render.vertex_data; sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); free_bo = bo; - } else if (IS_CPU_MAP(bo->map) && !sna->kgem.has_llc) { + } else if (!sna->kgem.has_llc && sna->render.vertices == MAP(bo->map__cpu)) { DBG(("%s: converting CPU map to GTT\n", __FUNCTION__)); sna->render.vertices = kgem_bo_map__gtt(&sna->kgem, sna->render.vbo); @@ -176,9 +208,16 @@ void gen4_vertex_close(struct sna *sna) } } else { - if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) { + int size; + + size = sna->kgem.nbatch; + size += sna->kgem.batch_size - sna->kgem.surface; + size += sna->render.vertex_used; + + if (size <= 1024) { DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__, sna->render.vertex_used, sna->kgem.nbatch)); + assert(sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface); memcpy(sna->kgem.batch + sna->kgem.nbatch, sna->render.vertex_data, sna->render.vertex_used * 4); @@ -186,6 +225,37 @@ void gen4_vertex_close(struct sna *sna) bo = NULL; sna->kgem.nbatch += sna->render.vertex_used; } else { + size = 256 * 1024; + do { + bo = kgem_create_linear(&sna->kgem, size, + CREATE_GTT_MAP | CREATE_NO_RETIRE | CREATE_NO_THROTTLE | CREATE_CACHED); + } while (bo == NULL && (size>>=1) > sizeof(float)*sna->render.vertex_used); + + sna->render.vertices = NULL; + if (bo) + sna->render.vertices = kgem_bo_map(&sna->kgem, bo); + if (sna->render.vertices != NULL) { + DBG(("%s: new vbo: %d / %d\n", __FUNCTION__, + sna->render.vertex_used, __kgem_bo_size(bo)/4)); + + assert(sizeof(float)*sna->render.vertex_used <= __kgem_bo_size(bo)); + memcpy(sna->render.vertices, + sna->render.vertex_data, + sizeof(float)*sna->render.vertex_used); + + size = __kgem_bo_size(bo)/4; + if (size >= UINT16_MAX) + size = UINT16_MAX - 1; + + sna->render.vbo = bo; + sna->render.vertex_size = size; + } else { + DBG(("%s: tmp vbo: %d\n", __FUNCTION__, + sna->render.vertex_used)); + + if (bo) + kgem_bo_destroy(&sna->kgem, bo); + bo = kgem_create_linear(&sna->kgem, 4*sna->render.vertex_used, CREATE_NO_THROTTLE); @@ -195,11 +265,14 @@ void gen4_vertex_close(struct sna *sna) kgem_bo_destroy(&sna->kgem, bo); bo = NULL; } - DBG(("%s: new vbo: %d\n", 
__FUNCTION__, - sna->render.vertex_used)); + + assert(sna->render.vbo == NULL); + sna->render.vertices = sna->render.vertex_data; + sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); free_bo = bo; } } + } assert(sna->render.nvertex_reloc); for (i = 0; i < sna->render.nvertex_reloc; i++) { diff --git a/contrib/sdk/sources/Intel-2D/sna/gen4_vertex.h b/contrib/sdk/sources/Intel-2D/sna/gen4_vertex.h index 31c81d684d..6eb1cc66d5 100644 --- a/contrib/sdk/sources/Intel-2D/sna/gen4_vertex.h +++ b/contrib/sdk/sources/Intel-2D/sna/gen4_vertex.h @@ -6,6 +6,7 @@ #include "sna.h" #include "sna_render.h" +void gen4_vertex_align(struct sna *sna, const struct sna_composite_op *op); void gen4_vertex_flush(struct sna *sna); int gen4_vertex_finish(struct sna *sna); void gen4_vertex_close(struct sna *sna); diff --git a/contrib/sdk/sources/Intel-2D/sna/gen5_render.c b/contrib/sdk/sources/Intel-2D/sna/gen5_render.c index 43c2226a1c..16351a0914 100644 --- a/contrib/sdk/sources/Intel-2D/sna/gen5_render.c +++ b/contrib/sdk/sources/Intel-2D/sna/gen5_render.c @@ -42,6 +42,7 @@ #include "brw/brw.h" #include "gen5_render.h" +#include "gen4_common.h" #include "gen4_source.h" #include "gen4_vertex.h" @@ -719,16 +720,10 @@ gen5_align_vertex(struct sna *sna, const struct sna_composite_op *op) { assert(op->floats_per_rect == 3*op->floats_per_vertex); if (op->floats_per_vertex != sna->render_state.gen5.floats_per_vertex) { - if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect) - gen4_vertex_finish(sna); - - DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n", + DBG(("aligning vertex: was %d, now %d floats per vertex\n", sna->render_state.gen5.floats_per_vertex, - op->floats_per_vertex, - sna->render.vertex_index, - (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex)); - sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; - sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex; + op->floats_per_vertex)); + gen4_vertex_align(sna, op); sna->render_state.gen5.floats_per_vertex = op->floats_per_vertex; } } @@ -942,10 +937,14 @@ gen5_emit_vertex_elements(struct sna *sna, inline static void gen5_emit_pipe_flush(struct sna *sna) { +#if 0 OUT_BATCH(GEN5_PIPE_CONTROL | (4 - 2)); OUT_BATCH(GEN5_PIPE_CONTROL_WC_FLUSH); OUT_BATCH(0); OUT_BATCH(0); +#else + OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); +#endif } static void @@ -1311,11 +1310,12 @@ gen5_render_video(struct sna *sna, if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { kgem_submit(&sna->kgem); - assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)); + if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) + return false; } - gen5_video_bind_surfaces(sna, &tmp); gen5_align_vertex(sna, &tmp); + gen5_video_bind_surfaces(sna, &tmp); /* Set up the offset for translating from the given region (in screen * coordinates) to the backing pixmap. 
@@ -1452,7 +1452,6 @@ gen5_render_composite(struct sna *sna, } if (mask == NULL && - try_blt(sna, dst, src, width, height) && sna_blt_composite(sna, op, src, dst, src_x, src_y, @@ -1577,8 +1576,8 @@ gen5_render_composite(struct sna *sna, goto cleanup_mask; } - gen5_bind_surfaces(sna, tmp); gen5_align_vertex(sna, tmp); + gen5_bind_surfaces(sna, tmp); return true; cleanup_mask: @@ -1806,8 +1805,8 @@ gen5_render_composite_spans(struct sna *sna, goto cleanup_src; } - gen5_bind_surfaces(sna, &tmp->base); gen5_align_vertex(sna, &tmp->base); + gen5_bind_surfaces(sna, &tmp->base); return true; cleanup_src: @@ -1952,7 +1951,10 @@ fallback_blt: kgem_submit(&sna->kgem); if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { DBG(("%s: aperture check failed\n", __FUNCTION__)); - goto fallback_tiled_src; + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + if (tmp.redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp.dst.bo); + goto fallback_blt; } } @@ -1963,8 +1965,8 @@ fallback_blt: src_dx += tmp.src.offset[0]; src_dy += tmp.src.offset[1]; - gen5_copy_bind_surfaces(sna, &tmp); gen5_align_vertex(sna, &tmp); + gen5_copy_bind_surfaces(sna, &tmp); do { int n_this_time; @@ -1999,8 +2001,6 @@ fallback_blt: kgem_bo_destroy(&sna->kgem, tmp.src.bo); return true; -fallback_tiled_src: - kgem_bo_destroy(&sna->kgem, tmp.src.bo); fallback_tiled_dst: if (tmp.redirect.real_bo) kgem_bo_destroy(&sna->kgem, tmp.dst.bo); @@ -2021,16 +2021,6 @@ fallback_tiled: } #endif - -static void -gen5_render_flush(struct sna *sna) -{ - gen4_vertex_close(sna); - - assert(sna->render.vb_id == 0); - assert(sna->render.vertex_offset == 0); -} - static void gen5_render_context_switch(struct kgem *kgem, int new_mode) @@ -2060,42 +2050,6 @@ gen5_render_context_switch(struct kgem *kgem, } } -static void -discard_vbo(struct sna *sna) -{ - kgem_bo_destroy(&sna->kgem, sna->render.vbo); - sna->render.vbo = NULL; - sna->render.vertices = sna->render.vertex_data; - sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); - sna->render.vertex_used = 0; - sna->render.vertex_index = 0; -} - -static void -gen5_render_retire(struct kgem *kgem) -{ - struct sna *sna; - - sna = container_of(kgem, struct sna, kgem); - if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) { - DBG(("%s: resetting idle vbo\n", __FUNCTION__)); - sna->render.vertex_used = 0; - sna->render.vertex_index = 0; - } -} - -static void -gen5_render_expire(struct kgem *kgem) -{ - struct sna *sna; - - sna = container_of(kgem, struct sna, kgem); - if (sna->render.vbo && !sna->render.vertex_used) { - DBG(("%s: discarding vbo\n", __FUNCTION__)); - discard_vbo(sna); - } -} - static void gen5_render_reset(struct sna *sna) { sna->render_state.gen5.needs_invariant = true; @@ -2107,8 +2061,7 @@ static void gen5_render_reset(struct sna *sna) sna->render_state.gen5.drawrect_limit = -1; sna->render_state.gen5.surface_table = -1; - if (sna->render.vbo && - !kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) { + if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) { DBG(("%s: discarding unmappable vbo\n", __FUNCTION__)); discard_vbo(sna); } @@ -2351,8 +2304,8 @@ const char *gen5_render_init(struct sna *sna, const char *backend) return backend; sna->kgem.context_switch = gen5_render_context_switch; - sna->kgem.retire = gen5_render_retire; - sna->kgem.expire = gen5_render_expire; + sna->kgem.retire = gen4_render_retire; + sna->kgem.expire = gen4_render_expire; #if 0 #if !NO_COMPOSITE @@ -2362,7 +2315,7 @@ const char *gen5_render_init(struct sna *sna, const char 
*backend) #if !NO_COMPOSITE_SPANS sna->render.check_composite_spans = gen5_check_composite_spans; sna->render.composite_spans = gen5_render_composite_spans; - if (sna->PciInfo->device_id == 0x0044) + if (intel_get_device_id(sna->scrn) == 0x0044) sna->render.prefer_gpu |= PREFER_GPU_SPANS; #endif sna->render.video = gen5_render_video; @@ -2378,7 +2331,7 @@ const char *gen5_render_init(struct sna *sna, const char *backend) sna->render.blit_tex = gen5_blit_tex; sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT; - sna->render.flush = gen5_render_flush; + sna->render.flush = gen4_render_flush; sna->render.reset = gen5_render_reset; sna->render.fini = gen5_render_fini; @@ -2466,8 +2419,8 @@ gen5_blit_tex(struct sna *sna, kgem_submit(&sna->kgem); } - gen5_bind_surfaces(sna, tmp); gen5_align_vertex(sna, tmp); - return true; + gen5_bind_surfaces(sna, tmp); + return true; } diff --git a/contrib/sdk/sources/Intel-2D/sna/gen6_common.c b/contrib/sdk/sources/Intel-2D/sna/gen6_common.c new file mode 100644 index 0000000000..8789109f2c --- /dev/null +++ b/contrib/sdk/sources/Intel-2D/sna/gen6_common.c @@ -0,0 +1,71 @@ +/* + * Copyright © 2011-2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Chris Wilson + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "gen6_common.h" +#include "gen4_vertex.h" + +void +gen6_render_context_switch(struct kgem *kgem, + int new_mode) +{ + if (kgem->nbatch) { + DBG(("%s: from %d to %d, submit batch\n", __FUNCTION__, kgem->mode, new_mode)); + _kgem_submit(kgem); + } + + if (kgem->nexec) { + DBG(("%s: from %d to %d, reset incomplete batch\n", __FUNCTION__, kgem->mode, new_mode)); + kgem_reset(kgem); + } + + assert(kgem->nbatch == 0); + assert(kgem->nreloc == 0); + assert(kgem->nexec == 0); + + kgem->ring = new_mode; +} + +void gen6_render_retire(struct kgem *kgem) +{ + struct sna *sna; + + if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire)) + kgem->ring = kgem->mode; + + sna = container_of(kgem, struct sna, kgem); + if (sna->render.nvertex_reloc == 0 && + sna->render.vbo && + !kgem_bo_is_busy(sna->render.vbo)) { + DBG(("%s: resetting idle vbo\n", __FUNCTION__)); + sna->render.vertex_used = 0; + sna->render.vertex_index = 0; + } +} diff --git a/contrib/sdk/sources/Intel-2D/sna/gen6_common.h b/contrib/sdk/sources/Intel-2D/sna/gen6_common.h new file mode 100644 index 0000000000..5ebdf09a95 --- /dev/null +++ b/contrib/sdk/sources/Intel-2D/sna/gen6_common.h @@ -0,0 +1,139 @@ +/* + * Copyright © 2011-2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Chris Wilson + * + */ + +#ifndef GEN6_COMMON_H +#define GEN6_COMMON_H + +#include "sna.h" + +#define NO_RING_SWITCH 0 +#define PREFER_RENDER 0 + +static inline bool is_uncached(struct sna *sna, + struct kgem_bo *bo) +{ + return bo->scanout && !sna->kgem.has_wt; +} + +inline static bool can_switch_to_blt(struct sna *sna, + struct kgem_bo *bo, + unsigned flags) +{ + return false; +} + +inline static bool can_switch_to_render(struct sna *sna, + struct kgem_bo *bo) +{ + if (sna->kgem.ring == KGEM_RENDER) + return true; + + if (NO_RING_SWITCH) + return false; + + if (!sna->kgem.has_semaphores) + return false; + + if (bo && !RQ_IS_BLT(bo->rq) && !is_uncached(sna, bo)) + return true; + + return !kgem_ring_is_idle(&sna->kgem, KGEM_RENDER); +} + +static inline bool untiled_tlb_miss(struct kgem_bo *bo) +{ + if (kgem_bo_is_render(bo)) + return false; + + return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096; +} + +static int prefer_blt_bo(struct sna *sna, struct kgem_bo *bo) +{ + if (bo->rq) + return RQ_IS_BLT(bo->rq); + + if (sna->flags & SNA_POWERSAVE) + return true; + + return bo->tiling == I915_TILING_NONE || is_uncached(sna, bo); +} + +inline static bool force_blt_ring(struct sna *sna) +{ + if (sna->flags & SNA_POWERSAVE) + return true; + + if (sna->kgem.mode == KGEM_RENDER) + return false; + + if (sna->render_state.gt < 2) + return true; + + return false; +} + +inline static bool prefer_blt_ring(struct sna *sna, + struct kgem_bo *bo, + unsigned flags) +{ + assert(!force_blt_ring(sna)); + assert(!kgem_bo_is_render(bo)); + + return can_switch_to_blt(sna, bo, flags); +} + +inline static bool prefer_render_ring(struct sna *sna, + struct kgem_bo *bo) +{ + if (sna->flags & SNA_POWERSAVE) + return false; + + if (sna->render_state.gt < 2) + return false; + + return can_switch_to_render(sna, bo); +} + +inline static bool +prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp) +{ + return false; + +} + +static inline bool prefer_blt_fill(struct sna *sna, + struct kgem_bo *bo, + unsigned flags) +{ + return false; +} + +void gen6_render_context_switch(struct kgem *kgem, int new_mode); +void gen6_render_retire(struct kgem *kgem); + +#endif /* GEN6_COMMON_H */ diff --git a/contrib/sdk/sources/Intel-2D/sna/gen6_render.c b/contrib/sdk/sources/Intel-2D/sna/gen6_render.c index 8818017eac..b06238f5cc 100644 --- a/contrib/sdk/sources/Intel-2D/sna/gen6_render.c +++ b/contrib/sdk/sources/Intel-2D/sna/gen6_render.c @@ -39,6 +39,8 @@ #include "brw/brw.h" #include "gen6_render.h" +#include "gen6_common.h" +#include "gen4_common.h" #include "gen4_source.h" #include "gen4_vertex.h" @@ -74,6 +76,7 @@ struct gt_info { int max_vs_entries; int max_gs_entries; } urb; + int gt; }; static const struct gt_info gt1_info = { @@ -82,6 +85,7 @@ static const struct gt_info gt1_info = { .max_gs_threads = 21, .max_wm_threads = 40, .urb = { 32, 256, 256 }, + .gt = 1, }; static const struct gt_info gt2_info = { @@ -90,6 +94,7 @@ static const struct gt_info gt2_info = { .max_gs_threads = 60, .max_wm_threads = 80, .urb = { 64, 256, 256 }, + .gt = 2, }; static const uint32_t ps_kernel_packed[][4] = { @@ -872,21 +877,22 @@ gen6_emit_state(struct sna *sna, const struct sna_composite_op *op, uint16_t wm_binding_table) { - bool need_stall = wm_binding_table & 1; + bool need_flush, need_stall; assert(op->dst.bo->exec); - if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags))) - need_stall = false; + need_flush = + gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)) && + wm_binding_table & 1; gen6_emit_sampler(sna, 
GEN6_SAMPLER(op->u.gen6.flags)); gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2); gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2); gen6_emit_vertex_elements(sna, op); - need_stall |= gen6_emit_binding_table(sna, wm_binding_table & ~1); + need_stall = gen6_emit_binding_table(sna, wm_binding_table & ~1); if (gen6_emit_drawing_rectangle(sna, op)) need_stall = false; - if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { + if (need_flush || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { gen6_emit_flush(sna); kgem_clear_dirty(&sna->kgem); assert(op->dst.bo->exec); @@ -1317,16 +1323,10 @@ gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op) { assert (sna->render.vertex_offset == 0); if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) { - if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect) - gen4_vertex_finish(sna); - - DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n", + DBG(("aligning vertex: was %d, now %d floats per vertex\n", sna->render_state.gen6.floats_per_vertex, - op->floats_per_vertex, - sna->render.vertex_index, - (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex)); - sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; - sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex; + op->floats_per_vertex)); + gen4_vertex_align(sna, op); sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex; } assert((sna->render.vertex_used % op->floats_per_vertex) == 0); @@ -1657,8 +1657,8 @@ gen6_render_video(struct sna *sna, _kgem_set_mode(&sna->kgem, KGEM_RENDER); } - gen6_emit_video_state(sna, &tmp); gen6_align_vertex(sna, &tmp); + gen6_emit_video_state(sna, &tmp); /* Set up the offset for translating from the given region (in screen * coordinates) to the backing pixmap. 
@@ -1853,9 +1853,9 @@ gen6_composite_set_target(struct sna *sna, } else sna_render_picture_extents(dst, &box); -// op->dst.bo = sna_drawable_use_bo (dst->pDrawable, -// PREFER_GPU | FORCE_GPU | RENDER_GPU, -// &box, &op->damage); + op->dst.bo = sna_drawable_use_bo(dst->pDrawable, + PREFER_GPU | FORCE_GPU | RENDER_GPU, + &box, &op->damage); if (op->dst.bo == NULL) return false; @@ -1925,7 +1925,13 @@ gen6_render_composite(struct sna *sna, return true; if (gen6_composite_fallback(sna, src, mask, dst)) - return false; + return (mask == NULL && + sna_blt_composite(sna, op, + src, dst, + src_x, src_y, + dst_x, dst_y, + width, height, + tmp, true)); if (need_tiling(sna, width, height)) return sna_tiling_composite(op, src, mask, dst, @@ -2051,8 +2057,8 @@ gen6_render_composite(struct sna *sna, _kgem_set_mode(&sna->kgem, KGEM_RENDER); } + gen6_align_vertex(sna, tmp); gen6_emit_composite_state(sna, tmp); - gen6_align_vertex(sna, tmp); return true; cleanup_mask: @@ -2284,8 +2290,8 @@ gen6_render_composite_spans(struct sna *sna, _kgem_set_mode(&sna->kgem, KGEM_RENDER); } - gen6_emit_composite_state(sna, &tmp->base); gen6_align_vertex(sna, &tmp->base); + gen6_emit_composite_state(sna, &tmp->base); return true; cleanup_src: @@ -2351,10 +2357,16 @@ static inline bool prefer_blt_copy(struct sna *sna, untiled_tlb_miss(dst_bo)) return true; + if (force_blt_ring(sna)) + return true; + if (kgem_bo_is_render(dst_bo) || kgem_bo_is_render(src_bo)) return false; + if (prefer_render_ring(sna, dst_bo)) + return false; + if (!prefer_blt_ring(sna, dst_bo, flags)) return false; @@ -2553,13 +2565,17 @@ fallback_blt: if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) { DBG(("%s: too large for a single operation\n", __FUNCTION__)); - goto fallback_tiled_src; + if (tmp.src.bo != src_bo) + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + if (tmp.redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp.dst.bo); + goto fallback_blt; } _kgem_set_mode(&sna->kgem, KGEM_RENDER); } - gen6_emit_copy_state(sna, &tmp); gen6_align_vertex(sna, &tmp); + gen6_emit_copy_state(sna, &tmp); do { int16_t *v; @@ -2596,9 +2612,6 @@ fallback_blt: kgem_bo_destroy(&sna->kgem, tmp.src.bo); return true; -fallback_tiled_src: - if (tmp.src.bo != src_bo) - kgem_bo_destroy(&sna->kgem, tmp.src.bo); fallback_tiled_dst: if (tmp.redirect.real_bo) kgem_bo_destroy(&sna->kgem, tmp.dst.bo); @@ -2720,8 +2733,8 @@ fallback: _kgem_set_mode(&sna->kgem, KGEM_RENDER); } - gen6_emit_copy_state(sna, &op->base); gen6_align_vertex(sna, &op->base); + gen6_emit_copy_state(sna, &op->base); op->blt = gen6_render_copy_blt; op->done = gen6_render_copy_done; @@ -2760,24 +2773,6 @@ gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) gen6_emit_state(sna, op, offset | dirty); } -static inline bool prefer_blt_fill(struct sna *sna, - struct kgem_bo *bo) -{ - if (PREFER_RENDER) - return PREFER_RENDER < 0; - - if (kgem_bo_is_render(bo)) - return false; - - if (untiled_tlb_miss(bo)) - return true; - - if (!prefer_blt_ring(sna, bo, 0)) - return false; - - return prefer_blt_bo(sna, bo); -} - static bool gen6_render_fill_boxes(struct sna *sna, CARD8 op, @@ -2799,7 +2794,8 @@ gen6_render_fill_boxes(struct sna *sna, return false; } - if (prefer_blt_fill(sna, dst_bo) || !gen6_check_dst_format(format)) { + if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) || + !gen6_check_dst_format(format)) { uint8_t alu = GXinvalid; if (op <= PictOpSrc) { @@ -2874,13 +2870,14 @@ gen6_render_fill_boxes(struct sna *sna, assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); 
assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); + kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { kgem_submit(&sna->kgem); assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); } - gen6_emit_fill_state(sna, &tmp); gen6_align_vertex(sna, &tmp); + gen6_emit_fill_state(sna, &tmp); do { int n_this_time; @@ -3009,12 +3006,12 @@ gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op) static bool gen6_render_fill(struct sna *sna, uint8_t alu, PixmapPtr dst, struct kgem_bo *dst_bo, - uint32_t color, + uint32_t color, unsigned flags, struct sna_fill_op *op) { DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color)); - if (prefer_blt_fill(sna, dst_bo) && + if (prefer_blt_fill(sna, dst_bo, flags) && sna_blt_fill(sna, alu, dst_bo, dst->drawable.bitsPerPixel, color, @@ -3053,13 +3050,14 @@ gen6_render_fill(struct sna *sna, uint8_t alu, assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER); assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX); + kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { kgem_submit(&sna->kgem); assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); } - gen6_emit_fill_state(sna, &op->base); gen6_align_vertex(sna, &op->base); + gen6_emit_fill_state(sna, &op->base); op->blt = gen6_render_op_fill_blt; op->box = gen6_render_op_fill_box; @@ -3097,7 +3095,7 @@ gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, int16_t *v; /* Prefer to use the BLT if already engaged */ - if (prefer_blt_fill(sna, bo) && + if (prefer_blt_fill(sna, bo, FILL_BOXES) && gen6_render_fill_one_try_blt(sna, dst, bo, color, x1, y1, x2, y2, alu)) return true; @@ -3133,6 +3131,7 @@ gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); + kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); if (!kgem_check_bo(&sna->kgem, bo, NULL)) { kgem_submit(&sna->kgem); if (!kgem_check_bo(&sna->kgem, bo, NULL)) { @@ -3141,8 +3140,8 @@ gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, } } - gen6_emit_fill_state(sna, &tmp); gen6_align_vertex(sna, &tmp); + gen6_emit_fill_state(sna, &tmp); gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state); @@ -3219,6 +3218,7 @@ gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER); assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX); + kgem_set_mode(&sna->kgem, KGEM_RENDER, bo); if (!kgem_check_bo(&sna->kgem, bo, NULL)) { kgem_submit(&sna->kgem); if (!kgem_check_bo(&sna->kgem, bo, NULL)) { @@ -3227,8 +3227,8 @@ gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) } } - gen6_emit_fill_state(sna, &tmp); gen6_align_vertex(sna, &tmp); + gen6_emit_fill_state(sna, &tmp); gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state); @@ -3251,60 +3251,6 @@ gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo) } #endif -static void gen6_render_flush(struct sna *sna) -{ - gen4_vertex_close(sna); - - assert(sna->render.vb_id == 0); - assert(sna->render.vertex_offset == 0); -} - -static void -gen6_render_context_switch(struct kgem *kgem, - int new_mode) -{ - if (kgem->nbatch) { - DBG(("%s: from %d to %d\n", __FUNCTION__, kgem->mode, new_mode)); - _kgem_submit(kgem); - } - - kgem->ring = new_mode; -} - -static void -gen6_render_retire(struct kgem *kgem) -{ - struct sna *sna; - - if (kgem->ring && (kgem->has_semaphores || 
!kgem->need_retire)) - kgem->ring = kgem->mode; - - sna = container_of(kgem, struct sna, kgem); - if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) { - DBG(("%s: resetting idle vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); - sna->render.vertex_used = 0; - sna->render.vertex_index = 0; - } -} - -static void -gen6_render_expire(struct kgem *kgem) -{ - struct sna *sna; - - sna = container_of(kgem, struct sna, kgem); - if (sna->render.vbo && !sna->render.vertex_used) { - DBG(("%s: discarding vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); - kgem_bo_destroy(kgem, sna->render.vbo); - assert(!sna->render.active); - sna->render.vbo = NULL; - sna->render.vertices = sna->render.vertex_data; - sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); - sna->render.vertex_used = 0; - sna->render.vertex_index = 0; - } -} - static void gen6_render_reset(struct sna *sna) { sna->render_state.gen6.needs_invariant = true; @@ -3320,6 +3266,11 @@ static void gen6_render_reset(struct sna *sna) sna->render_state.gen6.drawrect_limit = -1; sna->render_state.gen6.surface_table = -1; + if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) { + DBG(("%s: discarding unmappable vbo\n", __FUNCTION__)); + discard_vbo(sna); + } + sna->render.vertex_offset = 0; sna->render.nvertex_reloc = 0; sna->render.vb_id = 0; @@ -3330,17 +3281,17 @@ static void gen6_render_fini(struct sna *sna) kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo); } -static bool is_gt2(struct sna *sna) +static bool is_gt2(struct sna *sna, int devid) { - return sna->PciInfo->device_id & 0x30; + return devid & 0x30; } -static bool is_mobile(struct sna *sna) +static bool is_mobile(struct sna *sna, int devid) { - return (sna->PciInfo->device_id & 0xf) == 0x6; + return (devid & 0xf) == 0x6; } -static bool gen6_render_setup(struct sna *sna) +static bool gen6_render_setup(struct sna *sna, int devid) { struct gen6_render_state *state = &sna->render_state.gen6; struct sna_static_stream general; @@ -3348,8 +3299,9 @@ static bool gen6_render_setup(struct sna *sna) int i, j, k, l, m; state->info = >1_info; - if (is_gt2(sna)) + if (is_gt2(sna, devid)) state->info = >2_info; /* XXX requires GT_MODE WiZ disabled */ + state->gt = state->info->gt; sna_static_stream_init(&general); @@ -3420,12 +3372,14 @@ static bool gen6_render_setup(struct sna *sna) const char *gen6_render_init(struct sna *sna, const char *backend) { - if (!gen6_render_setup(sna)) + int devid = intel_get_device_id(sna); + + if (!gen6_render_setup(sna, devid)) return backend; sna->kgem.context_switch = gen6_render_context_switch; sna->kgem.retire = gen6_render_retire; - sna->kgem.expire = gen6_render_expire; + sna->kgem.expire = gen4_render_expire; #if 0 #if !NO_COMPOSITE @@ -3436,7 +3390,7 @@ const char *gen6_render_init(struct sna *sna, const char *backend) #if !NO_COMPOSITE_SPANS sna->render.check_composite_spans = gen6_check_composite_spans; sna->render.composite_spans = gen6_render_composite_spans; - if (is_mobile(sna)) + if (is_mobile(sna, devid)) sna->render.prefer_gpu |= PREFER_GPU_SPANS; #endif sna->render.video = gen6_render_video; @@ -3465,7 +3419,7 @@ const char *gen6_render_init(struct sna *sna, const char *backend) sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT; sna->render.blit_tex = gen6_blit_tex; - sna->render.flush = gen6_render_flush; + sna->render.flush = gen4_render_flush; sna->render.reset = gen6_render_reset; sna->render.fini = gen6_render_fini; @@ -3568,7 +3522,7 @@ gen6_blit_tex(struct sna *sna, 
// tmp->box = gen6_render_composite_box; tmp->done = gen6_render_composite_done; - kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); + kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); if (!kgem_check_bo(&sna->kgem, tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) { @@ -3576,8 +3530,8 @@ gen6_blit_tex(struct sna *sna, _kgem_set_mode(&sna->kgem, KGEM_RENDER); } - gen6_emit_composite_state(sna, tmp); gen6_align_vertex(sna, tmp); - return true; + gen6_emit_composite_state(sna, tmp); + return true; } diff --git a/contrib/sdk/sources/Intel-2D/sna/gen7_render.c b/contrib/sdk/sources/Intel-2D/sna/gen7_render.c index 68a5e44f9f..92331a3be0 100644 --- a/contrib/sdk/sources/Intel-2D/sna/gen7_render.c +++ b/contrib/sdk/sources/Intel-2D/sna/gen7_render.c @@ -42,10 +42,14 @@ #include "brw/brw.h" #include "gen7_render.h" +#include "gen4_common.h" #include "gen4_source.h" #include "gen4_vertex.h" +#include "gen6_common.h" +#define ALWAYS_INVALIDATE 0 #define ALWAYS_FLUSH 0 +#define ALWAYS_STALL 0 #define NO_COMPOSITE 0 #define NO_COMPOSITE_SPANS 0 @@ -1022,33 +1026,51 @@ gen7_emit_state(struct sna *sna, const struct sna_composite_op *op, uint16_t wm_binding_table) { + bool need_invalidate; + bool need_flush; bool need_stall; assert(op->dst.bo->exec); + need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo); + if (ALWAYS_INVALIDATE) + need_invalidate = true; + + need_flush = + sna->render_state.gen7.emit_flush && + wm_binding_table & GEN7_READS_DST(op->u.gen7.flags); + if (ALWAYS_FLUSH) + need_flush = true; + + wm_binding_table &= ~1; + + need_stall = sna->render_state.gen7.surface_table != wm_binding_table; + need_stall &= gen7_emit_drawing_rectangle(sna, op); + if (ALWAYS_STALL) + need_stall = true; + + if (need_invalidate) { + gen7_emit_pipe_invalidate(sna); + kgem_clear_dirty(&sna->kgem); + assert(op->dst.bo->exec); + kgem_bo_mark_dirty(op->dst.bo); + + need_flush = false; + need_stall = false; + } + if (need_flush) { + gen7_emit_pipe_flush(sna, need_stall); + need_stall = false; + } + if (need_stall) + gen7_emit_pipe_stall(sna); + gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags)); gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags)); gen7_emit_sf(sna, GEN7_VERTEX(op->u.gen7.flags) >> 2); gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags)); gen7_emit_vertex_elements(sna, op); - - need_stall = gen7_emit_binding_table(sna, wm_binding_table); - need_stall &= gen7_emit_drawing_rectangle(sna, op); - - if (ALWAYS_FLUSH || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) { - gen7_emit_pipe_invalidate(sna); - kgem_clear_dirty(&sna->kgem); - assert(op->dst.bo->exec); - kgem_bo_mark_dirty(op->dst.bo); - sna->render_state.gen7.emit_flush = false; - need_stall = false; - } - if (sna->render_state.gen7.emit_flush) { - gen7_emit_pipe_flush(sna, need_stall); - need_stall = false; - } - if (need_stall) - gen7_emit_pipe_stall(sna); + gen7_emit_binding_table(sna, wm_binding_table); sna->render_state.gen7.emit_flush = GEN7_READS_DST(op->u.gen7.flags); } @@ -1404,12 +1426,14 @@ static void gen7_emit_composite_state(struct sna *sna, const struct sna_composite_op *op) { uint32_t *binding_table; - uint16_t offset; + uint16_t offset, dirty; gen7_get_batch(sna, op); binding_table = gen7_composite_get_binding_table(sna, &offset); + dirty = kgem_bo_is_dirty(op->dst.bo); + binding_table[0] = gen7_bind_bo(sna, op->dst.bo, op->dst.width, op->dst.height, @@ -1438,23 +1462,16 @@ static void gen7_emit_composite_state(struct sna *sna, offset = sna->render_state.gen7.surface_table; } - 
gen7_emit_state(sna, op, offset); + gen7_emit_state(sna, op, offset | dirty); } static void gen7_align_vertex(struct sna *sna, const struct sna_composite_op *op) { if (op->floats_per_vertex != sna->render_state.gen7.floats_per_vertex) { - if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect) - gen4_vertex_finish(sna); - - DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n", - sna->render_state.gen7.floats_per_vertex, - op->floats_per_vertex, - sna->render.vertex_index, - (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex)); - sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; - sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex; + DBG(("aligning vertex: was %d, now %d floats per vertex\n", + sna->render_state.gen7.floats_per_vertex, op->floats_per_vertex)); + gen4_vertex_align(sna, op); sna->render_state.gen7.floats_per_vertex = op->floats_per_vertex; } } @@ -1548,7 +1565,7 @@ static void gen7_emit_video_state(struct sna *sna, int src_height[6]; int src_pitch[6]; uint32_t *binding_table; - uint16_t offset; + uint16_t offset, dirty; int n_src, n; gen7_get_batch(sna, op); @@ -1586,6 +1603,8 @@ static void gen7_emit_video_state(struct sna *sna, binding_table = gen7_composite_get_binding_table(sna, &offset); + dirty = kgem_bo_is_dirty(op->dst.bo); + binding_table[0] = gen7_bind_bo(sna, op->dst.bo, op->dst.width, op->dst.height, @@ -1602,7 +1621,7 @@ static void gen7_emit_video_state(struct sna *sna, src_surf_format); } - gen7_emit_state(sna, op, offset); + gen7_emit_state(sna, op, offset | dirty); } static bool @@ -1669,12 +1688,14 @@ gen7_render_video(struct sna *sna, kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo); if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) { kgem_submit(&sna->kgem); - assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)); + if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) + return false; + _kgem_set_mode(&sna->kgem, KGEM_RENDER); } - gen7_emit_video_state(sna, &tmp); gen7_align_vertex(sna, &tmp); + gen7_emit_video_state(sna, &tmp); /* Set up the offset for translating from the given region (in screen * coordinates) to the backing pixmap. 
@@ -1874,7 +1895,8 @@ gen7_render_fill_boxes(struct sna *sna, return false; } - if (prefer_blt_fill(sna, dst_bo) || !gen7_check_dst_format(format)) { + if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) || + !gen7_check_dst_format(format)) { uint8_t alu = GXinvalid; if (op <= PictOpSrc) { @@ -1949,11 +1971,17 @@ gen7_render_fill_boxes(struct sna *sna, kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { kgem_submit(&sna->kgem); - assert(kgem_check_bo(&sna->kgem, dst_bo, NULL)); + if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) { + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + if (tmp.redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp.dst.bo); + return false; + } + _kgem_set_mode(&sna->kgem, KGEM_RENDER); } - gen7_emit_fill_state(sna, &tmp); gen7_align_vertex(sna, &tmp); + gen7_emit_fill_state(sna, &tmp); do { int n_this_time; @@ -1987,60 +2015,6 @@ gen7_render_fill_boxes(struct sna *sna, } #endif -static void gen7_render_flush(struct sna *sna) -{ - gen4_vertex_close(sna); - - assert(sna->render.vb_id == 0); - assert(sna->render.vertex_offset == 0); -} - -static void -gen7_render_context_switch(struct kgem *kgem, - int new_mode) -{ - if (kgem->nbatch) { - DBG(("%s: switch rings %d -> %d\n", - __FUNCTION__, kgem->mode, new_mode)); - _kgem_submit(kgem); - } - - kgem->ring = new_mode; -} - -static void -gen7_render_retire(struct kgem *kgem) -{ - struct sna *sna; - - if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire)) - kgem->ring = kgem->mode; - - sna = container_of(kgem, struct sna, kgem); - if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) { - DBG(("%s: resetting idle vbo\n", __FUNCTION__)); - sna->render.vertex_used = 0; - sna->render.vertex_index = 0; - } -} - -static void -gen7_render_expire(struct kgem *kgem) -{ - struct sna *sna; - - sna = container_of(kgem, struct sna, kgem); - if (sna->render.vbo && !sna->render.vertex_used) { - DBG(("%s: discarding vbo\n", __FUNCTION__)); - kgem_bo_destroy(kgem, sna->render.vbo); - sna->render.vbo = NULL; - sna->render.vertices = sna->render.vertex_data; - sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); - sna->render.vertex_used = 0; - sna->render.vertex_index = 0; - } -} - static void gen7_render_reset(struct sna *sna) { sna->render_state.gen7.emit_flush = false; @@ -2056,6 +2030,11 @@ static void gen7_render_reset(struct sna *sna) sna->render_state.gen7.drawrect_limit = -1; sna->render_state.gen7.surface_table = -1; + if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) { + DBG(("%s: discarding unmappable vbo\n", __FUNCTION__)); + discard_vbo(sna); + } + sna->render.vertex_offset = 0; sna->render.nvertex_reloc = 0; sna->render.vb_id = 0; @@ -2066,23 +2045,23 @@ static void gen7_render_fini(struct sna *sna) kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo); } -static bool is_gt3(struct sna *sna) +static bool is_gt3(struct sna *sna, int devid) { assert(sna->kgem.gen == 075); - return sna->PciInfo->device_id & 0x20; + return devid & 0x20; } -static bool is_gt2(struct sna *sna) +static bool is_gt2(struct sna *sna, int devid) { - return sna->PciInfo->device_id & (is_hsw(sna)? 0x30 : 0x20); + return devid & (is_hsw(sna)? 
0x30 : 0x20); } -static bool is_mobile(struct sna *sna) +static bool is_mobile(struct sna *sna, int devid) { - return (sna->PciInfo->device_id & 0xf) == 0x6; + return (devid & 0xf) == 0x6; } -static bool gen7_render_setup(struct sna *sna) +static bool gen7_render_setup(struct sna *sna, int devid) { struct gen7_render_state *state = &sna->render_state.gen7; struct sna_static_stream general; @@ -2091,19 +2070,19 @@ static bool gen7_render_setup(struct sna *sna) if (is_ivb(sna)) { state->info = &ivb_gt_info; - if (sna->PciInfo->device_id & 0xf) { + if (devid & 0xf) { state->info = &ivb_gt1_info; - if (is_gt2(sna)) + if (is_gt2(sna, devid)) state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */ } } else if (is_byt(sna)) { state->info = &byt_gt_info; } else if (is_hsw(sna)) { state->info = &hsw_gt_info; - if (sna->PciInfo->device_id & 0xf) { - if (is_gt3(sna)) + if (devid & 0xf) { + if (is_gt3(sna, devid)) state->info = &hsw_gt3_info; - else if (is_gt2(sna)) + else if (is_gt2(sna, devid)) state->info = &hsw_gt2_info; else state->info = &hsw_gt1_info; @@ -2111,6 +2090,8 @@ static bool gen7_render_setup(struct sna *sna) } else return false; + state->gt = state->info->gt; + sna_static_stream_init(&general); /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer @@ -2175,12 +2156,14 @@ static bool gen7_render_setup(struct sna *sna) const char *gen7_render_init(struct sna *sna, const char *backend) { - if (!gen7_render_setup(sna)) + int devid = intel_get_device_id(sna); + + if (!gen7_render_setup(sna, devid)) return backend; - sna->kgem.context_switch = gen7_render_context_switch; - sna->kgem.retire = gen7_render_retire; - sna->kgem.expire = gen7_render_expire; + sna->kgem.context_switch = gen6_render_context_switch; + sna->kgem.retire = gen6_render_retire; + sna->kgem.expire = gen4_render_expire; #if 0 #if !NO_COMPOSITE @@ -2190,7 +2173,7 @@ const char *gen7_render_init(struct sna *sna, const char *backend) #if !NO_COMPOSITE_SPANS sna->render.check_composite_spans = gen7_check_composite_spans; sna->render.composite_spans = gen7_render_composite_spans; - if (is_mobile(sna) || is_gt2(sna) || is_byt(sna)) + if (is_mobile(sna, devid) || is_gt2(sna, devid) || is_byt(sna)) sna->render.prefer_gpu |= PREFER_GPU_SPANS; #endif sna->render.video = gen7_render_video; @@ -2219,7 +2202,7 @@ const char *gen7_render_init(struct sna *sna, const char *backend) sna->render.blit_tex = gen7_blit_tex; sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT; - sna->render.flush = gen7_render_flush; + sna->render.flush = gen4_render_flush; sna->render.reset = gen7_render_reset; sna->render.fini = gen7_render_fini; @@ -2312,7 +2295,7 @@ gen7_blit_tex(struct sna *sna, // tmp->box = gen7_render_composite_box; tmp->done = gen7_render_composite_done; - kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo); + kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo); if (!kgem_check_bo(&sna->kgem, tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) { @@ -2320,7 +2303,7 @@ gen7_blit_tex(struct sna *sna, _kgem_set_mode(&sna->kgem, KGEM_RENDER); } - gen7_emit_composite_state(sna, tmp); gen7_align_vertex(sna, tmp); + gen7_emit_composite_state(sna, tmp); return true; } diff --git a/contrib/sdk/sources/Intel-2D/sna/kgem.c b/contrib/sdk/sources/Intel-2D/sna/kgem.c index 5943a24d7c..84d5e8cfb8 100644 --- a/contrib/sdk/sources/Intel-2D/sna/kgem.c +++ b/contrib/sdk/sources/Intel-2D/sna/kgem.c @@ -47,7 +47,6 @@ #include "sna_cpuid.h" - static struct kgem_bo * search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); 
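The per-generation flush/expire callbacks deleted in the gen6/gen7 hunks above are replaced by shared implementations: gen6_render_init and gen7_render_init now install gen4_render_flush and gen4_render_expire, and gen7 additionally reuses gen6_render_context_switch/gen6_render_retire. The shared definitions are not visible in this excerpt; the sketch below is reconstructed from the removed gen6/gen7 copies and assumes they live in the new gen4_common.c/gen4_common.h added by this patch, so the exact upstream bodies and linkage may differ.

/* Sketch only: consolidated vertex-buffer housekeeping shared by gen4+.
 * Reconstructed from the gen6_render_expire/gen7_render_flush copies
 * removed in the hunks above; header names are assumptions.
 */
#include "sna.h"
#include "gen4_common.h"
#include "gen4_vertex.h"

void gen4_render_flush(struct sna *sna)
{
	/* Submit any pending vertices and release the vbo binding. */
	gen4_vertex_close(sna);

	assert(sna->render.vb_id == 0);
	assert(sna->render.vertex_offset == 0);
}

void gen4_render_expire(struct kgem *kgem)
{
	struct sna *sna = container_of(kgem, struct sna, kgem);

	/* If the vbo holds no vertices, destroy it and fall back to the
	 * static vertex_data[] array until the next allocation.
	 */
	if (sna->render.vbo && !sna->render.vertex_used) {
		DBG(("%s: discarding vbo handle=%d\n",
		     __FUNCTION__, sna->render.vbo->handle));
		kgem_bo_destroy(kgem, sna->render.vbo);
		assert(!sna->render.active);
		sna->render.vbo = NULL;
		sna->render.vertices = sna->render.vertex_data;
		sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
		sna->render.vertex_used = 0;
		sna->render.vertex_index = 0;
	}
}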
@@ -60,7 +59,7 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define DBG_NO_CACHE_LEVEL 0 #define DBG_NO_CPU 0 #define DBG_NO_CREATE2 1 -#define DBG_NO_USERPTR 0 +#define DBG_NO_USERPTR 1 #define DBG_NO_UNSYNCHRONIZED_USERPTR 0 #define DBG_NO_LLC 0 #define DBG_NO_SEMAPHORES 0 @@ -72,7 +71,7 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define DBG_NO_SECURE_BATCHES 0 #define DBG_NO_PINNED_BATCHES 0 #define DBG_NO_FAST_RELOC 0 -#define DBG_NO_HANDLE_LUT 1 +#define DBG_NO_HANDLE_LUT 0 #define DBG_NO_WT 0 #define DBG_DUMP 0 @@ -105,10 +104,8 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define MAX_CPU_VMA_CACHE INT16_MAX #define MAP_PRESERVE_TIME 10 -#define MAKE_CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1)) -#define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 3)) -#define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 2) -#define __MAP_TYPE(ptr) ((uintptr_t)(ptr) & 3) +#define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1)) +#define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 1) #define MAKE_REQUEST(rq, ring) ((struct kgem_request *)((uintptr_t)(rq) | (ring))) @@ -158,7 +155,12 @@ struct kgem_buffer { uint32_t used; uint32_t need_io : 1; uint32_t write : 2; - uint32_t mmapped : 1; + uint32_t mmapped : 2; +}; +enum { + MMAPPED_NONE, + MMAPPED_GTT, + MMAPPED_CPU }; static struct kgem_bo *__kgem_freed_bo; @@ -252,9 +254,10 @@ static bool gem_set_caching(int fd, uint32_t handle, int caching) return drmIoctl(fd, LOCAL_IOCTL_I915_GEM_SET_CACHING, &arg) == 0; } - - - +static uint32_t gem_userptr(int fd, void *ptr, int size, int read_only) +{ + return 0; +} static bool __kgem_throttle_retire(struct kgem *kgem, unsigned flags) { @@ -289,24 +292,23 @@ static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo) bo->handle, bytes(bo))); assert(bo->proxy == NULL); assert(!bo->snoop); - assert(kgem_bo_can_map(kgem, bo)); + assert(num_pages(bo) <= kgem->aperture_mappable / 4); retry_gtt: VG_CLEAR(mmap_arg); mmap_arg.handle = bo->handle; if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg)) { + int err = 0; (void)__kgem_throttle_retire(kgem, 0); if (kgem_expire_cache(kgem)) goto retry_gtt; - if (kgem->need_expire) { - kgem_cleanup_cache(kgem); + if (kgem_cleanup_cache(kgem)) goto retry_gtt; - } - printf("%s: failed to retrieve GTT offset for handle=%d\n", - __FUNCTION__, bo->handle); + ErrorF("%s: failed to retrieve GTT offset for handle=%d: %d\n", + __FUNCTION__, bo->handle, err); return NULL; } @@ -321,7 +323,7 @@ retry_mmap: return ptr; } -static int __gem_write(int fd, uint32_t handle, +static int gem_write(int fd, uint32_t handle, int offset, int length, const void *src) { @@ -338,7 +340,7 @@ static int __gem_write(int fd, uint32_t handle, return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite); } -static int gem_write(int fd, uint32_t handle, +static int gem_write__cachealigned(int fd, uint32_t handle, int offset, int length, const void *src) { @@ -631,7 +633,7 @@ total_ram_size(void) static unsigned cpu_cache_size__cpuid4(void) { - /* Deterministic Cache Parmaeters (Function 04h)": + /* Deterministic Cache Parameters (Function 04h)": * When EAX is initialized to a value of 4, the CPUID instruction * returns deterministic cache information in the EAX, EBX, ECX * and EDX registers. This function requires ECX be initialized @@ -755,7 +757,7 @@ static bool is_hw_supported(struct kgem *kgem, * hw acceleration. 
*/ - if (kgem->gen == 060 && dev->revision < 8) { + if (kgem->gen == 060 && dev && dev->revision < 8) { /* pre-production SNB with dysfunctional BLT */ return false; } @@ -881,7 +883,7 @@ static bool test_has_pinned_batches(struct kgem *kgem) static bool kgem_init_pinned_batches(struct kgem *kgem) { - int count[2] = { 2, 2 }; + int count[2] = { 4, 4 }; int size[2] = { 1, 2 }; int n, i; @@ -911,6 +913,7 @@ static bool kgem_init_pinned_batches(struct kgem *kgem) pin.alignment = 0; if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin)) { gem_close(kgem->fd, pin.handle); + free(bo); goto err; } bo->presumed_offset = pin.offset; @@ -1028,7 +1031,6 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) kgem->has_no_reloc)); kgem->has_handle_lut = test_has_handle_lut(kgem); - kgem->has_handle_lut = 0; DBG(("%s: has handle-lut? %d\n", __FUNCTION__, kgem->has_handle_lut)); @@ -1042,6 +1044,10 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) DBG(("%s: can blt to cpu? %d\n", __FUNCTION__, kgem->can_blt_cpu)); + kgem->can_render_y = gen != 021 && (gen >> 3) != 4; + DBG(("%s: can render to Y-tiled surfaces? %d\n", __FUNCTION__, + kgem->can_render_y)); + kgem->has_secure_batches = test_has_secure_batches(kgem); DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__, kgem->has_secure_batches)); @@ -1115,6 +1121,8 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) kgem->aperture_low, kgem->aperture_low / (1024*1024), kgem->aperture_high, kgem->aperture_high / (1024*1024))); + kgem->aperture_mappable = 256 * 1024 * 1024; + if (dev != NULL) kgem->aperture_mappable = agp_aperture_size(dev, gen); if (kgem->aperture_mappable == 0 || kgem->aperture_mappable > aperture.aper_size) @@ -1149,6 +1157,14 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) if (kgem->max_gpu_size > totalram / 4) kgem->max_gpu_size = totalram / 4; + if (kgem->aperture_high > totalram / 2) { + kgem->aperture_high = totalram / 2; + kgem->aperture_low = kgem->aperture_high / 4; + DBG(("%s: reduced aperture watermaks to fit into ram; low=%d [%d], high=%d [%d]\n", __FUNCTION__, + kgem->aperture_low, kgem->aperture_low / (1024*1024), + kgem->aperture_high, kgem->aperture_high / (1024*1024))); + } + kgem->max_cpu_size = kgem->max_object_size; half_gpu_max = kgem->max_gpu_size / 2; @@ -1197,8 +1213,10 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen) kgem->max_upload_tile_size, kgem->max_copy_tile_size)); /* Convert the aperture thresholds to pages */ + kgem->aperture_mappable /= PAGE_SIZE; kgem->aperture_low /= PAGE_SIZE; kgem->aperture_high /= PAGE_SIZE; + kgem->aperture_total /= PAGE_SIZE; kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2; if ((int)kgem->fence_max < 0) @@ -1233,7 +1251,7 @@ inline static uint32_t kgem_pitch_alignment(struct kgem *kgem, unsigned flags) return kgem->min_alignment; } -void kgem_get_tile_size(struct kgem *kgem, int tiling, +void kgem_get_tile_size(struct kgem *kgem, int tiling, int pitch, int *tile_width, int *tile_height, int *tile_size) { if (kgem->gen <= 030) { @@ -1270,6 +1288,10 @@ void kgem_get_tile_size(struct kgem *kgem, int tiling, *tile_size = 4096; break; } + + /* Force offset alignment to tile-row */ + if (tiling && kgem->gen < 033) + *tile_width = pitch; } uint32_t kgem_surface_size(struct kgem *kgem, @@ -1400,10 +1422,15 @@ kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo) static void kgem_add_bo(struct kgem *kgem, 
struct kgem_bo *bo) { + assert(bo->refcnt); + assert(bo->proxy == NULL); + bo->exec = kgem_add_handle(kgem, bo); bo->rq = MAKE_REQUEST(kgem->next_request, kgem->ring); list_move_tail(&bo->request, &kgem->next_request->buffers); + if (bo->io && !list_is_empty(&bo->list)) + list_move(&bo->list, &kgem->batch_buffers); /* XXX is it worth working around gcc here? */ kgem->flush |= bo->flush; @@ -1456,31 +1483,11 @@ static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo) b = bo->binding.next; while (b) { struct kgem_bo_binding *next = b->next; - free (b); + free(b); b = next; } } -static void kgem_bo_release_map(struct kgem *kgem, struct kgem_bo *bo) -{ - int type = IS_CPU_MAP(bo->map); - - assert(!IS_USER_MAP(bo->map)); - - DBG(("%s: releasing %s vma for handle=%d, count=%d\n", - __FUNCTION__, type ? "CPU" : "GTT", - bo->handle, kgem->vma[type].count)); - - VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map), bytes(bo))); - user_free(MAP(bo->map)); - bo->map = NULL; - - if (!list_is_empty(&bo->vma)) { - list_del(&bo->vma); - kgem->vma[type].count--; - } -} - static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo) { DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); @@ -1496,21 +1503,31 @@ static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo) kgem_bo_binding_free(kgem, bo); - if (IS_USER_MAP(bo->map)) { + if (IS_USER_MAP(bo->map__cpu)) { assert(bo->rq == NULL); assert(!__kgem_busy(kgem, bo->handle)); - assert(MAP(bo->map) != bo || bo->io || bo->flush); + assert(MAP(bo->map__cpu) != bo || bo->io || bo->flush); if (!(bo->io || bo->flush)) { DBG(("%s: freeing snooped base\n", __FUNCTION__)); - assert(bo != MAP(bo->map)); - free(MAP(bo->map)); + assert(bo != MAP(bo->map__cpu)); + free(MAP(bo->map__cpu)); } - bo->map = NULL; + bo->map__cpu = NULL; } - if (bo->map) - kgem_bo_release_map(kgem, bo); - assert(list_is_empty(&bo->vma)); - assert(bo->map == NULL); + + DBG(("%s: releasing %p:%p vma for handle=%d, count=%d\n", + __FUNCTION__, bo->map__gtt, bo->map__cpu, + bo->handle, list_is_empty(&bo->vma) ? 
0 : kgem->vma[bo->map__gtt == NULL].count)); + + if (!list_is_empty(&bo->vma)) { + _list_del(&bo->vma); + kgem->vma[bo->map__gtt == NULL].count--; + } + +// if (bo->map__gtt) +// munmap(MAP(bo->map__gtt), bytes(bo)); +// if (bo->map__cpu) +// munmap(MAP(bo->map__cpu), bytes(bo)); _list_del(&bo->list); _list_del(&bo->request); @@ -1546,22 +1563,28 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem, kgem->need_expire = true; if (bucket(bo) >= NUM_CACHE_BUCKETS) { - list_move(&bo->list, &kgem->large_inactive); - return; + if (bo->map__gtt) { +// munmap(MAP(bo->map__gtt), bytes(bo)); + bo->map__gtt = NULL; } + list_move(&bo->list, &kgem->large_inactive); + } else { assert(bo->flush == false); list_move(&bo->list, &kgem->inactive[bucket(bo)]); - if (bo->map) { - int type = IS_CPU_MAP(bo->map); - if (bucket(bo) >= NUM_CACHE_BUCKETS || - (!type && !__kgem_bo_is_mappable(kgem, bo))) { -// munmap(MAP(bo->map), bytes(bo)); - bo->map = NULL; + if (bo->map__gtt) { + if (!kgem_bo_can_map(kgem, bo)) { +// munmap(MAP(bo->map__gtt), bytes(bo)); + bo->map__gtt = NULL; + } + if (bo->map__gtt) { + list_add(&bo->vma, &kgem->vma[0].inactive[bucket(bo)]); + kgem->vma[0].count++; + } } - if (bo->map) { - list_add(&bo->vma, &kgem->vma[type].inactive[bucket(bo)]); - kgem->vma[type].count++; + if (bo->map__cpu && !bo->map__gtt) { + list_add(&bo->vma, &kgem->vma[1].inactive[bucket(bo)]); + kgem->vma[1].count++; } } } @@ -1574,6 +1597,10 @@ static struct kgem_bo *kgem_bo_replace_io(struct kgem_bo *bo) return bo; assert(!bo->snoop); + if (__kgem_freed_bo) { + base = __kgem_freed_bo; + __kgem_freed_bo = *(struct kgem_bo **)base; + } else base = malloc(sizeof(*base)); if (base) { DBG(("%s: transferring io handle=%d to bo\n", @@ -1600,10 +1627,10 @@ inline static void kgem_bo_remove_from_inactive(struct kgem *kgem, list_del(&bo->list); assert(bo->rq == NULL); assert(bo->exec == NULL); - if (bo->map) { - assert(!list_is_empty(&bo->vma)); + if (!list_is_empty(&bo->vma)) { + assert(bo->map__gtt || bo->map__cpu); list_del(&bo->vma); - kgem->vma[IS_CPU_MAP(bo->map)].count--; + kgem->vma[bo->map__gtt == NULL].count--; } } @@ -1614,8 +1641,10 @@ inline static void kgem_bo_remove_from_active(struct kgem *kgem, list_del(&bo->list); assert(bo->rq != NULL); - if (bo->rq == (void *)kgem) + if (RQ(bo->rq) == (void *)kgem) { + assert(bo->exec == NULL); list_del(&bo->request); + } assert(list_is_empty(&bo->vma)); } @@ -1740,6 +1769,7 @@ void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo) if (kgem->nexec != 1 || bo->exec == NULL) return; + assert(bo); DBG(("%s: only handle in batch, discarding last operations for handle=%d\n", __FUNCTION__, bo->handle)); @@ -1750,6 +1780,10 @@ void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo) bo->refcnt++; kgem_reset(kgem); bo->refcnt--; + + assert(kgem->nreloc == 0); + assert(kgem->nexec == 0); + assert(bo->exec == NULL); } static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) @@ -1777,7 +1811,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) kgem_bo_move_to_snoop(kgem, bo); return; } - if (!IS_USER_MAP(bo->map)) + if (!IS_USER_MAP(bo->map__cpu)) bo->flush = false; if (bo->scanout) { @@ -1793,9 +1827,6 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) goto destroy; } - if (!kgem->has_llc && IS_CPU_MAP(bo->map) && bo->domain != DOMAIN_CPU) - kgem_bo_release_map(kgem, bo); - assert(list_is_empty(&bo->vma)); assert(list_is_empty(&bo->list)); assert(bo->flush == false); @@ -1824,7 +1855,7 @@ static void 
__kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) assert(bo->exec == NULL); assert(list_is_empty(&bo->request)); - if (!IS_CPU_MAP(bo->map)) { + if (bo->map__cpu == NULL || bucket(bo) >= NUM_CACHE_BUCKETS) { if (!kgem_bo_set_purgeable(kgem, bo)) goto destroy; @@ -1852,16 +1883,18 @@ static void kgem_bo_unref(struct kgem *kgem, struct kgem_bo *bo) static void kgem_buffer_release(struct kgem *kgem, struct kgem_buffer *bo) { + assert(bo->base.io); while (!list_is_empty(&bo->base.vma)) { struct kgem_bo *cached; cached = list_first_entry(&bo->base.vma, struct kgem_bo, vma); assert(cached->proxy == &bo->base); + assert(cached != &bo->base); list_del(&cached->vma); - assert(*(struct kgem_bo **)cached->map == cached); - *(struct kgem_bo **)cached->map = NULL; - cached->map = NULL; + assert(*(struct kgem_bo **)cached->map__gtt == cached); + *(struct kgem_bo **)cached->map__gtt = NULL; + cached->map__gtt = NULL; kgem_bo_destroy(kgem, cached); } @@ -1877,6 +1910,10 @@ static bool kgem_retire__buffers(struct kgem *kgem) struct kgem_buffer, base.list); + DBG(("%s: handle=%d, busy? %d [%d]\n", + __FUNCTION__, bo->base.handle, bo->base.rq != NULL, bo->base.exec != NULL)); + + assert(bo->base.exec == NULL || RQ(bo->base.rq) == kgem->next_request); if (bo->base.rq) break; @@ -1897,7 +1934,7 @@ static bool kgem_retire__flushing(struct kgem *kgem) bool retired = false; list_for_each_entry_safe(bo, next, &kgem->flushing, request) { - assert(bo->rq == (void *)kgem); + assert(RQ(bo->rq) == (void *)kgem); assert(bo->exec == NULL); if (__kgem_busy(kgem, bo->handle)) @@ -1960,7 +1997,8 @@ static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq) DBG(("%s: moving %d to flushing\n", __FUNCTION__, bo->handle)); list_add(&bo->request, &kgem->flushing); - bo->rq = (void *)kgem; + bo->rq = MAKE_REQUEST(kgem, RQ_RING(bo->rq)); + kgem->need_retire = true; continue; } @@ -1985,6 +2023,7 @@ static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq) } assert(rq->bo->rq == NULL); + assert(rq->bo->exec == NULL); assert(list_is_empty(&rq->bo->request)); if (--rq->bo->refcnt == 0) { @@ -2057,7 +2096,7 @@ bool kgem_retire(struct kgem *kgem) { bool retired = false; - DBG(("%s\n", __FUNCTION__)); + DBG(("%s, need_retire?=%d\n", __FUNCTION__, kgem->need_retire)); kgem->need_retire = false; @@ -2077,6 +2116,7 @@ bool __kgem_ring_is_idle(struct kgem *kgem, int ring) { struct kgem_request *rq; + assert(ring < ARRAY_SIZE(kgem->requests)); assert(!list_is_empty(&kgem->requests[ring])); rq = list_last_entry(&kgem->requests[ring], @@ -2091,10 +2131,24 @@ bool __kgem_ring_is_idle(struct kgem *kgem, int ring) __FUNCTION__, ring, rq->bo->handle)); kgem_retire__requests_ring(kgem, ring); + kgem_retire__buffers(kgem); + assert(list_is_empty(&kgem->requests[ring])); return true; } +#ifndef NDEBUG +static void kgem_commit__check_buffers(struct kgem *kgem) +{ + struct kgem_buffer *bo; + + list_for_each_entry(bo, &kgem->active_buffers, base.list) + assert(bo->base.exec == NULL); +} +#else +#define kgem_commit__check_buffers(kgem) +#endif + static void kgem_commit(struct kgem *kgem) { struct kgem_request *rq = kgem->next_request; @@ -2118,6 +2172,7 @@ static void kgem_commit(struct kgem *kgem) if (!bo->refcnt && !bo->reusable) { assert(!bo->snoop); + assert(!bo->proxy); kgem_bo_free(kgem, bo); continue; } @@ -2128,7 +2183,6 @@ static void kgem_commit(struct kgem *kgem) if (bo->proxy) { /* proxies are not used for domain tracking */ - bo->exec = NULL; __kgem_bo_clear_busy(bo); } @@ -2152,7 +2206,8 @@ static void 
kgem_commit(struct kgem *kgem) kgem_retire(kgem); assert(list_is_empty(&rq->buffers)); - assert(rq->bo->map == NULL); + assert(rq->bo->map__gtt == NULL); + assert(rq->bo->map__cpu == NULL); gem_close(kgem->fd, rq->bo->handle); kgem_cleanup_cache(kgem); } else { @@ -2161,6 +2216,8 @@ static void kgem_commit(struct kgem *kgem) } kgem->next_request = NULL; + + kgem_commit__check_buffers(kgem); } static void kgem_close_list(struct kgem *kgem, struct list *head) @@ -2182,17 +2239,18 @@ static void kgem_finish_buffers(struct kgem *kgem) struct kgem_buffer *bo, *next; list_for_each_entry_safe(bo, next, &kgem->batch_buffers, base.list) { - DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, mmapped=%s\n", + DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, mmapped=%s, refcnt=%d\n", __FUNCTION__, bo->base.handle, bo->used, bo->base.exec!=NULL, - bo->write, bo->mmapped ? IS_CPU_MAP(bo->base.map) ? "cpu" : "gtt" : "no")); + bo->write, bo->mmapped == MMAPPED_CPU ? "cpu" : bo->mmapped == MMAPPED_GTT ? "gtt" : "no", + bo->base.refcnt)); assert(next->base.list.prev == &bo->base.list); assert(bo->base.io); assert(bo->base.refcnt >= 1); - if (!bo->base.exec) { - DBG(("%s: skipping unattached handle=%d, used=%d\n", - __FUNCTION__, bo->base.handle, bo->used)); + if (bo->base.refcnt > 1 && !bo->base.exec) { + DBG(("%s: skipping unattached handle=%d, used=%d, refcnt=%d\n", + __FUNCTION__, bo->base.handle, bo->used, bo->base.refcnt)); continue; } @@ -2202,27 +2260,28 @@ static void kgem_finish_buffers(struct kgem *kgem) } if (bo->mmapped) { - int used; + uint32_t used; assert(!bo->need_io); used = ALIGN(bo->used, PAGE_SIZE); if (!DBG_NO_UPLOAD_ACTIVE && used + PAGE_SIZE <= bytes(&bo->base) && - (kgem->has_llc || !IS_CPU_MAP(bo->base.map) || bo->base.snoop)) { - DBG(("%s: retaining upload buffer (%d/%d)\n", - __FUNCTION__, bo->used, bytes(&bo->base))); + (kgem->has_llc || bo->mmapped == MMAPPED_GTT || bo->base.snoop)) { + DBG(("%s: retaining upload buffer (%d/%d): used=%d, refcnt=%d\n", + __FUNCTION__, bo->used, bytes(&bo->base), used, bo->base.refcnt)); bo->used = used; list_move(&bo->base.list, &kgem->active_buffers); + kgem->need_retire = true; continue; } DBG(("%s: discarding mmapped buffer, used=%d, map type=%d\n", - __FUNCTION__, bo->used, (int)__MAP_TYPE(bo->base.map))); + __FUNCTION__, bo->used, bo->mmapped)); goto decouple; } - if (!bo->used) { + if (!bo->used || !bo->base.exec) { /* Unless we replace the handle in the execbuffer, * then this bo will become active. So decouple it * from the buffer list and track it in the normal @@ -2301,7 +2360,7 @@ static void kgem_finish_buffers(struct kgem *kgem) bo->base.handle, shrink->handle)); assert(bo->used <= bytes(shrink)); - if (gem_write(kgem->fd, shrink->handle, + if (gem_write__cachealigned(kgem->fd, shrink->handle, 0, bo->used, bo->mem) == 0) { shrink->target_handle = kgem->has_handle_lut ? 
bo->base.target_handle : shrink->handle; @@ -2340,7 +2399,7 @@ static void kgem_finish_buffers(struct kgem *kgem) __FUNCTION__, bo->base.handle, bo->used, bytes(&bo->base))); ASSERT_IDLE(kgem, bo->base.handle); assert(bo->used <= bytes(&bo->base)); - gem_write(kgem->fd, bo->base.handle, + gem_write__cachealigned(kgem->fd, bo->base.handle, 0, bo->used, bo->mem); bo->need_io = 0; @@ -2390,33 +2449,58 @@ static int kgem_batch_write(struct kgem *kgem, uint32_t handle, uint32_t size) ASSERT_IDLE(kgem, handle); +retry: /* If there is no surface data, just upload the batch */ - if (kgem->surface == kgem->batch_size) - return gem_write(kgem->fd, handle, + if (kgem->surface == kgem->batch_size) { + if (gem_write__cachealigned(kgem->fd, handle, 0, sizeof(uint32_t)*kgem->nbatch, - kgem->batch); + kgem->batch) == 0) + return 0; + + goto expire; + } /* Are the batch pages conjoint with the surface pages? */ if (kgem->surface < kgem->nbatch + PAGE_SIZE/sizeof(uint32_t)) { assert(size == PAGE_ALIGN(kgem->batch_size*sizeof(uint32_t))); - return gem_write(kgem->fd, handle, + if (gem_write__cachealigned(kgem->fd, handle, 0, kgem->batch_size*sizeof(uint32_t), - kgem->batch); + kgem->batch) == 0) + return 0; + + goto expire; } /* Disjoint surface/batch, upload separately */ - ret = gem_write(kgem->fd, handle, + if (gem_write__cachealigned(kgem->fd, handle, 0, sizeof(uint32_t)*kgem->nbatch, - kgem->batch); - if (ret) - return ret; + kgem->batch)) + goto expire; ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size); ret -= sizeof(uint32_t) * kgem->surface; assert(size-ret >= kgem->nbatch*sizeof(uint32_t)); - return __gem_write(kgem->fd, handle, + if (gem_write(kgem->fd, handle, size - ret, (kgem->batch_size - kgem->surface)*sizeof(uint32_t), - kgem->batch + kgem->surface); + kgem->batch + kgem->surface)) + goto expire; + + return 0; + +expire: + ret = errno; + assert(ret != EINVAL); + + (void)__kgem_throttle_retire(kgem, 0); + if (kgem_expire_cache(kgem)) + goto retry; + + if (kgem_cleanup_cache(kgem)) + goto retry; + + ErrorF("%s: failed to write batch (handle=%d): %d\n", + __FUNCTION__, handle, ret); + return ret; } void kgem_reset(struct kgem *kgem) @@ -2442,6 +2526,7 @@ void kgem_reset(struct kgem *kgem) assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE); list_add(&bo->request, &kgem->flushing); bo->rq = (void *)kgem; + kgem->need_retire = true; } else __kgem_bo_clear_busy(bo); @@ -2474,6 +2559,7 @@ void kgem_reset(struct kgem *kgem) kgem->nreloc__self = 0; kgem->aperture = 0; kgem->aperture_fenced = 0; + kgem->aperture_max_fence = 0; kgem->nbatch = 0; kgem->surface = kgem->batch_size; kgem->mode = KGEM_NONE; @@ -2599,10 +2685,10 @@ void _kgem_submit(struct kgem *kgem) batch_end = kgem_end_batch(kgem); kgem_sna_flush(kgem); - DBG(("batch[%d/%d, flags=%x]: %d %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d\n", + DBG(("batch[%d/%d, flags=%x]: %d %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d [fenced=%d]\n", kgem->mode, kgem->ring, kgem->batch_flags, batch_end, kgem->nbatch, kgem->surface, kgem->batch_size, - kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture)); + kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced)); assert(kgem->nbatch <= kgem->batch_size); assert(kgem->nbatch <= kgem->surface); @@ -2660,8 +2746,8 @@ void _kgem_submit(struct kgem *kgem) { int fd = open("/tmp1/1/batchbuffer.bin", O_CREAT|O_WRONLY|O_BINARY); if (fd != -1) { - write(fd, kgem->batch, size); - close(fd); + ret = write(fd, kgem->batch, batch_end*sizeof(uint32_t)); + fd = 
close(fd); } else printf("SNA: failed to write batchbuffer\n"); asm volatile("int3"); @@ -2694,9 +2780,9 @@ void _kgem_submit(struct kgem *kgem) #if 0 ret = errno; - ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d: errno=%d\n", + ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d, fenced=%d, high=%d,%d: errno=%d\n", kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface, - kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, errno); + kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced, kgem->aperture_high, kgem->aperture_total, errno); for (i = 0; i < kgem->nexec; i++) { struct kgem_bo *bo, *found = NULL; @@ -2764,7 +2850,7 @@ void kgem_throttle(struct kgem *kgem) } } -void kgem_purge_cache(struct kgem *kgem) +static void kgem_purge_cache(struct kgem *kgem) { struct kgem_bo *bo, *next; int i; @@ -2892,7 +2978,7 @@ bool kgem_expire_cache(struct kgem *kgem) break; } - if (bo->map && bo->delta + MAP_PRESERVE_TIME > expire) { + if (bo->map__cpu && bo->delta + MAP_PRESERVE_TIME > expire) { idle = false; list_move_tail(&bo->list, &preserve); } else { @@ -2932,7 +3018,7 @@ bool kgem_expire_cache(struct kgem *kgem) (void)size; } -void kgem_cleanup_cache(struct kgem *kgem) +bool kgem_cleanup_cache(struct kgem *kgem) { unsigned int i; int n; @@ -2962,6 +3048,9 @@ void kgem_cleanup_cache(struct kgem *kgem) kgem_retire(kgem); kgem_cleanup(kgem); + if (!kgem->need_expire) + return false; + for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) { while (!list_is_empty(&kgem->inactive[i])) kgem_bo_free(kgem, @@ -2984,6 +3073,7 @@ void kgem_cleanup_cache(struct kgem *kgem) kgem->need_purge = false; kgem->need_expire = false; + return true; } static struct kgem_bo * @@ -3028,8 +3118,10 @@ retry_large: goto discard; list_del(&bo->list); - if (bo->rq == (void *)kgem) + if (RQ(bo->rq) == (void *)kgem) { + assert(bo->exec == NULL); list_del(&bo->request); + } bo->delta = 0; assert_tiling(kgem, bo); @@ -3083,7 +3175,7 @@ discard: __FUNCTION__, for_cpu ? "cpu" : "gtt")); cache = &kgem->vma[for_cpu].inactive[cache_bucket(num_pages)]; list_for_each_entry(bo, cache, vma) { - assert(IS_CPU_MAP(bo->map) == for_cpu); + assert(for_cpu ? bo->map__cpu : bo->map__gtt); assert(bucket(bo) == cache_bucket(num_pages)); assert(bo->proxy == NULL); assert(bo->rq == NULL); @@ -3107,6 +3199,8 @@ discard: continue; kgem_bo_remove_from_inactive(kgem, bo); + assert(list_is_empty(&bo->vma)); + assert(list_is_empty(&bo->list)); bo->tiling = I915_TILING_NONE; bo->pitch = 0; @@ -3163,10 +3257,10 @@ discard: bo->pitch = 0; } - if (bo->map) { + if (bo->map__gtt || bo->map__cpu) { if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) { int for_cpu = !!(flags & CREATE_CPU_MAP); - if (IS_CPU_MAP(bo->map) != for_cpu) { + if (for_cpu ? bo->map__cpu : bo->map__gtt){ if (first != NULL) break; @@ -3181,6 +3275,9 @@ discard: continue; } } else { + if (flags & CREATE_GTT_MAP && !kgem_bo_can_map(kgem, bo)) + continue; + if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) { if (first != NULL) break; @@ -3202,6 +3299,7 @@ discard: __FUNCTION__, bo->handle, num_pages(bo), use_active ? "active" : "inactive")); assert(list_is_empty(&bo->list)); + assert(list_is_empty(&bo->vma)); assert(use_active || bo->domain != DOMAIN_GPU); assert(!bo->needs_flush || use_active); assert_tiling(kgem, bo); @@ -3223,6 +3321,7 @@ discard: __FUNCTION__, first->handle, num_pages(first), use_active ? 
"active" : "inactive")); assert(list_is_empty(&first->list)); + assert(list_is_empty(&first->vma)); assert(use_active || first->domain != DOMAIN_GPU); assert(!first->needs_flush || use_active); ASSERT_MAYBE_IDLE(kgem, first->handle, !use_active); @@ -3282,11 +3381,11 @@ inline int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo) assert(kgem->gen < 040); if (kgem->gen < 030) - size = 512 * 1024; + size = 512 * 1024 / PAGE_SIZE; else - size = 1024 * 1024; - while (size < bytes(bo)) - size *= 2; + size = 1024 * 1024 / PAGE_SIZE; + while (size < num_pages(bo)) + size <<= 1; return size; } @@ -3308,7 +3407,6 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, if (tiling < 0) exact = true, tiling = -tiling; - DBG(("%s(%dx%d, bpp=%d, tiling=%d, exact=%d, inactive=%d, cpu-mapping=%d, gtt-mapping=%d, scanout?=%d, prime?=%d, temp?=%d)\n", __FUNCTION__, width, height, bpp, tiling, exact, !!(flags & CREATE_INACTIVE), @@ -3324,61 +3422,6 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, size /= PAGE_SIZE; bucket = cache_bucket(size); - if (flags & CREATE_SCANOUT) { - struct kgem_bo *last = NULL; - - list_for_each_entry_reverse(bo, &kgem->scanout, list) { - assert(bo->scanout); - assert(bo->delta); - assert(!bo->flush); - assert_tiling(kgem, bo); - - if (size > num_pages(bo) || num_pages(bo) > 2*size) - continue; - - if (bo->tiling != tiling || - (tiling != I915_TILING_NONE && bo->pitch != pitch)) { - if (!gem_set_tiling(kgem->fd, bo->handle, - tiling, pitch)) - continue; - - bo->tiling = tiling; - bo->pitch = pitch; - } - - if (flags & CREATE_INACTIVE && bo->rq) { - last = bo; - continue; - } - - list_del(&bo->list); - - bo->unique_id = kgem_get_unique_id(kgem); - DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n", - bo->pitch, bo->tiling, bo->handle, bo->unique_id)); - assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); - assert_tiling(kgem, bo); - bo->refcnt = 1; - return bo; - } - - if (last) { - list_del(&last->list); - - last->unique_id = kgem_get_unique_id(kgem); - DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n", - last->pitch, last->tiling, last->handle, last->unique_id)); - assert(last->pitch*kgem_aligned_height(kgem, height, last->tiling) <= kgem_bo_size(last)); - assert_tiling(kgem, last); - last->refcnt = 1; - return last; - } - - bo = NULL; //__kgem_bo_create_as_display(kgem, size, tiling, pitch); - if (bo) - return bo; - } - if (bucket >= NUM_CACHE_BUCKETS) { DBG(("%s: large bo num pages=%d, bucket=%d\n", __FUNCTION__, size, bucket)); @@ -3428,7 +3471,6 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); assert_tiling(kgem, bo); bo->refcnt = 1; - bo->flush = true; return bo; } @@ -3488,9 +3530,9 @@ large_inactive: assert(bucket(bo) == bucket); assert(bo->refcnt == 0); assert(!bo->scanout); - assert(bo->map); - assert(IS_CPU_MAP(bo->map) == for_cpu); + assert(for_cpu ? 
bo->map__cpu : bo->map__gtt); assert(bo->rq == NULL); + assert(bo->exec == NULL); assert(list_is_empty(&bo->request)); assert(bo->flush == false); assert_tiling(kgem, bo); @@ -3520,6 +3562,8 @@ large_inactive: bo->domain = DOMAIN_NONE; kgem_bo_remove_from_inactive(kgem, bo); + assert(list_is_empty(&bo->list)); + assert(list_is_empty(&bo->vma)); DBG((" from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n", bo->pitch, bo->tiling, bo->handle, bo->unique_id)); @@ -3740,9 +3784,6 @@ search_inactive: if (!gem_set_tiling(kgem->fd, bo->handle, tiling, pitch)) continue; - - if (bo->map) - kgem_bo_release_map(kgem, bo); } if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) { @@ -3751,6 +3792,8 @@ search_inactive: } kgem_bo_remove_from_inactive(kgem, bo); + assert(list_is_empty(&bo->list)); + assert(list_is_empty(&bo->vma)); bo->pitch = pitch; bo->tiling = tiling; @@ -3799,12 +3842,6 @@ create: return NULL; } - if (bucket >= NUM_CACHE_BUCKETS) { - DBG(("%s: marking large bo for automatic flushing\n", - __FUNCTION__)); - bo->flush = true; - } - bo->unique_id = kgem_get_unique_id(kgem); if (tiling == I915_TILING_NONE || gem_set_tiling(kgem->fd, handle, tiling, pitch)) { @@ -3935,16 +3972,21 @@ void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) __FUNCTION__, bo->handle, bo->proxy != NULL)); if (bo->proxy) { + assert(!bo->reusable); + kgem_bo_binding_free(kgem, bo); + + assert(list_is_empty(&bo->list)); _list_del(&bo->vma); _list_del(&bo->request); - if (bo->io && bo->exec == NULL) - _kgem_bo_delete_buffer(kgem, bo); - kgem_bo_unref(kgem, bo->proxy); - kgem_bo_binding_free(kgem, bo); - free(bo); - return; - } + if (bo->io && bo->domain == DOMAIN_CPU) + _kgem_bo_delete_buffer(kgem, bo); + + kgem_bo_unref(kgem, bo->proxy); + + *(struct kgem_bo **)bo = __kgem_freed_bo; + __kgem_freed_bo = bo; + } else __kgem_bo_destroy(kgem, bo); } @@ -3989,6 +4031,58 @@ inline static bool needs_semaphore(struct kgem *kgem, struct kgem_bo *bo) return kgem->nreloc && bo->rq && RQ_RING(bo->rq) != kgem->ring; } +static bool aperture_check(struct kgem *kgem, unsigned num_pages) +{ + if (kgem->aperture) { + struct drm_i915_gem_get_aperture aperture; + + VG_CLEAR(aperture); + aperture.aper_available_size = kgem->aperture_high; + aperture.aper_available_size *= PAGE_SIZE; + (void)drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + + DBG(("%s: aperture required %ld bytes, available %ld bytes\n", + __FUNCTION__, + (long)num_pages * PAGE_SIZE, + (long)aperture.aper_available_size)); + + /* Leave some space in case of alignment issues */ + aperture.aper_available_size -= 1024 * 1024; + aperture.aper_available_size -= kgem->aperture_mappable * PAGE_SIZE / 2; + if (kgem->gen < 033) + aperture.aper_available_size -= kgem->aperture_max_fence * PAGE_SIZE; + if (!kgem->has_llc) + aperture.aper_available_size -= 2 * kgem->nexec * PAGE_SIZE; + + DBG(("%s: num_pages=%d, estimated max usable=%ld\n", + __FUNCTION__, num_pages, (long)(aperture.aper_available_size/PAGE_SIZE))); + + if (num_pages <= aperture.aper_available_size / PAGE_SIZE) + return true; + } + + return false; +} + +static inline bool kgem_flush(struct kgem *kgem, bool flush) +{ + if (unlikely(kgem->wedged)) + return false; + + if (kgem->nreloc == 0) + return true; + + if (container_of(kgem, struct sna, kgem)->flags & SNA_POWERSAVE) + return true; + + if (kgem->flush == flush && kgem->aperture < kgem->aperture_low) + return true; + + DBG(("%s: opportunistic flushing? 
flush=%d,%d, aperture=%d/%d, idle?=%d\n", + __FUNCTION__, kgem->flush, flush, kgem->aperture, kgem->aperture_low, kgem_ring_is_idle(kgem, kgem->ring))); + return !kgem_ring_is_idle(kgem, kgem->ring); +} + bool kgem_check_bo(struct kgem *kgem, ...) { va_list ap; @@ -3996,6 +4090,7 @@ bool kgem_check_bo(struct kgem *kgem, ...) int num_exec = 0; int num_pages = 0; bool flush = false; + bool busy = true; va_start(ap, kgem); while ((bo = va_arg(ap, struct kgem_bo *))) { @@ -4004,13 +4099,16 @@ bool kgem_check_bo(struct kgem *kgem, ...) if (bo->exec) continue; - if (needs_semaphore(kgem, bo)) + if (needs_semaphore(kgem, bo)) { + DBG(("%s: flushing for required semaphore\n", __FUNCTION__)); return false; + } num_pages += num_pages(bo); num_exec++; flush |= bo->flush; + busy &= bo->rq != NULL; } va_end(ap); @@ -4020,43 +4118,129 @@ bool kgem_check_bo(struct kgem *kgem, ...) if (!num_pages) return true; - if (kgem_flush(kgem, flush)) - return false; - - if (kgem->aperture > kgem->aperture_low && - kgem_ring_is_idle(kgem, kgem->ring)) { - DBG(("%s: current aperture usage (%d) is greater than low water mark (%d)\n", - __FUNCTION__, kgem->aperture, kgem->aperture_low)); - return false; - } - - if (num_pages + kgem->aperture > kgem->aperture_high) { - DBG(("%s: final aperture usage (%d) is greater than high water mark (%d)\n", - __FUNCTION__, num_pages + kgem->aperture, kgem->aperture_high)); - return false; - } - if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) { DBG(("%s: out of exec slots (%d + %d / %d)\n", __FUNCTION__, kgem->nexec, num_exec, KGEM_EXEC_SIZE(kgem))); return false; } - return true; + if (num_pages + kgem->aperture > kgem->aperture_high) { + DBG(("%s: final aperture usage (%d) is greater than high water mark (%d)\n", + __FUNCTION__, num_pages + kgem->aperture, kgem->aperture_high)); + if (!aperture_check(kgem, num_pages + kgem->aperture)) + return false; + } + + if (busy) + return true; + + return kgem_flush(kgem, flush); } +#if 0 +bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo) +{ + assert(bo->refcnt); + while (bo->proxy) + bo = bo->proxy; + assert(bo->refcnt); + if (bo->exec) { + if (kgem->gen < 040 && + bo->tiling != I915_TILING_NONE && + (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) { + uint32_t size; + assert(bo->tiling == I915_TILING_X); + if (kgem->nfence >= kgem->fence_max) + return false; + if (kgem->aperture_fenced) { + size = 3*kgem->aperture_fenced; + if (kgem->aperture_total == kgem->aperture_mappable) + size += kgem->aperture; + if (size > kgem->aperture_mappable && + kgem_ring_is_idle(kgem, kgem->ring)) { + DBG(("%s: opportunistic fence flush\n", __FUNCTION__)); + return false; + } + } + size = kgem_bo_fenced_size(kgem, bo); + if (size > kgem->aperture_max_fence) + kgem->aperture_max_fence = size; + size += kgem->aperture_fenced; + if (kgem->gen < 033) + size += kgem->aperture_max_fence; + if (kgem->aperture_total == kgem->aperture_mappable) + size += kgem->aperture; + if (size > kgem->aperture_mappable) { + DBG(("%s: estimated fence space required [%d] exceed aperture [%d]\n", + __FUNCTION__, size, kgem->aperture_mappable)); + return false; + } + } + return true; + } + if (kgem->nexec >= KGEM_EXEC_SIZE(kgem) - 1) + return false; + if (needs_semaphore(kgem, bo)) { + DBG(("%s: flushing for required semaphore\n", __FUNCTION__)); + return false; + } + assert_tiling(kgem, bo); + if (kgem->gen < 040 && bo->tiling != I915_TILING_NONE) { + uint32_t size; + assert(bo->tiling == I915_TILING_X); + if (kgem->nfence >= kgem->fence_max) + return false; + if 
(kgem->aperture_fenced) { + size = 3*kgem->aperture_fenced; + if (kgem->aperture_total == kgem->aperture_mappable) + size += kgem->aperture; + if (size > kgem->aperture_mappable && + kgem_ring_is_idle(kgem, kgem->ring)) { + DBG(("%s: opportunistic fence flush\n", __FUNCTION__)); + return false; + } + } + + size = kgem_bo_fenced_size(kgem, bo); + if (size > kgem->aperture_max_fence) + kgem->aperture_max_fence = size; + size += kgem->aperture_fenced; + if (kgem->gen < 033) + size += kgem->aperture_max_fence; + if (kgem->aperture_total == kgem->aperture_mappable) + size += kgem->aperture; + if (size > kgem->aperture_mappable) { + DBG(("%s: estimated fence space required [%d] exceed aperture [%d]\n", + __FUNCTION__, size, kgem->aperture_mappable)); + return false; + } + } + + if (kgem->aperture + kgem->aperture_fenced + num_pages(bo) > kgem->aperture_high) { + DBG(("%s: final aperture usage (%d) is greater than high water mark (%d)\n", + __FUNCTION__, num_pages(bo) + kgem->aperture, kgem->aperture_high)); + if (!aperture_check(kgem, num_pages(bo) + kgem->aperture + kgem->aperture_fenced)) + return false; + } + + if (bo->rq) + return true; + + return kgem_flush(kgem, bo->flush); +} +#endif @@ -4085,23 +4269,14 @@ uint32_t kgem_add_reloc(struct kgem *kgem, DBG(("%s: handle=%d, pos=%d, delta=%d, domains=%08x\n", __FUNCTION__, bo ? bo->handle : 0, pos, delta, read_write_domain)); + assert(kgem->gen < 0100); assert((read_write_domain & 0x7fff) == 0 || bo != NULL); - if( bo != NULL && bo->handle == -2) - { - if (bo->exec == NULL) - kgem_add_bo(kgem, bo); - - if (read_write_domain & 0x7fff && !bo->gpu_dirty) { - __kgem_bo_mark_dirty(bo); - } - return 0; - }; - index = kgem->nreloc++; assert(index < ARRAY_SIZE(kgem->reloc)); kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]); if (bo) { + assert(kgem->mode != KGEM_NONE); assert(bo->refcnt); while (bo->proxy) { DBG(("%s: adding proxy [delta=%d] for handle=%d\n", @@ -4115,6 +4290,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem, bo->rq = MAKE_REQUEST(kgem->next_request, kgem->ring); bo->exec = &_kgem_dummy_exec; + bo->domain = DOMAIN_GPU; } if (read_write_domain & 0x7fff && !bo->gpu_dirty) @@ -4133,6 +4309,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem, if (kgem->gen < 040 && read_write_domain & KGEM_RELOC_FENCED) { if (bo->tiling && (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) { + assert(bo->tiling == I915_TILING_X); assert(kgem->nfence < kgem->fence_max); kgem->aperture_fenced += kgem_bo_fenced_size(kgem, bo); @@ -4164,6 +4341,77 @@ uint32_t kgem_add_reloc(struct kgem *kgem, return delta; } +uint64_t kgem_add_reloc64(struct kgem *kgem, + uint32_t pos, + struct kgem_bo *bo, + uint32_t read_write_domain, + uint64_t delta) +{ + int index; + + DBG(("%s: handle=%d, pos=%d, delta=%ld, domains=%08x\n", + __FUNCTION__, bo ? 
bo->handle : 0, pos, (long)delta, read_write_domain)); + + assert(kgem->gen >= 0100); + assert((read_write_domain & 0x7fff) == 0 || bo != NULL); + + index = kgem->nreloc++; + assert(index < ARRAY_SIZE(kgem->reloc)); + kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]); + if (bo) { + assert(kgem->mode != KGEM_NONE); + assert(bo->refcnt); + while (bo->proxy) { + DBG(("%s: adding proxy [delta=%ld] for handle=%d\n", + __FUNCTION__, (long)bo->delta, bo->handle)); + delta += bo->delta; + assert(bo->handle == bo->proxy->handle); + /* need to release the cache upon batch submit */ + if (bo->exec == NULL) { + list_move_tail(&bo->request, + &kgem->next_request->buffers); + bo->rq = MAKE_REQUEST(kgem->next_request, + kgem->ring); + bo->exec = &_kgem_dummy_exec; + bo->domain = DOMAIN_GPU; + } + + if (read_write_domain & 0x7fff && !bo->gpu_dirty) + __kgem_bo_mark_dirty(bo); + + bo = bo->proxy; + assert(bo->refcnt); + } + assert(bo->refcnt); + + if (bo->exec == NULL) + kgem_add_bo(kgem, bo); + assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring)); + assert(RQ_RING(bo->rq) == kgem->ring); + + kgem->reloc[index].delta = delta; + kgem->reloc[index].target_handle = bo->target_handle; + kgem->reloc[index].presumed_offset = bo->presumed_offset; + + if (read_write_domain & 0x7fff && !bo->gpu_dirty) { + assert(!bo->snoop || kgem->can_blt_cpu); + __kgem_bo_mark_dirty(bo); + } + + delta += bo->presumed_offset; + } else { + kgem->reloc[index].delta = delta; + kgem->reloc[index].target_handle = ~0U; + kgem->reloc[index].presumed_offset = 0; + if (kgem->nreloc__self < 256) + kgem->reloc__self[kgem->nreloc__self++] = index; + } + kgem->reloc[index].read_domains = read_write_domain >> 16; + kgem->reloc[index].write_domain = read_write_domain & 0x7fff; + + return delta; +} + static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket) { int i, j; @@ -4186,6 +4434,7 @@ static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket) i = 0; while (kgem->vma[type].count > 0) { struct kgem_bo *bo = NULL; + void **ptr; for (j = 0; bo == NULL && j < ARRAY_SIZE(kgem->vma[type].inactive); @@ -4198,15 +4447,14 @@ static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket) break; DBG(("%s: discarding inactive %s vma cache for %d\n", - __FUNCTION__, - IS_CPU_MAP(bo->map) ? "CPU" : "GTT", bo->handle)); - assert(IS_CPU_MAP(bo->map) == type); - assert(bo->map); + __FUNCTION__, type ? "CPU" : "GTT", bo->handle)); + + ptr = type ? 
&bo->map__cpu : &bo->map__gtt; assert(bo->rq == NULL); - VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map), bytes(bo))); -// munmap(MAP(bo->map), bytes(bo)); - bo->map = NULL; + VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(*ptr), bytes(bo))); +// munmap(MAP(*ptr), bytes(bo)); + *ptr = NULL; list_del(&bo->vma); kgem->vma[type].count--; @@ -4222,12 +4470,11 @@ void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo) { void *ptr; - DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__, - bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain)); + DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__, + bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain)); assert(bo->proxy == NULL); assert(list_is_empty(&bo->list)); - assert(!IS_USER_MAP(bo->map)); assert_tiling(kgem, bo); if (bo->tiling == I915_TILING_NONE && !bo->scanout && kgem->has_llc) { @@ -4236,12 +4483,9 @@ void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo) return kgem_bo_map__cpu(kgem, bo); } - if (IS_CPU_MAP(bo->map)) - kgem_bo_release_map(kgem, bo); - - ptr = bo->map; + ptr = MAP(bo->map__gtt); if (ptr == NULL) { - assert(kgem_bo_size(bo) <= kgem->aperture_mappable / 2); + assert(num_pages(bo) <= kgem->aperture_mappable / 2); kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); @@ -4254,7 +4498,7 @@ void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo) * issue with compositing managers which need to frequently * flush CPU damage to their GPU bo. */ - bo->map = ptr; + bo->map__gtt = ptr; DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle)); } @@ -4265,12 +4509,11 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo) { void *ptr; - DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__, - bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain)); + DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__, + bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain)); assert(bo->proxy == NULL); assert(list_is_empty(&bo->list)); - assert(!IS_USER_MAP(bo->map)); assert(bo->exec == NULL); assert_tiling(kgem, bo); @@ -4284,12 +4527,9 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo) return ptr; } - if (IS_CPU_MAP(bo->map)) - kgem_bo_release_map(kgem, bo); - - ptr = bo->map; + ptr = MAP(bo->map__gtt); if (ptr == NULL) { - assert(kgem_bo_size(bo) <= kgem->aperture_mappable / 2); + assert(num_pages(bo) <= kgem->aperture_mappable / 2); assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y); kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); @@ -4303,7 +4543,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo) * issue with compositing managers which need to frequently * flush CPU damage to their GPU bo. 
*/ - bo->map = ptr; + bo->map__gtt = ptr; DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle)); } @@ -4333,20 +4573,16 @@ void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo) { void *ptr; - DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__, - bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain)); + DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__, + bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain)); assert(bo->exec == NULL); assert(list_is_empty(&bo->list)); - assert(!IS_USER_MAP(bo->map)); assert_tiling(kgem, bo); - if (IS_CPU_MAP(bo->map)) - kgem_bo_release_map(kgem, bo); - - ptr = bo->map; + ptr = MAP(bo->map__gtt); if (ptr == NULL) { - assert(bytes(bo) <= kgem->aperture_mappable / 4); + assert(num_pages(bo) <= kgem->aperture_mappable / 4); kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); @@ -4359,7 +4595,7 @@ void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo) * issue with compositing managers which need to frequently * flush CPU damage to their GPU bo. */ - bo->map = ptr; + bo->map__gtt = ptr; DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle)); } @@ -4368,28 +4604,21 @@ void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo) void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo) { - if (bo->map) - return MAP(bo->map); - - kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo)); - return bo->map = __kgem_bo_map__gtt(kgem, bo); + return kgem_bo_map__async(kgem, bo); } void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo) { struct drm_i915_gem_mmap mmap_arg; - DBG(("%s(handle=%d, size=%d, mapped? %d)\n", - __FUNCTION__, bo->handle, bytes(bo), (int)__MAP_TYPE(bo->map))); + DBG(("%s(handle=%d, size=%d, map=%p:%p)\n", + __FUNCTION__, bo->handle, bytes(bo), bo->map__gtt, bo->map__cpu)); assert(!bo->purged); assert(list_is_empty(&bo->list)); assert(bo->proxy == NULL); - if (IS_CPU_MAP(bo->map)) - return MAP(bo->map); - - if (bo->map) - kgem_bo_release_map(kgem, bo); + if (bo->map__cpu) + return MAP(bo->map__cpu); kgem_trim_vma_cache(kgem, MAP_CPU, bucket(bo)); @@ -4399,58 +4628,14 @@ retry: mmap_arg.offset = 0; mmap_arg.size = bytes(bo); if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) { + int err = 0; + if (__kgem_throttle_retire(kgem, 0)) goto retry; - if (kgem->need_expire) { - kgem_cleanup_cache(kgem); + if (kgem_cleanup_cache(kgem)) goto retry; - } - - ErrorF("%s: failed to mmap handle=%d, %d bytes, into CPU domain\n", - __FUNCTION__, bo->handle, bytes(bo)); - return NULL; - } - - VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo))); - - DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle)); - bo->map = MAKE_CPU_MAP(mmap_arg.addr_ptr); - return (void *)(uintptr_t)mmap_arg.addr_ptr; -} - -void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo) -{ - struct drm_i915_gem_mmap mmap_arg; - - DBG(("%s(handle=%d, size=%d, mapped? 
%d)\n", - __FUNCTION__, bo->handle, bytes(bo), (int)__MAP_TYPE(bo->map))); - assert(bo->refcnt); - assert(!bo->purged); - assert(list_is_empty(&bo->list)); - assert(bo->proxy == NULL); - - if (IS_CPU_MAP(bo->map)) - return MAP(bo->map); - -retry: - VG_CLEAR(mmap_arg); - mmap_arg.handle = bo->handle; - mmap_arg.offset = 0; - mmap_arg.size = bytes(bo); - if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) { - int err = errno; - - assert(err != EINVAL); - - if (__kgem_throttle_retire(kgem, 0)) - goto retry; - - if (kgem->need_expire) { - kgem_cleanup_cache(kgem); - goto retry; - } ErrorF("%s: failed to mmap handle=%d, %d bytes, into CPU domain: %d\n", __FUNCTION__, bo->handle, bytes(bo), err); @@ -4458,16 +4643,68 @@ retry: } VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo))); - if (bo->map && bo->domain == DOMAIN_CPU) { - DBG(("%s: discarding GTT vma for %d\n", __FUNCTION__, bo->handle)); - kgem_bo_release_map(kgem, bo); - } - if (bo->map == NULL) { - DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle)); - bo->map = MAKE_CPU_MAP(mmap_arg.addr_ptr); - } - return (void *)(uintptr_t)mmap_arg.addr_ptr; + + DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle)); + return bo->map__cpu = (void *)(uintptr_t)mmap_arg.addr_ptr; } + + +/* +struct kgem_bo *kgem_create_map(struct kgem *kgem, + void *ptr, uint32_t size, + bool read_only) +{ + struct kgem_bo *bo; + uintptr_t first_page, last_page; + uint32_t handle; + + assert(MAP(ptr) == ptr); + + if (!kgem->has_userptr) + return NULL; + + first_page = (uintptr_t)ptr; + last_page = first_page + size + PAGE_SIZE - 1; + + first_page &= ~(PAGE_SIZE-1); + last_page &= ~(PAGE_SIZE-1); + assert(last_page > first_page); + + handle = gem_userptr(kgem->fd, + (void *)first_page, last_page-first_page, + read_only); + if (handle == 0) + return NULL; + + bo = __kgem_bo_alloc(handle, (last_page - first_page) / PAGE_SIZE); + if (bo == NULL) { + gem_close(kgem->fd, handle); + return NULL; + } + + bo->snoop = !kgem->has_llc; + debug_alloc__bo(kgem, bo); + + if (first_page != (uintptr_t)ptr) { + struct kgem_bo *proxy; + + proxy = kgem_create_proxy(kgem, bo, + (uintptr_t)ptr - first_page, size); + kgem_bo_destroy(kgem, bo); + if (proxy == NULL) + return NULL; + + bo = proxy; + } + + bo->map__cpu = MAKE_USER_MAP(ptr); + + DBG(("%s(ptr=%p, size=%d, pages=%d, read_only=%d) => handle=%d (proxy? %d)\n", + __FUNCTION__, ptr, size, NUM_PAGES(size), read_only, handle, bo->proxy != NULL)); + return bo; +} +*/ + void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo) { DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); @@ -4500,6 +4737,72 @@ void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo) } } +void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write) +{ + DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); + assert(!bo->scanout || !write); + + if (write || bo->needs_flush) + kgem_bo_submit(kgem, bo); + + /* SHM pixmaps use proxies for subpage offsets */ + assert(!bo->purged); + assert(bo->refcnt); + while (bo->proxy) + bo = bo->proxy; + assert(bo->refcnt); + assert(!bo->purged); + + if (bo->domain != DOMAIN_CPU || FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)) { + struct drm_i915_gem_set_domain set_domain; + + DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n", + __FUNCTION__, bo->handle, + bo->needs_flush, bo->domain, + __kgem_busy(kgem, bo->handle))); + + VG_CLEAR(set_domain); + set_domain.handle = bo->handle; + set_domain.read_domains = I915_GEM_DOMAIN_CPU; + set_domain.write_domain = write ? 
I915_GEM_DOMAIN_CPU : 0; + + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) { + if (bo->exec == NULL) + kgem_bo_retire(kgem, bo); + bo->domain = write ? DOMAIN_CPU : DOMAIN_NONE; + } + } +} + +void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo) +{ + DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); + assert(bo->refcnt); + assert(bo->proxy == NULL); + + kgem_bo_submit(kgem, bo); + + if (bo->domain != DOMAIN_GTT || FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) { + struct drm_i915_gem_set_domain set_domain; + + DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n", + __FUNCTION__, bo->handle, + bo->needs_flush, bo->domain, + __kgem_busy(kgem, bo->handle))); + + VG_CLEAR(set_domain); + set_domain.handle = bo->handle; + set_domain.read_domains = I915_GEM_DOMAIN_GTT; + set_domain.write_domain = I915_GEM_DOMAIN_GTT; + + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) { + kgem_bo_retire(kgem, bo); + bo->domain = DOMAIN_GTT; + bo->gtt_dirty = true; + } + } +} + void kgem_clear_dirty(struct kgem *kgem) { struct list * const buffers = &kgem->next_request->buffers; @@ -4542,7 +4845,7 @@ struct kgem_bo *kgem_create_proxy(struct kgem *kgem, bo->proxy = kgem_bo_reference(target); bo->delta = offset; - if (target->exec) { + if (target->exec && !bo->io) { list_move_tail(&bo->request, &kgem->next_request->buffers); bo->exec = &_kgem_dummy_exec; } @@ -4563,7 +4866,7 @@ buffer_alloc(void) bo->mem = NULL; bo->need_io = false; - bo->mmapped = true; + bo->mmapped = MMAPPED_CPU; return bo; } @@ -4638,7 +4941,7 @@ search_snoopable_buffer(struct kgem *kgem, unsigned alloc) assert(bo->base.snoop); assert(bo->base.tiling == I915_TILING_NONE); assert(num_pages(&bo->base) >= alloc); - assert(bo->mmapped == true); + assert(bo->mmapped == MMAPPED_CPU); assert(bo->need_io == false); bo->mem = kgem_bo_map__cpu(kgem, &bo->base); @@ -4685,7 +4988,7 @@ create_snoopable_buffer(struct kgem *kgem, unsigned alloc) } assert(bo->base.refcnt == 1); - assert(bo->mmapped == true); + assert(bo->mmapped == MMAPPED_CPU); assert(bo->need_io == false); bo->mem = kgem_bo_map__cpu(kgem, &bo->base); @@ -4721,7 +5024,7 @@ create_snoopable_buffer(struct kgem *kgem, unsigned alloc) } assert(bo->base.refcnt == 1); - assert(bo->mmapped == true); + assert(bo->mmapped == MMAPPED_CPU); assert(bo->need_io == false); if (!gem_set_caching(kgem->fd, bo->base.handle, SNOOPED)) @@ -4763,12 +5066,12 @@ free_caching: DBG(("%s: created snoop handle=%d for buffer\n", __FUNCTION__, bo->base.handle)); - assert(bo->mmapped == true); + assert(bo->mmapped == MMAPPED_CPU); assert(bo->need_io == false); bo->base.refcnt = 1; bo->base.snoop = true; - bo->base.map = MAKE_USER_MAP(bo->mem); + bo->base.map__cpu = MAKE_USER_MAP(bo->mem); return bo; } @@ -4801,11 +5104,12 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, /* We can reuse any write buffer which we can fit */ if (flags == KGEM_BUFFER_LAST && bo->write == KGEM_BUFFER_WRITE && - bo->base.refcnt == 1 && !bo->mmapped && + bo->base.refcnt == 1 && + bo->mmapped == MMAPPED_NONE && size <= bytes(&bo->base)) { DBG(("%s: reusing write buffer for read of %d bytes? 
used=%d, total=%d\n", __FUNCTION__, size, bo->used, bytes(&bo->base))); - gem_write(kgem->fd, bo->base.handle, + gem_write__cachealigned(kgem->fd, bo->base.handle, 0, bo->used, bo->mem); kgem_buffer_release(kgem, bo); bo->need_io = 0; @@ -4845,10 +5149,11 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, list_for_each_entry(bo, &kgem->active_buffers, base.list) { assert(bo->base.io); assert(bo->base.refcnt >= 1); + assert(bo->base.exec == NULL); assert(bo->mmapped); - assert(!IS_CPU_MAP(bo->base.map) || kgem->has_llc || bo->base.snoop); + assert(bo->mmapped == MMAPPED_GTT || kgem->has_llc || bo->base.snoop); - if (!kgem->has_llc && (bo->write & ~flags) & KGEM_BUFFER_INPLACE) { + if ((bo->write & ~flags) & KGEM_BUFFER_INPLACE && !bo->base.snoop) { DBG(("%s: skip write %x buffer, need %x\n", __FUNCTION__, bo->write, flags)); continue; @@ -4862,6 +5167,29 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, list_move(&bo->base.list, &kgem->batch_buffers); goto done; } + + if (size <= bytes(&bo->base) && + (bo->base.rq == NULL || + !__kgem_busy(kgem, bo->base.handle))) { + DBG(("%s: reusing whole buffer? size=%d, total=%d\n", + __FUNCTION__, size, bytes(&bo->base))); + __kgem_bo_clear_busy(&bo->base); + kgem_buffer_release(kgem, bo); + + switch (bo->mmapped) { + case MMAPPED_CPU: + kgem_bo_sync__cpu(kgem, &bo->base); + break; + case MMAPPED_GTT: + kgem_bo_sync__gtt(kgem, &bo->base); + break; + } + + offset = 0; + bo->used = size; + list_move(&bo->base.list, &kgem->batch_buffers); + goto done; + } } } #endif @@ -4875,9 +5203,9 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, alloc = PAGE_ALIGN(size); assert(alloc); + alloc /= PAGE_SIZE; if (alloc > kgem->aperture_mappable / 4) flags &= ~KGEM_BUFFER_INPLACE; - alloc /= PAGE_SIZE; if (kgem->has_llc && (flags & KGEM_BUFFER_WRITE_INPLACE) != KGEM_BUFFER_WRITE_INPLACE) { @@ -4963,7 +5291,7 @@ skip_llc: CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP); if (old == NULL) { old = search_linear_cache(kgem, alloc, CREATE_INACTIVE); - if (old && !__kgem_bo_is_mappable(kgem, old)) { + if (old && !kgem_bo_can_map(kgem, old)) { _kgem_bo_destroy(kgem, old); old = NULL; } @@ -4971,7 +5299,7 @@ skip_llc: if (old) { DBG(("%s: reusing handle=%d for buffer\n", __FUNCTION__, old->handle)); - assert(__kgem_bo_is_mappable(kgem, old)); + assert(kgem_bo_can_map(kgem, old)); assert(!old->snoop); assert(old->rq == NULL); @@ -4987,8 +5315,10 @@ skip_llc: bo->mem = kgem_bo_map(kgem, &bo->base); if (bo->mem) { - if (IS_CPU_MAP(bo->base.map)) + if (bo->mem == MAP(bo->base.map__cpu)) flags &= ~KGEM_BUFFER_INPLACE; + else + bo->mmapped = MMAPPED_GTT; goto init; } else { bo->base.refcnt = 0; @@ -5107,7 +5437,8 @@ init: assert(!bo->need_io || !bo->base.needs_flush); assert(!bo->need_io || bo->base.domain != DOMAIN_GPU); assert(bo->mem); - assert(!bo->mmapped || bo->base.map != NULL); + assert(bo->mmapped != MMAPPED_GTT || MAP(bo->base.map__gtt) == bo->mem); + assert(bo->mmapped != MMAPPED_CPU || MAP(bo->base.map__cpu) == bo->mem); bo->used = size; bo->write = flags & KGEM_BUFFER_WRITE_INPLACE; @@ -5121,6 +5452,7 @@ init: done: bo->used = ALIGN(bo->used, UPLOAD_ALIGNMENT); + assert(bo->used && bo->used <= bytes(&bo->base)); assert(bo->mem); *ret = (char *)bo->mem + offset; return kgem_create_proxy(kgem, &bo->base, offset, size); @@ -5177,7 +5509,7 @@ struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem, bo->size.bytes -= stride; } - bo->map = MAKE_CPU_MAP(*ret); + bo->map__cpu = *ret; bo->pitch = stride; bo->unique_id = kgem_get_unique_id(kgem); return bo; @@ 
-5222,10 +5554,10 @@ void kgem_proxy_bo_attach(struct kgem_bo *bo, struct kgem_bo **ptr) { DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle)); - assert(bo->map == NULL || IS_CPU_MAP(bo->map)); + assert(bo->map__gtt == NULL); assert(bo->proxy); list_add(&bo->vma, &bo->proxy->vma); - bo->map = ptr; + bo->map__gtt = ptr; *ptr = kgem_bo_reference(bo); } @@ -5258,13 +5590,13 @@ void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo) bo->base.domain, __kgem_busy(kgem, bo->base.handle))); - assert(!IS_CPU_MAP(bo->base.map) || bo->base.snoop || kgem->has_llc); + assert(bo->mmapped == MMAPPED_GTT || bo->base.snoop || kgem->has_llc); VG_CLEAR(set_domain); set_domain.handle = bo->base.handle; set_domain.write_domain = 0; set_domain.read_domains = - IS_CPU_MAP(bo->base.map) ? I915_GEM_DOMAIN_CPU : I915_GEM_DOMAIN_GTT; + bo->mmapped == MMAPPED_CPU ? I915_GEM_DOMAIN_CPU : I915_GEM_DOMAIN_GTT; if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) diff --git a/contrib/sdk/sources/Intel-2D/sna/kgem.h b/contrib/sdk/sources/Intel-2D/sna/kgem.h index 64e2a4d882..a00672cbb6 100644 --- a/contrib/sdk/sources/Intel-2D/sna/kgem.h +++ b/contrib/sdk/sources/Intel-2D/sna/kgem.h @@ -71,9 +71,8 @@ struct kgem_bo { struct list request; struct list vma; - void *map; -#define IS_CPU_MAP(ptr) ((uintptr_t)(ptr) & 1) -#define IS_GTT_MAP(ptr) (ptr && ((uintptr_t)(ptr) & 1) == 0) + void *map__cpu; + void *map__gtt; #define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3)) struct kgem_bo_binding { @@ -82,11 +81,11 @@ struct kgem_bo { uint16_t offset; } binding; + uint64_t presumed_offset; uint32_t unique_id; uint32_t refcnt; uint32_t handle; uint32_t target_handle; - uint32_t presumed_offset; uint32_t delta; union { struct { @@ -200,11 +199,12 @@ struct kgem { uint32_t has_handle_lut :1; uint32_t can_blt_cpu :1; + uint32_t can_render_y :1; uint16_t fence_max; uint16_t half_cpu_cache_pages; uint32_t aperture_total, aperture_high, aperture_low, aperture_mappable; - uint32_t aperture, aperture_fenced; + uint32_t aperture, aperture_fenced, aperture_max_fence; uint32_t max_upload_tile_size, max_copy_tile_size; uint32_t max_gpu_size, max_cpu_size; uint32_t large_object_size, max_object_size; @@ -313,6 +313,8 @@ struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem, int bpp, uint32_t flags); +bool kgem_bo_convert_to_gpu(struct kgem *kgem, struct kgem_bo *bo); + uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format); void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset); @@ -349,14 +351,6 @@ static inline void kgem_submit(struct kgem *kgem) _kgem_submit(kgem); } -static inline bool kgem_flush(struct kgem *kgem, bool flush) -{ - if (kgem->nreloc == 0) - return false; - - return (kgem->flush ^ flush) && kgem_ring_is_idle(kgem, kgem->ring); -} - static inline void kgem_bo_submit(struct kgem *kgem, struct kgem_bo *bo) { if (bo->exec) @@ -392,8 +386,10 @@ static inline void kgem_set_mode(struct kgem *kgem, kgem_submit(kgem); #endif - if (kgem->nreloc && bo->exec == NULL && kgem_ring_is_idle(kgem, kgem->ring)) + if (kgem->nreloc && bo->exec == NULL && kgem_ring_is_idle(kgem, kgem->ring)) { + DBG(("%s: flushing before new bo\n", __FUNCTION__)); _kgem_submit(kgem); + } if (kgem->mode == mode) return; @@ -466,6 +462,11 @@ uint32_t kgem_add_reloc(struct kgem *kgem, struct kgem_bo *bo, uint32_t read_write_domains, uint32_t delta); +uint64_t kgem_add_reloc64(struct kgem *kgem, + uint32_t pos, + struct kgem_bo *bo, + uint32_t read_write_domains, + uint64_t delta); void *kgem_bo_map(struct kgem 
*kgem, struct kgem_bo *bo); void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo); @@ -475,15 +476,13 @@ void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo); void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo); void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo); void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write); -void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo); -void __kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr); uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo); bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo, const void *data, int length); int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo); -void kgem_get_tile_size(struct kgem *kgem, int tiling, +void kgem_get_tile_size(struct kgem *kgem, int tiling, int pitch, int *tile_width, int *tile_height, int *tile_size); static inline int __kgem_buffer_size(struct kgem_bo *bo) @@ -498,6 +497,12 @@ static inline int __kgem_bo_size(struct kgem_bo *bo) return PAGE_SIZE * bo->size.pages.count; } +static inline int __kgem_bo_num_pages(struct kgem_bo *bo) +{ + assert(bo->proxy == NULL); + return bo->size.pages.count; +} + static inline int kgem_bo_size(struct kgem_bo *bo) { if (bo->proxy) @@ -506,7 +511,6 @@ static inline int kgem_bo_size(struct kgem_bo *bo) return __kgem_bo_size(bo); } -/* static inline bool kgem_bo_blt_pitch_is_ok(struct kgem *kgem, struct kgem_bo *bo) { @@ -533,80 +537,6 @@ static inline bool kgem_bo_can_blt(struct kgem *kgem, return kgem_bo_blt_pitch_is_ok(kgem, bo); } -*/ - -static inline bool __kgem_bo_is_mappable(struct kgem *kgem, - struct kgem_bo *bo) -{ - if (bo->domain == DOMAIN_GTT) - return true; - - if (kgem->gen < 040 && bo->tiling && - bo->presumed_offset & (kgem_bo_fenced_size(kgem, bo) - 1)) - return false; - - if (kgem->gen == 021 && bo->tiling == I915_TILING_Y) - return false; - - if (kgem->has_llc && bo->tiling == I915_TILING_NONE) - return true; - - if (!bo->presumed_offset) - return kgem_bo_size(bo) <= kgem->aperture_mappable / 4; - - return bo->presumed_offset + kgem_bo_size(bo) <= kgem->aperture_mappable; -} - -static inline bool kgem_bo_is_mappable(struct kgem *kgem, - struct kgem_bo *bo) -{ - DBG(("%s: domain=%d, offset: %d size: %d\n", - __FUNCTION__, bo->domain, bo->presumed_offset, kgem_bo_size(bo))); - assert(bo->refcnt); - return __kgem_bo_is_mappable(kgem, bo); -} - -static inline bool kgem_bo_mapped(struct kgem *kgem, struct kgem_bo *bo) -{ - DBG(("%s: map=%p, tiling=%d, domain=%d\n", - __FUNCTION__, bo->map, bo->tiling, bo->domain)); - assert(bo->refcnt); - - if (bo->map == NULL) - return bo->tiling == I915_TILING_NONE && bo->domain == DOMAIN_CPU; - - return IS_CPU_MAP(bo->map) == !bo->tiling; -} - -static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo) -{ - if (kgem_bo_mapped(kgem, bo)) - return true; - - if (!bo->tiling && (kgem->has_llc || bo->domain == DOMAIN_CPU)) - return true; - - if (kgem->gen == 021 && bo->tiling == I915_TILING_Y) - return false; - - return kgem_bo_size(bo) <= kgem->aperture_mappable / 4; -} - -static inline bool kgem_bo_can_map__cpu(struct kgem *kgem, - struct kgem_bo *bo, - bool write) -{ - if (bo->purged || (bo->scanout && write)) - return false; - - if (kgem->has_llc) - return true; - - if (bo->domain != DOMAIN_CPU) - return false; - - return !write || bo->exec == NULL; -} static inline bool kgem_bo_is_snoop(struct kgem_bo *bo) { @@ -652,9 +582,6 @@ static inline bool __kgem_bo_is_busy(struct kgem *kgem, 
struct kgem_bo *bo) if (bo->exec) return true; - if (kgem_flush(kgem, bo->flush)) - kgem_submit(kgem); - if (bo->rq && !__kgem_busy(kgem, bo->handle)) __kgem_bo_clear_busy(bo); @@ -723,6 +650,53 @@ static inline void kgem_bo_mark_dirty(struct kgem_bo *bo) } while ((bo = bo->proxy)); } +static inline bool kgem_bo_mapped(struct kgem *kgem, struct kgem_bo *bo) +{ + DBG(("%s: map=%p:%p, tiling=%d, domain=%d\n", + __FUNCTION__, bo->map__gtt, bo->map__cpu, bo->tiling, bo->domain)); + + if (bo->tiling == I915_TILING_NONE && (bo->domain == DOMAIN_CPU || kgem->has_llc)) + return bo->map__cpu != NULL; + + return bo->map__gtt != NULL; +} + +static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo) +{ + DBG(("%s: map=%p:%p, tiling=%d, domain=%d, offset=%ld\n", + __FUNCTION__, bo->map__gtt, bo->map__cpu, bo->tiling, bo->domain, (long)bo->presumed_offset)); + + if (!bo->tiling && (kgem->has_llc || bo->domain == DOMAIN_CPU)) + return true; + + if (bo->map__gtt != NULL) + return true; + + if (kgem->gen == 021 && bo->tiling == I915_TILING_Y) + return false; + + if (!bo->presumed_offset) + return __kgem_bo_num_pages(bo) <= kgem->aperture_mappable / 4; + + return bo->presumed_offset / PAGE_SIZE + __kgem_bo_num_pages(bo) <= kgem->aperture_mappable; +} + +static inline bool kgem_bo_can_map__cpu(struct kgem *kgem, + struct kgem_bo *bo, + bool write) +{ + if (bo->purged || (bo->scanout && write)) + return false; + + if (kgem->has_llc) + return true; + + if (bo->domain != DOMAIN_CPU) + return false; + + return !write || bo->exec == NULL; +} + #define KGEM_BUFFER_WRITE 0x1 #define KGEM_BUFFER_INPLACE 0x2 #define KGEM_BUFFER_LAST 0x4 @@ -742,8 +716,7 @@ void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *bo); void kgem_throttle(struct kgem *kgem); #define MAX_INACTIVE_TIME 10 bool kgem_expire_cache(struct kgem *kgem); -void kgem_purge_cache(struct kgem *kgem); -void kgem_cleanup_cache(struct kgem *kgem); +bool kgem_cleanup_cache(struct kgem *kgem); void kgem_clean_scanout_cache(struct kgem *kgem); void kgem_clean_large_cache(struct kgem *kgem); @@ -758,4 +731,6 @@ static inline void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch) } #endif +void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling); + #endif /* KGEM_H */ diff --git a/contrib/sdk/sources/Intel-2D/sna/sna.c b/contrib/sdk/sources/Intel-2D/sna/sna.c index d6facc9dec..b4f17c86f8 100644 --- a/contrib/sdk/sources/Intel-2D/sna/sna.c +++ b/contrib/sdk/sources/Intel-2D/sna/sna.c @@ -706,6 +706,19 @@ sna_wait_for_scanline(struct sna *sna, +int intel_get_device_id(struct sna *sna) +{ + struct drm_i915_getparam gp; + int devid = 0; + + memset(&gp, 0, sizeof(gp)); + gp.param = I915_PARAM_CHIPSET_ID; + gp.value = &devid; + + if (drmIoctl(sna->scrn, DRM_IOCTL_I915_GETPARAM, &gp)) + return 0; + return devid; +} static const struct intel_device_info intel_generic_info = { .gen = -1, @@ -814,21 +827,6 @@ intel_detect_chipset(struct pci_device *pci) return &intel_generic_info; } -int intel_get_device_id(int fd) -{ - struct drm_i915_getparam gp; - int devid = 0; - - memset(&gp, 0, sizeof(gp)); - gp.param = I915_PARAM_CHIPSET_ID; - gp.value = &devid; - - if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) - return 0; - - return devid; -} - int drmIoctl(int fd, unsigned long request, void *arg) { ioctl_t io; diff --git a/contrib/sdk/sources/Intel-2D/sna/sna.h b/contrib/sdk/sources/Intel-2D/sna/sna.h index f11790b2f4..b79160fb4f 100644 --- a/contrib/sdk/sources/Intel-2D/sna/sna.h +++ b/contrib/sdk/sources/Intel-2D/sna/sna.h @@ -448,7 +448,8 
@@ struct sna { unsigned flags; #define SNA_NO_WAIT 0x1 #define SNA_NO_FLIP 0x2 -#define SNA_TRIPLE_BUFFER 0x4 +#define SNA_NO_VSYNC 0x4 +#define SNA_TRIPLE_BUFFER 0x8 #define SNA_TEAR_FREE 0x10 #define SNA_FORCE_SHADOW 0x20 #define SNA_FLUSH_GTT 0x40 @@ -490,6 +491,7 @@ struct sna { uint32_t fill_alu; } blt_state; union { + unsigned gt; struct gen3_render_state gen3; struct gen4_render_state gen4; struct gen5_render_state gen5; @@ -497,6 +499,8 @@ struct sna { struct gen7_render_state gen7; } render_state; + bool dri_available; + bool dri_open; /* Broken-out options. */ // OptionInfoPtr Options; @@ -611,4 +615,7 @@ sna_transform_equal(const PictTransform *a, const PictTransform *b) return memcmp(a, b, sizeof(*a)) == 0; } + +int intel_get_device_id(struct sna *sna); + #endif /* _SNA_H */ diff --git a/contrib/sdk/sources/Intel-2D/sna/sna_reg.h b/contrib/sdk/sources/Intel-2D/sna/sna_reg.h index 26282361c0..bda6ef67e9 100644 --- a/contrib/sdk/sources/Intel-2D/sna/sna_reg.h +++ b/contrib/sdk/sources/Intel-2D/sna/sna_reg.h @@ -42,22 +42,22 @@ #define BLT_SRC_TILED (1<<15) #define BLT_DST_TILED (1<<11) -#define COLOR_BLT_CMD ((2<<29)|(0x40<<22)|(0x3)) -#define XY_COLOR_BLT ((2<<29)|(0x50<<22)|(0x4)) -#define XY_SETUP_BLT ((2<<29)|(1<<22)|6) -#define XY_SETUP_MONO_PATTERN_SL_BLT ((2<<29)|(0x11<<22)|7) -#define XY_SETUP_CLIP ((2<<29)|(3<<22)|1) -#define XY_SCANLINE_BLT ((2<<29)|(0x25<<22)|1) -#define XY_TEXT_IMMEDIATE_BLT ((2<<29)|(0x31<<22)|(1<<16)) -#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) -#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|0x4) -#define XY_PAT_BLT ((2<<29)|(0x51<<22)|0x4) -#define XY_PAT_BLT_IMMEDIATE ((2<<29)|(0x72<<22)) -#define XY_MONO_PAT ((0x2<<29)|(0x52<<22)|0x7) -#define XY_MONO_SRC_COPY ((0x2<<29)|(0x54<<22)|(0x6)) -#define XY_MONO_SRC_COPY_IMM ((0x2<<29)|(0x71<<22)) -#define XY_FULL_MONO_PATTERN_BLT ((0x2<<29)|(0x57<<22)|0xa) -#define XY_FULL_MONO_PATTERN_MONO_SRC_BLT ((0x2<<29)|(0x58<<22)|0xa) +#define COLOR_BLT_CMD (2<<29|0x40<<22|(0x3)) +#define XY_COLOR_BLT (2<<29|0x50<<22|(0x4)) +#define XY_SETUP_BLT (2<<29|0x01<<22) +#define XY_SETUP_MONO_PATTERN_SL_BLT (2<<29|0x11<<22) +#define XY_SETUP_CLIP (2<<29|0x03<<22|1) +#define XY_SCANLINE_BLT (2<<29|0x25<<22|1) +#define XY_TEXT_IMMEDIATE_BLT (2<<29|0x31<<22|(1<<16)) +#define XY_SRC_COPY_BLT_CMD (2<<29|0x53<<22) +#define SRC_COPY_BLT_CMD (2<<29|0x43<<22|0x4) +#define XY_PAT_BLT (2<<29|0x51<<22) +#define XY_PAT_BLT_IMMEDIATE (2<<29|0x72<<22) +#define XY_MONO_PAT (2<<29|0x52<<22) +#define XY_MONO_SRC_COPY (2<<29|0x54<<22) +#define XY_MONO_SRC_COPY_IMM (2<<29|0x71<<22) +#define XY_FULL_MONO_PATTERN_BLT (2<<29|0x57<<22) +#define XY_FULL_MONO_PATTERN_MONO_SRC_BLT (2<<29|0x58<<22) /* FLUSH commands */ #define BRW_3D(Pipeline,Opcode,Subopcode) \ diff --git a/contrib/sdk/sources/Intel-2D/sna/sna_render.h b/contrib/sdk/sources/Intel-2D/sna/sna_render.h index 4ab8acf544..ad0ec41d38 100644 --- a/contrib/sdk/sources/Intel-2D/sna/sna_render.h +++ b/contrib/sdk/sources/Intel-2D/sna/sna_render.h @@ -104,6 +104,7 @@ struct sna_composite_op { uint32_t inplace :1; uint32_t overwrites:1; uint32_t bpp : 6; + uint32_t alu : 4; uint32_t cmd; uint32_t br13; @@ -245,7 +246,7 @@ struct sna_render { struct sna_solid_cache { struct kgem_bo *cache_bo; struct kgem_bo *bo[1024]; - uint32_t color[1025]; + uint32_t color[1024]; int last; int size; int dirty; @@ -381,6 +382,7 @@ enum { }; struct gen6_render_state { + unsigned gt; const struct gt_info *info; struct kgem_bo *general_bo; @@ -430,6 +432,7 @@ enum { }; struct gen7_render_state { + 
unsigned gt; const struct gt_info *info; struct kgem_bo *general_bo; diff --git a/contrib/sdk/sources/Intel-2D/uxa/uxa.c b/contrib/sdk/sources/Intel-2D/uxa/uxa.c index ba5de045ac..14defe797e 100644 --- a/contrib/sdk/sources/Intel-2D/uxa/uxa.c +++ b/contrib/sdk/sources/Intel-2D/uxa/uxa.c @@ -70,10 +70,10 @@ static void i830_done_composite(PixmapPtr dest) // intel_debug_flush(scrn); } -int sna_bitmap_from_handle(bitmap_t *bitmap, uint32_t handle) +int uxa_bitmap_from_handle(bitmap_t *bitmap, uint32_t handle) { - struct intel_screen_private *intel = intel_get_screen_private(); - drm_intel_bo *bo; + struct intel_screen_private *intel = intel_get_screen_private(); + drm_intel_bo *bo; surface_t *sf; unsigned int size; @@ -118,14 +118,14 @@ err_1: return -1; }; -void sna_set_bo_handle(bitmap_t *bitmap, int handle) +void uxa_set_bo_handle(bitmap_t *bitmap, int handle) { - sna_bitmap_from_handle(bitmap, handle); + uxa_bitmap_from_handle(bitmap, handle); }; -int sna_blit_tex(bitmap_t *bitmap, bool scale, int dst_x, int dst_y, - int w, int h, int src_x, int src_y) +int uxa_blit_tex(bitmap_t *bitmap, int scale, int vsync, + int dst_x, int dst_y,int w, int h, int src_x, int src_y) { // DBG("%s\n", __FUNCTION__);