From 8fd9581723dd65328bd7cb196a59a809ae964b15 Mon Sep 17 00:00:00 2001 From: "Sergey Semyonov (Serge)" Date: Tue, 21 Feb 2012 06:06:51 +0000 Subject: [PATCH] render context and composite render git-svn-id: svn://kolibrios.org@2361 a494cfbc-eb01-0410-851d-a64ba20cac60 --- drivers/video/drm/i915/bitmap.c | 8 +- drivers/video/drm/i915/bitmap.h | 9 + drivers/video/drm/i915/kms_display.c | 255 ++++++++++++++++++++++- drivers/video/drm/i915/main.c | 5 +- drivers/video/drm/i915/sna/gen6_render.c | 234 ++++++++++++++++++++- drivers/video/drm/i915/sna/sna.c | 54 +++++ drivers/video/drm/i915/sna/sna_render.h | 7 +- 7 files changed, 553 insertions(+), 19 deletions(-) diff --git a/drivers/video/drm/i915/bitmap.c b/drivers/video/drm/i915/bitmap.c index a1594d659b..66d173c3d1 100644 --- a/drivers/video/drm/i915/bitmap.c +++ b/drivers/video/drm/i915/bitmap.c @@ -90,13 +90,14 @@ int create_surface(struct io_call_10 *pbitmap) goto err1; bitmap = CreateObject(GetPid(), sizeof(*bitmap)); +// printf("bitmap %x\n", bitmap); + if( bitmap == NULL) + goto err1; + bitmap->handle = handle; bitmap->header.destroy = destroy_bitmap; bitmap->obj = NULL; -// printf("bitmap %x\n", bitmap); - if( bitmap == NULL) - goto err1; hman_set_data(&bm_man, handle, bitmap); @@ -210,6 +211,7 @@ int lock_surface(struct io_call_12 *pbitmap) return 0; }; + int init_hman(struct hman *man, u32 count) { u32* data; diff --git a/drivers/video/drm/i915/bitmap.h b/drivers/video/drm/i915/bitmap.h index c32aca489e..4d00b724e8 100644 --- a/drivers/video/drm/i915/bitmap.h +++ b/drivers/video/drm/i915/bitmap.h @@ -95,6 +95,15 @@ typedef struct #define HW_VID_BLIT (1<<2) /* planar and packed video */ /* 3 - 63 reserved */ +struct context +{ + kobj_t header; + + bitmap_t *mask; + u32 seqno; + int slot; +}; + int get_driver_caps(hwcaps_t *caps); int create_surface(struct io_call_10 *pbitmap); int lock_surface(struct io_call_12 *pbitmap); diff --git a/drivers/video/drm/i915/kms_display.c b/drivers/video/drm/i915/kms_display.c index d66e20ab55..44188c7b9e 100644 --- a/drivers/video/drm/i915/kms_display.c +++ b/drivers/video/drm/i915/kms_display.c @@ -61,6 +61,7 @@ struct tag_display void (__stdcall *move_cursor)(cursor_t *cursor, int x, int y); void (__stdcall *restore_cursor)(int x, int y); void (*disable_mouse)(void); + u32 mask_seqno; }; @@ -964,7 +965,6 @@ void execute_buffer (struct drm_i915_gem_object *buffer, uint32_t offset, // i915_interrupt_info(main_device); -// ironlake_enable_vblank(main_device, 0); }; @@ -1007,6 +1007,259 @@ int blit_textured(u32 hbitmap, int dst_x, int dst_y, }; +int sna_blit_tex(bitmap_t *dst_bitmap, int dst_x, int dst_y, + int w, int h, bitmap_t *src_bitmap, int src_x, int src_y, + bitmap_t *mask_bitmap); + +int create_context(); +struct context *get_context(); + +int blit_tex(u32 hbitmap, int dst_x, int dst_y, + int src_x, int src_y, u32 w, u32 h) +{ + drm_i915_private_t *dev_priv = main_device->dev_private; + struct context *ctx; + + bitmap_t *src_bitmap, *dst_bitmap; + bitmap_t screen; + int ret; + + bitmap_t *mask_bitmap; + rect_t winrc; + +// dbgprintf(" handle: %d dx %d dy %d sx %d sy %d w %d h %d\n", +// hbitmap, dst_x, dst_y, src_x, src_y, w, h); + + if(unlikely(hbitmap==0)) + return -1; + + src_bitmap = (bitmap_t*)hman_get_data(&bm_man, hbitmap); +// dbgprintf("bitmap %x\n", src_bitmap); + + if(unlikely(src_bitmap==NULL)) + return -1; + + ctx = get_context(); + if(unlikely(ctx==NULL)) + { + ret = create_context(); + if(ret!=0) + return -1; + + ctx = get_context(); + }; + + mask_bitmap = ctx->mask; + + GetWindowRect(&winrc); + dst_x+= winrc.left; + dst_y+= winrc.top; + + + if(ctx->seqno != os_display->mask_seqno) + { + u8* src_offset; + u8* dst_offset; + u32 slot = *((u8*)CURRENT_TASK); + u32 ifl; + + ret = gem_object_lock(mask_bitmap->obj); + if(ret !=0 ) + { + dbgprintf("%s fail\n", __FUNCTION__); + return ret; + }; + + printf("width %d height %d\n", winrc.right, winrc.bottom); + + mask_bitmap->width = winrc.right; + mask_bitmap->height = winrc.bottom; + mask_bitmap->pitch = ALIGN(w,64); + + slot|= (slot<<8)|(slot<<16)|(slot<<24); + + + __asm__ __volatile__ ( + "movd %[slot], %%xmm6 \n" + "punpckldq %%xmm6, %%xmm6 \n" + "punpcklqdq %%xmm6, %%xmm6 \n" + :: [slot] "g" (slot) + :"xmm6"); + + src_offset = mask_bitmap->uaddr; + + dst_offset = (u8*)(dst_y*os_display->width + dst_x); + dst_offset+= get_display_map(); + + u32_t tmp_h = mask_bitmap->height; + + ifl = safe_cli(); + while( tmp_h--) + { + int tmp_w = mask_bitmap->width; + + u8* tmp_src = src_offset; + u8* tmp_dst = dst_offset; + + src_offset+= mask_bitmap->pitch; + dst_offset+= os_display->width; + +// while( tmp_w--) +// { +// *(tmp_src) = (*tmp_dst==slot)?0x1:0x00; +// tmp_src++; +// tmp_dst++; +// }; + while(tmp_w >= 64) + { + __asm__ __volatile__ ( + "movdqu (%0), %%xmm0 \n" + "movdqu 16(%0), %%xmm1 \n" + "movdqu 32(%0), %%xmm2 \n" + "movdqu 48(%0), %%xmm3 \n" + "pcmpeqb %%xmm6, %%xmm0 \n" + "pcmpeqb %%xmm6, %%xmm1 \n" + "pcmpeqb %%xmm6, %%xmm2 \n" + "pcmpeqb %%xmm6, %%xmm3 \n" + "movdqa %%xmm0, (%%edi) \n" + "movdqa %%xmm1, 16(%%edi) \n" + "movdqa %%xmm2, 32(%%edi) \n" + "movdqa %%xmm3, 48(%%edi) \n" + + :: "r" (tmp_dst), "D" (tmp_src) + :"xmm0","xmm1","xmm2","xmm3"); + tmp_w -= 64; + tmp_src += 64; + tmp_dst += 64; + } + + if( tmp_w >= 32 ) + { + __asm__ __volatile__ ( + "movdqu (%0), %%xmm0 \n" + "movdqu 16(%0), %%xmm1 \n" + "pcmpeqb %%xmm6, %%xmm0 \n" + "pcmpeqb %%xmm6, %%xmm1 \n" + "movdqa %%xmm0, (%%edi) \n" + "movdqa %%xmm1, 16(%%edi) \n" + + :: "r" (tmp_dst), "D" (tmp_src) + :"xmm0","xmm1"); + tmp_w -= 32; + tmp_src += 32; + tmp_dst += 32; + } + + while( tmp_w > 0 ) + { + __asm__ __volatile__ ( + "movdqu (%0), %%xmm0 \n" + "pcmpeqb %%xmm6, %%xmm0 \n" + "movdqa %%xmm0, (%%edi) \n" + :: "r" (tmp_dst), "D" (tmp_src) + :"xmm0"); + tmp_w -= 16; + tmp_src += 16; + tmp_dst += 16; + } + }; + safe_sti(ifl); + ctx->seqno = os_display->mask_seqno; + } + + screen.pitch = os_display->pitch; + screen.gaddr = 0; + screen.width = os_display->width; + screen.height = os_display->height; + screen.obj = (void*)-1; + + dst_bitmap = &screen; + + + sna_blit_tex(dst_bitmap, dst_x, dst_y, w, h, src_bitmap, src_x, src_y, + mask_bitmap); + +// asm volatile ("int3"); +}; + + +struct context *context_map[256]; + +void __attribute__((regparm(1))) destroy_context(struct context *context) +{ + printf("destroy context %x\n", context); + + context_map[context->slot] = NULL; + __DestroyObject(context); +}; + + +int create_context() +{ + struct context *context; + + bitmap_t *mask; + int slot; + + struct io_call_10 io_10; + int ret; + + slot = *((u8*)CURRENT_TASK); + + if(context_map[slot] != NULL) + return 0; + + context = CreateObject(GetPid(), sizeof(*context)); +// printf("context %x\n", coontext); + if( context == NULL) + goto err1; + context->header.destroy = destroy_context; + + dbgprintf("Create mask surface\n"); + + io_10.width = os_display->width/4; /* need bitmap format here */ + io_10.height = os_display->height+1; + io_10.max_width = os_display->width/4; + io_10.max_height = os_display->height+1; + + ret = create_surface(&io_10); + if(ret) + goto err2; + + mask= (bitmap_t*)hman_get_data(&bm_man, io_10.handle); + if(unlikely(mask == NULL)) /* something really terrible happend */ + goto err2; + dbgprintf("done\n"); + + context->mask = mask; + context->seqno = os_display->mask_seqno-1; + context->slot = slot; + + context_map[slot] = context; + return 0; + +err2: + __DestroyObject(context); +err1: + return -1; +}; + +struct context *get_context() +{ + + int slot = *((u8*)CURRENT_TASK); + + return context_map[slot]; +} + + + + + + + + + void __stdcall run_workqueue(struct workqueue_struct *cwq) { diff --git a/drivers/video/drm/i915/main.c b/drivers/video/drm/i915/main.c index 8004ca6be9..433a25ccf8 100644 --- a/drivers/video/drm/i915/main.c +++ b/drivers/video/drm/i915/main.c @@ -26,6 +26,9 @@ int blit_video(u32 hbitmap, int dst_x, int dst_y, int blit_textured(u32 hbitmap, int dst_x, int dst_y, int src_x, int src_y, u32 w, u32 h); +int blit_tex(u32 hbitmap, int dst_x, int dst_y, + int src_x, int src_y, u32 w, u32 h); + static char log[256]; int x86_clflush_size; @@ -159,7 +162,7 @@ int _stdcall display_handler(ioctl_t *io) // blit_video( inp[0], inp[1], inp[2], // inp[3], inp[4], inp[5], inp[6]); - blit_textured( inp[0], inp[1], inp[2], + blit_tex( inp[0], inp[1], inp[2], inp[3], inp[4], inp[5], inp[6]); diff --git a/drivers/video/drm/i915/sna/gen6_render.c b/drivers/video/drm/i915/sna/gen6_render.c index f165e91129..ff82586577 100644 --- a/drivers/video/drm/i915/sna/gen6_render.c +++ b/drivers/video/drm/i915/sna/gen6_render.c @@ -90,7 +90,9 @@ static const struct wm_kernel_info { unsigned int size; Bool has_mask; } wm_kernels[] = { - KERNEL(NOMASK, ps_kernel_nomask_affine, FALSE), +// KERNEL(NOMASK, ps_kernel_nomask_affine, FALSE), +// KERNEL(MASK, ps_kernel_masknoca_affine, TRUE), + KERNEL(NOMASK, ps_kernel_masknoca_affine, TRUE), KERNEL(MASK, ps_kernel_masknoca_affine, TRUE), }; #undef KERNEL @@ -171,8 +173,11 @@ static uint32_t gen6_get_blend(int op, { uint32_t src, dst; +// src = GEN6_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend; +// dst = GEN6_BLENDFACTOR_ZERO; //gen6_blend_op[op].dst_blend; + src = GEN6_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend; - dst = GEN6_BLENDFACTOR_ZERO; //gen6_blend_op[op].dst_blend; + dst = GEN6_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend; #if 0 /* If there's no dst alpha channel, adjust the blend op so that @@ -1302,6 +1307,50 @@ gen6_get_batch(struct sna *sna) gen6_emit_invariant(sna); } +static void gen6_emit_composite_state(struct sna *sna, + const struct sna_composite_op *op) +{ + uint32_t *binding_table; + uint16_t offset; + bool dirty; + + gen6_get_batch(sna); + dirty = FALSE; + + binding_table = gen6_composite_get_binding_table(sna, &offset); + + binding_table[0] = + gen6_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + op->dst.format, + TRUE); + binding_table[1] = + gen6_bind_bo(sna, + op->src.bo, op->src.width, op->src.height, + op->src.card_format, + FALSE); + if (op->mask.bo) { + binding_table[2] = + gen6_bind_bo(sna, + op->mask.bo, + op->mask.width, + op->mask.height, + op->mask.card_format, + FALSE); + } + + if (sna->kgem.surface == offset && + *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table && + (op->mask.bo == NULL || + sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) { + sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); + offset = sna->render_state.gen6.surface_table; + } + + gen6_emit_state(sna, op, offset | dirty); +} + + static void gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op) @@ -1586,15 +1635,90 @@ gen6_render_video(struct sna *sna, #endif -static void gen6_render_composite_done(struct sna *sna, - const struct sna_composite_op *op) -{ - DBG(("%s\n", __FUNCTION__)); - if (sna->render_state.gen6.vertex_offset) { - gen6_vertex_flush(sna); - gen6_magic_ca_pass(sna, op); - } +fastcall static void +gen6_emit_composite_primitive_identity_source_mask(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float src_x, src_y; + float msk_x, msk_y; + float w, h; + float *v; + + src_x = r->src.x + op->src.offset[0]; + src_y = r->src.y + op->src.offset[1]; + msk_x = r->mask.x + op->mask.offset[0]; + msk_y = r->mask.y + op->mask.offset[1]; + w = r->width; + h = r->height; + + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 15; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[1] = (src_x + w) * op->src.scale[0]; + v[2] = (src_y + h) * op->src.scale[1]; + v[3] = (msk_x + w) * op->mask.scale[0]; + v[4] = (msk_y + h) * op->mask.scale[1]; + + dst.p.x = r->dst.x; + v[5] = dst.f; + v[6] = src_x * op->src.scale[0]; + v[7] = v[2]; + v[8] = msk_x * op->mask.scale[0]; + v[9] = v[4]; + + dst.p.y = r->dst.y; + v[10] = dst.f; + v[11] = v[6]; + v[12] = src_y * op->src.scale[1]; + v[13] = v[8]; + v[14] = msk_y * op->mask.scale[1]; +} + +fastcall static void +gen6_render_composite_box(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box) +{ + struct sna_composite_rectangles r; + + if (unlikely(!gen6_get_rectangles(sna, op, 1))) { +// _kgem_submit(&sna->kgem); +// gen6_emit_composite_state(sna, op); +// gen6_get_rectangles(sna, op, 1); + } + + DBG((" %s: (%d, %d), (%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2)); + + r.dst.x = box->x1; + r.dst.y = box->y1; + r.width = box->x2 - box->x1; + r.height = box->y2 - box->y1; + r.src = r.mask = r.dst; + + op->prim_emit(sna, op, &r); +} + + +static void gen6_render_composite_done(struct sna *sna, + const struct sna_composite_op *op) +{ + DBG(("%s\n", __FUNCTION__)); + + if (sna->render_state.gen6.vertex_offset) { + gen6_vertex_flush(sna); + gen6_magic_ca_pass(sna, op); + } // if (op->mask.bo) // kgem_bo_destroy(&sna->kgem, op->mask.bo); @@ -1605,6 +1729,94 @@ static void gen6_render_composite_done(struct sna *sna, } +static Bool +gen6_render_composite(struct sna *sna, + uint8_t op, + bitmap_t *src, + struct kgem_bo *src_bo, + bitmap_t *mask, + struct kgem_bo *mask_bo, + bitmap_t *dst, + struct kgem_bo *dst_bo, + int16_t src_x, int16_t src_y, + int16_t msk_x, int16_t msk_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp) +{ +// if (op >= ARRAY_SIZE(gen6_blend_op)) +// return FALSE; + +// ENTER(); + + DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, + width, height, sna->kgem.ring)); + + tmp->op = PictOpSrc; + + tmp->dst.bo = dst_bo; + tmp->dst.width = dst->width; + tmp->dst.height = dst->height; + tmp->dst.format = GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; + + + tmp->src.bo = src_bo; + tmp->src.card_format = GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; + tmp->src.width = src->width; + tmp->src.height = src->height; + + tmp->src.scale[0] = 1.f/width; //src->width; + tmp->src.scale[1] = 1.f/height; //src->height; + tmp->src.filter = SAMPLER_FILTER_BILINEAR; + tmp->src.repeat = SAMPLER_EXTEND_NONE; + tmp->src.offset[0] = -dst_x; + tmp->src.offset[1] = -dst_y; + tmp->src.is_affine = TRUE; + + + tmp->mask.bo = mask_bo; + tmp->mask.card_format = GEN6_SURFACEFORMAT_A8_UNORM; + tmp->mask.width = mask->width; + tmp->mask.height = mask->height; + + tmp->mask.scale[0] = 1.f/mask->width; + tmp->mask.scale[1] = 1.f/mask->height; + tmp->mask.filter = SAMPLER_FILTER_NEAREST; + tmp->mask.repeat = SAMPLER_EXTEND_NONE; + tmp->mask.offset[0] = -dst_x; + tmp->mask.offset[1] = -dst_y; + tmp->mask.is_affine = TRUE; + + tmp->is_affine = TRUE; + tmp->has_component_alpha = FALSE; + tmp->need_magic_ca_pass = FALSE; + + tmp->prim_emit = gen6_emit_composite_primitive_identity_source_mask; + + tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine; + + tmp->floats_per_rect = 3 * tmp->floats_per_vertex; + + tmp->u.gen6.wm_kernel = GEN6_WM_KERNEL_MASK; + tmp->u.gen6.nr_surfaces = 2 + 1; + tmp->u.gen6.nr_inputs = 1 + 1; + tmp->u.gen6.ve_id = gen6_choose_composite_vertex_buffer(tmp); + + tmp->need_magic_ca_pass = TRUE; + +// tmp->blt = gen6_render_composite_blt; + tmp->box = gen6_render_composite_box; +// tmp->boxes = gen6_render_composite_boxes; + tmp->done = gen6_render_composite_done; + + gen6_emit_composite_state(sna, tmp); + gen6_align_vertex(sna, tmp); + +// LEAVE(); + + return TRUE; +} + static void gen6_emit_copy_state(struct sna *sna, @@ -1930,7 +2142,7 @@ Bool gen6_render_init(struct sna *sna) // sna->kgem.context_switch = gen6_render_context_switch; sna->kgem.retire = gen6_render_retire; -// sna->render.composite = gen6_render_composite; + sna->render.composite = gen6_render_composite; // sna->render.video = gen6_render_video; // sna->render.copy_boxes = gen6_render_copy_boxes; diff --git a/drivers/video/drm/i915/sna/sna.c b/drivers/video/drm/i915/sna/sna.c index d14568b66c..3ab1708da6 100644 --- a/drivers/video/drm/i915/sna/sna.c +++ b/drivers/video/drm/i915/sna/sna.c @@ -331,3 +331,57 @@ int sna_blit_copy(bitmap_t *dst_bitmap, int dst_x, int dst_y, }; +int sna_blit_tex(bitmap_t *dst_bitmap, int dst_x, int dst_y, + int w, int h, bitmap_t *src_bitmap, int src_x, int src_y, + bitmap_t *mask_bitmap) + +{ + struct sna_composite_op cop; + batchbuffer_t execbuffer; + BoxRec box; + + struct kgem_bo src_bo, mask_bo, dst_bo; + + memset(&cop, 0, sizeof(cop)); + memset(&execbuffer, 0, sizeof(execbuffer)); + memset(&src_bo, 0, sizeof(src_bo)); + memset(&dst_bo, 0, sizeof(dst_bo)); + memset(&mask_bo, 0, sizeof(mask_bo)); + + src_bo.gaddr = src_bitmap->gaddr; + src_bo.pitch = src_bitmap->pitch; + src_bo.tiling = 0; + + dst_bo.gaddr = dst_bitmap->gaddr; + dst_bo.pitch = dst_bitmap->pitch; + dst_bo.tiling = 0; + + mask_bo.gaddr = mask_bitmap->gaddr; + mask_bo.pitch = mask_bitmap->pitch; + mask_bo.tiling = 0; + + box.x1 = dst_x; + box.y1 = dst_y; + box.x2 = dst_x+w; + box.y2 = dst_y+h; + + sna_device->render.composite(sna_device, 0, + src_bitmap, &src_bo, + mask_bitmap, &mask_bo, + dst_bitmap, &dst_bo, + src_x, src_y, + src_x, src_y, + dst_x, dst_y, + w, h, &cop); + + cop.box(sna_device, &cop, &box); + cop.done(sna_device, &cop); + + INIT_LIST_HEAD(&execbuffer.objects); + list_add_tail(&src_bitmap->obj->exec_list, &execbuffer.objects); + list_add_tail(&mask_bitmap->obj->exec_list, &execbuffer.objects); + + _kgem_submit(&sna_device->kgem, &execbuffer); + +}; + diff --git a/drivers/video/drm/i915/sna/sna_render.h b/drivers/video/drm/i915/sna/sna_render.h index 60858845ed..e3dd0b7aa4 100644 --- a/drivers/video/drm/i915/sna/sna_render.h +++ b/drivers/video/drm/i915/sna/sna_render.h @@ -141,15 +141,16 @@ struct sna_render { int max_3d_size; int max_3d_pitch; -/* Bool (*composite)(struct sna *sna, uint8_t op, - PicturePtr dst, PicturePtr src, PicturePtr mask, + bitmap_t *src, struct kgem_bo *src_bo, + bitmap_t *mask, struct kgem_bo *mask_bo, + bitmap_t *dst, struct kgem_bo *dst_bo, int16_t src_x, int16_t src_y, int16_t msk_x, int16_t msk_y, int16_t dst_x, int16_t dst_y, int16_t w, int16_t h, struct sna_composite_op *tmp); - +/* Bool (*composite_spans)(struct sna *sna, uint8_t op, PicturePtr dst, PicturePtr src, int16_t src_x, int16_t src_y,