render context and composite render

git-svn-id: svn://kolibrios.org@2361 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
Sergey Semyonov (Serge) 2012-02-21 06:06:51 +00:00
parent 80ab648f04
commit 8fd9581723
7 changed files with 553 additions and 19 deletions

View File

@ -90,13 +90,14 @@ int create_surface(struct io_call_10 *pbitmap)
goto err1;
bitmap = CreateObject(GetPid(), sizeof(*bitmap));
// printf("bitmap %x\n", bitmap);
if( bitmap == NULL)
goto err1;
bitmap->handle = handle;
bitmap->header.destroy = destroy_bitmap;
bitmap->obj = NULL;
// printf("bitmap %x\n", bitmap);
if( bitmap == NULL)
goto err1;
hman_set_data(&bm_man, handle, bitmap);
@ -210,6 +211,7 @@ int lock_surface(struct io_call_12 *pbitmap)
return 0;
};
int init_hman(struct hman *man, u32 count)
{
u32* data;

View File

@ -95,6 +95,15 @@ typedef struct
#define HW_VID_BLIT (1<<2) /* planar and packed video */
/* 3 - 63 reserved */
/*
 * Per-task render context.  create_context() allocates one per task slot and
 * stores it in context_map[]; blit_tex() fetches it through get_context().
 */
struct context
{
/* Kernel object header; create_context() points header.destroy at
 * destroy_context(). */
kobj_t header;
/* Mask surface created by create_context(); blit_tex() refills it from the
 * display map and hands it to sna_blit_tex() as the mask bitmap. */
bitmap_t *mask;
/* Value of os_display->mask_seqno when the mask was last rebuilt; blit_tex()
 * rebuilds the mask whenever the two values differ. */
u32 seqno;
/* Task slot (byte read from CURRENT_TASK) used as the index into
 * context_map[]. */
int slot;
};
int get_driver_caps(hwcaps_t *caps);
int create_surface(struct io_call_10 *pbitmap);
int lock_surface(struct io_call_12 *pbitmap);

View File

@ -61,6 +61,7 @@ struct tag_display
void (__stdcall *move_cursor)(cursor_t *cursor, int x, int y);
void (__stdcall *restore_cursor)(int x, int y);
void (*disable_mouse)(void);
u32 mask_seqno;
};
@ -964,7 +965,6 @@ void execute_buffer (struct drm_i915_gem_object *buffer, uint32_t offset,
// i915_interrupt_info(main_device);
// ironlake_enable_vblank(main_device, 0);
};
@ -1007,6 +1007,259 @@ int blit_textured(u32 hbitmap, int dst_x, int dst_y,
};
/*
 * Forward declarations used by blit_tex().
 *
 * sna_blit_tex() composites a w x h rectangle of src_bitmap onto dst_bitmap
 * at (dst_x, dst_y), using mask_bitmap as the mask surface.
 */
int sna_blit_tex(bitmap_t *dst_bitmap, int dst_x, int dst_y,
int w, int h, bitmap_t *src_bitmap, int src_x, int src_y,
bitmap_t *mask_bitmap);
/* Creates the render context for the current task slot; returns 0 on success
 * (or if one already exists) and -1 on failure. */
int create_context();
/* Returns the render context of the current task slot, or NULL if none has
 * been created yet. */
struct context *get_context();
/*
 * blit_tex - composite a bitmap onto the screen through the task's window mask.
 *
 * hbitmap       handle of the source bitmap, resolved through bm_man
 * dst_x, dst_y  destination position relative to the window; the window's
 *               left/top (from GetWindowRect) are added to get screen
 *               coordinates
 * src_x, src_y  source position, passed unchanged to sna_blit_tex()
 * w, h          size of the rectangle to blit
 *
 * The per-task context is created on first use.  Whenever the context's
 * seqno differs from os_display->mask_seqno, the mask surface is refilled
 * from the display map: every mask byte becomes 0xFF where the display-map
 * byte equals the current task slot and 0x00 elsewhere.
 *
 * Returns 0 on success, -1 when the handle is 0 or unknown or no context
 * could be obtained, or the non-zero result of gem_object_lock() when the
 * mask surface cannot be locked.
 */
int blit_tex(u32 hbitmap, int dst_x, int dst_y,
             int src_x, int src_y, u32 w, u32 h)
{
    struct context *ctx;
    bitmap_t *src_bitmap, *dst_bitmap;
    bitmap_t *mask_bitmap;
    bitmap_t  screen;
    rect_t    winrc;
    int       ret;

//    dbgprintf("  handle: %d dx %d dy %d sx %d sy %d w %d h %d\n",
//              hbitmap, dst_x, dst_y, src_x, src_y, w, h);

    if(unlikely(hbitmap==0))
        return -1;

    src_bitmap = (bitmap_t*)hman_get_data(&bm_man, hbitmap);
    if(unlikely(src_bitmap==NULL))
        return -1;

    ctx = get_context();
    if(unlikely(ctx==NULL))
    {
        ret = create_context();
        if(ret!=0)
            return -1;

        ctx = get_context();
        /* Defensive: never dereference a missing context below. */
        if(unlikely(ctx==NULL))
            return -1;
    }

    mask_bitmap = ctx->mask;

    GetWindowRect(&winrc);
    dst_x+= winrc.left;
    dst_y+= winrc.top;

    if(ctx->seqno != os_display->mask_seqno)
    {
        u8  *mask_row;                      /* current row in the mask surface */
        u8  *map_row;                       /* current row in the display map  */
        u32  slot = *((u8*)CURRENT_TASK);
        u32  rows;
        u32  ifl;

        ret = gem_object_lock(mask_bitmap->obj);
        if(ret !=0 )
        {
            dbgprintf("%s fail\n", __FUNCTION__);
            return ret;
        }

        /* The code treats winrc.right/bottom as the window width/height. */
        printf("width %d height %d\n", winrc.right, winrc.bottom);

        mask_bitmap->width  = winrc.right;
        mask_bitmap->height = winrc.bottom;
        /* NOTE(review): the pitch is derived from the blit width w, while
         * each row written below is winrc.right bytes wide (rounded up to
         * 16).  Rows overlap if w is smaller than the window width -
         * confirm that callers always pass the full window width. */
        mask_bitmap->pitch  = ALIGN(w,64);

        /* Replicate the slot byte into all 16 bytes of xmm6.  The compare
         * blocks below rely on xmm6 staying intact between asm statements. */
        slot|= (slot<<8)|(slot<<16)|(slot<<24);

        __asm__ __volatile__ (
        "movd       %[slot],   %%xmm6    \n"
        "punpckldq  %%xmm6, %%xmm6            \n"
        "punpcklqdq %%xmm6, %%xmm6            \n"
        :: [slot]  "g" (slot)
        :"xmm6");

        mask_row = mask_bitmap->uaddr;

        map_row  = (u8*)(dst_y*os_display->width + dst_x);
        map_row += get_display_map();

        rows = mask_bitmap->height;

        ifl = safe_cli();
        while( rows--)
        {
            int  remaining = mask_bitmap->width;
            u8  *mask_ptr  = mask_row;
            u8  *map_ptr   = map_row;

            mask_row += mask_bitmap->pitch;
            map_row  += os_display->width;

            /* The stores go through %edi to memory the compiler cannot see,
             * hence the "memory" clobber on every block. */
            while(remaining >= 64)
            {
                __asm__ __volatile__ (
                "movdqu     (%0),   %%xmm0      \n"
                "movdqu   16(%0),   %%xmm1      \n"
                "movdqu   32(%0),   %%xmm2      \n"
                "movdqu   48(%0),   %%xmm3      \n"
                "pcmpeqb    %%xmm6, %%xmm0      \n"
                "pcmpeqb    %%xmm6, %%xmm1      \n"
                "pcmpeqb    %%xmm6, %%xmm2      \n"
                "pcmpeqb    %%xmm6, %%xmm3      \n"
                "movdqa     %%xmm0,   (%%edi)   \n"
                "movdqa     %%xmm1, 16(%%edi)   \n"
                "movdqa     %%xmm2, 32(%%edi)   \n"
                "movdqa     %%xmm3, 48(%%edi)   \n"
                :: "r" (map_ptr), "D" (mask_ptr)
                :"xmm0","xmm1","xmm2","xmm3","memory");
                remaining -= 64;
                mask_ptr  += 64;
                map_ptr   += 64;
            }

            if( remaining >= 32 )
            {
                __asm__ __volatile__ (
                "movdqu     (%0),   %%xmm0      \n"
                "movdqu   16(%0),   %%xmm1      \n"
                "pcmpeqb    %%xmm6, %%xmm0      \n"
                "pcmpeqb    %%xmm6, %%xmm1      \n"
                "movdqa     %%xmm0,   (%%edi)   \n"
                "movdqa     %%xmm1, 16(%%edi)   \n"
                :: "r" (map_ptr), "D" (mask_ptr)
                :"xmm0","xmm1","memory");
                remaining -= 32;
                mask_ptr  += 32;
                map_ptr   += 32;
            }

            /* Tail: always handles whole 16-byte groups, so up to 15 bytes
             * past the row width are read and written. */
            while( remaining > 0 )
            {
                __asm__ __volatile__ (
                "movdqu     (%0),   %%xmm0      \n"
                "pcmpeqb    %%xmm6, %%xmm0      \n"
                "movdqa     %%xmm0,   (%%edi)   \n"
                :: "r" (map_ptr), "D" (mask_ptr)
                :"xmm0","memory");
                remaining -= 16;
                mask_ptr  += 16;
                map_ptr   += 16;
            }
        }
        safe_sti(ifl);

        ctx->seqno = os_display->mask_seqno;
    }

    /* Describe the screen as a pseudo bitmap at graphics address 0. */
    screen.pitch  = os_display->pitch;
    screen.gaddr  = 0;
    screen.width  = os_display->width;
    screen.height = os_display->height;
    screen.obj    = (void*)-1;

    dst_bitmap = &screen;

    sna_blit_tex(dst_bitmap, dst_x, dst_y, w, h, src_bitmap, src_x, src_y,
                 mask_bitmap);

//    asm volatile ("int3");

    /* The original fell off the end of a non-void function. */
    return 0;
}
/*
 * Render contexts indexed by task slot.  The slot is read as a u8 from
 * CURRENT_TASK, so 256 entries cover every possible index.  A NULL entry
 * means the task has no context yet.
 */
struct context *context_map[256];
/*
 * Destroy callback installed in context->header.destroy by create_context().
 *
 * Logs the context address, clears the task's entry in context_map[] and
 * releases the object.  The mask surface referenced by context->mask is not
 * touched here.
 */
void __attribute__((regparm(1))) destroy_context(struct context *context)
{
    int slot;

    printf("destroy context %x\n", context);

    slot = context->slot;
    context_map[slot] = NULL;

    __DestroyObject(context);
}
int create_context()
{
struct context *context;
bitmap_t *mask;
int slot;
struct io_call_10 io_10;
int ret;
slot = *((u8*)CURRENT_TASK);
if(context_map[slot] != NULL)
return 0;
context = CreateObject(GetPid(), sizeof(*context));
// printf("context %x\n", coontext);
if( context == NULL)
goto err1;
context->header.destroy = destroy_context;
dbgprintf("Create mask surface\n");
io_10.width = os_display->width/4; /* need bitmap format here */
io_10.height = os_display->height+1;
io_10.max_width = os_display->width/4;
io_10.max_height = os_display->height+1;
ret = create_surface(&io_10);
if(ret)
goto err2;
mask= (bitmap_t*)hman_get_data(&bm_man, io_10.handle);
if(unlikely(mask == NULL)) /* something really terrible happend */
goto err2;
dbgprintf("done\n");
context->mask = mask;
context->seqno = os_display->mask_seqno-1;
context->slot = slot;
context_map[slot] = context;
return 0;
err2:
__DestroyObject(context);
err1:
return -1;
};
/*
 * Look up the render context of the current task slot.
 * Returns NULL when the slot has no context.
 */
struct context *get_context()
{
    return context_map[*((u8*)CURRENT_TASK)];
}
void __stdcall run_workqueue(struct workqueue_struct *cwq)
{

View File

@ -26,6 +26,9 @@ int blit_video(u32 hbitmap, int dst_x, int dst_y,
int blit_textured(u32 hbitmap, int dst_x, int dst_y,
int src_x, int src_y, u32 w, u32 h);
/* Masked textured blit of bitmap handle hbitmap; display_handler() calls it
 * with inp[0]..inp[6] as the arguments. */
int blit_tex(u32 hbitmap, int dst_x, int dst_y,
int src_x, int src_y, u32 w, u32 h);
static char log[256];
int x86_clflush_size;
@ -159,7 +162,7 @@ int _stdcall display_handler(ioctl_t *io)
// blit_video( inp[0], inp[1], inp[2],
// inp[3], inp[4], inp[5], inp[6]);
blit_textured( inp[0], inp[1], inp[2],
blit_tex( inp[0], inp[1], inp[2],
inp[3], inp[4], inp[5], inp[6]);

View File

@ -90,7 +90,9 @@ static const struct wm_kernel_info {
unsigned int size;
Bool has_mask;
} wm_kernels[] = {
KERNEL(NOMASK, ps_kernel_nomask_affine, FALSE),
// KERNEL(NOMASK, ps_kernel_nomask_affine, FALSE),
// KERNEL(MASK, ps_kernel_masknoca_affine, TRUE),
KERNEL(NOMASK, ps_kernel_masknoca_affine, TRUE),
KERNEL(MASK, ps_kernel_masknoca_affine, TRUE),
};
#undef KERNEL
@ -171,8 +173,11 @@ static uint32_t gen6_get_blend(int op,
{
uint32_t src, dst;
// src = GEN6_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
// dst = GEN6_BLENDFACTOR_ZERO; //gen6_blend_op[op].dst_blend;
src = GEN6_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
dst = GEN6_BLENDFACTOR_ZERO; //gen6_blend_op[op].dst_blend;
dst = GEN6_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
#if 0
/* If there's no dst alpha channel, adjust the blend op so that
@ -1302,6 +1307,50 @@ gen6_get_batch(struct sna *sna)
gen6_emit_invariant(sna);
}
/*
 * Emit the surface bindings and render state for a composite operation.
 *
 * Binding table layout: [0] destination, [1] source, [2] mask (only when
 * op->mask.bo is set).  If the freshly built table starts exactly at
 * sna->kgem.surface and its entries match the table recorded in
 * render_state.gen6.surface_table, the recorded table is used instead.
 */
static void gen6_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	uint32_t *table;
	uint16_t offset;
	bool dirty = FALSE;
	bool same_as_last;

	gen6_get_batch(sna);

	table = gen6_composite_get_binding_table(sna, &offset);

	table[0] = gen6_bind_bo(sna,
				op->dst.bo, op->dst.width, op->dst.height,
				op->dst.format,
				TRUE);
	table[1] = gen6_bind_bo(sna,
				op->src.bo, op->src.width, op->src.height,
				op->src.card_format,
				FALSE);
	if (op->mask.bo) {
		table[2] = gen6_bind_bo(sna,
					op->mask.bo,
					op->mask.width,
					op->mask.height,
					op->mask.card_format,
					FALSE);
	}

	/* The first two entries are compared as one 64-bit load; the mask
	 * entry is compared only when a mask is bound. */
	same_as_last =
		sna->kgem.surface == offset &&
		*(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)table &&
		(op->mask.bo == NULL ||
		 sna->kgem.batch[sna->render_state.gen6.surface_table+2] == table[2]);

	if (same_as_last) {
		/* presumably hands back the space taken by the new table -
		 * TODO confirm against gen6_composite_get_binding_table() */
		sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	gen6_emit_state(sna, op, offset | dirty);
}
static void
gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op)
@ -1586,15 +1635,90 @@ gen6_render_video(struct sna *sna,
#endif
static void gen6_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
DBG(("%s\n", __FUNCTION__));
if (sna->render_state.gen6.vertex_offset) {
gen6_vertex_flush(sna);
gen6_magic_ca_pass(sna, op);
}
/*
 * Emit one rectangle as three vertices of five floats each (15 floats):
 * packed destination coordinate, source u/v, mask u/v.
 *
 * Vertex order: (x + width, y + height), (x, y + height), (x, y).
 * Source and mask coordinates are the rectangle's position plus the
 * channel offset, multiplied by the channel scale.
 */
fastcall static void
gen6_emit_composite_primitive_identity_source_mask(struct sna *sna,
						   const struct sna_composite_op *op,
						   const struct sna_composite_rectangles *r)
{
	union {
		struct sna_coordinate p;
		float f;
	} pos;
	float *v = sna->render.vertices + sna->render.vertex_used;
	float src_x, src_y;
	float msk_x, msk_y;
	float w, h;
	float src_left, src_right, src_top, src_bottom;
	float msk_left, msk_right, msk_top, msk_bottom;

	sna->render.vertex_used += 15;

	src_x = r->src.x + op->src.offset[0];
	src_y = r->src.y + op->src.offset[1];
	msk_x = r->mask.x + op->mask.offset[0];
	msk_y = r->mask.y + op->mask.offset[1];
	w = r->width;
	h = r->height;

	src_right  = (src_x + w) * op->src.scale[0];
	src_bottom = (src_y + h) * op->src.scale[1];
	src_left   = src_x * op->src.scale[0];
	src_top    = src_y * op->src.scale[1];

	msk_right  = (msk_x + w) * op->mask.scale[0];
	msk_bottom = (msk_y + h) * op->mask.scale[1];
	msk_left   = msk_x * op->mask.scale[0];
	msk_top    = msk_y * op->mask.scale[1];

	/* vertex 0: bottom-right corner */
	pos.p.x = r->dst.x + r->width;
	pos.p.y = r->dst.y + r->height;
	v[0] = pos.f;
	v[1] = src_right;
	v[2] = src_bottom;
	v[3] = msk_right;
	v[4] = msk_bottom;

	/* vertex 1: bottom-left corner (y is carried over) */
	pos.p.x = r->dst.x;
	v[5] = pos.f;
	v[6] = src_left;
	v[7] = src_bottom;
	v[8] = msk_left;
	v[9] = msk_bottom;

	/* vertex 2: top-left corner (x is carried over) */
	pos.p.y = r->dst.y;
	v[10] = pos.f;
	v[11] = src_left;
	v[12] = src_top;
	v[13] = msk_left;
	v[14] = msk_top;
}
fastcall static void
gen6_render_composite_box(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box)
{
struct sna_composite_rectangles r;
if (unlikely(!gen6_get_rectangles(sna, op, 1))) {
// _kgem_submit(&sna->kgem);
// gen6_emit_composite_state(sna, op);
// gen6_get_rectangles(sna, op, 1);
}
DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.src = r.mask = r.dst;
op->prim_emit(sna, op, &r);
}
static void gen6_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
DBG(("%s\n", __FUNCTION__));
if (sna->render_state.gen6.vertex_offset) {
gen6_vertex_flush(sna);
gen6_magic_ca_pass(sna, op);
}
// if (op->mask.bo)
// kgem_bo_destroy(&sna->kgem, op->mask.bo);
@ -1605,6 +1729,94 @@ static void gen6_render_composite_done(struct sna *sna,
}
/*
 * Fill in *tmp for a masked composite and emit the initial state.
 *
 * The setup is fixed: PictOpSrc, B8G8R8A8 destination and source, A8 mask,
 * bilinear source filter, nearest mask filter, affine coordinates, and the
 * GEN6_WM_KERNEL_MASK kernel with three surfaces.  Both channel offsets are
 * (-dst_x, -dst_y).  The source scale is taken from the requested
 * width/height; the mask scale from the mask bitmap's own size.
 *
 * The parameters op, src_x, src_y, msk_x and msk_y are not referenced by
 * the body.  Always returns TRUE.
 */
static Bool
gen6_render_composite(struct sna *sna,
		      uint8_t op,
		      bitmap_t *src,
		      struct kgem_bo *src_bo,
		      bitmap_t *mask,
		      struct kgem_bo *mask_bo,
		      bitmap_t *dst,
		      struct kgem_bo *dst_bo,
		      int16_t src_x, int16_t src_y,
		      int16_t msk_x, int16_t msk_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      struct sna_composite_op *tmp)
{
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.ring));

	/* operation-wide settings */
	tmp->op = PictOpSrc;
	tmp->is_affine = TRUE;
	tmp->has_component_alpha = FALSE;
	tmp->need_magic_ca_pass = FALSE;

	/* destination */
	tmp->dst.bo     = dst_bo;
	tmp->dst.format = GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
	tmp->dst.width  = dst->width;
	tmp->dst.height = dst->height;

	/* source channel */
	tmp->src.bo          = src_bo;
	tmp->src.card_format = GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
	tmp->src.width       = src->width;
	tmp->src.height      = src->height;
	tmp->src.filter      = SAMPLER_FILTER_BILINEAR;
	tmp->src.repeat      = SAMPLER_EXTEND_NONE;
	tmp->src.is_affine   = TRUE;
	tmp->src.offset[0]   = -dst_x;
	tmp->src.offset[1]   = -dst_y;
	tmp->src.scale[0]    = 1.f/width;	/* not src->width */
	tmp->src.scale[1]    = 1.f/height;	/* not src->height */

	/* mask channel */
	tmp->mask.bo          = mask_bo;
	tmp->mask.card_format = GEN6_SURFACEFORMAT_A8_UNORM;
	tmp->mask.width       = mask->width;
	tmp->mask.height      = mask->height;
	tmp->mask.filter      = SAMPLER_FILTER_NEAREST;
	tmp->mask.repeat      = SAMPLER_EXTEND_NONE;
	tmp->mask.is_affine   = TRUE;
	tmp->mask.offset[0]   = -dst_x;
	tmp->mask.offset[1]   = -dst_y;
	tmp->mask.scale[0]    = 1.f/mask->width;
	tmp->mask.scale[1]    = 1.f/mask->height;

	/* vertex layout: five floats per vertex when affine, three per rect */
	tmp->prim_emit = gen6_emit_composite_primitive_identity_source_mask;
	tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine;
	tmp->floats_per_rect = 3 * tmp->floats_per_vertex;

	tmp->u.gen6.wm_kernel   = GEN6_WM_KERNEL_MASK;
	tmp->u.gen6.nr_surfaces = 2 + 1;
	tmp->u.gen6.nr_inputs   = 1 + 1;
	tmp->u.gen6.ve_id       = gen6_choose_composite_vertex_buffer(tmp);

	/* NOTE(review): this overrides the FALSE stored above even though
	 * has_component_alpha is FALSE - confirm that it is intended. */
	tmp->need_magic_ca_pass = TRUE;

	/* blt and boxes hooks are not provided by this port */
	tmp->box  = gen6_render_composite_box;
	tmp->done = gen6_render_composite_done;

	gen6_emit_composite_state(sna, tmp);
	gen6_align_vertex(sna, tmp);

	return TRUE;
}
static void
gen6_emit_copy_state(struct sna *sna,
@ -1930,7 +2142,7 @@ Bool gen6_render_init(struct sna *sna)
// sna->kgem.context_switch = gen6_render_context_switch;
sna->kgem.retire = gen6_render_retire;
// sna->render.composite = gen6_render_composite;
sna->render.composite = gen6_render_composite;
// sna->render.video = gen6_render_video;
// sna->render.copy_boxes = gen6_render_copy_boxes;

View File

@ -331,3 +331,57 @@ int sna_blit_copy(bitmap_t *dst_bitmap, int dst_x, int dst_y,
};
/*
 * sna_blit_tex - composite a rectangle of src_bitmap onto dst_bitmap
 * through mask_bitmap.
 *
 * dst_bitmap    destination surface (only gaddr and pitch are used here)
 * dst_x, dst_y  top-left corner of the destination box
 * w, h          box size
 * src_bitmap    source surface
 * src_x, src_y  source origin; also passed as the mask origin
 * mask_bitmap   mask surface
 *
 * Temporary kgem_bo descriptors (untiled, address and pitch copied from the
 * bitmaps) are built on the stack, the composite is set up through
 * sna_device->render.composite, one box is emitted and the batch is
 * submitted with the source and mask objects on the exec list.
 *
 * Returns 0 on success, -1 if the composite setup reports failure.
 */
int sna_blit_tex(bitmap_t *dst_bitmap, int dst_x, int dst_y,
                 int w, int h, bitmap_t *src_bitmap, int src_x, int src_y,
                 bitmap_t *mask_bitmap)
{
    struct sna_composite_op cop;
    batchbuffer_t  execbuffer;
    BoxRec box;

    struct kgem_bo src_bo, mask_bo, dst_bo;

    memset(&cop, 0, sizeof(cop));
    memset(&execbuffer,  0, sizeof(execbuffer));
    memset(&src_bo, 0, sizeof(src_bo));
    memset(&dst_bo, 0, sizeof(dst_bo));
    memset(&mask_bo, 0, sizeof(mask_bo));

    src_bo.gaddr  = src_bitmap->gaddr;
    src_bo.pitch  = src_bitmap->pitch;
    src_bo.tiling = 0;

    dst_bo.gaddr  = dst_bitmap->gaddr;
    dst_bo.pitch  = dst_bitmap->pitch;
    dst_bo.tiling = 0;

    mask_bo.gaddr  = mask_bitmap->gaddr;
    mask_bo.pitch  = mask_bitmap->pitch;
    mask_bo.tiling = 0;

    box.x1 = dst_x;
    box.y1 = dst_y;
    box.x2 = dst_x+w;
    box.y2 = dst_y+h;

    /* cop is zero-filled, so cop.box / cop.done are NULL unless the setup
     * succeeded; do not call through them on failure. */
    if (!sna_device->render.composite(sna_device, 0,
                                      src_bitmap, &src_bo,
                                      mask_bitmap, &mask_bo,
                                      dst_bitmap, &dst_bo,
                                      src_x, src_y,
                                      src_x, src_y,
                                      dst_x, dst_y,
                                      w, h, &cop))
        return -1;

    cop.box(sna_device, &cop, &box);
    cop.done(sna_device, &cop);

    INIT_LIST_HEAD(&execbuffer.objects);
    list_add_tail(&src_bitmap->obj->exec_list, &execbuffer.objects);
    list_add_tail(&mask_bitmap->obj->exec_list, &execbuffer.objects);

    _kgem_submit(&sna_device->kgem, &execbuffer);

    /* The original had no return statement in this non-void function. */
    return 0;
}

View File

@ -141,15 +141,16 @@ struct sna_render {
int max_3d_size;
int max_3d_pitch;
/*
Bool (*composite)(struct sna *sna, uint8_t op,
PicturePtr dst, PicturePtr src, PicturePtr mask,
bitmap_t *src, struct kgem_bo *src_bo,
bitmap_t *mask, struct kgem_bo *mask_bo,
bitmap_t *dst, struct kgem_bo *dst_bo,
int16_t src_x, int16_t src_y,
int16_t msk_x, int16_t msk_y,
int16_t dst_x, int16_t dst_y,
int16_t w, int16_t h,
struct sna_composite_op *tmp);
/*
Bool (*composite_spans)(struct sna *sna, uint8_t op,
PicturePtr dst, PicturePtr src,
int16_t src_x, int16_t src_y,