[g3dvl] split vertex element state into y, cb, cr

This commit is contained in:
Christian König
2011-04-02 12:05:22 +02:00
parent 4de5d81638
commit 794cde3f5e
7 changed files with 175 additions and 234 deletions
+14 -16
View File
@@ -93,8 +93,8 @@ static void *
create_vert_shader(struct vl_idct *idct, bool matrix_stage, int color_swizzle)
{
struct ureg_program *shader;
struct ureg_src vrect, vpos, vblock, eb[4];
struct ureg_src scale, blocks_xy, t_eb;
struct ureg_src vrect, vpos, vblock, eb;
struct ureg_src scale, blocks_xy;
struct ureg_dst t_tex, t_start;
struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2];
unsigned label;
@@ -112,10 +112,7 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage, int color_swizzle)
o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
eb[0] = ureg_DECL_vs_input(shader, VS_I_EB_0_0);
eb[1] = ureg_DECL_vs_input(shader, VS_I_EB_1_0);
eb[2] = ureg_DECL_vs_input(shader, VS_I_EB_0_1);
eb[3] = ureg_DECL_vs_input(shader, VS_I_EB_1_1);
eb = ureg_DECL_vs_input(shader, VS_I_EB);
o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
@@ -127,8 +124,7 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage, int color_swizzle)
* scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
* blocks_xy = (blocks_x, blocks_y)
*
* ar = vblock.y * blocks.x + vblock.x
* if eb[ar].(color_swizzle)
* if eb.(vblock.y, vblock.x)
* o_vpos.xy = -1
* else
* t_tex = vpos * blocks_xy + vblock
@@ -150,18 +146,20 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage, int color_swizzle)
blocks_xy = ureg_imm2f(shader, idct->blocks_x, idct->blocks_y);
if (idct->blocks_x > 1 || idct->blocks_y > 1) {
struct ureg_dst ar = ureg_DECL_address(shader);
ureg_CMP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY),
ureg_negate(ureg_scalar(vblock, TGSI_SWIZZLE_Y)),
ureg_swizzle(eb, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W),
ureg_swizzle(eb, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y));
ureg_MAD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_X),
ureg_scalar(vblock, TGSI_SWIZZLE_Y), blocks_xy, vblock);
ureg_CMP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_X),
ureg_negate(ureg_scalar(vblock, TGSI_SWIZZLE_X)),
ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_Y),
ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_X));
ureg_ARL(shader, ureg_writemask(ar, TGSI_WRITEMASK_X), ureg_src(t_tex));
t_eb = ureg_src_indirect(eb[0], ureg_src(ar));
} else {
t_eb = eb[0];
eb = ureg_src(t_tex);
}
ureg_IF(shader, ureg_scalar(t_eb, color_swizzle), &label);
ureg_IF(shader, ureg_scalar(eb, TGSI_SWIZZLE_X), &label);
ureg_MOV(shader, o_vpos, ureg_imm1f(shader, -1.0f));
+44 -78
View File
@@ -94,8 +94,8 @@ vl_mpeg12_buffer_destroy(struct pipe_video_buffer *buffer)
vl_ycbcr_buffer_cleanup(&buf->render_result);
vl_vb_cleanup(&buf->vertex_stream);
vl_idct_cleanup_buffer(&ctx->idct_y, &buf->idct_y);
vl_idct_cleanup_buffer(&ctx->idct_cb, &buf->idct_cb);
vl_idct_cleanup_buffer(&ctx->idct_cr, &buf->idct_cr);
vl_idct_cleanup_buffer(&ctx->idct_c, &buf->idct_cb);
vl_idct_cleanup_buffer(&ctx->idct_c, &buf->idct_cr);
vl_mpeg12_mc_cleanup_buffer(&buf->mc_y);
vl_mpeg12_mc_cleanup_buffer(&buf->mc_cb);
vl_mpeg12_mc_cleanup_buffer(&buf->mc_cr);
@@ -115,8 +115,8 @@ vl_mpeg12_buffer_map(struct pipe_video_buffer *buffer)
vl_vb_map(&buf->vertex_stream, ctx->pipe);
vl_idct_map_buffers(&ctx->idct_y, &buf->idct_y);
vl_idct_map_buffers(&ctx->idct_cr, &buf->idct_cr);
vl_idct_map_buffers(&ctx->idct_cb, &buf->idct_cb);
vl_idct_map_buffers(&ctx->idct_c, &buf->idct_cb);
vl_idct_map_buffers(&ctx->idct_c, &buf->idct_cr);
}
static void
@@ -156,8 +156,8 @@ vl_mpeg12_buffer_unmap(struct pipe_video_buffer *buffer)
vl_vb_unmap(&buf->vertex_stream, ctx->pipe);
vl_idct_unmap_buffers(&ctx->idct_y, &buf->idct_y);
vl_idct_unmap_buffers(&ctx->idct_cr, &buf->idct_cr);
vl_idct_unmap_buffers(&ctx->idct_cb, &buf->idct_cb);
vl_idct_unmap_buffers(&ctx->idct_c, &buf->idct_cb);
vl_idct_unmap_buffers(&ctx->idct_c, &buf->idct_cr);
}
static void
@@ -182,36 +182,42 @@ vl_mpeg12_buffer_flush(struct pipe_video_buffer *buffer,
ctx = (struct vl_mpeg12_context *)buf->base.context;
assert(ctx);
vl_vb_restart(&buf->vertex_stream, &ne_start, &ne_num, &e_start, &e_num);
ctx->pipe->set_vertex_buffers(ctx->pipe, 2, buf->vertex_bufs.all);
ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->vertex_elems_state);
ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend);
vl_idct_flush(&ctx->idct_y, &buf->idct_y, ne_num);
vl_idct_flush(&ctx->idct_cr, &buf->idct_cr, ne_num);
vl_idct_flush(&ctx->idct_cb, &buf->idct_cb, ne_num);
surfaces = vl_ycbcr_get_surfaces(&buf->render_result);
sv_past = past ? vl_ycbcr_get_sampler_views(&past->render_result) : NULL;
sv_future = future ? vl_ycbcr_get_sampler_views(&future->render_result) : NULL;
vl_vb_restart(&buf->vertex_stream, &ne_start, &ne_num, &e_start, &e_num);
ctx->pipe->set_vertex_buffers(ctx->pipe, 2, buf->vertex_bufs.all);
ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend);
ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->ves_y);
vl_idct_flush(&ctx->idct_y, &buf->idct_y, ne_num);
sv_refs[0] = sv_past ? sv_past->y : NULL;
sv_refs[1] = sv_future ? sv_future->y : NULL;
vl_mpeg12_mc_renderer_flush(&ctx->mc_y, &buf->mc_y, surfaces->y,
vl_mpeg12_mc_renderer_flush(&ctx->mc, &buf->mc_y, surfaces->y,
sv_refs, ne_start, ne_num, e_start, e_num, fence);
ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->ves_cb);
vl_idct_flush(&ctx->idct_c, &buf->idct_cb, ne_num);
sv_refs[0] = sv_past ? sv_past->cb : NULL;
sv_refs[1] = sv_future ? sv_future->cb : NULL;
vl_mpeg12_mc_renderer_flush(&ctx->mc_cb, &buf->mc_cb, surfaces->cb,
vl_mpeg12_mc_renderer_flush(&ctx->mc, &buf->mc_cb, surfaces->cb,
sv_refs, ne_start, ne_num, e_start, e_num, fence);
ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->ves_cr);
vl_idct_flush(&ctx->idct_c, &buf->idct_cr, ne_num);
sv_refs[0] = sv_past ? sv_past->cr : NULL;
sv_refs[1] = sv_future ? sv_future->cr : NULL;
vl_mpeg12_mc_renderer_flush(&ctx->mc_cr, &buf->mc_cr, surfaces->cr,
vl_mpeg12_mc_renderer_flush(&ctx->mc, &buf->mc_cr, surfaces->cr,
sv_refs, ne_start, ne_num, e_start, e_num, fence);
}
@@ -231,13 +237,12 @@ vl_mpeg12_destroy(struct pipe_video_context *vpipe)
ctx->pipe->delete_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
vl_compositor_cleanup(&ctx->compositor);
vl_mpeg12_mc_renderer_cleanup(&ctx->mc_y);
vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cb);
vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cr);
vl_mpeg12_mc_renderer_cleanup(&ctx->mc);
vl_idct_cleanup(&ctx->idct_y);
vl_idct_cleanup(&ctx->idct_cr);
vl_idct_cleanup(&ctx->idct_cb);
ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->vertex_elems_state);
vl_idct_cleanup(&ctx->idct_c);
ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->ves_y);
ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->ves_cb);
ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->ves_cr);
pipe_resource_reference(&ctx->quads.buffer, NULL);
ctx->pipe->destroy(ctx->pipe);
@@ -353,13 +358,13 @@ vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
return NULL;
}
if (!vl_idct_init_buffer(&ctx->idct_cb, &buffer->idct_cb,
if (!vl_idct_init_buffer(&ctx->idct_c, &buffer->idct_cb,
idct_views->cb, idct_surfaces->cb)) {
FREE(buffer);
return NULL;
}
if (!vl_idct_init_buffer(&ctx->idct_cr, &buffer->idct_cr,
if (!vl_idct_init_buffer(&ctx->idct_c, &buffer->idct_cr,
idct_views->cr, idct_surfaces->cr)) {
FREE(buffer);
return NULL;
@@ -367,17 +372,17 @@ vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
mc_views = vl_ycbcr_get_sampler_views(&buffer->idct_2_mc);
if(!vl_mpeg12_mc_init_buffer(&ctx->mc_y, &buffer->mc_y, mc_views->y)) {
if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc_y, mc_views->y)) {
FREE(buffer);
return NULL;
}
if(!vl_mpeg12_mc_init_buffer(&ctx->mc_cb, &buffer->mc_cb, mc_views->cb)) {
if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc_cb, mc_views->cb)) {
FREE(buffer);
return NULL;
}
if(!vl_mpeg12_mc_init_buffer(&ctx->mc_cr, &buffer->mc_cr, mc_views->cr)) {
if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc_cr, mc_views->cr)) {
FREE(buffer);
return NULL;
}
@@ -637,14 +642,10 @@ init_idct(struct vl_mpeg12_context *ctx, unsigned buffer_width, unsigned buffer_
chroma_blocks_y = 2;
}
if(!vl_idct_init(&ctx->idct_cb, ctx->pipe, chroma_width, chroma_height,
if(!vl_idct_init(&ctx->idct_c, ctx->pipe, chroma_width, chroma_height,
chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Y, idct_matrix))
return false;
if(!vl_idct_init(&ctx->idct_cr, ctx->pipe, chroma_width, chroma_height,
chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Z, idct_matrix))
return false;
return true;
}
@@ -690,13 +691,9 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
ctx->quads = vl_vb_upload_quads(ctx->pipe, 2, 2);
ctx->vertex_buffer_size = width / MACROBLOCK_WIDTH * height / MACROBLOCK_HEIGHT;
ctx->vertex_elems_state = vl_vb_get_elems_state(ctx->pipe, true);
if (ctx->vertex_elems_state == NULL) {
ctx->pipe->destroy(ctx->pipe);
FREE(ctx);
return NULL;
}
ctx->ves_y = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_X);
ctx->ves_cb = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_Y);
ctx->ves_cr = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_Z);
ctx->buffer_width = pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH);
ctx->buffer_height = pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT);
@@ -707,34 +704,9 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
return NULL;
}
if (!vl_mpeg12_mc_renderer_init(&ctx->mc_y, ctx->pipe,
ctx->buffer_width, ctx->buffer_height,
chroma_format, TGSI_SWIZZLE_X)) {
if (!vl_mpeg12_mc_renderer_init(&ctx->mc, ctx->pipe, ctx->buffer_width, ctx->buffer_height)) {
vl_idct_cleanup(&ctx->idct_y);
vl_idct_cleanup(&ctx->idct_cr);
vl_idct_cleanup(&ctx->idct_cb);
ctx->pipe->destroy(ctx->pipe);
FREE(ctx);
return NULL;
}
if (!vl_mpeg12_mc_renderer_init(&ctx->mc_cb, ctx->pipe,
ctx->buffer_width, ctx->buffer_height,
chroma_format, TGSI_SWIZZLE_Y)) {
vl_idct_cleanup(&ctx->idct_y);
vl_idct_cleanup(&ctx->idct_cr);
vl_idct_cleanup(&ctx->idct_cb);
ctx->pipe->destroy(ctx->pipe);
FREE(ctx);
return NULL;
}
if (!vl_mpeg12_mc_renderer_init(&ctx->mc_cr, ctx->pipe,
ctx->buffer_width, ctx->buffer_height,
chroma_format, TGSI_SWIZZLE_Z)) {
vl_idct_cleanup(&ctx->idct_y);
vl_idct_cleanup(&ctx->idct_cr);
vl_idct_cleanup(&ctx->idct_cb);
vl_idct_cleanup(&ctx->idct_c);
ctx->pipe->destroy(ctx->pipe);
FREE(ctx);
return NULL;
@@ -742,11 +714,8 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
if (!vl_compositor_init(&ctx->compositor, ctx->pipe)) {
vl_idct_cleanup(&ctx->idct_y);
vl_idct_cleanup(&ctx->idct_cr);
vl_idct_cleanup(&ctx->idct_cb);
vl_mpeg12_mc_renderer_cleanup(&ctx->mc_y);
vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cb);
vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cr);
vl_idct_cleanup(&ctx->idct_c);
vl_mpeg12_mc_renderer_cleanup(&ctx->mc);
ctx->pipe->destroy(ctx->pipe);
FREE(ctx);
return NULL;
@@ -754,11 +723,8 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
if (!init_pipe_state(ctx)) {
vl_idct_cleanup(&ctx->idct_y);
vl_idct_cleanup(&ctx->idct_cr);
vl_idct_cleanup(&ctx->idct_cb);
vl_mpeg12_mc_renderer_cleanup(&ctx->mc_y);
vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cb);
vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cr);
vl_idct_cleanup(&ctx->idct_c);
vl_mpeg12_mc_renderer_cleanup(&ctx->mc);
vl_compositor_cleanup(&ctx->compositor);
ctx->pipe->destroy(ctx->pipe);
FREE(ctx);
+3 -3
View File
@@ -49,10 +49,10 @@ struct vl_mpeg12_context
struct pipe_vertex_buffer quads;
unsigned vertex_buffer_size;
void *vertex_elems_state;
void *ves_y, *ves_cb, *ves_cr;
struct vl_idct idct_y, idct_cb, idct_cr;
struct vl_mpeg12_mc_renderer mc_y, mc_cb, mc_cr;
struct vl_idct idct_y, idct_c;
struct vl_mpeg12_mc_renderer mc;
struct vl_compositor compositor;
void *rast;
@@ -44,16 +44,12 @@ enum VS_OUTPUT
{
VS_O_VPOS,
VS_O_LINE,
VS_O_TEX0,
VS_O_TEX1,
VS_O_TEX2,
VS_O_EB_0,
VS_O_EB_1,
VS_O_INFO,
VS_O_MV0,
VS_O_MV1,
VS_O_MV2,
VS_O_MV3
VS_O_TEX_TOP,
VS_O_TEX_BOTTOM,
VS_O_MV0_TOP,
VS_O_MV0_BOTTOM,
VS_O_MV1_TOP,
VS_O_MV1_BOTTOM
};
static void *
@@ -61,10 +57,10 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
{
struct ureg_program *shader;
struct ureg_src block_scale, mv_scale;
struct ureg_src vrect, vpos, eb[2][2], vmv[4];
struct ureg_src vrect, vpos, eb, flags, vmv[2][2];
struct ureg_dst t_vpos, t_vtex, t_vmv;
struct ureg_dst o_vpos, o_line, o_vtex[3], o_eb[2], o_vmv[4], o_info;
unsigned i, label;
struct ureg_dst o_vpos, o_line, o_vtex[2], o_vmv[2][2];
unsigned i, j, label;
shader = ureg_create(TGSI_PROCESSOR_VERTEX);
if (!shader)
@@ -76,24 +72,21 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
eb[0][0] = ureg_DECL_vs_input(shader, VS_I_EB_0_0);
eb[1][0] = ureg_DECL_vs_input(shader, VS_I_EB_1_0);
eb[0][1] = ureg_DECL_vs_input(shader, VS_I_EB_0_1);
eb[1][1] = ureg_DECL_vs_input(shader, VS_I_EB_1_1);
eb = ureg_DECL_vs_input(shader, VS_I_EB);
flags = ureg_DECL_vs_input(shader, VS_I_FLAGS);
vmv[0][0] = ureg_DECL_vs_input(shader, VS_I_MV0_TOP);
vmv[0][1] = ureg_DECL_vs_input(shader, VS_I_MV0_BOTTOM);
vmv[1][0] = ureg_DECL_vs_input(shader, VS_I_MV1_TOP);
vmv[1][1] = ureg_DECL_vs_input(shader, VS_I_MV1_BOTTOM);
o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0);
o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1);
o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2);
o_eb[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0);
o_eb[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1);
o_info = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO);
for (i = 0; i < 4; ++i) {
vmv[i] = ureg_DECL_vs_input(shader, VS_I_MV0 + i);
o_vmv[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i);
}
o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX_TOP);
o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX_BOTTOM);
o_vmv[0][0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0_TOP);
o_vmv[0][1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0_BOTTOM);
o_vmv[1][0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV1_TOP);
o_vmv[1][1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV1_BOTTOM);
/*
* block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height)
@@ -143,31 +136,35 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
ureg_CMP(shader, ureg_writemask(o_eb[0], TGSI_WRITEMASK_XYZ),
ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
eb[0][1], eb[0][0]);
ureg_CMP(shader, ureg_writemask(o_eb[1], TGSI_WRITEMASK_XYZ),
ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
eb[1][1], eb[1][0]);
ureg_MOV(shader, ureg_writemask(o_info, TGSI_WRITEMASK_X),
ureg_scalar(eb[1][0], TGSI_SWIZZLE_W));
ureg_MUL(shader, ureg_writemask(o_info, TGSI_WRITEMASK_Y),
ureg_scalar(eb[1][1], TGSI_SWIZZLE_W),
ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_Z),
ureg_scalar(flags, TGSI_SWIZZLE_W),
ureg_imm1f(shader, 0.5f));
for (i = 0; i < 4; ++i)
ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), mv_scale, vmv[i], ureg_src(t_vpos));
for (i = 0; i < 2; ++i)
for (j = 0; j < 2; ++j) {
ureg_MAD(shader, ureg_writemask(o_vmv[i][j], TGSI_WRITEMASK_XY), mv_scale, vmv[i][j], ureg_src(t_vpos));
ureg_MOV(shader, ureg_writemask(o_vmv[i][j], TGSI_WRITEMASK_Z), ureg_src(t_vpos));
}
ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
ureg_CMP(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_Z),
ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
ureg_scalar(eb, TGSI_SWIZZLE_Y),
ureg_scalar(eb, TGSI_SWIZZLE_X));
ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
ureg_CMP(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_Z),
ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
ureg_scalar(eb, TGSI_SWIZZLE_W),
ureg_scalar(eb, TGSI_SWIZZLE_Z));
ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_scalar(vrect, TGSI_SWIZZLE_Y));
ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Z),
ureg_scalar(flags, TGSI_SWIZZLE_Z));
ureg_IF(shader, ureg_scalar(eb[0][0], TGSI_SWIZZLE_W), &label);
ureg_IF(shader, ureg_scalar(flags, TGSI_SWIZZLE_X), &label);
ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect);
ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
@@ -206,37 +203,31 @@ calc_field(struct ureg_program *shader)
* line.x going from 0 to 1 if not interlaced
* line.x going from 0 to 8 in steps of 0.5 if interlaced
* line.y going from 0 to 8 in steps of 0.5
* line.z is flag for intra frames
*
* tmp.xy = fraction(line)
* tmp.xy = tmp.xy >= 0.5 ? 1 : 0
*/
ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), line);
ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), line);
return tmp;
}
static struct ureg_dst
fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field, unsigned color_swizzle)
fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field)
{
struct ureg_src tc[3], sampler, eb[2];
struct ureg_dst texel, t_tc, t_eb_info;
struct ureg_src tc[2], sampler;
struct ureg_dst texel, t_tc;
unsigned label;
texel = ureg_DECL_temporary(shader);
t_tc = ureg_DECL_temporary(shader);
t_eb_info = ureg_DECL_temporary(shader);
tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0, TGSI_INTERPOLATE_LINEAR);
tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1, TGSI_INTERPOLATE_LINEAR);
tc[2] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2, TGSI_INTERPOLATE_LINEAR);
tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX_TOP, TGSI_INTERPOLATE_LINEAR);
tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX_BOTTOM, TGSI_INTERPOLATE_LINEAR);
eb[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0, TGSI_INTERPOLATE_CONSTANT);
eb[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1, TGSI_INTERPOLATE_CONSTANT);
//for (i = 0; i < 3; ++i) {
// sampler[i] = ureg_DECL_sampler(shader, i);
//}
sampler = ureg_DECL_sampler(shader, 0);
/*
@@ -245,31 +236,21 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
* texel.cr = tex(tc[2], sampler[2])
*/
ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XYZ),
ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X)),
tc[1], tc[0]);
ureg_CMP(shader, ureg_writemask(t_eb_info, TGSI_WRITEMASK_XYZ),
ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X)),
eb[1], eb[0]);
/* r600g is ignoring TGSI_INTERPOLATE_CONSTANT, just workaround this */
ureg_SLT(shader, ureg_writemask(t_eb_info, TGSI_WRITEMASK_XYZ), ureg_src(t_eb_info), ureg_imm1f(shader, 0.5f));
ureg_SLT(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_src(t_tc), ureg_imm1f(shader, 0.5f));
ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_XYZ), ureg_imm1f(shader, 0.0f));
ureg_IF(shader, ureg_scalar(ureg_src(t_eb_info), color_swizzle), &label);
ureg_IF(shader, ureg_scalar(ureg_src(t_tc), TGSI_SWIZZLE_Z), &label);
if(color_swizzle==TGSI_SWIZZLE_X || r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444) {
ureg_TEX(shader, texel, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
} else {
ureg_TEX(shader, texel, TGSI_TEXTURE_3D, tc[2], sampler);
}
ureg_TEX(shader, texel, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
ureg_ENDIF(shader);
ureg_release_temporary(shader, t_tc);
ureg_release_temporary(shader, t_eb_info);
return texel;
}
@@ -277,15 +258,14 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
static struct ureg_dst
fetch_ref(struct ureg_program *shader, struct ureg_dst field)
{
struct ureg_src info;
struct ureg_src tc[4], sampler[2];
struct ureg_src tc[2][2], sampler[2];
struct ureg_dst ref[2], result;
unsigned i, intra_label;
info = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO, TGSI_INTERPOLATE_CONSTANT);
for (i = 0; i < 4; ++i)
tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR);
tc[0][0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0_TOP, TGSI_INTERPOLATE_LINEAR);
tc[0][1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0_BOTTOM, TGSI_INTERPOLATE_LINEAR);
tc[1][0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV1_TOP, TGSI_INTERPOLATE_LINEAR);
tc[1][1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV1_BOTTOM, TGSI_INTERPOLATE_LINEAR);
for (i = 0; i < 2; ++i) {
sampler[i] = ureg_DECL_sampler(shader, i + 1);
@@ -296,7 +276,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
ureg_MOV(shader, result, ureg_imm1f(shader, 0.5f));
ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_X), &intra_label);
ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z), &intra_label);
/*
* if (field.z)
* ref[0..1] = tex(tc[0..1], sampler[0..1])
@@ -306,16 +286,16 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
*/
ureg_CMP(shader, ureg_writemask(ref[0], TGSI_WRITEMASK_XY),
ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
tc[1], tc[0]);
tc[0][1], tc[0][0]);
ureg_CMP(shader, ureg_writemask(ref[1], TGSI_WRITEMASK_XY),
ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
tc[3], tc[2]);
tc[1][1], tc[1][0]);
ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(ref[1]), sampler[1]);
ureg_LRP(shader, result,
ureg_scalar(info, TGSI_SWIZZLE_Y),
ureg_scalar(tc[0][0], TGSI_SWIZZLE_Z),
ureg_src(ref[1]), ureg_src(ref[0]));
ureg_fixup_label(shader, intra_label, ureg_get_instruction_number(shader));
@@ -328,7 +308,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
}
static void *
create_frag_shader(struct vl_mpeg12_mc_renderer *r, unsigned color_swizzle)
create_frag_shader(struct vl_mpeg12_mc_renderer *r)
{
struct ureg_program *shader;
struct ureg_dst result;
@@ -342,7 +322,7 @@ create_frag_shader(struct vl_mpeg12_mc_renderer *r, unsigned color_swizzle)
fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
field = calc_field(shader);
texel = fetch_ycbcr(r, shader, field, color_swizzle);
texel = fetch_ycbcr(r, shader, field);
result = fetch_ref(shader, field);
@@ -444,9 +424,7 @@ bool
vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
struct pipe_context *pipe,
unsigned buffer_width,
unsigned buffer_height,
enum pipe_video_chroma_format chroma_format,
unsigned color_swizzle)
unsigned buffer_height)
{
struct pipe_resource tex_templ, *tex_dummy;
struct pipe_sampler_view sampler_view;
@@ -459,7 +437,6 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
renderer->pipe = pipe;
renderer->buffer_width = buffer_width;
renderer->buffer_height = buffer_height;
renderer->chroma_format = chroma_format;
if (!init_pipe_state(renderer))
goto error_pipe_state;
@@ -468,7 +445,7 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
if (!renderer->vs)
goto error_vs_shaders;
renderer->fs = create_frag_shader(renderer, color_swizzle);
renderer->fs = create_frag_shader(renderer);
if (!renderer->fs)
goto error_fs_shaders;
@@ -42,7 +42,6 @@ struct vl_mpeg12_mc_renderer
struct pipe_context *pipe;
unsigned buffer_width;
unsigned buffer_height;
enum pipe_video_chroma_format chroma_format;
struct pipe_viewport_state viewport;
struct pipe_framebuffer_state fb_state;
@@ -72,9 +71,7 @@ struct vl_mpeg12_mc_buffer
bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
struct pipe_context *pipe,
unsigned picture_width,
unsigned picture_height,
enum pipe_video_chroma_format chroma_format,
unsigned color_swizzle);
unsigned picture_height);
void vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer);
+42 -37
View File
@@ -33,12 +33,11 @@
struct vl_vertex_stream
{
struct vertex2s pos;
struct {
int8_t y;
int8_t cr;
int8_t cb;
int8_t flag;
} eb[2][2];
int8_t eb[3][2][2];
int8_t dct_type_field;
int8_t mo_type_frame;
int8_t mb_type_intra;
int8_t mv_wheights;
struct vertex2s mv[4];
};
@@ -121,7 +120,10 @@ vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements
assert(elements && num_elements);
for ( i = 0; i < num_elements; ++i ) {
elements[i].src_offset = offset;
if (elements[i].src_offset)
offset = elements[i].src_offset;
else
elements[i].src_offset = offset;
elements[i].instance_divisor = 1;
elements[i].vertex_buffer_index = vertex_buffer_index;
offset += util_format_get_blocksize(elements[i].src_format);
@@ -129,37 +131,39 @@ vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements
}
void *
vl_vb_get_elems_state(struct pipe_context *pipe, bool include_mvs)
vl_vb_get_elems_state(struct pipe_context *pipe, int component)
{
struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS];
unsigned i;
memset(&vertex_elems, 0, sizeof(vertex_elems));
vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element();
/* Position element */
vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED;
/* y, cr, cb empty block element top left block */
vertex_elems[VS_I_EB_0_0].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
/* empty block element of selected component */
vertex_elems[VS_I_EB].src_offset = 4 + component * 4;
vertex_elems[VS_I_EB].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
/* y, cr, cb empty block element top right block */
vertex_elems[VS_I_EB_0_1].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
/* flags */
vertex_elems[VS_I_FLAGS].src_offset = 16;
vertex_elems[VS_I_FLAGS].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
/* y, cr, cb empty block element bottom left block */
vertex_elems[VS_I_EB_1_0].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
/* motion vector 0 TOP element */
vertex_elems[VS_I_MV0_TOP].src_format = PIPE_FORMAT_R16G16_SSCALED;
/* y, cr, cb empty block element bottom right block */
vertex_elems[VS_I_EB_1_1].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
/* motion vector 0 BOTTOM element */
vertex_elems[VS_I_MV0_BOTTOM].src_format = PIPE_FORMAT_R16G16_SSCALED;
for (i = 0; i < 4; ++i)
/* motion vector 0..4 element */
vertex_elems[VS_I_MV0 + i].src_format = PIPE_FORMAT_R16G16_SSCALED;
/* motion vector 1 TOP element */
vertex_elems[VS_I_MV1_TOP].src_format = PIPE_FORMAT_R16G16_SSCALED;
vl_vb_element_helper(&vertex_elems[VS_I_VPOS], NUM_VS_INPUTS - (include_mvs ? 1 : 5), 1);
/* motion vector 1 BOTTOM element */
vertex_elems[VS_I_MV1_BOTTOM].src_format = PIPE_FORMAT_R16G16_SSCALED;
return pipe->create_vertex_elements_state(pipe, NUM_VS_INPUTS - (include_mvs ? 0 : 4), vertex_elems);
vl_vb_element_helper(&vertex_elems[VS_I_VPOS], NUM_VS_INPUTS - 1, 1);
return pipe->create_vertex_elements_state(pipe, NUM_VS_INPUTS, vertex_elems);
}
struct pipe_vertex_buffer
@@ -256,7 +260,7 @@ vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *
const unsigned (*empty_block_mask)[3][2][2])
{
struct vl_vertex_stream *stream;
unsigned i, j;
unsigned i, j, k;
assert(buffer);
assert(mb);
@@ -269,28 +273,29 @@ vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *
stream->pos.x = mb->mbx;
stream->pos.y = mb->mby;
for ( i = 0; i < 2; ++i) {
for ( j = 0; j < 2; ++j) {
stream->eb[i][j].y = !(mb->cbp & (*empty_block_mask)[0][i][j]);
stream->eb[i][j].cr = !(mb->cbp & (*empty_block_mask)[1][i][j]);
stream->eb[i][j].cb = !(mb->cbp & (*empty_block_mask)[2][i][j]);
}
}
stream->eb[0][0].flag = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD;
stream->eb[0][1].flag = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME;
stream->eb[1][0].flag = mb->mb_type != PIPE_MPEG12_MACROBLOCK_TYPE_INTRA;
for ( i = 0; i < 3; ++i)
for ( j = 0; j < 2; ++j)
for ( k = 0; k < 2; ++k)
stream->eb[i][j][k] = !(mb->cbp & (*empty_block_mask)[i][j][k]);
stream->dct_type_field = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD;
stream->mo_type_frame = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME;
stream->mb_type_intra = mb->mb_type != PIPE_MPEG12_MACROBLOCK_TYPE_INTRA;
switch (mb->mb_type) {
case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
stream->eb[1][1].flag = 0;
stream->mv_wheights = 0;
break;
case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
stream->eb[1][1].flag = 1;
stream->mv_wheights = 1;
break;
case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
stream->eb[1][1].flag = 2;
stream->mv_wheights = 2;
break;
default:
stream->mv_wheights = 0;
}
get_motion_vectors(mb, stream->mv);
+7 -9
View File
@@ -41,14 +41,12 @@ enum VS_INPUT
{
VS_I_RECT,
VS_I_VPOS,
VS_I_EB_0_0,
VS_I_EB_0_1,
VS_I_EB_1_0,
VS_I_EB_1_1,
VS_I_MV0,
VS_I_MV1,
VS_I_MV2,
VS_I_MV3,
VS_I_EB,
VS_I_FLAGS,
VS_I_MV0_TOP,
VS_I_MV0_BOTTOM,
VS_I_MV1_TOP,
VS_I_MV1_BOTTOM,
NUM_VS_INPUTS
};
@@ -67,7 +65,7 @@ struct vl_vertex_buffer
struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe,
unsigned blocks_x, unsigned blocks_y);
void *vl_vb_get_elems_state(struct pipe_context *pipe, bool include_mvs);
void *vl_vb_get_elems_state(struct pipe_context *pipe, int component);
struct pipe_vertex_buffer vl_vb_init(struct vl_vertex_buffer *buffer,
struct pipe_context *pipe,