r600: add support for compute grid/block sizes. (v2)
These values are passed in from outside via a constant buffer. On the shader side, each size is fetched into a register and cached the first time it is accessed.

v2: fix to not use a temp_reg.

Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
@@ -712,10 +712,17 @@ static void compute_emit_cs(struct r600_context *rctx,
|
||||
bool need_buf_const = current->shader.uses_tex_buffers ||
|
||||
current->shader.has_txq_cube_array_z_comp;
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
rctx->cs_block_grid_sizes[i] = info->block[i];
|
||||
rctx->cs_block_grid_sizes[i + 4] = info->grid[i];
|
||||
}
|
||||
rctx->cs_block_grid_sizes[3] = rctx->cs_block_grid_sizes[7] = 0;
|
||||
rctx->driver_consts[PIPE_SHADER_COMPUTE].cs_block_grid_size_dirty = true;
|
||||
if (need_buf_const) {
|
||||
eg_setup_buffer_constants(rctx, PIPE_SHADER_COMPUTE);
|
||||
r600_update_driver_const_buffers(rctx, true);
|
||||
}
|
||||
r600_update_driver_const_buffers(rctx, true);
|
||||
|
||||
if (evergreen_emit_atomic_buffer_setup(rctx, current, combined_atomics, &atomic_used_mask)) {
|
||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
|
||||
|
||||
@@ -78,6 +78,7 @@
|
||||
/* start driver buffers after user buffers */
|
||||
#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
|
||||
#define R600_UCP_SIZE (4*4*8)
|
||||
#define R600_CS_BLOCK_GRID_SIZE (8 * 4)
|
||||
#define R600_BUFFER_INFO_OFFSET (R600_UCP_SIZE)
|
||||
|
||||
#define R600_LDS_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
|
||||
@@ -396,6 +397,7 @@ struct r600_shader_driver_constants_info {
|
||||
bool vs_ucp_dirty;
|
||||
bool texture_const_dirty;
|
||||
bool ps_sample_pos_dirty;
|
||||
bool cs_block_grid_size_dirty;
|
||||
};
|
||||
|
||||
struct r600_constbuf_state
|
||||
@@ -575,6 +577,7 @@ struct r600_context {
|
||||
struct r600_isa *isa;
|
||||
float sample_positions[4 * 16];
|
||||
float tess_state[8];
|
||||
uint32_t cs_block_grid_sizes[8]; /* 3 for grid + 1 pad, 3 for block + 1 pad*/
|
||||
bool tess_state_dirty;
|
||||
struct r600_pipe_shader_selector *last_ls;
|
||||
struct r600_pipe_shader_selector *last_tcs;
|
||||
|
||||
@@ -346,6 +346,9 @@ struct r600_shader_ctx {
|
||||
boolean clip_vertex_write;
|
||||
unsigned cv_output;
|
||||
unsigned edgeflag_output;
|
||||
int cs_block_size_reg;
|
||||
int cs_grid_size_reg;
|
||||
bool cs_block_size_loaded, cs_grid_size_loaded;
|
||||
int fragcoord_input;
|
||||
int native_integers;
|
||||
int next_ring_offset;
|
||||
@@ -1309,6 +1312,60 @@ static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_
|
||||
return t1;
|
||||
}
|
||||
|
||||
static int load_block_grid_size(struct r600_shader_ctx *ctx, bool load_block)
|
||||
{
|
||||
struct r600_bytecode_vtx vtx;
|
||||
int r, t1;
|
||||
|
||||
if (ctx->cs_block_size_loaded)
|
||||
return ctx->cs_block_size_reg;
|
||||
if (ctx->cs_grid_size_loaded)
|
||||
return ctx->cs_grid_size_reg;
|
||||
|
||||
t1 = load_block ? ctx->cs_block_size_reg : ctx->cs_grid_size_reg;
|
||||
struct r600_bytecode_alu alu;
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ALU_OP1_MOV;
|
||||
alu.src[0].sel = V_SQ_ALU_SRC_0;
|
||||
alu.dst.sel = t1;
|
||||
alu.dst.write = 1;
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
|
||||
vtx.op = FETCH_OP_VFETCH;
|
||||
vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
|
||||
vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
|
||||
vtx.src_gpr = t1;
|
||||
vtx.src_sel_x = 0;
|
||||
|
||||
vtx.mega_fetch_count = 16;
|
||||
vtx.dst_gpr = t1;
|
||||
vtx.dst_sel_x = 0;
|
||||
vtx.dst_sel_y = 1;
|
||||
vtx.dst_sel_z = 2;
|
||||
vtx.dst_sel_w = 7;
|
||||
vtx.data_format = FMT_32_32_32_32;
|
||||
vtx.num_format_all = 1;
|
||||
vtx.format_comp_all = 0;
|
||||
vtx.use_const_fields = 0;
|
||||
vtx.offset = load_block ? 0 : 16; // first element is size of buffer
|
||||
vtx.endian = r600_endian_swap(32);
|
||||
vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
|
||||
|
||||
r = r600_bytecode_add_vtx(ctx->bc, &vtx);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (load_block)
|
||||
ctx->cs_block_size_loaded = true;
|
||||
else
|
||||
ctx->cs_grid_size_loaded = true;
|
||||
return t1;
|
||||
}
|
||||
|
||||
static void tgsi_src(struct r600_shader_ctx *ctx,
|
||||
const struct tgsi_full_src_register *tgsi_src,
|
||||
struct r600_shader_src *r600_src)
|
||||
@@ -1414,6 +1471,10 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
|
||||
r600_src->swizzle[1] = 3;
|
||||
r600_src->swizzle[2] = 3;
|
||||
r600_src->swizzle[3] = 3;
|
||||
} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_GRID_SIZE) {
|
||||
r600_src->sel = load_block_grid_size(ctx, false);
|
||||
} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_BLOCK_SIZE) {
|
||||
r600_src->sel = load_block_grid_size(ctx, true);
|
||||
}
|
||||
} else {
|
||||
if (tgsi_src->Register.Indirect)
|
||||
@@ -3148,6 +3209,11 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
||||
ctx.clip_vertex_write = 0;
|
||||
ctx.thread_id_gpr_loaded = false;
|
||||
|
||||
ctx.cs_block_size_reg = -1;
|
||||
ctx.cs_grid_size_reg = -1;
|
||||
ctx.cs_block_size_loaded = false;
|
||||
ctx.cs_grid_size_loaded = false;
|
||||
|
||||
shader->nr_ps_color_exports = 0;
|
||||
shader->nr_ps_max_color_exports = 0;
|
||||
|
||||
@@ -3211,8 +3277,15 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
||||
if (add_tess_inout)
|
||||
ctx.file_offset[TGSI_FILE_INPUT]+=2;
|
||||
}
|
||||
if (ctx.type == PIPE_SHADER_COMPUTE)
|
||||
if (ctx.type == PIPE_SHADER_COMPUTE) {
|
||||
ctx.file_offset[TGSI_FILE_INPUT] = 2;
|
||||
for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) {
|
||||
if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_GRID_SIZE)
|
||||
ctx.cs_grid_size_reg = ctx.file_offset[TGSI_FILE_INPUT]++;
|
||||
if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_BLOCK_SIZE)
|
||||
ctx.cs_block_size_reg = ctx.file_offset[TGSI_FILE_INPUT]++;
|
||||
}
|
||||
}
|
||||
|
||||
ctx.file_offset[TGSI_FILE_OUTPUT] =
|
||||
ctx.file_offset[TGSI_FILE_INPUT] +
|
||||
|
||||
@@ -1230,7 +1230,8 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on
|
||||
struct r600_shader_driver_constants_info *info = &rctx->driver_consts[sh];
|
||||
if (!info->vs_ucp_dirty &&
|
||||
!info->texture_const_dirty &&
|
||||
!info->ps_sample_pos_dirty)
|
||||
!info->ps_sample_pos_dirty &&
|
||||
!info->cs_block_grid_size_dirty)
|
||||
continue;
|
||||
|
||||
ptr = info->constants;
|
||||
@@ -1257,6 +1258,17 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on
|
||||
info->ps_sample_pos_dirty = false;
|
||||
}
|
||||
|
||||
if (info->cs_block_grid_size_dirty) {
|
||||
assert(sh == PIPE_SHADER_COMPUTE);
|
||||
if (!size) {
|
||||
ptr = rctx->cs_block_grid_sizes;
|
||||
size = R600_CS_BLOCK_GRID_SIZE;
|
||||
} else {
|
||||
memcpy(ptr, rctx->cs_block_grid_sizes, R600_CS_BLOCK_GRID_SIZE);
|
||||
}
|
||||
info->cs_block_grid_size_dirty = false;
|
||||
}
|
||||
|
||||
if (info->texture_const_dirty) {
|
||||
assert (ptr);
|
||||
assert (size);
|
||||
@@ -1264,6 +1276,8 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on
|
||||
memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE);
|
||||
if (sh == PIPE_SHADER_FRAGMENT)
|
||||
memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE);
|
||||
if (sh == PIPE_SHADER_COMPUTE)
|
||||
memcpy(ptr, rctx->cs_block_grid_sizes, R600_CS_BLOCK_GRID_SIZE);
|
||||
}
|
||||
info->texture_const_dirty = false;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user