radv: add support for loading the LSHS vertex stride from a SGPR

With shader objects, if the VS and TCS aren't linked together, the LSHS
vertex stride must be computed from the VS outputs. Otherwise, if an
output is unused by the TCS, the stride computed in the TCS is wrong.

This is currently for GFX8 only: merged shaders won't need this, and
shader objects aren't supported on GFX9+ yet.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24540>
This commit is contained in:
Samuel Pitoiset
2023-08-07 14:31:34 +02:00
committed by Marge Bot
parent 0cb88ddca2
commit 8a97302f57
4 changed files with 20 additions and 6 deletions

View File

@@ -273,8 +273,18 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.task_ring_entry);
break;
case nir_intrinsic_load_lshs_vertex_stride_amd: {
unsigned io_num = stage == MESA_SHADER_VERTEX ? s->info->vs.num_linked_outputs : s->info->tcs.num_linked_inputs;
replacement = nir_imm_int(b, get_tcs_input_vertex_stride(io_num));
if (stage == MESA_SHADER_VERTEX) {
replacement = nir_imm_int(b, get_tcs_input_vertex_stride(s->info->vs.num_linked_outputs));
} else {
assert(stage == MESA_SHADER_TESS_CTRL);
if (s->info->inputs_linked) {
replacement = nir_imm_int(b, get_tcs_input_vertex_stride(s->info->tcs.num_linked_inputs));
} else {
nir_ssa_def *lshs_vertex_stride =
GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_LSHS_VERTEX_STRIDE);
replacement = nir_ishl_imm(b, lshs_vertex_stride, 2);
}
}
break;
}
case nir_intrinsic_load_esgs_vertex_stride_amd: {

View File

@@ -2507,6 +2507,7 @@ static void
radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer)
{
const struct radv_physical_device *pdevice = cmd_buffer->device->physical_device;
const struct radv_shader *vs = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_VERTEX);
const struct radv_shader *tcs = cmd_buffer->state.shaders[MESA_SHADER_TESS_CTRL];
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
unsigned ls_hs_config, base_reg;
@@ -2549,7 +2550,6 @@ radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer)
radeon_set_sh_reg(cmd_buffer->cs, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, hs_rsrc2);
} else {
struct radv_shader *vs = cmd_buffer->state.shaders[MESA_SHADER_VERTEX];
unsigned ls_rsrc2 = vs->config.rsrc2 | S_00B52C_LDS_SIZE(cmd_buffer->state.tess_lds_size);
radeon_set_sh_reg(cmd_buffer->cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
@@ -2564,7 +2564,9 @@ radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer)
unsigned tcs_offchip_layout =
SET_SGPR_FIELD(TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS, d->vk.ts.patch_control_points) |
SET_SGPR_FIELD(TCS_OFFCHIP_LAYOUT_NUM_PATCHES, cmd_buffer->state.tess_num_patches);
SET_SGPR_FIELD(TCS_OFFCHIP_LAYOUT_NUM_PATCHES, cmd_buffer->state.tess_num_patches) |
SET_SGPR_FIELD(TCS_OFFCHIP_LAYOUT_LSHS_VERTEX_STRIDE,
get_tcs_input_vertex_stride(vs->info.vs.num_linked_outputs) / 4);
base_reg = cmd_buffer->state.shaders[MESA_SHADER_TESS_CTRL]->info.user_data_0;
radeon_set_sh_reg(cmd_buffer->cs, base_reg + offchip->sgpr_idx * 4, tcs_offchip_layout);

View File

@@ -201,6 +201,8 @@ enum radv_ud_index {
#define TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS__MASK 0x3f
#define TCS_OFFCHIP_LAYOUT_NUM_PATCHES__SHIFT 6
#define TCS_OFFCHIP_LAYOUT_NUM_PATCHES__MASK 0xff
#define TCS_OFFCHIP_LAYOUT_LSHS_VERTEX_STRIDE__SHIFT 14
#define TCS_OFFCHIP_LAYOUT_LSHS_VERTEX_STRIDE__MASK 0xff /* max 32 * 4 + 1 (to reduce LDS bank conflicts) */
#define TES_STATE_NUM_PATCHES__SHIFT 0
#define TES_STATE_NUM_PATCHES__MASK 0xff

View File

@@ -355,8 +355,8 @@ radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_arg
/**
 * Report whether the TCS needs its state SGPR.
 *
 * \param info  Shader info; num_tess_patches and inputs_linked are read.
 * \param key   Pipeline key; tcs.tess_input_vertices is read.
 * \return true if the number of patch control points or the number of
 *         tessellation patches is 0, or if the shader inputs aren't linked.
 */
static bool
radv_tcs_needs_state_sgpr(const struct radv_shader_info *info, const struct radv_pipeline_key *key)
{
   /* Some values are loaded from a SGPR when dynamic states are used or when the shader is unlinked. */
   return !key->tcs.tess_input_vertices || !info->num_tess_patches || !info->inputs_linked;
}
static bool