radv: use paired shader registers for graphics on GFX12
Loosely based on RadeonSI. This is supposed to be faster because parsing
the packet header seems to be the main bottleneck on GFX12.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35282>
This commit is contained in:
committed by
Marge Bot
parent
c8b3c92a3e
commit
098c15bfc9
@@ -437,6 +437,12 @@ radv_reset_tracked_regs(struct radv_cmd_buffer *cmd_buffer)
|
||||
memset(tracked_regs->spi_ps_input_cntl, 0xff, sizeof(uint32_t) * 32);
|
||||
}
|
||||
|
||||
/* Forget every SH register write currently buffered on this command buffer.
 *
 * Only the element count is reset to zero; the contents of
 * cmd_buffer->gfx12.buffered_sh_regs are left as they are.
 */
static void
|
||||
radv_reset_buffered_regs(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
cmd_buffer->num_buffered_sh_regs = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
radv_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, UNUSED VkCommandBufferResetFlags flags)
|
||||
{
|
||||
@@ -1979,11 +1985,18 @@ radv_emit_ps_epilog_state(struct radv_cmd_buffer *cmd_buffer, struct radv_shader
|
||||
radv_cs_add_buffer(device->ws, cmd_buffer->cs, ps_epilog->bo);
|
||||
|
||||
const uint32_t epilog_pc_offset = radv_get_user_sgpr_loc(ps_shader, AC_UD_EPILOG_PC);
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
if (pgm_rsrc1)
|
||||
radeon_set_sh_reg(ps_shader->info.regs.pgm_rsrc1, pgm_rsrc1);
|
||||
radeon_emit_32bit_pointer(epilog_pc_offset, ps_epilog->va, &pdev->info);
|
||||
radeon_end();
|
||||
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
if (pgm_rsrc1)
|
||||
gfx12_push_sh_reg(cmd_buffer, ps_shader->info.regs.pgm_rsrc1, pgm_rsrc1);
|
||||
gfx12_push_32bit_pointer(cmd_buffer, epilog_pc_offset, ps_epilog->va, &pdev->info);
|
||||
} else {
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
if (pgm_rsrc1)
|
||||
radeon_set_sh_reg(ps_shader->info.regs.pgm_rsrc1, pgm_rsrc1);
|
||||
radeon_emit_32bit_pointer(epilog_pc_offset, ps_epilog->va, &pdev->info);
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
cmd_buffer->shader_upload_seq = MAX2(cmd_buffer->shader_upload_seq, ps_epilog->upload_seq);
|
||||
|
||||
@@ -2086,26 +2099,42 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
|
||||
static void
|
||||
radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *shader)
|
||||
{
|
||||
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const uint64_t va = radv_shader_get_va(shader);
|
||||
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_set_sh_reg_seq(shader->info.regs.pgm_lo, 4);
|
||||
radeon_emit(va >> 8);
|
||||
radeon_emit(S_00B324_MEM_BASE(va >> 40));
|
||||
radeon_emit(shader->config.rsrc1);
|
||||
radeon_emit(shader->config.rsrc2);
|
||||
radeon_end();
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_lo, va >> 8);
|
||||
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_lo + 4, S_00B324_MEM_BASE(va >> 40));
|
||||
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc2, shader->config.rsrc2);
|
||||
} else {
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_set_sh_reg_seq(shader->info.regs.pgm_lo, 4);
|
||||
radeon_emit(va >> 8);
|
||||
radeon_emit(S_00B324_MEM_BASE(va >> 40));
|
||||
radeon_emit(shader->config.rsrc1);
|
||||
radeon_emit(shader->config.rsrc2);
|
||||
radeon_end();
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
radv_emit_hw_ls(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *shader)
|
||||
{
|
||||
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const uint64_t va = radv_shader_get_va(shader);
|
||||
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
|
||||
radeon_set_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
radeon_end();
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_lo, va >> 8);
|
||||
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
} else {
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
|
||||
radeon_set_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
radeon_end();
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -2128,13 +2157,19 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e
|
||||
}
|
||||
|
||||
if (!shader->info.merged_shader_compiled_separately) {
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
|
||||
|
||||
radeon_set_sh_reg_seq(shader->info.regs.pgm_rsrc1, 2);
|
||||
radeon_emit(shader->config.rsrc1);
|
||||
radeon_emit(shader->config.rsrc2);
|
||||
radeon_end();
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_lo, va >> 8);
|
||||
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc2, shader->config.rsrc2);
|
||||
gfx12_push_sh_reg(cmd_buffer, R_00B220_SPI_SHADER_PGM_RSRC4_GS, shader->info.regs.spi_shader_pgm_rsrc4_gs);
|
||||
} else {
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
|
||||
radeon_set_sh_reg_seq(shader->info.regs.pgm_rsrc1, 2);
|
||||
radeon_emit(shader->config.rsrc1);
|
||||
radeon_emit(shader->config.rsrc2);
|
||||
radeon_end();
|
||||
}
|
||||
}
|
||||
|
||||
const struct radv_vs_output_info *outinfo = &shader->info.outinfo;
|
||||
@@ -2211,7 +2246,6 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e
|
||||
radeon_set_uconfig_reg(R_03096C_GE_CNTL, ge_cntl);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
radeon_set_sh_reg(R_00B220_SPI_SHADER_PGM_RSRC4_GS, shader->info.regs.spi_shader_pgm_rsrc4_gs);
|
||||
radeon_set_uconfig_reg(R_030988_VGT_PRIMITIVEID_EN, shader->info.regs.ngg.vgt_primitiveid_en);
|
||||
} else {
|
||||
if (pdev->info.gfx_level >= GFX7) {
|
||||
@@ -2235,18 +2269,23 @@ radv_emit_hw_hs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const uint64_t va = radv_shader_get_va(shader);
|
||||
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
if (pdev->info.gfx_level >= GFX9) {
|
||||
radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
|
||||
radeon_set_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_lo, va >> 8);
|
||||
gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
} else {
|
||||
radeon_set_sh_reg_seq(shader->info.regs.pgm_lo, 4);
|
||||
radeon_emit(va >> 8);
|
||||
radeon_emit(S_00B424_MEM_BASE(va >> 40));
|
||||
radeon_emit(shader->config.rsrc1);
|
||||
radeon_emit(shader->config.rsrc2);
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
if (pdev->info.gfx_level >= GFX9) {
|
||||
radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8);
|
||||
radeon_set_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
} else {
|
||||
radeon_set_sh_reg_seq(shader->info.regs.pgm_lo, 4);
|
||||
radeon_emit(va >> 8);
|
||||
radeon_emit(S_00B424_MEM_BASE(va >> 40));
|
||||
radeon_emit(shader->config.rsrc1);
|
||||
radeon_emit(shader->config.rsrc2);
|
||||
}
|
||||
radeon_end();
|
||||
}
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -2272,20 +2311,34 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer)
|
||||
|
||||
const uint32_t next_stage_pc_offset = radv_get_user_sgpr_loc(vs, AC_UD_NEXT_STAGE_PC);
|
||||
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_emit_32bit_pointer(next_stage_pc_offset, next_stage->va, &pdev->info);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_32bit_pointer(cmd_buffer, next_stage_pc_offset, next_stage->va, &pdev->info);
|
||||
|
||||
if (!vs->info.vs.has_prolog) {
|
||||
radeon_set_sh_reg(vs->info.regs.pgm_lo, vs->va >> 8);
|
||||
if (vs->info.next_stage == MESA_SHADER_TESS_CTRL) {
|
||||
radeon_set_sh_reg(vs->info.regs.pgm_rsrc1, rsrc1);
|
||||
} else {
|
||||
radeon_set_sh_reg_seq(vs->info.regs.pgm_rsrc1, 2);
|
||||
radeon_emit(rsrc1);
|
||||
radeon_emit(rsrc2);
|
||||
if (!vs->info.vs.has_prolog) {
|
||||
gfx12_push_sh_reg(cmd_buffer, vs->info.regs.pgm_lo, vs->va >> 8);
|
||||
if (vs->info.next_stage == MESA_SHADER_TESS_CTRL) {
|
||||
gfx12_push_sh_reg(cmd_buffer, vs->info.regs.pgm_rsrc1, rsrc1);
|
||||
} else {
|
||||
gfx12_push_sh_reg(cmd_buffer, vs->info.regs.pgm_rsrc1, rsrc1);
|
||||
gfx12_push_sh_reg(cmd_buffer, vs->info.regs.pgm_rsrc2, rsrc2);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_emit_32bit_pointer(next_stage_pc_offset, next_stage->va, &pdev->info);
|
||||
|
||||
if (!vs->info.vs.has_prolog) {
|
||||
radeon_set_sh_reg(vs->info.regs.pgm_lo, vs->va >> 8);
|
||||
if (vs->info.next_stage == MESA_SHADER_TESS_CTRL) {
|
||||
radeon_set_sh_reg(vs->info.regs.pgm_rsrc1, rsrc1);
|
||||
} else {
|
||||
radeon_set_sh_reg_seq(vs->info.regs.pgm_rsrc1, 2);
|
||||
radeon_emit(rsrc1);
|
||||
radeon_emit(rsrc2);
|
||||
}
|
||||
}
|
||||
radeon_end();
|
||||
}
|
||||
radeon_end();
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -2329,16 +2382,22 @@ radv_emit_tess_eval_shader(struct radv_cmd_buffer *cmd_buffer)
|
||||
|
||||
radv_shader_combine_cfg_tes_gs(device, tes, gs, &rsrc1, &rsrc2);
|
||||
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_set_sh_reg(tes->info.regs.pgm_lo, tes->va >> 8);
|
||||
|
||||
radeon_set_sh_reg_seq(tes->info.regs.pgm_rsrc1, 2);
|
||||
radeon_emit(rsrc1);
|
||||
radeon_emit(rsrc2);
|
||||
|
||||
const uint32_t next_stage_pc_offset = radv_get_user_sgpr_loc(tes, AC_UD_NEXT_STAGE_PC);
|
||||
radeon_emit_32bit_pointer(next_stage_pc_offset, gs->va, &pdev->info);
|
||||
radeon_end();
|
||||
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(cmd_buffer, tes->info.regs.pgm_lo, tes->va >> 8);
|
||||
gfx12_push_sh_reg(cmd_buffer, tes->info.regs.pgm_rsrc1, rsrc1);
|
||||
gfx12_push_sh_reg(cmd_buffer, tes->info.regs.pgm_rsrc2, rsrc2);
|
||||
gfx12_push_32bit_pointer(cmd_buffer, next_stage_pc_offset, gs->va, &pdev->info);
|
||||
} else {
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_set_sh_reg(tes->info.regs.pgm_lo, tes->va >> 8);
|
||||
radeon_set_sh_reg_seq(tes->info.regs.pgm_rsrc1, 2);
|
||||
radeon_emit(rsrc1);
|
||||
radeon_emit(rsrc2);
|
||||
radeon_emit_32bit_pointer(next_stage_pc_offset, gs->va, &pdev->info);
|
||||
radeon_end();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -2484,13 +2543,18 @@ radv_gfx11_emit_meshlet(struct radv_cmd_buffer *cmd_buffer, const struct radv_sh
|
||||
|
||||
assert(pdev->info.gfx_level >= GFX11);
|
||||
|
||||
radeon_begin(cs);
|
||||
radeon_set_sh_reg_seq(R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, 2);
|
||||
radeon_emit(ms->info.regs.ms.spi_shader_gs_meshlet_dim);
|
||||
radeon_emit(ms->info.regs.ms.spi_shader_gs_meshlet_exp_alloc);
|
||||
if (pdev->info.gfx_level >= GFX12)
|
||||
radeon_set_sh_reg(R_00B2B8_SPI_SHADER_GS_MESHLET_CTRL, ms->info.regs.ms.spi_shader_gs_meshlet_ctrl);
|
||||
radeon_end();
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(cmd_buffer, R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, ms->info.regs.ms.spi_shader_gs_meshlet_dim);
|
||||
gfx12_push_sh_reg(cmd_buffer, R_00B2B4_SPI_SHADER_GS_MESHLET_EXP_ALLOC,
|
||||
ms->info.regs.ms.spi_shader_gs_meshlet_exp_alloc);
|
||||
gfx12_push_sh_reg(cmd_buffer, R_00B2B8_SPI_SHADER_GS_MESHLET_CTRL, ms->info.regs.ms.spi_shader_gs_meshlet_ctrl);
|
||||
} else {
|
||||
radeon_begin(cs);
|
||||
radeon_set_sh_reg_seq(R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, 2);
|
||||
radeon_emit(ms->info.regs.ms.spi_shader_gs_meshlet_dim);
|
||||
radeon_emit(ms->info.regs.ms.spi_shader_gs_meshlet_exp_alloc);
|
||||
radeon_end();
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -2712,16 +2776,25 @@ radv_emit_fragment_shader_state(struct radv_cmd_buffer *cmd_buffer, const struct
|
||||
static void
|
||||
radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT];
|
||||
const uint64_t va = radv_shader_get_va(ps);
|
||||
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_set_sh_reg_seq(ps->info.regs.pgm_lo, 4);
|
||||
radeon_emit(va >> 8);
|
||||
radeon_emit(S_00B024_MEM_BASE(va >> 40));
|
||||
radeon_emit(ps->config.rsrc1);
|
||||
radeon_emit(ps->config.rsrc2);
|
||||
radeon_end();
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(cmd_buffer, ps->info.regs.pgm_lo, va >> 8);
|
||||
gfx12_push_sh_reg(cmd_buffer, ps->info.regs.pgm_lo + 4, S_00B024_MEM_BASE(va >> 40));
|
||||
gfx12_push_sh_reg(cmd_buffer, ps->info.regs.pgm_rsrc1, ps->config.rsrc1);
|
||||
gfx12_push_sh_reg(cmd_buffer, ps->info.regs.pgm_rsrc2, ps->config.rsrc2);
|
||||
} else {
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_set_sh_reg_seq(ps->info.regs.pgm_lo, 4);
|
||||
radeon_emit(va >> 8);
|
||||
radeon_emit(S_00B024_MEM_BASE(va >> 40));
|
||||
radeon_emit(ps->config.rsrc1);
|
||||
radeon_emit(ps->config.rsrc2);
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
radv_emit_fragment_shader_state(cmd_buffer, ps);
|
||||
}
|
||||
@@ -2962,9 +3035,7 @@ radv_emit_graphics_shaders(struct radv_cmd_buffer *cmd_buffer)
|
||||
radv_emit_fragment_shader_state(cmd_buffer, NULL);
|
||||
}
|
||||
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_set_sh_reg(R_00B0C4_SPI_SHADER_GS_OUT_CONFIG_PS, gs_out_config_ps);
|
||||
radeon_end();
|
||||
gfx12_push_sh_reg(cmd_buffer, R_00B0C4_SPI_SHADER_GS_OUT_CONFIG_PS, gs_out_config_ps);
|
||||
}
|
||||
|
||||
const struct radv_vgt_shader_key vgt_shader_cfg_key =
|
||||
@@ -5038,17 +5109,20 @@ emit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v
|
||||
if (G_00B848_VGPRS(prolog->rsrc1) > G_00B848_VGPRS(rsrc1))
|
||||
rsrc1 = (rsrc1 & C_00B848_VGPRS) | (prolog->rsrc1 & ~C_00B848_VGPRS);
|
||||
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
|
||||
radeon_set_sh_reg(vs_shader->info.regs.pgm_lo, prolog->va >> 8);
|
||||
radeon_set_sh_reg(vs_shader->info.regs.pgm_rsrc1, rsrc1);
|
||||
|
||||
if (vs_shader->info.merged_shader_compiled_separately) {
|
||||
radeon_set_sh_reg(vs_shader->info.regs.pgm_rsrc2, rsrc2);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(cmd_buffer, vs_shader->info.regs.pgm_lo, prolog->va >> 8);
|
||||
gfx12_push_sh_reg(cmd_buffer, vs_shader->info.regs.pgm_rsrc1, rsrc1);
|
||||
if (vs_shader->info.merged_shader_compiled_separately)
|
||||
gfx12_push_sh_reg(cmd_buffer, vs_shader->info.regs.pgm_rsrc2, rsrc2);
|
||||
} else {
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_set_sh_reg(vs_shader->info.regs.pgm_lo, prolog->va >> 8);
|
||||
radeon_set_sh_reg(vs_shader->info.regs.pgm_rsrc1, rsrc1);
|
||||
if (vs_shader->info.merged_shader_compiled_separately)
|
||||
radeon_set_sh_reg(vs_shader->info.regs.pgm_rsrc2, rsrc2);
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
radeon_end();
|
||||
|
||||
radv_cs_add_buffer(device->ws, cmd_buffer->cs, prolog->bo);
|
||||
}
|
||||
|
||||
@@ -5886,16 +5960,20 @@ radv_emit_streamout_buffers(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
|
||||
if (!streamout_buffers_offset)
|
||||
return;
|
||||
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_emit_32bit_pointer(streamout_buffers_offset, va, &pdev->info);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_32bit_pointer(cmd_buffer, streamout_buffers_offset, va, &pdev->info);
|
||||
} else {
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_emit_32bit_pointer(streamout_buffers_offset, va, &pdev->info);
|
||||
|
||||
if (cmd_buffer->state.gs_copy_shader) {
|
||||
streamout_buffers_offset = radv_get_user_sgpr_loc(cmd_buffer->state.gs_copy_shader, AC_UD_STREAMOUT_BUFFERS);
|
||||
if (streamout_buffers_offset)
|
||||
radeon_emit_32bit_pointer(streamout_buffers_offset, va, &pdev->info);
|
||||
if (cmd_buffer->state.gs_copy_shader) {
|
||||
streamout_buffers_offset = radv_get_user_sgpr_loc(cmd_buffer->state.gs_copy_shader, AC_UD_STREAMOUT_BUFFERS);
|
||||
if (streamout_buffers_offset)
|
||||
radeon_emit_32bit_pointer(streamout_buffers_offset, va, &pdev->info);
|
||||
}
|
||||
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -5912,9 +5990,7 @@ radv_emit_streamout_state(struct radv_cmd_buffer *cmd_buffer)
|
||||
if (!streamout_state_offset)
|
||||
return;
|
||||
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_emit_32bit_pointer(streamout_state_offset, so->state_va, &pdev->info);
|
||||
radeon_end();
|
||||
gfx12_push_32bit_pointer(cmd_buffer, streamout_state_offset, so->state_va, &pdev->info);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -6012,9 +6088,14 @@ radv_flush_force_vrs_state(struct radv_cmd_buffer *cmd_buffer)
|
||||
|
||||
if (cmd_buffer->state.last_vrs_rates != vrs_rates ||
|
||||
cmd_buffer->state.last_force_vrs_rates_offset != force_vrs_rates_offset) {
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_set_sh_reg(force_vrs_rates_offset, vrs_rates);
|
||||
radeon_end();
|
||||
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(cmd_buffer, force_vrs_rates_offset, vrs_rates);
|
||||
} else {
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_set_sh_reg(force_vrs_rates_offset, vrs_rates);
|
||||
radeon_end();
|
||||
}
|
||||
}
|
||||
|
||||
cmd_buffer->state.last_vrs_rates = vrs_rates;
|
||||
@@ -6808,6 +6889,7 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi
|
||||
cmd_buffer->state.last_force_vrs_rates_offset = -1;
|
||||
|
||||
radv_reset_tracked_regs(cmd_buffer);
|
||||
radv_reset_buffered_regs(cmd_buffer);
|
||||
|
||||
cmd_buffer->usage_flags = pBeginInfo->flags;
|
||||
|
||||
@@ -10395,6 +10477,8 @@ radv_emit_ngg_culling_state(struct radv_cmd_buffer *cmd_buffer)
|
||||
static void
|
||||
radv_emit_fs_state(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT];
|
||||
|
||||
if (!ps)
|
||||
@@ -10413,9 +10497,13 @@ radv_emit_fs_state(struct radv_cmd_buffer *cmd_buffer)
|
||||
SET_SGPR_FIELD(PS_STATE_LINE_RAST_MODE, radv_get_line_mode(cmd_buffer)) |
|
||||
SET_SGPR_FIELD(PS_STATE_RAST_PRIM, rast_prim);
|
||||
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_set_sh_reg(ps_state_offset, ps_state);
|
||||
radeon_end();
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(cmd_buffer, ps_state_offset, ps_state);
|
||||
} else {
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_set_sh_reg(ps_state_offset, ps_state);
|
||||
radeon_end();
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
@@ -10492,16 +10580,19 @@ radv_emit_ngg_state(struct radv_cmd_buffer *cmd_buffer)
|
||||
SET_SGPR_FIELD(NGG_STATE_PROVOKING_VTX, radv_get_ngg_state_provoking_vtx(cmd_buffer)) |
|
||||
SET_SGPR_FIELD(NGG_STATE_QUERY, radv_get_ngg_state_query(cmd_buffer));
|
||||
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_set_sh_reg(ngg_state_offset, ngg_state);
|
||||
const uint32_t ngg_query_buf_va_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_NGG_QUERY_BUF_VA);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX11) {
|
||||
const uint32_t ngg_query_buf_va_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_NGG_QUERY_BUF_VA);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(cmd_buffer, ngg_state_offset, ngg_state);
|
||||
if (ngg_query_buf_va_offset)
|
||||
gfx12_push_sh_reg(cmd_buffer, ngg_query_buf_va_offset, cmd_buffer->state.shader_query_buf_va);
|
||||
} else {
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
radeon_set_sh_reg(ngg_state_offset, ngg_state);
|
||||
if (ngg_query_buf_va_offset)
|
||||
radeon_set_sh_reg(ngg_query_buf_va_offset, cmd_buffer->state.shader_query_buf_va);
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -10573,22 +10664,30 @@ radv_emit_tess_state(struct radv_cmd_buffer *cmd_buffer)
|
||||
assert(tes_offchip_layout_offset);
|
||||
}
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX9) {
|
||||
radeon_set_sh_reg(tcs->info.regs.pgm_rsrc2, pgm_hs_rsrc2);
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
gfx12_push_sh_reg(cmd_buffer, tcs->info.regs.pgm_rsrc2, pgm_hs_rsrc2);
|
||||
if (tcs_offchip_layout) {
|
||||
gfx12_push_sh_reg(cmd_buffer, tcs_offchip_layout_offset, tcs_offchip_layout);
|
||||
gfx12_push_sh_reg(cmd_buffer, tes_offchip_layout_offset, tcs_offchip_layout);
|
||||
}
|
||||
} else {
|
||||
const uint32_t ls_rsrc2 = vs->config.rsrc2 | S_00B52C_LDS_SIZE(cmd_buffer->state.tess_lds_size);
|
||||
radeon_begin(cs);
|
||||
|
||||
radeon_set_sh_reg(vs->info.regs.pgm_rsrc2, ls_rsrc2);
|
||||
if (pdev->info.gfx_level >= GFX9) {
|
||||
radeon_set_sh_reg(tcs->info.regs.pgm_rsrc2, pgm_hs_rsrc2);
|
||||
} else {
|
||||
const uint32_t ls_rsrc2 = vs->config.rsrc2 | S_00B52C_LDS_SIZE(cmd_buffer->state.tess_lds_size);
|
||||
|
||||
radeon_set_sh_reg(vs->info.regs.pgm_rsrc2, ls_rsrc2);
|
||||
}
|
||||
|
||||
if (tcs_offchip_layout) {
|
||||
radeon_set_sh_reg(tcs_offchip_layout_offset, tcs_offchip_layout);
|
||||
radeon_set_sh_reg(tes_offchip_layout_offset, tcs_offchip_layout);
|
||||
}
|
||||
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
if (tcs_offchip_layout) {
|
||||
radeon_set_sh_reg(tcs_offchip_layout_offset, tcs_offchip_layout);
|
||||
radeon_set_sh_reg(tes_offchip_layout_offset, tcs_offchip_layout);
|
||||
}
|
||||
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -11528,6 +11627,12 @@ radv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info
|
||||
radv_emit_all_graphics_states(cmd_buffer, info);
|
||||
}
|
||||
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
gfx12_emit_buffered_sh_regs(&cmd_buffer->num_buffered_sh_regs, cmd_buffer->gfx12.buffered_sh_regs);
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
if (!dgc)
|
||||
radv_describe_draw(cmd_buffer);
|
||||
if (likely(!info->indirect_va)) {
|
||||
@@ -11552,6 +11657,9 @@ ALWAYS_INLINE static bool
|
||||
radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, uint32_t drawCount,
|
||||
bool dgc)
|
||||
{
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
/* For direct draws, this makes sure we don't draw anything.
|
||||
* For indirect draws, this is necessary to prevent a GPU hang (on MEC version < 100).
|
||||
*/
|
||||
@@ -11562,7 +11670,6 @@ radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_
|
||||
radv_bind_graphics_shaders(cmd_buffer);
|
||||
}
|
||||
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
struct radeon_cmdbuf *ace_cs = cmd_buffer->gang.cs;
|
||||
struct radv_shader *task_shader = cmd_buffer->state.shaders[MESA_SHADER_TASK];
|
||||
|
||||
@@ -11594,6 +11701,12 @@ radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_
|
||||
if (pc_stages)
|
||||
radv_flush_constants(cmd_buffer, pc_stages, VK_PIPELINE_BIND_POINT_GRAPHICS);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
radeon_begin(cmd_buffer->cs);
|
||||
gfx12_emit_buffered_sh_regs(&cmd_buffer->num_buffered_sh_regs, cmd_buffer->gfx12.buffered_sh_regs);
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
if (!dgc)
|
||||
radv_describe_draw(cmd_buffer);
|
||||
if (likely(!info->indirect_va)) {
|
||||
|
||||
@@ -539,11 +539,22 @@ struct radv_cmd_buffer_upload {
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
/* A pair of values for SET_*_REG_PAIRS: one register write, stored as the
 * register to write and the 32-bit value to put there.
 *
 * NOTE(review): whether reg_offset holds a byte address or a dword index is
 * decided by the helpers that fill this struct (not visible here) -- confirm.
 */
|
||||
struct gfx12_reg {
|
||||
uint32_t reg_offset;
|
||||
uint32_t reg_value;
|
||||
};
|
||||
|
||||
struct radv_cmd_buffer {
|
||||
struct vk_command_buffer vk;
|
||||
|
||||
struct radv_tracked_regs tracked_regs;
|
||||
|
||||
uint32_t num_buffered_sh_regs;
|
||||
struct {
|
||||
struct gfx12_reg buffered_sh_regs[64];
|
||||
} gfx12;
|
||||
|
||||
VkCommandBufferUsageFlags usage_flags;
|
||||
struct radeon_cmdbuf *cs;
|
||||
struct radv_cmd_state state;
|
||||
|
||||
Reference in New Issue
Block a user