diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index de773f6a083..8376f84a7b5 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -437,6 +437,12 @@ radv_reset_tracked_regs(struct radv_cmd_buffer *cmd_buffer) memset(tracked_regs->spi_ps_input_cntl, 0xff, sizeof(uint32_t) * 32); } +static void +radv_reset_buffered_regs(struct radv_cmd_buffer *cmd_buffer) +{ + cmd_buffer->num_buffered_sh_regs = 0; +} + static void radv_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, UNUSED VkCommandBufferResetFlags flags) { @@ -1979,11 +1985,18 @@ radv_emit_ps_epilog_state(struct radv_cmd_buffer *cmd_buffer, struct radv_shader radv_cs_add_buffer(device->ws, cmd_buffer->cs, ps_epilog->bo); const uint32_t epilog_pc_offset = radv_get_user_sgpr_loc(ps_shader, AC_UD_EPILOG_PC); - radeon_begin(cmd_buffer->cs); - if (pgm_rsrc1) - radeon_set_sh_reg(ps_shader->info.regs.pgm_rsrc1, pgm_rsrc1); - radeon_emit_32bit_pointer(epilog_pc_offset, ps_epilog->va, &pdev->info); - radeon_end(); + + if (pdev->info.gfx_level >= GFX12) { + if (pgm_rsrc1) + gfx12_push_sh_reg(cmd_buffer, ps_shader->info.regs.pgm_rsrc1, pgm_rsrc1); + gfx12_push_32bit_pointer(cmd_buffer, epilog_pc_offset, ps_epilog->va, &pdev->info); + } else { + radeon_begin(cmd_buffer->cs); + if (pgm_rsrc1) + radeon_set_sh_reg(ps_shader->info.regs.pgm_rsrc1, pgm_rsrc1); + radeon_emit_32bit_pointer(epilog_pc_offset, ps_epilog->va, &pdev->info); + radeon_end(); + } cmd_buffer->shader_upload_seq = MAX2(cmd_buffer->shader_upload_seq, ps_epilog->upload_seq); @@ -2086,26 +2099,42 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh static void radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *shader) { + const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + const struct radv_physical_device *pdev = radv_device_physical(device); const uint64_t va = radv_shader_get_va(shader); - radeon_begin(cmd_buffer->cs); - radeon_set_sh_reg_seq(shader->info.regs.pgm_lo, 4); - radeon_emit(va >> 8); - radeon_emit(S_00B324_MEM_BASE(va >> 40)); - radeon_emit(shader->config.rsrc1); - radeon_emit(shader->config.rsrc2); - radeon_end(); + if (pdev->info.gfx_level >= GFX12) { + gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_lo, va >> 8); + gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_lo + 4, S_00B324_MEM_BASE(va >> 40)); + gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc1, shader->config.rsrc1); + gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc2, shader->config.rsrc2); + } else { + radeon_begin(cmd_buffer->cs); + radeon_set_sh_reg_seq(shader->info.regs.pgm_lo, 4); + radeon_emit(va >> 8); + radeon_emit(S_00B324_MEM_BASE(va >> 40)); + radeon_emit(shader->config.rsrc1); + radeon_emit(shader->config.rsrc2); + radeon_end(); + } } static void radv_emit_hw_ls(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *shader) { + const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + const struct radv_physical_device *pdev = radv_device_physical(device); const uint64_t va = radv_shader_get_va(shader); - radeon_begin(cmd_buffer->cs); - radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8); - radeon_set_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1); - radeon_end(); + if (pdev->info.gfx_level >= GFX12) { + gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_lo, va >> 8); + gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc1, shader->config.rsrc1); + } else { + radeon_begin(cmd_buffer->cs); + radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8); + radeon_set_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1); + radeon_end(); + } } static void @@ -2128,13 +2157,19 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e } if (!shader->info.merged_shader_compiled_separately) { - radeon_begin(cmd_buffer->cs); - radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8); - - radeon_set_sh_reg_seq(shader->info.regs.pgm_rsrc1, 2); - radeon_emit(shader->config.rsrc1); - radeon_emit(shader->config.rsrc2); - radeon_end(); + if (pdev->info.gfx_level >= GFX12) { + gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_lo, va >> 8); + gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc1, shader->config.rsrc1); + gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc2, shader->config.rsrc2); + gfx12_push_sh_reg(cmd_buffer, R_00B220_SPI_SHADER_PGM_RSRC4_GS, shader->info.regs.spi_shader_pgm_rsrc4_gs); + } else { + radeon_begin(cmd_buffer->cs); + radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8); + radeon_set_sh_reg_seq(shader->info.regs.pgm_rsrc1, 2); + radeon_emit(shader->config.rsrc1); + radeon_emit(shader->config.rsrc2); + radeon_end(); + } } const struct radv_vs_output_info *outinfo = &shader->info.outinfo; @@ -2211,7 +2246,6 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e radeon_set_uconfig_reg(R_03096C_GE_CNTL, ge_cntl); if (pdev->info.gfx_level >= GFX12) { - radeon_set_sh_reg(R_00B220_SPI_SHADER_PGM_RSRC4_GS, shader->info.regs.spi_shader_pgm_rsrc4_gs); radeon_set_uconfig_reg(R_030988_VGT_PRIMITIVEID_EN, shader->info.regs.ngg.vgt_primitiveid_en); } else { if (pdev->info.gfx_level >= GFX7) { @@ -2235,18 +2269,23 @@ radv_emit_hw_hs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh const struct radv_physical_device *pdev = radv_device_physical(device); const uint64_t va = radv_shader_get_va(shader); - radeon_begin(cmd_buffer->cs); - if (pdev->info.gfx_level >= GFX9) { - radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8); - radeon_set_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1); + if (pdev->info.gfx_level >= GFX12) { + gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_lo, va >> 8); + gfx12_push_sh_reg(cmd_buffer, shader->info.regs.pgm_rsrc1, shader->config.rsrc1); } else { - radeon_set_sh_reg_seq(shader->info.regs.pgm_lo, 4); - radeon_emit(va >> 8); - radeon_emit(S_00B424_MEM_BASE(va >> 40)); - radeon_emit(shader->config.rsrc1); - radeon_emit(shader->config.rsrc2); + radeon_begin(cmd_buffer->cs); + if (pdev->info.gfx_level >= GFX9) { + radeon_set_sh_reg(shader->info.regs.pgm_lo, va >> 8); + radeon_set_sh_reg(shader->info.regs.pgm_rsrc1, shader->config.rsrc1); + } else { + radeon_set_sh_reg_seq(shader->info.regs.pgm_lo, 4); + radeon_emit(va >> 8); + radeon_emit(S_00B424_MEM_BASE(va >> 40)); + radeon_emit(shader->config.rsrc1); + radeon_emit(shader->config.rsrc2); + } + radeon_end(); } - radeon_end(); } static void @@ -2272,20 +2311,34 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer) const uint32_t next_stage_pc_offset = radv_get_user_sgpr_loc(vs, AC_UD_NEXT_STAGE_PC); - radeon_begin(cmd_buffer->cs); - radeon_emit_32bit_pointer(next_stage_pc_offset, next_stage->va, &pdev->info); + if (pdev->info.gfx_level >= GFX12) { + gfx12_push_32bit_pointer(cmd_buffer, next_stage_pc_offset, next_stage->va, &pdev->info); - if (!vs->info.vs.has_prolog) { - radeon_set_sh_reg(vs->info.regs.pgm_lo, vs->va >> 8); - if (vs->info.next_stage == MESA_SHADER_TESS_CTRL) { - radeon_set_sh_reg(vs->info.regs.pgm_rsrc1, rsrc1); - } else { - radeon_set_sh_reg_seq(vs->info.regs.pgm_rsrc1, 2); - radeon_emit(rsrc1); - radeon_emit(rsrc2); + if (!vs->info.vs.has_prolog) { + gfx12_push_sh_reg(cmd_buffer, vs->info.regs.pgm_lo, vs->va >> 8); + if (vs->info.next_stage == MESA_SHADER_TESS_CTRL) { + gfx12_push_sh_reg(cmd_buffer, vs->info.regs.pgm_rsrc1, rsrc1); + } else { + gfx12_push_sh_reg(cmd_buffer, vs->info.regs.pgm_rsrc1, rsrc1); + gfx12_push_sh_reg(cmd_buffer, vs->info.regs.pgm_rsrc2, rsrc2); + } } + } else { + radeon_begin(cmd_buffer->cs); + radeon_emit_32bit_pointer(next_stage_pc_offset, next_stage->va, &pdev->info); + + if (!vs->info.vs.has_prolog) { + radeon_set_sh_reg(vs->info.regs.pgm_lo, vs->va >> 8); + if (vs->info.next_stage == MESA_SHADER_TESS_CTRL) { + radeon_set_sh_reg(vs->info.regs.pgm_rsrc1, rsrc1); + } else { + radeon_set_sh_reg_seq(vs->info.regs.pgm_rsrc1, 2); + radeon_emit(rsrc1); + radeon_emit(rsrc2); + } + } + radeon_end(); } - radeon_end(); return; } @@ -2329,16 +2382,22 @@ radv_emit_tess_eval_shader(struct radv_cmd_buffer *cmd_buffer) radv_shader_combine_cfg_tes_gs(device, tes, gs, &rsrc1, &rsrc2); - radeon_begin(cmd_buffer->cs); - radeon_set_sh_reg(tes->info.regs.pgm_lo, tes->va >> 8); - - radeon_set_sh_reg_seq(tes->info.regs.pgm_rsrc1, 2); - radeon_emit(rsrc1); - radeon_emit(rsrc2); - const uint32_t next_stage_pc_offset = radv_get_user_sgpr_loc(tes, AC_UD_NEXT_STAGE_PC); - radeon_emit_32bit_pointer(next_stage_pc_offset, gs->va, &pdev->info); - radeon_end(); + + if (pdev->info.gfx_level >= GFX12) { + gfx12_push_sh_reg(cmd_buffer, tes->info.regs.pgm_lo, tes->va >> 8); + gfx12_push_sh_reg(cmd_buffer, tes->info.regs.pgm_rsrc1, rsrc1); + gfx12_push_sh_reg(cmd_buffer, tes->info.regs.pgm_rsrc2, rsrc2); + gfx12_push_32bit_pointer(cmd_buffer, next_stage_pc_offset, gs->va, &pdev->info); + } else { + radeon_begin(cmd_buffer->cs); + radeon_set_sh_reg(tes->info.regs.pgm_lo, tes->va >> 8); + radeon_set_sh_reg_seq(tes->info.regs.pgm_rsrc1, 2); + radeon_emit(rsrc1); + radeon_emit(rsrc2); + radeon_emit_32bit_pointer(next_stage_pc_offset, gs->va, &pdev->info); + radeon_end(); + } return; } @@ -2484,13 +2543,18 @@ radv_gfx11_emit_meshlet(struct radv_cmd_buffer *cmd_buffer, const struct radv_sh assert(pdev->info.gfx_level >= GFX11); - radeon_begin(cs); - radeon_set_sh_reg_seq(R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, 2); - radeon_emit(ms->info.regs.ms.spi_shader_gs_meshlet_dim); - radeon_emit(ms->info.regs.ms.spi_shader_gs_meshlet_exp_alloc); - if (pdev->info.gfx_level >= GFX12) - radeon_set_sh_reg(R_00B2B8_SPI_SHADER_GS_MESHLET_CTRL, ms->info.regs.ms.spi_shader_gs_meshlet_ctrl); - radeon_end(); + if (pdev->info.gfx_level >= GFX12) { + gfx12_push_sh_reg(cmd_buffer, R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, ms->info.regs.ms.spi_shader_gs_meshlet_dim); + gfx12_push_sh_reg(cmd_buffer, R_00B2B4_SPI_SHADER_GS_MESHLET_EXP_ALLOC, + ms->info.regs.ms.spi_shader_gs_meshlet_exp_alloc); + gfx12_push_sh_reg(cmd_buffer, R_00B2B8_SPI_SHADER_GS_MESHLET_CTRL, ms->info.regs.ms.spi_shader_gs_meshlet_ctrl); + } else { + radeon_begin(cs); + radeon_set_sh_reg_seq(R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, 2); + radeon_emit(ms->info.regs.ms.spi_shader_gs_meshlet_dim); + radeon_emit(ms->info.regs.ms.spi_shader_gs_meshlet_exp_alloc); + radeon_end(); + } } static void @@ -2712,16 +2776,25 @@ radv_emit_fragment_shader_state(struct radv_cmd_buffer *cmd_buffer, const struct static void radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT]; const uint64_t va = radv_shader_get_va(ps); - radeon_begin(cmd_buffer->cs); - radeon_set_sh_reg_seq(ps->info.regs.pgm_lo, 4); - radeon_emit(va >> 8); - radeon_emit(S_00B024_MEM_BASE(va >> 40)); - radeon_emit(ps->config.rsrc1); - radeon_emit(ps->config.rsrc2); - radeon_end(); + if (pdev->info.gfx_level >= GFX12) { + gfx12_push_sh_reg(cmd_buffer, ps->info.regs.pgm_lo, va >> 8); + gfx12_push_sh_reg(cmd_buffer, ps->info.regs.pgm_lo + 4, S_00B024_MEM_BASE(va >> 40)); + gfx12_push_sh_reg(cmd_buffer, ps->info.regs.pgm_rsrc1, ps->config.rsrc1); + gfx12_push_sh_reg(cmd_buffer, ps->info.regs.pgm_rsrc2, ps->config.rsrc2); + } else { + radeon_begin(cmd_buffer->cs); + radeon_set_sh_reg_seq(ps->info.regs.pgm_lo, 4); + radeon_emit(va >> 8); + radeon_emit(S_00B024_MEM_BASE(va >> 40)); + radeon_emit(ps->config.rsrc1); + radeon_emit(ps->config.rsrc2); + radeon_end(); + } radv_emit_fragment_shader_state(cmd_buffer, ps); } @@ -2962,9 +3035,7 @@ radv_emit_graphics_shaders(struct radv_cmd_buffer *cmd_buffer) radv_emit_fragment_shader_state(cmd_buffer, NULL); } - radeon_begin(cmd_buffer->cs); - radeon_set_sh_reg(R_00B0C4_SPI_SHADER_GS_OUT_CONFIG_PS, gs_out_config_ps); - radeon_end(); + gfx12_push_sh_reg(cmd_buffer, R_00B0C4_SPI_SHADER_GS_OUT_CONFIG_PS, gs_out_config_ps); } const struct radv_vgt_shader_key vgt_shader_cfg_key = @@ -5038,17 +5109,20 @@ emit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v if (G_00B848_VGPRS(prolog->rsrc1) > G_00B848_VGPRS(rsrc1)) rsrc1 = (rsrc1 & C_00B848_VGPRS) | (prolog->rsrc1 & ~C_00B848_VGPRS); - radeon_begin(cmd_buffer->cs); - - radeon_set_sh_reg(vs_shader->info.regs.pgm_lo, prolog->va >> 8); - radeon_set_sh_reg(vs_shader->info.regs.pgm_rsrc1, rsrc1); - - if (vs_shader->info.merged_shader_compiled_separately) { - radeon_set_sh_reg(vs_shader->info.regs.pgm_rsrc2, rsrc2); + if (pdev->info.gfx_level >= GFX12) { + gfx12_push_sh_reg(cmd_buffer, vs_shader->info.regs.pgm_lo, prolog->va >> 8); + gfx12_push_sh_reg(cmd_buffer, vs_shader->info.regs.pgm_rsrc1, rsrc1); + if (vs_shader->info.merged_shader_compiled_separately) + gfx12_push_sh_reg(cmd_buffer, vs_shader->info.regs.pgm_rsrc2, rsrc2); + } else { + radeon_begin(cmd_buffer->cs); + radeon_set_sh_reg(vs_shader->info.regs.pgm_lo, prolog->va >> 8); + radeon_set_sh_reg(vs_shader->info.regs.pgm_rsrc1, rsrc1); + if (vs_shader->info.merged_shader_compiled_separately) + radeon_set_sh_reg(vs_shader->info.regs.pgm_rsrc2, rsrc2); + radeon_end(); } - radeon_end(); - radv_cs_add_buffer(device->ws, cmd_buffer->cs, prolog->bo); } @@ -5886,16 +5960,20 @@ radv_emit_streamout_buffers(struct radv_cmd_buffer *cmd_buffer, uint64_t va) if (!streamout_buffers_offset) return; - radeon_begin(cmd_buffer->cs); - radeon_emit_32bit_pointer(streamout_buffers_offset, va, &pdev->info); + if (pdev->info.gfx_level >= GFX12) { + gfx12_push_32bit_pointer(cmd_buffer, streamout_buffers_offset, va, &pdev->info); + } else { + radeon_begin(cmd_buffer->cs); + radeon_emit_32bit_pointer(streamout_buffers_offset, va, &pdev->info); - if (cmd_buffer->state.gs_copy_shader) { - streamout_buffers_offset = radv_get_user_sgpr_loc(cmd_buffer->state.gs_copy_shader, AC_UD_STREAMOUT_BUFFERS); - if (streamout_buffers_offset) - radeon_emit_32bit_pointer(streamout_buffers_offset, va, &pdev->info); + if (cmd_buffer->state.gs_copy_shader) { + streamout_buffers_offset = radv_get_user_sgpr_loc(cmd_buffer->state.gs_copy_shader, AC_UD_STREAMOUT_BUFFERS); + if (streamout_buffers_offset) + radeon_emit_32bit_pointer(streamout_buffers_offset, va, &pdev->info); + } + + radeon_end(); } - - radeon_end(); } static void @@ -5912,9 +5990,7 @@ radv_emit_streamout_state(struct radv_cmd_buffer *cmd_buffer) if (!streamout_state_offset) return; - radeon_begin(cmd_buffer->cs); - radeon_emit_32bit_pointer(streamout_state_offset, so->state_va, &pdev->info); - radeon_end(); + gfx12_push_32bit_pointer(cmd_buffer, streamout_state_offset, so->state_va, &pdev->info); } static void @@ -6012,9 +6088,14 @@ radv_flush_force_vrs_state(struct radv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.last_vrs_rates != vrs_rates || cmd_buffer->state.last_force_vrs_rates_offset != force_vrs_rates_offset) { - radeon_begin(cmd_buffer->cs); - radeon_set_sh_reg(force_vrs_rates_offset, vrs_rates); - radeon_end(); + + if (pdev->info.gfx_level >= GFX12) { + gfx12_push_sh_reg(cmd_buffer, force_vrs_rates_offset, vrs_rates); + } else { + radeon_begin(cmd_buffer->cs); + radeon_set_sh_reg(force_vrs_rates_offset, vrs_rates); + radeon_end(); + } } cmd_buffer->state.last_vrs_rates = vrs_rates; @@ -6808,6 +6889,7 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi cmd_buffer->state.last_force_vrs_rates_offset = -1; radv_reset_tracked_regs(cmd_buffer); + radv_reset_buffered_regs(cmd_buffer); cmd_buffer->usage_flags = pBeginInfo->flags; @@ -10395,6 +10477,8 @@ radv_emit_ngg_culling_state(struct radv_cmd_buffer *cmd_buffer) static void radv_emit_fs_state(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT]; if (!ps) @@ -10413,9 +10497,13 @@ radv_emit_fs_state(struct radv_cmd_buffer *cmd_buffer) SET_SGPR_FIELD(PS_STATE_LINE_RAST_MODE, radv_get_line_mode(cmd_buffer)) | SET_SGPR_FIELD(PS_STATE_RAST_PRIM, rast_prim); - radeon_begin(cmd_buffer->cs); - radeon_set_sh_reg(ps_state_offset, ps_state); - radeon_end(); + if (pdev->info.gfx_level >= GFX12) { + gfx12_push_sh_reg(cmd_buffer, ps_state_offset, ps_state); + } else { + radeon_begin(cmd_buffer->cs); + radeon_set_sh_reg(ps_state_offset, ps_state); + radeon_end(); + } } static uint32_t @@ -10492,16 +10580,19 @@ radv_emit_ngg_state(struct radv_cmd_buffer *cmd_buffer) SET_SGPR_FIELD(NGG_STATE_PROVOKING_VTX, radv_get_ngg_state_provoking_vtx(cmd_buffer)) | SET_SGPR_FIELD(NGG_STATE_QUERY, radv_get_ngg_state_query(cmd_buffer)); - radeon_begin(cmd_buffer->cs); - radeon_set_sh_reg(ngg_state_offset, ngg_state); + const uint32_t ngg_query_buf_va_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_NGG_QUERY_BUF_VA); - if (pdev->info.gfx_level >= GFX11) { - const uint32_t ngg_query_buf_va_offset = radv_get_user_sgpr_loc(last_vgt_shader, AC_UD_NGG_QUERY_BUF_VA); + if (pdev->info.gfx_level >= GFX12) { + gfx12_push_sh_reg(cmd_buffer, ngg_state_offset, ngg_state); + if (ngg_query_buf_va_offset) + gfx12_push_sh_reg(cmd_buffer, ngg_query_buf_va_offset, cmd_buffer->state.shader_query_buf_va); + } else { + radeon_begin(cmd_buffer->cs); + radeon_set_sh_reg(ngg_state_offset, ngg_state); if (ngg_query_buf_va_offset) radeon_set_sh_reg(ngg_query_buf_va_offset, cmd_buffer->state.shader_query_buf_va); + radeon_end(); } - - radeon_end(); } static void @@ -10573,22 +10664,30 @@ radv_emit_tess_state(struct radv_cmd_buffer *cmd_buffer) assert(tes_offchip_layout_offset); } - radeon_begin(cs); - - if (pdev->info.gfx_level >= GFX9) { - radeon_set_sh_reg(tcs->info.regs.pgm_rsrc2, pgm_hs_rsrc2); + if (pdev->info.gfx_level >= GFX12) { + gfx12_push_sh_reg(cmd_buffer, tcs->info.regs.pgm_rsrc2, pgm_hs_rsrc2); + if (tcs_offchip_layout) { + gfx12_push_sh_reg(cmd_buffer, tcs_offchip_layout_offset, tcs_offchip_layout); + gfx12_push_sh_reg(cmd_buffer, tes_offchip_layout_offset, tcs_offchip_layout); + } } else { - const uint32_t ls_rsrc2 = vs->config.rsrc2 | S_00B52C_LDS_SIZE(cmd_buffer->state.tess_lds_size); + radeon_begin(cs); - radeon_set_sh_reg(vs->info.regs.pgm_rsrc2, ls_rsrc2); + if (pdev->info.gfx_level >= GFX9) { + radeon_set_sh_reg(tcs->info.regs.pgm_rsrc2, pgm_hs_rsrc2); + } else { + const uint32_t ls_rsrc2 = vs->config.rsrc2 | S_00B52C_LDS_SIZE(cmd_buffer->state.tess_lds_size); + + radeon_set_sh_reg(vs->info.regs.pgm_rsrc2, ls_rsrc2); + } + + if (tcs_offchip_layout) { + radeon_set_sh_reg(tcs_offchip_layout_offset, tcs_offchip_layout); + radeon_set_sh_reg(tes_offchip_layout_offset, tcs_offchip_layout); + } + + radeon_end(); } - - if (tcs_offchip_layout) { - radeon_set_sh_reg(tcs_offchip_layout_offset, tcs_offchip_layout); - radeon_set_sh_reg(tes_offchip_layout_offset, tcs_offchip_layout); - } - - radeon_end(); } static void @@ -11528,6 +11627,12 @@ radv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info radv_emit_all_graphics_states(cmd_buffer, info); } + if (pdev->info.gfx_level >= GFX12) { + radeon_begin(cmd_buffer->cs); + gfx12_emit_buffered_sh_regs(&cmd_buffer->num_buffered_sh_regs, cmd_buffer->gfx12.buffered_sh_regs); + radeon_end(); + } + if (!dgc) radv_describe_draw(cmd_buffer); if (likely(!info->indirect_va)) { @@ -11552,6 +11657,9 @@ ALWAYS_INLINE static bool radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, uint32_t drawCount, bool dgc) { + struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + const struct radv_physical_device *pdev = radv_device_physical(device); + /* For direct draws, this makes sure we don't draw anything. * For indirect draws, this is necessary to prevent a GPU hang (on MEC version < 100). */ @@ -11562,7 +11670,6 @@ radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_ radv_bind_graphics_shaders(cmd_buffer); } - struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); struct radeon_cmdbuf *ace_cs = cmd_buffer->gang.cs; struct radv_shader *task_shader = cmd_buffer->state.shaders[MESA_SHADER_TASK]; @@ -11594,6 +11701,12 @@ radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_ if (pc_stages) radv_flush_constants(cmd_buffer, pc_stages, VK_PIPELINE_BIND_POINT_GRAPHICS); + if (pdev->info.gfx_level >= GFX12) { + radeon_begin(cmd_buffer->cs); + gfx12_emit_buffered_sh_regs(&cmd_buffer->num_buffered_sh_regs, cmd_buffer->gfx12.buffered_sh_regs); + radeon_end(); + } + if (!dgc) radv_describe_draw(cmd_buffer); if (likely(!info->indirect_va)) { diff --git a/src/amd/vulkan/radv_cmd_buffer.h b/src/amd/vulkan/radv_cmd_buffer.h index fbc4326feef..3129c92d414 100644 --- a/src/amd/vulkan/radv_cmd_buffer.h +++ b/src/amd/vulkan/radv_cmd_buffer.h @@ -539,11 +539,22 @@ struct radv_cmd_buffer_upload { struct list_head list; }; +/* A pair of values for SET_*_REG_PAIRS. */ +struct gfx12_reg { + uint32_t reg_offset; + uint32_t reg_value; +}; + struct radv_cmd_buffer { struct vk_command_buffer vk; struct radv_tracked_regs tracked_regs; + uint32_t num_buffered_sh_regs; + struct { + struct gfx12_reg buffered_sh_regs[64]; + } gfx12; + VkCommandBufferUsageFlags usage_flags; struct radeon_cmdbuf *cs; struct radv_cmd_state state;