From 55df7b641513028b2a3a9dd61373cad3a67b32e2 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 15 May 2023 13:02:26 +0200 Subject: [PATCH] radv: rework configuring VGT_SHADER_STAGES_EN For shader objects. Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_pipeline_graphics.c | 125 ++++++++++++++---------- src/amd/vulkan/radv_private.h | 13 +++ 2 files changed, 89 insertions(+), 49 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index e54a28e3e7d..a51e94f0db9 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -3523,43 +3523,87 @@ radv_pipeline_emit_vgt_vertex_reuse(const struct radv_device *device, struct rad S_028C58_VTX_REUSE_DEPTH(vtx_reuse_depth)); } +static struct radv_vgt_shader_key +radv_pipeline_generate_vgt_shader_key(const struct radv_device *device, + const struct radv_graphics_pipeline *pipeline) +{ + uint8_t hs_size = 64, gs_size = 64, vs_size = 64; + struct radv_vgt_shader_key key; + + memset(&key, 0, sizeof(key)); + + if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) + hs_size = pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.wave_size; + + if (pipeline->base.shaders[MESA_SHADER_GEOMETRY]) { + vs_size = gs_size = pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.wave_size; + if (radv_pipeline_has_gs_copy_shader(&pipeline->base)) + vs_size = pipeline->base.gs_copy_shader->info.wave_size; + } else if (pipeline->base.shaders[MESA_SHADER_TESS_EVAL]) + vs_size = pipeline->base.shaders[MESA_SHADER_TESS_EVAL]->info.wave_size; + else if (pipeline->base.shaders[MESA_SHADER_VERTEX]) + vs_size = pipeline->base.shaders[MESA_SHADER_VERTEX]->info.wave_size; + else if (pipeline->base.shaders[MESA_SHADER_MESH]) + vs_size = gs_size = pipeline->base.shaders[MESA_SHADER_MESH]->info.wave_size; + + if (radv_pipeline_has_ngg(pipeline)) { + assert(!radv_pipeline_has_gs_copy_shader(&pipeline->base)); + gs_size = vs_size; + } + + key.tess = radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL); + key.gs = radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY); + if (radv_pipeline_has_ngg(pipeline)) { + key.ngg = 1; + key.ngg_passthrough = radv_pipeline_has_ngg_passthrough(pipeline); + } + key.streamout = !!pipeline->streamout_shader; + if (radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) { + key.mesh = 1; + key.mesh_scratch_ring = + pipeline->base.shaders[MESA_SHADER_MESH]->info.ms.needs_ms_scratch_ring; + } + + key.hs_wave32 = hs_size == 32; + key.vs_wave32 = vs_size == 32; + key.gs_wave32 = gs_size == 32; + + return key; +} + static void -radv_pipeline_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, - const struct radv_graphics_pipeline *pipeline) +radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, + const struct radv_vgt_shader_key *key) { const struct radv_physical_device *pdevice = device->physical_device; uint32_t stages = 0; - if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) { + + if (key->tess) { stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) | S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1); - if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) + if (key->gs) stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) | S_028B54_GS_EN(1); - else if (radv_pipeline_has_ngg(pipeline)) + else if (key->ngg) stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS); else stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS); - } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) { + } else if (key->gs) { stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1); - } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) { - assert(!radv_pipeline_has_ngg_passthrough(pipeline)); - stages |= S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(1); - - if (pipeline->base.shaders[MESA_SHADER_MESH]->info.ms.needs_ms_scratch_ring) - stages |= S_028B54_NGG_WAVE_ID_EN(1); - } else if (radv_pipeline_has_ngg(pipeline)) { + } else if (key->mesh) { + assert(!key->ngg_passthrough); + stages |= S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(1) | + S_028B54_NGG_WAVE_ID_EN(key->mesh_scratch_ring); + } else if (key->ngg) { stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL); } - if (radv_pipeline_has_ngg(pipeline)) { - stages |= S_028B54_PRIMGEN_EN(1); - if (pipeline->streamout_shader) - stages |= S_028B54_NGG_WAVE_ID_EN(1); - if (radv_pipeline_has_ngg_passthrough(pipeline)) { - stages |= S_028B54_PRIMGEN_PASSTHRU_EN(1); - if (pdevice->rad_info.family >= CHIP_NAVI23) - stages |= S_028B54_PRIMGEN_PASSTHRU_NO_MSG(1); - } - } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) { + if (key->ngg) { + stages |= S_028B54_PRIMGEN_EN(1) | + S_028B54_NGG_WAVE_ID_EN(key->streamout) | + S_028B54_PRIMGEN_PASSTHRU_EN(key->ngg_passthrough) | + S_028B54_PRIMGEN_PASSTHRU_NO_MSG(key->ngg_passthrough && + pdevice->rad_info.family >= CHIP_NAVI23); + } else if (key->gs) { stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); } @@ -3567,31 +3611,11 @@ radv_pipeline_emit_vgt_shader_config(const struct radv_device *device, struct ra stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2); if (pdevice->rad_info.gfx_level >= GFX10) { - uint8_t hs_size = 64, gs_size = 64, vs_size = 64; - - if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) - hs_size = pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.wave_size; - - if (pipeline->base.shaders[MESA_SHADER_GEOMETRY]) { - vs_size = gs_size = pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.wave_size; - if (radv_pipeline_has_gs_copy_shader(&pipeline->base)) - vs_size = pipeline->base.gs_copy_shader->info.wave_size; - } else if (pipeline->base.shaders[MESA_SHADER_TESS_EVAL]) - vs_size = pipeline->base.shaders[MESA_SHADER_TESS_EVAL]->info.wave_size; - else if (pipeline->base.shaders[MESA_SHADER_VERTEX]) - vs_size = pipeline->base.shaders[MESA_SHADER_VERTEX]->info.wave_size; - else if (pipeline->base.shaders[MESA_SHADER_MESH]) - vs_size = gs_size = pipeline->base.shaders[MESA_SHADER_MESH]->info.wave_size; - - if (radv_pipeline_has_ngg(pipeline)) { - assert(!radv_pipeline_has_gs_copy_shader(&pipeline->base)); - gs_size = vs_size; - } - - /* legacy GS only supports Wave64 */ - stages |= S_028B54_HS_W32_EN(hs_size == 32 ? 1 : 0) | - S_028B54_GS_W32_EN(gs_size == 32 ? 1 : 0) | - S_028B54_VS_W32_EN(vs_size == 32 ? 1 : 0); + stages |= S_028B54_HS_W32_EN(key->hs_wave32) | + S_028B54_GS_W32_EN(key->gs_wave32) | + S_028B54_VS_W32_EN(pdevice->rad_info.gfx_level < GFX11 && key->vs_wave32); + /* Legacy GS only supports Wave64. Read it as an implication. */ + assert(!(key->gs && !key->ngg) || !key->gs_wave32); } radeon_set_context_reg(ctx_cs, R_028B54_VGT_SHADER_STAGES_EN, stages); @@ -3711,6 +3735,9 @@ radv_pipeline_emit_pm4(const struct radv_device *device, struct radv_graphics_pi cs->buf = malloc(4 * (cs->max_dw + ctx_cs->max_dw)); ctx_cs->buf = cs->buf + cs->max_dw; + struct radv_vgt_shader_key vgt_shader_key = + radv_pipeline_generate_vgt_shader_key(device, pipeline); + radv_pipeline_emit_blend_state(ctx_cs, pipeline, blend); radv_pipeline_emit_vgt_gs_mode(device, ctx_cs, pipeline); @@ -3753,7 +3780,7 @@ radv_pipeline_emit_pm4(const struct radv_device *device, struct radv_graphics_pi } radv_pipeline_emit_vgt_vertex_reuse(device, ctx_cs, pipeline); - radv_pipeline_emit_vgt_shader_config(device, ctx_cs, pipeline); + radv_emit_vgt_shader_config(device, ctx_cs, &vgt_shader_key); radv_pipeline_emit_vgt_gs_out(device, ctx_cs, pipeline, vgt_gs_out_prim_type); if (pdevice->rad_info.gfx_level >= GFX10_3) { diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index ad227b61a8e..99307f46b0b 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1916,6 +1916,19 @@ void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx unsigned data_sel, uint64_t va, uint32_t new_fence, uint64_t gfx9_eop_bug_va); +struct radv_vgt_shader_key { + uint8_t tess : 1; + uint8_t gs : 1; + uint8_t mesh_scratch_ring : 1; + uint8_t mesh : 1; + uint8_t ngg_passthrough : 1; + uint8_t ngg : 1; /* gfx10+ */ + uint8_t streamout : 1; /* only used with NGG */ + uint8_t hs_wave32 : 1; + uint8_t gs_wave32 : 1; + uint8_t vs_wave32 : 1; +}; + void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask); void si_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs,