From 4f3c74ddfb7289f729cb719e777b2c07ca1e79ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 9 Jun 2022 10:21:11 -0400 Subject: [PATCH] radeonsi: determine DB_SHADER_CONTROL in si_shader_ps This is cleaner and more flexible. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_shader.h | 3 +- src/gallium/drivers/radeonsi/si_shader_info.c | 50 ------------ src/gallium/drivers/radeonsi/si_state.c | 4 - src/gallium/drivers/radeonsi/si_state.h | 1 - .../drivers/radeonsi/si_state_draw.cpp | 8 ++ .../drivers/radeonsi/si_state_shaders.cpp | 79 +++++++++++++------ 6 files changed, 66 insertions(+), 79 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index d45d2e27fee..1f3ec8c8348 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -381,8 +381,6 @@ struct si_shader_info { ubyte gs_input_verts_per_prim; unsigned max_gsvs_emit_size; - /* PS parameters */ - unsigned db_shader_control; /* Set 0xf or 0x0 (4 bits) per each written output. * ANDed with spi_shader_col_format. 
*/ @@ -916,6 +914,7 @@ struct si_shader { unsigned spi_shader_z_format; unsigned spi_shader_col_format; unsigned cb_shader_mask; + unsigned db_shader_control; unsigned num_interp; } ps; } ctx_reg; diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c index 497b090a175..ff4b74e30de 100644 --- a/src/gallium/drivers/radeonsi/si_shader_info.c +++ b/src/gallium/drivers/radeonsi/si_shader_info.c @@ -837,55 +837,5 @@ void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir, else if (info->input[i].semantic == VARYING_SLOT_COL1) info->color_attr_index[1] = i; } - - /* DB_SHADER_CONTROL */ - info->db_shader_control = S_02880C_Z_EXPORT_ENABLE(info->writes_z) | - S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(info->writes_stencil) | - S_02880C_MASK_EXPORT_ENABLE(info->writes_samplemask) | - S_02880C_KILL_ENABLE(info->base.fs.uses_discard); - - switch (info->base.fs.depth_layout) { - case FRAG_DEPTH_LAYOUT_GREATER: - info->db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z); - break; - case FRAG_DEPTH_LAYOUT_LESS: - info->db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z); - break; - default:; - } - - /* Z_ORDER, EXEC_ON_HIER_FAIL and EXEC_ON_NOOP should be set as following: - * - * | early Z/S | writes_mem | allow_ReZ? | Z_ORDER | EXEC_ON_HIER_FAIL | EXEC_ON_NOOP - * --|-----------|------------|------------|--------------------|-------------------|------------- - * 1a| false | false | true | EarlyZ_Then_ReZ | 0 | 0 - * 1b| false | false | false | EarlyZ_Then_LateZ | 0 | 0 - * 2 | false | true | n/a | LateZ | 1 | 0 - * 3 | true | false | n/a | EarlyZ_Then_LateZ | 0 | 0 - * 4 | true | true | n/a | EarlyZ_Then_LateZ | 0 | 1 - * - * In cases 3 and 4, HW will force Z_ORDER to EarlyZ regardless of what's set in the register. - * In case 2, NOOP_CULL is a don't care field. In case 2, 3 and 4, ReZ doesn't make sense. 
- * - * Don't use ReZ without profiling !!! - * - * ReZ decreases performance by 15% in DiRT: Showdown on Ultra settings, which has pretty complex - * shaders. - */ - if (info->base.fs.early_fragment_tests) { - /* Cases 3, 4. */ - info->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1) | - S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) | - S_02880C_EXEC_ON_NOOP(info->base.writes_memory); - } else if (info->base.writes_memory) { - /* Case 2. */ - info->db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z) | S_02880C_EXEC_ON_HIER_FAIL(1); - } else { - /* Case 1. */ - info->db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); - } - - if (info->base.fs.post_depth_coverage) - info->db_shader_control |= S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(1); } } diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index d62a6dd6a97..586a1c38f44 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1211,9 +1211,6 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state) sctx->do_update_shaders = true; } - if (old_rs->poly_stipple_enable != rs->poly_stipple_enable) - si_update_ps_kill_enable(sctx); - if (old_rs->line_smooth != rs->line_smooth || old_rs->poly_smooth != rs->poly_smooth || old_rs->poly_stipple_enable != rs->poly_stipple_enable || @@ -1431,7 +1428,6 @@ static void si_bind_dsa_state(struct pipe_context *ctx, void *state) if (old_dsa->alpha_func != dsa->alpha_func) { si_ps_key_update_dsa(sctx); si_update_ps_inputs_read_or_disabled(sctx); - si_update_ps_kill_enable(sctx); sctx->do_update_shaders = true; } diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 0dd38f080d2..ca830703198 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -583,7 +583,6 @@ void si_vs_key_update_inputs(struct si_context *sctx); void si_get_vs_key_inputs(struct si_context *sctx, 
union si_shader_key *key, struct si_vs_prolog_bits *prolog_key); void si_update_ps_inputs_read_or_disabled(struct si_context *sctx); -void si_update_ps_kill_enable(struct si_context *sctx); void si_update_vrs_flat_shading(struct si_context *sctx); unsigned si_get_input_prim(const struct si_shader_selector *gs, const union si_shader_key *key); bool si_update_ngg(struct si_context *sctx); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 9103cacc1f4..af7dfabf5d7 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -263,6 +263,14 @@ static bool si_update_shaders(struct si_context *sctx) return false; si_pm4_bind_state(sctx, ps, sctx->shader.ps.current); + unsigned db_shader_control = sctx->shader.ps.current->ctx_reg.ps.db_shader_control; + if (sctx->ps_db_shader_control != db_shader_control) { + sctx->ps_db_shader_control = db_shader_control; + si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); + if (sctx->screen->dpbb_allowed) + si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); + } + if (si_pm4_state_changed(sctx, ps) || (!NGG && si_pm4_state_changed(sctx, vs)) || (NGG && si_pm4_state_changed(sctx, gs))) { diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 6dbd2002d7f..e83b6e7e2fd 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -1866,6 +1866,62 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader) assert(shader->key.ps.part.prolog.bc_optimize_for_linear || !G_0286CC_LINEAR_CENTER_ENA(input_ena) || !G_0286CC_LINEAR_CENTROID_ENA(input_ena)); + /* DB_SHADER_CONTROL */ + unsigned db_shader_control = + S_02880C_Z_EXPORT_ENABLE(info->writes_z) | + S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(info->writes_stencil) | + S_02880C_MASK_EXPORT_ENABLE(info->writes_samplemask) | + 
/* Changes to KILL_ENABLE should also update ps_modifies_zs. */ + S_02880C_KILL_ENABLE(info->base.fs.uses_discard || + shader->key.ps.part.prolog.poly_stipple || + shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_ALWAYS); + + switch (info->base.fs.depth_layout) { + case FRAG_DEPTH_LAYOUT_GREATER: + db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z); + break; + case FRAG_DEPTH_LAYOUT_LESS: + db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z); + break; + default:; + } + + /* Z_ORDER, EXEC_ON_HIER_FAIL and EXEC_ON_NOOP should be set as follows: + * + * | early Z/S | writes_mem | allow_ReZ? | Z_ORDER | EXEC_ON_HIER_FAIL | EXEC_ON_NOOP + * --|-----------|------------|------------|--------------------|-------------------|------------- + * 1a| false | false | true | EarlyZ_Then_ReZ | 0 | 0 + * 1b| false | false | false | EarlyZ_Then_LateZ | 0 | 0 + * 2 | false | true | n/a | LateZ | 1 | 0 + * 3 | true | false | n/a | EarlyZ_Then_LateZ | 0 | 0 + * 4 | true | true | n/a | EarlyZ_Then_LateZ | 0 | 1 + * + * In cases 3 and 4, HW will force Z_ORDER to EarlyZ regardless of what's set in the register. + * In case 2, NOOP_CULL is a don't care field. In cases 2, 3 and 4, ReZ doesn't make sense. + * + * Don't use ReZ without profiling !!! + * + * ReZ decreases performance by 15% in DiRT: Showdown on Ultra settings, which has pretty complex + * shaders. + */ + if (info->base.fs.early_fragment_tests) { + /* Cases 3, 4. */ + db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1) | + S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) | + S_02880C_EXEC_ON_NOOP(info->base.writes_memory); + } else if (info->base.writes_memory) { + /* Case 2. */ + db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z) | S_02880C_EXEC_ON_HIER_FAIL(1); + } else { + /* Case 1. 
*/ + db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); + } + + if (info->base.fs.post_depth_coverage) + db_shader_control |= S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(1); + + shader->ctx_reg.ps.db_shader_control = db_shader_control; + pm4 = si_get_shader_pm4_state(shader); if (!pm4) return; @@ -1925,9 +1981,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader) if (!spi_shader_col_format && !has_mrtz) { if (sscreen->info.gfx_level >= GFX10) { - if (info->base.fs.uses_discard || - shader->key.ps.part.prolog.poly_stipple || - shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_ALWAYS) + if (G_02880C_KILL_ENABLE(db_shader_control)) spi_shader_col_format = V_028714_SPI_SHADER_32_R; } else { spi_shader_col_format = V_028714_SPI_SHADER_32_R; @@ -3483,24 +3537,6 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state) si_update_rasterized_prim(sctx); } -void si_update_ps_kill_enable(struct si_context *sctx) -{ - if (!sctx->shader.ps.cso) - return; - - /* Changes to KILL_ENABLE should also update si_shader_ps. 
*/ - unsigned db_shader_control = sctx->shader.ps.cso->info.db_shader_control | - S_02880C_KILL_ENABLE(sctx->queued.named.rasterizer->poly_stipple_enable || - sctx->queued.named.dsa->alpha_func != PIPE_FUNC_ALWAYS); - - if (sctx->ps_db_shader_control != db_shader_control) { - sctx->ps_db_shader_control = db_shader_control; - si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); - if (sctx->screen->dpbb_allowed) - si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); - } -} - void si_update_vrs_flat_shading(struct si_context *sctx) { if (sctx->gfx_level >= GFX10_3 && sctx->shader.ps.cso) { @@ -3557,7 +3593,6 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state) si_ps_key_update_sample_shading(sctx); si_ps_key_update_framebuffer_rasterizer_sample_shading(sctx); si_update_ps_inputs_read_or_disabled(sctx); - si_update_ps_kill_enable(sctx); si_update_vrs_flat_shading(sctx); if (sctx->screen->dpbb_allowed) {