From f5912c6d326d9ae850b6bedc100ec20df91ef7f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 26 Sep 2020 14:39:23 -0400 Subject: [PATCH] radeonsi: kill disabled clip distances and planes at per-channel granularity Apps often enable only 1 plane for gl_ClipVertex, which means 1 scalar clip distance. Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_shader.c | 2 +- src/gallium/drivers/radeonsi/si_shader.h | 2 +- .../drivers/radeonsi/si_shader_llvm_vs.c | 30 ++++++++++++------- src/gallium/drivers/radeonsi/si_state.c | 13 ++------ .../drivers/radeonsi/si_state_shaders.c | 10 +++---- 5 files changed, 29 insertions(+), 28 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 8e688cd65af..888a7312b7b 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1290,7 +1290,7 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f) stage == MESA_SHADER_VERTEX) && !key->as_es && !key->as_ls) { fprintf(f, " opt.kill_outputs = 0x%" PRIx64 "\n", key->opt.kill_outputs); - fprintf(f, " opt.clip_disable = %u\n", key->opt.clip_disable); + fprintf(f, " opt.kill_clip_distances = 0x%x\n", key->opt.kill_clip_distances); if (stage != MESA_SHADER_GEOMETRY) fprintf(f, " opt.ngg_culling = 0x%x\n", key->opt.ngg_culling); } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index a8aba0b81f1..4985ce66259 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -647,8 +647,8 @@ struct si_shader_key { struct { /* For HW VS (it can be VS, TES, GS) */ uint64_t kill_outputs; /* "get_unique_index" bits */ + unsigned kill_clip_distances : 8; unsigned kill_pointsize : 1; - unsigned clip_disable : 1; /* For NGG VS and TES. */ unsigned ngg_culling : 5; /* SI_NGG_CULL_* */ diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index 96313d11175..d996ccc1b11 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -372,20 +372,29 @@ static void si_llvm_emit_clipvertex(struct si_shader_context *ctx, struct ac_exp LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers); LLVMValueRef constbuf_index = LLVMConstInt(ctx->ac.i32, SI_VS_CONST_CLIP_PLANES, 0); LLVMValueRef const_resource = ac_build_load_to_sgpr(&ctx->ac, ptr, constbuf_index); + unsigned clipdist_mask = ctx->shader->selector->clipdist_mask & + ~ctx->shader->key.opt.kill_clip_distances; for (reg_index = 0; reg_index < 2; reg_index++) { struct ac_export_args *args = &pos[2 + reg_index]; - args->out[0] = args->out[1] = args->out[2] = args->out[3] = LLVMConstReal(ctx->ac.f32, 0.0f); + if (!(clipdist_mask & BITFIELD_RANGE(reg_index * 4, 4))) + continue; + + args->out[0] = args->out[1] = args->out[2] = args->out[3] = LLVMGetUndef(ctx->ac.f32); /* Compute dot products of position and user clip plane vectors */ for (chan = 0; chan < 4; chan++) { + if (!(clipdist_mask & BITFIELD_BIT(reg_index * 4 + chan))) + continue; + for (const_chan = 0; const_chan < 4; const_chan++) { LLVMValueRef addr = LLVMConstInt(ctx->ac.i32, ((reg_index * 4 + chan) * 4 + const_chan) * 4, 0); base_elt = si_buffer_load_const(ctx, const_resource, addr); args->out[chan] = - ac_build_fmad(&ctx->ac, base_elt, out_elts[const_chan], args->out[chan]); + ac_build_fmad(&ctx->ac, base_elt, out_elts[const_chan], + const_chan == 0 ? ctx->ac.f32_0 : args->out[chan]); } } @@ -541,7 +550,10 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx, struct ac_export_args pos_args[4] = {}; LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, viewport_index_value = NULL; - unsigned pos_idx; + unsigned pos_idx, index; + unsigned clipdist_mask = (shader->selector->clipdist_mask & + ~shader->key.opt.kill_clip_distances) | + shader->selector->culldist_mask; int i; si_vertex_color_clamping(ctx, outputs, noutput); @@ -566,16 +578,14 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx, break; case VARYING_SLOT_CLIP_DIST0: case VARYING_SLOT_CLIP_DIST1: - if (!shader->key.opt.clip_disable) { - unsigned index = 2 + (outputs[i].semantic - VARYING_SLOT_CLIP_DIST0); - si_llvm_init_vs_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_POS + index, - &pos_args[index]); + index = outputs[i].semantic - VARYING_SLOT_CLIP_DIST0; + if (clipdist_mask & BITFIELD_RANGE(index * 4, 4)) { + si_llvm_init_vs_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_POS + 2 + index, + &pos_args[2 + index]); } break; case VARYING_SLOT_CLIP_VERTEX: - if (!shader->key.opt.clip_disable) { - si_llvm_emit_clipvertex(ctx, pos_args, outputs[i].values); - } + si_llvm_emit_clipvertex(ctx, pos_args, outputs[i].values); break; } } diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index f13ca4f3bc8..36d05cd376d 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -737,14 +737,7 @@ static void si_emit_clip_regs(struct si_context *sctx) unsigned clipdist_mask = vs_sel->clipdist_mask; unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SIX_BITS; unsigned culldist_mask = vs_sel->culldist_mask; - unsigned total_mask; - - if (vs->key.opt.clip_disable) { - assert(!info->base.cull_distance_array_size); - clipdist_mask = 0; - culldist_mask = 0; - } - total_mask = clipdist_mask | culldist_mask; + unsigned vs_out_mask = (clipdist_mask & ~vs->key.opt.kill_clip_distances) | culldist_mask; /* Clip distances on points have no effect, so need to be implemented * as cull distances. This applies for the clipvertex case as well. @@ -756,8 +749,8 @@ static void si_emit_clip_regs(struct si_context *sctx) culldist_mask |= clipdist_mask; unsigned initial_cdw = sctx->gfx_cs->current.cdw; - unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) | - S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | + unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((vs_out_mask & 0x0F) != 0) | + S_02881C_VS_OUT_CCDIST1_VEC_ENA((vs_out_mask & 0xF0) != 0) | S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->chip_class >= GFX10_3) | S_02881C_BYPASS_PRIM_RATE_COMBINER(sctx->chip_class >= GFX10_3) | clipdist_mask | (culldist_mask << 8); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 967f6de22b7..b5ce55a9df5 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1774,9 +1774,7 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx, struct si_shad { struct si_shader_selector *ps = sctx->ps_shader.cso; - key->opt.clip_disable = sctx->queued.named.rasterizer->clip_plane_enable == 0 && - (vs->info.base.clip_distance_array_size || vs->info.writes_clipvertex) && - !vs->info.base.cull_distance_array_size; + key->opt.kill_clip_distances = vs->clipdist_mask & ~sctx->queued.named.rasterizer->clip_plane_enable; /* Find out if PS is disabled. */ bool ps_disabled = true; @@ -2920,7 +2918,7 @@ static void si_update_clip_regs(struct si_context *sctx, struct si_shader_select old_hw_vs->clipdist_mask != next_hw_vs->clipdist_mask || old_hw_vs->culldist_mask != next_hw_vs->culldist_mask || !old_hw_vs_variant || !next_hw_vs_variant || - old_hw_vs_variant->key.opt.clip_disable != next_hw_vs_variant->key.opt.clip_disable)) + old_hw_vs_variant->key.opt.kill_clip_distances != next_hw_vs_variant->key.opt.kill_clip_distances)) si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs); } @@ -3862,7 +3860,7 @@ bool si_update_shaders(struct si_context *sctx) struct si_compiler_ctx_state compiler_state; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; struct si_shader *old_vs = si_get_vs_state(sctx); - bool old_clip_disable = old_vs ? old_vs->key.opt.clip_disable : false; + unsigned old_kill_clip_distances = old_vs ? old_vs->key.opt.kill_clip_distances : 0; struct si_shader *old_ps = sctx->ps_shader.current; union si_vgt_stages_key key; unsigned old_spi_shader_col_format = @@ -3988,7 +3986,7 @@ bool si_update_shaders(struct si_context *sctx) si_update_vgt_shader_config(sctx, key); - if (old_clip_disable != si_get_vs_state(sctx)->key.opt.clip_disable) + if (old_kill_clip_distances != si_get_vs_state(sctx)->key.opt.kill_clip_distances) si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs); if (sctx->ps_shader.cso) {