aco: disable wqm for tex loads when not needed
By only executing VMEM loads for lanes where the result is used, we can save bandwidth. The NIR pass only handles tex for now, but those are the most common anyway. We can extend it to handle image/ssbo/ubo/global loads in the future. Foz-DB GFX1201: Totals from 32633 (40.66% of 80251) affected shaders: Instrs: 22635910 -> 23193509 (+2.46%); split: -0.00%, +2.46% CodeSize: 122880044 -> 125093428 (+1.80%); split: -0.00%, +1.81% VGPRs: 1481868 -> 1481712 (-0.01%) SpillSGPRs: 3877 -> 4301 (+10.94%); split: -0.52%, +11.45% Latency: 171480552 -> 171685219 (+0.12%); split: -0.18%, +0.30% InvThroughput: 24364743 -> 24373441 (+0.04%); split: -0.08%, +0.12% VClause: 388318 -> 388557 (+0.06%); split: -0.06%, +0.13% SClause: 774781 -> 776492 (+0.22%); split: -0.29%, +0.51% Copies: 1416586 -> 1541199 (+8.80%); split: -0.16%, +8.96% Branches: 419591 -> 419673 (+0.02%); split: -0.02%, +0.04% PreSGPRs: 1330303 -> 1416540 (+6.48%) PreVGPRs: 964864 -> 964863 (-0.00%) VALU: 12919601 -> 12920254 (+0.01%); split: -0.01%, +0.01% SALU: 2685402 -> 3224147 (+20.06%); split: -0.00%, +20.07% Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35970>
This commit is contained in:
@@ -367,6 +367,12 @@ init_context(isel_context* ctx, nir_shader* shader)
|
||||
apply_nuw_to_offsets(ctx, impl);
|
||||
ac_nir_flag_smem_for_loads(shader, ctx->program->gfx_level, false, true);
|
||||
|
||||
if (shader->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
nir_opt_load_skip_helpers_options skip_helper_options = {};
|
||||
skip_helper_options.no_add_divergence = true;
|
||||
nir_opt_load_skip_helpers(shader, &skip_helper_options);
|
||||
}
|
||||
|
||||
/* sanitize control flow */
|
||||
sanitize_cf_list(impl, &impl->body);
|
||||
nir_progress(true, impl, nir_metadata_none);
|
||||
@@ -621,11 +627,8 @@ init_context(isel_context* ctx, nir_shader* shader)
|
||||
}
|
||||
case nir_instr_type_tex: {
|
||||
nir_tex_instr* tex = nir_instr_as_tex(instr);
|
||||
RegType type = tex->def.divergent ? RegType::vgpr : RegType::sgpr;
|
||||
|
||||
if (tex->op == nir_texop_texture_samples) {
|
||||
assert(!tex->def.divergent);
|
||||
}
|
||||
RegType type =
|
||||
tex->def.divergent || tex->skip_helpers ? RegType::vgpr : RegType::sgpr;
|
||||
|
||||
RegClass rc = get_reg_class(ctx, type, tex->def.num_components, tex->def.bit_size);
|
||||
regclasses[tex->def.index] = rc;
|
||||
|
||||
@@ -83,6 +83,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
|
||||
assert(instr->op != nir_texop_samples_identical);
|
||||
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
bool disable_wqm = instr->skip_helpers;
|
||||
bool has_bias = false, has_lod = false, level_zero = false, has_compare = false,
|
||||
has_offset = false, has_ddx = false, has_ddy = false, has_derivs = false,
|
||||
has_sample_index = false, has_clamped_lod = false, has_wqm_coord = false;
|
||||
@@ -338,7 +339,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
|
||||
Temp tg4_lod = bld.copy(bld.def(v1), Operand::zero());
|
||||
Temp size = bld.tmp(v2);
|
||||
MIMG_instruction* tex = emit_mimg(bld, aco_opcode::image_get_resinfo, {size}, resource,
|
||||
Operand(s4), std::vector<Temp>{tg4_lod}, false);
|
||||
Operand(s4), std::vector<Temp>{tg4_lod}, disable_wqm);
|
||||
tex->dim = dim;
|
||||
tex->dmask = 0x3;
|
||||
tex->da = da;
|
||||
@@ -495,7 +496,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
|
||||
: aco_opcode::image_load_mip;
|
||||
Operand vdata = instr->is_sparse ? emit_tfe_init(bld, tmp_dst) : Operand(v1);
|
||||
MIMG_instruction* tex =
|
||||
emit_mimg(bld, op, {tmp_dst}, resource, Operand(s4), args, false, vdata);
|
||||
emit_mimg(bld, op, {tmp_dst}, resource, Operand(s4), args, disable_wqm, vdata);
|
||||
if (instr->op == nir_texop_fragment_mask_fetch_amd)
|
||||
tex->dim = da ? ac_image_2darray : ac_image_2d;
|
||||
else
|
||||
@@ -675,7 +676,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
|
||||
|
||||
Operand vdata = instr->is_sparse ? emit_tfe_init(bld, tmp_dst) : Operand(v1);
|
||||
MIMG_instruction* tex =
|
||||
emit_mimg(bld, opcode, {tmp_dst}, resource, Operand(sampler), args, false, vdata);
|
||||
emit_mimg(bld, opcode, {tmp_dst}, resource, Operand(sampler), args, disable_wqm, vdata);
|
||||
tex->dim = dim;
|
||||
tex->dmask = dmask & 0xf;
|
||||
tex->da = da;
|
||||
|
||||
@@ -117,7 +117,7 @@ BEGIN_TEST(d3d11_derivs.discard)
|
||||
/* The discard gets emitted as demote_if. */
|
||||
//>> s2: %_:exec, s1: (kill)%_:scc = s_wqm_b64 %_
|
||||
//! p_exit_early_if_not %_:exec
|
||||
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (kill)%_, (kill)%_ 2d
|
||||
//>> v4: %_ = image_sample (kill)%_, (kill)%_, v1: undef, (kill)%_, (kill)%_, %_, (kill)%_ 2d disable_wqm
|
||||
pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "ACO IR");
|
||||
END_TEST
|
||||
|
||||
|
||||
Reference in New Issue
Block a user