aco: implement skip_helpers for load_scratch

Foz-DB GFX1201:
Totals from 2 (0.00% of 80287) affected shaders:
Instrs: 4016 -> 4054 (+0.95%)
CodeSize: 22104 -> 22256 (+0.69%)
Latency: 17123 -> 17129 (+0.04%)
Copies: 406 -> 415 (+2.22%)
SALU: 323 -> 353 (+9.29%)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36785>
This commit is contained in:
Georg Lehmann
2025-08-06 14:30:15 +02:00
committed by Marge Bot
parent 2bfd8918a5
commit ee7069f875
2 changed files with 4 additions and 1 deletions
@@ -347,6 +347,7 @@ intrinsic_try_skip_helpers(nir_intrinsic_instr* intr, UNUSED void* data)
case nir_intrinsic_load_ssbo:
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_constant:
case nir_intrinsic_load_scratch:
case nir_intrinsic_bindless_image_load:
case nir_intrinsic_bindless_image_fragment_mask_load_amd:
case nir_intrinsic_bindless_image_sparse_load:
@@ -732,13 +732,14 @@ scratch_load_callback(Builder& bld, const LoadEmitInfo& info, unsigned bytes_nee
}
RegClass rc = RegClass::get(RegType::vgpr, bytes_size);
Temp val = rc == info.dst.regClass() ? info.dst : bld.tmp(rc);
aco_ptr<Instruction> flat{create_instruction(op, Format::SCRATCH, 2, 1)};
aco_ptr<Instruction> flat{create_instruction(op, Format::SCRATCH, 2 + 2 * info.disable_wqm, 1)};
Temp offset = info.offset.getTemp();
flat->operands[0] = offset.regClass() == s1 ? Operand(v1) : Operand(offset);
flat->operands[1] = offset.regClass() == s1 ? Operand(offset) : Operand(s1);
flat->scratch().sync = info.sync;
flat->scratch().offset = info.const_offset;
flat->definitions[0] = Definition(val);
init_disable_wqm(bld, flat->scratch(), info.disable_wqm);
bld.insert(std::move(flat));
return val;
@@ -3207,6 +3208,7 @@ visit_load_scratch(isel_context* ctx, nir_intrinsic_instr* instr)
info.cache = get_cache_flags(ctx, ACCESS_IS_SWIZZLED_AMD, ac_access_type_load);
info.swizzle_component_size = ctx->program->gfx_level <= GFX8 ? 4 : 0;
info.sync = memory_sync_info(storage_scratch, semantic_private);
info.disable_wqm = nir_intrinsic_access(instr) & ACCESS_SKIP_HELPERS;
if (ctx->program->gfx_level >= GFX9) {
if (nir_src_is_const(instr->src[0])) {
info.const_offset = nir_src_as_uint(instr->src[0]);