From ee7069f87548d11ffef24a7a4b38f8bec37bed12 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Wed, 6 Aug 2025 14:30:15 +0200
Subject: [PATCH] aco: implement skip_helpers for load_scratch

Foz-DB GFX1201:
Totals from 2 (0.00% of 80287) affected shaders:
Instrs: 4016 -> 4054 (+0.95%)
CodeSize: 22104 -> 22256 (+0.69%)
Latency: 17123 -> 17129 (+0.04%)
Copies: 406 -> 415 (+2.22%)
SALU: 323 -> 353 (+9.29%)

Reviewed-by: Rhys Perry
Part-of:
---
 src/amd/compiler/instruction_selection/aco_isel_setup.cpp | 1 +
 .../instruction_selection/aco_select_nir_intrinsics.cpp   | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/instruction_selection/aco_isel_setup.cpp b/src/amd/compiler/instruction_selection/aco_isel_setup.cpp
index 4687796a9be..2029c679619 100644
--- a/src/amd/compiler/instruction_selection/aco_isel_setup.cpp
+++ b/src/amd/compiler/instruction_selection/aco_isel_setup.cpp
@@ -347,6 +347,7 @@ intrinsic_try_skip_helpers(nir_intrinsic_instr* intr, UNUSED void* data)
    case nir_intrinsic_load_ssbo:
    case nir_intrinsic_load_ubo:
    case nir_intrinsic_load_constant:
+   case nir_intrinsic_load_scratch:
    case nir_intrinsic_bindless_image_load:
    case nir_intrinsic_bindless_image_fragment_mask_load_amd:
    case nir_intrinsic_bindless_image_sparse_load:
diff --git a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp
index c53aadd3607..9ecbb4a07b8 100644
--- a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp
+++ b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp
@@ -732,13 +732,14 @@ scratch_load_callback(Builder& bld, const LoadEmitInfo& info, unsigned bytes_nee
    }
    RegClass rc = RegClass::get(RegType::vgpr, bytes_size);
    Temp val = rc == info.dst.regClass() ? info.dst : bld.tmp(rc);
-   aco_ptr<Instruction> flat{create_instruction(op, Format::SCRATCH, 2, 1)};
+   aco_ptr<Instruction> flat{create_instruction(op, Format::SCRATCH, 2 + 2 * info.disable_wqm, 1)};
    Temp offset = info.offset.getTemp();
    flat->operands[0] = offset.regClass() == s1 ? Operand(v1) : Operand(offset);
    flat->operands[1] = offset.regClass() == s1 ? Operand(offset) : Operand(s1);
    flat->scratch().sync = info.sync;
    flat->scratch().offset = info.const_offset;
    flat->definitions[0] = Definition(val);
+   init_disable_wqm(bld, flat->scratch(), info.disable_wqm);
    bld.insert(std::move(flat));
 
    return val;
@@ -3207,6 +3208,7 @@ visit_load_scratch(isel_context* ctx, nir_intrinsic_instr* instr)
    info.cache = get_cache_flags(ctx, ACCESS_IS_SWIZZLED_AMD, ac_access_type_load);
    info.swizzle_component_size = ctx->program->gfx_level <= GFX8 ? 4 : 0;
    info.sync = memory_sync_info(storage_scratch, semantic_private);
+   info.disable_wqm = nir_intrinsic_access(instr) & ACCESS_SKIP_HELPERS;
    if (ctx->program->gfx_level >= GFX9) {
       if (nir_src_is_const(instr->src[0])) {
          info.const_offset = nir_src_as_uint(instr->src[0]);