From 75cd43741a8607fc30f4577ae73004c495c39a5a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timur=20Krist=C3=B3f?=
Date: Sat, 10 Apr 2021 14:51:56 +0200
Subject: [PATCH] aco: Align NGG scratch size to 16 so a single ds_read can always read it.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Timur Kristóf
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_instruction_selection.cpp       | 2 +-
 src/amd/compiler/aco_instruction_selection_setup.cpp | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 75b9fff0159..5d5f7e429b4 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -11409,7 +11409,7 @@ std::pair ngg_gs_workgroup_reduce_and_scan(isel_context *ctx, Temp s
 
    /* The first lane of each wave loads every wave's results from LDS, to avoid bank conflicts */
    Temp reduction_per_wave_vector = load_lds(ctx, 4u * num_lds_dwords, bld.tmp(RegClass(RegType::vgpr, num_lds_dwords)),
-                                             bld.copy(bld.def(v1), Operand(0u)), ctx->ngg_gs_scratch_addr, 4u);
+                                             bld.copy(bld.def(v1), Operand(0u)), ctx->ngg_gs_scratch_addr, 16u);
 
    begin_divergent_if_else(ctx, &ic);
    end_divergent_if(ctx, &ic);
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index bbdbd800e7a..29b3845f02e 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -419,8 +419,9 @@ void setup_gs_variables(isel_context *ctx, nir_shader *nir)
       ctx->ngg_gs_emit_vtx_bytes = ctx->ngg_gs_primflags_offset + 4u;
       ctx->ngg_gs_emit_addr = esgs_ring_bytes;
       ctx->ngg_gs_scratch_addr = ctx->ngg_gs_emit_addr + ngg_emit_bytes;
+      ctx->ngg_gs_scratch_addr = ALIGN(ctx->ngg_gs_scratch_addr, 16u);
 
-      unsigned total_lds_bytes = esgs_ring_bytes + ngg_emit_bytes + ngg_gs_scratch_bytes;
+      unsigned total_lds_bytes = ctx->ngg_gs_scratch_addr + ngg_gs_scratch_bytes;
       assert(total_lds_bytes >= ctx->ngg_gs_emit_addr);
       assert(total_lds_bytes >= ctx->ngg_gs_scratch_addr);
       ctx->program->config->lds_size = DIV_ROUND_UP(total_lds_bytes, ctx->program->dev.lds_encoding_granule);