From e99765df08c7fe6be836022896152bbca7a94ff4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 Mar 2024 15:13:50 -0400 Subject: [PATCH] radeonsi: fix the DMA compute shader It was correct for the parameters that the driver was using, but incorrect for other parameters. 1. The address computation must multiply the workgroup size (wave size) by num_mem_ops to fix the case when num_dwords_per_thread > 4. 2. nir_load_ssbo shouldn't set the number of components to 4 when num_dwords_per_thread < 4. Fixes: 6584088cd5e - radeonsi: "create_dma_compute" shader in nir Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_shaderlib_nir.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c index 1ab8a0a5a65..51375878004 100644 --- a/src/gallium/drivers/radeonsi/si_shaderlib_nir.c +++ b/src/gallium/drivers/radeonsi/si_shaderlib_nir.c @@ -697,13 +697,15 @@ void *si_create_dma_compute_shader(struct si_context *sctx, unsigned num_dwords_ * the 2nd store writes into 1 * wavesize + tid, * the 3rd store writes into 2 * wavesize + tid, etc. */ - nir_def *store_address = get_global_ids(&b, 1); + nir_def *store_address = + nir_iadd(&b, nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b), 0), + default_wave_size * num_mem_ops), + nir_channel(&b, nir_load_local_invocation_id(&b), 0)); /* Convert from a "store size unit" into bytes. */ store_address = nir_imul_imm(&b, store_address, 4 * inst_dwords[0]); - nir_def *load_address = store_address, *value, *values[num_mem_ops]; - value = nir_undef(&b, 1, 32); + nir_def *load_address = store_address, *value = NULL, *values[num_mem_ops]; if (is_copy) { b.shader->info.num_ssbos++; @@ -723,7 +725,7 @@ void *si_create_dma_compute_shader(struct si_context *sctx, unsigned num_dwords_ load_address = nir_iadd(&b, load_address, nir_imm_int(&b, 4 * inst_dwords[i] * default_wave_size)); } - values[i] = nir_load_ssbo(&b, 4, 32, nir_imm_int(&b, 1),load_address, + values[i] = nir_load_ssbo(&b, inst_dwords[i], 32, nir_imm_int(&b, 1), load_address, .access = load_qualifier); }