radeonsi: fix the DMA compute shader
It was correct for the parameters that the driver was using, but incorrect
for other parameters.
1. The address computation must multiply the workgroup ID by the workgroup
size (wave size) times num_mem_ops to fix the case when
num_dwords_per_thread > 4.
2. nir_load_ssbo shouldn't set the number of components to 4 when
num_dwords_per_thread < 4.
Fixes: 6584088cd5 - radeonsi: "create_dma_compute" shader in nir
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28119>
This commit is contained in:
@@ -697,13 +697,15 @@ void *si_create_dma_compute_shader(struct si_context *sctx, unsigned num_dwords_
  * the 2nd store writes into 1 * wavesize + tid,
  * the 3rd store writes into 2 * wavesize + tid, etc.
  */
- nir_def *store_address = get_global_ids(&b, 1);
+ nir_def *store_address =
+ nir_iadd(&b, nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b), 0),
+ default_wave_size * num_mem_ops),
+ nir_channel(&b, nir_load_local_invocation_id(&b), 0));

  /* Convert from a "store size unit" into bytes. */
  store_address = nir_imul_imm(&b, store_address, 4 * inst_dwords[0]);

- nir_def *load_address = store_address, *value, *values[num_mem_ops];
- value = nir_undef(&b, 1, 32);
+ nir_def *load_address = store_address, *value = NULL, *values[num_mem_ops];

  if (is_copy) {
  b.shader->info.num_ssbos++;
@@ -723,7 +725,7 @@ void *si_create_dma_compute_shader(struct si_context *sctx, unsigned num_dwords_
  load_address = nir_iadd(&b, load_address,
  nir_imm_int(&b, 4 * inst_dwords[i] * default_wave_size));
  }
- values[i] = nir_load_ssbo(&b, 4, 32, nir_imm_int(&b, 1),load_address,
+ values[i] = nir_load_ssbo(&b, inst_dwords[i], 32, nir_imm_int(&b, 1), load_address,
  .access = load_qualifier);
  }

||||
Reference in New Issue
Block a user