From fc392ff104d688bc30b36cd0b4ef0587750f3194 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 5 Jun 2022 06:00:22 -0400 Subject: [PATCH] radeonsi: fix an NGG streamout hang with monolithic shaders ac_llvm_add_target_dep_function_attr has no effect if the function is inlined. amdgpu-gds-size determines m0 for ds_sub_u32 gds, which hangs if it's 0. This helps both gfx10 and gfx11, though it will only be used by gfx11 after we enable streamout. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 2 -- src/gallium/drivers/radeonsi/si_shader_llvm.c | 4 ++++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 83dc79812b6..da783a7f6e1 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -303,8 +303,6 @@ static void build_streamout(struct si_shader_context *ctx, struct ngg_streamout unsigned scratch_offset_base = isgs ? 8 : 4; LLVMValueRef scratch_offset_basev = isgs ? i32_8 : i32_4; - ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-gds-size", 256); - /* Determine the mapping of streamout buffers to vertex streams. */ for (unsigned i = 0; i < so->num_outputs; ++i) { unsigned buf = so->output[i].output_buffer; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 7ae10d2736d..c383d6dbd78 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -188,6 +188,10 @@ void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTy ctx->screen->info.address32_hi); } + if (ctx->stage <= MESA_SHADER_GEOMETRY && ctx->shader->key.ge.as_ngg && + ctx->shader->selector->info.enabled_streamout_buffer_mask) + ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-gds-size", 256); + ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size); ac_llvm_set_target_features(ctx->main_fn, &ctx->ac); }