radeonsi: fix an NGG streamout hang with monolithic shaders

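ac_llvm_add_target_dep_function_attr has no effect if the function it is
applied to is later inlined into another function, as happens with
monolithic shaders. Set the attribute when the function is created
instead of while building the streamout code.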

amdgpu-gds-size determines the value of m0 for "ds_sub_u32 ... gds", and
that instruction hangs if m0 is 0.

This helps both gfx10 and gfx11, though it will only be used by gfx11
after we enable streamout.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16885>
This commit is contained in:
Marek Olšák
2022-06-05 06:00:22 -04:00
committed by Marge Bot
parent a9f7744cfe
commit fc392ff104
2 changed files with 4 additions and 2 deletions
@@ -303,8 +303,6 @@ static void build_streamout(struct si_shader_context *ctx, struct ngg_streamout
unsigned scratch_offset_base = isgs ? 8 : 4;
LLVMValueRef scratch_offset_basev = isgs ? i32_8 : i32_4;
ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-gds-size", 256);
/* Determine the mapping of streamout buffers to vertex streams. */
for (unsigned i = 0; i < so->num_outputs; ++i) {
unsigned buf = so->output[i].output_buffer;
@@ -188,6 +188,10 @@ void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTy
ctx->screen->info.address32_hi);
}
if (ctx->stage <= MESA_SHADER_GEOMETRY && ctx->shader->key.ge.as_ngg &&
ctx->shader->selector->info.enabled_streamout_buffer_mask)
ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-gds-size", 256);
ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size);
ac_llvm_set_target_features(ctx->main_fn, &ctx->ac);
}