From cf30742a667b2d19dbed69f32fcca5665cb7190f Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Wed, 24 Sep 2025 10:51:55 +0200
Subject: [PATCH] radv,aco: don't end monolithic ray tracing with unconditional terminate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The terminate requires more code and blocks us from deallocating VGPRs
early.

Foz-DB Navi31:
Totals from 63 (0.08% of 80273) affected shaders:
Instrs: 3372702 -> 3372467 (-0.01%)
CodeSize: 17441676 -> 17440736 (-0.01%)
Latency: 19763447 -> 19763288 (-0.00%)
InvThroughput: 3860502 -> 3860478 (-0.00%)
Branches: 96204 -> 96141 (-0.07%)
SALU: 406648 -> 406549 (-0.02%)

Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/instruction_selection/aco_select_nir.cpp | 2 ++
 src/amd/vulkan/nir/radv_nir_rt_shader.c                   | 4 +---
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/amd/compiler/instruction_selection/aco_select_nir.cpp b/src/amd/compiler/instruction_selection/aco_select_nir.cpp
index 5b9296b4e1b..50184e2082c 100644
--- a/src/amd/compiler/instruction_selection/aco_select_nir.cpp
+++ b/src/amd/compiler/instruction_selection/aco_select_nir.cpp
@@ -1208,6 +1208,8 @@ select_program_rt(isel_context& ctx, unsigned shader_count, struct nir_shader* c
        */
       if (shader_count > 1 || shaders[i]->info.stage != MESA_SHADER_RAYGEN)
          insert_rt_jump_next(ctx, args);
+      else
+         Builder(ctx.program, ctx.block).sopp(aco_opcode::s_endpgm);
 
       cleanup_context(&ctx);
    }
diff --git a/src/amd/vulkan/nir/radv_nir_rt_shader.c b/src/amd/vulkan/nir/radv_nir_rt_shader.c
index c361d25bfd7..9be833f5c12 100644
--- a/src/amd/vulkan/nir/radv_nir_rt_shader.c
+++ b/src/amd/vulkan/nir/radv_nir_rt_shader.c
@@ -2075,9 +2075,7 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
 
    b.cursor = nir_after_impl(impl);
 
-   if (monolithic) {
-      nir_terminate(&b);
-   } else {
+   if (!monolithic) {
       /* select next shader */
       shader_addr = nir_load_var(&b, vars.shader_addr);
       nir_def *next = select_next_shader(&b, shader_addr, info->wave_size);