diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 537e0646e99..a970d2c1aeb 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -12598,6 +12598,11 @@ select_rt_prolog(Program* program, ac_shader_config* config, assert(in_stack_base == out_launch_size_z); assert(in_local_ids[0] == out_launch_ids[0]); + /* = out_args->num_sgprs_used); + /* load raygen sbt */ bld.smem(aco_opcode::s_load_dwordx2, Definition(tmp_raygen_sbt, s2), Operand(in_sbt_desc, s2), Operand::c32(0u)); @@ -12649,15 +12654,6 @@ select_rt_prolog(Program* program, ac_shader_config* config, bld.vop3(aco_opcode::v_mad_u32_u24, Definition(out_launch_ids[0], v1), Operand(in_wg_id_x, s1), Operand::c32(8), Operand(in_local_ids[0], v1)); - if (options->gfx_level < GFX9) { - /* write scratch/ring offsets to outputs, if needed */ - bld.sop1(aco_opcode::s_mov_b32, - Definition(get_arg_reg(out_args, out_args->scratch_offset), s1), - Operand(in_scratch_offset, s1)); - bld.sop1(aco_opcode::s_mov_b64, Definition(get_arg_reg(out_args, out_args->ring_offsets), s2), - Operand(tmp_ring_offsets, s2)); - } - /* calculate shader record ptr: SBT + RADV_RT_HANDLE_SIZE */ if (options->gfx_level < GFX9) { bld.vop2_e64(aco_opcode::v_add_co_u32, Definition(out_record_ptr, v1), Definition(vcc, s2), @@ -12699,6 +12695,15 @@ select_rt_prolog(Program* program, ac_shader_config* config, bld.vop2(aco_opcode::v_cndmask_b32, Definition(out_launch_ids[1], v1), Operand::zero(), Operand(out_launch_ids[1], v1), Operand(vcc, bld.lm)); + if (options->gfx_level < GFX9) { + /* write scratch/ring offsets to outputs, if needed */ + bld.sop1(aco_opcode::s_mov_b32, + Definition(get_arg_reg(out_args, out_args->scratch_offset), s1), + Operand(in_scratch_offset, s1)); + bld.sop1(aco_opcode::s_mov_b64, Definition(get_arg_reg(out_args, out_args->ring_offsets), s2), + Operand(tmp_ring_offsets, s2)); + } + /* jump to raygen */ bld.sop1(aco_opcode::s_setpc_b64, Operand(out_uniform_shader_addr, s2));