diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 1de79f01da8..195c57950c5 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -3463,8 +3463,10 @@ void emit_interp_instr(isel_context *ctx, unsigned idx, unsigned component, Temp Temp coord2 = emit_extract_vector(ctx, src, 1, v1); Builder bld(ctx->program, ctx->block); - Temp tmp = bld.vintrp(aco_opcode::v_interp_p1_f32, bld.def(v1), coord1, bld.m0(prim_mask), idx, component); - bld.vintrp(aco_opcode::v_interp_p2_f32, Definition(dst), coord2, bld.m0(prim_mask), tmp, idx, component); + Builder::Result interp_p1 = bld.vintrp(aco_opcode::v_interp_p1_f32, bld.def(v1), coord1, bld.m0(prim_mask), idx, component); + if (ctx->program->has_16bank_lds) + interp_p1.instr->operands[0].setLateKill(true); + bld.vintrp(aco_opcode::v_interp_p2_f32, Definition(dst), coord2, bld.m0(prim_mask), interp_p1, idx, component); } void emit_load_frag_coord(isel_context *ctx, Temp dst, unsigned num_components) diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index 558c6a7eabb..b2acc5f9cb3 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -1099,6 +1099,9 @@ setup_isel_context(Program* program, program->lds_alloc_granule = args->options->chip_class >= GFX7 ? 512 : 256; program->lds_limit = args->options->chip_class >= GFX7 ? 
65536 : 32768; + /* gfx702 reportedly also has 16-bank LDS, but no matching chip family enum was found for it */ + program->has_16bank_lds = args->options->family == CHIP_KABINI || args->options->family == CHIP_STONEY; + program->vgpr_limit = 256; program->vgpr_alloc_granule = 3; diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 92511975a69..5bbe337fe17 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1197,6 +1197,7 @@ public: uint16_t min_waves = 0; uint16_t lds_alloc_granule; uint32_t lds_limit; /* in bytes */ + bool has_16bank_lds; uint16_t vgpr_limit; uint16_t sgpr_limit; uint16_t physical_sgprs;