aco: implement 16-bit interp

For 16-bank LDS (i.e. Kabini/Stoney) we need a slightly different
path. It's completely untested, though, because I don't have these
chips, but according to vkpipeline-db the generated assembly seems fine.

Note that 16-bit I/O is currently only exposed on GFX9+ for both
compiler backends.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4966>
This commit is contained in:
Samuel Pitoiset
2020-05-08 16:22:53 +02:00
committed by Marge Bot
parent bbbb4057e6
commit 1647e098e9
+34 -4
View File
@@ -4424,10 +4424,40 @@ void emit_interp_instr(isel_context *ctx, unsigned idx, unsigned component, Temp
Temp coord2 = emit_extract_vector(ctx, src, 1, v1);
Builder bld(ctx->program, ctx->block);
Builder::Result interp_p1 = bld.vintrp(aco_opcode::v_interp_p1_f32, bld.def(v1), coord1, bld.m0(prim_mask), idx, component);
if (ctx->program->has_16bank_lds)
interp_p1.instr->operands[0].setLateKill(true);
bld.vintrp(aco_opcode::v_interp_p2_f32, Definition(dst), coord2, bld.m0(prim_mask), interp_p1, idx, component);
if (dst.regClass() == v2b) {
if (ctx->program->has_16bank_lds) {
assert(ctx->options->chip_class <= GFX8);
Builder::Result interp_p1 =
bld.vintrp(aco_opcode::v_interp_mov_f32, bld.def(v1),
Operand(2u) /* P0 */, bld.m0(prim_mask), idx, component);
interp_p1 = bld.vintrp(aco_opcode::v_interp_p1lv_f16, bld.def(v2b),
coord1, bld.m0(prim_mask), interp_p1, idx, component);
bld.vintrp(aco_opcode::v_interp_p2_legacy_f16, Definition(dst), coord2,
bld.m0(prim_mask), interp_p1, idx, component);
} else {
aco_opcode interp_p2_op = aco_opcode::v_interp_p2_f16;
if (ctx->options->chip_class == GFX8)
interp_p2_op = aco_opcode::v_interp_p2_legacy_f16;
Builder::Result interp_p1 =
bld.vintrp(aco_opcode::v_interp_p1ll_f16, bld.def(v1),
coord1, bld.m0(prim_mask), idx, component);
bld.vintrp(interp_p2_op, Definition(dst), coord2, bld.m0(prim_mask),
interp_p1, idx, component);
}
} else {
Builder::Result interp_p1 =
bld.vintrp(aco_opcode::v_interp_p1_f32, bld.def(v1), coord1,
bld.m0(prim_mask), idx, component);
if (ctx->program->has_16bank_lds)
interp_p1.instr->operands[0].setLateKill(true);
bld.vintrp(aco_opcode::v_interp_p2_f32, Definition(dst), coord2,
bld.m0(prim_mask), interp_p1, idx, component);
}
}
void emit_load_frag_coord(isel_context *ctx, Temp dst, unsigned num_components)