pan/bi: Handle abs packing for fp16/FMA add/min

It's seriously quirky, and all to save a single bit. Alas. It also
introduces an edge case for the scheduler which is a bit annoying.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4382>
This commit is contained in:
Alyssa Rosenzweig
2020-03-27 22:34:15 -04:00
committed by Marge Bot
parent ba8e11f0f1
commit c88f816169
2 changed files with 62 additions and 3 deletions
+61 -2
View File
@@ -512,11 +512,70 @@ bi_pack_fma_addmin_f16(bi_instruction *ins, struct bi_registers *regs)
(ins->op.minmax == BI_MINMAX_MIN) ? BIFROST_FMA_OP_FMIN16 :
BIFROST_FMA_OP_FMAX16;
/* Absolute values are packed in a quirky way. Let k = src1 < src0. Let
* l be an auxiliary bit we encode. Then the hardware determines:
*
* abs0 = l || k
* abs1 = l && k
*
* Since add/min/max are commutative, this saves a bit by using the
* order of the operands as a bit (k). To pack this, first note:
*
* (l && k) implies (l || k).
*
* That is, if the second argument is abs'd, then the first argument
* also has abs. So there are three cases:
*
* Case 0: Neither src has absolute value. Then we have l = k = 0.
*
* Case 1: Exactly one src has absolute value. Assign that source to
* src0 and the other source to src1. Compute k = src1 < src0 based on
* that assignment. Then l = ~k.
*
* Case 2: Both sources have absolute value. Then we have l = k = 1.
* Note to force k = 1 requires that (src1 < src0) OR (src0 < src1).
* That is, this encoding is only valid if src1 and src0 are distinct.
* This is a scheduling restriction (XXX); if an op of this type
* requires both identical sources to have abs value, then we must
* schedule to ADD (which does not use this ordering trick).
*/
unsigned abs_0 = ins->src_abs[0], abs_1 = ins->src_abs[1];
unsigned src_0 = bi_get_src(ins, regs, 0, true);
unsigned src_1 = bi_get_src(ins, regs, 0, true);
bool l = false;
if (!abs_0 && !abs_1) {
/* Force k = 0 <===> NOT(src1 < src0) <==> src1 >= src0 */
if (src_0 < src_1) {
unsigned tmp = src_0;
src_0 = src_1;
src_1 = tmp;
}
} else if (abs_0 && !abs_1) {
l = src_1 >= src_0;
} else if (abs_1 && !abs_0) {
unsigned tmp = src_0;
src_0 = src_1;
src_0 = tmp;
l = src_1 >= src_0;
} else {
if (src_0 >= src_1) {
unsigned tmp = src_0;
src_0 = src_1;
src_1 = tmp;
}
l = true;
}
struct bifrost_fma_add_minmax16 pack = {
.src0 = bi_get_src(ins, regs, 0, true),
.src1 = bi_get_src(ins, regs, 1, true),
.src0 = src_0,
.src1 = src_1,
.src0_neg = ins->src_neg[0],
.src1_neg = ins->src_neg[1],
.abs1 = l,
.outmod = ins->outmod,
.mode = (ins->type == BI_ADD) ? ins->roundmode : ins->minmax,
.op = op
+1 -1
View File
@@ -1929,7 +1929,7 @@ pandecode_shader_disassemble(mali_ptr shader_ptr, int shader_no, int type,
struct midgard_disasm_stats stats;
if (is_bifrost) {
disassemble_bifrost(pandecode_dump_stream, code, sz, false);
disassemble_bifrost(pandecode_dump_stream, code, sz, true);
/* TODO: Extend stats to Bifrost */
stats.texture_count = -128;