25c302d337
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31908>
458 lines
14 KiB
C++
458 lines
14 KiB
C++
/*
|
|
* Copyright 2021 Collabora, Ltd.
|
|
* SPDX-License-Identifier: MIT
|
|
*/
|
|
|
|
#include "agx_builder.h"
|
|
#include "agx_compiler.h"
|
|
#include "agx_test.h"
|
|
|
|
#include <gtest/gtest.h>
|
|
|
|
static void
|
|
agx_optimize_and_dce(agx_context *ctx)
|
|
{
|
|
agx_optimizer_backward(ctx);
|
|
agx_optimizer_forward(ctx);
|
|
agx_dce(ctx, true);
|
|
}
|
|
|
|
/*
 * Build one optimizer test case on top of INSTRUCTION_CASE.
 *
 * Both the input snippet (instr) and the reference snippet (expected) begin
 * by allocating a temporary `out` of size AGX_SIZE_<size> that the snippet may
 * use as its destination. When `returns` is true, `out` is also passed to
 * agx_unit_test() after the snippet. The pass given to INSTRUCTION_CASE is
 * agx_optimize_and_dce.
 *
 * The builder `b` is not declared here; presumably INSTRUCTION_CASE
 * (agx_test.h) brings it into scope -- confirm there.
 */
#define CASE(instr, expected, size, returns)                                   \
   INSTRUCTION_CASE(                                                           \
      {                                                                        \
         UNUSED agx_index out = agx_temp(b->shader, AGX_SIZE_##size);          \
         instr;                                                                \
         if (returns) {                                                        \
            agx_unit_test(b, out);                                             \
         }                                                                     \
      },                                                                       \
      {                                                                        \
         UNUSED agx_index out = agx_temp(b->shader, AGX_SIZE_##size);          \
         expected;                                                             \
         if (returns) {                                                        \
            agx_unit_test(b, out);                                             \
         }                                                                     \
      },                                                                       \
      agx_optimize_and_dce)
|
|
|
|
/*
 * Negative cases: the snippet is used as both input and expectation, i.e. the
 * pass pipeline must leave it as written.
 */
#define NEGCASE(instr, size) CASE(instr, instr, size, true)
#define NEGCASE16(instr)     NEGCASE(instr, 16)
#define NEGCASE32(instr)     NEGCASE(instr, 32)

/* Positive cases whose `out` temporary is consumed by agx_unit_test(). */
#define CASE16(instr, expected) CASE(instr, expected, 16, true)
#define CASE32(instr, expected) CASE(instr, expected, 32, true)
#define CASE64(instr, expected) CASE(instr, expected, 64, true)

/*
 * Positive case for snippets that do not write `out`; agx_unit_test() is
 * skipped, so the size of the (unused) temporary does not matter.
 */
#define CASE_NO_RETURN(instr, expected)                                        \
   CASE(instr, expected, 32 /* irrelevant */, false)
|
|
|
|
/*
 * Convenience wrapper: emit an fmov of s0 into a freshly allocated temporary
 * of the same size as s0 and return that temporary.
 */
static inline agx_index
agx_fmov(agx_builder *b, agx_index s0)
{
   agx_index dst = agx_temp(b->shader, s0.size);

   agx_fmov_to(b, dst, s0);
   return dst;
}
|
|
|
|
class Optimizer : public testing::Test {
|
|
protected:
|
|
Optimizer()
|
|
{
|
|
mem_ctx = ralloc_context(NULL);
|
|
|
|
dx = agx_register(0, AGX_SIZE_64);
|
|
dz = agx_register(4, AGX_SIZE_64);
|
|
|
|
wx = agx_register(0, AGX_SIZE_32);
|
|
wy = agx_register(2, AGX_SIZE_32);
|
|
wz = agx_register(4, AGX_SIZE_32);
|
|
|
|
hx = agx_register(0, AGX_SIZE_16);
|
|
hy = agx_register(1, AGX_SIZE_16);
|
|
hz = agx_register(2, AGX_SIZE_16);
|
|
}
|
|
|
|
~Optimizer()
|
|
{
|
|
ralloc_free(mem_ctx);
|
|
}
|
|
|
|
void *mem_ctx;
|
|
|
|
agx_index dx, dz, wx, wy, wz, hx, hy, hz;
|
|
};
|
|
|
|
/*
 * An fmov of a plain register that feeds a source carrying abs or neg is
 * expected to disappear, with the modifier applied to the register directly.
 */
TEST_F(Optimizer, FloatCopyprop)
{
   /* abs(fmov(x)) -> abs(x) */
   CASE32(agx_fadd_to(b, out, agx_abs(agx_fmov(b, wx)), wy),
          agx_fadd_to(b, out, agx_abs(wx), wy));

   /* neg(fmov(x)) -> neg(x) */
   CASE32(agx_fadd_to(b, out, agx_neg(agx_fmov(b, wx)), wy),
          agx_fadd_to(b, out, agx_neg(wx), wy));
}
|
|
|
|
/*
 * fmov between different sizes is expected to be folded into a 32-bit fadd,
 * both on the source side and on the destination side.
 */
TEST_F(Optimizer, FloatConversion)
{
   /* 16-bit register moved into a 32-bit temp, then used as an fadd source */
   CASE32(
      {
         agx_index widened = agx_temp(b->shader, AGX_SIZE_32);
         agx_fmov_to(b, widened, hx);
         agx_fadd_to(b, out, widened, wy);
      },
      { agx_fadd_to(b, out, hx, wy); });

   /* 32-bit fadd result moved into the 16-bit output */
   CASE16(
      {
         agx_index wide_sum = agx_temp(b->shader, AGX_SIZE_32);
         agx_fadd_to(b, wide_sum, wx, wy);
         agx_fmov_to(b, out, wide_sum);
      },
      { agx_fadd_to(b, out, wx, wy); });
}
|
|
|
|
/*
 * An fmov whose own source carries abs / neg(abs) is expected to be folded
 * into its user, which then takes the modified register directly.
 */
TEST_F(Optimizer, FusedFABSNEG)
{
   /* fmov(abs(x)) -> abs(x) */
   CASE32(agx_fadd_to(b, out, agx_fmov(b, agx_abs(wx)), wy),
          agx_fadd_to(b, out, agx_abs(wx), wy));

   /* fmov(neg(abs(x))) -> neg(abs(x)) */
   CASE32(agx_fmul_to(b, out, wx, agx_fmov(b, agx_neg(agx_abs(wx)))),
          agx_fmul_to(b, out, wx, agx_neg(agx_abs(wx))));
}
|
|
|
|
/* abs applied on top of fmov(abs(x)) is expected to collapse to abs(x). */
TEST_F(Optimizer, FusedFabsAbsorb)
{
   CASE32(agx_fadd_to(b, out, agx_abs(agx_fmov(b, agx_abs(wx))), wy),
          agx_fadd_to(b, out, agx_abs(wx), wy));
}
|
|
|
|
/* neg applied on top of an fmov of a negated source is expected to cancel. */
TEST_F(Optimizer, FusedFnegCancel)
{
   /* neg(fmov(neg(x))) -> x */
   CASE32(agx_fmul_to(b, out, wx, agx_neg(agx_fmov(b, agx_neg(wx)))),
          agx_fmul_to(b, out, wx, wx));

   /* neg(fmov(neg(abs(x)))) -> abs(x) */
   CASE32(agx_fmul_to(b, out, wx, agx_neg(agx_fmov(b, agx_neg(agx_abs(wx))))),
          agx_fmul_to(b, out, wx, agx_abs(wx)));
}
|
|
|
|
/*
 * A `not` on the result or on a source of a bitwise operation is expected to
 * be fused into an equivalent single bitwise operation.
 */
TEST_F(Optimizer, FusedNot)
{
   /* not applied to the result of and / or / xor */
   CASE32(agx_not_to(b, out, agx_and(b, wx, wx)), agx_nand_to(b, out, wx, wx));

   CASE32(agx_not_to(b, out, agx_or(b, wx, wx)), agx_nor_to(b, out, wx, wx));

   CASE32(agx_not_to(b, out, agx_xor(b, wx, wx)), agx_xnor_to(b, out, wx, wx));

   /* not applied to one or both sources, same register on both sides */
   CASE32(agx_xor_to(b, out, agx_not(b, wx), agx_not(b, wx)),
          agx_xor_to(b, out, wx, wx));

   CASE32(agx_xor_to(b, out, agx_not(b, wx), wx), agx_xnor_to(b, out, wx, wx));

   CASE32(agx_xor_to(b, out, wx, agx_not(b, wx)), agx_xnor_to(b, out, wx, wx));

   CASE32(agx_nand_to(b, out, agx_not(b, wx), agx_not(b, wx)),
          agx_or_to(b, out, wx, wx));

   CASE32(agx_andn1_to(b, out, agx_not(b, wx), wx), agx_and_to(b, out, wx, wx));

   CASE32(agx_andn1_to(b, out, wx, agx_not(b, wx)), agx_nor_to(b, out, wx, wx));

   CASE32(agx_andn2_to(b, out, agx_not(b, wx), wx), agx_nor_to(b, out, wx, wx));

   CASE32(agx_andn2_to(b, out, wx, agx_not(b, wx)), agx_and_to(b, out, wx, wx));

   /* the other source is a uniform or an immediate */
   CASE32(agx_xor_to(b, out, agx_not(b, wx), agx_uniform(8, AGX_SIZE_32)),
          agx_xnor_to(b, out, wx, agx_uniform(8, AGX_SIZE_32)));

   CASE32(agx_or_to(b, out, agx_immediate(123), agx_not(b, wx)),
          agx_orn2_to(b, out, agx_immediate(123), wx));

   /* two distinct registers */
   CASE32(agx_xor_to(b, out, wx, agx_not(b, wy)), agx_xnor_to(b, out, wx, wy));

   CASE32(agx_xor_to(b, out, wy, agx_not(b, wx)), agx_xnor_to(b, out, wy, wx));

   CASE32(agx_and_to(b, out, agx_not(b, wx), wy), agx_andn1_to(b, out, wx, wy));

   CASE32(agx_or_to(b, out, wx, agx_not(b, wy)), agx_orn2_to(b, out, wx, wy));
}
|
|
|
|
/*
 * fmul -> saturating fmov into a 32-bit temp -> fmov into the 16-bit output
 * is expected to become a single saturating fmul writing the output.
 */
TEST_F(Optimizer, FmulFsatF2F16)
{
   CASE16(
      {
         agx_index sat32 = agx_temp(b->shader, AGX_SIZE_32);
         agx_fmov_to(b, sat32, agx_fmul(b, wx, wy))->saturate = true;
         agx_fmov_to(b, out, sat32);
      },
      { agx_fmul_to(b, out, wx, wy)->saturate = true; });
}
|
|
|
|
TEST_F(Optimizer, FsatWithPhi)
{
   /*
    * Build the following loop:
    *
    *    A:
    *       ...
    *
    *    B:
    *       phi ..., u
    *       u = wx * phi
    *       out = fsat u
    *       --> B
    *
    * Phi sources are read at the end of the predecessor, not at the start of
    * the successor. If phis are not handled properly, the fsat would be fused
    * into the multiply incorrectly, so this is a negative case.
    *
    * This reproduces an issue hit in a Control shader. Astonishingly, it is
    * not hit anywhere in CTS.
    */
   NEGCASE32({
      agx_block *entry = agx_start_block(b->shader);
      agx_block *loop = agx_test_block(b->shader);

      /* A falls through to B, and B branches back to itself */
      agx_block_add_successor(entry, loop);
      agx_block_add_successor(loop, loop);

      b->cursor = agx_after_block(loop);
      agx_index u = agx_temp(b->shader, AGX_SIZE_32);

      /* Second phi source is the product defined later in the same block */
      agx_instr *phi = agx_phi_to(b, agx_temp(b->shader, AGX_SIZE_32), 2);
      phi->src[0] = wx;
      phi->src[1] = u;

      agx_fmul_to(b, u, wx, phi->dest[0]);
      agx_fmov_to(b, out, u)->saturate = true;
   });
}
|
|
|
|
/* Plain movs feeding fmul sources are expected to be propagated away. */
TEST_F(Optimizer, Copyprop)
{
   /* one moved source */
   CASE32(agx_fmul_to(b, out, wx, agx_mov(b, wy)), agx_fmul_to(b, out, wx, wy));

   /* both sources moved */
   CASE32(agx_fmul_to(b, out, agx_mov(b, wx), agx_mov(b, wy)),
          agx_fmul_to(b, out, wx, wy));
}
|
|
|
|
/*
 * A negative constant produced by mov_imm and consumed by the last value
 * source of iadd / imad is expected to be inlined as a negated positive
 * immediate, at 16, 32 and 64 bits.
 */
TEST_F(Optimizer, SubInlineImmediate)
{
   /* iadd */
   CASE16(agx_iadd_to(b, out, hx, agx_mov_imm(b, 16, -2), 0),
          agx_iadd_to(b, out, hx, agx_neg(agx_immediate(2)), 0));

   CASE32(agx_iadd_to(b, out, wx, agx_mov_imm(b, 32, -1), 0),
          agx_iadd_to(b, out, wx, agx_neg(agx_immediate(1)), 0));

   CASE64(agx_iadd_to(b, out, dx, agx_mov_imm(b, 64, -17), 0),
          agx_iadd_to(b, out, dx, agx_neg(agx_immediate(17)), 0));

   /* imad */
   CASE16(agx_imad_to(b, out, hx, hy, agx_mov_imm(b, 16, -2), 0),
          agx_imad_to(b, out, hx, hy, agx_neg(agx_immediate(2)), 0));

   CASE32(agx_imad_to(b, out, wx, wy, agx_mov_imm(b, 32, -1), 0),
          agx_imad_to(b, out, wx, wy, agx_neg(agx_immediate(1)), 0));

   CASE64(agx_imad_to(b, out, dx, dz, agx_mov_imm(b, 64, -17), 0),
          agx_imad_to(b, out, dx, dz, agx_neg(agx_immediate(17)), 0));
}
|
|
|
|
/*
 * Negative case: a zero produced by mov_imm and used as a collect source must
 * stay a separate mov_imm rather than being inlined into the collect.
 *
 * NOTE(review): the other cases in this file pass a bit count (16/32/64) as
 * the second argument of agx_mov_imm, whereas this one passes AGX_SIZE_32.
 * Confirm which form the builder expects.
 */
TEST_F(Optimizer, InlineHazards)
{
   NEGCASE32({
      agx_index zero_src = agx_mov_imm(b, AGX_SIZE_32, 0);
      agx_instr *collect = agx_collect_to(b, out, 4);

      collect->src[0] = zero_src;
      collect->src[1] = wy;
      collect->src[2] = wz;
      collect->src[3] = wz;
   });
}
|
|
|
|
/*
 * When a plain mov is propagated into a source that carries abs and/or neg,
 * the modifiers are expected to be kept on the propagated register.
 */
TEST_F(Optimizer, CopypropRespectsAbsNeg)
{
   /* abs only */
   CASE32(agx_fadd_to(b, out, agx_abs(agx_mov(b, wx)), wy),
          agx_fadd_to(b, out, agx_abs(wx), wy));

   /* neg only */
   CASE32(agx_fadd_to(b, out, agx_neg(agx_mov(b, wx)), wy),
          agx_fadd_to(b, out, agx_neg(wx), wy));

   /* neg of abs */
   CASE32(agx_fadd_to(b, out, agx_neg(agx_abs(agx_mov(b, wx))), wy),
          agx_fadd_to(b, out, agx_neg(agx_abs(wx)), wy));
}
|
|
|
|
/* A mov feeding an integer (xor) source is expected to be propagated away. */
TEST_F(Optimizer, IntCopyprop)
{
   CASE32(agx_xor_to(b, out, agx_mov(b, wx), wy), agx_xor_to(b, out, wx, wy));
}
|
|
|
|
/*
 * A 64-bit uniform that is moved and then split into two 32-bit halves is
 * expected to end up as two direct 32-bit uniform reads (uniform 40 and
 * uniform 42 for a 64-bit uniform at 40).
 */
TEST_F(Optimizer, CopypropSplitMovedUniform64)
{
   CASE32(
      {
         /* emit_load_preamble puts in the move, so we do too */
         agx_index wide = agx_mov(b, agx_uniform(40, AGX_SIZE_64));
         agx_instr *halves = agx_split(b, 2, wide);
         halves->dest[0] = agx_temp(b->shader, AGX_SIZE_32);
         halves->dest[1] = agx_temp(b->shader, AGX_SIZE_32);
         agx_xor_to(b, out, halves->dest[0], halves->dest[1]);
      },
      {
         agx_xor_to(b, out, agx_uniform(40, AGX_SIZE_32),
                    agx_uniform(42, AGX_SIZE_32));
      });
}
|
|
|
|
/*
 * Negative case: a mov from a 16-bit register into a 32-bit temp that feeds an
 * integer xor must be left alone.
 */
TEST_F(Optimizer, IntCopypropDoesntConvert)
{
   NEGCASE32({
      agx_index widened = agx_temp(b->shader, AGX_SIZE_32);
      agx_mov_to(b, widened, hx);
      agx_xor_to(b, out, widened, wy);
   });
}
|
|
|
|
/* Negative case: a preload feeding an xor must not be propagated or removed. */
TEST_F(Optimizer, SkipPreloads)
{
   NEGCASE32({
      agx_index preloaded = agx_preload(b, agx_register(0, AGX_SIZE_32));
      agx_xor_to(b, out, preloaded, wy);
   });
}
|
|
|
|
/*
 * Negative cases: size-converting fmovs around an fadd whose other operands
 * are 16-bit must not be folded into the fadd.
 */
TEST_F(Optimizer, NoConversionsOn16BitALU)
{
   /* 32-bit register narrowed to 16 bits, then used by a 16-bit fadd */
   NEGCASE16({
      agx_index narrowed = agx_temp(b->shader, AGX_SIZE_16);
      agx_fmov_to(b, narrowed, wx);
      agx_fadd_to(b, out, narrowed, hy);
   });

   /* fadd of two 16-bit registers moved into the 32-bit output */
   NEGCASE32(agx_fmov_to(b, out, agx_fadd(b, hx, hy)));
}
|
|
|
|
/*
 * A ballot / quad_ballot of an icmp or fcmp result is expected to be fused
 * into the corresponding compare-and-ballot instruction, carrying over the
 * operands, condition and trailing boolean argument of the compare.
 */
TEST_F(Optimizer, BallotCondition)
{
   /* ballot */
   CASE32(agx_ballot_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true)),
          agx_icmp_ballot_to(b, out, wx, wy, AGX_ICOND_UEQ, true));

   CASE32(agx_ballot_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_GE, false)),
          agx_fcmp_ballot_to(b, out, wx, wy, AGX_FCOND_GE, false));

   /* quad ballot */
   CASE32(agx_quad_ballot_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true)),
          agx_icmp_quad_ballot_to(b, out, wx, wy, AGX_ICOND_UEQ, true));

   CASE32(agx_quad_ballot_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_GT, false)),
          agx_fcmp_quad_ballot_to(b, out, wx, wy, AGX_FCOND_GT, false));
}
|
|
|
|
/*
 * When the compare result has another use besides the ballot, the ballot is
 * still expected to be fused, and the original compare is expected to remain
 * for its other user.
 */
TEST_F(Optimizer, BallotMultipleUses)
{
   CASE32(
      {
         agx_index cond = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
         agx_index votes = agx_quad_ballot(b, cond);
         agx_fadd_to(b, out, cond, votes);
      },
      {
         agx_index cond = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
         agx_index votes =
            agx_fcmp_quad_ballot(b, wx, wy, AGX_FCOND_GT, false);
         agx_fadd_to(b, out, cond, votes);
      });
}
|
|
|
|
/*
|
|
* We had a bug where the ballot optimization didn't check the agx_index's type
|
|
* so would fuse constants with overlapping values. An unrelated common code
|
|
* change surfaced this in CTS case:
|
|
*
|
|
* dEQP-VK.subgroups.vote.frag_helper.subgroupallequal_bool_fragment
|
|
*
|
|
* We passed Vulkan CTS without hitting it though, hence the targeted test.
|
|
*/
|
|
TEST_F(Optimizer, BallotConstant)
|
|
{
|
|
CASE32(
|
|
{
|
|
agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
|
|
agx_index ballot = agx_quad_ballot(b, agx_immediate(cmp.value));
|
|
agx_index ballot2 = agx_quad_ballot(b, cmp);
|
|
agx_fadd_to(b, out, ballot, agx_fadd(b, ballot2, cmp));
|
|
},
|
|
{
|
|
agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
|
|
agx_index ballot = agx_quad_ballot(b, agx_immediate(cmp.value));
|
|
agx_index ballot2 =
|
|
agx_fcmp_quad_ballot(b, wx, wy, AGX_FCOND_GT, false);
|
|
agx_fadd_to(b, out, ballot, agx_fadd(b, ballot2, cmp));
|
|
});
|
|
}
|
|
|
|
/*
 * An if_icmp that tests an icmp / fcmp result against zero is expected to be
 * rewritten into an if_icmp / if_fcmp directly on the compare's operands,
 * taking over the compare's condition and trailing boolean argument.
 */
TEST_F(Optimizer, IfCondition)
{
   /* 32-bit integer compare */
   CASE_NO_RETURN(agx_if_icmp(b, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true),
                              agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
                  agx_if_icmp(b, wx, wy, 1, AGX_ICOND_UEQ, true, NULL));

   /* 32-bit float compare */
   CASE_NO_RETURN(agx_if_icmp(b, agx_fcmp(b, wx, wy, AGX_FCOND_EQ, true),
                              agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
                  agx_if_fcmp(b, wx, wy, 1, AGX_FCOND_EQ, true, NULL));

   /* 16-bit float compare */
   CASE_NO_RETURN(agx_if_icmp(b, agx_fcmp(b, hx, hy, AGX_FCOND_LT, false),
                              agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
                  agx_if_fcmp(b, hx, hy, 1, AGX_FCOND_LT, false, NULL));
}
|
|
|
|
/*
 * An icmpsel that tests an icmp / fcmp result against zero is expected to be
 * rewritten into an icmpsel / fcmpsel on the compare's operands.
 *
 * In the expectations below, the two selected values appear swapped when the
 * compare's last argument is false and in their original order when it is
 * true.
 */
TEST_F(Optimizer, SelectCondition)
{
   /* integer compare, last argument false: selected values swapped */
   CASE32(agx_icmpsel_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, false),
                         agx_zero(), wz, wx, AGX_ICOND_UEQ),
          agx_icmpsel_to(b, out, wx, wy, wx, wz, AGX_ICOND_UEQ));

   /* integer compare, last argument true: selected values kept */
   CASE32(agx_icmpsel_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true),
                         agx_zero(), wz, wx, AGX_ICOND_UEQ),
          agx_icmpsel_to(b, out, wx, wy, wz, wx, AGX_ICOND_UEQ));

   /* float compare, last argument false: selected values swapped */
   CASE32(agx_icmpsel_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_EQ, false),
                         agx_zero(), wz, wx, AGX_ICOND_UEQ),
          agx_fcmpsel_to(b, out, wx, wy, wx, wz, AGX_FCOND_EQ));

   /* float compare, last argument true: selected values kept */
   CASE32(agx_icmpsel_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_LT, true),
                         agx_zero(), wz, wx, AGX_ICOND_UEQ),
          agx_fcmpsel_to(b, out, wx, wy, wz, wx, AGX_FCOND_LT));
}
|
|
|
|
/*
 * An if_icmp against zero whose tested value is `x xor 1` is expected to test
 * x directly, with the boolean argument after the condition flipped.
 */
TEST_F(Optimizer, IfInverted)
{
   /* true -> false */
   CASE_NO_RETURN(
      agx_if_icmp(b, agx_xor(b, hx, agx_immediate(1)), agx_zero(), 1,
                  AGX_ICOND_UEQ, true, NULL),
      agx_if_icmp(b, hx, agx_zero(), 1, AGX_ICOND_UEQ, false, NULL));

   /* false -> true */
   CASE_NO_RETURN(agx_if_icmp(b, agx_xor(b, hx, agx_immediate(1)), agx_zero(),
                              1, AGX_ICOND_UEQ, false, NULL),
                  agx_if_icmp(b, hx, agx_zero(), 1, AGX_ICOND_UEQ, true, NULL));
}
|
|
|
|
/*
 * Combination of the two rewrites above: an if_icmp against zero on
 * `cmp xor 1` is expected to become an if_icmp / if_fcmp on the compare's
 * operands, with the compare's trailing boolean argument flipped.
 */
TEST_F(Optimizer, IfInvertedCondition)
{
   /* 32-bit integer compare: true -> false */
   CASE_NO_RETURN(
      agx_if_icmp(
         b,
         agx_xor(b, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true), agx_immediate(1)),
         agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
      agx_if_icmp(b, wx, wy, 1, AGX_ICOND_UEQ, false, NULL));

   /* 32-bit float compare: true -> false */
   CASE_NO_RETURN(
      agx_if_icmp(
         b,
         agx_xor(b, agx_fcmp(b, wx, wy, AGX_FCOND_EQ, true), agx_immediate(1)),
         agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
      agx_if_fcmp(b, wx, wy, 1, AGX_FCOND_EQ, false, NULL));

   /* 16-bit float compare: false -> true */
   CASE_NO_RETURN(
      agx_if_icmp(
         b,
         agx_xor(b, agx_fcmp(b, hx, hy, AGX_FCOND_LT, false), agx_immediate(1)),
         agx_zero(), 1, AGX_ICOND_UEQ, true, NULL),
      agx_if_fcmp(b, hx, hy, 1, AGX_FCOND_LT, true, NULL));
}
|