intel/vec4: sel.cond writes the flags on Gfx4 and Gfx5

This is the vec4 backend equivalent of idr's commit

    intel/fs: sel.cond writes the flags on Gfx4 and Gfx5

This fixes buggy rendering seen with crocus on a qt trace.

v2 (idr): Trivial whitespace change.  Add unit tests.

v3: Fix typo in comment in unit tests.  Noticed by Jason and Priit.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>

Iron Lake
total instructions in shared programs: 8183077 -> 8184543 (0.02%)
instructions in affected programs: 198990 -> 200456 (0.74%)
helped: 0
HURT: 1355
HURT stats (abs)   min: 1 max: 8 x̄: 1.08 x̃: 1
HURT stats (rel)   min: 0.29% max: 6.00% x̄: 0.99% x̃: 0.70%
95% mean confidence interval for instructions value: 1.04 1.12
95% mean confidence interval for instructions %-change: 0.96% 1.03%
Instructions are HURT.

total cycles in shared programs: 238967672 -> 238962784 (<.01%)
cycles in affected programs: 4666014 -> 4661126 (-0.10%)
helped: 406
HURT: 314
helped stats (abs) min: 4 max: 54 x̄: 22.46 x̃: 18
helped stats (rel) min: <.01% max: 12.80% x̄: 1.82% x̃: 0.65%
HURT stats (abs)   min: 2 max: 112 x̄: 13.48 x̃: 12
HURT stats (rel)   min: <.01% max: 7.82% x̄: 0.81% x̃: 0.16%
95% mean confidence interval for cycles value: -8.60 -4.98
95% mean confidence interval for cycles %-change: -0.87% -0.49%
Cycles are helped.

GM45
total instructions in shared programs: 4986888 -> 4988354 (0.03%)
instructions in affected programs: 198990 -> 200456 (0.74%)
helped: 0
HURT: 1355
HURT stats (abs)   min: 1 max: 8 x̄: 1.08 x̃: 1
HURT stats (rel)   min: 0.29% max: 6.00% x̄: 0.99% x̃: 0.70%
95% mean confidence interval for instructions value: 1.04 1.12
95% mean confidence interval for instructions %-change: 0.96% 1.03%
Instructions are HURT.

total cycles in shared programs: 153577826 -> 153572938 (<.01%)
cycles in affected programs: 4666014 -> 4661126 (-0.10%)
helped: 406
HURT: 314
helped stats (abs) min: 4 max: 54 x̄: 22.46 x̃: 18
helped stats (rel) min: <.01% max: 12.80% x̄: 1.82% x̃: 0.65%
HURT stats (abs)   min: 2 max: 112 x̄: 13.48 x̃: 12
HURT stats (rel)   min: <.01% max: 7.82% x̄: 0.81% x̃: 0.16%
95% mean confidence interval for cycles value: -8.60 -4.98
95% mean confidence interval for cycles %-change: -0.87% -0.49%
Cycles are helped.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12191>
This commit is contained in:
Dave Airlie
2021-08-04 17:38:12 +10:00
committed by Ian Romanick
parent 38807ceeae
commit 8a81d14271
10 changed files with 167 additions and 17 deletions
@@ -109,7 +109,7 @@ void cmod_propagation_test::SetUp()
v = new cmod_propagation_vec4_visitor(compiler, ctx, shader, prog_data);
devinfo->ver = 4;
devinfo->ver = 7;
devinfo->verx10 = devinfo->ver * 10;
}
@@ -905,3 +905,149 @@ TEST_F(cmod_propagation_test, add_cmp_different_dst_writemask)
EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
}
TEST_F(cmod_propagation_test, prop_across_sel_gfx7)
{
   /* The device version is not overridden here, so this test runs with
    * whatever the fixture configured (presumably Gfx7, per the test name).
    * A sel.ge placed between an ADD and a CMP of the ADD's result against
    * zero is expected not to block cmod propagation.
    */
   const vec4_builder bld = vec4_builder(v).at_end();

   dst_reg add_dst(v, glsl_type::float_type);
   dst_reg sel_dst(v, glsl_type::float_type);
   src_reg add_src0(v, glsl_type::float_type);
   src_reg add_src1(v, glsl_type::float_type);
   src_reg sel_src0(v, glsl_type::float_type);
   src_reg sel_src1(v, glsl_type::float_type);
   src_reg imm_zero(brw_imm_f(0.0f));

   /* Null destination restricted to the X channel for the compare. */
   dst_reg null_dst(bld.null_reg_f());
   null_dst.writemask = WRITEMASK_X;

   bld.ADD(add_dst, add_src0, add_src1);
   bld.SEL(sel_dst, sel_src0, sel_src1)->conditional_mod =
      BRW_CONDITIONAL_GE;
   bld.CMP(null_dst, src_reg(add_dst), imm_zero, BRW_CONDITIONAL_GE);

   /* = Before =
    *
    * 0: add        add_dst.x  add_src0.xxxx  add_src1.xxxx
    * 1: sel.ge.f0  sel_dst.x  sel_src0.xxxx  sel_src1.xxxx
    * 2: cmp.ge.f0  null.x     add_dst.xxxx   0.0f
    *
    * = After =
    * 0: add.ge.f0  add_dst.x  add_src0.xxxx  add_src1.xxxx
    * 1: sel.ge.f0  sel_dst.x  sel_src0.xxxx  sel_src1.xxxx
    */

   v->calculate_cfg();
   bblock_t *blk = v->cfg->blocks[0];

   /* Three instructions going in. */
   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(2, blk->end_ip);

   EXPECT_TRUE(cmod_propagation(v));

   /* The CMP is gone: only the ADD and the SEL remain. */
   ASSERT_EQ(0, blk->start_ip);
   ASSERT_EQ(1, blk->end_ip);

   EXPECT_EQ(BRW_OPCODE_ADD, instruction(blk, 0)->opcode);
   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(blk, 0)->conditional_mod);
   EXPECT_EQ(BRW_OPCODE_SEL, instruction(blk, 1)->opcode);
   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(blk, 1)->conditional_mod);
}
TEST_F(cmod_propagation_test, prop_across_sel_gfx5)
{
   /* Run this test as Gfx5, keeping verx10 in sync with ver. */
   devinfo->ver = 5;
   devinfo->verx10 = devinfo->ver * 10;

   const vec4_builder bld = vec4_builder(v).at_end();

   dst_reg add_dst(v, glsl_type::float_type);
   dst_reg sel_dst(v, glsl_type::float_type);
   src_reg add_src0(v, glsl_type::float_type);
   src_reg add_src1(v, glsl_type::float_type);
   src_reg sel_src0(v, glsl_type::float_type);
   src_reg sel_src1(v, glsl_type::float_type);
   src_reg imm_zero(brw_imm_f(0.0f));

   /* Null destination restricted to the X channel for the compare. */
   dst_reg null_dst(bld.null_reg_f());
   null_dst.writemask = WRITEMASK_X;

   bld.ADD(add_dst, add_src0, add_src1);
   bld.SEL(sel_dst, sel_src0, sel_src1)->conditional_mod =
      BRW_CONDITIONAL_GE;
   bld.CMP(null_dst, src_reg(add_dst), imm_zero, BRW_CONDITIONAL_GE);

   /* = Before =
    *
    * 0: add        add_dst.x  add_src0.xxxx  add_src1.xxxx
    * 1: sel.ge.f0  sel_dst.x  sel_src0.xxxx  sel_src1.xxxx
    * 2: cmp.ge.f0  null.x     add_dst.xxxx   0.0f
    *
    * = After =
    * (no changes)
    *
    * On Gfx4 and Gfx5, sel.l (for min) and sel.ge (for max) are implemented
    * using a separate cmpn and sel instruction.  That lowering is done by a
    * lower_minmax pass (fs_visitor::lower_minmax in the scalar backend;
    * presumably its vec4 counterpart here -- NOTE(review): confirm), which
    * runs a long time after the first calls to cmod_propagation.  The sel.ge
    * therefore has to be treated as writing the flags, which prevents the
    * CMP's cmod from being moved across it onto the ADD.
    */

   v->calculate_cfg();
   bblock_t *blk = v->cfg->blocks[0];

   /* Three instructions going in. */
   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(2, blk->end_ip);

   EXPECT_FALSE(cmod_propagation(v));

   /* Still three instructions, all unmodified. */
   ASSERT_EQ(0, blk->start_ip);
   ASSERT_EQ(2, blk->end_ip);

   EXPECT_EQ(BRW_OPCODE_ADD, instruction(blk, 0)->opcode);
   EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(blk, 0)->conditional_mod);
   EXPECT_EQ(BRW_OPCODE_SEL, instruction(blk, 1)->opcode);
   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(blk, 1)->conditional_mod);
   EXPECT_EQ(BRW_OPCODE_CMP, instruction(blk, 2)->opcode);
   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(blk, 2)->conditional_mod);
}
TEST_F(cmod_propagation_test, prop_into_sel_gfx5)
{
   /* Run this test as Gfx5, keeping verx10 in sync with ver. */
   devinfo->ver = 5;
   devinfo->verx10 = devinfo->ver * 10;

   const vec4_builder bld = vec4_builder(v).at_end();

   dst_reg sel_dst(v, glsl_type::float_type);
   src_reg sel_src0(v, glsl_type::float_type);
   src_reg sel_src1(v, glsl_type::float_type);
   src_reg imm_zero(brw_imm_f(0.0f));

   /* Null destination restricted to the X channel for the compare. */
   dst_reg null_dst(bld.null_reg_f());
   null_dst.writemask = WRITEMASK_X;

   bld.SEL(sel_dst, sel_src0, sel_src1)->conditional_mod =
      BRW_CONDITIONAL_GE;
   bld.CMP(null_dst, src_reg(sel_dst), imm_zero, BRW_CONDITIONAL_GE);

   /* = Before =
    *
    * 0: sel.ge.f0  sel_dst.x  sel_src0.xxxx  sel_src1.xxxx
    * 1: cmp.ge.f0  null.x     sel_dst.xxxx   0.0f
    *
    * = After =
    * (no changes)
    *
    * Do not propagate the conditional modifier into a sel.cond instruction.
    * While it does modify the flags, the flags are not based on the result
    * compared with zero (as with most other instructions).  The result is
    * based on the sources compared with each other (like cmp.cond).
    */

   v->calculate_cfg();
   bblock_t *blk = v->cfg->blocks[0];

   /* Two instructions going in. */
   EXPECT_EQ(0, blk->start_ip);
   EXPECT_EQ(1, blk->end_ip);

   EXPECT_FALSE(cmod_propagation(v));

   /* Still two instructions, both unmodified. */
   ASSERT_EQ(0, blk->start_ip);
   ASSERT_EQ(1, blk->end_ip);

   EXPECT_EQ(BRW_OPCODE_SEL, instruction(blk, 0)->opcode);
   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(blk, 0)->conditional_mod);
   EXPECT_EQ(BRW_OPCODE_CMP, instruction(blk, 1)->opcode);
   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(blk, 1)->conditional_mod);
}