From 08e0c5a9cf14b14c3db654ea43cc51182b1f780e Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Wed, 30 Aug 2023 10:19:19 -0400
Subject: [PATCH] agx: Fuse compares into selects

This lets us save a LOT of instructions at the cost of increased register
pressure. However, on my shader-db, this is still coming out ahead since no
shaders are hurt for thread count/spills, and only 1/10 of the shaders helped
for instruction count are hurt for register pressure.

The shaders most hurt for pressure have very low pressure (7 -> 15 is the
worst case) and you need a certain number of registers to use a 4 source
instruction at all. Analyzing the hurt shaders, nothing concerns me too much
... this isn't as bad as I feared.

So I think at this point it's worth ripping off the bandage, given the massive
potential for instruction count win. This is a big improvement for some of the
shaders I'm working on for my $SECRET_PROJECT.

total instructions in shared programs: 1784943 -> 1775169 (-0.55%)
instructions in affected programs: 644211 -> 634437 (-1.52%)
helped: 3498
HURT: 38
Instructions are helped.

total bytes in shared programs: 11720734 -> 11643224 (-0.66%)
bytes in affected programs: 4370986 -> 4293476 (-1.77%)
helped: 3572
HURT: 36
Bytes are helped.

total halfregs in shared programs: 474094 -> 475165 (0.23%)
halfregs in affected programs: 12821 -> 13892 (8.35%)
helped: 65
HURT: 247
Halfregs are HURT.

Signed-off-by: Alyssa Rosenzweig
Part-of:
---
 src/asahi/compiler/agx_optimizer.c | 49 ++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/src/asahi/compiler/agx_optimizer.c b/src/asahi/compiler/agx_optimizer.c
index 88149ef9c4a..f017a53c58a 100644
--- a/src/asahi/compiler/agx_optimizer.c
+++ b/src/asahi/compiler/agx_optimizer.c
@@ -275,6 +275,53 @@ agx_optimizer_if_cmp(agx_instr **defs, agx_instr *I)
    }
 }
 
+/*
+ * Fuse conditions into select.
Specifically, acts on icmpsel and fuses:
+ *
+ *    icmpsel(cmp(x, y, *), 0, z, w, eq) -> cmpsel(x, y, w, z, *)
+ *
+ * Care must be taken to invert the condition by swapping cmpsel arguments.
+ */
+static void
+agx_optimizer_cmpsel(agx_instr **defs, agx_instr *I)
+{
+   /* Unfused select only: src0 is a normal index tested == 0 (UEQ) */
+   if (!agx_is_equiv(I->src[1], agx_zero()) || I->icond != AGX_ICOND_UEQ ||
+       I->src[0].type != AGX_INDEX_NORMAL)
+      return;
+
+   /* The instruction recorded in defs for src0 must be an icmp or fcmp */
+   agx_instr *def = defs[I->src[0].value];
+   if (def->op != AGX_OPCODE_ICMP && def->op != AGX_OPCODE_FCMP)
+      return;
+
+   /* Fuse: compare the icmp/fcmp operands directly instead of its result */
+   I->src[0] = def->src[0];
+   I->src[1] = def->src[1];
+
+   /* In the unfused select, the condition is inverted due to the form:
+    *
+    *    (cond == 0) ? x : y
+    *
+    * so x is chosen when cond is false. Fusing swaps src[2] and src[3] to get
+    * cond ? y : x. If the compare has invert_cond set, the two inversions
+    * cancel and no swap is done. cmpsel does not have an invert_cond bit.
+    */
+   if (!def->invert_cond) {
+      agx_index temp = I->src[2];
+      I->src[2] = I->src[3];
+      I->src[3] = temp;
+   }
+
+   if (def->op == AGX_OPCODE_ICMP) { /* take the compare's kind and condition */
+      I->op = AGX_OPCODE_ICMPSEL;
+      I->icond = def->icond;
+   } else {
+      I->op = AGX_OPCODE_FCMPSEL;
+      I->fcond = def->fcond;
+   }
+}
+
 static void
 agx_optimizer_forward(agx_context *ctx)
 {
@@ -304,6 +351,8 @@ agx_optimizer_forward(agx_context *ctx)
 
       if (I->op == AGX_OPCODE_IF_ICMP)
          agx_optimizer_if_cmp(defs, I);
+      else if (I->op == AGX_OPCODE_ICMPSEL)
+         agx_optimizer_cmpsel(defs, I);
    }
 
    free(defs);