agx: split select opt into its own pass

so it runs on the results of b2f lowering.

total instructions in shared programs: 2039862 -> 2039473 (-0.02%)
instructions in affected programs: 12348 -> 11959 (-3.15%)
helped: 84
HURT: 0
Instructions are helped.

total bytes in shared programs: 13986278 -> 13983778 (-0.02%)
bytes in affected programs: 141748 -> 139248 (-1.76%)
helped: 84
HURT: 122
Bytes are helped.

total regs in shared programs: 590371 -> 590373 (<.01%)
regs in affected programs: 195 -> 197 (1.03%)
helped: 5
HURT: 6
Inconclusive result (value mean confidence interval includes 0).

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28483>
This commit is contained in:
Alyssa Rosenzweig
2024-03-03 19:50:26 -04:00
committed by Marge Bot
parent 90b4e27bb2
commit 703e5385eb
3 changed files with 21 additions and 13 deletions
+1
View File
@@ -2497,6 +2497,7 @@ agx_optimize_nir(nir_shader *nir, unsigned *preamble_size)
* do it after fusing constant shifts. Constant folding will clean up.
*/
NIR_PASS(_, nir, agx_nir_lower_algebraic_late);
NIR_PASS(_, nir, agx_nir_fuse_selects);
NIR_PASS(_, nir, nir_opt_constant_folding);
NIR_PASS(_, nir, nir_opt_combine_barriers, NULL, NULL);
+1
View File
@@ -11,6 +11,7 @@ struct nir_shader;
bool agx_nir_lower_interpolation(struct nir_shader *s);
bool agx_nir_lower_algebraic_late(struct nir_shader *shader);
bool agx_nir_fuse_selects(struct nir_shader *shader);
bool agx_nir_fuse_algebraic_late(struct nir_shader *shader);
bool agx_nir_fence_images(struct nir_shader *shader);
bool agx_nir_lower_layer(struct nir_shader *s);
+19 -13
View File
@@ -83,17 +83,7 @@ lower_pack = [
('isub', 32, 'bits'))),
]
# Rewriting bcsel(a || b, ...) in terms of bcsel(a, ...) and bcsel(b, ...) lets
# our rules to fuse compare-and-select do a better job, assuming that a and b
# are comparisons themselves.
lower_selects = [
(('bcsel', ('ior(is_used_once)', a, b), c, d),
('bcsel', a, c, ('bcsel', b, c, d))),
(('bcsel', ('iand(is_used_once)', a, b), c, d),
('bcsel', a, ('bcsel', b, c, d), d)),
]
lower_selects = []
for T, sizes, one in [('f', [16, 32], 1.0),
('i', [8, 16, 32], 1),
('b', [32], -1)]:
@@ -103,6 +93,20 @@ for T, sizes, one in [('f', [16, 32], 1.0),
((f'b2{T}{size}', 'a@1'), ('bcsel', a, one, 0)),
])
# Rewriting bcsel(a || b, ...) in terms of bcsel(a, ...) and bcsel(b, ...) lets
# our rules to fuse compare-and-select do a better job, assuming that a and b
# are comparisons themselves.
#
# This needs to be a separate pass that runs after lower_selects, in order to
# pick up patterns like b2f32(iand(...))
opt_selects = [
(('bcsel', ('ior(is_used_once)', a, b), c, d),
('bcsel', a, c, ('bcsel', b, c, d))),
(('bcsel', ('iand(is_used_once)', a, b), c, d),
('bcsel', a, ('bcsel', b, c, d), d)),
]
fuse_extr = []
for start in range(32):
fuse_extr.extend([
@@ -192,9 +196,11 @@ def run():
print(nir_algebraic.AlgebraicPass("agx_nir_lower_algebraic_late",
lower_sm5_shift + lower_pack +
lower_selects).render())
print(nir_algebraic.AlgebraicPass("agx_nir_fuse_selects",
opt_selects).render())
print(nir_algebraic.AlgebraicPass("agx_nir_fuse_algebraic_late",
fuse_extr + fuse_ubfe + fuse_imad +
ixor_bcsel).render())
fuse_extr + fuse_ubfe +
fuse_imad + ixor_bcsel).render())
if __name__ == '__main__':