agx: split select opt into its own pass
so it runs on the results of b2f lowering.

total instructions in shared programs: 2039862 -> 2039473 (-0.02%)
instructions in affected programs: 12348 -> 11959 (-3.15%)
helped: 84
HURT: 0
Instructions are helped.

total bytes in shared programs: 13986278 -> 13983778 (-0.02%)
bytes in affected programs: 141748 -> 139248 (-1.76%)
helped: 84
HURT: 122
Bytes are helped.

total regs in shared programs: 590371 -> 590373 (<.01%)
regs in affected programs: 195 -> 197 (1.03%)
helped: 5
HURT: 6
Inconclusive result (value mean confidence interval includes 0).

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28483>
This commit is contained in:
committed by
Marge Bot
parent
90b4e27bb2
commit
703e5385eb
@@ -2497,6 +2497,7 @@ agx_optimize_nir(nir_shader *nir, unsigned *preamble_size)
|
||||
* do it after fusing constant shifts. Constant folding will clean up.
|
||||
*/
|
||||
NIR_PASS(_, nir, agx_nir_lower_algebraic_late);
|
||||
NIR_PASS(_, nir, agx_nir_fuse_selects);
|
||||
NIR_PASS(_, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(_, nir, nir_opt_combine_barriers, NULL, NULL);
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ struct nir_shader;
|
||||
|
||||
bool agx_nir_lower_interpolation(struct nir_shader *s);
|
||||
bool agx_nir_lower_algebraic_late(struct nir_shader *shader);
|
||||
bool agx_nir_fuse_selects(struct nir_shader *shader);
|
||||
bool agx_nir_fuse_algebraic_late(struct nir_shader *shader);
|
||||
bool agx_nir_fence_images(struct nir_shader *shader);
|
||||
bool agx_nir_lower_layer(struct nir_shader *s);
|
||||
|
||||
@@ -83,17 +83,7 @@ lower_pack = [
|
||||
('isub', 32, 'bits'))),
|
||||
]
|
||||
|
||||
# Rewriting bcsel(a || b, ...) in terms of bcsel(a, ...) and bcsel(b, ...) lets
|
||||
# our rules to fuse compare-and-select do a better job, assuming that a and b
|
||||
# are comparisons themselves.
|
||||
lower_selects = [
|
||||
(('bcsel', ('ior(is_used_once)', a, b), c, d),
|
||||
('bcsel', a, c, ('bcsel', b, c, d))),
|
||||
|
||||
(('bcsel', ('iand(is_used_once)', a, b), c, d),
|
||||
('bcsel', a, ('bcsel', b, c, d), d)),
|
||||
]
|
||||
|
||||
lower_selects = []
|
||||
for T, sizes, one in [('f', [16, 32], 1.0),
|
||||
('i', [8, 16, 32], 1),
|
||||
('b', [32], -1)]:
|
||||
@@ -103,6 +93,20 @@ for T, sizes, one in [('f', [16, 32], 1.0),
|
||||
((f'b2{T}{size}', 'a@1'), ('bcsel', a, one, 0)),
|
||||
])
|
||||
|
||||
# Rewriting bcsel(a || b, ...) in terms of bcsel(a, ...) and bcsel(b, ...) lets
|
||||
# our rules to fuse compare-and-select do a better job, assuming that a and b
|
||||
# are comparisons themselves.
|
||||
#
|
||||
# This needs to be a separate pass that runs after lower_selects, in order to
|
||||
# pick up patterns like b2f32(iand(...))
|
||||
opt_selects = [
|
||||
(('bcsel', ('ior(is_used_once)', a, b), c, d),
|
||||
('bcsel', a, c, ('bcsel', b, c, d))),
|
||||
|
||||
(('bcsel', ('iand(is_used_once)', a, b), c, d),
|
||||
('bcsel', a, ('bcsel', b, c, d), d)),
|
||||
]
|
||||
|
||||
fuse_extr = []
|
||||
for start in range(32):
|
||||
fuse_extr.extend([
|
||||
@@ -192,9 +196,11 @@ def run():
|
||||
print(nir_algebraic.AlgebraicPass("agx_nir_lower_algebraic_late",
|
||||
lower_sm5_shift + lower_pack +
|
||||
lower_selects).render())
|
||||
print(nir_algebraic.AlgebraicPass("agx_nir_fuse_selects",
|
||||
opt_selects).render())
|
||||
print(nir_algebraic.AlgebraicPass("agx_nir_fuse_algebraic_late",
|
||||
fuse_extr + fuse_ubfe + fuse_imad +
|
||||
ixor_bcsel).render())
|
||||
fuse_extr + fuse_ubfe +
|
||||
fuse_imad + ixor_bcsel).render())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
Reference in New Issue
Block a user