pan/bi: Propagate fabs/neg/sat
Initial support for modifier propagation. Bifrost makes this unreasonably hard.

total instructions in shared programs: 151604 -> 150761 (-0.56%)
instructions in affected programs: 48773 -> 47930 (-1.73%)
helped: 212
HURT: 0
helped stats (abs) min: 1 max: 28 x̄: 3.98 x̃: 1
helped stats (rel) min: 0.29% max: 12.70% x̄: 1.75% x̃: 1.26%
95% mean confidence interval for instructions value: -4.71 -3.25
95% mean confidence interval for instructions %-change: -1.97% -1.53%
Instructions are helped.

total tuples in shared programs: 131876 -> 131560 (-0.24%)
tuples in affected programs: 25393 -> 25077 (-1.24%)
helped: 104
HURT: 3
helped stats (abs) min: 1 max: 28 x̄: 3.08 x̃: 2
helped stats (rel) min: 0.34% max: 8.57% x̄: 1.55% x̃: 1.04%
HURT stats (abs) min: 1 max: 2 x̄: 1.33 x̃: 1
HURT stats (rel) min: 0.51% max: 2.86% x̄: 1.30% x̃: 0.53%
95% mean confidence interval for tuples value: -3.63 -2.28
95% mean confidence interval for tuples %-change: -1.73% -1.21%
Tuples are helped.

total clauses in shared programs: 28122 -> 28032 (-0.32%)
clauses in affected programs: 2720 -> 2630 (-3.31%)
helped: 58
HURT: 1
helped stats (abs) min: 1 max: 6 x̄: 1.57 x̃: 1
helped stats (rel) min: 0.88% max: 14.29% x̄: 4.06% x̃: 3.67%
HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel) min: 7.69% max: 7.69% x̄: 7.69% x̃: 7.69%
95% mean confidence interval for clauses value: -1.85 -1.20
95% mean confidence interval for clauses %-change: -4.60% -3.13%
Clauses are helped.

total quadwords in shared programs: 119778 -> 119509 (-0.22%)
quadwords in affected programs: 20698 -> 20429 (-1.30%)
helped: 95
HURT: 1
helped stats (abs) min: 1 max: 28 x̄: 2.85 x̃: 2
helped stats (rel) min: 0.38% max: 7.14% x̄: 1.50% x̃: 1.13%
HURT stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2
HURT stats (rel) min: 3.23% max: 3.23% x̄: 3.23% x̃: 3.23%
95% mean confidence interval for quadwords value: -3.49 -2.11
95% mean confidence interval for quadwords %-change: -1.71% -1.20%
Quadwords are helped.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11327>
This commit is contained in:
committed by
Marge Bot
parent
e41d8ed007
commit
41070fedca
@@ -0,0 +1,233 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Collabora, Ltd.
|
||||
* Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "compiler.h"
|
||||
|
||||
static bool
|
||||
bi_takes_fabs(bi_instr *I, unsigned s)
|
||||
{
|
||||
switch (I->op) {
|
||||
case BI_OPCODE_FCMP_V2F16:
|
||||
case BI_OPCODE_FMAX_V2F16:
|
||||
case BI_OPCODE_FMIN_V2F16:
|
||||
/* TODO: Check count or lower */
|
||||
return false;
|
||||
case BI_OPCODE_V2F32_TO_V2F16:
|
||||
/* TODO: Needs both match or lower */
|
||||
return false;
|
||||
case BI_OPCODE_FLOG_TABLE_F32:
|
||||
/* TODO: Need to check mode */
|
||||
return false;
|
||||
default:
|
||||
return bi_opcode_props[I->op].abs & BITFIELD_BIT(s);
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
bi_takes_fneg(bi_instr *I, unsigned s)
|
||||
{
|
||||
switch (I->op) {
|
||||
case BI_OPCODE_CUBE_SSEL:
|
||||
case BI_OPCODE_CUBE_TSEL:
|
||||
case BI_OPCODE_CUBEFACE:
|
||||
/* TODO: Needs match or lower */
|
||||
return false;
|
||||
case BI_OPCODE_FREXPE_F32:
|
||||
case BI_OPCODE_FREXPE_V2F16:
|
||||
case BI_OPCODE_FLOG_TABLE_F32:
|
||||
/* TODO: Need to check mode */
|
||||
return false;
|
||||
default:
|
||||
return bi_opcode_props[I->op].neg & BITFIELD_BIT(s);
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
bi_is_fabsneg(bi_instr *I)
|
||||
{
|
||||
return (I->op == BI_OPCODE_FADD_F32 || I->op == BI_OPCODE_FADD_V2F16) &&
|
||||
(I->src[1].type == BI_INDEX_CONSTANT && I->src[1].value == 0) &&
|
||||
(I->clamp == BI_CLAMP_NONE);
|
||||
}
|
||||
|
||||
static enum bi_swizzle
|
||||
bi_compose_swizzle_16(enum bi_swizzle a, enum bi_swizzle b)
|
||||
{
|
||||
assert(a <= BI_SWIZZLE_H11);
|
||||
assert(b <= BI_SWIZZLE_H11);
|
||||
|
||||
bool al = (a & BI_SWIZZLE_H10);
|
||||
bool ar = (a & BI_SWIZZLE_H01);
|
||||
bool bl = (b & BI_SWIZZLE_H10);
|
||||
bool br = (b & BI_SWIZZLE_H01);
|
||||
|
||||
return ((al ? br : bl) ? BI_SWIZZLE_H10 : 0) |
|
||||
((ar ? br : bl) ? BI_SWIZZLE_H01 : 0);
|
||||
}
|
||||
|
||||
/* Like bi_replace_index, but composes instead of overwrites */
|
||||
|
||||
static inline bi_index
|
||||
bi_compose_float_index(bi_index old, bi_index repl)
|
||||
{
|
||||
/* abs(-x) = abs(+x) so ignore repl.neg if old.abs is set, otherwise
|
||||
* -(-x) = x but -(+x) = +(-x) so need to exclusive-or the negates */
|
||||
repl.neg = old.neg ^ (repl.neg && !old.abs);
|
||||
|
||||
/* +/- abs(+/- abs(x)) = +/- abs(x), etc so just or the two */
|
||||
repl.abs |= old.abs;
|
||||
|
||||
/* Use the old swizzle to select from the replacement swizzle */
|
||||
repl.swizzle = bi_compose_swizzle_16(old.swizzle, repl.swizzle);
|
||||
|
||||
return repl;
|
||||
}
|
||||
|
||||
void
|
||||
bi_opt_mod_prop_forward(bi_context *ctx)
|
||||
{
|
||||
bi_instr **lut = calloc(sizeof(bi_instr *), ((ctx->ssa_alloc + 1) << 2));
|
||||
|
||||
bi_foreach_instr_global_safe(ctx, I) {
|
||||
if (bi_is_ssa(I->dest[0]))
|
||||
lut[bi_word_node(I->dest[0])] = I;
|
||||
|
||||
bi_foreach_src(I, s) {
|
||||
if (!bi_is_ssa(I->src[s]))
|
||||
continue;
|
||||
|
||||
bi_instr *mod = lut[bi_word_node(I->src[s])];
|
||||
|
||||
if (!mod)
|
||||
continue;
|
||||
|
||||
if (bi_opcode_props[mod->op].size != bi_opcode_props[I->op].size)
|
||||
continue;
|
||||
|
||||
if (bi_is_fabsneg(mod)) {
|
||||
if (mod->src[0].abs && !bi_takes_fabs(I, s))
|
||||
continue;
|
||||
|
||||
if (mod->src[0].neg && !bi_takes_fneg(I, s))
|
||||
continue;
|
||||
|
||||
I->src[s] = bi_compose_float_index(I->src[s], mod->src[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(lut);
|
||||
}
|
||||
|
||||
/* RSCALE has restrictions on how the clamp may be used, only used for
|
||||
* specialized transcendental sequences that set the clamp explicitly anyway */
|
||||
|
||||
static bool
|
||||
bi_takes_clamp(bi_instr *I)
|
||||
{
|
||||
switch (I->op) {
|
||||
case BI_OPCODE_FMA_RSCALE_F32:
|
||||
case BI_OPCODE_FMA_RSCALE_V2F16:
|
||||
case BI_OPCODE_FADD_RSCALE_F32:
|
||||
return false;
|
||||
default:
|
||||
return bi_opcode_props[I->op].clamp;
|
||||
}
|
||||
}
|
||||
|
||||
/* Treating clamps as functions, compute the composition f circ g. For {NONE,
|
||||
* SAT, SAT_SIGNED, CLAMP_POS}, anything left- or right-composed with NONE is
|
||||
* unchanged, anything composed with itself is unchanged, and any two
|
||||
* nontrivial distinct clamps compose to SAT (left as an exercise) */
|
||||
|
||||
static enum bi_clamp
|
||||
bi_compose_clamp(enum bi_clamp f, enum bi_clamp g)
|
||||
{
|
||||
return (f == BI_CLAMP_NONE) ? g :
|
||||
(g == BI_CLAMP_NONE) ? f :
|
||||
(f == g) ? f :
|
||||
BI_CLAMP_CLAMP_0_1;
|
||||
}
|
||||
|
||||
static bool
|
||||
bi_is_fclamp(bi_instr *I)
|
||||
{
|
||||
return (I->op == BI_OPCODE_FADD_F32 || I->op == BI_OPCODE_FADD_V2F16) &&
|
||||
(!I->src[0].abs && !I->src[0].neg) &&
|
||||
(I->src[1].type == BI_INDEX_CONSTANT && I->src[1].value == 0) &&
|
||||
(I->clamp != BI_CLAMP_NONE);
|
||||
}
|
||||
|
||||
static bool
|
||||
bi_optimizer_clamp(bi_instr *I, bi_instr *use)
|
||||
{
|
||||
if (!bi_is_fclamp(use)) return false;
|
||||
if (!bi_takes_clamp(I)) return false;
|
||||
if (use->src[0].neg || use->src[0].abs) return false;
|
||||
|
||||
I->clamp = bi_compose_clamp(I->clamp, use->clamp);
|
||||
I->dest[0] = use->dest[0];
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
bi_opt_mod_prop_backward(bi_context *ctx)
|
||||
{
|
||||
unsigned count = ((ctx->ssa_alloc + 1) << 2);
|
||||
bi_instr **uses = calloc(count, sizeof(*uses));
|
||||
BITSET_WORD *multiple = calloc(BITSET_WORDS(count), sizeof(*multiple));
|
||||
|
||||
bi_foreach_instr_global_rev(ctx, I) {
|
||||
bi_foreach_src(I, s) {
|
||||
if (bi_is_ssa(I->src[s])) {
|
||||
unsigned v = bi_word_node(I->src[s]);
|
||||
|
||||
if (uses[v])
|
||||
BITSET_SET(multiple, v);
|
||||
else
|
||||
uses[v] = I;
|
||||
}
|
||||
}
|
||||
|
||||
if (!bi_is_ssa(I->dest[0]))
|
||||
continue;
|
||||
|
||||
bi_instr *use = uses[bi_word_node(I->dest[0])];
|
||||
|
||||
if (!use || BITSET_TEST(multiple, bi_word_node(I->dest[0])))
|
||||
continue;
|
||||
|
||||
if (bi_opcode_props[use->op].size != bi_opcode_props[I->op].size)
|
||||
continue;
|
||||
|
||||
/* Destination has a single use, try to propagate */
|
||||
if (bi_optimizer_clamp(I, use)) {
|
||||
bi_remove_instruction(use);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
free(uses);
|
||||
free(multiple);
|
||||
}
|
||||
@@ -3431,6 +3431,8 @@ bifrost_compile_shader_nir(nir_shader *nir,
|
||||
bi_opt_push_ubo(ctx);
|
||||
bi_opt_constant_fold(ctx);
|
||||
bi_opt_copy_prop(ctx);
|
||||
bi_opt_mod_prop_forward(ctx);
|
||||
bi_opt_mod_prop_backward(ctx);
|
||||
bi_opt_dead_code_eliminate(ctx);
|
||||
|
||||
bi_foreach_block(ctx, _block) {
|
||||
|
||||
@@ -707,6 +707,10 @@ bi_node_to_index(unsigned node, unsigned node_count)
|
||||
bi_foreach_block(ctx, v_block) \
|
||||
bi_foreach_instr_in_block((bi_block *) v_block, v)
|
||||
|
||||
/* Iterate `v` over the instructions of every block in `ctx` by nesting
 * bi_foreach_instr_in_block_rev inside bi_foreach_block_rev (reverse order,
 * going by the _rev helpers). The block cursor is the fixed identifier
 * v_block, so it is not derived from the `v` argument. */
#define bi_foreach_instr_global_rev(ctx, v) \
|
||||
bi_foreach_block_rev(ctx, v_block) \
|
||||
bi_foreach_instr_in_block_rev((bi_block *) v_block, v)
|
||||
|
||||
/* Iterate `v` over the instructions of every block in `ctx` by nesting
 * bi_foreach_instr_in_block_safe inside bi_foreach_block. Presumably the
 * _safe inner iterator tolerates removal of the current instruction --
 * confirm against its definition. The block cursor is the fixed identifier
 * v_block. */
#define bi_foreach_instr_global_safe(ctx, v) \
|
||||
bi_foreach_block(ctx, v_block) \
|
||||
bi_foreach_instr_in_block_safe((bi_block *) v_block, v)
|
||||
@@ -774,6 +778,8 @@ void bi_print_shader(bi_context *ctx, FILE *fp);
|
||||
/* BIR passes */
|
||||
|
||||
void bi_opt_copy_prop(bi_context *ctx);
|
||||
void bi_opt_mod_prop_forward(bi_context *ctx);
|
||||
void bi_opt_mod_prop_backward(bi_context *ctx);
|
||||
void bi_opt_dead_code_eliminate(bi_context *ctx);
|
||||
void bi_opt_dce_post_ra(bi_context *ctx);
|
||||
void bi_opt_push_ubo(bi_context *ctx);
|
||||
|
||||
@@ -29,6 +29,7 @@ libpanfrost_bifrost_files = files(
|
||||
'bi_opt_copy_prop.c',
|
||||
'bi_opt_dce.c',
|
||||
'bi_opt_push_ubo.c',
|
||||
'bi_opt_mod_props.c',
|
||||
'bi_pack.c',
|
||||
'bi_ra.c',
|
||||
'bi_schedule.c',
|
||||
|
||||
Reference in New Issue
Block a user