pan/bi: Add a common subexpression elimination pass

ALU only. Intended to clean up the lowerings used for complex texturing.
For example, if a shader reads two cube maps at the same coordinates,
this deduplicates the cube map transformation.

This needs to happen in the backend since we do the cube map
transformation with the backend builder, rather than special NIR ops.
This is a tradeoff.

Pass based on ir3's, which in turn is inspired by NIR's.

total instructions in shared programs: 148799 -> 147348 (-0.98%)
instructions in affected programs: 20509 -> 19058 (-7.07%)
helped: 145
HURT: 0
helped stats (abs) min: 4.0 max: 30.0 x̄: 10.01 x̃: 8
helped stats (rel) min: 1.92% max: 54.55% x̄: 10.87% x̃: 7.41%
95% mean confidence interval for instructions value: -10.73 -9.28
95% mean confidence interval for instructions %-change: -12.81% -8.94%
Instructions are helped.

total tuples in shared programs: 129992 -> 128908 (-0.83%)
tuples in affected programs: 17624 -> 16540 (-6.15%)
helped: 145
HURT: 0
helped stats (abs) min: 2.0 max: 25.0 x̄: 7.48 x̃: 7
helped stats (rel) min: 0.74% max: 42.86% x̄: 9.16% x̃: 7.22%
95% mean confidence interval for tuples value: -7.96 -6.99
95% mean confidence interval for tuples %-change: -10.52% -7.79%
Tuples are helped.

total clauses in shared programs: 27632 -> 27582 (-0.18%)
clauses in affected programs: 1077 -> 1027 (-4.64%)
helped: 44
HURT: 0
helped stats (abs) min: 1.0 max: 3.0 x̄: 1.14 x̃: 1
helped stats (rel) min: 2.50% max: 16.67% x̄: 4.99% x̃: 4.45%
95% mean confidence interval for clauses value: -1.26 -1.01
95% mean confidence interval for clauses %-change: -5.70% -4.27%
Clauses are helped.

total cycles in shared programs: 12323 -> 12285.63 (-0.30%)
cycles in affected programs: 618.25 -> 580.88 (-6.05%)
helped: 120
HURT: 0
helped stats (abs) min: 0.08333299999999966 max: 0.5416680000000014 x̄: 0.31 x̃: 0
helped stats (rel) min: 0.77% max: 66.67% x̄: 7.60% x̃: 7.37%
95% mean confidence interval for cycles value: -0.33 -0.29
95% mean confidence interval for cycles %-change: -8.73% -6.47%
Cycles are helped.

total arith in shared programs: 4916.75 -> 4866.88 (-1.01%)
arith in affected programs: 677.79 -> 627.92 (-7.36%)
helped: 145
HURT: 0
helped stats (abs) min: 0.08333299999999966 max: 1.0833329999999997 x̄: 0.34 x̃: 0
helped stats (rel) min: 0.77% max: 66.67% x̄: 12.81% x̃: 7.87%
95% mean confidence interval for arith value: -0.37 -0.32
95% mean confidence interval for arith %-change: -15.33% -10.29%
Arith are helped.

total quadwords in shared programs: 118117 -> 117262 (-0.72%)
quadwords in affected programs: 15283 -> 14428 (-5.59%)
helped: 143
HURT: 0
helped stats (abs) min: 1.0 max: 23.0 x̄: 5.98 x̃: 5
helped stats (rel) min: 0.44% max: 25.71% x̄: 7.56% x̃: 5.56%
95% mean confidence interval for quadwords value: -6.46 -5.50
95% mean confidence interval for quadwords %-change: -8.59% -6.53%
Quadwords are helped.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11733>
This commit is contained in:
Alyssa Rosenzweig
2021-06-24 19:36:11 -04:00
committed by Marge Bot
parent f35d0fb028
commit 99b2dddebf
4 changed files with 198 additions and 0 deletions
+189
View File
@@ -0,0 +1,189 @@
/*
* Copyright (C) 2021 Collabora, Ltd.
* Copyright (C) 2014 Valve Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_builder.h"
#define XXH_INLINE_ALL
#include "xxhash.h"
/* This pass handles CSE'ing repeated expressions created in the process of
 * translating from NIR. Currently this is intra-block only; to make it work
 * across multiple blocks, we would need to bring forward dominance calculation.
 */
/* Fold one word of data into a running hash. The previous hash value is
 * passed to XXH32 as the seed, so successive calls chain together. */
static inline uint32_t
HASH(uint32_t hash, unsigned data)
{
        const uint32_t seed = hash;

        return XXH32(&data, sizeof data, seed);
}
/* Mix the value, abs, neg, swizzle, offset, reg and type fields of an index
 * into the running hash, in that order, and return the updated hash. */
static uint32_t
hash_index(uint32_t hash, bi_index index)
{
        /* Each field is widened to unsigned, exactly as HASH() expects */
        const unsigned fields[] = {
                index.value,
                index.abs,
                index.neg,
                index.swizzle,
                index.offset,
                index.reg,
                index.type,
        };

        for (unsigned f = 0; f < ARRAY_SIZE(fields); ++f)
                hash = HASH(hash, fields[f]);

        return hash;
}
/* Hash an ALU instruction, for use as the hash callback of the CSE set.
 *
 * The hash covers the opcode, the destination swizzles, every source index,
 * dest_mod, shift and the flags[] words. Everything else is left out. */
static uint32_t
hash_instr(const void *data)
{
        const bi_instr *ins = data;
        uint32_t h = HASH(0, ins->op);

        /* Explicitly skip destinations, except for size details */
        bi_foreach_dest(ins, d) {
                h = HASH(h, ins->dest[d].swizzle);
        }

        bi_foreach_src(ins, s) {
                h = hash_index(h, ins->src[s]);
        }

        /* Explicitly skip branch, regfmt, vecsize, no_spill, tdd, table */
        h = HASH(h, ins->dest_mod);

        /* Explicitly skip other immediates */
        h = HASH(h, ins->shift);

        const unsigned nr_flags = ARRAY_SIZE(ins->flags);

        for (unsigned f = 0; f != nr_flags; ++f)
                h = HASH(h, ins->flags[f]);

        return h;
}
static bool
instrs_equal(const void *_i1, const void *_i2)
{
const bi_instr *i1 = _i1, *i2 = _i2;
if (i1->op != i2->op)
return false;
/* Explicitly skip destinations */
bi_foreach_src(i1, s) {
bi_index s1 = i1->src[s], s2 = i2->src[s];
if (memcmp(&s1, &s2, sizeof(s1)) != 0)
return false;
}
if (i1->dest_mod != i2->dest_mod)
return false;
if (i1->shift != i2->shift)
return false;
for (unsigned i = 0; i < ARRAY_SIZE(i1->flags); ++i) {
if (i1->flags[i] != i2->flags[i])
return false;
}
return true;
}
/* Determines what instructions the above routines have to handle.
 *
 * Returns false for DTSEL_IMM and DISCARD_F32, for any opcode flagged as a
 * message in bi_opcode_props, for instructions with a branch target, and for
 * instructions that touch non-SSA values as a destination or a source.
 * Everything else is a candidate for CSE.
 */
static bool
instr_can_cse(const bi_instr *I)
{
        switch (I->op) {
        case BI_OPCODE_DTSEL_IMM:
        case BI_OPCODE_DISCARD_F32:
                return false;
        default:
                break;
        }

        if (bi_opcode_props[I->op].message)
                return false;

        if (I->branch_target)
                return false;

        /* Refuse to CSE non-SSA destinations since the data flow analysis
         * required is nontrivial */
        bi_foreach_dest(I, d) {
                if (!bi_is_null(I->dest[d]) && !bi_is_ssa(I->dest[d]))
                        return false;
        }

        /* Similarly refuse to CSE non-SSA sources. The set matches sources
         * purely by index, but a register may be rewritten between two
         * otherwise identical reads, in which case the two instructions do
         * not compute the same value. Constants and other immutable sources
         * remain eligible.
         * NOTE(review): BI_INDEX_REGISTER is expected to come from compiler.h
         * -- confirm. */
        bi_foreach_src(I, s) {
                if (I->src[s].reg || I->src[s].type == BI_INDEX_REGISTER)
                        return false;
        }

        return true;
}
/* Intra-block common subexpression elimination.
 *
 * Walks each block in order, keeping a set of the CSE-able instructions seen
 * so far in that block. When an instruction matches an earlier one, its
 * destinations are recorded in a per-block replacement table, and later
 * sources in the same block that read those destinations are rewritten to
 * read the earlier instruction's destinations instead. The redundant
 * instruction itself is not removed here.
 *
 * This is purely an optimization: if memory cannot be allocated, the pass
 * stops early and leaves the remaining blocks untouched.
 */
void
bi_opt_cse(bi_context *ctx)
{
        struct set *instr_set = _mesa_set_create(NULL, hash_instr, instrs_equal);

        if (!instr_set)
                return;

        bi_foreach_block(ctx, block) {
                /* Replacement table indexed by bi_word_node(). Zeroed entries
                 * are treated as "no replacement" via bi_is_null() below. */
                bi_index *replacement =
                        calloc((ctx->ssa_alloc + 1) << 2, sizeof(*replacement));

                /* Blocks already processed are valid on their own, so it is
                 * safe to simply stop here */
                if (!replacement)
                        break;

                /* Matches never cross block boundaries */
                _mesa_set_clear(instr_set, NULL);

                bi_foreach_instr_in_block((bi_block *) block, instr) {
                        /* Rewrite before trying to CSE anything so we converge
                         * locally in one iteration */
                        bi_foreach_src(instr, s) {
                                /* Source 0 of staging-register reads is left
                                 * untouched */
                                if (s == 0 && bi_opcode_props[instr->op].sr_read)
                                        continue;

                                if (!bi_is_ssa(instr->src[s]))
                                        continue;

                                bi_index repl = replacement[bi_word_node(instr->src[s])];
                                if (!bi_is_null(repl))
                                        instr->src[s] = bi_replace_index(instr->src[s], repl);
                        }

                        if (!instr_can_cse(instr))
                                continue;

                        bool found = false;
                        struct set_entry *entry =
                                _mesa_set_search_or_add(instr_set, instr, &found);

                        /* Not found: instr was just added and becomes the
                         * canonical copy for later duplicates */
                        if (!found)
                                continue;

                        const bi_instr *match = entry->key;

                        bi_foreach_dest(instr, d) {
                                if (!bi_is_null(instr->dest[d]))
                                        replacement[bi_word_node(instr->dest[d])] = match->dest[d];
                        }
                }

                free(replacement);
        }

        _mesa_set_destroy(instr_set, NULL);
}
+3
View File
@@ -3512,10 +3512,13 @@ bifrost_compile_shader_nir(nir_shader *nir,
/* Runs before copy prop */
bi_opt_push_ubo(ctx);
bi_opt_constant_fold(ctx);
bi_opt_copy_prop(ctx);
bi_opt_mod_prop_forward(ctx);
bi_opt_mod_prop_backward(ctx);
bi_opt_dead_code_eliminate(ctx);
bi_opt_cse(ctx);
bi_opt_dead_code_eliminate(ctx);
bi_foreach_block(ctx, _block) {
bi_block *block = (bi_block *) _block;
+5
View File
@@ -330,6 +330,7 @@ typedef struct {
enum bi_clamp clamp;
bool saturate;
bool not_result;
unsigned dest_mod;
};
/* Immediates. All seen alone in an instruction, except for varying/texture
@@ -395,6 +396,9 @@ typedef struct {
bool lod_mode; /* VAR_TEX, TEXS, implicitly for TEXC */
};
/* Maximum size, for hashing */
unsigned flags[5];
struct {
enum bi_subgroup subgroup; /* WMASK, CLPER */
enum bi_inactive_result inactive_result; /* CLPER */
@@ -791,6 +795,7 @@ void bi_print_shader(bi_context *ctx, FILE *fp);
void bi_analyze_helper_terminate(bi_context *ctx);
void bi_analyze_helper_requirements(bi_context *ctx);
void bi_opt_copy_prop(bi_context *ctx);
void bi_opt_cse(bi_context *ctx);
void bi_opt_mod_prop_forward(bi_context *ctx);
void bi_opt_mod_prop_backward(bi_context *ctx);
void bi_opt_dead_code_eliminate(bi_context *ctx);
+1
View File
@@ -29,6 +29,7 @@ libpanfrost_bifrost_files = files(
'bi_opt_constant_fold.c',
'bi_opt_copy_prop.c',
'bi_opt_dce.c',
'bi_opt_cse.c',
'bi_opt_push_ubo.c',
'bi_opt_mod_props.c',
'bi_pack.c',