nir: Apply nir_opt_offsets to nir_intrinsic_load_uniform as well.

Doing this for ir3 required adding a struct for limits of how much base to
fold in (which NTT wants as well for its case of shared vars), otherwise
the later work to lower to the 1<<9 word limit would emit more
instructions.

The shader-db results are that sometimes the reduction in NIR instruction
count results in fewer sampler prefetches due to the shader being
estimated to be shorter (dota2, nexuiz):

total instructions in shared programs: 8996651 -> 8996776 (<.01%)
total cat5 in shared programs: 86561 -> 86577 (0.02%)

Reviewed-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14023>
This commit is contained in:
Emma Anholt
2022-01-10 14:49:09 -08:00
committed by Marge Bot
parent b024102d7c
commit f6ffefba3e
4 changed files with 53 additions and 23 deletions
+8 -2
View File
@@ -211,8 +211,14 @@ radv_optimize_nir_algebraic(nir_shader *nir, bool opt_offsets)
NIR_PASS(more_algebraic, nir, nir_opt_algebraic);
}
if (opt_offsets)
NIR_PASS_V(nir, nir_opt_offsets);
if (opt_offsets) {
static const nir_opt_offsets_options offset_options = {
.uniform_max = 0,
.buffer_max = ~0,
.shared_max = ~0,
};
NIR_PASS_V(nir, nir_opt_offsets, &offset_options);
}
/* Do late algebraic optimization to turn add(a,
* neg(b)) back into subs, then the mandatory cleanup
+12 -1
View File
@@ -5254,7 +5254,18 @@ bool nir_opt_sink(nir_shader *shader, nir_move_options options);
bool nir_opt_move(nir_shader *shader, nir_move_options options);
bool nir_opt_offsets(nir_shader *shader);
typedef struct {
/** nir_load_uniform max base offset */
uint32_t uniform_max;
/** nir_var_mem_shared max base offset */
uint32_t shared_max;
/** nir_load/store_buffer_amd max base offset */
uint32_t buffer_max;
} nir_opt_offsets_options;
bool nir_opt_offsets(nir_shader *shader, const nir_opt_offsets_options *options);
bool nir_opt_peephole_select(nir_shader *shader, unsigned limit,
bool indirect_load_ok, bool expensive_alu_ok);
+22 -19
View File
@@ -31,10 +31,11 @@
typedef struct
{
struct hash_table *range_ht;
const nir_opt_offsets_options *options;
} opt_offsets_state;
static nir_ssa_def *
try_extract_const_addition(nir_builder *b, nir_instr *instr, opt_offsets_state *state, unsigned *out_const)
try_extract_const_addition(nir_builder *b, nir_instr *instr, opt_offsets_state *state, unsigned *out_const, uint32_t max)
{
if (instr->type != nir_instr_type_alu)
return NULL;
@@ -66,15 +67,18 @@ try_extract_const_addition(nir_builder *b, nir_instr *instr, opt_offsets_state *
for (unsigned i = 0; i < 2; ++i) {
if (nir_src_is_const(alu->src[i].src)) {
*out_const += nir_src_as_uint(alu->src[i].src);
nir_ssa_def *replace_src =
try_extract_const_addition(b, alu->src[1 - i].src.ssa->parent_instr, state, out_const);
return replace_src ? replace_src : alu->src[1 - i].src.ssa;
uint32_t offset = nir_src_as_uint(alu->src[i].src);
if (offset + *out_const <= max) {
*out_const += offset;
nir_ssa_def *replace_src =
try_extract_const_addition(b, alu->src[1 - i].src.ssa->parent_instr, state, out_const, max);
return replace_src ? replace_src : alu->src[1 - i].src.ssa;
}
}
}
nir_ssa_def *replace_src0 = try_extract_const_addition(b, alu->src[0].src.ssa->parent_instr, state, out_const);
nir_ssa_def *replace_src1 = try_extract_const_addition(b, alu->src[1].src.ssa->parent_instr, state, out_const);
nir_ssa_def *replace_src0 = try_extract_const_addition(b, alu->src[0].src.ssa->parent_instr, state, out_const, max);
nir_ssa_def *replace_src1 = try_extract_const_addition(b, alu->src[1].src.ssa->parent_instr, state, out_const, max);
if (!replace_src0 && !replace_src1)
return NULL;
@@ -88,7 +92,8 @@ static bool
try_fold_load_store(nir_builder *b,
nir_intrinsic_instr *intrin,
opt_offsets_state *state,
unsigned offset_src_idx)
unsigned offset_src_idx,
uint32_t max)
{
/* Assume that BASE is the constant offset of a load/store.
* Try to constant-fold additions to the offset source
@@ -103,7 +108,7 @@ try_fold_load_store(nir_builder *b,
return false;
if (!nir_src_is_const(*off_src)) {
replace_src = try_extract_const_addition(b, off_src->ssa->parent_instr, state, &off_const);
replace_src = try_extract_const_addition(b, off_src->ssa->parent_instr, state, &off_const, max);
} else if (nir_src_as_uint(*off_src)) {
off_const += nir_src_as_uint(*off_src);
b->cursor = nir_before_instr(&intrin->instr);
@@ -128,21 +133,18 @@ process_instr(nir_builder *b, nir_instr *instr, void *s)
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
/* Note that while it's tempting to include nir_intrinsic_load_uniform
* here, freedreno doesn't want that because it can have to move the base
* back to a register plus a small constant offset, and it's not clever
* enough to minimize the code that that emits.
*/
case nir_intrinsic_load_uniform:
return try_fold_load_store(b, intrin, state, 0, state->options->uniform_max);
case nir_intrinsic_load_shared:
case nir_intrinsic_load_shared_ir3:
return try_fold_load_store(b, intrin, state, 0);
return try_fold_load_store(b, intrin, state, 0, state->options->shared_max);
case nir_intrinsic_store_shared:
case nir_intrinsic_store_shared_ir3:
return try_fold_load_store(b, intrin, state, 1);
return try_fold_load_store(b, intrin, state, 1, state->options->shared_max);
case nir_intrinsic_load_buffer_amd:
return try_fold_load_store(b, intrin, state, 1);
return try_fold_load_store(b, intrin, state, 1, state->options->buffer_max);
case nir_intrinsic_store_buffer_amd:
return try_fold_load_store(b, intrin, state, 2);
return try_fold_load_store(b, intrin, state, 2, state->options->buffer_max);
default:
return false;
}
@@ -151,10 +153,11 @@ process_instr(nir_builder *b, nir_instr *instr, void *s)
}
bool
nir_opt_offsets(nir_shader *shader)
nir_opt_offsets(nir_shader *shader, const nir_opt_offsets_options *options)
{
opt_offsets_state state;
state.range_ht = NULL;
state.options = options;
bool p = nir_shader_instructions_pass(shader, process_instr,
nir_metadata_block_index |
+11 -1
View File
@@ -117,7 +117,17 @@ ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s)
progress |= OPT(s, nir_lower_alu);
progress |= OPT(s, nir_lower_pack);
progress |= OPT(s, nir_opt_constant_folding);
progress |= OPT(s, nir_opt_offsets);
static const nir_opt_offsets_options offset_options = {
/* How large an offset we can encode in the instr's immediate field.
*/
.uniform_max = (1 << 9) - 1,
.shared_max = ~0,
.buffer_max = ~0,
};
progress |= OPT(s, nir_opt_offsets, &offset_options);
nir_load_store_vectorize_options vectorize_opts = {
.modes = nir_var_mem_ubo,