agx: Emit fewer combines for intrinsics
A bunch of the emitted combines were unnecessary, or unnecessarily large. Fix the accounting now that combines are variable size. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18687>
This commit is contained in:
committed by
Marge Bot
parent
e887a11b06
commit
e302e5d527
@@ -143,7 +143,11 @@ static agx_instr *
|
||||
agx_emit_combine_to(agx_builder *b, agx_index dst, unsigned nr_srcs,
|
||||
agx_index *srcs)
|
||||
{
|
||||
agx_cache_combine(b, dst, 4, srcs);
|
||||
agx_cache_combine(b, dst, nr_srcs, srcs);
|
||||
|
||||
if (nr_srcs == 1)
|
||||
return agx_mov_to(b, dst, srcs[0]);
|
||||
|
||||
agx_instr *I = agx_p_combine_to(b, dst, nr_srcs);
|
||||
|
||||
agx_foreach_src(I, s)
|
||||
@@ -303,7 +307,7 @@ agx_udiv_const(agx_builder *b, agx_index P, uint32_t Q)
|
||||
|
||||
/* AGX appears to lack support for vertex attributes. Lower to global loads. */
|
||||
static void
|
||||
agx_emit_load_attr(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr)
|
||||
agx_emit_load_attr(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
|
||||
{
|
||||
nir_src *offset_src = nir_get_io_offset_src(instr);
|
||||
assert(nir_src_is_const(*offset_src) && "no attribute indirects");
|
||||
@@ -343,6 +347,7 @@ agx_emit_load_attr(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr)
|
||||
BITFIELD_MASK(attrib.nr_comps_minus_1 + 1), 0);
|
||||
agx_wait(b, 0);
|
||||
|
||||
agx_index dests[4] = { agx_null() };
|
||||
agx_emit_split(b, dests, vec, actual_comps);
|
||||
|
||||
agx_index one = agx_mov_imm(b, 32, fui(1.0));
|
||||
@@ -351,10 +356,12 @@ agx_emit_load_attr(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr)
|
||||
|
||||
for (unsigned i = actual_comps; i < instr->num_components; ++i)
|
||||
dests[i] = default_value[i];
|
||||
|
||||
agx_emit_combine_to(b, dest, instr->num_components, dests);
|
||||
}
|
||||
|
||||
static void
|
||||
agx_emit_load_vary_flat(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr)
|
||||
agx_emit_load_vary_flat(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
|
||||
{
|
||||
unsigned components = instr->num_components;
|
||||
assert(components >= 1 && components <= 4);
|
||||
@@ -370,6 +377,7 @@ agx_emit_load_vary_flat(agx_builder *b, agx_index *dests, nir_intrinsic_instr *i
|
||||
agx_index cf = agx_get_cf(b->shader, false, false,
|
||||
sem.location + nir_src_as_uint(*offset), 0,
|
||||
components);
|
||||
agx_index dests[4] = { agx_null() };
|
||||
|
||||
for (unsigned i = 0; i < components; ++i) {
|
||||
/* vec3 for each vertex, unknown what first 2 channels are for */
|
||||
@@ -380,10 +388,12 @@ agx_emit_load_vary_flat(agx_builder *b, agx_index *dests, nir_intrinsic_instr *i
|
||||
/* Each component accesses a sequential coefficient register */
|
||||
cf.value++;
|
||||
}
|
||||
|
||||
agx_emit_combine_to(b, dest, components, dests);
|
||||
}
|
||||
|
||||
static void
|
||||
agx_emit_load_vary(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr)
|
||||
agx_emit_load_vary(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
|
||||
{
|
||||
ASSERTED unsigned components = instr->num_components;
|
||||
nir_intrinsic_instr *bary = nir_src_as_intrinsic(instr->src[0]);
|
||||
@@ -409,9 +419,8 @@ agx_emit_load_vary(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr)
|
||||
sem.location + nir_src_as_uint(*offset), 0,
|
||||
components);
|
||||
|
||||
agx_index vec = agx_vec_for_intr(b->shader, instr);
|
||||
agx_iter_to(b, vec, I, J, components, perspective);
|
||||
agx_emit_split(b, dests, vec, components);
|
||||
agx_iter_to(b, dest, I, J, components, perspective);
|
||||
agx_emit_cached_split(b, dest, components);
|
||||
}
|
||||
|
||||
static agx_instr *
|
||||
@@ -468,7 +477,7 @@ agx_emit_fragment_out(agx_builder *b, nir_intrinsic_instr *instr)
|
||||
}
|
||||
|
||||
static void
|
||||
agx_emit_load_tile(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr)
|
||||
agx_emit_load_tile(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
|
||||
{
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
|
||||
unsigned loc = sem.location;
|
||||
@@ -483,9 +492,8 @@ agx_emit_load_tile(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr)
|
||||
b->shader->did_writeout = true;
|
||||
b->shader->out->reads_tib = true;
|
||||
|
||||
agx_index vec = agx_vec_for_dest(b->shader, &instr->dest);
|
||||
agx_ld_tile_to(b, vec, b->shader->key->fs.tib_formats[rt]);
|
||||
agx_emit_split(b, dests, vec, 4);
|
||||
agx_ld_tile_to(b, dest, b->shader->key->fs.tib_formats[rt]);
|
||||
agx_emit_cached_split(b, dest, 4);
|
||||
}
|
||||
|
||||
static enum agx_format
|
||||
@@ -500,18 +508,16 @@ agx_format_for_bits(unsigned bits)
|
||||
}
|
||||
|
||||
static void
|
||||
agx_emit_load_global(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr)
|
||||
agx_emit_load_global(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
|
||||
{
|
||||
agx_index addr = agx_src_index(&instr->src[0]);
|
||||
agx_index offset = agx_immediate(0);
|
||||
enum agx_format fmt = agx_format_for_bits(nir_dest_bit_size(instr->dest));
|
||||
|
||||
agx_index vec = agx_vec_for_intr(b->shader, instr);
|
||||
agx_device_load_to(b, vec, addr, offset, fmt,
|
||||
agx_device_load_to(b, dest, addr, offset, fmt,
|
||||
BITFIELD_MASK(nir_dest_num_components(instr->dest)), 0);
|
||||
agx_wait(b, 0);
|
||||
|
||||
agx_emit_split(b, dests, vec, 4);
|
||||
agx_emit_cached_split(b, dest, nir_dest_num_components(instr->dest));
|
||||
}
|
||||
|
||||
static agx_instr *
|
||||
@@ -555,8 +561,10 @@ agx_emit_load_ubo(agx_builder *b, agx_index dst, nir_intrinsic_instr *instr)
|
||||
* might not be used, we only emit code for components that are actually used.
|
||||
*/
|
||||
static void
|
||||
agx_emit_load_frag_coord(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr)
|
||||
agx_emit_load_frag_coord(agx_builder *b, agx_index dst, nir_intrinsic_instr *instr)
|
||||
{
|
||||
agx_index dests[4] = { agx_null() };
|
||||
|
||||
u_foreach_bit(i, nir_ssa_def_components_read(&instr->dest.ssa)) {
|
||||
if (i < 2) {
|
||||
dests[i] = agx_fadd(b, agx_convert(b, agx_immediate(AGX_CONVERT_U32_TO_F),
|
||||
@@ -567,6 +575,8 @@ agx_emit_load_frag_coord(agx_builder *b, agx_index *dests, nir_intrinsic_instr *
|
||||
dests[i] = agx_iter(b, cf, agx_null(), 1, false);
|
||||
}
|
||||
}
|
||||
|
||||
agx_emit_combine_to(b, dst, 4, dests);
|
||||
}
|
||||
|
||||
static agx_instr *
|
||||
@@ -604,7 +614,6 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
|
||||
agx_index dst = nir_intrinsic_infos[instr->intrinsic].has_dest ?
|
||||
agx_dest_index(&instr->dest) : agx_null();
|
||||
gl_shader_stage stage = b->shader->stage;
|
||||
agx_index dests[4] = { agx_null() };
|
||||
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_load_barycentric_pixel:
|
||||
@@ -616,23 +625,23 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
|
||||
return NULL;
|
||||
case nir_intrinsic_load_interpolated_input:
|
||||
assert(stage == MESA_SHADER_FRAGMENT);
|
||||
agx_emit_load_vary(b, dests, instr);
|
||||
break;
|
||||
agx_emit_load_vary(b, dst, instr);
|
||||
return NULL;
|
||||
|
||||
case nir_intrinsic_load_input:
|
||||
if (stage == MESA_SHADER_FRAGMENT)
|
||||
agx_emit_load_vary_flat(b, dests, instr);
|
||||
agx_emit_load_vary_flat(b, dst, instr);
|
||||
else if (stage == MESA_SHADER_VERTEX)
|
||||
agx_emit_load_attr(b, dests, instr);
|
||||
agx_emit_load_attr(b, dst, instr);
|
||||
else
|
||||
unreachable("Unsupported shader stage");
|
||||
|
||||
break;
|
||||
return NULL;
|
||||
|
||||
case nir_intrinsic_load_global:
|
||||
case nir_intrinsic_load_global_constant:
|
||||
agx_emit_load_global(b, dests, instr);
|
||||
break;
|
||||
agx_emit_load_global(b, dst, instr);
|
||||
return NULL;
|
||||
|
||||
case nir_intrinsic_store_output:
|
||||
if (stage == MESA_SHADER_FRAGMENT)
|
||||
@@ -644,16 +653,16 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
|
||||
|
||||
case nir_intrinsic_load_output:
|
||||
assert(stage == MESA_SHADER_FRAGMENT);
|
||||
agx_emit_load_tile(b, dests, instr);
|
||||
break;
|
||||
agx_emit_load_tile(b, dst, instr);
|
||||
return NULL;
|
||||
|
||||
case nir_intrinsic_load_ubo:
|
||||
case nir_intrinsic_load_kernel_input:
|
||||
return agx_emit_load_ubo(b, dst, instr);
|
||||
|
||||
case nir_intrinsic_load_frag_coord:
|
||||
agx_emit_load_frag_coord(b, dests, instr);
|
||||
break;
|
||||
agx_emit_load_frag_coord(b, dst, instr);
|
||||
return NULL;
|
||||
|
||||
case nir_intrinsic_discard:
|
||||
return agx_emit_discard(b, instr);
|
||||
@@ -680,14 +689,6 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
|
||||
fprintf(stderr, "Unhandled intrinsic %s\n", nir_intrinsic_infos[instr->intrinsic].name);
|
||||
unreachable("Unhandled intrinsic");
|
||||
}
|
||||
|
||||
/* If we got here, there is a vector destination for the intrinsic composed
|
||||
* of separate scalars. Its components are specified separately in the dests
|
||||
* array. We need to combine them so the vector destination itself is valid.
|
||||
* If only individual components are accessed, this combine will be dead code
|
||||
* eliminated.
|
||||
*/
|
||||
return agx_emit_combine_to(b, dst, 4, dests);
|
||||
}
|
||||
|
||||
static agx_index
|
||||
|
||||
Reference in New Issue
Block a user