From e302e5d527fb2bee1d5863983de8f631259f0470 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sat, 10 Sep 2022 16:24:26 -0400 Subject: [PATCH] agx: Emit fewer combines for intrinsics A bunch of the emitted combines were unnecessary, or unnecessarily large. Fix the accounting now that combines are variable size. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/compiler/agx_compile.c | 75 ++++++++++++++++---------------- 1 file changed, 38 insertions(+), 37 deletions(-) diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index 27eb8318a74..3cb7b45d497 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -143,7 +143,11 @@ static agx_instr * agx_emit_combine_to(agx_builder *b, agx_index dst, unsigned nr_srcs, agx_index *srcs) { - agx_cache_combine(b, dst, 4, srcs); + agx_cache_combine(b, dst, nr_srcs, srcs); + + if (nr_srcs == 1) + return agx_mov_to(b, dst, srcs[0]); + agx_instr *I = agx_p_combine_to(b, dst, nr_srcs); agx_foreach_src(I, s) @@ -303,7 +307,7 @@ agx_udiv_const(agx_builder *b, agx_index P, uint32_t Q) /* AGX appears to lack support for vertex attributes. Lower to global loads. */ static void -agx_emit_load_attr(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr) +agx_emit_load_attr(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr) { nir_src *offset_src = nir_get_io_offset_src(instr); assert(nir_src_is_const(*offset_src) && "no attribute indirects"); @@ -343,6 +347,7 @@ agx_emit_load_attr(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr) BITFIELD_MASK(attrib.nr_comps_minus_1 + 1), 0); agx_wait(b, 0); + agx_index dests[4] = { agx_null() }; agx_emit_split(b, dests, vec, actual_comps); agx_index one = agx_mov_imm(b, 32, fui(1.0)); @@ -351,10 +356,12 @@ agx_emit_load_attr(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr) for (unsigned i = actual_comps; i < instr->num_components; ++i) dests[i] = default_value[i]; + + agx_emit_combine_to(b, dest, instr->num_components, dests); } static void -agx_emit_load_vary_flat(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr) +agx_emit_load_vary_flat(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr) { unsigned components = instr->num_components; assert(components >= 1 && components <= 4); @@ -370,6 +377,7 @@ agx_emit_load_vary_flat(agx_builder *b, agx_index *dests, nir_intrinsic_instr *i agx_index cf = agx_get_cf(b->shader, false, false, sem.location + nir_src_as_uint(*offset), 0, components); + agx_index dests[4] = { agx_null() }; for (unsigned i = 0; i < components; ++i) { /* vec3 for each vertex, unknown what first 2 channels are for */ @@ -380,10 +388,12 @@ agx_emit_load_vary_flat(agx_builder *b, agx_index *dests, nir_intrinsic_instr *i /* Each component accesses a sequential coefficient register */ cf.value++; } + + agx_emit_combine_to(b, dest, components, dests); } static void -agx_emit_load_vary(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr) +agx_emit_load_vary(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr) { ASSERTED unsigned components = instr->num_components; nir_intrinsic_instr *bary = nir_src_as_intrinsic(instr->src[0]); @@ -409,9 +419,8 @@ agx_emit_load_vary(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr) sem.location + nir_src_as_uint(*offset), 0, components); - agx_index vec = agx_vec_for_intr(b->shader, instr); - agx_iter_to(b, vec, I, J, components, perspective); - agx_emit_split(b, dests, vec, components); + agx_iter_to(b, dest, I, J, components, perspective); + agx_emit_cached_split(b, dest, components); } static agx_instr * @@ -468,7 +477,7 @@ agx_emit_fragment_out(agx_builder *b, nir_intrinsic_instr *instr) } static void -agx_emit_load_tile(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr) +agx_emit_load_tile(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr) { nir_io_semantics sem = nir_intrinsic_io_semantics(instr); unsigned loc = sem.location; @@ -483,9 +492,8 @@ agx_emit_load_tile(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr) b->shader->did_writeout = true; b->shader->out->reads_tib = true; - agx_index vec = agx_vec_for_dest(b->shader, &instr->dest); - agx_ld_tile_to(b, vec, b->shader->key->fs.tib_formats[rt]); - agx_emit_split(b, dests, vec, 4); + agx_ld_tile_to(b, dest, b->shader->key->fs.tib_formats[rt]); + agx_emit_cached_split(b, dest, 4); } static enum agx_format @@ -500,18 +508,16 @@ agx_format_for_bits(unsigned bits) } static void -agx_emit_load_global(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr) +agx_emit_load_global(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr) { agx_index addr = agx_src_index(&instr->src[0]); agx_index offset = agx_immediate(0); enum agx_format fmt = agx_format_for_bits(nir_dest_bit_size(instr->dest)); - agx_index vec = agx_vec_for_intr(b->shader, instr); - agx_device_load_to(b, vec, addr, offset, fmt, + agx_device_load_to(b, dest, addr, offset, fmt, BITFIELD_MASK(nir_dest_num_components(instr->dest)), 0); agx_wait(b, 0); - - agx_emit_split(b, dests, vec, 4); + agx_emit_cached_split(b, dest, nir_dest_num_components(instr->dest)); } static agx_instr * @@ -555,8 +561,10 @@ agx_emit_load_ubo(agx_builder *b, agx_index dst, nir_intrinsic_instr *instr) * might not be used, we only emit code for components that are actually used. */ static void -agx_emit_load_frag_coord(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr) +agx_emit_load_frag_coord(agx_builder *b, agx_index dst, nir_intrinsic_instr *instr) { + agx_index dests[4] = { agx_null() }; + u_foreach_bit(i, nir_ssa_def_components_read(&instr->dest.ssa)) { if (i < 2) { dests[i] = agx_fadd(b, agx_convert(b, agx_immediate(AGX_CONVERT_U32_TO_F), @@ -567,6 +575,8 @@ agx_emit_load_frag_coord(agx_builder *b, agx_index *dests, nir_intrinsic_instr * dests[i] = agx_iter(b, cf, agx_null(), 1, false); } } + + agx_emit_combine_to(b, dst, 4, dests); } static agx_instr * @@ -604,7 +614,6 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr) agx_index dst = nir_intrinsic_infos[instr->intrinsic].has_dest ? agx_dest_index(&instr->dest) : agx_null(); gl_shader_stage stage = b->shader->stage; - agx_index dests[4] = { agx_null() }; switch (instr->intrinsic) { case nir_intrinsic_load_barycentric_pixel: @@ -616,23 +625,23 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr) return NULL; case nir_intrinsic_load_interpolated_input: assert(stage == MESA_SHADER_FRAGMENT); - agx_emit_load_vary(b, dests, instr); - break; + agx_emit_load_vary(b, dst, instr); + return NULL; case nir_intrinsic_load_input: if (stage == MESA_SHADER_FRAGMENT) - agx_emit_load_vary_flat(b, dests, instr); + agx_emit_load_vary_flat(b, dst, instr); else if (stage == MESA_SHADER_VERTEX) - agx_emit_load_attr(b, dests, instr); + agx_emit_load_attr(b, dst, instr); else unreachable("Unsupported shader stage"); - break; + return NULL; case nir_intrinsic_load_global: case nir_intrinsic_load_global_constant: - agx_emit_load_global(b, dests, instr); - break; + agx_emit_load_global(b, dst, instr); + return NULL; case nir_intrinsic_store_output: if (stage == MESA_SHADER_FRAGMENT) @@ -644,16 +653,16 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr) case nir_intrinsic_load_output: assert(stage == MESA_SHADER_FRAGMENT); - agx_emit_load_tile(b, dests, instr); - break; + agx_emit_load_tile(b, dst, instr); + return NULL; case nir_intrinsic_load_ubo: case nir_intrinsic_load_kernel_input: return agx_emit_load_ubo(b, dst, instr); case nir_intrinsic_load_frag_coord: - agx_emit_load_frag_coord(b, dests, instr); - break; + agx_emit_load_frag_coord(b, dst, instr); + return NULL; case nir_intrinsic_discard: return agx_emit_discard(b, instr); @@ -680,14 +689,6 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr) fprintf(stderr, "Unhandled intrinsic %s\n", nir_intrinsic_infos[instr->intrinsic].name); unreachable("Unhandled intrinsic"); } - - /* If we got here, there is a vector destination for the intrinsic composed - * of separate scalars. Its components are specified separately in the dests - * array. We need to combine them so the vector destination itself is valid. - * If only individual components are accessed, this combine will be dead code - * eliminated. - */ - return agx_emit_combine_to(b, dst, 4, dests); } static agx_index