From 5987e47a2967b1dcc13efcb926e1a86d641aec86 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 17 Nov 2023 13:49:47 -0400 Subject: [PATCH] asahi: rework GS input assembly in prep for tessellation (which will share the IA lowering), and for multidraw indirect (which greatly complicates IA lowering with geom/tess). Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/lib/agx_nir_lower_gs.c | 109 ++++++------------ src/asahi/lib/agx_nir_lower_gs.h | 10 +- src/asahi/lib/agx_nir_lower_ia.c | 61 ++++++++++ src/asahi/lib/meson.build | 1 + src/asahi/lib/shaders/geometry.cl | 38 ++++-- src/asahi/lib/shaders/geometry.h | 19 ++- src/compiler/nir/nir_intrinsics.py | 3 + .../drivers/asahi/agx_nir_lower_sysvals.c | 2 + src/gallium/drivers/asahi/agx_state.c | 16 ++- src/gallium/drivers/asahi/agx_state.h | 3 + 10 files changed, 158 insertions(+), 104 deletions(-) create mode 100644 src/asahi/lib/agx_nir_lower_ia.c diff --git a/src/asahi/lib/agx_nir_lower_gs.c b/src/asahi/lib/agx_nir_lower_gs.c index b4cd490a7d4..d2bbb067cb3 100644 --- a/src/asahi/lib/agx_nir_lower_gs.c +++ b/src/asahi/lib/agx_nir_lower_gs.c @@ -194,55 +194,6 @@ load_instance_id(nir_builder *b) return nir_channel(b, nir_load_global_invocation_id(b, 32), 1); } -static nir_def * -load_vs_vertex_id(nir_builder *b, struct agx_ia_key *key) -{ - /* Tessellate by primitive mode */ - nir_def *id = libagx_vertex_id_for_topology( - b, nir_imm_int(b, key->mode), nir_imm_bool(b, key->flatshade_first), - load_primitive_id(b), nir_load_vertex_id_in_primitive_agx(b), - nir_channel(b, nir_load_num_workgroups(b), 0)); - - /* If drawing with an index buffer, pull the vertex ID. */ - if (key->index_size) { - nir_def *index_buffer = load_geometry_param(b, input_index_buffer); - nir_def *offset = nir_imul_imm(b, id, key->index_size); - nir_def *address = nir_iadd(b, index_buffer, nir_u2u64(b, offset)); - nir_def *index = nir_load_global_constant(b, address, key->index_size, 1, - key->index_size * 8); - - id = nir_u2uN(b, index, id->bit_size); - } - - /* Add the "start", either an index bias or a base vertex. This must happen - * after indexing for proper index bias behaviour. - */ - return nir_iadd(b, id, nir_load_first_vertex(b)); -} - -static bool -lower_input_assembly(nir_builder *b, nir_instr *instr, void *data) -{ - if (instr->type != nir_instr_type_intrinsic) - return false; - - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - nir_def *id = NULL; - b->cursor = nir_before_instr(instr); - - if (intr->intrinsic == nir_intrinsic_load_vertex_id) - id = load_vs_vertex_id(b, data); - else if (intr->intrinsic == nir_intrinsic_load_instance_id) - id = load_instance_id(b); - else - return false; - - assert(intr->def.bit_size == 32); - nir_def_rewrite_uses(&intr->def, id); - nir_instr_remove(instr); - return true; -} - static bool lower_gs_inputs(nir_builder *b, nir_intrinsic_instr *intr, void *data) { @@ -363,9 +314,8 @@ agx_nir_link_vs_gs(nir_shader *vs, nir_shader *gs) static nir_def * calc_unrolled_id(nir_builder *b) { - nir_def *per_instance = nir_channel(b, nir_load_num_workgroups(b), 0); - - return nir_iadd(b, nir_imul(b, load_instance_id(b), per_instance), + return nir_iadd(b, + nir_imul(b, load_instance_id(b), nir_load_num_vertices(b)), load_primitive_id(b)); } @@ -428,6 +378,26 @@ lower_gs_count_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data) } } +static bool +lower_id(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + b->cursor = nir_before_instr(&intr->instr); + + nir_def *id; + if (intr->intrinsic == nir_intrinsic_load_primitive_id) + id = load_primitive_id(b); + else if (intr->intrinsic == nir_intrinsic_load_instance_id) + id = load_instance_id(b); + else if (intr->intrinsic == nir_intrinsic_load_num_vertices) + id = nir_channel(b, nir_load_num_workgroups(b), 0); + else + return false; + + b->cursor = nir_instr_remove(&intr->instr); + nir_def_rewrite_uses(&intr->def, id); + return true; +} + /* * Create a "Geometry count" shader. This is a stripped down geometry shader * that just write its number of emitted vertices / primitives / transform @@ -452,6 +422,9 @@ agx_nir_create_geometry_count_shader(nir_shader *gs, const nir_shader *libagx, NIR_PASS_V(shader, nir_shader_intrinsics_pass, lower_gs_count_instr, nir_metadata_block_index | nir_metadata_dominance, state); + NIR_PASS_V(shader, nir_shader_intrinsics_pass, lower_id, + nir_metadata_block_index | nir_metadata_dominance, NULL); + /* Preprocess it */ UNUSED struct agx_uncompiled_shader_info info; agx_preprocess_nir(shader, libagx, false, &info); @@ -792,20 +765,6 @@ lower_gs_instr(nir_builder *b, nir_intrinsic_instr *intr, void *state) return true; } -/* - * Lower load_primitive_id to something compute-like. - */ -static bool -lower_primitive_id(nir_builder *b, nir_intrinsic_instr *intr, void *data) -{ - if (intr->intrinsic != nir_intrinsic_load_primitive_id) - return false; - - b->cursor = nir_instr_remove(&intr->instr); - nir_def_rewrite_uses(&intr->def, load_primitive_id(b)); - return true; -} - static bool collect_components(nir_builder *b, nir_intrinsic_instr *intr, void *data) { @@ -1013,15 +972,13 @@ link_libagx(nir_shader *nir, const nir_shader *libagx) void agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx, - struct agx_ia_key *ia, bool rasterizer_discard, - nir_shader **gs_count, nir_shader **gs_copy, - nir_shader **pre_gs, enum mesa_prim *out_mode, - unsigned *out_count_words) + bool rasterizer_discard, nir_shader **gs_count, + nir_shader **gs_copy, nir_shader **pre_gs, + enum mesa_prim *out_mode, unsigned *out_count_words) { - /* Lower input assembly on the vertex shader */ - NIR_PASS_V(vs, nir_shader_instructions_pass, lower_input_assembly, - nir_metadata_block_index | nir_metadata_dominance, ia); link_libagx(vs, libagx); + NIR_PASS_V(vs, nir_lower_idiv, + &(const nir_lower_idiv_options){.allow_fp16 = true}); /* Collect output component counts so we can size the geometry output buffer * appropriately, instead of assuming everything is vec4. @@ -1058,9 +1015,6 @@ agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx, nir_lower_gs_intrinsics_always_end_primitive | nir_lower_gs_intrinsics_count_decomposed_primitives); - NIR_PASS_V(gs, nir_shader_intrinsics_pass, lower_primitive_id, - nir_metadata_block_index | nir_metadata_dominance, NULL); - /* Clean up after all that lowering we did */ bool progress = false; do { @@ -1165,6 +1119,8 @@ agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx, NIR_PASS_V(gs, nir_opt_sink, ~0); NIR_PASS_V(gs, nir_opt_move, ~0); + NIR_PASS_V(gs, nir_shader_intrinsics_pass, lower_id, + nir_metadata_block_index | nir_metadata_dominance, NULL); /* Create auxiliary programs */ *gs_copy = agx_nir_create_gs_copy_shader(&gs_state, outputs_rasterized(gs), @@ -1206,6 +1162,7 @@ agx_nir_gs_setup_indirect(const nir_shader *libagx, enum mesa_prim prim) MESA_SHADER_COMPUTE, &agx_nir_options, "GS indirect setup"); libagx_gs_setup_indirect(&b, nir_load_geometry_param_buffer_agx(&b), + nir_load_input_assembly_buffer_agx(&b), nir_imm_int(&b, prim)); UNUSED struct agx_uncompiled_shader_info info; diff --git a/src/asahi/lib/agx_nir_lower_gs.h b/src/asahi/lib/agx_nir_lower_gs.h index a773ae91451..1a5533edffc 100644 --- a/src/asahi/lib/agx_nir_lower_gs.h +++ b/src/asahi/lib/agx_nir_lower_gs.h @@ -12,11 +12,13 @@ struct nir_shader; struct agx_ia_key; enum mesa_prim; +void agx_nir_lower_ia(struct nir_shader *s, struct agx_ia_key *ia); + void agx_nir_lower_gs(struct nir_shader *gs, struct nir_shader *vs, - const struct nir_shader *libagx, struct agx_ia_key *ia, - bool rasterizer_discard, struct nir_shader **gs_count, - struct nir_shader **gs_copy, struct nir_shader **pre_gs, - enum mesa_prim *out_mode, unsigned *out_count_words); + const struct nir_shader *libagx, bool rasterizer_discard, + struct nir_shader **gs_count, struct nir_shader **gs_copy, + struct nir_shader **pre_gs, enum mesa_prim *out_mode, + unsigned *out_count_words); struct nir_shader *agx_nir_prefix_sum_gs(const struct nir_shader *libagx, unsigned words); diff --git a/src/asahi/lib/agx_nir_lower_ia.c b/src/asahi/lib/agx_nir_lower_ia.c new file mode 100644 index 00000000000..fb8061f2e1c --- /dev/null +++ b/src/asahi/lib/agx_nir_lower_ia.c @@ -0,0 +1,61 @@ +/* + * Copyright 2023 Valve Corporation + * SPDX-License-Identifier: MIT + */ + +#include "asahi/compiler/agx_compile.h" +#include "compiler/nir/nir_builder.h" +#include "shaders/geometry.h" +#include "agx_nir_lower_gs.h" +#include "libagx_shaders.h" +#include "nir.h" +#include "nir_builder_opcodes.h" + +static nir_def * +load_vertex_id(nir_builder *b, struct agx_ia_key *key) +{ + /* Tessellate by primitive mode */ + nir_def *id = libagx_vertex_id_for_topology( + b, nir_imm_int(b, key->mode), nir_imm_bool(b, key->flatshade_first), + nir_load_primitive_id(b), nir_load_vertex_id_in_primitive_agx(b), + nir_load_num_vertices(b)); + + /* If drawing with an index buffer, pull the vertex ID. Otherwise, the + * vertex ID is just the index as-is. + */ + if (key->index_size) { + nir_def *address = + libagx_index_buffer(b, nir_load_input_assembly_buffer_agx(b), id, + nir_imm_int(b, key->index_size)); + + nir_def *index = nir_load_global_constant(b, address, key->index_size, 1, + key->index_size * 8); + + id = nir_u2uN(b, index, id->bit_size); + } + + /* Add the "start", either an index bias or a base vertex. This must happen + * after indexing for proper index bias behaviour. + */ + return nir_iadd(b, id, nir_load_first_vertex(b)); +} + +static bool +lower_vertex_id(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + if (intr->intrinsic != nir_intrinsic_load_vertex_id) + return false; + + b->cursor = nir_instr_remove(&intr->instr); + assert(intr->def.bit_size == 32); + nir_def_rewrite_uses(&intr->def, load_vertex_id(b, data)); + return true; +} + +void +agx_nir_lower_ia(nir_shader *s, struct agx_ia_key *ia) +{ + nir_shader_intrinsics_pass(s, lower_vertex_id, + nir_metadata_block_index | nir_metadata_dominance, + ia); +} diff --git a/src/asahi/lib/meson.build b/src/asahi/lib/meson.build index 99a84325115..ebfb5a83125 100644 --- a/src/asahi/lib/meson.build +++ b/src/asahi/lib/meson.build @@ -14,6 +14,7 @@ libasahi_lib_files = files( 'agx_tilebuffer.c', 'agx_nir_lower_alpha.c', 'agx_nir_lower_gs.c', + 'agx_nir_lower_ia.c', 'agx_nir_lower_msaa.c', 'agx_nir_lower_sample_intrinsics.c', 'agx_nir_lower_tilebuffer.c', diff --git a/src/asahi/lib/shaders/geometry.cl b/src/asahi/lib/shaders/geometry.cl index 472e36f857f..d5cb6b42843 100644 --- a/src/asahi/lib/shaders/geometry.cl +++ b/src/asahi/lib/shaders/geometry.cl @@ -103,6 +103,13 @@ libagx_vertex_id_for_topology(enum mesa_prim mode, bool flatshade_first, } } +uintptr_t +libagx_index_buffer(constant struct agx_ia_state *p, uint id, + uint index_size) +{ + return (uintptr_t)&p->index_buffer[id * index_size]; +} + uint libagx_setup_xfb_buffer(global struct agx_geometry_params *p, uint i) { @@ -190,19 +197,30 @@ libagx_build_gs_draw(global struct agx_geometry_params *p, bool indexed, } } -void -libagx_gs_setup_indirect(global struct agx_geometry_params *p, - enum mesa_prim mode) +uint2 +process_draw(global uint *draw, enum mesa_prim mode) { /* Regardless of indexing being enabled, this holds */ - uint vertex_count = p->input_indirect_desc[0]; - uint instance_count = p->input_indirect_desc[1]; + uint vertex_count = draw[0]; + uint instance_count = draw[1]; uint prim_per_instance = u_decomposed_prims_for_vertices(mode, vertex_count); - p->input_primitives = prim_per_instance * instance_count; + return (uint2)(prim_per_instance, instance_count); +} - p->gs_grid[0] = prim_per_instance; - p->gs_grid[1] = instance_count; +void +libagx_gs_setup_indirect(global struct agx_geometry_params *p, + global struct agx_ia_state *ia, enum mesa_prim mode) +{ + /* Determine the (primitives, instances) grid size. */ + uint2 draw = process_draw(p->input_indirect_desc, mode); + + /* There are primitives*instances primitives total */ + p->input_primitives = draw.x * draw.y; + + /* Invoke as (primitives, instances, 1) */ + p->gs_grid[0] = draw.x; + p->gs_grid[1] = draw.y; p->gs_grid[2] = 1; /* If indexing is enabled, the third word is the offset into the index buffer @@ -210,8 +228,8 @@ libagx_gs_setup_indirect(global struct agx_geometry_params *p, * indirect draw, the hardware would do this for us, but for software input * assembly we need to do it ourselves. */ - if (p->input_index_buffer) { - p->input_index_buffer += p->input_indirect_desc[2] * p->index_size_B; + if (ia->index_buffer) { + ia->index_buffer += p->input_indirect_desc[2] * ia->index_size_B; } /* We may need to allocate a GS count buffer, do so now */ diff --git a/src/asahi/lib/shaders/geometry.h b/src/asahi/lib/shaders/geometry.h index f8e283f2608..593a38b1d0f 100644 --- a/src/asahi/lib/shaders/geometry.h +++ b/src/asahi/lib/shaders/geometry.h @@ -32,6 +32,14 @@ struct agx_ia_key { bool flatshade_first; }; +struct agx_ia_state { + /* Input: index buffer if present. */ + GLOBAL(uchar) index_buffer; + + /* The index size (1, 2, 4) or 0 if drawing without an index buffer. */ + uint8_t index_size_B; +} PACKED; + /* Packed geometry state buffer */ struct agx_geometry_state { /* Heap to allocate from, in either direction. By convention, the top is used @@ -81,11 +89,6 @@ struct agx_geometry_params { */ uint32_t xfb_prims[MAX_VERTEX_STREAMS]; - /* Address of input index buffer for an indexed draw (this includes - * tessellation - it's the index buffer coming into the geometry stage). - */ - GLOBAL(uchar) input_index_buffer; - /* Address of input indirect buffer for indirect GS draw */ GLOBAL(uint) input_indirect_desc; @@ -104,12 +107,6 @@ struct agx_geometry_params { * allocating counts. */ uint32_t count_buffer_stride; - - /* Size of a single input index in bytes, or 0 if indexing is disabled. - * - * index_size_B == 0 <==> input_index_buffer == NULL - */ - uint32_t index_size_B; } PACKED; #endif diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 8ac4ee3f5e6..48c7a28c527 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1855,6 +1855,9 @@ barrier("fence_mem_to_tex_agx") # act like render target writes, in conjunction with fragment interlock. barrier("fence_pbe_to_tex_pixel_agx") +# Address of state for AGX input assembly lowering for geometry/tessellation +system_value("input_assembly_buffer_agx", 1, bit_sizes=[64]) + # Address of the parameter buffer for AGX geometry shaders system_value("geometry_param_buffer_agx", 1, bit_sizes=[64]) diff --git a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c index ebd743c5b78..b9c102faf3a 100644 --- a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c +++ b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c @@ -168,6 +168,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr) return load_sysval_root(b, 1, 32, &u->draw_id); case nir_intrinsic_load_layer_id_written_agx: return load_sysval_root(b, 1, 16, &u->layer_id_written); + case nir_intrinsic_load_input_assembly_buffer_agx: + return load_sysval_root(b, 1, 64, &u->input_assembly); case nir_intrinsic_load_geometry_param_buffer_agx: return load_sysval_root(b, 1, 64, &u->geometry_params); default: diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index c748a741041..45708253cd9 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -1716,13 +1716,16 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx, NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL); + /* Lower IA before VS sysvals to correctly handle indirect multidraws */ + agx_nir_lower_ia(vs, &key->ia); + /* Lower VS sysvals before it's merged in, so we access the correct shader * stage for UBOs etc. */ NIR_PASS_V(vs, agx_nir_lower_sysvals); /* Link VS with GS */ - NIR_PASS_V(nir, agx_nir_lower_gs, vs, dev->libagx, &key->ia, + NIR_PASS_V(nir, agx_nir_lower_gs, vs, dev->libagx, key->rasterizer_discard, &gs_count, &gs_copy, &pre_gs, &gs_out_prim, &gs_out_count_words); ralloc_free(vs); @@ -3394,11 +3397,18 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer, const struct pipe_draw_start_count_bias *draw, const struct pipe_draw_indirect_info *indirect) { + /* XXX move me */ + struct agx_ia_state ia = { + .index_buffer = input_index_buffer, + .index_size_B = info->index_size, + }; + + batch->uniforms.input_assembly = + agx_pool_upload_aligned(&batch->pool, &ia, sizeof(ia), 8); + struct agx_geometry_params params = { .state = agx_batch_geometry_state(batch), .indirect_desc = batch->geom_indirect, - .input_index_buffer = input_index_buffer, - .index_size_B = info->index_size, }; for (unsigned i = 0; i < ARRAY_SIZE(batch->ctx->streamout.targets); ++i) { diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h index ac5c0adac67..f9c92677931 100644 --- a/src/gallium/drivers/asahi/agx_state.h +++ b/src/gallium/drivers/asahi/agx_state.h @@ -99,6 +99,9 @@ struct PACKED agx_draw_uniforms { /* Vertex buffer object bases, if present */ uint64_t vbo_base[PIPE_MAX_ATTRIBS]; + /* Address of input assembly buffer if geom/tess is used, else 0 */ + uint64_t input_assembly; + /* Address of geometry param buffer if geometry shaders are used, else 0 */ uint64_t geometry_params;