asahi: rework GS input assembly

in prep for tessellation (which will share the IA lowering), and for multidraw
indirect (which greatly complicates IA lowering with geom/tess).

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26614>
This commit is contained in:
Alyssa Rosenzweig
2023-11-17 13:49:47 -04:00
parent a816350d95
commit 5987e47a29
10 changed files with 158 additions and 104 deletions
+33 -76
View File
@@ -194,55 +194,6 @@ load_instance_id(nir_builder *b)
return nir_channel(b, nir_load_global_invocation_id(b, 32), 1);
}
/*
 * Emit NIR computing the vertex ID for a vertex shader whose input assembly is
 * performed in software.
 *
 * b:   builder positioned where the replacement code should be inserted.
 * key: input assembly key supplying the primitive mode, the flatshade_first
 *      flag and the index size (0 means the draw is not indexed).
 *
 * Returns the vertex ID with the first-vertex value already added.
 */
static nir_def *
load_vs_vertex_id(nir_builder *b, struct agx_ia_key *key)
{
   /* Ask libagx for the vertex index of this invocation, given the topology
    * described by the key, the primitive ID, the vertex within the primitive
    * and the X component of the workgroup count.
    */
   nir_def *vertex = libagx_vertex_id_for_topology(
      b, nir_imm_int(b, key->mode), nir_imm_bool(b, key->flatshade_first),
      load_primitive_id(b), nir_load_vertex_id_in_primitive_agx(b),
      nir_channel(b, nir_load_num_workgroups(b), 0));

   /* For an indexed draw, that value is only a position in the index buffer:
    * fetch the actual vertex ID from there.
    */
   if (key->index_size != 0) {
      nir_def *base = load_geometry_param(b, input_index_buffer);
      nir_def *byte_offset = nir_imul_imm(b, vertex, key->index_size);
      nir_def *ptr = nir_iadd(b, base, nir_u2u64(b, byte_offset));
      nir_def *fetched = nir_load_global_constant(b, ptr, key->index_size, 1,
                                                  key->index_size * 8);

      /* Widen the fetched index back to the width of the computed ID */
      vertex = nir_u2uN(b, fetched, vertex->bit_size);
   }

   /* The "start" (index bias or base vertex) is applied last, after the index
    * fetch, so that index bias behaves properly.
    */
   return nir_iadd(b, vertex, nir_load_first_vertex(b));
}
/*
 * Instruction callback: replace load_vertex_id and load_instance_id with
 * software-computed values.
 *
 * data is forwarded to load_vs_vertex_id as its input assembly key.
 *
 * Returns true if the instruction was rewritten and removed, false if it was
 * left untouched.
 */
static bool
lower_input_assembly(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   nir_def *replacement = NULL;

   /* New code goes immediately before the intrinsic being replaced */
   b->cursor = nir_before_instr(instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_load_vertex_id:
      replacement = load_vs_vertex_id(b, data);
      break;

   case nir_intrinsic_load_instance_id:
      replacement = load_instance_id(b);
      break;

   default:
      return false;
   }

   assert(intr->def.bit_size == 32);
   nir_def_rewrite_uses(&intr->def, replacement);
   nir_instr_remove(instr);
   return true;
}
static bool
lower_gs_inputs(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
@@ -363,9 +314,8 @@ agx_nir_link_vs_gs(nir_shader *vs, nir_shader *gs)
static nir_def *
calc_unrolled_id(nir_builder *b)
{
nir_def *per_instance = nir_channel(b, nir_load_num_workgroups(b), 0);
return nir_iadd(b, nir_imul(b, load_instance_id(b), per_instance),
return nir_iadd(b,
nir_imul(b, load_instance_id(b), nir_load_num_vertices(b)),
load_primitive_id(b));
}
@@ -428,6 +378,26 @@ lower_gs_count_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
}
}
/*
 * Intrinsic callback: replace load_primitive_id, load_instance_id and
 * load_num_vertices with values built by the local helpers (the latter is the
 * X component of the workgroup count).
 *
 * data is unused.
 *
 * Returns true if the intrinsic was replaced and removed, false otherwise.
 */
static bool
lower_id(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   nir_def *replacement;

   /* Build the replacement immediately before the intrinsic */
   b->cursor = nir_before_instr(&intr->instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_load_primitive_id:
      replacement = load_primitive_id(b);
      break;

   case nir_intrinsic_load_instance_id:
      replacement = load_instance_id(b);
      break;

   case nir_intrinsic_load_num_vertices:
      replacement = nir_channel(b, nir_load_num_workgroups(b), 0);
      break;

   default:
      return false;
   }

   /* Drop the intrinsic, then point its users at the replacement */
   b->cursor = nir_instr_remove(&intr->instr);
   nir_def_rewrite_uses(&intr->def, replacement);
   return true;
}
/*
* Create a "Geometry count" shader. This is a stripped down geometry shader
* that just writes its number of emitted vertices / primitives / transform
@@ -452,6 +422,9 @@ agx_nir_create_geometry_count_shader(nir_shader *gs, const nir_shader *libagx,
NIR_PASS_V(shader, nir_shader_intrinsics_pass, lower_gs_count_instr,
nir_metadata_block_index | nir_metadata_dominance, state);
NIR_PASS_V(shader, nir_shader_intrinsics_pass, lower_id,
nir_metadata_block_index | nir_metadata_dominance, NULL);
/* Preprocess it */
UNUSED struct agx_uncompiled_shader_info info;
agx_preprocess_nir(shader, libagx, false, &info);
@@ -792,20 +765,6 @@ lower_gs_instr(nir_builder *b, nir_intrinsic_instr *intr, void *state)
return true;
}
/*
 * Intrinsic callback replacing load_primitive_id with the value produced by
 * the load_primitive_id() helper, i.e. something compute-like.
 *
 * data is unused. Returns true if the intrinsic was replaced.
 */
static bool
lower_primitive_id(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   if (intr->intrinsic == nir_intrinsic_load_primitive_id) {
      /* Remove first; the cursor returned by the removal is where the
       * replacement gets built.
       */
      b->cursor = nir_instr_remove(&intr->instr);

      nir_def *prim = load_primitive_id(b);
      nir_def_rewrite_uses(&intr->def, prim);
      return true;
   }

   return false;
}
static bool
collect_components(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
@@ -1013,15 +972,13 @@ link_libagx(nir_shader *nir, const nir_shader *libagx)
void
agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx,
struct agx_ia_key *ia, bool rasterizer_discard,
nir_shader **gs_count, nir_shader **gs_copy,
nir_shader **pre_gs, enum mesa_prim *out_mode,
unsigned *out_count_words)
bool rasterizer_discard, nir_shader **gs_count,
nir_shader **gs_copy, nir_shader **pre_gs,
enum mesa_prim *out_mode, unsigned *out_count_words)
{
/* Lower input assembly on the vertex shader */
NIR_PASS_V(vs, nir_shader_instructions_pass, lower_input_assembly,
nir_metadata_block_index | nir_metadata_dominance, ia);
link_libagx(vs, libagx);
NIR_PASS_V(vs, nir_lower_idiv,
&(const nir_lower_idiv_options){.allow_fp16 = true});
/* Collect output component counts so we can size the geometry output buffer
* appropriately, instead of assuming everything is vec4.
@@ -1058,9 +1015,6 @@ agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx,
nir_lower_gs_intrinsics_always_end_primitive |
nir_lower_gs_intrinsics_count_decomposed_primitives);
NIR_PASS_V(gs, nir_shader_intrinsics_pass, lower_primitive_id,
nir_metadata_block_index | nir_metadata_dominance, NULL);
/* Clean up after all that lowering we did */
bool progress = false;
do {
@@ -1165,6 +1119,8 @@ agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx,
NIR_PASS_V(gs, nir_opt_sink, ~0);
NIR_PASS_V(gs, nir_opt_move, ~0);
NIR_PASS_V(gs, nir_shader_intrinsics_pass, lower_id,
nir_metadata_block_index | nir_metadata_dominance, NULL);
/* Create auxiliary programs */
*gs_copy = agx_nir_create_gs_copy_shader(&gs_state, outputs_rasterized(gs),
@@ -1206,6 +1162,7 @@ agx_nir_gs_setup_indirect(const nir_shader *libagx, enum mesa_prim prim)
MESA_SHADER_COMPUTE, &agx_nir_options, "GS indirect setup");
libagx_gs_setup_indirect(&b, nir_load_geometry_param_buffer_agx(&b),
nir_load_input_assembly_buffer_agx(&b),
nir_imm_int(&b, prim));
UNUSED struct agx_uncompiled_shader_info info;
+6 -4
View File
@@ -12,11 +12,13 @@ struct nir_shader;
struct agx_ia_key;
enum mesa_prim;
void agx_nir_lower_ia(struct nir_shader *s, struct agx_ia_key *ia);
void agx_nir_lower_gs(struct nir_shader *gs, struct nir_shader *vs,
const struct nir_shader *libagx, struct agx_ia_key *ia,
bool rasterizer_discard, struct nir_shader **gs_count,
struct nir_shader **gs_copy, struct nir_shader **pre_gs,
enum mesa_prim *out_mode, unsigned *out_count_words);
const struct nir_shader *libagx, bool rasterizer_discard,
struct nir_shader **gs_count, struct nir_shader **gs_copy,
struct nir_shader **pre_gs, enum mesa_prim *out_mode,
unsigned *out_count_words);
struct nir_shader *agx_nir_prefix_sum_gs(const struct nir_shader *libagx,
unsigned words);
+61
View File
@@ -0,0 +1,61 @@
/*
* Copyright 2023 Valve Corporation
* SPDX-License-Identifier: MIT
*/
#include "asahi/compiler/agx_compile.h"
#include "compiler/nir/nir_builder.h"
#include "shaders/geometry.h"
#include "agx_nir_lower_gs.h"
#include "libagx_shaders.h"
#include "nir.h"
#include "nir_builder_opcodes.h"
/*
 * Emit NIR computing the vertex ID for software input assembly.
 *
 * b:   builder positioned where the code should be inserted.
 * key: input assembly key supplying the primitive mode, the flatshade_first
 *      flag and the index size (0 means the draw is not indexed).
 *
 * Returns the vertex ID with the first-vertex value already added.
 */
static nir_def *
load_vertex_id(nir_builder *b, struct agx_ia_key *key)
{
   /* Ask libagx for the vertex index of this invocation, given the topology
    * described by the key, the primitive ID, the vertex within the primitive
    * and the vertex count.
    */
   nir_def *vertex = libagx_vertex_id_for_topology(
      b, nir_imm_int(b, key->mode), nir_imm_bool(b, key->flatshade_first),
      nir_load_primitive_id(b), nir_load_vertex_id_in_primitive_agx(b),
      nir_load_num_vertices(b));

   /* Without an index buffer the value above is used as-is. With one, it is
    * only a position in the index buffer, and the vertex ID is fetched from
    * the address libagx computes from the input assembly buffer.
    */
   if (key->index_size != 0) {
      nir_def *ptr =
         libagx_index_buffer(b, nir_load_input_assembly_buffer_agx(b), vertex,
                             nir_imm_int(b, key->index_size));

      nir_def *fetched = nir_load_global_constant(b, ptr, key->index_size, 1,
                                                  key->index_size * 8);

      /* Widen the fetched index back to the width of the computed ID */
      vertex = nir_u2uN(b, fetched, vertex->bit_size);
   }

   /* The "start" (index bias or base vertex) is applied last, after the index
    * fetch, so that index bias behaves properly.
    */
   return nir_iadd(b, vertex, nir_load_first_vertex(b));
}
/*
 * Intrinsic callback replacing load_vertex_id with the software-assembled
 * vertex ID.
 *
 * data is forwarded to load_vertex_id as its input assembly key.
 *
 * Returns true if the intrinsic was replaced, false otherwise.
 */
static bool
lower_vertex_id(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   if (intr->intrinsic == nir_intrinsic_load_vertex_id) {
      /* Remove first; the cursor returned by the removal is where the
       * replacement gets built.
       */
      b->cursor = nir_instr_remove(&intr->instr);

      assert(intr->def.bit_size == 32);

      nir_def *lowered = load_vertex_id(b, data);
      nir_def_rewrite_uses(&intr->def, lowered);
      return true;
   }

   return false;
}
/*
 * Lower load_vertex_id in shader s to software input assembly, using the
 * topology and index size described by ia.
 *
 * Whatever the intrinsics pass returns is not reported to the caller.
 */
void
agx_nir_lower_ia(nir_shader *s, struct agx_ia_key *ia)
{
   (void)nir_shader_intrinsics_pass(
      s, lower_vertex_id, nir_metadata_block_index | nir_metadata_dominance,
      ia);
}
+1
View File
@@ -14,6 +14,7 @@ libasahi_lib_files = files(
'agx_tilebuffer.c',
'agx_nir_lower_alpha.c',
'agx_nir_lower_gs.c',
'agx_nir_lower_ia.c',
'agx_nir_lower_msaa.c',
'agx_nir_lower_sample_intrinsics.c',
'agx_nir_lower_tilebuffer.c',
+28 -10
View File
@@ -103,6 +103,13 @@ libagx_vertex_id_for_topology(enum mesa_prim mode, bool flatshade_first,
}
}
/*
 * Return the address of an element of the index buffer referenced by the
 * input assembly state.
 *
 * p:          input assembly state holding the index buffer pointer.
 * id:         position of the index within the index buffer.
 * index_size: size of one index in bytes.
 *
 * The byte offset is computed in uint arithmetic before being applied to the
 * buffer pointer.
 */
uintptr_t
libagx_index_buffer(constant struct agx_ia_state *p, uint id,
                    uint index_size)
{
   uint byte_offset = id * index_size;

   return (uintptr_t)(p->index_buffer + byte_offset);
}
uint
libagx_setup_xfb_buffer(global struct agx_geometry_params *p, uint i)
{
@@ -190,19 +197,30 @@ libagx_build_gs_draw(global struct agx_geometry_params *p, bool indexed,
}
}
void
libagx_gs_setup_indirect(global struct agx_geometry_params *p,
enum mesa_prim mode)
uint2
process_draw(global uint *draw, enum mesa_prim mode)
{
/* Regardless of indexing being enabled, this holds */
uint vertex_count = p->input_indirect_desc[0];
uint instance_count = p->input_indirect_desc[1];
uint vertex_count = draw[0];
uint instance_count = draw[1];
uint prim_per_instance = u_decomposed_prims_for_vertices(mode, vertex_count);
p->input_primitives = prim_per_instance * instance_count;
return (uint2)(prim_per_instance, instance_count);
}
p->gs_grid[0] = prim_per_instance;
p->gs_grid[1] = instance_count;
void
libagx_gs_setup_indirect(global struct agx_geometry_params *p,
global struct agx_ia_state *ia, enum mesa_prim mode)
{
/* Determine the (primitives, instances) grid size. */
uint2 draw = process_draw(p->input_indirect_desc, mode);
/* There are primitives*instances primitives total */
p->input_primitives = draw.x * draw.y;
/* Invoke as (primitives, instances, 1) */
p->gs_grid[0] = draw.x;
p->gs_grid[1] = draw.y;
p->gs_grid[2] = 1;
/* If indexing is enabled, the third word is the offset into the index buffer
@@ -210,8 +228,8 @@ libagx_gs_setup_indirect(global struct agx_geometry_params *p,
* indirect draw, the hardware would do this for us, but for software input
* assembly we need to do it ourselves.
*/
if (p->input_index_buffer) {
p->input_index_buffer += p->input_indirect_desc[2] * p->index_size_B;
if (ia->index_buffer) {
ia->index_buffer += p->input_indirect_desc[2] * ia->index_size_B;
}
/* We may need to allocate a GS count buffer, do so now */
+8 -11
View File
@@ -32,6 +32,14 @@ struct agx_ia_key {
bool flatshade_first;
};
/* Packed input assembly state. The driver uploads one of these and exposes its
 * address to shaders, where libagx reads the index buffer pointer and index
 * size from it (and may advance the pointer when setting up an indirect draw).
 */
struct agx_ia_state {
   /* Input: address of the index buffer if present. */
   GLOBAL(uchar) index_buffer;

   /* Size of a single index in bytes (1, 2, or 4), or 0 if drawing without an
    * index buffer.
    */
   uint8_t index_size_B;
} PACKED;
/* Packed geometry state buffer */
struct agx_geometry_state {
/* Heap to allocate from, in either direction. By convention, the top is used
@@ -81,11 +89,6 @@ struct agx_geometry_params {
*/
uint32_t xfb_prims[MAX_VERTEX_STREAMS];
/* Address of input index buffer for an indexed draw (this includes
* tessellation - it's the index buffer coming into the geometry stage).
*/
GLOBAL(uchar) input_index_buffer;
/* Address of input indirect buffer for indirect GS draw */
GLOBAL(uint) input_indirect_desc;
@@ -104,12 +107,6 @@ struct agx_geometry_params {
* allocating counts.
*/
uint32_t count_buffer_stride;
/* Size of a single input index in bytes, or 0 if indexing is disabled.
*
* index_size_B == 0 <==> input_index_buffer == NULL
*/
uint32_t index_size_B;
} PACKED;
#endif
+3
View File
@@ -1855,6 +1855,9 @@ barrier("fence_mem_to_tex_agx")
# act like render target writes, in conjunction with fragment interlock.
barrier("fence_pbe_to_tex_pixel_agx")
# Address of state for AGX input assembly lowering for geometry/tessellation
system_value("input_assembly_buffer_agx", 1, bit_sizes=[64])
# Address of the parameter buffer for AGX geometry shaders
system_value("geometry_param_buffer_agx", 1, bit_sizes=[64])
@@ -168,6 +168,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr)
return load_sysval_root(b, 1, 32, &u->draw_id);
case nir_intrinsic_load_layer_id_written_agx:
return load_sysval_root(b, 1, 16, &u->layer_id_written);
case nir_intrinsic_load_input_assembly_buffer_agx:
return load_sysval_root(b, 1, 64, &u->input_assembly);
case nir_intrinsic_load_geometry_param_buffer_agx:
return load_sysval_root(b, 1, 64, &u->geometry_params);
default:
+13 -3
View File
@@ -1716,13 +1716,16 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
/* Lower IA before VS sysvals to correctly handle indirect multidraws */
agx_nir_lower_ia(vs, &key->ia);
/* Lower VS sysvals before it's merged in, so we access the correct shader
* stage for UBOs etc.
*/
NIR_PASS_V(vs, agx_nir_lower_sysvals);
/* Link VS with GS */
NIR_PASS_V(nir, agx_nir_lower_gs, vs, dev->libagx, &key->ia,
NIR_PASS_V(nir, agx_nir_lower_gs, vs, dev->libagx,
key->rasterizer_discard, &gs_count, &gs_copy, &pre_gs,
&gs_out_prim, &gs_out_count_words);
ralloc_free(vs);
@@ -3394,11 +3397,18 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
const struct pipe_draw_start_count_bias *draw,
const struct pipe_draw_indirect_info *indirect)
{
/* XXX move me */
struct agx_ia_state ia = {
.index_buffer = input_index_buffer,
.index_size_B = info->index_size,
};
batch->uniforms.input_assembly =
agx_pool_upload_aligned(&batch->pool, &ia, sizeof(ia), 8);
struct agx_geometry_params params = {
.state = agx_batch_geometry_state(batch),
.indirect_desc = batch->geom_indirect,
.input_index_buffer = input_index_buffer,
.index_size_B = info->index_size,
};
for (unsigned i = 0; i < ARRAY_SIZE(batch->ctx->streamout.targets); ++i) {
+3
View File
@@ -99,6 +99,9 @@ struct PACKED agx_draw_uniforms {
/* Vertex buffer object bases, if present */
uint64_t vbo_base[PIPE_MAX_ATTRIBS];
/* Address of input assembly buffer if geom/tess is used, else 0 */
uint64_t input_assembly;
/* Address of geometry param buffer if geometry shaders are used, else 0 */
uint64_t geometry_params;