asahi: rework GS input assembly
in prep for tessellation (which will share the IA lowering), and for multidraw indirect (which greatly complicates IA lowering with geom/tess). Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26614>
This commit is contained in:
@@ -194,55 +194,6 @@ load_instance_id(nir_builder *b)
|
||||
return nir_channel(b, nir_load_global_invocation_id(b, 32), 1);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
load_vs_vertex_id(nir_builder *b, struct agx_ia_key *key)
|
||||
{
|
||||
/* Tessellate by primitive mode */
|
||||
nir_def *id = libagx_vertex_id_for_topology(
|
||||
b, nir_imm_int(b, key->mode), nir_imm_bool(b, key->flatshade_first),
|
||||
load_primitive_id(b), nir_load_vertex_id_in_primitive_agx(b),
|
||||
nir_channel(b, nir_load_num_workgroups(b), 0));
|
||||
|
||||
/* If drawing with an index buffer, pull the vertex ID. */
|
||||
if (key->index_size) {
|
||||
nir_def *index_buffer = load_geometry_param(b, input_index_buffer);
|
||||
nir_def *offset = nir_imul_imm(b, id, key->index_size);
|
||||
nir_def *address = nir_iadd(b, index_buffer, nir_u2u64(b, offset));
|
||||
nir_def *index = nir_load_global_constant(b, address, key->index_size, 1,
|
||||
key->index_size * 8);
|
||||
|
||||
id = nir_u2uN(b, index, id->bit_size);
|
||||
}
|
||||
|
||||
/* Add the "start", either an index bias or a base vertex. This must happen
|
||||
* after indexing for proper index bias behaviour.
|
||||
*/
|
||||
return nir_iadd(b, id, nir_load_first_vertex(b));
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_input_assembly(nir_builder *b, nir_instr *instr, void *data)
|
||||
{
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
nir_def *id = NULL;
|
||||
b->cursor = nir_before_instr(instr);
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_load_vertex_id)
|
||||
id = load_vs_vertex_id(b, data);
|
||||
else if (intr->intrinsic == nir_intrinsic_load_instance_id)
|
||||
id = load_instance_id(b);
|
||||
else
|
||||
return false;
|
||||
|
||||
assert(intr->def.bit_size == 32);
|
||||
nir_def_rewrite_uses(&intr->def, id);
|
||||
nir_instr_remove(instr);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_gs_inputs(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
@@ -363,9 +314,8 @@ agx_nir_link_vs_gs(nir_shader *vs, nir_shader *gs)
|
||||
static nir_def *
|
||||
calc_unrolled_id(nir_builder *b)
|
||||
{
|
||||
nir_def *per_instance = nir_channel(b, nir_load_num_workgroups(b), 0);
|
||||
|
||||
return nir_iadd(b, nir_imul(b, load_instance_id(b), per_instance),
|
||||
return nir_iadd(b,
|
||||
nir_imul(b, load_instance_id(b), nir_load_num_vertices(b)),
|
||||
load_primitive_id(b));
|
||||
}
|
||||
|
||||
@@ -428,6 +378,26 @@ lower_gs_count_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_id(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
nir_def *id;
|
||||
if (intr->intrinsic == nir_intrinsic_load_primitive_id)
|
||||
id = load_primitive_id(b);
|
||||
else if (intr->intrinsic == nir_intrinsic_load_instance_id)
|
||||
id = load_instance_id(b);
|
||||
else if (intr->intrinsic == nir_intrinsic_load_num_vertices)
|
||||
id = nir_channel(b, nir_load_num_workgroups(b), 0);
|
||||
else
|
||||
return false;
|
||||
|
||||
b->cursor = nir_instr_remove(&intr->instr);
|
||||
nir_def_rewrite_uses(&intr->def, id);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create a "Geometry count" shader. This is a stripped down geometry shader
|
||||
* that just writes its number of emitted vertices / primitives / transform
|
||||
@@ -452,6 +422,9 @@ agx_nir_create_geometry_count_shader(nir_shader *gs, const nir_shader *libagx,
|
||||
NIR_PASS_V(shader, nir_shader_intrinsics_pass, lower_gs_count_instr,
|
||||
nir_metadata_block_index | nir_metadata_dominance, state);
|
||||
|
||||
NIR_PASS_V(shader, nir_shader_intrinsics_pass, lower_id,
|
||||
nir_metadata_block_index | nir_metadata_dominance, NULL);
|
||||
|
||||
/* Preprocess it */
|
||||
UNUSED struct agx_uncompiled_shader_info info;
|
||||
agx_preprocess_nir(shader, libagx, false, &info);
|
||||
@@ -792,20 +765,6 @@ lower_gs_instr(nir_builder *b, nir_intrinsic_instr *intr, void *state)
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Lower load_primitive_id to something compute-like.
|
||||
*/
|
||||
static bool
|
||||
lower_primitive_id(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
if (intr->intrinsic != nir_intrinsic_load_primitive_id)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_instr_remove(&intr->instr);
|
||||
nir_def_rewrite_uses(&intr->def, load_primitive_id(b));
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
collect_components(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
@@ -1013,15 +972,13 @@ link_libagx(nir_shader *nir, const nir_shader *libagx)
|
||||
|
||||
void
|
||||
agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx,
|
||||
struct agx_ia_key *ia, bool rasterizer_discard,
|
||||
nir_shader **gs_count, nir_shader **gs_copy,
|
||||
nir_shader **pre_gs, enum mesa_prim *out_mode,
|
||||
unsigned *out_count_words)
|
||||
bool rasterizer_discard, nir_shader **gs_count,
|
||||
nir_shader **gs_copy, nir_shader **pre_gs,
|
||||
enum mesa_prim *out_mode, unsigned *out_count_words)
|
||||
{
|
||||
/* Lower input assembly on the vertex shader */
|
||||
NIR_PASS_V(vs, nir_shader_instructions_pass, lower_input_assembly,
|
||||
nir_metadata_block_index | nir_metadata_dominance, ia);
|
||||
link_libagx(vs, libagx);
|
||||
NIR_PASS_V(vs, nir_lower_idiv,
|
||||
&(const nir_lower_idiv_options){.allow_fp16 = true});
|
||||
|
||||
/* Collect output component counts so we can size the geometry output buffer
|
||||
* appropriately, instead of assuming everything is vec4.
|
||||
@@ -1058,9 +1015,6 @@ agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx,
|
||||
nir_lower_gs_intrinsics_always_end_primitive |
|
||||
nir_lower_gs_intrinsics_count_decomposed_primitives);
|
||||
|
||||
NIR_PASS_V(gs, nir_shader_intrinsics_pass, lower_primitive_id,
|
||||
nir_metadata_block_index | nir_metadata_dominance, NULL);
|
||||
|
||||
/* Clean up after all that lowering we did */
|
||||
bool progress = false;
|
||||
do {
|
||||
@@ -1165,6 +1119,8 @@ agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx,
|
||||
|
||||
NIR_PASS_V(gs, nir_opt_sink, ~0);
|
||||
NIR_PASS_V(gs, nir_opt_move, ~0);
|
||||
NIR_PASS_V(gs, nir_shader_intrinsics_pass, lower_id,
|
||||
nir_metadata_block_index | nir_metadata_dominance, NULL);
|
||||
|
||||
/* Create auxiliary programs */
|
||||
*gs_copy = agx_nir_create_gs_copy_shader(&gs_state, outputs_rasterized(gs),
|
||||
@@ -1206,6 +1162,7 @@ agx_nir_gs_setup_indirect(const nir_shader *libagx, enum mesa_prim prim)
|
||||
MESA_SHADER_COMPUTE, &agx_nir_options, "GS indirect setup");
|
||||
|
||||
libagx_gs_setup_indirect(&b, nir_load_geometry_param_buffer_agx(&b),
|
||||
nir_load_input_assembly_buffer_agx(&b),
|
||||
nir_imm_int(&b, prim));
|
||||
|
||||
UNUSED struct agx_uncompiled_shader_info info;
|
||||
|
||||
@@ -12,11 +12,13 @@ struct nir_shader;
|
||||
struct agx_ia_key;
|
||||
enum mesa_prim;
|
||||
|
||||
void agx_nir_lower_ia(struct nir_shader *s, struct agx_ia_key *ia);
|
||||
|
||||
void agx_nir_lower_gs(struct nir_shader *gs, struct nir_shader *vs,
|
||||
const struct nir_shader *libagx, struct agx_ia_key *ia,
|
||||
bool rasterizer_discard, struct nir_shader **gs_count,
|
||||
struct nir_shader **gs_copy, struct nir_shader **pre_gs,
|
||||
enum mesa_prim *out_mode, unsigned *out_count_words);
|
||||
const struct nir_shader *libagx, bool rasterizer_discard,
|
||||
struct nir_shader **gs_count, struct nir_shader **gs_copy,
|
||||
struct nir_shader **pre_gs, enum mesa_prim *out_mode,
|
||||
unsigned *out_count_words);
|
||||
|
||||
struct nir_shader *agx_nir_prefix_sum_gs(const struct nir_shader *libagx,
|
||||
unsigned words);
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
* Copyright 2023 Valve Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "asahi/compiler/agx_compile.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "shaders/geometry.h"
|
||||
#include "agx_nir_lower_gs.h"
|
||||
#include "libagx_shaders.h"
|
||||
#include "nir.h"
|
||||
#include "nir_builder_opcodes.h"
|
||||
|
||||
static nir_def *
|
||||
load_vertex_id(nir_builder *b, struct agx_ia_key *key)
|
||||
{
|
||||
/* Tessellate by primitive mode */
|
||||
nir_def *id = libagx_vertex_id_for_topology(
|
||||
b, nir_imm_int(b, key->mode), nir_imm_bool(b, key->flatshade_first),
|
||||
nir_load_primitive_id(b), nir_load_vertex_id_in_primitive_agx(b),
|
||||
nir_load_num_vertices(b));
|
||||
|
||||
/* If drawing with an index buffer, pull the vertex ID. Otherwise, the
|
||||
* vertex ID is just the index as-is.
|
||||
*/
|
||||
if (key->index_size) {
|
||||
nir_def *address =
|
||||
libagx_index_buffer(b, nir_load_input_assembly_buffer_agx(b), id,
|
||||
nir_imm_int(b, key->index_size));
|
||||
|
||||
nir_def *index = nir_load_global_constant(b, address, key->index_size, 1,
|
||||
key->index_size * 8);
|
||||
|
||||
id = nir_u2uN(b, index, id->bit_size);
|
||||
}
|
||||
|
||||
/* Add the "start", either an index bias or a base vertex. This must happen
|
||||
* after indexing for proper index bias behaviour.
|
||||
*/
|
||||
return nir_iadd(b, id, nir_load_first_vertex(b));
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_vertex_id(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
if (intr->intrinsic != nir_intrinsic_load_vertex_id)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_instr_remove(&intr->instr);
|
||||
assert(intr->def.bit_size == 32);
|
||||
nir_def_rewrite_uses(&intr->def, load_vertex_id(b, data));
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
agx_nir_lower_ia(nir_shader *s, struct agx_ia_key *ia)
|
||||
{
|
||||
nir_shader_intrinsics_pass(s, lower_vertex_id,
|
||||
nir_metadata_block_index | nir_metadata_dominance,
|
||||
ia);
|
||||
}
|
||||
@@ -14,6 +14,7 @@ libasahi_lib_files = files(
|
||||
'agx_tilebuffer.c',
|
||||
'agx_nir_lower_alpha.c',
|
||||
'agx_nir_lower_gs.c',
|
||||
'agx_nir_lower_ia.c',
|
||||
'agx_nir_lower_msaa.c',
|
||||
'agx_nir_lower_sample_intrinsics.c',
|
||||
'agx_nir_lower_tilebuffer.c',
|
||||
|
||||
@@ -103,6 +103,13 @@ libagx_vertex_id_for_topology(enum mesa_prim mode, bool flatshade_first,
|
||||
}
|
||||
}
|
||||
|
||||
uintptr_t
|
||||
libagx_index_buffer(constant struct agx_ia_state *p, uint id,
|
||||
uint index_size)
|
||||
{
|
||||
return (uintptr_t)&p->index_buffer[id * index_size];
|
||||
}
|
||||
|
||||
uint
|
||||
libagx_setup_xfb_buffer(global struct agx_geometry_params *p, uint i)
|
||||
{
|
||||
@@ -190,19 +197,30 @@ libagx_build_gs_draw(global struct agx_geometry_params *p, bool indexed,
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
libagx_gs_setup_indirect(global struct agx_geometry_params *p,
|
||||
enum mesa_prim mode)
|
||||
uint2
|
||||
process_draw(global uint *draw, enum mesa_prim mode)
|
||||
{
|
||||
/* Regardless of indexing being enabled, this holds */
|
||||
uint vertex_count = p->input_indirect_desc[0];
|
||||
uint instance_count = p->input_indirect_desc[1];
|
||||
uint vertex_count = draw[0];
|
||||
uint instance_count = draw[1];
|
||||
|
||||
uint prim_per_instance = u_decomposed_prims_for_vertices(mode, vertex_count);
|
||||
p->input_primitives = prim_per_instance * instance_count;
|
||||
return (uint2)(prim_per_instance, instance_count);
|
||||
}
|
||||
|
||||
p->gs_grid[0] = prim_per_instance;
|
||||
p->gs_grid[1] = instance_count;
|
||||
void
|
||||
libagx_gs_setup_indirect(global struct agx_geometry_params *p,
|
||||
global struct agx_ia_state *ia, enum mesa_prim mode)
|
||||
{
|
||||
/* Determine the (primitives, instances) grid size. */
|
||||
uint2 draw = process_draw(p->input_indirect_desc, mode);
|
||||
|
||||
/* There are primitives*instances primitives total */
|
||||
p->input_primitives = draw.x * draw.y;
|
||||
|
||||
/* Invoke as (primitives, instances, 1) */
|
||||
p->gs_grid[0] = draw.x;
|
||||
p->gs_grid[1] = draw.y;
|
||||
p->gs_grid[2] = 1;
|
||||
|
||||
/* If indexing is enabled, the third word is the offset into the index buffer
|
||||
@@ -210,8 +228,8 @@ libagx_gs_setup_indirect(global struct agx_geometry_params *p,
|
||||
* indirect draw, the hardware would do this for us, but for software input
|
||||
* assembly we need to do it ourselves.
|
||||
*/
|
||||
if (p->input_index_buffer) {
|
||||
p->input_index_buffer += p->input_indirect_desc[2] * p->index_size_B;
|
||||
if (ia->index_buffer) {
|
||||
ia->index_buffer += p->input_indirect_desc[2] * ia->index_size_B;
|
||||
}
|
||||
|
||||
/* We may need to allocate a GS count buffer, do so now */
|
||||
|
||||
@@ -32,6 +32,14 @@ struct agx_ia_key {
|
||||
bool flatshade_first;
|
||||
};
|
||||
|
||||
/*
 * State read by the software input assembly lowering. The driver uploads one
 * instance per draw and exposes its address through the
 * input_assembly_buffer_agx system value.
 */
struct agx_ia_state {
|
||||
/* Input: index buffer if present. Addressed in bytes by
 * libagx_index_buffer; libagx_gs_setup_indirect advances it by the index
 * offset of an indirect draw.
 */
|
||||
GLOBAL(uchar) index_buffer;
|
||||
|
||||
/* The index size in bytes (1, 2, 4) or 0 if drawing without an index
 * buffer.
 */
|
||||
uint8_t index_size_B;
|
||||
} PACKED;
|
||||
|
||||
/* Packed geometry state buffer */
|
||||
struct agx_geometry_state {
|
||||
/* Heap to allocate from, in either direction. By convention, the top is used
|
||||
@@ -81,11 +89,6 @@ struct agx_geometry_params {
|
||||
*/
|
||||
uint32_t xfb_prims[MAX_VERTEX_STREAMS];
|
||||
|
||||
/* Address of input index buffer for an indexed draw (this includes
|
||||
* tessellation - it's the index buffer coming into the geometry stage).
|
||||
*/
|
||||
GLOBAL(uchar) input_index_buffer;
|
||||
|
||||
/* Address of input indirect buffer for indirect GS draw */
|
||||
GLOBAL(uint) input_indirect_desc;
|
||||
|
||||
@@ -104,12 +107,6 @@ struct agx_geometry_params {
|
||||
* allocating counts.
|
||||
*/
|
||||
uint32_t count_buffer_stride;
|
||||
|
||||
/* Size of a single input index in bytes, or 0 if indexing is disabled.
|
||||
*
|
||||
* index_size_B == 0 <==> input_index_buffer == NULL
|
||||
*/
|
||||
uint32_t index_size_B;
|
||||
} PACKED;
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1855,6 +1855,9 @@ barrier("fence_mem_to_tex_agx")
|
||||
# act like render target writes, in conjunction with fragment interlock.
|
||||
barrier("fence_pbe_to_tex_pixel_agx")
|
||||
|
||||
# Address of state for AGX input assembly lowering for geometry/tessellation
|
||||
system_value("input_assembly_buffer_agx", 1, bit_sizes=[64])
|
||||
|
||||
# Address of the parameter buffer for AGX geometry shaders
|
||||
system_value("geometry_param_buffer_agx", 1, bit_sizes=[64])
|
||||
|
||||
|
||||
@@ -168,6 +168,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr)
|
||||
return load_sysval_root(b, 1, 32, &u->draw_id);
|
||||
case nir_intrinsic_load_layer_id_written_agx:
|
||||
return load_sysval_root(b, 1, 16, &u->layer_id_written);
|
||||
case nir_intrinsic_load_input_assembly_buffer_agx:
|
||||
return load_sysval_root(b, 1, 64, &u->input_assembly);
|
||||
case nir_intrinsic_load_geometry_param_buffer_agx:
|
||||
return load_sysval_root(b, 1, 64, &u->geometry_params);
|
||||
default:
|
||||
|
||||
@@ -1716,13 +1716,16 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
|
||||
|
||||
/* Lower IA before VS sysvals to correctly handle indirect multidraws */
|
||||
agx_nir_lower_ia(vs, &key->ia);
|
||||
|
||||
/* Lower VS sysvals before it's merged in, so we access the correct shader
|
||||
* stage for UBOs etc.
|
||||
*/
|
||||
NIR_PASS_V(vs, agx_nir_lower_sysvals);
|
||||
|
||||
/* Link VS with GS */
|
||||
NIR_PASS_V(nir, agx_nir_lower_gs, vs, dev->libagx, &key->ia,
|
||||
NIR_PASS_V(nir, agx_nir_lower_gs, vs, dev->libagx,
|
||||
key->rasterizer_discard, &gs_count, &gs_copy, &pre_gs,
|
||||
&gs_out_prim, &gs_out_count_words);
|
||||
ralloc_free(vs);
|
||||
@@ -3394,11 +3397,18 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
|
||||
const struct pipe_draw_start_count_bias *draw,
|
||||
const struct pipe_draw_indirect_info *indirect)
|
||||
{
|
||||
/* XXX move me */
|
||||
struct agx_ia_state ia = {
|
||||
.index_buffer = input_index_buffer,
|
||||
.index_size_B = info->index_size,
|
||||
};
|
||||
|
||||
batch->uniforms.input_assembly =
|
||||
agx_pool_upload_aligned(&batch->pool, &ia, sizeof(ia), 8);
|
||||
|
||||
struct agx_geometry_params params = {
|
||||
.state = agx_batch_geometry_state(batch),
|
||||
.indirect_desc = batch->geom_indirect,
|
||||
.input_index_buffer = input_index_buffer,
|
||||
.index_size_B = info->index_size,
|
||||
};
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(batch->ctx->streamout.targets); ++i) {
|
||||
|
||||
@@ -99,6 +99,9 @@ struct PACKED agx_draw_uniforms {
|
||||
/* Vertex buffer object bases, if present */
|
||||
uint64_t vbo_base[PIPE_MAX_ATTRIBS];
|
||||
|
||||
/* Address of input assembly buffer if geom/tess is used, else 0 */
|
||||
uint64_t input_assembly;
|
||||
|
||||
/* Address of geometry param buffer if geometry shaders are used, else 0 */
|
||||
uint64_t geometry_params;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user