asahi: separate GS from VS
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27616>
This commit is contained in:
committed by
Marge Bot
parent
af7084efa7
commit
d96fbd4618
@@ -16,7 +16,9 @@
|
||||
#include "libagx_shaders.h"
|
||||
#include "nir.h"
|
||||
#include "nir_builder_opcodes.h"
|
||||
#include "nir_intrinsics.h"
|
||||
#include "nir_xfb_info.h"
|
||||
#include "shader_enums.h"
|
||||
|
||||
enum gs_counter {
|
||||
GS_COUNTER_VERTICES = 0,
|
||||
@@ -192,125 +194,42 @@ load_instance_id(nir_builder *b)
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_gs_inputs(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
lower_gs_inputs(nir_builder *b, nir_intrinsic_instr *intr, void *key)
|
||||
{
|
||||
struct agx_lower_output_to_var_state *vs_state = data;
|
||||
if (intr->intrinsic != nir_intrinsic_load_per_vertex_input)
|
||||
return false;
|
||||
|
||||
/* I suppose we could support indirect GS inputs, but it would be more
|
||||
* complicated and probably pointless (versus the lowering the frontend would
|
||||
* otherwise do). GS lowering is hard enough as it is.
|
||||
*/
|
||||
assert(nir_src_is_const(intr->src[1]) && "no indirect GS inputs");
|
||||
|
||||
b->cursor = nir_instr_remove(&intr->instr);
|
||||
nir_def *vertex = intr->src[0].ssa;
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
|
||||
nir_variable *var =
|
||||
vs_state->outputs[sem.location + nir_src_as_uint(intr->src[1])];
|
||||
nir_def *location = nir_iadd_imm(b, intr->src[1].ssa, sem.location);
|
||||
|
||||
nir_def *val = nir_load_array_var(b, var, vertex);
|
||||
/* Calculate the vertex ID we're pulling, based on the topology */
|
||||
nir_def *vert_in_prim = intr->src[0].ssa;
|
||||
nir_def *vertex = agx_vertex_id_for_topology(b, vert_in_prim, key);
|
||||
|
||||
/* The unrolled vertex ID uses the input_vertices, which differs from what
|
||||
* our load_num_vertices will return (vertices vs primitives).
|
||||
*/
|
||||
nir_def *unrolled = nir_iadd(
|
||||
b,
|
||||
nir_imul(b, load_instance_id(b), load_geometry_param(b, input_vertices)),
|
||||
vertex);
|
||||
|
||||
/* Calculate the address of the input given the unrolled vertex ID */
|
||||
nir_def *addr = libagx_vertex_output_address(
|
||||
b, nir_load_geometry_param_buffer_agx(b), unrolled, location,
|
||||
load_geometry_param(b, vs_outputs));
|
||||
|
||||
assert(intr->def.bit_size == 32);
|
||||
unsigned start = nir_intrinsic_component(intr);
|
||||
unsigned count = intr->def.num_components;
|
||||
val = nir_channels(b, val, nir_component_mask(count) << start);
|
||||
addr = nir_iadd_imm(b, addr, nir_intrinsic_component(intr) * 4);
|
||||
|
||||
nir_def *val = nir_load_global_constant(b, addr, 4, intr->def.num_components,
|
||||
intr->def.bit_size);
|
||||
nir_def_rewrite_uses(&intr->def, val);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_id_in_prim(nir_builder *b, nir_instr *instr, void *data)
|
||||
{
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
if (intr->intrinsic != nir_intrinsic_load_vertex_id_in_primitive_agx)
|
||||
return false;
|
||||
|
||||
/* The ID in the primitive is passed as a function parameter */
|
||||
b->cursor = nir_instr_remove(instr);
|
||||
nir_def *id = nir_load_param(b, 0);
|
||||
nir_def_rewrite_uses(&intr->def, nir_u2uN(b, id, intr->def.bit_size));
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
agx_nir_link_vs_gs(nir_shader *vs, nir_shader *gs)
|
||||
{
|
||||
struct agx_lower_output_to_var_state state = {.arrayed = true};
|
||||
|
||||
/* Vertex shader outputs will be placed in arrays. Create those arrays. */
|
||||
u_foreach_bit64(slot, vs->info.outputs_written) {
|
||||
state.outputs[slot] = nir_variable_create(
|
||||
gs, nir_var_shader_temp,
|
||||
glsl_array_type(glsl_uvec4_type(), gs->info.gs.vertices_in, 0),
|
||||
gl_varying_slot_name_for_stage(slot, MESA_SHADER_VERTEX));
|
||||
}
|
||||
|
||||
/* Rewrite geometry shader inputs to read from those arrays */
|
||||
NIR_PASS(_, gs, nir_shader_intrinsics_pass, lower_gs_inputs,
|
||||
nir_metadata_block_index | nir_metadata_dominance, &state);
|
||||
|
||||
/* Link the vertex shader with the geometry shader. This assumes that
|
||||
* all functions have been inlined in the vertex shader.
|
||||
*/
|
||||
nir_function_impl *vs_entry = nir_shader_get_entrypoint(vs);
|
||||
nir_function *vs_function = nir_function_create(gs, "vertex");
|
||||
vs_function->impl = nir_function_impl_clone(gs, vs_entry);
|
||||
vs_function->impl->function = vs_function;
|
||||
|
||||
/* The vertex shader needs to be passed its index in the input primitive */
|
||||
vs_function->num_params = 1;
|
||||
vs_function->params = rzalloc_array(gs, nir_parameter, 1);
|
||||
vs_function->params[0] = (nir_parameter){1, 16};
|
||||
|
||||
/* The vertex shader needs to be expressed in terms of that index */
|
||||
nir_function_instructions_pass(
|
||||
vs_function->impl, agx_lower_output_to_var,
|
||||
nir_metadata_block_index | nir_metadata_dominance, &state);
|
||||
|
||||
nir_function_instructions_pass(
|
||||
vs_function->impl, lower_id_in_prim,
|
||||
nir_metadata_block_index | nir_metadata_dominance, NULL);
|
||||
|
||||
/* Run the vertex shader for each vertex in the input primitive */
|
||||
nir_function_impl *gs_entry = nir_shader_get_entrypoint(gs);
|
||||
nir_builder b = nir_builder_at(nir_before_impl(gs_entry));
|
||||
|
||||
for (unsigned i = 0; i < gs->info.gs.vertices_in; ++i) {
|
||||
nir_call(&b, vs_function, nir_imm_intN_t(&b, i, 16));
|
||||
}
|
||||
|
||||
/* Copy texture info. We force bindless on GS for now. */
|
||||
gs->info.num_textures = vs->info.num_textures;
|
||||
gs->info.num_images = vs->info.num_images;
|
||||
BITSET_COPY(gs->info.textures_used, vs->info.textures_used);
|
||||
BITSET_COPY(gs->info.textures_used_by_txf, vs->info.textures_used_by_txf);
|
||||
BITSET_COPY(gs->info.images_used, vs->info.images_used);
|
||||
|
||||
/* Inline the VS into the GS */
|
||||
nir_inline_functions(gs);
|
||||
exec_node_remove(&vs_function->node);
|
||||
nir_lower_global_vars_to_local(gs);
|
||||
|
||||
/* Do some optimization to get rid of indirects */
|
||||
bool progress;
|
||||
|
||||
do {
|
||||
progress = false;
|
||||
NIR_PASS(progress, gs, nir_opt_constant_folding);
|
||||
NIR_PASS(progress, gs, nir_opt_dce);
|
||||
} while (progress);
|
||||
|
||||
/* If any indirects hung around, lower them */
|
||||
nir_lower_indirect_derefs(gs, nir_var_function_temp, UINT32_MAX);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unrolled ID is the index of the primitive in the count buffer, given as
|
||||
* (instance ID * # vertices/instance) + vertex ID
|
||||
@@ -1091,14 +1010,12 @@ link_libagx(nir_shader *nir, const nir_shader *libagx)
|
||||
}
|
||||
|
||||
bool
|
||||
agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx,
|
||||
agx_nir_lower_gs(nir_shader *gs, const nir_shader *libagx,
|
||||
struct agx_ia_key *ia, bool rasterizer_discard,
|
||||
nir_shader **gs_count, nir_shader **gs_copy,
|
||||
nir_shader **pre_gs, enum mesa_prim *out_mode,
|
||||
unsigned *out_count_words)
|
||||
{
|
||||
link_libagx(vs, libagx);
|
||||
|
||||
/* Collect output component counts so we can size the geometry output buffer
|
||||
* appropriately, instead of assuming everything is vec4.
|
||||
*/
|
||||
@@ -1120,8 +1037,8 @@ agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx,
|
||||
nir_metadata_block_index | nir_metadata_dominance, nir_imm_int(&b, 0));
|
||||
}
|
||||
|
||||
/* Link VS into the GS */
|
||||
agx_nir_link_vs_gs(vs, gs);
|
||||
NIR_PASS(_, gs, nir_shader_intrinsics_pass, lower_gs_inputs,
|
||||
nir_metadata_block_index | nir_metadata_dominance, ia);
|
||||
|
||||
/* Lower geometry shader writes to contain all of the required counts, so we
|
||||
* know where in the various buffers we should write vertices.
|
||||
@@ -1265,6 +1182,73 @@ agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx,
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Vertex shaders (tessellation evaluation shaders) before a geometry shader run
|
||||
* as a dedicated compute prepass. They are invoked as (count, instances, 1),
|
||||
* equivalent to a geometry shader inputting POINTS, so the vertex output buffer
|
||||
* is indexed according to calc_unrolled_id.
|
||||
*
|
||||
* This function lowers their vertex shader I/O to compute.
|
||||
*
|
||||
* Vertex ID becomes an index buffer pull (without applying the topology). Store
|
||||
* output becomes a store into the global vertex output buffer.
|
||||
*/
|
||||
static bool
|
||||
lower_vs_before_gs(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
if (intr->intrinsic != nir_intrinsic_store_output)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_instr_remove(&intr->instr);
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
nir_def *location = nir_iadd_imm(b, intr->src[1].ssa, sem.location);
|
||||
|
||||
nir_def *addr = libagx_vertex_output_address(
|
||||
b, nir_load_geometry_param_buffer_agx(b), calc_unrolled_id(b), location,
|
||||
nir_imm_int64(b, b->shader->info.outputs_written));
|
||||
|
||||
assert(nir_src_bit_size(intr->src[0]) == 32);
|
||||
addr = nir_iadd_imm(b, addr, nir_intrinsic_component(intr) * 4);
|
||||
|
||||
nir_store_global(b, addr, 4, intr->src[0].ssa,
|
||||
nir_intrinsic_write_mask(intr));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
agx_nir_lower_vs_before_gs(struct nir_shader *vs,
|
||||
const struct nir_shader *libagx,
|
||||
unsigned index_size_B, uint64_t *outputs)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
/* Lower vertex ID to an index buffer pull without a topology applied */
|
||||
progress |= agx_nir_lower_ia(vs, &(struct agx_ia_key){
|
||||
.index_size = index_size_B,
|
||||
.mode = MESA_PRIM_POINTS,
|
||||
});
|
||||
|
||||
/* Lower vertex stores to memory stores */
|
||||
progress |= nir_shader_intrinsics_pass(
|
||||
vs, lower_vs_before_gs, nir_metadata_block_index | nir_metadata_dominance,
|
||||
&index_size_B);
|
||||
|
||||
/* Lower instance ID and num vertices */
|
||||
progress |= nir_shader_intrinsics_pass(
|
||||
vs, lower_id, nir_metadata_block_index | nir_metadata_dominance, NULL);
|
||||
|
||||
/* Link libagx, used in lower_vs_before_gs */
|
||||
if (progress)
|
||||
link_libagx(vs, libagx);
|
||||
|
||||
/* Turn into a compute shader now that we're free of vertexisms */
|
||||
vs->info.stage = MESA_SHADER_COMPUTE;
|
||||
memset(&vs->info.cs, 0, sizeof(vs->info.cs));
|
||||
vs->xfb_info = NULL;
|
||||
*outputs = vs->info.outputs_written;
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
agx_nir_prefix_sum_gs(nir_builder *b, const void *data)
|
||||
{
|
||||
|
||||
@@ -32,11 +32,15 @@ struct nir_def *agx_vertex_id_for_topology(struct nir_builder *b,
|
||||
|
||||
bool agx_nir_lower_ia(struct nir_shader *s, struct agx_ia_key *ia);
|
||||
|
||||
bool agx_nir_lower_gs(struct nir_shader *gs, struct nir_shader *vs,
|
||||
const struct nir_shader *libagx, struct agx_ia_key *ia,
|
||||
bool rasterizer_discard, struct nir_shader **gs_count,
|
||||
struct nir_shader **gs_copy, struct nir_shader **pre_gs,
|
||||
enum mesa_prim *out_mode, unsigned *out_count_words);
|
||||
bool agx_nir_lower_vs_before_gs(struct nir_shader *vs,
|
||||
const struct nir_shader *libagx,
|
||||
unsigned index_size_B, uint64_t *outputs);
|
||||
|
||||
bool agx_nir_lower_gs(struct nir_shader *gs, const struct nir_shader *libagx,
|
||||
struct agx_ia_key *ia, bool rasterizer_discard,
|
||||
struct nir_shader **gs_count, struct nir_shader **gs_copy,
|
||||
struct nir_shader **pre_gs, enum mesa_prim *out_mode,
|
||||
unsigned *out_count_words);
|
||||
|
||||
void agx_nir_prefix_sum_gs(struct nir_builder *b, const void *data);
|
||||
|
||||
|
||||
@@ -353,15 +353,18 @@ libagx_gs_setup_indirect(global struct agx_geometry_params *p,
|
||||
uint vertex_count = in_draw[0];
|
||||
uint instance_count = in_draw[1];
|
||||
|
||||
/* Calculate number of primitives input into the GS */
|
||||
uint prim_per_instance = u_decomposed_prims_for_vertices(mode, vertex_count);
|
||||
uint2 draw = (uint2)(prim_per_instance, instance_count);
|
||||
p->input_primitives = prim_per_instance * instance_count;
|
||||
p->input_vertices = vertex_count;
|
||||
|
||||
/* There are primitives*instances primitives total */
|
||||
p->input_primitives = draw.x * draw.y;
|
||||
/* Invoke VS as (vertices, instances, 1); GS as (primitives, instances, 1) */
|
||||
p->vs_grid[0] = vertex_count;
|
||||
p->vs_grid[1] = instance_count;
|
||||
p->vs_grid[2] = 1;
|
||||
|
||||
/* Invoke as (primitives, instances, 1) */
|
||||
p->gs_grid[0] = draw.x;
|
||||
p->gs_grid[1] = draw.y;
|
||||
p->gs_grid[0] = prim_per_instance;
|
||||
p->gs_grid[1] = instance_count;
|
||||
p->gs_grid[2] = 1;
|
||||
|
||||
/* If indexing is enabled, the third word is the offset into the index buffer
|
||||
@@ -373,10 +376,18 @@ libagx_gs_setup_indirect(global struct agx_geometry_params *p,
|
||||
ia->index_buffer += ((constant uint *)ia->draws)[2] * ia->index_size_B;
|
||||
}
|
||||
|
||||
/* We may need to allocate a GS count buffer, do so now */
|
||||
/* We may need to allocate VS and GS count buffers, do so now */
|
||||
global struct agx_geometry_state *state = p->state;
|
||||
|
||||
uint vertex_buffer_size =
|
||||
libagx_tcs_in_size(vertex_count * instance_count, p->vs_outputs);
|
||||
|
||||
p->count_buffer = (global uint *)(state->heap + state->heap_bottom);
|
||||
state->heap_bottom += align(p->input_primitives * p->count_buffer_stride, 4);
|
||||
state->heap_bottom +=
|
||||
align(p->input_primitives * p->count_buffer_stride, 16);
|
||||
|
||||
p->vertex_buffer = (global uint *)(state->heap + state->heap_bottom);
|
||||
state->heap_bottom += align(vertex_buffer_size, 4);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -422,3 +433,11 @@ libagx_is_provoking_last(global struct agx_ia_state *ia)
|
||||
{
|
||||
return !ia->flatshade_first;
|
||||
}
|
||||
|
||||
uintptr_t
|
||||
libagx_vertex_output_address(constant struct agx_geometry_params *p, uint vtx,
|
||||
gl_varying_slot location, uint64_t vs_outputs)
|
||||
{
|
||||
return (uintptr_t)p->vertex_buffer +
|
||||
libagx_tcs_in_offs(vtx, location, vs_outputs);
|
||||
}
|
||||
|
||||
@@ -209,7 +209,7 @@ AGX_STATIC_ASSERT(sizeof(struct agx_tess_params) == 22 * 4);
|
||||
*
|
||||
* TODO: compact.
|
||||
*/
|
||||
static inline ushort
|
||||
static inline uint
|
||||
libagx_tcs_in_offs(uint vtx, gl_varying_slot location,
|
||||
uint64_t crosslane_vs_out_mask)
|
||||
{
|
||||
|
||||
@@ -1908,11 +1908,20 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
|
||||
struct asahi_vs_shader_key *key = &key_->vs;
|
||||
|
||||
NIR_PASS(_, nir, lower_vbo, key->attribs);
|
||||
NIR_PASS(_, nir, agx_nir_lower_point_size, key->fixed_point_size);
|
||||
|
||||
if (should_lower_clip_m1_1(dev, key->clip_halfz)) {
|
||||
NIR_PASS(_, nir, nir_shader_intrinsics_pass, agx_nir_lower_clip_m1_1,
|
||||
nir_metadata_block_index | nir_metadata_dominance, NULL);
|
||||
if (key->next_stage == ASAHI_VS_FS) {
|
||||
NIR_PASS(_, nir, agx_nir_lower_point_size,
|
||||
key->next.fs.fixed_point_size);
|
||||
|
||||
if (should_lower_clip_m1_1(dev, key->next.fs.clip_halfz)) {
|
||||
NIR_PASS(_, nir, nir_shader_intrinsics_pass,
|
||||
agx_nir_lower_clip_m1_1,
|
||||
nir_metadata_block_index | nir_metadata_dominance, NULL);
|
||||
}
|
||||
} else if (key->next_stage == ASAHI_VS_GS) {
|
||||
NIR_PASS(_, nir, agx_nir_lower_sysvals, false);
|
||||
NIR_PASS(_, nir, agx_nir_lower_vs_before_gs, dev->libagx,
|
||||
key->next.gs.index_size_B, &outputs);
|
||||
}
|
||||
} else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
|
||||
struct asahi_tcs_shader_key *key = &key_->tcs;
|
||||
@@ -1940,28 +1949,11 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
|
||||
NIR_PASS(_, nir, nir_io_add_intrinsic_xfb_info);
|
||||
}
|
||||
|
||||
struct blob_reader vs_reader;
|
||||
blob_reader_init(&vs_reader, linked_so->serialized_nir.data,
|
||||
linked_so->serialized_nir.size);
|
||||
nir_shader *vs = nir_deserialize(NULL, &agx_nir_options, &vs_reader);
|
||||
|
||||
/* Apply the VS key to the VS before linking it in */
|
||||
NIR_PASS(_, vs, lower_vbo, key->attribs);
|
||||
NIR_PASS(_, vs, agx_nir_lower_ia, &key->ia);
|
||||
|
||||
NIR_PASS(_, vs, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
|
||||
NIR_PASS(_, nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
|
||||
|
||||
/* Lower VS sysvals before it's merged in, so we access the correct shader
|
||||
* stage for UBOs etc. Skip draw parameters, those are lowered later.
|
||||
*/
|
||||
NIR_PASS(_, vs, agx_nir_lower_sysvals, false);
|
||||
|
||||
/* Link VS with GS */
|
||||
NIR_PASS(_, nir, agx_nir_lower_gs, vs, dev->libagx, &key->ia,
|
||||
NIR_PASS(_, nir, agx_nir_lower_gs, dev->libagx, &key->ia,
|
||||
key->rasterizer_discard, &gs_count, &gs_copy, &pre_gs,
|
||||
&gs_out_prim, &gs_out_count_words);
|
||||
ralloc_free(vs);
|
||||
} else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
struct asahi_fs_shader_key *key = &key_->fs;
|
||||
|
||||
@@ -2065,8 +2057,14 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
|
||||
base_key.fs.nr_samples = key_->fs.nr_samples;
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
base_key.vs.outputs_flat_shaded = key_->vs.outputs_flat_shaded;
|
||||
base_key.vs.outputs_linear_shaded = key_->vs.outputs_linear_shaded;
|
||||
struct asahi_vs_shader_key *key = &key_->vs;
|
||||
|
||||
if (key->next_stage == ASAHI_VS_FS) {
|
||||
base_key.vs.outputs_flat_shaded = key_->vs.next.fs.outputs_flat_shaded;
|
||||
|
||||
base_key.vs.outputs_linear_shaded =
|
||||
key_->vs.next.fs.outputs_linear_shaded;
|
||||
}
|
||||
}
|
||||
|
||||
struct agx_compiled_shader *compiled =
|
||||
@@ -2423,7 +2421,7 @@ rast_prim(enum mesa_prim mode, unsigned fill_mode)
|
||||
}
|
||||
|
||||
static bool
|
||||
agx_update_vs(struct agx_context *ctx)
|
||||
agx_update_vs(struct agx_context *ctx, unsigned index_size_B)
|
||||
{
|
||||
/* Only proceed if the shader or anything the key depends on changes
|
||||
*
|
||||
@@ -2431,27 +2429,38 @@ agx_update_vs(struct agx_context *ctx)
|
||||
* clip_halfz: RS
|
||||
* outputs_{flat,linear}_shaded: FS_PROG
|
||||
*/
|
||||
if (!(ctx->dirty & (AGX_DIRTY_VS_PROG | AGX_DIRTY_VERTEX | AGX_DIRTY_XFB |
|
||||
AGX_DIRTY_FS_PROG | AGX_DIRTY_RS | AGX_DIRTY_PRIM)))
|
||||
if (!((ctx->dirty & (AGX_DIRTY_VS_PROG | AGX_DIRTY_VERTEX | AGX_DIRTY_XFB |
|
||||
AGX_DIRTY_FS_PROG | AGX_DIRTY_RS | AGX_DIRTY_PRIM)) ||
|
||||
ctx->stage[PIPE_SHADER_TESS_EVAL].shader ||
|
||||
ctx->stage[PIPE_SHADER_GEOMETRY].shader || ctx->in_tess))
|
||||
return false;
|
||||
|
||||
enum mesa_prim rasterized_prim =
|
||||
rast_prim(ctx->batch->reduced_prim, ctx->rast->base.fill_front);
|
||||
|
||||
struct asahi_vs_shader_key key = {
|
||||
.clip_halfz = ctx->rast->base.clip_halfz,
|
||||
.next_stage = ctx->stage[PIPE_SHADER_TESS_EVAL].shader && !ctx->in_tess
|
||||
? ASAHI_VS_TCS
|
||||
: ctx->stage[PIPE_SHADER_GEOMETRY].shader ? ASAHI_VS_GS
|
||||
: ASAHI_VS_FS,
|
||||
};
|
||||
|
||||
if (key.next_stage == ASAHI_VS_FS) {
|
||||
key.next.fs.clip_halfz = ctx->rast->base.clip_halfz;
|
||||
|
||||
/* If we are not rasterizing points, don't set fixed_point_size to
|
||||
* eliminate the useless point size write.
|
||||
*/
|
||||
.fixed_point_size = !ctx->rast->base.point_size_per_vertex &&
|
||||
rasterized_prim == MESA_PRIM_POINTS,
|
||||
key.next.fs.fixed_point_size = !ctx->rast->base.point_size_per_vertex &&
|
||||
rasterized_prim == MESA_PRIM_POINTS;
|
||||
|
||||
.outputs_flat_shaded =
|
||||
ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_flat_shaded,
|
||||
.outputs_linear_shaded =
|
||||
ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_linear_shaded,
|
||||
};
|
||||
key.next.fs.outputs_flat_shaded =
|
||||
ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_flat_shaded;
|
||||
key.next.fs.outputs_linear_shaded =
|
||||
ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_linear_shaded;
|
||||
} else if (key.next_stage == ASAHI_VS_GS) {
|
||||
key.next.gs.index_size_B = index_size_B;
|
||||
}
|
||||
|
||||
memcpy(key.attribs, &ctx->attributes->key, sizeof(key.attribs));
|
||||
|
||||
@@ -2519,9 +2528,7 @@ agx_update_gs(struct agx_context *ctx, const struct pipe_draw_info *info,
|
||||
tgt->stride = gs->xfb_strides[i];
|
||||
}
|
||||
|
||||
/* XXX: Deduplicate this code from regular vertex */
|
||||
struct asahi_gs_shader_key key = {
|
||||
.ia.index_size = info->index_size,
|
||||
.ia.mode = info->mode,
|
||||
.ia.flatshade_first =
|
||||
ia_needs_provoking(info->mode) && ctx->rast->base.flatshade_first,
|
||||
@@ -2529,15 +2536,6 @@ agx_update_gs(struct agx_context *ctx, const struct pipe_draw_info *info,
|
||||
.rasterizer_discard = ctx->rast->base.rasterizer_discard,
|
||||
};
|
||||
|
||||
memcpy(key.attribs, &ctx->attributes->key, sizeof(key.attribs));
|
||||
|
||||
static_assert(sizeof(key.input_nir_sha1) ==
|
||||
sizeof(ctx->stage[PIPE_SHADER_VERTEX].shader->nir_sha1),
|
||||
"common size for shader sha-1");
|
||||
|
||||
memcpy(key.input_nir_sha1, ctx->stage[PIPE_SHADER_VERTEX].shader->nir_sha1,
|
||||
sizeof(key.input_nir_sha1));
|
||||
|
||||
return agx_update_shader(ctx, &ctx->gs, PIPE_SHADER_GEOMETRY,
|
||||
(union asahi_shader_key *)&key);
|
||||
}
|
||||
@@ -4131,10 +4129,11 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
|
||||
}
|
||||
}
|
||||
|
||||
/* Calculate input primitive count for direct draws, and allocate the count
|
||||
* buffer. GPU calculates and allocates for indirect draws.
|
||||
/* Calculate input primitive count for direct draws, and allocate the vertex
|
||||
* & count buffers. GPU calculates and allocates for indirect draws.
|
||||
*/
|
||||
unsigned count_buffer_stride = batch->ctx->gs->gs_count_words * 4;
|
||||
params.vs_outputs = batch->ctx->vs->info.outputs;
|
||||
|
||||
if (indirect) {
|
||||
params.count_buffer_stride = count_buffer_stride;
|
||||
@@ -4142,13 +4141,21 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
|
||||
unsigned prim_per_instance =
|
||||
u_decomposed_prims_for_vertices(info->mode, draw->count);
|
||||
params.input_primitives = prim_per_instance * info->instance_count;
|
||||
params.input_vertices = draw->count;
|
||||
|
||||
unsigned vb_size = libagx_tcs_in_size(draw->count * info->instance_count,
|
||||
params.vs_outputs);
|
||||
unsigned size = params.input_primitives * count_buffer_stride;
|
||||
|
||||
if (size) {
|
||||
params.count_buffer =
|
||||
agx_pool_alloc_aligned(&batch->pool, size, 4).gpu;
|
||||
}
|
||||
|
||||
if (vb_size) {
|
||||
params.vertex_buffer =
|
||||
agx_pool_alloc_aligned(&batch->pool, vb_size, 4).gpu;
|
||||
}
|
||||
}
|
||||
|
||||
return agx_pool_upload_aligned_with_bo(&batch->pool, &params, sizeof(params),
|
||||
@@ -4178,9 +4185,11 @@ agx_launch_gs(struct agx_batch *batch, const struct pipe_draw_info *info,
|
||||
|
||||
assert(!info->primitive_restart && "should have been lowered");
|
||||
|
||||
struct pipe_grid_info grid = {.block = {1, 1, 1}};
|
||||
struct pipe_grid_info grid_vs = {.block = {1, 1, 1}};
|
||||
struct pipe_grid_info grid_gs = {.block = {1, 1, 1}};
|
||||
struct agx_resource grid_indirect_rsrc = {.bo = batch->geom_params_bo};
|
||||
|
||||
/* Setup grids */
|
||||
if (indirect) {
|
||||
assert(indirect->buffer && "drawauto already handled");
|
||||
|
||||
@@ -4200,23 +4209,35 @@ agx_launch_gs(struct agx_batch *batch, const struct pipe_draw_info *info,
|
||||
|
||||
/* Wrap the pool allocation in a fake resource for meta-Gallium use */
|
||||
assert(batch->geom_params_bo != NULL);
|
||||
grid.indirect = &grid_indirect_rsrc.base;
|
||||
grid.indirect_offset =
|
||||
(batch->uniforms.geometry_params - grid_indirect_rsrc.bo->ptr.gpu) +
|
||||
offsetof(struct agx_geometry_params, gs_grid);
|
||||
} else {
|
||||
unsigned prim_per_instance =
|
||||
u_decomposed_prims_for_vertices(info->mode, draws->count);
|
||||
grid_vs.indirect = &grid_indirect_rsrc.base;
|
||||
grid_gs.indirect = &grid_indirect_rsrc.base;
|
||||
|
||||
grid.grid[0] = prim_per_instance;
|
||||
grid.grid[1] = info->instance_count;
|
||||
grid.grid[2] = 1;
|
||||
unsigned param_offs =
|
||||
(batch->uniforms.geometry_params - grid_indirect_rsrc.bo->ptr.gpu);
|
||||
|
||||
grid_vs.indirect_offset =
|
||||
param_offs + offsetof(struct agx_geometry_params, vs_grid);
|
||||
|
||||
grid_gs.indirect_offset =
|
||||
param_offs + offsetof(struct agx_geometry_params, gs_grid);
|
||||
} else {
|
||||
grid_vs.grid[0] = draws->count;
|
||||
grid_vs.grid[1] = info->instance_count;
|
||||
grid_vs.grid[2] = 1;
|
||||
|
||||
grid_gs.grid[0] =
|
||||
u_decomposed_prims_for_vertices(info->mode, draws->count);
|
||||
grid_gs.grid[1] = info->instance_count;
|
||||
grid_gs.grid[2] = 1;
|
||||
}
|
||||
|
||||
/* Launch the vertex shader first */
|
||||
agx_launch(batch, &grid_vs, ctx->vs, ctx->vs->stage);
|
||||
|
||||
/* If there is a count shader, launch it and prefix sum the results. */
|
||||
if (gs->gs_count) {
|
||||
perf_debug(dev, "Geometry shader count");
|
||||
agx_launch(batch, &grid, gs->gs_count, PIPE_SHADER_GEOMETRY);
|
||||
agx_launch(batch, &grid_gs, gs->gs_count, PIPE_SHADER_GEOMETRY);
|
||||
|
||||
unsigned words = gs->gs_count_words;
|
||||
agx_launch(batch,
|
||||
@@ -4238,7 +4259,7 @@ agx_launch_gs(struct agx_batch *batch, const struct pipe_draw_info *info,
|
||||
gs->pre_gs, PIPE_SHADER_COMPUTE);
|
||||
|
||||
/* Launch the actual geometry shader */
|
||||
agx_launch(batch, &grid, gs, PIPE_SHADER_GEOMETRY);
|
||||
agx_launch(batch, &grid_gs, gs, PIPE_SHADER_GEOMETRY);
|
||||
|
||||
/* If we're not rasterizing, the pipeline ends here */
|
||||
if (ctx->rast->base.rasterizer_discard)
|
||||
@@ -4691,7 +4712,7 @@ agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info,
|
||||
|
||||
/* Run VS+TCS as compute */
|
||||
agx_upload_vbos(batch);
|
||||
agx_update_vs(ctx);
|
||||
agx_update_vs(ctx, info->index_size);
|
||||
agx_update_tcs(ctx, info);
|
||||
/* XXX */
|
||||
ctx->stage[PIPE_SHADER_TESS_CTRL].dirty = ~0;
|
||||
@@ -4954,7 +4975,7 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
|
||||
batch->reduced_prim = reduced_prim;
|
||||
|
||||
/* Update shaders first so we can use them after */
|
||||
if (agx_update_vs(ctx)) {
|
||||
if (agx_update_vs(ctx, idx_size)) {
|
||||
ctx->dirty |= AGX_DIRTY_VS | AGX_DIRTY_VS_PROG;
|
||||
ctx->stage[PIPE_SHADER_VERTEX].dirty = ~0;
|
||||
|
||||
|
||||
@@ -436,12 +436,28 @@ struct agx_velem_key {
|
||||
uint8_t pad;
|
||||
};
|
||||
|
||||
enum asahi_vs_next_stage {
|
||||
ASAHI_VS_FS,
|
||||
ASAHI_VS_GS,
|
||||
ASAHI_VS_TCS,
|
||||
};
|
||||
|
||||
struct asahi_vs_shader_key {
|
||||
struct agx_velem_key attribs[AGX_MAX_VBUFS];
|
||||
bool clip_halfz;
|
||||
bool fixed_point_size;
|
||||
uint64_t outputs_flat_shaded;
|
||||
uint64_t outputs_linear_shaded;
|
||||
enum asahi_vs_next_stage next_stage;
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint8_t index_size_B;
|
||||
} gs;
|
||||
|
||||
struct {
|
||||
bool clip_halfz;
|
||||
bool fixed_point_size;
|
||||
uint64_t outputs_flat_shaded;
|
||||
uint64_t outputs_linear_shaded;
|
||||
} fs;
|
||||
} next;
|
||||
};
|
||||
|
||||
struct agx_vertex_elements {
|
||||
@@ -483,20 +499,10 @@ struct asahi_tcs_shader_key {
|
||||
};
|
||||
|
||||
struct asahi_gs_shader_key {
|
||||
/* Input assembly key */
|
||||
struct agx_ia_key ia;
|
||||
|
||||
/* Vertex shader key */
|
||||
struct agx_velem_key attribs[AGX_MAX_VBUFS];
|
||||
|
||||
/* If true, this GS is run only for its side effects (including XFB) */
|
||||
bool rasterizer_discard;
|
||||
|
||||
/* Geometry shaders must be linked with a vertex shader. In a monolithic
|
||||
* pipeline, this is the vertex shader (or tessellation evaluation shader).
|
||||
* With separate shaders, this needs to be an internal passthrough program.
|
||||
*/
|
||||
uint8_t input_nir_sha1[20];
|
||||
};
|
||||
|
||||
union asahi_shader_key {
|
||||
|
||||
Reference in New Issue
Block a user