asahi: separate GS from VS

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27616>
This commit is contained in:
Alyssa Rosenzweig
2024-01-29 16:40:52 -04:00
committed by Marge Bot
parent af7084efa7
commit d96fbd4618
6 changed files with 234 additions and 200 deletions
+93 -109
View File
@@ -16,7 +16,9 @@
#include "libagx_shaders.h"
#include "nir.h"
#include "nir_builder_opcodes.h"
#include "nir_intrinsics.h"
#include "nir_xfb_info.h"
#include "shader_enums.h"
enum gs_counter {
GS_COUNTER_VERTICES = 0,
@@ -192,125 +194,42 @@ load_instance_id(nir_builder *b)
}
static bool
lower_gs_inputs(nir_builder *b, nir_intrinsic_instr *intr, void *data)
lower_gs_inputs(nir_builder *b, nir_intrinsic_instr *intr, void *key)
{
struct agx_lower_output_to_var_state *vs_state = data;
if (intr->intrinsic != nir_intrinsic_load_per_vertex_input)
return false;
/* I suppose we could support indirect GS inputs, but it would be more
* complicated and probably pointless (versus the lowering the frontend would
* otherwise do). GS lowering is hard enough as it is.
*/
assert(nir_src_is_const(intr->src[1]) && "no indirect GS inputs");
b->cursor = nir_instr_remove(&intr->instr);
nir_def *vertex = intr->src[0].ssa;
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
nir_variable *var =
vs_state->outputs[sem.location + nir_src_as_uint(intr->src[1])];
nir_def *location = nir_iadd_imm(b, intr->src[1].ssa, sem.location);
nir_def *val = nir_load_array_var(b, var, vertex);
/* Calculate the vertex ID we're pulling, based on the topology */
nir_def *vert_in_prim = intr->src[0].ssa;
nir_def *vertex = agx_vertex_id_for_topology(b, vert_in_prim, key);
/* The unrolled vertex ID uses the input_vertices, which differs from what
* our load_num_vertices will return (vertices vs primitives).
*/
nir_def *unrolled = nir_iadd(
b,
nir_imul(b, load_instance_id(b), load_geometry_param(b, input_vertices)),
vertex);
/* Calculate the address of the input given the unrolled vertex ID */
nir_def *addr = libagx_vertex_output_address(
b, nir_load_geometry_param_buffer_agx(b), unrolled, location,
load_geometry_param(b, vs_outputs));
assert(intr->def.bit_size == 32);
unsigned start = nir_intrinsic_component(intr);
unsigned count = intr->def.num_components;
val = nir_channels(b, val, nir_component_mask(count) << start);
addr = nir_iadd_imm(b, addr, nir_intrinsic_component(intr) * 4);
nir_def *val = nir_load_global_constant(b, addr, 4, intr->def.num_components,
intr->def.bit_size);
nir_def_rewrite_uses(&intr->def, val);
return true;
}
/*
 * Instruction-pass callback: replaces load_vertex_id_in_primitive_agx with
 * the enclosing function's parameter 0, converted to the bit size of the
 * intrinsic's destination. Returns true only when an instruction was replaced.
 */
static bool
lower_id_in_prim(nir_builder *b, nir_instr *instr, void *data)
{
   /* Only intrinsics can be the load we are looking for */
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);

   if (load->intrinsic == nir_intrinsic_load_vertex_id_in_primitive_agx) {
      /* Emit the replacement at the spot the intrinsic occupied */
      b->cursor = nir_instr_remove(instr);

      /* The ID in the primitive is passed as a function parameter */
      nir_def *param = nir_load_param(b, 0);
      nir_def *resized = nir_u2uN(b, param, load->def.bit_size);

      nir_def_rewrite_uses(&load->def, resized);
      return true;
   }

   return false;
}
/*
 * Merge a vertex shader into a geometry shader so the GS computes its own
 * inputs.
 *
 * Steps, in order:
 *  1. For every output slot the VS writes, create a shader_temp array variable
 *     in the GS with gs->info.gs.vertices_in uvec4 elements.
 *  2. Rewrite GS per-vertex input loads with lower_gs_inputs, handing it the
 *     table of arrays.
 *  3. Clone the VS entrypoint into the GS as a function named "vertex" taking
 *     a single parameter, then run agx_lower_output_to_var and
 *     lower_id_in_prim over the clone.
 *  4. Insert one call to that function per input vertex at the very start of
 *     the GS entrypoint.
 *  5. Copy texture/image usage info from the VS to the GS.
 *  6. Inline the calls, unlink the helper function, and clean up the
 *     resulting variables and indirect accesses.
 *
 * vs: vertex shader to link in; its functions are assumed to be inlined
 *     already (see the comment before the clone).
 * gs: geometry shader, modified in place.
 */
static void
agx_nir_link_vs_gs(nir_shader *vs, nir_shader *gs)
{
struct agx_lower_output_to_var_state state = {.arrayed = true};
/* Vertex shader outputs will be placed in arrays. Create those arrays. */
u_foreach_bit64(slot, vs->info.outputs_written) {
state.outputs[slot] = nir_variable_create(
gs, nir_var_shader_temp,
glsl_array_type(glsl_uvec4_type(), gs->info.gs.vertices_in, 0),
gl_varying_slot_name_for_stage(slot, MESA_SHADER_VERTEX));
}
/* Rewrite geometry shader inputs to read from those arrays */
NIR_PASS(_, gs, nir_shader_intrinsics_pass, lower_gs_inputs,
nir_metadata_block_index | nir_metadata_dominance, &state);
/* Link the vertex shader with the geometry shader. This assumes that
* all functions have been inlined in the vertex shader.
*/
nir_function_impl *vs_entry = nir_shader_get_entrypoint(vs);
nir_function *vs_function = nir_function_create(gs, "vertex");
vs_function->impl = nir_function_impl_clone(gs, vs_entry);
vs_function->impl->function = vs_function;
/* The vertex shader needs to be passed its index in the input primitive */
vs_function->num_params = 1;
vs_function->params = rzalloc_array(gs, nir_parameter, 1);
/* NOTE(review): presumably {num_components, bit_size}; this matches the
 * 16-bit scalar immediate handed to nir_call below -- confirm.
 */
vs_function->params[0] = (nir_parameter){1, 16};
/* The vertex shader needs to be expressed in terms of that index */
nir_function_instructions_pass(
vs_function->impl, agx_lower_output_to_var,
nir_metadata_block_index | nir_metadata_dominance, &state);
nir_function_instructions_pass(
vs_function->impl, lower_id_in_prim,
nir_metadata_block_index | nir_metadata_dominance, NULL);
/* Run the vertex shader for each vertex in the input primitive */
nir_function_impl *gs_entry = nir_shader_get_entrypoint(gs);
nir_builder b = nir_builder_at(nir_before_impl(gs_entry));
for (unsigned i = 0; i < gs->info.gs.vertices_in; ++i) {
nir_call(&b, vs_function, nir_imm_intN_t(&b, i, 16));
}
/* Copy texture info. We force bindless on GS for now. */
gs->info.num_textures = vs->info.num_textures;
gs->info.num_images = vs->info.num_images;
BITSET_COPY(gs->info.textures_used, vs->info.textures_used);
BITSET_COPY(gs->info.textures_used_by_txf, vs->info.textures_used_by_txf);
BITSET_COPY(gs->info.images_used, vs->info.images_used);
/* Inline the VS into the GS */
nir_inline_functions(gs);
/* Unlink the helper function now that the calls have been inlined */
exec_node_remove(&vs_function->node);
nir_lower_global_vars_to_local(gs);
/* Do some optimization to get rid of indirects */
bool progress;
/* Iterate to a fixed point: stop once neither pass reports progress */
do {
progress = false;
NIR_PASS(progress, gs, nir_opt_constant_folding);
NIR_PASS(progress, gs, nir_opt_dce);
} while (progress);
/* If any indirects hung around, lower them */
nir_lower_indirect_derefs(gs, nir_var_function_temp, UINT32_MAX);
}
/*
* Unrolled ID is the index of the primitive in the count buffer, given as
* (instance ID * # vertices/instance) + vertex ID
@@ -1091,14 +1010,12 @@ link_libagx(nir_shader *nir, const nir_shader *libagx)
}
bool
agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx,
agx_nir_lower_gs(nir_shader *gs, const nir_shader *libagx,
struct agx_ia_key *ia, bool rasterizer_discard,
nir_shader **gs_count, nir_shader **gs_copy,
nir_shader **pre_gs, enum mesa_prim *out_mode,
unsigned *out_count_words)
{
link_libagx(vs, libagx);
/* Collect output component counts so we can size the geometry output buffer
* appropriately, instead of assuming everything is vec4.
*/
@@ -1120,8 +1037,8 @@ agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx,
nir_metadata_block_index | nir_metadata_dominance, nir_imm_int(&b, 0));
}
/* Link VS into the GS */
agx_nir_link_vs_gs(vs, gs);
NIR_PASS(_, gs, nir_shader_intrinsics_pass, lower_gs_inputs,
nir_metadata_block_index | nir_metadata_dominance, ia);
/* Lower geometry shader writes to contain all of the required counts, so we
* know where in the various buffers we should write vertices.
@@ -1265,6 +1182,73 @@ agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx,
return true;
}
/*
 * Vertex shaders (tessellation evaluation shaders) that feed a geometry shader
 * execute as a dedicated compute prepass. That prepass is invoked as
 * (count, instances, 1), i.e. like a geometry shader consuming POINTS, so the
 * vertex output buffer is indexed according to calc_unrolled_id.
 *
 * The lowering turns vertex shader I/O into compute:
 *
 *  - Vertex ID becomes an index buffer pull (without applying the topology).
 *  - Store output becomes a store into the global vertex output buffer.
 *
 * This callback implements the second item: each store_output intrinsic is
 * removed and replaced with a global store. Any other intrinsic is left alone
 * and false is returned.
 */
static bool
lower_vs_before_gs(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   if (intr->intrinsic != nir_intrinsic_store_output)
      return false;

   b->cursor = nir_instr_remove(&intr->instr);

   /* Slot being written: base location plus the offset source */
   nir_io_semantics semantics = nir_intrinsic_io_semantics(intr);
   nir_def *slot = nir_iadd_imm(b, intr->src[1].ssa, semantics.location);

   /* Where this invocation's copy of that slot starts in the output buffer */
   nir_def *dest = libagx_vertex_output_address(
      b, nir_load_geometry_param_buffer_agx(b), calc_unrolled_id(b), slot,
      nir_imm_int64(b, b->shader->info.outputs_written));

   /* Components are 32-bit, so each one is 4 bytes past the previous */
   assert(nir_src_bit_size(intr->src[0]) == 32);
   unsigned component_offset = nir_intrinsic_component(intr) * 4;
   dest = nir_iadd_imm(b, dest, component_offset);

   nir_def *value = intr->src[0].ssa;
   nir_store_global(b, dest, 4, value, nir_intrinsic_write_mask(intr));
   return true;
}
bool
agx_nir_lower_vs_before_gs(struct nir_shader *vs,
const struct nir_shader *libagx,
unsigned index_size_B, uint64_t *outputs)
{
bool progress = false;
/* Lower vertex ID to an index buffer pull without a topology applied */
progress |= agx_nir_lower_ia(vs, &(struct agx_ia_key){
.index_size = index_size_B,
.mode = MESA_PRIM_POINTS,
});
/* Lower vertex stores to memory stores */
progress |= nir_shader_intrinsics_pass(
vs, lower_vs_before_gs, nir_metadata_block_index | nir_metadata_dominance,
&index_size_B);
/* Lower instance ID and num vertices */
progress |= nir_shader_intrinsics_pass(
vs, lower_id, nir_metadata_block_index | nir_metadata_dominance, NULL);
/* Link libagx, used in lower_vs_before_gs */
if (progress)
link_libagx(vs, libagx);
/* Turn into a compute shader now that we're free of vertexisms */
vs->info.stage = MESA_SHADER_COMPUTE;
memset(&vs->info.cs, 0, sizeof(vs->info.cs));
vs->xfb_info = NULL;
*outputs = vs->info.outputs_written;
return true;
}
void
agx_nir_prefix_sum_gs(nir_builder *b, const void *data)
{
+9 -5
View File
@@ -32,11 +32,15 @@ struct nir_def *agx_vertex_id_for_topology(struct nir_builder *b,
bool agx_nir_lower_ia(struct nir_shader *s, struct agx_ia_key *ia);
bool agx_nir_lower_gs(struct nir_shader *gs, struct nir_shader *vs,
const struct nir_shader *libagx, struct agx_ia_key *ia,
bool rasterizer_discard, struct nir_shader **gs_count,
struct nir_shader **gs_copy, struct nir_shader **pre_gs,
enum mesa_prim *out_mode, unsigned *out_count_words);
bool agx_nir_lower_vs_before_gs(struct nir_shader *vs,
const struct nir_shader *libagx,
unsigned index_size_B, uint64_t *outputs);
bool agx_nir_lower_gs(struct nir_shader *gs, const struct nir_shader *libagx,
struct agx_ia_key *ia, bool rasterizer_discard,
struct nir_shader **gs_count, struct nir_shader **gs_copy,
struct nir_shader **pre_gs, enum mesa_prim *out_mode,
unsigned *out_count_words);
void agx_nir_prefix_sum_gs(struct nir_builder *b, const void *data);
+27 -8
View File
@@ -353,15 +353,18 @@ libagx_gs_setup_indirect(global struct agx_geometry_params *p,
uint vertex_count = in_draw[0];
uint instance_count = in_draw[1];
/* Calculate number of primitives input into the GS */
uint prim_per_instance = u_decomposed_prims_for_vertices(mode, vertex_count);
uint2 draw = (uint2)(prim_per_instance, instance_count);
p->input_primitives = prim_per_instance * instance_count;
p->input_vertices = vertex_count;
/* There are primitives*instances primitives total */
p->input_primitives = draw.x * draw.y;
/* Invoke VS as (vertices, instances, 1); GS as (primitives, instances, 1) */
p->vs_grid[0] = vertex_count;
p->vs_grid[1] = instance_count;
p->vs_grid[2] = 1;
/* Invoke as (primitives, instances, 1) */
p->gs_grid[0] = draw.x;
p->gs_grid[1] = draw.y;
p->gs_grid[0] = prim_per_instance;
p->gs_grid[1] = instance_count;
p->gs_grid[2] = 1;
/* If indexing is enabled, the third word is the offset into the index buffer
@@ -373,10 +376,18 @@ libagx_gs_setup_indirect(global struct agx_geometry_params *p,
ia->index_buffer += ((constant uint *)ia->draws)[2] * ia->index_size_B;
}
/* We may need to allocate a GS count buffer, do so now */
/* We may need to allocate VS and GS count buffers, do so now */
global struct agx_geometry_state *state = p->state;
uint vertex_buffer_size =
libagx_tcs_in_size(vertex_count * instance_count, p->vs_outputs);
p->count_buffer = (global uint *)(state->heap + state->heap_bottom);
state->heap_bottom += align(p->input_primitives * p->count_buffer_stride, 4);
state->heap_bottom +=
align(p->input_primitives * p->count_buffer_stride, 16);
p->vertex_buffer = (global uint *)(state->heap + state->heap_bottom);
state->heap_bottom += align(vertex_buffer_size, 4);
}
void
@@ -422,3 +433,11 @@ libagx_is_provoking_last(global struct agx_ia_state *ia)
{
return !ia->flatshade_first;
}
/*
 * Address of one output slot of one vertex: the vertex buffer base from the
 * geometry params plus the offset libagx_tcs_in_offs computes for
 * (vtx, location) under the vs_outputs mask.
 */
uintptr_t
libagx_vertex_output_address(constant struct agx_geometry_params *p, uint vtx,
                             gl_varying_slot location, uint64_t vs_outputs)
{
   uintptr_t base = (uintptr_t)p->vertex_buffer;
   uintptr_t offset = libagx_tcs_in_offs(vtx, location, vs_outputs);

   return base + offset;
}
+1 -1
View File
@@ -209,7 +209,7 @@ AGX_STATIC_ASSERT(sizeof(struct agx_tess_params) == 22 * 4);
*
* TODO: compact.
*/
static inline ushort
static inline uint
libagx_tcs_in_offs(uint vtx, gl_varying_slot location,
uint64_t crosslane_vs_out_mask)
{
+84 -63
View File
@@ -1908,11 +1908,20 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
struct asahi_vs_shader_key *key = &key_->vs;
NIR_PASS(_, nir, lower_vbo, key->attribs);
NIR_PASS(_, nir, agx_nir_lower_point_size, key->fixed_point_size);
if (should_lower_clip_m1_1(dev, key->clip_halfz)) {
NIR_PASS(_, nir, nir_shader_intrinsics_pass, agx_nir_lower_clip_m1_1,
nir_metadata_block_index | nir_metadata_dominance, NULL);
if (key->next_stage == ASAHI_VS_FS) {
NIR_PASS(_, nir, agx_nir_lower_point_size,
key->next.fs.fixed_point_size);
if (should_lower_clip_m1_1(dev, key->next.fs.clip_halfz)) {
NIR_PASS(_, nir, nir_shader_intrinsics_pass,
agx_nir_lower_clip_m1_1,
nir_metadata_block_index | nir_metadata_dominance, NULL);
}
} else if (key->next_stage == ASAHI_VS_GS) {
NIR_PASS(_, nir, agx_nir_lower_sysvals, false);
NIR_PASS(_, nir, agx_nir_lower_vs_before_gs, dev->libagx,
key->next.gs.index_size_B, &outputs);
}
} else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
struct asahi_tcs_shader_key *key = &key_->tcs;
@@ -1940,28 +1949,11 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
NIR_PASS(_, nir, nir_io_add_intrinsic_xfb_info);
}
struct blob_reader vs_reader;
blob_reader_init(&vs_reader, linked_so->serialized_nir.data,
linked_so->serialized_nir.size);
nir_shader *vs = nir_deserialize(NULL, &agx_nir_options, &vs_reader);
/* Apply the VS key to the VS before linking it in */
NIR_PASS(_, vs, lower_vbo, key->attribs);
NIR_PASS(_, vs, agx_nir_lower_ia, &key->ia);
NIR_PASS(_, vs, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
NIR_PASS(_, nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
/* Lower VS sysvals before it's merged in, so we access the correct shader
* stage for UBOs etc. Skip draw parameters, those are lowered later.
*/
NIR_PASS(_, vs, agx_nir_lower_sysvals, false);
/* Link VS with GS */
NIR_PASS(_, nir, agx_nir_lower_gs, vs, dev->libagx, &key->ia,
NIR_PASS(_, nir, agx_nir_lower_gs, dev->libagx, &key->ia,
key->rasterizer_discard, &gs_count, &gs_copy, &pre_gs,
&gs_out_prim, &gs_out_count_words);
ralloc_free(vs);
} else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
struct asahi_fs_shader_key *key = &key_->fs;
@@ -2065,8 +2057,14 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
base_key.fs.nr_samples = key_->fs.nr_samples;
if (nir->info.stage == MESA_SHADER_VERTEX) {
base_key.vs.outputs_flat_shaded = key_->vs.outputs_flat_shaded;
base_key.vs.outputs_linear_shaded = key_->vs.outputs_linear_shaded;
struct asahi_vs_shader_key *key = &key_->vs;
if (key->next_stage == ASAHI_VS_FS) {
base_key.vs.outputs_flat_shaded = key_->vs.next.fs.outputs_flat_shaded;
base_key.vs.outputs_linear_shaded =
key_->vs.next.fs.outputs_linear_shaded;
}
}
struct agx_compiled_shader *compiled =
@@ -2423,7 +2421,7 @@ rast_prim(enum mesa_prim mode, unsigned fill_mode)
}
static bool
agx_update_vs(struct agx_context *ctx)
agx_update_vs(struct agx_context *ctx, unsigned index_size_B)
{
/* Only proceed if the shader or anything the key depends on changes
*
@@ -2431,27 +2429,38 @@ agx_update_vs(struct agx_context *ctx)
* clip_halfz: RS
* outputs_{flat,linear}_shaded: FS_PROG
*/
if (!(ctx->dirty & (AGX_DIRTY_VS_PROG | AGX_DIRTY_VERTEX | AGX_DIRTY_XFB |
AGX_DIRTY_FS_PROG | AGX_DIRTY_RS | AGX_DIRTY_PRIM)))
if (!((ctx->dirty & (AGX_DIRTY_VS_PROG | AGX_DIRTY_VERTEX | AGX_DIRTY_XFB |
AGX_DIRTY_FS_PROG | AGX_DIRTY_RS | AGX_DIRTY_PRIM)) ||
ctx->stage[PIPE_SHADER_TESS_EVAL].shader ||
ctx->stage[PIPE_SHADER_GEOMETRY].shader || ctx->in_tess))
return false;
enum mesa_prim rasterized_prim =
rast_prim(ctx->batch->reduced_prim, ctx->rast->base.fill_front);
struct asahi_vs_shader_key key = {
.clip_halfz = ctx->rast->base.clip_halfz,
.next_stage = ctx->stage[PIPE_SHADER_TESS_EVAL].shader && !ctx->in_tess
? ASAHI_VS_TCS
: ctx->stage[PIPE_SHADER_GEOMETRY].shader ? ASAHI_VS_GS
: ASAHI_VS_FS,
};
if (key.next_stage == ASAHI_VS_FS) {
key.next.fs.clip_halfz = ctx->rast->base.clip_halfz;
/* If we are not rasterizing points, don't set fixed_point_size to
* eliminate the useless point size write.
*/
.fixed_point_size = !ctx->rast->base.point_size_per_vertex &&
rasterized_prim == MESA_PRIM_POINTS,
key.next.fs.fixed_point_size = !ctx->rast->base.point_size_per_vertex &&
rasterized_prim == MESA_PRIM_POINTS;
.outputs_flat_shaded =
ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_flat_shaded,
.outputs_linear_shaded =
ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_linear_shaded,
};
key.next.fs.outputs_flat_shaded =
ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_flat_shaded;
key.next.fs.outputs_linear_shaded =
ctx->stage[PIPE_SHADER_FRAGMENT].shader->info.inputs_linear_shaded;
} else if (key.next_stage == ASAHI_VS_GS) {
key.next.gs.index_size_B = index_size_B;
}
memcpy(key.attribs, &ctx->attributes->key, sizeof(key.attribs));
@@ -2519,9 +2528,7 @@ agx_update_gs(struct agx_context *ctx, const struct pipe_draw_info *info,
tgt->stride = gs->xfb_strides[i];
}
/* XXX: Deduplicate this code from regular vertex */
struct asahi_gs_shader_key key = {
.ia.index_size = info->index_size,
.ia.mode = info->mode,
.ia.flatshade_first =
ia_needs_provoking(info->mode) && ctx->rast->base.flatshade_first,
@@ -2529,15 +2536,6 @@ agx_update_gs(struct agx_context *ctx, const struct pipe_draw_info *info,
.rasterizer_discard = ctx->rast->base.rasterizer_discard,
};
memcpy(key.attribs, &ctx->attributes->key, sizeof(key.attribs));
static_assert(sizeof(key.input_nir_sha1) ==
sizeof(ctx->stage[PIPE_SHADER_VERTEX].shader->nir_sha1),
"common size for shader sha-1");
memcpy(key.input_nir_sha1, ctx->stage[PIPE_SHADER_VERTEX].shader->nir_sha1,
sizeof(key.input_nir_sha1));
return agx_update_shader(ctx, &ctx->gs, PIPE_SHADER_GEOMETRY,
(union asahi_shader_key *)&key);
}
@@ -4131,10 +4129,11 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
}
}
/* Calculate input primitive count for direct draws, and allocate the count
* buffer. GPU calculates and allocates for indirect draws.
/* Calculate input primitive count for direct draws, and allocate the vertex
* & count buffers. GPU calculates and allocates for indirect draws.
*/
unsigned count_buffer_stride = batch->ctx->gs->gs_count_words * 4;
params.vs_outputs = batch->ctx->vs->info.outputs;
if (indirect) {
params.count_buffer_stride = count_buffer_stride;
@@ -4142,13 +4141,21 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
unsigned prim_per_instance =
u_decomposed_prims_for_vertices(info->mode, draw->count);
params.input_primitives = prim_per_instance * info->instance_count;
params.input_vertices = draw->count;
unsigned vb_size = libagx_tcs_in_size(draw->count * info->instance_count,
params.vs_outputs);
unsigned size = params.input_primitives * count_buffer_stride;
if (size) {
params.count_buffer =
agx_pool_alloc_aligned(&batch->pool, size, 4).gpu;
}
if (vb_size) {
params.vertex_buffer =
agx_pool_alloc_aligned(&batch->pool, vb_size, 4).gpu;
}
}
return agx_pool_upload_aligned_with_bo(&batch->pool, &params, sizeof(params),
@@ -4178,9 +4185,11 @@ agx_launch_gs(struct agx_batch *batch, const struct pipe_draw_info *info,
assert(!info->primitive_restart && "should have been lowered");
struct pipe_grid_info grid = {.block = {1, 1, 1}};
struct pipe_grid_info grid_vs = {.block = {1, 1, 1}};
struct pipe_grid_info grid_gs = {.block = {1, 1, 1}};
struct agx_resource grid_indirect_rsrc = {.bo = batch->geom_params_bo};
/* Setup grids */
if (indirect) {
assert(indirect->buffer && "drawauto already handled");
@@ -4200,23 +4209,35 @@ agx_launch_gs(struct agx_batch *batch, const struct pipe_draw_info *info,
/* Wrap the pool allocation in a fake resource for meta-Gallium use */
assert(batch->geom_params_bo != NULL);
grid.indirect = &grid_indirect_rsrc.base;
grid.indirect_offset =
(batch->uniforms.geometry_params - grid_indirect_rsrc.bo->ptr.gpu) +
offsetof(struct agx_geometry_params, gs_grid);
} else {
unsigned prim_per_instance =
u_decomposed_prims_for_vertices(info->mode, draws->count);
grid_vs.indirect = &grid_indirect_rsrc.base;
grid_gs.indirect = &grid_indirect_rsrc.base;
grid.grid[0] = prim_per_instance;
grid.grid[1] = info->instance_count;
grid.grid[2] = 1;
unsigned param_offs =
(batch->uniforms.geometry_params - grid_indirect_rsrc.bo->ptr.gpu);
grid_vs.indirect_offset =
param_offs + offsetof(struct agx_geometry_params, vs_grid);
grid_gs.indirect_offset =
param_offs + offsetof(struct agx_geometry_params, gs_grid);
} else {
grid_vs.grid[0] = draws->count;
grid_vs.grid[1] = info->instance_count;
grid_vs.grid[2] = 1;
grid_gs.grid[0] =
u_decomposed_prims_for_vertices(info->mode, draws->count);
grid_gs.grid[1] = info->instance_count;
grid_gs.grid[2] = 1;
}
/* Launch the vertex shader first */
agx_launch(batch, &grid_vs, ctx->vs, ctx->vs->stage);
/* If there is a count shader, launch it and prefix sum the results. */
if (gs->gs_count) {
perf_debug(dev, "Geometry shader count");
agx_launch(batch, &grid, gs->gs_count, PIPE_SHADER_GEOMETRY);
agx_launch(batch, &grid_gs, gs->gs_count, PIPE_SHADER_GEOMETRY);
unsigned words = gs->gs_count_words;
agx_launch(batch,
@@ -4238,7 +4259,7 @@ agx_launch_gs(struct agx_batch *batch, const struct pipe_draw_info *info,
gs->pre_gs, PIPE_SHADER_COMPUTE);
/* Launch the actual geometry shader */
agx_launch(batch, &grid, gs, PIPE_SHADER_GEOMETRY);
agx_launch(batch, &grid_gs, gs, PIPE_SHADER_GEOMETRY);
/* If we're not rasterizing, the pipeline ends here */
if (ctx->rast->base.rasterizer_discard)
@@ -4691,7 +4712,7 @@ agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info,
/* Run VS+TCS as compute */
agx_upload_vbos(batch);
agx_update_vs(ctx);
agx_update_vs(ctx, info->index_size);
agx_update_tcs(ctx, info);
/* XXX */
ctx->stage[PIPE_SHADER_TESS_CTRL].dirty = ~0;
@@ -4954,7 +4975,7 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
batch->reduced_prim = reduced_prim;
/* Update shaders first so we can use them after */
if (agx_update_vs(ctx)) {
if (agx_update_vs(ctx, idx_size)) {
ctx->dirty |= AGX_DIRTY_VS | AGX_DIRTY_VS_PROG;
ctx->stage[PIPE_SHADER_VERTEX].dirty = ~0;
+20 -14
View File
@@ -436,12 +436,28 @@ struct agx_velem_key {
uint8_t pad;
};
/* Stage that follows the vertex shader; stored in the vertex shader key */
enum asahi_vs_next_stage {
   /* Fragment shader follows */
   ASAHI_VS_FS = 0,

   /* Geometry shader follows */
   ASAHI_VS_GS = 1,

   /* Tessellation control shader follows */
   ASAHI_VS_TCS = 2,
};
struct asahi_vs_shader_key {
struct agx_velem_key attribs[AGX_MAX_VBUFS];
bool clip_halfz;
bool fixed_point_size;
uint64_t outputs_flat_shaded;
uint64_t outputs_linear_shaded;
enum asahi_vs_next_stage next_stage;
union {
struct {
uint8_t index_size_B;
} gs;
struct {
bool clip_halfz;
bool fixed_point_size;
uint64_t outputs_flat_shaded;
uint64_t outputs_linear_shaded;
} fs;
} next;
};
struct agx_vertex_elements {
@@ -483,20 +499,10 @@ struct asahi_tcs_shader_key {
};
/*
 * Key describing a geometry shader variant. It is populated in agx_update_gs
 * and handed to agx_update_shader for PIPE_SHADER_GEOMETRY.
 */
struct asahi_gs_shader_key {
/* Input assembly key */
struct agx_ia_key ia;
/* Vertex shader key */
struct agx_velem_key attribs[AGX_MAX_VBUFS];
/* If true, this GS is run only for its side effects (including XFB) */
bool rasterizer_discard;
/* Geometry shaders must be linked with a vertex shader. In a monolithic
* pipeline, this is the vertex shader (or tessellation evaluation shader).
* With separate shaders, this needs to be an internal passthrough program.
*/
uint8_t input_nir_sha1[20];
};
union asahi_shader_key {