libagx: always tessellate clockwise

It is easy enough to flip the winding order later in the pipeline instead, which
significantly reduces the number of tessellator variants.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31908>
This commit is contained in:
Alyssa Rosenzweig
2024-10-27 10:09:46 -04:00
parent 87e6324459
commit e86a35dad2
8 changed files with 80 additions and 38 deletions

View File

@@ -1567,14 +1567,13 @@ agx_nir_tessellate(nir_builder *b, const void *data)
nir_def *params = nir_load_preamble(b, 1, 64, .base = 0);
nir_def *patch = nir_channel(b, nir_load_global_invocation_id(b, 32), 0);
nir_def *mode = nir_imm_int(b, key->mode);
nir_def *ccw = nir_imm_bool(b, key->ccw);
if (key->prim == TESS_PRIMITIVE_ISOLINES)
libagx_tess_isoline(b, params, mode, ccw, patch);
libagx_tess_isoline(b, params, mode, patch);
else if (key->prim == TESS_PRIMITIVE_TRIANGLES)
libagx_tess_tri(b, params, mode, ccw, patch);
libagx_tess_tri(b, params, mode, patch);
else if (key->prim == TESS_PRIMITIVE_QUADS)
libagx_tess_quad(b, params, mode, ccw, patch);
libagx_tess_quad(b, params, mode, patch);
else
unreachable("invalid tess primitive");
}

View File

@@ -54,8 +54,7 @@ void agx_nir_unroll_restart(struct nir_builder *b, const void *key);
struct agx_tessellator_key {
enum tess_primitive_mode prim : 8;
enum libagx_tess_mode mode : 8;
bool ccw;
unsigned pad : 8;
unsigned pad : 16;
};
static_assert(sizeof(struct agx_tessellator_key) == 4, "padded");

View File

@@ -274,7 +274,7 @@ agxdecode_usc(struct agxdecode_ctx *ctx, const uint8_t *map,
{
enum agx_sampler_states *sampler_states = data;
enum agx_usc_control type = map[0];
uint8_t buf[8192];
uint8_t buf[3072];
bool extended_samplers =
(sampler_states != NULL) &&

View File

@@ -44,6 +44,18 @@ libagx_tes_buffer(constant struct libagx_tess_args *p)
/* Load the index for tessellated vertex `index` from the index buffer in `p`,
 * reversing the winding order of triangles on the fly when p->ccw is set.
 *
 * p:     tessellation arguments; reads p->ccw and p->index_buffer.
 * index: position in the index buffer to fetch.
 *
 * Returns the index buffer entry at the (possibly remapped) position.
 */
uint32_t
libagx_load_tes_index(constant struct libagx_tess_args *p, uint32_t index)
{
/* Swap second and third vertices of each triangle to flip winding order
* dynamically if needed. This assumes the index buffer holds triangles as
* consecutive groups of three indices -- TODO confirm against the callers
* that set p->ccw.
*/
if (p->ccw) {
/* Position within the group of three: 0 is left alone, 1 and 2 trade. */
uint id = index % 3;
if (id == 1)
index++;
else if (id == 2)
index--;
}
return p->index_buffer[index];
}

View File

@@ -96,7 +96,6 @@ struct INDEX_PATCH_CONTEXT2 {
};
struct CHWTessellator {
bool cw;
enum libagx_tess_mode mode;
uint index_bias;
@@ -345,10 +344,9 @@ static void
DefineClockwiseTriangle(private struct CHWTessellator *ctx, int index0,
int index1, int index2, int indexStorageBaseOffset)
{
// inputs a clockwise triangle, stores a CW or CCW triangle per state state
DefineIndex(ctx, index0, indexStorageBaseOffset);
DefineIndex(ctx, ctx->cw ? index1 : index2, indexStorageBaseOffset + 1);
DefineIndex(ctx, ctx->cw ? index2 : index1, indexStorageBaseOffset + 2);
DefineIndex(ctx, index1, indexStorageBaseOffset + 1);
DefineIndex(ctx, index2, indexStorageBaseOffset + 2);
}
static uint32_t
@@ -741,7 +739,7 @@ StitchTransition(private struct CHWTessellator *ctx, int baseIndexOffset,
void
libagx_tess_isoline(constant struct libagx_tess_args *p,
enum libagx_tess_mode mode, bool ccw, uint patch)
enum libagx_tess_mode mode, uint patch)
{
enum libagx_tess_partitioning partitioning = p->partitioning;
@@ -840,7 +838,7 @@ libagx_tess_isoline(constant struct libagx_tess_args *p,
void
libagx_tess_tri(constant struct libagx_tess_args *p, enum libagx_tess_mode mode,
bool ccw, uint patch)
uint patch)
{
enum libagx_tess_partitioning partitioning = p->partitioning;
@@ -851,7 +849,6 @@ libagx_tess_tri(constant struct libagx_tess_args *p, enum libagx_tess_mode mode,
float insideTessFactor_f = factors[4];
struct CHWTessellator ctx;
ctx.cw = !ccw;
ctx.Point = NULL;
ctx.Index = NULL;
ctx.mode = mode;
@@ -1172,7 +1169,7 @@ libagx_tess_tri(constant struct libagx_tess_args *p, enum libagx_tess_mode mode,
void
libagx_tess_quad(constant struct libagx_tess_args *p,
enum libagx_tess_mode mode, bool ccw, uint patch)
enum libagx_tess_mode mode, uint patch)
{
enum libagx_tess_partitioning partitioning = p->partitioning;
global float *factors = tess_factors(p, patch);
@@ -1187,7 +1184,6 @@ libagx_tess_quad(constant struct libagx_tess_args *p,
// TODO: fix designated initializer optimization in NIR
struct CHWTessellator ctx;
ctx.cw = !ccw;
ctx.Point = NULL;
ctx.Index = NULL;
ctx.mode = mode;

View File

@@ -130,5 +130,11 @@ struct libagx_tess_args {
*/
enum libagx_tess_partitioning partitioning;
uint32_t points_mode;
/* When fed into a geometry shader, triangles should be counter-clockwise.
* The tessellator always produces clockwise triangles, but we can swap
* dynamically in the TES.
*/
uint32_t ccw;
} PACKED;
AGX_STATIC_ASSERT(sizeof(struct libagx_tess_args) == 50 * 4);
AGX_STATIC_ASSERT(sizeof(struct libagx_tess_args) == 51 * 4);

View File

@@ -1285,6 +1285,12 @@ hk_upload_tess_params(struct hk_cmd_buffer *cmd, struct libagx_tess_args *out,
.points_mode = gfx->tess.info.points,
};
if (!args.points_mode && gfx->tess.info.mode != TESS_PRIMITIVE_ISOLINES) {
args.ccw = gfx->tess.info.ccw;
args.ccw ^=
dyn->ts.domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT;
}
uint32_t draw_stride_el = 5;
size_t draw_stride_B = draw_stride_el * sizeof(uint32_t);
@@ -1672,13 +1678,8 @@ hk_launch_tess(struct hk_cmd_buffer *cmd, struct hk_cs *cs, struct hk_draw draw)
dev, cs, tcs, hk_upload_usc_words(cmd, tcs, tcs->only_linked), grid_tcs,
hk_grid(tcs->info.tess.tcs_output_patch_size, 1, 1));
/* If the domain is flipped, we need to flip the winding order */
bool ccw = info.ccw;
ccw ^= dyn->ts.domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT;
struct agx_tessellator_key key = {
.prim = info.mode,
.ccw = ccw,
};
/* Generate counts */
@@ -2345,11 +2346,13 @@ hk_flush_ppp_state(struct hk_cmd_buffer *cmd, struct hk_cs *cs, uint8_t **out)
.output_select = hw_vs_dirty || linked_fs_dirty || varyings_dirty,
.varying_counts_32 = varyings_dirty,
.varying_counts_16 = varyings_dirty,
.cull =
IS_DIRTY(RS_CULL_MODE) || IS_DIRTY(RS_RASTERIZER_DISCARD_ENABLE) ||
IS_DIRTY(RS_FRONT_FACE) || IS_DIRTY(RS_DEPTH_CLIP_ENABLE) ||
IS_DIRTY(RS_DEPTH_CLAMP_ENABLE) || IS_DIRTY(RS_LINE_MODE) ||
IS_DIRTY(IA_PRIMITIVE_TOPOLOGY) || (gfx->dirty & HK_DIRTY_PROVOKING),
.cull = IS_DIRTY(RS_CULL_MODE) ||
IS_DIRTY(RS_RASTERIZER_DISCARD_ENABLE) ||
IS_DIRTY(RS_FRONT_FACE) || IS_DIRTY(RS_DEPTH_CLIP_ENABLE) ||
IS_DIRTY(RS_DEPTH_CLAMP_ENABLE) || IS_DIRTY(RS_LINE_MODE) ||
IS_DIRTY(IA_PRIMITIVE_TOPOLOGY) ||
(gfx->dirty & HK_DIRTY_PROVOKING) || IS_SHADER_DIRTY(TESS_CTRL) ||
IS_SHADER_DIRTY(TESS_EVAL) || IS_DIRTY(TS_DOMAIN_ORIGIN),
.cull_2 = varyings_dirty,
/* With a null FS, the fragment shader PPP word is ignored and doesn't
@@ -2509,6 +2512,14 @@ hk_flush_ppp_state(struct hk_cmd_buffer *cmd, struct hk_cs *cs, uint8_t **out)
cfg.cull_front = dyn->rs.cull_mode & VK_CULL_MODE_FRONT_BIT;
cfg.cull_back = dyn->rs.cull_mode & VK_CULL_MODE_BACK_BIT;
cfg.front_face_ccw = dyn->rs.front_face != VK_FRONT_FACE_CLOCKWISE;
if (gfx->shaders[MESA_SHADER_TESS_CTRL] &&
!gfx->shaders[MESA_SHADER_GEOMETRY]) {
cfg.front_face_ccw ^= gfx->tess.info.ccw;
cfg.front_face_ccw ^= dyn->ts.domain_origin ==
VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT;
}
cfg.flat_shading_vertex = translate_ppp_vertex(gfx->provoking);
cfg.rasterizer_discard = dyn->rs.rasterizer_discard_enable;

View File

@@ -380,7 +380,6 @@ agx_create_rs_state(struct pipe_context *ctx,
agx_pack(so->cull, CULL, cfg) {
cfg.cull_front = cso->cull_face & PIPE_FACE_FRONT;
cfg.cull_back = cso->cull_face & PIPE_FACE_BACK;
cfg.front_face_ccw = cso->front_ccw;
cfg.depth_clip = cso->depth_clip_near;
cfg.depth_clamp = !cso->depth_clip_near;
cfg.flat_shading_vertex =
@@ -1621,8 +1620,12 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx,
if (nir->info.stage == MESA_SHADER_VERTEX) {
struct asahi_vs_shader_key *key = &key_->vs;
NIR_PASS(_, nir, agx_nir_lower_vs_input_to_prolog,
attrib_components_read);
if (nir->info.vs.tes_agx) {
NIR_PASS(_, nir, agx_nir_lower_tes, dev->libagx, key->hw);
} else {
NIR_PASS(_, nir, agx_nir_lower_vs_input_to_prolog,
attrib_components_read);
}
if (key->hw) {
NIR_PASS(_, nir, agx_nir_lower_point_size, true);
@@ -1884,7 +1887,8 @@ agx_shader_initialize(struct agx_device *dev, struct agx_uncompiled_shader *so,
so->type = pipe_shader_type_from_mesa(nir->info.stage);
if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
NIR_PASS(_, nir, agx_nir_lower_tes, dev->libagx, true);
nir->info.stage = MESA_SHADER_VERTEX;
nir->info.vs.tes_agx = true;
}
blob_init(&so->serialized_nir);
@@ -2154,9 +2158,10 @@ asahi_fast_link(struct agx_context *ctx, struct agx_uncompiled_shader *so,
ctx, agx_nir_fs_epilog, &key->epilog.fs, sizeof(key->epilog.fs), false,
true, 0, false);
} else if (so->type == MESA_SHADER_TESS_EVAL) {
/* No prolog/epilog needed */
} else {
assert(so->type == MESA_SHADER_VERTEX ||
so->type == MESA_SHADER_TESS_EVAL);
assert(so->type == MESA_SHADER_VERTEX);
prolog = agx_build_meta_shader_internal(
ctx, agx_nir_vs_prolog, &key->prolog.vs, sizeof(key->prolog.vs), true,
@@ -3649,6 +3654,7 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out)
.output_select = varyings_dirty,
.varying_counts_32 = varyings_dirty,
.varying_counts_16 = varyings_dirty,
/* Also dirty with tess but agx_draw_patches dirties RS for that */
.cull = IS_DIRTY(RS),
.cull_2 = varyings_dirty,
.fragment_shader =
@@ -3743,8 +3749,17 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out)
VARYING_COUNTS);
}
if (dirty.cull)
agx_ppp_push_packed(&ppp, ctx->rast->cull, CULL);
if (dirty.cull) {
agx_ppp_push_merged(&ppp, CULL, cfg, ctx->rast->cull) {
cfg.front_face_ccw = ctx->rast->base.front_ccw;
if (ctx->in_tess && !ctx->gs) {
/* Yes, OpenGL is backwards. Deal with it. */
cfg.front_face_ccw ^=
!ctx->stage[MESA_SHADER_TESS_EVAL].shader->tess.ccw;
}
}
}
if (dirty.cull_2) {
agx_ppp_push(&ppp, CULL_2, cfg) {
@@ -4631,6 +4646,10 @@ agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info,
.points_mode = point_mode,
};
if (!point_mode && tes->tess.primitive != TESS_PRIMITIVE_ISOLINES) {
args.ccw = !tes->tess.ccw;
}
memcpy(&args.tess_level_outer_default, ctx->default_outer_level,
sizeof(ctx->default_outer_level));
memcpy(&args.tess_level_inner_default, ctx->default_inner_level,
@@ -4748,9 +4767,6 @@ agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info,
struct agx_tessellator_key key = {
.prim = mode,
/* Yes, OpenGL is backwards. */
.ccw = !tes->tess.ccw,
};
/* Generate counts */
@@ -4771,6 +4787,9 @@ agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info,
agx_launch_with_uploaded_data(batch, &tess_grid, agx_nir_tessellate, &key,
sizeof(key), state);
/* Face culling state needs to be specialized for tess */
ctx->dirty |= AGX_DIRTY_RS;
/* Run TES as VS */
void *vs_cso = ctx->stage[PIPE_SHADER_VERTEX].shader;
void *tes_cso = ctx->stage[PIPE_SHADER_TESS_EVAL].shader;