From 1b8bed38eb40c71a9973d6ea7c9294fa077bee7a Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 2 Jan 2023 13:36:55 -0800 Subject: [PATCH] freedreno/a6xx: Remove shader key from fd6_emit Only construct the key on-demand if the PROG state is dirty. The newly added "virtual" PROG_KEY state is used to know when other state that the shader key depends on changes. Worth ~13% at drawoverhead test 0. Signed-off-by: Rob Clark Part-of: --- .../drivers/freedreno/a6xx/fd6_const.c | 4 +- .../drivers/freedreno/a6xx/fd6_context.c | 4 +- src/gallium/drivers/freedreno/a6xx/fd6_draw.c | 112 +++++++++++------- src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 2 +- src/gallium/drivers/freedreno/a6xx/fd6_emit.h | 12 +- 5 files changed, 80 insertions(+), 54 deletions(-) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_const.c b/src/gallium/drivers/freedreno/a6xx/fd6_const.c index 05a9f82b6b1..c7c37a1e778 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_const.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_const.c @@ -132,7 +132,7 @@ fd6_build_tess_consts(struct fd6_emit *emit) * size is dwords, since that's what LDG/STG use. */ unsigned num_vertices = emit->hs - ? emit->key.patch_vertices + ? ctx->patch_vertices : emit->gs->gs.vertices_in; uint32_t vs_params[4] = { @@ -146,7 +146,7 @@ fd6_build_tess_consts(struct fd6_emit *emit) uint32_t hs_params[4] = { emit->vs->output_size * num_vertices * 4, /* vs primitive stride */ emit->vs->output_size * 4, /* vs vertex stride */ - emit->hs->output_size, emit->key.patch_vertices}; + emit->hs->output_size, ctx->patch_vertices}; emit_stage_tess_consts(constobj, emit->hs, hs_params, ARRAY_SIZE(hs_params)); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.c b/src/gallium/drivers/freedreno/a6xx/fd6_context.c index 2eff221eff9..8fa3d974d88 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.c @@ -151,7 +151,9 @@ setup_state_map(struct fd_context *ctx) fd_context_add_map(ctx, FD_DIRTY_ZSA | FD_DIRTY_BLEND | FD_DIRTY_PROG, BIT(FD6_GROUP_LRZ)); fd_context_add_map(ctx, FD_DIRTY_PROG | FD_DIRTY_RASTERIZER_CLIP_PLANE_ENABLE, - BIT(FD6_GROUP_PROG)); + BIT(FD6_GROUP_PROG) | BIT(FD6_GROUP_PROG_KEY)); + fd_context_add_map(ctx, FD_DIRTY_RASTERIZER | FD_DIRTY_MIN_SAMPLES | FD_DIRTY_FRAMEBUFFER, + BIT(FD6_GROUP_PROG_KEY)); fd_context_add_map(ctx, FD_DIRTY_RASTERIZER, BIT(FD6_GROUP_RASTERIZER)); fd_context_add_map(ctx, FD_DIRTY_FRAMEBUFFER | FD_DIRTY_RASTERIZER_DISCARD | diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c index 9caa63dc48d..525dcd81468 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c @@ -131,22 +131,12 @@ fixup_draw_state(struct fd_context *ctx, struct fd6_emit *emit) assert_dt } } -static bool -fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, - unsigned drawid_offset, - const struct pipe_draw_indirect_info *indirect, - const struct pipe_draw_start_count_bias *draw, - unsigned index_offset) assert_dt +static const struct fd6_program_state * +get_program_state(struct fd_context *ctx, const struct pipe_draw_info *info) + assert_dt { struct fd6_context *fd6_ctx = fd6_context(ctx); - struct shader_info *gs_info = ir3_get_shader_info(ctx->prog.gs); - struct fd6_emit emit; - - emit.ctx = ctx; - emit.info = info; - emit.indirect = indirect; - emit.draw = draw; - emit.key = (struct ir3_cache_key){ + struct ir3_cache_key key = { .vs = ctx->prog.vs, .gs = ctx->prog.gs, .fs = ctx->prog.fs, @@ -159,6 +149,52 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, .clip_plane_enable = ctx->rasterizer->clip_plane_enable, .patch_vertices = ctx->patch_vertices, }; + + if (info->mode == PIPE_PRIM_PATCHES) { + struct shader_info *gs_info = ir3_get_shader_info(ctx->prog.gs); + + key.hs = ctx->prog.hs; + key.ds = ctx->prog.ds; + + struct shader_info *ds_info = ir3_get_shader_info(key.ds); + key.key.tessellation = ir3_tess_mode(ds_info->tess._primitive_mode); + + struct shader_info *fs_info = ir3_get_shader_info(key.fs); + key.key.tcs_store_primid = + BITSET_TEST(ds_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) || + (gs_info && BITSET_TEST(gs_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID)) || + (fs_info && (fs_info->inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID))); + } + + if (key.gs) { + key.key.has_gs = true; + } + + ir3_fixup_shader_state(&ctx->base, &key.key); + + if (ctx->gen_dirty & BIT(FD6_GROUP_PROG)) { + struct ir3_program_state *s = ir3_cache_lookup( + ctx->shader_cache, &key, &ctx->debug); + fd6_ctx->prog = fd6_program_state(s); + } + + return fd6_ctx->prog; +} + +static bool +fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, + unsigned drawid_offset, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count_bias *draw, + unsigned index_offset) assert_dt +{ + struct fd6_context *fd6_ctx = fd6_context(ctx); + struct fd6_emit emit; + + emit.ctx = ctx; + emit.info = info; + emit.indirect = indirect; + emit.draw = draw; emit.rasterflat = ctx->rasterizer->flatshade; emit.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable; emit.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode; @@ -170,39 +206,25 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, if (!(ctx->prog.vs && ctx->prog.fs)) return false; - if (info->mode == PIPE_PRIM_PATCHES) { - emit.key.hs = ctx->prog.hs; - emit.key.ds = ctx->prog.ds; - - struct shader_info *ds_info = ir3_get_shader_info(emit.key.ds); - emit.key.key.tessellation = ir3_tess_mode(ds_info->tess._primitive_mode); + if ((info->mode == PIPE_PRIM_PATCHES) || ctx->prog.gs) { ctx->gen_dirty |= BIT(FD6_GROUP_PRIMITIVE_PARAMS); - - struct shader_info *fs_info = ir3_get_shader_info(emit.key.fs); - emit.key.key.tcs_store_primid = - BITSET_TEST(ds_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) || - (gs_info && BITSET_TEST(gs_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID)) || - (fs_info && (fs_info->inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID))); - } - - if (emit.key.gs) { - emit.key.key.has_gs = true; - ctx->gen_dirty |= BIT(FD6_GROUP_PRIMITIVE_PARAMS); - } - - if (!(emit.key.hs || emit.key.ds || emit.key.gs || indirect)) + } else if (!indirect) { fd6_vsc_update_sizes(ctx->batch, info, draw); + } - ir3_fixup_shader_state(&ctx->base, &emit.key.key); - - if (!(ctx->gen_dirty & BIT(FD6_GROUP_PROG))) { - emit.prog = fd6_ctx->prog; + /* If PROG state (which will mark PROG_KEY dirty) or any state that the + * key depends on, is dirty, then we actually need to construct the shader + * key, figure out if we need a new variant, and lookup the PROG state. + * Otherwise we can just use the previous prog state. + */ + if (unlikely(ctx->gen_dirty & BIT(FD6_GROUP_PROG_KEY))) { + emit.prog = get_program_state(ctx, info); } else { - fd6_ctx->prog = fd6_emit_get_prog(&emit); + emit.prog = fd6_ctx->prog; } /* bail if compile failed: */ - if (!fd6_ctx->prog) + if (!emit.prog) return false; fixup_draw_state(ctx, &emit); @@ -242,7 +264,7 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, struct CP_DRAW_INDX_OFFSET_0 draw0 = { .prim_type = ctx->screen->primtypes[info->mode], .vis_cull = USE_VISIBILITY, - .gs_enable = !!emit.key.gs, + .gs_enable = !!ctx->prog.gs, }; if (indirect && indirect->count_from_stream_output) { @@ -255,12 +277,15 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, } if (info->mode == PIPE_PRIM_PATCHES) { - uint32_t factor_stride = ir3_tess_factor_stride(emit.key.key.tessellation); + struct shader_info *ds_info = ir3_get_shader_info(ctx->prog.ds); + unsigned tessellation = ir3_tess_mode(ds_info->tess._primitive_mode); + + uint32_t factor_stride = ir3_tess_factor_stride(tessellation); STATIC_ASSERT(IR3_TESS_ISOLINES == TESS_ISOLINES + 1); STATIC_ASSERT(IR3_TESS_TRIANGLES == TESS_TRIANGLES + 1); STATIC_ASSERT(IR3_TESS_QUADS == TESS_QUADS + 1); - draw0.patch_type = emit.key.key.tessellation - 1; + draw0.patch_type = tessellation - 1; draw0.prim_type = DI_PT_PATCHES0 + ctx->patch_vertices; draw0.tess_enable = true; @@ -298,7 +323,6 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info, ctx->last.restart_index = restart_index; } - // TODO move fd6_emit_streamout.. I think.. if (emit.dirty_groups) fd6_emit_3d_state(ring, &emit); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index c07a4f87f72..98577637fc8 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -902,7 +902,7 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) fd6_emit_non_ring(ring, emit); break; default: - unreachable("bad state group"); + break; } } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h index 02e21571fc0..f60d1b82c21 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h @@ -66,6 +66,12 @@ enum fd6_state_id { FD6_GROUP_BLEND_COLOR, FD6_GROUP_SO, FD6_GROUP_IBO, + + /* + * Virtual state-groups, which don't turn into a CP_SET_DRAW_STATE group + */ + + FD6_GROUP_PROG_KEY, /* Set for any state which could change shader key */ FD6_GROUP_NON_GROUP, /* placeholder group for state emit in IB2, keep last */ }; @@ -148,7 +154,6 @@ struct fd6_emit { const struct pipe_draw_info *info; const struct pipe_draw_indirect_info *indirect; const struct pipe_draw_start_count_bias *draw; - struct ir3_cache_key key; uint32_t dirty_groups; uint32_t sprite_coord_enable; /* bitmask */ @@ -172,11 +177,6 @@ struct fd6_emit { static inline const struct fd6_program_state * fd6_emit_get_prog(struct fd6_emit *emit) { - if (!emit->prog) { - struct ir3_program_state *s = ir3_cache_lookup( - emit->ctx->shader_cache, &emit->key, &emit->ctx->debug); - emit->prog = fd6_program_state(s); - } return emit->prog; }