From e51975142c0d781d801a7d0fff4f8a5506343f54 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 1 Jan 2023 12:09:06 -0800 Subject: [PATCH] freedreno/a6xx: Add bindless state This will be used when we switch over to lowering image/SSBO to bindless. Note that it also starts using CP_SET_DRAW_STATE in the compute path. Subsequent cleanup will switch texture and eventually other state over as well (which will make more sense when we get more clever than emitting all state for every compute grid, but for now simplifies re-using the same code between 3d and compute). Signed-off-by: Rob Clark Part-of: --- .../drivers/freedreno/a6xx/fd6_compute.c | 4 + .../drivers/freedreno/a6xx/fd6_context.c | 24 +++ .../drivers/freedreno/a6xx/fd6_context.h | 14 ++ src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 47 ++++- src/gallium/drivers/freedreno/a6xx/fd6_emit.h | 12 ++ .../drivers/freedreno/a6xx/fd6_image.c | 191 ++++++++++++++++++ .../drivers/freedreno/a6xx/fd6_image.h | 3 + .../drivers/freedreno/a6xx/fd6_program.c | 44 ++-- 8 files changed, 313 insertions(+), 26 deletions(-) diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c index bf4c377079d..0134e93d46a 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_compute.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_compute.c @@ -60,6 +60,10 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT4(ring, REG_A6XX_SP_CS_CONFIG, 2); OUT_RING(ring, A6XX_SP_CS_CONFIG_ENABLED | + COND(v->bindless_tex, A6XX_SP_CS_CONFIG_BINDLESS_TEX) | + COND(v->bindless_samp, A6XX_SP_CS_CONFIG_BINDLESS_SAMP) | + COND(v->bindless_ibo, A6XX_SP_CS_CONFIG_BINDLESS_IBO) | + COND(v->bindless_ubo, A6XX_SP_CS_CONFIG_BINDLESS_UBO) | A6XX_SP_CS_CONFIG_NIBO(ir3_shader_nibo(v)) | A6XX_SP_CS_CONFIG_NTEX(v->num_samp) | A6XX_SP_CS_CONFIG_NSAMP(v->num_samp)); /* SP_VS_CONFIG */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.c b/src/gallium/drivers/freedreno/a6xx/fd6_context.c 
index 1a14afbe126..8454b25ba00 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.c @@ -51,6 +51,10 @@ fd6_context_destroy(struct pipe_context *pctx) in_dt { struct fd6_context *fd6_ctx = fd6_context(fd_context(pctx)); + fd6_descriptor_set_invalidate(&fd6_ctx->cs_descriptor_set); + for (unsigned i = 0; i < ARRAY_SIZE(fd6_ctx->descriptor_sets); i++) + fd6_descriptor_set_invalidate(&fd6_ctx->descriptor_sets[i]); + if (fd6_ctx->streamout_disable_stateobj) fd_ringbuffer_del(fd6_ctx->streamout_disable_stateobj); @@ -184,6 +188,26 @@ setup_state_map(struct fd_context *ctx) fd_context_add_shader_map(ctx, PIPE_SHADER_FRAGMENT, FD_DIRTY_SHADER_TEX, BIT(FD6_GROUP_FS_TEX)); + fd_context_add_shader_map(ctx, PIPE_SHADER_VERTEX, + FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE, + BIT(FD6_GROUP_VS_BINDLESS)); + fd_context_add_shader_map(ctx, PIPE_SHADER_TESS_CTRL, + FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE, + BIT(FD6_GROUP_HS_BINDLESS)); + fd_context_add_shader_map(ctx, PIPE_SHADER_TESS_EVAL, + FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE, + BIT(FD6_GROUP_DS_BINDLESS)); + fd_context_add_shader_map(ctx, PIPE_SHADER_GEOMETRY, + FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE, + BIT(FD6_GROUP_GS_BINDLESS)); + /* NOTE: FD6_GROUP_FS_BINDLESS has a weak dependency on the program + * state (ie. 
it needs to be re-generated with fb-read descriptor + * patched in) but this special case is handled in fd6_emit_3d_state() + */ + fd_context_add_shader_map(ctx, PIPE_SHADER_FRAGMENT, + FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE, + BIT(FD6_GROUP_FS_BINDLESS)); + /* NOTE: scissor enabled bit is part of rasterizer state, but * fd_rasterizer_state_bind() will mark scissor dirty if needed: */ diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.h b/src/gallium/drivers/freedreno/a6xx/fd6_context.h index 37a1051f717..3bfccc800f8 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.h @@ -62,8 +62,22 @@ struct fd6_descriptor_set { * resource has been rebound */ uint16_t seqno[IR3_BINDLESS_DESC_COUNT]; + + /** + * Current GPU copy of the descriptor set + */ + struct fd_bo *bo; }; +static void +fd6_descriptor_set_invalidate(struct fd6_descriptor_set *set) +{ + if (!set->bo) + return; + fd_bo_del(set->bo); + set->bo = NULL; +} + struct fd6_context { struct fd_context base; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index 3ce2d0e36e2..a3b86ccb05e 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -793,11 +793,13 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) emit_marker6(ring, 5); - /* NOTE: we track fb_read differently than _BLEND_ENABLED since we - * might decide to do sysmem in some cases when blend is enabled: + /* Special case, we need to re-emit bindless FS state w/ the + * fb-read state appended: */ - if (fs->fb_read) + if ((emit->dirty_groups & BIT(FD6_GROUP_PROG)) && fs->fb_read) { ctx->batch->gmem_reason |= FD_GMEM_FB_READ; + emit->dirty_groups |= BIT(FD6_GROUP_FS_BINDLESS); + } u_foreach_bit (b, emit->dirty_groups) { enum fd6_state_id group = b; @@ -862,6 +864,26 @@ fd6_emit_3d_state(struct fd_ringbuffer *ring, struct fd6_emit *emit) state = 
build_ibo(emit); fd6_state_take_group(&emit->state, state, FD6_GROUP_IBO); break; + case FD6_GROUP_VS_BINDLESS: + state = fd6_build_bindless_state(ctx, PIPE_SHADER_VERTEX, false); + fd6_state_take_group(&emit->state, state, FD6_GROUP_VS_BINDLESS); + break; + case FD6_GROUP_HS_BINDLESS: + state = fd6_build_bindless_state(ctx, PIPE_SHADER_TESS_CTRL, false); + fd6_state_take_group(&emit->state, state, FD6_GROUP_HS_BINDLESS); + break; + case FD6_GROUP_DS_BINDLESS: + state = fd6_build_bindless_state(ctx, PIPE_SHADER_TESS_EVAL, false); + fd6_state_take_group(&emit->state, state, FD6_GROUP_DS_BINDLESS); + break; + case FD6_GROUP_GS_BINDLESS: + state = fd6_build_bindless_state(ctx, PIPE_SHADER_GEOMETRY, false); + fd6_state_take_group(&emit->state, state, FD6_GROUP_GS_BINDLESS); + break; + case FD6_GROUP_FS_BINDLESS: + state = fd6_build_bindless_state(ctx, PIPE_SHADER_FRAGMENT, fs->fb_read); + fd6_state_take_group(&emit->state, state, FD6_GROUP_FS_BINDLESS); + break; case FD6_GROUP_CONST: state = fd6_build_user_consts(emit); fd6_state_take_group(&emit->state, state, FD6_GROUP_CONST); @@ -913,6 +935,7 @@ void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, struct ir3_shader_variant *cp) { + struct fd6_state state = {}; enum fd_dirty_shader_state dirty = ctx->dirty_shader[PIPE_SHADER_COMPUTE]; if (dirty & (FD_DIRTY_SHADER_TEX | FD_DIRTY_SHADER_PROG | @@ -957,6 +980,24 @@ fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring, fd_ringbuffer_del(state); } + + u_foreach_bit (b, ctx->gen_dirty) { + enum fd6_state_id group = b; + + switch (group) { + case FD6_GROUP_CS_BINDLESS: + fd6_state_take_group( + &state, + fd6_build_bindless_state(ctx, PIPE_SHADER_COMPUTE, false), + FD6_GROUP_CS_BINDLESS); + break; + default: + /* State-group unused for compute shaders */ + break; + } + } + + fd6_state_emit(&state, ring); } /* emit setup at begin of new cmdstream buffer (don't rely on previous diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h 
b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h index 6a67877425c..0f085b6c370 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h @@ -66,6 +66,11 @@ enum fd6_state_id { FD6_GROUP_BLEND_COLOR, FD6_GROUP_SO, FD6_GROUP_IBO, + FD6_GROUP_VS_BINDLESS, + FD6_GROUP_HS_BINDLESS, + FD6_GROUP_DS_BINDLESS, + FD6_GROUP_GS_BINDLESS, + FD6_GROUP_FS_BINDLESS, /* * Virtual state-groups, which don't turn into a CP_SET_DRAW_STATE group @@ -73,6 +78,12 @@ enum fd6_state_id { FD6_GROUP_PROG_KEY, /* Set for any state which could change shader key */ FD6_GROUP_NON_GROUP, /* placeholder group for state emit in IB2, keep last */ + + /* + * Note that since we don't interleave draws and grids in the same batch, + * the compute vs draw state groups can overlap: + */ + FD6_GROUP_CS_BINDLESS = FD6_GROUP_VS_BINDLESS, }; #define ENABLE_ALL \ @@ -133,6 +144,7 @@ fd6_state_take_group(struct fd6_state *state, struct fd_ringbuffer *stateobj, [FD6_GROUP_PROG_BINNING] = CP_SET_DRAW_STATE__0_BINNING, [FD6_GROUP_PROG_INTERP] = ENABLE_DRAW, [FD6_GROUP_FS_TEX] = ENABLE_DRAW, + [FD6_GROUP_FS_BINDLESS] = ENABLE_DRAW, }; assert(state->num_groups < ARRAY_SIZE(state->groups)); struct fd6_state_group *g = &state->groups[state->num_groups++]; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.c b/src/gallium/drivers/freedreno/a6xx/fd6_image.c index e13a7f81cad..157d0351e54 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_image.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.c @@ -158,6 +158,17 @@ descriptor_set(struct fd_context *ctx, enum pipe_shader_type shader) static void clear_descriptor(struct fd6_descriptor_set *set, unsigned slot) { + /* The 2nd dword of the descriptor contains the width and height, + * so a non-zero value means the slot was previously valid and + * must be cleared. We can't leave dangling descriptors as the + * shader could use variable indexing into the set of IBOs to + * get at them. 
See piglit arb_shader_image_load_store-invalid. + */ + if (!set->descriptor[slot][1]) + return; + + fd6_descriptor_set_invalidate(set); + memset(set->descriptor[slot], 0, sizeof(set->descriptor[slot])); } @@ -170,6 +181,8 @@ validate_image_descriptor(struct fd_context *ctx, struct fd6_descriptor_set *set if (!rsc || (rsc->seqno == set->seqno[slot])) return; + fd6_descriptor_set_invalidate(set); + fd6_image_descriptor(ctx, img, set->descriptor[slot]); set->seqno[slot] = rsc->seqno; } @@ -183,6 +196,8 @@ validate_buffer_descriptor(struct fd_context *ctx, struct fd6_descriptor_set *se if (!rsc || (rsc->seqno == set->seqno[slot])) return; + fd6_descriptor_set_invalidate(set); + fd6_ssbo_descriptor(ctx, buf, set->descriptor[slot]); set->seqno[slot] = rsc->seqno; } @@ -221,6 +236,182 @@ fd6_build_ibo_state(struct fd_context *ctx, const struct ir3_shader_variant *v, return state; } +/* Build bindless descriptor state, returns ownership of state reference */ +struct fd_ringbuffer * +fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader, + bool append_fb_read) +{ + struct fd_shaderbuf_stateobj *bufso = &ctx->shaderbuf[shader]; + struct fd_shaderimg_stateobj *imgso = &ctx->shaderimg[shader]; + struct fd6_descriptor_set *set = descriptor_set(ctx, shader); + + struct fd_ringbuffer *ring = fd_submit_new_ringbuffer( + ctx->batch->submit, 16 * 4, FD_RINGBUFFER_STREAMING); + + /* Don't re-use a previous descriptor set if appending the + * fb-read descriptor, as that can change across batches. + * The normal descriptor slots are safe to re-use even if + * the state is dirtied due to batch flush, but the fb-read + * slot is not. + */ + if (unlikely(append_fb_read)) + fd6_descriptor_set_invalidate(set); + + /* + * Re-validate the descriptor slots, ie. in the case that + * the resource gets rebound due to use with non-UBWC + * compatible view format, etc. + * + * While we are at it, attach the BOs to the ring. 
+ */ + + u_foreach_bit (b, bufso->enabled_mask) { + struct pipe_shader_buffer *buf = &bufso->sb[b]; + unsigned idx = b + IR3_BINDLESS_SSBO_OFFSET; + validate_buffer_descriptor(ctx, set, idx, buf); + if (buf->buffer) + fd_ringbuffer_attach_bo(ring, fd_resource(buf->buffer)->bo); + } + + u_foreach_bit (b, imgso->enabled_mask) { + struct pipe_image_view *img = &imgso->si[b]; + unsigned idx = b + IR3_BINDLESS_IMAGE_OFFSET; + validate_image_descriptor(ctx, set, idx, img); + if (img->resource) + fd_ringbuffer_attach_bo(ring, fd_resource(img->resource)->bo); + } + + if (!set->bo) { + set->bo = fd_bo_new( + ctx->dev, sizeof(set->descriptor), + /* Use same flags as ringbuffer so hits the same heap, + * because those will already have the FD_RELOC_DUMP + * flag set: + */ + FD_BO_GPUREADONLY | FD_BO_CACHED_COHERENT, + "%s bindless", _mesa_shader_stage_to_abbrev(shader)); + fd_bo_mark_for_dump(set->bo); + + uint32_t *desc_buf = fd_bo_map(set->bo); + + memcpy(desc_buf, set->descriptor, sizeof(set->descriptor)); + + if (unlikely(append_fb_read)) { + /* The last image slot is used for fb-read: */ + unsigned idx = IR3_BINDLESS_DESC_COUNT - 1; + + /* This is patched with the appropriate descriptor for GMEM or + * sysmem rendering path in fd6_gmem + */ + + struct fd_cs_patch patch = { + .cs = &desc_buf[idx * FDL6_TEX_CONST_DWORDS], + }; + util_dynarray_append(&ctx->batch->fb_read_patches, + __typeof__(patch), patch); + } + } + + /* + * Build stateobj emitting reg writes to configure the descriptor + * set and CP_LOAD_STATE packets to preload the state. 
+ * + * Note that unless the app is using the max # of SSBOs there will + * be a gap between the IBO descriptors used for SSBOs and for images, + * so emit this as two CP_LOAD_STATE packets: + */ + + unsigned idx = ir3_shader_descriptor_set(shader); + + if (shader == PIPE_SHADER_COMPUTE) { + OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(.cs_bindless = 0x1f)); + OUT_REG(ring, A6XX_SP_CS_BINDLESS_BASE_DESCRIPTOR( + idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo, + )); + OUT_REG(ring, A6XX_HLSQ_CS_BINDLESS_BASE_DESCRIPTOR( + idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo, + )); + + if (bufso->enabled_mask) { + OUT_PKT(ring, CP_LOAD_STATE6_FRAG, + CP_LOAD_STATE6_0( + .dst_off = IR3_BINDLESS_SSBO_OFFSET, + .state_type = ST6_IBO, + .state_src = SS6_BINDLESS, + .state_block = SB6_CS_SHADER, + .num_unit = util_last_bit(bufso->enabled_mask), + ), + CP_LOAD_STATE6_EXT_SRC_ADDR( + /* This isn't actually an address: */ + .qword = (idx << 28) | + IR3_BINDLESS_SSBO_OFFSET * FDL6_TEX_CONST_DWORDS, + ), + ); + } + + if (imgso->enabled_mask) { + OUT_PKT(ring, CP_LOAD_STATE6_FRAG, + CP_LOAD_STATE6_0( + .dst_off = IR3_BINDLESS_IMAGE_OFFSET, + .state_type = ST6_IBO, + .state_src = SS6_BINDLESS, + .state_block = SB6_CS_SHADER, + .num_unit = util_last_bit(imgso->enabled_mask), + ), + CP_LOAD_STATE6_EXT_SRC_ADDR( + /* This isn't actually an address: */ + .qword = (idx << 28) | + IR3_BINDLESS_IMAGE_OFFSET * FDL6_TEX_CONST_DWORDS, + ), + ); + } + } else { + OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(.gfx_bindless = 0x1f)); + OUT_REG(ring, A6XX_SP_BINDLESS_BASE_DESCRIPTOR( + idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo, + )); + OUT_REG(ring, A6XX_HLSQ_BINDLESS_BASE_DESCRIPTOR( + idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo, + )); + + if (bufso->enabled_mask) { + OUT_PKT(ring, CP_LOAD_STATE6, + CP_LOAD_STATE6_0( + .dst_off = IR3_BINDLESS_SSBO_OFFSET, + .state_type = ST6_SHADER, + .state_src = SS6_BINDLESS, + .state_block = SB6_IBO, + .num_unit = 
util_last_bit(bufso->enabled_mask), + ), + CP_LOAD_STATE6_EXT_SRC_ADDR( + /* This isn't actually an address: */ + .qword = (idx << 28) | + IR3_BINDLESS_SSBO_OFFSET * FDL6_TEX_CONST_DWORDS, + ), + ); + } + + if (imgso->enabled_mask) { + OUT_PKT(ring, CP_LOAD_STATE6, + CP_LOAD_STATE6_0( + .dst_off = IR3_BINDLESS_IMAGE_OFFSET, + .state_type = ST6_SHADER, + .state_src = SS6_BINDLESS, + .state_block = SB6_IBO, + .num_unit = util_last_bit(imgso->enabled_mask), + ), + CP_LOAD_STATE6_EXT_SRC_ADDR( + /* This isn't actually an address: */ + .qword = (idx << 28) | + IR3_BINDLESS_IMAGE_OFFSET * FDL6_TEX_CONST_DWORDS, + ), + ); + } + } + + return ring; +} + static void fd6_set_shader_buffers(struct pipe_context *pctx, enum pipe_shader_type shader, unsigned start, unsigned count, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.h b/src/gallium/drivers/freedreno/a6xx/fd6_image.h index 439317ea572..22235640e02 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_image.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.h @@ -39,6 +39,9 @@ struct ir3_shader_variant; struct fd_ringbuffer * fd6_build_ibo_state(struct fd_context *ctx, const struct ir3_shader_variant *v, enum pipe_shader_type shader) assert_dt; +struct fd_ringbuffer * +fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader, + bool append_fb_read) assert_dt; void fd6_image_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index 9ff022d5f5c..1b1eee0c570 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -289,6 +289,22 @@ setup_stream_out(struct fd_context *ctx, struct fd6_program_state *state, state->streamout_stateobj = ring; } +static uint32_t +sp_xs_config(struct ir3_shader_variant *v) +{ + if (!v) + return 0; + + return A6XX_SP_VS_CONFIG_ENABLED | + COND(v->bindless_tex, A6XX_SP_VS_CONFIG_BINDLESS_TEX) | + 
COND(v->bindless_samp, A6XX_SP_VS_CONFIG_BINDLESS_SAMP) | + COND(v->bindless_ibo, A6XX_SP_VS_CONFIG_BINDLESS_IBO) | + COND(v->bindless_ubo, A6XX_SP_VS_CONFIG_BINDLESS_UBO) | + A6XX_SP_VS_CONFIG_NIBO(ir3_shader_nibo(v)) | + A6XX_SP_VS_CONFIG_NTEX(v->num_samp) | + A6XX_SP_VS_CONFIG_NSAMP(v->num_samp); +} + static void setup_config_stateobj(struct fd_context *ctx, struct fd6_program_state *state) { @@ -318,37 +334,19 @@ setup_config_stateobj(struct fd_context *ctx, struct fd6_program_state *state) A6XX_HLSQ_FS_CNTL_ENABLED); OUT_PKT4(ring, REG_A6XX_SP_VS_CONFIG, 1); - OUT_RING(ring, COND(state->vs, A6XX_SP_VS_CONFIG_ENABLED) | - A6XX_SP_VS_CONFIG_NIBO(ir3_shader_nibo(state->vs)) | - A6XX_SP_VS_CONFIG_NTEX(state->vs->num_samp) | - A6XX_SP_VS_CONFIG_NSAMP(state->vs->num_samp)); + OUT_RING(ring, sp_xs_config(state->vs)); OUT_PKT4(ring, REG_A6XX_SP_HS_CONFIG, 1); - OUT_RING(ring, COND(state->hs, - A6XX_SP_HS_CONFIG_ENABLED | - A6XX_SP_HS_CONFIG_NIBO(ir3_shader_nibo(state->hs)) | - A6XX_SP_HS_CONFIG_NTEX(state->hs->num_samp) | - A6XX_SP_HS_CONFIG_NSAMP(state->hs->num_samp))); + OUT_RING(ring, sp_xs_config(state->hs)); OUT_PKT4(ring, REG_A6XX_SP_DS_CONFIG, 1); - OUT_RING(ring, COND(state->ds, - A6XX_SP_DS_CONFIG_ENABLED | - A6XX_SP_DS_CONFIG_NIBO(ir3_shader_nibo(state->ds)) | - A6XX_SP_DS_CONFIG_NTEX(state->ds->num_samp) | - A6XX_SP_DS_CONFIG_NSAMP(state->ds->num_samp))); + OUT_RING(ring, sp_xs_config(state->ds)); OUT_PKT4(ring, REG_A6XX_SP_GS_CONFIG, 1); - OUT_RING(ring, COND(state->gs, - A6XX_SP_GS_CONFIG_ENABLED | - A6XX_SP_GS_CONFIG_NIBO(ir3_shader_nibo(state->gs)) | - A6XX_SP_GS_CONFIG_NTEX(state->gs->num_samp) | - A6XX_SP_GS_CONFIG_NSAMP(state->gs->num_samp))); + OUT_RING(ring, sp_xs_config(state->gs)); OUT_PKT4(ring, REG_A6XX_SP_FS_CONFIG, 1); - OUT_RING(ring, COND(state->fs, A6XX_SP_FS_CONFIG_ENABLED) | - A6XX_SP_FS_CONFIG_NIBO(ir3_shader_nibo(state->fs)) | - A6XX_SP_FS_CONFIG_NTEX(state->fs->num_samp) | - A6XX_SP_FS_CONFIG_NSAMP(state->fs->num_samp)); + 
OUT_RING(ring, sp_xs_config(state->fs)); OUT_PKT4(ring, REG_A6XX_SP_IBO_COUNT, 1); OUT_RING(ring, ir3_shader_nibo(state->fs));