freedreno: Add support for devices not supporting double thread size.

Signed-off-by: Amber Amber <amber@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20991>
This commit is contained in:
Amber
2023-04-17 18:53:56 +02:00
committed by Marge Bot
parent 0f57d7c0c3
commit 576a4e85f0
2 changed files with 18 additions and 4 deletions
@@ -24,6 +24,7 @@
* Rob Clark <robclark@freedesktop.org>
*/
#include "drm/freedreno_ringbuffer.h"
#define FD_BO_NO_HARDPIN 1
#include "pipe/p_state.h"
@@ -48,7 +49,7 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
assert_dt
{
const struct ir3_info *i = &v->info;
enum a6xx_threadsize thrsz = i->double_threadsize ? THREAD128 : THREAD64;
enum a6xx_threadsize thrsz_cs = i->double_threadsize ? THREAD128 : THREAD64;
OUT_REG(ring, HLSQ_INVALIDATE_CMD(CHIP, .vs_state = true, .hs_state = true,
.ds_state = true, .gs_state = true,
@@ -73,7 +74,7 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_PKT4(ring, REG_A6XX_SP_CS_CTRL_REG0, 1);
OUT_RING(ring,
A6XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) |
A6XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz_cs) |
A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) |
@@ -84,6 +85,7 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID);
work_group_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_WORKGROUP_ID);
enum a6xx_threadsize thrsz = ctx->screen->info->a6xx.supports_double_threadsize ? thrsz_cs : THREAD128;
OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL_0, 2);
OUT_RING(ring, A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID(work_group_id) |
A6XX_HLSQ_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) |
@@ -91,6 +93,10 @@ cs_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id));
OUT_RING(ring, A6XX_HLSQ_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) |
A6XX_HLSQ_CS_CNTL_1_THREADSIZE(thrsz));
if (!ctx->screen->info->a6xx.supports_double_threadsize) {
OUT_PKT4(ring, REG_A6XX_HLSQ_FS_CNTL_0, 1);
OUT_RING(ring, A6XX_HLSQ_FS_CNTL_0_THREADSIZE(thrsz_cs));
}
if (ctx->screen->info->a6xx.has_lpac) {
OUT_PKT4(ring, REG_A6XX_SP_CS_CNTL_0, 2);
@@ -319,13 +319,21 @@ ir3_shader_compute_state_create(struct pipe_context *pctx,
if (ctx->screen->gen >= 6)
ir3_nir_lower_io_to_bindless(nir);
enum ir3_wavesize_option api_wavesize = IR3_SINGLE_OR_DOUBLE;
enum ir3_wavesize_option real_wavesize = IR3_SINGLE_OR_DOUBLE;
if (ctx->screen->gen >= 6 && !ctx->screen->info->a6xx.supports_double_threadsize) {
api_wavesize = IR3_SINGLE_ONLY;
real_wavesize = IR3_SINGLE_ONLY;
}
struct ir3_shader *shader =
ir3_shader_from_nir(compiler, nir, &(struct ir3_shader_options){
/* TODO: force to single on a6xx with legacy
* ballot extension that uses 64-bit masks
*/
.api_wavesize = IR3_SINGLE_OR_DOUBLE,
.real_wavesize = IR3_SINGLE_OR_DOUBLE,
.api_wavesize = api_wavesize,
.real_wavesize = real_wavesize,
}, NULL);
shader->cs.req_input_mem = align(cso->req_input_mem, 4) / 4; /* byte->dword */
shader->cs.req_local_mem = cso->static_shared_mem;