freedreno/a3xx: deal with optimized-out tex instructions

Keep track of whether we actually have any sam instructions in the
resulting shader, rather than relying on TGSI SAMP declarations.  If the
sam instruction is optimized out because its result is not used, we don't
want to emit texture state, etc.  In fact, emitting sampler state and/or
setting the PIXLODENABLE bit when there are no texture fetches seems to
cause a lockup.

In theory this should never happen for a "normal" shader, unless the
state tracker is wonky.  But it is a very real possibility for binning
pass shaders.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
This commit is contained in:
Rob Clark
2014-04-08 14:14:43 -04:00
parent cb4ad13685
commit ee839cc6ef
7 changed files with 41 additions and 25 deletions
@@ -2054,12 +2054,6 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
}
}
static void
decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
ctx->so->samplers_count++;
}
/* from TGSI perspective, we actually have inputs. But most of the "inputs"
* for a fragment shader are just bary.f instructions. The *actual* inputs
* from the hw perspective are the frag_pos and optionally frag_coord and
@@ -2160,8 +2154,6 @@ compile_instructions(struct fd3_compile_context *ctx)
decl_out(ctx, decl);
} else if (decl->Declaration.File == TGSI_FILE_INPUT) {
decl_in(ctx, decl);
} else if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
decl_samp(ctx, decl);
}
break;
}
@@ -2320,7 +2312,7 @@ fd3_compile_shader(struct fd3_shader_variant *so,
}
ret = ir3_block_ra(block, so->type, key.half_precision,
so->frag_coord, so->frag_face);
so->frag_coord, so->frag_face, &so->has_samp);
if (ret)
goto out;
@@ -1417,7 +1417,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
static void
decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
{
ctx->so->samplers_count++;
ctx->so->has_samp = true;
}
static void
+13 -5
View File
@@ -177,7 +177,7 @@ emit_textures(struct fd_ringbuffer *ring,
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
for (i = 0; i < tex->num_samplers; i++) {
static const struct fd3_sampler_stateobj dummy_sampler = {};
struct fd3_sampler_stateobj *sampler = tex->samplers[i] ?
const struct fd3_sampler_stateobj *sampler = tex->samplers[i] ?
fd3_sampler_stateobj(tex->samplers[i]) :
&dummy_sampler;
OUT_RING(ring, sampler->texsamp0);
@@ -542,11 +542,19 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
if (dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX))
fd_wfi(ctx, ring);
if (dirty & FD_DIRTY_VERTTEX)
emit_textures(ring, SB_VERT_TEX, &ctx->verttex);
if (dirty & FD_DIRTY_VERTTEX) {
if (vp->has_samp)
emit_textures(ring, SB_VERT_TEX, &ctx->verttex);
else
dirty &= ~FD_DIRTY_VERTTEX;
}
if (dirty & FD_DIRTY_FRAGTEX)
emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex);
if (dirty & FD_DIRTY_FRAGTEX) {
if (fp->has_samp)
emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex);
else
dirty &= ~FD_DIRTY_FRAGTEX;
}
ctx->dirty &= ~dirty;
}
@@ -120,7 +120,7 @@ create_variant(struct fd3_shader_stateobj *so, struct fd3_shader_key key)
v->inputs_count = 0;
v->outputs_count = 0;
v->total_in = 0;
v->samplers_count = 0;
v->has_samp = false;
v->immediates_count = 0;
}
} else {
@@ -397,7 +397,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
COND(vp->samplers_count > 0, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) |
COND(vp->has_samp, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) |
A3XX_SP_VS_CTRL_REG0_LENGTH(vp->instrlen));
OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
@@ -475,7 +475,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
COND(fp->samplers_count > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
COND(fp->has_samp > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
A3XX_SP_FS_CTRL_REG0_LENGTH(fp->instrlen));
OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
@@ -107,8 +107,8 @@ struct fd3_shader_variant {
unsigned total_in; /* sum of inputs (scalar) */
/* samplers: */
unsigned samplers_count;
/* do we have one or more texture sample instructions: */
bool has_samp;
/* const reg # of first immediate, ie. 1 == c1
* (not regid, because TGSI thinks in terms of vec4 registers,
+2 -2
View File
@@ -385,8 +385,8 @@ void ir3_block_sched(struct ir3_block *block);
/* register assignment: */
int ir3_block_ra(struct ir3_block *block, enum shader_t type,
bool half_precision, bool frag_coord, bool frag_face);
bool half_precision, bool frag_coord, bool frag_face,
bool *has_samp);
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+19 -3
View File
@@ -56,6 +56,7 @@ struct ir3_ra_ctx {
bool half_precision;
bool frag_coord;
bool frag_face;
bool has_samp;
int cnt;
bool error;
};
@@ -654,8 +655,17 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
if (is_sfu(n))
regmask_set(&needs_ss, n->regs[0]);
if (is_tex(n))
if (is_tex(n)) {
/* flag that the shader still contains at least one tex (sam)
 * instruction.  A simple bool is enough, since everything else
 * only cares whether there are any or not.  We do this here,
 * rather than when we encounter a SAMP decl, because (especially
 * in binning pass shader) the samp instruction(s) could get
 * eliminated if the result is not used.
 */
ctx->has_samp = true;
regmask_set(&needs_sy, n->regs[0]);
}
/* both tex/sfu appear to not always immediately consume
* their src register(s):
@@ -730,7 +740,8 @@ static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
}
int ir3_block_ra(struct ir3_block *block, enum shader_t type,
bool half_precision, bool frag_coord, bool frag_face)
bool half_precision, bool frag_coord, bool frag_face,
bool *has_samp)
{
struct ir3_ra_ctx ctx = {
.block = block,
@@ -739,6 +750,11 @@ int ir3_block_ra(struct ir3_block *block, enum shader_t type,
.frag_coord = frag_coord,
.frag_face = frag_face,
};
int ret;
ir3_shader_clear_mark(block->shader);
return block_ra(&ctx, block);
ret = block_ra(&ctx, block);
*has_samp = ctx.has_samp;
return ret;
}