etnaviv: implement UBOs
At the same time, update pre-HALTI2 to use the address register for indirect uniform loads, since the integer LOAD instruction isn't always available. Passes all dEQP-GLES3.functional.ubo.* on GC7000L. GC3000 with an extra flush hack passes most of them, but still fails on some of the cases with many loads. Signed-off-by: Jonathan Marek <jonathan@marek.ca> Reviewed-by: Christian Gmeiner <christian.gmeiner@gmail.com> Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3389> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3389>
This commit is contained in:
committed by
Marge Bot
parent
7ff8ce7a3f
commit
6346490a2e
@@ -115,10 +115,36 @@ etna_lower_io(nir_shader *shader, struct etna_shader_variant *v)
|
||||
nir_instr_rewrite_src(instr, &intr->src[1], nir_src_for_ssa(ssa));
|
||||
} break;
|
||||
case nir_intrinsic_load_uniform: {
|
||||
/* multiply by 16 and convert to int */
|
||||
/* convert indirect load_uniform to load_ubo when possible
|
||||
* this is required on HALTI5+ because address register is not implemented
|
||||
 * address register loads also aren't done optimally
|
||||
*/
|
||||
if (v->shader->specs->halti < 2 || nir_src_is_const(intr->src[0]))
|
||||
break;
|
||||
|
||||
nir_intrinsic_instr *load_ubo =
|
||||
nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo);
|
||||
load_ubo->num_components = intr->num_components;
|
||||
nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
|
||||
load_ubo->num_components, 32, NULL);
|
||||
|
||||
b.cursor = nir_before_instr(instr);
|
||||
nir_ssa_def *ssa = nir_imul(&b, intr->src[0].ssa, nir_imm_int(&b, 16));
|
||||
nir_instr_rewrite_src(instr, &intr->src[0], nir_src_for_ssa(ssa));
|
||||
load_ubo->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
|
||||
load_ubo->src[1] = nir_src_for_ssa(nir_iadd(&b,
|
||||
nir_imul(&b, intr->src[0].ssa, nir_imm_int(&b, 16)),
|
||||
nir_imm_int(&b, nir_intrinsic_base(intr) * 16)));
|
||||
nir_builder_instr_insert(&b, &load_ubo->instr);
|
||||
nir_ssa_def_rewrite_uses(&intr->dest.ssa,
|
||||
nir_src_for_ssa(&load_ubo->dest.ssa));
|
||||
nir_instr_remove(&intr->instr);
|
||||
} break;
|
||||
case nir_intrinsic_load_ubo: {
|
||||
nir_const_value *idx = nir_src_as_const_value(intr->src[0]);
|
||||
assert(idx);
|
||||
/* offset index by 1, index 0 is used for converted load_uniform */
|
||||
b.cursor = nir_before_instr(instr);
|
||||
nir_instr_rewrite_src(instr, &intr->src[0],
|
||||
nir_src_for_ssa(nir_imm_int(&b, idx[0].u32 + 1)));
|
||||
} break;
|
||||
case nir_intrinsic_load_vertex_id:
|
||||
case nir_intrinsic_load_instance_id:
|
||||
@@ -593,44 +619,6 @@ etna_emit_output(struct etna_compile *c, nir_variable *var, struct etna_inst_src
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
etna_emit_load_ubo(struct etna_compile *c, struct etna_inst_dst dst,
|
||||
struct etna_inst_src src, struct etna_inst_src base)
|
||||
{
|
||||
/* convert float offset back to integer */
|
||||
if (c->specs->halti < 2) {
|
||||
emit_inst(c, &(struct etna_inst) {
|
||||
.opcode = INST_OPCODE_F2I,
|
||||
.type = INST_TYPE_U32,
|
||||
.dst = dst,
|
||||
.src[0] = src,
|
||||
});
|
||||
|
||||
emit_inst(c, &(struct etna_inst) {
|
||||
.opcode = INST_OPCODE_LOAD,
|
||||
.type = INST_TYPE_U32,
|
||||
.dst = dst,
|
||||
.src[0] = {
|
||||
.use = 1,
|
||||
.rgroup = INST_RGROUP_TEMP,
|
||||
.reg = dst.reg,
|
||||
.swiz = INST_SWIZ_BROADCAST(ffs(dst.write_mask) - 1)
|
||||
},
|
||||
.src[1] = base,
|
||||
});
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
emit_inst(c, &(struct etna_inst) {
|
||||
.opcode = INST_OPCODE_LOAD,
|
||||
.type = INST_TYPE_U32,
|
||||
.dst = dst,
|
||||
.src[0] = src,
|
||||
.src[1] = base,
|
||||
});
|
||||
}
|
||||
|
||||
#define OPT(nir, pass, ...) ({ \
|
||||
bool this_progress = false; \
|
||||
NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
|
||||
|
||||
@@ -94,7 +94,6 @@ static inline bool is_sysval(nir_instr *instr)
|
||||
#define CONST_VAL(a, b) (nir_const_value) {.u64 = (uint64_t)(a) << 32 | (uint64_t)(b)}
|
||||
#define CONST(x) CONST_VAL(ETNA_IMMEDIATE_CONSTANT, x)
|
||||
#define UNIFORM(x) CONST_VAL(ETNA_IMMEDIATE_UNIFORM, x)
|
||||
#define UNIFORM_BASE(x) CONST_VAL(ETNA_IMMEDIATE_UBO0_ADDR, x)
|
||||
#define TEXSCALE(x, i) CONST_VAL(ETNA_IMMEDIATE_TEXRECT_SCALE_X + (i), x)
|
||||
|
||||
static int
|
||||
@@ -388,6 +387,7 @@ get_src(struct state *state, nir_src *src)
|
||||
case nir_intrinsic_load_input:
|
||||
case nir_intrinsic_load_instance_id:
|
||||
case nir_intrinsic_load_uniform:
|
||||
case nir_intrinsic_load_ubo:
|
||||
return ra_src(state, src);
|
||||
case nir_intrinsic_load_front_face:
|
||||
return (hw_src) { .use = 1, .rgroup = INST_RGROUP_INTERNAL };
|
||||
@@ -586,6 +586,7 @@ dest_for_instr(nir_instr *instr)
|
||||
case nir_instr_type_intrinsic: {
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
if (intr->intrinsic == nir_intrinsic_load_uniform ||
|
||||
intr->intrinsic == nir_intrinsic_load_ubo ||
|
||||
intr->intrinsic == nir_intrinsic_load_input ||
|
||||
intr->intrinsic == nir_intrinsic_load_instance_id)
|
||||
dest = &intr->dest;
|
||||
@@ -908,8 +909,8 @@ ra_assign(struct state *state, nir_shader *shader)
|
||||
|
||||
if (instr->type == nir_instr_type_intrinsic) {
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
if (intr->intrinsic == nir_intrinsic_load_uniform) {
|
||||
/* make sure there isn't any reswizzling */
|
||||
/* can't have dst swizzle or sparse writemask on UBO loads */
|
||||
if (intr->intrinsic == nir_intrinsic_load_ubo) {
|
||||
assert(dest == &intr->dest);
|
||||
if (dest->ssa.num_components == 2)
|
||||
c = REG_CLASS_VIRT_VEC2C;
|
||||
@@ -1102,9 +1103,37 @@ emit_intrinsic(struct state *state, nir_intrinsic_instr * intr)
|
||||
break;
|
||||
case nir_intrinsic_load_uniform: {
|
||||
unsigned dst_swiz;
|
||||
hw_dst dst = ra_dest(state, &intr->dest, &dst_swiz);
|
||||
/* TODO: might have a problem with dst_swiz .. */
|
||||
emit(load_ubo, dst, get_src(state, &intr->src[0]), const_src(state, &UNIFORM_BASE(nir_intrinsic_base(intr) * 16), 1));
|
||||
struct etna_inst_dst dst = ra_dest(state, &intr->dest, &dst_swiz);
|
||||
|
||||
/* TODO: rework so extra MOV isn't required, load up to 4 addresses at once */
|
||||
emit_inst(state->c, &(struct etna_inst) {
|
||||
.opcode = INST_OPCODE_MOVAR,
|
||||
.dst.write_mask = 0x1,
|
||||
.src[2] = get_src(state, &intr->src[0]),
|
||||
});
|
||||
emit_inst(state->c, &(struct etna_inst) {
|
||||
.opcode = INST_OPCODE_MOV,
|
||||
.dst = dst,
|
||||
.src[2] = {
|
||||
.use = 1,
|
||||
.rgroup = INST_RGROUP_UNIFORM_0,
|
||||
.reg = nir_intrinsic_base(intr),
|
||||
.swiz = dst_swiz,
|
||||
.amode = INST_AMODE_ADD_A_X,
|
||||
},
|
||||
});
|
||||
} break;
|
||||
case nir_intrinsic_load_ubo: {
|
||||
/* TODO: if offset is of the form (x + C) then add C to the base instead */
|
||||
unsigned idx = nir_src_as_const_value(intr->src[0])[0].u32;
|
||||
unsigned dst_swiz;
|
||||
emit_inst(state->c, &(struct etna_inst) {
|
||||
.opcode = INST_OPCODE_LOAD,
|
||||
.type = INST_TYPE_U32,
|
||||
.dst = ra_dest(state, &intr->dest, &dst_swiz),
|
||||
.src[0] = get_src(state, &intr->src[1]),
|
||||
.src[1] = const_src(state, &CONST_VAL(ETNA_IMMEDIATE_UBO0_ADDR + idx, 0), 1),
|
||||
});
|
||||
} break;
|
||||
case nir_intrinsic_load_front_face:
|
||||
case nir_intrinsic_load_frag_coord:
|
||||
@@ -1402,6 +1431,8 @@ emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
|
||||
.shader = shader,
|
||||
.impl = nir_shader_get_entrypoint(shader),
|
||||
};
|
||||
bool have_indirect_uniform = false;
|
||||
unsigned indirect_max = 0;
|
||||
|
||||
nir_builder b;
|
||||
nir_builder_init(&b, state.impl);
|
||||
@@ -1421,19 +1452,25 @@ emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
|
||||
} break;
|
||||
case nir_instr_type_intrinsic: {
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
/* TODO: load_ubo can also become a constant in some cases
|
||||
* (at the moment it can end up emitting a LOAD with two
|
||||
* uniform sources, which could be a problem on HALTI2)
|
||||
*/
|
||||
if (intr->intrinsic != nir_intrinsic_load_uniform)
|
||||
break;
|
||||
nir_const_value *off = nir_src_as_const_value(intr->src[0]);
|
||||
if (!off || off[0].u64 >> 32 != ETNA_IMMEDIATE_CONSTANT)
|
||||
if (!off || off[0].u64 >> 32 != ETNA_IMMEDIATE_CONSTANT) {
|
||||
have_indirect_uniform = true;
|
||||
indirect_max = nir_intrinsic_base(intr) + nir_intrinsic_range(intr);
|
||||
break;
|
||||
}
|
||||
|
||||
unsigned base = nir_intrinsic_base(intr);
|
||||
/* pre halti2 uniform offset will be float */
|
||||
if (c->specs->halti < 2)
|
||||
base += (unsigned) off[0].f32 / 16;
|
||||
base += (unsigned) off[0].f32;
|
||||
else
|
||||
base += off[0].u32 / 16;
|
||||
|
||||
base += off[0].u32;
|
||||
nir_const_value value[4];
|
||||
|
||||
for (unsigned i = 0; i < intr->dest.ssa.num_components; i++) {
|
||||
@@ -1455,6 +1492,13 @@ emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO: only emit required indirect uniform ranges */
|
||||
if (have_indirect_uniform) {
|
||||
for (unsigned i = 0; i < indirect_max * 4; i++)
|
||||
c->consts[i] = UNIFORM(i).u64;
|
||||
state.const_count = indirect_max;
|
||||
}
|
||||
|
||||
/* add mov for any store output using sysval/const */
|
||||
nir_foreach_block(block, state.impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
|
||||
@@ -288,8 +288,10 @@ etna_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
|
||||
}
|
||||
|
||||
/* Mark constant buffers as being read */
|
||||
resource_read(ctx, ctx->constant_buffer[PIPE_SHADER_VERTEX].buffer);
|
||||
resource_read(ctx, ctx->constant_buffer[PIPE_SHADER_FRAGMENT].buffer);
|
||||
for (unsigned i = 0; i < ETNA_MAX_CONST_BUF; i++) {
|
||||
resource_read(ctx, ctx->constant_buffer[PIPE_SHADER_VERTEX][i].buffer);
|
||||
resource_read(ctx, ctx->constant_buffer[PIPE_SHADER_FRAGMENT][i].buffer);
|
||||
}
|
||||
|
||||
/* Mark VBOs as being read */
|
||||
foreach_bit(i, ctx->vertex_buffer.enabled_mask) {
|
||||
|
||||
@@ -92,7 +92,7 @@ enum etna_immediate_contents {
|
||||
ETNA_IMMEDIATE_TEXRECT_SCALE_X,
|
||||
ETNA_IMMEDIATE_TEXRECT_SCALE_Y,
|
||||
ETNA_IMMEDIATE_UBO0_ADDR,
|
||||
ETNA_IMMEDIATE_UBOMAX_ADDR = ETNA_IMMEDIATE_UBO0_ADDR + 255,
|
||||
ETNA_IMMEDIATE_UBOMAX_ADDR = ETNA_IMMEDIATE_UBO0_ADDR + ETNA_MAX_CONST_BUF - 1,
|
||||
};
|
||||
|
||||
struct etna_shader_uniform_info {
|
||||
@@ -164,7 +164,7 @@ struct etna_context {
|
||||
uint32_t active_sampler_views;
|
||||
uint32_t dirty_sampler_views;
|
||||
struct pipe_sampler_view *sampler_view[PIPE_MAX_SAMPLERS];
|
||||
struct pipe_constant_buffer constant_buffer[PIPE_SHADER_TYPES];
|
||||
struct pipe_constant_buffer constant_buffer[PIPE_SHADER_TYPES][ETNA_MAX_CONST_BUF];
|
||||
struct etna_vertexbuf_state vertex_buffer;
|
||||
struct etna_index_buffer index_buffer;
|
||||
struct etna_shader_state shader;
|
||||
|
||||
@@ -668,12 +668,12 @@ etna_emit_state(struct etna_context *ctx)
|
||||
if (do_uniform_flush)
|
||||
etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
|
||||
|
||||
etna_uniforms_write(ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX]);
|
||||
etna_uniforms_write(ctx, ctx->shader.vs, ctx->constant_buffer[PIPE_SHADER_VERTEX]);
|
||||
|
||||
if (do_uniform_flush)
|
||||
etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
|
||||
|
||||
etna_uniforms_write(ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT]);
|
||||
etna_uniforms_write(ctx, ctx->shader.fs, ctx->constant_buffer[PIPE_SHADER_FRAGMENT]);
|
||||
|
||||
if (ctx->specs.halti >= 5) {
|
||||
/* HALTI5 needs to be prompted to pre-fetch shaders */
|
||||
@@ -687,14 +687,14 @@ etna_emit_state(struct etna_context *ctx)
|
||||
etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
|
||||
|
||||
if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits))
|
||||
etna_uniforms_write(ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX]);
|
||||
etna_uniforms_write(ctx, ctx->shader.vs, ctx->constant_buffer[PIPE_SHADER_VERTEX]);
|
||||
|
||||
/* ideally this cache would only be flushed if there are PS uniform changes */
|
||||
if (do_uniform_flush)
|
||||
etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
|
||||
|
||||
if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits))
|
||||
etna_uniforms_write(ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT]);
|
||||
etna_uniforms_write(ctx, ctx->shader.fs, ctx->constant_buffer[PIPE_SHADER_FRAGMENT]);
|
||||
}
|
||||
/**** End of state update ****/
|
||||
#undef EMIT_STATE
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
#define ETNA_NUM_LOD (14)
|
||||
#define ETNA_NUM_LAYERS (6)
|
||||
#define ETNA_MAX_UNIFORMS (256)
|
||||
#define ETNA_MAX_CONST_BUF 16
|
||||
#define ETNA_MAX_PIXELPIPES 2
|
||||
|
||||
/* All RS operations must have width%16 = 0 */
|
||||
|
||||
@@ -280,6 +280,10 @@ etna_screen_get_shader_param(struct pipe_screen *pscreen,
|
||||
enum pipe_shader_cap param)
|
||||
{
|
||||
struct etna_screen *screen = etna_screen(pscreen);
|
||||
bool ubo_enable = screen->specs.halti >= 2 && DBG_ENABLED(ETNA_DBG_NIR);
|
||||
|
||||
if (DBG_ENABLED(ETNA_DBG_DEQP))
|
||||
ubo_enable = true;
|
||||
|
||||
switch (shader) {
|
||||
case PIPE_SHADER_FRAGMENT:
|
||||
@@ -315,7 +319,7 @@ etna_screen_get_shader_param(struct pipe_screen *pscreen,
|
||||
case PIPE_SHADER_CAP_MAX_TEMPS:
|
||||
return 64; /* Max native temporaries. */
|
||||
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
|
||||
return DBG_ENABLED(ETNA_DBG_DEQP) ? 16 : 1;
|
||||
return ubo_enable ? ETNA_MAX_CONST_BUF : 1;
|
||||
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
|
||||
return 1;
|
||||
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
|
||||
@@ -340,6 +344,8 @@ etna_screen_get_shader_param(struct pipe_screen *pscreen,
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return DBG_ENABLED(ETNA_DBG_NIR) ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI;
|
||||
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
|
||||
if (ubo_enable)
|
||||
return 16384; /* 16384 so state tracker enables UBOs */
|
||||
return shader == PIPE_SHADER_FRAGMENT
|
||||
? screen->specs.max_ps_uniforms * sizeof(float[4])
|
||||
: screen->specs.max_vs_uniforms * sizeof(float[4]);
|
||||
@@ -617,9 +623,6 @@ etna_determine_uniform_limits(struct etna_screen *screen)
|
||||
screen->specs.max_vs_uniforms = 168;
|
||||
screen->specs.max_ps_uniforms = 64;
|
||||
}
|
||||
|
||||
if (DBG_ENABLED(ETNA_DBG_DEQP))
|
||||
screen->specs.max_ps_uniforms = 1024;
|
||||
}
|
||||
|
||||
static bool
|
||||
|
||||
@@ -83,24 +83,19 @@ etna_set_constant_buffer(struct pipe_context *pctx,
|
||||
{
|
||||
struct etna_context *ctx = etna_context(pctx);
|
||||
|
||||
if (unlikely(index > 0)) {
|
||||
DBG("Unhandled buffer index %i", index);
|
||||
return;
|
||||
}
|
||||
assert(index < ETNA_MAX_CONST_BUF);
|
||||
|
||||
|
||||
util_copy_constant_buffer(&ctx->constant_buffer[shader], cb);
|
||||
util_copy_constant_buffer(&ctx->constant_buffer[shader][index], cb);
|
||||
|
||||
/* Note that the state tracker can unbind constant buffers by
|
||||
* passing NULL here. */
|
||||
if (unlikely(!cb || (!cb->buffer && !cb->user_buffer)))
|
||||
return;
|
||||
|
||||
/* there is no support for ARB_uniform_buffer_object */
|
||||
assert(cb->buffer == NULL && cb->user_buffer != NULL);
|
||||
assert(index != 0 || cb->user_buffer != NULL);
|
||||
|
||||
if (!cb->buffer) {
|
||||
struct pipe_constant_buffer *cb = &ctx->constant_buffer[shader];
|
||||
struct pipe_constant_buffer *cb = &ctx->constant_buffer[shader][index];
|
||||
u_upload_data(pctx->const_uploader, 0, cb->buffer_size, 16, cb->user_buffer, &cb->buffer_offset, &cb->buffer);
|
||||
}
|
||||
|
||||
|
||||
@@ -67,6 +67,7 @@ etna_uniforms_write(const struct etna_context *ctx,
|
||||
const struct etna_shader_uniform_info *uinfo = &sobj->uniforms;
|
||||
bool frag = (sobj == ctx->shader.fs);
|
||||
uint32_t base = frag ? ctx->specs.ps_uniforms_offset : ctx->specs.vs_uniforms_offset;
|
||||
unsigned idx;
|
||||
|
||||
if (!uinfo->imm_count)
|
||||
return;
|
||||
@@ -94,11 +95,11 @@ etna_uniforms_write(const struct etna_context *ctx,
|
||||
break;
|
||||
|
||||
case ETNA_IMMEDIATE_UBO0_ADDR ... ETNA_IMMEDIATE_UBOMAX_ADDR:
|
||||
assert(uinfo->imm_contents[i] == ETNA_IMMEDIATE_UBO0_ADDR);
|
||||
idx = uinfo->imm_contents[i] - ETNA_IMMEDIATE_UBO0_ADDR;
|
||||
etna_cmd_stream_reloc(stream, &(struct etna_reloc) {
|
||||
.bo = etna_resource(cb->buffer)->bo,
|
||||
.bo = etna_resource(cb[idx].buffer)->bo,
|
||||
.flags = ETNA_RELOC_READ,
|
||||
.offset = cb->buffer_offset + val,
|
||||
.offset = cb[idx].buffer_offset + val,
|
||||
});
|
||||
break;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user