etnaviv: implement UBOs

At the same time, on pre-HALTI2 hardware use the address register for indirect
uniform loads, since integer support and the LOAD instruction aren't always available.

Passes all dEQP-GLES3.functional.ubo.* on GC7000L. GC3000 with an extra
flush hack passes most of them, but still fails on some of the cases with
many loads.

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Reviewed-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3389>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3389>
This commit is contained in:
Jonathan Marek
2020-01-04 14:17:15 -05:00
committed by Marge Bot
parent 7ff8ce7a3f
commit 6346490a2e
9 changed files with 109 additions and 75 deletions
@@ -115,10 +115,36 @@ etna_lower_io(nir_shader *shader, struct etna_shader_variant *v)
nir_instr_rewrite_src(instr, &intr->src[1], nir_src_for_ssa(ssa));
} break;
case nir_intrinsic_load_uniform: {
/* multiply by 16 and convert to int */
/* convert indirect load_uniform to load_ubo when possible
 * this is required on HALTI5+ because the address register is not implemented
 * address register loads also aren't done optimally
 */
if (v->shader->specs->halti < 2 || nir_src_is_const(intr->src[0]))
break;
nir_intrinsic_instr *load_ubo =
nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo);
load_ubo->num_components = intr->num_components;
nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
load_ubo->num_components, 32, NULL);
b.cursor = nir_before_instr(instr);
nir_ssa_def *ssa = nir_imul(&b, intr->src[0].ssa, nir_imm_int(&b, 16));
nir_instr_rewrite_src(instr, &intr->src[0], nir_src_for_ssa(ssa));
load_ubo->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
load_ubo->src[1] = nir_src_for_ssa(nir_iadd(&b,
nir_imul(&b, intr->src[0].ssa, nir_imm_int(&b, 16)),
nir_imm_int(&b, nir_intrinsic_base(intr) * 16)));
nir_builder_instr_insert(&b, &load_ubo->instr);
nir_ssa_def_rewrite_uses(&intr->dest.ssa,
nir_src_for_ssa(&load_ubo->dest.ssa));
nir_instr_remove(&intr->instr);
} break;
case nir_intrinsic_load_ubo: {
nir_const_value *idx = nir_src_as_const_value(intr->src[0]);
assert(idx);
/* offset index by 1, index 0 is used for converted load_uniform */
b.cursor = nir_before_instr(instr);
nir_instr_rewrite_src(instr, &intr->src[0],
nir_src_for_ssa(nir_imm_int(&b, idx[0].u32 + 1)));
} break;
case nir_intrinsic_load_vertex_id:
case nir_intrinsic_load_instance_id:
@@ -593,44 +619,6 @@ etna_emit_output(struct etna_compile *c, nir_variable *var, struct etna_inst_src
}
}
/* Emit the instruction(s) for an indirect uniform load:
 * dst = memory[base + src], where 'src' holds the byte offset and
 * 'base' is an immediate carrying the uniform buffer's base address.
 */
static void
etna_emit_load_ubo(struct etna_compile *c, struct etna_inst_dst dst,
                   struct etna_inst_src src, struct etna_inst_src base)
{
   /* convert float offset back to integer */
   if (c->specs->halti < 2) {
      /* Pre-HALTI2: the offset source is a float, so first convert it to
       * an unsigned integer, writing the result into dst's own register.
       */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_F2I,
         .type = INST_TYPE_U32,
         .dst = dst,
         .src[0] = src,
      });
      /* Then LOAD using the just-converted value as the offset,
       * broadcast from the first component selected by dst.write_mask.
       */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOAD,
         .type = INST_TYPE_U32,
         .dst = dst,
         .src[0] = {
            .use = 1,
            .rgroup = INST_RGROUP_TEMP,
            .reg = dst.reg,
            .swiz = INST_SWIZ_BROADCAST(ffs(dst.write_mask) - 1)
         },
         .src[1] = base,
      });
      return;
   }

   /* HALTI2+: the offset is already an integer, a single LOAD suffices. */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_LOAD,
      .type = INST_TYPE_U32,
      .dst = dst,
      .src[0] = src,
      .src[1] = base,
   });
}
#define OPT(nir, pass, ...) ({ \
bool this_progress = false; \
NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
@@ -94,7 +94,6 @@ static inline bool is_sysval(nir_instr *instr)
#define CONST_VAL(a, b) (nir_const_value) {.u64 = (uint64_t)(a) << 32 | (uint64_t)(b)}
#define CONST(x) CONST_VAL(ETNA_IMMEDIATE_CONSTANT, x)
#define UNIFORM(x) CONST_VAL(ETNA_IMMEDIATE_UNIFORM, x)
#define UNIFORM_BASE(x) CONST_VAL(ETNA_IMMEDIATE_UBO0_ADDR, x)
#define TEXSCALE(x, i) CONST_VAL(ETNA_IMMEDIATE_TEXRECT_SCALE_X + (i), x)
static int
@@ -388,6 +387,7 @@ get_src(struct state *state, nir_src *src)
case nir_intrinsic_load_input:
case nir_intrinsic_load_instance_id:
case nir_intrinsic_load_uniform:
case nir_intrinsic_load_ubo:
return ra_src(state, src);
case nir_intrinsic_load_front_face:
return (hw_src) { .use = 1, .rgroup = INST_RGROUP_INTERNAL };
@@ -586,6 +586,7 @@ dest_for_instr(nir_instr *instr)
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (intr->intrinsic == nir_intrinsic_load_uniform ||
intr->intrinsic == nir_intrinsic_load_ubo ||
intr->intrinsic == nir_intrinsic_load_input ||
intr->intrinsic == nir_intrinsic_load_instance_id)
dest = &intr->dest;
@@ -908,8 +909,8 @@ ra_assign(struct state *state, nir_shader *shader)
if (instr->type == nir_instr_type_intrinsic) {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (intr->intrinsic == nir_intrinsic_load_uniform) {
/* make sure there isn't any reswizzling */
/* can't have dst swizzle or sparse writemask on UBO loads */
if (intr->intrinsic == nir_intrinsic_load_ubo) {
assert(dest == &intr->dest);
if (dest->ssa.num_components == 2)
c = REG_CLASS_VIRT_VEC2C;
@@ -1102,9 +1103,37 @@ emit_intrinsic(struct state *state, nir_intrinsic_instr * intr)
break;
case nir_intrinsic_load_uniform: {
unsigned dst_swiz;
hw_dst dst = ra_dest(state, &intr->dest, &dst_swiz);
/* TODO: might have a problem with dst_swiz .. */
emit(load_ubo, dst, get_src(state, &intr->src[0]), const_src(state, &UNIFORM_BASE(nir_intrinsic_base(intr) * 16), 1));
struct etna_inst_dst dst = ra_dest(state, &intr->dest, &dst_swiz);
/* TODO: rework so extra MOV isn't required, load up to 4 addresses at once */
emit_inst(state->c, &(struct etna_inst) {
.opcode = INST_OPCODE_MOVAR,
.dst.write_mask = 0x1,
.src[2] = get_src(state, &intr->src[0]),
});
emit_inst(state->c, &(struct etna_inst) {
.opcode = INST_OPCODE_MOV,
.dst = dst,
.src[2] = {
.use = 1,
.rgroup = INST_RGROUP_UNIFORM_0,
.reg = nir_intrinsic_base(intr),
.swiz = dst_swiz,
.amode = INST_AMODE_ADD_A_X,
},
});
} break;
case nir_intrinsic_load_ubo: {
/* TODO: if offset is of the form (x + C) then add C to the base instead */
unsigned idx = nir_src_as_const_value(intr->src[0])[0].u32;
unsigned dst_swiz;
emit_inst(state->c, &(struct etna_inst) {
.opcode = INST_OPCODE_LOAD,
.type = INST_TYPE_U32,
.dst = ra_dest(state, &intr->dest, &dst_swiz),
.src[0] = get_src(state, &intr->src[1]),
.src[1] = const_src(state, &CONST_VAL(ETNA_IMMEDIATE_UBO0_ADDR + idx, 0), 1),
});
} break;
case nir_intrinsic_load_front_face:
case nir_intrinsic_load_frag_coord:
@@ -1402,6 +1431,8 @@ emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
.shader = shader,
.impl = nir_shader_get_entrypoint(shader),
};
bool have_indirect_uniform = false;
unsigned indirect_max = 0;
nir_builder b;
nir_builder_init(&b, state.impl);
@@ -1421,19 +1452,25 @@ emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
} break;
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
/* TODO: load_ubo can also become a constant in some cases
* (at the moment it can end up emitting a LOAD with two
* uniform sources, which could be a problem on HALTI2)
*/
if (intr->intrinsic != nir_intrinsic_load_uniform)
break;
nir_const_value *off = nir_src_as_const_value(intr->src[0]);
if (!off || off[0].u64 >> 32 != ETNA_IMMEDIATE_CONSTANT)
if (!off || off[0].u64 >> 32 != ETNA_IMMEDIATE_CONSTANT) {
have_indirect_uniform = true;
indirect_max = nir_intrinsic_base(intr) + nir_intrinsic_range(intr);
break;
}
unsigned base = nir_intrinsic_base(intr);
/* pre halti2 uniform offset will be float */
if (c->specs->halti < 2)
base += (unsigned) off[0].f32 / 16;
base += (unsigned) off[0].f32;
else
base += off[0].u32 / 16;
base += off[0].u32;
nir_const_value value[4];
for (unsigned i = 0; i < intr->dest.ssa.num_components; i++) {
@@ -1455,6 +1492,13 @@ emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
}
}
/* TODO: only emit required indirect uniform ranges */
if (have_indirect_uniform) {
for (unsigned i = 0; i < indirect_max * 4; i++)
c->consts[i] = UNIFORM(i).u64;
state.const_count = indirect_max;
}
/* add mov for any store output using sysval/const */
nir_foreach_block(block, state.impl) {
nir_foreach_instr_safe(instr, block) {
@@ -288,8 +288,10 @@ etna_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
}
/* Mark constant buffers as being read */
resource_read(ctx, ctx->constant_buffer[PIPE_SHADER_VERTEX].buffer);
resource_read(ctx, ctx->constant_buffer[PIPE_SHADER_FRAGMENT].buffer);
for (unsigned i = 0; i < ETNA_MAX_CONST_BUF; i++) {
resource_read(ctx, ctx->constant_buffer[PIPE_SHADER_VERTEX][i].buffer);
resource_read(ctx, ctx->constant_buffer[PIPE_SHADER_FRAGMENT][i].buffer);
}
/* Mark VBOs as being read */
foreach_bit(i, ctx->vertex_buffer.enabled_mask) {
@@ -92,7 +92,7 @@ enum etna_immediate_contents {
ETNA_IMMEDIATE_TEXRECT_SCALE_X,
ETNA_IMMEDIATE_TEXRECT_SCALE_Y,
ETNA_IMMEDIATE_UBO0_ADDR,
ETNA_IMMEDIATE_UBOMAX_ADDR = ETNA_IMMEDIATE_UBO0_ADDR + 255,
ETNA_IMMEDIATE_UBOMAX_ADDR = ETNA_IMMEDIATE_UBO0_ADDR + ETNA_MAX_CONST_BUF - 1,
};
struct etna_shader_uniform_info {
@@ -164,7 +164,7 @@ struct etna_context {
uint32_t active_sampler_views;
uint32_t dirty_sampler_views;
struct pipe_sampler_view *sampler_view[PIPE_MAX_SAMPLERS];
struct pipe_constant_buffer constant_buffer[PIPE_SHADER_TYPES];
struct pipe_constant_buffer constant_buffer[PIPE_SHADER_TYPES][ETNA_MAX_CONST_BUF];
struct etna_vertexbuf_state vertex_buffer;
struct etna_index_buffer index_buffer;
struct etna_shader_state shader;
+4 -4
View File
@@ -668,12 +668,12 @@ etna_emit_state(struct etna_context *ctx)
if (do_uniform_flush)
etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
etna_uniforms_write(ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX]);
etna_uniforms_write(ctx, ctx->shader.vs, ctx->constant_buffer[PIPE_SHADER_VERTEX]);
if (do_uniform_flush)
etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
etna_uniforms_write(ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT]);
etna_uniforms_write(ctx, ctx->shader.fs, ctx->constant_buffer[PIPE_SHADER_FRAGMENT]);
if (ctx->specs.halti >= 5) {
/* HALTI5 needs to be prompted to pre-fetch shaders */
@@ -687,14 +687,14 @@ etna_emit_state(struct etna_context *ctx)
etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits))
etna_uniforms_write(ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX]);
etna_uniforms_write(ctx, ctx->shader.vs, ctx->constant_buffer[PIPE_SHADER_VERTEX]);
/* ideally this cache would only be flushed if there are PS uniform changes */
if (do_uniform_flush)
etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits))
etna_uniforms_write(ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT]);
etna_uniforms_write(ctx, ctx->shader.fs, ctx->constant_buffer[PIPE_SHADER_FRAGMENT]);
}
/**** End of state update ****/
#undef EMIT_STATE
@@ -38,6 +38,7 @@
#define ETNA_NUM_LOD (14)
#define ETNA_NUM_LAYERS (6)
#define ETNA_MAX_UNIFORMS (256)
#define ETNA_MAX_CONST_BUF 16
#define ETNA_MAX_PIXELPIPES 2
/* All RS operations must have width%16 = 0 */
+7 -4
View File
@@ -280,6 +280,10 @@ etna_screen_get_shader_param(struct pipe_screen *pscreen,
enum pipe_shader_cap param)
{
struct etna_screen *screen = etna_screen(pscreen);
bool ubo_enable = screen->specs.halti >= 2 && DBG_ENABLED(ETNA_DBG_NIR);
if (DBG_ENABLED(ETNA_DBG_DEQP))
ubo_enable = true;
switch (shader) {
case PIPE_SHADER_FRAGMENT:
@@ -315,7 +319,7 @@ etna_screen_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_MAX_TEMPS:
return 64; /* Max native temporaries. */
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
return DBG_ENABLED(ETNA_DBG_DEQP) ? 16 : 1;
return ubo_enable ? ETNA_MAX_CONST_BUF : 1;
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
@@ -340,6 +344,8 @@ etna_screen_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_PREFERRED_IR:
return DBG_ENABLED(ETNA_DBG_NIR) ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
if (ubo_enable)
return 16384; /* 16384 so state tracker enables UBOs */
return shader == PIPE_SHADER_FRAGMENT
? screen->specs.max_ps_uniforms * sizeof(float[4])
: screen->specs.max_vs_uniforms * sizeof(float[4]);
@@ -617,9 +623,6 @@ etna_determine_uniform_limits(struct etna_screen *screen)
screen->specs.max_vs_uniforms = 168;
screen->specs.max_ps_uniforms = 64;
}
if (DBG_ENABLED(ETNA_DBG_DEQP))
screen->specs.max_ps_uniforms = 1024;
}
static bool
+4 -9
View File
@@ -83,24 +83,19 @@ etna_set_constant_buffer(struct pipe_context *pctx,
{
struct etna_context *ctx = etna_context(pctx);
if (unlikely(index > 0)) {
DBG("Unhandled buffer index %i", index);
return;
}
assert(index < ETNA_MAX_CONST_BUF);
util_copy_constant_buffer(&ctx->constant_buffer[shader], cb);
util_copy_constant_buffer(&ctx->constant_buffer[shader][index], cb);
/* Note that the state tracker can unbind constant buffers by
* passing NULL here. */
if (unlikely(!cb || (!cb->buffer && !cb->user_buffer)))
return;
/* there is no support for ARB_uniform_buffer_object */
assert(cb->buffer == NULL && cb->user_buffer != NULL);
assert(index != 0 || cb->user_buffer != NULL);
if (!cb->buffer) {
struct pipe_constant_buffer *cb = &ctx->constant_buffer[shader];
struct pipe_constant_buffer *cb = &ctx->constant_buffer[shader][index];
u_upload_data(pctx->const_uploader, 0, cb->buffer_size, 16, cb->user_buffer, &cb->buffer_offset, &cb->buffer);
}
@@ -67,6 +67,7 @@ etna_uniforms_write(const struct etna_context *ctx,
const struct etna_shader_uniform_info *uinfo = &sobj->uniforms;
bool frag = (sobj == ctx->shader.fs);
uint32_t base = frag ? ctx->specs.ps_uniforms_offset : ctx->specs.vs_uniforms_offset;
unsigned idx;
if (!uinfo->imm_count)
return;
@@ -94,11 +95,11 @@ etna_uniforms_write(const struct etna_context *ctx,
break;
case ETNA_IMMEDIATE_UBO0_ADDR ... ETNA_IMMEDIATE_UBOMAX_ADDR:
assert(uinfo->imm_contents[i] == ETNA_IMMEDIATE_UBO0_ADDR);
idx = uinfo->imm_contents[i] - ETNA_IMMEDIATE_UBO0_ADDR;
etna_cmd_stream_reloc(stream, &(struct etna_reloc) {
.bo = etna_resource(cb->buffer)->bo,
.bo = etna_resource(cb[idx].buffer)->bo,
.flags = ETNA_RELOC_READ,
.offset = cb->buffer_offset + val,
.offset = cb[idx].buffer_offset + val,
});
break;