freedreno/ir3: Use the resinfo path for ssbo sizes on GL, too.
Less state walking at draw time, in exchange for a SHL in the lookup.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12258>
This commit is contained in:
@@ -25,21 +25,6 @@ KHR-GLES31.core.gpu_shader5.fma_precision_vec2,Fail
|
||||
KHR-GLES31.core.gpu_shader5.fma_precision_vec3,Fail
|
||||
KHR-GLES31.core.gpu_shader5.fma_precision_vec4,Fail
|
||||
|
||||
# "Array 0 length is 60829990 should be 7
|
||||
# Array 1 length is 60829990 should be 5
|
||||
# Array 2 length is 60829990 should be 3
|
||||
# Array 3 length is 60829990 should be 4
|
||||
# Array 4 length is 132120576 should be 23
|
||||
# Array 5 length is 132120576 should be 123
|
||||
# Array 6 length is 66060288 should be 419"
|
||||
KHR-GLES31.core.shader_storage_buffer_object.advanced-unsizedArrayLength-fs-std430-vec,Fail
|
||||
|
||||
# "Array 0 length is 266338304 should be 7
|
||||
# Array 1 length is 266338304 should be 3
|
||||
# Array 2 length is 266338304 should be 4
|
||||
# Array 3 length is 266338303 should be 4"
|
||||
KHR-GLES31.core.shader_storage_buffer_object.advanced-unsizedArrayLength-fs-int,Fail
|
||||
|
||||
# "gl_NumWorkGroups: Invalid data at index 2"
|
||||
KHR-GLES31.core.compute_shader.built-in-variables,Fail
|
||||
|
||||
|
||||
@@ -855,37 +855,32 @@ static void
|
||||
emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
||||
struct ir3_instruction **dst)
|
||||
{
|
||||
if (ir3_bindless_resource(intr->src[0])) {
|
||||
struct ir3_block *b = ctx->block;
|
||||
struct ir3_instruction *ibo = ir3_ssbo_to_ibo(ctx, intr->src[0]);
|
||||
struct ir3_instruction *resinfo = ir3_RESINFO(b, ibo, 0);
|
||||
resinfo->cat6.iim_val = 1;
|
||||
resinfo->cat6.d = 1;
|
||||
resinfo->cat6.type = TYPE_U32;
|
||||
resinfo->cat6.typed = false;
|
||||
/* resinfo has no writemask and always writes out 3 components */
|
||||
resinfo->dsts[0]->wrmask = MASK(3);
|
||||
ir3_handle_bindless_cat6(resinfo, intr->src[0]);
|
||||
struct ir3_block *b = ctx->block;
|
||||
struct ir3_instruction *ibo = ir3_ssbo_to_ibo(ctx, intr->src[0]);
|
||||
struct ir3_instruction *resinfo = ir3_RESINFO(b, ibo, 0);
|
||||
resinfo->cat6.iim_val = 1;
|
||||
resinfo->cat6.d = ctx->compiler->gen >= 6 ? 1 : 2;
|
||||
resinfo->cat6.type = TYPE_U32;
|
||||
resinfo->cat6.typed = false;
|
||||
/* resinfo has no writemask and always writes out 3 components */
|
||||
resinfo->dsts[0]->wrmask = MASK(3);
|
||||
ir3_handle_bindless_cat6(resinfo, intr->src[0]);
|
||||
|
||||
if (ctx->compiler->gen >= 6) {
|
||||
struct ir3_instruction *resinfo_dst;
|
||||
ir3_split_dest(b, &resinfo_dst, resinfo, 0, 1);
|
||||
/* Unfortunately resinfo returns the array length, i.e. in dwords,
|
||||
* while NIR expects us to return the size in bytes.
|
||||
*
|
||||
* TODO: fix this in NIR.
|
||||
*/
|
||||
* while NIR expects us to return the size in bytes.
|
||||
*
|
||||
* TODO: fix this in NIR.
|
||||
*/
|
||||
*dst = ir3_SHL_B(b, resinfo_dst, 0, create_immed(b, 2), 0);
|
||||
return;
|
||||
} else {
|
||||
/* On a5xx, resinfo returns the low 16 bits of ssbo size in .x and the high 16 bits in .y */
|
||||
struct ir3_instruction *resinfo_dst[2];
|
||||
ir3_split_dest(b, resinfo_dst, resinfo, 0, 2);
|
||||
*dst = ir3_ADD_U(b, ir3_SHL_B(b, resinfo_dst[1], 0, create_immed(b, 16), 0), 0, resinfo_dst[0], 0);
|
||||
}
|
||||
|
||||
/* SSBO size stored as a const starting at ssbo_sizes: */
|
||||
const struct ir3_const_state *const_state = ir3_const_state(ctx->so);
|
||||
unsigned blk_idx = nir_src_as_uint(intr->src[0]);
|
||||
unsigned idx = regid(const_state->offsets.ssbo_sizes, 0) +
|
||||
const_state->ssbo_size.off[blk_idx];
|
||||
|
||||
debug_assert(const_state->ssbo_size.mask & (1 << blk_idx));
|
||||
|
||||
dst[0] = create_uniform(ctx->block, idx);
|
||||
}
|
||||
|
||||
/* src[] = { offset }. const_index[] = { base } */
|
||||
|
||||
@@ -745,16 +745,6 @@ ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_const_state *layout)
|
||||
unsigned idx;
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_get_ssbo_size:
|
||||
if (ir3_bindless_resource(intr->src[0]))
|
||||
break;
|
||||
idx = nir_src_as_uint(intr->src[0]);
|
||||
if (layout->ssbo_size.mask & (1 << idx))
|
||||
break;
|
||||
layout->ssbo_size.mask |= (1 << idx);
|
||||
layout->ssbo_size.off[idx] = layout->ssbo_size.count;
|
||||
layout->ssbo_size.count += 1; /* one const per */
|
||||
break;
|
||||
case nir_intrinsic_image_atomic_add:
|
||||
case nir_intrinsic_image_atomic_imin:
|
||||
case nir_intrinsic_image_atomic_umin:
|
||||
@@ -850,12 +840,6 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
|
||||
constoff += align(const_state->num_ubos * ptrsz, 4) / 4;
|
||||
}
|
||||
|
||||
if (const_state->ssbo_size.count > 0) {
|
||||
unsigned cnt = const_state->ssbo_size.count;
|
||||
const_state->offsets.ssbo_sizes = constoff;
|
||||
constoff += align(cnt, 4) / 4;
|
||||
}
|
||||
|
||||
if (const_state->image_dims.count > 0) {
|
||||
unsigned cnt = const_state->image_dims.count;
|
||||
const_state->offsets.image_dims = constoff;
|
||||
|
||||
@@ -169,8 +169,6 @@ struct ir3_const_state {
|
||||
struct {
|
||||
/* user const start at zero */
|
||||
unsigned ubo;
|
||||
/* NOTE that a3xx might need a section for SSBO addresses too */
|
||||
unsigned ssbo_sizes;
|
||||
unsigned image_dims;
|
||||
unsigned driver_param;
|
||||
unsigned tfbo;
|
||||
@@ -179,16 +177,6 @@ struct ir3_const_state {
|
||||
unsigned immediate;
|
||||
} offsets;
|
||||
|
||||
struct {
|
||||
uint32_t mask; /* bitmask of SSBOs that have get_ssbo_size */
|
||||
uint32_t count; /* number of consts allocated */
|
||||
/* one const allocated per SSBO which has get_ssbo_size,
|
||||
* ssbo_sizes.off[ssbo_id] is offset from start of ssbo_sizes
|
||||
* consts:
|
||||
*/
|
||||
uint32_t off[IR3_MAX_SHADER_BUFFERS];
|
||||
} ssbo_size;
|
||||
|
||||
struct {
|
||||
uint32_t mask; /* bitmask of images that have image_store */
|
||||
uint32_t count; /* number of consts allocated */
|
||||
|
||||
@@ -442,10 +442,8 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
struct pipe_shader_buffer *buf = &so->sb[i];
|
||||
unsigned sz = buf->buffer_size;
|
||||
|
||||
/* width is in dwords, overflows into height: */
|
||||
sz /= 4;
|
||||
|
||||
OUT_RING(ring, A5XX_SSBO_1_0_WIDTH(sz));
|
||||
/* Unlike a6xx, SSBO size is in bytes. */
|
||||
OUT_RING(ring, A5XX_SSBO_1_0_WIDTH(sz & MASK(16)));
|
||||
OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16));
|
||||
|
||||
OUT_PKT7(ring, CP_LOAD_STATE4, 5);
|
||||
|
||||
@@ -338,7 +338,6 @@ fd6_emit_ibo_consts(struct fd6_emit *emit, const struct ir3_shader_variant *v,
|
||||
{
|
||||
struct fd_context *ctx = emit->ctx;
|
||||
|
||||
ir3_emit_ssbo_sizes(ctx->screen, v, ring, &ctx->shaderbuf[stage]);
|
||||
ir3_emit_image_dims(ctx->screen, v, ring, &ctx->shaderimg[stage]);
|
||||
}
|
||||
|
||||
|
||||
@@ -253,28 +253,6 @@ ir3_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
ir3_emit_ssbo_sizes(struct fd_screen *screen,
|
||||
const struct ir3_shader_variant *v,
|
||||
struct fd_ringbuffer *ring,
|
||||
struct fd_shaderbuf_stateobj *sb)
|
||||
{
|
||||
const struct ir3_const_state *const_state = ir3_const_state(v);
|
||||
uint32_t offset = const_state->offsets.ssbo_sizes;
|
||||
if (v->constlen > offset) {
|
||||
uint32_t sizes[align(const_state->ssbo_size.count, 4)];
|
||||
unsigned mask = const_state->ssbo_size.mask;
|
||||
|
||||
while (mask) {
|
||||
unsigned index = u_bit_scan(&mask);
|
||||
unsigned off = const_state->ssbo_size.off[index];
|
||||
sizes[off] = sb->sb[index].buffer_size;
|
||||
}
|
||||
|
||||
emit_const_user(ring, v, offset * 4, ARRAY_SIZE(sizes), sizes);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
ir3_emit_image_dims(struct fd_screen *screen,
|
||||
const struct ir3_shader_variant *v,
|
||||
@@ -448,12 +426,6 @@ emit_common_consts(const struct ir3_shader_variant *v,
|
||||
ir3_emit_immediates(ctx->screen, v, ring);
|
||||
}
|
||||
|
||||
if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_SSBO)) {
|
||||
struct fd_shaderbuf_stateobj *sb = &ctx->shaderbuf[t];
|
||||
ring_wfi(ctx->batch, ring);
|
||||
ir3_emit_ssbo_sizes(ctx->screen, v, ring, sb);
|
||||
}
|
||||
|
||||
if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE)) {
|
||||
struct fd_shaderimg_stateobj *si = &ctx->shaderimg[t];
|
||||
ring_wfi(ctx->batch, ring);
|
||||
|
||||
Reference in New Issue
Block a user