From 547a2aa051a513caa3d382e2db1a4550ea732834 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 11 Mar 2021 11:00:24 -0800 Subject: [PATCH] freedreno/ir3: Use the resinfo path for ssbo sizes on GL, too. Less state walking at draw time, in exchange for a SHL in the lookup. Part-of: --- .../ci/deqp-freedreno-a630-fails.txt | 15 ------ src/freedreno/ir3/ir3_compiler_nir.c | 47 +++++++++---------- src/freedreno/ir3/ir3_nir.c | 16 ------- src/freedreno/ir3/ir3_shader.h | 12 ----- src/gallium/drivers/freedreno/a5xx/fd5_emit.c | 6 +-- .../drivers/freedreno/a6xx/fd6_const.c | 1 - src/gallium/drivers/freedreno/ir3/ir3_const.h | 28 ----------- 7 files changed, 23 insertions(+), 102 deletions(-) diff --git a/src/freedreno/ci/deqp-freedreno-a630-fails.txt b/src/freedreno/ci/deqp-freedreno-a630-fails.txt index a6d94707bef..f8359211418 100644 --- a/src/freedreno/ci/deqp-freedreno-a630-fails.txt +++ b/src/freedreno/ci/deqp-freedreno-a630-fails.txt @@ -25,21 +25,6 @@ KHR-GLES31.core.gpu_shader5.fma_precision_vec2,Fail KHR-GLES31.core.gpu_shader5.fma_precision_vec3,Fail KHR-GLES31.core.gpu_shader5.fma_precision_vec4,Fail -# "Array 0 length is 60829990 should be 7 -# Array 1 length is 60829990 should be 5 -# Array 2 length is 60829990 should be 3 -# Array 3 length is 60829990 should be 4 -# Array 4 length is 132120576 should be 23 -# Array 5 length is 132120576 should be 123 -# Array 6 length is 66060288 should be 419" -KHR-GLES31.core.shader_storage_buffer_object.advanced-unsizedArrayLength-fs-std430-vec,Fail - -# "Array 0 length is 266338304 should be 7 -# Array 1 length is 266338304 should be 3 -# Array 2 length is 266338304 should be 4 -# Array 3 length is 266338303 should be 4" -KHR-GLES31.core.shader_storage_buffer_object.advanced-unsizedArrayLength-fs-int,Fail - # "gl_NumWorkGroups: Invalid data at index 2" KHR-GLES31.core.compute_shader.built-in-variables,Fail diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index a660788ed78..23ca8a25cde 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -855,37 +855,32 @@ static void emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, struct ir3_instruction **dst) { - if (ir3_bindless_resource(intr->src[0])) { - struct ir3_block *b = ctx->block; - struct ir3_instruction *ibo = ir3_ssbo_to_ibo(ctx, intr->src[0]); - struct ir3_instruction *resinfo = ir3_RESINFO(b, ibo, 0); - resinfo->cat6.iim_val = 1; - resinfo->cat6.d = 1; - resinfo->cat6.type = TYPE_U32; - resinfo->cat6.typed = false; - /* resinfo has no writemask and always writes out 3 components */ - resinfo->dsts[0]->wrmask = MASK(3); - ir3_handle_bindless_cat6(resinfo, intr->src[0]); + struct ir3_block *b = ctx->block; + struct ir3_instruction *ibo = ir3_ssbo_to_ibo(ctx, intr->src[0]); + struct ir3_instruction *resinfo = ir3_RESINFO(b, ibo, 0); + resinfo->cat6.iim_val = 1; + resinfo->cat6.d = ctx->compiler->gen >= 6 ? 1 : 2; + resinfo->cat6.type = TYPE_U32; + resinfo->cat6.typed = false; + /* resinfo has no writemask and always writes out 3 components */ + resinfo->dsts[0]->wrmask = MASK(3); + ir3_handle_bindless_cat6(resinfo, intr->src[0]); + + if (ctx->compiler->gen >= 6) { struct ir3_instruction *resinfo_dst; ir3_split_dest(b, &resinfo_dst, resinfo, 0, 1); /* Unfortunately resinfo returns the array length, i.e. in dwords, - * while NIR expects us to return the size in bytes. - * - * TODO: fix this in NIR. - */ + * while NIR expects us to return the size in bytes. + * + * TODO: fix this in NIR. + */ *dst = ir3_SHL_B(b, resinfo_dst, 0, create_immed(b, 2), 0); - return; + } else { + /* On a5xx, resinfo returns the low 16 bits of ssbo size in .x and the high 16 bits in .y */ + struct ir3_instruction *resinfo_dst[2]; + ir3_split_dest(b, resinfo_dst, resinfo, 0, 2); + *dst = ir3_ADD_U(b, ir3_SHL_B(b, resinfo_dst[1], 0, create_immed(b, 16), 0), 0, resinfo_dst[0], 0); } - - /* SSBO size stored as a const starting at ssbo_sizes: */ - const struct ir3_const_state *const_state = ir3_const_state(ctx->so); - unsigned blk_idx = nir_src_as_uint(intr->src[0]); - unsigned idx = regid(const_state->offsets.ssbo_sizes, 0) + - const_state->ssbo_size.off[blk_idx]; - - debug_assert(const_state->ssbo_size.mask & (1 << blk_idx)); - - dst[0] = create_uniform(ctx->block, idx); } /* src[] = { offset }. const_index[] = { base } */ diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index fdc38b95e8f..56ecb6b4cf8 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -745,16 +745,6 @@ ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_const_state *layout) unsigned idx; switch (intr->intrinsic) { - case nir_intrinsic_get_ssbo_size: - if (ir3_bindless_resource(intr->src[0])) - break; - idx = nir_src_as_uint(intr->src[0]); - if (layout->ssbo_size.mask & (1 << idx)) - break; - layout->ssbo_size.mask |= (1 << idx); - layout->ssbo_size.off[idx] = layout->ssbo_size.count; - layout->ssbo_size.count += 1; /* one const per */ - break; case nir_intrinsic_image_atomic_add: case nir_intrinsic_image_atomic_imin: case nir_intrinsic_image_atomic_umin: @@ -850,12 +840,6 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v, constoff += align(const_state->num_ubos * ptrsz, 4) / 4; } - if (const_state->ssbo_size.count > 0) { - unsigned cnt = const_state->ssbo_size.count; - const_state->offsets.ssbo_sizes = constoff; - constoff += align(cnt, 4) / 4; - } - if (const_state->image_dims.count > 0) { unsigned cnt = const_state->image_dims.count; const_state->offsets.image_dims = constoff; diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 26335302c92..3337b6252e5 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -169,8 +169,6 @@ struct ir3_const_state { struct { /* user const start at zero */ unsigned ubo; - /* NOTE that a3xx might need a section for SSBO addresses too */ - unsigned ssbo_sizes; unsigned image_dims; unsigned driver_param; unsigned tfbo; @@ -179,16 +177,6 @@ struct ir3_const_state { unsigned immediate; } offsets; - struct { - uint32_t mask; /* bitmask of SSBOs that have get_ssbo_size */ - uint32_t count; /* number of consts allocated */ - /* one const allocated per SSBO which has get_ssbo_size, - * ssbo_sizes.off[ssbo_id] is offset from start of ssbo_sizes - * consts: - */ - uint32_t off[IR3_MAX_SHADER_BUFFERS]; - } ssbo_size; - struct { uint32_t mask; /* bitmask of images that have image_store */ uint32_t count; /* number of consts allocated */ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c index b98608c10b9..4f44210c35c 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c @@ -442,10 +442,8 @@ emit_ssbos(struct fd_context *ctx, struct fd_ringbuffer *ring, struct pipe_shader_buffer *buf = &so->sb[i]; unsigned sz = buf->buffer_size; - /* width is in dwords, overflows into height: */ - sz /= 4; - - OUT_RING(ring, A5XX_SSBO_1_0_WIDTH(sz)); + /* Unlike a6xx, SSBO size is in bytes. */ + OUT_RING(ring, A5XX_SSBO_1_0_WIDTH(sz & MASK(16))); OUT_RING(ring, A5XX_SSBO_1_1_HEIGHT(sz >> 16)); OUT_PKT7(ring, CP_LOAD_STATE4, 5); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_const.c b/src/gallium/drivers/freedreno/a6xx/fd6_const.c index bb1ecc42d23..41a5f322528 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_const.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_const.c @@ -338,7 +338,6 @@ fd6_emit_ibo_consts(struct fd6_emit *emit, const struct ir3_shader_variant *v, { struct fd_context *ctx = emit->ctx; - ir3_emit_ssbo_sizes(ctx->screen, v, ring, &ctx->shaderbuf[stage]); ir3_emit_image_dims(ctx->screen, v, ring, &ctx->shaderimg[stage]); } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_const.h b/src/gallium/drivers/freedreno/ir3/ir3_const.h index 247167a2935..85f5aefafa7 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_const.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_const.h @@ -253,28 +253,6 @@ ir3_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v, } } -static inline void -ir3_emit_ssbo_sizes(struct fd_screen *screen, - const struct ir3_shader_variant *v, - struct fd_ringbuffer *ring, - struct fd_shaderbuf_stateobj *sb) -{ - const struct ir3_const_state *const_state = ir3_const_state(v); - uint32_t offset = const_state->offsets.ssbo_sizes; - if (v->constlen > offset) { - uint32_t sizes[align(const_state->ssbo_size.count, 4)]; - unsigned mask = const_state->ssbo_size.mask; - - while (mask) { - unsigned index = u_bit_scan(&mask); - unsigned off = const_state->ssbo_size.off[index]; - sizes[off] = sb->sb[index].buffer_size; - } - - emit_const_user(ring, v, offset * 4, ARRAY_SIZE(sizes), sizes); - } -} - static inline void ir3_emit_image_dims(struct fd_screen *screen, const struct ir3_shader_variant *v, @@ -448,12 +426,6 @@ emit_common_consts(const struct ir3_shader_variant *v, ir3_emit_immediates(ctx->screen, v, ring); } - if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_SSBO)) { - struct fd_shaderbuf_stateobj *sb = &ctx->shaderbuf[t]; - ring_wfi(ctx->batch, ring); - ir3_emit_ssbo_sizes(ctx->screen, v, ring, sb); - } - if (dirty & (FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_IMAGE)) { struct fd_shaderimg_stateobj *si = &ctx->shaderimg[t]; ring_wfi(ctx->batch, ring);