From 6373dd814a74d84becbbbfc42673df147adb6e9b Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Mon, 30 Aug 2021 12:25:46 +0300 Subject: [PATCH] ir3/a6xx,freedreno: account for resinfo return size dependency on IBO_0_FMT On a6xx, resinfo returns the size in bytes divided by the IBO_0_FMT format size (not just the size in dwords), so we have to shift it back to the NIR meaning, which is the size in bytes. Make freedreno use 16b buffers when they are supported in order to be able to depend on hardware capabilities when lowering ssbo size. Fixes: ce1a381e57d2803246c8a27f52f3dea2d2bfbfb1 ("turnip: enable VK_KHR_16bit_storage on A650") Fixes cts tests: dEQP-VK.ssbo.unsized_array_length.float_offset_explicit_size dEQP-VK.ssbo.unsized_array_length.float_no_offset_whole_size dEQP-VK.compute.basic.write_multiple_unsized_arr_single_invocation and many more Signed-off-by: Danylo Piliaiev Part-of: --- src/freedreno/ir3/ir3_compiler.c | 3 +++ src/freedreno/ir3/ir3_compiler.h | 3 +++ src/freedreno/ir3/ir3_nir.c | 17 ++++++++++------- src/gallium/drivers/freedreno/a6xx/fd6_image.c | 8 ++++++-- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index cf407e6b2b3..fa745f8ff34 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -126,6 +126,9 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, compiler->tess_use_shared = fd_dev_info(compiler->dev_id)->a6xx.tess_use_shared; + + compiler->storage_16bit = + fd_dev_info(compiler->dev_id)->a6xx.storage_16bit; } else { compiler->max_const_pipeline = 512; compiler->max_const_geom = 512; diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h index afe6113b1e4..0a13f0465df 100644 --- a/src/freedreno/ir3/ir3_compiler.h +++ b/src/freedreno/ir3/ir3_compiler.h @@ -156,6 +156,9 @@ struct ir3_compiler { /* Whether private memory is supported */ bool has_pvtmem; + + /* True if 16-bit descriptors are used for 
both 16-bit and 32-bit access. */ + bool storage_16bit; }; void ir3_compiler_destroy(struct ir3_compiler *compiler); diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 25dd0f5dc17..4739d9e661b 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -303,20 +303,23 @@ ir3_nir_lower_ssbo_size_filter(const nir_instr *instr, const void *data) static nir_ssa_def * ir3_nir_lower_ssbo_size_instr(nir_builder *b, nir_instr *instr, void *data) { + uint8_t ssbo_size_to_bytes_shift = *(uint8_t *) data; nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - return nir_ishl(b, &intr->dest.ssa, nir_imm_int(b, 2)); + return nir_ishl(b, &intr->dest.ssa, nir_imm_int(b, ssbo_size_to_bytes_shift)); } /** - * The resinfo opcode we have for getting the SSBO size on a6xx returns a number - * of dwords, while the NIR intrinsic coming in is a number of bytes. Switch - * things so the NIR intrinsic in our backend means dwords. + * The resinfo opcode we have for getting the SSBO size on a6xx returns a byte + * length divided by IBO_0_FMT, while the NIR intrinsic coming in is a number of + * bytes. Make the NIR intrinsic in our backend mean IBO_0_FMT-sized units. */ static bool -ir3_nir_lower_ssbo_size(nir_shader *s) +ir3_nir_lower_ssbo_size(nir_shader *s, bool storage_16bit) { + uint8_t ssbo_size_to_bytes_shift = storage_16bit ? 
1 : 2; return nir_shader_lower_instructions(s, ir3_nir_lower_ssbo_size_filter, - ir3_nir_lower_ssbo_size_instr, NULL); + ir3_nir_lower_ssbo_size_instr, + &ssbo_size_to_bytes_shift); } void @@ -563,7 +566,7 @@ ir3_nir_post_finalize(struct ir3_compiler *compiler, nir_shader *s) NIR_PASS_V(s, nir_lower_idiv, &idiv_options); /* idiv generated by cube lowering */ if (compiler->gen >= 6) - OPT_V(s, ir3_nir_lower_ssbo_size); + OPT_V(s, ir3_nir_lower_ssbo_size, compiler->storage_16bit); ir3_optimize_loop(compiler, s); } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.c b/src/gallium/drivers/freedreno/a6xx/fd6_image.c index cc8ee0a362a..5da8f640874 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_image.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.c @@ -135,7 +135,6 @@ translate_image(struct fd6_image *img, const struct pipe_image_view *pimg) static void translate_buf(struct fd6_image *img, const struct pipe_shader_buffer *pimg) { - enum pipe_format format = PIPE_FORMAT_R32_UINT; struct pipe_resource *prsc = pimg->buffer; struct fd_resource *rsc = fd_resource(prsc); @@ -144,6 +143,11 @@ translate_buf(struct fd6_image *img, const struct pipe_shader_buffer *pimg) return; } + const struct fd_dev_info *dev_info = fd_screen(prsc->screen)->info; + enum pipe_format format = dev_info->a6xx.storage_16bit + ? PIPE_FORMAT_R16_UINT + : PIPE_FORMAT_R32_UINT; + img->prsc = prsc; img->pfmt = format; img->type = fd6_tex_type(prsc->target); @@ -161,7 +165,7 @@ translate_buf(struct fd6_image *img, const struct pipe_shader_buffer *pimg) /* size is encoded with low 15b in WIDTH and high bits in HEIGHT, * in units of elements: */ - unsigned sz = pimg->buffer_size / 4; + unsigned sz = pimg->buffer_size / (dev_info->a6xx.storage_16bit ? 2 : 4); img->width = sz & MASK(15); img->height = sz >> 15; img->depth = 0;