From 6be3c0f82d7cd988d6737e078d4ee8e60dc66eae Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Wed, 20 Oct 2021 10:12:37 -0400 Subject: [PATCH] zink: move all 64-32bit shader store rewriting to nir pass this also enables natural 64bit stores on drivers that support it Reviewed-by: Dave Airlie Part-of: --- .../drivers/zink/nir_to_spirv/nir_to_spirv.c | 48 ++++++------------- src/gallium/drivers/zink/zink_compiler.c | 24 +++++++++- 2 files changed, 37 insertions(+), 35 deletions(-) diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c index 3b6eb9f6ace..48a7fae37e3 100644 --- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c +++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c @@ -2006,7 +2006,7 @@ emit_store_ssbo(struct ntv_context *ctx, nir_intrinsic_instr *intr) nir_const_value *const_block_index = nir_src_as_const_value(intr->src[1]); assert(const_block_index); - unsigned idx = MIN2(nir_src_bit_size(intr->src[0]), 32) >> 4; + unsigned idx = nir_src_bit_size(intr->src[0]) >> 4; assert(idx < ARRAY_SIZE(ctx->ssbos[0])); if (!ctx->ssbos[const_block_index->u32][idx]) emit_bo(ctx, ctx->ssbo_vars[const_block_index->u32], nir_src_bit_size(intr->src[0])); @@ -2020,18 +2020,15 @@ emit_store_ssbo(struct ntv_context *ctx, nir_intrinsic_instr *intr) unsigned wrmask = nir_intrinsic_write_mask(intr); unsigned num_components = util_bitcount(wrmask); - /* we need to grab 2x32 to fill the 64bit value */ - bool is_64bit = bit_size == 64; - /* we grab a single array member at a time, so it's a pointer to a uint */ SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, SpvStorageClassStorageBuffer, - get_uvec_type(ctx, MIN2(bit_size, 32), 1)); + get_uvec_type(ctx, bit_size, 1)); /* our generated uniform has a memory layout like * * struct { - * uint base[array_size]; + * uintN base[array_size]; * }; * * where 'array_size' is set as though every member of the ubo takes up a vec4, @@ -2055,34 +2052,19 @@ emit_store_ssbo(struct ntv_context *ctx, nir_intrinsic_instr *intr) * no other spirv method for using an id to access a member of a composite, as * (composite|vector)_extract both take literals */ - unsigned write_count = 0; SpvId src_base_type = get_uvec_type(ctx, bit_size, 1); - for (unsigned i = 0; write_count < num_components; i++) { - if (wrmask & (1 << i)) { - SpvId component = nir_src_num_components(intr->src[0]) > 1 ? - spirv_builder_emit_composite_extract(&ctx->builder, src_base_type, value, &i, 1) : - value; - SpvId component_split; - if (is_64bit) - component_split = emit_bitcast(ctx, get_uvec_type(ctx, 32, 2), component); - for (unsigned j = 0; j < 1 + !!is_64bit; j++) { - if (j) - offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, one); - SpvId indices[] = { member, offset }; - SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, - bo, indices, - ARRAY_SIZE(indices)); - if (is_64bit) - component = spirv_builder_emit_composite_extract(&ctx->builder, uint_type, component_split, &j, 1); - if (nir_intrinsic_access(intr) & ACCESS_COHERENT) - spirv_builder_emit_atomic_store(&ctx->builder, ptr, SpvScopeWorkgroup, 0, component); - else - spirv_builder_emit_store(&ctx->builder, ptr, component); - } - write_count++; - } else if (is_64bit) - /* we're doing 32bit stores here, so we need to increment correctly here */ - offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, one); + for (unsigned i = 0; i < num_components; i++) { + SpvId component = nir_src_num_components(intr->src[0]) > 1 ? + spirv_builder_emit_composite_extract(&ctx->builder, src_base_type, value, &i, 1) : + value; + SpvId indices[] = { member, offset }; + SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, + bo, indices, + ARRAY_SIZE(indices)); + if (nir_intrinsic_access(intr) & ACCESS_COHERENT) + spirv_builder_emit_atomic_store(&ctx->builder, ptr, SpvScopeWorkgroup, 0, component); + else + spirv_builder_emit_store(&ctx->builder, ptr, component); /* increment to the next vec4 member index for the next store */ offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, one); diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 1847e341a58..ed4cce2d43c 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -692,11 +692,31 @@ rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data) break; case nir_intrinsic_store_ssbo: b->cursor = nir_before_instr(instr); - nir_instr_rewrite_src_ssa(instr, &intr->src[2], nir_udiv_imm(b, intr->src[2].ssa, MIN2(nir_src_bit_size(intr->src[0]), 32) / 8)); + nir_instr_rewrite_src_ssa(instr, &intr->src[2], nir_udiv_imm(b, intr->src[2].ssa, nir_src_bit_size(intr->src[0]) / 8)); + /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */ + if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) { + /* this is always scalarized */ + assert(intr->src[0].ssa->num_components == 1); + /* cast to 32bit: nir_unpack_64_2x32 not supported by ntv */ + nir_ssa_def *casted = nir_vec2(b, nir_u2u32(b, intr->src[0].ssa), nir_u2u32(b, nir_ushr_imm(b, intr->src[0].ssa, 32))); + /* rewrite as 2x32 */ + nir_store_ssbo(b, casted, intr->src[1].ssa, intr->src[2].ssa, .align_mul = 4, .align_offset = 0); + nir_instr_remove(instr); + } return true; case nir_intrinsic_store_shared: b->cursor = nir_before_instr(instr); - nir_instr_rewrite_src_ssa(instr, &intr->src[1], nir_udiv_imm(b, intr->src[1].ssa, MIN2(nir_src_bit_size(intr->src[0]), 32) / 8)); + nir_instr_rewrite_src_ssa(instr, &intr->src[1], nir_udiv_imm(b, intr->src[1].ssa, nir_src_bit_size(intr->src[0]) / 8)); + /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */ + if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) { + /* this is always scalarized */ + assert(intr->src[0].ssa->num_components == 1); + /* cast to 32bit: nir_unpack_64_2x32 not supported by ntv */ + nir_ssa_def *casted = nir_vec2(b, nir_u2u32(b, intr->src[0].ssa), nir_u2u32(b, nir_ushr_imm(b, intr->src[0].ssa, 32))); + /* rewrite as 2x32 */ + nir_store_shared(b, casted, intr->src[1].ssa, .align_mul = 4, .align_offset = 0); + nir_instr_remove(instr); + } return true; default: break;