From 064c806d23b70108d9bfb74bcb67ed3e512bd246 Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Sun, 22 Aug 2021 09:53:56 -0700
Subject: [PATCH] freedreno/ir3: Add load/store_global lowering

Signed-off-by: Rob Clark
Part-of:
---
 src/freedreno/ir3/ir3_nir.c           |  1 +
 src/freedreno/ir3/ir3_nir.h           |  1 +
 src/freedreno/ir3/ir3_nir_lower_64b.c | 68 +++++++++++++++++++++++++++
 3 files changed, 70 insertions(+)

diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 2f3426e1fe3..a630b0585a8 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -705,6 +705,7 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
    /* Lower scratch writemasks */
    progress |= OPT(s, nir_lower_wrmasks, should_split_wrmask, s);
 
+   progress |= OPT(s, ir3_nir_lower_64b_global);
    progress |= OPT(s, ir3_nir_lower_64b_intrinsics);
    progress |= OPT(s, ir3_nir_lower_64b_undef);
    progress |= OPT(s, nir_lower_int64);
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
index 10bc8a9b08e..2de6adf0251 100644
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -59,6 +59,7 @@ void ir3_nir_lower_gs(nir_shader *shader);
  */
 bool ir3_nir_lower_64b_intrinsics(nir_shader *shader);
 bool ir3_nir_lower_64b_undef(nir_shader *shader);
+bool ir3_nir_lower_64b_global(nir_shader *shader);
 
 const nir_shader_compiler_options *
 ir3_get_compiler_options(struct ir3_compiler *compiler);
diff --git a/src/freedreno/ir3/ir3_nir_lower_64b.c b/src/freedreno/ir3/ir3_nir_lower_64b.c
index 05a2c9d1fcb..e02a6dfc564 100644
--- a/src/freedreno/ir3/ir3_nir_lower_64b.c
+++ b/src/freedreno/ir3/ir3_nir_lower_64b.c
@@ -214,3 +214,71 @@ ir3_nir_lower_64b_undef(nir_shader *shader)
          shader, lower_64b_undef_filter,
          lower_64b_undef, NULL);
 }
+
+/*
+ * Lowering for load_global/load_global_constant/store_global with 64b
+ * addresses to the ir3 variants, which instead take the address as a uvec2_32
+ */
+
+static bool
+lower_64b_global_filter(const nir_instr *instr, const void *unused) /* picks the intrinsics to lower */
+{
+   (void)unused;
+
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   return (intr->intrinsic == nir_intrinsic_load_global) ||
+          (intr->intrinsic == nir_intrinsic_load_global_constant) ||
+          (intr->intrinsic == nir_intrinsic_store_global);
+}
+
+static nir_ssa_def *
+lower_64b_global(nir_builder *b, nir_instr *instr, void *unused) /* rewrites one filtered intrinsic */
+{
+   (void)unused;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   bool load = intr->intrinsic != nir_intrinsic_store_global; /* true for either load variant */
+
+   nir_ssa_def *addr64 = nir_ssa_for_src(b, intr->src[load ? 0 : 1], 1); /* stores carry the value in src[0] */
+   nir_ssa_def *addr = nir_unpack_64_2x32(b, addr64); /* 64b address as a 2x32b vector */
+
+   /*
+    * Note that we can get vec8/vec16 with OpenCL, so we need to split
+    * those up into at most 4 components per load/store.
+    */
+
+   if (load) {
+      unsigned num_comp = nir_intrinsic_dest_components(intr);
+      nir_ssa_def *components[num_comp];
+      for (unsigned off = 0; off < num_comp;) { /* off is advanced by the inner loop */
+         unsigned c = MIN2(num_comp - off, 4);
+         nir_ssa_def *val = nir_build_load_global_ir3(
+               b, c, nir_dest_bit_size(intr->dest),
+               addr, nir_imm_int(b, off)); /* NOTE(review): off is a component index; confirm offset units for non-32b */
+         for (unsigned i = 0; i < c; i++) {
+            components[off++] = nir_channel(b, val, i); /* gather each loaded channel */
+         }
+      }
+      return nir_build_alu_src_arr(b, nir_op_vec(num_comp), components); /* reassemble the full-width vector */
+   } else {
+      unsigned num_comp = nir_intrinsic_src_components(intr, 0);
+      nir_ssa_def *value = nir_ssa_for_src(b, intr->src[0], num_comp);
+      for (unsigned off = 0; off < num_comp; off += 4) {
+         unsigned c = MIN2(num_comp - off, 4);
+         nir_ssa_def *v = nir_channels(b, value, BITFIELD_MASK(c) << off); /* channels off..off+c-1 */
+         nir_build_store_global_ir3(b, v, addr, nir_imm_int(b, off));
+      }
+      return NIR_LOWER_INSTR_PROGRESS_REPLACE; /* presumably: store replaced, no SSA value to return -- confirm */
+   }
+}
+
+bool
+ir3_nir_lower_64b_global(nir_shader *shader)
+{
+   return nir_shader_lower_instructions(
+         shader, lower_64b_global_filter,
+         lower_64b_global, NULL);
+}