From 140154db0216aa6edd0cafb6c5a78290d750fa07 Mon Sep 17 00:00:00 2001 From: Simon Perretta Date: Thu, 23 Jan 2025 12:51:43 +0000 Subject: [PATCH] pco: add initial support for shared atomics Signed-off-by: Simon Perretta Acked-by: Erik Faye-Lund Part-of: --- src/imagination/pco/pco_legalize.c | 58 ++++++++++++++++++++ src/imagination/pco/pco_map.py | 60 +++++++++++++++++++++ src/imagination/pco/pco_ops.py | 7 +++ src/imagination/pco/pco_opt.c | 3 ++ src/imagination/pco/pco_trans_nir.c | 82 +++++++++++++++++++++++++++++ 5 files changed, 210 insertions(+) diff --git a/src/imagination/pco/pco_legalize.c b/src/imagination/pco/pco_legalize.c index 0abde31a92c..d8fe3cd2494 100644 --- a/src/imagination/pco/pco_legalize.c +++ b/src/imagination/pco/pco_legalize.c @@ -156,6 +156,64 @@ static bool legalize_pseudo(pco_instr *instr) return true; } + case PCO_OP_IADD32_ATOMIC_OFFSET: { + pco_builder b = + pco_builder_create(instr->parent_func, pco_cursor_before_instr(instr)); + + pco_ref dest = instr->dest[0]; + pco_ref shmem_dest = instr->dest[1]; + pco_ref shmem_src = instr->src[0]; + pco_ref value = instr->src[1]; + pco_ref pred = instr->src[2]; + pco_ref offset = instr->src[3]; + + unsigned idx_reg_num = 0; + pco_ref idx_reg = + pco_ref_hwreg_idx(idx_reg_num, idx_reg_num, PCO_REG_CLASS_INDEX); + + pco_mbyp(&b, idx_reg, offset, .exec_cnd = pco_instr_get_exec_cnd(instr)); + + shmem_dest = pco_ref_hwreg_idx_from(idx_reg_num, shmem_dest); + shmem_src = pco_ref_hwreg_idx_from(idx_reg_num, shmem_src); + + pco_instr *repl = + pco_iadd32_atomic(&b, dest, shmem_dest, shmem_src, value, pred); + + xfer_op_mods(repl, instr); + + pco_instr_delete(instr); + + return true; + } + + case PCO_OP_XCHG_ATOMIC_OFFSET: { + pco_builder b = + pco_builder_create(instr->parent_func, pco_cursor_before_instr(instr)); + + pco_ref dest = instr->dest[0]; + pco_ref shmem_dest = instr->dest[1]; + pco_ref shmem_src = instr->src[0]; + pco_ref value = instr->src[1]; + pco_ref offset = instr->src[2]; + + 
unsigned idx_reg_num = 0; + pco_ref idx_reg = + pco_ref_hwreg_idx(idx_reg_num, idx_reg_num, PCO_REG_CLASS_INDEX); + + pco_mbyp(&b, idx_reg, offset, .exec_cnd = pco_instr_get_exec_cnd(instr)); + + shmem_dest = pco_ref_hwreg_idx_from(idx_reg_num, shmem_dest); + shmem_src = pco_ref_hwreg_idx_from(idx_reg_num, shmem_src); + + pco_instr *repl = pco_xchg_atomic(&b, dest, shmem_dest, shmem_src, value); + + xfer_op_mods(repl, instr); + + pco_instr_delete(instr); + + return true; + } + default: break; } diff --git a/src/imagination/pco/pco_map.py b/src/imagination/pco/pco_map.py index 14bd5b0129d..d0fb3cd0da1 100644 --- a/src/imagination/pco/pco_map.py +++ b/src/imagination/pco/pco_map.py @@ -2558,6 +2558,66 @@ group_map(O_ST32, ] ) +group_map(O_IADD32_ATOMIC, + hdr=(I_IGRP_HDR_MAIN, [ + ('oporg', 'p0_p1'), + ('olchk', OM_OLCHK), + ('w1p', True), + ('w0p', True), + ('cc', OM_EXEC_CND), + ('end', OM_END), + ('atom', True), + ('rpt', OM_RPT) + ]), + enc_ops=[ + ('0', O_IMADD32, [DEST(1)], [SRC(0), 'pco_one', SRC(1), SRC(2)], [(OM_S, OM_S)]), + ('1', O_MBYP, [DEST(0)], [SRC(0)]) + ], + srcs=[ + ('s[0]', ('0', SRC(0)), 's0'), + ('s[1]', ('0', SRC(1)), 's1'), + ('s[2]', ('0', SRC(2)), 's2'), + ('s[3]', ('1', SRC(0)), 's3') + ], + iss=[ + ('is[4]', 'ft1'), + ('is[5]', 'ft0'), + ], + dests=[ + ('w[0]', ('1', DEST(0)), 'ft1'), + ('w[1]', ('0', DEST(0)), 'ft0'), + ] +) + +group_map(O_XCHG_ATOMIC, + hdr=(I_IGRP_HDR_MAIN, [ + ('oporg', 'p0_p1'), + ('olchk', OM_OLCHK), + ('w1p', True), + ('w0p', True), + ('cc', OM_EXEC_CND), + ('end', OM_END), + ('atom', True), + ('rpt', OM_RPT) + ]), + enc_ops=[ + ('0', O_MBYP, [DEST(1)], [SRC(1)]), + ('1', O_MBYP, [DEST(0)], [SRC(0)]) + ], + srcs=[ + ('s[0]', ('0', SRC(0)), 's0'), + ('s[3]', ('1', SRC(0)), 's3') + ], + iss=[ + ('is[4]', 'ft1'), + ('is[5]', 'ft0'), + ], + dests=[ + ('w[0]', ('1', DEST(0)), 'ft1'), + ('w[1]', ('0', DEST(0)), 'ft0'), + ] +) + group_map(O_UVSW_WRITE, hdr=(I_IGRP_HDR_MAIN, [ ('oporg', 'be'), diff --git 
a/src/imagination/pco/pco_ops.py b/src/imagination/pco/pco_ops.py index a9c7ab449f2..c823c183662 100644 --- a/src/imagination/pco/pco_ops.py +++ b/src/imagination/pco/pco_ops.py @@ -335,6 +335,7 @@ OM_OFFSET_SD = op_mod_enum('offset_sd', [ OM_ALU = [OM_OLCHK, OM_EXEC_CND, OM_END, OM_ATOM, OM_RPT] OM_ALU_RPT1 = [OM_OLCHK, OM_EXEC_CND, OM_END, OM_ATOM] +OM_ALU_ATOMEXT = [OM_OLCHK, OM_EXEC_CND, OM_END, OM_RPT] ## Main. O_FADD = hw_op('fadd', OM_ALU + [OM_SAT], 1, 2, [], [[RM_ABS, RM_NEG, RM_FLR], [RM_ABS]]) @@ -448,6 +449,9 @@ O_IMUL32 = hw_op('imul32', OM_ALU + [OM_S], 1, 3, [], [[RM_ABS, RM_NEG], [RM_ABS O_TSTZ = hw_op('tstz', OM_ALU + [OM_TST_TYPE_MAIN], 2, 1, [], [[RM_ELEM]]) O_ST32 = hw_op('st32', OM_ALU_RPT1 + [OM_MCU_CACHE_MODE_ST], 0, 5) +O_IADD32_ATOMIC = hw_op('iadd32.atomic', OM_ALU_ATOMEXT + [OM_S], 2, 3, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG]]) +O_XCHG_ATOMIC = hw_op('xchg.atomic', OM_ALU_ATOMEXT, 2, 2, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG]]) + # Pseudo-ops (unmapped). O_FNEG = pseudo_op('fneg', OM_ALU, 1, 1) O_FABS = pseudo_op('fabs', OM_ALU, 1, 1) @@ -457,5 +461,8 @@ O_MOV_OFFSET = pseudo_op('mov.offset', OM_ALU + [OM_OFFSET_SD], 1, 2) O_VEC = pseudo_op('vec', [OM_EXEC_CND], 1, VARIABLE, [], [[RM_ABS, RM_NEG]]) O_COMP = pseudo_op('comp', [], 1, 2) +O_IADD32_ATOMIC_OFFSET = pseudo_op('iadd32.atomic.offset', OM_ALU_ATOMEXT + [OM_S], 2, 4, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG]]) +O_XCHG_ATOMIC_OFFSET = pseudo_op('xchg.atomic.offset', OM_ALU_ATOMEXT, 2, 3, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG]]) + O_BREAK = pseudo_op('break', [OM_EXEC_CND]) O_CONTINUE = pseudo_op('continue', [OM_EXEC_CND]) diff --git a/src/imagination/pco/pco_opt.c b/src/imagination/pco/pco_opt.c index 3c5df5943fd..69432cb8901 100644 --- a/src/imagination/pco/pco_opt.c +++ b/src/imagination/pco/pco_opt.c @@ -542,6 +542,9 @@ static inline bool instr_has_side_effects(pco_instr *instr) case PCO_OP_ATOMIC: case PCO_OP_FDSX: case PCO_OP_FDSY: + + case PCO_OP_IADD32_ATOMIC: + case
PCO_OP_XCHG_ATOMIC: return true; default: diff --git a/src/imagination/pco/pco_trans_nir.c b/src/imagination/pco/pco_trans_nir.c index 2fe37ba8917..85665dc32c8 100644 --- a/src/imagination/pco/pco_trans_nir.c +++ b/src/imagination/pco/pco_trans_nir.c @@ -527,6 +527,83 @@ static pco_instr *trans_store_common_store(trans_ctx *tctx, .rpt = chans); } +static pco_instr *trans_atomic_shared(trans_ctx *tctx, + nir_intrinsic_instr *intr, + pco_ref dest, + pco_ref offset_src, + pco_ref value, + pco_ref value_swap) +{ + nir_src *noffset_src = &intr->src[0]; + + unsigned chans = pco_ref_get_chans(dest); + ASSERTED unsigned bits = pco_ref_get_bits(dest); + + assert(bits == 32); + assert(chans == 1); + + assert(tctx->shader->data.cs.shmem.count > 0); + + pco_ref shmem_ref; + bool const_offset = nir_src_is_const(*noffset_src); + if (const_offset) { + unsigned offset = nir_src_as_uint(*noffset_src); + assert(offset < tctx->shader->data.cs.shmem.count); + + shmem_ref = pco_ref_hwreg_vec(tctx->shader->data.cs.shmem.start + offset, + PCO_REG_CLASS_COEFF, + chans); + } else { + shmem_ref = pco_ref_hwreg_vec(tctx->shader->data.cs.shmem.start, + PCO_REG_CLASS_COEFF, + chans); + } + + pco_instr *instr; + switch (nir_intrinsic_atomic_op(intr)) { + case nir_atomic_op_iadd: + if (const_offset) { + instr = pco_iadd32_atomic(&tctx->b, + dest, + shmem_ref, + shmem_ref, + value, + pco_ref_null(), + .s = true); + } else { + instr = pco_iadd32_atomic_offset(&tctx->b, + dest, + shmem_ref, + shmem_ref, + value, + pco_ref_null(), + offset_src, + .s = true); + } + break; + + case nir_atomic_op_xchg: + if (const_offset) { + instr = pco_xchg_atomic(&tctx->b, dest, shmem_ref, shmem_ref, value); + } else { + instr = pco_xchg_atomic_offset(&tctx->b, + dest, + shmem_ref, + shmem_ref, + value, + offset_src); + } + break; + + default: + UNREACHABLE(""); + } + + pco_instr_set_rpt(instr, chans); + + return instr; +} + static pco_instr *trans_load_buffer(trans_ctx *tctx, nir_intrinsic_instr *intr, pco_ref 
dest, @@ -1074,6 +1151,11 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr) &tctx->shader->data.cs.shmem); break; + case nir_intrinsic_shared_atomic: + case nir_intrinsic_shared_atomic_swap: + instr = trans_atomic_shared(tctx, intr, dest, src[0], src[1], src[2]); + break; + case nir_intrinsic_load_ubo: case nir_intrinsic_load_ssbo: instr = trans_load_buffer(tctx, intr, dest, src[1]);