pco: add initial support for shared atomics

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Simon Perretta
2025-01-23 12:51:43 +00:00
committed by Marge Bot
parent 7d9b7ee485
commit 140154db02
5 changed files with 210 additions and 0 deletions
+58
View File
@@ -156,6 +156,64 @@ static bool legalize_pseudo(pco_instr *instr)
return true;
}
/* Legalize the iadd32.atomic.offset pseudo-op by re-emitting it as a plain
 * iadd32.atomic whose shared-memory refs are indexed through an index
 * register loaded with the offset source.
 */
case PCO_OP_IADD32_ATOMIC_OFFSET: {
/* New instructions are inserted immediately before the pseudo-op. */
pco_builder b =
pco_builder_create(instr->parent_func, pco_cursor_before_instr(instr));
/* Operand layout of the pseudo-op: two dests, four sources. */
pco_ref dest = instr->dest[0];
pco_ref shmem_dest = instr->dest[1];
pco_ref shmem_src = instr->src[0];
pco_ref value = instr->src[1];
pco_ref pred = instr->src[2];
pco_ref offset = instr->src[3];
/* Index register 0 is always used here. */
unsigned idx_reg_num = 0;
pco_ref idx_reg =
pco_ref_hwreg_idx(idx_reg_num, idx_reg_num, PCO_REG_CLASS_INDEX);
/* Emit an mbyp from the offset into the index register, under the same
 * execution condition as the pseudo-op.
 */
pco_mbyp(&b, idx_reg, offset, .exec_cnd = pco_instr_get_exec_cnd(instr));
/* Rebuild both shared-memory refs so they go through the index register. */
shmem_dest = pco_ref_hwreg_idx_from(idx_reg_num, shmem_dest);
shmem_src = pco_ref_hwreg_idx_from(idx_reg_num, shmem_src);
pco_instr *repl =
pco_iadd32_atomic(&b, dest, shmem_dest, shmem_src, value, pred);
/* NOTE(review): presumably carries the op modifiers over to the
 * replacement; confirm against xfer_op_mods.
 */
xfer_op_mods(repl, instr);
/* The pseudo-op has been replaced and is no longer needed. */
pco_instr_delete(instr);
return true;
}
/* Legalize the xchg.atomic.offset pseudo-op by re-emitting it as a plain
 * xchg.atomic whose shared-memory refs are indexed through an index register
 * loaded with the offset source.
 */
case PCO_OP_XCHG_ATOMIC_OFFSET: {
/* New instructions are inserted immediately before the pseudo-op. */
pco_builder b =
pco_builder_create(instr->parent_func, pco_cursor_before_instr(instr));
/* Operand layout of the pseudo-op: two dests, three sources. */
pco_ref dest = instr->dest[0];
pco_ref shmem_dest = instr->dest[1];
pco_ref shmem_src = instr->src[0];
pco_ref value = instr->src[1];
pco_ref offset = instr->src[2];
/* Index register 0 is always used here. */
unsigned idx_reg_num = 0;
pco_ref idx_reg =
pco_ref_hwreg_idx(idx_reg_num, idx_reg_num, PCO_REG_CLASS_INDEX);
/* Emit an mbyp from the offset into the index register, under the same
 * execution condition as the pseudo-op.
 */
pco_mbyp(&b, idx_reg, offset, .exec_cnd = pco_instr_get_exec_cnd(instr));
/* Rebuild both shared-memory refs so they go through the index register. */
shmem_dest = pco_ref_hwreg_idx_from(idx_reg_num, shmem_dest);
shmem_src = pco_ref_hwreg_idx_from(idx_reg_num, shmem_src);
pco_instr *repl = pco_xchg_atomic(&b, dest, shmem_dest, shmem_src, value);
/* NOTE(review): presumably carries the op modifiers over to the
 * replacement; confirm against xfer_op_mods.
 */
xfer_op_mods(repl, instr);
/* The pseudo-op has been replaced and is no longer needed. */
pco_instr_delete(instr);
return true;
}
default:
break;
}
+60
View File
@@ -2558,6 +2558,66 @@ group_map(O_ST32,
]
)
# Instruction group mapping for iadd32.atomic: a main-header 'p0_p1' group
# with the 'atom' header bit forced on, built from two encoded ops.
group_map(O_IADD32_ATOMIC,
hdr=(I_IGRP_HDR_MAIN, [
('oporg', 'p0_p1'),
('olchk', OM_OLCHK),
# Both write ports are enabled: the op has two destinations.
('w1p', True),
('w0p', True),
('cc', OM_EXEC_CND),
('end', OM_END),
# 'atom' is hard-wired rather than taken from an op mod.
('atom', True),
('rpt', OM_RPT)
]),
enc_ops=[
# Op '0' writes the group's second dest from SRC(0), the constant
# pco_one, SRC(1) and SRC(2).
# NOTE(review): presumably SRC(0) * 1 + SRC(1), with SRC(2) as the
# fourth imadd32 operand - confirm against O_IMADD32.
('0', O_IMADD32, [DEST(1)], [SRC(0), 'pco_one', SRC(1), SRC(2)], [(OM_S, OM_S)]),
# Op '1' passes SRC(0) through to the group's first dest.
('1', O_MBYP, [DEST(0)], [SRC(0)])
],
# Source slot assignment; s3 feeds op '1' from its own SRC(0).
srcs=[
('s[0]', ('0', SRC(0)), 's0'),
('s[1]', ('0', SRC(1)), 's1'),
('s[2]', ('0', SRC(2)), 's2'),
('s[3]', ('1', SRC(0)), 's3')
],
iss=[
('is[4]', 'ft1'),
('is[5]', 'ft0'),
],
# w0 takes op '1's result (ft1), w1 takes op '0's result (ft0).
dests=[
('w[0]', ('1', DEST(0)), 'ft1'),
('w[1]', ('0', DEST(0)), 'ft0'),
]
)
# Instruction group mapping for xchg.atomic: a main-header 'p0_p1' group with
# the 'atom' header bit forced on, built from two mbyp ops.
group_map(O_XCHG_ATOMIC,
hdr=(I_IGRP_HDR_MAIN, [
('oporg', 'p0_p1'),
('olchk', OM_OLCHK),
# Both write ports are enabled: the op has two destinations.
('w1p', True),
('w0p', True),
('cc', OM_EXEC_CND),
('end', OM_END),
# 'atom' is hard-wired rather than taken from an op mod.
('atom', True),
('rpt', OM_RPT)
]),
enc_ops=[
# Op '0' passes the group's SRC(1) through to its second dest.
('0', O_MBYP, [DEST(1)], [SRC(1)]),
# Op '1' passes the group's SRC(0) through to its first dest.
('1', O_MBYP, [DEST(0)], [SRC(0)])
],
# Source slot assignment: s0 feeds op '0', s3 feeds op '1'.
srcs=[
('s[0]', ('0', SRC(0)), 's0'),
('s[3]', ('1', SRC(0)), 's3')
],
iss=[
('is[4]', 'ft1'),
('is[5]', 'ft0'),
],
# w0 takes op '1's result (ft1), w1 takes op '0's result (ft0).
dests=[
('w[0]', ('1', DEST(0)), 'ft1'),
('w[1]', ('0', DEST(0)), 'ft0'),
]
)
group_map(O_UVSW_WRITE,
hdr=(I_IGRP_HDR_MAIN, [
('oporg', 'be'),
+7
View File
@@ -335,6 +335,7 @@ OM_OFFSET_SD = op_mod_enum('offset_sd', [
OM_ALU = [OM_OLCHK, OM_EXEC_CND, OM_END, OM_ATOM, OM_RPT]
OM_ALU_RPT1 = [OM_OLCHK, OM_EXEC_CND, OM_END, OM_ATOM]
OM_ALU_ATOMEXT = [OM_OLCHK, OM_EXEC_CND, OM_END, OM_RPT]
## Main.
O_FADD = hw_op('fadd', OM_ALU + [OM_SAT], 1, 2, [], [[RM_ABS, RM_NEG, RM_FLR], [RM_ABS]])
@@ -448,6 +449,9 @@ O_IMUL32 = hw_op('imul32', OM_ALU + [OM_S], 1, 3, [], [[RM_ABS, RM_NEG], [RM_ABS
O_TSTZ = hw_op('tstz', OM_ALU + [OM_TST_TYPE_MAIN], 2, 1, [], [[RM_ELEM]])
O_ST32 = hw_op('st32', OM_ALU_RPT1 + [OM_MCU_CACHE_MODE_ST], 0, 5)
O_IADD32_ATOMIC = hw_op('iadd32.atomic', OM_ALU_ATOMEXT + [OM_S], 2, 3, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG]])
O_XCHG_ATOMIC = hw_op('xchg.atomic', OM_ALU_ATOMEXT, 2, 2, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG]])
# Pseudo-ops (unmapped).
O_FNEG = pseudo_op('fneg', OM_ALU, 1, 1)
O_FABS = pseudo_op('fabs', OM_ALU, 1, 1)
@@ -457,5 +461,8 @@ O_MOV_OFFSET = pseudo_op('mov.offset', OM_ALU + [OM_OFFSET_SD], 1, 2)
O_VEC = pseudo_op('vec', [OM_EXEC_CND], 1, VARIABLE, [], [[RM_ABS, RM_NEG]])
O_COMP = pseudo_op('comp', [], 1, 2)
# Offset variants of the atomic ops: same operands as the hardware ops plus
# one trailing offset source. Argument order after the source count is
# (dest_mods, src_mods), so the empty dest-mods list must be passed explicitly
# for the [RM_ABS, RM_NEG] lists to apply to the sources.
O_IADD32_ATOMIC_OFFSET = pseudo_op('iadd32.atomic.offset', OM_ALU_ATOMEXT + [OM_S], 2, 4, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG]])
O_XCHG_ATOMIC_OFFSET = pseudo_op('xchg.atomic.offset', OM_ALU_ATOMEXT, 2, 3, [], [[RM_ABS, RM_NEG], [RM_ABS, RM_NEG]])
O_BREAK = pseudo_op('break', [OM_EXEC_CND])
O_CONTINUE = pseudo_op('continue', [OM_EXEC_CND])
+3
View File
@@ -542,6 +542,9 @@ static inline bool instr_has_side_effects(pco_instr *instr)
case PCO_OP_ATOMIC:
case PCO_OP_FDSX:
case PCO_OP_FDSY:
case PCO_OP_IADD32_ATOMIC:
case PCO_OP_XCHG_ATOMIC:
return true;
default:
+82
View File
@@ -527,6 +527,83 @@ static pco_instr *trans_store_common_store(trans_ctx *tctx,
.rpt = chans);
}
/**
 * \brief Translates a NIR shared atomic intrinsic into a PCO atomic op.
 *
 * Only single-channel, 32-bit iadd and xchg atomics are handled; any other
 * atomic op reaches UNREACHABLE. When the intrinsic's offset source is
 * constant it is folded into the shared-memory register index and the plain
 * atomic op is emitted; otherwise the ".offset" pseudo-op is emitted with
 * \p offset_src as its extra source.
 *
 * \param[in,out] tctx Translation context.
 * \param[in] intr NIR intrinsic; src[0] is checked for a constant offset.
 * \param[in] dest First destination of the emitted instruction.
 * \param[in] offset_src Offset source, used only for non-constant offsets.
 * \param[in] value Value operand of the atomic.
 * \param[in] value_swap Not used by this function.
 * \return The emitted PCO instruction.
 */
static pco_instr *trans_atomic_shared(trans_ctx *tctx,
                                      nir_intrinsic_instr *intr,
                                      pco_ref dest,
                                      pco_ref offset_src,
                                      pco_ref value,
                                      pco_ref value_swap)
{
   unsigned chans = pco_ref_get_chans(dest);
   ASSERTED unsigned bits = pco_ref_get_bits(dest);

   assert(bits == 32);
   assert(chans == 1);
   assert(tctx->shader->data.cs.shmem.count > 0);

   /* Start from the base of the shared-memory range; a constant offset is
    * added directly to the register index.
    */
   bool is_const_offset = nir_src_is_const(intr->src[0]);
   unsigned shmem_reg = tctx->shader->data.cs.shmem.start;
   if (is_const_offset) {
      unsigned imm_offset = nir_src_as_uint(intr->src[0]);
      assert(imm_offset < tctx->shader->data.cs.shmem.count);
      shmem_reg += imm_offset;
   }

   /* The same ref is passed as both second dest and first source. */
   pco_ref shmem_ref =
      pco_ref_hwreg_vec(shmem_reg, PCO_REG_CLASS_COEFF, chans);

   pco_instr *instr;
   nir_atomic_op atomic_op = nir_intrinsic_atomic_op(intr);

   switch (atomic_op) {
   case nir_atomic_op_iadd:
      if (!is_const_offset) {
         instr = pco_iadd32_atomic_offset(&tctx->b,
                                          dest,
                                          shmem_ref,
                                          shmem_ref,
                                          value,
                                          pco_ref_null(),
                                          offset_src,
                                          .s = true);
         break;
      }

      instr = pco_iadd32_atomic(&tctx->b,
                                dest,
                                shmem_ref,
                                shmem_ref,
                                value,
                                pco_ref_null(),
                                .s = true);
      break;

   case nir_atomic_op_xchg:
      if (!is_const_offset) {
         instr = pco_xchg_atomic_offset(&tctx->b,
                                        dest,
                                        shmem_ref,
                                        shmem_ref,
                                        value,
                                        offset_src);
         break;
      }

      instr = pco_xchg_atomic(&tctx->b, dest, shmem_ref, shmem_ref, value);
      break;

   default:
      UNREACHABLE("");
   }

   pco_instr_set_rpt(instr, chans);

   return instr;
}
static pco_instr *trans_load_buffer(trans_ctx *tctx,
nir_intrinsic_instr *intr,
pco_ref dest,
@@ -1074,6 +1151,11 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
&tctx->shader->data.cs.shmem);
break;
case nir_intrinsic_shared_atomic:
case nir_intrinsic_shared_atomic_swap:
instr = trans_atomic_shared(tctx, intr, dest, src[0], src[1], src[2]);
break;
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ssbo:
instr = trans_load_buffer(tctx, intr, dest, src[1]);