From f716da596b005c2bcac146bd49d6ed84a1771b3e Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 5 Jul 2023 12:19:51 -0400 Subject: [PATCH] asahi,agx: Set coherency bit for clustered targets We need to set a particular bit on atomics for them to be coherent across clusters. Fixes atomics on G13X. Setting this bit on the single-cluster G13G, on the other hand, wedges the GPU. So best be careful ;-) Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/compiler/agx_compile.h | 5 +++++ src/asahi/compiler/agx_pack.c | 10 ++++++---- src/gallium/drivers/asahi/agx_state.c | 5 ++++- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h index 0ef4b9342b2..4f247a1305b 100644 --- a/src/asahi/compiler/agx_compile.h +++ b/src/asahi/compiler/agx_compile.h @@ -193,6 +193,11 @@ struct agx_shader_key { /* Number of reserved preamble slots at the start */ unsigned reserved_preamble; + /* Does the target GPU need explicit cluster coherency for atomics? + * Only used on G13X. + */ + bool needs_g13x_coherency; + union { struct agx_vs_shader_key vs; struct agx_fs_shader_key fs; diff --git a/src/asahi/compiler/agx_pack.c b/src/asahi/compiler/agx_pack.c index 4d91fc8a792..972c7ef5168 100644 --- a/src/asahi/compiler/agx_pack.c +++ b/src/asahi/compiler/agx_pack.c @@ -469,7 +469,7 @@ agx_pack_alu(struct util_dynarray *emission, agx_instr *I) static void agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, - agx_instr *I) + agx_instr *I, bool needs_g13x_coherency) { switch (I->op) { case AGX_OPCODE_LD_TILE: @@ -693,8 +693,10 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, (I->scoreboard << 30) | (((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) | (((uint64_t)((A >> 4) & BITFIELD_MASK(4))) << 36) | - (((uint64_t)(R >> 6)) << 40) | (Rt ? BITFIELD64_BIT(47) : 0) | - (((uint64_t)S) << 48) | (((uint64_t)(O >> 8)) << 56); + (((uint64_t)(R >> 6)) << 40) | + (needs_g13x_coherency ? BITFIELD64_BIT(45) : 0) | + (Rt ? BITFIELD64_BIT(47) : 0) | (((uint64_t)S) << 48) | + (((uint64_t)(O >> 8)) << 56); memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8); break; @@ -896,7 +898,7 @@ agx_pack_binary(agx_context *ctx, struct util_dynarray *emission) block->offset = emission->size; agx_foreach_instr_in_block(block, ins) { - agx_pack_instr(emission, &fixups, ins); + agx_pack_instr(emission, &fixups, ins, ctx->key->needs_g13x_coherency); } } diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index dd7306c1451..d3708d64251 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -1498,7 +1498,10 @@ agx_compile_variant(struct agx_device *dev, struct agx_uncompiled_shader *so, } } - struct agx_shader_key base_key = {0}; + struct agx_shader_key base_key = { + .needs_g13x_coherency = + dev->params.gpu_generation == 13 && dev->params.num_clusters_total > 1, + }; if (nir->info.stage == MESA_SHADER_FRAGMENT) base_key.fs.nr_samples = key_->fs.nr_samples;