From dc3288dcb179f16909af2f3aad78404f7ef74ed2 Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Wed, 31 Jul 2024 21:24:55 -0400
Subject: [PATCH] agx: exploit soft fault info

for preambles and for peephole selection.

total instructions in shared programs: 2159359 -> 2114124 (-2.09%)
instructions in affected programs: 359763 -> 314528 (-12.57%)
helped: 814
HURT: 6
Instructions are helped.

total alu in shared programs: 1685059 -> 1670200 (-0.88%)
alu in affected programs: 217210 -> 202351 (-6.84%)
helped: 589
HURT: 45
Alu are helped.

total fscib in shared programs: 1681202 -> 1666324 (-0.88%)
fscib in affected programs: 217477 -> 202599 (-6.84%)
helped: 590
HURT: 45
Fscib are helped.

total ic in shared programs: 460856 -> 455502 (-1.16%)
ic in affected programs: 41350 -> 35996 (-12.95%)
helped: 174
HURT: 8
Ic are helped.

total bytes in shared programs: 14302484 -> 14053982 (-1.74%)
bytes in affected programs: 2380614 -> 2132112 (-10.44%)
helped: 814
HURT: 7
Bytes are helped.

total regs in shared programs: 662302 -> 656517 (-0.87%)
regs in affected programs: 26979 -> 21194 (-21.44%)
helped: 432
HURT: 9
Regs are helped.

total uniforms in shared programs: 1651909 -> 1687077 (2.13%)
uniforms in affected programs: 95383 -> 130551 (36.87%)
helped: 17
HURT: 783
Uniforms are HURT.

total threads in shared programs: 20324608 -> 20326592 (<.01%)
threads in affected programs: 16192 -> 18176 (12.25%)
helped: 17
HURT: 3
Threads are helped.

Signed-off-by: Alyssa Rosenzweig
Part-of:
---
 src/asahi/compiler/agx_compile.c | 31 +++++++++++++++++++++++++++++--
 src/asahi/compiler/agx_compile.h |  5 +++++
 src/asahi/lib/agx_device.h       |  6 ++++++
 src/asahi/lib/agx_helpers.h      |  1 +
 4 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index 00250ae9d2d..a7228f0ae64 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -2704,14 +2704,40 @@ mem_vectorize_cb(unsigned align_mul, unsigned align_offset, unsigned bit_size,
    return true;
 }
 
+static bool
+set_speculate(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *_)
+{
+   if (!nir_intrinsic_has_access(intr))
+      return false;
+
+   nir_intrinsic_set_access(intr,
+                            ACCESS_CAN_SPECULATE | nir_intrinsic_access(intr));
+   return true;
+}
+
 static void
-agx_optimize_nir(nir_shader *nir, unsigned *preamble_size)
+agx_optimize_nir(nir_shader *nir, bool soft_fault, unsigned *preamble_size)
 {
    /* This runs only once up front since other optimizations don't affect it */
    NIR_PASS(_, nir, nir_opt_shrink_stores, true);
 
    agx_optimize_loop_nir(nir);
 
+   /* If soft fault is enabled, we can freely speculate everything. That lets us
+    * peephole select and form preambles more aggressively.
+    */
+   if (soft_fault) {
+      NIR_PASS(_, nir, nir_shader_intrinsics_pass, set_speculate,
+               nir_metadata_control_flow, NULL);
+   }
+
+   /* Peephole select again after setting the speculate flag but before
+    * vectorizing. This cleans up short-circuit loads in unrolled loops.
+    *
+    * XXX: Set indirect_load_ok once we can investigate CTS flakes.
+    */
+   NIR_PASS(_, nir, nir_opt_peephole_select, 64, false, true);
+
    NIR_PASS(_, nir, nir_opt_load_store_vectorize,
             &(const nir_load_store_vectorize_options){
                .modes = nir_var_mem_global | nir_var_mem_constant,
@@ -3426,7 +3452,8 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
             nir_metadata_control_flow, NULL);
 
    info->push_count = key->reserved_preamble;
-   agx_optimize_nir(nir, key->secondary ? NULL : &info->push_count);
+   agx_optimize_nir(nir, key->dev.soft_fault,
+                    key->secondary ? NULL : &info->push_count);
 
    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
       info->varyings.fs.nr_cf = key->fs.cf_base;
diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h
index 2049194dedf..c65e2836996 100644
--- a/src/asahi/compiler/agx_compile.h
+++ b/src/asahi/compiler/agx_compile.h
@@ -190,6 +190,11 @@ struct agx_device_key {
     * Only used on G13X.
     */
    bool needs_g13x_coherency;
+
+   /* Is soft fault enabled? This is technically system-wide policy set by the
+    * kernel, but that's functionally a hardware feature.
+    */
+   bool soft_fault;
 };
 
 struct agx_shader_key {
diff --git a/src/asahi/lib/agx_device.h b/src/asahi/lib/agx_device.h
index aabb84b66a1..f34b34ec9dd 100644
--- a/src/asahi/lib/agx_device.h
+++ b/src/asahi/lib/agx_device.h
@@ -133,6 +133,12 @@ struct agx_device {
    struct agxdecode_ctx *agxdecode;
 };
 
+static inline bool
+agx_has_soft_fault(struct agx_device *dev)
+{
+   return dev->params.feat_compat & DRM_ASAHI_FEAT_SOFT_FAULTS;
+}
+
 bool agx_open_device(void *memctx, struct agx_device *dev);
 void agx_close_device(struct agx_device *dev);
 
diff --git a/src/asahi/lib/agx_helpers.h b/src/asahi/lib/agx_helpers.h
index 768a5e5c022..af5cedbf1f4 100644
--- a/src/asahi/lib/agx_helpers.h
+++ b/src/asahi/lib/agx_helpers.h
@@ -244,5 +244,6 @@ agx_gather_device_key(struct agx_device *dev)
       .needs_g13x_coherency = (dev->params.gpu_generation == 13 &&
                                dev->params.num_clusters_total > 1) ||
                               dev->params.num_dies > 1,
+      .soft_fault = agx_has_soft_fault(dev),
    };
 }