diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index 00250ae9d2d..a7228f0ae64 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -2704,14 +2704,40 @@ mem_vectorize_cb(unsigned align_mul, unsigned align_offset, unsigned bit_size,
    return true;
 }
 
+static bool
+set_speculate(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *_)
+{
+   if (!nir_intrinsic_has_access(intr))
+      return false; /* no access flags on this intrinsic: leave it alone */
+
+   nir_intrinsic_set_access(intr,
+                            ACCESS_CAN_SPECULATE | nir_intrinsic_access(intr));
+   return true; /* prior flags preserved, ACCESS_CAN_SPECULATE ORed in */
+}
+
 static void
-agx_optimize_nir(nir_shader *nir, unsigned *preamble_size)
+agx_optimize_nir(nir_shader *nir, bool soft_fault, unsigned *preamble_size)
 {
    /* This runs only once up front since other optimizations don't affect it */
    NIR_PASS(_, nir, nir_opt_shrink_stores, true);
 
    agx_optimize_loop_nir(nir);
 
+   /* If soft fault is enabled, we can freely speculate everything. That lets us
+    * peephole select and form preambles more aggressively.
+    */
+   if (soft_fault) { /* flag every intrinsic that has an access index */
+      NIR_PASS(_, nir, nir_shader_intrinsics_pass, set_speculate,
+               nir_metadata_control_flow, NULL);
+   }
+
+   /* Peephole select again after setting the speculate flag but before
+    * vectorizing. This cleans up short-circuit loads in unrolled loops.
+    *
+    * XXX: Set indirect_load_ok once we can investigate CTS flakes.
+    */
+   NIR_PASS(_, nir, nir_opt_peephole_select, 64, false, true);
+
    NIR_PASS(_, nir, nir_opt_load_store_vectorize,
             &(const nir_load_store_vectorize_options){
                .modes = nir_var_mem_global | nir_var_mem_constant,
@@ -3426,7 +3452,8 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
             nir_metadata_control_flow, NULL);
 
    info->push_count = key->reserved_preamble;
-   agx_optimize_nir(nir, key->secondary ? NULL : &info->push_count);
+   agx_optimize_nir(nir, key->dev.soft_fault,
+                    key->secondary ? NULL : &info->push_count);
 
    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
       info->varyings.fs.nr_cf = key->fs.cf_base;
diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h
index 2049194dedf..c65e2836996 100644
--- a/src/asahi/compiler/agx_compile.h
+++ b/src/asahi/compiler/agx_compile.h
@@ -190,6 +190,11 @@ struct agx_device_key {
     * Only used on G13X.
     */
    bool needs_g13x_coherency;
+
+   /* Is soft fault enabled? This is technically system-wide policy set by the
+    * kernel, but that's functionally a hardware feature.
+    */
+   bool soft_fault;
 };
 
 struct agx_shader_key {
diff --git a/src/asahi/lib/agx_device.h b/src/asahi/lib/agx_device.h
index aabb84b66a1..f34b34ec9dd 100644
--- a/src/asahi/lib/agx_device.h
+++ b/src/asahi/lib/agx_device.h
@@ -133,6 +133,12 @@ struct agx_device {
    struct agxdecode_ctx *agxdecode;
 };
 
+static inline bool
+agx_has_soft_fault(struct agx_device *dev)
+{
+   return dev->params.feat_compat & DRM_ASAHI_FEAT_SOFT_FAULTS; /* bit set? */
+}
+
 bool agx_open_device(void *memctx, struct agx_device *dev);
 void agx_close_device(struct agx_device *dev);
 
diff --git a/src/asahi/lib/agx_helpers.h b/src/asahi/lib/agx_helpers.h
index 768a5e5c022..af5cedbf1f4 100644
--- a/src/asahi/lib/agx_helpers.h
+++ b/src/asahi/lib/agx_helpers.h
@@ -244,5 +244,6 @@ agx_gather_device_key(struct agx_device *dev)
       .needs_g13x_coherency = (dev->params.gpu_generation == 13 &&
                                dev->params.num_clusters_total > 1) ||
                               dev->params.num_dies > 1,
+      .soft_fault = agx_has_soft_fault(dev),
    };
 }