agx: exploit soft fault info
for preambles and for peephole selection.

total instructions in shared programs: 2159359 -> 2114124 (-2.09%)
instructions in affected programs: 359763 -> 314528 (-12.57%)
helped: 814
HURT: 6
Instructions are helped.

total alu in shared programs: 1685059 -> 1670200 (-0.88%)
alu in affected programs: 217210 -> 202351 (-6.84%)
helped: 589
HURT: 45
Alu are helped.

total fscib in shared programs: 1681202 -> 1666324 (-0.88%)
fscib in affected programs: 217477 -> 202599 (-6.84%)
helped: 590
HURT: 45
Fscib are helped.

total ic in shared programs: 460856 -> 455502 (-1.16%)
ic in affected programs: 41350 -> 35996 (-12.95%)
helped: 174
HURT: 8
Ic are helped.

total bytes in shared programs: 14302484 -> 14053982 (-1.74%)
bytes in affected programs: 2380614 -> 2132112 (-10.44%)
helped: 814
HURT: 7
Bytes are helped.

total regs in shared programs: 662302 -> 656517 (-0.87%)
regs in affected programs: 26979 -> 21194 (-21.44%)
helped: 432
HURT: 9
Regs are helped.

total uniforms in shared programs: 1651909 -> 1687077 (2.13%)
uniforms in affected programs: 95383 -> 130551 (36.87%)
helped: 17
HURT: 783
Uniforms are HURT.

total threads in shared programs: 20324608 -> 20326592 (<.01%)
threads in affected programs: 16192 -> 18176 (12.25%)
helped: 17
HURT: 3
Threads are helped.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30633>
This commit is contained in:
@@ -2704,14 +2704,40 @@ mem_vectorize_cb(unsigned align_mul, unsigned align_offset, unsigned bit_size,
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
set_speculate(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *_)
|
||||
{
|
||||
if (!nir_intrinsic_has_access(intr))
|
||||
return false;
|
||||
|
||||
nir_intrinsic_set_access(intr,
|
||||
ACCESS_CAN_SPECULATE | nir_intrinsic_access(intr));
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
agx_optimize_nir(nir_shader *nir, unsigned *preamble_size)
|
||||
agx_optimize_nir(nir_shader *nir, bool soft_fault, unsigned *preamble_size)
|
||||
{
|
||||
/* This runs only once up front since other optimizations don't affect it */
|
||||
NIR_PASS(_, nir, nir_opt_shrink_stores, true);
|
||||
|
||||
agx_optimize_loop_nir(nir);
|
||||
|
||||
/* If soft fault is enabled, we can freely speculate everything. That lets us
|
||||
* peephole select and form preambles more aggressively.
|
||||
*/
|
||||
if (soft_fault) {
|
||||
NIR_PASS(_, nir, nir_shader_intrinsics_pass, set_speculate,
|
||||
nir_metadata_control_flow, NULL);
|
||||
}
|
||||
|
||||
/* Peephole select again after setting the speculate flag but before
|
||||
* vectorizing. This cleans up short-circuit loads in unrolled loops.
|
||||
*
|
||||
* XXX: Set indirect_load_ok once we can investigate CTS flakes.
|
||||
*/
|
||||
NIR_PASS(_, nir, nir_opt_peephole_select, 64, false, true);
|
||||
|
||||
NIR_PASS(_, nir, nir_opt_load_store_vectorize,
|
||||
&(const nir_load_store_vectorize_options){
|
||||
.modes = nir_var_mem_global | nir_var_mem_constant,
|
||||
@@ -3426,7 +3452,8 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
|
||||
nir_metadata_control_flow, NULL);
|
||||
|
||||
info->push_count = key->reserved_preamble;
|
||||
agx_optimize_nir(nir, key->secondary ? NULL : &info->push_count);
|
||||
agx_optimize_nir(nir, key->dev.soft_fault,
|
||||
key->secondary ? NULL : &info->push_count);
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
info->varyings.fs.nr_cf = key->fs.cf_base;
|
||||
|
||||
@@ -190,6 +190,11 @@ struct agx_device_key {
|
||||
* Only used on G13X.
|
||||
*/
|
||||
bool needs_g13x_coherency;
|
||||
|
||||
/* Is soft fault enabled? This is technically system-wide policy set by the
|
||||
* kernel, but that's functionally a hardware feature.
|
||||
*/
|
||||
bool soft_fault;
|
||||
};
|
||||
|
||||
struct agx_shader_key {
|
||||
|
||||
@@ -133,6 +133,12 @@ struct agx_device {
|
||||
struct agxdecode_ctx *agxdecode;
|
||||
};
|
||||
|
||||
static inline bool
|
||||
agx_has_soft_fault(struct agx_device *dev)
|
||||
{
|
||||
return dev->params.feat_compat & DRM_ASAHI_FEAT_SOFT_FAULTS;
|
||||
}
|
||||
|
||||
bool agx_open_device(void *memctx, struct agx_device *dev);
|
||||
|
||||
void agx_close_device(struct agx_device *dev);
|
||||
|
||||
@@ -244,5 +244,6 @@ agx_gather_device_key(struct agx_device *dev)
|
||||
.needs_g13x_coherency = (dev->params.gpu_generation == 13 &&
|
||||
dev->params.num_clusters_total > 1) ||
|
||||
dev->params.num_dies > 1,
|
||||
.soft_fault = agx_has_soft_fault(dev),
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user