From f482fc33cf0dcc372410256c72cfa09633eec056 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 17 Jun 2024 11:47:42 +0300 Subject: [PATCH] brw: blockify load_global_const_block_intel This intrinsic is pretty much equivalent to load_global_constant_uniform_block_intel, it just has a predicate. If the predicate is always true we can turn it into the other. Signed-off-by: Lionel Landwerlin Reviewed-by: Kenneth Graunke Part-of: --- .../intel_nir_blockify_uniform_loads.c | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/intel/compiler/intel_nir_blockify_uniform_loads.c b/src/intel/compiler/intel_nir_blockify_uniform_loads.c index 75f1a7a921b..11816a4aa5a 100644 --- a/src/intel/compiler/intel_nir_blockify_uniform_loads.c +++ b/src/intel/compiler/intel_nir_blockify_uniform_loads.c @@ -106,6 +106,39 @@ intel_nir_blockify_uniform_loads_instr(nir_builder *b, intrin->intrinsic = nir_intrinsic_load_global_constant_uniform_block_intel; return true; + case nir_intrinsic_load_global_const_block_intel: + /* Only deal with the simple predication true case */ + if (!nir_src_is_const(intrin->src[1]) || + nir_src_as_uint(intrin->src[1]) == 0) + return false; + + if (nir_src_is_divergent(intrin->src[0])) + return false; + + if (intrin->def.bit_size != 32) + return false; + + /* Without the LSC, we can only do block loads of at least 4dwords (1 + * oword). + */ + if (!devinfo->has_lsc && intrin->def.num_components < 4) + return false; + + b->cursor = nir_before_instr(&intrin->instr); + nir_def *def = + nir_load_global_constant_uniform_block_intel( + b, + intrin->def.num_components, + intrin->def.bit_size, + intrin->src[0].ssa, + .access = ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER, + .align_mul = 4, + .align_offset = 4); + + nir_def_rewrite_uses(&intrin->def, def); + nir_instr_remove(&intrin->instr); + return true; + default: return false; }