agx: Use nir_lower_mem_access_bit_sizes

Lowers away 64-bit loads, which we'll create in the sysval lowering for
dynamically indexed UBOs/VBOs. The lowering generates pack_64_2x32 instructions,
so lower those too.

No shader-db changes.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21674>
This commit is contained in:
Alyssa Rosenzweig
2023-02-25 22:29:42 -05:00
committed by Marge Bot
parent 8a53050d7d
commit b768a254f7
2 changed files with 31 additions and 0 deletions
+30
View File
@@ -2094,6 +2094,27 @@ agx_fp32_varying_mask(nir_shader *nir)
return mask;
}
static nir_mem_access_size_align
mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes, uint32_t align,
uint32_t align_offset, bool offset_is_const,
const void *cb_data)
{
align = nir_combined_align(align, align_offset);
assert(util_is_power_of_two_nonzero(align));
unsigned bit_size = (bytes & 1) ? 8 : (bytes & 2) ? 16 : 32;
if (align == 2)
bit_size = MIN2(bit_size, 16);
else if (align == 1)
bit_size = 8;
return (nir_mem_access_size_align){
.num_components = bytes / (bit_size / 8),
.bit_size = bit_size,
.align = bit_size / 8,
};
}
static bool
agx_should_dump(nir_shader *nir, unsigned agx_dbg_bit)
{
@@ -2354,6 +2375,15 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
out->depth_layout = layout;
}
/* Late sysval lowering creates large loads. Load lowering creates unpacks */
NIR_PASS_V(nir, nir_lower_mem_access_bit_sizes,
nir_var_mem_ssbo | nir_var_mem_constant |
nir_var_mem_task_payload | nir_var_shader_temp |
nir_var_function_temp | nir_var_mem_global |
nir_var_mem_shared,
mem_access_size_align_cb, NULL);
NIR_PASS_V(nir, nir_lower_pack);
/* Late blend lowering creates vectors */
NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
NIR_PASS_V(nir, nir_lower_load_const_to_scalar);
+1
View File
@@ -204,6 +204,7 @@ static const nir_shader_compiler_options agx_nir_options = {
.lower_fdph = true,
.lower_ffract = true,
.lower_pack_half_2x16 = true,
.lower_pack_64_2x32 = true,
.lower_unpack_half_2x16 = true,
.lower_extract_byte = true,
.lower_insert_byte = true,