From 2d597b6919fefa7659e89e89966c0d448d6fdbdb Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Wed, 13 Aug 2025 14:13:20 +0100 Subject: [PATCH] nir/load_store_vectorize: use nir_def_num_lsb_zero in calc_alignment fossil-db (gfx1201): Totals from 20 (0.03% of 79839) affected shaders: Instrs: 15370 -> 15251 (-0.77%) CodeSize: 89764 -> 88952 (-0.90%) Latency: 150295 -> 149963 (-0.22%) InvThroughput: 210291 -> 210105 (-0.09%) Copies: 1337 -> 1320 (-1.27%) PreVGPRs: 589 -> 590 (+0.17%) VALU: 7519 -> 7466 (-0.70%) Signed-off-by: Rhys Perry Reviewed-by: Georg Lehmann Part-of: --- .../nir/nir_opt_load_store_vectorize.c | 36 +++++++++++-------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/src/compiler/nir/nir_opt_load_store_vectorize.c b/src/compiler/nir/nir_opt_load_store_vectorize.c index 08a39795f34..cd171096d55 100644 --- a/src/compiler/nir/nir_opt_load_store_vectorize.c +++ b/src/compiler/nir/nir_opt_load_store_vectorize.c @@ -581,7 +581,7 @@ aliasing_modes(nir_variable_mode modes) } static void -calc_alignment(struct entry *entry) +calc_alignment(struct vectorize_ctx *ctx, struct entry *entry) { if (entry->intrin->intrinsic == nir_intrinsic_load_buffer_amd || entry->intrin->intrinsic == nir_intrinsic_store_buffer_amd) { @@ -595,8 +595,12 @@ calc_alignment(struct entry *entry) uint32_t align_mul = 31; for (unsigned i = 0; i < entry->key->offset_def_count; i++) { - if (entry->key->offset_defs_mul[i]) - align_mul = MIN2(align_mul, ffsll(entry->key->offset_defs_mul[i])); + unsigned lsb_zero = nir_def_num_lsb_zero(ctx->numlsb_ht, entry->key->offset_defs[i]); + if (lsb_zero == 64) + continue; + uint64_t stride = entry->key->offset_defs_mul[i] << lsb_zero; + if (stride) + align_mul = MIN2(align_mul, ffsll(stride)); } entry->align_mul = 1u << (align_mul - 1); @@ -610,7 +614,7 @@ calc_alignment(struct entry *entry) } static struct entry * -create_entry(void *mem_ctx, +create_entry(void *mem_ctx, struct vectorize_ctx *ctx, const struct intrinsic_info *info, nir_intrinsic_instr *intrin) { @@ -672,7 +676,7 @@ create_entry(void *mem_ctx, if (get_variable_mode(entry) & restrict_modes) entry->access |= ACCESS_RESTRICT; - calc_alignment(entry); + calc_alignment(ctx, entry); return entry; } @@ -1660,7 +1664,7 @@ process_block(nir_function_impl *impl, struct vectorize_ctx *ctx, nir_block *blo unsigned mode_index = mode_to_index(mode); /* create entry */ - struct entry *entry = create_entry(ctx, info, intrin); + struct entry *entry = create_entry(ctx, ctx, info, intrin); entry->index = next_index++; list_addtail(&entry->head, &ctx->entries[mode_index]); @@ -1730,7 +1734,7 @@ nir_opt_load_store_vectorize(nir_shader *shader, const nir_load_store_vectorize_ static bool opt_load_store_update_alignments_callback(struct nir_builder *b, nir_intrinsic_instr *intrin, - UNUSED void *s) + void *s) { if (!nir_intrinsic_has_align_mul(intrin)) return false; @@ -1739,7 +1743,7 @@ opt_load_store_update_alignments_callback(struct nir_builder *b, if (!info) return false; - struct entry *entry = create_entry(NULL, info, intrin); + struct entry *entry = create_entry(NULL, s, info, intrin); const bool progress = update_align(entry); ralloc_free(entry); @@ -1749,10 +1753,14 @@ opt_load_store_update_alignments_callback(struct nir_builder *b, bool nir_opt_load_store_update_alignments(nir_shader *shader) { - return nir_shader_intrinsics_pass(shader, - opt_load_store_update_alignments_callback, - nir_metadata_control_flow | - nir_metadata_live_defs | - nir_metadata_instr_index, - NULL); + struct vectorize_ctx ctx; + ctx.numlsb_ht = _mesa_pointer_hash_table_create(NULL); + bool progress = nir_shader_intrinsics_pass(shader, + opt_load_store_update_alignments_callback, + nir_metadata_control_flow | + nir_metadata_live_defs | + nir_metadata_instr_index, + &ctx); + ralloc_free(ctx.numlsb_ht); + return progress; }