nir/opt_load_store_vectorize: add support for offset_shift

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35092>
This commit is contained in:
Job Noorman
2025-08-19 10:41:46 +02:00
committed by Marge Bot
parent 249e27c9c7
commit cb773dec8c
2 changed files with 120 additions and 9 deletions
@@ -861,10 +861,17 @@ vectorize_loads(nir_builder *b, struct vectorize_ctx *ctx,
* nir_opt_algebraic() turns them into "i * 16 + 16" */
b->cursor = nir_before_instr(first->instr);
nir_def *new_base = first->intrin->src[info->base_src].ssa;
new_base = nir_iadd_imm(b, new_base, -(int)(high_start / 8u / get_offset_scale(first)));
nir_src_rewrite(&first->intrin->src[info->base_src], new_base);
if (nir_intrinsic_has_offset_shift(first->intrin)) {
nir_add_io_offset(b, first->intrin, -(int)(high_start / 8u));
} else {
/* TODO once all intrinsics that need a scaled offset use
* offset_shift, this old path can be removed.
*/
nir_def *new_base = first->intrin->src[info->base_src].ssa;
new_base = nir_iadd_imm(
b, new_base, -(int)(high_start / 8u / get_offset_scale(first)));
nir_src_rewrite(&first->intrin->src[info->base_src], new_base);
}
}
/* update the deref */
@@ -995,6 +1002,14 @@ vectorize_stores(nir_builder *b, struct vectorize_ctx *ctx,
if (second != low && nir_intrinsic_has_base(second->intrin))
nir_intrinsic_set_base(second->intrin, nir_intrinsic_base(low->intrin));
/* update offset_shift: since we use low's offset, we should use its
* offset_shift as well.
*/
if (second != low && nir_intrinsic_has_offset_shift(second->intrin)) {
nir_intrinsic_set_offset_shift(second->intrin,
nir_intrinsic_offset_shift(low->intrin));
}
second->key = low->key;
second->offset = low->offset;
@@ -49,14 +49,14 @@ protected:
nir_intrinsic_instr *create_indirect_load(nir_variable_mode mode, uint32_t binding, nir_def *offset,
uint32_t id, unsigned bit_size=32, unsigned components=1,
unsigned access=0);
unsigned access=0, unsigned offset_shift=0);
void create_indirect_store(nir_variable_mode mode, uint32_t binding, nir_def *offset,
uint32_t id, unsigned bit_size=32, unsigned components=1,
unsigned wrmask=0xf, unsigned access=0);
nir_intrinsic_instr *create_load(nir_variable_mode mode, uint32_t binding, uint32_t offset,
uint32_t id, unsigned bit_size=32, unsigned components=1,
unsigned access=0);
unsigned access=0, unsigned offset_shift=0);
void create_store(nir_variable_mode mode, uint32_t binding, uint32_t offset,
uint32_t id, unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf,
unsigned access=0);
@@ -181,7 +181,7 @@ nir_load_store_vectorize_test::get_resource(uint32_t binding, bool ssbo)
nir_intrinsic_instr *
nir_load_store_vectorize_test::create_indirect_load(
nir_variable_mode mode, uint32_t binding, nir_def *offset, uint32_t id,
unsigned bit_size, unsigned components, unsigned access)
unsigned bit_size, unsigned components, unsigned access, unsigned offset_shift)
{
nir_intrinsic_op intrinsic;
nir_def *res = NULL;
@@ -230,6 +230,12 @@ nir_load_store_vectorize_test::create_indirect_load(
}
}
if (nir_intrinsic_has_offset_shift(load)) {
nir_intrinsic_set_offset_shift(load, offset_shift);
} else {
assert(offset_shift == 0);
}
nir_builder_instr_insert(b, &load->instr);
nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, &load->def)->parent_instr);
movs[id] = mov;
@@ -281,9 +287,9 @@ nir_load_store_vectorize_test::create_indirect_store(
nir_intrinsic_instr *
nir_load_store_vectorize_test::create_load(
nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
unsigned bit_size, unsigned components, unsigned access)
unsigned bit_size, unsigned components, unsigned access, unsigned offset_shift)
{
return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access);
return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access, offset_shift);
}
void
@@ -2272,3 +2278,93 @@ TEST_F(nir_load_store_vectorize_test, ubo_vec3_hole1_vec3)
EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xyz");
EXPECT_INSTR_SWIZZLES(movs[0x2], load, "efg");
}
/* Two SSBO loads that share offset_shift=2 and touch back-to-back byte
 * ranges must be merged into a single vec4 load that keeps the shift and the
 * (shifted) offset of the lower load.
 */
TEST_F(nir_load_store_vectorize_test, ssbo_shifted_same_shift_adjacent)
{
   const nir_variable_mode mode = nir_var_mem_ssbo;

   /* vec3 at shifted offset 1 (shift 2): bytes 4..16 */
   create_load(mode, 0, 1, 0x1, 32, 3, 0, 2);
   /* scalar at shifted offset 4 (shift 2): bytes 16..20 */
   create_load(mode, 0, 4, 0x2, 32, 1, 0, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(mode));

   /* Only the combined load may remain. */
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(merged->def.bit_size, 32);
   ASSERT_EQ(merged->def.num_components, 4);
   ASSERT_EQ(nir_intrinsic_offset_shift(merged), 2);
   ASSERT_EQ(nir_def_components_read(&merged->def), 0xf);
   /* The offset source is still expressed in shifted units. */
   ASSERT_EQ(nir_src_as_uint(merged->src[1]), 1);

   /* First load maps to the low three components, second to the last one. */
   EXPECT_INSTR_SWIZZLES(movs[0x1], merged, "xyz");
   EXPECT_INSTR_SWIZZLES(movs[0x2], merged, "w");
}
/* Two SSBO loads with different offset_shift values whose byte ranges are
 * nevertheless adjacent must be merged; the result is expected to carry the
 * offset and offset_shift of the lower load.
 */
TEST_F(nir_load_store_vectorize_test, ssbo_shifted_different_shift_adjacent)
{
   const nir_variable_mode mode = nir_var_mem_ssbo;

   /* scalar at shifted offset 1 (shift 2): bytes 4..8 */
   create_load(mode, 0, 1, 0x1, 32, 1, 0, 2);
   /* scalar at shifted offset 4 (shift 1): bytes 8..12 */
   create_load(mode, 0, 4, 0x2, 32, 1, 0, 1);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(mode));

   /* Only the combined load may remain. */
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(merged->def.bit_size, 32);
   ASSERT_EQ(merged->def.num_components, 2);
   /* Shift and offset both come from the load at the lower address. */
   ASSERT_EQ(nir_intrinsic_offset_shift(merged), 2);
   ASSERT_EQ(nir_def_components_read(&merged->def), 0x3);
   ASSERT_EQ(nir_src_as_uint(merged->src[1]), 1);

   EXPECT_INSTR_SWIZZLES(movs[0x1], merged, "x");
   EXPECT_INSTR_SWIZZLES(movs[0x2], merged, "y");
}
/* Two SSBO loads that share offset_shift=2 but are separated by a gap
 * (bytes 4..16 are not read by either load) must be left alone.
 */
TEST_F(nir_load_store_vectorize_test, ssbo_shifted_same_shift_non_adjacent)
{
   /* byte offset = 0..4 */
   create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 1, 0, 2);
   /* byte offset = 16..20 */
   create_load(nir_var_mem_ssbo, 0, 4, 0x2, 32, 1, 0, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   /* Nothing can be combined, so the vectorizer must not report progress.
    * NOTE(review): assumes run_vectorizer() returns the pass's progress flag,
    * as the merging tests (which expect true when the load count drops)
    * suggest - confirm against its definition.
    */
   EXPECT_FALSE(run_vectorizer(nir_var_mem_ssbo));

   /* Both loads are still present. */
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
}
/* Adjacent SSBO loads that differ in both bit size and offset_shift must be
 * merged into one 16-bit vec4 load; the 32-bit value is then rebuilt from the
 * upper two 16-bit components with pack_32_2x16.
 */
TEST_F(nir_load_store_vectorize_test, ssbo_shifted_different_bit_size_adjacent)
{
   const nir_variable_mode mode = nir_var_mem_ssbo;

   /* 16-bit vec2 at shifted offset 0 (shift 1): bytes 0..4 */
   create_load(mode, 0, 0, 0x1, 16, 2, 0, 1);
   /* 32-bit scalar at shifted offset 1 (shift 2): bytes 4..8 */
   create_load(mode, 0, 1, 0x2, 32, 1, 0, 2);

   nir_validate_shader(b->shader, NULL);
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);

   EXPECT_TRUE(run_vectorizer(mode));

   /* Only the combined load may remain. */
   ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);

   nir_intrinsic_instr *merged = get_intrinsic(nir_intrinsic_load_ssbo, 0);
   ASSERT_EQ(merged->def.bit_size, 16);
   ASSERT_EQ(merged->def.num_components, 4);
   /* Shift and offset both come from the load at the lower address. */
   ASSERT_EQ(nir_intrinsic_offset_shift(merged), 1);
   ASSERT_EQ(nir_def_components_read(&merged->def), 0xf);
   ASSERT_EQ(nir_src_as_uint(merged->src[1]), 0);

   /* The 16-bit user reads the low half of the vector directly. */
   EXPECT_INSTR_SWIZZLES(movs[0x1], merged, "xy");

   /* The 32-bit user must be fed by a pack of the high half. */
   nir_instr *packed_instr = movs[0x2]->src[0].src.ssa->parent_instr;
   ASSERT_TRUE(test_alu(packed_instr, nir_op_pack_32_2x16));
   nir_alu_instr *packed = nir_instr_as_alu(packed_instr);
   EXPECT_INSTR_SWIZZLES(packed, merged, "zw");
}