nir/opt_load_store_vectorize: add support for offset_shift
Signed-off-by: Job Noorman <jnoorman@igalia.com> Reviewed-by: Emma Anholt <emma@anholt.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35092>
This commit is contained in:
@@ -861,10 +861,17 @@ vectorize_loads(nir_builder *b, struct vectorize_ctx *ctx,
|
||||
* nir_opt_algebraic() turns them into "i * 16 + 16" */
|
||||
b->cursor = nir_before_instr(first->instr);
|
||||
|
||||
nir_def *new_base = first->intrin->src[info->base_src].ssa;
|
||||
new_base = nir_iadd_imm(b, new_base, -(int)(high_start / 8u / get_offset_scale(first)));
|
||||
|
||||
nir_src_rewrite(&first->intrin->src[info->base_src], new_base);
|
||||
if (nir_intrinsic_has_offset_shift(first->intrin)) {
|
||||
nir_add_io_offset(b, first->intrin, -(int)(high_start / 8u));
|
||||
} else {
|
||||
/* TODO once all intrinsics that need a scaled offset use
|
||||
* offset_shift, this old path can be removed.
|
||||
*/
|
||||
nir_def *new_base = first->intrin->src[info->base_src].ssa;
|
||||
new_base = nir_iadd_imm(
|
||||
b, new_base, -(int)(high_start / 8u / get_offset_scale(first)));
|
||||
nir_src_rewrite(&first->intrin->src[info->base_src], new_base);
|
||||
}
|
||||
}
|
||||
|
||||
/* update the deref */
|
||||
@@ -995,6 +1002,14 @@ vectorize_stores(nir_builder *b, struct vectorize_ctx *ctx,
|
||||
if (second != low && nir_intrinsic_has_base(second->intrin))
|
||||
nir_intrinsic_set_base(second->intrin, nir_intrinsic_base(low->intrin));
|
||||
|
||||
/* update offset_shift: since we use low's offset, we should use its
|
||||
* offset_shift as well.
|
||||
*/
|
||||
if (second != low && nir_intrinsic_has_offset_shift(second->intrin)) {
|
||||
nir_intrinsic_set_offset_shift(second->intrin,
|
||||
nir_intrinsic_offset_shift(low->intrin));
|
||||
}
|
||||
|
||||
second->key = low->key;
|
||||
second->offset = low->offset;
|
||||
|
||||
|
||||
@@ -49,14 +49,14 @@ protected:
|
||||
|
||||
nir_intrinsic_instr *create_indirect_load(nir_variable_mode mode, uint32_t binding, nir_def *offset,
|
||||
uint32_t id, unsigned bit_size=32, unsigned components=1,
|
||||
unsigned access=0);
|
||||
unsigned access=0, unsigned offset_shift=0);
|
||||
void create_indirect_store(nir_variable_mode mode, uint32_t binding, nir_def *offset,
|
||||
uint32_t id, unsigned bit_size=32, unsigned components=1,
|
||||
unsigned wrmask=0xf, unsigned access=0);
|
||||
|
||||
nir_intrinsic_instr *create_load(nir_variable_mode mode, uint32_t binding, uint32_t offset,
|
||||
uint32_t id, unsigned bit_size=32, unsigned components=1,
|
||||
unsigned access=0);
|
||||
unsigned access=0, unsigned offset_shift=0);
|
||||
void create_store(nir_variable_mode mode, uint32_t binding, uint32_t offset,
|
||||
uint32_t id, unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf,
|
||||
unsigned access=0);
|
||||
@@ -181,7 +181,7 @@ nir_load_store_vectorize_test::get_resource(uint32_t binding, bool ssbo)
|
||||
nir_intrinsic_instr *
|
||||
nir_load_store_vectorize_test::create_indirect_load(
|
||||
nir_variable_mode mode, uint32_t binding, nir_def *offset, uint32_t id,
|
||||
unsigned bit_size, unsigned components, unsigned access)
|
||||
unsigned bit_size, unsigned components, unsigned access, unsigned offset_shift)
|
||||
{
|
||||
nir_intrinsic_op intrinsic;
|
||||
nir_def *res = NULL;
|
||||
@@ -230,6 +230,12 @@ nir_load_store_vectorize_test::create_indirect_load(
|
||||
}
|
||||
}
|
||||
|
||||
if (nir_intrinsic_has_offset_shift(load)) {
|
||||
nir_intrinsic_set_offset_shift(load, offset_shift);
|
||||
} else {
|
||||
assert(offset_shift == 0);
|
||||
}
|
||||
|
||||
nir_builder_instr_insert(b, &load->instr);
|
||||
nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, &load->def)->parent_instr);
|
||||
movs[id] = mov;
|
||||
@@ -281,9 +287,9 @@ nir_load_store_vectorize_test::create_indirect_store(
|
||||
nir_intrinsic_instr *
|
||||
nir_load_store_vectorize_test::create_load(
|
||||
nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id,
|
||||
unsigned bit_size, unsigned components, unsigned access)
|
||||
unsigned bit_size, unsigned components, unsigned access, unsigned offset_shift)
|
||||
{
|
||||
return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access);
|
||||
return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access, offset_shift);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -2272,3 +2278,93 @@ TEST_F(nir_load_store_vectorize_test, ubo_vec3_hole1_vec3)
|
||||
EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xyz");
|
||||
EXPECT_INSTR_SWIZZLES(movs[0x2], load, "efg");
|
||||
}
|
||||
|
||||
TEST_F(nir_load_store_vectorize_test, ssbo_shifted_same_shift_adjacent)
|
||||
{
|
||||
/* byte offset = 4..16*/
|
||||
create_load(nir_var_mem_ssbo, 0, 1, 0x1, 32, 3, 0, 2);
|
||||
/* byte offset = 16..20*/
|
||||
create_load(nir_var_mem_ssbo, 0, 4, 0x2, 32, 1, 0, 2);
|
||||
|
||||
nir_validate_shader(b->shader, NULL);
|
||||
ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
|
||||
|
||||
EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
|
||||
|
||||
ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
|
||||
|
||||
nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
|
||||
ASSERT_EQ(load->def.bit_size, 32);
|
||||
ASSERT_EQ(load->def.num_components, 4);
|
||||
ASSERT_EQ(nir_intrinsic_offset_shift(load), 2);
|
||||
ASSERT_EQ(nir_def_components_read(&load->def), 0xf);
|
||||
ASSERT_EQ(nir_src_as_uint(load->src[1]), 1);
|
||||
EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xyz");
|
||||
EXPECT_INSTR_SWIZZLES(movs[0x2], load, "w");
|
||||
}
|
||||
|
||||
TEST_F(nir_load_store_vectorize_test, ssbo_shifted_different_shift_adjacent)
|
||||
{
|
||||
/* byte offset = 4..8*/
|
||||
create_load(nir_var_mem_ssbo, 0, 1, 0x1, 32, 1, 0, 2);
|
||||
/* byte offset = 8..12*/
|
||||
create_load(nir_var_mem_ssbo, 0, 4, 0x2, 32, 1, 0, 1);
|
||||
|
||||
nir_validate_shader(b->shader, NULL);
|
||||
ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
|
||||
|
||||
EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
|
||||
|
||||
ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
|
||||
|
||||
nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
|
||||
ASSERT_EQ(load->def.bit_size, 32);
|
||||
ASSERT_EQ(load->def.num_components, 2);
|
||||
ASSERT_EQ(nir_intrinsic_offset_shift(load), 2);
|
||||
ASSERT_EQ(nir_def_components_read(&load->def), 0x3);
|
||||
ASSERT_EQ(nir_src_as_uint(load->src[1]), 1);
|
||||
EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x");
|
||||
EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y");
|
||||
}
|
||||
|
||||
TEST_F(nir_load_store_vectorize_test, ssbo_shifted_same_shift_non_adjacent)
|
||||
{
|
||||
/* byte offset = 0..4*/
|
||||
create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 1, 0, 2);
|
||||
/* byte offset = 16..20*/
|
||||
create_load(nir_var_mem_ssbo, 0, 4, 0x2, 32, 1, 0, 2);
|
||||
|
||||
nir_validate_shader(b->shader, NULL);
|
||||
ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
|
||||
|
||||
EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
|
||||
|
||||
ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
|
||||
}
|
||||
|
||||
TEST_F(nir_load_store_vectorize_test, ssbo_shifted_different_bit_size_adjacent)
|
||||
{
|
||||
/* byte offset = 0..4*/
|
||||
create_load(nir_var_mem_ssbo, 0, 0, 0x1, 16, 2, 0, 1);
|
||||
/* byte offset = 4..8*/
|
||||
create_load(nir_var_mem_ssbo, 0, 1, 0x2, 32, 1, 0, 2);
|
||||
|
||||
nir_validate_shader(b->shader, NULL);
|
||||
ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2);
|
||||
|
||||
EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo));
|
||||
|
||||
ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1);
|
||||
|
||||
nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0);
|
||||
ASSERT_EQ(load->def.bit_size, 16);
|
||||
ASSERT_EQ(load->def.num_components, 4);
|
||||
ASSERT_EQ(nir_intrinsic_offset_shift(load), 1);
|
||||
ASSERT_EQ(nir_def_components_read(&load->def), 0xf);
|
||||
ASSERT_EQ(nir_src_as_uint(load->src[1]), 0);
|
||||
EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy");
|
||||
nir_instr *mov2_src = movs[0x2]->src[0].src.ssa->parent_instr;
|
||||
ASSERT_TRUE(test_alu(mov2_src, nir_op_pack_32_2x16));
|
||||
nir_alu_instr *pack = nir_instr_as_alu(mov2_src);
|
||||
EXPECT_INSTR_SWIZZLES(pack, load, "zw");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user