diff --git a/src/compiler/nir/nir_opt_load_store_vectorize.c b/src/compiler/nir/nir_opt_load_store_vectorize.c index 9466c89330f..3390fadb912 100644 --- a/src/compiler/nir/nir_opt_load_store_vectorize.c +++ b/src/compiler/nir/nir_opt_load_store_vectorize.c @@ -861,10 +861,17 @@ vectorize_loads(nir_builder *b, struct vectorize_ctx *ctx, * nir_opt_algebraic() turns them into "i * 16 + 16" */ b->cursor = nir_before_instr(first->instr); - nir_def *new_base = first->intrin->src[info->base_src].ssa; - new_base = nir_iadd_imm(b, new_base, -(int)(high_start / 8u / get_offset_scale(first))); - - nir_src_rewrite(&first->intrin->src[info->base_src], new_base); + if (nir_intrinsic_has_offset_shift(first->intrin)) { + nir_add_io_offset(b, first->intrin, -(int)(high_start / 8u)); + } else { + /* TODO once all intrinsics that need a scaled offset use + * offset_shift, this old path can be removed. + */ + nir_def *new_base = first->intrin->src[info->base_src].ssa; + new_base = nir_iadd_imm( + b, new_base, -(int)(high_start / 8u / get_offset_scale(first))); + nir_src_rewrite(&first->intrin->src[info->base_src], new_base); + } } /* update the deref */ @@ -995,6 +1002,14 @@ vectorize_stores(nir_builder *b, struct vectorize_ctx *ctx, if (second != low && nir_intrinsic_has_base(second->intrin)) nir_intrinsic_set_base(second->intrin, nir_intrinsic_base(low->intrin)); + /* update offset_shift: since we use low's offset, we should use its + * offset_shift as well. + */ + if (second != low && nir_intrinsic_has_offset_shift(second->intrin)) { + nir_intrinsic_set_offset_shift(second->intrin, + nir_intrinsic_offset_shift(low->intrin)); + } + second->key = low->key; second->offset = low->offset; diff --git a/src/compiler/nir/tests/load_store_vectorizer_tests.cpp b/src/compiler/nir/tests/load_store_vectorizer_tests.cpp index 633acc87824..a1962d695f0 100644 --- a/src/compiler/nir/tests/load_store_vectorizer_tests.cpp +++ b/src/compiler/nir/tests/load_store_vectorizer_tests.cpp @@ -49,14 +49,14 @@ protected: nir_intrinsic_instr *create_indirect_load(nir_variable_mode mode, uint32_t binding, nir_def *offset, uint32_t id, unsigned bit_size=32, unsigned components=1, - unsigned access=0); + unsigned access=0, unsigned offset_shift=0); void create_indirect_store(nir_variable_mode mode, uint32_t binding, nir_def *offset, uint32_t id, unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf, unsigned access=0); nir_intrinsic_instr *create_load(nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id, unsigned bit_size=32, unsigned components=1, - unsigned access=0); + unsigned access=0, unsigned offset_shift=0); void create_store(nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id, unsigned bit_size=32, unsigned components=1, unsigned wrmask=0xf, unsigned access=0); @@ -181,7 +181,7 @@ nir_load_store_vectorize_test::get_resource(uint32_t binding, bool ssbo) nir_intrinsic_instr * nir_load_store_vectorize_test::create_indirect_load( nir_variable_mode mode, uint32_t binding, nir_def *offset, uint32_t id, - unsigned bit_size, unsigned components, unsigned access) + unsigned bit_size, unsigned components, unsigned access, unsigned offset_shift) { nir_intrinsic_op intrinsic; nir_def *res = NULL; @@ -230,6 +230,12 @@ nir_load_store_vectorize_test::create_indirect_load( } } + if (nir_intrinsic_has_offset_shift(load)) { + nir_intrinsic_set_offset_shift(load, offset_shift); + } else { + assert(offset_shift == 0); + } + nir_builder_instr_insert(b, &load->instr); nir_alu_instr *mov = nir_instr_as_alu(nir_mov(b, &load->def)->parent_instr); movs[id] = mov; @@ -281,9 +287,9 @@ nir_load_store_vectorize_test::create_indirect_store( nir_intrinsic_instr * nir_load_store_vectorize_test::create_load( nir_variable_mode mode, uint32_t binding, uint32_t offset, uint32_t id, - unsigned bit_size, unsigned components, unsigned access) + unsigned bit_size, unsigned components, unsigned access, unsigned offset_shift) { - return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access); + return create_indirect_load(mode, binding, nir_imm_int(b, offset), id, bit_size, components, access, offset_shift); } void @@ -2272,3 +2278,93 @@ TEST_F(nir_load_store_vectorize_test, ubo_vec3_hole1_vec3) EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xyz"); EXPECT_INSTR_SWIZZLES(movs[0x2], load, "efg"); } + +TEST_F(nir_load_store_vectorize_test, ssbo_shifted_same_shift_adjacent) +{ + /* byte offset = 4..16*/ + create_load(nir_var_mem_ssbo, 0, 1, 0x1, 32, 3, 0, 2); + /* byte offset = 16..20*/ + create_load(nir_var_mem_ssbo, 0, 4, 0x2, 32, 1, 0, 2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0); + ASSERT_EQ(load->def.bit_size, 32); + ASSERT_EQ(load->def.num_components, 4); + ASSERT_EQ(nir_intrinsic_offset_shift(load), 2); + ASSERT_EQ(nir_def_components_read(&load->def), 0xf); + ASSERT_EQ(nir_src_as_uint(load->src[1]), 1); + EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xyz"); + EXPECT_INSTR_SWIZZLES(movs[0x2], load, "w"); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_shifted_different_shift_adjacent) +{ + /* byte offset = 4..8*/ + create_load(nir_var_mem_ssbo, 0, 1, 0x1, 32, 1, 0, 2); + /* byte offset = 8..12*/ + create_load(nir_var_mem_ssbo, 0, 4, 0x2, 32, 1, 0, 1); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0); + ASSERT_EQ(load->def.bit_size, 32); + ASSERT_EQ(load->def.num_components, 2); + ASSERT_EQ(nir_intrinsic_offset_shift(load), 2); + ASSERT_EQ(nir_def_components_read(&load->def), 0x3); + ASSERT_EQ(nir_src_as_uint(load->src[1]), 1); + EXPECT_INSTR_SWIZZLES(movs[0x1], load, "x"); + EXPECT_INSTR_SWIZZLES(movs[0x2], load, "y"); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_shifted_same_shift_non_adjacent) +{ + /* byte offset = 0..4*/ + create_load(nir_var_mem_ssbo, 0, 0, 0x1, 32, 1, 0, 2); + /* byte offset = 16..20*/ + create_load(nir_var_mem_ssbo, 0, 4, 0x2, 32, 1, 0, 2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); +} + +TEST_F(nir_load_store_vectorize_test, ssbo_shifted_different_bit_size_adjacent) +{ + /* byte offset = 0..4*/ + create_load(nir_var_mem_ssbo, 0, 0, 0x1, 16, 2, 0, 1); + /* byte offset = 4..8*/ + create_load(nir_var_mem_ssbo, 0, 1, 0x2, 32, 1, 0, 2); + + nir_validate_shader(b->shader, NULL); + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 2); + + EXPECT_TRUE(run_vectorizer(nir_var_mem_ssbo)); + + ASSERT_EQ(count_intrinsics(nir_intrinsic_load_ssbo), 1); + + nir_intrinsic_instr *load = get_intrinsic(nir_intrinsic_load_ssbo, 0); + ASSERT_EQ(load->def.bit_size, 16); + ASSERT_EQ(load->def.num_components, 4); + ASSERT_EQ(nir_intrinsic_offset_shift(load), 1); + ASSERT_EQ(nir_def_components_read(&load->def), 0xf); + ASSERT_EQ(nir_src_as_uint(load->src[1]), 0); + EXPECT_INSTR_SWIZZLES(movs[0x1], load, "xy"); + nir_instr *mov2_src = movs[0x2]->src[0].src.ssa->parent_instr; + ASSERT_TRUE(test_alu(mov2_src, nir_op_pack_32_2x16)); + nir_alu_instr *pack = nir_instr_as_alu(mov2_src); + EXPECT_INSTR_SWIZZLES(pack, load, "zw"); +}