broadcom/compiler: only handle load_uniform explicitly in v3d_nir_lower_load_store_bitsize

Also use nir_get_io_offset_src_number while at it.

Acked-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29711>
Commit:    75196e86f1
Parent:    a2eff2b9f9
Author:    Karol Herbst
Date:      2024-06-13 13:55:39 +02:00
Committer: Marge Bot
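
For orientation, the dispatch that survives this commit reduces to the shape sketched below. This is a sketch rather than the verbatim new file: per the commit title it assumes only load_uniform remains special-cased in this pass, with the other load/store intrinsics presumably lowered by common NIR passes instead.

    /* Sketch of the simplified dispatch after this commit (assumed shape). */
    static bool
    lower_load_store_bitsize(nir_builder *b, nir_intrinsic_instr *intr,
                             void *data)
    {
            switch (intr->intrinsic) {
            case nir_intrinsic_load_uniform:
                    /* Only uniform loads keep the driver-specific
                     * scalarization. */
                    return lower_load_bitsize(b, intr);
            default:
                    return false;
            }
    }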

@@ -33,40 +33,6 @@
  * which we do support.
  */
 
-static int
-value_src(nir_intrinsic_op intrinsic)
-{
-        switch (intrinsic) {
-        case nir_intrinsic_store_ssbo:
-        case nir_intrinsic_store_scratch:
-        case nir_intrinsic_store_global_2x32:
-                return 0;
-        default:
-                unreachable("Unsupported intrinsic");
-        }
-}
-
-static int
-offset_src(nir_intrinsic_op intrinsic)
-{
-        switch (intrinsic) {
-        case nir_intrinsic_load_uniform:
-        case nir_intrinsic_load_shared:
-        case nir_intrinsic_load_scratch:
-        case nir_intrinsic_load_global_2x32:
-                return 0;
-        case nir_intrinsic_load_ubo:
-        case nir_intrinsic_load_ssbo:
-        case nir_intrinsic_store_scratch:
-        case nir_intrinsic_store_global_2x32:
-                return 1;
-        case nir_intrinsic_store_ssbo:
-                return 2;
-        default:
-                unreachable("Unsupported intrinsic");
-        }
-}
-
 static nir_intrinsic_instr *
 init_scalar_intrinsic(nir_builder *b,
                       nir_intrinsic_instr *intr,
@@ -83,30 +49,12 @@ init_scalar_intrinsic(nir_builder *b,
         const int offset_units = bit_size / 8;
         assert(offset_units >= 1);
 
-        if (nir_intrinsic_has_align_mul(intr)) {
-                assert(nir_intrinsic_has_align_offset(intr));
-                unsigned align_mul = nir_intrinsic_align_mul(intr);
-                unsigned align_off = nir_intrinsic_align_offset(intr);
-
-                align_off += offset_units * component;
-                align_off = align_off % align_mul;
-
-                nir_intrinsic_set_align(new_intr, align_mul, align_off);
-        }
+        assert(!nir_intrinsic_has_align_mul(intr));
+        assert(nir_intrinsic_has_base(intr));
 
         *scalar_offset = offset;
         unsigned offset_adj = offset_units * component;
-        if (nir_intrinsic_has_base(intr)) {
-                nir_intrinsic_set_base(
-                        new_intr, nir_intrinsic_base(intr) + offset_adj);
-        } else {
-                *scalar_offset =
-                        nir_iadd(b, offset,
-                                 nir_imm_intN_t(b, offset_adj,
-                                                offset->bit_size));
-        }
+        nir_intrinsic_set_base(new_intr, nir_intrinsic_base(intr) + offset_adj);
 
         new_intr->num_components = 1;
 
         return new_intr;
@@ -127,9 +75,8 @@ lower_load_bitsize(nir_builder *b,
         b->cursor = nir_before_instr(&intr->instr);
 
-        /* For global 2x32 we ignore Y component because it must be zero */
-        unsigned offset_idx = offset_src(intr->intrinsic);
-        nir_def *offset = nir_trim_vector(b, intr->src[offset_idx].ssa, 1);
+        unsigned offset_idx = nir_get_io_offset_src_number(intr);
+        nir_def *offset = intr->src[offset_idx].ssa;
 
         /* Split vector store to multiple scalar loads */
         nir_def *dest_components[4] = { NULL };
@@ -142,12 +89,7 @@ lower_load_bitsize(nir_builder *b,
                 for (unsigned i = 0; i < info->num_srcs; i++) {
                         if (i == offset_idx) {
-                                nir_def *final_offset;
-                                final_offset = intr->intrinsic != nir_intrinsic_load_global_2x32 ?
-                                        scalar_offset :
-                                        nir_vec2(b, scalar_offset,
-                                                 nir_imm_int(b, 0));
-                                new_intr->src[i] = nir_src_for_ssa(final_offset);
+                                new_intr->src[i] = nir_src_for_ssa(scalar_offset);
                         } else {
                                 new_intr->src[i] = intr->src[i];
                         }
@@ -167,84 +109,14 @@ lower_load_bitsize(nir_builder *b,
         return true;
 }
 
-static bool
-lower_store_bitsize(nir_builder *b,
-                    nir_intrinsic_instr *intr)
-{
-        /* No need to split if it is already scalar */
-        int value_idx = value_src(intr->intrinsic);
-        int num_comp = nir_intrinsic_src_components(intr, value_idx);
-        if (num_comp <= 1)
-                return false;
-
-        /* No need to split if it is 32-bit */
-        if (nir_src_bit_size(intr->src[value_idx]) == 32)
-                return false;
-
-        nir_def *value = intr->src[value_idx].ssa;
-
-        b->cursor = nir_before_instr(&intr->instr);
-
-        /* For global 2x32 we ignore Y component because it must be zero */
-        unsigned offset_idx = offset_src(intr->intrinsic);
-        nir_def *offset = nir_trim_vector(b, intr->src[offset_idx].ssa, 1);
-
-        /* Split vector store to multiple scalar stores */
-        const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
-
-        unsigned wrmask = nir_intrinsic_write_mask(intr);
-        while (wrmask) {
-                unsigned component = ffs(wrmask) - 1;
-
-                nir_def *scalar_offset;
-                nir_intrinsic_instr *new_intr =
-                        init_scalar_intrinsic(b, intr, component, offset,
-                                              value->bit_size, &scalar_offset);
-
-                nir_intrinsic_set_write_mask(new_intr, 0x1);
-
-                for (unsigned i = 0; i < info->num_srcs; i++) {
-                        if (i == value_idx) {
-                                nir_def *scalar_value =
-                                        nir_channels(b, value, 1 << component);
-                                new_intr->src[i] = nir_src_for_ssa(scalar_value);
-                        } else if (i == offset_idx) {
-                                nir_def *final_offset;
-                                final_offset = intr->intrinsic != nir_intrinsic_store_global_2x32 ?
-                                        scalar_offset :
-                                        nir_vec2(b, scalar_offset,
-                                                 nir_imm_int(b, 0));
-                                new_intr->src[i] = nir_src_for_ssa(final_offset);
-                        } else {
-                                new_intr->src[i] = intr->src[i];
-                        }
-                }
-
-                nir_builder_instr_insert(b, &new_intr->instr);
-
-                wrmask &= ~(1 << component);
-        }
-
-        nir_instr_remove(&intr->instr);
-
-        return true;
-}
-
 static bool
 lower_load_store_bitsize(nir_builder *b, nir_intrinsic_instr *intr,
                          void *data)
 {
         switch (intr->intrinsic) {
-        case nir_intrinsic_load_ssbo:
-        case nir_intrinsic_load_ubo:
         case nir_intrinsic_load_uniform:
-        case nir_intrinsic_load_scratch:
-        case nir_intrinsic_load_global_2x32:
                 return lower_load_bitsize(b, intr);
-        case nir_intrinsic_store_ssbo:
-        case nir_intrinsic_store_scratch:
-        case nir_intrinsic_store_global_2x32:
-                return lower_store_bitsize(b, intr);
         default:
                 return false;
         }
 }
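
The generic NIR helper is what replaces the removed offset_src() table. A minimal usage sketch, assuming the upstream behavior of nir_get_io_offset_src_number() (it returns the index of the intrinsic's offset source, or -1 when the intrinsic has none); the wrapper name get_offset_def is hypothetical:

    /* Hypothetical wrapper: fetch an intrinsic's offset def through the
     * generic NIR helper instead of a per-driver switch over opcodes.
     * Assumes the surrounding file already includes nir.h / nir_builder.h. */
    static nir_def *
    get_offset_def(nir_intrinsic_instr *intr)
    {
            int idx = nir_get_io_offset_src_number(intr);
            if (idx < 0)
                    return NULL; /* intrinsic has no offset source */
            return intr->src[idx].ssa;
    }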