pan/va: improve lowering of SWZ_V4I8
Use bi_make_vec_to so that a single MKVEC.v2i8 can be used when possible. Also add support for all swizzles, instead of only mono-byte ones, using bi_swizzle_to_byte_channels. Update the assert in bi_byte. Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35643>
This commit is contained in:
@@ -474,8 +474,8 @@ bi_is_imm_var_desc_handle(bi_builder *b, nir_intrinsic_instr *instr,
|
||||
return bi_is_imm_desc_handle(b, instr, immediate, max);
|
||||
}
|
||||
|
||||
static void bi_make_vec_to(bi_builder *b, bi_index final_dst, bi_index *src,
|
||||
unsigned *channel, unsigned count, unsigned bitsize);
|
||||
bi_instr *bi_make_vec_to(bi_builder *b, bi_index final_dst, bi_index *src,
|
||||
unsigned *channel, unsigned count, unsigned bitsize);
|
||||
|
||||
/* Bifrost's load instructions lack a component offset despite operating in
|
||||
* terms of vec4 slots. Usually I/O vectorization avoids nonzero components,
|
||||
@@ -806,7 +806,7 @@ bi_make_vec16_helper(bi_builder *b, bi_index *src, unsigned *channel,
|
||||
return bi_mkvec_v2i16(b, h0, h1);
|
||||
}
|
||||
|
||||
static void
|
||||
bi_instr *
|
||||
bi_make_vec_to(bi_builder *b, bi_index dst, bi_index *src, unsigned *channel,
|
||||
unsigned count, unsigned bitsize)
|
||||
{
|
||||
@@ -831,7 +831,7 @@ bi_make_vec_to(bi_builder *b, bi_index dst, bi_index *src, unsigned *channel,
|
||||
srcs[i >> 2] = bi_make_vec8_helper(b, src + i, channel_offset, rem);
|
||||
}
|
||||
|
||||
bi_emit_collect_to(b, dst, srcs, DIV_ROUND_UP(count, chan_per_word));
|
||||
return bi_emit_collect_to(b, dst, srcs, DIV_ROUND_UP(count, chan_per_word));
|
||||
}
|
||||
|
||||
static inline bi_instr *
|
||||
|
||||
@@ -119,6 +119,47 @@ enum bi_swizzle {
|
||||
BI_SWIZZLE_B33 = BI_SWIZZLE_B3333,
|
||||
};
|
||||
|
||||
static inline bool
|
||||
bi_swizzle_to_byte_channels(enum bi_swizzle swizzle, unsigned *channels)
|
||||
{
|
||||
#define B(b0, b1, b2, b3) \
|
||||
case BI_SWIZZLE_B##b0##b1##b2##b3: { \
|
||||
channels[0] = b0; \
|
||||
channels[1] = b1; \
|
||||
channels[2] = b2; \
|
||||
channels[3] = b3; \
|
||||
return true; \
|
||||
}
|
||||
switch (swizzle) {
|
||||
B(0, 1, 0, 1);
|
||||
B(0, 1, 2, 3);
|
||||
B(2, 3, 0, 1);
|
||||
B(2, 3, 2, 3);
|
||||
B(0, 0, 0, 0);
|
||||
B(1, 1, 1, 1);
|
||||
B(2, 2, 2, 2);
|
||||
B(3, 3, 3, 3);
|
||||
B(0, 0, 1, 1);
|
||||
B(2, 2, 3, 3);
|
||||
B(1, 0, 3, 2);
|
||||
B(3, 2, 1, 0);
|
||||
B(0, 0, 2, 2);
|
||||
B(1, 1, 0, 0);
|
||||
B(2, 2, 0, 0);
|
||||
B(3, 3, 0, 0);
|
||||
B(2, 2, 1, 1);
|
||||
B(3, 3, 1, 1);
|
||||
B(1, 1, 2, 2);
|
||||
B(3, 3, 2, 2);
|
||||
B(0, 0, 3, 3);
|
||||
B(1, 1, 3, 3);
|
||||
B(1, 1, 2, 3);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
#undef B
|
||||
}
|
||||
|
||||
/* Given a packed i16vec2/i8vec4 constant, apply a swizzle. Useful for constant
|
||||
* folding and Valhall constant optimization. */
|
||||
|
||||
@@ -297,11 +338,21 @@ bi_half(bi_index idx, bool upper)
|
||||
return bi_swz_16(idx, upper, upper);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
bi_valid_lane_for_byte_swizzle(enum bi_swizzle swizzle, unsigned lane)
|
||||
{
|
||||
unsigned channels[4];
|
||||
if (bi_swizzle_to_byte_channels(swizzle, channels)) {
|
||||
return lane == channels[0] || lane == channels[1] ||
|
||||
lane == channels[2] || lane == channels[3];
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bi_index
|
||||
bi_byte(bi_index idx, unsigned lane)
|
||||
{
|
||||
assert(idx.swizzle == BI_SWIZZLE_B0123);
|
||||
assert(lane < 4);
|
||||
assert(bi_valid_lane_for_byte_swizzle(idx.swizzle, lane));
|
||||
idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_B0 + lane);
|
||||
return idx;
|
||||
}
|
||||
@@ -1614,6 +1665,9 @@ bi_dontcare(bi_builder *b)
|
||||
return bi_passthrough(BIFROST_SRC_FAU_HI);
|
||||
}
|
||||
|
||||
bi_instr *bi_make_vec_to(bi_builder *b, bi_index dst, bi_index *src,
|
||||
unsigned *channel, unsigned count, unsigned bitsize);
|
||||
|
||||
#define bi_worklist_init(ctx, w) u_worklist_init(w, ctx->num_blocks, ctx)
|
||||
#define bi_worklist_push_head(w, block) u_worklist_push_head(w, block, index)
|
||||
#define bi_worklist_push_tail(w, block) u_worklist_push_tail(w, block, index)
|
||||
|
||||
@@ -25,49 +25,22 @@
|
||||
#include "compiler.h"
|
||||
#include "va_compiler.h"
|
||||
#include "valhall.h"
|
||||
#include "compiler.h"
|
||||
|
||||
static void
|
||||
va_compose_mkvec_swz_v4i8(bi_index *b, enum bi_swizzle swz)
|
||||
static bi_instr *
|
||||
lower_swz_v4i8(bi_builder *b, bi_instr *I)
|
||||
{
|
||||
#define B(b0, b1, b2, b3) \
|
||||
case BI_SWIZZLE_B##b0##b1##b2##b3: \
|
||||
b[0].swizzle = BI_SWIZZLE_B##b0; \
|
||||
b[1].swizzle = BI_SWIZZLE_B##b1; \
|
||||
b[2].swizzle = BI_SWIZZLE_B##b2; \
|
||||
b[3].swizzle = BI_SWIZZLE_B##b3; \
|
||||
break;
|
||||
|
||||
switch (swz) {
|
||||
B(0, 1, 0, 1);
|
||||
B(0, 1, 2, 3);
|
||||
B(2, 3, 0, 1);
|
||||
B(2, 3, 2, 3);
|
||||
B(0, 0, 0, 0);
|
||||
B(1, 1, 1, 1);
|
||||
B(2, 2, 2, 2);
|
||||
B(3, 3, 3, 3);
|
||||
B(0, 0, 1, 1);
|
||||
B(2, 2, 3, 3);
|
||||
B(1, 0, 3, 2);
|
||||
B(3, 2, 1, 0);
|
||||
B(0, 0, 2, 2);
|
||||
B(1, 1, 0, 0);
|
||||
B(2, 2, 0, 0);
|
||||
B(3, 3, 0, 0);
|
||||
B(2, 2, 1, 1);
|
||||
B(3, 3, 1, 1);
|
||||
B(1, 1, 2, 2);
|
||||
B(3, 3, 2, 2);
|
||||
B(0, 0, 3, 3);
|
||||
B(1, 1, 3, 3);
|
||||
B(1, 1, 2, 3);
|
||||
|
||||
default:
|
||||
UNREACHABLE("Invalid swizzle");
|
||||
break;
|
||||
/* IADD.v4u8 is gone on v11 */
|
||||
if (b->shader->arch >= 11) {
|
||||
bi_index srcs[4] = {I->src[0], I->src[0], I->src[0], I->src[0]};
|
||||
unsigned channels[4];
|
||||
bool valid_swizzle =
|
||||
bi_swizzle_to_byte_channels(I->src[0].swizzle, channels);
|
||||
assert(valid_swizzle);
|
||||
return bi_make_vec_to(b, I->dest[0], srcs, channels, 4, 8);
|
||||
}
|
||||
|
||||
#undef B
|
||||
return bi_iadd_v4u8_to(b, I->dest[0], I->src[0], bi_zero(), false);
|
||||
}
|
||||
|
||||
static bi_instr *
|
||||
@@ -79,23 +52,8 @@ lower(bi_builder *b, bi_instr *I)
|
||||
case BI_OPCODE_SWZ_V2I16:
|
||||
return bi_iadd_v2u16_to(b, I->dest[0], I->src[0], bi_zero(), false);
|
||||
|
||||
case BI_OPCODE_SWZ_V4I8: {
|
||||
/* IADD.v4u8 is gone on v11 */
|
||||
if (b->shader->arch >= 11) {
|
||||
bi_index bytes[4] = {
|
||||
I->src[0],
|
||||
I->src[0],
|
||||
I->src[0],
|
||||
I->src[0],
|
||||
};
|
||||
|
||||
va_compose_mkvec_swz_v4i8(bytes, I->src[0].swizzle);
|
||||
bi_index high = bi_mkvec_v2i8(b, bytes[2], bytes[3], bi_zero());
|
||||
return bi_mkvec_v2i8_to(b, I->dest[0], bytes[0], bytes[1], high);
|
||||
}
|
||||
|
||||
return bi_iadd_v4u8_to(b, I->dest[0], I->src[0], bi_zero(), false);
|
||||
}
|
||||
case BI_OPCODE_SWZ_V4I8:
|
||||
return lower_swz_v4i8(b, I);
|
||||
|
||||
case BI_OPCODE_ICMP_I32:
|
||||
return bi_icmp_or_u32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
|
||||
|
||||
Reference in New Issue
Block a user