pan/va: improve lowering of SWZ_V4I8

Use bi_make_vec_to so that a single MKVEC.v2i8 can be used when possible.

Also add support for all swizzles instead of only mono-byte ones,
using bi_swizzle_to_byte_channels.

Update assert in bi_byte.

Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35643>
This commit is contained in:
Romaric Jodin
2025-07-04 15:05:09 +02:00
committed by Marge Bot
parent 857f29d67b
commit 6b693e281a
3 changed files with 74 additions and 62 deletions
+4 -4
View File
@@ -474,8 +474,8 @@ bi_is_imm_var_desc_handle(bi_builder *b, nir_intrinsic_instr *instr,
return bi_is_imm_desc_handle(b, instr, immediate, max);
}
static void bi_make_vec_to(bi_builder *b, bi_index final_dst, bi_index *src,
unsigned *channel, unsigned count, unsigned bitsize);
bi_instr *bi_make_vec_to(bi_builder *b, bi_index final_dst, bi_index *src,
unsigned *channel, unsigned count, unsigned bitsize);
/* Bifrost's load instructions lack a component offset despite operating in
* terms of vec4 slots. Usually I/O vectorization avoids nonzero components,
@@ -806,7 +806,7 @@ bi_make_vec16_helper(bi_builder *b, bi_index *src, unsigned *channel,
return bi_mkvec_v2i16(b, h0, h1);
}
static void
bi_instr *
bi_make_vec_to(bi_builder *b, bi_index dst, bi_index *src, unsigned *channel,
unsigned count, unsigned bitsize)
{
@@ -831,7 +831,7 @@ bi_make_vec_to(bi_builder *b, bi_index dst, bi_index *src, unsigned *channel,
srcs[i >> 2] = bi_make_vec8_helper(b, src + i, channel_offset, rem);
}
bi_emit_collect_to(b, dst, srcs, DIV_ROUND_UP(count, chan_per_word));
return bi_emit_collect_to(b, dst, srcs, DIV_ROUND_UP(count, chan_per_word));
}
static inline bi_instr *
+56 -2
View File
@@ -119,6 +119,47 @@ enum bi_swizzle {
BI_SWIZZLE_B33 = BI_SWIZZLE_B3333,
};
/*
 * Decode a four-byte swizzle into its individual lane numbers.
 *
 * For a swizzle named BI_SWIZZLE_Bwxyz that is handled below, stores
 * { w, x, y, z } into channels[0..3] and returns true. `channels` must point
 * to at least four writable elements.
 *
 * Any swizzle value not listed in the switch makes the function return false
 * without writing to `channels`.
 */
static inline bool
bi_swizzle_to_byte_channels(enum bi_swizzle swizzle, unsigned *channels)
{
   unsigned lane0, lane1, lane2, lane3;

/* One case per supported swizzle: the four digits of the enum name are the
 * lane numbers, so paste them into the label and record them as values. */
#define BYTE_LANES(l0, l1, l2, l3)                                             \
   case BI_SWIZZLE_B##l0##l1##l2##l3:                                          \
      lane0 = l0;                                                              \
      lane1 = l1;                                                              \
      lane2 = l2;                                                              \
      lane3 = l3;                                                              \
      break;

   switch (swizzle) {
      BYTE_LANES(0, 1, 0, 1)
      BYTE_LANES(0, 1, 2, 3)
      BYTE_LANES(2, 3, 0, 1)
      BYTE_LANES(2, 3, 2, 3)
      BYTE_LANES(0, 0, 0, 0)
      BYTE_LANES(1, 1, 1, 1)
      BYTE_LANES(2, 2, 2, 2)
      BYTE_LANES(3, 3, 3, 3)
      BYTE_LANES(0, 0, 1, 1)
      BYTE_LANES(2, 2, 3, 3)
      BYTE_LANES(1, 0, 3, 2)
      BYTE_LANES(3, 2, 1, 0)
      BYTE_LANES(0, 0, 2, 2)
      BYTE_LANES(1, 1, 0, 0)
      BYTE_LANES(2, 2, 0, 0)
      BYTE_LANES(3, 3, 0, 0)
      BYTE_LANES(2, 2, 1, 1)
      BYTE_LANES(3, 3, 1, 1)
      BYTE_LANES(1, 1, 2, 2)
      BYTE_LANES(3, 3, 2, 2)
      BYTE_LANES(0, 0, 3, 3)
      BYTE_LANES(1, 1, 3, 3)
      BYTE_LANES(1, 1, 2, 3)
   default:
      /* Not a swizzle we know how to split into bytes; leave the output
       * array alone. */
      return false;
   }
#undef BYTE_LANES

   channels[0] = lane0;
   channels[1] = lane1;
   channels[2] = lane2;
   channels[3] = lane3;
   return true;
}
/* Given a packed i16vec2/i8vec4 constant, apply a swizzle. Useful for constant
* folding and Valhall constant optimization. */
@@ -297,11 +338,21 @@ bi_half(bi_index idx, bool upper)
return bi_swz_16(idx, upper, upper);
}
/*
 * Report whether `lane` is one of the four lane numbers that `swizzle`
 * decodes to via bi_swizzle_to_byte_channels().
 *
 * Returns false when the swizzle cannot be decoded at all.
 */
static inline bool
bi_valid_lane_for_byte_swizzle(enum bi_swizzle swizzle, unsigned lane)
{
   unsigned selected[4];

   if (!bi_swizzle_to_byte_channels(swizzle, selected))
      return false;

   for (unsigned i = 0; i < 4; ++i) {
      if (selected[i] == lane)
         return true;
   }

   return false;
}
/*
 * Return a copy of `idx` whose swizzle has been replaced by
 * BI_SWIZZLE_B0 + lane (presumably the swizzle selecting byte `lane` alone;
 * confirm against the enum bi_swizzle definition).
 *
 * `idx` is taken by value, so the caller's index is not modified.
 *
 * NOTE(review): the three asserts below look like the pre-change and
 * post-change checks of a diff shown together. As written, all three are
 * evaluated: the incoming swizzle must be the identity BI_SWIZZLE_B0123,
 * `lane` must be below 4, and `lane` must be one of the lanes the incoming
 * swizzle decodes to. Confirm which of them are meant to remain.
 */
static inline bi_index
bi_byte(bi_index idx, unsigned lane)
{
assert(idx.swizzle == BI_SWIZZLE_B0123);
assert(lane < 4);
assert(bi_valid_lane_for_byte_swizzle(idx.swizzle, lane));
/* The cast is needed because enum + unsigned yields an integer type. */
idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_B0 + lane);
return idx;
}
@@ -1614,6 +1665,9 @@ bi_dontcare(bi_builder *b)
return bi_passthrough(BIFROST_SRC_FAU_HI);
}
bi_instr *bi_make_vec_to(bi_builder *b, bi_index dst, bi_index *src,
unsigned *channel, unsigned count, unsigned bitsize);
#define bi_worklist_init(ctx, w) u_worklist_init(w, ctx->num_blocks, ctx)
#define bi_worklist_push_head(w, block) u_worklist_push_head(w, block, index)
#define bi_worklist_push_tail(w, block) u_worklist_push_tail(w, block, index)
+14 -56
View File
@@ -25,49 +25,22 @@
#include "compiler.h"
#include "va_compiler.h"
#include "valhall.h"
#include "compiler.h"
static void
va_compose_mkvec_swz_v4i8(bi_index *b, enum bi_swizzle swz)
static bi_instr *
lower_swz_v4i8(bi_builder *b, bi_instr *I)
{
#define B(b0, b1, b2, b3) \
case BI_SWIZZLE_B##b0##b1##b2##b3: \
b[0].swizzle = BI_SWIZZLE_B##b0; \
b[1].swizzle = BI_SWIZZLE_B##b1; \
b[2].swizzle = BI_SWIZZLE_B##b2; \
b[3].swizzle = BI_SWIZZLE_B##b3; \
break;
switch (swz) {
B(0, 1, 0, 1);
B(0, 1, 2, 3);
B(2, 3, 0, 1);
B(2, 3, 2, 3);
B(0, 0, 0, 0);
B(1, 1, 1, 1);
B(2, 2, 2, 2);
B(3, 3, 3, 3);
B(0, 0, 1, 1);
B(2, 2, 3, 3);
B(1, 0, 3, 2);
B(3, 2, 1, 0);
B(0, 0, 2, 2);
B(1, 1, 0, 0);
B(2, 2, 0, 0);
B(3, 3, 0, 0);
B(2, 2, 1, 1);
B(3, 3, 1, 1);
B(1, 1, 2, 2);
B(3, 3, 2, 2);
B(0, 0, 3, 3);
B(1, 1, 3, 3);
B(1, 1, 2, 3);
default:
UNREACHABLE("Invalid swizzle");
break;
/* IADD.v4u8 is gone on v11 */
if (b->shader->arch >= 11) {
bi_index srcs[4] = {I->src[0], I->src[0], I->src[0], I->src[0]};
unsigned channels[4];
bool valid_swizzle =
bi_swizzle_to_byte_channels(I->src[0].swizzle, channels);
assert(valid_swizzle);
return bi_make_vec_to(b, I->dest[0], srcs, channels, 4, 8);
}
#undef B
return bi_iadd_v4u8_to(b, I->dest[0], I->src[0], bi_zero(), false);
}
static bi_instr *
@@ -79,23 +52,8 @@ lower(bi_builder *b, bi_instr *I)
case BI_OPCODE_SWZ_V2I16:
return bi_iadd_v2u16_to(b, I->dest[0], I->src[0], bi_zero(), false);
case BI_OPCODE_SWZ_V4I8: {
/* IADD.v4u8 is gone on v11 */
if (b->shader->arch >= 11) {
bi_index bytes[4] = {
I->src[0],
I->src[0],
I->src[0],
I->src[0],
};
va_compose_mkvec_swz_v4i8(bytes, I->src[0].swizzle);
bi_index high = bi_mkvec_v2i8(b, bytes[2], bytes[3], bi_zero());
return bi_mkvec_v2i8_to(b, I->dest[0], bytes[0], bytes[1], high);
}
return bi_iadd_v4u8_to(b, I->dest[0], I->src[0], bi_zero(), false);
}
case BI_OPCODE_SWZ_V4I8:
return lower_swz_v4i8(b, I);
case BI_OPCODE_ICMP_I32:
return bi_icmp_or_u32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),