nvir: introduce OP_SHF
We already use a hack from NVC0LegalizeSSA::handleShift() on GK110 and newer which encodes SHF into the existing SHL/SHR opcodes, but there are a couple of problems with it:

- LO/HI are swapped in one of the directions, which is very confusing.
- The initial SM70 code will emit this from NIR->NVIR, and using the existing encodings will confuse the optimisation passes.

As I want to limit the impact on other GPUs from the initial bring-up of Volta/Turing, let's add an explicit representation of SHF in the IR.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Reviewed-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5377>
This commit is contained in:
@@ -70,6 +70,7 @@ enum operation
|
||||
OP_LOP3_LUT,
|
||||
OP_SHL,
|
||||
OP_SHR,
|
||||
OP_SHF,
|
||||
OP_MAX,
|
||||
OP_MIN,
|
||||
OP_SAT, // CLAMP(f32, 0.0, 1.0)
|
||||
@@ -271,6 +272,13 @@ enum operation
|
||||
#define NV50_IR_SUBOP_MINMAX_MED 2
|
||||
#define NV50_IR_SUBOP_MINMAX_HIGH 3
|
||||
|
||||
#define NV50_IR_SUBOP_SHF_L (0 << 0)
|
||||
#define NV50_IR_SUBOP_SHF_R (1 << 0)
|
||||
#define NV50_IR_SUBOP_SHF_LO (0 << 1)
|
||||
#define NV50_IR_SUBOP_SHF_HI (1 << 1)
|
||||
#define NV50_IR_SUBOP_SHF_C (0 << 2)
|
||||
#define NV50_IR_SUBOP_SHF_W (1 << 2)
|
||||
|
||||
// (xmad(src0, src1, 0) << 16) + src2
|
||||
#define NV50_IR_SUBOP_XMAD_PSL (1 << 0)
|
||||
// (xmad(src0, src1, src2) & 0xffff) | (src1 << 16)
|
||||
|
||||
@@ -96,6 +96,7 @@ const char *operationStr[OP_LAST + 1] =
|
||||
"lop3 lut",
|
||||
"shl",
|
||||
"shr",
|
||||
"shf",
|
||||
"max",
|
||||
"min",
|
||||
"sat",
|
||||
|
||||
@@ -33,7 +33,7 @@ const uint8_t Target::operationSrcNr[] =
|
||||
2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
|
||||
3, 3, // SHLADD, XMAD
|
||||
1, 1, 1, // ABS, NEG, NOT
|
||||
2, 2, 2, 3, 2, 2, // AND, OR, XOR, LOP3_LUT, SHL, SHR
|
||||
2, 2, 2, 3, 2, 2, 3, // AND, OR, XOR, LOP3_LUT, SHL, SHR, SHF
|
||||
2, 2, 1, // MAX, MIN, SAT
|
||||
1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT
|
||||
3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT
|
||||
@@ -76,10 +76,10 @@ const OpClass Target::operationClass[] =
|
||||
OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
|
||||
OPCLASS_ARITH, OPCLASS_ARITH,
|
||||
OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
|
||||
// ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR
|
||||
// ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR, SHF
|
||||
OPCLASS_CONVERT, OPCLASS_CONVERT,
|
||||
OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
|
||||
OPCLASS_SHIFT, OPCLASS_SHIFT,
|
||||
OPCLASS_SHIFT, OPCLASS_SHIFT, OPCLASS_SHIFT,
|
||||
// MAX, MIN
|
||||
OPCLASS_COMPARE, OPCLASS_COMPARE,
|
||||
// SAT, CEIL, FLOOR, TRUNC; CVT
|
||||
|
||||
Reference in New Issue
Block a user