nvir: introduce OP_SHF

We already use a hack from NVC0LegalizeSSA::handleShift() on GK110 and
newer which encodes SHF into the existing SHL/SHR opcodes, but there's
a couple of problems with it:

- LO/HI are swapped in one of the directions, which is very confusing.
- The initial SM70 code will emit this from NIR->NVIR, and using the
  existing encodings will confuse the optimisation passes.

As I want to limit the impact on other GPUs from the initial bring-up
of Volta/Turing, let's add an explicit representation of SHF in the IR.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Reviewed-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5377>
This commit is contained in:
Ben Skeggs
2020-06-07 09:51:53 +10:00
committed by Marge Bot
parent 60b28f7a50
commit e1e4d1d373
3 changed files with 12 additions and 3 deletions
@@ -70,6 +70,7 @@ enum operation
OP_LOP3_LUT,
OP_SHL,
OP_SHR,
OP_SHF,
OP_MAX,
OP_MIN,
OP_SAT, // CLAMP(f32, 0.0, 1.0)
@@ -271,6 +272,13 @@ enum operation
#define NV50_IR_SUBOP_MINMAX_MED 2
#define NV50_IR_SUBOP_MINMAX_HIGH 3
#define NV50_IR_SUBOP_SHF_L (0 << 0)
#define NV50_IR_SUBOP_SHF_R (1 << 0)
#define NV50_IR_SUBOP_SHF_LO (0 << 1)
#define NV50_IR_SUBOP_SHF_HI (1 << 1)
#define NV50_IR_SUBOP_SHF_C (0 << 2)
#define NV50_IR_SUBOP_SHF_W (1 << 2)
// xmad(src0, src1, 0) << 16 + src2
#define NV50_IR_SUBOP_XMAD_PSL (1 << 0)
// (xmad(src0, src1, src2) & 0xffff) | (src1 << 16)
@@ -96,6 +96,7 @@ const char *operationStr[OP_LAST + 1] =
"lop3 lut",
"shl",
"shr",
"shf",
"max",
"min",
"sat",
@@ -33,7 +33,7 @@ const uint8_t Target::operationSrcNr[] =
2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
3, 3, // SHLADD, XMAD
1, 1, 1, // ABS, NEG, NOT
2, 2, 2, 3, 2, 2, // AND, OR, XOR, LOP3_LUT, SHL, SHR
2, 2, 2, 3, 2, 2, 3, // AND, OR, XOR, LOP3_LUT, SHL, SHR, SHF
2, 2, 1, // MAX, MIN, SAT
1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT
3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT
@@ -76,10 +76,10 @@ const OpClass Target::operationClass[] =
OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
OPCLASS_ARITH, OPCLASS_ARITH,
OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
// ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR
// ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR, SHF
OPCLASS_CONVERT, OPCLASS_CONVERT,
OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
OPCLASS_SHIFT, OPCLASS_SHIFT,
OPCLASS_SHIFT, OPCLASS_SHIFT, OPCLASS_SHIFT,
// MAX, MIN
OPCLASS_COMPARE, OPCLASS_COMPARE,
// SAT, CEIL, FLOOR, TRUNC; CVT