radeon/llvm: Use tablegen pattern to lower bitconvert

This commit is contained in:
Tom Stellard
2012-05-25 12:18:14 -04:00
parent 667cdba211
commit 4863477e22
4 changed files with 11 additions and 294 deletions
@@ -115,6 +115,12 @@ class Insert_Element <ValueType elem_type, ValueType vec_type,
(INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
>;
// Selection pattern for a bitconvert between two value types held in the
// same register class. The output pattern is simply the source operand
// re-typed to the destination type; no instruction appears in it.
//   DstTy    - value type produced by the bitconvert
//   SrcTy    - value type of the operand being reinterpreted
//   RegClass - register class used for both the operand and the result
class BitConvert <ValueType DstTy, ValueType SrcTy, RegisterClass RegClass>
  : Pat <
  (DstTy (bitconvert (SrcTy RegClass:$src0))),
  (DstTy RegClass:$src0)
>;
include "R600Instructions.td"
include "SIInstrInfo.td"
@@ -640,7 +640,6 @@ AMDILTargetLowering::convertToReg(MachineOperand op) const
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
// BITCAST is custom-lowered; LowerOperation dispatches it with LOWER(BITCAST).
setOperationAction(ISD::BITCAST, VT, Custom);
setOperationAction(ISD::GlobalAddress, VT, Custom);
setOperationAction(ISD::JumpTable, VT, Custom);
setOperationAction(ISD::ConstantPool, VT, Custom);
@@ -1513,7 +1512,6 @@ AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
LOWER(SELECT);
LOWER(SETCC);
LOWER(SIGN_EXTEND_INREG);
LOWER(BITCAST);
LOWER(DYNAMIC_STACKALLOC);
LOWER(BRCOND);
LOWER(BR_CC);
@@ -3231,289 +3229,6 @@ AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
}
}
/// Custom lowering for a bitcast node.
///
/// A floating point source is first converted with AMDILISD::BITCONV to the
/// integer type of the same scalar width and element count (genIntType).
/// When the destination scalar type is an integer, the following shapes are
/// then expanded into explicit extract / shift / mask / or / insert nodes:
///   - i64 <-> <4 x i16>
///   - sub-32-bit source to >=32-bit destination        (case 1)
///   - >=32-bit source to sub-32-bit destination        (case 2)
///   - 8-bit source to 16-bit destination               (case 3)
///   - 16-bit source to 8-bit destination               (case 4)
/// Every other combination (including non power-of-two scalar sizes and
/// non-integer destinations) is emitted as a single AMDILISD::BITCONV node.
///
/// \param Op  the bitcast node; operand 0 is the value being reinterpreted.
/// \param DAG the selection DAG in which the replacement nodes are created.
/// \returns the value that replaces \p Op.
SDValue
AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Src = Op.getOperand(0);
  SDValue Dst = Op;
  SDValue Res;
  DebugLoc DL = Op.getDebugLoc();
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Dst.getValueType();
  // Lets bitcast the floating point types to an
  // equivalent integer type before converting to vectors.
  if (SrcVT.getScalarType().isFloatingPoint()) {
    Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType(
          SrcVT.getScalarType().getSimpleVT().getSizeInBits(),
          SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1),
        Src);
    SrcVT = Src.getValueType();
  }
  uint32_t ScalarSrcSize = SrcVT.getScalarType()
    .getSimpleVT().getSizeInBits();
  uint32_t ScalarDstSize = DstVT.getScalarType()
    .getSimpleVT().getSizeInBits();
  uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
  uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
  bool isVec = SrcVT.isVector();
  if (DstVT.getScalarType().isInteger() &&
      (SrcVT.getScalarType().isInteger()
       || SrcVT.getScalarType().isFloatingPoint())) {
    if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
        || (ScalarSrcSize == 64
          && DstNumEle == 4
          && ScalarDstSize == 16)) {
      // This is the problematic case when bitcasting i64 <-> <4 x i16>
      // This approach is a little different as we cannot generate a
      // <4 x i64> vector
      // as that is illegal in our backend and we are already past
      // the DAG legalizer.
      // So, in this case, we will do the following conversion.
      // Case 1:
      // %dst = <4 x i16> %src bitconvert i64 ==>
      // %tmp = <4 x i16> %src convert <4 x i32>
      // %tmp = <4 x i32> %tmp and 0xFFFF
      // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
      // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
      // %dst = <2 x i32> %tmp bitcast i64
      // case 2:
      // %dst = i64 %src bitconvert <4 x i16> ==>
      // %tmp = i64 %src bitcast <2 x i32>
      // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
      // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
      // %tmp = <4 x i32> %tmp and 0xFFFF
      // %dst = <4 x i16> %tmp bitcast <4 x i32>
      SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
          DAG.getConstant(0xFFFF, MVT::i32));
      SDValue const16 = DAG.getConstant(16, MVT::i32);
      if (ScalarDstSize == 64) {
        // case 1
        Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
        Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask);
        SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(0, MVT::i32));
        SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(1, MVT::i32));
        y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
        SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(2, MVT::i32));
        SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(3, MVT::i32));
        w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
        // x|y<<16 and z|w<<16 are the two 32-bit halves handed to LCREATE*.
        x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
        y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
        Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
            DL, MVT::i64, x, y);
        return Res;
      } else {
        // case 2
        SDValue lo = DAG.getNode(
            (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
            DL, MVT::i32, Src);
        SDValue lor16
          = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
        SDValue hi = DAG.getNode(
            (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
            DL, MVT::i32, Src);
        SDValue hir16
          = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
        // Element order of the result: lo, lo >> 16, hi, hi >> 16.
        SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
            MVT::v4i32, lo);
        SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(1, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, lor16, idxVal);
        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(2, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, hi, idxVal);
        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(3, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, hir16, idxVal);
        resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
        Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
        return Res;
      }
    } else {
      // There are four cases we need to worry about for bitcasts
      // where the size of all
      // source, intermediates and result is <= 128 bits, unlike
      // the above case
      // 1) Sub32bit bitcast 32bitAlign
      // %dst = <4 x i8> bitcast i32
      // (also <[2|4] x i16> to <[2|4] x i32>)
      // 2) 32bitAlign bitcast Sub32bit
      // %dst = i32 bitcast <4 x i8>
      // 3) Sub32bit bitcast LargerSub32bit
      // %dst = <2 x i8> bitcast i16
      // (also <4 x i8> to <2 x i16>)
      // 4) Sub32bit bitcast SmallerSub32bit
      // %dst = i16 bitcast <2 x i8>
      // (also <2 x i16> to <4 x i8>)
      // This also only handles types that are powers of two
      if ((ScalarDstSize & (ScalarDstSize - 1))
          || (ScalarSrcSize & (ScalarSrcSize - 1))) {
        // Not a power of two: nothing to expand here, fall through to the
        // generic BITCONV at the end of the function.
      } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
        // case 1:
        EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
#if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors
        SDValue res = DAG.getSExtOrTrunc(Src, DL, IntTy);
#else
        // Widen element by element instead of extending the whole vector.
        SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getUNDEF(IntTy.getScalarType()));
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(x, MVT::i32));
          SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              SrcVT.getScalarType(), Src,
              DAG.getConstant(x, MVT::i32));
          temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
          res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
              res, temp, idx);
        }
#endif
        // Keep only the low ScalarSrcSize bits of every widened element.
        SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
        SDValue *newEle = new SDValue[SrcNumEle];
        res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              IntTy.getScalarType(), res,
              DAG.getConstant(x, MVT::i32));
        }
        // Shift each element to its bit position inside the destination
        // element it is packed into.
        uint32_t Ratio = SrcNumEle / DstNumEle;
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          if (x % Ratio) {
            newEle[x] = DAG.getNode(ISD::SHL, DL,
                IntTy.getScalarType(), newEle[x],
                DAG.getConstant(ScalarSrcSize * (x % Ratio),
                  MVT::i32));
          }
        }
        // Merge neighbouring pairs ...
        for (uint32_t x = 0; x < SrcNumEle; x += 2) {
          newEle[x] = DAG.getNode(ISD::OR, DL,
              IntTy.getScalarType(), newEle[x], newEle[x + 1]);
        }
        if (ScalarSrcSize == 8) {
          // ... and, for 8-bit sources, merge the pairs into groups of four.
          for (uint32_t x = 0; x < SrcNumEle; x += 4) {
            newEle[x] = DAG.getNode(ISD::OR, DL,
                IntTy.getScalarType(), newEle[x], newEle[x + 2]);
          }
          if (DstNumEle == 1) {
            Dst = newEle[0];
          } else {
            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
                newEle[0]);
            for (uint32_t x = 1; x < DstNumEle; ++x) {
              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                  getPointerTy(), DAG.getConstant(x, MVT::i32));
              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                  DstVT, Dst, newEle[x * 4], idx);
            }
          }
        } else {
          if (DstNumEle == 1) {
            Dst = newEle[0];
          } else {
            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
                newEle[0]);
            for (uint32_t x = 1; x < DstNumEle; ++x) {
              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                  getPointerTy(), DAG.getConstant(x, MVT::i32));
              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                  DstVT, Dst, newEle[x * 2], idx);
            }
          }
        }
        delete [] newEle;
        return Dst;
      } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
        // case 2:
        EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
        SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getUNDEF(IntTy.getScalarType()));
        // Number of destination elements produced per source element.
        uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          for (uint32_t y = 0; y < mult; ++y) {
            SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                getPointerTy(),
                DAG.getConstant(x * mult + y, MVT::i32));
            SDValue t;
            if (SrcNumEle > 1) {
              t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
                  DL, SrcVT.getScalarType(), Src,
                  DAG.getConstant(x, MVT::i32));
            } else {
              t = Src;
            }
            if (y != 0) {
              t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
                  t, DAG.getConstant(y * ScalarDstSize,
                    MVT::i32));
            }
            vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
                DL, IntTy, vec, t, idx);
          }
        }
        Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
        return Dst;
      } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
        // case 3:
        SDValue *numEle = new SDValue[SrcNumEle];
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              MVT::i8, Src, DAG.getConstant(x, MVT::i32));
          numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
          numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
              DAG.getConstant(0xFF, MVT::i16));
        }
        // Pack each odd element into the high byte of the element before it.
        for (uint32_t x = 1; x < SrcNumEle; x += 2) {
          numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
              DAG.getConstant(8, MVT::i16));
          numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
              numEle[x-1], numEle[x]);
        }
        if (DstNumEle > 1) {
          // If we are not a scalar i16, the only other case is a
          // v2i16 since we can't have v8i8 at this point, v4i16
          // cannot be generated
          Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
              numEle[0]);
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(1, MVT::i32));
          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
              Dst, numEle[2], idx);
        } else {
          Dst = numEle[0];
        }
        delete [] numEle;
        return Dst;
      } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
        // case 4:
        SDValue *numEle = new SDValue[DstNumEle];
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          if (isVec) {
            numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                MVT::i16, Src, DAG.getConstant(x, MVT::i32));
          } else {
            // A scalar i16 source has no element to extract; use it
            // directly, as case 2 does for its scalar source.
            numEle[x * 2] = Src;
          }
          numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
              numEle[x * 2], DAG.getConstant(8, MVT::i16));
        }
        // Build the result with 16-bit elements first, then truncate each
        // element to 8 bits.
        MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
        Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
        for (uint32_t x = 1; x < DstNumEle; ++x) {
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(x, MVT::i32));
          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
              Dst, numEle[x], idx);
        }
        delete [] numEle;
        ty = (SrcNumEle == 1) ? MVT::v2i8 : MVT::v4i8;
        Res = DAG.getSExtOrTrunc(Dst, DL, ty);
        return Res;
      }
    }
  }
  // Everything not expanded above becomes a single BITCONV node.
  Res = DAG.getNode(AMDILISD::BITCONV,
      Dst.getDebugLoc(),
      Dst.getValueType(), Src);
  return Res;
}
SDValue
AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const
@@ -1164,4 +1164,9 @@ def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 5, sel_y>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 6, sel_z>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 7, sel_w>;
// bitconvert patterns: reinterpret between i32 and f32 in both directions.
// Both value types use the R600_Reg32 register class.
def : BitConvert <i32, f32, R600_Reg32>;
def : BitConvert <f32, i32, R600_Reg32>;
} // End isR600toCayman Predicate
@@ -164,15 +164,6 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
break;
}
case AMDIL::IL_ASINT_f32:
case AMDIL::IL_ASINT_i32:
case AMDIL::IL_ASFLOAT_f32:
case AMDIL::IL_ASFLOAT_i32:
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::COPY))
.addOperand(MI.getOperand(0))
.addOperand(MI.getOperand(1));
break;
case AMDIL::ILT:
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGT_INT))
.addOperand(MI.getOperand(0))