|
|
|
@@ -640,7 +640,6 @@ AMDILTargetLowering::convertToReg(MachineOperand op) const
|
|
|
|
|
setOperationAction(ISD::SREM, VT, Expand);
|
|
|
|
|
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
|
|
|
|
|
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
|
|
|
|
|
setOperationAction(ISD::BITCAST, VT, Custom);
|
|
|
|
|
setOperationAction(ISD::GlobalAddress, VT, Custom);
|
|
|
|
|
setOperationAction(ISD::JumpTable, VT, Custom);
|
|
|
|
|
setOperationAction(ISD::ConstantPool, VT, Custom);
|
|
|
|
@@ -1513,7 +1512,6 @@ AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
|
|
|
|
|
LOWER(SELECT);
|
|
|
|
|
LOWER(SETCC);
|
|
|
|
|
LOWER(SIGN_EXTEND_INREG);
|
|
|
|
|
LOWER(BITCAST);
|
|
|
|
|
LOWER(DYNAMIC_STACKALLOC);
|
|
|
|
|
LOWER(BRCOND);
|
|
|
|
|
LOWER(BR_CC);
|
|
|
|
@@ -3231,289 +3229,6 @@ AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Custom lowering of a bitcast node.
//
// Operand 0 of Op is the value being reinterpreted; Op's own value type is
// the destination type.  Floating-point sources are first converted to the
// same-width integer type with AMDILISD::BITCONV.  Integer-to-integer casts
// that change the element width are then expanded by hand into
// extract / shift / mask / insert sequences:
//
//   * i64 <-> <4 x i16>, which goes through <4 x i32> and the
//     LCREATE / LCOMPLO / LCOMPHI nodes, and
//   * four "small" cases where source, intermediates and result are all
//     <= 128 bits (see the comment on that branch below).
//
// Anything that is not handled by one of those cases is emitted as a single
// AMDILISD::BITCONV node of the destination type.
//
// Returns the SDValue that replaces Op.
SDValue
AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Src = Op.getOperand(0);
  SDValue Dst = Op;
  DebugLoc DL = Op.getDebugLoc();
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Dst.getValueType();

  // Bitcast floating point sources to the equivalent integer type first so
  // that everything below only has to reason about integer lanes.
  if (SrcVT.getScalarType().isFloatingPoint()) {
    Src = DAG.getNode(AMDILISD::BITCONV, DL,
                      genIntType(
                        SrcVT.getScalarType().getSimpleVT().getSizeInBits(),
                        SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1),
                      Src);
    SrcVT = Src.getValueType();
  }

  uint32_t ScalarSrcSize = SrcVT.getScalarType()
    .getSimpleVT().getSizeInBits();
  uint32_t ScalarDstSize = DstVT.getScalarType()
    .getSimpleVT().getSizeInBits();
  uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
  uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
  bool isVec = SrcVT.isVector();

  if (DstVT.getScalarType().isInteger() &&
      (SrcVT.getScalarType().isInteger()
       || SrcVT.getScalarType().isFloatingPoint())) {
    if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
        || (ScalarSrcSize == 64
            && DstNumEle == 4
            && ScalarDstSize == 16)) {
      // This is the problematic case when bitcasting i64 <-> <4 x i16>.
      // This approach is a little different as we cannot generate a
      // <4 x i64> vector, as that is illegal in our backend and we are
      // already past the DAG legalizer.
      // So, in this case, we will do the following conversion.
      // Case 1:
      // %dst = <4 x i16> %src bitconvert i64 ==>
      // %tmp = <4 x i16> %src convert <4 x i32>
      // %tmp = <4 x i32> %tmp and 0xFFFF
      // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
      // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
      // %dst = <2 x i32> %tmp bitcast i64
      // Case 2:
      // %dst = i64 %src bitconvert <4 x i16> ==>
      // %tmp = i64 %src bitcast <2 x i32>
      // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
      // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
      // %tmp = <4 x i32> %tmp and 0xFFFF
      // %dst = <4 x i16> %tmp bitcast <4 x i32>
      SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
                                 DAG.getConstant(0xFFFF, MVT::i32));
      SDValue const16 = DAG.getConstant(16, MVT::i32);
      if (ScalarDstSize == 64) {
        // Case 1: pack four 16-bit lanes into the two 32-bit halves of an
        // i64.  The AND removes the bits introduced by the sign extension.
        SDValue Wide = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
        Wide = DAG.getNode(ISD::AND, DL, Wide.getValueType(), Wide, mask);
        SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
                                Wide, DAG.getConstant(0, MVT::i32));
        SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
                                Wide, DAG.getConstant(1, MVT::i32));
        y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
        SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
                                Wide, DAG.getConstant(2, MVT::i32));
        SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
                                Wide, DAG.getConstant(3, MVT::i32));
        w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
        SDValue lo = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
        SDValue hi = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
        // NOTE(review): isVec describes the *source*, which always has four
        // elements on this path, so LCREATE2 is always chosen here even
        // though the result type is the scalar MVT::i64 -- confirm that
        // this is the intended opcode.
        return DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
                           DL, MVT::i64, lo, hi);
      } else {
        // Case 2: split the 64-bit source into its low/high 32-bit halves
        // and spread them, together with their upper 16 bits shifted down,
        // over four 32-bit lanes that are then masked and truncated.
        SDValue lo = DAG.getNode(
            (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
            DL, MVT::i32, Src);
        SDValue lor16 = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
        SDValue hi = DAG.getNode(
            (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
            DL, MVT::i32, Src);
        SDValue hir16 = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
        SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
                                     MVT::v4i32, lo);
        SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
                                     getPointerTy(),
                                     DAG.getConstant(1, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
                             resVec, lor16, idxVal);
        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
                             getPointerTy(), DAG.getConstant(2, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
                             resVec, hi, idxVal);
        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
                             getPointerTy(), DAG.getConstant(3, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
                             resVec, hir16, idxVal);
        resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
        return DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
      }
    } else {
      // There are four cases we need to worry about for bitcasts
      // where the size of all source, intermediates and result is
      // <= 128 bits, unlike the above case:
      // 1) Sub32bit bitcast 32bitAlign
      //    %dst = <4 x i8> bitcast i32
      //    (also <[2|4] x i16> to <[2|4] x i32>)
      // 2) 32bitAlign bitcast Sub32bit
      //    %dst = i32 bitcast <4 x i8>
      // 3) Sub32bit bitcast LargerSub32bit
      //    %dst = <2 x i8> bitcast i16
      //    (also <4 x i8> to <2 x i16>)
      // 4) Sub32bit bitcast SmallerSub32bit
      //    %dst = i16 bitcast <2 x i8>
      //    (also <2 x i16> to <4 x i8>)
      // This also only handles types that are powers of two.
      if ((ScalarDstSize & (ScalarDstSize - 1))
          || (ScalarSrcSize & (ScalarSrcSize - 1))) {
        // Non power-of-two element width: nothing special is done here,
        // control falls through to the plain BITCONV at the bottom.
      } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
        // case 1: several narrow source elements are merged into each
        // wide destination element.
        EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
        // Widen element by element.  The original code carried a disabled
        // alternative that called getSExtOrTrunc on the whole vector, with
        // the remark that LLVM could not sign-extend vectors here.
        SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
                                  DAG.getUNDEF(IntTy.getScalarType()));
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                                    getPointerTy(),
                                    DAG.getConstant(x, MVT::i32));
          SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                                     SrcVT.getScalarType(), Src,
                                     DAG.getConstant(x, MVT::i32));
          temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
          res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
                            res, temp, idx);
        }
        // Keep only the low ScalarSrcSize bits of every widened lane
        // (ScalarSrcSize < 32 on this path, so the shift is in range).
        SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
        SDValue *newEle = new SDValue[SrcNumEle];
        res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                                  IntTy.getScalarType(), res,
                                  DAG.getConstant(x, MVT::i32));
        }
        // Move each lane to its bit position inside the destination
        // element it contributes to.
        uint32_t Ratio = SrcNumEle / DstNumEle;
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          if (x % Ratio) {
            newEle[x] = DAG.getNode(ISD::SHL, DL,
                IntTy.getScalarType(), newEle[x],
                DAG.getConstant(ScalarSrcSize * (x % Ratio),
                                MVT::i32));
          }
        }
        // OR neighbouring lanes together; 8-bit sources need a second
        // pass so that four lanes end up combined in every fourth slot.
        for (uint32_t x = 0; x < SrcNumEle; x += 2) {
          newEle[x] = DAG.getNode(ISD::OR, DL,
              IntTy.getScalarType(), newEle[x], newEle[x + 1]);
        }
        uint32_t Stride = 2;
        if (ScalarSrcSize == 8) {
          for (uint32_t x = 0; x < SrcNumEle; x += 4) {
            newEle[x] = DAG.getNode(ISD::OR, DL,
                IntTy.getScalarType(), newEle[x], newEle[x + 2]);
          }
          Stride = 4;
        }
        // The combined values now live at indices 0, Stride, 2*Stride, ...
        if (DstNumEle == 1) {
          Dst = newEle[0];
        } else {
          Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
                            newEle[0]);
          for (uint32_t x = 1; x < DstNumEle; ++x) {
            SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                                      getPointerTy(),
                                      DAG.getConstant(x, MVT::i32));
            Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                              DstVT, Dst, newEle[x * Stride], idx);
          }
        }
        delete [] newEle;
        return Dst;
      } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
        // case 2: every wide source element is split into `mult` narrow
        // destination elements by shifting right and truncating.
        EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
        SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
                                  DAG.getUNDEF(IntTy.getScalarType()));
        uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          // The source element does not depend on y, so fetch it once.
          SDValue elt = Src;
          if (SrcNumEle > 1) {
            elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
                              DL, SrcVT.getScalarType(), Src,
                              DAG.getConstant(x, MVT::i32));
          }
          for (uint32_t y = 0; y < mult; ++y) {
            SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                                      getPointerTy(),
                                      DAG.getConstant(x * mult + y,
                                                      MVT::i32));
            SDValue t = elt;
            if (y != 0) {
              t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
                              t, DAG.getConstant(y * ScalarDstSize,
                                                 MVT::i32));
            }
            vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
                              DL, IntTy, vec, t, idx);
          }
        }
        Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
        return Dst;
      } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
        // case 3: pairs of i8 elements are combined into i16 values.
        SDValue *numEle = new SDValue[SrcNumEle];
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                                  MVT::i8, Src,
                                  DAG.getConstant(x, MVT::i32));
          numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
          // Drop the bits introduced by the sign extension.
          numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
                                  DAG.getConstant(0xFF, MVT::i16));
        }
        // Odd elements become the high byte of the preceding even slot.
        for (uint32_t x = 1; x < SrcNumEle; x += 2) {
          numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
                                  DAG.getConstant(8, MVT::i16));
          numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
                                      numEle[x - 1], numEle[x]);
        }
        if (DstNumEle > 1) {
          // If we are not a scalar i16, the only other case is a
          // v2i16 since we can't have v8i8 at this point, v4i16
          // cannot be generated.
          Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
                            numEle[0]);
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                                    getPointerTy(),
                                    DAG.getConstant(1, MVT::i32));
          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
                            Dst, numEle[2], idx);
        } else {
          Dst = numEle[0];
        }
        delete [] numEle;
        return Dst;
      } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
        // case 4: every i16 element yields two values (itself and itself
        // shifted right by 8) that are truncated to i8 at the end.
        SDValue *numEle = new SDValue[DstNumEle];
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                                      MVT::i16, Src,
                                      DAG.getConstant(x, MVT::i32));
          numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
                                          numEle[x * 2],
                                          DAG.getConstant(8, MVT::i16));
        }
        MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
        Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
        for (uint32_t x = 1; x < DstNumEle; ++x) {
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                                    getPointerTy(),
                                    DAG.getConstant(x, MVT::i32));
          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
                            Dst, numEle[x], idx);
        }
        delete [] numEle;
        ty = (SrcNumEle == 1) ? MVT::v2i8 : MVT::v4i8;
        return DAG.getSExtOrTrunc(Dst, DL, ty);
      }
    }
  }

  // No special handling applied: emit one BITCONV of the (possibly
  // integer-converted) source.  Dst is still Op on every path that reaches
  // this point, so DL and DstVT are exactly its debug location and type.
  return DAG.getNode(AMDILISD::BITCONV, DL, DstVT, Src);
}
|
|
|
|
|
|
|
|
|
|
SDValue
|
|
|
|
|
AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
|
|
|
|
|
SelectionDAG &DAG) const
|
|
|
|
|