radeon/llvm: improve select_cc lowering to generate CND* more often
v2: - Simplify isZero()
- Remove an unused function prototype
- Clean up trailing whitespace
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
This commit is contained in:
@@ -165,6 +165,20 @@ static void llvm_emit_tex(
|
||||
emit_data->dst_type, args, c, LLVMReadNoneAttribute);
|
||||
}
|
||||
|
||||
static void emit_cndlt(
|
||||
const struct lp_build_tgsi_action * action,
|
||||
struct lp_build_tgsi_context * bld_base,
|
||||
struct lp_build_emit_data * emit_data)
|
||||
{
|
||||
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
|
||||
LLVMValueRef float_zero = lp_build_const_float(
|
||||
bld_base->base.gallivm, 0.0f);
|
||||
LLVMValueRef cmp = LLVMBuildFCmp(
|
||||
builder, LLVMRealULT, emit_data->args[0], float_zero, "");
|
||||
emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
|
||||
cmp, emit_data->args[1], emit_data->args[2], "");
|
||||
}
|
||||
|
||||
static void dp_fetch_args(
|
||||
struct lp_build_tgsi_context * bld_base,
|
||||
struct lp_build_emit_data * emit_data)
|
||||
@@ -241,6 +255,7 @@ LLVMModuleRef r600_tgsi_llvm(
|
||||
bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex;
|
||||
bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex;
|
||||
bld_base->op_actions[TGSI_OPCODE_TXP].emit = llvm_emit_tex;
|
||||
bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cndlt;
|
||||
|
||||
lp_build_tgsi_llvm(bld_base, tokens);
|
||||
|
||||
|
||||
@@ -516,6 +516,17 @@ SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
|
||||
Op.getOperand(1)));
|
||||
}
|
||||
|
||||
/// Report whether \p Op is a constant zero.
///
/// \returns the result of ConstantSDNode::isNullValue() when \p Op is an
/// integer constant node, the result of ConstantFPSDNode::isZero() when it
/// is a floating-point constant node, and false for any other node.
bool R600TargetLowering::isZero(SDValue Op) const
{
  // Integer constant: defer to the node's own null check.
  if (ConstantSDNode *IntConst = dyn_cast<ConstantSDNode>(Op))
    return IntConst->isNullValue();

  // Floating-point constant: defer to the node's own zero check.
  if (ConstantFPSDNode *FPConst = dyn_cast<ConstantFPSDNode>(Op))
    return FPConst->isZero();

  // Anything that is not a constant node is never treated as zero.
  return false;
}
|
||||
|
||||
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
|
||||
{
|
||||
DebugLoc DL = Op.getDebugLoc();
|
||||
@@ -568,47 +579,58 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
|
||||
if (isHWTrueValue(False) && isHWFalseValue(True)) {
|
||||
}
|
||||
|
||||
// XXX Check if we can lower this to a SELECT or if it is supported by a native
|
||||
// operation. (The code below does this but we don't have the Instruction
|
||||
// selection patterns to do this yet.)
|
||||
#if 0
|
||||
// Check if we can lower this to a native operation.
|
||||
// CND* instructions require all operands to have the same type,
|
||||
// and RHS to be zero.
|
||||
|
||||
if (isZero(LHS) || isZero(RHS)) {
|
||||
SDValue Cond = (isZero(LHS) ? RHS : LHS);
|
||||
bool SwapTF = false;
|
||||
SDValue Zero = (isZero(LHS) ? LHS : RHS);
|
||||
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
|
||||
if (CompareVT != VT) {
|
||||
True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
|
||||
False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
|
||||
}
|
||||
if (isZero(LHS)) {
|
||||
CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
|
||||
}
|
||||
|
||||
switch (CCOpcode) {
|
||||
case ISD::SETOEQ:
|
||||
case ISD::SETUEQ:
|
||||
case ISD::SETEQ:
|
||||
SwapTF = true;
|
||||
// Fall through
|
||||
case ISD::SETONE:
|
||||
case ISD::SETUNE:
|
||||
case ISD::SETNE:
|
||||
// We can lower to select
|
||||
if (SwapTF) {
|
||||
Temp = True;
|
||||
True = False;
|
||||
False = Temp;
|
||||
}
|
||||
// CNDE
|
||||
return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
|
||||
case ISD::SETULE:
|
||||
case ISD::SETULT:
|
||||
case ISD::SETOLE:
|
||||
case ISD::SETOLT:
|
||||
case ISD::SETLE:
|
||||
case ISD::SETLT:
|
||||
CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
|
||||
Temp = True;
|
||||
True = False;
|
||||
False = Temp;
|
||||
break;
|
||||
default:
|
||||
// Supported by a native operation (CNDGE, CNDGT)
|
||||
return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
|
||||
break;
|
||||
}
|
||||
SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
|
||||
Cond, Zero,
|
||||
True, False,
|
||||
DAG.getCondCode(CCOpcode));
|
||||
return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// If we make it this far, it means we have no native instructions to handle
|
||||
// this SELECT_CC, so we must lower it.
|
||||
SDValue HWTrue, HWFalse;
|
||||
|
||||
if (VT == MVT::f32) {
|
||||
HWTrue = DAG.getConstantFP(1.0f, VT);
|
||||
HWFalse = DAG.getConstantFP(0.0f, VT);
|
||||
} else if (VT == MVT::i32) {
|
||||
HWTrue = DAG.getConstant(-1, VT);
|
||||
HWFalse = DAG.getConstant(0, VT);
|
||||
if (CompareVT == MVT::f32) {
|
||||
HWTrue = DAG.getConstantFP(1.0f, CompareVT);
|
||||
HWFalse = DAG.getConstantFP(0.0f, CompareVT);
|
||||
} else if (CompareVT == MVT::i32) {
|
||||
HWTrue = DAG.getConstant(-1, CompareVT);
|
||||
HWFalse = DAG.getConstant(0, CompareVT);
|
||||
}
|
||||
else {
|
||||
assert(!"Unhandled value type in LowerSELECT_CC");
|
||||
@@ -616,15 +638,12 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
|
||||
|
||||
// Lower this unsupported SELECT_CC into a combination of two supported
|
||||
// SELECT_CC operations.
|
||||
SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);
|
||||
SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
|
||||
|
||||
// Convert floating point condition to i1
|
||||
if (VT == MVT::f32) {
|
||||
Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
|
||||
DAG.getNode(ISD::FNEG, DL, VT, Cond));
|
||||
}
|
||||
|
||||
return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
|
||||
return DAG.getNode(ISD::SELECT_CC, DL, VT,
|
||||
Cond, HWFalse,
|
||||
True, False,
|
||||
DAG.getCondCode(ISD::SETNE));
|
||||
}
|
||||
|
||||
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
|
||||
|
||||
@@ -60,6 +60,8 @@ private:
|
||||
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
bool isZero(SDValue Op) const;
|
||||
};
|
||||
|
||||
} // End namespace llvm;
|
||||
|
||||
@@ -545,7 +545,25 @@ def SETGE_UINT : R600_2OP <
|
||||
def CNDE_INT : R600_3OP <
|
||||
0x1C, "CNDE_INT",
|
||||
[(set (i32 R600_Reg32:$dst),
|
||||
(select R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))]
|
||||
(selectcc (i32 R600_Reg32:$src0), 0,
|
||||
(i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
|
||||
COND_EQ))]
|
||||
>;
|
||||
|
||||
// CNDGE_INT: three-operand integer conditional select (opcode 0x1E).
// The pattern matches a selectcc that compares $src0 against the constant 0
// with COND_GE, yielding $src1 when the condition holds and $src2 otherwise.
// NOTE(review): COND_GE is defined outside this view; presumably a signed
// integer >= here -- confirm against its PatLeaf definition.
def CNDGE_INT : R600_3OP <
|
||||
0x1E, "CNDGE_INT",
|
||||
[(set (i32 R600_Reg32:$dst),
|
||||
(selectcc (i32 R600_Reg32:$src0), 0,
|
||||
(i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
|
||||
COND_GE))]
|
||||
>;
|
||||
|
||||
// CNDGT_INT: three-operand integer conditional select (opcode 0x1D).
// The pattern matches a selectcc that compares $src0 against the constant 0
// with COND_GT, yielding $src1 when the condition holds and $src2 otherwise.
// NOTE(review): COND_GT is defined outside this view; presumably a signed
// integer > here -- confirm against its PatLeaf definition.
def CNDGT_INT : R600_3OP <
|
||||
0x1D, "CNDGT_INT",
|
||||
[(set (i32 R600_Reg32:$dst),
|
||||
(selectcc (i32 R600_Reg32:$src0), 0,
|
||||
(i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
|
||||
COND_GT))]
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@@ -642,18 +660,26 @@ class MULADD_Common <bits<11> inst> : R600_3OP <
|
||||
|
||||
class CNDE_Common <bits<11> inst> : R600_3OP <
|
||||
inst, "CNDE",
|
||||
[(set (f32 R600_Reg32:$dst),
|
||||
(select (i32 (fp_to_sint (fneg R600_Reg32:$src0))), (f32 R600_Reg32:$src2), (f32 R600_Reg32:$src1)))]
|
||||
[(set R600_Reg32:$dst,
|
||||
(selectcc (f32 R600_Reg32:$src0), FP_ZERO,
|
||||
(f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
|
||||
COND_EQ))]
|
||||
>;
|
||||
|
||||
class CNDGT_Common <bits<11> inst> : R600_3OP <
|
||||
inst, "CNDGT",
|
||||
[]
|
||||
[(set R600_Reg32:$dst,
|
||||
(selectcc (f32 R600_Reg32:$src0), FP_ZERO,
|
||||
(f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
|
||||
COND_GT))]
|
||||
>;
|
||||
|
||||
|
||||
class CNDGE_Common <bits<11> inst> : R600_3OP <
|
||||
inst, "CNDGE",
|
||||
[(set R600_Reg32:$dst, (int_AMDGPU_cndlt R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))]
|
||||
[(set R600_Reg32:$dst,
|
||||
(selectcc (f32 R600_Reg32:$src0), FP_ZERO,
|
||||
(f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
|
||||
COND_GE))]
|
||||
>;
|
||||
|
||||
class DOT4_Common <bits<11> inst> : R600_REDUCTION <
|
||||
|
||||
Reference in New Issue
Block a user