radeon/llvm: support for interpolation intrinsics
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
This commit is contained in:
@@ -346,5 +346,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
|
||||
NODE_NAME_CASE(SMIN)
|
||||
NODE_NAME_CASE(UMIN)
|
||||
NODE_NAME_CASE(URECIP)
|
||||
NODE_NAME_CASE(INTERP)
|
||||
NODE_NAME_CASE(INTERP_P0)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -119,6 +119,8 @@ enum
|
||||
SMIN,
|
||||
UMIN,
|
||||
URECIP,
|
||||
INTERP,
|
||||
INTERP_P0,
|
||||
LAST_AMDGPU_ISD_NUMBER
|
||||
};
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include "R600Defines.h"
|
||||
#include "R600InstrInfo.h"
|
||||
#include "R600RegisterInfo.h"
|
||||
#include "R600MachineFunctionInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
@@ -29,6 +30,9 @@ private:
|
||||
static char ID;
|
||||
const R600InstrInfo *TII;
|
||||
|
||||
bool ExpandInputPerspective(MachineInstr& MI);
|
||||
bool ExpandInputConstant(MachineInstr& MI);
|
||||
|
||||
public:
|
||||
R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
|
||||
TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
|
||||
@@ -48,6 +52,126 @@ FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
|
||||
return new R600ExpandSpecialInstrsPass(TM);
|
||||
}
|
||||
|
||||
/// Expand an input_perspective pseudo instruction into INTERP_ZW / INTERP_XY
/// machine instructions.
///
/// Operand 0 is the destination register whose sel_x..sel_w sub-registers
/// receive the results, operand 1 selects the ij pair (0 = perspective,
/// 1 = linear) and operand 2 is multiplied by 4 to pick the R600_TReg32
/// register that is read.
///
/// \returns false (leaving \p MI untouched) when \p MI is not an
/// input_perspective; otherwise inserts the eight replacement instructions
/// before \p MI, erases \p MI and returns true.
bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI)
{
  if (MI.getOpcode() != AMDGPU::input_perspective)
    return false;

  const R600RegisterInfo &TRI = TII->getRegisterInfo();
  MachineBasicBlock &MBB = *MI.getParent();
  MachineBasicBlock::iterator I = &MI;
  unsigned DstReg = MI.getOperand(0).getReg();
  R600MachineFunctionInfo *MFI =
      MBB.getParent()->getInfo<R600MachineFunctionInfo>();

  // Initialised so that a build without assertions never reads an
  // indeterminate value when operand 1 holds an unexpected selector.
  unsigned IJIndexBase = 0;

  // In Evergreen ISA doc section 8.3.2 :
  // We need to interpolate XY and ZW in two different instruction groups.
  // An INTERP_* must occupy all 4 slots of an instruction group.
  // Output of INTERP_XY is written in X,Y slots
  // Output of INTERP_ZW is written in Z,W slots
  //
  // Thus interpolation requires the following sequences :
  //
  // AnyGPR.x = INTERP_ZW; (Write Masked Out)
  // AnyGPR.y = INTERP_ZW; (Write Masked Out)
  // DstGPR.z = INTERP_ZW;
  // DstGPR.w = INTERP_ZW; (End of first IG)
  // DstGPR.x = INTERP_XY;
  // DstGPR.y = INTERP_XY;
  // AnyGPR.z = INTERP_XY; (Write Masked Out)
  // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG)
  //
  switch (MI.getOperand(1).getImm()) {
  case 0:
    IJIndexBase = MFI->GetIJPerspectiveIndex();
    break;
  case 1:
    IJIndexBase = MFI->GetIJLinearIndex();
    break;
  default:
    assert(0 && "Unknow ij index");
  }

  // The register that is read does not depend on the slot, so look it up once.
  const unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
      4 * MI.getOperand(2).getImm());

  // Sub-register index for each of the four slots of an instruction group.
  static const unsigned ChannelSel[4] = {
    AMDGPU::sel_x, AMDGPU::sel_y, AMDGPU::sel_z, AMDGPU::sel_w
  };

  for (unsigned i = 0; i < 8; i++) {
    unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
        2 * IJIndexBase + ((i + 1) % 2));
    unsigned Res = TRI.getSubReg(DstReg, ChannelSel[i % 4]);

    // Slots 0-3 form the INTERP_ZW group, slots 4-7 the INTERP_XY group.
    const MCInstrDesc &Opcode = (i < 4) ?
        TII->get(AMDGPU::INTERP_ZW) :
        TII->get(AMDGPU::INTERP_XY);

    MachineInstr *NewMI = BuildMI(MBB, I, MBB.findDebugLoc(I), Opcode, Res)
        .addReg(IJIndex)
        .addReg(ReadReg)
        .addImm(0);

    // Only z/w of the first group (i = 2, 3) and x/y of the second group
    // (i = 4, 5) carry results; every other write is masked out.
    if (i < 2 || i > 5)
      TII->addFlag(NewMI, 0, MO_FLAG_MASK);

    // Every slot except the fourth of each group is flagged as not last.
    if (i % 4 != 3)
      TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
  }

  MI.eraseFromParent();

  return true;
}
|
||||
|
||||
/// Expand an input_constant pseudo instruction into four INTERP_LOAD_P0
/// instructions, one per sub-register (sel_x..sel_w) of the destination.
///
/// Operand 0 is the destination register; operand 1 is multiplied by 4 to
/// find the first of the four consecutive R600_TReg32 registers that are read.
///
/// \returns false when \p MI is not an input_constant; otherwise inserts the
/// replacement instructions before \p MI, erases \p MI and returns true.
bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI)
{
  const R600RegisterInfo &TRI = TII->getRegisterInfo();
  if (MI.getOpcode() != AMDGPU::input_constant)
    return false;

  // Sub-register index written by each of the four emitted instructions.
  static const unsigned ChannelSubRegs[4] = {
    AMDGPU::sel_x, AMDGPU::sel_y, AMDGPU::sel_z, AMDGPU::sel_w
  };

  MachineBasicBlock *Parent = MI.getParent();
  MachineBasicBlock::iterator InsertPt = &MI;
  unsigned VecDst = MI.getOperand(0).getReg();

  for (unsigned Chan = 0; Chan != 4; ++Chan) {
    unsigned SrcReg = AMDGPU::R600_TReg32RegClass.getRegister(
        4 * MI.getOperand(1).getImm() + Chan);
    unsigned ChanDst = TRI.getSubReg(VecDst, ChannelSubRegs[Chan]);

    MachineInstr *Load = BuildMI(*Parent, InsertPt,
                                 Parent->findDebugLoc(InsertPt),
                                 TII->get(AMDGPU::INTERP_LOAD_P0), ChanDst)
        .addReg(SrcReg)
        .addImm(0);

    // The first three instructions are flagged as not being the last one.
    const bool IsLastChan = (Chan == 3);
    if (!IsLastChan)
      TII->addFlag(Load, 0, MO_FLAG_NOT_LAST);
  }

  MI.eraseFromParent();
  return true;
}
|
||||
|
||||
bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
const R600RegisterInfo &TRI = TII->getRegisterInfo();
|
||||
@@ -59,6 +183,11 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
while (I != MBB.end()) {
|
||||
MachineInstr &MI = *I;
|
||||
I = llvm::next(I);
|
||||
|
||||
if (ExpandInputPerspective(MI))
|
||||
continue;
|
||||
if (ExpandInputConstant(MI))
|
||||
continue;
|
||||
|
||||
bool IsReduction = TII->isReductionOp(MI.getOpcode());
|
||||
bool IsVector = TII->isVector(MI);
|
||||
|
||||
@@ -44,6 +44,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
|
||||
|
||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
|
||||
|
||||
setOperationAction(ISD::ROTL, MVT::i32, Custom);
|
||||
|
||||
@@ -240,6 +241,29 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
|
||||
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
|
||||
break;
|
||||
}
|
||||
case AMDGPU::input_perspective:
|
||||
{
|
||||
R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
|
||||
|
||||
// XXX Be more fine about register reservation
|
||||
for (unsigned i = 0; i < 4; i ++) {
|
||||
unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
|
||||
MFI->ReservedRegs.push_back(ReservedReg);
|
||||
}
|
||||
|
||||
switch (MI->getOperand(1).getImm()) {
|
||||
case 0:// Perspective
|
||||
MFI->HasPerspectiveInterpolation = true;
|
||||
break;
|
||||
case 1:// Linear
|
||||
MFI->HasLinearInterpolation = true;
|
||||
break;
|
||||
default:
|
||||
assert(0 && "Unknow ij index");
|
||||
}
|
||||
|
||||
return BB;
|
||||
}
|
||||
}
|
||||
|
||||
MI->eraseFromParent();
|
||||
@@ -294,7 +318,48 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
|
||||
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
|
||||
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
|
||||
}
|
||||
|
||||
case AMDGPUIntrinsic::R600_load_input_perspective: {
|
||||
unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
||||
SDValue FullVector = DAG.getNode(
|
||||
AMDGPUISD::INTERP,
|
||||
DL, MVT::v4f32,
|
||||
DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
|
||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
|
||||
DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
|
||||
}
|
||||
case AMDGPUIntrinsic::R600_load_input_linear: {
|
||||
unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
||||
SDValue FullVector = DAG.getNode(
|
||||
AMDGPUISD::INTERP,
|
||||
DL, MVT::v4f32,
|
||||
DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
|
||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
|
||||
DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
|
||||
}
|
||||
case AMDGPUIntrinsic::R600_load_input_constant: {
|
||||
unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
||||
SDValue FullVector = DAG.getNode(
|
||||
AMDGPUISD::INTERP_P0,
|
||||
DL, MVT::v4f32,
|
||||
DAG.getConstant(slot / 4 , MVT::i32));
|
||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
|
||||
DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
|
||||
}
|
||||
case AMDGPUIntrinsic::R600_load_input_position: {
|
||||
unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
||||
unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot);
|
||||
SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
|
||||
RegIndex, MVT::f32);
|
||||
if ((slot % 4) == 3) {
|
||||
return DAG.getNode(ISD::FDIV,
|
||||
DL, VT,
|
||||
DAG.getConstantFP(1.0f, MVT::f32),
|
||||
Reg);
|
||||
} else {
|
||||
return Reg;
|
||||
}
|
||||
}
|
||||
|
||||
case r600_read_ngroups_x:
|
||||
return LowerImplicitParameter(DAG, VT, DL, 0);
|
||||
case r600_read_ngroups_y:
|
||||
@@ -347,9 +412,30 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
|
||||
// Dispatch on the opcode of the node whose results must be replaced.
// Each case finishes with an explicit return so that no case can fall
// through into the next one.
switch (N->getOpcode()) {
default: return;
case ISD::FP_TO_UINT:
  Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
  // Must not fall through: operand 0 of an FP_TO_UINT node is the value
  // being converted, not a constant intrinsic ID, so the cast in the
  // INTRINSIC_WO_CHAIN case below would be invalid for it.
  return;
case ISD::INTRINSIC_WO_CHAIN:
  {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    // Only the face-input intrinsic is handled here.
    if (IntrinsicID == AMDGPUIntrinsic::R600_load_input_face)
      Results.push_back(LowerInputFace(N, DAG));
    return;
  }
}
|
||||
}
|
||||
|
||||
/// Lower an R600_load_input_face intrinsic node.
///
/// Operand 1 of \p Op is a constant slot number naming an R600_TReg32
/// register. That register is made a live-in f32 value and compared against
/// 0.0 with SETUGT.
///
/// \returns the i1 SETCC node holding the result of that comparison.
SDValue R600TargetLowering::LowerInputFace(SDNode* Op, SelectionDAG &DAG) const
{
  const ConstantSDNode *SlotNode = cast<ConstantSDNode>(Op->getOperand(1));
  unsigned InputSlot = SlotNode->getZExtValue();
  unsigned FaceReg = AMDGPU::R600_TReg32RegClass.getRegister(InputSlot);

  SDValue FaceValue = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                           FaceReg, MVT::f32);
  SDValue Zero = DAG.getConstantFP(0.0f, MVT::f32);
  SDValue GreaterThan = DAG.getCondCode(ISD::SETUGT);

  return DAG.getNode(ISD::SETCC, Op->getDebugLoc(), MVT::i1,
                     FaceValue, Zero, GreaterThan);
}
|
||||
|
||||
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const
|
||||
{
|
||||
return DAG.getNode(
|
||||
|
||||
@@ -58,6 +58,7 @@ private:
|
||||
|
||||
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
|
||||
};
|
||||
|
||||
|
||||
@@ -258,6 +258,60 @@ def isEGorCayman : Predicate<"Subtarget.device()"
|
||||
def isR600toCayman : Predicate<
|
||||
"Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Interpolation Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// SelectionDAG node bound to AMDGPUISD::INTERP.
// Type profile: one floating-point result and two integer operands.
def INTERP: SDNode<"AMDGPUISD::INTERP",
|
||||
SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]>
|
||||
>;
|
||||
|
||||
// SelectionDAG node bound to AMDGPUISD::INTERP_P0.
// Type profile: one floating-point result and a single integer operand.
def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0",
|
||||
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]>
|
||||
>;
|
||||
|
||||
// Pseudo instruction selected for an INTERP node with two i32 immediates;
// it defines a 128-bit register ($dst).
// usesCustomInserter = 1 routes it through EmitInstrWithCustomInserter.
// NOTE(review): the asm string prints the literal text "dst", not "$dst" —
// confirm whether that is intended.
let usesCustomInserter = 1 in {
|
||||
def input_perspective : AMDGPUShaderInst <
|
||||
(outs R600_Reg128:$dst),
|
||||
(ins i32imm:$src0, i32imm:$src1),
|
||||
"input_perspective $src0 $src1 : dst",
|
||||
[(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>;
|
||||
} // End usesCustomInserter = 1
|
||||
|
||||
// Pseudo instruction selected for an INTERP_P0 node with one i32 immediate;
// it defines a 128-bit register ($dst).
// The asm string names this instruction itself so that dumps do not
// mislabel it as input_perspective.
def input_constant : AMDGPUShaderInst <
  (outs R600_Reg128:$dst),
  (ins i32imm:$src),
  "input_constant $src : dst",
  [(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>;
|
||||
|
||||
|
||||
|
||||
// INTERP_XY instruction, declared with InstR600 value 0xD6 and AnyALU.
// Operands: $dst (0), $src0 (1), $src1 (2), $flags (3); it has no selection
// pattern, and FlagOperandIdx = 3 identifies $flags as the flag operand.
def INTERP_XY : InstR600 <0xD6,
|
||||
(outs R600_Reg32:$dst),
|
||||
(ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
|
||||
"INTERP_XY dst",
|
||||
[], AnyALU>
|
||||
{
|
||||
let FlagOperandIdx = 3;
|
||||
}
|
||||
|
||||
// INTERP_ZW instruction, declared with InstR600 value 0xD7 and AnyALU.
// Operands: $dst (0), $src0 (1), $src1 (2), $flags (3); it has no selection
// pattern, and FlagOperandIdx = 3 identifies $flags as the flag operand.
def INTERP_ZW : InstR600 <0xD7,
|
||||
(outs R600_Reg32:$dst),
|
||||
(ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
|
||||
"INTERP_ZW dst",
|
||||
[], AnyALU>
|
||||
{
|
||||
let FlagOperandIdx = 3;
|
||||
}
|
||||
|
||||
// INTERP_LOAD_P0 instruction, declared with InstR600 value 0xE0 and AnyALU.
// Operands: $dst (0), $src (1), $flags (2); it has no selection pattern,
// and FlagOperandIdx = 2 identifies $flags as the flag operand.
def INTERP_LOAD_P0 : InstR600 <0xE0,
|
||||
(outs R600_Reg32:$dst),
|
||||
(ins R600_Reg32:$src, i32imm:$flags),
|
||||
"INTERP_LOAD_P0 dst",
|
||||
[], AnyALU>
|
||||
{
|
||||
let FlagOperandIdx = 2;
|
||||
}
|
||||
|
||||
let Predicates = [isR600toCayman] in {
|
||||
|
||||
|
||||
@@ -13,6 +13,16 @@
|
||||
|
||||
// Shader input intrinsics with the "R600" target prefix.
// Every intrinsic takes a single i32 argument. All of them return a float
// except int_R600_load_input_face, which returns an i1.
// int_R600_load_input is IntrNoMem; the others are declared IntrReadMem.
let TargetPrefix = "R600", isTarget = 1 in {
|
||||
def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_R600_load_input_perspective :
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
||||
def int_R600_load_input_constant :
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
||||
def int_R600_load_input_linear :
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
||||
def int_R600_load_input_position :
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
||||
// The only intrinsic of this group with an i1 result.
def int_R600_load_input_face :
|
||||
Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>;
|
||||
}
|
||||
|
||||
let TargetPrefix = "r600", isTarget = 1 in {
|
||||
|
||||
@@ -13,4 +13,14 @@
|
||||
|
||||
// Shader input intrinsics with the "R600" target prefix.
// Every intrinsic takes a single i32 argument. All of them return a float
// except int_R600_load_input_face, which returns an i1.
// int_R600_load_input is IntrNoMem; the others are declared IntrReadMem.
let TargetPrefix = "R600", isTarget = 1 in {
|
||||
def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_R600_load_input_perspective :
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
||||
def int_R600_load_input_constant :
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
||||
def int_R600_load_input_linear :
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
||||
def int_R600_load_input_position :
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
||||
// The only intrinsic of this group with an i1 result.
def int_R600_load_input_face :
|
||||
Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>;
|
||||
}
|
||||
|
||||
@@ -12,5 +12,22 @@
|
||||
using namespace llvm;
|
||||
|
||||
/// Construct the per-function info with both interpolation flags cleared.
/// The \p MF argument is not used by this constructor.
R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
  : MachineFunctionInfo(),
    HasLinearInterpolation(false),
    HasPerspectiveInterpolation(false)
{ }
|
||||
|
||||
unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const
|
||||
{
|
||||
assert(HasPerspectiveInterpolation);
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned R600MachineFunctionInfo::GetIJLinearIndex() const
|
||||
{
|
||||
assert(HasLinearInterpolation);
|
||||
if (HasPerspectiveInterpolation)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -25,6 +25,11 @@ class R600MachineFunctionInfo : public MachineFunctionInfo {
|
||||
public:
|
||||
R600MachineFunctionInfo(const MachineFunction &MF);
|
||||
std::vector<unsigned> ReservedRegs;
|
||||
bool HasLinearInterpolation;
|
||||
bool HasPerspectiveInterpolation;
|
||||
|
||||
unsigned GetIJLinearIndex() const;
|
||||
unsigned GetIJPerspectiveIndex() const;
|
||||
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user