radeon/llvm: Add live-in registers during DAG lowering
Pseudo instructions emulating live-in registers have been removed, and their corresponding intrinsics are now lowered during DAG lowering.
This commit is contained in:
@@ -15,6 +15,7 @@
|
||||
#include "AMDILIntrinsicInfo.h"
|
||||
#include "AMDGPUUtil.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
|
||||
|
||||
@@ -317,6 +318,21 @@ void AMDGPUTargetLowering::addLiveIn(MachineInstr * MI,
|
||||
AMDGPU::utilAddLiveIn(MF, MRI, TII, reg, MI->getOperand(0).getReg());
|
||||
}
|
||||
|
||||
/// CreateLiveInRegister - Return a Register SDNode of type VT that refers to
/// the virtual register paired with the physical register Reg in the
/// function's live-in list. If Reg is not yet a live-in, a new virtual
/// register of class RC is created and the (Reg, virtual register) pair is
/// added to the live-in list first.
SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
|
||||
const TargetRegisterClass *RC,
|
||||
unsigned Reg, EVT VT) const {
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
// Assigned on both branches of the if/else below.
unsigned VirtualRegister;
|
||||
if (!MRI.isLiveIn(Reg)) {
|
||||
// Reg has not been recorded as a live-in yet: create a virtual register
// in RC and register the pair with the MachineRegisterInfo.
VirtualRegister = MRI.createVirtualRegister(RC);
|
||||
MRI.addLiveIn(Reg, VirtualRegister);
|
||||
} else {
|
||||
// Reg is already a live-in: reuse the virtual register recorded for it
// instead of creating a second one.
VirtualRegister = MRI.getLiveInVirtReg(Reg);
|
||||
}
|
||||
// The returned node names the virtual register, not the physical Reg.
return DAG.getRegister(VirtualRegister, VT);
|
||||
}
|
||||
|
||||
#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
|
||||
|
||||
const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
|
||||
|
||||
@@ -39,6 +39,12 @@ protected:
|
||||
MachineRegisterInfo & MRI, const TargetInstrInfo * TII,
|
||||
unsigned reg) const;
|
||||
|
||||
/// CreateLiveInRegister - Helper function that makes sure Reg is in the
|
||||
/// LiveIn list of the DAG's MachineFunction, pairing it with a virtual
/// register of class RC when it is not already listed. This returns a
|
||||
/// Register SDNode of type VT for the virtual register paired with Reg.
|
||||
SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
|
||||
unsigned Reg, EVT VT) const;
|
||||
|
||||
bool isHWTrueValue(SDValue Op) const;
|
||||
bool isHWFalseValue(SDValue Op) const;
|
||||
|
||||
|
||||
@@ -32,7 +32,6 @@ bool AMDGPU::isPlaceHolderOpcode(unsigned opcode)
|
||||
switch (opcode) {
|
||||
default: return false;
|
||||
case AMDGPU::RETURN:
|
||||
case AMDGPU::LOAD_INPUT:
|
||||
case AMDGPU::LAST:
|
||||
case AMDGPU::MASK_WRITE:
|
||||
case AMDGPU::RESERVE_REG:
|
||||
|
||||
@@ -38,6 +38,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
|
||||
setOperationAction(ISD::FSUB, MVT::f32, Expand);
|
||||
|
||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
||||
|
||||
setOperationAction(ISD::ROTL, MVT::i32, Custom);
|
||||
|
||||
@@ -58,24 +59,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
|
||||
|
||||
switch (MI->getOpcode()) {
|
||||
default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
|
||||
case AMDGPU::TGID_X:
|
||||
addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_X);
|
||||
break;
|
||||
case AMDGPU::TGID_Y:
|
||||
addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Y);
|
||||
break;
|
||||
case AMDGPU::TGID_Z:
|
||||
addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Z);
|
||||
break;
|
||||
case AMDGPU::TIDIG_X:
|
||||
addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_X);
|
||||
break;
|
||||
case AMDGPU::TIDIG_Y:
|
||||
addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Y);
|
||||
break;
|
||||
case AMDGPU::TIDIG_Z:
|
||||
addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Z);
|
||||
break;
|
||||
case AMDGPU::NGROUPS_X:
|
||||
lowerImplicitParameter(MI, *BB, MRI, 0);
|
||||
break;
|
||||
@@ -135,14 +118,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
|
||||
break;
|
||||
}
|
||||
|
||||
case AMDGPU::LOAD_INPUT:
|
||||
{
|
||||
int64_t RegIndex = MI->getOperand(1).getImm();
|
||||
addLiveIn(MI, MF, MRI, TII,
|
||||
AMDGPU::R600_TReg32RegClass.getRegister(RegIndex));
|
||||
break;
|
||||
}
|
||||
|
||||
case AMDGPU::MASK_WRITE:
|
||||
{
|
||||
unsigned maskedRegister = MI->getOperand(0).getReg();
|
||||
@@ -264,6 +239,8 @@ void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBl
|
||||
// Custom DAG Lowering Operations
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
using namespace llvm::Intrinsic;
|
||||
using namespace llvm::AMDGPUIntrinsic;
|
||||
|
||||
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
|
||||
{
|
||||
@@ -288,11 +265,47 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
|
||||
}
|
||||
return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
|
||||
}
|
||||
default: return SDValue();
|
||||
// default for switch(IntrinsicID)
|
||||
default: break;
|
||||
}
|
||||
// break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
|
||||
break;
|
||||
}
|
||||
case ISD::INTRINSIC_WO_CHAIN: {
|
||||
unsigned IntrinsicID =
|
||||
cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
|
||||
EVT VT = Op.getValueType();
|
||||
switch(IntrinsicID) {
|
||||
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
||||
case AMDGPUIntrinsic::R600_load_input: {
|
||||
int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
||||
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
|
||||
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
|
||||
}
|
||||
case r600_read_tgid_x:
|
||||
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
|
||||
AMDGPU::T1_X, VT);
|
||||
case r600_read_tgid_y:
|
||||
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
|
||||
AMDGPU::T1_Y, VT);
|
||||
case r600_read_tgid_z:
|
||||
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
|
||||
AMDGPU::T1_Z, VT);
|
||||
case r600_read_tidig_x:
|
||||
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
|
||||
AMDGPU::T0_X, VT);
|
||||
case r600_read_tidig_y:
|
||||
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
|
||||
AMDGPU::T0_Y, VT);
|
||||
case r600_read_tidig_z:
|
||||
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
|
||||
AMDGPU::T0_Z, VT);
|
||||
}
|
||||
// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
|
||||
break;
|
||||
}
|
||||
} // end switch(Op.getOpcode())
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
|
||||
|
||||
@@ -1105,14 +1105,6 @@ class R600PreloadInst <string asm, Intrinsic intr> : AMDGPUInst <
|
||||
[(set R600_TReg32:$dst, (intr))]
|
||||
>;
|
||||
|
||||
def TGID_X : R600PreloadInst <"TGID_X", int_r600_read_tgid_x>;
|
||||
def TGID_Y : R600PreloadInst <"TGID_Y", int_r600_read_tgid_y>;
|
||||
def TGID_Z : R600PreloadInst <"TGID_Z", int_r600_read_tgid_z>;
|
||||
|
||||
def TIDIG_X : R600PreloadInst <"TIDIG_X", int_r600_read_tidig_x>;
|
||||
def TIDIG_Y : R600PreloadInst <"TIDIG_Y", int_r600_read_tidig_y>;
|
||||
def TIDIG_Z : R600PreloadInst <"TIDIG_Z", int_r600_read_tidig_z>;
|
||||
|
||||
def NGROUPS_X : R600PreloadInst <"NGROUPS_X", int_r600_read_ngroups_x>;
|
||||
def NGROUPS_Y : R600PreloadInst <"NGROUPS_Y", int_r600_read_ngroups_y>;
|
||||
def NGROUPS_Z : R600PreloadInst <"NGROUPS_Z", int_r600_read_ngroups_z>;
|
||||
@@ -1138,13 +1130,6 @@ def R600_LOAD_CONST : AMDGPUShaderInst <
|
||||
[(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))]
|
||||
>;
|
||||
|
||||
def LOAD_INPUT : AMDGPUShaderInst <
|
||||
(outs R600_Reg32:$dst),
|
||||
(ins i32imm:$src),
|
||||
"LOAD_INPUT $dst, $src",
|
||||
[(set R600_Reg32:$dst, (int_R600_load_input imm:$src))]
|
||||
>;
|
||||
|
||||
def RESERVE_REG : AMDGPUShaderInst <
|
||||
(outs),
|
||||
(ins i32imm:$src),
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "SIISelLowering.h"
|
||||
#include "AMDILIntrinsicInfo.h"
|
||||
#include "SIInstrInfo.h"
|
||||
#include "SIRegisterInfo.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
@@ -44,6 +45,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
||||
|
||||
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
|
||||
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
||||
|
||||
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
|
||||
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
|
||||
|
||||
@@ -128,10 +131,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
||||
lowerUSE_SGPR(MI, BB->getParent(), MRI);
|
||||
MI->eraseFromParent();
|
||||
break;
|
||||
case AMDGPU::VS_LOAD_BUFFER_INDEX:
|
||||
addLiveIn(MI, BB->getParent(), MRI, TII, AMDGPU::VGPR0);
|
||||
MI->eraseFromParent();
|
||||
break;
|
||||
}
|
||||
return BB;
|
||||
}
|
||||
@@ -241,7 +240,20 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
|
||||
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
|
||||
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
|
||||
case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND);
|
||||
case ISD::INTRINSIC_WO_CHAIN: {
|
||||
unsigned IntrinsicID =
|
||||
cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
|
||||
EVT VT = Op.getValueType();
|
||||
switch (IntrinsicID) {
|
||||
case AMDGPUIntrinsic::SI_vs_load_buffer_index:
|
||||
return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
|
||||
AMDGPU::VGPR0, VT);
|
||||
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// Loweri1ContextSwitch - The function is for lowering i1 operations on the
|
||||
|
||||
@@ -970,15 +970,6 @@ def USE_SGPR_64 : InstSI <
|
||||
[(set (i64 SReg_64:$dst), (load_user_sgpr imm:$src0))]
|
||||
>;
|
||||
|
||||
def VS_LOAD_BUFFER_INDEX : InstSI <
|
||||
(outs VReg_32:$dst),
|
||||
(ins),
|
||||
"VS_LOAD_BUFFER_INDEX",
|
||||
[(set VReg_32:$dst, (int_SI_vs_load_buffer_index))]> {
|
||||
|
||||
field bits<32> Inst = 0;
|
||||
}
|
||||
|
||||
} // end usesCustomInserter
|
||||
|
||||
// SI Pseudo branch instructions. These are used by the CFG structurizer pass
|
||||
@@ -1058,13 +1049,6 @@ def : Pat <
|
||||
(COPY_TO_REGCLASS SReg_64:$vcc, VCCReg)
|
||||
>;
|
||||
|
||||
/*
|
||||
def : Pat<
|
||||
(int_SI_vs_load_buffer_index),
|
||||
(COPY_TO_REGCLASS (f32 VGPR0), VReg_32)
|
||||
>;
|
||||
*/
|
||||
|
||||
/********** ====================== **********/
|
||||
/********** Interpolation Patterns **********/
|
||||
/********** ====================== **********/
|
||||
|
||||
@@ -17,7 +17,7 @@ let TargetPrefix = "SI", isTarget = 1 in {
|
||||
def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
|
||||
/* XXX: We may need a separate intrinsic here for loading integer values */
|
||||
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
|
||||
def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], []>;
|
||||
def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
|
||||
def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], []> ;
|
||||
|
||||
def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty]>;
|
||||
|
||||
@@ -229,8 +229,9 @@ static void declare_input_vs(
|
||||
|
||||
/* Load the buffer index, which is always stored in VGPR0
|
||||
* for Vertex Shaders */
|
||||
buffer_index_reg = lp_build_intrinsic(base->gallivm->builder,
|
||||
"llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0);
|
||||
buffer_index_reg = build_intrinsic(base->gallivm->builder,
|
||||
"llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0,
|
||||
LLVMReadNoneAttribute);
|
||||
|
||||
vec4_type = LLVMVectorType(base->elem_type, 4);
|
||||
args[0] = t_list;
|
||||
|
||||
Reference in New Issue
Block a user