radeon/llvm: Lower implicit parameters before ISel

This commit is contained in:
Tom Stellard
2012-08-16 15:49:31 +00:00
parent 0d308ef8fe
commit 5f82d19248
3 changed files with 42 additions and 69 deletions
+37 -49
View File
@@ -58,34 +58,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
switch (MI->getOpcode()) {
default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case AMDGPU::NGROUPS_X:
lowerImplicitParameter(MI, *BB, MRI, 0);
break;
case AMDGPU::NGROUPS_Y:
lowerImplicitParameter(MI, *BB, MRI, 1);
break;
case AMDGPU::NGROUPS_Z:
lowerImplicitParameter(MI, *BB, MRI, 2);
break;
case AMDGPU::GLOBAL_SIZE_X:
lowerImplicitParameter(MI, *BB, MRI, 3);
break;
case AMDGPU::GLOBAL_SIZE_Y:
lowerImplicitParameter(MI, *BB, MRI, 4);
break;
case AMDGPU::GLOBAL_SIZE_Z:
lowerImplicitParameter(MI, *BB, MRI, 5);
break;
case AMDGPU::LOCAL_SIZE_X:
lowerImplicitParameter(MI, *BB, MRI, 6);
break;
case AMDGPU::LOCAL_SIZE_Y:
lowerImplicitParameter(MI, *BB, MRI, 7);
break;
case AMDGPU::LOCAL_SIZE_Z:
lowerImplicitParameter(MI, *BB, MRI, 8);
break;
case AMDGPU::CLAMP_R600:
MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
@@ -245,27 +217,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
return BB;
}
void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
MachineRegisterInfo & MRI, unsigned dword_offset) const
{
unsigned ByteOffset = dword_offset * 4;
// We shouldn't be using an offset wider than 16-bits for implicit parameters.
assert(isInt<16>(ByteOffset));
MachineBasicBlock::iterator I = *MI;
unsigned PtrReg = MRI.createVirtualRegister(&AMDGPU::R600_TReg32_XRegClass);
MRI.setRegClass(MI->getOperand(0).getReg(), &AMDGPU::R600_TReg32_XRegClass);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::COPY), PtrReg)
.addReg(AMDGPU::ZERO);
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::VTX_READ_PARAM_i32_eg))
.addOperand(MI->getOperand(0))
.addReg(PtrReg)
.addImm(ByteOffset);
}
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
@@ -306,6 +257,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
unsigned IntrinsicID =
cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT VT = Op.getValueType();
DebugLoc DL = Op.getDebugLoc();
switch(IntrinsicID) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case AMDGPUIntrinsic::R600_load_input: {
@@ -313,6 +265,26 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
}
case r600_read_ngroups_x:
return LowerImplicitParameter(DAG, VT, DL, 0);
case r600_read_ngroups_y:
return LowerImplicitParameter(DAG, VT, DL, 1);
case r600_read_ngroups_z:
return LowerImplicitParameter(DAG, VT, DL, 2);
case r600_read_global_size_x:
return LowerImplicitParameter(DAG, VT, DL, 3);
case r600_read_global_size_y:
return LowerImplicitParameter(DAG, VT, DL, 4);
case r600_read_global_size_z:
return LowerImplicitParameter(DAG, VT, DL, 5);
case r600_read_local_size_x:
return LowerImplicitParameter(DAG, VT, DL, 6);
case r600_read_local_size_y:
return LowerImplicitParameter(DAG, VT, DL, 7);
case r600_read_local_size_z:
return LowerImplicitParameter(DAG, VT, DL, 8);
case r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_X, VT);
@@ -364,6 +336,22 @@ SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
return Result;
}
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
DebugLoc DL,
unsigned DwordOffset) const
{
unsigned ByteOffset = DwordOffset * 4;
PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
AMDGPUAS::PARAM_I_ADDRESS);
// We shouldn't be using an offset wider than 16-bits for implicit parameters.
assert(isInt<16>(ByteOffset));
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
DAG.getConstant(ByteOffset, MVT::i32), // PTR
MachinePointerInfo(ConstantPointerNull::get(PtrType)),
false, false, false, 0);
}
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
@@ -33,8 +33,11 @@ private:
/// lowerImplicitParameter - Each OpenCL kernel has nine implicit parameters
/// that are stored in the first nine dwords of a Vertex Buffer. These
/// implicit parameters are represented by pseudo instructions, which are
/// lowered to VTX_READ instructions by this function.
/// implicit parameters are lowered to load instructions which retreive the
/// values from the Vertex Buffer.
SDValue LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
DebugLoc DL, unsigned DwordOffset) const;
void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
MachineRegisterInfo & MRI, unsigned dword_offset) const;
@@ -1133,24 +1133,6 @@ class R600PreloadInst <string asm, Intrinsic intr> : AMDGPUInst <
[(set R600_TReg32:$dst, (intr))]
>;
def NGROUPS_X : R600PreloadInst <"NGROUPS_X", int_r600_read_ngroups_x>;
def NGROUPS_Y : R600PreloadInst <"NGROUPS_Y", int_r600_read_ngroups_y>;
def NGROUPS_Z : R600PreloadInst <"NGROUPS_Z", int_r600_read_ngroups_z>;
def GLOBAL_SIZE_X : R600PreloadInst <"GLOBAL_SIZE_X",
int_r600_read_global_size_x>;
def GLOBAL_SIZE_Y : R600PreloadInst <"GLOBAL_SIZE_Y",
int_r600_read_global_size_y>;
def GLOBAL_SIZE_Z : R600PreloadInst <"GLOBAL_SIZE_Z",
int_r600_read_global_size_z>;
def LOCAL_SIZE_X : R600PreloadInst <"LOCAL_SIZE_X",
int_r600_read_local_size_x>;
def LOCAL_SIZE_Y : R600PreloadInst <"LOCAL_SIZE_Y",
int_r600_read_local_size_y>;
def LOCAL_SIZE_Z : R600PreloadInst <"LOCAL_SIZE_Z",
int_r600_read_local_size_z>;
def R600_LOAD_CONST : AMDGPUShaderInst <
(outs R600_Reg32:$dst),
(ins i32imm:$src0),