radeon/llvm: Lower VCREATE_v4f32 for R600 and SI
This commit is contained in:
@@ -27,7 +27,6 @@ namespace llvm {
|
||||
FunctionPass *createR600LowerInstructionsPass(TargetMachine &tm);
|
||||
|
||||
FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
|
||||
FunctionPass *createSIConvertToISAPass(TargetMachine &tm);
|
||||
FunctionPass *createSIInitMachineFunctionInfoPass(TargetMachine &tm);
|
||||
FunctionPass *createSILowerShaderInstructionsPass(TargetMachine &tm);
|
||||
FunctionPass *createSIPropagateImmReadsPass(TargetMachine &tm);
|
||||
@@ -35,6 +34,7 @@ namespace llvm {
|
||||
|
||||
FunctionPass *createAMDGPUReorderPreloadInstructionsPass(TargetMachine &tm);
|
||||
|
||||
FunctionPass *createAMDGPULowerInstructionsPass(TargetMachine &tm);
|
||||
FunctionPass *createAMDGPULowerShaderInstructionsPass(TargetMachine &tm);
|
||||
|
||||
FunctionPass *createAMDGPUDelimitInstGroupsPass(TargetMachine &tm);
|
||||
|
||||
+19
-26
@@ -1,4 +1,4 @@
|
||||
//===-- SIConvertToISA.cpp - TODO: Add brief description -------===//
|
||||
//===-- AMDGPULowerInstructions.cpp - TODO: Add brief description -------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
@@ -22,16 +22,16 @@
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
class SIConvertToISAPass : public MachineFunctionPass {
|
||||
class AMDGPULowerInstructionsPass : public MachineFunctionPass {
|
||||
|
||||
private:
|
||||
static char ID;
|
||||
TargetMachine &TM;
|
||||
void convertVCREATE_v4f32(MachineInstr &MI, MachineBasicBlock::iterator I,
|
||||
void lowerVCREATE_v4f32(MachineInstr &MI, MachineBasicBlock::iterator I,
|
||||
MachineBasicBlock &MBB, MachineFunction &MF);
|
||||
|
||||
public:
|
||||
SIConvertToISAPass(TargetMachine &tm) :
|
||||
AMDGPULowerInstructionsPass(TargetMachine &tm) :
|
||||
MachineFunctionPass(ID), TM(tm) { }
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
@@ -39,13 +39,13 @@ namespace {
|
||||
};
|
||||
} /* End anonymous namespace */
|
||||
|
||||
char SIConvertToISAPass::ID = 0;
|
||||
char AMDGPULowerInstructionsPass::ID = 0;
|
||||
|
||||
FunctionPass *llvm::createSIConvertToISAPass(TargetMachine &tm) {
|
||||
return new SIConvertToISAPass(tm);
|
||||
FunctionPass *llvm::createAMDGPULowerInstructionsPass(TargetMachine &tm) {
|
||||
return new AMDGPULowerInstructionsPass(tm);
|
||||
}
|
||||
|
||||
bool SIConvertToISAPass::runOnMachineFunction(MachineFunction &MF)
|
||||
bool AMDGPULowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
|
||||
{
|
||||
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
|
||||
BB != BB_E; ++BB) {
|
||||
@@ -56,34 +56,27 @@ bool SIConvertToISAPass::runOnMachineFunction(MachineFunction &MF)
|
||||
|
||||
switch (MI.getOpcode()) {
|
||||
default: continue;
|
||||
case AMDIL::VCREATE_v4f32: convertVCREATE_v4f32(MI, I, MBB, MF);
|
||||
case AMDIL::VCREATE_v4f32: lowerVCREATE_v4f32(MI, I, MBB, MF); break;
|
||||
|
||||
}
|
||||
MI.removeFromParent();
|
||||
MI.eraseFromParent();
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void SIConvertToISAPass::convertVCREATE_v4f32(MachineInstr &MI,
|
||||
void AMDGPULowerInstructionsPass::lowerVCREATE_v4f32(MachineInstr &MI,
|
||||
MachineBasicBlock::iterator I, MachineBasicBlock &MBB, MachineFunction &MF)
|
||||
{
|
||||
MachineInstrBuilder implicitDef;
|
||||
MachineInstrBuilder insertSubreg;
|
||||
MachineRegisterInfo & MRI = MF.getRegInfo();
|
||||
unsigned tmp = MRI.createVirtualRegister(&AMDIL::VReg_128RegClass);
|
||||
unsigned tmp = MRI.createVirtualRegister(
|
||||
MRI.getRegClass(MI.getOperand(0).getReg()));
|
||||
|
||||
implicitDef = BuildMI(MF, MBB.findDebugLoc(I),
|
||||
TM.getInstrInfo()->get(AMDIL::IMPLICIT_DEF), tmp);
|
||||
BuildMI(MBB, I, DebugLoc(), TM.getInstrInfo()->get(AMDIL::IMPLICIT_DEF), tmp);
|
||||
|
||||
MRI.setRegClass(MI.getOperand(1).getReg(), &AMDIL::VReg_32RegClass);
|
||||
insertSubreg = BuildMI(MF, MBB.findDebugLoc(I),
|
||||
TM.getInstrInfo()->get(AMDIL::INSERT_SUBREG))
|
||||
.addOperand(MI.getOperand(0))
|
||||
.addReg(tmp)
|
||||
.addOperand(MI.getOperand(1))
|
||||
.addImm(AMDIL::sel_x);
|
||||
|
||||
MBB.insert(I, implicitDef);
|
||||
MBB.insert(I, insertSubreg);
|
||||
BuildMI(MBB, I, DebugLoc(), TM.getInstrInfo()->get(AMDIL::INSERT_SUBREG))
|
||||
.addOperand(MI.getOperand(0))
|
||||
.addReg(tmp)
|
||||
.addOperand(MI.getOperand(1))
|
||||
.addImm(AMDIL::sel_x);
|
||||
}
|
||||
@@ -152,8 +152,8 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
|
||||
} else {
|
||||
PM.add(createSILowerShaderInstructionsPass(*TM));
|
||||
PM.add(createSIAssignInterpRegsPass(*TM));
|
||||
PM.add(createSIConvertToISAPass(*TM));
|
||||
}
|
||||
PM.add(createAMDGPULowerInstructionsPass(*TM));
|
||||
PM.add(createAMDGPUConvertToISAPass(*TM));
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -56,6 +56,7 @@ CPP_SOURCES := \
|
||||
AMDGPUTargetMachine.cpp \
|
||||
AMDGPUISelLowering.cpp \
|
||||
AMDGPUConvertToISA.cpp \
|
||||
AMDGPULowerInstructions.cpp \
|
||||
AMDGPULowerShaderInstructions.cpp \
|
||||
AMDGPUReorderPreloadInstructions.cpp \
|
||||
AMDGPUInstrInfo.cpp \
|
||||
@@ -70,7 +71,6 @@ CPP_SOURCES := \
|
||||
R600RegisterInfo.cpp \
|
||||
SIAssignInterpRegs.cpp \
|
||||
SICodeEmitter.cpp \
|
||||
SIConvertToISA.cpp \
|
||||
SIInstrInfo.cpp \
|
||||
SIISelLowering.cpp \
|
||||
SILowerShaderInstructions.cpp \
|
||||
|
||||
@@ -400,11 +400,8 @@ void R600CodeEmitter::emitDst(const MachineOperand & MO)
|
||||
emitByte(getHWReg(MO.getReg()));
|
||||
|
||||
/* Emit the element of the destination register (1 byte)*/
|
||||
const MachineInstr * parent = MO.getParent();
|
||||
if (isReduction) {
|
||||
emitByte(reductionElement);
|
||||
} else if (parent->getOpcode() == AMDIL::VCREATE_v4f32) {
|
||||
emitByte(ELEMENT_X);
|
||||
} else {
|
||||
emitByte(TRI->getHWRegChan(MO.getReg()));
|
||||
}
|
||||
@@ -631,7 +628,6 @@ unsigned int R600CodeEmitter::getHWInst(const MachineInstr &MI)
|
||||
switch (MI.getOpcode()) {
|
||||
case AMDIL::STORE_OUTPUT:
|
||||
case AMDIL::VCREATE_v4i32:
|
||||
case AMDIL::VCREATE_v4f32:
|
||||
case AMDIL::LOADCONST_i32:
|
||||
case AMDIL::LOADCONST_f32:
|
||||
case AMDIL::MOVE_v4i32:
|
||||
|
||||
Reference in New Issue
Block a user