Skip to content
Snippets Groups Projects
Commit be8ebeeb authored by Tom Stellard's avatar Tom Stellard
Browse files

R600: Optimize and cleanup KILL on SI


Don't insert the KILL optimization if there is no kill instruction
at all.

Patch by: Christian König

Tested-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
Signed-off-by: Christian König <deathsimple@vodafone.de>
llvm-svn: 172845
parent 77543895
No related branches found
No related tags found
No related merge requests found
...@@ -131,9 +131,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( ...@@ -131,9 +131,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
case AMDGPU::SI_INTERP_CONST: case AMDGPU::SI_INTERP_CONST:
LowerSI_INTERP_CONST(MI, *BB, I, MRI); LowerSI_INTERP_CONST(MI, *BB, I, MRI);
break; break;
case AMDGPU::SI_KIL:
LowerSI_KIL(MI, *BB, I, MRI);
break;
case AMDGPU::SI_WQM: case AMDGPU::SI_WQM:
LowerSI_WQM(MI, *BB, I, MRI); LowerSI_WQM(MI, *BB, I, MRI);
break; break;
...@@ -211,17 +208,6 @@ void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI, ...@@ -211,17 +208,6 @@ void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
MI->eraseFromParent(); MI->eraseFromParent();
} }
// Lower the SI_KIL pseudo instruction.  Emits a V_CMPX compare whose side
// effect removes the current pixel from the exec mask when the operand is
// negative, then deletes the pseudo.
void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
  // Clear this pixel from the exec mask if the operand is negative.
  DebugLoc DL = BB.findDebugLoc(I);
  MachineInstrBuilder Cmp =
      BuildMI(BB, I, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC);
  Cmp.addReg(AMDGPU::SREG_LIT_0);     // literal 0.0 as the first operand
  Cmp.addOperand(MI->getOperand(0));  // the value being tested
  MI->eraseFromParent();              // the pseudo is fully replaced
}
void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
unsigned VCC = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); unsigned VCC = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
......
...@@ -34,8 +34,6 @@ class SITargetLowering : public AMDGPUTargetLowering { ...@@ -34,8 +34,6 @@ class SITargetLowering : public AMDGPUTargetLowering {
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB, void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const; MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const;
void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
......
...@@ -1080,13 +1080,6 @@ def SI_INTERP_CONST : InstSI < ...@@ -1080,13 +1080,6 @@ def SI_INTERP_CONST : InstSI <
imm:$attr, SReg_32:$params))] imm:$attr, SReg_32:$params))]
>; >;
// Pseudo instruction matching the AMDGPU kill intrinsic: discards the
// pixel when $src is negative.  Expanded in custom-inserter code via
// SITargetLowering::LowerSI_KIL.
def SI_KIL : InstSI <
(outs),
(ins VReg_32:$src),
"SI_KIL $src",
[(int_AMDGPU_kill VReg_32:$src)]
>;
def SI_WQM : InstSI < def SI_WQM : InstSI <
(outs), (outs),
(ins), (ins),
...@@ -1157,11 +1150,23 @@ def SI_END_CF : InstSI < ...@@ -1157,11 +1150,23 @@ def SI_END_CF : InstSI <
[(int_SI_end_cf SReg_64:$saved)] [(int_SI_end_cf SReg_64:$saved)]
>; >;
// Pseudo instruction matching the AMDGPU kill intrinsic: discards the
// pixel when $src is negative.  Expanded late, by
// SILowerControlFlowPass::Kill, instead of in the custom inserter.
// NOTE(review): the asm string still reads "SI_KIL" (single L) — looks
// like a leftover from the old SI_KIL def; confirm whether it should be
// "SI_KILL $src".
def SI_KILL : InstSI <
(outs),
(ins VReg_32:$src),
"SI_KIL $src",
[(int_AMDGPU_kill VReg_32:$src)]
>;
} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1 } // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
// Uses = [EXEC], Defs = [EXEC] // Uses = [EXEC], Defs = [EXEC]
} // end IsCodeGenOnly, isPseudo } // end IsCodeGenOnly, isPseudo
// Lower the operand-less kilp intrinsic to SI_KILL fed with the constant
// 0xbf800000 (IEEE-754 bits of -1.0f), which is always negative and so
// unconditionally kills the pixel.
def : Pat <
(int_AMDGPU_kilp),
(SI_KILL (V_MOV_IMM_I32 0xbf800000))
>;
/* int_SI_vs_load_input */ /* int_SI_vs_load_input */
def : Pat< def : Pat<
(int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset, (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
...@@ -1314,11 +1319,6 @@ def : Pat< ...@@ -1314,11 +1319,6 @@ def : Pat<
(V_MUL_F32_e32 AllReg_32:$src0, (V_RCP_F32_e32 AllReg_32:$src1)) (V_MUL_F32_e32 AllReg_32:$src0, (V_RCP_F32_e32 AllReg_32:$src1))
>; >;
// Lower the operand-less kilp intrinsic to SI_KIL fed with the constant
// 0xbf800000 (IEEE-754 bits of -1.0f), which is always negative and so
// unconditionally kills the pixel.
def : Pat <
(int_AMDGPU_kilp),
(SI_KIL (V_MOV_IMM_I32 0xbf800000))
>;
def : Pat < def : Pat <
(int_AMDGPU_cube VReg_128:$src), (int_AMDGPU_cube VReg_128:$src),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
......
...@@ -68,7 +68,10 @@ private: ...@@ -68,7 +68,10 @@ private:
static char ID; static char ID;
const TargetInstrInfo *TII; const TargetInstrInfo *TII;
void Skip(MachineInstr &MI, MachineOperand &To); bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
void Skip(MachineInstr &From, MachineOperand &To);
void SkipIfDead(MachineInstr &MI);
void If(MachineInstr &MI); void If(MachineInstr &MI);
void Else(MachineInstr &MI); void Else(MachineInstr &MI);
...@@ -78,6 +81,7 @@ private: ...@@ -78,6 +81,7 @@ private:
void Loop(MachineInstr &MI); void Loop(MachineInstr &MI);
void EndCf(MachineInstr &MI); void EndCf(MachineInstr &MI);
void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI); void Branch(MachineInstr &MI);
public: public:
...@@ -100,22 +104,29 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) { ...@@ -100,22 +104,29 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
return new SILowerControlFlowPass(tm); return new SILowerControlFlowPass(tm);
} }
void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
MachineBasicBlock *To) {
unsigned NumInstr = 0; unsigned NumInstr = 0;
for (MachineBasicBlock *MBB = *From.getParent()->succ_begin(); for (MachineBasicBlock *MBB = From; MBB != To && !MBB->succ_empty();
NumInstr < SkipThreshold && MBB != To.getMBB() && !MBB->succ_empty();
MBB = *MBB->succ_begin()) { MBB = *MBB->succ_begin()) {
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
NumInstr < SkipThreshold && I != E; ++I) { NumInstr < SkipThreshold && I != E; ++I) {
if (I->isBundle() || !I->isBundled()) if (I->isBundle() || !I->isBundled())
++NumInstr; if (++NumInstr >= SkipThreshold)
return true;
} }
} }
if (NumInstr < SkipThreshold) return false;
}
void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB()))
return; return;
DebugLoc DL = From.getDebugLoc(); DebugLoc DL = From.getDebugLoc();
...@@ -124,6 +135,38 @@ void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { ...@@ -124,6 +135,38 @@ void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
.addReg(AMDGPU::EXEC); .addReg(AMDGPU::EXEC);
} }
// After a kill, the whole wavefront may be dead (EXEC == 0).  If enough
// instructions remain to the end of the function to be worth skipping
// (per shouldSkip), emit code right after MI that ends the wavefront
// early: branch over the shutdown sequence while any lane is still live,
// otherwise export to the NULL target and terminate with S_ENDPGM.
void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
// Not worth the branch if the remainder of the function is short.
if (!shouldSkip(&MBB, &MBB.getParent()->back()))
return;
// Insert the new instructions immediately after MI (the kill).
MachineBasicBlock::iterator Insert = &MI;
++Insert;
// If the exec mask is non-zero, skip the next two instructions
BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
.addImm(3)  // branch offset — presumably hops the EXP+S_ENDPGM pair below; confirm encoding units
.addReg(AMDGPU::EXEC);
// Exec mask is zero: Export to NULL target...
BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP))
.addImm(0)
.addImm(0x09) // V_008DFC_SQ_EXP_NULL
.addImm(0)
.addImm(1)
.addImm(1)
.addReg(AMDGPU::SREG_LIT_0)
.addReg(AMDGPU::SREG_LIT_0)
.addReg(AMDGPU::SREG_LIT_0)
.addReg(AMDGPU::SREG_LIT_0);
// ... and terminate wavefront
BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
}
void SILowerControlFlowPass::If(MachineInstr &MI) { void SILowerControlFlowPass::If(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent(); MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc(); DebugLoc DL = MI.getDebugLoc();
...@@ -242,8 +285,28 @@ void SILowerControlFlowPass::Branch(MachineInstr &MI) { ...@@ -242,8 +285,28 @@ void SILowerControlFlowPass::Branch(MachineInstr &MI) {
assert(0); assert(0);
} }
// Expand the SI_KILL pseudo: emit a V_CMPX compare against the literal
// 0.0, whose side effect clears this pixel's bit from the exec mask when
// the operand is negative, then delete the pseudo.
void SILowerControlFlowPass::Kill(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
// Kill is only allowed in pixel shaders
MachineFunction &MF = *MBB.getParent();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
assert(Info->ShaderType == ShaderType::PIXEL);
// Clear this pixel from the exec mask if the operand is negative
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
.addReg(AMDGPU::SREG_LIT_0)    // literal 0.0 as the first compare operand
.addOperand(MI.getOperand(0)); // the value being tested
MI.eraseFromParent();
}
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
bool HaveCf = false;
bool HaveKill = false;
unsigned Depth = 0;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) { BI != BE; ++BI) {
...@@ -257,6 +320,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { ...@@ -257,6 +320,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
switch (MI.getOpcode()) { switch (MI.getOpcode()) {
default: break; default: break;
case AMDGPU::SI_IF: case AMDGPU::SI_IF:
++Depth;
If(MI); If(MI);
break; break;
...@@ -277,14 +341,26 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { ...@@ -277,14 +341,26 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
break; break;
case AMDGPU::SI_LOOP: case AMDGPU::SI_LOOP:
++Depth;
Loop(MI); Loop(MI);
break; break;
case AMDGPU::SI_END_CF: case AMDGPU::SI_END_CF:
HaveCf = true; if (--Depth == 0 && HaveKill) {
SkipIfDead(MI);
HaveKill = false;
}
EndCf(MI); EndCf(MI);
break; break;
case AMDGPU::SI_KILL:
if (Depth == 0)
SkipIfDead(MI);
else
HaveKill = true;
Kill(MI);
break;
case AMDGPU::S_BRANCH: case AMDGPU::S_BRANCH:
Branch(MI); Branch(MI);
break; break;
...@@ -292,40 +368,5 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { ...@@ -292,40 +368,5 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
} }
} }
// TODO: What is this good for?
unsigned ShaderType = MF.getInfo<SIMachineFunctionInfo>()->ShaderType;
if (HaveCf && ShaderType == ShaderType::PIXEL) {
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
MachineBasicBlock &MBB = *BI;
if (MBB.succ_empty()) {
MachineInstr &MI = *MBB.getFirstNonPHI();
DebugLoc DL = MI.getDebugLoc();
// If the exec mask is non-zero, skip the next two instructions
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
.addImm(3)
.addReg(AMDGPU::EXEC);
// Exec mask is zero: Export to NULL target...
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::EXP))
.addImm(0)
.addImm(0x09) // V_008DFC_SQ_EXP_NULL
.addImm(0)
.addImm(1)
.addImm(1)
.addReg(AMDGPU::SREG_LIT_0)
.addReg(AMDGPU::SREG_LIT_0)
.addReg(AMDGPU::SREG_LIT_0)
.addReg(AMDGPU::SREG_LIT_0);
// ... and terminate wavefront
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ENDPGM));
}
}
}
return true; return true;
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment