Newer
Older
//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//===----------------------------------------------------------------------===//
//
// This pass forwards branches to unconditional branches to make them branch
// directly to the target block. This pass often results in dead MBB's, which
// it then removes.
//
// Note that this pass must be run after register allocation, it cannot handle
// SSA form.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "branchfolding"
#include "BranchFolding.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
STATISTIC(NumBranchOpts, "Number of branches optimized");
STATISTIC(NumTailMerge , "Number of block tails merged");
static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
cl::init(cl::BOU_UNSET), cl::Hidden);
// Throttle for huge numbers of predecessors (compile speed problems)
static cl::opt<unsigned>
cl::desc("Max number of predecessors to consider tail merging"),
cl::init(150), cl::Hidden);
// Heuristic for tail merging (and, inversely, tail duplication).
// TODO: This should be replaced with a target query.
static cl::opt<unsigned>
TailMergeSize("tail-merge-size",
cl::desc("Min number of instructions to consider tail merging"),
cl::init(3), cl::Hidden);
char BranchFolderPass::ID = 0;
FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
return new BranchFolderPass(DefaultEnableTailMerge);
}
bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
return OptimizeFunction(MF,
MF.getTarget().getInstrInfo(),
MF.getTarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
}
BranchFolder::BranchFolder(bool defaultEnableTailMerge) {
switch (FlagEnableTailMerge) {
case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
case cl::BOU_TRUE: EnableTailMerge = true; break;
case cl::BOU_FALSE: EnableTailMerge = false; break;
}
/// RemoveDeadBlock - Remove the specified dead machine basic block from the
/// function, updating the CFG.
void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
DEBUG(errs() << "\nRemoving MBB: " << *MBB);
MachineFunction *MF = MBB->getParent();
// drop all successors.
while (!MBB->succ_empty())
MBB->removeSuccessor(MBB->succ_end()-1);
// If there are any labels in the basic block, unregister them from
// MachineModuleInfo.
if (MMI && !MBB->empty()) {
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I) {
// The label ID # is always operand #0, an immediate.
MMI->InvalidateLabel(I->getOperand(0).getImm());
}
}
/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
/// followed by terminators, and if the implicitly defined registers are not
/// used by the terminators, remove those implicit_def's. e.g.
/// BB1:
/// r0 = implicit_def
/// r1 = implicit_def
/// br
/// This block can be optimized away later if the implicit instructions are
/// removed.
bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
SmallSet<unsigned, 4> ImpDefRegs;
MachineBasicBlock::iterator I = MBB->begin();
while (I != MBB->end()) {
if (I->getOpcode() != TargetInstrInfo::IMPLICIT_DEF)
break;
unsigned Reg = I->getOperand(0).getReg();
ImpDefRegs.insert(Reg);
for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs)
ImpDefRegs.insert(SubReg);
++I;
}
if (ImpDefRegs.empty())
return false;
MachineBasicBlock::iterator FirstTerm = I;
while (I != MBB->end()) {
if (!TII->isUnpredicatedTerminator(I))
return false;
// See if it uses any of the implicitly defined registers.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
MachineOperand &MO = I->getOperand(i);
if (!MO.isReg() || !MO.isUse())
continue;
unsigned Reg = MO.getReg();
if (ImpDefRegs.count(Reg))
return false;
}
++I;
}
I = MBB->begin();
while (I != FirstTerm) {
MachineInstr *ImpDefMI = &*I;
++I;
MBB->erase(ImpDefMI);
}
return true;
}
/// OptimizeFunction - Perhaps branch folding, tail merging and other
/// CFG optimizations on the given function.
bool BranchFolder::OptimizeFunction(MachineFunction &MF,
const TargetInstrInfo *tii,
const TargetRegisterInfo *tri,
MachineModuleInfo *mmi) {
if (!tii) return false;
TII = tii;
TRI = tri;
MMI = mmi;
RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL;
// Fix CFG. The later algorithms expect it to be right.
bool MadeChange = false;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) {
MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0;
Owen Anderson
committed
SmallVector<MachineOperand, 4> Cond;
if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true))
MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
MadeChange |= OptimizeImpDefsBlock(MBB);
}
bool MadeChangeThisIteration = true;
while (MadeChangeThisIteration) {
MadeChangeThisIteration = false;
MadeChangeThisIteration |= TailMergeBlocks(MF);
MadeChangeThisIteration |= OptimizeBranches(MF);
MadeChange |= MadeChangeThisIteration;
// See if any jump tables have become mergable or dead as the code generator
// did its thing.
MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
const std::vector<MachineJumpTableEntry> &JTs = JTI->getJumpTables();
if (!JTs.empty()) {
// Figure out how these jump tables should be merged.
std::vector<unsigned> JTMapping;
JTMapping.reserve(JTs.size());
// We always keep the 0th jump table.
JTMapping.push_back(0);
// Scan the jump tables, seeing if there are any duplicates. Note that this
// is N^2, which should be fixed someday.
for (unsigned i = 1, e = JTs.size(); i != e; ++i) {
if (JTs[i].MBBs.empty())
JTMapping.push_back(i);
else
JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs));
}
// If a jump table was merge with another one, walk the function rewriting
// references to jump tables to reference the new JT ID's. Keep track of
// whether we see a jump table idx, if not, we can delete the JT.
BitVector JTIsLive(JTs.size());
for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
BB != E; ++BB) {
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
I != E; ++I)
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
MachineOperand &Op = I->getOperand(op);
if (!Op.isJTI()) continue;
unsigned NewIdx = JTMapping[Op.getIndex()];
Op.setIndex(NewIdx);
// Remember that this JT is live.
JTIsLive.set(NewIdx);
}
}
// Finally, remove dead jump tables. This happens either because the
// indirect jump was unreachable (and thus deleted) or because the jump
// table was merged with some other one.
for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
if (!JTIsLive.test(i)) {
JTI->RemoveJumpTable(i);
MadeChange = true;
}
}
delete RS;
return MadeChange;
}
//===----------------------------------------------------------------------===//
// Tail Merging of Blocks
//===----------------------------------------------------------------------===//
/// HashMachineInstr - Compute a hash value for MI and its operands.
static unsigned HashMachineInstr(const MachineInstr *MI) {
unsigned Hash = MI->getOpcode();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &Op = MI->getOperand(i);
// Merge in bits from the operand if easy.
unsigned OperandHash = 0;
switch (Op.getType()) {
case MachineOperand::MO_Register: OperandHash = Op.getReg(); break;
case MachineOperand::MO_Immediate: OperandHash = Op.getImm(); break;
case MachineOperand::MO_MachineBasicBlock:
OperandHash = Op.getMBB()->getNumber();
case MachineOperand::MO_FrameIndex:
case MachineOperand::MO_ConstantPoolIndex:
case MachineOperand::MO_JumpTableIndex:
OperandHash = Op.getIndex();
break;
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol:
// Global address / external symbol are too hard, don't bother, but do
// pull in the offset.
OperandHash = Op.getOffset();
break;
default: break;
}
Hash += ((OperandHash << 3) | Op.getType()) << (i&31);
}
return Hash;
}
/// HashEndOfMBB - Hash the last few instructions in the MBB. For blocks
/// with no successors, we hash two instructions, because cross-jumping
/// only saves code when at least two instructions are removed (since a
/// branch must be inserted). For blocks with a successor, one of the
/// two blocks to be tail-merged will end with a branch already, so
/// it gains to cross-jump even for one instruction.
static unsigned HashEndOfMBB(const MachineBasicBlock *MBB,
unsigned minCommonTailLength) {
MachineBasicBlock::const_iterator I = MBB->end();
if (I == MBB->begin())
return 0; // Empty MBB.
--I;
unsigned Hash = HashMachineInstr(I);
if (I == MBB->begin() || minCommonTailLength == 1)
return Hash; // Single instr MBB.
--I;
// Hash in the second-to-last instruction.
Hash ^= HashMachineInstr(I) << 2;
return Hash;
}
/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
/// of instructions they actually have in common together at their end. Return
/// iterators for the first shared instruction in each block.
static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
MachineBasicBlock *MBB2,
MachineBasicBlock::iterator &I1,
MachineBasicBlock::iterator &I2) {
I1 = MBB1->end();
I2 = MBB2->end();
unsigned TailLen = 0;
while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
--I1; --I2;
// FIXME: This check is dubious. It's used to get around a problem where
// people incorrectly expect inline asm directives to remain in the same
// relative order. This is untenable because normal compiler
// optimizations (like this one) may reorder and/or merge these
// directives.
I1->getOpcode() == TargetInstrInfo::INLINEASM) {
++I1; ++I2;
break;
}
++TailLen;
}
return TailLen;
}
/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
/// after it, replacing it with an unconditional branch to NewDest. This
/// returns true if OldInst's block is modified, false if NewDest is modified.
void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest) {
MachineBasicBlock *OldBB = OldInst->getParent();
// Remove all the old successors of OldBB from the CFG.
while (!OldBB->succ_empty())
OldBB->removeSuccessor(OldBB->succ_begin());
// Remove all the dead instructions from the end of OldBB.
OldBB->erase(OldInst, OldBB->end());
// If OldBB isn't immediately before OldBB, insert a branch to it.
if (++MachineFunction::iterator(OldBB) != MachineFunction::iterator(NewDest))
TII->InsertBranch(*OldBB, NewDest, 0, SmallVector<MachineOperand, 0>());
OldBB->addSuccessor(NewDest);
++NumTailMerge;
}
Chris Lattner
committed
/// SplitMBBAt - Given a machine basic block and an iterator into it, split the
/// MBB so that the part before the iterator falls into the part starting at the
/// iterator. This returns the new MBB.
MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
MachineBasicBlock::iterator BBI1) {
MachineFunction &MF = *CurMBB.getParent();
Chris Lattner
committed
// Create the fall-through block.
MachineFunction::iterator MBBI = &CurMBB;
MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(CurMBB.getBasicBlock());
CurMBB.getParent()->insert(++MBBI, NewMBB);
Chris Lattner
committed
// Move all the successors of this block to the specified block.
NewMBB->transferSuccessors(&CurMBB);
Chris Lattner
committed
// Add an edge from CurMBB to NewMBB for the fall-through.
CurMBB.addSuccessor(NewMBB);
Chris Lattner
committed
// Splice the code over.
NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
// For targets that use the register scavenger, we must maintain LiveIns.
if (RS) {
RS->enterBasicBlock(&CurMBB);
if (!CurMBB.empty())
RS->forward(prior(CurMBB.end()));
BitVector RegsLiveAtExit(TRI->getNumRegs());
RS->getRegsUsed(RegsLiveAtExit, false);
for (unsigned int i=0, e=TRI->getNumRegs(); i!=e; i++)
if (RegsLiveAtExit[i])
NewMBB->addLiveIn(i);
}
Chris Lattner
committed
return NewMBB;
}
Chris Lattner
committed
/// EstimateRuntime - Make a rough estimate for how long it will take to run
/// the specified code.
static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator E) {
Chris Lattner
committed
unsigned Time = 0;
for (; I != E; ++I) {
const TargetInstrDesc &TID = I->getDesc();
if (TID.isCall())
Chris Lattner
committed
Time += 10;
else if (TID.mayLoad() || TID.mayStore())
Chris Lattner
committed
Time += 2;
else
++Time;
}
return Time;
}
// CurMBB needs to add an unconditional branch to SuccMBB (we removed these
// branches temporarily for tail merging). In the case where CurMBB ends
// with a conditional branch to the next block, optimize by reversing the
// test and conditionally branching to SuccMBB instead.
static void FixTail(MachineBasicBlock* CurMBB, MachineBasicBlock *SuccBB,
const TargetInstrInfo *TII) {
MachineFunction *MF = CurMBB->getParent();
MachineFunction::iterator I = next(MachineFunction::iterator(CurMBB));
MachineBasicBlock *TBB = 0, *FBB = 0;
Owen Anderson
committed
SmallVector<MachineOperand, 4> Cond;
if (I != MF->end() &&
!TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
MachineBasicBlock *NextBB = I;
if (TBB == NextBB && !Cond.empty() && !FBB) {
if (!TII->ReverseBranchCondition(Cond)) {
TII->RemoveBranch(*CurMBB);
TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond);
return;
}
}
}
TII->InsertBranch(*CurMBB, SuccBB, NULL, SmallVector<MachineOperand, 0>());
static bool MergeCompare(const std::pair<unsigned,MachineBasicBlock*> &p,
const std::pair<unsigned,MachineBasicBlock*> &q) {
Dale Johannesen
committed
if (p.first < q.first)
return true;
else if (p.first > q.first)
return false;
else if (p.second->getNumber() < q.second->getNumber())
return true;
else if (p.second->getNumber() > q.second->getNumber())
return false;
// _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
// an object with itself.
#ifndef _GLIBCXX_DEBUG
llvm_unreachable("Predecessor appears twice");
Dale Johannesen
committed
}
/// CountTerminators - Count the number of terminators in the given
/// block and set I to the position of the first non-terminator, if there
/// is one, or MBB->end() otherwise.
static unsigned CountTerminators(MachineBasicBlock *MBB,
MachineBasicBlock::iterator &I) {
I = MBB->end();
unsigned NumTerms = 0;
for (;;) {
if (I == MBB->begin()) {
I = MBB->end();
break;
}
--I;
if (!I->getDesc().isTerminator()) break;
++NumTerms;
}
return NumTerms;
}
/// ProfitableToMerge - Check if two machine basic blocks have a common tail
/// and decide if it would be profitable to merge those tails. Return the
/// length of the common tail and iterators to the first common instruction
/// in each block.
static bool ProfitableToMerge(MachineBasicBlock *MBB1,
MachineBasicBlock *MBB2,
unsigned minCommonTailLength,
unsigned &CommonTailLen,
MachineBasicBlock::iterator &I1,
MachineBasicBlock::iterator &I2,
MachineBasicBlock *SuccBB,
MachineBasicBlock *PredBB) {
CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
MachineFunction *MF = MBB1->getParent();
if (CommonTailLen == 0)
return false;
// It's almost always profitable to merge any number of non-terminator
// instructions with the block that falls through into the common successor.
if (MBB1 == PredBB || MBB2 == PredBB) {
MachineBasicBlock::iterator I;
unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I);
if (CommonTailLen > NumTerms)
return true;
}
// If both blocks have an unconditional branch temporarily stripped out,
// treat that as an additional common instruction.
if (MBB1 != PredBB && MBB2 != PredBB &&
!MBB1->back().getDesc().isBarrier() &&
!MBB2->back().getDesc().isBarrier())
--minCommonTailLength;
// Check if the common tail is long enough to be worthwhile.
if (CommonTailLen >= minCommonTailLength)
return true;
// If we are optimizing for code size, 1 instruction in common is enough if
// we don't have to split a block. At worst we will be replacing a
// fallthrough into the common tail with a branch, which at worst breaks
// even with falling through into the duplicated common tail.
if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
(I1 == MBB1->begin() || I2 == MBB2->begin()))
return true;
return false;
}
/// ComputeSameTails - Look through all the blocks in MergePotentials that have
/// hash CurHash (guaranteed to match the last element). Build the vector
/// SameTails of all those that have the (same) largest number of instructions
/// in common of any pair of these blocks. SameTails entries contain an
/// iterator into MergePotentials (from which the MachineBasicBlock can be
/// found) and a MachineBasicBlock::iterator into that MBB indicating the
/// instruction where the matching code sequence begins.
/// Order of elements in SameTails is the reverse of the order in which
/// those blocks appear in MergePotentials (where they are not necessarily
/// consecutive).
unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
unsigned minCommonTailLength,
MachineBasicBlock *SuccBB,
MachineBasicBlock *PredBB) {
unsigned maxCommonTailLength = 0U;
SameTails.clear();
MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
MPIterator HighestMPIter = prior(MergePotentials.end());
for (MPIterator CurMPIter = prior(MergePotentials.end()),
B = MergePotentials.begin();
CurMPIter!=B && CurMPIter->first == CurHash;
for (MPIterator I = prior(CurMPIter); I->first == CurHash ; --I) {
unsigned CommonTailLen;
if (ProfitableToMerge(CurMPIter->second, I->second, minCommonTailLength,
CommonTailLen, TrialBBI1, TrialBBI2,
SuccBB, PredBB)) {
if (CommonTailLen > maxCommonTailLength) {
SameTails.clear();
maxCommonTailLength = CommonTailLen;
HighestMPIter = CurMPIter;
SameTails.push_back(std::make_pair(CurMPIter, TrialBBI1));
}
if (HighestMPIter == CurMPIter &&
CommonTailLen == maxCommonTailLength)
SameTails.push_back(std::make_pair(I, TrialBBI2));
}
break;
}
}
return maxCommonTailLength;
}
/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from
/// MergePotentials, restoring branches at ends of blocks as appropriate.
void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
MachineBasicBlock* SuccBB,
MachineBasicBlock* PredBB) {
MPIterator CurMPIter, B;
for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin();
CurMPIter->first == CurHash;
--CurMPIter) {
// Put the unconditional branch back, if we need one.
MachineBasicBlock *CurMBB = CurMPIter->second;
if (SuccBB && CurMBB != PredBB)
FixTail(CurMBB, SuccBB, TII);
if (CurMPIter->first!=CurHash)
CurMPIter++;
MergePotentials.erase(CurMPIter, MergePotentials.end());
/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist
/// only of the common tail. Create a block that does by splitting one.
unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
unsigned maxCommonTailLength) {
unsigned i, commonTailIndex;
unsigned TimeEstimate = ~0U;
for (i=0, commonTailIndex=0; i<SameTails.size(); i++) {
// Use PredBB if possible; that doesn't require a new branch.
commonTailIndex = i;
break;
}
// Otherwise, make a (fairly bogus) choice based on estimate of
// how long it will take the various blocks to execute.
unsigned t = EstimateRuntime(SameTails[i].first->second->begin(),
SameTails[i].second);
TimeEstimate = t;
commonTailIndex = i;
}
}
MachineBasicBlock::iterator BBI = SameTails[commonTailIndex].second;
MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second;
DEBUG(errs() << "\nSplitting BB#" << MBB->getNumber() << ", size "
MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI);
SameTails[commonTailIndex].first->second = newMBB;
SameTails[commonTailIndex].second = newMBB->begin();
// If we split PredBB, newMBB is the new predecessor.
PredBB = newMBB;
return commonTailIndex;
}
// See if any of the blocks in MergePotentials (which all have a common single
// successor, or all have no successor) can be tail-merged. If there is a
// successor, any blocks in MergePotentials that are not tail-merged and
// are not immediately before Succ must have an unconditional branch to
// Succ added (but the predecessor/successor lists need no adjustment).
// The lone predecessor of Succ that falls through into Succ,
// if any, is given in PredBB.
bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
MachineBasicBlock* PredBB) {
bool MadeChange = false;
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
// Except for the special cases below, tail-merge if there are at least
// this many instructions in common.
unsigned minCommonTailLength = TailMergeSize;
// If there's a successor block, there are some cases which don't require
// new branching and as such are very likely to be profitable.
if (SuccBB) {
if (SuccBB->pred_size() == MergePotentials.size() &&
!MergePotentials[0].second->empty()) {
// If all the predecessors have at least one tail instruction in common,
// merging is very likely to be a win since it won't require an increase
// in static branches, and it will decrease the static instruction count.
bool AllPredsMatch = true;
MachineBasicBlock::iterator FirstNonTerm;
unsigned MinNumTerms = CountTerminators(MergePotentials[0].second,
FirstNonTerm);
if (FirstNonTerm != MergePotentials[0].second->end()) {
for (unsigned i = 1, e = MergePotentials.size(); i != e; ++i) {
MachineBasicBlock::iterator OtherFirstNonTerm;
unsigned NumTerms = CountTerminators(MergePotentials[0].second,
OtherFirstNonTerm);
if (NumTerms < MinNumTerms)
MinNumTerms = NumTerms;
if (OtherFirstNonTerm == MergePotentials[i].second->end() ||
OtherFirstNonTerm->isIdenticalTo(FirstNonTerm)) {
AllPredsMatch = false;
break;
}
}
// If they all have an instruction in common, do any amount of merging.
if (AllPredsMatch)
minCommonTailLength = MinNumTerms + 1;
}
}
}
DEBUG(errs() << "\nTryTailMergeBlocks: ";
for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
errs() << "BB#" << MergePotentials[i].second->getNumber()
<< (i == e-1 ? "" : ", ");
errs() << "\n";
if (SuccBB) {
errs() << " with successor BB#" << SuccBB->getNumber() << '\n';
if (PredBB)
errs() << " which has fall-through from BB#"
<< PredBB->getNumber() << "\n";
}
errs() << "Looking for common tails of at least "
<< minCommonTailLength << " instruction"
<< (minCommonTailLength == 1 ? "" : "s") << '\n';
);
// Sort by hash value so that blocks with identical end sequences sort
// together.
std::stable_sort(MergePotentials.begin(), MergePotentials.end(),MergeCompare);
// Walk through equivalence sets looking for actual exact matches.
while (MergePotentials.size() > 1) {
unsigned CurHash = MergePotentials.back().first;
// Build SameTails, identifying the set of blocks with this hash code
// and with the maximum number of instructions in common.
unsigned maxCommonTailLength = ComputeSameTails(CurHash,
minCommonTailLength,
SuccBB, PredBB);
// If we didn't find any pair that has at least minCommonTailLength
// instructions in common, remove all blocks with this hash code and retry.
if (SameTails.empty()) {
RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
Chris Lattner
committed
// If one of the blocks is the entire common tail (and not the entry
// block, which we can't jump to), we can treat all blocks with this same
// tail at once. Use PredBB if that is one of the possibilities, as that
// will not introduce any extra branches.
MachineBasicBlock *EntryBB = MergePotentials.begin()->second->
getParent()->begin();
unsigned int commonTailIndex, i;
for (commonTailIndex=SameTails.size(), i=0; i<SameTails.size(); i++) {
MachineBasicBlock *MBB = SameTails[i].first->second;
if (MBB == EntryBB)
continue;
if (MBB == PredBB) {
commonTailIndex = i;
if (MBB->begin() == SameTails[i].second)
commonTailIndex = i;
if (commonTailIndex == SameTails.size() ||
(SameTails[commonTailIndex].first->second == PredBB &&
SameTails[commonTailIndex].first->second->begin() !=
SameTails[i].second)) {
// None of the blocks consist entirely of the common tail.
// Split a block so that one does.
commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength);
Chris Lattner
committed
}
MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second;
// MBB is common tail. Adjust all other BB's to jump to this one.
// Traversal must be forwards so erases work.
DEBUG(errs() << "\nUsing common tail in BB#" << MBB->getNumber()
<< " for ");
for (unsigned int i=0, e = SameTails.size(); i != e; ++i) {
DEBUG(errs() << "BB#" << SameTails[i].first->second->getNumber()
<< (i == e-1 ? "" : ", "));
// Hack the end off BB i, making it jump to BB commonTailIndex instead.
ReplaceTailWithBranchTo(SameTails[i].second, MBB);
// BB i is no longer a predecessor of SuccBB; remove it from the worklist.
MergePotentials.erase(SameTails[i].first);
// We leave commonTailIndex in the worklist in case there are other blocks
// that match it with a smaller number of instructions.
Chris Lattner
committed
MadeChange = true;
return MadeChange;
}
bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (!EnableTailMerge) return false;
bool MadeChange = false;
// First find blocks with no successors.
MergePotentials.clear();
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
if (I->succ_empty())
MergePotentials.push_back(std::make_pair(HashEndOfMBB(I, 2U), I));
// See if we can do any tail merging on those.
if (MergePotentials.size() < TailMergeThreshold &&
MergePotentials.size() >= 2)
MadeChange |= TryTailMergeBlocks(NULL, NULL);
// Look at blocks (IBB) with multiple predecessors (PBB).
// We change each predecessor to a canonical form, by
// (1) temporarily removing any unconditional branch from the predecessor
// to IBB, and
// (2) alter conditional branches so they branch to the other block
// not IBB; this may require adding back an unconditional branch to IBB
// later, where there wasn't one coming in. E.g.
// Bcc IBB
// fallthrough to QBB
// here becomes
// Bncc QBB
// with a conceptual B to IBB after that, which never actually exists.
// With those changes, we see whether the predecessors' tails match,
// and merge them if so. We change things out of canonical form and
// back to the way they were later in the process. (OptimizeBranches
// would undo some of this, but we can't use it, because we'd get into
// a compile-time infinite loop repeatedly doing and undoing the same
// transformations.)
for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
I != E; ++I) {
if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) {
SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
MachineBasicBlock *IBB = I;
MachineBasicBlock *PredBB = prior(I);
MergePotentials.clear();
for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
P != E2; ++P) {
MachineBasicBlock* PBB = *P;
// Skip blocks that loop to themselves, can't tail merge these.
continue;
// Visit each predecessor only once.
if (!UniquePreds.insert(PBB))
continue;
MachineBasicBlock *TBB = 0, *FBB = 0;
Owen Anderson
committed
SmallVector<MachineOperand, 4> Cond;
if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
// Failing case: IBB is the target of a cbr, and
// we cannot reverse the branch.
Owen Anderson
committed
SmallVector<MachineOperand, 4> NewCond(Cond);
if (TII->ReverseBranchCondition(NewCond))
continue;
// This is the QBB case described above
if (!FBB)
FBB = next(MachineFunction::iterator(PBB));
}
// Failing case: the only way IBB can be reached from PBB is via
// exception handling. Happens for landing pads. Would be nice
// to have a bit in the edge so we didn't have to do all this.
if (IBB->isLandingPad()) {
MachineFunction::iterator IP = PBB; IP++;
MachineBasicBlock* PredNextBB = NULL;
if (IP!=MF.end())
PredNextBB = IP;
if (IBB!=PredNextBB) // fallthrough
continue;
} else if (FBB) {
if (TBB!=IBB && FBB!=IBB) // cbr then ubr
continue;
} else if (Cond.empty()) {
if (TBB!=IBB) // ubr
continue;
} else {
if (TBB!=IBB && IBB!=PredNextBB) // cbr
continue;
}
}
// Remove the unconditional branch at the end, if any.
if (TBB && (Cond.empty() || FBB)) {
// reinsert conditional branch only, for now
TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond);
MergePotentials.push_back(std::make_pair(HashEndOfMBB(PBB, 1U), *P));
MadeChange |= TryTailMergeBlocks(IBB, PredBB);
// The 1 below can occur as a result of removing blocks in TryTailMergeBlocks.
PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
if (MergePotentials.size() == 1 &&
MergePotentials.begin()->second != PredBB)
FixTail(MergePotentials.begin()->second, IBB, TII);
}
}
return MadeChange;
}
//===----------------------------------------------------------------------===//
// Branch Optimization
//===----------------------------------------------------------------------===//
bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
bool MadeChange = false;
// Make sure blocks are numbered in order
MF.RenumberBlocks();
for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
MachineBasicBlock *MBB = I++;
MadeChange |= OptimizeBlock(MBB);
// If it is dead, remove it.
RemoveDeadBlock(MBB);
MadeChange = true;
++NumDeadBlocks;
}
}
return MadeChange;
Chris Lattner
committed
/// CanFallThrough - Return true if the specified block (with the specified
/// branch condition) can implicitly transfer control to the block after it by
/// falling off the end of it. This should return false if it can reach the
/// block after it, but it uses an explicit branch to do so (e.g. a table jump).
///
/// True is a conservative answer.
///
bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB,
bool BranchUnAnalyzable,
MachineBasicBlock *FBB,
Owen Anderson
committed
const SmallVectorImpl<MachineOperand> &Cond) {
Chris Lattner
committed
MachineFunction::iterator Fallthrough = CurBB;
++Fallthrough;
// If FallthroughBlock is off the end of the function, it can't fall through.
if (Fallthrough == CurBB->getParent()->end())
return false;
Chris Lattner
committed
// If FallthroughBlock isn't a successor of CurBB, no fallthrough is possible.
if (!CurBB->isSuccessor(Fallthrough))
return false;
// If we couldn't analyze the branch, examine the last instruction.
// If the block doesn't end in a known control barrier, assume fallthrough
// is possible. The isPredicable check is needed because this code can be
// called during IfConversion, where an instruction which is normally a
// Barrier is predicated and thus no longer an actual control barrier. This
// is over-conservative though, because if an instruction isn't actually
// predicated we could still treat it like a barrier.
if (BranchUnAnalyzable)
return CurBB->empty() || !CurBB->back().getDesc().isBarrier() ||
CurBB->back().getDesc().isPredicable();
// If there is no branch, control always falls through.
if (TBB == 0) return true;
// If there is some explicit branch to the fallthrough block, it can obviously
// reach, even though the branch should get folded to fall through implicitly.
Chris Lattner
committed
if (MachineFunction::iterator(TBB) == Fallthrough ||
MachineFunction::iterator(FBB) == Fallthrough)
return true;
// If it's an unconditional branch to some block not the fall through, it
// doesn't fall through.
if (Cond.empty()) return false;
// Otherwise, if it is conditional and has no explicit false block, it falls
// through.
}
Chris Lattner
committed
/// CanFallThrough - Return true if the specified can implicitly transfer
/// control to the block after it by falling off the end of it. This should
/// return false if it can reach the block after it, but it uses an explicit
/// branch to do so (e.g. a table jump).
///
/// True is a conservative answer.
///
bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB) {
MachineBasicBlock *TBB = 0, *FBB = 0;
Owen Anderson
committed
SmallVector<MachineOperand, 4> Cond;
bool CurUnAnalyzable = TII->AnalyzeBranch(*CurBB, TBB, FBB, Cond, true);
Chris Lattner
committed
return CanFallThrough(CurBB, CurUnAnalyzable, TBB, FBB, Cond);
}
/// IsBetterFallthrough - Return true if it would be clearly better to
/// fall-through to MBB1 than to fall through into MBB2. This has to return
/// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will
/// result in infinite loops.
static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
MachineBasicBlock *MBB2) {
Chris Lattner
committed
// Right now, we use a simple heuristic. If MBB2 ends with a call, and
// MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
// optimize branches that branch to either a return block or an assert block
// into a fallthrough to the return.
if (MBB1->empty() || MBB2->empty()) return false;
Christopher Lamb
committed
// If there is a clear successor ordering we make sure that one block
// will fall through to the next
if (MBB1->isSuccessor(MBB2)) return true;
if (MBB2->isSuccessor(MBB1)) return false;
MachineInstr *MBB1I = --MBB1->end();
MachineInstr *MBB2I = --MBB2->end();
return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
/// TailDuplicate - MBB unconditionally branches to SuccBB. If it is profitable,
/// duplicate SuccBB's contents in MBB to eliminate the branch.
bool BranchFolder::TailDuplicate(MachineBasicBlock *TailBB,
bool PrevFallsThrough,
MachineFunction &MF) {
// Don't try to tail-duplicate single-block loops.
if (TailBB->isSuccessor(TailBB))
return false;
// Don't tail-duplicate a block which will soon be folded into its successor.
if (TailBB->succ_size() == 1 &&
TailBB->succ_begin()[0]->pred_size() == 1)
return false;
// Duplicate up to one less that the tail-merge threshold, so that we don't
// get into an infinite loop between duplicating and merging. When optimizing