"llvm/git@repo.hca.bsc.es:rferrer/llvm-epi-0.8.git" did not exist on "77ae4d358f53085075d97bcd6cb2a04145c2f8b4"
Newer
Older
Chris Lattner
committed
//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "codegenprepare"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/ValueHandle.h"
using namespace llvm;
using namespace llvm::PatternMatch;
STATISTIC(NumBlocksElim, "Number of blocks eliminated");
STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
"sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
"of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
"computations were sunk");
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
cl::desc("Disable branch optimizations in CodeGenPrepare"));
namespace {
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer to a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *TLI;
DominatorTree *DT;
ProfileInfo *PFI;
/// CurInstIterator - As we scan instructions optimizing them, this is the
/// next instruction to optimize. Xforms that can invalidate this should
/// update it.
BasicBlock::iterator CurInstIterator;
/// Keeps track of non-local addresses that have been sunk into a block.
/// This allows us to avoid inserting duplicate code for blocks with
/// multiple load/stores of the same address.
DenseMap<Value*, Value*> SunkAddrs;
/// UpdateDT - If the CFG is modified in any way, the dominator tree may need to
/// be updated.
bool UpdateDT;
public:
static char ID; // Pass identification, replacement for typeid
explicit CodeGenPrepare(const TargetLowering *tli = 0)
: FunctionPass(ID), TLI(tli) {
initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
AU.addPreserved<ProfileInfo>();
}
private:
bool EliminateMostlyEmptyBlocks(Function &F);
bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
void EliminateMostlyEmptyBlock(BasicBlock *BB);
bool OptimizeBlock(BasicBlock &BB);
bool OptimizeInst(Instruction *I);
bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy);
bool OptimizeInlineAsmInst(CallInst *CS);
bool OptimizeCallInst(CallInst *CI);
bool MoveExtToFormExtLoad(Instruction *I);
bool OptimizeExtUses(Instruction *I);
bool DupRetToEnableTailCallOpts(ReturnInst *RI);
};
}
INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
"Optimize for code generation", false, false)
FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
return new CodeGenPrepare(TLI);
}
bool CodeGenPrepare::runOnFunction(Function &F) {
bool EverMadeChange = false;
UpdateDT = false;
DT = getAnalysisIfAvailable<DominatorTree>();
PFI = getAnalysisIfAvailable<ProfileInfo>();
// First pass, eliminate blocks that contain only PHI nodes and an
// unconditional branch.
EverMadeChange |= EliminateMostlyEmptyBlocks(F);
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
BasicBlock *BB = I++;
MadeChange |= OptimizeBlock(*BB);
}
EverMadeChange |= MadeChange;
}
SunkAddrs.clear();
if (!DisableBranchOpts) {
MadeChange = false;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
MadeChange |= ConstantFoldTerminator(BB);
if (MadeChange)
UpdateDT = true;
EverMadeChange |= MadeChange;
}

if (UpdateDT && DT)
DT->DT->recalculate(F);

return EverMadeChange;
}
/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
/// debug info directives, and an unconditional branch. Passes before isel
/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
/// isel. Start by eliminating these blocks so we can split them the way we
/// want them.
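///
/// A minimal sketch of the kind of block this removes (hypothetical IR, for
/// illustration only):
///
///   bb:                                       ; preds = %bb0, %bb1
///     %p = phi i32 [ %x, %bb0 ], [ %y, %bb1 ]
///     br label %dest
///
/// Assuming %p is only used by phi nodes in %dest, %bb can be folded into
/// %dest, with %dest's phis updated to take %x and %y directly from %bb0 and
/// %bb1.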
bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
bool MadeChange = false;
// Note that this intentionally skips the entry block.
for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
BasicBlock *BB = I++;
// If this block doesn't end with an uncond branch, ignore it.
BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
if (!BI || !BI->isUnconditional())
continue;
// If the instruction before the branch (skipping debug info) isn't a phi
// node, then other stuff is happening here.
BasicBlock::iterator BBI = BI;
if (BBI != BB->begin()) {
--BBI;
while (isa<DbgInfoIntrinsic>(BBI)) {
if (BBI == BB->begin())
break;
--BBI;
}
if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
continue;
}
// Do not break infinite loops.
BasicBlock *DestBB = BI->getSuccessor(0);
if (DestBB == BB)
continue;
if (!CanMergeBlocks(BB, DestBB))
continue;
EliminateMostlyEmptyBlock(BB);
MadeChange = true;
}
return MadeChange;
}
/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
/// single uncond branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
const BasicBlock *DestBB) const {
// We only want to eliminate blocks whose phi nodes are used by phi nodes in
// the successor. If there is a more complex condition (e.g. preheaders),
// don't mess around with them.
BasicBlock::const_iterator BBI = BB->begin();
while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
UI != E; ++UI) {
const Instruction *User = cast<Instruction>(*UI);
if (User->getParent() != DestBB || !isa<PHINode>(User))
return false;
// If User is inside DestBB block and it is a PHINode then check
// incoming value. If incoming value is not from BB then this is
// a complex condition (e.g. preheaders) we want to avoid here.
if (User->getParent() == DestBB) {
if (const PHINode *UPN = dyn_cast<PHINode>(User))
for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
if (Insn && Insn->getParent() == BB &&
Insn->getParent() != UPN->getIncomingBlock(I))
return false;
}
}
}
}
// If BB and DestBB contain any common predecessors, then the phi nodes in BB
// and DestBB may have conflicting incoming values for the block. If so, we
// can't merge the block.
const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
if (!DestBBPN) return true; // no conflict.
// Collect the preds of BB.
SmallPtrSet<const BasicBlock*, 16> BBPreds;
if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
// It is faster to get preds from a PHI than with pred_iterator.
for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
BBPreds.insert(BBPN->getIncomingBlock(i));
} else {
BBPreds.insert(pred_begin(BB), pred_end(BB));
}
// Walk the preds of DestBB.
for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
if (BBPreds.count(Pred)) { // Common predecessor?
BBI = DestBB->begin();
while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
const Value *V1 = PN->getIncomingValueForBlock(Pred);
const Value *V2 = PN->getIncomingValueForBlock(BB);
// If V2 is a phi node in BB, look up what the mapped value will be.
if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
if (V2PN->getParent() == BB)
V2 = V2PN->getIncomingValueForBlock(Pred);
// If there is a conflict, bail out.
if (V1 != V2) return false;
}
}
}
return true;
}
/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi's and
/// an unconditional branch in it.
void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
BranchInst *BI = cast<BranchInst>(BB->getTerminator());
BasicBlock *DestBB = BI->getSuccessor(0);
DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
// If the destination block has a single pred, then this is a trivial edge,
// just collapse it.
if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
if (SinglePred != DestBB) {
// Remember if SinglePred was the entry block of the function. If so, we
// will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
MergeBasicBlockIntoOnlyPred(DestBB, this);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
return;
}
}
// Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
// to handle the new incoming edges it is about to have.
PHINode *PN;
for (BasicBlock::iterator BBI = DestBB->begin();
(PN = dyn_cast<PHINode>(BBI)); ++BBI) {
// Remove the incoming value for BB, and remember it.
Value *InVal = PN->removeIncomingValue(BB, false);
// Two options: either the InVal is a phi node defined in BB or it is some
// value that dominates BB.
PHINode *InValPhi = dyn_cast<PHINode>(InVal);
if (InValPhi && InValPhi->getParent() == BB) {
// Add all of the input values of the input PHI as inputs of this phi.
for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
PN->addIncoming(InValPhi->getIncomingValue(i),
InValPhi->getIncomingBlock(i));
} else {
// Otherwise, add one instance of the dominating value for each edge that
// we will be adding.
if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
} else {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
PN->addIncoming(InVal, *PI);
}
}
}
// The PHIs are now updated, change everything that refers to BB to use
// DestBB and remove BB.
BB->replaceAllUsesWith(DestBB);
if (DT) {
BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
DT->changeImmediateDominator(DestBB, NewIDom);
DT->eraseNode(BB);
}
if (PFI) {
PFI->replaceAllUses(BB, DestBB);
PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
}
BB->eraseFromParent();
++NumBlocksElim;
DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
/// sink it into user blocks to reduce the number of virtual
/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
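///
/// A minimal sketch (hypothetical IR, for illustration only): the cast is
/// rematerialized in the block that uses it, so its result no longer has to
/// live across blocks in a virtual register.
///
///   bb0:
///     %c = bitcast i8* %p to i32*
///     br label %bb1
///   bb1:
///     %v = load i32* %c
///
/// =>
///
///   bb0:
///     br label %bb1
///   bb1:
///     %c1 = bitcast i8* %p to i32*
///     %v = load i32* %c1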
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
// If this is a noop copy,
EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(CI->getType());
// This is an fp<->int conversion?
if (SrcVT.isInteger() != DstVT.isInteger())
return false;
// If this is an extension, it will be a zero or sign extension, which
// isn't a noop.
if (SrcVT.bitsLT(DstVT)) return false;
// If these values will be promoted, find out what they will be promoted
// to. This helps us consider truncates on PPC as noop copies when they
// are.
if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
// If, after promotion, these are the same types, this is a noop copy.
if (SrcVT != DstVT)
return false;
BasicBlock *DefBB = CI->getParent();
/// InsertedCasts - Only insert a cast in each block once.
DenseMap<BasicBlock*, CastInst*> InsertedCasts;
bool MadeChange = false;
for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
UI != E; ) {
Use &TheUse = UI.getUse();
Instruction *User = cast<Instruction>(*UI);
// Figure out which BB this cast is used in. For PHI's this is the
// appropriate predecessor block.
BasicBlock *UserBB = User->getParent();
if (PHINode *PN = dyn_cast<PHINode>(User)) {
UserBB = PN->getIncomingBlock(UI);
}
// Preincrement use iterator so we don't invalidate it.
++UI;
// If this user is in the same block as the cast, don't change the cast.
if (UserBB == DefBB) continue;
// If we have already inserted a cast into this block, use it.
CastInst *&InsertedCast = InsertedCasts[UserBB];
if (!InsertedCast) {
BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
InsertedCast =
CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
InsertPt);
MadeChange = true;
}
// Replace a use of the cast with a use of the new cast.
TheUse = InsertedCast;
++NumCastUses;
}
// If we removed all uses, nuke the cast.
if (CI->use_empty()) {
CI->eraseFromParent();
MadeChange = true;
}
return MadeChange;
}
/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced. This is
/// a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
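///
/// A minimal sketch (hypothetical IR, for illustration only): the compare is
/// re-created in the block that uses it, so the i1 result does not have to be
/// kept live across blocks.
///
///   bb0:
///     %c = icmp eq i32 %a, %b
///     br label %bb1
///   bb1:
///     %r = select i1 %c, i32 1, i32 2
///
/// =>
///
///   bb0:
///     br label %bb1
///   bb1:
///     %c1 = icmp eq i32 %a, %b
///     %r = select i1 %c1, i32 1, i32 2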
static bool OptimizeCmpExpression(CmpInst *CI) {
BasicBlock *DefBB = CI->getParent();
/// InsertedCmp - Only insert a cmp in each block once.
DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
bool MadeChange = false;
for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
UI != E; ) {
Use &TheUse = UI.getUse();
Instruction *User = cast<Instruction>(*UI);
// Preincrement use iterator so we don't invalidate it.
++UI;
// Don't bother for PHI nodes.
if (isa<PHINode>(User))
continue;
// Figure out which BB this cmp is used in.
BasicBlock *UserBB = User->getParent();
// If this user is in the same block as the cmp, don't change the cmp.
if (UserBB == DefBB) continue;
// If we have already inserted a cmp into this block, use it.
CmpInst *&InsertedCmp = InsertedCmps[UserBB];
if (!InsertedCmp) {
BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
InsertedCmp =
CmpInst::Create(CI->getOpcode(),
CI->getPredicate(), CI->getOperand(0),
CI->getOperand(1), "", InsertPt);
MadeChange = true;
}
// Replace a use of the cmp with a use of the new cmp.
TheUse = InsertedCmp;
++NumCmpUses;
}

// If we removed all uses, nuke the cmp.
if (CI->use_empty())
CI->eraseFromParent();
return MadeChange;
}
namespace {
class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
protected:
void replaceCall(Value *With) {
CI->replaceAllUsesWith(With);
CI->eraseFromParent();
}
bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
if (ConstantInt *SizeCI =
dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
return SizeCI->isAllOnesValue();
return false;
}
};
} // end anonymous namespace
bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
BasicBlock *BB = CI->getParent();
// Lower inline assembly if we can.
// If we found an inline asm expression, and if the target knows how to
// lower it to normal LLVM code, do so now.
if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
if (TLI->ExpandInlineAsm(CI)) {
// Avoid invalidating the iterator.
CurInstIterator = BB->begin();
// Avoid processing instructions out of order, which could cause
// reuse before a value is defined.
SunkAddrs.clear();
return true;
}
// Sink address computing for memory operands into the block.
if (OptimizeInlineAsmInst(CI))
return true;
}
// Lower all uses of llvm.objectsize.*
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
const Type *ReturnTy = CI->getType();
Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
// Substituting this can cause recursive simplifications, which can
// invalidate our iterator. Use a WeakVH to hold onto it in case this
// happens.
WeakVH IterHandle(CurInstIterator);
ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, DT);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
if (IterHandle != CurInstIterator) {
CurInstIterator = BB->begin();
SunkAddrs.clear();
}
return true;
}
// From here on out we're working with named functions.
if (CI->getCalledFunction() == 0) return false;
// We'll need TargetData from here on out.
const TargetData *TD = TLI ? TLI->getTargetData() : 0;
if (!TD) return false;
// Lower all default uses of _chk calls. This is very similar
// to what InstCombineCalls does, but here we are only lowering calls
// that have the default "don't know" as the objectsize. Anything else
// should be left alone.
CodeGenPrepareFortifiedLibCalls Simplifier;
return Simplifier.fold(CI, TD);
}
/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
/// instructions to the predecessor to enable tail call optimizations. The
/// case it is currently looking for is:
/// bb0:
/// %tmp0 = tail call i32 @f0()
/// br label %return
/// bb1:
/// %tmp1 = tail call i32 @f1()
/// br label %return
/// bb2:
/// %tmp2 = tail call i32 @f2()
/// br label %return
/// return:
/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
/// ret i32 %retval
///
/// =>
///
/// bb0:
/// %tmp0 = tail call i32 @f0()
/// ret i32 %tmp0
/// bb1:
/// %tmp1 = tail call i32 @f1()
/// ret i32 %tmp1
/// bb2:
/// %tmp2 = tail call i32 @f2()
/// ret i32 %tmp2
///
bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
Value *V = RI->getReturnValue();
if (!V)
return false;
if (PHINode *PN = dyn_cast<PHINode>(V)) {
BasicBlock *BB = RI->getParent();
if (PN->getParent() != BB)
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
// See llvm::isInTailCallPosition().
const Function *F = BB->getParent();
unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
return false;
// Make sure there are no instructions between PHI and return.
BasicBlock::iterator BI = PN;
do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
if (&*BI != RI)
return false;
/// Only dup the ReturnInst if the CallInst is likely to be emitted as a
/// tail call.
SmallVector<CallInst*, 4> TailCalls;
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
// Make sure the phi value is indeed produced by the tail call.
if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
TLI->mayBeEmittedAsTailCall(CI))
TailCalls.push_back(CI);
}
bool Changed = false;
for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
CallInst *CI = TailCalls[i];
CallSite CS(CI);
// Conservatively require the attributes of the call to match those of
// the return. Ignore noalias because it doesn't affect the call sequence.
unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes();
if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
continue;
// Make sure the call instruction is followed by an unconditional branch
// to the return block.
BasicBlock *CallBB = CI->getParent();
BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
continue;
// Duplicate the return into CallBB.
(void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
UpdateDT = Changed = true;
++NumRetsDup;
}
// If we eliminated all predecessors of the block, delete the block now.
if (Changed && pred_begin(BB) == pred_end(BB))
BB->eraseFromParent();
return Changed;
}
return false;
}
//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//
/// IsNonLocalValue - Return true if the specified values are defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
if (Instruction *I = dyn_cast<Instruction>(V))
return I->getParent() != BB;
return false;
}
/// OptimizeMemoryInst - Load and Store Instructions often have
/// addressing modes that can do significant amounts of computation. As such,
/// instruction selection will try to get the load or store to do as much
/// computation as possible for the program. The problem is that isel can only
/// see within a single block. As such, we sink as much legal addressing mode
/// stuff into the block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
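///
/// A minimal sketch (hypothetical IR, for illustration only): given
///
///   bb0:
///     %a = getelementptr [100 x i32]* %buf, i32 0, i32 %idx
///     br label %bb1
///   bb1:
///     %v = load i32* %a
///
/// the address arithmetic feeding %a is re-emitted next to the load (as
/// ptrtoint/mul/add/inttoptr "sunkaddr" instructions), so instruction
/// selection can fold it into the load's addressing mode.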
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
const Type *AccessTy) {
Value *Repl = Addr;
// Try to collapse single-value PHI nodes. This is necessary to undo
// unprofitable PRE transformations.
SmallVector<Value*, 8> worklist;
SmallPtrSet<Value*, 16> Visited;
worklist.push_back(Addr);
// Use a worklist to iteratively look through PHI nodes, and ensure that
// the addressing modes obtained from the non-PHI roots of the graph
// are equivalent.
Value *Consensus = 0;
unsigned NumUsesConsensus = 0;
bool IsNumUsesConsensusValid = false;
SmallVector<Instruction*, 16> AddrModeInsts;
ExtAddrMode AddrMode;
while (!worklist.empty()) {
Value *V = worklist.back();
worklist.pop_back();
// Break use-def graph loops.
if (Visited.count(V)) {
Consensus = 0;
break;
}
Visited.insert(V);
// For a PHI node, push all of its incoming values.
if (PHINode *P = dyn_cast<PHINode>(V)) {
for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
worklist.push_back(P->getIncomingValue(i));
continue;
}
// For non-PHIs, determine the addressing mode being computed.
SmallVector<Instruction*, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode =
AddressingModeMatcher::Match(V, AccessTy,MemoryInst,
NewAddrModeInsts, *TLI);
// This check is broken into two cases with very similar code to avoid using
// getNumUses() as much as possible. Some values have a lot of uses, so
// calling getNumUses() unconditionally caused a significant compile-time
// regression.
if (!Consensus) {
Consensus = V;
AddrMode = NewAddrMode;
AddrModeInsts = NewAddrModeInsts;
continue;
} else if (NewAddrMode == AddrMode) {
if (!IsNumUsesConsensusValid) {
NumUsesConsensus = Consensus->getNumUses();
IsNumUsesConsensusValid = true;
}
// Ensure that the obtained addressing mode is equivalent to that obtained
// for all other roots of the PHI traversal. Also, when choosing one
// such root as representative, select the one with the most uses in order
// to keep the cost modeling heuristics in AddressingModeMatcher
// applicable.
unsigned NumUses = V->getNumUses();
if (NumUses > NumUsesConsensus) {
Consensus = V;
NumUsesConsensus = NumUses;
AddrModeInsts = NewAddrModeInsts;
}
continue;
}
Consensus = 0;
break;
}
// If the addressing mode couldn't be determined, or if multiple different
// ones were determined, bail out now.
if (!Consensus) return false;
// Check to see if any of the instructions subsumed by this addr mode are
// non-local to I's BB.
bool AnyNonLocal = false;
for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
AnyNonLocal = true;
break;
}
}
// If all the instructions matched are already in this BB, don't do anything.
if (!AnyNonLocal) {
DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
return false;
}
// Insert this computation right after this user. Since our caller is
// scanning from the top of the BB to the bottom, reuses of the expr are
// guaranteed to happen later.
BasicBlock::iterator InsertPt = MemoryInst;
// Now that we have determined the addressing expression we want to use and
// know that we have to sink it into this block, check to see if we have already
// done this for some other load/store instr in this block. If so, reuse the
// computation.
Value *&SunkAddr = SunkAddrs[Addr];
if (SunkAddr) {
DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst);
if (SunkAddr->getType() != Addr->getType())
SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
} else {
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst);
const Type *IntPtrTy =
TLI->getTargetData()->getIntPtrType(AccessTy->getContext());
Value *Result = 0;
// Start with the base register. Do this first so that subsequent address
// matching finds it last, which will prevent it from trying to match it
// as the scaled value in case it happens to be a mul. That would be
// problematic if we've sunk a different mul for the scale, because then
// we'd end up sinking both muls.
if (AddrMode.BaseReg) {
Value *V = AddrMode.BaseReg;
if (V->getType()->isPointerTy())
V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
if (V->getType() != IntPtrTy)
V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true,
"sunkaddr", InsertPt);
Result = V;
}
// Add the scale value.
if (AddrMode.Scale) {
Value *V = AddrMode.ScaledReg;
if (V->getType() == IntPtrTy) {
// done.
} else if (V->getType()->isPointerTy()) {
V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
} else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
cast<IntegerType>(V->getType())->getBitWidth()) {
V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
} else {
V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
}
if (AddrMode.Scale != 1)
V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
"sunkaddr", InsertPt);
if (Result)
Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
else
Result = V;
}
// Add in the BaseGV if present.
if (AddrMode.BaseGV) {
Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
InsertPt);
if (Result)
Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
else
Result = V;
}
// Add in the Base Offset if present.
if (AddrMode.BaseOffs) {
Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
if (Result)
Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
else
Result = V;
}
if (Result == 0)
SunkAddr = Constant::getNullValue(Addr->getType());
else
SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
}
MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
if (Repl->use_empty()) {
RecursivelyDeleteTriviallyDeadInstructions(Repl);
// This address is now available for reassignment, so erase the table entry;
// we don't want to match some completely different instruction.
SunkAddrs[Addr] = 0;
}
++NumMemoryInsts;
return true;
}
/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computing into the block when
/// possible / profitable.
bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
bool MadeChange = false;
TargetLowering::AsmOperandInfoVector
TargetConstraints = TLI->ParseConstraints(CS);
unsigned ArgNo = 0;
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
// Compute the constraint code and ConstraintType to use.
TLI->ComputeConstraintToUse(OpInfo, SDValue());
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.isIndirect) {
Value *OpVal = CS->getArgOperand(ArgNo++);
MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
} else if (OpInfo.Type == InlineAsm::isInput)
ArgNo++;
}
return MadeChange;
}
/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
/// basic block as the load, unless conditions are unfavorable. This allows
/// SelectionDAG to fold the extend into the load.
///
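/// A minimal sketch (hypothetical IR, for illustration only):
///
///   bb0:
///     %v = load i16* %p
///     br label %bb1
///   bb1:
///     %e = sext i16 %v to i32
///
/// =>
///
///   bb0:
///     %v = load i16* %p
///     %e = sext i16 %v to i32
///     br label %bb1
///   bb1:
///     ; uses of %e
///
/// letting SelectionDAG select a single sign-extending load.
///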
bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
// Look for a load being extended.
LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
if (!LI) return false;
// If they're already in the same block, there's nothing to do.
if (LI->getParent() == I->getParent())
return false;
// If the load has other users and the truncate is not free, this probably
// isn't worthwhile.
if (!LI->hasOneUse() &&
TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
!TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
!TLI->isTruncateFree(I->getType(), LI->getType()))
return false;
// Check whether the target supports casts folded into loads.
unsigned LType;
if (isa<ZExtInst>(I))
LType = ISD::ZEXTLOAD;
else {
assert(isa<SExtInst>(I) && "Unexpected ext type!");
LType = ISD::SEXTLOAD;
}
if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
return false;
// Move the extend into the same block as the load, so that SelectionDAG
// can fold it.
I->removeFromParent();
I->insertAfter(LI);
++NumExtsMoved;
return true;
}
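
/// OptimizeExtUses - If both a {s|z}ext and its source value are live out of
/// the block that defines them, rewrite the out-of-block uses of the source in
/// terms of the extension (via a truncate in each user block), so that only
/// one of the two values has to be kept live across blocks. A minimal sketch
/// of the rewrite (hypothetical IR, for illustration only, assuming the
/// truncate is free on the target):
///
///   bb0:
///     %e = zext i16 %x to i32
///     br label %bb1
///   bb1:
///     %u = add i16 %x, 1
///
/// =>
///
///   bb1:
///     %x1 = trunc i32 %e to i16
///     %u = add i16 %x1, 1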
bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
BasicBlock *DefBB = I->getParent();
// If the result of a {s|z}ext and its source are both live out, rewrite all
// other uses of the source with result of extension.
Value *Src = I->getOperand(0);
if (Src->hasOneUse())
return false;
if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
return false;
// Only safe to perform the optimization if the source is also defined in
// this block.
if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
return false;
bool DefIsLiveOut = false;
for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
// Figure out which BB this ext is used in.
BasicBlock *UserBB = User->getParent();
if (UserBB == DefBB) continue;
DefIsLiveOut = true;
break;
}
if (!DefIsLiveOut)
return false;
// Make sure none of the uses are PHI nodes.
for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
BasicBlock *UserBB = User->getParent();
if (UserBB == DefBB) continue;
// Be conservative. We don't want this xform to end up introducing
// reloads just before load / store instructions.
if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
return false;
}

// InsertedTruncs - Only insert one trunc in each block once.
DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
bool MadeChange = false;
for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
UI != E; ++UI) {
Use &TheUse = UI.getUse();
Instruction *User = cast<Instruction>(*UI);
// Figure out which BB this ext is used in.
BasicBlock *UserBB = User->getParent();
if (UserBB == DefBB) continue;
// Both src and def are live in this block. Rewrite the use.
Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
if (!InsertedTrunc) {