Newer
Older
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs());
dbgs() << '\n');
LU.DeleteFormula(F);
--i;
--e;
Any = true;
break;
}
}
}
}
}
if (Any)
LU.RecomputeRegs(LUIdx, RegUses);
}
DEBUG(dbgs() << "After pre-selection:\n";
print_uses(dbgs()));
}
/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers
/// for expressions like A, A+1, A+2, etc., allocate a single register for
/// them.
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
DEBUG(dbgs() << "Narrowing the search space by assuming that uses "
"separated by a constant offset will use the same "
"registers.\n");
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
if (F.AM.BaseOffs != 0 && F.AM.Scale == 0) {
if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) {
if (reconcileNewOffset(*LUThatHas, F.AM.BaseOffs,
/*HasBaseReg=*/false,
LU.Kind, LU.AccessTy)) {
DEBUG(dbgs() << " Deleting use "; LU.print(dbgs());
dbgs() << '\n');
LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
// Update the relocs to reference the new use.
for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
E = Fixups.end(); I != E; ++I) {
LSRFixup &Fixup = *I;
if (Fixup.LUIdx == LUIdx) {
Fixup.LUIdx = LUThatHas - &Uses.front();
Fixup.Offset += F.AM.BaseOffs;
// Add the new offset to LUThatHas' offset list.
if (LUThatHas->Offsets.back() != Fixup.Offset) {
LUThatHas->Offsets.push_back(Fixup.Offset);
if (Fixup.Offset > LUThatHas->MaxOffset)
LUThatHas->MaxOffset = Fixup.Offset;
if (Fixup.Offset < LUThatHas->MinOffset)
LUThatHas->MinOffset = Fixup.Offset;
}
DEBUG(dbgs() << "New fixup has offset "
<< Fixup.Offset << '\n');
}
if (Fixup.LUIdx == NumUses-1)
Fixup.LUIdx = LUIdx;
}
// Delete formulae from the new use which are no longer legal.
bool Any = false;
for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
Formula &F = LUThatHas->Formulae[i];
if (!isLegalUse(F.AM,
LUThatHas->MinOffset, LUThatHas->MaxOffset,
LUThatHas->Kind, LUThatHas->AccessTy, TLI)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs());
dbgs() << '\n');
LUThatHas->DeleteFormula(F);
--i;
--e;
Any = true;
}
}
if (Any)
LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
// Delete the old use.
DeleteUse(LU, LUIdx);
--LUIdx;
--NumUses;
break;
}
}
}
}
}
DEBUG(dbgs() << "After pre-selection:\n";
print_uses(dbgs()));
}
/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that
/// we've done more filtering, as it may be able to find more formulae to
/// eliminate.
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
"undesirable dedicated registers.\n");
FilterOutUndesirableDedicatedRegisters();
DEBUG(dbgs() << "After pre-selection:\n";
print_uses(dbgs()));
}
}
/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely
/// to be profitable, and then in any use which has any reference to that
/// register, delete all formulae which do not reference that register.
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
// With all other options exhausted, loop until the system is simple
// enough to handle.
SmallPtrSet<const SCEV *, 4> Taken;
while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
// Ok, we have too many of formulae on our hands to conveniently handle.
// Use a rough heuristic to thin out the list.
DEBUG(dbgs() << "The search space is too complex.\n");
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
// Pick the register which is used by the most LSRUses, which is likely
// to be a good reuse register candidate.
const SCEV *Best = 0;
unsigned BestNum = 0;
for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
I != E; ++I) {
const SCEV *Reg = *I;
if (Taken.count(Reg))
continue;
if (!Best)
Best = Reg;
else {
unsigned Count = RegUses.getUsedByIndices(Reg).count();
if (Count > BestNum) {
Best = Reg;
BestNum = Count;
}
}
}
DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
Taken.insert(Best);
// In any use with formulae which references this register, delete formulae
// which don't reference it.
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
if (!LU.Regs.count(Best)) continue;
bool Any = false;
for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
Formula &F = LU.Formulae[i];
if (!F.referencesReg(Best)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
LU.DeleteFormula(F);
--e;
--i;
Any = true;
assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
continue;
}
}
if (Any)
LU.RecomputeRegs(LUIdx, RegUses);
}
DEBUG(dbgs() << "After pre-selection:\n";
print_uses(dbgs()));
}
}
/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
/// formulae to choose from, use some rough heuristics to prune down the number
/// of formulae. This keeps the main solver from taking an extraordinary amount
/// of time in some worst-case scenarios.
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
NarrowSearchSpaceByDetectingSupersets();
NarrowSearchSpaceByCollapsingUnrolledCode();
NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
NarrowSearchSpaceByPickingWinnerRegs();
}
/// SolveRecurse - This is the recursive solver.
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
Cost &SolutionCost,
SmallVectorImpl<const Formula *> &Workspace,
const Cost &CurCost,
const SmallPtrSet<const SCEV *, 16> &CurRegs,
DenseSet<const SCEV *> &VisitedRegs) const {
// Some ideas:
// - prune more:
// - use more aggressive filtering
// - sort the formula so that the most profitable solutions are found first
// - sort the uses too
// - search faster:
// - don't compute a cost, and then compare. compare while computing a cost
// and bail early.
// - track register sets with SmallBitVector
const LSRUse &LU = Uses[Workspace.size()];
// If this use references any register that's already a part of the
// in-progress solution, consider it a requirement that a formula must
// reference that register in order to be considered. This prunes out
// unprofitable searching.
SmallSetVector<const SCEV *, 4> ReqRegs;
for (SmallPtrSet<const SCEV *, 16>::const_iterator I = CurRegs.begin(),
E = CurRegs.end(); I != E; ++I)
if (LU.Regs.count(*I))
ReqRegs.insert(*I);
bool AnySatisfiedReqRegs = false;
SmallPtrSet<const SCEV *, 16> NewRegs;
Cost NewCost;
retry:
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
// Ignore formulae which do not use any of the required registers.
for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
JE = ReqRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
if ((!F.ScaledReg || F.ScaledReg != Reg) &&
std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
F.BaseRegs.end())
goto skip;
}
AnySatisfiedReqRegs = true;
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
// Evaluate the cost of the current formula. If it's already worse than
// the current best, prune the search at that point.
NewCost = CurCost;
NewRegs = CurRegs;
NewCost.RateFormula(F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT);
if (NewCost < SolutionCost) {
Workspace.push_back(&F);
if (Workspace.size() != Uses.size()) {
SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
NewRegs, VisitedRegs);
if (F.getNumRegs() == 1 && Workspace.size() == 1)
VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
} else {
DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
dbgs() << ". Regs:";
for (SmallPtrSet<const SCEV *, 16>::const_iterator
I = NewRegs.begin(), E = NewRegs.end(); I != E; ++I)
dbgs() << ' ' << **I;
dbgs() << '\n');
SolutionCost = NewCost;
Solution = Workspace;
}
Workspace.pop_back();
}
skip:;
}
// If none of the formulae had all of the required registers, relax the
// constraint so that we don't exclude all formulae.
if (!AnySatisfiedReqRegs) {
assert(!ReqRegs.empty() && "Solver failed even without required registers");
ReqRegs.clear();
goto retry;
}
/// Solve - Choose one formula from each use. Return the results in the given
/// Solution vector.
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
SmallVector<const Formula *, 8> Workspace;
Cost SolutionCost;
SolutionCost.Loose();
Cost CurCost;
SmallPtrSet<const SCEV *, 16> CurRegs;
DenseSet<const SCEV *> VisitedRegs;
Workspace.reserve(Uses.size());
SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
CurRegs, VisitedRegs);
// Ok, we've now made all our decisions.
DEBUG(dbgs() << "\n"
"The chosen solution requires "; SolutionCost.print(dbgs());
dbgs() << ":\n";
for (size_t i = 0, e = Uses.size(); i != e; ++i) {
dbgs() << " ";
Uses[i].print(dbgs());
dbgs() << "\n"
" ";
Solution[i]->print(dbgs());
dbgs() << '\n';
});
assert(Solution.size() == Uses.size() && "Malformed solution!");
/// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up
/// the dominator tree far as we can go while still being dominated by the
/// input positions. This helps canonicalize the insert position, which
/// encourages sharing.
BasicBlock::iterator
LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
const SmallVectorImpl<Instruction *> &Inputs)
const {
for (;;) {
const Loop *IPLoop = LI.getLoopFor(IP->getParent());
unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
BasicBlock *IDom;
for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
if (!Rung) return IP;
Rung = Rung->getIDom();
if (!Rung) return IP;
IDom = Rung->getBlock();
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
// Don't climb into a loop though.
const Loop *IDomLoop = LI.getLoopFor(IDom);
unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
if (IDomDepth <= IPLoopDepth &&
(IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
break;
}
bool AllDominate = true;
Instruction *BetterPos = 0;
Instruction *Tentative = IDom->getTerminator();
for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
E = Inputs.end(); I != E; ++I) {
Instruction *Inst = *I;
if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
AllDominate = false;
break;
}
// Attempt to find an insert position in the middle of the block,
// instead of at the end, so that it can be used for other expansions.
if (IDom == Inst->getParent() &&
(!BetterPos || DT.dominates(BetterPos, Inst)))
BetterPos = llvm::next(BasicBlock::iterator(Inst));
}
if (!AllDominate)
break;
if (BetterPos)
IP = BetterPos;
else
IP = Tentative;
}
return IP;
}
/// AdjustInsertPositionForExpand - Determine an input position which will be
/// dominated by the operands and which will dominate the result.
BasicBlock::iterator
LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
const LSRFixup &LF,
const LSRUse &LU) const {
// Collect some instructions which must be dominated by the
// expanding replacement. These must be dominated by any operands that
// will be required in the expansion.
SmallVector<Instruction *, 4> Inputs;
if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
Inputs.push_back(I);
if (LU.Kind == LSRUse::ICmpZero)
if (Instruction *I =
dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
Inputs.push_back(I);
if (LF.PostIncLoops.count(L)) {
if (LF.isUseFullyOutsideLoop(L))
Inputs.push_back(L->getLoopLatch()->getTerminator());
else
Inputs.push_back(IVIncInsertPos);
}
// The expansion must also be dominated by the increment positions of any
// loops it for which it is using post-inc mode.
for (PostIncLoopSet::const_iterator I = LF.PostIncLoops.begin(),
E = LF.PostIncLoops.end(); I != E; ++I) {
const Loop *PIL = *I;
if (PIL == L) continue;
// Be dominated by the loop exit.
SmallVector<BasicBlock *, 4> ExitingBlocks;
PIL->getExitingBlocks(ExitingBlocks);
if (!ExitingBlocks.empty()) {
BasicBlock *BB = ExitingBlocks[0];
for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
Inputs.push_back(BB->getTerminator());
}
}
// Then, climb up the immediate dominator tree as far as we can go while
// still being dominated by the input positions.
IP = HoistInsertPosition(IP, Inputs);
// Don't insert instructions before PHI nodes.
while (isa<PHINode>(IP)) ++IP;
// Ignore debug intrinsics.
while (isa<DbgInfoIntrinsic>(IP)) ++IP;
return IP;
}
/// Expand - Emit instructions for the leading candidate expression for this
/// LSRUse (this is called "expanding").
Value *LSRInstance::Expand(const LSRFixup &LF,
const Formula &F,
BasicBlock::iterator IP,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) const {
const LSRUse &LU = Uses[LF.LUIdx];
// Determine an input position which will be dominated by the operands and
// which will dominate the result.
IP = AdjustInsertPositionForExpand(IP, LF, LU);
// Inform the Rewriter if we have a post-increment use, so that it can
// perform an advantageous expansion.
Rewriter.setPostInc(LF.PostIncLoops);
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
// This is the type that the user actually needs.
const Type *OpTy = LF.OperandValToReplace->getType();
// This will be the type that we'll initially expand to.
const Type *Ty = F.getType();
if (!Ty)
// No type known; just expand directly to the ultimate type.
Ty = OpTy;
else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
// Expand directly to the ultimate type if it's the right size.
Ty = OpTy;
// This is the type to do integer arithmetic in.
const Type *IntTy = SE.getEffectiveSCEVType(Ty);
// Build up a list of operands to add together to form the full base.
SmallVector<const SCEV *, 8> Ops;
// Expand the BaseRegs portion.
for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
E = F.BaseRegs.end(); I != E; ++I) {
const SCEV *Reg = *I;
assert(!Reg->isZero() && "Zero allocated in a base register!");
// If we're expanding for a post-inc user, make the post-inc adjustment.
PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
Reg = TransformForPostIncUse(Denormalize, Reg,
LF.UserInst, LF.OperandValToReplace,
Loops, SE, DT);
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
}
// Flush the operand list to suppress SCEVExpander hoisting.
if (!Ops.empty()) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
// Expand the ScaledReg portion.
Value *ICmpScaledV = 0;
if (F.AM.Scale != 0) {
const SCEV *ScaledS = F.ScaledReg;
// If we're expanding for a post-inc user, make the post-inc adjustment.
PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
LF.UserInst, LF.OperandValToReplace,
Loops, SE, DT);
if (LU.Kind == LSRUse::ICmpZero) {
// An interesting way of "folding" with an icmp is to use a negated
// scale, which we'll implement by inserting it into the other operand
// of the icmp.
assert(F.AM.Scale == -1 &&
"The only scale supported by ICmpZero uses is -1!");
ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
} else {
// Otherwise just expand the scaled register and an explicit scale,
// which is expected to be matched as part of the address.
ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
ScaledS = SE.getMulExpr(ScaledS,
SE.getConstant(ScaledS->getType(), F.AM.Scale));
Ops.push_back(ScaledS);
// Flush the operand list to suppress SCEVExpander hoisting.
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
}
// Expand the GV portion.
if (F.AM.BaseGV) {
Ops.push_back(SE.getUnknown(F.AM.BaseGV));
// Flush the operand list to suppress SCEVExpander hoisting.
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
// Expand the immediate portion.
int64_t Offset = (uint64_t)F.AM.BaseOffs + LF.Offset;
if (Offset != 0) {
if (LU.Kind == LSRUse::ICmpZero) {
// The other interesting way of "folding" with an ICmpZero is to use a
// negated immediate.
if (!ICmpScaledV)
ICmpScaledV = ConstantInt::get(IntTy, -Offset);
else {
Ops.push_back(SE.getUnknown(ICmpScaledV));
ICmpScaledV = ConstantInt::get(IntTy, Offset);
}
} else {
// Just add the immediate values. These again are expected to be matched
// as part of the address.
Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
}
}
// Expand the unfolded offset portion.
int64_t UnfoldedOffset = F.UnfoldedOffset;
if (UnfoldedOffset != 0) {
// Just add the immediate values.
Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
UnfoldedOffset)));
}
// Emit instructions summing all the operands.
const SCEV *FullS = Ops.empty() ?
SE.getConstant(IntTy, 0) :
SE.getAddExpr(Ops);
Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
// We're done expanding now, so reset the rewriter.
Rewriter.clearPostInc();
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
// An ICmpZero Formula represents an ICmp which we're handling as a
// comparison against zero. Now that we've expanded an expression for that
// form, update the ICmp's other operand.
if (LU.Kind == LSRUse::ICmpZero) {
ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
DeadInsts.push_back(CI->getOperand(1));
assert(!F.AM.BaseGV && "ICmp does not support folding a global value and "
"a scale at the same time!");
if (F.AM.Scale == -1) {
if (ICmpScaledV->getType() != OpTy) {
Instruction *Cast =
CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
OpTy, false),
ICmpScaledV, OpTy, "tmp", CI);
ICmpScaledV = Cast;
}
CI->setOperand(1, ICmpScaledV);
} else {
assert(F.AM.Scale == 0 &&
"ICmp does not support folding a global value and "
"a scale at the same time!");
Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
-(uint64_t)Offset);
if (C->getType() != OpTy)
C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
OpTy, false),
C, OpTy);
CI->setOperand(1, C);
}
}
return FullV;
}
/// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use
/// of their operands effectively happens in their predecessor blocks, so the
/// expression may need to be expanded in multiple places.
void LSRInstance::RewriteForPHI(PHINode *PN,
const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts,
Pass *P) const {
DenseMap<BasicBlock *, Value *> Inserted;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
BasicBlock *BB = PN->getIncomingBlock(i);
// If this is a critical edge, split the edge so that we do not insert
// the code on all predecessor/successor paths. We do this unless this
// is the canonical backedge for this loop, which complicates post-inc
// users.
if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
!isa<IndirectBrInst>(BB->getTerminator())) {
Loop *PNLoop = LI.getLoopFor(PN->getParent());
if (!PNLoop || PN->getParent() != PNLoop->getHeader()) {
// Split the critical edge.
BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
// If PN is outside of the loop and BB is in the loop, we want to
// move the block to be immediately before the PHI block, not
// immediately after BB.
if (L->contains(BB) && !L->contains(PN))
NewBB->moveBefore(PN->getParent());
// Splitting the edge can reduce the number of PHI entries we have.
e = PN->getNumIncomingValues();
BB = NewBB;
i = PN->getBasicBlockIndex(BB);
}
}
std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
if (!Pair.second)
PN->setIncomingValue(i, Pair.first->second);
else {
Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
const Type *OpTy = LF.OperandValToReplace->getType();
if (FullV->getType() != OpTy)
FullV =
CastInst::Create(CastInst::getCastOpcode(FullV, false,
OpTy, false),
FullV, LF.OperandValToReplace->getType(),
"tmp", BB->getTerminator());
PN->setIncomingValue(i, FullV);
Pair.first->second = FullV;
}
}
}
/// Rewrite - Emit instructions for the leading candidate expression for this
/// LSRUse (this is called "expanding"), and update the UserInst to reference
/// the newly expanded value.
void LSRInstance::Rewrite(const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts,
Pass *P) const {
// First, find an insertion point that dominates UserInst. For PHI nodes,
// find the nearest block which dominates all the relevant uses.
if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P);
} else {
Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
const Type *OpTy = LF.OperandValToReplace->getType();
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
if (FullV->getType() != OpTy) {
Instruction *Cast =
CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
FullV, OpTy, "tmp", LF.UserInst);
FullV = Cast;
}
// Update the user. ICmpZero is handled specially here (for now) because
// Expand may have updated one of the operands of the icmp already, and
// its new value may happen to be equal to LF.OperandValToReplace, in
// which case doing replaceUsesOfWith leads to replacing both operands
// with the same value. TODO: Reorganize this.
if (Uses[LF.LUIdx].Kind == LSRUse::ICmpZero)
LF.UserInst->setOperand(0, FullV);
else
LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
}
DeadInsts.push_back(LF.OperandValToReplace);
}
/// ImplementSolution - Rewrite all the fixup locations with new values,
/// following the chosen solution.
void
LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Pass *P) {
// Keep track of instructions we may have made dead, so that
// we can remove them after we are done working.
SmallVector<WeakVH, 16> DeadInsts;
SCEVExpander Rewriter(SE, "lsr");
Rewriter.disableCanonicalMode();
Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
// Expand the new value definitions and update the users.
for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
E = Fixups.end(); I != E; ++I) {
const LSRFixup &Fixup = *I;
Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P);
Changed = true;
}
// Clean up after ourselves. This must be done before deleting any
// instructions.
Rewriter.clear();
Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
: IU(P->getAnalysis<IVUsers>()),
SE(P->getAnalysis<ScalarEvolution>()),
DT(P->getAnalysis<DominatorTree>()),
LI(P->getAnalysis<LoopInfo>()),
TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
Evan Cheng
committed
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm()) return;
// If there's no interesting work to be done, bail early.
if (IU.empty()) return;
DEBUG(dbgs() << "\nLSR on loop ";
WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
dbgs() << ":\n");
// First, perform some low-level loop optimizations.
OptimizeShadowIV();
OptimizeLoopTermCond();
// Start collecting data and preparing for the solver.
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
CollectInterestingTypesAndFactors();
CollectFixupsAndInitialFormulae();
CollectLoopInvariantFixupsAndFormulae();
DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
print_uses(dbgs()));
// Now use the reuse data to generate a bunch of interesting ways
// to formulate the values needed for the uses.
GenerateAllReuseFormulae();
FilterOutUndesirableDedicatedRegisters();
NarrowSearchSpaceUsingHeuristics();
SmallVector<const Formula *, 8> Solution;
Solve(Solution);
// Release memory that is no longer needed.
Factors.clear();
Types.clear();
RegUses.clear();
#ifndef NDEBUG
// Formulae should be legal.
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
E = Uses.end(); I != E; ++I) {
const LSRUse &LU = *I;
for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
JE = LU.Formulae.end(); J != JE; ++J)
assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, TLI) &&
"Illegal formula generated!");
};
#endif
// Now that we've decided what we want, make it so.
ImplementSolution(Solution, P);
}
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
if (Factors.empty() && Types.empty()) return;
OS << "LSR has identified the following interesting factors and types: ";
bool First = true;
for (SmallSetVector<int64_t, 8>::const_iterator
I = Factors.begin(), E = Factors.end(); I != E; ++I) {
if (!First) OS << ", ";
First = false;
OS << '*' << *I;
}
for (SmallSetVector<const Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
if (!First) OS << ", ";
First = false;
OS << '(' << **I << ')';
}
OS << '\n';
}
void LSRInstance::print_fixups(raw_ostream &OS) const {
OS << "LSR is examining the following fixup sites:\n";
for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
E = Fixups.end(); I != E; ++I) {
dbgs() << " ";
OS << '\n';
}
}
void LSRInstance::print_uses(raw_ostream &OS) const {
OS << "LSR is examining the following uses:\n";
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
E = Uses.end(); I != E; ++I) {
const LSRUse &LU = *I;
dbgs() << " ";
LU.print(OS);
OS << '\n';
for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
JE = LU.Formulae.end(); J != JE; ++J) {
OS << " ";
J->print(OS);
OS << '\n';
}
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
}
void LSRInstance::print(raw_ostream &OS) const {
print_factors_and_types(OS);
print_fixups(OS);
print_uses(OS);
}
void LSRInstance::dump() const {
print(errs()); errs() << '\n';
}
namespace {
class LoopStrengthReduce : public LoopPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *const TLI;
public:
static char ID; // Pass ID, replacement for typeid
explicit LoopStrengthReduce(const TargetLowering *tli = 0);
private:
bool runOnLoop(Loop *L, LPPassManager &LPM);
void getAnalysisUsage(AnalysisUsage &AU) const;
};
}
char LoopStrengthReduce::ID = 0;
Owen Anderson
committed
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
Owen Anderson
committed
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(IVUsers)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
Owen Anderson
committed
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
return new LoopStrengthReduce(TLI);
}
LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
Owen Anderson
committed
: LoopPass(ID), TLI(tli) {
initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
}
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// We split critical edges, so we change the CFG. However, we do update
// many analyses if they are around.
AU.addPreservedID(LoopSimplifyID);
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<DominatorTree>();
AU.addPreserved<DominatorTree>();
AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
Cameron Zwarich
committed
// Requiring LoopSimplify a second time here prevents IVUsers from running
// twice, since LoopSimplify was invalidated by running ScalarEvolution.
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<IVUsers>();
AU.addPreserved<IVUsers>();
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
bool Changed = false;
// Run the main LSR transformation.
Changed |= LSRInstance(TLI, L, this).getChanged();
// At this point, it is worth checking to see if any recurrence PHIs are also
// dead, so that we can remove them as well.
Changed |= DeleteDeadPHIs(L->getHeader());