Newer
Older
LI->getLoopFor(DU.NarrowUse->getParent()) != L)
return 0;
// Our raison d'etre! Eliminate sign and zero extension.
if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
Value *NewDef = DU.WideDef;
if (DU.NarrowUse->getType() != WideType) {
unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
unsigned IVWidth = SE->getTypeSizeInBits(WideType);
if (CastWidth < IVWidth) {
// The cast isn't as wide as the IV, so insert a Trunc.
IRBuilder<> Builder(DU.NarrowUse);
NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
}
else {
// A wider extend was hidden behind a narrower one. This may induce
// another round of IV widening in which the intermediate IV becomes
// dead. It should be very rare.
DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
<< " not wide enough to subsume " << *DU.NarrowUse << "\n");
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
NewDef = DU.NarrowUse;
}
}
if (NewDef != DU.NarrowUse) {
DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
<< " replaced by " << *DU.WideDef << "\n");
++NumElimExt;
DU.NarrowUse->replaceAllUsesWith(NewDef);
DeadInsts.push_back(DU.NarrowUse);
// Now that the extend is gone, we want to expose it's uses for potential
// further simplification. We don't need to directly inform SimplifyIVUsers
// of the new users, because their parent IV will be processed later as a
// new loop phi. If we preserved IVUsers analysis, we would also want to
// push the uses of WideDef here.
// No further widening is needed. The deceased [sz]ext had done it for us.
return 0;
}
Andrew Trick
committed
// Does this user itself evaluate to a recurrence after widening?
const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
if (!WideAddRec) {
// This user does not evaluate to a recurence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
Andrew Trick
committed
IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
return 0;
}
// Assume block terminators cannot evaluate to a recurrence. We can't to
Andrew Trick
committed
// insert a Trunc after a terminator if there happens to be a critical edge.
assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
Andrew Trick
committed
"SCEV is not expected to evaluate a block terminator");
// Reuse the IV increment that SCEVExpander created as long as it dominates
// NarrowUse.
Instruction *WideUse = 0;
if (WideAddRec == WideIncExpr && HoistStep(WideInc, DU.NarrowUse, DT)) {
WideUse = WideInc;
}
else {
WideUse = CloneIVUser(DU);
if (!WideUse)
return 0;
}
Andrew Trick
committed
// Evaluation of WideAddRec ensured that the narrow expression could be
// extended outside the loop without overflow. This suggests that the wide use
// evaluates to the same expression as the extended narrow use, but doesn't
// absolutely guarantee it. Hence the following failsafe check. In rare cases
// where it fails, we simply throw away the newly created wide use.
if (WideAddRec != SE->getSCEV(WideUse)) {
DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
<< ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
DeadInsts.push_back(WideUse);
return 0;
}
// Returning WideUse pushes it on the worklist.
return WideUse;
}
Andrew Trick
committed
/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
///
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
for (Value::use_iterator UI = NarrowDef->use_begin(),
UE = NarrowDef->use_end(); UI != UE; ++UI) {
Instruction *NarrowUse = cast<Instruction>(*UI);
Andrew Trick
committed
// Handle data flow merges and bizarre phi cycles.
if (!Widened.insert(NarrowUse))
Andrew Trick
committed
continue;
NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUse, WideDef));
Andrew Trick
committed
}
}
/// CreateWideIV - Process a single induction variable. First use the
/// SCEVExpander to create a wide induction variable that evaluates to the same
/// recurrence as the original narrow IV. Then use a worklist to forward
/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all
/// interesting IV users, the narrow IV will be isolated for removal by
/// DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
///
PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Is this phi an induction variable?
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
if (!AddRec)
return NULL;
// Widen the induction variable expression.
const SCEV *WideIVExpr = IsSigned ?
SE->getSignExtendExpr(AddRec, WideType) :
SE->getZeroExtendExpr(AddRec, WideType);
assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
"Expect the new IV expression to preserve its type");
// Can the IV be extended outside the loop without overflow?
AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
if (!AddRec || AddRec->getLoop() != L)
return NULL;
// An AddRec must have loop-invariant operands. Since this AddRec is
// materialized by a loop header phi, the expression cannot have any post-loop
// operands, so they must dominate the loop header.
assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader())
&& "Loop header phi recurrence inputs do not dominate the loop");
// The rewriter provides a value for the desired IV expression. This may
// either find an existing phi or materialize a new one. Either way, we
// expect a well-formed cyclic phi-with-increments. i.e. any operand not part
// of the phi-SCC dominates the loop entry.
Instruction *InsertPt = L->getHeader()->begin();
WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
// Remembering the WideIV increment generated by SCEVExpander allows
// WidenIVUse to reuse it when widening the narrow IV's increment. We don't
// employ a general reuse mechanism because the call above is the only call to
// SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
if (BasicBlock *LatchBlock = L->getLoopLatch()) {
WideInc =
cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
WideIncExpr = SE->getSCEV(WideInc);
}
DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
++NumWidened;
// Traverse the def-use chain using a worklist starting at the original IV.
Andrew Trick
committed
assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
Widened.insert(OrigPhi);
pushNarrowIVUsers(OrigPhi, WidePhi);
while (!NarrowIVUsers.empty()) {
NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
// Process a def-use edge. This may replace the use, so don't hold a
// use_iterator across it.
Instruction *WideUse = WidenIVUse(DU);
// Follow all def-use edges from the previous narrow use.
Andrew Trick
committed
if (WideUse)
pushNarrowIVUsers(DU.NarrowUse, WideUse);
Andrew Trick
committed
// WidenIVUse may have removed the def-use edge.
if (DU.NarrowDef->use_empty())
DeadInsts.push_back(DU.NarrowDef);
return WidePhi;
}
//===----------------------------------------------------------------------===//
// Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//
void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
unsigned IVOperIdx = 0;
ICmpInst::Predicate Pred = ICmp->getPredicate();
if (IVOperand != ICmp->getOperand(0)) {
// Swapped
assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
IVOperIdx = 1;
Pred = ICmpInst::getSwappedPredicate(Pred);
}
// Get the SCEVs for the ICmp operands.
const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx));
const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx));
// Simplify unnecessary loops away.
const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent());
S = SE->getSCEVAtScope(S, ICmpLoop);
X = SE->getSCEVAtScope(X, ICmpLoop);
// If the condition is always true or always false, replace it with
// a constant value.
if (SE->isKnownPredicate(Pred, S, X))
ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));
else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X))
ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));
else
return;
DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');
++NumElimCmp;
Changed = true;
void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem,
Value *IVOperand,
// We're only interested in the case where we know something about
// the numerator.
if (IVOperand != Rem->getOperand(0))
return;
// Get the SCEVs for the ICmp operands.
const SCEV *S = SE->getSCEV(Rem->getOperand(0));
const SCEV *X = SE->getSCEV(Rem->getOperand(1));
// Simplify unnecessary loops away.
const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent());
S = SE->getSCEVAtScope(S, ICmpLoop);
X = SE->getSCEVAtScope(X, ICmpLoop);
// i % n --> i if i is in [0,n).
if ((!IsSigned || SE->isKnownNonNegative(S)) &&
SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
S, X))
Rem->replaceAllUsesWith(Rem->getOperand(0));
else {
// (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n).
const SCEV *LessOne =
SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1));
if (IsSigned && !SE->isKnownNonNegative(LessOne))
if (!SE->isKnownPredicate(IsSigned ?
ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT,
LessOne, X))
return;
ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ,
Rem->getOperand(0), Rem->getOperand(1),
"tmp");
SelectInst *Sel =
SelectInst::Create(ICmp,
ConstantInt::get(Rem->getType(), 0),
Rem->getOperand(0), "tmp", Rem);
Rem->replaceAllUsesWith(Sel);
}
if (IU) {
if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
IU->AddUsersIfInteresting(I);
DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
++NumElimRem;
Changed = true;
}
/// EliminateIVUser - Eliminate an operation that consumes a simple IV and has
/// no observable side-effect given the range of IV values.
bool IndVarSimplify::EliminateIVUser(Instruction *UseInst,
Instruction *IVOperand) {
if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
EliminateIVComparison(ICmp, IVOperand);
return true;
}
if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
bool IsSigned = Rem->getOpcode() == Instruction::SRem;
if (IsSigned || Rem->getOpcode() == Instruction::URem) {
EliminateIVRemainder(Rem, IVOperand, IsSigned);
return true;
}
}
// Eliminate any operation that SCEV can prove is an identity function.
if (!SE->isSCEVable(UseInst->getType()) ||
(UseInst->getType() != IVOperand->getType()) ||
(SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
return false;
DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
UseInst->replaceAllUsesWith(IVOperand);
++NumElimIdentity;
Changed = true;
DeadInsts.push_back(UseInst);
return true;
}
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
/// FoldIVUser - Fold an IV operand into its use. This removes increments of an
/// aligned IV when used by a instruction that ignores the low bits.
bool IndVarSimplify::FoldIVUser(Instruction *UseInst, Instruction *IVOperand) {
Value *IVSrc = 0;
unsigned OperIdx = 0;
const SCEV *FoldedExpr = 0;
switch (UseInst->getOpcode()) {
default:
return false;
case Instruction::UDiv:
case Instruction::LShr:
// We're only interested in the case where we know something about
// the numerator and have a constant denominator.
if (IVOperand != UseInst->getOperand(OperIdx) ||
!isa<ConstantInt>(UseInst->getOperand(1)))
return false;
// Attempt to fold a binary operator with constant operand.
// e.g. ((I + 1) >> 2) => I >> 2
if (IVOperand->getNumOperands() != 2 ||
!isa<ConstantInt>(IVOperand->getOperand(1)))
return false;
IVSrc = IVOperand->getOperand(0);
// IVSrc must be the (SCEVable) IV, since the other operand is const.
assert(SE->isSCEVable(IVSrc->getType()) && "Expect SCEVable IV operand");
ConstantInt *D = cast<ConstantInt>(UseInst->getOperand(1));
if (UseInst->getOpcode() == Instruction::LShr) {
// Get a constant for the divisor. See createSCEV.
uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth();
if (D->getValue().uge(BitWidth))
return false;
D = ConstantInt::get(UseInst->getContext(),
APInt(BitWidth, 1).shl(D->getZExtValue()));
}
FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D));
}
// We have something that might fold it's operand. Compare SCEVs.
if (!SE->isSCEVable(UseInst->getType()))
return false;
// Bypass the operand if SCEV can prove it has no effect.
if (SE->getSCEV(UseInst) != FoldedExpr)
return false;
DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
<< " -> " << *UseInst << '\n');
UseInst->setOperand(OperIdx, IVSrc);
assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");
++NumElimOperand;
Changed = true;
if (IVOperand->use_empty())
DeadInsts.push_back(IVOperand);
return true;
}
/// pushIVUsers - Add all uses of Def to the current IV's worklist.
///
static void pushIVUsers(
Instruction *Def,
SmallPtrSet<Instruction*,16> &Simplified,
SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end();
UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
// Avoid infinite or exponential worklist processing.
// Also ensure unique worklist users.
// If Def is a LoopPhi, it may not be in the Simplified set, so check for
// self edges first.
if (User != Def && Simplified.insert(User))
SimpleIVUsers.push_back(std::make_pair(User, Def));
}
}
/// isSimpleIVUser - Return true if this instruction generates a simple SCEV
/// expression in terms of that IV.
///
/// This is similar to IVUsers' isInsteresting() but processes each instruction
/// non-recursively when the operand is already known to be a simpleIVUser.
///
static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
if (!SE->isSCEVable(I->getType()))
return false;
// Get the symbolic expression for this instruction.
const SCEV *S = SE->getSCEV(I);
// Only consider affine recurrences.
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
if (AR && AR->getLoop() == L)
return true;
return false;
}
/// SimplifyIVUsersNoRewrite - Iteratively perform simplification on a worklist
/// of IV users. Each successive simplification may push more users which may
/// themselves be candidates for simplification.
///
/// The "NoRewrite" algorithm does not require IVUsers analysis. Instead, it
/// simplifies instructions in-place during analysis. Rather than rewriting
/// induction variables bottom-up from their users, it transforms a chain of
/// IVUsers top-down, updating the IR only when it encouters a clear
/// optimization opportunitiy. A SCEVExpander "Rewriter" instance is still
/// needed, but only used to generate a new IV (phi) of wider type for sign/zero
/// extend elimination.
///
/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
///
void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) {
std::map<PHINode *, WideIVInfo> WideIVMap;
SmallVector<PHINode*, 8> LoopPhis;
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
LoopPhis.push_back(cast<PHINode>(I));
}
// Each round of simplification iterates through the SimplifyIVUsers worklist
// for all current phis, then determines whether any IVs can be
// widened. Widening adds new phis to LoopPhis, inducing another round of
// simplification on the wide IVs.
while (!LoopPhis.empty()) {
// Evaluate as many IV expressions as possible before widening any IVs. This
// forces SCEV to set no-wrap flags before evaluating sign/zero
// extension. The first time SCEV attempts to normalize sign/zero extension,
// the result becomes final. So for the most predictable results, we delay
// evaluation of sign/zero extend evaluation until needed, and avoid running
// other SCEV based analysis prior to SimplifyIVUsersNoRewrite.
do {
PHINode *CurrIV = LoopPhis.pop_back_val();
// Information about sign/zero extensions of CurrIV.
WideIVInfo WI;
// Instructions processed by SimplifyIVUsers for CurrIV.
SmallPtrSet<Instruction*,16> Simplified;
// Use-def pairs if IV users waiting to be processed for CurrIV.
SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
// Push users of the current LoopPhi. In rare cases, pushIVUsers may be
// called multiple times for the same LoopPhi. This is the proper thing to
// do for loop header phis that use each other.
pushIVUsers(CurrIV, Simplified, SimpleIVUsers);
while (!SimpleIVUsers.empty()) {
Andrew Trick
committed
std::pair<Instruction*, Instruction*> UseOper =
SimpleIVUsers.pop_back_val();
Andrew Trick
committed
// Bypass back edges to avoid extra work.
Andrew Trick
committed
if (UseOper.first == CurrIV) continue;
FoldIVUser(UseOper.first, UseOper.second);
Andrew Trick
committed
if (EliminateIVUser(UseOper.first, UseOper.second)) {
pushIVUsers(UseOper.second, Simplified, SimpleIVUsers);
continue;
}
Andrew Trick
committed
if (CastInst *Cast = dyn_cast<CastInst>(UseOper.first)) {
bool IsSigned = Cast->getOpcode() == Instruction::SExt;
if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
CollectExtend(Cast, IsSigned, WI, SE, TD);
}
continue;
}
Andrew Trick
committed
if (isSimpleIVUser(UseOper.first, L, SE)) {
pushIVUsers(UseOper.first, Simplified, SimpleIVUsers);
}
}
if (WI.WidestNativeType) {
WideIVMap[CurrIV] = WI;
} while(!LoopPhis.empty());
for (std::map<PHINode *, WideIVInfo>::const_iterator I = WideIVMap.begin(),
E = WideIVMap.end(); I != E; ++I) {
WidenIV Widener(I->first, I->second, LI, SE, DT, DeadInsts);
if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
Changed = true;
LoopPhis.push_back(WidePhi);
}
}
WideIVMap.clear();
}
}
/// SimplifyCongruentIVs - Check for congruent phis in this loop header and
/// populate ExprToIVMap for use later.
///
void IndVarSimplify::SimplifyCongruentIVs(Loop *L) {
DenseMap<const SCEV *, PHINode *> ExprToIVMap;
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
PHINode *Phi = cast<PHINode>(I);
if (!SE->isSCEVable(Phi->getType()))
continue;
const SCEV *S = SE->getSCEV(Phi);
std::pair<DenseMap<const SCEV *, PHINode *>::const_iterator, bool> Tmp =
ExprToIVMap.insert(std::make_pair(S, Phi));
if (Tmp.second)
PHINode *OrigPhi = Tmp.first->second;
// If one phi derives from the other via GEPs, types may differ.
if (OrigPhi->getType() != Phi->getType())
continue;
// Replacing the congruent phi is sufficient because acyclic redundancy
// elimination, CSE/GVN, should handle the rest. However, once SCEV proves
// that a phi is congruent, it's almost certain to be the head of an IV
// user cycle that is isomorphic with the original phi. So it's worth
// eagerly cleaning up the common case of a single IV increment.
if (BasicBlock *LatchBlock = L->getLoopLatch()) {
Instruction *OrigInc =
cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
Instruction *IsomorphicInc =
cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
if (OrigInc != IsomorphicInc &&
OrigInc->getType() == IsomorphicInc->getType() &&
SE->getSCEV(OrigInc) == SE->getSCEV(IsomorphicInc) &&
HoistStep(OrigInc, IsomorphicInc, DT)) {
DEBUG(dbgs() << "INDVARS: Eliminated congruent iv.inc: "
<< *IsomorphicInc << '\n');
IsomorphicInc->replaceAllUsesWith(OrigInc);
DeadInsts.push_back(IsomorphicInc);
}
}
DEBUG(dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n');
++NumElimIV;
Phi->replaceAllUsesWith(OrigPhi);
DeadInsts.push_back(Phi);
}
}
//===----------------------------------------------------------------------===//
// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
// Check for expressions that ScalarEvolution generates to compute
// BackedgeTakenInfo. If these expressions have not been reduced, then expanding
// them may incur additional cost (albeit in the loop preheader).
static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
ScalarEvolution *SE) {
// If the backedge-taken count is a UDiv, it's very likely a UDiv that
// ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a
// precise expression, rather than a UDiv from the user's code. If we can't
// find a UDiv in the code with some simple searching, assume the former and
// forego rewriting the loop.
if (isa<SCEVUDivExpr>(S)) {
ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
if (!OrigCond) return true;
const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
if (R != S) {
const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
if (L != S)
return true;
}
}
if (!DisableIVRewrite || ForceLFTR)
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
return false;
// Recurse past add expressions, which commonly occur in the
// BackedgeTakenCount. They may already exist in program code, and if not,
// they are not too expensive rematerialize.
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I) {
if (isHighCostExpansion(*I, BI, SE))
return true;
}
return false;
}
// HowManyLessThans uses a Max expression whenever the loop is not guarded by
// the exit condition.
if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
return true;
// If we haven't recognized an expensive SCEV patter, assume its an expression
// produced by program code.
return false;
}
/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
/// count expression can be safely and cheaply expanded into an instruction
/// sequence that can be used by LinearFunctionTestReplace.
static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
BackedgeTakenCount->isZero())
return false;
if (!L->getExitingBlock())
return false;
// Can't rewrite non-branch yet.
BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
if (!BI)
return false;
if (isHighCostExpansion(BackedgeTakenCount, BI, SE))
return false;
/// getBackedgeIVType - Get the widest type used by the loop test after peeking
/// through Truncs.
///
/// TODO: Unnecessary when ForceLFTR is removed.
static Type *getBackedgeIVType(Loop *L) {
if (!L->getExitingBlock())
return 0;
Chris Lattner
committed
// Can't rewrite non-branch yet.
BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
if (!BI)
return 0;
ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
if (!Cond)
return 0;
Type *Ty = 0;
for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
OI != OE; ++OI) {
assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
if (!Trunc)
continue;
return Trunc->getSrcTy();
}
return Ty;
}
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
/// isLoopInvariant - Perform a quick domtree based check for loop invariance
/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems
/// gratuitous for this purpose.
static bool isLoopInvariant(Value *V, Loop *L, DominatorTree *DT) {
Instruction *Inst = dyn_cast<Instruction>(V);
if (!Inst)
return true;
return DT->properlyDominates(Inst->getParent(), L->getHeader());
}
/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
/// invariant value to the phi.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
Instruction *IncI = dyn_cast<Instruction>(IncV);
if (!IncI)
return 0;
switch (IncI->getOpcode()) {
case Instruction::Add:
case Instruction::Sub:
break;
case Instruction::GetElementPtr:
// An IV counter must preserve its type.
if (IncI->getNumOperands() == 2)
break;
default:
return 0;
}
PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
if (Phi && Phi->getParent() == L->getHeader()) {
if (isLoopInvariant(IncI->getOperand(1), L, DT))
return Phi;
return 0;
}
if (IncI->getOpcode() == Instruction::GetElementPtr)
return 0;
// Allow add/sub to be commuted.
Phi = dyn_cast<PHINode>(IncI->getOperand(1));
if (Phi && Phi->getParent() == L->getHeader()) {
if (isLoopInvariant(IncI->getOperand(0), L, DT))
return Phi;
}
return 0;
}
/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show
/// that the current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, DominatorTree *DT) {
assert(L->getExitingBlock() && "expected loop exit");
BasicBlock *LatchBlock = L->getLoopLatch();
// Don't bother with LFTR if the loop is not properly simplified.
if (!LatchBlock)
return false;
BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
assert(BI && "expected exit branch");
// Do LFTR to simplify the exit condition to an ICMP.
ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
if (!Cond)
return true;
// Do LFTR to simplify the exit ICMP to EQ/NE
ICmpInst::Predicate Pred = Cond->getPredicate();
if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
return true;
// Look for a loop invariant RHS
Value *LHS = Cond->getOperand(0);
Value *RHS = Cond->getOperand(1);
if (!isLoopInvariant(RHS, L, DT)) {
if (!isLoopInvariant(LHS, L, DT))
return true;
std::swap(LHS, RHS);
}
// Look for a simple IV counter LHS
PHINode *Phi = dyn_cast<PHINode>(LHS);
if (!Phi)
Phi = getLoopPhiForCounter(LHS, L, DT);
if (!Phi)
return true;
// Do LFTR if the exit condition's IV is *not* a simple counter.
Value *IncV = Phi->getIncomingValueForBlock(L->getLoopLatch());
return Phi != getLoopPhiForCounter(IncV, L, DT);
}
/// AlmostDeadIV - Return true if this IV has any uses other than the (soon to
/// be rewritten) loop exit test.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
Value *IncV = Phi->getIncomingValue(LatchIdx);
for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end();
UI != UE; ++UI) {
if (*UI != Cond && *UI != IncV) return false;
}
for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end();
UI != UE; ++UI) {
if (*UI != Cond && *UI != Phi) return false;
}
return true;
}
/// FindLoopCounter - Find an affine IV in canonical form.
///
/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
///
/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
/// This is difficult in general for SCEV because of potential overflow. But we
/// could at least handle constant BECounts.
static PHINode *
FindLoopCounter(Loop *L, const SCEV *BECount,
ScalarEvolution *SE, DominatorTree *DT, const TargetData *TD) {
// I'm not sure how BECount could be a pointer type, but we definitely don't
// want to LFTR that.
if (BECount->getType()->isPointerTy())
return 0;
uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
Value *Cond =
cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
// Loop over all of the PHI nodes, looking for a simple counter.
PHINode *BestPhi = 0;
const SCEV *BestInit = 0;
BasicBlock *LatchBlock = L->getLoopLatch();
assert(LatchBlock && "needsLFTR should guarantee a loop latch");
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
PHINode *Phi = cast<PHINode>(I);
if (!SE->isSCEVable(Phi->getType()))
continue;
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
if (!AR || AR->getLoop() != L || !AR->isAffine())
continue;
// AR may be a pointer type, while BECount is an integer type.
// AR may be wider than BECount. With eq/ne tests overflow is immaterial.
// AR may not be a narrower type, or we may never exit.
uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth)))
continue;
const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
if (!Step || !Step->isOne())
continue;
int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
Value *IncV = Phi->getIncomingValue(LatchIdx);
if (getLoopPhiForCounter(IncV, L, DT) != Phi)
continue;
const SCEV *Init = AR->getStart();
if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
// Don't force a live loop counter if another IV can be used.
if (AlmostDeadIV(Phi, LatchBlock, Cond))
continue;
// Prefer to count-from-zero. This is a more "canonical" counter form. It
// also prefers integer to pointer IVs.
if (BestInit->isZero() != Init->isZero()) {
if (BestInit->isZero())
continue;
}
// If two IVs both count from zero or both count from nonzero then the
// narrower is likely a dead phi that has been widened. Use the wider phi
// to allow the other to be eliminated.
if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
continue;
}
BestPhi = Phi;
BestInit = Init;
}
return BestPhi;
}
/// LinearFunctionTestReplace - This method rewrites the exit condition of the
/// loop to be a canonical != comparison against the incremented loop induction
/// variable. This pass is able to rewrite the exit tests of any loop where the
/// SCEV analysis can determine a loop-invariant trip count of the loop, which
/// is actually a much broader range than just linear tests.
Value *IndVarSimplify::
LinearFunctionTestReplace(Loop *L,
const SCEV *BackedgeTakenCount,
PHINode *IndVar,
SCEVExpander &Rewriter) {
assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
// In DisableIVRewrite mode, IndVar is not necessarily a canonical IV. In this
// mode, LFTR can ignore IV overflow and truncate to the width of
// BECount. This avoids materializing the add(zext(add)) expression.
Type *CntTy = DisableIVRewrite ?
BackedgeTakenCount->getType() : IndVar->getType();
const SCEV *IVLimit = BackedgeTakenCount;
// If the exiting block is not the same as the backedge block, we must compare
// against the preincremented value, otherwise we prefer to compare against
// the post-incremented value.
Value *CmpIndVar;
if (L->getExitingBlock() == L->getLoopLatch()) {
// Add one to the "backedge-taken" count to get the trip count.
// If this addition may overflow, we have to be more pessimistic and
// cast the induction variable before doing the add.
const SCEV *N =
SE->getAddExpr(IVLimit, SE->getConstant(IVLimit->getType(), 1));
if (CntTy == IVLimit->getType())
IVLimit = N;
else {
const SCEV *Zero = SE->getConstant(IVLimit->getType(), 0);
if ((isa<SCEVConstant>(N) && !N->isZero()) ||
SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
// No overflow. Cast the sum.
IVLimit = SE->getTruncateOrZeroExtend(N, CntTy);
} else {
// Potential overflow. Cast before doing the add.
IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
IVLimit = SE->getAddExpr(IVLimit, SE->getConstant(CntTy, 1));
}
}
// The BackedgeTaken expression contains the number of times that the
// backedge branches to the loop header. This is one less than the
// number of times the loop executes, so use the incremented indvar.
CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
} else {
// We have to use the preincremented value...
IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
CmpIndVar = IndVar;
}
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
// For unit stride, IVLimit = Start + BECount with 2's complement overflow.
// So for, non-zero start compute the IVLimit here.
bool isPtrIV = false;
Type *CmpTy = CntTy;
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
if (!AR->getStart()->isZero()) {
assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
const SCEV *IVInit = AR->getStart();
// For pointer types, sign extend BECount in order to materialize a GEP.
// Note that for DisableIVRewrite, we never run SCEVExpander on a
// pointer type, because we must preserve the existing GEPs. Instead we
// directly generate a GEP later.
if (IVInit->getType()->isPointerTy()) {
isPtrIV = true;
CmpTy = SE->getEffectiveSCEVType(IVInit->getType());
IVLimit = SE->getTruncateOrSignExtend(IVLimit, CmpTy);
}
// For integer types, truncate the IV before computing IVInit + BECount.
else {
if (SE->getTypeSizeInBits(IVInit->getType())
> SE->getTypeSizeInBits(CmpTy))
IVInit = SE->getTruncateExpr(IVInit, CmpTy);
IVLimit = SE->getAddExpr(IVInit, IVLimit);
}
}
// Expand the code for the iteration count.
IRBuilder<> Builder(BI);
assert(SE->isLoopInvariant(IVLimit, L) &&
"Computed iteration count is not loop invariant!");
Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, CmpTy, BI);
// Create a gep for IVInit + IVLimit from on an existing pointer base.
assert(isPtrIV == IndVar->getType()->isPointerTy() &&
"IndVar type must match IVInit type");
if (isPtrIV) {
Value *IVStart = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
assert(AR->getStart() == SE->getSCEV(IVStart) && "bad loop counter");
assert(SE->getSizeOfExpr(
cast<PointerType>(IVStart->getType())->getElementType())->isOne()
&& "unit stride pointer IV must be i8*");
Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
ExitCnt = Builder.CreateGEP(IVStart, ExitCnt, "lftr.limit");
Builder.SetInsertPoint(BI);
}
// Insert a new icmp_ne or icmp_eq instruction before the branch.
ICmpInst::Predicate P;
if (L->contains(BI->getSuccessor(0)))
P = ICmpInst::ICMP_NE;
P = ICmpInst::ICMP_EQ;
DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
<< " LHS:" << *CmpIndVar << '\n'
<< " op:\t"
<< (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
<< " RHS:\t" << *ExitCnt << "\n"
<< " Expr:\t" << *IVLimit << "\n");
if (SE->getTypeSizeInBits(CmpIndVar->getType())
> SE->getTypeSizeInBits(CmpTy)) {
CmpIndVar = Builder.CreateTrunc(CmpIndVar, CmpTy, "lftr.wideiv");
}
Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
Value *OrigCond = BI->getCondition();
// It's tempting to use replaceAllUsesWith here to fully replace the old
// comparison, but that's not immediately safe, since users of the old
// comparison may not be dominated by the new comparison. Instead, just
// update the branch to use the new comparison; in the common case this
// will make old comparison dead.
BI->setCondition(Cond);
DeadInsts.push_back(OrigCond);
++NumLFTR;
Changed = true;
return Cond;
}
//===----------------------------------------------------------------------===//
// SinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//
/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
BasicBlock *ExitBlock = L->getExitBlock();
if (!ExitBlock) return;
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) return;
Instruction *InsertPt = ExitBlock->getFirstNonPHI();
BasicBlock::iterator I = Preheader->getTerminator();
while (I != Preheader->begin()) {
--I;
// New instructions were inserted at the end of the preheader.
if (isa<PHINode>(I))
break;
// Don't move instructions which might have side effects, since the side
// effects need to complete before instructions inside the loop. Also don't
// move instructions which might read memory, since the loop may modify
// memory. Note that it's okay if the instruction might have undefined
// behavior: LoopSimplify guarantees that the preheader dominates the exit
// block.
if (I->mayHaveSideEffects() || I->mayReadFromMemory())