Newer
Older
}
else {
// A wider extend was hidden behind a narrower one. This may induce
// another round of IV widening in which the intermediate IV becomes
// dead. It should be very rare.
DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
<< " not wide enough to subsume " << *DU.NarrowUse << "\n");
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
NewDef = DU.NarrowUse;
}
}
if (NewDef != DU.NarrowUse) {
DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
<< " replaced by " << *DU.WideDef << "\n");
++NumElimExt;
DU.NarrowUse->replaceAllUsesWith(NewDef);
DeadInsts.push_back(DU.NarrowUse);
// Now that the extend is gone, we want to expose it's uses for potential
// further simplification. We don't need to directly inform SimplifyIVUsers
// of the new users, because their parent IV will be processed later as a
// new loop phi. If we preserved IVUsers analysis, we would also want to
// push the uses of WideDef here.
// No further widening is needed. The deceased [sz]ext had done it for us.
return 0;
}
Andrew Trick
committed
// Does this user itself evaluate to a recurrence after widening?
const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
if (!WideAddRec) {
WideAddRec = GetExtendedOperandRecurrence(DU);
}
if (!WideAddRec) {
// This user does not evaluate to a recurence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
Andrew Trick
committed
IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
return 0;
}
// Assume block terminators cannot evaluate to a recurrence. We can't to
Andrew Trick
committed
// insert a Trunc after a terminator if there happens to be a critical edge.
assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
Andrew Trick
committed
"SCEV is not expected to evaluate a block terminator");
// Reuse the IV increment that SCEVExpander created as long as it dominates
// NarrowUse.
Instruction *WideUse = 0;
if (WideAddRec == WideIncExpr
&& SCEVExpander::hoistStep(WideInc, DU.NarrowUse, DT))
WideUse = WideInc;
else {
WideUse = CloneIVUser(DU);
if (!WideUse)
return 0;
}
Andrew Trick
committed
// Evaluation of WideAddRec ensured that the narrow expression could be
// extended outside the loop without overflow. This suggests that the wide use
// evaluates to the same expression as the extended narrow use, but doesn't
// absolutely guarantee it. Hence the following failsafe check. In rare cases
// where it fails, we simply throw away the newly created wide use.
if (WideAddRec != SE->getSCEV(WideUse)) {
DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
<< ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
DeadInsts.push_back(WideUse);
return 0;
}
// Returning WideUse pushes it on the worklist.
return WideUse;
}
Andrew Trick
committed
/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
///
/// Walks every use of \p NarrowDef and queues a NarrowIVDefUse edge
/// (NarrowDef, user, WideDef) on the NarrowIVUsers worklist. A user is skipped
/// when Widened.insert() reports failure (presumably because it was already
/// recorded by an earlier visit).
///
/// \param NarrowDef  The narrow definition whose users are enumerated.
/// \param WideDef    The wide value recorded alongside each queued edge.
///
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
  for (Value::use_iterator UI = NarrowDef->use_begin(),
         UE = NarrowDef->use_end(); UI != UE; ++UI) {
    Instruction *NarrowUse = cast<Instruction>(*UI);

    // Handle data flow merges and bizarre phi cycles.
    if (!Widened.insert(NarrowUse))
      continue;

    NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUse, WideDef));
  }
}
/// CreateWideIV - Process a single induction variable. First use the
/// SCEVExpander to create a wide induction variable that evaluates to the same
/// recurrence as the original narrow IV. Then use a worklist to forward
/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all
/// interesting IV users, the narrow IV will be isolated for removal by
/// DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
///
/// \param Rewriter  Expander used to materialize the wide recurrence.
/// \returns the wide phi, or NULL when OrigPhi is not an add recurrence or its
///          extension to WideType is not an add recurrence of loop L.
///
PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
  // Is this phi an induction variable?
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
  if (!AddRec)
    return NULL;

  // Widen the induction variable expression.
  const SCEV *WideIVExpr = IsSigned ?
    SE->getSignExtendExpr(AddRec, WideType) :
    SE->getZeroExtendExpr(AddRec, WideType);

  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
         "Expect the new IV expression to preserve its type");

  // Can the IV be extended outside the loop without overflow?
  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return NULL;

  // An AddRec must have loop-invariant operands. Since this AddRec is
  // materialized by a loop header phi, the expression cannot have any post-loop
  // operands, so they must dominate the loop header.
  assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
         SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader())
         && "Loop header phi recurrence inputs do not dominate the loop");

  // The rewriter provides a value for the desired IV expression. This may
  // either find an existing phi or materialize a new one. Either way, we
  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
  // of the phi-SCC dominates the loop entry.
  Instruction *InsertPt = L->getHeader()->begin();
  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));

  // Remembering the WideIV increment generated by SCEVExpander allows
  // WidenIVUse to reuse it when widening the narrow IV's increment. We don't
  // employ a general reuse mechanism because the call above is the only call to
  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
    WideInc =
      cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
    WideIncExpr = SE->getSCEV(WideInc);
  }

  DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
  ++NumWidened;

  // Traverse the def-use chain using a worklist starting at the original IV.
  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );

  Widened.insert(OrigPhi);
  pushNarrowIVUsers(OrigPhi, WidePhi);

  while (!NarrowIVUsers.empty()) {
    NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();

    // Process a def-use edge. This may replace the use, so don't hold a
    // use_iterator across it.
    Instruction *WideUse = WidenIVUse(DU);

    // Follow all def-use edges from the previous narrow use.
    if (WideUse)
      pushNarrowIVUsers(DU.NarrowUse, WideUse);

    // WidenIVUse may have removed the def-use edge.
    if (DU.NarrowDef->use_empty())
      DeadInsts.push_back(DU.NarrowDef);
  }
  // The worklist is drained; only now hand the wide phi back to the caller.
  return WidePhi;
}
//===----------------------------------------------------------------------===//
// Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//
/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV
/// users. Each successive simplification may push more users which may
/// themselves be candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
///
/// \param L         Loop whose header phis seed the worklist.
/// \param Rewriter  Expander handed to WidenIV::CreateWideIV.
/// \param LPM       Loop pass manager forwarded to simplifyUsersOfIV.
///
/// Sets the member Changed when simplifyUsersOfIV reports a change or when a
/// wide IV is created.
void IndVarSimplify::SimplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LPPassManager &LPM) {
  SmallVector<WideIVInfo, 8> WideIVs;

  SmallVector<PHINode*, 8> LoopPhis;
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    LoopPhis.push_back(cast<PHINode>(I));
  }
  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extend evaluation until needed, and avoid running
    // other SCEV based analysis prior to SimplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      WideIVVisitor WIV(CurrIV, SE, TD);

      Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);

      // Only phis for which the visitor recorded a widest native type are
      // queued for widening.
      if (WIV.WI.WidestNativeType) {
        WideIVs.push_back(WIV.WI);
      }
    } while(!LoopPhis.empty());

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
      if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
}
//===----------------------------------------------------------------------===//
// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//
/// Check for expressions that ScalarEvolution generates to compute
/// BackedgeTakenInfo. If these expressions have not been reduced, then
/// expanding them may incur additional cost (albeit in the loop preheader).
static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
ScalarEvolution *SE) {
// If the backedge-taken count is a UDiv, it's very likely a UDiv that
// ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a
// precise expression, rather than a UDiv from the user's code. If we can't
// find a UDiv in the code with some simple searching, assume the former and
// forego rewriting the loop.
if (isa<SCEVUDivExpr>(S)) {
ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
if (!OrigCond) return true;
const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
if (R != S) {
const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
if (L != S)
return true;
}
}
Andrew Trick
committed
if (EnableIVRewrite)
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
return false;
// Recurse past add expressions, which commonly occur in the
// BackedgeTakenCount. They may already exist in program code, and if not,
// they are not too expensive rematerialize.
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I) {
if (isHighCostExpansion(*I, BI, SE))
return true;
}
return false;
}
// HowManyLessThans uses a Max expression whenever the loop is not guarded by
// the exit condition.
if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
return true;
// If we haven't recognized an expensive SCEV patter, assume its an expression
// produced by program code.
return false;
}
/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
/// count expression can be safely and cheaply expanded into an instruction
/// sequence that can be used by LinearFunctionTestReplace.
///
/// TODO: This fails for pointer-type loop counters with greater than one byte
/// strides, consequently preventing LFTR from running. For the purpose of LFTR
/// we could skip this check in the case that the LFTR loop counter (chosen by
/// FindLoopCounter) is also pointer type. Instead, we could directly convert
/// the loop test to an inequality test by checking the target data's alignment
/// of element types (given that the initial pointer value originates from or is
/// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).
/// However, we don't yet have a strong motivation for converting loop tests
/// into inequality tests.
///
/// \param L   Loop to inspect.
/// \param SE  ScalarEvolution providing the backedge-taken count.
/// \returns false when the count is not computable or is zero, when the loop
///          has no single exiting block, when that block does not end in a
///          branch, or when isHighCostExpansion rejects the count; true
///          otherwise.
static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
      BackedgeTakenCount->isZero())
    return false;

  if (!L->getExitingBlock())
    return false;

  // Can't rewrite non-branch yet.
  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
  if (!BI)
    return false;

  if (isHighCostExpansion(BackedgeTakenCount, BI, SE))
    return false;

  // Every disqualifying condition has been ruled out.
  return true;
}

/// getBackedgeIVType - Get the widest type used by the loop test after peeking
/// through Truncs.
///
/// TODO: Unnecessary when ForceLFTR is removed.
///
/// \param L  Loop whose exit test is inspected.
/// \returns the source type of the first operand of the exit compare that is a
///          TruncInst; null when the loop has no single exiting block, the
///          terminator is not a branch, the condition is not an ICmpInst, or
///          no operand is a trunc.
static Type *getBackedgeIVType(Loop *L) {
  if (!L->getExitingBlock())
    return 0;

  // Can't rewrite non-branch yet.
  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
  if (!BI)
    return 0;

  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return 0;

  // Ty is never assigned below, so the final return yields null when no
  // operand is a trunc.
  Type *Ty = 0;
  for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
      OI != OE; ++OI) {
    assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
    TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
    if (!Trunc)
      continue;

    return Trunc->getSrcTy();
  }
  return Ty;
}
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
/// invariant value to the phi.
///
/// Accepted increments are add, sub, and two-operand getelementptr
/// instructions. For add/sub the phi may be either operand; for a GEP it must
/// be operand 0.
///
/// \param IncV  Candidate increment value.
/// \param L     Loop whose header must contain the phi.
/// \param DT    Dominator tree forwarded to isLoopInvariant.
/// \returns the header phi, or null when IncV does not match.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
  Instruction *Inc = dyn_cast<Instruction>(IncV);
  if (!Inc)
    return 0;

  const unsigned Opcode = Inc->getOpcode();
  const bool IsGEP = (Opcode == Instruction::GetElementPtr);
  const bool IsAddOrSub =
    (Opcode == Instruction::Add || Opcode == Instruction::Sub);

  // An IV counter must preserve its type, so only a GEP with exactly two
  // operands is acceptable.
  if (!IsAddOrSub && !(IsGEP && Inc->getNumOperands() == 2))
    return 0;

  // First try the phi as operand 0 with the other operand as the step.
  PHINode *Candidate = dyn_cast<PHINode>(Inc->getOperand(0));
  if (Candidate && Candidate->getParent() == L->getHeader()) {
    if (!isLoopInvariant(Inc->getOperand(1), L, DT))
      return 0;
    return Candidate;
  }

  // Only add/sub are allowed to have the phi in the second position.
  if (IsGEP)
    return 0;

  Candidate = dyn_cast<PHINode>(Inc->getOperand(1));
  if (!Candidate || Candidate->getParent() != L->getHeader())
    return 0;
  if (!isLoopInvariant(Inc->getOperand(0), L, DT))
    return 0;
  return Candidate;
}
/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show
/// that the current exit test is already sufficiently canonical.
///
/// The test counts as canonical when it is an eq/ne integer compare of a
/// loop-invariant value against a header phi (or its increment) and that phi
/// is itself a simple counter as recognized by getLoopPhiForCounter.
///
/// \param L   Loop with a single exiting block ending in a branch (asserted).
/// \param DT  Dominator tree forwarded to the invariance checks.
/// \returns false when the loop has no latch or the test is already canonical.
static bool needsLFTR(Loop *L, DominatorTree *DT) {
  assert(L->getExitingBlock() && "expected loop exit");

  // Don't bother with LFTR if the loop is not properly simplified.
  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return false;

  BranchInst *ExitBr =
    dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
  assert(ExitBr && "expected exit branch");

  // Anything other than an integer compare gets rewritten.
  ICmpInst *Cmp = dyn_cast<ICmpInst>(ExitBr->getCondition());
  if (!Cmp)
    return true;

  // Any predicate other than EQ/NE gets rewritten.
  switch (Cmp->getPredicate()) {
  case ICmpInst::ICMP_NE:
  case ICmpInst::ICMP_EQ:
    break;
  default:
    return true;
  }

  // Arrange the operands so the loop-invariant one is the bound.
  Value *IVSide = Cmp->getOperand(0);
  Value *BoundSide = Cmp->getOperand(1);
  if (!isLoopInvariant(BoundSide, L, DT)) {
    if (!isLoopInvariant(IVSide, L, DT))
      return true;
    std::swap(IVSide, BoundSide);
  }

  // The IV side must be a phi or an increment of a header phi.
  PHINode *Counter = dyn_cast<PHINode>(IVSide);
  if (!Counter)
    Counter = getLoopPhiForCounter(IVSide, L, DT);
  if (!Counter)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *Next = Counter->getIncomingValueForBlock(Latch);
  return getLoopPhiForCounter(Next, L, DT) != Counter;
}
/// AlmostDeadIV - Return true if this IV has no uses other than the (soon to
/// be rewritten) loop exit test.
///
/// Concretely: every user of Phi must be Cond or Phi's latch-incoming value,
/// and every user of that incoming value must be Cond or Phi.
///
/// \param Phi         Candidate induction variable phi.
/// \param LatchBlock  Block whose incoming value is treated as the increment.
/// \param Cond        Exit condition that is allowed to use the IV.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  Value *Increment =
    Phi->getIncomingValue(Phi->getBasicBlockIndex(LatchBlock));

  // Any user of the phi besides the exit test and the increment keeps it live.
  Value::use_iterator PhiUse = Phi->use_begin();
  const Value::use_iterator PhiUseEnd = Phi->use_end();
  while (PhiUse != PhiUseEnd) {
    if (!(*PhiUse == Cond || *PhiUse == Increment))
      return false;
    ++PhiUse;
  }

  // Likewise for the increment: only the exit test and the phi may use it.
  Value::use_iterator IncUse = Increment->use_begin();
  const Value::use_iterator IncUseEnd = Increment->use_end();
  while (IncUse != IncUseEnd) {
    if (!(*IncUse == Cond || *IncUse == Phi))
      return false;
    ++IncUse;
  }

  return true;
}
/// FindLoopCounter - Find an affine IV in canonical form.
///
/// BECount may be an i8* pointer type. The pointer difference is already
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
///
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
///
/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
/// This is difficult in general for SCEV because of potential overflow. But we
/// could at least handle constant BECounts.
static PHINode *
FindLoopCounter(Loop *L, const SCEV *BECount,
ScalarEvolution *SE, DominatorTree *DT, const TargetData *TD) {
uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
Value *Cond =
cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
// Loop over all of the PHI nodes, looking for a simple counter.
PHINode *BestPhi = 0;
const SCEV *BestInit = 0;
BasicBlock *LatchBlock = L->getLoopLatch();
assert(LatchBlock && "needsLFTR should guarantee a loop latch");
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
PHINode *Phi = cast<PHINode>(I);
if (!SE->isSCEVable(Phi->getType()))
continue;
// Avoid comparing an integer IV against a pointer Limit.
if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
continue;
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
if (!AR || AR->getLoop() != L || !AR->isAffine())
continue;
// AR may be a pointer type, while BECount is an integer type.
// AR may be wider than BECount. With eq/ne tests overflow is immaterial.
// AR may not be a narrower type, or we may never exit.
uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth)))
continue;
const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
if (!Step || !Step->isOne())
continue;
int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
Value *IncV = Phi->getIncomingValue(LatchIdx);
if (getLoopPhiForCounter(IncV, L, DT) != Phi)
continue;
const SCEV *Init = AR->getStart();
if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
// Don't force a live loop counter if another IV can be used.
if (AlmostDeadIV(Phi, LatchBlock, Cond))
continue;
// Prefer to count-from-zero. This is a more "canonical" counter form. It
// also prefers integer to pointer IVs.
if (BestInit->isZero() != Init->isZero()) {
if (BestInit->isZero())
continue;
}
// If two IVs both count from zero or both count from nonzero then the
// narrower is likely a dead phi that has been widened. Use the wider phi
// to allow the other to be eliminated.
if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
continue;
}
BestPhi = Phi;
BestInit = Init;
}
return BestPhi;
}
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
/// genLoopLimit - Help LinearFunctionTestReplace by generating a value that
/// holds the RHS of the new loop test.
///
/// \param IndVar    Loop counter phi; must evaluate to an affine add
///                  recurrence of loop L (asserted).
/// \param IVCount   Count the counter is compared against, relative to its
///                  start value.
/// \param L         Loop being rewritten.
/// \param Rewriter  Expander used to materialize SCEV expressions.
/// \param SE        ScalarEvolution analysis.
/// \returns a GEP named "lftr.limit" in the preheader when a pointer counter
///          is paired with a non-pointer count; otherwise the expansion of
///          (start + IVCount), or of IVCount alone when the start is zero,
///          placed before the exit branch.
static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");

  const SCEV *Start = AR->getStart();

  // A pointer counter with an integer count: sign extend the count so it can
  // be used as a GEP index, and build the limit off the existing preheader
  // value rather than expanding a new pointer expression.
  const bool PointerIVWithIntCount =
    IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy();
  if (PointerIVWithIntCount) {
    Type *OfsTy = SE->getEffectiveSCEVType(Start->getType());
    const SCEV *Offset = SE->getTruncateOrSignExtend(IVCount, OfsTy);

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(Offset, L) &&
           "Computed iteration count is not loop invariant!");
    BranchInst *ExitBr =
      cast<BranchInst>(L->getExitingBlock()->getTerminator());
    Value *GEPOffset = Rewriter.expandCodeFor(Offset, OfsTy, ExitBr);

    Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
    assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
    // Pointer IVs other than i8* would need compensation for gep index
    // scaling. See canExpandBackedgeTakenCount comments.
    assert(SE->getSizeOfExpr(
             cast<PointerType>(GEPBase->getType())->getElementType())->isOne()
           && "unit stride pointer IV must be i8*");

    IRBuilder<> PreheaderBuilder(L->getLoopPreheader()->getTerminator());
    return PreheaderBuilder.CreateGEP(GEPBase, GEPOffset, "lftr.limit");
  }

  // Every other combination is handled as a plain SCEV sum. In practice SCEV
  // folds any pointer arithmetic away:
  //   BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
  //
  // Valid cases: (1) both integers, the most common; (2) both pointers, for
  // simple memset-style loops; (3) integer start with a pointer count, which
  // may occur when enable-iv-rewrite generates a canonical IV on top of (2).
  //
  // For unit stride the limit is Start + IVCount with 2's complement overflow;
  // a zero start needs no addition.
  const SCEV *Limit = IVCount;
  if (!AR->getStart()->isZero()) {
    assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
    const SCEV *Base = AR->getStart();
    // For integer IVs, truncate the start to the count's width before adding.
    const uint64_t BaseBits = SE->getTypeSizeInBits(Base->getType());
    const uint64_t CountBits = SE->getTypeSizeInBits(IVCount->getType());
    if (BaseBits > CountBits)
      Base = SE->getTruncateExpr(Base, IVCount->getType());

    Limit = SE->getAddExpr(Base, IVCount);
  }

  // Expand the code for the iteration count.
  BranchInst *ExitBr = cast<BranchInst>(L->getExitingBlock()->getTerminator());
  // This builder is not used afterwards; it is kept so the statement sequence
  // matches the established behavior exactly.
  IRBuilder<> Builder(ExitBr);
  assert(SE->isLoopInvariant(Limit, L) &&
         "Computed iteration count is not loop invariant!");

  // Ensure that we generate the same type as IndVar, or a smaller integer
  // type. In the presence of null pointer values, we have an integer type
  // SCEV expression for a pointer type IV value (IndVar).
  Type *LimitTy;
  if (IVCount->getType()->isPointerTy())
    LimitTy = IndVar->getType();
  else
    LimitTy = IVCount->getType();

  return Rewriter.expandCodeFor(Limit, LimitTy, ExitBr);
}
/// LinearFunctionTestReplace - This method rewrites the exit condition of the
/// loop to be a canonical != comparison against the incremented loop induction
/// variable. This pass is able to rewrite the exit tests of any loop where the
/// SCEV analysis can determine a loop-invariant trip count of the loop, which
/// is actually a much broader range than just linear tests.
///
/// \param L                   Loop to rewrite; canExpandBackedgeTakenCount
///                            must hold for it (asserted).
/// \param BackedgeTakenCount  The loop's backedge-taken count.
/// \param IndVar              Counter phi the new test compares.
/// \param Rewriter            Expander used to materialize the limit.
/// \returns the newly created "exitcond" compare. The old condition is queued
///          on DeadInsts, NumLFTR is bumped and Changed is set.
Value *IndVarSimplify::
LinearFunctionTestReplace(Loop *L,
                          const SCEV *BackedgeTakenCount,
                          PHINode *IndVar,
                          SCEVExpander &Rewriter) {
  assert(canExpandBackedgeTakenCount(L, SE) && "precondition");

  // LFTR can ignore IV overflow and truncate to the width of
  // BECount. This avoids materializing the add(zext(add)) expression.
  Type *CntTy = !EnableIVRewrite ?
    BackedgeTakenCount->getType() : IndVar->getType();

  const SCEV *IVCount = BackedgeTakenCount;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  Value *CmpIndVar;
  if (L->getExitingBlock() == L->getLoopLatch()) {
    // Add one to the "backedge-taken" count to get the trip count.
    // If this addition may overflow, we have to be more pessimistic and
    // cast the induction variable before doing the add.
    const SCEV *N =
      SE->getAddExpr(IVCount, SE->getConstant(IVCount->getType(), 1));
    if (CntTy == IVCount->getType())
      IVCount = N;
    else {
      // Only the differing-type case needs the overflow analysis. Running it
      // after IVCount was already set to N would add one a second time on the
      // fallback path.
      const SCEV *Zero = SE->getConstant(IVCount->getType(), 0);
      if ((isa<SCEVConstant>(N) && !N->isZero()) ||
          SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
        // No overflow. Cast the sum.
        IVCount = SE->getTruncateOrZeroExtend(N, CntTy);
      } else {
        // Potential overflow. Cast before doing the add.
        IVCount = SE->getTruncateOrZeroExtend(IVCount, CntTy);
        IVCount = SE->getAddExpr(IVCount, SE->getConstant(CntTy, 1));
      }
    }
    // The BackedgeTaken expression contains the number of times that the
    // backedge branches to the loop header. This is one less than the
    // number of times the loop executes, so use the incremented indvar.
    CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
  } else {
    // We must use the preincremented value...
    IVCount = SE->getTruncateOrZeroExtend(IVCount, CntTy);
    CmpIndVar = IndVar;
  }

  Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy()
         && "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
  // Stay in the loop on "!=" when the true successor is inside the loop;
  // otherwise the true successor is the exit and the test must be "==".
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
               << " LHS:" << *CmpIndVar << '\n'
               << " op:\t"
               << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
               << " RHS:\t" << *ExitCnt << "\n"
               << " IVCount:\t" << *IVCount << "\n");

  IRBuilder<> Builder(BI);
  // Narrow the compared IV when the limit was generated in a smaller type.
  if (SE->getTypeSizeInBits(CmpIndVar->getType())
      > SE->getTypeSizeInBits(ExitCnt->getType())) {
    CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                    "lftr.wideiv");
  }

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.push_back(OrigCond);

  ++NumLFTR;
  Changed = true;
  return Cond;
}
//===----------------------------------------------------------------------===//
// SinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//
/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
///
/// \param L  Loop to process. Nothing happens unless it has both a single
///           exit block and a preheader.
///
/// The preheader is walked backwards from its terminator; each instruction
/// that passes the filters below and has no user in the preheader or the loop
/// is moved in front of the current insertion point in the exit block.
void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return;

  Instruction *InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I = Preheader->getTerminator();
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop. Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip landingpad instructions.
    if (isa<LandingPadInst>(I))
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
         UI != UE; ++UI) {
      User *U = *UI;
      BasicBlock *UseBB = cast<Instruction>(U)->getParent();
      // A phi uses its operand on the incoming edge, so attribute the use to
      // the corresponding incoming block rather than the phi's own block.
      if (PHINode *P = dyn_cast<PHINode>(U)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = I;
    bool Done = false;

    // Step the iterator off ToMove before moving it, and note whether the
    // start of the preheader has been reached.
    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());

      if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    ToMove->moveBefore(InsertPt);
    if (Done) break;
    // Later (earlier-in-preheader) instructions are placed ahead of this one.
    InsertPt = ToMove;
  }
}
//===----------------------------------------------------------------------===//
// IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//
/// Driver for induction-variable simplification on a single loop.
///
/// Bails out (returning false) unless \p L is in LoopSimplify form. Otherwise
/// it fetches the required analyses and runs the sub-transformations in order:
/// non-integer IV rewriting, IV user simplification/widening, exit-value
/// rewriting, congruent-IV elimination, optional canonical IV insertion,
/// linear function test replacement, IV expression rewriting, and finally
/// dead-instruction / dead-PHI cleanup.
///
/// Which sub-transformations run depends on the EnableIVRewrite flag: the
/// IVUsers-based paths run only when it is set, the SimplifyAndExtend /
/// replaceCongruentIVs / FindLoopCounter paths only when it is clear.
///
/// \param L   The loop to transform.
/// \param LPM The loop pass manager, forwarded to the simplification helpers.
/// \returns the value of the Changed member once all sub-transformations have
///          run.
bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  if (!L->isLoopSimplifyForm())
    return false;

  // IVUsers is only requested in IV-rewrite mode; the remaining analyses are
  // fetched unconditionally. TargetData is optional and may be null.
  if (EnableIVRewrite)
    IU = &getAnalysis<IVUsers>();
  LI = &getAnalysis<LoopInfo>();
  SE = &getAnalysis<ScalarEvolution>();
  DT = &getAnalysis<DominatorTree>();
  TD = getAnalysisIfAvailable<TargetData>();

  // Reset per-loop state.
  DeadInsts.clear();
  Changed = false;

  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  RewriteNonIntegerIVs(L);

  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  if (!EnableIVRewrite) {
    Rewriter.disableCanonicalMode();
    SimplifyAndExtend(L, Rewriter, LPM);
  }

  // Check to see if this loop has a computable loop-invariant execution count.
  // If so, this means that we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values of
  // the current expressions.
  //
  if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    RewriteLoopExitValues(L, Rewriter);

  // Eliminate redundant IV users.
  if (EnableIVRewrite)
    Changed |= simplifyIVUsers(IU, SE, &LPM, DeadInsts);

  // Eliminate redundant IV cycles.
  if (!EnableIVRewrite)
    NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  // Compute the type of the largest recurrence expression, and decide whether
  // a canonical induction variable should be inserted.
  Type *LargestType = 0;
  bool NeedCannIV = false;
  bool ExpandBECount = canExpandBackedgeTakenCount(L, SE);
  if (EnableIVRewrite && ExpandBECount) {
    // If we have a known trip count and a single exit block, we'll be
    // rewriting the loop exit test condition below, which requires a
    // canonical induction variable.
    NeedCannIV = true;
    Type *Ty = BackedgeTakenCount->getType();
    // NOTE(review): this guard is nested inside 'EnableIVRewrite && ...' and
    // therefore can never be taken as written; it is kept untouched so that
    // the helper it calls stays referenced. Confirm before removing.
    if (!EnableIVRewrite) {
      // In this mode, SimplifyIVUsers may have already widened the IV used by
      // the backedge test and inserted a Trunc on the compare's operand. Get
      // the wider type to avoid creating a redundant narrow IV only used by the
      // loop test.
      LargestType = getBackedgeIVType(L);
    }
    if (!LargestType ||
        SE->getTypeSizeInBits(Ty) >
          SE->getTypeSizeInBits(LargestType))
      LargestType = SE->getEffectiveSCEVType(Ty);
  }
  if (EnableIVRewrite) {
    // Any recorded IV user forces a canonical IV; track the widest effective
    // type among the operands that will be replaced.
    for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
      NeedCannIV = true;
      Type *Ty =
        SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType());
      if (!LargestType ||
          SE->getTypeSizeInBits(Ty) >
            SE->getTypeSizeInBits(LargestType))
        LargestType = Ty;
    }
  }

  // Now that we know the largest of the induction variable expressions
  // in this loop, insert a canonical induction variable of the largest size.
  PHINode *IndVar = 0;
  if (NeedCannIV) {
    // Check to see if the loop already has any canonical-looking induction
    // variables. If any are present and wider than the planned canonical
    // induction variable, temporarily remove them, so that the Rewriter
    // doesn't attempt to reuse them.
    SmallVector<PHINode *, 2> OldCannIVs;
    while (PHINode *OldCannIV = L->getCanonicalInductionVariable()) {
      if (SE->getTypeSizeInBits(OldCannIV->getType()) >
          SE->getTypeSizeInBits(LargestType))
        OldCannIV->removeFromParent();
      else
        break;
      OldCannIVs.push_back(OldCannIV);
    }

    IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType);

    ++NumInserted;
    Changed = true;
    DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n');

    // Now that the official induction variable is established, reinsert
    // any old canonical-looking variables after it so that the IR remains
    // consistent. They will be deleted as part of the dead-PHI deletion at
    // the end of the pass.
    while (!OldCannIVs.empty()) {
      PHINode *OldCannIV = OldCannIVs.pop_back_val();
      OldCannIV->insertBefore(L->getHeader()->getFirstInsertionPt());
    }
  }
  else if (!EnableIVRewrite && ExpandBECount && needsLFTR(L, DT)) {
    // Without IV rewriting no canonical IV is inserted; look for an existing
    // counter to drive the exit test instead.
    IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.  We can currently only handle loops with a single exit.
  Value *NewICmp = 0;
  if (ExpandBECount && IndVar) {
    // Check preconditions for proper SCEVExpander operation. SCEV does not
    // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
    // pass that uses the SCEVExpander must do it. This does not work well for
    // loop passes because SCEVExpander makes assumptions about all loops, while
    // LoopPassManager only forces the current loop to be simplified.
    //
    // FIXME: SCEV expansion has no way to bail out, so the caller must
    // explicitly check any assumptions made by SCEV. Brittle.
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
    if (!AR || AR->getLoop()->getLoopPreheader())
      NewICmp =
        LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter);
  }

  // Rewrite IV-derived expressions.
  if (EnableIVRewrite)
    RewriteIVExpressions(L, Rewriter);

  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
          dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
      RecursivelyDeleteTriviallyDeadInstructions(Inst);

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  SinkUnusedInvariants(L);

  // For completeness, inform IVUsers of the IV use in the newly-created
  // loop exit test instruction.
  if (IU && NewICmp) {
    ICmpInst *NewICmpInst = dyn_cast<ICmpInst>(NewICmp);
    if (NewICmpInst)
      IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0)));
  }

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader());

  // Check a post-condition.
  assert(L->isLCSSAForm(*DT) &&
         "Indvars did not leave the loop in lcssa form!");

  // Verify that LFTR, and any other changes, have not interfered with SCEV's
  // ability to compute trip count.
#ifndef NDEBUG
  if (!EnableIVRewrite && VerifyIndvars &&
      !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    // Recompute the backedge-taken count from scratch and compare it with the
    // one computed on entry, truncating whichever is wider so the types match.
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
  }
#endif

  return Changed;
}