Newer
Older
NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy);
}
NewOffset = TyBits == NewTyBits
? SE->getMulExpr(CondUse->getOffset(),
SE->getConstant(CmpTy, Scale))
: SE->getConstant(NewCmpIntTy,
cast<SCEVConstant>(CondUse->getOffset())->getValue()
->getSExtValue()*Scale);
break;
}
// Forgo this transformation if the increment happens to be
// unfortunately positioned after the condition, and the condition
// has multiple uses which prevent it from being moved immediately
// before the branch. See
// test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-*.ll
// for an example of this situation.
if (!Cond->hasOneUse()) {
for (BasicBlock::iterator I = Cond, E = Cond->getParent()->end();
I != E; ++I)
if (I == NewCmpLHS)
return Cond;
}
if (NewCmpRHS) {
// Create a new compare instruction using new stride / iv.
ICmpInst *OldCond = Cond;
// Insert new compare instruction.
Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS,
L->getHeader()->getName() + ".termcond");
DEBUG(dbgs() << " Change compare stride in Inst " << *OldCond);
DEBUG(dbgs() << " to " << *Cond << '\n');
// Remove the old compare instruction. The old indvar is probably dead too.
DeadInsts.push_back(CondUse->getOperandValToReplace());
OldCond->replaceAllUsesWith(Cond);
OldCond->eraseFromParent();
IU->IVUsesByStride[NewStride]->addUser(NewOffset, Cond, NewCmpLHS);
CondUse = &IU->IVUsesByStride[NewStride]->Users.back();
CondStride = NewStride;
++NumEliminated;
Changed = true;
}
}
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
/// OptimizeMax - Rewrite the loop's terminating condition if it uses
/// a max computation.
///
/// This addresses one narrow but acute pattern. In a loop such as:
///
///   i = 0;
///   do {
///     p[i] = 0.0;
///   } while (++i < n);
///
/// the trip count is not simply 'n', since 'n' may not be positive. The same
/// shape appears even when the user never wrote a C do-while loop, because
/// seemingly well-behaved top-test loops are commonly lowered to:
///
///   if (n > 0) {
///     i = 0;
///     do {
///       p[i] = 0.0;
///     } while (++i < n);
///   }
///
/// after which subsequent optimization may obscure the if test in such a way
/// that indvars can't find it.
///
/// When that happens, indvars creates a max expression so that it can still
/// give the loop a canonical induction variable:
///
///   i = 0;
///   max = n < 1 ? 1 : n;
///   do {
///     p[i] = 0.0;
///   } while (++i != max);
///
/// Canonical induction variables are necessary because the loop passes are
/// designed around them. The most obvious example is the LoopInfo analysis,
/// which doesn't remember trip count values: it expects to rediscover the
/// trip count each time it is needed, using a simple analysis that only
/// succeeds if the loop has a canonical induction variable.
///
/// At code generation time, however, the maximum operation can be quite
/// costly, especially inside an outer loop.
///
/// This function detects this type of loop, rewrites the condition from
/// ICMP_NE/ICMP_EQ back to a signed or unsigned less-than (or its inverse),
/// and deletes the instructions of the maximum computation.
///
/// Returns the new compare on success; otherwise returns Cond unchanged.
/// On success CondUse is redirected to the new compare.
///
ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
                                          IVStrideUse* &CondUse) {
  // Only equality / inequality exit tests are candidates.
  const bool IsEQ = Cond->getPredicate() == CmpInst::ICMP_EQ;
  if (!IsEQ && Cond->getPredicate() != CmpInst::ICMP_NE)
    return Cond;

  // The compared-against value must be a select used by nothing but Cond.
  SelectInst *MaxSel = dyn_cast<SelectInst>(Cond->getOperand(1));
  if (!MaxSel)
    return Cond;
  if (!MaxSel->hasOneUse())
    return Cond;

  const SCEV *BECount = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BECount))
    return Cond;

  // Trip count = backedge-taken count + 1.
  const SCEV *One = SE->getIntegerSCEV(1, BECount->getType());
  const SCEV *TripCount = SE->getAddExpr(BECount, One);

  // The trip count has to be a signed or unsigned max, and it has to be the
  // very expression the select computes.
  const bool IsSignedMax = isa<SCEVSMaxExpr>(TripCount);
  if (!IsSignedMax && !isa<SCEVUMaxExpr>(TripCount))
    return Cond;
  const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(TripCount);
  if (SE->getSCEV(MaxSel) != Max)
    return Cond;

  // A max with more than two operands would need additional checking and
  // setup; only the two-operand form is handled.
  if (Max->getNumOperands() != 2)
    return Cond;

  // The left operand of the max must be the constant one.
  const SCEV *MaxLHS = Max->getOperand(0);
  const SCEV *MaxRHS = Max->getOperand(1);
  if (!(MaxLHS && MaxLHS == One))
    return Cond;

  // The induction variable being tested must be the affine recurrence
  // {1,+,1}.
  const SCEVAddRecExpr *AR =
    dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Cond->getOperand(0)));
  if (!AR)
    return Cond;
  if (!AR->isAffine())
    return Cond;
  if (AR->getStart() != One)
    return Cond;
  if (AR->getStepRecurrence(*SE) != One)
    return Cond;

  assert(AR->getLoop() == L &&
         "Loop condition operand is an addrec in a different loop!");

  // Find which value operand of the select corresponds to the max's right
  // operand; it becomes the right-hand side of the new comparison. The true
  // value (operand 1) is tried before the false value (operand 2).
  Value *NewRHS = 0;
  for (unsigned OpIdx = 1; OpIdx != 3 && !NewRHS; ++OpIdx)
    if (SE->getSCEV(MaxSel->getOperand(OpIdx)) == MaxRHS)
      NewRHS = MaxSel->getOperand(OpIdx);
  if (!NewRHS)
    return Cond;

  // Pick the new predicate: signed or unsigned less-than depending on the
  // kind of max, inverted when the original test was for equality.
  CmpInst::Predicate Pred;
  if (IsSignedMax)
    Pred = CmpInst::ICMP_SLT;
  else
    Pred = CmpInst::ICMP_ULT;
  if (IsEQ)
    Pred = CmpInst::getInversePredicate(Pred);

  // Everything matches: emit the replacement compare right before the old
  // one and route all users (and the IV use record) to it.
  ICmpInst *NewCond =
    new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
  Cond->replaceAllUsesWith(NewCond);
  CondUse->setUser(NewCond);

  // Delete the max calculation. The select's condition is fetched before the
  // select is erased, and is itself erased only if nothing else uses it.
  Instruction *SelCmp = cast<Instruction>(MaxSel->getOperand(0));
  Cond->eraseFromParent();
  MaxSel->eraseFromParent();
  if (SelCmp->use_empty())
    SelCmp->eraseFromParent();
  return NewCond;
}
Devang Patel
committed
/// OptimizeShadowIV - If IV is used in a int-to-float cast
/// inside the loop then try to eliminate the cast operation.
///
/// For each constant stride, every recorded IV user that is a uitofp or
/// sitofp of a two-input PHI is examined. When the PHI starts at a constant
/// integer, is stepped by an add/sub of a strictly positive constant, and
/// its integer type is no wider than the destination's mantissa, a parallel
/// floating-point PHI and increment are created and the cast is replaced by
/// the new PHI. Nothing is done when the backedge-taken count cannot be
/// computed, or (when TLI is available) when the destination type is not
/// legal for the target.
void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    return;

  for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e;
       ++Stride) {
    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
      IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
    assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
    if (!isa<SCEVConstant>(SI->first))
      continue;

    for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
           E = SI->second->Users.end(); UI != E; /* empty */) {
      // Advance UI before looking at the candidate.
      ilist<IVStrideUse>::iterator CandidateUI = UI;
      ++UI;
      Instruction *ShadowUse = CandidateUI->getUser();
      const Type *DestTy = NULL;
      bool IsSigned = false;

      /* If shadow use is a int->float cast then insert a second IV
         to eliminate this cast.

           for (unsigned i = 0; i < n; ++i)
             foo((double)i);

         is transformed into

           double d = 0.0;
           for (unsigned i = 0; i < n; ++i, ++d)
             foo(d);
      */
      if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
        IsSigned = false;
        DestTy = UCast->getDestTy();
      } else if (SIToFPInst *SCast =
                   dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
        IsSigned = true;
        DestTy = SCast->getDestTy();
      }
      if (!DestTy) continue;

      if (TLI) {
        // If target does not support DestTy natively then do not apply
        // this transformation.
        EVT DVT = TLI->getValueType(DestTy);
        if (!TLI->isTypeLegal(DVT)) continue;
      }

      PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
      if (!PH) continue;
      if (PH->getNumIncomingValues() != 2) continue;

      // Skip when the integer type is wider than the destination's mantissa.
      const Type *SrcTy = PH->getType();
      int Mantissa = DestTy->getFPMantissaWidth();
      if ((int)SE->getTypeSizeInBits(SrcTy) > Mantissa)
        continue;

      // Work out which incoming edge comes from the preheader; the other one
      // is treated as the latch edge.
      unsigned Entry, Latch;
      if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
        Entry = 0;
        Latch = 1;
      } else {
        Entry = 1;
        Latch = 0;
      }

      ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
      if (!Init) continue;
      // Convert the start value with the same signedness as the cast being
      // replaced; zero-extending a negative start value of a signed cast
      // would produce a large positive floating-point constant instead.
      Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
                                          (double)Init->getSExtValue() :
                                          (double)Init->getZExtValue());

      BinaryOperator *Incr =
        dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
      if (!Incr) continue;
      if (Incr->getOpcode() != Instruction::Add
          && Incr->getOpcode() != Instruction::Sub)
        continue;

      /* Find the constant step: the operand of the increment that is not
         the PHI itself. */
      ConstantInt *C = NULL;
      if (Incr->getOperand(0) == PH)
        C = dyn_cast<ConstantInt>(Incr->getOperand(1));
      else if (Incr->getOperand(1) == PH)
        C = dyn_cast<ConstantInt>(Incr->getOperand(0));
      else
        continue;

      // The step must be a constant; dyn_cast yields null otherwise.
      if (!C) continue;

      // Ignore negative constants, as the code below doesn't handle them
      // correctly. TODO: Remove this restriction.
      if (!C->getValue().isStrictlyPositive()) continue;

      /* Add new PHINode. 'double d = 0.0' in above example. */
      PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);

      /* create new increment. '++d' in above example. */
      Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
      BinaryOperator *NewIncr =
        BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
                                 Instruction::FAdd : Instruction::FSub,
                               NewPH, CFP, "IV.S.next.", Incr);

      NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
      NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));

      /* Remove cast operation */
      ShadowUse->replaceAllUsesWith(NewPH);
      ShadowUse->eraseFromParent();
      NumShadow++;
      // Stop scanning this stride's users once a cast has been replaced.
      break;
    }
  }
}
/// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
/// uses in the loop, look to see if we can eliminate some, in favor of using
/// common indvars for the different uses.
///
/// Currently this only runs the shadow-IV optimization on the loop.
void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
  // TODO: implement further optimizations here.

  OptimizeShadowIV(L);
}
bool LoopStrengthReduce::StrideMightBeShared(const SCEV* Stride, Loop *L,
bool CheckPreInc) {
Evan Cheng
committed
int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
Evan Cheng
committed
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[i]);
Evan Cheng
committed
const SCEV *Share = SI->first;
if (!isa<SCEVConstant>(SI->first) || Share == Stride)
continue;
int64_t SSInt = cast<SCEVConstant>(Share)->getValue()->getSExtValue();
if (SSInt == SInt)
return true; // This can definitely be reused.
if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0)
continue;
int64_t Scale = SSInt / SInt;
bool AllUsesAreAddresses = true;
bool AllUsesAreOutsideLoop = true;
std::vector<BasedUser> UsersToProcess;
const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
AllUsesAreAddresses,
AllUsesAreOutsideLoop,
UsersToProcess);
if (AllUsesAreAddresses &&
ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) {
if (!CheckPreInc)
Evan Cheng
committed
return true;
// Any pre-inc iv use?
IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[Share];
for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
E = StrideUses.Users.end(); I != E; ++I) {
if (!I->isUseOfPostIncrementedValue())
return true;
}
Evan Cheng
committed
}
}
return false;
}
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
/// isUsedByExitBranch - Return true if the icmp feeds the conditional branch
/// that terminates its own (loop exiting) block, either directly or after
/// being combined with other conditions through a chain of single-use
/// and / or instructions located in that same block.
static bool isUsedByExitBranch(ICmpInst *Cond, Loop *L) {
  BasicBlock *CondBB = Cond->getParent();
  if (!L->isLoopExiting(CondBB))
    return false;

  // The block must end in a conditional branch.
  BranchInst *TermBr = dyn_cast<BranchInst>(CondBB->getTerminator());
  if (!TermBr)
    return false;
  if (!TermBr->isConditional())
    return false;

  // Follow the first use of the compare through any and / or instructions.
  Value *Cur = *Cond->use_begin();
  for (;;) {
    Instruction *CurInst = dyn_cast<Instruction>(Cur);
    if (!CurInst)
      break;
    const unsigned Opc = CurInst->getOpcode();
    if (Opc != Instruction::And && Opc != Instruction::Or)
      break;
    // Each link of the chain must have exactly one user and must live in
    // the block of the compare.
    if (!CurInst->hasOneUse())
      return false;
    if (CurInst->getParent() != CondBB)
      return false;
    Cur = *CurInst->use_begin();
  }

  // The chain has to end at the block's terminating branch.
  return Cur == TermBr;
}
/// ShouldCountToZero - Return true if the exit compare Cond is a candidate
/// for being rewritten so that its induction variable counts down towards a
/// test against zero.
///
/// All of the following must hold: Cond is inside L; the offset of CondUse
/// is a constant; Cond is an equality test with a single use that reaches
/// the exit branch (see isUsedByExitBranch); its left operand is an affine
/// add recurrence with a non-negative constant step; its right operand is
/// loop invariant, is not already zero and, when TLI is supplied, is not
/// already a legal icmp immediate; and the left operand has exactly two uses.
static bool ShouldCountToZero(ICmpInst *Cond, IVStrideUse* &CondUse,
                              ScalarEvolution *SE, Loop *L,
                              const TargetLowering *TLI = 0) {
  if (!L->contains(Cond))
    return false;

  if (!isa<SCEVConstant>(CondUse->getOffset()))
    return false;

  // Handle only tests for equality for the moment.
  if (!(Cond->isEquality() && Cond->hasOneUse()))
    return false;
  if (!isUsedByExitBranch(Cond, L))
    return false;

  Value *Counter = Cond->getOperand(0);
  const SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Counter));
  if (!(Rec && Rec->isAffine()))
    return false;

  const SCEVConstant *Step =
    dyn_cast<SCEVConstant>(Rec->getStepRecurrence(*SE));
  if (!Step)
    return false;
  // If it's already counting down, don't do anything.
  if (Step->getValue()->getSExtValue() < 0)
    return false;

  // If the RHS of the comparison is not a loop invariant, the rewrite
  // cannot be done.
  Value *Bound = Cond->getOperand(1);
  ConstantInt *BoundC = dyn_cast<ConstantInt>(Bound);
  if (!L->isLoopInvariant(Bound))
    return false;
  if (BoundC) {
    // Also bail out if it's already comparing against a zero.
    if (BoundC->isZero())
      return false;
    // If we are checking this before cmp stride optimization, check if it's
    // comparing against an already legal immediate.
    if (TLI && TLI->isLegalICmpImmediate(BoundC->getSExtValue()))
      return false;
  }

  // Make sure the IV is only used for counting. Value may be preinc or
  // postinc; 2 uses in either case.
  return Counter->hasNUses(2);
}
/// OptimizeLoopTermCond - Change loop terminating condition to use the
/// postinc iv when possible.
///
/// Every exiting block whose terminator is a conditional branch on an icmp
/// with a recorded IV use is visited. For each such compare the function
/// first decides whether switching to the post-incremented IV is allowed,
/// then gives OptimizeMax and (for the latch block with a constant stride)
/// ChangeCompareStride a chance to rewrite the compare. If post-increment
/// use was allowed, it finally places the compare directly before the
/// branch (moving or cloning it), subtracts the stride from the use's
/// offset and marks the use as post-incremented.
void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
  BasicBlock *LatchBlock = L->getLoopLatch();
  bool LatchExit = L->isLoopExiting(LatchBlock);
  SmallVector<BasicBlock*, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
    BasicBlock *ExitingBlock = ExitingBlocks[i];

    // Finally, get the terminating condition for the loop if possible. If we
    // can, we want to change it to use a post-incremented version of its
    // induction variable, to allow coalescing the live ranges for the IV into
    // one register value.
    BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
    if (!TermBr)
      continue;
    // FIXME: Overly conservative, termination condition could be an 'or' etc..
    if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
      continue;

    // Search IVUsesByStride to find Cond's IVUse if there is one.
    IVStrideUse *CondUse = 0;
    const SCEV *CondStride = 0;
    ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
    if (!FindIVUserForCond(Cond, CondUse, CondStride))
      continue;

    // If the latch block is exiting and it's not a single block loop, it's
    // not safe to use postinc iv in other exiting blocks. FIXME: overly
    // conservative? How about icmp stride optimization?
    bool UsePostInc = !(e > 1 && LatchExit && ExitingBlock != LatchBlock);
    if (UsePostInc && ExitingBlock != LatchBlock) {
      if (!Cond->hasOneUse())
        // See below, we don't want the condition to be cloned.
        UsePostInc = false;
      else {
        // If exiting block is the latch block, we know it's safe and profitable
        // to transform the icmp to use post-inc iv. Otherwise do so only if it
        // would not reuse another iv and its iv would be reused by other uses.
        // We are optimizing for the case where the icmp is the only use of the
        // iv.
        IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[CondStride];
        for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
               E = StrideUses.Users.end(); I != E; ++I) {
          if (I->getUser() == Cond)
            continue;
          if (!I->isUseOfPostIncrementedValue()) {
            UsePostInc = false;
            break;
          }
        }
      }

      // If the iv for this stride might be shared and any of the sharing
      // users uses the pre-inc iv, then it's not safe to use the post-inc iv.
      if (UsePostInc &&
          isa<SCEVConstant>(CondStride) &&
          StrideMightBeShared(CondStride, L, true))
        UsePostInc = false;
    }

    // If the trip count is computed in terms of a max (due to ScalarEvolution
    // being unable to find a sufficient guard, for example), change the loop
    // comparison to use SLT or ULT instead of NE.
    Cond = OptimizeMax(L, Cond, CondUse);

    // If possible, change stride and operands of the compare instruction to
    // eliminate one stride. However, avoid rewriting the compare instruction
    // with an iv of new stride if it's likely the new stride uses will be
    // rewritten using the stride of the compare instruction.
    if (ExitingBlock == LatchBlock && isa<SCEVConstant>(CondStride)) {
      // If the condition stride is a constant and it's the only use, we might
      // want to optimize it first by turning it to count toward zero.
      if (!StrideMightBeShared(CondStride, L, false) &&
          !ShouldCountToZero(Cond, CondUse, SE, L, TLI))
        Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
    }

    if (!UsePostInc)
      continue;

    // It's possible for the setcc instruction to be anywhere in the loop, and
    // possible for it to have multiple users. If it is not immediately before
    // the exiting block branch, move it.
    if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
      if (Cond->hasOneUse()) { // Condition has a single use, just move it.
        Cond->moveBefore(TermBr);
      } else {
        // Otherwise, clone the terminating condition and insert into the
        // loopend.
        Cond = cast<ICmpInst>(Cond->clone());
        Cond->setName(L->getHeader()->getName() + ".termcond");
        ExitingBlock->getInstList().insert(TermBr, Cond);

        // Clone the IVUse, as the old use still exists!
        IU->IVUsesByStride[CondStride]->addUser(CondUse->getOffset(), Cond,
                                             CondUse->getOperandValToReplace());
        CondUse = &IU->IVUsesByStride[CondStride]->Users.back();
      }
    }

    // If we get to here, we know that we can transform the setcc instruction to
    // use the post-incremented version of the IV, allowing us to coalesce the
    // live ranges for the IV correctly.
    CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), CondStride));
    CondUse->setIsUseOfPostIncrementedValue(true);
    Changed = true;
  }
}
/// OptimizeLoopCountIVOfStride - Try to turn the counting IV tested by the
/// exit compare recorded in CondUse into one that counts down to zero.
///
/// The compare must pass ShouldCountToZero, and its left operand must be
/// either the IV's PHI (pre-inc) or the add that increments it (post-inc).
/// On success the add is replaced by a sub of the same operands, the PHI's
/// incoming value from outside the loop becomes (end value - start value)
/// computed in the preheader, the compare's right operand becomes zero, and
/// the IV use is re-registered under the negated stride.
///
/// \param Stride  in: the constant stride of the use; out: the negated stride.
/// \param CondUse in: the IV use of the compare; out: the use record that was
///                added under the new stride.
/// \returns true if the loop was rewritten, false if nothing was changed.
bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride,
                                                     IVStrideUse* &CondUse,
                                                     Loop *L) {
  // If the only use is an icmp of a loop exiting conditional branch, then
  // attempt the optimization.
  BasedUser User = BasedUser(*CondUse, SE);
  assert(isa<ICmpInst>(User.Inst) && "Expecting an ICMPInst!");
  ICmpInst *Cond = cast<ICmpInst>(User.Inst);

  // Less strict check now that compare stride optimization is done.
  if (!ShouldCountToZero(Cond, CondUse, SE, L))
    return false;

  Value *CondOp0 = Cond->getOperand(0);
  PHINode *PHIExpr = dyn_cast<PHINode>(CondOp0);
  Instruction *Incr = 0;
  if (!PHIExpr) {
    // Value tested is postinc. Find the phi node.
    Incr = dyn_cast<BinaryOperator>(CondOp0);
    // FIXME: Just use User.OperandValToReplace here?
    if (!Incr || Incr->getOpcode() != Instruction::Add)
      return false;

    PHIExpr = dyn_cast<PHINode>(Incr->getOperand(0));
    if (!PHIExpr)
      return false;
    // 1 use for preinc value, the increment.
    if (!PHIExpr->hasOneUse())
      return false;
  } else {
    // Value tested is preinc (CondOp0 is the PHI itself). Find the
    // increment among the PHI's uses; ShouldCountToZero has verified that
    // there are exactly two of them.
    // A CmpInst is not a BinaryOperator; we depend on this.
    Instruction::use_iterator UI = PHIExpr->use_begin();
    Incr = dyn_cast<BinaryOperator>(UI);
    if (!Incr)
      Incr = dyn_cast<BinaryOperator>(++UI);
    // One use for postinc value, the phi. Unnecessarily conservative?
    if (!Incr || !Incr->hasOneUse() || Incr->getOpcode() != Instruction::Add)
      return false;
  }

  // Replace the increment with a decrement.
  DEBUG(dbgs() << "LSR: Examining use ");
  DEBUG(WriteAsOperand(dbgs(), CondOp0, /*PrintType=*/false));
  DEBUG(dbgs() << " in Inst: " << *Cond << '\n');
  BinaryOperator *Decr = BinaryOperator::Create(Instruction::Sub,
                         Incr->getOperand(0), Incr->getOperand(1), "tmp", Incr);
  Incr->replaceAllUsesWith(Decr);
  Incr->eraseFromParent();

  // Substitute endval-startval for the original startval, and 0 for the
  // original endval. Since we're only testing for equality this is OK even
  // if the computation wraps around.
  BasicBlock *Preheader = L->getLoopPreheader();
  Instruction *PreInsertPt = Preheader->getTerminator();
  // Index of the PHI's incoming edge that comes from outside the loop.
  unsigned InBlock = L->contains(PHIExpr->getIncomingBlock(0)) ? 1 : 0;
  Value *StartVal = PHIExpr->getIncomingValue(InBlock);
  Value *EndVal = Cond->getOperand(1);
  DEBUG(dbgs() << " Optimize loop counting iv to count down ["
               << *EndVal << " .. " << *StartVal << "]\n");

  // FIXME: check for case where both are constant.
  Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
  BinaryOperator *NewStartVal = BinaryOperator::Create(Instruction::Sub,
                                          EndVal, StartVal, "tmp", PreInsertPt);
  PHIExpr->setIncomingValue(InBlock, NewStartVal);
  Cond->setOperand(1, Zero);
  DEBUG(dbgs() << " New icmp: " << *Cond << "\n");

  // Reuse an already known stride equal to -SInt if there is one; otherwise
  // create it.
  int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
  const SCEV *NewStride = 0;
  for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
    const SCEV *OldStride = IU->StrideOrder[i];
    if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OldStride))
      if (SC->getValue()->getSExtValue() == -SInt) {
        NewStride = OldStride;
        break;
      }
  }
  if (!NewStride)
    NewStride = SE->getIntegerSCEV(-SInt, Stride->getType());

  // Move the compare's IV use from the old stride to the new one.
  IU->AddUser(NewStride, CondUse->getOffset(), Cond, Cond->getOperand(0));
  IU->IVUsesByStride[Stride]->removeUser(CondUse);

  CondUse = &IU->IVUsesByStride[NewStride]->Users.back();
  Stride = NewStride;

  // Report success; every failure path above returns false, and the caller
  // uses the result to record that the loop changed.
  return true;
}
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
/// when to exit the loop is used only for that purpose, try to rearrange things
/// so it counts down to a test against zero.
///
/// Each loop-invariant constant stride that has exactly one user, an icmp,
/// is handed to OptimizeLoopCountIVOfStride. Returns true if any stride was
/// rewritten; the pass-wide Changed flag is updated accordingly.
bool LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
  bool ThisChanged = false;
  for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
    const SCEV *Stride = IU->StrideOrder[i];
    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
      IU->IVUsesByStride.find(Stride);
    assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
    // FIXME: Generalize to non-affine IV's.
    if (!SI->first->isLoopInvariant(L))
      continue;
    // If stride is a constant and it has an icmpinst use, check if we can
    // optimize the loop to count down.
    if (isa<SCEVConstant>(Stride) && SI->second->Users.size() == 1) {
      Instruction *User = SI->second->Users.begin()->getUser();
      if (!isa<ICmpInst>(User))
        continue;
      const SCEV *CondStride = Stride;
      IVStrideUse *Use = &*SI->second->Users.begin();
      if (!OptimizeLoopCountIVOfStride(CondStride, Use, L))
        continue;
      ThisChanged = true;

      // Now check if it's possible to reuse this iv for other stride uses.
      // This scan currently performs no rewriting (see the FIXME below).
      for (unsigned j = 0, ee = IU->StrideOrder.size(); j != ee; ++j) {
        const SCEV *SStride = IU->StrideOrder[j];
        if (SStride == CondStride)
          continue;
        std::map<const SCEV *, IVUsersOfOneStride *>::iterator SII =
          IU->IVUsesByStride.find(SStride);
        assert(SII != IU->IVUsesByStride.end() && "Stride doesn't exist!");
        // FIXME: Generalize to non-affine IV's.
        if (!SII->first->isLoopInvariant(L))
          continue;
        // FIXME: Rewrite other stride using CondStride.
      }
    }
  }

  // Only report the result once every stride has been visited.
  Changed |= ThisChanged;
  return ThisChanged;
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
IU = &getAnalysis<IVUsers>();
SE = &getAnalysis<ScalarEvolution>();
Changed = false;
// If LoopSimplify form is not available, stay out of trouble.
if (!L->getLoopPreheader() || !L->getLoopLatch())
return false;
if (!IU->IVUsesByStride.empty()) {
DEBUG(dbgs() << "\nLSR on \"" << L->getHeader()->getParent()->getName()
<< "\" ";
L->print(dbgs()));
// Sort the StrideOrder so we process larger strides first.
std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(),
StrideCompare(SE));
// Optimize induction variables. Some indvar uses can be transformed to use
// strides that will be needed for other purposes. A common example of this
// is the exit test for the loop, which can often be rewritten to use the
// computation of some other indvar to decide when to terminate the loop.
OptimizeIndvars(L);
Evan Cheng
committed
// Change loop terminating condition to use the postinc iv when possible
// and optimize loop terminating compare. FIXME: Move this after
// StrengthReduceIVUsersOfStride?
OptimizeLoopTermCond(L);
// FIXME: We can shrink overlarge IV's here. e.g. if the code has
// computation in i64 values and the target doesn't support i64, demote
// the computation to 32-bit if safe.
// FIXME: Attempt to reuse values across multiple IV's. In particular, we
// could have something like "for(i) { foo(i*8); bar(i*16) }", which should
// be codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC.
// Need to be careful that IV's are all the same type. Only works for
// intptr_t indvars.
// IVsByStride keeps IVs for one particular loop.
assert(IVsByStride.empty() && "Stale entries in IVsByStride?");
StrengthReduceIVUsers(L);
// After all sharing is done, see if we can adjust the loop to test against
// zero instead of counting up to a maximum. This is usually faster.
OptimizeLoopCountIV(L);
// We're done analyzing this loop; release all the state we built up for it.
IVsByStride.clear();
// Clean up after ourselves
DeleteTriviallyDeadInstructions();
// At this point, it is worth checking to see if any recurrence PHIs are also
// dead, so that we can remove them as well.
Changed |= DeleteDeadPHIs(L->getHeader());