Newer
Older
if (SE->getEffectiveSCEVType(Op->getType()) !=
SE->getEffectiveSCEVType(SI->first->getType())) {
CastInst *CI = dyn_cast<CastInst>(Op);
// If it's not a simple cast, it's complicated.
if (!CI)
continue;
// If it's a cast from a type other than the stride type,
// it's complicated.
if (CI->getOperand(0)->getType() != SI->first->getType())
continue;
// Ok, we found the IV expression in the stride's type.
Op = CI->getOperand(0);
}
NewCmpLHS = Op;
if (NewCmpLHS->getType() == CmpTy)
break;
}
if (!NewCmpLHS)
continue;
NewCmpTy = NewCmpLHS->getType();
NewTyBits = SE->getTypeSizeInBits(NewCmpTy);
const Type *NewCmpIntTy = IntegerType::get(Cond->getContext(), NewTyBits);
if (RequiresTypeConversion(NewCmpTy, CmpTy)) {
// Check if it is possible to rewrite it using
// an iv / stride of a smaller integer type.
unsigned Bits = NewTyBits;
if (ICmpInst::isSigned(Predicate))
--Bits;
uint64_t Mask = (1ULL << Bits) - 1;
if (((uint64_t)NewCmpVal & Mask) != (uint64_t)NewCmpVal)
continue;
}
// Don't rewrite if use offset is non-constant and the new type is
// of a different type.
// FIXME: too conservative?
if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->getOffset()))
Evan Cheng
committed
if (!PostPass) {
bool AllUsesAreAddresses = true;
bool AllUsesAreOutsideLoop = true;
std::vector<BasedUser> UsersToProcess;
const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
AllUsesAreAddresses,
AllUsesAreOutsideLoop,
UsersToProcess);
// Avoid rewriting the compare instruction with an iv of new stride
// if it's likely the new stride uses will be rewritten using the
// stride of the compare instruction.
if (AllUsesAreAddresses &&
ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
continue;
}
// Avoid rewriting the compare instruction with an iv which has
// implicit extension or truncation built into it.
// TODO: This is over-conservative.
if (SE->getTypeSizeInBits(CondUse->getOffset()->getType()) != TyBits)
continue;
// If scale is negative, use swapped predicate unless it's testing
// for equality.
if (Scale < 0 && !Cond->isEquality())
Predicate = ICmpInst::getSwappedPredicate(Predicate);
Evan Cheng
committed
NewStride = IU->StrideOrder[i];
if (!isa<PointerType>(NewCmpTy))
Owen Anderson
committed
NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal);
Owen Anderson
committed
Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal);
NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy);
}
NewOffset = TyBits == NewTyBits
? SE->getMulExpr(CondUse->getOffset(),
SE->getConstant(CmpTy, Scale))
: SE->getConstant(NewCmpIntTy,
cast<SCEVConstant>(CondUse->getOffset())->getValue()
->getSExtValue()*Scale);
}
}
// Forgo this transformation if the increment happens to be
// unfortunately positioned after the condition, and the condition
// has multiple uses which prevent it from being moved immediately
// before the branch. See
// test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-*.ll
// for an example of this situation.
if (!Cond->hasOneUse()) {
for (BasicBlock::iterator I = Cond, E = Cond->getParent()->end();
I != E; ++I)
if (I == NewCmpLHS)
return Cond;
if (NewCmpRHS) {
// Create a new compare instruction using new stride / iv.
ICmpInst *OldCond = Cond;
Owen Anderson
committed
Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS,
L->getHeader()->getName() + ".termcond");
Evan Cheng
committed
DEBUG(errs() << " Change compare stride in Inst " << *OldCond);
DEBUG(errs() << " to " << *Cond << '\n');
// Remove the old compare instruction. The old indvar is probably dead too.
DeadInsts.push_back(CondUse->getOperandValToReplace());
OldCond->replaceAllUsesWith(Cond);
OldCond->eraseFromParent();
Evan Cheng
committed
IU->IVUsesByStride[NewStride]->addUser(NewOffset, Cond, NewCmpLHS);
CondUse = &IU->IVUsesByStride[NewStride]->Users.back();
CondStride = NewStride;
++NumEliminated;
Changed = true;
}
return Cond;
}
/// OptimizeMax - Rewrite the loop's terminating condition if it uses
/// a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
///
/// i = 0;
/// do {
/// p[i] = 0.0;
/// } while (++i < n);
///
/// the trip count isn't just 'n', because 'n' might not be positive. And
/// unfortunately this can come up even for loops where the user didn't use
/// a C do-while loop. For example, seemingly well-behaved top-test loops
/// will commonly be lowered like this:
///
/// if (n > 0) {
/// i = 0;
/// do {
/// p[i] = 0.0;
/// } while (++i < n);
/// }
///
/// and then it's possible for subsequent optimization to obscure the if
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
/// max expression, which allows it to give the loop a canonical
/// induction variable:
///
/// i = 0;
/// max = n < 1 ? 1 : n;
/// do {
/// p[i] = 0.0;
/// } while (++i != max);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
/// LoopInfo analysis, which doesn't remember trip count values. It
/// expects to be able to rediscover the trip count each time it is
/// needed, and it does this using a simple analysis that only succeeds if
/// the loop has a canonical induction variable.
///
/// However, when it comes time to generate code, the maximum operation
/// can be quite costly, especially if it's inside of an outer loop.
///
/// This function solves this problem by detecting this type of loop and
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
///
ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
                                          IVStrideUse* &CondUse) {
  // Check that the loop matches the pattern we're looking for: an
  // equality or inequality test whose right operand is a select.
  if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
      Cond->getPredicate() != CmpInst::ICMP_NE)
    return Cond;

  // The select must feed only this compare, since it is deleted below.
  SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
  if (!Sel || !Sel->hasOneUse()) return Cond;

  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    return Cond;
  const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());

  // Add one to the backedge-taken count to get the trip count.
  const SCEV *IterationCount = SE->getAddExpr(BackedgeTakenCount, One);

  // Check for a max calculation that matches the pattern.
  if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount))
    return Cond;
  const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount);
  if (Max != SE->getSCEV(Sel)) return Cond;

  // To handle a max with more than two operands, this optimization would
  // require additional checking and setup.
  if (Max->getNumOperands() != 2)
    return Cond;

  // The first max operand has to be the constant one.
  const SCEV *MaxLHS = Max->getOperand(0);
  const SCEV *MaxRHS = Max->getOperand(1);
  if (MaxLHS != One) return Cond;

  // Check the relevant induction variable for conformance to
  // the pattern: an affine recurrence starting at one and stepping by one.
  const SCEV *IV = SE->getSCEV(Cond->getOperand(0));
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
  if (!AR || !AR->isAffine() ||
      AR->getStart() != One ||
      AR->getStepRecurrence(*SE) != One)
    return Cond;

  assert(AR->getLoop() == L &&
         "Loop condition operand is an addrec in a different loop!");

  // Check the right operand of the select, and remember it, as it will
  // be used in the new comparison instruction.
  Value *NewRHS = 0;
  if (SE->getSCEV(Sel->getOperand(1)) == MaxRHS)
    NewRHS = Sel->getOperand(1);
  else if (SE->getSCEV(Sel->getOperand(2)) == MaxRHS)
    NewRHS = Sel->getOperand(2);
  if (!NewRHS) return Cond;

  // Determine the new comparison opcode. It may be signed or unsigned,
  // and the original comparison may be either equality or inequality.
  CmpInst::Predicate Pred =
    isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
  if (Cond->getPredicate() == CmpInst::ICMP_EQ)
    Pred = CmpInst::getInversePredicate(Pred);

  // Ok, everything looks ok to change the condition into a less-than test
  // (or its inverse, when the original test was ICMP_EQ) and delete the max
  // calculation.
  ICmpInst *NewCond =
    new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");

  // Delete the max calculation instructions.
  Cond->replaceAllUsesWith(NewCond);
  CondUse->setUser(NewCond);
  Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
  Cond->eraseFromParent();
  Sel->eraseFromParent();
  // Only the select was required to have a single use; the compare that fed
  // it may still have other users, so erase it only once it is dead.
  if (Cmp->use_empty())
    Cmp->eraseFromParent();
  return NewCond;
}
Devang Patel
committed
/// OptimizeShadowIV - If IV is used in a int-to-float cast
/// inside the loop then try to eliminate the cast operation by adding a
/// second, floating-point induction variable that is stepped in parallel.
void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
  // Bail out unless the backedge-taken count is computable.
  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    return;

  for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e;
       ++Stride) {
    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
      IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
    assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
    // Only constant strides are considered.
    if (!isa<SCEVConstant>(SI->first))
      continue;

    for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
           E = SI->second->Users.end(); UI != E; /* empty */) {
      // Advance before inspecting the candidate; the candidate's user
      // instruction may be erased below.
      ilist<IVStrideUse>::iterator CandidateUI = UI;
      ++UI;
      Instruction *ShadowUse = CandidateUI->getUser();
      const Type *DestTy = NULL;
      // Remember whether the cast is signed so that the integer start value
      // is converted with the same signedness as the cast being removed.
      bool IsSigned = false;

      /* If shadow use is a int->float cast then insert a second IV
         to eliminate this cast.

           for (unsigned i = 0; i < n; ++i)
             foo((double)i);

         is transformed into

           double d = 0.0;
           for (unsigned i = 0; i < n; ++i, ++d)
             foo(d);
      */
      if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(ShadowUse)) {
        DestTy = UCast->getDestTy();
      } else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(ShadowUse)) {
        IsSigned = true;
        DestTy = SCast->getDestTy();
      }
      if (!DestTy) continue;

      if (TLI) {
        // If target does not support DestTy natively then do not apply
        // this transformation.
        EVT DVT = TLI->getValueType(DestTy);
        if (!TLI->isTypeLegal(DVT)) continue;
      }

      // The cast operand must be a two-input PHI.
      PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
      if (!PH) continue;
      if (PH->getNumIncomingValues() != 2) continue;

      // Skip the transformation when the integer type is wider than the
      // floating-point mantissa.
      const Type *SrcTy = PH->getType();
      int Mantissa = DestTy->getFPMantissaWidth();
      if (Mantissa == -1) continue;
      if ((int)SE->getTypeSizeInBits(SrcTy) > Mantissa)
        continue;

      // Work out which PHI input comes from the preheader; the other one is
      // treated as the latch input.
      unsigned Entry, Latch;
      if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
        Entry = 0;
        Latch = 1;
      } else {
        Entry = 1;
        Latch = 0;
      }

      ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
      if (!Init) continue;
      // Convert the start value the same way the removed cast would have:
      // sign-extended for sitofp, zero-extended for uitofp.
      Constant *NewInit =
        ConstantFP::get(DestTy, IsSigned ?
                                  static_cast<double>(Init->getSExtValue()) :
                                  static_cast<double>(Init->getZExtValue()));

      BinaryOperator *Incr =
        dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
      if (!Incr) continue;
      if (Incr->getOpcode() != Instruction::Add
          && Incr->getOpcode() != Instruction::Sub)
        continue;

      /* Initialize new IV, double d = 0.0 in above example. */
      ConstantInt *C = NULL;
      if (Incr->getOperand(0) == PH)
        C = dyn_cast<ConstantInt>(Incr->getOperand(1));
      else if (Incr->getOperand(1) == PH)
        C = dyn_cast<ConstantInt>(Incr->getOperand(0));
      else
        continue;

      if (!C) continue;

      // Ignore negative constants, as the code below doesn't handle them
      // correctly. TODO: Remove this restriction.
      if (!C->getValue().isStrictlyPositive()) continue;

      /* Add new PHINode. */
      PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);

      /* create new increment. '++d' in above example. */
      Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
      BinaryOperator *NewIncr =
        BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
                                 Instruction::FAdd : Instruction::FSub,
                               NewPH, CFP, "IV.S.next.", Incr);

      NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
      NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));

      /* Remove cast operation */
      ShadowUse->replaceAllUsesWith(NewPH);
      ShadowUse->eraseFromParent();
      NumShadow++;
      break;
    }
  }
}
/// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
/// uses in the loop, look to see if we can eliminate some, in favor of using
/// common indvars for the different uses.
void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
  // TODO: implement optzns here.

  // For now the only optimization performed here is the shadow-IV rewrite.
  OptimizeShadowIV(L);
}
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
/// StrideMightBeShared - Return true if some other constant stride recorded
/// for the loop either equals Stride, or is a multiple of it whose uses are
/// all addresses and whose scale factor passes ValidScale. When CheckPreInc
/// is set, a multiple only counts if that other stride has at least one use
/// of the pre-incremented value. Stride must be a SCEVConstant.
bool LoopStrengthReduce::StrideMightBeShared(const SCEV* Stride, Loop *L,
                                             bool CheckPreInc) {
  int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
  for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
      IU->IVUsesByStride.find(IU->StrideOrder[i]);
    assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
    const SCEV *Share = SI->first;
    if (!isa<SCEVConstant>(Share) || Share == Stride)
      continue;
    int64_t SSInt = cast<SCEVConstant>(Share)->getValue()->getSExtValue();
    if (SSInt == SInt)
      return true; // This can definitely be reused.

    // The candidate must be a whole multiple of Stride. Compare magnitudes
    // at full 64-bit width: narrowing abs64() to 'unsigned' would discard the
    // high bits, and comparing against a negative SInt would never reject.
    if (abs64(SSInt) < abs64(SInt) || (SSInt % SInt) != 0)
      continue;
    int64_t Scale = SSInt / SInt;

    bool AllUsesAreAddresses = true;
    bool AllUsesAreOutsideLoop = true;
    std::vector<BasedUser> UsersToProcess;
    const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
                                             AllUsesAreAddresses,
                                             AllUsesAreOutsideLoop,
                                             UsersToProcess);
    if (!AllUsesAreAddresses ||
        !ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
      continue;

    if (!CheckPreInc)
      return true;

    // Any pre-inc iv use?
    IVUsersOfOneStride &StrideUses = *SI->second;
    for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
           E = StrideUses.Users.end(); I != E; ++I) {
      if (!I->isUseOfPostIncrementedValue())
        return true;
    }
  }
  return false;
}
/// isUsedByExitBranch - Return true if icmp is used by a loop terminating
/// conditional branch or it's and / or with other conditions before being used
/// as the condition.
///
/// Only the first use of Cond is followed; each intermediate and / or must
/// have a single use and live in the same block as Cond.
static bool isUsedByExitBranch(ICmpInst *Cond, Loop *L) {
  BasicBlock *CondBB = Cond->getParent();
  if (!L->isLoopExiting(CondBB))
    return false;
  BranchInst *TermBr = dyn_cast<BranchInst>(CondBB->getTerminator());
  if (!TermBr || !TermBr->isConditional())
    return false;

  // An unused compare cannot feed the branch; also avoids dereferencing the
  // end of an empty use list below.
  if (Cond->use_empty())
    return false;

  // Walk forward through a chain of and / or instructions.
  Value *User = *Cond->use_begin();
  Instruction *UserInst = dyn_cast<Instruction>(User);
  while (UserInst &&
         (UserInst->getOpcode() == Instruction::And ||
          UserInst->getOpcode() == Instruction::Or)) {
    if (!UserInst->hasOneUse() || UserInst->getParent() != CondBB)
      return false;
    User = *User->use_begin();
    UserInst = dyn_cast<Instruction>(User);
  }
  return User == TermBr;
}
/// ShouldCountToZero - Return true if the exit compare Cond looks like a
/// candidate for being rewritten to count toward zero: it is an equality test
/// inside L with a single use that reaches the exiting branch, it tests an
/// affine recurrence with a non-negative constant step against a
/// loop-invariant, non-zero bound, and the tested value has exactly two uses.
/// When TLI is supplied, a constant bound that is already a legal icmp
/// immediate disqualifies the compare.
static bool ShouldCountToZero(ICmpInst *Cond, IVStrideUse* &CondUse,
                              ScalarEvolution *SE, Loop *L,
                              const TargetLowering *TLI = 0) {
  // Handle only single-use equality tests, located in the loop, with a
  // constant IV-use offset, that feed the exiting branch.
  const bool IsCandidateCompare =
    L->contains(Cond->getParent()) &&
    isa<SCEVConstant>(CondUse->getOffset()) &&
    Cond->isEquality() && Cond->hasOneUse() &&
    isUsedByExitBranch(Cond, L);
  if (!IsCandidateCompare)
    return false;

  Value *TestedIV = Cond->getOperand(0);
  const SCEVAddRecExpr *Rec =
    dyn_cast<SCEVAddRecExpr>(SE->getSCEV(TestedIV));
  if (!Rec || !Rec->isAffine())
    return false;

  const SCEVConstant *Step =
    dyn_cast<SCEVConstant>(Rec->getStepRecurrence(*SE));
  if (!Step)
    return false;
  // If it's already counting down, don't do anything.
  if (Step->getValue()->getSExtValue() < 0)
    return false;

  // If the RHS of the comparison is not a loop invariant, the rewrite
  // cannot be done.
  Value *Bound = Cond->getOperand(1);
  if (!L->isLoopInvariant(Bound))
    return false;

  if (ConstantInt *BoundC = dyn_cast<ConstantInt>(Bound)) {
    // Bail out if it's already comparing against a zero.
    if (BoundC->isZero())
      return false;
    // If we are checking this before cmp stride optimization (TLI given),
    // bail out when comparing against an already legal immediate.
    if (TLI && TLI->isLegalICmpImmediate(BoundC->getSExtValue()))
      return false;
  }

  // Make sure the IV is only used for counting. Value may be preinc or
  // postinc; 2 uses in either case.
  return TestedIV->hasNUses(2);
}
/// OptimizeLoopTermCond - Change loop terminating condition to use the
/// postinc iv when possible.
void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
  // Finally, get the terminating condition for the loop if possible. If we
  // can, we want to change it to use a post-incremented version of its
  // induction variable, to allow coalescing the live ranges for the IV into
  // one register value.
  BasicBlock *LatchBlock = L->getLoopLatch();
  BasicBlock *ExitingBlock = L->getExitingBlock();

  if (!ExitingBlock)
    // Multiple exits, just look at the exit in the latch block if there is one.
    ExitingBlock = LatchBlock;
  BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
  if (!TermBr)
    return;
  if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
    return;

  // Search IVUsesByStride to find Cond's IVUse if there is one.
  IVStrideUse *CondUse = 0;
  const SCEV *CondStride = 0;
  ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
  if (!FindIVUserForCond(Cond, CondUse, CondStride))
    return;

  bool UsePostInc = true;
  if (ExitingBlock != LatchBlock) {
    if (Cond->hasOneUse()) {
      // See below, we don't want the condition to be cloned.

      // If exiting block is the latch block, we know it's safe and profitable
      // to transform the icmp to use post-inc iv. Otherwise do so only if it
      // would not reuse another iv and its iv would be reused by other uses.
      // We are optimizing for the case where the icmp is the only use of the
      // iv.
      IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[CondStride];
      for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
             E = StrideUses.Users.end(); I != E; ++I) {
        if (I->getUser() == Cond)
          continue;
        if (!I->isUseOfPostIncrementedValue()) {
          UsePostInc = false;
          break;
        }
      }
    }

    // If iv for the stride might be shared and any of the users use pre-inc iv
    // might be used, then it's not safe to use post-inc iv.
    if (UsePostInc &&
        isa<SCEVConstant>(CondStride) &&
        StrideMightBeShared(CondStride, L, true))
      UsePostInc = false;
  }

  // If the trip count is computed in terms of a max (due to ScalarEvolution
  // being unable to find a sufficient guard, for example), change the loop
  // comparison to use SLT or ULT instead of NE.
  Cond = OptimizeMax(L, Cond, CondUse);

  // If possible, change stride and operands of the compare instruction to
  // eliminate one stride. However, avoid rewriting the compare instruction with
  // an iv of new stride if it's likely the new stride uses will be rewritten
  // using the stride of the compare instruction.
  if (ExitingBlock == LatchBlock && isa<SCEVConstant>(CondStride)) {
    // If the condition stride is a constant and it's the only use, we might
    // want to optimize it first by turning it to count toward zero.
    if (!StrideMightBeShared(CondStride, L, false) &&
        !ShouldCountToZero(Cond, CondUse, SE, L, TLI))
      Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
  }

  if (!UsePostInc)
    return;

  // It's possible for the setcc instruction to be anywhere in the loop, and
  // possible for it to have multiple users. If it is not immediately before
  // the exiting branch, move it.
  if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
    if (Cond->hasOneUse()) { // Condition has a single use, just move it.
      Cond->moveBefore(TermBr);
    } else {
      // Otherwise, clone the terminating condition and insert it right before
      // the branch. TermBr is the terminator of ExitingBlock, so the clone has
      // to go through that block's instruction list.
      ICmpInst *OldCond = Cond;
      Cond = cast<ICmpInst>(Cond->clone());
      Cond->setName(L->getHeader()->getName() + ".termcond");
      ExitingBlock->getInstList().insert(TermBr, Cond);

      // Clone the IVUse, as the old use still exists!
      IU->IVUsesByStride[CondStride]->addUser(CondUse->getOffset(), Cond,
                                             CondUse->getOperandValToReplace());
      CondUse = &IU->IVUsesByStride[CondStride]->Users.back();

      // Make the branch test the clone; the original compare keeps serving
      // its remaining users.
      TermBr->replaceUsesOfWith(OldCond, Cond);
    }
  }

  // If we get to here, we know that we can transform the setcc instruction to
  // use the post-incremented version of the IV, allowing us to coalesce the
  // live ranges for the IV correctly.
  CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), CondStride));
  CondUse->setIsUseOfPostIncrementedValue(true);
  // The IV use (and possibly the IR) has been modified; record it.
  Changed = true;

  ++NumLoopCond;
}
Evan Cheng
committed
bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride,
IVStrideUse* &CondUse,
Loop *L) {
// If the only use is an icmp of an loop exiting conditional branch, then
// attempts the optimization.
Evan Cheng
committed
BasedUser User = BasedUser(*CondUse, SE);
assert(isa<ICmpInst>(User.Inst) && "Expecting an ICMPInst!");
ICmpInst *Cond = cast<ICmpInst>(User.Inst);
Evan Cheng
committed
// Less strict check now that compare stride optimization is done.
if (!ShouldCountToZero(Cond, CondUse, SE, L))
return false;
Evan Cheng
committed
Value *CondOp0 = Cond->getOperand(0);
PHINode *PHIExpr = dyn_cast<PHINode>(CondOp0);
Instruction *Incr;
Evan Cheng
committed
if (!PHIExpr) {
// Value tested is postinc. Find the phi node.
Incr = dyn_cast<BinaryOperator>(CondOp0);
Evan Cheng
committed
// FIXME: Just use User.OperandValToReplace here?
if (!Incr || Incr->getOpcode() != Instruction::Add)
Evan Cheng
committed
return false;
Evan Cheng
committed
PHIExpr = dyn_cast<PHINode>(Incr->getOperand(0));
if (!PHIExpr)
Evan Cheng
committed
return false;
// 1 use for preinc value, the increment.
Evan Cheng
committed
if (!PHIExpr->hasOneUse())
return false;
} else {
assert(isa<PHINode>(CondOp0) &&
"Unexpected loop exiting counting instruction sequence!");
PHIExpr = cast<PHINode>(CondOp0);
// Value tested is preinc. Find the increment.
// A CmpInst is not a BinaryOperator; we depend on this.
Instruction::use_iterator UI = PHIExpr->use_begin();
Incr = dyn_cast<BinaryOperator>(UI);
if (!Incr)
Incr = dyn_cast<BinaryOperator>(++UI);
// One use for postinc value, the phi. Unnecessarily conservative?
if (!Incr || !Incr->hasOneUse() || Incr->getOpcode() != Instruction::Add)
Evan Cheng
committed
return false;
}
// Replace the increment with a decrement.
Evan Cheng
committed
DEBUG(errs() << "LSR: Examining use ");
DEBUG(WriteAsOperand(errs(), CondOp0, /*PrintType=*/false));
Evan Cheng
committed
DEBUG(errs() << " in Inst: " << *Cond << '\n');
BinaryOperator *Decr = BinaryOperator::Create(Instruction::Sub,
Incr->getOperand(0), Incr->getOperand(1), "tmp", Incr);
Incr->replaceAllUsesWith(Decr);
Incr->eraseFromParent();
// Substitute endval-startval for the original startval, and 0 for the
// original endval. Since we're only testing for equality this is OK even
// if the computation wraps around.
BasicBlock *Preheader = L->getLoopPreheader();
Instruction *PreInsertPt = Preheader->getTerminator();
unsigned InBlock = L->contains(PHIExpr->getIncomingBlock(0)) ? 1 : 0;
Value *StartVal = PHIExpr->getIncomingValue(InBlock);
Value *EndVal = Cond->getOperand(1);
DEBUG(errs() << " Optimize loop counting iv to count down ["
<< *EndVal << " .. " << *StartVal << "]\n");
// FIXME: check for case where both are constant.
Owen Anderson
committed
Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
BinaryOperator *NewStartVal = BinaryOperator::Create(Instruction::Sub,
EndVal, StartVal, "tmp", PreInsertPt);
PHIExpr->setIncomingValue(InBlock, NewStartVal);
Cond->setOperand(1, Zero);
DEBUG(errs() << " New icmp: " << *Cond << "\n");
Evan Cheng
committed
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
const SCEV *NewStride = 0;
bool Found = false;
for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
const SCEV *OldStride = IU->StrideOrder[i];
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OldStride))
if (SC->getValue()->getSExtValue() == -SInt) {
Found = true;
NewStride = OldStride;
break;
}
}
if (!Found)
NewStride = SE->getIntegerSCEV(-SInt, Stride->getType());
IU->AddUser(NewStride, CondUse->getOffset(), Cond, Cond->getOperand(0));
IU->IVUsesByStride[Stride]->removeUser(CondUse);
CondUse = &IU->IVUsesByStride[NewStride]->Users.back();
Stride = NewStride;
++NumCountZero;
Evan Cheng
committed
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
return true;
}
/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
/// when to exit the loop is used only for that purpose, try to rearrange things
/// so it counts down to a test against zero.
///
/// Returns true if any stride was rewritten; the pass-wide Changed flag is
/// updated as well.
bool LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
  bool ThisChanged = false;
  for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
    const SCEV *Stride = IU->StrideOrder[i];
    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
      IU->IVUsesByStride.find(Stride);
    assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
    // FIXME: Generalize to non-affine IV's.
    if (!SI->first->isLoopInvariant(L))
      continue;
    // If stride is a constant and it has an icmpinst use, check if we can
    // optimize the loop to count down.
    if (isa<SCEVConstant>(Stride) && SI->second->Users.size() == 1) {
      Instruction *User = SI->second->Users.begin()->getUser();
      if (!isa<ICmpInst>(User))
        continue;
      // Both are passed by reference and may be updated by the callee.
      const SCEV *CondStride = Stride;
      IVStrideUse *Use = &*SI->second->Users.begin();
      if (!OptimizeLoopCountIVOfStride(CondStride, Use, L))
        continue;
      ThisChanged = true;

      // Now check if it's possible to reuse this iv for other stride uses.
      for (unsigned j = 0, ee = IU->StrideOrder.size(); j != ee; ++j) {
        const SCEV *SStride = IU->StrideOrder[j];
        if (SStride == CondStride)
          continue;
        std::map<const SCEV *, IVUsersOfOneStride *>::iterator SII =
          IU->IVUsesByStride.find(SStride);
        assert(SII != IU->IVUsesByStride.end() && "Stride doesn't exist!");
        // FIXME: Generalize to non-affine IV's.
        if (!SII->first->isLoopInvariant(L))
          continue;
        // FIXME: Rewrite other stride using CondStride.
      }
    }
  }

  Changed |= ThisChanged;
  return ThisChanged;
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
IU = &getAnalysis<IVUsers>();
DT = &getAnalysis<DominatorTree>();
SE = &getAnalysis<ScalarEvolution>();
// If LoopSimplify form is not available, stay out of trouble.
if (!L->getLoopPreheader() || !L->getLoopLatch())
return false;
if (!IU->IVUsesByStride.empty()) {
DEBUG(errs() << "\nLSR on \"" << L->getHeader()->getParent()->getName()
<< "\" ";
L->dump());
// Sort the StrideOrder so we process larger strides first.
std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(),
StrideCompare(SE));
// Optimize induction variables. Some indvar uses can be transformed to use
// strides that will be needed for other purposes. A common example of this
// is the exit test for the loop, which can often be rewritten to use the
// computation of some other indvar to decide when to terminate the loop.
OptimizeIndvars(L);
Evan Cheng
committed
// Change loop terminating condition to use the postinc iv when possible
// and optimize loop terminating compare. FIXME: Move this after
Evan Cheng
committed
// StrengthReduceIVUsersOfStride?
Evan Cheng
committed
OptimizeLoopTermCond(L);
// FIXME: We can shrink overlarge IV's here. e.g. if the code has
// computation in i64 values and the target doesn't support i64, demote
// the computation to 32-bit if safe.
// FIXME: Attempt to reuse values across multiple IV's. In particular, we
// could have something like "for(i) { foo(i*8); bar(i*16) }", which should
// be codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC.
// Need to be careful that IV's are all the same type. Only works for
// intptr_t indvars.
// IVsByStride keeps IVs for one particular loop.
assert(IVsByStride.empty() && "Stale entries in IVsByStride?");
Evan Cheng
committed
StrengthReduceIVUsers(L);
// After all sharing is done, see if we can adjust the loop to test against
// zero instead of counting up to a maximum. This is usually faster.
Evan Cheng
committed
OptimizeLoopCountIV(L);
// We're done analyzing this loop; release all the state we built up for it.
IVsByStride.clear();
Evan Cheng
committed
StrideNoReuse.clear();
// Clean up after ourselves
if (!DeadInsts.empty())
DeleteTriviallyDeadInstructions();
// At this point, it is worth checking to see if any recurrence PHIs are also
// dead, so that we can remove them as well.
DeleteDeadPHIs(L->getHeader());