"git@repo.hca.bsc.es:rferrer/llvm-epi-0.8.git" did not exist on "e5d8d0d64b6cc85e8aaa750cb68e90e672937517"
Newer
Older
}
}
// Forgo this transformation if it the increment happens to be
// unfortunately positioned after the condition, and the condition
// has multiple uses which prevent it from being moved immediately
// before the branch. See
// test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-*.ll
// for an example of this situation.
if (!Cond->hasOneUse()) {
for (BasicBlock::iterator I = Cond, E = Cond->getParent()->end();
I != E; ++I)
if (I == NewCmpLHS)
return Cond;
if (NewCmpRHS) {
// Create a new compare instruction using new stride / iv.
ICmpInst *OldCond = Cond;
Cond = new ICmpInst(Predicate, NewCmpLHS, NewCmpRHS,
L->getHeader()->getName() + ".termcond",
OldCond);
// Remove the old compare instruction. The old indvar is probably dead too.
DeadInsts.push_back(CondUse->getOperandValToReplace());
OldCond->replaceAllUsesWith(Cond);
OldCond->eraseFromParent();
IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS);
CondUse = &IU->IVUsesByStride[*NewStride]->Users.back();
CondStride = NewStride;
++NumEliminated;
Changed = true;
}
return Cond;
}
/// OptimizeMax - Rewrite the loop's terminating condition if it uses
/// a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
///
/// i = 0;
/// do {
/// p[i] = 0.0;
/// } while (++i < n);
///
/// the trip count isn't just 'n', because 'n' might not be positive. And
/// unfortunately this can come up even for loops where the user didn't use
/// a C do-while loop. For example, seemingly well-behaved top-test loops
/// will commonly be lowered like this:
//
/// if (n > 0) {
/// i = 0;
/// do {
/// p[i] = 0.0;
/// } while (++i < n);
/// }
///
/// and then it's possible for subsequent optimization to obscure the if
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
/// max expression, which allows it to give the loop a canonical
/// induction variable:
///
/// i = 0;
/// max = n < 1 ? 1 : n;
/// do {
/// p[i] = 0.0;
/// } while (++i != max);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
/// LoopInfo analysis, which doesn't remember trip count values. It
/// expects to be able to rediscover the trip count each time it is
/// needed, and it does this using a simple analyis that only succeeds if
/// the loop has a canonical induction variable.
///
/// However, when it comes time to generate code, the maximum operation
/// can be quite costly, especially if it's inside of an outer loop.
///
/// This function solves this problem by detecting this type of loop and
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
///
ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse) {
// Check that the loop matches the pattern we're looking for.
if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
Cond->getPredicate() != CmpInst::ICMP_NE)
return Cond;
SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
if (!Sel || !Sel->hasOneUse()) return Cond;
const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return Cond;
const SCEV* One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
// Add one to the backedge-taken count to get the trip count.
const SCEV* IterationCount = SE->getAddExpr(BackedgeTakenCount, One);
// Check for a max calculation that matches the pattern.
if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount))
return Cond;
const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount);
if (Max != SE->getSCEV(Sel)) return Cond;
// To handle a max with more than two operands, this optimization would
// require additional checking and setup.
if (Max->getNumOperands() != 2)
return Cond;
const SCEV* MaxLHS = Max->getOperand(0);
const SCEV* MaxRHS = Max->getOperand(1);
if (!MaxLHS || MaxLHS != One) return Cond;
// Check the relevant induction variable for conformance to
// the pattern.
const SCEV* IV = SE->getSCEV(Cond->getOperand(0));
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
if (!AR || !AR->isAffine() ||
AR->getStart() != One ||
AR->getStepRecurrence(*SE) != One)
return Cond;
assert(AR->getLoop() == L &&
"Loop condition operand is an addrec in a different loop!");
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
Value *NewRHS = 0;
if (SE->getSCEV(Sel->getOperand(1)) == MaxRHS)
NewRHS = Sel->getOperand(1);
else if (SE->getSCEV(Sel->getOperand(2)) == MaxRHS)
NewRHS = Sel->getOperand(2);
if (!NewRHS) return Cond;
// Determine the new comparison opcode. It may be signed or unsigned,
// and the original comparison may be either equality or inequality.
CmpInst::Predicate Pred =
isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
if (Cond->getPredicate() == CmpInst::ICMP_EQ)
Pred = CmpInst::getInversePredicate(Pred);
// Ok, everything looks ok to change the condition into an SLT or SGE and
// delete the max calculation.
ICmpInst *NewCond =
new ICmpInst(Pred, Cond->getOperand(0), NewRHS, "scmp", Cond);
// Delete the max calculation instructions.
Cond->replaceAllUsesWith(NewCond);
CondUse->setUser(NewCond);
Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
Cond->eraseFromParent();
Sel->eraseFromParent();
Cmp->eraseFromParent();
return NewCond;
}
Devang Patel
committed
/// OptimizeShadowIV - If IV is used in a int-to-float cast
/// inside the loop then try to eliminate the cast opeation.
void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
Devang Patel
committed
return;
for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e;
Devang Patel
committed
++Stride) {
std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
Devang Patel
committed
if (!isa<SCEVConstant>(SI->first))
continue;
for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
E = SI->second->Users.end(); UI != E; /* empty */) {
ilist<IVStrideUse>::iterator CandidateUI = UI;
++UI;
Instruction *ShadowUse = CandidateUI->getUser();
Devang Patel
committed
const Type *DestTy = NULL;
/* If shadow use is a int->float cast then insert a second IV
to eliminate this cast.
Devang Patel
committed
for (unsigned i = 0; i < n; ++i)
foo((double)i);
is transformed into
Devang Patel
committed
double d = 0.0;
for (unsigned i = 0; i < n; ++i, ++d)
foo(d);
*/
if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
Devang Patel
committed
DestTy = UCast->getDestTy();
else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))
Devang Patel
committed
DestTy = SCast->getDestTy();
Devang Patel
committed
if (!DestTy) continue;
if (TLI) {
Evan Cheng
committed
// If target does not support DestTy natively then do not apply
// this transformation.
Devang Patel
committed
MVT DVT = TLI->getValueType(DestTy);
if (!TLI->isTypeLegal(DVT)) continue;
}
Devang Patel
committed
PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
if (!PH) continue;
if (PH->getNumIncomingValues() != 2) continue;
const Type *SrcTy = PH->getType();
int Mantissa = DestTy->getFPMantissaWidth();
if (Mantissa == -1) continue;
if ((int)SE->getTypeSizeInBits(SrcTy) > Mantissa)
Devang Patel
committed
continue;
unsigned Entry, Latch;
if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
Entry = 0;
Latch = 1;
} else {
Entry = 1;
Latch = 0;
}
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
Constant *NewInit = Context->getConstantFP(DestTy, Init->getZExtValue());
Devang Patel
committed
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
if (!Incr) continue;
if (Incr->getOpcode() != Instruction::Add
&& Incr->getOpcode() != Instruction::Sub)
continue;
/* Initialize new IV, double d = 0.0 in above example. */
ConstantInt *C = NULL;
if (Incr->getOperand(0) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(1));
else if (Incr->getOperand(1) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(0));
else
continue;
if (!C) continue;
/* Add new PHINode. */
PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
/* create new increment. '++d' in above example. */
Constant *CFP = Context->getConstantFP(DestTy, C->getZExtValue());
Devang Patel
committed
BinaryOperator *NewIncr =
BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
Instruction::FAdd : Instruction::FSub,
Devang Patel
committed
NewPH, CFP, "IV.S.next.", Incr);
NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
/* Remove cast operation */
ShadowUse->replaceAllUsesWith(NewPH);
ShadowUse->eraseFromParent();
NumShadow++;
break;
}
}
}
/// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
/// uses in the loop, look to see if we can eliminate some, in favor of using
/// common indvars for the different uses.
void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
// TODO: implement optzns here.
Devang Patel
committed
OptimizeShadowIV(L);
}
/// OptimizeLoopTermCond - Change loop terminating condition to use the
/// postinc iv when possible.
void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
// Finally, get the terminating condition for the loop if possible. If we
// can, we want to change it to use a post-incremented version of its
// induction variable, to allow coalescing the live ranges for the IV into
// one register value.
Evan Cheng
committed
BasicBlock *LatchBlock = L->getLoopLatch();
BasicBlock *ExitingBlock = L->getExitingBlock();
if (!ExitingBlock)
Evan Cheng
committed
// Multiple exits, just look at the exit in the latch block if there is one.
ExitingBlock = LatchBlock;
BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
Evan Cheng
committed
if (!TermBr)
return;
if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
return;
// Search IVUsesByStride to find Cond's IVUse if there is one.
IVStrideUse *CondUse = 0;
Evan Cheng
committed
ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
if (!FindIVUserForCond(Cond, CondUse, CondStride))
return; // setcc doesn't use the IV.
if (ExitingBlock != LatchBlock) {
Evan Cheng
committed
if (!Cond->hasOneUse())
// See below, we don't want the condition to be cloned.
return;
// If exiting block is the latch block, we know it's safe and profitable to
// transform the icmp to use post-inc iv. Otherwise do so only if it would
// not reuse another iv and its iv would be reused by other uses. We are
// optimizing for the case where the icmp is the only use of the iv.
IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[*CondStride];
for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
E = StrideUses.Users.end(); I != E; ++I) {
if (I->getUser() == Cond)
Evan Cheng
committed
continue;
if (!I->isUseOfPostIncrementedValue())
Evan Cheng
committed
return;
}
// FIXME: This is expensive, and worse still ChangeCompareStride does a
// similar check. Can we perform all the icmp related transformations after
// StrengthReduceStridedIVUsers?
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride)) {
int64_t SInt = SC->getValue()->getSExtValue();
for (unsigned NewStride = 0, ee = IU->StrideOrder.size(); NewStride != ee;
Evan Cheng
committed
++NewStride) {
std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[NewStride]);
Evan Cheng
committed
if (!isa<SCEVConstant>(SI->first) || SI->first == *CondStride)
continue;
int64_t SSInt =
cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SSInt == SInt)
return; // This can definitely be reused.
if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0)
Evan Cheng
committed
continue;
int64_t Scale = SSInt / SInt;
bool AllUsesAreAddresses = true;
bool AllUsesAreOutsideLoop = true;
std::vector<BasedUser> UsersToProcess;
const SCEV* CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
Evan Cheng
committed
AllUsesAreAddresses,
AllUsesAreOutsideLoop,
UsersToProcess);
// Avoid rewriting the compare instruction with an iv of new stride
// if it's likely the new stride uses will be rewritten using the
// stride of the compare instruction.
if (AllUsesAreAddresses &&
ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
return;
}
}
StrideNoReuse.insert(*CondStride);
}
// If the trip count is computed in terms of a max (due to ScalarEvolution
// being unable to find a sufficient guard, for example), change the loop
// comparison to use SLT or ULT instead of NE.
Cond = OptimizeMax(L, Cond, CondUse);
// If possible, change stride and operands of the compare instruction to
// eliminate one stride.
if (ExitingBlock == LatchBlock)
Evan Cheng
committed
Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
// It's possible for the setcc instruction to be anywhere in the loop, and
// possible for it to have multiple users. If it is not immediately before
// the latch block branch, move it.
if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
if (Cond->hasOneUse()) { // Condition has a single use, just move it.
Cond->moveBefore(TermBr);
} else {
// Otherwise, clone the terminating condition and insert into the loopend.
Cond->setName(L->getHeader()->getName() + ".termcond");
LatchBlock->getInstList().insert(TermBr, Cond);
// Clone the IVUse, as the old use still exists!
IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond,
CondUse->getOperandValToReplace());
CondUse = &IU->IVUsesByStride[*CondStride]->Users.back();
}
}
// If we get to here, we know that we can transform the setcc instruction to
// use the post-incremented version of the IV, allowing us to coalesce the
// live ranges for the IV correctly.
CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), *CondStride));
CondUse->setIsUseOfPostIncrementedValue(true);
Evan Cheng
committed
++NumLoopCond;
}
/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
/// when to exit the loop is used only for that purpose, try to rearrange things
/// so it counts down to a test against zero.
void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
// If the number of times the loop is executed isn't computable, give up.
const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return;
// Get the terminating condition for the loop if possible (this isn't
// necessarily in the latch, or a block that's a predecessor of the header).
if (!L->getExitBlock())
return; // More than one loop exit blocks.
// Okay, there is one exit block. Try to find the condition that causes the
// loop to be exited.
BasicBlock *ExitingBlock = L->getExitingBlock();
if (!ExitingBlock)
return; // More than one block exiting!
// Okay, we've computed the exiting block. See what condition causes us to
// exit.
//
// FIXME: we should be able to handle switch instructions (with a single exit)
BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
if (TermBr == 0) return;
assert(TermBr->isConditional() && "If unconditional, it can't be in loop!");
if (!isa<ICmpInst>(TermBr->getCondition()))
return;
ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
// Handle only tests for equality for the moment, and only stride 1.
if (Cond->getPredicate() != CmpInst::ICMP_EQ)
return;
const SCEV* IV = SE->getSCEV(Cond->getOperand(0));
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
const SCEV* One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
if (!AR || !AR->isAffine() || AR->getStepRecurrence(*SE) != One)
return;
// If the RHS of the comparison is defined inside the loop, the rewrite
// cannot be done.
if (Instruction *CR = dyn_cast<Instruction>(Cond->getOperand(1)))
if (L->contains(CR->getParent()))
return;
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
// Make sure the IV is only used for counting. Value may be preinc or
// postinc; 2 uses in either case.
if (!Cond->getOperand(0)->hasNUses(2))
return;
PHINode *phi = dyn_cast<PHINode>(Cond->getOperand(0));
Instruction *incr;
if (phi && phi->getParent()==L->getHeader()) {
// value tested is preinc. Find the increment.
// A CmpInst is not a BinaryOperator; we depend on this.
Instruction::use_iterator UI = phi->use_begin();
incr = dyn_cast<BinaryOperator>(UI);
if (!incr)
incr = dyn_cast<BinaryOperator>(++UI);
// 1 use for postinc value, the phi. Unnecessarily conservative?
if (!incr || !incr->hasOneUse() || incr->getOpcode()!=Instruction::Add)
return;
} else {
// Value tested is postinc. Find the phi node.
incr = dyn_cast<BinaryOperator>(Cond->getOperand(0));
if (!incr || incr->getOpcode()!=Instruction::Add)
return;
Instruction::use_iterator UI = Cond->getOperand(0)->use_begin();
phi = dyn_cast<PHINode>(UI);
if (!phi)
phi = dyn_cast<PHINode>(++UI);
// 1 use for preinc value, the increment.
if (!phi || phi->getParent()!=L->getHeader() || !phi->hasOneUse())
return;
}
// Replace the increment with a decrement.
BinaryOperator *decr =
BinaryOperator::Create(Instruction::Sub, incr->getOperand(0),
incr->getOperand(1), "tmp", incr);
incr->replaceAllUsesWith(decr);
incr->eraseFromParent();
// Substitute endval-startval for the original startval, and 0 for the
// original endval. Since we're only testing for equality this is OK even
// if the computation wraps around.
BasicBlock *Preheader = L->getLoopPreheader();
Instruction *PreInsertPt = Preheader->getTerminator();
int inBlock = L->contains(phi->getIncomingBlock(0)) ? 1 : 0;
Value *startVal = phi->getIncomingValue(inBlock);
Value *endVal = Cond->getOperand(1);
// FIXME check for case where both are constant
Constant* Zero = Context->getConstantInt(Cond->getOperand(1)->getType(), 0);
BinaryOperator *NewStartVal =
BinaryOperator::Create(Instruction::Sub, endVal, startVal,
"tmp", PreInsertPt);
phi->setIncomingValue(inBlock, NewStartVal);
Cond->setOperand(1, Zero);
Changed = true;
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
IU = &getAnalysis<IVUsers>();
DT = &getAnalysis<DominatorTree>();
SE = &getAnalysis<ScalarEvolution>();
if (!IU->IVUsesByStride.empty()) {
#ifndef NDEBUG
DOUT << "\nLSR on \"" << L->getHeader()->getParent()->getNameStart()
<< "\" ";
DEBUG(L->dump());
#endif
// Sort the StrideOrder so we process larger strides first.
std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(),
StrideCompare(SE));
// Optimize induction variables. Some indvar uses can be transformed to use
// strides that will be needed for other purposes. A common example of this
// is the exit test for the loop, which can often be rewritten to use the
// computation of some other indvar to decide when to terminate the loop.
OptimizeIndvars(L);
Evan Cheng
committed
// Change loop terminating condition to use the postinc iv when possible
// and optimize loop terminating compare. FIXME: Move this after
// StrengthReduceStridedIVUsers?
OptimizeLoopTermCond(L);
// FIXME: We can shrink overlarge IV's here. e.g. if the code has
// computation in i64 values and the target doesn't support i64, demote
// the computation to 32-bit if safe.
// FIXME: Attempt to reuse values across multiple IV's. In particular, we
// could have something like "for(i) { foo(i*8); bar(i*16) }", which should
// be codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC.
// Need to be careful that IV's are all the same type. Only works for
// intptr_t indvars.
// IVsByStride keeps IVs for one particular loop.
assert(IVsByStride.empty() && "Stale entries in IVsByStride?");
// Note: this processes each stride/type pair individually. All users
// passed into StrengthReduceStridedIVUsers have the same type AND stride.
// Also, note that we iterate over IVUsesByStride indirectly by using
// StrideOrder. This extra layer of indirection makes the ordering of
// strides deterministic - not dependent on map order.
for (unsigned Stride = 0, e = IU->StrideOrder.size();
Stride != e; ++Stride) {
std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
// FIXME: Generalize to non-affine IV's.
if (!SI->first->isLoopInvariant(L))
continue;
StrengthReduceStridedIVUsers(SI->first, *SI->second, L);
}
// After all sharing is done, see if we can adjust the loop to test against
// zero instead of counting up to a maximum. This is usually faster.
OptimizeLoopCountIV(L);
// We're done analyzing this loop; release all the state we built up for it.
IVsByStride.clear();
Evan Cheng
committed
StrideNoReuse.clear();
// Clean up after ourselves
if (!DeadInsts.empty())
DeleteTriviallyDeadInstructions();
// At this point, it is worth checking to see if any recurrence PHIs are also
// dead, so that we can remove them as well.
DeleteDeadPHIs(L->getHeader());