Newer
Older
const Type *CmpTy = Cond->getOperand(0)->getType();
unsigned TyBits = SE->getTypeSizeInBits(CmpTy);
SCEVHandle *NewStride = NULL;
Value *NewCmpLHS = NULL;
Value *NewCmpRHS = NULL;
int64_t Scale = 1;
SCEVHandle NewOffset = SE->getIntegerSCEV(0, CmpTy);
if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) {
int64_t CmpVal = C->getValue().getSExtValue();
// Check stride constant and the comparision constant signs to detect
// overflow.
if ((CmpVal & SignBit) != (CmpSSInt & SignBit))
return Cond;
// Look for a suitable stride / iv as replacement.
for (unsigned i = 0, e = StrideOrder.size(); i != e; ++i) {
std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
IVUsesByStride.find(StrideOrder[i]);
if (!isa<SCEVConstant>(SI->first))
continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SSInt == CmpSSInt ||
abs(SSInt) < abs(CmpSSInt) ||
(SSInt % CmpSSInt) != 0)
Scale = SSInt / CmpSSInt;
int64_t NewCmpVal = CmpVal * Scale;
APInt Mul = APInt(BitWidth*2, CmpVal, true);
Mul = Mul * APInt(BitWidth*2, Scale, true);
// Check for overflow.
// Watch out for overflow.
if (ICmpInst::isSignedPredicate(Predicate) &&
(CmpVal & SignBit) != (NewCmpVal & SignBit))
continue;
if (NewCmpVal == CmpVal)
continue;
// Pick the best iv to use trying to avoid a cast.
NewCmpLHS = NULL;
for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(),
E = SI->second.Users.end(); UI != E; ++UI) {
NewCmpLHS = UI->OperandValToReplace;
if (NewCmpLHS->getType() == CmpTy)
break;
}
if (!NewCmpLHS)
continue;
NewCmpTy = NewCmpLHS->getType();
NewTyBits = SE->getTypeSizeInBits(NewCmpTy);
const Type *NewCmpIntTy = IntegerType::get(NewTyBits);
if (RequiresTypeConversion(NewCmpTy, CmpTy)) {
// Check if it is possible to rewrite it using
// an iv / stride of a smaller integer type.
unsigned Bits = NewTyBits;
if (ICmpInst::isSignedPredicate(Predicate))
--Bits;
uint64_t Mask = (1ULL << Bits) - 1;
if (((uint64_t)NewCmpVal & Mask) != (uint64_t)NewCmpVal)
continue;
}
// Don't rewrite if use offset is non-constant and the new type is
// of a different type.
// FIXME: too conservative?
if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->Offset))
continue;
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
bool AllUsesAreAddresses = true;
bool AllUsesAreOutsideLoop = true;
std::vector<BasedUser> UsersToProcess;
SCEVHandle CommonExprs = CollectIVUsers(SI->first, SI->second, L,
AllUsesAreAddresses,
AllUsesAreOutsideLoop,
UsersToProcess);
// Avoid rewriting the compare instruction with an iv of new stride
// if it's likely the new stride uses will be rewritten using the
// stride of the compare instruction.
if (AllUsesAreAddresses &&
ValidStride(!CommonExprs->isZero(), Scale, UsersToProcess))
continue;
// If scale is negative, use swapped predicate unless it's testing
// for equality.
if (Scale < 0 && !Cond->isEquality())
Predicate = ICmpInst::getSwappedPredicate(Predicate);
NewStride = &StrideOrder[i];
if (!isa<PointerType>(NewCmpTy))
NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal);
else {
ConstantInt *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal);
NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy);
}
NewOffset = TyBits == NewTyBits
? SE->getMulExpr(CondUse->Offset,
SE->getConstant(ConstantInt::get(CmpTy, Scale)))
: SE->getConstant(ConstantInt::get(NewCmpIntTy,
cast<SCEVConstant>(CondUse->Offset)->getValue()->getSExtValue()*Scale));
break;
}
}
// Forgo this transformation if it the increment happens to be
// unfortunately positioned after the condition, and the condition
// has multiple uses which prevent it from being moved immediately
// before the branch. See
// test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-*.ll
// for an example of this situation.
if (!Cond->hasOneUse()) {
for (BasicBlock::iterator I = Cond, E = Cond->getParent()->end();
I != E; ++I)
if (I == NewCmpLHS)
return Cond;
if (NewCmpRHS) {
// Create a new compare instruction using new stride / iv.
ICmpInst *OldCond = Cond;
Cond = new ICmpInst(Predicate, NewCmpLHS, NewCmpRHS,
L->getHeader()->getName() + ".termcond",
OldCond);
// Remove the old compare instruction. The old indvar is probably dead too.
DeadInsts.push_back(cast<Instruction>(CondUse->OperandValToReplace));
OldCond->replaceAllUsesWith(Cond);
OldCond->eraseFromParent();
IVUsesByStride[*CondStride].Users.pop_back();
IVUsesByStride[*NewStride].addUser(NewOffset, Cond, NewCmpLHS);
CondUse = &IVUsesByStride[*NewStride].Users.back();
CondStride = NewStride;
++NumEliminated;
Changed = true;
}
return Cond;
}
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
/// OptimizeSMax - Rewrite the loop's terminating condition if it uses
/// an smax computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
///
/// i = 0;
/// do {
/// p[i] = 0.0;
/// } while (++i < n);
///
/// where the comparison is signed, the trip count isn't just 'n', because
/// 'n' could be negative. And unfortunately this can come up even for loops
/// where the user didn't use a C do-while loop. For example, seemingly
/// well-behaved top-test loops will commonly be lowered like this:
//
/// if (n > 0) {
/// i = 0;
/// do {
/// p[i] = 0.0;
/// } while (++i < n);
/// }
///
/// and then it's possible for subsequent optimization to obscure the if
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
/// signed-max expression, which allows it to give the loop a canonical
/// induction variable:
///
/// i = 0;
/// smax = n < 1 ? 1 : n;
/// do {
/// p[i] = 0.0;
/// } while (++i != smax);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
/// LoopInfo analysis, which doesn't remember trip count values. It
/// expects to be able to rediscover the trip count each time it is
/// needed, and it does this using a simple analyis that only succeeds if
/// the loop has a canonical induction variable.
///
/// However, when it comes time to generate code, the maximum operation
/// can be quite costly, especially if it's inside of an outer loop.
///
/// This function solves this problem by detecting this type of loop and
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
///
ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse) {
// Check that the loop matches the pattern we're looking for.
if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
Cond->getPredicate() != CmpInst::ICMP_NE)
return Cond;
SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
if (!Sel || !Sel->hasOneUse()) return Cond;
SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return Cond;
SCEVHandle One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
// Add one to the backedge-taken count to get the trip count.
SCEVHandle IterationCount = SE->getAddExpr(BackedgeTakenCount, One);
// Check for a max calculation that matches the pattern.
const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(IterationCount);
if (!SMax || SMax != SE->getSCEV(Sel)) return Cond;
SCEVHandle SMaxLHS = SMax->getOperand(0);
SCEVHandle SMaxRHS = SMax->getOperand(1);
if (!SMaxLHS || SMaxLHS != One) return Cond;
// Check the relevant induction variable for conformance to
// the pattern.
SCEVHandle IV = SE->getSCEV(Cond->getOperand(0));
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
if (!AR || !AR->isAffine() ||
AR->getStart() != One ||
AR->getStepRecurrence(*SE) != One)
return Cond;
assert(AR->getLoop() == L &&
"Loop condition operand is an addrec in a different loop!");
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
Value *NewRHS = 0;
if (SE->getSCEV(Sel->getOperand(1)) == SMaxRHS)
NewRHS = Sel->getOperand(1);
else if (SE->getSCEV(Sel->getOperand(2)) == SMaxRHS)
NewRHS = Sel->getOperand(2);
if (!NewRHS) return Cond;
// Ok, everything looks ok to change the condition into an SLT or SGE and
// delete the max calculation.
ICmpInst *NewCond =
new ICmpInst(Cond->getPredicate() == CmpInst::ICMP_NE ?
CmpInst::ICMP_SLT :
CmpInst::ICMP_SGE,
Cond->getOperand(0), NewRHS, "scmp", Cond);
// Delete the max calculation instructions.
Cond->replaceAllUsesWith(NewCond);
Cond->eraseFromParent();
Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
Sel->eraseFromParent();
Cmp->eraseFromParent();
CondUse->User = NewCond;
return NewCond;
}
Devang Patel
committed
/// OptimizeShadowIV - If IV is used in a int-to-float cast
/// inside the loop then try to eliminate the cast opeation.
void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
Devang Patel
committed
return;
for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e;
++Stride) {
std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
IVUsesByStride.find(StrideOrder[Stride]);
assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
if (!isa<SCEVConstant>(SI->first))
continue;
for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(),
E = SI->second.Users.end(); UI != E; /* empty */) {
std::vector<IVStrideUse>::iterator CandidateUI = UI;
++UI;
Devang Patel
committed
Instruction *ShadowUse = CandidateUI->User;
const Type *DestTy = NULL;
/* If shadow use is a int->float cast then insert a second IV
to eliminate this cast.
Devang Patel
committed
for (unsigned i = 0; i < n; ++i)
foo((double)i);
is transformed into
Devang Patel
committed
double d = 0.0;
for (unsigned i = 0; i < n; ++i, ++d)
foo(d);
*/
if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->User))
Devang Patel
committed
DestTy = UCast->getDestTy();
else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->User))
Devang Patel
committed
DestTy = SCast->getDestTy();
Devang Patel
committed
if (!DestTy) continue;
if (TLI) {
/* If target does not support DestTy natively then do not apply
this transformation. */
MVT DVT = TLI->getValueType(DestTy);
if (!TLI->isTypeLegal(DVT)) continue;
}
Devang Patel
committed
PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
if (!PH) continue;
if (PH->getNumIncomingValues() != 2) continue;
const Type *SrcTy = PH->getType();
int Mantissa = DestTy->getFPMantissaWidth();
if (Mantissa == -1) continue;
if ((int)SE->getTypeSizeInBits(SrcTy) > Mantissa)
Devang Patel
committed
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
continue;
unsigned Entry, Latch;
if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
Entry = 0;
Latch = 1;
} else {
Entry = 1;
Latch = 0;
}
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
ConstantFP *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
if (!Incr) continue;
if (Incr->getOpcode() != Instruction::Add
&& Incr->getOpcode() != Instruction::Sub)
continue;
/* Initialize new IV, double d = 0.0 in above example. */
ConstantInt *C = NULL;
if (Incr->getOperand(0) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(1));
else if (Incr->getOperand(1) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(0));
else
continue;
if (!C) continue;
/* Add new PHINode. */
PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
/* create new increment. '++d' in above example. */
Devang Patel
committed
ConstantFP *CFP = ConstantFP::get(DestTy, C->getZExtValue());
BinaryOperator *NewIncr =
BinaryOperator::Create(Incr->getOpcode(),
NewPH, CFP, "IV.S.next.", Incr);
NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
/* Remove cast operation */
ShadowUse->replaceAllUsesWith(NewPH);
ShadowUse->eraseFromParent();
SI->second.Users.erase(CandidateUI);
NumShadow++;
break;
}
}
}
// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
// uses in the loop, look to see if we can eliminate some, in favor of using
// common indvars for the different uses.
void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
// TODO: implement optzns here.
Devang Patel
committed
OptimizeShadowIV(L);
OptimizeLoopTermCond(L);
}
/// OptimizeLoopTermCond - Change loop terminating condition to use the
/// postinc iv when possible.
void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
// Finally, get the terminating condition for the loop if possible. If we
// can, we want to change it to use a post-incremented version of its
// induction variable, to allow coalescing the live ranges for the IV into
// one register value.
PHINode *SomePHI = cast<PHINode>(L->getHeader()->begin());
BasicBlock *Preheader = L->getLoopPreheader();
BasicBlock *LatchBlock =
SomePHI->getIncomingBlock(SomePHI->getIncomingBlock(0) == Preheader);
BranchInst *TermBr = dyn_cast<BranchInst>(LatchBlock->getTerminator());
if (!TermBr || TermBr->isUnconditional() ||
!isa<ICmpInst>(TermBr->getCondition()))
return;
// Search IVUsesByStride to find Cond's IVUse if there is one.
IVStrideUse *CondUse = 0;
Chris Lattner
committed
const SCEVHandle *CondStride = 0;
if (!FindIVUserForCond(Cond, CondUse, CondStride))
return; // setcc doesn't use the IV.
// If the trip count is computed in terms of an smax (due to ScalarEvolution
// being unable to find a sufficient guard, for example), change the loop
// comparison to use SLT instead of NE.
Cond = OptimizeSMax(L, Cond, CondUse);
// If possible, change stride and operands of the compare instruction to
// eliminate one stride.
Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
// It's possible for the setcc instruction to be anywhere in the loop, and
// possible for it to have multiple users. If it is not immediately before
// the latch block branch, move it.
if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
if (Cond->hasOneUse()) { // Condition has a single use, just move it.
Cond->moveBefore(TermBr);
} else {
// Otherwise, clone the terminating condition and insert into the loopend.
Cond->setName(L->getHeader()->getName() + ".termcond");
LatchBlock->getInstList().insert(TermBr, Cond);
// Clone the IVUse, as the old use still exists!
Chris Lattner
committed
IVUsesByStride[*CondStride].addUser(CondUse->Offset, Cond,
CondUse->OperandValToReplace);
Chris Lattner
committed
CondUse = &IVUsesByStride[*CondStride].Users.back();
}
}
// If we get to here, we know that we can transform the setcc instruction to
// use the post-incremented version of the IV, allowing us to coalesce the
// live ranges for the IV correctly.
CondUse->Offset = SE->getMinusSCEV(CondUse->Offset, *CondStride);
CondUse->isUseOfPostIncrementedValue = true;
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
DT = &getAnalysis<DominatorTree>();
SE = &getAnalysis<ScalarEvolution>();
// Find all uses of induction variables in this loop, and categorize
// them by stride. Start by finding all of the PHI nodes in the header for
// this loop. If they are induction variables, inspect their uses.
SmallPtrSet<Instruction*,16> Processed; // Don't reprocess instructions.
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
AddUsersIfInteresting(I, L, Processed);
#ifndef NDEBUG
DOUT << "\nLSR on \"" << L->getHeader()->getParent()->getNameStart()
<< "\" ";
DEBUG(L->dump());
#endif
// Sort the StrideOrder so we process larger strides first.
std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare(SE));
// Optimize induction variables. Some indvar uses can be transformed to use
// strides that will be needed for other purposes. A common example of this
// is the exit test for the loop, which can often be rewritten to use the
// computation of some other indvar to decide when to terminate the loop.
OptimizeIndvars(L);
// FIXME: We can shrink overlarge IV's here. e.g. if the code has
// computation in i64 values and the target doesn't support i64, demote
// the computation to 32-bit if safe.
// FIXME: Attempt to reuse values across multiple IV's. In particular, we
// could have something like "for(i) { foo(i*8); bar(i*16) }", which should
// be codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC.
// Need to be careful that IV's are all the same type. Only works for
// intptr_t indvars.
// IVsByStride keeps IVs for one particular loop.
assert(IVsByStride.empty() && "Stale entries in IVsByStride?");
// Note: this processes each stride/type pair individually. All users
// passed into StrengthReduceStridedIVUsers have the same type AND stride.
// Also, note that we iterate over IVUsesByStride indirectly by using
// StrideOrder. This extra layer of indirection makes the ordering of
// strides deterministic - not dependent on map order.
for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) {
std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
IVUsesByStride.find(StrideOrder[Stride]);
assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
StrengthReduceStridedIVUsers(SI->first, SI->second, L);
}
// We're done analyzing this loop; release all the state we built up for it.
IVUsesByStride.clear();
IVsByStride.clear();
StrideOrder.clear();
// Clean up after ourselves
if (!DeadInsts.empty())
DeleteTriviallyDeadInstructions();
// At this point, it is worth checking to see if any recurrence PHIs are also
// dead, so that we can remove them as well.
DeleteDeadPHIs(L->getHeader());