"git@repo.hca.bsc.es:rferrer/llvm-epi-0.8.git" did not exist on "c511efbcfce12608ec1bed71225b9768e1f16737"
Newer
Older
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
/// induction variable:
///
/// i = 0;
/// smax = n < 1 ? 1 : n;
/// do {
/// p[i] = 0.0;
/// } while (++i != smax);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
/// LoopInfo analysis, which doesn't remember trip count values. It
/// expects to be able to rediscover the trip count each time it is
/// needed, and it does this using a simple analyis that only succeeds if
/// the loop has a canonical induction variable.
///
/// However, when it comes time to generate code, the maximum operation
/// can be quite costly, especially if it's inside of an outer loop.
///
/// This function solves this problem by detecting this type of loop and
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
///
ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse) {
// Check that the loop matches the pattern we're looking for.
if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
Cond->getPredicate() != CmpInst::ICMP_NE)
return Cond;
SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
if (!Sel || !Sel->hasOneUse()) return Cond;
SCEVHandle IterationCount = SE->getIterationCount(L);
if (isa<SCEVCouldNotCompute>(IterationCount))
return Cond;
SCEVHandle One = SE->getIntegerSCEV(1, IterationCount->getType());
// Adjust for an annoying getIterationCount quirk.
IterationCount = SE->getAddExpr(IterationCount, One);
// Check for a max calculation that matches the pattern.
SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(IterationCount);
if (!SMax || SMax != SE->getSCEV(Sel)) return Cond;
SCEVHandle SMaxLHS = SMax->getOperand(0);
SCEVHandle SMaxRHS = SMax->getOperand(1);
if (!SMaxLHS || SMaxLHS != One) return Cond;
// Check the relevant induction variable for conformance to
// the pattern.
SCEVHandle IV = SE->getSCEV(Cond->getOperand(0));
SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
if (!AR || !AR->isAffine() ||
AR->getStart() != One ||
AR->getStepRecurrence(*SE) != One)
return Cond;
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
Value *NewRHS = 0;
if (SE->getSCEV(Sel->getOperand(1)) == SMaxRHS)
NewRHS = Sel->getOperand(1);
else if (SE->getSCEV(Sel->getOperand(2)) == SMaxRHS)
NewRHS = Sel->getOperand(2);
if (!NewRHS) return Cond;
// Ok, everything looks ok to change the condition into an SLT or SGE and
// delete the max calculation.
ICmpInst *NewCond =
new ICmpInst(Cond->getPredicate() == CmpInst::ICMP_NE ?
CmpInst::ICMP_SLT :
CmpInst::ICMP_SGE,
Cond->getOperand(0), NewRHS, "scmp", Cond);
// Delete the max calculation instructions.
SE->deleteValueFromRecords(Cond);
Cond->replaceAllUsesWith(NewCond);
Cond->eraseFromParent();
Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
SE->deleteValueFromRecords(Sel);
Sel->eraseFromParent();
if (Cmp->use_empty()) {
SE->deleteValueFromRecords(Cmp);
Cmp->eraseFromParent();
}
CondUse->User = NewCond;
return NewCond;
}
Devang Patel
committed
/// OptimizeShadowIV - If IV is used in a int-to-float cast
/// inside the loop then try to eliminate the cast opeation.
void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
SCEVHandle IterationCount = SE->getIterationCount(L);
if (isa<SCEVCouldNotCompute>(IterationCount))
return;
for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e;
++Stride) {
std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
IVUsesByStride.find(StrideOrder[Stride]);
assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
if (!isa<SCEVConstant>(SI->first))
continue;
for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(),
E = SI->second.Users.end(); UI != E; /* empty */) {
std::vector<IVStrideUse>::iterator CandidateUI = UI;
++UI;
Devang Patel
committed
Instruction *ShadowUse = CandidateUI->User;
const Type *DestTy = NULL;
/* If shadow use is a int->float cast then insert a second IV
to eliminate this cast.
Devang Patel
committed
for (unsigned i = 0; i < n; ++i)
foo((double)i);
is transformed into
Devang Patel
committed
double d = 0.0;
for (unsigned i = 0; i < n; ++i, ++d)
foo(d);
*/
if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->User))
Devang Patel
committed
DestTy = UCast->getDestTy();
else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->User))
Devang Patel
committed
DestTy = SCast->getDestTy();
Devang Patel
committed
if (!DestTy) continue;
if (TLI) {
/* If target does not support DestTy natively then do not apply
this transformation. */
MVT DVT = TLI->getValueType(DestTy);
if (!TLI->isTypeLegal(DVT)) continue;
}
Devang Patel
committed
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
if (!PH) continue;
if (PH->getNumIncomingValues() != 2) continue;
const Type *SrcTy = PH->getType();
int Mantissa = DestTy->getFPMantissaWidth();
if (Mantissa == -1) continue;
if ((int)TD->getTypeSizeInBits(SrcTy) > Mantissa)
continue;
unsigned Entry, Latch;
if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
Entry = 0;
Latch = 1;
} else {
Entry = 1;
Latch = 0;
}
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
ConstantFP *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
if (!Incr) continue;
if (Incr->getOpcode() != Instruction::Add
&& Incr->getOpcode() != Instruction::Sub)
continue;
/* Initialize new IV, double d = 0.0 in above example. */
ConstantInt *C = NULL;
if (Incr->getOperand(0) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(1));
else if (Incr->getOperand(1) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(0));
else
continue;
if (!C) continue;
/* Add new PHINode. */
PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
/* create new increment. '++d' in above example. */
Devang Patel
committed
ConstantFP *CFP = ConstantFP::get(DestTy, C->getZExtValue());
BinaryOperator *NewIncr =
BinaryOperator::Create(Incr->getOpcode(),
NewPH, CFP, "IV.S.next.", Incr);
NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
/* Remove cast operation */
SE->deleteValueFromRecords(ShadowUse);
ShadowUse->replaceAllUsesWith(NewPH);
ShadowUse->eraseFromParent();
SI->second.Users.erase(CandidateUI);
NumShadow++;
break;
}
}
}
// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
// uses in the loop, look to see if we can eliminate some, in favor of using
// common indvars for the different uses.
void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
// TODO: implement optzns here.
Devang Patel
committed
OptimizeShadowIV(L);
// Finally, get the terminating condition for the loop if possible. If we
// can, we want to change it to use a post-incremented version of its
// induction variable, to allow coalescing the live ranges for the IV into
// one register value.
PHINode *SomePHI = cast<PHINode>(L->getHeader()->begin());
BasicBlock *Preheader = L->getLoopPreheader();
BasicBlock *LatchBlock =
SomePHI->getIncomingBlock(SomePHI->getIncomingBlock(0) == Preheader);
BranchInst *TermBr = dyn_cast<BranchInst>(LatchBlock->getTerminator());
if (!TermBr || TermBr->isUnconditional() ||
!isa<ICmpInst>(TermBr->getCondition()))
return;
// Search IVUsesByStride to find Cond's IVUse if there is one.
IVStrideUse *CondUse = 0;
Chris Lattner
committed
const SCEVHandle *CondStride = 0;
if (!FindIVUserForCond(Cond, CondUse, CondStride))
return; // setcc doesn't use the IV.
// If the trip count is computed in terms of an smax (due to ScalarEvolution
// being unable to find a sufficient guard, for example), change the loop
// comparison to use SLT instead of NE.
Cond = OptimizeSMax(L, Cond, CondUse);
// If possible, change stride and operands of the compare instruction to
// eliminate one stride.
Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
// It's possible for the setcc instruction to be anywhere in the loop, and
// possible for it to have multiple users. If it is not immediately before
// the latch block branch, move it.
if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
if (Cond->hasOneUse()) { // Condition has a single use, just move it.
Cond->moveBefore(TermBr);
} else {
// Otherwise, clone the terminating condition and insert into the loopend.
Cond->setName(L->getHeader()->getName() + ".termcond");
LatchBlock->getInstList().insert(TermBr, Cond);
// Clone the IVUse, as the old use still exists!
Chris Lattner
committed
IVUsesByStride[*CondStride].addUser(CondUse->Offset, Cond,
CondUse->OperandValToReplace);
Chris Lattner
committed
CondUse = &IVUsesByStride[*CondStride].Users.back();
}
}
// If we get to here, we know that we can transform the setcc instruction to
// use the post-incremented version of the IV, allowing us to coalesce the
// live ranges for the IV correctly.
CondUse->Offset = SE->getMinusSCEV(CondUse->Offset, *CondStride);
CondUse->isUseOfPostIncrementedValue = true;
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
DT = &getAnalysis<DominatorTree>();
SE = &getAnalysis<ScalarEvolution>();
TD = &getAnalysis<TargetData>();
UIntPtrTy = TD->getIntPtrType();
// Find all uses of induction variables in this loop, and categorize
// them by stride. Start by finding all of the PHI nodes in the header for
// this loop. If they are induction variables, inspect their uses.
SmallPtrSet<Instruction*,16> Processed; // Don't reprocess instructions.
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
AddUsersIfInteresting(I, L, Processed);
if (!IVUsesByStride.empty()) {
// Optimize induction variables. Some indvar uses can be transformed to use
// strides that will be needed for other purposes. A common example of this
// is the exit test for the loop, which can often be rewritten to use the
// computation of some other indvar to decide when to terminate the loop.
OptimizeIndvars(L);
// FIXME: We can widen subreg IV's here for RISC targets. e.g. instead of
// doing computation in byte values, promote to 32-bit values if safe.
// FIXME: Attempt to reuse values across multiple IV's. In particular, we
// could have something like "for(i) { foo(i*8); bar(i*16) }", which should
// be codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC.
// Need to be careful that IV's are all the same type. Only works for
// intptr_t indvars.
// If we only have one stride, we can more aggressively eliminate some
// things.
bool HasOneStride = IVUsesByStride.size() == 1;
#ifndef NDEBUG
#endif
// IVsByStride keeps IVs for one particular loop.
assert(IVsByStride.empty() && "Stale entries in IVsByStride?");
// Sort the StrideOrder so we process larger strides first.
std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare());
// Note: this processes each stride/type pair individually. All users
// passed into StrengthReduceStridedIVUsers have the same type AND stride.
// Also, note that we iterate over IVUsesByStride indirectly by using
// StrideOrder. This extra layer of indirection makes the ordering of
// strides deterministic - not dependent on map order.
for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) {
std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
IVUsesByStride.find(StrideOrder[Stride]);
assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
StrengthReduceStridedIVUsers(SI->first, SI->second, L, HasOneStride);
}
}
// We're done analyzing this loop; release all the state we built up for it.
CastedPointers.clear();
IVUsesByStride.clear();
IVsByStride.clear();
StrideOrder.clear();
for (unsigned i=0; i<GEPlist.size(); i++)
SE->deleteValueFromRecords(GEPlist[i]);
GEPlist.clear();
// Clean up after ourselves
if (!DeadInsts.empty()) {
DeleteTriviallyDeadInstructions();
BasicBlock::iterator I = L->getHeader()->begin();
while (PHINode *PN = dyn_cast<PHINode>(I++)) {
// At this point, we know that we have killed one or more IV users.
// It is worth checking to see if the cannonical indvar is also
// dead, so that we can remove it as well.
//
// We can remove a PHI if it is on a cycle in the def-use graph
// where each node in the cycle has degree one, i.e. only one use,
// and is an instruction with no side effects.
//
// FIXME: this needs to eliminate an induction variable even if it's being
// compared against some value to decide loop termination.
if (!PN->hasOneUse())
continue;
SmallPtrSet<PHINode *, 4> PHIs;
for (Instruction *J = dyn_cast<Instruction>(*PN->use_begin());
J && J->hasOneUse() && !J->mayWriteToMemory();
J = dyn_cast<Instruction>(*J->use_begin())) {
// If we find the original PHI, we've discovered a cycle.
if (J == PN) {
// Break the cycle and mark the PHI for deletion.
SE->deleteValueFromRecords(PN);
PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
DeadInsts.push_back(PN);
// If we find a PHI more than once, we're on a cycle that
// won't prove fruitful.
if (isa<PHINode>(J) && !PHIs.insert(cast<PHINode>(J)))
break;
}
}
DeleteTriviallyDeadInstructions();
}