Newer
Older
// If the user is not in the current loop, this means it is using the exit
// value of the IV. Do not put anything in the base, make sure it's all in
// the immediate field to allow as much factoring as possible.
if (!L->contains(UsersToProcess[i].Inst->getParent())) {
UsersToProcess[i].Imm = SCEVAddExpr::get(UsersToProcess[i].Imm,
UsersToProcess[i].Base);
UsersToProcess[i].Base =
SCEVUnknown::getIntegerSCEV(0, UsersToProcess[i].Base->getType());
} else {
// Addressing modes can be folded into loads and stores. Be careful that
// the store is through the expression, not of the expression though.
bool isAddress = isa<LoadInst>(UsersToProcess[i].Inst);
if (StoreInst *SI = dyn_cast<StoreInst>(UsersToProcess[i].Inst))
if (SI->getOperand(1) == UsersToProcess[i].OperandValToReplace)
isAddress = true;
MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
UsersToProcess[i].Imm, isAddress, L);
}
}
// Check if it is possible to reuse a IV with stride that is factor of this
// stride. And the multiple is a number that can be encoded in the scale
// field of the target addressing mode. And we will have a valid
// instruction after this substition, including the immediate field, if any.
PHINode *NewPHI = NULL;
Value *IncV = NULL;
IVExpr ReuseIV;
unsigned RewriteFactor = CheckForIVReuse(Stride, ReuseIV,
CommonExprs->getType(),
UsersToProcess);
if (RewriteFactor != 0) {
DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride
<< " and BASE " << *ReuseIV.Base << " :\n";
NewPHI = ReuseIV.PHI;
IncV = ReuseIV.IncV;
}
const Type *ReplacedTy = CommonExprs->getType();
// Now that we know what we need to do, insert the PHI node itself.
//
DOUT << "INSERTING IV of TYPE " << *ReplacedTy << " of STRIDE "
<< *Stride << " and BASE " << *CommonExprs << " :\n";
SCEVExpander Rewriter(*SE, *LI);
SCEVExpander PreheaderRewriter(*SE, *LI);
BasicBlock *Preheader = L->getLoopPreheader();
Instruction *PreInsertPt = Preheader->getTerminator();
Instruction *PhiInsertBefore = L->getHeader()->begin();
BasicBlock *LatchBlock = L->getLoopLatch();
// Emit the initial base value into the loop preheader.
Value *CommonBaseV
= PreheaderRewriter.expandCodeFor(CommonExprs, PreInsertPt,
ReplacedTy);
// Create a new Phi for this base, and stick it in the loop header.
NewPHI = new PHINode(ReplacedTy, "iv.", PhiInsertBefore);
++NumInserted;
// Add common base to the new Phi node.
NewPHI->addIncoming(CommonBaseV, Preheader);
// Insert the stride into the preheader.
Value *StrideV = PreheaderRewriter.expandCodeFor(Stride, PreInsertPt,
ReplacedTy);
if (!isa<ConstantInt>(StrideV)) ++NumVariable;
Chris Lattner
committed
// Emit the increment of the base value before the terminator of the loop
// latch block, and add it to the Phi node.
SCEVHandle IncExp = SCEVAddExpr::get(SCEVUnknown::get(NewPHI),
SCEVUnknown::get(StrideV));
IncV = Rewriter.expandCodeFor(IncExp, LatchBlock->getTerminator(),
ReplacedTy);
IncV->setName(NewPHI->getName()+".inc");
NewPHI->addIncoming(IncV, LatchBlock);
// Remember this in case a later stride is multiple of this.
IVsByStride[Stride].addIV(Stride, CommonExprs, NewPHI, IncV);
} else {
Constant *C = dyn_cast<Constant>(CommonBaseV);
if (!C ||
(!C->isNullValue() &&
!isTargetConstant(SCEVUnknown::get(CommonBaseV), ReplacedTy, TLI)))
// We want the common base emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(),
"commonbase", PreInsertPt);
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
// We want to emit code for users inside the loop first. To do this, we
// rearrange BasedUser so that the entries at the end have
// isUseOfPostIncrementedValue = false, because we pop off the end of the
// vector (so we handle them first).
std::partition(UsersToProcess.begin(), UsersToProcess.end(),
PartitionByIsUseOfPostIncrementedValue);
// Sort this by base, so that things with the same base are handled
// together. By partitioning first and stable-sorting later, we are
// guaranteed that within each base we will pop off users from within the
// loop before users outside of the loop with a particular base.
//
// We would like to use stable_sort here, but we can't. The problem is that
// SCEVHandle's don't have a deterministic ordering w.r.t to each other, so
// we don't have anything to do a '<' comparison on. Because we think the
// number of uses is small, do a horrible bubble sort which just relies on
// ==.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
// Get a base value.
SCEVHandle Base = UsersToProcess[i].Base;
// Compact everything with this base to be consequetive with this one.
for (unsigned j = i+1; j != e; ++j) {
if (UsersToProcess[j].Base == Base) {
std::swap(UsersToProcess[i+1], UsersToProcess[j]);
++i;
}
}
}
// Process all the users now. This outer loop handles all bases, the inner
// loop handles all users of a particular base.
while (!UsersToProcess.empty()) {
Chris Lattner
committed
SCEVHandle Base = UsersToProcess.back().Base;
DOUT << " INSERTING code for BASE = " << *Base << ":\n";
// Emit the code for Base into the preheader.
Value *BaseV = PreheaderRewriter.expandCodeFor(Base, PreInsertPt,
ReplacedTy);
// If BaseV is a constant other than 0, make sure that it gets inserted into
// the preheader, instead of being forward substituted into the uses. We do
// this by forcing a BitCast (noop cast) to be inserted into the preheader
// in this case.
if (!C->isNullValue() && !isTargetConstant(Base, ReplacedTy, TLI)) {
// We want this constant emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
// Emit the code to add the immediate offset to the Phi value, just before
// the instructions that we identified as using this stride and base.
Chris Lattner
committed
do {
Chris Lattner
committed
BasedUser &User = UsersToProcess.back();
// If this instruction wants to use the post-incremented value, move it
// after the post-inc and use its value instead of the PHI.
if (User.isUseOfPostIncrementedValue) {
// If this user is in the loop, make sure it is the last thing in the
// loop to ensure it is dominated by the increment.
if (L->contains(User.Inst->getParent()))
User.Inst->moveBefore(LatchBlock->getTerminator());
}
if (RewriteOp->getType() != ReplacedTy) {
Instruction::CastOps opcode = Instruction::Trunc;
if (ReplacedTy->getPrimitiveSizeInBits() ==
RewriteOp->getType()->getPrimitiveSizeInBits())
opcode = Instruction::BitCast;
RewriteOp = SCEVExpander::InsertCastOfTo(opcode, RewriteOp, ReplacedTy);
}
SCEVHandle RewriteExpr = SCEVUnknown::get(RewriteOp);
// Clear the SCEVExpander's expression map so that we are guaranteed
// to have the code emitted where we expect it.
Rewriter.clear();
// If we are reusing the iv, then it must be multiplied by a constant
// factor take advantage of addressing mode scale component.
RewriteExpr =
SCEVMulExpr::get(SCEVUnknown::getIntegerSCEV(RewriteFactor,
RewriteExpr->getType()),
RewriteExpr);
// The common base is emitted in the loop preheader. But since we
// are reusing an IV, it has not been used to initialize the PHI node.
// Add it to the expression used to rewrite the uses.
if (!isa<ConstantInt>(CommonBaseV) ||
!cast<ConstantInt>(CommonBaseV)->isZero())
RewriteExpr = SCEVAddExpr::get(RewriteExpr,
SCEVUnknown::get(CommonBaseV));
}
// Now that we know what we need to do, insert code before User for the
// immediate and any loop-variant expressions.
if (!isa<ConstantInt>(BaseV) || !cast<ConstantInt>(BaseV)->isZero())
// Add BaseV to the PHI value if needed.
RewriteExpr = SCEVAddExpr::get(RewriteExpr, SCEVUnknown::get(BaseV));
User.RewriteInstructionToUseNewBase(RewriteExpr, Rewriter, L, this);
// Mark old value we replaced as possibly dead, so that it is elminated
// if we just replaced the last use of that value.
DeadInsts.insert(cast<Instruction>(User.OperandValToReplace));
Chris Lattner
committed
UsersToProcess.pop_back();
Chris Lattner
committed
// If there are any more users to process with the same base, process them
// now. We sorted by base above, so we just have to check the last elt.
Chris Lattner
committed
} while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base);
// TODO: Next, find out which base index is the most common, pull it out.
}
// IMPORTANT TODO: Figure out how to partition the IV's with this stride, but
// different starting values, into different PHIs.
}
// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
// uses in the loop, look to see if we can eliminate some, in favor of using
// common indvars for the different uses.
void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
// TODO: implement optzns here.
// Finally, get the terminating condition for the loop if possible. If we
// can, we want to change it to use a post-incremented version of its
// induction variable, to allow coalescing the live ranges for the IV into
// one register value.
PHINode *SomePHI = cast<PHINode>(L->getHeader()->begin());
BasicBlock *Preheader = L->getLoopPreheader();
BasicBlock *LatchBlock =
SomePHI->getIncomingBlock(SomePHI->getIncomingBlock(0) == Preheader);
BranchInst *TermBr = dyn_cast<BranchInst>(LatchBlock->getTerminator());
if (!TermBr || TermBr->isUnconditional() ||
!isa<ICmpInst>(TermBr->getCondition()))
return;
// Search IVUsesByStride to find Cond's IVUse if there is one.
IVStrideUse *CondUse = 0;
Chris Lattner
committed
const SCEVHandle *CondStride = 0;
for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e && !CondUse;
++Stride) {
std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
IVUsesByStride.find(StrideOrder[Stride]);
assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(),
E = SI->second.Users.end(); UI != E; ++UI)
if (UI->User == Cond) {
CondUse = &*UI;
// NOTE: we could handle setcc instructions with multiple uses here, but
// InstCombine does it as well for simple uses, it's not clear that it
// occurs enough in real life to handle.
break;
}
if (!CondUse) return; // setcc doesn't use the IV.
// It's possible for the setcc instruction to be anywhere in the loop, and
// possible for it to have multiple users. If it is not immediately before
// the latch block branch, move it.
if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
if (Cond->hasOneUse()) { // Condition has a single use, just move it.
Cond->moveBefore(TermBr);
} else {
// Otherwise, clone the terminating condition and insert into the loopend.
Cond->setName(L->getHeader()->getName() + ".termcond");
LatchBlock->getInstList().insert(TermBr, Cond);
// Clone the IVUse, as the old use still exists!
Chris Lattner
committed
IVUsesByStride[*CondStride].addUser(CondUse->Offset, Cond,
CondUse->OperandValToReplace);
Chris Lattner
committed
CondUse = &IVUsesByStride[*CondStride].Users.back();
}
}
// If we get to here, we know that we can transform the setcc instruction to
// use the post-incremented version of the IV, allowing us to coalesce the
// live ranges for the IV correctly.
Chris Lattner
committed
CondUse->Offset = SCEV::getMinusSCEV(CondUse->Offset, *CondStride);
CondUse->isUseOfPostIncrementedValue = true;
}
namespace {
// Constant strides come first which in turns are sorted by their absolute
// values. If absolute values are the same, then positive strides comes first.
// e.g.
// 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X
struct StrideCompare {
bool operator()(const SCEVHandle &LHS, const SCEVHandle &RHS) {
SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
if (LHSC && RHSC) {
int64_t LV = LHSC->getValue()->getSExtValue();
int64_t RV = RHSC->getValue()->getSExtValue();
uint64_t ALV = (LV < 0) ? -LV : LV;
uint64_t ARV = (RV < 0) ? -RV : RV;
if (ALV == ARV)
else
}
};
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfo>();
EF = &getAnalysis<ETForest>();
SE = &getAnalysis<ScalarEvolution>();
TD = &getAnalysis<TargetData>();
UIntPtrTy = TD->getIntPtrType();
// Find all uses of induction variables in this loop, and catagorize
// them by stride. Start by finding all of the PHI nodes in the header for
// this loop. If they are induction variables, inspect their uses.
std::set<Instruction*> Processed; // Don't reprocess instructions.
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
AddUsersIfInteresting(I, L, Processed);
// If we have nothing to do, return.
if (IVUsesByStride.empty()) return false;
// Optimize induction variables. Some indvar uses can be transformed to use
// strides that will be needed for other purposes. A common example of this
// is the exit test for the loop, which can often be rewritten to use the
// computation of some other indvar to decide when to terminate the loop.
OptimizeIndvars(L);
// FIXME: We can widen subreg IV's here for RISC targets. e.g. instead of
// doing computation in byte values, promote to 32-bit values if safe.
// FIXME: Attempt to reuse values across multiple IV's. In particular, we
// could have something like "for(i) { foo(i*8); bar(i*16) }", which should be
// codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC. Need
// to be careful that IV's are all the same type. Only works for intptr_t
// indvars.
// If we only have one stride, we can more aggressively eliminate some things.
bool HasOneStride = IVUsesByStride.size() == 1;
#ifndef NDEBUG
DOUT << "\nLSR on ";
DEBUG(L->dump());
#endif
// IVsByStride keeps IVs for one particular loop.
IVsByStride.clear();
// Sort the StrideOrder so we process larger strides first.
std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare());
// Note: this processes each stride/type pair individually. All users passed
// into StrengthReduceStridedIVUsers have the same type AND stride. Also,
// node that we iterate over IVUsesByStride indirectly by using StrideOrder.
// This extra layer of indirection makes the ordering of strides deterministic
// - not dependent on map order.
for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) {
std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
IVUsesByStride.find(StrideOrder[Stride]);
assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
StrengthReduceStridedIVUsers(SI->first, SI->second, L, HasOneStride);
}
// Clean up after ourselves
if (!DeadInsts.empty()) {
DeleteTriviallyDeadInstructions(DeadInsts);
BasicBlock::iterator I = L->getHeader()->begin();
PHINode *PN;
while ((PN = dyn_cast<PHINode>(I))) {
++I; // Preincrement iterator to avoid invalidating it when deleting PN.
// At this point, we know that we have killed one or more GEP
// instructions. It is worth checking to see if the cann indvar is also
// dead, so that we can remove it as well. The requirements for the cann
// indvar to be considered dead are:
// 1. the cann indvar has one use
// 2. the use is an add instruction
// 3. the add has one use
// 4. the add is used by the cann indvar
// If all four cases above are true, then we can remove both the add and
// the cann indvar.
// FIXME: this needs to eliminate an induction variable even if it's being
// compared against some value to decide loop termination.
if (PN->hasOneUse()) {
Instruction *BO = dyn_cast<Instruction>(*PN->use_begin());
if (BO && (isa<BinaryOperator>(BO) || isa<CmpInst>(BO))) {
if (BO->hasOneUse() && PN == *(BO->use_begin())) {
DeadInsts.insert(BO);
// Break the cycle, then delete the PHI.
PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
SE->deleteInstructionFromRecords(PN);
}
}
}
DeleteTriviallyDeadInstructions(DeadInsts);
}
CastedPointers.clear();
IVUsesByStride.clear();
StrideOrder.clear();