"git@repo.hca.bsc.es:rferrer/llvm-epi.git" did not exist on "cd808237b24c7d6d0bb7ddf577dba37c31a06a50"
Newer
Older
// CSEs we can find.
if (Uses[i].Base == Zero) return Zero;
// If this use is as an address we may be able to put CSEs in the addressing
// mode rather than hoisting them.
bool isAddrUse = isAddressUse(Uses[i].Inst, Uses[i].OperandValToReplace);
// We may need the UseTy below, but only when isAddrUse, so compute it
// only in that case.
const Type *UseTy = 0;
if (isAddrUse) {
UseTy = Uses[i].Inst->getType();
if (StoreInst *SI = dyn_cast<StoreInst>(Uses[i].Inst))
UseTy = SI->getOperand(0)->getType();
}
// Split the expression into subexprs.
SeparateSubExprs(SubExprs, Uses[i].Base, SE);
// Add one to SubExpressionUseData.Count for each subexpr present, and
// if the subexpr is not a valid immediate within an addressing mode use,
// set SubExpressionUseData.notAllUsesAreFree. We definitely want to
// hoist these out of the loop (if they are common to all uses).
for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
if (++SubExpressionUseData[SubExprs[j]].Count == 1)
UniqueSubExprs.push_back(SubExprs[j]);
if (!isAddrUse || !fitsInAddressMode(SubExprs[j], UseTy, TLI, false))
SubExpressionUseData[SubExprs[j]].notAllUsesAreFree = true;
}
SubExprs.clear();
}
// Now that we know how many times each is used, build Result. Iterate over
// UniqueSubexprs so that we have a stable ordering.
for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) {
std::map<SCEVHandle, SubExprUseData>::iterator I =
SubExpressionUseData.find(UniqueSubExprs[i]);
assert(I != SubExpressionUseData.end() && "Entry not found?");
if (I->second.Count == NumUsesInsideLoop) { // Found CSE!
if (I->second.notAllUsesAreFree)
Result = SE->getAddExpr(Result, I->first);
else
FreeResult = SE->getAddExpr(FreeResult, I->first);
} else
// Remove non-cse's from SubExpressionUseData.
SubExpressionUseData.erase(I);
}
if (FreeResult != Zero) {
// We have some subexpressions that can be subsumed into addressing
// modes in every use inside the loop. However, it's possible that
// there are so many of them that the combined FreeResult cannot
// be subsumed, or that the target cannot handle both a FreeResult
// and a Result in the same instruction (for example because it would
// require too many registers). Check this.
for (unsigned i=0; i<NumUses; ++i) {
if (!L->contains(Uses[i].Inst->getParent()))
continue;
// We know this is an addressing mode use; if there are any uses that
// are not, FreeResult would be Zero.
const Type *UseTy = Uses[i].Inst->getType();
if (StoreInst *SI = dyn_cast<StoreInst>(Uses[i].Inst))
UseTy = SI->getOperand(0)->getType();
if (!fitsInAddressMode(FreeResult, UseTy, TLI, Result!=Zero)) {
// FIXME: could split up FreeResult into pieces here, some hoisted
// and some not. There is no obvious advantage to this.
Result = SE->getAddExpr(Result, FreeResult);
FreeResult = Zero;
break;
}
}
}
// If we found no CSE's, return now.
if (Result == Zero) return Result;
// If we still have a FreeResult, remove its subexpressions from
// SubExpressionUseData. This means they will remain in the use Bases.
if (FreeResult != Zero) {
SeparateSubExprs(SubExprs, FreeResult, SE);
for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
std::map<SCEVHandle, SubExprUseData>::iterator I =
SubExpressionUseData.find(SubExprs[j]);
SubExpressionUseData.erase(I);
}
SubExprs.clear();
}
// Otherwise, remove all of the CSE's we found from each of the base values.
for (unsigned i = 0; i != NumUses; ++i) {
// Uses outside the loop don't necessarily include the common base, but
// the final IV value coming into those uses does. Instead of trying to
// remove the pieces of the common base, which might not be there,
// subtract off the base to compensate for this.
if (!L->contains(Uses[i].Inst->getParent())) {
Uses[i].Base = SE->getMinusSCEV(Uses[i].Base, Result);
// Split the expression into subexprs.
SeparateSubExprs(SubExprs, Uses[i].Base, SE);
// Remove any common subexpressions.
for (unsigned j = 0, e = SubExprs.size(); j != e; ++j)
if (SubExpressionUseData.count(SubExprs[j])) {
SubExprs.erase(SubExprs.begin()+j);
--j; --e;
}
// Finally, add the non-shared expressions together.
if (SubExprs.empty())
else
Uses[i].Base = SE->getAddExpr(SubExprs);
SubExprs.clear();
}
return Result;
}
/// ValidStride - Check whether the given Scale is valid for all loads and
/// stores in UsersToProcess.
bool LoopStrengthReduce::ValidStride(bool HasBaseReg,
int64_t Scale,
const std::vector<BasedUser>& UsersToProcess) {
if (!TLI)
return true;
for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) {
// If this is a load or other access, pass the type of the access in.
const Type *AccessTy = Type::VoidTy;
if (StoreInst *SI = dyn_cast<StoreInst>(UsersToProcess[i].Inst))
AccessTy = SI->getOperand(0)->getType();
else if (LoadInst *LI = dyn_cast<LoadInst>(UsersToProcess[i].Inst))
AccessTy = LI->getType();
else if (isa<PHINode>(UsersToProcess[i].Inst))
continue;
TargetLowering::AddrMode AM;
if (SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
AM.BaseOffs = SC->getValue()->getSExtValue();
AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero();
AM.Scale = Scale;
// If load[imm+r*scale] is illegal, bail out.
if (!TLI->isLegalAddressingMode(AM, AccessTy))
return false;
}
return true;
}
/// RequiresTypeConversion - Returns true if converting Ty to NewTy is not
/// a nop.
bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
const Type *Ty2) {
if (Ty1 == Ty2)
return false;
if (TLI && TLI->isTruncateFree(Ty1, Ty2))
return false;
return (!Ty1->canLosslesslyBitCastTo(Ty2) &&
!(isa<PointerType>(Ty2) &&
Ty1->canLosslesslyBitCastTo(UIntPtrTy)) &&
!(isa<PointerType>(Ty1) &&
Ty2->canLosslesslyBitCastTo(UIntPtrTy)));
}
/// CheckForIVReuse - Returns the multiple if the stride is the multiple
/// of a previous stride and it is a legal value for the target addressing
/// mode scale component and optional base reg. This allows the users of
/// this stride to be rewritten as prev iv * factor. It returns 0 if no
/// reuse is possible. Factors can be negative on same targets, e.g. ARM.
int64_t LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
bool AllUsesAreAddresses,
bool AllUsesAreOutsideLoop,
IVExpr &IV, const Type *Ty,
const std::vector<BasedUser>& UsersToProcess) {
if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
int64_t SInt = SC->getValue()->getSExtValue();
for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e;
++NewStride) {
std::map<SCEVHandle, IVsOfOneStride>::iterator SI =
IVsByStride.find(StrideOrder[NewStride]);
if (SI == IVsByStride.end())
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SI->first != Stride &&
(unsigned(abs(SInt)) < SSInt || (SInt % SSInt) != 0))
continue;
int64_t Scale = SInt / SSInt;
// Check that this stride is valid for all the types used for loads and
// stores; if it can be used for some and not others, we might as well use
// the original stride everywhere, since we have to create the IV for it
// anyway. If the scale is 1, then we don't need to worry about folding
// multiplications.
if (Scale == 1 ||
(AllUsesAreAddresses &&
ValidStride(HasBaseReg, Scale, UsersToProcess)))
for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
IE = SI->second.IVs.end(); II != IE; ++II)
// FIXME: Only handle base == 0 for now.
// Only reuse previous IV if it would not require a type conversion.
if (II->Base->isZero() &&
!RequiresTypeConversion(II->Base->getType(), Ty)) {
}
}
}
/// PartitionByIsUseOfPostIncrementedValue - Simple boolean predicate that
/// returns true if Val's isUseOfPostIncrementedValue is true.
static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) {
return Val.isUseOfPostIncrementedValue;
}
/// isNonConstantNegative - Return true if the specified scev is negated, but
Chris Lattner
committed
/// not a constant.
static bool isNonConstantNegative(const SCEVHandle &Expr) {
SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr);
if (!Mul) return false;
// If there is a constant factor, it will be first.
SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
if (!SC) return false;
// Return true if the value is negative, this matches things like (-42 * V).
return SC->getValue()->getValue().isNegative();
}
// CollectIVUsers - Transform our list of users and offsets to a bit more
// complex table. In this new vector, each 'BasedUser' contains 'Base', the base
// of the strided accesses, as well as the old information from Uses. We
// progressively move information from the Base field to the Imm field, until
// we eventually have the full access expression to rewrite the use.
SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride,
IVUsersOfOneStride &Uses,
Loop *L,
bool &AllUsesAreAddresses,
bool &AllUsesAreOutsideLoop,
std::vector<BasedUser> &UsersToProcess) {
UsersToProcess.reserve(Uses.Users.size());
for (unsigned i = 0, e = Uses.Users.size(); i != e; ++i) {
UsersToProcess.push_back(BasedUser(Uses.Users[i], SE));
// Move any loop variant operands from the offset field to the immediate
// field of the use, so that we don't try to use something before it is
// computed.
MoveLoopVariantsToImmediateField(UsersToProcess.back().Base,
UsersToProcess.back().Imm, L, SE);
assert(UsersToProcess.back().Base->isLoopInvariant(L) &&
Chris Lattner
committed
"Base value is not loop invariant!");
// We now have a whole bunch of uses of like-strided induction variables, but
// they might all have different bases. We want to emit one PHI node for this
// stride which we fold as many common expressions (between the IVs) into as
// possible. Start by identifying the common expressions in the base values
// for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find
// "A+B"), emit it to the preheader, then remove the expression from the
// UsersToProcess base values.
SCEVHandle CommonExprs =
RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L, TLI);
// Next, figure out what we can represent in the immediate fields of
// instructions. If we can represent anything there, move it to the imm
// fields of the BasedUsers. We do this so that it increases the commonality
// of the remaining uses.
unsigned NumPHI = 0;
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
// If the user is not in the current loop, this means it is using the exit
// value of the IV. Do not put anything in the base, make sure it's all in
// the immediate field to allow as much factoring as possible.
if (!L->contains(UsersToProcess[i].Inst->getParent())) {
UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm,
UsersToProcess[i].Base);
UsersToProcess[i].Base =
SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType());
} else {
// Addressing modes can be folded into loads and stores. Be careful that
// the store is through the expression, not of the expression though.
bool isPHI = false;
bool isAddress = isAddressUse(UsersToProcess[i].Inst,
UsersToProcess[i].OperandValToReplace);
if (isa<PHINode>(UsersToProcess[i].Inst)) {
isPHI = true;
++NumPHI;
// Not all uses are outside the loop.
AllUsesAreOutsideLoop = false;
// If this use isn't an address, then not all uses are addresses.
MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
UsersToProcess[i].Imm, isAddress, L, SE);
}
}
// If one of the use if a PHI node and all other uses are addresses, still
// allow iv reuse. Essentially we are trading one constant multiplication
// for one fewer iv.
if (NumPHI > 1)
AllUsesAreAddresses = false;
return CommonExprs;
}
/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
/// stride of IV. All of the users may have different starting values, and this
/// may not be the only stride (we know it is if isOnlyStride is true).
void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
IVUsersOfOneStride &Uses,
Loop *L,
bool isOnlyStride) {
// If all the users are moved to another stride, then there is nothing to do.
if (Uses.Users.empty())
return;
// Keep track if every use in UsersToProcess is an address. If they all are,
// we may be able to rewrite the entire collection of them in terms of a
// smaller-stride IV.
bool AllUsesAreAddresses = true;
// Keep track if every use of a single stride is outside the loop. If so,
// we want to be more aggressive about reusing a smaller-stride IV; a
// multiply outside the loop is better than another IV inside. Well, usually.
bool AllUsesAreOutsideLoop = true;
// Transform our list of users and offsets to a bit more complex table. In
// this new vector, each 'BasedUser' contains 'Base' the base of the
// strided accessas well as the old information from Uses. We progressively
// move information from the Base field to the Imm field, until we eventually
// have the full access expression to rewrite the use.
std::vector<BasedUser> UsersToProcess;
SCEVHandle CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,
AllUsesAreOutsideLoop,
UsersToProcess);
// If we managed to find some expressions in common, we'll need to carry
// their value in a register and add it in for each use. This will take up
// a register operand, which potentially restricts what stride values are
// valid.
bool HaveCommonExprs = !CommonExprs->isZero();
// If all uses are addresses, check if it is possible to reuse an IV with a
// stride that is a factor of this stride. And that the multiple is a number
// that can be encoded in the scale field of the target addressing mode. And
// that we will have a valid instruction after this substition, including the
// immediate field, if any.
PHINode *NewPHI = NULL;
Value *IncV = NULL;
IVExpr ReuseIV(SE->getIntegerSCEV(0, Type::Int32Ty),
SE->getIntegerSCEV(0, Type::Int32Ty),
0, 0);
int64_t RewriteFactor = 0;
RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
AllUsesAreOutsideLoop,
Stride, ReuseIV, CommonExprs->getType(),
UsersToProcess);
if (RewriteFactor != 0) {
DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride
<< " and BASE " << *ReuseIV.Base << " :\n";
NewPHI = ReuseIV.PHI;
IncV = ReuseIV.IncV;
}
const Type *ReplacedTy = CommonExprs->getType();
// Now that we know what we need to do, insert the PHI node itself.
//
DOUT << "INSERTING IV of TYPE " << *ReplacedTy << " of STRIDE "
<< *Stride << " and BASE " << *CommonExprs << ": ";
SCEVExpander Rewriter(*SE, *LI);
SCEVExpander PreheaderRewriter(*SE, *LI);
BasicBlock *Preheader = L->getLoopPreheader();
Instruction *PreInsertPt = Preheader->getTerminator();
Instruction *PhiInsertBefore = L->getHeader()->begin();
BasicBlock *LatchBlock = L->getLoopLatch();
// Emit the initial base value into the loop preheader.
Value *CommonBaseV
= PreheaderRewriter.expandCodeFor(CommonExprs, PreInsertPt);
if (RewriteFactor == 0) {
// Create a new Phi for this base, and stick it in the loop header.
NewPHI = PHINode::Create(ReplacedTy, "iv.", PhiInsertBefore);
++NumInserted;
// Add common base to the new Phi node.
NewPHI->addIncoming(CommonBaseV, Preheader);
Chris Lattner
committed
// If the stride is negative, insert a sub instead of an add for the
// increment.
bool isNegative = isNonConstantNegative(Stride);
SCEVHandle IncAmount = Stride;
if (isNegative)
IncAmount = SE->getNegativeSCEV(Stride);
Chris Lattner
committed
// Insert the stride into the preheader.
Value *StrideV = PreheaderRewriter.expandCodeFor(IncAmount, PreInsertPt);
if (!isa<ConstantInt>(StrideV)) ++NumVariable;
Chris Lattner
committed
// Emit the increment of the base value before the terminator of the loop
// latch block, and add it to the Phi node.
SCEVHandle IncExp = SE->getUnknown(StrideV);
Chris Lattner
committed
if (isNegative)
IncExp = SE->getNegativeSCEV(IncExp);
IncExp = SE->getAddExpr(SE->getUnknown(NewPHI), IncExp);
IncV = Rewriter.expandCodeFor(IncExp, LatchBlock->getTerminator());
IncV->setName(NewPHI->getName()+".inc");
NewPHI->addIncoming(IncV, LatchBlock);
// Remember this in case a later stride is multiple of this.
IVsByStride[Stride].addIV(Stride, CommonExprs, NewPHI, IncV);
DOUT << " IV=%" << NewPHI->getNameStr() << " INC=%" << IncV->getNameStr();
} else {
Constant *C = dyn_cast<Constant>(CommonBaseV);
if (!C ||
(!C->isNullValue() &&
!fitsInAddressMode(SE->getUnknown(CommonBaseV), ReplacedTy,
TLI, false)))
// We want the common base emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(),
"commonbase", PreInsertPt);
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
// We want to emit code for users inside the loop first. To do this, we
// rearrange BasedUser so that the entries at the end have
// isUseOfPostIncrementedValue = false, because we pop off the end of the
// vector (so we handle them first).
std::partition(UsersToProcess.begin(), UsersToProcess.end(),
PartitionByIsUseOfPostIncrementedValue);
// Sort this by base, so that things with the same base are handled
// together. By partitioning first and stable-sorting later, we are
// guaranteed that within each base we will pop off users from within the
// loop before users outside of the loop with a particular base.
//
// We would like to use stable_sort here, but we can't. The problem is that
// SCEVHandle's don't have a deterministic ordering w.r.t to each other, so
// we don't have anything to do a '<' comparison on. Because we think the
// number of uses is small, do a horrible bubble sort which just relies on
// ==.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
// Get a base value.
SCEVHandle Base = UsersToProcess[i].Base;
// Compact everything with this base to be consecutive with this one.
for (unsigned j = i+1; j != e; ++j) {
if (UsersToProcess[j].Base == Base) {
std::swap(UsersToProcess[i+1], UsersToProcess[j]);
++i;
}
}
}
// Process all the users now. This outer loop handles all bases, the inner
// loop handles all users of a particular base.
while (!UsersToProcess.empty()) {
Chris Lattner
committed
SCEVHandle Base = UsersToProcess.back().Base;
// Emit the code for Base into the preheader.
Value *BaseV = PreheaderRewriter.expandCodeFor(Base, PreInsertPt);
DOUT << " INSERTING code for BASE = " << *Base << ":";
if (BaseV->hasName())
DOUT << " Result value name = %" << BaseV->getNameStr();
DOUT << "\n";
// If BaseV is a constant other than 0, make sure that it gets inserted into
// the preheader, instead of being forward substituted into the uses. We do
// this by forcing a BitCast (noop cast) to be inserted into the preheader
// in this case.
if (!C->isNullValue() && !fitsInAddressMode(Base, ReplacedTy,
TLI, false)) {
// We want this constant emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
// Emit the code to add the immediate offset to the Phi value, just before
// the instructions that we identified as using this stride and base.
Chris Lattner
committed
do {
Chris Lattner
committed
BasedUser &User = UsersToProcess.back();
// If this instruction wants to use the post-incremented value, move it
// after the post-inc and use its value instead of the PHI.
if (User.isUseOfPostIncrementedValue) {
// If this user is in the loop, make sure it is the last thing in the
// loop to ensure it is dominated by the increment.
if (L->contains(User.Inst->getParent()))
User.Inst->moveBefore(LatchBlock->getTerminator());
}
if (RewriteOp->getType() != ReplacedTy) {
Instruction::CastOps opcode = Instruction::Trunc;
if (ReplacedTy->getPrimitiveSizeInBits() ==
RewriteOp->getType()->getPrimitiveSizeInBits())
opcode = Instruction::BitCast;
RewriteOp = SCEVExpander::InsertCastOfTo(opcode, RewriteOp, ReplacedTy);
}
SCEVHandle RewriteExpr = SE->getUnknown(RewriteOp);
// If we had to insert new instructions for RewriteOp, we have to
// consider that they may not have been able to end up immediately
// next to RewriteOp, because non-PHI instructions may never precede
// PHI instructions in a block. In this case, remember where the last
// instruction was inserted so that if we're replacing a different
// PHI node, we can use the later point to expand the final
// RewriteExpr.
Instruction *NewBasePt = dyn_cast<Instruction>(RewriteOp);
if (RewriteOp == NewPHI) NewBasePt = 0;
// Clear the SCEVExpander's expression map so that we are guaranteed
// to have the code emitted where we expect it.
Rewriter.clear();
// If we are reusing the iv, then it must be multiplied by a constant
// factor take advantage of addressing mode scale component.
if (RewriteFactor != 0) {
RewriteExpr = SE->getMulExpr(SE->getIntegerSCEV(RewriteFactor,
RewriteExpr->getType()),
RewriteExpr);
// The common base is emitted in the loop preheader. But since we
// are reusing an IV, it has not been used to initialize the PHI node.
// Add it to the expression used to rewrite the uses.
if (!isa<ConstantInt>(CommonBaseV) ||
!cast<ConstantInt>(CommonBaseV)->isZero())
RewriteExpr = SE->getAddExpr(RewriteExpr,
SE->getUnknown(CommonBaseV));
}
// Now that we know what we need to do, insert code before User for the
// immediate and any loop-variant expressions.
if (!isa<ConstantInt>(BaseV) || !cast<ConstantInt>(BaseV)->isZero())
// Add BaseV to the PHI value if needed.
RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV));
User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt,
Rewriter, L, this,
Evan Cheng
committed
DeadInsts);
// Mark old value we replaced as possibly dead, so that it is eliminated
// if we just replaced the last use of that value.
DeadInsts.push_back(cast<Instruction>(User.OperandValToReplace));
Chris Lattner
committed
UsersToProcess.pop_back();
Chris Lattner
committed
// If there are any more users to process with the same base, process them
// now. We sorted by base above, so we just have to check the last elt.
Chris Lattner
committed
} while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base);
// TODO: Next, find out which base index is the most common, pull it out.
}
// IMPORTANT TODO: Figure out how to partition the IV's with this stride, but
// different starting values, into different PHIs.
}
/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
/// set the IV user and stride information and return true, otherwise return
/// false.
bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
const SCEVHandle *&CondStride) {
for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e && !CondUse;
++Stride) {
std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
IVUsesByStride.find(StrideOrder[Stride]);
assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(),
E = SI->second.Users.end(); UI != E; ++UI)
if (UI->User == Cond) {
// NOTE: we could handle setcc instructions with multiple uses here, but
// InstCombine does it as well for simple uses, it's not clear that it
// occurs enough in real life to handle.
CondUse = &*UI;
CondStride = &SI->first;
return true;
}
}
return false;
}
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
namespace {
// Constant strides come first which in turns are sorted by their absolute
// values. If absolute values are the same, then positive strides comes first.
// e.g.
// 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X
struct StrideCompare {
bool operator()(const SCEVHandle &LHS, const SCEVHandle &RHS) {
SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
if (LHSC && RHSC) {
int64_t LV = LHSC->getValue()->getSExtValue();
int64_t RV = RHSC->getValue()->getSExtValue();
uint64_t ALV = (LV < 0) ? -LV : LV;
uint64_t ARV = (RV < 0) ? -RV : RV;
if (ALV == ARV)
return LV > RV;
else
return ALV < ARV;
}
return (LHSC && !RHSC);
}
};
}
/// ChangeCompareStride - If a loop termination compare instruction is the
/// only use of its stride, and the compaison is against a constant value,
/// try eliminate the stride by moving the compare instruction to another
/// stride and change its constant operand accordingly. e.g.
///
/// loop:
/// ...
/// v1 = v1 + 3
/// v2 = v2 + 1
/// if (v2 < 10) goto loop
/// =>
/// loop:
/// ...
/// v1 = v1 + 3
/// if (v1 < 30) goto loop
ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
Evan Cheng
committed
IVStrideUse* &CondUse,
const SCEVHandle* &CondStride) {
if (StrideOrder.size() < 2 ||
IVUsesByStride[*CondStride].Users.size() != 1)
return Cond;
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride);
if (!SC) return Cond;
ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1));
if (!C) return Cond;
ICmpInst::Predicate Predicate = Cond->getPredicate();
int64_t CmpSSInt = SC->getValue()->getSExtValue();
int64_t CmpVal = C->getValue().getSExtValue();
unsigned BitWidth = C->getValue().getBitWidth();
uint64_t SignBit = 1ULL << (BitWidth-1);
const Type *CmpTy = C->getType();
const Type *NewCmpTy = NULL;
unsigned TyBits = CmpTy->getPrimitiveSizeInBits();
unsigned NewTyBits = 0;
int64_t NewCmpVal = CmpVal;
SCEVHandle *NewStride = NULL;
Value *NewIncV = NULL;
int64_t Scale = 1;
// Check stride constant and the comparision constant signs to detect
// overflow.
return Cond;
// Look for a suitable stride / iv as replacement.
std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare());
for (unsigned i = 0, e = StrideOrder.size(); i != e; ++i) {
std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
IVUsesByStride.find(StrideOrder[i]);
if (!isa<SCEVConstant>(SI->first))
continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (abs(SSInt) <= abs(CmpSSInt) || (SSInt % CmpSSInt) != 0)
continue;
Scale = SSInt / CmpSSInt;
NewCmpVal = CmpVal * Scale;
APInt Mul = APInt(BitWidth, NewCmpVal);
// Check for overflow.
if (Mul.getSExtValue() != NewCmpVal) {
NewCmpVal = CmpVal;
continue;
}
// Watch out for overflow.
if (ICmpInst::isSignedPredicate(Predicate) &&
(CmpVal & SignBit) != (NewCmpVal & SignBit))
NewCmpVal = CmpVal;
if (NewCmpVal != CmpVal) {
// Pick the best iv to use trying to avoid a cast.
NewIncV = NULL;
for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(),
E = SI->second.Users.end(); UI != E; ++UI) {
NewIncV = UI->OperandValToReplace;
if (NewIncV->getType() == CmpTy)
break;
}
if (!NewIncV) {
NewCmpVal = CmpVal;
continue;
}
NewCmpTy = NewIncV->getType();
NewTyBits = isa<PointerType>(NewCmpTy)
? UIntPtrTy->getPrimitiveSizeInBits()
: NewCmpTy->getPrimitiveSizeInBits();
if (RequiresTypeConversion(NewCmpTy, CmpTy)) {
// Check if it is possible to rewrite it using
// an iv / stride of a smaller integer type.
bool TruncOk = false;
if (NewCmpTy->isInteger()) {
unsigned Bits = NewTyBits;
if (ICmpInst::isSignedPredicate(Predicate))
--Bits;
uint64_t Mask = (1ULL << Bits) - 1;
if (((uint64_t)NewCmpVal & Mask) == (uint64_t)NewCmpVal)
TruncOk = true;
}
if (!TruncOk) {
NewCmpVal = CmpVal;
continue;
}
}
// Don't rewrite if use offset is non-constant and the new type is
// of a different type.
// FIXME: too conservative?
if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->Offset)) {
NewCmpVal = CmpVal;
continue;
}
bool AllUsesAreAddresses = true;
bool AllUsesAreOutsideLoop = true;
std::vector<BasedUser> UsersToProcess;
SCEVHandle CommonExprs = CollectIVUsers(SI->first, SI->second, L,
AllUsesAreAddresses,
AllUsesAreOutsideLoop,
UsersToProcess);
// Avoid rewriting the compare instruction with an iv of new stride
// if it's likely the new stride uses will be rewritten using the
if (AllUsesAreAddresses &&
ValidStride(!CommonExprs->isZero(), Scale, UsersToProcess)) {
NewCmpVal = CmpVal;
continue;
}
Evan Cheng
committed
// If scale is negative, use swapped predicate unless it's testing
// for equality.
if (Scale < 0 && !Cond->isEquality())
Evan Cheng
committed
Predicate = ICmpInst::getSwappedPredicate(Predicate);
NewStride = &StrideOrder[i];
break;
}
}
// Forgo this transformation if it the increment happens to be
// unfortunately positioned after the condition, and the condition
// has multiple uses which prevent it from being moved immediately
// before the branch. See
// test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-*.ll
// for an example of this situation.
if (!Cond->hasOneUse()) {
for (BasicBlock::iterator I = Cond, E = Cond->getParent()->end();
I != E; ++I)
if (I == NewIncV)
return Cond;
if (NewCmpVal != CmpVal) {
// Create a new compare instruction using new stride / iv.
ICmpInst *OldCond = Cond;
Value *RHS;
if (!isa<PointerType>(NewCmpTy))
RHS = ConstantInt::get(NewCmpTy, NewCmpVal);
else {
RHS = ConstantInt::get(UIntPtrTy, NewCmpVal);
RHS = SCEVExpander::InsertCastOfTo(Instruction::IntToPtr, RHS, NewCmpTy);
}
Cond = new ICmpInst(Predicate, NewIncV, RHS,
L->getHeader()->getName() + ".termcond",
OldCond);
// Remove the old compare instruction. The old indvar is probably dead too.
DeadInsts.push_back(cast<Instruction>(CondUse->OperandValToReplace));
OldCond->replaceAllUsesWith(Cond);
OldCond->eraseFromParent();
IVUsesByStride[*CondStride].Users.pop_back();
SCEVHandle NewOffset = TyBits == NewTyBits
? SE->getMulExpr(CondUse->Offset,
SE->getConstant(ConstantInt::get(CmpTy, Scale)))
: SE->getConstant(ConstantInt::get(NewCmpTy,
cast<SCEVConstant>(CondUse->Offset)->getValue()->getSExtValue()*Scale));
IVUsesByStride[*NewStride].addUser(NewOffset, Cond, NewIncV);
CondUse = &IVUsesByStride[*NewStride].Users.back();
CondStride = NewStride;
++NumEliminated;
}
return Cond;
}
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
/// OptimizeSMax - Rewrite the loop's terminating condition if it uses
/// an smax computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
///
/// i = 0;
/// do {
/// p[i] = 0.0;
/// } while (++i < n);
///
/// where the comparison is signed, the trip count isn't just 'n', because
/// 'n' could be negative. And unfortunately this can come up even for loops
/// where the user didn't use a C do-while loop. For example, seemingly
/// well-behaved top-test loops will commonly be lowered like this:
//
/// if (n > 0) {
/// i = 0;
/// do {
/// p[i] = 0.0;
/// } while (++i < n);
/// }
///
/// and then it's possible for subsequent optimization to obscure the if
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
/// signed-max expression, which allows it to give the loop a canonical
/// induction variable:
///
/// i = 0;
/// smax = n < 1 ? 1 : n;
/// do {
/// p[i] = 0.0;
/// } while (++i != smax);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
/// LoopInfo analysis, which doesn't remember trip count values. It
/// expects to be able to rediscover the trip count each time it is
/// needed, and it does this using a simple analyis that only succeeds if
/// the loop has a canonical induction variable.
///
/// However, when it comes time to generate code, the maximum operation
/// can be quite costly, especially if it's inside of an outer loop.
///
/// This function solves this problem by detecting this type of loop and
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
///
ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse) {
// Check that the loop matches the pattern we're looking for.
if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
Cond->getPredicate() != CmpInst::ICMP_NE)
return Cond;
SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
if (!Sel || !Sel->hasOneUse()) return Cond;
SCEVHandle IterationCount = SE->getIterationCount(L);
if (isa<SCEVCouldNotCompute>(IterationCount))
return Cond;
SCEVHandle One = SE->getIntegerSCEV(1, IterationCount->getType());
// Adjust for an annoying getIterationCount quirk.
IterationCount = SE->getAddExpr(IterationCount, One);
// Check for a max calculation that matches the pattern.
SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(IterationCount);
if (!SMax || SMax != SE->getSCEV(Sel)) return Cond;
SCEVHandle SMaxLHS = SMax->getOperand(0);
SCEVHandle SMaxRHS = SMax->getOperand(1);
if (!SMaxLHS || SMaxLHS != One) return Cond;
// Check the relevant induction variable for conformance to
// the pattern.
SCEVHandle IV = SE->getSCEV(Cond->getOperand(0));
SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
if (!AR || !AR->isAffine() ||
AR->getStart() != One ||
AR->getStepRecurrence(*SE) != One)
return Cond;
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
Value *NewRHS = 0;
if (SE->getSCEV(Sel->getOperand(1)) == SMaxRHS)
NewRHS = Sel->getOperand(1);
else if (SE->getSCEV(Sel->getOperand(2)) == SMaxRHS)
NewRHS = Sel->getOperand(2);
if (!NewRHS) return Cond;
// Ok, everything looks ok to change the condition into an SLT or SGE and
// delete the max calculation.
ICmpInst *NewCond =
new ICmpInst(Cond->getPredicate() == CmpInst::ICMP_NE ?
CmpInst::ICMP_SLT :
CmpInst::ICMP_SGE,
Cond->getOperand(0), NewRHS, "scmp", Cond);
// Delete the max calculation instructions.
SE->deleteValueFromRecords(Cond);
Cond->replaceAllUsesWith(NewCond);
Cond->eraseFromParent();
Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
SE->deleteValueFromRecords(Sel);
Sel->eraseFromParent();
if (Cmp->use_empty()) {
SE->deleteValueFromRecords(Cmp);
Cmp->eraseFromParent();
}
CondUse->User = NewCond;
return NewCond;
}
Devang Patel
committed
/// OptimizeShadowIV - If IV is used in a int-to-float cast
/// inside the loop then try to eliminate the cast opeation.
void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
SCEVHandle IterationCount = SE->getIterationCount(L);
if (isa<SCEVCouldNotCompute>(IterationCount))
return;
for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e;
++Stride) {
std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
IVUsesByStride.find(StrideOrder[Stride]);
assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
if (!isa<SCEVConstant>(SI->first))
continue;
for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(),
E = SI->second.Users.end(); UI != E; /* empty */) {
std::vector<IVStrideUse>::iterator CandidateUI = UI;
++UI;
Devang Patel
committed
Instruction *ShadowUse = CandidateUI->User;
const Type *DestTy = NULL;
/* If shadow use is a int->float cast then insert a second IV
to eliminate this cast.
Devang Patel
committed
for (unsigned i = 0; i < n; ++i)
foo((double)i);
is transformed into
Devang Patel
committed
double d = 0.0;
for (unsigned i = 0; i < n; ++i, ++d)
foo(d);
*/
if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->User))
Devang Patel
committed
DestTy = UCast->getDestTy();
else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->User))
Devang Patel
committed
DestTy = SCast->getDestTy();
Devang Patel
committed
if (!DestTy) continue;
if (TLI) {
/* If target does not support DestTy natively then do not apply
this transformation. */
MVT DVT = TLI->getValueType(DestTy);
if (!TLI->isTypeLegal(DVT)) continue;
}
Devang Patel
committed
PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
if (!PH) continue;
if (PH->getNumIncomingValues() != 2) continue;
const Type *SrcTy = PH->getType();
int Mantissa = DestTy->getFPMantissaWidth();
if (Mantissa == -1) continue;
if ((int)TD->getTypeSizeInBits(SrcTy) > Mantissa)
continue;
unsigned Entry, Latch;
if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
Entry = 0;
Latch = 1;
} else {
Entry = 1;
Latch = 0;
}