Newer
Older
// Split the expression into subexprs.
SeparateSubExprs(SubExprs, Uses[i].Base, SE);
// Remove any common subexpressions.
for (unsigned j = 0, e = SubExprs.size(); j != e; ++j)
if (SubExpressionUseCounts.count(SubExprs[j])) {
SubExprs.erase(SubExprs.begin()+j);
--j; --e;
}
// Finally, add the non-shared expressions together.
if (SubExprs.empty())
else
Uses[i].Base = SE->getAddExpr(SubExprs);
SubExprs.clear();
}
return Result;
}
/// ValidStride - Check whether the given Scale is valid for all loads and
/// stores in UsersToProcess.
bool LoopStrengthReduce::ValidStride(bool HasBaseReg,
int64_t Scale,
const std::vector<BasedUser>& UsersToProcess) {
if (!TLI)
return true;
for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) {
// If this is a load or other access, pass the type of the access in.
const Type *AccessTy = Type::VoidTy;
if (StoreInst *SI = dyn_cast<StoreInst>(UsersToProcess[i].Inst))
AccessTy = SI->getOperand(0)->getType();
else if (LoadInst *LI = dyn_cast<LoadInst>(UsersToProcess[i].Inst))
AccessTy = LI->getType();
else if (isa<PHINode>(UsersToProcess[i].Inst))
continue;
TargetLowering::AddrMode AM;
if (SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
AM.BaseOffs = SC->getValue()->getSExtValue();
AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero();
AM.Scale = Scale;
// If load[imm+r*scale] is illegal, bail out.
if (!TLI->isLegalAddressingMode(AM, AccessTy))
return false;
}
return true;
}
/// RequiresTypeConversion - Returns true if converting Ty to NewTy is not
/// a nop.
bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
const Type *Ty2) {
if (Ty1 == Ty2)
return false;
if (TLI && TLI->isTruncateFree(Ty1, Ty2))
return false;
return (!Ty1->canLosslesslyBitCastTo(Ty2) &&
!(isa<PointerType>(Ty2) &&
Ty1->canLosslesslyBitCastTo(UIntPtrTy)) &&
!(isa<PointerType>(Ty1) &&
Ty2->canLosslesslyBitCastTo(UIntPtrTy)));
}
/// CheckForIVReuse - Returns the multiple if the stride is the multiple
/// of a previous stride and it is a legal value for the target addressing
/// mode scale component and optional base reg. This allows the users of
/// this stride to be rewritten as prev iv * factor. It returns 0 if no
/// reuse is possible.
unsigned LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
bool AllUsesAreAddresses,
IVExpr &IV, const Type *Ty,
const std::vector<BasedUser>& UsersToProcess) {
if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
int64_t SInt = SC->getValue()->getSExtValue();
for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e;
++NewStride) {
std::map<SCEVHandle, IVsOfOneStride>::iterator SI =
IVsByStride.find(StrideOrder[NewStride]);
if (SI == IVsByStride.end())
continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SI->first != Stride &&
(unsigned(abs(SInt)) < SSInt || (SInt % SSInt) != 0))
continue;
int64_t Scale = SInt / SSInt;
// Check that this stride is valid for all the types used for loads and
// stores; if it can be used for some and not others, we might as well use
// the original stride everywhere, since we have to create the IV for it
// anyway. If the scale is 1, then we don't need to worry about folding
// multiplications.
if (Scale == 1 ||
(AllUsesAreAddresses &&
ValidStride(HasBaseReg, Scale, UsersToProcess)))
for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
IE = SI->second.IVs.end(); II != IE; ++II)
// FIXME: Only handle base == 0 for now.
// Only reuse previous IV if it would not require a type conversion.
if (II->Base->isZero() &&
!RequiresTypeConversion(II->Base->getType(), Ty)) {
IV = *II;
return Scale;
}
}
}
}
/// PartitionByIsUseOfPostIncrementedValue - Simple boolean predicate that
/// returns true if Val's isUseOfPostIncrementedValue is true.
static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) {
return Val.isUseOfPostIncrementedValue;
}
/// isNonConstantNegative - Return true if the specified scev is negated, but
Chris Lattner
committed
/// not a constant.
static bool isNonConstantNegative(const SCEVHandle &Expr) {
SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr);
if (!Mul) return false;
// If there is a constant factor, it will be first.
SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
if (!SC) return false;
// Return true if the value is negative, this matches things like (-42 * V).
return SC->getValue()->getValue().isNegative();
}
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
/// isAddress - Returns true if the specified instruction is using the
/// specified value as an address.
static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
bool isAddress = isa<LoadInst>(Inst);
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (SI->getOperand(1) == OperandVal)
isAddress = true;
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// Addressing modes can also be folded into prefetches and a variety
// of intrinsics.
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::prefetch:
case Intrinsic::x86_sse2_loadu_dq:
case Intrinsic::x86_sse2_loadu_pd:
case Intrinsic::x86_sse_loadu_ps:
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
case Intrinsic::x86_sse2_storel_dq:
if (II->getOperand(1) == OperandVal)
isAddress = true;
break;
}
}
return isAddress;
}
// CollectIVUsers - Transform our list of users and offsets to a bit more
// complex table. In this new vector, each 'BasedUser' contains 'Base', the base
// of the strided accesses, as well as the old information from Uses. We
// progressively move information from the Base field to the Imm field, until
// we eventually have the full access expression to rewrite the use.
SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride,
IVUsersOfOneStride &Uses,
Loop *L,
bool &AllUsesAreAddresses,
std::vector<BasedUser> &UsersToProcess) {
UsersToProcess.reserve(Uses.Users.size());
for (unsigned i = 0, e = Uses.Users.size(); i != e; ++i) {
UsersToProcess.push_back(BasedUser(Uses.Users[i], SE));
// Move any loop variant operands from the offset field to the immediate
// field of the use, so that we don't try to use something before it is
// computed.
MoveLoopVariantsToImmediateField(UsersToProcess.back().Base,
UsersToProcess.back().Imm, L, SE);
assert(UsersToProcess.back().Base->isLoopInvariant(L) &&
Chris Lattner
committed
"Base value is not loop invariant!");
// We now have a whole bunch of uses of like-strided induction variables, but
// they might all have different bases. We want to emit one PHI node for this
// stride which we fold as many common expressions (between the IVs) into as
// possible. Start by identifying the common expressions in the base values
// for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find
// "A+B"), emit it to the preheader, then remove the expression from the
// UsersToProcess base values.
SCEVHandle CommonExprs =
RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L);
// Next, figure out what we can represent in the immediate fields of
// instructions. If we can represent anything there, move it to the imm
// fields of the BasedUsers. We do this so that it increases the commonality
// of the remaining uses.
unsigned NumPHI = 0;
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
// If the user is not in the current loop, this means it is using the exit
// value of the IV. Do not put anything in the base, make sure it's all in
// the immediate field to allow as much factoring as possible.
if (!L->contains(UsersToProcess[i].Inst->getParent())) {
UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm,
UsersToProcess[i].Base);
UsersToProcess[i].Base =
SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType());
} else {
// Addressing modes can be folded into loads and stores. Be careful that
// the store is through the expression, not of the expression though.
bool isPHI = false;
bool isAddress = isAddressUse(UsersToProcess[i].Inst,
UsersToProcess[i].OperandValToReplace);
if (isa<PHINode>(UsersToProcess[i].Inst)) {
isPHI = true;
++NumPHI;
// If this use isn't an address, then not all uses are addresses.
MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
UsersToProcess[i].Imm, isAddress, L, SE);
}
}
// If one of the use if a PHI node and all other uses are addresses, still
// allow iv reuse. Essentially we are trading one constant multiplication
// for one fewer iv.
if (NumPHI > 1)
AllUsesAreAddresses = false;
return CommonExprs;
}
/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
/// stride of IV. All of the users may have different starting values, and this
/// may not be the only stride (we know it is if isOnlyStride is true).
void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
IVUsersOfOneStride &Uses,
Loop *L,
bool isOnlyStride) {
// If all the users are moved to another stride, then there is nothing to do.
if (Uses.Users.empty())
return;
// Keep track if every use in UsersToProcess is an address. If they all are,
// we may be able to rewrite the entire collection of them in terms of a
// smaller-stride IV.
bool AllUsesAreAddresses = true;
// Transform our list of users and offsets to a bit more complex table. In
// this new vector, each 'BasedUser' contains 'Base' the base of the
// strided accessas well as the old information from Uses. We progressively
// move information from the Base field to the Imm field, until we eventually
// have the full access expression to rewrite the use.
std::vector<BasedUser> UsersToProcess;
SCEVHandle CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,
UsersToProcess);
// If we managed to find some expressions in common, we'll need to carry
// their value in a register and add it in for each use. This will take up
// a register operand, which potentially restricts what stride values are
// valid.
bool HaveCommonExprs = !CommonExprs->isZero();
// If all uses are addresses, check if it is possible to reuse an IV with a
// stride that is a factor of this stride. And that the multiple is a number
// that can be encoded in the scale field of the target addressing mode. And
// that we will have a valid instruction after this substition, including the
// immediate field, if any.
PHINode *NewPHI = NULL;
Value *IncV = NULL;
IVExpr ReuseIV(SE->getIntegerSCEV(0, Type::Int32Ty),
SE->getIntegerSCEV(0, Type::Int32Ty),
0, 0);
RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
Stride, ReuseIV, CommonExprs->getType(),
UsersToProcess);
if (RewriteFactor != 0) {
DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride
<< " and BASE " << *ReuseIV.Base << " :\n";
NewPHI = ReuseIV.PHI;
IncV = ReuseIV.IncV;
}
const Type *ReplacedTy = CommonExprs->getType();
// Now that we know what we need to do, insert the PHI node itself.
//
DOUT << "INSERTING IV of TYPE " << *ReplacedTy << " of STRIDE "
<< *Stride << " and BASE " << *CommonExprs << ": ";
SCEVExpander Rewriter(*SE, *LI);
SCEVExpander PreheaderRewriter(*SE, *LI);
BasicBlock *Preheader = L->getLoopPreheader();
Instruction *PreInsertPt = Preheader->getTerminator();
Instruction *PhiInsertBefore = L->getHeader()->begin();
BasicBlock *LatchBlock = L->getLoopLatch();
// Emit the initial base value into the loop preheader.
Value *CommonBaseV
= PreheaderRewriter.expandCodeFor(CommonExprs, PreInsertPt);
// Create a new Phi for this base, and stick it in the loop header.
NewPHI = PHINode::Create(ReplacedTy, "iv.", PhiInsertBefore);
++NumInserted;
// Add common base to the new Phi node.
NewPHI->addIncoming(CommonBaseV, Preheader);
Chris Lattner
committed
// If the stride is negative, insert a sub instead of an add for the
// increment.
bool isNegative = isNonConstantNegative(Stride);
SCEVHandle IncAmount = Stride;
if (isNegative)
IncAmount = SE->getNegativeSCEV(Stride);
Chris Lattner
committed
// Insert the stride into the preheader.
Value *StrideV = PreheaderRewriter.expandCodeFor(IncAmount, PreInsertPt);
if (!isa<ConstantInt>(StrideV)) ++NumVariable;
Chris Lattner
committed
// Emit the increment of the base value before the terminator of the loop
// latch block, and add it to the Phi node.
SCEVHandle IncExp = SE->getUnknown(StrideV);
Chris Lattner
committed
if (isNegative)
IncExp = SE->getNegativeSCEV(IncExp);
IncExp = SE->getAddExpr(SE->getUnknown(NewPHI), IncExp);
IncV = Rewriter.expandCodeFor(IncExp, LatchBlock->getTerminator());
IncV->setName(NewPHI->getName()+".inc");
NewPHI->addIncoming(IncV, LatchBlock);
// Remember this in case a later stride is multiple of this.
IVsByStride[Stride].addIV(Stride, CommonExprs, NewPHI, IncV);
DOUT << " IV=%" << NewPHI->getNameStr() << " INC=%" << IncV->getNameStr();
} else {
Constant *C = dyn_cast<Constant>(CommonBaseV);
if (!C ||
(!C->isNullValue() &&
!isTargetConstant(SE->getUnknown(CommonBaseV), ReplacedTy, TLI)))
// We want the common base emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(),
"commonbase", PreInsertPt);
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
// We want to emit code for users inside the loop first. To do this, we
// rearrange BasedUser so that the entries at the end have
// isUseOfPostIncrementedValue = false, because we pop off the end of the
// vector (so we handle them first).
std::partition(UsersToProcess.begin(), UsersToProcess.end(),
PartitionByIsUseOfPostIncrementedValue);
// Sort this by base, so that things with the same base are handled
// together. By partitioning first and stable-sorting later, we are
// guaranteed that within each base we will pop off users from within the
// loop before users outside of the loop with a particular base.
//
// We would like to use stable_sort here, but we can't. The problem is that
// SCEVHandle's don't have a deterministic ordering w.r.t to each other, so
// we don't have anything to do a '<' comparison on. Because we think the
// number of uses is small, do a horrible bubble sort which just relies on
// ==.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
// Get a base value.
SCEVHandle Base = UsersToProcess[i].Base;
// Compact everything with this base to be consequtive with this one.
for (unsigned j = i+1; j != e; ++j) {
if (UsersToProcess[j].Base == Base) {
std::swap(UsersToProcess[i+1], UsersToProcess[j]);
++i;
}
}
}
// Process all the users now. This outer loop handles all bases, the inner
// loop handles all users of a particular base.
while (!UsersToProcess.empty()) {
Chris Lattner
committed
SCEVHandle Base = UsersToProcess.back().Base;
// Emit the code for Base into the preheader.
Value *BaseV = PreheaderRewriter.expandCodeFor(Base, PreInsertPt);
DOUT << " INSERTING code for BASE = " << *Base << ":";
if (BaseV->hasName())
DOUT << " Result value name = %" << BaseV->getNameStr();
DOUT << "\n";
// If BaseV is a constant other than 0, make sure that it gets inserted into
// the preheader, instead of being forward substituted into the uses. We do
// this by forcing a BitCast (noop cast) to be inserted into the preheader
// in this case.
if (!C->isNullValue() && !isTargetConstant(Base, ReplacedTy, TLI)) {
// We want this constant emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
// Emit the code to add the immediate offset to the Phi value, just before
// the instructions that we identified as using this stride and base.
Chris Lattner
committed
do {
Chris Lattner
committed
BasedUser &User = UsersToProcess.back();
// If this instruction wants to use the post-incremented value, move it
// after the post-inc and use its value instead of the PHI.
if (User.isUseOfPostIncrementedValue) {
// If this user is in the loop, make sure it is the last thing in the
// loop to ensure it is dominated by the increment.
if (L->contains(User.Inst->getParent()))
User.Inst->moveBefore(LatchBlock->getTerminator());
}
if (RewriteOp->getType() != ReplacedTy) {
Instruction::CastOps opcode = Instruction::Trunc;
if (ReplacedTy->getPrimitiveSizeInBits() ==
RewriteOp->getType()->getPrimitiveSizeInBits())
opcode = Instruction::BitCast;
RewriteOp = SCEVExpander::InsertCastOfTo(opcode, RewriteOp, ReplacedTy);
}
SCEVHandle RewriteExpr = SE->getUnknown(RewriteOp);
// If we had to insert new instrutions for RewriteOp, we have to
// consider that they may not have been able to end up immediately
// next to RewriteOp, because non-PHI instructions may never precede
// PHI instructions in a block. In this case, remember where the last
// instruction was inserted so that if we're replacing a different
// PHI node, we can use the later point to expand the final
// RewriteExpr.
Instruction *NewBasePt = dyn_cast<Instruction>(RewriteOp);
if (RewriteOp == NewPHI) NewBasePt = 0;
// Clear the SCEVExpander's expression map so that we are guaranteed
// to have the code emitted where we expect it.
Rewriter.clear();
// If we are reusing the iv, then it must be multiplied by a constant
// factor take advantage of addressing mode scale component.
RewriteExpr = SE->getMulExpr(SE->getIntegerSCEV(RewriteFactor,
RewriteExpr->getType()),
RewriteExpr);
// The common base is emitted in the loop preheader. But since we
// are reusing an IV, it has not been used to initialize the PHI node.
// Add it to the expression used to rewrite the uses.
if (!isa<ConstantInt>(CommonBaseV) ||
!cast<ConstantInt>(CommonBaseV)->isZero())
RewriteExpr = SE->getAddExpr(RewriteExpr,
SE->getUnknown(CommonBaseV));
}
// Now that we know what we need to do, insert code before User for the
// immediate and any loop-variant expressions.
if (!isa<ConstantInt>(BaseV) || !cast<ConstantInt>(BaseV)->isZero())
// Add BaseV to the PHI value if needed.
RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV));
User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt,
Rewriter, L, this,
Evan Cheng
committed
DeadInsts);
// Mark old value we replaced as possibly dead, so that it is eliminated
// if we just replaced the last use of that value.
DeadInsts.push_back(cast<Instruction>(User.OperandValToReplace));
Chris Lattner
committed
UsersToProcess.pop_back();
Chris Lattner
committed
// If there are any more users to process with the same base, process them
// now. We sorted by base above, so we just have to check the last elt.
Chris Lattner
committed
} while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base);
// TODO: Next, find out which base index is the most common, pull it out.
}
// IMPORTANT TODO: Figure out how to partition the IV's with this stride, but
// different starting values, into different PHIs.
}
/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
/// set the IV user and stride information and return true, otherwise return
/// false.
bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
const SCEVHandle *&CondStride) {
for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e && !CondUse;
++Stride) {
std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
IVUsesByStride.find(StrideOrder[Stride]);
assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(),
E = SI->second.Users.end(); UI != E; ++UI)
if (UI->User == Cond) {
// NOTE: we could handle setcc instructions with multiple uses here, but
// InstCombine does it as well for simple uses, it's not clear that it
// occurs enough in real life to handle.
CondUse = &*UI;
CondStride = &SI->first;
return true;
}
}
return false;
}
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
namespace {
// Constant strides come first which in turns are sorted by their absolute
// values. If absolute values are the same, then positive strides comes first.
// e.g.
// 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X
struct StrideCompare {
bool operator()(const SCEVHandle &LHS, const SCEVHandle &RHS) {
SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
if (LHSC && RHSC) {
int64_t LV = LHSC->getValue()->getSExtValue();
int64_t RV = RHSC->getValue()->getSExtValue();
uint64_t ALV = (LV < 0) ? -LV : LV;
uint64_t ARV = (RV < 0) ? -RV : RV;
if (ALV == ARV)
return LV > RV;
else
return ALV < ARV;
}
return (LHSC && !RHSC);
}
};
}
/// ChangeCompareStride - If a loop termination compare instruction is the
/// only use of its stride, and the compaison is against a constant value,
/// try eliminate the stride by moving the compare instruction to another
/// stride and change its constant operand accordingly. e.g.
///
/// loop:
/// ...
/// v1 = v1 + 3
/// v2 = v2 + 1
/// if (v2 < 10) goto loop
/// =>
/// loop:
/// ...
/// v1 = v1 + 3
/// if (v1 < 30) goto loop
ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
Evan Cheng
committed
IVStrideUse* &CondUse,
const SCEVHandle* &CondStride) {
if (StrideOrder.size() < 2 ||
IVUsesByStride[*CondStride].Users.size() != 1)
return Cond;
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride);
if (!SC) return Cond;
ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1));
if (!C) return Cond;
ICmpInst::Predicate Predicate = Cond->getPredicate();
int64_t CmpSSInt = SC->getValue()->getSExtValue();
int64_t CmpVal = C->getValue().getSExtValue();
unsigned BitWidth = C->getValue().getBitWidth();
uint64_t SignBit = 1ULL << (BitWidth-1);
const Type *CmpTy = C->getType();
const Type *NewCmpTy = NULL;
unsigned TyBits = CmpTy->getPrimitiveSizeInBits();
unsigned NewTyBits = 0;
int64_t NewCmpVal = CmpVal;
SCEVHandle *NewStride = NULL;
Value *NewIncV = NULL;
int64_t Scale = 1;
// Check stride constant and the comparision constant signs to detect
// overflow.
return Cond;
// Look for a suitable stride / iv as replacement.
std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare());
for (unsigned i = 0, e = StrideOrder.size(); i != e; ++i) {
std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
IVUsesByStride.find(StrideOrder[i]);
if (!isa<SCEVConstant>(SI->first))
continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (abs(SSInt) <= abs(CmpSSInt) || (SSInt % CmpSSInt) != 0)
continue;
Scale = SSInt / CmpSSInt;
NewCmpVal = CmpVal * Scale;
APInt Mul = APInt(BitWidth, NewCmpVal);
// Check for overflow.
if (Mul.getSExtValue() != NewCmpVal) {
NewCmpVal = CmpVal;
continue;
}
// Watch out for overflow.
if (ICmpInst::isSignedPredicate(Predicate) &&
(CmpVal & SignBit) != (NewCmpVal & SignBit))
NewCmpVal = CmpVal;
if (NewCmpVal != CmpVal) {
// Pick the best iv to use trying to avoid a cast.
NewIncV = NULL;
for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(),
E = SI->second.Users.end(); UI != E; ++UI) {
NewIncV = UI->OperandValToReplace;
if (NewIncV->getType() == CmpTy)
break;
}
if (!NewIncV) {
NewCmpVal = CmpVal;
continue;
}
NewCmpTy = NewIncV->getType();
NewTyBits = isa<PointerType>(NewCmpTy)
? UIntPtrTy->getPrimitiveSizeInBits()
: NewCmpTy->getPrimitiveSizeInBits();
if (RequiresTypeConversion(NewCmpTy, CmpTy)) {
// Check if it is possible to rewrite it using
// an iv / stride of a smaller integer type.
bool TruncOk = false;
if (NewCmpTy->isInteger()) {
unsigned Bits = NewTyBits;
if (ICmpInst::isSignedPredicate(Predicate))
--Bits;
uint64_t Mask = (1ULL << Bits) - 1;
if (((uint64_t)NewCmpVal & Mask) == (uint64_t)NewCmpVal)
TruncOk = true;
}
if (!TruncOk) {
NewCmpVal = CmpVal;
continue;
}
}
// Don't rewrite if use offset is non-constant and the new type is
// of a different type.
// FIXME: too conservative?
if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->Offset)) {
NewCmpVal = CmpVal;
continue;
}
bool AllUsesAreAddresses = true;
std::vector<BasedUser> UsersToProcess;
SCEVHandle CommonExprs = CollectIVUsers(SI->first, SI->second, L,
AllUsesAreAddresses,
UsersToProcess);
// Avoid rewriting the compare instruction with an iv of new stride
// if it's likely the new stride uses will be rewritten using the
if (AllUsesAreAddresses &&
ValidStride(!CommonExprs->isZero(), Scale, UsersToProcess)) {
NewCmpVal = CmpVal;
continue;
}
Evan Cheng
committed
// If scale is negative, use swapped predicate unless it's testing
// for equality.
if (Scale < 0 && !Cond->isEquality())
Evan Cheng
committed
Predicate = ICmpInst::getSwappedPredicate(Predicate);
NewStride = &StrideOrder[i];
break;
}
}
// Forgo this transformation if it the increment happens to be
// unfortunately positioned after the condition, and the condition
// has multiple uses which prevent it from being moved immediately
// before the branch. See
// test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-*.ll
// for an example of this situation.
if (!Cond->hasOneUse()) {
for (BasicBlock::iterator I = Cond, E = Cond->getParent()->end();
I != E; ++I)
if (I == NewIncV)
return Cond;
if (NewCmpVal != CmpVal) {
// Create a new compare instruction using new stride / iv.
ICmpInst *OldCond = Cond;
Value *RHS;
if (!isa<PointerType>(NewCmpTy))
RHS = ConstantInt::get(NewCmpTy, NewCmpVal);
else {
RHS = ConstantInt::get(UIntPtrTy, NewCmpVal);
RHS = SCEVExpander::InsertCastOfTo(Instruction::IntToPtr, RHS, NewCmpTy);
}
Cond = new ICmpInst(Predicate, NewIncV, RHS,
L->getHeader()->getName() + ".termcond",
OldCond);
// Remove the old compare instruction. The old indvar is probably dead too.
DeadInsts.push_back(cast<Instruction>(CondUse->OperandValToReplace));
OldCond->replaceAllUsesWith(Cond);
OldCond->eraseFromParent();
IVUsesByStride[*CondStride].Users.pop_back();
SCEVHandle NewOffset = TyBits == NewTyBits
? SE->getMulExpr(CondUse->Offset,
SE->getConstant(ConstantInt::get(CmpTy, Scale)))
: SE->getConstant(ConstantInt::get(NewCmpTy,
cast<SCEVConstant>(CondUse->Offset)->getValue()->getSExtValue()*Scale));
IVUsesByStride[*NewStride].addUser(NewOffset, Cond, NewIncV);
CondUse = &IVUsesByStride[*NewStride].Users.back();
CondStride = NewStride;
++NumEliminated;
}
return Cond;
}
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
/// OptimizeSMax - Rewrite the loop's terminating condition if it uses
/// an smax computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
///
/// i = 0;
/// do {
/// p[i] = 0.0;
/// } while (++i < n);
///
/// where the comparison is signed, the trip count isn't just 'n', because
/// 'n' could be negative. And unfortunately this can come up even for loops
/// where the user didn't use a C do-while loop. For example, seemingly
/// well-behaved top-test loops will commonly be lowered like this:
//
/// if (n > 0) {
/// i = 0;
/// do {
/// p[i] = 0.0;
/// } while (++i < n);
/// }
///
/// and then it's possible for subsequent optimization to obscure the if
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
/// signed-max expression, which allows it to give the loop a canonical
/// induction variable:
///
/// i = 0;
/// smax = n < 1 ? 1 : n;
/// do {
/// p[i] = 0.0;
/// } while (++i != smax);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
/// LoopInfo analysis, which doesn't remember trip count values. It
/// expects to be able to rediscover the trip count each time it is
/// needed, and it does this using a simple analyis that only succeeds if
/// the loop has a canonical induction variable.
///
/// However, when it comes time to generate code, the maximum operation
/// can be quite costly, especially if it's inside of an outer loop.
///
/// This function solves this problem by detecting this type of loop and
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
///
ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse) {
// Check that the loop matches the pattern we're looking for.
if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
Cond->getPredicate() != CmpInst::ICMP_NE)
return Cond;
SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
if (!Sel || !Sel->hasOneUse()) return Cond;
SCEVHandle IterationCount = SE->getIterationCount(L);
if (isa<SCEVCouldNotCompute>(IterationCount))
return Cond;
SCEVHandle One = SE->getIntegerSCEV(1, IterationCount->getType());
// Adjust for an annoying getIterationCount quirk.
IterationCount = SE->getAddExpr(IterationCount, One);
// Check for a max calculation that matches the pattern.
SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(IterationCount);
if (!SMax || SMax != SE->getSCEV(Sel)) return Cond;
SCEVHandle SMaxLHS = SMax->getOperand(0);
SCEVHandle SMaxRHS = SMax->getOperand(1);
if (!SMaxLHS || SMaxLHS != One) return Cond;
// Check the relevant induction variable for conformance to
// the pattern.
SCEVHandle IV = SE->getSCEV(Cond->getOperand(0));
SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
if (!AR || !AR->isAffine() ||
AR->getStart() != One ||
AR->getStepRecurrence(*SE) != One)
return Cond;
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
Value *NewRHS = 0;
if (SE->getSCEV(Sel->getOperand(1)) == SMaxRHS)
NewRHS = Sel->getOperand(1);
else if (SE->getSCEV(Sel->getOperand(2)) == SMaxRHS)
NewRHS = Sel->getOperand(2);
if (!NewRHS) return Cond;
// Ok, everything looks ok to change the condition into an SLT or SGE and
// delete the max calculation.
ICmpInst *NewCond =
new ICmpInst(Cond->getPredicate() == CmpInst::ICMP_NE ?
CmpInst::ICMP_SLT :
CmpInst::ICMP_SGE,
Cond->getOperand(0), NewRHS, "scmp", Cond);
// Delete the max calculation instructions.
SE->deleteValueFromRecords(Cond);
Cond->replaceAllUsesWith(NewCond);
Cond->eraseFromParent();
Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
SE->deleteValueFromRecords(Sel);
Sel->eraseFromParent();
if (Cmp->use_empty()) {
SE->deleteValueFromRecords(Cmp);
Cmp->eraseFromParent();
}
CondUse->User = NewCond;
return NewCond;
}
Devang Patel
committed
/// OptimizeShadowIV - If IV is used in a int-to-float cast
/// inside the loop then try to eliminate the cast opeation.
void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
SCEVHandle IterationCount = SE->getIterationCount(L);
if (isa<SCEVCouldNotCompute>(IterationCount))
return;
for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e;
++Stride) {
std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
IVUsesByStride.find(StrideOrder[Stride]);
assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
if (!isa<SCEVConstant>(SI->first))
continue;
for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(),
E = SI->second.Users.end(); UI != E; /* empty */) {
std::vector<IVStrideUse>::iterator CandidateUI = UI;
++UI;
Devang Patel
committed
Instruction *ShadowUse = CandidateUI->User;
const Type *DestTy = NULL;
/* If shadow use is a int->float cast then insert a second IV
to eliminate this cast.
Devang Patel
committed
for (unsigned i = 0; i < n; ++i)
foo((double)i);
is transformed into
Devang Patel
committed
double d = 0.0;
for (unsigned i = 0; i < n; ++i, ++d)
foo(d);
*/
if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->User))
Devang Patel
committed
DestTy = UCast->getDestTy();
else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->User))
Devang Patel
committed
DestTy = SCast->getDestTy();
Devang Patel
committed
if (!DestTy) continue;
if (TLI) {
/* If target does not support DestTy natively then do not apply
this transformation. */
MVT DVT = TLI->getValueType(DestTy);
if (!TLI->isTypeLegal(DVT)) continue;
}
Devang Patel
committed
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
if (!PH) continue;
if (PH->getNumIncomingValues() != 2) continue;
const Type *SrcTy = PH->getType();
int Mantissa = DestTy->getFPMantissaWidth();
if (Mantissa == -1) continue;
if ((int)TD->getTypeSizeInBits(SrcTy) > Mantissa)
continue;
unsigned Entry, Latch;
if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
Entry = 0;
Latch = 1;
} else {
Entry = 1;
Latch = 0;
}
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
ConstantFP *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
if (!Incr) continue;
if (Incr->getOpcode() != Instruction::Add
&& Incr->getOpcode() != Instruction::Sub)
continue;
/* Initialize new IV, double d = 0.0 in above example. */
ConstantInt *C = NULL;
if (Incr->getOperand(0) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(1));
else if (Incr->getOperand(1) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(0));
else
continue;
if (!C) continue;
/* Add new PHINode. */
PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
/* create new increment. '++d' in above example. */
Devang Patel
committed
ConstantFP *CFP = ConstantFP::get(DestTy, C->getZExtValue());
BinaryOperator *NewIncr =
BinaryOperator::Create(Incr->getOpcode(),
NewPH, CFP, "IV.S.next.", Incr);
NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
/* Remove cast operation */
SE->deleteValueFromRecords(ShadowUse);
ShadowUse->replaceAllUsesWith(NewPH);
ShadowUse->eraseFromParent();
SI->second.Users.erase(CandidateUI);
NumShadow++;
break;
}
}
}
// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
// uses in the loop, look to see if we can eliminate some, in favor of using
// common indvars for the different uses.
void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
// TODO: implement optzns here.
Devang Patel
committed
OptimizeShadowIV(L);
// Finally, get the terminating condition for the loop if possible. If we
// can, we want to change it to use a post-incremented version of its
// induction variable, to allow coalescing the live ranges for the IV into
// one register value.
PHINode *SomePHI = cast<PHINode>(L->getHeader()->begin());
BasicBlock *Preheader = L->getLoopPreheader();
BasicBlock *LatchBlock =
SomePHI->getIncomingBlock(SomePHI->getIncomingBlock(0) == Preheader);
BranchInst *TermBr = dyn_cast<BranchInst>(LatchBlock->getTerminator());
if (!TermBr || TermBr->isUnconditional() ||
!isa<ICmpInst>(TermBr->getCondition()))
return;
// Search IVUsesByStride to find Cond's IVUse if there is one.
IVStrideUse *CondUse = 0;
Chris Lattner
committed
const SCEVHandle *CondStride = 0;
if (!FindIVUserForCond(Cond, CondUse, CondStride))
return; // setcc doesn't use the IV.
// If the trip count is computed in terms of an smax (due to ScalarEvolution
// being unable to find a sufficient guard, for example), change the loop
// comparison to use SLT instead of NE.
Cond = OptimizeSMax(L, Cond, CondUse);
// If possible, change stride and operands of the compare instruction to
// eliminate one stride.
Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
// It's possible for the setcc instruction to be anywhere in the loop, and