"git@repo.hca.bsc.es:rferrer/llvm-epi.git" did not exist on "1cc8e1e1d7d78fc3f2185c5ba207cd21f227fa1c"
Newer
Older
static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs,
SCEVHandle Expr,
ScalarEvolution *SE) {
if (SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) {
for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j)
SeparateSubExprs(SubExprs, AE->getOperand(j), SE);
} else if (SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Expr)) {
SCEVHandle Zero = SE->getIntegerSCEV(0, Expr->getType());
if (SARE->getOperand(0) == Zero) {
SubExprs.push_back(Expr);
} else {
// Compute the addrec with zero as its base.
std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Zero; // Start with zero base.
SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop()));
SeparateSubExprs(SubExprs, SARE->getOperand(0), SE);
}
} else if (!Expr->isZero()) {
// Do not add zero.
SubExprs.push_back(Expr);
}
}
// This is logically local to the following function, but C++ says we have
// to make it file scope.
struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
/// RemoveCommonExpressionsFromUseBases - Look through all of the Bases of all
/// the Uses, removing any common subexpressions, except that if all such
/// subexpressions can be folded into an addressing mode for all uses inside
/// the loop (this case is referred to as "free" in comments herein) we do
/// not remove anything. This looks for things like (a+b+c) and
/// (a+c+d) and computes the common (a+c) subexpression. The common expression
/// is *removed* from the Bases and returned.
RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
ScalarEvolution *SE, Loop *L,
const TargetLowering *TLI) {
unsigned NumUses = Uses.size();
// Only one use? This is a very common case, so we handle it specially and
// cheaply.
SCEVHandle Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType());
SCEVHandle FreeResult = Zero;
// If the use is inside the loop, use its base, regardless of what it is:
// it is clearly shared across all the IV's. If the use is outside the loop
// (which means after it) we don't want to factor anything *into* the loop,
// so just use 0 as the base.
if (L->contains(Uses[0].Inst->getParent()))
std::swap(Result, Uses[0].Base);
return Result;
}
// To find common subexpressions, count how many of Uses use each expression.
// If any subexpressions are used Uses.size() times, they are common.
// Also track whether all uses of each expression can be moved into an
// an addressing mode "for free"; such expressions are left within the loop.
// struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
std::map<SCEVHandle, SubExprUseData> SubExpressionUseData;
// UniqueSubExprs - Keep track of all of the subexpressions we see in the
// order we see them.
std::vector<SCEVHandle> UniqueSubExprs;
std::vector<SCEVHandle> SubExprs;
for (unsigned i = 0; i != NumUses; ++i) {
// If the user is outside the loop, just ignore it for base computation.
// Since the user is outside the loop, it must be *after* the loop (if it
// were before, it could not be based on the loop IV). We don't want users
// after the loop to affect base computation of values *inside* the loop,
// because we can always add their offsets to the result IV after the loop
// is done, ensuring we get good code inside the loop.
if (!L->contains(Uses[i].Inst->getParent()))
continue;
NumUsesInsideLoop++;
// If the base is zero (which is common), return zero now, there are no
// CSEs we can find.
if (Uses[i].Base == Zero) return Zero;
// If this use is as an address we may be able to put CSEs in the addressing
// mode rather than hoisting them.
bool isAddrUse = isAddressUse(Uses[i].Inst, Uses[i].OperandValToReplace);
// We may need the UseTy below, but only when isAddrUse, so compute it
// only in that case.
const Type *UseTy = 0;
if (isAddrUse) {
UseTy = Uses[i].Inst->getType();
if (StoreInst *SI = dyn_cast<StoreInst>(Uses[i].Inst))
UseTy = SI->getOperand(0)->getType();
}
// Split the expression into subexprs.
SeparateSubExprs(SubExprs, Uses[i].Base, SE);
// Add one to SubExpressionUseData.Count for each subexpr present, and
// if the subexpr is not a valid immediate within an addressing mode use,
// set SubExpressionUseData.notAllUsesAreFree. We definitely want to
// hoist these out of the loop (if they are common to all uses).
for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
if (++SubExpressionUseData[SubExprs[j]].Count == 1)
UniqueSubExprs.push_back(SubExprs[j]);
if (!isAddrUse || !fitsInAddressMode(SubExprs[j], UseTy, TLI, false))
SubExpressionUseData[SubExprs[j]].notAllUsesAreFree = true;
}
SubExprs.clear();
}
// Now that we know how many times each is used, build Result. Iterate over
// UniqueSubexprs so that we have a stable ordering.
for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) {
std::map<SCEVHandle, SubExprUseData>::iterator I =
SubExpressionUseData.find(UniqueSubExprs[i]);
assert(I != SubExpressionUseData.end() && "Entry not found?");
if (I->second.Count == NumUsesInsideLoop) { // Found CSE!
if (I->second.notAllUsesAreFree)
Result = SE->getAddExpr(Result, I->first);
else
FreeResult = SE->getAddExpr(FreeResult, I->first);
} else
// Remove non-cse's from SubExpressionUseData.
SubExpressionUseData.erase(I);
}
if (FreeResult != Zero) {
// We have some subexpressions that can be subsumed into addressing
// modes in every use inside the loop. However, it's possible that
// there are so many of them that the combined FreeResult cannot
// be subsumed, or that the target cannot handle both a FreeResult
// and a Result in the same instruction (for example because it would
// require too many registers). Check this.
for (unsigned i=0; i<NumUses; ++i) {
if (!L->contains(Uses[i].Inst->getParent()))
continue;
// We know this is an addressing mode use; if there are any uses that
// are not, FreeResult would be Zero.
const Type *UseTy = Uses[i].Inst->getType();
if (StoreInst *SI = dyn_cast<StoreInst>(Uses[i].Inst))
UseTy = SI->getOperand(0)->getType();
if (!fitsInAddressMode(FreeResult, UseTy, TLI, Result!=Zero)) {
// FIXME: could split up FreeResult into pieces here, some hoisted
// and some not. There is no obvious advantage to this.
Result = SE->getAddExpr(Result, FreeResult);
FreeResult = Zero;
break;
}
}
}
// If we found no CSE's, return now.
if (Result == Zero) return Result;
// If we still have a FreeResult, remove its subexpressions from
// SubExpressionUseData. This means they will remain in the use Bases.
if (FreeResult != Zero) {
SeparateSubExprs(SubExprs, FreeResult, SE);
for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
std::map<SCEVHandle, SubExprUseData>::iterator I =
SubExpressionUseData.find(SubExprs[j]);
SubExpressionUseData.erase(I);
}
SubExprs.clear();
}
// Otherwise, remove all of the CSE's we found from each of the base values.
for (unsigned i = 0; i != NumUses; ++i) {
// Uses outside the loop don't necessarily include the common base, but
// the final IV value coming into those uses does. Instead of trying to
// remove the pieces of the common base, which might not be there,
// subtract off the base to compensate for this.
if (!L->contains(Uses[i].Inst->getParent())) {
Uses[i].Base = SE->getMinusSCEV(Uses[i].Base, Result);
// Split the expression into subexprs.
SeparateSubExprs(SubExprs, Uses[i].Base, SE);
// Remove any common subexpressions.
for (unsigned j = 0, e = SubExprs.size(); j != e; ++j)
if (SubExpressionUseData.count(SubExprs[j])) {
SubExprs.erase(SubExprs.begin()+j);
--j; --e;
}
// Finally, add the non-shared expressions together.
if (SubExprs.empty())
else
Uses[i].Base = SE->getAddExpr(SubExprs);
SubExprs.clear();
}
return Result;
}
/// ValidStride - Check whether the given Scale is valid for all loads and
/// stores in UsersToProcess.
bool LoopStrengthReduce::ValidStride(bool HasBaseReg,
int64_t Scale,
const std::vector<BasedUser>& UsersToProcess) {
if (!TLI)
return true;
for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) {
// If this is a load or other access, pass the type of the access in.
const Type *AccessTy = Type::VoidTy;
if (StoreInst *SI = dyn_cast<StoreInst>(UsersToProcess[i].Inst))
AccessTy = SI->getOperand(0)->getType();
else if (LoadInst *LI = dyn_cast<LoadInst>(UsersToProcess[i].Inst))
AccessTy = LI->getType();
else if (isa<PHINode>(UsersToProcess[i].Inst))
continue;
TargetLowering::AddrMode AM;
if (SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
AM.BaseOffs = SC->getValue()->getSExtValue();
AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero();
AM.Scale = Scale;
// If load[imm+r*scale] is illegal, bail out.
if (!TLI->isLegalAddressingMode(AM, AccessTy))
return false;
}
return true;
}
/// RequiresTypeConversion - Returns true if converting Ty1 to Ty2 is not
/// a nop.
bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
const Type *Ty2) {
if (Ty1 == Ty2)
return false;
if (Ty1->canLosslesslyBitCastTo(Ty2))
return false;
if (TLI && TLI->isTruncateFree(Ty1, Ty2))
return false;
if (isa<PointerType>(Ty2) && Ty1->canLosslesslyBitCastTo(UIntPtrTy))
return false;
if (isa<PointerType>(Ty1) && Ty2->canLosslesslyBitCastTo(UIntPtrTy))
return false;
return true;
}
/// CheckForIVReuse - Returns the multiple if the stride is the multiple
/// of a previous stride and it is a legal value for the target addressing
/// mode scale component and optional base reg. This allows the users of
/// this stride to be rewritten as prev iv * factor. It returns 0 if no
/// reuse is possible. Factors can be negative on same targets, e.g. ARM.
///
/// If all uses are outside the loop, we don't require that all multiplies
/// be folded into the addressing mode, nor even that the factor be constant;
/// a multiply (executed once) outside the loop is better than another IV
/// within. Well, usually.
SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
bool AllUsesAreAddresses,
bool AllUsesAreOutsideLoop,
IVExpr &IV, const Type *Ty,
const std::vector<BasedUser>& UsersToProcess) {
if (SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
int64_t SInt = SC->getValue()->getSExtValue();
for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e;
++NewStride) {
std::map<SCEVHandle, IVsOfOneStride>::iterator SI =
IVsByStride.find(StrideOrder[NewStride]);
if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SI->first != Stride &&
(unsigned(abs(SInt)) < SSInt || (SInt % SSInt) != 0))
continue;
int64_t Scale = SInt / SSInt;
// Check that this stride is valid for all the types used for loads and
// stores; if it can be used for some and not others, we might as well use
// the original stride everywhere, since we have to create the IV for it
// anyway. If the scale is 1, then we don't need to worry about folding
// multiplications.
if (Scale == 1 ||
(AllUsesAreAddresses &&
ValidStride(HasBaseReg, Scale, UsersToProcess)))
for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
IE = SI->second.IVs.end(); II != IE; ++II)
// FIXME: Only handle base == 0 for now.
// Only reuse previous IV if it would not require a type conversion.
if (II->Base->isZero() &&
!RequiresTypeConversion(II->Base->getType(), Ty)) {
return SE->getIntegerSCEV(Scale, Stride->getType());
}
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
} else if (AllUsesAreOutsideLoop) {
// Accept nonconstant strides here; it is really really right to substitute
// an existing IV if we can.
for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e;
++NewStride) {
std::map<SCEVHandle, IVsOfOneStride>::iterator SI =
IVsByStride.find(StrideOrder[NewStride]);
if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SI->first != Stride && SSInt != 1)
continue;
for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
IE = SI->second.IVs.end(); II != IE; ++II)
// Accept nonzero base here.
// Only reuse previous IV if it would not require a type conversion.
if (!RequiresTypeConversion(II->Base->getType(), Ty)) {
IV = *II;
return Stride;
}
}
// Special case, old IV is -1*x and this one is x. Can treat this one as
// -1*old.
for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e;
++NewStride) {
std::map<SCEVHandle, IVsOfOneStride>::iterator SI =
IVsByStride.find(StrideOrder[NewStride]);
if (SI == IVsByStride.end())
continue;
if (SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first))
if (SCEVConstant *SC = dyn_cast<SCEVConstant>(ME->getOperand(0)))
if (Stride == ME->getOperand(1) &&
SC->getValue()->getSExtValue() == -1LL)
for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
IE = SI->second.IVs.end(); II != IE; ++II)
// Accept nonzero base here.
// Only reuse previous IV if it would not require type conversion.
if (!RequiresTypeConversion(II->Base->getType(), Ty)) {
IV = *II;
return SE->getIntegerSCEV(-1LL, Stride->getType());
}
}
}
return SE->getIntegerSCEV(0, Stride->getType());
}
/// PartitionByIsUseOfPostIncrementedValue - Simple boolean predicate that
/// returns true if Val's isUseOfPostIncrementedValue is true.
static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) {
return Val.isUseOfPostIncrementedValue;
}
/// isNonConstantNegative - Return true if the specified scev is negated, but
Chris Lattner
committed
/// not a constant.
static bool isNonConstantNegative(const SCEVHandle &Expr) {
SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr);
if (!Mul) return false;
// If there is a constant factor, it will be first.
SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
if (!SC) return false;
// Return true if the value is negative, this matches things like (-42 * V).
return SC->getValue()->getValue().isNegative();
}
// CollectIVUsers - Transform our list of users and offsets to a bit more
// complex table. In this new vector, each 'BasedUser' contains 'Base', the base
// of the strided accesses, as well as the old information from Uses. We
// progressively move information from the Base field to the Imm field, until
// we eventually have the full access expression to rewrite the use.
SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride,
IVUsersOfOneStride &Uses,
Loop *L,
bool &AllUsesAreAddresses,
bool &AllUsesAreOutsideLoop,
std::vector<BasedUser> &UsersToProcess) {
UsersToProcess.reserve(Uses.Users.size());
for (unsigned i = 0, e = Uses.Users.size(); i != e; ++i) {
UsersToProcess.push_back(BasedUser(Uses.Users[i], SE));
// Move any loop variant operands from the offset field to the immediate
// field of the use, so that we don't try to use something before it is
// computed.
MoveLoopVariantsToImmediateField(UsersToProcess.back().Base,
UsersToProcess.back().Imm, L, SE);
assert(UsersToProcess.back().Base->isLoopInvariant(L) &&
Chris Lattner
committed
"Base value is not loop invariant!");
// We now have a whole bunch of uses of like-strided induction variables, but
// they might all have different bases. We want to emit one PHI node for this
// stride which we fold as many common expressions (between the IVs) into as
// possible. Start by identifying the common expressions in the base values
// for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find
// "A+B"), emit it to the preheader, then remove the expression from the
// UsersToProcess base values.
SCEVHandle CommonExprs =
RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L, TLI);
// Next, figure out what we can represent in the immediate fields of
// instructions. If we can represent anything there, move it to the imm
// fields of the BasedUsers. We do this so that it increases the commonality
// of the remaining uses.
unsigned NumPHI = 0;
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
// If the user is not in the current loop, this means it is using the exit
// value of the IV. Do not put anything in the base, make sure it's all in
// the immediate field to allow as much factoring as possible.
if (!L->contains(UsersToProcess[i].Inst->getParent())) {
UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm,
UsersToProcess[i].Base);
UsersToProcess[i].Base =
SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType());
} else {
// Addressing modes can be folded into loads and stores. Be careful that
// the store is through the expression, not of the expression though.
bool isPHI = false;
bool isAddress = isAddressUse(UsersToProcess[i].Inst,
UsersToProcess[i].OperandValToReplace);
if (isa<PHINode>(UsersToProcess[i].Inst)) {
isPHI = true;
++NumPHI;
// Not all uses are outside the loop.
AllUsesAreOutsideLoop = false;
// If this use isn't an address, then not all uses are addresses.
MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
UsersToProcess[i].Imm, isAddress, L, SE);
}
}
// If one of the use if a PHI node and all other uses are addresses, still
// allow iv reuse. Essentially we are trading one constant multiplication
// for one fewer iv.
if (NumPHI > 1)
AllUsesAreAddresses = false;
return CommonExprs;
}
/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
/// stride of IV. All of the users may have different starting values, and this
/// may not be the only stride (we know it is if isOnlyStride is true).
void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
IVUsersOfOneStride &Uses,
Loop *L,
bool isOnlyStride) {
// If all the users are moved to another stride, then there is nothing to do.
if (Uses.Users.empty())
return;
// Keep track if every use in UsersToProcess is an address. If they all are,
// we may be able to rewrite the entire collection of them in terms of a
// smaller-stride IV.
bool AllUsesAreAddresses = true;
// Keep track if every use of a single stride is outside the loop. If so,
// we want to be more aggressive about reusing a smaller-stride IV; a
// multiply outside the loop is better than another IV inside. Well, usually.
bool AllUsesAreOutsideLoop = true;
// Transform our list of users and offsets to a bit more complex table. In
// this new vector, each 'BasedUser' contains 'Base' the base of the
// strided accessas well as the old information from Uses. We progressively
// move information from the Base field to the Imm field, until we eventually
// have the full access expression to rewrite the use.
std::vector<BasedUser> UsersToProcess;
SCEVHandle CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,
AllUsesAreOutsideLoop,
UsersToProcess);
// If we managed to find some expressions in common, we'll need to carry
// their value in a register and add it in for each use. This will take up
// a register operand, which potentially restricts what stride values are
// valid.
bool HaveCommonExprs = !CommonExprs->isZero();
// If all uses are addresses, check if it is possible to reuse an IV with a
// stride that is a factor of this stride. And that the multiple is a number
// that can be encoded in the scale field of the target addressing mode. And
// that we will have a valid instruction after this substition, including the
// immediate field, if any.
PHINode *NewPHI = NULL;
Value *IncV = NULL;
IVExpr ReuseIV(SE->getIntegerSCEV(0, Type::Int32Ty),
SE->getIntegerSCEV(0, Type::Int32Ty),
0, 0);
SCEVHandle RewriteFactor =
CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
AllUsesAreOutsideLoop,
Stride, ReuseIV, CommonExprs->getType(),
UsersToProcess);
if (!isa<SCEVConstant>(RewriteFactor) ||
!cast<SCEVConstant>(RewriteFactor)->isZero()) {
DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride
<< " and BASE " << *ReuseIV.Base << " :\n";
NewPHI = ReuseIV.PHI;
IncV = ReuseIV.IncV;
}
const Type *ReplacedTy = CommonExprs->getType();
// Now that we know what we need to do, insert the PHI node itself.
//
DOUT << "INSERTING IV of TYPE " << *ReplacedTy << " of STRIDE "
<< *Stride << " and BASE " << *CommonExprs << ": ";
SCEVExpander Rewriter(*SE, *LI);
SCEVExpander PreheaderRewriter(*SE, *LI);
BasicBlock *Preheader = L->getLoopPreheader();
Instruction *PreInsertPt = Preheader->getTerminator();
Instruction *PhiInsertBefore = L->getHeader()->begin();
BasicBlock *LatchBlock = L->getLoopLatch();
// Emit the initial base value into the loop preheader.
Value *CommonBaseV
= PreheaderRewriter.expandCodeFor(CommonExprs, PreInsertPt);
if (isa<SCEVConstant>(RewriteFactor) &&
cast<SCEVConstant>(RewriteFactor)->isZero()) {
// Create a new Phi for this base, and stick it in the loop header.
NewPHI = PHINode::Create(ReplacedTy, "iv.", PhiInsertBefore);
++NumInserted;
// Add common base to the new Phi node.
NewPHI->addIncoming(CommonBaseV, Preheader);
Chris Lattner
committed
// If the stride is negative, insert a sub instead of an add for the
// increment.
bool isNegative = isNonConstantNegative(Stride);
SCEVHandle IncAmount = Stride;
if (isNegative)
IncAmount = SE->getNegativeSCEV(Stride);
Chris Lattner
committed
// Insert the stride into the preheader.
Value *StrideV = PreheaderRewriter.expandCodeFor(IncAmount, PreInsertPt);
if (!isa<ConstantInt>(StrideV)) ++NumVariable;
Chris Lattner
committed
// Emit the increment of the base value before the terminator of the loop
// latch block, and add it to the Phi node.
SCEVHandle IncExp = SE->getUnknown(StrideV);
Chris Lattner
committed
if (isNegative)
IncExp = SE->getNegativeSCEV(IncExp);
IncExp = SE->getAddExpr(SE->getUnknown(NewPHI), IncExp);
IncV = Rewriter.expandCodeFor(IncExp, LatchBlock->getTerminator());
IncV->setName(NewPHI->getName()+".inc");
NewPHI->addIncoming(IncV, LatchBlock);
// Remember this in case a later stride is multiple of this.
IVsByStride[Stride].addIV(Stride, CommonExprs, NewPHI, IncV);
DOUT << " IV=%" << NewPHI->getNameStr() << " INC=%" << IncV->getNameStr();
} else {
Constant *C = dyn_cast<Constant>(CommonBaseV);
if (!C ||
(!C->isNullValue() &&
!fitsInAddressMode(SE->getUnknown(CommonBaseV), ReplacedTy,
TLI, false)))
// We want the common base emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(),
"commonbase", PreInsertPt);
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
// We want to emit code for users inside the loop first. To do this, we
// rearrange BasedUser so that the entries at the end have
// isUseOfPostIncrementedValue = false, because we pop off the end of the
// vector (so we handle them first).
std::partition(UsersToProcess.begin(), UsersToProcess.end(),
PartitionByIsUseOfPostIncrementedValue);
// Sort this by base, so that things with the same base are handled
// together. By partitioning first and stable-sorting later, we are
// guaranteed that within each base we will pop off users from within the
// loop before users outside of the loop with a particular base.
//
// We would like to use stable_sort here, but we can't. The problem is that
// SCEVHandle's don't have a deterministic ordering w.r.t to each other, so
// we don't have anything to do a '<' comparison on. Because we think the
// number of uses is small, do a horrible bubble sort which just relies on
// ==.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
// Get a base value.
SCEVHandle Base = UsersToProcess[i].Base;
// Compact everything with this base to be consecutive with this one.
for (unsigned j = i+1; j != e; ++j) {
if (UsersToProcess[j].Base == Base) {
std::swap(UsersToProcess[i+1], UsersToProcess[j]);
++i;
}
}
}
// Process all the users now. This outer loop handles all bases, the inner
// loop handles all users of a particular base.
while (!UsersToProcess.empty()) {
Chris Lattner
committed
SCEVHandle Base = UsersToProcess.back().Base;
// Emit the code for Base into the preheader.
Value *BaseV = PreheaderRewriter.expandCodeFor(Base, PreInsertPt);
DOUT << " INSERTING code for BASE = " << *Base << ":";
if (BaseV->hasName())
DOUT << " Result value name = %" << BaseV->getNameStr();
DOUT << "\n";
// If BaseV is a constant other than 0, make sure that it gets inserted into
// the preheader, instead of being forward substituted into the uses. We do
// this by forcing a BitCast (noop cast) to be inserted into the preheader
// in this case.
if (!C->isNullValue() && !fitsInAddressMode(Base, ReplacedTy,
TLI, false)) {
// We want this constant emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
// Emit the code to add the immediate offset to the Phi value, just before
// the instructions that we identified as using this stride and base.
Chris Lattner
committed
do {
Chris Lattner
committed
BasedUser &User = UsersToProcess.back();
// If this instruction wants to use the post-incremented value, move it
// after the post-inc and use its value instead of the PHI.
if (User.isUseOfPostIncrementedValue) {
// If this user is in the loop, make sure it is the last thing in the
// loop to ensure it is dominated by the increment.
if (L->contains(User.Inst->getParent()))
User.Inst->moveBefore(LatchBlock->getTerminator());
}
if (RewriteOp->getType() != ReplacedTy) {
Instruction::CastOps opcode = Instruction::Trunc;
if (ReplacedTy->getPrimitiveSizeInBits() ==
RewriteOp->getType()->getPrimitiveSizeInBits())
opcode = Instruction::BitCast;
RewriteOp = SCEVExpander::InsertCastOfTo(opcode, RewriteOp, ReplacedTy);
}
SCEVHandle RewriteExpr = SE->getUnknown(RewriteOp);
// If we had to insert new instructions for RewriteOp, we have to
// consider that they may not have been able to end up immediately
// next to RewriteOp, because non-PHI instructions may never precede
// PHI instructions in a block. In this case, remember where the last
// instruction was inserted so that if we're replacing a different
// PHI node, we can use the later point to expand the final
// RewriteExpr.
Instruction *NewBasePt = dyn_cast<Instruction>(RewriteOp);
if (RewriteOp == NewPHI) NewBasePt = 0;
// Clear the SCEVExpander's expression map so that we are guaranteed
// to have the code emitted where we expect it.
Rewriter.clear();
// If we are reusing the iv, then it must be multiplied by a constant
// factor to take advantage of the addressing mode scale component.
if (!isa<SCEVConstant>(RewriteFactor) ||
!cast<SCEVConstant>(RewriteFactor)->isZero()) {
// If we're reusing an IV with a nonzero base (currently this happens
// only when all reuses are outside the loop) subtract that base here.
// The base has been used to initialize the PHI node but we don't want
// it here.
if (!ReuseIV.Base->isZero()) {
SCEVHandle typedBase = ReuseIV.Base;
if (RewriteExpr->getType()->getPrimitiveSizeInBits() !=
ReuseIV.Base->getType()->getPrimitiveSizeInBits()) {
// It's possible the original IV is a larger type than the new IV,
// in which case we have to truncate the Base. We checked in
// RequiresTypeConversion that this is valid.
assert (RewriteExpr->getType()->getPrimitiveSizeInBits() <
ReuseIV.Base->getType()->getPrimitiveSizeInBits() &&
"Unexpected lengthening conversion!");
typedBase = SE->getTruncateExpr(ReuseIV.Base,
RewriteExpr->getType());
}
RewriteExpr = SE->getMinusSCEV(RewriteExpr, typedBase);
}
// Multiply old variable, with base removed, by new scale factor.
RewriteExpr = SE->getMulExpr(RewriteFactor,
RewriteExpr);
// The common base is emitted in the loop preheader. But since we
// are reusing an IV, it has not been used to initialize the PHI node.
// Add it to the expression used to rewrite the uses.
// When this use is outside the loop, we earlier subtracted the
// common base, and are adding it back here. Use the same expression
// as before, rather than CommonBaseV, so DAGCombiner will zap it.
if (!isa<ConstantInt>(CommonBaseV) ||
!cast<ConstantInt>(CommonBaseV)->isZero()) {
if (L->contains(User.Inst->getParent()))
RewriteExpr = SE->getAddExpr(RewriteExpr,
SE->getUnknown(CommonBaseV));
else
RewriteExpr = SE->getAddExpr(RewriteExpr, CommonExprs);
}
}
// Now that we know what we need to do, insert code before User for the
// immediate and any loop-variant expressions.
if (!isa<ConstantInt>(BaseV) || !cast<ConstantInt>(BaseV)->isZero())
// Add BaseV to the PHI value if needed.
RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV));
User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt,
Rewriter, L, this,
Evan Cheng
committed
DeadInsts);
// Mark old value we replaced as possibly dead, so that it is eliminated
// if we just replaced the last use of that value.
DeadInsts.push_back(cast<Instruction>(User.OperandValToReplace));
Chris Lattner
committed
UsersToProcess.pop_back();
Chris Lattner
committed
// If there are any more users to process with the same base, process them
// now. We sorted by base above, so we just have to check the last elt.
Chris Lattner
committed
} while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base);
// TODO: Next, find out which base index is the most common, pull it out.
}
// IMPORTANT TODO: Figure out how to partition the IV's with this stride, but
// different starting values, into different PHIs.
}
/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
/// set the IV user and stride information and return true, otherwise return
/// false.
bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
const SCEVHandle *&CondStride) {
for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e && !CondUse;
++Stride) {
std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
IVUsesByStride.find(StrideOrder[Stride]);
assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(),
E = SI->second.Users.end(); UI != E; ++UI)
if (UI->User == Cond) {
// NOTE: we could handle setcc instructions with multiple uses here, but
// InstCombine does it as well for simple uses, it's not clear that it
// occurs enough in real life to handle.
CondUse = &*UI;
CondStride = &SI->first;
return true;
}
}
return false;
}
namespace {
// Constant strides come first which in turns are sorted by their absolute
// values. If absolute values are the same, then positive strides comes first.
// e.g.
// 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X
struct StrideCompare {
bool operator()(const SCEVHandle &LHS, const SCEVHandle &RHS) {
SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
if (LHSC && RHSC) {
int64_t LV = LHSC->getValue()->getSExtValue();
int64_t RV = RHSC->getValue()->getSExtValue();
uint64_t ALV = (LV < 0) ? -LV : LV;
uint64_t ARV = (RV < 0) ? -RV : RV;
if (ALV == ARV) {
if (LV != RV)
return LV > RV;
} else {
return ALV < ARV;
}
// If it's the same value but different type, sort by bit width so
// that we emit larger induction variables before smaller
// ones, letting the smaller be re-written in terms of larger ones.
return RHS->getBitWidth() < LHS->getBitWidth();
}
return LHSC && !RHSC;
}
};
}
/// ChangeCompareStride - If a loop termination compare instruction is the
/// only use of its stride, and the compaison is against a constant value,
/// try eliminate the stride by moving the compare instruction to another
/// stride and change its constant operand accordingly. e.g.
///
/// loop:
/// ...
/// v1 = v1 + 3
/// v2 = v2 + 1
/// if (v2 < 10) goto loop
/// =>
/// loop:
/// ...
/// v1 = v1 + 3
/// if (v1 < 30) goto loop
ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
Evan Cheng
committed
IVStrideUse* &CondUse,
const SCEVHandle* &CondStride) {
if (StrideOrder.size() < 2 ||
IVUsesByStride[*CondStride].Users.size() != 1)
return Cond;
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride);
if (!SC) return Cond;
ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1));
if (!C) return Cond;
ICmpInst::Predicate Predicate = Cond->getPredicate();
int64_t CmpSSInt = SC->getValue()->getSExtValue();
int64_t CmpVal = C->getValue().getSExtValue();
unsigned BitWidth = C->getValue().getBitWidth();
uint64_t SignBit = 1ULL << (BitWidth-1);
const Type *CmpTy = C->getType();
const Type *NewCmpTy = NULL;
unsigned TyBits = CmpTy->getPrimitiveSizeInBits();
unsigned NewTyBits = 0;
int64_t NewCmpVal = CmpVal;
SCEVHandle *NewStride = NULL;
Value *NewIncV = NULL;
int64_t Scale = 1;
// Check stride constant and the comparision constant signs to detect
// overflow.
return Cond;
// Look for a suitable stride / iv as replacement.
std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare());
for (unsigned i = 0, e = StrideOrder.size(); i != e; ++i) {
std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
IVUsesByStride.find(StrideOrder[i]);
if (!isa<SCEVConstant>(SI->first))
continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (abs(SSInt) <= abs(CmpSSInt) || (SSInt % CmpSSInt) != 0)
continue;
Scale = SSInt / CmpSSInt;
NewCmpVal = CmpVal * Scale;
APInt Mul = APInt(BitWidth, NewCmpVal);
// Check for overflow.
if (Mul.getSExtValue() != NewCmpVal) {
NewCmpVal = CmpVal;
continue;
}
// Watch out for overflow.
if (ICmpInst::isSignedPredicate(Predicate) &&
(CmpVal & SignBit) != (NewCmpVal & SignBit))
NewCmpVal = CmpVal;
if (NewCmpVal != CmpVal) {
// Pick the best iv to use trying to avoid a cast.
NewIncV = NULL;
for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(),
E = SI->second.Users.end(); UI != E; ++UI) {
NewIncV = UI->OperandValToReplace;
if (NewIncV->getType() == CmpTy)
break;
}
if (!NewIncV) {
NewCmpVal = CmpVal;
continue;
}
NewCmpTy = NewIncV->getType();
NewTyBits = isa<PointerType>(NewCmpTy)
? UIntPtrTy->getPrimitiveSizeInBits()
: NewCmpTy->getPrimitiveSizeInBits();
if (RequiresTypeConversion(NewCmpTy, CmpTy)) {
// Check if it is possible to rewrite it using
// an iv / stride of a smaller integer type.
bool TruncOk = false;
if (NewCmpTy->isInteger()) {
unsigned Bits = NewTyBits;
if (ICmpInst::isSignedPredicate(Predicate))
--Bits;
uint64_t Mask = (1ULL << Bits) - 1;
if (((uint64_t)NewCmpVal & Mask) == (uint64_t)NewCmpVal)
TruncOk = true;
}
if (!TruncOk) {
NewCmpVal = CmpVal;
continue;
}
}
// Don't rewrite if use offset is non-constant and the new type is
// of a different type.
// FIXME: too conservative?
if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->Offset)) {
NewCmpVal = CmpVal;
continue;
}
bool AllUsesAreAddresses = true;
bool AllUsesAreOutsideLoop = true;
std::vector<BasedUser> UsersToProcess;
SCEVHandle CommonExprs = CollectIVUsers(SI->first, SI->second, L,
AllUsesAreAddresses,
AllUsesAreOutsideLoop,
UsersToProcess);
// Avoid rewriting the compare instruction with an iv of new stride
// if it's likely the new stride uses will be rewritten using the
// stride of the compare instruction.
if (AllUsesAreAddresses &&
ValidStride(!CommonExprs->isZero(), Scale, UsersToProcess)) {
NewCmpVal = CmpVal;
continue;
}
Evan Cheng
committed
// If scale is negative, use swapped predicate unless it's testing
// for equality.
if (Scale < 0 && !Cond->isEquality())
Evan Cheng
committed
Predicate = ICmpInst::getSwappedPredicate(Predicate);
NewStride = &StrideOrder[i];
break;
}
}
// Forgo this transformation if it the increment happens to be
// unfortunately positioned after the condition, and the condition
// has multiple uses which prevent it from being moved immediately
// before the branch. See
// test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-*.ll
// for an example of this situation.
if (!Cond->hasOneUse()) {
for (BasicBlock::iterator I = Cond, E = Cond->getParent()->end();
I != E; ++I)
if (I == NewIncV)
return Cond;
if (NewCmpVal != CmpVal) {
// Create a new compare instruction using new stride / iv.
ICmpInst *OldCond = Cond;
Value *RHS;
if (!isa<PointerType>(NewCmpTy))
RHS = ConstantInt::get(NewCmpTy, NewCmpVal);
else {
RHS = ConstantInt::get(UIntPtrTy, NewCmpVal);
RHS = SCEVExpander::InsertCastOfTo(Instruction::IntToPtr, RHS, NewCmpTy);
}
Cond = new ICmpInst(Predicate, NewIncV, RHS,
L->getHeader()->getName() + ".termcond",
OldCond);
// Remove the old compare instruction. The old indvar is probably dead too.
DeadInsts.push_back(cast<Instruction>(CondUse->OperandValToReplace));
OldCond->replaceAllUsesWith(Cond);
OldCond->eraseFromParent();
IVUsesByStride[*CondStride].Users.pop_back();
SCEVHandle NewOffset = TyBits == NewTyBits
? SE->getMulExpr(CondUse->Offset,
SE->getConstant(ConstantInt::get(CmpTy, Scale)))
: SE->getConstant(ConstantInt::get(NewCmpTy,
cast<SCEVConstant>(CondUse->Offset)->getValue()->getSExtValue()*Scale));
IVUsesByStride[*NewStride].addUser(NewOffset, Cond, NewIncV);
CondUse = &IVUsesByStride[*NewStride].Users.back();
CondStride = NewStride;
++NumEliminated;
}
return Cond;
}
/// OptimizeSMax - Rewrite the loop's terminating condition if it uses
/// an smax computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
///
/// i = 0;
/// do {
/// p[i] = 0.0;
/// } while (++i < n);
///
/// where the comparison is signed, the trip count isn't just 'n', because
/// 'n' could be negative. And unfortunately this can come up even for loops