Newer
Older
Evan Cheng
committed
StrideNoReuse.count(SI->first))
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SI->first != Stride &&
(unsigned(abs64(SInt)) < SSInt || (SInt % SSInt) != 0))
continue;
int64_t Scale = SInt / SSInt;
// Check that this stride is valid for all the types used for loads and
// stores; if it can be used for some and not others, we might as well use
// the original stride everywhere, since we have to create the IV for it
// anyway. If the scale is 1, then we don't need to worry about folding
// multiplications.
if (Scale == 1 ||
(AllUsesAreAddresses &&
ValidScale(HasBaseReg, Scale, UsersToProcess))) {
// Prefer to reuse an IV with a base of zero.
for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
IE = SI->second.IVs.end(); II != IE; ++II)
// Only reuse previous IV if it would not require a type conversion
// and if the base difference can be folded.
if (II->Base->isZero() &&
!RequiresTypeConversion(II->Base->getType(), Ty)) {
return SE->getIntegerSCEV(Scale, Stride->getType());
// Otherwise, settle for an IV with a foldable base.
if (AllUsesAreAddresses)
for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
IE = SI->second.IVs.end(); II != IE; ++II)
// Only reuse previous IV if it would not require a type conversion
// and if the base difference can be folded.
if (SE->getEffectiveSCEVType(II->Base->getType()) ==
SE->getEffectiveSCEVType(Ty) &&
isa<SCEVConstant>(II->Base)) {
int64_t Base =
cast<SCEVConstant>(II->Base)->getValue()->getSExtValue();
if (Base > INT32_MIN && Base <= INT32_MAX &&
ValidOffset(HasBaseReg, -Base * Scale,
Scale, UsersToProcess)) {
IV = *II;
return SE->getIntegerSCEV(Scale, Stride->getType());
}
}
}
}
} else if (AllUsesAreOutsideLoop) {
// Accept nonconstant strides here; it is really really right to substitute
// an existing IV if we can.
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
std::map<const SCEV*, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SI->first != Stride && SSInt != 1)
continue;
for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
IE = SI->second.IVs.end(); II != IE; ++II)
// Accept nonzero base here.
// Only reuse previous IV if it would not require a type conversion.
if (!RequiresTypeConversion(II->Base->getType(), Ty)) {
IV = *II;
return Stride;
}
}
// Special case, old IV is -1*x and this one is x. Can treat this one as
// -1*old.
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
std::map<const SCEV*, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end())
continue;
if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first))
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(ME->getOperand(0)))
if (Stride == ME->getOperand(1) &&
SC->getValue()->getSExtValue() == -1LL)
for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
IE = SI->second.IVs.end(); II != IE; ++II)
// Accept nonzero base here.
// Only reuse previous IV if it would not require type conversion.
if (!RequiresTypeConversion(II->Base->getType(), Ty)) {
IV = *II;
return SE->getIntegerSCEV(-1LL, Stride->getType());
}
}
}
return SE->getIntegerSCEV(0, Stride->getType());
}
/// PartitionByIsUseOfPostIncrementedValue - Simple boolean predicate that
/// returns true if Val's isUseOfPostIncrementedValue is true.
static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) {
return Val.isUseOfPostIncrementedValue;
}
/// isNonConstantNegative - Return true if the specified scev is negated, but
Chris Lattner
committed
/// not a constant.
static bool isNonConstantNegative(const SCEV* const &Expr) {
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr);
Chris Lattner
committed
if (!Mul) return false;
// If there is a constant factor, it will be first.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
Chris Lattner
committed
if (!SC) return false;
// Return true if the value is negative, this matches things like (-42 * V).
return SC->getValue()->getValue().isNegative();
}
/// CollectIVUsers - Transform our list of users and offsets to a bit more
/// complex table. In this new vector, each 'BasedUser' contains 'Base', the base
/// of the strided accesses, as well as the old information from Uses. We
/// progressively move information from the Base field to the Imm field, until
/// we eventually have the full access expression to rewrite the use.
const SCEV* LoopStrengthReduce::CollectIVUsers(const SCEV* const &Stride,
IVUsersOfOneStride &Uses,
Loop *L,
bool &AllUsesAreAddresses,
bool &AllUsesAreOutsideLoop,
std::vector<BasedUser> &UsersToProcess) {
// FIXME: Generalize to non-affine IV's.
if (!Stride->isLoopInvariant(L))
return SE->getIntegerSCEV(0, Stride->getType());
UsersToProcess.reserve(Uses.Users.size());
for (ilist<IVStrideUse>::iterator I = Uses.Users.begin(),
E = Uses.Users.end(); I != E; ++I) {
UsersToProcess.push_back(BasedUser(*I, SE));
// Move any loop variant operands from the offset field to the immediate
// field of the use, so that we don't try to use something before it is
// computed.
MoveLoopVariantsToImmediateField(UsersToProcess.back().Base,
Evan Cheng
committed
UsersToProcess.back().Imm, L, SE);
assert(UsersToProcess.back().Base->isLoopInvariant(L) &&
Chris Lattner
committed
"Base value is not loop invariant!");
// We now have a whole bunch of uses of like-strided induction variables, but
// they might all have different bases. We want to emit one PHI node for this
// stride which we fold as many common expressions (between the IVs) into as
// possible. Start by identifying the common expressions in the base values
// for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find
// "A+B"), emit it to the preheader, then remove the expression from the
// UsersToProcess base values.
RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L, TLI);
// Next, figure out what we can represent in the immediate fields of
// instructions. If we can represent anything there, move it to the imm
// fields of the BasedUsers. We do this so that it increases the commonality
// of the remaining uses.
unsigned NumPHI = 0;
bool HasAddress = false;
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
// If the user is not in the current loop, this means it is using the exit
// value of the IV. Do not put anything in the base, make sure it's all in
// the immediate field to allow as much factoring as possible.
if (!L->contains(UsersToProcess[i].Inst->getParent())) {
UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm,
UsersToProcess[i].Base);
UsersToProcess[i].Base =
SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType());
} else {
// Not all uses are outside the loop.
AllUsesAreOutsideLoop = false;
// Addressing modes can be folded into loads and stores. Be careful that
// the store is through the expression, not of the expression though.
bool isPHI = false;
bool isAddress = isAddressUse(UsersToProcess[i].Inst,
UsersToProcess[i].OperandValToReplace);
if (isa<PHINode>(UsersToProcess[i].Inst)) {
isPHI = true;
++NumPHI;
if (isAddress)
HasAddress = true;
// If this use isn't an address, then not all uses are addresses.
MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
UsersToProcess[i].Imm, isAddress, L, SE);
}
}
// If one of the use is a PHI node and all other uses are addresses, still
// allow iv reuse. Essentially we are trading one constant multiplication
// for one fewer iv.
if (NumPHI > 1)
AllUsesAreAddresses = false;
// There are no in-loop address uses.
if (AllUsesAreAddresses && (!HasAddress && !AllUsesAreOutsideLoop))
AllUsesAreAddresses = false;
return CommonExprs;
}
/// ShouldUseFullStrengthReductionMode - Test whether full strength-reduction
/// is valid and profitable for the given set of users of a stride. In
/// full strength-reduction mode, all addresses at the current stride are
/// strength-reduced all the way down to pointer arithmetic.
///
bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode(
const std::vector<BasedUser> &UsersToProcess,
const Loop *L,
bool AllUsesAreAddresses,
if (!EnableFullLSRMode)
return false;
// The heuristics below aim to avoid increasing register pressure, but
// fully strength-reducing all the addresses increases the number of
// add instructions, so don't do this when optimizing for size.
// TODO: If the loop is large, the savings due to simpler addresses
// may oughtweight the costs of the extra increment instructions.
if (L->getHeader()->getParent()->hasFnAttr(Attribute::OptimizeForSize))
return false;
// TODO: For now, don't do full strength reduction if there could
// potentially be greater-stride multiples of the current stride
// which could reuse the current stride IV.
if (IU->StrideOrder.back() != Stride)
return false;
// Iterate through the uses to find conditions that automatically rule out
// full-lsr mode.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) {
const SCEV *Base = UsersToProcess[i].Base;
const SCEV *Imm = UsersToProcess[i].Imm;
// If any users have a loop-variant component, they can't be fully
// strength-reduced.
if (Imm && !Imm->isLoopInvariant(L))
return false;
// If there are to users with the same base and the difference between
// the two Imm values can't be folded into the address, full
// strength reduction would increase register pressure.
do {
const SCEV *CurImm = UsersToProcess[i].Imm;
if ((CurImm || Imm) && CurImm != Imm) {
if (!CurImm) CurImm = SE->getIntegerSCEV(0, Stride->getType());
if (!Imm) Imm = SE->getIntegerSCEV(0, Stride->getType());
const Instruction *Inst = UsersToProcess[i].Inst;
const Type *AccessTy = getAccessType(Inst);
const SCEV* Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm);
if (!Diff->isZero() &&
(!AllUsesAreAddresses ||
!fitsInAddressMode(Diff, AccessTy, TLI, /*HasBaseReg=*/true)))
return false;
}
} while (++i != e && Base == UsersToProcess[i].Base);
}
// If there's exactly one user in this stride, fully strength-reducing it
// won't increase register pressure. If it's starting from a non-zero base,
// it'll be simpler this way.
if (UsersToProcess.size() == 1 && !UsersToProcess[0].Base->isZero())
return true;
// Otherwise, if there are any users in this stride that don't require
// a register for their base, full strength-reduction will increase
// register pressure.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
if (UsersToProcess[i].Base->isZero())
return false;
// Otherwise, go for it.
return true;
}
/// InsertAffinePhi Create and insert a PHI node for an induction variable
/// with the specified start and step values in the specified loop.
///
/// If NegateStride is true, the stride should be negated by using a
/// subtract instead of an add.
///
/// Return the created phi node.
static PHINode *InsertAffinePhi(const SCEV* Start, const SCEV* Step,
Evan Cheng
committed
Instruction *IVIncInsertPt,
SCEVExpander &Rewriter) {
assert(Start->isLoopInvariant(L) && "New PHI start is not loop invariant!");
assert(Step->isLoopInvariant(L) && "New PHI stride is not loop invariant!");
BasicBlock *Header = L->getHeader();
BasicBlock *Preheader = L->getLoopPreheader();
BasicBlock *LatchBlock = L->getLoopLatch();
const Type *Ty = Start->getType();
Ty = Rewriter.SE.getEffectiveSCEVType(Ty);
PHINode *PN = PHINode::Create(Ty, "lsr.iv", Header->begin());
PN->addIncoming(Rewriter.expandCodeFor(Start, Ty, Preheader->getTerminator()),
Preheader);
// If the stride is negative, insert a sub instead of an add for the
// increment.
bool isNegative = isNonConstantNegative(Step);
if (isNegative)
IncAmount = Rewriter.SE.getNegativeSCEV(Step);
// Insert an add instruction right before the terminator corresponding
Evan Cheng
committed
// to the back-edge or just before the only use. The location is determined
// by the caller and passed in as IVIncInsertPt.
Value *StepV = Rewriter.expandCodeFor(IncAmount, Ty,
Preheader->getTerminator());
Instruction *IncV;
if (isNegative) {
IncV = BinaryOperator::CreateSub(PN, StepV, "lsr.iv.next",
Evan Cheng
committed
IVIncInsertPt);
} else {
IncV = BinaryOperator::CreateAdd(PN, StepV, "lsr.iv.next",
Evan Cheng
committed
IVIncInsertPt);
}
if (!isa<ConstantInt>(StepV)) ++NumVariable;
PN->addIncoming(IncV, LatchBlock);
++NumInserted;
return PN;
}
static void SortUsersToProcess(std::vector<BasedUser> &UsersToProcess) {
// We want to emit code for users inside the loop first. To do this, we
// rearrange BasedUser so that the entries at the end have
// isUseOfPostIncrementedValue = false, because we pop off the end of the
// vector (so we handle them first).
std::partition(UsersToProcess.begin(), UsersToProcess.end(),
PartitionByIsUseOfPostIncrementedValue);
// Sort this by base, so that things with the same base are handled
// together. By partitioning first and stable-sorting later, we are
// guaranteed that within each base we will pop off users from within the
// loop before users outside of the loop with a particular base.
//
// We would like to use stable_sort here, but we can't. The problem is that
// const SCEV*'s don't have a deterministic ordering w.r.t to each other, so
// we don't have anything to do a '<' comparison on. Because we think the
// number of uses is small, do a horrible bubble sort which just relies on
// ==.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
// Get a base value.
// Compact everything with this base to be consecutive with this one.
for (unsigned j = i+1; j != e; ++j) {
if (UsersToProcess[j].Base == Base) {
std::swap(UsersToProcess[i+1], UsersToProcess[j]);
++i;
}
}
}
}
/// PrepareToStrengthReduceFully - Prepare to fully strength-reduce
/// UsersToProcess, meaning lowering addresses all the way down to direct
/// pointer arithmetic.
///
void
LoopStrengthReduce::PrepareToStrengthReduceFully(
std::vector<BasedUser> &UsersToProcess,
const SCEV* Stride,
const SCEV* CommonExprs,
const Loop *L,
SCEVExpander &PreheaderRewriter) {
DOUT << " Fully reducing all users\n";
// Rewrite the UsersToProcess records, creating a separate PHI for each
// unique Base value.
Evan Cheng
committed
Instruction *IVIncInsertPt = L->getLoopLatch()->getTerminator();
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) {
// TODO: The uses are grouped by base, but not sorted. We arbitrarily
// pick the first Imm value here to start with, and adjust it for the
// other uses.
const SCEV* Imm = UsersToProcess[i].Imm;
const SCEV* Base = UsersToProcess[i].Base;
const SCEV* Start = SE->getAddExpr(CommonExprs, Base, Imm);
Evan Cheng
committed
PHINode *Phi = InsertAffinePhi(Start, Stride, IVIncInsertPt, L,
PreheaderRewriter);
// Loop over all the users with the same base.
do {
UsersToProcess[i].Base = SE->getIntegerSCEV(0, Stride->getType());
UsersToProcess[i].Imm = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm);
UsersToProcess[i].Phi = Phi;
assert(UsersToProcess[i].Imm->isLoopInvariant(L) &&
"ShouldUseFullStrengthReductionMode should reject this!");
} while (++i != e && Base == UsersToProcess[i].Base);
}
}
Evan Cheng
committed
/// FindIVIncInsertPt - Return the location to insert the increment instruction.
/// If the only use if a use of postinc value, (must be the loop termination
/// condition), then insert it just before the use.
static Instruction *FindIVIncInsertPt(std::vector<BasedUser> &UsersToProcess,
const Loop *L) {
if (UsersToProcess.size() == 1 &&
UsersToProcess[0].isUseOfPostIncrementedValue &&
L->contains(UsersToProcess[0].Inst->getParent()))
return UsersToProcess[0].Inst;
return L->getLoopLatch()->getTerminator();
}
/// PrepareToStrengthReduceWithNewPhi - Insert a new induction variable for the
/// given users to share.
///
void
LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(
std::vector<BasedUser> &UsersToProcess,
const SCEV* Stride,
const SCEV* CommonExprs,
Value *CommonBaseV,
Evan Cheng
committed
Instruction *IVIncInsertPt,
const Loop *L,
SCEVExpander &PreheaderRewriter) {
DOUT << " Inserting new PHI:\n";
PHINode *Phi = InsertAffinePhi(SE->getUnknown(CommonBaseV),
Evan Cheng
committed
Stride, IVIncInsertPt, L,
PreheaderRewriter);
// Remember this in case a later stride is multiple of this.
IVsByStride[Stride].addIV(Stride, CommonExprs, Phi);
// All the users will share this new IV.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
UsersToProcess[i].Phi = Phi;
DOUT << " IV=";
DEBUG(WriteAsOperand(*DOUT, Phi, /*PrintType=*/false));
DOUT << "\n";
}
Evan Cheng
committed
/// PrepareToStrengthReduceFromSmallerStride - Prepare for the given users to
/// reuse an induction variable with a stride that is a factor of the current
/// induction variable.
///
void
LoopStrengthReduce::PrepareToStrengthReduceFromSmallerStride(
std::vector<BasedUser> &UsersToProcess,
Value *CommonBaseV,
const IVExpr &ReuseIV,
Instruction *PreInsertPt) {
DOUT << " Rewriting in terms of existing IV of STRIDE " << *ReuseIV.Stride
<< " and BASE " << *ReuseIV.Base << "\n";
// All the users will share the reused IV.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
UsersToProcess[i].Phi = ReuseIV.PHI;
Constant *C = dyn_cast<Constant>(CommonBaseV);
if (C &&
(!C->isNullValue() &&
!fitsInAddressMode(SE->getUnknown(CommonBaseV), CommonBaseV->getType(),
TLI, false)))
// We want the common base emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(),
"commonbase", PreInsertPt);
}
static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset,
const Type *AccessTy,
std::vector<BasedUser> &UsersToProcess,
const TargetLowering *TLI) {
SmallVector<Instruction*, 16> AddrModeInsts;
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
if (UsersToProcess[i].isUseOfPostIncrementedValue)
continue;
ExtAddrMode AddrMode =
AddressingModeMatcher::Match(UsersToProcess[i].OperandValToReplace,
AccessTy, UsersToProcess[i].Inst,
AddrModeInsts, *TLI);
if (GV && GV != AddrMode.BaseGV)
return false;
if (Offset && !AddrMode.BaseOffs)
// FIXME: How to accurate check it's immediate offset is folded.
return false;
AddrModeInsts.clear();
}
return true;
}
/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
/// stride of IV. All of the users may have different starting values, and this
/// may not be the only stride.
void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
IVUsersOfOneStride &Uses,
// If all the users are moved to another stride, then there is nothing to do.
if (Uses.Users.empty())
return;
// Keep track if every use in UsersToProcess is an address. If they all are,
// we may be able to rewrite the entire collection of them in terms of a
// smaller-stride IV.
bool AllUsesAreAddresses = true;
// Keep track if every use of a single stride is outside the loop. If so,
// we want to be more aggressive about reusing a smaller-stride IV; a
// multiply outside the loop is better than another IV inside. Well, usually.
bool AllUsesAreOutsideLoop = true;
// Transform our list of users and offsets to a bit more complex table. In
// this new vector, each 'BasedUser' contains 'Base' the base of the
// strided accessas well as the old information from Uses. We progressively
// move information from the Base field to the Imm field, until we eventually
// have the full access expression to rewrite the use.
std::vector<BasedUser> UsersToProcess;
const SCEV* CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,
AllUsesAreOutsideLoop,
UsersToProcess);
// Sort the UsersToProcess array so that users with common bases are
// next to each other.
SortUsersToProcess(UsersToProcess);
// If we managed to find some expressions in common, we'll need to carry
// their value in a register and add it in for each use. This will take up
// a register operand, which potentially restricts what stride values are
// valid.
bool HaveCommonExprs = !CommonExprs->isZero();
const Type *ReplacedTy = CommonExprs->getType();
// If all uses are addresses, consider sinking the immediate part of the
// common expression back into uses if they can fit in the immediate fields.
if (TLI && HaveCommonExprs && AllUsesAreAddresses) {
const SCEV* NewCommon = CommonExprs;
const SCEV* Imm = SE->getIntegerSCEV(0, ReplacedTy);
MoveImmediateValues(TLI, Type::VoidTy, NewCommon, Imm, true, L, SE);
if (!Imm->isZero()) {
bool DoSink = true;
// If the immediate part of the common expression is a GV, check if it's
// possible to fold it into the target addressing mode.
GlobalValue *GV = 0;
if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(Imm))
GV = dyn_cast<GlobalValue>(SU->getValue());
int64_t Offset = 0;
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Imm))
Offset = SC->getValue()->getSExtValue();
if (GV || Offset)
// Pass VoidTy as the AccessTy to be conservative, because
// there could be multiple access types among all the uses.
DoSink = IsImmFoldedIntoAddrMode(GV, Offset, Type::VoidTy,
UsersToProcess, TLI);
if (DoSink) {
DOUT << " Sinking " << *Imm << " back down into uses\n";
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, Imm);
CommonExprs = NewCommon;
HaveCommonExprs = !CommonExprs->isZero();
++NumImmSunk;
}
}
}
// Now that we know what we need to do, insert the PHI node itself.
//
DOUT << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE "
<< *Stride << ":\n"
<< " Common base: " << *CommonExprs << "\n";
SCEVExpander Rewriter(*SE);
SCEVExpander PreheaderRewriter(*SE);
BasicBlock *Preheader = L->getLoopPreheader();
Instruction *PreInsertPt = Preheader->getTerminator();
BasicBlock *LatchBlock = L->getLoopLatch();
Evan Cheng
committed
Instruction *IVIncInsertPt = LatchBlock->getTerminator();
Value *CommonBaseV = Context->getNullValue(ReplacedTy);
const SCEV* RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy);
IVExpr ReuseIV(SE->getIntegerSCEV(0, Type::Int32Ty),
SE->getIntegerSCEV(0, Type::Int32Ty),
0);
/// Choose a strength-reduction strategy and prepare for it by creating
/// the necessary PHIs and adjusting the bookkeeping.
if (ShouldUseFullStrengthReductionMode(UsersToProcess, L,
AllUsesAreAddresses, Stride)) {
PrepareToStrengthReduceFully(UsersToProcess, Stride, CommonExprs, L,
PreheaderRewriter);
} else {
// Emit the initial base value into the loop preheader.
CommonBaseV = PreheaderRewriter.expandCodeFor(CommonExprs, ReplacedTy,
PreInsertPt);
// If all uses are addresses, check if it is possible to reuse an IV. The
// new IV must have a stride that is a multiple of the old stride; the
// multiple must be a number that can be encoded in the scale field of the
// target addressing mode; and we must have a valid instruction after this
// substitution, including the immediate field, if any.
RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
AllUsesAreOutsideLoop,
Stride, ReuseIV, ReplacedTy,
UsersToProcess);
Evan Cheng
committed
if (!RewriteFactor->isZero())
PrepareToStrengthReduceFromSmallerStride(UsersToProcess, CommonBaseV,
ReuseIV, PreInsertPt);
Evan Cheng
committed
else {
IVIncInsertPt = FindIVIncInsertPt(UsersToProcess, L);
PrepareToStrengthReduceWithNewPhi(UsersToProcess, Stride, CommonExprs,
CommonBaseV, IVIncInsertPt,
L, PreheaderRewriter);
}
// Process all the users now, replacing their strided uses with
// strength-reduced forms. This outer loop handles all bases, the inner
while (!UsersToProcess.empty()) {
Instruction *Inst = UsersToProcess.back().Inst;
// Emit the code for Base into the preheader.
Value *BaseV = 0;
if (!Base->isZero()) {
BaseV = PreheaderRewriter.expandCodeFor(Base, 0, PreInsertPt);
DOUT << " INSERTING code for BASE = " << *Base << ":";
if (BaseV->hasName())
DOUT << " Result value name = %" << BaseV->getNameStr();
DOUT << "\n";
// If BaseV is a non-zero constant, make sure that it gets inserted into
// the preheader, instead of being forward substituted into the uses. We
// do this by forcing a BitCast (noop cast) to be inserted into the
// preheader in this case.
if (!fitsInAddressMode(Base, getAccessType(Inst), TLI, false) &&
!isa<Instruction>(BaseV)) {
// We want this constant emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
// Emit the code to add the immediate offset to the Phi value, just before
// the instructions that we identified as using this stride and base.
Chris Lattner
committed
do {
Chris Lattner
committed
BasedUser &User = UsersToProcess.back();
Evan Cheng
committed
DOUT << " Examining ";
if (User.isUseOfPostIncrementedValue)
DOUT << "postinc";
else
DOUT << "preinc";
DOUT << " use ";
DEBUG(WriteAsOperand(*DOUT, UsersToProcess.back().OperandValToReplace,
/*PrintType=*/false));
DOUT << " in Inst: " << *(User.Inst);
// If this instruction wants to use the post-incremented value, move it
// after the post-inc and use its value instead of the PHI.
Value *RewriteOp = User.Phi;
if (User.isUseOfPostIncrementedValue) {
RewriteOp = User.Phi->getIncomingValueForBlock(LatchBlock);
// If this user is in the loop, make sure it is the last thing in the
Evan Cheng
committed
// loop to ensure it is dominated by the increment. In case it's the
// only use of the iv, the increment instruction is already before the
// use.
if (L->contains(User.Inst->getParent()) && User.Inst != IVIncInsertPt)
User.Inst->moveBefore(IVIncInsertPt);
}
const SCEV* RewriteExpr = SE->getUnknown(RewriteOp);
if (SE->getEffectiveSCEVType(RewriteOp->getType()) !=
SE->getEffectiveSCEVType(ReplacedTy)) {
assert(SE->getTypeSizeInBits(RewriteOp->getType()) >
SE->getTypeSizeInBits(ReplacedTy) &&
"Unexpected widening cast!");
RewriteExpr = SE->getTruncateExpr(RewriteExpr, ReplacedTy);
}
// If we had to insert new instructions for RewriteOp, we have to
// consider that they may not have been able to end up immediately
// next to RewriteOp, because non-PHI instructions may never precede
// PHI instructions in a block. In this case, remember where the last
// instruction was inserted so that if we're replacing a different
// PHI node, we can use the later point to expand the final
// RewriteExpr.
Instruction *NewBasePt = dyn_cast<Instruction>(RewriteOp);
if (RewriteOp == User.Phi) NewBasePt = 0;
// Clear the SCEVExpander's expression map so that we are guaranteed
// to have the code emitted where we expect it.
Rewriter.clear();
// If we are reusing the iv, then it must be multiplied by a constant
// factor to take advantage of the addressing mode scale component.
if (!RewriteFactor->isZero()) {
// If we're reusing an IV with a nonzero base (currently this happens
// only when all reuses are outside the loop) subtract that base here.
// The base has been used to initialize the PHI node but we don't want
// it here.
if (SE->getEffectiveSCEVType(RewriteExpr->getType()) !=
SE->getEffectiveSCEVType(ReuseIV.Base->getType())) {
// It's possible the original IV is a larger type than the new IV,
// in which case we have to truncate the Base. We checked in
// RequiresTypeConversion that this is valid.
assert(SE->getTypeSizeInBits(RewriteExpr->getType()) <
SE->getTypeSizeInBits(ReuseIV.Base->getType()) &&
"Unexpected lengthening conversion!");
typedBase = SE->getTruncateExpr(ReuseIV.Base,
RewriteExpr->getType());
}
RewriteExpr = SE->getMinusSCEV(RewriteExpr, typedBase);
}
// Multiply old variable, with base removed, by new scale factor.
RewriteExpr = SE->getMulExpr(RewriteFactor,
RewriteExpr);
// The common base is emitted in the loop preheader. But since we
// are reusing an IV, it has not been used to initialize the PHI node.
// Add it to the expression used to rewrite the uses.
// When this use is outside the loop, we earlier subtracted the
// common base, and are adding it back here. Use the same expression
// as before, rather than CommonBaseV, so DAGCombiner will zap it.
if (!CommonExprs->isZero()) {
if (L->contains(User.Inst->getParent()))
RewriteExpr = SE->getAddExpr(RewriteExpr,
SE->getUnknown(CommonBaseV));
else
RewriteExpr = SE->getAddExpr(RewriteExpr, CommonExprs);
}
}
// Now that we know what we need to do, insert code before User for the
// immediate and any loop-variant expressions.
if (BaseV)
// Add BaseV to the PHI value if needed.
RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV));
User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt,
Rewriter, L, this, *LI,
Evan Cheng
committed
DeadInsts);
// Mark old value we replaced as possibly dead, so that it is eliminated
// if we just replaced the last use of that value.
DeadInsts.push_back(User.OperandValToReplace);
Chris Lattner
committed
UsersToProcess.pop_back();
Chris Lattner
committed
// If there are any more users to process with the same base, process them
// now. We sorted by base above, so we just have to check the last elt.
Chris Lattner
committed
} while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base);
// TODO: Next, find out which base index is the most common, pull it out.
}
// IMPORTANT TODO: Figure out how to partition the IV's with this stride, but
// different starting values, into different PHIs.
}
/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
/// set the IV user and stride information and return true, otherwise return
/// false.
bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
for (unsigned Stride = 0, e = IU->StrideOrder.size();
Stride != e && !CondUse; ++Stride) {
std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
E = SI->second->Users.end(); UI != E; ++UI)
if (UI->getUser() == Cond) {
// NOTE: we could handle setcc instructions with multiple uses here, but
// InstCombine does it as well for simple uses, it's not clear that it
// occurs enough in real life to handle.
CondUse = UI;
CondStride = &SI->first;
return true;
}
}
return false;
}
namespace {
// Constant strides come first which in turns are sorted by their absolute
// values. If absolute values are the same, then positive strides comes first.
// e.g.
// 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X
struct StrideCompare {
const ScalarEvolution *SE;
explicit StrideCompare(const ScalarEvolution *se) : SE(se) {}
bool operator()(const SCEV* const &LHS, const SCEV* const &RHS) {
const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
if (LHSC && RHSC) {
int64_t LV = LHSC->getValue()->getSExtValue();
int64_t RV = RHSC->getValue()->getSExtValue();
uint64_t ALV = (LV < 0) ? -LV : LV;
uint64_t ARV = (RV < 0) ? -RV : RV;
if (ALV == ARV) {
if (LV != RV)
return LV > RV;
} else {
return ALV < ARV;
}
// If it's the same value but different type, sort by bit width so
// that we emit larger induction variables before smaller
// ones, letting the smaller be re-written in terms of larger ones.
return SE->getTypeSizeInBits(RHS->getType()) <
SE->getTypeSizeInBits(LHS->getType());
}
return LHSC && !RHSC;
}
};
}
/// ChangeCompareStride - If a loop termination compare instruction is the
/// only use of its stride, and the compaison is against a constant value,
/// try eliminate the stride by moving the compare instruction to another
/// stride and change its constant operand accordingly. e.g.
///
/// loop:
/// ...
/// v1 = v1 + 3
/// v2 = v2 + 1
/// if (v2 < 10) goto loop
/// =>
/// loop:
/// ...
/// v1 = v1 + 3
/// if (v1 < 30) goto loop
ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
Evan Cheng
committed
IVStrideUse* &CondUse,
// If there's only one stride in the loop, there's nothing to do here.
if (IU->StrideOrder.size() < 2)
return Cond;
// If there are other users of the condition's stride, don't bother
// trying to change the condition because the stride will still
// remain.
std::map<const SCEV*, IVUsersOfOneStride *>::iterator I =
IU->IVUsesByStride.find(*CondStride);
if (I == IU->IVUsesByStride.end() ||
I->second->Users.size() != 1)
return Cond;
// Only handle constant strides for now.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride);
if (!SC) return Cond;
ICmpInst::Predicate Predicate = Cond->getPredicate();
int64_t CmpSSInt = SC->getValue()->getSExtValue();
unsigned BitWidth = SE->getTypeSizeInBits((*CondStride)->getType());
const Type *CmpTy = Cond->getOperand(0)->getType();
unsigned TyBits = SE->getTypeSizeInBits(CmpTy);
Value *NewCmpLHS = NULL;
Value *NewCmpRHS = NULL;
int64_t Scale = 1;
const SCEV* NewOffset = SE->getIntegerSCEV(0, CmpTy);
if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) {
int64_t CmpVal = C->getValue().getSExtValue();
// Check stride constant and the comparision constant signs to detect
// overflow.
if ((CmpVal & SignBit) != (CmpSSInt & SignBit))
return Cond;
// Look for a suitable stride / iv as replacement.
for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[i]);
if (!isa<SCEVConstant>(SI->first))
continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SSInt == CmpSSInt ||
abs64(SSInt) < abs64(CmpSSInt) ||
(SSInt % CmpSSInt) != 0)
Scale = SSInt / CmpSSInt;
int64_t NewCmpVal = CmpVal * Scale;
APInt Mul = APInt(BitWidth*2, CmpVal, true);
Mul = Mul * APInt(BitWidth*2, Scale, true);
// Check for overflow.
// Check for overflow in the stride's type too.
if (!Mul.isSignedIntN(SE->getTypeSizeInBits(SI->first->getType())))
continue;
// Watch out for overflow.
if (ICmpInst::isSignedPredicate(Predicate) &&
(CmpVal & SignBit) != (NewCmpVal & SignBit))
continue;
if (NewCmpVal == CmpVal)
continue;
// Pick the best iv to use trying to avoid a cast.
NewCmpLHS = NULL;
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
E = SI->second->Users.end(); UI != E; ++UI) {
Value *Op = UI->getOperandValToReplace();
// If the IVStrideUse implies a cast, check for an actual cast which
// can be used to find the original IV expression.
if (SE->getEffectiveSCEVType(Op->getType()) !=
SE->getEffectiveSCEVType(SI->first->getType())) {
CastInst *CI = dyn_cast<CastInst>(Op);
// If it's not a simple cast, it's complicated.
if (!CI)
continue;
// If it's a cast from a type other than the stride type,
// it's complicated.
if (CI->getOperand(0)->getType() != SI->first->getType())
continue;
// Ok, we found the IV expression in the stride's type.
Op = CI->getOperand(0);
}
NewCmpLHS = Op;
if (NewCmpLHS->getType() == CmpTy)
break;
}
if (!NewCmpLHS)
continue;
NewCmpTy = NewCmpLHS->getType();
NewTyBits = SE->getTypeSizeInBits(NewCmpTy);
const Type *NewCmpIntTy = Context->getIntegerType(NewTyBits);
if (RequiresTypeConversion(NewCmpTy, CmpTy)) {
// Check if it is possible to rewrite it using
// an iv / stride of a smaller integer type.
unsigned Bits = NewTyBits;
if (ICmpInst::isSignedPredicate(Predicate))
--Bits;
uint64_t Mask = (1ULL << Bits) - 1;
if (((uint64_t)NewCmpVal & Mask) != (uint64_t)NewCmpVal)
continue;
}
// Don't rewrite if use offset is non-constant and the new type is
// of a different type.
// FIXME: too conservative?
if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->getOffset()))
bool AllUsesAreAddresses = true;
bool AllUsesAreOutsideLoop = true;
std::vector<BasedUser> UsersToProcess;
const SCEV* CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
AllUsesAreAddresses,
AllUsesAreOutsideLoop,
UsersToProcess);
// Avoid rewriting the compare instruction with an iv of new stride
// if it's likely the new stride uses will be rewritten using the
// stride of the compare instruction.
if (AllUsesAreAddresses &&
Evan Cheng
committed
ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
continue;
// Avoid rewriting the compare instruction with an iv which has
// implicit extension or truncation built into it.
// TODO: This is over-conservative.
if (SE->getTypeSizeInBits(CondUse->getOffset()->getType()) != TyBits)
continue;
// If scale is negative, use swapped predicate unless it's testing
// for equality.
if (Scale < 0 && !Cond->isEquality())
Predicate = ICmpInst::getSwappedPredicate(Predicate);
NewStride = &IU->StrideOrder[i];
if (!isa<PointerType>(NewCmpTy))
NewCmpRHS = Context->getConstantInt(NewCmpTy, NewCmpVal);
Constant *CI = Context->getConstantInt(NewCmpIntTy, NewCmpVal);
NewCmpRHS = Context->getConstantExprIntToPtr(CI, NewCmpTy);
}
NewOffset = TyBits == NewTyBits
? SE->getMulExpr(CondUse->getOffset(),
SE->getConstant(CmpTy, Scale))
: SE->getConstant(NewCmpIntTy,
cast<SCEVConstant>(CondUse->getOffset())->getValue()
->getSExtValue()*Scale);