Newer
Older
NewStride != e; ++NewStride) {
std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SI->first != Stride && SSInt != 1)
continue;
for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
IE = SI->second.IVs.end(); II != IE; ++II)
// Accept nonzero base here.
// Only reuse previous IV if it would not require a type conversion.
if (!RequiresTypeConversion(II->Base->getType(), Ty)) {
IV = *II;
return Stride;
}
}
// Special case, old IV is -1*x and this one is x. Can treat this one as
// -1*old.
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first))
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(ME->getOperand(0)))
if (Stride == ME->getOperand(1) &&
SC->getValue()->getSExtValue() == -1LL)
for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
IE = SI->second.IVs.end(); II != IE; ++II)
// Accept nonzero base here.
// Only reuse previous IV if it would not require type conversion.
if (!RequiresTypeConversion(II->Base->getType(), Ty)) {
IV = *II;
return SE->getIntegerSCEV(-1LL, Stride->getType());
}
}
}
return SE->getIntegerSCEV(0, Stride->getType());
}
/// PartitionByIsUseOfPostIncrementedValue - Simple boolean predicate that
/// returns true if Val's isUseOfPostIncrementedValue is true.
static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) {
return Val.isUseOfPostIncrementedValue;
}
/// isNonConstantNegative - Return true if the specified scev is negated, but
Chris Lattner
committed
/// not a constant.
static bool isNonConstantNegative(const SCEV *Expr) {
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr);
Chris Lattner
committed
if (!Mul) return false;
Chris Lattner
committed
// If there is a constant factor, it will be first.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
Chris Lattner
committed
if (!SC) return false;
Chris Lattner
committed
// Return true if the value is negative, this matches things like (-42 * V).
return SC->getValue()->getValue().isNegative();
}
/// CollectIVUsers - Transform our list of users and offsets to a bit more
/// complex table. In this new vector, each 'BasedUser' contains 'Base', the
/// base of the strided accesses, as well as the old information from Uses. We
/// progressively move information from the Base field to the Imm field, until
/// we eventually have the full access expression to rewrite the use.
const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *Stride,
IVUsersOfOneStride &Uses,
Loop *L,
bool &AllUsesAreAddresses,
bool &AllUsesAreOutsideLoop,
std::vector<BasedUser> &UsersToProcess) {
// FIXME: Generalize to non-affine IV's.
if (!Stride->isLoopInvariant(L))
return SE->getIntegerSCEV(0, Stride->getType());
UsersToProcess.reserve(Uses.Users.size());
for (ilist<IVStrideUse>::iterator I = Uses.Users.begin(),
E = Uses.Users.end(); I != E; ++I) {
UsersToProcess.push_back(BasedUser(*I, SE));
// Move any loop variant operands from the offset field to the immediate
// field of the use, so that we don't try to use something before it is
// computed.
MoveLoopVariantsToImmediateField(UsersToProcess.back().Base,
Evan Cheng
committed
UsersToProcess.back().Imm, L, SE);
assert(UsersToProcess.back().Base->isLoopInvariant(L) &&
Chris Lattner
committed
"Base value is not loop invariant!");
// We now have a whole bunch of uses of like-strided induction variables, but
// they might all have different bases. We want to emit one PHI node for this
// stride which we fold as many common expressions (between the IVs) into as
// possible. Start by identifying the common expressions in the base values
// for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find
// "A+B"), emit it to the preheader, then remove the expression from the
// UsersToProcess base values.
RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L, TLI);
// Next, figure out what we can represent in the immediate fields of
// instructions. If we can represent anything there, move it to the imm
// fields of the BasedUsers. We do this so that it increases the commonality
// of the remaining uses.
unsigned NumPHI = 0;
bool HasAddress = false;
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
// If the user is not in the current loop, this means it is using the exit
// value of the IV. Do not put anything in the base, make sure it's all in
// the immediate field to allow as much factoring as possible.
if (!L->contains(UsersToProcess[i].Inst)) {
UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm,
UsersToProcess[i].Base);
SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType());
} else {
// Not all uses are outside the loop.
// Addressing modes can be folded into loads and stores. Be careful that
// the store is through the expression, not of the expression though.
bool isPHI = false;
bool isAddress = isAddressUse(UsersToProcess[i].Inst,
UsersToProcess[i].OperandValToReplace);
if (isa<PHINode>(UsersToProcess[i].Inst)) {
isPHI = true;
++NumPHI;
if (isAddress)
HasAddress = true;
// If this use isn't an address, then not all uses are addresses.
MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
UsersToProcess[i].Imm, isAddress, L, SE);
}
}
// If one of the use is a PHI node and all other uses are addresses, still
// allow iv reuse. Essentially we are trading one constant multiplication
// for one fewer iv.
if (NumPHI > 1)
AllUsesAreAddresses = false;
// There are no in-loop address uses.
if (AllUsesAreAddresses && (!HasAddress && !AllUsesAreOutsideLoop))
AllUsesAreAddresses = false;
return CommonExprs;
}
/// ShouldUseFullStrengthReductionMode - Test whether full strength-reduction
/// is valid and profitable for the given set of users of a stride. In
/// full strength-reduction mode, all addresses at the current stride are
/// strength-reduced all the way down to pointer arithmetic.
///
bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode(
const std::vector<BasedUser> &UsersToProcess,
const Loop *L,
bool AllUsesAreAddresses,
if (!EnableFullLSRMode)
return false;
// The heuristics below aim to avoid increasing register pressure, but
// fully strength-reducing all the addresses increases the number of
// add instructions, so don't do this when optimizing for size.
// TODO: If the loop is large, the savings due to simpler addresses
// may oughtweight the costs of the extra increment instructions.
if (L->getHeader()->getParent()->hasFnAttr(Attribute::OptimizeForSize))
return false;
// TODO: For now, don't do full strength reduction if there could
// potentially be greater-stride multiples of the current stride
// which could reuse the current stride IV.
if (IU->StrideOrder.back() != Stride)
return false;
// Iterate through the uses to find conditions that automatically rule out
// full-lsr mode.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) {
const SCEV *Base = UsersToProcess[i].Base;
const SCEV *Imm = UsersToProcess[i].Imm;
// If any users have a loop-variant component, they can't be fully
// strength-reduced.
if (Imm && !Imm->isLoopInvariant(L))
return false;
// If there are to users with the same base and the difference between
// the two Imm values can't be folded into the address, full
// strength reduction would increase register pressure.
do {
const SCEV *CurImm = UsersToProcess[i].Imm;
if ((CurImm || Imm) && CurImm != Imm) {
if (!CurImm) CurImm = SE->getIntegerSCEV(0, Stride->getType());
if (!Imm) Imm = SE->getIntegerSCEV(0, Stride->getType());
const Instruction *Inst = UsersToProcess[i].Inst;
const Type *AccessTy = getAccessType(Inst);
const SCEV *Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm);
if (!Diff->isZero() &&
(!AllUsesAreAddresses ||
!fitsInAddressMode(Diff, AccessTy, TLI, /*HasBaseReg=*/true)))
return false;
}
} while (++i != e && Base == UsersToProcess[i].Base);
}
// If there's exactly one user in this stride, fully strength-reducing it
// won't increase register pressure. If it's starting from a non-zero base,
// it'll be simpler this way.
if (UsersToProcess.size() == 1 && !UsersToProcess[0].Base->isZero())
return true;
// Otherwise, if there are any users in this stride that don't require
// a register for their base, full strength-reduction will increase
// register pressure.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
if (UsersToProcess[i].Base->isZero())
return false;
// Otherwise, go for it.
return true;
}
/// InsertAffinePhi Create and insert a PHI node for an induction variable
/// with the specified start and step values in the specified loop.
///
/// If NegateStride is true, the stride should be negated by using a
/// subtract instead of an add.
///
/// Return the created phi node.
static PHINode *InsertAffinePhi(const SCEV *Start, const SCEV *Step,
Evan Cheng
committed
Instruction *IVIncInsertPt,
SCEVExpander &Rewriter) {
assert(Start->isLoopInvariant(L) && "New PHI start is not loop invariant!");
assert(Step->isLoopInvariant(L) && "New PHI stride is not loop invariant!");
BasicBlock *Header = L->getHeader();
BasicBlock *Preheader = L->getLoopPreheader();
BasicBlock *LatchBlock = L->getLoopLatch();
const Type *Ty = Start->getType();
Ty = Rewriter.SE.getEffectiveSCEVType(Ty);
PHINode *PN = PHINode::Create(Ty, "lsr.iv", Header->begin());
PN->addIncoming(Rewriter.expandCodeFor(Start, Ty, Preheader->getTerminator()),
Preheader);
// If the stride is negative, insert a sub instead of an add for the
// increment.
bool isNegative = isNonConstantNegative(Step);
if (isNegative)
IncAmount = Rewriter.SE.getNegativeSCEV(Step);
// Insert an add instruction right before the terminator corresponding
Evan Cheng
committed
// to the back-edge or just before the only use. The location is determined
// by the caller and passed in as IVIncInsertPt.
Value *StepV = Rewriter.expandCodeFor(IncAmount, Ty,
Preheader->getTerminator());
Instruction *IncV;
if (isNegative) {
IncV = BinaryOperator::CreateSub(PN, StepV, "lsr.iv.next",
Evan Cheng
committed
IVIncInsertPt);
} else {
IncV = BinaryOperator::CreateAdd(PN, StepV, "lsr.iv.next",
Evan Cheng
committed
IVIncInsertPt);
}
if (!isa<ConstantInt>(StepV)) ++NumVariable;
PN->addIncoming(IncV, LatchBlock);
++NumInserted;
return PN;
}
static void SortUsersToProcess(std::vector<BasedUser> &UsersToProcess) {
// We want to emit code for users inside the loop first. To do this, we
// rearrange BasedUser so that the entries at the end have
// isUseOfPostIncrementedValue = false, because we pop off the end of the
// vector (so we handle them first).
std::partition(UsersToProcess.begin(), UsersToProcess.end(),
PartitionByIsUseOfPostIncrementedValue);
// Sort this by base, so that things with the same base are handled
// together. By partitioning first and stable-sorting later, we are
// guaranteed that within each base we will pop off users from within the
// loop before users outside of the loop with a particular base.
//
// We would like to use stable_sort here, but we can't. The problem is that
// const SCEV *'s don't have a deterministic ordering w.r.t to each other, so
// we don't have anything to do a '<' comparison on. Because we think the
// number of uses is small, do a horrible bubble sort which just relies on
// ==.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
// Get a base value.
// Compact everything with this base to be consecutive with this one.
for (unsigned j = i+1; j != e; ++j) {
if (UsersToProcess[j].Base == Base) {
std::swap(UsersToProcess[i+1], UsersToProcess[j]);
++i;
}
}
}
}
/// PrepareToStrengthReduceFully - Prepare to fully strength-reduce
/// UsersToProcess, meaning lowering addresses all the way down to direct
/// pointer arithmetic.
///
void
LoopStrengthReduce::PrepareToStrengthReduceFully(
std::vector<BasedUser> &UsersToProcess,
const SCEV *Stride,
const SCEV *CommonExprs,
const Loop *L,
SCEVExpander &PreheaderRewriter) {
DEBUG(errs() << " Fully reducing all users\n");
// Rewrite the UsersToProcess records, creating a separate PHI for each
// unique Base value.
Evan Cheng
committed
Instruction *IVIncInsertPt = L->getLoopLatch()->getTerminator();
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) {
// TODO: The uses are grouped by base, but not sorted. We arbitrarily
// pick the first Imm value here to start with, and adjust it for the
// other uses.
const SCEV *Imm = UsersToProcess[i].Imm;
const SCEV *Base = UsersToProcess[i].Base;
const SCEV *Start = SE->getAddExpr(CommonExprs, Base, Imm);
Evan Cheng
committed
PHINode *Phi = InsertAffinePhi(Start, Stride, IVIncInsertPt, L,
PreheaderRewriter);
// Loop over all the users with the same base.
do {
UsersToProcess[i].Base = SE->getIntegerSCEV(0, Stride->getType());
UsersToProcess[i].Imm = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm);
UsersToProcess[i].Phi = Phi;
assert(UsersToProcess[i].Imm->isLoopInvariant(L) &&
"ShouldUseFullStrengthReductionMode should reject this!");
} while (++i != e && Base == UsersToProcess[i].Base);
}
}
Evan Cheng
committed
/// FindIVIncInsertPt - Return the location to insert the increment instruction.
/// If the only use if a use of postinc value, (must be the loop termination
/// condition), then insert it just before the use.
static Instruction *FindIVIncInsertPt(std::vector<BasedUser> &UsersToProcess,
const Loop *L) {
if (UsersToProcess.size() == 1 &&
UsersToProcess[0].isUseOfPostIncrementedValue &&
L->contains(UsersToProcess[0].Inst))
Evan Cheng
committed
return UsersToProcess[0].Inst;
return L->getLoopLatch()->getTerminator();
}
/// PrepareToStrengthReduceWithNewPhi - Insert a new induction variable for the
/// given users to share.
///
void
LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(
std::vector<BasedUser> &UsersToProcess,
const SCEV *Stride,
const SCEV *CommonExprs,
Value *CommonBaseV,
Evan Cheng
committed
Instruction *IVIncInsertPt,
const Loop *L,
SCEVExpander &PreheaderRewriter) {
DEBUG(errs() << " Inserting new PHI:\n");
PHINode *Phi = InsertAffinePhi(SE->getUnknown(CommonBaseV),
Evan Cheng
committed
Stride, IVIncInsertPt, L,
PreheaderRewriter);
// Remember this in case a later stride is multiple of this.
IVsByStride[Stride].addIV(Stride, CommonExprs, Phi);
// All the users will share this new IV.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
UsersToProcess[i].Phi = Phi;
DEBUG(errs() << " IV=");
DEBUG(WriteAsOperand(errs(), Phi, /*PrintType=*/false));
DEBUG(errs() << "\n");
Evan Cheng
committed
/// PrepareToStrengthReduceFromSmallerStride - Prepare for the given users to
/// reuse an induction variable with a stride that is a factor of the current
/// induction variable.
///
void
LoopStrengthReduce::PrepareToStrengthReduceFromSmallerStride(
std::vector<BasedUser> &UsersToProcess,
Value *CommonBaseV,
const IVExpr &ReuseIV,
Instruction *PreInsertPt) {
DEBUG(errs() << " Rewriting in terms of existing IV of STRIDE "
<< *ReuseIV.Stride << " and BASE " << *ReuseIV.Base << "\n");
// All the users will share the reused IV.
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
UsersToProcess[i].Phi = ReuseIV.PHI;
Constant *C = dyn_cast<Constant>(CommonBaseV);
if (C &&
(!C->isNullValue() &&
!fitsInAddressMode(SE->getUnknown(CommonBaseV), CommonBaseV->getType(),
TLI, false)))
// We want the common base emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(),
"commonbase", PreInsertPt);
}
static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset,
const Type *AccessTy,
std::vector<BasedUser> &UsersToProcess,
const TargetLowering *TLI) {
SmallVector<Instruction*, 16> AddrModeInsts;
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {
if (UsersToProcess[i].isUseOfPostIncrementedValue)
continue;
ExtAddrMode AddrMode =
AddressingModeMatcher::Match(UsersToProcess[i].OperandValToReplace,
AccessTy, UsersToProcess[i].Inst,
AddrModeInsts, *TLI);
if (GV && GV != AddrMode.BaseGV)
return false;
if (Offset && !AddrMode.BaseOffs)
// FIXME: How to accurate check it's immediate offset is folded.
return false;
AddrModeInsts.clear();
}
return true;
}
Evan Cheng
committed
/// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a single
/// stride of IV. All of the users may have different starting values, and this
/// may not be the only stride.
LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *Stride,
// If all the users are moved to another stride, then there is nothing to do.
if (Uses.Users.empty())
return;
// Keep track if every use in UsersToProcess is an address. If they all are,
// we may be able to rewrite the entire collection of them in terms of a
// smaller-stride IV.
bool AllUsesAreAddresses = true;
// Keep track if every use of a single stride is outside the loop. If so,
// we want to be more aggressive about reusing a smaller-stride IV; a
// multiply outside the loop is better than another IV inside. Well, usually.
bool AllUsesAreOutsideLoop = true;
// Transform our list of users and offsets to a bit more complex table. In
// this new vector, each 'BasedUser' contains 'Base' the base of the
// strided accessas well as the old information from Uses. We progressively
// move information from the Base field to the Imm field, until we eventually
// have the full access expression to rewrite the use.
std::vector<BasedUser> UsersToProcess;
const SCEV *CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,
AllUsesAreOutsideLoop,
UsersToProcess);
// Sort the UsersToProcess array so that users with common bases are
// next to each other.
SortUsersToProcess(UsersToProcess);
// If we managed to find some expressions in common, we'll need to carry
// their value in a register and add it in for each use. This will take up
// a register operand, which potentially restricts what stride values are
// valid.
bool HaveCommonExprs = !CommonExprs->isZero();
const Type *ReplacedTy = CommonExprs->getType();
// If all uses are addresses, consider sinking the immediate part of the
// common expression back into uses if they can fit in the immediate fields.
if (TLI && HaveCommonExprs && AllUsesAreAddresses) {
const SCEV *NewCommon = CommonExprs;
const SCEV *Imm = SE->getIntegerSCEV(0, ReplacedTy);
MoveImmediateValues(TLI, Type::getVoidTy(
L->getLoopPreheader()->getContext()),
NewCommon, Imm, true, L, SE);
if (!Imm->isZero()) {
bool DoSink = true;
// If the immediate part of the common expression is a GV, check if it's
// possible to fold it into the target addressing mode.
GlobalValue *GV = 0;
if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(Imm))
GV = dyn_cast<GlobalValue>(SU->getValue());
int64_t Offset = 0;
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Imm))
Offset = SC->getValue()->getSExtValue();
if (GV || Offset)
// Pass VoidTy as the AccessTy to be conservative, because
// there could be multiple access types among all the uses.
DoSink = IsImmFoldedIntoAddrMode(GV, Offset,
Type::getVoidTy(L->getLoopPreheader()->getContext()),
UsersToProcess, TLI);
if (DoSink) {
DEBUG(errs() << " Sinking " << *Imm << " back down into uses\n");
for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)
UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, Imm);
CommonExprs = NewCommon;
HaveCommonExprs = !CommonExprs->isZero();
++NumImmSunk;
}
}
}
// Now that we know what we need to do, insert the PHI node itself.
//
DEBUG(errs() << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE "
<< *Stride << ":\n"
<< " Common base: " << *CommonExprs << "\n");
SCEVExpander Rewriter(*SE);
SCEVExpander PreheaderRewriter(*SE);
BasicBlock *Preheader = L->getLoopPreheader();
Instruction *PreInsertPt = Preheader->getTerminator();
BasicBlock *LatchBlock = L->getLoopLatch();
Evan Cheng
committed
Instruction *IVIncInsertPt = LatchBlock->getTerminator();
Owen Anderson
committed
Value *CommonBaseV = Constant::getNullValue(ReplacedTy);
const SCEV *RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy);
IVExpr ReuseIV(SE->getIntegerSCEV(0,
Type::getInt32Ty(Preheader->getContext())),
Type::getInt32Ty(Preheader->getContext())),
0);
// Choose a strength-reduction strategy and prepare for it by creating
// the necessary PHIs and adjusting the bookkeeping.
if (ShouldUseFullStrengthReductionMode(UsersToProcess, L,
AllUsesAreAddresses, Stride)) {
PrepareToStrengthReduceFully(UsersToProcess, Stride, CommonExprs, L,
PreheaderRewriter);
} else {
// Emit the initial base value into the loop preheader.
CommonBaseV = PreheaderRewriter.expandCodeFor(CommonExprs, ReplacedTy,
PreInsertPt);
// If all uses are addresses, check if it is possible to reuse an IV. The
// new IV must have a stride that is a multiple of the old stride; the
// multiple must be a number that can be encoded in the scale field of the
// target addressing mode; and we must have a valid instruction after this
// substitution, including the immediate field, if any.
RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
AllUsesAreOutsideLoop,
Stride, ReuseIV, ReplacedTy,
UsersToProcess);
Evan Cheng
committed
if (!RewriteFactor->isZero())
PrepareToStrengthReduceFromSmallerStride(UsersToProcess, CommonBaseV,
ReuseIV, PreInsertPt);
Evan Cheng
committed
else {
IVIncInsertPt = FindIVIncInsertPt(UsersToProcess, L);
PrepareToStrengthReduceWithNewPhi(UsersToProcess, Stride, CommonExprs,
CommonBaseV, IVIncInsertPt,
L, PreheaderRewriter);
}
// Process all the users now, replacing their strided uses with
// strength-reduced forms. This outer loop handles all bases, the inner
while (!UsersToProcess.empty()) {
const SCEV *Base = UsersToProcess.back().Base;
Instruction *Inst = UsersToProcess.back().Inst;
// Emit the code for Base into the preheader.
Value *BaseV = 0;
if (!Base->isZero()) {
BaseV = PreheaderRewriter.expandCodeFor(Base, 0, PreInsertPt);
DEBUG(errs() << " INSERTING code for BASE = " << *Base << ":");
if (BaseV->hasName())
DEBUG(errs() << " Result value name = %" << BaseV->getName());
DEBUG(errs() << "\n");
// If BaseV is a non-zero constant, make sure that it gets inserted into
// the preheader, instead of being forward substituted into the uses. We
// do this by forcing a BitCast (noop cast) to be inserted into the
// preheader in this case.
if (!fitsInAddressMode(Base, getAccessType(Inst), TLI, false) &&
isa<Constant>(BaseV)) {
// We want this constant emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
// Emit the code to add the immediate offset to the Phi value, just before
// the instructions that we identified as using this stride and base.
Chris Lattner
committed
do {
Chris Lattner
committed
BasedUser &User = UsersToProcess.back();
DEBUG(errs() << " Examining ");
Evan Cheng
committed
if (User.isUseOfPostIncrementedValue)
DEBUG(errs() << "postinc");
Evan Cheng
committed
else
DEBUG(errs() << "preinc");
DEBUG(errs() << " use ");
DEBUG(WriteAsOperand(errs(), UsersToProcess.back().OperandValToReplace,
/*PrintType=*/false));
DEBUG(errs() << " in Inst: " << *User.Inst);
// If this instruction wants to use the post-incremented value, move it
// after the post-inc and use its value instead of the PHI.
Value *RewriteOp = User.Phi;
if (User.isUseOfPostIncrementedValue) {
RewriteOp = User.Phi->getIncomingValueForBlock(LatchBlock);
// If this user is in the loop, make sure it is the last thing in the
Evan Cheng
committed
// loop to ensure it is dominated by the increment. In case it's the
// only use of the iv, the increment instruction is already before the
// use.
if (L->contains(User.Inst) && User.Inst != IVIncInsertPt)
Evan Cheng
committed
User.Inst->moveBefore(IVIncInsertPt);
}
const SCEV *RewriteExpr = SE->getUnknown(RewriteOp);
if (SE->getEffectiveSCEVType(RewriteOp->getType()) !=
SE->getEffectiveSCEVType(ReplacedTy)) {
assert(SE->getTypeSizeInBits(RewriteOp->getType()) >
SE->getTypeSizeInBits(ReplacedTy) &&
"Unexpected widening cast!");
RewriteExpr = SE->getTruncateExpr(RewriteExpr, ReplacedTy);
}
// If we had to insert new instructions for RewriteOp, we have to
// consider that they may not have been able to end up immediately
// next to RewriteOp, because non-PHI instructions may never precede
// PHI instructions in a block. In this case, remember where the last
// instruction was inserted so that if we're replacing a different
// PHI node, we can use the later point to expand the final
// RewriteExpr.
Instruction *NewBasePt = dyn_cast<Instruction>(RewriteOp);
if (RewriteOp == User.Phi) NewBasePt = 0;
// Clear the SCEVExpander's expression map so that we are guaranteed
// to have the code emitted where we expect it.
Rewriter.clear();
// If we are reusing the iv, then it must be multiplied by a constant
// factor to take advantage of the addressing mode scale component.
if (!RewriteFactor->isZero()) {
// If we're reusing an IV with a nonzero base (currently this happens
// only when all reuses are outside the loop) subtract that base here.
// The base has been used to initialize the PHI node but we don't want
// it here.
if (SE->getEffectiveSCEVType(RewriteExpr->getType()) !=
SE->getEffectiveSCEVType(ReuseIV.Base->getType())) {
// It's possible the original IV is a larger type than the new IV,
// in which case we have to truncate the Base. We checked in
// RequiresTypeConversion that this is valid.
assert(SE->getTypeSizeInBits(RewriteExpr->getType()) <
SE->getTypeSizeInBits(ReuseIV.Base->getType()) &&
"Unexpected lengthening conversion!");
typedBase = SE->getTruncateExpr(ReuseIV.Base,
RewriteExpr->getType());
}
RewriteExpr = SE->getMinusSCEV(RewriteExpr, typedBase);
}
// Multiply old variable, with base removed, by new scale factor.
RewriteExpr = SE->getMulExpr(RewriteFactor,
RewriteExpr);
// The common base is emitted in the loop preheader. But since we
// are reusing an IV, it has not been used to initialize the PHI node.
// Add it to the expression used to rewrite the uses.
// When this use is outside the loop, we earlier subtracted the
// common base, and are adding it back here. Use the same expression
// as before, rather than CommonBaseV, so DAGCombiner will zap it.
if (!CommonExprs->isZero()) {
if (L->contains(User.Inst))
RewriteExpr = SE->getAddExpr(RewriteExpr,
SE->getUnknown(CommonBaseV));
else
RewriteExpr = SE->getAddExpr(RewriteExpr, CommonExprs);
}
}
// Now that we know what we need to do, insert code before User for the
// immediate and any loop-variant expressions.
if (BaseV)
// Add BaseV to the PHI value if needed.
RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV));
User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt,
Rewriter, L, this,
DeadInsts, SE);
// Mark old value we replaced as possibly dead, so that it is eliminated
// if we just replaced the last use of that value.
DeadInsts.push_back(User.OperandValToReplace);
Chris Lattner
committed
UsersToProcess.pop_back();
Chris Lattner
committed
// If there are any more users to process with the same base, process them
// now. We sorted by base above, so we just have to check the last elt.
Chris Lattner
committed
} while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base);
// TODO: Next, find out which base index is the most common, pull it out.
}
// IMPORTANT TODO: Figure out how to partition the IV's with this stride, but
// different starting values, into different PHIs.
}
Evan Cheng
committed
void LoopStrengthReduce::StrengthReduceIVUsers(Loop *L) {
// Note: this processes each stride/type pair individually. All users
// passed into StrengthReduceIVUsersOfStride have the same type AND stride.
// Also, note that we iterate over IVUsesByStride indirectly by using
// StrideOrder. This extra layer of indirection makes the ordering of
// strides deterministic - not dependent on map order.
for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
// FIXME: Generalize to non-affine IV's.
if (!SI->first->isLoopInvariant(L))
continue;
StrengthReduceIVUsersOfStride(SI->first, *SI->second, L);
}
}
/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
/// set the IV user and stride information and return true, otherwise return
/// false.
bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond,
IVStrideUse *&CondUse,
const SCEV* &CondStride) {
for (unsigned Stride = 0, e = IU->StrideOrder.size();
Stride != e && !CondUse; ++Stride) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
E = SI->second->Users.end(); UI != E; ++UI)
if (UI->getUser() == Cond) {
// NOTE: we could handle setcc instructions with multiple uses here, but
// InstCombine does it as well for simple uses, it's not clear that it
// occurs enough in real life to handle.
CondUse = UI;
Evan Cheng
committed
CondStride = SI->first;
return true;
}
}
return false;
namespace {
// Constant strides come first which in turns are sorted by their absolute
// values. If absolute values are the same, then positive strides comes first.
// e.g.
// 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X
struct StrideCompare {
const ScalarEvolution *SE;
explicit StrideCompare(const ScalarEvolution *se) : SE(se) {}
bool operator()(const SCEV *LHS, const SCEV *RHS) {
const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
if (LHSC && RHSC) {
int64_t LV = LHSC->getValue()->getSExtValue();
int64_t RV = RHSC->getValue()->getSExtValue();
uint64_t ALV = (LV < 0) ? -LV : LV;
uint64_t ARV = (RV < 0) ? -RV : RV;
if (ALV == ARV) {
if (LV != RV)
return LV > RV;
} else {
return ALV < ARV;
}
// If it's the same value but different type, sort by bit width so
// that we emit larger induction variables before smaller
// ones, letting the smaller be re-written in terms of larger ones.
return SE->getTypeSizeInBits(RHS->getType()) <
SE->getTypeSizeInBits(LHS->getType());
}
return LHSC && !RHSC;
}
};
}
/// ChangeCompareStride - If a loop termination compare instruction is the
/// only use of its stride, and the compaison is against a constant value,
/// try eliminate the stride by moving the compare instruction to another
/// stride and change its constant operand accordingly. e.g.
///
/// loop:
/// ...
/// v1 = v1 + 3
/// v2 = v2 + 1
/// if (v2 < 10) goto loop
/// =>
/// loop:
/// ...
/// v1 = v1 + 3
/// if (v1 < 30) goto loop
ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
Evan Cheng
committed
IVStrideUse* &CondUse,
const SCEV* &CondStride,
bool PostPass) {
// If there's only one stride in the loop, there's nothing to do here.
if (IU->StrideOrder.size() < 2)
return Cond;
// If there are other users of the condition's stride, don't bother
// trying to change the condition because the stride will still
// remain.
std::map<const SCEV *, IVUsersOfOneStride *>::iterator I =
Evan Cheng
committed
IU->IVUsesByStride.find(CondStride);
if (I == IU->IVUsesByStride.end())
return Cond;
Evan Cheng
committed
if (I->second->Users.size() > 1) {
for (ilist<IVStrideUse>::iterator II = I->second->Users.begin(),
EE = I->second->Users.end(); II != EE; ++II) {
if (II->getUser() == Cond)
continue;
if (!isInstructionTriviallyDead(II->getUser()))
return Cond;
}
}
// Only handle constant strides for now.
Evan Cheng
committed
const SCEVConstant *SC = dyn_cast<SCEVConstant>(CondStride);
if (!SC) return Cond;
ICmpInst::Predicate Predicate = Cond->getPredicate();
int64_t CmpSSInt = SC->getValue()->getSExtValue();
Evan Cheng
committed
unsigned BitWidth = SE->getTypeSizeInBits(CondStride->getType());
const Type *CmpTy = Cond->getOperand(0)->getType();
unsigned TyBits = SE->getTypeSizeInBits(CmpTy);
Evan Cheng
committed
const SCEV *NewStride = NULL;
Value *NewCmpLHS = NULL;
Value *NewCmpRHS = NULL;
int64_t Scale = 1;
const SCEV *NewOffset = SE->getIntegerSCEV(0, CmpTy);
if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) {
int64_t CmpVal = C->getValue().getSExtValue();
Evan Cheng
committed
// Check the relevant induction variable for conformance to
// the pattern.
const SCEV *IV = SE->getSCEV(Cond->getOperand(0));
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
if (!AR || !AR->isAffine())
return Cond;
const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
// Check stride constant and the comparision constant signs to detect
// overflow.
Evan Cheng
committed
if (StartC) {
if ((StartC->getValue()->getSExtValue() < CmpVal && CmpSSInt < 0) ||
(StartC->getValue()->getSExtValue() > CmpVal && CmpSSInt > 0))
return Cond;
} else {
// More restrictive check for the other cases.
if ((CmpVal & SignBit) != (CmpSSInt & SignBit))
return Cond;
}
// Look for a suitable stride / iv as replacement.
for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[i]);
Evan Cheng
committed
if (!isa<SCEVConstant>(SI->first) || SI->second->Users.empty())
continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SSInt == CmpSSInt ||
abs64(SSInt) < abs64(CmpSSInt) ||
(SSInt % CmpSSInt) != 0)
Scale = SSInt / CmpSSInt;
int64_t NewCmpVal = CmpVal * Scale;
Evan Cheng
committed
// If old icmp value fits in icmp immediate field, but the new one doesn't
// try something else.
if (TLI &&
TLI->isLegalICmpImmediate(CmpVal) &&
!TLI->isLegalICmpImmediate(NewCmpVal))
continue;
APInt Mul = APInt(BitWidth*2, CmpVal, true);
Mul = Mul * APInt(BitWidth*2, Scale, true);
// Check for overflow.
// Check for overflow in the stride's type too.
if (!Mul.isSignedIntN(SE->getTypeSizeInBits(SI->first->getType())))
continue;
// Watch out for overflow.
if (ICmpInst::isSigned(Predicate) &&
(CmpVal & SignBit) != (NewCmpVal & SignBit))
continue;
// Pick the best iv to use trying to avoid a cast.
NewCmpLHS = NULL;
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
E = SI->second->Users.end(); UI != E; ++UI) {
Value *Op = UI->getOperandValToReplace();
// If the IVStrideUse implies a cast, check for an actual cast which
// can be used to find the original IV expression.
if (SE->getEffectiveSCEVType(Op->getType()) !=
SE->getEffectiveSCEVType(SI->first->getType())) {
CastInst *CI = dyn_cast<CastInst>(Op);
// If it's not a simple cast, it's complicated.
if (!CI)
continue;
// If it's a cast from a type other than the stride type,
// it's complicated.
if (CI->getOperand(0)->getType() != SI->first->getType())
continue;
// Ok, we found the IV expression in the stride's type.
Op = CI->getOperand(0);
}
NewCmpLHS = Op;
if (NewCmpLHS->getType() == CmpTy)
break;
}
if (!NewCmpLHS)
continue;
NewCmpTy = NewCmpLHS->getType();
NewTyBits = SE->getTypeSizeInBits(NewCmpTy);
const Type *NewCmpIntTy = IntegerType::get(Cond->getContext(), NewTyBits);
if (RequiresTypeConversion(NewCmpTy, CmpTy)) {
// Check if it is possible to rewrite it using
// an iv / stride of a smaller integer type.
unsigned Bits = NewTyBits;
if (ICmpInst::isSigned(Predicate))
--Bits;
uint64_t Mask = (1ULL << Bits) - 1;
if (((uint64_t)NewCmpVal & Mask) != (uint64_t)NewCmpVal)
continue;
}
// Don't rewrite if use offset is non-constant and the new type is
// of a different type.
// FIXME: too conservative?
if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->getOffset()))
Evan Cheng
committed
if (!PostPass) {
bool AllUsesAreAddresses = true;
bool AllUsesAreOutsideLoop = true;
std::vector<BasedUser> UsersToProcess;
const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
AllUsesAreAddresses,
AllUsesAreOutsideLoop,
UsersToProcess);
// Avoid rewriting the compare instruction with an iv of new stride
// if it's likely the new stride uses will be rewritten using the
// stride of the compare instruction.
if (AllUsesAreAddresses &&
ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
continue;
}
// Avoid rewriting the compare instruction with an iv which has
// implicit extension or truncation built into it.
// TODO: This is over-conservative.
if (SE->getTypeSizeInBits(CondUse->getOffset()->getType()) != TyBits)
continue;
// If scale is negative, use swapped predicate unless it's testing
// for equality.
if (Scale < 0 && !Cond->isEquality())
Predicate = ICmpInst::getSwappedPredicate(Predicate);
Evan Cheng
committed
NewStride = IU->StrideOrder[i];
if (!isa<PointerType>(NewCmpTy))
Owen Anderson
committed
NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal);
Owen Anderson
committed
Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal);
NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy);