Newer
Older
}
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
static bool isEqual(const SmallVector<const SCEV *, 2> &LHS,
const SmallVector<const SCEV *, 2> &RHS) {
return LHS == RHS;
}
};
/// LSRUse - This class holds the state that LSR keeps for each use in
/// IVUsers, as well as uses invented by LSR itself. It includes information
/// about what kinds of things can be folded into the user, information about
/// the user itself, and information about how the use may be satisfied.
/// TODO: Represent multiple users of the same expression in common?
class LSRUse {
DenseSet<SmallVector<const SCEV *, 2>, UniquifierDenseMapInfo> Uniquifier;
public:
/// KindType - An enum for a kind of use, indicating what types of
/// scaled and immediate operands it might support.
enum KindType {
Basic, ///< A normal use, with no folding.
Special, ///< A special case of basic, allowing -1 scales.
Address, ///< An address use; folding according to TargetLowering
ICmpZero ///< An equality icmp with both operands folded into one.
// TODO: Add a generic icmp too?
};
KindType Kind;
Type *AccessTy;
SmallVector<int64_t, 8> Offsets;
int64_t MinOffset;
int64_t MaxOffset;
/// AllFixupsOutsideLoop - This records whether all of the fixups using this
/// LSRUse are outside of the loop, in which case some special-case heuristics
/// may be used.
bool AllFixupsOutsideLoop;
/// WidestFixupType - This records the widest use type for any fixup using
/// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
/// max fixup widths to be equivalent, because the narrower one may be relying
/// on the implicit truncation to truncate away bogus bits.
Type *WidestFixupType;
/// Formulae - A list of ways to build a value that can satisfy this user.
/// After the list is populated, one of these is selected heuristically and
/// used to formulate a replacement for OperandValToReplace in UserInst.
SmallVector<Formula, 12> Formulae;
/// Regs - The set of register candidates used by all formulae in this LSRUse.
SmallPtrSet<const SCEV *, 4> Regs;
LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T),
MinOffset(INT64_MAX),
MaxOffset(INT64_MIN),
AllFixupsOutsideLoop(true),
WidestFixupType(0) {}
bool HasFormulaWithSameRegs(const Formula &F) const;
bool InsertFormula(const Formula &F);
void DeleteFormula(Formula &F);
void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
void print(raw_ostream &OS) const;
void dump() const;
};
/// HasFormula - Test whether this use as a formula which has the same
/// registers as the given formula.
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
SmallVector<const SCEV *, 2> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
std::sort(Key.begin(), Key.end());
return Uniquifier.count(Key);
}
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRUse::InsertFormula(const Formula &F) {
SmallVector<const SCEV *, 2> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
std::sort(Key.begin(), Key.end());
if (!Uniquifier.insert(Key).second)
return false;
// Using a register to hold the value of 0 is not profitable.
assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
"Zero allocated in a scaled register!");
#ifndef NDEBUG
for (SmallVectorImpl<const SCEV *>::const_iterator I =
F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I)
assert(!(*I)->isZero() && "Zero allocated in a base register!");
#endif
// Add the formula to the list.
Formulae.push_back(F);
// Record registers now being used by this use.
if (F.ScaledReg) Regs.insert(F.ScaledReg);
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
return true;
}
/// DeleteFormula - Remove the given formula from this use's list.
void LSRUse::DeleteFormula(Formula &F) {
if (&F != &Formulae.back())
std::swap(F, Formulae.back());
Formulae.pop_back();
assert(!Formulae.empty() && "LSRUse has no formulae left!");
/// RecomputeRegs - Recompute the Regs field, and update RegUses.
void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
// Now that we've filtered out some formulae, recompute the Regs set.
SmallPtrSet<const SCEV *, 4> OldRegs = Regs;
Regs.clear();
for (SmallVectorImpl<Formula>::const_iterator I = Formulae.begin(),
E = Formulae.end(); I != E; ++I) {
const Formula &F = *I;
if (F.ScaledReg) Regs.insert(F.ScaledReg);
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
}
// Update the RegTracker.
for (SmallPtrSet<const SCEV *, 4>::iterator I = OldRegs.begin(),
E = OldRegs.end(); I != E; ++I)
if (!Regs.count(*I))
RegUses.DropRegister(*I, LUIdx);
}
void LSRUse::print(raw_ostream &OS) const {
OS << "LSR Use: Kind=";
switch (Kind) {
case Basic: OS << "Basic"; break;
case Special: OS << "Special"; break;
case ICmpZero: OS << "ICmpZero"; break;
case Address:
OS << "Address of ";
Duncan Sands
committed
if (AccessTy->isPointerTy())
OS << "pointer"; // the full pointer type could be really verbose
OS << *AccessTy;
OS << ", Offsets={";
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
E = Offsets.end(); I != E; ++I) {
OS << *I;
if (llvm::next(I) != E)
OS << ',';
}
OS << '}';
if (AllFixupsOutsideLoop)
OS << ", all-fixups-outside-loop";
if (WidestFixupType)
OS << ", widest fixup type: " << *WidestFixupType;
void LSRUse::dump() const {
print(errs()); errs() << '\n';
}
/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
/// be completely folded into the user instruction at isel time. This includes
/// address-mode folding and special icmp tricks.
static bool isLegalUse(const TargetLowering::AddrMode &AM,
LSRUse::KindType Kind, Type *AccessTy,
const TargetLowering *TLI) {
switch (Kind) {
case LSRUse::Address:
// If we have low-level target information, ask the target if it can
// completely fold this address.
if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);
// Otherwise, just guess that reg+reg addressing is legal.
return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
case LSRUse::ICmpZero:
// There's not even a target hook for querying whether it would be legal to
// fold a GV into an ICmp.
if (AM.BaseGV)
return false;
// ICmp only has two operands; don't allow more than two non-trivial parts.
if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0)
return false;
// ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
// putting the scaled register in the other operand of the icmp.
if (AM.Scale != 0 && AM.Scale != -1)
// If we have low-level target information, ask the target if it can fold an
// integer immediate on an icmp.
if (AM.BaseOffs != 0) {
if (TLI) return TLI->isLegalICmpImmediate(-(uint64_t)AM.BaseOffs);
return false;
}
case LSRUse::Basic:
// Only handle single-register values.
return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
case LSRUse::Special:
// Only handle -1 scales, or no scale.
return AM.Scale == 0 || AM.Scale == -1;
}
return false;
}
static bool isLegalUse(TargetLowering::AddrMode AM,
int64_t MinOffset, int64_t MaxOffset,
LSRUse::KindType Kind, Type *AccessTy,
const TargetLowering *TLI) {
// Check for overflow.
if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
(MinOffset > 0))
return false;
AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset;
if (isLegalUse(AM, Kind, AccessTy, TLI)) {
AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset;
// Check for overflow.
if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) !=
(MaxOffset > 0))
AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset;
return isLegalUse(AM, Kind, AccessTy, TLI);
return false;
}
static bool isAlwaysFoldable(int64_t BaseOffs,
GlobalValue *BaseGV,
bool HasBaseReg,
LSRUse::KindType Kind, Type *AccessTy,
// Fast-path: zero is always foldable.
if (BaseOffs == 0 && !BaseGV) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
TargetLowering::AddrMode AM;
AM.BaseOffs = BaseOffs;
AM.BaseGV = BaseGV;
AM.HasBaseReg = HasBaseReg;
AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
// Canonicalize a scale of 1 to a base register if the formula doesn't
// already have a base register.
if (!AM.HasBaseReg && AM.Scale == 1) {
AM.Scale = 0;
AM.HasBaseReg = true;
}
return isLegalUse(AM, Kind, AccessTy, TLI);
}
static bool isAlwaysFoldable(const SCEV *S,
int64_t MinOffset, int64_t MaxOffset,
bool HasBaseReg,
LSRUse::KindType Kind, Type *AccessTy,
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
const TargetLowering *TLI,
ScalarEvolution &SE) {
// Fast-path: zero is always foldable.
if (S->isZero()) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
int64_t BaseOffs = ExtractImmediate(S, SE);
GlobalValue *BaseGV = ExtractSymbol(S, SE);
// If there's anything else involved, it's not foldable.
if (!S->isZero()) return false;
// Fast-path: zero is always foldable.
if (BaseOffs == 0 && !BaseGV) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
TargetLowering::AddrMode AM;
AM.BaseOffs = BaseOffs;
AM.BaseGV = BaseGV;
AM.HasBaseReg = HasBaseReg;
AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
}
namespace {
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
/// UseMapDenseMapInfo - A DenseMapInfo implementation for holding
/// DenseMaps and DenseSets of pairs of const SCEV* and LSRUse::Kind.
struct UseMapDenseMapInfo {
static std::pair<const SCEV *, LSRUse::KindType> getEmptyKey() {
return std::make_pair(reinterpret_cast<const SCEV *>(-1), LSRUse::Basic);
}
static std::pair<const SCEV *, LSRUse::KindType> getTombstoneKey() {
return std::make_pair(reinterpret_cast<const SCEV *>(-2), LSRUse::Basic);
}
static unsigned
getHashValue(const std::pair<const SCEV *, LSRUse::KindType> &V) {
unsigned Result = DenseMapInfo<const SCEV *>::getHashValue(V.first);
Result ^= DenseMapInfo<unsigned>::getHashValue(unsigned(V.second));
return Result;
}
static bool isEqual(const std::pair<const SCEV *, LSRUse::KindType> &LHS,
const std::pair<const SCEV *, LSRUse::KindType> &RHS) {
return LHS == RHS;
}
};
/// LSRInstance - This class holds state for the main loop strength reduction
/// logic.
class LSRInstance {
IVUsers &IU;
ScalarEvolution &SE;
DominatorTree &DT;
LoopInfo &LI;
const TargetLowering *const TLI;
Loop *const L;
bool Changed;
/// IVIncInsertPos - This is the insert position that the current loop's
/// induction variable increment should be placed. In simple loops, this is
/// the latch block's terminator. But in more complicated cases, this is a
/// position which will dominate all the in-loop post-increment users.
Instruction *IVIncInsertPos;
/// Factors - Interesting factors between use strides.
SmallSetVector<int64_t, 8> Factors;
/// Types - Interesting use types, to facilitate truncation reuse.
SmallSetVector<Type *, 4> Types;
/// Fixups - The list of operands which are to be replaced.
SmallVector<LSRFixup, 16> Fixups;
/// Uses - The list of interesting uses.
SmallVector<LSRUse, 16> Uses;
/// RegUses - Track which uses use which register candidates.
RegUseTracker RegUses;
void OptimizeShadowIV();
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
void OptimizeLoopTermCond();
void CollectInterestingTypesAndFactors();
void CollectFixupsAndInitialFormulae();
LSRFixup &getNewFixup() {
Fixups.push_back(LSRFixup());
return Fixups.back();
}
// Support for sharing of LSRUses between LSRFixups.
typedef DenseMap<std::pair<const SCEV *, LSRUse::KindType>,
size_t,
UseMapDenseMapInfo> UseMapTy;
UseMapTy UseMap;
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
LSRUse::KindType Kind, Type *AccessTy);
std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
LSRUse::KindType Kind,
Type *AccessTy);
void DeleteUse(LSRUse &LU, size_t LUIdx);
LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void CountRegisters(const Formula &F, size_t LUIdx);
bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
void CollectLoopInvariantFixupsAndFormulae();
void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
unsigned Depth = 0);
void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateCrossUseConstantOffsets();
void GenerateAllReuseFormulae();
void FilterOutUndesirableDedicatedRegisters();
size_t EstimateSearchSpaceComplexity() const;
void NarrowSearchSpaceByDetectingSupersets();
void NarrowSearchSpaceByCollapsingUnrolledCode();
void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
void NarrowSearchSpaceByPickingWinnerRegs();
void NarrowSearchSpaceUsingHeuristics();
void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
Cost &SolutionCost,
SmallVectorImpl<const Formula *> &Workspace,
const Cost &CurCost,
const SmallPtrSet<const SCEV *, 16> &CurRegs,
DenseSet<const SCEV *> &VisitedRegs) const;
void Solve(SmallVectorImpl<const Formula *> &Solution) const;
BasicBlock::iterator
HoistInsertPosition(BasicBlock::iterator IP,
const SmallVectorImpl<Instruction *> &Inputs) const;
BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
const LSRFixup &LF,
const LSRUse &LU) const;
Value *Expand(const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) const;
void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts,
Pass *P) const;
void Rewrite(const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts,
Pass *P) const;
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Pass *P);
LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
bool getChanged() const { return Changed; }
void print_factors_and_types(raw_ostream &OS) const;
void print_fixups(raw_ostream &OS) const;
void print_uses(raw_ostream &OS) const;
void print(raw_ostream &OS) const;
void dump() const;
};
}
/// OptimizeShadowIV - If IV is used in a int-to-float cast
/// inside the loop then try to eliminate the cast operation.
void LSRInstance::OptimizeShadowIV() {
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return;
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
UI != E; /* empty */) {
IVUsers::const_iterator CandidateUI = UI;
++UI;
Instruction *ShadowUse = CandidateUI->getUser();
Type *DestTy = NULL;
bool IsSigned = false;
/* If shadow use is a int->float cast then insert a second IV
to eliminate this cast.
for (unsigned i = 0; i < n; ++i)
foo((double)i);
is transformed into
double d = 0.0;
for (unsigned i = 0; i < n; ++i, ++d)
foo(d);
*/
if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
IsSigned = false;
DestTy = UCast->getDestTy();
}
else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
IsSigned = true;
DestTy = SCast->getDestTy();
if (!DestTy) continue;
Evan Cheng
committed
if (TLI) {
// If target does not support DestTy natively then do not apply
// this transformation.
EVT DVT = TLI->getValueType(DestTy);
if (!TLI->isTypeLegal(DVT)) continue;
PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
if (!PH) continue;
if (PH->getNumIncomingValues() != 2) continue;
Type *SrcTy = PH->getType();
int Mantissa = DestTy->getFPMantissaWidth();
if (Mantissa == -1) continue;
if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
continue;
unsigned Entry, Latch;
if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
Entry = 0;
Latch = 1;
} else {
Entry = 1;
Latch = 0;
}
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
(double)Init->getSExtValue() :
(double)Init->getZExtValue());
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
if (!Incr) continue;
if (Incr->getOpcode() != Instruction::Add
&& Incr->getOpcode() != Instruction::Sub)
continue;
/* Initialize new IV, double d = 0.0 in above example. */
ConstantInt *C = NULL;
if (Incr->getOperand(0) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(1));
else if (Incr->getOperand(1) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(0));
else
continue;
if (!C) continue;
// Ignore negative constants, as the code below doesn't handle them
// correctly. TODO: Remove this restriction.
if (!C->getValue().isStrictlyPositive()) continue;
/* Add new PHINode. */
PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
/* create new increment. '++d' in above example. */
Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
BinaryOperator *NewIncr =
BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
Instruction::FAdd : Instruction::FSub,
NewPH, CFP, "IV.S.next.", Incr);
NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
/* Remove cast operation */
ShadowUse->replaceAllUsesWith(NewPH);
ShadowUse->eraseFromParent();
Changed = true;
}
/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
/// set the IV user and stride information and return true, otherwise return
/// false.
bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
if (UI->getUser() == Cond) {
// NOTE: we could handle setcc instructions with multiple uses here, but
// InstCombine does it as well for simple uses, it's not clear that it
// occurs enough in real life to handle.
CondUse = UI;
return true;
}
return false;
}
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
/// OptimizeMax - Rewrite the loop's terminating condition if it uses
/// a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
///
/// i = 0;
/// do {
/// p[i] = 0.0;
/// } while (++i < n);
///
/// the trip count isn't just 'n', because 'n' might not be positive. And
/// unfortunately this can come up even for loops where the user didn't use
/// a C do-while loop. For example, seemingly well-behaved top-test loops
/// will commonly be lowered like this:
//
/// if (n > 0) {
/// i = 0;
/// do {
/// p[i] = 0.0;
/// } while (++i < n);
/// }
///
/// and then it's possible for subsequent optimization to obscure the if
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
/// max expression, which allows it to give the loop a canonical
/// induction variable:
///
/// i = 0;
/// max = n < 1 ? 1 : n;
/// do {
/// p[i] = 0.0;
/// } while (++i != max);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
/// LoopInfo analysis, which doesn't remember trip count values. It
/// expects to be able to rediscover the trip count each time it is
/// needed, and it does this using a simple analysis that only succeeds if
/// the loop has a canonical induction variable.
///
/// However, when it comes time to generate code, the maximum operation
/// can be quite costly, especially if it's inside of an outer loop.
///
/// This function solves this problem by detecting this type of loop and
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
///
ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// Check that the loop matches the pattern we're looking for.
if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
Cond->getPredicate() != CmpInst::ICMP_NE)
return Cond;
SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
if (!Sel || !Sel->hasOneUse()) return Cond;
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return Cond;
const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
// Add one to the backedge-taken count to get the trip count.
const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
if (IterationCount != SE.getSCEV(Sel)) return Cond;
// Check for a max calculation that matches the pattern. There's no check
// for ICMP_ULE here because the comparison would be with zero, which
// isn't interesting.
CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
const SCEVNAryExpr *Max = 0;
if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
Pred = ICmpInst::ICMP_SLE;
Max = S;
} else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
Pred = ICmpInst::ICMP_SLT;
Max = S;
} else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
Pred = ICmpInst::ICMP_ULT;
Max = U;
} else {
// No match; bail.
// To handle a max with more than two operands, this optimization would
// require additional checking and setup.
if (Max->getNumOperands() != 2)
return Cond;
const SCEV *MaxLHS = Max->getOperand(0);
const SCEV *MaxRHS = Max->getOperand(1);
// ScalarEvolution canonicalizes constants to the left. For < and >, look
// for a comparison with 1. For <= and >=, a comparison with zero.
if (!MaxLHS ||
(ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
return Cond;
// Check the relevant induction variable for conformance to
// the pattern.
const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
if (!AR || !AR->isAffine() ||
AR->getStart() != One ||
AR->getStepRecurrence(SE) != One)
assert(AR->getLoop() == L &&
"Loop condition operand is an addrec in a different loop!");
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
Value *NewRHS = 0;
if (ICmpInst::isTrueWhenEqual(Pred)) {
// Look for n+1, and grab n.
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
if (isa<ConstantInt>(BO->getOperand(1)) &&
cast<ConstantInt>(BO->getOperand(1))->isOne() &&
SE.getSCEV(BO->getOperand(0)) == MaxRHS)
NewRHS = BO->getOperand(0);
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
if (isa<ConstantInt>(BO->getOperand(1)) &&
cast<ConstantInt>(BO->getOperand(1))->isOne() &&
SE.getSCEV(BO->getOperand(0)) == MaxRHS)
NewRHS = BO->getOperand(0);
if (!NewRHS)
return Cond;
} else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
NewRHS = Sel->getOperand(1);
else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
NewRHS = Sel->getOperand(2);
else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
NewRHS = SU->getValue();
else
// Max doesn't match expected pattern.
return Cond;
// Determine the new comparison opcode. It may be signed or unsigned,
// and the original comparison may be either equality or inequality.
if (Cond->getPredicate() == CmpInst::ICMP_EQ)
Pred = CmpInst::getInversePredicate(Pred);
// Ok, everything looks ok to change the condition into an SLT or SGE and
// delete the max calculation.
ICmpInst *NewCond =
new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
// Delete the max calculation instructions.
Cond->replaceAllUsesWith(NewCond);
CondUse->setUser(NewCond);
Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
Cond->eraseFromParent();
Sel->eraseFromParent();
if (Cmp->use_empty())
Cmp->eraseFromParent();
return NewCond;
}
/// OptimizeLoopTermCond - Change loop terminating condition to use the
/// postinc iv when possible.
void
LSRInstance::OptimizeLoopTermCond() {
SmallPtrSet<Instruction *, 4> PostIncs;
Evan Cheng
committed
BasicBlock *LatchBlock = L->getLoopLatch();
Evan Cheng
committed
SmallVector<BasicBlock*, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
Evan Cheng
committed
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
BasicBlock *ExitingBlock = ExitingBlocks[i];
// Get the terminating condition for the loop if possible. If we
Evan Cheng
committed
// can, we want to change it to use a post-incremented version of its
// induction variable, to allow coalescing the live ranges for the IV into
// one register value.
Evan Cheng
committed
BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
if (!TermBr)
continue;
// FIXME: Overly conservative, termination condition could be an 'or' etc..
if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
continue;
// Search IVUsesByStride to find Cond's IVUse if there is one.
IVStrideUse *CondUse = 0;
ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
if (!FindIVUserForCond(Cond, CondUse))
Evan Cheng
committed
continue;
// If the trip count is computed in terms of a max (due to ScalarEvolution
// being unable to find a sufficient guard, for example), change the loop
// comparison to use SLT or ULT instead of NE.
// One consequence of doing this now is that it disrupts the count-down
// optimization. That's not always a bad thing though, because in such
// cases it may still be worthwhile to avoid a max.
Cond = OptimizeMax(Cond, CondUse);
// If this exiting block dominates the latch block, it may also use
// the post-inc value if it won't be shared with other uses.
// Check for dominance.
if (!DT.dominates(ExitingBlock, LatchBlock))
// Conservatively avoid trying to use the post-inc value in non-latch
// exits if there may be pre-inc users in intervening blocks.
if (LatchBlock != ExitingBlock)
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
// Test if the use is reachable from the exiting block. This dominator
// query is a conservative approximation of reachability.
if (&*UI != CondUse &&
!DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
// Conservatively assume there may be reuse if the quotient of their
// strides could be a legal scale.
const SCEV *A = IU.getStride(*CondUse, L);
const SCEV *B = IU.getStride(*UI, L);
if (!A || !B) continue;
if (SE.getTypeSizeInBits(A->getType()) !=
SE.getTypeSizeInBits(B->getType())) {
if (SE.getTypeSizeInBits(A->getType()) >
SE.getTypeSizeInBits(B->getType()))
B = SE.getSignExtendExpr(B, A->getType());
else
A = SE.getSignExtendExpr(A, B->getType());
}
if (const SCEVConstant *D =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
// Stride of one or negative one can have reuse with non-addresses.
goto decline_post_inc;
// Avoid weird situations.
if (C->getValue().getMinSignedBits() >= 64 ||
C->getValue().isMinSignedValue())
goto decline_post_inc;
// Without TLI, assume that any stride might be valid, and so any
// use might be shared.
if (!TLI)
goto decline_post_inc;
// Check for possible scaled-address reuse.
Type *AccessTy = getAccessType(UI->getUser());
TargetLowering::AddrMode AM;
if (TLI->isLegalAddressingMode(AM, AccessTy))
goto decline_post_inc;
AM.Scale = -AM.Scale;
if (TLI->isLegalAddressingMode(AM, AccessTy))
goto decline_post_inc;
}
}
<< *Cond << '\n');
Evan Cheng
committed
// It's possible for the setcc instruction to be anywhere in the loop, and
// possible for it to have multiple users. If it is not immediately before
// the exiting block branch, move it.
if (&*++BasicBlock::iterator(Cond) != TermBr) {
if (Cond->hasOneUse()) {
Evan Cheng
committed
Cond->moveBefore(TermBr);
} else {
// Clone the terminating condition and insert into the loopend.
ICmpInst *OldCond = Cond;
Evan Cheng
committed
Cond = cast<ICmpInst>(Cond->clone());
Cond->setName(L->getHeader()->getName() + ".termcond");
ExitingBlock->getInstList().insert(TermBr, Cond);
// Clone the IVUse, as the old use still exists!
CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
TermBr->replaceUsesOfWith(OldCond, Cond);
Evan Cheng
committed
}
}
Evan Cheng
committed
// If we get to here, we know that we can transform the setcc instruction to
// use the post-incremented version of the IV, allowing us to coalesce the
// live ranges for the IV correctly.
CondUse->transformToPostInc(L);
Evan Cheng
committed
Changed = true;
Evan Cheng
committed
PostIncs.insert(Cond);
decline_post_inc:;
// Determine an insertion point for the loop induction variable increment. It
// must dominate all the post-inc comparisons we just set up, and it must
// dominate the loop latch edge.
IVIncInsertPos = L->getLoopLatch()->getTerminator();
for (SmallPtrSet<Instruction *, 4>::const_iterator I = PostIncs.begin(),
E = PostIncs.end(); I != E; ++I) {
BasicBlock *BB =
DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
(*I)->getParent());
if (BB == (*I)->getParent())
IVIncInsertPos = *I;
else if (BB != IVIncInsertPos->getParent())
IVIncInsertPos = BB->getTerminator();
}
}
/// reconcileNewOffset - Determine if the given use can accommodate a fixup
/// at the given offset and other details. If so, update the use and
/// return true.
LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
LSRUse::KindType Kind, Type *AccessTy) {
int64_t NewMinOffset = LU.MinOffset;
int64_t NewMaxOffset = LU.MaxOffset;
Type *NewAccessTy = AccessTy;
// Check for a mismatched kind. It's tempting to collapse mismatched kinds to
// something conservative, however this can pessimize in the case that one of
// the uses will have all its uses outside the loop, for example.
if (LU.Kind != Kind)
return false;
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg,
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg,
NewMaxOffset = NewOffset;
}
// Check for a mismatched access type, and fall back conservatively as needed.
// TODO: Be less conservative when the type is similar and can use the same
// addressing modes.
if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
NewAccessTy = Type::getVoidTy(AccessTy->getContext());
// Update the use.
LU.MinOffset = NewMinOffset;
LU.MaxOffset = NewMaxOffset;
LU.AccessTy = NewAccessTy;
if (NewOffset != LU.Offsets.back())
LU.Offsets.push_back(NewOffset);
return true;
}
/// getUse - Return an LSRUse index and an offset value for a fixup which
/// needs the given expression, with the given kind and optional access type.
/// Either reuse an existing use or create a new one, as needed.
std::pair<size_t, int64_t>
LSRInstance::getUse(const SCEV *&Expr,
LSRUse::KindType Kind, Type *AccessTy) {
const SCEV *Copy = Expr;
int64_t Offset = ExtractImmediate(Expr, SE);
// Basic uses can't accept any offset, for example.
if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) {
Expr = Copy;
Offset = 0;
}
std::pair<UseMapTy::iterator, bool> P =
UseMap.insert(std::make_pair(std::make_pair(Expr, Kind), 0));
if (!P.second) {
// A use already existed with this base.
size_t LUIdx = P.first->second;
LSRUse &LU = Uses[LUIdx];
if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
// Reuse this use.
return std::make_pair(LUIdx, Offset);
}
// Create a new use.
size_t LUIdx = Uses.size();
P.first->second = LUIdx;
Uses.push_back(LSRUse(Kind, AccessTy));
LSRUse &LU = Uses[LUIdx];
// We don't need to track redundant offsets, but we don't need to go out
// of our way here to avoid them.
if (LU.Offsets.empty() || Offset != LU.Offsets.back())
LU.Offsets.push_back(Offset);
LU.MinOffset = Offset;
LU.MaxOffset = Offset;
return std::make_pair(LUIdx, Offset);
}
/// DeleteUse - Delete the given use from the Uses list.
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
if (&LU != &Uses.back())
std::swap(LU, Uses.back());
Uses.pop_back();
// Update RegUses.
RegUses.SwapAndDropUse(LUIdx, Uses.size());
}
/// FindUseWithFormula - Look for a use distinct from OrigLU which is has
/// a formula that has the same registers as the given formula.
LSRUse *
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
const LSRUse &OrigLU) {
// Search all uses for the formula. This could be more clever.
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
// Check whether this use is close enough to OrigLU, to see whether it's
// worthwhile looking through its formulae.
// Ignore ICmpZero uses because they may contain formulae generated by
// GenerateICmpZeroScales, in which case adding fixup offsets may
// be invalid.
if (&LU != &OrigLU &&
LU.Kind != LSRUse::ICmpZero &&