"git@repo.hca.bsc.es:rferrer/llvm-epi.git" did not exist on "2d4ce5472ed9c8a2ba6f5965fee28fba4807fcfb"
Newer
Older
static bool isLegalUse(TargetLowering::AddrMode AM,
int64_t MinOffset, int64_t MaxOffset,
LSRUse::KindType Kind, const Type *AccessTy,
const TargetLowering *TLI) {
// Check for overflow.
if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
(MinOffset > 0))
return false;
AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset;
if (isLegalUse(AM, Kind, AccessTy, TLI)) {
AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset;
// Check for overflow.
if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) !=
(MaxOffset > 0))
AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset;
return isLegalUse(AM, Kind, AccessTy, TLI);
return false;
}
static bool isAlwaysFoldable(int64_t BaseOffs,
GlobalValue *BaseGV,
bool HasBaseReg,
LSRUse::KindType Kind, const Type *AccessTy,
const TargetLowering *TLI,
ScalarEvolution &SE) {
// Fast-path: zero is always foldable.
if (BaseOffs == 0 && !BaseGV) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
TargetLowering::AddrMode AM;
AM.BaseOffs = BaseOffs;
AM.BaseGV = BaseGV;
AM.HasBaseReg = HasBaseReg;
AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
return isLegalUse(AM, Kind, AccessTy, TLI);
}
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
static bool isAlwaysFoldable(const SCEV *S,
int64_t MinOffset, int64_t MaxOffset,
bool HasBaseReg,
LSRUse::KindType Kind, const Type *AccessTy,
const TargetLowering *TLI,
ScalarEvolution &SE) {
// Fast-path: zero is always foldable.
if (S->isZero()) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
int64_t BaseOffs = ExtractImmediate(S, SE);
GlobalValue *BaseGV = ExtractSymbol(S, SE);
// If there's anything else involved, it's not foldable.
if (!S->isZero()) return false;
// Fast-path: zero is always foldable.
if (BaseOffs == 0 && !BaseGV) return true;
// Conservatively, create an address with an immediate and a
// base and a scale.
TargetLowering::AddrMode AM;
AM.BaseOffs = BaseOffs;
AM.BaseGV = BaseGV;
AM.HasBaseReg = HasBaseReg;
AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
}
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
/// FormulaSorter - This class implements an ordering for formulae which sorts
/// the by their standalone cost.
class FormulaSorter {
/// These two sets are kept empty, so that we compute standalone costs.
DenseSet<const SCEV *> VisitedRegs;
SmallPtrSet<const SCEV *, 16> Regs;
Loop *L;
LSRUse *LU;
ScalarEvolution &SE;
DominatorTree &DT;
public:
FormulaSorter(Loop *l, LSRUse &lu, ScalarEvolution &se, DominatorTree &dt)
: L(l), LU(&lu), SE(se), DT(dt) {}
bool operator()(const Formula &A, const Formula &B) {
Cost CostA;
CostA.RateFormula(A, Regs, VisitedRegs, L, LU->Offsets, SE, DT);
Regs.clear();
Cost CostB;
CostB.RateFormula(B, Regs, VisitedRegs, L, LU->Offsets, SE, DT);
Regs.clear();
return CostA < CostB;
}
};
/// LSRInstance - This class holds state for the main loop strength reduction
/// logic.
class LSRInstance {
IVUsers &IU;
ScalarEvolution &SE;
DominatorTree &DT;
const TargetLowering *const TLI;
Loop *const L;
bool Changed;
/// IVIncInsertPos - This is the insert position that the current loop's
/// induction variable increment should be placed. In simple loops, this is
/// the latch block's terminator. But in more complicated cases, this is a
/// position which will dominate all the in-loop post-increment users.
Instruction *IVIncInsertPos;
/// Factors - Interesting factors between use strides.
SmallSetVector<int64_t, 8> Factors;
/// Types - Interesting use types, to facilitate truncation reuse.
SmallSetVector<const Type *, 4> Types;
/// Fixups - The list of operands which are to be replaced.
SmallVector<LSRFixup, 16> Fixups;
/// Uses - The list of interesting uses.
SmallVector<LSRUse, 16> Uses;
/// RegUses - Track which uses use which register candidates.
RegUseTracker RegUses;
void OptimizeShadowIV();
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
bool OptimizeLoopTermCond();
void CollectInterestingTypesAndFactors();
void CollectFixupsAndInitialFormulae();
LSRFixup &getNewFixup() {
Fixups.push_back(LSRFixup());
return Fixups.back();
}
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
// Support for sharing of LSRUses between LSRFixups.
typedef DenseMap<const SCEV *, size_t> UseMapTy;
UseMapTy UseMap;
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
LSRUse::KindType Kind, const Type *AccessTy);
std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
LSRUse::KindType Kind,
const Type *AccessTy);
public:
void InsertInitialFormula(const SCEV *S, Loop *L, LSRUse &LU, size_t LUIdx);
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void CountRegisters(const Formula &F, size_t LUIdx);
bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
void CollectLoopInvariantFixupsAndFormulae();
void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
unsigned Depth = 0);
void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateCrossUseConstantOffsets();
void GenerateAllReuseFormulae();
void FilterOutUndesirableDedicatedRegisters();
void NarrowSearchSpaceUsingHeuristics();
void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
Cost &SolutionCost,
SmallVectorImpl<const Formula *> &Workspace,
const Cost &CurCost,
const SmallPtrSet<const SCEV *, 16> &CurRegs,
DenseSet<const SCEV *> &VisitedRegs) const;
void Solve(SmallVectorImpl<const Formula *> &Solution) const;
Value *Expand(const LSRFixup &LF,
const Formula &F,
BasicBlock::iterator IP, Loop *L, Instruction *IVIncInsertPos,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts,
ScalarEvolution &SE, DominatorTree &DT) const;
void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
const Formula &F,
Loop *L, Instruction *IVIncInsertPos,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts,
ScalarEvolution &SE, DominatorTree &DT,
Pass *P) const;
void Rewrite(const LSRFixup &LF,
const Formula &F,
Loop *L, Instruction *IVIncInsertPos,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts,
ScalarEvolution &SE, DominatorTree &DT,
Pass *P) const;
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Pass *P);
LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
bool getChanged() const { return Changed; }
void print_factors_and_types(raw_ostream &OS) const;
void print_fixups(raw_ostream &OS) const;
void print_uses(raw_ostream &OS) const;
void print(raw_ostream &OS) const;
void dump() const;
};
}
/// OptimizeShadowIV - If IV is used in a int-to-float cast
/// inside the loop then try to eliminate the cast opeation.
void LSRInstance::OptimizeShadowIV() {
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return;
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
UI != E; /* empty */) {
IVUsers::const_iterator CandidateUI = UI;
++UI;
Instruction *ShadowUse = CandidateUI->getUser();
const Type *DestTy = NULL;
/* If shadow use is a int->float cast then insert a second IV
to eliminate this cast.
for (unsigned i = 0; i < n; ++i)
foo((double)i);
is transformed into
double d = 0.0;
for (unsigned i = 0; i < n; ++i, ++d)
foo(d);
*/
if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
DestTy = UCast->getDestTy();
else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser()))
DestTy = SCast->getDestTy();
if (!DestTy) continue;
Evan Cheng
committed
if (TLI) {
// If target does not support DestTy natively then do not apply
// this transformation.
EVT DVT = TLI->getValueType(DestTy);
if (!TLI->isTypeLegal(DVT)) continue;
PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
if (!PH) continue;
if (PH->getNumIncomingValues() != 2) continue;
const Type *SrcTy = PH->getType();
int Mantissa = DestTy->getFPMantissaWidth();
if (Mantissa == -1) continue;
if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
continue;
unsigned Entry, Latch;
if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
Entry = 0;
Latch = 1;
} else {
Entry = 1;
Latch = 0;
}
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
if (!Incr) continue;
if (Incr->getOpcode() != Instruction::Add
&& Incr->getOpcode() != Instruction::Sub)
continue;
/* Initialize new IV, double d = 0.0 in above example. */
ConstantInt *C = NULL;
if (Incr->getOperand(0) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(1));
else if (Incr->getOperand(1) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(0));
else
continue;
if (!C) continue;
// Ignore negative constants, as the code below doesn't handle them
// correctly. TODO: Remove this restriction.
if (!C->getValue().isStrictlyPositive()) continue;
/* Add new PHINode. */
PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
/* create new increment. '++d' in above example. */
Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
BinaryOperator *NewIncr =
BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
Instruction::FAdd : Instruction::FSub,
NewPH, CFP, "IV.S.next.", Incr);
NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
/* Remove cast operation */
ShadowUse->replaceAllUsesWith(NewPH);
ShadowUse->eraseFromParent();
break;
}
/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
/// set the IV user and stride information and return true, otherwise return
/// false.
bool LSRInstance::FindIVUserForCond(ICmpInst *Cond,
IVStrideUse *&CondUse) {
for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
if (UI->getUser() == Cond) {
// NOTE: we could handle setcc instructions with multiple uses here, but
// InstCombine does it as well for simple uses, it's not clear that it
// occurs enough in real life to handle.
CondUse = UI;
return true;
}
return false;
}
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
/// OptimizeMax - Rewrite the loop's terminating condition if it uses
/// a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
///
/// i = 0;
/// do {
/// p[i] = 0.0;
/// } while (++i < n);
///
/// the trip count isn't just 'n', because 'n' might not be positive. And
/// unfortunately this can come up even for loops where the user didn't use
/// a C do-while loop. For example, seemingly well-behaved top-test loops
/// will commonly be lowered like this:
//
/// if (n > 0) {
/// i = 0;
/// do {
/// p[i] = 0.0;
/// } while (++i < n);
/// }
///
/// and then it's possible for subsequent optimization to obscure the if
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
/// max expression, which allows it to give the loop a canonical
/// induction variable:
///
/// i = 0;
/// max = n < 1 ? 1 : n;
/// do {
/// p[i] = 0.0;
/// } while (++i != max);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
/// LoopInfo analysis, which doesn't remember trip count values. It
/// expects to be able to rediscover the trip count each time it is
/// needed, and it does this using a simple analysis that only succeeds if
/// the loop has a canonical induction variable.
///
/// However, when it comes time to generate code, the maximum operation
/// can be quite costly, especially if it's inside of an outer loop.
///
/// This function solves this problem by detecting this type of loop and
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
///
ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// Check that the loop matches the pattern we're looking for.
if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
Cond->getPredicate() != CmpInst::ICMP_NE)
return Cond;
SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
if (!Sel || !Sel->hasOneUse()) return Cond;
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return Cond;
const SCEV *One = SE.getIntegerSCEV(1, BackedgeTakenCount->getType());
// Add one to the backedge-taken count to get the trip count.
const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One);
// Check for a max calculation that matches the pattern.
if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount))
return Cond;
const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount);
if (Max != SE.getSCEV(Sel)) return Cond;
// To handle a max with more than two operands, this optimization would
// require additional checking and setup.
if (Max->getNumOperands() != 2)
return Cond;
const SCEV *MaxLHS = Max->getOperand(0);
const SCEV *MaxRHS = Max->getOperand(1);
if (!MaxLHS || MaxLHS != One) return Cond;
// Check the relevant induction variable for conformance to
// the pattern.
const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
if (!AR || !AR->isAffine() ||
AR->getStart() != One ||
AR->getStepRecurrence(SE) != One)
assert(AR->getLoop() == L &&
"Loop condition operand is an addrec in a different loop!");
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
Value *NewRHS = 0;
if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
NewRHS = Sel->getOperand(1);
else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
NewRHS = Sel->getOperand(2);
if (!NewRHS) return Cond;
// Determine the new comparison opcode. It may be signed or unsigned,
// and the original comparison may be either equality or inequality.
CmpInst::Predicate Pred =
isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
if (Cond->getPredicate() == CmpInst::ICMP_EQ)
Pred = CmpInst::getInversePredicate(Pred);
// Ok, everything looks ok to change the condition into an SLT or SGE and
// delete the max calculation.
ICmpInst *NewCond =
new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
// Delete the max calculation instructions.
Cond->replaceAllUsesWith(NewCond);
CondUse->setUser(NewCond);
Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
Cond->eraseFromParent();
Sel->eraseFromParent();
if (Cmp->use_empty())
Cmp->eraseFromParent();
return NewCond;
}
/// OptimizeLoopTermCond - Change loop terminating condition to use the
/// postinc iv when possible.
bool
LSRInstance::OptimizeLoopTermCond() {
SmallPtrSet<Instruction *, 4> PostIncs;
Evan Cheng
committed
BasicBlock *LatchBlock = L->getLoopLatch();
Evan Cheng
committed
SmallVector<BasicBlock*, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
Evan Cheng
committed
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
BasicBlock *ExitingBlock = ExitingBlocks[i];
// Get the terminating condition for the loop if possible. If we
Evan Cheng
committed
// can, we want to change it to use a post-incremented version of its
// induction variable, to allow coalescing the live ranges for the IV into
// one register value.
Evan Cheng
committed
BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
if (!TermBr)
continue;
// FIXME: Overly conservative, termination condition could be an 'or' etc..
if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
continue;
// Search IVUsesByStride to find Cond's IVUse if there is one.
IVStrideUse *CondUse = 0;
ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
if (!FindIVUserForCond(Cond, CondUse))
Evan Cheng
committed
continue;
// If the trip count is computed in terms of a max (due to ScalarEvolution
// being unable to find a sufficient guard, for example), change the loop
// comparison to use SLT or ULT instead of NE.
// One consequence of doing this now is that it disrupts the count-down
// optimization. That's not always a bad thing though, because in such
// cases it may still be worthwhile to avoid a max.
Cond = OptimizeMax(Cond, CondUse);
// If this exiting block dominates the latch block, it may also use
// the post-inc value if it won't be shared with other uses.
// Check for dominance.
if (!DT.dominates(ExitingBlock, LatchBlock))
// Conservatively avoid trying to use the post-inc value in non-latch
// exits if there may be pre-inc users in intervening blocks.
if (LatchBlock != ExitingBlock)
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
// Test if the use is reachable from the exiting block. This dominator
// query is a conservative approximation of reachability.
if (&*UI != CondUse &&
!DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
// Conservatively assume there may be reuse if the quotient of their
// strides could be a legal scale.
const SCEV *A = CondUse->getStride();
const SCEV *B = UI->getStride();
if (SE.getTypeSizeInBits(A->getType()) !=
SE.getTypeSizeInBits(B->getType())) {
if (SE.getTypeSizeInBits(A->getType()) >
SE.getTypeSizeInBits(B->getType()))
B = SE.getSignExtendExpr(B, A->getType());
else
A = SE.getSignExtendExpr(A, B->getType());
}
if (const SCEVConstant *D =
dyn_cast_or_null<SCEVConstant>(getSDiv(B, A, SE))) {
// Stride of one or negative one can have reuse with non-addresses.
if (D->getValue()->isOne() ||
D->getValue()->isAllOnesValue())
goto decline_post_inc;
// Avoid weird situations.
if (D->getValue()->getValue().getMinSignedBits() >= 64 ||
D->getValue()->getValue().isMinSignedValue())
goto decline_post_inc;
// Without TLI, assume that any stride might be valid, and so any
// use might be shared.
if (!TLI)
goto decline_post_inc;
// Check for possible scaled-address reuse.
const Type *AccessTy = getAccessType(UI->getUser());
TargetLowering::AddrMode AM;
AM.Scale = D->getValue()->getSExtValue();
if (TLI->isLegalAddressingMode(AM, AccessTy))
goto decline_post_inc;
AM.Scale = -AM.Scale;
if (TLI->isLegalAddressingMode(AM, AccessTy))
goto decline_post_inc;
}
}
<< *Cond << '\n');
Evan Cheng
committed
// It's possible for the setcc instruction to be anywhere in the loop, and
// possible for it to have multiple users. If it is not immediately before
// the exiting block branch, move it.
if (&*++BasicBlock::iterator(Cond) != TermBr) {
if (Cond->hasOneUse()) {
Evan Cheng
committed
Cond->moveBefore(TermBr);
} else {
// Clone the terminating condition and insert into the loopend.
ICmpInst *OldCond = Cond;
Evan Cheng
committed
Cond = cast<ICmpInst>(Cond->clone());
Cond->setName(L->getHeader()->getName() + ".termcond");
ExitingBlock->getInstList().insert(TermBr, Cond);
// Clone the IVUse, as the old use still exists!
CondUse = &IU.AddUser(CondUse->getStride(), CondUse->getOffset(),
Cond, CondUse->getOperandValToReplace());
TermBr->replaceUsesOfWith(OldCond, Cond);
Evan Cheng
committed
}
}
Evan Cheng
committed
// If we get to here, we know that we can transform the setcc instruction to
// use the post-incremented version of the IV, allowing us to coalesce the
// live ranges for the IV correctly.
CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(),
CondUse->getStride()));
Evan Cheng
committed
CondUse->setIsUseOfPostIncrementedValue(true);
Changed = true;
Evan Cheng
committed
PostIncs.insert(Cond);
decline_post_inc:;
// Determine an insertion point for the loop induction variable increment. It
// must dominate all the post-inc comparisons we just set up, and it must
// dominate the loop latch edge.
IVIncInsertPos = L->getLoopLatch()->getTerminator();
for (SmallPtrSet<Instruction *, 4>::const_iterator I = PostIncs.begin(),
E = PostIncs.end(); I != E; ++I) {
BasicBlock *BB =
DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
(*I)->getParent());
if (BB == (*I)->getParent())
IVIncInsertPos = *I;
else if (BB != IVIncInsertPos->getParent())
IVIncInsertPos = BB->getTerminator();
}
return Changed;
}
bool
LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
LSRUse::KindType Kind, const Type *AccessTy) {
int64_t NewMinOffset = LU.MinOffset;
int64_t NewMaxOffset = LU.MaxOffset;
const Type *NewAccessTy = AccessTy;
// Check for a mismatched kind. It's tempting to collapse mismatched kinds to
// something conservative, however this can pessimize in the case that one of
// the uses will have all its uses outside the loop, for example.
if (LU.Kind != Kind)
return false;
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, /*HasBaseReg=*/true,
Kind, AccessTy, TLI, SE))
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, /*HasBaseReg=*/true,
Kind, AccessTy, TLI, SE))
NewMaxOffset = NewOffset;
}
// Check for a mismatched access type, and fall back conservatively as needed.
if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
NewAccessTy = Type::getVoidTy(AccessTy->getContext());
// Update the use.
LU.MinOffset = NewMinOffset;
LU.MaxOffset = NewMaxOffset;
LU.AccessTy = NewAccessTy;
if (NewOffset != LU.Offsets.back())
LU.Offsets.push_back(NewOffset);
return true;
}
/// getUse - Return an LSRUse index and an offset value for a fixup which
/// needs the given expression, with the given kind and optional access type.
/// Either reuse an exisitng use or create a new one, as needed.
std::pair<size_t, int64_t>
LSRInstance::getUse(const SCEV *&Expr,
LSRUse::KindType Kind, const Type *AccessTy) {
const SCEV *Copy = Expr;
int64_t Offset = ExtractImmediate(Expr, SE);
// Basic uses can't accept any offset, for example.
if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true,
Kind, AccessTy, TLI, SE)) {
Expr = Copy;
Offset = 0;
}
std::pair<UseMapTy::iterator, bool> P =
UseMap.insert(std::make_pair(Expr, 0));
if (!P.second) {
// A use already existed with this base.
size_t LUIdx = P.first->second;
LSRUse &LU = Uses[LUIdx];
if (reconcileNewOffset(LU, Offset, Kind, AccessTy))
// Reuse this use.
return std::make_pair(LUIdx, Offset);
}
// Create a new use.
size_t LUIdx = Uses.size();
P.first->second = LUIdx;
Uses.push_back(LSRUse(Kind, AccessTy));
LSRUse &LU = Uses[LUIdx];
// We don't need to track redundant offsets, but we don't need to go out
// of our way here to avoid them.
if (LU.Offsets.empty() || Offset != LU.Offsets.back())
LU.Offsets.push_back(Offset);
LU.MinOffset = Offset;
LU.MaxOffset = Offset;
return std::make_pair(LUIdx, Offset);
}
void LSRInstance::CollectInterestingTypesAndFactors() {
SmallSetVector<const SCEV *, 4> Strides;
// Collect interesting types and factors.
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
const SCEV *Stride = UI->getStride();
// Collect interesting types.
Types.insert(SE.getEffectiveSCEVType(Stride->getType()));
// Collect interesting factors.
for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
Strides.begin(), SEnd = Strides.end(); NewStrideIter != SEnd;
++NewStrideIter) {
const SCEV *OldStride = Stride;
const SCEV *NewStride = *NewStrideIter;
if (OldStride == NewStride)
if (SE.getTypeSizeInBits(OldStride->getType()) !=
SE.getTypeSizeInBits(NewStride->getType())) {
if (SE.getTypeSizeInBits(OldStride->getType()) >
SE.getTypeSizeInBits(NewStride->getType()))
NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
else
OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
}
if (const SCEVConstant *Factor =
dyn_cast_or_null<SCEVConstant>(getSDiv(NewStride, OldStride,
SE, true))) {
if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
Factors.insert(Factor->getValue()->getValue().getSExtValue());
} else if (const SCEVConstant *Factor =
dyn_cast_or_null<SCEVConstant>(getSDiv(OldStride, NewStride,
SE, true))) {
if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
Factors.insert(Factor->getValue()->getValue().getSExtValue());
}
}
Strides.insert(Stride);
Evan Cheng
committed
// If all uses use the same type, don't bother looking for truncation-based
// reuse.
if (Types.size() == 1)
Types.clear();
DEBUG(print_factors_and_types(dbgs()));
void LSRInstance::CollectFixupsAndInitialFormulae() {
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
// Record the uses.
LSRFixup &LF = getNewFixup();
LF.UserInst = UI->getUser();
LF.OperandValToReplace = UI->getOperandValToReplace();
if (UI->isUseOfPostIncrementedValue())
LF.PostIncLoop = L;
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
LSRUse::KindType Kind = LSRUse::Basic;
const Type *AccessTy = 0;
if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
Kind = LSRUse::Address;
AccessTy = getAccessType(LF.UserInst);
}
const SCEV *S = IU.getCanonicalExpr(*UI);
// Equality (== and !=) ICmps are special. We can rewrite (i == N) as
// (N - i == 0), and this allows (N - i) to be the expression that we work
// with rather than just N or i, so we can consider the register
// requirements for both N and i at the same time. Limiting this code to
// equality icmps is not a problem because all interesting loops use
// equality icmps, thanks to IndVarSimplify.
if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst))
if (CI->isEquality()) {
// Swap the operands if needed to put the OperandValToReplace on the
// left, for consistency.
Value *NV = CI->getOperand(1);
if (NV == LF.OperandValToReplace) {
CI->setOperand(1, CI->getOperand(0));
CI->setOperand(0, NV);
}
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
if (N->isLoopInvariant(L)) {
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
}
// -1 and the negations of all interesting strides (except the negation
// of -1) are now also interesting.
for (size_t i = 0, e = Factors.size(); i != e; ++i)
if (Factors[i] != -1)
Factors.insert(-(uint64_t)Factors[i]);
Factors.insert(-1);
}
// Set up the initial formula for this use.
std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
LU.AllFixupsOutsideLoop &= !L->contains(LF.UserInst);
// If this is the first use of this LSRUse, give it a formula.
if (LU.Formulae.empty()) {
InsertInitialFormula(S, L, LU, LF.LUIdx);
CountRegisters(LU.Formulae.back(), LF.LUIdx);
}
}
DEBUG(print_fixups(dbgs()));
}
void
LSRInstance::InsertInitialFormula(const SCEV *S, Loop *L,
LSRUse &LU, size_t LUIdx) {
Formula F;
F.InitialMatch(S, L, SE, DT);
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}
void
LSRInstance::InsertSupplementalFormula(const SCEV *S,
LSRUse &LU, size_t LUIdx) {
Formula F;
F.BaseRegs.push_back(S);
F.AM.HasBaseReg = true;
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}
/// CountRegisters - Note which registers are used by the given formula,
/// updating RegUses.
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
if (F.ScaledReg)
RegUses.CountRegister(F.ScaledReg, LUIdx);
for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
E = F.BaseRegs.end(); I != E; ++I)
RegUses.CountRegister(*I, LUIdx);
}
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
if (!LU.InsertFormula(LUIdx, F))
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
CountRegisters(F, LUIdx);
return true;
}
/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of
/// loop-invariant values which we're tracking. These other uses will pin these
/// values in registers, making them less profitable for elimination.
/// TODO: This currently misses non-constant addrec step registers.
/// TODO: Should this give more weight to users inside the loop?
void
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
SmallPtrSet<const SCEV *, 8> Inserted;
while (!Worklist.empty()) {
const SCEV *S = Worklist.pop_back_val();
if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
Worklist.insert(Worklist.end(), N->op_begin(), N->op_end());
else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
Worklist.push_back(C->getOperand());
else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
Worklist.push_back(D->getLHS());
Worklist.push_back(D->getRHS());
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (!Inserted.insert(U)) continue;
const Value *V = U->getValue();
if (const Instruction *Inst = dyn_cast<Instruction>(V))
if (L->contains(Inst)) continue;
for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end();
UI != UE; ++UI) {
const Instruction *UserInst = dyn_cast<Instruction>(*UI);
// Ignore non-instructions.
if (!UserInst)
continue;
// Ignore instructions in other functions (as can happen with
// Constants).
if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
continue;
// Ignore instructions not dominated by the loop.
const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
UserInst->getParent() :
cast<PHINode>(UserInst)->getIncomingBlock(
PHINode::getIncomingValueNumForOperand(UI.getOperandNo()));
if (!DT.dominates(L->getHeader(), UseBB))
continue;
// Ignore uses which are part of other SCEV expressions, to avoid
// analyzing them multiple times.
if (SE.isSCEVable(UserInst->getType()) &&
!isa<SCEVUnknown>(SE.getSCEV(const_cast<Instruction *>(UserInst))))
continue;
// Ignore icmp instructions which are already being analyzed.
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
unsigned OtherIdx = !UI.getOperandNo();
Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
if (SE.getSCEV(OtherOp)->hasComputableLoopEvolution(L))
continue;
}
LSRFixup &LF = getNewFixup();
LF.UserInst = const_cast<Instruction *>(UserInst);
LF.OperandValToReplace = UI.getUse();
std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0);
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
LU.AllFixupsOutsideLoop &= L->contains(LF.UserInst);
InsertSupplementalFormula(U, LU, LF.LUIdx);
CountRegisters(LU.Formulae.back(), Uses.size() - 1);
break;
}
}
}
}
/// CollectSubexprs - Split S into subexpressions which can be pulled out into
/// separate registers. If C is non-null, multiply each subexpression by C.
static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
SmallVectorImpl<const SCEV *> &Ops,
ScalarEvolution &SE) {
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
// Break out add operands.
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I)
CollectSubexprs(*I, C, Ops, SE);
return;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
if (!AR->getStart()->isZero()) {
CollectSubexprs(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()),
AR->getStepRecurrence(SE),
AR->getLoop()), C, Ops, SE);
CollectSubexprs(AR->getStart(), C, Ops, SE);
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
return;
}
} else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
// Break (C * (a + b + c)) into C*a + C*b + C*c.
if (Mul->getNumOperands() == 2)
if (const SCEVConstant *Op0 =
dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
CollectSubexprs(Mul->getOperand(1),
C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
Ops, SE);
return;
}
}
// Otherwise use the value itself.
Ops.push_back(C ? SE.getMulExpr(C, S) : S);
}
/// GenerateReassociations - Split out subexpressions from adds and the bases of
/// addrecs.
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
Formula Base,
unsigned Depth) {
// Arbitrarily cap recursion to protect compile time.
if (Depth >= 3) return;
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *BaseReg = Base.BaseRegs[i];
SmallVector<const SCEV *, 8> AddOps;
CollectSubexprs(BaseReg, 0, AddOps, SE);
if (AddOps.size() == 1) continue;
for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
JE = AddOps.end(); J != JE; ++J) {
// Don't pull a constant into a register if the constant could be folded
// into an immediate field.
if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
Base.getNumRegs() > 1,
LU.Kind, LU.AccessTy, TLI, SE))
continue;
// Collect all operands except *J.
SmallVector<const SCEV *, 8> InnerAddOps;
for (SmallVectorImpl<const SCEV *>::const_iterator K = AddOps.begin(),
KE = AddOps.end(); K != KE; ++K)
if (K != J)
InnerAddOps.push_back(*K);
// Don't leave just a constant behind in a register if the constant could
// be folded into an immediate field.
if (InnerAddOps.size() == 1 &&
isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
Base.getNumRegs() > 1,
LU.Kind, LU.AccessTy, TLI, SE))
continue;
Formula F = Base;
F.BaseRegs[i] = SE.getAddExpr(InnerAddOps);
F.BaseRegs.push_back(*J);
if (InsertFormula(LU, LUIdx, F))
// If that formula hadn't been seen before, recurse to find more like
// it.
GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1);
}
}
}