continue;
// Check that this scale is legal.
if (!isLegalUse(F.AM, LU.Kind, LU.AccessTy, TLI))
continue;
const SCEV *FactorS = SE.getSCEV(ConstantInt::get(IntTy, Factor));
// Check that multiplying with each base register doesn't overflow.
for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
if (getSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
goto next;
}
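// (Hypothetical example: in i8, 100 * 2 wraps to -56, and -56 sdiv 2 is
// -28, not 100, so the round-trip check above rejects the formula.)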
// Check that multiplying with the scaled register doesn't overflow.
if (F.ScaledReg) {
F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
if (getSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
continue;
}
// If we make it here and it's legal, add it.
(void)InsertFormula(LU, LUIdx, F);
next:;
}
}
/// GenerateFormulaeFromReplacedBaseReg - If removing base register with
/// index i from the BaseRegs list and adding the registers in AddOps
/// to the address forms an interesting formula, pursue it.
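/// For example (an illustrative sketch, not actual pass output), replacing
/// the base register reg(a + b) with the pair reg(a), reg(b) trades one
/// register for two that may individually be cheaper or already live.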
void
LSRInstance::GenerateFormulaeFromReplacedBaseReg(
LSRUse &LU,
unsigned LUIdx,
const Formula &Base, unsigned i,
const SmallVectorImpl<const SCEV *>
&AddOps) {
if (AddOps.empty()) return;
Formula F = Base;
std::swap(F.BaseRegs[i], F.BaseRegs.back());
F.BaseRegs.pop_back();
for (SmallVectorImpl<const SCEV *>::const_iterator I = AddOps.begin(),
E = AddOps.end(); I != E; ++I)
F.BaseRegs.push_back(*I);
F.AM.HasBaseReg = !F.BaseRegs.empty();
if (InsertFormula(LU, LUIdx, F))
// If that formula hadn't been seen before, recurse to find more like it.
GenerateReassociationReuse(LU, LUIdx, LU.Formulae.back());
}
/// GenerateReassociationReuse - Split out subexpressions from adds and
/// the bases of addrecs.
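/// For example, a base register reg(a + b + c) could be split into
/// reg(a) + reg(b + c) or reg(b) + reg(a + c), and an addrec base
/// reg({a + b,+,s}) into reg(a) + reg({b,+,s}). (Illustrative only;
/// which splits are tried depends on the foldability checks below.)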
void LSRInstance::GenerateReassociationReuse(LSRUse &LU, unsigned LUIdx,
Formula Base) {
SmallVector<const SCEV *, 8> AddOps;
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *BaseReg = Base.BaseRegs[i];
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BaseReg)) {
for (SCEVAddExpr::op_iterator J = Add->op_begin(), JE = Add->op_end();
J != JE; ++J) {
// Don't pull a constant into a register if the constant could be
// folded into an immediate field.
if (isAlwaysFoldable(*J, true, LU.Kind, LU.AccessTy, TLI, SE)) continue;
SmallVector<const SCEV *, 8> InnerAddOps;
for (SCEVAddExpr::op_iterator K = Add->op_begin(); K != JE; ++K)
if (K != J)
InnerAddOps.push_back(*K);
// Splitting a 2-operand add both ways is redundant. Pruning this
// now saves compile time.
if (InnerAddOps.size() < 2 && next(J) == JE)
continue;
AddOps.push_back(*J);
const SCEV *InnerAdd = SE.getAddExpr(InnerAddOps);
AddOps.push_back(InnerAdd);
GenerateFormulaeFromReplacedBaseReg(LU, LUIdx, Base, i, AddOps);
AddOps.clear();
}
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BaseReg)) {
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(AR->getStart())) {
for (SCEVAddExpr::op_iterator J = Add->op_begin(), JE = Add->op_end();
J != JE; ++J) {
// Don't pull a constant into a register if the constant could be
// folded into an immediate field.
if (isAlwaysFoldable(*J, true, LU.Kind, LU.AccessTy, TLI, SE))
continue;
SmallVector<const SCEV *, 8> InnerAddOps;
for (SCEVAddExpr::op_iterator K = Add->op_begin(); K != JE; ++K)
if (K != J)
InnerAddOps.push_back(*K);
AddOps.push_back(*J);
const SCEV *InnerAdd = SE.getAddExpr(InnerAddOps);
AddOps.push_back(SE.getAddRecExpr(InnerAdd,
AR->getStepRecurrence(SE),
AR->getLoop()));
GenerateFormulaeFromReplacedBaseReg(LU, LUIdx, Base, i, AddOps);
AddOps.clear();
}
} else if (!isAlwaysFoldable(AR->getStart(), Base.BaseRegs.size() > 1,
LU.Kind, LU.AccessTy,
TLI, SE)) {
AddOps.push_back(AR->getStart());
AddOps.push_back(SE.getAddRecExpr(SE.getIntegerSCEV(0,
BaseReg->getType()),
AR->getStepRecurrence(SE),
AR->getLoop()));
GenerateFormulaeFromReplacedBaseReg(LU, LUIdx, Base, i, AddOps);
AddOps.clear();
}
}
}
}
/// GenerateCombinationReuse - Generate a formula consisting of all of the
/// loop-dominating registers added into a single register.
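/// For example (hypothetically), reg(a) + reg(b) + reg({0,+,1}) with
/// loop-invariant a and b could become reg(a + b) + reg({0,+,1}),
/// reducing the number of registers live across the loop.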
void LSRInstance::GenerateCombinationReuse(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// This method is only interesting on a plurality of registers.
if (Base.BaseRegs.size() <= 1) return;
Formula F = Base;
F.BaseRegs.clear();
SmallVector<const SCEV *, 4> Ops;
for (SmallVectorImpl<const SCEV *>::const_iterator
I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
const SCEV *BaseReg = *I;
if (BaseReg->properlyDominates(L->getHeader(), &DT) &&
!BaseReg->hasComputableLoopEvolution(L))
Ops.push_back(BaseReg);
else
F.BaseRegs.push_back(BaseReg);
}
if (Ops.size() > 1) {
F.BaseRegs.push_back(SE.getAddExpr(Ops));
(void)InsertFormula(LU, LUIdx, F);
}
}
/// GenerateScaledReuse - Generate stride factor reuse formulae by making
/// use of scaled-offset address modes, for example.
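/// For example, with an interesting factor of 4, reg({0,+,4}) could be
/// re-expressed as 4 * reg({0,+,1}), which is free on targets whose
/// addressing modes include a scaled index (e.g. base + 4*index).
/// (Illustrative sketch; legality is verified with isLegalUse below.)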
void LSRInstance::GenerateScaledReuse(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// Determine the integer type for the base formula.
const Type *IntTy = Base.getType();
if (!IntTy) return;
IntTy = SE.getEffectiveSCEVType(IntTy);
// Check each interesting stride.
for (SmallSetVector<int64_t, 4>::const_iterator
I = Factors.begin(), E = Factors.end(); I != E; ++I) {
int64_t Factor = *I;
// If this Formula already has a scaled register, we can't add another one.
if (Base.AM.Scale != 0)
continue;
Formula F = Base;
F.AM.Scale = Factor;
// Check whether this scale is going to be legal.
if (!isLegalUse(F.AM, LU.Kind, LU.AccessTy, TLI)) {
// As a special case, handle out-of-loop Basic users as Special users.
// TODO: Reconsider this special case.
if (LU.Kind == LSRUse::Basic &&
isLegalUse(F.AM, LSRUse::Special, LU.AccessTy, TLI) &&
!L->contains(LU.UserInst))
LU.Kind = LSRUse::Special;
else
continue;
}
// For each addrec base reg, apply the scale, if possible.
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
if (const SCEVAddRecExpr *AR =
dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
const SCEV *FactorS = SE.getSCEV(ConstantInt::get(IntTy, Factor));
// Divide out the factor, ignoring high bits, since we'll be
// scaling the value back up in the end.
if (const SCEV *Quotient = getSDiv(AR, FactorS, SE, true)) {
// TODO: This could be optimized to avoid all the copying.
Formula NewF = F;
NewF.ScaledReg = Quotient;
std::swap(NewF.BaseRegs[i], NewF.BaseRegs.back());
NewF.BaseRegs.pop_back();
NewF.AM.HasBaseReg = !NewF.BaseRegs.empty();
(void)InsertFormula(LU, LUIdx, NewF);
}
}
}
}
/// GenerateTruncateReuse - Generate reuse formulae from different IV types.
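/// For example, if both i64 and i32 IV types are in use and the target
/// reports i64-to-i32 truncation as free, an i32 formula could be
/// rewritten in terms of the wider IV, making a separate narrow
/// induction variable unnecessary. (Illustrative sketch.)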
void LSRInstance::GenerateTruncateReuse(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// This requires TargetLowering to tell us which truncates are free.
if (!TLI) return;
// Don't attempt to truncate symbolic values.
if (Base.AM.BaseGV) return;
// Determine the integer type for the base formula.
const Type *DstTy = Base.getType();
if (!DstTy) return;
DstTy = SE.getEffectiveSCEVType(DstTy);
for (SmallSetVector<const Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
const Type *SrcTy = *I;
if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
JE = F.BaseRegs.end(); J != JE; ++J)
*J = SE.getAnyExtendExpr(*J, SrcTy);
(void)InsertFormula(LU, LUIdx, F);
}
}
}
namespace {
/// WorkItem - Helper class for GenerateConstantOffsetReuse. It's used to
/// defer modifications so that the search phase doesn't have to worry about
/// the data structures moving underneath it.
struct WorkItem {
LSRUse *LU;
size_t LUIdx;
int64_t Imm;
const SCEV *OrigReg;
WorkItem(LSRUse *U, size_t LI, int64_t I, const SCEV *R)
: LU(U), LUIdx(LI), Imm(I), OrigReg(R) {}
void print(raw_ostream &OS) const;
void dump() const;
};
void WorkItem::print(raw_ostream &OS) const {
OS << "in use ";
LU->print(OS);
OS << " (at index " << LUIdx << "), add offset " << Imm
<< " and compensate by adjusting refences to " << *OrigReg << "\n";
}
void WorkItem::dump() const {
print(errs()); errs() << '\n';
}
}
/// GenerateConstantOffsetReuse - Look for registers which are a constant
/// distance apart and try to form reuse opportunities between them.
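/// For example, if reg({4,+,1}) and reg({0,+,1}) both appear, uses of
/// the former could be rewritten in terms of reg({0,+,1}) with the
/// constant 4 folded into an addressing-mode offset, so both uses share
/// one register. (Illustrative sketch.)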
void LSRInstance::GenerateConstantOffsetReuse() {
// Group the registers by their value without any added constant offset.
typedef std::map<int64_t, const SCEV *> ImmMapTy;
typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
RegMapTy Map;
SmallVector<const SCEV *, 8> Sequence;
for (SmallVectorImpl<const SCEV *>::iterator I = RegSequence.begin(),
E = RegSequence.end(); I != E; ++I) {
const SCEV *Reg = *I;
int64_t Imm = ExtractImmediate(Reg, SE);
std::pair<RegMapTy::iterator, bool> Pair =
Map.insert(std::make_pair(Reg, ImmMapTy()));
if (Pair.second)
Sequence.push_back(Reg);
Pair.first->second.insert(std::make_pair(Imm, *I));
}
// Insert an artificial expression at offset 0 (if there isn't one already),
// as this may lead to more reuse opportunities.
for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
E = Sequence.end(); I != E; ++I)
Map.find(*I)->second.insert(ImmMapTy::value_type(0, 0));
// Now examine each set of registers with the same base value. Build up
// a list of work to do and do the work in a separate step so that we're
// not adding formulae and register counts while we're searching.
SmallVector<WorkItem, 32> WorkItems;
for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
E = Sequence.end(); I != E; ++I) {
const SCEV *Reg = *I;
const ImmMapTy &Imms = Map.find(Reg)->second;
// Examine each offset.
for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
J != JE; ++J) {
const SCEV *OrigReg = J->second;
// Skip the artificial register at offset 0.
if (!OrigReg) continue;
int64_t JImm = J->first;
const SmallBitVector &Bits = RegUses.find(OrigReg)->second.Bits;
// Examine each other offset associated with the same register. This is
// quadratic in the number of registers with the same base, but it's
// uncommon for this to be a large number.
for (ImmMapTy::const_iterator M = Imms.begin(); M != JE; ++M) {
if (M == J) continue;
// Compute the difference between the two.
int64_t Imm = (uint64_t)JImm - M->first;
for (int LUIdx = Bits.find_first(); LUIdx != -1;
LUIdx = Bits.find_next(LUIdx))
// Make a memo of this use, offset, and register tuple.
WorkItems.push_back(WorkItem(&Uses[LUIdx], LUIdx, Imm, OrigReg));
}
}
}
// Now iterate through the worklist and add new formulae.
for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
E = WorkItems.end(); I != E; ++I) {
const WorkItem &WI = *I;
LSRUse &LU = *WI.LU;
size_t LUIdx = WI.LUIdx;
int64_t Imm = WI.Imm;
const SCEV *OrigReg = WI.OrigReg;
const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy,
-(uint64_t)Imm));
for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
Formula F = LU.Formulae[L];
// Use the immediate in the scaled register.
if (F.ScaledReg == OrigReg) {
int64_t Offs = (uint64_t)F.AM.BaseOffs +
Imm * (uint64_t)F.AM.Scale;
// Don't create 50 + reg(-50).
if (F.referencesReg(SE.getSCEV(
ConstantInt::get(IntTy, -(uint64_t)Offs))))
continue;
Formula NewF = F;
NewF.AM.BaseOffs = Offs;
if (!isLegalUse(NewF.AM, LU.Kind, LU.AccessTy, TLI))
continue;
const SCEV *Diff = SE.getAddExpr(NegImmS, NewF.ScaledReg);
if (Diff->isZero()) continue;
NewF.ScaledReg = Diff;
(void)InsertFormula(LU, LUIdx, NewF);
}
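// (Worked example with hypothetical values: for Imm = 8 and Scale = 4,
// the base offset above grows by 32 while the scaled register shrinks
// by 8, leaving Scale * ScaledReg + BaseOffs unchanged.)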
// Use the immediate in a base register.
for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
const SCEV *BaseReg = F.BaseRegs[N];
if (BaseReg != OrigReg)
continue;
Formula NewF = F;
NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
if (!isLegalUse(NewF.AM, LU.Kind, LU.AccessTy, TLI))
continue;
const SCEV *Diff = SE.getAddExpr(NegImmS, BaseReg);
if (Diff->isZero()) continue;
// Don't create 50 + reg(-50).
if (Diff ==
SE.getSCEV(ConstantInt::get(IntTy,
-(uint64_t)NewF.AM.BaseOffs)))
continue;
NewF.BaseRegs[N] = Diff;
(void)InsertFormula(LU, LUIdx, NewF);
}
}
}
}
/// GenerateAllReuseFormulae - Generate formulae for each use.
void
LSRInstance::GenerateAllReuseFormulae() {
SmallVector<Formula, 12> Save;
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateSymbolicOffsetReuse(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateICmpZeroScaledReuse(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateReassociationReuse(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateCombinationReuse(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateScaledReuse(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateTruncateReuse(LU, LUIdx, LU.Formulae[i]);
}
GenerateConstantOffsetReuse();
}
/// GenerateLoopInvariantRegisterUses - Check for other uses of loop-invariant
/// values which we're tracking. These other uses will pin these values in
/// registers, making them less profitable for elimination.
/// TODO: This currently misses non-constant addrec step registers.
/// TODO: Should this give more weight to users inside the loop?
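/// For example (hypothetically), if a tracked loop-invariant value %n is
/// also consumed by an instruction after the loop, that user keeps %n
/// live in a register, which this accounting makes visible to the
/// register voting below.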
void
LSRInstance::GenerateLoopInvariantRegisterUses() {
SmallVector<const SCEV *, 8> Worklist(RegSequence.begin(), RegSequence.end());
while (!Worklist.empty()) {
const SCEV *S = Worklist.pop_back_val();
if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
Worklist.insert(Worklist.end(), N->op_begin(), N->op_end());
else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
Worklist.push_back(C->getOperand());
else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
Worklist.push_back(D->getLHS());
Worklist.push_back(D->getRHS());
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
const Value *V = U->getValue();
if (const Instruction *Inst = dyn_cast<Instruction>(V))
if (L->contains(Inst)) continue;
for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end();
UI != UE; ++UI) {
const Instruction *UserInst = dyn_cast<Instruction>(*UI);
// Ignore non-instructions.
if (!UserInst)
continue;
// Ignore instructions in other functions (as can happen with
// Constants).
if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
continue;
// Ignore instructions not dominated by the loop.
const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
UserInst->getParent() :
cast<PHINode>(UserInst)->getIncomingBlock(
PHINode::getIncomingValueNumForOperand(UI.getOperandNo()));
if (!DT.dominates(L->getHeader(), UseBB))
continue;
// Ignore uses which are part of other SCEV expressions, to avoid
// analyzing them multiple times.
if (SE.isSCEVable(UserInst->getType()) &&
!isa<SCEVUnknown>(SE.getSCEV(const_cast<Instruction *>(UserInst))))
continue;
// Ignore icmp instructions which are already being analyzed.
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
unsigned OtherIdx = !UI.getOperandNo();
Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
if (SE.getSCEV(OtherOp)->hasComputableLoopEvolution(L))
continue;
}
LSRUse &LU = getNewUse();
LU.UserInst = const_cast<Instruction *>(UserInst);
LU.OperandValToReplace = UI.getUse();
LU.InsertSupplementalFormula(U);
CountRegisters(LU.Formulae.back(), Uses.size() - 1);
}
}
}
#ifndef NDEBUG
static void debug_winner(SmallVector<LSRUse, 16> const &Uses) {
dbgs() << "LSR has selected formulae for each use:\n";
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
E = Uses.end(); I != E; ++I) {
const LSRUse &LU = *I;
dbgs() << " ";
LU.print(dbgs());
dbgs() << '\n';
dbgs() << " ";
LU.Formulae.front().print(dbgs());
dbgs() << "\n";
}
}
#endif
LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
: IU(P->getAnalysis<IVUsers>()),
SE(P->getAnalysis<ScalarEvolution>()),
DT(P->getAnalysis<DominatorTree>()),
TLI(tli), L(l), Changed(false), IVIncInsertPos(0),
CurrentArbitraryRegIndex(0), MaxNumRegs(0) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm()) return;
// If there's no interesting work to be done, bail early.
if (IU.IVUsesByStride.empty()) return;
DEBUG(dbgs() << "\nLSR on loop ";
WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
dbgs() << ":\n");
// Sort the StrideOrder so we process larger strides first.
std::stable_sort(IU.StrideOrder.begin(), IU.StrideOrder.end(),
StrideCompare(SE));
/// OptimizeShadowIV - If IV is used in an int-to-float cast
/// inside the loop then try to eliminate the cast operation.
OptimizeShadowIV();
// Change loop terminating condition to use the postinc iv when possible.
Changed |= OptimizeLoopTermCond();
for (SmallVectorImpl<const SCEV *>::const_iterator SIter =
IU.StrideOrder.begin(), SEnd = IU.StrideOrder.end();
SIter != SEnd; ++SIter) {
const SCEV *Stride = *SIter;
// Collect interesting types.
Types.insert(SE.getEffectiveSCEVType(Stride->getType()));
// Collect interesting factors.
for (SmallVectorImpl<const SCEV *>::const_iterator NewStrideIter =
SIter + 1; NewStrideIter != SEnd; ++NewStrideIter) {
const SCEV *OldStride = Stride;
const SCEV *NewStride = *NewStrideIter;
if (SE.getTypeSizeInBits(OldStride->getType()) !=
SE.getTypeSizeInBits(NewStride->getType())) {
if (SE.getTypeSizeInBits(OldStride->getType()) >
SE.getTypeSizeInBits(NewStride->getType()))
NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
else
OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
}
if (const SCEVConstant *Factor =
dyn_cast_or_null<SCEVConstant>(getSDiv(NewStride, OldStride,
SE, true)))
if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
Factors.insert(Factor->getValue()->getValue().getSExtValue());
}
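// (Hypothetical example: strides 4 and 8 yield the factor 2, since an
// IV stepping by 8 is expressible as 2 times an IV stepping by 4.)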
std::map<const SCEV *, IVUsersOfOneStride *>::const_iterator SI =
IU.IVUsesByStride.find(Stride);
assert(SI != IU.IVUsesByStride.end() && "Stride doesn't exist!");
for (ilist<IVStrideUse>::const_iterator UI = SI->second->Users.begin(),
E = SI->second->Users.end(); UI != E; ++UI) {
// Record the uses.
LSRUse &LU = getNewUse();
LU.UserInst = UI->getUser();
LU.OperandValToReplace = UI->getOperandValToReplace();
if (isAddressUse(LU.UserInst, LU.OperandValToReplace)) {
LU.Kind = LSRUse::Address;
LU.AccessTy = getAccessType(LU.UserInst);
}
if (UI->isUseOfPostIncrementedValue())
LU.PostIncLoop = L;
const SCEV *S = IU.getCanonicalExpr(*UI);
// Equality (== and !=) ICmps are special. We can rewrite (i == N) as
// (N - i == 0), and this allows (N - i) to be the expression that we
// work with rather than just N or i, so we can consider the register
// requirements for both N and i at the same time. Limiting this code
// to equality icmps is not a problem because all interesting loops
// use equality icmps, thanks to IndVarSimplify.
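// For example, in "for (i = 0; i != n; ++i)" the exit test i == n is
// treated as (n - i) == 0, so (n - i) itself becomes a candidate
// expression.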
if (ICmpInst *CI = dyn_cast<ICmpInst>(LU.UserInst))
if (CI->isEquality()) {
// Swap the operands if needed to put the OperandValToReplace on
// the left, for consistency.
Value *NV = CI->getOperand(1);
if (NV == LU.OperandValToReplace) {
CI->setOperand(1, CI->getOperand(0));
CI->setOperand(0, NV);
}
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
if (N->isLoopInvariant(L)) {
LU.Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
}
// -1 and the negations of all interesting factors (except the
// negation of -1) are now also interesting.
for (size_t i = 0, e = Factors.size(); i != e; ++i)
if (Factors[i] != -1)
Factors.insert(-(uint64_t)Factors[i]);
Factors.insert(-1);
}
// Set up the initial formula for this use.
LU.InsertInitialFormula(S, L, SE, DT);
CountRegisters(LU.Formulae.back(), Uses.size() - 1);
}
}
// If all uses use the same type, don't bother looking for truncation-based
// reuse.
if (Types.size() == 1)
Types.clear();
// If there are any uses of registers that we're tracking that have escaped
// IVUsers' attention, add trivial uses for them, so that the register
// voting process takes them into consideration.
GenerateLoopInvariantRegisterUses();
// Start by assuming we'll assign each use its own register. This is
// sometimes called "full" strength reduction, or "superhero" mode.
// Sometimes this is the best solution, but if there are opportunities for
// reuse we may find a better solution.
Score CurScore;
CurScore.RateInitial(Uses, L, SE);
MaxNumRegs = CurScore.getNumRegs();
// Now use the reuse data to generate a bunch of interesting ways
// to formulate the values needed for the uses.
GenerateAllReuseFormulae();
// Sort the formulae. TODO: This is redundantly sorted below.
for (SmallVectorImpl<LSRUse>::iterator I = Uses.begin(), E = Uses.end();
I != E; ++I) {
LSRUse &LU = *I;
std::stable_sort(LU.Formulae.begin(), LU.Formulae.end(),
ComplexitySorter());
}
// Ok, we've now collected all the uses and noted their register uses. The
// next step is to start looking at register reuse possibilities.
DEBUG(print(dbgs()); dbgs() << '\n'; IU.dump());
// Create a sorted list of registers with those with the most uses appearing
// earlier in the list. We'll visit them first, as they're the most likely
// to represent profitable reuse opportunities.
SmallVector<RegCount, 8> RegOrder;
for (SmallVectorImpl<const SCEV *>::const_iterator I =
RegSequence.begin(), E = RegSequence.end(); I != E; ++I)
RegOrder.push_back(RegCount(*I, RegUses.find(*I)->second));
std::stable_sort(RegOrder.begin(), RegOrder.end());
// Visit each register. Determine which ones represent profitable reuse
// opportunities and remember them.
// TODO: Extract this code into a function.
for (SmallVectorImpl<RegCount>::const_iterator I = RegOrder.begin(),
E = RegOrder.end(); I != E; ++I) {
const SCEV *Reg = I->Reg;
const SmallBitVector &Bits = I->Sort.Bits;
// Registers with only one use don't represent reuse opportunities, so
// when we get there, we're done.
if (Bits.count() <= 1) break;
DEBUG(dbgs() << "Reg " << *Reg << ": ";
I->Sort.print(dbgs());
dbgs() << '\n');
// Determine the total number of registers that will be needed if we make
// of the reuse opportunity represented by the current register.
Score NewScore;
NewScore.Rate(Reg, Bits, Uses, L, SE);
// Now decide whether this register's reuse opportunity is an overall win.
// Currently the decision is heavily skewed for register pressure.
if (!(NewScore < CurScore)) {
continue;
}
// Ok, use this opportunity.
DEBUG(dbgs() << "This candidate has been accepted.\n");
CurScore = NewScore;
// Now that we've selected a new reuse opportunity, delete formulae that
// do not participate in that opportunity.
for (int j = Bits.find_first(); j != -1; j = Bits.find_next(j)) {
LSRUse &LU = Uses[j];
for (unsigned k = 0, h = LU.Formulae.size(); k != h; ++k) {
Formula &F = LU.Formulae[k];
if (!F.referencesReg(Reg)) {
std::swap(LU.Formulae[k], LU.Formulae.back());
LU.Formulae.pop_back();
--k; --h;
}
}
// Also re-sort the list to put the formulae with the fewest registers
// at the front.
// TODO: Do this earlier, we don't need it each time.
std::stable_sort(LU.Formulae.begin(), LU.Formulae.end(),
ComplexitySorter());
}
}
// Ok, we've now made all our decisions. The first formula for each use
// will be used.
DEBUG(dbgs() << "Concluding, we need "; CurScore.print(dbgs());
dbgs() << ".\n";
debug_winner(Uses));
// Free memory no longer needed.
RegOrder.clear();
Factors.clear();
Types.clear();
RegUses.clear();
RegSequence.clear();
// Keep track of instructions we may have made dead, so that
// we can remove them after we are done working.
SmallVector<WeakVH, 16> DeadInsts;
SCEVExpander Rewriter(SE);
Rewriter.disableCanonicalMode();
Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
// Expand the new value definitions and update the users.
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
E = Uses.end(); I != E; ++I) {
// Formulae should be legal.
DEBUG(for (SmallVectorImpl<Formula>::const_iterator J = I->Formulae.begin(),
JE = I->Formulae.end(); J != JE; ++J)
assert(isLegalUse(J->AM, I->Kind, I->AccessTy, TLI) &&
"Illegal formula generated!"));
// Expand the new code and update the user.
I->Rewrite(L, IVIncInsertPos, Rewriter, DeadInsts, SE, DT, P);
Changed = true;
}
// Clean up after ourselves. This must be done before deleting any
// instructions.
Rewriter.clear();
Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
void LSRInstance::print(raw_ostream &OS) const {
if (MaxNumRegs != 0)
OS << "LSR is considering " << MaxNumRegs << " to be the maximum "
"number of registers needed.\n";
OS << "LSR has identified the following interesting factors and types: ";
bool First = true;
for (SmallSetVector<int64_t, 4>::const_iterator
I = Factors.begin(), E = Factors.end(); I != E; ++I) {
if (!First) OS << ", ";
First = false;
OS << '*' << *I;
}
for (SmallSetVector<const Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
if (!First) OS << ", ";
First = false;
OS << '(' << **I << ')';
}
OS << '\n';
OS << "LSR is examining the following uses, and candidate formulae:\n";
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
E = Uses.end(); I != E; ++I) {
const LSRUse &LU = *I;
dbgs() << " ";
LU.print(OS);
OS << '\n';
for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
JE = LU.Formulae.end(); J != JE; ++J) {
OS << " ";
J->print(OS);
OS << "\n";
}
}
}
void LSRInstance::dump() const {
print(errs()); errs() << '\n';
}
namespace {
class LoopStrengthReduce : public LoopPass {
/// TLI - Keep a pointer to a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *const TLI;
public:
static char ID; // Pass ID, replacement for typeid
explicit LoopStrengthReduce(const TargetLowering *tli = NULL);
private:
bool runOnLoop(Loop *L, LPPassManager &LPM);
void getAnalysisUsage(AnalysisUsage &AU) const;
};
}
char LoopStrengthReduce::ID = 0;
static RegisterPass<LoopStrengthReduce>
X("loop-reduce", "Loop Strength Reduction");
Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
return new LoopStrengthReduce(TLI);
}
LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
: LoopPass(&ID), TLI(tli) {}
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// We split critical edges, so we change the CFG. However, we do update
// many analyses if they are around.
AU.addPreservedID(LoopSimplifyID);
AU.addPreserved<LoopInfo>();
AU.addPreserved("domfrontier");
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<DominatorTree>();
AU.addPreserved<DominatorTree>();
AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
AU.addRequired<IVUsers>();
AU.addPreserved<IVUsers>();
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
bool Changed = false;
// Run the main LSR transformation.
Changed |= LSRInstance(TLI, L, this).getChanged();
// At this point, it is worth checking to see if any recurrence PHIs are also
// dead, so that we can remove them as well.
Changed |= DeleteDeadPHIs(L->getHeader());