Newer
Older
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
// Record the uses.
LSRFixup &LF = getNewFixup();
LF.UserInst = UI->getUser();
LF.OperandValToReplace = UI->getOperandValToReplace();
LF.PostIncLoops = UI->getPostIncLoops();
LSRUse::KindType Kind = LSRUse::Basic;
const Type *AccessTy = 0;
if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
Kind = LSRUse::Address;
AccessTy = getAccessType(LF.UserInst);
}
const SCEV *S = IU.getExpr(*UI);
// Equality (== and !=) ICmps are special. We can rewrite (i == N) as
// (N - i == 0), and this allows (N - i) to be the expression that we work
// with rather than just N or i, so we can consider the register
// requirements for both N and i at the same time. Limiting this code to
// equality icmps is not a problem because all interesting loops use
// equality icmps, thanks to IndVarSimplify.
if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst))
if (CI->isEquality()) {
// Swap the operands if needed to put the OperandValToReplace on the
// left, for consistency.
Value *NV = CI->getOperand(1);
if (NV == LF.OperandValToReplace) {
CI->setOperand(1, CI->getOperand(0));
CI->setOperand(0, NV);
NV = CI->getOperand(1);
Changed = true;
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
}
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
if (N->isLoopInvariant(L)) {
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
}
// -1 and the negations of all interesting strides (except the negation
// of -1) are now also interesting.
for (size_t i = 0, e = Factors.size(); i != e; ++i)
if (Factors[i] != -1)
Factors.insert(-(uint64_t)Factors[i]);
Factors.insert(-1);
}
// Set up the initial formula for this use.
std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
// If this is the first use of this LSRUse, give it a formula.
if (LU.Formulae.empty()) {
InsertInitialFormula(S, LU, LF.LUIdx);
CountRegisters(LU.Formulae.back(), LF.LUIdx);
}
}
DEBUG(print_fixups(dbgs()));
}
/// InsertInitialFormula - Insert a formula for the given expression into
/// the given use, separating out loop-variant portions from loop-invariant
/// and loop-computable portions.
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
Formula F;
F.InitialMatch(S, L, SE, DT);
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}
/// InsertSupplementalFormula - Insert a simple single-register formula for
/// the given expression into the given use.
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
void
LSRInstance::InsertSupplementalFormula(const SCEV *S,
LSRUse &LU, size_t LUIdx) {
Formula F;
F.BaseRegs.push_back(S);
F.AM.HasBaseReg = true;
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}
/// CountRegisters - Note which registers are used by the given formula,
/// updating RegUses.
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
if (F.ScaledReg)
RegUses.CountRegister(F.ScaledReg, LUIdx);
for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
E = F.BaseRegs.end(); I != E; ++I)
RegUses.CountRegister(*I, LUIdx);
}
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
CountRegisters(F, LUIdx);
return true;
}
/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of
/// loop-invariant values which we're tracking. These other uses will pin these
/// values in registers, making them less profitable for elimination.
/// TODO: This currently misses non-constant addrec step registers.
/// TODO: Should this give more weight to users inside the loop?
void
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
SmallPtrSet<const SCEV *, 8> Inserted;
while (!Worklist.empty()) {
const SCEV *S = Worklist.pop_back_val();
if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
Worklist.append(N->op_begin(), N->op_end());
else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
Worklist.push_back(C->getOperand());
else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
Worklist.push_back(D->getLHS());
Worklist.push_back(D->getRHS());
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (!Inserted.insert(U)) continue;
const Value *V = U->getValue();
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
// Look for instructions defined outside the loop.
if (L->contains(Inst)) continue;
} else if (isa<UndefValue>(V))
// Undef doesn't have a live range, so it doesn't matter.
continue;
for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
UI != UE; ++UI) {
const Instruction *UserInst = dyn_cast<Instruction>(*UI);
// Ignore non-instructions.
if (!UserInst)
continue;
// Ignore instructions in other functions (as can happen with
// Constants).
if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
continue;
// Ignore instructions not dominated by the loop.
const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
UserInst->getParent() :
cast<PHINode>(UserInst)->getIncomingBlock(
PHINode::getIncomingValueNumForOperand(UI.getOperandNo()));
if (!DT.dominates(L->getHeader(), UseBB))
continue;
// Ignore uses which are part of other SCEV expressions, to avoid
// analyzing them multiple times.
if (SE.isSCEVable(UserInst->getType())) {
const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
// If the user is a no-op, look through to its uses.
if (!isa<SCEVUnknown>(UserS))
continue;
if (UserS == U) {
Worklist.push_back(
SE.getUnknown(const_cast<Instruction *>(UserInst)));
continue;
}
}
// Ignore icmp instructions which are already being analyzed.
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
unsigned OtherIdx = !UI.getOperandNo();
Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
if (SE.getSCEV(OtherOp)->hasComputableLoopEvolution(L))
continue;
}
LSRFixup &LF = getNewFixup();
LF.UserInst = const_cast<Instruction *>(UserInst);
LF.OperandValToReplace = UI.getUse();
std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0);
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
InsertSupplementalFormula(U, LU, LF.LUIdx);
CountRegisters(LU.Formulae.back(), Uses.size() - 1);
break;
}
}
}
}
/// CollectSubexprs - Split S into subexpressions which can be pulled out into
/// separate registers. If C is non-null, multiply each subexpression by C.
static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
SmallVectorImpl<const SCEV *> &Ops,
ScalarEvolution &SE) {
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
// Break out add operands.
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I)
CollectSubexprs(*I, C, Ops, SE);
return;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
if (!AR->getStart()->isZero()) {
CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
AR->getLoop()), C, Ops, SE);
CollectSubexprs(AR->getStart(), C, Ops, SE);
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
return;
}
} else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
// Break (C * (a + b + c)) into C*a + C*b + C*c.
if (Mul->getNumOperands() == 2)
if (const SCEVConstant *Op0 =
dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
CollectSubexprs(Mul->getOperand(1),
C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
Ops, SE);
return;
}
}
// Otherwise use the value itself.
Ops.push_back(C ? SE.getMulExpr(C, S) : S);
}
/// GenerateReassociations - Split out subexpressions from adds and the bases of
/// addrecs.
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
Formula Base,
unsigned Depth) {
// Arbitrarily cap recursion to protect compile time.
if (Depth >= 3) return;
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *BaseReg = Base.BaseRegs[i];
SmallVector<const SCEV *, 8> AddOps;
CollectSubexprs(BaseReg, 0, AddOps, SE);
if (AddOps.size() == 1) continue;
for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
JE = AddOps.end(); J != JE; ++J) {
// Don't pull a constant into a register if the constant could be folded
// into an immediate field.
if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
Base.getNumRegs() > 1,
LU.Kind, LU.AccessTy, TLI, SE))
continue;
// Collect all operands except *J.
SmallVector<const SCEV *, 8> InnerAddOps
( ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
InnerAddOps.append
(next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
// Don't leave just a constant behind in a register if the constant could
// be folded into an immediate field.
if (InnerAddOps.size() == 1 &&
isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
Base.getNumRegs() > 1,
LU.Kind, LU.AccessTy, TLI, SE))
continue;
const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
if (InnerSum->isZero())
continue;
Formula F = Base;
F.BaseRegs[i] = InnerSum;
F.BaseRegs.push_back(*J);
if (InsertFormula(LU, LUIdx, F))
// If that formula hadn't been seen before, recurse to find more like
// it.
GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1);
}
}
}
/// GenerateCombinations - Generate a formula consisting of all of the
/// loop-dominating registers added into a single register.
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
// This method is only interesting on a plurality of registers.
if (Base.BaseRegs.size() <= 1) return;
Formula F = Base;
F.BaseRegs.clear();
SmallVector<const SCEV *, 4> Ops;
for (SmallVectorImpl<const SCEV *>::const_iterator
I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
const SCEV *BaseReg = *I;
if (BaseReg->properlyDominates(L->getHeader(), &DT) &&
!BaseReg->hasComputableLoopEvolution(L))
Ops.push_back(BaseReg);
else
F.BaseRegs.push_back(BaseReg);
}
if (Ops.size() > 1) {
const SCEV *Sum = SE.getAddExpr(Ops);
// TODO: If Sum is zero, it probably means ScalarEvolution missed an
// opportunity to fold something. For now, just ignore such cases
if (!Sum->isZero()) {
F.BaseRegs.push_back(Sum);
(void)InsertFormula(LU, LUIdx, F);
}
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
}
}
/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// We can't add a symbolic offset if the address already contains one.
if (Base.AM.BaseGV) return;
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *G = Base.BaseRegs[i];
GlobalValue *GV = ExtractSymbol(G, SE);
if (G->isZero() || !GV)
continue;
Formula F = Base;
F.AM.BaseGV = GV;
if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, TLI))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
}
}
/// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets.
void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// TODO: For now, just add the min and max offset, because it usually isn't
// worthwhile looking at everything inbetween.
SmallVector<int64_t, 4> Worklist;
Worklist.push_back(LU.MinOffset);
if (LU.MaxOffset != LU.MinOffset)
Worklist.push_back(LU.MaxOffset);
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *G = Base.BaseRegs[i];
for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
E = Worklist.end(); I != E; ++I) {
Formula F = Base;
F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
LU.Kind, LU.AccessTy, TLI)) {
F.BaseRegs[i] = SE.getAddExpr(G, SE.getConstant(G->getType(), *I));
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
(void)InsertFormula(LU, LUIdx, F);
}
}
int64_t Imm = ExtractImmediate(G, SE);
if (G->isZero() || Imm == 0)
continue;
Formula F = Base;
F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm;
if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, TLI))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
}
}
/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
/// the comparison. For example, x == y -> x*c == y*c.
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
Formula Base) {
if (LU.Kind != LSRUse::ICmpZero) return;
// Determine the integer type for the base formula.
const Type *IntTy = Base.getType();
if (!IntTy) return;
if (SE.getTypeSizeInBits(IntTy) > 64) return;
// Don't do this if there is more than one offset.
if (LU.MinOffset != LU.MaxOffset) return;
assert(!Base.AM.BaseGV && "ICmpZero use is not legal!");
// Check each interesting stride.
for (SmallSetVector<int64_t, 8>::const_iterator
I = Factors.begin(), E = Factors.end(); I != E; ++I) {
int64_t Factor = *I;
Formula F = Base;
// Check that the multiplication doesn't overflow.
if (F.AM.BaseOffs == INT64_MIN && Factor == -1)
continue;
F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
if (F.AM.BaseOffs / Factor != Base.AM.BaseOffs)
continue;
// Check that multiplying with the use offset doesn't overflow.
int64_t Offset = LU.MinOffset;
if (Offset == INT64_MIN && Factor == -1)
continue;
Offset = (uint64_t)Offset * Factor;
continue;
// Check that this scale is legal.
if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
continue;
// Compensate for the use having MinOffset built into it.
F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset;
const SCEV *FactorS = SE.getConstant(IntTy, Factor);
// Check that multiplying with each base register doesn't overflow.
for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
goto next;
}
// Check that multiplying with the scaled register doesn't overflow.
if (F.ScaledReg) {
F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
continue;
}
// If we make it here and it's legal, add it.
(void)InsertFormula(LU, LUIdx, F);
next:;
}
}
/// GenerateScales - Generate stride factor reuse formulae by making use of
/// scaled-offset address modes, for example.
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
// Determine the integer type for the base formula.
const Type *IntTy = Base.getType();
if (!IntTy) return;
// If this Formula already has a scaled register, we can't add another one.
if (Base.AM.Scale != 0) return;
// Check each interesting stride.
for (SmallSetVector<int64_t, 8>::const_iterator
I = Factors.begin(), E = Factors.end(); I != E; ++I) {
int64_t Factor = *I;
Base.AM.Scale = Factor;
Base.AM.HasBaseReg = Base.BaseRegs.size() > 1;
// Check whether this scale is going to be legal.
if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, TLI)) {
// As a special-case, handle special out-of-loop Basic users specially.
// TODO: Reconsider this special case.
if (LU.Kind == LSRUse::Basic &&
isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
LSRUse::Special, LU.AccessTy, TLI) &&
LU.AllFixupsOutsideLoop)
LU.Kind = LSRUse::Special;
else
continue;
}
// For an ICmpZero, negating a solitary base register won't lead to
// new solutions.
if (LU.Kind == LSRUse::ICmpZero &&
!Base.AM.HasBaseReg && Base.AM.BaseOffs == 0 && !Base.AM.BaseGV)
continue;
// For each addrec base reg, apply the scale, if possible.
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
if (const SCEVAddRecExpr *AR =
dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
const SCEV *FactorS = SE.getConstant(IntTy, Factor);
if (FactorS->isZero())
continue;
// Divide out the factor, ignoring high bits, since we'll be
// scaling the value back up in the end.
if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
// TODO: This could be optimized to avoid all the copying.
Formula F = Base;
F.ScaledReg = Quotient;
F.DeleteBaseReg(F.BaseRegs[i]);
(void)InsertFormula(LU, LUIdx, F);
}
}
}
}
/// GenerateTruncates - Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
// This requires TargetLowering to tell us which truncates are free.
if (!TLI) return;
// Don't bother truncating symbolic values.
if (Base.AM.BaseGV) return;
// Determine the integer type for the base formula.
const Type *DstTy = Base.getType();
if (!DstTy) return;
DstTy = SE.getEffectiveSCEVType(DstTy);
for (SmallSetVector<const Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
const Type *SrcTy = *I;
if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
JE = F.BaseRegs.end(); J != JE; ++J)
*J = SE.getAnyExtendExpr(*J, SrcTy);
// TODO: This assumes we've done basic processing on all uses and
// have an idea what the register usage is.
if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
continue;
(void)InsertFormula(LU, LUIdx, F);
}
}
}
namespace {
/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
/// defer modifications so that the search phase doesn't have to worry about
/// the data structures moving underneath it.
struct WorkItem {
size_t LUIdx;
int64_t Imm;
const SCEV *OrigReg;
WorkItem(size_t LI, int64_t I, const SCEV *R)
: LUIdx(LI), Imm(I), OrigReg(R) {}
void print(raw_ostream &OS) const;
void dump() const;
};
}
void WorkItem::print(raw_ostream &OS) const {
OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
<< " , add offset " << Imm;
}
void WorkItem::dump() const {
print(errs()); errs() << '\n';
}
/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
/// distance apart and try to form reuse opportunities between them.
void LSRInstance::GenerateCrossUseConstantOffsets() {
// Group the registers by their value without any added constant offset.
typedef std::map<int64_t, const SCEV *> ImmMapTy;
typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
RegMapTy Map;
DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
SmallVector<const SCEV *, 8> Sequence;
for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
I != E; ++I) {
const SCEV *Reg = *I;
int64_t Imm = ExtractImmediate(Reg, SE);
std::pair<RegMapTy::iterator, bool> Pair =
Map.insert(std::make_pair(Reg, ImmMapTy()));
if (Pair.second)
Sequence.push_back(Reg);
Pair.first->second.insert(std::make_pair(Imm, *I));
UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(*I);
}
// Now examine each set of registers with the same base value. Build up
// a list of work to do and do the work in a separate step so that we're
// not adding formulae and register counts while we're searching.
SmallVector<WorkItem, 32> WorkItems;
SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
E = Sequence.end(); I != E; ++I) {
const SCEV *Reg = *I;
const ImmMapTy &Imms = Map.find(Reg)->second;
// It's not worthwhile looking for reuse if there's only one offset.
if (Imms.size() == 1)
continue;
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
J != JE; ++J)
dbgs() << ' ' << J->first;
dbgs() << '\n');
// Examine each offset.
for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
J != JE; ++J) {
const SCEV *OrigReg = J->second;
int64_t JImm = J->first;
const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
if (!isa<SCEVConstant>(OrigReg) &&
UsedByIndicesMap[Reg].count() == 1) {
DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');
continue;
}
// Conservatively examine offsets between this orig reg a few selected
// other orig regs.
ImmMapTy::const_iterator OtherImms[] = {
Imms.begin(), prior(Imms.end()),
Imms.upper_bound((Imms.begin()->first + prior(Imms.end())->first) / 2)
};
for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
ImmMapTy::const_iterator M = OtherImms[i];
if (M == J || M == JE) continue;
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
// Compute the difference between the two.
int64_t Imm = (uint64_t)JImm - M->first;
for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
LUIdx = UsedByIndices.find_next(LUIdx))
// Make a memo of this use, offset, and register tuple.
if (UniqueItems.insert(std::make_pair(LUIdx, Imm)))
WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
}
}
}
Map.clear();
Sequence.clear();
UsedByIndicesMap.clear();
UniqueItems.clear();
// Now iterate through the worklist and add new formulae.
for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
E = WorkItems.end(); I != E; ++I) {
const WorkItem &WI = *I;
size_t LUIdx = WI.LUIdx;
LSRUse &LU = Uses[LUIdx];
int64_t Imm = WI.Imm;
const SCEV *OrigReg = WI.OrigReg;
const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
// Use the immediate in the scaled register.
if (F.ScaledReg == OrigReg) {
int64_t Offs = (uint64_t)F.AM.BaseOffs +
Imm * (uint64_t)F.AM.Scale;
// Don't create 50 + reg(-50).
if (F.referencesReg(SE.getSCEV(
ConstantInt::get(IntTy, -(uint64_t)Offs))))
continue;
Formula NewF = F;
NewF.AM.BaseOffs = Offs;
if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, TLI))
continue;
NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
// If the new scale is a constant in a register, and adding the constant
// value to the immediate would produce a value closer to zero than the
// immediate itself, then the formula isn't worthwhile.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
if (C->getValue()->getValue().isNegative() !=
(NewF.AM.BaseOffs < 0) &&
(C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale))
.ule(abs64(NewF.AM.BaseOffs)))
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
continue;
// OK, looks good.
(void)InsertFormula(LU, LUIdx, NewF);
} else {
// Use the immediate in a base register.
for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
const SCEV *BaseReg = F.BaseRegs[N];
if (BaseReg != OrigReg)
continue;
Formula NewF = F;
NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, TLI))
continue;
NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
// If the new formula has a constant in a register, and adding the
// constant value to the immediate would produce a value closer to
// zero than the immediate itself, then the formula isn't worthwhile.
for (SmallVectorImpl<const SCEV *>::const_iterator
J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
J != JE; ++J)
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
if ((C->getValue()->getValue() + NewF.AM.BaseOffs).abs().slt(
abs64(NewF.AM.BaseOffs)) &&
(C->getValue()->getValue() +
NewF.AM.BaseOffs).countTrailingZeros() >=
CountTrailingZeros_64(NewF.AM.BaseOffs))
goto skip_formula;
// Ok, looks good.
(void)InsertFormula(LU, LUIdx, NewF);
break;
skip_formula:;
}
}
}
}
}
/// GenerateAllReuseFormulae - Generate formulae for each use.
void
LSRInstance::GenerateAllReuseFormulae() {
// This is split into multiple loops so that hasRegsUsedByUsesOtherThan
// queries are more precise.
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
}
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateScales(LU, LUIdx, LU.Formulae[i]);
}
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
}
GenerateCrossUseConstantOffsets();
}
/// If their are multiple formulae with the same set of registers used
/// by other uses, pick the best one and delete the others.
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
#ifndef NDEBUG
bool ChangedFormulae = false;
#endif
// Collect the best formula for each unique set of shared registers. This
// is reset for each use.
typedef DenseMap<SmallVector<const SCEV *, 2>, size_t, UniquifierDenseMapInfo>
BestFormulaeTy;
BestFormulaeTy BestFormulae;
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
FormulaSorter Sorter(L, LU, SE, DT);
DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
bool Any = false;
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
for (size_t FIdx = 0, NumForms = LU.Formulae.size();
FIdx != NumForms; ++FIdx) {
Formula &F = LU.Formulae[FIdx];
SmallVector<const SCEV *, 2> Key;
for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
JE = F.BaseRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
Key.push_back(Reg);
}
if (F.ScaledReg &&
RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for
// uniquifying.
std::sort(Key.begin(), Key.end());
std::pair<BestFormulaeTy::const_iterator, bool> P =
BestFormulae.insert(std::make_pair(Key, FIdx));
if (!P.second) {
Formula &Best = LU.Formulae[P.first->second];
if (Sorter.operator()(F, Best))
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
" in favor of formula "; Best.print(dbgs());
dbgs() << '\n');
#ifndef NDEBUG
ChangedFormulae = true;
LU.DeleteFormula(F);
--FIdx;
--NumForms;
Any = true;
continue;
}
// Now that we've filtered out some formulae, recompute the Regs set.
if (Any)
LU.RecomputeRegs(LUIdx, RegUses);
// Reset this to prepare for the next use.
BestFormulae.clear();
}
DEBUG(if (ChangedFormulae) {
dbgs() << "\n"
"After filtering out undesirable candidates:\n";
print_uses(dbgs());
});
}
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
// This is a rough guess that seems to work fairly well.
static const size_t ComplexityLimit = UINT16_MAX;
/// EstimateSearchSpaceComplexity - Estimate the worst-case number of
/// solutions the solver might have to consider. It almost never considers
/// this many solutions because it prune the search space, but the pruning
/// isn't always sufficient.
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
uint32_t Power = 1;
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
E = Uses.end(); I != E; ++I) {
size_t FSize = I->Formulae.size();
if (FSize >= ComplexityLimit) {
Power = ComplexityLimit;
break;
}
Power *= FSize;
if (Power >= ComplexityLimit)
break;
}
return Power;
}
/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
/// formulae to choose from, use some rough heuristics to prune down the number
/// of formulae. This keeps the main solver from taking an extraordinary amount
/// of time in some worst-case scenarios.
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
"which use a superset of registers used by other "
"formulae.\n");
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
bool Any = false;
for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
Formula &F = LU.Formulae[i];
// Look for a formula with a constant or GV in a register. If the use
// also has a formula with that same value in an immediate field,
// delete the one that uses a register.
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
for (SmallVectorImpl<const SCEV *>::const_iterator
I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
Formula NewF = F;
NewF.AM.BaseOffs += C->getValue()->getSExtValue();
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
LU.DeleteFormula(F);
--i;
--e;
Any = true;
break;
}
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
if (!F.AM.BaseGV) {
Formula NewF = F;
NewF.AM.BaseGV = GV;
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs());
dbgs() << '\n');
LU.DeleteFormula(F);
--i;
--e;
Any = true;
break;
}
}
}
}
}
if (Any)
LU.RecomputeRegs(LUIdx, RegUses);
}
DEBUG(dbgs() << "After pre-selection:\n";
print_uses(dbgs()));
}
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
DEBUG(dbgs() << "Narrowing the search space by assuming that uses "
"separated by a constant offset will use the same "
"registers.\n");
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
if (F.AM.BaseOffs != 0 && F.AM.Scale == 0) {
if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) {
if (reconcileNewOffset(*LUThatHas, F.AM.BaseOffs,
/*HasBaseReg=*/false,
LU.Kind, LU.AccessTy)) {
DEBUG(dbgs() << " Deleting use "; LU.print(dbgs());
dbgs() << '\n');
LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
// Delete formulae from the new use which are no longer legal.
bool Any = false;
for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
Formula &F = LUThatHas->Formulae[i];
if (!isLegalUse(F.AM,
LUThatHas->MinOffset, LUThatHas->MaxOffset,
LUThatHas->Kind, LUThatHas->AccessTy, TLI)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs());
dbgs() << '\n');
LUThatHas->DeleteFormula(F);
--i;
--e;
Any = true;
}
}
if (Any)
LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
// Update the relocs to reference the new use.
for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
E = Fixups.end(); I != E; ++I) {
LSRFixup &Fixup = *I;
if (Fixup.LUIdx == LUIdx) {
Fixup.LUIdx = LUThatHas - &Uses.front();
Fixup.Offset += F.AM.BaseOffs;
DEBUG(errs() << "New fixup has offset "
<< Fixup.Offset << '\n');
if (Fixup.LUIdx == NumUses-1)
Fixup.LUIdx = LUIdx;
}
// Delete the old use.
DeleteUse(LU);
--LUIdx;
--NumUses;
break;
}
}
}
}
}
DEBUG(dbgs() << "After pre-selection:\n";
print_uses(dbgs()));
}
// With all other options exhausted, loop until the system is simple
// enough to handle.
SmallPtrSet<const SCEV *, 4> Taken;
while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
// Ok, we have too many of formulae on our hands to conveniently handle.
// Use a rough heuristic to thin out the list.
DEBUG(dbgs() << "The search space is too complex.\n");
// Pick the register which is used by the most LSRUses, which is likely
// to be a good reuse register candidate.
const SCEV *Best = 0;
unsigned BestNum = 0;
for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
I != E; ++I) {
const SCEV *Reg = *I;
if (Taken.count(Reg))
continue;
if (!Best)
Best = Reg;
else {
unsigned Count = RegUses.getUsedByIndices(Reg).count();
if (Count > BestNum) {
Best = Reg;
BestNum = Count;