Newer
Older
for (SmallSetVector<const SCEV *, 4>::const_iterator
I = Strides.begin(), E = Strides.end(); I != E; ++I)
for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
llvm::next(I); NewStrideIter != E; ++NewStrideIter) {
const SCEV *OldStride = *I;
const SCEV *NewStride = *NewStrideIter;
if (SE.getTypeSizeInBits(OldStride->getType()) !=
SE.getTypeSizeInBits(NewStride->getType())) {
if (SE.getTypeSizeInBits(OldStride->getType()) >
SE.getTypeSizeInBits(NewStride->getType()))
NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
else
OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
}
if (const SCEVConstant *Factor =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
SE, true))) {
if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
Factors.insert(Factor->getValue()->getValue().getSExtValue());
} else if (const SCEVConstant *Factor =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
NewStride,
SE, true))) {
if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
Factors.insert(Factor->getValue()->getValue().getSExtValue());
}
}
Evan Cheng
committed
// If all uses use the same type, don't bother looking for truncation-based
// reuse.
if (Types.size() == 1)
Types.clear();
DEBUG(print_factors_and_types(dbgs()));
void LSRInstance::CollectFixupsAndInitialFormulae() {
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
// Record the uses.
LSRFixup &LF = getNewFixup();
LF.UserInst = UI->getUser();
LF.OperandValToReplace = UI->getOperandValToReplace();
LF.PostIncLoops = UI->getPostIncLoops();
LSRUse::KindType Kind = LSRUse::Basic;
const Type *AccessTy = 0;
if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
Kind = LSRUse::Address;
AccessTy = getAccessType(LF.UserInst);
}
const SCEV *S = IU.getExpr(*UI);
// Equality (== and !=) ICmps are special. We can rewrite (i == N) as
// (N - i == 0), and this allows (N - i) to be the expression that we work
// with rather than just N or i, so we can consider the register
// requirements for both N and i at the same time. Limiting this code to
// equality icmps is not a problem because all interesting loops use
// equality icmps, thanks to IndVarSimplify.
if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst))
if (CI->isEquality()) {
// Swap the operands if needed to put the OperandValToReplace on the
// left, for consistency.
Value *NV = CI->getOperand(1);
if (NV == LF.OperandValToReplace) {
CI->setOperand(1, CI->getOperand(0));
CI->setOperand(0, NV);
NV = CI->getOperand(1);
Changed = true;
}
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
if (SE.isLoopInvariant(N, L)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
N = TransformForPostIncUse(Normalize, N, CI, 0,
LF.PostIncLoops, SE, DT);
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
}
// -1 and the negations of all interesting strides (except the negation
// of -1) are now also interesting.
for (size_t i = 0, e = Factors.size(); i != e; ++i)
if (Factors[i] != -1)
Factors.insert(-(uint64_t)Factors[i]);
Factors.insert(-1);
}
// Set up the initial formula for this use.
std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
if (!LU.WidestFixupType ||
SE.getTypeSizeInBits(LU.WidestFixupType) <
SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
LU.WidestFixupType = LF.OperandValToReplace->getType();
// If this is the first use of this LSRUse, give it a formula.
if (LU.Formulae.empty()) {
InsertInitialFormula(S, LU, LF.LUIdx);
CountRegisters(LU.Formulae.back(), LF.LUIdx);
}
}
DEBUG(print_fixups(dbgs()));
}
/// InsertInitialFormula - Insert a formula for the given expression into
/// the given use, separating out loop-variant portions from loop-invariant
/// and loop-computable portions.
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
Formula F;
F.InitialMatch(S, L, SE);
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}
/// InsertSupplementalFormula - Insert a simple single-register formula for
/// the given expression into the given use.
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
void
LSRInstance::InsertSupplementalFormula(const SCEV *S,
LSRUse &LU, size_t LUIdx) {
Formula F;
F.BaseRegs.push_back(S);
F.AM.HasBaseReg = true;
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}
/// CountRegisters - Note which registers are used by the given formula,
/// updating RegUses.
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
if (F.ScaledReg)
RegUses.CountRegister(F.ScaledReg, LUIdx);
for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
E = F.BaseRegs.end(); I != E; ++I)
RegUses.CountRegister(*I, LUIdx);
}
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
CountRegisters(F, LUIdx);
return true;
}
/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of
/// loop-invariant values which we're tracking. These other uses will pin these
/// values in registers, making them less profitable for elimination.
/// TODO: This currently misses non-constant addrec step registers.
/// TODO: Should this give more weight to users inside the loop?
void
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
SmallPtrSet<const SCEV *, 8> Inserted;
while (!Worklist.empty()) {
const SCEV *S = Worklist.pop_back_val();
if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
Worklist.append(N->op_begin(), N->op_end());
else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
Worklist.push_back(C->getOperand());
else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
Worklist.push_back(D->getLHS());
Worklist.push_back(D->getRHS());
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (!Inserted.insert(U)) continue;
const Value *V = U->getValue();
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
// Look for instructions defined outside the loop.
if (L->contains(Inst)) continue;
} else if (isa<UndefValue>(V))
// Undef doesn't have a live range, so it doesn't matter.
continue;
for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
UI != UE; ++UI) {
const Instruction *UserInst = dyn_cast<Instruction>(*UI);
// Ignore non-instructions.
if (!UserInst)
continue;
// Ignore instructions in other functions (as can happen with
// Constants).
if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
continue;
// Ignore instructions not dominated by the loop.
const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
UserInst->getParent() :
cast<PHINode>(UserInst)->getIncomingBlock(
PHINode::getIncomingValueNumForOperand(UI.getOperandNo()));
if (!DT.dominates(L->getHeader(), UseBB))
continue;
// Ignore uses which are part of other SCEV expressions, to avoid
// analyzing them multiple times.
if (SE.isSCEVable(UserInst->getType())) {
const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
// If the user is a no-op, look through to its uses.
if (!isa<SCEVUnknown>(UserS))
continue;
if (UserS == U) {
Worklist.push_back(
SE.getUnknown(const_cast<Instruction *>(UserInst)));
continue;
}
}
// Ignore icmp instructions which are already being analyzed.
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
unsigned OtherIdx = !UI.getOperandNo();
Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
continue;
}
LSRFixup &LF = getNewFixup();
LF.UserInst = const_cast<Instruction *>(UserInst);
LF.OperandValToReplace = UI.getUse();
std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0);
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
if (!LU.WidestFixupType ||
SE.getTypeSizeInBits(LU.WidestFixupType) <
SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
LU.WidestFixupType = LF.OperandValToReplace->getType();
InsertSupplementalFormula(U, LU, LF.LUIdx);
CountRegisters(LU.Formulae.back(), Uses.size() - 1);
break;
}
}
}
}
/// CollectSubexprs - Split S into subexpressions which can be pulled out into
/// separate registers. If C is non-null, multiply each subexpression by C.
static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
SmallVectorImpl<const SCEV *> &Ops,
const Loop *L,
ScalarEvolution &SE) {
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
// Break out add operands.
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I)
CollectSubexprs(*I, C, Ops, L, SE);
return;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
if (!AR->getStart()->isZero()) {
CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
AR->getLoop(),
//FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
SCEV::FlagAnyWrap),
C, Ops, L, SE);
CollectSubexprs(AR->getStart(), C, Ops, L, SE);
return;
}
} else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
// Break (C * (a + b + c)) into C*a + C*b + C*c.
if (Mul->getNumOperands() == 2)
if (const SCEVConstant *Op0 =
dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
CollectSubexprs(Mul->getOperand(1),
C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
Ops, L, SE);
return;
}
}
// Otherwise use the value itself, optionally with a scale applied.
Ops.push_back(C ? SE.getMulExpr(C, S) : S);
}
/// GenerateReassociations - Split out subexpressions from adds and the bases of
/// addrecs.
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
Formula Base,
unsigned Depth) {
// Arbitrarily cap recursion to protect compile time.
if (Depth >= 3) return;
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *BaseReg = Base.BaseRegs[i];
SmallVector<const SCEV *, 8> AddOps;
CollectSubexprs(BaseReg, 0, AddOps, L, SE);
if (AddOps.size() == 1) continue;
for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
JE = AddOps.end(); J != JE; ++J) {
// Loop-variant "unknown" values are uninteresting; we won't be able to
// do anything meaningful with them.
if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
continue;
// Don't pull a constant into a register if the constant could be folded
// into an immediate field.
if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
Base.getNumRegs() > 1,
LU.Kind, LU.AccessTy, TLI, SE))
continue;
// Collect all operands except *J.
SmallVector<const SCEV *, 8> InnerAddOps
(((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
InnerAddOps.append
(llvm::next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
// Don't leave just a constant behind in a register if the constant could
// be folded into an immediate field.
if (InnerAddOps.size() == 1 &&
isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
Base.getNumRegs() > 1,
LU.Kind, LU.AccessTy, TLI, SE))
continue;
const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
if (InnerSum->isZero())
continue;
Formula F = Base;
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
// Add the remaining pieces of the add back into the new formula.
const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
if (TLI && InnerSumSC &&
SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue())) {
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue();
F.BaseRegs.erase(F.BaseRegs.begin() + i);
} else
F.BaseRegs[i] = InnerSum;
// Add J as its own register, or an unfolded immediate.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue()))
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue();
else
F.BaseRegs.push_back(*J);
if (InsertFormula(LU, LUIdx, F))
// If that formula hadn't been seen before, recurse to find more like
// it.
GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1);
}
}
}
/// GenerateCombinations - Generate a formula consisting of all of the
/// loop-dominating registers added into a single register.
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
// This method is only interesting on a plurality of registers.
if (Base.BaseRegs.size() <= 1) return;
Formula F = Base;
F.BaseRegs.clear();
SmallVector<const SCEV *, 4> Ops;
for (SmallVectorImpl<const SCEV *>::const_iterator
I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
const SCEV *BaseReg = *I;
if (SE.properlyDominates(BaseReg, L->getHeader()) &&
!SE.hasComputableLoopEvolution(BaseReg, L))
Ops.push_back(BaseReg);
else
F.BaseRegs.push_back(BaseReg);
}
if (Ops.size() > 1) {
const SCEV *Sum = SE.getAddExpr(Ops);
// TODO: If Sum is zero, it probably means ScalarEvolution missed an
// opportunity to fold something. For now, just ignore such cases
if (!Sum->isZero()) {
F.BaseRegs.push_back(Sum);
(void)InsertFormula(LU, LUIdx, F);
}
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
}
}
/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// We can't add a symbolic offset if the address already contains one.
if (Base.AM.BaseGV) return;
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *G = Base.BaseRegs[i];
GlobalValue *GV = ExtractSymbol(G, SE);
if (G->isZero() || !GV)
continue;
Formula F = Base;
F.AM.BaseGV = GV;
if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, TLI))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
}
}
/// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets.
void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// TODO: For now, just add the min and max offset, because it usually isn't
// worthwhile looking at everything inbetween.
SmallVector<int64_t, 2> Worklist;
Worklist.push_back(LU.MinOffset);
if (LU.MaxOffset != LU.MinOffset)
Worklist.push_back(LU.MaxOffset);
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *G = Base.BaseRegs[i];
for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
E = Worklist.end(); I != E; ++I) {
Formula F = Base;
F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
LU.Kind, LU.AccessTy, TLI)) {
// Add the offset to the base register.
const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
// If it cancelled out, drop the base register, otherwise update it.
if (NewG->isZero()) {
std::swap(F.BaseRegs[i], F.BaseRegs.back());
F.BaseRegs.pop_back();
} else
F.BaseRegs[i] = NewG;
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
(void)InsertFormula(LU, LUIdx, F);
}
}
int64_t Imm = ExtractImmediate(G, SE);
if (G->isZero() || Imm == 0)
continue;
Formula F = Base;
F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm;
if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, TLI))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
}
}
/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
/// the comparison. For example, x == y -> x*c == y*c.
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
Formula Base) {
if (LU.Kind != LSRUse::ICmpZero) return;
// Determine the integer type for the base formula.
const Type *IntTy = Base.getType();
if (!IntTy) return;
if (SE.getTypeSizeInBits(IntTy) > 64) return;
// Don't do this if there is more than one offset.
if (LU.MinOffset != LU.MaxOffset) return;
assert(!Base.AM.BaseGV && "ICmpZero use is not legal!");
// Check each interesting stride.
for (SmallSetVector<int64_t, 8>::const_iterator
I = Factors.begin(), E = Factors.end(); I != E; ++I) {
int64_t Factor = *I;
// Check that the multiplication doesn't overflow.
if (Base.AM.BaseOffs == INT64_MIN && Factor == -1)
int64_t NewBaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
if (NewBaseOffs / Factor != Base.AM.BaseOffs)
continue;
// Check that multiplying with the use offset doesn't overflow.
int64_t Offset = LU.MinOffset;
if (Offset == INT64_MIN && Factor == -1)
continue;
Offset = (uint64_t)Offset * Factor;
continue;
Formula F = Base;
F.AM.BaseOffs = NewBaseOffs;
// Check that this scale is legal.
if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
continue;
// Compensate for the use having MinOffset built into it.
F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset;
const SCEV *FactorS = SE.getConstant(IntTy, Factor);
// Check that multiplying with each base register doesn't overflow.
for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
goto next;
}
// Check that multiplying with the scaled register doesn't overflow.
if (F.ScaledReg) {
F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
continue;
}
// Check that multiplying with the unfolded offset doesn't overflow.
if (F.UnfoldedOffset != 0) {
Dan Gohman
committed
if (F.UnfoldedOffset == INT64_MIN && Factor == -1)
continue;
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
continue;
}
// If we make it here and it's legal, add it.
(void)InsertFormula(LU, LUIdx, F);
next:;
}
}
/// GenerateScales - Generate stride factor reuse formulae by making use of
/// scaled-offset address modes, for example.
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
// Determine the integer type for the base formula.
const Type *IntTy = Base.getType();
if (!IntTy) return;
// If this Formula already has a scaled register, we can't add another one.
if (Base.AM.Scale != 0) return;
// Check each interesting stride.
for (SmallSetVector<int64_t, 8>::const_iterator
I = Factors.begin(), E = Factors.end(); I != E; ++I) {
int64_t Factor = *I;
Base.AM.Scale = Factor;
Base.AM.HasBaseReg = Base.BaseRegs.size() > 1;
// Check whether this scale is going to be legal.
if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, TLI)) {
// As a special-case, handle special out-of-loop Basic users specially.
// TODO: Reconsider this special case.
if (LU.Kind == LSRUse::Basic &&
isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
LSRUse::Special, LU.AccessTy, TLI) &&
LU.AllFixupsOutsideLoop)
LU.Kind = LSRUse::Special;
else
continue;
}
// For an ICmpZero, negating a solitary base register won't lead to
// new solutions.
if (LU.Kind == LSRUse::ICmpZero &&
!Base.AM.HasBaseReg && Base.AM.BaseOffs == 0 && !Base.AM.BaseGV)
continue;
// For each addrec base reg, apply the scale, if possible.
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
if (const SCEVAddRecExpr *AR =
dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
const SCEV *FactorS = SE.getConstant(IntTy, Factor);
if (FactorS->isZero())
continue;
// Divide out the factor, ignoring high bits, since we'll be
// scaling the value back up in the end.
if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
// TODO: This could be optimized to avoid all the copying.
Formula F = Base;
F.ScaledReg = Quotient;
F.DeleteBaseReg(F.BaseRegs[i]);
(void)InsertFormula(LU, LUIdx, F);
}
}
}
}
/// GenerateTruncates - Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
// This requires TargetLowering to tell us which truncates are free.
if (!TLI) return;
// Don't bother truncating symbolic values.
if (Base.AM.BaseGV) return;
// Determine the integer type for the base formula.
const Type *DstTy = Base.getType();
if (!DstTy) return;
DstTy = SE.getEffectiveSCEVType(DstTy);
for (SmallSetVector<const Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
const Type *SrcTy = *I;
if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
JE = F.BaseRegs.end(); J != JE; ++J)
*J = SE.getAnyExtendExpr(*J, SrcTy);
// TODO: This assumes we've done basic processing on all uses and
// have an idea what the register usage is.
if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
continue;
(void)InsertFormula(LU, LUIdx, F);
}
}
}
namespace {
/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
/// defer modifications so that the search phase doesn't have to worry about
/// the data structures moving underneath it.
struct WorkItem {
size_t LUIdx;
int64_t Imm;
const SCEV *OrigReg;
WorkItem(size_t LI, int64_t I, const SCEV *R)
: LUIdx(LI), Imm(I), OrigReg(R) {}
void print(raw_ostream &OS) const;
void dump() const;
};
}
void WorkItem::print(raw_ostream &OS) const {
OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
<< " , add offset " << Imm;
}
void WorkItem::dump() const {
print(errs()); errs() << '\n';
}
/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
/// distance apart and try to form reuse opportunities between them.
void LSRInstance::GenerateCrossUseConstantOffsets() {
// Group the registers by their value without any added constant offset.
typedef std::map<int64_t, const SCEV *> ImmMapTy;
typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
RegMapTy Map;
DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
SmallVector<const SCEV *, 8> Sequence;
for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
I != E; ++I) {
const SCEV *Reg = *I;
int64_t Imm = ExtractImmediate(Reg, SE);
std::pair<RegMapTy::iterator, bool> Pair =
Map.insert(std::make_pair(Reg, ImmMapTy()));
if (Pair.second)
Sequence.push_back(Reg);
Pair.first->second.insert(std::make_pair(Imm, *I));
UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(*I);
}
// Now examine each set of registers with the same base value. Build up
// a list of work to do and do the work in a separate step so that we're
// not adding formulae and register counts while we're searching.
SmallVector<WorkItem, 32> WorkItems;
SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
E = Sequence.end(); I != E; ++I) {
const SCEV *Reg = *I;
const ImmMapTy &Imms = Map.find(Reg)->second;
// It's not worthwhile looking for reuse if there's only one offset.
if (Imms.size() == 1)
continue;
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
J != JE; ++J)
dbgs() << ' ' << J->first;
dbgs() << '\n');
// Examine each offset.
for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
J != JE; ++J) {
const SCEV *OrigReg = J->second;
int64_t JImm = J->first;
const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
if (!isa<SCEVConstant>(OrigReg) &&
UsedByIndicesMap[Reg].count() == 1) {
DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');
continue;
}
// Conservatively examine offsets between this orig reg a few selected
// other orig regs.
ImmMapTy::const_iterator OtherImms[] = {
Imms.begin(), prior(Imms.end()),
Imms.lower_bound((Imms.begin()->first + prior(Imms.end())->first) / 2)
};
for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
ImmMapTy::const_iterator M = OtherImms[i];
if (M == J || M == JE) continue;
// Compute the difference between the two.
int64_t Imm = (uint64_t)JImm - M->first;
for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
LUIdx = UsedByIndices.find_next(LUIdx))
// Make a memo of this use, offset, and register tuple.
if (UniqueItems.insert(std::make_pair(LUIdx, Imm)))
WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
}
}
}
Map.clear();
Sequence.clear();
UsedByIndicesMap.clear();
UniqueItems.clear();
// Now iterate through the worklist and add new formulae.
for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
E = WorkItems.end(); I != E; ++I) {
const WorkItem &WI = *I;
size_t LUIdx = WI.LUIdx;
LSRUse &LU = Uses[LUIdx];
int64_t Imm = WI.Imm;
const SCEV *OrigReg = WI.OrigReg;
const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
// Use the immediate in the scaled register.
if (F.ScaledReg == OrigReg) {
int64_t Offs = (uint64_t)F.AM.BaseOffs +
Imm * (uint64_t)F.AM.Scale;
// Don't create 50 + reg(-50).
if (F.referencesReg(SE.getSCEV(
ConstantInt::get(IntTy, -(uint64_t)Offs))))
continue;
Formula NewF = F;
NewF.AM.BaseOffs = Offs;
if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, TLI))
continue;
NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
// If the new scale is a constant in a register, and adding the constant
// value to the immediate would produce a value closer to zero than the
// immediate itself, then the formula isn't worthwhile.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
if (C->getValue()->isNegative() !=
(NewF.AM.BaseOffs < 0) &&
(C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale))
.ule(abs64(NewF.AM.BaseOffs)))
continue;
// OK, looks good.
(void)InsertFormula(LU, LUIdx, NewF);
} else {
// Use the immediate in a base register.
for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
const SCEV *BaseReg = F.BaseRegs[N];
if (BaseReg != OrigReg)
continue;
Formula NewF = F;
NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, TLI)) {
if (!TLI ||
!TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
continue;
NewF = F;
NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
}
NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
// If the new formula has a constant in a register, and adding the
// constant value to the immediate would produce a value closer to
// zero than the immediate itself, then the formula isn't worthwhile.
for (SmallVectorImpl<const SCEV *>::const_iterator
J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
J != JE; ++J)
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
if ((C->getValue()->getValue() + NewF.AM.BaseOffs).abs().slt(
abs64(NewF.AM.BaseOffs)) &&
(C->getValue()->getValue() +
NewF.AM.BaseOffs).countTrailingZeros() >=
CountTrailingZeros_64(NewF.AM.BaseOffs))
goto skip_formula;
// Ok, looks good.
(void)InsertFormula(LU, LUIdx, NewF);
break;
skip_formula:;
}
}
}
}
}
/// GenerateAllReuseFormulae - Generate formulae for each use.
void
LSRInstance::GenerateAllReuseFormulae() {
// This is split into multiple loops so that hasRegsUsedByUsesOtherThan
// queries are more precise.
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
}
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateScales(LU, LUIdx, LU.Formulae[i]);
}
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
}
GenerateCrossUseConstantOffsets();
DEBUG(dbgs() << "\n"
"After generating reuse formulae:\n";
print_uses(dbgs()));
/// If there are multiple formulae with the same set of registers used
/// by other uses, pick the best one and delete the others.
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
DenseSet<const SCEV *> VisitedRegs;
SmallPtrSet<const SCEV *, 16> Regs;
#ifndef NDEBUG
bool ChangedFormulae = false;
#endif
// Collect the best formula for each unique set of shared registers. This
// is reset for each use.
typedef DenseMap<SmallVector<const SCEV *, 2>, size_t, UniquifierDenseMapInfo>
BestFormulaeTy;
BestFormulaeTy BestFormulae;
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
bool Any = false;
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
for (size_t FIdx = 0, NumForms = LU.Formulae.size();
FIdx != NumForms; ++FIdx) {
Formula &F = LU.Formulae[FIdx];
SmallVector<const SCEV *, 2> Key;
for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
JE = F.BaseRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
Key.push_back(Reg);
}
if (F.ScaledReg &&
RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for
// uniquifying.
std::sort(Key.begin(), Key.end());
std::pair<BestFormulaeTy::const_iterator, bool> P =
BestFormulae.insert(std::make_pair(Key, FIdx));
if (!P.second) {
Formula &Best = LU.Formulae[P.first->second];
Cost CostF;
CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
Regs.clear();
Cost CostBest;
CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
Regs.clear();
if (CostF < CostBest)
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
" in favor of formula "; Best.print(dbgs());
dbgs() << '\n');
#ifndef NDEBUG
ChangedFormulae = true;
LU.DeleteFormula(F);
--FIdx;
--NumForms;
Any = true;
continue;
}
// Now that we've filtered out some formulae, recompute the Regs set.
if (Any)
LU.RecomputeRegs(LUIdx, RegUses);
// Reset this to prepare for the next use.
BestFormulae.clear();
}
DEBUG(if (ChangedFormulae) {
dbgs() << "\n"
"After filtering out undesirable candidates:\n";
print_uses(dbgs());
});
}
// This is a rough guess that seems to work fairly well.
static const size_t ComplexityLimit = UINT16_MAX;
/// EstimateSearchSpaceComplexity - Estimate the worst-case number of
/// solutions the solver might have to consider. It almost never considers
/// this many solutions because it prune the search space, but the pruning
/// isn't always sufficient.
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
E = Uses.end(); I != E; ++I) {
size_t FSize = I->Formulae.size();
if (FSize >= ComplexityLimit) {
Power = ComplexityLimit;
break;
}
Power *= FSize;
if (Power >= ComplexityLimit)
break;
}
return Power;
}
/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset
/// of the registers of another formula, it won't help reduce register
/// pressure (though it may not necessarily hurt register pressure); remove
/// it to simplify the system.
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
"which use a superset of registers used by other "
"formulae.\n");
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
bool Any = false;
for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
Formula &F = LU.Formulae[i];
// Look for a formula with a constant or GV in a register. If the use
// also has a formula with that same value in an immediate field,
// delete the one that uses a register.
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
for (SmallVectorImpl<const SCEV *>::const_iterator
I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
Formula NewF = F;
NewF.AM.BaseOffs += C->getValue()->getSExtValue();
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
LU.DeleteFormula(F);
--i;
--e;
Any = true;
break;
}
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
if (!F.AM.BaseGV) {
Formula NewF = F;
NewF.AM.BaseGV = GV;
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +