Newer
Older
unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<LiveInterval*> &NewVRegs) {
Jakob Stoklund Olesen
committed
float BestCost = Hysteresis * calcSpillCost();
DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n');
const unsigned NoCand = ~0u;
unsigned BestCand = NoCand;
Jakob Stoklund Olesen
committed
Order.rewind();
while (unsigned PhysReg = Order.next()) {
// Discard bad candidates before we run out of interference cache cursors.
// This will only affect register classes with a lot of registers (>32).
if (NumCands == IntfCache.getMaxCursors()) {
unsigned WorstCount = ~0u;
unsigned Worst = 0;
for (unsigned i = 0; i != NumCands; ++i) {
if (i == BestCand)
continue;
unsigned Count = GlobalCand[i].LiveBundles.count();
if (Count < WorstCount)
Worst = i, WorstCount = Count;
}
--NumCands;
GlobalCand[Worst] = GlobalCand[NumCands];
}
if (GlobalCand.size() <= NumCands)
GlobalCand.resize(NumCands+1);
GlobalSplitCandidate &Cand = GlobalCand[NumCands];
Cand.reset(IntfCache, PhysReg);
Jakob Stoklund Olesen
committed
SpillPlacer->prepare(Cand.LiveBundles);
Jakob Stoklund Olesen
committed
float Cost;
if (!addSplitConstraints(Cand.Intf, Cost)) {
Jakob Stoklund Olesen
committed
DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
Jakob Stoklund Olesen
committed
continue;
}
Jakob Stoklund Olesen
committed
DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost);
Jakob Stoklund Olesen
committed
if (Cost >= BestCost) {
DEBUG({
if (BestCand == NoCand)
dbgs() << " worse than no bundles\n";
else
dbgs() << " worse than "
<< PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
});
continue;
Jakob Stoklund Olesen
committed
SpillPlacer->finish();
// No live bundles, defer to splitSingleBlocks().
if (!Cand.LiveBundles.any()) {
DEBUG(dbgs() << " no bundles.\n");
continue;
Cost += calcGlobalSplitCost(Cand);
DEBUG({
dbgs() << ", total = " << Cost << " with bundles";
for (int i = Cand.LiveBundles.find_first(); i>=0;
i = Cand.LiveBundles.find_next(i))
dbgs() << " EB#" << i;
dbgs() << ".\n";
});
Jakob Stoklund Olesen
committed
if (Cost < BestCost) {
Jakob Stoklund Olesen
committed
BestCost = Hysteresis * Cost; // Prevent rounding effects.
}
}
return 0;
splitAroundRegion(VirtReg, GlobalCand[BestCand], NewVRegs);
return 0;
}
//===----------------------------------------------------------------------===//
// Local Splitting
//===----------------------------------------------------------------------===//
/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
/// in order to use PhysReg between two entries in SA->UseSlots.
///
/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1].
///
void RAGreedy::calcGapWeights(unsigned PhysReg,
SmallVectorImpl<float> &GapWeight) {
Jakob Stoklund Olesen
committed
assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
const unsigned NumGaps = Uses.size()-1;
// Start and end points for the interference check.
SlotIndex StartIdx = BI.LiveIn ? BI.FirstUse.getBaseIndex() : BI.FirstUse;
SlotIndex StopIdx = BI.LiveOut ? BI.LastUse.getBoundaryIndex() : BI.LastUse;
GapWeight.assign(NumGaps, 0.0f);
// Add interference from each overlapping register.
for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI)
.checkInterference())
continue;
// We know that VirtReg is a continuous interval from FirstUse to LastUse,
// so we don't need InterferenceQuery.
//
// Interference that overlaps an instruction is counted in both gaps
// surrounding the instruction. The exception is interference before
// StartIdx and after StopIdx.
//
LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx);
for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
// Skip the gaps before IntI.
while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
if (++Gap == NumGaps)
break;
if (Gap == NumGaps)
break;
// Update the gaps covered by IntI.
const float weight = IntI.value()->weight;
for (; Gap != NumGaps; ++Gap) {
GapWeight[Gap] = std::max(GapWeight[Gap], weight);
if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
break;
}
if (Gap == NumGaps)
break;
}
}
}
/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
/// basic block.
///
unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<LiveInterval*> &NewVRegs) {
Jakob Stoklund Olesen
committed
assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
// Note that it is possible to have an interval that is live-in or live-out
// while only covering a single block - A phi-def can use undef values from
// predecessors, and the block could be a single-block loop.
// We don't bother doing anything clever about such a case, we simply assume
// that the interval is continuous from FirstUse to LastUse. We should make
// sure that we don't do anything illegal to such an interval, though.
const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
if (Uses.size() <= 2)
return 0;
const unsigned NumGaps = Uses.size()-1;
DEBUG({
dbgs() << "tryLocalSplit: ";
for (unsigned i = 0, e = Uses.size(); i != e; ++i)
dbgs() << ' ' << SA->UseSlots[i];
dbgs() << '\n';
});
// Since we allow local split results to be split again, there is a risk of
// creating infinite loops. It is tempting to require that the new live
// ranges have less instructions than the original. That would guarantee
// convergence, but it is too strict. A live range with 3 instructions can be
// split 2+3 (including the COPY), and we want to allow that.
//
// Instead we use these rules:
//
// 1. Allow any split for ranges with getStage() < RS_Local. (Except for the
// noop split, of course).
// 2. Require progress be made for ranges with getStage() >= RS_Local. All
// the new ranges must have fewer instructions than before the split.
// 3. New ranges with the same number of instructions are marked RS_Local,
// smaller ranges are marked RS_New.
//
// These rules allow a 3 -> 2+3 split once, which we need. They also prevent
// excessive splitting and infinite loops.
//
bool ProgressRequired = getStage(VirtReg) >= RS_Local;
// Best split candidate.
unsigned BestBefore = NumGaps;
unsigned BestAfter = 0;
float BestDiff = 0;
const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber());
SmallVector<float, 8> GapWeight;
Order.rewind();
while (unsigned PhysReg = Order.next()) {
// Keep track of the largest spill weight that would need to be evicted in
// order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
calcGapWeights(PhysReg, GapWeight);
// Try to find the best sequence of gaps to close.
// The new spill weight must be larger than any gap interference.
// We will split before Uses[SplitBefore] and after Uses[SplitAfter].
unsigned SplitBefore = 0, SplitAfter = 1;
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
// MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
// It is the spill weight that needs to be evicted.
float MaxGap = GapWeight[0];
for (;;) {
// Live before/after split?
const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
<< Uses[SplitBefore] << '-' << Uses[SplitAfter]
<< " i=" << MaxGap);
// Stop before the interval gets so big we wouldn't be making progress.
if (!LiveBefore && !LiveAfter) {
DEBUG(dbgs() << " all\n");
break;
}
// Should the interval be extended or shrunk?
bool Shrink = true;
// How many gaps would the new range have?
unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
// Legally, without causing looping?
bool Legal = !ProgressRequired || NewGaps < NumGaps;
if (Legal && MaxGap < HUGE_VALF) {
// Estimate the new spill weight. Each instruction reads or writes the
// register. Conservatively assume there are no read-modify-write
// instructions.
// Try to guess the size of the new interval.
const float EstWeight = normalizeSpillWeight(blockFreq * (NewGaps + 1),
Uses[SplitBefore].distance(Uses[SplitAfter]) +
(LiveBefore + LiveAfter)*SlotIndex::InstrDist);
// Would this split be possible to allocate?
// Never allocate all gaps, we wouldn't be making progress.
DEBUG(dbgs() << " w=" << EstWeight);
if (EstWeight * Hysteresis >= MaxGap) {
float Diff = EstWeight - MaxGap;
if (Diff > BestDiff) {
DEBUG(dbgs() << " (best)");
BestDiff = Hysteresis * Diff;
BestBefore = SplitBefore;
BestAfter = SplitAfter;
}
}
}
// Try to shrink.
if (Shrink) {
if (++SplitBefore < SplitAfter) {
DEBUG(dbgs() << " shrink\n");
// Recompute the max when necessary.
if (GapWeight[SplitBefore - 1] >= MaxGap) {
MaxGap = GapWeight[SplitBefore];
for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
MaxGap = std::max(MaxGap, GapWeight[i]);
}
continue;
}
MaxGap = 0;
}
// Try to extend the interval.
if (SplitAfter >= NumGaps) {
DEBUG(dbgs() << " end\n");
break;
}
DEBUG(dbgs() << " extend\n");
MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
}
}
// Didn't find any candidates?
if (BestBefore == NumGaps)
return 0;
DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore]
<< '-' << Uses[BestAfter] << ", " << BestDiff
<< ", " << (BestAfter - BestBefore + 1) << " instrs\n");
LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
Jakob Stoklund Olesen
committed
SE->reset(LREdit);
SE->openIntv();
SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
SE->useIntv(SegStart, SegStop);
SmallVector<unsigned, 8> IntvMap;
SE->finish(&IntvMap);
DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
// If the new range has the same number of instructions as before, mark it as
// RS_Local so the next split will be forced to make progress. Otherwise,
// leave the new intervals as RS_New so they can compete.
bool LiveBefore = BestBefore != 0 || BI.LiveIn;
bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
if (NewGaps >= NumGaps) {
DEBUG(dbgs() << "Tagging non-progress ranges: ");
assert(!ProgressRequired && "Didn't make progress when it was required.");
for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
if (IntvMap[i] == 1) {
setStage(*LREdit.get(i), RS_Local);
DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg));
}
DEBUG(dbgs() << '\n');
}
//===----------------------------------------------------------------------===//
// Live Range Splitting
//===----------------------------------------------------------------------===//
/// trySplit - Try to split VirtReg or one of its interferences, making it
/// assignable.
/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<LiveInterval*>&NewVRegs) {
// Local intervals are handled separately.
if (LIS->intervalIsInOneMBB(VirtReg)) {
NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
Jakob Stoklund Olesen
committed
SA->analyze(&VirtReg);
return tryLocalSplit(VirtReg, Order, NewVRegs);
}
NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
Jakob Stoklund Olesen
committed
// Don't iterate global splitting.
// Move straight to spilling if this range was produced by a global split.
if (getStage(VirtReg) >= RS_Global)
Jakob Stoklund Olesen
committed
return 0;
SA->analyze(&VirtReg);
// FIXME: SplitAnalysis may repair broken live ranges coming from the
// coalescer. That may cause the range to become allocatable which means that
// tryRegionSplit won't be making progress. This check should be replaced with
// an assertion when the coalescer is fixed.
if (SA->didRepairRange()) {
// VirtReg has changed, so all cached queries are invalid.
Jakob Stoklund Olesen
committed
invalidateVirtRegs();
if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
return PhysReg;
}
// First try to split around a region spanning multiple blocks.
unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
if (PhysReg || !NewVRegs.empty())
return PhysReg;
// Then isolate blocks with multiple uses.
SplitAnalysis::BlockPtrSet Blocks;
if (SA->getMultiUseBlocks(Blocks)) {
LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
SE->reset(LREdit);
SE->splitSingleBlocks(Blocks);
setStage(NewVRegs.begin(), NewVRegs.end(), RS_Global);
if (VerifyEnabled)
MF->verify(this, "After splitting live range around basic blocks");
}
// Don't assign any physregs.
return 0;
}
Jakob Stoklund Olesen
committed
//===----------------------------------------------------------------------===//
// Main Entry Point
//===----------------------------------------------------------------------===//
unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
SmallVectorImpl<LiveInterval*> &NewVRegs) {
Jakob Stoklund Olesen
committed
// First try assigning a free register.
Jakob Stoklund Olesen
committed
AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
return PhysReg;
LiveRangeStage Stage = getStage(VirtReg);
DEBUG(dbgs() << StageName[Stage]
<< " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n');
Jakob Stoklund Olesen
committed
// Try to evict a less worthy live range, but only for ranges from the primary
// queue. The RS_Second ranges already failed to do this, and they should not
// get a second chance until they have been split.
if (Stage != RS_Second)
if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs))
return PhysReg;
assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
// The first time we see a live range, don't try to split or spill.
// Wait until the second time, when all smaller ranges have been allocated.
// This gives a better picture of the interference to split around.
if (Stage == RS_First) {
setStage(VirtReg, RS_Second);
NewVRegs.push_back(&VirtReg);
return 0;
}
Jakob Stoklund Olesen
committed
// If we couldn't allocate a register from spilling, there is probably some
// invalid inline assembly. The base class wil report it.
if (Stage >= RS_Spill || !VirtReg.isSpillable())
Jakob Stoklund Olesen
committed
return ~0u;
Jakob Stoklund Olesen
committed
Jakob Stoklund Olesen
committed
// Try splitting VirtReg or interferences.
unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
if (PhysReg || !NewVRegs.empty())
Jakob Stoklund Olesen
committed
// Finally spill VirtReg itself.
NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
LiveRangeEdit LRE(VirtReg, NewVRegs, this);
spiller().spill(LRE);
Jakob Stoklund Olesen
committed
setStage(NewVRegs.begin(), NewVRegs.end(), RS_Spill);
if (VerifyEnabled)
MF->verify(this, "After spilling");
// The live virtual register requesting allocation was spilled, so tell
// the caller not to allocate anything during this round.
return 0;
}
bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
<< "********** Function: "
<< ((Value*)mf.getFunction())->getName() << '\n');
MF = &mf;
Jakob Stoklund Olesen
committed
if (VerifyEnabled)
MF->verify(this, "Before greedy register allocator");
Jakob Stoklund Olesen
committed
RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
Indexes = &getAnalysis<SlotIndexes>();
DomTree = &getAnalysis<MachineDominatorTree>();
Jakob Stoklund Olesen
committed
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
Jakob Stoklund Olesen
committed
Loops = &getAnalysis<MachineLoopInfo>();
Bundles = &getAnalysis<EdgeBundles>();
SpillPlacer = &getAnalysis<SpillPlacement>();
DebugVars = &getAnalysis<LiveDebugVariables>();
Jakob Stoklund Olesen
committed
SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
Jakob Stoklund Olesen
committed
SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree));
ExtraRegInfo.clear();
ExtraRegInfo.resize(MRI->getNumVirtRegs());
NextCascade = 1;
IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI);
Jakob Stoklund Olesen
committed
allocatePhysRegs();
addMBBLiveIns(MF);
Jakob Stoklund Olesen
committed
LIS->addKillFlags();
Jakob Stoklund Olesen
committed
{
NamedRegionTimer T("Rewriter", TimerGroupName, TimePassesIsEnabled);
Jakob Stoklund Olesen
committed
VRM->rewrite(Indexes);
Jakob Stoklund Olesen
committed
}
// Write out new DBG_VALUE instructions.
DebugVars->emitDebugValues(VRM);
// The pass output is in VirtRegMap. Release all the transient data.
releaseMemory();
return true;
}