Newer
Older
//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "misched"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/ScheduleDAGILP.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/PriorityQueue.h"
Andrew Trick
committed
#include <queue>
namespace llvm {
cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
cl::desc("Force bottom-up list scheduling"));
}
#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
cl::desc("Pop up a window to show MISched dags after they are processed"));
Lang Hames
committed
static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
#else
static bool ViewMISchedDAGs = false;
#endif // NDEBUG
// Threshold to very roughly model an out-of-order processor's instruction
// buffers. If the actual value of this threshold matters much in practice, then
// it can be specified by the machine model. For now, it's an experimental
// tuning knob to determine when and if it matters.
static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
cl::desc("Allow expected latency to exceed the critical path by N cycles "
"before attempting to balance ILP"),
cl::init(10U));
// Experimental heuristics
static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
cl::desc("Enable load clustering."), cl::init(true));
// Experimental heuristics
static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
cl::desc("Enable scheduling for macro fusion."), cl::init(true));
Andrew Trick
committed
//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//
MachineSchedContext::MachineSchedContext():
MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
RegClassInfo = new RegisterClassInfo();
}
MachineSchedContext::~MachineSchedContext() {
delete RegClassInfo;
}
/// MachineScheduler runs after coalescing and before register allocation.
class MachineScheduler : public MachineSchedContext,
public MachineFunctionPass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
virtual void releaseMemory() {}
virtual bool runOnMachineFunction(MachineFunction&);
virtual void print(raw_ostream &O, const Module* = 0) const;
static char ID; // Class identification, replacement for typeinfo
};
} // namespace
char MachineScheduler::ID = 0;
char &llvm::MachineSchedulerID = MachineScheduler::ID;
INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
"Machine Instruction Scheduler", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, "misched",
"Machine Instruction Scheduler", false, false)
MachineScheduler::MachineScheduler()
: MachineFunctionPass(ID) {
initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequiredID(MachineDominatorsID);
AU.addRequired<MachineLoopInfo>();
AU.addRequired<AliasAnalysis>();
AU.addRequired<TargetPassConfig>();
AU.addRequired<SlotIndexes>();
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
MachineFunctionPass::getAnalysisUsage(AU);
}
MachinePassRegistry MachineSchedRegistry::Registry;
/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
return 0;
}
/// MachineSchedOpt allows command line selection of the scheduler.
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
RegisterPassParser<MachineSchedRegistry> >
MachineSchedOpt("misched",
cl::init(&useDefaultMachineSched), cl::Hidden,
cl::desc("Machine instruction scheduler to use"));
static MachineSchedRegistry
DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
useDefaultMachineSched);
/// Forward declare the standard machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
/// Decrement this iterator until reaching the top or a non-debug instr.
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
assert(I != Beg && "reached the top of the region, cannot decrement");
while (--I != Beg) {
if (!I->isDebugValue())
break;
}
return I;
}
/// If this iterator is a debug value, increment until reaching the End or a
/// non-debug instruction.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
if (!I->isDebugValue())
break;
}
return I;
}
/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
/// consistent with the DAG builder, which traverses the interior of the
/// scheduling regions bottom-up.
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boudary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs()));
// Initialize the context of the pass.
MF = &mf;
MLI = &getAnalysis<MachineLoopInfo>();
MDT = &getAnalysis<MachineDominatorTree>();
PassConfig = &getAnalysis<TargetPassConfig>();
AA = &getAnalysis<AliasAnalysis>();
LIS = &getAnalysis<LiveIntervals>();
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
RegClassInfo->runOnMachineFunction(*MF);
Andrew Trick
committed
// Select the scheduler, or set the default.
MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
if (Ctor == useDefaultMachineSched) {
// Get the default scheduler set by the target.
Ctor = MachineSchedRegistry::getDefault();
if (!Ctor) {
Ctor = createConvergingSched;
MachineSchedRegistry::setDefault(Ctor);
}
}
// Instantiate the selected scheduler.
OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
// Visit all machine basic blocks.
Andrew Trick
committed
//
// TODO: Visit blocks in global postorder or postorder within the bottom-up
// loop tree. Then we can optionally compute global RegPressure.
for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
MBB != MBBEnd; ++MBB) {
Scheduler->startBlock(MBB);
// Break the block into scheduling regions [I, RegionEnd), and schedule each
// region as soon as it is discovered. RegionEnd points the scheduling
Andrew Trick
committed
// boundary at the bottom of the region. The DAG does not include RegionEnd,
// but the region does (i.e. the next RegionEnd is above the previous
// RegionBegin). If the current block has no terminator then RegionEnd ==
// MBB->end() for the bottom region.
//
// The Scheduler may insert instructions during either schedule() or
// exitRegion(), even for empty regions. So the local iterators 'I' and
// 'RegionEnd' are invalid across these calls.
unsigned RemainingInstrs = MBB->size();
for(MachineBasicBlock::iterator RegionEnd = MBB->end();
Andrew Trick
committed
RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
Andrew Trick
committed
// Avoid decrementing RegionEnd for blocks with no terminator.
if (RegionEnd != MBB->end()
|| TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
--RegionEnd;
// Count the boundary instruction.
--RemainingInstrs;
// The next region starts above the previous region. Look backward in the
// instruction stream until we find the nearest boundary.
MachineBasicBlock::iterator I = RegionEnd;
for(;I != MBB->begin(); --I, --RemainingInstrs) {
if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
break;
}
// Notify the scheduler of the region, even if we may skip scheduling
// it. Perhaps it still needs to be bundled.
Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);
// Skip empty scheduling regions (0 or 1 schedulable instructions).
if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
// Close the current region. Bundle the terminator if needed.
Andrew Trick
committed
// This invalidates 'RegionEnd' and 'I'.
Scheduler->exitRegion();
continue;
}
DEBUG(dbgs() << "********** MI Scheduling **********\n");
Craig Topper
committed
DEBUG(dbgs() << MF->getName()
<< ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: ";
if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
else dbgs() << "End";
dbgs() << " Remaining: " << RemainingInstrs << "\n");
Andrew Trick
committed
// This invalidates 'RegionEnd' and 'I'.
Scheduler->schedule();
Scheduler->exitRegion();
// Scheduling has invalidated the current iterator 'I'. Ask the
// scheduler for the top of it's scheduled region.
RegionEnd = Scheduler->begin();
}
assert(RemainingInstrs == 0 && "Instruction count mismatch!");
Scheduler->finishBlock();
}
Scheduler->finalizeSchedule();
DEBUG(LIS->print(dbgs()));
return true;
}
void MachineScheduler::print(raw_ostream &O, const Module* m) const {
// unimplemented
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ReadyQueue::dump() {
dbgs() << Name << ": ";
for (unsigned i = 0, e = Queue.size(); i < e; ++i)
dbgs() << Queue[i]->NodeNum << " ";
dbgs() << "\n";
}
#endif
//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
// preservation.
//===----------------------------------------------------------------------===//
Andrew Trick
committed
bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
if (SuccSU != &ExitSU) {
// Do not use WillCreateCycle, it assumes SD scheduling.
// If Pred is reachable from Succ, then the edge creates a cycle.
if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
return false;
Topo.AddPred(SuccSU, PredDep.getSUnit());
}
SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
// Return true regardless of whether a new edge needed to be inserted.
return true;
}
Andrew Trick
committed
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
Andrew Trick
committed
SUnit *SuccSU = SuccEdge->getSUnit();
if (SuccEdge->isWeak()) {
--SuccSU->WeakPredsLeft;
if (SuccEdge->isCluster())
NextClusterSucc = SuccSU;
Andrew Trick
committed
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
SuccSU->dump(this);
dbgs() << " has been released too many times!\n";
llvm_unreachable(0);
}
#endif
--SuccSU->NumPredsLeft;
if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
SchedImpl->releaseTopNode(SuccSU);
Andrew Trick
committed
}
/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
Andrew Trick
committed
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
releaseSucc(SU, &*I);
}
}
/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
SUnit *PredSU = PredEdge->getSUnit();
if (PredEdge->isWeak()) {
--PredSU->WeakSuccsLeft;
if (PredEdge->isCluster())
NextClusterPred = PredSU;
#ifndef NDEBUG
if (PredSU->NumSuccsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
PredSU->dump(this);
dbgs() << " has been released too many times!\n";
llvm_unreachable(0);
}
#endif
--PredSU->NumSuccsLeft;
if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
SchedImpl->releaseBottomNode(PredSU);
}
/// releasePredecessors - Call releasePred on each of SU's predecessors.
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
releasePred(SU, &*I);
}
}
void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
MachineBasicBlock::iterator InsertPos) {
// Advance RegionBegin if the first instruction moves down.
if (&*RegionBegin == MI)
++RegionBegin;
// Update the instruction stream.
BB->splice(InsertPos, BB, MI);
Andrew Trick
committed
LIS->handleMove(MI, /*UpdateFlags=*/true);
// Recede RegionBegin if an instruction moves above the first.
if (RegionBegin == InsertPos)
RegionBegin = MI;
}
bool ScheduleDAGMI::checkSchedLimit() {
#ifndef NDEBUG
if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
CurrentTop = CurrentBottom;
return false;
}
++NumInstrsScheduled;
#endif
return true;
}
Andrew Trick
committed
/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
unsigned endcount)
{
ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
Andrew Trick
committed
// For convenience remember the end of the liveness region.
LiveRegionEnd =
(RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
}
// Setup the register pressure trackers for the top scheduled top and bottom
// scheduled regions.
void ScheduleDAGMI::initRegPressure() {
TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
// Close the RPTracker to finalize live ins.
RPTracker.closeRegion();
DEBUG(RPTracker.getPressure().dump(TRI));
Andrew Trick
committed
// Initialize the live ins and live outs.
TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
// Close one end of the tracker so we can call
// getMaxUpward/DownwardPressureDelta before advancing across any
// instructions. This converts currently live regs into live ins/outs.
TopRPTracker.closeTop();
BotRPTracker.closeBottom();
// Account for liveness generated by the region boundary.
if (LiveRegionEnd != RegionEnd)
BotRPTracker.recede();
assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
// Cache the list of excess pressure sets in this region. This will also track
// the max pressure in the scheduled code for these sets.
RegionCriticalPSets.clear();
std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
unsigned Limit = TRI->getRegPressureSetLimit(i);
DEBUG(dbgs() << TRI->getRegPressureSetName(i)
<< "Limit " << Limit
<< " Actual " << RegionPressure[i] << "\n");
if (RegionPressure[i] > Limit)
RegionCriticalPSets.push_back(PressureElement(i, 0));
}
DEBUG(dbgs() << "Excess PSets: ";
for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
dbgs() << TRI->getRegPressureSetName(
RegionCriticalPSets[i].PSetID) << " ";
dbgs() << "\n");
}
// FIXME: When the pressure tracker deals in pressure differences then we won't
// iterate over all RegionCriticalPSets[i].
void ScheduleDAGMI::
updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
unsigned ID = RegionCriticalPSets[i].PSetID;
int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
if ((int)NewMaxPressure[ID] > MaxUnits)
MaxUnits = NewMaxPressure[ID];
}
Andrew Trick
committed
}
/// schedule - Called back from MachineScheduler::runOnMachineFunction
Andrew Trick
committed
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
///
/// This is a skeletal driver, with all the functionality pushed into helpers,
/// so that it can be easilly extended by experimental schedulers. Generally,
/// implementing MachineSchedStrategy should be sufficient to implement a new
/// scheduling algorithm. However, if a scheduler further subclasses
/// ScheduleDAGMI then it will want to override this virtual method in order to
/// update any specialized state.
void ScheduleDAGMI::schedule() {
buildDAGWithRegPressure();
Topo.InitDAGTopologicalSorting();
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this));
if (ViewMISchedDAGs) viewGraph();
initQueues();
bool IsTopNode = false;
while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
assert(!SU->isScheduled && "Node already scheduled");
if (!checkSchedLimit())
break;
scheduleMI(SU, IsTopNode);
updateQueues(SU, IsTopNode);
}
assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
placeDebugValues();
DEBUG({
unsigned BBNum = top()->getParent()->getNumber();
dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
dumpSchedule();
dbgs() << '\n';
});
}
/// Build the DAG and setup three register pressure trackers.
void ScheduleDAGMI::buildDAGWithRegPressure() {
Andrew Trick
committed
// Initialize the register pressure tracker used by buildSchedGraph.
RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
// Account for liveness generate by the region boundary.
if (LiveRegionEnd != RegionEnd)
RPTracker.recede();
Andrew Trick
committed
Andrew Trick
committed
// Build the DAG, and compute current register pressure.
Andrew Trick
committed
buildSchedGraph(AA, &RPTracker);
if (ViewMISchedDAGs) viewGraph();
Andrew Trick
committed
Andrew Trick
committed
// Initialize top/bottom trackers after computing region pressure.
initRegPressure();
Andrew Trick
committed
/// Apply each ScheduleDAGMutation step in order.
void ScheduleDAGMI::postprocessDAG() {
for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
Mutations[i]->apply(this);
}
}
// Release all DAG roots for scheduling.
//
// Nodes with unreleased weak edges can still be roots.
void ScheduleDAGMI::releaseRoots() {
SmallVector<SUnit*, 16> BotRoots;
for (std::vector<SUnit>::iterator
I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
// A SUnit is ready to top schedule if it has no predecessors.
if (!I->NumPredsLeft && SU != &EntrySU)
SchedImpl->releaseTopNode(SU);
// A SUnit is ready to bottom schedule if it has no successors.
if (!I->NumSuccsLeft && SU != &ExitSU)
BotRoots.push_back(SU);
}
// Release bottom roots in reverse order so the higher priority nodes appear
// first. This is more natural and slightly more efficient.
for (SmallVectorImpl<SUnit*>::const_reverse_iterator
I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
SchedImpl->releaseBottomNode(*I);
}
/// Identify DAG roots and setup scheduler queues.
void ScheduleDAGMI::initQueues() {
NextClusterSucc = NULL;
NextClusterPred = NULL;
// Initialize the strategy before modifying the DAG.
SchedImpl->initialize(this);
// Release all DAG roots for scheduling, not including EntrySU/ExitSU.
releaseRoots();
Andrew Trick
committed
releaseSuccessors(&EntrySU);
releasePredecessors(&ExitSU);
Andrew Trick
committed
SchedImpl->registerRoots();
CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
CurrentBottom = RegionEnd;
Andrew Trick
committed
/// Move an instruction and update register pressure.
void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
// Move the instruction to its new location in the instruction stream.
MachineInstr *MI = SU->getInstr();
Andrew Trick
committed
if (IsTopNode) {
assert(SU->isTopReady() && "node still has unscheduled dependencies");
if (&*CurrentTop == MI)
CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
else {
moveInstruction(MI, CurrentTop);
TopRPTracker.setPos(MI);
// Update top scheduled pressure.
TopRPTracker.advance();
assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
}
else {
assert(SU->isBottomReady() && "node still has unscheduled dependencies");
MachineBasicBlock::iterator priorII =
priorNonDebug(CurrentBottom, CurrentTop);
if (&*priorII == MI)
CurrentBottom = priorII;
Andrew Trick
committed
else {
if (&*CurrentTop == MI) {
CurrentTop = nextIfDebug(++CurrentTop, priorII);
TopRPTracker.setPos(CurrentTop);
moveInstruction(MI, CurrentBottom);
CurrentBottom = MI;
Andrew Trick
committed
}
// Update bottom scheduled pressure.
BotRPTracker.recede();
assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
Andrew Trick
committed
}
/// Update scheduler queues after scheduling an instruction.
void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
// Release dependent instructions for scheduling.
if (IsTopNode)
releaseSuccessors(SU);
else
releasePredecessors(SU);
SU->isScheduled = true;
// Notify the scheduling strategy after updating the DAG.
SchedImpl->schedNode(SU, IsTopNode);
}
/// Reinsert any remaining debug_values, just like the PostRA scheduler.
void ScheduleDAGMI::placeDebugValues() {
// If first instruction was a DBG_VALUE then put it back.
if (FirstDbgValue) {
BB->splice(RegionBegin, BB, FirstDbgValue);
RegionBegin = FirstDbgValue;
}
for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
MachineInstr *DbgValue = P.first;
MachineBasicBlock::iterator OrigPrevMI = P.second;
BB->splice(++OrigPrevMI, BB, DbgValue);
if (OrigPrevMI == llvm::prior(RegionEnd))
RegionEnd = DbgValue;
}
DbgValues.clear();
FirstDbgValue = NULL;
Andrew Trick
committed
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScheduleDAGMI::dumpSchedule() const {
for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
if (SUnit *SU = getSUnit(&(*MI)))
SU->dump(this);
else
dbgs() << "Missing SUnit\n";
}
}
#endif
//===----------------------------------------------------------------------===//
// LoadClusterMutation - DAG post-processing to cluster loads.
//===----------------------------------------------------------------------===//
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
namespace {
/// \brief Post-process the DAG to create cluster edges between neighboring
/// loads.
class LoadClusterMutation : public ScheduleDAGMutation {
struct LoadInfo {
SUnit *SU;
unsigned BaseReg;
unsigned Offset;
LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
: SU(su), BaseReg(reg), Offset(ofs) {}
};
static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
const LoadClusterMutation::LoadInfo &RHS);
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
public:
LoadClusterMutation(const TargetInstrInfo *tii,
const TargetRegisterInfo *tri)
: TII(tii), TRI(tri) {}
virtual void apply(ScheduleDAGMI *DAG);
protected:
void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
};
} // anonymous
bool LoadClusterMutation::LoadInfoLess(
const LoadClusterMutation::LoadInfo &LHS,
const LoadClusterMutation::LoadInfo &RHS) {
if (LHS.BaseReg != RHS.BaseReg)
return LHS.BaseReg < RHS.BaseReg;
return LHS.Offset < RHS.Offset;
}
void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
ScheduleDAGMI *DAG) {
SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
SUnit *SU = Loads[Idx];
unsigned BaseReg;
unsigned Offset;
if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
}
if (LoadRecords.size() < 2)
return;
std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
unsigned ClusterLength = 1;
for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
ClusterLength = 1;
continue;
}
SUnit *SUa = LoadRecords[Idx].SU;
SUnit *SUb = LoadRecords[Idx+1].SU;
if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
&& DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
<< SUb->NodeNum << ")\n");
// Copy successor edges from SUa to SUb. Interleaving computation
// dependent on SUa can prevent load combining due to register reuse.
// Predecessor edges do not need to be copied from SUb to SUa since nearby
// loads should have effectively the same inputs.
for (SUnit::const_succ_iterator
SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
if (SI->getSUnit() == SUb)
continue;
DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
}
++ClusterLength;
}
else
ClusterLength = 1;
}
}
/// \brief Callback from DAG postProcessing to create cluster edges for loads.
void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
// Map DAG NodeNum to store chain ID.
DenseMap<unsigned, unsigned> StoreChainIDs;
// Map each store chain to a set of dependent loads.
SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
SUnit *SU = &DAG->SUnits[Idx];
if (!SU->getInstr()->mayLoad())
continue;
unsigned ChainPredID = DAG->SUnits.size();
for (SUnit::const_pred_iterator
PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
if (PI->isCtrl()) {
ChainPredID = PI->getSUnit()->NodeNum;
break;
}
}
// Check if this chain-like pred has been seen
// before. ChainPredID==MaxNodeID for loads at the top of the schedule.
unsigned NumChains = StoreChainDependents.size();
std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
if (Result.second)
StoreChainDependents.resize(NumChains + 1);
StoreChainDependents[Result.first->second].push_back(SU);
}
// Iterate over the store chains.
for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
}
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
//===----------------------------------------------------------------------===//
// MacroFusion - DAG post-processing to encourage fusion of macro ops.
//===----------------------------------------------------------------------===//
namespace {
/// \brief Post-process the DAG to create cluster edges between instructions
/// that may be fused by the processor into a single operation.
class MacroFusion : public ScheduleDAGMutation {
const TargetInstrInfo *TII;
public:
MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
virtual void apply(ScheduleDAGMI *DAG);
};
} // anonymous
/// \brief Callback from DAG postProcessing to create cluster edges to encourage
/// fused operations.
void MacroFusion::apply(ScheduleDAGMI *DAG) {
// For now, assume targets can only fuse with the branch.
MachineInstr *Branch = DAG->ExitSU.getInstr();
if (!Branch)
return;
for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
SUnit *SU = &DAG->SUnits[--Idx];
if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
continue;
// Create a single weak edge from SU to ExitSU. The only effect is to cause
// bottom-up scheduling to heavily prioritize the clustered SU. There is no
// need to copy predecessor edges from ExitSU to SU, since top-down
// scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
// of SU, we could create an artificial edge from the deepest root, but it
// hasn't been needed yet.
bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
(void)Success;
assert(Success && "No DAG nodes should be reachable from ExitSU");
DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
break;
}
}
//===----------------------------------------------------------------------===//
// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
//===----------------------------------------------------------------------===//
namespace {
/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
class ConvergingScheduler : public MachineSchedStrategy {
public:
/// Represent the type of SchedCandidate found within a single queue.
/// pickNodeBidirectional depends on these listed by decreasing priority.
enum CandReason {
NoCand, SingleExcess, SingleCritical, Cluster,
ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
NodeOrder};
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
#ifndef NDEBUG
static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
#endif
/// Policy for scheduling the next instruction in the candidate's zone.
struct CandPolicy {
bool ReduceLatency;
unsigned ReduceResIdx;
unsigned DemandResIdx;
CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
};
/// Status of an instruction's critical resource consumption.
struct SchedResourceDelta {
// Count critical resources in the scheduled region required by SU.
unsigned CritResources;
// Count critical resources from another region consumed by SU.
unsigned DemandedResources;
SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
bool operator==(const SchedResourceDelta &RHS) const {
return CritResources == RHS.CritResources
&& DemandedResources == RHS.DemandedResources;
}
bool operator!=(const SchedResourceDelta &RHS) const {
return !operator==(RHS);
}
};
/// Store the state used by ConvergingScheduler heuristics, required for the
/// lifetime of one invocation of pickNode().
struct SchedCandidate {
// The best SUnit candidate.
SUnit *SU;
// The reason for this candidate.
CandReason Reason;
// Register pressure values for the best candidate.
RegPressureDelta RPDelta;
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
// Critical resource consumption of the best candidate.
SchedResourceDelta ResDelta;
SchedCandidate(const CandPolicy &policy)
: Policy(policy), SU(NULL), Reason(NoCand) {}
bool isValid() const { return SU; }
// Copy the status of another candidate without changing policy.
void setBest(SchedCandidate &Best) {
assert(Best.Reason != NoCand && "uninitialized Sched candidate");
SU = Best.SU;
Reason = Best.Reason;
RPDelta = Best.RPDelta;
ResDelta = Best.ResDelta;
}
void initResourceDelta(const ScheduleDAGMI *DAG,
const TargetSchedModel *SchedModel);
};
/// Summarize the unscheduled region.
struct SchedRemainder {
// Critical path through the DAG in expected latency.
unsigned CriticalPath;
// Unscheduled resources
SmallVector<unsigned, 16> RemainingCounts;
// Critical resource for the unscheduled zone.
unsigned CritResIdx;
// Number of micro-ops left to schedule.
unsigned RemainingMicroOps;
// Is the unscheduled zone resource limited.
bool IsResourceLimited;
unsigned MaxRemainingCount;
void reset() {
CriticalPath = 0;
RemainingCounts.clear();
CritResIdx = 0;
RemainingMicroOps = 0;
IsResourceLimited = false;
MaxRemainingCount = 0;
}
SchedRemainder() { reset(); }
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
/// Each Scheduling boundary is associated with ready queues. It tracks the
/// current cycle in the direction of movement, and maintains the state
/// of "hazards" and other interlocks at the current cycle.
const TargetSchedModel *SchedModel;
ReadyQueue Available;
ReadyQueue Pending;
bool CheckPending;
// For heuristics, keep a list of the nodes that immediately depend on the
// most recently scheduled node.
SmallPtrSet<const SUnit*, 8> NextSUs;
ScheduleHazardRecognizer *HazardRec;
unsigned CurrCycle;
unsigned IssueCount;
/// MinReadyCycle - Cycle of the soonest available instruction.
unsigned MinReadyCycle;
// The expected latency of the critical path in this scheduled zone.
unsigned ExpectedLatency;
// Resources used in the scheduled zone beyond this boundary.
SmallVector<unsigned, 16> ResourceCounts;
// Cache the critical resources ID in this scheduled zone.