Newer
Older
//===----- CriticalAntiDepBreaker.cpp - Anti-dep breaker -------- ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the CriticalAntiDepBreaker class, which
// implements register anti-dependence breaking along a blocks
// critical path during post-RA scheduler.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "post-RA-sched"
#include "CriticalAntiDepBreaker.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
CriticalAntiDepBreaker::
AntiDepBreaker(), MF(MFi),
MRI(MF.getRegInfo()),
TII(MF.getTarget().getInstrInfo()),
TRI(MF.getTarget().getRegisterInfo()),
AllocatableSet(TRI->getAllocatableSet(MF))
{
}
CriticalAntiDepBreaker::~CriticalAntiDepBreaker() {
}
void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// Clear out the register class data.
std::fill(Classes, array_endof(Classes),
static_cast<const TargetRegisterClass *>(0));
// Initialize the indices to indicate that no registers are live.
David Goodwin
committed
const unsigned BBSize = BB->size();
for (unsigned i = 0; i < TRI->getNumRegs(); ++i) {
KillIndices[i] = ~0u;
DefIndices[i] = BBSize;
}
// Clear "do not change" set.
KeepRegs.clear();
bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
// Determine the live-out physregs for this block.
if (IsReturnBlock) {
// In a return block, examine the function live-out regs.
for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
E = MRI.liveout_end(); I != E; ++I) {
unsigned Reg = *I;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
KillIndices[Reg] = BB->size();
DefIndices[Reg] = ~0u;
// Repeat, for all aliases.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
KillIndices[AliasReg] = BB->size();
DefIndices[AliasReg] = ~0u;
}
}
}
// In a non-return block, examine the live-in regs of all successors.
// Note a return block can have successors if the return instruction is
// predicated.
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
unsigned Reg = *I;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
KillIndices[Reg] = BB->size();
DefIndices[Reg] = ~0u;
// Repeat, for all aliases.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
KillIndices[AliasReg] = BB->size();
DefIndices[AliasReg] = ~0u;
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
// Mark live-out callee-saved registers. In a return block this is
// all callee-saved registers. In non-return this is any
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo *MFI = MF.getFrameInfo();
BitVector Pristine = MFI->getPristineRegs(BB);
for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
unsigned Reg = *I;
if (!IsReturnBlock && !Pristine.test(Reg)) continue;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
KillIndices[Reg] = BB->size();
DefIndices[Reg] = ~0u;
// Repeat, for all aliases.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
KillIndices[AliasReg] = BB->size();
DefIndices[AliasReg] = ~0u;
}
}
}
void CriticalAntiDepBreaker::FinishBlock() {
RegRefs.clear();
KeepRegs.clear();
}
void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
unsigned InsertPosIndex) {
if (MI->isDebugValue())
return;
assert(Count < InsertPosIndex && "Instruction index out of expected range!");
// Any register which was defined within the previous scheduling region
// may have been rescheduled and its lifetime may overlap with registers
// in ways not reflected in our current liveness state. For each such
// register, adjust the liveness state to be conservatively correct.
David Goodwin
committed
for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg)
if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
// Mark this register to be non-renamable.
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
// Move the def index to the end of the previous region, to reflect
// that the def could theoretically have been scheduled at the end.
DefIndices[Reg] = InsertPosIndex;
}
PrescanInstruction(MI);
ScanInstruction(MI, Count);
}
/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
/// critical path.
static const SDep *CriticalPathStep(const SUnit *SU) {
const SDep *Next = 0;
unsigned NextDepth = 0;
// Find the predecessor edge with the greatest depth.
for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
const SUnit *PredSU = P->getSUnit();
unsigned PredLatency = P->getLatency();
unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
// In the case of a latency tie, prefer an anti-dependency edge over
// other types of edges.
if (NextDepth < PredTotalLatency ||
(NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
NextDepth = PredTotalLatency;
Next = &*P;
}
}
return Next;
}
void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
// It's not safe to change register allocation for source operands of
// that have special allocation requirements. Also assume all registers
// used in a call must not be changed (ABI).
// FIXME: The issue with predicated instruction is more complex. We are being
// conservatively here because the kill markers cannot be trusted after
// if-conversion:
// %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
// ...
// STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
// %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
// STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
//
// The first R6 kill is not really a kill since it's killed by a predicated
// instruction which may not be executed. The second R6 def may or may not
// re-define R6 so it's not safe to change it since the last R6 use cannot be
// changed.
bool Special = MI->getDesc().isCall() ||
MI->getDesc().hasExtraSrcRegAllocReq() ||
TII->isPredicated(MI);
// Scan the register operands for this instruction and update
// Classes and RegRefs.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
const TargetRegisterClass *NewRC = 0;
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
if (i < MI->getDesc().getNumOperands())
NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
// For now, only allow the register to be changed if its register
// class is consistent across all uses.
if (!Classes[Reg] && NewRC)
Classes[Reg] = NewRC;
else if (!NewRC || Classes[Reg] != NewRC)
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
// Now check for aliases.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
// If an alias of the reg is used during the live range, give up.
// Note that this allows us to skip checking if AntiDepReg
// overlaps with any of the aliases, among other things.
unsigned AliasReg = *Alias;
if (Classes[AliasReg]) {
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
}
}
// If we're still willing to consider this register, note the reference.
if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
RegRefs.insert(std::make_pair(Reg, &MO));
if (MO.isUse() && Special) {
if (KeepRegs.insert(Reg)) {
for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg)
KeepRegs.insert(*Subreg);
}
}
}
}
void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
unsigned Count) {
// Update liveness.
// Proceding upwards, registers that are defed but not used in this
// instruction are now dead.
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
if (!TII->isPredicated(MI)) {
// Predicated defs are modeled as read + write, i.e. similar to two
// address updates.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
if (!MO.isDef()) continue;
// Ignore two-addr defs.
if (MI->isRegTiedToUseOperand(i)) continue;
DefIndices[Reg] = Count;
KillIndices[Reg] = ~0u;
assert(((KillIndices[Reg] == ~0u) !=
(DefIndices[Reg] == ~0u)) &&
"Kill and Def maps aren't consistent for Reg!");
KeepRegs.erase(Reg);
Classes[Reg] = 0;
RegRefs.erase(Reg);
// Repeat, for all subregs.
for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg) {
unsigned SubregReg = *Subreg;
DefIndices[SubregReg] = Count;
KillIndices[SubregReg] = ~0u;
KeepRegs.erase(SubregReg);
Classes[SubregReg] = 0;
RegRefs.erase(SubregReg);
}
// Conservatively mark super-registers as unusable.
for (const unsigned *Super = TRI->getSuperRegisters(Reg);
*Super; ++Super) {
unsigned SuperReg = *Super;
Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
}
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
}
}
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
if (!MO.isUse()) continue;
const TargetRegisterClass *NewRC = 0;
if (i < MI->getDesc().getNumOperands())
NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
// For now, only allow the register to be changed if its register
// class is consistent across all uses.
if (!Classes[Reg] && NewRC)
Classes[Reg] = NewRC;
else if (!NewRC || Classes[Reg] != NewRC)
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
RegRefs.insert(std::make_pair(Reg, &MO));
// It wasn't previously live but now it is, this is a kill.
if (KillIndices[Reg] == ~0u) {
KillIndices[Reg] = Count;
DefIndices[Reg] = ~0u;
assert(((KillIndices[Reg] == ~0u) !=
(DefIndices[Reg] == ~0u)) &&
"Kill and Def maps aren't consistent for Reg!");
}
// Repeat, for all aliases.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
if (KillIndices[AliasReg] == ~0u) {
KillIndices[AliasReg] = Count;
DefIndices[AliasReg] = ~0u;
}
}
}
}
unsigned
CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI,
unsigned AntiDepReg,
unsigned LastNewReg,
for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
RE = RC->allocation_order_end(MF); R != RE; ++R) {
unsigned NewReg = *R;
// Don't replace a register with itself.
if (NewReg == AntiDepReg) continue;
// Don't replace a register with one that was recently used to repair
// an anti-dependence with this AntiDepReg, because that would
// re-introduce that anti-dependence.
if (NewReg == LastNewReg) continue;
// If the instruction already has a def of the NewReg, it's not suitable.
// For example, Instruction with multiple definitions can result in this
// condition.
if (MI->modifiesRegister(NewReg, TRI)) continue;
// If NewReg is dead and NewReg's most recent def is not before
// AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u))
&& "Kill and Def maps aren't consistent for AntiDepReg!");
assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u))
&& "Kill and Def maps aren't consistent for NewReg!");
if (KillIndices[NewReg] != ~0u ||
Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
KillIndices[AntiDepReg] > DefIndices[NewReg])
continue;
return NewReg;
}
// No registers are free and available!
return 0;
}
unsigned CriticalAntiDepBreaker::
BreakAntiDependencies(const std::vector<SUnit>& SUnits,
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned InsertPosIndex) {
// The code below assumes that there is at least one instruction,
// so just duck out immediately if the block is empty.
if (SUnits.empty()) return 0;
// Keep a map of the MachineInstr*'s back to the SUnit representing them.
// This is used for updating debug information.
DenseMap<MachineInstr*,const SUnit*> MISUnitMap;
// Find the node at the bottom of the critical path.
const SUnit *Max = 0;
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
const SUnit *SU = &SUnits[i];
MISUnitMap[SU->getInstr()] = SU;
if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
Max = SU;
}
#ifndef NDEBUG
{
<< (Max->getDepth() + Max->Latency) << "\n");
for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
if (KillIndices[Reg] == ~0u)
}
#endif
// Track progress along the critical path through the SUnit graph as we walk
// the instructions.
const SUnit *CriticalPathSU = Max;
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();
// Consider this pattern:
// A = ...
// ... = A
// A = ...
// ... = A
// A = ...
// ... = A
// A = ...
// ... = A
// There are three anti-dependencies here, and without special care,
// we'd break all of them using the same register:
// A = ...
// ... = A
// B = ...
// ... = B
// B = ...
// ... = B
// B = ...
// ... = B
// because at each anti-dependence, B is the first register that
// isn't A which is free. This re-introduces anti-dependencies
// at all but one of the original anti-dependencies that we were
// trying to break. To avoid this, keep track of the most recent
// register that each register was replaced with, avoid
// using it to repair an anti-dependence on the same register.
// This lets us produce this:
// A = ...
// ... = A
// B = ...
// ... = B
// C = ...
// ... = C
// B = ...
// ... = B
// This still has an anti-dependence on B, but at least it isn't on the
// original critical path.
//
// TODO: If we tracked more than one register here, we could potentially
// fix that remaining critical edge too. This is a little more involved,
// because unlike the most recent register, less recent registers should
// still be considered, though only if no other registers are available.
unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};
// Attempt to break anti-dependence edges on the critical path. Walk the
// instructions from the bottom up, tracking information about liveness
// as we go to help determine which registers are available.
unsigned Broken = 0;
unsigned Count = InsertPosIndex - 1;
for (MachineBasicBlock::iterator I = End, E = Begin;
I != E; --Count) {
MachineInstr *MI = --I;
if (MI->isDebugValue())
continue;
// Check if this instruction has a dependence on the critical path that
// is an anti-dependence that we may be able to break. If it is, set
// AntiDepReg to the non-zero register associated with the anti-dependence.
//
// We limit our attention to the critical path as a heuristic to avoid
// breaking anti-dependence edges that aren't going to significantly
// impact the overall schedule. There are a limited number of registers
// and we want to save them for the important edges.
// TODO: Instructions with multiple defs could have multiple
// anti-dependencies. The current code here only knows how to break one
// edge per instruction. Note that we'd have to be able to break all of
// the anti-dependencies in an instruction in order to be effective.
unsigned AntiDepReg = 0;
if (MI == CriticalPathMI) {
if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) {
const SUnit *NextSU = Edge->getSUnit();
// Only consider anti-dependence edges.
if (Edge->getKind() == SDep::Anti) {
AntiDepReg = Edge->getReg();
assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
if (!AllocatableSet.test(AntiDepReg))
// Don't break anti-dependencies on non-allocatable registers.
AntiDepReg = 0;
else if (KeepRegs.count(AntiDepReg))
// Don't break anti-dependencies if an use down below requires
// this exact register.
AntiDepReg = 0;
else {
// If the SUnit has other dependencies on the SUnit that it
// anti-depends on, don't bother breaking the anti-dependency
// since those edges would prevent such units from being
// scheduled past each other regardless.
//
// Also, if there are dependencies on other SUnits with the
// same register as the anti-dependency, don't attempt to
// break it.
for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(),
PE = CriticalPathSU->Preds.end(); P != PE; ++P)
if (P->getSUnit() == NextSU ?
(P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
(P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
AntiDepReg = 0;
break;
}
}
}
CriticalPathSU = NextSU;
CriticalPathMI = CriticalPathSU->getInstr();
} else {
// We've reached the end of the critical path.
CriticalPathSU = 0;
CriticalPathMI = 0;
}
}
PrescanInstruction(MI);
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
// defined in a call must not be changed (ABI).
if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
TII->isPredicated(MI))
// If this instruction's defs have special allocation requirement, don't
// break this anti-dependency.
AntiDepReg = 0;
else if (AntiDepReg) {
// If this instruction has a use of AntiDepReg, breaking it
// is invalid.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) {
AntiDepReg = 0;
break;
}
}
}
// Determine AntiDepReg's register class, if it is live and is
// consistently used within a single class.
const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
assert((AntiDepReg == 0 || RC != NULL) &&
"Register should be live if it's causing an anti-dependence!");
if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
AntiDepReg = 0;
// Look for a suitable register to use to break the anti-depenence.
//
// TODO: Instead of picking the first free register, consider which might
// be the best.
if (AntiDepReg != 0) {
if (unsigned NewReg = findSuitableFreeRegister(MI, AntiDepReg,
LastNewReg[AntiDepReg],
RC)) {
<< TRI->getName(AntiDepReg)
<< " with " << RegRefs.count(AntiDepReg) << " references"
<< " using " << TRI->getName(NewReg) << "!\n");
// Update the references to the old register to refer to the new
// register.
std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
std::multimap<unsigned, MachineOperand *>::iterator>
Range = RegRefs.equal_range(AntiDepReg);
for (std::multimap<unsigned, MachineOperand *>::iterator
Q = Range.first, QE = Range.second; Q != QE; ++Q) {
Q->second->setReg(NewReg);
// If the SU for the instruction being updated has debug information
// related to the anti-dependency register, make sure to update that
// as well.
const SUnit *SU = MISUnitMap[Q->second->getParent()];
if (!SU) continue;
for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) {
MachineInstr *DI = SU->DbgInstrList[i];
assert (DI->getNumOperands()==3 && DI->getOperand(0).isReg() &&
DI->getOperand(0).getReg()
&& "Non register dbg_value attached to SUnit!");
if (DI->getOperand(0).getReg() == AntiDepReg)
DI->getOperand(0).setReg(NewReg);
}
}
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
// We just went back in time and modified history; the
// liveness information for the anti-depenence reg is now
// inconsistent. Set the state as if it were dead.
Classes[NewReg] = Classes[AntiDepReg];
DefIndices[NewReg] = DefIndices[AntiDepReg];
KillIndices[NewReg] = KillIndices[AntiDepReg];
assert(((KillIndices[NewReg] == ~0u) !=
(DefIndices[NewReg] == ~0u)) &&
"Kill and Def maps aren't consistent for NewReg!");
Classes[AntiDepReg] = 0;
DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
KillIndices[AntiDepReg] = ~0u;
assert(((KillIndices[AntiDepReg] == ~0u) !=
(DefIndices[AntiDepReg] == ~0u)) &&
"Kill and Def maps aren't consistent for AntiDepReg!");
RegRefs.erase(AntiDepReg);
LastNewReg[AntiDepReg] = NewReg;
++Broken;
}
}
ScanInstruction(MI, Count);
}
return Broken;
}