Skip to content
RegAllocGreedy.cpp 49.3 KiB
Newer Older
    // Interference that overlaps an instruction is counted in both gaps
    // surrounding the instruction. The exception is interference before
    // StartIdx and after StopIdx.
    //
    LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx);
    for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
      // Skip the gaps before IntI.
      while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      // Update the gaps covered by IntI.
      const float weight = IntI.value()->weight;
      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = std::max(GapWeight[Gap], weight);
        if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }
}

/// getPrevMappedIndex - Return the slot index of the last non-copy instruction
/// before MI that has a slot index. If MI is the first mapped instruction in
/// its block, return the block start index instead.
///
SlotIndex RAGreedy::getPrevMappedIndex(const MachineInstr *MI) {
  assert(MI && "Missing MachineInstr");
  const MachineBasicBlock *MBB = MI->getParent();
  MachineBasicBlock::const_iterator B = MBB->begin(), I = MI;
  while (I != B)
    if (!(--I)->isDebugValue() && !I->isCopy())
      return Indexes->getInstructionIndex(I);
  return Indexes->getMBBStartIdx(MBB);
}

/// calcPrevSlots - Fill in the PrevSlot array with the index of the previous
/// real non-copy instruction for each instruction in SA->UseSlots.
///
void RAGreedy::calcPrevSlots() {
  const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
  PrevSlot.clear();
  PrevSlot.reserve(Uses.size());
  for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
    const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]);
    PrevSlot.push_back(getPrevMappedIndex(MI).getDefIndex());
  }
}

/// nextSplitPoint - Find the next index into SA->UseSlots > i such that it may
/// be beneficial to split before UseSlots[i].
///
/// 0 is always a valid split point
unsigned RAGreedy::nextSplitPoint(unsigned i) {
  const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
  const unsigned Size = Uses.size();
  assert(i != Size && "No split points after the end");
  // Allow split before i when Uses[i] is not adjacent to the previous use.
  while (++i != Size && PrevSlot[i].getBaseIndex() <= Uses[i-1].getBaseIndex())
    ;
  return i;
}

/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
/// basic block.
///
unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                 SmallVectorImpl<LiveInterval*> &NewVRegs) {
  assert(SA->LiveBlocks.size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front();

  // Note that it is possible to have an interval that is live-in or live-out
  // while only covering a single block - A phi-def can use undef values from
  // predecessors, and the block could be a single-block loop.
  // We don't bother doing anything clever about such a case, we simply assume
  // that the interval is continuous from FirstUse to LastUse. We should make
  // sure that we don't do anything illegal to such an interval, though.

  const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
  if (Uses.size() <= 2)
    return 0;
  const unsigned NumGaps = Uses.size()-1;

  DEBUG({
    dbgs() << "tryLocalSplit: ";
    for (unsigned i = 0, e = Uses.size(); i != e; ++i)
      dbgs() << ' ' << SA->UseSlots[i];
    dbgs() << '\n';
  });

  // For every use, find the previous mapped non-copy instruction.
  // We use this to detect valid split points, and to estimate new interval
  // sizes.
  calcPrevSlots();

  unsigned BestBefore = NumGaps;
  unsigned BestAfter = 0;
  float BestDiff = 0;

  const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber());
  SmallVector<float, 8> GapWeight;

  Order.rewind();
  while (unsigned PhysReg = Order.next()) {
    // Keep track of the largest spill weight that would need to be evicted in
    // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
    calcGapWeights(PhysReg, GapWeight);

    // Try to find the best sequence of gaps to close.
    // The new spill weight must be larger than any gap interference.

    // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
    unsigned SplitBefore = 0, SplitAfter = nextSplitPoint(1) - 1;

    // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
    // It is the spill weight that needs to be evicted.
    float MaxGap = GapWeight[0];
    for (unsigned i = 1; i != SplitAfter; ++i)
      MaxGap = std::max(MaxGap, GapWeight[i]);

    for (;;) {
      // Live before/after split?
      const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
      const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;

      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
                   << Uses[SplitBefore] << '-' << Uses[SplitAfter]
                   << " i=" << MaxGap);

      // Stop before the interval gets so big we wouldn't be making progress.
      if (!LiveBefore && !LiveAfter) {
        DEBUG(dbgs() << " all\n");
        break;
      }
      // Should the interval be extended or shrunk?
      bool Shrink = true;
      if (MaxGap < HUGE_VALF) {
        // Estimate the new spill weight.
        //
        // Each instruction reads and writes the register, except the first
        // instr doesn't read when !FirstLive, and the last instr doesn't write
        // when !LastLive.
        //
        // We will be inserting copies before and after, so the total number of
        // reads and writes is 2 * EstUses.
        //
        const unsigned EstUses = 2*(SplitAfter - SplitBefore) +
                                 2*(LiveBefore + LiveAfter);

        // Try to guess the size of the new interval. This should be trivial,
        // but the slot index of an inserted copy can be a lot smaller than the
        // instruction it is inserted before if there are many dead indexes
        // between them.
        //
        // We measure the distance from the instruction before SplitBefore to
        // get a conservative estimate.
        //
        // The final distance can still be different if inserting copies
        // triggers a slot index renumbering.
        //
        const float EstWeight = normalizeSpillWeight(blockFreq * EstUses,
                              PrevSlot[SplitBefore].distance(Uses[SplitAfter]));
        // Would this split be possible to allocate?
        // Never allocate all gaps, we wouldn't be making progress.
        float Diff = EstWeight - MaxGap;
        DEBUG(dbgs() << " w=" << EstWeight << " d=" << Diff);
        if (Diff > 0) {
          Shrink = false;
          if (Diff > BestDiff) {
            DEBUG(dbgs() << " (best)");
            BestDiff = Diff;
            BestBefore = SplitBefore;
            BestAfter = SplitAfter;
          }
        }
      }

      // Try to shrink.
      if (Shrink) {
        SplitBefore = nextSplitPoint(SplitBefore);
        if (SplitBefore < SplitAfter) {
          DEBUG(dbgs() << " shrink\n");
          // Recompute the max when necessary.
          if (GapWeight[SplitBefore - 1] >= MaxGap) {
            MaxGap = GapWeight[SplitBefore];
            for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
              MaxGap = std::max(MaxGap, GapWeight[i]);
          }
          continue;
        }
        MaxGap = 0;
      }

      // Try to extend the interval.
      if (SplitAfter >= NumGaps) {
        DEBUG(dbgs() << " end\n");
        break;
      }

      DEBUG(dbgs() << " extend\n");
      for (unsigned e = nextSplitPoint(SplitAfter + 1) - 1;
           SplitAfter != e; ++SplitAfter)
        MaxGap = std::max(MaxGap, GapWeight[SplitAfter]);
          continue;
    }
  }

  // Didn't find any candidates?
  if (BestBefore == NumGaps)
    return 0;

  DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore]
               << '-' << Uses[BestAfter] << ", " << BestDiff
               << ", " << (BestAfter - BestBefore + 1) << " instrs\n");

  SmallVector<LiveInterval*, 4> SpillRegs;
  LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs);
  SE->reset(LREdit);

  SE->openIntv();
  SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
  SlotIndex SegStop  = SE->leaveIntvAfter(Uses[BestAfter]);
  SE->useIntv(SegStart, SegStop);
  SE->closeIntv();
  SE->finish();
  setStage(NewVRegs.begin(), NewVRegs.end(), RS_Local);
//===----------------------------------------------------------------------===//
//                          Live Range Splitting
//===----------------------------------------------------------------------===//

/// trySplit - Try to split VirtReg or one of its interferences, making it
/// assignable.
/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
                            SmallVectorImpl<LiveInterval*>&NewVRegs) {
  // Local intervals are handled separately.
  if (LIS->intervalIsInOneMBB(VirtReg)) {
    NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
    return tryLocalSplit(VirtReg, Order, NewVRegs);
  }

  NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
  // Don't iterate global splitting.
  // Move straight to spilling if this range was produced by a global split.
  LiveRangeStage Stage = getStage(VirtReg);
  if (Stage >= RS_Block)
    return 0;

  SA->analyze(&VirtReg);

  // First try to split around a region spanning multiple blocks.
  if (Stage < RS_Region) {
    unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
  }

  // Then isolate blocks with multiple uses.
  if (Stage < RS_Block) {
    SplitAnalysis::BlockPtrSet Blocks;
    if (SA->getMultiUseBlocks(Blocks)) {
      SmallVector<LiveInterval*, 4> SpillRegs;
      LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs);
      setStage(NewVRegs.begin(), NewVRegs.end(), RS_Block);
      if (VerifyEnabled)
        MF->verify(this, "After splitting live range around basic blocks");
    }
//===----------------------------------------------------------------------===//
//                            Main Entry Point
//===----------------------------------------------------------------------===//

unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
                                 SmallVectorImpl<LiveInterval*> &NewVRegs) {
  LiveRangeStage Stage = getStage(VirtReg);
  if (Stage == RS_Original)
    LRStage[VirtReg.reg] = RS_Second;

  AllocationOrder Order(VirtReg.reg, *VRM, ReservedRegs);
  while (unsigned PhysReg = Order.next()) {
    if (!checkPhysRegInterference(VirtReg, PhysReg))
      return PhysReg;
  }
  if (unsigned PhysReg = tryReassign(VirtReg, Order, NewVRegs))
    return PhysReg;

  if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs))
  assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");

  // The first time we see a live range, don't try to split or spill.
  // Wait until the second time, when all smaller ranges have been allocated.
  // This gives a better picture of the interference to split around.
  assert(Stage < RS_Spill && "Cannot allocate after spilling");

  unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
  if (PhysReg || !NewVRegs.empty())
  // Finally spill VirtReg itself.
  NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
  SmallVector<LiveInterval*, 1> pendingSpills;
  spiller().spill(&VirtReg, NewVRegs, pendingSpills);

  // The live virtual register requesting allocation was spilled, so tell
  // the caller not to allocate anything during this round.
  return 0;
}

bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
  DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
               << "********** Function: "
               << ((Value*)mf.getFunction())->getName() << '\n');

  MF = &mf;
    MF->verify(this, "Before greedy register allocator");
  RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
  Indexes = &getAnalysis<SlotIndexes>();
  DomTree = &getAnalysis<MachineDominatorTree>();
  ReservedRegs = TRI->getReservedRegs(*MF);
  SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
  Loops = &getAnalysis<MachineLoopInfo>();
  LoopRanges = &getAnalysis<MachineLoopRanges>();
  Bundles = &getAnalysis<EdgeBundles>();
  SpillPlacer = &getAnalysis<SpillPlacement>();

  SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
  SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree));
  allocatePhysRegs();
  addMBBLiveIns(MF);

  // Run rewriter
  {
    NamedRegionTimer T("Rewriter", TimerGroupName, TimePassesIsEnabled);

  // The pass output is in VirtRegMap. Release all the transient data.
  releaseMemory();

  return true;
}