Skip to content
MachineLICM.cpp 53.4 KiB
Newer Older
/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
/// and an use in the current loop, return true if the target considered
/// it 'high'.
bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
                                        unsigned DefIdx, unsigned Reg) const {
  if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg))

  for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
         E = MRI->use_nodbg_end(); I != E; ++I) {
    MachineInstr *UseMI = &*I;
    if (!CurLoop->contains(UseMI->getParent()))
      continue;
    for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = UseMI->getOperand(i);
      if (!MO.isReg() || !MO.isUse())
        continue;
      unsigned MOReg = MO.getReg();
      if (MOReg != Reg)
        continue;

      if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i))
        return true;
    // Only look at the first in loop use.
    break;
/// IsCheapInstruction - Return true if the instruction is marked "cheap" or
/// the operand latency between its def and a use is one or less.
bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
  if (MI.isAsCheapAsAMove() || MI.isCopyLike())
    return true;
  if (!InstrItins || InstrItins->isEmpty())
    return false;

  bool isCheap = false;
  unsigned NumDefs = MI.getDesc().getNumDefs();
  for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
    MachineOperand &DefMO = MI.getOperand(i);
    if (!DefMO.isReg() || !DefMO.isDef())
      continue;
    --NumDefs;
    unsigned Reg = DefMO.getReg();
    if (TargetRegisterInfo::isPhysicalRegister(Reg))
      continue;

    if (!TII->hasLowDefLatency(InstrItins, &MI, i))
      return false;
    isCheap = true;
  }

  return isCheap;
}

/// CanCauseHighRegPressure - Visit BBs from header to current BB, check
/// if hoisting an instruction of the given cost matrix can cause high
/// register pressure.
bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost,
                                          bool CheapInstr) {
  for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
       CI != CE; ++CI) {
Andrew Trick's avatar
Andrew Trick committed
    if (CI->second <= 0)
      continue;

    unsigned RCId = CI->first;
    unsigned Limit = RegLimit[RCId];
    int Cost = CI->second;

    // Don't hoist cheap instructions if they would increase register pressure,
    // even if we're under the limit.
    if (CheapInstr)
      return true;

    for (unsigned i = BackTrace.size(); i != 0; --i) {
      SmallVector<unsigned, 8> &RP = BackTrace[i-1];
/// UpdateBackTraceRegPressure - Traverse the back trace from header to the
/// current block and update their register pressures to reflect the effect
/// of hoisting MI from the current block to the preheader.
void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
  if (MI->isImplicitDef())
    return;

  // First compute the 'cost' of the instruction, i.e. its contribution
  // to register pressure.
  DenseMap<unsigned, int> Cost;
  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
    unsigned RCId, RCCost;
    getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
    if (MO.isDef()) {
      DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
      if (CI != Cost.end())
        CI->second += RCCost;
      else
        Cost.insert(std::make_pair(RCId, RCCost));
    } else if (isOperandKill(MO, MRI)) {
      DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
      if (CI != Cost.end())
        CI->second -= RCCost;
      else
        Cost.insert(std::make_pair(RCId, -RCCost));
    }
  }

  // Update register pressure of blocks from loop header to current block.
  for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) {
    SmallVector<unsigned, 8> &RP = BackTrace[i];
    for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
         CI != CE; ++CI) {
      unsigned RCId = CI->first;
      RP[RCId] += CI->second;
    }
  }
}

/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
/// the given loop invariant.
bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
  // Besides removing computation from the loop, hoisting an instruction has
  // these effects:
  //
  // - The value defined by the instruction becomes live across the entire
  //   loop. This increases register pressure in the loop.
  //
  // - If the value is used by a PHI in the loop, a copy will be required for
  //   lowering the PHI after extending the live range.
  //
  // - When hoisting the last use of a value in the loop, that value no longer
  //   needs to be live in the loop. This lowers register pressure in the loop.

  bool CheapInstr = IsCheapInstruction(MI);
  bool CreatesCopy = HasLoopPHIUse(&MI);

  // Don't hoist a cheap instruction if it would create a copy in the loop.
  if (CheapInstr && CreatesCopy) {
    DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
    return false;
  }
  // Rematerializable instructions should always be hoisted since the register
  // allocator can just pull them down again when needed.
  if (TII->isTriviallyReMaterializable(&MI, AA))
    return true;

  // Estimate register pressure to determine whether to LICM the instruction.
  // In low register pressure situation, we can be more aggressive about
  // hoisting. Also, favors hoisting long latency instructions even in
  // moderately high pressure situation.
  // Cheap instructions will only be hoisted if they don't increase register
  // pressure at all.
  // FIXME: If there are long latency loop-invariant instructions inside the
  // loop at this point, why didn't the optimizer's LICM hoist them?
  DenseMap<unsigned, int> Cost;
  for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    unsigned RCId, RCCost;
    getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
    if (MO.isDef()) {
      if (HasHighOperandLatency(MI, i, Reg)) {
        DEBUG(dbgs() << "Hoist High Latency: " << MI);
        ++NumHighLatency;
        return true;
      Cost[RCId] += RCCost;
    } else if (isOperandKill(MO, MRI)) {
      // Is a virtual register use is a kill, hoisting it out of the loop
      // may actually reduce register pressure or be register pressure
      // neutral.
      Cost[RCId] -= RCCost;
  // Visit BBs from header to current BB, if hoisting this doesn't cause
  // high register pressure, then it's safe to proceed.
  if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
    DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
    ++NumLowRP;
    return true;
  }
  // Don't risk increasing register pressure if it would create copies.
  if (CreatesCopy) {
    DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
    return false;
  }
  // Do not "speculate" in high register pressure situation. If an
  // instruction is not guaranteed to be executed in the loop, it's best to be
  // conservative.
  if (AvoidSpeculation &&
      (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
    DEBUG(dbgs() << "Won't speculate: " << MI);
    return false;
  // High register pressure situation, only hoist if the instruction is going
  // to be remat'ed.
  if (!TII->isTriviallyReMaterializable(&MI, AA) &&
      !MI.isInvariantLoad(AA)) {
    DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
  // If not, we may be able to unfold a load and hoist that.
  // First test whether the instruction is loading from an amenable
  // memory location.
  if (!MI->isInvariantLoad(AA))
  // Next determine the register class for a temporary register.
  unsigned NewOpc =
    TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
                                    /*UnfoldLoad=*/true,
                                    /*UnfoldStore=*/false,
                                    &LoadRegIndex);
  const MCInstrDesc &MID = TII->get(NewOpc);
  if (MID.getNumDefs() != 1) return 0;
  MachineFunction &MF = *MI->getParent()->getParent();
  const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
  // Ok, we're unfolding. Create a temporary register and do the unfold.
  unsigned Reg = MRI->createVirtualRegister(RC);
  SmallVector<MachineInstr *, 2> NewMIs;
  bool Success =
    TII->unfoldMemoryOperand(MF, MI, Reg,
                             /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
                             NewMIs);
  (void)Success;
  assert(Success &&
         "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
         "succeeded!");
  assert(NewMIs.size() == 2 &&
         "Unfolded a load into multiple instructions!");
  MachineBasicBlock *MBB = MI->getParent();
  MachineBasicBlock::iterator Pos = MI;
  MBB->insert(Pos, NewMIs[0]);
  MBB->insert(Pos, NewMIs[1]);
  // If unfolding produced a load that wasn't loop-invariant or profitable to
  // hoist, discard the new instructions and bail.
  if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
    NewMIs[0]->eraseFromParent();
    NewMIs[1]->eraseFromParent();
    return 0;
  }

  // Update register pressure for the unfolded instruction.
  UpdateRegPressure(NewMIs[1]);

  // Otherwise we successfully unfolded a load that we can hoist.
  MI->eraseFromParent();
  return NewMIs[0];
}

void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
  for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
    const MachineInstr *MI = &*I;
    unsigned Opcode = MI->getOpcode();
    DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
      CI = CSEMap.find(Opcode);
    if (CI != CSEMap.end())
      CI->second.push_back(MI);
    else {
      std::vector<const MachineInstr*> CSEMIs;
      CSEMIs.push_back(MI);
      CSEMap.insert(std::make_pair(Opcode, CSEMIs));
const MachineInstr*
MachineLICM::LookForDuplicate(const MachineInstr *MI,
                              std::vector<const MachineInstr*> &PrevMIs) {
Evan Cheng's avatar
Evan Cheng committed
  for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
    const MachineInstr *PrevMI = PrevMIs[i];
    if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0)))
Evan Cheng's avatar
Evan Cheng committed
      return PrevMI;
  }
  return 0;
}

bool MachineLICM::EliminateCSE(MachineInstr *MI,
          DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
  // the undef property onto uses.
  if (CI == CSEMap.end() || MI->isImplicitDef())
    return false;

  if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
David Greene's avatar
 
David Greene committed
    DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);

    // Replace virtual registers defined by MI by their counterparts defined
    // by Dup.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);

      // Physical registers may not differ here.
      assert((!MO.isReg() || MO.getReg() == 0 ||
              !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
              MO.getReg() == Dup->getOperand(i).getReg()) &&
             "Instructions with different phys regs are not identical!");

      if (MO.isReg() && MO.isDef() &&
          !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
        Defs.push_back(i);
    }

    SmallVector<const TargetRegisterClass*, 2> OrigRCs;
    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Idx = Defs[i];
      unsigned Reg = MI->getOperand(Idx).getReg();
      unsigned DupReg = Dup->getOperand(Idx).getReg();
      OrigRCs.push_back(MRI->getRegClass(DupReg));

      if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
        // Restore old RCs if more than one defs.
        for (unsigned j = 0; j != i; ++j)
          MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
        return false;
Evan Cheng's avatar
Evan Cheng committed
    }

    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Idx = Defs[i];
      unsigned Reg = MI->getOperand(Idx).getReg();
      unsigned DupReg = Dup->getOperand(Idx).getReg();
      MRI->replaceRegWith(Reg, DupReg);
      MRI->clearKillFlags(DupReg);
    }

Evan Cheng's avatar
Evan Cheng committed
  }
  return false;
}

Evan Cheng's avatar
Evan Cheng committed
/// MayCSE - Return true if the given instruction will be CSE'd if it's
/// hoisted out of the loop.
bool MachineLICM::MayCSE(MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
    CI = CSEMap.find(Opcode);
  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
  // the undef property onto uses.
  if (CI == CSEMap.end() || MI->isImplicitDef())
    return false;

  return LookForDuplicate(MI, CI->second) != 0;
}

/// Hoist - When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this instruction is called to do the dirty work.
bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
  // First check whether we should hoist this instruction.
  if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
    // If not, try unfolding a hoistable load.
    MI = ExtractHoistableLoad(MI);
    if (!MI) return false;
  // Now move the instructions to the predecessor, inserting it before any
  // terminator instructions.
  DEBUG({
David Greene's avatar
 
David Greene committed
      dbgs() << "Hoisting " << *MI;
      if (Preheader->getBasicBlock())
David Greene's avatar
 
David Greene committed
        dbgs() << " to MachineBasicBlock "
      if (MI->getParent()->getBasicBlock())
David Greene's avatar
 
David Greene committed
        dbgs() << " from MachineBasicBlock "
David Greene's avatar
 
David Greene committed
      dbgs() << "\n";
  // If this is the first instruction being hoisted to the preheader,
  // initialize the CSE map with potential common expressions.
  // Look for opportunity to CSE the hoisted instruction.
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
    CI = CSEMap.find(Opcode);
Evan Cheng's avatar
Evan Cheng committed
  if (!EliminateCSE(MI, CI)) {
    // Otherwise, splice the instruction to the preheader.
    Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
    // Update register pressure for BBs from header to this block.
    UpdateBackTraceRegPressure(MI);

    // Clear the kill flags of any register this instruction defines,
    // since they may need to be live throughout the entire loop
    // rather than just live for part of it.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isDef() && !MO.isDead())
    // Add to the CSE map.
    if (CI != CSEMap.end())
      CI->second.push_back(MI);
    else {
      std::vector<const MachineInstr*> CSEMIs;
      CSEMap.insert(std::make_pair(Opcode, CSEMIs));

MachineBasicBlock *MachineLICM::getCurPreheader() {
  // Determine the block to which to hoist instructions. If we can't find a
  // suitable loop predecessor, we can't do any hoisting.

  // If we've tried to get a preheader and failed, don't try again.
  if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
    return 0;

  if (!CurPreheader) {
    CurPreheader = CurLoop->getLoopPreheader();
    if (!CurPreheader) {
      MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
      if (!Pred) {
        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
        return 0;
      }

      CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
      if (!CurPreheader) {
        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
        return 0;
      }
    }
  }
  return CurPreheader;
}