  // Update register pressure of blocks from loop header to current block.
  for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) {
    SmallVector<unsigned, 8> &RP = BackTrace[i];
    for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
         CI != CE; ++CI) {
      unsigned RCId = CI->first;
      RP[RCId] += CI->second;
    }
  }
}

/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
/// the given loop invariant.
bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
  // If the instruction is cheap, only hoist if it is re-materializable. LICM
  // will increase register pressure. It's probably not worth it if the
  // instruction is cheap.
  // Also hoist loads from constant memory, e.g. loads from stubs or the GOT.
  // Hoisting these tends to help performance in low register pressure
  // situations. The trade-off is that it may cause spills in high pressure
  // situations; we end up adding a store in the loop preheader, but the
  // reload is no more expensive. A side benefit is that these loads are
  // frequently CSE'ed.
  if (IsCheapInstruction(MI)) {
    if (!TII->isTriviallyReMaterializable(&MI, AA))
      return false;
  } else {
    // Estimate register pressure to determine whether to LICM the instruction.
    // In low register pressure situations, we can be more aggressive about
    // hoisting. Also, favor hoisting long latency instructions even in
    // moderately high pressure situations.
    // FIXME: If there are long latency loop-invariant instructions inside the
    // loop at this point, why didn't the optimizer's LICM hoist them?
    DenseMap<unsigned, int> Cost;
    for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI.getOperand(i);
      if (!MO.isReg() || MO.isImplicit())
        continue;
      unsigned Reg = MO.getReg();
      if (!TargetRegisterInfo::isVirtualRegister(Reg))
        continue;

      unsigned RCId, RCCost;
      getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
      if (MO.isDef()) {
        if (HasHighOperandLatency(MI, i, Reg)) {
          ++NumHighLatency;
          return true;
        }

        DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
        if (CI != Cost.end())
          CI->second += RCCost;
        else
          Cost.insert(std::make_pair(RCId, RCCost));
      } else if (isOperandKill(MO, MRI)) {
        // If a virtual register use is a kill, hoisting it out of the loop
        // may actually reduce register pressure or be register pressure
        // neutral.
        DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
        if (CI != Cost.end())
          CI->second -= RCCost;
        else
          Cost.insert(std::make_pair(RCId, -RCCost));
      }
    }

    // Visit BBs from header to current BB, if hoisting this doesn't cause
    // high register pressure, then it's safe to proceed.
    if (!CanCauseHighRegPressure(Cost)) {
      ++NumLowRP;
      return true;
    }

    // Do not "speculate" in high register pressure situations. If an
    // instruction is not guaranteed to be executed in the loop, it's best to
    // be conservative.
    if (AvoidSpeculation &&
        (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI)))
      return false;

    // In a high register pressure situation, only hoist if the instruction is
    // going to be remat'ed.
    if (!TII->isTriviallyReMaterializable(&MI, AA) &&
        !MI.isInvariantLoad(AA))
      return false;
  }

  // If the result(s) of this instruction are used by PHIs outside of the loop,
  // don't hoist it because it will introduce an extra copy.
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || !MO.isDef())
      continue;
    if (HasAnyPHIUse(MO.getReg()))
      return false;
  }

  return true;
}

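/// ExtractHoistableLoad - Unfold a load from the given instruction if the
/// load itself could be hoisted. Return the unfolded and hoistable load,
/// or null if the load couldn't be unfolded or wouldn't be hoistable.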
MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
  // Don't unfold simple loads.
  if (MI->getDesc().canFoldAsLoad())
    return 0;

  // If not, we may be able to unfold a load and hoist that.
  // First test whether the instruction is loading from an amenable
  // memory location.
  if (!MI->isInvariantLoad(AA))
    return 0;

  // Next determine the register class for a temporary register.
  unsigned LoadRegIndex;
  unsigned NewOpc =
    TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
                                    /*UnfoldLoad=*/true,
                                    /*UnfoldStore=*/false,
                                    &LoadRegIndex);
  if (NewOpc == 0) return 0;
  const MCInstrDesc &MID = TII->get(NewOpc);
  if (MID.getNumDefs() != 1) return 0;
  const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI);
  // Ok, we're unfolding. Create a temporary register and do the unfold.
  unsigned Reg = MRI->createVirtualRegister(RC);

  MachineFunction &MF = *MI->getParent()->getParent();
  SmallVector<MachineInstr *, 2> NewMIs;
  bool Success =
    TII->unfoldMemoryOperand(MF, MI, Reg,
                             /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
                             NewMIs);
  (void)Success;
  assert(Success &&
         "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
         "succeeded!");
  assert(NewMIs.size() == 2 &&
         "Unfolded a load into multiple instructions!");
  MachineBasicBlock *MBB = MI->getParent();
  MBB->insert(MI, NewMIs[0]);
  MBB->insert(MI, NewMIs[1]);
  // If unfolding produced a load that wasn't loop-invariant or profitable to
  // hoist, discard the new instructions and bail.
  if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
    NewMIs[0]->eraseFromParent();
    NewMIs[1]->eraseFromParent();
    return 0;
  }

  // Update register pressure for the unfolded instruction.
  UpdateRegPressure(NewMIs[1]);

  // Otherwise we successfully unfolded a load that we can hoist.
  MI->eraseFromParent();
  return NewMIs[0];
}

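/// InitCSEMap - Populate the CSE map with the instructions already in the
/// given block (the loop preheader), keyed by opcode, so that hoisted
/// instructions can be CSE'd against them.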
void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
  for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
    const MachineInstr *MI = &*I;
    unsigned Opcode = MI->getOpcode();
    DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
      CI = CSEMap.find(Opcode);
    if (CI != CSEMap.end())
      CI->second.push_back(MI);
    else {
      std::vector<const MachineInstr*> CSEMIs;
      CSEMIs.push_back(MI);
      CSEMap.insert(std::make_pair(Opcode, CSEMIs));
    }
  }
}

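/// LookForDuplicate - Find an instruction among PrevMIs that is a duplicate
/// of MI, i.e. one which computes the same value. Return it if found, or
/// null otherwise.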
const MachineInstr*
MachineLICM::LookForDuplicate(const MachineInstr *MI,
                              std::vector<const MachineInstr*> &PrevMIs) {
  for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
    const MachineInstr *PrevMI = PrevMIs[i];
    if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0)))
      return PrevMI;
  }
  return 0;
}

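/// EliminateCSE - Given an instruction that is about to be hoisted, look for
/// a duplicate already in the preheader. If one is found, replace the virtual
/// registers defined by MI with those defined by the duplicate and erase MI
/// instead of hoisting it. Returns true if MI was eliminated.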
bool MachineLICM::EliminateCSE(MachineInstr *MI,
          DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
  // the undef property onto uses.
  if (CI == CSEMap.end() || MI->isImplicitDef())
    return false;

  if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
    DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);

    // Replace virtual registers defined by MI by their counterparts defined
    // by Dup.
    SmallVector<unsigned, 2> Defs;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);

      // Physical registers may not differ here.
      assert((!MO.isReg() || MO.getReg() == 0 ||
              !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
              MO.getReg() == Dup->getOperand(i).getReg()) &&
             "Instructions with different phys regs are not identical!");

      if (MO.isReg() && MO.isDef() &&
          !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
        Defs.push_back(i);
    }

    SmallVector<const TargetRegisterClass*, 2> OrigRCs;
    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Idx = Defs[i];
      unsigned Reg = MI->getOperand(Idx).getReg();
      unsigned DupReg = Dup->getOperand(Idx).getReg();
      OrigRCs.push_back(MRI->getRegClass(DupReg));

      if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
        // Restore old RCs if more than one def was processed.
        for (unsigned j = 0; j != i; ++j)
          MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
        return false;
      }
    }

    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Idx = Defs[i];
      unsigned Reg = MI->getOperand(Idx).getReg();
      unsigned DupReg = Dup->getOperand(Idx).getReg();
      MRI->replaceRegWith(Reg, DupReg);
      MRI->clearKillFlags(DupReg);
    }

    MI->eraseFromParent();
    ++NumCSEed;
    return true;
  }
  return false;
}

/// MayCSE - Return true if the given instruction will be CSE'd if it's
/// hoisted out of the loop.
bool MachineLICM::MayCSE(MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
    CI = CSEMap.find(Opcode);
  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
  // the undef property onto uses.
  if (CI == CSEMap.end() || MI->isImplicitDef())
    return false;

  return LookForDuplicate(MI, CI->second) != 0;
}

/// Hoist - When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this function is called to do the dirty work.
bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
  // First check whether we should hoist this instruction.
  if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
    // If not, try unfolding a hoistable load.
    MI = ExtractHoistableLoad(MI);
    if (!MI) return false;
  }

  // Now move the instruction to the predecessor, inserting it before any
  // terminator instructions.
  DEBUG({
      dbgs() << "Hoisting " << *MI;
      if (Preheader->getBasicBlock())
        dbgs() << " to MachineBasicBlock "
               << Preheader->getName();
      if (MI->getParent()->getBasicBlock())
        dbgs() << " from MachineBasicBlock "
               << MI->getParent()->getName();
      dbgs() << "\n";
    });

  // If this is the first instruction being hoisted to the preheader,
  // initialize the CSE map with potential common expressions.
  if (FirstInLoop) {
    InitCSEMap(Preheader);
    FirstInLoop = false;
  }

  // Look for opportunity to CSE the hoisted instruction.
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
    CI = CSEMap.find(Opcode);
  if (!EliminateCSE(MI, CI)) {
    // Otherwise, splice the instruction to the preheader.
    Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
    // Update register pressure for BBs from header to this block.
    UpdateBackTraceRegPressure(MI);

    // Clear the kill flags of any register this instruction defines,
    // since they may need to be live throughout the entire loop
    // rather than just live for part of it.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isDef() && !MO.isDead())
        MRI->clearKillFlags(MO.getReg());
    }

    // Add to the CSE map.
    if (CI != CSEMap.end())
      CI->second.push_back(MI);
    else {
      std::vector<const MachineInstr*> CSEMIs;
      CSEMIs.push_back(MI);
      CSEMap.insert(std::make_pair(Opcode, CSEMIs));
    }
  }

  ++NumHoisted;
  return true;
}

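/// getCurPreheader - Get (or lazily create) the preheader of the current loop,
/// splitting the critical edge from the loop predecessor if needed. Returns
/// null, and remembers the failure, if no suitable block can be found.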
MachineBasicBlock *MachineLICM::getCurPreheader() {
  // Determine the block to which to hoist instructions. If we can't find a
  // suitable loop predecessor, we can't do any hoisting.

  // If we've tried to get a preheader and failed, don't try again.
  if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
    return 0;

  if (!CurPreheader) {
    CurPreheader = CurLoop->getLoopPreheader();
    if (!CurPreheader) {
      MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
      if (!Pred) {
        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
        return 0;
      }

      CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
      if (!CurPreheader) {
        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
        return 0;
      }
    }
  }
  return CurPreheader;
}