Skip to content
JumpThreading.cpp 61.6 KiB
Newer Older
    if (DPI->second < Popularity)
      ; // ignore.
    else if (DPI->second == Popularity) {
      // If it is the same as what we've seen so far, keep track of it.
      SamePopularity.push_back(DPI->first);
    } else {
      // If it is more popular, remember it.
      SamePopularity.clear();
      MostPopularDest = DPI->first;
      Popularity = DPI->second;
  // Okay, now we know the most popular destination.  If there is more than one
  // destination, we need to determine one.  This is arbitrary, but we need
  // to make a deterministic decision.  Pick the first one that appears in the
  // successor list.
  if (!SamePopularity.empty()) {
    SamePopularity.push_back(MostPopularDest);
    TerminatorInst *TI = BB->getTerminator();
    for (unsigned i = 0; ; ++i) {
      assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
      if (std::find(SamePopularity.begin(), SamePopularity.end(),
                    TI->getSuccessor(i)) == SamePopularity.end())
        continue;
      MostPopularDest = TI->getSuccessor(i);
  // Okay, we have finally picked the most popular destination.
  return MostPopularDest;
}

bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
                                           ConstantPreference Preference) {
  // If threading this would thread across a loop header, don't even try to
  // thread the edge.
  if (LoopHeaders.count(BB))
  PredValueInfoTy PredValues;
  if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference))
  assert(!PredValues.empty() &&
         "ComputeValueKnownInPredecessors returned true with no values");

David Greene's avatar
David Greene committed
  DEBUG(dbgs() << "IN BB: " << *BB;
        for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
          dbgs() << "  BB '" << BB->getName() << "': FOUND condition = "
            << *PredValues[i].first
            << " for pred '" << PredValues[i].second->getName() << "'.\n";
  // Decide what we want to thread through.  Convert our list of known values to
  // a list of known destinations for each pred.  This also discards duplicate
  // predecessors and keeps track of the undefined inputs (which are represented
  // as a null dest in the PredToDestList).
  SmallPtrSet<BasicBlock*, 16> SeenPreds;
  SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;
  BasicBlock *OnlyDest = 0;
  BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
  for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
    BasicBlock *Pred = PredValues[i].second;
    if (!SeenPreds.insert(Pred))
      continue;  // Duplicate predecessor entry.
    // If the predecessor ends with an indirect goto, we can't change its
    // destination.
    if (isa<IndirectBrInst>(Pred->getTerminator()))
    Constant *Val = PredValues[i].first;
    if (isa<UndefValue>(Val))
      DestBB = 0;
    else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
      DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
    else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
      DestBB = SI->getSuccessor(SI->findCaseValue(cast<ConstantInt>(Val)));
    else {
      assert(isa<IndirectBrInst>(BB->getTerminator())
              && "Unexpected terminator");
      DestBB = cast<BlockAddress>(Val)->getBasicBlock();

    // If we have exactly one destination, remember it for efficiency below.
      OnlyDest = DestBB;
    else if (OnlyDest != DestBB)
      OnlyDest = MultipleDestSentinel;
    PredToDestList.push_back(std::make_pair(Pred, DestBB));
  // If all edges were unthreadable, we fail.
  if (PredToDestList.empty())
  // Determine which is the most common successor.  If we have many inputs and
  // this block is a switch, we want to start by threading the batch that goes
  // to the most popular destination first.  If we only know about one
  // threadable destination (the common case) we can avoid this.
  BasicBlock *MostPopularDest = OnlyDest;
  if (MostPopularDest == MultipleDestSentinel)
    MostPopularDest = FindMostPopularDest(BB, PredToDestList);
  // Now that we know what the most popular destination is, factor all
  // predecessors that will jump to it into a single predecessor.
  SmallVector<BasicBlock*, 16> PredsToFactor;
  for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
    if (PredToDestList[i].second == MostPopularDest) {
      BasicBlock *Pred = PredToDestList[i].first;
      // This predecessor may be a switch or something else that has multiple
      // edges to the block.  Factor each of these edges by listing them
      // according to # occurrences in PredsToFactor.
      TerminatorInst *PredTI = Pred->getTerminator();
      for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i)
        if (PredTI->getSuccessor(i) == BB)
          PredsToFactor.push_back(Pred);
    }

  // If the threadable edges are branching on an undefined value, we get to pick
  // the destination that these predecessors should get to.
  if (MostPopularDest == 0)
    MostPopularDest = BB->getTerminator()->
                            getSuccessor(GetBestDestForJumpOnUndef(BB));
  // Ok, try to thread it!
  return ThreadEdge(BB, PredsToFactor, MostPopularDest);
}

/// ProcessBranchOnPHI - We have an otherwise unthreadable conditional branch on
/// a PHI node in the current block.  See if there are any simplifications we
/// can do based on inputs to the phi node.
bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
  BasicBlock *BB = PN->getParent();
  // TODO: We could make use of this to do it once for blocks with common PHI
  // values.
  SmallVector<BasicBlock*, 1> PredBBs;
  PredBBs.resize(1);
  // If any of the predecessor blocks end in an unconditional branch, we can
  // *duplicate* the conditional branch into that block in order to further
  // encourage jump threading and to eliminate cases where we have branch on a
  // phi of an icmp (branch on icmp is much better).
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *PredBB = PN->getIncomingBlock(i);
    if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
      if (PredBr->isUnconditional()) {
        PredBBs[0] = PredBB;
        // Try to duplicate BB into PredBB.
        if (DuplicateCondBranchOnPHIIntoPred(BB, PredBBs))
          return true;
      }
/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
/// a xor instruction in the current block.  See if there are any
/// simplifications we can do based on inputs to the xor.
bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
  BasicBlock *BB = BO->getParent();
  // If either the LHS or RHS of the xor is a constant, don't do this
  // optimization.
  if (isa<ConstantInt>(BO->getOperand(0)) ||
      isa<ConstantInt>(BO->getOperand(1)))
    return false;
  // If the first instruction in BB isn't a phi, we won't be able to infer
  // anything special about any particular predecessor.
  if (!isa<PHINode>(BB->front()))
    return false;
  // If we have a xor as the branch input to this block, and we know that the
  // LHS or RHS of the xor in any predecessor is true/false, then we can clone
  // the condition into the predecessor and fix that value to true, saving some
  // logical ops on that path and encouraging other paths to simplify.
  //
  // This copies something like this:
  //
  //  BB:
  //    %X = phi i1 [1],  [%X']
  //    %Y = icmp eq i32 %A, %B
  //    %Z = xor i1 %X, %Y
  //    br i1 %Z, ...
  //
  // Into:
  //  BB':
  //    %Y = icmp ne i32 %A, %B
  //    br i1 %Z, ...

  PredValueInfoTy XorOpValues;
  if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues,
                                       WantInteger)) {
    if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues,
                                         WantInteger))
  assert(!XorOpValues.empty() &&
         "ComputeValueKnownInPredecessors returned true with no values");

  // Scan the information to see which is most popular: true or false.  The
  // predecessors can be of the set true, false, or undef.
  unsigned NumTrue = 0, NumFalse = 0;
  for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
    if (isa<UndefValue>(XorOpValues[i].first))
      // Ignore undefs for the count.
      continue;
    if (cast<ConstantInt>(XorOpValues[i].first)->isZero())
  // Determine which value to split on, true, false, or undef if neither.
  ConstantInt *SplitVal = 0;
  if (NumTrue > NumFalse)
    SplitVal = ConstantInt::getTrue(BB->getContext());
  else if (NumTrue != 0 || NumFalse != 0)
    SplitVal = ConstantInt::getFalse(BB->getContext());
  // Collect all of the blocks that this can be folded into so that we can
  // factor this once and clone it once.
  SmallVector<BasicBlock*, 8> BlocksToFoldInto;
  for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
    if (XorOpValues[i].first != SplitVal &&
        !isa<UndefValue>(XorOpValues[i].first))
      continue;

    BlocksToFoldInto.push_back(XorOpValues[i].second);
  }
  // If we inferred a value for all of the predecessors, then duplication won't
  // help us.  However, we can just replace the LHS or RHS with the constant.
  if (BlocksToFoldInto.size() ==
      cast<PHINode>(BB->front()).getNumIncomingValues()) {
    if (SplitVal == 0) {
      // If all preds provide undef, just nuke the xor, because it is undef too.
      BO->replaceAllUsesWith(UndefValue::get(BO->getType()));
      BO->eraseFromParent();
    } else if (SplitVal->isZero()) {
      // If all preds provide 0, replace the xor with the other input.
      BO->replaceAllUsesWith(BO->getOperand(isLHS));
      BO->eraseFromParent();
    } else {
      // If all preds provide 1, set the computed value to 1.
      BO->setOperand(!isLHS, SplitVal);
    }
  // Try to duplicate BB into PredBB.
Chris Lattner's avatar
Chris Lattner committed
  return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
/// predecessor to the PHIBB block.  If it has PHI nodes, add entries for
/// NewPred using the entries from OldPred (suitably mapped).
static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
                                            BasicBlock *OldPred,
                                            BasicBlock *NewPred,
                                     DenseMap<Instruction*, Value*> &ValueMap) {
  for (BasicBlock::iterator PNI = PHIBB->begin();
       PHINode *PN = dyn_cast<PHINode>(PNI); ++PNI) {
    // Ok, we have a PHI node.  Figure out what the incoming value was for the
    // DestBlock.
    Value *IV = PN->getIncomingValueForBlock(OldPred);
    // Remap the value if necessary.
    if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
      DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst);
      if (I != ValueMap.end())
        IV = I->second;
    PN->addIncoming(IV, NewPred);
/// ThreadEdge - We have decided that it is safe and profitable to factor the
/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
/// across BB.  Transform the IR to reflect this change.
bool JumpThreading::ThreadEdge(BasicBlock *BB,
                               const SmallVectorImpl<BasicBlock*> &PredBBs,
  // If threading to the same block as we come from, we would infinite loop.
  if (SuccBB == BB) {
David Greene's avatar
David Greene committed
    DEBUG(dbgs() << "  Not threading across BB '" << BB->getName()
          << "' - would thread to self!\n");
  // If threading this would thread across a loop header, don't thread the edge.
  // See the comments above FindLoopHeaders for justifications and caveats.
  if (LoopHeaders.count(BB)) {
David Greene's avatar
David Greene committed
    DEBUG(dbgs() << "  Not threading across loop header BB '" << BB->getName()
          << "' to dest BB '" << SuccBB->getName()
          << "' - it might create an irreducible loop!\n");
  unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
  if (JumpThreadCost > Threshold) {
David Greene's avatar
David Greene committed
    DEBUG(dbgs() << "  Not threading BB '" << BB->getName()
          << "' - Cost is too high: " << JumpThreadCost << "\n");
    return false;
  }
  // And finally, do it!  Start by factoring the predecessors is needed.
  BasicBlock *PredBB;
  if (PredBBs.size() == 1)
    PredBB = PredBBs[0];
  else {
David Greene's avatar
David Greene committed
    DEBUG(dbgs() << "  Factoring out " << PredBBs.size()
          << " common predecessors.\n");
    PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
                                    ".thr_comm", this);
  }
David Greene's avatar
David Greene committed
  DEBUG(dbgs() << "  Threading edge from '" << PredBB->getName() << "' to '"
        << SuccBB->getName() << "' with cost: " << JumpThreadCost
        << ", across block:\n    "
        << *BB << "\n");
  LVI->threadEdge(PredBB, BB, SuccBB);
  // We are going to have to map operands from the original BB block to the new
  // copy of the block 'NewBB'.  If there are PHI nodes in BB, evaluate them to
  // account for entry from PredBB.
  DenseMap<Instruction*, Value*> ValueMapping;

  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),
                                         BB->getName()+".thread",
                                         BB->getParent(), BB);
  BasicBlock::iterator BI = BB->begin();
  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
    ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
  // Clone the non-phi instructions of BB into NewBB, keeping track of the
  // mapping and using it to remap operands in the cloned instructions.
  for (; !isa<TerminatorInst>(BI); ++BI) {
    NewBB->getInstList().push_back(New);
    ValueMapping[BI] = New;
    // Remap operands to patch up intra-block references.
    for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
      if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
        DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
        if (I != ValueMapping.end())
          New->setOperand(i, I->second);
      }
  // We didn't copy the terminator from BB over to NewBB, because there is now
  // an unconditional jump to SuccBB.  Insert the unconditional jump.
  BranchInst *NewBI =BranchInst::Create(SuccBB, NewBB);
  NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc());
  // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the
  // PHI nodes for NewBB now.
  AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping);
  // If there were values defined in BB that are used outside the block, then we
  // now have to update all uses of the value to use either the original value,
  // the cloned value, or some PHI derived value.  This can require arbitrary
  // PHI insertion, of which we are prepared to do, clean these up now.
  SSAUpdater SSAUpdate;
  SmallVector<Use*, 16> UsesToRename;
  for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
    // Scan all uses of this instruction to see if it is used outside of its
    // block, and if so, record them in UsesToRename.
    for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
         ++UI) {
      Instruction *User = cast<Instruction>(*UI);
      if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
        if (UserPN->getIncomingBlock(UI) == BB)
          continue;
      } else if (User->getParent() == BB)
        continue;
      UsesToRename.push_back(&UI.getUse());
    }
    // If there are no uses outside the block, we're done with this instruction.
    if (UsesToRename.empty())
      continue;
David Greene's avatar
David Greene committed
    DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");

    // We found a use of I outside of BB.  Rename all uses of I that are outside
    // its block to be uses of the appropriate PHI node etc.  See ValuesInBlocks
    // with the two values we know.
    SSAUpdate.Initialize(I->getType(), I->getName());
    SSAUpdate.AddAvailableValue(BB, I);
    SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);
    while (!UsesToRename.empty())
      SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
David Greene's avatar
David Greene committed
    DEBUG(dbgs() << "\n");
  // Ok, NewBB is good to go.  Update the terminator of PredBB to jump to
  // NewBB instead of BB.  This eliminates predecessors from BB, which requires
  // us to simplify any PHI nodes in BB.
  TerminatorInst *PredTerm = PredBB->getTerminator();
  for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
    if (PredTerm->getSuccessor(i) == BB) {
      PredTerm->setSuccessor(i, NewBB);
    }
  // At this point, the IR is fully up to date and consistent.  Do a quick scan
  // over the new instructions and zap any that are constants or dead.  This
  // frequently happens because of phi translation.
  SimplifyInstructionsInBlock(NewBB, TD);
  // Threaded an edge!
  ++NumThreads;
  return true;

/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch
/// to BB which contains an i1 PHI node and a conditional branch on that PHI.
/// If we can duplicate the contents of BB up into PredBB do so now, this
/// improves the odds that the branch will be on an analyzable instruction like
/// a compare.
bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
                                 const SmallVectorImpl<BasicBlock *> &PredBBs) {
  assert(!PredBBs.empty() && "Can't handle an empty set");

  // If BB is a loop header, then duplicating this block outside the loop would
  // cause us to transform this into an irreducible loop, don't do this.
  // See the comments above FindLoopHeaders for justifications and caveats.
  if (LoopHeaders.count(BB)) {
David Greene's avatar
David Greene committed
    DEBUG(dbgs() << "  Not duplicating loop header '" << BB->getName()
          << "' into predecessor block '" << PredBBs[0]->getName()
          << "' - it might create an irreducible loop!\n");
    return false;
  }
  unsigned DuplicationCost = getJumpThreadDuplicationCost(BB);
  if (DuplicationCost > Threshold) {
David Greene's avatar
David Greene committed
    DEBUG(dbgs() << "  Not duplicating BB '" << BB->getName()
          << "' - Cost is too high: " << DuplicationCost << "\n");
    return false;
  }
  // And finally, do it!  Start by factoring the predecessors is needed.
  BasicBlock *PredBB;
  if (PredBBs.size() == 1)
    PredBB = PredBBs[0];
  else {
    DEBUG(dbgs() << "  Factoring out " << PredBBs.size()
          << " common predecessors.\n");
    PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
                                    ".thr_comm", this);
  }
  // Okay, we decided to do this!  Clone all the instructions in BB onto the end
  // of PredBB.
David Greene's avatar
David Greene committed
  DEBUG(dbgs() << "  Duplicating block '" << BB->getName() << "' into end of '"
        << PredBB->getName() << "' to eliminate branch on phi.  Cost: "
        << DuplicationCost << " block is:" << *BB << "\n");
  // Unless PredBB ends with an unconditional branch, split the edge so that we
  // can just clone the bits from BB into the end of the new PredBB.
  BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
  if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) {
    PredBB = SplitEdge(PredBB, BB, this);
    OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
  }
  // We are going to have to map operands from the original BB block into the
  // PredBB block.  Evaluate PHI nodes in BB.
  DenseMap<Instruction*, Value*> ValueMapping;
  BasicBlock::iterator BI = BB->begin();
  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
    ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
  // Clone the non-phi instructions of BB into PredBB, keeping track of the
  // mapping and using it to remap operands in the cloned instructions.
  for (; BI != BB->end(); ++BI) {
    Instruction *New = BI->clone();
    // Remap operands to patch up intra-block references.
    for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
      if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
        DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst);
        if (I != ValueMapping.end())
          New->setOperand(i, I->second);
      }

    // If this instruction can be simplified after the operands are updated,
    // just use the simplified value instead.  This frequently happens due to
    // phi translation.
    if (Value *IV = SimplifyInstruction(New, TD)) {
      delete New;
      ValueMapping[BI] = IV;
    } else {
      // Otherwise, insert the new instruction into the block.
      New->setName(BI->getName());
      PredBB->getInstList().insert(OldPredBranch, New);
      ValueMapping[BI] = New;
    }
  // Check to see if the targets of the branch had PHI nodes. If so, we need to
  // add entries to the PHI nodes for branch from PredBB now.
  BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator());
  AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB,
                                  ValueMapping);
  AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB,
                                  ValueMapping);
  // If there were values defined in BB that are used outside the block, then we
  // now have to update all uses of the value to use either the original value,
  // the cloned value, or some PHI derived value.  This can require arbitrary
  // PHI insertion, of which we are prepared to do, clean these up now.
  SSAUpdater SSAUpdate;
  SmallVector<Use*, 16> UsesToRename;
  for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
    // Scan all uses of this instruction to see if it is used outside of its
    // block, and if so, record them in UsesToRename.
    for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
         ++UI) {
      Instruction *User = cast<Instruction>(*UI);
      if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
        if (UserPN->getIncomingBlock(UI) == BB)
          continue;
      } else if (User->getParent() == BB)
        continue;
      UsesToRename.push_back(&UI.getUse());
    }
    // If there are no uses outside the block, we're done with this instruction.
    if (UsesToRename.empty())
      continue;
David Greene's avatar
David Greene committed
    DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
    // We found a use of I outside of BB.  Rename all uses of I that are outside
    // its block to be uses of the appropriate PHI node etc.  See ValuesInBlocks
    // with the two values we know.
    SSAUpdate.Initialize(I->getType(), I->getName());
    SSAUpdate.AddAvailableValue(BB, I);
    SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);
    while (!UsesToRename.empty())
      SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
David Greene's avatar
David Greene committed
    DEBUG(dbgs() << "\n");
  // PredBB no longer jumps to BB, remove entries in the PHI node for the edge
  // that we nuked.
  // Remove the unconditional branch at the end of the PredBB block.
  OldPredBranch->eraseFromParent();