JumpThreading.cpp

//===- JumpThreading.cpp - Thread control through conditional blocks ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Jump Threading pass.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "jump-threading"
#include "llvm/Transforms/Scalar.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

STATISTIC(NumThreads, "Number of jumps threaded");
STATISTIC(NumFolds,   "Number of terminators folded");
STATISTIC(NumDupes,   "Number of branch blocks duplicated to eliminate phi");

static cl::opt<unsigned>
Threshold("jump-threading-threshold", 
          cl::desc("Max block size to duplicate for jump threading"),
          cl::init(6), cl::Hidden);

namespace {
  /// This pass performs 'jump threading', which looks at blocks that have
  /// multiple predecessors and multiple successors.  If one or more of the
  /// predecessors of the block can be proven to always jump to one of the
  /// successors, we forward the edge from the predecessor to the successor by
  /// duplicating the contents of this block.
  ///
  /// An example of when this can occur is code like this:
  ///
  ///   if () { ...
  ///     X = 4;
  ///   }
  ///   if (X < 3) {
  ///
  /// In this case, the unconditional branch at the end of the first if can be
  /// revectored to the false side of the second if.
  ///
  class JumpThreading : public FunctionPass {
    TargetData *TD;
#ifdef NDEBUG
    SmallPtrSet<BasicBlock*, 16> LoopHeaders;
#else
    SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
#endif
  public:
    static char ID; // Pass identification
    JumpThreading() : FunctionPass(&ID) {}

    bool runOnFunction(Function &F);
    void FindLoopHeaders(Function &F);
    
    bool ProcessBlock(BasicBlock *BB);
    bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
                    BasicBlock *SuccBB);
    bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
                                          BasicBlock *PredBB);
    
    typedef SmallVectorImpl<std::pair<ConstantInt*,
                                      BasicBlock*> > PredValueInfo;
    
    bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
                                         PredValueInfo &Result);
    bool ProcessThreadableEdges(Instruction *CondInst, BasicBlock *BB);
    
    
    bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
    bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);

    bool ProcessJumpOnPHI(PHINode *PN);
    
    bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
  };
}

char JumpThreading::ID = 0;
static RegisterPass<JumpThreading>
X("jump-threading", "Jump Threading");

// Public interface to the Jump Threading pass
FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }

/// runOnFunction - Top level algorithm.
///
bool JumpThreading::runOnFunction(Function &F) {
  DEBUG(errs() << "Jump threading on function '" << F.getName() << "'\n");
  TD = getAnalysisIfAvailable<TargetData>();
  
  FindLoopHeaders(F);
  
  bool AnotherIteration = true, EverChanged = false;
  while (AnotherIteration) {
    AnotherIteration = false;
    bool Changed = false;
    for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
      BasicBlock *BB = I;
      // Thread all of the branches we can over this block. 
      while (ProcessBlock(BB))
        Changed = true;
      
      ++I;
      
      // If the block is trivially dead, zap it.  This eliminates the successor
      // edges which simplifies the CFG.
      if (pred_begin(BB) == pred_end(BB) &&
          BB != &BB->getParent()->getEntryBlock()) {
        DEBUG(errs() << "  JT: Deleting dead block '" << BB->getName()
              << "' with terminator: " << *BB->getTerminator() << '\n');
        LoopHeaders.erase(BB);
        DeleteDeadBlock(BB);
        Changed = true;
      } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
        // Can't thread an unconditional jump, but if the block is "almost
        // empty", we can replace uses of it with uses of the successor and make
        // this dead.
        if (BI->isUnconditional() && 
            BB != &BB->getParent()->getEntryBlock()) {
          BasicBlock::iterator BBI = BB->getFirstNonPHI();
          // Ignore dbg intrinsics.
          while (isa<DbgInfoIntrinsic>(BBI))
            ++BBI;
          // If the terminator is the only non-phi instruction, try to nuke it.
          if (BBI->isTerminator()) {
            // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
            // block, we have to make sure it isn't in the LoopHeaders set.  We
            // reinsert afterward in the rare case when the block isn't deleted.
            bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
            
            if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
              Changed = true;
            else if (ErasedFromLoopHeaders)
              LoopHeaders.insert(BB);
          }
        }
      }
    }
    AnotherIteration = Changed;
    EverChanged |= Changed;
  }
  
  LoopHeaders.clear();
  return EverChanged;
}

/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
/// thread across it.
static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
  /// Ignore PHI nodes, these will be flattened when duplication happens.
  BasicBlock::const_iterator I = BB->getFirstNonPHI();
  
  // Sum up the cost of each instruction until we get to the terminator.  Don't
  // include the terminator because the copy won't include it.
  unsigned Size = 0;
  for (; !isa<TerminatorInst>(I); ++I) {
    // Debugger intrinsics don't incur code size.
    if (isa<DbgInfoIntrinsic>(I)) continue;
    
    // If this is a pointer->pointer bitcast, it is free.
    if (isa<BitCastInst>(I) && isa<PointerType>(I->getType()))
      continue;
    
    // All other instructions count for at least one unit.
    ++Size;
    
    // Calls are more expensive.  If they are non-intrinsic calls, we model them
    // as having cost of 4.  If they are a non-vector intrinsic, we model them
    // as having cost of 2 total, and if they are a vector intrinsic, we model
    // them as having cost 1.
    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
      if (!isa<IntrinsicInst>(CI))
        Size += 3;
      else if (!isa<VectorType>(CI->getType()))
        Size += 1;
    }
  }
  
  // Threading through a switch statement is particularly profitable.  If this
  // block ends in a switch, decrease its cost to make it more likely to happen.
  if (isa<SwitchInst>(I))