Skip to content
GVN.cpp 51.8 KiB
Newer Older
      }
      
      ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I));
    DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD: " << *LI);
    
    DenseMap<BasicBlock*, Value*> BlockReplValues;
    BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end());
    // Perform PHI construction.
    Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true);
    LI->replaceAllUsesWith(v);
    toErase.push_back(LI);
    NumGVNLoad++;
    return true;
  
  if (!EnablePRE || !EnableLoadPRE)
    return false;
  // Okay, we have *some* definitions of the value.  This means that the value
  // is available in some of our (transitive) predecessors.  Let's think about
  // doing PRE of this load.  This will involve inserting a new load into the
  // predecessor when it's not available.  We could do this in general, but
  // prefer to not increase code size.  As such, we only do this when we know
  // that we only have to insert *one* load (which means we're basically moving
  // the load, not inserting a new one).
  
  // Everything we do here is based on local predecessors of LI's block.  If it
  // only has one predecessor, bail now.
  BasicBlock *LoadBB = LI->getParent();
  if (LoadBB->getSinglePredecessor())
    return false;
  
  // If we have a repl set with LI itself in it, this means we have a loop where
  // at least one of the values is LI.  Since this means that we won't be able
  // to eliminate LI even if we insert uses in the other predecessors, we will
  // end up increasing code size.  Reject this by scanning for LI.
  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
    if (ValuesPerBlock[i].second == LI)
      return false;
  
  // Okay, we have some hope :).  Check to see if the loaded value is fully
  // available in all but one predecessor.
  // FIXME: If we could restructure the CFG, we could make a common pred with
  // all the preds that don't have an available LI and insert a new load into
  // that one block.
  BasicBlock *UnavailablePred = 0;

  DenseMap<BasicBlock*, bool> FullyAvailableBlocks;
  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
    FullyAvailableBlocks[ValuesPerBlock[i].first] = true;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    FullyAvailableBlocks[UnavailableBlocks[i]] = false;
  for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
       PI != E; ++PI) {
    if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
      continue;
    
    // If this load is not available in multiple predecessors, reject it.
    if (UnavailablePred && UnavailablePred != *PI)
      return false;
    UnavailablePred = *PI;
  }
  
  assert(UnavailablePred != 0 &&
         "Fully available value should be eliminated above!");
  
  // If the loaded pointer is a PHI node defined in this block, do PHI translation
  // to get its value in the predecessor.
  Value *LoadPtr = LI->getOperand(0)->DoPHITranslation(LoadBB, UnavailablePred);
  
  // Make sure the value is live in the predecessor.  If it was defined by a
  // non-PHI instruction in this block, we don't know how to recompute it above.
  if (Instruction *LPInst = dyn_cast<Instruction>(LoadPtr))
    if (!DT->dominates(LPInst->getParent(), UnavailablePred)) {
      DEBUG(cerr << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: "
                 << *LPInst << *LI << "\n");
      return false;
    }
  
  // We don't currently handle critical edges :(
  if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) {
    DEBUG(cerr << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
                << UnavailablePred->getName() << "': " << *LI);
    return false;
  }

  // Okay, we can eliminate this load by inserting a reload in the predecessor
  // and using PHI construction to get the value in the other predecessors, do
  // it.
  DEBUG(cerr << "GVN REMOVING PRE LOAD: " << *LI);
  
  Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
                                LI->getAlignment(),
                                UnavailablePred->getTerminator());
  
  DenseMap<BasicBlock*, Value*> BlockReplValues;
  BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end());
  BlockReplValues[UnavailablePred] = NewLoad;
  
  // Perform PHI construction.
  Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true);
  LI->replaceAllUsesWith(v);
  toErase.push_back(LI);
  NumPRELoad++;
/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, DenseMap<Value*, LoadInst*> &lastLoad,
                      SmallVectorImpl<Instruction*> &toErase) {
  if (L->isVolatile()) {
    lastLoad[L->getPointerOperand()] = L;
    return false;
  }
  
  Value* pointer = L->getPointerOperand();
  LoadInst*& last = lastLoad[pointer];
  
  // ... to a pointer that has been loaded from before...
  MemDepResult dep = MD->getDependency(L);
      L->getParent() != &L->getParent()->getParent()->getEntryBlock()) {
    removedNonLocal = processNonLocalLoad(L, toErase);
    
    if (!removedNonLocal)
      last = L;
    
    return removedNonLocal;
  }
  
  
  // Walk up the dependency chain until we either find
  // a dependency we can use, or we can't walk any further
  while (Instruction *DepInst = dep.getInst()) {
    if (StoreInst* S = dyn_cast<StoreInst>(DepInst)) {
      if (S->getPointerOperand() == pointer) {
        // Remove it!
        L->replaceAllUsesWith(S->getOperand(0));
        toErase.push_back(L);
        deletedLoad = true;
        NumGVNLoad++;
      }
      
      // Whether we removed it or not, we can't
      // go any further
      break;
    } else if (!isa<LoadInst>(DepInst)) {
      // Only want to handle loads below.
      break;
    } else if (!last) {
      // If we don't depend on a store, and we haven't
      // been loaded before, bail.
      break;
      // Remove it!
      L->replaceAllUsesWith(last);
      toErase.push_back(L);
      deletedLoad = true;
      NumGVNLoad++;
      break;
    } else {
      dep = MD->getDependencyFrom(L, DepInst, DepInst->getParent());
  // If this load really doesn't depend on anything, then we must be loading an
  // undef value.  This can happen when loading from a fresh allocation with no
  // intervening stores, for example.
  if (dep.isNone()) {
    // If this load depends directly on an allocation, there isn't
    // anything stored there; therefore, we can optimize this load
    // to undef.
    L->replaceAllUsesWith(UndefValue::get(L->getType()));
    toErase.push_back(L);
    deletedLoad = true;
    NumGVNLoad++;
/// lookupNumber - Find the value recorded for value number 'num' in the
/// availability scope registered for BB, searching BB's own scope first and
/// then each enclosing scope reached through the 'parent' links.  Returns 0
/// when BB has no registered scope or when no scope in the chain has an
/// entry for 'num'.
Value* GVN::lookupNumber(BasicBlock* BB, uint32_t num) {
  DenseMap<BasicBlock*, ValueNumberScope*>::iterator ScopeIt =
                                                        localAvail.find(BB);
  if (ScopeIt == localAvail.end())
    return 0;

  // Innermost scope first; the first table that knows 'num' wins.
  for (ValueNumberScope* Scope = ScopeIt->second; Scope;
       Scope = Scope->parent) {
    DenseMap<uint32_t, Value*>::iterator Hit = Scope->table.find(num);
    if (Hit != Scope->table.end())
      return Hit->second;
  }

  // Ran off the end of the scope chain without a match.
  return 0;
}

/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
bool GVN::processInstruction(Instruction *I,
                             DenseMap<Value*, LoadInst*> &lastSeenLoad,
                             SmallVectorImpl<Instruction*> &toErase) {
  if (LoadInst* L = dyn_cast<LoadInst>(I)) {
    bool changed = processLoad(L, lastSeenLoad, toErase);
    
    if (!changed) {
      unsigned num = VN.lookup_or_add(L);
      localAvail[I->getParent()]->table.insert(std::make_pair(num, L));
  uint32_t nextNum = VN.getNextUnusedValueNumber();
  // Allocations are always uniquely numbered, so we can save time and memory
  // by fast failing them.
  if (isa<AllocationInst>(I) || isa<TerminatorInst>(I)) {
    localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
  // Collapse PHI nodes
    Value* constVal = CollapsePhi(p);
      for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end();
           PI != PE; ++PI)
        if (PI->second.count(p))
          PI->second.erase(p);
      p->replaceAllUsesWith(constVal);
      toErase.push_back(p);
      localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
  
  // If the number we were assigned was a brand new VN, then we don't
  // need to do a lookup to see if the number already exists
  // somewhere in the domtree: it can't!
  } else if (num == nextNum) {
    localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
    
  // Perform value-number based elimination
  } else if (Value* repl = lookupNumber(I->getParent(), num)) {
    I->replaceAllUsesWith(repl);
    toErase.push_back(I);
    return true;
    localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
  }
  
  return false;
}

// GVN::runOnFunction - This is the main transformation entry point for a
// function.
//
Owen Anderson's avatar
Owen Anderson committed
bool GVN::runOnFunction(Function& F) {
  MD = &getAnalysis<MemoryDependenceAnalysis>();
  DT = &getAnalysis<DominatorTree>();
  VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
  VN.setMemDep(MD);
  VN.setDomTree(DT);
Owen Anderson's avatar
Owen Anderson committed
  bool changed = false;
  bool shouldContinue = true;
  
  // Merge unconditional branches, allowing PRE to catch more
  // optimization opportunities.
  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
    BasicBlock* BB = FI;
    ++FI;
    bool removedBlock = MergeBlockIntoPredecessor(BB, this);
    if (removedBlock) NumGVNBlocks++;
    
    changed |= removedBlock;
Owen Anderson's avatar
Owen Anderson committed
  while (shouldContinue) {
    shouldContinue = iterateOnFunction(F);
    changed |= shouldContinue;
  }
  
    bool PREChanged = true;
    while (PREChanged) {
      PREChanged = performPRE(F);
Owen Anderson's avatar
Owen Anderson committed
  return changed;
}


/// processBlock - Run processInstruction over every instruction in the block
/// held by DTN, erase whatever instructions it queues for deletion, and
/// return true if any call to processInstruction reported a change.
bool GVN::processBlock(DomTreeNode* DTN) {
  BasicBlock* BB = DTN->getBlock();
  SmallVector<Instruction*, 8> toErase;
  DenseMap<Value*, LoadInst*> lastSeenLoad;
  bool changed_function = false;
  // Register a fresh ValueNumberScope for BB.  The first form chains it to
  // the scope of the immediate dominator's block; the second gives it a null
  // parent.
  // NOTE(review): the 'if' that pairs with the 'else' below is not visible
  // here -- presumably a check that DTN has an immediate dominator; confirm.
    localAvail[BB] =
                  new ValueNumberScope(localAvail[DTN->getIDom()->getBlock()]);
  else
    localAvail[BB] = new ValueNumberScope(0);
  // BI is advanced manually inside the loop because instructions may be
  // erased while we walk the block.
  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
       BI != BE;) {
    changed_function |= processInstruction(BI, lastSeenLoad, toErase);
    // Nothing was queued for deletion: simply move to the next instruction.
    if (toErase.empty()) {
      ++BI;
      continue;
    }
    
    // If we need some instructions deleted, do it now.
    NumGVNInstr += toErase.size();
    
    // Avoid iterator invalidation.
    // Step BI back to the previous instruction (when there is one) before
    // erasing; it is moved forward again once the erasing is done.
    bool AtStart = BI == BB->begin();
    if (!AtStart)
      --BI;

    // NOTE(review): toErase is declared with an inline size of 8 above, but
    // the iterator type below names an inline size of 4 -- confirm this
    // mismatch is intentional.
    for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
         E = toErase.end(); I != E; ++I) {
      DEBUG(cerr << "GVN removed: " << **I);
      // Notify MD of the removal before the instruction itself is erased.
      MD->removeInstruction(*I);
      (*I)->eraseFromParent();

    // Resume from the start of the block if we were at its first
    // instruction, otherwise from the instruction after the one we stepped
    // back to.
    if (AtStart)
      BI = BB->begin();
    else
      ++BI;
    
    toErase.clear();
  }
  
  return changed_function;
}

/// performPRE - Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
bool GVN::performPRE(Function& F) {
  SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
  DenseMap<BasicBlock*, Value*> predMap;
  for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
       DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
    BasicBlock* CurrentBlock = *DI;
    
    // Nothing to PRE in the entry block.
    if (CurrentBlock == &F.getEntryBlock()) continue;
    
    for (BasicBlock::iterator BI = CurrentBlock->begin(),
         BE = CurrentBlock->end(); BI != BE; ) {
      Instruction *CurInst = BI++;
      
      if (isa<AllocationInst>(CurInst) || isa<TerminatorInst>(CurInst) ||
          isa<PHINode>(CurInst) || CurInst->mayReadFromMemory() ||
          CurInst->mayWriteToMemory())
      
      // Look for the predecessors for PRE opportunities.  We're
      // only trying to solve the basic diamond case, where
      // a value is computed in the successor and one predecessor,
      // but not the other.  We also explicitly disallow cases
      // where the successor is its own predecessor, because they're
      // more complicated to get right.
      unsigned numWith = 0;
      unsigned numWithout = 0;
      BasicBlock* PREPred = 0;
      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
        // We're not interested in PRE where the block is its
        // own predecessor, or in blocks with predecessors
        // that are not reachable.
        if (*PI == CurrentBlock) {
          break;
        } else if (!localAvail.count(*PI))  {
          numWithout = 2;
          break;
        }
        
        DenseMap<uint32_t, Value*>::iterator predV = 
                                            localAvail[*PI]->table.find(valno);
        if (predV == localAvail[*PI]->table.end()) {
          numWith++;
        }
      }
      
      // Don't do PRE when it might increase code size, i.e. when
      // we would need to insert instructions in more than one pred.
      // We can't do PRE safely on a critical edge, so instead we schedule
      // the edge to be split and perform the PRE the next time we iterate
      // on the function.
      unsigned succNum = 0;
      for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors();
           i != e; ++i)
        if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) {
          succNum = i;
          break;
        }
        
      if (isCriticalEdge(PREPred->getTerminator(), succNum)) {
        toSplit.push_back(std::make_pair(PREPred->getTerminator(), succNum));
      // Instantiate the expression in the predecessor that lacked it.
      // Because we are going top-down through the block, all value numbers
      // will be available in the predecessor by the time we need them.  Any
      // that weren't originally present will have been instantiated earlier
      // in this loop.
      for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
        Value *Op = PREInstr->getOperand(i);
        if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
          continue;
        
        if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
          PREInstr->setOperand(i, V);
        } else {
          success = false;
          break;
      }
      
      // Fail out if we encounter an operand that is not available in
      // the PRE predecessor.  This is typically because of loads which 
      // are not value numbered precisely.
      if (!success) {
        delete PREInstr;
        continue;
      }
      
      PREInstr->insertBefore(PREPred->getTerminator());
      VN.add(PREInstr, valno);
      NumGVNPRE++;
      
      // Update the availability map to include the new instruction.
      localAvail[PREPred]->table.insert(std::make_pair(valno, PREInstr));
      
      // Create a PHI to make the value available in this block.
      PHINode* Phi = PHINode::Create(CurInst->getType(),
                                     CurInst->getName() + ".pre-phi",
                                     CurrentBlock->begin());
      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI)
      localAvail[CurrentBlock]->table[valno] = Phi;
      DEBUG(cerr << "GVN PRE removed: " << *CurInst);
      MD->removeInstruction(CurInst);
      CurInst->eraseFromParent();
      Changed = true;
  for (SmallVector<std::pair<TerminatorInst*, unsigned>, 4>::iterator
       I = toSplit.begin(), E = toSplit.end(); I != E; ++I) {
    SplitCriticalEdge(I->first, I->second, this);
    BasicBlock* NewBlock = I->first->getSuccessor(I->second);
    localAvail[NewBlock] =
             new ValueNumberScope(localAvail[I->first->getParent()]);
  }
// iterateOnFunction - Executes one iteration of GVN
Owen Anderson's avatar
Owen Anderson committed
bool GVN::iterateOnFunction(Function &F) {
  cleanupGlobalSets();
  // Top-down walk of the dominator tree
  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI)

void GVN::cleanupGlobalSets() {
  VN.clear();
  phiMap.clear();

  for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
       I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
    delete I->second;
  localAvail.clear();
}