Skip to content
LoopStrengthReduce.cpp 53.4 KiB
Newer Older
  if (RewriteFactor == 0) {
    // Create a new Phi for this base, and stick it in the loop header.
    NewPHI = new PHINode(ReplacedTy, "iv.", PhiInsertBefore);
    ++NumInserted;
    // Add common base to the new Phi node.
    NewPHI->addIncoming(CommonBaseV, Preheader);

    // Insert the stride into the preheader.
    Value *StrideV = PreheaderRewriter.expandCodeFor(Stride, PreInsertPt,
                                                     ReplacedTy);
    if (!isa<ConstantInt>(StrideV)) ++NumVariable;
    // Emit the increment of the base value before the terminator of the loop
    // latch block, and add it to the Phi node.
    SCEVHandle IncExp = SCEVAddExpr::get(SCEVUnknown::get(NewPHI),
                                         SCEVUnknown::get(StrideV));
    IncV = Rewriter.expandCodeFor(IncExp, LatchBlock->getTerminator(),
                                  ReplacedTy);
    IncV->setName(NewPHI->getName()+".inc");
    NewPHI->addIncoming(IncV, LatchBlock);

    // Remember this in case a later stride is multiple of this.
    IVsByStride[Stride].addIV(Stride, CommonExprs, NewPHI, IncV);
  } else {
    Constant *C = dyn_cast<Constant>(CommonBaseV);
    if (!C ||
        (!C->isNullValue() &&
         !isTargetConstant(SCEVUnknown::get(CommonBaseV), TLI)))
      // We want the common base emitted into the preheader!
      CommonBaseV = new CastInst(CommonBaseV, CommonBaseV->getType(),
                                 "commonbase", PreInsertPt);
  // Sort by the base value, so that all IVs with identical bases are next to
    SCEVHandle Base = UsersToProcess.back().Base;
    DEBUG(std::cerr << "  INSERTING code for BASE = " << *Base << ":\n");
    // Emit the code for Base into the preheader.
    Value *BaseV = PreheaderRewriter.expandCodeFor(Base, PreInsertPt,
                                                   ReplacedTy);
    
    // If BaseV is a constant other than 0, make sure that it gets inserted into
    // the preheader, instead of being forward substituted into the uses.  We do
    // this by forcing a noop cast to be inserted into the preheader in this
    // case.
    if (Constant *C = dyn_cast<Constant>(BaseV))
      if (!C->isNullValue() && !isTargetConstant(Base, TLI)) {
        // We want this constant emitted into the preheader!
        BaseV = new CastInst(BaseV, BaseV->getType(), "preheaderinsert",
                             PreInsertPt);       
      }
    
    // Emit the code to add the immediate offset to the Phi value, just before
    // the instructions that we identified as using this stride and base.
    unsigned ScanPos = 0;
    do {
      BasedUser &User = UsersToProcess.back();
      // If this instruction wants to use the post-incremented value, move it
      // after the post-inc and use its value instead of the PHI.
      Value *RewriteOp = NewPHI;
      if (User.isUseOfPostIncrementedValue) {
        RewriteOp = IncV;
Chris Lattner's avatar
Chris Lattner committed

        // If this user is in the loop, make sure it is the last thing in the
        // loop to ensure it is dominated by the increment.
        if (L->contains(User.Inst->getParent()))
          User.Inst->moveBefore(LatchBlock->getTerminator());
      SCEVHandle RewriteExpr = SCEVUnknown::get(RewriteOp);

      // Clear the SCEVExpander's expression map so that we are guaranteed
      // to have the code emitted where we expect it.
      Rewriter.clear();

      // If we are reusing the iv, then it must be multiplied by a constant
      // factor take advantage of addressing mode scale component.
      if (RewriteFactor != 0) {
        RewriteExpr =
          SCEVMulExpr::get(SCEVUnknown::getIntegerSCEV(RewriteFactor,
                                                       RewriteExpr->getType()),
                           RewriteExpr);

        // The common base is emitted in the loop preheader. But since we
        // are reusing an IV, it has not been used to initialize the PHI node.
        // Add it to the expression used to rewrite the uses.
        if (!isa<ConstantInt>(CommonBaseV) ||
            !cast<ConstantInt>(CommonBaseV)->isNullValue())
          RewriteExpr = SCEVAddExpr::get(RewriteExpr,
                                         SCEVUnknown::get(CommonBaseV));
      }
      // Now that we know what we need to do, insert code before User for the
      // immediate and any loop-variant expressions.
      if (!isa<ConstantInt>(BaseV) || !cast<ConstantInt>(BaseV)->isNullValue())
        // Add BaseV to the PHI value if needed.
        RewriteExpr = SCEVAddExpr::get(RewriteExpr, SCEVUnknown::get(BaseV));
      User.RewriteInstructionToUseNewBase(RewriteExpr, Rewriter, L, this);

      // Mark old value we replaced as possibly dead, so that it is elminated
      // if we just replaced the last use of that value.
      DeadInsts.insert(cast<Instruction>(User.OperandValToReplace));
      ++NumReduced;

      // If there are any more users to process with the same base, move one of
      // them to the end of the list so that we will process it.
      if (!UsersToProcess.empty()) {
        for (unsigned e = UsersToProcess.size(); ScanPos != e; ++ScanPos)
          if (UsersToProcess[ScanPos].Base == Base) {
            std::swap(UsersToProcess[ScanPos], UsersToProcess.back());
            break;
          }
      }
    } while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base);
    // TODO: Next, find out which base index is the most common, pull it out.
  }

  // IMPORTANT TODO: Figure out how to partition the IV's with this stride, but
  // different starting values, into different PHIs.
// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
// uses in the loop, look to see if we can eliminate some, in favor of using
// common indvars for the different uses.
void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
  // TODO: implement optzns here.




  // Finally, get the terminating condition for the loop if possible.  If we
  // can, we want to change it to use a post-incremented version of its
  // induction variable, to allow coallescing the live ranges for the IV into
  // one register value.
  PHINode *SomePHI = cast<PHINode>(L->getHeader()->begin());
  BasicBlock  *Preheader = L->getLoopPreheader();
  BasicBlock *LatchBlock =
   SomePHI->getIncomingBlock(SomePHI->getIncomingBlock(0) == Preheader);
  BranchInst *TermBr = dyn_cast<BranchInst>(LatchBlock->getTerminator());
  if (!TermBr || TermBr->isUnconditional() ||
      !isa<SetCondInst>(TermBr->getCondition()))
    return;
  SetCondInst *Cond = cast<SetCondInst>(TermBr->getCondition());

  // Search IVUsesByStride to find Cond's IVUse if there is one.
  IVStrideUse *CondUse = 0;
  for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e && !CondUse;
       ++Stride) {
    std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI = 
      IVUsesByStride.find(StrideOrder[Stride]);
    assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
    
    for (std::vector<IVStrideUse>::iterator UI = SI->second.Users.begin(),
           E = SI->second.Users.end(); UI != E; ++UI)
        CondStride = &SI->first;
        // NOTE: we could handle setcc instructions with multiple uses here, but
        // InstCombine does it as well for simple uses, it's not clear that it
        // occurs enough in real life to handle.
        break;
      }
  if (!CondUse) return;  // setcc doesn't use the IV.

  // setcc stride is complex, don't mess with users.
  // FIXME: Evaluate whether this is a good idea or not.
  if (!isa<SCEVConstant>(*CondStride)) return;

  // It's possible for the setcc instruction to be anywhere in the loop, and
  // possible for it to have multiple users.  If it is not immediately before
  // the latch block branch, move it.
  if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
    if (Cond->hasOneUse()) {   // Condition has a single use, just move it.
      Cond->moveBefore(TermBr);
    } else {
      // Otherwise, clone the terminating condition and insert into the loopend.
      Cond = cast<SetCondInst>(Cond->clone());
      Cond->setName(L->getHeader()->getName() + ".termcond");
      LatchBlock->getInstList().insert(TermBr, Cond);
      
      // Clone the IVUse, as the old use still exists!
      IVUsesByStride[*CondStride].addUser(CondUse->Offset, Cond,
      CondUse = &IVUsesByStride[*CondStride].Users.back();
    }
  }

  // If we get to here, we know that we can transform the setcc instruction to
  // use the post-incremented version of the IV, allowing us to coallesce the
  // live ranges for the IV correctly.
  CondUse->Offset = SCEV::getMinusSCEV(CondUse->Offset, *CondStride);
  CondUse->isUseOfPostIncrementedValue = true;
}
namespace {
  // Constant strides come first which in turns are sorted by their absolute
  // values. If absolute values are the same, then positive strides comes first.
  // e.g.
  // 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X
  struct StrideCompare {
    bool operator()(const SCEVHandle &LHS, const SCEVHandle &RHS) {
      SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);
      SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
      if (LHSC && RHSC) {
        int64_t  LV = LHSC->getValue()->getSExtValue();
        int64_t  RV = RHSC->getValue()->getSExtValue();
        uint64_t ALV = (LV < 0) ? -LV : LV;
        uint64_t ARV = (RV < 0) ? -RV : RV;
        if (ALV == ARV)
          return LV > RV;
        else
          return ALV < ARV;
      } else
        return (LHSC && !RHSC);
    }
  };
}

void LoopStrengthReduce::runOnLoop(Loop *L) {
  // First step, transform all loops nesting inside of this loop.
  for (LoopInfo::iterator I = L->begin(), E = L->end(); I != E; ++I)
    runOnLoop(*I);

  // Next, find all uses of induction variables in this loop, and catagorize
  // them by stride.  Start by finding all of the PHI nodes in the header for
  // this loop.  If they are induction variables, inspect their uses.
  std::set<Instruction*> Processed;   // Don't reprocess instructions.
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
    AddUsersIfInteresting(I, L, Processed);
  if (IVUsesByStride.empty()) return;

  // Optimize induction variables.  Some indvar uses can be transformed to use
  // strides that will be needed for other purposes.  A common example of this
  // is the exit test for the loop, which can often be rewritten to use the
  // computation of some other indvar to decide when to terminate the loop.
  OptimizeIndvars(L);


  // FIXME: We can widen subreg IV's here for RISC targets.  e.g. instead of
  // doing computation in byte values, promote to 32-bit values if safe.

  // FIXME: Attempt to reuse values across multiple IV's.  In particular, we
  // could have something like "for(i) { foo(i*8); bar(i*16) }", which should be
  // codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC.  Need
  // to be careful that IV's are all the same type.  Only works for intptr_t
  // indvars.

  // If we only have one stride, we can more aggressively eliminate some things.
  bool HasOneStride = IVUsesByStride.size() == 1;

#ifndef NDEBUG
  DEBUG(std::cerr << "\nLSR on ");
  DEBUG(L->dump());
#endif

  // IVsByStride keeps IVs for one particular loop.
  IVsByStride.clear();

  // Sort the StrideOrder so we process larger strides first.
  std::stable_sort(StrideOrder.begin(), StrideOrder.end(), StrideCompare());

  // Note: this processes each stride/type pair individually.  All users passed
  // into StrengthReduceStridedIVUsers have the same type AND stride.  Also,
  // node that we iterate over IVUsesByStride indirectly by using StrideOrder.
  // This extra layer of indirection makes the ordering of strides deterministic
  // - not dependent on map order.
  for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) {
    std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI = 
      IVUsesByStride.find(StrideOrder[Stride]);
    assert(SI != IVUsesByStride.end() && "Stride doesn't exist!");
    StrengthReduceStridedIVUsers(SI->first, SI->second, L, HasOneStride);

  // Clean up after ourselves
  if (!DeadInsts.empty()) {
    DeleteTriviallyDeadInstructions(DeadInsts);

    BasicBlock::iterator I = L->getHeader()->begin();
    PHINode *PN;
    while ((PN = dyn_cast<PHINode>(I))) {
      ++I;  // Preincrement iterator to avoid invalidating it when deleting PN.
      
      // At this point, we know that we have killed one or more GEP
      // instructions.  It is worth checking to see if the cann indvar is also
      // dead, so that we can remove it as well.  The requirements for the cann
      // indvar to be considered dead are:
      // 1. the cann indvar has one use
      // 2. the use is an add instruction
      // 3. the add has one use
      // 4. the add is used by the cann indvar
      // If all four cases above are true, then we can remove both the add and
      // the cann indvar.
      // FIXME: this needs to eliminate an induction variable even if it's being
      // compared against some value to decide loop termination.
      if (PN->hasOneUse()) {
        BinaryOperator *BO = dyn_cast<BinaryOperator>(*(PN->use_begin()));
        if (BO && BO->hasOneUse()) {
          if (PN == *(BO->use_begin())) {
            DeadInsts.insert(BO);
            // Break the cycle, then delete the PHI.
            PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
            SE->deleteInstructionFromRecords(PN);
            PN->eraseFromParent();
    DeleteTriviallyDeadInstructions(DeadInsts);