Newer
Older
Owen Anderson
committed
//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs various transformations related to eliminating memcpy
// calls, or transforming sets of stores into memset's.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "memcpyopt"
#include "llvm/Transforms/Scalar.h"
#include "llvm/GlobalVariable.h"
#include "llvm/IRBuilder.h"
Owen Anderson
committed
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
Owen Anderson
committed
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Dominators.h"
Owen Anderson
committed
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
Owen Anderson
committed
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
Owen Anderson
committed
#include <list>
using namespace llvm;
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
Owen Anderson
committed
Benjamin Kramer
committed
static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
bool &VariableIdxFound, const DataLayout &TD){
Owen Anderson
committed
// Skip over the first indices.
gep_type_iterator GTI = gep_type_begin(GEP);
for (unsigned i = 1; i != Idx; ++i, ++GTI)
/*skip along*/;
Owen Anderson
committed
// Compute the offset implied by the rest of the indices.
int64_t Offset = 0;
for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
if (OpC == 0)
return VariableIdxFound = true;
if (OpC->isZero()) continue; // No offset.
// Handle struct indices, which add their field offset to the pointer.
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
Owen Anderson
committed
Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
continue;
}
Owen Anderson
committed
// Otherwise, we have a sequential type like an array or vector. Multiply
// the index by the ElementSize.
uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
Owen Anderson
committed
Offset += Size*OpC->getSExtValue();
}
return Offset;
}
/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a
/// constant offset, and return that constant offset. For example, Ptr1 might
/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8.
static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
Ptr1 = Ptr1->stripPointerCasts();
Ptr2 = Ptr2->stripPointerCasts();
Benjamin Kramer
committed
GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1);
GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2);
bool VariableIdxFound = false;
// If one pointer is a GEP and the other isn't, then see if the GEP is a
// constant offset from the base, as in "P" and "gep P, 1".
if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD);
return !VariableIdxFound;
}
if (GEP2 && GEP1 == 0 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD);
return !VariableIdxFound;
}
Owen Anderson
committed
// Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
// base. After that base, they may have some number of common (and
// potentially variable) indices. After that they handle some constant
// offset, which determines their offset from each other. At this point, we
// handle no other case.
if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
return false;
Owen Anderson
committed
// Skip any common indices and track the GEP types.
unsigned Idx = 1;
for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
break;
int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
if (VariableIdxFound) return false;
Owen Anderson
committed
Offset = Offset2-Offset1;
return true;
}
/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value.
/// This allows us to analyze stores like:
/// store 0 -> P+1
/// store 0 -> P+0
/// store 0 -> P+3
/// store 0 -> P+2
/// which sometimes happens with stores to arrays of structs etc. When we see
/// the first store, we make a range [1, 2). The second store extends the range
/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
/// two ranges into [0, 3) which is memset'able.
namespace {
struct MemsetRange {
// Start/End - A semi range that describes the span that this range covers.
// The range is closed at the start and open at the end: [Start, End).
Owen Anderson
committed
int64_t Start, End;
/// StartPtr - The getelementptr instruction that points to the start of the
/// range.
Value *StartPtr;
Owen Anderson
committed
/// Alignment - The known alignment of the first store.
unsigned Alignment;
Owen Anderson
committed
/// TheStores - The actual stores that make up this range.
SmallVector<Instruction*, 16> TheStores;
bool isProfitableToUseMemset(const DataLayout &TD) const;
Owen Anderson
committed
};
} // end anon namespace
bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const {
// If we found more than 4 stores to merge or 16 bytes, use memset.
if (TheStores.size() >= 4 || End-Start >= 16) return true;
// If there is nothing to merge, don't do anything.
if (TheStores.size() < 2) return false;
// If any of the stores are a memset, then it is always good to extend the
// memset.
for (unsigned i = 0, e = TheStores.size(); i != e; ++i)
if (!isa<StoreInst>(TheStores[i]))
return true;
Owen Anderson
committed
// Assume that the code generator is capable of merging pairs of stores
// together if it wants to.
if (TheStores.size() == 2) return false;
Owen Anderson
committed
// If we have fewer than 8 stores, it can still be worthwhile to do this.
// For example, merging 4 i8 stores into an i32 store is useful almost always.
// However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
// memset will be split into 2 32-bit stores anyway) and doing so can
// pessimize the llvm optimizer.
//
// Since we don't have perfect knowledge here, make some assumptions: assume
// the maximum GPR width is the same size as the pointer size and assume that
// this width can be stored. If so, check to see whether we will end up
// actually reducing the number of stores used.
unsigned Bytes = unsigned(End-Start);
unsigned NumPointerStores = Bytes/TD.getPointerSize();
Owen Anderson
committed
// Assume the remaining bytes if any are done a byte at a time.
unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();
Owen Anderson
committed
// If we will reduce the # stores (according to this heuristic), do the
// transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
// etc.
return TheStores.size() > NumPointerStores+NumByteStores;
Owen Anderson
committed
namespace {
class MemsetRanges {
/// Ranges - A sorted list of the memset ranges. We use std::list here
/// because each element is relatively large and expensive to copy.
std::list<MemsetRange> Ranges;
typedef std::list<MemsetRange>::iterator range_iterator;
Owen Anderson
committed
public:
MemsetRanges(const DataLayout &td) : TD(td) {}
Owen Anderson
committed
typedef std::list<MemsetRange>::const_iterator const_iterator;
const_iterator begin() const { return Ranges.begin(); }
Loading
Loading full blame...