diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index 3f87f9fbdc7f2fca15a825761ca887ed225ffa59..4032bedbfc80f80fb14340ec8ba5516c18fdb3df 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -696,8 +696,7 @@ void CodeGenFunction::EmitAutoVarDecl(const VarDecl &D, // Get the element type. const llvm::Type *LElemTy = ConvertTypeForMem(Ty); - const llvm::Type *LElemPtrTy = - llvm::PointerType::get(LElemTy, Ty.getAddressSpace()); + const llvm::Type *LElemPtrTy = LElemTy->getPointerTo(Ty.getAddressSpace()); llvm::Value *VLASize = EmitVLASize(Ty); @@ -860,7 +859,7 @@ void CodeGenFunction::EmitAutoVarDecl(const VarDecl &D, } else if (Init->getType()->isAnyComplexType()) { EmitComplexExprIntoAddr(Init, Loc, isVolatile); } else { - EmitAggExpr(Init, AggValueSlot::forAddr(Loc, isVolatile, true)); + EmitAggExpr(Init, AggValueSlot::forAddr(Loc, isVolatile, true, false)); } } diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index e19baa97f1286333dbc44700d944346fd0a9bd84..ab694e311231ee5a3709c1f8895c67ce6dfbba48 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -497,11 +497,42 @@ void AggExprEmitter::VisitImplicitValueInitExpr(ImplicitValueInitExpr *E) { EmitNullInitializationToLValue(CGF.MakeAddrLValue(Slot.getAddr(), T), T); } +/// isSimpleZero - If emitting this value will obviously just cause a store of +/// zero to memory, return true. This can return false if uncertain, so it just +/// handles simple cases. +static bool isSimpleZero(const Expr *E, CodeGenFunction &CGF) { + // (0) + if (const ParenExpr *PE = dyn_cast(E)) + return isSimpleZero(PE->getSubExpr(), CGF); + // 0 + if (const IntegerLiteral *IL = dyn_cast(E)) + return IL->getValue() == 0; + // +0.0 + if (const FloatingLiteral *FL = dyn_cast(E)) + return FL->getValue().isPosZero(); + // int() + if ((isa(E) || isa(E)) && + CGF.getTypes().isZeroInitializable(E->getType())) + return true; + // (int*)0 - Null pointer expressions. + if (const CastExpr *ICE = dyn_cast(E)) + return ICE->getCastKind() == CK_NullToPointer; + // '\0' + if (const CharacterLiteral *CL = dyn_cast(E)) + return CL->getValue() == 0; + + // Otherwise, hard case: conservatively return false. + return false; +} + + void AggExprEmitter::EmitInitializationToLValue(Expr* E, LValue LV, QualType T) { // FIXME: Ignore result? // FIXME: Are initializers affected by volatile? - if (isa(E)) { + if (Dest.isZeroed() && isSimpleZero(E, CGF)) { + // Storing "i32 0" to a zero'd memory location is a noop. + } else if (isa(E)) { EmitNullInitializationToLValue(LV, T); } else if (T->isReferenceType()) { RValue RV = CGF.EmitReferenceBindingToExpr(E, /*InitializedDecl=*/0); @@ -509,13 +540,19 @@ AggExprEmitter::EmitInitializationToLValue(Expr* E, LValue LV, QualType T) { } else if (T->isAnyComplexType()) { CGF.EmitComplexExprIntoAddr(E, LV.getAddress(), false); } else if (CGF.hasAggregateLLVMType(T)) { - CGF.EmitAggExpr(E, AggValueSlot::forAddr(LV.getAddress(), false, true)); + CGF.EmitAggExpr(E, AggValueSlot::forAddr(LV.getAddress(), false, true, + false, Dest.isZeroed())); } else { CGF.EmitStoreThroughLValue(CGF.EmitAnyExpr(E), LV, T); } } void AggExprEmitter::EmitNullInitializationToLValue(LValue LV, QualType T) { + // If the destination slot is already zeroed out before the aggregate is + // copied into it, we don't have to emit any zeros here. + if (Dest.isZeroed() && CGF.getTypes().isZeroInitializable(T)) + return; + if (!CGF.hasAggregateLLVMType(T)) { // For non-aggregates, we can store zero llvm::Value *Null = llvm::Constant::getNullValue(CGF.ConvertType(T)); @@ -573,13 +610,27 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { // FIXME: were we intentionally ignoring address spaces and GC attributes? for (uint64_t i = 0; i != NumArrayElements; ++i) { + // If we're done emitting initializers and the destination is known-zeroed + // then we're done. + if (i == NumInitElements && + Dest.isZeroed() && + CGF.getTypes().isZeroInitializable(ElementType)) + break; + llvm::Value *NextVal = Builder.CreateStructGEP(DestPtr, i, ".array"); LValue LV = CGF.MakeAddrLValue(NextVal, ElementType); + if (i < NumInitElements) EmitInitializationToLValue(E->getInit(i), LV, ElementType); - else EmitNullInitializationToLValue(LV, ElementType); + + // If the GEP didn't get used because of a dead zero init or something + // else, clean it up for -O0 builds and general tidiness. + if (llvm::GetElementPtrInst *GEP = + dyn_cast(NextVal)) + if (GEP->use_empty()) + GEP->eraseFromParent(); } return; } @@ -612,13 +663,13 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { // FIXME: volatility FieldDecl *Field = E->getInitializedFieldInUnion(); - LValue FieldLoc = CGF.EmitLValueForFieldInitialization(DestPtr, Field, 0); + LValue FieldLoc = CGF.EmitLValueForFieldInitialization(DestPtr, Field, 0); if (NumInitElements) { // Store the initializer into the field EmitInitializationToLValue(E->getInit(0), FieldLoc, Field->getType()); } else { - // Default-initialize to null + // Default-initialize to null. EmitNullInitializationToLValue(FieldLoc, Field->getType()); } @@ -638,10 +689,16 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { if (Field->isUnnamedBitfield()) continue; + // Don't emit GEP before a noop store of zero. + if (CurInitVal == NumInitElements && Dest.isZeroed() && + CGF.getTypes().isZeroInitializable(E->getType())) + break; + // FIXME: volatility LValue FieldLoc = CGF.EmitLValueForFieldInitialization(DestPtr, *Field, 0); // We never generate write-barries for initialized fields. FieldLoc.setNonGC(true); + if (CurInitVal < NumInitElements) { // Store the initializer into the field. EmitInitializationToLValue(E->getInit(CurInitVal++), FieldLoc, @@ -650,6 +707,14 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { // We're out of initalizers; default-initialize to null EmitNullInitializationToLValue(FieldLoc, Field->getType()); } + + // If the GEP didn't get used because of a dead zero init or something + // else, clean it up for -O0 builds and general tidiness. + if (FieldLoc.isSimple()) + if (llvm::GetElementPtrInst *GEP = + dyn_cast(FieldLoc.getAddress())) + if (GEP->use_empty()) + GEP->eraseFromParent(); } } @@ -657,6 +722,69 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { // Entry Points into this File //===----------------------------------------------------------------------===// +/// GetNumNonZeroBytesInInit - Get an approximate count of the number of +/// non-zero bytes that will be stored when outputting the initializer for the +/// specified initializer expression. +static uint64_t GetNumNonZeroBytesInInit(const Expr *E, CodeGenFunction &CGF) { + if (const ParenExpr *PE = dyn_cast(E)) + return GetNumNonZeroBytesInInit(PE->getSubExpr(), CGF); + + // 0 and 0.0 won't require any non-zero stores! + if (isSimpleZero(E, CGF)) return 0; + + // If this is an initlist expr, sum up the size of sizes of the (present) + // elements. If this is something weird, assume the whole thing is non-zero. + const InitListExpr *ILE = dyn_cast(E); + if (ILE == 0 || !CGF.getTypes().isZeroInitializable(ILE->getType())) + return CGF.getContext().getTypeSize(E->getType())/8; + + uint64_t NumNonZeroBytes = 0; + for (unsigned i = 0, e = ILE->getNumInits(); i != e; ++i) + NumNonZeroBytes += GetNumNonZeroBytesInInit(ILE->getInit(i), CGF); + return NumNonZeroBytes; +} + +/// CheckAggExprForMemSetUse - If the initializer is large and has a lot of +/// zeros in it, emit a memset and avoid storing the individual zeros. +/// +static void CheckAggExprForMemSetUse(AggValueSlot &Slot, const Expr *E, + CodeGenFunction &CGF) { + // If the slot is already known to be zeroed, nothing to do. Don't mess with + // volatile stores. + if (Slot.isZeroed() || Slot.isVolatile() || Slot.getAddr() == 0) return; + + // If the type is 16-bytes or smaller, prefer individual stores over memset. + std::pair TypeInfo = + CGF.getContext().getTypeInfo(E->getType()); + if (TypeInfo.first/8 <= 16) + return; + + // Check to see if over 3/4 of the initializer are known to be zero. If so, + // we prefer to emit memset + individual stores for the rest. + uint64_t NumNonZeroBytes = GetNumNonZeroBytesInInit(E, CGF); + if (NumNonZeroBytes*4 > TypeInfo.first/8) + return; + + // Okay, it seems like a good idea to use an initial memset, emit the call. + llvm::Constant *SizeVal = CGF.Builder.getInt64(TypeInfo.first/8); + llvm::ConstantInt *AlignVal = CGF.Builder.getInt32(TypeInfo.second/8); + + llvm::Value *Loc = Slot.getAddr(); + const llvm::Type *BP = llvm::Type::getInt8PtrTy(CGF.getLLVMContext()); + + Loc = CGF.Builder.CreateBitCast(Loc, BP); + CGF.Builder.CreateCall5(CGF.CGM.getMemSetFn(Loc->getType(), + SizeVal->getType()), + Loc, CGF.Builder.getInt8(0), SizeVal, AlignVal, + CGF.Builder.getFalse()); + + // Tell the AggExprEmitter that the slot is known zero. + Slot.setZeroed(); +} + + + + /// EmitAggExpr - Emit the computation of the specified expression of aggregate /// type. The result is computed into DestPtr. Note that if DestPtr is null, /// the value of the aggregate expression is not needed. If VolatileDest is @@ -670,20 +798,20 @@ void CodeGenFunction::EmitAggExpr(const Expr *E, AggValueSlot Slot, bool IgnoreResult) { assert(E && hasAggregateLLVMType(E->getType()) && "Invalid aggregate expression to emit"); - assert((Slot.getAddr() != 0 || Slot.isIgnored()) - && "slot has bits but no address"); + assert((Slot.getAddr() != 0 || Slot.isIgnored()) && + "slot has bits but no address"); - AggExprEmitter(*this, Slot, IgnoreResult) - .Visit(const_cast(E)); + // Optimize the slot if possible. + CheckAggExprForMemSetUse(Slot, E, *this); + + AggExprEmitter(*this, Slot, IgnoreResult).Visit(const_cast(E)); } LValue CodeGenFunction::EmitAggExprToLValue(const Expr *E) { assert(hasAggregateLLVMType(E->getType()) && "Invalid argument!"); llvm::Value *Temp = CreateMemTemp(E->getType()); LValue LV = MakeAddrLValue(Temp, E->getType()); - AggValueSlot Slot - = AggValueSlot::forAddr(Temp, LV.isVolatileQualified(), false); - EmitAggExpr(E, Slot); + EmitAggExpr(E, AggValueSlot::forAddr(Temp, LV.isVolatileQualified(), false)); return LV; } diff --git a/clang/lib/CodeGen/CGValue.h b/clang/lib/CodeGen/CGValue.h index 2de5750084a23003623fc8281ea6a73c78b11c75..b7bb9d76a485e3eebfeb6cfd456e3dbdb48c9ef8 100644 --- a/clang/lib/CodeGen/CGValue.h +++ b/clang/lib/CodeGen/CGValue.h @@ -336,6 +336,11 @@ class AggValueSlot { bool VolatileFlag : 1; bool LifetimeFlag : 1; bool RequiresGCollection : 1; + + /// IsZeroed - This is set to true if the destination is known to be zero + /// before the assignment into it. This means that zero fields don't need to + /// be set. + bool IsZeroed : 1; public: /// ignored - Returns an aggregate value slot indicating that the @@ -343,7 +348,7 @@ public: static AggValueSlot ignored() { AggValueSlot AV; AV.Addr = 0; - AV.VolatileFlag = AV.LifetimeFlag = AV.RequiresGCollection = 0; + AV.VolatileFlag = AV.LifetimeFlag = AV.RequiresGCollection = AV.IsZeroed =0; return AV; } @@ -357,17 +362,19 @@ public: /// somewhere that ObjC GC calls should be emitted for static AggValueSlot forAddr(llvm::Value *Addr, bool Volatile, bool LifetimeExternallyManaged, - bool RequiresGCollection=false) { + bool RequiresGCollection = false, + bool IsZeroed = false) { AggValueSlot AV; AV.Addr = Addr; AV.VolatileFlag = Volatile; AV.LifetimeFlag = LifetimeExternallyManaged; AV.RequiresGCollection = RequiresGCollection; + AV.IsZeroed = IsZeroed; return AV; } static AggValueSlot forLValue(LValue LV, bool LifetimeExternallyManaged, - bool RequiresGCollection=false) { + bool RequiresGCollection = false) { return forAddr(LV.getAddress(), LV.isVolatileQualified(), LifetimeExternallyManaged, RequiresGCollection); } @@ -399,6 +406,10 @@ public: return RValue::getAggregate(getAddr(), isVolatile()); } + void setZeroed(bool V = true) { IsZeroed = V; } + bool isZeroed() const { + return IsZeroed; + } }; } // end namespace CodeGen diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 4a58d5b3574bb76d5bef90df74f5d9f48a7d3896..616c227ffbe156a79e1ef8bfcda5507ebb35ea1e 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -1662,8 +1662,7 @@ public: const BlockDeclRefExpr *BDRE); RValue EmitCXXExprWithTemporaries(const CXXExprWithTemporaries *E, - AggValueSlot Slot - = AggValueSlot::ignored()); + AggValueSlot Slot =AggValueSlot::ignored()); void EmitCXXThrowExpr(const CXXThrowExpr *E); diff --git a/clang/test/CodeGen/init.c b/clang/test/CodeGen/init.c index 2dee0432efcf5acc41ba76eda9fb755907028473..ccb04f343c6071aad180cea1af6def74f4a49815 100644 --- a/clang/test/CodeGen/init.c +++ b/clang/test/CodeGen/init.c @@ -70,3 +70,33 @@ char test8(int X) { // CHECK: store i8 98 // CHECK: store i8 99 } + +void bar(void*); + +// PR279 +int test9(int X) { + int Arr[100] = { X }; // Should use memset + bar(Arr); +// CHECK: @test9 +// CHECK: call void @llvm.memset +// CHECK-NOT: store i32 0 +// CHECK: call void @bar +} + +struct a { + int a, b, c, d, e, f, g, h, i, j, k, *p; +}; + +struct b { + struct a a,b,c,d,e,f,g; +}; + +int test10(int X) { + struct b S = { .a.a = X, .d.e = X, .f.e = 0, .f.f = 0, .f.p = 0 }; + bar(&S); + + // CHECK: @test10 + // CHECK: call void @llvm.memset + // CHECK-NOT: store i32 0 + // CHECK: call void @bar +}