diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index 6fe5e188b1ac993c04579ec7a4d36d757eb5b03a..8f6855e6a3029c1e20d8403b87d533acc77eb9ff 100644 --- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -253,12 +253,7 @@ bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) { return false; AllCallsAreTailCalls = true; - // The local stack holds all alloca instructions and all byval arguments. AllocaDerivedValueTracker Tracker; - for (Argument &Arg : F.args()) { - if (Arg.hasByValAttr()) - Tracker.walk(&Arg); - } for (auto &BB : F) { for (auto &I : BB) if (AllocaInst *AI = dyn_cast(&I)) @@ -314,9 +309,8 @@ bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) { for (auto &Arg : CI->arg_operands()) { if (isa(Arg.getUser())) continue; - if (Argument *A = dyn_cast(Arg.getUser())) - if (!A->hasByValAttr()) - continue; + if (isa(Arg.getUser())) + continue; SafeToTail = false; break; } diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 1b2e2089415f809a8d7fbbbd3c2933d5002c6ff2..c1589400b85fc683ff3e47a553afc27e2166e85a 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -743,7 +743,8 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, const Function *CalledFunc, InlineFunctionInfo &IFI, - unsigned ByValAlignment) { + unsigned ByValAlignment, + bool &AddedNewAllocas) { PointerType *ArgTy = cast(Arg->getType()); Type *AggTy = ArgTy->getElementType(); @@ -785,6 +786,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, // Uses of the argument in the function should use our new alloca // instead. 
+ AddedNewAllocas = true; return NewAlloca; } @@ -958,6 +960,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, SmallVector Returns; ClonedCodeInfo InlinedFunctionInfo; Function::iterator FirstNewBlock; + bool AddedNewAllocas = false; { // Scope to destroy VMap after cloning. ValueToValueMapTy VMap; @@ -981,7 +984,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // modify the struct. if (CS.isByValArgument(ArgNo)) { ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, - CalledFunc->getParamAlignment(ArgNo+1)); + CalledFunc->getParamAlignment(ArgNo+1), + AddedNewAllocas); if (ActualArg != *AI) ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI)); } @@ -1096,9 +1100,18 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // f -> musttail g -> tail f ==> f -> tail f // f -> g -> musttail f ==> f -> f // f -> g -> tail f ==> f -> f + // + // If an alloca was introduced in the frame due to a byval parameter + // being passed to a subsequent call, tail calls must have the tail + // stripped as they may not access variables in the caller's stack. + // A single alloca ripples throughout as the alloca may be aliased by + // bitcasts or may escape and be mutated outside of the function. 
CallInst::TailCallKind ChildTCK = CI->getTailCallKind(); ChildTCK = std::min(CallSiteTailKind, ChildTCK); - CI->setTailCallKind(ChildTCK); + if (AddedNewAllocas) + CI->setTailCallKind(CallInst::TCK_None); + else + CI->setTailCallKind(ChildTCK); InlinedMustTailCalls |= CI->isMustTailCall(); // Calls inlined through a 'nounwind' call site should be marked diff --git a/llvm/test/Transforms/Inline/byval-tail-call.ll b/llvm/test/Transforms/Inline/byval-tail-call.ll index 154f3974b58dad9567f497677bd95f85f4212b59..95c31d2b8268c25c92491f0f82e4c1968891c51f 100644 --- a/llvm/test/Transforms/Inline/byval-tail-call.ll +++ b/llvm/test/Transforms/Inline/byval-tail-call.ll @@ -34,7 +34,7 @@ define void @frob(i32* %x) { ; CHECK: %[[VAL:.*]] = load i32* %x ; CHECK: store i32 %[[VAL]], i32* %[[POS]] ; CHECK: {{^ *}}call void @ext(i32* %[[POS]] -; CHECK: tail call void @ext(i32* null) +; CHECK: {{^ *}}call void @ext(i32* null) ; CHECK: ret void tail call void @qux(i32* byval %x) ret void diff --git a/llvm/test/Transforms/Inline/inline-tail.ll b/llvm/test/Transforms/Inline/inline-tail.ll index b40328e0a272ac6347e03254ccc59e5b60a2385a..565491adf5b735402653a701fda664058d8848d4 100644 --- a/llvm/test/Transforms/Inline/inline-tail.ll +++ b/llvm/test/Transforms/Inline/inline-tail.ll @@ -49,42 +49,6 @@ define void @test_musttail_basic_a(i32* %p) { ret void } -; Don't insert lifetime end markers here, the lifetime is trivially over due -; the return. -; CHECK: define void @test_byval_a( -; CHECK: musttail call void @test_byval_c( -; CHECK-NEXT: ret void - -declare void @test_byval_c(i32* byval %p) -define internal void @test_byval_b(i32* byval %p) { - musttail call void @test_byval_c(i32* byval %p) - ret void -} -define void @test_byval_a(i32* byval %p) { - musttail call void @test_byval_b(i32* byval %p) - ret void -} - -; Don't insert a stack restore, we're about to return. 
-; CHECK: define void @test_dynalloca_a( -; CHECK: call i8* @llvm.stacksave( -; CHECK: alloca i8, i32 %n -; CHECK: musttail call void @test_dynalloca_c( -; CHECK-NEXT: ret void - -declare void @escape(i8* %buf) -declare void @test_dynalloca_c(i32* byval %p, i32 %n) -define internal void @test_dynalloca_b(i32* byval %p, i32 %n) alwaysinline { - %buf = alloca i8, i32 %n ; dynamic alloca - call void @escape(i8* %buf) ; escape it - musttail call void @test_dynalloca_c(i32* byval %p, i32 %n) - ret void -} -define void @test_dynalloca_a(i32* byval %p, i32 %n) { - musttail call void @test_dynalloca_b(i32* byval %p, i32 %n) - ret void -} - ; We can't merge the returns. ; CHECK: define void @test_multiret_a( ; CHECK: musttail call void @test_multiret_c( diff --git a/llvm/test/Transforms/Inline/inlined-allocas.ll b/llvm/test/Transforms/Inline/inlined-allocas.ll new file mode 100644 index 0000000000000000000000000000000000000000..e2942816f1300597284caa1c004a322185f2107e --- /dev/null +++ b/llvm/test/Transforms/Inline/inlined-allocas.ll @@ -0,0 +1,58 @@ +; RUN: opt -dse -inline -S %s | FileCheck %s + +declare void @external(i32* byval) +declare i32 @identity(i32* byval) + +; An alloca in the inlinee should not force the tail to be stripped + +define void @inlinee_with_alloca() { + %local = alloca i32 + store i32 42, i32* %local, align 4 + tail call void @external(i32* byval %local) + ret void +} + +define void @inliner_without_alloca() { + tail call void @inlinee_with_alloca() + ret void +} + +; CHECK-LABEL: inliner_without_alloca +; CHECK-NEXT: %local.i = alloca i32 +; CHECK: store i32 42, i32* %local.i +; CHECK: tail call void @external +; CHECK: ret + +; An alloca in the inliner should not force the tail to be stripped + +define i32 @inliner_with_alloca() { + %local = alloca i32 + store i32 42, i32* %local, align 4 + %1 = tail call i32 @identity(i32* byval %local) + ret i32 %1 +} + +; CHECK-LABEL: inliner_with_alloca +; CHECK: %local = alloca i32 +; CHECK: store i32 42, 
i32* %local +; CHECK: %1 = tail call i32 @identity +; CHECK: ret i32 %1 + +; Force the synthesis of the value through the byval parameter. +; The alloca should force the tail to be stripped + +define void @inlinee_with_passthru(i32* byval %value) { + tail call void @external(i32* byval %value) + ret void +} + +define void @strip_tail(i32* %value) { + tail call void @inlinee_with_passthru(i32* %value) + ret void +} + +; CHECK-LABEL: strip_tail +; CHECK: %value1 = alloca i32 +; CHECK: {{^ *}}call void @external +; CHECK: ret void + diff --git a/llvm/test/Transforms/TailCallElim/basic.ll b/llvm/test/Transforms/TailCallElim/basic.ll index 8e9814b52bbce90e60bf5b6291bc639cef5c71e8..3b98f8c79673d4faf02f50a6542ffef4332265d8 100644 --- a/llvm/test/Transforms/TailCallElim/basic.ll +++ b/llvm/test/Transforms/TailCallElim/basic.ll @@ -147,7 +147,7 @@ cond_false: ; Don't tail call if a byval arg is captured. define void @test9(i32* byval %a) { ; CHECK-LABEL: define void @test9( -; CHECK: {{^ *}}call void @use( +; CHECK: tail call void @use( call void @use(i32* %a) ret void } diff --git a/llvm/test/Transforms/TailCallElim/byval.ll b/llvm/test/Transforms/TailCallElim/byval.ll new file mode 100644 index 0000000000000000000000000000000000000000..1150f7684e9be86d15378642fbb3e6bc370f6f05 --- /dev/null +++ b/llvm/test/Transforms/TailCallElim/byval.ll @@ -0,0 +1,34 @@ +; RUN: opt -mtriple i386 -Os -S %s -o - | FileCheck %s +; RUN: opt -mtriple x86_64 -Os -S %s -o - | FileCheck %s +; RUN: opt -mtriple armv7 -Os -S %s -o - | FileCheck %s + +%struct.D16 = type { [16 x double] } + +declare void @_Z2OpP3D16PKS_S2_(%struct.D16*, %struct.D16*, %struct.D16*) + +define void @_Z7TestRefRK3D16S1_(%struct.D16* noalias sret %agg.result, %struct.D16* %RHS, %struct.D16* %LHS) { + %1 = alloca %struct.D16*, align 8 + %2 = alloca %struct.D16*, align 8 + store %struct.D16* %RHS, %struct.D16** %1, align 8 + store %struct.D16* %LHS, %struct.D16** %2, align 8 + %3 = load %struct.D16** %1, align 8 + %4 = 
load %struct.D16** %2, align 8 + call void @_Z2OpP3D16PKS_S2_(%struct.D16* %agg.result, %struct.D16* %3, %struct.D16* %4) + ret void +} + +; CHECK: define void @_Z7TestRefRK3D16S1_({{.*}}) { +; CHECK: tail call void @_Z2OpP3D16PKS_S2_(%struct.D16* %agg.result, %struct.D16* %RHS, %struct.D16* %LHS) +; CHECK: ret void +; CHECK: } + +define void @_Z7TestVal3D16S_(%struct.D16* noalias sret %agg.result, %struct.D16* byval align 8 %RHS, %struct.D16* byval align 8 %LHS) { + call void @_Z2OpP3D16PKS_S2_(%struct.D16* %agg.result, %struct.D16* %RHS, %struct.D16* %LHS) + ret void +} + +; CHECK: define void @_Z7TestVal3D16S_({{.*}}) { +; CHECK: tail call void @_Z2OpP3D16PKS_S2_(%struct.D16* %agg.result, %struct.D16* %RHS, %struct.D16* %LHS) +; CHECK: ret void +; CHECK: } +