diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 6fe5e188b1ac993c04579ec7a4d36d757eb5b03a..8f6855e6a3029c1e20d8403b87d533acc77eb9ff 100644
--- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -253,12 +253,7 @@ bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
     return false;
   AllCallsAreTailCalls = true;
 
-  // The local stack holds all alloca instructions and all byval arguments.
   AllocaDerivedValueTracker Tracker;
-  for (Argument &Arg : F.args()) {
-    if (Arg.hasByValAttr())
-      Tracker.walk(&Arg);
-  }
   for (auto &BB : F) {
     for (auto &I : BB)
       if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
@@ -314,9 +309,8 @@ bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
         for (auto &Arg : CI->arg_operands()) {
           if (isa<Constant>(Arg.getUser()))
             continue;
-          if (Argument *A = dyn_cast<Argument>(Arg.getUser()))
-            if (!A->hasByValAttr())
-              continue;
+          if (isa<Argument>(Arg.getUser()))
+            continue;
           SafeToTail = false;
           break;
         }
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 1b2e2089415f809a8d7fbbbd3c2933d5002c6ff2..c1589400b85fc683ff3e47a553afc27e2166e85a 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -743,7 +743,8 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
 static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
                                   const Function *CalledFunc,
                                   InlineFunctionInfo &IFI,
-                                  unsigned ByValAlignment) {
+                                  unsigned ByValAlignment,
+                                  bool &AddedNewAllocas) {
   PointerType *ArgTy = cast<PointerType>(Arg->getType());
   Type *AggTy = ArgTy->getElementType();
 
@@ -785,6 +786,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
   
   // Uses of the argument in the function should use our new alloca
   // instead.
+  AddedNewAllocas = true;
   return NewAlloca;
 }
 
@@ -958,6 +960,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
   SmallVector<ReturnInst*, 8> Returns;
   ClonedCodeInfo InlinedFunctionInfo;
   Function::iterator FirstNewBlock;
+  bool AddedNewAllocas = false;
 
   { // Scope to destroy VMap after cloning.
     ValueToValueMapTy VMap;
@@ -981,7 +984,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
       // modify the struct.
       if (CS.isByValArgument(ArgNo)) {
         ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
-                                        CalledFunc->getParamAlignment(ArgNo+1));
+                                        CalledFunc->getParamAlignment(ArgNo+1),
+                                        AddedNewAllocas);
         if (ActualArg != *AI)
           ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
       }
@@ -1096,9 +1100,18 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
         //    f -> musttail g ->     tail f  ==>  f ->     tail f
         //    f ->          g -> musttail f  ==>  f ->          f
         //    f ->          g ->     tail f  ==>  f ->          f
+        //
+        // If an alloca was introduced in the frame due to a byval parameter
+        // being passed to a subsequent call, tail calls must have the tail
+        // stripped as they may not access variables in the caller's stack.
+        // One such alloca is enough to affect all of them, as it may be aliased
+        // by bitcasts or may escape and be mutated outside of the function.
         CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
         ChildTCK = std::min(CallSiteTailKind, ChildTCK);
-        CI->setTailCallKind(ChildTCK);
+        if (AddedNewAllocas)
+          CI->setTailCallKind(CallInst::TCK_None);
+        else
+          CI->setTailCallKind(ChildTCK);
         InlinedMustTailCalls |= CI->isMustTailCall();
 
         // Calls inlined through a 'nounwind' call site should be marked
diff --git a/llvm/test/Transforms/Inline/byval-tail-call.ll b/llvm/test/Transforms/Inline/byval-tail-call.ll
index 154f3974b58dad9567f497677bd95f85f4212b59..95c31d2b8268c25c92491f0f82e4c1968891c51f 100644
--- a/llvm/test/Transforms/Inline/byval-tail-call.ll
+++ b/llvm/test/Transforms/Inline/byval-tail-call.ll
@@ -34,7 +34,7 @@ define void @frob(i32* %x) {
 ; CHECK: %[[VAL:.*]] = load i32* %x
 ; CHECK: store i32 %[[VAL]], i32* %[[POS]]
 ; CHECK: {{^ *}}call void @ext(i32* %[[POS]]
-; CHECK: tail call void @ext(i32* null)
+; CHECK: {{^ *}}call void @ext(i32* null)
 ; CHECK: ret void
   tail call void @qux(i32* byval %x)
   ret void
diff --git a/llvm/test/Transforms/Inline/inline-tail.ll b/llvm/test/Transforms/Inline/inline-tail.ll
index b40328e0a272ac6347e03254ccc59e5b60a2385a..565491adf5b735402653a701fda664058d8848d4 100644
--- a/llvm/test/Transforms/Inline/inline-tail.ll
+++ b/llvm/test/Transforms/Inline/inline-tail.ll
@@ -49,42 +49,6 @@ define void @test_musttail_basic_a(i32* %p) {
   ret void
 }
 
-; Don't insert lifetime end markers here, the lifetime is trivially over due
-; the return.
-; CHECK: define void @test_byval_a(
-; CHECK: musttail call void @test_byval_c(
-; CHECK-NEXT: ret void
-
-declare void @test_byval_c(i32* byval %p)
-define internal void @test_byval_b(i32* byval %p) {
-  musttail call void @test_byval_c(i32* byval %p)
-  ret void
-}
-define void @test_byval_a(i32* byval %p) {
-  musttail call void @test_byval_b(i32* byval %p)
-  ret void
-}
-
-; Don't insert a stack restore, we're about to return.
-; CHECK: define void @test_dynalloca_a(
-; CHECK: call i8* @llvm.stacksave(
-; CHECK: alloca i8, i32 %n
-; CHECK: musttail call void @test_dynalloca_c(
-; CHECK-NEXT: ret void
-
-declare void @escape(i8* %buf)
-declare void @test_dynalloca_c(i32* byval %p, i32 %n)
-define internal void @test_dynalloca_b(i32* byval %p, i32 %n) alwaysinline {
-  %buf = alloca i8, i32 %n              ; dynamic alloca
-  call void @escape(i8* %buf)           ; escape it
-  musttail call void @test_dynalloca_c(i32* byval %p, i32 %n)
-  ret void
-}
-define void @test_dynalloca_a(i32* byval %p, i32 %n) {
-  musttail call void @test_dynalloca_b(i32* byval %p, i32 %n)
-  ret void
-}
-
 ; We can't merge the returns.
 ; CHECK: define void @test_multiret_a(
 ; CHECK: musttail call void @test_multiret_c(
diff --git a/llvm/test/Transforms/Inline/inlined-allocas.ll b/llvm/test/Transforms/Inline/inlined-allocas.ll
new file mode 100644
index 0000000000000000000000000000000000000000..e2942816f1300597284caa1c004a322185f2107e
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inlined-allocas.ll
@@ -0,0 +1,58 @@
+; RUN: opt -dse -inline -S %s | FileCheck %s
+
+declare void @external(i32* byval)
+declare i32 @identity(i32* byval)
+
+; An alloca in the inlinee should not force the tail to be stripped
+
+define void @inlinee_with_alloca() {
+  %local = alloca i32
+  store i32 42, i32* %local, align 4
+  tail call void @external(i32* byval %local)
+  ret void
+}
+
+define void @inliner_without_alloca() {
+  tail call void @inlinee_with_alloca()
+  ret void
+}
+
+; CHECK-LABEL: inliner_without_alloca
+; CHECK-NEXT: %local.i = alloca i32
+; CHECK: store i32 42, i32* %local.i
+; CHECK: tail call void @external
+; CHECK: ret
+
+; An alloca in the inliner should not force the tail to be stripped
+
+define i32 @inliner_with_alloca() {
+  %local = alloca i32
+  store i32 42, i32* %local, align 4
+  %1 = tail call i32 @identity(i32* byval %local)
+  ret i32 %1
+}
+
+; CHECK-LABEL: inliner_with_alloca
+; CHECK: %local = alloca i32
+; CHECK: store i32 42, i32* %local
+; CHECK: %1 = tail call i32 @identity
+; CHECK: ret i32 %1
+
+; Inlining through a byval parameter forces an alloca to be synthesized for it.
+; That new alloca should force the tail marker to be stripped.
+
+define void @inlinee_with_passthru(i32* byval %value) {
+  tail call void @external(i32* byval %value)
+  ret void
+}
+
+define void @strip_tail(i32* %value) {
+  tail call void @inlinee_with_passthru(i32* %value)
+  ret void
+}
+
+; CHECK-LABEL: strip_tail
+; CHECK: %value1 = alloca i32
+; CHECK: {{^ *}}call void @external
+; CHECK: ret void
+
diff --git a/llvm/test/Transforms/TailCallElim/basic.ll b/llvm/test/Transforms/TailCallElim/basic.ll
index 8e9814b52bbce90e60bf5b6291bc639cef5c71e8..3b98f8c79673d4faf02f50a6542ffef4332265d8 100644
--- a/llvm/test/Transforms/TailCallElim/basic.ll
+++ b/llvm/test/Transforms/TailCallElim/basic.ll
@@ -147,7 +147,7 @@ cond_false:
 ; Don't tail call if a byval arg is captured.
 define void @test9(i32* byval %a) {
 ; CHECK-LABEL: define void @test9(
-; CHECK: {{^ *}}call void @use(
+; CHECK: tail call void @use(
   call void @use(i32* %a)
   ret void
 }
diff --git a/llvm/test/Transforms/TailCallElim/byval.ll b/llvm/test/Transforms/TailCallElim/byval.ll
new file mode 100644
index 0000000000000000000000000000000000000000..1150f7684e9be86d15378642fbb3e6bc370f6f05
--- /dev/null
+++ b/llvm/test/Transforms/TailCallElim/byval.ll
@@ -0,0 +1,34 @@
+; RUN: opt -mtriple i386 -Os -S %s -o - | FileCheck %s
+; RUN: opt -mtriple x86_64 -Os -S %s -o - | FileCheck %s
+; RUN: opt -mtriple armv7 -Os -S %s -o - | FileCheck %s
+
+%struct.D16 = type { [16 x double] }
+
+declare void @_Z2OpP3D16PKS_S2_(%struct.D16*, %struct.D16*, %struct.D16*)
+
+define void @_Z7TestRefRK3D16S1_(%struct.D16* noalias sret %agg.result, %struct.D16* %RHS, %struct.D16* %LHS) {
+  %1 = alloca %struct.D16*, align 8
+  %2 = alloca %struct.D16*, align 8
+  store %struct.D16* %RHS, %struct.D16** %1, align 8
+  store %struct.D16* %LHS, %struct.D16** %2, align 8
+  %3 = load %struct.D16** %1, align 8
+  %4 = load %struct.D16** %2, align 8
+  call void @_Z2OpP3D16PKS_S2_(%struct.D16* %agg.result, %struct.D16* %3, %struct.D16* %4)
+  ret void
+}
+
+; CHECK: define void @_Z7TestRefRK3D16S1_({{.*}}) {
+; CHECK:   tail call void @_Z2OpP3D16PKS_S2_(%struct.D16* %agg.result, %struct.D16* %RHS, %struct.D16* %LHS)
+; CHECK:   ret void
+; CHECK: }
+
+define void @_Z7TestVal3D16S_(%struct.D16* noalias sret %agg.result, %struct.D16* byval align 8 %RHS, %struct.D16* byval align 8 %LHS) {
+  call void @_Z2OpP3D16PKS_S2_(%struct.D16* %agg.result, %struct.D16* %RHS, %struct.D16* %LHS)
+  ret void
+}
+
+; CHECK: define void @_Z7TestVal3D16S_({{.*}}) {
+; CHECK:   tail call void @_Z2OpP3D16PKS_S2_(%struct.D16* %agg.result, %struct.D16* %RHS, %struct.D16* %LHS)
+; CHECK:   ret void
+; CHECK: }
+