diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 68889bb78233408dee97aa9c841412878ae82486..6ed5ace2ae7d8788cf4baa224a930aede52d3347 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -23,6 +23,7 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/IR/FMF.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" @@ -1570,6 +1571,9 @@ public: VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const; /// @} + Value *computeVectorLength(IRBuilderBase &Builder, Value *AVL, + ElementCount VF) const; + /// @} private: @@ -1927,6 +1931,8 @@ public: Align Alignment) const = 0; virtual VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0; + virtual Value *computeVectorLength(IRBuilderBase &Builder, Value *AVL, + ElementCount VF) const = 0; }; template @@ -2606,6 +2612,11 @@ public: getVPLegalizationStrategy(const VPIntrinsic &PI) const override { return Impl.getVPLegalizationStrategy(PI); } + + Value *computeVectorLength(IRBuilderBase &Builder, Value *AVL, + ElementCount VF) const override { + return Impl.computeVectorLength(Builder, AVL, VF); + } }; template diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 480be9f723f23ab9f1e89c21ce7ddbcd2e2432ea..91f2ea473a8a3ebfeb66ae717e28151347d7bb02 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -19,6 +19,7 @@ #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" @@ -862,6 +863,21 @@ public: /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert); } + Value *computeVectorLength(IRBuilderBase &Builder, Value *AVL, + ElementCount VF) const { + if (!VF.isScalable()) { + return ConstantInt::get(Builder.getInt32Ty(), VF.getFixedValue()); + } + + Constant *EC = + ConstantInt::get(Builder.getInt32Ty(), VF.getKnownMinValue()); + Value *VLMax = Builder.CreateVScale(EC, "vlmax"); + Value *VL = Builder.CreateZExtOrTrunc(AVL, Builder.getInt32Ty(), "vl"); + + return Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::umin, + {VLMax, VL}, nullptr, "evl"); + } + protected: // Obtain the minimum required size to hold the value (without the sign) // In case of a vector it returns the min required size for one element. 
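Note on the generic computeVectorLength hook added above: it clamps the requested AVL to the largest number of lanes one vector iteration can handle (vscale times the known minimum VF), or simply returns the fixed VF as an i32 constant. A rough, illustrative sketch of the IR it would emit for a scalable VF with a known minimum of 2 lanes and an i64 %avl (value names approximate, not part of the patch):

  %vscale = call i32 @llvm.vscale.i32()
  %vlmax = mul i32 %vscale, 2
  %vl = trunc i64 %avl to i32
  %evl = call i32 @llvm.umin.i32(i32 %vlmax, i32 %vl)

Targets can override this hook; the RISC-V override further down in this patch lowers it to llvm.riscv.vsetvli instead.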
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index ce1caafb92fb9d14495f0fb57228006860b2d659..243d01e12f14b4de5c70e425ab441b9218c94c7a 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -34,6 +34,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -2442,6 +2443,21 @@ public: InstructionCost getVectorSplitCost() { return 1; } + Value *computeVectorLength(IRBuilderBase &Builder, Value *AVL, + ElementCount VF) const { + if (!VF.isScalable()) { + return ConstantInt::get(Builder.getInt32Ty(), VF.getFixedValue()); + } + + Constant *EC = + ConstantInt::get(Builder.getInt32Ty(), VF.getKnownMinValue()); + Value *VLMax = Builder.CreateVScale(EC, "vlmax"); + Value *VL = Builder.CreateZExtOrTrunc(AVL, Builder.getInt32Ty(), "vl"); + + return Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::umin, + {VLMax, VL}, nullptr, "evl"); + } + /// @} }; diff --git a/llvm/include/llvm/Transforms/Vectorize/VectorPredication.h b/llvm/include/llvm/Transforms/Vectorize/VectorPredication.h new file mode 100644 index 0000000000000000000000000000000000000000..ce59854dbb95d9886957c9b208c95a5714eb288d --- /dev/null +++ b/llvm/include/llvm/Transforms/Vectorize/VectorPredication.h @@ -0,0 +1,55 @@ +#ifndef LLVM_TRANSFORMS_VECTORPREDICATION_H +#define LLVM_TRANSFORMS_VECTORPREDICATION_H + +#include "llvm/ADT/MapVector.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +using InstToMaskEVLMap = DenseMap<Instruction *, std::pair<Value *, Value *>>; + +struct BlockData { + // Vector that stores all vector predicated memory writing operations found in + // the basic block. If it is still empty after phase 1, the basic block can be + // skipped by the following phases. + SmallVector MemoryWritingVPInstructions; + + // Store all instructions of the basic block (in the same order as they are + // found), assigning to each the list of its users. Skip PHIs and terminators. + MapVector> TopologicalGraph; + + // Map each full-length vector operation eligible to be transformed into a + // vector predication one to the (mask, evl) pair of its first vector + // predicated memory writing operation user. + InstToMaskEVLMap VecOpsToTransform; + + // Ordered list representing the reverse order in which the basic block has to + // be rescheduled due to the new vector predicated instructions. + SmallVector NewBBReverseOrder; + + BlockData() = default; +}; + +class VectorPredicationPass : public PassInfoMixin<VectorPredicationPass> { +private: + // List of instructions to be replaced by the new VP operations, which should + // later be removed, if possible.
+ DenseMap OldInstructionsToRemove; + + void analyseBasicBlock(BasicBlock &BB, BlockData &BBInfo); + void findCandidateVectorOperations(BasicBlock &BB, BlockData &BBInfo); + void addNewUsersToMasksAndEVLs(BasicBlock &BB, BlockData &BBInfo); + void buildNewBasicBlockSchedule(BasicBlock &BB, BlockData &BBInfo); + void emitNewBasicBlockSchedule(BasicBlock &BB, BlockData &BBInfo); + void transformCandidateVectorOperations(BasicBlock &BB, BlockData &BBInfo); + + void removeOldInstructions(); + +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static StringRef name() { return "VectorPredicationPass"; } +}; + +} // namespace llvm + +#endif // LLVM_TRANSFORMS_VECTORPREDICATION_H diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index e9c01e68fde2c9adc2729831f70fbf9520f6ed54..6928f2bc0b12a4c0f934d69c9f0916892deec3cc 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1195,6 +1195,12 @@ bool TargetTransformInfo::hasActiveVectorLength(unsigned Opcode, Type *DataType, return TTIImpl->hasActiveVectorLength(Opcode, DataType, Alignment); } +Value *TargetTransformInfo::computeVectorLength(IRBuilderBase &Builder, + Value *AVL, + ElementCount VF) const { + return TTIImpl->computeVectorLength(Builder, AVL, VF); +} + TargetTransformInfo::Concept::~Concept() = default; TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 4b8754df7fb6361699385760c36092e88c1df954..5b9f2da07873ee7077ce33e0b92ca22a7030af46 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -256,6 +256,7 @@ #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "llvm/Transforms/Vectorize/SLPVectorizer.h" #include "llvm/Transforms/Vectorize/VectorCombine.h" +#include "llvm/Transforms/Vectorize/VectorPredication.h" #include using namespace llvm; diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 6cc66a0cb1320a8f65ad4498190e35ab6418e31b..4423c2c87072f97463d5747abb37da7dead730f1 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -129,6 +129,7 @@ #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "llvm/Transforms/Vectorize/SLPVectorizer.h" #include "llvm/Transforms/Vectorize/VectorCombine.h" +#include "llvm/Transforms/Vectorize/VectorPredication.h" using namespace llvm; @@ -286,6 +287,11 @@ static cl::opt AttributorRun( clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs"))); +static cl::opt + EnableVectorPredication("enable-vector-predication", cl::init(false), + cl::Hidden, + cl::desc("Enable VectorPredicationPass.")); + PipelineTuningOptions::PipelineTuningOptions() { LoopInterleaving = true; LoopVectorization = true; @@ -1230,6 +1236,10 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); } + // Try to vector predicate vectorized functions. + if (EnableVectorPredication) + FPM.addPass(VectorPredicationPass()); + // Now that we've vectorized and unrolled loops, we may have more refined // alignment information, try to re-derive it here. 
FPM.addPass(AlignmentFromAssumptionsPass()); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 73ab87dd8823648fb6c4976ad1a73a3222495a07..f40aedacaa89113784ae9d9f5e1a13673d2cb13b 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -391,6 +391,7 @@ FUNCTION_PASS("tailcallelim", TailCallElimPass()) FUNCTION_PASS("typepromotion", TypePromotionPass(TM)) FUNCTION_PASS("unify-loop-exits", UnifyLoopExitsPass()) FUNCTION_PASS("vector-combine", VectorCombinePass()) +FUNCTION_PASS("vector-predication", VectorPredicationPass()) FUNCTION_PASS("verify", VerifierPass()) FUNCTION_PASS("verify", DominatorTreeVerifierPass()) FUNCTION_PASS("verify", LoopVerifierPass()) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index fcc88d6d4682c0c4e54f237fec720cd6f0568221..222bc663c62eaede73f1c178943815b63deead0e 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -12,6 +12,7 @@ #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/CostTable.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/IR/IntrinsicsRISCV.h" #include #include using namespace llvm; @@ -1484,3 +1485,29 @@ bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1, C2.NumIVMuls, C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost); } + +Value *RISCVTTIImpl::computeVectorLength(IRBuilderBase &Builder, Value *AVL, + ElementCount VF) const { + if (!VF.isScalable()) { + return ConstantInt::get(Builder.getInt32Ty(), VF.getFixedValue()); + } + + const unsigned SEW = 3; // SEW = 64, TODO: we should use ELEN here. + const std::map LMULArgMap = { + {1, 0}, {2, 1}, {4, 2}, {8, 3}}; + + assert(AVL->getType()->isIntegerTy() && + "Requested vector length should be an integer."); + assert(LMULArgMap.find(VF.getKnownMinValue()) != LMULArgMap.end() && + "Invalid value for LMUL argument."); + Value *AVLArg = Builder.CreateZExtOrTrunc(AVL, Builder.getInt64Ty()); + Constant *SEWArg = ConstantInt::get(Builder.getInt64Ty(), SEW); + Constant *LMULArg = ConstantInt::get(Builder.getInt64Ty(), + LMULArgMap.at(VF.getKnownMinValue())); + Value *EVLRes = + Builder.CreateIntrinsic(Intrinsic::riscv_vsetvli, {AVLArg->getType()}, + {AVLArg, SEWArg, LMULArg}, nullptr, "vl"); + + // NOTE: evl type is required to be i32. 
+ return Builder.CreateZExtOrTrunc(EVLRes, Builder.getInt32Ty()); +} diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 143079c470fb97e70ef12b5984f4e490b9c4db1f..3904b6913170abf210c39c408d9790c93ebff333 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -327,6 +327,9 @@ public: bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2); + + Value *computeVectorLength(IRBuilderBase &Builder, Value *AVL, + ElementCount VF) const; }; } // end namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt index 998dfd956575d3c1f21d71bdcd333ef20e1b24e4..bc9e4d281638cfd871df66603b4b77470014a951 100644 --- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt +++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_component_library(LLVMVectorize SLPVectorizer.cpp Vectorize.cpp VectorCombine.cpp + VectorPredication.cpp VPlan.cpp VPlanHCFGBuilder.cpp VPlanRecipes.cpp diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 92fb82eea714f12a8c99643f8af8bbd8ec705795..cc723e7129281e868454915e91040bb240b4ab1a 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -370,6 +370,10 @@ static cl::opt ForceSafeDivisor( cl::desc( "Override cost based safe divisor widening for div/rem instructions")); +cl::opt UseVectorPredicationIntrinsics( + "use-vp-intrinsics", cl::init(false), cl::Hidden, + cl::desc("Use Vector Predication intrinsics during vectorization.")); + /// A helper function that returns true if the given type is irregular. The /// type is irregular if its allocated size doesn't equal the store size of an /// element of the corresponding vector type. @@ -2890,6 +2894,11 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) { if (VectorTripCount) return VectorTripCount; + // With VP intrinsics, we require tail-folding by masking; this way, we + // operate on a number of elements equal to the original loop trip count. + if (UseVectorPredicationIntrinsics) + return VectorTripCount = getOrCreateTripCount(InsertBlock); + Value *TC = getOrCreateTripCount(InsertBlock); IRBuilder<> Builder(InsertBlock->getTerminator()); @@ -2926,6 +2935,7 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) { // the step does not evenly divide the trip count, no adjustment is necessary // since there will already be scalar iterations. Note that the minimum // iterations check ensures that N >= Step. + // TODO: we should probably honor the cost model also with VP intrinsics. 
if (Cost->requiresScalarEpilogue(VF)) { auto *IsZero = Builder.CreateICmpEQ(R, ConstantInt::get(R->getType(), 0)); R = Builder.CreateSelect(IsZero, Step, R); @@ -8189,12 +8199,13 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, Reverse || Decision == LoopVectorizationCostModel::CM_Widen; if (LoadInst *Load = dyn_cast(I)) - return new VPWidenMemoryInstructionRecipe(*Load, Operands[0], Mask, - Consecutive, Reverse); + return new VPWidenMemoryInstructionRecipe( + *Load, Operands[0], Mask, Plan->getEVLPhi(), Consecutive, Reverse); StoreInst *Store = cast(I); return new VPWidenMemoryInstructionRecipe(*Store, Operands[1], Operands[0], - Mask, Consecutive, Reverse); + Mask, Plan->getEVLPhi(), + Consecutive, Reverse); } /// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also @@ -8224,11 +8235,12 @@ static VPWidenIntOrFpInductionRecipe *createWidenInductionRecipes( vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep(), SE); if (auto *TruncI = dyn_cast(PhiOrTrunc)) { return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc, TruncI, - !NeedsScalarIVOnly); + !NeedsScalarIVOnly, + Plan.getEVLPhi()); } assert(isa(PhiOrTrunc) && "must be a phi node here"); - return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc, - !NeedsScalarIVOnly); + return new VPWidenIntOrFpInductionRecipe( + Phi, Start, Step, IndDesc, !NeedsScalarIVOnly, Plan.getEVLPhi()); } VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI( @@ -8698,28 +8710,51 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, // Add the necessary canonical IV and branch recipes required to control the // loop. static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL, - TailFoldingStyle Style) { - Value *StartIdx = ConstantInt::get(IdxTy, 0); - auto *StartV = Plan.getOrAddVPValue(StartIdx); + TailFoldingStyle Style, + const TargetTransformInfo *TTI) { + VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); + VPBasicBlock *Header = TopRegion->getEntryBasicBlock(); + + // Add the EVL recipe, used to calculate the correct IV increment. + VPEVLPHIRecipe *EVLRecipe = nullptr; + // TODO: TTI should be able to indicate if a target prefers vector predication + // intrinsics. + if (UseVectorPredicationIntrinsics) { + EVLRecipe = new VPEVLPHIRecipe(Plan.getOrCreateTripCount(), TTI); + Header->insert(EVLRecipe, Header->begin()); + } // Add a VPCanonicalIVPHIRecipe starting at 0 to the header. + Value *StartIdx = ConstantInt::get(IdxTy, 0); + auto *StartV = Plan.getOrAddVPValue(StartIdx); auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL); - VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); - VPBasicBlock *Header = TopRegion->getEntryBasicBlock(); Header->insert(CanonicalIVPHI, Header->begin()); // Add a CanonicalIVIncrement{NUW} VPInstruction to increment the scalar - // IV by VF * UF. + // IV either by VF * UF or by the EVL values. bool HasNUW = Style == TailFoldingStyle::None; + SmallVector IVOps = {CanonicalIVPHI}; + if (EVLRecipe) + IVOps.push_back(EVLRecipe); auto *CanonicalIVIncrement = new VPInstruction(HasNUW ? 
VPInstruction::CanonicalIVIncrementNUW : VPInstruction::CanonicalIVIncrement, - {CanonicalIVPHI}, DL, "index.next"); + IVOps, DL, "index.next"); CanonicalIVPHI->addOperand(CanonicalIVIncrement); VPBasicBlock *EB = TopRegion->getExitingBasicBlock(); EB->appendRecipe(CanonicalIVIncrement); + // If we are working with vector predication intrinsics, add a NextEVL + // VPInstruction to calculate the remaining number of elements. + if (EVLRecipe) { + auto *NextEVL = + new VPInstruction(VPInstruction::NextEVL, + {EVLRecipe, CanonicalIVIncrement}, DL, "evl.next"); + EVLRecipe->addOperand(NextEVL); + EB->appendRecipe(NextEVL); + } + if (Style == TailFoldingStyle::DataAndControlFlow) { // Create the active lane mask instruction in the vplan preheader. VPBasicBlock *Preheader = Plan.getEntry()->getEntryBasicBlock(); @@ -8866,7 +8901,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()); addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), DLInst ? DLInst->getDebugLoc() : DebugLoc(), - CM.getTailFoldingStyle()); + CM.getTailFoldingStyle(), TTI); // Scan the body of the loop in a topological order to visit each basic block // after having visited its predecessor basic blocks. @@ -9072,7 +9107,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { Term->eraseFromParent(); addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), DebugLoc(), - CM.getTailFoldingStyle()); + CM.getTailFoldingStyle(), TTI); return Plan; } @@ -9272,24 +9307,27 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { MulOp = Instruction::FMul; } - // Multiply the vectorization factor by the step using integer or - // floating-point arithmetic as appropriate. - Type *StepType = Step->getType(); - Value *RuntimeVF; - if (Step->getType()->isFloatingPointTy()) - RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF); - else - RuntimeVF = getRuntimeVF(Builder, StepType, State.VF); - Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF); + Value *SplatVF = nullptr; + if (!getEVL()) { + // Multiply the vectorization factor by the step using integer or + // floating-point arithmetic as appropriate. + Type *StepType = Step->getType(); + Value *RuntimeVF; + if (Step->getType()->isFloatingPointTy()) + RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF); + else + RuntimeVF = getRuntimeVF(Builder, StepType, State.VF); + Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF); - // Create a vector splat to use in the induction update. - // - // FIXME: If the step is non-constant, we create the vector splat with - // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't - // handle a constant vector splat. - Value *SplatVF = isa(Mul) - ? ConstantVector::getSplat(State.VF, cast(Mul)) - : Builder.CreateVectorSplat(State.VF, Mul); + // Create a vector splat to use in the induction update. + // + // FIXME: If the step is non-constant, we create the vector splat with + // IRBuilder. IRBuilder can constant-fold the multiply, but it + // doesn't handle a constant vector splat. + SplatVF = isa(Mul) + ?
ConstantVector::getSplat(State.VF, cast(Mul)) + : Builder.CreateVectorSplat(State.VF, Mul); + } Builder.restoreIP(CurrIP); // We may need to add the step a number of times, depending on the unroll @@ -9304,8 +9342,26 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { if (isa(EntryVal)) State.addMetadata(LastInduction, EntryVal); - LastInduction = cast( - Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")); + if (auto *EVLRecipe = getEVL()) { + // Ensure the types match. + Type *DestTy = LastInduction->getType()->getScalarType(); + Value *EVL = State.get(EVLRecipe, Part); + if (DestTy->isIntegerTy()) { + EVL = Builder.CreateZExtOrTrunc(EVL, DestTy); + } else { + assert(DestTy->isFloatingPointTy()); + EVL = Builder.CreateUIToFP(EVL, DestTy); + } + // Multiply the EVL by the step using integer or floating-point + // arithmetic as appropriate. + Value *Mul = Builder.CreateBinOp(MulOp, Step, EVL); + Value *SplatEVL = Builder.CreateVectorSplat(State.VF, Mul); + LastInduction = cast( + Builder.CreateBinOp(AddOp, LastInduction, SplatEVL, "step.add.vl")); + } else { + LastInduction = cast( + Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")); + } LastInduction->setDebugLoc(EntryVal->getDebugLoc()); } @@ -9593,9 +9649,15 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { auto &Builder = State.Builder; InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF); bool isMaskRequired = getMask(); - if (isMaskRequired) + VPValue *VPEVL = getEVL(); + if (isMaskRequired) { for (unsigned Part = 0; Part < State.UF; ++Part) BlockInMaskParts[Part] = State.get(getMask(), Part); + } else if (VPEVL) { + auto *MaskTy = VectorType::get(Builder.getInt1Ty(), State.VF); + for (unsigned Part = 0; Part < State.UF; ++Part) + BlockInMaskParts[Part] = ConstantInt::getTrue(MaskTy); + } const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * { // Calculate the pointer for the specific unroll-part. @@ -9633,7 +9695,14 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { BlockInMaskParts[Part] = Builder.CreateVectorReverse(BlockInMaskParts[Part], "reverse"); } else { - Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part); + Value *Increment = nullptr; + if (VPEVL) { + Increment = Builder.getInt32(0); // EVL is always an i32. + for (unsigned int P = 0; P < Part; P++) + Increment = Builder.CreateAdd(Increment, State.get(VPEVL, P)); + } else { + Increment = createStepForVF(Builder, IndexTy, State.VF, Part); + } PartPtr = cast( Builder.CreateGEP(ScalarDataTy, Ptr, Increment)); PartPtr->setIsInBounds(InBounds); @@ -9651,10 +9720,19 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { Instruction *NewSI = nullptr; Value *StoredVal = State.get(StoredValue, Part); if (CreateGatherScatter) { - Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr; + Value *MaskPart = + (isMaskRequired || VPEVL) ? 
BlockInMaskParts[Part] : nullptr; Value *VectorGep = State.get(getAddr(), Part); - NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment, - MaskPart); + if (VPEVL) { + auto *PtrsTy = cast(VectorGep->getType()); + Value *Operands[] = {StoredVal, VectorGep, MaskPart, + State.get(VPEVL, Part)}; + NewSI = Builder.CreateIntrinsic(Intrinsic::vp_scatter, + {DataTy, PtrsTy}, Operands); + } else { + NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment, + MaskPart); + } } else { if (Reverse) { // If we store to reverse consecutive memory locations, then we need @@ -9665,11 +9743,17 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { } auto *VecPtr = CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0))); - if (isMaskRequired) + if (VPEVL) { + Value *Operands[] = {StoredVal, VecPtr, BlockInMaskParts[Part], + State.get(VPEVL, Part)}; + NewSI = Builder.CreateIntrinsic( + Intrinsic::vp_store, {DataTy, VecPtr->getType()}, Operands); + } else if (isMaskRequired) { NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment, BlockInMaskParts[Part]); - else + } else { NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment); + } } State.addMetadata(NewSI, SI); } @@ -9682,21 +9766,37 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { for (unsigned Part = 0; Part < State.UF; ++Part) { Value *NewLI; if (CreateGatherScatter) { - Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr; + Value *MaskPart = + (isMaskRequired || VPEVL) ? BlockInMaskParts[Part] : nullptr; Value *VectorGep = State.get(getAddr(), Part); - NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart, - nullptr, "wide.masked.gather"); + if (VPEVL) { + auto *PtrsTy = cast(VectorGep->getType()); + Value *Operands[] = {VectorGep, MaskPart, State.get(VPEVL, Part)}; + NewLI = Builder.CreateIntrinsic(Intrinsic::vp_gather, {DataTy, PtrsTy}, + Operands, nullptr, "vp.gather"); + } else { + NewLI = + Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart, + nullptr, "wide.masked.gather"); + } State.addMetadata(NewLI, LI); } else { auto *VecPtr = CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0))); - if (isMaskRequired) + if (VPEVL) { + Value *Operands[] = {VecPtr, BlockInMaskParts[Part], + State.get(VPEVL, Part)}; + NewLI = Builder.CreateIntrinsic(Intrinsic::vp_load, + {DataTy, VecPtr->getType()}, Operands, + nullptr, "vp.load"); + } else if (isMaskRequired) { NewLI = Builder.CreateMaskedLoad( DataTy, VecPtr, Alignment, BlockInMaskParts[Part], PoisonValue::get(DataTy), "wide.masked.load"); - else + } else { NewLI = Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load"); + } // Add metadata to the load, but setVectorValue to the reverse shuffle. State.addMetadata(NewLI, LI); @@ -10530,6 +10630,11 @@ LoopVectorizeResult LoopVectorizePass::runImpl( PreservedAnalyses LoopVectorizePass::run(Function &F, FunctionAnalysisManager &AM) { + assert((!UseVectorPredicationIntrinsics || + PreferPredicateOverEpilogue == + PreferPredicateTy::PredicateOrDontVectorize) && + "Tail folding required when using VP intrinsics."); + auto &LI = AM.getResult(F); // There are no loops in the function. Return before computing other expensive // analyses. 
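To illustrate the memory-recipe changes above: when an EVL is available, the widened loads and stores are emitted as VP intrinsics that carry an all-true mask plus the i32 EVL for the current part, instead of plain or masked accesses (gathers and scatters become llvm.vp.gather/llvm.vp.scatter in the same way). A minimal sketch, with illustrative value names and an arbitrary nxv1f64 element type:

  %vp.load = call <vscale x 1 x double> @llvm.vp.load.nxv1f64.p0(ptr %addr, <vscale x 1 x i1> %alltrue, i32 %evl)
  call void @llvm.vp.store.nxv1f64.p0(<vscale x 1 x double> %vp.load, ptr %addr2, <vscale x 1 x i1> %alltrue, i32 %evl)

The RISC-V end-to-end test added later in this patch (vp_intrinsics.ll) shows the same pattern coming out of the vectorizer.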
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index d554f438c8040d572239c52196300a4d8f3b7f6f..81e8b52ebb1ff70910360264d3c651ab2c37a05d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -596,6 +596,16 @@ VPlan::~VPlan() { delete P.second; } +VPEVLPHIRecipe *VPlan::getEVLPhi() { + VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock(); + for (VPRecipeBase &R : Header->phis()) { + if (isa(&R)) + return cast(&R); + } + + return nullptr; +} + VPActiveLaneMaskPHIRecipe *VPlan::getActiveLaneMaskPhi() { VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock(); for (VPRecipeBase &R : Header->phis()) { @@ -711,6 +721,13 @@ void VPlan::execute(VPTransformState *State) { } auto *PhiR = cast(&R); + if (auto *EVLPhi = dyn_cast(PhiR)) { + PHINode *Phi = EVLPhi->getPhi(); + Phi->addIncoming(State->get(EVLPhi->getBackedgeValue(), State->UF - 1), + VectorLatchBB); + continue; + } + // For canonical IV, first-order recurrences and in-order reduction phis, // only a single part is generated, which provides the last part from the // previous iteration. For non-ordered reductions all UF parts are diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 986faaf9966426cd5aaaadd22ea24321b469d890..99091246dcda7b9399805792c17c2a2fd554c272 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -719,10 +719,10 @@ public: /// Returns the underlying instruction, if the recipe is a VPValue or nullptr /// otherwise. Instruction *getUnderlyingInstr() { - return cast(getVPSingleValue()->getUnderlyingValue()); + return cast_or_null(getVPSingleValue()->getUnderlyingValue()); } const Instruction *getUnderlyingInstr() const { - return cast(getVPSingleValue()->getUnderlyingValue()); + return cast_or_null(getVPSingleValue()->getUnderlyingValue()); } /// Method to support type inquiry through isa, cast, and dyn_cast. @@ -797,7 +797,8 @@ public: CanonicalIVIncrementForPart, CanonicalIVIncrementForPartNUW, BranchOnCount, - BranchOnCond + BranchOnCond, + NextEVL }; private: @@ -1022,20 +1023,30 @@ class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue { const InductionDescriptor &IndDesc; bool NeedsVectorIV; + void addEVL(VPValue *EVLRecipe) { + if (EVLRecipe) + addOperand(EVLRecipe); + } + public: VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, - bool NeedsVectorIV) + bool NeedsVectorIV, VPValue *EVLRecipe) : VPRecipeBase(VPDef::VPWidenIntOrFpInductionSC, {Start, Step}), VPValue(this, IV), IV(IV), IndDesc(IndDesc), - NeedsVectorIV(NeedsVectorIV) {} + NeedsVectorIV(NeedsVectorIV) { + addEVL(EVLRecipe); + } VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, - TruncInst *Trunc, bool NeedsVectorIV) + TruncInst *Trunc, bool NeedsVectorIV, + VPValue *EVLRecipe) : VPRecipeBase(VPDef::VPWidenIntOrFpInductionSC, {Start, Step}), VPValue(this, Trunc), IV(IV), IndDesc(IndDesc), - NeedsVectorIV(NeedsVectorIV) {} + NeedsVectorIV(NeedsVectorIV) { + addEVL(EVLRecipe); + } ~VPWidenIntOrFpInductionRecipe() override = default; @@ -1059,6 +1070,12 @@ public: VPValue *getStepValue() { return getOperand(1); } const VPValue *getStepValue() const { return getOperand(1); } + /// Return the EVL value of the current loop iteration. + VPValue *getEVL() { return getNumOperands() == 3 ? 
getOperand(2) : nullptr; } + const VPValue *getEVL() const { + return getNumOperands() == 3 ? getOperand(2) : nullptr; + } + /// Returns the first defined value as TruncInst, if it is one or nullptr /// otherwise. TruncInst *getTruncInst() { @@ -1629,8 +1646,8 @@ public: /// A Recipe for widening load/store operations. /// The recipe uses the following VPValues: -/// - For load: Address, optional mask -/// - For store: Address, stored value, optional mask +/// - For load: Address, optional mask, optional evl +/// - For store: Address, stored value, optional mask, optional evl /// TODO: We currently execute only per-part unless a specific instance is /// provided. class VPWidenMemoryInstructionRecipe : public VPRecipeBase { @@ -1642,33 +1659,41 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase { // Whether the consecutive loaded/stored addresses are in reverse order. bool Reverse; - void setMask(VPValue *Mask) { - if (!Mask) - return; - addOperand(Mask); - } + // Whether the instruction has a not all-ones mask. + bool Masked = false; + + // Whether a vector length is available to the instruction. + bool HasVL = false; + + void setMaskAndEVL(VPValue *Mask, VPValue *VPEVL) { + if (Mask) { + this->Masked = true; + addOperand(Mask); + } - bool isMasked() const { - return isStore() ? getNumOperands() == 3 : getNumOperands() == 2; + if (VPEVL) { + this->HasVL = true; + addOperand(VPEVL); + } } public: VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, - bool Consecutive, bool Reverse) + VPValue *EVL, bool Consecutive, bool Reverse) : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}), Ingredient(Load), Consecutive(Consecutive), Reverse(Reverse) { assert((Consecutive || !Reverse) && "Reverse implies consecutive"); new VPValue(this, &Load); - setMask(Mask); + setMaskAndEVL(Mask, EVL); } VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredValue, VPValue *Mask, - bool Consecutive, bool Reverse) + VPValue *EVL, bool Consecutive, bool Reverse) : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr, StoredValue}), Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) { assert((Consecutive || !Reverse) && "Reverse implies consecutive"); - setMask(Mask); + setMaskAndEVL(Mask, EVL); } VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC) @@ -1681,8 +1706,15 @@ public: /// Return the mask used by this recipe. Note that a full mask is represented /// by a nullptr. VPValue *getMask() const { - // Mask is optional and therefore the last operand. - return isMasked() ? getOperand(getNumOperands() - 1) : nullptr; + return Masked ? (HasVL ? getOperand(getNumOperands() - 2) + : getOperand(getNumOperands() - 1)) + : nullptr; + } + + /// Return the evl used by this recipe. If we are working with full-length + /// vectors, return nullptr. + VPValue *getEVL() const { + return HasVL ? getOperand(getNumOperands() - 1) : nullptr; } /// Returns true if this recipe is a store. 
@@ -1826,6 +1858,33 @@ public: #endif }; +class VPEVLPHIRecipe : public VPHeaderPHIRecipe { + const TargetTransformInfo *TTI; + PHINode *Phi = nullptr; + +public: + VPEVLPHIRecipe(VPValue *StartEVL, const TargetTransformInfo *TTI) + : VPHeaderPHIRecipe(VPDef::VPWidenEVLSC, nullptr, StartEVL), TTI(TTI) {} + + ~VPEVLPHIRecipe() override = default; + + VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC) + + PHINode *getPhi() const { return Phi; } + + static inline bool classof(const VPHeaderPHIRecipe *D) { + return D->getVPDefID() == VPDef::VPWidenEVLSC; + } + + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// A Recipe for widening the canonical induction variable of the vector loop. class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue { public: @@ -2367,6 +2426,10 @@ public: return cast(&*EntryVPBB->begin()); } + /// Find and return the VPEVLPHIRecipe from the header - there should be only + /// one at most. If there isn't one, then return nullptr. + VPEVLPHIRecipe *getEVLPhi(); + /// Find and return the VPActiveLaneMaskPHIRecipe from the header - there /// be only one at most. If there isn't one, then return nullptr. VPActiveLaneMaskPHIRecipe *getActiveLaneMaskPhi(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index ff0b1df57ce4a4ab8998d1c080b0519fb930b4d9..1b69ac5d3d71a621b059a308cd36605de4773e6d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -35,6 +35,7 @@ using namespace llvm; using VectorParts = SmallVector; extern cl::opt EnableVPlanNativePath; +extern cl::opt UseVectorPredicationIntrinsics; #define LV_NAME "loop-vectorize" #define DEBUG_TYPE LV_NAME @@ -235,6 +236,15 @@ void VPInstruction::generateInstruction(VPTransformState &State, break; } case VPInstruction::ActiveLaneMask: { + if (UseVectorPredicationIntrinsics) { + State.set(this, + ConstantInt::getTrue( + VectorType::get(State.Builder.getInt1Ty(), State.VF)), + Part); + + break; + } + // Get first lane of vector induction variable. Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0)); // Get the original loop tripcount. @@ -279,10 +289,21 @@ void VPInstruction::generateInstruction(VPTransformState &State, if (Part == 0) { bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW; auto *Phi = State.get(getOperand(0), 0); - // The loop step is equal to the vectorization factor (num of SIMD - // elements) times the unroll factor (num of SIMD instructions). - Value *Step = - createStepForVF(Builder, Phi->getType(), State.VF, State.UF); + Value *Step = nullptr; + if (getNumOperands() == 2) { + // We have the EVL value available to use. + VPValue *VPEVL = getOperand(1); + Step = State.get(VPEVL, 0); + for (unsigned P = 1; P < State.UF; P++) + Step = Builder.CreateAdd(Step, State.get(VPEVL, P)); + + Step = Builder.CreateZExtOrTrunc(Step, Phi->getType()); + } else { + // The loop step is equal to the vectorization factor (num of SIMD + // elements) times the unroll factor (num of SIMD instructions). 
+ Step = createStepForVF(Builder, Phi->getType(), State.VF, State.UF); + } + Next = Builder.CreateAdd(Phi, Step, Name, IsNUW, false); } else { Next = State.get(this, 0); @@ -353,6 +374,21 @@ void VPInstruction::generateInstruction(VPTransformState &State, Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); break; } + case VPInstruction::NextEVL: { + Value *Next = nullptr; + if (Part == 0) { + auto *EVLRecipe = cast(getOperand(0)); + Value *StartEVL = State.get(EVLRecipe->getOperand(0), 0); + Value *IVIncrement = State.get(getOperand(1), 0); + + Next = Builder.CreateSub(StartEVL, IVIncrement, "evl.next"); + } else { + Next = State.get(this, 0); + } + + State.set(this, Next, Part); + break; + } default: llvm_unreachable("Unsupported opcode for instruction"); } @@ -719,6 +755,9 @@ void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, #endif bool VPWidenIntOrFpInductionRecipe::isCanonical() const { + if (getEVL()) + return false; + auto *StartC = dyn_cast(getStartValue()->getLiveInIRValue()); auto *StepC = dyn_cast(getInductionDescriptor().getStep()); return StartC && StartC->isZero() && StepC && StepC->isOne(); @@ -1329,3 +1368,30 @@ void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent, printOperands(O, SlotTracker); } #endif + +void VPEVLPHIRecipe::execute(VPTransformState &State) { + Value *StartEVL = State.get(getOperand(0), 0); + BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); + this->Phi = State.Builder.CreatePHI(StartEVL->getType(), 2, "evl.phi"); + this->Phi->addIncoming(StartEVL, VectorPH); + + Value *PrevEVL = State.Builder.CreateZExtOrTrunc( + cast(this->Phi), State.Builder.getInt32Ty(), "evl.phi.cast"); + Value *EVL = nullptr; + for (unsigned Part = 0; Part < State.UF; Part++) { + if (EVL) + PrevEVL = State.Builder.CreateSub(PrevEVL, EVL); + EVL = TTI->computeVectorLength(State.Builder, PrevEVL, State.VF); + State.set(this, EVL, Part); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPEVLPHIRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EVL-PHI "; + printAsOperand(O, SlotTracker); + O << " = phi "; + printOperands(O, SlotTracker); +} +#endif diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 1cfba64f1fbefa957bcfa381986e137f971eb225..5070aa9a8dff157a4f40ac9b885d3e83ad7cd836 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -55,8 +55,8 @@ void VPlanTransforms::VPInstructionsToVPRecipes( VPValue *Start = Plan->getOrAddVPValue(II->getStartValue()); VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE); - NewRecipe = - new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II, true); + NewRecipe = new VPWidenIntOrFpInductionRecipe( + Phi, Start, Step, *II, true, Plan->getEVLPhi()); } else { Plan->addVPValue(Phi, VPPhi); continue; @@ -69,12 +69,13 @@ void VPlanTransforms::VPInstructionsToVPRecipes( if (LoadInst *Load = dyn_cast(Inst)) { NewRecipe = new VPWidenMemoryInstructionRecipe( *Load, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)), - nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/); + nullptr /*Mask*/, nullptr /*EVL*/, false /*Consecutive*/, + false /*Reverse*/); } else if (StoreInst *Store = dyn_cast(Inst)) { NewRecipe = new VPWidenMemoryInstructionRecipe( *Store, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)), 
Plan->getOrAddVPValue(Store->getValueOperand()), nullptr /*Mask*/, - false /*Consecutive*/, false /*Reverse*/); + nullptr /*EVL*/, false /*Consecutive*/, false /*Reverse*/); } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { NewRecipe = new VPWidenGEPRecipe( GEP, Plan->mapToVPValues(GEP->operands()), OrigLoop); diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 62ec65cbfe5dd6241c0c3d1f26cd2da86d0cf531..994a677a5dba663a3531bcd9e93eb0ccb7b30276 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -354,6 +354,7 @@ public: VPCanonicalIVPHISC, VPActiveLaneMaskPHISC, VPFirstOrderRecurrencePHISC, + VPWidenEVLSC, VPWidenPHISC, VPWidenIntOrFpInductionSC, VPWidenPointerInductionSC, diff --git a/llvm/lib/Transforms/Vectorize/VectorPredication.cpp b/llvm/lib/Transforms/Vectorize/VectorPredication.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cc6137a134d5ca291e31196ab0438d09c205a05e --- /dev/null +++ b/llvm/lib/Transforms/Vectorize/VectorPredication.cpp @@ -0,0 +1,277 @@ +#include "llvm/Transforms/Vectorize/VectorPredication.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/VectorBuilder.h" +#include "llvm/Transforms/Utils/Local.h" + +#define DEBUG_TYPE "vector-predication" +STATISTIC(Transforms, "Number of full-length -> evl vector transformations."); + +using namespace llvm; + +// Map each instruction to its uses and save all memory writing vector +// predicated instructions found in the basic block. +void VectorPredicationPass::analyseBasicBlock(BasicBlock &BB, + BlockData &BBInfo) { + for (Instruction &I : BB) { + if (isa<PHINode>(I) || I.isTerminator()) + continue; + + SmallPtrSet IUsers; + for (User *IU : I.users()) { + assert(isa<Instruction>(IU) && "Unexpected behaviour."); + auto *IUInst = cast<Instruction>(IU); + if (IUInst->getParent() != I.getParent()) + continue; + if (isa<PHINode>(IUInst) || IUInst->isTerminator()) + continue; + + IUsers.insert(IUInst); + } + BBInfo.TopologicalGraph.insert({&I, IUsers}); + + if (auto *CI = dyn_cast<CallInst>(&I)) { + if (auto *CF = CI->getCalledFunction()) { + Intrinsic::ID ID = CF->getIntrinsicID(); + if (ID == Intrinsic::vp_store || ID == Intrinsic::vp_scatter) { + BBInfo.MemoryWritingVPInstructions.push_back(&I); + } + } + } + } +} + +static void findCandidateVectorOperation(BasicBlock &BB, Value *Op, Value *Mask, + Value *EVL, + InstToMaskEVLMap &VecOpsToTransform) { + auto *OpInst = dyn_cast<Instruction>(Op); + if (!OpInst) + return; + + if (OpInst->getParent() != &BB) + return; + + Intrinsic::ID VPID = VPIntrinsic::getForOpcode(OpInst->getOpcode()); + if (VPID == Intrinsic::not_intrinsic) + return; + + // If the instruction is already present in the map, it means it was already + // visited starting from a previous memory writing vp operation.
+ if (!VecOpsToTransform + .insert(std::make_pair(OpInst, std::make_pair(Mask, EVL))) + .second) { + // We need to check if the new mask and evl values differ from the old ones: + // - if they are the same, then there is nothing to do; + // - if only the mask differs, we use an all-ones mask; + // - otherwise, we remove the instruction from the map (i.e., no + // transformation should happen). + auto It = VecOpsToTransform.find(OpInst); + assert(It != VecOpsToTransform.end()); + Value *OldMask, *OldEVL; + std::tie(OldMask, OldEVL) = It->second; + + if (Mask == OldMask && EVL == OldEVL) + return; + + VecOpsToTransform.erase(OpInst); + if (EVL == OldEVL) { + VecOpsToTransform.insert( + std::make_pair(OpInst, std::make_pair(nullptr, EVL))); + } + } + + // Recursively visit OpInst operands. + switch (VPID) { + default: + for (auto *OpVal : OpInst->operand_values()) + findCandidateVectorOperation(BB, OpVal, Mask, EVL, VecOpsToTransform); + break; + case Intrinsic::vp_select: { + Value *Cond = OpInst->getOperand(0); + if (Cond->getType()->isVectorTy()) + findCandidateVectorOperation(BB, Cond, nullptr, EVL, VecOpsToTransform); + + // TODO: if the condition argument is a vector, we could backpropagate it + // as mask for the true branch and its negation as mask for the false one. + // WARNING: when creating the negation of the condition, we must ensure it + // dominates all uses. + findCandidateVectorOperation(BB, OpInst->getOperand(1), nullptr, EVL, + VecOpsToTransform); + findCandidateVectorOperation(BB, OpInst->getOperand(2), nullptr, EVL, + VecOpsToTransform); + break; + } + } +} + +// For each vector predicated memory writing operation of the basic block, go +// back to the stored vector defining instruction and verify it is a vector +// operation. Add it to the list of instructions to be transformed into vector +// predicated ones, then recursively repeat the process for its vector +// arguments. +void VectorPredicationPass::findCandidateVectorOperations(BasicBlock &BB, + BlockData &BBInfo) { + if (BBInfo.MemoryWritingVPInstructions.empty()) + return; + + for (Instruction *I : BBInfo.MemoryWritingVPInstructions) { + assert(I->getParent() == &BB && "This is not the right basic block"); + auto *VPI = cast<VPIntrinsic>(I); + Value *StoredOperand = VPI->getMemoryDataParam(); + Value *MaskOperand = VPI->getMaskParam(); + Value *EVLOperand = VPI->getVectorLengthParam(); + // First, visit the mask operand (assigning an all-ones mask to this branch) + // and only then visit the stored operand. + findCandidateVectorOperation(BB, MaskOperand, nullptr, EVLOperand, + BBInfo.VecOpsToTransform); + findCandidateVectorOperation(BB, StoredOperand, MaskOperand, EVLOperand, + BBInfo.VecOpsToTransform); + } +} + +// Add the candidates as users of the mask and evl linked to each of them. +void VectorPredicationPass::addNewUsersToMasksAndEVLs(BasicBlock &BB, + BlockData &BBInfo) { + if (BBInfo.VecOpsToTransform.empty()) + return; + + for (auto [K, V] : BBInfo.VecOpsToTransform) { + if (auto *MaskInst = dyn_cast_if_present<Instruction>(V.first)) + BBInfo.TopologicalGraph[MaskInst].insert(K); + if (auto *EVLInst = dyn_cast<Instruction>(V.second)) + BBInfo.TopologicalGraph[EVLInst].insert(K); + } +} + +// Topologically sort the graph, preserving the original order as much as possible.
+void VectorPredicationPass::buildNewBasicBlockSchedule(BasicBlock &BB, + BlockData &BBInfo) { + if (BBInfo.VecOpsToTransform.empty()) + return; + + while (!BBInfo.TopologicalGraph.empty()) { + Instruction *Inst = nullptr; + for (auto B = BBInfo.TopologicalGraph.rbegin(), + E = BBInfo.TopologicalGraph.rend(); + B != E; B++) { + if (B->second.empty()) { + Inst = B->first; + break; + } + } + assert(Inst && "Failed to empty topological graph!"); + + BBInfo.NewBBReverseOrder.push_back(Inst); + BBInfo.TopologicalGraph.erase(Inst); + + for (auto B = BBInfo.TopologicalGraph.begin(), + E = BBInfo.TopologicalGraph.end(); + B != E; B++) { + B->second.erase(Inst); + } + } +} + +// Modify the basic block based on the topological order generated. +void VectorPredicationPass::emitNewBasicBlockSchedule(BasicBlock &BB, + BlockData &BBInfo) { + if (BBInfo.VecOpsToTransform.empty()) + return; + + Instruction *InsertPoint = BB.getTerminator(); + for (Instruction *I : BBInfo.NewBBReverseOrder) { + I->moveBefore(InsertPoint); + InsertPoint = I; + } +} + +// Transform candidates to vector predicated instructions. +void VectorPredicationPass::transformCandidateVectorOperations( + BasicBlock &BB, BlockData &BBInfo) { + if (BBInfo.VecOpsToTransform.empty()) + return; + + for (auto [I, P] : BBInfo.VecOpsToTransform) { + Value *Mask, *EVL; + std::tie(Mask, EVL) = P; + + IRBuilder<> Builder(I); + unsigned int Opcode = I->getOpcode(); + Type *RetTy = I->getType(); + SmallVector Operands(I->value_op_begin(), I->value_op_end()); + switch (Opcode) { + case Instruction::FCmp: + case Instruction::ICmp: { + Operands.clear(); + auto *CmpI = cast<CmpInst>(I); + Value *PredOp = MetadataAsValue::get( + Builder.getContext(), + MDString::get(Builder.getContext(), + CmpInst::getPredicateName(CmpI->getPredicate()))); + Operands = {CmpI->getOperand(0), CmpI->getOperand(1), PredOp}; + break; + } + case Instruction::Select: { + if (!I->getOperand(0)->getType()->isVectorTy()) { + Operands.clear(); + Value *Op1 = I->getOperand(1); + Value *Op2 = I->getOperand(2); + Value *Cond = Builder.CreateVectorSplat( + cast<VectorType>(Op1->getType())->getElementCount(), + I->getOperand(0), "select.cond.splat"); + Operands = {Cond, Op1, Op2}; + } + break; + } + default: + break; + } + + if (!Mask) + // nullptr means unmasked operation, hence we use an all-ones mask. + Mask = ConstantInt::getTrue(RetTy->getWithNewType(Builder.getInt1Ty())); + + VectorBuilder VecBuilder(Builder); + VecBuilder.setMask(Mask).setEVL(EVL); + Value *NewVPOp = + VecBuilder.createVectorInstruction(Opcode, RetTy, Operands, "vp.op"); + + Transforms++; // Stats + OldInstructionsToRemove.insert(std::make_pair(I, NewVPOp)); + } +} + +// Remove old instructions, if possible. +void VectorPredicationPass::removeOldInstructions() { + for (auto [I, NewVPOp] : OldInstructionsToRemove) { + I->replaceAllUsesWith(NewVPOp); + if (isInstructionTriviallyDead(I)) + I->eraseFromParent(); + } +} + +PreservedAnalyses VectorPredicationPass::run(Function &F, + FunctionAnalysisManager &AM) { + assert(OldInstructionsToRemove.empty() && + "Map should be cleared at the end of each run of the pass."); + + for (BasicBlock &BB : F) { + BlockData BBInfo; + + analyseBasicBlock(BB, BBInfo); + findCandidateVectorOperations(BB, BBInfo); + addNewUsersToMasksAndEVLs(BB, BBInfo); + buildNewBasicBlockSchedule(BB, BBInfo); + emitNewBasicBlockSchedule(BB, BBInfo); + transformCandidateVectorOperations(BB, BBInfo); + } + + removeOldInstructions(); + OldInstructionsToRemove.clear(); + + // TODO: think about which analyses are preserved.
+ return PreservedAnalyses::none(); +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vp_intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vp_intrinsics.ll new file mode 100644 index 0000000000000000000000000000000000000000..ae636428f935698595b718e689cd34591d705df2 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vp_intrinsics.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S --passes=loop-vectorize -use-vp-intrinsics -prefer-predicate-over-epilogue=predicate-dont-vectorize -o - < %s | FileCheck %s + +; ModuleID = 'custom/simple.c' +source_filename = "custom/simple.c" +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64-unknown-unknown" + +; Input C code: +; void addVec(long N, double *C, double *A, double *B) { +; long I; +; for (I = 0; I < N; I++) +; C[I] = A[I] + B[I]; +; } + +; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) vscale_range(2,1024) +define dso_local void @addVec(i64 noundef %N, ptr nocapture noundef writeonly %C, ptr nocapture noundef readonly %A, ptr nocapture noundef readonly %B) local_unnamed_addr #0 { +; CHECK-LABEL: @addVec( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B3:%.*]] = ptrtoint ptr [[B:%.*]] to i64 +; CHECK-NEXT: [[A2:%.*]] = ptrtoint ptr [[A:%.*]] to i64 +; CHECK-NEXT: [[C1:%.*]] = ptrtoint ptr [[C:%.*]] to i64 +; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i64 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]] +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 10, i64 [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] +; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[C1]], [[A2]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 8 +; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[C1]], [[B3]] +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[EVL_PHI:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[EVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[EVL_PHI_CAST:%.*]] = trunc i64 [[EVL_PHI]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[EVL_PHI_CAST]] to i64 +; CHECK-NEXT: [[VL:%.*]] = call i64 @llvm.riscv.vsetvli.i64(i64 [[TMP9]], i64 3, i64 0) +; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[VL]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[INDEX]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = call @llvm.experimental.stepvector.nxv1i64() +; CHECK-NEXT: [[TMP13:%.*]] = add zeroinitializer, [[TMP12]] +; CHECK-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = 
getelementptr inbounds double, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 0 +; CHECK-NEXT: [[VP_LOAD:%.*]] = call @llvm.vp.load.nxv1f64.p0(ptr [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]), !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 0 +; CHECK-NEXT: [[VP_LOAD5:%.*]] = call @llvm.vp.load.nxv1f64.p0(ptr [[TMP17]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]), !tbaa [[TBAA4]] +; CHECK-NEXT: [[TMP18:%.*]] = fadd [[VP_LOAD]], [[VP_LOAD5]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[C]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, ptr [[TMP19]], i32 0 +; CHECK-NEXT: call void @llvm.vp.store.nxv1f64.p0( [[TMP18]], ptr [[TMP20]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]), !tbaa [[TBAA4]] +; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP10]] to i64 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP21]] +; CHECK-NEXT: [[EVL_NEXT]] = sub i64 [[N]], [[INDEX_NEXT]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]] +; CHECK-NEXT: [[TMP23:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA4]] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[I_08]] +; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX1]], align 8, !tbaa [[TBAA4]] +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[C]], i64 [[I_08]] +; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA4]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %cmp7 = icmp sgt i64 %N, 0 + br i1 %cmp7, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %I.08 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds double, ptr %A, i64 %I.08 + %0 = load double, ptr %arrayidx, align 8, !tbaa !4 + %arrayidx1 = getelementptr inbounds double, ptr %B, i64 %I.08 + %1 = load double, ptr %arrayidx1, align 8, !tbaa !4 + %add = fadd double %0, %1 + %arrayidx2 = getelementptr inbounds double, ptr %C, i64 %I.08 + store double %add, ptr %arrayidx2, align 8, !tbaa !4 + %inc = add nuw nsw i64 %I.08, 1 + %exitcond.not = icmp 
eq i64 %inc, %N + br i1 %exitcond.not, label %for.end.loopexit, label %for.body, !llvm.loop !8 + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} + +attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) vscale_range(2,1024) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+c,+d,+f,+m,+relax,+v,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-e,-experimental-zawrs,-experimental-zca,-experimental-zcb,-experimental-zcd,-experimental-zcf,-experimental-zihintntl,-experimental-ztso,-experimental-zvfh,-h,-save-restore,-svinval,-svnapot,-svpbmt,-xtheadba,-xtheadvdot,-xventanacondops,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zdinx,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zicbom,-zicbop,-zicboz,-zihintpause,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" } + +!llvm.module.flags = !{!0, !1, !2} +!llvm.ident = !{!3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"target-abi", !"lp64d"} +!2 = !{i32 8, !"SmallDataLimit", i32 8} +!3 = !{!"clang version 17.0.0"} +!4 = !{!5, !5, i64 0} +!5 = !{!"double", !6, i64 0} +!6 = !{!"omnipotent char", !7, i64 0} +!7 = !{!"Simple C/C++ TBAA"} +!8 = distinct !{!8, !9} +!9 = !{!"llvm.loop.mustprogress"} diff --git a/llvm/test/Transforms/VectorPredication/if-elif-else.ll b/llvm/test/Transforms/VectorPredication/if-elif-else.ll new file mode 100644 index 0000000000000000000000000000000000000000..761d3bfe9d0bf0763715a427b092bdfc5bb56d30 --- /dev/null +++ b/llvm/test/Transforms/VectorPredication/if-elif-else.ll @@ -0,0 +1,270 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S --passes=vector-predication -o - < %s | FileCheck %s + +; ModuleID = 'custom/if-elif-else.c' +source_filename = "custom/if-elif-else.c" +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64-unknown-unknown" + +; Input C code: +; void addVec(long N, double *C, double *A, double *B) { +; long I; +; for (I = 0; I < N; I++) { +; if (N < 50) +; C[I] = A[I] + B[I]; +; else if (N > 75) +; C[I] = A[I] * B[I]; +; else +; C[I] = 2 * A[I]; +; } +; } + +; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) vscale_range(2,1024) +define dso_local void @addVec(i64 noundef %N, ptr nocapture noundef writeonly %C, ptr nocapture noundef readonly %A, ptr nocapture noundef readonly %B) local_unnamed_addr #0 { +; CHECK-LABEL: @addVec( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP30:%.*]] = icmp sgt i64 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP30]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[N]], 50 +; CHECK-NEXT: [[CMP4:%.*]] = icmp ugt i64 [[N]], 75 +; CHECK-NEXT: [[TMP0:%.*]] = xor i64 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 10) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP2]], [[TMP0]] +; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[N]], 3 +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[UGLYGEP32:%.*]] = getelementptr i8, ptr 
[[A:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[UGLYGEP33:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt ptr [[UGLYGEP32]], [[C]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt ptr [[UGLYGEP]], [[A]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: [[BOUND034:%.*]] = icmp ugt ptr [[UGLYGEP33]], [[C]] +; CHECK-NEXT: [[BOUND135:%.*]] = icmp ugt ptr [[UGLYGEP]], [[B]] +; CHECK-NEXT: [[FOUND_CONFLICT36:%.*]] = and i1 [[BOUND034]], [[BOUND135]] +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT36]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY_PREHEADER]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT37:%.*]] = insertelement poison, i1 [[CMP1]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT38:%.*]] = shufflevector [[BROADCAST_SPLATINSERT37]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT39:%.*]] = insertelement poison, i1 [[CMP4]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT40:%.*]] = shufflevector [[BROADCAST_SPLATINSERT39]], poison, zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = xor [[BROADCAST_SPLAT38]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP6:%.*]] = select [[TMP5]], [[BROADCAST_SPLAT40]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = select [[BROADCAST_SPLAT38]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), [[BROADCAST_SPLAT40]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[EVL_PHI:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[EVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[EVL_PHI]], 4294967295 +; CHECK-NEXT: [[VL:%.*]] = call i64 @llvm.riscv.vsetvli.i64(i64 [[TMP8]], i64 3, i64 0) +; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[VL]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[VP_LOAD:%.*]] = call @llvm.vp.load.nxv1f64.p0(ptr [[TMP10]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP9]]), !tbaa [[TBAA4:![0-9]+]], !alias.scope !8 +; CHECK-NEXT: [[VP_OP1:%.*]] = call @llvm.vp.fmul.nxv1f64( [[VP_LOAD]], shufflevector ( insertelement ( poison, double 2.000000e+00, i64 0), poison, zeroinitializer), shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP9]]) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[VP_LOAD41:%.*]] = call @llvm.vp.load.nxv1f64.p0(ptr [[TMP11]], [[TMP6]], i32 [[TMP9]]), !tbaa [[TBAA4]], !alias.scope !11 +; CHECK-NEXT: [[VP_OP3:%.*]] = call @llvm.vp.fmul.nxv1f64( [[VP_LOAD]], [[VP_LOAD41]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP9]]) +; CHECK-NEXT: [[VP_LOAD42:%.*]] = call @llvm.vp.load.nxv1f64.p0(ptr [[TMP11]], [[BROADCAST_SPLAT38]], i32 [[TMP9]]), !tbaa [[TBAA4]], !alias.scope !11 +; CHECK-NEXT: [[VP_OP2:%.*]] = call @llvm.vp.fadd.nxv1f64( [[VP_LOAD]], [[VP_LOAD42]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP9]]) +; CHECK-NEXT: [[VP_OP:%.*]] = call @llvm.vp.select.nxv1f64( [[TMP7]], [[VP_OP2]], [[VP_OP1]], i32 [[TMP9]]) +; CHECK-NEXT: [[VP_OP4:%.*]] = call @llvm.vp.select.nxv1f64( [[TMP6]], [[VP_OP3]], [[VP_OP]], i32 [[TMP9]]) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr 
inbounds double, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.vp.store.nxv1f64.p0( [[VP_OP4]], ptr [[TMP12]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP9]]), !tbaa [[TBAA4]], !alias.scope !13, !noalias !15 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[VL]] +; CHECK-NEXT: [[EVL_NEXT]] = sub i64 [[N]], [[INDEX_NEXT]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[TMP13]], label [[FOR_END_LOOPEXIT44:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK: for.body: +; CHECK-NEXT: [[I_031:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_031]] +; CHECK-NEXT: [[TMP14:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA4]] +; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[I_031]] +; CHECK-NEXT: [[TMP15:%.*]] = load double, ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA4]] +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP14]], [[TMP15]] +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: if.else: +; CHECK-NEXT: br i1 [[CMP4]], label [[IF_THEN5:%.*]], label [[IF_ELSE9:%.*]] +; CHECK: if.then5: +; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[I_031]] +; CHECK-NEXT: [[TMP16:%.*]] = load double, ptr [[ARRAYIDX7]], align 8, !tbaa [[TBAA4]] +; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP14]], [[TMP16]] +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: if.else9: +; CHECK-NEXT: [[MUL11:%.*]] = fmul double [[TMP14]], 2.000000e+00 +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[ADD_SINK:%.*]] = phi double [ [[ADD]], [[IF_THEN]] ], [ [[MUL11]], [[IF_ELSE9]] ], [ [[MUL]], [[IF_THEN5]] ] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[C]], i64 [[I_031]] +; CHECK-NEXT: store double [[ADD_SINK]], ptr [[ARRAYIDX3]], align 8, !tbaa [[TBAA4]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_031]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end.loopexit44: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %cmp30 = icmp sgt i64 %N, 0 + br i1 %cmp30, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + %cmp1 = icmp ult i64 %N, 50 + %cmp4 = icmp ugt i64 %N, 75 + %0 = xor i64 %N, -1 + %1 = call i64 @llvm.vscale.i64() + %2 = call i64 @llvm.umax.i64(i64 %1, i64 10) + %3 = icmp ugt i64 %2, %0 + br i1 %3, label %for.body.preheader, label %vector.memcheck + +for.body.preheader: ; preds = %vector.memcheck, %for.body.lr.ph + br label %for.body + +vector.memcheck: ; preds = %for.body.lr.ph + %4 = shl i64 %N, 3 + %uglygep = getelementptr i8, ptr %C, i64 %4 + %uglygep32 = getelementptr i8, ptr %A, i64 %4 + %uglygep33 = getelementptr i8, ptr %B, i64 %4 + %bound0 = icmp ugt ptr %uglygep32, %C + %bound1 = icmp ugt ptr %uglygep, %A + %found.conflict = and i1 %bound0, %bound1 + %bound034 = icmp ugt ptr %uglygep33, %C + %bound135 = icmp ugt ptr %uglygep, %B + %found.conflict36 = and i1 %bound034, %bound135 + %conflict.rdx = or i1 %found.conflict, %found.conflict36 + br i1 %conflict.rdx, label %for.body.preheader, label %vector.ph 
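+; Note: the vector.ph block below splats the loop-invariant conditions %cmp1 (N < 50) and %cmp4 (N > 75) into scalable masks; %6 guards the else-if (A[I] * B[I]) lanes, while %7 picks between the A[I] + B[I] and 2 * A[I] results in vector.body.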
+ +vector.ph: ; preds = %vector.memcheck + %broadcast.splatinsert37 = insertelement poison, i1 %cmp1, i64 0 + %broadcast.splat38 = shufflevector %broadcast.splatinsert37, poison, zeroinitializer + %broadcast.splatinsert39 = insertelement poison, i1 %cmp4, i64 0 + %broadcast.splat40 = shufflevector %broadcast.splatinsert39, poison, zeroinitializer + %5 = xor %broadcast.splat38, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) + %6 = select %5, %broadcast.splat40, zeroinitializer + %7 = select %broadcast.splat38, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), %broadcast.splat40 + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %evl.phi = phi i64 [ %N, %vector.ph ], [ %evl.next, %vector.body ] + %8 = and i64 %evl.phi, 4294967295 + %vl = call i64 @llvm.riscv.vsetvli.i64(i64 %8, i64 3, i64 0) + %9 = trunc i64 %vl to i32 + %10 = getelementptr inbounds double, ptr %A, i64 %index + %vp.load = call @llvm.vp.load.nxv1f64.p0(ptr %10, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 %9), !tbaa !4, !alias.scope !8 + %11 = fmul %vp.load, shufflevector ( insertelement ( poison, double 2.000000e+00, i64 0), poison, zeroinitializer) + %12 = getelementptr double, ptr %B, i64 %index + %vp.load41 = call @llvm.vp.load.nxv1f64.p0(ptr %12, %6, i32 %9), !tbaa !4, !alias.scope !11 + %13 = fmul %vp.load, %vp.load41 + %vp.load42 = call @llvm.vp.load.nxv1f64.p0(ptr %12, %broadcast.splat38, i32 %9), !tbaa !4, !alias.scope !11 + %14 = fadd %vp.load, %vp.load42 + %predphi = select %7, %14, %11 + %predphi43 = select %6, %13, %predphi + %15 = getelementptr inbounds double, ptr %C, i64 %index + call void @llvm.vp.store.nxv1f64.p0( %predphi43, ptr %15, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 %9), !tbaa !4, !alias.scope !13, !noalias !15 + %index.next = add i64 %index, %vl + %evl.next = sub i64 %N, %index.next + %16 = icmp eq i64 %index.next, %N + br i1 %16, label %for.end.loopexit44, label %vector.body, !llvm.loop !16 + +for.body: ; preds = %for.body.preheader, %for.inc + %I.031 = phi i64 [ %inc, %for.inc ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds double, ptr %A, i64 %I.031 + %17 = load double, ptr %arrayidx, align 8, !tbaa !4 + br i1 %cmp1, label %if.then, label %if.else + +if.then: ; preds = %for.body + %arrayidx2 = getelementptr inbounds double, ptr %B, i64 %I.031 + %18 = load double, ptr %arrayidx2, align 8, !tbaa !4 + %add = fadd double %17, %18 + br label %for.inc + +if.else: ; preds = %for.body + br i1 %cmp4, label %if.then5, label %if.else9 + +if.then5: ; preds = %if.else + %arrayidx7 = getelementptr inbounds double, ptr %B, i64 %I.031 + %19 = load double, ptr %arrayidx7, align 8, !tbaa !4 + %mul = fmul double %17, %19 + br label %for.inc + +if.else9: ; preds = %if.else + %mul11 = fmul double %17, 2.000000e+00 + br label %for.inc + +for.inc: ; preds = %if.then, %if.else9, %if.then5 + %add.sink = phi double [ %add, %if.then ], [ %mul11, %if.else9 ], [ %mul, %if.then5 ] + %arrayidx3 = getelementptr inbounds double, ptr %C, i64 %I.031 + store double %add.sink, ptr %arrayidx3, align 8, !tbaa !4 + %inc = add nuw nsw i64 %I.031, 1 + %exitcond.not = icmp eq i64 %inc, %N + br i1 %exitcond.not, label %for.end.loopexit, label %for.body, !llvm.loop !20 + +for.end.loopexit: ; preds = %for.inc + br label %for.end + +for.end.loopexit44: ; preds = %vector.body + 
br label %for.end + +for.end: ; preds = %for.end.loopexit44, %for.end.loopexit, %entry + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare i64 @llvm.vscale.i64() #1 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i64 @llvm.umax.i64(i64, i64) #2 + +; Function Attrs: nounwind memory(none) +declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg) #3 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare @llvm.experimental.stepvector.nxv1i64() #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: read) +declare @llvm.vp.load.nxv1f64.p0(ptr nocapture, , i32) #4 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write) +declare void @llvm.vp.store.nxv1f64.p0(, ptr nocapture, , i32) #5 + +attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) vscale_range(2,1024) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+c,+d,+f,+m,+relax,+v,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-e,-experimental-zawrs,-experimental-zca,-experimental-zcb,-experimental-zcd,-experimental-zcf,-experimental-zihintntl,-experimental-ztso,-experimental-zvfh,-h,-save-restore,-svinval,-svnapot,-svpbmt,-xtheadba,-xtheadvdot,-xventanacondops,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zdinx,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zicbom,-zicbop,-zicboz,-zihintpause,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } +attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #3 = { nounwind memory(none) } +attributes #4 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } + +!llvm.module.flags = !{!0, !1, !2} +!llvm.ident = !{!3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"target-abi", !"lp64d"} +!2 = !{i32 8, !"SmallDataLimit", i32 8} +!3 = !{!"clang version 17.0.0"} +!4 = !{!5, !5, i64 0} +!5 = !{!"double", !6, i64 0} +!6 = !{!"omnipotent char", !7, i64 0} +!7 = !{!"Simple C/C++ TBAA"} +!8 = !{!9} +!9 = distinct !{!9, !10} +!10 = distinct !{!10, !"LVerDomain"} +!11 = !{!12} +!12 = distinct !{!12, !10} +!13 = !{!14} +!14 = distinct !{!14, !10} +!15 = !{!9, !12} +!16 = distinct !{!16, !17, !18, !19} +!17 = !{!"llvm.loop.mustprogress"} +!18 = !{!"llvm.loop.isvectorized", i32 1} +!19 = !{!"llvm.loop.unroll.runtime.disable"} +!20 = distinct !{!20, !17, !18} diff --git a/llvm/test/Transforms/VectorPredication/if-else_scalar-cond.ll b/llvm/test/Transforms/VectorPredication/if-else_scalar-cond.ll new file mode 100644 index 0000000000000000000000000000000000000000..ed8f28feeffc5d53d069ef125bc19f67b4651b1a --- /dev/null +++ b/llvm/test/Transforms/VectorPredication/if-else_scalar-cond.ll @@ -0,0 +1,209 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S --passes=vector-predication -o - < %s | FileCheck %s + +; ModuleID = 'custom/if-else2.c' +source_filename = "custom/if-else2.c" +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64-unknown-unknown" + +; Input C code: +; void addVec(long N, double *C, double *A, double 
*B) { +; long I; +; for (I = 0; I < N; I++) { +; if (N < 50) +; C[I] = A[I] + B[I]; +; else +; C[I] = A[I] * B[I]; +; } +; } + +; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) vscale_range(2,1024) +define dso_local void @addVec(i64 noundef %N, ptr nocapture noundef writeonly %C, ptr nocapture noundef readonly %A, ptr nocapture noundef readonly %B) local_unnamed_addr #0 { +; CHECK-LABEL: @addVec( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B22:%.*]] = ptrtoint ptr [[B:%.*]] to i64 +; CHECK-NEXT: [[A21:%.*]] = ptrtoint ptr [[A:%.*]] to i64 +; CHECK-NEXT: [[C20:%.*]] = ptrtoint ptr [[C:%.*]] to i64 +; CHECK-NEXT: [[CMP18:%.*]] = icmp sgt i64 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP18]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[N]], 50 +; CHECK-NEXT: [[TMP0:%.*]] = xor i64 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 8) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP2]], [[TMP0]] +; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 3 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[C20]], [[A21]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP4]], 3 +; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[C20]], [[B22]] +; CHECK-NEXT: [[DIFF_CHECK23:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK23]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY_PREHEADER]], label [[VECTOR_BODY_PREHEADER:%.*]] +; CHECK: vector.body.preheader: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[VECTOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[EVL_PHI:%.*]] = phi i64 [ [[EVL_NEXT:%.*]], [[VECTOR_BODY]] ], [ [[N]], [[VECTOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[EVL_PHI]], 4294967295 +; CHECK-NEXT: [[VL:%.*]] = call i64 @llvm.riscv.vsetvli.i64(i64 [[TMP9]], i64 3, i64 0) +; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[VL]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[VP_LOAD:%.*]] = call @llvm.vp.load.nxv1f64.p0(ptr [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]), !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[VP_LOAD24:%.*]] = call @llvm.vp.load.nxv1f64.p0(ptr [[TMP12]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]), !tbaa [[TBAA4]] +; CHECK-NEXT: [[VP_OP:%.*]] = call @llvm.vp.fadd.nxv1f64( [[VP_LOAD]], [[VP_LOAD24]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]) +; CHECK-NEXT: [[VP_OP2:%.*]] = call @llvm.vp.fmul.nxv1f64( [[VP_LOAD]], [[VP_LOAD24]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]) +; CHECK-NEXT: [[SELECT_COND_SPLAT_SPLATINSERT:%.*]] = insertelement poison, i1 [[CMP1]], i64 0 +; CHECK-NEXT: [[SELECT_COND_SPLAT_SPLAT:%.*]] = shufflevector 
[[SELECT_COND_SPLAT_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[VP_OP1:%.*]] = call @llvm.vp.select.nxv1f64( [[SELECT_COND_SPLAT_SPLAT]], [[VP_OP]], [[VP_OP2]], i32 [[TMP10]]) +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.vp.store.nxv1f64.p0( [[VP_OP1]], ptr [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[VL]] +; CHECK-NEXT: [[EVL_NEXT]] = sub i64 [[N]], [[INDEX_NEXT]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[TMP14]], label [[FOR_END_LOOPEXIT25:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: for.body: +; CHECK-NEXT: [[I_019:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_019]] +; CHECK-NEXT: [[TMP15:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA4]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[I_019]] +; CHECK-NEXT: [[TMP16:%.*]] = load double, ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA4]] +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[MUL_SINK:%.*]] = select i1 [[CMP1]], double [[ADD]], double [[MUL]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[C]], i64 [[I_019]] +; CHECK-NEXT: store double [[MUL_SINK]], ptr [[TMP17]], align 8 +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_019]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end.loopexit25: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %B22 = ptrtoint ptr %B to i64 + %A21 = ptrtoint ptr %A to i64 + %C20 = ptrtoint ptr %C to i64 + %cmp18 = icmp sgt i64 %N, 0 + br i1 %cmp18, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + %cmp1 = icmp ult i64 %N, 50 + %0 = xor i64 %N, -1 + %1 = call i64 @llvm.vscale.i64() + %2 = call i64 @llvm.umax.i64(i64 %1, i64 8) + %3 = icmp ugt i64 %2, %0 + br i1 %3, label %for.body.preheader, label %vector.memcheck + +for.body.preheader: ; preds = %vector.memcheck, %for.body.lr.ph + br label %for.body + +vector.memcheck: ; preds = %for.body.lr.ph + %4 = call i64 @llvm.vscale.i64() + %5 = shl nuw nsw i64 %4, 3 + %6 = sub i64 %C20, %A21 + %diff.check = icmp ult i64 %6, %5 + %7 = shl nuw nsw i64 %4, 3 + %8 = sub i64 %C20, %B22 + %diff.check23 = icmp ult i64 %8, %7 + %conflict.rdx = or i1 %diff.check, %diff.check23 + br i1 %conflict.rdx, label %for.body.preheader, label %vector.body.preheader + +vector.body.preheader: ; preds = %vector.memcheck + br label %vector.body + +vector.body: ; preds = %vector.body.preheader, %vector.body + %index = phi i64 [ %index.next, %vector.body ], [ 0, %vector.body.preheader ] + %evl.phi = phi i64 [ %evl.next, %vector.body ], [ %N, %vector.body.preheader ] + %9 = and i64 %evl.phi, 4294967295 + %vl = call i64 @llvm.riscv.vsetvli.i64(i64 %9, i64 3, i64 0) + %10 = trunc i64 %vl to i32 + %11 = getelementptr inbounds double, ptr %A, i64 %index + %vp.load = call @llvm.vp.load.nxv1f64.p0(ptr %11, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, 
zeroinitializer), i32 %10), !tbaa !4 + %12 = getelementptr inbounds double, ptr %B, i64 %index + %vp.load24 = call @llvm.vp.load.nxv1f64.p0(ptr %12, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 %10), !tbaa !4 + %13 = fadd %vp.load, %vp.load24 + %14 = fmul %vp.load, %vp.load24 + %15 = select i1 %cmp1, %13, %14 + %16 = getelementptr inbounds double, ptr %C, i64 %index + call void @llvm.vp.store.nxv1f64.p0( %15, ptr %16, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 %10) + %index.next = add i64 %index, %vl + %evl.next = sub i64 %N, %index.next + %17 = icmp eq i64 %index.next, %N + br i1 %17, label %for.end.loopexit25, label %vector.body, !llvm.loop !8 + +for.body: ; preds = %for.body.preheader, %for.body + %I.019 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds double, ptr %A, i64 %I.019 + %18 = load double, ptr %arrayidx, align 8, !tbaa !4 + %arrayidx2 = getelementptr inbounds double, ptr %B, i64 %I.019 + %19 = load double, ptr %arrayidx2, align 8, !tbaa !4 + %add = fadd double %18, %19 + %mul = fmul double %18, %19 + %mul.sink = select i1 %cmp1, double %add, double %mul + %20 = getelementptr inbounds double, ptr %C, i64 %I.019 + store double %mul.sink, ptr %20, align 8 + %inc = add nuw nsw i64 %I.019, 1 + %exitcond.not = icmp eq i64 %inc, %N + br i1 %exitcond.not, label %for.end.loopexit, label %for.body, !llvm.loop !12 + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end.loopexit25: ; preds = %vector.body + br label %for.end + +for.end: ; preds = %for.end.loopexit25, %for.end.loopexit, %entry + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare i64 @llvm.vscale.i64() #1 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i64 @llvm.umax.i64(i64, i64) #2 + +; Function Attrs: nounwind memory(none) +declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg) #3 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare @llvm.experimental.stepvector.nxv1i64() #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: read) +declare @llvm.vp.load.nxv1f64.p0(ptr nocapture, , i32) #4 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write) +declare void @llvm.vp.store.nxv1f64.p0(, ptr nocapture, , i32) #5 + +attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) vscale_range(2,1024) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+c,+d,+f,+m,+relax,+v,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-e,-experimental-zawrs,-experimental-zca,-experimental-zcb,-experimental-zcd,-experimental-zcf,-experimental-zihintntl,-experimental-ztso,-experimental-zvfh,-h,-save-restore,-svinval,-svnapot,-svpbmt,-xtheadba,-xtheadvdot,-xventanacondops,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zdinx,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zicbom,-zicbop,-zicboz,-zihintpause,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } +attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #3 = { nounwind memory(none) } +attributes #4 = { nocallback nofree nosync 
nounwind willreturn memory(argmem: read) } +attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } + +!llvm.module.flags = !{!0, !1, !2} +!llvm.ident = !{!3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"target-abi", !"lp64d"} +!2 = !{i32 8, !"SmallDataLimit", i32 8} +!3 = !{!"clang version 17.0.0"} +!4 = !{!5, !5, i64 0} +!5 = !{!"double", !6, i64 0} +!6 = !{!"omnipotent char", !7, i64 0} +!7 = !{!"Simple C/C++ TBAA"} +!8 = distinct !{!8, !9, !10, !11} +!9 = !{!"llvm.loop.mustprogress"} +!10 = !{!"llvm.loop.isvectorized", i32 1} +!11 = !{!"llvm.loop.unroll.runtime.disable"} +!12 = distinct !{!12, !9, !10} diff --git a/llvm/test/Transforms/VectorPredication/if-else_vec-cond.ll b/llvm/test/Transforms/VectorPredication/if-else_vec-cond.ll new file mode 100644 index 0000000000000000000000000000000000000000..9c25aec38fdb86e7b389b4783af81af1c66437ab --- /dev/null +++ b/llvm/test/Transforms/VectorPredication/if-else_vec-cond.ll @@ -0,0 +1,219 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S --passes=vector-predication -o - < %s | FileCheck %s + +; ModuleID = 'custom/if-else1.c' +source_filename = "custom/if-else1.c" +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64-unknown-unknown" + +; Input C code: +; void addVec(long N, double *C, double *A, double *B) { +; long I; +; for (I = 0; I < N; I++) { +; if (I < 50) +; C[I] = A[I] + B[I]; +; else +; C[I] = A[I] * B[I]; +; } +; } + +; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) vscale_range(2,1024) +define dso_local void @addVec(i64 noundef %N, ptr nocapture noundef writeonly %C, ptr nocapture noundef readonly %A, ptr nocapture noundef readonly %B) local_unnamed_addr #0 { +; CHECK-LABEL: @addVec( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B22:%.*]] = ptrtoint ptr [[B:%.*]] to i64 +; CHECK-NEXT: [[A21:%.*]] = ptrtoint ptr [[A:%.*]] to i64 +; CHECK-NEXT: [[C20:%.*]] = ptrtoint ptr [[C:%.*]] to i64 +; CHECK-NEXT: [[CMP18:%.*]] = icmp sgt i64 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP18]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = xor i64 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 8) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP2]], [[TMP0]] +; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY_PREHEADER25:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: for.body.preheader25: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 3 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[C20]], [[A21]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP4]], 3 +; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[C20]], [[B22]] +; CHECK-NEXT: [[DIFF_CHECK23:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK23]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY_PREHEADER25]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.experimental.stepvector.nxv1i64() +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[EVL_PHI:%.*]] = phi i64 [ [[N]], 
[[VECTOR_PH]] ], [ [[EVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP9]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[EVL_PHI]], 4294967295 +; CHECK-NEXT: [[VL:%.*]] = call i64 @llvm.riscv.vsetvli.i64(i64 [[TMP10]], i64 3, i64 0) +; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[VL]] to i32 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[VL]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[VP_OP1:%.*]] = call @llvm.vp.icmp.nxv1i64( [[VEC_IND]], shufflevector ( insertelement ( poison, i64 50, i64 0), poison, zeroinitializer), metadata !"ult", shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP11]]) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[VP_LOAD:%.*]] = call @llvm.vp.load.nxv1f64.p0(ptr [[TMP12]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP11]]), !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[VP_LOAD24:%.*]] = call @llvm.vp.load.nxv1f64.p0(ptr [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP11]]), !tbaa [[TBAA4]] +; CHECK-NEXT: [[VP_OP:%.*]] = call @llvm.vp.fadd.nxv1f64( [[VP_LOAD]], [[VP_LOAD24]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP11]]) +; CHECK-NEXT: [[VP_OP3:%.*]] = call @llvm.vp.fmul.nxv1f64( [[VP_LOAD]], [[VP_LOAD24]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP11]]) +; CHECK-NEXT: [[VP_OP2:%.*]] = call @llvm.vp.select.nxv1f64( [[VP_OP1]], [[VP_OP]], [[VP_OP3]], i32 [[TMP11]]) +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.vp.store.nxv1f64.p0( [[VP_OP2]], ptr [[TMP14]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP11]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[VL]] +; CHECK-NEXT: [[EVL_NEXT]] = sub i64 [[N]], [[INDEX_NEXT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[TMP15]], label [[FOR_END_LOOPEXIT26:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: for.body: +; CHECK-NEXT: [[I_019:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER25]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[I_019]], 50 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_019]] +; CHECK-NEXT: [[TMP16:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA4]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[I_019]] +; CHECK-NEXT: [[TMP17:%.*]] = load double, ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA4]] +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[MUL_SINK:%.*]] = select i1 [[CMP1]], double [[ADD]], double [[MUL]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[C]], i64 [[I_019]] +; CHECK-NEXT: store double [[MUL_SINK]], ptr [[TMP18]], align 8 +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_019]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label 
[[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end.loopexit26: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %B22 = ptrtoint ptr %B to i64 + %A21 = ptrtoint ptr %A to i64 + %C20 = ptrtoint ptr %C to i64 + %cmp18 = icmp sgt i64 %N, 0 + br i1 %cmp18, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %0 = xor i64 %N, -1 + %1 = call i64 @llvm.vscale.i64() + %2 = call i64 @llvm.umax.i64(i64 %1, i64 8) + %3 = icmp ugt i64 %2, %0 + br i1 %3, label %for.body.preheader25, label %vector.memcheck + +for.body.preheader25: ; preds = %vector.memcheck, %for.body.preheader + br label %for.body + +vector.memcheck: ; preds = %for.body.preheader + %4 = call i64 @llvm.vscale.i64() + %5 = shl nuw nsw i64 %4, 3 + %6 = sub i64 %C20, %A21 + %diff.check = icmp ult i64 %6, %5 + %7 = shl nuw nsw i64 %4, 3 + %8 = sub i64 %C20, %B22 + %diff.check23 = icmp ult i64 %8, %7 + %conflict.rdx = or i1 %diff.check, %diff.check23 + br i1 %conflict.rdx, label %for.body.preheader25, label %vector.ph + +vector.ph: ; preds = %vector.memcheck + %9 = call @llvm.experimental.stepvector.nxv1i64() + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %evl.phi = phi i64 [ %N, %vector.ph ], [ %evl.next, %vector.body ] + %vec.ind = phi [ %9, %vector.ph ], [ %vec.ind.next, %vector.body ] + %10 = and i64 %evl.phi, 4294967295 + %vl = call i64 @llvm.riscv.vsetvli.i64(i64 %10, i64 3, i64 0) + %11 = trunc i64 %vl to i32 + %.splatinsert = insertelement poison, i64 %vl, i64 0 + %.splat = shufflevector %.splatinsert, poison, zeroinitializer + %12 = icmp ult %vec.ind, shufflevector ( insertelement ( poison, i64 50, i64 0), poison, zeroinitializer) + %13 = getelementptr inbounds double, ptr %A, i64 %index + %vp.load = call @llvm.vp.load.nxv1f64.p0(ptr %13, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 %11), !tbaa !4 + %14 = getelementptr inbounds double, ptr %B, i64 %index + %vp.load24 = call @llvm.vp.load.nxv1f64.p0(ptr %14, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 %11), !tbaa !4 + %15 = fadd %vp.load, %vp.load24 + %16 = fmul %vp.load, %vp.load24 + %17 = select %12, %15, %16 + %18 = getelementptr inbounds double, ptr %C, i64 %index + call void @llvm.vp.store.nxv1f64.p0( %17, ptr %18, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 %11) + %index.next = add i64 %index, %vl + %evl.next = sub i64 %N, %index.next + %vec.ind.next = add %vec.ind, %.splat + %19 = icmp eq i64 %index.next, %N + br i1 %19, label %for.end.loopexit26, label %vector.body, !llvm.loop !8 + +for.body: ; preds = %for.body.preheader25, %for.body + %I.019 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader25 ] + %cmp1 = icmp ult i64 %I.019, 50 + %arrayidx = getelementptr inbounds double, ptr %A, i64 %I.019 + %20 = load double, ptr %arrayidx, align 8, !tbaa !4 + %arrayidx2 = getelementptr inbounds double, ptr %B, i64 %I.019 + %21 = load double, ptr %arrayidx2, align 8, !tbaa !4 + %add = fadd double %20, %21 + %mul = fmul double %20, %21 + %mul.sink = select i1 %cmp1, double %add, double %mul + %22 = getelementptr inbounds double, ptr %C, i64 %I.019 + store double %mul.sink, ptr %22, align 8 + %inc = add nuw nsw i64 %I.019, 1 + %exitcond.not = icmp eq i64 %inc, %N + br i1 %exitcond.not, 
label %for.end.loopexit, label %for.body, !llvm.loop !12 + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end.loopexit26: ; preds = %vector.body + br label %for.end + +for.end: ; preds = %for.end.loopexit26, %for.end.loopexit, %entry + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare i64 @llvm.vscale.i64() #1 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i64 @llvm.umax.i64(i64, i64) #2 + +; Function Attrs: nounwind memory(none) +declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg) #3 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare @llvm.experimental.stepvector.nxv1i64() #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: read) +declare @llvm.vp.load.nxv1f64.p0(ptr nocapture, , i32) #4 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write) +declare void @llvm.vp.store.nxv1f64.p0(, ptr nocapture, , i32) #5 + +attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) vscale_range(2,1024) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+c,+d,+f,+m,+relax,+v,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-e,-experimental-zawrs,-experimental-zca,-experimental-zcb,-experimental-zcd,-experimental-zcf,-experimental-zihintntl,-experimental-ztso,-experimental-zvfh,-h,-save-restore,-svinval,-svnapot,-svpbmt,-xtheadba,-xtheadvdot,-xventanacondops,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zdinx,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zicbom,-zicbop,-zicboz,-zihintpause,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } +attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #3 = { nounwind memory(none) } +attributes #4 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } + +!llvm.module.flags = !{!0, !1, !2} +!llvm.ident = !{!3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"target-abi", !"lp64d"} +!2 = !{i32 8, !"SmallDataLimit", i32 8} +!3 = !{!"clang version 17.0.0"} +!4 = !{!5, !5, i64 0} +!5 = !{!"double", !6, i64 0} +!6 = !{!"omnipotent char", !7, i64 0} +!7 = !{!"Simple C/C++ TBAA"} +!8 = distinct !{!8, !9, !10, !11} +!9 = !{!"llvm.loop.mustprogress"} +!10 = !{!"llvm.loop.isvectorized", i32 1} +!11 = !{!"llvm.loop.unroll.runtime.disable"} +!12 = distinct !{!12, !9, !10} diff --git a/llvm/test/Transforms/VectorPredication/simple_vector_sum.ll b/llvm/test/Transforms/VectorPredication/simple_vector_sum.ll new file mode 100644 index 0000000000000000000000000000000000000000..116d883572eeb2f08b1956a7f9f80e34e61c3e60 --- /dev/null +++ b/llvm/test/Transforms/VectorPredication/simple_vector_sum.ll @@ -0,0 +1,193 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S --passes=vector-predication -o - < %s | FileCheck %s + +; ModuleID = 'custom/simple.c' +source_filename = "custom/simple.c" +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64-unknown-unknown" + +; Input C code: +; void addVec(long N, double *C, double *A, double *B) { +; long I; +; for (I 
= 0; I < N; I++) +; C[I] = A[I] + B[I]; +; } + +; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) vscale_range(2,1024) +define dso_local void @addVec(i64 noundef %N, ptr nocapture noundef writeonly %C, ptr nocapture noundef readonly %A, ptr nocapture noundef readonly %B) local_unnamed_addr #0 { +; CHECK-LABEL: @addVec( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B11:%.*]] = ptrtoint ptr [[B:%.*]] to i64 +; CHECK-NEXT: [[A10:%.*]] = ptrtoint ptr [[A:%.*]] to i64 +; CHECK-NEXT: [[C9:%.*]] = ptrtoint ptr [[C:%.*]] to i64 +; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i64 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = xor i64 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 10) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP2]], [[TMP0]] +; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY_PREHEADER14:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: for.body.preheader14: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 3 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[C9]], [[A10]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP4]], 3 +; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[C9]], [[B11]] +; CHECK-NEXT: [[DIFF_CHECK12:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK12]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY_PREHEADER14]], label [[VECTOR_BODY_PREHEADER:%.*]] +; CHECK: vector.body.preheader: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[VECTOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[EVL_PHI:%.*]] = phi i64 [ [[EVL_NEXT:%.*]], [[VECTOR_BODY]] ], [ [[N]], [[VECTOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[EVL_PHI]], 4294967295 +; CHECK-NEXT: [[VL:%.*]] = call i64 @llvm.riscv.vsetvli.i64(i64 [[TMP9]], i64 3, i64 0) +; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[VL]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[VP_LOAD:%.*]] = call @llvm.vp.load.nxv1f64.p0(ptr [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]), !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[VP_LOAD13:%.*]] = call @llvm.vp.load.nxv1f64.p0(ptr [[TMP12]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]), !tbaa [[TBAA4]] +; CHECK-NEXT: [[VP_OP:%.*]] = call @llvm.vp.fadd.nxv1f64( [[VP_LOAD]], [[VP_LOAD13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]) +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.vp.store.nxv1f64.p0( [[VP_OP]], ptr [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]), !tbaa [[TBAA4]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[VL]] +; CHECK-NEXT: [[EVL_NEXT]] = sub i64 [[N]], [[INDEX_NEXT]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[TMP14]], 
label [[FOR_END_LOOPEXIT15:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: for.body: +; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER14]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]] +; CHECK-NEXT: [[TMP15:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA4]] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[I_08]] +; CHECK-NEXT: [[TMP16:%.*]] = load double, ptr [[ARRAYIDX1]], align 8, !tbaa [[TBAA4]] +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[C]], i64 [[I_08]] +; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA4]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end.loopexit15: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %B11 = ptrtoint ptr %B to i64 + %A10 = ptrtoint ptr %A to i64 + %C9 = ptrtoint ptr %C to i64 + %cmp7 = icmp sgt i64 %N, 0 + br i1 %cmp7, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %0 = xor i64 %N, -1 + %1 = call i64 @llvm.vscale.i64() + %2 = call i64 @llvm.umax.i64(i64 %1, i64 10) + %3 = icmp ugt i64 %2, %0 + br i1 %3, label %for.body.preheader14, label %vector.memcheck + +for.body.preheader14: ; preds = %vector.memcheck, %for.body.preheader + br label %for.body + +vector.memcheck: ; preds = %for.body.preheader + %4 = call i64 @llvm.vscale.i64() + %5 = shl nuw nsw i64 %4, 3 + %6 = sub i64 %C9, %A10 + %diff.check = icmp ult i64 %6, %5 + %7 = shl nuw nsw i64 %4, 3 + %8 = sub i64 %C9, %B11 + %diff.check12 = icmp ult i64 %8, %7 + %conflict.rdx = or i1 %diff.check, %diff.check12 + br i1 %conflict.rdx, label %for.body.preheader14, label %vector.body.preheader + +vector.body.preheader: ; preds = %vector.memcheck + br label %vector.body + +vector.body: ; preds = %vector.body.preheader, %vector.body + %index = phi i64 [ %index.next, %vector.body ], [ 0, %vector.body.preheader ] + %evl.phi = phi i64 [ %evl.next, %vector.body ], [ %N, %vector.body.preheader ] + %9 = and i64 %evl.phi, 4294967295 + %vl = call i64 @llvm.riscv.vsetvli.i64(i64 %9, i64 3, i64 0) + %10 = trunc i64 %vl to i32 + %11 = getelementptr inbounds double, ptr %A, i64 %index + %vp.load = call @llvm.vp.load.nxv1f64.p0(ptr %11, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 %10), !tbaa !4 + %12 = getelementptr inbounds double, ptr %B, i64 %index + %vp.load13 = call @llvm.vp.load.nxv1f64.p0(ptr %12, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 %10), !tbaa !4 + %13 = fadd %vp.load, %vp.load13 + %14 = getelementptr inbounds double, ptr %C, i64 %index + call void @llvm.vp.store.nxv1f64.p0( %13, ptr %14, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 %10), !tbaa !4 + %index.next = add i64 %index, %vl + %evl.next = sub i64 %N, %index.next + %15 = icmp eq i64 %index.next, %N + br i1 %15, label %for.end.loopexit15, label %vector.body, !llvm.loop !8 + +for.body: ; preds = %for.body.preheader14, %for.body + %I.08 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader14 ] + %arrayidx = 
getelementptr inbounds double, ptr %A, i64 %I.08 + %16 = load double, ptr %arrayidx, align 8, !tbaa !4 + %arrayidx1 = getelementptr inbounds double, ptr %B, i64 %I.08 + %17 = load double, ptr %arrayidx1, align 8, !tbaa !4 + %add = fadd double %16, %17 + %arrayidx2 = getelementptr inbounds double, ptr %C, i64 %I.08 + store double %add, ptr %arrayidx2, align 8, !tbaa !4 + %inc = add nuw nsw i64 %I.08, 1 + %exitcond.not = icmp eq i64 %inc, %N + br i1 %exitcond.not, label %for.end.loopexit, label %for.body, !llvm.loop !12 + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end.loopexit15: ; preds = %vector.body + br label %for.end + +for.end: ; preds = %for.end.loopexit15, %for.end.loopexit, %entry + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare i64 @llvm.vscale.i64() #1 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i64 @llvm.umax.i64(i64, i64) #2 + +; Function Attrs: nounwind memory(none) +declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg) #3 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare @llvm.experimental.stepvector.nxv1i64() #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: read) +declare @llvm.vp.load.nxv1f64.p0(ptr nocapture, , i32) #4 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: write) +declare void @llvm.vp.store.nxv1f64.p0(, ptr nocapture, , i32) #5 + +attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) vscale_range(2,1024) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+c,+d,+f,+m,+relax,+v,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-e,-experimental-zawrs,-experimental-zca,-experimental-zcb,-experimental-zcd,-experimental-zcf,-experimental-zihintntl,-experimental-ztso,-experimental-zvfh,-h,-save-restore,-svinval,-svnapot,-svpbmt,-xtheadba,-xtheadvdot,-xventanacondops,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zdinx,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zicbom,-zicbop,-zicboz,-zihintpause,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } +attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #3 = { nounwind memory(none) } +attributes #4 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } + +!llvm.module.flags = !{!0, !1, !2} +!llvm.ident = !{!3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"target-abi", !"lp64d"} +!2 = !{i32 8, !"SmallDataLimit", i32 8} +!3 = !{!"clang version 17.0.0"} +!4 = !{!5, !5, i64 0} +!5 = !{!"double", !6, i64 0} +!6 = !{!"omnipotent char", !7, i64 0} +!7 = !{!"Simple C/C++ TBAA"} +!8 = distinct !{!8, !9, !10, !11} +!9 = !{!"llvm.loop.mustprogress"} +!10 = !{!"llvm.loop.isvectorized", i32 1} +!11 = !{!"llvm.loop.unroll.runtime.disable"} +!12 = distinct !{!12, !9, !10} diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index ff7ee53bfbcf037c3135c1aaf8a921179938853e..14462f0ef6b24e99bed9254569fe8e4a66c36341 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ 
b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -1003,7 +1003,8 @@ TEST(VPRecipeTest, CastVPWidenMemoryInstructionRecipeToVPUserAndVPDef) { new LoadInst(Int32, UndefValue::get(Int32Ptr), "", false, Align(1)); VPValue Addr; VPValue Mask; - VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask, true, false); + VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask, nullptr, true, + false); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); @@ -1099,7 +1100,8 @@ TEST(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { new LoadInst(Int32, UndefValue::get(Int32Ptr), "", false, Align(1)); VPValue Addr; VPValue Mask; - VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask, true, false); + VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask, nullptr, true, + false); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_TRUE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); @@ -1113,8 +1115,8 @@ TEST(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { VPValue Addr; VPValue Mask; VPValue StoredV; - VPWidenMemoryInstructionRecipe Recipe(*Store, &Addr, &StoredV, &Mask, false, - false); + VPWidenMemoryInstructionRecipe Recipe(*Store, &Addr, &StoredV, &Mask, + nullptr, false, false); EXPECT_TRUE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_TRUE(Recipe.mayWriteToMemory());