From a7f226f9dba3472173a1ea261a3f79114caf936c Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 27 Aug 2019 10:21:11 +0000 Subject: [PATCH] AArch64: avoid creating cycle in DAG for post-increment NEON ops. Inserting a value into Visited has the effect of terminating a search for predecessors if that node is seen. This is legitimate for the base address, and acts as a slight performance optimization, but the vector-building node can be part of a legitimate cycle so we shouldn't stop searching there. PR43056. llvm-svn: 370036 --- .../Target/AArch64/AArch64ISelLowering.cpp | 2 +- .../AArch64/arm64-indexed-vector-ldst.ll | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index be248ee898c4..6d2f363858e1 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10694,7 +10694,7 @@ static SDValue performPostLD1Combine(SDNode *N, // are predecessors to each other or the Vector. 
SmallPtrSet Visited; SmallVector Worklist; - Visited.insert(N); + Visited.insert(Addr.getNode()); Worklist.push_back(User); Worklist.push_back(LD); Worklist.push_back(Vector.getNode()); diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index 8b6a4cae7ed5..f3ac9b21f53b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -6319,3 +6319,22 @@ define void @test_ld1lane_build_i8(i8* %a, i8* %b, i8* %c, i8* %d, i8* %e, i8* store <8 x i8> %sub, <8 x i8>* %p ret void } + +define <4 x i32> @test_inc_cycle(<4 x i32> %vec, i32* %in) { +; CHECK-LABEL: test_inc_cycle: +; CHECK: ld1.s { v0 }[0], [x0]{{$}} + + %elt = load i32, i32* %in + %newvec = insertelement <4 x i32> %vec, i32 %elt, i32 0 + + ; %inc cannot be %elt directly because we check that the load is only + ; used by the insert before trying to form post-inc. + %inc.vec = bitcast <4 x i32> %newvec to <2 x i64> + %inc = extractelement <2 x i64> %inc.vec, i32 0 + %newaddr = getelementptr i32, i32* %in, i64 %inc + store i32* %newaddr, i32** @var + + ret <4 x i32> %newvec +} + +@var = global i32* null -- GitLab