From 498b53890d810676ccf05508f0296a0265308aca Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 18 Mar 2020 13:09:55 -0700 Subject: [PATCH] [SelectionDAGBuilder][FPEnv] Take into account SelectionDAG continuous CSE when setting the nofpexcept flag for constrained intrinsics SelectionDAG CSEs nodes based on their result type and operands, but not their flags. The flags are expected to be intersected when they are CSEd. In SelectionDAGBuilder, for FP nodes we manage both the fast math flags and the nofpexcept flag after the nodes have already been CSEd when they were created with getNode. The management of the fastmath flags before the constrained nodes prevents the nofpexcept management from working correctly. This commit moves the FMF handling for constrained intrinsics into their visitor and disables the common FMF handling for these nodes. Differential Revision: https://reviews.llvm.org/D75224 --- llvm/include/llvm/CodeGen/SelectionDAG.h | 2 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 3 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 47 ++++++++++--------- llvm/test/CodeGen/X86/fp-intrinsics-flags.ll | 4 +- 4 files changed, 29 insertions(+), 27 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 993a3c8bbf95..3cf7b6a34680 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -935,7 +935,7 @@ public: SDValue getNode(unsigned Opcode, const SDLoc &DL, ArrayRef ResultTys, ArrayRef Ops); SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, - ArrayRef Ops); + ArrayRef Ops, const SDNodeFlags Flags = SDNodeFlags()); // Specialize based on number of operands. SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index dcd072d7631f..5b673486af15 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7435,7 +7435,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, - ArrayRef Ops) { + ArrayRef Ops, const SDNodeFlags Flags) { if (VTList.NumVTs == 1) return getNode(Opcode, DL, VTList.VTs[0], Ops); @@ -7504,6 +7504,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, return SDValue(E, 0); N = newSDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList); + N->setFlags(Flags); createOperands(N, Ops); CSEMap.InsertNode(N, IP); } else { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e124550eb0a5..29f15095af58 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1113,29 +1113,23 @@ void SelectionDAGBuilder::visit(const Instruction &I) { visit(I.getOpcode(), I); if (auto *FPMO = dyn_cast(&I)) { - // Propagate the fast-math-flags of this IR instruction to the DAG node that - // maps to this instruction. - // TODO: We could handle all flags (nsw, etc) here. - // TODO: If an IR instruction maps to >1 node, only the final node will have - // flags set. - if (SDNode *Node = getNodeForIRValue(&I)) { - SDNodeFlags IncomingFlags; - IncomingFlags.copyFMF(*FPMO); - if (!Node->getFlags().isDefined()) - Node->setFlags(IncomingFlags); - else - Node->intersectFlagsWith(IncomingFlags); - } - } - // Constrained FP intrinsics with fpexcept.ignore should also get - // the NoFPExcept flag. - if (auto *FPI = dyn_cast(&I)) - if (FPI->getExceptionBehavior() == fp::ExceptionBehavior::ebIgnore) + // ConstrainedFPIntrinsics handle their own FMF. + if (!isa(&I)) { + // Propagate the fast-math-flags of this IR instruction to the DAG node that + // maps to this instruction. + // TODO: We could handle all flags (nsw, etc) here. + // TODO: If an IR instruction maps to >1 node, only the final node will have + // flags set. if (SDNode *Node = getNodeForIRValue(&I)) { - SDNodeFlags Flags = Node->getFlags(); - Flags.setNoFPExcept(true); - Node->setFlags(Flags); + SDNodeFlags IncomingFlags; + IncomingFlags.copyFMF(*FPMO); + if (!Node->getFlags().isDefined()) + Node->setFlags(IncomingFlags); + else + Node->intersectFlagsWith(IncomingFlags); } + } + } if (!I.isTerminator() && !HasTailCall && !isStatepoint(&I)) // statepoints handle their exports internally @@ -7064,6 +7058,13 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( SDVTList VTs = DAG.getVTList(ValueVTs); fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue(); + SDNodeFlags Flags; + if (EB == fp::ExceptionBehavior::ebIgnore) + Flags.setNoFPExcept(true); + + if (auto *FPOp = dyn_cast(&FPI)) + Flags.copyFMF(*FPOp); + unsigned Opcode; switch (FPI.getIntrinsicID()) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. @@ -7079,7 +7080,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), ValueVTs[0])) { Opers.pop_back(); - SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers); + SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags); pushOutChain(Mul, EB); Opcode = ISD::STRICT_FADD; Opers.clear(); @@ -7107,7 +7108,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( } } - SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers); + SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers, Flags); pushOutChain(Result, EB); SDValue FPResult = Result.getValue(0); diff --git a/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll b/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll index ff483ab17601..4f2859d4bffa 100644 --- a/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll @@ -108,7 +108,7 @@ entry: ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0) ; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.1, align 16) ; CHECK: [[MOVSDrm_alt:%[0-9]+]]:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.3, align 16) -; CHECK: %3:fr64 = nofpexcept DIVSDrm [[MOVSDrm_alt]], %fixed-stack.2, 1, $noreg, 0, $noreg, implicit $mxcsr :: (load 8 from %fixed-stack.2) +; CHECK: %3:fr64 = DIVSDrm [[MOVSDrm_alt]], %fixed-stack.2, 1, $noreg, 0, $noreg, implicit $mxcsr :: (load 8 from %fixed-stack.2) ; CHECK: MOVSDmr killed [[MOV32rm1]], 1, $noreg, 0, $noreg, %3 :: (store 8 into %ir.x, align 4) ; CHECK: MOVSDmr killed [[MOV32rm]], 1, $noreg, 0, $noreg, %3 :: (store 8 into %ir.y, align 4) ; CHECK: RET 0 @@ -126,7 +126,7 @@ entry: ; CHECK-LABEL: name: sitofp_cse ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0, align 8) ; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.1) -; CHECK: %2:fr64 = nofpexcept CVTSI2SDrm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.2, align 16) +; CHECK: %2:fr64 = CVTSI2SDrm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.2, align 16) ; CHECK: MOVSDmr killed [[MOV32rm1]], 1, $noreg, 0, $noreg, %2 :: (store 8 into %ir.x, align 4) ; CHECK: MOVSDmr killed [[MOV32rm]], 1, $noreg, 0, $noreg, %2 :: (store 8 into %ir.y, align 4) ; CHECK: RET 0 -- GitLab