From af4211ad940025c2a1eb8b25d20196c1c1362d88 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Sat, 28 Sep 2013 00:12:32 +0000 Subject: [PATCH] [mips] Make sure loads from lazy-binding entries do not get CSE'd or hoisted out of loops. Previously, two consecutive calls to function "func" would result in the following sequence of instructions: 1. load $16, %got(func)($gp) // load address of lazy-binding stub. 2. move $25, $16 3. jalr $25 // jump to lazy-binding stub. 4. nop 5. move $25, $16 6. jalr $25 // jump to lazy-binding stub again. With this patch, the second call directly jumps to func's address, bypassing the lazy-binding resolution routine: 1. load $25, %got(func)($gp) // load address of lazy-binding stub. 2. jalr $25 // jump to lazy-binding stub. 3. nop 4. load $25, %got(func)($gp) // load resolved address of func. 5. jalr $25 // directly jump to func. llvm-svn: 191591 --- llvm/lib/Target/Mips/Mips16ISelLowering.cpp | 9 +++-- llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp | 3 +- llvm/lib/Target/Mips/MipsISelLowering.cpp | 26 +++++++++---- llvm/lib/Target/Mips/MipsISelLowering.h | 14 ++++--- llvm/test/CodeGen/Mips/brdelayslot.ll | 11 +++++- llvm/test/CodeGen/Mips/i64arg.ll | 10 ++--- llvm/test/CodeGen/Mips/largeimmprinting.ll | 4 +- llvm/test/CodeGen/Mips/lazy-binding.ll | 41 ++++++++++++++++++++ 8 files changed, 90 insertions(+), 28 deletions(-) create mode 100644 llvm/test/CodeGen/Mips/lazy-binding.ll diff --git a/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/llvm/lib/Target/Mips/Mips16ISelLowering.cpp index 89f3d3ba175c..ab649bd9882d 100644 --- a/llvm/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/llvm/lib/Target/Mips/Mips16ISelLowering.cpp @@ -419,6 +419,8 @@ getOpndList(SmallVectorImpl &Ops, bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { SelectionDAG &DAG = CLI.DAG; + MachineFunction &MF = DAG.getMachineFunction(); + MipsFunctionInfo *FuncInfo = MF.getInfo(); const char* Mips16HelperFunction = 0; bool NeedMips16Helper = false; @@ -474,9 +476,10 @@ getOpndList(SmallVectorImpl &Ops, if (NeedMips16Helper) { RegsToPass.push_front(std::make_pair(V0Reg, Callee)); JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy()); - JumpTarget = getAddrGlobal(cast(JumpTarget), - JumpTarget.getValueType(), DAG, - MipsII::MO_GOT); + ExternalSymbolSDNode *S = cast(JumpTarget); + JumpTarget = getAddrGlobal(S, JumpTarget.getValueType(), DAG, + MipsII::MO_GOT, Chain, + FuncInfo->callPtrInfo(S->getSymbol())); } else RegsToPass.push_front(std::make_pair((unsigned)Mips::T9, Callee)); } diff --git a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp index 545a38dba323..34286db16af6 100644 --- a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -421,8 +421,7 @@ bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) { return false; if (const PseudoSourceValue *PSV = dyn_cast(V)) - return !PSV->PseudoSourceValue::isConstant(0) && - (V != PseudoSourceValue::getStack()); + return !PSV->isConstant(0) && V != PseudoSourceValue::getStack(); return true; } diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 5aab3f8dd2f5..3eb2dfb6d67d 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -1468,10 +1468,12 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op, if (LargeGOT) return getAddrGlobalLargeGOT(N, Ty, DAG, MipsII::MO_GOT_HI16, - MipsII::MO_GOT_LO16); + MipsII::MO_GOT_LO16, DAG.getEntryNode(), + MachinePointerInfo::getGOT()); return getAddrGlobal(N, Ty, DAG, - HasMips64 ? MipsII::MO_GOT_DISP : MipsII::MO_GOT16); + HasMips64 ? MipsII::MO_GOT_DISP : MipsII::MO_GOT16, + DAG.getEntryNode(), MachinePointerInfo::getGOT()); } SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op, @@ -2313,6 +2315,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering(); + MipsFunctionInfo *FuncInfo = MF.getInfo(); bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; // Analyze operands of the call, assigning locations to each operand. @@ -2446,29 +2449,36 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (GlobalAddressSDNode *G = dyn_cast(Callee)) { if (IsPICCall) { - InternalLinkage = G->getGlobal()->hasInternalLinkage(); + const GlobalValue *Val = G->getGlobal(); + InternalLinkage = Val->hasInternalLinkage(); if (InternalLinkage) Callee = getAddrLocal(G, Ty, DAG, HasMips64); else if (LargeGOT) Callee = getAddrGlobalLargeGOT(G, Ty, DAG, MipsII::MO_CALL_HI16, - MipsII::MO_CALL_LO16); + MipsII::MO_CALL_LO16, Chain, + FuncInfo->callPtrInfo(Val)); else - Callee = getAddrGlobal(G, Ty, DAG, MipsII::MO_GOT_CALL); + Callee = getAddrGlobal(G, Ty, DAG, MipsII::MO_GOT_CALL, Chain, + FuncInfo->callPtrInfo(Val)); } else Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(), 0, MipsII::MO_NO_FLAG); GlobalOrExternal = true; } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { + const char *Sym = S->getSymbol(); + if (!IsN64 && !IsPIC) // !N64 && static - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), + Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), MipsII::MO_NO_FLAG); else if (LargeGOT) Callee = getAddrGlobalLargeGOT(S, Ty, DAG, MipsII::MO_CALL_HI16, - MipsII::MO_CALL_LO16); + MipsII::MO_CALL_LO16, Chain, + FuncInfo->callPtrInfo(Sym)); else // N64 || PIC - Callee = getAddrGlobal(S, Ty, DAG, MipsII::MO_GOT_CALL); + Callee = getAddrGlobal(S, Ty, DAG, MipsII::MO_GOT_CALL, Chain, + FuncInfo->callPtrInfo(Sym)); GlobalOrExternal = true; } diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index 7d9ab6a82e73..aa4bcc972142 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -275,12 +275,12 @@ namespace llvm { // (load (wrapper $gp, %got(sym))) template SDValue getAddrGlobal(NodeTy *N, EVT Ty, SelectionDAG &DAG, - unsigned Flag) const { + unsigned Flag, SDValue Chain, + const MachinePointerInfo &PtrInfo) const { SDLoc DL(N); SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty), getTargetNode(N, Ty, DAG, Flag)); - return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Tgt, - MachinePointerInfo::getGOT(), false, false, false, 0); + return DAG.getLoad(Ty, DL, Chain, Tgt, PtrInfo, false, false, false, 0); } // This method creates the following nodes, which are necessary for @@ -289,15 +289,17 @@ namespace llvm { // (load (wrapper (add %hi(sym), $gp), %lo(sym))) template SDValue getAddrGlobalLargeGOT(NodeTy *N, EVT Ty, SelectionDAG &DAG, - unsigned HiFlag, unsigned LoFlag) const { + unsigned HiFlag, unsigned LoFlag, + SDValue Chain, + const MachinePointerInfo &PtrInfo) const { SDLoc DL(N); SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(N, Ty, DAG, HiFlag)); Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, getGlobalReg(DAG, Ty)); SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi, getTargetNode(N, Ty, DAG, LoFlag)); - return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Wrapper, - MachinePointerInfo::getGOT(), false, false, false, 0); + return DAG.getLoad(Ty, DL, Chain, Wrapper, PtrInfo, false, false, false, + 0); } // This method creates the following nodes, which are necessary for diff --git a/llvm/test/CodeGen/Mips/brdelayslot.ll b/llvm/test/CodeGen/Mips/brdelayslot.ll index 869ecd9a8392..68341c1ba25b 100644 --- a/llvm/test/CodeGen/Mips/brdelayslot.ll +++ b/llvm/test/CodeGen/Mips/brdelayslot.ll @@ -160,7 +160,14 @@ for.end: ; preds = %for.body, %entry ; ; SUCCBB-LABEL: succbbs_br1: ; SUCCBB: beqz ${{[0-9]+}}, $BB -; SUCCBB-NEXT: lw $25, %call16(foo100) +; SUCCBB-NEXT: lw ${{[0-9]+}}, %got(foo101)(${{[0-9]+}}) + +define internal fastcc void @foo101() { +entry: + tail call void @foo100() + tail call void @foo100() + ret void +} define void @succbbs_br1(i32 %a) { entry: @@ -168,7 +175,7 @@ entry: br i1 %tobool, label %if.end, label %if.then if.then: ; preds = %entry - tail call void @foo100() #1 + tail call fastcc void @foo101() br label %if.end if.end: ; preds = %entry, %if.then diff --git a/llvm/test/CodeGen/Mips/i64arg.ll b/llvm/test/CodeGen/Mips/i64arg.ll index 0b16424d94c2..b038ad5bf749 100644 --- a/llvm/test/CodeGen/Mips/i64arg.ll +++ b/llvm/test/CodeGen/Mips/i64arg.ll @@ -9,11 +9,11 @@ entry: ; CHECK: lw $25, %call16(ff1) ; CHECK: jalr tail call void @ff1(i32 %i, i64 1085102592623924856) nounwind -; CHECK: lw $25, %call16(ff2) -; CHECK: lw $[[R2:[0-9]+]], 80($sp) -; CHECK: lw $[[R3:[0-9]+]], 84($sp) -; CHECK: move $4, $[[R2]] -; CHECK: move $5, $[[R3]] +; CHECK-DAG: lw $25, %call16(ff2) +; CHECK-DAG: lw $[[R2:[0-9]+]], 80($sp) +; CHECK-DAG: lw $[[R3:[0-9]+]], 84($sp) +; CHECK-DAG: move $4, $[[R2]] +; CHECK-DAG: move $5, $[[R3]] ; CHECK: jalr $25 tail call void @ff2(i64 %ll, double 3.000000e+00) nounwind %sub = add nsw i32 %i, -1 diff --git a/llvm/test/CodeGen/Mips/largeimmprinting.ll b/llvm/test/CodeGen/Mips/largeimmprinting.ll index 1e96346d1dd7..09fee3d9063f 100644 --- a/llvm/test/CodeGen/Mips/largeimmprinting.ll +++ b/llvm/test/CodeGen/Mips/largeimmprinting.ll @@ -18,11 +18,11 @@ entry: ; 64: dsll $[[R0]], $[[R0]], 48 ; 64: daddiu $[[R0]], $[[R0]], -1 ; 64: dsll $[[R0]], $[[R0]], 16 -; 64: daddiu $[[R0]], $[[R0]], -48 +; 64: daddiu $[[R0]], $[[R0]], -32 ; 64: daddu $sp, $sp, $[[R0]] ; 64: lui $[[R1:[0-9]+]], 1 ; 64: daddu $[[R1]], $sp, $[[R1]] -; 64: sd $ra, 40($[[R1]]) +; 64: sd $ra, 24($[[R1]]) %agg.tmp = alloca %struct.S1, align 1 %tmp = getelementptr inbounds %struct.S1* %agg.tmp, i32 0, i32 0, i32 0 diff --git a/llvm/test/CodeGen/Mips/lazy-binding.ll b/llvm/test/CodeGen/Mips/lazy-binding.ll new file mode 100644 index 000000000000..839155adad9a --- /dev/null +++ b/llvm/test/CodeGen/Mips/lazy-binding.ll @@ -0,0 +1,41 @@ +; RUN: llc -march=mipsel < %s | FileCheck %s + +; CHECK-LABEL: foo6: +; CHECK: %while.body +; CHECK: lw $25, %call16(foo2)(${{[0-9]+}}) +; CHECK: jalr $25 +; CHECK: %while.end + +define void @foo6(i32 %n) { +entry: + %tobool1 = icmp eq i32 %n, 0 + br i1 %tobool1, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %n.addr.02 = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %dec = add nsw i32 %n.addr.02, -1 + tail call void @foo2() + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + ret void +} + +declare void @foo2() + +; CHECK-LABEL: foo1: +; CHECK: lw $25, %call16(foo2)(${{[0-9]+}}) +; CHECK: jalr $25 +; CHECK: lw $25, %call16(foo2)(${{[0-9]+}}) +; CHECK: jalr $25 +; CHECK: lw $25, %call16(foo2)(${{[0-9]+}}) +; CHECK: jalr $25 + +define void @foo1() { +entry: + tail call void @foo2() + tail call void @foo2() + tail call void @foo2() + ret void +} -- GitLab