InFlag = Chain.getValue(2);
if (CopyVT != RVLocs[i].getValVT()) {
// Round the F80 to the right size, which also moves it to the appropriate xmm
// register.
Val = DAG.getNode(ISD::FP_ROUND, RVLocs[i].getValVT(), Val,
// This truncation won't change the value.
DAG.getIntPtrConstant(1));
}
ResultVals.push_back(Val);
}
// Merge everything together with a MERGE_VALUES node.
ResultVals.push_back(Chain);
return DAG.getMergeValues(TheCall->getVTList(), &ResultVals[0],
ResultVals.size()).getNode();
}
//===----------------------------------------------------------------------===//
// C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
// StdCall calling convention seems to be the standard for many Windows API
// routines and the like. It differs from the C calling convention in just one
// respect: the callee cleans up the stack instead of the caller. Symbols are
// also decorated in some fancy way :) It doesn't support any vector arguments.
// For info on fast calling convention see Fast Calling Convention (tail call)
// implementation LowerX86_32FastCCCallTo.
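//
// As a rough illustration (not taken from the code below, and assuming the
// usual MSVC-style decoration): a function
//   int __stdcall foo(int a, int b);   // 8 bytes of arguments
// is typically emitted as the symbol "_foo@8" and returns with "ret 8", while
// the plain C version would be "_foo" returning with "ret", leaving the 8
// bytes for the caller to pop.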
/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value. It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
const TargetRegisterClass *RC) {
assert(RC->contains(PReg) && "Not the correct regclass!");
unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
MF.getRegInfo().addLiveIn(PReg, VReg);
return VReg;
}
/// CallIsStructReturn - Determines whether a CALL node uses struct return
/// semantics.
static bool CallIsStructReturn(SDValue Op) {
unsigned NumOps = (Op.getNumOperands() - 5) / 2;
if (!NumOps)
return false;
return cast<ARG_FLAGSSDNode>(Op.getOperand(6))->getArgFlags().isSRet();
}
/// ArgsAreStructReturn - Determines whether a FORMAL_ARGUMENTS node uses struct
/// return semantics.
static bool ArgsAreStructReturn(SDValue Op) {
unsigned NumArgs = Op.getNode()->getNumValues() - 1;
if (!NumArgs)
return false;
return cast<ARG_FLAGSSDNode>(Op.getOperand(3))->getArgFlags().isSRet();
}
/// IsCalleePop - Determines whether a CALL or FORMAL_ARGUMENTS node requires
/// the callee to pop its own arguments. Callee pop is necessary to support tail
/// calls.
bool X86TargetLowering::IsCalleePop(SDValue Op) {
bool IsVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
if (IsVarArg)
return false;
switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {
default:
return false;
case CallingConv::X86_StdCall:
return !Subtarget->is64Bit();
case CallingConv::X86_FastCall:
return !Subtarget->is64Bit();
case CallingConv::Fast:
return PerformTailCallOpt;
}
}
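// Note, restating the switch above: CallingConv::Fast only becomes a
// callee-pop convention when PerformTailCallOpt (-tailcallopt) is enabled,
// otherwise fastcc keeps caller cleanup; the StdCall and FastCall cases only
// use callee pop on 32-bit targets, and vararg functions never do.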
/// CCAssignFnForNode - Selects the correct CCAssignFn for a CALL or
/// FORMAL_ARGUMENTS node.
CCAssignFn *X86TargetLowering::CCAssignFnForNode(SDValue Op) const {
unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Subtarget->is64Bit()) {
if (Subtarget->isTargetWin64())
return CC_X86_Win64_C;
else if (CC == CallingConv::Fast && PerformTailCallOpt)
return CC_X86_64_TailCall;
else
return CC_X86_64_C;
}
if (CC == CallingConv::X86_FastCall)
return CC_X86_32_FastCall;
else if (CC == CallingConv::Fast && PerformTailCallOpt)
return CC_X86_32_TailCall;
else if (CC == CallingConv::Fast)
return CC_X86_32_FastCC;
else
return CC_X86_32_C;
}
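// As an illustrative aside (based on the usual x86 conventions rather than on
// the generated tables themselves): CC_X86_32_FastCall corresponds to passing
// the first two integer-sized arguments in ECX and EDX with the remainder on
// the stack, while CC_X86_32_C passes everything on the stack; the *_TailCall
// variants are the layouts selected only when PerformTailCallOpt is in effect.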
/// NameDecorationForFORMAL_ARGUMENTS - Selects the appropriate decoration to
/// apply to a MachineFunction containing a given FORMAL_ARGUMENTS node.
NameDecorationStyle
X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDValue Op) {
unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (CC == CallingConv::X86_FastCall)
return FastCall;
else if (CC == CallingConv::X86_StdCall)
return StdCall;
return None;
}
/// CallRequiresGOTPtrInReg - Check whether the call requires the GOT pointer
/// in a register before calling.
bool X86TargetLowering::CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall) {
return !IsTailCall && !Is64Bit &&
getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT();
}
/// CallRequiresFnAddressInReg - Check whether the call requires the function
/// address to be loaded in a register.
bool
X86TargetLowering::CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall) {
return !Is64Bit && IsTailCall &&
getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT();
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(),
/*AlwaysInline=*/true, NULL, 0, NULL, 0);
}
SDValue X86TargetLowering::LowerMemArgument(SDValue Op, SelectionDAG &DAG,
const CCValAssign &VA,
MachineFrameInfo *MFI,
unsigned CC,
SDValue Root, unsigned i) {
// Create the nodes corresponding to a load from this parameter slot.
ISD::ArgFlagsTy Flags =
cast<ARG_FLAGSSDNode>(Op.getOperand(3 + i))->getArgFlags();
bool AlwaysUseMutable = (CC==CallingConv::Fast) && PerformTailCallOpt;
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
// FIXME: For now, all byval parameter objects are marked mutable. This can be
// changed with more analysis.
// In case of tail call optimization, mark all arguments mutable, since they
// could be overwritten by the lowering of the arguments of a tail call.
int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
VA.getLocMemOffset(), isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
if (Flags.isByVal())
return FIN;
return DAG.getLoad(VA.getValVT(), Root, FIN,
PseudoSourceValue::getFixedStack(FI), 0);
}
SDValue
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const Function* Fn = MF.getFunction();
if (Fn->hasExternalLinkage() &&
Subtarget->isTargetCygMing() &&
Fn->getName() == "main")
FuncInfo->setForceFramePointer(true);
// Decorate the function name.
FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op));
MachineFrameInfo *MFI = MF.getFrameInfo();
SDValue Root = Op.getOperand(0);
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
unsigned CC = MF.getFunction()->getCallingConv();
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isTargetWin64();
assert(!(isVarArg && CC == CallingConv::Fast) &&
"Var args not supported with calling convention fastcc");
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
CCInfo.AnalyzeFormalArguments(Op.getNode(), CCAssignFnForNode(Op));
SmallVector<SDValue, 8> ArgValues;
unsigned LastVal = ~0U;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
// places.
assert(VA.getValNo() != LastVal &&
"Don't support value assigned to multiple locs yet");
LastVal = VA.getValNo();
// Arguments stored in registers.
if (VA.isRegLoc()) {
MVT RegVT = VA.getLocVT();
TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = X86::GR32RegisterClass;
else if (Is64Bit && RegVT == MVT::i64)
RC = X86::GR64RegisterClass;
else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
RC = X86::VR128RegisterClass;
else if (RegVT.isVector()) {
assert(RegVT.getSizeInBits() == 64);
if (!Is64Bit)
RC = X86::VR64RegisterClass; // MMX values are passed in MMXs.
else {
// Darwin calling convention passes MMX values in either GPRs or
// XMMs in x86-64. Other targets pass them in memory.
if (RegVT != MVT::v1i64 && Subtarget->hasSSE2()) {
RC = X86::VR128RegisterClass; // MMX values are passed in XMMs.
RegVT = MVT::v2i64;
} else {
RC = X86::GR64RegisterClass; // v1i64 values are passed in GPRs.
RegVT = MVT::i64;
}
}
} else {
assert(0 && "Unknown argument type!");
unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
SDValue ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
// If this is an 8 or 16-bit value, it is really passed promoted to 32
// bits. Insert an assert[sz]ext to capture this, then truncate to the
// right size.
if (VA.getLocInfo() == CCValAssign::SExt)
ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
else if (VA.getLocInfo() == CCValAssign::ZExt)
ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
if (VA.getLocInfo() != CCValAssign::Full)
ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
// Handle MMX values passed in GPRs.
if (Is64Bit && RegVT != VA.getLocVT()) {
if (RegVT.getSizeInBits() == 64 && RC == X86::GR64RegisterClass)
ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
else if (RC == X86::VR128RegisterClass) {
ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i64, ArgValue,
DAG.getConstant(0, MVT::i64));
ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
}
}
ArgValues.push_back(ArgValue);
} else {
assert(VA.isMemLoc());
ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, CC, Root, i));
}
}
// The x86-64 ABI for returning structs by value requires that we copy
// the sret argument into %rax for the return. Save the argument into
// a virtual register so that we can access it from the return points.
if (Is64Bit && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
FuncInfo->setSRetReturnReg(Reg);
}
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), Reg, ArgValues[0]);
Root = DAG.getNode(ISD::TokenFactor, MVT::Other, Copy, Root);
}
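// For context, summarizing the x86-64 ABI rule used above: a function such as
//   struct Big f(void);
// receives a hidden pointer to the return slot as its first argument and must
// hand that same pointer back in %rax; the copy into a virtual register here
// is what lets the return lowering find the pointer again.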
unsigned StackSize = CCInfo.getNextStackOffset();
// align stack specially for tail calls
if (PerformTailCallOpt && CC == CallingConv::Fast)
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
if (Is64Bit || CC != CallingConv::X86_FastCall) {
VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
}
if (Is64Bit) {
unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
// FIXME: We should really autogenerate these arrays
static const unsigned GPR64ArgRegsWin64[] = {
X86::RCX, X86::RDX, X86::R8, X86::R9
};
static const unsigned XMMArgRegsWin64[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
};
static const unsigned GPR64ArgRegs64Bit[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
};
static const unsigned XMMArgRegs64Bit[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
const unsigned *GPR64ArgRegs, *XMMArgRegs;
if (IsWin64) {
TotalNumIntRegs = 4; TotalNumXMMRegs = 4;
GPR64ArgRegs = GPR64ArgRegsWin64;
XMMArgRegs = XMMArgRegsWin64;
} else {
TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
GPR64ArgRegs = GPR64ArgRegs64Bit;
XMMArgRegs = XMMArgRegs64Bit;
}
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
TotalNumIntRegs);
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
TotalNumXMMRegs);
// For X86-64, if there are vararg parameters that are passed via
// registers, then we must store them to their spots on the stack so they
// may be loaded by dereferencing the result of va_next.
VarArgsGPOffset = NumIntRegs * 8;
VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16;
RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 +
TotalNumXMMRegs * 16, 16);
// Store the integer parameter registers.
SmallVector<SDValue, 8> MemOps;
SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
SDValue FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
DAG.getIntPtrConstant(VarArgsGPOffset));
for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
X86::GR64RegisterClass);
SDValue Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
SDValue Store =
DAG.getStore(Val.getValue(1), Val, FIN,
PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
DAG.getIntPtrConstant(8));
}
// Now store the XMM (fp + vector) parameter registers.
FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
DAG.getIntPtrConstant(VarArgsFPOffset));
for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
X86::VR128RegisterClass);
SDValue Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
SDValue Store =
DAG.getStore(Val.getValue(1), Val, FIN,
PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
DAG.getIntPtrConstant(16));
}
if (!MemOps.empty())
Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
&MemOps[0], MemOps.size());
}
}
ArgValues.push_back(Root);
// Some CCs need callee pop.
if (IsCalleePop(Op)) {
BytesToPopOnReturn = StackSize; // Callee pops everything.
BytesCallerReserves = 0;
} else {
BytesToPopOnReturn = 0; // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
if (!Is64Bit && ArgsAreStructReturn(Op))
BytesToPopOnReturn = 4;
BytesCallerReserves = StackSize;
}
if (!Is64Bit) {
RegSaveFrameIndex = 0xAAAAAAA; // RegSaveFrameIndex is X86-64 only.
if (CC == CallingConv::X86_FastCall)
VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
}
FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
// Return the new list of results.
return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
ArgValues.size()).getValue(Op.getResNo());
}
SDValue
X86TargetLowering::LowerMemOpCallTo(SDValue Op, SelectionDAG &DAG,
const SDValue &StackPtr,
const CCValAssign &VA,
SDValue Chain, SDValue Arg) {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
ISD::ArgFlagsTy Flags =
cast<ARG_FLAGSSDNode>(Op.getOperand(6+2*VA.getValNo()))->getArgFlags();
if (Flags.isByVal()) {
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
}
return DAG.getStore(Chain, Arg, PtrOff,
PseudoSourceValue::getStack(), LocMemOffset);
}
/// EmitTailCallLoadRetAddr - Emit a load of the return address if tail call
/// optimization is performed and it is required.
SDValue
X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
SDValue &OutRetAddr,
SDValue Chain,
bool IsTailCall,
bool Is64Bit,
int FPDiff) {
if (!IsTailCall || FPDiff==0) return Chain;
// Adjust the Return address stack slot.
MVT VT = getPointerTy();
OutRetAddr = getReturnAddressFrameIndex(DAG);
// Load the "old" Return address.
OutRetAddr = DAG.getLoad(VT, Chain,OutRetAddr, NULL, 0);
return SDValue(OutRetAddr.getNode(), 1);
}
/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
/// optimization is performed and it is required (FPDiff!=0).
static SDValue
EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
SDValue Chain, SDValue RetAddrFrIdx,
bool Is64Bit, int FPDiff) {
// Store the return address to the appropriate stack slot.
if (!FPDiff) return Chain;
// Calculate the new stack slot for the return address.
int SlotSize = Is64Bit ? 8 : 4;
int NewReturnAddrFI =
MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
MVT VT = Is64Bit ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
Chain = DAG.getStore(Chain, RetAddrFrIdx, NewRetAddrFrIdx,
PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0);
return Chain;
}
SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
SDValue Chain = Op.getOperand(0);
unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
bool IsTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0
&& CC == CallingConv::Fast && PerformTailCallOpt;
SDValue Callee = Op.getOperand(4);
bool Is64Bit = Subtarget->is64Bit();
bool IsStructRet = CallIsStructReturn(Op);
assert(!(isVarArg && CC == CallingConv::Fast) &&
"Var args not supported with calling convention fastcc");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
CCInfo.AnalyzeCallOperands(Op.getNode(), CCAssignFnForNode(Op));
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
if (IsTailCall) {
// Lower arguments at fp - stackoffset + fpdiff.
unsigned NumBytesCallerPushed =
MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
FPDiff = NumBytesCallerPushed - NumBytes;
// Set the delta of movement of the return address stack slot.
// But only set if delta is greater than previous delta.
if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
}
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes));
SDValue RetAddrFrIdx;
// Load the return address for tail calls.
Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, IsTailCall, Is64Bit,
FPDiff);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
SDValue StackPtr;
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization, arguments are handled later.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = Op.getOperand(5+2*VA.getValNo());
bool isByVal = cast<ARG_FLAGSSDNode>(Op.getOperand(6+2*VA.getValNo()))->
getArgFlags().isByVal();
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: assert(0 && "Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
break;
}
if (VA.isRegLoc()) {
if (Is64Bit) {
MVT RegVT = VA.getLocVT();
if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
switch (VA.getLocReg()) {
default:
break;
case X86::RDI: case X86::RSI: case X86::RDX: case X86::RCX:
case X86::R8: {
// Special case: passing MMX values in GPR registers.
Arg = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Arg);
break;
}
case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7: {
// Special case: passing MMX values in XMM registers.
Arg = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Arg);
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Arg);
Arg = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64,
DAG.getNode(ISD::UNDEF, MVT::v2i64), Arg,
getMOVLMask(2, DAG));
break;
}
}
}
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
if (!IsTailCall || (IsTailCall && isByVal)) {
assert(VA.isMemLoc());
if (StackPtr.getNode() == 0)
StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
Arg));
}
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
&MemOpChains[0], MemOpChains.size());
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into registers.
SDValue InFlag;
// Tail call byval lowering might overwrite argument registers so in case of
// tail call optimization the copies to registers are lowered later.
if (!IsTailCall)
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
InFlag);
InFlag = Chain.getValue(1);
}
// ELF / PIC requires the GOT pointer to be in the EBX register before
// function calls made via the PLT.
if (CallRequiresGOTPtrInReg(Is64Bit, IsTailCall)) {
Chain = DAG.getCopyToReg(Chain, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
InFlag);
InFlag = Chain.getValue(1);
}
// If we are tail calling and generating PIC/GOT style code load the address
// of the callee into ecx. The value in ecx is used as target of the tail
// jump. This is done to circumvent the ebx/callee-saved problem for tail
// calls on PIC/GOT architectures. Normally we would just put the address of
// GOT into ebx and then call target@PLT. But for tail calls ebx would be
// restored (since ebx is callee saved) before jumping to the target@PLT.
if (CallRequiresFnAddressInReg(Is64Bit, IsTailCall)) {
// Note: The actual moving to ecx is done further down.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
if (G && !G->getGlobal()->hasHiddenVisibility() &&
!G->getGlobal()->hasProtectedVisibility())
Callee = LowerGlobalAddress(Callee, DAG);
else if (isa<ExternalSymbolSDNode>(Callee))
Callee = LowerExternalSymbol(Callee,DAG);
}
if (Is64Bit && isVarArg) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
// the declaration) %al is used as hidden argument to specify the number
// of SSE registers used. The contents of %al do not need to match exactly
// the number of registers, but must be an upper bound on the number of SSE
// registers used and is in the range 0 - 8 inclusive (see the illustrative
// note just after this block).
// FIXME: Verify this on Win64
// Count the number of XMM registers allocated.
static const unsigned XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
Chain = DAG.getCopyToReg(Chain, X86::AL,
DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
InFlag = Chain.getValue(1);
}
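// Illustrative note for the %al convention above: for a call such as
//   printf("%f %f", x, y);   // two doubles passed in XMM0 and XMM1
// CCInfo reports two allocated XMM registers, so AL is set to 2 here; by the
// rule quoted above, any upper bound up to 8 would also be acceptable.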
// For tail calls lower the arguments to the 'real' stack slot.
if (IsTailCall) {
SmallVector<SDValue, 8> MemOpChains2;
SDValue FIN;
int FI = 0;
// Do not flag preceding copytoreg stuff together with the following stuff.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc()) {
assert(VA.isMemLoc());
SDValue Arg = Op.getOperand(5+2*VA.getValNo());
SDValue FlagsOp = Op.getOperand(6+2*VA.getValNo());
ISD::ArgFlagsTy Flags =
cast<ARG_FLAGSSDNode>(FlagsOp)->getArgFlags();
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
FIN = DAG.getFrameIndex(FI, getPointerTy());
if (Flags.isByVal()) {
// Copy relative to framepointer.
SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
if (StackPtr.getNode() == 0)
StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
Source = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, Source);
MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain,
Flags, DAG));
} else {
MemOpChains2.push_back(
DAG.getStore(Chain, Arg, FIN,
PseudoSourceValue::getFixedStack(FI), 0));
}
}
}
if (!MemOpChains2.empty())
Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
&MemOpChains2[0], MemOpChains2.size());
// Copy arguments to their registers.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
InFlag);
InFlag = Chain.getValue(1);
}
// Store the return address to the appropriate stack slot.
Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
FPDiff);
}
// If the callee is a GlobalAddress node (quite common, every direct call is)
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
// We should use extra load for direct calls to dllimported functions in
// non-JIT mode.
if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
getTargetMachine(), true))
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
} else if (IsTailCall) {
unsigned Opc = Is64Bit ? X86::R9 : X86::ECX;
Chain = DAG.getCopyToReg(Chain,
DAG.getRegister(Opc, getPointerTy()),
Callee,InFlag);
Callee = DAG.getRegister(Opc, getPointerTy());
// Add register as live out.
DAG.getMachineFunction().getRegInfo().addLiveOut(Opc);
}
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SmallVector<SDValue, 8> Ops;
if (IsTailCall) {
Ops.push_back(Chain);
Ops.push_back(DAG.getIntPtrConstant(NumBytes));
Ops.push_back(DAG.getIntPtrConstant(0));
if (InFlag.getNode())
Ops.push_back(InFlag);
Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
// Returns a chain & a flag for retval copy to use.
NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
Ops.clear();
}
Ops.push_back(Chain);
Ops.push_back(Callee);
if (IsTailCall)
Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
// Add argument registers to the end of the list so that they are known live
// into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
// Add an implicit use of the GOT pointer in EBX.
if (!IsTailCall && !Is64Bit &&
getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT())
Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
// Add an implicit use of AL for x86 vararg functions.
if (Is64Bit && isVarArg)
Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
if (InFlag.getNode())
Ops.push_back(InFlag);
if (IsTailCall) {
assert(InFlag.getNode() &&
"Flag must be set. Depend on flag being set in LowerRET");
Chain = DAG.getNode(X86ISD::TAILCALL,
Op.getNode()->getVTList(), &Ops[0], Ops.size());
return SDValue(Chain.getNode(), Op.getResNo());
}
Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPush;
if (IsCalleePop(Op))
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
else if (!Is64Bit && IsStructRet)
// If this is a call to a struct-return function, the callee
// pops the hidden struct pointer, so we have to push it back.
// This is common for Darwin/X86, Linux & Mingw32 targets.
NumBytesForCalleeToPush = 4;
else
NumBytesForCalleeToPush = 0; // Callee pops nothing.
// Returns a flag for retval copy to use.
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getIntPtrConstant(NumBytes),
DAG.getIntPtrConstant(NumBytesForCalleeToPush),
InFlag);
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
return SDValue(LowerCallResult(Chain, InFlag, Op.getNode(), CC, DAG),
Op.getResNo());
}
//===----------------------------------------------------------------------===//
// Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//
// Like StdCall (the callee cleans up the arguments), except that ECX is
// reserved for storing the address of the tail called function. Only 2 registers are
// free for argument passing (inreg). Tail call optimization is performed
// provided:
// * tailcallopt is enabled
// * caller/callee are fastcc
// On X86_64 architecture with GOT-style position independent code only local
// (within module) calls are supported at the moment.
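//
// A qualifying example, as a sketch (names are illustrative and assume
// -tailcallopt is passed):
//   define fastcc i32 @callee(i32 %x)
//   define fastcc i32 @caller(i32 %x) {
//     %r = tail call fastcc i32 @callee(i32 %x)
//     ret i32 %r            ; the CALL is immediately followed by a RET
//   }
// Both functions are fastcc and the call is immediately followed by the
// return, so IsEligibleForTailCallOptimization below can accept it.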
// To keep the stack aligned according to the platform ABI, the function
// GetAlignedArgumentStackSize ensures that the argument delta is always a
// multiple of the stack alignment. (Dynamic linkers need this - darwin's dyld
// for example.)
// If a tail called function callee has more arguments than the caller, the
// caller needs to make sure that there is room to move the RETADDR to. This is
// achieved by reserving an area the size of the argument delta right after the
// original RETADDR, but before the saved frame pointer or the spilled registers,
// e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
// stack layout:
// arg1
// arg2
// RETADDR
// [ new RETADDR
// move area ]
// (possible EBP)
// ESI
// EDI
// local1 ..
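// Worked example for the layout above (illustrative numbers, ignoring the
// alignment applied by GetAlignedArgumentStackSize): if the caller was entered
// with 8 bytes of stack arguments and the callee needs 24, then
// FPDiff = 8 - 24 = -16, so a 16 byte move area is reserved and the return
// address is stored 16 bytes below its original slot before the jump.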
/// GetAlignedArgumentStackSize - Align the stack size so that, together with
/// the return address slot, it meets the alignment requirement, e.g. 16n + 12
/// for a 16 byte alignment requirement.
unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
SelectionDAG& DAG) {
MachineFunction &MF = DAG.getMachineFunction();
const TargetMachine &TM = MF.getTarget();
const TargetFrameInfo &TFI = *TM.getFrameInfo();
unsigned StackAlignment = TFI.getStackAlignment();
uint64_t AlignMask = StackAlignment - 1;
int64_t Offset = StackSize;
unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
// Number smaller than 12 so just add the difference.
Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
} else {
// Mask out lower bits, add stackalignment once plus the 12 bytes.
Offset = ((~AlignMask) & Offset) + StackAlignment +
(StackAlignment-SlotSize);
}
return Offset;
}
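// Worked example (assuming a 16 byte stack alignment and a 4 byte slot size,
// as on x86-32): StackSize = 20 gives (Offset & AlignMask) = 4 <= 12, so the
// first branch yields 20 + (12 - 4) = 28 = 16*1 + 12; StackSize = 30 gives
// 14 > 12, so the second branch yields (30 & ~15) + 16 + 12 = 44 = 16*2 + 12.
// Either way the result has the form 16n + 12, so pushing the 4 byte return
// address leaves the stack 16 byte aligned again.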
/// IsEligibleForTailCallOptimization - Check to see whether the next
/// instruction following the call is a return. A function is eligible if
/// caller/callee calling conventions match, currently only fastcc supports
/// tail calls, and the function CALL is immediately followed by a RET.
bool X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Call,
SDValue Ret,
SelectionDAG& DAG) const {
if (!PerformTailCallOpt)
return false;
if (CheckTailCallReturnConstraints(Call, Ret)) {
MachineFunction &MF = DAG.getMachineFunction();
unsigned CallerCC = MF.getFunction()->getCallingConv();
unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
SDValue Callee = Call.getOperand(4);
// On x86/32Bit PIC/GOT tail calls are supported.
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
!Subtarget->isPICStyleGOT()|| !Subtarget->is64Bit())
return true;
// Can only do local tail calls (in same module, hidden or protected) on
// x86_64 PIC/GOT at the moment.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
return G->getGlobal()->hasHiddenVisibility()
|| G->getGlobal()->hasProtectedVisibility();
}
}
return false;
}
FastISel *
X86TargetLowering::createFastISel(MachineFunction &mf,
DenseMap<const Value *, unsigned> &vm,
DenseMap<const BasicBlock *,
MachineBasicBlock *> &bm) {
return X86::createFastISel(mf, vm, bm);
}
//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
int ReturnAddrIndex = FuncInfo->getRAIndex();
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
if (Subtarget->is64Bit())
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
else
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
}
/// translateX86CC - do a one to one translation of an ISD::CondCode to the X86
/// specific condition code. It returns false if it cannot do a direct
/// translation. X86CC is the translated CondCode. LHS/RHS are modified as
/// needed.
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
unsigned &X86CC, SDValue &LHS, SDValue &RHS,
SelectionDAG &DAG) {
X86CC = X86::COND_INVALID;
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
// X > -1 -> X == 0, jump !sign.
RHS = DAG.getConstant(0, RHS.getValueType());
X86CC = X86::COND_NS;
return true;
} else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
// X < 0 -> X == 0, jump on sign.
X86CC = X86::COND_S;
return true;
} else if (SetCCOpcode == ISD::SETLT && RHSC->getValue() == 1) {
// X < 1 -> X <= 0
RHS = DAG.getConstant(0, RHS.getValueType());
X86CC = X86::COND_LE;
return true;
}
}
switch (SetCCOpcode) {
default: break;
case ISD::SETEQ: X86CC = X86::COND_E; break;
case ISD::SETGT: X86CC = X86::COND_G; break;
case ISD::SETGE: X86CC = X86::COND_GE; break;
case ISD::SETLT: X86CC = X86::COND_L; break;
case ISD::SETLE: X86CC = X86::COND_LE; break;
case ISD::SETNE: X86CC = X86::COND_NE; break;
case ISD::SETULT: X86CC = X86::COND_B; break;
case ISD::SETUGT: X86CC = X86::COND_A; break;
case ISD::SETULE: X86CC = X86::COND_BE; break;
case ISD::SETUGE: X86CC = X86::COND_AE; break;
// First determine if it is required or profitable to flip the operands.
bool Flip = false;
switch (SetCCOpcode) {
default: break;
case ISD::SETOLT:
case ISD::SETOLE:
case ISD::SETUGT:
case ISD::SETUGE:
Flip = true;
break;
}
// If LHS is a foldable load, but RHS is not, flip the condition.
if (!Flip &&
(ISD::isNON_EXTLoad(LHS.getNode()) && LHS.hasOneUse()) &&
!(ISD::isNON_EXTLoad(RHS.getNode()) && RHS.hasOneUse())) {
SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
Flip = true;
}
if (Flip)
std::swap(LHS, RHS);
// On a floating point condition, the flags are set as follows:
// ZF PF CF op
// 0 | 0 | 0 | X > Y
// 0 | 0 | 1 | X < Y
// 1 | 0 | 0 | X == Y
// 1 | 1 | 1 | unordered
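// Example of how the table is used: for X < Y (ISD::SETOLT) the operands are
// flipped above, turning the test into Y > X, and COND_A (CF==0 && ZF==0) is
// chosen below; this avoids COND_B, which would also trigger on an unordered
// result where CF is set.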
switch (SetCCOpcode) {
default: break;
case ISD::SETUEQ:
case ISD::SETEQ:
X86CC = X86::COND_E;
break;
case ISD::SETOLT: // flipped
case ISD::SETGT:
X86CC = X86::COND_A;
break;
case ISD::SETOLE: // flipped
case ISD::SETGE:
X86CC = X86::COND_AE;
break;
case ISD::SETUGT: // flipped
case ISD::SETLT: