Newer
Older
"Var args not supported with calling convention fastcc or ghc");
// Analyze operands of the call, assigning locations to each operand.
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext());
NAKAMURA Takumi
committed
// Allocate shadow area for Win64
if (IsWin64) {
CCInfo.AllocateStack(32, 8);
}
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
if (IsSibcall)
// This is a sibcall. The memory operands are available in caller's
// own caller's stack.
NumBytes = 0;
else if (GuaranteedTailCallOpt && IsTailCallConvention(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
if (isTailCall && !IsSibcall) {
// Lower arguments at fp - stackoffset + fpdiff.
unsigned NumBytesCallerPushed =
MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
FPDiff = NumBytesCallerPushed - NumBytes;
// Set the delta of movement of the returnaddr stackslot.
// But only set if delta is greater than previous delta.
if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
}
if (!IsSibcall)
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
// Load return address for tail calls.
if (isTailCall && FPDiff)
Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
Is64Bit, FPDiff, dl);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
SDValue StackPtr;
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization arguments are handle later.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
Owen Anderson
committed
EVT RegVT = VA.getLocVT();
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
bool isByVal = Flags.isByVal();
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
Anton Korobeynikov
committed
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
Anton Korobeynikov
committed
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
Anton Korobeynikov
committed
if (RegVT.isVector() && RegVT.getSizeInBits() == 128) {
// Special case: passing MMX values in XMM registers.
Wesley Peck
committed
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
Owen Anderson
committed
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
Anton Korobeynikov
committed
} else
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::BCvt:
Wesley Peck
committed
Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
break;
Anton Korobeynikov
committed
case CCValAssign::Indirect: {
// Store the argument.
SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
Anton Korobeynikov
committed
Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
MachinePointerInfo::getFixedStack(FI),
Anton Korobeynikov
committed
Arg = SpillSlot;
break;
}
}
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
NAKAMURA Takumi
committed
if (isVarArg && IsWin64) {
// Win64 ABI requires argument XMM reg to be copied to the corresponding
// shadow reg if callee is a varargs function.
unsigned ShadowReg = 0;
switch (VA.getLocReg()) {
case X86::XMM0: ShadowReg = X86::RCX; break;
case X86::XMM1: ShadowReg = X86::RDX; break;
case X86::XMM2: ShadowReg = X86::R8; break;
case X86::XMM3: ShadowReg = X86::R9; break;
}
if (ShadowReg)
RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
}
} else if (!IsSibcall && (!isTailCall || isByVal)) {
assert(VA.isMemLoc());
if (StackPtr.getNode() == 0)
StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
dl, DAG, VA, Flags));
}
}
if (!MemOpChains.empty())
Owen Anderson
committed
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into registers.
// Tail call byval lowering might overwrite argument registers so in case of
// tail call optimization the copies to registers are lowered later.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
if (Subtarget->isPICStyleGOT()) {
// ELF / PIC requires GOT in the EBX register before function calls via PLT
// GOT pointer.
Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg,
DebugLoc(), getPointerTy()),
InFlag);
InFlag = Chain.getValue(1);
} else {
// If we are tail calling and generating PIC/GOT style code load the
// address of the callee into ECX. The value in ecx is used as target of
// the tail jump. This is done to circumvent the ebx/callee-saved problem
// for tail calls on PIC/GOT architectures. Normally we would just put the
// address of GOT into ebx and then call target@PLT. But for tail calls
// ebx would be restored (since ebx is callee saved) before jumping to the
// target@PLT.
// Note: The actual moving to ECX is done further down.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
if (G && !G->getGlobal()->hasHiddenVisibility() &&
!G->getGlobal()->hasProtectedVisibility())
Callee = LowerGlobalAddress(Callee, DAG);
else if (isa<ExternalSymbolSDNode>(Callee))
Callee = LowerExternalSymbol(Callee, DAG);
}
}
NAKAMURA Takumi
committed
if (Is64Bit && isVarArg && !IsWin64) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
// the declaration) %al is used as hidden argument to specify the number
// of SSE registers used. The contents of %al do not need to match exactly
// the number of registers, but must be an ubound on the number of SSE
// registers used and is in the range 0 - 8 inclusive.
// Count the number of XMM registers allocated.
static const unsigned XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
Nate Begeman
committed
assert((Subtarget->hasXMM() || !NumXMMRegs)
Torok Edwin
committed
&& "SSE registers cannot be used when SSE is disabled");
Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
Owen Anderson
committed
DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
// For tail calls lower the arguments to the 'real' stack slot.
if (isTailCall) {
// Force all the incoming stack arguments to be loaded from the stack
// before any new outgoing arguments are stored to the stack, because the
// outgoing stack slots may alias the incoming argument stack slots, and
// the alias isn't otherwise explicit. This is slightly more conservative
// than necessary, because it means that each store effectively depends
// on every argument instead of just those arguments it would clobber.
SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
SmallVector<SDValue, 8> MemOpChains2;
SDValue FIN;
int FI = 0;
// Do not flag preceding copytoreg stuff together with the following stuff.
if (GuaranteedTailCallOpt) {
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (VA.isRegLoc())
continue;
assert(VA.isMemLoc());
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
FIN = DAG.getFrameIndex(FI, getPointerTy());
SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
Gabor Greif
committed
if (StackPtr.getNode() == 0)
StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
ArgChain,
} else {
MemOpChains2.push_back(
DAG.getStore(ArgChain, dl, Arg, FIN,
MachinePointerInfo::getFixedStack(FI),
}
}
if (!MemOpChains2.empty())
Owen Anderson
committed
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains2[0], MemOpChains2.size());
// Copy arguments to their registers.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
// Store the return address to the appropriate stack slot.
Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
}
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
// In the 64-bit large code model, we have to make all calls
// through a register, since the call instruction's 32-bit
// pc-relative offset may not be large enough to hold the whole
// address.
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
// If the callee is a GlobalAddress node (quite common, every direct call
// is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
// it.
Anton Korobeynikov
committed
// We should use extra load for direct calls to dllimported functions in
// non-JIT mode.
const GlobalValue *GV = G->getGlobal();
if (!GV->hasDLLImportLinkage()) {
unsigned char OpFlags = 0;
// On ELF targets, in both X86-64 and X86-32 mode, direct calls to
// external symbols most go through the PLT in PIC mode. If the symbol
// has hidden or protected visibility, or if it is static or local, then
// we don't need to use the PLT - we can directly call it.
if (Subtarget->isTargetELF() &&
getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
OpFlags = X86II::MO_PLT;
Chris Lattner
committed
} else if (Subtarget->isPICStyleStubAny() &&
Daniel Dunbar
committed
(!Subtarget->getTargetTriple().isMacOSX() ||
Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
OpFlags = X86II::MO_DARWIN_STUB;
}
Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
G->getOffset(), OpFlags);
}
Bill Wendling
committed
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
// On ELF targets, in either X86-64 or X86-32 mode, direct calls to
// external symbols should go through the PLT.
if (Subtarget->isTargetELF() &&
getTargetMachine().getRelocationModel() == Reloc::PIC_) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
Daniel Dunbar
committed
(!Subtarget->getTargetTriple().isMacOSX() ||
Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
OpFlags = X86II::MO_DARWIN_STUB;
}
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
OpFlags);
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
if (!IsSibcall && isTailCall) {
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(0, true), InFlag);
InFlag = Chain.getValue(1);
}
Ops.push_back(Chain);
Ops.push_back(Callee);
Owen Anderson
committed
Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
// Add argument registers to the end of the list so that they are known live
// into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
// Add an implicit use GOT pointer in EBX.
if (!isTailCall && Subtarget->isPICStyleGOT())
Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
// Add an implicit use of AL for non-Windows x86 64-bit vararg functions.
NAKAMURA Takumi
committed
if (Is64Bit && isVarArg && !IsWin64)
Owen Anderson
committed
Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
Gabor Greif
committed
if (InFlag.getNode())
Ops.push_back(InFlag);
// We used to do:
//// If this is the first return lowered for this function, add the regs
//// to the liveout set for the function.
// This isn't right, although it's probably harmless on x86; liveouts
// should be computed from returns not tail calls. Consider a void
// function making a tail call to a function returning int.
return DAG.getNode(X86ISD::TC_RETURN, dl,
NodeTys, &Ops[0], Ops.size());
}
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPush;
if (Subtarget->IsCalleePop(isVarArg, CallConv))
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet)
// If this is a call to a struct-return function, the callee
// pops the hidden struct pointer, so we have to push it back.
// This is common for Darwin/X86, Linux & Mingw32 targets.
NumBytesForCalleeToPush = 4;
else
NumBytesForCalleeToPush = 0; // Callee pops nothing.
// Returns a flag for retval copy to use.
if (!IsSibcall) {
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(NumBytesForCalleeToPush,
true),
InFlag);
InFlag = Chain.getValue(1);
}
// Handle result values, copying them out of physregs into vregs that we
// return.
return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
Ins, dl, DAG, InVals);
}
//===----------------------------------------------------------------------===//
// Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//
// Like std call, callee cleans arguments, convention except that ECX is
// reserved for storing the tail called function address. Only 2 registers are
// free for argument passing (inreg). Tail call optimization is performed
// provided:
// * tailcallopt is enabled
// * caller/callee are fastcc
// On X86_64 architecture with GOT-style position independent code only local
// (within module) calls are supported at the moment.
// To keep the stack aligned according to platform abi the function
// GetAlignedArgumentStackSize ensures that argument delta is always multiples
// of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
// If a tail called function callee has more arguments than the caller the
// caller needs to make sure that there is room to move the RETADDR to. This is
// achieved by reserving an area the size of the argument delta right after the
// original REtADDR, but before the saved framepointer or the spilled registers
// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
// stack layout:
// arg1
// arg2
// RETADDR
// [ new RETADDR
// move area ]
// (possible EBP)
// ESI
// EDI
// local1 ..
/// GetAlignedArgumentStackSize - Make the stack size align e.g 16n + 12 aligned
/// for a 16 byte align requirement.
unsigned
X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
SelectionDAG& DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const TargetMachine &TM = MF.getTarget();
Anton Korobeynikov
committed
const TargetFrameLowering &TFI = *TM.getFrameLowering();
uint64_t AlignMask = StackAlignment - 1;
uint64_t SlotSize = TD->getPointerSize();
if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
// Number smaller than 12 so just add the difference.
Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
} else {
// Mask out lower bits, add stackalignment once plus the 12 bytes.
Offset = ((~AlignMask) & Offset) + StackAlignment +
}
}
/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
const X86InstrInfo *TII) {
unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
int FI = INT_MAX;
if (Arg.getOpcode() == ISD::CopyFromReg) {
unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
Jakob Stoklund Olesen
committed
if (!TargetRegisterInfo::isVirtualRegister(VR))
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
if (!Def)
return false;
if (!Flags.isByVal()) {
if (!TII->isLoadFromStackSlot(Def, FI))
return false;
} else {
unsigned Opcode = Def->getOpcode();
if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) &&
Def->getOperand(1).isFI()) {
FI = Def->getOperand(1).getIndex();
Bytes = Flags.getByValSize();
} else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
if (Flags.isByVal())
// ByVal argument is passed in as a pointer but it's now being
// define @foo(%struct.X* %A) {
// tail call @bar(%struct.X* byval %A)
// }
return false;
SDValue Ptr = Ld->getBasePtr();
FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
if (!FINode)
return false;
FI = FINode->getIndex();
} else
return false;
assert(FI != INT_MAX);
if (!MFI->isFixedObjectIndex(FI))
return false;
return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
Sandeep Patel
committed
CallingConv::ID CalleeCC,
bool isCalleeStructRet,
bool isCallerStructRet,
Evan Cheng
committed
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
Evan Cheng
committed
const SmallVectorImpl<ISD::InputArg> &Ins,
if (!IsTailCallConvention(CalleeCC) &&
Evan Cheng
committed
CalleeCC != CallingConv::C)
return false;
// If -tailcallopt is specified, make fastcc functions tail-callable.
const MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = DAG.getMachineFunction().getFunction();
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
if (GuaranteedTailCallOpt) {
if (IsTailCallConvention(CalleeCC) && CCMatch)
// Look for obvious safe cases to perform tail call optimization that do not
// require ABI changes. This is what gcc calls sibcall.
// Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
// emit a special epilogue.
if (RegInfo->needsStackRealignment(MF))
return false;
// Do not sibcall optimize vararg calls unless the call site is not passing
// any arguments.
if (isVarArg && !Outs.empty())
// Also avoid sibcall optimization if either caller or callee uses struct
// return semantics.
if (isCalleeStructRet || isCallerStructRet)
return false;
// If the call result is in ST0 / ST1, it needs to be popped off the x87 stack.
// Therefore if it's not used by the call it is not safe to optimize this into
// a sibcall.
bool Unused = false;
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
if (!Ins[i].Used) {
Unused = true;
break;
}
}
if (Unused) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CalleeCC, false, getTargetMachine(),
RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
return false;
}
}
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
// If the calling conventions do not match, then we'd better make sure the
// results are returned in the same way as what the caller expects.
if (!CCMatch) {
SmallVector<CCValAssign, 16> RVLocs1;
CCState CCInfo1(CalleeCC, false, getTargetMachine(),
RVLocs1, *DAG.getContext());
CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
SmallVector<CCValAssign, 16> RVLocs2;
CCState CCInfo2(CallerCC, false, getTargetMachine(),
RVLocs2, *DAG.getContext());
CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
if (RVLocs1.size() != RVLocs2.size())
return false;
for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
return false;
if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
return false;
if (RVLocs1[i].isRegLoc()) {
if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
return false;
} else {
if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
return false;
}
}
}
Evan Cheng
committed
// If the callee takes no arguments then go on to check the results of the
// call.
if (!Outs.empty()) {
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext());
NAKAMURA Takumi
committed
// Allocate shadow area for Win64
if (Subtarget->isTargetWin64()) {
CCInfo.AllocateStack(32, 8);
}
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
if (CCInfo.getNextStackOffset()) {
MachineFunction &MF = DAG.getMachineFunction();
if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
return false;
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const X86InstrInfo *TII =
((X86TargetMachine&)getTargetMachine()).getInstrInfo();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (VA.getLocInfo() == CCValAssign::Indirect)
return false;
if (!VA.isRegLoc()) {
if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
MFI, MRI, TII))
return false;
}
}
}
Evan Cheng
committed
// If the tailcall address may be in a register, then make sure it's
// possible to register allocate for it. In 32-bit, the call address can
// only target EAX, EDX, or ECX since the tail call must be scheduled after
// callee-saved registers are restored. These happen to be the same
// registers used to pass 'inreg' arguments so watch out for those.
if (!Subtarget->is64Bit() &&
!isa<GlobalAddressSDNode>(Callee) &&
Evan Cheng
committed
!isa<ExternalSymbolSDNode>(Callee)) {
unsigned NumInRegs = 0;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc())
continue;
unsigned Reg = VA.getLocReg();
switch (Reg) {
default: break;
case X86::EAX: case X86::EDX: case X86::ECX:
if (++NumInRegs == 3)
Evan Cheng
committed
return false;
break;
Evan Cheng
committed
}
}
}
Evan Cheng
committed
}
Evan Cheng
committed
// An stdcall caller is expected to clean up its arguments; the callee
// isn't going to do that.
if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
return false;
Evan Cheng
committed
return true;
}
X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
return X86::createFastISel(funcInfo);
}
//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
Bruno Cardoso Lopes
committed
static bool MayFoldLoad(SDValue Op) {
return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
}
static bool MayFoldIntoStore(SDValue Op) {
return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
}
static bool isTargetShuffle(unsigned Opcode) {
switch(Opcode) {
default: return false;
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::SHUFPD:
Bruno Cardoso Lopes
committed
case X86ISD::PALIGN:
case X86ISD::SHUFPS:
case X86ISD::MOVLHPS:
Bruno Cardoso Lopes
committed
case X86ISD::MOVLHPD:
Bruno Cardoso Lopes
committed
case X86ISD::MOVHLPS:
Bruno Cardoso Lopes
committed
case X86ISD::MOVLPS:
case X86ISD::MOVLPD:
case X86ISD::MOVSHDUP:
Bruno Cardoso Lopes
committed
case X86ISD::MOVSLDUP:
case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
Bruno Cardoso Lopes
committed
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
case X86ISD::VUNPCKLPS:
case X86ISD::VUNPCKLPD:
case X86ISD::VUNPCKLPSY:
case X86ISD::VUNPCKLPDY:
Bruno Cardoso Lopes
committed
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLBW:
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
Bruno Cardoso Lopes
committed
case X86ISD::UNPCKHPS:
case X86ISD::UNPCKHPD:
Bruno Cardoso Lopes
committed
case X86ISD::PUNPCKHWD:
case X86ISD::PUNPCKHBW:
case X86ISD::PUNPCKHDQ:
case X86ISD::PUNPCKHQDQ:
return true;
}
return false;
}
static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
SDValue V1, SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::MOVSHDUP:
Bruno Cardoso Lopes
committed
case X86ISD::MOVSLDUP:
case X86ISD::MOVDDUP:
return DAG.getNode(Opc, dl, VT, V1);
}
return SDValue();
}
static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
Bruno Cardoso Lopes
committed
SDValue V1, unsigned TargetMask, SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
Bruno Cardoso Lopes
committed
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
}
return SDValue();
}
Bruno Cardoso Lopes
committed
static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
Bruno Cardoso Lopes
committed
case X86ISD::PALIGN:
Bruno Cardoso Lopes
committed
case X86ISD::SHUFPD:
case X86ISD::SHUFPS:
return DAG.getNode(Opc, dl, VT, V1, V2,
DAG.getConstant(TargetMask, MVT::i8));
}
return SDValue();
}
static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
SDValue V1, SDValue V2, SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::MOVLHPS:
Bruno Cardoso Lopes
committed
case X86ISD::MOVLHPD:
Bruno Cardoso Lopes
committed
case X86ISD::MOVHLPS:
Bruno Cardoso Lopes
committed
case X86ISD::MOVLPS:
case X86ISD::MOVLPD:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
Bruno Cardoso Lopes
committed
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
case X86ISD::VUNPCKLPS:
case X86ISD::VUNPCKLPD:
case X86ISD::VUNPCKLPSY:
case X86ISD::VUNPCKLPDY:
Bruno Cardoso Lopes
committed
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLBW:
Bruno Cardoso Lopes
committed
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
Bruno Cardoso Lopes
committed
case X86ISD::UNPCKHPS:
case X86ISD::UNPCKHPD:
Bruno Cardoso Lopes
committed
case X86ISD::PUNPCKHWD:
case X86ISD::PUNPCKHBW:
case X86ISD::PUNPCKHDQ:
case X86ISD::PUNPCKHQDQ:
Bruno Cardoso Lopes
committed
return DAG.getNode(Opc, dl, VT, V1, V2);
}
return SDValue();
}
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
Anton Korobeynikov
committed
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
int ReturnAddrIndex = FuncInfo->getRAIndex();
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
uint64_t SlotSize = TD->getPointerSize();
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
false);
Anton Korobeynikov
committed
FuncInfo->setRAIndex(ReturnAddrIndex);
}
return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
}
Anton Korobeynikov
committed
bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
bool hasSymbolicDisplacement) {
// Offset should fit into 32 bit immediate field.
Benjamin Kramer
committed
if (!isInt<32>(Offset))
Anton Korobeynikov
committed
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
return false;
// If we don't have a symbolic displacement - we don't have any extra
// restrictions.
if (!hasSymbolicDisplacement)
return true;
// FIXME: Some tweaks might be needed for medium code model.
if (M != CodeModel::Small && M != CodeModel::Kernel)
return false;
// For small code model we assume that latest object is 16MB before end of 31
// bits boundary. We may also accept pretty large negative constants knowing
// that all objects are in the positive half of address space.
if (M == CodeModel::Small && Offset < 16*1024*1024)
return true;
// For kernel code model we know that all object resist in the negative half
// of 32bits address space. We may not accept negative offsets, since they may
// be just off and we may accept pretty large positive ones.
if (M == CodeModel::Kernel && Offset > 0)
return true;
return false;
}
/// TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86
/// specific condition code, returning the condition code and the LHS/RHS of the
/// comparison to make.
static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
// X > -1 -> X == 0, jump !sign.
RHS = DAG.getConstant(0, RHS.getValueType());
return X86::COND_NS;
} else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
// X < 0 -> X == 0, jump on sign.
return X86::COND_S;
} else if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
// X < 1 -> X <= 0
RHS = DAG.getConstant(0, RHS.getValueType());
return X86::COND_LE;
default: llvm_unreachable("Invalid integer condition!");
case ISD::SETEQ: return X86::COND_E;
case ISD::SETGT: return X86::COND_G;
case ISD::SETGE: return X86::COND_GE;
case ISD::SETLT: return X86::COND_L;
case ISD::SETLE: return X86::COND_LE;
case ISD::SETNE: return X86::COND_NE;
case ISD::SETULT: return X86::COND_B;
case ISD::SETUGT: return X86::COND_A;
case ISD::SETULE: return X86::COND_BE;
case ISD::SETUGE: return X86::COND_AE;
}
// First determine if it is required or is profitable to flip the operands.
// If LHS is a foldable load, but RHS is not, flip the condition.
Rafael Espindola
committed
if (ISD::isNON_EXTLoad(LHS.getNode()) &&
!ISD::isNON_EXTLoad(RHS.getNode())) {
SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
std::swap(LHS, RHS);
}
Evan Cheng
committed
switch (SetCCOpcode) {
default: break;
case ISD::SETOLT:
case ISD::SETOLE:
case ISD::SETUGT:
case ISD::SETUGE:
std::swap(LHS, RHS);
break;
// On a floating point condition, the flags are set as follows:
// ZF PF CF op
// 0 | 0 | 0 | X > Y
// 0 | 0 | 1 | X < Y
// 1 | 0 | 0 | X == Y
// 1 | 1 | 1 | unordered
switch (SetCCOpcode) {
default: llvm_unreachable("Condcode should be pre-legalized away");
case ISD::SETUEQ:
case ISD::SETEQ: return X86::COND_E;
case ISD::SETOLT: // flipped
case ISD::SETOGT:
case ISD::SETGT: return X86::COND_A;
case ISD::SETOLE: // flipped
case ISD::SETOGE:
case ISD::SETGE: return X86::COND_AE;
case ISD::SETUGT: // flipped
case ISD::SETULT:
case ISD::SETLT: return X86::COND_B;
case ISD::SETUGE: // flipped
case ISD::SETULE:
case ISD::SETLE: return X86::COND_BE;
case ISD::SETONE:
case ISD::SETNE: return X86::COND_NE;
case ISD::SETUO: return X86::COND_P;
case ISD::SETO: return X86::COND_NP;
case ISD::SETOEQ:
case ISD::SETUNE: return X86::COND_INVALID;
}
/// hasFPCMov - is there a floating point cmov for the specific X86 condition
/// code. Current x86 isa includes the following FP cmov instructions:
/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
switch (X86CC) {
default:
return false;
case X86::COND_B:
case X86::COND_BE:
case X86::COND_E:
case X86::COND_P:
case X86::COND_A:
case X86::COND_AE:
case X86::COND_NE:
case X86::COND_NP:
return true;
}
}
Evan Cheng
committed
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
Evan Cheng
committed
for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
return true;
}
return false;
}
/// isUndefOrInRange - Return true if Val is undef or if its value falls within
/// the specified range (L, H].
static bool isUndefOrInRange(int Val, int Low, int Hi) {
return (Val < 0) || (Val >= Low && Val < Hi);
/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
/// specified value.
static bool isUndefOrEqual(int Val, int CmpVal) {
if (Val < 0 || Val == CmpVal)
return false;
/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
/// the second operand.
Owen Anderson
committed
static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) {
return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
Owen Anderson
committed
if (VT == MVT::v2f64 || VT == MVT::v2i64)
return (Mask[0] < 2 && Mask[1] < 2);
return false;
bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) {
N->getMask(M);
return ::isPSHUFDMask(M, N->getValueType(0));
}
/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFHW.
Owen Anderson
committed
static bool isPSHUFHWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
Owen Anderson
committed
if (VT != MVT::v8i16)
// Lower quadword copied in order or undef.
for (int i = 0; i != 4; ++i)
if (Mask[i] >= 0 && Mask[i] != i)
for (int i = 4; i != 8; ++i)
if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7))