Newer
Older
Mon P Wang
committed
// trap. Do a custom widening for these operations in which we never
// generate more divides/remainder than the original vector width.
for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
if (!isTypeLegal((MVT::SimpleValueType)VT)) {
setOperationAction(ISD::SDIV, (MVT::SimpleValueType) VT, Custom);
setOperationAction(ISD::UDIV, (MVT::SimpleValueType) VT, Custom);
setOperationAction(ISD::SREM, (MVT::SimpleValueType) VT, Custom);
setOperationAction(ISD::UREM, (MVT::SimpleValueType) VT, Custom);
}
}
// FIXME: These should be based on subtarget info. Plus, the values should
// be smaller when we are in optimizing for size mode.
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
Evan Cheng
committed
setPrefLoopAlignment(16);
Evan Cheng
committed
benefitFromCodePlacementOpt = true;
}
Owen Anderson
committed
MVT::SimpleValueType X86TargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i8;
}
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
if (MaxAlign == 16)
return;
if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
if (VTy->getBitWidth() == 128)
MaxAlign = 16;
} else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
unsigned EltAlign = 0;
getMaxByValAlign(ATy->getElementType(), EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
} else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
unsigned EltAlign = 0;
getMaxByValAlign(STy->getElementType(i), EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
if (MaxAlign == 16)
break;
}
}
return;
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
if (Subtarget->is64Bit()) {
// Max of 8 and alignment of type.
unsigned TyAlign = TD->getABITypeAlignment(Ty);
if (TyAlign > 8)
return TyAlign;
return 8;
}
unsigned Align = 4;
if (Subtarget->hasSSE1())
getMaxByValAlign(Ty, Align);
return Align;
}
Evan Cheng
committed
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
Owen Anderson
committed
/// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
Evan Cheng
committed
/// determining it.
Owen Anderson
committed
EVT
Evan Cheng
committed
X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
bool isSrcConst, bool isSrcStr,
SelectionDAG &DAG) const {
// FIXME: This turns off use of xmm stores for memset/memcpy on targets like
// linux. This is because the stack realignment code can't handle certain
// cases like PR2962. This should be removed when PR2962 is fixed.
const Function *F = DAG.getMachineFunction().getFunction();
bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
if (!NoImplicitFloatOps && Subtarget->getStackAlignment() >= 16) {
if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
Owen Anderson
committed
return MVT::v4i32;
if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
Owen Anderson
committed
return MVT::v4f32;
}
Evan Cheng
committed
if (Subtarget->is64Bit() && Size >= 8)
Owen Anderson
committed
return MVT::i64;
return MVT::i32;
Evan Cheng
committed
}
/// getJumpTableEncoding - Return the entry encoding for a jump table in the
/// current function. The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
unsigned X86TargetLowering::getJumpTableEncoding() const {
// In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
// symbol.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT())
return MachineJumpTableInfo::EK_Custom32;
// Otherwise, use the normal jump table encoding heuristics.
return TargetLowering::getJumpTableEncoding();
}
const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
unsigned uid,MCContext &Ctx) const{
assert(getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT());
// In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
// entries.
// FIXME: @GOTOFF should be a property of MCSymbolRefExpr not in the MCSymbol.
std::string Name = MBB->getSymbol(Ctx)->getName() + "@GOTOFF";
return MCSymbolRefExpr::Create(Ctx.GetOrCreateSymbol(StringRef(Name)), Ctx);
}
/// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
/// jumptable.
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
Chris Lattner
committed
if (!Subtarget->is64Bit())
// This doesn't have DebugLoc associated with it, but is not really the
// same as a Register.
return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc::getUnknownLoc(),
getPointerTy());
Bill Wendling
committed
/// getFunctionAlignment - Return the Log2 alignment of this function.
Bill Wendling
committed
unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4;
Bill Wendling
committed
}
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "X86GenCallingConv.inc"
Kenneth Uildriks
committed
bool
X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<EVT> &OutTys,
const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
SelectionDAG &DAG) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
RVLocs, *DAG.getContext());
return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_X86);
}
SDValue
X86TargetLowering::LowerReturn(SDValue Chain,
Sandeep Patel
committed
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
DebugLoc dl, SelectionDAG &DAG) {
Chris Lattner
committed
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
// If this is the first return lowered for this function, add the regs to the
// liveout set for the function.
if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
Chris Lattner
committed
for (unsigned i = 0; i != RVLocs.size(); ++i)
if (RVLocs[i].isRegLoc())
DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
}
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Operand #1 = Bytes To Pop
RetOps.push_back(DAG.getTargetConstant(getBytesToPopOnReturn(), MVT::i16));
// Copy the result values into the output registers.
Chris Lattner
committed
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue ValToCopy = Outs[i].Val;
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
// the RET instruction and handled by the FP Stackifier.
if (VA.getLocReg() == X86::ST0 ||
VA.getLocReg() == X86::ST1) {
// If this is a copy from an xmm register to ST(0), use an FPExtend to
// change the value to the FP stack register class.
if (isScalarFPTypeInSSEReg(VA.getValVT()))
Owen Anderson
committed
ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
RetOps.push_back(ValToCopy);
// Don't emit a copytoreg.
continue;
}
// 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
// which is returned in RAX / RDX.
if (Subtarget->is64Bit()) {
Owen Anderson
committed
EVT ValVT = ValToCopy.getValueType();
if (ValVT.isVector() && ValVT.getSizeInBits() == 64) {
Owen Anderson
committed
ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1)
Owen Anderson
committed
ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
}
// The x86-64 ABI for returning structs by value requires that we copy
// the sret argument into %rax for the return. We saved the argument into
// a virtual register in the entry block, so now we copy the value out
// and into %rax.
if (Subtarget->is64Bit() &&
DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
Owen Anderson
committed
Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
FuncInfo->setSRetReturnReg(Reg);
}
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
Flag = Chain.getValue(1);
// RAX now acts like a return value.
MF.getRegInfo().addLiveOut(X86::RAX);
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
Gabor Greif
committed
if (Flag.getNode())
RetOps.push_back(Flag);
return DAG.getNode(X86ISD::RET_FLAG, dl,
Owen Anderson
committed
MVT::Other, &RetOps[0], RetOps.size());
}
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
///
SDValue
X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
Sandeep Patel
committed
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) {
// Assign locations to each value returned by this call.
Chris Lattner
committed
SmallVector<CCValAssign, 16> RVLocs;
Torok Edwin
committed
bool Is64Bit = Subtarget->is64Bit();
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
Chris Lattner
committed
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
Owen Anderson
committed
EVT CopyVT = VA.getValVT();
Torok Edwin
committed
// If this is x86-64, and we disabled SSE, we can't return FP values
Owen Anderson
committed
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
llvm_report_error("SSE register return with SSE disabled");
Torok Edwin
committed
}
Chris Lattner
committed
// If this is a call to a function that returns an fp value on the floating
// point stack, but where we prefer to use the value in xmm registers, copy
// it out as F80 and use a truncate to move it from fp stack reg to xmm reg.
if ((VA.getLocReg() == X86::ST0 ||
VA.getLocReg() == X86::ST1) &&
isScalarFPTypeInSSEReg(VA.getValVT())) {
Owen Anderson
committed
CopyVT = MVT::f80;
}
SDValue Val;
if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) {
// For x86-64, MMX values are returned in XMM0 / XMM1 except for v1i64.
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
Owen Anderson
committed
MVT::v2i64, InFlag).getValue(1);
Val = Chain.getValue(0);
Owen Anderson
committed
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
Val, DAG.getConstant(0, MVT::i64));
} else {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
Owen Anderson
committed
MVT::i64, InFlag).getValue(1);
Val = Chain.getValue(0);
}
Val = DAG.getNode(ISD::BIT_CONVERT, dl, CopyVT, Val);
} else {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
CopyVT, InFlag).getValue(1);
Val = Chain.getValue(0);
}
Chris Lattner
committed
InFlag = Chain.getValue(2);
if (CopyVT != VA.getValVT()) {
Chris Lattner
committed
// Round the F80 the right size, which also moves to the appropriate xmm
// register.
Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
Chris Lattner
committed
// This truncation won't change the value.
DAG.getIntPtrConstant(1));
}
}
}
//===----------------------------------------------------------------------===//
// C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
// StdCall calling convention seems to be standard for many Windows' API
// routines and around. It differs from C calling convention just a little:
// callee should clean up the stack, not caller. Symbols should be also
// decorated in some fancy way :) It doesn't support any vector arguments.
// For info on fast calling convention see Fast Calling Convention (tail call)
// implementation LowerX86_32FastCCCallTo.
/// CallIsStructReturn - Determines whether a call uses struct return
static bool CallIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
if (Outs.empty())
return false;
}
/// ArgsAreStructReturn - Determines whether a function uses struct
static bool
ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
if (Ins.empty())
return false;
}
/// IsCalleePop - Determines whether the callee is required to pop its
/// own arguments. Callee pop is necessary to support tail calls.
Sandeep Patel
committed
bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){
if (IsVarArg)
return false;
switch (CallingConv) {
default:
return false;
case CallingConv::X86_StdCall:
return !Subtarget->is64Bit();
case CallingConv::X86_FastCall:
return !Subtarget->is64Bit();
case CallingConv::Fast:
return PerformTailCallOpt;
}
}
/// CCAssignFnForNode - Selects the correct CCAssignFn for a the
/// given CallingConvention value.
Sandeep Patel
committed
CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
if (Subtarget->is64Bit()) {
if (Subtarget->isTargetWin64())
return CC_X86_Win64_C;
if (CC == CallingConv::X86_FastCall)
return CC_X86_32_FastCall;
else if (CC == CallingConv::Fast)
return CC_X86_32_FastCC;
else
return CC_X86_32_C;
}
/// NameDecorationForCallConv - Selects the appropriate decoration to
/// apply to a MachineFunction containing a given calling convention.
NameDecorationStyle
Sandeep Patel
committed
X86TargetLowering::NameDecorationForCallConv(CallingConv::ID CallConv) {
if (CallConv == CallingConv::X86_FastCall)
return FastCall;
else if (CallConv == CallingConv::X86_StdCall)
return StdCall;
return None;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
DebugLoc dl) {
Owen Anderson
committed
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
/*AlwaysInline=*/true, NULL, 0, NULL, 0);
}
SDValue
X86TargetLowering::LowerMemArgument(SDValue Chain,
Sandeep Patel
committed
CallingConv::ID CallConv,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
MachineFrameInfo *MFI,
unsigned i) {
// Create the nodes corresponding to a load from this parameter slot.
ISD::ArgFlagsTy Flags = Ins[i].Flags;
bool AlwaysUseMutable = (CallConv==CallingConv::Fast) && PerformTailCallOpt;
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
EVT ValVT;
// If value is passed by pointer we have address passed instead of the value
// itself.
if (VA.getLocInfo() == CCValAssign::Indirect)
ValVT = VA.getLocVT();
else
ValVT = VA.getValVT();
// FIXME: For now, all byval parameter objects are marked mutable. This can be
// changed with more analysis.
// In case of tail call optimization mark all arguments mutable. Since they
// could be overwritten by lowering of arguments in case of a tail call.
int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
return DAG.getLoad(ValVT, dl, Chain, FIN,
PseudoSourceValue::getFixedStack(FI), 0);
X86TargetLowering::LowerFormalArguments(SDValue Chain,
Sandeep Patel
committed
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl,
SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) {
MachineFunction &MF = DAG.getMachineFunction();
Anton Korobeynikov
committed
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const Function* Fn = MF.getFunction();
if (Fn->hasExternalLinkage() &&
Subtarget->isTargetCygMing() &&
Fn->getName() == "main")
FuncInfo->setForceFramePointer(true);
// Decorate the function name.
FuncInfo->setDecorationStyle(NameDecorationForCallConv(CallConv));
MachineFrameInfo *MFI = MF.getFrameInfo();
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isTargetWin64();
assert(!(isVarArg && CallConv == CallingConv::Fast) &&
"Var args not supported with calling convention fastcc");
// Assign locations to all of the incoming arguments.
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
Anton Korobeynikov
committed
SDValue ArgValue;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
// places.
assert(VA.getValNo() != LastVal &&
"Don't support value assigned to multiple locs yet");
LastVal = VA.getValNo();
Owen Anderson
committed
EVT RegVT = VA.getLocVT();
Owen Anderson
committed
if (RegVT == MVT::i32)
Owen Anderson
committed
else if (Is64Bit && RegVT == MVT::i64)
Owen Anderson
committed
else if (RegVT == MVT::f32)
Owen Anderson
committed
else if (RegVT == MVT::f64)
else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
Anton Korobeynikov
committed
else if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
RC = X86::VR64RegisterClass;
else
llvm_unreachable("Unknown argument type!");
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
// If this is an 8 or 16-bit value, it is really passed promoted to 32
// bits. Insert an assert[sz]ext to capture this, then truncate to the
// right size.
if (VA.getLocInfo() == CCValAssign::SExt)
ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
else if (VA.getLocInfo() == CCValAssign::ZExt)
ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
Anton Korobeynikov
committed
else if (VA.getLocInfo() == CCValAssign::BCvt)
ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
Anton Korobeynikov
committed
if (VA.isExtInLoc()) {
Anton Korobeynikov
committed
// Handle MMX values passed in XMM regs.
if (RegVT.isVector()) {
Owen Anderson
committed
ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
ArgValue, DAG.getConstant(0, MVT::i64));
Anton Korobeynikov
committed
ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
} else
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
Anton Korobeynikov
committed
// If value is passed via pointer - do a load.
if (VA.getLocInfo() == CCValAssign::Indirect)
ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0);
Anton Korobeynikov
committed
// The x86-64 ABI for returning structs by value requires that we copy
// the sret argument into %rax for the return. Save the argument into
// a virtual register so that we can access it from the return points.
if (Is64Bit && MF.getFunction()->hasStructRetAttr()) {
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
Owen Anderson
committed
Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
FuncInfo->setSRetReturnReg(Reg);
}
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
Owen Anderson
committed
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
// align stack specially for tail calls
if (PerformTailCallOpt && CallConv == CallingConv::Fast)
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
if (Is64Bit || CallConv != CallingConv::X86_FastCall) {
Evan Cheng
committed
}
if (Is64Bit) {
unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
// FIXME: We should really autogenerate these arrays
static const unsigned GPR64ArgRegsWin64[] = {
X86::RCX, X86::RDX, X86::R8, X86::R9
};
static const unsigned XMMArgRegsWin64[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
};
static const unsigned GPR64ArgRegs64Bit[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
};
static const unsigned XMMArgRegs64Bit[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
const unsigned *GPR64ArgRegs, *XMMArgRegs;
if (IsWin64) {
TotalNumIntRegs = 4; TotalNumXMMRegs = 4;
GPR64ArgRegs = GPR64ArgRegsWin64;
XMMArgRegs = XMMArgRegsWin64;
} else {
TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
GPR64ArgRegs = GPR64ArgRegs64Bit;
XMMArgRegs = XMMArgRegs64Bit;
}
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
TotalNumIntRegs);
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
TotalNumXMMRegs);
bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
Torok Edwin
committed
"SSE register cannot be used when SSE is disabled!");
if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
Torok Edwin
committed
// Kernel mode asks for SSE to be disabled, so don't push them
// on the stack.
TotalNumXMMRegs = 0;
// For X86-64, if there are vararg parameters that are passed via
// registers, then we must store them to their spots on the stack so they
// may be loaded by deferencing the result of va_next.
VarArgsGPOffset = NumIntRegs * 8;
VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16;
RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 +
// Store the integer parameter registers.
SmallVector<SDValue, 8> MemOps;
SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
unsigned Offset = VarArgsGPOffset;
for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
DAG.getIntPtrConstant(Offset));
unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
X86::GR64RegisterClass);
Owen Anderson
committed
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
DAG.getStore(Val.getValue(1), dl, Val, FIN,
PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
Offset);
MemOps.push_back(Store);
Offset += 8;
}
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
if (TotalNumXMMRegs != 0 && NumXMMRegs != TotalNumXMMRegs) {
// Now store the XMM (fp + vector) parameter registers.
SmallVector<SDValue, 11> SaveXMMOps;
SaveXMMOps.push_back(Chain);
unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass);
SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
SaveXMMOps.push_back(ALVal);
SaveXMMOps.push_back(DAG.getIntPtrConstant(RegSaveFrameIndex));
SaveXMMOps.push_back(DAG.getIntPtrConstant(VarArgsFPOffset));
for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
X86::VR128RegisterClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
SaveXMMOps.push_back(Val);
}
MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
MVT::Other,
&SaveXMMOps[0], SaveXMMOps.size()));
}
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOps[0], MemOps.size());
// Some CCs need callee pop.
if (IsCalleePop(isVarArg, CallConv)) {
BytesToPopOnReturn = StackSize; // Callee pops everything.
BytesCallerReserves = 0;
} else {
BytesToPopOnReturn = 0; // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
if (!Is64Bit && CallConv != CallingConv::Fast && ArgsAreStructReturn(Ins))
BytesToPopOnReturn = 4;
BytesCallerReserves = StackSize;
}
if (!Is64Bit) {
RegSaveFrameIndex = 0xAAAAAAA; // RegSaveFrameIndex is X86-64 only.
if (CallConv == CallingConv::X86_FastCall)
VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
}
Anton Korobeynikov
committed
FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
}
X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
SDValue StackPtr, SDValue Arg,
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
Anton Korobeynikov
committed
const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0);
unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
}
return DAG.getStore(Chain, dl, Arg, PtrOff,
PseudoSourceValue::getStack(), LocMemOffset);
}
/// EmitTailCallLoadRetAddr - Emit a load of return address if tail call
/// optimization is performed and it is required.
SDValue
X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
SDValue Chain,
bool IsTailCall,
bool Is64Bit,
int FPDiff,
DebugLoc dl) {
if (!IsTailCall || FPDiff==0) return Chain;
// Adjust the Return address stack slot.
Owen Anderson
committed
EVT VT = getPointerTy();
OutRetAddr = getReturnAddressFrameIndex(DAG);
// Load the "old" Return address.
OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0);
Gabor Greif
committed
return SDValue(OutRetAddr.getNode(), 1);
}
/// EmitTailCallStoreRetAddr - Emit a store of the return adress if tail call
/// optimization is performed and it is required (FPDiff!=0).
static SDValue
EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
bool Is64Bit, int FPDiff, DebugLoc dl) {
// Store the return address to the appropriate stack slot.
if (!FPDiff) return Chain;
// Calculate the new stack slot for the return address.
int SlotSize = Is64Bit ? 8 : 4;
int NewReturnAddrFI =
MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize,
true, false);
Owen Anderson
committed
EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0);
return Chain;
}
SDValue
X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Sandeep Patel
committed
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) {
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget->is64Bit();
bool IsStructRet = CallIsStructReturn(Outs);
assert((!isTailCall ||
(CallConv == CallingConv::Fast && PerformTailCallOpt)) &&
"IsEligibleForTailCallOptimization missed a case!");
assert(!(isVarArg && CallConv == CallingConv::Fast) &&
"Var args not supported with calling convention fastcc");
// Analyze operands of the call, assigning locations to each operand.
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
ArgLocs, *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
if (PerformTailCallOpt && CallConv == CallingConv::Fast)
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
// Lower arguments at fp - stackoffset + fpdiff.
unsigned NumBytesCallerPushed =
MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
FPDiff = NumBytesCallerPushed - NumBytes;
// Set the delta of movement of the returnaddr stackslot.
// But only set if delta is greater than previous delta.
if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
}
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
// Load return adress for tail calls.
Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, Is64Bit,
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
SDValue StackPtr;
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization arguments are handle later.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
Owen Anderson
committed
EVT RegVT = VA.getLocVT();
SDValue Arg = Outs[i].Val;
ISD::ArgFlagsTy Flags = Outs[i].Flags;
bool isByVal = Flags.isByVal();
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
Anton Korobeynikov
committed
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
Anton Korobeynikov
committed
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
Anton Korobeynikov
committed
if (RegVT.isVector() && RegVT.getSizeInBits() == 128) {
// Special case: passing MMX values in XMM registers.
Owen Anderson
committed
Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
Anton Korobeynikov
committed
} else
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BIT_CONVERT, dl, RegVT, Arg);
break;
Anton Korobeynikov
committed
case CCValAssign::Indirect: {
// Store the argument.
SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
Anton Korobeynikov
committed
Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
PseudoSourceValue::getFixedStack(FI), 0);
Anton Korobeynikov
committed
Arg = SpillSlot;
break;
}
}
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
if (!isTailCall || (isTailCall && isByVal)) {
assert(VA.isMemLoc());
Gabor Greif
committed
if (StackPtr.getNode() == 0)
StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
dl, DAG, VA, Flags));
}
}
}
if (!MemOpChains.empty())
Owen Anderson
committed
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into registers.
// Tail call byval lowering might overwrite argument registers so in case of
// tail call optimization the copies to registers are lowered later.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
if (Subtarget->isPICStyleGOT()) {
// ELF / PIC requires GOT in the EBX register before function calls via PLT
// GOT pointer.
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg,
DebugLoc::getUnknownLoc(),
getPointerTy()),
InFlag);
InFlag = Chain.getValue(1);
} else {
// If we are tail calling and generating PIC/GOT style code load the
// address of the callee into ECX. The value in ecx is used as target of
// the tail jump. This is done to circumvent the ebx/callee-saved problem
// for tail calls on PIC/GOT architectures. Normally we would just put the
// address of GOT into ebx and then call target@PLT. But for tail calls
// ebx would be restored (since ebx is callee saved) before jumping to the
// target@PLT.
// Note: The actual moving to ECX is done further down.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
if (G && !G->getGlobal()->hasHiddenVisibility() &&
!G->getGlobal()->hasProtectedVisibility())
Callee = LowerGlobalAddress(Callee, DAG);
else if (isa<ExternalSymbolSDNode>(Callee))
Callee = LowerExternalSymbol(Callee, DAG);
}
}
if (Is64Bit && isVarArg) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
// the declaration) %al is used as hidden argument to specify the number
// of SSE registers used. The contents of %al do not need to match exactly
// the number of registers, but must be an ubound on the number of SSE
// registers used and is in the range 0 - 8 inclusive.
// FIXME: Verify this on Win64
// Count the number of XMM registers allocated.
static const unsigned XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
assert((Subtarget->hasSSE1() || !NumXMMRegs)
Torok Edwin
committed
&& "SSE registers cannot be used when SSE is disabled");
Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
Owen Anderson
committed
DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
// For tail calls lower the arguments to the 'real' stack slot.
if (isTailCall) {
// Force all the incoming stack arguments to be loaded from the stack
// before any new outgoing arguments are stored to the stack, because the
// outgoing stack slots may alias the incoming argument stack slots, and
// the alias isn't otherwise explicit. This is slightly more conservative
// than necessary, because it means that each store effectively depends
// on every argument instead of just those arguments it would clobber.
SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
SmallVector<SDValue, 8> MemOpChains2;
SDValue FIN;
int FI = 0;
// Do not flag preceeding copytoreg stuff together with the following stuff.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc()) {
assert(VA.isMemLoc());
SDValue Arg = Outs[i].Val;
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
FIN = DAG.getFrameIndex(FI, getPointerTy());
SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
Gabor Greif
committed
if (StackPtr.getNode() == 0)
StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
ArgChain,
} else {
MemOpChains2.push_back(
DAG.getStore(ArgChain, dl, Arg, FIN,
PseudoSourceValue::getFixedStack(FI), 0));
}
}
if (!MemOpChains2.empty())
Owen Anderson
committed
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains2[0], MemOpChains2.size());
// Copy arguments to their registers.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
// Store the return address to the appropriate stack slot.
Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
}
bool WasGlobalOrExternal = false;
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");