setOperationAction(ISD::AND, SVT, Promote);
AddPromotedToType (ISD::AND, SVT, MVT::v4i64);
setOperationAction(ISD::OR, SVT, Promote);
AddPromotedToType (ISD::OR, SVT, MVT::v4i64);
setOperationAction(ISD::XOR, SVT, Promote);
AddPromotedToType (ISD::XOR, SVT, MVT::v4i64);
setOperationAction(ISD::LOAD, SVT, Promote);
AddPromotedToType (ISD::LOAD, SVT, MVT::v4i64);
setOperationAction(ISD::SELECT, SVT, Promote);
AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64);
// SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion
// of this type with custom code.
for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT++) {
setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, Custom);
}
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
// handle type legalization for these operations here.
// FIXME: We really should do custom legalization for addition and
// subtraction on x86-32 once PR3203 is fixed. We really can't do much better
// than generic legalization for 64-bit multiplication-with-overflow, though.
for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
// Add/Sub/Mul with overflow operations are custom lowered.
MVT VT = IntVTs[i];
setOperationAction(ISD::SADDO, VT, Custom);
setOperationAction(ISD::UADDO, VT, Custom);
setOperationAction(ISD::SSUBO, VT, Custom);
setOperationAction(ISD::USUBO, VT, Custom);
setOperationAction(ISD::SMULO, VT, Custom);
setOperationAction(ISD::UMULO, VT, Custom);
}
// There are no 8-bit 3-address imul/mul instructions
setOperationAction(ISD::SMULO, MVT::i8, Expand);
setOperationAction(ISD::UMULO, MVT::i8, Expand);
if (!Subtarget->is64Bit()) {
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::SHL_I128, 0);
setLibcallName(RTLIB::SRL_I128, 0);
setLibcallName(RTLIB::SRA_I128, 0);
}
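// Note: clearing these libcall names leaves the legalizer with no runtime
// routine to call (normally __ashlti3 / __lshrti3 / __ashrti3), so 128-bit
// shifts are expanded inline on 32-bit targets instead of becoming libcalls.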
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::SINT_TO_FP);
if (Subtarget->is64Bit())
setTargetDAGCombine(ISD::MUL);
computeRegisterProperties();
// On Darwin, -Os means optimize for size without hurting performance, so do
// not reduce the limit.
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
setPrefLoopAlignment(16);
benefitFromCodePlacementOpt = true;
setPrefFunctionAlignment(4);
}
MVT::SimpleValueType X86TargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i8;
}
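// Note: x86 SETcc instructions produce an 8-bit result (e.g. "sete %al"),
// which is why the setcc result type is fixed to MVT::i8 here.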
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
if (MaxAlign == 16)
return;
if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
if (VTy->getBitWidth() == 128)
MaxAlign = 16;
} else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
unsigned EltAlign = 0;
getMaxByValAlign(ATy->getElementType(), EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
unsigned EltAlign = 0;
getMaxByValAlign(STy->getElementType(i), EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
if (MaxAlign == 16)
break;
}
}
return;
}
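// For example, a struct containing a <4 x float> member yields a max ByVal
// alignment of 16 here, while an aggregate of plain scalars leaves MaxAlign
// unchanged.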
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
if (Subtarget->is64Bit()) {
// Max of 8 and alignment of type.
unsigned TyAlign = TD->getABITypeAlignment(Ty);
if (TyAlign > 8)
return TyAlign;
return 8;
}
unsigned Align = 4;
if (Subtarget->hasXMM())
getMaxByValAlign(Ty, Align);
return Align;
}
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, the destination's alignment can satisfy
/// any constraint. Similarly, if SrcAlign is zero there is no need to check
/// it against an alignment requirement, probably because the source does not
/// need to be loaded. If
/// 'NonScalarIntSafe' is true, that means it's safe to return a
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
/// constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
bool NonScalarIntSafe,
bool MemcpyStrSrc,
MachineFunction &MF) const {
// FIXME: This turns off use of xmm stores for memset/memcpy on targets like
// linux. This is because the stack realignment code can't handle certain
// cases like PR2962. This should be removed when PR2962 is fixed.
const Function *F = MF.getFunction();
if (NonScalarIntSafe &&
!F->hasFnAttr(Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
(Subtarget->isUnalignedMemAccessFast() ||
((DstAlign == 0 || DstAlign >= 16) &&
(SrcAlign == 0 || SrcAlign >= 16))) &&
Subtarget->getStackAlignment() >= 16) {
if (Subtarget->hasSSE2())
return MVT::v4i32;
if (Subtarget->hasSSE1())
return MVT::v4f32;
} else if (!MemcpyStrSrc && Size >= 8 &&
!Subtarget->is64Bit() &&
Subtarget->getStackAlignment() >= 8 &&
Subtarget->hasXMMInt()) {
// Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads.
return MVT::f64;
}
}
if (Subtarget->is64Bit() && Size >= 8)
return MVT::i64;
return MVT::i32;
}
/// getJumpTableEncoding - Return the entry encoding for a jump table in the
/// current function. The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
unsigned X86TargetLowering::getJumpTableEncoding() const {
// In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
// symbol.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT())
return MachineJumpTableInfo::EK_Custom32;
// Otherwise, use the normal jump table encoding heuristics.
return TargetLowering::getJumpTableEncoding();
}
const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
unsigned uid,MCContext &Ctx) const{
assert(getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT());
// In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
// entries.
return MCSymbolRefExpr::Create(MBB->getSymbol(),
MCSymbolRefExpr::VK_GOTOFF, Ctx);
}
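// The resulting jump table entries are emitted as @GOTOFF differences,
// e.g. ".long .LBB0_2@GOTOFF" (label name illustrative), matching the
// EK_Custom32 entry kind chosen in getJumpTableEncoding() above.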
/// getPICJumpTableRelocBase - Returns relocation base for the given PIC
/// jumptable.
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
if (!Subtarget->is64Bit())
// This doesn't have DebugLoc associated with it, but is not really the
// same as a Register.
return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy());
return Table;
}
/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
/// MCExpr.
const MCExpr *X86TargetLowering::
getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
MCContext &Ctx) const {
// X86-64 uses RIP relative addressing based on the jump table label.
if (Subtarget->isPICStyleRIPRel())
return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
// Otherwise, the reference is relative to the PIC base.
return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
}
// FIXME: Why is this routine here? Move to RegInfo!
std::pair<const TargetRegisterClass*, uint8_t>
X86TargetLowering::findRepresentativeClass(EVT VT) const{
const TargetRegisterClass *RRC = 0;
uint8_t Cost = 1;
switch (VT.getSimpleVT().SimpleTy) {
default:
return TargetLowering::findRepresentativeClass(VT);
case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
RRC = (Subtarget->is64Bit()
? X86::GR64RegisterClass : X86::GR32RegisterClass);
break;
case MVT::x86mmx:
RRC = X86::VR64RegisterClass;
break;
case MVT::f32: case MVT::f64:
case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
case MVT::v4f32: case MVT::v2f64:
case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
case MVT::v4f64:
RRC = X86::VR128RegisterClass;
break;
}
return std::make_pair(RRC, Cost);
}
bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
unsigned &Offset) const {
if (!Subtarget->isTargetLinux())
return false;
if (Subtarget->is64Bit()) {
// %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
Offset = 0x28;
if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
AddressSpace = 256;
else
AddressSpace = 257;
} else {
// %gs:0x14 on i386
Offset = 0x14;
AddressSpace = 256;
}
return true;
}
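// In the emitted code this becomes a segment-relative load, e.g.
// "movq %fs:40, %rax" on x86-64 Linux or "movl %gs:20, %eax" on i386;
// address spaces 256 and 257 correspond to the GS and FS segments.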
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "X86GenCallingConv.inc"
bool
X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_X86);
}
SDValue
X86TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
// Add the regs to the liveout set for the function.
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
for (unsigned i = 0; i != RVLocs.size(); ++i)
if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg()))
MRI.addLiveOut(RVLocs[i].getLocReg());
SDValue Flag;
SmallVector<SDValue, 6> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Operand #1 = Bytes To Pop
RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(),
MVT::i16));
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue ValToCopy = OutVals[i];
EVT ValVT = ValToCopy.getValueType();
// If this is x86-64, and we disabled SSE, we can't return FP values,
// or SSE or MMX vectors.
if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
(Subtarget->is64Bit() && !Subtarget->hasXMM())) {
report_fatal_error("SSE register return with SSE disabled");
}
// Likewise we can't return F64 values with SSE1 only. gcc does so, but
// llvm-gcc has never done it right and no one has noticed, so this
// should be OK for now.
if (ValVT == MVT::f64 &&
(Subtarget->is64Bit() && !Subtarget->hasXMMInt()))
report_fatal_error("SSE2 register return with SSE2 disabled");
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
// the RET instruction and handled by the FP Stackifier.
if (VA.getLocReg() == X86::ST0 ||
VA.getLocReg() == X86::ST1) {
// If this is a copy from an xmm register to ST(0), use an FPExtend to
// change the value to the FP stack register class.
if (isScalarFPTypeInSSEReg(VA.getValVT()))
ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
RetOps.push_back(ValToCopy);
// Don't emit a copytoreg.
continue;
}
// 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
// which is returned in RAX / RDX.
if (Subtarget->is64Bit()) {
if (ValVT == MVT::x86mmx) {
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
ValToCopy);
// If we don't have SSE2 available, convert to v4f32 so the generated
// register is legal.
if (!Subtarget->hasSSE2())
ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, ValToCopy);
}
}
}
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
}
// The x86-64 ABI for returning structs by value requires that we copy
// the sret argument into %rax for the return. We saved the argument into
// a virtual register in the entry block, so now we copy the value out
// and into %rax.
if (Subtarget->is64Bit() &&
DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
"SRetReturnReg should have been set in LowerFormalArguments().");
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
Flag = Chain.getValue(1);
// RAX now acts like a return value.
MRI.addLiveOut(X86::RAX);
}
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
if (Flag.getNode())
RetOps.push_back(Flag);
return DAG.getNode(X86ISD::RET_FLAG, dl,
MVT::Other, &RetOps[0], RetOps.size());
}
bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
if (N->getNumValues() != 1)
return false;
if (!N->hasNUsesOfValue(1, 0))
return false;
SDNode *Copy = *N->use_begin();
if (Copy->getOpcode() != ISD::CopyToReg &&
Copy->getOpcode() != ISD::FP_EXTEND)
return false;
bool HasRet = false;
for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
UI != UE; ++UI) {
if (UI->getOpcode() != X86ISD::RET_FLAG)
return false;
HasRet = true;
}
return HasRet;
}
EVT
X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
ISD::NodeType ExtendKind) const {
MVT ReturnMVT;
// TODO: Is this also valid on 32-bit?
if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
ReturnMVT = MVT::i8;
else
ReturnMVT = MVT::i32;
EVT MinVT = getRegisterType(Context, ReturnMVT);
return VT.bitsLT(MinVT) ? MinVT : VT;
}
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
///
SDValue
X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
bool Is64Bit = Subtarget->is64Bit();
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
EVT CopyVT = VA.getValVT();
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasXMM())) {
report_fatal_error("SSE register return with SSE disabled");
}
SDValue Val;
// If this is a call to a function that returns an fp value on the floating
// point stack, we must guarantee the value is popped from the stack, so
// a CopyFromReg is not good enough - the copy instruction may be eliminated
// if the return value is not used. We use the FpPOP_RETVAL instruction
// instead.
if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) {
// If we prefer to use the value in xmm registers, copy it out as f80 and
// use a truncate to move it from fp stack reg to xmm reg.
if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80;
SDValue Ops[] = { Chain, InFlag };
Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT,
MVT::Other, MVT::Glue, Ops, 2), 1);
Val = Chain.getValue(0);
// Round the f80 to the right size, which also moves it to the appropriate
// xmm register.
if (CopyVT != VA.getValVT())
Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
// This truncation won't change the value.
DAG.getIntPtrConstant(1));
} else {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
CopyVT, InFlag).getValue(1);
Val = Chain.getValue(0);
}
InFlag = Chain.getValue(2);
InVals.push_back(Val);
}
return Chain;
}
//===----------------------------------------------------------------------===//
// C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
// The StdCall calling convention is the standard convention for many Windows
// API routines. It differs from the C calling convention only slightly: the
// callee cleans up the stack rather than the caller, and symbols are
// decorated differently. It does not support any vector arguments.
// For info on fast calling convention see Fast Calling Convention (tail call)
// implementation LowerX86_32FastCCCallTo.
/// CallIsStructReturn - Determines whether a call uses struct return
/// semantics.
static bool CallIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
if (Outs.empty())
return false;
return Outs[0].Flags.isSRet();
}
/// ArgsAreStructReturn - Determines whether a function uses struct
/// return semantics.
static bool
ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
if (Ins.empty())
return false;
return Ins[0].Flags.isSRet();
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
DebugLoc dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
/*isVolatile*/false, /*AlwaysInline=*/true,
MachinePointerInfo(), MachinePointerInfo());
}
/// IsTailCallConvention - Return true if the calling convention is one that
/// supports tail call optimization.
static bool IsTailCallConvention(CallingConv::ID CC) {
return (CC == CallingConv::Fast || CC == CallingConv::GHC);
}
bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
if (!CI->isTailCall())
return false;
CallSite CS(CI);
CallingConv::ID CalleeCC = CS.getCallingConv();
if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
return false;
return true;
}
/// FuncIsMadeTailCallSafe - Return true if the function is being made into
/// a tailcall target by changing its ABI.
static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) {
return GuaranteedTailCallOpt && IsTailCallConvention(CC);
}
SDValue
X86TargetLowering::LowerMemArgument(SDValue Chain,
CallingConv::ID CallConv,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
MachineFrameInfo *MFI,
unsigned i) const {
// Create the nodes corresponding to a load from this parameter slot.
ISD::ArgFlagsTy Flags = Ins[i].Flags;
bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv);
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
EVT ValVT;
// If value is passed by pointer we have address passed instead of the value
// itself.
if (VA.getLocInfo() == CCValAssign::Indirect)
ValVT = VA.getLocVT();
else
ValVT = VA.getValVT();
// FIXME: For now, all byval parameter objects are marked mutable. This can be
// changed with more analysis.
// In case of tail call optimization mark all arguments mutable. Since they
// could be overwritten by lowering of arguments in case of a tail call.
if (Flags.isByVal()) {
unsigned Bytes = Flags.getByValSize();
if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
return DAG.getFrameIndex(FI, getPointerTy());
} else {
int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
VA.getLocMemOffset(), isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
return DAG.getLoad(ValVT, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, 0);
}
}
SDValue
X86TargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl,
SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const Function* Fn = MF.getFunction();
if (Fn->hasExternalLinkage() &&
Subtarget->isTargetCygMing() &&
Fn->getName() == "main")
FuncInfo->setForceFramePointer(true);
MachineFrameInfo *MFI = MF.getFrameInfo();
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isTargetWin64();
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
"Var args not supported with calling convention fastcc or ghc");
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
if (IsWin64) {
CCInfo.AllocateStack(32, 8);
}
CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
unsigned LastVal = ~0U;
SDValue ArgValue;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
// places.
assert(VA.getValNo() != LastVal &&
"Don't support value assigned to multiple locs yet");
LastVal = VA.getValNo();
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
TargetRegisterClass *RC = NULL;
if (RegVT == MVT::i32)
RC = X86::GR32RegisterClass;
else if (Is64Bit && RegVT == MVT::i64)
RC = X86::GR64RegisterClass;
else if (RegVT == MVT::f32)
RC = X86::FR32RegisterClass;
else if (RegVT == MVT::f64)
RC = X86::FR64RegisterClass;
else if (RegVT.isVector() && RegVT.getSizeInBits() == 256)
RC = X86::VR256RegisterClass;
else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
RC = X86::VR128RegisterClass;
else if (RegVT == MVT::x86mmx)
RC = X86::VR64RegisterClass;
else
llvm_unreachable("Unknown argument type!");
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
// If this is an 8 or 16-bit value, it is really passed promoted to 32
// bits. Insert an assert[sz]ext to capture this, then truncate to the
// right size.
if (VA.getLocInfo() == CCValAssign::SExt)
ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
else if (VA.getLocInfo() == CCValAssign::ZExt)
ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
else if (VA.getLocInfo() == CCValAssign::BCvt)
ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
if (VA.isExtInLoc()) {
// Handle MMX values passed in XMM regs.
if (RegVT.isVector()) {
ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(),
ArgValue);
} else
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
}
} else {
assert(VA.isMemLoc());
ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
}
// If value is passed via pointer - do a load.
if (VA.getLocInfo() == CCValAssign::Indirect)
ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
MachinePointerInfo(), false, false, 0);
InVals.push_back(ArgValue);
}
// The x86-64 ABI for returning structs by value requires that we copy
// the sret argument into %rax for the return. Save the argument into
// a virtual register so that we can access it from the return points.
if (Is64Bit && MF.getFunction()->hasStructRetAttr()) {
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
FuncInfo->setSRetReturnReg(Reg);
}
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
}
unsigned StackSize = CCInfo.getNextStackOffset();
// Align stack specially for tail calls.
if (FuncIsMadeTailCallSafe(CallConv))
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
CallConv != CallingConv::X86_ThisCall)) {
FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
}
if (Is64Bit) {
unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
// FIXME: We should really autogenerate these arrays
static const unsigned GPR64ArgRegsWin64[] = {
X86::RCX, X86::RDX, X86::R8, X86::R9
};
static const unsigned GPR64ArgRegs64Bit[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
};
static const unsigned XMMArgRegs64Bit[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
const unsigned *GPR64ArgRegs;
unsigned NumXMMRegs = 0;
if (IsWin64) {
// The XMM registers which might contain var arg parameters are shadowed
// in their paired GPR. So we only need to save the GPR to their home
// slots.
TotalNumIntRegs = 4;
GPR64ArgRegs = GPR64ArgRegsWin64;
} else {
TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
GPR64ArgRegs = GPR64ArgRegs64Bit;
NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit, TotalNumXMMRegs);
}
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
TotalNumIntRegs);
bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
assert(!(NumXMMRegs && !Subtarget->hasXMM()) &&
"SSE register cannot be used when SSE is disabled!");
assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
"SSE register cannot be used when SSE is disabled!");
if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasXMM())
// Kernel mode asks for SSE to be disabled, so don't push them
// on the stack.
TotalNumXMMRegs = 0;
if (IsWin64) {
const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering();
// Get to the caller-allocated home save location. Add 8 to account
// for the return address.
int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
FuncInfo->setRegSaveFrameIndex(
MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
// Fixup to set vararg frame on shadow area (4 x i64).
if (NumIntRegs < 4)
FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
} else {
// For X86-64, if there are vararg parameters that are passed via
// registers, then we must store them to their spots on the stack so they
// may be loaded by dereferencing the result of va_next.
FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
FuncInfo->setRegSaveFrameIndex(
MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
false));
}
// Store the integer parameter registers.
SmallVector<SDValue, 8> MemOps;
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
getPointerTy());
unsigned Offset = FuncInfo->getVarArgsGPOffset();
for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
DAG.getIntPtrConstant(Offset));
unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
X86::GR64RegisterClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo::getFixedStack(
FuncInfo->getRegSaveFrameIndex(), Offset),
false, false, 0);
MemOps.push_back(Store);
Offset += 8;
}
if (TotalNumXMMRegs != 0 && NumXMMRegs != TotalNumXMMRegs) {
// Now store the XMM (fp + vector) parameter registers.
SmallVector<SDValue, 11> SaveXMMOps;
SaveXMMOps.push_back(Chain);
unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass);
SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
SaveXMMOps.push_back(ALVal);
SaveXMMOps.push_back(DAG.getIntPtrConstant(
FuncInfo->getRegSaveFrameIndex()));
SaveXMMOps.push_back(DAG.getIntPtrConstant(
FuncInfo->getVarArgsFPOffset()));
for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs],
X86::VR128RegisterClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
SaveXMMOps.push_back(Val);
}
MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
MVT::Other,
&SaveXMMOps[0], SaveXMMOps.size()));
}
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOps[0], MemOps.size());
}
}
// Some CCs need callee pop.
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt)) {
FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
if (!Is64Bit && !IsTailCallConvention(CallConv) && ArgsAreStructReturn(Ins))
FuncInfo->setBytesToPopOnReturn(4);
}
if (!Is64Bit) {
// RegSaveFrameIndex is X86-64 only.
FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
if (CallConv == CallingConv::X86_FastCall ||
CallConv == CallingConv::X86_ThisCall)
// fastcc functions can't have varargs.
FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
}
return Chain;
}
SDValue
X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
SDValue StackPtr, SDValue Arg,
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
if (Flags.isByVal())
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
return DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo::getStack(LocMemOffset),
false, false, 0);
}
/// EmitTailCallLoadRetAddr - Emit a load of return address if tail call
/// optimization is performed and it is required.
SDValue
X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
SDValue &OutRetAddr, SDValue Chain,
bool IsTailCall, bool Is64Bit,
int FPDiff, DebugLoc dl) const {
// Adjust the Return address stack slot.
EVT VT = getPointerTy();
OutRetAddr = getReturnAddressFrameIndex(DAG);
// Load the "old" Return address.
OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
false, false, 0);
return SDValue(OutRetAddr.getNode(), 1);
}
/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
/// optimization is performed and it is required (FPDiff!=0).
static SDValue
EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
SDValue Chain, SDValue RetAddrFrIdx,
bool Is64Bit, int FPDiff, DebugLoc dl) {
// Store the return address to the appropriate stack slot.
if (!FPDiff) return Chain;
// Calculate the new stack slot for the return address.
int SlotSize = Is64Bit ? 8 : 4;
int NewReturnAddrFI =
MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false);
EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
MachinePointerInfo::getFixedStack(NewReturnAddrFI),
false, false, 0);
return Chain;
}
SDValue
X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isTargetWin64();
bool IsStructRet = CallIsStructReturn(Outs);
bool IsSibcall = false;
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
Outs, OutVals, Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
if (!GuaranteedTailCallOpt && isTailCall)
IsSibcall = true;
if (isTailCall)
++NumTailCalls;
}
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
"Var args not supported with calling convention fastcc or ghc");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
if (IsWin64) {
CCInfo.AllocateStack(32, 8);
}
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
if (IsSibcall)
// This is a sibcall. The memory operands are available in caller's
// own caller's stack.
NumBytes = 0;
else if (GuaranteedTailCallOpt && IsTailCallConvention(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
if (isTailCall && !IsSibcall) {
// Lower arguments at fp - stackoffset + fpdiff.
unsigned NumBytesCallerPushed =
MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
FPDiff = NumBytesCallerPushed - NumBytes;
// Set the delta of movement of the returnaddr stackslot.