return !Subtarget->is64Bit();
case CallingConv::X86_FastCall:
return !Subtarget->is64Bit();
case CallingConv::Fast:
return PerformTailCallOpt;
}
}
// Selects the correct CCAssignFn for a CALL or FORMAL_ARGUMENTS node.
CCAssignFn *X86TargetLowering::CCAssignFnForNode(SDOperand Op) const {
unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Subtarget->is64Bit())
if (CC == CallingConv::Fast && PerformTailCallOpt)
return CC_X86_64_TailCall;
else
return CC_X86_64_C;
if (CC == CallingConv::X86_FastCall)
return CC_X86_32_FastCall;
else if (CC == CallingConv::Fast && PerformTailCallOpt)
return CC_X86_32_TailCall;
else
return CC_X86_32_C;
}
// Selects the appropriate decoration to apply to a MachineFunction containing a
// given FORMAL_ARGUMENTS node.
NameDecorationStyle
X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDOperand Op) {
unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (CC == CallingConv::X86_FastCall)
return FastCall;
else if (CC == CallingConv::X86_StdCall)
return StdCall;
return None;
}
// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could possibly
// be overwritten when lowering the outgoing arguments in a tail call. Currently
// the implementation of this call is very conservative and assumes all
// arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with virtual
// registers would be overwritten by direct lowering.
// Possible improvement:
// Check FORMAL_ARGUMENTS corresponding MERGE_VALUES for CopyFromReg nodes
// indicating inreg passed arguments which also need not be lowered to a safe
// stack slot.
static bool IsPossiblyOverwrittenArgumentOfTailCall(SDOperand Op) {
RegisterSDNode * OpReg = NULL;
if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS ||
(Op.getOpcode()== ISD::CopyFromReg &&
(OpReg = cast<RegisterSDNode>(Op.getOperand(1))) &&
OpReg->getReg() >= TargetRegisterInfo::FirstVirtualRegister))
return true;
return false;
}
// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
// by "Src" to address "Dst" with size and alignment information specified by
// the specific parameter attribute. The copy will be passed as a byval function
// parameter.
static SDOperand
CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,
unsigned Flags, SelectionDAG &DAG) {
unsigned Align = 1 <<
((Flags & ISD::ParamFlags::ByValAlign) >> ISD::ParamFlags::ByValAlignOffs);
unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
ISD::ParamFlags::ByValSizeOffs;
SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);
return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline);
}
SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
const CCValAssign &VA,
MachineFrameInfo *MFI,
SDOperand Root, unsigned i) {
// Create the nodes corresponding to a load from this parameter slot.
unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3 + i))->getValue();
bool isByVal = Flags & ISD::ParamFlags::ByVal;
// FIXME: For now, all byval parameter objects are marked mutable. This
// can be changed with more analysis.
int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
VA.getLocMemOffset(), !isByVal);
SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
return DAG.getLoad(VA.getValVT(), Root, FIN,
PseudoSourceValue::getFixedStack(), FI);
}
SDOperand
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const Function* Fn = MF.getFunction();
if (Fn->hasExternalLinkage() &&
Subtarget->isTargetCygMing() &&
Fn->getName() == "main")
FuncInfo->setForceFramePointer(true);
// Decorate the function name.
FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op));
MachineFrameInfo *MFI = MF.getFrameInfo();
SDOperand Root = Op.getOperand(0);
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
unsigned CC = MF.getFunction()->getCallingConv();
bool Is64Bit = Subtarget->is64Bit();
assert(!(isVarArg && CC == CallingConv::Fast) &&
"Var args not supported with calling convention fastcc");
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
CCInfo.AnalyzeFormalArguments(Op.Val, CCAssignFnForNode(Op));
SmallVector<SDOperand, 8> ArgValues;
unsigned LastVal = ~0U;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
// TODO: If an arg is passed in two places (e.g. reg and stack), skip later
// places.
assert(VA.getValNo() != LastVal &&
"Don't support value assigned to multiple locs yet");
LastVal = VA.getValNo();
if (VA.isRegLoc()) {
MVT::ValueType RegVT = VA.getLocVT();
TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = X86::GR32RegisterClass;
else if (Is64Bit && RegVT == MVT::i64)
RC = X86::GR64RegisterClass;
else if (RegVT == MVT::f64)
RC = X86::FR64RegisterClass;
else {
assert(MVT::isVector(RegVT));
if (Is64Bit && MVT::getSizeInBits(RegVT) == 64) {
RC = X86::GR64RegisterClass; // MMX values are passed in GPRs.
RegVT = MVT::i64;
} else
RC = X86::VR128RegisterClass;
}
unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);
// If this is an 8 or 16-bit value, it is really passed promoted to 32
// bits. Insert an assert[sz]ext to capture this, then truncate to the
// right size.
if (VA.getLocInfo() == CCValAssign::SExt)
ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
else if (VA.getLocInfo() == CCValAssign::ZExt)
ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
if (VA.getLocInfo() != CCValAssign::Full)
ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
// Handle MMX values passed in GPRs.
if (Is64Bit && RegVT != VA.getLocVT() && RC == X86::GR64RegisterClass &&
MVT::getSizeInBits(RegVT) == 64)
ArgValue = DAG.getNode(ISD::BIT_CONVERT, VA.getLocVT(), ArgValue);
ArgValues.push_back(ArgValue);
} else {
assert(VA.isMemLoc());
ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, Root, i));
}
}
unsigned StackSize = CCInfo.getNextStackOffset();
// Align the stack specially for tail calls.
if (CC == CallingConv::Fast)
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
if (Is64Bit || CC != CallingConv::X86_FastCall) {
VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
}
if (Is64Bit) {
static const unsigned GPR64ArgRegs[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
};
static const unsigned XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
// For X86-64, if there are vararg parameters that are passed via
// registers, then we must store them to their spots on the stack so they
// may be loaded by dereferencing the result of va_next.
VarArgsGPOffset = NumIntRegs * 8;
VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
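// Illustrative layout of the 176-byte (6*8 + 8*16) register save area created
// above: the six GP argument registers occupy bytes 0-47 and the eight XMM
// registers occupy bytes 48-175. VarArgsGPOffset and VarArgsFPOffset therefore
// point at the first slot not used by a named argument, which is where va_arg
// starts reading register-passed varargs.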
// Store the integer parameter registers.
SmallVector<SDOperand, 8> MemOps;
SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
DAG.getIntPtrConstant(VarArgsGPOffset));
for (; NumIntRegs != 6; ++NumIntRegs) {
unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
X86::GR64RegisterClass);
SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
SDOperand Store =
DAG.getStore(Val.getValue(1), Val, FIN,
PseudoSourceValue::getFixedStack(),
RegSaveFrameIndex);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
DAG.getIntPtrConstant(8));
}
// Now store the XMM (fp + vector) parameter registers.
FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
DAG.getIntPtrConstant(VarArgsFPOffset));
for (; NumXMMRegs != 8; ++NumXMMRegs) {
unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
X86::VR128RegisterClass);
SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
SDOperand Store =
DAG.getStore(Val.getValue(1), Val, FIN,
PseudoSourceValue::getFixedStack(),
RegSaveFrameIndex);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
DAG.getIntPtrConstant(16));
}
if (!MemOps.empty())
Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
&MemOps[0], MemOps.size());
}
}
// Make sure the argument area takes 8n+4 bytes so that the start of the
// arguments, and the arguments after the return address has been pushed, are
// aligned.
if (!Is64Bit && CC == CallingConv::X86_FastCall &&
!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows() &&
(StackSize & 7) == 0)
StackSize += 4;
ArgValues.push_back(Root);
// Some CCs need callee pop.
if (IsCalleePop(Op)) {
BytesToPopOnReturn = StackSize; // Callee pops everything.
BytesCallerReserves = 0;
} else {
BytesToPopOnReturn = 0; // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
if (!Is64Bit && ArgsAreStructReturn(Op))
BytesToPopOnReturn = 4;
BytesCallerReserves = StackSize;
}
if (!Is64Bit) {
RegSaveFrameIndex = 0xAAAAAAA; // RegSaveFrameIndex is X86-64 only.
if (CC == CallingConv::X86_FastCall)
VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
}
FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
// Return the new list of results.
return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
&ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}
SDOperand
X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
const SDOperand &StackPtr,
const CCValAssign &VA,
SDOperand Chain,
SDOperand Arg) {
unsigned LocMemOffset = VA.getLocMemOffset();
SDOperand PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
if (Flags & ISD::ParamFlags::ByVal) {
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
}
return DAG.getStore(Chain, Arg, PtrOff,
PseudoSourceValue::getStack(), LocMemOffset);
}
/// ClassifyX86_64SRetCallReturn - Classify how to implement an x86-64
/// struct return call to the specified function. The X86-64 ABI specifies
/// that some SRet calls are actually returned in registers. Since current
/// LLVM cannot represent multi-value calls, they are represented as
/// calls where the results are passed in a hidden struct provided by
/// the caller. This function examines the type of the struct to
/// determine the correct way to implement the call.
X86::X86_64SRet
X86TargetLowering::ClassifyX86_64SRetCallReturn(const Function *Fn) {
// FIXME: Disabled for now.
return X86::InMemory;
const PointerType *PTy = cast<PointerType>(Fn->arg_begin()->getType());
const Type *RTy = PTy->getElementType();
unsigned Size = getTargetData()->getABITypeSize(RTy);
if (Size != 16 && Size != 32)
return X86::InMemory;
if (Size == 32) {
const StructType *STy = dyn_cast<StructType>(RTy);
if (!STy) return X86::InMemory;
if (STy->getNumElements() == 2 &&
STy->getElementType(0) == Type::X86_FP80Ty &&
STy->getElementType(1) == Type::X86_FP80Ty)
return X86::InX87;
}
bool AllFP = true;
for (Type::subtype_iterator I = RTy->subtype_begin(), E = RTy->subtype_end();
I != E; ++I) {
const Type *STy = I->get();
if (!STy->isFPOrFPVector()) {
AllFP = false;
break;
}
}
if (AllFP)
return X86::InSSE;
return X86::InGPR64;
}
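// Illustration of the (currently disabled) classification above: a 16-byte
// sret struct of two doubles is all-FP and would be classified InSSE
// (returned in XMM0/XMM1 below); two i64 fields would be InGPR64 (RAX/RDX);
// a 32-byte {x86_fp80, x86_fp80} pair would be InX87; any other size falls
// back to InMemory.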
void X86TargetLowering::X86_64AnalyzeSRetCallOperands(SDNode *TheCall,
CCAssignFn *Fn,
CCState &CCInfo) {
unsigned NumOps = (TheCall->getNumOperands() - 5) / 2;
for (unsigned i = 1; i != NumOps; ++i) {
MVT::ValueType ArgVT = TheCall->getOperand(5+2*i).getValueType();
SDOperand FlagOp = TheCall->getOperand(5+2*i+1);
unsigned ArgFlags = cast<ConstantSDNode>(FlagOp)->getValue();
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo)) {
cerr << "Call operand #" << i << " has unhandled type "
<< MVT::getValueTypeString(ArgVT) << "\n";
abort();
}
}
}
SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
SDOperand Chain = Op.getOperand(0);
unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
bool IsTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0
&& CC == CallingConv::Fast && PerformTailCallOpt;
SDOperand Callee = Op.getOperand(4);
bool Is64Bit = Subtarget->is64Bit();
bool IsStructRet = CallIsStructReturn(Op);
assert(!(isVarArg && CC == CallingConv::Fast) &&
"Var args not supported with calling convention fastcc");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
CCAssignFn *CCFn = CCAssignFnForNode(Op);
X86::X86_64SRet SRetMethod = X86::InMemory;
if (Is64Bit && IsStructRet)
// FIXME: We can't figure out type of the sret structure for indirect
// calls. We need to copy more information from CallSite to the ISD::CALL
// node.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
SRetMethod =
ClassifyX86_64SRetCallReturn(dyn_cast<Function>(G->getGlobal()));
// UGLY HACK! For x86-64, some 128-bit aggregates are returned in a pair of
// registers. Unfortunately, llvm does not support i128 yet so we pretend it's
// a sret call.
if (SRetMethod != X86::InMemory)
X86_64AnalyzeSRetCallOperands(Op.Val, CCFn, CCInfo);
else
CCInfo.AnalyzeCallOperands(Op.Val, CCFn);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
if (CC == CallingConv::Fast)
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
// Make sure the argument area takes 8n+4 bytes so that the start of the
// arguments, and the arguments after the return address has been pushed, are
// aligned.
if (!Is64Bit && CC == CallingConv::X86_FastCall &&
!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows() &&
(NumBytes & 7) == 0)
NumBytes += 4;
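// For example, assuming the intent is 8-byte alignment once the 4-byte return
// address is pushed: NumBytes == 16 satisfies (NumBytes & 7) == 0 and becomes
// 20, so the 20 bytes of arguments plus the return address total 24 bytes, a
// multiple of 8.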
int FPDiff = 0;
if (IsTailCall) {
// Lower arguments at fp - stackoffset + fpdiff.
unsigned NumBytesCallerPushed =
MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
FPDiff = NumBytesCallerPushed - NumBytes;
// Set the delta of movement of the returnaddr stackslot.
// But only set if delta is greater than previous delta.
if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
}
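// Worked example: if the caller reserved 16 bytes of argument space
// (BytesToPopOnReturn) but this tail callee needs NumBytes == 24, then
// FPDiff == 16 - 24 == -8, i.e. the return address slot has to move 8 bytes
// further down the stack.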
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes));
SDOperand RetAddrFrIdx, NewRetAddrFrIdx;
if (IsTailCall) {
// Adjust the Return address stack slot.
if (FPDiff) {
MVT::ValueType VT = Is64Bit ? MVT::i64 : MVT::i32;
RetAddrFrIdx = getReturnAddressFrameIndex(DAG);
// Load the "old" Return address.
RetAddrFrIdx =
DAG.getLoad(VT, Chain, RetAddrFrIdx, NULL, 0);
// Calculate the new stack slot for the return address.
int SlotSize = Is64Bit ? 8 : 4;
int NewReturnAddrFI =
MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
Chain = SDOperand(RetAddrFrIdx.Val, 1);
}
}
SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
SmallVector<SDOperand, 8> MemOpChains;
SDOperand StackPtr;
// Walk the register/memloc assignments, inserting copies/loads. For tail
// calls, lower arguments which could otherwise be possibly overwritten to the
// stack slot where they would go on normal function calls.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: assert(0 && "Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
break;
}
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
if (!IsTailCall || IsPossiblyOverwrittenArgumentOfTailCall(Arg)) {
assert(VA.isMemLoc());
if (StackPtr.Val == 0)
StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
Arg));
}
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
&MemOpChains[0], MemOpChains.size());
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into registers.
SDOperand InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
InFlag);
InFlag = Chain.getValue(1);
}
if (IsTailCall)
InFlag = SDOperand(); // ??? Isn't this nuking the preceding loop's output?
// ELF / PIC requires GOT in the EBX register before function calls via PLT
// GOT pointer.
// Does not work with tail call since ebx is not restored correctly by
// tailcaller. TODO: at least for x86 - verify for x86-64
if (!IsTailCall && !Is64Bit &&
getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT()) {
Chain = DAG.getCopyToReg(Chain, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
InFlag);
InFlag = Chain.getValue(1);
}
if (Is64Bit && isVarArg) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
// the declaration) %al is used as hidden argument to specify the number
// of SSE registers used. The contents of %al do not need to match exactly
// the number of registers, but must be an upper bound on the number of SSE
// registers used and is in the range 0 - 8 inclusive.
// Count the number of XMM registers allocated.
static const unsigned XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
Chain = DAG.getCopyToReg(Chain, X86::AL,
DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
InFlag = Chain.getValue(1);
}
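// For example, a varargs call that passes a single double in XMM0 has
// NumXMMRegs == 1, so AL is set to 1; any value that is an upper bound on the
// number of XMM registers actually used (up to 8) would also satisfy the ABI.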
// For tail calls lower the arguments to the 'real' stack slot.
if (IsTailCall) {
SmallVector<SDOperand, 8> MemOpChains2;
SDOperand FIN;
int FI = 0;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc()) {
assert(VA.isMemLoc());
SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
FIN = DAG.getFrameIndex(FI, MVT::i32);
SDOperand Source = Arg;
if (IsPossiblyOverwrittenArgumentOfTailCall(Arg)) {
// Copy from stack slots to stack slot of a tail called function. This
// needs to be done because if we would lower the arguments directly
// to their real stack slot we might end up overwriting each other.
// Get source stack slot.
Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
if (StackPtr.Val == 0)
StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
Source = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, Source);
if ((Flags & ISD::ParamFlags::ByVal)==0)
Source = DAG.getLoad(VA.getValVT(), Chain, Source, NULL, 0);
}
if (Flags & ISD::ParamFlags::ByVal) {
// Copy relative to framepointer.
MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain,
Flags, DAG));
} else {
MemOpChains2.push_back(
DAG.getStore(Chain, Source, FIN,
PseudoSourceValue::getFixedStack(), FI));
}
}
}
if (!MemOpChains2.empty())
Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
&MemOpChains2[0], MemOpChains2.size());
// Store the return address to the appropriate stack slot.
if (FPDiff)
Chain = DAG.getStore(Chain, RetAddrFrIdx, NewRetAddrFrIdx, NULL, 0);
}
// If the callee is a GlobalAddress node (quite common, every direct call is)
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
// We should use extra load for direct calls to dllimported functions in
// non-JIT mode.
if ((IsTailCall || !Is64Bit ||
getTargetMachine().getCodeModel() != CodeModel::Large)
&& !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
getTargetMachine(), true))
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
if (IsTailCall || !Is64Bit ||
getTargetMachine().getCodeModel() != CodeModel::Large)
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
} else if (IsTailCall) {
assert(Callee.getOpcode() == ISD::LOAD &&
"Function destination must be loaded into virtual register");
unsigned Opc = Is64Bit ? X86::R9 : X86::ECX;
Chain = DAG.getCopyToReg(Chain,
DAG.getRegister(Opc, getPointerTy()),
Callee, InFlag);
Callee = DAG.getRegister(Opc, getPointerTy());
// Add register as live out.
DAG.getMachineFunction().getRegInfo().addLiveOut(Opc);
}
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SmallVector<SDOperand, 8> Ops;
if (IsTailCall) {
Ops.push_back(Chain);
Ops.push_back(DAG.getIntPtrConstant(NumBytes));
Ops.push_back(DAG.getIntPtrConstant(0));
if (InFlag.Val)
Ops.push_back(InFlag);
Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
// Returns a chain & a flag for retval copy to use.
NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
Ops.clear();
}
Ops.push_back(Chain);
Ops.push_back(Callee);
if (IsTailCall)
Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
// Add an implicit use GOT pointer in EBX.
if (!IsTailCall && !Is64Bit &&
getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT())
Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
// Add argument registers to the end of the list so that they are known live
// into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
if (InFlag.Val)
Ops.push_back(InFlag);
if (IsTailCall) {
assert(InFlag.Val &&
"Flag must be set. Depend on flag being set in LowerRET");
Chain = DAG.getNode(X86ISD::TAILCALL,
Op.Val->getVTList(), &Ops[0], Ops.size());
return SDOperand(Chain.Val, Op.ResNo);
}
Chain = DAG.getNode(X86ISD::CALL, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPush;
if (IsCalleePop(Op))
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
else if (!Is64Bit && IsStructRet)
// If this is a call to a struct-return function, the callee
// pops the hidden struct pointer, so we have to push it back.
// This is common for Darwin/X86, Linux & Mingw32 targets.
NumBytesForCalleeToPush = 4;
else
NumBytesForCalleeToPush = 0; // Callee pops nothing.
// Returns a flag for retval copy to use.
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getIntPtrConstant(NumBytes),
DAG.getIntPtrConstant(NumBytesForCalleeToPush),
InFlag);
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
switch (SRetMethod) {
default:
return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
case X86::InGPR64:
return SDOperand(LowerCallResultToTwo64BitRegs(Chain, InFlag, Op.Val,
X86::RAX, X86::RDX,
MVT::i64, DAG), Op.ResNo);
case X86::InSSE:
return SDOperand(LowerCallResultToTwo64BitRegs(Chain, InFlag, Op.Val,
X86::XMM0, X86::XMM1,
MVT::f64, DAG), Op.ResNo);
case X86::InX87:
return SDOperand(LowerCallResultToTwoX87Regs(Chain, InFlag, Op.Val, DAG),
Op.ResNo);
}
}
//===----------------------------------------------------------------------===//
// Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//
// Like stdcall, the callee cleans up the arguments, except that ECX is
// reserved for storing the address of the tail-called function. Only 2 registers are
// free for argument passing (inreg). Tail call optimization is performed
// provided:
// * tailcallopt is enabled
// * caller/callee are fastcc
// * elf/pic is disabled OR
// * elf/pic enabled + callee is in module + callee has
// visibility protected or hidden
// To keep the stack aligned according to the platform ABI, the function
// GetAlignedArgumentStackSize ensures that the argument delta is always a
// multiple of the stack alignment. (Dynamic linkers need this - darwin's dyld
// for example.)
// If a tail-called callee has more arguments than the caller, the caller needs
// to make sure that there is room to move the RETADDR to. This is achieved by
// reserving an area the size of the argument delta right after the original
// RETADDR, but before the saved frame pointer or the spilled registers,
// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
// stack layout:
// arg1
// arg2
// RETADDR
// [ new RETADDR
// move area ]
// (possible EBP)
// ESI
// EDI
// local1 ..
/// GetAlignedArgumentStackSize - Round up the stack size so that it is, e.g.,
/// 16n + 12 bytes for a 16-byte alignment requirement.
unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
SelectionDAG& DAG) {
if (PerformTailCallOpt) {
MachineFunction &MF = DAG.getMachineFunction();
const TargetMachine &TM = MF.getTarget();
const TargetFrameInfo &TFI = *TM.getFrameInfo();
unsigned StackAlignment = TFI.getStackAlignment();
uint64_t AlignMask = StackAlignment - 1;
int64_t Offset = StackSize;
unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;
if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
// Number smaller than 12 so just add the difference.
Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
} else {
// Mask out lower bits, add stackalignment once plus the 12 bytes.
Offset = ((~AlignMask) & Offset) + StackAlignment +
(StackAlignment-SlotSize);
}
StackSize = Offset;
}
return StackSize;
}
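// Worked 32-bit example (16-byte stack alignment, 4-byte slot): StackSize 20
// has 20 & 15 == 4 <= 12, so 8 is added and the result is 28 == 16 + 12;
// StackSize 30 has 30 & 15 == 14 > 12, so it becomes (30 & ~15) + 16 + 12 ==
// 44 == 32 + 12. Either way the argument area ends one slot short of a
// 16-byte boundary, so pushing the return address restores full alignment.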
/// IsEligibleForTailCallOptimization - Check to see whether the next
/// instruction following the call is a return. A function is eligible if
/// caller/callee calling conventions match, currently only fastcc supports
/// tail calls, and the function CALL is immediately followed by a RET.
bool X86TargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
SDOperand Ret,
SelectionDAG& DAG) const {
if (!PerformTailCallOpt)
return false;
// Check whether the CALL node immediately precedes the RET node and whether the
// return uses the result of the node or is a void return.
unsigned NumOps = Ret.getNumOperands();
if ((NumOps == 1 &&
(Ret.getOperand(0) == SDOperand(Call.Val,1) ||
Ret.getOperand(0) == SDOperand(Call.Val,0))) ||
(NumOps > 1 &&
Ret.getOperand(0) == SDOperand(Call.Val,Call.Val->getNumValues()-1) &&
Ret.getOperand(1) == SDOperand(Call.Val,0))) {
MachineFunction &MF = DAG.getMachineFunction();
unsigned CallerCC = MF.getFunction()->getCallingConv();
unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
SDOperand Callee = Call.getOperand(4);
// On elf/pic %ebx needs to be livein.
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
!Subtarget->isPICStyleGOT())
return true;
// Can only do local tail calls with PIC.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
return G->getGlobal()->hasHiddenVisibility()
|| G->getGlobal()->hasProtectedVisibility();
}
}
return false;
}
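// Illustrative eligible case (pseudo-code, assuming both functions use fastcc
// and -tailcallopt is enabled):
//   fastcc int callee(int);
//   fastcc int caller(int x) { return callee(x); }
// The CALL node is immediately followed by a RET that returns its result, so
// the checks above succeed; with ELF/PIC + GOT the callee would additionally
// need hidden or protected visibility.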
//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
int ReturnAddrIndex = FuncInfo->getRAIndex();
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
if (Subtarget->is64Bit())
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
else
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
}
/// translateX86CC - do a one-to-one translation of an ISD::CondCode to the X86
/// specific condition code. It returns false if it cannot do a direct
/// translation. X86CC is the translated CondCode. LHS/RHS are modified as
/// needed.
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
SelectionDAG &DAG) {
X86CC = X86::COND_INVALID;
if (!isFP) {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
// X > -1 -> X == 0, jump !sign.
RHS = DAG.getConstant(0, RHS.getValueType());
X86CC = X86::COND_NS;
return true;
} else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
// X < 0 -> X == 0, jump on sign.
X86CC = X86::COND_S;
return true;
} else if (SetCCOpcode == ISD::SETLT && RHSC->getValue() == 1) {
// X < 1 -> X <= 0
RHS = DAG.getConstant(0, RHS.getValueType());
X86CC = X86::COND_LE;
return true;
}
}
switch (SetCCOpcode) {
default: break;
case ISD::SETEQ: X86CC = X86::COND_E; break;
case ISD::SETGT: X86CC = X86::COND_G; break;
case ISD::SETGE: X86CC = X86::COND_GE; break;
case ISD::SETLT: X86CC = X86::COND_L; break;
case ISD::SETLE: X86CC = X86::COND_LE; break;
case ISD::SETNE: X86CC = X86::COND_NE; break;
case ISD::SETULT: X86CC = X86::COND_B; break;
case ISD::SETUGT: X86CC = X86::COND_A; break;
case ISD::SETULE: X86CC = X86::COND_BE; break;
case ISD::SETUGE: X86CC = X86::COND_AE; break;
}
} else {
// On a floating point condition, the flags are set as follows:
// ZF PF CF op
// 0 | 0 | 0 | X > Y
// 0 | 0 | 1 | X < Y
// 1 | 0 | 0 | X == Y
// 1 | 1 | 1 | unordered
bool Flip = false;
switch (SetCCOpcode) {
default: break;
case ISD::SETUEQ:
case ISD::SETEQ: X86CC = X86::COND_E; break;
case ISD::SETOLT: Flip = true; // Fallthrough
case ISD::SETGT: X86CC = X86::COND_A; break;
case ISD::SETOLE: Flip = true; // Fallthrough
case ISD::SETGE: X86CC = X86::COND_AE; break;
case ISD::SETUGT: Flip = true; // Fallthrough
case ISD::SETLT: X86CC = X86::COND_B; break;
case ISD::SETUGE: Flip = true; // Fallthrough
case ISD::SETLE: X86CC = X86::COND_BE; break;
case ISD::SETNE: X86CC = X86::COND_NE; break;
case ISD::SETUO: X86CC = X86::COND_P; break;
case ISD::SETO: X86CC = X86::COND_NP; break;
}
if (Flip)
std::swap(LHS, RHS);
}
return X86CC != X86::COND_INVALID;
}
/// hasFPCMov - is there a floating point cmov for the specific X86 condition
/// code. The current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
switch (X86CC) {
default:
return false;
case X86::COND_B:
case X86::COND_BE:
case X86::COND_E:
case X86::COND_P:
case X86::COND_A:
case X86::COND_AE:
case X86::COND_NE:
case X86::COND_NP:
return true;
}
}
/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value falls within the specified range [Low, Hi).
static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
if (Op.getOpcode() == ISD::UNDEF)
return true;
unsigned Val = cast<ConstantSDNode>(Op)->getValue();
return (Val >= Low && Val < Hi);
}
/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if its value is equal to the specified value.
static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
if (Op.getOpcode() == ISD::UNDEF)
return true;
return cast<ConstantSDNode>(Op)->getValue() == Val;
}
/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
bool X86::isPSHUFDMask(SDNode *N) {
assert(N->getOpcode() == ISD::BUILD_VECTOR);
if (N->getNumOperands() != 2 && N->getNumOperands() != 4)
return false;
// Check if the value doesn't reference the second vector.
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDOperand Arg = N->getOperand(i);
if (Arg.getOpcode() == ISD::UNDEF) continue;
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
if (cast<ConstantSDNode>(Arg)->getValue() >= e)
return false;
}
return true;
}
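// For example, the 4-element mask <2, 3, 0, 1> only references elements 0-3 of
// the first vector, so it is accepted as a PSHUFD mask, whereas <4, 1, 2, 3>
// references element 4 (the second vector) and is rejected by the check above.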
/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
bool X86::isPSHUFHWMask(SDNode *N) {
assert(N->getOpcode() == ISD::BUILD_VECTOR);
if (N->getNumOperands() != 8)
return false;
// Lower quadword copied in order.
for (unsigned i = 0; i != 4; ++i) {
SDOperand Arg = N->getOperand(i);
if (Arg.getOpcode() == ISD::UNDEF) continue;
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
if (cast<ConstantSDNode>(Arg)->getValue() != i)
return false;
}
// Upper quadword shuffled.
for (unsigned i = 4; i != 8; ++i) {
SDOperand Arg = N->getOperand(i);
if (Arg.getOpcode() == ISD::UNDEF) continue;
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
if (Val < 4 || Val > 7)
return false;
}
return true;
}
/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
bool X86::isPSHUFLWMask(SDNode *N) {
assert(N->getOpcode() == ISD::BUILD_VECTOR);
if (N->getNumOperands() != 8)
return false;
// Upper quadword copied in order.
for (unsigned i = 4; i != 8; ++i)
if (!isUndefOrEqual(N->getOperand(i), i))
return false;
// Lower quadword shuffled.