"git@repo.hca.bsc.es:rferrer/llvm-epi-0.8.git" did not exist on "f33c3bc51fcfbd2f4663d732fcc157accf98b5ef"
Newer
Older
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "X86.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
// FIXME: temporary.
#include "llvm/Support/CommandLine.h"
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
cl::desc("Enable fastcc on X86"));
X86TargetLowering::X86TargetLowering(TargetMachine &TM)
: TargetLowering(TM) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
X86ScalarSSE = Subtarget->hasSSE2();
// Set up the TargetLowering object.
// X86 is weird; it always uses i8 for shift amounts and setcc results.
setShiftAmountType(MVT::i8);
setSetCCResultType(MVT::i8);
setSetCCResultContents(ZeroOrOneSetCCResult);
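// (The x86 setcc family, sete, setl, etc., writes an 8-bit register, and
// variable shift counts must live in CL; hence the i8 choices above.)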
setSchedulingPreference(SchedulingForRegPressure);
setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0
setStackPointerRegisterToSaveRestore(X86::ESP);
// Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmpLongJmp(true);
// Add legal addressing mode scale values.
addLegalAddressScale(8);
addLegalAddressScale(4);
addLegalAddressScale(2);
// Enter the ones which require both scale + index last. These are more
// expensive.
addLegalAddressScale(9);
addLegalAddressScale(5);
addLegalAddressScale(3);
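// (Note: scales 3, 5 and 9 are only expressible as base + index*{2,4,8} with
// the same register in both positions, e.g. lea (%eax,%eax,8) computes
// %eax*9, so they consume both address operands.)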
// Set up the register classes.
addRegisterClass(MVT::i8, X86::GR8RegisterClass);
addRegisterClass(MVT::i16, X86::GR16RegisterClass);
addRegisterClass(MVT::i32, X86::GR32RegisterClass);
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
if (X86ScalarSSE)
// No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead.
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
else
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
// Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
// this operation.
setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
// SSE has no i16 to fp conversion, only i32
if (X86ScalarSSE)
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
else {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
}
// We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64
// isn't legal.
setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
// Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
// this operation.
setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
if (X86ScalarSSE) {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
} else {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
}
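// (SSE's cvttss2si/cvttsd2si produce a 32-bit integer, so an i16 FP_TO_SINT
// is just the promoted i32 conversion truncated.)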
// Handle FP_TO_UINT by promoting the destination to a larger signed
// conversion.
setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
if (X86ScalarSSE && !Subtarget->hasSSE3())
// Expand FP_TO_UINT into a select.
// FIXME: We would like to use a Custom expander here eventually to do
// the optimal thing for SSE vs. the default expansion in the legalizer.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
else
// With SSE3 we can use fisttpll to convert to a signed i64.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
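// (Promoting to the wider signed conversion is safe: every 32-bit unsigned
// value is exactly representable as a signed 64-bit integer.)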
setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
setOperationAction(ISD::BR_CC , MVT::Other, Expand);
setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
setOperationAction(ISD::SEXTLOAD , MVT::i1 , Expand);
setOperationAction(ISD::FREM , MVT::f64 , Expand);
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
// These should be promoted to a larger select which is supported.
setOperationAction(ISD::SELECT , MVT::i1 , Promote);
setOperationAction(ISD::SELECT , MVT::i8 , Promote);
setOperationAction(ISD::SELECT , MVT::i16 , Custom);
setOperationAction(ISD::SELECT , MVT::i32 , Custom);
setOperationAction(ISD::SELECT , MVT::f32 , Custom);
setOperationAction(ISD::SELECT , MVT::f64 , Custom);
setOperationAction(ISD::SETCC , MVT::i8 , Custom);
setOperationAction(ISD::SETCC , MVT::i16 , Custom);
setOperationAction(ISD::SETCC , MVT::i32 , Custom);
setOperationAction(ISD::SETCC , MVT::f32 , Custom);
setOperationAction(ISD::SETCC , MVT::f64 , Custom);
setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
// 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
// X86 wants to expand memset / memcpy itself.
setOperationAction(ISD::MEMSET , MVT::Other, Custom);
setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
// We don't have line number support yet.
setOperationAction(ISD::LOCATION, MVT::Other, Expand);
setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
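// (The custom lowering simply stores the address of the VarArgsFrameIndex
// slot, which is set up by the argument-lowering code below, into the
// va_list memory operand.)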
// Use the default implementation.
setOperationAction(ISD::VAARG , MVT::Other, Expand);
setOperationAction(ISD::VACOPY , MVT::Other, Expand);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
if (X86ScalarSSE) {
// Set up the FP register classes.
addRegisterClass(MVT::f32, X86::FR32RegisterClass);
addRegisterClass(MVT::f64, X86::FR64RegisterClass);
// Use ANDPD to simulate FABS.
setOperationAction(ISD::FABS , MVT::f64, Custom);
setOperationAction(ISD::FABS , MVT::f32, Custom);
// Use XORP to simulate FNEG.
setOperationAction(ISD::FNEG , MVT::f64, Custom);
setOperationAction(ISD::FNEG , MVT::f32, Custom);
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
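// (SSE has no sin/cos/fmod instructions; Expand turns these nodes into
// libcalls.)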
// Expand FP immediates into loads from the stack, except for the special
// cases we handle.
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
addLegalFPImmediate(+0.0); // xorps / xorpd
} else {
// Set up the FP register classes.
addRegisterClass(MVT::f64, X86::RFPRegisterClass);
setOperationAction(ISD::UNDEF, MVT::f64, Expand);
if (!UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
setOperationAction(ISD::FCOS , MVT::f64 , Expand);
}
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
addLegalFPImmediate(+0.0); // FLD0
addLegalFPImmediate(+1.0); // FLD1
addLegalFPImmediate(-0.0); // FLD0/FCHS
addLegalFPImmediate(-1.0); // FLD1/FCHS
}
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
for (unsigned VT = (unsigned)MVT::Vector + 1;
VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
}
if (Subtarget->hasMMX()) {
addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand);
}
if (Subtarget->hasSSE1()) {
addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
setOperationAction(ISD::AND, MVT::v4f32, Legal);
setOperationAction(ISD::OR, MVT::v4f32, Legal);
setOperationAction(ISD::XOR, MVT::v4f32, Legal);
setOperationAction(ISD::ADD, MVT::v4f32, Legal);
setOperationAction(ISD::SUB, MVT::v4f32, Legal);
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
}
if (Subtarget->hasSSE2()) {
addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
setOperationAction(ISD::ADD, MVT::v2f64, Legal);
setOperationAction(ISD::ADD, MVT::v16i8, Legal);
setOperationAction(ISD::ADD, MVT::v8i16, Legal);
setOperationAction(ISD::ADD, MVT::v4i32, Legal);
setOperationAction(ISD::SUB, MVT::v2f64, Legal);
setOperationAction(ISD::SUB, MVT::v16i8, Legal);
setOperationAction(ISD::SUB, MVT::v8i16, Legal);
setOperationAction(ISD::SUB, MVT::v4i32, Legal);
setOperationAction(ISD::MUL, MVT::v8i16, Legal);
setOperationAction(ISD::MUL, MVT::v2f64, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
// Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
}
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
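// (Bitwise ops, loads and selects only care about the bit pattern, not the
// element type, so one set of v2i64 patterns can serve all of the 128-bit
// integer types.)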
for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
}
// Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
}
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
computeRegisterProperties();
// FIXME: These should be based on subtarget info. Plus, the values should
// be smaller when we are optimizing for size.
maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
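// (With a threshold of 16, e.g., a 64-byte memset can be emitted inline as
// sixteen 4-byte stores rather than a call to memset.)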
allowUnalignedMemoryAccesses = true; // x86 supports it!
}
//===----------------------------------------------------------------------===//
// C Calling Convention implementation
//===----------------------------------------------------------------------===//
/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value. It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
TargetRegisterClass *RC) {
assert(RC->contains(PReg) && "Not the correct regclass!");
unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
MF.addLiveIn(PReg, VReg);
return VReg;
}
/// HowToPassCCCArgument - Returns how a formal argument of the specified type
/// should be passed. If it is passed on the stack, returns the size of the
/// stack slot; if it is passed in an XMM register, returns the number of XMM
/// registers needed.
static void
HowToPassCCCArgument(MVT::ValueType ObjectVT, unsigned NumXMMRegs,
unsigned &ObjSize, unsigned &ObjXMMRegs) {
switch (ObjectVT) {
default: assert(0 && "Unhandled argument type!");
case MVT::i8: ObjSize = 1; break;
case MVT::i16: ObjSize = 2; break;
case MVT::i32: ObjSize = 4; break;
case MVT::i64: ObjSize = 8; break;
case MVT::f32: ObjSize = 4; break;
case MVT::f64: ObjSize = 8; break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
if (NumXMMRegs < 4)
ObjXMMRegs = 1;
else
ObjSize = 16;
break;
}
}
SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) {
unsigned NumArgs = Op.Val->getNumValues() - 1;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
SDOperand Root = Op.getOperand(0);
std::vector<SDOperand> ArgValues;
// Add DAG nodes to load the arguments... On entry to a function on the X86,
// the stack frame looks like this:
//
// [ESP] -- return address
// [ESP + 4] -- first argument (leftmost lexically)
// [ESP + 8] -- second argument, if first argument is <= 4 bytes in size
// ...
//
unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot
unsigned NumXMMRegs = 0; // XMM regs used for parameter passing.
static const unsigned XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
};
for (unsigned i = 0; i < NumArgs; ++i) {
MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
unsigned ArgIncrement = 4;
unsigned ObjSize = 0;
unsigned ObjXMMRegs = 0;
HowToPassCCCArgument(ObjectVT, NumXMMRegs, ObjSize, ObjXMMRegs);
ArgIncrement = ObjSize;
SDOperand ArgValue;
if (ObjXMMRegs) {
// Passed in a XMM register.
unsigned Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
X86::VR128RegisterClass);
ArgValue= DAG.getCopyFromReg(Root, Reg, ObjectVT);
ArgValues.push_back(ArgValue);
NumXMMRegs += ObjXMMRegs;
} else {
// XMM arguments have to be aligned on 16-byte boundary.
if (ObjSize == 16)
ArgOffset = ((ArgOffset + 15) / 16) * 16;
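// (The standard round-up-to-a-multiple idiom: e.g. an ArgOffset of 20
// becomes ((20 + 15) / 16) * 16 == 32.)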
// Create the frame index object for this incoming parameter...
int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
DAG.getSrcValue(NULL));
ArgValues.push_back(ArgValue);
ArgOffset += ArgIncrement; // Move on to the next argument...
}
}
ArgValues.push_back(Root);
// If the function takes a variable number of arguments, make a frame index
// for the start of the first vararg value... for expansion of llvm.va_start.
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
if (isVarArg)
VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
ReturnAddrIndex = 0; // No return address slot generated yet.
BytesToPopOnReturn = 0; // Callee pops nothing.
BytesCallerReserves = ArgOffset;
// If this is a struct return on Darwin/X86, the callee pops the hidden struct
// pointer.
if (MF.getFunction()->getCallingConv() == CallingConv::CSRet &&
Subtarget->isTargetDarwin())
BytesToPopOnReturn = 4;
// Return the new list of results.
std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
Op.Val->value_end());
return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size());
}
SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG) {
SDOperand Chain = Op.getOperand(0);
unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
SDOperand Callee = Op.getOperand(4);
MVT::ValueType RetVT= Op.Val->getValueType(0);
unsigned NumOps = (Op.getNumOperands() - 5) / 2;
// Keep track of the number of XMM regs passed so far.
unsigned NumXMMRegs = 0;
static const unsigned XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
};
// Count how many bytes are to be pushed on the stack.
unsigned NumBytes = 0;
for (unsigned i = 0; i != NumOps; ++i) {
SDOperand Arg = Op.getOperand(5+2*i);
switch (Arg.getValueType()) {
default: assert(0 && "Unexpected ValueType for argument!");
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::f32:
NumBytes += 4;
break;
case MVT::i64:
case MVT::f64:
NumBytes += 8;
break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
if (NumXMMRegs < 4)
++NumXMMRegs;
else {
// XMM arguments have to be aligned on 16-byte boundary.
NumBytes = ((NumBytes + 15) / 16) * 16;
NumBytes += 16;
}
break;
}
}
Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
// Arguments go on the stack in reverse order, as specified by the ABI.
unsigned ArgOffset = 0;
NumXMMRegs = 0;
std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
std::vector<SDOperand> MemOpChains;
SDOperand StackPtr = DAG.getRegister(X86::ESP, getPointerTy());
for (unsigned i = 0; i != NumOps; ++i) {
SDOperand Arg = Op.getOperand(5+2*i);
switch (Arg.getValueType()) {
default: assert(0 && "Unexpected ValueType for argument!");
case MVT::i8:
case MVT::i16: {
// Promote the integer to 32 bits. If the input type is signed use a
// sign extend, otherwise use a zero extend.
unsigned ExtOp =
dyn_cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue() ?
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
Arg = DAG.getNode(ExtOp, MVT::i32, Arg);
}
// Fallthrough
case MVT::i32:
case MVT::f32: {
SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
Arg, PtrOff, DAG.getSrcValue(NULL)));
ArgOffset += 4;
break;
}
case MVT::i64:
case MVT::f64: {
SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
Arg, PtrOff, DAG.getSrcValue(NULL)));
ArgOffset += 8;
break;
}
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
if (NumXMMRegs < 4) {
RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
NumXMMRegs++;
} else {
// XMM arguments have to be aligned on 16-byte boundary.
ArgOffset = ((ArgOffset + 15) / 16) * 16;
SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
Arg, PtrOff, DAG.getSrcValue(NULL)));
ArgOffset += 16;
}
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
&MemOpChains[0], MemOpChains.size());
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into registers.
SDOperand InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
InFlag);
InFlag = Chain.getValue(1);
}
// If the callee is a GlobalAddress node (quite common, every direct call is)
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
std::vector<MVT::ValueType> NodeTys;
NodeTys.push_back(MVT::Other); // Returns a chain
NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
std::vector<SDOperand> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
// Add argument registers to the end of the list so that they are known live
// into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
if (InFlag.Val)
Ops.push_back(InFlag);
Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPush = 0;
// If this is a call to a struct-return function on Darwin/X86, the callee
// pops the hidden struct pointer, so we have to push it back.
if (CallingConv == CallingConv::CSRet && Subtarget->isTargetDarwin())
NumBytesForCalleeToPush = 4;
NodeTys.clear();
NodeTys.push_back(MVT::Other); // Returns a chain
if (RetVT != MVT::Other)
NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
Ops.clear();
Ops.push_back(Chain);
Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
if (RetVT != MVT::Other)
InFlag = Chain.getValue(1);
std::vector<SDOperand> ResultVals;
NodeTys.clear();
switch (RetVT) {
default: assert(0 && "Unknown value type to return!");
case MVT::Other: break;
case MVT::i8:
Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
ResultVals.push_back(Chain.getValue(0));
NodeTys.push_back(MVT::i8);
break;
case MVT::i16:
Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
ResultVals.push_back(Chain.getValue(0));
NodeTys.push_back(MVT::i16);
break;
case MVT::i32:
if (Op.Val->getValueType(1) == MVT::i32) {
Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
ResultVals.push_back(Chain.getValue(0));
Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32,
Chain.getValue(2)).getValue(1);
ResultVals.push_back(Chain.getValue(0));
NodeTys.push_back(MVT::i32);
} else {
Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
ResultVals.push_back(Chain.getValue(0));
}
NodeTys.push_back(MVT::i32);
break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
ResultVals.push_back(Chain.getValue(0));
NodeTys.push_back(RetVT);
break;
case MVT::f32:
case MVT::f64: {
std::vector<MVT::ValueType> Tys;
Tys.push_back(MVT::f64);
Tys.push_back(MVT::Other);
Tys.push_back(MVT::Flag);
std::vector<SDOperand> Ops;
Ops.push_back(Chain);
Ops.push_back(InFlag);
SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys,
&Ops[0], Ops.size());
Chain = RetVal.getValue(1);
InFlag = RetVal.getValue(2);
if (X86ScalarSSE) {
// FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
Tys.clear();
Tys.push_back(MVT::Other);
Ops.clear();
Ops.push_back(Chain);
Ops.push_back(RetVal);
Ops.push_back(StackSlot);
Ops.push_back(DAG.getValueType(RetVT));
Ops.push_back(InFlag);
Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
RetVal = DAG.getLoad(RetVT, Chain, StackSlot,
DAG.getSrcValue(NULL));
Chain = RetVal.getValue(1);
}
if (RetVT == MVT::f32 && !X86ScalarSSE)
// FIXME: we would really like to remember that this FP_ROUND
// operation is okay to eliminate if we allow excess FP precision.
RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
ResultVals.push_back(RetVal);
NodeTys.push_back(RetVT);
break;
}
}
// If the function returns void, just return the chain.
if (ResultVals.empty())
return Chain;
// Otherwise, merge everything together with a MERGE_VALUES node.
NodeTys.push_back(MVT::Other);
ResultVals.push_back(Chain);
SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
&ResultVals[0], ResultVals.size());
return Res.getValue(Op.ResNo);
}
//===----------------------------------------------------------------------===//
// Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fast' calling convention passes up to two integer arguments in
// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
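// (Worked example: 8 bytes of arguments get padded to 12 == 8*1+4; the
// callee's "ret 12" plus the 4-byte return address then adjusts ESP by 16,
// preserving 8-byte alignment across the call.)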
//
// Note that this can be enhanced in the future to pass fp vals in registers
// (when we have a global fp allocator) and do other tricks.
//
/// HowToPassFastCCArgument - Returns how a formal argument of the specified
/// type should be passed. If it is passed on the stack, returns the size of
/// the stack slot; if it is passed in integer or XMM registers, returns the
/// number of integer or XMM registers needed.
// FASTCC_NUM_INT_ARGS_INREGS - This is the max number of integer arguments
// passed in integer registers by fastcc; at most two are available (EAX and
// EDX).
#define FASTCC_NUM_INT_ARGS_INREGS 2
static void
HowToPassFastCCArgument(MVT::ValueType ObjectVT,
unsigned NumIntRegs, unsigned NumXMMRegs,
unsigned &ObjSize, unsigned &ObjIntRegs,
unsigned &ObjXMMRegs) {
ObjSize = 0;
switch (ObjectVT) {
default: assert(0 && "Unhandled argument type!");
case MVT::i8:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
ObjIntRegs = 1;
else
#endif
ObjSize = 1;
break;
case MVT::i16:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
ObjIntRegs = 1;
else
#endif
ObjSize = 2;
break;
case MVT::i32:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
ObjIntRegs = 1;
else
#endif
ObjSize = 4;
break;
case MVT::i64:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
ObjIntRegs = 2;
} else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
ObjIntRegs = 1;
ObjSize = 4;
} else
#endif
ObjSize = 8;
break;
case MVT::f32:
ObjSize = 4;
break;
case MVT::f64:
ObjSize = 8;
break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
if (NumXMMRegs < 4)
ObjXMMRegs = 1;
else
ObjSize = 16;
break;
}
}
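// (Illustration of the helper above: with both EAX and EDX free, an i64
// argument reports ObjIntRegs == 2; with only one register left it is split,
// low half in a register (ObjIntRegs == 1) and high half in a 4-byte stack
// slot (ObjSize == 4).)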
SDOperand
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
unsigned NumArgs = Op.Val->getNumValues()-1;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
SDOperand Root = Op.getOperand(0);
std::vector<SDOperand> ArgValues;
// Add DAG nodes to load the arguments... On entry to a function the stack
// frame looks like this:
//
// [ESP] -- return address
// [ESP + 4] -- first nonreg argument (leftmost lexically)
// [ESP + 8] -- second nonreg argument, if 1st argument is <= 4 bytes in size
// ...
unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot
// Keep track of the number of integer regs passed so far. This can be either
// 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
// used).
unsigned NumIntRegs = 0;
unsigned NumXMMRegs = 0; // XMM regs used for parameter passing.
static const unsigned XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
};
for (unsigned i = 0; i < NumArgs; ++i) {
MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
unsigned ArgIncrement = 4;
unsigned ObjSize = 0;
unsigned ObjIntRegs = 0;
unsigned ObjXMMRegs = 0;
HowToPassFastCCArgument(ObjectVT, NumIntRegs, NumXMMRegs,
ObjSize, ObjIntRegs, ObjXMMRegs);
ArgIncrement = ObjSize;
unsigned Reg;
SDOperand ArgValue;
if (ObjIntRegs || ObjXMMRegs) {
switch (ObjectVT) {
default: assert(0 && "Unhandled argument type!");
case MVT::i8:
Reg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
X86::GR8RegisterClass);
ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i8);
break;
case MVT::i16:
Reg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
X86::GR16RegisterClass);
ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i16);
break;
case MVT::i32:
Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
X86::GR32RegisterClass);
ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
break;
case MVT::i64:
Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
X86::GR32RegisterClass);
ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
if (ObjIntRegs == 2) {
Reg = AddLiveIn(MF, X86::EDX, X86::GR32RegisterClass);
SDOperand ArgValue2 = DAG.getCopyFromReg(Root, Reg, MVT::i32);
ArgValue= DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
}
break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass);
ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
break;
}
NumIntRegs += ObjIntRegs;
NumXMMRegs += ObjXMMRegs;
}
if (ObjSize) {
// XMM arguments have to be aligned on 16-byte boundary.
if (ObjSize == 16)
ArgOffset = ((ArgOffset + 15) / 16) * 16;
// Create the SelectionDAG nodes corresponding to a load from this
// parameter.
int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
if (ObjectVT == MVT::i64 && ObjIntRegs) {
SDOperand ArgValue2 = DAG.getLoad(MVT::i32, Root, FIN,
DAG.getSrcValue(NULL));
ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
} else
ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
DAG.getSrcValue(NULL));
ArgOffset += ArgIncrement; // Move on to the next argument.
}
ArgValues.push_back(ArgValue);
}
ArgValues.push_back(Root);
// Round the callee-pop amount up to 8n+4 bytes so that, together with the
// 4-byte return address, the total stack adjustment keeps the stack 8-byte
// aligned.
if ((ArgOffset & 7) == 0)
ArgOffset += 4;
VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs.
ReturnAddrIndex = 0; // No return address slot generated yet.
BytesToPopOnReturn = ArgOffset; // Callee pops all stack arguments.
BytesCallerReserves = 0;
// Finally, inform the code generator which regs we return values in.
switch (getValueType(MF.getFunction()->getReturnType())) {
default: assert(0 && "Unknown type!");
case MVT::isVoid: break;
case MVT::i8:
case MVT::i16:
case MVT::i32:
MF.addLiveOut(X86::EAX);
break;
case MVT::i64:
MF.addLiveOut(X86::EAX);
MF.addLiveOut(X86::EDX);
break;
case MVT::f32:
case MVT::f64:
MF.addLiveOut(X86::ST0);
break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
MF.addLiveOut(X86::XMM0);
break;
}
// Return the new list of results.
std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
Op.Val->value_end());
return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size());
}
SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG){
SDOperand Chain = Op.getOperand(0);
unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
SDOperand Callee = Op.getOperand(4);
MVT::ValueType RetVT= Op.Val->getValueType(0);
unsigned NumOps = (Op.getNumOperands() - 5) / 2;
// Count how many bytes are to be pushed on the stack.
unsigned NumBytes = 0;