"llvm/git@repo.hca.bsc.es:rferrer/llvm-epi-0.8.git" did not exist on "97c176b639fa48d8970f332b5b3ac886d25d7f5e"
Newer
Older
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "X86.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Target/TargetOptions.h"
Evan Cheng
committed
#include "llvm/Support/CommandLine.h"
using namespace llvm;
// FIXME: temporary.
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
cl::desc("Enable fastcc on X86"));
X86TargetLowering::X86TargetLowering(TargetMachine &TM)
: TargetLowering(TM) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
X86ScalarSSE = Subtarget->hasSSE2();
X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
// Set up the TargetLowering object.
// X86 is weird, it always uses i8 for shift amounts and setcc results.
setShiftAmountType(MVT::i8);
setSetCCResultType(MVT::i8);
setSetCCResultContents(ZeroOrOneSetCCResult);
setSchedulingPreference(SchedulingForRegPressure);
setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0
setStackPointerRegisterToSaveRestore(X86StackPtr);
if (Subtarget->isTargetDarwin()) {
// Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(false);
setUseUnderscoreLongJmp(false);
// MS runtime is weird: it exports _setjmp, but longjmp!
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(false);
} else {
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(true);
}
// Add legal addressing mode scale values.
addLegalAddressScale(8);
addLegalAddressScale(4);
addLegalAddressScale(2);
// Enter the ones which require both scale + index last. These are more
// expensive.
addLegalAddressScale(9);
addLegalAddressScale(5);
addLegalAddressScale(3);
// Set up the register classes.
Evan Cheng
committed
addRegisterClass(MVT::i8, X86::GR8RegisterClass);
addRegisterClass(MVT::i16, X86::GR16RegisterClass);
addRegisterClass(MVT::i32, X86::GR32RegisterClass);
if (Subtarget->is64Bit())
addRegisterClass(MVT::i64, X86::GR64RegisterClass);
setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
} else {
if (X86ScalarSSE)
// If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Expand);
else
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
}
// Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
// this operation.
setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
// SSE has no i16 to fp conversion, only i32
if (X86ScalarSSE)
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
else {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
}
if (!Subtarget->is64Bit()) {
// Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
}
// Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
// this operation.
setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
if (X86ScalarSSE) {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
} else {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
}
// Handle FP_TO_UINT by promoting the destination to a larger signed
// conversion.
setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
} else {
if (X86ScalarSSE && !Subtarget->hasSSE3())
// Expand FP_TO_UINT into a select.
// FIXME: We would like to use a Custom expander here eventually to do
// the optimal thing for SSE vs. the default expansion in the legalizer.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
else
// With SSE3 we can use fisttpll to convert to a signed i64.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
}
Chris Lattner
committed
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
if (!X86ScalarSSE) {
setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
}
setOperationAction(ISD::BR_JT , MVT::Other, Expand);
setOperationAction(ISD::BR_CC , MVT::Other, Expand);
setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f64 , Expand);
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
}
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
// These should be promoted to a larger select which is supported.
setOperationAction(ISD::SELECT , MVT::i1 , Promote);
setOperationAction(ISD::SELECT , MVT::i8 , Promote);
setOperationAction(ISD::SELECT , MVT::i16 , Custom);
setOperationAction(ISD::SELECT , MVT::i32 , Custom);
setOperationAction(ISD::SELECT , MVT::f32 , Custom);
setOperationAction(ISD::SELECT , MVT::f64 , Custom);
setOperationAction(ISD::SETCC , MVT::i8 , Custom);
setOperationAction(ISD::SETCC , MVT::i16 , Custom);
setOperationAction(ISD::SETCC , MVT::i32 , Custom);
setOperationAction(ISD::SETCC , MVT::f32 , Custom);
setOperationAction(ISD::SETCC , MVT::f64 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::SELECT , MVT::i64 , Custom);
setOperationAction(ISD::SETCC , MVT::i64 , Custom);
}
setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
}
// 64-bit addm sub, shl, sra, srl (iff 32-bit x86)
setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
// X86 wants to expand memset / memcpy itself.
setOperationAction(ISD::MEMSET , MVT::Other, Custom);
setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
// We don't have line number support yet.
setOperationAction(ISD::LOCATION, MVT::Other, Expand);
setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
if (!Subtarget->isTargetDarwin() &&
!Subtarget->isTargetELF() &&
setOperationAction(ISD::LABEL, MVT::Other, Expand);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
// Use the default implementation.
setOperationAction(ISD::VAARG , MVT::Other, Expand);
setOperationAction(ISD::VACOPY , MVT::Other, Expand);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
if (X86ScalarSSE) {
// Set up the FP register classes.
addRegisterClass(MVT::f32, X86::FR32RegisterClass);
addRegisterClass(MVT::f64, X86::FR64RegisterClass);
// Use ANDPD to simulate FABS.
setOperationAction(ISD::FABS , MVT::f64, Custom);
setOperationAction(ISD::FABS , MVT::f32, Custom);
// Use XORP to simulate FNEG.
setOperationAction(ISD::FNEG , MVT::f64, Custom);
setOperationAction(ISD::FNEG , MVT::f32, Custom);
// Use ANDPD and ORPD to simulate FCOPYSIGN.
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
// Expand FP immediates into loads from the stack, except for the special
// cases we handle.
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
addLegalFPImmediate(+0.0); // xorps / xorpd
} else {
// Set up the FP register classes.
addRegisterClass(MVT::f64, X86::RFPRegisterClass);
setOperationAction(ISD::UNDEF, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
if (!UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
setOperationAction(ISD::FCOS , MVT::f64 , Expand);
}
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
addLegalFPImmediate(+0.0); // FLD0
addLegalFPImmediate(+1.0); // FLD1
addLegalFPImmediate(-0.0); // FLD0/FCHS
addLegalFPImmediate(-1.0); // FLD1/FCHS
}
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
for (unsigned VT = (unsigned)MVT::Vector + 1;
VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
addRegisterClass(MVT::v8i8, X86::VR64RegisterClass);
addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand);
}
addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
}
addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
setOperationAction(ISD::ADD, MVT::v16i8, Legal);
setOperationAction(ISD::ADD, MVT::v8i16, Legal);
setOperationAction(ISD::ADD, MVT::v4i32, Legal);
setOperationAction(ISD::SUB, MVT::v16i8, Legal);
setOperationAction(ISD::SUB, MVT::v8i16, Legal);
setOperationAction(ISD::SUB, MVT::v4i32, Legal);
setOperationAction(ISD::MUL, MVT::v8i16, Legal);
setOperationAction(ISD::FADD, MVT::v2f64, Legal);
setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
// Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
}
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
// Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
}
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::SELECT);
computeRegisterProperties();
// FIXME: These should be based on subtarget info. Plus, the values should
// be smaller when we are in optimizing for size mode.
maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
allowUnalignedMemoryAccesses = true; // x86 supports it!
}
//===----------------------------------------------------------------------===//
// C & StdCall Calling Convention implementation
//===----------------------------------------------------------------------===//
// StdCall calling convention seems to be standard for many Windows' API
// routines and around. It differs from C calling convention just a little:
// callee should clean up the stack, not caller. Symbols should be also
// decorated in some fancy way :) It doesn't support any vector arguments.
/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value. It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
const TargetRegisterClass *RC) {
assert(RC->contains(PReg) && "Not the correct regclass!");
unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
MF.addLiveIn(PReg, VReg);
return VReg;
}
/// HowToPassArgument - Returns how an formal argument of the specified type
/// should be passed. If it is through stack, returns the size of the stack
/// slot; if it is through integer or XMM register, returns the number of
/// integer or XMM registers are needed.
static void
HowToPassCallArgument(MVT::ValueType ObjectVT,
bool ArgInReg,
unsigned NumIntRegs, unsigned NumXMMRegs,
unsigned MaxNumIntRegs,
unsigned &ObjSize, unsigned &ObjIntRegs,
unsigned &ObjXMMRegs,
bool AllowVectors = true) {
ObjSize = 0;
ObjIntRegs = 0;
if (MaxNumIntRegs>3) {
// We don't have too much registers on ia32! :)
MaxNumIntRegs = 3;
}
Evan Cheng
committed
switch (ObjectVT) {
default: assert(0 && "Unhandled argument type!");
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
case MVT::i8:
if (ArgInReg && (NumIntRegs < MaxNumIntRegs))
ObjIntRegs = 1;
else
ObjSize = 1;
break;
case MVT::i16:
if (ArgInReg && (NumIntRegs < MaxNumIntRegs))
ObjIntRegs = 1;
else
ObjSize = 2;
break;
case MVT::i32:
if (ArgInReg && (NumIntRegs < MaxNumIntRegs))
ObjIntRegs = 1;
else
ObjSize = 4;
break;
case MVT::i64:
if (ArgInReg && (NumIntRegs+2 <= MaxNumIntRegs)) {
ObjIntRegs = 2;
} else if (ArgInReg && (NumIntRegs+1 <= MaxNumIntRegs)) {
ObjIntRegs = 1;
ObjSize = 4;
} else
ObjSize = 8;
case MVT::f32:
ObjSize = 4;
break;
case MVT::f64:
ObjSize = 8;
break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
if (AllowVectors) {
if (NumXMMRegs < 4)
ObjXMMRegs = 1;
else
ObjSize = 16;
break;
} else
assert(0 && "Unhandled argument type [vector]!");
Evan Cheng
committed
}
}
SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
bool isStdCall) {
unsigned NumArgs = Op.Val->getNumValues() - 1;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
SDOperand Root = Op.getOperand(0);
SmallVector<SDOperand, 8> ArgValues;
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
Evan Cheng
committed
// Add DAG nodes to load the arguments... On entry to a function on the X86,
// the stack frame looks like this:
//
// [ESP] -- return address
// [ESP + 4] -- first argument (leftmost lexically)
// [ESP + 8] -- second argument, if first argument is <= 4 bytes in size
Evan Cheng
committed
// ...
//
unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot
unsigned NumSRetBytes= 0; // How much bytes on stack used for struct return
unsigned NumXMMRegs = 0; // XMM regs used for parameter passing.
unsigned NumIntRegs = 0; // Integer regs used for parameter passing
static const unsigned XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
};
static const unsigned GPRArgRegs[][3] = {
{ X86::AL, X86::DL, X86::CL },
{ X86::AX, X86::DX, X86::CX },
{ X86::EAX, X86::EDX, X86::ECX }
};
static const TargetRegisterClass* GPRClasses[3] = {
X86::GR8RegisterClass, X86::GR16RegisterClass, X86::GR32RegisterClass
};
// Handle regparm attribute
SmallVector<bool, 8> ArgInRegs(NumArgs, false);
SmallVector<bool, 8> SRetArgs(NumArgs, false);
if (!isVarArg) {
for (unsigned i = 0; i<NumArgs; ++i) {
unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3+i))->getValue();
ArgInRegs[i] = (Flags >> 1) & 1;
SRetArgs[i] = (Flags >> 2) & 1;
}
}
for (unsigned i = 0; i < NumArgs; ++i) {
MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
unsigned ArgIncrement = 4;
unsigned ObjSize = 0;
unsigned ObjXMMRegs = 0;
unsigned ObjIntRegs = 0;
unsigned Reg = 0;
SDOperand ArgValue;
HowToPassCallArgument(ObjectVT,
ArgInRegs[i],
NumIntRegs, NumXMMRegs, 3,
ObjSize, ObjIntRegs, ObjXMMRegs,
!isStdCall);
ArgIncrement = ObjSize;
if (ObjIntRegs || ObjXMMRegs) {
switch (ObjectVT) {
default: assert(0 && "Unhandled argument type!");
case MVT::i8:
case MVT::i16:
case MVT::i32: {
unsigned RegToUse = GPRArgRegs[ObjectVT-MVT::i8][NumIntRegs];
Reg = AddLiveIn(MF, RegToUse, GPRClasses[ObjectVT-MVT::i8]);
ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
break;
}
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
assert(!isStdCall && "Unhandled argument type!");
Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass);
ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
break;
}
NumIntRegs += ObjIntRegs;
NumXMMRegs += ObjXMMRegs;
}
if (ObjSize) {
// XMM arguments have to be aligned on 16-byte boundary.
if (ObjSize == 16)
ArgOffset = ((ArgOffset + 15) / 16) * 16;
// Create the SelectionDAG nodes corresponding to a load from this
// parameter.
int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0);
ArgOffset += ArgIncrement; // Move on to the next argument.
if (SRetArgs[i])
NumSRetBytes += ArgIncrement;
ArgValues.push_back(ArgValue);
ArgValues.push_back(Root);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
if (isStdCall && !isVarArg) {
BytesToPopOnReturn = ArgOffset; // Callee pops everything..
BytesCallerReserves = 0;
} else {
BytesToPopOnReturn = NumSRetBytes; // Callee pops hidden struct pointer.
BytesCallerReserves = ArgOffset;
}
RegSaveFrameIndex = 0xAAAAAAA; // X86-64 only.
ReturnAddrIndex = 0; // No return address slot generated yet.
MF.getInfo<X86FunctionInfo>()->setBytesToPopOnReturn(BytesToPopOnReturn);
// Return the new list of results.
return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
&ArgValues[0], ArgValues.size());
}
SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
bool isStdCall) {
SDOperand Chain = Op.getOperand(0);
bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
SDOperand Callee = Op.getOperand(4);
MVT::ValueType RetVT= Op.Val->getValueType(0);
unsigned NumOps = (Op.getNumOperands() - 5) / 2;
static const unsigned XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
};
static const unsigned GPR32ArgRegs[] = {
X86::EAX, X86::EDX, X86::ECX
};
// Count how many bytes are to be pushed on the stack.
unsigned NumBytes = 0;
// Keep track of the number of integer regs passed so far.
unsigned NumIntRegs = 0;
// Keep track of the number of XMM regs passed so far.
unsigned NumXMMRegs = 0;
// How much bytes on stack used for struct return
unsigned NumSRetBytes= 0;
// Handle regparm attribute
SmallVector<bool, 8> ArgInRegs(NumOps, false);
SmallVector<bool, 8> SRetArgs(NumOps, false);
for (unsigned i = 0; i<NumOps; ++i) {
unsigned Flags =
dyn_cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue();
ArgInRegs[i] = (Flags >> 1) & 1;
SRetArgs[i] = (Flags >> 2) & 1;
}
// Calculate stack frame size
for (unsigned i = 0; i != NumOps; ++i) {
SDOperand Arg = Op.getOperand(5+2*i);
unsigned ArgIncrement = 4;
unsigned ObjSize = 0;
unsigned ObjIntRegs = 0;
unsigned ObjXMMRegs = 0;
HowToPassCallArgument(Arg.getValueType(),
ArgInRegs[i],
NumIntRegs, NumXMMRegs, 3,
ObjSize, ObjIntRegs, ObjXMMRegs,
!isStdCall);
if (ObjSize > 4)
ArgIncrement = ObjSize;
NumIntRegs += ObjIntRegs;
NumXMMRegs += ObjXMMRegs;
if (ObjSize) {
// XMM arguments have to be aligned on 16-byte boundary.
if (ObjSize == 16)
NumBytes = ((NumBytes + 15) / 16) * 16;
NumBytes += ArgIncrement;
}
}
Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
// Arguments go on the stack in reverse order, as specified by the ABI.
unsigned ArgOffset = 0;
NumXMMRegs = 0;
NumIntRegs = 0;
SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
SmallVector<SDOperand, 8> MemOpChains;
SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
for (unsigned i = 0; i != NumOps; ++i) {
SDOperand Arg = Op.getOperand(5+2*i);
unsigned ArgIncrement = 4;
unsigned ObjSize = 0;
unsigned ObjIntRegs = 0;
unsigned ObjXMMRegs = 0;
HowToPassCallArgument(Arg.getValueType(),
ArgInRegs[i],
NumIntRegs, NumXMMRegs, 3,
ObjSize, ObjIntRegs, ObjXMMRegs,
!isStdCall);
if (ObjSize > 4)
ArgIncrement = ObjSize;
if (Arg.getValueType() == MVT::i8 || Arg.getValueType() == MVT::i16) {
// Promote the integer to 32 bits. If the input type is signed use a
// sign extend, otherwise use a zero extend.
unsigned Flags = cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue();
unsigned ExtOp = (Flags & 1) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
Arg = DAG.getNode(ExtOp, MVT::i32, Arg);
if (ObjIntRegs || ObjXMMRegs) {
switch (Arg.getValueType()) {
default: assert(0 && "Unhandled argument type!");
case MVT::i32:
RegsToPass.push_back(std::make_pair(GPR32ArgRegs[NumIntRegs], Arg));
break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
assert(!isStdCall && "Unhandled argument type!");
RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
break;
}
NumIntRegs += ObjIntRegs;
NumXMMRegs += ObjXMMRegs;
}
if (ObjSize) {
// XMM arguments have to be aligned on 16-byte boundary.
if (ObjSize == 16)
ArgOffset = ((ArgOffset + 15) / 16) * 16;
SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
ArgOffset += ArgIncrement; // Move on to the next argument.
if (SRetArgs[i])
NumSRetBytes += ArgIncrement;
}
}
// Sanity check: we haven't seen NumSRetBytes > 4
assert((NumSRetBytes<=4) &&
"Too much space for struct-return pointer requested");
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
&MemOpChains[0], MemOpChains.size());
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into registers.
SDOperand InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
InFlag);
InFlag = Chain.getValue(1);
}
Evan Cheng
committed
// ELF / PIC requires GOT in the EBX register before function calls via PLT
// GOT pointer.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT()) {
Chain = DAG.getCopyToReg(Chain, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
InFlag);
InFlag = Chain.getValue(1);
}
// If the callee is a GlobalAddress node (quite common, every direct call is)
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
Anton Korobeynikov
committed
// We should use extra load for direct calls to dllimported functions in
// non-JIT mode.
if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
getTargetMachine(), true))
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SmallVector<SDOperand, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
// Add argument registers to the end of the list so that they are known live
// into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
Evan Cheng
committed
// Add an implicit use GOT pointer in EBX.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT())
Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
if (InFlag.Val)
Ops.push_back(InFlag);
Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPush = 0;
if (isStdCall) {
if (isVarArg) {
NumBytesForCalleeToPush = NumSRetBytes;
} else {
NumBytesForCalleeToPush = NumBytes;
}
} else {
// If this is is a call to a struct-return function, the callee
// pops the hidden struct pointer, so we have to push it back.
// This is common for Darwin/X86, Linux & Mingw32 targets.
NumBytesForCalleeToPush = NumSRetBytes;
}
if (RetVT != MVT::Other)
NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
else
NodeTys = DAG.getVTList(MVT::Other);
Ops.clear();
Ops.push_back(Chain);
Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
if (RetVT != MVT::Other)
InFlag = Chain.getValue(1);
SmallVector<SDOperand, 8> ResultVals;
switch (RetVT) {
default: assert(0 && "Unknown value type to return!");
case MVT::Other: break;
case MVT::i8:
Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
ResultVals.push_back(Chain.getValue(0));
NodeTys = DAG.getVTList(MVT::i8, MVT::Other);
break;
case MVT::i16:
Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
ResultVals.push_back(Chain.getValue(0));
NodeTys = DAG.getVTList(MVT::i16, MVT::Other);
break;
case MVT::i32:
if (Op.Val->getValueType(1) == MVT::i32) {
Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
ResultVals.push_back(Chain.getValue(0));
Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32,
Chain.getValue(2)).getValue(1);
ResultVals.push_back(Chain.getValue(0));
NodeTys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
} else {
Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
ResultVals.push_back(Chain.getValue(0));
NodeTys = DAG.getVTList(MVT::i32, MVT::Other);
break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
assert(!isStdCall && "Unknown value type to return!");
Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
ResultVals.push_back(Chain.getValue(0));
NodeTys = DAG.getVTList(RetVT, MVT::Other);
break;
case MVT::f32:
case MVT::f64: {
SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
SDOperand GROps[] = { Chain, InFlag };
SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
Chain = RetVal.getValue(1);
InFlag = RetVal.getValue(2);
if (X86ScalarSSE) {
// FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
Tys = DAG.getVTList(MVT::Other);
SDOperand Ops[] = {
Chain, RetVal, StackSlot, DAG.getValueType(RetVT), InFlag
};
Chain = DAG.getNode(X86ISD::FST, Tys, Ops, 5);
RetVal = DAG.getLoad(RetVT, Chain, StackSlot, NULL, 0);
Chain = RetVal.getValue(1);
if (RetVT == MVT::f32 && !X86ScalarSSE)
// FIXME: we would really like to remember that this FP_ROUND
// operation is okay to eliminate if we allow excess FP precision.
RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
ResultVals.push_back(RetVal);
NodeTys = DAG.getVTList(RetVT, MVT::Other);
break;
}
// If the function returns void, just return the chain.
if (ResultVals.empty())
return Chain;
// Otherwise, merge everything together with a MERGE_VALUES node.
ResultVals.push_back(Chain);
SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
&ResultVals[0], ResultVals.size());
return Res.getValue(Op.ResNo);
}
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//===----------------------------------------------------------------------===//
// X86-64 C Calling Convention implementation
//===----------------------------------------------------------------------===//
/// HowToPassX86_64CCCArgument - Returns how an formal argument of the specified
/// type should be passed. If it is through stack, returns the size of the stack
/// slot; if it is through integer or XMM register, returns the number of
/// integer or XMM registers are needed.
static void
HowToPassX86_64CCCArgument(MVT::ValueType ObjectVT,
unsigned NumIntRegs, unsigned NumXMMRegs,
unsigned &ObjSize, unsigned &ObjIntRegs,
unsigned &ObjXMMRegs) {
ObjSize = 0;
ObjIntRegs = 0;
ObjXMMRegs = 0;
switch (ObjectVT) {
default: assert(0 && "Unhandled argument type!");
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::i64:
if (NumIntRegs < 6)
ObjIntRegs = 1;
else {
switch (ObjectVT) {
default: break;
case MVT::i8: ObjSize = 1; break;
case MVT::i16: ObjSize = 2; break;
case MVT::i32: ObjSize = 4; break;
case MVT::i64: ObjSize = 8; break;
}
}
break;
case MVT::f32:
case MVT::f64:
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
if (NumXMMRegs < 8)
ObjXMMRegs = 1;
else {
switch (ObjectVT) {