Newer
Older
Evan Cheng
committed
SDOperand Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
if (Res.Val)
return Res;
}
MVT VT = Op.getValueType();
// TODO: handle v16i8.
if (VT.getSizeInBits() == 16) {
SDOperand Vec = Op.getOperand(0);
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return DAG.getNode(ISD::TRUNCATE, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Vec),
Op.getOperand(1)));
// Transform it so it match pextrw which produces a 32-bit result.
MVT EVT = (MVT::SimpleValueType)(VT.getSimpleVT()+1);
SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
Op.getOperand(0), Op.getOperand(1));
SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
} else if (VT.getSizeInBits() == 32) {
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
// SHUFPS the element to the lowest double word, then movss.
MVT MaskVT = MVT::getIntVectorWithNumElements(4);
SmallVector<SDOperand, 8> IdxVec;
IdxVec.
push_back(DAG.getConstant(Idx, MaskVT.getVectorElementType()));
IdxVec.
push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
IdxVec.
push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
IdxVec.
push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
SDOperand Vec = Op.getOperand(0);
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getIntPtrConstant(0));
} else if (VT.getSizeInBits() == 64) {
// FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
// FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
// to match extract_elt for f64.
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
// UNPCKHPD the element to the lowest double word, then movsd.
// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
MVT MaskVT = MVT::getIntVectorWithNumElements(4);
SmallVector<SDOperand, 8> IdxVec;
IdxVec.push_back(DAG.getConstant(1, MaskVT.getVectorElementType()));
IdxVec.
push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
SDOperand Vec = Op.getOperand(0);
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getIntPtrConstant(0));
}
return SDOperand();
}
SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDOperand Op, SelectionDAG &DAG){
MVT VT = Op.getValueType();
MVT EVT = VT.getVectorElementType();
SDOperand N0 = Op.getOperand(0);
SDOperand N1 = Op.getOperand(1);
SDOperand N2 = Op.getOperand(2);
if ((EVT.getSizeInBits() == 8) || (EVT.getSizeInBits() == 16)) {
unsigned Opc = (EVT.getSizeInBits() == 8) ? X86ISD::PINSRB
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
: X86ISD::PINSRW;
// Transform it so it match pinsr{b,w} which expects a GR32 as its second
// argument.
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue());
return DAG.getNode(Opc, VT, N0, N1, N2);
} else if (EVT == MVT::f32) {
// Bits [7:6] of the constant are the source select. This will always be
// zero here. The DAG Combiner may combine an extract_elt index into these
// bits. For example (insert (extract, 3), 2) could be matched by putting
// the '3' into bits [7:6] of X86ISD::INSERTPS.
// Bits [5:4] of the constant are the destination select. This is the
// value of the incoming immediate.
// Bits [3:0] of the constant are the zero mask. The DAG Combiner may
// combine either bitwise AND or insert of float 0.0 to set these bits.
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue() << 4);
return DAG.getNode(X86ISD::INSERTPS, VT, N0, N1, N2);
}
return SDOperand();
}
SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
MVT EVT = VT.getVectorElementType();
if (Subtarget->hasSSE41())
return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
if (EVT == MVT::i8)
return SDOperand();
SDOperand N0 = Op.getOperand(0);
SDOperand N1 = Op.getOperand(1);
SDOperand N2 = Op.getOperand(2);
if (EVT.getSizeInBits() == 16) {
// Transform it so it match pinsrw which expects a 16-bit value in a GR32
// as its second argument.
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue());
return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
}
return SDOperand();
}
SDOperand
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
MVT VT = MVT::v2i32;
switch (Op.getValueType().getSimpleVT()) {
default: break;
case MVT::v16i8:
case MVT::v8i16:
VT = MVT::v4i32;
break;
}
return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, AnyExt));
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOV32ri.
SDOperand
X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
getPointerTy(),
CP->getAlignment());
Evan Cheng
committed
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
}
return Result;
}
SDOperand
X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
Evan Cheng
committed
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
Anton Korobeynikov
committed
// For Darwin & Mingw32, external and weak symbols are indirect, so we want to
// load the value at address GV, not the value of GV itself. This means that
// the GlobalAddress must be in the base or index register of the address, not
// the GV offset field. Platform check is inside GVRequiresExtraLoad() call
// The same applies for external symbols during PIC codegen
Anton Korobeynikov
committed
if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result,
PseudoSourceValue::getGOT(), 0);
return Result;
}
Anton Korobeynikov
committed
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
Lauro Ramos Venancio
committed
static SDOperand
Anton Korobeynikov
committed
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
Lauro Ramos Venancio
committed
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
SDOperand InFlag;
SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg,
PtrVT), InFlag);
InFlag = Chain.getValue(1);
// emit leal symbol@TLSGD(,%ebx,1), %eax
SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
GA->getValueType(0),
GA->getOffset());
SDOperand Ops[] = { Chain, TGA, InFlag };
SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
InFlag = Result.getValue(2);
Chain = Result.getValue(1);
// call ___tls_get_addr. This function receives its argument in
// the register EAX.
Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
InFlag = Chain.getValue(1);
NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SDOperand Ops1[] = { Chain,
DAG.getTargetExternalSymbol("___tls_get_addr",
PtrVT),
DAG.getRegister(X86::EAX, PtrVT),
DAG.getRegister(X86::EBX, PtrVT),
InFlag };
Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
InFlag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
}
Anton Korobeynikov
committed
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
static SDOperand
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
Anton Korobeynikov
committed
4240
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266
4267
4268
SDOperand InFlag, Chain;
// emit leaq symbol@TLSGD(%rip), %rdi
SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
GA->getValueType(0),
GA->getOffset());
SDOperand Ops[] = { DAG.getEntryNode(), TGA};
SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 2);
Chain = Result.getValue(1);
InFlag = Result.getValue(2);
// call ___tls_get_addr. This function receives its argument in
// the register RDI.
Chain = DAG.getCopyToReg(Chain, X86::RDI, Result, InFlag);
InFlag = Chain.getValue(1);
NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SDOperand Ops1[] = { Chain,
DAG.getTargetExternalSymbol("___tls_get_addr",
PtrVT),
DAG.getRegister(X86::RDI, PtrVT),
InFlag };
Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 4);
InFlag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, X86::RAX, PtrVT, InFlag);
}
Lauro Ramos Venancio
committed
// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
// "local exec" model.
static SDOperand LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const MVT PtrVT) {
Lauro Ramos Venancio
committed
// Get the Thread Pointer
SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
// exec)
SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
GA->getValueType(0),
GA->getOffset());
SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);
Lauro Ramos Venancio
committed
if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset,
PseudoSourceValue::getGOT(), 0);
Lauro Ramos Venancio
committed
Lauro Ramos Venancio
committed
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
}
SDOperand
X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
// TODO: implement the "local dynamic" model
// TODO: implement the "initial exec"model for pic executables
Anton Korobeynikov
committed
assert(Subtarget->isTargetELF() &&
"TLS not implemented for non-ELF targets");
Lauro Ramos Venancio
committed
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
// If the relocation model is PIC, use the "General Dynamic" TLS Model,
// otherwise use the "Local Exec"TLS Model
Anton Korobeynikov
committed
if (Subtarget->is64Bit()) {
return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
} else {
if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
else
return LowerToTLSExecModel(GA, DAG, getPointerTy());
}
Lauro Ramos Venancio
committed
}
SDOperand
X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
Evan Cheng
committed
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
4315
4316
4317
4318
4319
4320
4321
4322
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
}
return Result;
}
SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
}
return Result;
}
/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
/// take a 2 x i32 value to shift plus a shift amount.
SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
MVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
SDOperand ShOpLo = Op.getOperand(0);
SDOperand ShOpHi = Op.getOperand(1);
SDOperand ShAmt = Op.getOperand(2);
SDOperand Tmp1 = isSRA ?
DAG.getNode(ISD::SRA, VT, ShOpHi, DAG.getConstant(VTBits - 1, MVT::i8)) :
DAG.getConstant(0, VT);
SDOperand Tmp2, Tmp3;
if (Op.getOpcode() == ISD::SHL_PARTS) {
Tmp2 = DAG.getNode(X86ISD::SHLD, VT, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(ISD::SHL, VT, ShOpLo, ShAmt);
Tmp2 = DAG.getNode(X86ISD::SHRD, VT, ShOpLo, ShOpHi, ShAmt);
Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, VT, ShOpHi, ShAmt);
}
SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
DAG.getConstant(VTBits, MVT::i8));
SDOperand Cond = DAG.getNode(X86ISD::CMP, VT,
AndNode, DAG.getConstant(0, MVT::i8));
SDOperand Hi, Lo;
SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
SDOperand Ops0[4] = { Tmp2, Tmp3, CC, Cond };
SDOperand Ops1[4] = { Tmp3, Tmp1, CC, Cond };
if (Op.getOpcode() == ISD::SHL_PARTS) {
Hi = DAG.getNode(X86ISD::CMOV, VT, Ops0, 4);
Lo = DAG.getNode(X86ISD::CMOV, VT, Ops1, 4);
Lo = DAG.getNode(X86ISD::CMOV, VT, Ops0, 4);
Hi = DAG.getNode(X86ISD::CMOV, VT, Ops1, 4);
SDOperand Ops[2] = { Lo, Hi };
return DAG.getMergeValues(DAG.getVTList(VT, VT), Ops, 2);
}
SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
MVT SrcVT = Op.getOperand(0).getValueType();
assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
// These are really Legal; caller falls through into that case.
if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
return SDOperand();
if (SrcVT == MVT::i64 && Op.getValueType() != MVT::f80 &&
Subtarget->is64Bit())
return SDOperand();
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
PseudoSourceValue::getFixedStack(),
// Build the FILD
SDVTList Tys;
bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
else
Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
SmallVector<SDOperand, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(StackSlot);
Ops.push_back(DAG.getValueType(SrcVT));
SDOperand Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD,
Tys, &Ops[0], Ops.size());
Chain = Result.getValue(1);
SDOperand InFlag = Result.getValue(2);
// FIXME: Currently the FST is flagged to the FILD_FLAG. This
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
Tys = DAG.getVTList(MVT::Other);
SmallVector<SDOperand, 8> Ops;
Ops.push_back(Result);
Ops.push_back(StackSlot);
Ops.push_back(DAG.getValueType(Op.getValueType()));
Ops.push_back(InFlag);
Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot,
PseudoSourceValue::getFixedStack(), SSFI);
}
return Result;
}
std::pair<SDOperand,SDOperand> X86TargetLowering::
FP_TO_SINTHelper(SDOperand Op, SelectionDAG &DAG) {
assert(Op.getValueType().getSimpleVT() <= MVT::i64 &&
Op.getValueType().getSimpleVT() >= MVT::i16 &&
"Unknown FP_TO_SINT to lower!");
// These are really Legal.
if (Op.getValueType() == MVT::i32 &&
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
if (Subtarget->is64Bit() &&
Op.getValueType() == MVT::i64 &&
Op.getOperand(0).getValueType() != MVT::f80)
// We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
// stack slot.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = Op.getValueType().getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
unsigned Opc;
switch (Op.getValueType().getSimpleVT()) {
default: assert(0 && "Invalid FP_TO_SINT to lower!");
case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
}
SDOperand Chain = DAG.getEntryNode();
SDOperand Value = Op.getOperand(0);
if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) {
assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, Value, StackSlot,
PseudoSourceValue::getFixedStack(), SSFI);
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
SDOperand Ops[] = {
Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
};
Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
Chain = Value.getValue(1);
SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
// Build the FP_TO_INT*_IN_MEM
SDOperand Ops[] = { Chain, Value, StackSlot };
SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3);
return std::make_pair(FIST, StackSlot);
}
SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(Op, DAG);
SDOperand FIST = Vals.first, StackSlot = Vals.second;
if (FIST.Val == 0) return SDOperand();
// Load the result.
return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
SDNode *X86TargetLowering::ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG) {
std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(SDOperand(N, 0), DAG);
SDOperand FIST = Vals.first, StackSlot = Vals.second;
if (FIST.Val == 0) return 0;
MVT VT = N->getValueType(0);
// Return a load from the stack slot.
SDOperand Res = DAG.getLoad(VT, FIST, StackSlot, NULL, 0);
// Use a MERGE_VALUES node to drop the chain result value.
return DAG.getMergeValues(DAG.getVTList(VT), &Res, 1, false).Val;
}
SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
MVT EltVT = VT;
if (VT.isVector())
EltVT = VT.getVectorElementType();
std::vector<Constant*> CV;
if (EltVT == MVT::f64) {
Constant *C = ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63))));
CV.push_back(C);
CV.push_back(C);
} else {
Constant *C = ConstantFP::get(APFloat(APInt(32, ~(1U << 31))));
CV.push_back(C);
CV.push_back(C);
CV.push_back(C);
CV.push_back(C);
Constant *C = ConstantVector::get(CV);
SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
PseudoSourceValue::getConstantPool(), 0,
false, 16);
return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
}
SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
MVT EltVT = VT;
if (VT.isVector()) {
EltVT = VT.getVectorElementType();
EltNum = VT.getVectorNumElements();
std::vector<Constant*> CV;
if (EltVT == MVT::f64) {
Constant *C = ConstantFP::get(APFloat(APInt(64, 1ULL << 63)));
CV.push_back(C);
CV.push_back(C);
} else {
Constant *C = ConstantFP::get(APFloat(APInt(32, 1U << 31)));
CV.push_back(C);
CV.push_back(C);
CV.push_back(C);
CV.push_back(C);
Constant *C = ConstantVector::get(CV);
SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
PseudoSourceValue::getConstantPool(), 0,
false, 16);
return DAG.getNode(ISD::BIT_CONVERT, VT,
DAG.getNode(ISD::XOR, MVT::v2i64,
DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)),
DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask)));
} else {
return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
}
SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
SDOperand Op0 = Op.getOperand(0);
SDOperand Op1 = Op.getOperand(1);
MVT VT = Op.getValueType();
MVT SrcVT = Op1.getValueType();
// If second operand is smaller, extend it first.
if (SrcVT.bitsLT(VT)) {
Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1);
SrcVT = VT;
}
// And if it is bigger, shrink it first.
if (SrcVT.bitsGT(VT)) {
Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1, DAG.getIntPtrConstant(1));
SrcVT = VT;
}
// At this point the operands and the result should have the same
// type, and that won't be f80 since that is not custom lowered.
// First get the sign bit of second operand.
std::vector<Constant*> CV;
if (SrcVT == MVT::f64) {
CV.push_back(ConstantFP::get(APFloat(APInt(64, 1ULL << 63))));
CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
} else {
CV.push_back(ConstantFP::get(APFloat(APInt(32, 1U << 31))));
CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
Constant *C = ConstantVector::get(CV);
SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx,
PseudoSourceValue::getConstantPool(), 0,
false, 16);
SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
// Shift sign bit right or left if the two operands have different types.
if (SrcVT.bitsGT(VT)) {
// Op0 is MVT::f32, Op1 is MVT::f64.
SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit);
SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit,
DAG.getConstant(32, MVT::i32));
SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
DAG.getIntPtrConstant(0));
// Clear first operand sign bit.
CV.clear();
if (VT == MVT::f64) {
CV.push_back(ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63)))));
CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
} else {
CV.push_back(ConstantFP::get(APFloat(APInt(32, ~(1U << 31)))));
CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
}
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
PseudoSourceValue::getConstantPool(), 0,
false, 16);
SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);
// Or the value with the sign bit.
return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
Evan Cheng
committed
assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
Evan Cheng
committed
SDOperand Op0 = Op.getOperand(0);
SDOperand Op1 = Op.getOperand(1);
SDOperand CC = Op.getOperand(2);
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
Evan Cheng
committed
unsigned X86CC;
if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1);
return DAG.getNode(X86ISD::SETCC, MVT::i8,
Evan Cheng
committed
DAG.getConstant(X86CC, MVT::i8), Cond);
Evan Cheng
committed
assert(isFP && "Illegal integer SetCC!");
Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1);
Evan Cheng
committed
switch (SetCCOpcode) {
default: assert(false && "Illegal floating point SetCC!");
case ISD::SETOEQ: { // !PF & ZF
SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8,
Evan Cheng
committed
DAG.getConstant(X86::COND_NP, MVT::i8), Cond);
SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
Evan Cheng
committed
DAG.getConstant(X86::COND_E, MVT::i8), Cond);
return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
}
case ISD::SETUNE: { // PF | !ZF
SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8,
Evan Cheng
committed
DAG.getConstant(X86::COND_P, MVT::i8), Cond);
SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
Evan Cheng
committed
DAG.getConstant(X86::COND_NE, MVT::i8), Cond);
return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
}
}
}
SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
bool addTest = true;
SDOperand Cond = Op.getOperand(0);
SDOperand CC;
if (Cond.getOpcode() == ISD::SETCC)
Cond = LowerSETCC(Cond, DAG);
Evan Cheng
committed
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
if (Cond.getOpcode() == X86ISD::SETCC) {
CC = Cond.getOperand(0);
SDOperand Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
MVT VT = Op.getValueType();
Evan Cheng
committed
bool IllegalFPCMov = false;
if (VT.isFloatingPoint() && !VT.isVector() &&
!isScalarFPTypeInSSEReg(VT)) // FPStack?
IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
if ((Opc == X86ISD::CMP ||
Opc == X86ISD::COMI ||
Opc == X86ISD::UCOMI) && !IllegalFPCMov) {
Evan Cheng
committed
Cond = Cmp;
Evan Cheng
committed
addTest = false;
}
}
if (addTest) {
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Evan Cheng
committed
Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8));
Evan Cheng
committed
}
const MVT *VTs = DAG.getNodeValueTypes(Op.getValueType(),
Evan Cheng
committed
MVT::Flag);
SmallVector<SDOperand, 4> Ops;
// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.
Ops.push_back(Op.getOperand(2));
Ops.push_back(Op.getOperand(1));
Ops.push_back(CC);
Ops.push_back(Cond);
return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
Evan Cheng
committed
}
SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
bool addTest = true;
SDOperand Chain = Op.getOperand(0);
SDOperand Cond = Op.getOperand(1);
SDOperand Dest = Op.getOperand(2);
SDOperand CC;
if (Cond.getOpcode() == ISD::SETCC)
Cond = LowerSETCC(Cond, DAG);
Evan Cheng
committed
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
if (Cond.getOpcode() == X86ISD::SETCC) {
CC = Cond.getOperand(0);
SDOperand Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
if (Opc == X86ISD::CMP ||
Opc == X86ISD::COMI ||
Opc == X86ISD::UCOMI) {
Evan Cheng
committed
Cond = Cmp;
Evan Cheng
committed
addTest = false;
}
}
if (addTest) {
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8));
Evan Cheng
committed
}
return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
Evan Cheng
committed
Chain, Op.getOperand(2), CC, Cond);
}
// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
// Calls to _alloca is needed to probe the stack when allocating more than 4k
// bytes in one go. Touching the stack at 4K increments is necessary to ensure
// that the guard pages used by the OS virtual memory manager are allocated in
// correct sequence.
SDOperand
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
SelectionDAG &DAG) {
assert(Subtarget->isTargetCygMing() &&
"This should be used only on Cygwin/Mingw targets");
Anton Korobeynikov
committed
// Get the inputs.
SDOperand Chain = Op.getOperand(0);
SDOperand Size = Op.getOperand(1);
// FIXME: Ensure alignment here
MVT IntPtr = getPointerTy();
MVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0));
Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
Flag = Chain.getValue(1);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SDOperand Ops[] = { Chain,
DAG.getTargetExternalSymbol("_alloca", IntPtr),
DAG.getRegister(X86::EAX, IntPtr),
DAG.getRegister(X86StackPtr, SPTy),
Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 5);
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getIntPtrConstant(0),
DAG.getIntPtrConstant(0),
Flag);
Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);
SDOperand Ops1[2] = { Chain.getValue(0), Chain };
return DAG.getMergeValues(DAG.getVTList(SPTy, MVT::Other), Ops1, 2);
Anton Korobeynikov
committed
}
SDOperand
X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG,
SDOperand Chain,
SDOperand Dst, SDOperand Src,
SDOperand Size, unsigned Align,
const Value *DstSV, uint64_t DstSVOff) {
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
/// If not DWORD aligned or size is more than the threshold, call the library.
/// The libc version is likely to be faster for these cases. It can use the
/// address value and run time information about the CPU.
if ((Align & 3) == 0 ||
!ConstantSize ||
ConstantSize->getValue() > getSubtarget()->getMaxInlineSizeThreshold()) {
SDOperand InFlag(0, 0);
// Check to see if there is a specialized entry-point for memory zeroing.
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
if (const char *bzeroEntry =
V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
MVT IntPtr = getPointerTy();
const Type *IntPtrTy = getTargetData()->getIntPtrType();
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst;
Entry.Ty = IntPtrTy;
Args.push_back(Entry);
Entry.Node = Size;
Args.push_back(Entry);
std::pair<SDOperand,SDOperand> CallResult =
LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C,
false, DAG.getExternalSymbol(bzeroEntry, IntPtr),
Args, DAG);
return CallResult.second;
// Otherwise have the target-independent code call memset.
return SDOperand();
uint64_t SizeVal = ConstantSize->getValue();
SDOperand InFlag(0, 0);
SDOperand Count;
ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
unsigned BytesLeft = 0;
bool TwoRepStos = false;
if (ValC) {
unsigned ValReg;
// If the value is a constant, then we can potentially use larger sets.
switch (Align & 3) {
case 2: // WORD aligned
AVT = MVT::i16;
ValReg = X86::AX;
AVT = MVT::i32;
Val = (Val << 8) | Val;
Val = (Val << 16) | Val;
if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
AVT = MVT::i64;
ValReg = X86::RAX;
Val = (Val << 32) | Val;
}
break;
default: // Byte aligned
AVT = MVT::i8;
ValReg = X86::AL;
Count = DAG.getIntPtrConstant(SizeVal);
}
if (AVT.bitsGT(MVT::i8)) {
unsigned UBytes = AVT.getSizeInBits() / 8;
Count = DAG.getIntPtrConstant(SizeVal / UBytes);
BytesLeft = SizeVal % UBytes;
Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
InFlag);
InFlag = Chain.getValue(1);
} else {
AVT = MVT::i8;
Count = DAG.getIntPtrConstant(SizeVal);
Chain = DAG.getCopyToReg(Chain, X86::AL, Src, InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
Count, InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
Dst, InFlag);
InFlag = Chain.getValue(1);
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
SmallVector<SDOperand, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(DAG.getValueType(AVT));
Ops.push_back(InFlag);
Evan Cheng
committed
Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
if (TwoRepStos) {
InFlag = Chain.getValue(1);
Count = Size;
MVT CVT = Count.getValueType();
SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
Left, InFlag);
InFlag = Chain.getValue(1);
Tys = DAG.getVTList(MVT::Other, MVT::Flag);
Ops.clear();
Ops.push_back(Chain);
Ops.push_back(DAG.getValueType(MVT::i8));
Ops.push_back(InFlag);
Evan Cheng
committed
Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
} else if (BytesLeft) {
// Handle the last 1 - 7 bytes.
unsigned Offset = SizeVal - BytesLeft;
MVT AddrVT = Dst.getValueType();
MVT SizeVT = Size.getValueType();
Chain = DAG.getMemset(Chain,
DAG.getNode(ISD::ADD, AddrVT, Dst,
DAG.getConstant(Offset, AddrVT)),
Src,
DAG.getConstant(BytesLeft, SizeVT),
Align, DstSV, DstSVOff + Offset);
// TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
return Chain;
}
SDOperand
X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG,
SDOperand Chain,
SDOperand Dst, SDOperand Src,
SDOperand Size, unsigned Align,
bool AlwaysInline,
const Value *DstSV, uint64_t DstSVOff,
const Value *SrcSV, uint64_t SrcSVOff){
// This requires the copy size to be a constant, preferrably
// within a subtarget-specific limit.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (!ConstantSize)
return SDOperand();
uint64_t SizeVal = ConstantSize->getValue();
if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
return SDOperand();
unsigned BytesLeft = 0;
if (Align >= 8 && Subtarget->is64Bit())
AVT = MVT::i64;
else if (Align >= 4)
AVT = MVT::i32;
else if (Align >= 2)
AVT = MVT::i16;
else
AVT = MVT::i8;
unsigned UBytes = AVT.getSizeInBits() / 8;
unsigned CountVal = SizeVal / UBytes;
SDOperand Count = DAG.getIntPtrConstant(CountVal);
BytesLeft = SizeVal % UBytes;
SDOperand InFlag(0, 0);
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
Count, InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
Dst, InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
Src, InFlag);