Newer
Older
Gabor Greif
committed
if (NewMask.getNode() != PermMask.getNode())
Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
Evan Cheng
committed
}
Evan Cheng
committed
}
Evan Cheng
committed
Gabor Greif
committed
if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) ||
X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) ||
X86::isUNPCKLMask(PermMask.getNode()) ||
X86::isUNPCKHMask(PermMask.getNode()))
Evan Cheng
committed
return Op;
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 points to its first
// element then try to match unpck{h|l} again. If match, return a
// new vector_shuffle with the corrected mask.
SDValue NewMask = NormalizeMask(PermMask, DAG);
Gabor Greif
committed
if (NewMask.getNode() != PermMask.getNode()) {
if (X86::isUNPCKLMask(PermMask.getNode(), true)) {
SDValue NewMask = getUnpacklMask(NumElems, DAG);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
Gabor Greif
committed
} else if (X86::isUNPCKHMask(PermMask.getNode(), true)) {
SDValue NewMask = getUnpackhMask(NumElems, DAG);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
}
}
}
// Normalize the node to match x86 shuffle ops if needed
Gabor Greif
committed
if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.getNode()))
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (Commuted) {
// Commute is back and try unpck* again.
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
Gabor Greif
committed
if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) ||
X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) ||
X86::isUNPCKLMask(PermMask.getNode()) ||
X86::isUNPCKHMask(PermMask.getNode()))
return Op;
}
Evan Cheng
committed
// Try PSHUF* first, then SHUFP*.
// MMX doesn't have PSHUFD but it does have PSHUFW. While it's theoretically
// possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
Gabor Greif
committed
if (isMMX && NumElems == 4 && X86::isPSHUFDMask(PermMask.getNode())) {
Evan Cheng
committed
if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, VT), PermMask);
return Op;
}
if (!isMMX) {
if (Subtarget->hasSSE2() &&
Gabor Greif
committed
(X86::isPSHUFDMask(PermMask.getNode()) ||
X86::isPSHUFHWMask(PermMask.getNode()) ||
X86::isPSHUFLWMask(PermMask.getNode()))) {
Evan Cheng
committed
if (VT == MVT::v4f32) {
RVT = MVT::v4i32;
Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT,
DAG.getNode(ISD::BIT_CONVERT, RVT, V1),
DAG.getNode(ISD::UNDEF, RVT), PermMask);
} else if (V2.getOpcode() != ISD::UNDEF)
Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT, V1,
DAG.getNode(ISD::UNDEF, RVT), PermMask);
if (RVT != VT)
Op = DAG.getNode(ISD::BIT_CONVERT, VT, Op);
return Op;
}
Evan Cheng
committed
// Binary or unary shufps.
Gabor Greif
committed
if (X86::isSHUFPMask(PermMask.getNode()) ||
(V2.getOpcode() == ISD::UNDEF && X86::isPSHUFDMask(PermMask.getNode())))
return Op;
}
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
if (VT == MVT::v8i16) {
SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
Gabor Greif
committed
if (NewOp.getNode())
return NewOp;
}
Evan Cheng
committed
// Handle all 4 wide cases with a number of shuffles except for MMX.
if (NumElems == 4 && !isMMX)
return LowerVECTOR_SHUFFLE_4wide(V1, V2, PermMask, VT, DAG);
SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
MVT VT = Op.getValueType();
if (VT.getSizeInBits() == 8) {
SDValue Extract = DAG.getNode(X86ISD::PEXTRB, MVT::i32,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
} else if (VT.getSizeInBits() == 16) {
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, MVT::i32,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
Evan Cheng
committed
} else if (VT == MVT::f32) {
// EXTRACTPS outputs to a GPR32 register which will require a movd to copy
// the result back to FR32 register. It's only worth matching if the
// result has a single use which is a store or a bitcast to i32.
Evan Cheng
committed
if (!Op.hasOneUse())
Gabor Greif
committed
SDNode *User = *Op.getNode()->use_begin();
if (User->getOpcode() != ISD::STORE &&
(User->getOpcode() != ISD::BIT_CONVERT ||
User->getValueType(0) != MVT::i32))
return SDValue();
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
Evan Cheng
committed
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Op.getOperand(0)),
Op.getOperand(1));
return DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Extract);
SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
if (!isa<ConstantSDNode>(Op.getOperand(1)))
Evan Cheng
committed
if (Subtarget->hasSSE41()) {
SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
Gabor Greif
committed
if (Res.getNode())
Evan Cheng
committed
return Res;
}
MVT VT = Op.getValueType();
// TODO: handle v16i8.
if (VT.getSizeInBits() == 16) {
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return DAG.getNode(ISD::TRUNCATE, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Vec),
Op.getOperand(1)));
// Transform it so it match pextrw which produces a 32-bit result.
MVT EVT = (MVT::SimpleValueType)(VT.getSimpleVT()+1);
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
} else if (VT.getSizeInBits() == 32) {
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
// SHUFPS the element to the lowest double word, then movss.
MVT MaskVT = MVT::getIntVectorWithNumElements(4);
IdxVec.
push_back(DAG.getConstant(Idx, MaskVT.getVectorElementType()));
IdxVec.
push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
IdxVec.
push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
IdxVec.
push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getIntPtrConstant(0));
} else if (VT.getSizeInBits() == 64) {
// FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
// FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
// to match extract_elt for f64.
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
// UNPCKHPD the element to the lowest double word, then movsd.
// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
MVT MaskVT = MVT::getIntVectorWithNumElements(2);
IdxVec.push_back(DAG.getConstant(1, MaskVT.getVectorElementType()));
IdxVec.
push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getIntPtrConstant(0));
SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){
MVT VT = Op.getValueType();
MVT EVT = VT.getVectorElementType();
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
if ((EVT.getSizeInBits() == 8 || EVT.getSizeInBits() == 16) &&
isa<ConstantSDNode>(N2)) {
unsigned Opc = (EVT.getSizeInBits() == 8) ? X86ISD::PINSRB
: X86ISD::PINSRW;
// Transform it so it match pinsr{b,w} which expects a GR32 as its second
// argument.
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue());
return DAG.getNode(Opc, VT, N0, N1, N2);
} else if (EVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
// Bits [7:6] of the constant are the source select. This will always be
// zero here. The DAG Combiner may combine an extract_elt index into these
// bits. For example (insert (extract, 3), 2) could be matched by putting
// the '3' into bits [7:6] of X86ISD::INSERTPS.
// Bits [5:4] of the constant are the destination select. This is the
// value of the incoming immediate.
// Bits [3:0] of the constant are the zero mask. The DAG Combiner may
// combine either bitwise AND or insert of float 0.0 to set these bits.
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue() << 4);
return DAG.getNode(X86ISD::INSERTPS, VT, N0, N1, N2);
}
SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
MVT EVT = VT.getVectorElementType();
if (Subtarget->hasSSE41())
return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
if (EVT == MVT::i8)
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
if (EVT.getSizeInBits() == 16) {
// Transform it so it match pinsrw which expects a 16-bit value in a GR32
// as its second argument.
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue());
return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
}
SDValue
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
if (Op.getValueType() == MVT::v2f32)
return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f32,
DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2i32,
DAG.getNode(ISD::BIT_CONVERT, MVT::i32,
Op.getOperand(0))));
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
MVT VT = MVT::v2i32;
switch (Op.getValueType().getSimpleVT()) {
default: break;
case MVT::v16i8:
case MVT::v8i16:
VT = MVT::v4i32;
break;
}
return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, AnyExt));
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOV32ri.
SDValue
X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(),
getPointerTy(),
CP->getAlignment());
Evan Cheng
committed
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
}
return Result;
}
SDValue
X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDValue Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
Evan Cheng
committed
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
Anton Korobeynikov
committed
// For Darwin & Mingw32, external and weak symbols are indirect, so we want to
// load the value at address GV, not the value of GV itself. This means that
// the GlobalAddress must be in the base or index register of the address, not
// the GV offset field. Platform check is inside GVRequiresExtraLoad() call
// The same applies for external symbols during PIC codegen
Anton Korobeynikov
committed
if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result,
PseudoSourceValue::getGOT(), 0);
return Result;
}
Anton Korobeynikov
committed
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
Anton Korobeynikov
committed
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
SDValue InFlag;
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
Lauro Ramos Venancio
committed
DAG.getNode(X86ISD::GlobalBaseReg,
PtrVT), InFlag);
InFlag = Chain.getValue(1);
// emit leal symbol@TLSGD(,%ebx,1), %eax
SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
Lauro Ramos Venancio
committed
GA->getValueType(0),
GA->getOffset());
SDValue Ops[] = { Chain, TGA, InFlag };
SDValue Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
Lauro Ramos Venancio
committed
InFlag = Result.getValue(2);
Chain = Result.getValue(1);
// call ___tls_get_addr. This function receives its argument in
// the register EAX.
Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
InFlag = Chain.getValue(1);
NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
Lauro Ramos Venancio
committed
DAG.getTargetExternalSymbol("___tls_get_addr",
PtrVT),
DAG.getRegister(X86::EAX, PtrVT),
DAG.getRegister(X86::EBX, PtrVT),
InFlag };
Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
InFlag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
}
Anton Korobeynikov
committed
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
Anton Korobeynikov
committed
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
Anton Korobeynikov
committed
// emit leaq symbol@TLSGD(%rip), %rdi
SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
Anton Korobeynikov
committed
GA->getValueType(0),
GA->getOffset());
SDValue Ops[] = { DAG.getEntryNode(), TGA};
SDValue Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 2);
Anton Korobeynikov
committed
Chain = Result.getValue(1);
InFlag = Result.getValue(2);
// call __tls_get_addr. This function receives its argument in
Anton Korobeynikov
committed
// the register RDI.
Chain = DAG.getCopyToReg(Chain, X86::RDI, Result, InFlag);
InFlag = Chain.getValue(1);
NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
DAG.getTargetExternalSymbol("__tls_get_addr",
Anton Korobeynikov
committed
PtrVT),
DAG.getRegister(X86::RDI, PtrVT),
InFlag };
Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 4);
InFlag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, X86::RAX, PtrVT, InFlag);
}
Lauro Ramos Venancio
committed
// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
// "local exec" model.
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
Lauro Ramos Venancio
committed
// Get the Thread Pointer
SDValue ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
Lauro Ramos Venancio
committed
// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
// exec)
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
Lauro Ramos Venancio
committed
GA->getValueType(0),
GA->getOffset());
SDValue Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);
Lauro Ramos Venancio
committed
if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset,
PseudoSourceValue::getGOT(), 0);
Lauro Ramos Venancio
committed
Lauro Ramos Venancio
committed
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
}
SDValue
X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
Lauro Ramos Venancio
committed
// TODO: implement the "local dynamic" model
// TODO: implement the "initial exec"model for pic executables
Anton Korobeynikov
committed
assert(Subtarget->isTargetELF() &&
"TLS not implemented for non-ELF targets");
Lauro Ramos Venancio
committed
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
// If the relocation model is PIC, use the "General Dynamic" TLS Model,
// otherwise use the "Local Exec"TLS Model
Anton Korobeynikov
committed
if (Subtarget->is64Bit()) {
return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
} else {
if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
else
return LowerToTLSExecModel(GA, DAG, getPointerTy());
}
Lauro Ramos Venancio
committed
}
SDValue
X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
Evan Cheng
committed
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
}
return Result;
}
SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
}
return Result;
}
/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
/// take a 2 x i32 value to shift plus a shift amount.
SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
MVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
SDValue Tmp1 = isSRA ?
DAG.getNode(ISD::SRA, VT, ShOpHi, DAG.getConstant(VTBits - 1, MVT::i8)) :
DAG.getConstant(0, VT);
if (Op.getOpcode() == ISD::SHL_PARTS) {
Tmp2 = DAG.getNode(X86ISD::SHLD, VT, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(ISD::SHL, VT, ShOpLo, ShAmt);
Tmp2 = DAG.getNode(X86ISD::SHRD, VT, ShOpLo, ShOpHi, ShAmt);
Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, VT, ShOpHi, ShAmt);
SDValue AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
DAG.getConstant(VTBits, MVT::i8));
AndNode, DAG.getConstant(0, MVT::i8));
SDValue Hi, Lo;
SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8);
SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond };
SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond };
if (Op.getOpcode() == ISD::SHL_PARTS) {
Hi = DAG.getNode(X86ISD::CMOV, VT, Ops0, 4);
Lo = DAG.getNode(X86ISD::CMOV, VT, Ops1, 4);
Lo = DAG.getNode(X86ISD::CMOV, VT, Ops0, 4);
Hi = DAG.getNode(X86ISD::CMOV, VT, Ops1, 4);
return DAG.getMergeValues(Ops, 2);
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
MVT SrcVT = Op.getOperand(0).getValueType();
assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
// These are really Legal; caller falls through into that case.
if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
if (SrcVT == MVT::i64 && Op.getValueType() != MVT::f80 &&
Subtarget->is64Bit())
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDValue Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
PseudoSourceValue::getFixedStack(SSFI), 0);
// Build the FILD
SDVTList Tys;
bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
else
Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
Ops.push_back(Chain);
Ops.push_back(StackSlot);
Ops.push_back(DAG.getValueType(SrcVT));
SDValue Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD,
Tys, &Ops[0], Ops.size());
Chain = Result.getValue(1);
// FIXME: Currently the FST is flagged to the FILD_FLAG. This
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
Tys = DAG.getVTList(MVT::Other);
Ops.push_back(Result);
Ops.push_back(StackSlot);
Ops.push_back(DAG.getValueType(Op.getValueType()));
Ops.push_back(InFlag);
Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot,
PseudoSourceValue::getFixedStack(SSFI), 0);
}
return Result;
}
std::pair<SDValue,SDValue> X86TargetLowering::
FP_TO_SINTHelper(SDValue Op, SelectionDAG &DAG) {
assert(Op.getValueType().getSimpleVT() <= MVT::i64 &&
Op.getValueType().getSimpleVT() >= MVT::i16 &&
"Unknown FP_TO_SINT to lower!");
// These are really Legal.
if (Op.getValueType() == MVT::i32 &&
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
if (Subtarget->is64Bit() &&
Op.getValueType() == MVT::i64 &&
Op.getOperand(0).getValueType() != MVT::f80)
// We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
// stack slot.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = Op.getValueType().getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
unsigned Opc;
switch (Op.getValueType().getSimpleVT()) {
default: assert(0 && "Invalid FP_TO_SINT to lower!");
case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
}
SDValue Chain = DAG.getEntryNode();
SDValue Value = Op.getOperand(0);
if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) {
assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, Value, StackSlot,
PseudoSourceValue::getFixedStack(SSFI), 0);
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
};
Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
Chain = Value.getValue(1);
SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
// Build the FP_TO_INT*_IN_MEM
SDValue Ops[] = { Chain, Value, StackSlot };
SDValue FIST = DAG.getNode(Opc, MVT::Other, Ops, 3);
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
std::pair<SDValue,SDValue> Vals = FP_TO_SINTHelper(Op, DAG);
SDValue FIST = Vals.first, StackSlot = Vals.second;
Gabor Greif
committed
if (FIST.getNode() == 0) return SDValue();
// Load the result.
return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
SDNode *X86TargetLowering::ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG) {
std::pair<SDValue,SDValue> Vals = FP_TO_SINTHelper(SDValue(N, 0), DAG);
SDValue FIST = Vals.first, StackSlot = Vals.second;
Gabor Greif
committed
if (FIST.getNode() == 0) return 0;
MVT VT = N->getValueType(0);
// Return a load from the stack slot.
SDValue Res = DAG.getLoad(VT, FIST, StackSlot, NULL, 0);
// Use MERGE_VALUES to drop the chain result value and get a node with one
// result. This requires turning off getMergeValues simplification, since
// otherwise it will give us Res back.
Gabor Greif
committed
return DAG.getMergeValues(&Res, 1, false).getNode();
SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
MVT EltVT = VT;
if (VT.isVector())
EltVT = VT.getVectorElementType();
std::vector<Constant*> CV;
if (EltVT == MVT::f64) {
Constant *C = ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63))));
CV.push_back(C);
CV.push_back(C);
} else {
Constant *C = ConstantFP::get(APFloat(APInt(32, ~(1U << 31))));
CV.push_back(C);
CV.push_back(C);
CV.push_back(C);
CV.push_back(C);
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
SDValue Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
PseudoSourceValue::getConstantPool(), 0,
false, 16);
return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
}
SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
MVT EltVT = VT;
if (VT.isVector()) {
EltVT = VT.getVectorElementType();
EltNum = VT.getVectorNumElements();
std::vector<Constant*> CV;
if (EltVT == MVT::f64) {
Constant *C = ConstantFP::get(APFloat(APInt(64, 1ULL << 63)));
CV.push_back(C);
CV.push_back(C);
} else {
Constant *C = ConstantFP::get(APFloat(APInt(32, 1U << 31)));
CV.push_back(C);
CV.push_back(C);
CV.push_back(C);
CV.push_back(C);
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
SDValue Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
PseudoSourceValue::getConstantPool(), 0,
false, 16);
return DAG.getNode(ISD::BIT_CONVERT, VT,
DAG.getNode(ISD::XOR, MVT::v2i64,
DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)),
DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask)));
} else {
return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
}
SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
MVT VT = Op.getValueType();
MVT SrcVT = Op1.getValueType();
// If second operand is smaller, extend it first.
if (SrcVT.bitsLT(VT)) {
Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1);
SrcVT = VT;
}
// And if it is bigger, shrink it first.
if (SrcVT.bitsGT(VT)) {
Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1, DAG.getIntPtrConstant(1));
SrcVT = VT;
}
// At this point the operands and the result should have the same
// type, and that won't be f80 since that is not custom lowered.
// First get the sign bit of second operand.
std::vector<Constant*> CV;
if (SrcVT == MVT::f64) {
CV.push_back(ConstantFP::get(APFloat(APInt(64, 1ULL << 63))));
CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
} else {
CV.push_back(ConstantFP::get(APFloat(APInt(32, 1U << 31))));
CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
SDValue Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx,
PseudoSourceValue::getConstantPool(), 0,
false, 16);
SDValue SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
// Shift sign bit right or left if the two operands have different types.
if (SrcVT.bitsGT(VT)) {
// Op0 is MVT::f32, Op1 is MVT::f64.
SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit);
SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit,
DAG.getConstant(32, MVT::i32));
SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
DAG.getIntPtrConstant(0));
// Clear first operand sign bit.
CV.clear();
if (VT == MVT::f64) {
CV.push_back(ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63)))));
CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
} else {
CV.push_back(ConstantFP::get(APFloat(APInt(32, ~(1U << 31)))));
CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
}
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
SDValue Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
PseudoSourceValue::getConstantPool(), 0,
false, 16);
SDValue Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);
// Or the value with the sign bit.
return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
Evan Cheng
committed
assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
SDValue Cond;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
Evan Cheng
committed
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
Evan Cheng
committed
unsigned X86CC;
if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1);
return DAG.getNode(X86ISD::SETCC, MVT::i8,
Evan Cheng
committed
DAG.getConstant(X86CC, MVT::i8), Cond);
Evan Cheng
committed
assert(isFP && "Illegal integer SetCC!");
Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1);
Evan Cheng
committed
switch (SetCCOpcode) {
default: assert(false && "Illegal floating point SetCC!");
case ISD::SETOEQ: { // !PF & ZF
SDValue Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8,
Evan Cheng
committed
DAG.getConstant(X86::COND_NP, MVT::i8), Cond);
SDValue Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
Evan Cheng
committed
DAG.getConstant(X86::COND_E, MVT::i8), Cond);
return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
}
case ISD::SETUNE: { // PF | !ZF
SDValue Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8,
Evan Cheng
committed
DAG.getConstant(X86::COND_P, MVT::i8), Cond);
SDValue Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
Evan Cheng
committed
DAG.getConstant(X86::COND_NE, MVT::i8), Cond);
return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
}
}
}
SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
SDValue Cond;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
MVT VT = Op.getValueType();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
if (isFP) {
unsigned SSECC = 8;
MVT VT0 = Op0.getValueType();
assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64);
unsigned Opc = VT0 == MVT::v4f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
bool Swap = false;
switch (SetCCOpcode) {
default: break;
case ISD::SETEQ: SSECC = 0; break;
case ISD::SETOGT:
case ISD::SETGT: Swap = true; // Fallthrough
case ISD::SETLT:
case ISD::SETOLT: SSECC = 1; break;
case ISD::SETOGE:
case ISD::SETGE: Swap = true; // Fallthrough
case ISD::SETLE:
case ISD::SETOLE: SSECC = 2; break;
case ISD::SETUO: SSECC = 3; break;
case ISD::SETNE: SSECC = 4; break;
case ISD::SETULE: Swap = true;
case ISD::SETUGE: SSECC = 5; break;
case ISD::SETULT: Swap = true;
case ISD::SETUGT: SSECC = 6; break;
case ISD::SETO: SSECC = 7; break;
}
if (Swap)
std::swap(Op0, Op1);
// In the two special cases we can't handle, emit two comparisons.
if (SetCCOpcode == ISD::SETUEQ) {
UNORD = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(3, MVT::i8));
EQ = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(0, MVT::i8));
return DAG.getNode(ISD::OR, VT, UNORD, EQ);
}
else if (SetCCOpcode == ISD::SETONE) {
ORD = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(7, MVT::i8));
NEQ = DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(4, MVT::i8));
return DAG.getNode(ISD::AND, VT, ORD, NEQ);
}
assert(0 && "Illegal FP comparison");
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907
4908
4909
4910
4911
4912
4913
4914
4915
4916
4917
4918
4919
4920
4921
4922
4923
4924
4925
}
// Handle all other FP comparisons here.
return DAG.getNode(Opc, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
}
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.
unsigned Opc = 0, EQOpc = 0, GTOpc = 0;
bool Swap = false, Invert = false, FlipSigns = false;
switch (VT.getSimpleVT()) {
default: break;
case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
}
switch (SetCCOpcode) {
default: break;
case ISD::SETNE: Invert = true;
case ISD::SETEQ: Opc = EQOpc; break;
case ISD::SETLT: Swap = true;
case ISD::SETGT: Opc = GTOpc; break;
case ISD::SETGE: Swap = true;
case ISD::SETLE: Opc = GTOpc; Invert = true; break;
case ISD::SETULT: Swap = true;
case ISD::SETUGT: Opc = GTOpc; FlipSigns = true; break;
case ISD::SETUGE: Swap = true;
case ISD::SETULE: Opc = GTOpc; FlipSigns = true; Invert = true; break;
}
if (Swap)
std::swap(Op0, Op1);
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations.
if (FlipSigns) {
MVT EltVT = VT.getVectorElementType();
SDValue SignBit = DAG.getConstant(EltVT.getIntegerVTSignBit(), EltVT);
std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit);
SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, VT, &SignBits[0],
SignBits.size());
Op0 = DAG.getNode(ISD::XOR, VT, Op0, SignVec);
Op1 = DAG.getNode(ISD::XOR, VT, Op1, SignVec);
}
SDValue Result = DAG.getNode(Opc, VT, Op0, Op1);
// If the logical-not of the result is required, perform that now.
if (Invert) {
MVT EltVT = VT.getVectorElementType();
SDValue NegOne = DAG.getConstant(EltVT.getIntegerVTBitMask(), EltVT);
std::vector<SDValue> NegOnes(VT.getVectorNumElements(), NegOne);
SDValue NegOneV = DAG.getNode(ISD::BUILD_VECTOR, VT, &NegOnes[0],
NegOnes.size());
Result = DAG.getNode(ISD::XOR, VT, Result, NegOneV);
}
return Result;
}
Evan Cheng
committed
SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
bool addTest = true;
SDValue Cond = Op.getOperand(0);
SDValue CC;
if (Cond.getOpcode() == ISD::SETCC)
Cond = LowerSETCC(Cond, DAG);
Evan Cheng
committed
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
if (Cond.getOpcode() == X86ISD::SETCC) {
CC = Cond.getOperand(0);
unsigned Opc = Cmp.getOpcode();
MVT VT = Op.getValueType();
Evan Cheng
committed
bool IllegalFPCMov = false;
if (VT.isFloatingPoint() && !VT.isVector() &&
!isScalarFPTypeInSSEReg(VT)) // FPStack?
IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
if ((Opc == X86ISD::CMP ||
Opc == X86ISD::COMI ||
Opc == X86ISD::UCOMI) && !IllegalFPCMov) {
Evan Cheng
committed
Cond = Cmp;
Evan Cheng
committed
addTest = false;
}
}
if (addTest) {
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Evan Cheng
committed
Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8));
Evan Cheng
committed
}
const MVT *VTs = DAG.getNodeValueTypes(Op.getValueType(),
Evan Cheng
committed
MVT::Flag);
Evan Cheng
committed
// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.
Ops.push_back(Op.getOperand(2));
Ops.push_back(Op.getOperand(1));
Ops.push_back(CC);
Ops.push_back(Cond);
return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
Evan Cheng
committed
}
SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
bool addTest = true;
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
SDValue Dest = Op.getOperand(2);
SDValue CC;