"git@repo.hca.bsc.es:rferrer/llvm-epi-0.8.git" did not exist on "41528aeb0b9664b6348ee47198c3d6452535c0aa"
Newer
Older
SDValue NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
DAG, *this);
if (NewOp.getNode()) {
SDValue NewV1 = NewOp.getOperand(0);
SDValue NewV2 = NewOp.getOperand(1);
SDValue NewMask = NewOp.getOperand(2);
if (isCommutedMOVL(NewMask.getNode(), true, false)) {
NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
return getVZextMovL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget);
}
}
} else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
SDValue NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
DAG, *this);
if (NewOp.getNode() && X86::isMOVLMask(NewOp.getOperand(2).getNode()))
return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
DAG, Subtarget);
}
}
// Check if this can be converted into a logical shift.
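// As an illustration (not from the original source): with V2 all zeros, a
// v4i32 mask of <4,0,1,2> pulls a zero into element 0 and moves every other
// element up one slot, i.e. a whole-vector shift left by one 32-bit element
// (pslldq $4); isVectorShift is expected to recognize patterns of this shape:
//   %r = shuffle <4 x i32> %v, <4 x i32> zeroinitializer, <4, 0, 1, 2>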
bool isLeft = false;
unsigned ShAmt = 0;
SDValue ShVal;
bool isShift = isVectorShift(Op, PermMask, DAG, isLeft, ShVal, ShAmt);
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
MVT EVT = VT.getVectorElementType();
ShAmt *= EVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this);
}
if (X86::isMOVLMask(PermMask.getNode())) {
if (V1IsUndef)
return V2;
if (ISD::isBuildVectorAllZeros(V1.getNode()))
return getVZextMovL(VT, VT, V2, DAG, Subtarget);
if (!isMMX)
return Op;
}
if (!isMMX && (X86::isMOVSHDUPMask(PermMask.getNode()) ||
X86::isMOVSLDUPMask(PermMask.getNode()) ||
X86::isMOVHLPSMask(PermMask.getNode()) ||
X86::isMOVHPMask(PermMask.getNode()) ||
X86::isMOVLPMask(PermMask.getNode())))
return Op;
if (ShouldXformToMOVHLPS(PermMask.getNode()) ||
ShouldXformToMOVLP(V1.getNode(), V2.getNode(), PermMask.getNode()))
return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (isShift) {
// No better options. Use a vshl / vsrl.
MVT EVT = VT.getVectorElementType();
ShAmt *= EVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this);
}
bool Commuted = false;
// FIXME: This should also accept a bitcast of a splat? Be careful, not
// 1,1,1,1 -> v8i16 though.
V1IsSplat = isSplatVector(V1.getNode());
V2IsSplat = isSplatVector(V2.getNode());
// Canonicalize the splat or undef, if present, to be on the RHS.
if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
std::swap(V1IsSplat, V2IsSplat);
std::swap(V1IsUndef, V2IsUndef);
Commuted = true;
}
// FIXME: Figure out a cleaner way to do this.
if (isCommutedMOVL(PermMask.getNode(), V2IsSplat, V2IsUndef)) {
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (V2IsSplat) {
// V2 is a splat, so the mask may be malformed. That is, it may point
// to any V2 element. The instruction selector won't like this. Get
// a corrected mask and commute to form a proper MOVS{S|D}.
SDValue NewMask = getMOVLMask(NumElems, DAG);
if (NewMask.getNode() != PermMask.getNode())
Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
}
return Op;
}
if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) ||
X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) ||
X86::isUNPCKLMask(PermMask.getNode()) ||
X86::isUNPCKHMask(PermMask.getNode()))
return Op;
if (V2IsSplat) {
// Normalize the mask so all entries that point to V2 point to its first
// element, then try to match unpck{h|l} again. If it matches, return a
// new vector_shuffle with the corrected mask.
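// As an illustration (mask values assumed): if V2 is a splat, a mask such
// as <0,6,1,7> normalizes to <0,4,1,4>, which the relaxed (V2IsSplat)
// unpckl check can accept in place of the canonical <0,4,1,5> pattern.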
SDValue NewMask = NormalizeMask(PermMask, DAG);
if (NewMask.getNode() != PermMask.getNode()) {
if (X86::isUNPCKLMask(PermMask.getNode(), true)) {
SDValue NewMask = getUnpacklMask(NumElems, DAG);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
} else if (X86::isUNPCKHMask(PermMask.getNode(), true)) {
SDValue NewMask = getUnpackhMask(NumElems, DAG);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
}
}
}
// Normalize the node to match x86 shuffle ops if needed
if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.getNode()))
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (Commuted) {
// Commute it back and try unpck* again.
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) ||
X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) ||
X86::isUNPCKLMask(PermMask.getNode()) ||
X86::isUNPCKHMask(PermMask.getNode()))
return Op;
}
// Try PSHUF* first, then SHUFP*.
// MMX doesn't have PSHUFD but it does have PSHUFW. While it's theoretically
// possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
if (isMMX && NumElems == 4 && X86::isPSHUFDMask(PermMask.getNode())) {
if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, VT), PermMask);
return Op;
}
if (!isMMX) {
if (Subtarget->hasSSE2() &&
(X86::isPSHUFDMask(PermMask.getNode()) ||
X86::isPSHUFHWMask(PermMask.getNode()) ||
X86::isPSHUFLWMask(PermMask.getNode()))) {
MVT RVT = VT;
if (VT == MVT::v4f32) {
RVT = MVT::v4i32;
Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT,
DAG.getNode(ISD::BIT_CONVERT, RVT, V1),
DAG.getNode(ISD::UNDEF, RVT), PermMask);
} else if (V2.getOpcode() != ISD::UNDEF)
Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT, V1,
DAG.getNode(ISD::UNDEF, RVT), PermMask);
if (RVT != VT)
Op = DAG.getNode(ISD::BIT_CONVERT, VT, Op);
return Op;
}
// Binary or unary shufps.
if (X86::isSHUFPMask(PermMask.getNode()) ||
(V2.getOpcode() == ISD::UNDEF && X86::isPSHUFDMask(PermMask.getNode())))
return Op;
}
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
if (VT == MVT::v8i16) {
SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
if (NewOp.getNode())
return NewOp;
}
// Handle all 4 wide cases with a number of shuffles except for MMX.
if (NumElems == 4 && !isMMX)
return LowerVECTOR_SHUFFLE_4wide(V1, V2, PermMask, VT, DAG);
return SDValue();
}
SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
if (VT.getSizeInBits() == 8) {
SDValue Extract = DAG.getNode(X86ISD::PEXTRB, MVT::i32,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
} else if (VT.getSizeInBits() == 16) {
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, MVT::i32,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
} else if (VT == MVT::f32) {
// EXTRACTPS outputs to a GPR32 register which will require a movd to copy
// the result back to FR32 register. It's only worth matching if the
// result has a single use which is a store or a bitcast to i32.
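// For example (illustrative IR, not taken from the source): the following
// pair is worth matching because the lane is stored straight from the GPR
// result of EXTRACTPS, with no extra movd back into an XMM register:
//   %e = extract_vector_elt <4 x float> %v, 1
//   store float %e, float* %p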
if (!Op.hasOneUse())
return SDValue();
SDNode *User = *Op.getNode()->use_begin();
if (User->getOpcode() != ISD::STORE &&
(User->getOpcode() != ISD::BIT_CONVERT ||
User->getValueType(0) != MVT::i32))
return SDValue();
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Op.getOperand(0)),
Op.getOperand(1));
return DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Extract);
}
return SDValue();
}
SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
if (!isa<ConstantSDNode>(Op.getOperand(1)))
return SDValue();
if (Subtarget->hasSSE41()) {
SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
if (Res.getNode())
return Res;
}
MVT VT = Op.getValueType();
// TODO: handle v16i8.
if (VT.getSizeInBits() == 16) {
SDValue Vec = Op.getOperand(0);
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (Idx == 0)
return DAG.getNode(ISD::TRUNCATE, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Vec),
Op.getOperand(1)));
// Transform it so it matches pextrw, which produces a 32-bit result.
MVT EVT = (MVT::SimpleValueType)(VT.getSimpleVT()+1);
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
} else if (VT.getSizeInBits() == 32) {
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (Idx == 0)
return Op;
// SHUFPS the element to the lowest double word, then movss.
MVT MaskVT = MVT::getIntVectorWithNumElements(4);
SmallVector<SDValue, 8> IdxVec;
IdxVec.push_back(DAG.getConstant(Idx, MaskVT.getVectorElementType()));
IdxVec.push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
IdxVec.push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
IdxVec.push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
SDValue Vec = Op.getOperand(0);
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getIntPtrConstant(0));
} else if (VT.getSizeInBits() == 64) {
// FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
// FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
// to match extract_elt for f64.
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (Idx == 0)
return Op;
// UNPCKHPD the element to the lowest double word, then movsd.
// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
MVT MaskVT = MVT::getIntVectorWithNumElements(2);
SmallVector<SDValue, 8> IdxVec;
IdxVec.push_back(DAG.getConstant(1, MaskVT.getVectorElementType()));
IdxVec.push_back(DAG.getNode(ISD::UNDEF, MaskVT.getVectorElementType()));
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
SDValue Vec = Op.getOperand(0);
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getIntPtrConstant(0));
}
return SDValue();
}
SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
MVT EVT = VT.getVectorElementType();
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
if ((EVT.getSizeInBits() == 8 || EVT.getSizeInBits() == 16) &&
isa<ConstantSDNode>(N2)) {
unsigned Opc = (EVT.getSizeInBits() == 8) ? X86ISD::PINSRB
: X86ISD::PINSRW;
// Transform it so it matches pinsr{b,w}, which expects a GR32 as its second
// argument.
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
return DAG.getNode(Opc, VT, N0, N1, N2);
} else if (EVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
// Bits [7:6] of the constant are the source select. This will always be
// zero here. The DAG Combiner may combine an extract_elt index into these
// bits. For example (insert (extract, 3), 2) could be matched by putting
// the '3' into bits [7:6] of X86ISD::INSERTPS.
// Bits [5:4] of the constant are the destination select. This is the
// value of the incoming immediate.
// Bits [3:0] of the constant are the zero mask. The DAG Combiner may
// combine either bitwise AND or insert of float 0.0 to set these bits.
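// As a worked example (values illustrative): lowering
// (insert (extract V2, 3), 2) with no zeroed lanes would use countS = 3,
// countD = 2, zmask = 0, i.e. imm8 = (3 << 6) | (2 << 4) | 0 = 0xE0. Below,
// only bits [5:4] are filled in, since countS and the zero mask are zero.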
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
return DAG.getNode(X86ISD::INSERTPS, VT, N0, N1, N2);
}
return SDValue();
}
SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
MVT EVT = VT.getVectorElementType();
if (Subtarget->hasSSE41())
return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
if (EVT == MVT::i8)
return SDValue();
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
if (EVT.getSizeInBits() == 16) {
// Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
// as its second argument.
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
}
return SDValue();
}
SDValue
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
if (Op.getValueType() == MVT::v2f32)
return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f32,
DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2i32,
DAG.getNode(ISD::BIT_CONVERT, MVT::i32,
Op.getOperand(0))));
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
MVT VT = MVT::v2i32;
switch (Op.getValueType().getSimpleVT()) {
default: break;
case MVT::v16i8:
case MVT::v8i16:
VT = MVT::v4i32;
break;
}
return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, AnyExt));
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOV32ri.
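// A sketch of the resulting DAG (node names abbreviated):
//   (load (X86ISD::Wrapper (TargetGlobalAddress @g)))
// where the wrapped address is what ultimately becomes the MOV32ri.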
SDValue
X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(),
getPointerTy(),
CP->getAlignment());
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
}
return Result;
}
SDValue
X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV,
int64_t Offset,
SelectionDAG &DAG) const {
bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_;
bool ExtraLoadRequired =
Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false);
// Create the TargetGlobalAddress node, folding in the constant
// offset if it is legal.
SDValue Result;
if (!IsPic && !ExtraLoadRequired && isInt32(Offset)) {
Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
Offset = 0;
} else
Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0);
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (IsPic && !Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
}
// For Darwin & Mingw32, external and weak symbols are indirect, so we want to
// load the value at address GV, not the value of GV itself. This means that
// the GlobalAddress must be in the base or index register of the address, not
// the GV offset field. The platform check is inside the GVRequiresExtraLoad()
// call. The same applies for external symbols during PIC codegen.
if (ExtraLoadRequired)
Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result,
PseudoSourceValue::getGOT(), 0);
// If there was a non-zero offset that we didn't fold, create an explicit
// addition for it.
if (Offset != 0)
Result = DAG.getNode(ISD::ADD, getPointerTy(), Result,
DAG.getConstant(Offset, getPointerTy()));
return Result;
}
SDValue
X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
return LowerGlobalAddress(GV, Offset, DAG);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
static SDValue
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const MVT PtrVT) {
SDValue InFlag;
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg,
PtrVT), InFlag);
InFlag = Chain.getValue(1);
// emit leal symbol@TLSGD(,%ebx,1), %eax
SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
GA->getValueType(0),
GA->getOffset());
SDValue Ops[] = { Chain, TGA, InFlag };
SDValue Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
InFlag = Result.getValue(2);
Chain = Result.getValue(1);
// call ___tls_get_addr. This function receives its argument in
// the register EAX.
Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
InFlag = Chain.getValue(1);
NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SDValue Ops1[] = { Chain,
DAG.getTargetExternalSymbol("___tls_get_addr",
PtrVT),
DAG.getRegister(X86::EAX, PtrVT),
DAG.getRegister(X86::EBX, PtrVT),
InFlag };
Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
InFlag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
static SDValue
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const MVT PtrVT) {
SDValue InFlag, Chain;
// emit leaq symbol@TLSGD(%rip), %rdi
SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
GA->getValueType(0),
GA->getOffset());
SDValue Ops[] = { DAG.getEntryNode(), TGA};
SDValue Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 2);
Chain = Result.getValue(1);
InFlag = Result.getValue(2);
// call __tls_get_addr. This function receives its argument in
// the register RDI.
Chain = DAG.getCopyToReg(Chain, X86::RDI, Result, InFlag);
InFlag = Chain.getValue(1);
NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SDValue Ops1[] = { Chain,
DAG.getTargetExternalSymbol("__tls_get_addr",
PtrVT),
DAG.getRegister(X86::RDI, PtrVT),
InFlag };
Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 4);
InFlag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, X86::RAX, PtrVT, InFlag);
}
// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
// "local exec" model.
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const MVT PtrVT) {
// Get the Thread Pointer
SDValue ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
// exec)
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
GA->getValueType(0),
GA->getOffset());
SDValue Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);
if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset,
PseudoSourceValue::getGOT(), 0);
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
}
SDValue
X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
// TODO: implement the "local dynamic" model
// TODO: implement the "initial exec"model for pic executables
assert(Subtarget->isTargetELF() &&
"TLS not implemented for non-ELF targets");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
// If the relocation model is PIC, use the "General Dynamic" TLS Model,
// otherwise use the "Local Exec"TLS Model
if (Subtarget->is64Bit()) {
return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
} else {
if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
else
return LowerToTLSExecModel(GA, DAG, getPointerTy());
}
}
SDValue
X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
}
return Result;
}
SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
}
return Result;
}
/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
/// take a 2 x i32 value to shift plus a shift amount.
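/// As a sketch of the lowering (SHL_PARTS case, pseudocode rather than the
/// emitted DAG): a 64-bit shift is built from the 32-bit halves as
///   Hi = shld(hi, lo, amt)   // (hi << amt) | (lo >> (32 - amt))
///   Lo = lo << amt
///   if (amt & 32) { Hi = Lo; Lo = 0; }   // fixup done with CMOV below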
SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
MVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
SDValue Tmp1 = isSRA ?
DAG.getNode(ISD::SRA, VT, ShOpHi, DAG.getConstant(VTBits - 1, MVT::i8)) :
DAG.getConstant(0, VT);
SDValue Tmp2, Tmp3;
if (Op.getOpcode() == ISD::SHL_PARTS) {
Tmp2 = DAG.getNode(X86ISD::SHLD, VT, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(ISD::SHL, VT, ShOpLo, ShAmt);
} else {
Tmp2 = DAG.getNode(X86ISD::SHRD, VT, ShOpLo, ShOpHi, ShAmt);
Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, VT, ShOpHi, ShAmt);
}
SDValue AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
DAG.getConstant(VTBits, MVT::i8));
SDValue Cond = DAG.getNode(X86ISD::CMP, MVT::i32,
AndNode, DAG.getConstant(0, MVT::i8));
SDValue Hi, Lo;
SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8);
SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond };
SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond };
if (Op.getOpcode() == ISD::SHL_PARTS) {
Hi = DAG.getNode(X86ISD::CMOV, VT, Ops0, 4);
Lo = DAG.getNode(X86ISD::CMOV, VT, Ops1, 4);
} else {
Lo = DAG.getNode(X86ISD::CMOV, VT, Ops0, 4);
Hi = DAG.getNode(X86ISD::CMOV, VT, Ops1, 4);
}
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, 2);
}
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
MVT SrcVT = Op.getOperand(0).getValueType();
assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
// These are really Legal; caller falls through into that case.
if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
return SDValue();
if (SrcVT == MVT::i64 && Op.getValueType() != MVT::f80 &&
Subtarget->is64Bit())
return SDValue();
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDValue Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
StackSlot,
PseudoSourceValue::getFixedStack(SSFI), 0);
// Build the FILD
SDVTList Tys;
bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
if (useSSE)
Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
else
Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(StackSlot);
Ops.push_back(DAG.getValueType(SrcVT));
SDValue Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD,
Tys, &Ops[0], Ops.size());
Chain = Result.getValue(1);
if (useSSE) {
SDValue InFlag = Result.getValue(2);
// FIXME: Currently the FST is flagged to the FILD_FLAG. This
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
Tys = DAG.getVTList(MVT::Other);
Ops.clear();
Ops.push_back(Chain);
Ops.push_back(Result);
Ops.push_back(StackSlot);
Ops.push_back(DAG.getValueType(Op.getValueType()));
Ops.push_back(InFlag);
Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot,
PseudoSourceValue::getFixedStack(SSFI), 0);
}
return Result;
}
SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
MVT SrcVT = Op.getOperand(0).getValueType();
assert(SrcVT.getSimpleVT() == MVT::i64 && "Unknown UINT_TO_FP to lower!");
// We only handle SSE2 f64 target here; caller can handle the rest.
if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64)
return SDValue();
// This algorithm is not obvious. Here it is in C code, more or less:
/*
double uint64_to_double( uint32_t hi, uint32_t lo )
{
static const __m128i exp = { 0x4330000045300000ULL, 0 };
static const __m128d bias = { 0x1.0p84, 0x1.0p52 };
// copy ints to xmm registers
__m128i xh = _mm_cvtsi32_si128( hi );
__m128i xl = _mm_cvtsi32_si128( lo );
// combine into low half of a single xmm register
__m128i x = _mm_unpacklo_epi32( xh, xl );
__m128d d;
double sd;
// merge in appropriate exponents to give the integer bits the
// right magnitude
x = _mm_unpacklo_epi32( x, exp );
// subtract away the biases to deal with the IEEE-754 double precision
// implicit 1
d = _mm_sub_pd( (__m128d) x, bias );
// All conversions up to here are exact. The correctly rounded result is
// calculated using the
// current rounding mode using the following horizontal add.
d = _mm_add_sd( d, _mm_unpackhi_pd( d, d ) );
_mm_store_sd( &sd, d ); //since we are returning doubles in XMM, this
// store doesn't really need to be here (except maybe to zero the other
// double)
return sd;
}
*/
// Build some magic constants.
std::vector<Constant*> CV0;
CV0.push_back(ConstantInt::get(APInt(32, 0x45300000)));
CV0.push_back(ConstantInt::get(APInt(32, 0x43300000)));
CV0.push_back(ConstantInt::get(APInt(32, 0)));
CV0.push_back(ConstantInt::get(APInt(32, 0)));
Constant *C0 = ConstantVector::get(CV0);
SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 4);
std::vector<Constant*> CV1;
CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4530000000000000ULL))));
CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4330000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 4);
SmallVector<SDValue, 4> MaskVec;
MaskVec.push_back(DAG.getConstant(0, MVT::i32));
MaskVec.push_back(DAG.getConstant(4, MVT::i32));
MaskVec.push_back(DAG.getConstant(1, MVT::i32));
MaskVec.push_back(DAG.getConstant(5, MVT::i32));
SDValue UnpcklMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, &MaskVec[0],
MaskVec.size());
SmallVector<SDValue, 4> MaskVec2;
MaskVec2.push_back(DAG.getConstant(1, MVT::i32));
MaskVec2.push_back(DAG.getConstant(0, MVT::i32));
SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec2[0],
MaskVec2.size());
SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4i32,
DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
Op.getOperand(0),
DAG.getIntPtrConstant(1)));
SDValue XR2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4i32,
DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
Op.getOperand(0),
DAG.getIntPtrConstant(0)));
SDValue Unpck1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32,
XR1, XR2, UnpcklMask);
SDValue CLod0 = DAG.getLoad(MVT::v4i32, DAG.getEntryNode(), CPIdx0,
PseudoSourceValue::getConstantPool(), 0, false, 16);
SDValue Unpck2 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32,
Unpck1, CLod0, UnpcklMask);
SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Unpck2);
SDValue CLod1 = DAG.getLoad(MVT::v2f64, CLod0.getValue(1), CPIdx1,
PseudoSourceValue::getConstantPool(), 0, false, 16);
SDValue Sub = DAG.getNode(ISD::FSUB, MVT::v2f64, XR2F, CLod1);
// Add the halves; easiest way is to swap them into another reg first.
SDValue Shuf = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2f64,
Sub, Sub, ShufMask);
SDValue Add = DAG.getNode(ISD::FADD, MVT::v2f64, Shuf, Sub);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f64, Add,
DAG.getIntPtrConstant(0));
}
std::pair<SDValue,SDValue> X86TargetLowering::
FP_TO_SINTHelper(SDValue Op, SelectionDAG &DAG) {
assert(Op.getValueType().getSimpleVT() <= MVT::i64 &&
Op.getValueType().getSimpleVT() >= MVT::i16 &&
"Unknown FP_TO_SINT to lower!");
// These are really Legal.
if (Op.getValueType() == MVT::i32 &&
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDValue(), SDValue());
if (Subtarget->is64Bit() &&
Op.getValueType() == MVT::i64 &&
Op.getOperand(0).getValueType() != MVT::f80)
return std::make_pair(SDValue(), SDValue());
// We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
// stack slot.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = Op.getValueType().getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
unsigned Opc;
switch (Op.getValueType().getSimpleVT()) {
default: assert(0 && "Invalid FP_TO_SINT to lower!");
case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
}
SDValue Chain = DAG.getEntryNode();
SDValue Value = Op.getOperand(0);
if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) {
assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, Value, StackSlot,
PseudoSourceValue::getFixedStack(SSFI), 0);
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
SDValue Ops[] = {
Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
};
Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
Chain = Value.getValue(1);
SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
}
// Build the FP_TO_INT*_IN_MEM
SDValue Ops[] = { Chain, Value, StackSlot };
SDValue FIST = DAG.getNode(Opc, MVT::Other, Ops, 3);
return std::make_pair(FIST, StackSlot);
}
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
std::pair<SDValue,SDValue> Vals = FP_TO_SINTHelper(Op, DAG);
SDValue FIST = Vals.first, StackSlot = Vals.second;
if (FIST.getNode() == 0) return SDValue();
// Load the result.
return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
}
SDNode *X86TargetLowering::ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG) {
std::pair<SDValue,SDValue> Vals = FP_TO_SINTHelper(SDValue(N, 0), DAG);
SDValue FIST = Vals.first, StackSlot = Vals.second;
if (FIST.getNode() == 0) return 0;
MVT VT = N->getValueType(0);
// Return a load from the stack slot.
SDValue Res = DAG.getLoad(VT, FIST, StackSlot, NULL, 0);
// Use MERGE_VALUES to drop the chain result value and get a node with one
// result. This requires turning off getMergeValues simplification, since
// otherwise it will give us Res back.
return DAG.getMergeValues(&Res, 1, false).getNode();
}
SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
MVT EltVT = VT;
if (VT.isVector())
EltVT = VT.getVectorElementType();
std::vector<Constant*> CV;
if (EltVT == MVT::f64) {
Constant *C = ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63))));
CV.push_back(C);
CV.push_back(C);
} else {
Constant *C = ConstantFP::get(APFloat(APInt(32, ~(1U << 31))));
CV.push_back(C);
CV.push_back(C);
CV.push_back(C);
CV.push_back(C);
}
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
SDValue Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
PseudoSourceValue::getConstantPool(), 0,
false, 16);
return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
}
SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
MVT EltVT = VT;
unsigned EltNum = 1;
if (VT.isVector()) {
EltVT = VT.getVectorElementType();
EltNum = VT.getVectorNumElements();
}
std::vector<Constant*> CV;
if (EltVT == MVT::f64) {
Constant *C = ConstantFP::get(APFloat(APInt(64, 1ULL << 63)));
CV.push_back(C);
CV.push_back(C);
} else {
Constant *C = ConstantFP::get(APFloat(APInt(32, 1U << 31)));
CV.push_back(C);
CV.push_back(C);
CV.push_back(C);
CV.push_back(C);
}
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
SDValue Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
PseudoSourceValue::getConstantPool(), 0,
false, 16);
if (VT.isVector()) {
return DAG.getNode(ISD::BIT_CONVERT, VT,
DAG.getNode(ISD::XOR, MVT::v2i64,
DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)),
DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask)));
} else {
return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
}
}
SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
MVT VT = Op.getValueType();
MVT SrcVT = Op1.getValueType();
// If second operand is smaller, extend it first.
if (SrcVT.bitsLT(VT)) {
Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1);
SrcVT = VT;
}
// And if it is bigger, shrink it first.
if (SrcVT.bitsGT(VT)) {
Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1, DAG.getIntPtrConstant(1));
SrcVT = VT;
}
// At this point the operands and the result should have the same
// type, and that won't be f80 since that is not custom lowered.
// First get the sign bit of second operand.
std::vector<Constant*> CV;
if (SrcVT == MVT::f64) {
CV.push_back(ConstantFP::get(APFloat(APInt(64, 1ULL << 63))));
CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
} else {
CV.push_back(ConstantFP::get(APFloat(APInt(32, 1U << 31))));
CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
}
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
SDValue Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx,
PseudoSourceValue::getConstantPool(), 0,
false, 16);
SDValue SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
// Shift sign bit right or left if the two operands have different types.
if (SrcVT.bitsGT(VT)) {
// Op0 is MVT::f32, Op1 is MVT::f64.
SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit);
SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit,
DAG.getConstant(32, MVT::i32));
SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
DAG.getIntPtrConstant(0));
}
// Clear first operand sign bit.
CV.clear();
if (VT == MVT::f64) {
CV.push_back(ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63)))));
CV.push_back(ConstantFP::get(APFloat(APInt(64, 0))));
} else {
CV.push_back(ConstantFP::get(APFloat(APInt(32, ~(1U << 31)))));
CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
}
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
SDValue Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx,
PseudoSourceValue::getConstantPool(), 0,
false, 16);
SDValue Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);
// Or the value with the sign bit.
return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
}
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");