DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
&MaskVec[0], MaskVec.size()));
/// getVZextMovL - Return a zero-extending vector move low node.
///
static SDValue getVZextMovL(MVT VT, MVT OpVT,
SDValue SrcOp, SelectionDAG &DAG,
const X86Subtarget *Subtarget, DebugLoc dl) {
if (VT == MVT::v2f64 || VT == MVT::v4f32) {
LoadSDNode *LD = NULL;
if (!isScalarLoadToVector(SrcOp.getNode(), &LD))
LD = dyn_cast<LoadSDNode>(SrcOp);
if (!LD) {
// movssrr and movsdrr do not clear top bits. Try to use movd, movq
// instead.
MVT EVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
if ((EVT != MVT::i64 || Subtarget->is64Bit()) &&
SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
SrcOp.getOperand(0).getOperand(0).getValueType() == EVT) {
// PR2108
OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
OpVT,
SrcOp.getOperand(0).getOperand(0))));
}
}
}
return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
DAG.getNode(ISD::BIT_CONVERT, dl,
OpVT, SrcOp)));
}
/// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of
/// shuffles.
static SDValue
LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2,
SDValue PermMask, MVT VT, SelectionDAG &DAG,
DebugLoc dl) {
MVT MaskVT = PermMask.getValueType();
MVT MaskEVT = MaskVT.getVectorElementType();
SmallVector<std::pair<int, int>, 8> Locs;
Locs.resize(4);
SmallVector<SDValue, 8> Mask1(4, DAG.getUNDEF(MaskEVT));
unsigned NumHi = 0;
unsigned NumLo = 0;
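// Classify each mask element by which concatenated source half it selects:
// indices 0-3 come from V1 (low) and 4-7 from V2 (high). Locs[i] records the
// (group, slot) the i-th result element will occupy in the intermediate
// shuffle built from Mask1.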
for (unsigned i = 0; i != 4; ++i) {
SDValue Elt = PermMask.getOperand(i);
if (Elt.getOpcode() == ISD::UNDEF) {
Locs[i] = std::make_pair(-1, -1);
} else {
unsigned Val = cast<ConstantSDNode>(Elt)->getZExtValue();
assert(Val < 8 && "Invalid VECTOR_SHUFFLE index!");
if (Val < 4) {
Locs[i] = std::make_pair(0, NumLo);
Mask1[NumLo] = Elt;
NumLo++;
} else {
Locs[i] = std::make_pair(1, NumHi);
if (2+NumHi < 4)
Mask1[2+NumHi] = Elt;
NumHi++;
}
}
}
if (NumLo <= 2 && NumHi <= 2) {
// No more than two elements come from either vector. This can be
// implemented with two shuffles. The first shuffle gathers the elements.
// The second shuffle, which takes the first shuffle as both of its
// vector operands, puts the elements into the right order.
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
&Mask1[0], Mask1.size()));
SmallVector<SDValue, 8> Mask2(4, DAG.getUNDEF(MaskEVT));
for (unsigned i = 0; i != 4; ++i) {
if (Locs[i].first == -1)
continue;
else {
unsigned Idx = (i < 2) ? 0 : 4;
Idx += Locs[i].first * 2 + Locs[i].second;
Mask2[i] = DAG.getConstant(Idx, MaskEVT);
}
}
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V1,
DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
&Mask2[0], Mask2.size()));
} else if (NumLo == 3 || NumHi == 3) {
// Otherwise, we must have three elements from one vector, call it X, and
// one element from the other, call it Y. First, use a shufps to build an
// intermediate vector with the one element from Y and the element from X
// that will be in the same half in the final destination (the indexes don't
// matter). Then, use a shufps to build the final vector, taking the half
// containing the element from Y from the intermediate, and the other half
// from X.
if (NumHi == 3) {
// Normalize it so the 3 elements come from V1.
PermMask = CommuteVectorShuffleMask(PermMask, DAG, dl);
std::swap(V1, V2);
}
// Find the element from V2.
unsigned HiIndex;
for (HiIndex = 0; HiIndex < 3; ++HiIndex) {
SDValue Elt = PermMask.getOperand(HiIndex);
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned Val = cast<ConstantSDNode>(Elt)->getZExtValue();
if (Val >= 4)
break;
}
Mask1[0] = PermMask.getOperand(HiIndex);
Mask1[1] = DAG.getUNDEF(MaskEVT);
Mask1[2] = PermMask.getOperand(HiIndex^1);
Mask1[3] = DAG.getUNDEF(MaskEVT);
V2 = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT, &Mask1[0], 4));
if (HiIndex >= 2) {
Mask1[0] = PermMask.getOperand(0);
Mask1[1] = PermMask.getOperand(1);
Mask1[2] = DAG.getConstant(HiIndex & 1 ? 6 : 4, MaskEVT);
Mask1[3] = DAG.getConstant(HiIndex & 1 ? 4 : 6, MaskEVT);
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MaskVT, &Mask1[0], 4));
} else {
Mask1[0] = DAG.getConstant(HiIndex & 1 ? 2 : 0, MaskEVT);
Mask1[1] = DAG.getConstant(HiIndex & 1 ? 0 : 2, MaskEVT);
Mask1[2] = PermMask.getOperand(2);
Mask1[3] = PermMask.getOperand(3);
if (Mask1[2].getOpcode() != ISD::UNDEF)
Mask1[2] =
DAG.getConstant(cast<ConstantSDNode>(Mask1[2])->getZExtValue()+4,
MaskEVT);
if (Mask1[3].getOpcode() != ISD::UNDEF)
Mask1[3] =
DAG.getConstant(cast<ConstantSDNode>(Mask1[3])->getZExtValue()+4,
MaskEVT);
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V2, V1,
DAG.getNode(ISD::BUILD_VECTOR, dl,
MaskVT, &Mask1[0], 4));
}
}
// Break it into (shuffle shuffle_hi, shuffle_lo).
Locs.clear();
SmallVector<SDValue,8> LoMask(4, DAG.getUNDEF(MaskEVT));
SmallVector<SDValue,8> HiMask(4, DAG.getUNDEF(MaskEVT));
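// General case: LoMask gathers the sources of result elements 0 and 1 into
// one intermediate shuffle and HiMask gathers those of elements 2 and 3 into
// another; a final shuffle of the two intermediates puts everything in place.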
SmallVector<SDValue,8> *MaskPtr = &LoMask;
unsigned MaskIdx = 0;
unsigned LoIdx = 0;
unsigned HiIdx = 2;
for (unsigned i = 0; i != 4; ++i) {
if (i == 2) {
MaskPtr = &HiMask;
MaskIdx = 1;
LoIdx = 0;
HiIdx = 2;
}
SDValue Elt = PermMask.getOperand(i);
if (Elt.getOpcode() == ISD::UNDEF) {
Locs[i] = std::make_pair(-1, -1);
} else if (cast<ConstantSDNode>(Elt)->getZExtValue() < 4) {
Locs[i] = std::make_pair(MaskIdx, LoIdx);
(*MaskPtr)[LoIdx] = Elt;
LoIdx++;
} else {
Locs[i] = std::make_pair(MaskIdx, HiIdx);
(*MaskPtr)[HiIdx] = Elt;
HiIdx++;
}
}
SDValue LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
&LoMask[0], LoMask.size()));
SDValue HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
&HiMask[0], HiMask.size()));
SmallVector<SDValue, 8> MaskOps;
for (unsigned i = 0; i != 4; ++i) {
if (Locs[i].first == -1) {
MaskOps.push_back(DAG.getUNDEF(MaskEVT));
} else {
unsigned Idx = Locs[i].first * 4 + Locs[i].second;
MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
}
}
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, LoShuffle, HiShuffle,
DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
&MaskOps[0], MaskOps.size()));
}
SDValue
X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDValue PermMask = Op.getOperand(2);
MVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
unsigned NumElems = PermMask.getNumOperands();
bool isMMX = VT.getSizeInBits() == 64;
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
bool V1IsSplat = false;
bool V2IsSplat = false;
// FIXME: Check for legal shuffle and return?
if (isUndefShuffle(Op.getNode()))
return DAG.getUNDEF(VT);
if (isZeroShuffle(Op.getNode()))
return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
if (isIdentityMask(PermMask.getNode()))
return V1;
else if (isIdentityMask(PermMask.getNode(), true))
return V2;
// Canonicalize movddup shuffles.
if (V2IsUndef && Subtarget->hasSSE2() &&
VT.getSizeInBits() == 128 &&
X86::isMOVDDUPMask(PermMask.getNode()))
return CanonicalizeMovddup(Op, V1, PermMask, DAG, Subtarget->hasSSE3());
if (isSplatMask(PermMask.getNode())) {
if (isMMX || NumElems < 4) return Op;
// Promote it to a v4{if}32 splat.
return PromoteSplat(Op, DAG, Subtarget->hasSSE2());
}

// If the shuffle can be profitably rewritten as a narrower shuffle, then
// do it!
if (VT == MVT::v8i16 || VT == MVT::v16i8) {
SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG,
*this, dl);
if (NewOp.getNode())
return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
LowerVECTOR_SHUFFLE(NewOp, DAG));
} else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
if (ISD::isBuildVectorAllZeros(V2.getNode())) {
SDValue NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
DAG, *this, dl);
if (NewOp.getNode()) {
SDValue NewV1 = NewOp.getOperand(0);
SDValue NewV2 = NewOp.getOperand(1);
SDValue NewMask = NewOp.getOperand(2);
if (isCommutedMOVL(NewMask.getNode(), true, false)) {
NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
return getVZextMovL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget,
dl);
}
}
} else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
DAG, *this, dl);
if (NewOp.getNode() && X86::isMOVLMask(NewOp.getOperand(2).getNode()))
return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
DAG, Subtarget, dl);
}
}
// Check if this can be converted into a logical shift.
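// A shuffle that slides the elements toward one end and fills the vacated
// positions with zeros can be lowered to a single whole-vector shift.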
bool isLeft = false;
unsigned ShAmt = 0;
SDValue ShVal;
bool isShift = isVectorShift(Op, PermMask, DAG, isLeft, ShVal, ShAmt);
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
MVT EVT = VT.getVectorElementType();
ShAmt *= EVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
if (X86::isMOVLMask(PermMask.getNode())) {
if (V1IsUndef)
return V2;
if (ISD::isBuildVectorAllZeros(V1.getNode()))
return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
if (!isMMX)
return Op;
}
if (!isMMX && (X86::isMOVSHDUPMask(PermMask.getNode()) ||
X86::isMOVSLDUPMask(PermMask.getNode()) ||
X86::isMOVHLPSMask(PermMask.getNode()) ||
X86::isMOVHPMask(PermMask.getNode()) ||
X86::isMOVLPMask(PermMask.getNode())))
return Op;

if (ShouldXformToMOVHLPS(PermMask.getNode()) ||
ShouldXformToMOVLP(V1.getNode(), V2.getNode(), PermMask.getNode()))
return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (isShift) {
// No better options. Use a vshl / vsrl.
MVT EVT = VT.getVectorElementType();
ShAmt *= EVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
bool Commuted = false;
// FIXME: This should also accept a bitcast of a splat? Be careful, not
// 1,1,1,1 -> v8i16 though.
V1IsSplat = isSplatVector(V1.getNode());
V2IsSplat = isSplatVector(V2.getNode());
// Canonicalize the splat or undef, if present, to be on the RHS.
if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
std::swap(V1IsSplat, V2IsSplat);
std::swap(V1IsUndef, V2IsUndef);
Commuted = true;
}

// FIXME: Figure out a cleaner way to do this.
if (isCommutedMOVL(PermMask.getNode(), V2IsSplat, V2IsUndef)) {
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (V2IsSplat) {
// V2 is a splat, so the mask may be malformed. That is, it may point
// to any V2 element. The instruction selector won't like this. Get
// a corrected mask and commute to form a proper MOVS{S|D}.
SDValue NewMask = getMOVLMask(NumElems, DAG, dl);
if (NewMask.getNode() != PermMask.getNode())
Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask);
}
}
if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) ||
X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) ||
X86::isUNPCKLMask(PermMask.getNode()) ||
X86::isUNPCKHMask(PermMask.getNode()))
return Op;
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 point to its first
// element, then try to match unpck{h|l} again. If it matches, return a
// new vector_shuffle with the corrected mask.
SDValue NewMask = NormalizeMask(PermMask, DAG);
if (NewMask.getNode() != PermMask.getNode()) {
if (X86::isUNPCKLMask(NewMask.getNode(), true)) {
SDValue NewMask = getUnpacklMask(NumElems, DAG, dl);
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask);
} else if (X86::isUNPCKHMask(NewMask.getNode(), true)) {
SDValue NewMask = getUnpackhMask(NumElems, DAG, dl);
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1, V2, NewMask);
}
}
}
// Normalize the node to match x86 shuffle ops if needed
if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.getNode()))
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (Commuted) {
// Commute it back and try unpck* again.
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (X86::isUNPCKL_v_undef_Mask(PermMask.getNode()) ||
X86::isUNPCKH_v_undef_Mask(PermMask.getNode()) ||
X86::isUNPCKLMask(PermMask.getNode()) ||
X86::isUNPCKHMask(PermMask.getNode()))
return Op;
}
// FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle.
// Try PSHUF* first, then SHUFP*.
// MMX doesn't have PSHUFD but it does have PSHUFW. While it's theoretically
// possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
if (isMMX && NumElems == 4 && X86::isPSHUFDMask(PermMask.getNode())) {
if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, dl, VT, V1,
DAG.getUNDEF(VT), PermMask);
return Op;
}
if (!isMMX) {
if (Subtarget->hasSSE2() &&
(X86::isPSHUFDMask(PermMask.getNode()) ||
X86::isPSHUFHWMask(PermMask.getNode()) ||
X86::isPSHUFLWMask(PermMask.getNode()))) {
MVT RVT = VT;
if (VT == MVT::v4f32) {
RVT = MVT::v4i32;
Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, RVT,
DAG.getNode(ISD::BIT_CONVERT, dl, RVT, V1),
DAG.getUNDEF(RVT), PermMask);
} else if (V2.getOpcode() != ISD::UNDEF)
Op = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, RVT, V1,
DAG.getUNDEF(RVT), PermMask);
if (RVT != VT)
Op = DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op);
return Op;
}
// Binary or unary shufps.
if (X86::isSHUFPMask(PermMask.getNode()) ||
(V2.getOpcode() == ISD::UNDEF && X86::isPSHUFDMask(PermMask.getNode())))
return Op;
}
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
if (VT == MVT::v8i16) {
SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this, dl);
if (NewOp.getNode())
return NewOp;
}
if (VT == MVT::v16i8) {
SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(V1, V2, PermMask, DAG, *this, dl);
if (NewOp.getNode())
return NewOp;
}
// Handle all 4 wide cases with a number of shuffles except for MMX.
if (NumElems == 4 && !isMMX)
return LowerVECTOR_SHUFFLE_4wide(V1, V2, PermMask, VT, DAG, dl);
return SDValue();
}

SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
SelectionDAG &DAG) {
MVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
if (VT.getSizeInBits() == 8) {
SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
} else if (VT.getSizeInBits() == 16) {
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
// If Idx is 0, it's cheaper to do a move instead of a pextrw.
if (Idx == 0)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getNode(ISD::BIT_CONVERT, dl,
MVT::v4i32,
Op.getOperand(0)),
Op.getOperand(1)));
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
} else if (VT == MVT::f32) {
// EXTRACTPS outputs to a GPR32 register which will require a movd to copy
// the result back to FR32 register. It's only worth matching if the
// result has a single use which is a store or a bitcast to i32. And in
// the case of a store, it's not worth it if the index is a constant 0,
// because a MOVSSmr can be used instead, which is smaller and faster.
if (!Op.hasOneUse())
return SDValue();
SDNode *User = *Op.getNode()->use_begin();
if ((User->getOpcode() != ISD::STORE ||
(isa<ConstantSDNode>(Op.getOperand(1)) &&
cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
(User->getOpcode() != ISD::BIT_CONVERT ||
User->getValueType(0) != MVT::i32))
return SDValue();
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32,
Op.getOperand(0)),
Op.getOperand(1));
return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Extract);
} else if (VT == MVT::i32) {
// ExtractPS works with constant index.
if (isa<ConstantSDNode>(Op.getOperand(1)))
return Op;
}
return SDValue();
}

SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
if (!isa<ConstantSDNode>(Op.getOperand(1)))
return SDValue();
if (Subtarget->hasSSE41()) {
SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
if (Res.getNode())
return Res;
}
MVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
// TODO: handle v16i8.
if (VT.getSizeInBits() == 16) {
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (Idx == 0)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getNode(ISD::BIT_CONVERT, dl,
MVT::v4i32,
Op.getOperand(0)),
Op.getOperand(1)));
// Transform it so it matches pextrw, which produces a 32-bit result.
MVT EVT = (MVT::SimpleValueType)(VT.getSimpleVT()+1);
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EVT,
Op.getOperand(0), Op.getOperand(1));
SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EVT, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
} else if (VT.getSizeInBits() == 32) {
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (Idx == 0)
return Op;
// SHUFPS the element to the lowest double word, then movss.
MVT MaskVT = MVT::getIntVectorWithNumElements(4);
SmallVector<SDValue, 8> IdxVec;
IdxVec.push_back(DAG.getConstant(Idx, MaskVT.getVectorElementType()));
IdxVec.push_back(DAG.getUNDEF(MaskVT.getVectorElementType()));
IdxVec.push_back(DAG.getUNDEF(MaskVT.getVectorElementType()));
IdxVec.push_back(DAG.getUNDEF(MaskVT.getVectorElementType()));
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
&IdxVec[0], IdxVec.size());
SDValue Vec = Op.getOperand(0);
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Vec.getValueType(),
Vec, DAG.getUNDEF(Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getIntPtrConstant(0));
} else if (VT.getSizeInBits() == 64) {
// FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
// FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
// to match extract_elt for f64.
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (Idx == 0)
return Op;
// UNPCKHPD the element to the lowest double word, then movsd.
// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
MVT MaskVT = MVT::getIntVectorWithNumElements(2);
SmallVector<SDValue, 8> IdxVec;
IdxVec.push_back(DAG.getConstant(1, MaskVT.getVectorElementType()));
IdxVec.push_back(DAG.getUNDEF(MaskVT.getVectorElementType()));
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVT,
&IdxVec[0], IdxVec.size());
SDValue Vec = Op.getOperand(0);
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, dl, Vec.getValueType(),
Vec, DAG.getUNDEF(Vec.getValueType()),
Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getIntPtrConstant(0));
}

return SDValue();
}

SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){
MVT VT = Op.getValueType();
MVT EVT = VT.getVectorElementType();
DebugLoc dl = Op.getDebugLoc();
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
if ((EVT.getSizeInBits() == 8 || EVT.getSizeInBits() == 16) &&
isa<ConstantSDNode>(N2)) {
unsigned Opc = (EVT.getSizeInBits() == 8) ? X86ISD::PINSRB
: X86ISD::PINSRW;
// Transform it so it matches pinsr{b,w}, which expects a GR32 as its
// second argument.
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
return DAG.getNode(Opc, dl, VT, N0, N1, N2);
} else if (EVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
// Bits [7:6] of the constant are the source select. This will always be
// zero here. The DAG Combiner may combine an extract_elt index into these
// bits. For example (insert (extract, 3), 2) could be matched by putting
// the '3' into bits [7:6] of X86ISD::INSERTPS.
// Bits [5:4] of the constant are the destination select. This is the
// value of the incoming immediate.
// Bits [3:0] of the constant are the zero mask. The DAG Combiner may
// combine either bitwise AND or insert of float 0.0 to set these bits.
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
} else if (EVT == MVT::i32) {
// InsertPS works with constant index.
if (isa<ConstantSDNode>(N2))
return Op;
}
return SDValue();
}

SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
MVT EVT = VT.getVectorElementType();
if (Subtarget->hasSSE41())
return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
if (EVT == MVT::i8)
return SDValue();

DebugLoc dl = Op.getDebugLoc();
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
if (EVT.getSizeInBits() == 16) {
// Transform it so it matches pinsrw, which expects a 16-bit value in a
// GR32 as its second argument.
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
}
return SDValue();
}

SDValue
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
if (Op.getValueType() == MVT::v2f32)
return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f32,
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i32,
DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
Op.getOperand(0))));

SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
MVT VT = MVT::v2i32;
switch (Op.getValueType().getSimpleVT()) {
default: break;
case MVT::v16i8:
case MVT::v8i16:
VT = MVT::v4i32;
break;
}
return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(),
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, AnyExt));
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOV32ri.
SDValue
X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
// FIXME there isn't really any debug info here, should come from the parent
DebugLoc dl = CP->getDebugLoc();
SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(),
getPointerTy(),
CP->getAlignment());
Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
DebugLoc::getUnknownLoc(),
getPointerTy()),
Result);
}
return Result;
}
SDValue
X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
int64_t Offset,
SelectionDAG &DAG) const {
bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_;
bool ExtraLoadRequired =
Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false);
// Create the TargetGlobalAddress node, folding in the constant
// offset if it is legal.
SDValue Result;
if (!IsPic && !ExtraLoadRequired && isInt32(Offset)) {
Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
Offset = 0;
} else
Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0);
Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (IsPic && !Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
Result);
}

// For Darwin & Mingw32, external and weak symbols are indirect, so we want to
// load the value at address GV, not the value of GV itself. This means that
// the GlobalAddress must be in the base or index register of the address, not
// the GV offset field. The platform check is inside the GVRequiresExtraLoad()
// call. The same applies to external symbols during PIC codegen.
if (ExtraLoadRequired)
Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
PseudoSourceValue::getGOT(), 0);
// If there was a non-zero offset that we didn't fold, create an explicit
// addition for it.
if (Offset != 0)
Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result,
DAG.getConstant(Offset, getPointerTy()));
return Result;
}
SDValue
X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
return LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
static SDValue
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const MVT PtrVT) {
SDValue InFlag;
DebugLoc dl = GA->getDebugLoc(); // ? function entry point might be better
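// Sequence: load the GOT base into EBX, materialize symbol@TLSGD with a
// TLSADDR node, then call ___tls_get_addr, which returns the variable's
// address in EAX.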
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg,
DebugLoc::getUnknownLoc(),
PtrVT), InFlag);
InFlag = Chain.getValue(1);
// emit leal symbol@TLSGD(,%ebx,1), %eax
SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
GA->getValueType(0),
GA->getOffset());
SDValue Ops[] = { Chain, TGA, InFlag };
SDValue Result = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 3);
InFlag = Result.getValue(2);
Chain = Result.getValue(1);
// call ___tls_get_addr. This function receives its argument in
// the register EAX.
Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Result, InFlag);
InFlag = Chain.getValue(1);
NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SDValue Ops1[] = { Chain,
DAG.getTargetExternalSymbol("___tls_get_addr",
PtrVT),
DAG.getRegister(X86::EAX, PtrVT),
DAG.getRegister(X86::EBX, PtrVT),
InFlag };
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops1, 5);
InFlag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, dl, X86::EAX, PtrVT, InFlag);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
static SDValue
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const MVT PtrVT) {
SDValue InFlag, Chain;
DebugLoc dl = GA->getDebugLoc(); // ? function entry point might be better

// emit leaq symbol@TLSGD(%rip), %rdi
SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
GA->getValueType(0),
GA->getOffset());
SDValue Ops[] = { DAG.getEntryNode(), TGA };
SDValue Result = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2);
Chain = Result.getValue(1);
InFlag = Result.getValue(2);
// call __tls_get_addr. This function receives its argument in
// the register RDI.
Chain = DAG.getCopyToReg(Chain, dl, X86::RDI, Result, InFlag);
InFlag = Chain.getValue(1);
NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SDValue Ops1[] = { Chain,
DAG.getTargetExternalSymbol("__tls_get_addr",
PtrVT),
DAG.getRegister(X86::RDI, PtrVT),
InFlag };
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops1, 4);
InFlag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, dl, X86::RAX, PtrVT, InFlag);
}
// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
// "local exec" model.
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const MVT PtrVT, TLSModel::Model model) {
DebugLoc dl = GA->getDebugLoc();
// Get the Thread Pointer
SDValue ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER,
DebugLoc::getUnknownLoc(), PtrVT);
// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
// exec)
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
GA->getValueType(0),
GA->getOffset());
SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA);
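// For initial-exec the wrapped symbol refers to a GOT slot holding the
// variable's offset from the thread pointer, so it must be loaded; for
// local-exec the offset is a link-time constant and is used directly.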
if (model == TLSModel::InitialExec)
Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
PseudoSourceValue::getGOT(), 0);
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}
SDValue
X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
// TODO: implement the "local dynamic" model
// TODO: implement the "initial exec"model for pic executables
assert(Subtarget->isTargetELF() &&
"TLS not implemented for non-ELF targets");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
GlobalValue *GV = GA->getGlobal();
TLSModel::Model model =
getTLSModel (GV, getTargetMachine().getRelocationModel());
if (Subtarget->is64Bit()) {
switch (model) {
case TLSModel::GeneralDynamic:
case TLSModel::LocalDynamic: // not implemented
case TLSModel::InitialExec: // not implemented
case TLSModel::LocalExec: // not implemented
return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
default:
assert (0 && "Unknown TLS model");
}
} else {
switch (model) {
case TLSModel::GeneralDynamic:
case TLSModel::LocalDynamic: // not implemented
return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
case TLSModel::InitialExec:
case TLSModel::LocalExec:
return LowerToTLSExecModel(GA, DAG, getPointerTy(), model);
default:
assert (0 && "Unknown TLS model");
}
}
}
SDValue
X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
// FIXME there isn't really any debug info here
DebugLoc dl = Op.getDebugLoc();
const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
DebugLoc::getUnknownLoc(),
getPointerTy()),
Result);
}
return Result;
}
SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
// FIXME there isn't really any debug info here
DebugLoc dl = JT->getDebugLoc();
SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
DebugLoc::getUnknownLoc(),
getPointerTy()),
Result);
}
return Result;
}
/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
/// take a 2 x i32 value to shift plus a shift amount.
SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
MVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
DebugLoc dl = Op.getDebugLoc();
bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
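// Tmp1 is the value the fully shifted-out half takes once the shift amount
// reaches VTBits: the sign bits of ShOpHi for an arithmetic shift, zero
// otherwise.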
SDValue Tmp1 = isSRA ?
DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
DAG.getConstant(VTBits - 1, MVT::i8)) :
DAG.getConstant(0, VT);
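// SHLD/SHRD compute the double-precision result for shift amounts below
// VTBits; the CMOVs below test ShAmt & VTBits to select the correct halves
// when the amount is VTBits or larger.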
SDValue Tmp2, Tmp3;
if (Op.getOpcode() == ISD::SHL_PARTS) {
Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
} else {
Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt);
Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, ShAmt);
}
SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
DAG.getConstant(VTBits, MVT::i8));
SDValue Cond = DAG.getNode(X86ISD::CMP, dl, VT,
AndNode, DAG.getConstant(0, MVT::i8));
SDValue Hi, Lo;
SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8);
SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond };
SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond };
if (Op.getOpcode() == ISD::SHL_PARTS) {
Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
} else {
Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
}

SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, 2, dl);
}
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
MVT SrcVT = Op.getOperand(0).getValueType();
assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
// These are really Legal; caller falls through into that case.
if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
return Op;
if (SrcVT == MVT::i64 && Op.getValueType() != MVT::f80 &&
Subtarget->is64Bit())
return Op;

DebugLoc dl = Op.getDebugLoc();
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
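// Spill the integer operand to a stack slot; the FILD built below loads it
// from there.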
SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot,
PseudoSourceValue::getFixedStack(SSFI), 0);
// Build the FILD
SDVTList Tys;
bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
if (useSSE)
Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
else
Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(StackSlot);
Ops.push_back(DAG.getValueType(SrcVT));
SDValue Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, dl,
Tys, &Ops[0], Ops.size());
Chain = Result.getValue(1);
// FIXME: Currently the FST is flagged to the FILD_FLAG. This
// shouldn't be necessary except that RFP cannot be live across