Newer
Older
MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}
/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
if (NumNonZero > 8)
return SDOperand();
SDOperand V(0, 0);
bool First = true;
for (unsigned i = 0; i < 16; ++i) {
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
if (ThisIsNonZero && First) {
if (NumZero)
V = getZeroVector(MVT::v8i16, DAG);
else
V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
First = false;
}
if ((i & 1) != 0) {
SDOperand ThisElt(0, 0), LastElt(0, 0);
bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
if (LastIsNonZero) {
LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
}
if (ThisIsNonZero) {
ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
ThisElt, DAG.getConstant(8, MVT::i8));
if (LastIsNonZero)
ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
} else
ThisElt = LastElt;
if (ThisElt.Val)
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
}
}
return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
}
/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
///
static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
if (NumNonZero > 4)
return SDOperand();
SDOperand V(0, 0);
bool First = true;
for (unsigned i = 0; i < 8; ++i) {
bool isNonZero = (NonZeros & (1 << i)) != 0;
if (isNonZero) {
if (First) {
if (NumZero)
V = getZeroVector(MVT::v8i16, DAG);
else
V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
First = false;
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
}
}
return V;
}
SDOperand
X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
// All zero's are handled with pxor.
if (ISD::isBuildVectorAllZeros(Op.Val))
return Op;
// All one's are handled with pcmpeqd.
if (ISD::isBuildVectorAllOnes(Op.Val))
return Op;
MVT::ValueType VT = Op.getValueType();
MVT::ValueType EVT = MVT::getVectorElementType(VT);
unsigned EVTBits = MVT::getSizeInBits(EVT);
unsigned NumElems = Op.getNumOperands();
unsigned NumZero = 0;
unsigned NumNonZero = 0;
unsigned NonZeros = 0;
unsigned NumNonZeroImms = 0;
std::set<SDOperand> Values;
for (unsigned i = 0; i < NumElems; ++i) {
SDOperand Elt = Op.getOperand(i);
if (Elt.getOpcode() != ISD::UNDEF) {
Values.insert(Elt);
if (isZeroNode(Elt))
NumZero++;
else {
NonZeros |= (1 << i);
NumNonZero++;
if (Elt.getOpcode() == ISD::Constant ||
Elt.getOpcode() == ISD::ConstantFP)
NumNonZeroImms++;
}
}
}
if (NumNonZero == 0) {
if (NumZero == 0)
// All undef vector. Return an UNDEF.
return DAG.getNode(ISD::UNDEF, VT);
else
// A mix of zero and undef. Return a zero vector.
return getZeroVector(VT, DAG);
}
// Splat is obviously ok. Let legalizer expand it to a shuffle.
if (Values.size() == 1)
return SDOperand();
// Special case for single non-zero element.
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDOperand Item = Op.getOperand(Idx);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
if (Idx == 0)
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
NumZero > 0, DAG);
if (EVTBits == 32) {
// Turn it into a shuffle of zero and zero-extended scalar to vector.
Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
DAG);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
SmallVector<SDOperand, 8> MaskVec;
for (unsigned i = 0; i < NumElems; i++)
MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
DAG.getNode(ISD::UNDEF, VT), Mask);
}
}
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
if (NumNonZero == NumNonZeroImms)
return SDOperand();
// Let legalizer expand 2-wide build_vectors.
if (EVTBits == 64)
return SDOperand();
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16) {
SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
*this);
if (V.Val) return V;
}
if (EVTBits == 16 && NumElems == 8) {
SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
*this);
if (V.Val) return V;
}
// If element VT is == 32 bits, turn it into a number of shuffles.
SmallVector<SDOperand, 8> V;
V.resize(NumElems);
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
if (NumElems == 4 && NumZero > 0) {
for (unsigned i = 0; i < 4; ++i) {
bool isZero = !(NonZeros & (1 << i));
if (isZero)
V[i] = getZeroVector(VT, DAG);
else
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
}
for (unsigned i = 0; i < 2; ++i) {
switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
default: break;
case 0:
V[i] = V[i*2]; // Must be a zero vector.
break;
case 1:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
getMOVLMask(NumElems, DAG));
break;
case 2:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
getMOVLMask(NumElems, DAG));
break;
case 3:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
getUnpacklMask(NumElems, DAG));
break;
}
}
Evan Cheng
committed
// Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd)
// clears the upper bits.
// FIXME: we can do the same for v4f32 case when we know both parts of
// the lower half come from scalar_to_vector (loadf32). We should do
// that in post legalizer dag combiner with target specific hooks.
if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
return V[0];
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
SmallVector<SDOperand, 8> MaskVec;
bool Reverse = (NonZeros & 0x3) == 2;
for (unsigned i = 0; i < 2; ++i)
if (Reverse)
MaskVec.push_back(DAG.getConstant(1-i, EVT));
else
MaskVec.push_back(DAG.getConstant(i, EVT));
Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
for (unsigned i = 0; i < 2; ++i)
if (Reverse)
MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
else
MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
}
if (Values.size() > 2) {
// Expand into a number of unpckl*.
// e.g. for v4f32
// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
for (unsigned i = 0; i < NumElems; ++i)
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
NumElems >>= 1;
while (NumElems != 0) {
for (unsigned i = 0; i < NumElems; ++i)
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
UnpckMask);
NumElems >>= 1;
}
return V[0];
}
return SDOperand();
}
SDOperand
X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
SDOperand V1 = Op.getOperand(0);
SDOperand V2 = Op.getOperand(1);
SDOperand PermMask = Op.getOperand(2);
MVT::ValueType VT = Op.getValueType();
unsigned NumElems = PermMask.getNumOperands();
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
Evan Cheng
committed
bool V1IsSplat = false;
bool V2IsSplat = false;
if (isUndefShuffle(Op.Val))
return DAG.getNode(ISD::UNDEF, VT);
if (isZeroShuffle(Op.Val))
return getZeroVector(VT, DAG);
Evan Cheng
committed
if (isIdentityMask(PermMask.Val))
return V1;
else if (isIdentityMask(PermMask.Val, true))
return V2;
if (isSplatMask(PermMask.Val)) {
if (NumElems <= 4) return Op;
// Promote it to a v4i32 splat.
if (X86::isMOVLMask(PermMask.Val))
return (V1IsUndef) ? V2 : Op;
if (X86::isMOVSHDUPMask(PermMask.Val) ||
X86::isMOVSLDUPMask(PermMask.Val) ||
X86::isMOVHLPSMask(PermMask.Val) ||
X86::isMOVHPMask(PermMask.Val) ||
X86::isMOVLPMask(PermMask.Val))
return Op;
if (ShouldXformToMOVHLPS(PermMask.Val) ||
ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
bool Commuted = false;
V1IsSplat = isSplatVector(V1.Val);
V2IsSplat = isSplatVector(V2.Val);
if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
std::swap(V1IsSplat, V2IsSplat);
std::swap(V1IsUndef, V2IsUndef);
Commuted = true;
}
if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
if (V2IsUndef) return V1;
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (V2IsSplat) {
// V2 is a splat, so the mask may be malformed. That is, it may point
// to any V2 element. The instruction selectior won't like this. Get
// a corrected mask and commute to form a proper MOVS{S|D}.
SDOperand NewMask = getMOVLMask(NumElems, DAG);
if (NewMask.Val != PermMask.Val)
Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
Evan Cheng
committed
}
Evan Cheng
committed
}
Evan Cheng
committed
Evan Cheng
committed
if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
Evan Cheng
committed
X86::isUNPCKLMask(PermMask.Val) ||
X86::isUNPCKHMask(PermMask.Val))
return Op;
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 points to its first
// element then try to match unpck{h|l} again. If match, return a
// new vector_shuffle with the corrected mask.
SDOperand NewMask = NormalizeMask(PermMask, DAG);
if (NewMask.Val != PermMask.Val) {
if (X86::isUNPCKLMask(PermMask.Val, true)) {
SDOperand NewMask = getUnpacklMask(NumElems, DAG);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
} else if (X86::isUNPCKHMask(PermMask.Val, true)) {
SDOperand NewMask = getUnpackhMask(NumElems, DAG);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
}
}
}
// Normalize the node to match x86 shuffle ops if needed
if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (Commuted) {
// Commute is back and try unpck* again.
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
X86::isUNPCKLMask(PermMask.Val) ||
X86::isUNPCKHMask(PermMask.Val))
return Op;
}
// If VT is integer, try PSHUF* first, then SHUFP*.
if (MVT::isInteger(VT)) {
// MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically
// possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
if (((MVT::getSizeInBits(VT) != 64 || NumElems == 4) &&
X86::isPSHUFDMask(PermMask.Val)) ||
X86::isPSHUFHWMask(PermMask.Val) ||
X86::isPSHUFLWMask(PermMask.Val)) {
if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
return Op;
}
if (X86::isSHUFPMask(PermMask.Val) &&
MVT::getSizeInBits(VT) != 64) // Don't do this for MMX.
return Op;
// Handle v8i16 shuffle high / low shuffle node pair.
if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
SmallVector<SDOperand, 8> MaskVec;
for (unsigned i = 0; i != 4; ++i)
MaskVec.push_back(PermMask.getOperand(i));
for (unsigned i = 4; i != 8; ++i)
MaskVec.push_back(DAG.getConstant(i, BaseVT));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
MaskVec.clear();
for (unsigned i = 0; i != 4; ++i)
MaskVec.push_back(DAG.getConstant(i, BaseVT));
for (unsigned i = 4; i != 8; ++i)
MaskVec.push_back(PermMask.getOperand(i));
Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}
} else {
// Floating point cases in the other order.
if (X86::isSHUFPMask(PermMask.Val))
return Op;
if (X86::isPSHUFDMask(PermMask.Val) ||
X86::isPSHUFHWMask(PermMask.Val) ||
X86::isPSHUFLWMask(PermMask.Val)) {
if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
return Op;
}
}
if (NumElems == 4 &&
// Don't do this for MMX.
MVT::getSizeInBits(VT) != 64) {
MVT::ValueType MaskVT = PermMask.getValueType();
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
SmallVector<std::pair<int, int>, 8> Locs;
Locs.reserve(NumElems);
SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
unsigned NumHi = 0;
unsigned NumLo = 0;
// If no more than two elements come from either vector. This can be
// implemented with two shuffles. First shuffle gather the elements.
// The second shuffle, which takes the first shuffle as both of its
// vector operands, put the elements into the right order.
for (unsigned i = 0; i != NumElems; ++i) {
SDOperand Elt = PermMask.getOperand(i);
if (Elt.getOpcode() == ISD::UNDEF) {
Locs[i] = std::make_pair(-1, -1);
} else {
unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
if (Val < NumElems) {
Locs[i] = std::make_pair(0, NumLo);
Mask1[NumLo] = Elt;
NumLo++;
} else {
Locs[i] = std::make_pair(1, NumHi);
if (2+NumHi < NumElems)
Mask1[2+NumHi] = Elt;
NumHi++;
}
}
}
if (NumLo <= 2 && NumHi <= 2) {
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&Mask1[0], Mask1.size()));
for (unsigned i = 0; i != NumElems; ++i) {
if (Locs[i].first == -1)
continue;
else {
unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
Mask2[i] = DAG.getConstant(Idx, MaskEVT);
}
}
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&Mask2[0], Mask2.size()));
}
// Break it into (shuffle shuffle_hi, shuffle_lo).
Locs.clear();
SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
SmallVector<SDOperand,8> *MaskPtr = &LoMask;
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
unsigned MaskIdx = 0;
unsigned LoIdx = 0;
unsigned HiIdx = NumElems/2;
for (unsigned i = 0; i != NumElems; ++i) {
if (i == NumElems/2) {
MaskPtr = &HiMask;
MaskIdx = 1;
LoIdx = 0;
HiIdx = NumElems/2;
}
SDOperand Elt = PermMask.getOperand(i);
if (Elt.getOpcode() == ISD::UNDEF) {
Locs[i] = std::make_pair(-1, -1);
} else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
Locs[i] = std::make_pair(MaskIdx, LoIdx);
(*MaskPtr)[LoIdx] = Elt;
LoIdx++;
} else {
Locs[i] = std::make_pair(MaskIdx, HiIdx);
(*MaskPtr)[HiIdx] = Elt;
HiIdx++;
}
}
SDOperand LoShuffle =
DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&LoMask[0], LoMask.size()));
SDOperand HiShuffle =
DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&HiMask[0], HiMask.size()));
SmallVector<SDOperand, 8> MaskOps;
for (unsigned i = 0; i != NumElems; ++i) {
if (Locs[i].first == -1) {
MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
} else {
unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
}
}
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskOps[0], MaskOps.size()));
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
}
return SDOperand();
}
SDOperand
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
if (!isa<ConstantSDNode>(Op.getOperand(1)))
return SDOperand();
MVT::ValueType VT = Op.getValueType();
// TODO: handle v16i8.
if (MVT::getSizeInBits(VT) == 16) {
// Transform it so it match pextrw which produces a 32-bit result.
MVT::ValueType EVT = (MVT::ValueType)(VT+1);
SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
Op.getOperand(0), Op.getOperand(1));
SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
} else if (MVT::getSizeInBits(VT) == 32) {
SDOperand Vec = Op.getOperand(0);
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
// SHUFPS the element to the lowest double word, then movss.
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
SmallVector<SDOperand, 8> IdxVec;
IdxVec.
push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
IdxVec.
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
IdxVec.
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
IdxVec.
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getConstant(0, getPointerTy()));
} else if (MVT::getSizeInBits(VT) == 64) {
SDOperand Vec = Op.getOperand(0);
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
// UNPCKHPD the element to the lowest double word, then movsd.
// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
SmallVector<SDOperand, 8> IdxVec;
IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
IdxVec.
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getConstant(0, getPointerTy()));
}
return SDOperand();
}
SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
Evan Cheng
committed
// Transform it so it match pinsrw which expects a 16-bit value in a GR32
// as its second argument.
MVT::ValueType VT = Op.getValueType();
MVT::ValueType BaseVT = MVT::getVectorElementType(VT);
SDOperand N0 = Op.getOperand(0);
SDOperand N1 = Op.getOperand(1);
SDOperand N2 = Op.getOperand(2);
if (MVT::getSizeInBits(BaseVT) == 16) {
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(),getPointerTy());
return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
} else if (MVT::getSizeInBits(BaseVT) == 32) {
unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
if (Idx == 0) {
// Use a movss.
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
SmallVector<SDOperand, 8> MaskVec;
MaskVec.push_back(DAG.getConstant(4, BaseVT));
for (unsigned i = 1; i <= 3; ++i)
MaskVec.push_back(DAG.getConstant(i, BaseVT));
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size()));
} else {
// Use two pinsrw instructions to insert a 32 bit value.
Idx <<= 1;
if (MVT::isFloatingPoint(N1.getValueType())) {
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
DAG.getConstant(0, getPointerTy()));
}
N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
DAG.getConstant(Idx, getPointerTy()));
N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
DAG.getConstant(Idx+1, getPointerTy()));
return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
}
}
return SDOperand();
}
SDOperand
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOV32ri.
SDOperand
X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
getPointerTy(),
CP->getAlignment());
Evan Cheng
committed
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
}
return Result;
}
SDOperand
X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
Evan Cheng
committed
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
Anton Korobeynikov
committed
// For Darwin & Mingw32, external and weak symbols are indirect, so we want to
// load the value at address GV, not the value of GV itself. This means that
// the GlobalAddress must be in the base or index register of the address, not
// the GV offset field. Platform check is inside GVRequiresExtraLoad() call
// The same applies for external symbols during PIC codegen
Anton Korobeynikov
committed
if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
return Result;
}
Lauro Ramos Venancio
committed
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
// Lower ISD::GlobalTLSAddress using the "general dynamic" model
static SDOperand
LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const MVT::ValueType PtrVT) {
SDOperand InFlag;
SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg,
PtrVT), InFlag);
InFlag = Chain.getValue(1);
// emit leal symbol@TLSGD(,%ebx,1), %eax
SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
GA->getValueType(0),
GA->getOffset());
SDOperand Ops[] = { Chain, TGA, InFlag };
SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
InFlag = Result.getValue(2);
Chain = Result.getValue(1);
// call ___tls_get_addr. This function receives its argument in
// the register EAX.
Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
InFlag = Chain.getValue(1);
NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SDOperand Ops1[] = { Chain,
DAG.getTargetExternalSymbol("___tls_get_addr",
PtrVT),
DAG.getRegister(X86::EAX, PtrVT),
DAG.getRegister(X86::EBX, PtrVT),
InFlag };
Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
InFlag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
}
// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
// "local exec" model.
static SDOperand
LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const MVT::ValueType PtrVT) {
// Get the Thread Pointer
SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
// exec)
SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
GA->getValueType(0),
GA->getOffset());
SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);
Lauro Ramos Venancio
committed
if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0);
Lauro Ramos Venancio
committed
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
}
SDOperand
X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
// TODO: implement the "local dynamic" model
// TODO: implement the "initial exec"model for pic executables
assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
"TLS not implemented for non-ELF and 64-bit targets");
Lauro Ramos Venancio
committed
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
// If the relocation model is PIC, use the "General Dynamic" TLS Model,
// otherwise use the "Local Exec"TLS Model
if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
else
return LowerToTLSExecModel(GA, DAG, getPointerTy());
}
SDOperand
X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
Evan Cheng
committed
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
}
return Result;
}
SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
}
return Result;
}
/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
/// take a 2 x i32 value to shift plus a shift amount.
SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
"Not an i64 shift!");
bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
SDOperand ShOpLo = Op.getOperand(0);
SDOperand ShOpHi = Op.getOperand(1);
SDOperand ShAmt = Op.getOperand(2);
SDOperand Tmp1 = isSRA ?
DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
DAG.getConstant(0, MVT::i32);
SDOperand Tmp2, Tmp3;
if (Op.getOpcode() == ISD::SHL_PARTS) {
Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
} else {
Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
}
const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
DAG.getConstant(32, MVT::i8));
SDOperand Cond = DAG.getNode(X86ISD::CMP, MVT::i32,
AndNode, DAG.getConstant(0, MVT::i8));
SDOperand Hi, Lo;
SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
SmallVector<SDOperand, 4> Ops;
if (Op.getOpcode() == ISD::SHL_PARTS) {
Ops.push_back(Tmp2);
Ops.push_back(Tmp3);
Ops.push_back(CC);
Ops.push_back(Cond);
Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
Ops.clear();
Ops.push_back(Tmp3);
Ops.push_back(Tmp1);
Ops.push_back(CC);
Ops.push_back(Cond);
Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
} else {
Ops.push_back(Tmp2);
Ops.push_back(Tmp3);
Ops.push_back(CC);
Ops.push_back(Cond);
Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
Ops.clear();
Ops.push_back(Tmp3);
Ops.push_back(Tmp1);
Ops.push_back(CC);
Ops.push_back(Cond);
Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
}
VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
Ops.clear();
Ops.push_back(Lo);
Ops.push_back(Hi);
return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
}
SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
Op.getOperand(0).getValueType() >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
SDOperand Result;
MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
unsigned Size = MVT::getSizeInBits(SrcVT)/8;
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
StackSlot, NULL, 0);
// These are really Legal; caller falls through into that case.
if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f32 && X86ScalarSSEf32)
return Result;
if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f64 && X86ScalarSSEf64)
if (SrcVT==MVT::i64 && Op.getValueType() != MVT::f80 &&
Subtarget->is64Bit())
return Result;
// Build the FILD
SDVTList Tys;
bool useSSE = (X86ScalarSSEf32 && Op.getValueType() == MVT::f32) ||
(X86ScalarSSEf64 && Op.getValueType() == MVT::f64);
Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
else
Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
SmallVector<SDOperand, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(StackSlot);
Ops.push_back(DAG.getValueType(SrcVT));
Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
Tys, &Ops[0], Ops.size());
Chain = Result.getValue(1);
SDOperand InFlag = Result.getValue(2);
// FIXME: Currently the FST is flagged to the FILD_FLAG. This
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
Tys = DAG.getVTList(MVT::Other);
SmallVector<SDOperand, 8> Ops;
Ops.push_back(Result);
Ops.push_back(StackSlot);
Ops.push_back(DAG.getValueType(Op.getValueType()));
Ops.push_back(InFlag);
Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0);
}
return Result;
}
std::pair<SDOperand,SDOperand> X86TargetLowering::
FP_TO_SINTHelper(SDOperand Op, SelectionDAG &DAG) {
assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
"Unknown FP_TO_SINT to lower!");
// These are really Legal.
if (Op.getValueType() == MVT::i32 &&
X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32)
if (Op.getValueType() == MVT::i32 &&
X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)
if (Subtarget->is64Bit() &&
Op.getValueType() == MVT::i64 &&
Op.getOperand(0).getValueType() != MVT::f80)
// We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
// stack slot.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
unsigned Opc;
switch (Op.getValueType()) {
default: assert(0 && "Invalid FP_TO_SINT to lower!");
case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
}
SDOperand Chain = DAG.getEntryNode();
SDOperand Value = Op.getOperand(0);
if ((X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) ||
(X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)) {
assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
SDOperand Ops[] = {
Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
};
Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
Chain = Value.getValue(1);
SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
// Build the FP_TO_INT*_IN_MEM
SDOperand Ops[] = { Chain, Value, StackSlot };
SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3);
return std::make_pair(FIST, StackSlot);
}
SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(Op, DAG);
SDOperand FIST = Vals.first, StackSlot = Vals.second;
if (FIST.Val == 0) return SDOperand();
// Load the result.
return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
SDNode *X86TargetLowering::ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG) {
std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(SDOperand(N, 0), DAG);
SDOperand FIST = Vals.first, StackSlot = Vals.second;
if (FIST.Val == 0) return 0;
// Return an i64 load from the stack slot.
SDOperand Res = DAG.getLoad(MVT::i64, FIST, StackSlot, NULL, 0);
// Use a MERGE_VALUES node to drop the chain result value.
return DAG.getNode(ISD::MERGE_VALUES, MVT::i64, Res).Val;
}
SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
MVT::ValueType VT = Op.getValueType();
MVT::ValueType EltVT = VT;
if (MVT::isVector(VT))
EltVT = MVT::getVectorElementType(VT);