Newer
Older
continue;
}
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
if (Val < NumElems)
MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
else
MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
}
std::swap(V1, V2);
Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from upper half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(SDNode *Mask) {
unsigned NumElems = Mask->getNumOperands();
if (NumElems != 4)
return false;
for (unsigned i = 0, e = 2; i != e; ++i)
if (!isUndefOrEqual(Mask->getOperand(i), i+2))
return false;
for (unsigned i = 2; i != 4; ++i)
if (!isUndefOrEqual(Mask->getOperand(i), i+4))
return false;
return true;
}
/// isScalarLoadToVector - Returns true if the node is a scalar load that
/// is promoted to a vector.
static inline bool isScalarLoadToVector(SDNode *N) {
if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
N = N->getOperand(0).Val;
return ISD::isNON_EXTLoad(N);
/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
/// match movlp{s|d}. The lower half elements should come from lower half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order). And since V1 will become the source of the
/// MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) {
if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
return false;
// Is V2 is a vector load, don't do this transformation. We will try to use
// load folding shufps op.
if (ISD::isNON_EXTLoad(V2))
return false;
unsigned NumElems = Mask->getNumOperands();
if (NumElems != 2 && NumElems != 4)
return false;
for (unsigned i = 0, e = NumElems/2; i != e; ++i)
if (!isUndefOrEqual(Mask->getOperand(i), i))
return false;
for (unsigned i = NumElems/2; i != NumElems; ++i)
if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
return false;
return true;
/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
/// all the same.
static bool isSplatVector(SDNode *N) {
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
SDOperand SplatValue = N->getOperand(0);
for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
if (N->getOperand(i) != SplatValue)
return false;
return true;
}
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
/// to an undef.
static bool isUndefShuffle(SDNode *N) {
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
SDOperand V1 = N->getOperand(0);
SDOperand V2 = N->getOperand(1);
SDOperand Mask = N->getOperand(2);
unsigned NumElems = Mask.getNumOperands();
for (unsigned i = 0; i != NumElems; ++i) {
SDOperand Arg = Mask.getOperand(i);
if (Arg.getOpcode() != ISD::UNDEF) {
unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
return false;
else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
return false;
}
}
return true;
}
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 points to its first element.
static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
bool Changed = false;
std::vector<SDOperand> MaskVec;
unsigned NumElems = Mask.getNumOperands();
for (unsigned i = 0; i != NumElems; ++i) {
SDOperand Arg = Mask.getOperand(i);
if (Arg.getOpcode() != ISD::UNDEF) {
unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
if (Val > NumElems) {
Arg = DAG.getConstant(NumElems, Arg.getValueType());
Changed = true;
}
}
MaskVec.push_back(Arg);
}
if (Changed)
Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
&MaskVec[0], MaskVec.size());
return Mask;
}
/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd
/// operation of specified width.
static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
std::vector<SDOperand> MaskVec;
MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
for (unsigned i = 1; i != NumElems; ++i)
MaskVec.push_back(DAG.getConstant(i, BaseVT));
return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}
/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
/// of specified width.
static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
std::vector<SDOperand> MaskVec;
for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
MaskVec.push_back(DAG.getConstant(i, BaseVT));
MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
}
return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
/// of specified width.
static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
unsigned Half = NumElems/2;
std::vector<SDOperand> MaskVec;
for (unsigned i = 0; i != Half; ++i) {
MaskVec.push_back(DAG.getConstant(i + Half, BaseVT));
MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
}
return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}
/// getZeroVector - Returns a vector of specified type with all zero elements.
///
static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
assert(MVT::isVector(VT) && "Expected a vector type");
unsigned NumElems = getVectorNumElements(VT);
MVT::ValueType EVT = MVT::getVectorBaseType(VT);
bool isFP = MVT::isFloatingPoint(EVT);
SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
std::vector<SDOperand> ZeroVec(NumElems, Zero);
return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
}
/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
///
static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
SDOperand V1 = Op.getOperand(0);
SDOperand Mask = Op.getOperand(2);
MVT::ValueType VT = Op.getValueType();
unsigned NumElems = Mask.getNumOperands();
Mask = getUnpacklMask(NumElems, DAG);
while (NumElems != 4) {
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
NumElems >>= 1;
}
V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
Mask = getZeroVector(MaskVT, DAG);
SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
static inline bool isZeroNode(SDOperand Elt) {
return ((isa<ConstantSDNode>(Elt) &&
cast<ConstantSDNode>(Elt)->getValue() == 0) ||
(isa<ConstantFPSDNode>(Elt) &&
cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
}
/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
/// vector and zero or undef vector.
static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
unsigned NumElems, unsigned Idx,
bool isZero, SelectionDAG &DAG) {
SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
SDOperand Zero = DAG.getConstant(0, EVT);
std::vector<SDOperand> MaskVec(NumElems, Zero);
MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}
/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
if (NumNonZero > 8)
return SDOperand();
SDOperand V(0, 0);
bool First = true;
for (unsigned i = 0; i < 16; ++i) {
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
if (ThisIsNonZero && First) {
if (NumZero)
V = getZeroVector(MVT::v8i16, DAG);
else
V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
First = false;
}
if ((i & 1) != 0) {
SDOperand ThisElt(0, 0), LastElt(0, 0);
bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
if (LastIsNonZero) {
LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
}
if (ThisIsNonZero) {
ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
ThisElt, DAG.getConstant(8, MVT::i8));
if (LastIsNonZero)
ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
} else
ThisElt = LastElt;
if (ThisElt.Val)
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
}
}
return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
}
/// LowerBuildVectorv16i8 - Custom lower build_vector of v8i16.
///
static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
if (NumNonZero > 4)
return SDOperand();
SDOperand V(0, 0);
bool First = true;
for (unsigned i = 0; i < 8; ++i) {
bool isNonZero = (NonZeros & (1 << i)) != 0;
if (isNonZero) {
if (First) {
if (NumZero)
V = getZeroVector(MVT::v8i16, DAG);
else
V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
First = false;
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
}
}
return V;
}
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
SDOperand
X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
// All zero's are handled with pxor.
if (ISD::isBuildVectorAllZeros(Op.Val))
return Op;
// All one's are handled with pcmpeqd.
if (ISD::isBuildVectorAllOnes(Op.Val))
return Op;
MVT::ValueType VT = Op.getValueType();
MVT::ValueType EVT = MVT::getVectorBaseType(VT);
unsigned EVTBits = MVT::getSizeInBits(EVT);
unsigned NumElems = Op.getNumOperands();
unsigned NumZero = 0;
unsigned NumNonZero = 0;
unsigned NonZeros = 0;
std::set<SDOperand> Values;
for (unsigned i = 0; i < NumElems; ++i) {
SDOperand Elt = Op.getOperand(i);
if (Elt.getOpcode() != ISD::UNDEF) {
Values.insert(Elt);
if (isZeroNode(Elt))
NumZero++;
else {
NonZeros |= (1 << i);
NumNonZero++;
}
}
}
if (NumNonZero == 0)
// Must be a mix of zero and undef. Return a zero vector.
return getZeroVector(VT, DAG);
// Splat is obviously ok. Let legalizer expand it to a shuffle.
if (Values.size() == 1)
return SDOperand();
// Special case for single non-zero element.
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDOperand Item = Op.getOperand(Idx);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
if (Idx == 0)
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
NumZero > 0, DAG);
if (EVTBits == 32) {
// Turn it into a shuffle of zero and zero-extended scalar to vector.
Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
DAG);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
std::vector<SDOperand> MaskVec;
for (unsigned i = 0; i < NumElems; i++)
MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
DAG.getNode(ISD::UNDEF, VT), Mask);
}
}
Evan Cheng
committed
// Let legalizer expand 2-wide build_vector's.
if (EVTBits == 64)
return SDOperand();
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8) {
SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
*this);
if (V.Val) return V;
}
if (EVTBits == 16) {
SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
*this);
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
if (V.Val) return V;
}
// If element VT is == 32 bits, turn it into a number of shuffles.
std::vector<SDOperand> V(NumElems);
if (NumElems == 4 && NumZero > 0) {
for (unsigned i = 0; i < 4; ++i) {
bool isZero = !(NonZeros & (1 << i));
if (isZero)
V[i] = getZeroVector(VT, DAG);
else
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
}
for (unsigned i = 0; i < 2; ++i) {
switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
default: break;
case 0:
V[i] = V[i*2]; // Must be a zero vector.
break;
case 1:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
getMOVLMask(NumElems, DAG));
break;
case 2:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
getMOVLMask(NumElems, DAG));
break;
case 3:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
getUnpacklMask(NumElems, DAG));
break;
}
}
Evan Cheng
committed
// Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd)
// clears the upper bits.
// FIXME: we can do the same for v4f32 case when we know both parts of
// the lower half come from scalar_to_vector (loadf32). We should do
// that in post legalizer dag combiner with target specific hooks.
if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
return V[0];
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
std::vector<SDOperand> MaskVec;
bool Reverse = (NonZeros & 0x3) == 2;
for (unsigned i = 0; i < 2; ++i)
if (Reverse)
MaskVec.push_back(DAG.getConstant(1-i, EVT));
else
MaskVec.push_back(DAG.getConstant(i, EVT));
Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
for (unsigned i = 0; i < 2; ++i)
if (Reverse)
MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
else
MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
}
if (Values.size() > 2) {
// Expand into a number of unpckl*.
// e.g. for v4f32
// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
for (unsigned i = 0; i < NumElems; ++i)
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
NumElems >>= 1;
while (NumElems != 0) {
for (unsigned i = 0; i < NumElems; ++i)
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
UnpckMask);
NumElems >>= 1;
}
return V[0];
}
return SDOperand();
}
SDOperand
X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
SDOperand V1 = Op.getOperand(0);
SDOperand V2 = Op.getOperand(1);
SDOperand PermMask = Op.getOperand(2);
MVT::ValueType VT = Op.getValueType();
unsigned NumElems = PermMask.getNumOperands();
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
Evan Cheng
committed
bool V1IsSplat = false;
bool V2IsSplat = false;
if (isUndefShuffle(Op.Val))
return DAG.getNode(ISD::UNDEF, VT);
if (isSplatMask(PermMask.Val)) {
if (NumElems <= 4) return Op;
// Promote it to a v4i32 splat.
if (X86::isMOVLMask(PermMask.Val))
return (V1IsUndef) ? V2 : Op;
if (X86::isMOVSHDUPMask(PermMask.Val) ||
X86::isMOVSLDUPMask(PermMask.Val) ||
X86::isMOVHLPSMask(PermMask.Val) ||
X86::isMOVHPMask(PermMask.Val) ||
X86::isMOVLPMask(PermMask.Val))
return Op;
if (ShouldXformToMOVHLPS(PermMask.Val) ||
ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
bool Commuted = false;
V1IsSplat = isSplatVector(V1.Val);
V2IsSplat = isSplatVector(V2.Val);
if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
std::swap(V1IsSplat, V2IsSplat);
std::swap(V1IsUndef, V2IsUndef);
Commuted = true;
}
if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
if (V2IsUndef) return V1;
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (V2IsSplat) {
// V2 is a splat, so the mask may be malformed. That is, it may point
// to any V2 element. The instruction selectior won't like this. Get
// a corrected mask and commute to form a proper MOVS{S|D}.
SDOperand NewMask = getMOVLMask(NumElems, DAG);
if (NewMask.Val != PermMask.Val)
Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
Evan Cheng
committed
}
Evan Cheng
committed
}
Evan Cheng
committed
Evan Cheng
committed
if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
X86::isUNPCKLMask(PermMask.Val) ||
X86::isUNPCKHMask(PermMask.Val))
return Op;
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 points to its first
// element then try to match unpck{h|l} again. If match, return a
// new vector_shuffle with the corrected mask.
SDOperand NewMask = NormalizeMask(PermMask, DAG);
if (NewMask.Val != PermMask.Val) {
if (X86::isUNPCKLMask(PermMask.Val, true)) {
SDOperand NewMask = getUnpacklMask(NumElems, DAG);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
} else if (X86::isUNPCKHMask(PermMask.Val, true)) {
SDOperand NewMask = getUnpackhMask(NumElems, DAG);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
}
}
}
// Normalize the node to match x86 shuffle ops if needed
if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (Commuted) {
// Commute is back and try unpck* again.
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
X86::isUNPCKLMask(PermMask.Val) ||
X86::isUNPCKHMask(PermMask.Val))
return Op;
}
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
// If VT is integer, try PSHUF* first, then SHUFP*.
if (MVT::isInteger(VT)) {
if (X86::isPSHUFDMask(PermMask.Val) ||
X86::isPSHUFHWMask(PermMask.Val) ||
X86::isPSHUFLWMask(PermMask.Val)) {
if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
return Op;
}
if (X86::isSHUFPMask(PermMask.Val))
return Op;
// Handle v8i16 shuffle high / low shuffle node pair.
if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
std::vector<SDOperand> MaskVec;
for (unsigned i = 0; i != 4; ++i)
MaskVec.push_back(PermMask.getOperand(i));
for (unsigned i = 4; i != 8; ++i)
MaskVec.push_back(DAG.getConstant(i, BaseVT));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
MaskVec.clear();
for (unsigned i = 0; i != 4; ++i)
MaskVec.push_back(DAG.getConstant(i, BaseVT));
for (unsigned i = 4; i != 8; ++i)
MaskVec.push_back(PermMask.getOperand(i));
Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}
} else {
// Floating point cases in the other order.
if (X86::isSHUFPMask(PermMask.Val))
return Op;
if (X86::isPSHUFDMask(PermMask.Val) ||
X86::isPSHUFHWMask(PermMask.Val) ||
X86::isPSHUFLWMask(PermMask.Val)) {
if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
return Op;
}
}
if (NumElems == 4) {
MVT::ValueType MaskVT = PermMask.getValueType();
MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
std::vector<std::pair<int, int> > Locs;
Locs.reserve(NumElems);
std::vector<SDOperand> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
std::vector<SDOperand> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
unsigned NumHi = 0;
unsigned NumLo = 0;
// If no more than two elements come from either vector. This can be
// implemented with two shuffles. First shuffle gather the elements.
// The second shuffle, which takes the first shuffle as both of its
// vector operands, put the elements into the right order.
for (unsigned i = 0; i != NumElems; ++i) {
SDOperand Elt = PermMask.getOperand(i);
if (Elt.getOpcode() == ISD::UNDEF) {
Locs[i] = std::make_pair(-1, -1);
} else {
unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
if (Val < NumElems) {
Locs[i] = std::make_pair(0, NumLo);
Mask1[NumLo] = Elt;
NumLo++;
} else {
Locs[i] = std::make_pair(1, NumHi);
if (2+NumHi < NumElems)
Mask1[2+NumHi] = Elt;
NumHi++;
}
}
}
if (NumLo <= 2 && NumHi <= 2) {
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&Mask1[0], Mask1.size()));
for (unsigned i = 0; i != NumElems; ++i) {
if (Locs[i].first == -1)
continue;
else {
unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
Mask2[i] = DAG.getConstant(Idx, MaskEVT);
}
}
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&Mask2[0], Mask2.size()));
}
// Break it into (shuffle shuffle_hi, shuffle_lo).
Locs.clear();
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
std::vector<SDOperand> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
std::vector<SDOperand> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
std::vector<SDOperand> *MaskPtr = &LoMask;
unsigned MaskIdx = 0;
unsigned LoIdx = 0;
unsigned HiIdx = NumElems/2;
for (unsigned i = 0; i != NumElems; ++i) {
if (i == NumElems/2) {
MaskPtr = &HiMask;
MaskIdx = 1;
LoIdx = 0;
HiIdx = NumElems/2;
}
SDOperand Elt = PermMask.getOperand(i);
if (Elt.getOpcode() == ISD::UNDEF) {
Locs[i] = std::make_pair(-1, -1);
} else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
Locs[i] = std::make_pair(MaskIdx, LoIdx);
(*MaskPtr)[LoIdx] = Elt;
LoIdx++;
} else {
Locs[i] = std::make_pair(MaskIdx, HiIdx);
(*MaskPtr)[HiIdx] = Elt;
HiIdx++;
}
}
SDOperand LoShuffle =
DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&LoMask[0], LoMask.size()));
SDOperand HiShuffle =
DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&HiMask[0], HiMask.size()));
std::vector<SDOperand> MaskOps;
for (unsigned i = 0; i != NumElems; ++i) {
if (Locs[i].first == -1) {
MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
} else {
unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
}
}
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskOps[0], MaskOps.size()));
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
}
return SDOperand();
}
SDOperand
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
if (!isa<ConstantSDNode>(Op.getOperand(1)))
return SDOperand();
MVT::ValueType VT = Op.getValueType();
// TODO: handle v16i8.
if (MVT::getSizeInBits(VT) == 16) {
// Transform it so it match pextrw which produces a 32-bit result.
MVT::ValueType EVT = (MVT::ValueType)(VT+1);
SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
Op.getOperand(0), Op.getOperand(1));
SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
} else if (MVT::getSizeInBits(VT) == 32) {
SDOperand Vec = Op.getOperand(0);
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
// SHUFPS the element to the lowest double word, then movss.
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
std::vector<SDOperand> IdxVec;
IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getConstant(0, getPointerTy()));
} else if (MVT::getSizeInBits(VT) == 64) {
SDOperand Vec = Op.getOperand(0);
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
// UNPCKHPD the element to the lowest double word, then movsd.
// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
std::vector<SDOperand> IdxVec;
IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getConstant(0, getPointerTy()));
}
return SDOperand();
}
SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
Evan Cheng
committed
// Transform it so it match pinsrw which expects a 16-bit value in a GR32
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
// as its second argument.
MVT::ValueType VT = Op.getValueType();
MVT::ValueType BaseVT = MVT::getVectorBaseType(VT);
SDOperand N0 = Op.getOperand(0);
SDOperand N1 = Op.getOperand(1);
SDOperand N2 = Op.getOperand(2);
if (MVT::getSizeInBits(BaseVT) == 16) {
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32);
return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
} else if (MVT::getSizeInBits(BaseVT) == 32) {
unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
if (Idx == 0) {
// Use a movss.
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
std::vector<SDOperand> MaskVec;
MaskVec.push_back(DAG.getConstant(4, BaseVT));
for (unsigned i = 1; i <= 3; ++i)
MaskVec.push_back(DAG.getConstant(i, BaseVT));
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size()));
} else {
// Use two pinsrw instructions to insert a 32 bit value.
Idx <<= 1;
if (MVT::isFloatingPoint(N1.getValueType())) {
if (ISD::isNON_EXTLoad(N1.Val)) {
Evan Cheng
committed
// Just load directly from f32mem to GR32.
LoadSDNode *LD = cast<LoadSDNode>(N1);
N1 = DAG.getLoad(MVT::i32, LD->getChain(), LD->getBasePtr(),
LD->getSrcValue(), LD->getSrcValueOffset());
} else {
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
DAG.getConstant(0, getPointerTy()));
}
}
N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
DAG.getConstant(Idx, getPointerTy()));
N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
DAG.getConstant(Idx+1, getPointerTy()));
return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
}
}
return SDOperand();
}
SDOperand
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOV32ri.
SDOperand
X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
getPointerTy(),
CP->getAlignment());
Evan Cheng
committed
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
if (Subtarget->isTargetDarwin()) {
// With PIC, the address is actually $g + Offset.
if (!Subtarget->is64Bit() &&
getTargetMachine().getRelocationModel() == Reloc::PIC_)
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
}
return Result;
}
SDOperand
X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
Evan Cheng
committed
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
if (Subtarget->isTargetDarwin()) {
// With PIC, the address is actually $g + Offset.
if (!Subtarget->is64Bit() &&
getTargetMachine().getRelocationModel() == Reloc::PIC_)
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
// For Darwin, external and weak symbols are indirect, so we want to load
// the value at address GV, not the value of GV itself. This means that
// the GlobalAddress must be in the base or index register of the address,
// not the GV offset field.
if (getTargetMachine().getRelocationModel() != Reloc::Static &&
Subtarget->GVRequiresExtraLoad(GV, false))
Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
} else if (Subtarget->GVRequiresExtraLoad(GV, false)) {
Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
}
return Result;
}
SDOperand
X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
Evan Cheng
committed
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
if (Subtarget->isTargetDarwin()) {
// With PIC, the address is actually $g + Offset.
if (!Subtarget->is64Bit() &&
getTargetMachine().getRelocationModel() == Reloc::PIC_)
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
}
return Result;
}
SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
"Not an i64 shift!");
bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
SDOperand ShOpLo = Op.getOperand(0);
SDOperand ShOpHi = Op.getOperand(1);
SDOperand ShAmt = Op.getOperand(2);
SDOperand Tmp1 = isSRA ?
DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
DAG.getConstant(0, MVT::i32);
SDOperand Tmp2, Tmp3;
if (Op.getOpcode() == ISD::SHL_PARTS) {
Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
} else {
Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
}
const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
DAG.getConstant(32, MVT::i8));
SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)};
SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1);
SDOperand Hi, Lo;
SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
SmallVector<SDOperand, 4> Ops;
if (Op.getOpcode() == ISD::SHL_PARTS) {
Ops.push_back(Tmp2);
Ops.push_back(Tmp3);
Ops.push_back(CC);
Ops.push_back(InFlag);
Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
InFlag = Hi.getValue(1);
Ops.clear();
Ops.push_back(Tmp3);
Ops.push_back(Tmp1);
Ops.push_back(CC);
Ops.push_back(InFlag);
Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
} else {
Ops.push_back(Tmp2);
Ops.push_back(Tmp3);
Ops.push_back(CC);
Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
InFlag = Lo.getValue(1);
Ops.clear();
Ops.push_back(Tmp3);
Ops.push_back(Tmp1);
Ops.push_back(CC);
Ops.push_back(InFlag);
Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
}
VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
Ops.clear();
Ops.push_back(Lo);
Ops.push_back(Hi);
return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
}
SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
Op.getOperand(0).getValueType() >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
SDOperand Result;
MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
unsigned Size = MVT::getSizeInBits(SrcVT)/8;
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
StackSlot, NULL, 0);
// Build the FILD
std::vector<MVT::ValueType> Tys;
Tys.push_back(MVT::f64);
Tys.push_back(MVT::Other);
if (X86ScalarSSE) Tys.push_back(MVT::Flag);
std::vector<SDOperand> Ops;
Ops.push_back(Chain);
Ops.push_back(StackSlot);
Ops.push_back(DAG.getValueType(SrcVT));
Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
Tys, &Ops[0], Ops.size());
if (X86ScalarSSE) {
Chain = Result.getValue(1);
SDOperand InFlag = Result.getValue(2);
// FIXME: Currently the FST is flagged to the FILD_FLAG. This
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);