Newer
Older
static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
SDOperand &V2, SDOperand &Mask,
SelectionDAG &DAG) {
MVT::ValueType VT = Op.getValueType();
MVT::ValueType MaskVT = Mask.getValueType();
MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT);
unsigned NumElems = Mask.getNumOperands();
std::vector<SDOperand> MaskVec;
for (unsigned i = 0; i != NumElems; ++i) {
SDOperand Arg = Mask.getOperand(i);
if (Arg.getOpcode() == ISD::UNDEF) {
MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
continue;
}
assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
if (Val < NumElems)
MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
else
MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
}
std::swap(V1, V2);
Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from upper half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(SDNode *Mask) {
unsigned NumElems = Mask->getNumOperands();
if (NumElems != 4)
return false;
for (unsigned i = 0, e = 2; i != e; ++i)
if (!isUndefOrEqual(Mask->getOperand(i), i+2))
return false;
for (unsigned i = 2; i != 4; ++i)
if (!isUndefOrEqual(Mask->getOperand(i), i+4))
return false;
return true;
}
/// isScalarLoadToVector - Returns true if the node is a scalar load that
/// is promoted to a vector.
static inline bool isScalarLoadToVector(SDNode *N) {
if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
N = N->getOperand(0).Val;
return ISD::isNON_EXTLoad(N);
/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
/// match movlp{s|d}. The lower half elements should come from lower half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order). And since V1 will become the source of the
/// MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) {
if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
return false;
// Is V2 is a vector load, don't do this transformation. We will try to use
// load folding shufps op.
if (ISD::isNON_EXTLoad(V2))
return false;
unsigned NumElems = Mask->getNumOperands();
if (NumElems != 2 && NumElems != 4)
return false;
for (unsigned i = 0, e = NumElems/2; i != e; ++i)
if (!isUndefOrEqual(Mask->getOperand(i), i))
return false;
for (unsigned i = NumElems/2; i != NumElems; ++i)
if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
return false;
return true;
/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
/// all the same.
static bool isSplatVector(SDNode *N) {
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
SDOperand SplatValue = N->getOperand(0);
for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
if (N->getOperand(i) != SplatValue)
return false;
return true;
}
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
/// to an undef.
static bool isUndefShuffle(SDNode *N) {
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
SDOperand V1 = N->getOperand(0);
SDOperand V2 = N->getOperand(1);
SDOperand Mask = N->getOperand(2);
unsigned NumElems = Mask.getNumOperands();
for (unsigned i = 0; i != NumElems; ++i) {
SDOperand Arg = Mask.getOperand(i);
if (Arg.getOpcode() != ISD::UNDEF) {
unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
return false;
else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
return false;
}
}
return true;
}
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 points to its first element.
static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
bool Changed = false;
std::vector<SDOperand> MaskVec;
unsigned NumElems = Mask.getNumOperands();
for (unsigned i = 0; i != NumElems; ++i) {
SDOperand Arg = Mask.getOperand(i);
if (Arg.getOpcode() != ISD::UNDEF) {
unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
if (Val > NumElems) {
Arg = DAG.getConstant(NumElems, Arg.getValueType());
Changed = true;
}
}
MaskVec.push_back(Arg);
}
if (Changed)
Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
&MaskVec[0], MaskVec.size());
return Mask;
}
/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd
/// operation of specified width.
static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
std::vector<SDOperand> MaskVec;
MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
for (unsigned i = 1; i != NumElems; ++i)
MaskVec.push_back(DAG.getConstant(i, BaseVT));
return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}
/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
/// of specified width.
static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
std::vector<SDOperand> MaskVec;
for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
MaskVec.push_back(DAG.getConstant(i, BaseVT));
MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
}
return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
/// of specified width.
static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
unsigned Half = NumElems/2;
std::vector<SDOperand> MaskVec;
for (unsigned i = 0; i != Half; ++i) {
MaskVec.push_back(DAG.getConstant(i + Half, BaseVT));
MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
}
return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}
/// getZeroVector - Returns a vector of specified type with all zero elements.
///
static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
assert(MVT::isVector(VT) && "Expected a vector type");
unsigned NumElems = getVectorNumElements(VT);
MVT::ValueType EVT = MVT::getVectorBaseType(VT);
bool isFP = MVT::isFloatingPoint(EVT);
SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
std::vector<SDOperand> ZeroVec(NumElems, Zero);
return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
}
/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
///
static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
SDOperand V1 = Op.getOperand(0);
SDOperand Mask = Op.getOperand(2);
MVT::ValueType VT = Op.getValueType();
unsigned NumElems = Mask.getNumOperands();
Mask = getUnpacklMask(NumElems, DAG);
while (NumElems != 4) {
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
NumElems >>= 1;
}
V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
Mask = getZeroVector(MaskVT, DAG);
SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
static inline bool isZeroNode(SDOperand Elt) {
return ((isa<ConstantSDNode>(Elt) &&
cast<ConstantSDNode>(Elt)->getValue() == 0) ||
(isa<ConstantFPSDNode>(Elt) &&
cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
}
/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
/// vector and zero or undef vector.
static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
unsigned NumElems, unsigned Idx,
bool isZero, SelectionDAG &DAG) {
SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
SDOperand Zero = DAG.getConstant(0, EVT);
std::vector<SDOperand> MaskVec(NumElems, Zero);
MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}
/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
if (NumNonZero > 8)
return SDOperand();
SDOperand V(0, 0);
bool First = true;
for (unsigned i = 0; i < 16; ++i) {
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
if (ThisIsNonZero && First) {
if (NumZero)
V = getZeroVector(MVT::v8i16, DAG);
else
V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
First = false;
}
if ((i & 1) != 0) {
SDOperand ThisElt(0, 0), LastElt(0, 0);
bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
if (LastIsNonZero) {
LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
}
if (ThisIsNonZero) {
ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
ThisElt, DAG.getConstant(8, MVT::i8));
if (LastIsNonZero)
ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
} else
ThisElt = LastElt;
if (ThisElt.Val)
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
}
}
return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
}
/// LowerBuildVectorv16i8 - Custom lower build_vector of v8i16.
///
static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
if (NumNonZero > 4)
return SDOperand();
SDOperand V(0, 0);
bool First = true;
for (unsigned i = 0; i < 8; ++i) {
bool isNonZero = (NonZeros & (1 << i)) != 0;
if (isNonZero) {
if (First) {
if (NumZero)
V = getZeroVector(MVT::v8i16, DAG);
else
V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
First = false;
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
}
}
return V;
}
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
SDOperand
X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
// All zero's are handled with pxor.
if (ISD::isBuildVectorAllZeros(Op.Val))
return Op;
// All one's are handled with pcmpeqd.
if (ISD::isBuildVectorAllOnes(Op.Val))
return Op;
MVT::ValueType VT = Op.getValueType();
MVT::ValueType EVT = MVT::getVectorBaseType(VT);
unsigned EVTBits = MVT::getSizeInBits(EVT);
unsigned NumElems = Op.getNumOperands();
unsigned NumZero = 0;
unsigned NumNonZero = 0;
unsigned NonZeros = 0;
std::set<SDOperand> Values;
for (unsigned i = 0; i < NumElems; ++i) {
SDOperand Elt = Op.getOperand(i);
if (Elt.getOpcode() != ISD::UNDEF) {
Values.insert(Elt);
if (isZeroNode(Elt))
NumZero++;
else {
NonZeros |= (1 << i);
NumNonZero++;
}
}
}
if (NumNonZero == 0)
// Must be a mix of zero and undef. Return a zero vector.
return getZeroVector(VT, DAG);
// Splat is obviously ok. Let legalizer expand it to a shuffle.
if (Values.size() == 1)
return SDOperand();
// Special case for single non-zero element.
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDOperand Item = Op.getOperand(Idx);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
if (Idx == 0)
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
NumZero > 0, DAG);
if (EVTBits == 32) {
// Turn it into a shuffle of zero and zero-extended scalar to vector.
Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
DAG);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
std::vector<SDOperand> MaskVec;
for (unsigned i = 0; i < NumElems; i++)
MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
DAG.getNode(ISD::UNDEF, VT), Mask);
}
}
Evan Cheng
committed
// Let legalizer expand 2-wide build_vector's.
if (EVTBits == 64)
return SDOperand();
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8) {
SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
*this);
if (V.Val) return V;
}
if (EVTBits == 16) {
SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
*this);
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
if (V.Val) return V;
}
// If element VT is == 32 bits, turn it into a number of shuffles.
std::vector<SDOperand> V(NumElems);
if (NumElems == 4 && NumZero > 0) {
for (unsigned i = 0; i < 4; ++i) {
bool isZero = !(NonZeros & (1 << i));
if (isZero)
V[i] = getZeroVector(VT, DAG);
else
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
}
for (unsigned i = 0; i < 2; ++i) {
switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
default: break;
case 0:
V[i] = V[i*2]; // Must be a zero vector.
break;
case 1:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
getMOVLMask(NumElems, DAG));
break;
case 2:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
getMOVLMask(NumElems, DAG));
break;
case 3:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
getUnpacklMask(NumElems, DAG));
break;
}
}
Evan Cheng
committed
// Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd)
// clears the upper bits.
// FIXME: we can do the same for v4f32 case when we know both parts of
// the lower half come from scalar_to_vector (loadf32). We should do
// that in post legalizer dag combiner with target specific hooks.
if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
return V[0];
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
std::vector<SDOperand> MaskVec;
bool Reverse = (NonZeros & 0x3) == 2;
for (unsigned i = 0; i < 2; ++i)
if (Reverse)
MaskVec.push_back(DAG.getConstant(1-i, EVT));
else
MaskVec.push_back(DAG.getConstant(i, EVT));
Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
for (unsigned i = 0; i < 2; ++i)
if (Reverse)
MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
else
MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
}
if (Values.size() > 2) {
// Expand into a number of unpckl*.
// e.g. for v4f32
// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
for (unsigned i = 0; i < NumElems; ++i)
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
NumElems >>= 1;
while (NumElems != 0) {
for (unsigned i = 0; i < NumElems; ++i)
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
UnpckMask);
NumElems >>= 1;
}
return V[0];
}
return SDOperand();
}
SDOperand
X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
SDOperand V1 = Op.getOperand(0);
SDOperand V2 = Op.getOperand(1);
SDOperand PermMask = Op.getOperand(2);
MVT::ValueType VT = Op.getValueType();
unsigned NumElems = PermMask.getNumOperands();
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
Evan Cheng
committed
bool V1IsSplat = false;
bool V2IsSplat = false;
if (isUndefShuffle(Op.Val))
return DAG.getNode(ISD::UNDEF, VT);
if (isSplatMask(PermMask.Val)) {
if (NumElems <= 4) return Op;
// Promote it to a v4i32 splat.
if (X86::isMOVLMask(PermMask.Val))
return (V1IsUndef) ? V2 : Op;
if (X86::isMOVSHDUPMask(PermMask.Val) ||
X86::isMOVSLDUPMask(PermMask.Val) ||
X86::isMOVHLPSMask(PermMask.Val) ||
X86::isMOVHPMask(PermMask.Val) ||
X86::isMOVLPMask(PermMask.Val))
return Op;
if (ShouldXformToMOVHLPS(PermMask.Val) ||
ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
bool Commuted = false;
V1IsSplat = isSplatVector(V1.Val);
V2IsSplat = isSplatVector(V2.Val);
if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
std::swap(V1IsSplat, V2IsSplat);
std::swap(V1IsUndef, V2IsUndef);
Commuted = true;
}
if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
if (V2IsUndef) return V1;
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (V2IsSplat) {
// V2 is a splat, so the mask may be malformed. That is, it may point
// to any V2 element. The instruction selectior won't like this. Get
// a corrected mask and commute to form a proper MOVS{S|D}.
SDOperand NewMask = getMOVLMask(NumElems, DAG);
if (NewMask.Val != PermMask.Val)
Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
Evan Cheng
committed
}
Evan Cheng
committed
}
Evan Cheng
committed
Evan Cheng
committed
if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
X86::isUNPCKLMask(PermMask.Val) ||
X86::isUNPCKHMask(PermMask.Val))
return Op;
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 points to its first
// element then try to match unpck{h|l} again. If match, return a
// new vector_shuffle with the corrected mask.
SDOperand NewMask = NormalizeMask(PermMask, DAG);
if (NewMask.Val != PermMask.Val) {
if (X86::isUNPCKLMask(PermMask.Val, true)) {
SDOperand NewMask = getUnpacklMask(NumElems, DAG);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
} else if (X86::isUNPCKHMask(PermMask.Val, true)) {
SDOperand NewMask = getUnpackhMask(NumElems, DAG);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
}
}
}
// Normalize the node to match x86 shuffle ops if needed
if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (Commuted) {
// Commute is back and try unpck* again.
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
X86::isUNPCKLMask(PermMask.Val) ||
X86::isUNPCKHMask(PermMask.Val))
return Op;
}
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
// If VT is integer, try PSHUF* first, then SHUFP*.
if (MVT::isInteger(VT)) {
if (X86::isPSHUFDMask(PermMask.Val) ||
X86::isPSHUFHWMask(PermMask.Val) ||
X86::isPSHUFLWMask(PermMask.Val)) {
if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
return Op;
}
if (X86::isSHUFPMask(PermMask.Val))
return Op;
// Handle v8i16 shuffle high / low shuffle node pair.
if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
std::vector<SDOperand> MaskVec;
for (unsigned i = 0; i != 4; ++i)
MaskVec.push_back(PermMask.getOperand(i));
for (unsigned i = 4; i != 8; ++i)
MaskVec.push_back(DAG.getConstant(i, BaseVT));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
MaskVec.clear();
for (unsigned i = 0; i != 4; ++i)
MaskVec.push_back(DAG.getConstant(i, BaseVT));
for (unsigned i = 4; i != 8; ++i)
MaskVec.push_back(PermMask.getOperand(i));
Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}
} else {
// Floating point cases in the other order.
if (X86::isSHUFPMask(PermMask.Val))
return Op;
if (X86::isPSHUFDMask(PermMask.Val) ||
X86::isPSHUFHWMask(PermMask.Val) ||
X86::isPSHUFLWMask(PermMask.Val)) {
if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
return Op;
}
}
if (NumElems == 4) {
MVT::ValueType MaskVT = PermMask.getValueType();
MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
std::vector<std::pair<int, int> > Locs;
Locs.reserve(NumElems);
std::vector<SDOperand> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
std::vector<SDOperand> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
unsigned NumHi = 0;
unsigned NumLo = 0;
// If no more than two elements come from either vector. This can be
// implemented with two shuffles. First shuffle gather the elements.
// The second shuffle, which takes the first shuffle as both of its
// vector operands, put the elements into the right order.
for (unsigned i = 0; i != NumElems; ++i) {
SDOperand Elt = PermMask.getOperand(i);
if (Elt.getOpcode() == ISD::UNDEF) {
Locs[i] = std::make_pair(-1, -1);
} else {
unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
if (Val < NumElems) {
Locs[i] = std::make_pair(0, NumLo);
Mask1[NumLo] = Elt;
NumLo++;
} else {
Locs[i] = std::make_pair(1, NumHi);
if (2+NumHi < NumElems)
Mask1[2+NumHi] = Elt;
NumHi++;
}
}
}
if (NumLo <= 2 && NumHi <= 2) {
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&Mask1[0], Mask1.size()));
for (unsigned i = 0; i != NumElems; ++i) {
if (Locs[i].first == -1)
continue;
else {
unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
Mask2[i] = DAG.getConstant(Idx, MaskEVT);
}
}
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&Mask2[0], Mask2.size()));
}
// Break it into (shuffle shuffle_hi, shuffle_lo).
Locs.clear();
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
std::vector<SDOperand> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
std::vector<SDOperand> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
std::vector<SDOperand> *MaskPtr = &LoMask;
unsigned MaskIdx = 0;
unsigned LoIdx = 0;
unsigned HiIdx = NumElems/2;
for (unsigned i = 0; i != NumElems; ++i) {
if (i == NumElems/2) {
MaskPtr = &HiMask;
MaskIdx = 1;
LoIdx = 0;
HiIdx = NumElems/2;
}
SDOperand Elt = PermMask.getOperand(i);
if (Elt.getOpcode() == ISD::UNDEF) {
Locs[i] = std::make_pair(-1, -1);
} else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
Locs[i] = std::make_pair(MaskIdx, LoIdx);
(*MaskPtr)[LoIdx] = Elt;
LoIdx++;
} else {
Locs[i] = std::make_pair(MaskIdx, HiIdx);
(*MaskPtr)[HiIdx] = Elt;
HiIdx++;
}
}
SDOperand LoShuffle =
DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&LoMask[0], LoMask.size()));
SDOperand HiShuffle =
DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&HiMask[0], HiMask.size()));
std::vector<SDOperand> MaskOps;
for (unsigned i = 0; i != NumElems; ++i) {
if (Locs[i].first == -1) {
MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
} else {
unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
}
}
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskOps[0], MaskOps.size()));
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
}
return SDOperand();
}
SDOperand
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
if (!isa<ConstantSDNode>(Op.getOperand(1)))
return SDOperand();
MVT::ValueType VT = Op.getValueType();
// TODO: handle v16i8.
if (MVT::getSizeInBits(VT) == 16) {
// Transform it so it match pextrw which produces a 32-bit result.
MVT::ValueType EVT = (MVT::ValueType)(VT+1);
SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
Op.getOperand(0), Op.getOperand(1));
SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
} else if (MVT::getSizeInBits(VT) == 32) {
SDOperand Vec = Op.getOperand(0);
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
// SHUFPS the element to the lowest double word, then movss.
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
std::vector<SDOperand> IdxVec;
IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getConstant(0, getPointerTy()));
} else if (MVT::getSizeInBits(VT) == 64) {
SDOperand Vec = Op.getOperand(0);
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
// UNPCKHPD the element to the lowest double word, then movsd.
// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
std::vector<SDOperand> IdxVec;
IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getConstant(0, getPointerTy()));
}
return SDOperand();
}
SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
Evan Cheng
committed
// Transform it so it match pinsrw which expects a 16-bit value in a GR32
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
// as its second argument.
MVT::ValueType VT = Op.getValueType();
MVT::ValueType BaseVT = MVT::getVectorBaseType(VT);
SDOperand N0 = Op.getOperand(0);
SDOperand N1 = Op.getOperand(1);
SDOperand N2 = Op.getOperand(2);
if (MVT::getSizeInBits(BaseVT) == 16) {
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32);
return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
} else if (MVT::getSizeInBits(BaseVT) == 32) {
unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
if (Idx == 0) {
// Use a movss.
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
std::vector<SDOperand> MaskVec;
MaskVec.push_back(DAG.getConstant(4, BaseVT));
for (unsigned i = 1; i <= 3; ++i)
MaskVec.push_back(DAG.getConstant(i, BaseVT));
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size()));
} else {
// Use two pinsrw instructions to insert a 32 bit value.
Idx <<= 1;
if (MVT::isFloatingPoint(N1.getValueType())) {
if (ISD::isNON_EXTLoad(N1.Val)) {
Evan Cheng
committed
// Just load directly from f32mem to GR32.
LoadSDNode *LD = cast<LoadSDNode>(N1);
N1 = DAG.getLoad(MVT::i32, LD->getChain(), LD->getBasePtr(),
LD->getSrcValue(), LD->getSrcValueOffset());
} else {
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
DAG.getConstant(0, getPointerTy()));
}
}
N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
DAG.getConstant(Idx, getPointerTy()));
N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
DAG.getConstant(Idx+1, getPointerTy()));
return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
}
}
return SDOperand();
}
SDOperand
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOV32ri.
SDOperand
X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
getPointerTy(),
CP->getAlignment());
Evan Cheng
committed
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
if (Subtarget->isTargetDarwin()) {
// With PIC, the address is actually $g + Offset.
if (!Subtarget->is64Bit() &&
getTargetMachine().getRelocationModel() == Reloc::PIC_)
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
}
return Result;
}
SDOperand
X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
Evan Cheng
committed
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
if (Subtarget->isTargetDarwin()) {
// With PIC, the address is actually $g + Offset.
if (!Subtarget->is64Bit() &&
getTargetMachine().getRelocationModel() == Reloc::PIC_)
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
Anton Korobeynikov
committed
// For Darwin & Mingw32, external and weak symbols are indirect, so we want to
// load the value at address GV, not the value of GV itself. This means that
// the GlobalAddress must be in the base or index register of the address, not
// the GV offset field. Platform check is inside GVRequiresExtraLoad() call
if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
return Result;
}
SDOperand
X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
Evan Cheng
committed
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
if (Subtarget->isTargetDarwin()) {
// With PIC, the address is actually $g + Offset.
if (!Subtarget->is64Bit() &&
getTargetMachine().getRelocationModel() == Reloc::PIC_)
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
}
return Result;
}
SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
"Not an i64 shift!");
bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
SDOperand ShOpLo = Op.getOperand(0);
SDOperand ShOpHi = Op.getOperand(1);
SDOperand ShAmt = Op.getOperand(2);
SDOperand Tmp1 = isSRA ?
DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
DAG.getConstant(0, MVT::i32);
SDOperand Tmp2, Tmp3;
if (Op.getOpcode() == ISD::SHL_PARTS) {
Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
} else {
Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
}
const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
DAG.getConstant(32, MVT::i8));
SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)};
SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1);
SDOperand Hi, Lo;
SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
SmallVector<SDOperand, 4> Ops;
if (Op.getOpcode() == ISD::SHL_PARTS) {
Ops.push_back(Tmp2);
Ops.push_back(Tmp3);
Ops.push_back(CC);
Ops.push_back(InFlag);
Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
InFlag = Hi.getValue(1);
Ops.clear();
Ops.push_back(Tmp3);
Ops.push_back(Tmp1);
Ops.push_back(CC);
Ops.push_back(InFlag);
Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
} else {
Ops.push_back(Tmp2);
Ops.push_back(Tmp3);
Ops.push_back(CC);
Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
InFlag = Lo.getValue(1);
Ops.clear();
Ops.push_back(Tmp3);
Ops.push_back(Tmp1);
Ops.push_back(CC);
Ops.push_back(InFlag);
Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
}
VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
Ops.clear();
Ops.push_back(Lo);
Ops.push_back(Hi);
return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
}
SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
Op.getOperand(0).getValueType() >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
SDOperand Result;
MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
unsigned Size = MVT::getSizeInBits(SrcVT)/8;
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
StackSlot, NULL, 0);
// Build the FILD
std::vector<MVT::ValueType> Tys;
Tys.push_back(MVT::f64);
Tys.push_back(MVT::Other);
if (X86ScalarSSE) Tys.push_back(MVT::Flag);
std::vector<SDOperand> Ops;
Ops.push_back(Chain);
Ops.push_back(StackSlot);
Ops.push_back(DAG.getValueType(SrcVT));
Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
Tys, &Ops[0], Ops.size());