Newer
Older
SDOperand V(0, 0);
bool First = true;
for (unsigned i = 0; i < 16; ++i) {
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
if (ThisIsNonZero && First) {
if (NumZero)
Evan Cheng
committed
V = getZeroVector(MVT::v8i16, true, DAG);
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
else
V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
First = false;
}
if ((i & 1) != 0) {
SDOperand ThisElt(0, 0), LastElt(0, 0);
bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
if (LastIsNonZero) {
LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
}
if (ThisIsNonZero) {
ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
ThisElt, DAG.getConstant(8, MVT::i8));
if (LastIsNonZero)
ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
} else
ThisElt = LastElt;
if (ThisElt.Val)
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
DAG.getIntPtrConstant(i/2));
}
}
return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
}
/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
///
static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
if (NumNonZero > 4)
return SDOperand();
SDOperand V(0, 0);
bool First = true;
for (unsigned i = 0; i < 8; ++i) {
bool isNonZero = (NonZeros & (1 << i)) != 0;
if (isNonZero) {
if (First) {
if (NumZero)
Evan Cheng
committed
V = getZeroVector(MVT::v8i16, true, DAG);
else
V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
First = false;
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
DAG.getIntPtrConstant(i));
}
}
return V;
}
/// getVShift - Return a vector logical shift node.
///
static SDOperand getVShift(bool isLeft, MVT VT, SDOperand SrcOp,
unsigned NumBits, SelectionDAG &DAG,
const TargetLowering &TLI) {
bool isMMX = VT.getSizeInBits() == 64;
MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
SrcOp = DAG.getNode(ISD::BIT_CONVERT, ShVT, SrcOp);
return DAG.getNode(ISD::BIT_CONVERT, VT,
DAG.getNode(Opc, ShVT, SrcOp,
DAG.getConstant(NumBits, TLI.getShiftAmountTy())));
}
SDOperand
X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
// All zero's are handled with pxor, all one's are handled with pcmpeqd.
if (ISD::isBuildVectorAllZeros(Op.Val) || ISD::isBuildVectorAllOnes(Op.Val)) {
// Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
// 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
// eliminated on x86-32 hosts.
if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
return Op;
if (ISD::isBuildVectorAllOnes(Op.Val))
return getOnesVector(Op.getValueType(), DAG);
Evan Cheng
committed
return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG);
MVT VT = Op.getValueType();
MVT EVT = VT.getVectorElementType();
unsigned EVTBits = EVT.getSizeInBits();
unsigned NumElems = Op.getNumOperands();
unsigned NumZero = 0;
unsigned NumNonZero = 0;
unsigned NonZeros = 0;
bool IsAllConstants = true;
SmallSet<SDOperand, 8> Values;
for (unsigned i = 0; i < NumElems; ++i) {
SDOperand Elt = Op.getOperand(i);
Evan Cheng
committed
if (Elt.getOpcode() == ISD::UNDEF)
continue;
Values.insert(Elt);
if (Elt.getOpcode() != ISD::Constant &&
Elt.getOpcode() != ISD::ConstantFP)
IsAllConstants = false;
Evan Cheng
committed
if (isZeroNode(Elt))
NumZero++;
else {
NonZeros |= (1 << i);
NumNonZero++;
}
}
if (NumNonZero == 0) {
// All undef vector. Return an UNDEF. All zero vectors were handled above.
return DAG.getNode(ISD::UNDEF, VT);
}
// Special case for single non-zero, non-undef, element.
Evan Cheng
committed
if (NumNonZero == 1 && NumElems <= 4) {
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDOperand Item = Op.getOperand(Idx);
// If this is an insertion of an i64 value on x86-32, and if the top bits of
// the value are obviously zero, truncate the value to i32 and do the
// insertion that way. Only do this if the value is non-constant or if the
// value is a constant being inserted into element 0. It is cheaper to do
// a constant pool load than it is to do a movd + shuffle.
if (EVT == MVT::i64 && !Subtarget->is64Bit() &&
(!IsAllConstants || Idx == 0)) {
if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
// Handle MMX and SSE both.
MVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
unsigned VecElts = VT == MVT::v2i64 ? 4 : 2;
// Truncate the value (which may itself be a constant) to i32, and
// convert it to a vector with movd (S2V+shuffle to zero extend).
Item = DAG.getNode(ISD::TRUNCATE, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VecVT, Item);
Evan Cheng
committed
Item = getShuffleVectorZeroOrUndef(Item, 0, true,
Subtarget->hasSSE2(), DAG);
// Now we have our 32-bit value zero extended in the low element of
// a vector. If Idx != 0, swizzle it into place.
if (Idx != 0) {
SDOperand Ops[] = {
Item, DAG.getNode(ISD::UNDEF, Item.getValueType()),
getSwapEltZeroMask(VecElts, Idx, DAG)
};
Item = DAG.getNode(ISD::VECTOR_SHUFFLE, VecVT, Ops, 3);
}
return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Item);
}
}
// If we have a constant or non-constant insertion into the low element of
// a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
// the rest of the elements. This will be matched as movd/movq/movss/movsd
// depending on what the source datatype is. Because we can only get here
// when NumElems <= 4, this only needs to handle i32/f32/i64/f64.
if (Idx == 0 &&
// Don't do this for i64 values on x86-32.
(EVT != MVT::i64 || Subtarget->is64Bit())) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
Evan Cheng
committed
return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
Subtarget->hasSSE2(), DAG);
// Is it a vector logical left shift?
if (NumElems == 2 && Idx == 1 &&
isZeroNode(Op.getOperand(0)) && !isZeroNode(Op.getOperand(1))) {
unsigned NumBits = VT.getSizeInBits();
return getVShift(true, VT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(1)),
NumBits/2, DAG, *this);
}
if (IsAllConstants) // Otherwise, it's better to do a constpool load.
Evan Cheng
committed
return SDOperand();
// Otherwise, if this is a vector with i32 or f32 elements, and the element
// is a non-constant being inserted into an element other than the low one,
// we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka
// movd/movss) to move this into the low element, then shuffle it into
// place.
if (EVTBits == 32) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
// Turn it into a shuffle of zero and zero-extended scalar to vector.
Evan Cheng
committed
Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
Subtarget->hasSSE2(), DAG);
MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT MaskEVT = MaskVT.getVectorElementType();
SmallVector<SDOperand, 8> MaskVec;
for (unsigned i = 0; i < NumElems; i++)
MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
DAG.getNode(ISD::UNDEF, VT), Mask);
}
}
// Splat is obviously ok. Let legalizer expand it to a shuffle.
if (Values.size() == 1)
return SDOperand();
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
if (IsAllConstants)
return SDOperand();
// Let legalizer expand 2-wide build_vectors.
Evan Cheng
committed
if (EVTBits == 64) {
if (NumNonZero == 1) {
// One half is zero or undef.
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDOperand V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT,
Op.getOperand(Idx));
Evan Cheng
committed
return getShuffleVectorZeroOrUndef(V2, Idx, true,
Subtarget->hasSSE2(), DAG);
Evan Cheng
committed
}
return SDOperand();
Evan Cheng
committed
}
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16) {
SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
*this);
if (V.Val) return V;
}
if (EVTBits == 16 && NumElems == 8) {
SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
*this);
if (V.Val) return V;
}
// If element VT is == 32 bits, turn it into a number of shuffles.
SmallVector<SDOperand, 8> V;
V.resize(NumElems);
if (NumElems == 4 && NumZero > 0) {
for (unsigned i = 0; i < 4; ++i) {
bool isZero = !(NonZeros & (1 << i));
if (isZero)
Evan Cheng
committed
V[i] = getZeroVector(VT, Subtarget->hasSSE2(), DAG);
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
else
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
}
for (unsigned i = 0; i < 2; ++i) {
switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
default: break;
case 0:
V[i] = V[i*2]; // Must be a zero vector.
break;
case 1:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
getMOVLMask(NumElems, DAG));
break;
case 2:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
getMOVLMask(NumElems, DAG));
break;
case 3:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
getUnpacklMask(NumElems, DAG));
break;
}
}
MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT EVT = MaskVT.getVectorElementType();
SmallVector<SDOperand, 8> MaskVec;
bool Reverse = (NonZeros & 0x3) == 2;
for (unsigned i = 0; i < 2; ++i)
if (Reverse)
MaskVec.push_back(DAG.getConstant(1-i, EVT));
else
MaskVec.push_back(DAG.getConstant(i, EVT));
Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
for (unsigned i = 0; i < 2; ++i)
if (Reverse)
MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
else
MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
}
if (Values.size() > 2) {
// Expand into a number of unpckl*.
// e.g. for v4f32
// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
for (unsigned i = 0; i < NumElems; ++i)
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
NumElems >>= 1;
while (NumElems != 0) {
for (unsigned i = 0; i < NumElems; ++i)
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
UnpckMask);
NumElems >>= 1;
}
return V[0];
}
return SDOperand();
}
static
SDOperand LowerVECTOR_SHUFFLEv8i16(SDOperand V1, SDOperand V2,
SDOperand PermMask, SelectionDAG &DAG,
TargetLowering &TLI) {
SDOperand NewV;
MVT MaskVT = MVT::getIntVectorWithNumElements(8);
MVT MaskEVT = MaskVT.getVectorElementType();
MVT PtrVT = TLI.getPointerTy();
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
SmallVector<SDOperand, 8> MaskElts(PermMask.Val->op_begin(),
PermMask.Val->op_end());
// First record which half of which vector the low elements come from.
SmallVector<unsigned, 4> LowQuad(4);
for (unsigned i = 0; i < 4; ++i) {
SDOperand Elt = MaskElts[i];
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
int QuadIdx = EltIdx / 4;
++LowQuad[QuadIdx];
}
int BestLowQuad = -1;
unsigned MaxQuad = 1;
for (unsigned i = 0; i < 4; ++i) {
if (LowQuad[i] > MaxQuad) {
BestLowQuad = i;
MaxQuad = LowQuad[i];
}
}
// Record which half of which vector the high elements come from.
SmallVector<unsigned, 4> HighQuad(4);
for (unsigned i = 4; i < 8; ++i) {
SDOperand Elt = MaskElts[i];
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
int QuadIdx = EltIdx / 4;
++HighQuad[QuadIdx];
}
int BestHighQuad = -1;
MaxQuad = 1;
for (unsigned i = 0; i < 4; ++i) {
if (HighQuad[i] > MaxQuad) {
BestHighQuad = i;
MaxQuad = HighQuad[i];
}
}
// If it's possible to sort parts of either half with PSHUF{H|L}W, then do it.
if (BestLowQuad != -1 || BestHighQuad != -1) {
// First sort the 4 chunks in order using shufpd.
SmallVector<SDOperand, 8> MaskVec;
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
if (BestLowQuad != -1)
MaskVec.push_back(DAG.getConstant(BestLowQuad, MVT::i32));
else
MaskVec.push_back(DAG.getConstant(0, MVT::i32));
if (BestHighQuad != -1)
MaskVec.push_back(DAG.getConstant(BestHighQuad, MVT::i32));
else
MaskVec.push_back(DAG.getConstant(1, MVT::i32));
SDOperand Mask= DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec[0],2);
NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64,
DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V1),
DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V2), Mask);
NewV = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, NewV);
// Now sort high and low parts separately.
BitVector InOrder(8);
if (BestLowQuad != -1) {
// Sort lower half in order using PSHUFLW.
MaskVec.clear();
bool AnyOutOrder = false;
for (unsigned i = 0; i != 4; ++i) {
SDOperand Elt = MaskElts[i];
if (Elt.getOpcode() == ISD::UNDEF) {
MaskVec.push_back(Elt);
InOrder.set(i);
} else {
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx != i)
AnyOutOrder = true;
MaskVec.push_back(DAG.getConstant(EltIdx % 4, MaskEVT));
// If this element is in the right place after this shuffle, then
// remember it.
if ((int)(EltIdx / 4) == BestLowQuad)
InOrder.set(i);
}
}
if (AnyOutOrder) {
for (unsigned i = 4; i != 8; ++i)
MaskVec.push_back(DAG.getConstant(i, MaskEVT));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
}
}
if (BestHighQuad != -1) {
// Sort high half in order using PSHUFHW if possible.
MaskVec.clear();
for (unsigned i = 0; i != 4; ++i)
MaskVec.push_back(DAG.getConstant(i, MaskEVT));
bool AnyOutOrder = false;
for (unsigned i = 4; i != 8; ++i) {
SDOperand Elt = MaskElts[i];
if (Elt.getOpcode() == ISD::UNDEF) {
MaskVec.push_back(Elt);
InOrder.set(i);
} else {
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx != i)
AnyOutOrder = true;
MaskVec.push_back(DAG.getConstant((EltIdx % 4) + 4, MaskEVT));
// If this element is in the right place after this shuffle, then
// remember it.
if ((int)(EltIdx / 4) == BestHighQuad)
InOrder.set(i);
}
}
if (AnyOutOrder) {
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
}
}
// The other elements are put in the right place using pextrw and pinsrw.
for (unsigned i = 0; i != 8; ++i) {
if (InOrder[i])
continue;
SDOperand Elt = MaskElts[i];
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
SDOperand ExtOp = (EltIdx < 8)
? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
DAG.getConstant(EltIdx, PtrVT))
: DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
DAG.getConstant(EltIdx - 8, PtrVT));
NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
DAG.getConstant(i, PtrVT));
}
return NewV;
}
// PSHUF{H|L}W are not used. Lower into extracts and inserts but try to use
///as few as possible.
// First, let's find out how many elements are already in the right order.
unsigned V1InOrder = 0;
unsigned V1FromV1 = 0;
unsigned V2InOrder = 0;
unsigned V2FromV2 = 0;
SmallVector<SDOperand, 8> V1Elts;
SmallVector<SDOperand, 8> V2Elts;
for (unsigned i = 0; i < 8; ++i) {
SDOperand Elt = MaskElts[i];
if (Elt.getOpcode() == ISD::UNDEF) {
V1Elts.push_back(Elt);
V2Elts.push_back(Elt);
continue;
}
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx == i) {
V1Elts.push_back(Elt);
V2Elts.push_back(DAG.getConstant(i+8, MaskEVT));
++V1InOrder;
} else if (EltIdx == i+8) {
V1Elts.push_back(Elt);
V2Elts.push_back(DAG.getConstant(i, MaskEVT));
++V2InOrder;
} else if (EltIdx < 8) {
V1Elts.push_back(Elt);
++V1FromV1;
V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT));
++V2FromV2;
}
}
if (V2InOrder > V1InOrder) {
PermMask = CommuteVectorShuffleMask(PermMask, DAG);
std::swap(V1, V2);
std::swap(V1Elts, V2Elts);
std::swap(V1FromV1, V2FromV2);
}
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
if ((V1FromV1 + V1InOrder) != 8) {
// Some elements are from V2.
if (V1FromV1) {
// If there are elements that are from V1 but out of place,
// then first sort them in place
SmallVector<SDOperand, 8> MaskVec;
for (unsigned i = 0; i < 8; ++i) {
SDOperand Elt = V1Elts[i];
if (Elt.getOpcode() == ISD::UNDEF) {
MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
continue;
}
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx >= 8)
MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
else
MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT));
}
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask);
}
NewV = V1;
for (unsigned i = 0; i < 8; ++i) {
SDOperand Elt = V1Elts[i];
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx < 8)
continue;
SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
DAG.getConstant(EltIdx - 8, PtrVT));
NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
DAG.getConstant(i, PtrVT));
return NewV;
} else {
// All elements are from V1.
NewV = V1;
for (unsigned i = 0; i < 8; ++i) {
SDOperand Elt = V1Elts[i];
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
DAG.getConstant(EltIdx, PtrVT));
NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
DAG.getConstant(i, PtrVT));
}
return NewV;
}
Evan Cheng
committed
/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
/// ones, or rewriting v4i32 / v2f32 as 2 wide ones if possible. This can be
/// done when every pair / quad of shuffle mask elements point to elements in
/// the right sequence. e.g.
/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
static
Evan Cheng
committed
SDOperand RewriteAsNarrowerShuffle(SDOperand V1, SDOperand V2,
SDOperand PermMask, SelectionDAG &DAG,
TargetLowering &TLI) {
unsigned NumElems = PermMask.getNumOperands();
Evan Cheng
committed
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
MVT MaskEltVT = MaskVT.getVectorElementType();
MVT NewVT = MaskVT;
switch (VT.getSimpleVT()) {
default: assert(false && "Unexpected!");
Evan Cheng
committed
case MVT::v4f32: NewVT = MVT::v2f64; break;
case MVT::v4i32: NewVT = MVT::v2i64; break;
case MVT::v8i16: NewVT = MVT::v4i32; break;
case MVT::v16i8: NewVT = MVT::v4i32; break;
}
if (NewWidth == 2) {
Evan Cheng
committed
NewVT = MVT::v2i64;
else
NewVT = MVT::v2f64;
Evan Cheng
committed
unsigned Scale = NumElems / NewWidth;
SmallVector<SDOperand, 8> MaskVec;
for (unsigned i = 0; i < NumElems; i += Scale) {
unsigned StartIdx = ~0U;
for (unsigned j = 0; j < Scale; ++j) {
SDOperand Elt = PermMask.getOperand(i+j);
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (StartIdx == ~0U)
StartIdx = EltIdx - (EltIdx % Scale);
if (EltIdx != StartIdx + j)
return SDOperand();
}
if (StartIdx == ~0U)
MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEltVT));
else
MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MaskEltVT));
Evan Cheng
committed
V1 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V1);
V2 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V2);
return DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size()));
/// getVZextMovL - Return a zero-extending vector move low node.
Evan Cheng
committed
///
static SDOperand getVZextMovL(MVT VT, MVT OpVT,
SDOperand SrcOp, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
Evan Cheng
committed
if (VT == MVT::v2f64 || VT == MVT::v4f32) {
LoadSDNode *LD = NULL;
if (!isScalarLoadToVector(SrcOp.Val, &LD))
LD = dyn_cast<LoadSDNode>(SrcOp);
if (!LD) {
// movssrr and movsdrr do not clear top bits. Try to use movd, movq
// instead.
MVT EVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
Evan Cheng
committed
if ((EVT != MVT::i64 || Subtarget->is64Bit()) &&
SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
SrcOp.getOperand(0).getOperand(0).getValueType() == EVT) {
// PR2108
OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
return DAG.getNode(ISD::BIT_CONVERT, VT,
DAG.getNode(X86ISD::VZEXT_MOVL, OpVT,
Evan Cheng
committed
DAG.getNode(ISD::SCALAR_TO_VECTOR, OpVT,
SrcOp.getOperand(0).getOperand(0))));
}
}
}
return DAG.getNode(ISD::BIT_CONVERT, VT,
DAG.getNode(X86ISD::VZEXT_MOVL, OpVT,
Evan Cheng
committed
DAG.getNode(ISD::BIT_CONVERT, OpVT, SrcOp)));
}
Evan Cheng
committed
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
/// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of
/// shuffles.
static SDOperand
LowerVECTOR_SHUFFLE_4wide(SDOperand V1, SDOperand V2,
SDOperand PermMask, MVT VT, SelectionDAG &DAG) {
MVT MaskVT = PermMask.getValueType();
MVT MaskEVT = MaskVT.getVectorElementType();
SmallVector<std::pair<int, int>, 8> Locs;
Locs.reserve(4);
SmallVector<SDOperand, 8> Mask1(4, DAG.getNode(ISD::UNDEF, MaskEVT));
unsigned NumHi = 0;
unsigned NumLo = 0;
for (unsigned i = 0; i != 4; ++i) {
SDOperand Elt = PermMask.getOperand(i);
if (Elt.getOpcode() == ISD::UNDEF) {
Locs[i] = std::make_pair(-1, -1);
} else {
unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
if (Val < 4) {
Locs[i] = std::make_pair(0, NumLo);
Mask1[NumLo] = Elt;
NumLo++;
} else {
Locs[i] = std::make_pair(1, NumHi);
if (2+NumHi < 4)
Mask1[2+NumHi] = Elt;
NumHi++;
}
}
}
Evan Cheng
committed
if (NumLo <= 2 && NumHi <= 2) {
// If no more than two elements come from either vector. This can be
// implemented with two shuffles. First shuffle gather the elements.
// The second shuffle, which takes the first shuffle as both of its
// vector operands, put the elements into the right order.
Evan Cheng
committed
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&Mask1[0], Mask1.size()));
SmallVector<SDOperand, 8> Mask2(4, DAG.getNode(ISD::UNDEF, MaskEVT));
Evan Cheng
committed
for (unsigned i = 0; i != 4; ++i) {
if (Locs[i].first == -1)
continue;
else {
unsigned Idx = (i < 2) ? 0 : 4;
Idx += Locs[i].first * 2 + Locs[i].second;
Mask2[i] = DAG.getConstant(Idx, MaskEVT);
}
}
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&Mask2[0], Mask2.size()));
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
} else if (NumLo == 3 || NumHi == 3) {
// Otherwise, we must have three elements from one vector, call it X, and
// one element from the other, call it Y. First, use a shufps to build an
// intermediate vector with the one element from Y and the element from X
// that will be in the same half in the final destination (the indexes don't
// matter). Then, use a shufps to build the final vector, taking the half
// containing the element from Y from the intermediate, and the other half
// from X.
if (NumHi == 3) {
// Normalize it so the 3 elements come from V1.
PermMask = CommuteVectorShuffleMask(PermMask, DAG);
std::swap(V1, V2);
}
// Find the element from V2.
unsigned HiIndex;
for (HiIndex = 0; HiIndex < 3; ++HiIndex) {
SDOperand Elt = PermMask.getOperand(HiIndex);
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
if (Val >= 4)
break;
}
Mask1[0] = PermMask.getOperand(HiIndex);
Mask1[1] = DAG.getNode(ISD::UNDEF, MaskEVT);
Mask1[2] = PermMask.getOperand(HiIndex^1);
Mask1[3] = DAG.getNode(ISD::UNDEF, MaskEVT);
V2 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4));
if (HiIndex >= 2) {
Mask1[0] = PermMask.getOperand(0);
Mask1[1] = PermMask.getOperand(1);
Mask1[2] = DAG.getConstant(HiIndex & 1 ? 6 : 4, MaskEVT);
Mask1[3] = DAG.getConstant(HiIndex & 1 ? 4 : 6, MaskEVT);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4));
} else {
Mask1[0] = DAG.getConstant(HiIndex & 1 ? 2 : 0, MaskEVT);
Mask1[1] = DAG.getConstant(HiIndex & 1 ? 0 : 2, MaskEVT);
Mask1[2] = PermMask.getOperand(2);
Mask1[3] = PermMask.getOperand(3);
if (Mask1[2].getOpcode() != ISD::UNDEF)
Mask1[2] = DAG.getConstant(cast<ConstantSDNode>(Mask1[2])->getValue()+4,
MaskEVT);
if (Mask1[3].getOpcode() != ISD::UNDEF)
Mask1[3] = DAG.getConstant(cast<ConstantSDNode>(Mask1[3])->getValue()+4,
MaskEVT);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4));
}
Evan Cheng
committed
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
}
// Break it into (shuffle shuffle_hi, shuffle_lo).
Locs.clear();
SmallVector<SDOperand,8> LoMask(4, DAG.getNode(ISD::UNDEF, MaskEVT));
SmallVector<SDOperand,8> HiMask(4, DAG.getNode(ISD::UNDEF, MaskEVT));
SmallVector<SDOperand,8> *MaskPtr = &LoMask;
unsigned MaskIdx = 0;
unsigned LoIdx = 0;
unsigned HiIdx = 2;
for (unsigned i = 0; i != 4; ++i) {
if (i == 2) {
MaskPtr = &HiMask;
MaskIdx = 1;
LoIdx = 0;
HiIdx = 2;
}
SDOperand Elt = PermMask.getOperand(i);
if (Elt.getOpcode() == ISD::UNDEF) {
Locs[i] = std::make_pair(-1, -1);
} else if (cast<ConstantSDNode>(Elt)->getValue() < 4) {
Locs[i] = std::make_pair(MaskIdx, LoIdx);
(*MaskPtr)[LoIdx] = Elt;
LoIdx++;
} else {
Locs[i] = std::make_pair(MaskIdx, HiIdx);
(*MaskPtr)[HiIdx] = Elt;
HiIdx++;
}
}
SDOperand LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&LoMask[0], LoMask.size()));
SDOperand HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&HiMask[0], HiMask.size()));
SmallVector<SDOperand, 8> MaskOps;
for (unsigned i = 0; i != 4; ++i) {
if (Locs[i].first == -1) {
MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
} else {
unsigned Idx = Locs[i].first * 4 + Locs[i].second;
MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
}
}
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskOps[0], MaskOps.size()));
}
SDOperand
X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
SDOperand V1 = Op.getOperand(0);
SDOperand V2 = Op.getOperand(1);
SDOperand PermMask = Op.getOperand(2);
MVT VT = Op.getValueType();
unsigned NumElems = PermMask.getNumOperands();
bool isMMX = VT.getSizeInBits() == 64;
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
Evan Cheng
committed
bool V1IsSplat = false;
bool V2IsSplat = false;
if (isUndefShuffle(Op.Val))
return DAG.getNode(ISD::UNDEF, VT);
if (isZeroShuffle(Op.Val))
Evan Cheng
committed
return getZeroVector(VT, Subtarget->hasSSE2(), DAG);
Evan Cheng
committed
if (isIdentityMask(PermMask.Val))
return V1;
else if (isIdentityMask(PermMask.Val, true))
return V2;
if (isSplatMask(PermMask.Val)) {
Evan Cheng
committed
if (isMMX || NumElems < 4) return Op;
// Promote it to a v4{if}32 splat.
return PromoteSplat(Op, DAG, Subtarget->hasSSE2());
Evan Cheng
committed
// If the shuffle can be profitably rewritten as a narrower shuffle, then
// do it!
if (VT == MVT::v8i16 || VT == MVT::v16i8) {
SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
if (NewOp.Val)
return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
} else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
if (ISD::isBuildVectorAllZeros(V2.Val)) {
Evan Cheng
committed
SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
DAG, *this);
Evan Cheng
committed
if (NewOp.Val) {
SDOperand NewV1 = NewOp.getOperand(0);
SDOperand NewV2 = NewOp.getOperand(1);
SDOperand NewMask = NewOp.getOperand(2);
if (isCommutedMOVL(NewMask.Val, true, false)) {
NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
return getVZextMovL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget);
Evan Cheng
committed
}
}
} else if (ISD::isBuildVectorAllZeros(V1.Val)) {
Evan Cheng
committed
SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
DAG, *this);
Evan Cheng
committed
if (NewOp.Val && X86::isMOVLMask(NewOp.getOperand(2).Val))
return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
Evan Cheng
committed
DAG, Subtarget);
Evan Cheng
committed
}
}
// Check if this can be converted into a logical shift.
bool isLeft = false;
unsigned ShAmt = 0;
SDOperand ShVal;
bool isShift = isVectorShift(Op, PermMask, DAG, isLeft, ShVal, ShAmt);
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
MVT EVT = VT.getVectorElementType();
ShAmt *= EVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this);
}
Evan Cheng
committed
if (X86::isMOVLMask(PermMask.Val)) {
if (V1IsUndef)
return V2;
if (ISD::isBuildVectorAllZeros(V1.Val))
return getVZextMovL(VT, VT, V2, DAG, Subtarget);
if (!isMMX)
return Op;
Evan Cheng
committed
}
if (!isMMX && (X86::isMOVSHDUPMask(PermMask.Val) ||
X86::isMOVSLDUPMask(PermMask.Val) ||
X86::isMOVHLPSMask(PermMask.Val) ||
X86::isMOVHPMask(PermMask.Val) ||
X86::isMOVLPMask(PermMask.Val)))
if (ShouldXformToMOVHLPS(PermMask.Val) ||
ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (isShift) {
// No better options. Use a vshl / vsrl.
MVT EVT = VT.getVectorElementType();
ShAmt *= EVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this);
}
bool Commuted = false;
// FIXME: This should also accept a bitcast of a splat? Be careful, not
// 1,1,1,1 -> v8i16 though.
V1IsSplat = isSplatVector(V1.Val);
V2IsSplat = isSplatVector(V2.Val);
// Canonicalize the splat or undef, if present, to be on the RHS.
if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
std::swap(V1IsSplat, V2IsSplat);
std::swap(V1IsUndef, V2IsUndef);
Commuted = true;
Evan Cheng
committed
// FIXME: Figure out a cleaner way to do this.
if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
if (V2IsUndef) return V1;
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (V2IsSplat) {
// V2 is a splat, so the mask may be malformed. That is, it may point
// to any V2 element. The instruction selectior won't like this. Get
// a corrected mask and commute to form a proper MOVS{S|D}.
SDOperand NewMask = getMOVLMask(NumElems, DAG);
if (NewMask.Val != PermMask.Val)
Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
Evan Cheng
committed
}
Evan Cheng
committed
}
Evan Cheng
committed
Evan Cheng
committed
if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
Evan Cheng
committed
X86::isUNPCKLMask(PermMask.Val) ||
X86::isUNPCKHMask(PermMask.Val))
return Op;
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 points to its first
// element then try to match unpck{h|l} again. If match, return a
// new vector_shuffle with the corrected mask.
SDOperand NewMask = NormalizeMask(PermMask, DAG);
if (NewMask.Val != PermMask.Val) {
if (X86::isUNPCKLMask(PermMask.Val, true)) {
SDOperand NewMask = getUnpacklMask(NumElems, DAG);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
} else if (X86::isUNPCKHMask(PermMask.Val, true)) {
SDOperand NewMask = getUnpackhMask(NumElems, DAG);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
}
}
}
// Normalize the node to match x86 shuffle ops if needed
if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (Commuted) {
// Commute is back and try unpck* again.
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
X86::isUNPCKLMask(PermMask.Val) ||
X86::isUNPCKHMask(PermMask.Val))
return Op;
}
Evan Cheng
committed
// Try PSHUF* first, then SHUFP*.
// MMX doesn't have PSHUFD but it does have PSHUFW. While it's theoretically
// possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
if (isMMX && NumElems == 4 && X86::isPSHUFDMask(PermMask.Val)) {
if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, VT), PermMask);
return Op;
}
if (!isMMX) {
if (Subtarget->hasSSE2() &&
(X86::isPSHUFDMask(PermMask.Val) ||
X86::isPSHUFHWMask(PermMask.Val) ||
X86::isPSHUFLWMask(PermMask.Val))) {
Evan Cheng
committed
if (VT == MVT::v4f32) {
RVT = MVT::v4i32;
Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT,
DAG.getNode(ISD::BIT_CONVERT, RVT, V1),
DAG.getNode(ISD::UNDEF, RVT), PermMask);
} else if (V2.getOpcode() != ISD::UNDEF)
Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT, V1,
DAG.getNode(ISD::UNDEF, RVT), PermMask);
if (RVT != VT)
Op = DAG.getNode(ISD::BIT_CONVERT, VT, Op);
return Op;
}
Evan Cheng
committed
// Binary or unary shufps.