Newer
Older
/// getOnesVector - Returns a vector of specified type with all bits set.
///
static SDOperand getOnesVector(MVT::ValueType VT, SelectionDAG &DAG) {
assert(MVT::isVector(VT) && "Expected a vector type");
// Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
// type. This ensures they get CSE'd.
SDOperand Cst = DAG.getTargetConstant(~0U, MVT::i32);
SDOperand Vec;
if (MVT::getSizeInBits(VT) == 64) // MMX
Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, Cst, Cst);
else // SSE
Vec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Cst, Cst, Cst, Cst);
return DAG.getNode(ISD::BIT_CONVERT, VT, Vec);
}
/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 points to its first element.
static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
bool Changed = false;
SmallVector<SDOperand, 8> MaskVec;
unsigned NumElems = Mask.getNumOperands();
for (unsigned i = 0; i != NumElems; ++i) {
SDOperand Arg = Mask.getOperand(i);
if (Arg.getOpcode() != ISD::UNDEF) {
unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
if (Val > NumElems) {
Arg = DAG.getConstant(NumElems, Arg.getValueType());
Changed = true;
}
}
MaskVec.push_back(Arg);
}
if (Changed)
Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
&MaskVec[0], MaskVec.size());
return Mask;
}
/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd
/// operation of specified width.
static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
SmallVector<SDOperand, 8> MaskVec;
MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
for (unsigned i = 1; i != NumElems; ++i)
MaskVec.push_back(DAG.getConstant(i, BaseVT));
return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}
/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
/// of specified width.
static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
SmallVector<SDOperand, 8> MaskVec;
for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
MaskVec.push_back(DAG.getConstant(i, BaseVT));
MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
}
return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
/// of specified width.
static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType BaseVT = MVT::getVectorElementType(MaskVT);
unsigned Half = NumElems/2;
SmallVector<SDOperand, 8> MaskVec;
for (unsigned i = 0; i != Half; ++i) {
MaskVec.push_back(DAG.getConstant(i + Half, BaseVT));
MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
}
return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}
/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
///
static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
SDOperand V1 = Op.getOperand(0);
SDOperand Mask = Op.getOperand(2);
MVT::ValueType VT = Op.getValueType();
unsigned NumElems = Mask.getNumOperands();
Mask = getUnpacklMask(NumElems, DAG);
while (NumElems != 4) {
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
NumElems >>= 1;
}
V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
Mask = getZeroVector(MVT::v4i32, DAG);
SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}
/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
/// vector of zero or undef vector. This produces a shuffle where the low
/// element of V2 is swizzled into the zero/undef vector, landing at element
/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
unsigned NumElems, unsigned Idx,
bool isZero, SelectionDAG &DAG) {
SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
SmallVector<SDOperand, 16> MaskVec;
for (unsigned i = 0; i != NumElems; ++i)
if (i == Idx) // If this is the insertion idx, put the low elt of V2 here.
MaskVec.push_back(DAG.getConstant(NumElems, EVT));
else
MaskVec.push_back(DAG.getConstant(i, EVT));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
}
/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
if (NumNonZero > 8)
return SDOperand();
SDOperand V(0, 0);
bool First = true;
for (unsigned i = 0; i < 16; ++i) {
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
if (ThisIsNonZero && First) {
if (NumZero)
V = getZeroVector(MVT::v8i16, DAG);
else
V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
First = false;
}
if ((i & 1) != 0) {
SDOperand ThisElt(0, 0), LastElt(0, 0);
bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
if (LastIsNonZero) {
LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
}
if (ThisIsNonZero) {
ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
ThisElt, DAG.getConstant(8, MVT::i8));
if (LastIsNonZero)
ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
} else
ThisElt = LastElt;
if (ThisElt.Val)
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
}
}
return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
}
/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
///
static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
if (NumNonZero > 4)
return SDOperand();
SDOperand V(0, 0);
bool First = true;
for (unsigned i = 0; i < 8; ++i) {
bool isNonZero = (NonZeros & (1 << i)) != 0;
if (isNonZero) {
if (First) {
if (NumZero)
V = getZeroVector(MVT::v8i16, DAG);
else
V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
First = false;
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
}
}
return V;
}
SDOperand
X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
// All zero's are handled with pxor, all one's are handled with pcmpeqd.
if (ISD::isBuildVectorAllZeros(Op.Val) || ISD::isBuildVectorAllOnes(Op.Val)) {
// Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
// 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
// eliminated on x86-32 hosts.
if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
return Op;
if (ISD::isBuildVectorAllOnes(Op.Val))
return getOnesVector(Op.getValueType(), DAG);
return getZeroVector(Op.getValueType(), DAG);
}
MVT::ValueType VT = Op.getValueType();
MVT::ValueType EVT = MVT::getVectorElementType(VT);
unsigned EVTBits = MVT::getSizeInBits(EVT);
unsigned NumElems = Op.getNumOperands();
unsigned NumZero = 0;
unsigned NumNonZero = 0;
unsigned NonZeros = 0;
Evan Cheng
committed
bool HasNonImms = false;
SmallSet<SDOperand, 8> Values;
for (unsigned i = 0; i < NumElems; ++i) {
SDOperand Elt = Op.getOperand(i);
Evan Cheng
committed
if (Elt.getOpcode() == ISD::UNDEF)
continue;
Values.insert(Elt);
if (Elt.getOpcode() != ISD::Constant &&
Elt.getOpcode() != ISD::ConstantFP)
HasNonImms = true;
if (isZeroNode(Elt))
NumZero++;
else {
NonZeros |= (1 << i);
NumNonZero++;
}
}
if (NumNonZero == 0) {
// All undef vector. Return an UNDEF. All zero vectors were handled above.
return DAG.getNode(ISD::UNDEF, VT);
}
// Splat is obviously ok. Let legalizer expand it to a shuffle.
if (Values.size() == 1)
return SDOperand();
// Special case for single non-zero element.
Evan Cheng
committed
if (NumNonZero == 1 && NumElems <= 4) {
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDOperand Item = Op.getOperand(Idx);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
if (Idx == 0)
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
NumZero > 0, DAG);
Evan Cheng
committed
else if (!HasNonImms) // Otherwise, it's better to do a constpool load.
return SDOperand();
if (EVTBits == 32) {
// Turn it into a shuffle of zero and zero-extended scalar to vector.
Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
DAG);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
SmallVector<SDOperand, 8> MaskVec;
for (unsigned i = 0; i < NumElems; i++)
MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
DAG.getNode(ISD::UNDEF, VT), Mask);
}
}
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
Evan Cheng
committed
if (!HasNonImms)
return SDOperand();
// Let legalizer expand 2-wide build_vectors.
if (EVTBits == 64)
return SDOperand();
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16) {
SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
*this);
if (V.Val) return V;
}
if (EVTBits == 16 && NumElems == 8) {
SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
*this);
if (V.Val) return V;
}
// If element VT is == 32 bits, turn it into a number of shuffles.
SmallVector<SDOperand, 8> V;
V.resize(NumElems);
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
if (NumElems == 4 && NumZero > 0) {
for (unsigned i = 0; i < 4; ++i) {
bool isZero = !(NonZeros & (1 << i));
if (isZero)
V[i] = getZeroVector(VT, DAG);
else
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
}
for (unsigned i = 0; i < 2; ++i) {
switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
default: break;
case 0:
V[i] = V[i*2]; // Must be a zero vector.
break;
case 1:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
getMOVLMask(NumElems, DAG));
break;
case 2:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
getMOVLMask(NumElems, DAG));
break;
case 3:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
getUnpacklMask(NumElems, DAG));
break;
}
}
Evan Cheng
committed
// Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd)
// clears the upper bits.
// FIXME: we can do the same for v4f32 case when we know both parts of
// the lower half come from scalar_to_vector (loadf32). We should do
// that in post legalizer dag combiner with target specific hooks.
if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
return V[0];
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
SmallVector<SDOperand, 8> MaskVec;
bool Reverse = (NonZeros & 0x3) == 2;
for (unsigned i = 0; i < 2; ++i)
if (Reverse)
MaskVec.push_back(DAG.getConstant(1-i, EVT));
else
MaskVec.push_back(DAG.getConstant(i, EVT));
Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
for (unsigned i = 0; i < 2; ++i)
if (Reverse)
MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
else
MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
}
if (Values.size() > 2) {
// Expand into a number of unpckl*.
// e.g. for v4f32
// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
for (unsigned i = 0; i < NumElems; ++i)
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
NumElems >>= 1;
while (NumElems != 0) {
for (unsigned i = 0; i < NumElems; ++i)
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
UnpckMask);
NumElems >>= 1;
}
return V[0];
}
return SDOperand();
}
static
SDOperand LowerVECTOR_SHUFFLEv8i16(SDOperand V1, SDOperand V2,
SDOperand PermMask, SelectionDAG &DAG,
TargetLowering &TLI) {
SDOperand NewV;
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(8);
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
MVT::ValueType PtrVT = TLI.getPointerTy();
SmallVector<SDOperand, 8> MaskElts(PermMask.Val->op_begin(),
PermMask.Val->op_end());
// First record which half of which vector the low elements come from.
SmallVector<unsigned, 4> LowQuad(4);
for (unsigned i = 0; i < 4; ++i) {
SDOperand Elt = MaskElts[i];
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
int QuadIdx = EltIdx / 4;
++LowQuad[QuadIdx];
}
int BestLowQuad = -1;
unsigned MaxQuad = 1;
for (unsigned i = 0; i < 4; ++i) {
if (LowQuad[i] > MaxQuad) {
BestLowQuad = i;
MaxQuad = LowQuad[i];
}
}
// Record which half of which vector the high elements come from.
SmallVector<unsigned, 4> HighQuad(4);
for (unsigned i = 4; i < 8; ++i) {
SDOperand Elt = MaskElts[i];
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
int QuadIdx = EltIdx / 4;
++HighQuad[QuadIdx];
}
int BestHighQuad = -1;
MaxQuad = 1;
for (unsigned i = 0; i < 4; ++i) {
if (HighQuad[i] > MaxQuad) {
BestHighQuad = i;
MaxQuad = HighQuad[i];
}
}
// If it's possible to sort parts of either half with PSHUF{H|L}W, then do it.
if (BestLowQuad != -1 || BestHighQuad != -1) {
// First sort the 4 chunks in order using shufpd.
SmallVector<SDOperand, 8> MaskVec;
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
if (BestLowQuad != -1)
MaskVec.push_back(DAG.getConstant(BestLowQuad, MVT::i32));
else
MaskVec.push_back(DAG.getConstant(0, MVT::i32));
if (BestHighQuad != -1)
MaskVec.push_back(DAG.getConstant(BestHighQuad, MVT::i32));
else
MaskVec.push_back(DAG.getConstant(1, MVT::i32));
SDOperand Mask= DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec[0],2);
NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64,
DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V1),
DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V2), Mask);
NewV = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, NewV);
// Now sort high and low parts separately.
BitVector InOrder(8);
if (BestLowQuad != -1) {
// Sort lower half in order using PSHUFLW.
MaskVec.clear();
bool AnyOutOrder = false;
for (unsigned i = 0; i != 4; ++i) {
SDOperand Elt = MaskElts[i];
if (Elt.getOpcode() == ISD::UNDEF) {
MaskVec.push_back(Elt);
InOrder.set(i);
} else {
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx != i)
AnyOutOrder = true;
MaskVec.push_back(DAG.getConstant(EltIdx % 4, MaskEVT));
// If this element is in the right place after this shuffle, then
// remember it.
if ((int)(EltIdx / 4) == BestLowQuad)
InOrder.set(i);
}
}
if (AnyOutOrder) {
for (unsigned i = 4; i != 8; ++i)
MaskVec.push_back(DAG.getConstant(i, MaskEVT));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
}
}
if (BestHighQuad != -1) {
// Sort high half in order using PSHUFHW if possible.
MaskVec.clear();
for (unsigned i = 0; i != 4; ++i)
MaskVec.push_back(DAG.getConstant(i, MaskEVT));
bool AnyOutOrder = false;
for (unsigned i = 4; i != 8; ++i) {
SDOperand Elt = MaskElts[i];
if (Elt.getOpcode() == ISD::UNDEF) {
MaskVec.push_back(Elt);
InOrder.set(i);
} else {
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx != i)
AnyOutOrder = true;
MaskVec.push_back(DAG.getConstant((EltIdx % 4) + 4, MaskEVT));
// If this element is in the right place after this shuffle, then
// remember it.
if ((int)(EltIdx / 4) == BestHighQuad)
InOrder.set(i);
}
}
if (AnyOutOrder) {
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
}
}
// The other elements are put in the right place using pextrw and pinsrw.
for (unsigned i = 0; i != 8; ++i) {
if (InOrder[i])
continue;
SDOperand Elt = MaskElts[i];
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx == i)
continue;
SDOperand ExtOp = (EltIdx < 8)
? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
DAG.getConstant(EltIdx, PtrVT))
: DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
DAG.getConstant(EltIdx - 8, PtrVT));
NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
DAG.getConstant(i, PtrVT));
}
return NewV;
}
// PSHUF{H|L}W are not used. Lower into extracts and inserts but try to use
///as few as possible.
// First, let's find out how many elements are already in the right order.
unsigned V1InOrder = 0;
unsigned V1FromV1 = 0;
unsigned V2InOrder = 0;
unsigned V2FromV2 = 0;
SmallVector<SDOperand, 8> V1Elts;
SmallVector<SDOperand, 8> V2Elts;
for (unsigned i = 0; i < 8; ++i) {
SDOperand Elt = MaskElts[i];
if (Elt.getOpcode() == ISD::UNDEF) {
V1Elts.push_back(Elt);
V2Elts.push_back(Elt);
continue;
}
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx == i) {
V1Elts.push_back(Elt);
V2Elts.push_back(DAG.getConstant(i+8, MaskEVT));
++V1InOrder;
} else if (EltIdx == i+8) {
V1Elts.push_back(Elt);
V2Elts.push_back(DAG.getConstant(i, MaskEVT));
++V2InOrder;
} else if (EltIdx < 8) {
V1Elts.push_back(Elt);
++V1FromV1;
V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT));
++V2FromV2;
}
}
if (V2InOrder > V1InOrder) {
PermMask = CommuteVectorShuffleMask(PermMask, DAG);
std::swap(V1, V2);
std::swap(V1Elts, V2Elts);
std::swap(V1FromV1, V2FromV2);
}
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
if ((V1FromV1 + V1InOrder) != 8) {
// Some elements are from V2.
if (V1FromV1) {
// If there are elements that are from V1 but out of place,
// then first sort them in place
SmallVector<SDOperand, 8> MaskVec;
for (unsigned i = 0; i < 8; ++i) {
SDOperand Elt = V1Elts[i];
if (Elt.getOpcode() == ISD::UNDEF) {
MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
continue;
}
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx >= 8)
MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
else
MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT));
}
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask);
}
NewV = V1;
for (unsigned i = 0; i < 8; ++i) {
SDOperand Elt = V1Elts[i];
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx < 8)
continue;
SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
DAG.getConstant(EltIdx - 8, PtrVT));
NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
DAG.getConstant(i, PtrVT));
return NewV;
} else {
// All elements are from V1.
NewV = V1;
for (unsigned i = 0; i < 8; ++i) {
SDOperand Elt = V1Elts[i];
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
DAG.getConstant(EltIdx, PtrVT));
NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
DAG.getConstant(i, PtrVT));
}
return NewV;
}
Evan Cheng
committed
/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
/// ones, or rewriting v4i32 / v2f32 as 2 wide ones if possible. This can be
/// done when every pair / quad of shuffle mask elements point to elements in
/// the right sequence. e.g.
/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
static
Evan Cheng
committed
SDOperand RewriteAsNarrowerShuffle(SDOperand V1, SDOperand V2,
MVT::ValueType VT,
SDOperand PermMask, SelectionDAG &DAG,
TargetLowering &TLI) {
unsigned NumElems = PermMask.getNumOperands();
Evan Cheng
committed
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
MVT::ValueType NewVT = MaskVT;
switch (VT) {
case MVT::v4f32: NewVT = MVT::v2f64; break;
case MVT::v4i32: NewVT = MVT::v2i64; break;
case MVT::v8i16: NewVT = MVT::v4i32; break;
case MVT::v16i8: NewVT = MVT::v4i32; break;
default: assert(false && "Unexpected!");
}
if (NewWidth == 2)
if (MVT::isInteger(VT))
NewVT = MVT::v2i64;
else
NewVT = MVT::v2f64;
unsigned Scale = NumElems / NewWidth;
SmallVector<SDOperand, 8> MaskVec;
for (unsigned i = 0; i < NumElems; i += Scale) {
unsigned StartIdx = ~0U;
for (unsigned j = 0; j < Scale; ++j) {
SDOperand Elt = PermMask.getOperand(i+j);
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (StartIdx == ~0U)
StartIdx = EltIdx - (EltIdx % Scale);
if (EltIdx != StartIdx + j)
return SDOperand();
}
if (StartIdx == ~0U)
MaskVec.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
else
MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MVT::i32));
Evan Cheng
committed
V1 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V1);
V2 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V2);
return DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size()));
SDOperand
X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
SDOperand V1 = Op.getOperand(0);
SDOperand V2 = Op.getOperand(1);
SDOperand PermMask = Op.getOperand(2);
MVT::ValueType VT = Op.getValueType();
unsigned NumElems = PermMask.getNumOperands();
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
Evan Cheng
committed
bool V1IsSplat = false;
bool V2IsSplat = false;
if (isUndefShuffle(Op.Val))
return DAG.getNode(ISD::UNDEF, VT);
if (isZeroShuffle(Op.Val))
return getZeroVector(VT, DAG);
Evan Cheng
committed
if (isIdentityMask(PermMask.Val))
return V1;
else if (isIdentityMask(PermMask.Val, true))
return V2;
if (isSplatMask(PermMask.Val)) {
if (NumElems <= 4) return Op;
// Promote it to a v4i32 splat.
Evan Cheng
committed
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
// If the shuffle can be profitably rewritten as a narrower shuffle, then
// do it!
if (VT == MVT::v8i16 || VT == MVT::v16i8) {
SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
if (NewOp.Val)
return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
} else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
if (ISD::isBuildVectorAllZeros(V2.Val)) {
SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
if (NewOp.Val) {
SDOperand NewV1 = NewOp.getOperand(0);
SDOperand NewV2 = NewOp.getOperand(1);
SDOperand NewMask = NewOp.getOperand(2);
if (isCommutedMOVL(NewMask.Val, true, false)) {
NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
NewOp = DAG.getNode(ISD::VECTOR_SHUFFLE, NewOp.getValueType(),
NewV1, NewV2, getMOVLMask(2, DAG));
return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
}
}
} else if (ISD::isBuildVectorAllZeros(V1.Val)) {
SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
if (NewOp.Val && X86::isMOVLMask(NewOp.getOperand(2).Val))
return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
}
}
if (X86::isMOVLMask(PermMask.Val))
return (V1IsUndef) ? V2 : Op;
if (X86::isMOVSHDUPMask(PermMask.Val) ||
X86::isMOVSLDUPMask(PermMask.Val) ||
X86::isMOVHLPSMask(PermMask.Val) ||
X86::isMOVHPMask(PermMask.Val) ||
X86::isMOVLPMask(PermMask.Val))
return Op;
if (ShouldXformToMOVHLPS(PermMask.Val) ||
ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
bool Commuted = false;
// FIXME: This should also accept a bitcast of a splat? Be careful, not
// 1,1,1,1 -> v8i16 though.
V1IsSplat = isSplatVector(V1.Val);
V2IsSplat = isSplatVector(V2.Val);
// Canonicalize the splat or undef, if present, to be on the RHS.
if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
std::swap(V1IsSplat, V2IsSplat);
std::swap(V1IsUndef, V2IsUndef);
Commuted = true;
Evan Cheng
committed
// FIXME: Figure out a cleaner way to do this.
if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
if (V2IsUndef) return V1;
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (V2IsSplat) {
// V2 is a splat, so the mask may be malformed. That is, it may point
// to any V2 element. The instruction selectior won't like this. Get
// a corrected mask and commute to form a proper MOVS{S|D}.
SDOperand NewMask = getMOVLMask(NumElems, DAG);
if (NewMask.Val != PermMask.Val)
Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
Evan Cheng
committed
}
Evan Cheng
committed
}
Evan Cheng
committed
Evan Cheng
committed
if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
Evan Cheng
committed
X86::isUNPCKLMask(PermMask.Val) ||
X86::isUNPCKHMask(PermMask.Val))
return Op;
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 points to its first
// element then try to match unpck{h|l} again. If match, return a
// new vector_shuffle with the corrected mask.
SDOperand NewMask = NormalizeMask(PermMask, DAG);
if (NewMask.Val != PermMask.Val) {
if (X86::isUNPCKLMask(PermMask.Val, true)) {
SDOperand NewMask = getUnpacklMask(NumElems, DAG);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
} else if (X86::isUNPCKHMask(PermMask.Val, true)) {
SDOperand NewMask = getUnpackhMask(NumElems, DAG);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
}
}
}
// Normalize the node to match x86 shuffle ops if needed
if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (Commuted) {
// Commute is back and try unpck* again.
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
X86::isUNPCKLMask(PermMask.Val) ||
X86::isUNPCKHMask(PermMask.Val))
return Op;
}
// If VT is integer, try PSHUF* first, then SHUFP*.
if (MVT::isInteger(VT)) {
// MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically
// possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
if (((MVT::getSizeInBits(VT) != 64 || NumElems == 4) &&
X86::isPSHUFDMask(PermMask.Val)) ||
X86::isPSHUFHWMask(PermMask.Val) ||
X86::isPSHUFLWMask(PermMask.Val)) {
if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
return Op;
}
if (X86::isSHUFPMask(PermMask.Val) &&
MVT::getSizeInBits(VT) != 64) // Don't do this for MMX.
return Op;
} else {
// Floating point cases in the other order.
if (X86::isSHUFPMask(PermMask.Val))
return Op;
if (X86::isPSHUFDMask(PermMask.Val) ||
X86::isPSHUFHWMask(PermMask.Val) ||
X86::isPSHUFLWMask(PermMask.Val)) {
if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
return Op;
}
}
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
if (VT == MVT::v8i16) {
SDOperand NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
if (NewOp.Val)
return NewOp;
}
// Handle all 4 wide cases with a number of shuffles.
if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) {
MVT::ValueType MaskVT = PermMask.getValueType();
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
SmallVector<std::pair<int, int>, 8> Locs;
Locs.reserve(NumElems);
SmallVector<SDOperand, 8> Mask1(NumElems,
DAG.getNode(ISD::UNDEF, MaskEVT));
SmallVector<SDOperand, 8> Mask2(NumElems,
DAG.getNode(ISD::UNDEF, MaskEVT));
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
unsigned NumHi = 0;
unsigned NumLo = 0;
// If no more than two elements come from either vector. This can be
// implemented with two shuffles. First shuffle gather the elements.
// The second shuffle, which takes the first shuffle as both of its
// vector operands, put the elements into the right order.
for (unsigned i = 0; i != NumElems; ++i) {
SDOperand Elt = PermMask.getOperand(i);
if (Elt.getOpcode() == ISD::UNDEF) {
Locs[i] = std::make_pair(-1, -1);
} else {
unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
if (Val < NumElems) {
Locs[i] = std::make_pair(0, NumLo);
Mask1[NumLo] = Elt;
NumLo++;
} else {
Locs[i] = std::make_pair(1, NumHi);
if (2+NumHi < NumElems)
Mask1[2+NumHi] = Elt;
NumHi++;
}
}
}
if (NumLo <= 2 && NumHi <= 2) {
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&Mask1[0], Mask1.size()));
for (unsigned i = 0; i != NumElems; ++i) {
if (Locs[i].first == -1)
continue;
else {
unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
Mask2[i] = DAG.getConstant(Idx, MaskEVT);
}
}
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&Mask2[0], Mask2.size()));
}
// Break it into (shuffle shuffle_hi, shuffle_lo).
Locs.clear();
SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
SmallVector<SDOperand,8> *MaskPtr = &LoMask;
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
unsigned MaskIdx = 0;
unsigned LoIdx = 0;
unsigned HiIdx = NumElems/2;
for (unsigned i = 0; i != NumElems; ++i) {
if (i == NumElems/2) {
MaskPtr = &HiMask;
MaskIdx = 1;
LoIdx = 0;
HiIdx = NumElems/2;
}
SDOperand Elt = PermMask.getOperand(i);
if (Elt.getOpcode() == ISD::UNDEF) {
Locs[i] = std::make_pair(-1, -1);
} else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
Locs[i] = std::make_pair(MaskIdx, LoIdx);
(*MaskPtr)[LoIdx] = Elt;
LoIdx++;
} else {
Locs[i] = std::make_pair(MaskIdx, HiIdx);
(*MaskPtr)[HiIdx] = Elt;
HiIdx++;
}
}
SDOperand LoShuffle =
DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&LoMask[0], LoMask.size()));
SDOperand HiShuffle =
DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&HiMask[0], HiMask.size()));
SmallVector<SDOperand, 8> MaskOps;
for (unsigned i = 0; i != NumElems; ++i) {
if (Locs[i].first == -1) {
MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
} else {
unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
}
}
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskOps[0], MaskOps.size()));
}
return SDOperand();
}
SDOperand
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
if (!isa<ConstantSDNode>(Op.getOperand(1)))
return SDOperand();
MVT::ValueType VT = Op.getValueType();
// TODO: handle v16i8.
if (MVT::getSizeInBits(VT) == 16) {
SDOperand Vec = Op.getOperand(0);
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return DAG.getNode(ISD::TRUNCATE, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Vec),
Op.getOperand(1)));
// Transform it so it match pextrw which produces a 32-bit result.
MVT::ValueType EVT = (MVT::ValueType)(VT+1);
SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
Op.getOperand(0), Op.getOperand(1));
SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
} else if (MVT::getSizeInBits(VT) == 32) {
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
// SHUFPS the element to the lowest double word, then movss.
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
SmallVector<SDOperand, 8> IdxVec;
IdxVec.
push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
IdxVec.
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
IdxVec.
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
IdxVec.
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
SDOperand Vec = Op.getOperand(0);
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getConstant(0, getPointerTy()));
} else if (MVT::getSizeInBits(VT) == 64) {