Newer
Older
// 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
// eliminated on x86-32 hosts.
if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
return Op;
if (ISD::isBuildVectorAllOnes(Op.Val))
return getOnesVector(Op.getValueType(), DAG);
return getZeroVector(Op.getValueType(), DAG);
}
MVT::ValueType VT = Op.getValueType();
MVT::ValueType EVT = MVT::getVectorElementType(VT);
unsigned EVTBits = MVT::getSizeInBits(EVT);
unsigned NumElems = Op.getNumOperands();
unsigned NumZero = 0;
unsigned NumNonZero = 0;
unsigned NonZeros = 0;
Evan Cheng
committed
bool HasNonImms = false;
SmallSet<SDOperand, 8> Values;
for (unsigned i = 0; i < NumElems; ++i) {
SDOperand Elt = Op.getOperand(i);
Evan Cheng
committed
if (Elt.getOpcode() == ISD::UNDEF)
continue;
Values.insert(Elt);
if (Elt.getOpcode() != ISD::Constant &&
Elt.getOpcode() != ISD::ConstantFP)
HasNonImms = true;
if (isZeroNode(Elt))
NumZero++;
else {
NonZeros |= (1 << i);
NumNonZero++;
}
}
if (NumNonZero == 0) {
// All undef vector. Return an UNDEF. All zero vectors were handled above.
return DAG.getNode(ISD::UNDEF, VT);
}
// Splat is obviously ok. Let legalizer expand it to a shuffle.
if (Values.size() == 1)
return SDOperand();
// Special case for single non-zero element.
Evan Cheng
committed
if (NumNonZero == 1 && NumElems <= 4) {
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDOperand Item = Op.getOperand(Idx);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
if (Idx == 0)
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
NumZero > 0, DAG);
Evan Cheng
committed
else if (!HasNonImms) // Otherwise, it's better to do a constpool load.
return SDOperand();
if (EVTBits == 32) {
// Turn it into a shuffle of zero and zero-extended scalar to vector.
Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
DAG);
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
SmallVector<SDOperand, 8> MaskVec;
for (unsigned i = 0; i < NumElems; i++)
MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
DAG.getNode(ISD::UNDEF, VT), Mask);
}
}
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
Evan Cheng
committed
if (!HasNonImms)
return SDOperand();
// Let legalizer expand 2-wide build_vectors.
if (EVTBits == 64)
return SDOperand();
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16) {
SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
*this);
if (V.Val) return V;
}
if (EVTBits == 16 && NumElems == 8) {
SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
*this);
if (V.Val) return V;
}
// If element VT is == 32 bits, turn it into a number of shuffles.
SmallVector<SDOperand, 8> V;
V.resize(NumElems);
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
if (NumElems == 4 && NumZero > 0) {
for (unsigned i = 0; i < 4; ++i) {
bool isZero = !(NonZeros & (1 << i));
if (isZero)
V[i] = getZeroVector(VT, DAG);
else
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
}
for (unsigned i = 0; i < 2; ++i) {
switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
default: break;
case 0:
V[i] = V[i*2]; // Must be a zero vector.
break;
case 1:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
getMOVLMask(NumElems, DAG));
break;
case 2:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
getMOVLMask(NumElems, DAG));
break;
case 3:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
getUnpacklMask(NumElems, DAG));
break;
}
}
Evan Cheng
committed
// Take advantage of the fact GR32 to VR128 scalar_to_vector (i.e. movd)
// clears the upper bits.
// FIXME: we can do the same for v4f32 case when we know both parts of
// the lower half come from scalar_to_vector (loadf32). We should do
// that in post legalizer dag combiner with target specific hooks.
if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
return V[0];
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT::ValueType EVT = MVT::getVectorElementType(MaskVT);
SmallVector<SDOperand, 8> MaskVec;
bool Reverse = (NonZeros & 0x3) == 2;
for (unsigned i = 0; i < 2; ++i)
if (Reverse)
MaskVec.push_back(DAG.getConstant(1-i, EVT));
else
MaskVec.push_back(DAG.getConstant(i, EVT));
Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
for (unsigned i = 0; i < 2; ++i)
if (Reverse)
MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
else
MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
}
if (Values.size() > 2) {
// Expand into a number of unpckl*.
// e.g. for v4f32
// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
for (unsigned i = 0; i < NumElems; ++i)
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
NumElems >>= 1;
while (NumElems != 0) {
for (unsigned i = 0; i < NumElems; ++i)
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
UnpckMask);
NumElems >>= 1;
}
return V[0];
}
return SDOperand();
}
static
SDOperand LowerVECTOR_SHUFFLEv8i16(SDOperand V1, SDOperand V2,
SDOperand PermMask, SelectionDAG &DAG,
TargetLowering &TLI) {
SDOperand NewV;
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(8);
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
MVT::ValueType PtrVT = TLI.getPointerTy();
SmallVector<SDOperand, 8> MaskElts(PermMask.Val->op_begin(),
PermMask.Val->op_end());
// First record which half of which vector the low elements come from.
SmallVector<unsigned, 4> LowQuad(4);
for (unsigned i = 0; i < 4; ++i) {
SDOperand Elt = MaskElts[i];
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
int QuadIdx = EltIdx / 4;
++LowQuad[QuadIdx];
}
int BestLowQuad = -1;
unsigned MaxQuad = 1;
for (unsigned i = 0; i < 4; ++i) {
if (LowQuad[i] > MaxQuad) {
BestLowQuad = i;
MaxQuad = LowQuad[i];
}
}
// Record which half of which vector the high elements come from.
SmallVector<unsigned, 4> HighQuad(4);
for (unsigned i = 4; i < 8; ++i) {
SDOperand Elt = MaskElts[i];
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
int QuadIdx = EltIdx / 4;
++HighQuad[QuadIdx];
}
int BestHighQuad = -1;
MaxQuad = 1;
for (unsigned i = 0; i < 4; ++i) {
if (HighQuad[i] > MaxQuad) {
BestHighQuad = i;
MaxQuad = HighQuad[i];
}
}
// If it's possible to sort parts of either half with PSHUF{H|L}W, then do it.
if (BestLowQuad != -1 || BestHighQuad != -1) {
// First sort the 4 chunks in order using shufpd.
SmallVector<SDOperand, 8> MaskVec;
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
if (BestLowQuad != -1)
MaskVec.push_back(DAG.getConstant(BestLowQuad, MVT::i32));
else
MaskVec.push_back(DAG.getConstant(0, MVT::i32));
if (BestHighQuad != -1)
MaskVec.push_back(DAG.getConstant(BestHighQuad, MVT::i32));
else
MaskVec.push_back(DAG.getConstant(1, MVT::i32));
SDOperand Mask= DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec[0],2);
NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64,
DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V1),
DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V2), Mask);
NewV = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, NewV);
// Now sort high and low parts separately.
BitVector InOrder(8);
if (BestLowQuad != -1) {
// Sort lower half in order using PSHUFLW.
MaskVec.clear();
bool AnyOutOrder = false;
for (unsigned i = 0; i != 4; ++i) {
SDOperand Elt = MaskElts[i];
if (Elt.getOpcode() == ISD::UNDEF) {
MaskVec.push_back(Elt);
InOrder.set(i);
} else {
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx != i)
AnyOutOrder = true;
MaskVec.push_back(DAG.getConstant(EltIdx % 4, MaskEVT));
// If this element is in the right place after this shuffle, then
// remember it.
if ((int)(EltIdx / 4) == BestLowQuad)
InOrder.set(i);
}
}
if (AnyOutOrder) {
for (unsigned i = 4; i != 8; ++i)
MaskVec.push_back(DAG.getConstant(i, MaskEVT));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
}
}
if (BestHighQuad != -1) {
// Sort high half in order using PSHUFHW if possible.
MaskVec.clear();
for (unsigned i = 0; i != 4; ++i)
MaskVec.push_back(DAG.getConstant(i, MaskEVT));
bool AnyOutOrder = false;
for (unsigned i = 4; i != 8; ++i) {
SDOperand Elt = MaskElts[i];
if (Elt.getOpcode() == ISD::UNDEF) {
MaskVec.push_back(Elt);
InOrder.set(i);
} else {
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx != i)
AnyOutOrder = true;
MaskVec.push_back(DAG.getConstant((EltIdx % 4) + 4, MaskEVT));
// If this element is in the right place after this shuffle, then
// remember it.
if ((int)(EltIdx / 4) == BestHighQuad)
InOrder.set(i);
}
}
if (AnyOutOrder) {
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
}
}
// The other elements are put in the right place using pextrw and pinsrw.
for (unsigned i = 0; i != 8; ++i) {
if (InOrder[i])
continue;
SDOperand Elt = MaskElts[i];
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx == i)
continue;
SDOperand ExtOp = (EltIdx < 8)
? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
DAG.getConstant(EltIdx, PtrVT))
: DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
DAG.getConstant(EltIdx - 8, PtrVT));
NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
DAG.getConstant(i, PtrVT));
}
return NewV;
}
// PSHUF{H|L}W are not used. Lower into extracts and inserts but try to use
///as few as possible.
// First, let's find out how many elements are already in the right order.
unsigned V1InOrder = 0;
unsigned V1FromV1 = 0;
unsigned V2InOrder = 0;
unsigned V2FromV2 = 0;
SmallVector<SDOperand, 8> V1Elts;
SmallVector<SDOperand, 8> V2Elts;
for (unsigned i = 0; i < 8; ++i) {
SDOperand Elt = MaskElts[i];
if (Elt.getOpcode() == ISD::UNDEF) {
V1Elts.push_back(Elt);
V2Elts.push_back(Elt);
continue;
}
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx == i) {
V1Elts.push_back(Elt);
V2Elts.push_back(DAG.getConstant(i+8, MaskEVT));
++V1InOrder;
} else if (EltIdx == i+8) {
V1Elts.push_back(Elt);
V2Elts.push_back(DAG.getConstant(i, MaskEVT));
++V2InOrder;
} else if (EltIdx < 8) {
V1Elts.push_back(Elt);
++V1FromV1;
V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT));
++V2FromV2;
}
}
if (V2InOrder > V1InOrder) {
PermMask = CommuteVectorShuffleMask(PermMask, DAG);
std::swap(V1, V2);
std::swap(V1Elts, V2Elts);
std::swap(V1FromV1, V2FromV2);
}
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
if ((V1FromV1 + V1InOrder) != 8) {
// Some elements are from V2.
if (V1FromV1) {
// If there are elements that are from V1 but out of place,
// then first sort them in place
SmallVector<SDOperand, 8> MaskVec;
for (unsigned i = 0; i < 8; ++i) {
SDOperand Elt = V1Elts[i];
if (Elt.getOpcode() == ISD::UNDEF) {
MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
continue;
}
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx >= 8)
MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
else
MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT));
}
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask);
}
NewV = V1;
for (unsigned i = 0; i < 8; ++i) {
SDOperand Elt = V1Elts[i];
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx < 8)
continue;
SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
DAG.getConstant(EltIdx - 8, PtrVT));
NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
DAG.getConstant(i, PtrVT));
return NewV;
} else {
// All elements are from V1.
NewV = V1;
for (unsigned i = 0; i < 8; ++i) {
SDOperand Elt = V1Elts[i];
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
SDOperand ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
DAG.getConstant(EltIdx, PtrVT));
NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
DAG.getConstant(i, PtrVT));
}
return NewV;
}
Evan Cheng
committed
/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
/// ones, or rewriting v4i32 / v2f32 as 2 wide ones if possible. This can be
/// done when every pair / quad of shuffle mask elements point to elements in
/// the right sequence. e.g.
/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
static
Evan Cheng
committed
SDOperand RewriteAsNarrowerShuffle(SDOperand V1, SDOperand V2,
MVT::ValueType VT,
SDOperand PermMask, SelectionDAG &DAG,
TargetLowering &TLI) {
unsigned NumElems = PermMask.getNumOperands();
Evan Cheng
committed
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
MVT::ValueType NewVT = MaskVT;
switch (VT) {
case MVT::v4f32: NewVT = MVT::v2f64; break;
case MVT::v4i32: NewVT = MVT::v2i64; break;
case MVT::v8i16: NewVT = MVT::v4i32; break;
case MVT::v16i8: NewVT = MVT::v4i32; break;
default: assert(false && "Unexpected!");
}
if (NewWidth == 2) {
Evan Cheng
committed
if (MVT::isInteger(VT))
NewVT = MVT::v2i64;
else
NewVT = MVT::v2f64;
Evan Cheng
committed
unsigned Scale = NumElems / NewWidth;
SmallVector<SDOperand, 8> MaskVec;
for (unsigned i = 0; i < NumElems; i += Scale) {
unsigned StartIdx = ~0U;
for (unsigned j = 0; j < Scale; ++j) {
SDOperand Elt = PermMask.getOperand(i+j);
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (StartIdx == ~0U)
StartIdx = EltIdx - (EltIdx % Scale);
if (EltIdx != StartIdx + j)
return SDOperand();
}
if (StartIdx == ~0U)
MaskVec.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
else
MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MVT::i32));
Evan Cheng
committed
V1 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V1);
V2 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V2);
return DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size()));
SDOperand
X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
SDOperand V1 = Op.getOperand(0);
SDOperand V2 = Op.getOperand(1);
SDOperand PermMask = Op.getOperand(2);
MVT::ValueType VT = Op.getValueType();
unsigned NumElems = PermMask.getNumOperands();
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
Evan Cheng
committed
bool V1IsSplat = false;
bool V2IsSplat = false;
if (isUndefShuffle(Op.Val))
return DAG.getNode(ISD::UNDEF, VT);
if (isZeroShuffle(Op.Val))
return getZeroVector(VT, DAG);
Evan Cheng
committed
if (isIdentityMask(PermMask.Val))
return V1;
else if (isIdentityMask(PermMask.Val, true))
return V2;
if (isSplatMask(PermMask.Val)) {
if (NumElems <= 4) return Op;
// Promote it to a v4i32 splat.
Evan Cheng
committed
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
// If the shuffle can be profitably rewritten as a narrower shuffle, then
// do it!
if (VT == MVT::v8i16 || VT == MVT::v16i8) {
SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
if (NewOp.Val)
return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
} else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
if (ISD::isBuildVectorAllZeros(V2.Val)) {
SDOperand NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
if (NewOp.Val) {
SDOperand NewV1 = NewOp.getOperand(0);
SDOperand NewV2 = NewOp.getOperand(1);
SDOperand NewMask = NewOp.getOperand(2);
if (isCommutedMOVL(NewMask.Val, true, false)) {
NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
NewOp = DAG.getNode(ISD::VECTOR_SHUFFLE, NewOp.getValueType(),
NewV1, NewV2, getMOVLMask(2, DAG));
return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
}
}
} else if (ISD::isBuildVectorAllZeros(V1.Val)) {
SDOperand NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
if (NewOp.Val && X86::isMOVLMask(NewOp.getOperand(2).Val))
return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
}
}
if (X86::isMOVLMask(PermMask.Val))
return (V1IsUndef) ? V2 : Op;
if (X86::isMOVSHDUPMask(PermMask.Val) ||
X86::isMOVSLDUPMask(PermMask.Val) ||
X86::isMOVHLPSMask(PermMask.Val) ||
X86::isMOVHPMask(PermMask.Val) ||
X86::isMOVLPMask(PermMask.Val))
return Op;
if (ShouldXformToMOVHLPS(PermMask.Val) ||
ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
bool Commuted = false;
// FIXME: This should also accept a bitcast of a splat? Be careful, not
// 1,1,1,1 -> v8i16 though.
V1IsSplat = isSplatVector(V1.Val);
V2IsSplat = isSplatVector(V2.Val);
// Canonicalize the splat or undef, if present, to be on the RHS.
if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
std::swap(V1IsSplat, V2IsSplat);
std::swap(V1IsUndef, V2IsUndef);
Commuted = true;
Evan Cheng
committed
// FIXME: Figure out a cleaner way to do this.
if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
if (V2IsUndef) return V1;
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (V2IsSplat) {
// V2 is a splat, so the mask may be malformed. That is, it may point
// to any V2 element. The instruction selectior won't like this. Get
// a corrected mask and commute to form a proper MOVS{S|D}.
SDOperand NewMask = getMOVLMask(NumElems, DAG);
if (NewMask.Val != PermMask.Val)
Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
Evan Cheng
committed
}
Evan Cheng
committed
}
Evan Cheng
committed
Evan Cheng
committed
if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
Evan Cheng
committed
X86::isUNPCKLMask(PermMask.Val) ||
X86::isUNPCKHMask(PermMask.Val))
return Op;
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 points to its first
// element then try to match unpck{h|l} again. If match, return a
// new vector_shuffle with the corrected mask.
SDOperand NewMask = NormalizeMask(PermMask, DAG);
if (NewMask.Val != PermMask.Val) {
if (X86::isUNPCKLMask(PermMask.Val, true)) {
SDOperand NewMask = getUnpacklMask(NumElems, DAG);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
} else if (X86::isUNPCKHMask(PermMask.Val, true)) {
SDOperand NewMask = getUnpackhMask(NumElems, DAG);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
}
}
}
// Normalize the node to match x86 shuffle ops if needed
if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (Commuted) {
// Commute is back and try unpck* again.
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
X86::isUNPCKH_v_undef_Mask(PermMask.Val) ||
X86::isUNPCKLMask(PermMask.Val) ||
X86::isUNPCKHMask(PermMask.Val))
return Op;
}
// If VT is integer, try PSHUF* first, then SHUFP*.
if (MVT::isInteger(VT)) {
// MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically
// possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
if (((MVT::getSizeInBits(VT) != 64 || NumElems == 4) &&
X86::isPSHUFDMask(PermMask.Val)) ||
X86::isPSHUFHWMask(PermMask.Val) ||
X86::isPSHUFLWMask(PermMask.Val)) {
if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
return Op;
}
if (X86::isSHUFPMask(PermMask.Val) &&
MVT::getSizeInBits(VT) != 64) // Don't do this for MMX.
return Op;
} else {
// Floating point cases in the other order.
if (X86::isSHUFPMask(PermMask.Val))
return Op;
if (X86::isPSHUFDMask(PermMask.Val) ||
X86::isPSHUFHWMask(PermMask.Val) ||
X86::isPSHUFLWMask(PermMask.Val)) {
if (V2.getOpcode() != ISD::UNDEF)
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
return Op;
}
}
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
if (VT == MVT::v8i16) {
SDOperand NewOp = LowerVECTOR_SHUFFLEv8i16(V1, V2, PermMask, DAG, *this);
if (NewOp.Val)
return NewOp;
}
// Handle all 4 wide cases with a number of shuffles.
if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) {
MVT::ValueType MaskVT = PermMask.getValueType();
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
SmallVector<std::pair<int, int>, 8> Locs;
Locs.reserve(NumElems);
SmallVector<SDOperand, 8> Mask1(NumElems,
DAG.getNode(ISD::UNDEF, MaskEVT));
SmallVector<SDOperand, 8> Mask2(NumElems,
DAG.getNode(ISD::UNDEF, MaskEVT));
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
unsigned NumHi = 0;
unsigned NumLo = 0;
// If no more than two elements come from either vector. This can be
// implemented with two shuffles. First shuffle gather the elements.
// The second shuffle, which takes the first shuffle as both of its
// vector operands, put the elements into the right order.
for (unsigned i = 0; i != NumElems; ++i) {
SDOperand Elt = PermMask.getOperand(i);
if (Elt.getOpcode() == ISD::UNDEF) {
Locs[i] = std::make_pair(-1, -1);
} else {
unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
if (Val < NumElems) {
Locs[i] = std::make_pair(0, NumLo);
Mask1[NumLo] = Elt;
NumLo++;
} else {
Locs[i] = std::make_pair(1, NumHi);
if (2+NumHi < NumElems)
Mask1[2+NumHi] = Elt;
NumHi++;
}
}
}
if (NumLo <= 2 && NumHi <= 2) {
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&Mask1[0], Mask1.size()));
for (unsigned i = 0; i != NumElems; ++i) {
if (Locs[i].first == -1)
continue;
else {
unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
Mask2[i] = DAG.getConstant(Idx, MaskEVT);
}
}
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&Mask2[0], Mask2.size()));
}
// Break it into (shuffle shuffle_hi, shuffle_lo).
Locs.clear();
SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
SmallVector<SDOperand,8> *MaskPtr = &LoMask;
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
unsigned MaskIdx = 0;
unsigned LoIdx = 0;
unsigned HiIdx = NumElems/2;
for (unsigned i = 0; i != NumElems; ++i) {
if (i == NumElems/2) {
MaskPtr = &HiMask;
MaskIdx = 1;
LoIdx = 0;
HiIdx = NumElems/2;
}
SDOperand Elt = PermMask.getOperand(i);
if (Elt.getOpcode() == ISD::UNDEF) {
Locs[i] = std::make_pair(-1, -1);
} else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
Locs[i] = std::make_pair(MaskIdx, LoIdx);
(*MaskPtr)[LoIdx] = Elt;
LoIdx++;
} else {
Locs[i] = std::make_pair(MaskIdx, HiIdx);
(*MaskPtr)[HiIdx] = Elt;
HiIdx++;
}
}
SDOperand LoShuffle =
DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&LoMask[0], LoMask.size()));
SDOperand HiShuffle =
DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&HiMask[0], HiMask.size()));
SmallVector<SDOperand, 8> MaskOps;
for (unsigned i = 0; i != NumElems; ++i) {
if (Locs[i].first == -1) {
MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
} else {
unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
}
}
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskOps[0], MaskOps.size()));
}
return SDOperand();
}
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
SDOperand
X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDOperand Op,
SelectionDAG &DAG) {
MVT::ValueType VT = Op.getValueType();
if (MVT::getSizeInBits(VT) == 8) {
SDOperand Extract = DAG.getNode(X86ISD::PEXTRB, MVT::i32,
Op.getOperand(0), Op.getOperand(1));
SDOperand Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
} else if (MVT::getSizeInBits(VT) == 16) {
SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, MVT::i32,
Op.getOperand(0), Op.getOperand(1));
SDOperand Assert = DAG.getNode(ISD::AssertZext, MVT::i32, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
}
return SDOperand();
}
SDOperand
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
if (!isa<ConstantSDNode>(Op.getOperand(1)))
return SDOperand();
if (Subtarget->hasSSE41())
return LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
MVT::ValueType VT = Op.getValueType();
// TODO: handle v16i8.
if (MVT::getSizeInBits(VT) == 16) {
SDOperand Vec = Op.getOperand(0);
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return DAG.getNode(ISD::TRUNCATE, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Vec),
Op.getOperand(1)));
// Transform it so it match pextrw which produces a 32-bit result.
MVT::ValueType EVT = (MVT::ValueType)(VT+1);
SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
Op.getOperand(0), Op.getOperand(1));
SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
} else if (MVT::getSizeInBits(VT) == 32) {
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
// SHUFPS the element to the lowest double word, then movss.
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
SmallVector<SDOperand, 8> IdxVec;
IdxVec.
push_back(DAG.getConstant(Idx, MVT::getVectorElementType(MaskVT)));
IdxVec.
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
IdxVec.
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
IdxVec.
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
SDOperand Vec = Op.getOperand(0);
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getIntPtrConstant(0));
} else if (MVT::getSizeInBits(VT) == 64) {
// FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
// FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
// to match extract_elt for f64.
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
if (Idx == 0)
return Op;
// UNPCKHPD the element to the lowest double word, then movsd.
// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
SmallVector<SDOperand, 8> IdxVec;
IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
IdxVec.
push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&IdxVec[0], IdxVec.size());
SDOperand Vec = Op.getOperand(0);
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
DAG.getIntPtrConstant(0));
}
return SDOperand();
}
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDOperand Op, SelectionDAG &DAG){
MVT::ValueType VT = Op.getValueType();
MVT::ValueType EVT = MVT::getVectorElementType(VT);
SDOperand N0 = Op.getOperand(0);
SDOperand N1 = Op.getOperand(1);
SDOperand N2 = Op.getOperand(2);
if ((MVT::getSizeInBits(EVT) == 8) || (MVT::getSizeInBits(EVT) == 16)) {
unsigned Opc = (MVT::getSizeInBits(EVT) == 8) ? X86ISD::PINSRB
: X86ISD::PINSRW;
// Transform it so it match pinsr{b,w} which expects a GR32 as its second
// argument.
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue());
return DAG.getNode(Opc, VT, N0, N1, N2);
} else if (EVT == MVT::f32) {
// Bits [7:6] of the constant are the source select. This will always be
// zero here. The DAG Combiner may combine an extract_elt index into these
// bits. For example (insert (extract, 3), 2) could be matched by putting
// the '3' into bits [7:6] of X86ISD::INSERTPS.
// Bits [5:4] of the constant are the destination select. This is the
// value of the incoming immediate.
// Bits [3:0] of the constant are the zero mask. The DAG Combiner may
// combine either bitwise AND or insert of float 0.0 to set these bits.
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue() << 4);
return DAG.getNode(X86ISD::INSERTPS, VT, N0, N1, N2);
}
return SDOperand();
}
SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
MVT::ValueType VT = Op.getValueType();
MVT::ValueType EVT = MVT::getVectorElementType(VT);
if (Subtarget->hasSSE41())
return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
if (EVT == MVT::i8)
return SDOperand();
SDOperand N0 = Op.getOperand(0);
SDOperand N1 = Op.getOperand(1);
SDOperand N2 = Op.getOperand(2);
if (MVT::getSizeInBits(EVT) == 16) {
// Transform it so it match pinsrw which expects a 16-bit value in a GR32
// as its second argument.
if (N1.getValueType() != MVT::i32)
N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getValue());
return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
}
return SDOperand();
}
SDOperand
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
MVT::ValueType VT = MVT::v2i32;
switch (Op.getValueType()) {
default: break;
case MVT::v16i8:
case MVT::v8i16:
VT = MVT::v4i32;
break;
}
return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, AnyExt));
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOV32ri.
SDOperand
X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
getPointerTy(),
CP->getAlignment());
Evan Cheng
committed
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
}
return Result;
}
SDOperand
X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
Evan Cheng
committed
// If it's a debug information descriptor, don't mess with it.
if (DAG.isVerifiedDebugInfoDesc(Op))
return Result;
Evan Cheng
committed
Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->isPICStyleRIPRel()) {
Result = DAG.getNode(ISD::ADD, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
Result);
Anton Korobeynikov
committed
// For Darwin & Mingw32, external and weak symbols are indirect, so we want to
// load the value at address GV, not the value of GV itself. This means that
// the GlobalAddress must be in the base or index register of the address, not
// the GV offset field. Platform check is inside GVRequiresExtraLoad() call
// The same applies for external symbols during PIC codegen
Anton Korobeynikov
committed
if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result,
PseudoSourceValue::getGOT(), 0);
return Result;
}
Lauro Ramos Venancio
committed
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
// Lower ISD::GlobalTLSAddress using the "general dynamic" model
static SDOperand
LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const MVT::ValueType PtrVT) {
SDOperand InFlag;
SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg,
PtrVT), InFlag);
InFlag = Chain.getValue(1);
// emit leal symbol@TLSGD(,%ebx,1), %eax
SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
GA->getValueType(0),
GA->getOffset());
SDOperand Ops[] = { Chain, TGA, InFlag };
SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
InFlag = Result.getValue(2);
Chain = Result.getValue(1);
// call ___tls_get_addr. This function receives its argument in