Newer
Older
break;
}
return NumZeros;
}
/// isVectorShift - Returns true if the shuffle can be implemented as a
/// logical left or right shift of a vector.
static bool isVectorShift(SDValue Op, SDValue Mask, SelectionDAG &DAG,
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
unsigned NumElems = Mask.getNumOperands();
isLeft = true;
unsigned NumZeros= getNumOfConsecutiveZeros(Op, Mask, NumElems, true, DAG);
if (!NumZeros) {
isLeft = false;
NumZeros = getNumOfConsecutiveZeros(Op, Mask, NumElems, false, DAG);
if (!NumZeros)
return false;
}
bool SeenV1 = false;
bool SeenV2 = false;
for (unsigned i = NumZeros; i < NumElems; ++i) {
unsigned Val = isLeft ? (i - NumZeros) : i;
SDValue Idx = Mask.getOperand(isLeft ? i : (i - NumZeros));
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
if (Idx.getOpcode() == ISD::UNDEF)
continue;
unsigned Index = cast<ConstantSDNode>(Idx)->getValue();
if (Index < NumElems)
SeenV1 = true;
else {
Index -= NumElems;
SeenV2 = true;
}
if (Index != Val)
return false;
}
if (SeenV1 && SeenV2)
return false;
ShVal = SeenV1 ? Op.getOperand(0) : Op.getOperand(1);
ShAmt = NumZeros;
return true;
}
/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
if (NumNonZero > 8)
bool First = true;
for (unsigned i = 0; i < 16; ++i) {
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
if (ThisIsNonZero && First) {
if (NumZero)
Evan Cheng
committed
V = getZeroVector(MVT::v8i16, true, DAG);
else
V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
First = false;
}
if ((i & 1) != 0) {
bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
if (LastIsNonZero) {
LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
}
if (ThisIsNonZero) {
ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
ThisElt, DAG.getConstant(8, MVT::i8));
if (LastIsNonZero)
ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
} else
ThisElt = LastElt;
Gabor Greif
committed
if (ThisElt.getNode())
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
DAG.getIntPtrConstant(i/2));
}
}
return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
}
/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
if (NumNonZero > 4)
bool First = true;
for (unsigned i = 0; i < 8; ++i) {
bool isNonZero = (NonZeros & (1 << i)) != 0;
if (isNonZero) {
if (First) {
if (NumZero)
Evan Cheng
committed
V = getZeroVector(MVT::v8i16, true, DAG);
else
V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
First = false;
}
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
DAG.getIntPtrConstant(i));
}
}
return V;
}
/// getVShift - Return a vector logical shift node.
///
static SDValue getVShift(bool isLeft, MVT VT, SDValue SrcOp,
unsigned NumBits, SelectionDAG &DAG,
const TargetLowering &TLI) {
bool isMMX = VT.getSizeInBits() == 64;
MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
SrcOp = DAG.getNode(ISD::BIT_CONVERT, ShVT, SrcOp);
return DAG.getNode(ISD::BIT_CONVERT, VT,
DAG.getNode(Opc, ShVT, SrcOp,
DAG.getConstant(NumBits, TLI.getShiftAmountTy())));
}
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
// All zero's are handled with pxor, all one's are handled with pcmpeqd.
if (ISD::isBuildVectorAllZeros(Op.getNode())
|| ISD::isBuildVectorAllOnes(Op.getNode())) {
// Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
// 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
// eliminated on x86-32 hosts.
if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
return Op;
Gabor Greif
committed
if (ISD::isBuildVectorAllOnes(Op.getNode()))
return getOnesVector(Op.getValueType(), DAG);
Evan Cheng
committed
return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG);
MVT VT = Op.getValueType();
MVT EVT = VT.getVectorElementType();
unsigned EVTBits = EVT.getSizeInBits();
unsigned NumElems = Op.getNumOperands();
unsigned NumZero = 0;
unsigned NumNonZero = 0;
unsigned NonZeros = 0;
bool IsAllConstants = true;
for (unsigned i = 0; i < NumElems; ++i) {
Evan Cheng
committed
if (Elt.getOpcode() == ISD::UNDEF)
continue;
Values.insert(Elt);
if (Elt.getOpcode() != ISD::Constant &&
Elt.getOpcode() != ISD::ConstantFP)
IsAllConstants = false;
Evan Cheng
committed
if (isZeroNode(Elt))
NumZero++;
else {
NonZeros |= (1 << i);
NumNonZero++;
}
}
if (NumNonZero == 0) {
// All undef vector. Return an UNDEF. All zero vectors were handled above.
return DAG.getNode(ISD::UNDEF, VT);
}
// Special case for single non-zero, non-undef, element.
Evan Cheng
committed
if (NumNonZero == 1 && NumElems <= 4) {
unsigned Idx = CountTrailingZeros_32(NonZeros);
// If this is an insertion of an i64 value on x86-32, and if the top bits of
// the value are obviously zero, truncate the value to i32 and do the
// insertion that way. Only do this if the value is non-constant or if the
// value is a constant being inserted into element 0. It is cheaper to do
// a constant pool load than it is to do a movd + shuffle.
if (EVT == MVT::i64 && !Subtarget->is64Bit() &&
(!IsAllConstants || Idx == 0)) {
if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
// Handle MMX and SSE both.
MVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
unsigned VecElts = VT == MVT::v2i64 ? 4 : 2;
// Truncate the value (which may itself be a constant) to i32, and
// convert it to a vector with movd (S2V+shuffle to zero extend).
Item = DAG.getNode(ISD::TRUNCATE, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VecVT, Item);
Evan Cheng
committed
Item = getShuffleVectorZeroOrUndef(Item, 0, true,
Subtarget->hasSSE2(), DAG);
// Now we have our 32-bit value zero extended in the low element of
// a vector. If Idx != 0, swizzle it into place.
if (Idx != 0) {
Item, DAG.getNode(ISD::UNDEF, Item.getValueType()),
getSwapEltZeroMask(VecElts, Idx, DAG)
};
Item = DAG.getNode(ISD::VECTOR_SHUFFLE, VecVT, Ops, 3);
}
return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Item);
}
}
// If we have a constant or non-constant insertion into the low element of
// a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
// the rest of the elements. This will be matched as movd/movq/movss/movsd
// depending on what the source datatype is. Because we can only get here
// when NumElems <= 4, this only needs to handle i32/f32/i64/f64.
if (Idx == 0 &&
// Don't do this for i64 values on x86-32.
(EVT != MVT::i64 || Subtarget->is64Bit())) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
Evan Cheng
committed
return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
Subtarget->hasSSE2(), DAG);
// Is it a vector logical left shift?
if (NumElems == 2 && Idx == 1 &&
isZeroNode(Op.getOperand(0)) && !isZeroNode(Op.getOperand(1))) {
unsigned NumBits = VT.getSizeInBits();
return getVShift(true, VT,
DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(1)),
NumBits/2, DAG, *this);
}
if (IsAllConstants) // Otherwise, it's better to do a constpool load.
// Otherwise, if this is a vector with i32 or f32 elements, and the element
// is a non-constant being inserted into an element other than the low one,
// we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka
// movd/movss) to move this into the low element, then shuffle it into
// place.
if (EVTBits == 32) {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
// Turn it into a shuffle of zero and zero-extended scalar to vector.
Evan Cheng
committed
Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
Subtarget->hasSSE2(), DAG);
MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT MaskEVT = MaskVT.getVectorElementType();
for (unsigned i = 0; i < NumElems; i++)
MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
DAG.getNode(ISD::UNDEF, VT), Mask);
}
}
// Splat is obviously ok. Let legalizer expand it to a shuffle.
if (Values.size() == 1)
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
if (IsAllConstants)
// Let legalizer expand 2-wide build_vectors.
Evan Cheng
committed
if (EVTBits == 64) {
if (NumNonZero == 1) {
// One half is zero or undef.
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT,
Evan Cheng
committed
Op.getOperand(Idx));
Evan Cheng
committed
return getShuffleVectorZeroOrUndef(V2, Idx, true,
Subtarget->hasSSE2(), DAG);
Evan Cheng
committed
}
Evan Cheng
committed
}
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16) {
SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
Gabor Greif
committed
if (V.getNode()) return V;
if (EVTBits == 16 && NumElems == 8) {
SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
Gabor Greif
committed
if (V.getNode()) return V;
}
// If element VT is == 32 bits, turn it into a number of shuffles.
V.resize(NumElems);
if (NumElems == 4 && NumZero > 0) {
for (unsigned i = 0; i < 4; ++i) {
bool isZero = !(NonZeros & (1 << i));
if (isZero)
Evan Cheng
committed
V[i] = getZeroVector(VT, Subtarget->hasSSE2(), DAG);
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
else
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
}
for (unsigned i = 0; i < 2; ++i) {
switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
default: break;
case 0:
V[i] = V[i*2]; // Must be a zero vector.
break;
case 1:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
getMOVLMask(NumElems, DAG));
break;
case 2:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
getMOVLMask(NumElems, DAG));
break;
case 3:
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
getUnpacklMask(NumElems, DAG));
break;
}
}
MVT MaskVT = MVT::getIntVectorWithNumElements(NumElems);
MVT EVT = MaskVT.getVectorElementType();
bool Reverse = (NonZeros & 0x3) == 2;
for (unsigned i = 0; i < 2; ++i)
if (Reverse)
MaskVec.push_back(DAG.getConstant(1-i, EVT));
else
MaskVec.push_back(DAG.getConstant(i, EVT));
Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
for (unsigned i = 0; i < 2; ++i)
if (Reverse)
MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
else
MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
SDValue ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size());
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
}
if (Values.size() > 2) {
// Expand into a number of unpckl*.
// e.g. for v4f32
// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
SDValue UnpckMask = getUnpacklMask(NumElems, DAG);
for (unsigned i = 0; i < NumElems; ++i)
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
NumElems >>= 1;
while (NumElems != 0) {
for (unsigned i = 0; i < NumElems; ++i)
V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
UnpckMask);
NumElems >>= 1;
}
return V[0];
}
SDValue LowerVECTOR_SHUFFLEv8i16(SDValue V1, SDValue V2,
SDValue PermMask, SelectionDAG &DAG,
TargetLowering &TLI) {
MVT MaskVT = MVT::getIntVectorWithNumElements(8);
MVT MaskEVT = MaskVT.getVectorElementType();
MVT PtrVT = TLI.getPointerTy();
Gabor Greif
committed
SmallVector<SDValue, 8> MaskElts(PermMask.getNode()->op_begin(),
PermMask.getNode()->op_end());
// First record which half of which vector the low elements come from.
SmallVector<unsigned, 4> LowQuad(4);
for (unsigned i = 0; i < 4; ++i) {
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
int QuadIdx = EltIdx / 4;
++LowQuad[QuadIdx];
}
int BestLowQuad = -1;
unsigned MaxQuad = 1;
for (unsigned i = 0; i < 4; ++i) {
if (LowQuad[i] > MaxQuad) {
BestLowQuad = i;
MaxQuad = LowQuad[i];
}
}
// Record which half of which vector the high elements come from.
SmallVector<unsigned, 4> HighQuad(4);
for (unsigned i = 4; i < 8; ++i) {
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
int QuadIdx = EltIdx / 4;
++HighQuad[QuadIdx];
}
int BestHighQuad = -1;
MaxQuad = 1;
for (unsigned i = 0; i < 4; ++i) {
if (HighQuad[i] > MaxQuad) {
BestHighQuad = i;
MaxQuad = HighQuad[i];
}
}
// If it's possible to sort parts of either half with PSHUF{H|L}W, then do it.
if (BestLowQuad != -1 || BestHighQuad != -1) {
// First sort the 4 chunks in order using shufpd.
if (BestLowQuad != -1)
MaskVec.push_back(DAG.getConstant(BestLowQuad, MVT::i32));
else
MaskVec.push_back(DAG.getConstant(0, MVT::i32));
if (BestHighQuad != -1)
MaskVec.push_back(DAG.getConstant(BestHighQuad, MVT::i32));
else
MaskVec.push_back(DAG.getConstant(1, MVT::i32));
SDValue Mask= DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i32, &MaskVec[0],2);
NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v2i64,
DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V1),
DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, V2), Mask);
NewV = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, NewV);
// Now sort high and low parts separately.
BitVector InOrder(8);
if (BestLowQuad != -1) {
// Sort lower half in order using PSHUFLW.
MaskVec.clear();
bool AnyOutOrder = false;
for (unsigned i = 0; i != 4; ++i) {
if (Elt.getOpcode() == ISD::UNDEF) {
MaskVec.push_back(Elt);
InOrder.set(i);
} else {
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx != i)
AnyOutOrder = true;
MaskVec.push_back(DAG.getConstant(EltIdx % 4, MaskEVT));
// If this element is in the right place after this shuffle, then
// remember it.
if ((int)(EltIdx / 4) == BestLowQuad)
InOrder.set(i);
}
}
if (AnyOutOrder) {
for (unsigned i = 4; i != 8; ++i)
MaskVec.push_back(DAG.getConstant(i, MaskEVT));
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
}
}
if (BestHighQuad != -1) {
// Sort high half in order using PSHUFHW if possible.
MaskVec.clear();
for (unsigned i = 0; i != 4; ++i)
MaskVec.push_back(DAG.getConstant(i, MaskEVT));
bool AnyOutOrder = false;
for (unsigned i = 4; i != 8; ++i) {
if (Elt.getOpcode() == ISD::UNDEF) {
MaskVec.push_back(Elt);
InOrder.set(i);
} else {
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx != i)
AnyOutOrder = true;
MaskVec.push_back(DAG.getConstant((EltIdx % 4) + 4, MaskEVT));
// If this element is in the right place after this shuffle, then
// remember it.
if ((int)(EltIdx / 4) == BestHighQuad)
InOrder.set(i);
}
}
if (AnyOutOrder) {
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
NewV = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, NewV, NewV, Mask);
}
}
// The other elements are put in the right place using pextrw and pinsrw.
for (unsigned i = 0; i != 8; ++i) {
if (InOrder[i])
continue;
Bill Wendling
committed
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
DAG.getConstant(EltIdx, PtrVT))
: DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
DAG.getConstant(EltIdx - 8, PtrVT));
NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
DAG.getConstant(i, PtrVT));
}
return NewV;
}
// PSHUF{H|L}W are not used. Lower into extracts and inserts but try to use as
// few as possible. First, let's find out how many elements are already in the
// right order.
unsigned V1InOrder = 0;
unsigned V1FromV1 = 0;
unsigned V2InOrder = 0;
unsigned V2FromV2 = 0;
SmallVector<SDValue, 8> V1Elts;
SmallVector<SDValue, 8> V2Elts;
for (unsigned i = 0; i < 8; ++i) {
if (Elt.getOpcode() == ISD::UNDEF) {
V1Elts.push_back(Elt);
V2Elts.push_back(Elt);
continue;
}
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx == i) {
V1Elts.push_back(Elt);
V2Elts.push_back(DAG.getConstant(i+8, MaskEVT));
++V1InOrder;
} else if (EltIdx == i+8) {
V1Elts.push_back(Elt);
V2Elts.push_back(DAG.getConstant(i, MaskEVT));
++V2InOrder;
} else if (EltIdx < 8) {
V1Elts.push_back(Elt);
++V1FromV1;
V2Elts.push_back(DAG.getConstant(EltIdx-8, MaskEVT));
++V2FromV2;
}
}
if (V2InOrder > V1InOrder) {
PermMask = CommuteVectorShuffleMask(PermMask, DAG);
std::swap(V1, V2);
std::swap(V1Elts, V2Elts);
std::swap(V1FromV1, V2FromV2);
}
if ((V1FromV1 + V1InOrder) != 8) {
// Some elements are from V2.
if (V1FromV1) {
// If there are elements that are from V1 but out of place,
// then first sort them in place
for (unsigned i = 0; i < 8; ++i) {
if (Elt.getOpcode() == ISD::UNDEF) {
MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
continue;
}
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx >= 8)
MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
else
MaskVec.push_back(DAG.getConstant(EltIdx, MaskEVT));
}
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], 8);
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, V1, V1, Mask);
}
NewV = V1;
for (unsigned i = 0; i < 8; ++i) {
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (EltIdx < 8)
continue;
SDValue ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V2,
DAG.getConstant(EltIdx - 8, PtrVT));
NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
DAG.getConstant(i, PtrVT));
return NewV;
} else {
// All elements are from V1.
NewV = V1;
for (unsigned i = 0; i < 8; ++i) {
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
SDValue ExtOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, V1,
DAG.getConstant(EltIdx, PtrVT));
NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, NewV, ExtOp,
DAG.getConstant(i, PtrVT));
}
return NewV;
}
Evan Cheng
committed
/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
/// ones, or rewriting v4i32 / v2f32 as 2 wide ones if possible. This can be
/// done when every pair / quad of shuffle mask elements point to elements in
/// the right sequence. e.g.
/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
static
SDValue RewriteAsNarrowerShuffle(SDValue V1, SDValue V2,
TargetLowering &TLI) {
unsigned NumElems = PermMask.getNumOperands();
Evan Cheng
committed
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
MVT MaskEltVT = MaskVT.getVectorElementType();
MVT NewVT = MaskVT;
switch (VT.getSimpleVT()) {
default: assert(false && "Unexpected!");
Evan Cheng
committed
case MVT::v4f32: NewVT = MVT::v2f64; break;
case MVT::v4i32: NewVT = MVT::v2i64; break;
case MVT::v8i16: NewVT = MVT::v4i32; break;
case MVT::v16i8: NewVT = MVT::v4i32; break;
}
if (NewWidth == 2) {
Evan Cheng
committed
NewVT = MVT::v2i64;
else
NewVT = MVT::v2f64;
Evan Cheng
committed
unsigned Scale = NumElems / NewWidth;
for (unsigned i = 0; i < NumElems; i += Scale) {
unsigned StartIdx = ~0U;
for (unsigned j = 0; j < Scale; ++j) {
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned EltIdx = cast<ConstantSDNode>(Elt)->getValue();
if (StartIdx == ~0U)
StartIdx = EltIdx - (EltIdx % Scale);
if (EltIdx != StartIdx + j)
}
if (StartIdx == ~0U)
MaskVec.push_back(DAG.getNode(ISD::UNDEF, MaskEltVT));
else
MaskVec.push_back(DAG.getConstant(StartIdx / Scale, MaskEltVT));
Evan Cheng
committed
V1 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V1);
V2 = DAG.getNode(ISD::BIT_CONVERT, NewVT, V2);
return DAG.getNode(ISD::VECTOR_SHUFFLE, NewVT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskVec[0], MaskVec.size()));
/// getVZextMovL - Return a zero-extending vector move low node.
Evan Cheng
committed
///
static SDValue getVZextMovL(MVT VT, MVT OpVT,
SDValue SrcOp, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
Evan Cheng
committed
if (VT == MVT::v2f64 || VT == MVT::v4f32) {
LoadSDNode *LD = NULL;
Gabor Greif
committed
if (!isScalarLoadToVector(SrcOp.getNode(), &LD))
Evan Cheng
committed
LD = dyn_cast<LoadSDNode>(SrcOp);
if (!LD) {
// movssrr and movsdrr do not clear top bits. Try to use movd, movq
// instead.
MVT EVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
Evan Cheng
committed
if ((EVT != MVT::i64 || Subtarget->is64Bit()) &&
SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
SrcOp.getOperand(0).getOperand(0).getValueType() == EVT) {
// PR2108
OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
return DAG.getNode(ISD::BIT_CONVERT, VT,
DAG.getNode(X86ISD::VZEXT_MOVL, OpVT,
Evan Cheng
committed
DAG.getNode(ISD::SCALAR_TO_VECTOR, OpVT,
Evan Cheng
committed
}
}
}
return DAG.getNode(ISD::BIT_CONVERT, VT,
DAG.getNode(X86ISD::VZEXT_MOVL, OpVT,
Evan Cheng
committed
DAG.getNode(ISD::BIT_CONVERT, OpVT, SrcOp)));
}
Evan Cheng
committed
/// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of
/// shuffles.
static SDValue
LowerVECTOR_SHUFFLE_4wide(SDValue V1, SDValue V2,
SDValue PermMask, MVT VT, SelectionDAG &DAG) {
Evan Cheng
committed
MVT MaskVT = PermMask.getValueType();
MVT MaskEVT = MaskVT.getVectorElementType();
SmallVector<std::pair<int, int>, 8> Locs;
Locs.resize(4);
SmallVector<SDValue, 8> Mask1(4, DAG.getNode(ISD::UNDEF, MaskEVT));
Evan Cheng
committed
unsigned NumHi = 0;
unsigned NumLo = 0;
for (unsigned i = 0; i != 4; ++i) {
Evan Cheng
committed
if (Elt.getOpcode() == ISD::UNDEF) {
Locs[i] = std::make_pair(-1, -1);
} else {
unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
assert(Val < 8 && "Invalid VECTOR_SHUFFLE index!");
Evan Cheng
committed
if (Val < 4) {
Locs[i] = std::make_pair(0, NumLo);
Mask1[NumLo] = Elt;
NumLo++;
} else {
Locs[i] = std::make_pair(1, NumHi);
if (2+NumHi < 4)
Mask1[2+NumHi] = Elt;
NumHi++;
}
}
}
Evan Cheng
committed
if (NumLo <= 2 && NumHi <= 2) {
// If no more than two elements come from either vector. This can be
// implemented with two shuffles. First shuffle gather the elements.
// The second shuffle, which takes the first shuffle as both of its
// vector operands, put the elements into the right order.
Evan Cheng
committed
V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&Mask1[0], Mask1.size()));
SmallVector<SDValue, 8> Mask2(4, DAG.getNode(ISD::UNDEF, MaskEVT));
Evan Cheng
committed
for (unsigned i = 0; i != 4; ++i) {
if (Locs[i].first == -1)
continue;
else {
unsigned Idx = (i < 2) ? 0 : 4;
Idx += Locs[i].first * 2 + Locs[i].second;
Mask2[i] = DAG.getConstant(Idx, MaskEVT);
}
}
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&Mask2[0], Mask2.size()));
} else if (NumLo == 3 || NumHi == 3) {
// Otherwise, we must have three elements from one vector, call it X, and
// one element from the other, call it Y. First, use a shufps to build an
// intermediate vector with the one element from Y and the element from X
// that will be in the same half in the final destination (the indexes don't
// matter). Then, use a shufps to build the final vector, taking the half
// containing the element from Y from the intermediate, and the other half
// from X.
if (NumHi == 3) {
// Normalize it so the 3 elements come from V1.
PermMask = CommuteVectorShuffleMask(PermMask, DAG);
std::swap(V1, V2);
}
// Find the element from V2.
unsigned HiIndex;
for (HiIndex = 0; HiIndex < 3; ++HiIndex) {
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
if (Elt.getOpcode() == ISD::UNDEF)
continue;
unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
if (Val >= 4)
break;
}
Mask1[0] = PermMask.getOperand(HiIndex);
Mask1[1] = DAG.getNode(ISD::UNDEF, MaskEVT);
Mask1[2] = PermMask.getOperand(HiIndex^1);
Mask1[3] = DAG.getNode(ISD::UNDEF, MaskEVT);
V2 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4));
if (HiIndex >= 2) {
Mask1[0] = PermMask.getOperand(0);
Mask1[1] = PermMask.getOperand(1);
Mask1[2] = DAG.getConstant(HiIndex & 1 ? 6 : 4, MaskEVT);
Mask1[3] = DAG.getConstant(HiIndex & 1 ? 4 : 6, MaskEVT);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4));
} else {
Mask1[0] = DAG.getConstant(HiIndex & 1 ? 2 : 0, MaskEVT);
Mask1[1] = DAG.getConstant(HiIndex & 1 ? 0 : 2, MaskEVT);
Mask1[2] = PermMask.getOperand(2);
Mask1[3] = PermMask.getOperand(3);
if (Mask1[2].getOpcode() != ISD::UNDEF)
Mask1[2] = DAG.getConstant(cast<ConstantSDNode>(Mask1[2])->getValue()+4,
MaskEVT);
if (Mask1[3].getOpcode() != ISD::UNDEF)
Mask1[3] = DAG.getConstant(cast<ConstantSDNode>(Mask1[3])->getValue()+4,
MaskEVT);
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &Mask1[0], 4));
}
Evan Cheng
committed
}
// Break it into (shuffle shuffle_hi, shuffle_lo).
Locs.clear();
SmallVector<SDValue,8> LoMask(4, DAG.getNode(ISD::UNDEF, MaskEVT));
SmallVector<SDValue,8> HiMask(4, DAG.getNode(ISD::UNDEF, MaskEVT));
SmallVector<SDValue,8> *MaskPtr = &LoMask;
Evan Cheng
committed
unsigned MaskIdx = 0;
unsigned LoIdx = 0;
unsigned HiIdx = 2;
for (unsigned i = 0; i != 4; ++i) {
if (i == 2) {
MaskPtr = &HiMask;
MaskIdx = 1;
LoIdx = 0;
HiIdx = 2;
}
Evan Cheng
committed
if (Elt.getOpcode() == ISD::UNDEF) {
Locs[i] = std::make_pair(-1, -1);
} else if (cast<ConstantSDNode>(Elt)->getValue() < 4) {
Locs[i] = std::make_pair(MaskIdx, LoIdx);
(*MaskPtr)[LoIdx] = Elt;
LoIdx++;
} else {
Locs[i] = std::make_pair(MaskIdx, HiIdx);
(*MaskPtr)[HiIdx] = Elt;
HiIdx++;
}
}
SDValue LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
Evan Cheng
committed
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&LoMask[0], LoMask.size()));
SDValue HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
Evan Cheng
committed
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&HiMask[0], HiMask.size()));
Evan Cheng
committed
for (unsigned i = 0; i != 4; ++i) {
if (Locs[i].first == -1) {
MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
} else {
unsigned Idx = Locs[i].first * 4 + Locs[i].second;
MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
}
}
return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
&MaskOps[0], MaskOps.size()));
}
SDValue
X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDValue PermMask = Op.getOperand(2);
MVT VT = Op.getValueType();
unsigned NumElems = PermMask.getNumOperands();
bool isMMX = VT.getSizeInBits() == 64;
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
Evan Cheng
committed
bool V1IsSplat = false;
bool V2IsSplat = false;
Gabor Greif
committed
if (isUndefShuffle(Op.getNode()))
return DAG.getNode(ISD::UNDEF, VT);
Gabor Greif
committed
if (isZeroShuffle(Op.getNode()))
Evan Cheng
committed
return getZeroVector(VT, Subtarget->hasSSE2(), DAG);
Gabor Greif
committed
if (isIdentityMask(PermMask.getNode()))
Evan Cheng
committed
return V1;
Gabor Greif
committed
else if (isIdentityMask(PermMask.getNode(), true))
Evan Cheng
committed
return V2;
Gabor Greif
committed
if (isSplatMask(PermMask.getNode())) {
Evan Cheng
committed
if (isMMX || NumElems < 4) return Op;
// Promote it to a v4{if}32 splat.
return PromoteSplat(Op, DAG, Subtarget->hasSSE2());
Evan Cheng
committed
// If the shuffle can be profitably rewritten as a narrower shuffle, then
// do it!
if (VT == MVT::v8i16 || VT == MVT::v16i8) {
SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask, DAG, *this);
Gabor Greif
committed
if (NewOp.getNode())
Evan Cheng
committed
return DAG.getNode(ISD::BIT_CONVERT, VT, LowerVECTOR_SHUFFLE(NewOp, DAG));
} else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
Gabor Greif
committed
if (ISD::isBuildVectorAllZeros(V2.getNode())) {
SDValue NewOp = RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
Evan Cheng
committed
DAG, *this);
Gabor Greif
committed
if (NewOp.getNode()) {
SDValue NewV1 = NewOp.getOperand(0);
SDValue NewV2 = NewOp.getOperand(1);
SDValue NewMask = NewOp.getOperand(2);
Gabor Greif
committed
if (isCommutedMOVL(NewMask.getNode(), true, false)) {
Evan Cheng
committed
NewOp = CommuteVectorShuffle(NewOp, NewV1, NewV2, NewMask, DAG);
return getVZextMovL(VT, NewOp.getValueType(), NewV2, DAG, Subtarget);
Evan Cheng
committed
}
}
Gabor Greif
committed
} else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
SDValue NewOp= RewriteAsNarrowerShuffle(V1, V2, VT, PermMask,
Evan Cheng
committed
DAG, *this);
Gabor Greif
committed
if (NewOp.getNode() && X86::isMOVLMask(NewOp.getOperand(2).getNode()))
return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
Evan Cheng
committed
DAG, Subtarget);
Evan Cheng
committed
}
}
// Check if this can be converted into a logical shift.
bool isLeft = false;
unsigned ShAmt = 0;
bool isShift = isVectorShift(Op, PermMask, DAG, isLeft, ShVal, ShAmt);
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
MVT EVT = VT.getVectorElementType();
ShAmt *= EVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this);
}
Gabor Greif
committed
if (X86::isMOVLMask(PermMask.getNode())) {
Evan Cheng
committed
if (V1IsUndef)
return V2;
Gabor Greif
committed
if (ISD::isBuildVectorAllZeros(V1.getNode()))
return getVZextMovL(VT, VT, V2, DAG, Subtarget);
if (!isMMX)
return Op;
Evan Cheng
committed
}
Gabor Greif
committed
if (!isMMX && (X86::isMOVSHDUPMask(PermMask.getNode()) ||
X86::isMOVSLDUPMask(PermMask.getNode()) ||
X86::isMOVHLPSMask(PermMask.getNode()) ||
X86::isMOVHPMask(PermMask.getNode()) ||
X86::isMOVLPMask(PermMask.getNode())))
Gabor Greif
committed
if (ShouldXformToMOVHLPS(PermMask.getNode()) ||
ShouldXformToMOVLP(V1.getNode(), V2.getNode(), PermMask.getNode()))
return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (isShift) {
// No better options. Use a vshl / vsrl.
MVT EVT = VT.getVectorElementType();
ShAmt *= EVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this);
}
bool Commuted = false;
// FIXME: This should also accept a bitcast of a splat? Be careful, not
// 1,1,1,1 -> v8i16 though.
Gabor Greif
committed
V1IsSplat = isSplatVector(V1.getNode());
V2IsSplat = isSplatVector(V2.getNode());
// Canonicalize the splat or undef, if present, to be on the RHS.
if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
std::swap(V1IsSplat, V2IsSplat);
std::swap(V1IsUndef, V2IsUndef);
Commuted = true;
Evan Cheng
committed
// FIXME: Figure out a cleaner way to do this.
Gabor Greif
committed
if (isCommutedMOVL(PermMask.getNode(), V2IsSplat, V2IsUndef)) {
Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
if (V2IsSplat) {
// V2 is a splat, so the mask may be malformed. That is, it may point
// to any V2 element. The instruction selectior won't like this. Get
// a corrected mask and commute to form a proper MOVS{S|D}.
SDValue NewMask = getMOVLMask(NumElems, DAG);