static bool isUndefOrEqual(int Val, int CmpVal) {
  if (Val < 0 || Val == CmpVal)
    return true;
  return false;
}

/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFD or PSHUFW.  That is, it doesn't reference
/// the second operand.
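/// For example, for v4i32 the mask <2, 3, 0, 1> qualifies: every index stays
/// below 4, so only the first vector operand is referenced.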
static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) {
  if (VT == MVT::v4f32 || VT == MVT::v4i32)
    return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
  if (VT == MVT::v2f64 || VT == MVT::v2i64)
    return (Mask[0] < 2 && Mask[1] < 2);
  return false;
}

bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return ::isPSHUFDMask(M, N->getValueType(0));
}
/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFHW.
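/// A matching mask looks like <0, 1, 2, 3, 7, 6, 5, 4>: the lower quadword is
/// copied in order (or undef) and the upper indices stay within 4..7.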
static bool isPSHUFHWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
  if (VT != MVT::v8i16)
    return false;

  // Lower quadword copied in order or undef.
  for (int i = 0; i != 4; ++i)
    if (Mask[i] >= 0 && Mask[i] != i)
      return false;

  // Upper quadword shuffled.
  for (int i = 4; i != 8; ++i)
    if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7))
      return false;

  return true;
}

bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return ::isPSHUFHWMask(M, N->getValueType(0));
}
/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFLW.
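/// A matching mask looks like <3, 2, 1, 0, 4, 5, 6, 7>: the upper quadword is
/// copied in order (or undef) and the lower indices stay within 0..3.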
static bool isPSHUFLWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
  if (VT != MVT::v8i16)
    return false;

  // Upper quadword copied in order.
  for (int i = 4; i != 8; ++i)
    if (Mask[i] >= 0 && Mask[i] != i)
      return false;

  // Lower quadword shuffled.
  for (int i = 0; i != 4; ++i)
    if (Mask[i] >= 4)
      return false;

  return true;
}

bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return ::isPSHUFLWMask(M, N->getValueType(0));
}

/// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PALIGNR.
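/// A matching mask selects a consecutive run of elements out of the
/// concatenated operands, e.g. <1, 2, 3, 4> for v4i32 (a one-element shift).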
static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
                          bool hasSSSE3) {
  int i, e = VT.getVectorNumElements();
  if (VT.getSizeInBits() != 128 && VT.getSizeInBits() != 64)
    return false;
  // Do not handle v2i64 / v2f64 shuffles with palignr.
  if (e < 4 || !hasSSSE3)
    return false;
  for (i = 0; i != e; ++i)
    if (Mask[i] >= 0)
      break;
  // All undef, not a palignr.
  if (i == e)
    return false;

  // Make sure we're shifting in the right direction.
  if (Mask[i] <= i)
    return false;
  int s = Mask[i] - i;

  // Check the rest of the elements to see if they are consecutive.
  for (++i; i != e; ++i) {
    int m = Mask[i];
    if (m >= 0 && m != s+i)
      return false;
  }
  return true;
}

/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
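/// For v4f32/v4i32 a mask such as <0, 1, 4, 5> matches: the lower half reads
/// the first operand and the upper half reads the second.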
static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
  int NumElems = VT.getVectorNumElements();
  if (NumElems != 2 && NumElems != 4)
    return false;
  int Half = NumElems / 2;
  for (int i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Mask[i], 0, NumElems))
      return false;
  for (int i = Half; i < NumElems; ++i)
    if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
      return false;

  return true;
}

bool X86::isSHUFPMask(ShuffleVectorSDNode *N) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return ::isSHUFPMask(M, N->getValueType(0));
}

/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
/// the reverse of what x86 shuffles want. x86 shuffles require the lower
/// half elements to come from vector 1 (which would equal the dest.) and
/// the upper half to come from vector 2.
static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
  int NumElems = VT.getVectorNumElements();

  if (NumElems != 2 && NumElems != 4)
    return false;

  int Half = NumElems / 2;
  for (int i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
      return false;
  for (int i = Half; i < NumElems; ++i)
    if (!isUndefOrInRange(Mask[i], 0, NumElems))
      return false;

  return true;
}

static bool isCommutedSHUFP(ShuffleVectorSDNode *N) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return isCommutedSHUFPMask(M, N->getValueType(0));
}

/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
  if (N->getValueType(0).getVectorNumElements() != 4)
    return false;

  // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
  return isUndefOrEqual(N->getMaskElt(0), 6) &&
         isUndefOrEqual(N->getMaskElt(1), 7) &&
         isUndefOrEqual(N->getMaskElt(2), 2) &&
         isUndefOrEqual(N->getMaskElt(3), 3);
}

/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
  unsigned NumElems = N->getValueType(0).getVectorNumElements();
  if (NumElems != 4)
    return false;
  return isUndefOrEqual(N->getMaskElt(0), 2) &&
         isUndefOrEqual(N->getMaskElt(1), 3) &&
         isUndefOrEqual(N->getMaskElt(2), 2) &&
         isUndefOrEqual(N->getMaskElt(3), 3);
}

/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
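/// For a 4-element vector, a mask such as <4, 5, 2, 3> matches: the low half
/// is replaced by the low half of V2 and the high half is kept from V1.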
bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
  unsigned NumElems = N->getValueType(0).getVectorNumElements();

  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getMaskElt(i), i + NumElems))
      return false;

  for (unsigned i = NumElems/2; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getMaskElt(i), i))
      return false;

  return true;
}

/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
  unsigned NumElems = N->getValueType(0).getVectorNumElements();
  if ((NumElems != 2 && NumElems != 4)
      || N->getValueType(0).getSizeInBits() > 128)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getMaskElt(i), i))
      return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getMaskElt(i + NumElems/2), i + NumElems))
      return false;

  return true;
}

/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
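/// For v4i32 the canonical interleaving mask is <0, 4, 1, 5>.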
static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
                         bool V2IsSplat = false) {
  int NumElts = VT.getVectorNumElements();

  assert((VT.is128BitVector() || VT.is256BitVector()) &&
         "Unsupported vector type for unpckh");

  if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8)
    return false;

  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
  // independently on 128-bit lanes.
  unsigned NumLanes = VT.getSizeInBits()/128;
  unsigned NumLaneElts = NumElts/NumLanes;

  unsigned Start = 0;
  unsigned End = NumLaneElts;
  for (unsigned s = 0; s < NumLanes; ++s) {
    for (unsigned i = Start, j = s * NumLaneElts;
         i != End;
         i += 2, ++j) {
      int BitI  = Mask[i];
      int BitI1 = Mask[i+1];
      if (!isUndefOrEqual(BitI, j))
        return false;
      if (V2IsSplat) {
        if (!isUndefOrEqual(BitI1, NumElts))
          return false;
      } else {
        if (!isUndefOrEqual(BitI1, j + NumElts))
          return false;
      }
    }
    // Process the next 128 bits.
    Start += NumLaneElts;
    End += NumLaneElts;
  }

  return true;
}

bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return ::isUNPCKLMask(M, N->getValueType(0), V2IsSplat);
}

/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
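/// For v4i32 the canonical interleaving mask is <2, 6, 3, 7>.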
static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
                         bool V2IsSplat = false) {
  int NumElts = VT.getVectorNumElements();

  assert((VT.is128BitVector() || VT.is256BitVector()) &&
         "Unsupported vector type for unpckh");

  if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8)
    return false;

  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
  // independently on 128-bit lanes.
  unsigned NumLanes = VT.getSizeInBits()/128;
  unsigned NumLaneElts = NumElts/NumLanes;

  unsigned Start = 0;
  unsigned End = NumLaneElts;
  for (unsigned l = 0; l != NumLanes; ++l) {
    for (unsigned i = Start, j = (l*NumLaneElts)+NumLaneElts/2;
                             i != End; i += 2, ++j) {
      int BitI  = Mask[i];
      int BitI1 = Mask[i+1];
      if (!isUndefOrEqual(BitI, j))
        return false;
      if (V2IsSplat) {
        if (!isUndefOrEqual(BitI1, NumElts))
          return false;
      } else {
        if (!isUndefOrEqual(BitI1, j+NumElts))
          return false;
      }
    }
    // Process the next 128 bits.
    Start += NumLaneElts;
    End += NumLaneElts;
  }

  return true;
}

bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return ::isUNPCKHMask(M, N->getValueType(0), V2IsSplat);
}

/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
  int NumElems = VT.getVectorNumElements();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;
  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
  // independently on 128-bit lanes.
  unsigned NumLanes = VT.getSizeInBits() / 128;
  unsigned NumLaneElts = NumElems / NumLanes;
  for (unsigned s = 0; s < NumLanes; ++s) {
    for (unsigned i = s * NumLaneElts, j = s * NumLaneElts;
         i != NumLaneElts * (s + 1);
         i += 2, ++j) {
      int BitI  = Mask[i];
      int BitI1 = Mask[i+1];

      if (!isUndefOrEqual(BitI, j))
        return false;
      if (!isUndefOrEqual(BitI1, j))
        return false;
    }
  }

  return true;
}

bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return ::isUNPCKL_v_undef_Mask(M, N->getValueType(0));
}

/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
static bool isUNPCKH_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
  int NumElems = VT.getVectorNumElements();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;
  for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
    int BitI  = Mask[i];
    int BitI1 = Mask[i+1];
    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }
  return true;
}

bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return ::isUNPCKH_v_undef_Mask(M, N->getValueType(0));
}

/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
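/// For v4i32 a matching mask is <4, 1, 2, 3>: element 0 is taken from V2 and
/// the remaining elements are kept from V1 in place.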
static bool isMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT) {
  if (VT.getVectorElementType().getSizeInBits() < 32)
    return false;

  int NumElts = VT.getVectorNumElements();
  if (!isUndefOrEqual(Mask[0], NumElts))
    return false;

  for (int i = 1; i < NumElts; ++i)
    if (!isUndefOrEqual(Mask[i], i))
      return false;

  return true;
}

bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return ::isMOVLMask(M, N->getValueType(0));
}

/// isVPERMILPDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
/// Note that VPERMIL mask matching differs depending on whether the
/// underlying type is 32 or 64 bits wide. With VPERMILPS the high half of the
/// mask should point to the same elements as the low half, but relative to
/// the high half of the source. With VPERMILPD the two lanes can be shuffled
/// independently of each other, with the only restriction that lanes can't be
/// crossed.
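/// For example, for v4f64 the mask <1, 0, 3, 2> matches: each 128-bit lane is
/// swapped independently without crossing lanes.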
static bool isVPERMILPDMask(const SmallVectorImpl<int> &Mask, EVT VT,
                            const X86Subtarget *Subtarget) {
  int NumElts = VT.getVectorNumElements();
  int NumLanes = VT.getSizeInBits()/128;

  if (!Subtarget->hasAVX())
    return false;

  // Match any permutation of a 128-bit vector with 64-bit element types
  if (NumLanes == 1 && NumElts != 2)
    return false;

  // Only match 256-bit vectors with 64-bit element types
  if (VT.getSizeInBits() == 256 && NumElts != 4)
    return false;

  // The mask on the high lane is independent of the low. Both can match
  // any element inside their own lane, but can't cross.
  int LaneSize = NumElts/NumLanes;
  for (int l = 0; l < NumLanes; ++l)
    for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
      int LaneStart = l*LaneSize;
      if (!isUndefOrInRange(Mask[i], LaneStart, LaneStart+LaneSize))
        return false;
    }

  return true;
}

/// isVPERMILPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to VPERMILPS*.
/// Note that VPERMIL mask matching differs depending on whether the
/// underlying type is 32 or 64 bits wide. With VPERMILPS the high half of the
/// mask should point to the same elements as the low half, but relative to
/// the high half of the source. With VPERMILPD the two lanes can be shuffled
/// independently of each other, with the only restriction that lanes can't be
/// crossed.
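/// For example, for v8f32 the mask <1, 0, 3, 2, 5, 4, 7, 6> matches: the high
/// half repeats the low-half pattern shifted into the upper lane.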
static bool isVPERMILPSMask(const SmallVectorImpl<int> &Mask, EVT VT,
                            const X86Subtarget *Subtarget) {
  unsigned NumElts = VT.getVectorNumElements();
  unsigned NumLanes = VT.getSizeInBits()/128;

  if (!Subtarget->hasAVX())
    return false;

  // Match any permutation of a 128-bit vector with 32-bit element types
  if (NumLanes == 1 && NumElts != 4)
    return false;

  // Only match 256-bit vectors with 32-bit element types
  if (VT.getSizeInBits() == 256 && NumElts != 8)
    return false;

  // The mask on the high lane should be the same as the low. Actually, they
  // can differ if one of the corresponding indices in a lane is undef and the
  // other stays in range.
  int LaneSize = NumElts/NumLanes;
  for (int i = 0; i < LaneSize; ++i) {
    int HighElt = i+LaneSize;
    if (Mask[i] < 0 && (isUndefOrInRange(Mask[HighElt], LaneSize, NumElts)))
      continue;
    if (Mask[HighElt] < 0 && (isUndefOrInRange(Mask[i], 0, LaneSize)))
      continue;
    if (Mask[HighElt]-Mask[i] != LaneSize)
      return false;
  }

  return true;
}

/// getShuffleVPERMILPSImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with VPERMILPS* instructions.
static unsigned getShuffleVPERMILPSImmediate(SDNode *N) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  EVT VT = SVOp->getValueType(0);

  int NumElts = VT.getVectorNumElements();
  int NumLanes = VT.getSizeInBits()/128;

  unsigned Mask = 0;
  int LaneSize = NumElts/NumLanes;

  // Although the mask is equal for both lanes, do it twice to get the cases
  // where a mask will match because the same mask element is undef on the
  // first half but valid on the second. This would get pathological cases
  // such as: shuffle <u, 0, 1, 2, 4, 4, 5, 6>, which is completely valid.
  for (int l = 0; l < NumLanes; ++l) {
    for (int i = 0; i < LaneSize; ++i) {
      int MaskElt = SVOp->getMaskElt(i+(l*LaneSize));
      if (MaskElt < 0)
        continue;
      Mask |= MaskElt << (i*2);
    }
  }

  return Mask;
}

/// getShuffleVPERMILPDImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with VPERMILPD* instructions.
static unsigned getShuffleVPERMILPDImmediate(SDNode *N) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  EVT VT = SVOp->getValueType(0);

  int NumElts = VT.getVectorNumElements();
  int NumLanes = VT.getSizeInBits()/128;

  unsigned Mask = 0;
  int LaneSize = NumElts/NumLanes;
  for (int l = 0; l < NumLanes; ++l)
    for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
      int MaskElt = SVOp->getMaskElt(i);
      if (MaskElt < 0)
        continue;
      Mask |= (MaskElt-l*LaneSize) << i;
    }

  return Mask;
}

/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
/// of what x86 MOVSS wants: the lowest element must be the lowest element of
/// vector 2, and the other elements must come from vector 1 in order.
static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT,
                               bool V2IsSplat = false, bool V2IsUndef = false) {
  int NumOps = VT.getVectorNumElements();
  if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
    return false;

  if (!isUndefOrEqual(Mask[0], 0))
    return false;

  for (int i = 1; i < NumOps; ++i)
    if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
          (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
          (V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
      return false;

  return true;
}

static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return isCommutedMOVLMask(M, N->getValueType(0), V2IsSplat, V2IsUndef);
}

/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
/// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7>
bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N,
                         const X86Subtarget *Subtarget) {
  if (!Subtarget->hasSSE3() && !Subtarget->hasAVX())
    return false;

  // The second vector must be undef
  if (N->getOperand(1).getOpcode() != ISD::UNDEF)
    return false;

  EVT VT = N->getValueType(0);
  unsigned NumElems = VT.getVectorNumElements();

  if ((VT.getSizeInBits() == 128 && NumElems != 4) ||
      (VT.getSizeInBits() == 256 && NumElems != 8))
    return false;

  // "i+1" is the value the indexed mask element must have
  for (unsigned i = 0; i < NumElems; i += 2)
    if (!isUndefOrEqual(N->getMaskElt(i), i+1) ||
        !isUndefOrEqual(N->getMaskElt(i+1), i+1))
      return false;

  return true;
}

/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
/// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6>
bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N,
                         const X86Subtarget *Subtarget) {
  if (!Subtarget->hasSSE3() && !Subtarget->hasAVX())
    return false;

  // The second vector must be undef
  if (N->getOperand(1).getOpcode() != ISD::UNDEF)
    return false;

  EVT VT = N->getValueType(0);
  unsigned NumElems = VT.getVectorNumElements();

  if ((VT.getSizeInBits() == 128 && NumElems != 4) ||
      (VT.getSizeInBits() == 256 && NumElems != 8))
    return false;

  // "i" is the value the indexed mask element must have
  for (unsigned i = 0; i < NumElems; i += 2)
    if (!isUndefOrEqual(N->getMaskElt(i), i) ||
        !isUndefOrEqual(N->getMaskElt(i+1), i))
      return false;

  return true;
}

/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
  int e = N->getValueType(0).getVectorNumElements() / 2;
  for (int i = 0; i < e; ++i)
    if (!isUndefOrEqual(N->getMaskElt(i), i))
      return false;
  for (int i = 0; i < e; ++i)
    if (!isUndefOrEqual(N->getMaskElt(e+i), i))
      return false;
  return true;
}

/// isVEXTRACTF128Index - Return true if the specified
/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
/// suitable for input to VEXTRACTF128.
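/// For example, extracting the v4i32 at element index 4 of a v8i32 is
/// 128-bit aligned (4 * 32 == 128) and therefore matches.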
bool X86::isVEXTRACTF128Index(SDNode *N) {
  if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
    return false;

  // The index should be aligned on a 128-bit boundary.
  uint64_t Index =
    cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();

  unsigned VL = N->getValueType(0).getVectorNumElements();
  unsigned VBits = N->getValueType(0).getSizeInBits();
  unsigned ElSize = VBits / VL;
  bool Result = (Index * ElSize) % 128 == 0;

  return Result;
}

/// isVINSERTF128Index - Return true if the specified INSERT_SUBVECTOR
/// operand specifies a subvector insert that is suitable for input to
/// VINSERTF128.
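/// For example, inserting a 128-bit subvector at element index 4 of a v8f32
/// is 128-bit aligned and therefore matches; element index 2 would not be.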
bool X86::isVINSERTF128Index(SDNode *N) {
  if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
    return false;

  // The index should be aligned on a 128-bit boundary.
  uint64_t Index =
    cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();

  unsigned VL = N->getValueType(0).getVectorNumElements();
  unsigned VBits = N->getValueType(0).getSizeInBits();
  unsigned ElSize = VBits / VL;
  bool Result = (Index * ElSize) % 128 == 0;

  return Result;
}

/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
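/// For a 4-element shuffle the selected elements are packed two bits each,
/// with the highest element in the most significant bits; e.g. the identity
/// mask <0, 1, 2, 3> encodes as the familiar immediate 0xE4.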
unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  int NumOperands = SVOp->getValueType(0).getVectorNumElements();

  unsigned Shift = (NumOperands == 4) ? 2 : 1;
  unsigned Mask = 0;
  for (int i = 0; i < NumOperands; ++i) {
    int Val = SVOp->getMaskElt(NumOperands-i-1);
    if (Val < 0) Val = 0;
    if (Val >= NumOperands) Val -= NumOperands;
    Mask |= Val;
    if (i != NumOperands - 1)
      Mask <<= Shift;
  }
  return Mask;
}

/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  unsigned Mask = 0;
  // 8 nodes, but we only care about the last 4.
  for (unsigned i = 7; i >= 4; --i) {
    int Val = SVOp->getMaskElt(i);
    if (Val >= 0)
      Mask |= (Val - 4);
    if (i != 4)
      Mask <<= 2;
  }
  return Mask;
}

/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  unsigned Mask = 0;
  // 8 nodes, but we only care about the first 4.
  for (int i = 3; i >= 0; --i) {
    int Val = SVOp->getMaskElt(i);
    if (Val >= 0)
      Mask |= Val;
    if (i != 0)
      Mask <<= 2;
  }
  return Mask;
}

/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
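/// The immediate is a byte count: the first defined mask index minus its
/// position, times the element size in bytes; e.g. the v8i16 mask
/// <1, 2, 3, 4, 5, 6, 7, 8> yields (1 - 0) * 2 == 2.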
unsigned X86::getShufflePALIGNRImmediate(SDNode *N) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  EVT VVT = N->getValueType(0);
  unsigned EltSize = VVT.getVectorElementType().getSizeInBits() >> 3;
  int Val = 0;

  unsigned i, e;
  for (i = 0, e = VVT.getVectorNumElements(); i != e; ++i) {
    Val = SVOp->getMaskElt(i);
    if (Val >= 0)
      break;
  }
  assert(Val - i > 0 && "PALIGNR imm should be positive");
  return (Val - i) * EltSize;
}

/// getExtractVEXTRACTF128Immediate - Return the appropriate immediate
/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128
/// instructions.
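/// For example, extracting the upper 128 bits of a v8i32 (element index 4)
/// yields the immediate 1, since each 128-bit chunk holds four i32 elements.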
unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) {
  if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
    llvm_unreachable("Illegal extract subvector for VEXTRACTF128");

  uint64_t Index =
    cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();

  EVT VecVT = N->getOperand(0).getValueType();
  EVT ElVT = VecVT.getVectorElementType();

  unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
  return Index / NumElemsPerChunk;
}

/// getInsertVINSERTF128Immediate - Return the appropriate immediate
/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128
/// instructions.
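/// For example, inserting into the upper 128 bits of a v8f32 (element index
/// 4) yields the immediate 1.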
unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
  if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
    llvm_unreachable("Illegal insert subvector for VINSERTF128");

  uint64_t Index =
    cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();

  EVT VecVT = N->getValueType(0);
  EVT ElVT = VecVT.getVectorElementType();

  unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
  return Index / NumElemsPerChunk;
}

/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
  return ((isa<ConstantSDNode>(Elt) &&
           cast<ConstantSDNode>(Elt)->isNullValue()) ||
          (isa<ConstantFPSDNode>(Elt) &&
           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
}

/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
/// their permute mask.
static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
                                    SelectionDAG &DAG) {
  EVT VT = SVOp->getValueType(0);
  unsigned NumElems = VT.getVectorNumElements();
  SmallVector<int, 8> MaskVec;

  for (unsigned i = 0; i != NumElems; ++i) {
    int idx = SVOp->getMaskElt(i);
    if (idx < 0)
      MaskVec.push_back(idx);
    else if (idx < (int)NumElems)
      MaskVec.push_back(idx + NumElems);
    else
      MaskVec.push_back(idx - NumElems);
  }
  return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(1),
                              SVOp->getOperand(0), &MaskVec[0]);
}

/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
/// the two vector operands have swapped position.
static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
  unsigned NumElems = VT.getVectorNumElements();
  for (unsigned i = 0; i != NumElems; ++i) {
    int idx = Mask[i];
    if (idx < 0)
      continue;
    else if (idx < (int)NumElems)
      Mask[i] = idx + NumElems;
    else
      Mask[i] = idx - NumElems;
  }
}

/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from upper half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
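/// A matching 4-element mask is <2, 3, 6, 7>.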
static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) {
  if (Op->getValueType(0).getVectorNumElements() != 4)
    return false;
  for (unsigned i = 0, e = 2; i != e; ++i)
    if (!isUndefOrEqual(Op->getMaskElt(i), i+2))
      return false;
  for (unsigned i = 2; i != 4; ++i)
    if (!isUndefOrEqual(Op->getMaskElt(i), i+4))
      return false;
  return true;
}

/// isScalarLoadToVector - Returns true if the node is a scalar load that
/// is promoted to a vector. It also returns the LoadSDNode by reference if
/// required.
static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
  if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
    return false;
  N = N->getOperand(0).getNode();
  if (!ISD::isNON_EXTLoad(N))
    return false;
  if (LD)
    *LD = cast<LoadSDNode>(N);
  return true;
}

/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
/// match movlp{s|d}. The lower half elements should come from lower half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order). And since V1 will become the source of the
/// MOVLP, it must be either a vector load or a scalar load to vector.
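/// A matching 4-element mask looks like <0, 1, 6, 7>: the low half is kept
/// from V1 and the high half comes from the high half of V2.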
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
                               ShuffleVectorSDNode *Op) {
  if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
    return false;
  // If V2 is a vector load, don't do this transformation; we will try to use
  // a load-folding shufps op instead.
  if (ISD::isNON_EXTLoad(V2))
    return false;
  unsigned NumElems = Op->getValueType(0).getVectorNumElements();
  if (NumElems != 2 && NumElems != 4)
    return false;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
    if (!isUndefOrEqual(Op->getMaskElt(i), i))
      return false;
  for (unsigned i = NumElems/2; i != NumElems; ++i)
    if (!isUndefOrEqual(Op->getMaskElt(i), i+NumElems))
      return false;

  return true;
}

/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
/// all the same.
static bool isSplatVector(SDNode *N) {
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;
  SDValue SplatValue = N->getOperand(0);
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    if (N->getOperand(i) != SplatValue)
      return false;
  return true;
}

/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
/// to a zero vector.
/// FIXME: move to dag combiner / method on ShuffleVectorSDNode
static bool isZeroShuffle(ShuffleVectorSDNode *N) {
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned NumElems = N->getValueType(0).getVectorNumElements();
  for (unsigned i = 0; i != NumElems; ++i) {
    int Idx = N->getMaskElt(i);
    if (Idx >= (int)NumElems) {
      unsigned Opc = V2.getOpcode();
      if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
        continue;
      if (Opc != ISD::BUILD_VECTOR ||
          !X86::isZeroNode(V2.getOperand(Idx-NumElems)))
        return false;
    } else if (Idx >= 0) {
      unsigned Opc = V1.getOpcode();
      if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
        continue;
      if (Opc != ISD::BUILD_VECTOR ||
          !X86::isZeroNode(V1.getOperand(Idx)))
        return false;
    }
  }
  return true;
}

/// getZeroVector - Returns a vector of specified type with all zero elements.
///
static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
                             DebugLoc dl) {
  assert(VT.isVector() && "Expected a vector type");
  // Always build SSE zero vectors as <4 x i32> bitcasted
  // to their dest type. This ensures they get CSE'd.
  SDValue Vec;
  if (VT.getSizeInBits() == 128) {  // SSE
    if (HasSSE2) {  // SSE2
      SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
    } else { // SSE1
      SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
    }
  } else if (VT.getSizeInBits() == 256) { // AVX
    // 256-bit logic and arithmetic instructions in AVX are
    // all floating-point, no support for integer ops. Default
    // to emitting fp zeroed vectors then.
    SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
    SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
  }
  return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}

/// getOnesVector - Returns a vector of specified type with all bits set.
/// Always build ones vectors as <4 x i32>. For 256-bit types, use two
/// <4 x i32> inserted in a <8 x i32> appropriately. Then bitcast to their
/// original type, ensuring they get CSE'd.
static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
  assert(VT.isVector() && "Expected a vector type");
  assert((VT.is128BitVector() || VT.is256BitVector())
         && "Expected a 128-bit or 256-bit vector type");
  SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
  SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                            Cst, Cst, Cst, Cst);

  if (VT.is256BitVector()) {
    SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32),
                              Vec, DAG.getConstant(0, MVT::i32), DAG, dl);
    Vec = Insert128BitVector(InsV, Vec,
                  DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl);
  }

  return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}

/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 point to its first element.
static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
  EVT VT = SVOp->getValueType(0);
  unsigned NumElems = VT.getVectorNumElements();

  bool Changed = false;
  SmallVector<int, 8> MaskVec;
  SVOp->getMask(MaskVec);

  for (unsigned i = 0; i != NumElems; ++i) {
    if (MaskVec[i] > (int)NumElems) {
      MaskVec[i] = NumElems;
      Changed = true;
    }
  }
  if (Changed)
    return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(0),
                                SVOp->getOperand(1), &MaskVec[0]);
  return SDValue(SVOp, 0);
}

/// getMOVL - Returns a vector_shuffle node for a movs{s|d}, movd
/// operation of specified width.
static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
                       SDValue V2) {
  unsigned NumElems = VT.getVectorNumElements();
  SmallVector<int, 8> Mask;
  Mask.push_back(NumElems);
  for (unsigned i = 1; i != NumElems; ++i)
    Mask.push_back(i);
  return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}

/// getUnpackl - Returns a vector_shuffle node for an unpackl operation.