X86ISelLowering.cpp

  return false;
}

static bool EltsFromConsecutiveLoads(SDNode *N, SDValue PermMask,
                                     unsigned NumElems, MVT EVT,
                                     SDNode *&Base,
                                     SelectionDAG &DAG, MachineFrameInfo *MFI,
                                     const TargetLowering &TLI) {
  Base = NULL;
  for (unsigned i = 0; i < NumElems; ++i) {
    SDValue Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF) {
      if (!Base)
        return false;
      continue;
    }

    SDValue Elt = DAG.getShuffleScalarElt(N, i);
    if (!Elt.getNode() ||
        (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
      return false;
    if (!Base) {
      Base = Elt.getNode();
      if (Base->getOpcode() == ISD::UNDEF)
        return false;
      continue;
    }
    if (Elt.getOpcode() == ISD::UNDEF)
      continue;

    if (!TLI.isConsecutiveLoad(Elt.getNode(), Base,
                               EVT.getSizeInBits()/8, i, MFI))
      return false;
  }
  return true;
}

/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                       const TargetLowering &TLI) {
  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
  DebugLoc dl = N->getDebugLoc();
  MVT VT = N->getValueType(0);
  MVT EVT = VT.getVectorElementType();
  SDValue PermMask = N->getOperand(2);
  unsigned NumElems = PermMask.getNumOperands();
  SDNode *Base = NULL;
  if (!EltsFromConsecutiveLoads(N, PermMask, NumElems, EVT, Base,
                                DAG, MFI, TLI))
    return SDValue();

  LoadSDNode *LD = cast<LoadSDNode>(Base);
  if (isBaseAlignmentOfN(16, Base->getOperand(1).getNode(), TLI))
    return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
                       LD->getSrcValue(), LD->getSrcValueOffset(),
                       LD->isVolatile());
  return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
                     LD->getSrcValue(), LD->getSrcValueOffset(),
                     LD->isVolatile(), LD->getAlignment());
}

/// PerformBuildVectorCombine - build_vector 0,(load i64 / f64) -> movq / movsd.
static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
                                         TargetLowering::DAGCombinerInfo &DCI,
                                         const X86Subtarget *Subtarget,
                                         const TargetLowering &TLI) {
  unsigned NumOps = N->getNumOperands();
  DebugLoc dl = N->getDebugLoc();

  // Ignore single operand BUILD_VECTOR.
  if (NumOps == 1)
    return SDValue();

  MVT VT = N->getValueType(0);
  MVT EVT = VT.getVectorElementType();
  if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
    // We are looking for load i64 and zero extend. We want to transform
    // it before legalizer has a chance to expand it. Also look for i64
    // BUILD_PAIR bit casted to f64.
    return SDValue();
  // This must be an insertion into a zero vector.
  SDValue HighElt = N->getOperand(1);
  if (!isZeroNode(HighElt))
    return SDValue();

  // Value must be a load.
  SDNode *Base = N->getOperand(0).getNode();
  if (!isa<LoadSDNode>(Base)) {
    if (Base->getOpcode() != ISD::BIT_CONVERT)
      return SDValue();
    Base = Base->getOperand(0).getNode();
    if (!isa<LoadSDNode>(Base))
      return SDValue();
  }

  // Transform it into VZEXT_LOAD addr.
  LoadSDNode *LD = cast<LoadSDNode>(Base);

  // Load must not be an extload.
  if (LD->getExtensionType() != ISD::NON_EXTLOAD)
    return SDValue();

  // Load type should legal type so we don't have to legalize it.
  if (!TLI.isTypeLegal(VT))
    return SDValue();

  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
  SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
  SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
  TargetLowering::TargetLoweringOpt TLO(DAG);
  TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
  DCI.CommitTargetLoweringOpt(TLO);
  return ResNode;
}

/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                    const X86Subtarget *Subtarget) {
  DebugLoc DL = N->getDebugLoc();
  SDValue Cond = N->getOperand(0);
  // Get the LHS/RHS of the select.
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  
  // If we have SSE[12] support, try to form min/max nodes.
  if (Subtarget->hasSSE2() &&
      (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) &&
      Cond.getOpcode() == ISD::SETCC) {
    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

    unsigned Opcode = 0;
    if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
      switch (CC) {
      default: break;
      case ISD::SETOLE: // (X <= Y) ? X : Y -> min
      case ISD::SETULE:
      case ISD::SETLE:
        if (!UnsafeFPMath) break;
        // FALL THROUGH.
      case ISD::SETOLT:  // (X olt/lt Y) ? X : Y -> min
      case ISD::SETLT:
        Opcode = X86ISD::FMIN;
        break;

      case ISD::SETOGT: // (X > Y) ? X : Y -> max
      case ISD::SETUGT:
      case ISD::SETGT:
        if (!UnsafeFPMath) break;
        // FALL THROUGH.
      case ISD::SETUGE:  // (X uge/ge Y) ? X : Y -> max
      case ISD::SETGE:
        Opcode = X86ISD::FMAX;
        break;
      }
    } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
      switch (CC) {
      default: break;
      case ISD::SETOGT: // (X > Y) ? Y : X -> min
      case ISD::SETUGT:
      case ISD::SETGT:
        if (!UnsafeFPMath) break;
        // FALL THROUGH.
      case ISD::SETUGE:  // (X uge/ge Y) ? Y : X -> min
      case ISD::SETGE:
        Opcode = X86ISD::FMIN;
        break;

      case ISD::SETOLE:   // (X <= Y) ? Y : X -> max
      case ISD::SETULE:
      case ISD::SETLE:
        if (!UnsafeFPMath) break;
        // FALL THROUGH.
      case ISD::SETOLT:   // (X olt/lt Y) ? Y : X -> max
      case ISD::SETLT:
        Opcode = X86ISD::FMAX;
        break;
      }
    }

    if (Opcode)
      return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
  }
  
  return SDValue();
}

/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
///                       when possible.
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
                                   const X86Subtarget *Subtarget) {
  // On X86 with SSE2 support, we can transform this to a vector shift if
  // all elements are shifted by the same amount.  We can't do this in legalize
  // because the a constant vector is typically transformed to a constant pool
  // so we have no knowledge of the shift amount.
  if (!Subtarget->hasSSE2())
    return SDValue();

  MVT VT = N->getValueType(0);
  if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
    return SDValue();

  SDValue ShAmtOp = N->getOperand(1);
  MVT EltVT = VT.getVectorElementType();
  DebugLoc DL = N->getDebugLoc();
  SDValue BaseShAmt;
  if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) {
    unsigned NumElts = VT.getVectorNumElements();
    unsigned i = 0;
    for (; i != NumElts; ++i) {
      SDValue Arg = ShAmtOp.getOperand(i);
      if (Arg.getOpcode() == ISD::UNDEF) continue;
      BaseShAmt = Arg;
      break;
    }
    for (; i != NumElts; ++i) {
      SDValue Arg = ShAmtOp.getOperand(i);
      if (Arg.getOpcode() == ISD::UNDEF) continue;
      if (Arg != BaseShAmt) {
        return SDValue();
      }
    }
  } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE &&
             isSplatMask(ShAmtOp.getOperand(2).getNode())) {
      BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
                              DAG.getIntPtrConstant(0));
  } else
    return SDValue();

  if (EltVT.bitsGT(MVT::i32))
    BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt);
  else if (EltVT.bitsLT(MVT::i32))
    BaseShAmt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, BaseShAmt);

  // The shift amount is identical so we can do a vector shift.
  SDValue  ValOp = N->getOperand(0);
  switch (N->getOpcode()) {
  default:
    assert(0 && "Unknown shift opcode!");
    break;
  case ISD::SHL:
    if (VT == MVT::v2i64)
      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                         DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
                         ValOp, BaseShAmt);
    if (VT == MVT::v4i32)
      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                         DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
                         ValOp, BaseShAmt);
    if (VT == MVT::v8i16)
      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                         DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
                         ValOp, BaseShAmt);
    break;
  case ISD::SRA:
    if (VT == MVT::v4i32)
      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                         DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
                         ValOp, BaseShAmt);
    if (VT == MVT::v8i16)
      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                         DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
                         ValOp, BaseShAmt);
    break;
  case ISD::SRL:
    if (VT == MVT::v2i64)
      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                         DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
                         ValOp, BaseShAmt);
    if (VT == MVT::v4i32)
      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                         DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
                         ValOp, BaseShAmt);
    if (VT ==  MVT::v8i16)
      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                         DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
                         ValOp, BaseShAmt);
    break;
  }
  return SDValue();
}

/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
                                     const X86Subtarget *Subtarget) {
  // Turn load->store of MMX types into GPR load/stores.  This avoids clobbering
  // the FP state in cases where an emms may be missing.
  // A preferable solution to the general problem is to figure out the right
  // places to insert EMMS.  This qualifies as a quick hack.
  StoreSDNode *St = cast<StoreSDNode>(N);
  if (St->getValue().getValueType().isVector() &&
      St->getValue().getValueType().getSizeInBits() == 64 &&
      isa<LoadSDNode>(St->getValue()) &&
      !cast<LoadSDNode>(St->getValue())->isVolatile() &&
      St->getChain().hasOneUse() && !St->isVolatile()) {
    SDNode* LdVal = St->getValue().getNode();
    LoadSDNode *Ld = 0;
    int TokenFactorIndex = -1;
    SmallVector<SDValue, 8> Ops;
    SDNode* ChainVal = St->getChain().getNode();
    // Must be a store of a load.  We currently handle two cases:  the load
    // is a direct child, and it's under an intervening TokenFactor.  It is
    // possible to dig deeper under nested TokenFactors.
    if (ChainVal == LdVal)
      Ld = cast<LoadSDNode>(St->getChain());
    else if (St->getValue().hasOneUse() &&
             ChainVal->getOpcode() == ISD::TokenFactor) {
      for (unsigned i=0, e = ChainVal->getNumOperands(); i != e; ++i) {
        if (ChainVal->getOperand(i).getNode() == LdVal) {
          TokenFactorIndex = i;
          Ld = cast<LoadSDNode>(St->getValue());
        } else
          Ops.push_back(ChainVal->getOperand(i));
      }
    }
    if (Ld) {
      DebugLoc DL = N->getDebugLoc();
      // If we are a 64-bit capable x86, lower to a single movq load/store pair.
      if (Subtarget->is64Bit()) {
        SDValue NewLd = DAG.getLoad(MVT::i64, DL, Ld->getChain(),
                                      Ld->getBasePtr(), Ld->getSrcValue(),
                                      Ld->getSrcValueOffset(), Ld->isVolatile(),
                                      Ld->getAlignment());
        SDValue NewChain = NewLd.getValue(1);
        if (TokenFactorIndex != -1) {
          Ops.push_back(NewChain);
          NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Ops[0],
                                 Ops.size());
        }
        return DAG.getStore(NewChain, DL, NewLd, St->getBasePtr(),
                            St->getSrcValue(), St->getSrcValueOffset(),
                            St->isVolatile(), St->getAlignment());
      }

      // Otherwise, lower to two 32-bit copies.
      SDValue LoAddr = Ld->getBasePtr();
      SDValue HiAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, LoAddr,
                                     DAG.getConstant(4, MVT::i32));

      SDValue LoLd = DAG.getLoad(MVT::i32, DL, Ld->getChain(), LoAddr,
                                   Ld->getSrcValue(), Ld->getSrcValueOffset(),
                                   Ld->isVolatile(), Ld->getAlignment());
      SDValue HiLd = DAG.getLoad(MVT::i32, DL, Ld->getChain(), HiAddr,
                                   Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
                                   Ld->isVolatile(),
                                   MinAlign(Ld->getAlignment(), 4));

      SDValue NewChain = LoLd.getValue(1);
      if (TokenFactorIndex != -1) {
        Ops.push_back(LoLd);
        Ops.push_back(HiLd);
        NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Ops[0],
                               Ops.size());
      }

      LoAddr = St->getBasePtr();
      HiAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, LoAddr,
                           DAG.getConstant(4, MVT::i32));

      SDValue LoSt = DAG.getStore(NewChain, DL, LoLd, LoAddr,
                          St->getSrcValue(), St->getSrcValueOffset(),
                          St->isVolatile(), St->getAlignment());
      SDValue HiSt = DAG.getStore(NewChain, DL, HiLd, HiAddr,
                                    St->getSrcValue(),
                                    St->getSrcValueOffset() + 4,
                                    St->isVolatile(),
                                    MinAlign(St->getAlignment(), 4));
      return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, LoSt, HiSt);
    }
  }
  return SDValue();
}

/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
/// X86ISD::FXOR nodes.
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
  // F[X]OR(0.0, x) -> x
  // F[X]OR(x, 0.0) -> x
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(1);
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(0);
  return SDValue();
}

/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
  // FAND(0.0, x) -> 0.0
  // FAND(x, 0.0) -> 0.0
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(0);
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(1);
  return SDValue();
}

static SDValue PerformBTCombine(SDNode *N,
                                SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI) {
  // BT ignores high bits in the bit index operand.
  SDValue Op1 = N->getOperand(1);
  if (Op1.hasOneUse()) {
    unsigned BitWidth = Op1.getValueSizeInBits();
    APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
    APInt KnownZero, KnownOne;
    TargetLowering::TargetLoweringOpt TLO(DAG);
    TargetLowering &TLI = DAG.getTargetLoweringInfo();
    if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) ||
        TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO))
      DCI.CommitTargetLoweringOpt(TLO);
  }
  return SDValue();
}

SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
  case ISD::BUILD_VECTOR:
    return PerformBuildVectorCombine(N, DAG, DCI, Subtarget, *this);
  case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:            return PerformShiftCombine(N, DAG, Subtarget);
  case ISD::STORE:          return PerformSTORECombine(N, DAG, Subtarget);
  case X86ISD::FXOR:
  case X86ISD::FOR:         return PerformFORCombine(N, DAG);
  case X86ISD::FAND:        return PerformFANDCombine(N, DAG);
  case X86ISD::BT:          return PerformBTCombine(N, DAG, DCI);
  }

  return SDValue();
}

//===----------------------------------------------------------------------===//
//                           X86 Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'A':
      return C_Register;
    case 'f':
    case 'r':
    case 'R':
    case 'l':
    case 'q':
    case 'Q':
    case 'x':
    case 'y':
    case 'Y':
      return C_RegisterClass;
    case 'e':
    case 'Z':
      return C_Other;
    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// LowerXConstraint - try to replace an X constraint, which matches anything,
/// with another that has more specific requirements based on the type of the
/// corresponding operand.
const char *X86TargetLowering::
LowerXConstraint(MVT ConstraintVT) const {
  // FP X constraints get lowered to SSE1/2 registers if available, otherwise
  // 'f' like normal targets.
  if (ConstraintVT.isFloatingPoint()) {
    if (Subtarget->hasSSE2())
      return "Y";
    if (Subtarget->hasSSE1())
      return "x";
  }

  return TargetLowering::LowerXConstraint(ConstraintVT);
}

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     char Constraint,
                                                     bool hasMemory,
                                                     std::vector<SDValue>&Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result(0, 0);

  switch (Constraint) {
  default: break;
  case 'I':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() <= 31) {
        Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'J':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() <= 63) {
        Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'N':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() <= 255) {
        Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'e': {
    // 32-bit signed value
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      const ConstantInt *CI = C->getConstantIntValue();
      if (CI->isValueValidForType(Type::Int32Ty, C->getSExtValue())) {
        // Widen to 64 bits here to get it sign extended.
        Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64);
        break;
      }
    // FIXME gcc accepts some relocatable values here too, but only in certain
    // memory models; it's complicated.
    }
    return;
  }
  case 'Z': {
    // 32-bit unsigned value
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      const ConstantInt *CI = C->getConstantIntValue();
      if (CI->isValueValidForType(Type::Int32Ty, C->getZExtValue())) {
        Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
        break;
      }
    }
    // FIXME gcc accepts some relocatable values here too, but only in certain
    // memory models; it's complicated.
    return;
  }
  case 'i': {
    // Literal immediates are always ok.
    if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
      // Widen to 64 bits here to get it sign extended.
      Result = DAG.getTargetConstant(CST->getSExtValue(), MVT::i64);
      break;
    }

    // If we are in non-pic codegen mode, we allow the address of a global (with
    // an optional displacement) to be used with 'i'.
    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
    int64_t Offset = 0;

    // Match either (GA) or (GA+C)
    if (GA) {
      Offset = GA->getOffset();
    } else if (Op.getOpcode() == ISD::ADD) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
      if (C && GA) {
        Offset = GA->getOffset()+C->getZExtValue();
      } else {
        C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
        if (C && GA)
          Offset = GA->getOffset()+C->getZExtValue();
        else
          C = 0, GA = 0;
      }
    }

    if (GA) {
      if (hasMemory)
        Op = LowerGlobalAddress(GA->getGlobal(), Op.getDebugLoc(),
                                Offset, DAG);
      else
        Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
                                        Offset);
      Result = Op;
      break;
    }

    // Otherwise, not valid for this mode.
    return;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
                                                      Ops, DAG);
}

std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      else if (VT == MVT::i64)
        return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0);
      break;
    }
  }

  return std::vector<unsigned>();
}

std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT VT) const {
  // First, see if this is a constraint that directly corresponds to an LLVM
  // register class.
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
    case 'l':   // INDEX_REGS
      if (VT == MVT::i8)
        return std::make_pair(0U, X86::GR8RegisterClass);
      if (VT == MVT::i16)
        return std::make_pair(0U, X86::GR16RegisterClass);
      if (VT == MVT::i32 || !Subtarget->is64Bit())
        return std::make_pair(0U, X86::GR32RegisterClass);
      return std::make_pair(0U, X86::GR64RegisterClass);
    case 'f':  // FP Stack registers.
      // If SSE is enabled for this VT, use f80 to ensure the isel moves the
      // value to the correct fpstack register class.
      if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
        return std::make_pair(0U, X86::RFP32RegisterClass);
      if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
        return std::make_pair(0U, X86::RFP64RegisterClass);
      return std::make_pair(0U, X86::RFP80RegisterClass);
    case 'y':   // MMX_REGS if MMX allowed.
      if (!Subtarget->hasMMX()) break;
      return std::make_pair(0U, X86::VR64RegisterClass);
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (!Subtarget->hasSSE2()) break;
      // FALL THROUGH.
    case 'x':   // SSE_REGS if SSE1 allowed
      if (!Subtarget->hasSSE1()) break;

      switch (VT.getSimpleVT()) {
      default: break;
      // Scalar SSE types.
      case MVT::f32:
      case MVT::i32:
        return std::make_pair(0U, X86::FR32RegisterClass);
      case MVT::f64:
      case MVT::i64:
        return std::make_pair(0U, X86::FR64RegisterClass);
      // Vector types.
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        return std::make_pair(0U, X86::VR128RegisterClass);
      }
      break;
    }
  }

  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
  std::pair<unsigned, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // Not found as a standard register?
  if (Res.second == 0) {
    // GCC calls "st(0)" just plain "st".
    if (StringsEqualNoCase("{st}", Constraint)) {
      Res.first = X86::ST0;
      Res.second = X86::RFP80RegisterClass;
    }
    // 'A' means EAX + EDX.
    if (Constraint == "A") {
      Res.first = X86::EAX;
      Res.second = X86::GRADRegisterClass;
    }
    return Res;
  }

  // Otherwise, check to see if this is a register class of the wrong value
  // type.  For example, we want to map "{ax},i32" -> {eax}, we don't want it to
  // turn into {ax},{dx}.
  if (Res.second->hasType(VT))
    return Res;   // Correct type already, nothing to do.

  // All of the single-register GCC register classes map their values onto
  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
  // really want an 8-bit or 32-bit register, map to the appropriate register
  // class and return the appropriate register.
  if (Res.second == X86::GR16RegisterClass) {
    if (VT == MVT::i8) {
      unsigned DestReg = 0;
      switch (Res.first) {
      default: break;
      case X86::AX: DestReg = X86::AL; break;
      case X86::DX: DestReg = X86::DL; break;
      case X86::CX: DestReg = X86::CL; break;
      case X86::BX: DestReg = X86::BL; break;
      }
      if (DestReg) {
        Res.first = DestReg;
        Res.second = Res.second = X86::GR8RegisterClass;
      }
    } else if (VT == MVT::i32) {
      unsigned DestReg = 0;
      switch (Res.first) {
      default: break;
      case X86::AX: DestReg = X86::EAX; break;
      case X86::DX: DestReg = X86::EDX; break;
      case X86::CX: DestReg = X86::ECX; break;
      case X86::BX: DestReg = X86::EBX; break;
      case X86::SI: DestReg = X86::ESI; break;
      case X86::DI: DestReg = X86::EDI; break;
      case X86::BP: DestReg = X86::EBP; break;
      case X86::SP: DestReg = X86::ESP; break;
      }
      if (DestReg) {
        Res.first = DestReg;
        Res.second = Res.second = X86::GR32RegisterClass;
      }
    } else if (VT == MVT::i64) {
      unsigned DestReg = 0;
      switch (Res.first) {
      default: break;
      case X86::AX: DestReg = X86::RAX; break;
      case X86::DX: DestReg = X86::RDX; break;
      case X86::CX: DestReg = X86::RCX; break;
      case X86::BX: DestReg = X86::RBX; break;
      case X86::SI: DestReg = X86::RSI; break;
      case X86::DI: DestReg = X86::RDI; break;
      case X86::BP: DestReg = X86::RBP; break;
      case X86::SP: DestReg = X86::RSP; break;
      }
      if (DestReg) {
        Res.first = DestReg;
        Res.second = Res.second = X86::GR64RegisterClass;
      }
    }
  } else if (Res.second == X86::FR32RegisterClass ||
             Res.second == X86::FR64RegisterClass ||
             Res.second == X86::VR128RegisterClass) {
    // Handle references to XMM physical registers that got mapped into the
    // wrong class.  This can happen with constraints like {xmm0} where the
    // target independent register mapper will just pick the first match it can
    // find, ignoring the required type.
    if (VT == MVT::f32)
      Res.second = X86::FR32RegisterClass;
    else if (VT == MVT::f64)
      Res.second = X86::FR64RegisterClass;
    else if (X86::VR128RegisterClass->hasType(VT))
      Res.second = X86::VR128RegisterClass;
  }

  return Res;
}

//===----------------------------------------------------------------------===//
//                           X86 Widen vector type
//===----------------------------------------------------------------------===//

/// getWidenVectorType: given a vector type, returns the type to widen
/// to (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
/// If there is no vector type that we want to widen to, returns MVT::Other
/// When and where to widen is target dependent based on the cost of
/// scalarizing vs using the wider vector type.

MVT X86TargetLowering::getWidenVectorType(MVT VT) const {
  assert(VT.isVector());
  if (isTypeLegal(VT))
    return VT;

  // TODO: In computeRegisterProperty, we can compute the list of legal vector
  //       type based on element type.  This would speed up our search (though
  //       it may not be worth it since the size of the list is relatively
  //       small).
  MVT EltVT = VT.getVectorElementType();
  unsigned NElts = VT.getVectorNumElements();

  // On X86, it make sense to widen any vector wider than 1
  if (NElts <= 1)
    return MVT::Other;

  for (unsigned nVT = MVT::FIRST_VECTOR_VALUETYPE;
       nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
    MVT SVT = (MVT::SimpleValueType)nVT;

    if (isTypeLegal(SVT) &&
        SVT.getVectorElementType() == EltVT &&
        SVT.getVectorNumElements() > NElts)
      return SVT;
  }
  return MVT::Other;
}