X86ISelLowering.cpp

    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
    if (Idx == 0)
      return Op;

    // UNPCKHPD the element to the lowest double word, then movsd.
    // Note if the lower 64 bits of the result of the UNPCKHPD is then stored
    // to a f64mem, the whole operation is folded into a single MOVHPDmr.
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
    SmallVector<SDOperand, 8> IdxVec;
    IdxVec.push_back(DAG.getConstant(1, MVT::getVectorElementType(MaskVT)));
    IdxVec.
      push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorElementType(MaskVT)));
    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &IdxVec[0], IdxVec.size());
    SDOperand Vec = Op.getOperand(0);
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getConstant(0, getPointerTy()));
  }

  return SDOperand();
}

SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType EVT = MVT::getVectorElementType(VT);
  if (EVT == MVT::i8)
    return SDOperand();

  SDOperand N0 = Op.getOperand(0);
  SDOperand N1 = Op.getOperand(1);
  SDOperand N2 = Op.getOperand(2);

  if (MVT::getSizeInBits(EVT) == 16) {
    // Transform it so it match pinsrw which expects a 16-bit value in a GR32
    // as its second argument.
    if (N1.getValueType() != MVT::i32)
      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
    if (N2.getValueType() != MVT::i32)
      N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(),getPointerTy());
    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
  }

  N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
  unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
  MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
  SmallVector<SDOperand, 4> MaskVec;
  for (unsigned i = 0; i < 4; ++i)
    MaskVec.push_back(DAG.getConstant((i == Idx) ? i+4 : i, MaskEVT));
  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
                     DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
                                 &MaskVec[0], MaskVec.size()));
}

SDOperand
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOV32ri.
SDOperand
X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
                                               getPointerTy(),
                                               CP->getAlignment());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  return Result;
}

SDOperand
X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }
  
  // For Darwin & Mingw32, external and weak symbols are indirect, so we want to
  // load the value at address GV, not the value of GV itself. This means that
  // the GlobalAddress must be in the base or index register of the address, not
  // the GV offset field. Platform check is inside GVRequiresExtraLoad() call
  // The same applies for external symbols during PIC codegen
  if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
    Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);

  return Result;
}

// Lower ISD::GlobalTLSAddress using the "general dynamic" model
static SDOperand
LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                              const MVT::ValueType PtrVT) {
  SDOperand InFlag;
  SDOperand Chain = DAG.getCopyToReg(DAG.getEntryNode(), X86::EBX,
                                     DAG.getNode(X86ISD::GlobalBaseReg,
                                                 PtrVT), InFlag);
  InFlag = Chain.getValue(1);

  // emit leal symbol@TLSGD(,%ebx,1), %eax
  SDVTList NodeTys = DAG.getVTList(PtrVT, MVT::Other, MVT::Flag);
  SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
                                             GA->getValueType(0),
                                             GA->getOffset());
  SDOperand Ops[] = { Chain,  TGA, InFlag };
  SDOperand Result = DAG.getNode(X86ISD::TLSADDR, NodeTys, Ops, 3);
  InFlag = Result.getValue(2);
  Chain = Result.getValue(1);

  // call ___tls_get_addr. This function receives its argument in
  // the register EAX.
  Chain = DAG.getCopyToReg(Chain, X86::EAX, Result, InFlag);
  InFlag = Chain.getValue(1);

  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDOperand Ops1[] = { Chain,
                      DAG.getTargetExternalSymbol("___tls_get_addr",
                                                  PtrVT),
                      DAG.getRegister(X86::EAX, PtrVT),
                      DAG.getRegister(X86::EBX, PtrVT),
                      InFlag };
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops1, 5);
  InFlag = Chain.getValue(1);

  return DAG.getCopyFromReg(Chain, X86::EAX, PtrVT, InFlag);
}

// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
// "local exec" model.
static SDOperand
LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                         const MVT::ValueType PtrVT) {
  // Get the Thread Pointer
  SDOperand ThreadPointer = DAG.getNode(X86ISD::THREAD_POINTER, PtrVT);
  // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
  // exec)
  SDOperand TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
                                             GA->getValueType(0),
                                             GA->getOffset());
  SDOperand Offset = DAG.getNode(X86ISD::Wrapper, PtrVT, TGA);

  if (GA->getGlobal()->isDeclaration()) // initial exec TLS model
    Offset = DAG.getLoad(PtrVT, DAG.getEntryNode(), Offset, NULL, 0);

  // The address of the thread local variable is the add of the thread
  // pointer with the offset of the variable.
  return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset);
}

SDOperand
X86TargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) {
  // TODO: implement the "local dynamic" model
  // TODO: implement the "initial exec"model for pic executables
  assert(!Subtarget->is64Bit() && Subtarget->isTargetELF() &&
         "TLS not implemented for non-ELF and 64-bit targets");
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  // If the relocation model is PIC, use the "General Dynamic" TLS Model,
  // otherwise use the "Local Exec"TLS Model
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
    return LowerToTLSGeneralDynamicModel(GA, DAG, getPointerTy());
  else
    return LowerToTLSExecModel(GA, DAG, getPointerTy());
}

SDOperand
X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
  SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  return Result;
}

SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->isPICStyleRIPRel()) {
    Result = DAG.getNode(ISD::ADD, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                         Result);
  }

  return Result;
}

/// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
/// take a 2 x i32 value to shift plus a shift amount. 
SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
         "Not an i64 shift!");
  bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
  SDOperand ShOpLo = Op.getOperand(0);
  SDOperand ShOpHi = Op.getOperand(1);
  SDOperand ShAmt  = Op.getOperand(2);
  SDOperand Tmp1 = isSRA ?
    DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
    DAG.getConstant(0, MVT::i32);

  SDOperand Tmp2, Tmp3;
  if (Op.getOpcode() == ISD::SHL_PARTS) {
    Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
  } else {
    Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
    Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
  }

  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
  SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
                                  DAG.getConstant(32, MVT::i8));
  SDOperand Cond = DAG.getNode(X86ISD::CMP, MVT::i32,
                               AndNode, DAG.getConstant(0, MVT::i8));

  SDOperand Hi, Lo;
  SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
  VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
  SmallVector<SDOperand, 4> Ops;
  if (Op.getOpcode() == ISD::SHL_PARTS) {
    Ops.push_back(Tmp2);
    Ops.push_back(Tmp3);
    Ops.push_back(CC);
    Ops.push_back(Cond);
    Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());

    Ops.clear();
    Ops.push_back(Tmp3);
    Ops.push_back(Tmp1);
    Ops.push_back(CC);
    Ops.push_back(Cond);
    Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
  } else {
    Ops.push_back(Tmp2);
    Ops.push_back(Tmp3);
    Ops.push_back(CC);
    Ops.push_back(Cond);
    Lo = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());

    Ops.clear();
    Ops.push_back(Tmp3);
    Ops.push_back(Tmp1);
    Ops.push_back(CC);
    Ops.push_back(Cond);
    Hi = DAG.getNode(X86ISD::CMOV, MVT::i32, &Ops[0], Ops.size());
  }

  VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
  Ops.clear();
  Ops.push_back(Lo);
  Ops.push_back(Hi);
  return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
}

SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
         Op.getOperand(0).getValueType() >= MVT::i16 &&
         "Unknown SINT_TO_FP to lower!");

  SDOperand Result;
  MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
  unsigned Size = MVT::getSizeInBits(SrcVT)/8;
  MachineFunction &MF = DAG.getMachineFunction();
  int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
  SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
                                 StackSlot, NULL, 0);

  // These are really Legal; caller falls through into that case.
  if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f32 && X86ScalarSSEf32)
    return Result;
  if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f64 && X86ScalarSSEf64)
    return Result;
  if (SrcVT==MVT::i64 && Op.getValueType() != MVT::f80 && 
      Subtarget->is64Bit())
    return Result;

  // Build the FILD
  SDVTList Tys;
  bool useSSE = (X86ScalarSSEf32 && Op.getValueType() == MVT::f32) ||
                (X86ScalarSSEf64 && Op.getValueType() == MVT::f64);
  if (useSSE)
    Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
  else
    Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(StackSlot);
  Ops.push_back(DAG.getValueType(SrcVT));
  Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
                       Tys, &Ops[0], Ops.size());

  if (useSSE) {
    Chain = Result.getValue(1);
    SDOperand InFlag = Result.getValue(2);

    // FIXME: Currently the FST is flagged to the FILD_FLAG. This
    // shouldn't be necessary except that RFP cannot be live across
    // multiple blocks. When stackifier is fixed, they can be uncoupled.
    MachineFunction &MF = DAG.getMachineFunction();
    int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
    Tys = DAG.getVTList(MVT::Other);
    SmallVector<SDOperand, 8> Ops;
    Ops.push_back(Chain);
    Ops.push_back(Result);
    Ops.push_back(StackSlot);
    Ops.push_back(DAG.getValueType(Op.getValueType()));
    Ops.push_back(InFlag);
    Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
    Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0);
  }

  return Result;
}

std::pair<SDOperand,SDOperand> X86TargetLowering::
FP_TO_SINTHelper(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
         "Unknown FP_TO_SINT to lower!");

  // These are really Legal.
  if (Op.getValueType() == MVT::i32 && 
      X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32)
    return std::make_pair(SDOperand(), SDOperand());
  if (Op.getValueType() == MVT::i32 && 
      X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)
    return std::make_pair(SDOperand(), SDOperand());
  if (Subtarget->is64Bit() &&
      Op.getValueType() == MVT::i64 &&
      Op.getOperand(0).getValueType() != MVT::f80)
    return std::make_pair(SDOperand(), SDOperand());

  // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
  // stack slot.
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
  int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
  unsigned Opc;
  switch (Op.getValueType()) {
  default: assert(0 && "Invalid FP_TO_SINT to lower!");
  case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
  case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
  case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
  }

  SDOperand Chain = DAG.getEntryNode();
  SDOperand Value = Op.getOperand(0);
  if ((X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) ||
      (X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)) {
    assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
    Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
    SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
    SDOperand Ops[] = {
      Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
    };
    Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
    Chain = Value.getValue(1);
    SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
    StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
  }

  // Build the FP_TO_INT*_IN_MEM
  SDOperand Ops[] = { Chain, Value, StackSlot };
  SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3);

  return std::make_pair(FIST, StackSlot);
}

SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
  std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(Op, DAG);
  SDOperand FIST = Vals.first, StackSlot = Vals.second;
  if (FIST.Val == 0) return SDOperand();
  
  // Load the result.
  return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
}

SDNode *X86TargetLowering::ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG) {
  std::pair<SDOperand,SDOperand> Vals = FP_TO_SINTHelper(SDOperand(N, 0), DAG);
  SDOperand FIST = Vals.first, StackSlot = Vals.second;
  if (FIST.Val == 0) return 0;
  
  // Return an i64 load from the stack slot.
  SDOperand Res = DAG.getLoad(MVT::i64, FIST, StackSlot, NULL, 0);

  // Use a MERGE_VALUES node to drop the chain result value.
  return DAG.getNode(ISD::MERGE_VALUES, MVT::i64, Res).Val;
}  

SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType EltVT = VT;
  if (MVT::isVector(VT))
    EltVT = MVT::getVectorElementType(VT);
  const Type *OpNTy =  MVT::getTypeForValueType(EltVT);
  std::vector<Constant*> CV;
  if (EltVT == MVT::f64) {
    Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, ~(1ULL << 63))));
    CV.push_back(C);
    CV.push_back(C);
  } else {
    Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, ~(1U << 31))));
    CV.push_back(C);
    CV.push_back(C);
    CV.push_back(C);
    CV.push_back(C);
  }
  Constant *C = ConstantVector::get(CV);
  SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
  SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
                               false, 16);
  return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
}

SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType EltVT = VT;
  unsigned EltNum = 1;
  if (MVT::isVector(VT)) {
    EltVT = MVT::getVectorElementType(VT);
    EltNum = MVT::getVectorNumElements(VT);
  }
  const Type *OpNTy =  MVT::getTypeForValueType(EltVT);
  std::vector<Constant*> CV;
  if (EltVT == MVT::f64) {
    Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, 1ULL << 63)));
    CV.push_back(C);
    CV.push_back(C);
  } else {
    Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, 1U << 31)));
    CV.push_back(C);
    CV.push_back(C);
    CV.push_back(C);
    CV.push_back(C);
  }
  Constant *C = ConstantVector::get(CV);
  SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
  SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
                               false, 16);
  if (MVT::isVector(VT)) {
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::XOR, MVT::v2i64,
                    DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)),
                    DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask)));
  } else {
    return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
  }
}

SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Op0 = Op.getOperand(0);
  SDOperand Op1 = Op.getOperand(1);
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType SrcVT = Op1.getValueType();
  const Type *SrcTy =  MVT::getTypeForValueType(SrcVT);

  // If second operand is smaller, extend it first.
  if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) {
    Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1);
    SrcVT = VT;
    SrcTy = MVT::getTypeForValueType(SrcVT);
  }
  // And if it is bigger, shrink it first.
  if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
    Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1);
    SrcVT = VT;
    SrcTy = MVT::getTypeForValueType(SrcVT);
  }

  // At this point the operands and the result should have the same
  // type, and that won't be f80 since that is not custom lowered.

  // First get the sign bit of second operand.
  std::vector<Constant*> CV;
  if (SrcVT == MVT::f64) {
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 1ULL << 63))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0))));
  } else {
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 1U << 31))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
  }
  Constant *C = ConstantVector::get(CV);
  SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
  SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0,
                                false, 16);
  SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);

  // Shift sign bit right or left if the two operands have different types.
  if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
    // Op0 is MVT::f32, Op1 is MVT::f64.
    SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit);
    SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit,
                          DAG.getConstant(32, MVT::i32));
    SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
    SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
                          DAG.getConstant(0, getPointerTy()));
  }

  // Clear first operand sign bit.
  CV.clear();
  if (VT == MVT::f64) {
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, ~(1ULL << 63)))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0))));
  } else {
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, ~(1U << 31)))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
  }
  C = ConstantVector::get(CV);
  CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
  SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
                                false, 16);
  SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);

  // Or the value with the sign bit.
  return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
}

SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
  SDOperand Cond;
  SDOperand Op0 = Op.getOperand(0);
  SDOperand Op1 = Op.getOperand(1);
  SDOperand CC = Op.getOperand(2);
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
  unsigned X86CC;

  if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
                     Op0, Op1, DAG)) {
    Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1);
    return DAG.getNode(X86ISD::SETCC, MVT::i8,
                       DAG.getConstant(X86CC, MVT::i8), Cond);
  }

  assert(isFP && "Illegal integer SetCC!");

  Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1);
  switch (SetCCOpcode) {
  default: assert(false && "Illegal floating point SetCC!");
  case ISD::SETOEQ: {  // !PF & ZF
    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                 DAG.getConstant(X86::COND_NP, MVT::i8), Cond);
    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                 DAG.getConstant(X86::COND_E, MVT::i8), Cond);
    return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
  }
  case ISD::SETUNE: {  // PF | !ZF
    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                 DAG.getConstant(X86::COND_P, MVT::i8), Cond);
    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                 DAG.getConstant(X86::COND_NE, MVT::i8), Cond);
    return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
  }
  }
}


SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = true;
  SDOperand Cond  = Op.getOperand(0);
  SDOperand CC;

  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerSETCC(Cond, DAG);

  // If condition flag is set by a X86ISD::CMP, then use it as the condition
  // setting operand in place of the X86ISD::SETCC.
  if (Cond.getOpcode() == X86ISD::SETCC) {
    CC = Cond.getOperand(0);

    SDOperand Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();
    MVT::ValueType VT = Op.getValueType();
    bool IllegalFPCMov = false;
    if (VT == MVT::f32 && !X86ScalarSSEf32)
      IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
    else if (VT == MVT::f64 && !X86ScalarSSEf64)
      IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
    else if (VT == MVT::f80)
      IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
    if ((Opc == X86ISD::CMP ||
         Opc == X86ISD::COMI ||
         Opc == X86ISD::UCOMI) && !IllegalFPCMov) {
      Cond = Cmp;
      addTest = false;
    }
  }

  if (addTest) {
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
    Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8));
  }

  const MVT::ValueType *VTs = DAG.getNodeValueTypes(Op.getValueType(),
                                                    MVT::Flag);
  SmallVector<SDOperand, 4> Ops;
  // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
  // condition is true.
  Ops.push_back(Op.getOperand(2));
  Ops.push_back(Op.getOperand(1));
  Ops.push_back(CC);
  Ops.push_back(Cond);
  return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
}

SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = true;
  SDOperand Chain = Op.getOperand(0);
  SDOperand Cond  = Op.getOperand(1);
  SDOperand Dest  = Op.getOperand(2);
  SDOperand CC;

  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerSETCC(Cond, DAG);

  // If condition flag is set by a X86ISD::CMP, then use it as the condition
  // setting operand in place of the X86ISD::SETCC.
  if (Cond.getOpcode() == X86ISD::SETCC) {
    CC = Cond.getOperand(0);

    SDOperand Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();
    if (Opc == X86ISD::CMP ||
        Opc == X86ISD::COMI ||
        Opc == X86ISD::UCOMI) {
      Cond = Cmp;
      addTest = false;
    }
  }

  if (addTest) {
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
    Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8));
  }
  return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
                     Chain, Op.getOperand(2), CC, Cond);
}

SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;

   if (Subtarget->is64Bit())
     if(CallingConv==CallingConv::Fast && isTailCall && PerformTailCallOpt)
       return LowerX86_TailCallTo(Op, DAG, CallingConv);
     else
       return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
  else
    switch (CallingConv) {
    default:
      assert(0 && "Unsupported calling convention");
    case CallingConv::Fast:
      if (isTailCall && PerformTailCallOpt)
        return LowerX86_TailCallTo(Op, DAG, CallingConv);
      else
        return LowerCCCCallTo(Op,DAG, CallingConv);
    case CallingConv::C:
    case CallingConv::X86_StdCall:
      return LowerCCCCallTo(Op, DAG, CallingConv);
    case CallingConv::X86_FastCall:
      return LowerFastCCCallTo(Op, DAG, CallingConv);
    }
}


// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
// Calls to _alloca is needed to probe the stack when allocating more than 4k
// bytes in one go. Touching the stack at 4K increments is necessary to ensure
// that the guard pages used by the OS virtual memory manager are allocated in
// correct sequence.
SDOperand
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
                                           SelectionDAG &DAG) {
  assert(Subtarget->isTargetCygMing() &&
         "This should be used only on Cygwin/Mingw targets");
  
  // Get the inputs.
  SDOperand Chain = Op.getOperand(0);
  SDOperand Size  = Op.getOperand(1);
  // FIXME: Ensure alignment here

  SDOperand Flag;
  
  MVT::ValueType IntPtr = getPointerTy();
  MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32);

  Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
  Flag = Chain.getValue(1);

  SDVTList  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDOperand Ops[] = { Chain,
                      DAG.getTargetExternalSymbol("_alloca", IntPtr),
                      DAG.getRegister(X86::EAX, IntPtr),
                      Flag };
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4);
  Flag = Chain.getValue(1);

  Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);
  
  std::vector<MVT::ValueType> Tys;
  Tys.push_back(SPTy);
  Tys.push_back(MVT::Other);
  SDOperand Ops1[2] = { Chain.getValue(0), Chain };
  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
}

SDOperand
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  const Function* Fn = MF.getFunction();
  if (Fn->hasExternalLinkage() &&
      Subtarget->isTargetCygMing() &&
      Fn->getName() == "main")
    MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true);

  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  if (Subtarget->is64Bit())
    return LowerX86_64CCCArguments(Op, DAG);
  else
    switch(CC) {
    default:
      assert(0 && "Unsupported calling convention");
    case CallingConv::Fast:
      return LowerCCCArguments(Op,DAG, true);
      // Falls through
    case CallingConv::C:
      return LowerCCCArguments(Op, DAG);
    case CallingConv::X86_StdCall:
      MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall);
      return LowerCCCArguments(Op, DAG, true);
    case CallingConv::X86_FastCall:
      MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall);
      return LowerFastCCArguments(Op, DAG);
    }
}

SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand InFlag(0, 0);
  SDOperand Chain = Op.getOperand(0);
  unsigned Align =
    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
  if (Align == 0) Align = 1;

  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  // If not DWORD aligned or size is more than the threshold, call memset.
  // The libc version is likely to be faster for these cases. It can use the
  // address value and run time information about the CPU.
  if ((Align & 3) != 0 ||
      (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) {
    MVT::ValueType IntPtr = getPointerTy();
    const Type *IntPtrTy = getTargetData()->getIntPtrType();
    TargetLowering::ArgListTy Args; 
    TargetLowering::ArgListEntry Entry;
    Entry.Node = Op.getOperand(1);
    Entry.Ty = IntPtrTy;
    Args.push_back(Entry);
    // Extend the unsigned i8 argument to be an int value for the call.
    Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
    Entry.Ty = IntPtrTy;
    Args.push_back(Entry);
    Entry.Node = Op.getOperand(3);
    Args.push_back(Entry);
    std::pair<SDOperand,SDOperand> CallResult =
      LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
                  DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
    return CallResult.second;
  }

  MVT::ValueType AVT;
  SDOperand Count;
  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  unsigned BytesLeft = 0;
  bool TwoRepStos = false;
  if (ValC) {
    unsigned ValReg;
    uint64_t Val = ValC->getValue() & 255;

    // If the value is a constant, then we can potentially use larger sets.
    switch (Align & 3) {
      case 2:   // WORD aligned
        AVT = MVT::i16;
        ValReg = X86::AX;
        Val = (Val << 8) | Val;
        break;
      case 0:  // DWORD aligned
        AVT = MVT::i32;
        ValReg = X86::EAX;
        Val = (Val << 8)  | Val;
        Val = (Val << 16) | Val;
        if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) {  // QWORD aligned
          AVT = MVT::i64;
          ValReg = X86::RAX;
          Val = (Val << 32) | Val;
        }
        break;
      default:  // Byte aligned
        AVT = MVT::i8;
        ValReg = X86::AL;
        Count = Op.getOperand(3);
        break;
    }

    if (AVT > MVT::i8) {
      if (I) {
        unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
        Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
        BytesLeft = I->getValue() % UBytes;
      } else {
        assert(AVT >= MVT::i32 &&
               "Do not use rep;stos if not at least DWORD aligned");
        Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
                            Op.getOperand(3), DAG.getConstant(2, MVT::i8));
        TwoRepStos = true;
      }
    }

    Chain  = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
                              InFlag);
    InFlag = Chain.getValue(1);
  } else {
    AVT = MVT::i8;
    Count  = Op.getOperand(3);
    Chain  = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
    InFlag = Chain.getValue(1);
  }

  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
                            Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
                            Op.getOperand(1), InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getValueType(AVT));
  Ops.push_back(InFlag);
  Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());

  if (TwoRepStos) {
    InFlag = Chain.getValue(1);
    Count = Op.getOperand(3);
    MVT::ValueType CVT = Count.getValueType();
    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
    Chain  = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
                              Left, InFlag);
    InFlag = Chain.getValue(1);
    Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    Ops.clear();
    Ops.push_back(Chain);
    Ops.push_back(DAG.getValueType(MVT::i8));
    Ops.push_back(InFlag);
    Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
  } else if (BytesLeft) {
    // Issue stores for the last 1 - 7 bytes.
    SDOperand Value;
    unsigned Val = ValC->getValue() & 255;
    unsigned Offset = I->getValue() - BytesLeft;
    SDOperand DstAddr = Op.getOperand(1);
    MVT::ValueType AddrVT = DstAddr.getValueType();
    if (BytesLeft >= 4) {
      Val = (Val << 8)  | Val;
      Val = (Val << 16) | Val;
      Value = DAG.getConstant(Val, MVT::i32);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                       DAG.getConstant(Offset, AddrVT)),
                           NULL, 0);
      BytesLeft -= 4;
      Offset += 4;
    }
    if (BytesLeft >= 2) {
      Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                       DAG.getConstant(Offset, AddrVT)),
                           NULL, 0);
      BytesLeft -= 2;
      Offset += 2;
    }
    if (BytesLeft == 1) {
      Value = DAG.getConstant(Val, MVT::i8);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                       DAG.getConstant(Offset, AddrVT)),
                           NULL, 0);
    }
  }

  return Chain;
}

SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain,
                                               SDOperand Dest,
                                               SDOperand Source,
                                               unsigned Size,
                                               unsigned Align,
                                               SelectionDAG &DAG) {
  MVT::ValueType AVT;
  unsigned BytesLeft = 0;
  switch (Align & 3) {
    case 2:   // WORD aligned
      AVT = MVT::i16;
      break;
    case 0:  // DWORD aligned
      AVT = MVT::i32;
      if (Subtarget->is64Bit() && ((Align & 0xF) == 0))  // QWORD aligned
        AVT = MVT::i64;
      break;
    default:  // Byte aligned
      AVT = MVT::i8;
      break;
  }

  unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
  SDOperand Count = DAG.getConstant(Size / UBytes, getPointerTy());
  BytesLeft = Size % UBytes;

  SDOperand InFlag(0, 0);
  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
                            Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
                            Dest, InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
                            Source, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getValueType(AVT));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());

  if (BytesLeft) {
    // Issue loads and stores for the last 1 - 7 bytes.
    unsigned Offset = Size - BytesLeft;
    SDOperand DstAddr = Dest;
    MVT::ValueType DstVT = DstAddr.getValueType();
    SDOperand SrcAddr = Source;
    MVT::ValueType SrcVT = SrcAddr.getValueType();
    SDOperand Value;
    if (BytesLeft >= 4) {
      Value = DAG.getLoad(MVT::i32, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          NULL, 0);
      Chain = Value.getValue(1);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                       DAG.getConstant(Offset, DstVT)),
                           NULL, 0);
      BytesLeft -= 4;
      Offset += 4;
    }
    if (BytesLeft >= 2) {
      Value = DAG.getLoad(MVT::i16, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          NULL, 0);