Skip to content
X86ISelLowering.cpp 226 KiB
Newer Older
  const Type *OpNTy =  MVT::getTypeForValueType(EltVT);
    Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, ~(1ULL << 63))));
    Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, ~(1U << 31))));
    CV.push_back(C);
    CV.push_back(C);
    CV.push_back(C);
    CV.push_back(C);
  Constant *C = ConstantVector::get(CV);
  SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
  SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
                               false, 16);
  return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
}

// LowerFNEG - Lower an FNEG node by flipping the sign bit of the operand: the
// operand is combined (XOR) with a mask loaded from the constant pool whose
// elements have only the top bit set (bit 63 for the 64-bit constant, bit 31
// for the 32-bit constant).
// NOTE(review): EltVT and CV are used below but are not declared in the visible
// text, the two `Constant *C` definitions are not guarded by any condition,
// and the function's closing brace is absent -- lines appear to be missing
// from this copy; confirm against the upstream file.
SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  unsigned EltNum = 1;
  if (MVT::isVector(VT)) {
    EltVT = MVT::getVectorElementType(VT);
    EltNum = MVT::getVectorNumElements(VT);
  }
  const Type *OpNTy =  MVT::getTypeForValueType(EltVT);
    Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(64, 1ULL << 63)));
    Constant *C = ConstantFP::get(OpNTy, APFloat(APInt(32, 1U << 31)));
    CV.push_back(C);
    CV.push_back(C);
    CV.push_back(C);
    CV.push_back(C);
  // Materialize the mask in the constant pool and load it from there.
  Constant *C = ConstantVector::get(CV);
  SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
  SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
                               false, 16);
  // Vector types: bitcast operand and mask to v2i64, XOR them as integers and
  // bitcast the result back to VT.  Scalar types use X86ISD::FXOR directly.
  if (MVT::isVector(VT)) {
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::XOR, MVT::v2i64,
                    DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)),
                    DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask)));
  } else {
    return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
  }
// LowerFCOPYSIGN - Lower an FCOPYSIGN node.  The sign bit of operand 1 is
// isolated with X86ISD::FAND against a sign-bit mask, the sign bit of operand 0
// is cleared with X86ISD::FAND against the inverted mask, and the two pieces
// are merged with X86ISD::FOR.  Both masks are loaded from the constant pool.
// NOTE(review): several closing braces and what look like `else` arms (between
// the 64-bit and 32-bit mask constants) are absent from the visible text --
// lines appear to be missing from this copy; confirm against the upstream file.
SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Op0 = Op.getOperand(0);
  SDOperand Op1 = Op.getOperand(1);
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType SrcVT = Op1.getValueType();
  const Type *SrcTy =  MVT::getTypeForValueType(SrcVT);

  // If second operand is smaller, extend it first.
  if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) {
    Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1);
    SrcVT = VT;
    SrcTy = MVT::getTypeForValueType(SrcVT);
  // And if it is bigger, shrink it first.
  if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
    Op1 = DAG.getNode(ISD::FP_ROUND, VT, Op1);
    SrcVT = VT;
    SrcTy = MVT::getTypeForValueType(SrcVT);
  }

  // At this point the operands and the result should have the same
  // type, and that won't be f80 since that is not custom lowered.
  // First get the sign bit of second operand.
  std::vector<Constant*> CV;
  if (SrcVT == MVT::f64) {
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 1ULL << 63))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 1U << 31))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
  Constant *C = ConstantVector::get(CV);
  SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
  SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0,
                                false, 16);
  SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);

  // Shift sign bit right or left if the two operands have different types.
  if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
    // Op0 is MVT::f32, Op1 is MVT::f64.
    SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit);
    SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit,
                          DAG.getConstant(32, MVT::i32));
    SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
    SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
                          DAG.getConstant(0, getPointerTy()));
  }

  // Clear first operand sign bit.
  CV.clear();
  if (VT == MVT::f64) {
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, ~(1ULL << 63)))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(64, 0))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, ~(1U << 31)))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
    CV.push_back(ConstantFP::get(SrcTy, APFloat(APInt(32, 0))));
  C = ConstantVector::get(CV);
  CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
  SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
                                false, 16);
  SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);

  // Or the value with the sign bit.
  return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
// LowerSETCC - Lower a SETCC node (operands: LHS, RHS, condition code) to an
// X86ISD::CMP feeding X86ISD::SETCC.  When translateX86CC() succeeds a single
// SETCC with the translated condition is returned.  Otherwise only SETOEQ and
// SETUNE are handled, each built from two SETCC nodes on the same compare that
// are combined with AND / OR respectively.
// NOTE(review): the closing brace of the `if (translateX86CC(...))` block is
// absent from the visible text -- a line appears to be missing from this copy.
SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
  SDOperand Cond;
  SDOperand Op0 = Op.getOperand(0);
  SDOperand Op1 = Op.getOperand(1);
  SDOperand CC = Op.getOperand(2);
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
  unsigned X86CC;

  // Op0 and Op1 are passed to translateX86CC along with the DAG before the
  // compare is built from them.
  if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
                     Op0, Op1, DAG)) {
    Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1);
    return DAG.getNode(X86ISD::SETCC, MVT::i8,
                       DAG.getConstant(X86CC, MVT::i8), Cond);
  // No single x86 condition code: combine two flag tests on one compare.
  Cond = DAG.getNode(X86ISD::CMP, MVT::i32, Op0, Op1);
  switch (SetCCOpcode) {
  default: assert(false && "Illegal floating point SetCC!");
  case ISD::SETOEQ: {  // !PF & ZF
    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                 DAG.getConstant(X86::COND_NP, MVT::i8), Cond);
    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                 DAG.getConstant(X86::COND_E, MVT::i8), Cond);
    return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
  }
  case ISD::SETUNE: {  // PF | !ZF
    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                 DAG.getConstant(X86::COND_P, MVT::i8), Cond);
    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                 DAG.getConstant(X86::COND_NE, MVT::i8), Cond);
    return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
  }
  }
}


// LowerSELECT - Lower a SELECT node (operands: condition, true value, false
// value) to X86ISD::CMOV.  If the condition is an X86ISD::SETCC whose flag
// input is a CMP/COMI/UCOMI node, and the conditional move is legal for the
// result type, the SETCC's condition code and flag-producing node are reused
// directly.  Otherwise the condition value is compared against zero and
// COND_NE is used.
// NOTE(review): the function's closing brace is absent from the visible text.
SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = true;
  SDOperand Cond  = Op.getOperand(0);
  SDOperand CC;

  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerSETCC(Cond, DAG);
  // If condition flag is set by a X86ISD::CMP, then use it as the condition
  // setting operand in place of the X86ISD::SETCC.
  if (Cond.getOpcode() == X86ISD::SETCC) {
    CC = Cond.getOperand(0);
    SDOperand Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();
    MVT::ValueType VT = Op.getValueType();
    // hasFPCMov() is consulted for f32/f64 when the corresponding
    // X86ScalarSSE flag is false, and always for f80.
    bool IllegalFPCMov = false;
    if (VT == MVT::f32 && !X86ScalarSSEf32)
      IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
    else if (VT == MVT::f64 && !X86ScalarSSEf64)
      IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
    else if (VT == MVT::f80)
      IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
    if ((Opc == X86ISD::CMP ||
         Opc == X86ISD::COMI ||
         Opc == X86ISD::UCOMI) && !IllegalFPCMov) {
      addTest = false;
    }
  }

  // Fall back to testing the condition value itself against zero.
  if (addTest) {
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
    Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8));
  }

  const MVT::ValueType *VTs = DAG.getNodeValueTypes(Op.getValueType(),
                                                    MVT::Flag);
  SmallVector<SDOperand, 4> Ops;
  // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
  // condition is true.
  Ops.push_back(Op.getOperand(2));
  Ops.push_back(Op.getOperand(1));
  Ops.push_back(CC);
  Ops.push_back(Cond);
  return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
// LowerBRCOND - Lower a BRCOND node (operands: chain, condition, destination)
// to X86ISD::BRCOND.  If the condition is an X86ISD::SETCC whose flag input is
// a CMP/COMI/UCOMI node, its condition code and flag-producing node are reused;
// otherwise the condition value is compared against zero and COND_NE is used.
// NOTE(review): the closing brace of the `if (addTest)` block and the tail of
// the final return statement are absent from the visible text -- lines appear
// to be missing from this copy; confirm against the upstream file.
SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = true;
  SDOperand Chain = Op.getOperand(0);
  SDOperand Cond  = Op.getOperand(1);
  SDOperand Dest  = Op.getOperand(2);
  SDOperand CC;
  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerSETCC(Cond, DAG);
  // If condition flag is set by a X86ISD::CMP, then use it as the condition
  // setting operand in place of the X86ISD::SETCC.
  if (Cond.getOpcode() == X86ISD::SETCC) {
    CC = Cond.getOperand(0);

    SDOperand Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();
    if (Opc == X86ISD::CMP ||
        Opc == X86ISD::COMI ||
        Opc == X86ISD::UCOMI) {
      addTest = false;
    }
  }

  // Fall back to testing the condition value itself against zero.
  if (addTest) {
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
    Cond= DAG.getNode(X86ISD::CMP, MVT::i32, Cond, DAG.getConstant(0, MVT::i8));
  return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
// LowerCALL - Dispatch call lowering to a calling-convention specific helper.
// The calling convention is read from operand 1 and the tail-call flag from
// operand 3.  On 64-bit subtargets a fastcc tail call (with PerformTailCallOpt
// set) goes to LowerX86_TailCallTo and everything else to
// LowerX86_64CCCCallTo.
// NOTE(review): the `switch` header for the 32-bit path and the end of the
// function are absent from the visible text -- lines appear to be missing from
// this copy; confirm against the upstream file.
SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;

   if (Subtarget->is64Bit())
     if(CallingConv==CallingConv::Fast && isTailCall && PerformTailCallOpt)
       return LowerX86_TailCallTo(Op, DAG, CallingConv);
     else
       return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
      assert(0 && "Unsupported calling convention");
    // fastcc: only tail calls with PerformTailCallOpt take the tail-call path.
    case CallingConv::Fast:
      if (isTailCall && PerformTailCallOpt)
        return LowerX86_TailCallTo(Op, DAG, CallingConv);
      else
        return LowerCCCCallTo(Op,DAG, CallingConv);
    case CallingConv::C:
    case CallingConv::X86_StdCall:
      return LowerCCCCallTo(Op, DAG, CallingConv);
    case CallingConv::X86_FastCall:
      return LowerFastCCCallTo(Op, DAG, CallingConv);
Anton Korobeynikov's avatar
Anton Korobeynikov committed

// Lower dynamic stack allocation to an _alloca call for Cygwin/Mingw targets.
// Calls to _alloca are needed to probe the stack when allocating more than 4K
// bytes in one go. Touching the stack at 4K increments is necessary to ensure
// that the guard pages used by the OS virtual memory manager are allocated in
// the correct sequence.
//
// Operands used: 0 = chain, 1 = allocation size.  The size is copied into EAX,
// X86ISD::CALL is issued on the "_alloca" symbol, and the stack pointer
// register is read back afterwards.  The result is a MERGE_VALUES of that
// stack pointer value and the output chain.
// NOTE(review): the function's closing brace is absent from the visible text.
SDOperand
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
                                           SelectionDAG &DAG) {
Anton Korobeynikov's avatar
Anton Korobeynikov committed
  assert(Subtarget->isTargetCygMing() &&
         "This should be used only on Cygwin/Mingw targets");
  
  // Get the inputs.
  SDOperand Chain = Op.getOperand(0);
  SDOperand Size  = Op.getOperand(1);
  // FIXME: Ensure alignment here

  SDOperand Flag;
  
  MVT::ValueType IntPtr = getPointerTy();
  MVT::ValueType SPTy = (Subtarget->is64Bit() ? MVT::i64 : MVT::i32);

  // Pass the requested size to _alloca in EAX.
  Chain = DAG.getCopyToReg(Chain, X86::EAX, Size, Flag);
  Flag = Chain.getValue(1);

  SDVTList  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDOperand Ops[] = { Chain,
                      DAG.getTargetExternalSymbol("_alloca", IntPtr),
                      DAG.getRegister(X86::EAX, IntPtr),
                      Flag };
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops, 4);
  Flag = Chain.getValue(1);

  // Read the stack pointer after the call; Chain now refers to the chain
  // result (value 1) of the CopyFromReg node.
  Chain = DAG.getCopyFromReg(Chain, X86StackPtr, SPTy).getValue(1);
  
  std::vector<MVT::ValueType> Tys;
  Tys.push_back(SPTy);
  Tys.push_back(MVT::Other);
  // Value 0 of the same CopyFromReg node is the stack pointer that was read.
  SDOperand Ops1[2] = { Chain.getValue(0), Chain };
  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
// LowerFORMAL_ARGUMENTS - Dispatch incoming-argument lowering to a
// calling-convention specific helper.  The calling convention is read from
// operand 1.  For an externally visible function named "main" on Cygwin/Mingw
// targets the frame pointer is forced.  64-bit subtargets always use
// LowerX86_64CCCArguments.
// NOTE(review): the `switch` header for the 32-bit path and the end of the
// function are absent from the visible text -- lines appear to be missing from
// this copy; confirm against the upstream file.
SDOperand
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  const Function* Fn = MF.getFunction();
  if (Fn->hasExternalLinkage() &&
Anton Korobeynikov's avatar
Anton Korobeynikov committed
      Subtarget->isTargetCygMing() &&
      Fn->getName() == "main")
    MF.getInfo<X86MachineFunctionInfo>()->setForceFramePointer(true);
  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
Evan Cheng's avatar
Evan Cheng committed
  if (Subtarget->is64Bit())
    return LowerX86_64CCCArguments(Op, DAG);
    default:
      assert(0 && "Unsupported calling convention");
    case CallingConv::Fast:
      return LowerCCCArguments(Op,DAG, true);
    case CallingConv::C:
      return LowerCCCArguments(Op, DAG);
    // stdcall and fastcall also record the decoration style on the
    // machine function info.
    case CallingConv::X86_StdCall:
      MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(StdCall);
      return LowerCCCArguments(Op, DAG, true);
    case CallingConv::X86_FastCall:
      MF.getInfo<X86MachineFunctionInfo>()->setDecorationStyle(FastCall);
      return LowerFastCCArguments(Op, DAG);
// LowerMEMSET - Lower a MEMSET node.  Operands as used here: 0 = chain,
// 1 = destination address, 2 = fill value, 3 = size, 4 = alignment.
// Two strategies are visible:
//   * a call to the library "memset" (destination, zero-extended value, size);
//   * an inline X86ISD::REP_STOS with the fill value in AL/AX/EAX/RAX, the
//     count in ECX/RCX and the destination in EDI/RDI, optionally followed by
//     a second byte-wide REP_STOS or by individual stores for leftover bytes.
// NOTE(review): this copy is interleaved with web-page residue ("... avatar" /
// "... committed" lines) and is missing several source lines (the head of the
// libcall condition, some `break`s, closing braces and the tails of the
// getStore calls); confirm every detail against the upstream file.
SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand InFlag(0, 0);
  SDOperand Chain = Op.getOperand(0);
  unsigned Align =
    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
  if (Align == 0) Align = 1;

  // I is null when the size operand is not a compile-time constant.
  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  // If not DWORD aligned or size is more than the threshold, call memset.
  // The libc version is likely to be faster for these cases. It can use the
  // address value and run time information about the CPU.
      (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) {
    MVT::ValueType IntPtr = getPointerTy();
    const Type *IntPtrTy = getTargetData()->getIntPtrType();
Reid Spencer's avatar
Reid Spencer committed
    TargetLowering::ArgListTy Args; 
    TargetLowering::ArgListEntry Entry;
    Entry.Node = Op.getOperand(1);
    Entry.Ty = IntPtrTy;
    Args.push_back(Entry);
    // Extend the unsigned i8 argument to be an int value for the call.
Reid Spencer's avatar
Reid Spencer committed
    Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
    Entry.Ty = IntPtrTy;
    Args.push_back(Entry);
    Entry.Node = Op.getOperand(3);
    Args.push_back(Entry);
    std::pair<SDOperand,SDOperand> CallResult =
Reid Spencer's avatar
Reid Spencer committed
      LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
                  DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
    return CallResult.second;
  }

  MVT::ValueType AVT;
  SDOperand Count;
  // ValC is null when the fill value is not a compile-time constant.
  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  unsigned BytesLeft = 0;
  bool TwoRepStos = false;
  if (ValC) {
    unsigned ValReg;
Evan Cheng's avatar
Evan Cheng committed
    uint64_t Val = ValC->getValue() & 255;

    // If the value is a constant, then we can potentially use larger sets.
    // The fill byte is replicated across the wider store unit.
    switch (Align & 3) {
      case 2:   // WORD aligned
        AVT = MVT::i16;
        ValReg = X86::AX;
Evan Cheng's avatar
Evan Cheng committed
        Val = (Val << 8) | Val;
Evan Cheng's avatar
Evan Cheng committed
      case 0:  // DWORD aligned
Evan Cheng's avatar
Evan Cheng committed
        ValReg = X86::EAX;
        Val = (Val << 8)  | Val;
        Val = (Val << 16) | Val;
Evan Cheng's avatar
Evan Cheng committed
        if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) {  // QWORD aligned
          AVT = MVT::i64;
          ValReg = X86::RAX;
          Val = (Val << 32) | Val;
        }
        break;
      default:  // Byte aligned
        AVT = MVT::i8;
        ValReg = X86::AL;
Evan Cheng's avatar
Evan Cheng committed
        Count = Op.getOperand(3);
Evan Cheng's avatar
Evan Cheng committed
    if (AVT > MVT::i8) {
      if (I) {
        // Constant size: count whole units, remember the remainder in bytes.
        unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
        Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
        BytesLeft = I->getValue() % UBytes;
      } else {
        assert(AVT >= MVT::i32 &&
               "Do not use rep;stos if not at least DWORD aligned");
        // Variable size: the count is the size shifted right by 2, and a
        // second REP_STOS is requested for the remainder.
        Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
                            Op.getOperand(3), DAG.getConstant(2, MVT::i8));
        TwoRepStos = true;
      }
    }

    Chain  = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
                              InFlag);
    InFlag = Chain.getValue(1);
  } else {
    // Non-constant fill value: byte-wide stores with the value in AL.
    AVT = MVT::i8;
    Count  = Op.getOperand(3);
    Chain  = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
    InFlag = Chain.getValue(1);
Evan Cheng's avatar
Evan Cheng committed
  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
                            Count, InFlag);
Evan Cheng's avatar
Evan Cheng committed
  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
                            Op.getOperand(1), InFlag);
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getValueType(AVT));
  Ops.push_back(InFlag);
  Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
  if (TwoRepStos) {
    // Second, byte-wide REP_STOS for the low bits of the size (mask 7 for
    // i64 units, 3 otherwise).
    InFlag = Chain.getValue(1);
    Count = Op.getOperand(3);
    MVT::ValueType CVT = Count.getValueType();
    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
Evan Cheng's avatar
Evan Cheng committed
                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
    Chain  = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
                              Left, InFlag);
    Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    Ops.clear();
    Ops.push_back(Chain);
    Ops.push_back(DAG.getValueType(MVT::i8));
    Ops.push_back(InFlag);
    Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
Evan Cheng's avatar
Evan Cheng committed
    // Issue stores for the last 1 - 7 bytes.
    SDOperand Value;
    unsigned Val = ValC->getValue() & 255;
    unsigned Offset = I->getValue() - BytesLeft;
    SDOperand DstAddr = Op.getOperand(1);
    MVT::ValueType AddrVT = DstAddr.getValueType();
Evan Cheng's avatar
Evan Cheng committed
    if (BytesLeft >= 4) {
      Val = (Val << 8)  | Val;
      Val = (Val << 16) | Val;
      Value = DAG.getConstant(Val, MVT::i32);
Evan Cheng's avatar
Evan Cheng committed
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                       DAG.getConstant(Offset, AddrVT)),
Evan Cheng's avatar
Evan Cheng committed
      BytesLeft -= 4;
      Offset += 4;
    }
    if (BytesLeft >= 2) {
      Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
Evan Cheng's avatar
Evan Cheng committed
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                       DAG.getConstant(Offset, AddrVT)),
      BytesLeft -= 2;
      Offset += 2;
    }
    if (BytesLeft == 1) {
      Value = DAG.getConstant(Val, MVT::i8);
Evan Cheng's avatar
Evan Cheng committed
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                       DAG.getConstant(Offset, AddrVT)),
// LowerMEMCPYInline - Emit an inline copy of Size bytes from Source to Dest.
// The copy unit (i8 / i16 / i64, selected from Align) determines the element
// count placed in ECX/RCX for an X86ISD::REP_MOVS node; the Size % unit bytes
// that remain are copied with individual load/store pairs at increasing
// offsets.
// NOTE(review): this copy is interleaved with web-page residue ("... avatar" /
// "... committed" lines) and is missing several source lines (the InFlag
// declaration, the DWORD-aligned AVT assignment, the tails of the RDI/RSI
// CopyToReg and getLoad/getStore calls, closing braces and the return);
// confirm every detail against the upstream file.
SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain,
                                               SDOperand Dest,
                                               SDOperand Source,
                                               unsigned Size,
                                               unsigned Align,
                                               SelectionDAG &DAG) {
  MVT::ValueType AVT;
  unsigned BytesLeft = 0;
  switch (Align & 3) {
    case 2:   // WORD aligned
      AVT = MVT::i16;
      break;
Evan Cheng's avatar
Evan Cheng committed
    case 0:  // DWORD aligned
Evan Cheng's avatar
Evan Cheng committed
      if (Subtarget->is64Bit() && ((Align & 0xF) == 0))  // QWORD aligned
        AVT = MVT::i64;
      break;
    default:  // Byte aligned
      AVT = MVT::i8;
      break;
  }
  // Whole units go through REP_MOVS; the remainder is handled afterwards.
  unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
  SDOperand Count = DAG.getConstant(Size / UBytes, getPointerTy());
  BytesLeft = Size % UBytes;
Evan Cheng's avatar
Evan Cheng committed
  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
                            Count, InFlag);
Evan Cheng's avatar
Evan Cheng committed
  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
Evan Cheng's avatar
Evan Cheng committed
  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getValueType(AVT));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
Evan Cheng's avatar
Evan Cheng committed
    // Issue loads and stores for the last 1 - 7 bytes.
    unsigned Offset = Size - BytesLeft;
    SDOperand DstAddr = Dest;
    MVT::ValueType DstVT = DstAddr.getValueType();
    SDOperand SrcAddr = Source;
    MVT::ValueType SrcVT = SrcAddr.getValueType();
    SDOperand Value;
Evan Cheng's avatar
Evan Cheng committed
    if (BytesLeft >= 4) {
      Value = DAG.getLoad(MVT::i32, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
Evan Cheng's avatar
Evan Cheng committed
      Chain = Value.getValue(1);
Evan Cheng's avatar
Evan Cheng committed
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                       DAG.getConstant(Offset, DstVT)),
Evan Cheng's avatar
Evan Cheng committed
      BytesLeft -= 4;
      Offset += 4;
    }
    if (BytesLeft >= 2) {
      Value = DAG.getLoad(MVT::i16, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
Evan Cheng's avatar
Evan Cheng committed
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                       DAG.getConstant(Offset, DstVT)),
      BytesLeft -= 2;
      Offset += 2;
    }

    if (BytesLeft == 1) {
      Value = DAG.getLoad(MVT::i8, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
Evan Cheng's avatar
Evan Cheng committed
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                       DAG.getConstant(Offset, DstVT)),
Chris Lattner's avatar
Chris Lattner committed
/// Expand the result of: i64,outchain = READCYCLECOUNTER inchain
///
/// Emits X86ISD::RDTSC_DAG on the incoming chain and reads the result back
/// from registers.  On 64-bit subtargets RAX and RDX are read as i64 and
/// combined as RAX | (RDX << 32); otherwise EAX and EDX are read as i32 and
/// joined with BUILD_PAIR.  Either way the returned node is a MERGE_VALUES of
/// the i64 value and the output chain.
/// NOTE(review): in the 64-bit branch the `SDOperand Ops[] = {` opener, its
/// closing `};` and the branch's closing brace are absent from the visible
/// text -- lines appear to be missing from this copy.
SDNode *X86TargetLowering::ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG){
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
Chris Lattner's avatar
Chris Lattner committed
  SDOperand TheChain = N->getOperand(0);
  SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheChain, 1);
  if (Subtarget->is64Bit()) {
Chris Lattner's avatar
Chris Lattner committed
    SDOperand rax = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
    SDOperand rdx = DAG.getCopyFromReg(rax.getValue(1), X86::RDX,
                                       MVT::i64, rax.getValue(2));
    // Move the RDX value into the upper 32 bits before OR-ing with RAX.
    SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, rdx,
                                DAG.getConstant(32, MVT::i8));
Chris Lattner's avatar
Chris Lattner committed
      DAG.getNode(ISD::OR, MVT::i64, rax, Tmp), rdx.getValue(1)
    
    Tys = DAG.getVTList(MVT::i64, MVT::Other);
Chris Lattner's avatar
Chris Lattner committed
    return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val;
Chris Lattner's avatar
Chris Lattner committed
  SDOperand eax = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
  SDOperand edx = DAG.getCopyFromReg(eax.getValue(1), X86::EDX,
                                       MVT::i32, eax.getValue(2));
  // Use a buildpair to merge the two 32-bit values into a 64-bit one. 
  SDOperand Ops[] = { eax, edx };
  Ops[0] = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Ops, 2);

  // Use a MERGE_VALUES to return the value and chain.
  Ops[1] = edx.getValue(1);
  Tys = DAG.getVTList(MVT::i64, MVT::Other);
  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2).Val;
}

// LowerVASTART - Lower a VASTART node.  Operands as used here: 0 = chain,
// 1 = address of the va_list object, 2 = source value for the stores.
// On 32-bit subtargets a single store writes the VarArgsFrameIndex address
// into the va_list.  Otherwise four fields are stored at offsets 0, 4, 8 and
// 16 from the va_list address, each store using the incoming chain, and the
// four stores are joined with a TokenFactor.
// NOTE(review): the function's closing brace is absent from the visible text,
// and web-page residue lines are interleaved with the code in this copy.
SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
  SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));

Evan Cheng's avatar
Evan Cheng committed
  if (!Subtarget->is64Bit()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
    return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(),
                        SV->getOffset());
Evan Cheng's avatar
Evan Cheng committed
  }

  // __va_list_tag:
  //   gp_offset         (0 - 6 * 8)
  //   fp_offset         (48 - 48 + 8 * 16)
  //   overflow_arg_area (point to parameters coming in memory).
  //   reg_save_area
  SmallVector<SDOperand, 8> MemOps;
Evan Cheng's avatar
Evan Cheng committed
  SDOperand FIN = Op.getOperand(1);
  // Store gp_offset
Evan Cheng's avatar
Evan Cheng committed
  SDOperand Store = DAG.getStore(Op.getOperand(0),
                                 DAG.getConstant(VarArgsGPOffset, MVT::i32),
                                 FIN, SV->getValue(), SV->getOffset());
Evan Cheng's avatar
Evan Cheng committed
  MemOps.push_back(Store);

  // Store fp_offset
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                    DAG.getConstant(4, getPointerTy()));
Evan Cheng's avatar
Evan Cheng committed
  Store = DAG.getStore(Op.getOperand(0),
                       DAG.getConstant(VarArgsFPOffset, MVT::i32),
                       FIN, SV->getValue(), SV->getOffset());
Evan Cheng's avatar
Evan Cheng committed
  MemOps.push_back(Store);

  // Store ptr to overflow_arg_area
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                    DAG.getConstant(4, getPointerTy()));
  SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
  Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(),
                       SV->getOffset());
Evan Cheng's avatar
Evan Cheng committed
  MemOps.push_back(Store);

  // Store ptr to reg_save_area.
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                    DAG.getConstant(8, getPointerTy()));
  SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
  Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(),
                       SV->getOffset());
Evan Cheng's avatar
Evan Cheng committed
  MemOps.push_back(Store);
  // Tie the four independent stores together into one output chain.
  return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
// LowerVACOPY - Lower a VACOPY node.  Operands as used here: 0 = chain,
// 1 = destination pointer, 2 = source pointer, 3 / 4 = source values that
// describe the destination / source memory.  Returns the output chain.
SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) {
  // X86-64 va_list is a struct { i32, i32, i8*, i8* }; it is moved below as
  // three 8-byte words.
  const unsigned NumWords = 3;

  SDOperand Ch   = Op.getOperand(0);
  SDOperand To   = Op.getOperand(1);
  SDOperand From = Op.getOperand(2);
  SrcValueSDNode *ToSV   = cast<SrcValueSDNode>(Op.getOperand(3));
  SrcValueSDNode *FromSV = cast<SrcValueSDNode>(Op.getOperand(4));

  // The source operand is dereferenced first: a pointer-sized value is loaded
  // from it and becomes the address the words are read from.
  From = DAG.getLoad(getPointerTy(), Ch, From,
                     FromSV->getValue(), FromSV->getOffset());
  Ch = From.getValue(1);

  for (unsigned Word = 0; Word != NumWords; ++Word) {
    if (Word != 0) {
      // Step both cursors to the next 8-byte word.
      From = DAG.getNode(ISD::ADD, getPointerTy(), From,
                         DAG.getConstant(8, getPointerTy()));
      To = DAG.getNode(ISD::ADD, getPointerTy(), To,
                       DAG.getConstant(8, getPointerTy()));
    }

    // Load one word and store it, threading the chain through both nodes.
    SDOperand Loaded = DAG.getLoad(MVT::i64, Ch, From,
                                   FromSV->getValue(), FromSV->getOffset());
    Ch = Loaded.getValue(1);
    Ch = DAG.getStore(Ch, Loaded, To,
                      ToSV->getValue(), ToSV->getOffset());
  }
  return Ch;
}

// LowerINTRINSIC_WO_CHAIN - Custom lowering for selected intrinsics; the
// intrinsic number is read from operand 0.  Only the SSE/SSE2 comi* / ucomi*
// scalar comparison intrinsics are handled here: each is mapped to
// X86ISD::COMI or X86ISD::UCOMI plus an ISD condition code, the condition is
// translated with translateX86CC(), and the result is an X86ISD::SETCC (i8)
// any-extended to i32.  Every other intrinsic returns an empty SDOperand.
// NOTE(review): the declaration of X86CC and the closing braces of the inner
// switch, the case block, the outer switch and the function are absent from
// the visible text -- lines appear to be missing from this copy.
SDOperand
X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
  switch (IntNo) {
  default: return SDOperand();    // Don't custom lower most intrinsics.
    // Comparison intrinsics.
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomineq_sd: {
    unsigned Opc = 0;
    ISD::CondCode CC = ISD::SETCC_INVALID;
    // Pick the compare opcode (COMI vs. UCOMI) and the condition code; the
    // _ss and _sd flavours of each comparison share one entry.
    switch (IntNo) {
    default: break;
    case Intrinsic::x86_sse_comieq_ss:
    case Intrinsic::x86_sse2_comieq_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETEQ;
      break;
    case Intrinsic::x86_sse_comilt_ss:
    case Intrinsic::x86_sse2_comilt_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETLT;
      break;
    case Intrinsic::x86_sse_comile_ss:
    case Intrinsic::x86_sse2_comile_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETLE;
      break;
    case Intrinsic::x86_sse_comigt_ss:
    case Intrinsic::x86_sse2_comigt_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETGT;
      break;
    case Intrinsic::x86_sse_comige_ss:
    case Intrinsic::x86_sse2_comige_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETGE;
      break;
    case Intrinsic::x86_sse_comineq_ss:
    case Intrinsic::x86_sse2_comineq_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETNE;
      break;
    case Intrinsic::x86_sse_ucomieq_ss:
    case Intrinsic::x86_sse2_ucomieq_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETEQ;
      break;
    case Intrinsic::x86_sse_ucomilt_ss:
    case Intrinsic::x86_sse2_ucomilt_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETLT;
      break;
    case Intrinsic::x86_sse_ucomile_ss:
    case Intrinsic::x86_sse2_ucomile_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETLE;
      break;
    case Intrinsic::x86_sse_ucomigt_ss:
    case Intrinsic::x86_sse2_ucomigt_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETGT;
      break;
    case Intrinsic::x86_sse_ucomige_ss:
    case Intrinsic::x86_sse2_ucomige_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETGE;
      break;
    case Intrinsic::x86_sse_ucomineq_ss:
    case Intrinsic::x86_sse2_ucomineq_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETNE;
      break;
    // Operands 1 and 2 are the values being compared; they are handed to
    // translateX86CC before the compare node is built from them.
    SDOperand LHS = Op.getOperand(1);
    SDOperand RHS = Op.getOperand(2);
    translateX86CC(CC, true, X86CC, LHS, RHS, DAG);
    SDOperand Cond = DAG.getNode(Opc, MVT::i32, LHS, RHS);
    SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                  DAG.getConstant(X86CC, MVT::i8), Cond);
    return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
// LowerRETURNADDR - Lower a RETURNADDR node.  Operand 0 is the constant frame
// depth.  Only depth 0 is handled: the result is a pointer-sized load from the
// return-address frame index.  Any other depth yields an empty SDOperand.
SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) {
  ConstantSDNode *Depth = cast<ConstantSDNode>(Op.getOperand(0));

  // Walking up to outer frames (depth > 0) is not supported yet.
  if (Depth->getValue() > 0)
    return SDOperand();

  // The return address is read straight out of its stack slot.
  SDOperand Slot = getReturnAddressFrameIndex(DAG);
  return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Slot, NULL, 0);
}

// LowerFRAMEADDR - Lower a FRAMEADDR node.  Operand 0 is the constant frame
// depth.  Only depth 0 is handled: the result is the address one stack slot
// below the return-address frame index.  Any other depth yields an empty
// SDOperand.
SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) {
  // Depths > 0 not supported yet!
  if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
    return SDOperand();

  // The distance below the return address is one pointer-sized stack slot:
  // 4 bytes on x86-32 but 8 bytes on x86-64, so it must not be hard-coded.
  const unsigned SlotSize = Subtarget->is64Bit() ? 8 : 4;

  SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
  return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
                     DAG.getConstant(SlotSize, getPointerTy()));
}

// LowerFRAME_TO_ARGS_OFFSET - Lower a FRAME_TO_ARGS_OFFSET node.  On 32-bit
// subtargets the result is the pointer-typed constant 8; on x86-64 an empty
// SDOperand is returned.
SDOperand X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDOperand Op,
                                                       SelectionDAG &DAG) {
  if (!Subtarget->is64Bit())
    return DAG.getConstant(8, getPointerTy());

  // Is not yet supported on x86-64
  return SDOperand();
}

/// Lower the eh_return builtin (32-bit only; asserts on x86-64).
///
/// Operands of \p Op as read below: 0 = incoming chain, 1 = offset,
/// 2 = handler. The handler is stored at (frame register - (-4)) + offset,
/// that same address is copied into ECX, ECX is marked live-out of the
/// function, and an X86ISD::EH_RETURN node carrying the chain and ECX is
/// returned.
SDOperand X86TargetLowering::LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG)
{
  assert(!Subtarget->is64Bit() &&
         "Lowering of eh_return builtin is not supported yet on x86-64");

  const MVT::ValueType PtrVT = getPointerTy();
  MachineFunction &Fn = DAG.getMachineFunction();

  SDOperand InChain = Op.getOperand(0);
  SDOperand Adjust  = Op.getOperand(1);
  SDOperand Landing = Op.getOperand(2);

  // Address of the slot that receives the handler.
  SDOperand FrameReg  = DAG.getRegister(RegInfo->getFrameRegister(Fn), PtrVT);
  SDOperand MinusFour = DAG.getConstant(-4UL, PtrVT);
  SDOperand Slot      = DAG.getNode(ISD::SUB, PtrVT, FrameReg, MinusFour);
  Slot = DAG.getNode(ISD::ADD, PtrVT, Slot, Adjust);

  // Write the handler, then hand the slot address over in ECX.
  SDOperand OutChain = DAG.getStore(InChain, Landing, Slot, NULL, 0);
  OutChain = DAG.getCopyToReg(OutChain, X86::ECX, Slot);
  Fn.addLiveOut(X86::ECX);

  SDOperand ECXReg = DAG.getRegister(X86::ECX, PtrVT);
  return DAG.getNode(X86ISD::EH_RETURN, MVT::Other, OutChain, ECXReg);
}

/// Lower trampoline initialisation for 32-bit x86.
///
/// Operands of \p Op as read below: 0 = chain, 1 = trampoline address,
/// 2 = nested function address, 3 = 'nest' parameter value, 4 = SrcValue
/// describing the trampoline memory, 5 = SrcValue wrapping the nested
/// Function.
///
/// Four stores, all chained on operand 0, fill in the trampoline:
///   offset 0   one byte: MOV32ri base opcode | nest register number
///   offset 1   the 'nest' value
///   offset 5   one byte: JMP base opcode
///   offset 6   i32 displacement FPtr - (Trmp + 10)
///
/// Returns MERGE_VALUES of { Trmp, TokenFactor(the four stores) }. On
/// x86-64 the lowering is not implemented and an empty SDOperand is
/// returned.
SDOperand X86TargetLowering::LowerTRAMPOLINE(SDOperand Op,
                                             SelectionDAG &DAG) {
  SDOperand Root = Op.getOperand(0);
  SDOperand Trmp = Op.getOperand(1); // trampoline
  SDOperand FPtr = Op.getOperand(2); // nested function
  SDOperand Nest = Op.getOperand(3); // 'nest' parameter value

  SrcValueSDNode *TrmpSV = cast<SrcValueSDNode>(Op.getOperand(4));

  if (Subtarget->is64Bit()) {
    return SDOperand(); // not yet supported
  } else {
    Function *Func = (Function *)
      cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
    unsigned CC = Func->getCallingConv();

    // Register that carries the 'nest' value into the nested function.
    unsigned NestReg;

    switch (CC) {
    default:
      // With assertions compiled out, control falls through and the
      // convention is handled like CallingConv::C.
      assert(0 && "Unsupported calling convention");
    case CallingConv::C:
    case CallingConv::X86_StdCall: {
      // Pass 'nest' parameter in ECX.
      // Must be kept in sync with X86CallingConv.td
      NestReg = X86::ECX;

      // Check that ECX wasn't needed by an 'inreg' parameter.
      const FunctionType *FTy = Func->getFunctionType();
      const ParamAttrsList *Attrs = FTy->getParamAttrs();

      if (Attrs && !Func->isVarArg()) {
        unsigned InRegCount = 0;
        unsigned Idx = 1; // attribute index of the first parameter

        for (FunctionType::param_iterator I = FTy->param_begin(),
             E = FTy->param_end(); I != E; ++I, ++Idx)
          if (Attrs->paramHasAttr(Idx, ParamAttr::InReg))
            // FIXME: should only count parameters that are lowered to integers.
            // Each 'inreg' parameter counts as ceil(bits / 32) registers.
            InRegCount += (getTargetData()->getTypeSizeInBits(*I) + 31) / 32;

        if (InRegCount > 2) {
          cerr << "Nest register in use - reduce number of inreg parameters!\n";
          abort();
        }
      }
      break;
    }
    case CallingConv::X86_FastCall:
      // Pass 'nest' parameter in EAX.
      // Must be kept in sync with X86CallingConv.td
      NestReg = X86::EAX;
      break;
    }

    const X86InstrInfo *TII =
      ((X86TargetMachine&)getTargetMachine()).getInstrInfo();

    SDOperand OutChains[4];
    SDOperand Addr, Disp;

    // The jump displacement is taken relative to Trmp + 10, the address
    // just past the bytes written below.
    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(10, MVT::i32));
    Disp = DAG.getNode(ISD::SUB, MVT::i32, FPtr, Addr);

    // Offset 0: move-immediate opcode with the nest register folded in.
    unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri);
    // RegInfo is a pointer, so cast it as a pointer; casting it to a
    // reference would reinterpret the pointer variable itself as the object.
    unsigned char N86Reg  = ((X86RegisterInfo*)RegInfo)->getX86RegNum(NestReg);
    OutChains[0] = DAG.getStore(Root, DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
                                Trmp, TrmpSV->getValue(), TrmpSV->getOffset());

    // Offset 1: the immediate, i.e. the 'nest' value (alignment 1).
    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32));
    OutChains[1] = DAG.getStore(Root, Nest, Addr, TrmpSV->getValue(),
                                TrmpSV->getOffset() + 1, false, 1);

    // Offset 5: jump opcode. The +5 belongs on the source-value offset, not
    // on the Value pointer, matching the other stores in this sequence.
    unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP);
    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32));
    OutChains[2] = DAG.getStore(Root, DAG.getConstant(JMP, MVT::i8), Addr,
                                TrmpSV->getValue(), TrmpSV->getOffset() + 5);

    // Offset 6: jump displacement (alignment 1).
    Addr = DAG.getNode(ISD::ADD, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32));
    OutChains[3] = DAG.getStore(Root, Disp, Addr, TrmpSV->getValue(),
                                TrmpSV->getOffset() + 6, false, 1);

    // Result 0 is the trampoline address, result 1 joins the four stores.
    SDOperand Ops[] =
      { Trmp, DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains, 4) };
    return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(), Ops, 2);
  }
}
SDOperand X86TargetLowering::LowerFLT_ROUNDS(SDOperand Op, SelectionDAG &DAG) {
  /*
   The rounding mode is in bits 11:10 of FPSR, and has the following
   settings:
     00 Round to nearest
     01 Round to -inf
     10 Round to +inf
     11 Round to 0

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    (((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3)
  */

  MachineFunction &MF = DAG.getMachineFunction();
  const TargetMachine &TM = MF.getTarget();
  const TargetFrameInfo &TFI = *TM.getFrameInfo();
  unsigned StackAlignment = TFI.getStackAlignment();
  MVT::ValueType VT = Op.getValueType();

  // Save FP Control Word to stack slot
  int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment);
  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());

  SDOperand Chain = DAG.getNode(X86ISD::FNSTCW16m, MVT::Other,
                                DAG.getEntryNode(), StackSlot);

  // Load FP Control Word from stack slot
  SDOperand CWD = DAG.getLoad(MVT::i16, Chain, StackSlot, NULL, 0);

  // Transform as necessary
  SDOperand CWD1 =
    DAG.getNode(ISD::SRL, MVT::i16,
                DAG.getNode(ISD::AND, MVT::i16,
                            CWD, DAG.getConstant(0x800, MVT::i16)),
                DAG.getConstant(11, MVT::i8));
  SDOperand CWD2 =